diff options
-rw-r--r-- | ChangeLog | 1 | ||||
-rw-r--r-- | configure.ac | 1 | ||||
-rw-r--r-- | src/libmpeg2/cpu_state.c | 4 | ||||
-rw-r--r-- | src/libmpeg2/idct.c | 2 | ||||
-rw-r--r-- | src/libmpeg2/idct_mmx.c | 2 | ||||
-rw-r--r-- | src/libmpeg2/motion_comp.c | 2 | ||||
-rw-r--r-- | src/post/deinterlace/plugins/greedy.c | 2 | ||||
-rw-r--r-- | src/post/deinterlace/plugins/greedy2frame_template.c | 8 | ||||
-rw-r--r-- | src/post/deinterlace/plugins/greedyh.asm | 2 | ||||
-rw-r--r-- | src/post/deinterlace/plugins/linearblend.c | 10 | ||||
-rw-r--r-- | src/post/deinterlace/plugins/tomsmocomp/TomsMoCompAll.inc | 2 | ||||
-rw-r--r-- | src/post/deinterlace/plugins/vfir.c | 4 | ||||
-rw-r--r-- | src/post/deinterlace/speedy.c | 44 | ||||
-rw-r--r-- | src/post/goom/convolve_fx.c | 7 | ||||
-rw-r--r-- | src/post/goom/diff_against_release.patch | 164 | ||||
-rw-r--r-- | src/post/goom/mmx.c | 10 | ||||
-rwxr-xr-x | src/post/goom/mmx.h | 8 | ||||
-rw-r--r-- | src/post/goom/xmmx.c | 7 | ||||
-rw-r--r-- | src/post/planar/eq.c | 10 | ||||
-rw-r--r-- | src/post/planar/eq2.c | 10 | ||||
-rw-r--r-- | src/xine-utils/cpu_accel.c | 2 | ||||
-rw-r--r-- | src/xine-utils/memcpy.c | 6 |
22 files changed, 236 insertions, 72 deletions
@@ -11,6 +11,7 @@ xine-lib (1.1.2) * Send events for tvtime filmmode changes * Add an image decoder based on gdk-pixbuf * Add browseable capability to smb input plugin + * Enable AMD64 mmx/sse support in some plugins (tvtime, libmpeg2, goom...) xine-lib (1.1.1) * Improve sound quality when using alsa 1.0.9 or above. diff --git a/configure.ac b/configure.ac index 26dd1bfd0..b7e5598f5 100644 --- a/configure.ac +++ b/configure.ac @@ -1959,6 +1959,7 @@ case "$host_or_hostalias" in x86_64-*) AC_DEFINE_UNQUOTED(ARCH_X86_64,,[Define this if you're running x86 architecture]) AC_DEFINE(FPM_64BIT,1,[Define to select libmad fixed point arithmetic implementation]) + enable_ffmmx="yes" ;; *darwin*) HOST_OS_DARWIN=1 diff --git a/src/libmpeg2/cpu_state.c b/src/libmpeg2/cpu_state.c index 433a85b85..07b4c5b7e 100644 --- a/src/libmpeg2/cpu_state.c +++ b/src/libmpeg2/cpu_state.c @@ -32,7 +32,7 @@ void (* mpeg2_cpu_state_save) (cpu_state_t * state) = NULL; void (* mpeg2_cpu_state_restore) (cpu_state_t * state) = NULL; -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static void state_restore_mmx (cpu_state_t * state) { emms (); @@ -168,7 +168,7 @@ static void state_restore_altivec (cpu_state_t * state) void mpeg2_cpu_state_init (uint32_t mm_accel) { -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) if (mm_accel & MM_ACCEL_X86_MMX) { mpeg2_cpu_state_restore = state_restore_mmx; } diff --git a/src/libmpeg2/idct.c b/src/libmpeg2/idct.c index ac0ad6e0e..157beea31 100644 --- a/src/libmpeg2/idct.c +++ b/src/libmpeg2/idct.c @@ -282,7 +282,7 @@ void mpeg2_idct_init (uint32_t mm_accel) { mpeg2_zero_block = mpeg2_zero_block_c; -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) if (mm_accel & MM_ACCEL_X86_MMXEXT) { #ifdef LOG fprintf (stderr, "Using MMXEXT for IDCT transform\n"); diff --git a/src/libmpeg2/idct_mmx.c b/src/libmpeg2/idct_mmx.c index d3cc9cb0f..ce4bd064b 100644 --- a/src/libmpeg2/idct_mmx.c +++ b/src/libmpeg2/idct_mmx.c @@ -23,7 +23,7 @@ #include "config.h" -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) #include <inttypes.h> diff --git a/src/libmpeg2/motion_comp.c b/src/libmpeg2/motion_comp.c index d20dceef8..8779c1296 100644 --- a/src/libmpeg2/motion_comp.c +++ b/src/libmpeg2/motion_comp.c @@ -42,7 +42,7 @@ void mpeg2_mc_init (uint32_t mm_accel) } #endif -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) if (mm_accel & MM_ACCEL_X86_MMXEXT) { #ifdef LOG fprintf (stderr, "Using MMXEXT for motion compensation\n"); diff --git a/src/post/deinterlace/plugins/greedy.c b/src/post/deinterlace/plugins/greedy.c index 9f0e313b9..fa157258f 100644 --- a/src/post/deinterlace/plugins/greedy.c +++ b/src/post/deinterlace/plugins/greedy.c @@ -64,7 +64,7 @@ static void deinterlace_greedy_packed422_scanline_mmxext( uint8_t *output, deinterlace_scanline_data_t *data, int width ) { -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) mmx_t MaxComb; uint8_t *m0 = data->m0; uint8_t *t1 = data->t1; diff --git a/src/post/deinterlace/plugins/greedy2frame_template.c b/src/post/deinterlace/plugins/greedy2frame_template.c index 7b68d6e46..42c575f58 100644 --- a/src/post/deinterlace/plugins/greedy2frame_template.c +++ b/src/post/deinterlace/plugins/greedy2frame_template.c @@ -1,5 +1,5 @@ /***************************************************************************** -** $Id: greedy2frame_template.c,v 1.8 2005/06/05 16:00:06 miguelfreitas Exp $ +** $Id: greedy2frame_template.c,v 1.9 2006/02/04 14:06:29 miguelfreitas Exp $ ****************************************************************************** ** Copyright (c) 2000 John Adcock, Tom Barry, Steve Grimm All rights reserved. ** port copyright (c) 2003 Miguel Freitas @@ -19,6 +19,10 @@ ** CVS Log ** ** $Log: greedy2frame_template.c,v $ +** Revision 1.9 2006/02/04 14:06:29 miguelfreitas +** Enable AMD64 mmx/sse support in some plugins (tvtime, libmpeg2, goom...) +** patch by dani3l +** ** Revision 1.8 2005/06/05 16:00:06 miguelfreitas ** quite some hacks for gcc 2.95 compatibility ** @@ -112,7 +116,7 @@ static void DeinterlaceGreedy2Frame_MMX(uint8_t *output, int outstride, int bottom_field, int second_field, int width, int height ) #endif { -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) int Line; int stride = width * 2; register uint8_t* M1; diff --git a/src/post/deinterlace/plugins/greedyh.asm b/src/post/deinterlace/plugins/greedyh.asm index a885b1dd3..0bbd745aa 100644 --- a/src/post/deinterlace/plugins/greedyh.asm +++ b/src/post/deinterlace/plugins/greedyh.asm @@ -323,7 +323,7 @@ static void FUNCT_NAME(uint8_t *output, int outstride, } // clear out the MMX registers ready for doing floating point again -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) __asm__ __volatile__ ("emms\n\t"); #endif } diff --git a/src/post/deinterlace/plugins/linearblend.c b/src/post/deinterlace/plugins/linearblend.c index d8ecacefc..3b65e381c 100644 --- a/src/post/deinterlace/plugins/linearblend.c +++ b/src/post/deinterlace/plugins/linearblend.c @@ -45,7 +45,7 @@ static void deinterlace_scanline_linear_blend( uint8_t *output, uint8_t *t0 = data->t0; uint8_t *b0 = data->b0; uint8_t *m1 = data->m1; -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) int i; // Get width in bytes. @@ -110,7 +110,7 @@ static void deinterlace_scanline_linear_blend2( uint8_t *output, uint8_t *t1 = data->t1; uint8_t *b1 = data->b1; -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) int i; // Get width in bytes. @@ -167,7 +167,7 @@ static void deinterlace_scanline_linear_blend2( uint8_t *output, #endif } -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) /* MMXEXT version is about 15% faster with Athlon XP [MF] */ @@ -337,7 +337,7 @@ static deinterlace_method_t linearblendmethod = "BlurTemporal", */ 2, -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) MM_ACCEL_X86_MMX, #else 0, @@ -362,7 +362,7 @@ static deinterlace_method_t linearblendmethod = deinterlace_method_t *linearblend_get_method( void ) { -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) if( xine_mm_accel() & MM_ACCEL_X86_MMXEXT ) return &linearblendmethod_mmxext; else diff --git a/src/post/deinterlace/plugins/tomsmocomp/TomsMoCompAll.inc b/src/post/deinterlace/plugins/tomsmocomp/TomsMoCompAll.inc index 0fd3f451f..5870d77be 100644 --- a/src/post/deinterlace/plugins/tomsmocomp/TomsMoCompAll.inc +++ b/src/post/deinterlace/plugins/tomsmocomp/TomsMoCompAll.inc @@ -246,7 +246,7 @@ static void FUNCT_NAME(uint8_t *output, int outstride, } end: -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) __asm__ __volatile__("emms"); #endif return; diff --git a/src/post/deinterlace/plugins/vfir.c b/src/post/deinterlace/plugins/vfir.c index 6809b2244..3bf03a4a0 100644 --- a/src/post/deinterlace/plugins/vfir.c +++ b/src/post/deinterlace/plugins/vfir.c @@ -51,7 +51,7 @@ static void deinterlace_line( uint8_t *dst, uint8_t *lum_m4, uint8_t *lum_m3, uint8_t *lum_m2, uint8_t *lum_m1, uint8_t *lum, int size ) { -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) mmx_t rounder; rounder.uw[0]=4; @@ -137,7 +137,7 @@ static deinterlace_method_t vfirmethod = "BlurVertical", */ 1, -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) MM_ACCEL_X86_MMXEXT, #else 0, diff --git a/src/post/deinterlace/speedy.c b/src/post/deinterlace/speedy.c index 4d3b9710a..0cf07258b 100644 --- a/src/post/deinterlace/speedy.c +++ b/src/post/deinterlace/speedy.c @@ -178,7 +178,7 @@ static inline __attribute__ ((always_inline,const)) uint8_t clip255( int x ) static unsigned long CombJaggieThreshold = 73; -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static unsigned int comb_factor_packed422_scanline_mmx( uint8_t *top, uint8_t *mid, uint8_t *bot, int width ) { @@ -304,7 +304,7 @@ static unsigned int diff_factor_packed422_scanline_test_c( uint8_t *cur, uint8_t } */ -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static unsigned int diff_factor_packed422_scanline_mmx( uint8_t *cur, uint8_t *old, int width ) { const mmx_t qwYMask = { 0x00ff00ff00ff00ffULL }; @@ -345,7 +345,7 @@ static unsigned int diff_factor_packed422_scanline_mmx( uint8_t *cur, uint8_t *o #define ABS(a) (((a) < 0)?-(a):(a)) -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static void diff_packed422_block8x8_mmx( pulldown_metrics_t *m, uint8_t *old, uint8_t *new, int os, int ns ) { @@ -586,7 +586,7 @@ static void packed422_to_packed444_rec601_scanline_c( uint8_t *dest, uint8_t *sr } } -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static void vfilter_chroma_121_packed422_scanline_mmx( uint8_t *output, int width, uint8_t *m, uint8_t *t, uint8_t *b ) { @@ -652,7 +652,7 @@ static void vfilter_chroma_121_packed422_scanline_c( uint8_t *output, int width, } } -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static void vfilter_chroma_332_packed422_scanline_mmx( uint8_t *output, int width, uint8_t *m, uint8_t *t, uint8_t *b ) { @@ -726,7 +726,7 @@ static void vfilter_chroma_332_packed422_scanline_c( uint8_t *output, int width, } } -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static void kill_chroma_packed422_inplace_scanline_mmx( uint8_t *data, int width ) { const mmx_t ymask = { 0x00ff00ff00ff00ffULL }; @@ -758,7 +758,7 @@ static void kill_chroma_packed422_inplace_scanline_c( uint8_t *data, int width ) } } -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static void invert_colour_packed422_inplace_scanline_mmx( uint8_t *data, int width ) { const mmx_t allones = { 0xffffffffffffffffULL }; @@ -885,7 +885,7 @@ static void interpolate_packed422_scanline_c( uint8_t *output, uint8_t *top, } } -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static void interpolate_packed422_scanline_mmx( uint8_t *output, uint8_t *top, uint8_t *bot, int width ) { @@ -955,7 +955,7 @@ static void interpolate_packed422_scanline_mmx( uint8_t *output, uint8_t *top, } #endif -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static void interpolate_packed422_scanline_mmxext( uint8_t *output, uint8_t *top, uint8_t *bot, int width ) { @@ -1015,7 +1015,7 @@ static void blit_colour_packed422_scanline_c( uint8_t *output, int width, int y, } } -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static void blit_colour_packed422_scanline_mmx( uint8_t *output, int width, int y, int cb, int cr ) { uint32_t colour = cr << 24 | y << 16 | cb << 8 | y; @@ -1055,7 +1055,7 @@ static void blit_colour_packed422_scanline_mmx( uint8_t *output, int width, int } #endif -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static void blit_colour_packed422_scanline_mmxext( uint8_t *output, int width, int y, int cb, int cr ) { uint32_t colour = cr << 24 | y << 16 | cb << 8 | y; @@ -1109,7 +1109,7 @@ static void blit_colour_packed4444_scanline_c( uint8_t *output, int width, } } -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static void blit_colour_packed4444_scanline_mmx( uint8_t *output, int width, int alpha, int luma, int cb, int cr ) @@ -1146,7 +1146,7 @@ static void blit_colour_packed4444_scanline_mmx( uint8_t *output, int width, } #endif -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static void blit_colour_packed4444_scanline_mmxext( uint8_t *output, int width, int alpha, int luma, int cb, int cr ) @@ -1195,14 +1195,14 @@ static void blit_packed422_scanline_c( uint8_t *dest, const uint8_t *src, int wi speedy_memcpy_c( dest, src, width*2 ); } -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static void blit_packed422_scanline_mmx( uint8_t *dest, const uint8_t *src, int width ) { speedy_memcpy_mmx( dest, src, width*2 ); } #endif -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static void blit_packed422_scanline_mmxext( uint8_t *dest, const uint8_t *src, int width ) { speedy_memcpy_mmxext( dest, src, width*2 ); @@ -1267,7 +1267,7 @@ static void composite_packed4444_alpha_to_packed422_scanline_c( uint8_t *output, } } -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static void composite_packed4444_alpha_to_packed422_scanline_mmxext( uint8_t *output, uint8_t *input, uint8_t *foreground, @@ -1409,7 +1409,7 @@ static void composite_packed4444_to_packed422_scanline_c( uint8_t *output, uint8 } -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static void composite_packed4444_to_packed422_scanline_mmxext( uint8_t *output, uint8_t *input, uint8_t *foreground, int width ) { @@ -1543,7 +1543,7 @@ static void composite_alphamask_to_packed4444_scanline_c( uint8_t *output, } } -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static void composite_alphamask_to_packed4444_scanline_mmxext( uint8_t *output, uint8_t *input, uint8_t *mask, @@ -1691,7 +1691,7 @@ static void premultiply_packed4444_scanline_c( uint8_t *output, uint8_t *input, } } -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static void premultiply_packed4444_scanline_mmxext( uint8_t *output, uint8_t *input, int width ) { const mmx_t round = { 0x0080008000800080ULL }; @@ -1747,7 +1747,7 @@ static void blend_packed422_scanline_c( uint8_t *output, uint8_t *src1, } } -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static void blend_packed422_scanline_mmxext( uint8_t *output, uint8_t *src1, uint8_t *src2, int width, int pos ) { @@ -1792,7 +1792,7 @@ static void blend_packed422_scanline_mmxext( uint8_t *output, uint8_t *src1, } #endif -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static void quarter_blit_vertical_packed422_scanline_mmxext( uint8_t *output, uint8_t *one, uint8_t *three, int width ) { @@ -2414,7 +2414,7 @@ void setup_speedy_calls( uint32_t accel, int verbose ) vfilter_chroma_121_packed422_scanline = vfilter_chroma_121_packed422_scanline_c; vfilter_chroma_332_packed422_scanline = vfilter_chroma_332_packed422_scanline_c; -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) if( speedy_accel & MM_ACCEL_X86_MMXEXT ) { if( verbose ) { printf( "speedycode: Using MMXEXT optimized functions.\n" ); diff --git a/src/post/goom/convolve_fx.c b/src/post/goom/convolve_fx.c index e16a17862..ccd24dbd6 100644 --- a/src/post/goom/convolve_fx.c +++ b/src/post/goom/convolve_fx.c @@ -7,6 +7,10 @@ #include <stdlib.h> #include <string.h> +#if HAVE_CONFIG_H +# include "config.h" +#endif + //#define CONV_MOTIF_W 32 //#define CONV_MOTIF_WMASK 0x1f @@ -151,7 +155,8 @@ static void create_output_with_brightness(VisualFX *_this, Pixel *src, Pixel *de ytex = yprime + yi + CONV_MOTIF_W * 0x10000 / 2; yprime += c; -#ifdef HAVE_MMX +#if defined(HAVE_MMX) && ! defined(ARCH_X86_64) +/* This code uses 32-bit registers eax,ecx,esi */ __asm__ __volatile__ ("\n\t pxor %%mm7, %%mm7" /* mm7 = 0 */ "\n\t movd %0, %%mm2" diff --git a/src/post/goom/diff_against_release.patch b/src/post/goom/diff_against_release.patch index 4fc5bd972..026cc9862 100644 --- a/src/post/goom/diff_against_release.patch +++ b/src/post/goom/diff_against_release.patch @@ -1,6 +1,6 @@ diff -ru goom2k4-0/src/filters.c xine-lib/src/post/goom/filters.c ---- goom2k4-0/src/filters.c 2005-02-07 11:46:41.000000000 -0200 -+++ xine-lib/src/post/goom/filters.c 2005-07-18 12:15:50.000000000 -0300 +--- filters.c 2005-02-07 11:46:41.000000000 -0200 ++++ filters.c 2005-07-18 12:15:50.000000000 -0300 @@ -704,7 +704,7 @@ data->general_speed = 0.0f; @@ -11,8 +11,8 @@ diff -ru goom2k4-0/src/filters.c xine-lib/src/post/goom/filters.c data->hypercosEffect = 0; data->vPlaneEffect = 0; diff -ru goom2k4-0/src/goom_core.c xine-lib/src/post/goom/goom_core.c ---- goom2k4-0/src/goom_core.c 2005-02-07 11:46:41.000000000 -0200 -+++ xine-lib/src/post/goom/goom_core.c 2005-07-19 12:39:22.000000000 -0300 +--- goom_core.c 2005-02-07 11:46:41.000000000 -0200 ++++ goom_core.c 2005-07-19 12:39:22.000000000 -0300 @@ -26,6 +26,8 @@ #include "goom_fx.h" #include "goomsl.h" @@ -36,8 +36,8 @@ diff -ru goom2k4-0/src/goom_core.c xine-lib/src/post/goom/goom_core.c return (guint32*)goomInfo->outputBuf; } diff -ru goom2k4-0/src/goom_tools.c xine-lib/src/post/goom/goom_tools.c ---- goom2k4-0/src/goom_tools.c 2005-02-07 11:46:41.000000000 -0200 -+++ xine-lib/src/post/goom/goom_tools.c 2005-07-18 14:30:02.000000000 -0300 +--- goom_tools.c 2005-02-07 11:46:41.000000000 -0200 ++++ goom_tools.c 2005-07-18 14:30:02.000000000 -0300 @@ -3,7 +3,6 @@ GoomRandom *goom_random_init(int i) { @@ -47,8 +47,8 @@ diff -ru goom2k4-0/src/goom_tools.c xine-lib/src/post/goom/goom_tools.c goom_random_update_array(grandom, GOOM_NB_RAND); return grandom; diff -ru goom2k4-0/src/ifs.c xine-lib/src/post/goom/ifs.c ---- goom2k4-0/src/ifs.c 2005-02-07 11:46:41.000000000 -0200 -+++ xine-lib/src/post/goom/ifs.c 2005-07-19 14:20:20.000000000 -0300 +--- ifs.c 2005-02-07 11:46:41.000000000 -0200 ++++ ifs.c 2005-07-19 14:20:20.000000000 -0300 @@ -503,6 +503,13 @@ for (i = 0; i < 4; i++) { @@ -64,8 +64,8 @@ diff -ru goom2k4-0/src/ifs.c xine-lib/src/post/goom/ifs.c } } diff -ru goom2k4-0/src/tentacle3d.c xine-lib/src/post/goom/tentacle3d.c ---- goom2k4-0/src/tentacle3d.c 2005-02-07 11:46:41.000000000 -0200 -+++ xine-lib/src/post/goom/tentacle3d.c 2005-07-19 14:04:57.000000000 -0300 +--- tentacle3d.c 2005-02-07 11:46:41.000000000 -0200 ++++ tentacle3d.c 2005-07-19 14:04:57.000000000 -0300 @@ -10,7 +10,7 @@ #define D 256.0f @@ -76,8 +76,8 @@ diff -ru goom2k4-0/src/tentacle3d.c xine-lib/src/post/goom/tentacle3d.c typedef struct _TENTACLE_FX_DATA { diff -ru goom2k4-0/src/xmmx.c xine-lib/src/post/goom/xmmx.c ---- goom2k4-0/src/xmmx.c 2005-02-07 11:46:41.000000000 -0200 -+++ xine-lib/src/post/goom/xmmx.c 2005-07-18 15:26:23.000000000 -0300 +--- xmmx.c 2005-02-07 11:46:41.000000000 -0200 ++++ xmmx.c 2005-07-18 15:26:23.000000000 -0300 @@ -239,7 +239,11 @@ ++loop; @@ -345,8 +345,8 @@ diff -u -p -r1.13 goom_core.c *param1 = goomInfo->screen.width / 7.0f; *param2 = 6.0f * goomInfo->screen.width / 7.0f; } ---- post/goom/filters.c.orig 2005-08-20 12:29:12.000000000 +0200 -+++ post/goom/filters.c 2005-08-20 12:28:25.000000000 +0200 +--- filters.c.orig 2005-08-20 12:29:12.000000000 +0200 ++++ filters.c 2005-08-20 12:28:25.000000000 +0200 @@ -201,8 +201,8 @@ static inline v2g zoomVector(ZoomFilterF /* Noise */ if (data->noisify) @@ -358,3 +358,139 @@ diff -u -p -r1.13 goom_core.c } /* Hypercos */ +diff -r -u xine-lib-1.1.0-orig/src/post/goom/convolve_fx.c xine-lib-1.1.0/src/post/goom/convolve_fx.c +--- convolve_fx.c 2005-07-22 12:42:00.000000000 -0400 ++++ convolve_fx.c 2005-11-11 14:59:39.925112333 -0500 +@@ -7,6 +7,10 @@ + #include <stdlib.h> + #include <string.h> + ++#if HAVE_CONFIG_H ++# include "config.h" ++#endif ++ + //#define CONV_MOTIF_W 32 + //#define CONV_MOTIF_WMASK 0x1f + +@@ -151,7 +155,8 @@ + ytex = yprime + yi + CONV_MOTIF_W * 0x10000 / 2; + yprime += c; + +-#ifdef HAVE_MMX ++#if defined(HAVE_MMX) && ! defined(ARCH_X86_64) ++/* This code uses 32-bit registers eax,ecx,esi */ + __asm__ __volatile__ + ("\n\t pxor %%mm7, %%mm7" /* mm7 = 0 */ + "\n\t movd %0, %%mm2" +diff -r -u xine-lib-1.1.0-orig/src/post/goom/mmx.c xine-lib-1.1.0/src/post/goom/mmx.c +--- mmx.c 2004-07-21 10:38:30.000000000 -0400 ++++ mmx.c 2005-11-11 14:51:52.890358793 -0500 +@@ -4,6 +4,7 @@ + #define BUFFPOINTMASK 0xffff + #define BUFFINCR 0xff + ++#include <stddef.h> + #include "mmx.h" + #include "goom_graphic.h" + +@@ -23,6 +24,7 @@ + int precalCoef[16][16]) + { + unsigned int ax = (prevX-1)<<PERTEDEC, ay = (prevY-1)<<PERTEDEC; ++ size_t sizeX = prevX; + + int bufsize = prevX * prevY; + int loop; +@@ -33,7 +35,7 @@ + { + /* int couleur; */ + int px,py; +- int pos; ++ size_t pos; + int coeffs; + + int myPos = loop << 1, +@@ -91,7 +93,7 @@ + "punpckhbw %%mm7, %%mm5 \n\t" /* 00-c4-00-c4-00-c4-00-c4 */ + + /* ajouter la longueur de ligne a esi */ +- "addl 8(%%ebp),%1 \n\t" ++ "add %4,%1 \n\t" + + /* recuperation des 2 derniers pixels */ + "movq (%3,%1,4), %%mm1 \n\t" +@@ -114,8 +116,8 @@ + "packuswb %%mm7, %%mm0 \n\t" + + "movd %%mm0,%0 \n\t" +- :"=g"(expix2[loop]) +- :"r"(pos),"r"(coeffs),"r"(expix1) ++ :"=g"(expix2[loop]),"=r"(pos) ++ :"r"(coeffs),"r"(expix1),"g"(sizeX) + + ); + +diff -r -u xine-lib-1.1.0-orig/src/post/goom/mmx.h xine-lib-1.1.0/src/post/goom/mmx.h +--- mmx.h 2005-07-19 14:10:30.000000000 -0400 ++++ mmx.h 2005-11-11 14:51:52.890358793 -0500 +@@ -27,6 +27,10 @@ + #ifndef _MMX_H + #define _MMX_H + ++#ifdef HAVE_CONFIG_H ++# include "config.h" ++#endif ++ + #include "goom_graphic.h" + + /* Warning: at this writing, the version of GAS packaged +@@ -69,6 +73,9 @@ + 13 if AMD Extended MMX, &3dNow supported + 0 if hardware does not support any of these + */ ++#ifdef ARCH_X86_64 ++ return 13; ++#else + register int rval = 0; + + __asm__ __volatile__ ( +@@ -223,6 +230,7 @@ + + /* Return */ + return(rval); ++#endif + } + + /* Function to test if mmx instructions are supported... +diff -r -u xine-lib-1.1.0-orig/src/post/goom/xmmx.c xine-lib-1.1.0/src/post/goom/xmmx.c +--- xmmx.c 2005-07-21 16:48:37.000000000 -0400 ++++ xmmx.c 2005-11-11 14:51:52.890358793 -0500 +@@ -23,7 +23,12 @@ + #include "goom_graphic.h" + + int xmmx_supported (void) { ++#ifdef ARCH_X86_64 ++ return 0; /* Haven't yet converted zoom_filter_xmmx ++ to support 64-bit memory index registers (rsi,rax) */ ++#else + return (mm_support()&0x8)>>3; ++#endif + } + + void zoom_filter_xmmx (int prevX, int prevY, +@@ -31,6 +36,7 @@ + int *lbruS, int *lbruD, int buffratio, + int precalCoef[16][16]) + { ++#ifndef ARCH_X86_64 + int bufsize = prevX * prevY; /* taille du buffer */ + volatile int loop; /* variable de boucle */ + +@@ -244,6 +250,7 @@ + /*#else + emms(); + #endif*/ ++#endif /* ARCH_X86_64 */ + } + + #define DRAWMETHOD_PLUS_XMMX(_out,_backbuf,_col) \ diff --git a/src/post/goom/mmx.c b/src/post/goom/mmx.c index fdf06492a..484c76092 100644 --- a/src/post/goom/mmx.c +++ b/src/post/goom/mmx.c @@ -4,6 +4,7 @@ #define BUFFPOINTMASK 0xffff #define BUFFINCR 0xff +#include <stddef.h> #include "mmx.h" #include "goom_graphic.h" @@ -23,6 +24,7 @@ void zoom_filter_mmx (int prevX, int prevY, int precalCoef[16][16]) { unsigned int ax = (prevX-1)<<PERTEDEC, ay = (prevY-1)<<PERTEDEC; + size_t sizeX = prevX; int bufsize = prevX * prevY; int loop; @@ -33,7 +35,7 @@ void zoom_filter_mmx (int prevX, int prevY, { /* int couleur; */ int px,py; - int pos; + size_t pos; int coeffs; int myPos = loop << 1, @@ -91,7 +93,7 @@ void zoom_filter_mmx (int prevX, int prevY, "punpckhbw %%mm7, %%mm5 \n\t" /* 00-c4-00-c4-00-c4-00-c4 */ /* ajouter la longueur de ligne a esi */ - "addl 8(%%ebp),%1 \n\t" + "add %4,%1 \n\t" /* recuperation des 2 derniers pixels */ "movq (%3,%1,4), %%mm1 \n\t" @@ -114,8 +116,8 @@ void zoom_filter_mmx (int prevX, int prevY, "packuswb %%mm7, %%mm0 \n\t" "movd %%mm0,%0 \n\t" - :"=g"(expix2[loop]) - :"r"(pos),"r"(coeffs),"r"(expix1) + :"=g"(expix2[loop]),"=r"(pos) + :"r"(coeffs),"r"(expix1),"g"(sizeX) ); diff --git a/src/post/goom/mmx.h b/src/post/goom/mmx.h index 3fae26b98..b650d8b12 100755 --- a/src/post/goom/mmx.h +++ b/src/post/goom/mmx.h @@ -27,6 +27,10 @@ #ifndef _MMX_H #define _MMX_H +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + #include "goom_graphic.h" /* Warning: at this writing, the version of GAS packaged @@ -69,6 +73,9 @@ mm_support(void) 13 if AMD Extended MMX, &3dNow supported 0 if hardware does not support any of these */ +#ifdef ARCH_X86_64 + return 13; +#else register int rval = 0; __asm__ __volatile__ ( @@ -223,6 +230,7 @@ mm_support(void) /* Return */ return(rval); +#endif } /* Function to test if mmx instructions are supported... diff --git a/src/post/goom/xmmx.c b/src/post/goom/xmmx.c index b06fb4ad6..7fc9acfc8 100644 --- a/src/post/goom/xmmx.c +++ b/src/post/goom/xmmx.c @@ -23,7 +23,12 @@ #include "goom_graphic.h" int xmmx_supported (void) { +#ifdef ARCH_X86_64 + return 0; /* Haven't yet converted zoom_filter_xmmx + to support 64-bit memory index registers (rsi,rax) */ +#else return (mm_support()&0x8)>>3; +#endif } void zoom_filter_xmmx (int prevX, int prevY, @@ -31,6 +36,7 @@ void zoom_filter_xmmx (int prevX, int prevY, int *lbruS, int *lbruD, int buffratio, int precalCoef[16][16]) { +#ifndef ARCH_X86_64 int bufsize = prevX * prevY; /* taille du buffer */ volatile int loop; /* variable de boucle */ @@ -244,6 +250,7 @@ void zoom_filter_xmmx (int prevX, int prevY, /*#else emms(); #endif*/ +#endif /* ARCH_X86_64 */ } #define DRAWMETHOD_PLUS_XMMX(_out,_backbuf,_col) \ diff --git a/src/post/planar/eq.c b/src/post/planar/eq.c index 7d38c83e8..50fd03b7f 100644 --- a/src/post/planar/eq.c +++ b/src/post/planar/eq.c @@ -17,7 +17,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA * - * $Id: eq.c,v 1.13 2006/01/27 07:46:14 tmattern Exp $ + * $Id: eq.c,v 1.14 2006/02/04 14:06:52 miguelfreitas Exp $ * * mplayer's eq (soft video equalizer) * Copyright (C) Richard Felker @@ -29,7 +29,7 @@ #include <pthread.h> -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static void process_MMX(unsigned char *dest, int dstride, unsigned char *src, int sstride, int w, int h, int brightness, int contrast) { @@ -65,9 +65,9 @@ static void process_MMX(unsigned char *dest, int dstride, unsigned char *src, in "paddw %%mm3, %%mm1 \n\t" "paddw %%mm3, %%mm2 \n\t" "packuswb %%mm2, %%mm1 \n\t" - "addl $8, %0 \n\t" + "add $8, %0 \n\t" "movq %%mm1, (%1) \n\t" - "addl $8, %1 \n\t" + "add $8, %1 \n\t" "decl %%eax \n\t" "jnz 1b \n\t" : "=r" (src), "=r" (dest) @@ -260,7 +260,7 @@ static post_plugin_t *eq_open_plugin(post_class_t *class_gen, int inputs, } process = process_C; -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) if( xine_mm_accel() & MM_ACCEL_X86_MMX ) process = process_MMX; #endif diff --git a/src/post/planar/eq2.c b/src/post/planar/eq2.c index 6badc61d9..80821d7e6 100644 --- a/src/post/planar/eq2.c +++ b/src/post/planar/eq2.c @@ -17,7 +17,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA * - * $Id: eq2.c,v 1.14 2004/04/17 19:54:32 mroi Exp $ + * $Id: eq2.c,v 1.15 2006/02/04 14:06:52 miguelfreitas Exp $ * * mplayer's eq2 (soft video equalizer) * Software equalizer (brightness, contrast, gamma, saturation) @@ -101,7 +101,7 @@ void create_lut (eq2_param_t *par) } -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static void affine_1d_MMX (eq2_param_t *par, unsigned char *dst, unsigned char *src, unsigned w, unsigned h, unsigned dstride, unsigned sstride) @@ -141,9 +141,9 @@ void affine_1d_MMX (eq2_param_t *par, unsigned char *dst, unsigned char *src, "paddw %%mm3, %%mm1 \n\t" "paddw %%mm3, %%mm2 \n\t" "packuswb %%mm2, %%mm1 \n\t" - "addl $8, %0 \n\t" + "add $8, %0 \n\t" "movq %%mm1, (%1) \n\t" - "addl $8, %1 \n\t" + "add $8, %1 \n\t" "decl %%eax \n\t" "jnz 1b \n\t" : "=r" (src), "=r" (dst) @@ -198,7 +198,7 @@ void check_values (eq2_param_t *par) if ((par->c == 1.0) && (par->b == 0.0) && (par->g == 1.0)) { par->adjust = NULL; } -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) else if (par->g == 1.0 && (xine_mm_accel() & MM_ACCEL_X86_MMX) ) { par->adjust = &affine_1d_MMX; } diff --git a/src/xine-utils/cpu_accel.c b/src/xine-utils/cpu_accel.c index 75f537613..e32a0693c 100644 --- a/src/xine-utils/cpu_accel.c +++ b/src/xine-utils/cpu_accel.c @@ -326,7 +326,7 @@ uint32_t xine_mm_accel (void) #endif #endif -#if defined(ARCH_X86) || (defined(ARCH_PPC) && defined(ENABLE_ALTIVEC)) || (defined(ARCH_SPARC) && defined(ENABLE_VIS)) +#if defined(ARCH_X86) || defined(ARCH_X86_64) || (defined(ARCH_PPC) && defined(ENABLE_ALTIVEC)) || (defined(ARCH_SPARC) && defined(ENABLE_VIS)) accel |= arch_accel(); #endif diff --git a/src/xine-utils/memcpy.c b/src/xine-utils/memcpy.c index 7b5b3a245..c8825ed6e 100644 --- a/src/xine-utils/memcpy.c +++ b/src/xine-utils/memcpy.c @@ -395,7 +395,7 @@ static struct { { { NULL, NULL, 0, 0 }, { "libc memcpy()", memcpy, 0, 0 }, -#if defined(ARCH_X86) && !defined(_MSC_VER) +#if (defined(ARCH_X86) || defined(ARCH_X86_64)) && !defined(_MSC_VER) { "linux kernel memcpy()", linux_kernel_memcpy, 0, 0 }, { "MMX optimized memcpy()", mmx_memcpy, 0, MM_MMX }, { "MMXEXT optimized memcpy()", mmx2_memcpy, 0, MM_MMXEXT }, @@ -408,7 +408,7 @@ static struct { { NULL, NULL, 0, 0 } }; -#if defined(ARCH_X86) && defined(HAVE_SYS_TIMES_H) +#if (defined(ARCH_X86) || defined(ARCH_X86_64)) && defined(HAVE_SYS_TIMES_H) static unsigned long long int rdtsc(int config_flags) { unsigned long long int x; @@ -465,7 +465,7 @@ void xine_probe_fast_memcpy(xine_t *xine) int config_flags = -1; static char *memcpy_methods[] = { "probe", "libc", -#if defined(ARCH_X86) && !defined(_MSC_VER) +#if (defined(ARCH_X86) || defined(ARCH_X86_64)) && !defined(_MSC_VER) "kernel", "mmx", "mmxext", "sse", #endif #if defined (ARCH_PPC) && !defined (HOST_OS_DARWIN) |