22 files changed, 236 insertions, 72 deletions
diff --git a/ChangeLog b/ChangeLog
index a5820847b..2bacd5045 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -11,6 +11,7 @@ xine-lib (1.1.2)
   * Send events for tvtime filmmode changes
   * Add an image decoder based on gdk-pixbuf
   * Add browseable capability to smb input plugin
+  * Enable AMD64 mmx/sse support in some plugins (tvtime, libmpeg2, goom...)
 
 xine-lib (1.1.1)
   * Improve sound quality when using alsa 1.0.9 or above.
diff --git a/configure.ac b/configure.ac
index 26dd1bfd0..b7e5598f5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1959,6 +1959,7 @@ case "$host_or_hostalias" in
   x86_64-*)
     AC_DEFINE_UNQUOTED(ARCH_X86_64,,[Define this if you're running x86 architecture])
     AC_DEFINE(FPM_64BIT,1,[Define to select libmad fixed point arithmetic implementation])
+    enable_ffmmx="yes"
     ;;
   *darwin*)
     HOST_OS_DARWIN=1
diff --git a/src/libmpeg2/cpu_state.c b/src/libmpeg2/cpu_state.c
index 433a85b85..07b4c5b7e 100644
--- a/src/libmpeg2/cpu_state.c
+++ b/src/libmpeg2/cpu_state.c
@@ -32,7 +32,7 @@
 void (* mpeg2_cpu_state_save) (cpu_state_t * state) = NULL;
 void (* mpeg2_cpu_state_restore) (cpu_state_t * state) = NULL;
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static void state_restore_mmx (cpu_state_t * state)
 {
     emms ();
@@ -168,7 +168,7 @@ static void state_restore_altivec (cpu_state_t * state)
 
 void mpeg2_cpu_state_init (uint32_t mm_accel)
 {
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
     if (mm_accel & MM_ACCEL_X86_MMX) {
 	mpeg2_cpu_state_restore = state_restore_mmx;
     }
diff --git a/src/libmpeg2/idct.c b/src/libmpeg2/idct.c
index ac0ad6e0e..157beea31 100644
--- a/src/libmpeg2/idct.c
+++ b/src/libmpeg2/idct.c
@@ -282,7 +282,7 @@ void mpeg2_idct_init (uint32_t mm_accel)
 {
     mpeg2_zero_block = mpeg2_zero_block_c;
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
     if (mm_accel & MM_ACCEL_X86_MMXEXT) {
 #ifdef LOG
 	fprintf (stderr, "Using MMXEXT for IDCT transform\n");
diff --git a/src/libmpeg2/idct_mmx.c b/src/libmpeg2/idct_mmx.c
index d3cc9cb0f..ce4bd064b 100644
--- a/src/libmpeg2/idct_mmx.c
+++ b/src/libmpeg2/idct_mmx.c
@@ -23,7 +23,7 @@
 
 #include "config.h"
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 
 #include <inttypes.h>
 
diff --git a/src/libmpeg2/motion_comp.c b/src/libmpeg2/motion_comp.c
index d20dceef8..8779c1296 100644
--- a/src/libmpeg2/motion_comp.c
+++ b/src/libmpeg2/motion_comp.c
@@ -42,7 +42,7 @@ void mpeg2_mc_init (uint32_t mm_accel)
     }
 #endif
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
     if (mm_accel & MM_ACCEL_X86_MMXEXT) {
 #ifdef LOG
 	fprintf (stderr, "Using MMXEXT for motion compensation\n");
diff --git a/src/post/deinterlace/plugins/greedy.c b/src/post/deinterlace/plugins/greedy.c
index 9f0e313b9..fa157258f 100644
--- a/src/post/deinterlace/plugins/greedy.c
+++ b/src/post/deinterlace/plugins/greedy.c
@@ -64,7 +64,7 @@ static void deinterlace_greedy_packed422_scanline_mmxext( uint8_t *output,
                                                           deinterlace_scanline_data_t *data,
                                                           int width )
 {
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
     mmx_t MaxComb;
     uint8_t *m0 = data->m0;
     uint8_t *t1 = data->t1;
diff --git a/src/post/deinterlace/plugins/greedy2frame_template.c b/src/post/deinterlace/plugins/greedy2frame_template.c
index 7b68d6e46..42c575f58 100644
--- a/src/post/deinterlace/plugins/greedy2frame_template.c
+++ b/src/post/deinterlace/plugins/greedy2frame_template.c
@@ -1,5 +1,5 @@
 /*****************************************************************************
-** $Id: greedy2frame_template.c,v 1.8 2005/06/05 16:00:06 miguelfreitas Exp $
+** $Id: greedy2frame_template.c,v 1.9 2006/02/04 14:06:29 miguelfreitas Exp $
 ******************************************************************************
 ** Copyright (c) 2000 John Adcock, Tom Barry, Steve Grimm  All rights reserved.
 ** port copyright (c) 2003 Miguel Freitas
@@ -19,6 +19,10 @@
 ** CVS Log
 **
 ** $Log: greedy2frame_template.c,v $
+** Revision 1.9  2006/02/04 14:06:29  miguelfreitas
+** Enable AMD64 mmx/sse support in some plugins (tvtime, libmpeg2, goom...)
+** patch by dani3l
+**
 ** Revision 1.8  2005/06/05 16:00:06  miguelfreitas
 ** quite some hacks for gcc 2.95 compatibility
 **
@@ -112,7 +116,7 @@ static void DeinterlaceGreedy2Frame_MMX(uint8_t *output, int outstride,
                                  int bottom_field, int second_field, int width, int height )
 #endif
 {
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
     int Line;
     int stride = width * 2;
     register uint8_t* M1;
diff --git a/src/post/deinterlace/plugins/greedyh.asm b/src/post/deinterlace/plugins/greedyh.asm
index a885b1dd3..0bbd745aa 100644
--- a/src/post/deinterlace/plugins/greedyh.asm
+++ b/src/post/deinterlace/plugins/greedyh.asm
@@ -323,7 +323,7 @@ static void FUNCT_NAME(uint8_t *output, int outstride,
     }
 
     // clear out the MMX registers ready for doing floating point again
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
     __asm__ __volatile__ ("emms\n\t");
 #endif
 }
diff --git a/src/post/deinterlace/plugins/linearblend.c b/src/post/deinterlace/plugins/linearblend.c
index d8ecacefc..3b65e381c 100644
--- a/src/post/deinterlace/plugins/linearblend.c
+++ b/src/post/deinterlace/plugins/linearblend.c
@@ -45,7 +45,7 @@ static void deinterlace_scanline_linear_blend( uint8_t *output,
     uint8_t *t0 = data->t0;
     uint8_t *b0 = data->b0;
     uint8_t *m1 = data->m1;
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
     int i;
 
     // Get width in bytes.
@@ -110,7 +110,7 @@ static void deinterlace_scanline_linear_blend2( uint8_t *output,
     uint8_t *t1 = data->t1;
     uint8_t *b1 = data->b1;
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
     int i;
 
     // Get width in bytes.
@@ -167,7 +167,7 @@ static void deinterlace_scanline_linear_blend2( uint8_t *output,
 #endif
 }
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 
 /* MMXEXT version is about 15% faster with Athlon XP [MF] */
 
@@ -337,7 +337,7 @@ static deinterlace_method_t linearblendmethod =
     "BlurTemporal",
 */
     2,
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
     MM_ACCEL_X86_MMX,
 #else
     0,
@@ -362,7 +362,7 @@ static deinterlace_method_t linearblendmethod =
 
 deinterlace_method_t *linearblend_get_method( void )
 {
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
     if( xine_mm_accel() & MM_ACCEL_X86_MMXEXT )
       return &linearblendmethod_mmxext;
     else
diff --git a/src/post/deinterlace/plugins/tomsmocomp/TomsMoCompAll.inc b/src/post/deinterlace/plugins/tomsmocomp/TomsMoCompAll.inc
index 0fd3f451f..5870d77be 100644
--- a/src/post/deinterlace/plugins/tomsmocomp/TomsMoCompAll.inc
+++ b/src/post/deinterlace/plugins/tomsmocomp/TomsMoCompAll.inc
@@ -246,7 +246,7 @@ static void FUNCT_NAME(uint8_t *output, int outstride,
 	}
 
 end:
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
     __asm__ __volatile__("emms");
 #endif
     return;
diff --git a/src/post/deinterlace/plugins/vfir.c b/src/post/deinterlace/plugins/vfir.c
index 6809b2244..3bf03a4a0 100644
--- a/src/post/deinterlace/plugins/vfir.c
+++ b/src/post/deinterlace/plugins/vfir.c
@@ -51,7 +51,7 @@ static void deinterlace_line( uint8_t *dst, uint8_t *lum_m4,
                               uint8_t *lum_m3, uint8_t *lum_m2,
                               uint8_t *lum_m1, uint8_t *lum, int size )
 {
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
     mmx_t rounder;
 
     rounder.uw[0]=4;
@@ -137,7 +137,7 @@ static deinterlace_method_t vfirmethod =
     "BlurVertical",
 */
     1,
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
     MM_ACCEL_X86_MMXEXT,
 #else
     0,
diff --git a/src/post/deinterlace/speedy.c b/src/post/deinterlace/speedy.c
index 4d3b9710a..0cf07258b 100644
--- a/src/post/deinterlace/speedy.c
+++ b/src/post/deinterlace/speedy.c
@@ -178,7 +178,7 @@ static inline __attribute__ ((always_inline,const)) uint8_t clip255( int x )
 
 static unsigned long CombJaggieThreshold = 73;
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static unsigned int comb_factor_packed422_scanline_mmx( uint8_t *top, uint8_t *mid,
                                                         uint8_t *bot, int width )
 {
@@ -304,7 +304,7 @@ static unsigned int diff_factor_packed422_scanline_test_c( uint8_t *cur, uint8_t
 }
 */
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static unsigned int diff_factor_packed422_scanline_mmx( uint8_t *cur, uint8_t *old, int width )
 {
     const mmx_t qwYMask = { 0x00ff00ff00ff00ffULL };
@@ -345,7 +345,7 @@ static unsigned int diff_factor_packed422_scanline_mmx( uint8_t *cur, uint8_t *o
 
 #define ABS(a) (((a) < 0)?-(a):(a))
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static void diff_packed422_block8x8_mmx( pulldown_metrics_t *m, uint8_t *old,
                                          uint8_t *new, int os, int ns )
 {
@@ -586,7 +586,7 @@ static void packed422_to_packed444_rec601_scanline_c( uint8_t *dest, uint8_t *sr
     }
 }
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static void vfilter_chroma_121_packed422_scanline_mmx( uint8_t *output, int width,
                                                        uint8_t *m, uint8_t *t, uint8_t *b )
 {
@@ -652,7 +652,7 @@ static void vfilter_chroma_121_packed422_scanline_c( uint8_t *output, int width,
     }
 }
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static void vfilter_chroma_332_packed422_scanline_mmx( uint8_t *output, int width,
                                                        uint8_t *m, uint8_t *t, uint8_t *b )
 {
@@ -726,7 +726,7 @@ static void vfilter_chroma_332_packed422_scanline_c( uint8_t *output, int width,
     }
 }
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static void kill_chroma_packed422_inplace_scanline_mmx( uint8_t *data, int width )
 {
     const mmx_t ymask = { 0x00ff00ff00ff00ffULL };
@@ -758,7 +758,7 @@ static void kill_chroma_packed422_inplace_scanline_c( uint8_t *data, int width )
     }
 }
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static void invert_colour_packed422_inplace_scanline_mmx( uint8_t *data, int width )
 {
     const mmx_t allones = { 0xffffffffffffffffULL };
@@ -885,7 +885,7 @@ static void interpolate_packed422_scanline_c( uint8_t *output, uint8_t *top,
     }
 }
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static void interpolate_packed422_scanline_mmx( uint8_t *output, uint8_t *top,
                                                 uint8_t *bot, int width )
 {
@@ -955,7 +955,7 @@ static void interpolate_packed422_scanline_mmx( uint8_t *output, uint8_t *top,
 }
 #endif
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static void interpolate_packed422_scanline_mmxext( uint8_t *output, uint8_t *top,
                                                    uint8_t *bot, int width )
 {
@@ -1015,7 +1015,7 @@ static void blit_colour_packed422_scanline_c( uint8_t *output, int width, int y,
     }
 }
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static void blit_colour_packed422_scanline_mmx( uint8_t *output, int width, int y, int cb, int cr )
 {
     uint32_t colour = cr << 24 | y << 16 | cb << 8 | y;
@@ -1055,7 +1055,7 @@ static void blit_colour_packed422_scanline_mmx( uint8_t *output, int width, int
 }
 #endif
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static void blit_colour_packed422_scanline_mmxext( uint8_t *output, int width, int y, int cb, int cr )
 {
     uint32_t colour = cr << 24 | y << 16 | cb << 8 | y;
@@ -1109,7 +1109,7 @@ static void blit_colour_packed4444_scanline_c( uint8_t *output, int width,
     }
 }
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static void blit_colour_packed4444_scanline_mmx( uint8_t *output, int width,
                                                  int alpha, int luma,
                                                  int cb, int cr )
@@ -1146,7 +1146,7 @@ static void blit_colour_packed4444_scanline_mmx( uint8_t *output, int width,
 }
 #endif
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static void blit_colour_packed4444_scanline_mmxext( uint8_t *output, int width,
                                                     int alpha, int luma,
                                                     int cb, int cr )
@@ -1195,14 +1195,14 @@ static void blit_packed422_scanline_c( uint8_t *dest, const uint8_t *src, int wi
     speedy_memcpy_c( dest, src, width*2 );
 }
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static void blit_packed422_scanline_mmx( uint8_t *dest, const uint8_t *src, int width )
 {
     speedy_memcpy_mmx( dest, src, width*2 );
 }
 #endif
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static void blit_packed422_scanline_mmxext( uint8_t *dest, const uint8_t *src, int width )
 {
     speedy_memcpy_mmxext( dest, src, width*2 );
@@ -1267,7 +1267,7 @@ static void composite_packed4444_alpha_to_packed422_scanline_c( uint8_t *output,
     }
 }
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static void composite_packed4444_alpha_to_packed422_scanline_mmxext( uint8_t *output,
                                                                      uint8_t *input,
                                                                      uint8_t *foreground,
@@ -1409,7 +1409,7 @@ static void composite_packed4444_to_packed422_scanline_c( uint8_t *output, uint8
 }
 
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static void composite_packed4444_to_packed422_scanline_mmxext( uint8_t *output, uint8_t *input,
                                                                uint8_t *foreground, int width )
 {
@@ -1543,7 +1543,7 @@ static void composite_alphamask_to_packed4444_scanline_c( uint8_t *output,
     }
 }
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static void composite_alphamask_to_packed4444_scanline_mmxext( uint8_t *output,
                                                                uint8_t *input,
                                                                uint8_t *mask,
@@ -1691,7 +1691,7 @@ static void premultiply_packed4444_scanline_c( uint8_t *output, uint8_t *input,
     }
 }
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static void premultiply_packed4444_scanline_mmxext( uint8_t *output, uint8_t *input, int width )
 {
     const mmx_t round  = { 0x0080008000800080ULL };
@@ -1747,7 +1747,7 @@ static void blend_packed422_scanline_c( uint8_t *output, uint8_t *src1,
     }
 }
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static void blend_packed422_scanline_mmxext( uint8_t *output, uint8_t *src1,
                                              uint8_t *src2, int width, int pos )
 {
@@ -1792,7 +1792,7 @@ static void blend_packed422_scanline_mmxext( uint8_t *output, uint8_t *src1,
 }
 #endif
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static void quarter_blit_vertical_packed422_scanline_mmxext( uint8_t *output, uint8_t *one,
                                                              uint8_t *three, int width )
 {
@@ -2414,7 +2414,7 @@ void setup_speedy_calls( uint32_t accel, int verbose )
     vfilter_chroma_121_packed422_scanline = vfilter_chroma_121_packed422_scanline_c;
     vfilter_chroma_332_packed422_scanline = vfilter_chroma_332_packed422_scanline_c;
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
     if( speedy_accel & MM_ACCEL_X86_MMXEXT ) {
         if( verbose ) {
             printf( "speedycode: Using MMXEXT optimized functions.\n" );
diff --git a/src/post/goom/convolve_fx.c b/src/post/goom/convolve_fx.c
index e16a17862..ccd24dbd6 100644
--- a/src/post/goom/convolve_fx.c
+++ b/src/post/goom/convolve_fx.c
@@ -7,6 +7,10 @@
 #include <stdlib.h>
 #include <string.h>
 
+#if HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
 //#define CONV_MOTIF_W 32
 //#define CONV_MOTIF_WMASK 0x1f
 
@@ -151,7 +155,8 @@ static void create_output_with_brightness(VisualFX *_this, Pixel *src, Pixel *de
     ytex = yprime + yi + CONV_MOTIF_W * 0x10000 / 2;
     yprime += c;
 
-#ifdef HAVE_MMX
+#if defined(HAVE_MMX) && ! defined(ARCH_X86_64)
+/* This code uses 32-bit registers eax,ecx,esi */
     __asm__ __volatile__
       ("\n\t pxor  %%mm7,  %%mm7"  /* mm7 = 0   */
        "\n\t movd %0,  %%mm2"
diff --git a/src/post/goom/diff_against_release.patch b/src/post/goom/diff_against_release.patch
index 4fc5bd972..026cc9862 100644
--- a/src/post/goom/diff_against_release.patch
+++ b/src/post/goom/diff_against_release.patch
@@ -1,6 +1,6 @@
 diff -ru goom2k4-0/src/filters.c xine-lib/src/post/goom/filters.c
---- goom2k4-0/src/filters.c	2005-02-07 11:46:41.000000000 -0200
-+++ xine-lib/src/post/goom/filters.c	2005-07-18 12:15:50.000000000 -0300
+--- filters.c	2005-02-07 11:46:41.000000000 -0200
++++ filters.c	2005-07-18 12:15:50.000000000 -0300
 @@ -704,7 +704,7 @@
      
      data->general_speed = 0.0f;
@@ -11,8 +11,8 @@ diff -ru goom2k4-0/src/filters.c xine-lib/src/post/goom/filters.c
      data->hypercosEffect = 0;
      data->vPlaneEffect = 0;
 diff -ru goom2k4-0/src/goom_core.c xine-lib/src/post/goom/goom_core.c
---- goom2k4-0/src/goom_core.c	2005-02-07 11:46:41.000000000 -0200
-+++ xine-lib/src/post/goom/goom_core.c	2005-07-19 12:39:22.000000000 -0300
+--- goom_core.c	2005-02-07 11:46:41.000000000 -0200
++++ goom_core.c	2005-07-19 12:39:22.000000000 -0300
 @@ -26,6 +26,8 @@
  #include "goom_fx.h"
  #include "goomsl.h"
@@ -36,8 +36,8 @@ diff -ru goom2k4-0/src/goom_core.c xine-lib/src/post/goom/goom_core.c
          return (guint32*)goomInfo->outputBuf;
  }
 diff -ru goom2k4-0/src/goom_tools.c xine-lib/src/post/goom/goom_tools.c
---- goom2k4-0/src/goom_tools.c	2005-02-07 11:46:41.000000000 -0200
-+++ xine-lib/src/post/goom/goom_tools.c	2005-07-18 14:30:02.000000000 -0300
+--- goom_tools.c	2005-02-07 11:46:41.000000000 -0200
++++ goom_tools.c	2005-07-18 14:30:02.000000000 -0300
 @@ -3,7 +3,6 @@
  
  GoomRandom *goom_random_init(int i) {
@@ -47,8 +47,8 @@ diff -ru goom2k4-0/src/goom_tools.c xine-lib/src/post/goom/goom_tools.c
  	goom_random_update_array(grandom, GOOM_NB_RAND);
  	return grandom;
 diff -ru goom2k4-0/src/ifs.c xine-lib/src/post/goom/ifs.c
---- goom2k4-0/src/ifs.c	2005-02-07 11:46:41.000000000 -0200
-+++ xine-lib/src/post/goom/ifs.c	2005-07-19 14:20:20.000000000 -0300
+--- ifs.c	2005-02-07 11:46:41.000000000 -0200
++++ ifs.c	2005-07-19 14:20:20.000000000 -0300
 @@ -503,6 +503,13 @@
  
  		for (i = 0; i < 4; i++) {
@@ -64,8 +64,8 @@ diff -ru goom2k4-0/src/ifs.c xine-lib/src/post/goom/ifs.c
  		}
  	}
 diff -ru goom2k4-0/src/tentacle3d.c xine-lib/src/post/goom/tentacle3d.c
---- goom2k4-0/src/tentacle3d.c	2005-02-07 11:46:41.000000000 -0200
-+++ xine-lib/src/post/goom/tentacle3d.c	2005-07-19 14:04:57.000000000 -0300
+--- tentacle3d.c	2005-02-07 11:46:41.000000000 -0200
++++ tentacle3d.c	2005-07-19 14:04:57.000000000 -0300
 @@ -10,7 +10,7 @@
  #define D 256.0f
  
@@ -76,8 +76,8 @@ diff -ru goom2k4-0/src/tentacle3d.c xine-lib/src/post/goom/tentacle3d.c
  
  typedef struct _TENTACLE_FX_DATA {
 diff -ru goom2k4-0/src/xmmx.c xine-lib/src/post/goom/xmmx.c
---- goom2k4-0/src/xmmx.c	2005-02-07 11:46:41.000000000 -0200
-+++ xine-lib/src/post/goom/xmmx.c	2005-07-18 15:26:23.000000000 -0300
+--- xmmx.c	2005-02-07 11:46:41.000000000 -0200
++++ xmmx.c	2005-07-18 15:26:23.000000000 -0300
 @@ -239,7 +239,11 @@
  
  		++loop;
@@ -345,8 +345,8 @@ diff -u -p -r1.13 goom_core.c
                  *param1 = goomInfo->screen.width / 7.0f;
                  *param2 = 6.0f * goomInfo->screen.width / 7.0f;
              }
---- post/goom/filters.c.orig	2005-08-20 12:29:12.000000000 +0200
-+++ post/goom/filters.c	2005-08-20 12:28:25.000000000 +0200
+--- filters.c.orig	2005-08-20 12:29:12.000000000 +0200
++++ filters.c	2005-08-20 12:28:25.000000000 +0200
 @@ -201,8 +201,8 @@ static inline v2g zoomVector(ZoomFilterF
      /* Noise */
      if (data->noisify)
@@ -358,3 +358,139 @@ diff -u -p -r1.13 goom_core.c
      }
      
      /* Hypercos */
+diff -r -u xine-lib-1.1.0-orig/src/post/goom/convolve_fx.c xine-lib-1.1.0/src/post/goom/convolve_fx.c
+--- convolve_fx.c	2005-07-22 12:42:00.000000000 -0400
++++ convolve_fx.c	2005-11-11 14:59:39.925112333 -0500
+@@ -7,6 +7,10 @@
+ #include <stdlib.h>
+ #include <string.h>
+ 
++#if HAVE_CONFIG_H
++#  include "config.h"
++#endif
++
+ //#define CONV_MOTIF_W 32
+ //#define CONV_MOTIF_WMASK 0x1f
+ 
+@@ -151,7 +155,8 @@
+     ytex = yprime + yi + CONV_MOTIF_W * 0x10000 / 2;
+     yprime += c;
+ 
+-#ifdef HAVE_MMX
++#if defined(HAVE_MMX) && ! defined(ARCH_X86_64)
++/* This code uses 32-bit registers eax,ecx,esi */
+     __asm__ __volatile__
+       ("\n\t pxor  %%mm7,  %%mm7"  /* mm7 = 0   */
+        "\n\t movd %0,  %%mm2"
+diff -r -u xine-lib-1.1.0-orig/src/post/goom/mmx.c xine-lib-1.1.0/src/post/goom/mmx.c
+--- mmx.c	2004-07-21 10:38:30.000000000 -0400
++++ mmx.c	2005-11-11 14:51:52.890358793 -0500
+@@ -4,6 +4,7 @@
+ #define BUFFPOINTMASK 0xffff
+ #define BUFFINCR 0xff
+ 
++#include <stddef.h>
+ #include "mmx.h"
+ #include "goom_graphic.h"
+ 
+@@ -23,6 +24,7 @@
+ 		      int precalCoef[16][16])
+ {
+ 	unsigned int ax = (prevX-1)<<PERTEDEC, ay = (prevY-1)<<PERTEDEC;
++        size_t sizeX = prevX;
+ 
+ 	int bufsize = prevX * prevY;
+ 	int loop;
+@@ -33,7 +35,7 @@
+ 	{
+ 		/*      int couleur; */
+ 		int px,py;
+-		int pos;
++		size_t pos;
+ 		int coeffs;
+ 
+ 		int myPos = loop << 1,
+@@ -91,7 +93,7 @@
+ 		"punpckhbw %%mm7, %%mm5 \n\t"	/* 00-c4-00-c4-00-c4-00-c4 */
+ 
+ 		/* ajouter la longueur de ligne a esi */
+-		"addl 8(%%ebp),%1 \n\t"
++		"add %4,%1 \n\t"
+ 
+ 		/* recuperation des 2 derniers pixels */
+ 		"movq (%3,%1,4), %%mm1 \n\t"
+@@ -114,8 +116,8 @@
+ 		"packuswb %%mm7, %%mm0 \n\t"
+ 
+ 		"movd %%mm0,%0 \n\t"
+-		  :"=g"(expix2[loop])
+-		  :"r"(pos),"r"(coeffs),"r"(expix1)
++		  :"=g"(expix2[loop]),"=r"(pos)
++		  :"r"(coeffs),"r"(expix1),"g"(sizeX)
+ 
+ 		);
+ 
+diff -r -u xine-lib-1.1.0-orig/src/post/goom/mmx.h xine-lib-1.1.0/src/post/goom/mmx.h
+--- mmx.h	2005-07-19 14:10:30.000000000 -0400
++++ mmx.h	2005-11-11 14:51:52.890358793 -0500
+@@ -27,6 +27,10 @@
+ #ifndef _MMX_H
+ #define _MMX_H
+ 
++#ifdef HAVE_CONFIG_H
++# include "config.h"
++#endif
++
+ #include "goom_graphic.h"
+ 
+ /*	Warning:  at this writing, the version of GAS packaged
+@@ -69,6 +73,9 @@
+ 		 13 if AMD Extended MMX, &3dNow supported
+ 	   0 if hardware does not support any of these
+ 	*/
++#ifdef ARCH_X86_64
++	return 13;
++#else
+ 	register int rval = 0;
+ 
+ 	__asm__ __volatile__ (
+@@ -223,6 +230,7 @@
+ 
+ 	/* Return */
+ 	return(rval);
++#endif
+ }
+ 
+ /*	Function to test if mmx instructions are supported...
+diff -r -u xine-lib-1.1.0-orig/src/post/goom/xmmx.c xine-lib-1.1.0/src/post/goom/xmmx.c
+--- xmmx.c	2005-07-21 16:48:37.000000000 -0400
++++ xmmx.c	2005-11-11 14:51:52.890358793 -0500
+@@ -23,7 +23,12 @@
+ #include "goom_graphic.h"
+ 
+ int xmmx_supported (void) {
++#ifdef ARCH_X86_64
++	return 0; /* Haven't yet converted zoom_filter_xmmx 
++                     to support 64-bit memory index registers (rsi,rax) */
++#else
+ 	return (mm_support()&0x8)>>3;
++#endif
+ }
+ 
+ void zoom_filter_xmmx (int prevX, int prevY,
+@@ -31,6 +36,7 @@
+                        int *lbruS, int *lbruD, int buffratio,
+                        int precalCoef[16][16])
+ {
++#ifndef ARCH_X86_64
+ 	int bufsize = prevX * prevY; /* taille du buffer */
+ 	volatile int loop;                    /* variable de boucle */
+ 
+@@ -244,6 +250,7 @@
+ /*#else
+ 	emms();
+ #endif*/
++#endif /* ARCH_X86_64 */
+ }
+ 
+ #define DRAWMETHOD_PLUS_XMMX(_out,_backbuf,_col) \
diff --git a/src/post/goom/mmx.c b/src/post/goom/mmx.c
index fdf06492a..484c76092 100644
--- a/src/post/goom/mmx.c
+++ b/src/post/goom/mmx.c
@@ -4,6 +4,7 @@
 #define BUFFPOINTMASK 0xffff
 #define BUFFINCR 0xff
 
+#include <stddef.h>
 #include "mmx.h"
 #include "goom_graphic.h"
 
@@ -23,6 +24,7 @@ void zoom_filter_mmx (int prevX, int prevY,
 		      int precalCoef[16][16])
 {
 	unsigned int ax = (prevX-1)<<PERTEDEC, ay = (prevY-1)<<PERTEDEC;
+        size_t sizeX = prevX;
 
 	int bufsize = prevX * prevY;
 	int loop;
@@ -33,7 +35,7 @@ void zoom_filter_mmx (int prevX, int prevY,
 	{
 		/*      int couleur; */
 		int px,py;
-		int pos;
+		size_t pos;
 		int coeffs;
 
 		int myPos = loop << 1,
@@ -91,7 +93,7 @@ void zoom_filter_mmx (int prevX, int prevY,
 		"punpckhbw %%mm7, %%mm5 \n\t"	/* 00-c4-00-c4-00-c4-00-c4 */
 
 		/* ajouter la longueur de ligne a esi */
-		"addl 8(%%ebp),%1 \n\t"
+		"add %4,%1 \n\t"
 
 		/* recuperation des 2 derniers pixels */
 		"movq (%3,%1,4), %%mm1 \n\t"
@@ -114,8 +116,8 @@ void zoom_filter_mmx (int prevX, int prevY,
 		"packuswb %%mm7, %%mm0 \n\t"
 
 		"movd %%mm0,%0 \n\t"
-		  :"=g"(expix2[loop])
-		  :"r"(pos),"r"(coeffs),"r"(expix1)
+		  :"=g"(expix2[loop]),"=r"(pos)
+		  :"r"(coeffs),"r"(expix1),"g"(sizeX)
 
 		);
 
diff --git a/src/post/goom/mmx.h b/src/post/goom/mmx.h
index 3fae26b98..b650d8b12 100755
--- a/src/post/goom/mmx.h
+++ b/src/post/goom/mmx.h
@@ -27,6 +27,10 @@
 #ifndef _MMX_H
 #define _MMX_H
 
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
 #include "goom_graphic.h"
 
 /*	Warning:  at this writing, the version of GAS packaged
@@ -69,6 +73,9 @@ mm_support(void)
 		 13 if AMD Extended MMX, &3dNow supported
 	   0 if hardware does not support any of these
 	*/
+#ifdef ARCH_X86_64
+	return 13;
+#else
 	register int rval = 0;
 
 	__asm__ __volatile__ (
@@ -223,6 +230,7 @@ mm_support(void)
 
 	/* Return */
 	return(rval);
+#endif
 }
 
 /*	Function to test if mmx instructions are supported...
diff --git a/src/post/goom/xmmx.c b/src/post/goom/xmmx.c
index b06fb4ad6..7fc9acfc8 100644
--- a/src/post/goom/xmmx.c
+++ b/src/post/goom/xmmx.c
@@ -23,7 +23,12 @@
 #include "goom_graphic.h"
 
 int xmmx_supported (void) {
+#ifdef ARCH_X86_64
+	return 0; /* zoom_filter_xmmx has not yet been converted
+                     to support 64-bit memory index registers (rsi,rax) */
+#else
 	return (mm_support()&0x8)>>3;
+#endif
 }
 
 void zoom_filter_xmmx (int prevX, int prevY,
@@ -31,6 +36,7 @@ void zoom_filter_xmmx (int prevX, int prevY,
                        int *lbruS, int *lbruD, int buffratio,
                        int precalCoef[16][16])
 {
+#ifndef ARCH_X86_64
 	int bufsize = prevX * prevY; /* taille du buffer */
 	volatile int loop;                    /* variable de boucle */
 
@@ -244,6 +250,7 @@ void zoom_filter_xmmx (int prevX, int prevY,
 /*#else
 	emms();
 #endif*/
+#endif /* !ARCH_X86_64 */
 }
 
 #define DRAWMETHOD_PLUS_XMMX(_out,_backbuf,_col) \
diff --git a/src/post/planar/eq.c b/src/post/planar/eq.c
index 7d38c83e8..50fd03b7f 100644
--- a/src/post/planar/eq.c
+++ b/src/post/planar/eq.c
@@ -17,7 +17,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
  *
- * $Id: eq.c,v 1.13 2006/01/27 07:46:14 tmattern Exp $
+ * $Id: eq.c,v 1.14 2006/02/04 14:06:52 miguelfreitas Exp $
  *
  * mplayer's eq (soft video equalizer)
  * Copyright (C) Richard Felker
@@ -29,7 +29,7 @@
 #include <pthread.h>
 
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static void process_MMX(unsigned char *dest, int dstride, unsigned char *src, int sstride,
 		    int w, int h, int brightness, int contrast)
 {
@@ -65,9 +65,9 @@ static void process_MMX(unsigned char *dest, int dstride, unsigned char *src, in
 			"paddw %%mm3, %%mm1 \n\t"
 			"paddw %%mm3, %%mm2 \n\t"
 			"packuswb %%mm2, %%mm1 \n\t"
-			"addl $8, %0 \n\t"
+			"add $8, %0 \n\t"
 			"movq %%mm1, (%1) \n\t"
-			"addl $8, %1 \n\t"
+			"add $8, %1 \n\t"
 			"decl %%eax \n\t"
 			"jnz 1b \n\t"
 			: "=r" (src), "=r" (dest)
@@ -260,7 +260,7 @@ static post_plugin_t *eq_open_plugin(post_class_t *class_gen, int inputs,
   }
 
   process = process_C;
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
   if( xine_mm_accel() & MM_ACCEL_X86_MMX ) 
     process = process_MMX;
 #endif
diff --git a/src/post/planar/eq2.c b/src/post/planar/eq2.c
index 6badc61d9..80821d7e6 100644
--- a/src/post/planar/eq2.c
+++ b/src/post/planar/eq2.c
@@ -17,7 +17,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
  *
- * $Id: eq2.c,v 1.14 2004/04/17 19:54:32 mroi Exp $
+ * $Id: eq2.c,v 1.15 2006/02/04 14:06:52 miguelfreitas Exp $
  *
  * mplayer's eq2 (soft video equalizer)
  * Software equalizer (brightness, contrast, gamma, saturation)
@@ -101,7 +101,7 @@ void create_lut (eq2_param_t *par)
 }
 
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static
 void affine_1d_MMX (eq2_param_t *par, unsigned char *dst, unsigned char *src,
   unsigned w, unsigned h, unsigned dstride, unsigned sstride)
@@ -141,9 +141,9 @@ void affine_1d_MMX (eq2_param_t *par, unsigned char *dst, unsigned char *src,
       "paddw %%mm3, %%mm1 \n\t"
       "paddw %%mm3, %%mm2 \n\t"
       "packuswb %%mm2, %%mm1 \n\t"
-      "addl $8, %0 \n\t"
+      "add $8, %0 \n\t"
       "movq %%mm1, (%1) \n\t"
-      "addl $8, %1 \n\t"
+      "add $8, %1 \n\t"
       "decl %%eax \n\t"
       "jnz 1b \n\t"
       : "=r" (src), "=r" (dst)
@@ -198,7 +198,7 @@ void check_values (eq2_param_t *par)
   if ((par->c == 1.0) && (par->b == 0.0) && (par->g == 1.0)) {
     par->adjust = NULL;
   }
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
   else if (par->g == 1.0 && (xine_mm_accel() & MM_ACCEL_X86_MMX) ) {
     par->adjust = &affine_1d_MMX;
   }
diff --git a/src/xine-utils/cpu_accel.c b/src/xine-utils/cpu_accel.c
index 75f537613..e32a0693c 100644
--- a/src/xine-utils/cpu_accel.c
+++ b/src/xine-utils/cpu_accel.c
@@ -326,7 +326,7 @@ uint32_t xine_mm_accel (void)
 #endif
 #endif
 
-#if defined(ARCH_X86) || (defined(ARCH_PPC) && defined(ENABLE_ALTIVEC)) || (defined(ARCH_SPARC) && defined(ENABLE_VIS))
+#if defined(ARCH_X86) || defined(ARCH_X86_64) || (defined(ARCH_PPC) && defined(ENABLE_ALTIVEC)) || (defined(ARCH_SPARC) && defined(ENABLE_VIS))
     accel |= arch_accel();
 #endif
 
diff --git a/src/xine-utils/memcpy.c b/src/xine-utils/memcpy.c
index 7b5b3a245..c8825ed6e 100644
--- a/src/xine-utils/memcpy.c
+++ b/src/xine-utils/memcpy.c
@@ -395,7 +395,7 @@ static struct {
 {
   { NULL, NULL, 0, 0 },
   { "libc memcpy()", memcpy, 0, 0 },
-#if defined(ARCH_X86) && !defined(_MSC_VER)
+#if (defined(ARCH_X86) || defined(ARCH_X86_64)) && !defined(_MSC_VER)
   { "linux kernel memcpy()", linux_kernel_memcpy, 0, 0 },
   { "MMX optimized memcpy()", mmx_memcpy, 0, MM_MMX },
   { "MMXEXT optimized memcpy()", mmx2_memcpy, 0, MM_MMXEXT },
@@ -408,7 +408,7 @@ static struct {
   { NULL, NULL, 0, 0 }
 };
 
-#if defined(ARCH_X86) && defined(HAVE_SYS_TIMES_H)
+#if (defined(ARCH_X86) || defined(ARCH_X86_64)) && defined(HAVE_SYS_TIMES_H)
 static unsigned long long int rdtsc(int config_flags)
 {
   unsigned long long int x;
@@ -465,7 +465,7 @@ void xine_probe_fast_memcpy(xine_t *xine)
   int               config_flags = -1;
   static char      *memcpy_methods[] = {
     "probe", "libc",
-#if defined(ARCH_X86) && !defined(_MSC_VER)
+#if (defined(ARCH_X86) || defined(ARCH_X86_64)) && !defined(_MSC_VER)
     "kernel", "mmx", "mmxext", "sse",
 #endif
 #if defined (ARCH_PPC) && !defined (HOST_OS_DARWIN)