summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog | 1
-rw-r--r-- configure.ac | 1
-rw-r--r-- src/libmpeg2/cpu_state.c | 4
-rw-r--r-- src/libmpeg2/idct.c | 2
-rw-r--r-- src/libmpeg2/idct_mmx.c | 2
-rw-r--r-- src/libmpeg2/motion_comp.c | 2
-rw-r--r-- src/post/deinterlace/plugins/greedy.c | 2
-rw-r--r-- src/post/deinterlace/plugins/greedy2frame_template.c | 8
-rw-r--r-- src/post/deinterlace/plugins/greedyh.asm | 2
-rw-r--r-- src/post/deinterlace/plugins/linearblend.c | 10
-rw-r--r-- src/post/deinterlace/plugins/tomsmocomp/TomsMoCompAll.inc | 2
-rw-r--r-- src/post/deinterlace/plugins/vfir.c | 4
-rw-r--r-- src/post/deinterlace/speedy.c | 44
-rw-r--r-- src/post/goom/convolve_fx.c | 7
-rw-r--r-- src/post/goom/diff_against_release.patch | 164
-rw-r--r-- src/post/goom/mmx.c | 10
-rwxr-xr-x src/post/goom/mmx.h | 8
-rw-r--r-- src/post/goom/xmmx.c | 7
-rw-r--r-- src/post/planar/eq.c | 10
-rw-r--r-- src/post/planar/eq2.c | 10
-rw-r--r-- src/xine-utils/cpu_accel.c | 2
-rw-r--r-- src/xine-utils/memcpy.c | 6
22 files changed, 236 insertions, 72 deletions
diff --git a/ChangeLog b/ChangeLog
index a5820847b..2bacd5045 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -11,6 +11,7 @@ xine-lib (1.1.2)
* Send events for tvtime filmmode changes
* Add an image decoder based on gdk-pixbuf
* Add browseable capability to smb input plugin
+ * Enable AMD64 mmx/sse support in some plugins (tvtime, libmpeg2, goom...)
xine-lib (1.1.1)
* Improve sound quality when using alsa 1.0.9 or above.
diff --git a/configure.ac b/configure.ac
index 26dd1bfd0..b7e5598f5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1959,6 +1959,7 @@ case "$host_or_hostalias" in
x86_64-*)
AC_DEFINE_UNQUOTED(ARCH_X86_64,,[Define this if you're running x86 architecture])
AC_DEFINE(FPM_64BIT,1,[Define to select libmad fixed point arithmetic implementation])
+ enable_ffmmx="yes"
;;
*darwin*)
HOST_OS_DARWIN=1
diff --git a/src/libmpeg2/cpu_state.c b/src/libmpeg2/cpu_state.c
index 433a85b85..07b4c5b7e 100644
--- a/src/libmpeg2/cpu_state.c
+++ b/src/libmpeg2/cpu_state.c
@@ -32,7 +32,7 @@
void (* mpeg2_cpu_state_save) (cpu_state_t * state) = NULL;
void (* mpeg2_cpu_state_restore) (cpu_state_t * state) = NULL;
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
static void state_restore_mmx (cpu_state_t * state)
{
emms ();
@@ -168,7 +168,7 @@ static void state_restore_altivec (cpu_state_t * state)
void mpeg2_cpu_state_init (uint32_t mm_accel)
{
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
if (mm_accel & MM_ACCEL_X86_MMX) {
mpeg2_cpu_state_restore = state_restore_mmx;
}
diff --git a/src/libmpeg2/idct.c b/src/libmpeg2/idct.c
index ac0ad6e0e..157beea31 100644
--- a/src/libmpeg2/idct.c
+++ b/src/libmpeg2/idct.c
@@ -282,7 +282,7 @@ void mpeg2_idct_init (uint32_t mm_accel)
{
mpeg2_zero_block = mpeg2_zero_block_c;
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
if (mm_accel & MM_ACCEL_X86_MMXEXT) {
#ifdef LOG
fprintf (stderr, "Using MMXEXT for IDCT transform\n");
diff --git a/src/libmpeg2/idct_mmx.c b/src/libmpeg2/idct_mmx.c
index d3cc9cb0f..ce4bd064b 100644
--- a/src/libmpeg2/idct_mmx.c
+++ b/src/libmpeg2/idct_mmx.c
@@ -23,7 +23,7 @@
#include "config.h"
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
#include <inttypes.h>
diff --git a/src/libmpeg2/motion_comp.c b/src/libmpeg2/motion_comp.c
index d20dceef8..8779c1296 100644
--- a/src/libmpeg2/motion_comp.c
+++ b/src/libmpeg2/motion_comp.c
@@ -42,7 +42,7 @@ void mpeg2_mc_init (uint32_t mm_accel)
}
#endif
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
if (mm_accel & MM_ACCEL_X86_MMXEXT) {
#ifdef LOG
fprintf (stderr, "Using MMXEXT for motion compensation\n");
diff --git a/src/post/deinterlace/plugins/greedy.c b/src/post/deinterlace/plugins/greedy.c
index 9f0e313b9..fa157258f 100644
--- a/src/post/deinterlace/plugins/greedy.c
+++ b/src/post/deinterlace/plugins/greedy.c
@@ -64,7 +64,7 @@ static void deinterlace_greedy_packed422_scanline_mmxext( uint8_t *output,
deinterlace_scanline_data_t *data,
int width )
{
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
mmx_t MaxComb;
uint8_t *m0 = data->m0;
uint8_t *t1 = data->t1;
diff --git a/src/post/deinterlace/plugins/greedy2frame_template.c b/src/post/deinterlace/plugins/greedy2frame_template.c
index 7b68d6e46..42c575f58 100644
--- a/src/post/deinterlace/plugins/greedy2frame_template.c
+++ b/src/post/deinterlace/plugins/greedy2frame_template.c
@@ -1,5 +1,5 @@
/*****************************************************************************
-** $Id: greedy2frame_template.c,v 1.8 2005/06/05 16:00:06 miguelfreitas Exp $
+** $Id: greedy2frame_template.c,v 1.9 2006/02/04 14:06:29 miguelfreitas Exp $
******************************************************************************
** Copyright (c) 2000 John Adcock, Tom Barry, Steve Grimm All rights reserved.
** port copyright (c) 2003 Miguel Freitas
@@ -19,6 +19,10 @@
** CVS Log
**
** $Log: greedy2frame_template.c,v $
+** Revision 1.9 2006/02/04 14:06:29 miguelfreitas
+** Enable AMD64 mmx/sse support in some plugins (tvtime, libmpeg2, goom...)
+** patch by dani3l
+**
** Revision 1.8 2005/06/05 16:00:06 miguelfreitas
** quite some hacks for gcc 2.95 compatibility
**
@@ -112,7 +116,7 @@ static void DeinterlaceGreedy2Frame_MMX(uint8_t *output, int outstride,
int bottom_field, int second_field, int width, int height )
#endif
{
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
int Line;
int stride = width * 2;
register uint8_t* M1;
diff --git a/src/post/deinterlace/plugins/greedyh.asm b/src/post/deinterlace/plugins/greedyh.asm
index a885b1dd3..0bbd745aa 100644
--- a/src/post/deinterlace/plugins/greedyh.asm
+++ b/src/post/deinterlace/plugins/greedyh.asm
@@ -323,7 +323,7 @@ static void FUNCT_NAME(uint8_t *output, int outstride,
}
// clear out the MMX registers ready for doing floating point again
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
__asm__ __volatile__ ("emms\n\t");
#endif
}
diff --git a/src/post/deinterlace/plugins/linearblend.c b/src/post/deinterlace/plugins/linearblend.c
index d8ecacefc..3b65e381c 100644
--- a/src/post/deinterlace/plugins/linearblend.c
+++ b/src/post/deinterlace/plugins/linearblend.c
@@ -45,7 +45,7 @@ static void deinterlace_scanline_linear_blend( uint8_t *output,
uint8_t *t0 = data->t0;
uint8_t *b0 = data->b0;
uint8_t *m1 = data->m1;
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
int i;
// Get width in bytes.
@@ -110,7 +110,7 @@ static void deinterlace_scanline_linear_blend2( uint8_t *output,
uint8_t *t1 = data->t1;
uint8_t *b1 = data->b1;
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
int i;
// Get width in bytes.
@@ -167,7 +167,7 @@ static void deinterlace_scanline_linear_blend2( uint8_t *output,
#endif
}
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
/* MMXEXT version is about 15% faster with Athlon XP [MF] */
@@ -337,7 +337,7 @@ static deinterlace_method_t linearblendmethod =
"BlurTemporal",
*/
2,
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
MM_ACCEL_X86_MMX,
#else
0,
@@ -362,7 +362,7 @@ static deinterlace_method_t linearblendmethod =
deinterlace_method_t *linearblend_get_method( void )
{
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
if( xine_mm_accel() & MM_ACCEL_X86_MMXEXT )
return &linearblendmethod_mmxext;
else
diff --git a/src/post/deinterlace/plugins/tomsmocomp/TomsMoCompAll.inc b/src/post/deinterlace/plugins/tomsmocomp/TomsMoCompAll.inc
index 0fd3f451f..5870d77be 100644
--- a/src/post/deinterlace/plugins/tomsmocomp/TomsMoCompAll.inc
+++ b/src/post/deinterlace/plugins/tomsmocomp/TomsMoCompAll.inc
@@ -246,7 +246,7 @@ static void FUNCT_NAME(uint8_t *output, int outstride,
}
end:
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
__asm__ __volatile__("emms");
#endif
return;
diff --git a/src/post/deinterlace/plugins/vfir.c b/src/post/deinterlace/plugins/vfir.c
index 6809b2244..3bf03a4a0 100644
--- a/src/post/deinterlace/plugins/vfir.c
+++ b/src/post/deinterlace/plugins/vfir.c
@@ -51,7 +51,7 @@ static void deinterlace_line( uint8_t *dst, uint8_t *lum_m4,
uint8_t *lum_m3, uint8_t *lum_m2,
uint8_t *lum_m1, uint8_t *lum, int size )
{
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
mmx_t rounder;
rounder.uw[0]=4;
@@ -137,7 +137,7 @@ static deinterlace_method_t vfirmethod =
"BlurVertical",
*/
1,
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
MM_ACCEL_X86_MMXEXT,
#else
0,
diff --git a/src/post/deinterlace/speedy.c b/src/post/deinterlace/speedy.c
index 4d3b9710a..0cf07258b 100644
--- a/src/post/deinterlace/speedy.c
+++ b/src/post/deinterlace/speedy.c
@@ -178,7 +178,7 @@ static inline __attribute__ ((always_inline,const)) uint8_t clip255( int x )
static unsigned long CombJaggieThreshold = 73;
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
static unsigned int comb_factor_packed422_scanline_mmx( uint8_t *top, uint8_t *mid,
uint8_t *bot, int width )
{
@@ -304,7 +304,7 @@ static unsigned int diff_factor_packed422_scanline_test_c( uint8_t *cur, uint8_t
}
*/
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
static unsigned int diff_factor_packed422_scanline_mmx( uint8_t *cur, uint8_t *old, int width )
{
const mmx_t qwYMask = { 0x00ff00ff00ff00ffULL };
@@ -345,7 +345,7 @@ static unsigned int diff_factor_packed422_scanline_mmx( uint8_t *cur, uint8_t *o
#define ABS(a) (((a) < 0)?-(a):(a))
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
static void diff_packed422_block8x8_mmx( pulldown_metrics_t *m, uint8_t *old,
uint8_t *new, int os, int ns )
{
@@ -586,7 +586,7 @@ static void packed422_to_packed444_rec601_scanline_c( uint8_t *dest, uint8_t *sr
}
}
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
static void vfilter_chroma_121_packed422_scanline_mmx( uint8_t *output, int width,
uint8_t *m, uint8_t *t, uint8_t *b )
{
@@ -652,7 +652,7 @@ static void vfilter_chroma_121_packed422_scanline_c( uint8_t *output, int width,
}
}
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
static void vfilter_chroma_332_packed422_scanline_mmx( uint8_t *output, int width,
uint8_t *m, uint8_t *t, uint8_t *b )
{
@@ -726,7 +726,7 @@ static void vfilter_chroma_332_packed422_scanline_c( uint8_t *output, int width,
}
}
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
static void kill_chroma_packed422_inplace_scanline_mmx( uint8_t *data, int width )
{
const mmx_t ymask = { 0x00ff00ff00ff00ffULL };
@@ -758,7 +758,7 @@ static void kill_chroma_packed422_inplace_scanline_c( uint8_t *data, int width )
}
}
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
static void invert_colour_packed422_inplace_scanline_mmx( uint8_t *data, int width )
{
const mmx_t allones = { 0xffffffffffffffffULL };
@@ -885,7 +885,7 @@ static void interpolate_packed422_scanline_c( uint8_t *output, uint8_t *top,
}
}
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
static void interpolate_packed422_scanline_mmx( uint8_t *output, uint8_t *top,
uint8_t *bot, int width )
{
@@ -955,7 +955,7 @@ static void interpolate_packed422_scanline_mmx( uint8_t *output, uint8_t *top,
}
#endif
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
static void interpolate_packed422_scanline_mmxext( uint8_t *output, uint8_t *top,
uint8_t *bot, int width )
{
@@ -1015,7 +1015,7 @@ static void blit_colour_packed422_scanline_c( uint8_t *output, int width, int y,
}
}
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
static void blit_colour_packed422_scanline_mmx( uint8_t *output, int width, int y, int cb, int cr )
{
uint32_t colour = cr << 24 | y << 16 | cb << 8 | y;
@@ -1055,7 +1055,7 @@ static void blit_colour_packed422_scanline_mmx( uint8_t *output, int width, int
}
#endif
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
static void blit_colour_packed422_scanline_mmxext( uint8_t *output, int width, int y, int cb, int cr )
{
uint32_t colour = cr << 24 | y << 16 | cb << 8 | y;
@@ -1109,7 +1109,7 @@ static void blit_colour_packed4444_scanline_c( uint8_t *output, int width,
}
}
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
static void blit_colour_packed4444_scanline_mmx( uint8_t *output, int width,
int alpha, int luma,
int cb, int cr )
@@ -1146,7 +1146,7 @@ static void blit_colour_packed4444_scanline_mmx( uint8_t *output, int width,
}
#endif
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
static void blit_colour_packed4444_scanline_mmxext( uint8_t *output, int width,
int alpha, int luma,
int cb, int cr )
@@ -1195,14 +1195,14 @@ static void blit_packed422_scanline_c( uint8_t *dest, const uint8_t *src, int wi
speedy_memcpy_c( dest, src, width*2 );
}
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
static void blit_packed422_scanline_mmx( uint8_t *dest, const uint8_t *src, int width )
{
speedy_memcpy_mmx( dest, src, width*2 );
}
#endif
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
static void blit_packed422_scanline_mmxext( uint8_t *dest, const uint8_t *src, int width )
{
speedy_memcpy_mmxext( dest, src, width*2 );
@@ -1267,7 +1267,7 @@ static void composite_packed4444_alpha_to_packed422_scanline_c( uint8_t *output,
}
}
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
static void composite_packed4444_alpha_to_packed422_scanline_mmxext( uint8_t *output,
uint8_t *input,
uint8_t *foreground,
@@ -1409,7 +1409,7 @@ static void composite_packed4444_to_packed422_scanline_c( uint8_t *output, uint8
}
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
static void composite_packed4444_to_packed422_scanline_mmxext( uint8_t *output, uint8_t *input,
uint8_t *foreground, int width )
{
@@ -1543,7 +1543,7 @@ static void composite_alphamask_to_packed4444_scanline_c( uint8_t *output,
}
}
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
static void composite_alphamask_to_packed4444_scanline_mmxext( uint8_t *output,
uint8_t *input,
uint8_t *mask,
@@ -1691,7 +1691,7 @@ static void premultiply_packed4444_scanline_c( uint8_t *output, uint8_t *input,
}
}
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
static void premultiply_packed4444_scanline_mmxext( uint8_t *output, uint8_t *input, int width )
{
const mmx_t round = { 0x0080008000800080ULL };
@@ -1747,7 +1747,7 @@ static void blend_packed422_scanline_c( uint8_t *output, uint8_t *src1,
}
}
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
static void blend_packed422_scanline_mmxext( uint8_t *output, uint8_t *src1,
uint8_t *src2, int width, int pos )
{
@@ -1792,7 +1792,7 @@ static void blend_packed422_scanline_mmxext( uint8_t *output, uint8_t *src1,
}
#endif
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
static void quarter_blit_vertical_packed422_scanline_mmxext( uint8_t *output, uint8_t *one,
uint8_t *three, int width )
{
@@ -2414,7 +2414,7 @@ void setup_speedy_calls( uint32_t accel, int verbose )
vfilter_chroma_121_packed422_scanline = vfilter_chroma_121_packed422_scanline_c;
vfilter_chroma_332_packed422_scanline = vfilter_chroma_332_packed422_scanline_c;
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
if( speedy_accel & MM_ACCEL_X86_MMXEXT ) {
if( verbose ) {
printf( "speedycode: Using MMXEXT optimized functions.\n" );
diff --git a/src/post/goom/convolve_fx.c b/src/post/goom/convolve_fx.c
index e16a17862..ccd24dbd6 100644
--- a/src/post/goom/convolve_fx.c
+++ b/src/post/goom/convolve_fx.c
@@ -7,6 +7,10 @@
#include <stdlib.h>
#include <string.h>
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
//#define CONV_MOTIF_W 32
//#define CONV_MOTIF_WMASK 0x1f
@@ -151,7 +155,8 @@ static void create_output_with_brightness(VisualFX *_this, Pixel *src, Pixel *de
ytex = yprime + yi + CONV_MOTIF_W * 0x10000 / 2;
yprime += c;
-#ifdef HAVE_MMX
+#if defined(HAVE_MMX) && ! defined(ARCH_X86_64)
+/* This code uses 32-bit registers eax,ecx,esi */
__asm__ __volatile__
("\n\t pxor %%mm7, %%mm7" /* mm7 = 0 */
"\n\t movd %0, %%mm2"
diff --git a/src/post/goom/diff_against_release.patch b/src/post/goom/diff_against_release.patch
index 4fc5bd972..026cc9862 100644
--- a/src/post/goom/diff_against_release.patch
+++ b/src/post/goom/diff_against_release.patch
@@ -1,6 +1,6 @@
diff -ru goom2k4-0/src/filters.c xine-lib/src/post/goom/filters.c
---- goom2k4-0/src/filters.c 2005-02-07 11:46:41.000000000 -0200
-+++ xine-lib/src/post/goom/filters.c 2005-07-18 12:15:50.000000000 -0300
+--- filters.c 2005-02-07 11:46:41.000000000 -0200
++++ filters.c 2005-07-18 12:15:50.000000000 -0300
@@ -704,7 +704,7 @@
data->general_speed = 0.0f;
@@ -11,8 +11,8 @@ diff -ru goom2k4-0/src/filters.c xine-lib/src/post/goom/filters.c
data->hypercosEffect = 0;
data->vPlaneEffect = 0;
diff -ru goom2k4-0/src/goom_core.c xine-lib/src/post/goom/goom_core.c
---- goom2k4-0/src/goom_core.c 2005-02-07 11:46:41.000000000 -0200
-+++ xine-lib/src/post/goom/goom_core.c 2005-07-19 12:39:22.000000000 -0300
+--- goom_core.c 2005-02-07 11:46:41.000000000 -0200
++++ goom_core.c 2005-07-19 12:39:22.000000000 -0300
@@ -26,6 +26,8 @@
#include "goom_fx.h"
#include "goomsl.h"
@@ -36,8 +36,8 @@ diff -ru goom2k4-0/src/goom_core.c xine-lib/src/post/goom/goom_core.c
return (guint32*)goomInfo->outputBuf;
}
diff -ru goom2k4-0/src/goom_tools.c xine-lib/src/post/goom/goom_tools.c
---- goom2k4-0/src/goom_tools.c 2005-02-07 11:46:41.000000000 -0200
-+++ xine-lib/src/post/goom/goom_tools.c 2005-07-18 14:30:02.000000000 -0300
+--- goom_tools.c 2005-02-07 11:46:41.000000000 -0200
++++ goom_tools.c 2005-07-18 14:30:02.000000000 -0300
@@ -3,7 +3,6 @@
GoomRandom *goom_random_init(int i) {
@@ -47,8 +47,8 @@ diff -ru goom2k4-0/src/goom_tools.c xine-lib/src/post/goom/goom_tools.c
goom_random_update_array(grandom, GOOM_NB_RAND);
return grandom;
diff -ru goom2k4-0/src/ifs.c xine-lib/src/post/goom/ifs.c
---- goom2k4-0/src/ifs.c 2005-02-07 11:46:41.000000000 -0200
-+++ xine-lib/src/post/goom/ifs.c 2005-07-19 14:20:20.000000000 -0300
+--- ifs.c 2005-02-07 11:46:41.000000000 -0200
++++ ifs.c 2005-07-19 14:20:20.000000000 -0300
@@ -503,6 +503,13 @@
for (i = 0; i < 4; i++) {
@@ -64,8 +64,8 @@ diff -ru goom2k4-0/src/ifs.c xine-lib/src/post/goom/ifs.c
}
}
diff -ru goom2k4-0/src/tentacle3d.c xine-lib/src/post/goom/tentacle3d.c
---- goom2k4-0/src/tentacle3d.c 2005-02-07 11:46:41.000000000 -0200
-+++ xine-lib/src/post/goom/tentacle3d.c 2005-07-19 14:04:57.000000000 -0300
+--- tentacle3d.c 2005-02-07 11:46:41.000000000 -0200
++++ tentacle3d.c 2005-07-19 14:04:57.000000000 -0300
@@ -10,7 +10,7 @@
#define D 256.0f
@@ -76,8 +76,8 @@ diff -ru goom2k4-0/src/tentacle3d.c xine-lib/src/post/goom/tentacle3d.c
typedef struct _TENTACLE_FX_DATA {
diff -ru goom2k4-0/src/xmmx.c xine-lib/src/post/goom/xmmx.c
---- goom2k4-0/src/xmmx.c 2005-02-07 11:46:41.000000000 -0200
-+++ xine-lib/src/post/goom/xmmx.c 2005-07-18 15:26:23.000000000 -0300
+--- xmmx.c 2005-02-07 11:46:41.000000000 -0200
++++ xmmx.c 2005-07-18 15:26:23.000000000 -0300
@@ -239,7 +239,11 @@
++loop;
@@ -345,8 +345,8 @@ diff -u -p -r1.13 goom_core.c
*param1 = goomInfo->screen.width / 7.0f;
*param2 = 6.0f * goomInfo->screen.width / 7.0f;
}
---- post/goom/filters.c.orig 2005-08-20 12:29:12.000000000 +0200
-+++ post/goom/filters.c 2005-08-20 12:28:25.000000000 +0200
+--- filters.c.orig 2005-08-20 12:29:12.000000000 +0200
++++ filters.c 2005-08-20 12:28:25.000000000 +0200
@@ -201,8 +201,8 @@ static inline v2g zoomVector(ZoomFilterF
/* Noise */
if (data->noisify)
@@ -358,3 +358,139 @@ diff -u -p -r1.13 goom_core.c
}
/* Hypercos */
+diff -r -u xine-lib-1.1.0-orig/src/post/goom/convolve_fx.c xine-lib-1.1.0/src/post/goom/convolve_fx.c
+--- convolve_fx.c 2005-07-22 12:42:00.000000000 -0400
++++ convolve_fx.c 2005-11-11 14:59:39.925112333 -0500
+@@ -7,6 +7,10 @@
+ #include <stdlib.h>
+ #include <string.h>
+
++#if HAVE_CONFIG_H
++# include "config.h"
++#endif
++
+ //#define CONV_MOTIF_W 32
+ //#define CONV_MOTIF_WMASK 0x1f
+
+@@ -151,7 +155,8 @@
+ ytex = yprime + yi + CONV_MOTIF_W * 0x10000 / 2;
+ yprime += c;
+
+-#ifdef HAVE_MMX
++#if defined(HAVE_MMX) && ! defined(ARCH_X86_64)
++/* This code uses 32-bit registers eax,ecx,esi */
+ __asm__ __volatile__
+ ("\n\t pxor %%mm7, %%mm7" /* mm7 = 0 */
+ "\n\t movd %0, %%mm2"
+diff -r -u xine-lib-1.1.0-orig/src/post/goom/mmx.c xine-lib-1.1.0/src/post/goom/mmx.c
+--- mmx.c 2004-07-21 10:38:30.000000000 -0400
++++ mmx.c 2005-11-11 14:51:52.890358793 -0500
+@@ -4,6 +4,7 @@
+ #define BUFFPOINTMASK 0xffff
+ #define BUFFINCR 0xff
+
++#include <stddef.h>
+ #include "mmx.h"
+ #include "goom_graphic.h"
+
+@@ -23,6 +24,7 @@
+ int precalCoef[16][16])
+ {
+ unsigned int ax = (prevX-1)<<PERTEDEC, ay = (prevY-1)<<PERTEDEC;
++ size_t sizeX = prevX;
+
+ int bufsize = prevX * prevY;
+ int loop;
+@@ -33,7 +35,7 @@
+ {
+ /* int couleur; */
+ int px,py;
+- int pos;
++ size_t pos;
+ int coeffs;
+
+ int myPos = loop << 1,
+@@ -91,7 +93,7 @@
+ "punpckhbw %%mm7, %%mm5 \n\t" /* 00-c4-00-c4-00-c4-00-c4 */
+
+ /* ajouter la longueur de ligne a esi */
+- "addl 8(%%ebp),%1 \n\t"
++ "add %4,%1 \n\t"
+
+ /* recuperation des 2 derniers pixels */
+ "movq (%3,%1,4), %%mm1 \n\t"
+@@ -114,8 +116,8 @@
+ "packuswb %%mm7, %%mm0 \n\t"
+
+ "movd %%mm0,%0 \n\t"
+- :"=g"(expix2[loop])
+- :"r"(pos),"r"(coeffs),"r"(expix1)
++ :"=g"(expix2[loop]),"=r"(pos)
++ :"r"(coeffs),"r"(expix1),"g"(sizeX)
+
+ );
+
+diff -r -u xine-lib-1.1.0-orig/src/post/goom/mmx.h xine-lib-1.1.0/src/post/goom/mmx.h
+--- mmx.h 2005-07-19 14:10:30.000000000 -0400
++++ mmx.h 2005-11-11 14:51:52.890358793 -0500
+@@ -27,6 +27,10 @@
+ #ifndef _MMX_H
+ #define _MMX_H
+
++#ifdef HAVE_CONFIG_H
++# include "config.h"
++#endif
++
+ #include "goom_graphic.h"
+
+ /* Warning: at this writing, the version of GAS packaged
+@@ -69,6 +73,9 @@
+ 13 if AMD Extended MMX, &3dNow supported
+ 0 if hardware does not support any of these
+ */
++#ifdef ARCH_X86_64
++ return 13;
++#else
+ register int rval = 0;
+
+ __asm__ __volatile__ (
+@@ -223,6 +230,7 @@
+
+ /* Return */
+ return(rval);
++#endif
+ }
+
+ /* Function to test if mmx instructions are supported...
+diff -r -u xine-lib-1.1.0-orig/src/post/goom/xmmx.c xine-lib-1.1.0/src/post/goom/xmmx.c
+--- xmmx.c 2005-07-21 16:48:37.000000000 -0400
++++ xmmx.c 2005-11-11 14:51:52.890358793 -0500
+@@ -23,7 +23,12 @@
+ #include "goom_graphic.h"
+
+ int xmmx_supported (void) {
++#ifdef ARCH_X86_64
++ return 0; /* Haven't yet converted zoom_filter_xmmx
++ to support 64-bit memory index registers (rsi,rax) */
++#else
+ return (mm_support()&0x8)>>3;
++#endif
+ }
+
+ void zoom_filter_xmmx (int prevX, int prevY,
+@@ -31,6 +36,7 @@
+ int *lbruS, int *lbruD, int buffratio,
+ int precalCoef[16][16])
+ {
++#ifndef ARCH_X86_64
+ int bufsize = prevX * prevY; /* taille du buffer */
+ volatile int loop; /* variable de boucle */
+
+@@ -244,6 +250,7 @@
+ /*#else
+ emms();
+ #endif*/
++#endif /* ARCH_X86_64 */
+ }
+
+ #define DRAWMETHOD_PLUS_XMMX(_out,_backbuf,_col) \
diff --git a/src/post/goom/mmx.c b/src/post/goom/mmx.c
index fdf06492a..484c76092 100644
--- a/src/post/goom/mmx.c
+++ b/src/post/goom/mmx.c
@@ -4,6 +4,7 @@
#define BUFFPOINTMASK 0xffff
#define BUFFINCR 0xff
+#include <stddef.h>
#include "mmx.h"
#include "goom_graphic.h"
@@ -23,6 +24,7 @@ void zoom_filter_mmx (int prevX, int prevY,
int precalCoef[16][16])
{
unsigned int ax = (prevX-1)<<PERTEDEC, ay = (prevY-1)<<PERTEDEC;
+ size_t sizeX = prevX;
int bufsize = prevX * prevY;
int loop;
@@ -33,7 +35,7 @@ void zoom_filter_mmx (int prevX, int prevY,
{
/* int couleur; */
int px,py;
- int pos;
+ size_t pos;
int coeffs;
int myPos = loop << 1,
@@ -91,7 +93,7 @@ void zoom_filter_mmx (int prevX, int prevY,
"punpckhbw %%mm7, %%mm5 \n\t" /* 00-c4-00-c4-00-c4-00-c4 */
/* ajouter la longueur de ligne a esi */
- "addl 8(%%ebp),%1 \n\t"
+ "add %4,%1 \n\t"
/* recuperation des 2 derniers pixels */
"movq (%3,%1,4), %%mm1 \n\t"
@@ -114,8 +116,8 @@ void zoom_filter_mmx (int prevX, int prevY,
"packuswb %%mm7, %%mm0 \n\t"
"movd %%mm0,%0 \n\t"
- :"=g"(expix2[loop])
- :"r"(pos),"r"(coeffs),"r"(expix1)
+ :"=g"(expix2[loop]),"=r"(pos)
+ :"r"(coeffs),"r"(expix1),"g"(sizeX)
);
diff --git a/src/post/goom/mmx.h b/src/post/goom/mmx.h
index 3fae26b98..b650d8b12 100755
--- a/src/post/goom/mmx.h
+++ b/src/post/goom/mmx.h
@@ -27,6 +27,10 @@
#ifndef _MMX_H
#define _MMX_H
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
#include "goom_graphic.h"
/* Warning: at this writing, the version of GAS packaged
@@ -69,6 +73,9 @@ mm_support(void)
13 if AMD Extended MMX, &3dNow supported
0 if hardware does not support any of these
*/
+#ifdef ARCH_X86_64
+ return 13;
+#else
register int rval = 0;
__asm__ __volatile__ (
@@ -223,6 +230,7 @@ mm_support(void)
/* Return */
return(rval);
+#endif
}
/* Function to test if mmx instructions are supported...
diff --git a/src/post/goom/xmmx.c b/src/post/goom/xmmx.c
index b06fb4ad6..7fc9acfc8 100644
--- a/src/post/goom/xmmx.c
+++ b/src/post/goom/xmmx.c
@@ -23,7 +23,12 @@
#include "goom_graphic.h"
int xmmx_supported (void) {
+#ifdef ARCH_X86_64
+ return 0; /* Haven't yet converted zoom_filter_xmmx
+ to support 64-bit memory index registers (rsi,rax) */
+#else
return (mm_support()&0x8)>>3;
+#endif
}
void zoom_filter_xmmx (int prevX, int prevY,
@@ -31,6 +36,7 @@ void zoom_filter_xmmx (int prevX, int prevY,
int *lbruS, int *lbruD, int buffratio,
int precalCoef[16][16])
{
+#ifndef ARCH_X86_64
int bufsize = prevX * prevY; /* taille du buffer */
volatile int loop; /* variable de boucle */
@@ -244,6 +250,7 @@ void zoom_filter_xmmx (int prevX, int prevY,
/*#else
emms();
#endif*/
+#endif /* ARCH_X86_64 */
}
#define DRAWMETHOD_PLUS_XMMX(_out,_backbuf,_col) \
diff --git a/src/post/planar/eq.c b/src/post/planar/eq.c
index 7d38c83e8..50fd03b7f 100644
--- a/src/post/planar/eq.c
+++ b/src/post/planar/eq.c
@@ -17,7 +17,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
- * $Id: eq.c,v 1.13 2006/01/27 07:46:14 tmattern Exp $
+ * $Id: eq.c,v 1.14 2006/02/04 14:06:52 miguelfreitas Exp $
*
* mplayer's eq (soft video equalizer)
* Copyright (C) Richard Felker
@@ -29,7 +29,7 @@
#include <pthread.h>
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
static void process_MMX(unsigned char *dest, int dstride, unsigned char *src, int sstride,
int w, int h, int brightness, int contrast)
{
@@ -65,9 +65,9 @@ static void process_MMX(unsigned char *dest, int dstride, unsigned char *src, in
"paddw %%mm3, %%mm1 \n\t"
"paddw %%mm3, %%mm2 \n\t"
"packuswb %%mm2, %%mm1 \n\t"
- "addl $8, %0 \n\t"
+ "add $8, %0 \n\t"
"movq %%mm1, (%1) \n\t"
- "addl $8, %1 \n\t"
+ "add $8, %1 \n\t"
"decl %%eax \n\t"
"jnz 1b \n\t"
: "=r" (src), "=r" (dest)
@@ -260,7 +260,7 @@ static post_plugin_t *eq_open_plugin(post_class_t *class_gen, int inputs,
}
process = process_C;
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
if( xine_mm_accel() & MM_ACCEL_X86_MMX )
process = process_MMX;
#endif
diff --git a/src/post/planar/eq2.c b/src/post/planar/eq2.c
index 6badc61d9..80821d7e6 100644
--- a/src/post/planar/eq2.c
+++ b/src/post/planar/eq2.c
@@ -17,7 +17,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
- * $Id: eq2.c,v 1.14 2004/04/17 19:54:32 mroi Exp $
+ * $Id: eq2.c,v 1.15 2006/02/04 14:06:52 miguelfreitas Exp $
*
* mplayer's eq2 (soft video equalizer)
* Software equalizer (brightness, contrast, gamma, saturation)
@@ -101,7 +101,7 @@ void create_lut (eq2_param_t *par)
}
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
static
void affine_1d_MMX (eq2_param_t *par, unsigned char *dst, unsigned char *src,
unsigned w, unsigned h, unsigned dstride, unsigned sstride)
@@ -141,9 +141,9 @@ void affine_1d_MMX (eq2_param_t *par, unsigned char *dst, unsigned char *src,
"paddw %%mm3, %%mm1 \n\t"
"paddw %%mm3, %%mm2 \n\t"
"packuswb %%mm2, %%mm1 \n\t"
- "addl $8, %0 \n\t"
+ "add $8, %0 \n\t"
"movq %%mm1, (%1) \n\t"
- "addl $8, %1 \n\t"
+ "add $8, %1 \n\t"
"decl %%eax \n\t"
"jnz 1b \n\t"
: "=r" (src), "=r" (dst)
@@ -198,7 +198,7 @@ void check_values (eq2_param_t *par)
if ((par->c == 1.0) && (par->b == 0.0) && (par->g == 1.0)) {
par->adjust = NULL;
}
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
else if (par->g == 1.0 && (xine_mm_accel() & MM_ACCEL_X86_MMX) ) {
par->adjust = &affine_1d_MMX;
}
diff --git a/src/xine-utils/cpu_accel.c b/src/xine-utils/cpu_accel.c
index 75f537613..e32a0693c 100644
--- a/src/xine-utils/cpu_accel.c
+++ b/src/xine-utils/cpu_accel.c
@@ -326,7 +326,7 @@ uint32_t xine_mm_accel (void)
#endif
#endif
-#if defined(ARCH_X86) || (defined(ARCH_PPC) && defined(ENABLE_ALTIVEC)) || (defined(ARCH_SPARC) && defined(ENABLE_VIS))
+#if defined(ARCH_X86) || defined(ARCH_X86_64) || (defined(ARCH_PPC) && defined(ENABLE_ALTIVEC)) || (defined(ARCH_SPARC) && defined(ENABLE_VIS))
accel |= arch_accel();
#endif
diff --git a/src/xine-utils/memcpy.c b/src/xine-utils/memcpy.c
index 7b5b3a245..c8825ed6e 100644
--- a/src/xine-utils/memcpy.c
+++ b/src/xine-utils/memcpy.c
@@ -395,7 +395,7 @@ static struct {
{
{ NULL, NULL, 0, 0 },
{ "libc memcpy()", memcpy, 0, 0 },
-#if defined(ARCH_X86) && !defined(_MSC_VER)
+#if (defined(ARCH_X86) || defined(ARCH_X86_64)) && !defined(_MSC_VER)
{ "linux kernel memcpy()", linux_kernel_memcpy, 0, 0 },
{ "MMX optimized memcpy()", mmx_memcpy, 0, MM_MMX },
{ "MMXEXT optimized memcpy()", mmx2_memcpy, 0, MM_MMXEXT },
@@ -408,7 +408,7 @@ static struct {
{ NULL, NULL, 0, 0 }
};
-#if defined(ARCH_X86) && defined(HAVE_SYS_TIMES_H)
+#if (defined(ARCH_X86) || defined(ARCH_X86_64)) && defined(HAVE_SYS_TIMES_H)
static unsigned long long int rdtsc(int config_flags)
{
unsigned long long int x;
@@ -465,7 +465,7 @@ void xine_probe_fast_memcpy(xine_t *xine)
int config_flags = -1;
static char *memcpy_methods[] = {
"probe", "libc",
-#if defined(ARCH_X86) && !defined(_MSC_VER)
+#if (defined(ARCH_X86) || defined(ARCH_X86_64)) && !defined(_MSC_VER)
"kernel", "mmx", "mmxext", "sse",
#endif
#if defined (ARCH_PPC) && !defined (HOST_OS_DARWIN)