diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/post/goom/ifs_display.c | 15 | ||||
| -rw-r--r-- | src/post/goom/zoom_filter_mmx.c | 81 | 
2 files changed, 37 insertions, 59 deletions
| diff --git a/src/post/goom/ifs_display.c b/src/post/goom/ifs_display.c index 657a74063..53530ccf6 100644 --- a/src/post/goom/ifs_display.c +++ b/src/post/goom/ifs_display.c @@ -18,25 +18,12 @@ ifs_fun_mmx(guint32 * data, guint32 * back, int width, int height,  		if ((x < width) && (y < height) && (x > 0) && (y > 0)) {  			int     pos = x + (y * width); -/*			register int b asm ("eax"); - -			b = back[pos]; -			__asm__ -				(" -				 movd %%eax, %%mm0 -				 movd %%edx, %%mm1 -				 paddusb %%mm1, %%mm0 -				 movd %%mm0, %%eax -				 ":: -				 "edx" (couleursl)); -			data[pos] = b; -*/  			movd_m2r(back[pos],mm0);  			paddusb_r2r(mm1,mm0);  			movd_r2m(mm0,data[pos]);  		}  	} -	__asm__ __volatile__ ("emms"); +	emms();  }  #endif diff --git a/src/post/goom/zoom_filter_mmx.c b/src/post/goom/zoom_filter_mmx.c index ff7a44214..a7a5ffd8f 100644 --- a/src/post/goom/zoom_filter_mmx.c +++ b/src/post/goom/zoom_filter_mmx.c @@ -1,4 +1,6 @@  #include "goom_config.h" +#include "xineutils.h" +  #ifdef MMX  #define BUFFPOINTNB 16  #define BUFFPOINTMASK 0xffff @@ -20,7 +22,7 @@ void zoom_filter_mmx (int prevX, int prevY,    int bufsize = prevX * prevY;    int loop; -  __asm__ ("pxor %mm7,%mm7"); +  pxor_r2r(mm7,mm7);    for (loop=0; loop<bufsize; loop++)  	{ @@ -45,75 +47,64 @@ void zoom_filter_mmx (int prevX, int prevY,  		/* coef en modulo 15 */  		coeffs = precalCoef [px & PERTEMASK][py & PERTEMASK];  	  } - -	  __asm__ __volatile__ (" -               movd %%eax,%%mm6 +   +		movd_m2r(coeffs, mm6);  			   ;/* recuperation des deux premiers pixels dans mm0 et mm1 */ -	   	       movq (%%edx,%%ebx,4), %%mm0		/* b1-v1-r1-a1-b2-v2-r2-a2 */ -			   movq %%mm0, %%mm1				/* b1-v1-r1-a1-b2-v2-r2-a2 */ +		movq_m2r(expix1[pos], mm0);		/* b1-v1-r1-a1-b2-v2-r2-a2 */ +		movq_r2r(mm0, mm1);			/* b1-v1-r1-a1-b2-v2-r2-a2 */  			   ;/* depackage du premier pixel */ -			   punpcklbw %%mm7, %%mm0	/* 00-b2-00-v2-00-r2-00-a2 */ +		punpcklbw_r2r(mm7, mm0);		/* 00-b2-00-v2-00-r2-00-a2 */ -			   movq %%mm6, %%mm5			/* ??-??-??-??-c4-c3-c2-c1 */ +		movq_r2r(mm6, mm5);			/* xx-xx-xx-xx-c4-c3-c2-c1 */  			   ;/* depackage du 2ieme pixel */ -			   punpckhbw %%mm7, %%mm1	/* 00-b1-00-v1-00-r1-00-a1 */ +		punpckhbw_r2r(mm7, mm1);		/* 00-b1-00-v1-00-r1-00-a1 */  			   ;/* extraction des coefficients... */ -			   punpcklbw %%mm5, %%mm6	/* c4-c4-c3-c3-c2-c2-c1-c1 */ -			   movq %%mm6, %%mm4			/* c4-c4-c3-c3-c2-c2-c1-c1 */ -			   movq %%mm6, %%mm5			/* c4-c4-c3-c3-c2-c2-c1-c1 */ +		punpcklbw_r2r(mm5, mm6);		/* c4-c4-c3-c3-c2-c2-c1-c1 */ +		movq_r2r(mm6, mm4);			/* c4-c4-c3-c3-c2-c2-c1-c1 */ +		movq_r2r(mm6, mm5);			/* c4-c4-c3-c3-c2-c2-c1-c1 */ -			   punpcklbw %%mm5, %%mm6	/* c2-c2-c2-c2-c1-c1-c1-c1 */ -			   punpckhbw %%mm5, %%mm4	/* c4-c4-c4-c4-c3-c3-c3-c3 */ +		punpcklbw_r2r(mm5, mm6);	/* c2-c2-c2-c2-c1-c1-c1-c1 */ +		punpckhbw_r2r(mm5, mm4);	/* c4-c4-c4-c4-c3-c3-c3-c3 */ -			   movq %%mm6, %%mm3			/* c2-c2-c2-c2-c1-c1-c1-c1 */ -			   punpcklbw %%mm7, %%mm6	/* 00-c1-00-c1-00-c1-00-c1 */ -			   punpckhbw %%mm7, %%mm3	/* 00-c2-00-c2-00-c2-00-c2 */ +		movq_r2r(mm6, mm3);			/* c2-c2-c2-c2-c1-c1-c1-c1 */ +		punpcklbw_r2r(mm7, mm6);	/* 00-c1-00-c1-00-c1-00-c1 */ +		punpckhbw_r2r(mm7, mm3);	/* 00-c2-00-c2-00-c2-00-c2 */  			   ;/* multiplication des pixels par les coefficients */ -			   pmullw %%mm6, %%mm0		/* c1*b2-c1*v2-c1*r2-c1*a2 */ -			   pmullw %%mm3, %%mm1		/* c2*b1-c2*v1-c2*r1-c2*a1 */ -			   paddw %%mm1, %%mm0 +		pmullw_r2r(mm6, mm0);		/* c1*b2-c1*v2-c1*r2-c1*a2 */ +		pmullw_r2r(mm3, mm1);		/* c2*b1-c2*v1-c2*r1-c2*a1 */ +		paddw_r2r(mm1, mm0);  			   ;/* ...extraction des 2 derniers coefficients */ -			   movq %%mm4, %%mm5			/* c4-c4-c4-c4-c3-c3-c3-c3 */ -			   punpcklbw %%mm7, %%mm4	/* 00-c3-00-c3-00-c3-00-c3 */ -			   punpckhbw %%mm7, %%mm5	/* 00-c4-00-c4-00-c4-00-c4 */ +		movq_r2r(mm4, mm5);			/* c4-c4-c4-c4-c3-c3-c3-c3 */ +		punpcklbw_r2r(mm7, mm4);	/* 00-c3-00-c3-00-c3-00-c3 */ +		punpckhbw_r2r(mm7, mm5);	/* 00-c4-00-c4-00-c4-00-c4 */  			   /* ajouter la longueur de ligne a esi */ -			   addl 8(%%ebp),%%ebx -	     			   ;/* recuperation des 2 derniers pixels */ -			   movq (%%edx,%%ebx,4), %%mm1 -			   movq %%mm1, %%mm2 +		movq_m2r(expix1[pos+prevX], mm1); +		movq_r2r(mm1, mm2);  			   ;/* depackage des pixels */ -			   punpcklbw %%mm7, %%mm1 -			   punpckhbw %%mm7, %%mm2 +		punpcklbw_r2r(mm7, mm1); +		punpckhbw_r2r(mm7, mm2);  			   ;/* multiplication pas les coeffs */ -			   pmullw %%mm4, %%mm1 -			   pmullw %%mm5, %%mm2 +		pmullw_r2r(mm4, mm1); +		pmullw_r2r(mm5, mm2);  			   ;/* ajout des valeurs obtenues à la valeur finale */ -			   paddw %%mm1, %%mm0 -			   paddw %%mm2, %%mm0 +		paddw_r2r(mm1, mm0); +		paddw_r2r(mm2, mm0);  			   ;/* division par 256 = 16+16+16+16, puis repackage du pixel final */ -			   psrlw $8, %%mm0 -			   packuswb %%mm7, %%mm0 - -               movd %%mm0,%%eax -			   " -							:"=eax"(expix2[loop]) -							:"ebx"(pos),"eax"(coeffs),"edx"(expix1) -							 -				); -	   -/*	  expix2[loop] = couleur; */ +		psrlw_i2r(8, mm0); +		packuswb_r2r(mm7, mm0); +		movd_r2m(mm0,expix2[loop]); -	  __asm__ __volatile__ ("emms");  	} +	emms();  }  #endif | 
