summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/post/goom/ifs_display.c 15
-rw-r--r-- src/post/goom/zoom_filter_mmx.c 81
2 files changed, 37 insertions, 59 deletions
diff --git a/src/post/goom/ifs_display.c b/src/post/goom/ifs_display.c
index 657a74063..53530ccf6 100644
--- a/src/post/goom/ifs_display.c
+++ b/src/post/goom/ifs_display.c
@@ -18,25 +18,12 @@ ifs_fun_mmx(guint32 * data, guint32 * back, int width, int height,
if ((x < width) && (y < height) && (x > 0) && (y > 0)) {
int pos = x + (y * width);
-/* register int b asm ("eax");
-
- b = back[pos];
- __asm__
- ("
- movd %%eax, %%mm0
- movd %%edx, %%mm1
- paddusb %%mm1, %%mm0
- movd %%mm0, %%eax
- "::
- "edx" (couleursl));
- data[pos] = b;
-*/
movd_m2r(back[pos],mm0);
paddusb_r2r(mm1,mm0);
movd_r2m(mm0,data[pos]);
}
}
- __asm__ __volatile__ ("emms");
+ emms();
}
#endif
diff --git a/src/post/goom/zoom_filter_mmx.c b/src/post/goom/zoom_filter_mmx.c
index ff7a44214..a7a5ffd8f 100644
--- a/src/post/goom/zoom_filter_mmx.c
+++ b/src/post/goom/zoom_filter_mmx.c
@@ -1,4 +1,6 @@
#include "goom_config.h"
+#include "xineutils.h"
+
#ifdef MMX
#define BUFFPOINTNB 16
#define BUFFPOINTMASK 0xffff
@@ -20,7 +22,7 @@ void zoom_filter_mmx (int prevX, int prevY,
int bufsize = prevX * prevY;
int loop;
- __asm__ ("pxor %mm7,%mm7");
+ pxor_r2r(mm7,mm7);
for (loop=0; loop<bufsize; loop++)
{
@@ -45,75 +47,64 @@ void zoom_filter_mmx (int prevX, int prevY,
/* coef en modulo 15 */
coeffs = precalCoef [px & PERTEMASK][py & PERTEMASK];
}
-
- __asm__ __volatile__ ("
- movd %%eax,%%mm6
+
+ movd_m2r(coeffs, mm6);
;/* recuperation des deux premiers pixels dans mm0 et mm1 */
- movq (%%edx,%%ebx,4), %%mm0 /* b1-v1-r1-a1-b2-v2-r2-a2 */
- movq %%mm0, %%mm1 /* b1-v1-r1-a1-b2-v2-r2-a2 */
+ movq_m2r(expix1[pos], mm0); /* b1-v1-r1-a1-b2-v2-r2-a2 */
+ movq_r2r(mm0, mm1); /* b1-v1-r1-a1-b2-v2-r2-a2 */
;/* depackage du premier pixel */
- punpcklbw %%mm7, %%mm0 /* 00-b2-00-v2-00-r2-00-a2 */
+ punpcklbw_r2r(mm7, mm0); /* 00-b2-00-v2-00-r2-00-a2 */
- movq %%mm6, %%mm5 /* ??-??-??-??-c4-c3-c2-c1 */
+ movq_r2r(mm6, mm5); /* xx-xx-xx-xx-c4-c3-c2-c1 */
;/* depackage du 2ieme pixel */
- punpckhbw %%mm7, %%mm1 /* 00-b1-00-v1-00-r1-00-a1 */
+ punpckhbw_r2r(mm7, mm1); /* 00-b1-00-v1-00-r1-00-a1 */
;/* extraction des coefficients... */
- punpcklbw %%mm5, %%mm6 /* c4-c4-c3-c3-c2-c2-c1-c1 */
- movq %%mm6, %%mm4 /* c4-c4-c3-c3-c2-c2-c1-c1 */
- movq %%mm6, %%mm5 /* c4-c4-c3-c3-c2-c2-c1-c1 */
+ punpcklbw_r2r(mm5, mm6); /* c4-c4-c3-c3-c2-c2-c1-c1 */
+ movq_r2r(mm6, mm4); /* c4-c4-c3-c3-c2-c2-c1-c1 */
+ movq_r2r(mm6, mm5); /* c4-c4-c3-c3-c2-c2-c1-c1 */
- punpcklbw %%mm5, %%mm6 /* c2-c2-c2-c2-c1-c1-c1-c1 */
- punpckhbw %%mm5, %%mm4 /* c4-c4-c4-c4-c3-c3-c3-c3 */
+ punpcklbw_r2r(mm5, mm6); /* c2-c2-c2-c2-c1-c1-c1-c1 */
+ punpckhbw_r2r(mm5, mm4); /* c4-c4-c4-c4-c3-c3-c3-c3 */
- movq %%mm6, %%mm3 /* c2-c2-c2-c2-c1-c1-c1-c1 */
- punpcklbw %%mm7, %%mm6 /* 00-c1-00-c1-00-c1-00-c1 */
- punpckhbw %%mm7, %%mm3 /* 00-c2-00-c2-00-c2-00-c2 */
+ movq_r2r(mm6, mm3); /* c2-c2-c2-c2-c1-c1-c1-c1 */
+ punpcklbw_r2r(mm7, mm6); /* 00-c1-00-c1-00-c1-00-c1 */
+ punpckhbw_r2r(mm7, mm3); /* 00-c2-00-c2-00-c2-00-c2 */
;/* multiplication des pixels par les coefficients */
- pmullw %%mm6, %%mm0 /* c1*b2-c1*v2-c1*r2-c1*a2 */
- pmullw %%mm3, %%mm1 /* c2*b1-c2*v1-c2*r1-c2*a1 */
- paddw %%mm1, %%mm0
+ pmullw_r2r(mm6, mm0); /* c1*b2-c1*v2-c1*r2-c1*a2 */
+ pmullw_r2r(mm3, mm1); /* c2*b1-c2*v1-c2*r1-c2*a1 */
+ paddw_r2r(mm1, mm0);
;/* ...extraction des 2 derniers coefficients */
- movq %%mm4, %%mm5 /* c4-c4-c4-c4-c3-c3-c3-c3 */
- punpcklbw %%mm7, %%mm4 /* 00-c3-00-c3-00-c3-00-c3 */
- punpckhbw %%mm7, %%mm5 /* 00-c4-00-c4-00-c4-00-c4 */
+ movq_r2r(mm4, mm5); /* c4-c4-c4-c4-c3-c3-c3-c3 */
+ punpcklbw_r2r(mm7, mm4); /* 00-c3-00-c3-00-c3-00-c3 */
+ punpckhbw_r2r(mm7, mm5); /* 00-c4-00-c4-00-c4-00-c4 */
/* ajouter la longueur de ligne a esi */
- addl 8(%%ebp),%%ebx
-
;/* recuperation des 2 derniers pixels */
- movq (%%edx,%%ebx,4), %%mm1
- movq %%mm1, %%mm2
+ movq_m2r(expix1[pos+prevX], mm1);
+ movq_r2r(mm1, mm2);
;/* depackage des pixels */
- punpcklbw %%mm7, %%mm1
- punpckhbw %%mm7, %%mm2
+ punpcklbw_r2r(mm7, mm1);
+ punpckhbw_r2r(mm7, mm2);
;/* multiplication pas les coeffs */
- pmullw %%mm4, %%mm1
- pmullw %%mm5, %%mm2
+ pmullw_r2r(mm4, mm1);
+ pmullw_r2r(mm5, mm2);
;/* ajout des valeurs obtenues à la valeur finale */
- paddw %%mm1, %%mm0
- paddw %%mm2, %%mm0
+ paddw_r2r(mm1, mm0);
+ paddw_r2r(mm2, mm0);
;/* division par 256 = 16+16+16+16, puis repackage du pixel final */
- psrlw $8, %%mm0
- packuswb %%mm7, %%mm0
-
- movd %%mm0,%%eax
- "
- :"=eax"(expix2[loop])
- :"ebx"(pos),"eax"(coeffs),"edx"(expix1)
-
- );
-
-/* expix2[loop] = couleur; */
+ psrlw_i2r(8, mm0);
+ packuswb_r2r(mm7, mm0);
+ movd_r2m(mm0,expix2[loop]);
- __asm__ __volatile__ ("emms");
}
+ emms();
}
#endif