diff options
-rw-r--r-- | src/post/goom/ifs_display.c | 15 | ||||
-rw-r--r-- | src/post/goom/zoom_filter_mmx.c | 81 |
2 files changed, 37 insertions, 59 deletions
diff --git a/src/post/goom/ifs_display.c b/src/post/goom/ifs_display.c index 657a74063..53530ccf6 100644 --- a/src/post/goom/ifs_display.c +++ b/src/post/goom/ifs_display.c @@ -18,25 +18,12 @@ ifs_fun_mmx(guint32 * data, guint32 * back, int width, int height, if ((x < width) && (y < height) && (x > 0) && (y > 0)) { int pos = x + (y * width); -/* register int b asm ("eax"); - - b = back[pos]; - __asm__ - (" - movd %%eax, %%mm0 - movd %%edx, %%mm1 - paddusb %%mm1, %%mm0 - movd %%mm0, %%eax - ":: - "edx" (couleursl)); - data[pos] = b; -*/ movd_m2r(back[pos],mm0); paddusb_r2r(mm1,mm0); movd_r2m(mm0,data[pos]); } } - __asm__ __volatile__ ("emms"); + emms(); } #endif diff --git a/src/post/goom/zoom_filter_mmx.c b/src/post/goom/zoom_filter_mmx.c index ff7a44214..a7a5ffd8f 100644 --- a/src/post/goom/zoom_filter_mmx.c +++ b/src/post/goom/zoom_filter_mmx.c @@ -1,4 +1,6 @@ #include "goom_config.h" +#include "xineutils.h" + #ifdef MMX #define BUFFPOINTNB 16 #define BUFFPOINTMASK 0xffff @@ -20,7 +22,7 @@ void zoom_filter_mmx (int prevX, int prevY, int bufsize = prevX * prevY; int loop; - __asm__ ("pxor %mm7,%mm7"); + pxor_r2r(mm7,mm7); for (loop=0; loop<bufsize; loop++) { @@ -45,75 +47,64 @@ void zoom_filter_mmx (int prevX, int prevY, /* coef en modulo 15 */ coeffs = precalCoef [px & PERTEMASK][py & PERTEMASK]; } - - __asm__ __volatile__ (" - movd %%eax,%%mm6 + + movd_m2r(coeffs, mm6); ;/* recuperation des deux premiers pixels dans mm0 et mm1 */ - movq (%%edx,%%ebx,4), %%mm0 /* b1-v1-r1-a1-b2-v2-r2-a2 */ - movq %%mm0, %%mm1 /* b1-v1-r1-a1-b2-v2-r2-a2 */ + movq_m2r(expix1[pos], mm0); /* b1-v1-r1-a1-b2-v2-r2-a2 */ + movq_r2r(mm0, mm1); /* b1-v1-r1-a1-b2-v2-r2-a2 */ ;/* depackage du premier pixel */ - punpcklbw %%mm7, %%mm0 /* 00-b2-00-v2-00-r2-00-a2 */ + punpcklbw_r2r(mm7, mm0); /* 00-b2-00-v2-00-r2-00-a2 */ - movq %%mm6, %%mm5 /* ??-??-??-??-c4-c3-c2-c1 */ + movq_r2r(mm6, mm5); /* xx-xx-xx-xx-c4-c3-c2-c1 */ ;/* depackage du 2ieme pixel */ - punpckhbw %%mm7, %%mm1 /* 00-b1-00-v1-00-r1-00-a1 */ + punpckhbw_r2r(mm7, mm1); /* 00-b1-00-v1-00-r1-00-a1 */ ;/* extraction des coefficients... */ - punpcklbw %%mm5, %%mm6 /* c4-c4-c3-c3-c2-c2-c1-c1 */ - movq %%mm6, %%mm4 /* c4-c4-c3-c3-c2-c2-c1-c1 */ - movq %%mm6, %%mm5 /* c4-c4-c3-c3-c2-c2-c1-c1 */ + punpcklbw_r2r(mm5, mm6); /* c4-c4-c3-c3-c2-c2-c1-c1 */ + movq_r2r(mm6, mm4); /* c4-c4-c3-c3-c2-c2-c1-c1 */ + movq_r2r(mm6, mm5); /* c4-c4-c3-c3-c2-c2-c1-c1 */ - punpcklbw %%mm5, %%mm6 /* c2-c2-c2-c2-c1-c1-c1-c1 */ - punpckhbw %%mm5, %%mm4 /* c4-c4-c4-c4-c3-c3-c3-c3 */ + punpcklbw_r2r(mm5, mm6); /* c2-c2-c2-c2-c1-c1-c1-c1 */ + punpckhbw_r2r(mm5, mm4); /* c4-c4-c4-c4-c3-c3-c3-c3 */ - movq %%mm6, %%mm3 /* c2-c2-c2-c2-c1-c1-c1-c1 */ - punpcklbw %%mm7, %%mm6 /* 00-c1-00-c1-00-c1-00-c1 */ - punpckhbw %%mm7, %%mm3 /* 00-c2-00-c2-00-c2-00-c2 */ + movq_r2r(mm6, mm3); /* c2-c2-c2-c2-c1-c1-c1-c1 */ + punpcklbw_r2r(mm7, mm6); /* 00-c1-00-c1-00-c1-00-c1 */ + punpckhbw_r2r(mm7, mm3); /* 00-c2-00-c2-00-c2-00-c2 */ ;/* multiplication des pixels par les coefficients */ - pmullw %%mm6, %%mm0 /* c1*b2-c1*v2-c1*r2-c1*a2 */ - pmullw %%mm3, %%mm1 /* c2*b1-c2*v1-c2*r1-c2*a1 */ - paddw %%mm1, %%mm0 + pmullw_r2r(mm6, mm0); /* c1*b2-c1*v2-c1*r2-c1*a2 */ + pmullw_r2r(mm3, mm1); /* c2*b1-c2*v1-c2*r1-c2*a1 */ + paddw_r2r(mm1, mm0); ;/* ...extraction des 2 derniers coefficients */ - movq %%mm4, %%mm5 /* c4-c4-c4-c4-c3-c3-c3-c3 */ - punpcklbw %%mm7, %%mm4 /* 00-c3-00-c3-00-c3-00-c3 */ - punpckhbw %%mm7, %%mm5 /* 00-c4-00-c4-00-c4-00-c4 */ + movq_r2r(mm4, mm5); /* c4-c4-c4-c4-c3-c3-c3-c3 */ + punpcklbw_r2r(mm7, mm4); /* 00-c3-00-c3-00-c3-00-c3 */ + punpckhbw_r2r(mm7, mm5); /* 00-c4-00-c4-00-c4-00-c4 */ /* ajouter la longueur de ligne a esi */ - addl 8(%%ebp),%%ebx - ;/* recuperation des 2 derniers pixels */ - movq (%%edx,%%ebx,4), %%mm1 - movq %%mm1, %%mm2 + movq_m2r(expix1[pos+prevX], mm1); + movq_r2r(mm1, mm2); ;/* depackage des pixels */ - punpcklbw %%mm7, %%mm1 - punpckhbw %%mm7, %%mm2 + punpcklbw_r2r(mm7, mm1); + punpckhbw_r2r(mm7, mm2); ;/* multiplication pas les coeffs */ - pmullw %%mm4, %%mm1 - pmullw %%mm5, %%mm2 + pmullw_r2r(mm4, mm1); + pmullw_r2r(mm5, mm2); ;/* ajout des valeurs obtenues à la valeur finale */ - paddw %%mm1, %%mm0 - paddw %%mm2, %%mm0 + paddw_r2r(mm1, mm0); + paddw_r2r(mm2, mm0); ;/* division par 256 = 16+16+16+16, puis repackage du pixel final */ - psrlw $8, %%mm0 - packuswb %%mm7, %%mm0 - - movd %%mm0,%%eax - " - :"=eax"(expix2[loop]) - :"ebx"(pos),"eax"(coeffs),"edx"(expix1) - - ); - -/* expix2[loop] = couleur; */ + psrlw_i2r(8, mm0); + packuswb_r2r(mm7, mm0); + movd_r2m(mm0,expix2[loop]); - __asm__ __volatile__ ("emms"); } + emms(); } #endif |