Sync with a more recent version of FFmpeg.

author: Diego 'Flameeyes' Pettenò <flameeyes@gmail.com> 2007-04-03 01:18:24 +0200
committer: Diego 'Flameeyes' Pettenò <flameeyes@gmail.com> 2007-04-03 01:18:24 +0200
commit: fb09531720a4aa2dfa97e5a9a246a453b6278fd2 (patch)
tree: 61525c3a8ddb419d3838a26e488fc3659079bbcd /contrib/ffmpeg/libavcodec/i386
parent: 294d01046724e28b7193bcb65bf2a0391b0135b6 (diff)
download: xine-lib-fb09531720a4aa2dfa97e5a9a246a453b6278fd2.tar.gz
xine-lib-fb09531720a4aa2dfa97e5a9a246a453b6278fd2.tar.bz2
8 files changed, 169 insertions, 148 deletions
diff --git a/contrib/ffmpeg/libavcodec/i386/cputest.c b/contrib/ffmpeg/libavcodec/i386/cputest.c
index 262786b71..0705ab3e5 100644
--- a/contrib/ffmpeg/libavcodec/i386/cputest.c
+++ b/contrib/ffmpeg/libavcodec/i386/cputest.c
@@ -87,6 +87,8 @@ int mm_support(void)
             rval |= MM_SSE2;
         if (ecx & 1)
             rval |= MM_SSE3;
+        if (ecx & 0x00000200 )
+            rval |= MM_SSSE3;
     }
 
     cpuid(0x80000000, max_ext_level, ebx, ecx, edx);
@@ -104,11 +106,13 @@ int mm_support(void)
     }
 
 #if 0
-    av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s\n",
+    av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s%s%s\n",
         (rval&MM_MMX) ? "MMX ":"",
         (rval&MM_MMXEXT) ? "MMX2 ":"",
         (rval&MM_SSE) ? "SSE ":"",
         (rval&MM_SSE2) ? "SSE2 ":"",
+        (rval&MM_SSE3) ? "SSE3 ":"",
+        (rval&MM_SSSE3) ? "SSSE3 ":"",
         (rval&MM_3DNOW) ? "3DNow ":"",
         (rval&MM_3DNOWEXT) ? "3DNowExt ":"");
 #endif
diff --git a/contrib/ffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c b/contrib/ffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c
index e09a1007e..a943a0371 100644
--- a/contrib/ffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c
+++ b/contrib/ffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c
@@ -184,91 +184,78 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
 
 static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
 {
-    DECLARE_ALIGNED_8(uint64_t, AA);
-    DECLARE_ALIGNED_8(uint64_t, DD);
-    int i;
-
-    /* no special case for mv=(0,0) in 4x*, since it's much less common than in 8x*.
-     * could still save a few cycles, but maybe not worth the complexity. */
-
-    assert(x<8 && y<8 && x>=0 && y>=0);
-
-    asm volatile("movd %2, %%mm4\n\t"
-                 "movd %3, %%mm6\n\t"
-                 "punpcklwd %%mm4, %%mm4\n\t"
-                 "punpcklwd %%mm6, %%mm6\n\t"
-                 "punpckldq %%mm4, %%mm4\n\t" /* mm4 = x words */
-                 "punpckldq %%mm6, %%mm6\n\t" /* mm6 = y words */
-                 "movq %%mm4, %%mm5\n\t"
-                 "pmullw %%mm6, %%mm4\n\t"    /* mm4 = x * y */
-                 "psllw $3, %%mm5\n\t"
-                 "psllw $3, %%mm6\n\t"
-                 "movq %%mm5, %%mm7\n\t"
-                 "paddw %%mm6, %%mm7\n\t"
-                 "movq %%mm4, %1\n\t"         /* DD = x * y */
-                 "psubw %%mm4, %%mm5\n\t"     /* mm5 = B = 8x - xy */
-                 "psubw %%mm4, %%mm6\n\t"     /* mm6 = C = 8y - xy */
-                 "paddw %4, %%mm4\n\t"
-                 "psubw %%mm7, %%mm4\n\t"     /* mm4 = A = xy - (8x+8y) + 64 */
-                 "pxor %%mm7, %%mm7\n\t"
-                 "movq %%mm4, %0\n\t"
-                 : "=m" (AA), "=m" (DD) : "rm" (x), "rm" (y), "m" (ff_pw_64));
-
     asm volatile(
-        /* mm0 = src[0..3], mm1 = src[1..4] */
-        "movd %0, %%mm0\n\t"
-        "movd %1, %%mm1\n\t"
-        "punpcklbw %%mm7, %%mm0\n\t"
-        "punpcklbw %%mm7, %%mm1\n\t"
-        : : "m" (src[0]), "m" (src[1]));
-
-    for(i=0; i<h; i++) {
-        asm volatile(
-            /* mm2 = A * src[0..3] + B * src[1..4] */
-            "movq %%mm0, %%mm2\n\t"
-            "pmullw %0, %%mm2\n\t"
-            "pmullw %%mm5, %%mm1\n\t"
-            "paddw %%mm1, %%mm2\n\t"
-            : : "m" (AA));
-
-        src += stride;
-        asm volatile(
-            /* mm0 = src[0..3], mm1 = src[1..4] */
-            "movd %0, %%mm0\n\t"
-            "movd %1, %%mm1\n\t"
-            "punpcklbw %%mm7, %%mm0\n\t"
-            "punpcklbw %%mm7, %%mm1\n\t"
-            : : "m" (src[0]), "m" (src[1]));
-
-        asm volatile(
-            /* mm2 += C * src[0..3] + D * src[1..4] */
-            "movq %%mm0, %%mm3\n\t"
-            "movq %%mm1, %%mm4\n\t"
-            "pmullw %%mm6, %%mm3\n\t"
-            "pmullw %0, %%mm4\n\t"
-            "paddw %%mm3, %%mm2\n\t"
-            "paddw %%mm4, %%mm2\n\t"
-            : : "m" (DD));
-
-        asm volatile(
-            /* dst[0..3] = pack((mm2 + 32) >> 6) */
-            "paddw %1, %%mm2\n\t"
-            "psrlw $6, %%mm2\n\t"
-            "packuswb %%mm7, %%mm2\n\t"
-            H264_CHROMA_OP4(%0, %%mm2, %%mm3)
-            "movd %%mm2, %0\n\t"
-            : "=m" (dst[0]) : "m" (ff_pw_32));
-        dst += stride;
-    }
+        "pxor   %%mm7, %%mm7        \n\t"
+        "movd %5, %%mm2             \n\t"
+        "movd %6, %%mm3             \n\t"
+        "movq "MANGLE(ff_pw_8)", %%mm4\n\t"
+        "movq "MANGLE(ff_pw_8)", %%mm5\n\t"
+        "punpcklwd %%mm2, %%mm2     \n\t"
+        "punpcklwd %%mm3, %%mm3     \n\t"
+        "punpcklwd %%mm2, %%mm2     \n\t"
+        "punpcklwd %%mm3, %%mm3     \n\t"
+        "psubw %%mm2, %%mm4         \n\t"
+        "psubw %%mm3, %%mm5         \n\t"
+
+        "movd  (%1), %%mm0          \n\t"
+        "movd 1(%1), %%mm6          \n\t"
+        "add %3, %1                 \n\t"
+        "punpcklbw %%mm7, %%mm0     \n\t"
+        "punpcklbw %%mm7, %%mm6     \n\t"
+        "pmullw %%mm4, %%mm0        \n\t"
+        "pmullw %%mm2, %%mm6        \n\t"
+        "paddw %%mm0, %%mm6         \n\t"
+
+        "1:                         \n\t"
+        "movd  (%1), %%mm0          \n\t"
+        "movd 1(%1), %%mm1          \n\t"
+        "add %3, %1                 \n\t"
+        "punpcklbw %%mm7, %%mm0     \n\t"
+        "punpcklbw %%mm7, %%mm1     \n\t"
+        "pmullw %%mm4, %%mm0        \n\t"
+        "pmullw %%mm2, %%mm1        \n\t"
+        "paddw %%mm0, %%mm1         \n\t"
+        "movq %%mm1, %%mm0          \n\t"
+        "pmullw %%mm5, %%mm6        \n\t"
+        "pmullw %%mm3, %%mm1        \n\t"
+        "paddw %4, %%mm6            \n\t"
+        "paddw %%mm6, %%mm1         \n\t"
+        "psrlw $6, %%mm1            \n\t"
+        "packuswb %%mm1, %%mm1      \n\t"
+        H264_CHROMA_OP4((%0), %%mm1, %%mm6)
+        "movd %%mm1, (%0)           \n\t"
+        "add %3, %0                 \n\t"
+        "movd  (%1), %%mm6          \n\t"
+        "movd 1(%1), %%mm1          \n\t"
+        "add %3, %1                 \n\t"
+        "punpcklbw %%mm7, %%mm6     \n\t"
+        "punpcklbw %%mm7, %%mm1     \n\t"
+        "pmullw %%mm4, %%mm6        \n\t"
+        "pmullw %%mm2, %%mm1        \n\t"
+        "paddw %%mm6, %%mm1         \n\t"
+        "movq %%mm1, %%mm6          \n\t"
+        "pmullw %%mm5, %%mm0        \n\t"
+        "pmullw %%mm3, %%mm1        \n\t"
+        "paddw %4, %%mm0            \n\t"
+        "paddw %%mm0, %%mm1         \n\t"
+        "psrlw $6, %%mm1            \n\t"
+        "packuswb %%mm1, %%mm1      \n\t"
+        H264_CHROMA_OP4((%0), %%mm1, %%mm0)
+        "movd %%mm1, (%0)           \n\t"
+        "add %3, %0                 \n\t"
+        "sub $2, %2                 \n\t"
+        "jnz 1b                     \n\t"
+        : "+r"(dst), "+r"(src), "+r"(h)
+        : "r"((long)stride), "m"(ff_pw_32), "m"(x), "m"(y)
+    );
 }
 
 #ifdef H264_CHROMA_MC2_TMPL
-static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
+static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1*/, long stride, int h, int x, int y)
 {
-    int CD=((1<<16)-1)*x*y + 8*y;
-    int AB=((8<<16)-8)*x + 64 - CD;
-    int i;
-
+    int tmp = ((1<<16)-1)*x + 8;
+    int CD= tmp*y;
+    int AB= (tmp<<3) - CD;
     asm volatile(
         /* mm5 = {A,B,A,B} */
         /* mm6 = {C,D,C,D} */
@@ -277,50 +264,42 @@ static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1*
         "punpckldq %%mm5, %%mm5\n\t"
         "punpckldq %%mm6, %%mm6\n\t"
         "pxor %%mm7, %%mm7\n\t"
-        :: "r"(AB), "r"(CD));
+        /* mm0 = src[0,1,1,2] */
+        "movd %2, %%mm2\n\t"
+        "punpcklbw %%mm7, %%mm2\n\t"
+        "pshufw $0x94, %%mm2, %%mm2\n\t"
+        :: "r"(AB), "r"(CD), "m"(src[0]));
+
 
     asm volatile(
+        "1:\n\t"
+        "add %4, %1\n\t"
+        /* mm1 = A * src[0,1] + B * src[1,2] */
+        "movq    %%mm2, %%mm1\n\t"
+        "pmaddwd %%mm5, %%mm1\n\t"
         /* mm0 = src[0,1,1,2] */
-        "movd %0, %%mm0\n\t"
+        "movd (%1), %%mm0\n\t"
         "punpcklbw %%mm7, %%mm0\n\t"
         "pshufw $0x94, %%mm0, %%mm0\n\t"
-        :: "m"(src[0]));
+        /* mm1 += C * src[0,1] + D * src[1,2] */
+        "movq    %%mm0, %%mm2\n\t"
+        "pmaddwd %%mm6, %%mm0\n\t"
+        "paddw      %3, %%mm1\n\t"
+        "paddw   %%mm0, %%mm1\n\t"
+        /* dst[0,1] = pack((mm1 + 32) >> 6) */
+        "psrlw $6, %%mm1\n\t"
+        "packssdw %%mm7, %%mm1\n\t"
+        "packuswb %%mm7, %%mm1\n\t"
+        H264_CHROMA_OP4((%0), %%mm1, %%mm3)
+        "movd %%mm1, %%esi\n\t"
+        "movw %%si, (%0)\n\t"
+        "add %4, %0\n\t"
+        "sub $1, %2\n\t"
+        "jnz 1b\n\t"
+        : "+r" (dst), "+r"(src), "+r"(h)
+        : "m" (ff_pw_32), "r"(stride)
+        : "%esi");
 
-    for(i=0; i<h; i++) {
-        asm volatile(
-            /* mm1 = A * src[0,1] + B * src[1,2] */
-            "movq    %%mm0, %%mm1\n\t"
-            "pmaddwd %%mm5, %%mm1\n\t"
-            ::);
-
-        src += stride;
-        asm volatile(
-            /* mm0 = src[0,1,1,2] */
-            "movd %0, %%mm0\n\t"
-            "punpcklbw %%mm7, %%mm0\n\t"
-            "pshufw $0x94, %%mm0, %%mm0\n\t"
-            :: "m"(src[0]));
-
-        asm volatile(
-            /* mm1 += C * src[0,1] + D * src[1,2] */
-            "movq    %%mm0, %%mm2\n\t"
-            "pmaddwd %%mm6, %%mm2\n\t"
-            "paddw   %%mm2, %%mm1\n\t"
-            ::);
-
-        asm volatile(
-            /* dst[0,1] = pack((mm1 + 32) >> 6) */
-            "paddw %1, %%mm1\n\t"
-            "psrlw $6, %%mm1\n\t"
-            "packssdw %%mm7, %%mm1\n\t"
-            "packuswb %%mm7, %%mm1\n\t"
-            /* writes garbage to the right of dst.
-             * ok because partitions are processed from left to right. */
-            H264_CHROMA_OP4(%0, %%mm1, %%mm3)
-            "movd %%mm1, %0\n\t"
-            : "=m" (dst[0]) : "m" (ff_pw_32));
-        dst += stride;
-    }
 }
 #endif
 
diff --git a/contrib/ffmpeg/libavcodec/i386/dsputil_mmx.c b/contrib/ffmpeg/libavcodec/i386/dsputil_mmx.c
index 5675828a4..23a717acd 100644
--- a/contrib/ffmpeg/libavcodec/i386/dsputil_mmx.c
+++ b/contrib/ffmpeg/libavcodec/i386/dsputil_mmx.c
@@ -1730,6 +1730,38 @@ static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride
 
 WARPER8_16_SQ(hadamard8_diff_mmx, hadamard8_diff16_mmx)
 WARPER8_16_SQ(hadamard8_diff_mmx2, hadamard8_diff16_mmx2)
+
+static int ssd_int8_vs_int16_mmx(int8_t *pix1, int16_t *pix2, int size){
+    int sum;
+    long i=size;
+    asm volatile(
+        "pxor %%mm4, %%mm4 \n"
+        "1: \n"
+        "sub $8, %0 \n"
+        "movq (%2,%0), %%mm2 \n"
+        "movq (%3,%0,2), %%mm0 \n"
+        "movq 8(%3,%0,2), %%mm1 \n"
+        "punpckhbw %%mm2, %%mm3 \n"
+        "punpcklbw %%mm2, %%mm2 \n"
+        "psraw $8, %%mm3 \n"
+        "psraw $8, %%mm2 \n"
+        "psubw %%mm3, %%mm1 \n"
+        "psubw %%mm2, %%mm0 \n"
+        "pmaddwd %%mm1, %%mm1 \n"
+        "pmaddwd %%mm0, %%mm0 \n"
+        "paddd %%mm1, %%mm4 \n"
+        "paddd %%mm0, %%mm4 \n"
+        "jg 1b \n"
+        "movq %%mm4, %%mm3 \n"
+        "psrlq $32, %%mm3 \n"
+        "paddd %%mm3, %%mm4 \n"
+        "movd %%mm4, %1 \n"
+        :"+r"(i), "=r"(sum)
+        :"r"(pix1), "r"(pix2)
+    );
+    return sum;
+}
+
 #endif //CONFIG_ENCODERS
 
 #define put_no_rnd_pixels8_mmx(a,b,c,d) put_pixels8_mmx(a,b,c,d)
@@ -3037,14 +3069,14 @@ static void float_to_int16_sse(int16_t *dst, const float *src, int len){
     asm volatile("emms");
 }
 
-#ifdef CONFIG_SNOW_ENCODER
+#ifdef CONFIG_SNOW_DECODER
 extern void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width);
 extern void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width);
 extern void ff_snow_vertical_compose97i_sse2(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width);
 extern void ff_snow_vertical_compose97i_mmx(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width);
-extern void ff_snow_inner_add_yblock_sse2(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+extern void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
                            int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
-extern void ff_snow_inner_add_yblock_mmx(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+extern void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
                           int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
 #endif
 
@@ -3215,6 +3247,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
         }
         c->add_8x8basis= add_8x8basis_mmx;
 
+        c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
+
 #endif //CONFIG_ENCODERS
 
         c->h263_v_loop_filter= h263_v_loop_filter_mmx;
@@ -3462,7 +3496,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow;
         }
 
-#ifdef CONFIG_SNOW_ENCODER
+#ifdef CONFIG_SNOW_DECODER
         if(mm_flags & MM_SSE2){
             c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2;
             c->vertical_compose97i = ff_snow_vertical_compose97i_sse2;
diff --git a/contrib/ffmpeg/libavcodec/i386/fdct_mmx.c b/contrib/ffmpeg/libavcodec/i386/fdct_mmx.c
index 2ffbfecf6..7e2682a4a 100644
--- a/contrib/ffmpeg/libavcodec/i386/fdct_mmx.c
+++ b/contrib/ffmpeg/libavcodec/i386/fdct_mmx.c
@@ -284,7 +284,7 @@ TABLE_SSE2
 }};
 
 
-static always_inline void fdct_col(const int16_t *in, int16_t *out, int offset)
+static av_always_inline void fdct_col(const int16_t *in, int16_t *out, int offset)
 {
     movq_m2r(*(in + offset + 1 * 8), mm0);
     movq_m2r(*(in + offset + 6 * 8), mm1);
@@ -364,7 +364,7 @@ static always_inline void fdct_col(const int16_t *in, int16_t *out, int offset)
 }
 
 
-static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
+static av_always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
 {
     asm volatile(
 #define FDCT_ROW_SSE2_H1(i,t)                    \
@@ -426,7 +426,7 @@ static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
     );
 }
 
-static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table)
+static av_always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table)
 {
     pshufw_m2r(*(in + 4), mm5, 0x1B);
     movq_m2r(*(in + 0), mm0);
@@ -469,7 +469,7 @@ static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const i
     movq_r2m(mm7, *(out + 4));
 }
 
-static always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const int16_t *table)
+static av_always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const int16_t *table)
 {
 //FIXME reorder (i dont have a old mmx only cpu here to benchmark ...)
     movd_m2r(*(in + 6), mm1);
diff --git a/contrib/ffmpeg/libavcodec/i386/h264dsp_mmx.c b/contrib/ffmpeg/libavcodec/i386/h264dsp_mmx.c
index 40baf199b..56004b674 100644
--- a/contrib/ffmpeg/libavcodec/i386/h264dsp_mmx.c
+++ b/contrib/ffmpeg/libavcodec/i386/h264dsp_mmx.c
@@ -684,7 +684,7 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40]
         "movq %%mm6, "#OF"(%1)      \n\t"
 
 #define QPEL_H264(OPNAME, OP, MMX)\
-static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+static av_noinline void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
     int h=4;\
 \
     asm volatile(\
@@ -724,7 +724,7 @@ static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i
         : "memory"\
     );\
 }\
-static void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
+static av_noinline void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
     int h=4;\
     asm volatile(\
         "pxor %%mm7, %%mm7          \n\t"\
@@ -768,7 +768,7 @@ static void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src
     );\
     }while(--h);\
 }\
-static void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+static av_noinline void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
     src -= 2*srcStride;\
     asm volatile(\
         "pxor %%mm7, %%mm7          \n\t"\
@@ -797,7 +797,7 @@ static void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i
         : "memory"\
     );\
 }\
-static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+static av_noinline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
     int h=4;\
     int w=3;\
     src -= 2*srcStride+2;\
@@ -861,7 +861,7 @@ static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
     );\
 }\
 \
-static void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
     int h=8;\
     asm volatile(\
         "pxor %%mm7, %%mm7          \n\t"\
@@ -918,7 +918,7 @@ static void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i
     );\
 }\
 \
-static void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
+static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
     int h=8;\
     asm volatile(\
         "pxor %%mm7, %%mm7          \n\t"\
@@ -981,7 +981,7 @@ static void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src
     }while(--h);\
 }\
 \
-static inline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
     int w= 2;\
     src -= 2*srcStride;\
     \
@@ -1036,7 +1036,7 @@ static inline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint
      dst += 4-h*dstStride;\
    }\
 }\
-static inline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\
+static av_noinline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\
     int h = size;\
     int w = (size+8)>>2;\
     src -= 2*srcStride+2;\
@@ -1141,12 +1141,12 @@ static inline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int
 static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
     OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst  , src  , dstStride, srcStride, 8);\
 }\
-static void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+static av_noinline void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
     OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst  , src  , dstStride, srcStride, 16);\
     OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
 }\
 \
-static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+static av_noinline void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
     OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\
     OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
     src += 8*srcStride;\
@@ -1155,7 +1155,7 @@ static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src,
     OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
 }\
 \
-static void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
+static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
     OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst  , src  , src2  , dstStride, src2Stride);\
     OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
     src += 8*dstStride;\
@@ -1173,7 +1173,7 @@ static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
     OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst  , tmp  , src  , dstStride, tmpStride, srcStride, 16);\
 }\
 \
-static void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
+static av_noinline void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
 {\
     asm volatile(\
         "movq       %5,  %%mm6          \n\t"\
@@ -1207,7 +1207,7 @@ static void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, ui
         :"S"((long)src8Stride), "D"((long)dstStride), "m"(ff_pw_16)\
         :"memory");\
 }\
-static void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
+static av_noinline void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
 {\
     asm volatile(\
         "movq       %0,  %%mm6          \n\t"\
@@ -1311,6 +1311,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t *
     uint64_t temp[SIZE*(SIZE<8?12:24)/4 + SIZE*SIZE/8];\
     uint8_t * const halfHV= (uint8_t*)temp;\
     int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE/2;\
+    assert((int)temp & 7 == 0);\
     put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, tmp, src, SIZE, SIZE, stride);\
     OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\
 }\
@@ -1319,6 +1320,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *
     uint64_t temp[SIZE*(SIZE<8?12:24)/4 + SIZE*SIZE/8];\
     uint8_t * const halfHV= (uint8_t*)temp;\
     int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE/2;\
+    assert((int)temp & 7 == 0);\
     put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, tmp, src, SIZE, SIZE, stride);\
     OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\
 }\
@@ -1327,6 +1329,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t *
     uint64_t temp[SIZE*(SIZE<8?12:24)/4 + SIZE*SIZE/8];\
     int16_t * const halfV= ((int16_t*)temp) + SIZE*SIZE/2;\
     uint8_t * const halfHV= ((uint8_t*)temp);\
+    assert((int)temp & 7 == 0);\
     put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
     OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+2, halfHV, stride, SIZE, SIZE);\
 }\
@@ -1335,6 +1338,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *
     uint64_t temp[SIZE*(SIZE<8?12:24)/4 + SIZE*SIZE/8];\
     int16_t * const halfV= ((int16_t*)temp) + SIZE*SIZE/2;\
     uint8_t * const halfHV= ((uint8_t*)temp);\
+    assert((int)temp & 7 == 0);\
     put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
     OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+3, halfHV, stride, SIZE, SIZE);\
 }\
diff --git a/contrib/ffmpeg/libavcodec/i386/idct_mmx.c b/contrib/ffmpeg/libavcodec/i386/idct_mmx.c
index ba595845a..4c548fdce 100644
--- a/contrib/ffmpeg/libavcodec/i386/idct_mmx.c
+++ b/contrib/ffmpeg/libavcodec/i386/idct_mmx.c
@@ -15,7 +15,7 @@
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
+ * along with mpeg2dec; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/contrib/ffmpeg/libavcodec/i386/idct_mmx_xvid.c b/contrib/ffmpeg/libavcodec/i386/idct_mmx_xvid.c
index 43eb329cc..85cfbc9cd 100644
--- a/contrib/ffmpeg/libavcodec/i386/idct_mmx_xvid.c
+++ b/contrib/ffmpeg/libavcodec/i386/idct_mmx_xvid.c
@@ -21,7 +21,7 @@
 // * along with FFmpeg; if not, write to the Free Software Foundation,
 // * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 // *
-// * $Id: idct_mmx_xvid.c,v 1.1.2.1 2006/12/02 01:19:55 dgp85 Exp $
+// * $Id: idct_mmx_xvid.c 6577 2006-10-07 15:30:46Z diego $
 // *
 // ***************************************************************************/
 
diff --git a/contrib/ffmpeg/libavcodec/i386/snowdsp_mmx.c b/contrib/ffmpeg/libavcodec/i386/snowdsp_mmx.c
index 718202632..8f182303d 100644
--- a/contrib/ffmpeg/libavcodec/i386/snowdsp_mmx.c
+++ b/contrib/ffmpeg/libavcodec/i386/snowdsp_mmx.c
@@ -708,7 +708,7 @@ void ff_snow_vertical_compose97i_mmx(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTE
              "dec %2                         \n\t"\
              snow_inner_add_yblock_sse2_end_common2
 
-static void inner_add_yblock_bw_8_obmc_16_bh_even_sse2(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
+static void inner_add_yblock_bw_8_obmc_16_bh_even_sse2(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
                       int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){
 snow_inner_add_yblock_sse2_header
 snow_inner_add_yblock_sse2_start_8("xmm1", "xmm5", "3", "0")
@@ -756,7 +756,7 @@ snow_inner_add_yblock_sse2_accum_8("0", "136")
 snow_inner_add_yblock_sse2_end_8
 }
 
-static void inner_add_yblock_bw_16_obmc_32_sse2(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
+static void inner_add_yblock_bw_16_obmc_32_sse2(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
                       int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){
 snow_inner_add_yblock_sse2_header
 snow_inner_add_yblock_sse2_start_16("xmm1", "xmm5", "3", "0")
@@ -868,7 +868,7 @@ snow_inner_add_yblock_sse2_end_16
              "rm"((long)(src_x<<2)),"m"(obmc),"a"(block),"m"((long)b_h),"m"((long)src_stride):\
              "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
 
-static void inner_add_yblock_bw_8_obmc_16_mmx(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
+static void inner_add_yblock_bw_8_obmc_16_mmx(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
                       int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){
 snow_inner_add_yblock_mmx_header
 snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0")
@@ -879,7 +879,7 @@ snow_inner_add_yblock_mmx_mix("0", "0")
 snow_inner_add_yblock_mmx_end("16")
 }
 
-static void inner_add_yblock_bw_16_obmc_32_mmx(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
+static void inner_add_yblock_bw_16_obmc_32_mmx(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
                       int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){
 snow_inner_add_yblock_mmx_header
 snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0")
@@ -896,7 +896,7 @@ snow_inner_add_yblock_mmx_mix("32", "8")
 snow_inner_add_yblock_mmx_end("32")
 }
 
-void ff_snow_inner_add_yblock_sse2(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
                            int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
 
     if (b_w == 16)
@@ -910,7 +910,7 @@ void ff_snow_inner_add_yblock_sse2(uint8_t *obmc, const int obmc_stride, uint8_t
          ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
 }
 
-void ff_snow_inner_add_yblock_mmx(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
                           int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
     if (b_w == 16)
         inner_add_yblock_bw_16_obmc_32_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
author	Diego 'Flameeyes' Pettenò <flameeyes@gmail.com>	2007-04-03 01:18:24 +0200
committer	Diego 'Flameeyes' Pettenò <flameeyes@gmail.com>	2007-04-03 01:18:24 +0200
commit	fb09531720a4aa2dfa97e5a9a246a453b6278fd2 (patch)
tree	61525c3a8ddb419d3838a26e488fc3659079bbcd /contrib/ffmpeg/libavcodec/i386
parent	294d01046724e28b7193bcb65bf2a0391b0135b6 (diff)
download	xine-lib-fb09531720a4aa2dfa97e5a9a246a453b6278fd2.tar.gz xine-lib-fb09531720a4aa2dfa97e5a9a246a453b6278fd2.tar.bz2