summary | refs | log | tree | commit | diff
path: root/contrib/ffmpeg/libavcodec/i386
diff options
context:
space:
mode:
author: Diego 'Flameeyes' Pettenò <flameeyes@gmail.com> 2007-04-03 01:18:24 +0200
committer: Diego 'Flameeyes' Pettenò <flameeyes@gmail.com> 2007-04-03 01:18:24 +0200
commit: fb09531720a4aa2dfa97e5a9a246a453b6278fd2 (patch)
tree: 61525c3a8ddb419d3838a26e488fc3659079bbcd /contrib/ffmpeg/libavcodec/i386
parent: 294d01046724e28b7193bcb65bf2a0391b0135b6 (diff)
download: xine-lib-fb09531720a4aa2dfa97e5a9a246a453b6278fd2.tar.gz
download: xine-lib-fb09531720a4aa2dfa97e5a9a246a453b6278fd2.tar.bz2
Sync with a more recent version of FFmpeg.
Diffstat (limited to 'contrib/ffmpeg/libavcodec/i386')
-rw-r--r-- contrib/ffmpeg/libavcodec/i386/cputest.c | 6
-rw-r--r-- contrib/ffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c | 215
-rw-r--r-- contrib/ffmpeg/libavcodec/i386/dsputil_mmx.c | 42
-rw-r--r-- contrib/ffmpeg/libavcodec/i386/fdct_mmx.c | 8
-rw-r--r-- contrib/ffmpeg/libavcodec/i386/h264dsp_mmx.c | 30
-rw-r--r-- contrib/ffmpeg/libavcodec/i386/idct_mmx.c | 2
-rw-r--r-- contrib/ffmpeg/libavcodec/i386/idct_mmx_xvid.c | 2
-rw-r--r-- contrib/ffmpeg/libavcodec/i386/snowdsp_mmx.c | 12
8 files changed, 169 insertions, 148 deletions
diff --git a/contrib/ffmpeg/libavcodec/i386/cputest.c b/contrib/ffmpeg/libavcodec/i386/cputest.c
index 262786b71..0705ab3e5 100644
--- a/contrib/ffmpeg/libavcodec/i386/cputest.c
+++ b/contrib/ffmpeg/libavcodec/i386/cputest.c
@@ -87,6 +87,8 @@ int mm_support(void)
rval |= MM_SSE2;
if (ecx & 1)
rval |= MM_SSE3;
+ if (ecx & 0x00000200 )
+ rval |= MM_SSSE3;
}
cpuid(0x80000000, max_ext_level, ebx, ecx, edx);
@@ -104,11 +106,13 @@ int mm_support(void)
}
#if 0
- av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s\n",
+ av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s%s%s\n",
(rval&MM_MMX) ? "MMX ":"",
(rval&MM_MMXEXT) ? "MMX2 ":"",
(rval&MM_SSE) ? "SSE ":"",
(rval&MM_SSE2) ? "SSE2 ":"",
+ (rval&MM_SSE3) ? "SSE3 ":"",
+ (rval&MM_SSSE3) ? "SSSE3 ":"",
(rval&MM_3DNOW) ? "3DNow ":"",
(rval&MM_3DNOWEXT) ? "3DNowExt ":"");
#endif
diff --git a/contrib/ffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c b/contrib/ffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c
index e09a1007e..a943a0371 100644
--- a/contrib/ffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c
+++ b/contrib/ffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c
@@ -184,91 +184,78 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
{
- DECLARE_ALIGNED_8(uint64_t, AA);
- DECLARE_ALIGNED_8(uint64_t, DD);
- int i;
-
- /* no special case for mv=(0,0) in 4x*, since it's much less common than in 8x*.
- * could still save a few cycles, but maybe not worth the complexity. */
-
- assert(x<8 && y<8 && x>=0 && y>=0);
-
- asm volatile("movd %2, %%mm4\n\t"
- "movd %3, %%mm6\n\t"
- "punpcklwd %%mm4, %%mm4\n\t"
- "punpcklwd %%mm6, %%mm6\n\t"
- "punpckldq %%mm4, %%mm4\n\t" /* mm4 = x words */
- "punpckldq %%mm6, %%mm6\n\t" /* mm6 = y words */
- "movq %%mm4, %%mm5\n\t"
- "pmullw %%mm6, %%mm4\n\t" /* mm4 = x * y */
- "psllw $3, %%mm5\n\t"
- "psllw $3, %%mm6\n\t"
- "movq %%mm5, %%mm7\n\t"
- "paddw %%mm6, %%mm7\n\t"
- "movq %%mm4, %1\n\t" /* DD = x * y */
- "psubw %%mm4, %%mm5\n\t" /* mm5 = B = 8x - xy */
- "psubw %%mm4, %%mm6\n\t" /* mm6 = C = 8y - xy */
- "paddw %4, %%mm4\n\t"
- "psubw %%mm7, %%mm4\n\t" /* mm4 = A = xy - (8x+8y) + 64 */
- "pxor %%mm7, %%mm7\n\t"
- "movq %%mm4, %0\n\t"
- : "=m" (AA), "=m" (DD) : "rm" (x), "rm" (y), "m" (ff_pw_64));
-
asm volatile(
- /* mm0 = src[0..3], mm1 = src[1..4] */
- "movd %0, %%mm0\n\t"
- "movd %1, %%mm1\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- : : "m" (src[0]), "m" (src[1]));
-
- for(i=0; i<h; i++) {
- asm volatile(
- /* mm2 = A * src[0..3] + B * src[1..4] */
- "movq %%mm0, %%mm2\n\t"
- "pmullw %0, %%mm2\n\t"
- "pmullw %%mm5, %%mm1\n\t"
- "paddw %%mm1, %%mm2\n\t"
- : : "m" (AA));
-
- src += stride;
- asm volatile(
- /* mm0 = src[0..3], mm1 = src[1..4] */
- "movd %0, %%mm0\n\t"
- "movd %1, %%mm1\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- : : "m" (src[0]), "m" (src[1]));
-
- asm volatile(
- /* mm2 += C * src[0..3] + D * src[1..4] */
- "movq %%mm0, %%mm3\n\t"
- "movq %%mm1, %%mm4\n\t"
- "pmullw %%mm6, %%mm3\n\t"
- "pmullw %0, %%mm4\n\t"
- "paddw %%mm3, %%mm2\n\t"
- "paddw %%mm4, %%mm2\n\t"
- : : "m" (DD));
-
- asm volatile(
- /* dst[0..3] = pack((mm2 + 32) >> 6) */
- "paddw %1, %%mm2\n\t"
- "psrlw $6, %%mm2\n\t"
- "packuswb %%mm7, %%mm2\n\t"
- H264_CHROMA_OP4(%0, %%mm2, %%mm3)
- "movd %%mm2, %0\n\t"
- : "=m" (dst[0]) : "m" (ff_pw_32));
- dst += stride;
- }
+ "pxor %%mm7, %%mm7 \n\t"
+ "movd %5, %%mm2 \n\t"
+ "movd %6, %%mm3 \n\t"
+ "movq "MANGLE(ff_pw_8)", %%mm4\n\t"
+ "movq "MANGLE(ff_pw_8)", %%mm5\n\t"
+ "punpcklwd %%mm2, %%mm2 \n\t"
+ "punpcklwd %%mm3, %%mm3 \n\t"
+ "punpcklwd %%mm2, %%mm2 \n\t"
+ "punpcklwd %%mm3, %%mm3 \n\t"
+ "psubw %%mm2, %%mm4 \n\t"
+ "psubw %%mm3, %%mm5 \n\t"
+
+ "movd (%1), %%mm0 \n\t"
+ "movd 1(%1), %%mm6 \n\t"
+ "add %3, %1 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "punpcklbw %%mm7, %%mm6 \n\t"
+ "pmullw %%mm4, %%mm0 \n\t"
+ "pmullw %%mm2, %%mm6 \n\t"
+ "paddw %%mm0, %%mm6 \n\t"
+
+ "1: \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "movd 1(%1), %%mm1 \n\t"
+ "add %3, %1 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "punpcklbw %%mm7, %%mm1 \n\t"
+ "pmullw %%mm4, %%mm0 \n\t"
+ "pmullw %%mm2, %%mm1 \n\t"
+ "paddw %%mm0, %%mm1 \n\t"
+ "movq %%mm1, %%mm0 \n\t"
+ "pmullw %%mm5, %%mm6 \n\t"
+ "pmullw %%mm3, %%mm1 \n\t"
+ "paddw %4, %%mm6 \n\t"
+ "paddw %%mm6, %%mm1 \n\t"
+ "psrlw $6, %%mm1 \n\t"
+ "packuswb %%mm1, %%mm1 \n\t"
+ H264_CHROMA_OP4((%0), %%mm1, %%mm6)
+ "movd %%mm1, (%0) \n\t"
+ "add %3, %0 \n\t"
+ "movd (%1), %%mm6 \n\t"
+ "movd 1(%1), %%mm1 \n\t"
+ "add %3, %1 \n\t"
+ "punpcklbw %%mm7, %%mm6 \n\t"
+ "punpcklbw %%mm7, %%mm1 \n\t"
+ "pmullw %%mm4, %%mm6 \n\t"
+ "pmullw %%mm2, %%mm1 \n\t"
+ "paddw %%mm6, %%mm1 \n\t"
+ "movq %%mm1, %%mm6 \n\t"
+ "pmullw %%mm5, %%mm0 \n\t"
+ "pmullw %%mm3, %%mm1 \n\t"
+ "paddw %4, %%mm0 \n\t"
+ "paddw %%mm0, %%mm1 \n\t"
+ "psrlw $6, %%mm1 \n\t"
+ "packuswb %%mm1, %%mm1 \n\t"
+ H264_CHROMA_OP4((%0), %%mm1, %%mm0)
+ "movd %%mm1, (%0) \n\t"
+ "add %3, %0 \n\t"
+ "sub $2, %2 \n\t"
+ "jnz 1b \n\t"
+ : "+r"(dst), "+r"(src), "+r"(h)
+ : "r"((long)stride), "m"(ff_pw_32), "m"(x), "m"(y)
+ );
}
#ifdef H264_CHROMA_MC2_TMPL
-static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
+static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1*/, long stride, int h, int x, int y)
{
- int CD=((1<<16)-1)*x*y + 8*y;
- int AB=((8<<16)-8)*x + 64 - CD;
- int i;
-
+ int tmp = ((1<<16)-1)*x + 8;
+ int CD= tmp*y;
+ int AB= (tmp<<3) - CD;
asm volatile(
/* mm5 = {A,B,A,B} */
/* mm6 = {C,D,C,D} */
@@ -277,50 +264,42 @@ static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1*
"punpckldq %%mm5, %%mm5\n\t"
"punpckldq %%mm6, %%mm6\n\t"
"pxor %%mm7, %%mm7\n\t"
- :: "r"(AB), "r"(CD));
+ /* mm0 = src[0,1,1,2] */
+ "movd %2, %%mm2\n\t"
+ "punpcklbw %%mm7, %%mm2\n\t"
+ "pshufw $0x94, %%mm2, %%mm2\n\t"
+ :: "r"(AB), "r"(CD), "m"(src[0]));
+
asm volatile(
+ "1:\n\t"
+ "add %4, %1\n\t"
+ /* mm1 = A * src[0,1] + B * src[1,2] */
+ "movq %%mm2, %%mm1\n\t"
+ "pmaddwd %%mm5, %%mm1\n\t"
/* mm0 = src[0,1,1,2] */
- "movd %0, %%mm0\n\t"
+ "movd (%1), %%mm0\n\t"
"punpcklbw %%mm7, %%mm0\n\t"
"pshufw $0x94, %%mm0, %%mm0\n\t"
- :: "m"(src[0]));
+ /* mm1 += C * src[0,1] + D * src[1,2] */
+ "movq %%mm0, %%mm2\n\t"
+ "pmaddwd %%mm6, %%mm0\n\t"
+ "paddw %3, %%mm1\n\t"
+ "paddw %%mm0, %%mm1\n\t"
+ /* dst[0,1] = pack((mm1 + 32) >> 6) */
+ "psrlw $6, %%mm1\n\t"
+ "packssdw %%mm7, %%mm1\n\t"
+ "packuswb %%mm7, %%mm1\n\t"
+ H264_CHROMA_OP4((%0), %%mm1, %%mm3)
+ "movd %%mm1, %%esi\n\t"
+ "movw %%si, (%0)\n\t"
+ "add %4, %0\n\t"
+ "sub $1, %2\n\t"
+ "jnz 1b\n\t"
+ : "+r" (dst), "+r"(src), "+r"(h)
+ : "m" (ff_pw_32), "r"(stride)
+ : "%esi");
- for(i=0; i<h; i++) {
- asm volatile(
- /* mm1 = A * src[0,1] + B * src[1,2] */
- "movq %%mm0, %%mm1\n\t"
- "pmaddwd %%mm5, %%mm1\n\t"
- ::);
-
- src += stride;
- asm volatile(
- /* mm0 = src[0,1,1,2] */
- "movd %0, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "pshufw $0x94, %%mm0, %%mm0\n\t"
- :: "m"(src[0]));
-
- asm volatile(
- /* mm1 += C * src[0,1] + D * src[1,2] */
- "movq %%mm0, %%mm2\n\t"
- "pmaddwd %%mm6, %%mm2\n\t"
- "paddw %%mm2, %%mm1\n\t"
- ::);
-
- asm volatile(
- /* dst[0,1] = pack((mm1 + 32) >> 6) */
- "paddw %1, %%mm1\n\t"
- "psrlw $6, %%mm1\n\t"
- "packssdw %%mm7, %%mm1\n\t"
- "packuswb %%mm7, %%mm1\n\t"
- /* writes garbage to the right of dst.
- * ok because partitions are processed from left to right. */
- H264_CHROMA_OP4(%0, %%mm1, %%mm3)
- "movd %%mm1, %0\n\t"
- : "=m" (dst[0]) : "m" (ff_pw_32));
- dst += stride;
- }
}
#endif
diff --git a/contrib/ffmpeg/libavcodec/i386/dsputil_mmx.c b/contrib/ffmpeg/libavcodec/i386/dsputil_mmx.c
index 5675828a4..23a717acd 100644
--- a/contrib/ffmpeg/libavcodec/i386/dsputil_mmx.c
+++ b/contrib/ffmpeg/libavcodec/i386/dsputil_mmx.c
@@ -1730,6 +1730,38 @@ static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride
WARPER8_16_SQ(hadamard8_diff_mmx, hadamard8_diff16_mmx)
WARPER8_16_SQ(hadamard8_diff_mmx2, hadamard8_diff16_mmx2)
+
+static int ssd_int8_vs_int16_mmx(int8_t *pix1, int16_t *pix2, int size){ /* MMX sum of squared differences between the int8 array pix1 and the int16 array pix2; returns the 32-bit total */
+ int sum;
+ long i=size; /* element index: starts at size and is stepped down by 8 on every loop pass */
+ asm volatile(
+ "pxor %%mm4, %%mm4 \n" /* mm4 = 0: accumulator holding two 32-bit partial sums */
+ "1: \n"
+ "sub $8, %0 \n" /* i -= 8; the flags set here are what the jg below tests */
+ "movq (%2,%0), %%mm2 \n" /* mm2 = pix1[i..i+7] (eight bytes) */
+ "movq (%3,%0,2), %%mm0 \n" /* mm0 = pix2[i..i+3] (four words) */
+ "movq 8(%3,%0,2), %%mm1 \n" /* mm1 = pix2[i+4..i+7] */
+ "punpckhbw %%mm2, %%mm3 \n" /* high byte of each mm3 word = pix1[i+4..i+7]; low bytes are stale mm3 contents */
+ "punpcklbw %%mm2, %%mm2 \n" /* high byte of each mm2 word = pix1[i..i+3] */
+ "psraw $8, %%mm3 \n" /* arithmetic shift sign-extends the bytes to words and discards the stale low bytes */
+ "psraw $8, %%mm2 \n" /* same for the lower four pix1 values */
+ "psubw %%mm3, %%mm1 \n" /* mm1 = pix2[i+4..i+7] - pix1[i+4..i+7] */
+ "psubw %%mm2, %%mm0 \n" /* mm0 = pix2[i..i+3] - pix1[i..i+3] */
+ "pmaddwd %%mm1, %%mm1 \n" /* square each difference and add adjacent pairs -> two dwords */
+ "pmaddwd %%mm0, %%mm0 \n" /* likewise for the lower four differences */
+ "paddd %%mm1, %%mm4 \n" /* accumulate into the running partial sums */
+ "paddd %%mm0, %%mm4 \n"
+ "jg 1b \n" /* loop while the sub left i > 0; the MMX ops in between do not modify EFLAGS */
+ "movq %%mm4, %%mm3 \n" /* horizontal add: copy the accumulator ... */
+ "psrlq $32, %%mm3 \n" /* ... bring its high dword down ... */
+ "paddd %%mm3, %%mm4 \n" /* ... and add it onto the low dword */
+ "movd %%mm4, %1 \n" /* sum = low dword of mm4 */
+ :"+r"(i), "=r"(sum) /* %0 = i (read/write), %1 = sum (write-only) */
+ :"r"(pix1), "r"(pix2) /* %2 = pix1, %3 = pix2 */
+ ); /* NOTE(review): the loop body runs before i is tested, so size is presumably a positive multiple of 8 - confirm with callers; no emms is issued in this function */
+ return sum;
+}
+
#endif //CONFIG_ENCODERS
#define put_no_rnd_pixels8_mmx(a,b,c,d) put_pixels8_mmx(a,b,c,d)
@@ -3037,14 +3069,14 @@ static void float_to_int16_sse(int16_t *dst, const float *src, int len){
asm volatile("emms");
}
-#ifdef CONFIG_SNOW_ENCODER
+#ifdef CONFIG_SNOW_DECODER
extern void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width);
extern void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width);
extern void ff_snow_vertical_compose97i_sse2(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width);
extern void ff_snow_vertical_compose97i_mmx(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width);
-extern void ff_snow_inner_add_yblock_sse2(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+extern void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
-extern void ff_snow_inner_add_yblock_mmx(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+extern void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
#endif
@@ -3215,6 +3247,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
}
c->add_8x8basis= add_8x8basis_mmx;
+ c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
+
#endif //CONFIG_ENCODERS
c->h263_v_loop_filter= h263_v_loop_filter_mmx;
@@ -3462,7 +3496,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow;
}
-#ifdef CONFIG_SNOW_ENCODER
+#ifdef CONFIG_SNOW_DECODER
if(mm_flags & MM_SSE2){
c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2;
c->vertical_compose97i = ff_snow_vertical_compose97i_sse2;
diff --git a/contrib/ffmpeg/libavcodec/i386/fdct_mmx.c b/contrib/ffmpeg/libavcodec/i386/fdct_mmx.c
index 2ffbfecf6..7e2682a4a 100644
--- a/contrib/ffmpeg/libavcodec/i386/fdct_mmx.c
+++ b/contrib/ffmpeg/libavcodec/i386/fdct_mmx.c
@@ -284,7 +284,7 @@ TABLE_SSE2
}};
-static always_inline void fdct_col(const int16_t *in, int16_t *out, int offset)
+static av_always_inline void fdct_col(const int16_t *in, int16_t *out, int offset)
{
movq_m2r(*(in + offset + 1 * 8), mm0);
movq_m2r(*(in + offset + 6 * 8), mm1);
@@ -364,7 +364,7 @@ static always_inline void fdct_col(const int16_t *in, int16_t *out, int offset)
}
-static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
+static av_always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
{
asm volatile(
#define FDCT_ROW_SSE2_H1(i,t) \
@@ -426,7 +426,7 @@ static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
);
}
-static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table)
+static av_always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table)
{
pshufw_m2r(*(in + 4), mm5, 0x1B);
movq_m2r(*(in + 0), mm0);
@@ -469,7 +469,7 @@ static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const i
movq_r2m(mm7, *(out + 4));
}
-static always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const int16_t *table)
+static av_always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const int16_t *table)
{
//FIXME reorder (i dont have a old mmx only cpu here to benchmark ...)
movd_m2r(*(in + 6), mm1);
diff --git a/contrib/ffmpeg/libavcodec/i386/h264dsp_mmx.c b/contrib/ffmpeg/libavcodec/i386/h264dsp_mmx.c
index 40baf199b..56004b674 100644
--- a/contrib/ffmpeg/libavcodec/i386/h264dsp_mmx.c
+++ b/contrib/ffmpeg/libavcodec/i386/h264dsp_mmx.c
@@ -684,7 +684,7 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40]
"movq %%mm6, "#OF"(%1) \n\t"
#define QPEL_H264(OPNAME, OP, MMX)\
-static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+static av_noinline void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
int h=4;\
\
asm volatile(\
@@ -724,7 +724,7 @@ static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i
: "memory"\
);\
}\
-static void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
+static av_noinline void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
int h=4;\
asm volatile(\
"pxor %%mm7, %%mm7 \n\t"\
@@ -768,7 +768,7 @@ static void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src
);\
}while(--h);\
}\
-static void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+static av_noinline void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
src -= 2*srcStride;\
asm volatile(\
"pxor %%mm7, %%mm7 \n\t"\
@@ -797,7 +797,7 @@ static void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i
: "memory"\
);\
}\
-static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+static av_noinline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
int h=4;\
int w=3;\
src -= 2*srcStride+2;\
@@ -861,7 +861,7 @@ static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
);\
}\
\
-static void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
int h=8;\
asm volatile(\
"pxor %%mm7, %%mm7 \n\t"\
@@ -918,7 +918,7 @@ static void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i
);\
}\
\
-static void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
+static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
int h=8;\
asm volatile(\
"pxor %%mm7, %%mm7 \n\t"\
@@ -981,7 +981,7 @@ static void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src
}while(--h);\
}\
\
-static inline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
int w= 2;\
src -= 2*srcStride;\
\
@@ -1036,7 +1036,7 @@ static inline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint
dst += 4-h*dstStride;\
}\
}\
-static inline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\
+static av_noinline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\
int h = size;\
int w = (size+8)>>2;\
src -= 2*srcStride+2;\
@@ -1141,12 +1141,12 @@ static inline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int
static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\
}\
-static void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+static av_noinline void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\
OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
}\
\
-static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+static av_noinline void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
src += 8*srcStride;\
@@ -1155,7 +1155,7 @@ static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src,
OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
}\
\
-static void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
+static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
src += 8*dstStride;\
@@ -1173,7 +1173,7 @@ static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride, 16);\
}\
\
-static void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
+static av_noinline void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
{\
asm volatile(\
"movq %5, %%mm6 \n\t"\
@@ -1207,7 +1207,7 @@ static void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, ui
:"S"((long)src8Stride), "D"((long)dstStride), "m"(ff_pw_16)\
:"memory");\
}\
-static void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
+static av_noinline void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
{\
asm volatile(\
"movq %0, %%mm6 \n\t"\
@@ -1311,6 +1311,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t *
uint64_t temp[SIZE*(SIZE<8?12:24)/4 + SIZE*SIZE/8];\
uint8_t * const halfHV= (uint8_t*)temp;\
int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE/2;\
+ assert(((int)temp & 7) == 0); /* temp must be 8-byte aligned; == binds tighter than &, so the mask needs its own parentheses */\
put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, tmp, src, SIZE, SIZE, stride);\
OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\
}\
@@ -1319,6 +1320,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *
uint64_t temp[SIZE*(SIZE<8?12:24)/4 + SIZE*SIZE/8];\
uint8_t * const halfHV= (uint8_t*)temp;\
int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE/2;\
+ assert(((int)temp & 7) == 0); /* temp must be 8-byte aligned; == binds tighter than &, so the mask needs its own parentheses */\
put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, tmp, src, SIZE, SIZE, stride);\
OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\
}\
@@ -1327,6 +1329,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t *
uint64_t temp[SIZE*(SIZE<8?12:24)/4 + SIZE*SIZE/8];\
int16_t * const halfV= ((int16_t*)temp) + SIZE*SIZE/2;\
uint8_t * const halfHV= ((uint8_t*)temp);\
+ assert(((int)temp & 7) == 0); /* temp must be 8-byte aligned; == binds tighter than &, so the mask needs its own parentheses */\
put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+2, halfHV, stride, SIZE, SIZE);\
}\
@@ -1335,6 +1338,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *
uint64_t temp[SIZE*(SIZE<8?12:24)/4 + SIZE*SIZE/8];\
int16_t * const halfV= ((int16_t*)temp) + SIZE*SIZE/2;\
uint8_t * const halfHV= ((uint8_t*)temp);\
+ assert(((int)temp & 7) == 0); /* temp must be 8-byte aligned; == binds tighter than &, so the mask needs its own parentheses */\
put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+3, halfHV, stride, SIZE, SIZE);\
}\
diff --git a/contrib/ffmpeg/libavcodec/i386/idct_mmx.c b/contrib/ffmpeg/libavcodec/i386/idct_mmx.c
index ba595845a..4c548fdce 100644
--- a/contrib/ffmpeg/libavcodec/i386/idct_mmx.c
+++ b/contrib/ffmpeg/libavcodec/i386/idct_mmx.c
@@ -15,7 +15,7 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
+ * along with mpeg2dec; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/contrib/ffmpeg/libavcodec/i386/idct_mmx_xvid.c b/contrib/ffmpeg/libavcodec/i386/idct_mmx_xvid.c
index 43eb329cc..85cfbc9cd 100644
--- a/contrib/ffmpeg/libavcodec/i386/idct_mmx_xvid.c
+++ b/contrib/ffmpeg/libavcodec/i386/idct_mmx_xvid.c
@@ -21,7 +21,7 @@
// * along with FFmpeg; if not, write to the Free Software Foundation,
// * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
// *
-// * $Id: idct_mmx_xvid.c,v 1.1.2.1 2006/12/02 01:19:55 dgp85 Exp $
+// * $Id: idct_mmx_xvid.c 6577 2006-10-07 15:30:46Z diego $
// *
// ***************************************************************************/
diff --git a/contrib/ffmpeg/libavcodec/i386/snowdsp_mmx.c b/contrib/ffmpeg/libavcodec/i386/snowdsp_mmx.c
index 718202632..8f182303d 100644
--- a/contrib/ffmpeg/libavcodec/i386/snowdsp_mmx.c
+++ b/contrib/ffmpeg/libavcodec/i386/snowdsp_mmx.c
@@ -708,7 +708,7 @@ void ff_snow_vertical_compose97i_mmx(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTE
"dec %2 \n\t"\
snow_inner_add_yblock_sse2_end_common2
-static void inner_add_yblock_bw_8_obmc_16_bh_even_sse2(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
+static void inner_add_yblock_bw_8_obmc_16_bh_even_sse2(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){
snow_inner_add_yblock_sse2_header
snow_inner_add_yblock_sse2_start_8("xmm1", "xmm5", "3", "0")
@@ -756,7 +756,7 @@ snow_inner_add_yblock_sse2_accum_8("0", "136")
snow_inner_add_yblock_sse2_end_8
}
-static void inner_add_yblock_bw_16_obmc_32_sse2(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
+static void inner_add_yblock_bw_16_obmc_32_sse2(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){
snow_inner_add_yblock_sse2_header
snow_inner_add_yblock_sse2_start_16("xmm1", "xmm5", "3", "0")
@@ -868,7 +868,7 @@ snow_inner_add_yblock_sse2_end_16
"rm"((long)(src_x<<2)),"m"(obmc),"a"(block),"m"((long)b_h),"m"((long)src_stride):\
"%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
-static void inner_add_yblock_bw_8_obmc_16_mmx(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
+static void inner_add_yblock_bw_8_obmc_16_mmx(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){
snow_inner_add_yblock_mmx_header
snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0")
@@ -879,7 +879,7 @@ snow_inner_add_yblock_mmx_mix("0", "0")
snow_inner_add_yblock_mmx_end("16")
}
-static void inner_add_yblock_bw_16_obmc_32_mmx(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
+static void inner_add_yblock_bw_16_obmc_32_mmx(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){
snow_inner_add_yblock_mmx_header
snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0")
@@ -896,7 +896,7 @@ snow_inner_add_yblock_mmx_mix("32", "8")
snow_inner_add_yblock_mmx_end("32")
}
-void ff_snow_inner_add_yblock_sse2(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
if (b_w == 16)
@@ -910,7 +910,7 @@ void ff_snow_inner_add_yblock_sse2(uint8_t *obmc, const int obmc_stride, uint8_t
ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
}
-void ff_snow_inner_add_yblock_mmx(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
if (b_w == 16)
inner_add_yblock_bw_16_obmc_32_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);