diff options
| author | Diego 'Flameeyes' Pettenò <flameeyes@gmail.com> | 2007-04-03 01:18:24 +0200 |
|---|---|---|
| committer | Diego 'Flameeyes' Pettenò <flameeyes@gmail.com> | 2007-04-03 01:18:24 +0200 |
| commit | fb09531720a4aa2dfa97e5a9a246a453b6278fd2 (patch) | |
| tree | 61525c3a8ddb419d3838a26e488fc3659079bbcd /contrib/ffmpeg/libavcodec/i386 | |
| parent | 294d01046724e28b7193bcb65bf2a0391b0135b6 (diff) | |
| download | xine-lib-fb09531720a4aa2dfa97e5a9a246a453b6278fd2.tar.gz xine-lib-fb09531720a4aa2dfa97e5a9a246a453b6278fd2.tar.bz2 | |
Sync with a more recent version of FFmpeg.
Diffstat (limited to 'contrib/ffmpeg/libavcodec/i386')
| -rw-r--r-- | contrib/ffmpeg/libavcodec/i386/cputest.c | 6 | ||||
| -rw-r--r-- | contrib/ffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c | 215 | ||||
| -rw-r--r-- | contrib/ffmpeg/libavcodec/i386/dsputil_mmx.c | 42 | ||||
| -rw-r--r-- | contrib/ffmpeg/libavcodec/i386/fdct_mmx.c | 8 | ||||
| -rw-r--r-- | contrib/ffmpeg/libavcodec/i386/h264dsp_mmx.c | 30 | ||||
| -rw-r--r-- | contrib/ffmpeg/libavcodec/i386/idct_mmx.c | 2 | ||||
| -rw-r--r-- | contrib/ffmpeg/libavcodec/i386/idct_mmx_xvid.c | 2 | ||||
| -rw-r--r-- | contrib/ffmpeg/libavcodec/i386/snowdsp_mmx.c | 12 |
8 files changed, 169 insertions, 148 deletions
diff --git a/contrib/ffmpeg/libavcodec/i386/cputest.c b/contrib/ffmpeg/libavcodec/i386/cputest.c index 262786b71..0705ab3e5 100644 --- a/contrib/ffmpeg/libavcodec/i386/cputest.c +++ b/contrib/ffmpeg/libavcodec/i386/cputest.c @@ -87,6 +87,8 @@ int mm_support(void) rval |= MM_SSE2; if (ecx & 1) rval |= MM_SSE3; + if (ecx & 0x00000200 ) + rval |= MM_SSSE3; } cpuid(0x80000000, max_ext_level, ebx, ecx, edx); @@ -104,11 +106,13 @@ int mm_support(void) } #if 0 - av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s\n", + av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s%s%s\n", (rval&MM_MMX) ? "MMX ":"", (rval&MM_MMXEXT) ? "MMX2 ":"", (rval&MM_SSE) ? "SSE ":"", (rval&MM_SSE2) ? "SSE2 ":"", + (rval&MM_SSE3) ? "SSE3 ":"", + (rval&MM_SSSE3) ? "SSSE3 ":"", (rval&MM_3DNOW) ? "3DNow ":"", (rval&MM_3DNOWEXT) ? "3DNowExt ":""); #endif diff --git a/contrib/ffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c b/contrib/ffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c index e09a1007e..a943a0371 100644 --- a/contrib/ffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c +++ b/contrib/ffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c @@ -184,91 +184,78 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) { - DECLARE_ALIGNED_8(uint64_t, AA); - DECLARE_ALIGNED_8(uint64_t, DD); - int i; - - /* no special case for mv=(0,0) in 4x*, since it's much less common than in 8x*. - * could still save a few cycles, but maybe not worth the complexity. */ - - assert(x<8 && y<8 && x>=0 && y>=0); - - asm volatile("movd %2, %%mm4\n\t" - "movd %3, %%mm6\n\t" - "punpcklwd %%mm4, %%mm4\n\t" - "punpcklwd %%mm6, %%mm6\n\t" - "punpckldq %%mm4, %%mm4\n\t" /* mm4 = x words */ - "punpckldq %%mm6, %%mm6\n\t" /* mm6 = y words */ - "movq %%mm4, %%mm5\n\t" - "pmullw %%mm6, %%mm4\n\t" /* mm4 = x * y */ - "psllw $3, %%mm5\n\t" - "psllw $3, %%mm6\n\t" - "movq %%mm5, %%mm7\n\t" - "paddw %%mm6, %%mm7\n\t" - "movq %%mm4, %1\n\t" /* DD = x * y */ - "psubw %%mm4, %%mm5\n\t" /* mm5 = B = 8x - xy */ - "psubw %%mm4, %%mm6\n\t" /* mm6 = C = 8y - xy */ - "paddw %4, %%mm4\n\t" - "psubw %%mm7, %%mm4\n\t" /* mm4 = A = xy - (8x+8y) + 64 */ - "pxor %%mm7, %%mm7\n\t" - "movq %%mm4, %0\n\t" - : "=m" (AA), "=m" (DD) : "rm" (x), "rm" (y), "m" (ff_pw_64)); - asm volatile( - /* mm0 = src[0..3], mm1 = src[1..4] */ - "movd %0, %%mm0\n\t" - "movd %1, %%mm1\n\t" - "punpcklbw %%mm7, %%mm0\n\t" - "punpcklbw %%mm7, %%mm1\n\t" - : : "m" (src[0]), "m" (src[1])); - - for(i=0; i<h; i++) { - asm volatile( - /* mm2 = A * src[0..3] + B * src[1..4] */ - "movq %%mm0, %%mm2\n\t" - "pmullw %0, %%mm2\n\t" - "pmullw %%mm5, %%mm1\n\t" - "paddw %%mm1, %%mm2\n\t" - : : "m" (AA)); - - src += stride; - asm volatile( - /* mm0 = src[0..3], mm1 = src[1..4] */ - "movd %0, %%mm0\n\t" - "movd %1, %%mm1\n\t" - "punpcklbw %%mm7, %%mm0\n\t" - "punpcklbw %%mm7, %%mm1\n\t" - : : "m" (src[0]), "m" (src[1])); - - asm volatile( - /* mm2 += C * src[0..3] + D * src[1..4] */ - "movq %%mm0, %%mm3\n\t" - "movq %%mm1, %%mm4\n\t" - "pmullw %%mm6, %%mm3\n\t" - "pmullw %0, %%mm4\n\t" - "paddw %%mm3, %%mm2\n\t" - "paddw %%mm4, %%mm2\n\t" - : : "m" (DD)); - - asm volatile( - /* dst[0..3] = pack((mm2 + 32) >> 6) */ - "paddw %1, %%mm2\n\t" - "psrlw $6, %%mm2\n\t" - "packuswb %%mm7, %%mm2\n\t" - H264_CHROMA_OP4(%0, %%mm2, %%mm3) - "movd %%mm2, %0\n\t" - : "=m" (dst[0]) : "m" (ff_pw_32)); - dst += stride; - } + "pxor %%mm7, %%mm7 \n\t" + "movd %5, %%mm2 \n\t" + "movd %6, %%mm3 \n\t" + "movq "MANGLE(ff_pw_8)", %%mm4\n\t" + "movq "MANGLE(ff_pw_8)", %%mm5\n\t" + "punpcklwd %%mm2, %%mm2 \n\t" + "punpcklwd %%mm3, %%mm3 \n\t" + "punpcklwd %%mm2, %%mm2 \n\t" + "punpcklwd %%mm3, %%mm3 \n\t" + "psubw %%mm2, %%mm4 \n\t" + "psubw %%mm3, %%mm5 \n\t" + + "movd (%1), %%mm0 \n\t" + "movd 1(%1), %%mm6 \n\t" + "add %3, %1 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm6 \n\t" + "pmullw %%mm4, %%mm0 \n\t" + "pmullw %%mm2, %%mm6 \n\t" + "paddw %%mm0, %%mm6 \n\t" + + "1: \n\t" + "movd (%1), %%mm0 \n\t" + "movd 1(%1), %%mm1 \n\t" + "add %3, %1 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm1 \n\t" + "pmullw %%mm4, %%mm0 \n\t" + "pmullw %%mm2, %%mm1 \n\t" + "paddw %%mm0, %%mm1 \n\t" + "movq %%mm1, %%mm0 \n\t" + "pmullw %%mm5, %%mm6 \n\t" + "pmullw %%mm3, %%mm1 \n\t" + "paddw %4, %%mm6 \n\t" + "paddw %%mm6, %%mm1 \n\t" + "psrlw $6, %%mm1 \n\t" + "packuswb %%mm1, %%mm1 \n\t" + H264_CHROMA_OP4((%0), %%mm1, %%mm6) + "movd %%mm1, (%0) \n\t" + "add %3, %0 \n\t" + "movd (%1), %%mm6 \n\t" + "movd 1(%1), %%mm1 \n\t" + "add %3, %1 \n\t" + "punpcklbw %%mm7, %%mm6 \n\t" + "punpcklbw %%mm7, %%mm1 \n\t" + "pmullw %%mm4, %%mm6 \n\t" + "pmullw %%mm2, %%mm1 \n\t" + "paddw %%mm6, %%mm1 \n\t" + "movq %%mm1, %%mm6 \n\t" + "pmullw %%mm5, %%mm0 \n\t" + "pmullw %%mm3, %%mm1 \n\t" + "paddw %4, %%mm0 \n\t" + "paddw %%mm0, %%mm1 \n\t" + "psrlw $6, %%mm1 \n\t" + "packuswb %%mm1, %%mm1 \n\t" + H264_CHROMA_OP4((%0), %%mm1, %%mm0) + "movd %%mm1, (%0) \n\t" + "add %3, %0 \n\t" + "sub $2, %2 \n\t" + "jnz 1b \n\t" + : "+r"(dst), "+r"(src), "+r"(h) + : "r"((long)stride), "m"(ff_pw_32), "m"(x), "m"(y) + ); } #ifdef H264_CHROMA_MC2_TMPL -static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) +static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1*/, long stride, int h, int x, int y) { - int CD=((1<<16)-1)*x*y + 8*y; - int AB=((8<<16)-8)*x + 64 - CD; - int i; - + int tmp = ((1<<16)-1)*x + 8; + int CD= tmp*y; + int AB= (tmp<<3) - CD; asm volatile( /* mm5 = {A,B,A,B} */ /* mm6 = {C,D,C,D} */ @@ -277,50 +264,42 @@ static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1* "punpckldq %%mm5, %%mm5\n\t" "punpckldq %%mm6, %%mm6\n\t" "pxor %%mm7, %%mm7\n\t" - :: "r"(AB), "r"(CD)); + /* mm0 = src[0,1,1,2] */ + "movd %2, %%mm2\n\t" + "punpcklbw %%mm7, %%mm2\n\t" + "pshufw $0x94, %%mm2, %%mm2\n\t" + :: "r"(AB), "r"(CD), "m"(src[0])); + asm volatile( + "1:\n\t" + "add %4, %1\n\t" + /* mm1 = A * src[0,1] + B * src[1,2] */ + "movq %%mm2, %%mm1\n\t" + "pmaddwd %%mm5, %%mm1\n\t" /* mm0 = src[0,1,1,2] */ - "movd %0, %%mm0\n\t" + "movd (%1), %%mm0\n\t" "punpcklbw %%mm7, %%mm0\n\t" "pshufw $0x94, %%mm0, %%mm0\n\t" - :: "m"(src[0])); + /* mm1 += C * src[0,1] + D * src[1,2] */ + "movq %%mm0, %%mm2\n\t" + "pmaddwd %%mm6, %%mm0\n\t" + "paddw %3, %%mm1\n\t" + "paddw %%mm0, %%mm1\n\t" + /* dst[0,1] = pack((mm1 + 32) >> 6) */ + "psrlw $6, %%mm1\n\t" + "packssdw %%mm7, %%mm1\n\t" + "packuswb %%mm7, %%mm1\n\t" + H264_CHROMA_OP4((%0), %%mm1, %%mm3) + "movd %%mm1, %%esi\n\t" + "movw %%si, (%0)\n\t" + "add %4, %0\n\t" + "sub $1, %2\n\t" + "jnz 1b\n\t" + : "+r" (dst), "+r"(src), "+r"(h) + : "m" (ff_pw_32), "r"(stride) + : "%esi"); - for(i=0; i<h; i++) { - asm volatile( - /* mm1 = A * src[0,1] + B * src[1,2] */ - "movq %%mm0, %%mm1\n\t" - "pmaddwd %%mm5, %%mm1\n\t" - ::); - - src += stride; - asm volatile( - /* mm0 = src[0,1,1,2] */ - "movd %0, %%mm0\n\t" - "punpcklbw %%mm7, %%mm0\n\t" - "pshufw $0x94, %%mm0, %%mm0\n\t" - :: "m"(src[0])); - - asm volatile( - /* mm1 += C * src[0,1] + D * src[1,2] */ - "movq %%mm0, %%mm2\n\t" - "pmaddwd %%mm6, %%mm2\n\t" - "paddw %%mm2, %%mm1\n\t" - ::); - - asm volatile( - /* dst[0,1] = pack((mm1 + 32) >> 6) */ - "paddw %1, %%mm1\n\t" - "psrlw $6, %%mm1\n\t" - "packssdw %%mm7, %%mm1\n\t" - "packuswb %%mm7, %%mm1\n\t" - /* writes garbage to the right of dst. - * ok because partitions are processed from left to right. */ - H264_CHROMA_OP4(%0, %%mm1, %%mm3) - "movd %%mm1, %0\n\t" - : "=m" (dst[0]) : "m" (ff_pw_32)); - dst += stride; - } } #endif diff --git a/contrib/ffmpeg/libavcodec/i386/dsputil_mmx.c b/contrib/ffmpeg/libavcodec/i386/dsputil_mmx.c index 5675828a4..23a717acd 100644 --- a/contrib/ffmpeg/libavcodec/i386/dsputil_mmx.c +++ b/contrib/ffmpeg/libavcodec/i386/dsputil_mmx.c @@ -1730,6 +1730,38 @@ static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride WARPER8_16_SQ(hadamard8_diff_mmx, hadamard8_diff16_mmx) WARPER8_16_SQ(hadamard8_diff_mmx2, hadamard8_diff16_mmx2) + +static int ssd_int8_vs_int16_mmx(int8_t *pix1, int16_t *pix2, int size){ + int sum; + long i=size; + asm volatile( + "pxor %%mm4, %%mm4 \n" + "1: \n" + "sub $8, %0 \n" + "movq (%2,%0), %%mm2 \n" + "movq (%3,%0,2), %%mm0 \n" + "movq 8(%3,%0,2), %%mm1 \n" + "punpckhbw %%mm2, %%mm3 \n" + "punpcklbw %%mm2, %%mm2 \n" + "psraw $8, %%mm3 \n" + "psraw $8, %%mm2 \n" + "psubw %%mm3, %%mm1 \n" + "psubw %%mm2, %%mm0 \n" + "pmaddwd %%mm1, %%mm1 \n" + "pmaddwd %%mm0, %%mm0 \n" + "paddd %%mm1, %%mm4 \n" + "paddd %%mm0, %%mm4 \n" + "jg 1b \n" + "movq %%mm4, %%mm3 \n" + "psrlq $32, %%mm3 \n" + "paddd %%mm3, %%mm4 \n" + "movd %%mm4, %1 \n" + :"+r"(i), "=r"(sum) + :"r"(pix1), "r"(pix2) + ); + return sum; +} + #endif //CONFIG_ENCODERS #define put_no_rnd_pixels8_mmx(a,b,c,d) put_pixels8_mmx(a,b,c,d) @@ -3037,14 +3069,14 @@ static void float_to_int16_sse(int16_t *dst, const float *src, int len){ asm volatile("emms"); } -#ifdef CONFIG_SNOW_ENCODER +#ifdef CONFIG_SNOW_DECODER extern void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width); extern void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width); extern void ff_snow_vertical_compose97i_sse2(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); extern void ff_snow_vertical_compose97i_mmx(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); -extern void ff_snow_inner_add_yblock_sse2(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, +extern void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); -extern void ff_snow_inner_add_yblock_mmx(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, +extern void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); #endif @@ -3215,6 +3247,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) } c->add_8x8basis= add_8x8basis_mmx; + c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx; + #endif //CONFIG_ENCODERS c->h263_v_loop_filter= h263_v_loop_filter_mmx; @@ -3462,7 +3496,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow; } -#ifdef CONFIG_SNOW_ENCODER +#ifdef CONFIG_SNOW_DECODER if(mm_flags & MM_SSE2){ c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2; c->vertical_compose97i = ff_snow_vertical_compose97i_sse2; diff --git a/contrib/ffmpeg/libavcodec/i386/fdct_mmx.c b/contrib/ffmpeg/libavcodec/i386/fdct_mmx.c index 2ffbfecf6..7e2682a4a 100644 --- a/contrib/ffmpeg/libavcodec/i386/fdct_mmx.c +++ b/contrib/ffmpeg/libavcodec/i386/fdct_mmx.c @@ -284,7 +284,7 @@ TABLE_SSE2 }}; -static always_inline void fdct_col(const int16_t *in, int16_t *out, int offset) +static av_always_inline void fdct_col(const int16_t *in, int16_t *out, int offset) { movq_m2r(*(in + offset + 1 * 8), mm0); movq_m2r(*(in + offset + 6 * 8), mm1); @@ -364,7 +364,7 @@ static always_inline void fdct_col(const int16_t *in, int16_t *out, int offset) } -static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out) +static av_always_inline void fdct_row_sse2(const int16_t *in, int16_t *out) { asm volatile( #define FDCT_ROW_SSE2_H1(i,t) \ @@ -426,7 +426,7 @@ static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out) ); } -static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table) +static av_always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table) { pshufw_m2r(*(in + 4), mm5, 0x1B); movq_m2r(*(in + 0), mm0); @@ -469,7 +469,7 @@ static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const i movq_r2m(mm7, *(out + 4)); } -static always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const int16_t *table) +static av_always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const int16_t *table) { //FIXME reorder (i dont have a old mmx only cpu here to benchmark ...) movd_m2r(*(in + 6), mm1); diff --git a/contrib/ffmpeg/libavcodec/i386/h264dsp_mmx.c b/contrib/ffmpeg/libavcodec/i386/h264dsp_mmx.c index 40baf199b..56004b674 100644 --- a/contrib/ffmpeg/libavcodec/i386/h264dsp_mmx.c +++ b/contrib/ffmpeg/libavcodec/i386/h264dsp_mmx.c @@ -684,7 +684,7 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40] "movq %%mm6, "#OF"(%1) \n\t" #define QPEL_H264(OPNAME, OP, MMX)\ -static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ +static av_noinline void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ int h=4;\ \ asm volatile(\ @@ -724,7 +724,7 @@ static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i : "memory"\ );\ }\ -static void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ +static av_noinline void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ int h=4;\ asm volatile(\ "pxor %%mm7, %%mm7 \n\t"\ @@ -768,7 +768,7 @@ static void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src );\ }while(--h);\ }\ -static void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ +static av_noinline void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ src -= 2*srcStride;\ asm volatile(\ "pxor %%mm7, %%mm7 \n\t"\ @@ -797,7 +797,7 @@ static void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i : "memory"\ );\ }\ -static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ +static av_noinline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ int h=4;\ int w=3;\ src -= 2*srcStride+2;\ @@ -861,7 +861,7 @@ static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, );\ }\ \ -static void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ +static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ int h=8;\ asm volatile(\ "pxor %%mm7, %%mm7 \n\t"\ @@ -918,7 +918,7 @@ static void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i );\ }\ \ -static void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ +static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ int h=8;\ asm volatile(\ "pxor %%mm7, %%mm7 \n\t"\ @@ -981,7 +981,7 @@ static void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src }while(--h);\ }\ \ -static inline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ +static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ int w= 2;\ src -= 2*srcStride;\ \ @@ -1036,7 +1036,7 @@ static inline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint dst += 4-h*dstStride;\ }\ }\ -static inline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\ +static av_noinline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\ int h = size;\ int w = (size+8)>>2;\ src -= 2*srcStride+2;\ @@ -1141,12 +1141,12 @@ static inline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\ }\ -static void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ +static av_noinline void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\ OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ }\ \ -static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ +static av_noinline void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ src += 8*srcStride;\ @@ -1155,7 +1155,7 @@ static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ }\ \ -static void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ +static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\ OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\ src += 8*dstStride;\ @@ -1173,7 +1173,7 @@ static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride, 16);\ }\ \ -static void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\ +static av_noinline void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\ {\ asm volatile(\ "movq %5, %%mm6 \n\t"\ @@ -1207,7 +1207,7 @@ static void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, ui :"S"((long)src8Stride), "D"((long)dstStride), "m"(ff_pw_16)\ :"memory");\ }\ -static void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\ +static av_noinline void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\ {\ asm volatile(\ "movq %0, %%mm6 \n\t"\ @@ -1311,6 +1311,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t * uint64_t temp[SIZE*(SIZE<8?12:24)/4 + SIZE*SIZE/8];\ uint8_t * const halfHV= (uint8_t*)temp;\ int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE/2;\ + assert((int)temp & 7 == 0);\ put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, tmp, src, SIZE, SIZE, stride);\ OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\ }\ @@ -1319,6 +1320,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t * uint64_t temp[SIZE*(SIZE<8?12:24)/4 + SIZE*SIZE/8];\ uint8_t * const halfHV= (uint8_t*)temp;\ int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE/2;\ + assert((int)temp & 7 == 0);\ put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, tmp, src, SIZE, SIZE, stride);\ OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\ }\ @@ -1327,6 +1329,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t * uint64_t temp[SIZE*(SIZE<8?12:24)/4 + SIZE*SIZE/8];\ int16_t * const halfV= ((int16_t*)temp) + SIZE*SIZE/2;\ uint8_t * const halfHV= ((uint8_t*)temp);\ + assert((int)temp & 7 == 0);\ put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+2, halfHV, stride, SIZE, SIZE);\ }\ @@ -1335,6 +1338,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t * uint64_t temp[SIZE*(SIZE<8?12:24)/4 + SIZE*SIZE/8];\ int16_t * const halfV= ((int16_t*)temp) + SIZE*SIZE/2;\ uint8_t * const halfHV= ((uint8_t*)temp);\ + assert((int)temp & 7 == 0);\ put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+3, halfHV, stride, SIZE, SIZE);\ }\ diff --git a/contrib/ffmpeg/libavcodec/i386/idct_mmx.c b/contrib/ffmpeg/libavcodec/i386/idct_mmx.c index ba595845a..4c548fdce 100644 --- a/contrib/ffmpeg/libavcodec/i386/idct_mmx.c +++ b/contrib/ffmpeg/libavcodec/i386/idct_mmx.c @@ -15,7 +15,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with mpeg2dec; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/contrib/ffmpeg/libavcodec/i386/idct_mmx_xvid.c b/contrib/ffmpeg/libavcodec/i386/idct_mmx_xvid.c index 43eb329cc..85cfbc9cd 100644 --- a/contrib/ffmpeg/libavcodec/i386/idct_mmx_xvid.c +++ b/contrib/ffmpeg/libavcodec/i386/idct_mmx_xvid.c @@ -21,7 +21,7 @@ // * along with FFmpeg; if not, write to the Free Software Foundation, // * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA // * -// * $Id: idct_mmx_xvid.c,v 1.1.2.1 2006/12/02 01:19:55 dgp85 Exp $ +// * $Id: idct_mmx_xvid.c 6577 2006-10-07 15:30:46Z diego $ // * // ***************************************************************************/ diff --git a/contrib/ffmpeg/libavcodec/i386/snowdsp_mmx.c b/contrib/ffmpeg/libavcodec/i386/snowdsp_mmx.c index 718202632..8f182303d 100644 --- a/contrib/ffmpeg/libavcodec/i386/snowdsp_mmx.c +++ b/contrib/ffmpeg/libavcodec/i386/snowdsp_mmx.c @@ -708,7 +708,7 @@ void ff_snow_vertical_compose97i_mmx(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTE "dec %2 \n\t"\ snow_inner_add_yblock_sse2_end_common2 -static void inner_add_yblock_bw_8_obmc_16_bh_even_sse2(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, +static void inner_add_yblock_bw_8_obmc_16_bh_even_sse2(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){ snow_inner_add_yblock_sse2_header snow_inner_add_yblock_sse2_start_8("xmm1", "xmm5", "3", "0") @@ -756,7 +756,7 @@ snow_inner_add_yblock_sse2_accum_8("0", "136") snow_inner_add_yblock_sse2_end_8 } -static void inner_add_yblock_bw_16_obmc_32_sse2(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, +static void inner_add_yblock_bw_16_obmc_32_sse2(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){ snow_inner_add_yblock_sse2_header snow_inner_add_yblock_sse2_start_16("xmm1", "xmm5", "3", "0") @@ -868,7 +868,7 @@ snow_inner_add_yblock_sse2_end_16 "rm"((long)(src_x<<2)),"m"(obmc),"a"(block),"m"((long)b_h),"m"((long)src_stride):\ "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d""); -static void inner_add_yblock_bw_8_obmc_16_mmx(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, +static void inner_add_yblock_bw_8_obmc_16_mmx(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){ snow_inner_add_yblock_mmx_header snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0") @@ -879,7 +879,7 @@ snow_inner_add_yblock_mmx_mix("0", "0") snow_inner_add_yblock_mmx_end("16") } -static void inner_add_yblock_bw_16_obmc_32_mmx(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, +static void inner_add_yblock_bw_16_obmc_32_mmx(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){ snow_inner_add_yblock_mmx_header snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0") @@ -896,7 +896,7 @@ snow_inner_add_yblock_mmx_mix("32", "8") snow_inner_add_yblock_mmx_end("32") } -void ff_snow_inner_add_yblock_sse2(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, +void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ if (b_w == 16) @@ -910,7 +910,7 @@ void ff_snow_inner_add_yblock_sse2(uint8_t *obmc, const int obmc_stride, uint8_t ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); } -void ff_snow_inner_add_yblock_mmx(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, +void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ if (b_w == 16) inner_add_yblock_bw_16_obmc_32_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); |
