From a5adaebc130805962f83deccb29f47a7a2384fc8 Mon Sep 17 00:00:00 2001 From: Miguel Freitas Date: Wed, 26 Mar 2003 14:43:46 +0000 Subject: update ffmpeg. trying to keep local changes (see diff_to_ffmpeg_cvs.txt), let me know if i overlooked something. as usual, preliminary QA: tested non debug builds and several codecs including divx3/4/5, mpeg4, xvid, msmpeg4v3, svq1, wmv7, dv (video/audio), wma i also enabled wmv8 by default since it worked fine with the streams i have. i'm not sure about current state of that so we might enable it only for non-x86 users in case of trouble. CVS patchset: 4488 CVS date: 2003/03/26 14:43:46 --- src/libffmpeg/libavcodec/i386/dsputil_mmx.c | 392 +++++++++++++++++----------- 1 file changed, 238 insertions(+), 154 deletions(-) (limited to 'src/libffmpeg/libavcodec/i386/dsputil_mmx.c') diff --git a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c index 857f1d398..d5a2d3734 100644 --- a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c +++ b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c @@ -20,33 +20,9 @@ */ #include "../dsputil.h" +#include "../simple_idct.h" int mm_flags; /* multimedia extension flags */ -/* FIXME use them in static form */ -int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx); -int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); -int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); -int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); - -int pix_abs16x16_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); -int pix_abs16x16_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); -int pix_abs16x16_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); -int pix_abs16x16_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); - -int pix_abs8x8_mmx(UINT8 *blk1, UINT8 *blk2, int lx); -int pix_abs8x8_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); -int pix_abs8x8_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); -int pix_abs8x8_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); - -int pix_abs8x8_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); -int pix_abs8x8_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); -int pix_abs8x8_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); -int pix_abs8x8_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); - -int sad16x16_mmx(void *s, UINT8 *blk1, UINT8 *blk2, int lx); -int sad8x8_mmx(void *s, UINT8 *blk1, UINT8 *blk2, int lx); -int sad16x16_mmx2(void *s, UINT8 *blk1, UINT8 *blk2, int lx); -int sad8x8_mmx2(void *s, UINT8 *blk1, UINT8 *blk2, int lx); /* pixel operations */ static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL; @@ -195,7 +171,7 @@ static const uint64_t ff_pw_15 __attribute__ ((aligned(8))) = 0x000F000F000F000F /***********************************/ /* standard MMX */ -static void get_pixels_mmx(DCTELEM *block, const UINT8 *pixels, int line_size) +static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size) { asm volatile( "movl $-128, %%eax \n\t" @@ -223,7 +199,7 @@ static void get_pixels_mmx(DCTELEM *block, const UINT8 *pixels, int line_size) ); } -static inline void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride) +static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride) { asm volatile( "pxor %%mm7, %%mm7 \n\t" @@ -252,10 +228,10 @@ static inline void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 ); } -void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) +void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) { const DCTELEM *p; - UINT8 *pix; + uint8_t *pix; /* read the pixels */ p = block; @@ -307,10 +283,10 @@ void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) :"memory"); } -void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) +void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) { const DCTELEM *p; - UINT8 *pix; + uint8_t *pix; int i; /* read the pixels */ @@ -348,7 +324,7 @@ void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) } while (--i); } -static void put_pixels8_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) +static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h) { __asm __volatile( "lea (%3, %3), %%eax \n\t" @@ -374,7 +350,7 @@ static void put_pixels8_mmx(UINT8 *block, const UINT8 *pixels, int line_size, in ); } -static void put_pixels16_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) +static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h) { __asm __volatile( "lea (%3, %3), %%eax \n\t" @@ -425,7 +401,7 @@ static void clear_blocks_mmx(DCTELEM *blocks) ); } -static int pix_sum16_mmx(UINT8 * pix, int line_size){ +static int pix_sum16_mmx(uint8_t * pix, int line_size){ const int h=16; int sum; int index= -line_size*h; @@ -528,7 +504,7 @@ static int pix_norm1_mmx(uint8_t *pix, int line_size) { return tmp; } -static int sse16_mmx(void *v, UINT8 * pix1, UINT8 * pix2, int line_size) { +static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size) { int tmp; asm volatile ( "movl $16,%%ecx\n" @@ -607,26 +583,21 @@ static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ for(; iput_ ## postfix1 = put_ ## postfix2;\ c->put_no_rnd_ ## postfix1 = put_no_rnd_ ## postfix2;\ c->avg_ ## postfix1 = avg_ ## postfix2; + +/* external functions, from idct_mmx.c */ +void ff_mmx_idct(DCTELEM *block); +void ff_mmxext_idct(DCTELEM *block); + +/* XXX: those functions should be suppressed ASAP when all IDCTs are + converted */ +static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_mmx_idct (block); + put_pixels_clamped_mmx(block, dest, line_size); +} +static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_mmx_idct (block); + add_pixels_clamped_mmx(block, dest, line_size); +} +static void ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_mmxext_idct (block); + put_pixels_clamped_mmx(block, dest, line_size); +} +static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_mmxext_idct (block); + add_pixels_clamped_mmx(block, dest, line_size); +} -void dsputil_init_mmx(DSPContext* c, unsigned mask) +void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) { mm_flags = mm_support(); + + if (avctx->dsp_mask) { + if (avctx->dsp_mask & FF_MM_FORCE) + mm_flags |= (avctx->dsp_mask & 0xffff); + else + mm_flags &= ~(avctx->dsp_mask & 0xffff); + } + #if 0 fprintf(stderr, "libavcodec: CPU flags:"); if (mm_flags & MM_MMX) @@ -1453,6 +1556,27 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask) #endif if (mm_flags & MM_MMX) { + const int dct_algo = avctx->dct_algo; + const int idct_algo= avctx->idct_algo; + + if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX) + c->fdct = ff_fdct_mmx; + + if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){ + c->idct_put= ff_simple_idct_put_mmx; + c->idct_add= ff_simple_idct_add_mmx; + c->idct_permutation_type= FF_SIMPLE_IDCT_PERM; + }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ + if(mm_flags & MM_MMXEXT){ + c->idct_put= ff_libmpeg2mmx2_idct_put; + c->idct_add= ff_libmpeg2mmx2_idct_add; + }else{ + c->idct_put= ff_libmpeg2mmx_idct_put; + c->idct_add= ff_libmpeg2mmx_idct_add; + } + c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; + } + c->get_pixels = get_pixels_mmx; c->diff_pixels = diff_pixels_mmx; c->put_pixels_clamped = put_pixels_clamped_mmx; @@ -1460,15 +1584,6 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask) c->clear_blocks = clear_blocks_mmx; c->pix_sum = pix_sum16_mmx; - c->pix_abs16x16 = pix_abs16x16_mmx; - c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx; - c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx; - c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; - c->pix_abs8x8 = pix_abs8x8_mmx; - c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx; - c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx; - c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx; - c->put_pixels_tab[0][0] = put_pixels16_mmx; c->put_pixels_tab[0][1] = put_pixels16_x2_mmx; c->put_pixels_tab[0][2] = put_pixels16_y2_mmx; @@ -1515,45 +1630,35 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask) c->hadamard8_diff[0]= hadamard8_diff16_mmx; c->hadamard8_diff[1]= hadamard8_diff_mmx; - c->sad[0]= sad16x16_mmx; - c->sad[1]= sad8x8_mmx; - c->pix_norm1 = pix_norm1_mmx; c->sse[0] = sse16_mmx; if (mm_flags & MM_MMXEXT) { - c->pix_abs16x16 = pix_abs16x16_mmx2; - c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; - c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx2; - c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx2; - - c->pix_abs8x8 = pix_abs8x8_mmx2; - c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx2; - c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx2; - c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2; - - c->sad[0]= sad16x16_mmx2; - c->sad[1]= sad8x8_mmx2; - c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; - c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; - c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; c->avg_pixels_tab[0][0] = avg_pixels16_mmx2; c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2; c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2; - c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2; c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2; - c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; - c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; - c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; + + c->hadamard8_diff[0]= hadamard8_diff16_mmx2; + c->hadamard8_diff[1]= hadamard8_diff_mmx2; + + if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ + c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; + c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; + c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; + c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; + c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; + c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; + } #if 1 SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_mmx2) @@ -1592,23 +1697,26 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask) } else if (mm_flags & MM_3DNOW) { c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; - c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow; - c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow; c->avg_pixels_tab[0][0] = avg_pixels16_3dnow; c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow; c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow; - c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow; c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow; c->put_pixels_tab[1][2] = put_pixels8_y2_3dnow; - c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow; - c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow; c->avg_pixels_tab[1][0] = avg_pixels8_3dnow; c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow; c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow; - c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; + + if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ + c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow; + c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow; + c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow; + c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow; + c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow; + c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; + } SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_3dnow) SET_QPEL_FUNC(qpel_pixels_tab[0][ 1], qpel16_mc10_3dnow) @@ -1644,7 +1752,8 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask) SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_3dnow) } } - + + dsputil_init_pix_mmx(c, avctx); #if 0 // for speed testing get_pixels = just_return; @@ -1680,28 +1789,3 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask) //ff_idct = just_return; #endif } - -/* remove any non bit exact operation (testing purpose). NOTE that - this function should be kept as small as possible because it is - always difficult to test automatically non bit exact cases. */ -void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask) -{ - if (mm_flags & MM_MMX) { - /* MMX2 & 3DNOW */ - c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx; - c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx; - c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx; - c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx; - c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx; - c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx; - - if (mm_flags & MM_MMXEXT) { - c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx; - c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx; - c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; - c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx; - c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx; - c->pix_abs8x8_xy2= pix_abs8x8_xy2_mmx; - } - } -} -- cgit v1.2.3