From a5adaebc130805962f83deccb29f47a7a2384fc8 Mon Sep 17 00:00:00 2001 From: Miguel Freitas Date: Wed, 26 Mar 2003 14:43:46 +0000 Subject: update ffmpeg. trying to keep local changes (see diff_to_ffmpeg_cvs.txt), let me know if i overlooked something. as usual, preliminary QA: tested non debug builds and several codecs including divx3/4/5, mpeg4, xvid, msmpeg4v3, svq1, wmv7, dv (video/audio), wma i also enabled wmv8 by default since it worked fine with the streams i have. i'm not sure about current state of that so we might enable it only for non-x86 users in case of trouble. CVS patchset: 4488 CVS date: 2003/03/26 14:43:46 --- src/libffmpeg/libavcodec/dsputil.c | 508 +++++++++++++++++++++---------------- 1 file changed, 286 insertions(+), 222 deletions(-) (limited to 'src/libffmpeg/libavcodec/dsputil.c') diff --git a/src/libffmpeg/libavcodec/dsputil.c b/src/libffmpeg/libavcodec/dsputil.c index 01bc84a86..5f4190f75 100644 --- a/src/libffmpeg/libavcodec/dsputil.c +++ b/src/libffmpeg/libavcodec/dsputil.c @@ -18,17 +18,22 @@ * * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer */ + +/** + * @file dsputil.c + * DSP utils + */ + #include "avcodec.h" #include "dsputil.h" #include "mpegvideo.h" -#include "xineutils.h" +#include "simple_idct.h" -int ff_bit_exact=0; -UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; -UINT32 squareTbl[512]; +uint8_t cropTbl[256 + 2 * MAX_NEG_CROP]; +uint32_t squareTbl[512]; -const UINT8 ff_zigzag_direct[64] = { +const uint8_t ff_zigzag_direct[64] = { 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, @@ -40,9 +45,9 @@ const UINT8 ff_zigzag_direct[64] = { }; /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ -UINT16 __align8 inv_zigzag_direct16[64]; +uint16_t __align8 inv_zigzag_direct16[64]; -const UINT8 ff_alternate_horizontal_scan[64] = { +const uint8_t ff_alternate_horizontal_scan[64] = { 0, 1, 2, 3, 8, 9, 16, 17, 10, 11, 4, 5, 6, 7, 15, 14, 13, 12, 19, 18, 24, 25, 32, 33, @@ -53,7 +58,7 @@ const UINT8 ff_alternate_horizontal_scan[64] = { 52, 53, 54, 55, 60, 61, 62, 63, }; -const UINT8 ff_alternate_vertical_scan[64] = { +const uint8_t ff_alternate_vertical_scan[64] = { 0, 8, 16, 24, 1, 9, 2, 10, 17, 25, 32, 40, 48, 56, 57, 49, 41, 33, 26, 18, 3, 11, 4, 12, @@ -65,7 +70,7 @@ const UINT8 ff_alternate_vertical_scan[64] = { }; /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ -const UINT32 inverse[256]={ +const uint32_t inverse[256]={ 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, @@ -100,7 +105,19 @@ const UINT32 inverse[256]={ 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, }; -static int pix_sum_c(UINT8 * pix, int line_size) +/* Input permutation for the simple_idct_mmx */ +static const uint8_t simple_mmx_permutation[64]={ + 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, + 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, + 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, + 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, + 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, + 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, + 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, + 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, +}; + +static int pix_sum_c(uint8_t * pix, int line_size) { int s, i, j; @@ -122,10 +139,10 @@ static int pix_sum_c(UINT8 * pix, int line_size) return s; } -static int pix_norm1_c(UINT8 * pix, int line_size) +static int pix_norm1_c(uint8_t * pix, int line_size) { int s, i, j; - UINT32 *sq = squareTbl + 256; + uint32_t *sq = squareTbl + 256; s = 0; for (i = 0; i < 16; i++) { @@ -171,10 +188,10 @@ static int pix_norm1_c(UINT8 * pix, int line_size) } -static int sse8_c(void *v, UINT8 * pix1, UINT8 * pix2, int line_size) +static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size) { int s, i; - UINT32 *sq = squareTbl + 256; + uint32_t *sq = squareTbl + 256; s = 0; for (i = 0; i < 8; i++) { @@ -222,7 +239,7 @@ static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size) return s; } -static void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size) +static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size) { int i; @@ -241,8 +258,8 @@ static void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_ } } -static void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, - const UINT8 *s2, int stride){ +static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1, + const uint8_t *s2, int stride){ int i; /* read the pixels */ @@ -262,11 +279,11 @@ static void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, } -static void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, +static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, int line_size) { int i; - UINT8 *cm = cropTbl + MAX_NEG_CROP; + uint8_t *cm = cropTbl + MAX_NEG_CROP; /* read the pixels */ for(i=0;i<8;i++) { @@ -284,11 +301,11 @@ static void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, } } -static void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, +static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, int line_size) { int i; - UINT8 *cm = cropTbl + MAX_NEG_CROP; + uint8_t *cm = cropTbl + MAX_NEG_CROP; /* read the pixels */ for(i=0;i<8;i++) { @@ -703,7 +720,7 @@ PIXOP2(put, op_put) #define avg4(a,b,c,d) ((a+b+c+d+2)>>2) -static void gmc1_c(UINT8 *dst, UINT8 *src, int stride, int h, int x16, int y16, int rounder) +static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder) { const int A=(16-x16)*(16-y16); const int B=( x16)*(16-y16); @@ -726,7 +743,7 @@ static void gmc1_c(UINT8 *dst, UINT8 *src, int stride, int h, int x16, int y16, } } -static void gmc_c(UINT8 *dst, UINT8 *src, int stride, int h, int ox, int oy, +static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height) { int y, vx, vy; @@ -784,7 +801,7 @@ static void gmc_c(UINT8 *dst, UINT8 *src, int stride, int h, int ox, int oy, } } -static inline void copy_block17(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h) +static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) { int i; for(i=0; idsp.diff_pixels(temp, src1, src2, stride); - s->fdct(temp); + s->dsp.fdct(temp); for(i=0; i<64; i++) sum+= ABS(temp[i]); @@ -1823,7 +1851,7 @@ static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){ MpegEncContext * const s= (MpegEncContext *)c; - const UINT8 *scantable= s->intra_scantable.permutated; + const uint8_t *scantable= s->intra_scantable.permutated; uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; uint64_t __align8 aligned_bak[stride]; DCTELEM * const temp= (DCTELEM*)aligned_temp; @@ -1875,7 +1903,7 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int level= temp[i] + 64; - XINE_ASSERT(level - 64,"?"); + assert(level - 64); if((level&(~127)) == 0){ bits+= last_length[UNI_AC_ENC_INDEX(run, level)]; @@ -1888,7 +1916,7 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int s->dct_unquantize(s, temp, 0, s->qscale); } - s->idct_add(bak, stride, temp); + s->dsp.idct_add(bak, stride, temp); distoration= s->dsp.sse[1](NULL, bak, src1, stride); @@ -1897,7 +1925,7 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){ MpegEncContext * const s= (MpegEncContext *)c; - const UINT8 *scantable= s->intra_scantable.permutated; + const uint8_t *scantable= s->intra_scantable.permutated; uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; DCTELEM * const temp= (DCTELEM*)aligned_temp; int i, last, run, bits, level, start_i; @@ -1942,7 +1970,7 @@ static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, in level= temp[i] + 64; - XINE_ASSERT(level - 64,"?"); + assert(level - 64); if((level&(~127)) == 0){ bits+= last_length[UNI_AC_ENC_INDEX(run, level)]; @@ -1960,7 +1988,20 @@ WARPER88_1616(quant_psnr8x8_c, quant_psnr16x16_c) WARPER88_1616(rd8x8_c, rd16x16_c) WARPER88_1616(bit8x8_c, bit16x16_c) -void dsputil_init(DSPContext* c, unsigned mask) +/* XXX: those functions should be suppressed ASAP when all IDCTs are + converted */ +static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block) +{ + j_rev_dct (block); + put_pixels_clamped_c(block, dest, line_size); +} +static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block) +{ + j_rev_dct (block); + add_pixels_clamped_c(block, dest, line_size); +} + +void dsputil_init(DSPContext* c, AVCodecContext *avctx) { static int init_done = 0; int i; @@ -1981,6 +2022,23 @@ void dsputil_init(DSPContext* c, unsigned mask) init_done = 1; } +#ifdef CONFIG_ENCODERS + if(avctx->dct_algo==FF_DCT_FASTINT) + c->fdct = fdct_ifast; + else + c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default +#endif //CONFIG_ENCODERS + + if(avctx->idct_algo==FF_IDCT_INT){ + c->idct_put= ff_jref_idct_put; + c->idct_add= ff_jref_idct_add; + c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; + }else{ //accurate/default + c->idct_put= simple_idct_put; + c->idct_add= simple_idct_add; + c->idct_permutation_type= FF_NO_IDCT_PERM; + } + c->get_pixels = get_pixels_c; c->diff_pixels = diff_pixels_c; c->put_pixels_clamped = put_pixels_clamped_c; @@ -2083,37 +2141,43 @@ void dsputil_init(DSPContext* c, unsigned mask) c->diff_bytes= diff_bytes_c; #ifdef HAVE_MMX - dsputil_init_mmx(c, mask); - if (ff_bit_exact) - { - /* FIXME - AVCodec context should have flag for bitexact match */ - /* fprintf(stderr, "\n\n\nff_bit_exact %d\n\n\n\n", ff_bit_exact); */ - dsputil_set_bit_exact_mmx(c, mask); - } + dsputil_init_mmx(c, avctx); #endif #ifdef ARCH_ARMV4L - dsputil_init_armv4l(c, mask); + dsputil_init_armv4l(c, avctx); #endif #ifdef HAVE_MLIB - dsputil_init_mlib(c, mask); + dsputil_init_mlib(c, avctx); #endif #ifdef ARCH_ALPHA - dsputil_init_alpha(c, mask); + dsputil_init_alpha(c, avctx); #endif #ifdef ARCH_POWERPC - dsputil_init_ppc(c, mask); + dsputil_init_ppc(c, avctx); #endif #ifdef HAVE_MMI - dsputil_init_mmi(c, mask); + dsputil_init_mmi(c, avctx); #endif -} -/* remove any non bit exact operation (testing purpose) */ -void avcodec_set_bit_exact(void) -{ - ff_bit_exact=1; -#ifdef HAVE_MMX -// FIXME - better set_bit_exact -// dsputil_set_bit_exact_mmx(); -#endif + switch(c->idct_permutation_type){ + case FF_NO_IDCT_PERM: + for(i=0; i<64; i++) + c->idct_permutation[i]= i; + break; + case FF_LIBMPEG2_IDCT_PERM: + for(i=0; i<64; i++) + c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); + break; + case FF_SIMPLE_IDCT_PERM: + for(i=0; i<64; i++) + c->idct_permutation[i]= simple_mmx_permutation[i]; + break; + case FF_TRANSPOSE_IDCT_PERM: + for(i=0; i<64; i++) + c->idct_permutation[i]= ((i&7)<<3) | (i>>3); + break; + default: + fprintf(stderr, "Internal error, IDCT permutation not set\n"); + } } + -- cgit v1.2.3