diff options
Diffstat (limited to 'src/libffmpeg/libavcodec/dsputil.c')
-rw-r--r-- | src/libffmpeg/libavcodec/dsputil.c | 346 |
1 files changed, 142 insertions, 204 deletions
diff --git a/src/libffmpeg/libavcodec/dsputil.c b/src/libffmpeg/libavcodec/dsputil.c index f98ad388a..a8578b5c7 100644 --- a/src/libffmpeg/libavcodec/dsputil.c +++ b/src/libffmpeg/libavcodec/dsputil.c @@ -20,16 +20,14 @@ */ #include "avcodec.h" #include "dsputil.h" -#include "simple_idct.h" -void (*ff_idct)(DCTELEM *block); -void (*ff_idct_put)(UINT8 *dest, int line_size, DCTELEM *block); -void (*ff_idct_add)(UINT8 *dest, int line_size, DCTELEM *block); void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); -void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); +void (*ff_gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); +void (*ff_gmc )(UINT8 *dst, UINT8 *src, int stride, int h, int ox, int oy, + int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); void (*clear_blocks)(DCTELEM *blocks); int (*pix_sum)(UINT8 * pix, int line_size); int (*pix_norm1)(UINT8 * pix, int line_size); @@ -49,16 +47,11 @@ int ff_bit_exact=0; UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; UINT32 squareTbl[512]; -extern INT16 ff_mpeg1_default_intra_matrix[64]; -extern INT16 ff_mpeg1_default_non_intra_matrix[64]; -extern INT16 ff_mpeg4_default_intra_matrix[64]; -extern INT16 ff_mpeg4_default_non_intra_matrix[64]; - -UINT8 zigzag_direct[64] = { - 0, 1, 8, 16, 9, 2, 3, 10, - 17, 24, 32, 25, 18, 11, 4, 5, +const UINT8 ff_zigzag_direct[64] = { + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, - 27, 20, 13, 6, 7, 14, 21, 28, + 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, @@ -68,100 +61,64 @@ UINT8 zigzag_direct[64] = { /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ UINT16 __align8 inv_zigzag_direct16[64]; -/* not permutated zigzag_direct for MMX quantizer */ -UINT8 zigzag_direct_noperm[64]; - -UINT8 ff_alternate_horizontal_scan[64] = { - 0, 1, 2, 3, 8, 9, 16, 17, +const UINT8 ff_alternate_horizontal_scan[64] = { + 0, 1, 2, 3, 8, 9, 16, 17, 10, 11, 4, 5, 6, 7, 15, 14, - 13, 12, 19, 18, 24, 25, 32, 33, + 13, 12, 19, 18, 24, 25, 32, 33, 26, 27, 20, 21, 22, 23, 28, 29, - 30, 31, 34, 35, 40, 41, 48, 49, + 30, 31, 34, 35, 40, 41, 48, 49, 42, 43, 36, 37, 38, 39, 44, 45, - 46, 47, 50, 51, 56, 57, 58, 59, + 46, 47, 50, 51, 56, 57, 58, 59, 52, 53, 54, 55, 60, 61, 62, 63, }; -UINT8 ff_alternate_vertical_scan[64] = { - 0, 8, 16, 24, 1, 9, 2, 10, +const UINT8 ff_alternate_vertical_scan[64] = { + 0, 8, 16, 24, 1, 9, 2, 10, 17, 25, 32, 40, 48, 56, 57, 49, - 41, 33, 26, 18, 3, 11, 4, 12, + 41, 33, 26, 18, 3, 11, 4, 12, 19, 27, 34, 42, 50, 58, 35, 43, - 51, 59, 20, 28, 5, 13, 6, 14, + 51, 59, 20, 28, 5, 13, 6, 14, 21, 29, 36, 44, 52, 60, 37, 45, - 53, 61, 22, 30, 7, 15, 23, 31, + 53, 61, 22, 30, 7, 15, 23, 31, 38, 46, 54, 62, 39, 47, 55, 63, }; -#ifdef SIMPLE_IDCT - -/* Input permutation for the simple_idct_mmx */ -static UINT8 simple_mmx_permutation[64]={ - 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, - 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, - 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, - 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, - 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, - 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, - 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, - 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, -}; -#endif - /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ UINT32 inverse[256]={ - 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, - 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, - 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, - 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333, - 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367, - 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283, - 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315, - 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085, - 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498, - 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675, - 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441, - 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183, - 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712, - 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400, - 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163, - 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641, - 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573, - 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737, - 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493, - 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373, - 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368, - 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671, - 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767, - 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740, - 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751, - 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635, - 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593, - 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944, - 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933, - 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, - 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, + 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, + 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, + 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, + 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333, + 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367, + 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283, + 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315, + 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085, + 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498, + 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675, + 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441, + 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183, + 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712, + 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400, + 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163, + 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641, + 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573, + 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737, + 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493, + 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373, + 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368, + 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671, + 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767, + 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740, + 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751, + 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635, + 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593, + 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944, + 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933, + 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, + 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, }; -/* used to skip zeros at the end */ -UINT8 zigzag_end[64]; - -UINT8 permutation[64]; -/* UINT8 invPermutation[64]; */ - -static void build_zigzag_end(void) -{ - int lastIndex; - int lastIndexAfterPerm=0; - for(lastIndex=0; lastIndex<64; lastIndex++) - { - if(zigzag_direct[lastIndex] > lastIndexAfterPerm) - lastIndexAfterPerm= zigzag_direct[lastIndex]; - zigzag_end[lastIndex]= lastIndexAfterPerm + 1; - } -} - int pix_sum_c(UINT8 * pix, int line_size) { int s, i, j; @@ -253,7 +210,7 @@ void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, { int i; UINT8 *cm = cropTbl + MAX_NEG_CROP; - + /* read the pixels */ for(i=0;i<8;i++) { pixels[0] = cm[block[0]]; @@ -275,7 +232,7 @@ void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, { int i; UINT8 *cm = cropTbl + MAX_NEG_CROP; - + /* read the pixels */ for(i=0;i<8;i++) { pixels[0] = cm[pixels[0] + block[0]]; @@ -458,7 +415,7 @@ void (*OPNAME ## _no_rnd_pixels_tab[2][4])(uint8_t *block, const uint8_t *pixels }; #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) -#else /* 64 bit variant */ +#else // 64 bit variant #define PIXOP2(OPNAME, OP) \ static void OPNAME ## _pixels8(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ @@ -867,6 +824,7 @@ PIXOP(uint8_t, put_no_rnd, op_put, line_size) #define avg2(a,b) ((a+b+1)>>1) #define avg4(a,b,c,d) ((a+b+c+d+2)>>2) + static void gmc1_c(UINT8 *dst, UINT8 *src, int stride, int h, int x16, int y16, int rounder) { const int A=(16-x16)*(16-y16); @@ -874,7 +832,6 @@ static void gmc1_c(UINT8 *dst, UINT8 *src, int stride, int h, int x16, int y16, const int C=(16-x16)*( y16); const int D=( x16)*( y16); int i; - rounder= 128 - rounder; for(i=0; i<h; i++) { @@ -891,6 +848,64 @@ static void gmc1_c(UINT8 *dst, UINT8 *src, int stride, int h, int x16, int y16, } } +static void gmc_c(UINT8 *dst, UINT8 *src, int stride, int h, int ox, int oy, + int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height) +{ + int y, vx, vy; + const int s= 1<<shift; + + width--; + height--; + + for(y=0; y<h; y++){ + int x; + + vx= ox; + vy= oy; + for(x=0; x<8; x++){ //XXX FIXME optimize + int src_x, src_y, frac_x, frac_y, index; + + src_x= vx>>16; + src_y= vy>>16; + frac_x= src_x&(s-1); + frac_y= src_y&(s-1); + src_x>>=shift; + src_y>>=shift; + + if((unsigned)src_x < width){ + if((unsigned)src_y < height){ + index= src_x + src_y*stride; + dst[y*stride + x]= ( ( src[index ]*(s-frac_x) + + src[index +1]* frac_x )*(s-frac_y) + + ( src[index+stride ]*(s-frac_x) + + src[index+stride+1]* frac_x )* frac_y + + r)>>(shift*2); + }else{ + index= src_x + clip(src_y, 0, height)*stride; + dst[y*stride + x]= ( ( src[index ]*(s-frac_x) + + src[index +1]* frac_x )*s + + r)>>(shift*2); + } + }else{ + if((unsigned)src_y < height){ + index= clip(src_x, 0, width) + src_y*stride; + dst[y*stride + x]= ( ( src[index ]*(s-frac_y) + + src[index+stride ]* frac_y )*s + + r)>>(shift*2); + }else{ + index= clip(src_x, 0, width) + clip(src_y, 0, height)*stride; + dst[y*stride + x]= src[index ]; + } + } + + vx+= dxx; + vy+= dyx; + } + ox += dxy; + oy += dyy; + } +} + static inline void copy_block17(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h) { int i; @@ -1332,7 +1347,7 @@ qpel_mc_func OPNAME ## qpel_pixels_tab[2][16]={ \ QPEL_MC(0, put_ , _ , op_put) QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd) QPEL_MC(0, avg_ , _ , op_avg) -/* QPEL_MC(1, avg_no_rnd , _ , op_avg) */ +//QPEL_MC(1, avg_no_rnd , _ , op_avg) #undef op_avg #undef op_avg_no_rnd #undef op_put @@ -1538,67 +1553,35 @@ int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) return s; } -/* permute block according so that it corresponds to the MMX idct - order */ -#ifdef SIMPLE_IDCT - /* general permutation, but perhaps slightly slower */ -void block_permute(INT16 *block) -{ - int i; - INT16 temp[64]; - - for(i=0; i<64; i++) temp[ block_permute_op(i) ] = block[i]; - - for(i=0; i<64; i++) block[i] = temp[i]; -} -#else - -void block_permute(INT16 *block) +void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last) { - int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; int i; - - for(i=0;i<8;i++) { - tmp1 = block[1]; - tmp2 = block[2]; - tmp3 = block[3]; - tmp4 = block[4]; - tmp5 = block[5]; - tmp6 = block[6]; - block[1] = tmp2; - block[2] = tmp4; - block[3] = tmp6; - block[4] = tmp1; - block[5] = tmp3; - block[6] = tmp5; - block += 8; + INT16 temp[64]; + + if(last<=0) return; + if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms + + for(i=0; i<=last; i++){ + const int j= scantable[i]; + temp[j]= block[j]; + block[j]=0; + } + + for(i=0; i<=last; i++){ + const int j= scantable[i]; + const int perm_j= permutation[j]; + block[perm_j]= temp[j]; } } -#endif void clear_blocks_c(DCTELEM *blocks) { memset(blocks, 0, sizeof(DCTELEM)*6*64); } -/* XXX: those functions should be suppressed ASAP when all IDCTs are - converted */ -void gen_idct_put(UINT8 *dest, int line_size, DCTELEM *block) -{ - ff_idct (block); - put_pixels_clamped(block, dest, line_size); -} - -void gen_idct_add(UINT8 *dest, int line_size, DCTELEM *block) -{ - ff_idct (block); - add_pixels_clamped(block, dest, line_size); -} - void dsputil_init(void) { - int i, j; - int use_permuted_idct; + int i; for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; for(i=0;i<MAX_NEG_CROP;i++) { @@ -1610,16 +1593,12 @@ void dsputil_init(void) squareTbl[i] = (i - 256) * (i - 256); } -#ifdef SIMPLE_IDCT - ff_idct = NULL; -#else - ff_idct = j_rev_dct; -#endif get_pixels = get_pixels_c; diff_pixels = diff_pixels_c; put_pixels_clamped = put_pixels_clamped_c; add_pixels_clamped = add_pixels_clamped_c; - gmc1= gmc1_c; + ff_gmc1= gmc1_c; + ff_gmc= gmc_c; clear_blocks= clear_blocks_c; pix_sum= pix_sum_c; pix_norm1= pix_norm1_c; @@ -1633,67 +1612,26 @@ void dsputil_init(void) pix_abs8x8_y2 = pix_abs8x8_y2_c; pix_abs8x8_xy2 = pix_abs8x8_xy2_c; - use_permuted_idct = 1; - #ifdef HAVE_MMX dsputil_init_mmx(); #endif #ifdef ARCH_ARMV4L dsputil_init_armv4l(); #endif +#ifdef HAVE_MLIB + dsputil_init_mlib(); +#endif #ifdef ARCH_ALPHA dsputil_init_alpha(); - use_permuted_idct = 0; #endif #ifdef ARCH_POWERPC dsputil_init_ppc(); #endif -#if defined(HAVE_MLIB) && !defined(HAVE_MMX) - dsputil_init_mlib(); - use_permuted_idct = 0; +#ifdef HAVE_MMI + dsputil_init_mmi(); #endif -#ifdef SIMPLE_IDCT - if (ff_idct == NULL) { - ff_idct_put = simple_idct_put; - ff_idct_add = simple_idct_add; - use_permuted_idct=0; - } -#endif - if(ff_idct != NULL) { - ff_idct_put = gen_idct_put; - ff_idct_add = gen_idct_add; - } - - if(use_permuted_idct) -#ifdef SIMPLE_IDCT - for(i=0; i<64; i++) permutation[i]= simple_mmx_permutation[i]; -#else - for(i=0; i<64; i++) permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); -#endif - else - for(i=0; i<64; i++) permutation[i]=i; - - for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1; - for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i]; - - if (use_permuted_idct) { - /* permute for IDCT */ - for(i=0;i<64;i++) { - j = zigzag_direct[i]; - zigzag_direct[i] = block_permute_op(j); - j = ff_alternate_horizontal_scan[i]; - ff_alternate_horizontal_scan[i] = block_permute_op(j); - j = ff_alternate_vertical_scan[i]; - ff_alternate_vertical_scan[i] = block_permute_op(j); - } - block_permute(ff_mpeg1_default_intra_matrix); - block_permute(ff_mpeg1_default_non_intra_matrix); - block_permute(ff_mpeg4_default_intra_matrix); - block_permute(ff_mpeg4_default_non_intra_matrix); - } - - build_zigzag_end(); + for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; } /* remove any non bit exact operation (testing purpose) */ @@ -1712,14 +1650,14 @@ void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3], int quad, diff, x, y; UINT8 *orig, *coded; UINT32 *sq = squareTbl + 256; - + quad = 0; diff = 0; - + /* Luminance */ orig = orig_image[0]; coded = coded_image[0]; - + for (y=0;y<avctx->height;y++) { for (x=0;x<avctx->width;x++) { diff = *(orig + x) - *(coded + x); @@ -1728,12 +1666,12 @@ void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3], orig += orig_linesize[0]; coded += coded_linesize; } - + avctx->psnr_y = (float) quad / (float) (avctx->width * avctx->height); - + if (avctx->psnr_y) { avctx->psnr_y = (float) (255 * 255) / avctx->psnr_y; - avctx->psnr_y = 10 * (float) log10 (avctx->psnr_y); + avctx->psnr_y = 10 * (float) log10 (avctx->psnr_y); } else avctx->psnr_y = 99.99; } |