diff options
Diffstat (limited to 'src/libffmpeg/libavcodec/dsputil.c')
-rw-r--r-- | src/libffmpeg/libavcodec/dsputil.c | 478 |
1 files changed, 464 insertions, 14 deletions
diff --git a/src/libffmpeg/libavcodec/dsputil.c b/src/libffmpeg/libavcodec/dsputil.c
index b1252251a..926832ff1 100644
--- a/src/libffmpeg/libavcodec/dsputil.c
+++ b/src/libffmpeg/libavcodec/dsputil.c
@@ -31,8 +31,11 @@
 #include "simple_idct.h"
 #include "faandct.h"
 
-uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
-uint32_t squareTbl[512];
+/* snow.c */
+void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
+
+uint8_t cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
+uint32_t squareTbl[512] = {0, };
 
 const uint8_t ff_zigzag_direct[64] = {
     0,   1,  8, 16,  9,  2,  3, 10,
@@ -59,7 +62,7 @@ const uint8_t ff_zigzag248_direct[64] = {
 };
 
 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
-uint16_t __align8 inv_zigzag_direct16[64];
+uint16_t __align8 inv_zigzag_direct16[64] = {0, };
 
 const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
@@ -219,6 +222,23 @@ static void bswap_buf(uint32_t *dst, uint32_t *src, int w){
     }
 }
 
+static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
+{
+    int s, i;
+    uint32_t *sq = squareTbl + 256;
+
+    s = 0;
+    for (i = 0; i < h; i++) {
+        s += sq[pix1[0] - pix2[0]];
+        s += sq[pix1[1] - pix2[1]];
+        s += sq[pix1[2] - pix2[2]];
+        s += sq[pix1[3] - pix2[3]];
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return s;
+}
+
 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
 {
     int s, i;
@@ -270,6 +290,103 @@ static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
     return s;
 }
 
+
+static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
+    int s, i, j;
+    const int dec_count= w==8 ? 3 : 4;
+    int tmp[16*16];
+#if 0
+    int level, ori;
+    static const int scale[2][2][4][4]={
+      {
+        {
+            //8x8 dec=3
+            {268, 239, 239, 213},
+            {  0, 224, 224, 152},
+            {  0, 135, 135, 110},
+        },{
+            //16x16 dec=4
+            {344, 310, 310, 280},
+            {  0, 320, 320, 228},
+            {  0, 175, 175, 136},
+            {  0, 129, 129, 102},
+        }
+      },{
+        {//FIXME 5/3
+            //8x8 dec=3
+            {275, 245, 245, 218},
+            {  0, 230, 230, 156},
+            {  0, 138, 138, 113},
+        },{
+            //16x16 dec=4
+            {352, 317, 317, 286},
+            {  0, 328, 328, 233},
+            {  0, 180, 180, 140},
+            {  0, 132, 132, 105},
+        }
+      }
+    };
+#endif
+
+    for (i = 0; i < h; i++) {
+        for (j = 0; j < w; j+=4) {
+            tmp[16*i+j+0] = (pix1[j+0] - pix2[j+0])<<4;
+            tmp[16*i+j+1] = (pix1[j+1] - pix2[j+1])<<4;
+            tmp[16*i+j+2] = (pix1[j+2] - pix2[j+2])<<4;
+            tmp[16*i+j+3] = (pix1[j+3] - pix2[j+3])<<4;
+        }
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    ff_spatial_dwt(tmp, w, h, 16, type, dec_count);
+
+    s=0;
+#if 0
+    for(level=0; level<dec_count; level++){
+        for(ori= level ? 1 : 0; ori<4; ori++){
+            int sx= (ori&1) ? 1<<level: 0;
+            int stride= 16<<(dec_count-level);
+            int sy= (ori&2) ? stride>>1 : 0;
+            int size= 1<<level;
+
+            for(i=0; i<size; i++){
+                for(j=0; j<size; j++){
+                    int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
+                    s += ABS(v);
+                }
+            }
+        }
+    }
+#endif
+    for (i = 0; i < h; i++) {
+        for (j = 0; j < w; j+=4) {
+            s+= ABS(tmp[16*i+j+0]);
+            s+= ABS(tmp[16*i+j+1]);
+            s+= ABS(tmp[16*i+j+2]);
+            s+= ABS(tmp[16*i+j+3]);
+        }
+    }
+    assert(s>=0);
+
+    return s>>2;
+}
+
+static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
+    return w_c(v, pix1, pix2, line_size, 8, h, 1);
+}
+
+static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
+    return w_c(v, pix1, pix2, line_size, 8, h, 0);
+}
+
+static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
+    return w_c(v, pix1, pix2, line_size, 16, h, 1);
+}
+
+static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
+    return w_c(v, pix1, pix2, line_size, 16, h, 0);
+}
+
 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
 {
     int i;
@@ -332,6 +449,40 @@ static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
     }
 }
 
+static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
+                                 int line_size)
+{
+    int i;
+    uint8_t *cm = cropTbl + MAX_NEG_CROP;
+
+    /* read the pixels */
+    for(i=0;i<4;i++) {
+        pixels[0] = cm[block[0]];
+        pixels[1] = cm[block[1]];
+        pixels[2] = cm[block[2]];
+        pixels[3] = cm[block[3]];
+
+        pixels += line_size;
+        block += 8;
+    }
+}
+
+static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
+                                 int line_size)
+{
+    int i;
+    uint8_t *cm = cropTbl + MAX_NEG_CROP;
+
+    /* read the pixels */
+    for(i=0;i<2;i++) {
+        pixels[0] = cm[block[0]];
+        pixels[1] = cm[block[1]];
+
+        pixels += line_size;
+        block += 8;
+    }
+}
+
 static void put_signed_pixels_clamped_c(const DCTELEM *block,
                                         uint8_t *restrict pixels,
                                         int line_size)
@@ -373,6 +524,38 @@ static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
         block += 8;
     }
 }
+
+static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
+                          int line_size)
+{
+    int i;
+    uint8_t *cm = cropTbl + MAX_NEG_CROP;
+
+    /* read the pixels */
+    for(i=0;i<4;i++) {
+        pixels[0] = cm[pixels[0] + block[0]];
+        pixels[1] = cm[pixels[1] + block[1]];
+        pixels[2] = cm[pixels[2] + block[2]];
+        pixels[3] = cm[pixels[3] + block[3]];
+        pixels += line_size;
+        block += 8;
+    }
+}
+
+static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
+                          int line_size)
+{
+    int i;
+    uint8_t *cm = cropTbl + MAX_NEG_CROP;
+
+    /* read the pixels */
+    for(i=0;i<2;i++) {
+        pixels[0] = cm[pixels[0] + block[0]];
+        pixels[1] = cm[pixels[1] + block[1]];
+        pixels += line_size;
+        block += 8;
+    }
+}
 #if 0
 
 #define PIXOP2(OPNAME, OP) \
@@ -2031,7 +2214,6 @@ static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t
     OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
     OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
     src += 8*srcStride;\
-    tmp += 8*tmpStride;\
     dst += 8*dstStride;\
     OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
     OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
@@ -2195,6 +2377,77 @@ H264_MC(avg_, 16)
 #undef op2_put
 #endif
 
+#define op_scale1(x)  block[x] = clip_uint8( (block[x]*weight + offset) >> log2_denom )
+#define op_scale2(x)  dst[x] = clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
+#define H264_WEIGHT(W,H) \
+static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
+    int attribute_unused x, y; \
+    offset <<= log2_denom; \
+    if(log2_denom) offset += 1<<(log2_denom-1); \
+    for(y=0; y<H; y++, block += stride){ \
+        op_scale1(0); \
+        op_scale1(1); \
+        if(W==2) continue; \
+        op_scale1(2); \
+        op_scale1(3); \
+        if(W==4) continue; \
+        op_scale1(4); \
+        op_scale1(5); \
+        op_scale1(6); \
+        op_scale1(7); \
+        if(W==8) continue; \
+        op_scale1(8); \
+        op_scale1(9); \
+        op_scale1(10); \
+        op_scale1(11); \
+        op_scale1(12); \
+        op_scale1(13); \
+        op_scale1(14); \
+        op_scale1(15); \
+    } \
+} \
+static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offsetd, int offsets){ \
+    int attribute_unused x, y; \
+    int offset = (offsets + offsetd + 1) >> 1; \
+    offset = ((offset << 1) + 1) << log2_denom; \
+    for(y=0; y<H; y++, dst += stride, src += stride){ \
+        op_scale2(0); \
+        op_scale2(1); \
+        if(W==2) continue; \
+        op_scale2(2); \
+        op_scale2(3); \
+        if(W==4) continue; \
+        op_scale2(4); \
+        op_scale2(5); \
+        op_scale2(6); \
+        op_scale2(7); \
+        if(W==8) continue; \
+        op_scale2(8); \
+        op_scale2(9); \
+        op_scale2(10); \
+        op_scale2(11); \
+        op_scale2(12); \
+        op_scale2(13); \
+        op_scale2(14); \
+        op_scale2(15); \
+    } \
+}
+
+H264_WEIGHT(16,16)
+H264_WEIGHT(16,8)
+H264_WEIGHT(8,16)
+H264_WEIGHT(8,8)
+H264_WEIGHT(8,4)
+H264_WEIGHT(4,8)
+H264_WEIGHT(4,4)
+H264_WEIGHT(4,2)
+H264_WEIGHT(2,4)
+H264_WEIGHT(2,2)
+
+#undef op_scale1
+#undef op_scale2
+#undef H264_WEIGHT
+
 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
     uint8_t *cm = cropTbl + MAX_NEG_CROP;
     int i;
@@ -2360,6 +2613,33 @@ static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
     }
 }
 
+static void h261_loop_filter_c(uint8_t *src, int stride){
+    int x,y,xy,yz;
+    int temp[64];
+
+    for(x=0; x<8; x++){
+        temp[x ] = 4*src[x ];
+        temp[x + 7*8] = 4*src[x + 7*stride];
+    }
+    for(y=1; y<7; y++){
+        for(x=0; x<8; x++){
+            xy = y * stride + x;
+            yz = y * 8 + x;
+            temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
+        }
+    }
+
+    for(y=0; y<8; y++){
+        src[ y*stride] = (temp[ y*8] + 2)>>2;
+        src[7+y*stride] = (temp[7+y*8] + 2)>>2;
+        for(x=1; x<7; x++){
+            xy = y * stride + x;
+            yz = y * 8 + x;
+            src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
+        }
+    }
+}
+
 static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
 {
     int s, i;
@@ -2560,6 +2840,56 @@ static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size,
     return s;
 }
 
+static int nsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, int stride, int h){
+    int score1=0;
+    int score2=0;
+    int x,y;
+
+    for(y=0; y<h; y++){
+        for(x=0; x<16; x++){
+            score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
+        }
+        if(y+1<h){
+            for(x=0; x<15; x++){
+                score2+= ABS( s1[x ] - s1[x +stride]
+                             - s1[x+1] + s1[x+1+stride])
+                        -ABS( s2[x ] - s2[x +stride]
+                             - s2[x+1] + s2[x+1+stride]);
+            }
+        }
+        s1+= stride;
+        s2+= stride;
+    }
+
+    if(c) return score1 + ABS(score2)*c->avctx->nsse_weight;
+    else return score1 + ABS(score2)*8;
+}
+
+static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, int stride, int h){
+    int score1=0;
+    int score2=0;
+    int x,y;
+
+    for(y=0; y<h; y++){
+        for(x=0; x<8; x++){
+            score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
+        }
+        if(y+1<h){
+            for(x=0; x<7; x++){
+                score2+= ABS( s1[x ] - s1[x +stride]
+                             - s1[x+1] + s1[x+1+stride])
+                        -ABS( s2[x ] - s2[x +stride]
+                             - s2[x+1] + s2[x+1+stride]);
+            }
+        }
+        s1+= stride;
+        s2+= stride;
+    }
+
+    if(c) return score1 + ABS(score2)*c->avctx->nsse_weight;
+    else return score1 + ABS(score2)*8;
+}
+
 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
     int i;
     unsigned int sum=0;
@@ -2635,6 +2965,9 @@ void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
         case FF_CMP_DCT:
             cmp[i]= c->dct_sad[i];
             break;
+        case FF_CMP_DCTMAX:
+            cmp[i]= c->dct_max[i];
+            break;
         case FF_CMP_PSNR:
             cmp[i]= c->quant_psnr[i];
             break;
@@ -2653,6 +2986,15 @@ void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
         case FF_CMP_ZERO:
             cmp[i]= zero_cmp;
             break;
+        case FF_CMP_NSSE:
+            cmp[i]= c->nsse[i];
+            break;
+        case FF_CMP_W53:
+            cmp[i]= c->w53[i];
+            break;
+        case FF_CMP_W97:
+            cmp[i]= c->w97[i];
+            break;
         default:
             av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
         }
@@ -2849,6 +3191,23 @@ static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2
     return sum;
 }
 
+static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
+    MpegEncContext * const s= (MpegEncContext *)c;
+    uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
+    DCTELEM * const temp= (DCTELEM*)aligned_temp;
+    int sum=0, i;
+
+    assert(h==8);
+
+    s->dsp.diff_pixels(temp, src1, src2, stride);
+    s->dsp.fdct(temp);
+
+    for(i=0; i<64; i++)
+        sum= FFMAX(sum, ABS(temp[i]));
+
+    return sum;
+}
+
 void simple_idct(DCTELEM *block); //FIXME
 
 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
@@ -3078,6 +3437,7 @@ static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int st
 WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
 WARPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
 WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
+WARPER8_16_SQ(dct_max8x8_c, dct_max16_c)
 WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
 WARPER8_16_SQ(rd8x8_c, rd16_c)
 WARPER8_16_SQ(bit8x8_c, bit16_c)
@@ -3095,6 +3455,41 @@ static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
     add_pixels_clamped_c(block, dest, line_size);
 }
 
+static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct4 (block);
+    put_pixels_clamped4_c(block, dest, line_size);
+}
+static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct4 (block);
+    add_pixels_clamped4_c(block, dest, line_size);
+}
+
+static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct2 (block);
+    put_pixels_clamped2_c(block, dest, line_size);
+}
+static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct2 (block);
+    add_pixels_clamped2_c(block, dest, line_size);
+}
+
+static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    uint8_t *cm = cropTbl + MAX_NEG_CROP;
+
+    dest[0] = cm[(block[0] + 4)>>3];
+}
+static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    uint8_t *cm = cropTbl + MAX_NEG_CROP;
+
+    dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
+}
+
 /* init static data */
 void dsputil_static_init(void)
 {
@@ -3133,18 +3528,42 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     }
 #endif //CONFIG_ENCODERS
 
-    if(avctx->idct_algo==FF_IDCT_INT){
-        c->idct_put= ff_jref_idct_put;
-        c->idct_add= ff_jref_idct_add;
-        c->idct = j_rev_dct;
-        c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
-    }else{ //accurate/default
-        c->idct_put= simple_idct_put;
-        c->idct_add= simple_idct_add;
-        c->idct = simple_idct;
+    if(avctx->lowres==1){
+        if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO){
+            c->idct_put= ff_jref_idct4_put;
+            c->idct_add= ff_jref_idct4_add;
+        }else{
+            c->idct_put= ff_h264_lowres_idct_put_c;
+            c->idct_add= ff_h264_lowres_idct_add_c;
+        }
+        c->idct = j_rev_dct4;
+        c->idct_permutation_type= FF_NO_IDCT_PERM;
+    }else if(avctx->lowres==2){
+        c->idct_put= ff_jref_idct2_put;
+        c->idct_add= ff_jref_idct2_add;
+        c->idct = j_rev_dct2;
+        c->idct_permutation_type= FF_NO_IDCT_PERM;
+    }else if(avctx->lowres==3){
+        c->idct_put= ff_jref_idct1_put;
+        c->idct_add= ff_jref_idct1_add;
+        c->idct = j_rev_dct1;
         c->idct_permutation_type= FF_NO_IDCT_PERM;
+    }else{
+        if(avctx->idct_algo==FF_IDCT_INT){
+            c->idct_put= ff_jref_idct_put;
+            c->idct_add= ff_jref_idct_add;
+            c->idct = j_rev_dct;
+            c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
+        }else{ //accurate/default
+            c->idct_put= simple_idct_put;
+            c->idct_add= simple_idct_add;
+            c->idct = simple_idct;
+            c->idct_permutation_type= FF_NO_IDCT_PERM;
+        }
     }
 
+    c->h264_idct_add= ff_h264_idct_add_c;
+
     /* VP3 DSP support */
     c->vp3_dsp_init = vp3_dsp_init_c;
     c->vp3_idct = vp3_idct_c;
@@ -3259,6 +3678,27 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
     c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
 
+    c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c;
+    c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c;
+    c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c;
+    c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c;
+    c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c;
+    c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c;
+    c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c;
+    c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c;
+    c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c;
+    c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c;
+    c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c;
+    c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c;
+    c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c;
+    c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c;
+    c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c;
+    c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c;
+    c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c;
+    c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c;
+    c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c;
+    c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c;
+
     c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
     c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
     c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
@@ -3275,10 +3715,12 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     SET_CMP_FUNC(hadamard8_diff)
     c->hadamard8_diff[4]= hadamard8_intra16_c;
     SET_CMP_FUNC(dct_sad)
+    SET_CMP_FUNC(dct_max)
     c->sad[0]= pix_abs16_c;
     c->sad[1]= pix_abs8_c;
     c->sse[0]= sse16_c;
     c->sse[1]= sse8_c;
+    c->sse[2]= sse4_c;
     SET_CMP_FUNC(quant_psnr)
     SET_CMP_FUNC(rd)
     SET_CMP_FUNC(bit)
@@ -3286,7 +3728,13 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     c->vsad[4]= vsad_intra16_c;
     c->vsse[0]= vsse16_c;
     c->vsse[4]= vsse_intra16_c;
-
+    c->nsse[0]= nsse16_c;
+    c->nsse[1]= nsse8_c;
+    c->w53[0]= w53_16_c;
+    c->w53[1]= w53_8_c;
+    c->w97[0]= w97_16_c;
+    c->w97[1]= w97_8_c;
+
     c->add_bytes= add_bytes_c;
     c->diff_bytes= diff_bytes_c;
     c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
@@ -3295,6 +3743,8 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     c->h263_h_loop_filter= h263_h_loop_filter_c;
     c->h263_v_loop_filter= h263_v_loop_filter_c;
 
+    c->h261_loop_filter= h261_loop_filter_c;
+
     c->try_8x8basis= try_8x8basis_c;
     c->add_8x8basis= add_8x8basis_c;
 