diff options
Diffstat (limited to 'src/libffmpeg/libavcodec/dsputil.c')
-rw-r--r-- | src/libffmpeg/libavcodec/dsputil.c | 380 |
1 files changed, 373 insertions, 7 deletions
diff --git a/src/libffmpeg/libavcodec/dsputil.c b/src/libffmpeg/libavcodec/dsputil.c index b9a5f1382..f6ce967da 100644 --- a/src/libffmpeg/libavcodec/dsputil.c +++ b/src/libffmpeg/libavcodec/dsputil.c @@ -187,6 +187,23 @@ static int pix_norm1_c(uint8_t * pix, int line_size) return s; } +static void bswap_buf(uint32_t *dst, uint32_t *src, int w){ + int i; + + for(i=0; i+8<=w; i+=8){ + dst[i+0]= bswap_32(src[i+0]); + dst[i+1]= bswap_32(src[i+1]); + dst[i+2]= bswap_32(src[i+2]); + dst[i+3]= bswap_32(src[i+3]); + dst[i+4]= bswap_32(src[i+4]); + dst[i+5]= bswap_32(src[i+5]); + dst[i+6]= bswap_32(src[i+6]); + dst[i+7]= bswap_32(src[i+7]); + } + for(;i<w; i++){ + dst[i+0]= bswap_32(src[i+0]); + } +} static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size) { @@ -466,6 +483,14 @@ CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, #else // 64 bit variant #define PIXOP2(OPNAME, OP) \ +static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + int i;\ + for(i=0; i<h; i++){\ + OP(*((uint16_t*)(block )), LD16(pixels ));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ int i;\ for(i=0; i<h; i++){\ @@ -494,10 +519,10 @@ static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src uint32_t a,b;\ a= LD32(&src1[i*src_stride1 ]);\ b= LD32(&src2[i*src_stride2 ]);\ - OP(*((uint32_t*)&dst[i*dst_stride ]), (a&b) + (((a^b)&0xFEFEFEFEUL)>>1));\ + OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\ a= LD32(&src1[i*src_stride1+4]);\ b= LD32(&src2[i*src_stride2+4]);\ - OP(*((uint32_t*)&dst[i*dst_stride+4]), (a&b) + (((a^b)&0xFEFEFEFEUL)>>1));\ + OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\ }\ }\ \ @@ -508,10 +533,10 @@ static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, cons uint32_t a,b;\ a= LD32(&src1[i*src_stride1 ]);\ b= LD32(&src2[i*src_stride2 ]);\ - OP(*((uint32_t*)&dst[i*dst_stride ]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\ + OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ a= LD32(&src1[i*src_stride1+4]);\ b= LD32(&src2[i*src_stride2+4]);\ - OP(*((uint32_t*)&dst[i*dst_stride+4]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\ + OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\ }\ }\ \ @@ -522,7 +547,18 @@ static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, cons uint32_t a,b;\ a= LD32(&src1[i*src_stride1 ]);\ b= LD32(&src2[i*src_stride2 ]);\ - OP(*((uint32_t*)&dst[i*dst_stride ]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\ + OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ + }\ +}\ +\ +static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ + int src_stride1, int src_stride2, int h){\ + int i;\ + for(i=0; i<h; i++){\ + uint32_t a,b;\ + a= LD16(&src1[i*src_stride1 ]);\ + b= LD16(&src2[i*src_stride2 ]);\ + OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ }\ }\ \ @@ -589,6 +625,23 @@ static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ }\ }\ +\ +static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ +}\ +\ +static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ +}\ +\ +static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ +}\ +\ +static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ +}\ +\ static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ int i;\ @@ -635,6 +688,75 @@ static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *sr OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ }\ \ +static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i, a0, b0, a1, b1;\ + a0= pixels[0];\ + b0= pixels[1] + 2;\ + a0 += b0;\ + b0 += pixels[2];\ +\ + pixels+=line_size;\ + for(i=0; i<h; i+=2){\ + a1= pixels[0];\ + b1= pixels[1];\ + a1 += b1;\ + b1 += pixels[2];\ +\ + block[0]= (a1+a0)>>2; /* FIXME non put */\ + block[1]= (b1+b0)>>2;\ +\ + pixels+=line_size;\ + block +=line_size;\ +\ + a0= pixels[0];\ + b0= pixels[1] + 2;\ + a0 += b0;\ + b0 += pixels[2];\ +\ + block[0]= (a1+a0)>>2;\ + block[1]= (b1+b0)>>2;\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + const uint32_t a= LD32(pixels );\ + const uint32_t b= LD32(pixels+1);\ + uint32_t l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x02020202UL;\ + uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + uint32_t l1,h1;\ +\ + pixels+=line_size;\ + for(i=0; i<h; i+=2){\ + uint32_t a= LD32(pixels );\ + uint32_t b= LD32(pixels+1);\ + l1= (a&0x03030303UL)\ + + (b&0x03030303UL);\ + h1= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + pixels+=line_size;\ + block +=line_size;\ + a= LD32(pixels );\ + b= LD32(pixels+1);\ + l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x02020202UL;\ + h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ {\ int j;\ @@ -726,7 +848,7 @@ CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\ CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\ -#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) ) +#define op_avg(a, b) a = rnd_avg32(a, b) #endif #define op_put(a, b) a = b @@ -819,6 +941,222 @@ static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, oy += dyy; } } + +static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + switch(width){ + case 2: put_pixels2_c (dst, src, stride, height); break; + case 4: put_pixels4_c (dst, src, stride, height); break; + case 8: put_pixels8_c (dst, src, stride, height); break; + case 16:put_pixels16_c(dst, src, stride, height); break; + } +} + +static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11; + } + src += stride; + dst += stride; + } +} + +static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11; + } + src += stride; + dst += stride; + } +} + +static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11; + } + src += stride; + dst += stride; + } +} + +static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15; + } + src += stride; + dst += stride; + } +} + +static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15; + } + src += stride; + dst += stride; + } +} + +static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11; + } + src += stride; + dst += stride; + } +} + +static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15; + } + src += stride; + dst += stride; + } +} + +static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15; + } + src += stride; + dst += stride; + } +} + +static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + switch(width){ + case 2: avg_pixels2_c (dst, src, stride, height); break; + case 4: avg_pixels4_c (dst, src, stride, height); break; + case 8: avg_pixels8_c (dst, src, stride, height); break; + case 16:avg_pixels16_c(dst, src, stride, height); break; + } +} + +static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1; + } + src += stride; + dst += stride; + } +} + +static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1; + } + src += stride; + dst += stride; + } +} + +static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1; + } + src += stride; + dst += stride; + } +} + +static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1; + } + src += stride; + dst += stride; + } +} + +static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1; + } + src += stride; + dst += stride; + } +} + +static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1; + } + src += stride; + dst += stride; + } +} + +static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1; + } + src += stride; + dst += stride; + } +} + +static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1; + } + src += stride; + dst += stride; + } +} +#if 0 +#define TPEL_WIDTH(width)\ +static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ + void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\ +static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ + void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\ +static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ + void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\ +static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ + void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\ +static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ + void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\ +static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ + void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\ +static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ + void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\ +static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ + void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\ +static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ + void put_tpel_pixels_mc22_c(dst, src, stride, width, height);} +#endif + #define H264_CHROMA_MC(OPNAME, OP)\ static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ const int A=(8-x)*(8-y);\ @@ -2561,13 +2899,37 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) dspfunc(put_no_rnd, 0, 16); dspfunc(put, 1, 8); dspfunc(put_no_rnd, 1, 8); + dspfunc(put, 2, 4); + dspfunc(put, 3, 2); dspfunc(avg, 0, 16); dspfunc(avg_no_rnd, 0, 16); dspfunc(avg, 1, 8); dspfunc(avg_no_rnd, 1, 8); + dspfunc(avg, 2, 4); + dspfunc(avg, 3, 2); #undef dspfunc + c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c; + c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c; + c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c; + c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c; + c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c; + c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c; + c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c; + c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c; + c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c; + + c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c; + c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c; + c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c; + c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c; + c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c; + c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c; + c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c; + c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c; + c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c; + #define dspfunc(PFX, IDX, NUM) \ c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \ c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \ @@ -2621,7 +2983,7 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c; c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c; c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c; - + c->hadamard8_diff[0]= hadamard8_diff16_c; c->hadamard8_diff[1]= hadamard8_diff_c; c->hadamard8_abs = hadamard8_abs_c; @@ -2643,6 +3005,7 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->add_bytes= add_bytes_c; c->diff_bytes= diff_bytes_c; + c->bswap_buf= bswap_buf; #ifdef HAVE_MMX dsputil_init_mmx(c, avctx); @@ -2662,6 +3025,9 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) #ifdef HAVE_MMI dsputil_init_mmi(c, avctx); #endif +#ifdef ARCH_SH4 + dsputil_init_sh4(c,avctx); +#endif switch(c->idct_permutation_type){ case FF_NO_IDCT_PERM: |