Diffstat (limited to 'contrib/ffmpeg/libavcodec/dsputil.c')
-rw-r--r--   contrib/ffmpeg/libavcodec/dsputil.c   542
1 file changed, 353 insertions(+), 189 deletions(-)
diff --git a/contrib/ffmpeg/libavcodec/dsputil.c b/contrib/ffmpeg/libavcodec/dsputil.c
index 3f5e845e7..055486d49 100644
--- a/contrib/ffmpeg/libavcodec/dsputil.c
+++ b/contrib/ffmpeg/libavcodec/dsputil.c
@@ -3,6 +3,8 @@
  * Copyright (c) 2000, 2001 Fabrice Bellard.
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
+ * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
+ *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -18,8 +20,6 @@
  * You should have received a copy of the GNU Lesser General Public
  * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
  */
 
 /**
@@ -32,6 +32,8 @@
 #include "mpegvideo.h"
 #include "simple_idct.h"
 #include "faandct.h"
+#include "faanidct.h"
+#include "h263.h"
 #include "snow.h"
 
 /* snow.c */
@@ -40,9 +42,19 @@ void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, in
 /* vorbis.c */
 void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);
 
+/* flacenc.c */
+void ff_flac_compute_autocorr(const int32_t *data, int len, int lag, double *autoc);
+
+/* pngdec.c */
+void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
+
 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
 uint32_t ff_squareTbl[512] = {0, };
 
+// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
+#define pb_7f (~0UL/255 * 0x7f)
+#define pb_80 (~0UL/255 * 0x80)
+
 const uint8_t ff_zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
@@ -210,7 +222,7 @@ static int pix_norm1_c(uint8_t * pix, int line_size)
     return s;
 }
 
-static void bswap_buf(uint32_t *dst, uint32_t *src, int w){
+static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
     int i;
 
     for(i=0; i+8<=w; i+=8){
@@ -592,6 +604,14 @@ static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_siz
     }
 }
 
+static int sum_abs_dctelem_c(DCTELEM *block)
+{
+    int sum=0, i;
+    for(i=0; i<64; i++)
+        sum+= FFABS(block[i]);
+    return sum;
+}
+
 #if 0
 
 #define PIXOP2(OPNAME, OP) \
@@ -599,7 +619,7 @@ static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_si
 {\
     int i;\
     for(i=0; i<h; i++){\
-        OP(*((uint64_t*)block), LD64(pixels));\
+        OP(*((uint64_t*)block), AV_RN64(pixels));\
         pixels+=line_size;\
         block +=line_size;\
     }\
@@ -609,8 +629,8 @@ static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels,
 {\
     int i;\
    for(i=0; i<h; i++){\
-        const uint64_t a= LD64(pixels  );\
-        const uint64_t b= LD64(pixels+1);\
+        const uint64_t a= AV_RN64(pixels  );\
+        const uint64_t b= AV_RN64(pixels+1);\
         OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
         pixels+=line_size;\
         block +=line_size;\
     }\
\
@@ -621,8 +641,8 @@ static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int li
 {\
     int i;\
     for(i=0; i<h; i++){\
-        const uint64_t a= LD64(pixels  );\
-        const uint64_t b= LD64(pixels+1);\
+        const uint64_t a= AV_RN64(pixels  );\
+        const uint64_t b= AV_RN64(pixels+1);\
         OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
         pixels+=line_size;\
         block +=line_size;\
     }\
 }\
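Throughout the hunks that follow, the LD16/LD32/LD64 load macros are replaced by AV_RN16/AV_RN32/AV_RN64. Both families read a possibly unaligned 16/32/64-bit value from a byte pointer; the AV_RNxx names are the newer helpers from FFmpeg's intreadwrite machinery. A minimal portable sketch of what such a helper can look like — a memcpy-based fallback, for illustration only; the real FFmpeg definitions are per-architecture:

    #include <stdint.h>
    #include <string.h>

    /* memcpy compiles down to a single load on targets that tolerate
     * misaligned access, and stays correct everywhere else (no undefined
     * behavior from dereferencing a misaligned uint32_t*). */
    static inline uint32_t rn32(const uint8_t *p)
    {
        uint32_t v;
        memcpy(&v, p, sizeof v);
        return v;
    }

    static inline uint64_t rn64(const uint8_t *p)
    {
        uint64_t v;
        memcpy(&v, p, sizeof v);
        return v;
    }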
@@ -633,8 +653,8 @@ static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels,
 {\
     int i;\
     for(i=0; i<h; i++){\
-        const uint64_t a= LD64(pixels          );\
-        const uint64_t b= LD64(pixels+line_size);\
+        const uint64_t a= AV_RN64(pixels          );\
+        const uint64_t b= AV_RN64(pixels+line_size);\
         OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
         pixels+=line_size;\
         block +=line_size;\
@@ -645,8 +665,8 @@ static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int li
 {\
     int i;\
     for(i=0; i<h; i++){\
-        const uint64_t a= LD64(pixels          );\
-        const uint64_t b= LD64(pixels+line_size);\
+        const uint64_t a= AV_RN64(pixels          );\
+        const uint64_t b= AV_RN64(pixels+line_size);\
         OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
         pixels+=line_size;\
         block +=line_size;\
@@ -656,8 +676,8 @@ static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int li
 static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
 {\
     int i;\
-    const uint64_t a= LD64(pixels  );\
-    const uint64_t b= LD64(pixels+1);\
+    const uint64_t a= AV_RN64(pixels  );\
+    const uint64_t b= AV_RN64(pixels+1);\
     uint64_t l0= (a&0x0303030303030303ULL)\
                + (b&0x0303030303030303ULL)\
                + 0x0202020202020202ULL;\
@@ -667,8 +687,8 @@ static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int l
 \
     pixels+=line_size;\
     for(i=0; i<h; i+=2){\
-        uint64_t a= LD64(pixels  );\
-        uint64_t b= LD64(pixels+1);\
+        uint64_t a= AV_RN64(pixels  );\
+        uint64_t b= AV_RN64(pixels+1);\
         l1= (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL);\
         h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
@@ -676,8 +696,8 @@ static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int l
         OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
         pixels+=line_size;\
         block +=line_size;\
-        a= LD64(pixels  );\
-        b= LD64(pixels+1);\
+        a= AV_RN64(pixels  );\
+        b= AV_RN64(pixels+1);\
         l0= (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL)\
           + 0x0202020202020202ULL;\
@@ -692,8 +712,8 @@ static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int l
 static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
 {\
     int i;\
-    const uint64_t a= LD64(pixels  );\
-    const uint64_t b= LD64(pixels+1);\
+    const uint64_t a= AV_RN64(pixels  );\
+    const uint64_t b= AV_RN64(pixels+1);\
     uint64_t l0= (a&0x0303030303030303ULL)\
                + (b&0x0303030303030303ULL)\
                + 0x0101010101010101ULL;\
@@ -703,8 +723,8 @@ static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels
 \
     pixels+=line_size;\
     for(i=0; i<h; i+=2){\
-        uint64_t a= LD64(pixels  );\
-        uint64_t b= LD64(pixels+1);\
+        uint64_t a= AV_RN64(pixels  );\
+        uint64_t b= AV_RN64(pixels+1);\
         l1= (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL);\
         h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
@@ -712,8 +732,8 @@ static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels
         OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
         pixels+=line_size;\
         block +=line_size;\
-        a= LD64(pixels  );\
-        b= LD64(pixels+1);\
+        a= AV_RN64(pixels  );\
+        b= AV_RN64(pixels+1);\
         l0= (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL)\
           + 0x0101010101010101ULL;\
@@ -740,7 +760,7 @@ CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c,
 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
     int i;\
     for(i=0; i<h; i++){\
-        OP(*((uint16_t*)(block  )), LD16(pixels  ));\
+        OP(*((uint16_t*)(block  )), AV_RN16(pixels  ));\
         pixels+=line_size;\
         block +=line_size;\
     }\
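The x2/y2 half-pel hunks above, and the rnd_avg32/no_rnd_avg32 calls in the _l2 hunks below, all lean on the same SWAR identity: for packed bytes, (a & b) + (((a ^ b) & 0xFEFE...) >> 1) averages each byte lane rounding down, with the low bit of every byte masked off before the shift so nothing leaks across lanes, while (a | b) - (((a ^ b) & 0xFEFE...) >> 1) rounds halves up. A small self-contained check — the _demo names are invented here:

    #include <assert.h>
    #include <stdint.h>

    /* Per-byte (a+b+1)>>1 on four packed bytes. */
    static uint32_t rnd_avg32_demo(uint32_t a, uint32_t b)
    {
        return (a | b) - (((a ^ b) & 0xFEFEFEFEu) >> 1);
    }

    /* Per-byte (a+b)>>1 (truncating). */
    static uint32_t no_rnd_avg32_demo(uint32_t a, uint32_t b)
    {
        return (a & b) + (((a ^ b) & 0xFEFEFEFEu) >> 1);
    }

    int main(void)
    {
        uint32_t a = 0x01FF7F80u, b = 0x02007F7Fu;
        assert(((rnd_avg32_demo(a, b)    >>  0) & 0xFF) == 0x80); /* (0x80+0x7F+1)>>1 */
        assert(((rnd_avg32_demo(a, b)    >> 24) & 0xFF) == 0x02); /* (0x01+0x02+1)>>1 */
        assert(((no_rnd_avg32_demo(a, b) >> 24) & 0xFF) == 0x01); /* (0x01+0x02)>>1   */
        return 0;
    }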
@@ -748,7 +768,7 @@ static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line
 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
     int i;\
     for(i=0; i<h; i++){\
-        OP(*((uint32_t*)(block  )), LD32(pixels  ));\
+        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
         pixels+=line_size;\
         block +=line_size;\
     }\
@@ -756,8 +776,8 @@ static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line
 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
     int i;\
     for(i=0; i<h; i++){\
-        OP(*((uint32_t*)(block  )), LD32(pixels  ));\
-        OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
+        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
+        OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
         pixels+=line_size;\
         block +=line_size;\
     }\
@@ -771,11 +791,11 @@ static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src
     int i;\
     for(i=0; i<h; i++){\
         uint32_t a,b;\
-        a= LD32(&src1[i*src_stride1  ]);\
-        b= LD32(&src2[i*src_stride2  ]);\
+        a= AV_RN32(&src1[i*src_stride1  ]);\
+        b= AV_RN32(&src2[i*src_stride2  ]);\
         OP(*((uint32_t*)&dst[i*dst_stride  ]), no_rnd_avg32(a, b));\
-        a= LD32(&src1[i*src_stride1+4]);\
-        b= LD32(&src2[i*src_stride2+4]);\
+        a= AV_RN32(&src1[i*src_stride1+4]);\
+        b= AV_RN32(&src2[i*src_stride2+4]);\
         OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
     }\
 }\
@@ -785,11 +805,11 @@ static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, cons
     int i;\
     for(i=0; i<h; i++){\
         uint32_t a,b;\
-        a= LD32(&src1[i*src_stride1  ]);\
-        b= LD32(&src2[i*src_stride2  ]);\
+        a= AV_RN32(&src1[i*src_stride1  ]);\
+        b= AV_RN32(&src2[i*src_stride2  ]);\
         OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
-        a= LD32(&src1[i*src_stride1+4]);\
-        b= LD32(&src2[i*src_stride2+4]);\
+        a= AV_RN32(&src1[i*src_stride1+4]);\
+        b= AV_RN32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
     }\
 }\
@@ -799,8 +819,8 @@ static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, cons
     int i;\
     for(i=0; i<h; i++){\
         uint32_t a,b;\
-        a= LD32(&src1[i*src_stride1  ]);\
-        b= LD32(&src2[i*src_stride2  ]);\
+        a= AV_RN32(&src1[i*src_stride1  ]);\
+        b= AV_RN32(&src2[i*src_stride2  ]);\
         OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
     }\
 }\
@@ -810,8 +830,8 @@ static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, cons
     int i;\
     for(i=0; i<h; i++){\
         uint32_t a,b;\
-        a= LD16(&src1[i*src_stride1  ]);\
-        b= LD16(&src2[i*src_stride2  ]);\
+        a= AV_RN16(&src1[i*src_stride1  ]);\
+        b= AV_RN16(&src2[i*src_stride2  ]);\
         OP(*((uint16_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
     }\
 }\
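The _l4 and _xy2 variants in the hunks that follow average four packed inputs at once. The trick extends the two-input case: split each byte into its low 2 bits (mask 0x03030303) and high 6 bits (mask 0xFCFCFCFC, then shift), accumulate the two parts separately so sums never overflow a byte lane, and recombine as h + ((l >> 2) & 0x0F0F0F0F). A compact illustration, with the function name invented here:

    #include <assert.h>
    #include <stdint.h>

    /* Per-byte (a+b+c+d+2)>>2 on four packed-byte words, carry-free:
     * each byte is 4*hi6 + lo2, so the average is the sum of the hi6
     * parts plus the carry out of the lo2 sums. */
    static uint32_t avg4_demo(uint32_t a, uint32_t b, uint32_t c, uint32_t d)
    {
        uint32_t l = (a & 0x03030303u) + (b & 0x03030303u)
                   + (c & 0x03030303u) + (d & 0x03030303u) + 0x02020202u;
        uint32_t h = ((a & 0xFCFCFCFCu) >> 2) + ((b & 0xFCFCFCFCu) >> 2)
                   + ((c & 0xFCFCFCFCu) >> 2) + ((d & 0xFCFCFCFCu) >> 2);
        return h + ((l >> 2) & 0x0F0F0F0Fu);
    }

    int main(void)
    {
        assert(avg4_demo(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu) == 0xFFFFFFFFu);
        assert((avg4_demo(0x01u, 0x02u, 0x03u, 0x04u) & 0xFF) == 0x03); /* (1+2+3+4+2)>>2 */
        return 0;
    }

Replacing the rounding bias 0x02020202 with 0x01010101, as the no_rnd variants below do, averages with downward rounding instead.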
@@ -849,10 +869,10 @@ static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint
     int i;\
     for(i=0; i<h; i++){\
         uint32_t a, b, c, d, l0, l1, h0, h1;\
-        a= LD32(&src1[i*src_stride1]);\
-        b= LD32(&src2[i*src_stride2]);\
-        c= LD32(&src3[i*src_stride3]);\
-        d= LD32(&src4[i*src_stride4]);\
+        a= AV_RN32(&src1[i*src_stride1]);\
+        b= AV_RN32(&src2[i*src_stride2]);\
+        c= AV_RN32(&src3[i*src_stride3]);\
+        d= AV_RN32(&src4[i*src_stride4]);\
         l0= (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x02020202UL;\
@@ -863,10 +883,10 @@ static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint
         h1= ((c&0xFCFCFCFCUL)>>2)\
           + ((d&0xFCFCFCFCUL)>>2);\
         OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
-        a= LD32(&src1[i*src_stride1+4]);\
-        b= LD32(&src2[i*src_stride2+4]);\
-        c= LD32(&src3[i*src_stride3+4]);\
-        d= LD32(&src4[i*src_stride4+4]);\
+        a= AV_RN32(&src1[i*src_stride1+4]);\
+        b= AV_RN32(&src2[i*src_stride2+4]);\
+        c= AV_RN32(&src3[i*src_stride3+4]);\
+        d= AV_RN32(&src4[i*src_stride4+4]);\
         l0= (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x02020202UL;\
@@ -901,10 +921,10 @@ static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src
     int i;\
     for(i=0; i<h; i++){\
         uint32_t a, b, c, d, l0, l1, h0, h1;\
-        a= LD32(&src1[i*src_stride1]);\
-        b= LD32(&src2[i*src_stride2]);\
-        c= LD32(&src3[i*src_stride3]);\
-        d= LD32(&src4[i*src_stride4]);\
+        a= AV_RN32(&src1[i*src_stride1]);\
+        b= AV_RN32(&src2[i*src_stride2]);\
+        c= AV_RN32(&src3[i*src_stride3]);\
+        d= AV_RN32(&src4[i*src_stride4]);\
         l0= (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x01010101UL;\
@@ -915,10 +935,10 @@ static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src
         h1= ((c&0xFCFCFCFCUL)>>2)\
           + ((d&0xFCFCFCFCUL)>>2);\
         OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
-        a= LD32(&src1[i*src_stride1+4]);\
-        b= LD32(&src2[i*src_stride2+4]);\
-        c= LD32(&src3[i*src_stride3+4]);\
-        d= LD32(&src4[i*src_stride4+4]);\
+        a= AV_RN32(&src1[i*src_stride1+4]);\
+        b= AV_RN32(&src2[i*src_stride2+4]);\
+        c= AV_RN32(&src3[i*src_stride3+4]);\
+        d= AV_RN32(&src4[i*src_stride4+4]);\
         l0= (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x01010101UL;\
@@ -978,8 +998,8 @@ static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixel
 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
 {\
     int i;\
-    const uint32_t a= LD32(pixels  );\
-    const uint32_t b= LD32(pixels+1);\
+    const uint32_t a= AV_RN32(pixels  );\
+    const uint32_t b= AV_RN32(pixels+1);\
     uint32_t l0= (a&0x03030303UL)\
                + (b&0x03030303UL)\
                + 0x02020202UL;\
@@ -989,8 +1009,8 @@ static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixel
 \
     pixels+=line_size;\
     for(i=0; i<h; i+=2){\
-        uint32_t a= LD32(pixels  );\
-        uint32_t b= LD32(pixels+1);\
+        uint32_t a= AV_RN32(pixels  );\
+        uint32_t b= AV_RN32(pixels+1);\
         l1= (a&0x03030303UL)\
           + (b&0x03030303UL);\
         h1= ((a&0xFCFCFCFCUL)>>2)\
@@ -998,8 +1018,8 @@ static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixel
         OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
         pixels+=line_size;\
         block +=line_size;\
-        a= LD32(pixels  );\
-        b= LD32(pixels+1);\
+        a= AV_RN32(pixels  );\
+        b= AV_RN32(pixels+1);\
         l0= (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x02020202UL;\
@@ -1016,8 +1036,8 @@ static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixel
     int j;\
     for(j=0; j<2; j++){\
         int i;\
-        const uint32_t a= LD32(pixels  );\
-        const uint32_t b= LD32(pixels+1);\
+        const uint32_t a= AV_RN32(pixels  );\
+        const uint32_t b= AV_RN32(pixels+1);\
         uint32_t l0= (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x02020202UL;\
@@ -1027,8 +1047,8 @@ static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixel
 \
         pixels+=line_size;\
         for(i=0; i<h; i+=2){\
-            uint32_t a= LD32(pixels  );\
-            uint32_t b= LD32(pixels+1);\
+            uint32_t a= AV_RN32(pixels  );\
+            uint32_t b= AV_RN32(pixels+1);\
             l1= (a&0x03030303UL)\
               + (b&0x03030303UL);\
             h1= ((a&0xFCFCFCFCUL)>>2)\
@@ -1036,8 +1056,8 @@ static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixel
             OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
             pixels+=line_size;\
             block +=line_size;\
-            a= LD32(pixels  );\
-            b= LD32(pixels+1);\
+            a= AV_RN32(pixels  );\
+            b= AV_RN32(pixels+1);\
             l0= (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
@@ -1057,8 +1077,8 @@ static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t
     int j;\
     for(j=0; j<2; j++){\
         int i;\
-        const uint32_t a= LD32(pixels  );\
-        const uint32_t b= LD32(pixels+1);\
+        const uint32_t a= AV_RN32(pixels  );\
+        const uint32_t b= AV_RN32(pixels+1);\
         uint32_t l0= (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x01010101UL;\
@@ -1068,8 +1088,8 @@ static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t
 \
         pixels+=line_size;\
         for(i=0; i<h; i+=2){\
-            uint32_t a= LD32(pixels  );\
-            uint32_t b= LD32(pixels+1);\
+            uint32_t a= AV_RN32(pixels  );\
+            uint32_t b= AV_RN32(pixels+1);\
             l1= (a&0x03030303UL)\
               + (b&0x03030303UL);\
             h1= ((a&0xFCFCFCFCUL)>>2)\
@@ -1077,8 +1097,8 @@ static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t
             OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
             pixels+=line_size;\
             block +=line_size;\
-            a= LD32(pixels  );\
-            b= LD32(pixels+1);\
+            a= AV_RN32(pixels  );\
+            b= AV_RN32(pixels+1);\
             l0= (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x01010101UL;\
@@ -1428,12 +1448,22 @@ static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
 \
     assert(x<8 && y<8 && x>=0 && y>=0);\
 \
-    for(i=0; i<h; i++)\
-    {\
-        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
-        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
-        dst+= stride;\
-        src+= stride;\
+    if(D){\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
+            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
+    }else{\
+        const int E= B+C;\
+        const int step= C ? stride : 1;\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + E*src[step+0]));\
+            OP(dst[1], (A*src[1] + E*src[step+1]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
     }\
 }\
 \
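For the h264_chroma_mc hunk above and the mc4/mc8 hunks that follow: the weights are the standard bilinear ones, A=(8-x)(8-y), B=x(8-y), C=(8-x)y, D=xy, summing to 64. When D==0 the fractional offset is purely horizontal (y==0) or purely vertical (x==0), so the 2D filter degenerates to a 2-tap 1D filter with weights A and E=B+C along the appropriate axis — that is the fast path the patch adds. A scalar sketch of the same decomposition, assuming put-style rounding of (v+32)>>6:

    #include <stdint.h>

    /* Bilinear chroma sample at fractional offset (x,y) in eighths, 0 <= x,y < 8. */
    static uint8_t chroma_sample(const uint8_t *src, int stride, int x, int y)
    {
        const int A = (8 - x) * (8 - y);
        const int B = x * (8 - y);
        const int C = (8 - x) * y;
        const int D = x * y;

        if (D) { /* true 2D case: all four taps contribute */
            return (A * src[0] + B * src[1] +
                    C * src[stride] + D * src[stride + 1] + 32) >> 6;
        } else { /* x==0 or y==0: collapses to a 2-tap 1D filter */
            const int E    = B + C;
            const int step = C ? stride : 1; /* vertical if y!=0, else horizontal */
            return (A * src[0] + E * src[step] + 32) >> 6;
        }
    }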
@@ -1446,14 +1476,26 @@ static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
 \
     assert(x<8 && y<8 && x>=0 && y>=0);\
 \
-    for(i=0; i<h; i++)\
-    {\
-        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
-        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
-        OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
-        OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
-        dst+= stride;\
-        src+= stride;\
+    if(D){\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
+            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
+            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
+            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
+    }else{\
+        const int E= B+C;\
+        const int step= C ? stride : 1;\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + E*src[step+0]));\
+            OP(dst[1], (A*src[1] + E*src[step+1]));\
+            OP(dst[2], (A*src[2] + E*src[step+2]));\
+            OP(dst[3], (A*src[3] + E*src[step+3]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
     }\
 }\
 \
@@ -1466,18 +1508,34 @@ static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
 \
     assert(x<8 && y<8 && x>=0 && y>=0);\
 \
-    for(i=0; i<h; i++)\
-    {\
-        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
-        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
-        OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
-        OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
-        OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
-        OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
-        OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
-        OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
-        dst+= stride;\
-        src+= stride;\
+    if(D){\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
+            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
+            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
+            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
+            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
+            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
+            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
+            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
+    }else{\
+        const int E= B+C;\
+        const int step= C ? stride : 1;\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + E*src[step+0]));\
+            OP(dst[1], (A*src[1] + E*src[step+1]));\
+            OP(dst[2], (A*src[2] + E*src[step+2]));\
+            OP(dst[3], (A*src[3] + E*src[step+3]));\
+            OP(dst[4], (A*src[4] + E*src[step+4]));\
+            OP(dst[5], (A*src[5] + E*src[step+5]));\
+            OP(dst[6], (A*src[6] + E*src[step+6]));\
+            OP(dst[7], (A*src[7] + E*src[step+7]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
     }\
 }
@@ -2012,7 +2070,7 @@ QPEL_MC(0, avg_ , _ , op_avg)
 #if 1
 #define H264_LOWPASS(OPNAME, OP, OP2) \
-static void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
     const int h=2;\
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
     int i;\
@@ -2025,7 +2083,7 @@ static void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstSt
     }\
 }\
 \
-static void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
     const int w=2;\
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
     int i;\
@@ -2045,7 +2103,7 @@ static void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstSt
     }\
 }\
 \
-static void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
     const int h=2;\
     const int w=2;\
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
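The qpel2 lowpass helpers above gain an av_unused marker because, depending on which codecs are configured in, the macro expansion can produce static functions that nothing references; the attribute silences the resulting compiler warnings without #ifdef surgery. The idea in miniature — FFmpeg defines av_unused in terms of GCC's unused attribute, and the fallback shown here is an assumption:

    #if defined(__GNUC__)
    #    define av_unused __attribute__((unused))
    #else
    #    define av_unused
    #endif

    /* Kept for completeness of the macro-generated API; may be unreferenced. */
    static av_unused int helper_possibly_unused(int x)
    {
        return x * 2;
    }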
@@ -2549,10 +2607,10 @@ void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) {
 }
 #endif /* CONFIG_VC1_DECODER||CONFIG_WMV3_DECODER */
 
-#if defined(CONFIG_H264_ENCODER)
+void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
+
 /* H264 specific */
-void ff_h264dsp_init(DSPContext* c, AVCodecContext *avctx);
-#endif /* CONFIG_H264_ENCODER */
+void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx);
 
 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
@@ -2632,6 +2690,7 @@ static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
 }
 
 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
+    if(ENABLE_ANY_H263) {
     int x;
     const int strength= ff_h263_loop_filter_strength[qscale];
 
@@ -2664,9 +2723,11 @@ static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
         src[x-2*stride] = p0 - d2;
         src[x+  stride] = p3 + d2;
     }
+    }
 }
 
 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
+    if(ENABLE_ANY_H263) {
     int y;
     const int strength= ff_h263_loop_filter_strength[qscale];
 
@@ -2699,6 +2760,7 @@ static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
         src[y*stride-2] = p0 - d2;
         src[y*stride+1] = p3 + d2;
     }
+    }
 }
 
 static void h261_loop_filter_c(uint8_t *src, int stride){
@@ -3131,7 +3193,7 @@ void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scant
     DCTELEM temp[64];
 
     if(last<=0) return;
-    //if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms
+    //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
 
     for(i=0; i<=last; i++){
         const int j= scantable[i];
@@ -3219,32 +3281,47 @@ static void clear_blocks_c(DCTELEM *blocks)
 }
 
 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
-    int i;
-    for(i=0; i+7<w; i+=8){
-        dst[i+0] += src[i+0];
-        dst[i+1] += src[i+1];
-        dst[i+2] += src[i+2];
-        dst[i+3] += src[i+3];
-        dst[i+4] += src[i+4];
-        dst[i+5] += src[i+5];
-        dst[i+6] += src[i+6];
-        dst[i+7] += src[i+7];
+    long i;
+    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
+        long a = *(long*)(src+i);
+        long b = *(long*)(dst+i);
+        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
     }
     for(; i<w; i++)
         dst[i+0] += src[i+0];
 }
 
+static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
+    long i;
+    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
+        long a = *(long*)(src1+i);
+        long b = *(long*)(src2+i);
+        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
+    }
+    for(; i<w; i++)
+        dst[i] = src1[i]+src2[i];
+}
+
 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
-    int i;
-    for(i=0; i+7<w; i+=8){
-        dst[i+0] = src1[i+0]-src2[i+0];
-        dst[i+1] = src1[i+1]-src2[i+1];
-        dst[i+2] = src1[i+2]-src2[i+2];
-        dst[i+3] = src1[i+3]-src2[i+3];
-        dst[i+4] = src1[i+4]-src2[i+4];
-        dst[i+5] = src1[i+5]-src2[i+5];
-        dst[i+6] = src1[i+6]-src2[i+6];
-        dst[i+7] = src1[i+7]-src2[i+7];
+    long i;
+#ifndef HAVE_FAST_UNALIGNED
+    if((long)src2 & (sizeof(long)-1)){
+        for(i=0; i+7<w; i+=8){
+            dst[i+0] = src1[i+0]-src2[i+0];
+            dst[i+1] = src1[i+1]-src2[i+1];
+            dst[i+2] = src1[i+2]-src2[i+2];
+            dst[i+3] = src1[i+3]-src2[i+3];
+            dst[i+4] = src1[i+4]-src2[i+4];
+            dst[i+5] = src1[i+5]-src2[i+5];
+            dst[i+6] = src1[i+6]-src2[i+6];
+            dst[i+7] = src1[i+7]-src2[i+7];
+        }
+    }else
+#endif
+    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
+        long a = *(long*)(src1+i);
+        long b = *(long*)(src2+i);
+        *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
     }
     for(; i<w; i++)
         dst[i+0] = src1[i+0]-src2[i+0];
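The rewritten add_bytes_c/add_bytes_l2_c/diff_bytes_c above process sizeof(long) bytes per iteration using the pb_7f/pb_80 masks defined near the top of the file: add (or subtract) the low 7 bits of every byte with the top bits masked off, then patch the top bit back in with XOR, so carries never cross byte lanes. A self-contained demonstration of the addition identity:

    #include <assert.h>
    #include <stdint.h>

    #define PB_7F (~0UL / 255 * 0x7f) /* 0x7f replicated into every byte of a long */
    #define PB_80 (~0UL / 255 * 0x80) /* 0x80 replicated into every byte */

    /* Per-byte wrapping add of all bytes packed in a long, no lane crosstalk:
     * the low-7-bit sums cannot carry between bytes, and the top bit of each
     * byte is a7 ^ b7 ^ carry, recovered by the XOR term. */
    static unsigned long add_bytes_word(unsigned long a, unsigned long b)
    {
        return ((a & PB_7F) + (b & PB_7F)) ^ ((a ^ b) & PB_80);
    }

    int main(void)
    {
        unsigned long r = add_bytes_word(0x80FF0102UL, 0x81010304UL);
        assert((uint8_t)(r >> 24) == 0x01); /* 0x80+0x81 wraps */
        assert((uint8_t)(r >> 16) == 0x00); /* 0xFF+0x01 wraps */
        assert((uint8_t)(r >>  8) == 0x04);
        assert((uint8_t)(r >>  0) == 0x06);
        return 0;
    }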
@@ -3385,19 +3462,14 @@ static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_
 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
     MpegEncContext * const s= (MpegEncContext *)c;
-    DECLARE_ALIGNED_8(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
+    DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
     DCTELEM * const temp= (DCTELEM*)aligned_temp;
-    int sum=0, i;
 
     assert(h==8);
 
     s->dsp.diff_pixels(temp, src1, src2, stride);
     s->dsp.fdct(temp);
-
-    for(i=0; i<64; i++)
-        sum+= FFABS(temp[i]);
-
-    return sum;
+    return s->dsp.sum_abs_dctelem(temp);
 }
 
 #ifdef CONFIG_GPL
@@ -3430,11 +3502,11 @@ static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2
 static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
     MpegEncContext * const s= (MpegEncContext *)c;
-    int16_t dct[8][8];
+    DCTELEM dct[8][8];
     int i;
     int sum=0;
 
-    s->dsp.diff_pixels(dct, src1, src2, stride);
+    s->dsp.diff_pixels(dct[0], src1, src2, stride);
 
 #define SRC(x) dct[i][x]
 #define DST(x,v) dct[i][x]= v
@@ -3486,7 +3558,7 @@ static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s
     s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
     s->dct_unquantize_inter(s, temp, 0, s->qscale);
 
-    simple_idct(temp); //FIXME
+    ff_simple_idct(temp); //FIXME
 
     for(i=0; i<64; i++)
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
@@ -3694,7 +3766,8 @@ static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int st
     return score;
 }
 
-static int ssd_int8_vs_int16_c(int8_t *pix1, int16_t *pix2, int size){
+static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
+                               int size){
     int score=0;
     int i;
     for(i=0; i<size; i++)
@@ -3702,16 +3775,16 @@ static int ssd_int8_vs_int16_c(int8_t *pix1, int16_t *pix2, int size){
     return score;
 }
 
-WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
-WARPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
-WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
+WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
+WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
+WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
 #ifdef CONFIG_GPL
-WARPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
+WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
 #endif
-WARPER8_16_SQ(dct_max8x8_c, dct_max16_c)
-WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
-WARPER8_16_SQ(rd8x8_c, rd16_c)
-WARPER8_16_SQ(bit8x8_c, bit16_c)
+WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
+WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
+WRAPPER8_16_SQ(rd8x8_c, rd16_c)
+WRAPPER8_16_SQ(bit8x8_c, bit16_c)
 
 static void vector_fmul_c(float *dst, const float *src, int len){
     int i;
@@ -3735,7 +3808,7 @@ void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1,
 void ff_float_to_int16_c(int16_t *dst, const float *src, int len){
     int i;
     for(i=0; i<len; i++) {
-        int_fast32_t tmp = ((int32_t*)src)[i];
+        int_fast32_t tmp = ((const int32_t*)src)[i];
         if(tmp & 0xf0000){
             tmp = (0x43c0ffff - tmp)>>31;
             // is this faster on some gcc/cpu combinations?
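On the WARPER8_16_SQ → WRAPPER8_16_SQ renames above (a long-standing typo, fixed here): the macro builds a 16x16 comparison function out of an 8x8 one by scoring the 8x8 sub-blocks and summing. A paraphrase of the shape, not the verbatim macro:

    #include <stdint.h>

    typedef int (*cmp8_fn)(void *ctx, uint8_t *s1, uint8_t *s2, int stride, int h);

    /* Derive a 16x16 (or 16x8 when h==8) metric from an 8x8 block metric
     * by summing the scores of the four (or two) 8x8 quadrants. */
    static int cmp16_from_cmp8(cmp8_fn cmp8, void *ctx,
                               uint8_t *s1, uint8_t *s2, int stride, int h)
    {
        int score = cmp8(ctx, s1,     s2,     stride, 8)
                  + cmp8(ctx, s1 + 8, s2 + 8, stride, 8);
        if (h == 16) {
            s1 += 8 * stride;
            s2 += 8 * stride;
            score += cmp8(ctx, s1,     s2,     stride, 8)
                   + cmp8(ctx, s1 + 8, s2 + 8, stride, 8);
        }
        return score;
    }

The dct_sad8x8_c change in the same region is the same factoring idea in the other direction: the per-coefficient absolute sum now goes through the new sum_abs_dctelem function pointer, so SIMD backends can override just that inner loop.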
@@ -3746,8 +3819,90 @@ void ff_float_to_int16_c(int16_t *dst, const float *src, int len){
     }
 }
 
+#define W0 2048
+#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
+#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
+#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
+#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
+#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
+#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
+#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */
+
+static void wmv2_idct_row(short * b)
+{
+    int s1,s2;
+    int a0,a1,a2,a3,a4,a5,a6,a7;
+    /*step 1*/
+    a1 = W1*b[1]+W7*b[7];
+    a7 = W7*b[1]-W1*b[7];
+    a5 = W5*b[5]+W3*b[3];
+    a3 = W3*b[5]-W5*b[3];
+    a2 = W2*b[2]+W6*b[6];
+    a6 = W6*b[2]-W2*b[6];
+    a0 = W0*b[0]+W0*b[4];
+    a4 = W0*b[0]-W0*b[4];
+    /*step 2*/
+    s1 = (181*(a1-a5+a7-a3)+128)>>8;//1,3,5,7,
+    s2 = (181*(a1-a5-a7+a3)+128)>>8;
+    /*step 3*/
+    b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
+    b[1] = (a4+a6 +s1   + (1<<7))>>8;
+    b[2] = (a4-a6 +s2   + (1<<7))>>8;
+    b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
+    b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
+    b[5] = (a4-a6 -s2   + (1<<7))>>8;
+    b[6] = (a4+a6 -s1   + (1<<7))>>8;
+    b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
+}
+static void wmv2_idct_col(short * b)
+{
+    int s1,s2;
+    int a0,a1,a2,a3,a4,a5,a6,a7;
+    /*step 1, with extended precision*/
+    a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
+    a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
+    a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
+    a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
+    a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
+    a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
+    a0 = (W0*b[8*0]+W0*b[8*4]    )>>3;
+    a4 = (W0*b[8*0]-W0*b[8*4]    )>>3;
+    /*step 2*/
+    s1 = (181*(a1-a5+a7-a3)+128)>>8;
+    s2 = (181*(a1-a5-a7+a3)+128)>>8;
+    /*step 3*/
+    b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
+    b[8*1] = (a4+a6 +s1   + (1<<13))>>14;
+    b[8*2] = (a4-a6 +s2   + (1<<13))>>14;
+    b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
+
+    b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
+    b[8*5] = (a4-a6 -s2   + (1<<13))>>14;
+    b[8*6] = (a4+a6 -s1   + (1<<13))>>14;
+    b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
+}
+void ff_wmv2_idct_c(short * block){
+    int i;
+
+    for(i=0;i<64;i+=8){
+        wmv2_idct_row(block+i);
+    }
+    for(i=0;i<8;i++){
+        wmv2_idct_col(block+i);
+    }
+}
 /* XXX: those functions should be suppressed ASAP when all IDCTs are converted */
+static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ff_wmv2_idct_c(block);
+    put_pixels_clamped_c(block, dest, line_size);
+}
+static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ff_wmv2_idct_c(block);
+    add_pixels_clamped_c(block, dest, line_size);
+}
 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
 {
     j_rev_dct (block);
@@ -3794,7 +3949,7 @@ static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
     dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
 }
 
-static void just_return() { return; }
+static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
 
 /* init static data */
 void dsputil_static_init(void)
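The W1..W7 constants in the new WMV2 IDCT above are the usual fixed-point DCT basis factors, Wk = round(2048*sqrt(2)*cos(k*pi/16)), with W0 kept at a plain 2048 so the row pass scales by 2^11. A quick throwaway check program reproducing the table (illustrative only; link with -lm):

    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        const double pi = acos(-1.0);
        int k;
        /* expected output: 2841 2676 2408 2048 1609 1108 565 */
        for (k = 1; k <= 7; k++)
            printf("W%d = %.0f\n", k,
                   floor(2048.0 * sqrt(2.0) * cos(k * pi / 16.0) + 0.5));
        return 0;
    }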
@@ -3818,13 +3973,14 @@ int ff_check_alignment(void){
     static int did_fail=0;
     DECLARE_ALIGNED_16(int, aligned);
 
-    if((int)&aligned & 15){
+    if((long)&aligned & 15){
         if(!did_fail){
 #if defined(HAVE_MMX) || defined(HAVE_ALTIVEC)
             av_log(NULL, AV_LOG_ERROR,
                 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                 "and may be very slow or crash. This is not a bug in libavcodec,\n"
-                "but in the compiler. Do not report crashes to FFmpeg developers.\n");
+                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
+                "Do not report crashes to FFmpeg developers.\n");
#endif
             did_fail=1;
         }
@@ -3855,7 +4011,7 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
 #endif //CONFIG_ENCODERS
 
     if(avctx->lowres==1){
-        if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO){
+        if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !ENABLE_H264_DECODER){
             c->idct_put= ff_jref_idct4_put;
             c->idct_add= ff_jref_idct4_add;
         }else{
@@ -3880,23 +4036,36 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
             c->idct_add= ff_jref_idct_add;
             c->idct    = j_rev_dct;
             c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
-        }else if(avctx->idct_algo==FF_IDCT_VP3){
+        }else if((ENABLE_VP3_DECODER || ENABLE_VP5_DECODER || ENABLE_VP6_DECODER || ENABLE_THEORA_DECODER ) &&
+                avctx->idct_algo==FF_IDCT_VP3){
             c->idct_put= ff_vp3_idct_put_c;
             c->idct_add= ff_vp3_idct_add_c;
             c->idct    = ff_vp3_idct_c;
             c->idct_permutation_type= FF_NO_IDCT_PERM;
+        }else if(avctx->idct_algo==FF_IDCT_WMV2){
+            c->idct_put= ff_wmv2_idct_put_c;
+            c->idct_add= ff_wmv2_idct_add_c;
+            c->idct    = ff_wmv2_idct_c;
+            c->idct_permutation_type= FF_NO_IDCT_PERM;
+        }else if(avctx->idct_algo==FF_IDCT_FAAN){
+            c->idct_put= ff_faanidct_put;
+            c->idct_add= ff_faanidct_add;
+            c->idct    = ff_faanidct;
+            c->idct_permutation_type= FF_NO_IDCT_PERM;
         }else{ //accurate/default
-            c->idct_put= simple_idct_put;
-            c->idct_add= simple_idct_add;
-            c->idct    = simple_idct;
+            c->idct_put= ff_simple_idct_put;
+            c->idct_add= ff_simple_idct_add;
+            c->idct    = ff_simple_idct;
             c->idct_permutation_type= FF_NO_IDCT_PERM;
         }
     }
 
-    c->h264_idct_add= ff_h264_idct_add_c;
-    c->h264_idct8_add= ff_h264_idct8_add_c;
-    c->h264_idct_dc_add= ff_h264_idct_dc_add_c;
-    c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c;
+    if (ENABLE_H264_DECODER) {
+        c->h264_idct_add= ff_h264_idct_add_c;
+        c->h264_idct8_add= ff_h264_idct8_add_c;
+        c->h264_idct_dc_add= ff_h264_idct_dc_add_c;
+        c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c;
+    }
 
     c->get_pixels = get_pixels_c;
     c->diff_pixels = diff_pixels_c;
@@ -3905,6 +4074,7 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     c->add_pixels_clamped = add_pixels_clamped_c;
     c->add_pixels8 = add_pixels8_c;
     c->add_pixels4 = add_pixels4_c;
+    c->sum_abs_dctelem = sum_abs_dctelem_c;
     c->gmc1 = gmc1_c;
     c->gmc = ff_gmc_c;
     c->clear_blocks = clear_blocks_c;
@@ -4039,8 +4209,11 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
 #if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
     ff_vc1dsp_init(c,avctx);
 #endif
+#if defined(CONFIG_WMV2_DECODER) || defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
+    ff_intrax8dsp_init(c,avctx);
+#endif
 #if defined(CONFIG_H264_ENCODER)
-    ff_h264dsp_init(c,avctx);
+    ff_h264dspenc_init(c,avctx);
 #endif
 
     c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
@@ -4087,9 +4260,13 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
 
     c->add_bytes= add_bytes_c;
+    c->add_bytes_l2= add_bytes_l2_c;
     c->diff_bytes= diff_bytes_c;
     c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
     c->bswap_buf= bswap_buf;
+#ifdef CONFIG_PNG_DECODER
+    c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
+#endif
 
     c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c;
     c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
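With the new FF_IDCT_WMV2 and FF_IDCT_FAAN branches above, IDCT selection stays a caller-side knob: set AVCodecContext.idct_algo before opening the codec and dsputil_init() wires up the matching idct/idct_put/idct_add set and permutation; values it does not recognize fall through to the ff_simple_idct default. A hedged usage sketch against the libavcodec API of this era (in-tree include style; error handling elided):

    #include "avcodec.h"

    /* Request the WMV2 inverse DCT before avcodec_open(); dsputil_init()
     * reads this field when the decoder is opened. */
    void request_wmv2_idct(AVCodecContext *avctx)
    {
        avctx->idct_algo = FF_IDCT_WMV2;
    }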
@@ -4099,8 +4276,10 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;
     c->h264_loop_filter_strength= NULL;
 
-    c->h263_h_loop_filter= h263_h_loop_filter_c;
-    c->h263_v_loop_filter= h263_v_loop_filter_c;
+    if (ENABLE_ANY_H263) {
+        c->h263_h_loop_filter= h263_h_loop_filter_c;
+        c->h263_v_loop_filter= h263_v_loop_filter_c;
+    }
 
     c->h261_loop_filter= h261_loop_filter_c;
 
@@ -4116,6 +4295,9 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
 #ifdef CONFIG_VORBIS_DECODER
     c->vorbis_inverse_coupling = vorbis_inverse_coupling;
 #endif
+#ifdef CONFIG_FLAC_ENCODER
+    c->flac_compute_autocorr = ff_flac_compute_autocorr;
+#endif
     c->vector_fmul = vector_fmul_c;
     c->vector_fmul_reverse = vector_fmul_reverse_c;
     c->vector_fmul_add_add = ff_vector_fmul_add_add_c;
@@ -4131,33 +4313,15 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
     memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
 
-#ifdef HAVE_MMX
-    dsputil_init_mmx(c, avctx);
-#endif
-#ifdef ARCH_ARMV4L
-    dsputil_init_armv4l(c, avctx);
-#endif
-#ifdef HAVE_MLIB
-    dsputil_init_mlib(c, avctx);
-#endif
-#ifdef ARCH_SPARC
-    dsputil_init_vis(c,avctx);
-#endif
-#ifdef ARCH_ALPHA
-    dsputil_init_alpha(c, avctx);
-#endif
-#ifdef ARCH_POWERPC
-    dsputil_init_ppc(c, avctx);
-#endif
-#ifdef HAVE_MMI
-    dsputil_init_mmi(c, avctx);
-#endif
-#ifdef ARCH_SH4
-    dsputil_init_sh4(c,avctx);
-#endif
-#ifdef ARCH_BFIN
-    dsputil_init_bfin(c,avctx);
-#endif
+    if (ENABLE_MMX)      dsputil_init_mmx   (c, avctx);
+    if (ENABLE_ARMV4L)   dsputil_init_armv4l(c, avctx);
+    if (ENABLE_MLIB)     dsputil_init_mlib  (c, avctx);
+    if (ENABLE_VIS)      dsputil_init_vis   (c, avctx);
+    if (ENABLE_ALPHA)    dsputil_init_alpha (c, avctx);
+    if (ENABLE_POWERPC)  dsputil_init_ppc   (c, avctx);
+    if (ENABLE_MMI)      dsputil_init_mmi   (c, avctx);
+    if (ENABLE_SH4)      dsputil_init_sh4   (c, avctx);
+    if (ENABLE_BFIN)     dsputil_init_bfin  (c, avctx);
 
     for(i=0; i<64; i++){
         if(!c->put_2tap_qpel_pixels_tab[0][i])
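The final hunk replaces nine #ifdef stanzas with if (ENABLE_*) calls, the same pattern used for ENABLE_ANY_H263 and ENABLE_H264_DECODER earlier in the diff. The ENABLE_* symbols are 0/1 constants generated into config.h, so the compiler constant-folds disabled branches away exactly as #ifdef would, while the disabled code still gets parsed and type-checked. The pattern in miniature — the flag names and values here are illustrative, not the real config.h contents:

    /* config.h-style 0/1 feature flags (illustrative values) */
    #define ENABLE_FOO 1
    #define ENABLE_BAR 0

    static void init_foo(void) { /* arch-specific setup */ }
    static void init_bar(void) { /* branch is constant-false: dead-stripped */ }

    void init_all(void)
    {
        /* Both calls are syntax- and type-checked even when disabled,
         * unlike code hidden behind #ifdef; only init_foo() is emitted. */
        if (ENABLE_FOO) init_foo();
        if (ENABLE_BAR) init_bar();
    }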