diff options
Diffstat (limited to 'src/libffmpeg/libavcodec/dsputil.c')
-rw-r--r-- | src/libffmpeg/libavcodec/dsputil.c | 508 |
1 files changed, 286 insertions, 222 deletions
diff --git a/src/libffmpeg/libavcodec/dsputil.c b/src/libffmpeg/libavcodec/dsputil.c index 01bc84a86..5f4190f75 100644 --- a/src/libffmpeg/libavcodec/dsputil.c +++ b/src/libffmpeg/libavcodec/dsputil.c @@ -18,17 +18,22 @@ * * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> */ + +/** + * @file dsputil.c + * DSP utils + */ + #include "avcodec.h" #include "dsputil.h" #include "mpegvideo.h" -#include "xineutils.h" +#include "simple_idct.h" -int ff_bit_exact=0; -UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; -UINT32 squareTbl[512]; +uint8_t cropTbl[256 + 2 * MAX_NEG_CROP]; +uint32_t squareTbl[512]; -const UINT8 ff_zigzag_direct[64] = { +const uint8_t ff_zigzag_direct[64] = { 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, @@ -40,9 +45,9 @@ const UINT8 ff_zigzag_direct[64] = { }; /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ -UINT16 __align8 inv_zigzag_direct16[64]; +uint16_t __align8 inv_zigzag_direct16[64]; -const UINT8 ff_alternate_horizontal_scan[64] = { +const uint8_t ff_alternate_horizontal_scan[64] = { 0, 1, 2, 3, 8, 9, 16, 17, 10, 11, 4, 5, 6, 7, 15, 14, 13, 12, 19, 18, 24, 25, 32, 33, @@ -53,7 +58,7 @@ const UINT8 ff_alternate_horizontal_scan[64] = { 52, 53, 54, 55, 60, 61, 62, 63, }; -const UINT8 ff_alternate_vertical_scan[64] = { +const uint8_t ff_alternate_vertical_scan[64] = { 0, 8, 16, 24, 1, 9, 2, 10, 17, 25, 32, 40, 48, 56, 57, 49, 41, 33, 26, 18, 3, 11, 4, 12, @@ -65,7 +70,7 @@ const UINT8 ff_alternate_vertical_scan[64] = { }; /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ -const UINT32 inverse[256]={ +const uint32_t inverse[256]={ 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, @@ -100,7 +105,19 @@ const UINT32 inverse[256]={ 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, }; -static int pix_sum_c(UINT8 * pix, int line_size) +/* Input permutation for the simple_idct_mmx */ +static const uint8_t simple_mmx_permutation[64]={ + 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, + 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, + 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, + 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, + 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, + 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, + 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, + 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, +}; + +static int pix_sum_c(uint8_t * pix, int line_size) { int s, i, j; @@ -122,10 +139,10 @@ static int pix_sum_c(UINT8 * pix, int line_size) return s; } -static int pix_norm1_c(UINT8 * pix, int line_size) +static int pix_norm1_c(uint8_t * pix, int line_size) { int s, i, j; - UINT32 *sq = squareTbl + 256; + uint32_t *sq = squareTbl + 256; s = 0; for (i = 0; i < 16; i++) { @@ -171,10 +188,10 @@ static int pix_norm1_c(UINT8 * pix, int line_size) } -static int sse8_c(void *v, UINT8 * pix1, UINT8 * pix2, int line_size) +static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size) { int s, i; - UINT32 *sq = squareTbl + 256; + uint32_t *sq = squareTbl + 256; s = 0; for (i = 0; i < 8; i++) { @@ -222,7 +239,7 @@ static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size) return s; } -static void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size) +static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size) { int i; @@ -241,8 +258,8 @@ static void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_ } } -static void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, - const UINT8 *s2, int stride){ +static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1, + const uint8_t *s2, int stride){ int i; /* read the pixels */ @@ -262,11 +279,11 @@ static void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, } -static void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, +static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, int line_size) { int i; - UINT8 *cm = cropTbl + MAX_NEG_CROP; + uint8_t *cm = cropTbl + MAX_NEG_CROP; /* read the pixels */ for(i=0;i<8;i++) { @@ -284,11 +301,11 @@ static void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, } } -static void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, +static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, int line_size) { int i; - UINT8 *cm = cropTbl + MAX_NEG_CROP; + uint8_t *cm = cropTbl + MAX_NEG_CROP; /* read the pixels */ for(i=0;i<8;i++) { @@ -703,7 +720,7 @@ PIXOP2(put, op_put) #define avg4(a,b,c,d) ((a+b+c+d+2)>>2) -static void gmc1_c(UINT8 *dst, UINT8 *src, int stride, int h, int x16, int y16, int rounder) +static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder) { const int A=(16-x16)*(16-y16); const int B=( x16)*(16-y16); @@ -726,7 +743,7 @@ static void gmc1_c(UINT8 *dst, UINT8 *src, int stride, int h, int x16, int y16, } } -static void gmc_c(UINT8 *dst, UINT8 *src, int stride, int h, int ox, int oy, +static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height) { int y, vx, vy; @@ -784,7 +801,7 @@ static void gmc_c(UINT8 *dst, UINT8 *src, int stride, int h, int ox, int oy, } } -static inline void copy_block17(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h) +static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) { int i; for(i=0; i<h; i++) @@ -799,7 +816,7 @@ static inline void copy_block17(UINT8 *dst, UINT8 *src, int dstStride, int srcSt } } -static inline void copy_block9(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h) +static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) { int i; for(i=0; i<h; i++) @@ -814,8 +831,8 @@ static inline void copy_block9(UINT8 *dst, UINT8 *src, int dstStride, int srcStr #define QPEL_MC(r, OPNAME, RND, OP) \ -static void OPNAME ## mpeg4_qpel8_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h){\ - UINT8 *cm = cropTbl + MAX_NEG_CROP;\ +static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ + uint8_t *cm = cropTbl + MAX_NEG_CROP;\ int i;\ for(i=0; i<h; i++)\ {\ @@ -832,9 +849,9 @@ static void OPNAME ## mpeg4_qpel8_h_lowpass(UINT8 *dst, UINT8 *src, int dstStrid }\ }\ \ -static void OPNAME ## mpeg4_qpel8_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride){\ +static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ const int w=8;\ - UINT8 *cm = cropTbl + MAX_NEG_CROP;\ + uint8_t *cm = cropTbl + MAX_NEG_CROP;\ int i;\ for(i=0; i<w; i++)\ {\ @@ -860,8 +877,8 @@ static void OPNAME ## mpeg4_qpel8_v_lowpass(UINT8 *dst, UINT8 *src, int dstStrid }\ }\ \ -static void OPNAME ## mpeg4_qpel16_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h){\ - UINT8 *cm = cropTbl + MAX_NEG_CROP;\ +static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ + uint8_t *cm = cropTbl + MAX_NEG_CROP;\ int i;\ \ for(i=0; i<h; i++)\ @@ -887,8 +904,8 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass(UINT8 *dst, UINT8 *src, int dstStri }\ }\ \ -static void OPNAME ## mpeg4_qpel16_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride){\ - UINT8 *cm = cropTbl + MAX_NEG_CROP;\ +static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + uint8_t *cm = cropTbl + MAX_NEG_CROP;\ int i;\ const int w=16;\ for(i=0; i<w; i++)\ @@ -931,367 +948,367 @@ static void OPNAME ## mpeg4_qpel16_v_lowpass(UINT8 *dst, UINT8 *src, int dstStri }\ }\ \ -static void OPNAME ## qpel8_mc00_c (UINT8 *dst, UINT8 *src, int stride){\ +static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\ OPNAME ## pixels8_c(dst, src, stride, 8);\ }\ \ -static void OPNAME ## qpel8_mc10_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 half[64];\ +static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t half[64];\ put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\ }\ \ -static void OPNAME ## qpel8_mc20_c(UINT8 *dst, UINT8 *src, int stride){\ +static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\ }\ \ -static void OPNAME ## qpel8_mc30_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 half[64];\ +static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t half[64];\ put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\ }\ \ -static void OPNAME ## qpel8_mc01_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[16*9];\ - UINT8 half[64];\ +static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[16*9];\ + uint8_t half[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\ }\ \ -static void OPNAME ## qpel8_mc02_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[16*9];\ +static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[16*9];\ copy_block9(full, src, 16, stride, 9);\ OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\ }\ \ -static void OPNAME ## qpel8_mc03_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[16*9];\ - UINT8 half[64];\ +static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[16*9];\ + uint8_t half[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\ }\ -void ff_ ## OPNAME ## qpel8_mc11_old_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[16*9];\ - UINT8 halfH[72];\ - UINT8 halfV[64];\ - UINT8 halfHV[64];\ +void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[16*9];\ + uint8_t halfH[72];\ + uint8_t halfV[64];\ + uint8_t halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ }\ -static void OPNAME ## qpel8_mc11_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[16*9];\ - UINT8 halfH[72];\ - UINT8 halfHV[64];\ +static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[16*9];\ + uint8_t halfH[72];\ + uint8_t halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ }\ -void ff_ ## OPNAME ## qpel8_mc31_old_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[16*9];\ - UINT8 halfH[72];\ - UINT8 halfV[64];\ - UINT8 halfHV[64];\ +void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[16*9];\ + uint8_t halfH[72];\ + uint8_t halfV[64];\ + uint8_t halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ }\ -static void OPNAME ## qpel8_mc31_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[16*9];\ - UINT8 halfH[72];\ - UINT8 halfHV[64];\ +static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[16*9];\ + uint8_t halfH[72];\ + uint8_t halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ }\ -void ff_ ## OPNAME ## qpel8_mc13_old_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[16*9];\ - UINT8 halfH[72];\ - UINT8 halfV[64];\ - UINT8 halfHV[64];\ +void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[16*9];\ + uint8_t halfH[72];\ + uint8_t halfV[64];\ + uint8_t halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ }\ -static void OPNAME ## qpel8_mc13_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[16*9];\ - UINT8 halfH[72];\ - UINT8 halfHV[64];\ +static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[16*9];\ + uint8_t halfH[72];\ + uint8_t halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ }\ -void ff_ ## OPNAME ## qpel8_mc33_old_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[16*9];\ - UINT8 halfH[72];\ - UINT8 halfV[64];\ - UINT8 halfHV[64];\ +void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[16*9];\ + uint8_t halfH[72];\ + uint8_t halfV[64];\ + uint8_t halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ }\ -static void OPNAME ## qpel8_mc33_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[16*9];\ - UINT8 halfH[72];\ - UINT8 halfHV[64];\ +static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[16*9];\ + uint8_t halfH[72];\ + uint8_t halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ }\ -static void OPNAME ## qpel8_mc21_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 halfH[72];\ - UINT8 halfHV[64];\ +static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t halfH[72];\ + uint8_t halfHV[64];\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ }\ -static void OPNAME ## qpel8_mc23_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 halfH[72];\ - UINT8 halfHV[64];\ +static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t halfH[72];\ + uint8_t halfHV[64];\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ }\ -void ff_ ## OPNAME ## qpel8_mc12_old_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[16*9];\ - UINT8 halfH[72];\ - UINT8 halfV[64];\ - UINT8 halfHV[64];\ +void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[16*9];\ + uint8_t halfH[72];\ + uint8_t halfV[64];\ + uint8_t halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\ }\ -static void OPNAME ## qpel8_mc12_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[16*9];\ - UINT8 halfH[72];\ +static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[16*9];\ + uint8_t halfH[72];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ }\ -void ff_ ## OPNAME ## qpel8_mc32_old_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[16*9];\ - UINT8 halfH[72];\ - UINT8 halfV[64];\ - UINT8 halfHV[64];\ +void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[16*9];\ + uint8_t halfH[72];\ + uint8_t halfV[64];\ + uint8_t halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\ }\ -static void OPNAME ## qpel8_mc32_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[16*9];\ - UINT8 halfH[72];\ +static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[16*9];\ + uint8_t halfH[72];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ }\ -static void OPNAME ## qpel8_mc22_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 halfH[72];\ +static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t halfH[72];\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ }\ -static void OPNAME ## qpel16_mc00_c (UINT8 *dst, UINT8 *src, int stride){\ +static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\ OPNAME ## pixels16_c(dst, src, stride, 16);\ }\ \ -static void OPNAME ## qpel16_mc10_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 half[256];\ +static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t half[256];\ put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\ }\ \ -static void OPNAME ## qpel16_mc20_c(UINT8 *dst, UINT8 *src, int stride){\ +static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\ }\ \ -static void OPNAME ## qpel16_mc30_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 half[256];\ +static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t half[256];\ put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\ }\ \ -static void OPNAME ## qpel16_mc01_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[24*17];\ - UINT8 half[256];\ +static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[24*17];\ + uint8_t half[256];\ copy_block17(full, src, 24, stride, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\ }\ \ -static void OPNAME ## qpel16_mc02_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[24*17];\ +static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[24*17];\ copy_block17(full, src, 24, stride, 17);\ OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\ }\ \ -static void OPNAME ## qpel16_mc03_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[24*17];\ - UINT8 half[256];\ +static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[24*17];\ + uint8_t half[256];\ copy_block17(full, src, 24, stride, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\ }\ -void ff_ ## OPNAME ## qpel16_mc11_old_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[24*17];\ - UINT8 halfH[272];\ - UINT8 halfV[256];\ - UINT8 halfHV[256];\ +void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[24*17];\ + uint8_t halfH[272];\ + uint8_t halfV[256];\ + uint8_t halfHV[256];\ copy_block17(full, src, 24, stride, 17);\ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ }\ -static void OPNAME ## qpel16_mc11_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[24*17];\ - UINT8 halfH[272];\ - UINT8 halfHV[256];\ +static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[24*17];\ + uint8_t halfH[272];\ + uint8_t halfHV[256];\ copy_block17(full, src, 24, stride, 17);\ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ }\ -void ff_ ## OPNAME ## qpel16_mc31_old_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[24*17];\ - UINT8 halfH[272];\ - UINT8 halfV[256];\ - UINT8 halfHV[256];\ +void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[24*17];\ + uint8_t halfH[272];\ + uint8_t halfV[256];\ + uint8_t halfHV[256];\ copy_block17(full, src, 24, stride, 17);\ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ }\ -static void OPNAME ## qpel16_mc31_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[24*17];\ - UINT8 halfH[272];\ - UINT8 halfHV[256];\ +static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[24*17];\ + uint8_t halfH[272];\ + uint8_t halfHV[256];\ copy_block17(full, src, 24, stride, 17);\ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ }\ -void ff_ ## OPNAME ## qpel16_mc13_old_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[24*17];\ - UINT8 halfH[272];\ - UINT8 halfV[256];\ - UINT8 halfHV[256];\ +void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[24*17];\ + uint8_t halfH[272];\ + uint8_t halfV[256];\ + uint8_t halfHV[256];\ copy_block17(full, src, 24, stride, 17);\ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ }\ -static void OPNAME ## qpel16_mc13_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[24*17];\ - UINT8 halfH[272];\ - UINT8 halfHV[256];\ +static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[24*17];\ + uint8_t halfH[272];\ + uint8_t halfHV[256];\ copy_block17(full, src, 24, stride, 17);\ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ }\ -void ff_ ## OPNAME ## qpel16_mc33_old_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[24*17];\ - UINT8 halfH[272];\ - UINT8 halfV[256];\ - UINT8 halfHV[256];\ +void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[24*17];\ + uint8_t halfH[272];\ + uint8_t halfV[256];\ + uint8_t halfHV[256];\ copy_block17(full, src, 24, stride, 17);\ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ }\ -static void OPNAME ## qpel16_mc33_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[24*17];\ - UINT8 halfH[272];\ - UINT8 halfHV[256];\ +static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[24*17];\ + uint8_t halfH[272];\ + uint8_t halfHV[256];\ copy_block17(full, src, 24, stride, 17);\ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ }\ -static void OPNAME ## qpel16_mc21_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 halfH[272];\ - UINT8 halfHV[256];\ +static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t halfH[272];\ + uint8_t halfHV[256];\ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ }\ -static void OPNAME ## qpel16_mc23_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 halfH[272];\ - UINT8 halfHV[256];\ +static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t halfH[272];\ + uint8_t halfHV[256];\ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ }\ -void ff_ ## OPNAME ## qpel16_mc12_old_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[24*17];\ - UINT8 halfH[272];\ - UINT8 halfV[256];\ - UINT8 halfHV[256];\ +void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[24*17];\ + uint8_t halfH[272];\ + uint8_t halfV[256];\ + uint8_t halfHV[256];\ copy_block17(full, src, 24, stride, 17);\ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\ }\ -static void OPNAME ## qpel16_mc12_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[24*17];\ - UINT8 halfH[272];\ +static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[24*17];\ + uint8_t halfH[272];\ copy_block17(full, src, 24, stride, 17);\ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ }\ -void ff_ ## OPNAME ## qpel16_mc32_old_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[24*17];\ - UINT8 halfH[272];\ - UINT8 halfV[256];\ - UINT8 halfHV[256];\ +void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[24*17];\ + uint8_t halfH[272];\ + uint8_t halfV[256];\ + uint8_t halfHV[256];\ copy_block17(full, src, 24, stride, 17);\ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\ }\ -static void OPNAME ## qpel16_mc32_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 full[24*17];\ - UINT8 halfH[272];\ +static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[24*17];\ + uint8_t halfH[272];\ copy_block17(full, src, 24, stride, 17);\ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ }\ -static void OPNAME ## qpel16_mc22_c(UINT8 *dst, UINT8 *src, int stride){\ - UINT8 halfH[272];\ +static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t halfH[272];\ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ } @@ -1406,7 +1423,7 @@ static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){ } -static inline int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size) +static inline int pix_abs16x16_c(uint8_t *pix1, uint8_t *pix2, int line_size) { int s, i; @@ -1434,7 +1451,7 @@ static inline int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size) return s; } -static int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) +static int pix_abs16x16_x2_c(uint8_t *pix1, uint8_t *pix2, int line_size) { int s, i; @@ -1462,10 +1479,10 @@ static int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) return s; } -static int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) +static int pix_abs16x16_y2_c(uint8_t *pix1, uint8_t *pix2, int line_size) { int s, i; - UINT8 *pix3 = pix2 + line_size; + uint8_t *pix3 = pix2 + line_size; s = 0; for(i=0;i<16;i++) { @@ -1492,10 +1509,10 @@ static int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) return s; } -static int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) +static int pix_abs16x16_xy2_c(uint8_t *pix1, uint8_t *pix2, int line_size) { int s, i; - UINT8 *pix3 = pix2 + line_size; + uint8_t *pix3 = pix2 + line_size; s = 0; for(i=0;i<16;i++) { @@ -1522,7 +1539,7 @@ static int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) return s; } -static inline int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size) +static inline int pix_abs8x8_c(uint8_t *pix1, uint8_t *pix2, int line_size) { int s, i; @@ -1542,7 +1559,7 @@ static inline int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size) return s; } -static int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) +static int pix_abs8x8_x2_c(uint8_t *pix1, uint8_t *pix2, int line_size) { int s, i; @@ -1562,10 +1579,10 @@ static int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) return s; } -static int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) +static int pix_abs8x8_y2_c(uint8_t *pix1, uint8_t *pix2, int line_size) { int s, i; - UINT8 *pix3 = pix2 + line_size; + uint8_t *pix3 = pix2 + line_size; s = 0; for(i=0;i<8;i++) { @@ -1584,10 +1601,10 @@ static int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) return s; } -static int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) +static int pix_abs8x8_xy2_c(uint8_t *pix1, uint8_t *pix2, int line_size) { int s, i; - UINT8 *pix3 = pix2 + line_size; + uint8_t *pix3 = pix2 + line_size; s = 0; for(i=0;i<8;i++) { @@ -1614,7 +1631,15 @@ static int sad8x8_c(void *s, uint8_t *a, uint8_t *b, int stride){ return pix_abs8x8_c(a,b,stride); } -void ff_block_permute(DCTELEM *block, UINT8 *permutation, const UINT8 *scantable, int last) +/** + * permutes an 8x8 block. + * @param block the block which will be permuted according to the given permutation vector + * @param permutation the permutation vector + * @param last the last non zero coefficient in scantable order, used to speed the permutation up + * @param scantable the used scantable, this is only used to speed the permutation up, the block is not + * (inverse) permutated to scantable order! + */ +void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last) { int i; DCTELEM temp[64]; @@ -1635,6 +1660,9 @@ void ff_block_permute(DCTELEM *block, UINT8 *permutation, const UINT8 *scantable } } +/** + * memset(blocks, 0, sizeof(DCTELEM)*6*64) + */ static void clear_blocks_c(DCTELEM *blocks) { memset(blocks, 0, sizeof(DCTELEM)*6*64); @@ -1788,7 +1816,7 @@ static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2 int sum=0, i; s->dsp.diff_pixels(temp, src1, src2, stride); - s->fdct(temp); + s->dsp.fdct(temp); for(i=0; i<64; i++) sum+= ABS(temp[i]); @@ -1823,7 +1851,7 @@ static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){ MpegEncContext * const s= (MpegEncContext *)c; - const UINT8 *scantable= s->intra_scantable.permutated; + const uint8_t *scantable= s->intra_scantable.permutated; uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; uint64_t __align8 aligned_bak[stride]; DCTELEM * const temp= (DCTELEM*)aligned_temp; @@ -1875,7 +1903,7 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int level= temp[i] + 64; - XINE_ASSERT(level - 64,"?"); + assert(level - 64); if((level&(~127)) == 0){ bits+= last_length[UNI_AC_ENC_INDEX(run, level)]; @@ -1888,7 +1916,7 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int s->dct_unquantize(s, temp, 0, s->qscale); } - s->idct_add(bak, stride, temp); + s->dsp.idct_add(bak, stride, temp); distoration= s->dsp.sse[1](NULL, bak, src1, stride); @@ -1897,7 +1925,7 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){ MpegEncContext * const s= (MpegEncContext *)c; - const UINT8 *scantable= s->intra_scantable.permutated; + const uint8_t *scantable= s->intra_scantable.permutated; uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; DCTELEM * const temp= (DCTELEM*)aligned_temp; int i, last, run, bits, level, start_i; @@ -1942,7 +1970,7 @@ static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, in level= temp[i] + 64; - XINE_ASSERT(level - 64,"?"); + assert(level - 64); if((level&(~127)) == 0){ bits+= last_length[UNI_AC_ENC_INDEX(run, level)]; @@ -1960,7 +1988,20 @@ WARPER88_1616(quant_psnr8x8_c, quant_psnr16x16_c) WARPER88_1616(rd8x8_c, rd16x16_c) WARPER88_1616(bit8x8_c, bit16x16_c) -void dsputil_init(DSPContext* c, unsigned mask) +/* XXX: those functions should be suppressed ASAP when all IDCTs are + converted */ +static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block) +{ + j_rev_dct (block); + put_pixels_clamped_c(block, dest, line_size); +} +static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block) +{ + j_rev_dct (block); + add_pixels_clamped_c(block, dest, line_size); +} + +void dsputil_init(DSPContext* c, AVCodecContext *avctx) { static int init_done = 0; int i; @@ -1981,6 +2022,23 @@ void dsputil_init(DSPContext* c, unsigned mask) init_done = 1; } +#ifdef CONFIG_ENCODERS + if(avctx->dct_algo==FF_DCT_FASTINT) + c->fdct = fdct_ifast; + else + c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default +#endif //CONFIG_ENCODERS + + if(avctx->idct_algo==FF_IDCT_INT){ + c->idct_put= ff_jref_idct_put; + c->idct_add= ff_jref_idct_add; + c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; + }else{ //accurate/default + c->idct_put= simple_idct_put; + c->idct_add= simple_idct_add; + c->idct_permutation_type= FF_NO_IDCT_PERM; + } + c->get_pixels = get_pixels_c; c->diff_pixels = diff_pixels_c; c->put_pixels_clamped = put_pixels_clamped_c; @@ -2083,37 +2141,43 @@ void dsputil_init(DSPContext* c, unsigned mask) c->diff_bytes= diff_bytes_c; #ifdef HAVE_MMX - dsputil_init_mmx(c, mask); - if (ff_bit_exact) - { - /* FIXME - AVCodec context should have flag for bitexact match */ - /* fprintf(stderr, "\n\n\nff_bit_exact %d\n\n\n\n", ff_bit_exact); */ - dsputil_set_bit_exact_mmx(c, mask); - } + dsputil_init_mmx(c, avctx); #endif #ifdef ARCH_ARMV4L - dsputil_init_armv4l(c, mask); + dsputil_init_armv4l(c, avctx); #endif #ifdef HAVE_MLIB - dsputil_init_mlib(c, mask); + dsputil_init_mlib(c, avctx); #endif #ifdef ARCH_ALPHA - dsputil_init_alpha(c, mask); + dsputil_init_alpha(c, avctx); #endif #ifdef ARCH_POWERPC - dsputil_init_ppc(c, mask); + dsputil_init_ppc(c, avctx); #endif #ifdef HAVE_MMI - dsputil_init_mmi(c, mask); + dsputil_init_mmi(c, avctx); #endif -} -/* remove any non bit exact operation (testing purpose) */ -void avcodec_set_bit_exact(void) -{ - ff_bit_exact=1; -#ifdef HAVE_MMX -// FIXME - better set_bit_exact -// dsputil_set_bit_exact_mmx(); -#endif + switch(c->idct_permutation_type){ + case FF_NO_IDCT_PERM: + for(i=0; i<64; i++) + c->idct_permutation[i]= i; + break; + case FF_LIBMPEG2_IDCT_PERM: + for(i=0; i<64; i++) + c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); + break; + case FF_SIMPLE_IDCT_PERM: + for(i=0; i<64; i++) + c->idct_permutation[i]= simple_mmx_permutation[i]; + break; + case FF_TRANSPOSE_IDCT_PERM: + for(i=0; i<64; i++) + c->idct_permutation[i]= ((i&7)<<3) | (i>>3); + break; + default: + fprintf(stderr, "Internal error, IDCT permutation not set\n"); + } } + |