diff options
Diffstat (limited to 'src/libffmpeg/libavcodec/dsputil.c')
-rw-r--r-- | src/libffmpeg/libavcodec/dsputil.c | 468 |
1 files changed, 437 insertions, 31 deletions
diff --git a/src/libffmpeg/libavcodec/dsputil.c b/src/libffmpeg/libavcodec/dsputil.c index dcfad05a5..945b7cc9d 100644 --- a/src/libffmpeg/libavcodec/dsputil.c +++ b/src/libffmpeg/libavcodec/dsputil.c @@ -1,32 +1,33 @@ /* * DSP utils - * Copyright (c) 2000, 2001 Gerard Lantau. + * Copyright (c) 2000, 2001 Fabrice Bellard. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. * - * This program is distributed in the hope that it will be useful, + * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * gmc & q-pel support by Michael Niedermayer <michaelni@gmx.at> + * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> */ -#include <stdlib.h> -#include <stdio.h> -#include <math.h> #include "avcodec.h" #include "dsputil.h" #include "simple_idct.h" void (*ff_idct)(DCTELEM *block); +void (*ff_idct_put)(UINT8 *dest, int line_size, DCTELEM *block); +void (*ff_idct_add)(UINT8 *dest, int line_size, DCTELEM *block); +void (*av_fdct)(DCTELEM *block); void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); +void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); @@ -45,8 +46,10 @@ op_pixels_abs_func pix_abs8x8_xy2; UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; UINT32 squareTbl[512]; -extern UINT16 default_intra_matrix[64]; -extern UINT16 default_non_intra_matrix[64]; +extern INT16 default_intra_matrix[64]; +extern INT16 default_non_intra_matrix[64]; +extern INT16 ff_mpeg4_default_intra_matrix[64]; +extern INT16 ff_mpeg4_default_non_intra_matrix[64]; UINT8 zigzag_direct[64] = { 0, 1, 8, 16, 9, 2, 3, 10, @@ -87,6 +90,8 @@ UINT8 ff_alternate_vertical_scan[64] = { 38, 46, 54, 62, 39, 47, 55, 63, }; +#ifdef SIMPLE_IDCT + /* Input permutation for the simple_idct_mmx */ static UINT8 simple_mmx_permutation[64]={ 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, @@ -98,6 +103,7 @@ static UINT8 simple_mmx_permutation[64]={ 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, }; +#endif /* a*inverse[b]>>32 == a/b for all 
0<=a<=65536 && 2<=b<=255 */ UINT32 inverse[256]={ @@ -141,7 +147,7 @@ UINT8 zigzag_end[64]; UINT8 permutation[64]; //UINT8 invPermutation[64]; -static void build_zigzag_end() +static void build_zigzag_end(void) { int lastIndex; int lastIndexAfterPerm=0; @@ -176,6 +182,28 @@ void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size) } } +void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride){ + DCTELEM *p; + int i; + + /* read the pixels */ + p = block; + for(i=0;i<8;i++) { + p[0] = s1[0] - s2[0]; + p[1] = s1[1] - s2[1]; + p[2] = s1[2] - s2[2]; + p[3] = s1[3] - s2[3]; + p[4] = s1[4] - s2[4]; + p[5] = s1[5] - s2[5]; + p[6] = s1[6] - s2[6]; + p[7] = s1[7] - s2[7]; + s1 += stride; + s2 += stride; + p += 8; + } +} + + void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size) { const DCTELEM *p; @@ -224,6 +252,358 @@ void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size) } } +#ifdef __GNUC__ + +struct unaligned_64 { uint64_t l; } __attribute__((packed)); +struct unaligned_32 { uint32_t l; } __attribute__((packed)); + +#define LD32(a) (((const struct unaligned_32 *) (a))->l) +#define LD64(a) (((const struct unaligned_64 *) (a))->l) + +#else /* __GNUC__ */ + +#define LD32(a) (*((uint32_t*)(a))) +#define LD64(a) (*((uint64_t*)(a))) + +#endif /* !__GNUC__ */ + +#if 0 + +#define PIXOP2(OPNAME, OP) \ +void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + for(i=0; i<h; i++){\ + OP(*((uint64_t*)block), LD64(pixels));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +void OPNAME ## _no_rnd_pixels_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + for(i=0; i<h; i++){\ + const uint64_t a= LD64(pixels );\ + const uint64_t b= LD64(pixels+1);\ + OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +void OPNAME ## _pixels_x2(uint8_t *block, const 
uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + for(i=0; i<h; i++){\ + const uint64_t a= LD64(pixels );\ + const uint64_t b= LD64(pixels+1);\ + OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +void OPNAME ## _no_rnd_pixels_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + for(i=0; i<h; i++){\ + const uint64_t a= LD64(pixels );\ + const uint64_t b= LD64(pixels+line_size);\ + OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +void OPNAME ## _pixels_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + for(i=0; i<h; i++){\ + const uint64_t a= LD64(pixels );\ + const uint64_t b= LD64(pixels+line_size);\ + OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +void OPNAME ## _pixels_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + const uint64_t a= LD64(pixels );\ + const uint64_t b= LD64(pixels+1);\ + uint64_t l0= (a&0x0303030303030303ULL)\ + + (b&0x0303030303030303ULL)\ + + 0x0202020202020202ULL;\ + uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ + + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ + uint64_t l1,h1;\ +\ + pixels+=line_size;\ + for(i=0; i<h; i+=2){\ + uint64_t a= LD64(pixels );\ + uint64_t b= LD64(pixels+1);\ + l1= (a&0x0303030303030303ULL)\ + + (b&0x0303030303030303ULL);\ + h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ + + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ + OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ + pixels+=line_size;\ + block +=line_size;\ + a= LD64(pixels );\ + b= LD64(pixels+1);\ + l0= (a&0x0303030303030303ULL)\ + + (b&0x0303030303030303ULL)\ + + 0x0202020202020202ULL;\ + h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ + + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ + OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ + 
pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +void OPNAME ## _no_rnd_pixels_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + const uint64_t a= LD64(pixels );\ + const uint64_t b= LD64(pixels+1);\ + uint64_t l0= (a&0x0303030303030303ULL)\ + + (b&0x0303030303030303ULL)\ + + 0x0101010101010101ULL;\ + uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ + + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ + uint64_t l1,h1;\ +\ + pixels+=line_size;\ + for(i=0; i<h; i+=2){\ + uint64_t a= LD64(pixels );\ + uint64_t b= LD64(pixels+1);\ + l1= (a&0x0303030303030303ULL)\ + + (b&0x0303030303030303ULL);\ + h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ + + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ + OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ + pixels+=line_size;\ + block +=line_size;\ + a= LD64(pixels );\ + b= LD64(pixels+1);\ + l0= (a&0x0303030303030303ULL)\ + + (b&0x0303030303030303ULL)\ + + 0x0101010101010101ULL;\ + h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ + + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ + OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +void (*OPNAME ## _pixels_tab[4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\ + OPNAME ## _pixels,\ + OPNAME ## _pixels_x2,\ + OPNAME ## _pixels_y2,\ + OPNAME ## _pixels_xy2,\ +};\ +\ +void (*OPNAME ## _no_rnd_pixels_tab[4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\ + OPNAME ## _pixels,\ + OPNAME ## _no_rnd_pixels_x2,\ + OPNAME ## _no_rnd_pixels_y2,\ + OPNAME ## _no_rnd_pixels_xy2,\ +}; + +#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) +#else // 64 bit variant + +#define PIXOP2(OPNAME, OP) \ +void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + for(i=0; i<h; i++){\ + OP(*((uint32_t*)(block )), LD32(pixels ));\ + OP(*((uint32_t*)(block+4)), LD32(pixels+4));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ 
+void OPNAME ## _no_rnd_pixels_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + for(i=0; i<h; i++){\ + int j;\ + for(j=0; j<2; j++){\ + const uint32_t a= LD32(pixels );\ + const uint32_t b= LD32(pixels+1);\ + OP(*((uint32_t*)block), (a&b) + (((a^b)&0xFEFEFEFEUL)>>1));\ + pixels+=4;\ + block +=4;\ + }\ + pixels+=line_size-8;\ + block +=line_size-8;\ + }\ +}\ +\ +void OPNAME ## _pixels_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + for(i=0; i<h; i++){\ + int j;\ + for(j=0; j<2; j++){\ + const uint32_t a= LD32(pixels );\ + const uint32_t b= LD32(pixels+1);\ + OP(*((uint32_t*)block), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\ + pixels+=4;\ + block +=4;\ + }\ + pixels+=line_size-8;\ + block +=line_size-8;\ + }\ +}\ +\ +void OPNAME ## _no_rnd_pixels_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + for(i=0; i<h; i++){\ + int j;\ + for(j=0; j<2; j++){\ + const uint32_t a= LD32(pixels );\ + const uint32_t b= LD32(pixels+line_size);\ + OP(*((uint32_t*)block), (a&b) + (((a^b)&0xFEFEFEFEUL)>>1));\ + pixels+=4;\ + block +=4;\ + }\ + pixels+=line_size-8;\ + block +=line_size-8;\ + }\ +}\ +\ +void OPNAME ## _pixels_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + for(i=0; i<h; i++){\ + int j;\ + for(j=0; j<2; j++){\ + const uint32_t a= LD32(pixels );\ + const uint32_t b= LD32(pixels+line_size);\ + OP(*((uint32_t*)block), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\ + pixels+=4;\ + block +=4;\ + }\ + pixels+=line_size-8;\ + block +=line_size-8;\ + }\ +}\ +\ +void OPNAME ## _pixels_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int j;\ + for(j=0; j<2; j++){\ + int i;\ + const uint32_t a= LD32(pixels );\ + const uint32_t b= LD32(pixels+1);\ + uint32_t l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x02020202UL;\ + uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + uint32_t l1,h1;\ +\ + pixels+=line_size;\ + for(i=0; i<h; i+=2){\ 
+ uint32_t a= LD32(pixels );\ + uint32_t b= LD32(pixels+1);\ + l1= (a&0x03030303UL)\ + + (b&0x03030303UL);\ + h1= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + pixels+=line_size;\ + block +=line_size;\ + a= LD32(pixels );\ + b= LD32(pixels+1);\ + l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x02020202UL;\ + h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + pixels+=line_size;\ + block +=line_size;\ + }\ + pixels+=4-line_size*(h+1);\ + block +=4-line_size*h;\ + }\ +}\ +\ +void OPNAME ## _no_rnd_pixels_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int j;\ + for(j=0; j<2; j++){\ + int i;\ + const uint32_t a= LD32(pixels );\ + const uint32_t b= LD32(pixels+1);\ + uint32_t l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x01010101UL;\ + uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + uint32_t l1,h1;\ +\ + pixels+=line_size;\ + for(i=0; i<h; i+=2){\ + uint32_t a= LD32(pixels );\ + uint32_t b= LD32(pixels+1);\ + l1= (a&0x03030303UL)\ + + (b&0x03030303UL);\ + h1= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + pixels+=line_size;\ + block +=line_size;\ + a= LD32(pixels );\ + b= LD32(pixels+1);\ + l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x01010101UL;\ + h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + pixels+=line_size;\ + block +=line_size;\ + }\ + pixels+=4-line_size*(h+1);\ + block +=4-line_size*h;\ + }\ +}\ +\ +void (*OPNAME ## _pixels_tab[4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\ + OPNAME ## _pixels,\ + OPNAME ## _pixels_x2,\ + OPNAME ## _pixels_y2,\ + OPNAME ## _pixels_xy2,\ +};\ +\ +void (*OPNAME ## _no_rnd_pixels_tab[4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\ + OPNAME ## _pixels,\ + 
OPNAME ## _no_rnd_pixels_x2,\ + OPNAME ## _no_rnd_pixels_y2,\ + OPNAME ## _no_rnd_pixels_xy2,\ +}; +#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) ) +#endif + +#define op_put(a, b) a = b + +PIXOP2(avg, op_avg) +PIXOP2(put, op_put) +#undef op_avg +#undef op_put + +#if 0 +/* FIXME this stuff could be removed as it's not really used anymore */ #define PIXOP(BTYPE, OPNAME, OP, INCR) \ \ static void OPNAME ## _pixels(BTYPE *block, const UINT8 *pixels, int line_size, int h) \ @@ -323,18 +703,13 @@ void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels, int line_siz OPNAME ## _pixels_xy2, \ }; - /* rounding primitives */ #define avg2(a,b) ((a+b+1)>>1) #define avg4(a,b,c,d) ((a+b+c+d+2)>>2) -#define op_put(a, b) a = b #define op_avg(a, b) a = avg2(a, b) #define op_sub(a, b) a -= b -PIXOP(UINT8, put, op_put, line_size) -PIXOP(UINT8, avg, op_avg, line_size) - PIXOP(DCTELEM, sub, op_sub, 8) /* not rounding primitives */ @@ -343,13 +718,12 @@ PIXOP(DCTELEM, sub, op_sub, 8) #define avg2(a,b) ((a+b)>>1) #define avg4(a,b,c,d) ((a+b+c+d+1)>>2) -PIXOP(UINT8, put_no_rnd, op_put, line_size) -PIXOP(UINT8, avg_no_rnd, op_avg, line_size) - /* motion estimation */ #undef avg2 #undef avg4 +#endif + #define avg2(a,b) ((a+b+1)>>1) #define avg4(a,b,c,d) ((a+b+c+d+2)>>2) @@ -872,6 +1246,20 @@ void clear_blocks_c(DCTELEM *blocks) memset(blocks, 0, sizeof(DCTELEM)*6*64); } +/* XXX: those functions should be suppressed ASAP when all IDCTs are + converted */ +void gen_idct_put(UINT8 *dest, int line_size, DCTELEM *block) +{ + ff_idct (block); + put_pixels_clamped(block, dest, line_size); +} + +void gen_idct_add(UINT8 *dest, int line_size, DCTELEM *block) +{ + ff_idct (block); + add_pixels_clamped(block, dest, line_size); +} + void dsputil_init(void) { int i, j; @@ -888,11 +1276,12 @@ void dsputil_init(void) } #ifdef SIMPLE_IDCT - ff_idct = simple_idct; + ff_idct = NULL; #else ff_idct = j_rev_dct; #endif get_pixels = get_pixels_c; + diff_pixels = diff_pixels_c; 
put_pixels_clamped = put_pixels_clamped_c; add_pixels_clamped = add_pixels_clamped_c; gmc1= gmc1_c; @@ -906,7 +1295,7 @@ void dsputil_init(void) pix_abs8x8_x2 = pix_abs8x8_x2_c; pix_abs8x8_y2 = pix_abs8x8_y2_c; pix_abs8x8_xy2 = pix_abs8x8_xy2_c; - av_fdct = jpeg_fdct_ifast; + av_fdct = fdct_ifast; use_permuted_idct = 1; @@ -925,9 +1314,16 @@ void dsputil_init(void) use_permuted_idct = 0; #endif -#ifdef SIMPLE_IDCT - if(ff_idct == simple_idct) use_permuted_idct=0; -#endif +//#ifdef SIMPLE_IDCT + if (ff_idct == NULL) { + ff_idct_put = simple_idct_put; + ff_idct_add = simple_idct_add; + use_permuted_idct=0; + } else { + ff_idct_put = gen_idct_put; + ff_idct_add = gen_idct_add; + } +//#endif if(use_permuted_idct) #ifdef SIMPLE_IDCT @@ -953,11 +1349,21 @@ void dsputil_init(void) } block_permute(default_intra_matrix); block_permute(default_non_intra_matrix); + block_permute(ff_mpeg4_default_intra_matrix); + block_permute(ff_mpeg4_default_non_intra_matrix); } build_zigzag_end(); } +/* remove any non bit exact operation (testing purpose) */ +void avcodec_set_bit_exact(void) +{ +#ifdef HAVE_MMX + dsputil_set_bit_exact_mmx(); +#endif +} + void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3], int orig_linesize[3], int coded_linesize, AVCodecContext *avctx) |