From 875da7c6e187d3aacaac0107deb45200286dfa7d Mon Sep 17 00:00:00 2001 From: Mike Melanson Date: Sat, 28 May 2005 01:40:40 +0000 Subject: sync to libavcodec build 4755 CVS patchset: 7567 CVS date: 2005/05/28 01:40:40 --- src/libffmpeg/libavcodec/armv4l/dsputil_arm.c | 178 ++++ src/libffmpeg/libavcodec/armv4l/mpegvideo_arm.c | 5 + src/libffmpeg/libavcodec/asv1.c | 19 - src/libffmpeg/libavcodec/avcodec.h | 19 +- src/libffmpeg/libavcodec/bitstream.c | 20 +- src/libffmpeg/libavcodec/bitstream.h | 36 +- src/libffmpeg/libavcodec/bswap.h | 73 +- src/libffmpeg/libavcodec/common.h | 31 +- src/libffmpeg/libavcodec/dsputil.c | 59 +- src/libffmpeg/libavcodec/dsputil.h | 43 +- src/libffmpeg/libavcodec/dv.c | 2 + src/libffmpeg/libavcodec/ffv1.c | 4 +- src/libffmpeg/libavcodec/g726.c | 2 +- src/libffmpeg/libavcodec/h263.c | 11 +- src/libffmpeg/libavcodec/h264.c | 94 +-- src/libffmpeg/libavcodec/huffyuv.c | 4 +- src/libffmpeg/libavcodec/i386/dsputil_mmx.c | 480 +++++++---- src/libffmpeg/libavcodec/i386/vp3dsp_mmx.c | 247 +----- src/libffmpeg/libavcodec/i386/vp3dsp_sse2.c | 16 +- src/libffmpeg/libavcodec/indeo2.c | 14 +- src/libffmpeg/libavcodec/indeo2data.h | 39 + src/libffmpeg/libavcodec/libpostproc/postprocess.c | 1 + .../libavcodec/libpostproc/postprocess_template.c | 4 +- src/libffmpeg/libavcodec/mpeg12.c | 10 +- src/libffmpeg/libavcodec/mpegvideo.c | 17 +- src/libffmpeg/libavcodec/raw.c | 2 + src/libffmpeg/libavcodec/snow.c | 4 +- src/libffmpeg/libavcodec/utils.c | 35 +- src/libffmpeg/libavcodec/vp3.c | 910 +++++++++++++++++---- src/libffmpeg/libavcodec/vp3data.h | 357 +++++++- src/libffmpeg/libavcodec/vp3dsp.c | 157 ++-- src/libffmpeg/libavcodec/wmv2.c | 6 +- 32 files changed, 2044 insertions(+), 855 deletions(-) diff --git a/src/libffmpeg/libavcodec/armv4l/dsputil_arm.c b/src/libffmpeg/libavcodec/armv4l/dsputil_arm.c index ff61097d7..4ba628096 100644 --- a/src/libffmpeg/libavcodec/armv4l/dsputil_arm.c +++ b/src/libffmpeg/libavcodec/armv4l/dsputil_arm.c @@ -18,6 +18,11 @@ */ 
#include "../dsputil.h" +#ifdef HAVE_IPP +#include "ipp.h" +#endif + +extern void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx); extern void j_rev_dct_ARM(DCTELEM *data); extern void simple_idct_ARM(DCTELEM *data); @@ -26,6 +31,117 @@ extern void simple_idct_ARM(DCTELEM *data); static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size); static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size); +void put_pixels8_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); +void put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); +void put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); +void put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); + +void put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); +void put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); +void put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); + +void put_pixels16_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); + +CALL_2X_PIXELS(put_pixels16_x2_arm , put_pixels8_x2_arm , 8) +CALL_2X_PIXELS(put_pixels16_y2_arm , put_pixels8_y2_arm , 8) +CALL_2X_PIXELS(put_pixels16_xy2_arm, put_pixels8_xy2_arm, 8) +CALL_2X_PIXELS(put_no_rnd_pixels16_x2_arm , put_no_rnd_pixels8_x2_arm , 8) +CALL_2X_PIXELS(put_no_rnd_pixels16_y2_arm , put_no_rnd_pixels8_y2_arm , 8) +CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_arm, put_no_rnd_pixels8_xy2_arm, 8) + +static void add_pixels_clamped_ARM(short *block, unsigned char *dest, int line_size) +{ + asm volatile ( + "mov r10, #8 \n\t" + + "1: \n\t" + + /* load dest */ + "ldr r4, [%1] \n\t" + /* block[0] and block[1]*/ + "ldrsh r5, [%0] \n\t" + "ldrsh r7, [%0, #2] \n\t" + "and r6, r4, #0xFF \n\t" + "and r8, r4, #0xFF00 \n\t" + "add r6, r5, r6 \n\t" + "add r8, r7, r8, lsr #8 \n\t" + "mvn r5, 
r5 \n\t" + "mvn r7, r7 \n\t" + "tst r6, #0x100 \n\t" + "movne r6, r5, lsr #24 \n\t" + "tst r8, #0x100 \n\t" + "movne r8, r7, lsr #24 \n\t" + "mov r9, r6 \n\t" + "ldrsh r5, [%0, #4] \n\t" /* moved from [A] */ + "orr r9, r9, r8, lsl #8 \n\t" + /* block[2] and block[3] */ + /* [A] */ + "ldrsh r7, [%0, #6] \n\t" + "and r6, r4, #0xFF0000 \n\t" + "and r8, r4, #0xFF000000 \n\t" + "add r6, r5, r6, lsr #16 \n\t" + "add r8, r7, r8, lsr #24 \n\t" + "mvn r5, r5 \n\t" + "mvn r7, r7 \n\t" + "tst r6, #0x100 \n\t" + "movne r6, r5, lsr #24 \n\t" + "tst r8, #0x100 \n\t" + "movne r8, r7, lsr #24 \n\t" + "orr r9, r9, r6, lsl #16 \n\t" + "ldr r4, [%1, #4] \n\t" /* moved from [B] */ + "orr r9, r9, r8, lsl #24 \n\t" + /* store dest */ + "ldrsh r5, [%0, #8] \n\t" /* moved from [C] */ + "str r9, [%1] \n\t" + + /* load dest */ + /* [B] */ + /* block[4] and block[5] */ + /* [C] */ + "ldrsh r7, [%0, #10] \n\t" + "and r6, r4, #0xFF \n\t" + "and r8, r4, #0xFF00 \n\t" + "add r6, r5, r6 \n\t" + "add r8, r7, r8, lsr #8 \n\t" + "mvn r5, r5 \n\t" + "mvn r7, r7 \n\t" + "tst r6, #0x100 \n\t" + "movne r6, r5, lsr #24 \n\t" + "tst r8, #0x100 \n\t" + "movne r8, r7, lsr #24 \n\t" + "mov r9, r6 \n\t" + "ldrsh r5, [%0, #12] \n\t" /* moved from [D] */ + "orr r9, r9, r8, lsl #8 \n\t" + /* block[6] and block[7] */ + /* [D] */ + "ldrsh r7, [%0, #14] \n\t" + "and r6, r4, #0xFF0000 \n\t" + "and r8, r4, #0xFF000000 \n\t" + "add r6, r5, r6, lsr #16 \n\t" + "add r8, r7, r8, lsr #24 \n\t" + "mvn r5, r5 \n\t" + "mvn r7, r7 \n\t" + "tst r6, #0x100 \n\t" + "movne r6, r5, lsr #24 \n\t" + "tst r8, #0x100 \n\t" + "movne r8, r7, lsr #24 \n\t" + "orr r9, r9, r6, lsl #16 \n\t" + "add %0, %0, #16 \n\t" /* moved from [E] */ + "orr r9, r9, r8, lsl #24 \n\t" + "subs r10, r10, #1 \n\t" /* moved from [F] */ + /* store dest */ + "str r9, [%1, #4] \n\t" + + /* [E] */ + /* [F] */ + "add %1, %1, %2 \n\t" + "bne 1b \n\t" + : "+r"(block), + "+r"(dest) + : "r"(line_size) + : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc", "memory" ); +} 
+ /* XXX: those functions should be suppressed ASAP when all IDCTs are converted */ static void j_rev_dct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block) @@ -48,6 +164,32 @@ static void simple_idct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block) simple_idct_ARM (block); ff_add_pixels_clamped(block, dest, line_size); } +static void simple_idct_ipp(DCTELEM *block) +{ +#ifdef HAVE_IPP + ippiDCT8x8Inv_Video_16s_C1I(block); +#endif +} +static void simple_idct_ipp_put(uint8_t *dest, int line_size, DCTELEM *block) +{ +#ifdef HAVE_IPP + ippiDCT8x8Inv_Video_16s8u_C1R(block, dest, line_size); +#endif +} + +void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size); + +static void simple_idct_ipp_add(uint8_t *dest, int line_size, DCTELEM *block) +{ +#ifdef HAVE_IPP + ippiDCT8x8Inv_Video_16s_C1I(block); +#ifdef HAVE_IWMMXT + add_pixels_clamped_iwmmxt(block, dest, line_size); +#else + add_pixels_clamped_ARM(block, dest, line_size); +#endif +#endif +} void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx) { @@ -56,7 +198,11 @@ void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx) ff_put_pixels_clamped = c->put_pixels_clamped; ff_add_pixels_clamped = c->add_pixels_clamped; +#ifdef HAVE_IPP + if(idct_algo==FF_IDCT_ARM){ +#else if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_ARM){ +#endif c->idct_put= j_rev_dct_ARM_put; c->idct_add= j_rev_dct_ARM_add; c->idct = j_rev_dct_ARM; @@ -66,5 +212,37 @@ void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx) c->idct_add= simple_idct_ARM_add; c->idct = simple_idct_ARM; c->idct_permutation_type= FF_NO_IDCT_PERM; +#ifdef HAVE_IPP + } else if (idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_IPP){ +#else + } else if (idct_algo==FF_IDCT_IPP){ +#endif + c->idct_put= simple_idct_ipp_put; + c->idct_add= simple_idct_ipp_add; + c->idct = simple_idct_ipp; + c->idct_permutation_type= FF_NO_IDCT_PERM; } + +/* c->put_pixels_tab[0][0] = put_pixels16_arm; */ // NG! 
+ c->put_pixels_tab[0][1] = put_pixels16_x2_arm; //OK! + c->put_pixels_tab[0][2] = put_pixels16_y2_arm; //OK! +/* c->put_pixels_tab[0][3] = put_pixels16_xy2_arm; /\* NG *\/ */ +/* c->put_no_rnd_pixels_tab[0][0] = put_pixels16_arm; // ?(not used) */ + c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_arm; // OK + c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_arm; //OK +/* c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_arm; //NG */ + c->put_pixels_tab[1][0] = put_pixels8_arm; //OK + c->put_pixels_tab[1][1] = put_pixels8_x2_arm; //OK +/* c->put_pixels_tab[1][2] = put_pixels8_y2_arm; //NG */ +/* c->put_pixels_tab[1][3] = put_pixels8_xy2_arm; //NG */ + c->put_no_rnd_pixels_tab[1][0] = put_pixels8_arm;//OK + c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_arm; //OK + c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_arm; //OK +/* c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_arm;//NG */ + +#if 1 +#ifdef HAVE_IWMMXT + dsputil_init_iwmmxt(c, avctx); +#endif +#endif } diff --git a/src/libffmpeg/libavcodec/armv4l/mpegvideo_arm.c b/src/libffmpeg/libavcodec/armv4l/mpegvideo_arm.c index 9c5d8bf86..6e4c9fb3c 100644 --- a/src/libffmpeg/libavcodec/armv4l/mpegvideo_arm.c +++ b/src/libffmpeg/libavcodec/armv4l/mpegvideo_arm.c @@ -21,6 +21,11 @@ #include "../mpegvideo.h" #include "../avcodec.h" +extern void MPV_common_init_iwmmxt(MpegEncContext *s); + void MPV_common_init_armv4l(MpegEncContext *s) { +#ifdef HAVE_IWMMXT + MPV_common_init_iwmmxt(s); +#endif } diff --git a/src/libffmpeg/libavcodec/asv1.c b/src/libffmpeg/libavcodec/asv1.c index 13976db61..e07880e4f 100644 --- a/src/libffmpeg/libavcodec/asv1.c +++ b/src/libffmpeg/libavcodec/asv1.c @@ -63,25 +63,6 @@ static const uint8_t scantab[64]={ }; -const uint8_t ff_reverse[256]={ -0x00,0x80,0x40,0xC0,0x20,0xA0,0x60,0xE0,0x10,0x90,0x50,0xD0,0x30,0xB0,0x70,0xF0, -0x08,0x88,0x48,0xC8,0x28,0xA8,0x68,0xE8,0x18,0x98,0x58,0xD8,0x38,0xB8,0x78,0xF8, 
-0x04,0x84,0x44,0xC4,0x24,0xA4,0x64,0xE4,0x14,0x94,0x54,0xD4,0x34,0xB4,0x74,0xF4, -0x0C,0x8C,0x4C,0xCC,0x2C,0xAC,0x6C,0xEC,0x1C,0x9C,0x5C,0xDC,0x3C,0xBC,0x7C,0xFC, -0x02,0x82,0x42,0xC2,0x22,0xA2,0x62,0xE2,0x12,0x92,0x52,0xD2,0x32,0xB2,0x72,0xF2, -0x0A,0x8A,0x4A,0xCA,0x2A,0xAA,0x6A,0xEA,0x1A,0x9A,0x5A,0xDA,0x3A,0xBA,0x7A,0xFA, -0x06,0x86,0x46,0xC6,0x26,0xA6,0x66,0xE6,0x16,0x96,0x56,0xD6,0x36,0xB6,0x76,0xF6, -0x0E,0x8E,0x4E,0xCE,0x2E,0xAE,0x6E,0xEE,0x1E,0x9E,0x5E,0xDE,0x3E,0xBE,0x7E,0xFE, -0x01,0x81,0x41,0xC1,0x21,0xA1,0x61,0xE1,0x11,0x91,0x51,0xD1,0x31,0xB1,0x71,0xF1, -0x09,0x89,0x49,0xC9,0x29,0xA9,0x69,0xE9,0x19,0x99,0x59,0xD9,0x39,0xB9,0x79,0xF9, -0x05,0x85,0x45,0xC5,0x25,0xA5,0x65,0xE5,0x15,0x95,0x55,0xD5,0x35,0xB5,0x75,0xF5, -0x0D,0x8D,0x4D,0xCD,0x2D,0xAD,0x6D,0xED,0x1D,0x9D,0x5D,0xDD,0x3D,0xBD,0x7D,0xFD, -0x03,0x83,0x43,0xC3,0x23,0xA3,0x63,0xE3,0x13,0x93,0x53,0xD3,0x33,0xB3,0x73,0xF3, -0x0B,0x8B,0x4B,0xCB,0x2B,0xAB,0x6B,0xEB,0x1B,0x9B,0x5B,0xDB,0x3B,0xBB,0x7B,0xFB, -0x07,0x87,0x47,0xC7,0x27,0xA7,0x67,0xE7,0x17,0x97,0x57,0xD7,0x37,0xB7,0x77,0xF7, -0x0F,0x8F,0x4F,0xCF,0x2F,0xAF,0x6F,0xEF,0x1F,0x9F,0x5F,0xDF,0x3F,0xBF,0x7F,0xFF, -}; - static const uint8_t ccp_tab[17][2]={ {0x2,2}, {0x7,5}, {0xB,5}, {0x3,5}, {0xD,5}, {0x5,5}, {0x9,5}, {0x1,5}, diff --git a/src/libffmpeg/libavcodec/avcodec.h b/src/libffmpeg/libavcodec/avcodec.h index 4de1dd573..bbceb2329 100644 --- a/src/libffmpeg/libavcodec/avcodec.h +++ b/src/libffmpeg/libavcodec/avcodec.h @@ -17,7 +17,7 @@ extern "C" { #define FFMPEG_VERSION_INT 0x000409 #define FFMPEG_VERSION "0.4.9-pre1" -#define LIBAVCODEC_BUILD 4754 +#define LIBAVCODEC_BUILD 4755 #define LIBAVCODEC_VERSION_INT FFMPEG_VERSION_INT #define LIBAVCODEC_VERSION FFMPEG_VERSION @@ -116,6 +116,7 @@ enum CodecID { CODEC_ID_WNV1, CODEC_ID_AASC, CODEC_ID_INDEO2, + CODEC_ID_FRAPS, /* various pcm "codecs" */ CODEC_ID_PCM_S16LE= 0x10000, @@ -178,6 +179,7 @@ enum CodecID { CODEC_ID_SHORTEN, CODEC_ID_ALAC, CODEC_ID_WESTWOOD_SND1, + CODEC_ID_GSM, 
CODEC_ID_OGGTHEORA= 0x16000, @@ -961,7 +963,12 @@ typedef struct AVCodecContext { * - decoding: unused */ int strict_std_compliance; - +#define FF_COMPLIANCE_VERY_STRICT 2 ///< strictly conform to an older more strict version of the spec or reference software +#define FF_COMPLIANCE_STRICT 1 ///< strictly conform to all the things in the spec no matter what the consequences +#define FF_COMPLIANCE_NORMAL 0 +#define FF_COMPLIANCE_INOFFICIAL -1 ///< allow unofficial extensions +#define FF_COMPLIANCE_EXPERIMENTAL -2 ///< allow non-standardized experimental things + /** * qscale offset between ip and b frames. * if > 0 then the last p frame quantizer will be used (q= lastp_q*factor+offset) @@ -1179,6 +1186,8 @@ typedef struct AVCodecContext { #define FF_IDCT_SH4 9 #define FF_IDCT_SIMPLEARM 10 #define FF_IDCT_H264 11 +#define FF_IDCT_VP3 12 +#define FF_IDCT_IPP 13 /** * slice count. @@ -2019,6 +2028,10 @@ extern AVCodec aasc_decoder; extern AVCodec alac_decoder; extern AVCodec ws_snd1_decoder; extern AVCodec indeo2_decoder; +extern AVCodec vorbis_decoder; +extern AVCodec fraps_decoder; +extern AVCodec libgsm_encoder; +extern AVCodec libgsm_decoder; /* pcm codecs */ #define PCM_CODEC(id, name) \ @@ -2359,6 +2372,8 @@ extern void av_log_set_callback(void (*)(void*, int, const char*, va_list)); ((uint8_t*)(x))[0]) #endif +extern unsigned int av_xiphlacing(unsigned char *s, unsigned int v); + /* unused static macro */ #if defined(__GNUC__) && !defined(DEBUG) /* since we do not compile the encoder part of ffmpeg, some static diff --git a/src/libffmpeg/libavcodec/bitstream.c b/src/libffmpeg/libavcodec/bitstream.c index 2678772c4..fcd4fd9cb 100755 --- a/src/libffmpeg/libavcodec/bitstream.c +++ b/src/libffmpeg/libavcodec/bitstream.c @@ -132,14 +132,14 @@ static int build_table(VLC *vlc, int table_nb_bits, int nb_codes, const void *bits, int bits_wrap, int bits_size, const void *codes, int codes_wrap, int codes_size, - uint32_t 
code_prefix, int n_prefix, int flags) { - int i, j, k, n, table_size, table_index, nb, n1, index; + int i, j, k, n, table_size, table_index, nb, n1, index, code_prefix2; uint32_t code; VLC_TYPE (*table)[2]; table_size = 1 << table_nb_bits; - table_index = alloc_table(vlc, table_size, use_static); + table_index = alloc_table(vlc, table_size, flags & INIT_VLC_USE_STATIC); #ifdef DEBUG_VLC printf("new table index=%d size=%d code_prefix=%x n=%d\n", table_index, table_size, code_prefix, n_prefix); @@ -165,12 +165,18 @@ static int build_table(VLC *vlc, int table_nb_bits, #endif /* if code matches the prefix, it is in the table */ n -= n_prefix; - if (n > 0 && (code >> n) == code_prefix) { + if(flags & INIT_VLC_LE) + code_prefix2= code & (n_prefix>=32 ? 0xffffffff : (1 << n_prefix)-1); + else + code_prefix2= code >> n; + if (n > 0 && code_prefix2 == code_prefix) { if (n <= table_nb_bits) { /* no need to add another table */ j = (code << (table_nb_bits - n)) & (table_size - 1); nb = 1 << (table_nb_bits - n); for(k=0;k> n_prefix) + (k<> n) & ((1 << table_nb_bits) - 1); + j = (code >> ((flags & INIT_VLC_LE) ? n_prefix : n)) & ((1 << table_nb_bits) - 1); #ifdef DEBUG_VLC printf("%4x: n=%d (subtable)\n", j, n); @@ -211,8 +217,8 @@ static int build_table(VLC *vlc, int table_nb_bits, index = build_table(vlc, n, nb_codes, bits, bits_wrap, bits_size, codes, codes_wrap, codes_size, - (code_prefix << table_nb_bits) | i, - n_prefix + table_nb_bits, use_static); + (flags & INIT_VLC_LE) ? 
(code_prefix | (i << n_prefix)) : ((code_prefix << table_nb_bits) | i), + n_prefix + table_nb_bits, flags); if (index < 0) return -1; /* note: realloc has been done, so reload tables */ diff --git a/src/libffmpeg/libavcodec/bitstream.h b/src/libffmpeg/libavcodec/bitstream.h index fd69915d8..ce664cee1 100644 --- a/src/libffmpeg/libavcodec/bitstream.h +++ b/src/libffmpeg/libavcodec/bitstream.h @@ -130,7 +130,7 @@ typedef struct RL_VLC_ELEM { uint8_t run; } RL_VLC_ELEM; -#ifdef ARCH_SPARC +#if defined(ARCH_SPARC) || defined(ARCH_ARMV4L) #define UNALIGNED_STORES_ARE_BAD #endif @@ -368,6 +368,16 @@ static inline int unaligned32_be(const void *v) #endif } +static inline int unaligned32_le(const void *v) +{ +#ifdef CONFIG_ALIGN + const uint8_t *p=v; + return (((p[3]<<8) | p[2])<<16) | (p[1]<<8) | (p[0]); +#else + return le2me_32( unaligned32(v)); //original +#endif +} + #ifdef ALT_BITSTREAM_READER # define MIN_CACHE_BITS 25 @@ -378,11 +388,19 @@ static inline int unaligned32_be(const void *v) # define CLOSE_READER(name, gb)\ (gb)->index= name##_index;\ +# ifdef ALT_BITSTREAM_READER_LE +# define UPDATE_CACHE(name, gb)\ + name##_cache= unaligned32_le( ((const uint8_t *)(gb)->buffer)+(name##_index>>3) ) >> (name##_index&0x07);\ + +# define SKIP_CACHE(name, gb, num)\ + name##_cache >>= (num); +# else # define UPDATE_CACHE(name, gb)\ name##_cache= unaligned32_be( ((const uint8_t *)(gb)->buffer)+(name##_index>>3) ) << (name##_index&0x07);\ # define SKIP_CACHE(name, gb, num)\ - name##_cache <<= (num);\ + name##_cache <<= (num); +# endif // FIXME name? 
# define SKIP_COUNTER(name, gb, num)\ @@ -397,8 +415,13 @@ static inline int unaligned32_be(const void *v) # define LAST_SKIP_BITS(name, gb, num) SKIP_COUNTER(name, gb, num) # define LAST_SKIP_CACHE(name, gb, num) ; +# ifdef ALT_BITSTREAM_READER_LE +# define SHOW_UBITS(name, gb, num)\ + ((name##_cache) & (NEG_USR32(0xffffffff,num))) +# else # define SHOW_UBITS(name, gb, num)\ NEG_USR32(name##_cache, num) +# endif # define SHOW_SBITS(name, gb, num)\ NEG_SSR32(name##_cache, num) @@ -616,8 +639,13 @@ static inline unsigned int get_bits1(GetBitContext *s){ #ifdef ALT_BITSTREAM_READER int index= s->index; uint8_t result= s->buffer[ index>>3 ]; +#ifdef ALT_BITSTREAM_READER_LE + result>>= (index&0x07); + result&= 1; +#else result<<= (index&0x07); result>>= 8 - 1; +#endif index++; s->index= index; @@ -687,7 +715,9 @@ void align_get_bits(GetBitContext *s); int init_vlc(VLC *vlc, int nb_bits, int nb_codes, const void *bits, int bits_wrap, int bits_size, const void *codes, int codes_wrap, int codes_size, - int use_static); + int flags); +#define INIT_VLC_USE_STATIC 1 +#define INIT_VLC_LE 2 void free_vlc(VLC *vlc); /** diff --git a/src/libffmpeg/libavcodec/bswap.h b/src/libffmpeg/libavcodec/bswap.h index eb1d87a55..50fd57178 100644 --- a/src/libffmpeg/libavcodec/bswap.h +++ b/src/libffmpeg/libavcodec/bswap.h @@ -17,16 +17,15 @@ #endif #if defined(ARCH_X86) || defined(ARCH_X86_64) -static inline uint16_t ByteSwap16(uint16_t x) +static always_inline uint16_t bswap_16(uint16_t x) { - __asm("xchgb %b0,%h0" : + __asm("rorw $8, %0" : LEGACY_REGS (x) : "0" (x)); return x; } -#define bswap_16(x) ByteSwap16(x) -static inline uint32_t ByteSwap32(uint32_t x) +static always_inline uint32_t bswap_32(uint32_t x) { #if __CPU__ > 386 __asm("bswap %0": @@ -40,9 +39,8 @@ static inline uint32_t ByteSwap32(uint32_t x) "0" (x)); return x; } -#define bswap_32(x) ByteSwap32(x) -static inline uint64_t ByteSwap64(uint64_t x) +static inline uint64_t bswap_64(uint64_t x) { #ifdef ARCH_X86_64 
__asm("bswap %0": @@ -50,24 +48,26 @@ static inline uint64_t ByteSwap64(uint64_t x) "0" (x)); return x; #else - register union { __extension__ uint64_t __ll; - uint32_t __l[2]; } __x; - asm("xchgl %0,%1": - "=r"(__x.__l[0]),"=r"(__x.__l[1]): - "0"(bswap_32((uint32_t)x)),"1"(bswap_32((uint32_t)(x>>32)))); - return __x.__ll; + union { + uint64_t ll; + struct { + uint32_t l,h; + } l; + } r; + r.l.l = bswap_32 (x); + r.l.h = bswap_32 (x>>32); + return r.ll; #endif } -#define bswap_64(x) ByteSwap64(x) #elif defined(ARCH_SH4) -static inline uint16_t ByteSwap16(uint16_t x) { +static always_inline uint16_t bswap_16(uint16_t x) { __asm__("swap.b %0,%0":"=r"(x):"0"(x)); return x; } -static inline uint32_t ByteSwap32(uint32_t x) { +static always_inline uint32_t bswap_32(uint32_t x) { __asm__( "swap.b %0,%0\n" "swap.w %0,%0\n" @@ -76,10 +76,7 @@ static inline uint32_t ByteSwap32(uint32_t x) { return x; } -#define bswap_16(x) ByteSwap16(x) -#define bswap_32(x) ByteSwap32(x) - -static inline uint64_t ByteSwap64(uint64_t x) +static inline uint64_t bswap_64(uint64_t x) { union { uint64_t ll; @@ -91,20 +88,37 @@ static inline uint64_t ByteSwap64(uint64_t x) r.l.h = bswap_32 (x>>32); return r.ll; } -#define bswap_64(x) ByteSwap64(x) - #else -#define bswap_16(x) (((x) & 0x00ff) << 8 | ((x) & 0xff00) >> 8) - +static always_inline uint16_t bswap_16(uint16_t x){ + return (x>>8) | (x<<8); +} -// code from bits/byteswap.h (C) 1997, 1998 Free Software Foundation, Inc. 
-#define bswap_32(x) \ - ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | \ - (((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24)) +#ifdef ARCH_ARM +static always_inline uint32_t bswap_32(uint32_t x){ + uint32_t t; + __asm__ ( + "eor %1, %0, %0, ror #16 \n\t" + "bic %1, %1, #0xFF0000 \n\t" + "mov %0, %0, ror #8 \n\t" + "eor %0, %0, %1, lsr #8 \n\t" + : "+r"(x), "=&r"(t)); /* t is output-only scratch, written before x is last read, hence early-clobber */ + return x; +} +#else +static always_inline uint32_t bswap_32(uint32_t x){ + x= ((x<<8)&0xFF00FF00) | ((x>>8)&0x00FF00FF); + return (x>>16) | (x<<16); +} +#endif -static inline uint64_t ByteSwap64(uint64_t x) +static inline uint64_t bswap_64(uint64_t x) { +#if 0 + x= ((x<< 8)&0xFF00FF00FF00FF00ULL) | ((x>> 8)&0x00FF00FF00FF00FFULL); + x= ((x<<16)&0xFFFF0000FFFF0000ULL) | ((x>>16)&0x0000FFFF0000FFFFULL); + return (x>>32) | (x<<32); +#else union { uint64_t ll; uint32_t l[2]; @@ -113,9 +127,8 @@ static inline uint64_t ByteSwap64(uint64_t x) r.l[0] = bswap_32 (w.l[1]); r.l[1] = bswap_32 (w.l[0]); return r.ll; +#endif } -#define bswap_64(x) ByteSwap64(x) - #endif /* !ARCH_X86 */ #endif /* !HAVE_BYTESWAP_H */ diff --git a/src/libffmpeg/libavcodec/common.h b/src/libffmpeg/libavcodec/common.h index 9feb68e1f..a54ab233d 100644 --- a/src/libffmpeg/libavcodec/common.h +++ b/src/libffmpeg/libavcodec/common.h @@ -463,9 +463,9 @@ if((y)<(x)){\ } #endif -#if defined(ARCH_X86) || defined(ARCH_X86_64) +#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_POWERPC) #if defined(ARCH_X86_64) -static inline uint64_t rdtsc(void) +static inline uint64_t read_time(void) { uint64_t a, d; asm volatile( "rdtsc\n\t" @@ -473,8 +473,8 @@ static inline uint64_t rdtsc(void) ); return (d << 32) | (a & 0xffffffff); } -#else -static inline long long rdtsc(void) +#elif defined(ARCH_X86) +static inline long long read_time(void) { long long l; asm volatile( "rdtsc\n\t" @@ -482,14 +482,33 @@ static inline long long rdtsc(void) ); return l; } +#else //FIXME check ppc64 +static inline uint64_t read_time(void) +{ + 
uint32_t tbu, tbl, temp; + + /* from section 2.2.1 of the 32-bit PowerPC PEM */ + __asm__ __volatile__( + "1:\n" + "mftbu %2\n" + "mftb %0\n" + "mftbu %1\n" + "cmpw %2,%1\n" + "bne 1b\n" + : "=r"(tbl), "=r"(tbu), "=r"(temp) + : + : "cc"); + + return (((uint64_t)tbu)<<32) | (uint64_t)tbl; +} #endif #define START_TIMER \ uint64_t tend;\ -uint64_t tstart= rdtsc();\ +uint64_t tstart= read_time();\ #define STOP_TIMER(id) \ -tend= rdtsc();\ +tend= read_time();\ {\ static uint64_t tsum=0;\ static int tcount=0;\ diff --git a/src/libffmpeg/libavcodec/dsputil.c b/src/libffmpeg/libavcodec/dsputil.c index 1ef956cc4..69731b070 100644 --- a/src/libffmpeg/libavcodec/dsputil.c +++ b/src/libffmpeg/libavcodec/dsputil.c @@ -2645,7 +2645,7 @@ static void h261_loop_filter_c(uint8_t *src, int stride){ } } -static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int *tc0) +static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0) { int i, d; for( i = 0; i < 4; i++ ) { @@ -2669,11 +2669,11 @@ static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystrid int i_delta; if( ABS( p2 - p0 ) < beta ) { - pix[-2*xstride] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0[i], tc0[i] ); + pix[-2*xstride] = p1 + clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] ); tc++; } if( ABS( q2 - q0 ) < beta ) { - pix[xstride] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0[i], tc0[i] ); + pix[ xstride] = q1 + clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] ); tc++; } @@ -2685,16 +2685,16 @@ static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystrid } } } -static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int *tc0) +static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) { h264_loop_filter_luma_c(pix, 
stride, 1, alpha, beta, tc0); } -static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int *tc0) +static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) { h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0); } -static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int *tc0) +static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0) { int i, d; for( i = 0; i < 4; i++ ) { @@ -2722,15 +2722,43 @@ static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystr } } } -static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int *tc0) +static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) { h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0); } -static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int *tc0) +static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) { h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0); } +static inline void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta) +{ + int d; + for( d = 0; d < 8; d++ ) { + const int p0 = pix[-1*xstride]; + const int p1 = pix[-2*xstride]; + const int q0 = pix[0]; + const int q1 = pix[1*xstride]; + + if( ABS( p0 - q0 ) < alpha && + ABS( p1 - p0 ) < beta && + ABS( q1 - q0 ) < beta ) { + + pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ + pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ + } + pix += ystride; + } +} +static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta) +{ + h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta); +} +static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta) +{ + 
h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta); +} + static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int s, i; @@ -3645,6 +3673,11 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->idct_add= ff_jref_idct_add; c->idct = j_rev_dct; c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; + }else if(avctx->idct_algo==FF_IDCT_VP3){ + c->idct_put= ff_vp3_idct_put_c; + c->idct_add= ff_vp3_idct_add_c; + c->idct = ff_vp3_idct_c; + c->idct_permutation_type= FF_NO_IDCT_PERM; }else{ //accurate/default c->idct_put= simple_idct_put; c->idct_add= simple_idct_add; @@ -3655,10 +3688,6 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->h264_idct_add= ff_h264_idct_add_c; - /* VP3 DSP support */ - c->vp3_dsp_init = vp3_dsp_init_c; - c->vp3_idct = vp3_idct_c; - c->get_pixels = get_pixels_c; c->diff_pixels = diff_pixels_c; c->put_pixels_clamped = put_pixels_clamped_c; @@ -3835,6 +3864,8 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c; c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c; c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c; + c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c; + c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c; c->h263_h_loop_filter= h263_h_loop_filter_c; c->h263_v_loop_filter= h263_v_loop_filter_c; @@ -3886,6 +3917,10 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) for(i=0; i<64; i++) c->idct_permutation[i]= ((i&7)<<3) | (i>>3); break; + case FF_PARTTRANS_IDCT_PERM: + for(i=0; i<64; i++) + c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3); + break; default: av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n"); } diff --git a/src/libffmpeg/libavcodec/dsputil.h b/src/libffmpeg/libavcodec/dsputil.h index 10d2d072a..341d350b5 100644 --- a/src/libffmpeg/libavcodec/dsputil.h +++ b/src/libffmpeg/libavcodec/dsputil.h @@ -71,17 +71,9 
@@ extern uint32_t squareTbl[512]; extern uint8_t cropTbl[256 + 2 * MAX_NEG_CROP]; /* VP3 DSP functions */ -void vp3_dsp_init_c(void); -void vp3_idct_c(int16_t *input_data, int16_t *dequant_matrix, - int coeff_count, DCTELEM *output_data); - -void vp3_dsp_init_mmx(void); -void vp3_idct_mmx(int16_t *input_data, int16_t *dequant_matrix, - int coeff_count, DCTELEM *output_data); - -void vp3_dsp_init_sse2(void); -void vp3_idct_sse2(int16_t *input_data, int16_t *dequant_matrix, - int coeff_count, DCTELEM *output_data); +void ff_vp3_idct_c(DCTELEM *block/* align 16*/); +void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); +void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); /* minimum alignment rules ;) if u notice errors in the align stuff, need more alignment for some asm code for some cpu @@ -278,10 +270,12 @@ typedef struct DSPContext { void (*sub_hfyu_median_prediction)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top); void (*bswap_buf)(uint32_t *dst, uint32_t *src, int w); - void (*h264_v_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int *tc0); - void (*h264_h_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int *tc0); - void (*h264_v_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int *tc0); - void (*h264_h_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int *tc0); + void (*h264_v_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); + void (*h264_v_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); + void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta); + void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix, 
int stride, int alpha, int beta); void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale); void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale); @@ -326,29 +320,12 @@ typedef struct DSPContext { #define FF_LIBMPEG2_IDCT_PERM 2 #define FF_SIMPLE_IDCT_PERM 3 #define FF_TRANSPOSE_IDCT_PERM 4 +#define FF_PARTTRANS_IDCT_PERM 5 int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale); void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale); #define BASIS_SHIFT 16 #define RECON_SHIFT 6 - - /** - * This function handles any initialization for the VP3 DSP functions. - */ - void (*vp3_dsp_init)(void); - - /** - * This function is responsible for taking a block of zigzag'd, - * quantized DCT coefficients and reconstructing the original block of - * samples. - * @param input_data 64 zigzag'd, quantized DCT coefficients - * @param dequant_matrix 64 zigzag'd quantizer coefficients - * @param coeff_count index of the last coefficient - * @param output_samples space for 64 DCTELEMs where the transformed - * samples will be stored - */ - void (*vp3_idct)(int16_t *input_data, int16_t *dequant_matrix, - int coeff_count, DCTELEM *output_samples); void (*h264_idct_add)(uint8_t *dst, DCTELEM *block, int stride); } DSPContext; diff --git a/src/libffmpeg/libavcodec/dv.c b/src/libffmpeg/libavcodec/dv.c index 94440ed5c..bc750bcbc 100644 --- a/src/libffmpeg/libavcodec/dv.c +++ b/src/libffmpeg/libavcodec/dv.c @@ -942,6 +942,7 @@ static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size, return s->sys->frame_size; } +#ifdef CONFIG_DVVIDEO_ENCODER AVCodec dvvideo_encoder = { "dvvideo", CODEC_TYPE_VIDEO, @@ -954,6 +955,7 @@ AVCodec dvvideo_encoder = { CODEC_CAP_DR1, NULL }; +#endif // CONFIG_DVVIDEO_ENCODER AVCodec dvvideo_decoder = { "dvvideo", diff --git a/src/libffmpeg/libavcodec/ffv1.c b/src/libffmpeg/libavcodec/ffv1.c index 107eab1bc..57ed9adb5 100644 --- a/src/libffmpeg/libavcodec/ffv1.c +++ 
b/src/libffmpeg/libavcodec/ffv1.c @@ -550,9 +550,9 @@ static int encode_init(AVCodecContext *avctx) FFV1Context *s = avctx->priv_data; int i; - if(avctx->strict_std_compliance >= 0){ + if(avctx->strict_std_compliance >FF_COMPLIANCE_EXPERIMENTAL){ av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodeable with future versions!!!\n" - "use vstrict=-1 / -strict -1 to use it anyway\n"); + "use vstrict=-2 / -strict -2 to use it anyway\n"); return -1; } diff --git a/src/libffmpeg/libavcodec/g726.c b/src/libffmpeg/libavcodec/g726.c index bc9374d3e..efc3c5fae 100644 --- a/src/libffmpeg/libavcodec/g726.c +++ b/src/libffmpeg/libavcodec/g726.c @@ -327,7 +327,7 @@ static int g726_init(AVCodecContext * avctx) av_log(avctx, AV_LOG_ERROR, "G726: unsupported audio format\n"); return -1; } - if (avctx->sample_rate != 8000 && avctx->strict_std_compliance>=0) { + if (avctx->sample_rate != 8000 && avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL) { av_log(avctx, AV_LOG_ERROR, "G726: unsupported audio format\n"); return -1; } diff --git a/src/libffmpeg/libavcodec/h263.c b/src/libffmpeg/libavcodec/h263.c index b924a6b5a..08306011b 100644 --- a/src/libffmpeg/libavcodec/h263.c +++ b/src/libffmpeg/libavcodec/h263.c @@ -2653,9 +2653,9 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number) if(s->pict_type==I_TYPE){ if(!(s->flags&CODEC_FLAG_GLOBAL_HEADER)){ - if(s->strict_std_compliance < 2) //HACK, the reference sw is buggy + if(s->strict_std_compliance < FF_COMPLIANCE_VERY_STRICT) //HACK, the reference sw is buggy mpeg4_encode_visual_object_header(s); - if(s->strict_std_compliance < 2 || picture_number==0) //HACK, the reference sw is buggy + if(s->strict_std_compliance < FF_COMPLIANCE_VERY_STRICT || picture_number==0) //HACK, the reference sw is buggy mpeg4_encode_vol_header(s, 0, 0); } if(!(s->workaround_bugs & FF_BUG_MS)) @@ -2670,6 +2670,7 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number) 
put_bits(&s->pb, 16, VOP_STARTCODE); /* vop header */ put_bits(&s->pb, 2, s->pict_type - 1); /* pict type: I = 0 , P = 1 */ + assert(s->time>=0); time_div= s->time/s->avctx->time_base.den; time_mod= s->time%s->avctx->time_base.den; time_incr= time_div - s->last_time_base; @@ -5801,8 +5802,8 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){ time_incr++; check_marker(gb, "before time_increment"); - - if(s->time_increment_bits==0){ + + if(s->time_increment_bits==0 || !(show_bits(gb, s->time_increment_bits+1)&1)){ av_log(s->avctx, AV_LOG_ERROR, "hmm, seems the headers are not complete, trying to guess time_increment_bits\n"); for(s->time_increment_bits=1 ;s->time_increment_bits<16; s->time_increment_bits++){ @@ -5849,7 +5850,7 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){ s->current_picture_ptr->pts= (s->time + s->avctx->time_base.num/2) / s->avctx->time_base.num; if(s->avctx->debug&FF_DEBUG_PTS) - av_log(s->avctx, AV_LOG_DEBUG, "MPEG4 PTS: %lld\n", s->current_picture_ptr->pts); + av_log(s->avctx, AV_LOG_DEBUG, "MPEG4 PTS: %Ld\n", s->current_picture_ptr->pts); check_marker(gb, "before vop_coded"); diff --git a/src/libffmpeg/libavcodec/h264.c b/src/libffmpeg/libavcodec/h264.c index 0cd04f923..d8dbc3fa0 100644 --- a/src/libffmpeg/libavcodec/h264.c +++ b/src/libffmpeg/libavcodec/h264.c @@ -370,6 +370,7 @@ static inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t v stride *= size; assert((((int)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0); + assert((stride&(w-1))==0); //FIXME check what gcc generates for 64 bit on x86 and possibly write a 32 bit ver of it if(w==2 && h==2){ *(uint16_t*)(p + 0)= @@ -727,6 +728,7 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){ *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0; h->ref_cache[list][scan8[0] - 1 + 2*8]= h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? 
LIST_NOT_USED : PART_NOT_AVAILABLE; + assert((!left_type[0]) == (!left_type[1])); } if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) @@ -1376,7 +1378,8 @@ static inline void write_back_motion(H264Context *h, int mb_type){ } } for(y=0; y<2; y++){ - *(uint16_t*)&s->current_picture.ref_index[list][b8_xy + y*h->b8_stride]= (LIST_NOT_USED&0xFF)*0x0101; + s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= + s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= LIST_NOT_USED; } } continue; @@ -3153,11 +3156,11 @@ static int fill_default_ref_list(H264Context *h){ if(h->slice_type==B_TYPE){ int out_i; - int limit= -1; + int limit= INT_MIN; /* sort frame according to poc in B slice */ for(out_i=0; out_ishort_ref_count; out_i++){ - int best_i=-1; + int best_i=INT_MIN; int best_poc=INT_MAX; for(i=0; ishort_ref_count; i++){ @@ -3168,7 +3171,7 @@ static int fill_default_ref_list(H264Context *h){ } } - assert(best_i != -1); + assert(best_i != INT_MIN); limit= best_poc; sorted_short_ref[out_i]= *h->short_ref[best_i]; @@ -3194,6 +3197,8 @@ static int fill_default_ref_list(H264Context *h){ for(i=0; ishort_ref_count && index < h->ref_count[list]; i++, j+=step) { while(j<0 || j>= h->short_ref_count){ + if(j != -99 && step == (list ? 
-1 : 1)) + return -1; step = -step; j= smallest_poc_greater_than_current + (step>>1); } @@ -3215,7 +3220,7 @@ static int fill_default_ref_list(H264Context *h){ // L0 and L1 are identical Picture temp= h->default_ref_list[1][0]; h->default_ref_list[1][0] = h->default_ref_list[1][1]; - h->default_ref_list[1][0] = temp; + h->default_ref_list[1][1] = temp; } if(index < h->ref_count[ list ]) @@ -3261,7 +3266,7 @@ static void print_long_term(H264Context *h); static int decode_ref_pic_list_reordering(H264Context *h){ MpegEncContext * const s = &h->s; - int list; + int list, index; print_short_term(h); print_long_term(h); @@ -3272,7 +3277,6 @@ static int decode_ref_pic_list_reordering(H264Context *h){ if(get_bits1(&s->gb)){ int pred= h->curr_pic_num; - int index; for(index=0; ; index++){ int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb); @@ -3303,18 +3307,33 @@ static int decode_ref_pic_list_reordering(H264Context *h){ for(i= h->short_ref_count-1; i>=0; i--){ ref = h->short_ref[i]; + assert(ref->reference == 3); + assert(!ref->long_ref); if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer break; } + if(i>=0) + ref->pic_id= ref->frame_num; }else{ pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx ref = h->long_ref[pic_id]; + ref->pic_id= pic_id; + assert(ref->reference == 3); + assert(ref->long_ref); + i=0; } if (i < 0) { av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n"); memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME } else { + for(i=index; i+1ref_count[list]; i++){ + if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id) + break; + } + for(; i > index; i--){ + h->ref_list[list][i]= h->ref_list[list][i-1]; + } h->ref_list[list][index]= *ref; } }else{ @@ -3326,6 +3345,13 @@ static int decode_ref_pic_list_reordering(H264Context *h){ if(h->slice_type!=B_TYPE) break; } + for(list=0; list<2; list++){ + 
for(index= 0; index < h->ref_count[list]; index++){ + if(!h->ref_list[list][index].data[0]) + h->ref_list[list][index]= s->current_picture; + } + if(h->slice_type!=B_TYPE) break; + } if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred) direct_dist_scale_factor(h); @@ -3465,6 +3491,7 @@ static void flush_dpb(AVCodecContext *avctx){ h->delayed_pic[i]= NULL; h->delayed_output_pic= NULL; idr(h); + h->s.current_picture_ptr->reference= 0; } /** @@ -4179,7 +4206,7 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in static void decode_mb_skip(H264Context *h){ MpegEncContext * const s = &h->s; const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; - int mb_type; + int mb_type=0; memset(h->non_zero_count[mb_xy], 0, 16); memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui @@ -4189,11 +4216,11 @@ static void decode_mb_skip(H264Context *h){ } if(h->mb_field_decoding_flag) mb_type|= MB_TYPE_INTERLACED; - + if( h->slice_type == B_TYPE ) { // just for fill_caches. pred_direct_motion will set the real mb_type - mb_type= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP; + mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP; fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ... pred_direct_motion(h, &mb_type); @@ -4205,7 +4232,7 @@ static void decode_mb_skip(H264Context *h){ else { int mx, my; - mb_type= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP; + mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP; fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ... pred_pskip_motion(h, &mx, &my); @@ -5639,7 +5666,7 @@ static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4] const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; if( bS[0] < 4 ) { - int tc[4]; + int8_t tc[4]; for(i=0; i<4; i++) tc[i] = bS[i] ? 
tc0_table[index_a][bS[i] - 1] : -1; h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc); @@ -5700,28 +5727,12 @@ static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; if( bS[0] < 4 ) { - int tc[4]; + int8_t tc[4]; for(i=0; i<4; i++) tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0; h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc); } else { - /* 8px edge length, see filter_mb_edgev */ - for( d = 0; d < 8; d++ ){ - const int p0 = pix[-1]; - const int p1 = pix[-2]; - const int q0 = pix[0]; - const int q1 = pix[1]; - - if( ABS( p0 - q0 ) < alpha && - ABS( p1 - p0 ) < beta && - ABS( q1 - q0 ) < beta ) { - - pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ - pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ - tprintf("filter_mb_edgecv i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1); - } - pix += stride; - } + h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta); } } @@ -5887,7 +5898,7 @@ static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4] const int pix_next = stride; if( bS[0] < 4 ) { - int tc[4]; + int8_t tc[4]; for(i=0; i<4; i++) tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1; h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc); @@ -5944,31 +5955,14 @@ static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); const int alpha = alpha_table[index_a]; const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; - const int pix_next = stride; if( bS[0] < 4 ) { - int tc[4]; + int8_t tc[4]; for(i=0; i<4; i++) tc[i] = bS[i] ? 
tc0_table[index_a][bS[i] - 1] + 1 : 0; h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc); } else { - /* 8px edge length, see filter_mb_edgev */ - for( d = 0; d < 8; d++ ) { - const int p0 = pix[-1*pix_next]; - const int p1 = pix[-2*pix_next]; - const int q0 = pix[0]; - const int q1 = pix[1*pix_next]; - - if( ABS( p0 - q0 ) < alpha && - ABS( p1 - p0 ) < beta && - ABS( q1 - q0 ) < beta ) { - - pix[-pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ - pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ - tprintf("filter_mb_edgech i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], pix[-3*pix_next], p1, p0, q0, q1, pix[2*pix_next], pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]); - } - pix++; - } + h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta); } } diff --git a/src/libffmpeg/libavcodec/huffyuv.c b/src/libffmpeg/libavcodec/huffyuv.c index d2f358575..ebb1340ac 100644 --- a/src/libffmpeg/libavcodec/huffyuv.c +++ b/src/libffmpeg/libavcodec/huffyuv.c @@ -541,8 +541,8 @@ static int encode_init(AVCodecContext *avctx) } if(s->interlaced != ( s->height > 288 )) av_log(avctx, AV_LOG_INFO, "using huffyuv 2.2.0 or newer interlacing flag\n"); - }else if(avctx->strict_std_compliance>=0){ - av_log(avctx, AV_LOG_ERROR, "This codec is under development; files encoded with it may not be decodable with future versions!!! Set vstrict=-1 / -strict -1 to use it anyway.\n"); + }else if(avctx->strict_std_compliance>FF_COMPLIANCE_EXPERIMENTAL){ + av_log(avctx, AV_LOG_ERROR, "This codec is under development; files encoded with it may not be decodable with future versions!!! 
Set vstrict=-2 / -strict -2 to use it anyway.\n"); return -1; } diff --git a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c index 1d1f2b213..32565c3d3 100644 --- a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c +++ b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c @@ -45,6 +45,7 @@ static const uint64_t ff_pw_16 attribute_used __attribute__ ((aligned(8))) = 0x0 static const uint64_t ff_pw_32 attribute_used __attribute__ ((aligned(8))) = 0x0020002000200020ULL; static const uint64_t ff_pw_15 attribute_used __attribute__ ((aligned(8))) = 0x000F000F000F000FULL; +static const uint64_t ff_pb_3F attribute_used __attribute__ ((aligned(8))) = 0x3F3F3F3F3F3F3F3FULL; static const uint64_t ff_pb_FC attribute_used __attribute__ ((aligned(8))) = 0xFCFCFCFCFCFCFCFCULL; #define JUMPALIGN() __asm __volatile (".balign 8"::) @@ -692,204 +693,265 @@ static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){ ); } -// dst = ABS( a - b ) -#define MMABS_DIFF_MMX2(a,b,dst,z)\ - "movq " #b ", " #dst " \n\t"\ - "movq " #a ", " #z " \n\t"\ - "psubusw " #b ", " #z " \n\t"\ - "psubusw " #a ", " #dst " \n\t"\ - "pmaxsw " #z ", " #dst " \n\t" - -// a = clip( a, -tc, tc ) -#define CLIP_MMX2(a,tc,z)\ - "pxor " #z ", " #z " \n\t"\ - "psubw " #tc ", " #z " \n\t"\ - "pmaxsw " #z ", " #a " \n\t"\ - "pminsw " #tc ", " #a " \n\t" - -// in: mm0=p1, mm1=p0, mm2=q0, mm3=q1 -// out: mm7 = do we filter this pixel? 
-#define H264_DEBLOCK_THRESH(alpha,beta)\ - "pxor %%mm7, %%mm7 \n\t"\ - "punpcklbw %%mm7, %%mm0 \n\t"\ - "punpcklbw %%mm7, %%mm1 \n\t"\ - "punpcklbw %%mm7, %%mm2 \n\t"\ - "punpcklbw %%mm7, %%mm3 \n\t"\ - MMABS_DIFF_MMX2(%%mm1, %%mm2, %%mm5, %%mm4)\ - "movd " #alpha ", %%mm6 \n\t"\ - "pshufw $0, %%mm6, %%mm6 \n\t"\ - "pcmpgtw %%mm5, %%mm6 \n\t" /* ABS(p0-q0) < alpha */\ - MMABS_DIFF_MMX2(%%mm0, %%mm1, %%mm5, %%mm4)\ - MMABS_DIFF_MMX2(%%mm3, %%mm2, %%mm7, %%mm4)\ - "pmaxsw %%mm7, %%mm5 \n\t"\ - "movd " #beta ", %%mm7 \n\t"\ - "pshufw $0, %%mm7, %%mm7 \n\t"\ - "movq %%mm7, %%mm4 \n\t"\ - "pcmpgtw %%mm5, %%mm7 \n\t" /* ABS(p1-p0) < beta && ABS(q1-q0) < beta */\ - "pand %%mm6, %%mm7 \n\t" - -// in: mm0=p1, mm1=p0, mm2=q0, mm3=q1, mm6=tc -// out: mm1=p0', mm2=q0' -#define H264_DEBLOCK_P0_Q0(pw4)\ - "movq " #pw4 ", %%mm4 \n\t"\ - "movq %%mm2, %%mm5 \n\t"\ - "paddw %%mm4, %%mm0 \n\t"\ - "psubw %%mm1, %%mm5 \n\t"\ - "psubw %%mm3, %%mm0 \n\t"\ - "psllw $2, %%mm5 \n\t"\ - "paddw %%mm0, %%mm5 \n\t"\ - "psraw $3, %%mm5 \n\t" /* mm5 = (((q0 - p0) << 2) + (p1 - q1) + 4) >> 3 */\ - CLIP_MMX2(%%mm5, %%mm6, %%mm4) /* delta = clip( mm5, -tc, tc ) */\ - "paddw %%mm5, %%mm1 \n\t" /* p0 += delta */\ - "psubw %%mm5, %%mm2 \n\t" /* q0 -= delta */ - -// in: mm1=p0, mm2=q0, mm6=tc0 -// out: mm5=delta -#define H264_DEBLOCK_DELTA_PQ1(p1,p2,z)\ - "movq %%mm1, %%mm5 \n\t"\ - "pavgb %%mm2, %%mm5 \n\t"\ - "paddw " #p2 ", %%mm5 \n\t"\ - "psraw $1, %%mm5 \n\t"\ - "psubw " #p1 ", %%mm5 \n\t" /* ( ( q2 + ((p0+q0+1)>>1) ) >> 1 ) - q1 */\ - CLIP_MMX2(%%mm5, %%mm6, z) - -static inline void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int tc0) + +// out: o = |x-y|>a +// clobbers: t +#define DIFF_GT_MMX(x,y,a,o,t)\ + "movq "#y", "#t" \n\t"\ + "movq "#x", "#o" \n\t"\ + "psubusb "#x", "#t" \n\t"\ + "psubusb "#y", "#o" \n\t"\ + "por "#t", "#o" \n\t"\ + "psubusb "#a", "#o" \n\t" + +// in: mm0=p1 mm1=p0 mm2=q0 mm3=q1 +// out: mm5=beta-1, mm7=mask +// clobbers: mm4,mm6 +#define 
H264_DEBLOCK_MASK(alpha1, beta1) \ + "pshufw $0, "#alpha1", %%mm4 \n\t"\ + "pshufw $0, "#beta1 ", %%mm5 \n\t"\ + "packuswb %%mm4, %%mm4 \n\t"\ + "packuswb %%mm5, %%mm5 \n\t"\ + DIFF_GT_MMX(%%mm1, %%mm2, %%mm4, %%mm7, %%mm6) /* |p0-q0| > alpha-1 */\ + DIFF_GT_MMX(%%mm0, %%mm1, %%mm5, %%mm4, %%mm6) /* |p1-p0| > beta-1 */\ + "por %%mm4, %%mm7 \n\t"\ + DIFF_GT_MMX(%%mm3, %%mm2, %%mm5, %%mm4, %%mm6) /* |q1-q0| > beta-1 */\ + "por %%mm4, %%mm7 \n\t"\ + "pxor %%mm6, %%mm6 \n\t"\ + "pcmpeqb %%mm6, %%mm7 \n\t" + +// in: mm0=p1 mm1=p0 mm2=q0 mm3=q1 mm7=(tc&mask) +// out: mm1=p0' mm2=q0' +// clobbers: mm0,3-6 +#define H264_DEBLOCK_P0_Q0(pb_01, pb_3f)\ + /* a = q0^p0^((p1-q1)>>2) */\ + "movq %%mm0, %%mm4 \n\t"\ + "psubb %%mm3, %%mm4 \n\t"\ + "psrlw $2, %%mm4 \n\t"\ + "pxor %%mm1, %%mm4 \n\t"\ + "pxor %%mm2, %%mm4 \n\t"\ + /* b = p0^(q1>>2) */\ + "psrlw $2, %%mm3 \n\t"\ + "pand "#pb_3f", %%mm3 \n\t"\ + "movq %%mm1, %%mm5 \n\t"\ + "pxor %%mm3, %%mm5 \n\t"\ + /* c = q0^(p1>>2) */\ + "psrlw $2, %%mm0 \n\t"\ + "pand "#pb_3f", %%mm0 \n\t"\ + "movq %%mm2, %%mm6 \n\t"\ + "pxor %%mm0, %%mm6 \n\t"\ + /* d = (c^b) & ~(b^a) & 1 */\ + "pxor %%mm5, %%mm6 \n\t"\ + "pxor %%mm4, %%mm5 \n\t"\ + "pandn %%mm6, %%mm5 \n\t"\ + "pand "#pb_01", %%mm5 \n\t"\ + /* delta = (avg(q0, p1>>2) + (d&a)) + * - (avg(p0, q1>>2) + (d&~a)) */\ + "pavgb %%mm2, %%mm0 \n\t"\ + "movq %%mm5, %%mm6 \n\t"\ + "pand %%mm4, %%mm6 \n\t"\ + "paddusb %%mm6, %%mm0 \n\t"\ + "pavgb %%mm1, %%mm3 \n\t"\ + "pandn %%mm5, %%mm4 \n\t"\ + "paddusb %%mm4, %%mm3 \n\t"\ + /* p0 += clip(delta, -tc0, tc0) + * q0 -= clip(delta, -tc0, tc0) */\ + "movq %%mm0, %%mm4 \n\t"\ + "psubusb %%mm3, %%mm0 \n\t"\ + "psubusb %%mm4, %%mm3 \n\t"\ + "pminub %%mm7, %%mm0 \n\t"\ + "pminub %%mm7, %%mm3 \n\t"\ + "paddusb %%mm0, %%mm1 \n\t"\ + "paddusb %%mm3, %%mm2 \n\t"\ + "psubusb %%mm3, %%mm1 \n\t"\ + "psubusb %%mm0, %%mm2 \n\t" + +// in: mm0=p1 mm1=p0 mm2=q0 mm3=q1 mm7=(tc&mask) %8=mm_bone +// out: (q1addr) = clip( (q2+((p0+q0+1)>>1))>>1, q1-tc0, q1+tc0 ) +// 
clobbers: q2, tmp, tc0 +#define H264_DEBLOCK_Q1(p1, q2, q2addr, q1addr, tc0, tmp)\ + "movq %%mm1, "#tmp" \n\t"\ + "pavgb %%mm2, "#tmp" \n\t"\ + "pavgb "#tmp", "#q2" \n\t" /* avg(p2,avg(p0,q0)) */\ + "pxor "q2addr", "#tmp" \n\t"\ + "pand %8, "#tmp" \n\t" /* (p2^avg(p0,q0))&1 */\ + "psubusb "#tmp", "#q2" \n\t" /* (p2+((p0+q0+1)>>1))>>1 */\ + "movq "#p1", "#tmp" \n\t"\ + "psubusb "#tc0", "#tmp" \n\t"\ + "paddusb "#p1", "#tc0" \n\t"\ + "pmaxub "#tmp", "#q2" \n\t"\ + "pminub "#tc0", "#q2" \n\t"\ + "movq "#q2", "q1addr" \n\t" + +static inline void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0) { - uint64_t tmp0, tmp1; + uint64_t tmp0; + uint64_t tc = (uint8_t)tc0[1]*0x01010000 | (uint8_t)tc0[0]*0x0101; + // with luma, tc0=0 doesn't mean no filtering, so we need a separate input mask + uint32_t mask[2] = { (tc0[0]>=0)*0xffffffff, (tc0[1]>=0)*0xffffffff }; + asm volatile( - "movd (%2,%4), %%mm0 \n\t" //p1 - "movd (%2,%4,2), %%mm1 \n\t" //p0 - "movd (%3), %%mm2 \n\t" //q0 - "movd (%3,%4), %%mm3 \n\t" //q1 - H264_DEBLOCK_THRESH(%6,%7) - "movq %%mm7, %0 \n\t" - -// filter p1 if ABS(p2-p0) < beta - "movd (%2), %%mm3 \n\t" - "pxor %%mm6, %%mm6 \n\t" - "punpcklbw %%mm6, %%mm3 \n\t" //p2 - MMABS_DIFF_MMX2(%%mm1, %%mm3, %%mm5, %%mm6) - "pcmpgtw %%mm5, %%mm4 \n\t" - "pand %%mm7, %%mm4 \n\t" // mm4 = ( ABS( p2 - p0 ) < beta && filterp ) - "movd %5, %%mm6 \n\t" - "pshufw $0, %%mm6, %%mm6 \n\t" //tc - - H264_DEBLOCK_DELTA_PQ1(%%mm0, %%mm3, %%mm7) // delta = clip( ( p2 + ((p0+q0+1)>>1) ) >> 1 ) - p1 ) - "pand %%mm4, %%mm5 \n\t" - "paddw %%mm0, %%mm5 \n\t" - "packuswb %%mm5, %%mm5 \n\t" - "movd %%mm5, (%2,%4) \n\t" // *p1 += delta - "psrlw $15, %%mm4 \n\t" - "paddw %%mm6, %%mm4 \n\t" // tc++ - "movq %%mm4, %1 \n\t" - -// filter q1 if ABS(q2-q0) < beta - "pxor %%mm7, %%mm7 \n\t" - "movd (%3,%4), %%mm3 \n\t" //q1 - "movd (%3,%4,2), %%mm4 \n\t" //q2 - "punpcklbw %%mm7, %%mm3 \n\t" - "punpcklbw %%mm7, %%mm4 \n\t" - MMABS_DIFF_MMX2(%%mm2, %%mm4, 
%%mm5, %%mm7) - "movd %7, %%mm7 \n\t" - "pshufw $0, %%mm7, %%mm7 \n\t" - "pcmpgtw %%mm5, %%mm7 \n\t" - - H264_DEBLOCK_DELTA_PQ1(%%mm3, %%mm4, %%mm4) // delta = clip( ( q2 + ((p0+q0+1)>>1) ) >> 1 ) - q1 ) - "movq %0, %%mm4 \n\t" - "pand %%mm4, %%mm7 \n\t" // mm7 = ( ABS( q2 - q0 ) < beta && filterp ) - "pand %%mm7, %%mm5 \n\t" - "paddw %%mm3, %%mm5 \n\t" - "packuswb %%mm5, %%mm5 \n\t" - "movd %%mm5, (%3,%4) \n\t" // *q1 += delta - "movq %1, %%mm6 \n\t" - "psrlw $15, %%mm7 \n\t" - "paddw %%mm7, %%mm6 \n\t" // tc++ - "movq %0, %%mm4 \n\t" - "pand %%mm4, %%mm6 \n\t" - - H264_DEBLOCK_P0_Q0(%8) - "packuswb %%mm1, %%mm1 \n\t" - "packuswb %%mm2, %%mm2 \n\t" - "movd %%mm1, (%2,%4,2) \n\t" - "movd %%mm2, (%3) \n\t" - - : "=m"(tmp0), "=m"(tmp1) + "movq (%1,%3), %%mm0 \n\t" //p1 + "movq (%1,%3,2), %%mm1 \n\t" //p0 + "movq (%2), %%mm2 \n\t" //q0 + "movq (%2,%3), %%mm3 \n\t" //q1 + H264_DEBLOCK_MASK(%6, %7) + "pand %5, %%mm7 \n\t" + "movq %%mm7, %0 \n\t" + + /* filter p1 */ + "movq (%1), %%mm3 \n\t" //p2 + DIFF_GT_MMX(%%mm1, %%mm3, %%mm5, %%mm6, %%mm4) // |p2-p0|>beta-1 + "pandn %%mm7, %%mm6 \n\t" + "pcmpeqb %%mm7, %%mm6 \n\t" + "pand %%mm7, %%mm6 \n\t" // mask & |p2-p0|beta-1 + "pandn %0, %%mm6 \n\t" + "pcmpeqb %0, %%mm6 \n\t" + "pand %0, %%mm6 \n\t" + "pshufw $80, %4, %%mm5 \n\t" + "pand %%mm6, %%mm5 \n\t" + "pand %8, %%mm6 \n\t" + "paddb %%mm6, %%mm7 \n\t" + "movq (%2,%3), %%mm3 \n\t" + H264_DEBLOCK_Q1(%%mm3, %%mm4, "(%2,%3,2)", "(%2,%3)", %%mm5, %%mm6) + + /* filter p0, q0 */ + H264_DEBLOCK_P0_Q0(%8, %9) + "movq %%mm1, (%1,%3,2) \n\t" + "movq %%mm2, (%2) \n\t" + + : "=m"(tmp0) : "r"(pix-3*stride), "r"(pix), "r"((long)stride), - "r"(tc0), "r"(alpha), "r"(beta), "m"(ff_pw_4) + "m"(tc), "m"(*(uint64_t*)mask), "m"(alpha1), "m"(beta1), + "m"(mm_bone), "m"(ff_pb_3F) ); } -static void h264_v_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int *tc0) +static void h264_v_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) { - int i; - 
for(i=0; i<4; i++, pix+=4) { - if(tc0[i] < 0) - continue; - h264_loop_filter_luma_mmx2(pix, stride, alpha, beta, tc0[i]); - } + if((tc0[0] & tc0[1]) >= 0) + h264_loop_filter_luma_mmx2(pix, stride, alpha-1, beta-1, tc0); + if((tc0[2] & tc0[3]) >= 0) + h264_loop_filter_luma_mmx2(pix+8, stride, alpha-1, beta-1, tc0+2); } - -static void h264_h_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int *tc0) +static void h264_h_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) { - uint8_t trans[4*8]; + //FIXME: could cut some load/stores by merging transpose with filter + // also, it only needs to transpose 6x8 + uint8_t trans[8*8]; int i; - for(i=0; i<4; i++, pix+=4*stride) { - if(tc0[i] < 0) + for(i=0; i<2; i++, pix+=8*stride, tc0+=2) { + if((tc0[0] & tc0[1]) < 0) continue; - //FIXME: could cut some load/stores by merging transpose with filter - transpose4x4(trans, pix-4, 4, stride); - transpose4x4(trans+4*4, pix, 4, stride); - h264_loop_filter_luma_mmx2(trans+4*4, 4, alpha, beta, tc0[i]); - transpose4x4(pix-2, trans+2*4, stride, 4); + transpose4x4(trans, pix-4, 8, stride); + transpose4x4(trans +4*8, pix, 8, stride); + transpose4x4(trans+4, pix-4+4*stride, 8, stride); + transpose4x4(trans+4+4*8, pix +4*stride, 8, stride); + h264_loop_filter_luma_mmx2(trans+4*8, 8, alpha-1, beta-1, tc0); + transpose4x4(pix-2, trans +2*8, stride, 8); + transpose4x4(pix-2+4*stride, trans+4+2*8, stride, 8); } } -static inline void h264_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int *tc0) +static inline void h264_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0) { asm volatile( - "movd (%0), %%mm0 \n\t" - "movd (%0,%2), %%mm1 \n\t" - "movd (%1), %%mm2 \n\t" - "movd (%1,%2), %%mm3 \n\t" - H264_DEBLOCK_THRESH(%4,%5) + "movq (%0), %%mm0 \n\t" //p1 + "movq (%0,%2), %%mm1 \n\t" //p0 + "movq (%1), %%mm2 \n\t" //q0 + "movq (%1,%2), %%mm3 \n\t" //q1 + H264_DEBLOCK_MASK(%4, %5) "movd %3, 
%%mm6 \n\t" - "pshufw $0x50, %%mm6, %%mm6 \n\t" // mm6 = tc[1], tc[1], tc[0], tc[0] - "pand %%mm7, %%mm6 \n\t" - H264_DEBLOCK_P0_Q0(%6) - "packuswb %%mm1, %%mm1 \n\t" - "packuswb %%mm2, %%mm2 \n\t" - "movd %%mm1, (%0,%2) \n\t" - "movd %%mm2, (%1) \n\t" + "punpcklbw %%mm6, %%mm6 \n\t" + "pand %%mm6, %%mm7 \n\t" // mm7 = tc&mask + H264_DEBLOCK_P0_Q0(%6, %7) + "movq %%mm1, (%0,%2) \n\t" + "movq %%mm2, (%1) \n\t" + :: "r"(pix-2*stride), "r"(pix), "r"((long)stride), - "r"(tc0[1]<<16 | tc0[0]), - "r"(alpha), "r"(beta), "m"(ff_pw_4) + "r"(*(uint32_t*)tc0), + "m"(alpha1), "m"(beta1), "m"(mm_bone), "m"(ff_pb_3F) ); } -static void h264_v_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int *tc0) +static void h264_v_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) { - int i; - for(i=0; i<2; i++) { - h264_loop_filter_chroma_mmx2(pix, stride, alpha, beta, tc0); - pix += 4; - tc0 += 2; - } + h264_loop_filter_chroma_mmx2(pix, stride, alpha-1, beta-1, tc0); } -static void h264_h_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int *tc0) +static void h264_h_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) { - uint8_t trans[4*4]; - int i; - for(i=0; i<2; i++) { - //FIXME: could cut some load/stores by merging transpose with filter - transpose4x4(trans, pix-2, 4, stride); - h264_loop_filter_chroma_mmx2(trans+2*4, 4, alpha, beta, tc0); - transpose4x4(pix-2, trans, stride, 4); - pix += 4*stride; - tc0 += 2; - } + //FIXME: could cut some load/stores by merging transpose with filter + uint8_t trans[8*4]; + transpose4x4(trans, pix-2, 8, stride); + transpose4x4(trans+4, pix-2+4*stride, 8, stride); + h264_loop_filter_chroma_mmx2(trans+2*8, 8, alpha-1, beta-1, tc0); + transpose4x4(pix-2, trans, stride, 8); + transpose4x4(pix-2+4*stride, trans+4, stride, 8); +} + +// p0 = (p0 + q1 + 2*p1 + 2) >> 2 +#define H264_FILTER_CHROMA4(p0, p1, q1, one) \ + "movq "#p0", %%mm4 \n\t"\ + "pxor 
"#q1", %%mm4 \n\t"\ + "pand "#one", %%mm4 \n\t" /* mm4 = (p0^q1)&1 */\ + "pavgb "#q1", "#p0" \n\t"\ + "psubusb %%mm4, "#p0" \n\t"\ + "pavgb "#p1", "#p0" \n\t" /* dst = avg(p1, avg(p0,q1) - ((p0^q1)&1)) */\ + +static inline void h264_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha1, int beta1) +{ + asm volatile( + "movq (%0), %%mm0 \n\t" + "movq (%0,%2), %%mm1 \n\t" + "movq (%1), %%mm2 \n\t" + "movq (%1,%2), %%mm3 \n\t" + H264_DEBLOCK_MASK(%3, %4) + "movq %%mm1, %%mm5 \n\t" + "movq %%mm2, %%mm6 \n\t" + H264_FILTER_CHROMA4(%%mm1, %%mm0, %%mm3, %5) //p0' + H264_FILTER_CHROMA4(%%mm2, %%mm3, %%mm0, %5) //q0' + "psubb %%mm5, %%mm1 \n\t" + "psubb %%mm6, %%mm2 \n\t" + "pand %%mm7, %%mm1 \n\t" + "pand %%mm7, %%mm2 \n\t" + "paddb %%mm5, %%mm1 \n\t" + "paddb %%mm6, %%mm2 \n\t" + "movq %%mm1, (%0,%2) \n\t" + "movq %%mm2, (%1) \n\t" + :: "r"(pix-2*stride), "r"(pix), "r"((long)stride), + "m"(alpha1), "m"(beta1), "m"(mm_bone) + ); +} + +static void h264_v_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta) +{ + h264_loop_filter_chroma_intra_mmx2(pix, stride, alpha-1, beta-1); +} + +static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta) +{ + //FIXME: could cut some load/stores by merging transpose with filter + uint8_t trans[8*4]; + transpose4x4(trans, pix-2, 8, stride); + transpose4x4(trans+4, pix-2+4*stride, 8, stride); + h264_loop_filter_chroma_intra_mmx2(trans+2*8, 8, alpha-1, beta-1); + transpose4x4(pix-2, trans, stride, 8); + transpose4x4(pix-2+4*stride, trans+4, stride, 8); } + #ifdef CONFIG_ENCODERS static int pix_norm1_mmx(uint8_t *pix, int line_size) { int tmp; @@ -3016,6 +3078,30 @@ H264_MC(avg_, 4, mmx2) H264_MC(avg_, 8, mmx2) H264_MC(avg_, 16,mmx2) + +/** These are used by *_h264_chroma_mc8_* */ +static const uint64_t thirtytwo __align8 = 0x0020002000200020ULL; +static const uint64_t sixtyfour __align8 = 0x0040004000400040ULL; + +#define H264_CHROMA_OP(S,D) +#define 
H264_CHROMA_MC8_TMPL put_h264_chroma_mc8_mmx +#include "dsputil_h264_template_mmx.c" +#undef H264_CHROMA_OP +#undef H264_CHROMA_MC8_TMPL + +#define H264_CHROMA_OP(S,D) "pavgb " #S ", " #D " \n\t" +#define H264_CHROMA_MC8_TMPL avg_h264_chroma_mc8_mmx2 +#include "dsputil_h264_template_mmx.c" +#undef H264_CHROMA_OP +#undef H264_CHROMA_MC8_TMPL + +#define H264_CHROMA_OP(S,D) "pavgusb " #S ", " #D " \n\t" +#define H264_CHROMA_MC8_TMPL avg_h264_chroma_mc8_3dnow +#include "dsputil_h264_template_mmx.c" +#undef H264_CHROMA_OP +#undef H264_CHROMA_MC8_TMPL + + #if 0 static void just_return() { return; } #endif @@ -3115,6 +3201,10 @@ static void add_8x8basis_mmx(int16_t rem[64], int16_t basis[64], int scale){ void ff_mmx_idct(DCTELEM *block); void ff_mmxext_idct(DCTELEM *block); +void ff_vp3_idct_sse2(int16_t *input_data); +void ff_vp3_idct_mmx(int16_t *data); +void ff_vp3_dsp_init_mmx(void); + /* XXX: those functions should be suppressed ASAP when all IDCTs are converted */ static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block) @@ -3137,6 +3227,26 @@ static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size, DCTELEM *bloc ff_mmxext_idct (block); add_pixels_clamped_mmx(block, dest, line_size); } +static void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_vp3_idct_sse2(block); + put_signed_pixels_clamped_mmx(block, dest, line_size); +} +static void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_vp3_idct_sse2(block); + add_pixels_clamped_mmx(block, dest, line_size); +} +static void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_vp3_idct_mmx(block); + put_signed_pixels_clamped_mmx(block, dest, line_size); +} +static void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_vp3_idct_mmx(block); + add_pixels_clamped_mmx(block, dest, line_size); +} void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) { @@ -3196,18 +3306,22 @@ void 
dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->idct = ff_mmx_idct; } c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; + }else if(idct_algo==FF_IDCT_VP3){ + if(mm_flags & MM_SSE2){ + c->idct_put= ff_vp3_idct_put_sse2; + c->idct_add= ff_vp3_idct_add_sse2; + c->idct = ff_vp3_idct_sse2; + c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM; + }else{ + ff_vp3_dsp_init_mmx(); + c->idct_put= ff_vp3_idct_put_mmx; + c->idct_add= ff_vp3_idct_add_mmx; + c->idct = ff_vp3_idct_mmx; + c->idct_permutation_type= FF_PARTTRANS_IDCT_PERM; + } } } - /* VP3 optimized DSP functions */ - if (mm_flags & MM_SSE2) { - c->vp3_dsp_init = vp3_dsp_init_sse2; - c->vp3_idct = vp3_idct_sse2; - } else { - c->vp3_dsp_init = vp3_dsp_init_mmx; - c->vp3_idct = vp3_idct_mmx; - } - #ifdef CONFIG_ENCODERS c->get_pixels = get_pixels_mmx; c->diff_pixels = diff_pixels_mmx; @@ -3287,6 +3401,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->h263_v_loop_filter= h263_v_loop_filter_mmx; c->h263_h_loop_filter= h263_h_loop_filter_mmx; + c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx; if (mm_flags & MM_MMXEXT) { c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; @@ -3383,10 +3498,13 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) dspfunc(avg_h264_qpel, 2, 4); #undef dspfunc + c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_mmx2; c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2; c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2; c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2; c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_mmx2; + c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_mmx2; + c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_mmx2; #ifdef CONFIG_ENCODERS c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2; @@ -3472,6 +3590,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) dspfunc(avg_h264_qpel, 0, 16); dspfunc(avg_h264_qpel, 1, 8); dspfunc(avg_h264_qpel, 
2, 4); + + c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow; } } diff --git a/src/libffmpeg/libavcodec/i386/vp3dsp_mmx.c b/src/libffmpeg/libavcodec/i386/vp3dsp_mmx.c index 319e57f1b..ea0405717 100644 --- a/src/libffmpeg/libavcodec/i386/vp3dsp_mmx.c +++ b/src/libffmpeg/libavcodec/i386/vp3dsp_mmx.c @@ -257,18 +257,11 @@ static uint16_t idct_cosine_table[7] = { movq_r2m(r2, *I(2)); \ } -void vp3_dsp_init_mmx(void) +void ff_vp3_dsp_init_mmx(void) { int j = 16; uint16_t *p; - do { - idct_constants[--j] = 0; - } while (j); - - idct_constants[0] = idct_constants[5] = - idct_constants[10] = idct_constants[15] = 65535; - j = 1; do { p = idct_constants + ((j + 3) << 2); @@ -279,8 +272,7 @@ void vp3_dsp_init_mmx(void) idct_constants[46] = idct_constants[47] = IdctAdjustBeforeShift; } -void vp3_idct_mmx(int16_t *input_data, int16_t *dequant_matrix, - int coeff_count, int16_t *output_data) +void ff_vp3_idct_mmx(int16_t *output_data) { /* eax = quantized input * ebx = dequantizer matrix @@ -291,246 +283,11 @@ void vp3_idct_mmx(int16_t *input_data, int16_t *dequant_matrix, * r0..r7 = mm0..mm7 */ -#define M(x) (idct_constants + x * 4) #define C(x) (idct_constants + 16 + (x - 1) * 4) #define Eight (idct_constants + 44) - unsigned char *input_bytes = (unsigned char *)input_data; - unsigned char *dequant_matrix_bytes = (unsigned char *)dequant_matrix; - unsigned char *output_data_bytes = (unsigned char *)output_data; - - movq_m2r(*(input_bytes), r0); - pmullw_m2r(*(dequant_matrix_bytes), r0); /* r0 = 03 02 01 00 */ - movq_m2r(*(input_bytes+16), r1); - pmullw_m2r(*(dequant_matrix_bytes+16), r1); /* r1 = 13 12 11 10 */ - movq_m2r(*M(0), r2); /* r2 = __ __ __ FF */ - movq_r2r(r0, r3); /* r3 = 03 02 01 00 */ - movq_m2r(*(input_bytes+8), r4); - psrlq_i2r(16, r0); /* r0 = __ 03 02 01 */ - pmullw_m2r(*(dequant_matrix_bytes+8), r4); /* r4 = 07 06 05 04 */ - pand_r2r(r2, r3); /* r3 = __ __ __ 00 */ - movq_r2r(r0, r5); /* r5 = __ 03 02 01 */ - movq_r2r(r1, r6); /* r6 = 13 12 11 10 
*/ - pand_r2r(r2, r5); /* r5 = __ __ __ 01 */ - psllq_i2r(32, r6); /* r6 = 11 10 __ __ */ - movq_m2r(*M(3), r7); /* r7 = FF __ __ __ */ - pxor_r2r(r5, r0); /* r0 = __ 03 02 __ */ - pand_r2r(r6, r7); /* r7 = 11 __ __ __ */ - por_r2r(r3, r0); /* r0 = __ 03 02 00 */ - pxor_r2r(r7, r6); /* r6 = __ 10 __ __ */ - por_r2r(r7, r0); /* r0 = 11 03 02 00 = R0 */ - movq_m2r(*M(3), r7); /* r7 = FF __ __ __ */ - movq_r2r(r4, r3); /* r3 = 07 06 05 04 */ - movq_r2m(r0, *(output_data_bytes)); /* write R0 = r0 */ - pand_r2r(r2, r3); /* r3 = __ __ __ 04 */ - movq_m2r(*(input_bytes+32), r0); - psllq_i2r(16, r3); /* r3 = __ __ 04 __ */ - pmullw_m2r(*(dequant_matrix_bytes+32), r0); /* r0 = 23 22 21 20 */ - pand_r2r(r1, r7); /* r7 = 13 __ __ __ */ - por_r2r(r3, r5); /* r5 = __ __ 04 01 */ - por_r2r(r6, r7); /* r7 = 13 10 __ __ */ - movq_m2r(*(input_bytes+24), r3); - por_r2r(r5, r7); /* r7 = 13 10 04 01 = R1 */ - pmullw_m2r(*(dequant_matrix_bytes+24), r3); /* r3 = 17 16 15 14 */ - psrlq_i2r(16, r4); /* r4 = __ 07 06 05 */ - movq_r2m(r7, *(output_data_bytes+16)); /* write R1 = r7 */ - movq_r2r(r4, r5); /* r5 = __ 07 06 05 */ - movq_r2r(r0, r7); /* r7 = 23 22 21 20 */ - psrlq_i2r(16, r4); /* r4 = __ __ 07 06 */ - psrlq_i2r(48, r7); /* r7 = __ __ __ 23 */ - movq_r2r(r2, r6); /* r6 = __ __ __ FF */ - pand_r2r(r2, r5); /* r5 = __ __ __ 05 */ - pand_r2r(r4, r6); /* r6 = __ __ __ 06 */ - movq_r2m(r7, *(output_data_bytes+80)); /* partial R9 = __ __ __ 23 */ - pxor_r2r(r6, r4); /* r4 = __ __ 07 __ */ - psrlq_i2r(32, r1); /* r1 = __ __ 13 12 */ - por_r2r(r5, r4); /* r4 = __ __ 07 05 */ - movq_m2r(*M(3), r7); /* r7 = FF __ __ __ */ - pand_r2r(r2, r1); /* r1 = __ __ __ 12 */ - movq_m2r(*(input_bytes+48), r5); - psllq_i2r(16, r0); /* r0 = 22 21 20 __ */ - pmullw_m2r(*(dequant_matrix_bytes+48), r5); /* r5 = 33 32 31 30 */ - pand_r2r(r0, r7); /* r7 = 22 __ __ __ */ - movq_r2m(r1, *(output_data_bytes+64)); /* partial R8 = __ __ __ 12 */ - por_r2r(r4, r7); /* r7 = 22 __ 07 05 */ - movq_r2r(r3, r4); /* r4 
= 17 16 15 14 */ - pand_r2r(r2, r3); /* r3 = __ __ __ 14 */ - movq_m2r(*M(2), r1); /* r1 = __ FF __ __ */ - psllq_i2r(32, r3); /* r3 = __ 14 __ __ */ - por_r2r(r3, r7); /* r7 = 22 14 07 05 = R2 */ - movq_r2r(r5, r3); /* r3 = 33 32 31 30 */ - psllq_i2r(48, r3); /* r3 = 30 __ __ __ */ - pand_r2r(r0, r1); /* r1 = __ 21 __ __ */ - movq_r2m(r7, *(output_data_bytes+32)); /* write R2 = r7 */ - por_r2r(r3, r6); /* r6 = 30 __ __ 06 */ - movq_m2r(*M(1), r7); /* r7 = __ __ FF __ */ - por_r2r(r1, r6); /* r6 = 30 21 __ 06 */ - movq_m2r(*(input_bytes+56), r1); - pand_r2r(r4, r7); /* r7 = __ __ 15 __ */ - pmullw_m2r(*(dequant_matrix_bytes+56), r1); /* r1 = 37 36 35 34 */ - por_r2r(r6, r7); /* r7 = 30 21 15 06 = R3 */ - pand_m2r(*M(1), r0); /* r0 = __ __ 20 __ */ - psrlq_i2r(32, r4); /* r4 = __ __ 17 16 */ - movq_r2m(r7, *(output_data_bytes+48)); /* write R3 = r7 */ - movq_r2r(r4, r6); /* r6 = __ __ 17 16 */ - movq_m2r(*M(3), r7); /* r7 = FF __ __ __ */ - pand_r2r(r2, r4); /* r4 = __ __ __ 16 */ - movq_m2r(*M(1), r3); /* r3 = __ __ FF __ */ - pand_r2r(r1, r7); /* r7 = 37 __ __ __ */ - pand_r2r(r5, r3); /* r3 = __ __ 31 __ */ - por_r2r(r4, r0); /* r0 = __ __ 20 16 */ - psllq_i2r(16, r3); /* r3 = __ 31 __ __ */ - por_r2r(r0, r7); /* r7 = 37 __ 20 16 */ - movq_m2r(*M(2), r4); /* r4 = __ FF __ __ */ - por_r2r(r3, r7); /* r7 = 37 31 20 16 = R4 */ - movq_m2r(*(input_bytes+80), r0); - movq_r2r(r4, r3); /* r3 = __ __ FF __ */ - pmullw_m2r(*(dequant_matrix_bytes+80), r0); /* r0 = 53 52 51 50 */ - pand_r2r(r5, r4); /* r4 = __ 32 __ __ */ - movq_r2m(r7, *(output_data_bytes+8)); /* write R4 = r7 */ - por_r2r(r4, r6); /* r6 = __ 32 17 16 */ - movq_r2r(r3, r4); /* r4 = __ FF __ __ */ - psrlq_i2r(16, r6); /* r6 = __ __ 32 17 */ - movq_r2r(r0, r7); /* r7 = 53 52 51 50 */ - pand_r2r(r1, r4); /* r4 = __ 36 __ __ */ - psllq_i2r(48, r7); /* r7 = 50 __ __ __ */ - por_r2r(r4, r6); /* r6 = __ 36 32 17 */ - movq_m2r(*(input_bytes+88), r4); - por_r2r(r6, r7); /* r7 = 50 36 32 17 = R5 */ - 
pmullw_m2r(*(dequant_matrix_bytes+88), r4); /* r4 = 57 56 55 54 */ - psrlq_i2r(16, r3); /* r3 = __ __ FF __ */ - movq_r2m(r7, *(output_data_bytes+24)); /* write R5 = r7 */ - pand_r2r(r1, r3); /* r3 = __ __ 35 __ */ - psrlq_i2r(48, r5); /* r5 = __ __ __ 33 */ - pand_r2r(r2, r1); /* r1 = __ __ __ 34 */ - movq_m2r(*(input_bytes+104), r6); - por_r2r(r3, r5); /* r5 = __ __ 35 33 */ - pmullw_m2r(*(dequant_matrix_bytes+104), r6); /* r6 = 67 66 65 64 */ - psrlq_i2r(16, r0); /* r0 = __ 53 52 51 */ - movq_r2r(r4, r7); /* r7 = 57 56 55 54 */ - movq_r2r(r2, r3); /* r3 = __ __ __ FF */ - psllq_i2r(48, r7); /* r7 = 54 __ __ __ */ - pand_r2r(r0, r3); /* r3 = __ __ __ 51 */ - pxor_r2r(r3, r0); /* r0 = __ 53 52 __ */ - psllq_i2r(32, r3); /* r3 = __ 51 __ __ */ - por_r2r(r5, r7); /* r7 = 54 __ 35 33 */ - movq_r2r(r6, r5); /* r5 = 67 66 65 64 */ - pand_m2r(*M(1), r6); /* r6 = __ __ 65 __ */ - por_r2r(r3, r7); /* r7 = 54 51 35 33 = R6 */ - psllq_i2r(32, r6); /* r6 = 65 __ __ __ */ - por_r2r(r1, r0); /* r0 = __ 53 52 34 */ - movq_r2m(r7, *(output_data_bytes+40)); /* write R6 = r7 */ - por_r2r(r6, r0); /* r0 = 65 53 52 34 = R7 */ - movq_m2r(*(input_bytes+120), r7); - movq_r2r(r5, r6); /* r6 = 67 66 65 64 */ - pmullw_m2r(*(dequant_matrix_bytes+120), r7); /* r7 = 77 76 75 74 */ - psrlq_i2r(32, r5); /* r5 = __ __ 67 66 */ - pand_r2r(r2, r6); /* r6 = __ __ __ 64 */ - movq_r2r(r5, r1); /* r1 = __ __ 67 66 */ - movq_r2m(r0, *(output_data_bytes+56)); /* write R7 = r0 */ - pand_r2r(r2, r1); /* r1 = __ __ __ 66 */ - movq_m2r(*(input_bytes+112), r0); - movq_r2r(r7, r3); /* r3 = 77 76 75 74 */ - pmullw_m2r(*(dequant_matrix_bytes+112), r0); /* r0 = 73 72 71 70 */ - psllq_i2r(16, r3); /* r3 = 76 75 74 __ */ - pand_m2r(*M(3), r7); /* r7 = 77 __ __ __ */ - pxor_r2r(r1, r5); /* r5 = __ __ 67 __ */ - por_r2r(r5, r6); /* r6 = __ __ 67 64 */ - movq_r2r(r3, r5); /* r5 = 76 75 74 __ */ - pand_m2r(*M(3), r5); /* r5 = 76 __ __ __ */ - por_r2r(r1, r7); /* r7 = 77 __ __ 66 */ - movq_m2r(*(input_bytes+96), r1); 
- pxor_r2r(r5, r3); /* r3 = __ 75 74 __ */ - pmullw_m2r(*(dequant_matrix_bytes+96), r1); /* r1 = 63 62 61 60 */ - por_r2r(r3, r7); /* r7 = 77 75 74 66 = R15 */ - por_r2r(r5, r6); /* r6 = 76 __ 67 64 */ - movq_r2r(r0, r5); /* r5 = 73 72 71 70 */ - movq_r2m(r7, *(output_data_bytes+120)); /* store R15 = r7 */ - psrlq_i2r(16, r5); /* r5 = __ 73 72 71 */ - pand_m2r(*M(2), r5); /* r5 = __ 73 __ __ */ - movq_r2r(r0, r7); /* r7 = 73 72 71 70 */ - por_r2r(r5, r6); /* r6 = 76 73 67 64 = R14 */ - pand_r2r(r2, r0); /* r0 = __ __ __ 70 */ - pxor_r2r(r0, r7); /* r7 = 73 72 71 __ */ - psllq_i2r(32, r0); /* r0 = __ 70 __ __ */ - movq_r2m(r6, *(output_data_bytes+104)); /* write R14 = r6 */ - psrlq_i2r(16, r4); /* r4 = __ 57 56 55 */ - movq_m2r(*(input_bytes+72), r5); - psllq_i2r(16, r7); /* r7 = 72 71 __ __ */ - pmullw_m2r(*(dequant_matrix_bytes+72), r5); /* r5 = 47 46 45 44 */ - movq_r2r(r7, r6); /* r6 = 72 71 __ __ */ - movq_m2r(*M(2), r3); /* r3 = __ FF __ __ */ - psllq_i2r(16, r6); /* r6 = 71 __ __ __ */ - pand_m2r(*M(3), r7); /* r7 = 72 __ __ __ */ - pand_r2r(r1, r3); /* r3 = __ 62 __ __ */ - por_r2r(r0, r7); /* r7 = 72 70 __ __ */ - movq_r2r(r1, r0); /* r0 = 63 62 61 60 */ - pand_m2r(*M(3), r1); /* r1 = 63 __ __ __ */ - por_r2r(r3, r6); /* r6 = 71 62 __ __ */ - movq_r2r(r4, r3); /* r3 = __ 57 56 55 */ - psrlq_i2r(32, r1); /* r1 = __ __ 63 __ */ - pand_r2r(r2, r3); /* r3 = __ __ __ 55 */ - por_r2r(r1, r7); /* r7 = 72 70 63 __ */ - por_r2r(r3, r7); /* r7 = 72 70 63 55 = R13 */ - movq_r2r(r4, r3); /* r3 = __ 57 56 55 */ - pand_m2r(*M(1), r3); /* r3 = __ __ 56 __ */ - movq_r2r(r5, r1); /* r1 = 47 46 45 44 */ - movq_r2m(r7, *(output_data_bytes+88)); /* write R13 = r7 */ - psrlq_i2r(48, r5); /* r5 = __ __ __ 47 */ - movq_m2r(*(input_bytes+64), r7); - por_r2r(r3, r6); /* r6 = 71 62 56 __ */ - pmullw_m2r(*(dequant_matrix_bytes+64), r7); /* r7 = 43 42 41 40 */ - por_r2r(r5, r6); /* r6 = 71 62 56 47 = R12 */ - pand_m2r(*M(2), r4); /* r4 = __ 57 __ __ */ - psllq_i2r(32, r0); /* r0 = 61 
60 __ __ */ - movq_r2m(r6, *(output_data_bytes+72)); /* write R12 = r6 */ - movq_r2r(r0, r6); /* r6 = 61 60 __ __ */ - pand_m2r(*M(3), r0); /* r0 = 61 __ __ __ */ - psllq_i2r(16, r6); /* r6 = 60 __ __ __ */ - movq_m2r(*(input_bytes+40), r5); - movq_r2r(r1, r3); /* r3 = 47 46 45 44 */ - pmullw_m2r(*(dequant_matrix_bytes+40), r5); /* r5 = 27 26 25 24 */ - psrlq_i2r(16, r1); /* r1 = __ 47 46 45 */ - pand_m2r(*M(1), r1); /* r1 = __ __ 46 __ */ - por_r2r(r4, r0); /* r0 = 61 57 __ __ */ - pand_r2r(r7, r2); /* r2 = __ __ __ 40 */ - por_r2r(r1, r0); /* r0 = 61 57 46 __ */ - por_r2r(r2, r0); /* r0 = 61 57 46 40 = R11 */ - psllq_i2r(16, r3); /* r3 = 46 45 44 __ */ - movq_r2r(r3, r4); /* r4 = 46 45 44 __ */ - movq_r2r(r5, r2); /* r2 = 27 26 25 24 */ - movq_r2m(r0, *(output_data_bytes+112)); /* write R11 = r0 */ - psrlq_i2r(48, r2); /* r2 = __ __ __ 27 */ - pand_m2r(*M(2), r4); /* r4 = __ 45 __ __ */ - por_r2r(r2, r6); /* r6 = 60 __ __ 27 */ - movq_m2r(*M(1), r2); /* r2 = __ __ FF __ */ - por_r2r(r4, r6); /* r6 = 60 45 __ 27 */ - pand_r2r(r7, r2); /* r2 = __ __ 41 __ */ - psllq_i2r(32, r3); /* r3 = 44 __ __ __ */ - por_m2r(*(output_data_bytes+80), r3); /* r3 = 44 __ __ 23 */ - por_r2r(r2, r6); /* r6 = 60 45 41 27 = R10 */ - movq_m2r(*M(3), r2); /* r2 = FF __ __ __ */ - psllq_i2r(16, r5); /* r5 = 26 25 24 __ */ - movq_r2m(r6, *(output_data_bytes+96)); /* store R10 = r6 */ - pand_r2r(r5, r2); /* r2 = 26 __ __ __ */ - movq_m2r(*M(2), r6); /* r6 = __ FF __ __ */ - pxor_r2r(r2, r5); /* r5 = __ 25 24 __ */ - pand_r2r(r7, r6); /* r6 = __ 42 __ __ */ - psrlq_i2r(32, r2); /* r2 = __ __ 26 __ */ - pand_m2r(*M(3), r7); /* r7 = 43 __ __ __ */ - por_r2r(r2, r3); /* r3 = 44 __ 26 23 */ - por_m2r(*(output_data_bytes+64), r7); /* r7 = 43 __ __ 12 */ - por_r2r(r3, r6); /* r6 = 44 42 26 23 = R9 */ - por_r2r(r5, r7); /* r7 = 43 25 24 12 = R8 */ - movq_r2m(r6, *(output_data_bytes+80)); /* store R9 = r6 */ - movq_r2m(r7, *(output_data_bytes+64)); /* store R8 = r7 */ - - -#undef M - /* at this 
point, function has completed dequantization + dezigzag + * partial transposition; now do the idct itself */ - #define I(K) (output_data + K * 8) #define J(K) (output_data + ((K - 4) * 8) + 4) diff --git a/src/libffmpeg/libavcodec/i386/vp3dsp_sse2.c b/src/libffmpeg/libavcodec/i386/vp3dsp_sse2.c index 60c6bf80e..9c69ddb21 100644 --- a/src/libffmpeg/libavcodec/i386/vp3dsp_sse2.c +++ b/src/libffmpeg/libavcodec/i386/vp3dsp_sse2.c @@ -796,24 +796,16 @@ static unsigned short __align16 SSE2_idct_data[7 * 8] = } /* end of SSE2_Dequantize Macro */ -void vp3_dsp_init_sse2(void) -{ - /* nop */ -} - - -void vp3_idct_sse2(int16_t *input_data, int16_t *dequant_matrix, - int coeff_count, int16_t *output_data) +void ff_vp3_idct_sse2(int16_t *input_data) { unsigned char *input_bytes = (unsigned char *)input_data; - unsigned char *dequant_matrix_bytes = (unsigned char *)dequant_matrix; unsigned char *dequant_const_bytes = (unsigned char *)SSE2_dequant_const; - unsigned char *output_data_bytes = (unsigned char *)output_data; + unsigned char *output_data_bytes = (unsigned char *)input_data; unsigned char *idct_data_bytes = (unsigned char *)SSE2_idct_data; unsigned char *Eight = (unsigned char *)eight_data; #define eax input_bytes -#define ebx dequant_matrix_bytes +//#define ebx dequant_matrix_bytes #define ecx dequant_const_bytes #define edx idct_data_bytes @@ -821,7 +813,7 @@ void vp3_idct_sse2(int16_t *input_data, int16_t *dequant_matrix, #define O(i) (ebx + 16 * i) #define C(i) (edx + 16 * (i-1)) - SSE2_Dequantize(); + // SSE2_Dequantize(); #undef ebx #define ebx output_data_bytes diff --git a/src/libffmpeg/libavcodec/indeo2.c b/src/libffmpeg/libavcodec/indeo2.c index 1cee019dd..25561ec2d 100644 --- a/src/libffmpeg/libavcodec/indeo2.c +++ b/src/libffmpeg/libavcodec/indeo2.c @@ -22,7 +22,7 @@ * @file indeo2.c * Intel Indeo 2 decoder. 
*/ - +#define ALT_BITSTREAM_READER_LE #include "avcodec.h" #include "bitstream.h" #include "indeo2data.h" @@ -156,10 +156,10 @@ static int ir2_decode_frame(AVCodecContext *avctx, s->decode_delta = buf[18]; /* decide whether frame uses deltas or not */ - +#ifndef ALT_BITSTREAM_READER_LE for (i = 0; i < buf_size; i++) buf[i] = ff_reverse[buf[i]]; - +#endif start = 48; /* hardcoded for now */ init_get_bits(&s->gb, buf + start, buf_size - start); @@ -198,8 +198,12 @@ static int ir2_decode_init(AVCodecContext *avctx){ if (!ir2_vlc.table) init_vlc(&ir2_vlc, CODE_VLC_BITS, IR2_CODES, &ir2_codes[0][1], 4, 2, - &ir2_codes[0][0], 4, 2, 1); - +#ifdef ALT_BITSTREAM_READER_LE + &ir2_codes[0][0], 4, 2, INIT_VLC_USE_STATIC | INIT_VLC_LE); +#else + &ir2_codes[0][0], 4, 2, INIT_VLC_USE_STATIC); +#endif + return 0; } diff --git a/src/libffmpeg/libavcodec/indeo2data.h b/src/libffmpeg/libavcodec/indeo2data.h index f58b8415f..58e7e48dc 100644 --- a/src/libffmpeg/libavcodec/indeo2data.h +++ b/src/libffmpeg/libavcodec/indeo2data.h @@ -1,5 +1,43 @@ #define IR2_CODES 143 static const uint16_t ir2_codes[IR2_CODES][2] = { +#ifdef ALT_BITSTREAM_READER_LE +{0x0000, 3}, {0x0004, 3}, {0x0006, 3}, {0x0001, 5}, +{0x0009, 5}, {0x0019, 5}, {0x000D, 5}, {0x001D, 5}, +{0x0023, 6}, {0x0013, 6}, {0x0033, 6}, {0x000B, 6}, +{0x002B, 6}, {0x001B, 6}, {0x0007, 8}, {0x0087, 8}, +{0x0027, 8}, {0x00A7, 8}, {0x0067, 8}, {0x00E7, 8}, +{0x0097, 8}, {0x0057, 8}, {0x0037, 8}, {0x00B7, 8}, +{0x00F7, 8}, {0x000F, 9}, {0x008F, 9}, {0x018F, 9}, +{0x014F, 9}, {0x00CF, 9}, {0x002F, 9}, {0x012F, 9}, +{0x01AF, 9}, {0x006F, 9}, {0x00EF, 9}, {0x01EF, 9}, +{0x001F, 10}, {0x021F, 10}, {0x011F, 10}, {0x031F, 10}, +{0x009F, 10}, {0x029F, 10}, {0x019F, 10}, {0x039F, 10}, +{0x005F, 10}, {0x025F, 10}, {0x015F, 10}, {0x035F, 10}, +{0x00DF, 10}, {0x02DF, 10}, {0x01DF, 10}, {0x03DF, 10}, +{0x003F, 13}, {0x103F, 13}, {0x083F, 13}, {0x183F, 13}, +{0x043F, 13}, {0x143F, 13}, {0x0C3F, 13}, {0x1C3F, 13}, +{0x023F, 13}, {0x123F, 13}, 
{0x0A3F, 13}, {0x1A3F, 13}, +{0x063F, 13}, {0x163F, 13}, {0x0E3F, 13}, {0x1E3F, 13}, +{0x013F, 13}, {0x113F, 13}, {0x093F, 13}, {0x193F, 13}, +{0x053F, 13}, {0x153F, 13}, {0x0D3F, 13}, {0x1D3F, 13}, +{0x033F, 13}, {0x133F, 13}, {0x0B3F, 13}, {0x1B3F, 13}, +{0x073F, 13}, {0x173F, 13}, {0x0F3F, 13}, {0x1F3F, 13}, +{0x00BF, 13}, {0x10BF, 13}, {0x08BF, 13}, {0x18BF, 13}, +{0x04BF, 13}, {0x14BF, 13}, {0x0CBF, 13}, {0x1CBF, 13}, +{0x02BF, 13}, {0x12BF, 13}, {0x0ABF, 13}, {0x1ABF, 13}, +{0x06BF, 13}, {0x16BF, 13}, {0x0EBF, 13}, {0x1EBF, 13}, +{0x01BF, 13}, {0x11BF, 13}, {0x09BF, 13}, {0x19BF, 13}, +{0x05BF, 13}, {0x15BF, 13}, {0x0DBF, 13}, {0x1DBF, 13}, +{0x03BF, 13}, {0x13BF, 13}, {0x0BBF, 13}, {0x1BBF, 13}, +{0x07BF, 13}, {0x17BF, 13}, {0x0FBF, 13}, {0x1FBF, 13}, +{0x007F, 14}, {0x207F, 14}, {0x107F, 14}, {0x307F, 14}, +{0x087F, 14}, {0x287F, 14}, {0x187F, 14}, {0x387F, 14}, +{0x047F, 14}, {0x247F, 14}, {0x147F, 14}, {0x0002, 3}, +{0x0011, 5}, {0x0005, 5}, {0x0015, 5}, {0x0003, 6}, +{0x003B, 6}, {0x0047, 8}, {0x00C7, 8}, {0x0017, 8}, +{0x00D7, 8}, {0x0077, 8}, {0x010F, 9}, {0x004F, 9}, +{0x01CF, 9}, {0x00AF, 9}, {0x016F, 9}, +#else {0x0000, 3}, {0x0001, 3}, {0x0003, 3}, {0x0010, 5}, {0x0012, 5}, {0x0013, 5}, {0x0016, 5}, {0x0017, 5}, {0x0031, 6}, {0x0032, 6}, {0x0033, 6}, {0x0034, 6}, @@ -36,6 +74,7 @@ static const uint16_t ir2_codes[IR2_CODES][2] = { {0x0037, 6}, {0x00E2, 8}, {0x00E3, 8}, {0x00E8, 8}, {0x00EB, 8}, {0x00EE, 8}, {0x01E1, 9}, {0x01E4, 9}, {0x01E7, 9}, {0x01EA, 9}, {0x01ED, 9} +#endif }; static const uint8_t ir2_luma_table[256] = { diff --git a/src/libffmpeg/libavcodec/libpostproc/postprocess.c b/src/libffmpeg/libavcodec/libpostproc/postprocess.c index e7ca0191d..9f3e522ed 100644 --- a/src/libffmpeg/libavcodec/libpostproc/postprocess.c +++ b/src/libffmpeg/libavcodec/libpostproc/postprocess.c @@ -747,6 +747,7 @@ char *pp_help= "l5 lowpass5 FIR lowpass deinterlacer\n" "de default hb:a,vb:a,dr:a\n" "fa fast h1:a,v1:a,dr:a\n" +"ac ha:a:128:7,va:a,dr:a\n" "tn 
tmpnoise (3 threshold) temporal noise reducer\n" " 1. <= 2. <= 3. larger -> stronger filtering\n" "fq forceQuant force quantizer\n" diff --git a/src/libffmpeg/libavcodec/libpostproc/postprocess_template.c b/src/libffmpeg/libavcodec/libpostproc/postprocess_template.c index 0c99260a7..0b88be69d 100644 --- a/src/libffmpeg/libavcodec/libpostproc/postprocess_template.c +++ b/src/libffmpeg/libavcodec/libpostproc/postprocess_template.c @@ -3467,7 +3467,7 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int uint8_t *dstBlock= tempDst + dstStride; // From this point on it is guranteed that we can read and write 16 lines downward - // finish 1 block before the next otherwise we´ll might have a problem + // finish 1 block before the next otherwise we might have a problem // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing for(x=0; xavctx->time_base.num; int64_t n1= 1001LL*s->avctx->time_base.den; - if(s->avctx->strict_std_compliance >= 0 && i>=9) break; + if(s->avctx->strict_std_compliance > FF_COMPLIANCE_INOFFICIAL && i>=9) break; d = ABS(n0 - n1); if(d < dmin){ @@ -217,7 +217,7 @@ static int encode_init(AVCodecContext *avctx) return -1; if(find_frame_rate_index(s) < 0){ - if(s->strict_std_compliance >=0){ + if(s->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){ av_log(avctx, AV_LOG_ERROR, "MPEG1/2 does not support %d/%d fps\n", avctx->time_base.den, avctx->time_base.num); return -1; }else{ @@ -460,8 +460,8 @@ void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number) put_bits(&s->pb, 1, s->intra_vlc_format); put_bits(&s->pb, 1, s->alternate_scan); put_bits(&s->pb, 1, s->repeat_first_field); - put_bits(&s->pb, 1, s->chroma_420_type=1); s->progressive_frame = s->progressive_sequence; + put_bits(&s->pb, 1, s->chroma_420_type=s->progressive_frame); put_bits(&s->pb, 1, s->progressive_frame); put_bits(&s->pb, 1, 0); //composite_display_flag } @@ -2075,7 +2075,10 @@ static int 
mpeg_decode_postinit(AVCodecContext *avctx){ { if (s1->mpeg_enc_ctx_allocated) { + ParseContext pc= s->parse_context; + s->parse_context.buffer=0; MPV_common_end(s); + s->parse_context= pc; } if( (s->width == 0 )||(s->height == 0)) @@ -2613,6 +2616,7 @@ static int mpeg_decode_slice(Mpeg1Context *s1, int mb_y, s->current_picture.motion_val[dir][xy + 1][1] = motion_y; s->current_picture.ref_index [dir][xy ]= s->current_picture.ref_index [dir][xy + 1]= s->field_select[dir][i]; + assert(s->field_select[dir][i]==0 || s->field_select[dir][i]==1); } xy += wrap; } diff --git a/src/libffmpeg/libavcodec/mpegvideo.c b/src/libffmpeg/libavcodec/mpegvideo.c index 0bdfd6304..d37087c3f 100644 --- a/src/libffmpeg/libavcodec/mpegvideo.c +++ b/src/libffmpeg/libavcodec/mpegvideo.c @@ -906,12 +906,12 @@ int MPV_encode_init(AVCodecContext *avctx) } if(avctx->codec_id == CODEC_ID_MJPEG || avctx->codec_id == CODEC_ID_LJPEG){ - if(avctx->strict_std_compliance>=0 && avctx->pix_fmt != PIX_FMT_YUVJ420P){ + if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUVJ420P){ av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n"); return -1; } }else{ - if(avctx->strict_std_compliance>=0 && avctx->pix_fmt != PIX_FMT_YUV420P){ + if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUV420P){ av_log(avctx, AV_LOG_ERROR, "colorspace not supported\n"); return -1; } @@ -1073,6 +1073,11 @@ int MPV_encode_init(AVCodecContext *avctx) return -1; } + if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){ + av_log(avctx, AV_LOG_ERROR, "b_frame_strategy must be 0 on the second pass"); + return -1; + } + i= ff_gcd(avctx->time_base.den, avctx->time_base.num); if(i > 1){ av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n"); @@ -1099,7 +1104,7 @@ int MPV_encode_init(AVCodecContext *avctx) avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift); - if(s->avctx->time_base.den > (1<<16)-1){ + 
if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){ av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n"); return -1; } @@ -1784,7 +1789,7 @@ void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){ else if(IS_16X8(mb_type)) av_log(s->avctx, AV_LOG_DEBUG, "-"); else if(IS_8X16(mb_type)) - av_log(s->avctx, AV_LOG_DEBUG, "¦"); + av_log(s->avctx, AV_LOG_DEBUG, "|"); else if(IS_INTRA(mb_type) || IS_16X16(mb_type)) av_log(s->avctx, AV_LOG_DEBUG, " "); else @@ -2214,7 +2219,7 @@ static void select_input_picture(MpegEncContext *s){ s->input_picture[i-1]->data[0], s->linesize) + 1; } } - for(i=0; imax_b_frames; i++){ + for(i=0; imax_b_frames+1; i++){ if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break; } @@ -5442,7 +5447,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) for(i=1;i<64;i++){ int j= s->dsp.idct_permutation[i]; - s->intra_matrix[j] = clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3); + s->intra_matrix[j] = clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3) & 0xFF; } convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, s->intra_matrix, s->intra_quant_bias, 8, 8, 1); diff --git a/src/libffmpeg/libavcodec/raw.c b/src/libffmpeg/libavcodec/raw.c index 957a809d8..e2614e503 100644 --- a/src/libffmpeg/libavcodec/raw.c +++ b/src/libffmpeg/libavcodec/raw.c @@ -172,6 +172,7 @@ static int raw_encode(AVCodecContext *avctx, avctx->height, frame, buf_size); } +#ifdef CONFIG_RAWVIDEO_ENCODER AVCodec rawvideo_encoder = { "rawvideo", CODEC_TYPE_VIDEO, @@ -180,6 +181,7 @@ AVCodec rawvideo_encoder = { raw_init_encoder, raw_encode, }; +#endif // CONFIG_RAWVIDEO_ENCODER AVCodec rawvideo_decoder = { "rawvideo", diff --git a/src/libffmpeg/libavcodec/snow.c b/src/libffmpeg/libavcodec/snow.c index d6f9a14a9..10f0b76ab 100644 --- a/src/libffmpeg/libavcodec/snow.c +++ b/src/libffmpeg/libavcodec/snow.c @@ -3345,9 +3345,9 @@ static int 
encode_init(AVCodecContext *avctx) SnowContext *s = avctx->priv_data; int plane_index; - if(avctx->strict_std_compliance >= 0){ + if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){ av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n" - "use vstrict=-1 / -strict -1 to use it anyway\n"); + "use vstrict=-2 / -strict -2 to use it anyway\n"); return -1; } diff --git a/src/libffmpeg/libavcodec/utils.c b/src/libffmpeg/libavcodec/utils.c index b5bd17f4a..d1debfe40 100644 --- a/src/libffmpeg/libavcodec/utils.c +++ b/src/libffmpeg/libavcodec/utils.c @@ -49,6 +49,25 @@ const uint8_t ff_log2_tab[256]={ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 }; +const uint8_t ff_reverse[256]={ +0x00,0x80,0x40,0xC0,0x20,0xA0,0x60,0xE0,0x10,0x90,0x50,0xD0,0x30,0xB0,0x70,0xF0, +0x08,0x88,0x48,0xC8,0x28,0xA8,0x68,0xE8,0x18,0x98,0x58,0xD8,0x38,0xB8,0x78,0xF8, +0x04,0x84,0x44,0xC4,0x24,0xA4,0x64,0xE4,0x14,0x94,0x54,0xD4,0x34,0xB4,0x74,0xF4, +0x0C,0x8C,0x4C,0xCC,0x2C,0xAC,0x6C,0xEC,0x1C,0x9C,0x5C,0xDC,0x3C,0xBC,0x7C,0xFC, +0x02,0x82,0x42,0xC2,0x22,0xA2,0x62,0xE2,0x12,0x92,0x52,0xD2,0x32,0xB2,0x72,0xF2, +0x0A,0x8A,0x4A,0xCA,0x2A,0xAA,0x6A,0xEA,0x1A,0x9A,0x5A,0xDA,0x3A,0xBA,0x7A,0xFA, +0x06,0x86,0x46,0xC6,0x26,0xA6,0x66,0xE6,0x16,0x96,0x56,0xD6,0x36,0xB6,0x76,0xF6, +0x0E,0x8E,0x4E,0xCE,0x2E,0xAE,0x6E,0xEE,0x1E,0x9E,0x5E,0xDE,0x3E,0xBE,0x7E,0xFE, +0x01,0x81,0x41,0xC1,0x21,0xA1,0x61,0xE1,0x11,0x91,0x51,0xD1,0x31,0xB1,0x71,0xF1, +0x09,0x89,0x49,0xC9,0x29,0xA9,0x69,0xE9,0x19,0x99,0x59,0xD9,0x39,0xB9,0x79,0xF9, +0x05,0x85,0x45,0xC5,0x25,0xA5,0x65,0xE5,0x15,0x95,0x55,0xD5,0x35,0xB5,0x75,0xF5, +0x0D,0x8D,0x4D,0xCD,0x2D,0xAD,0x6D,0xED,0x1D,0x9D,0x5D,0xDD,0x3D,0xBD,0x7D,0xFD, +0x03,0x83,0x43,0xC3,0x23,0xA3,0x63,0xE3,0x13,0x93,0x53,0xD3,0x33,0xB3,0x73,0xF3, +0x0B,0x8B,0x4B,0xCB,0x2B,0xAB,0x6B,0xEB,0x1B,0x9B,0x5B,0xDB,0x3B,0xBB,0x7B,0xFB, 
+0x07,0x87,0x47,0xC7,0x27,0xA7,0x67,0xE7,0x17,0x97,0x57,0xD7,0x37,0xB7,0x77,0xF7, +0x0F,0x8F,0x4F,0xCF,0x2F,0xAF,0x6F,0xEF,0x1F,0x9F,0x5F,0xDF,0x3F,0xBF,0x7F,0xFF, +}; + void avcodec_default_free_buffers(AVCodecContext *s); void *av_mallocz(unsigned int size) @@ -942,7 +961,7 @@ int64_t ff_gcd(int64_t a, int64_t b){ /* av_log API */ -static int av_log_level = AV_LOG_DEBUG; +static int av_log_level = AV_LOG_INFO; static void av_log_default_callback(void* ptr, int level, const char* fmt, va_list vl) { @@ -996,3 +1015,17 @@ int avcodec_thread_init(AVCodecContext *s, int thread_count){ return -1; } #endif + +unsigned int av_xiphlacing(unsigned char *s, unsigned int v) +{ + unsigned int n = 0; + + while(v >= 0xff) { + *s++ = 0xff; + v -= 0xff; + n++; + } + *s = v; + n++; + return n; +} diff --git a/src/libffmpeg/libavcodec/vp3.c b/src/libffmpeg/libavcodec/vp3.c index 659d6913b..757036d97 100644 --- a/src/libffmpeg/libavcodec/vp3.c +++ b/src/libffmpeg/libavcodec/vp3.c @@ -15,17 +15,17 @@ * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * VP3 Video Decoder by Mike Melanson (melanson@pcisys.net) - * For more information about the VP3 coding process, visit: - * http://www.pcisys.net/~melanson/codecs/ - * - * Theora decoder by Alex Beregszaszi - * */ /** * @file vp3.c * On2 VP3 Video Decoder + * + * VP3 Video Decoder by Mike Melanson (mike at multimedia.cx) + * For more information about the VP3 coding process, visit: + * http://multimedia.cx/ + * + * Theora decoder by Alex Beregszaszi */ #include @@ -134,18 +134,24 @@ static inline void debug_dc_pred(const char *format, ...) { } static inline void debug_idct(const char *format, ...) 
{ } #endif +typedef struct Coeff { + struct Coeff *next; + DCTELEM coeff; + uint8_t index; +} Coeff; + +//FIXME split things out into their own arrays typedef struct Vp3Fragment { - DCTELEM coeffs[64]; - int coding_method; - int coeff_count; - int last_coeff; - int motion_x; - int motion_y; + Coeff *next_coeff; /* address of first pixel taking into account which plane the fragment * lives on as well as the plane stride */ int first_pixel; /* this is the macroblock that the fragment belongs to */ - int macroblock; + uint16_t macroblock; + uint8_t coding_method; + uint8_t coeff_count; + int8_t motion_x; + int8_t motion_y; } Vp3Fragment; #define SB_NOT_CODED 0 @@ -246,9 +252,13 @@ typedef struct Vp3DecodeContext { int fragment_height; Vp3Fragment *all_fragments; + Coeff *coeffs; + Coeff *next_coeff; int u_fragment_start; int v_fragment_start; + ScanTable scantable; + /* tables */ uint16_t coded_dc_scale_factor[64]; uint32_t coded_ac_scale_factor[64]; @@ -268,6 +278,11 @@ typedef struct Vp3DecodeContext { VLC ac_vlc_3[16]; VLC ac_vlc_4[16]; + VLC superblock_run_length_vlc; + VLC fragment_run_length_vlc; + VLC mode_code_vlc; + VLC motion_vector_vlc; + /* these arrays need to be on 16-byte boundaries since SSE2 operations * index into them */ int16_t __align16 intra_y_dequant[64]; @@ -301,6 +316,16 @@ typedef struct Vp3DecodeContext { uint8_t edge_emu_buffer[9*2048]; //FIXME dynamic alloc uint8_t qscale_table[2048]; //FIXME dynamic alloc (width+15)/16 + + /* Huffman decode */ + int hti; + unsigned int hbits; + int entries; + int huff_code_size; + uint16_t huffman_table[80][32][2]; + + uint32_t filter_limit_values[64]; + int bounding_values_array[256]; } Vp3DecodeContext; static int theora_decode_comments(AVCodecContext *avctx, GetBitContext gb); @@ -829,16 +854,18 @@ static void init_frame(Vp3DecodeContext *s, GetBitContext *gb) /* zero out all of the fragment information */ s->coded_fragment_list_index = 0; for (i = 0; i < s->fragment_count; i++) { - 
memset(s->all_fragments[i].coeffs, 0, 64 * sizeof(DCTELEM)); s->all_fragments[i].coeff_count = 0; - s->all_fragments[i].last_coeff = 0; -s->all_fragments[i].motion_x = 0xbeef; -s->all_fragments[i].motion_y = 0xbeef; + s->all_fragments[i].motion_x = 127; + s->all_fragments[i].motion_y = 127; + s->all_fragments[i].next_coeff= NULL; + s->coeffs[i].index= + s->coeffs[i].coeff=0; + s->coeffs[i].next= NULL; } } /* - * This function sets of the dequantization tables used for a particular + * This function sets up the dequantization tables used for a particular * frame. */ static void init_dequantizer(Vp3DecodeContext *s) @@ -883,20 +910,20 @@ static void init_dequantizer(Vp3DecodeContext *s) /* scale AC quantizers, zigzag at the same time in preparation for * the dequantization phase */ for (i = 1; i < 64; i++) { + int k= s->scantable.scantable[i]; + j = s->scantable.permutated[i]; - j = zigzag_index[i]; - - s->intra_y_dequant[j] = s->coded_intra_y_dequant[i] * ac_scale_factor / 100; + s->intra_y_dequant[j] = s->coded_intra_y_dequant[k] * ac_scale_factor / 100; if (s->intra_y_dequant[j] < MIN_DEQUANT_VAL) s->intra_y_dequant[j] = MIN_DEQUANT_VAL; s->intra_y_dequant[j] *= SCALER; - s->intra_c_dequant[j] = s->coded_intra_c_dequant[i] * ac_scale_factor / 100; + s->intra_c_dequant[j] = s->coded_intra_c_dequant[k] * ac_scale_factor / 100; if (s->intra_c_dequant[j] < MIN_DEQUANT_VAL) s->intra_c_dequant[j] = MIN_DEQUANT_VAL; s->intra_c_dequant[j] *= SCALER; - s->inter_dequant[j] = s->coded_inter_dequant[i] * ac_scale_factor / 100; + s->inter_dequant[j] = s->coded_inter_dequant[k] * ac_scale_factor / 100; if (s->inter_dequant[j] < MIN_DEQUANT_VAL * 2) s->inter_dequant[j] = MIN_DEQUANT_VAL * 2; s->inter_dequant[j] *= SCALER; @@ -933,6 +960,28 @@ static void init_dequantizer(Vp3DecodeContext *s) debug_dequantizers("\n"); } +/* + * This function initializes the loop filter boundary limits if the frame's + * quality index is different from the previous frame's. 
+ */ +static void init_loop_filter(Vp3DecodeContext *s) +{ + int *bounding_values= s->bounding_values_array+127; + int filter_limit; + int x; + + filter_limit = s->filter_limit_values[s->quality_index]; + + /* set up the bounding values */ + memset(s->bounding_values_array, 0, 256 * sizeof(int)); + for (x = 0; x < filter_limit; x++) { + bounding_values[-x - filter_limit] = -filter_limit + x; + bounding_values[-x] = -x; + bounding_values[x] = x; + bounding_values[x + filter_limit] = filter_limit - x; + } +} + /* * This function is used to fetch runs of 1s or 0s from the bitstream for * use in determining which superblocks are fully and partially coded. @@ -1171,9 +1220,16 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb) * fetched the bit will be toggled again */ bit ^= 1; while (current_superblock < s->superblock_count) { - if (current_run == 0) { + if (current_run-- == 0) { bit ^= 1; +#if 1 + current_run = get_vlc2(gb, + s->superblock_run_length_vlc.table, 6, 2); + if (current_run == 33) + current_run += get_bits(gb, 12); +#else current_run = get_superblock_run_length(gb); +#endif debug_block_coding(" setting superblocks %d..%d to %s\n", current_superblock, current_superblock + current_run - 1, @@ -1190,9 +1246,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb) decode_partial_blocks = 1; } } - s->superblock_coding[current_superblock++] = - (bit) ? 
SB_PARTIALLY_CODED : SB_NOT_CODED; - current_run--; + s->superblock_coding[current_superblock++] = bit; } /* unpack the list of fully coded superblocks if any of the blocks were @@ -1210,17 +1264,22 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb) /* skip any superblocks already marked as partially coded */ if (s->superblock_coding[current_superblock] == SB_NOT_CODED) { - if (current_run == 0) { + if (current_run-- == 0) { bit ^= 1; +#if 1 + current_run = get_vlc2(gb, + s->superblock_run_length_vlc.table, 6, 2); + if (current_run == 33) + current_run += get_bits(gb, 12); +#else current_run = get_superblock_run_length(gb); +#endif } debug_block_coding(" setting superblock %d to %s\n", current_superblock, (bit) ? "fully coded" : "not coded"); - s->superblock_coding[current_superblock] = - (bit) ? SB_FULLY_CODED : SB_NOT_CODED; - current_run--; + s->superblock_coding[current_superblock] = 2*bit; } current_superblock++; } @@ -1241,6 +1300,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb) /* figure out which fragments are coded; iterate through each * superblock (all planes) */ s->coded_fragment_list_index = 0; + s->next_coeff= s->coeffs + s->fragment_count; s->first_coded_y_fragment = s->first_coded_c_fragment = 0; s->last_coded_y_fragment = s->last_coded_c_fragment = -1; first_c_fragment_seen = 0; @@ -1268,9 +1328,14 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb) /* fragment may or may not be coded; this is the case * that cares about the fragment coding runs */ - if (current_run == 0) { + if (current_run-- == 0) { bit ^= 1; +#if 1 + current_run = get_vlc2(gb, + s->fragment_run_length_vlc.table, 5, 2); +#else current_run = get_fragment_run_length(gb); +#endif } if (bit) { @@ -1278,6 +1343,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb) * the next phase */ s->all_fragments[current_fragment].coding_method = MODE_INTER_NO_MV; + 
s->all_fragments[current_fragment].next_coeff= s->coeffs + current_fragment; s->coded_fragment_list[s->coded_fragment_list_index] = current_fragment; if ((current_fragment >= s->u_fragment_start) && @@ -1299,14 +1365,13 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb) i, current_fragment); } - current_run--; - } else { /* fragments are fully coded in this superblock; actual * coding will be determined in next step */ s->all_fragments[current_fragment].coding_method = MODE_INTER_NO_MV; + s->all_fragments[current_fragment].next_coeff= s->coeffs + current_fragment; s->coded_fragment_list[s->coded_fragment_list_index] = current_fragment; if ((current_fragment >= s->u_fragment_start) && @@ -1398,7 +1463,14 @@ static int unpack_modes(Vp3DecodeContext *s, GetBitContext *gb) if (scheme == 7) coding_mode = get_bits(gb, 3); else +{ +#if 1 + coding_mode = ModeAlphabet[scheme] + [get_vlc2(gb, s->mode_code_vlc.table, 3, 3)]; +#else coding_mode = ModeAlphabet[scheme][get_mode_code(gb)]; +#endif +} s->macroblock_coding[current_macroblock] = coding_mode; for (k = 0; k < 6; k++) { @@ -1485,12 +1557,23 @@ static int unpack_vectors(Vp3DecodeContext *s, GetBitContext *gb) case MODE_GOLDEN_MV: /* all 6 fragments use the same motion vector */ if (coding_mode == 0) { +#if 1 + motion_x[0] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)]; + motion_y[0] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)]; +#else motion_x[0] = get_motion_vector_vlc(gb); motion_y[0] = get_motion_vector_vlc(gb); +#endif } else { +#if 1 + motion_x[0] = fixed_motion_vector_table[get_bits(gb, 6)]; + motion_y[0] = fixed_motion_vector_table[get_bits(gb, 6)]; +#else motion_x[0] = get_motion_vector_fixed(gb); motion_y[0] = get_motion_vector_fixed(gb); +#endif } + for (k = 1; k < 6; k++) { motion_x[k] = motion_x[0]; motion_y[k] = motion_y[0]; @@ -1512,11 +1595,21 @@ static int unpack_vectors(Vp3DecodeContext *s, GetBitContext *gb) motion_x[4] = motion_y[4] 
= 0; for (k = 0; k < 4; k++) { if (coding_mode == 0) { +#if 1 + motion_x[k] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)]; + motion_y[k] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)]; +#else motion_x[k] = get_motion_vector_vlc(gb); motion_y[k] = get_motion_vector_vlc(gb); +#endif } else { +#if 1 + motion_x[k] = fixed_motion_vector_table[get_bits(gb, 6)]; + motion_y[k] = fixed_motion_vector_table[get_bits(gb, 6)]; +#else motion_x[k] = get_motion_vector_fixed(gb); motion_y[k] = get_motion_vector_fixed(gb); +#endif } motion_x[4] += motion_x[k]; motion_y[4] += motion_y[k]; @@ -1626,9 +1719,11 @@ static int unpack_vlcs(Vp3DecodeContext *s, GetBitContext *gb, { int i; int token; - int zero_run; - DCTELEM coeff; + int zero_run = 0; + DCTELEM coeff = 0; Vp3Fragment *fragment; + uint8_t *perm= s->scantable.permutated; + int bits_to_get; if ((first_fragment >= s->fragment_count) || (last_fragment >= s->fragment_count)) { @@ -1649,20 +1744,43 @@ static int unpack_vlcs(Vp3DecodeContext *s, GetBitContext *gb, token = get_vlc2(gb, table->table, 5, 3); debug_vlc(" token = %2d, ", token); /* use the token to get a zero run, a coefficient, and an eob run */ +#if 1 + if (token <= 6) { + eob_run = eob_run_base[token]; + if (eob_run_get_bits[token]) + eob_run += get_bits(gb, eob_run_get_bits[token]); + coeff = zero_run = 0; + } else { + bits_to_get = coeff_get_bits[token]; + if (!bits_to_get) + coeff = coeff_tables[token][0]; + else + coeff = coeff_tables[token][get_bits(gb, bits_to_get)]; + + zero_run = zero_run_base[token]; + if (zero_run_get_bits[token]) + zero_run += get_bits(gb, zero_run_get_bits[token]); + } +#else unpack_token(gb, token, &zero_run, &coeff, &eob_run); +#endif } if (!eob_run) { fragment->coeff_count += zero_run; - if (fragment->coeff_count < 64) - fragment->coeffs[fragment->coeff_count++] = coeff; + if (fragment->coeff_count < 64){ + fragment->next_coeff->coeff= coeff; + fragment->next_coeff->index= 
perm[fragment->coeff_count++]; //FIXME perm here already? + fragment->next_coeff->next= s->next_coeff; + s->next_coeff->next=NULL; + fragment->next_coeff= s->next_coeff++; + } debug_vlc(" fragment %d coeff = %d\n", - s->coded_fragment_list[i], fragment->coeffs[coeff_index]); + s->coded_fragment_list[i], fragment->next_coeff[coeff_index]); } else { - fragment->last_coeff = fragment->coeff_count; - fragment->coeff_count = 64; + fragment->coeff_count |= 128; debug_vlc(" fragment %d eob with %d coefficients\n", - s->coded_fragment_list[i], fragment->last_coeff); + s->coded_fragment_list[i], fragment->coeff_count&127); eob_run--; } } @@ -1770,6 +1888,7 @@ static int unpack_dct_coeffs(Vp3DecodeContext *s, GetBitContext *gb) #define COMPATIBLE_FRAME(x) \ (compatible_frame[s->all_fragments[x].coding_method] == current_frame_type) #define FRAME_CODED(x) (s->all_fragments[x].coding_method != MODE_COPY) +#define DC_COEFF(u) (s->coeffs[u].index ? 0 : s->coeffs[u].coeff) //FIXME do somethin to simplify this static inline int iabs (int x) { return ((x < 0) ? 
-x : x); } static void reverse_dc_prediction(Vp3DecodeContext *s, @@ -1880,7 +1999,7 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, predictor_group = (x == 0) + ((y == 0) << 1) + ((x + 1 == fragment_width) << 2); debug_dc_pred(" frag %d: group %d, orig DC = %d, ", - i, predictor_group, s->all_fragments[i].coeffs[0]); + i, predictor_group, DC_COEFF(i)); switch (predictor_group) { @@ -1895,10 +2014,10 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, l = i - 1; /* fetch the DC values for the predicting fragments */ - vul = s->all_fragments[ul].coeffs[0]; - vu = s->all_fragments[u].coeffs[0]; - vur = s->all_fragments[ur].coeffs[0]; - vl = s->all_fragments[l].coeffs[0]; + vul = DC_COEFF(ul); + vu = DC_COEFF(u); + vur = DC_COEFF(ur); + vl = DC_COEFF(l); /* figure out which fragments are valid */ ful = FRAME_CODED(ul) && COMPATIBLE_FRAME(ul); @@ -1920,8 +2039,8 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, ur = i - fragment_width + 1; /* fetch the DC values for the predicting fragments */ - vu = s->all_fragments[u].coeffs[0]; - vur = s->all_fragments[ur].coeffs[0]; + vu = DC_COEFF(u); + vur = DC_COEFF(ur); /* figure out which fragments are valid */ fur = FRAME_CODED(ur) && COMPATIBLE_FRAME(ur); @@ -1941,7 +2060,7 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, l = i - 1; /* fetch the DC values for the predicting fragments */ - vl = s->all_fragments[l].coeffs[0]; + vl = DC_COEFF(l); /* figure out which fragments are valid */ fl = FRAME_CODED(l) && COMPATIBLE_FRAME(l); @@ -1970,9 +2089,9 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, l = i - 1; /* fetch the DC values for the predicting fragments */ - vul = s->all_fragments[ul].coeffs[0]; - vu = s->all_fragments[u].coeffs[0]; - vl = s->all_fragments[l].coeffs[0]; + vul = DC_COEFF(ul); + vu = DC_COEFF(u); + vl = DC_COEFF(l); /* figure out which fragments are valid */ ful = FRAME_CODED(ul) && COMPATIBLE_FRAME(ul); @@ -1992,9 +2111,9 @@ static void 
reverse_dc_prediction(Vp3DecodeContext *s, /* if there were no fragments to predict from, use last * DC saved */ - s->all_fragments[i].coeffs[0] += last_dc[current_frame_type]; + predicted_dc = last_dc[current_frame_type]; debug_dc_pred("from last DC (%d) = %d\n", - current_frame_type, s->all_fragments[i].coeffs[0]); + current_frame_type, DC_COEFF(i)); } else { @@ -2024,17 +2143,304 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, predicted_dc = vul; } - /* at long last, apply the predictor */ - s->all_fragments[i].coeffs[0] += predicted_dc; debug_dc_pred("from pred DC = %d\n", - s->all_fragments[i].coeffs[0]); + DC_COEFF(i)); } + /* at long last, apply the predictor */ + if(s->coeffs[i].index){ + *s->next_coeff= s->coeffs[i]; + s->coeffs[i].index=0; + s->coeffs[i].coeff=0; + s->coeffs[i].next= s->next_coeff++; + } + s->coeffs[i].coeff += predicted_dc; /* save the DC */ - last_dc[current_frame_type] = s->all_fragments[i].coeffs[0]; + last_dc[current_frame_type] = DC_COEFF(i); + if(DC_COEFF(i) && !(s->all_fragments[i].coeff_count&127)){ + s->all_fragments[i].coeff_count= 129; +// s->all_fragments[i].next_coeff= s->next_coeff; + s->coeffs[i].next= s->next_coeff; + (s->next_coeff++)->next=NULL; + } + } + } + } +} + + +static void horizontal_filter(unsigned char *first_pixel, int stride, + int *bounding_values); +static void vertical_filter(unsigned char *first_pixel, int stride, + int *bounding_values); + +/* + * Perform the final rendering for a particular slice of data. + * The slice number ranges from 0..(macroblock_height - 1). 
+ */ +static void render_slice(Vp3DecodeContext *s, int slice) +{ + int x, y; + int m, n; + int i; /* indicates current fragment */ + int16_t *dequantizer; + DCTELEM __align16 block[64]; + unsigned char *output_plane; + unsigned char *last_plane; + unsigned char *golden_plane; + int stride; + int motion_x = 0xdeadbeef, motion_y = 0xdeadbeef; + int upper_motion_limit, lower_motion_limit; + int motion_halfpel_index; + uint8_t *motion_source; + int plane; + int plane_width; + int plane_height; + int slice_height; + int current_macroblock_entry = slice * s->macroblock_width * 6; + int *bounding_values= s->bounding_values_array+127; + int fragment_width; + + if (slice >= s->macroblock_height) + return; + + for (plane = 0; plane < 3; plane++) { + + /* set up plane-specific parameters */ + if (plane == 0) { + output_plane = s->current_frame.data[0]; + last_plane = s->last_frame.data[0]; + golden_plane = s->golden_frame.data[0]; + stride = s->current_frame.linesize[0]; + if (!s->flipped_image) stride = -stride; + upper_motion_limit = 7 * s->current_frame.linesize[0]; + lower_motion_limit = s->height * s->current_frame.linesize[0] + s->width - 8; + y = slice * FRAGMENT_PIXELS * 2; + plane_width = s->width; + plane_height = s->height; + slice_height = y + FRAGMENT_PIXELS * 2; + i = s->macroblock_fragments[current_macroblock_entry + 0]; + } else if (plane == 1) { + output_plane = s->current_frame.data[1]; + last_plane = s->last_frame.data[1]; + golden_plane = s->golden_frame.data[1]; + stride = s->current_frame.linesize[1]; + if (!s->flipped_image) stride = -stride; + upper_motion_limit = 7 * s->current_frame.linesize[1]; + lower_motion_limit = (s->height / 2) * s->current_frame.linesize[1] + (s->width / 2) - 8; + y = slice * FRAGMENT_PIXELS; + plane_width = s->width / 2; + plane_height = s->height / 2; + slice_height = y + FRAGMENT_PIXELS; + i = s->macroblock_fragments[current_macroblock_entry + 4]; + } else { + output_plane = s->current_frame.data[2]; + last_plane = 
s->last_frame.data[2]; + golden_plane = s->golden_frame.data[2]; + stride = s->current_frame.linesize[2]; + if (!s->flipped_image) stride = -stride; + upper_motion_limit = 7 * s->current_frame.linesize[2]; + lower_motion_limit = (s->height / 2) * s->current_frame.linesize[2] + (s->width / 2) - 8; + y = slice * FRAGMENT_PIXELS; + plane_width = s->width / 2; + plane_height = s->height / 2; + slice_height = y + FRAGMENT_PIXELS; + i = s->macroblock_fragments[current_macroblock_entry + 5]; + } + fragment_width = plane_width / FRAGMENT_PIXELS; + + if(ABS(stride) > 2048) + return; //various tables are fixed size + + /* for each fragment row in the slice (both of them)... */ + for (; y < slice_height; y += 8) { + + /* for each fragment in a row... */ + for (x = 0; x < plane_width; x += 8, i++) { + + if ((i < 0) || (i >= s->fragment_count)) { + av_log(s->avctx, AV_LOG_ERROR, " vp3:render_slice(): bad fragment number (%d)\n", i); + return; + } + + /* transform if this block was coded */ + if ((s->all_fragments[i].coding_method != MODE_COPY) && + !((s->avctx->flags & CODEC_FLAG_GRAY) && plane)) { + + if ((s->all_fragments[i].coding_method == MODE_USING_GOLDEN) || + (s->all_fragments[i].coding_method == MODE_GOLDEN_MV)) + motion_source= golden_plane; + else + motion_source= last_plane; + + motion_source += s->all_fragments[i].first_pixel; + motion_halfpel_index = 0; + + /* sort out the motion vector if this fragment is coded + * using a motion vector method */ + if ((s->all_fragments[i].coding_method > MODE_INTRA) && + (s->all_fragments[i].coding_method != MODE_USING_GOLDEN)) { + int src_x, src_y; + motion_x = s->all_fragments[i].motion_x; + motion_y = s->all_fragments[i].motion_y; + if(plane){ + motion_x= (motion_x>>1) | (motion_x&1); + motion_y= (motion_y>>1) | (motion_y&1); + } + + src_x= (motion_x>>1) + x; + src_y= (motion_y>>1) + y; + if ((motion_x == 127) || (motion_y == 127)) + av_log(s->avctx, AV_LOG_ERROR, " help! got invalid motion vector! 
(%X, %X)\n", motion_x, motion_y); + + motion_halfpel_index = motion_x & 0x01; + motion_source += (motion_x >> 1); + + motion_halfpel_index |= (motion_y & 0x01) << 1; + motion_source += ((motion_y >> 1) * stride); + + if(src_x<0 || src_y<0 || src_x + 9 >= plane_width || src_y + 9 >= plane_height){ + uint8_t *temp= s->edge_emu_buffer; + if(stride<0) temp -= 9*stride; + else temp += 9*stride; + + ff_emulated_edge_mc(temp, motion_source, stride, 9, 9, src_x, src_y, plane_width, plane_height); + motion_source= temp; + } + } + + + /* first, take care of copying a block from either the + * previous or the golden frame */ + if (s->all_fragments[i].coding_method != MODE_INTRA) { + /* Note, it is possible to implement all MC cases with + put_no_rnd_pixels_l2 which would look more like the + VP3 source but this would be slower as + put_no_rnd_pixels_tab is better optimzed */ + if(motion_halfpel_index != 3){ + s->dsp.put_no_rnd_pixels_tab[1][motion_halfpel_index]( + output_plane + s->all_fragments[i].first_pixel, + motion_source, stride, 8); + }else{ + int d= (motion_x ^ motion_y)>>31; // d is 0 if motion_x and _y have the same sign, else -1 + s->dsp.put_no_rnd_pixels_l2[1]( + output_plane + s->all_fragments[i].first_pixel, + motion_source - d, + motion_source + stride + 1 + d, + stride, 8); + } + dequantizer = s->inter_dequant; + }else{ + if (plane == 0) + dequantizer = s->intra_y_dequant; + else + dequantizer = s->intra_c_dequant; + } + + /* dequantize the DCT coefficients */ + debug_idct("fragment %d, coding mode %d, DC = %d, dequant = %d:\n", + i, s->all_fragments[i].coding_method, + DC_COEFF(i), dequantizer[0]); + + if(s->avctx->idct_algo==FF_IDCT_VP3){ + Coeff *coeff= s->coeffs + i; + memset(block, 0, sizeof(block)); + while(coeff->next){ + block[coeff->index]= coeff->coeff * dequantizer[coeff->index]; + coeff= coeff->next; + } + }else{ + Coeff *coeff= s->coeffs + i; + memset(block, 0, sizeof(block)); + while(coeff->next){ + block[coeff->index]= (coeff->coeff * 
dequantizer[coeff->index] + 2)>>2; + coeff= coeff->next; + } + } + + /* invert DCT and place (or add) in final output */ + + if (s->all_fragments[i].coding_method == MODE_INTRA) { + if(s->avctx->idct_algo!=FF_IDCT_VP3) + block[0] += 128<<3; + s->dsp.idct_put( + output_plane + s->all_fragments[i].first_pixel, + stride, + block); + } else { + s->dsp.idct_add( + output_plane + s->all_fragments[i].first_pixel, + stride, + block); + } + + debug_idct("block after idct_%s():\n", + (s->all_fragments[i].coding_method == MODE_INTRA)? + "put" : "add"); + for (m = 0; m < 8; m++) { + for (n = 0; n < 8; n++) { + debug_idct(" %3d", *(output_plane + + s->all_fragments[i].first_pixel + (m * stride + n))); + } + debug_idct("\n"); + } + debug_idct("\n"); + + } else { + + /* copy directly from the previous frame */ + s->dsp.put_pixels_tab[1][0]( + output_plane + s->all_fragments[i].first_pixel, + last_plane + s->all_fragments[i].first_pixel, + stride, 8); + + } +#if 0 + /* perform the left edge filter if: + * - the fragment is not on the left column + * - the fragment is coded in this frame + * - the fragment is not coded in this frame but the left + * fragment is coded in this frame (this is done instead + * of a right edge filter when rendering the left fragment + * since this fragment is not available yet) */ + if ((x > 0) && + ((s->all_fragments[i].coding_method != MODE_COPY) || + ((s->all_fragments[i].coding_method == MODE_COPY) && + (s->all_fragments[i - 1].coding_method != MODE_COPY)) )) { + horizontal_filter( + output_plane + s->all_fragments[i].first_pixel + 7*stride, + -stride, bounding_values); + } + + /* perform the top edge filter if: + * - the fragment is not on the top row + * - the fragment is coded in this frame + * - the fragment is not coded in this frame but the above + * fragment is coded in this frame (this is done instead + * of a bottom edge filter when rendering the above + * fragment since this fragment is not available yet) */ + if ((y > 0) && + 
((s->all_fragments[i].coding_method != MODE_COPY) || + ((s->all_fragments[i].coding_method == MODE_COPY) && + (s->all_fragments[i - fragment_width].coding_method != MODE_COPY)) )) { + vertical_filter( + output_plane + s->all_fragments[i].first_pixel - stride, + -stride, bounding_values); + } +#endif } } } + + /* this looks like a good place for slice dispatch... */ + /* algorithm: + * if (slice == s->macroblock_height - 1) + * dispatch (both last slice & 2nd-to-last slice); + * else if (slice > 0) + * dispatch (slice - 1); + */ + + emms_c(); } /* @@ -2051,7 +2457,7 @@ static void render_fragments(Vp3DecodeContext *s, int m, n; int i = first_fragment; int16_t *dequantizer; - DCTELEM __align16 output_samples[64]; + DCTELEM __align16 block[64]; unsigned char *output_plane; unsigned char *last_plane; unsigned char *golden_plane; @@ -2066,7 +2472,6 @@ static void render_fragments(Vp3DecodeContext *s, /* set up plane-specific parameters */ if (plane == 0) { - dequantizer = s->intra_y_dequant; output_plane = s->current_frame.data[0]; last_plane = s->last_frame.data[0]; golden_plane = s->golden_frame.data[0]; @@ -2075,7 +2480,6 @@ static void render_fragments(Vp3DecodeContext *s, upper_motion_limit = 7 * s->current_frame.linesize[0]; lower_motion_limit = height * s->current_frame.linesize[0] + width - 8; } else if (plane == 1) { - dequantizer = s->intra_c_dequant; output_plane = s->current_frame.data[1]; last_plane = s->last_frame.data[1]; golden_plane = s->golden_frame.data[1]; @@ -2084,7 +2488,6 @@ static void render_fragments(Vp3DecodeContext *s, upper_motion_limit = 7 * s->current_frame.linesize[1]; lower_motion_limit = height * s->current_frame.linesize[1] + width - 8; } else { - dequantizer = s->intra_c_dequant; output_plane = s->current_frame.data[2]; last_plane = s->last_frame.data[2]; golden_plane = s->golden_frame.data[2]; @@ -2135,13 +2538,12 @@ static void render_fragments(Vp3DecodeContext *s, src_x= (motion_x>>1) + x; src_y= (motion_y>>1) + y; -if ((motion_x 
== 0xbeef) || (motion_y == 0xbeef)) -av_log(s->avctx, AV_LOG_ERROR, " help! got beefy vector! (%X, %X)\n", motion_x, motion_y); + if ((motion_x == 127) || (motion_y == 127)) + av_log(s->avctx, AV_LOG_ERROR, " help! got invalid motion vector! (%X, %X)\n", motion_x, motion_y); motion_halfpel_index = motion_x & 0x01; motion_source += (motion_x >> 1); -// motion_y = -motion_y; motion_halfpel_index |= (motion_y & 0x01) << 1; motion_source += ((motion_y >> 1) * stride); @@ -2172,26 +2574,49 @@ av_log(s->avctx, AV_LOG_ERROR, " help! got beefy vector! (%X, %X)\n", motion_x, motion_source + stride + 1 + d, stride, 8); } + dequantizer = s->inter_dequant; + }else{ + if (plane == 0) + dequantizer = s->intra_y_dequant; + else + dequantizer = s->intra_c_dequant; } /* dequantize the DCT coefficients */ debug_idct("fragment %d, coding mode %d, DC = %d, dequant = %d:\n", i, s->all_fragments[i].coding_method, - s->all_fragments[i].coeffs[0], dequantizer[0]); + DC_COEFF(i), dequantizer[0]); + + if(s->avctx->idct_algo==FF_IDCT_VP3){ + Coeff *coeff= s->coeffs + i; + memset(block, 0, sizeof(block)); + while(coeff->next){ + block[coeff->index]= coeff->coeff * dequantizer[coeff->index]; + coeff= coeff->next; + } + }else{ + Coeff *coeff= s->coeffs + i; + memset(block, 0, sizeof(block)); + while(coeff->next){ + block[coeff->index]= (coeff->coeff * dequantizer[coeff->index] + 2)>>2; + coeff= coeff->next; + } + } /* invert DCT and place (or add) in final output */ - s->dsp.vp3_idct(s->all_fragments[i].coeffs, - dequantizer, - s->all_fragments[i].coeff_count, - output_samples); + if (s->all_fragments[i].coding_method == MODE_INTRA) { - s->dsp.put_signed_pixels_clamped(output_samples, + if(s->avctx->idct_algo!=FF_IDCT_VP3) + block[0] += 128<<3; + s->dsp.idct_put( output_plane + s->all_fragments[i].first_pixel, - stride); + stride, + block); } else { - s->dsp.add_pixels_clamped(output_samples, + s->dsp.idct_add( output_plane + s->all_fragments[i].first_pixel, - stride); + stride, + block); } 
debug_idct("block after idct_%s():\n", @@ -2221,41 +2646,36 @@ av_log(s->avctx, AV_LOG_ERROR, " help! got beefy vector! (%X, %X)\n", motion_x, emms_c(); } -#define SATURATE_U8(x) ((x) < 0) ? 0 : ((x) > 255) ? 255 : x - static void horizontal_filter(unsigned char *first_pixel, int stride, int *bounding_values) { - int i; + unsigned char *end; int filter_value; - for (i = 0; i < 8; i++, first_pixel += stride) { + for (end= first_pixel + 8*stride; first_pixel < end; first_pixel += stride) { filter_value = - (first_pixel[-2] * 1) - - (first_pixel[-1] * 3) + - (first_pixel[ 0] * 3) - - (first_pixel[ 1] * 1); + (first_pixel[-2] - first_pixel[ 1]) + +3*(first_pixel[ 0] - first_pixel[-1]); filter_value = bounding_values[(filter_value + 4) >> 3]; - first_pixel[-1] = SATURATE_U8(first_pixel[-1] + filter_value); - first_pixel[ 0] = SATURATE_U8(first_pixel[ 0] - filter_value); + first_pixel[-1] = clip_uint8(first_pixel[-1] + filter_value); + first_pixel[ 0] = clip_uint8(first_pixel[ 0] - filter_value); } } static void vertical_filter(unsigned char *first_pixel, int stride, int *bounding_values) { - int i; + unsigned char *end; int filter_value; + const int nstride= -stride; - for (i = 0; i < 8; i++, first_pixel++) { + for (end= first_pixel + 8; first_pixel < end; first_pixel++) { filter_value = - (first_pixel[-(2 * stride)] * 1) - - (first_pixel[-(1 * stride)] * 3) + - (first_pixel[ (0 )] * 3) - - (first_pixel[ (1 * stride)] * 1); + (first_pixel[2 * nstride] - first_pixel[ stride]) + +3*(first_pixel[0 ] - first_pixel[nstride]); filter_value = bounding_values[(filter_value + 4) >> 3]; - first_pixel[-(1 * stride)] = SATURATE_U8(first_pixel[-(1 * stride)] + filter_value); - first_pixel[0] = SATURATE_U8(first_pixel[0] - filter_value); + first_pixel[nstride] = clip_uint8(first_pixel[nstride] + filter_value); + first_pixel[0] = clip_uint8(first_pixel[0] - filter_value); } } @@ -2266,7 +2686,10 @@ static void apply_loop_filter(Vp3DecodeContext *s) int fragment; int stride; unsigned 
char *plane_data; - int bounding_values[256]; + int *bounding_values= s->bounding_values_array+127; + +#if 0 + int bounding_values_array[256]; int filter_limit; /* find the right loop limit value */ @@ -2274,16 +2697,17 @@ static void apply_loop_filter(Vp3DecodeContext *s) if (vp31_ac_scale_factor[x] >= s->quality_index) break; } - filter_limit = vp31_filter_limit_values[x]; + filter_limit = vp31_filter_limit_values[s->quality_index]; /* set up the bounding values */ - memset(bounding_values, 0, 256 * sizeof(int)); + memset(bounding_values_array, 0, 256 * sizeof(int)); for (x = 0; x < filter_limit; x++) { bounding_values[-x - filter_limit] = -filter_limit + x; bounding_values[-x] = -x; bounding_values[x] = x; bounding_values[x + filter_limit] = filter_limit - x; } +#endif for (plane = 0; plane < 3; plane++) { @@ -2313,12 +2737,12 @@ static void apply_loop_filter(Vp3DecodeContext *s) for (y = 0; y < height; y++) { for (x = 0; x < width; x++) { - +START_TIMER /* do not perform left edge filter for left columns frags */ if ((x > 0) && (s->all_fragments[fragment].coding_method != MODE_COPY)) { horizontal_filter( - plane_data + s->all_fragments[fragment].first_pixel, + plane_data + s->all_fragments[fragment].first_pixel - 7*stride, stride, bounding_values); } @@ -2326,7 +2750,7 @@ static void apply_loop_filter(Vp3DecodeContext *s) if ((y > 0) && (s->all_fragments[fragment].coding_method != MODE_COPY)) { vertical_filter( - plane_data + s->all_fragments[fragment].first_pixel, + plane_data + s->all_fragments[fragment].first_pixel + stride, stride, bounding_values); } @@ -2337,7 +2761,7 @@ static void apply_loop_filter(Vp3DecodeContext *s) (s->all_fragments[fragment].coding_method != MODE_COPY) && (s->all_fragments[fragment + 1].coding_method == MODE_COPY)) { horizontal_filter( - plane_data + s->all_fragments[fragment + 1].first_pixel, + plane_data + s->all_fragments[fragment + 1].first_pixel - 7*stride, stride, bounding_values); } @@ -2348,11 +2772,12 @@ static void 
apply_loop_filter(Vp3DecodeContext *s) (s->all_fragments[fragment].coding_method != MODE_COPY) && (s->all_fragments[fragment + width].coding_method == MODE_COPY)) { vertical_filter( - plane_data + s->all_fragments[fragment + width].first_pixel, + plane_data + s->all_fragments[fragment + width].first_pixel + stride, stride, bounding_values); } fragment++; +STOP_TIMER("loop filter") } } } @@ -2474,17 +2899,15 @@ static int vp3_decode_init(AVCodecContext *avctx) s->version = 1; s->avctx = avctx; -#if 0 - s->width = avctx->width; - s->height = avctx->height; -#else s->width = (avctx->width + 15) & 0xFFFFFFF0; s->height = (avctx->height + 15) & 0xFFFFFFF0; -#endif avctx->pix_fmt = PIX_FMT_YUV420P; avctx->has_b_frames = 0; + if(avctx->idct_algo==FF_IDCT_AUTO) + avctx->idct_algo=FF_IDCT_VP3; dsputil_init(&s->dsp, avctx); - s->dsp.vp3_dsp_init(); + + ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct); /* initialize to an impossible value which will force a recalculation * in the first frame decode */ @@ -2536,6 +2959,7 @@ static int vp3_decode_init(AVCodecContext *avctx) s->v_fragment_start); s->all_fragments = av_malloc(s->fragment_count * sizeof(Vp3Fragment)); + s->coeffs = av_malloc(s->fragment_count * sizeof(Coeff) * 65); s->coded_fragment_list = av_malloc(s->fragment_count * sizeof(int)); s->pixel_addresses_inited = 0; @@ -2551,40 +2975,82 @@ static int vp3_decode_init(AVCodecContext *avctx) s->coded_intra_c_dequant[i] = vp31_intra_c_dequant[i]; for (i = 0; i < 64; i++) s->coded_inter_dequant[i] = vp31_inter_dequant[i]; + for (i = 0; i < 64; i++) + s->filter_limit_values[i] = vp31_filter_limit_values[i]; + + /* init VLC tables */ + for (i = 0; i < 16; i++) { + + /* DC histograms */ + init_vlc(&s->dc_vlc[i], 5, 32, + &dc_bias[i][0][1], 4, 2, + &dc_bias[i][0][0], 4, 2, 0); + + /* group 1 AC histograms */ + init_vlc(&s->ac_vlc_1[i], 5, 32, + &ac_bias_0[i][0][1], 4, 2, + &ac_bias_0[i][0][0], 4, 2, 0); + + /* group 2 AC histograms */ + 
init_vlc(&s->ac_vlc_2[i], 5, 32, + &ac_bias_1[i][0][1], 4, 2, + &ac_bias_1[i][0][0], 4, 2, 0); + + /* group 3 AC histograms */ + init_vlc(&s->ac_vlc_3[i], 5, 32, + &ac_bias_2[i][0][1], 4, 2, + &ac_bias_2[i][0][0], 4, 2, 0); + + /* group 4 AC histograms */ + init_vlc(&s->ac_vlc_4[i], 5, 32, + &ac_bias_3[i][0][1], 4, 2, + &ac_bias_3[i][0][0], 4, 2, 0); + } + } else { + for (i = 0; i < 16; i++) { + + /* DC histograms */ + init_vlc(&s->dc_vlc[i], 5, 32, + &s->huffman_table[i][0][1], 4, 2, + &s->huffman_table[i][0][0], 4, 2, 0); + + /* group 1 AC histograms */ + init_vlc(&s->ac_vlc_1[i], 5, 32, + &s->huffman_table[i+16][0][1], 4, 2, + &s->huffman_table[i+16][0][0], 4, 2, 0); + + /* group 2 AC histograms */ + init_vlc(&s->ac_vlc_2[i], 5, 32, + &s->huffman_table[i+16*2][0][1], 4, 2, + &s->huffman_table[i+16*2][0][0], 4, 2, 0); + + /* group 3 AC histograms */ + init_vlc(&s->ac_vlc_3[i], 5, 32, + &s->huffman_table[i+16*3][0][1], 4, 2, + &s->huffman_table[i+16*3][0][0], 4, 2, 0); + + /* group 4 AC histograms */ + init_vlc(&s->ac_vlc_4[i], 5, 32, + &s->huffman_table[i+16*4][0][1], 4, 2, + &s->huffman_table[i+16*4][0][0], 4, 2, 0); + } } - /* init VLC tables */ - for (i = 0; i < 16; i++) { - - /* DC histograms */ - init_vlc(&s->dc_vlc[i], 5, 32, - &dc_bias[i][0][1], 4, 2, - &dc_bias[i][0][0], 4, 2, 0); - - /* group 1 AC histograms */ - init_vlc(&s->ac_vlc_1[i], 5, 32, - &ac_bias_0[i][0][1], 4, 2, - &ac_bias_0[i][0][0], 4, 2, 0); - - /* group 2 AC histograms */ - init_vlc(&s->ac_vlc_2[i], 5, 32, - &ac_bias_1[i][0][1], 4, 2, - &ac_bias_1[i][0][0], 4, 2, 0); - - /* group 3 AC histograms */ - init_vlc(&s->ac_vlc_3[i], 5, 32, - &ac_bias_2[i][0][1], 4, 2, - &ac_bias_2[i][0][0], 4, 2, 0); - - /* group 4 AC histograms */ - init_vlc(&s->ac_vlc_4[i], 5, 32, - &ac_bias_3[i][0][1], 4, 2, - &ac_bias_3[i][0][0], 4, 2, 0); - } + init_vlc(&s->superblock_run_length_vlc, 6, 34, + &superblock_run_length_vlc_table[0][1], 4, 2, + &superblock_run_length_vlc_table[0][0], 4, 2, 0); - /* build 
quantization zigzag table */ - for (i = 0; i < 64; i++) - zigzag_index[dezigzag_index[i]] = i; + init_vlc(&s->fragment_run_length_vlc, 5, 31, + &fragment_run_length_vlc_table[0][1], 4, 2, + &fragment_run_length_vlc_table[0][0], 4, 2, 0); + + init_vlc(&s->mode_code_vlc, 3, 8, + &mode_code_vlc_table[0][1], 2, 1, + &mode_code_vlc_table[0][0], 2, 1, 0); + + init_vlc(&s->motion_vector_vlc, 6, 63, + &motion_vector_vlc_table[0][1], 2, 1, + &motion_vector_vlc_table[0][0], 2, 1, 0); /* work out the block mapping tables */ s->superblock_fragments = av_malloc(s->superblock_count * 16 * sizeof(int)); @@ -2612,6 +3078,7 @@ static int vp3_decode_frame(AVCodecContext *avctx, Vp3DecodeContext *s = avctx->priv_data; GetBitContext gb; static int counter = 0; + int i; init_get_bits(&gb, buf, buf_size * 8); @@ -2641,7 +3108,7 @@ static int vp3_decode_frame(AVCodecContext *avctx, skip_bits(&gb, 1); s->last_quality_index = s->quality_index; s->quality_index = get_bits(&gb, 6); - if (s->theora >= 0x030300) + if (s->theora >= 0x030200) skip_bits1(&gb); if (s->avctx->debug & FF_DEBUG_PICT_INFO) @@ -2649,8 +3116,10 @@ static int vp3_decode_frame(AVCodecContext *avctx, s->keyframe?"key":"", counter, s->quality_index); counter++; - if (s->quality_index != s->last_quality_index) + if (s->quality_index != s->last_quality_index) { init_dequantizer(s); + init_loop_filter(s); + } if (s->keyframe) { if (!s->theora) @@ -2711,7 +3180,9 @@ static int vp3_decode_frame(AVCodecContext *avctx, s->current_frame.qscale_table= s->qscale_table; //FIXME allocate individual tables per AVFrame s->current_frame.qstride= 0; + {START_TIMER init_frame(s, &gb); + STOP_TIMER("init_frame")} #if KEYFRAMES_ONLY if (!s->keyframe) { @@ -2726,31 +3197,60 @@ if (!s->keyframe) { } else { #endif - if (unpack_superblocks(s, &gb) || - unpack_modes(s, &gb) || - unpack_vectors(s, &gb) || - unpack_dct_coeffs(s, &gb)) { - - av_log(s->avctx, AV_LOG_ERROR, " vp3: could not decode frame\n"); + {START_TIMER + if (unpack_superblocks(s, 
&gb)){ + av_log(s->avctx, AV_LOG_ERROR, "error in unpack_superblocks\n"); + return -1; + } + STOP_TIMER("unpack_superblocks")} + {START_TIMER + if (unpack_modes(s, &gb)){ + av_log(s->avctx, AV_LOG_ERROR, "error in unpack_modes\n"); + return -1; + } + STOP_TIMER("unpack_modes")} + {START_TIMER + if (unpack_vectors(s, &gb)){ + av_log(s->avctx, AV_LOG_ERROR, "error in unpack_vectors\n"); + return -1; + } + STOP_TIMER("unpack_vectors")} + {START_TIMER + if (unpack_dct_coeffs(s, &gb)){ + av_log(s->avctx, AV_LOG_ERROR, "error in unpack_dct_coeffs\n"); return -1; } + STOP_TIMER("unpack_dct_coeffs")} + {START_TIMER reverse_dc_prediction(s, 0, s->fragment_width, s->fragment_height); - render_fragments(s, 0, s->width, s->height, 0); -// apply_loop_filter(s); - if ((avctx->flags & CODEC_FLAG_GRAY) == 0) { reverse_dc_prediction(s, s->u_fragment_start, s->fragment_width / 2, s->fragment_height / 2); reverse_dc_prediction(s, s->v_fragment_start, s->fragment_width / 2, s->fragment_height / 2); + } + STOP_TIMER("reverse_dc_prediction")} + {START_TIMER + +#if 1 + for (i = 0; i < s->macroblock_height; i++) + render_slice(s, i); +#else + render_fragments(s, 0, s->width, s->height, 0); + if ((avctx->flags & CODEC_FLAG_GRAY) == 0) { render_fragments(s, s->u_fragment_start, s->width / 2, s->height / 2, 1); render_fragments(s, s->v_fragment_start, s->width / 2, s->height / 2, 2); } else { memset(s->current_frame.data[1], 0x80, s->width * s->height / 4); memset(s->current_frame.data[2], 0x80, s->width * s->height / 4); } +#endif + STOP_TIMER("render_fragments")} + {START_TIMER + apply_loop_filter(s); + STOP_TIMER("apply_loop_filter")} #if KEYFRAMES_ONLY } #endif @@ -2779,6 +3279,7 @@ static int vp3_decode_end(AVCodecContext *avctx) Vp3DecodeContext *s = avctx->priv_data; av_free(s->all_fragments); + av_free(s->coeffs); av_free(s->coded_fragment_list); av_free(s->superblock_fragments); av_free(s->superblock_macroblocks); @@ -2796,6 +3297,38 @@ static int vp3_decode_end(AVCodecContext 
*avctx) return 0; } +static int read_huffman_tree(AVCodecContext *avctx, GetBitContext *gb) +{ + Vp3DecodeContext *s = avctx->priv_data; + + if (get_bits(gb, 1)) { + int token; + if (s->entries >= 32) { /* overflow */ + av_log(avctx, AV_LOG_ERROR, "huffman tree overflow\n"); + return -1; + } + token = get_bits(gb, 5); + //av_log(avctx, AV_LOG_DEBUG, "hti %d hbits %x token %d entry : %d size %d\n", s->hti, s->hbits, token, s->entries, s->huff_code_size); + s->huffman_table[s->hti][token][0] = s->hbits; + s->huffman_table[s->hti][token][1] = s->huff_code_size; + s->entries++; + } + else { + if (s->huff_code_size >= 32) {/* overflow */ + av_log(avctx, AV_LOG_ERROR, "huffman tree overflow\n"); + return -1; + } + s->huff_code_size++; + s->hbits <<= 1; + read_huffman_tree(avctx, gb); + s->hbits |= 1; + read_huffman_tree(avctx, gb); + s->hbits >>= 1; + s->huff_code_size--; + } + return 0; +} + static int theora_decode_header(AVCodecContext *avctx, GetBitContext gb) { Vp3DecodeContext *s = avctx->priv_data; @@ -2810,9 +3343,9 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext gb) /* FIXME: endianess? */ s->theora = (major << 16) | (minor << 8) | micro; - /* 3.3.0 aka alpha3 has the same frame orientation as original vp3 */ + /* 3.2.0 aka alpha3 has the same frame orientation as original vp3 */ /* but previous versions have the image flipped relative to vp3 */ - if (s->theora < 0x030300) + if (s->theora < 0x030200) { s->flipped_image = 1; av_log(avctx, AV_LOG_DEBUG, "Old (theora < 0x030300) + if (s->theora < 0x030200) skip_bits(&gb, 5); /* keyframe frequency force */ skip_bits(&gb, 8); /* colorspace */ skip_bits(&gb, 24); /* bitrate */ skip_bits(&gb, 6); /* last(?) 
quality index */ - if (s->theora >= 0x030300) + if (s->theora >= 0x030200) { skip_bits(&gb, 5); /* keyframe frequency force */ skip_bits(&gb, 5); /* spare bits */ @@ -2855,8 +3388,6 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext gb) avctx->width = s->width; avctx->height = s->height; - vp3_decode_init(avctx); - return 0; } @@ -2885,16 +3416,39 @@ static int theora_decode_comments(AVCodecContext *avctx, GetBitContext gb) static int theora_decode_tables(AVCodecContext *avctx, GetBitContext gb) { Vp3DecodeContext *s = avctx->priv_data; - int i; + int i, n; + + if (s->theora >= 0x030200) { + n = get_bits(&gb, 3); + /* loop filter limit values table */ + for (i = 0; i < 64; i++) + s->filter_limit_values[i] = get_bits(&gb, n); + } + if (s->theora >= 0x030200) + n = get_bits(&gb, 4) + 1; + else + n = 16; /* quality threshold table */ for (i = 0; i < 64; i++) - s->coded_ac_scale_factor[i] = get_bits(&gb, 16); + s->coded_ac_scale_factor[i] = get_bits(&gb, n); + if (s->theora >= 0x030200) + n = get_bits(&gb, 4) + 1; + else + n = 16; /* dc scale factor table */ for (i = 0; i < 64; i++) - s->coded_dc_scale_factor[i] = get_bits(&gb, 16); + s->coded_dc_scale_factor[i] = get_bits(&gb, n); + if (s->theora >= 0x030200) + n = get_bits(&gb, 9) + 1; + else + n = 3; + if (n != 3) { + av_log(NULL,AV_LOG_ERROR, "unsupported nbms : %d\n", n); + return -1; + } /* y coeffs */ for (i = 0; i < 64; i++) s->coded_intra_y_dequant[i] = get_bits(&gb, 8); @@ -2907,7 +3461,41 @@ static int theora_decode_tables(AVCodecContext *avctx, GetBitContext gb) for (i = 0; i < 64; i++) s->coded_inter_dequant[i] = get_bits(&gb, 8); - /* FIXME: read huffmann tree.. 
*/ + /* Huffman tables */ + for (i = 0; i <= 1; i++) { + for (n = 0; n <= 2; n++) { + int newqr; + if (i > 0 || n > 0) + newqr = get_bits(&gb, 1); + else + newqr = 1; + if (!newqr) { + if (i > 0) + get_bits(&gb, 1); + } + else { + int qi = 0; + skip_bits(&gb, av_log2(2)+1); + while (qi < 63) { + qi += get_bits(&gb, av_log2(63-qi)+1) + 1; + skip_bits(&gb, av_log2(2)+1); + } + if (qi > 63) + av_log(NULL, AV_LOG_ERROR, "error...\n"); + } + } + } + + for (s->hti = 0; s->hti < 80; s->hti++) { + s->entries = 0; + s->huff_code_size = 1; + if (!get_bits(&gb, 1)) { + s->hbits = 0; + read_huffman_tree(avctx, &gb); + s->hbits = 1; + read_huffman_tree(avctx, &gb); + } + } s->theora_tables = 1; @@ -2946,7 +3534,6 @@ static int theora_decode_init(AVCodecContext *avctx) { case 0x80: theora_decode_header(avctx, gb); - vp3_decode_init(avctx); break; case 0x81: theora_decode_comments(avctx, gb); @@ -2957,6 +3544,7 @@ static int theora_decode_init(AVCodecContext *avctx) } } + vp3_decode_init(avctx); return 0; } @@ -2973,6 +3561,7 @@ AVCodec vp3_decoder = { NULL }; +#ifndef CONFIG_LIBTHEORA AVCodec theora_decoder = { "theora", CODEC_TYPE_VIDEO, @@ -2985,3 +3574,4 @@ AVCodec theora_decoder = { 0, NULL }; +#endif diff --git a/src/libffmpeg/libavcodec/vp3data.h b/src/libffmpeg/libavcodec/vp3data.h index 85a233716..8bead2fc1 100644 --- a/src/libffmpeg/libavcodec/vp3data.h +++ b/src/libffmpeg/libavcodec/vp3data.h @@ -72,20 +72,353 @@ static const uint32_t vp31_filter_limit_values[64] = 0, 0, 0, 0, 0, 0, 0, 0 }; -/* table used to convert natural order <-> zigzag order */ -static const int dezigzag_index[64] = -{ 0, 1, 8, 16, 9, 2, 3, 10, - 17, 24, 32, 25, 18, 11, 4, 5, - 12, 19, 26, 33, 40, 48, 41, 34, - 27, 20, 13, 6, 7, 14, 21, 28, - 35, 42, 49, 56, 57, 50, 43, 36, - 29, 22, 15, 23, 30, 37, 44, 51, - 58, 59, 52, 45, 38, 31, 39, 46, - 53, 60, 61, 54, 47, 55, 62, 63 +static const uint16_t superblock_run_length_vlc_table[34][2] = { + { 0, 1 }, + + { 4, 3 }, { 5, 3 }, + + { 0xC, 4 }, { 0xD, 4 
}, + + { 0x38, 6 }, { 0x39, 6 }, { 0x3A, 6 }, { 0x3B, 6 }, + + { 0xF0, 8 }, { 0xF1, 8 }, { 0xF2, 8 }, { 0xF3, 8 }, + { 0xF4, 8 }, { 0xF5, 8 }, { 0xF6, 8 }, { 0xF7, 8 }, + + { 0x3E0, 10 }, { 0x3E1, 10 }, { 0x3E2, 10 }, { 0x3E3, 10 }, + { 0x3E4, 10 }, { 0x3E5, 10 }, { 0x3E6, 10 }, { 0x3E7, 10 }, + { 0x3E8, 10 }, { 0x3E9, 10 }, { 0x3EA, 10 }, { 0x3EB, 10 }, + { 0x3EC, 10 }, { 0x3ED, 10 }, { 0x3EE, 10 }, { 0x3EF, 10 }, + + { 0x3F, 6 } /* this last VLC is a special case for reading 12 more + bits from stream and adding the value 34 */ +}; + +static const uint16_t fragment_run_length_vlc_table[30][2] = { + /* 1 -> 2 */ + { 0x0, 2 }, { 0x1, 2 }, + + /* 3 -> 4 */ + { 0x4, 3 }, { 0x5, 3 }, + + /* 5 -> 6 */ + { 0xC, 4 }, { 0xD, 4 }, + + /* 7 -> 10 */ + { 0x38, 6 }, { 0x39, 6 }, + { 0x3A, 6 }, { 0x3B, 6 }, + + /* 11 -> 14 */ + { 0x78, 7 }, { 0x79, 7 }, + { 0x7A, 7 }, { 0x7B, 7 }, + + /* 15 -> 30 */ + { 0x1F0, 9 }, { 0x1F1, 9 }, { 0x1F2, 9 }, { 0x1F3, 9 }, + { 0x1F4, 9 }, { 0x1F5, 9 }, { 0x1F6, 9 }, { 0x1F7, 9 }, + { 0x1F8, 9 }, { 0x1F9, 9 }, { 0x1FA, 9 }, { 0x1FB, 9 }, + { 0x1FC, 9 }, { 0x1FD, 9 }, { 0x1FE, 9 }, { 0x1FF, 9 } +}; + +static const uint8_t mode_code_vlc_table[30][2] = { + { 0, 1 }, { 2, 2 }, + { 6, 3 }, { 14, 4 }, + { 30, 5 }, { 62, 6 }, + { 126, 7 }, { 127, 7 } +}; + +static const uint8_t motion_vector_vlc_table[63][2] = { + { 0, 3 }, + { 1, 3 }, + { 2, 3 }, + + { 6, 4 }, { 7, 4 }, + + { 8, 4 }, { 9, 4 }, + + { 40, 6 }, { 41, 6 }, { 42, 6 }, { 43, 6 }, + { 44, 6 }, { 45, 6 }, { 46, 6 }, { 47, 6 }, + + { 96, 7 }, { 97, 7 }, { 98, 7 }, { 99, 7 }, + { 100, 7 }, { 101, 7 }, { 102, 7 }, { 103, 7 }, + { 104, 7 }, { 105, 7 }, { 106, 7 }, { 107, 7 }, + { 108, 7 }, { 109, 7 }, { 110, 7 }, { 111, 7 }, + + { 0xE0, 8 }, { 0xE1, 8 }, { 0xE2, 8 }, { 0xE3, 8 }, + { 0xE4, 8 }, { 0xE5, 8 }, { 0xE6, 8 }, { 0xE7, 8 }, + { 0xE8, 8 }, { 0xE9, 8 }, { 0xEA, 8 }, { 0xEB, 8 }, + { 0xEC, 8 }, { 0xED, 8 }, { 0xEE, 8 }, { 0xEF, 8 }, + + { 0xF0, 8 }, { 0xF1, 8 }, { 0xF2, 8 }, { 0xF3, 8 }, + 
{ 0xF4, 8 }, { 0xF5, 8 }, { 0xF6, 8 }, { 0xF7, 8 }, + { 0xF8, 8 }, { 0xF9, 8 }, { 0xFA, 8 }, { 0xFB, 8 }, + { 0xFC, 8 }, { 0xFD, 8 }, { 0xFE, 8 }, { 0xFF, 8 } +}; + +static const int motion_vector_table[63] = { + 0, 1, -1, + 2, -2, + 3, -3, + 4, -4, 5, -5, 6, -6, 7, -7, + 8, -8, 9, -9, 10, -10, 11, -11, 12, -12, 13, -13, 14, -14, 15, -15, + 16, -16, 17, -17, 18, -18, 19, -19, 20, -20, 21, -21, 22, -22, 23, -23, + 24, -24, 25, -25, 26, -26, 27, -27, 28, -28, 29, -29, 30, -30, 31, -31 +}; + +static const int8_t fixed_motion_vector_table[64] = { + 0, 0, 1, -1, 2, -2, 3, -3, + 4, -4, 5, -5, 6, -6, 7, -7, + 8, -8, 9, -9, 10, -10, 11, -11, + 12, -12, 13, -13, 14, -14, 15, -15, + 16, -16, 17, -17, 18, -18, 19, -19, + 20, -20, 21, -21, 22, -22, 23, -23, + 24, -24, 25, -25, 26, -26, 27, -27, + 28, -28, 29, -29, 30, -30, 31, -31 +}; + +/* only tokens 0..6 indicate eob runs */ +static const int eob_run_base[7] = { + 1, 2, 3, 4, 8, 16, 0 +}; +static const int eob_run_get_bits[7] = { + 0, 0, 0, 2, 3, 4, 12 +}; + +static const int zero_run_base[32] = { + 0, 0, 0, 0, 0, 0, 0, /* 0..6 are never used */ + 0, 0, /* 7..8 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 9..22 */ + 1, 2, 3, 4, 5, /* 23..27 */ + 6, 10, 1, 2 /* 28..31 */ +}; +static const int zero_run_get_bits[32] = { + 0, 0, 0, 0, 0, 0, 0, /* 0..6 are never used */ + 3, 6, /* 7..8 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 9..22 */ + 0, 0, 0, 0, 0, /* 23..27 */ + 2, 3, 0, 1 /* 28..31 */ +}; + +static const int coeff_get_bits[32] = { + 0, 0, 0, 0, 0, 0, 0, /* 0..6 are never used */ + 0, 0, 0, 0, 0, 0, /* 7..12 use constant coeffs */ + 1, 1, 1, 1, /* 13..16 are constants but still need sign bit */ + 2, 3, 4, 5, 6, 10, /* 17..22, for reading large coeffs */ + 1, 1, 1, 1, 1, 1, 1, /* 23..29 are constants but still need sign bit */ + 2, 2 /* 30..31 */ +}; + +static const int16_t coeff_table_token_7_8[1] = { 0 }; +static const int16_t coeff_table_token_9[1] = { 1 }; +static const int16_t coeff_table_token_10[1] = { -1 
}; +static const int16_t coeff_table_token_11[1] = { 2 }; +static const int16_t coeff_table_token_12[1] = { -2 }; + +static const int16_t coeff_table_token_13[2] = { 3, -3 }; +static const int16_t coeff_table_token_14[2] = { 4, -4 }; +static const int16_t coeff_table_token_15[2] = { 5, -5 }; +static const int16_t coeff_table_token_16[2] = { 6, -6 }; + +static const int16_t coeff_table_token_23_24_25_26_27_28_29[2] = { 1, -1 }; +static const int16_t coeff_table_token_30[4] = { 2, 3, -2, -3 }; +static const int16_t coeff_table_token_31[4] = { 2, 3, -2, -3 }; + +static const int16_t coeff_table_token_17[4] = { + 7, 8, -7, -8 +}; + +static const int16_t coeff_table_token_18[8] = { + 9, 10, 11, 12, -9, -10, -11, -12 +}; + +static const int16_t coeff_table_token_19[16] = { + 13, 14, 15, 16, 17, 18, 19, 20, -13, -14, -15, -16, -17, -18, -19, -20 +}; + +static const int16_t coeff_table_token_20[32] = { + 21, 22, 23, 24, 25, 26, 27, 28, + 29, 30, 31, 32, 33, 34, 35, 36, + -21, -22, -23, -24, -25, -26, -27, -28, + -29, -30, -31, -32, -33, -34, -35, -36 +}; + +static const int16_t coeff_table_token_21[64] = { + 37, 38, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49, 50, 51, 52, + 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 64, 65, 66, 67, 68, + -37, -38, -39, -40, -41, -42, -43, -44, + -45, -46, -47, -48, -49, -50, -51, -52, + -53, -54, -55, -56, -57, -58, -59, -60, + -61, -62, -63, -64, -65, -66, -67, -68 }; -/* inverse of dezigzag index */ -static __attribute__((unused)) int zigzag_index[64]; +static const int16_t coeff_table_token_22[1024] = { + 69, 70, 71, 72, 73, 74, 75, 76, + 77, 78, 79, 80, 81, 82, 83, 84, + 85, 86, 87, 88, 89, 90, 91, 92, + 93, 94, 95, 96, 97, 98, 99, 100, + 101, 102, 103, 104, 105, 106, 107, 108, + 109, 110, 111, 112, 113, 114, 115, 116, + 117, 118, 119, 120, 121, 122, 123, 124, + 125, 126, 127, 128, 129, 130, 131, 132, + 133, 134, 135, 136, 137, 138, 139, 140, + 141, 142, 143, 144, 145, 146, 147, 148, + 149, 150, 151, 152, 153, 154, 155, 156, + 157, 
158, 159, 160, 161, 162, 163, 164, + 165, 166, 167, 168, 169, 170, 171, 172, + 173, 174, 175, 176, 177, 178, 179, 180, + 181, 182, 183, 184, 185, 186, 187, 188, + 189, 190, 191, 192, 193, 194, 195, 196, + 197, 198, 199, 200, 201, 202, 203, 204, + 205, 206, 207, 208, 209, 210, 211, 212, + 213, 214, 215, 216, 217, 218, 219, 220, + 221, 222, 223, 224, 225, 226, 227, 228, + 229, 230, 231, 232, 233, 234, 235, 236, + 237, 238, 239, 240, 241, 242, 243, 244, + 245, 246, 247, 248, 249, 250, 251, 252, + 253, 254, 255, 256, 257, 258, 259, 260, + 261, 262, 263, 264, 265, 266, 267, 268, + 269, 270, 271, 272, 273, 274, 275, 276, + 277, 278, 279, 280, 281, 282, 283, 284, + 285, 286, 287, 288, 289, 290, 291, 292, + 293, 294, 295, 296, 297, 298, 299, 300, + 301, 302, 303, 304, 305, 306, 307, 308, + 309, 310, 311, 312, 313, 314, 315, 316, + 317, 318, 319, 320, 321, 322, 323, 324, + 325, 326, 327, 328, 329, 330, 331, 332, + 333, 334, 335, 336, 337, 338, 339, 340, + 341, 342, 343, 344, 345, 346, 347, 348, + 349, 350, 351, 352, 353, 354, 355, 356, + 357, 358, 359, 360, 361, 362, 363, 364, + 365, 366, 367, 368, 369, 370, 371, 372, + 373, 374, 375, 376, 377, 378, 379, 380, + 381, 382, 383, 384, 385, 386, 387, 388, + 389, 390, 391, 392, 393, 394, 395, 396, + 397, 398, 399, 400, 401, 402, 403, 404, + 405, 406, 407, 408, 409, 410, 411, 412, + 413, 414, 415, 416, 417, 418, 419, 420, + 421, 422, 423, 424, 425, 426, 427, 428, + 429, 430, 431, 432, 433, 434, 435, 436, + 437, 438, 439, 440, 441, 442, 443, 444, + 445, 446, 447, 448, 449, 450, 451, 452, + 453, 454, 455, 456, 457, 458, 459, 460, + 461, 462, 463, 464, 465, 466, 467, 468, + 469, 470, 471, 472, 473, 474, 475, 476, + 477, 478, 479, 480, 481, 482, 483, 484, + 485, 486, 487, 488, 489, 490, 491, 492, + 493, 494, 495, 496, 497, 498, 499, 500, + 501, 502, 503, 504, 505, 506, 507, 508, + 509, 510, 511, 512, 513, 514, 515, 516, + 517, 518, 519, 520, 521, 522, 523, 524, + 525, 526, 527, 528, 529, 530, 531, 532, + 533, 534, 535, 536, 537, 538, 
539, 540, + 541, 542, 543, 544, 545, 546, 547, 548, + 549, 550, 551, 552, 553, 554, 555, 556, + 557, 558, 559, 560, 561, 562, 563, 564, + 565, 566, 567, 568, 569, 570, 571, 572, + 573, 574, 575, 576, 577, 578, 579, 580, + -69, -70, -71, -72, -73, -74, -75, -76, + -77, -78, -79, -80, -81, -82, -83, -84, + -85, -86, -87, -88, -89, -90, -91, -92, + -93, -94, -95, -96, -97, -98, -99, -100, + -101, -102, -103, -104, -105, -106, -107, -108, + -109, -110, -111, -112, -113, -114, -115, -116, + -117, -118, -119, -120, -121, -122, -123, -124, + -125, -126, -127, -128, -129, -130, -131, -132, + -133, -134, -135, -136, -137, -138, -139, -140, + -141, -142, -143, -144, -145, -146, -147, -148, + -149, -150, -151, -152, -153, -154, -155, -156, + -157, -158, -159, -160, -161, -162, -163, -164, + -165, -166, -167, -168, -169, -170, -171, -172, + -173, -174, -175, -176, -177, -178, -179, -180, + -181, -182, -183, -184, -185, -186, -187, -188, + -189, -190, -191, -192, -193, -194, -195, -196, + -197, -198, -199, -200, -201, -202, -203, -204, + -205, -206, -207, -208, -209, -210, -211, -212, + -213, -214, -215, -216, -217, -218, -219, -220, + -221, -222, -223, -224, -225, -226, -227, -228, + -229, -230, -231, -232, -233, -234, -235, -236, + -237, -238, -239, -240, -241, -242, -243, -244, + -245, -246, -247, -248, -249, -250, -251, -252, + -253, -254, -255, -256, -257, -258, -259, -260, + -261, -262, -263, -264, -265, -266, -267, -268, + -269, -270, -271, -272, -273, -274, -275, -276, + -277, -278, -279, -280, -281, -282, -283, -284, + -285, -286, -287, -288, -289, -290, -291, -292, + -293, -294, -295, -296, -297, -298, -299, -300, + -301, -302, -303, -304, -305, -306, -307, -308, + -309, -310, -311, -312, -313, -314, -315, -316, + -317, -318, -319, -320, -321, -322, -323, -324, + -325, -326, -327, -328, -329, -330, -331, -332, + -333, -334, -335, -336, -337, -338, -339, -340, + -341, -342, -343, -344, -345, -346, -347, -348, + -349, -350, -351, -352, -353, -354, -355, -356, + -357, 
-358, -359, -360, -361, -362, -363, -364, + -365, -366, -367, -368, -369, -370, -371, -372, + -373, -374, -375, -376, -377, -378, -379, -380, + -381, -382, -383, -384, -385, -386, -387, -388, + -389, -390, -391, -392, -393, -394, -395, -396, + -397, -398, -399, -400, -401, -402, -403, -404, + -405, -406, -407, -408, -409, -410, -411, -412, + -413, -414, -415, -416, -417, -418, -419, -420, + -421, -422, -423, -424, -425, -426, -427, -428, + -429, -430, -431, -432, -433, -434, -435, -436, + -437, -438, -439, -440, -441, -442, -443, -444, + -445, -446, -447, -448, -449, -450, -451, -452, + -453, -454, -455, -456, -457, -458, -459, -460, + -461, -462, -463, -464, -465, -466, -467, -468, + -469, -470, -471, -472, -473, -474, -475, -476, + -477, -478, -479, -480, -481, -482, -483, -484, + -485, -486, -487, -488, -489, -490, -491, -492, + -493, -494, -495, -496, -497, -498, -499, -500, + -501, -502, -503, -504, -505, -506, -507, -508, + -509, -510, -511, -512, -513, -514, -515, -516, + -517, -518, -519, -520, -521, -522, -523, -524, + -525, -526, -527, -528, -529, -530, -531, -532, + -533, -534, -535, -536, -537, -538, -539, -540, + -541, -542, -543, -544, -545, -546, -547, -548, + -549, -550, -551, -552, -553, -554, -555, -556, + -557, -558, -559, -560, -561, -562, -563, -564, + -565, -566, -567, -568, -569, -570, -571, -572, + -573, -574, -575, -576, -577, -578, -579, -580 +}; + +static const int16_t *coeff_tables[32] = { + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + coeff_table_token_7_8, + + coeff_table_token_7_8, + coeff_table_token_9, + coeff_table_token_10, + coeff_table_token_11, + coeff_table_token_12, + coeff_table_token_13, + coeff_table_token_14, + coeff_table_token_15, + + coeff_table_token_16, + coeff_table_token_17, + coeff_table_token_18, + coeff_table_token_19, + coeff_table_token_20, + coeff_table_token_21, + coeff_table_token_22, + coeff_table_token_23_24_25_26_27_28_29, + + coeff_table_token_23_24_25_26_27_28_29, + 
coeff_table_token_23_24_25_26_27_28_29, + coeff_table_token_23_24_25_26_27_28_29, + coeff_table_token_23_24_25_26_27_28_29, + coeff_table_token_23_24_25_26_27_28_29, + coeff_table_token_23_24_25_26_27_28_29, + coeff_table_token_30, + coeff_table_token_31 +}; static const uint16_t dc_bias[16][32][2] = { { /* DC bias table 0 */ diff --git a/src/libffmpeg/libavcodec/vp3dsp.c b/src/libffmpeg/libavcodec/vp3dsp.c index 9c9530d05..015f57b57 100644 --- a/src/libffmpeg/libavcodec/vp3dsp.c +++ b/src/libffmpeg/libavcodec/vp3dsp.c @@ -36,30 +36,17 @@ #define xC6S2 25080 #define xC7S1 12785 -void vp3_dsp_init_c(void) +static always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type) { - /* nop */ -} - -void vp3_idct_c(int16_t *input_data, int16_t *dequant_matrix, - int coeff_count, int16_t *output_data) -{ - int32_t dequantized_data[64]; - int32_t *ip = dequantized_data; - int16_t *op = output_data; + int16_t *ip = input; + uint8_t *cm = cropTbl + MAX_NEG_CROP; - int32_t A_, B_, C_, D_, _Ad, _Bd, _Cd, _Dd, E_, F_, G_, H_; - int32_t _Ed, _Gd, _Add, _Bdd, _Fd, _Hd; - int32_t t1, t2; + int A_, B_, C_, D_, _Ad, _Bd, _Cd, _Dd, E_, F_, G_, H_; + int _Ed, _Gd, _Add, _Bdd, _Fd, _Hd; + int t1, t2; int i, j; - - /* de-zigzag and dequantize */ - for (i = 0; i < coeff_count; i++) { - j = dezigzag_index[i]; - dequantized_data[j] = dequant_matrix[i] * input_data[i]; - } - + /* Inverse DCT on the rows now */ for (i = 0; i < 8; i++) { /* Check for non-zero values */ @@ -132,28 +119,28 @@ void vp3_idct_c(int16_t *input_data, int16_t *dequant_matrix, _Hd = _Bd + H_; /* Final sequence of operations over-write original inputs. 
*/ - ip[0] = (int16_t)((_Gd + _Cd ) >> 0); - ip[7] = (int16_t)((_Gd - _Cd ) >> 0); + ip[0] = _Gd + _Cd ; + ip[7] = _Gd - _Cd ; - ip[1] = (int16_t)((_Add + _Hd ) >> 0); - ip[2] = (int16_t)((_Add - _Hd ) >> 0); + ip[1] = _Add + _Hd; + ip[2] = _Add - _Hd; - ip[3] = (int16_t)((_Ed + _Dd ) >> 0); - ip[4] = (int16_t)((_Ed - _Dd ) >> 0); + ip[3] = _Ed + _Dd ; + ip[4] = _Ed - _Dd ; - ip[5] = (int16_t)((_Fd + _Bdd ) >> 0); - ip[6] = (int16_t)((_Fd - _Bdd ) >> 0); + ip[5] = _Fd + _Bdd; + ip[6] = _Fd - _Bdd; } ip += 8; /* next row */ } - - ip = dequantized_data; + + ip = input; for ( i = 0; i < 8; i++) { /* Check for non-zero values (bitwise or faster than ||) */ - if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] | + if ( ip[1 * 8] | ip[2 * 8] | ip[3 * 8] | ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) { t1 = (int32_t)(xC1S7 * ip[1*8]); @@ -223,37 +210,103 @@ void vp3_idct_c(int16_t *input_data, int16_t *dequant_matrix, _Fd = F_ - _Ad; _Hd = _Bd + H_; + if(type==1){ //HACK + _Gd += 16*128; + _Add+= 16*128; + _Ed += 16*128; + _Fd += 16*128; + } _Gd += IdctAdjustBeforeShift; _Add += IdctAdjustBeforeShift; _Ed += IdctAdjustBeforeShift; _Fd += IdctAdjustBeforeShift; /* Final sequence of operations over-write original inputs. 
*/ - op[0*8] = (int16_t)((_Gd + _Cd ) >> 4); - op[7*8] = (int16_t)((_Gd - _Cd ) >> 4); - - op[1*8] = (int16_t)((_Add + _Hd ) >> 4); - op[2*8] = (int16_t)((_Add - _Hd ) >> 4); - - op[3*8] = (int16_t)((_Ed + _Dd ) >> 4); - op[4*8] = (int16_t)((_Ed - _Dd ) >> 4); - - op[5*8] = (int16_t)((_Fd + _Bdd ) >> 4); - op[6*8] = (int16_t)((_Fd - _Bdd ) >> 4); + if(type==0){ + ip[0*8] = (_Gd + _Cd ) >> 4; + ip[7*8] = (_Gd - _Cd ) >> 4; + + ip[1*8] = (_Add + _Hd ) >> 4; + ip[2*8] = (_Add - _Hd ) >> 4; + + ip[3*8] = (_Ed + _Dd ) >> 4; + ip[4*8] = (_Ed - _Dd ) >> 4; + + ip[5*8] = (_Fd + _Bdd ) >> 4; + ip[6*8] = (_Fd - _Bdd ) >> 4; + }else if(type==1){ + dst[0*stride] = cm[(_Gd + _Cd ) >> 4]; + dst[7*stride] = cm[(_Gd - _Cd ) >> 4]; + + dst[1*stride] = cm[(_Add + _Hd ) >> 4]; + dst[2*stride] = cm[(_Add - _Hd ) >> 4]; + + dst[3*stride] = cm[(_Ed + _Dd ) >> 4]; + dst[4*stride] = cm[(_Ed - _Dd ) >> 4]; + + dst[5*stride] = cm[(_Fd + _Bdd ) >> 4]; + dst[6*stride] = cm[(_Fd - _Bdd ) >> 4]; + }else{ + dst[0*stride] = cm[dst[0*stride] + ((_Gd + _Cd ) >> 4)]; + dst[7*stride] = cm[dst[7*stride] + ((_Gd - _Cd ) >> 4)]; + + dst[1*stride] = cm[dst[1*stride] + ((_Add + _Hd ) >> 4)]; + dst[2*stride] = cm[dst[2*stride] + ((_Add - _Hd ) >> 4)]; + + dst[3*stride] = cm[dst[3*stride] + ((_Ed + _Dd ) >> 4)]; + dst[4*stride] = cm[dst[4*stride] + ((_Ed - _Dd ) >> 4)]; + + dst[5*stride] = cm[dst[5*stride] + ((_Fd + _Bdd ) >> 4)]; + dst[6*stride] = cm[dst[6*stride] + ((_Fd - _Bdd ) >> 4)]; + } } else { - - op[0*8] = 0; - op[7*8] = 0; - op[1*8] = 0; - op[2*8] = 0; - op[3*8] = 0; - op[4*8] = 0; - op[5*8] = 0; - op[6*8] = 0; + if(type==0){ + ip[0*8] = + ip[1*8] = + ip[2*8] = + ip[3*8] = + ip[4*8] = + ip[5*8] = + ip[6*8] = + ip[7*8] = ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20); + }else if(type==1){ + dst[0*stride]= + dst[1*stride]= + dst[2*stride]= + dst[3*stride]= + dst[4*stride]= + dst[5*stride]= + dst[6*stride]= + dst[7*stride]= 128 + ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20); + }else{ 
+ if(ip[0*8]){ + int v= ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20); + dst[0*stride] = cm[dst[0*stride] + v]; + dst[1*stride] = cm[dst[1*stride] + v]; + dst[2*stride] = cm[dst[2*stride] + v]; + dst[3*stride] = cm[dst[3*stride] + v]; + dst[4*stride] = cm[dst[4*stride] + v]; + dst[5*stride] = cm[dst[5*stride] + v]; + dst[6*stride] = cm[dst[6*stride] + v]; + dst[7*stride] = cm[dst[7*stride] + v]; + } + } } ip++; /* next column */ - op++; + dst++; } } + +void ff_vp3_idct_c(DCTELEM *block/* align 16*/){ + idct(NULL, 0, block, 0); +} + +void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){ + idct(dest, line_size, block, 1); +} + +void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){ + idct(dest, line_size, block, 2); +} diff --git a/src/libffmpeg/libavcodec/wmv2.c b/src/libffmpeg/libavcodec/wmv2.c index 949d7c640..cbe5de215 100644 --- a/src/libffmpeg/libavcodec/wmv2.c +++ b/src/libffmpeg/libavcodec/wmv2.c @@ -587,11 +587,10 @@ static inline int wmv2_decode_inter_block(Wmv2Context *w, DCTELEM *block, int n, static void wmv2_add_block(Wmv2Context *w, DCTELEM *block1, uint8_t *dst, int stride, int n){ MpegEncContext * const s= &w->s; + if (s->block_last_index[n] >= 0) { switch(w->abt_type_table[n]){ case 0: - if (s->block_last_index[n] >= 0) { - s->dsp.idct_add (dst, stride, block1); - } + s->dsp.idct_add (dst, stride, block1); break; case 1: simple_idct84_add(dst , stride, block1); @@ -606,6 +605,7 @@ static void wmv2_add_block(Wmv2Context *w, DCTELEM *block1, uint8_t *dst, int st default: av_log(s->avctx, AV_LOG_ERROR, "internal error in WMV2 abt\n"); } + } } void ff_wmv2_add_mb(MpegEncContext *s, DCTELEM block1[6][64], uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr){ -- cgit v1.2.3