From e69ac0f8052424e3ce344365b6be0c95f0e68db6 Mon Sep 17 00:00:00 2001 From: Miguel Freitas Date: Tue, 29 Oct 2002 16:29:16 +0000 Subject: sync to ffmpeg cvs CVS patchset: 3090 CVS date: 2002/10/29 16:29:16 --- src/libffmpeg/libavcodec/Makefile.am | 7 +- src/libffmpeg/libavcodec/alpha/Makefile.am | 5 +- src/libffmpeg/libavcodec/alpha/asm.h | 38 +- src/libffmpeg/libavcodec/alpha/dsputil_alpha.c | 6 +- src/libffmpeg/libavcodec/alpha/motion_est_alpha.c | 2 + .../libavcodec/alpha/motion_est_mvi_asm.S | 186 ++ src/libffmpeg/libavcodec/alpha/mpegvideo_alpha.c | 27 +- src/libffmpeg/libavcodec/alpha/pixops.h | 135 - src/libffmpeg/libavcodec/armv4l/dsputil_arm.c | 2 +- src/libffmpeg/libavcodec/avcodec.h | 678 ++++- src/libffmpeg/libavcodec/common.c | 34 +- src/libffmpeg/libavcodec/common.h | 254 +- src/libffmpeg/libavcodec/dsputil.c | 346 ++- src/libffmpeg/libavcodec/dsputil.h | 98 +- src/libffmpeg/libavcodec/dv.c | 673 +++++ src/libffmpeg/libavcodec/dvdata.h | 907 +++++++ src/libffmpeg/libavcodec/error_resilience.c | 885 +++++++ src/libffmpeg/libavcodec/eval.c | 50 +- src/libffmpeg/libavcodec/fft.c | 229 ++ src/libffmpeg/libavcodec/h263.c | 2638 +++++++++++--------- src/libffmpeg/libavcodec/h263dec.c | 452 ++-- src/libffmpeg/libavcodec/i386/Makefile.am | 3 +- src/libffmpeg/libavcodec/i386/dsputil_mmx.c | 115 +- src/libffmpeg/libavcodec/i386/dsputil_mmx_avg.h | 8 +- src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h | 18 +- src/libffmpeg/libavcodec/i386/fdct_mmx.c | 192 +- src/libffmpeg/libavcodec/i386/fft_sse.c | 128 + src/libffmpeg/libavcodec/i386/idct_mmx.c | 413 +-- src/libffmpeg/libavcodec/i386/motion_est_mmx.c | 42 +- src/libffmpeg/libavcodec/i386/mpegvideo_mmx.c | 748 +++--- .../libavcodec/i386/mpegvideo_mmx_template.c | 279 ++- src/libffmpeg/libavcodec/i386/simple_idct_mmx.c | 139 +- src/libffmpeg/libavcodec/imgconvert.c | 36 + src/libffmpeg/libavcodec/mdct.c | 170 ++ src/libffmpeg/libavcodec/mjpeg.c | 366 +-- src/libffmpeg/libavcodec/mlib/dsputil_mlib.c | 22 +- 
src/libffmpeg/libavcodec/motion_est.c | 38 +- src/libffmpeg/libavcodec/mpeg12.c | 698 ++++-- src/libffmpeg/libavcodec/mpeg12data.h | 4 +- src/libffmpeg/libavcodec/mpeg4data.h | 8 +- src/libffmpeg/libavcodec/mpegvideo.c | 1318 ++++++---- src/libffmpeg/libavcodec/mpegvideo.h | 224 +- src/libffmpeg/libavcodec/msmpeg4.c | 116 +- src/libffmpeg/libavcodec/msmpeg4data.h | 10 +- src/libffmpeg/libavcodec/ppc/dsputil_ppc.c | 1 - src/libffmpeg/libavcodec/ratecontrol.c | 202 +- src/libffmpeg/libavcodec/rv10.c | 173 +- src/libffmpeg/libavcodec/simple_idct.c | 341 +-- src/libffmpeg/libavcodec/simple_idct.h | 6 +- src/libffmpeg/libavcodec/svq1.c | 3 +- src/libffmpeg/libavcodec/utils.c | 55 +- src/libffmpeg/libavcodec/wmadata.h | 1409 +++++++++++ src/libffmpeg/libavcodec/wmadec.c | 1339 ++++++++++ 53 files changed, 11879 insertions(+), 4397 deletions(-) create mode 100644 src/libffmpeg/libavcodec/alpha/motion_est_mvi_asm.S delete mode 100644 src/libffmpeg/libavcodec/alpha/pixops.h create mode 100644 src/libffmpeg/libavcodec/dv.c create mode 100644 src/libffmpeg/libavcodec/dvdata.h create mode 100644 src/libffmpeg/libavcodec/error_resilience.c create mode 100644 src/libffmpeg/libavcodec/fft.c create mode 100644 src/libffmpeg/libavcodec/i386/fft_sse.c create mode 100644 src/libffmpeg/libavcodec/mdct.c create mode 100644 src/libffmpeg/libavcodec/wmadata.h create mode 100644 src/libffmpeg/libavcodec/wmadec.c diff --git a/src/libffmpeg/libavcodec/Makefile.am b/src/libffmpeg/libavcodec/Makefile.am index de1ca8227..b5bb4d7f4 100644 --- a/src/libffmpeg/libavcodec/Makefile.am +++ b/src/libffmpeg/libavcodec/Makefile.am @@ -16,13 +16,17 @@ noinst_LTLIBRARIES = libavcodec.la libavcodec_la_SOURCES = \ common.c \ dsputil.c \ + dv.c \ + error_resilience.c \ eval.c \ + fft.c \ h263.c \ h263dec.c \ imgconvert.c \ jfdctfst.c \ jfdctint.c \ jrevdct.c \ + mdct.c \ mem.c \ mjpeg.c \ motion_est.c \ @@ -33,7 +37,8 @@ libavcodec_la_SOURCES = \ rv10.c \ simple_idct.c \ svq1.c \ - utils.c + utils.c \ + 
wmadec.c #imgresample.c libavcodec_la_LDFLAGS = \ diff --git a/src/libffmpeg/libavcodec/alpha/Makefile.am b/src/libffmpeg/libavcodec/alpha/Makefile.am index 84ddab118..8b9553319 100644 --- a/src/libffmpeg/libavcodec/alpha/Makefile.am +++ b/src/libffmpeg/libavcodec/alpha/Makefile.am @@ -1,8 +1,9 @@ EXTRA_DIST = asm.h \ dsputil_alpha.c \ mpegvideo_alpha.c \ - pixops.h \ - motion_est_alpha.c + motion_est_alpha.c \ + motion_est_mvi_asm.S \ + regdef.h all: debug: diff --git a/src/libffmpeg/libavcodec/alpha/asm.h b/src/libffmpeg/libavcodec/alpha/asm.h index 2fdbdf13d..c2983125a 100644 --- a/src/libffmpeg/libavcodec/alpha/asm.h +++ b/src/libffmpeg/libavcodec/alpha/asm.h @@ -22,6 +22,21 @@ #include +#if defined __GNUC__ +# define GNUC_PREREQ(maj, min) \ + ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) +#else +# define GNUC_PREREQ(maj, min) 0 +#endif + +#if GNUC_PREREQ(2,96) +# define likely(x) __builtin_expect((x) != 0, 1) +# define unlikely(x) __builtin_expect((x) != 0, 0) +#else +# define likely(x) (x) +# define unlikely(x) (x) +#endif + #define AMASK_BWX (1 << 0) #define AMASK_FIX (1 << 1) #define AMASK_CIX (1 << 2) @@ -45,6 +60,7 @@ inline static uint64_t WORD_VEC(uint64_t x) #define ldl(p) (*(const int32_t *) (p)) #define stl(l, p) do { *(uint32_t *) (p) = (l); } while (0) #define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0) +#define sextw(x) ((int16_t) (x)) #ifdef __GNUC__ #define ASM_ACCEPT_MVI asm (".arch pca56") @@ -52,10 +68,26 @@ struct unaligned_long { uint64_t l; } __attribute__((packed)); #define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul)) #define uldq(a) (((const struct unaligned_long *) (a))->l) -#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 3 +#if GNUC_PREREQ(3,0) +/* Unfortunately, __builtin_prefetch is slightly buggy on Alpha. The + defines here are kludged so we still get the right + instruction. This needs to be adapted as soon as gcc is fixed. 
*/ +# define prefetch(p) __builtin_prefetch((p), 0, 1) +# define prefetch_en(p) __builtin_prefetch((p), 1, 1) +# define prefetch_m(p) __builtin_prefetch((p), 0, 0) +# define prefetch_men(p) __builtin_prefetch((p), 1, 0) +#else +# define prefetch(p) asm volatile("ldl $31,%0" : : "m"(*(const char *) (p)) : "memory") +# define prefetch_en(p) asm volatile("ldq $31,%0" : : "m"(*(const char *) (p)) : "memory") +# define prefetch_m(p) asm volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory") +# define prefetch_men(p) asm volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory") +#endif + +#if GNUC_PREREQ(3,3) #define cmpbge __builtin_alpha_cmpbge /* Avoid warnings. */ #define extql(a, b) __builtin_alpha_extql(a, (uint64_t) (b)) +#define extwl(a, b) __builtin_alpha_extwl(a, (uint64_t) (b)) #define extqh(a, b) __builtin_alpha_extqh(a, (uint64_t) (b)) #define zap __builtin_alpha_zap #define zapnot __builtin_alpha_zapnot @@ -78,6 +110,7 @@ struct unaligned_long { uint64_t l; } __attribute__((packed)); #else #define cmpbge(a, b) ({ uint64_t __r; asm ("cmpbge %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) #define extql(a, b) ({ uint64_t __r; asm ("extql %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) +#define extwl(a, b) ({ uint64_t __r; asm ("extwl %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) #define extqh(a, b) ({ uint64_t __r; asm ("extqh %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) #define zap(a, b) ({ uint64_t __r; asm ("zap %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) #define zapnot(a, b) ({ uint64_t __r; asm ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) @@ -99,7 +132,7 @@ struct unaligned_long { uint64_t l; } __attribute__((packed)); #define unpkbw(a) ({ uint64_t __r; asm ("unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) #endif -#elif defined(__DECC) /* Digital/Compaq "ccc" compiler */ +#elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */ #include #define ASM_ACCEPT_MVI @@ -107,6 
+140,7 @@ struct unaligned_long { uint64_t l; } __attribute__((packed)); #define uldq(a) (*(const __unaligned uint64_t *) (a)) #define cmpbge(a, b) asm ("cmpbge %a0,%a1,%v0", a, b) #define extql(a, b) asm ("extql %a0,%a1,%v0", a, b) +#define extwl(a, b) asm ("extwl %a0,%a1,%v0", a, b) #define extqh(a, b) asm ("extqh %a0,%a1,%v0", a, b) #define zap(a, b) asm ("zap %a0,%a1,%v0", a, b) #define zapnot(a, b) asm ("zapnot %a0,%a1,%v0", a, b) diff --git a/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c b/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c index fef86fe64..371e20269 100644 --- a/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c +++ b/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c @@ -20,8 +20,6 @@ #include "asm.h" #include "../dsputil.h" -void simple_idct_axp(DCTELEM *block); - void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, int line_size, int h); void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, @@ -34,7 +32,7 @@ void get_pixels_mvi(DCTELEM *restrict block, void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride); int pix_abs8x8_mvi(uint8_t *pix1, uint8_t *pix2, int line_size); -int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size); +int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size); int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size); int pix_abs16x16_y2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size); int pix_abs16x16_xy2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size); @@ -335,7 +333,7 @@ void dsputil_init_alpha(void) get_pixels = get_pixels_mvi; diff_pixels = diff_pixels_mvi; pix_abs8x8 = pix_abs8x8_mvi; - pix_abs16x16 = pix_abs16x16_mvi; + pix_abs16x16 = pix_abs16x16_mvi_asm; pix_abs16x16_x2 = pix_abs16x16_x2_mvi; pix_abs16x16_y2 = pix_abs16x16_y2_mvi; pix_abs16x16_xy2 = pix_abs16x16_xy2_mvi; diff --git a/src/libffmpeg/libavcodec/alpha/motion_est_alpha.c b/src/libffmpeg/libavcodec/alpha/motion_est_alpha.c index b0968d104..804e1d2b6 100644 --- 
a/src/libffmpeg/libavcodec/alpha/motion_est_alpha.c +++ b/src/libffmpeg/libavcodec/alpha/motion_est_alpha.c @@ -117,6 +117,7 @@ int pix_abs8x8_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) return result; } +#if 0 /* now done in assembly */ int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) { int result = 0; @@ -157,6 +158,7 @@ int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) return result; } +#endif int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) { diff --git a/src/libffmpeg/libavcodec/alpha/motion_est_mvi_asm.S b/src/libffmpeg/libavcodec/alpha/motion_est_mvi_asm.S new file mode 100644 index 000000000..0042e7e82 --- /dev/null +++ b/src/libffmpeg/libavcodec/alpha/motion_est_mvi_asm.S @@ -0,0 +1,186 @@ +/* + * Alpha optimized DSP utils + * Copyright (c) 2002 Falk Hueffner + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "regdef.h" +#ifdef HAVE_AV_CONFIG_H +#include "config.h" +#endif + +/* Some nicer register names. 
*/ +#define ta t10 +#define tb t11 +#define tc t12 +#define td AT +/* Danger: these overlap with the argument list and the return value */ +#define te a5 +#define tf a4 +#define tg a3 +#define th v0 + + .set noat + .set noreorder + .arch pca56 + .text + +/***************************************************************************** + * int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size) + * + * This code is written with a pca56 in mind. For ev6, one should + * really take the increased latency of 3 cycles for MVI instructions + * into account. + * + * It is important to keep the loading and first use of a register as + * far apart as possible, because if a register is accessed before it + * has been fetched from memory, the CPU will stall. + */ + .align 4 + .globl pix_abs16x16_mvi_asm + .ent pix_abs16x16_mvi_asm +pix_abs16x16_mvi_asm: + .frame sp, 0, ra, 0 + .prologue 0 + +#ifdef HAVE_GPROF + lda AT, _mcount + jsr AT, (AT), _mcount +#endif + + and a1, 7, t0 + clr v0 + lda a3, 16 + beq t0, $aligned + .align 4 +$unaligned: + /* Registers: + line 0: + t0: left_u -> left lo -> left + t1: mid + t2: right_u -> right hi -> right + t3: ref left + t4: ref right + line 1: + t5: left_u -> left lo -> left + t6: mid + t7: right_u -> right hi -> right + t8: ref left + t9: ref right + temp: + ta: left hi + tb: right lo + tc: error left + td: error right */ + + /* load line 0 */ + ldq_u t0, 0(a1) # left_u + ldq_u t1, 8(a1) # mid + ldq_u t2, 16(a1) # right_u + ldq t3, 0(a0) # ref left + ldq t4, 8(a0) # ref right + addq a0, a2, a0 # pix1 + addq a1, a2, a1 # pix2 + /* load line 1 */ + ldq_u t5, 0(a1) # left_u + ldq_u t6, 8(a1) # mid + ldq_u t7, 16(a1) # right_u + ldq t8, 0(a0) # ref left + ldq t9, 8(a0) # ref right + addq a0, a2, a0 # pix1 + addq a1, a2, a1 # pix2 + /* calc line 0 */ + extql t0, a1, t0 # left lo + extqh t1, a1, ta # left hi + extql t1, a1, tb # right lo + or t0, ta, t0 # left + extqh t2, a1, t2 # right hi + perr t3, t0, tc # error left + or t2, tb, 
t2 # right + perr t4, t2, td # error right + addq v0, tc, v0 # add error left + addq v0, td, v0 # add error left + /* calc line 1 */ + extql t5, a1, t5 # left lo + extqh t6, a1, ta # left hi + extql t6, a1, tb # right lo + or t5, ta, t5 # left + extqh t7, a1, t7 # right hi + perr t8, t5, tc # error left + or t7, tb, t7 # right + perr t9, t7, td # error right + addq v0, tc, v0 # add error left + addq v0, td, v0 # add error left + /* loop */ + subq a3, 2, a3 # h -= 2 + bne a3, $unaligned + ret + + .align 4 +$aligned: + /* load line 0 */ + ldq t0, 0(a1) # left + ldq t1, 8(a1) # right + addq a1, a2, a1 # pix2 + ldq t2, 0(a0) # ref left + ldq t3, 8(a0) # ref right + addq a0, a2, a0 # pix1 + /* load line 1 */ + ldq t4, 0(a1) # left + ldq t5, 8(a1) # right + addq a1, a2, a1 # pix2 + ldq t6, 0(a0) # ref left + ldq t7, 8(a0) # ref right + addq a0, a2, a0 # pix1 + /* load line 2 */ + ldq t8, 0(a1) # left + ldq t9, 8(a1) # right + addq a1, a2, a1 # pix2 + ldq ta, 0(a0) # ref left + ldq tb, 8(a0) # ref right + addq a0, a2, a0 # pix1 + /* load line 3 */ + ldq tc, 0(a1) # left + ldq td, 8(a1) # right + addq a1, a2, a1 # pix2 + ldq te, 0(a0) # ref left + ldq tf, 8(a0) # ref right + /* calc line 0 */ + perr t0, t2, t0 # error left + addq a0, a2, a0 # pix1 + perr t1, t3, t1 # error right + addq v0, t0, v0 # add error left + /* calc line 1 */ + perr t4, t6, t0 # error left + addq v0, t1, v0 # add error right + perr t5, t7, t1 # error right + addq v0, t0, v0 # add error left + /* calc line 2 */ + perr t8, ta, t0 # error left + addq v0, t1, v0 # add error right + perr t9, tb, t1 # error right + addq v0, t0, v0 # add error left + /* calc line 3 */ + perr tc, te, t0 # error left + addq v0, t1, v0 # add error right + perr td, tf, t1 # error right + addq v0, t0, v0 # add error left + addq v0, t1, v0 # add error right + /* loop */ + subq a3, 4, a3 # h -= 4 + bne a3, $aligned + ret + .end pix_abs16x16_mvi_asm diff --git a/src/libffmpeg/libavcodec/alpha/mpegvideo_alpha.c 
b/src/libffmpeg/libavcodec/alpha/mpegvideo_alpha.c index 0be327079..350b53f62 100644 --- a/src/libffmpeg/libavcodec/alpha/mpegvideo_alpha.c +++ b/src/libffmpeg/libavcodec/alpha/mpegvideo_alpha.c @@ -21,8 +21,9 @@ #include "../dsputil.h" #include "../mpegvideo.h" -extern UINT8 zigzag_end[64]; - +extern void simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block); +extern void simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block); + static void dct_unquantize_h263_axp(MpegEncContext *s, DCTELEM *block, int n, int qscale) { @@ -32,24 +33,26 @@ static void dct_unquantize_h263_axp(MpegEncContext *s, DCTELEM *block, DCTELEM *orig_block = block; DCTELEM block0; + qadd = WORD_VEC((qscale - 1) | 1); + qmul = qscale << 1; + /* This mask kills spill from negative subwords to the next subword. */ + correction = WORD_VEC((qmul - 1) + 1); /* multiplication / addition */ + if (s->mb_intra) { if (!s->h263_aic) { if (n < 4) block0 = block[0] * s->y_dc_scale; else block0 = block[0] * s->c_dc_scale; - } - n_coeffs = 64; // does not always use zigzag table + } else { + qadd = 0; + } + n_coeffs = 63; // does not always use zigzag table } else { - n_coeffs = zigzag_end[s->block_last_index[n]]; + n_coeffs = s->intra_scantable.raster_end[s->block_last_index[n]]; } - qmul = qscale << 1; - qadd = WORD_VEC((qscale - 1) | 1); - /* This mask kills spill from negative subwords to the next subword. 
*/ - correction = WORD_VEC((qmul - 1) + 1); /* multiplication / addition */ - - for(i = 0; i < n_coeffs; block += 4, i += 4) { + for(i = 0; i <= n_coeffs; block += 4, i += 4) { uint64_t levels, negmask, zeros, add; levels = ldq(block); @@ -94,4 +97,6 @@ static void dct_unquantize_h263_axp(MpegEncContext *s, DCTELEM *block, void MPV_common_init_axp(MpegEncContext *s) { s->dct_unquantize_h263 = dct_unquantize_h263_axp; + s->idct_put = simple_idct_put_axp; + s->idct_add = simple_idct_add_axp; } diff --git a/src/libffmpeg/libavcodec/alpha/pixops.h b/src/libffmpeg/libavcodec/alpha/pixops.h deleted file mode 100644 index 118d7ae23..000000000 --- a/src/libffmpeg/libavcodec/alpha/pixops.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Alpha optimized DSP utils - * Copyright (c) 2002 Falk Hueffner - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -/* This file is intended to be #included with proper definitions of - * PIXOPNAME, BTYPE, AVG2, AVG4 and STORE. 
*/ - -static void PIXOPNAME(_pixels_axp)(BTYPE *block, const UINT8 *pixels, - int line_size, int h) -{ - if ((size_t) pixels & 0x7) { - do { - STORE(uldq(pixels), block); - pixels += line_size; - block += line_size; - } while (--h); - } else { - do { - STORE(ldq(pixels), block); - pixels += line_size; - block += line_size; - } while (--h); - } -} - -static void PIXOPNAME(_pixels_x2_axp)(BTYPE *block, const UINT8 *pixels, - int line_size, int h) -{ - if ((size_t) pixels & 0x7) { - do { - UINT64 pix1, pix2; - - pix1 = uldq(pixels); - pix2 = pix1 >> 8 | ((UINT64) pixels[8] << 56); - STORE(AVG2(pix1, pix2), block); - pixels += line_size; - block += line_size; - } while (--h); - } else { - do { - UINT64 pix1, pix2; - - pix1 = ldq(pixels); - pix2 = pix1 >> 8 | ((UINT64) pixels[8] << 56); - STORE(AVG2(pix1, pix2), block); - pixels += line_size; - block += line_size; - } while (--h); - } -} - -static void PIXOPNAME(_pixels_y2_axp)(BTYPE *block, const UINT8 *pixels, - int line_size, int h) -{ - if ((size_t) pixels & 0x7) { - UINT64 pix = uldq(pixels); - do { - UINT64 next_pix; - - pixels += line_size; - next_pix = uldq(pixels); - STORE(AVG2(pix, next_pix), block); - block += line_size; - pix = next_pix; - } while (--h); - } else { - UINT64 pix = ldq(pixels); - do { - UINT64 next_pix; - - pixels += line_size; - next_pix = ldq(pixels); - STORE(AVG2(pix, next_pix), block); - block += line_size; - pix = next_pix; - } while (--h); - } -} - -/* This could be further sped up by recycling AVG4 intermediate - results from the previous loop pass. 
*/ -static void PIXOPNAME(_pixels_xy2_axp)(BTYPE *block, const UINT8 *pixels, - int line_size, int h) -{ - if ((size_t) pixels & 0x7) { - UINT64 pix1 = uldq(pixels); - UINT64 pix2 = pix1 >> 8 | ((UINT64) pixels[8] << 56); - - do { - UINT64 next_pix1, next_pix2; - - pixels += line_size; - next_pix1 = uldq(pixels); - next_pix2 = next_pix1 >> 8 | ((UINT64) pixels[8] << 56); - - STORE(AVG4(pix1, pix2, next_pix1, next_pix2), block); - - block += line_size; - pix1 = next_pix1; - pix2 = next_pix2; - } while (--h); - } else { - UINT64 pix1 = ldq(pixels); - UINT64 pix2 = pix1 >> 8 | ((UINT64) pixels[8] << 56); - - do { - UINT64 next_pix1, next_pix2; - - pixels += line_size; - next_pix1 = ldq(pixels); - next_pix2 = next_pix1 >> 8 | ((UINT64) pixels[8] << 56); - - STORE(AVG4(pix1, pix2, next_pix1, next_pix2), block); - - block += line_size; - pix1 = next_pix1; - pix2 = next_pix2; - } while (--h); - } -} diff --git a/src/libffmpeg/libavcodec/armv4l/dsputil_arm.c b/src/libffmpeg/libavcodec/armv4l/dsputil_arm.c index cd362ca48..66358b38d 100644 --- a/src/libffmpeg/libavcodec/armv4l/dsputil_arm.c +++ b/src/libffmpeg/libavcodec/armv4l/dsputil_arm.c @@ -23,5 +23,5 @@ extern void j_rev_dct_ARM(DCTELEM *data); void dsputil_init_armv4l(void) { - ff_idct = j_rev_dct_ARM; +// ff_idct = j_rev_dct_ARM; } diff --git a/src/libffmpeg/libavcodec/avcodec.h b/src/libffmpeg/libavcodec/avcodec.h index 8b59ce653..9f80071aa 100644 --- a/src/libffmpeg/libavcodec/avcodec.h +++ b/src/libffmpeg/libavcodec/avcodec.h @@ -5,11 +5,11 @@ #define LIBAVCODEC_VERSION_INT 0x000406 #define LIBAVCODEC_VERSION "0.4.6" -#define LIBAVCODEC_BUILD 4623 -#define LIBAVCODEC_BUILD_STR "4623" +#define LIBAVCODEC_BUILD 4632 +#define LIBAVCODEC_BUILD_STR "4632" enum CodecID { - CODEC_ID_NONE, + CODEC_ID_NONE, CODEC_ID_MPEG1VIDEO, CODEC_ID_H263, CODEC_ID_RV10, @@ -28,6 +28,10 @@ enum CodecID { CODEC_ID_H263P, CODEC_ID_H263I, CODEC_ID_SVQ1, + CODEC_ID_DVVIDEO, + CODEC_ID_DVAUDIO, + CODEC_ID_WMAV1, + CODEC_ID_WMAV2, /* various 
pcm "codecs" */ CODEC_ID_PCM_S16LE, @@ -62,7 +66,8 @@ enum PixelFormat { PIX_FMT_YUV444P, PIX_FMT_RGBA32, PIX_FMT_BGRA32, - PIX_FMT_YUV410P + PIX_FMT_YUV410P, + PIX_FMT_YUV411P }; /* currently unused, may be used if 24/32 bits samples ever supported */ @@ -71,7 +76,7 @@ enum SampleFormat { }; /* in bytes */ -#define AVCODEC_MAX_AUDIO_FRAME_SIZE 18432 +#define AVCODEC_MAX_AUDIO_FRAME_SIZE 131072 /* motion estimation type, EPZS by default */ enum Motion_Est_ID { @@ -86,7 +91,7 @@ enum Motion_Est_ID { typedef struct RcOverride{ int start_frame; int end_frame; - int qscale; /* if this is 0 then quality_factor will be used instead */ + int qscale; // if this is 0 then quality_factor will be used instead float quality_factor; } RcOverride; @@ -94,36 +99,41 @@ typedef struct RcOverride{ extern int motion_estimation_method; /* ME algos sorted by quality */ -static const int Motion_Est_QTab[] = { ME_ZERO, ME_PHODS, ME_LOG, +static const int Motion_Est_QTab[] = { ME_ZERO, ME_PHODS, ME_LOG, ME_X1, ME_EPZS, ME_FULL }; #define FF_MAX_B_FRAMES 4 -/* encoding support */ -/* note not everything is supported yet */ - -#define CODEC_FLAG_HQ 0x0001 /* high quality (non real time) encoding */ -#define CODEC_FLAG_QSCALE 0x0002 /* use fixed qscale */ -#define CODEC_FLAG_4MV 0x0004 /* 4 MV per MB allowed */ -#define CODEC_FLAG_QPEL 0x0010 /* use qpel MC */ -#define CODEC_FLAG_GMC 0x0020 /* use GMC */ -#define CODEC_FLAG_TYPE 0x0040 /* fixed I/P frame type, from avctx->key_frame */ -#define CODEC_FLAG_PART 0x0080 /* use data partitioning */ -/* parent program gurantees that the input for b-frame containing streams is not written to +/* encoding support + these flags can be passed in AVCodecContext.flags before initing + Note: note not everything is supported yet +*/ + +#define CODEC_FLAG_HQ 0x0001 /* brute force MB-type decission mode (slow) */ +#define CODEC_FLAG_QSCALE 0x0002 /* use fixed qscale */ +#define CODEC_FLAG_4MV 0x0004 /* 4 MV per MB allowed */ +#define CODEC_FLAG_QPEL 0x0010 
/* use qpel MC */ +#define CODEC_FLAG_GMC 0x0020 /* use GMC */ +#define CODEC_FLAG_TYPE 0x0040 /* fixed I/P frame type, from avctx->key_frame */ +#define CODEC_FLAG_PART 0x0080 /* use data partitioning */ +/* parent program gurantees that the input for b-frame containing streams is not written to for at least s->max_b_frames+1 frames, if this is not set than the input will be copied */ #define CODEC_FLAG_INPUT_PRESERVED 0x0100 -#define CODEC_FLAG_PASS1 0x0200 /* use internal 2pass ratecontrol in first pass mode */ -#define CODEC_FLAG_PASS2 0x0400 /* use internal 2pass ratecontrol in second pass mode */ +#define CODEC_FLAG_PASS1 0x0200 /* use internal 2pass ratecontrol in first pass mode */ +#define CODEC_FLAG_PASS2 0x0400 /* use internal 2pass ratecontrol in second pass mode */ #define CODEC_FLAG_EXTERN_HUFF 0x1000 /* use external huffman table (for mjpeg) */ -#define CODEC_FLAG_GRAY 0x2000 /* only decode/encode grayscale */ +#define CODEC_FLAG_GRAY 0x2000 /* only decode/encode grayscale */ #define CODEC_FLAG_EMU_EDGE 0x4000/* dont draw edges */ -#define CODEC_FLAG_DR1 0x8000 /* dr1 */ -#define CODEC_FLAG_NOT_TRUNCATED 0x00010000 /* input bitstream is not truncated, except before a startcode */ +#define CODEC_FLAG_DR1 0x8000 /* direct renderig type 1 (store internal frames in external buffers) */ +#define CODEC_FLAG_NOT_TRUNCATED 0x00010000 /* input bitstream is not truncated, except before a startcode + allows the last part of a frame to be decoded earlier */ +#define CODEC_FLAG_NORMALIZE_AQP 0x00020000 /* normalize adaptive quantization */ +#define CODEC_FLAG_INTERLACED_DCT 0x00040000 /* use interlaced dct */ + /* codec capabilities */ -/* decoder can use draw_horiz_band callback */ -#define CODEC_CAP_DRAW_HORIZ_BAND 0x0001 +#define CODEC_CAP_DRAW_HORIZ_BAND 0x0001 /* decoder can use draw_horiz_band callback */ #define CODEC_CAP_DR1 0x0002 /* direct rendering method 1 */ /* if 'parse_only' field is true, then avcodec_parse_frame() can be used */ @@ -132,23 
+142,73 @@ static const int Motion_Est_QTab[] = { ME_ZERO, ME_PHODS, ME_LOG, #define FRAME_RATE_BASE 10000 typedef struct AVCodecContext { + /** + * the average bitrate + * encoding: set by user. unused for constant quantizer encoding + * decoding: set by lavc. 0 or some bitrate if this info is available in the stream + */ int bit_rate; - int bit_rate_tolerance; /* amount of +- bits (>0)*/ - int flags; - int sub_id; /* some codecs needs additionnal format info. It is - stored there */ - int me_method; /* ME algorithm used for video coding */ + /** + * number of bits the bitstream is allowed to diverge from the reference + * the reference can be CBR (for CBR pass1) or VBR (for pass2) + * encoding: set by user. unused for constant quantizer encoding + * decoding: unused + */ + int bit_rate_tolerance; + + /** + * CODEC_FLAG_* + * encoding: set by user. + * decoding: set by user. + */ + int flags; - /* extra data from parent application to codec, e.g. huffman table - for mjpeg */ - /* the parent should allocate and free this buffer */ + /** + * some codecs needs additionnal format info. It is stored here + * encoding: set by user. + * decoding: set by lavc. (FIXME is this ok?) + */ + int sub_id; + + /** + * motion estimation algorithm used for video coding + * encoding: set by user. + * decoding: unused + */ + int me_method; + + /** + * some codecs need / can use extra-data like huffman tables + * mjpeg: huffman tables + * rv10: additional flags + * mpeg4: global headers (they can be in the bitstream or here) + * encoding: set/allocated/freed by lavc. + * decoding: set/allocated/freed by user. + */ void *extradata; int extradata_size; - + /* video only */ - int frame_rate; /* frames per sec multiplied by FRAME_RATE_BASE */ + /** + * frames per sec multiplied by FRAME_RATE_BASE + * for variable fps this is the precission, so if the timestamps + * can be specified in msec precssion then this is 1000*FRAME_RATE_BASE + * encoding: set by user + * decoding: set by lavc. 
0 or the frame_rate if available + */ + int frame_rate; + + /** + * encoding: set by user. + * decoding: set by user, some codecs might override / change it during playback + */ int width, height; + + /** + * encoding: set by user. 0 if not known + * decoding: set by lavc. 0 if not known + */ int aspect_ratio_info; #define FF_ASPECT_SQUARE 1 #define FF_ASPECT_4_3_625 2 @@ -156,15 +216,33 @@ typedef struct AVCodecContext { #define FF_ASPECT_16_9_625 4 #define FF_ASPECT_16_9_525 5 #define FF_ASPECT_EXTENDED 15 - int gop_size; /* 0 = intra only */ - enum PixelFormat pix_fmt; /* pixel format, see PIX_FMT_xxx */ + + /** + * the number of pictures in a group of pitures, or 0 for intra_only + * encoding: set by user. + * decoding: unused + */ + int gop_size; + + /** + * pixel format, see PIX_FMT_xxx + * encoding: unused + * decoding: set by lavc. + */ + enum PixelFormat pix_fmt; + int repeat_pict; /* when decoding, this signal how much the picture */ /* must be delayed. */ /* extra_delay = (repeat_pict / 2) * (1/fps) */ - /* if non NULL, 'draw_horiz_band' is called by the libavcodec - decoder to draw an horizontal band. It improve cache usage. Not - all codecs can do that. You must check the codec capabilities - before */ + + /** + * if non NULL, 'draw_horiz_band' is called by the libavcodec + * decoder to draw an horizontal band. It improve cache usage. Not + * all codecs can do that. You must check the codec capabilities + * before + * encoding: unused + * decoding: set by user. 
+ */ void (*draw_horiz_band)(struct AVCodecContext *s, UINT8 **src_ptr, int linesize, int y, int width, int height); @@ -179,40 +257,103 @@ typedef struct AVCodecContext { int frame_number; /* audio or video frame number */ int real_pict_num; /* returns the real picture number of previous encoded frame */ - int key_frame; /* true if the previous compressed frame was - a key frame (intra, or seekable) */ - int pict_type; /* picture type of the previous - en/decoded frame */ + + /** + * 1 -> keyframe, 0-> not + * encoding: set by lavc (for the outputed bitstream, not the input frame) + * decoding: set by lavc (for the decoded bitstream, not the displayed frame) + */ + int key_frame; + + /** + * picture type of the previous en/decoded frame, see ?_TYPE below + * encoding: set by lavc (for the outputed bitstream, not the input frame) + * decoding: set by lavc (for the decoded bitstream, not the displayed frame) + */ + int pict_type; /* FIXME: these should have FF_ */ -#define I_TYPE 1 /* Intra */ -#define P_TYPE 2 /* Predicted */ -#define B_TYPE 3 /* Bi-dir predicted */ -#define S_TYPE 4 /* S(GMC)-VOP MPEG4 */ - - int delay; /* number of frames the decoded output - will be delayed relative to the encoded input */ - uint8_t *mbskip_table; /* =1 if MB didnt change, is only valid for I/P frames - stride= mb_width = (width+15)>>4 */ - +#define I_TYPE 1 // Intra +#define P_TYPE 2 // Predicted +#define B_TYPE 3 // Bi-dir predicted +#define S_TYPE 4 // S(GMC)-VOP MPEG4 + + /** + * number of frames the decoded output will be delayed relative to + * the encoded input + * encoding: set by lavc. + * decoding: unused + */ + int delay; + + /** + * mbskip_table[mb]=1 if MB didnt change, is only valid for I/P frames + * stride= mb_width = (width+15)>>4 (FIXME export stride?) 
+ * encoding: unused + * decoding: set by lavc + */ + uint8_t *mbskip_table; + /* encoding parameters */ - int quality; /* quality of the previous encoded frame - (between 1 (good) and 31 (bad)) + /** + * quality (between 1 (good) and 31 (bad)) + * encoding: set by user if CODEC_FLAG_QSCALE is set otherwise set by lavc + * decoding: set by lavc + */ + int quality; /* quality of the previous encoded frame + this is allso used to set the quality in vbr mode and the per frame quality in CODEC_FLAG_TYPE (second pass mode) */ float qcompress; /* amount of qscale change between easy & hard scenes (0.0-1.0)*/ float qblur; /* amount of qscale smoothing over time (0.0-1.0) */ - int qmin; /* min qscale */ - int qmax; /* max qscale */ - int max_qdiff; /* max qscale difference between frames */ - int max_b_frames; /* maximum b frames, the output will be delayed by max_b_frames+1 relative to the input */ - float b_quant_factor;/* qscale factor between ps and b frames */ - int rc_strategy; /* obsolete FIXME remove */ + + /** + * minimum quantizer + * encoding: set by user. + * decoding: unused + */ + int qmin; + + /** + * maximum quantizer + * encoding: set by user. + * decoding: unused + */ + int qmax; + + /** + * maximum quantizer difference etween frames + * encoding: set by user. + * decoding: unused + */ + int max_qdiff; + + /** + * maximum number of b frames between non b frames + * note: the output will be delayed by max_b_frames+1 relative to the input + * encoding: set by user. + * decoding: unused + */ + int max_b_frames; + + /** + * qscale factor between ip and b frames + * encoding: set by user. + * decoding: unused + */ + float b_quant_factor; + + /** obsolete FIXME remove */ + int rc_strategy; int b_frame_strategy; - int hurry_up; /* when set to 1 during decoding, b frames will be skiped - when set to 2 idct/dequant will be skipped too */ - + /** + * encoding: unused + * decoding: set by user. 
1-> skip b frames, 2-> skip idct/dequant too + */ + int hurry_up; + struct AVCodec *codec; + void *priv_data; /* The following data is for RTP friendly coding */ @@ -220,7 +361,7 @@ typedef struct AVCodecContext { int rtp_mode; /* 1 for activate RTP friendly-mode */ /* highers numbers represent more error-prone */ /* enviroments, by now just "1" exist */ - + int rtp_payload_size; /* The size of the RTP payload, the coder will */ /* do it's best to deliver a chunk with size */ /* below rtp_payload_size, the chunk will start */ @@ -228,20 +369,24 @@ typedef struct AVCodecContext { /* This doesn't take account of any particular */ /* headers inside the transmited RTP payload */ - + /* The RTP callcack: This function is called */ /* every time the encoder as a packet to send */ /* Depends on the encoder if the data starts */ /* with a Start Code (it should) H.263 does */ - void (*rtp_callback)(void *data, int size, int packet_number); - - /* These are for PSNR calculation, if you set get_psnr to 1 */ - /* after encoding you will have the PSNR on psnr_y/cb/cr */ + void (*rtp_callback)(void *data, int size, int packet_number); + + /** + * if you set get_psnr to 1 then after encoding you will have the + * PSNR on psnr_y/cb/cr + * encoding: set by user (1-> on, 0-> off) + * decoding: unused + */ int get_psnr; float psnr_y; float psnr_cb; float psnr_cr; - + /* statistics, used for 2-pass encoding */ int mv_bits; int header_bits; @@ -250,67 +395,251 @@ typedef struct AVCodecContext { int i_count; int p_count; int skip_count; - int misc_bits; /* cbp, mb_type */ + int misc_bits; + + /** + * number of bits used for the previously encoded frame + * encoding: set by lavc + * decoding: unused + */ int frame_bits; + + /** + * private data of the user, can be used to carry app specific stuff + * encoding: set by user + * decoding: set by user + */ + void *opaque; - /* the following fields are ignored */ - void *opaque; /* can be used to carry app specific stuff */ char 
codec_name[32]; enum CodecType codec_type; /* see CODEC_TYPE_xxx */ enum CodecID codec_id; /* see CODEC_ID_xxx */ unsigned int codec_tag; /* codec tag, only used if unknown codec */ - - int workaround_bugs; /* workaround bugs in encoders which cannot be detected automatically */ + + /** + * workaround bugs in encoders which sometimes cannot be detected automatically + * encoding: unused + * decoding: set by user + */ + int workaround_bugs; +#define FF_BUG_AUTODETECT 1 //autodetection +#define FF_BUG_OLD_MSMPEG4 2 +#define FF_BUG_XVID_ILACE 4 +#define FF_BUG_UMP4 8 +#define FF_BUG_NO_PADDING 16 +#define FF_BUG_AC_VLC 32 +#define FF_BUG_QPEL_CHROMA 64 +//#define FF_BUG_FAKE_SCALABILITY 16 //autodetection should work 100% + + /** + * encoding: set by user + * decoding: unused + */ int luma_elim_threshold; + + /** + * encoding: set by user + * decoding: unused + */ int chroma_elim_threshold; - int strict_std_compliance; /* strictly follow the std (MPEG4, ...) */ - float b_quant_offset;/* qscale offset between ips and b frames, not implemented yet */ + + /** + * strictly follow the std (MPEG4, ...) + * encoding: set by user + * decoding: unused + */ + int strict_std_compliance; + + /** + * qscale offset between ip and b frames + * if > 0 then the last p frame quantizer will be used (q= lastp_q*factor+offset) + * if < 0 then normal ratecontrol will be done (q= -normal_q*factor+offset) + * encoding: set by user. 
+ * decoding: unused + */ + float b_quant_offset; + + /** + * error resilience {-1,0,1} higher values will detect more errors but may missdetect + * some more or less valid parts as errors + * encoding: unused + * decoding: set by user + */ int error_resilience; - + #ifndef MBC #define MBC 128 #define MBR 96 #endif -#define QP_TYPE int /* FIXME note xxx this might be changed to int8_t */ +#define QP_TYPE int //FIXME note xxx this might be changed to int8_t QP_TYPE *quant_store; /* field for communicating with external postprocessing */ unsigned qstride; + + /** + * buffer, where the next picture should be decoded into + * encoding: unused + * decoding: set by user in get_buffer_callback to a buffer into which the next part + * of the bitstream will be decoded, and set by lavc at end of frame to the + * next frame which needs to be displayed + */ uint8_t *dr_buffer[3]; + + /** + * stride of the luminance part of the dr buffer + * encoding: unused + * decoding: set by user + */ int dr_stride; + + /** + * same behavior as dr_buffer, just for some private data of the user + * encoding: unused + * decoding: set by user in get_buffer_callback, and set by lavc at end of frame + */ void *dr_opaque_frame; - void (*get_buffer_callback)(struct AVCodecContext *c, int width, int height, int pict_type); - - int has_b_frames; /* is 1 if the decoded stream contains b frames */ + + /** + * called at the beginning of each frame to get a buffer for it + * encoding: unused + * decoding: set by user + */ + int (*get_buffer_callback)(struct AVCodecContext *c, int width, int height, int pict_type); + + /** + * is 1 if the decoded stream contains b frames, 0 otherwise + * encoding: unused + * decoding: set by lavc + */ + int has_b_frames; + + /** + * stride of the chrominance part of the dr buffer + * encoding: unused + * decoding: set by user + */ int dr_uvstride; + + /** + * number of dr buffers + * encoding: unused + * decoding: set by user + */ int dr_ip_buffer_count; - int 
block_align; /* currently only for adpcm codec in wav/avi */ - + + int block_align; /* used by some WAV based audio codecs */ + int parse_only; /* decoding only: if true, only parsing is done (function avcodec_parse_frame()). The frame data is returned. Only MPEG codecs support this now. */ - - int mpeg_quant; /* 0-> h263 quant 1-> mpeg quant */ - + + /** + * 0-> h263 quant 1-> mpeg quant + * encoding: set by user. + * decoding: unused + */ + int mpeg_quant; + + /** + * pass1 encoding statistics output buffer + * encoding: set by lavc + * decoding: unused + */ char *stats_out; /* encoding statistics output buffer */ - char *stats_in; /* encoding statistics input buffer (concatenated stuff from stats_out of pass1 should be placed here)*/ + + /** + * pass2 encoding statistics input buffer. + * concatenated stuff from stats_out of pass1 should be placed here + * encoding: allocated/set/freed by user + * decoding: unused + */ + char *stats_in; + + /** + * ratecontrol qmin qmax limiting method + * 0-> clipping, 1-> use a nice continous function to limit qscale wthin qmin/qmax + * encoding: set by user. + * decoding: unused + */ float rc_qsquish; + float rc_qmod_amp; int rc_qmod_freq; + + /** + * ratecontrol override, see RcOverride + * encoding: allocated/set/freed by user. + * decoding: unused + */ RcOverride *rc_override; int rc_override_count; + + /** + * rate control equation + * encoding: set by user + * decoding: unused + */ char *rc_eq; + + /** + * maximum bitrate + * encoding: set by user. + * decoding: unused + */ int rc_max_rate; + + /** + * minimum bitrate + * encoding: set by user. + * decoding: unused + */ int rc_min_rate; + + /** + * decoder bitstream buffer size + * encoding: set by user. 
+ * decoding: unused + */ int rc_buffer_size; float rc_buffer_aggressivity; - float i_quant_factor;/* qscale factor between i and p frames */ - float i_quant_offset;/* qscale offset between i and p frames */ + + /** + * qscale factor between p and i frames + * encoding: set by user. + * decoding: unused + */ + float i_quant_factor; + + /** + * qscale offset between p and i frames + * if > 0 then the last p frame quantizer will be used (q= lastp_q*factor+offset) + * if < 0 then normal ratecontrol will be done (q= -normal_q*factor+offset) + * encoding: set by user. + * decoding: unused + */ + float i_quant_offset; + + /** + * initial complexity for pass1 ratecontrol + * encoding: set by user. + * decoding: unused + */ float rc_initial_cplx; + /** + * custom aspect ratio, used if aspect_info==FF_ASPECT_EXTENDED + * encoding: set by user. + * decoding: set by lavc. + */ int aspected_width; int aspected_height; + /** + * dct algorithm, see FF_DCT_* below + * encoding: set by user + * decoding: unused + */ int dct_algo; #define FF_DCT_AUTO 0 #define FF_DCT_FASTINT 1 @@ -318,21 +647,107 @@ typedef struct AVCodecContext { #define FF_DCT_MMX 3 #define FF_DCT_MLIB 4 - long long int pts; /* timestamp in micro seconds - for decoding: the timestamp from the stream or 0 - for encoding: the timestamp which will be stored in the stream - if 0 then the frame_rate will be used */ - - /* FIXME this should be reordered after kabis API is finished ... 
*/ - /* TODO kill kabi */ + /** + * presentation timestamp in micro seconds (time when frame should be shown to user) + * if 0 then the frame_rate will be used as reference + * encoding: set by user + * decoding; set by lavc + */ + long long int pts; + + /** + * luminance masking (0-> disabled) + * encoding: set by user + * decoding: unused + */ + float lumi_masking; + + /** + * temporary complexity masking (0-> disabled) + * encoding: set by user + * decoding: unused + */ + float temporal_cplx_masking; + + /** + * spatial complexity masking (0-> disabled) + * encoding: set by user + * decoding: unused + */ + float spatial_cplx_masking; + + /** + * p block masking (0-> disabled) + * encoding: set by user + * decoding: unused + */ + float p_masking; + + /** + * darkness masking (0-> disabled) + * encoding: set by user + * decoding: unused + */ + float dark_masking; + + /** + * fourcc (LSB first, so "ABCD" -> ('D'<<24) + ('C'<<16) + ('B'<<8) + 'A') + * this is used to workaround some encoder bugs + * encoding: unused + * decoding: set by user, will be converted to upper case by lavc during init + */ + int fourcc; + + /** + * idct algorithm, see FF_IDCT_* below + * encoding: set by user + * decoding: set by user + */ + int idct_algo; +#define FF_IDCT_AUTO 0 +#define FF_IDCT_INT 1 +#define FF_IDCT_SIMPLE 2 +#define FF_IDCT_SIMPLEMMX 3 +#define FF_IDCT_LIBMPEG2MMX 4 +#define FF_IDCT_PS2 5 +#define FF_IDCT_MLIB 6 +#define FF_IDCT_ARM 7 + + /** + * slice count + * encoding: set by lavc + * decoding: set by user (or 0) + */ + int slice_count; + /** + * slice offsets in the frame in bytes + * encoding: set/allocated by lavc + * decoding: set/allocated by user (or NULL) + */ + int *slice_offset; + + /** + * error concealment flags + * encoding: unused + * decoding: set by user + */ + int error_concealment; +#define FF_EC_GUESS_MVS 1 +#define FF_EC_DEBLOCK 2 + + //FIXME this should be reordered after kabis API is finished ... 
+ //TODO kill kabi /* Note: Below are located reserved fields for further usage It requires for ABI !!! If you'll perform some changes then borrow new space from these fields (void * can be safety replaced with struct * ;) P L E A S E ! ! ! - IMPORTANT: Never change order of already declared fields!!! - */ + Note: use avcodec_alloc_context instead of malloc to allocate this, + otherwise the ABI compatibility will be broken between versions + IMPORTANT: Never change order of already declared fields!!! + */ + //TODO: remove mess below unsigned long long int ull_res0,ull_res1,ull_res2,ull_res3,ull_res4,ull_res5, ull_res6,ull_res7,ull_res8,ull_res9,ull_res10,ull_res11; @@ -420,6 +835,10 @@ extern AVCodec mpeg_decoder; extern AVCodec h263i_decoder; extern AVCodec rv10_decoder; extern AVCodec svq1_decoder; +extern AVCodec dvvideo_decoder; +extern AVCodec dvaudio_decoder; +extern AVCodec wmav1_decoder; +extern AVCodec wmav2_decoder; extern AVCodec mjpeg_decoder; extern AVCodec mp2_decoder; extern AVCodec mp3_decoder; @@ -458,7 +877,7 @@ struct ReSampleContext; typedef struct ReSampleContext ReSampleContext; -ReSampleContext *audio_resample_init(int output_channels, int input_channels, +ReSampleContext *audio_resample_init(int output_channels, int input_channels, int output_rate, int input_rate); int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples); void audio_resample_close(ReSampleContext *s); @@ -477,7 +896,7 @@ ImgReSampleContext *img_resample_full_init(int owidth, int oheight, int topBand, int bottomBand, int leftBand, int rightBand); -void img_resample(ImgReSampleContext *s, +void img_resample(ImgReSampleContext *s, AVPicture *output, AVPicture *input); void img_resample_close(ImgReSampleContext *s); @@ -488,7 +907,7 @@ int avpicture_get_size(int pix_fmt, int width, int height); /* convert among pixel formats */ int img_convert(AVPicture *dst, int dst_pix_fmt, - AVPicture *src, int pix_fmt, + AVPicture *src, int pix_fmt, int width, int 
height); /* deinterlace a picture */ @@ -514,19 +933,20 @@ AVCodec *avcodec_find_decoder(enum CodecID id); AVCodec *avcodec_find_decoder_by_name(const char *name); void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode); +AVCodecContext *avcodec_alloc_context(void); int avcodec_open(AVCodecContext *avctx, AVCodec *codec); -int avcodec_decode_audio(AVCodecContext *avctx, INT16 *samples, +int avcodec_decode_audio(AVCodecContext *avctx, INT16 *samples, int *frame_size_ptr, UINT8 *buf, int buf_size); -int avcodec_decode_video(AVCodecContext *avctx, AVPicture *picture, +int avcodec_decode_video(AVCodecContext *avctx, AVPicture *picture, int *got_picture_ptr, UINT8 *buf, int buf_size); -int avcodec_parse_frame(AVCodecContext *avctx, UINT8 **pdata, +int avcodec_parse_frame(AVCodecContext *avctx, UINT8 **pdata, int *data_size_ptr, UINT8 *buf, int buf_size); -int avcodec_encode_audio(AVCodecContext *avctx, UINT8 *buf, int buf_size, +int avcodec_encode_audio(AVCodecContext *avctx, UINT8 *buf, int buf_size, const short *samples); -int avcodec_encode_video(AVCodecContext *avctx, UINT8 *buf, int buf_size, +int avcodec_encode_video(AVCodecContext *avctx, UINT8 *buf, int buf_size, const AVPicture *pict); int avcodec_close(AVCodecContext *avctx); @@ -536,7 +956,7 @@ void avcodec_register_all(void); void avcodec_flush_buffers(AVCodecContext *avctx); #ifdef FF_POSTPROCESS -extern int quant_store[MBR+1][MBC+1]; /* [Review] */ +extern int quant_store[MBR+1][MBC+1]; // [Review] #endif @@ -547,9 +967,9 @@ extern int quant_store[MBR+1][MBC+1]; /* [Review] */ */ typedef struct { - /* compressed size used from given memory buffer */ + // compressed size used from given memory buffer int size; - /* I/P/B frame type */ + /// I/P/B frame type int frame_type; } avc_enc_result_t; @@ -558,46 +978,46 @@ typedef struct { * order can't be changed - once it was defined */ typedef enum { - /* general commands */ + // general commands AVC_OPEN_BY_NAME = 0xACA000, 
AVC_OPEN_BY_CODEC_ID, AVC_OPEN_BY_FOURCC, AVC_CLOSE, AVC_FLUSH, - /* pin - struct { uint8_t* src, uint_t src_size } */ - /* pout - struct { AVPicture* img, consumed_bytes, */ + // pin - struct { uint8_t* src, uint_t src_size } + // pout - struct { AVPicture* img, consumed_bytes, AVC_DECODE, - /* pin - struct { AVPicture* img, uint8_t* dest, uint_t dest_size } */ - /* pout - uint_t used_from_dest_size */ - AVC_ENCODE, + // pin - struct { AVPicture* img, uint8_t* dest, uint_t dest_size } + // pout - uint_t used_from_dest_size + AVC_ENCODE, - /* query/get video commands */ + // query/get video commands AVC_GET_VERSION = 0xACB000, AVC_GET_WIDTH, AVC_GET_HEIGHT, AVC_GET_DELAY, AVC_GET_QUANT_TABLE, - /* ... */ + // ... - /* query/get audio commands */ + // query/get audio commands AVC_GET_FRAME_SIZE = 0xABC000, - /* maybe define some simple structure which */ - /* might be passed to the user - but they can't */ - /* contain any codec specific parts and these */ - /* calls are usualy necessary only few times */ + // maybe define some simple structure which + // might be passed to the user - but they can't + // contain any codec specific parts and these + // calls are usualy necessary only few times - /* set video commands */ + // set video commands AVC_SET_WIDTH = 0xACD000, AVC_SET_HEIGHT, - /* set video encoding commands */ + // set video encoding commands AVC_SET_FRAME_RATE = 0xACD800, AVC_SET_QUALITY, AVC_SET_HURRY_UP, - /* set audio commands */ + // set audio commands AVC_SET_SAMPLE_RATE = 0xACE000, AVC_SET_CHANNELS, diff --git a/src/libffmpeg/libavcodec/common.c b/src/libffmpeg/libavcodec/common.c index 00324edf3..fde12d927 100644 --- a/src/libffmpeg/libavcodec/common.c +++ b/src/libffmpeg/libavcodec/common.c @@ -27,7 +27,7 @@ const UINT8 ff_sqrt_tab[128]={ 9, 9, 9, 9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11 }; -void init_put_bits(PutBitContext *s, +void init_put_bits(PutBitContext *s, UINT8 *buffer, int buffer_size, void 
*opaque, void (*write_data)(void *, UINT8 *, int)) @@ -35,14 +35,14 @@ void init_put_bits(PutBitContext *s, s->buf = buffer; s->buf_end = s->buf + buffer_size; s->data_out_size = 0; - if(write_data!=NULL) + if(write_data!=NULL) { - fprintf(stderr, "write Data callback is not supported\n"); + fprintf(stderr, "write Data callback is not supported\n"); } #ifdef ALT_BITSTREAM_WRITER s->index=0; ((uint32_t*)(s->buf))[0]=0; -/* memset(buffer, 0, buffer_size); */ +// memset(buffer, 0, buffer_size); #else s->buf_ptr = s->buf; s->bit_left=32; @@ -146,7 +146,7 @@ void init_get_bits(GetBitContext *s, { OPEN_READER(re, s) UPDATE_CACHE(re, s) -/* UPDATE_CACHE(re, s) */ +// UPDATE_CACHE(re, s) CLOSE_READER(re, s) } #ifdef A32_BITSTREAM_READER @@ -170,7 +170,7 @@ int check_marker(GetBitContext *s, char *msg) /* VLC decoding */ -/* #define DEBUG_VLC */ +//#define DEBUG_VLC #define GET_DATA(v, table, i, wrap, size) \ {\ @@ -225,8 +225,8 @@ static int build_table(VLC *vlc, int table_nb_bits, table = &vlc->table[table_index]; for(i=0;i n1) n1 = n; - table[j][1] = -n1; /* bits */ + table[j][1] = -n1; //bits } } } /* second pass : fill auxillary tables recursively */ for(i=0;i table_nb_bits) { n = table_nb_bits; - table[i][1] = -n; /* bits */ + table[i][1] = -n; //bits } index = build_table(vlc, n, nb_codes, bits, bits_wrap, bits_size, @@ -293,7 +293,7 @@ static int build_table(VLC *vlc, int table_nb_bits, return -1; /* note: realloc has been done, so reload tables */ table = &vlc->table[table_index]; - table[i][0] = index; /* code */ + table[i][0] = index; //code } } return table_index; @@ -305,7 +305,7 @@ static int build_table(VLC *vlc, int table_nb_bits, 'nb_bits' set thee decoding table size (2^nb_bits) entries. The bigger it is, the faster is the decoding. But it should not be too big to save memory and L1 cache. '9' is a good compromise. - + 'nb_codes' : number of vlcs codes 'bits' : table which gives the size (in bits) of each vlc code. 
@@ -319,7 +319,7 @@ static int build_table(VLC *vlc, int table_nb_bits, or 'codes' tables. 'wrap' and 'size' allows to use any memory configuration and types - (byte/word/long) to store the 'bits' and 'codes' tables. + (byte/word/long) to store the 'bits' and 'codes' tables. */ int init_vlc(VLC *vlc, int nb_bits, int nb_codes, const void *bits, int bits_wrap, int bits_size, diff --git a/src/libffmpeg/libavcodec/common.h b/src/libffmpeg/libavcodec/common.h index 451c18248..8b8c7453b 100644 --- a/src/libffmpeg/libavcodec/common.h +++ b/src/libffmpeg/libavcodec/common.h @@ -5,45 +5,45 @@ #define FFMPEG_VERSION "0.4.6" #if defined(WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) -#define CONFIG_WIN32 +# define CONFIG_WIN32 #endif -/* #define ALT_BITSTREAM_WRITER */ -/* #define ALIGNED_BITSTREAM_WRITER */ +//#define ALT_BITSTREAM_WRITER +//#define ALIGNED_BITSTREAM_WRITER #define ALT_BITSTREAM_READER -/* #define LIBMPEG2_BITSTREAM_READER */ -/* #define A32_BITSTREAM_READER */ +//#define LIBMPEG2_BITSTREAM_READER +//#define A32_BITSTREAM_READER #ifdef HAVE_AV_CONFIG_H /* only include the following when compiling package */ -#include "config.h" - -#include -#include -#include -#ifndef __BEOS__ -#include -#else -#include "berrno.h" -#endif -#include - -#ifndef ENODATA -#define ENODATA 61 -#endif +# include "config.h" + +# include +# include +# include +# ifndef __BEOS__ +# include +# else +# include "berrno.h" +# endif +# include + +# ifndef ENODATA +# define ENODATA 61 +# endif #endif /* HAVE_AV_CONFIG_H */ /* Suppress restrict if it was not defined in config.h. 
*/ #ifndef restrict -#define restrict +# define restrict #endif #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0) -#define always_inline __attribute__((always_inline)) inline +# define always_inline __attribute__((always_inline)) inline #else -#define always_inline inline +# define always_inline inline #endif #ifdef CONFIG_WIN32 @@ -68,37 +68,37 @@ typedef INT32 int32_t; typedef UINT64 uint64_t; typedef INT64 int64_t; -#ifndef __MINGW32__ -#define INT64_C(c) (c ## i64) -#define UINT64_C(c) (c ## i64) +# ifndef __MINGW32__ +# define INT64_C(c) (c ## i64) +# define UINT64_C(c) (c ## i64) -#define inline __inline +# define inline __inline -#else -#define INT64_C(c) (c ## LL) -#define UINT64_C(c) (c ## ULL) -#endif /* __MINGW32__ */ +# else +# define INT64_C(c) (c ## LL) +# define UINT64_C(c) (c ## ULL) +# endif /* __MINGW32__ */ -#define M_PI 3.14159265358979323846 -#define M_SQRT2 1.41421356237309504880 /* sqrt(2) */ +# define M_PI 3.14159265358979323846 +# define M_SQRT2 1.41421356237309504880 /* sqrt(2) */ -#ifdef _DEBUG -#define DEBUG -#endif +# ifdef _DEBUG +# define DEBUG +# endif -#define snprintf _snprintf +# define snprintf _snprintf #else /* CONFIG_WIN32 */ /* unix */ -#include +# include -#ifndef __WINE_WINDEF16_H +# ifndef __WINE_WINDEF16_H /* workaround for typedef conflict in MPlayer (wine typedefs) */ typedef unsigned short UINT16; typedef signed short INT16; -#endif +# endif typedef unsigned char UINT8; typedef unsigned int UINT32; @@ -107,45 +107,45 @@ typedef signed char INT8; typedef signed int INT32; typedef signed long long INT64; -#ifdef HAVE_AV_CONFIG_H +# ifdef HAVE_AV_CONFIG_H -#ifndef INT64_C -#define INT64_C(c) (c ## LL) -#define UINT64_C(c) (c ## ULL) -#endif +# ifndef INT64_C +# define INT64_C(c) (c ## LL) +# define UINT64_C(c) (c ## ULL) +# endif -#ifdef USE_FASTMEMCPY -#include "fastmemcpy.h" -#endif +# ifdef USE_FASTMEMCPY +# include "fastmemcpy.h" +# endif -#endif /* HAVE_AV_CONFIG_H */ +# endif /* 
HAVE_AV_CONFIG_H */ #endif /* !CONFIG_WIN32 */ #ifdef HAVE_AV_CONFIG_H -#include "bswap.h" +# include "bswap.h" -#if defined(__MINGW32__) || defined(__CYGWIN__) || \ - defined(__OS2__) || defined (__OpenBSD__) -#define MANGLE(a) "_" #a -#else -#define MANGLE(a) #a -#endif +# if defined(__MINGW32__) || defined(__CYGWIN__) || \ + defined(__OS2__) || defined (__OpenBSD__) +# define MANGLE(a) "_" #a +# else +# define MANGLE(a) #a +# endif /* debug stuff */ -#ifndef DEBUG -#define NDEBUG -#endif -#include +# ifndef DEBUG +# define NDEBUG +# endif +# include /* dprintf macros */ -#if defined(CONFIG_WIN32) && !defined(__MINGW32__) +# if defined(CONFIG_WIN32) && !defined(__MINGW32__) inline void dprintf(const char* fmt,...) {} -#else +# else #if __GNUC__ #ifdef DEBUG @@ -161,10 +161,12 @@ inline void dprintf(const char* fmt,...) {} #endif #endif -#endif /* !CONFIG_WIN32 */ +# endif /* !CONFIG_WIN32 */ -#define av_abort() do { fprintf(stderr, "Abort at %s:%d\n", __FILE__, __LINE__); abort(); } while (0) +# define av_abort() do { fprintf(stderr, "Abort at %s:%d\n", __FILE__, __LINE__); abort(); } while (0) +//rounded divison & shift +#define RSHIFT(a,b) ((a) > 0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b)) /* assume b>0 */ #define ROUNDED_DIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b)) #define ABS(a) ((a) >= 0 ? (a) : (-(a))) @@ -172,24 +174,24 @@ inline void dprintf(const char* fmt,...) {} #define MIN(a,b) ((a) > (b) ? (b) : (a)) #ifdef ARCH_X86 -/* avoid +32 for shift optimization (gcc should do that ...) */ +// avoid +32 for shift optimization (gcc should do that ...) 
static inline int32_t NEG_SSR32( int32_t a, int8_t s){ - __asm__ ("sarl %1, %0\n\t" + asm ("sarl %1, %0\n\t" : "+r" (a) : "ic" ((uint8_t)(-s)) ); return a; } static inline uint32_t NEG_USR32(uint32_t a, int8_t s){ - __asm__ ("shrl %1, %0\n\t" + asm ("shrl %1, %0\n\t" : "+r" (a) : "ic" ((uint8_t)(-s)) ); return a; } #else -#define NEG_SSR32(a,s) ((( int32_t)(a))>>(32-(s))) -#define NEG_USR32(a,s) (((uint32_t)(a))>>(32-(s))) +# define NEG_SSR32(a,s) ((( int32_t)(a))>>(32-(s))) +# define NEG_USR32(a,s) (((uint32_t)(a))>>(32-(s))) #endif /* bit output */ @@ -210,7 +212,7 @@ typedef struct PutBitContext { INT64 data_out_size; /* in bytes */ } PutBitContext; -void init_put_bits(PutBitContext *s, +void init_put_bits(PutBitContext *s, UINT8 *buffer, int buffer_size, void *opaque, void (*write_data)(void *, UINT8 *, int)); @@ -248,7 +250,7 @@ static inline int get_bits_count(GetBitContext *s); typedef struct VLC { int bits; - VLC_TYPE (*table)[2]; /* code, bits */ + VLC_TYPE (*table)[2]; // code, bits int table_size, table_allocated; } VLC; @@ -260,9 +262,9 @@ typedef struct RL_VLC_ELEM { /* used to avoid missaligned exceptions on some archs (alpha, ...) 
*/ #ifdef ARCH_X86 -#define unaligned32(a) (*(UINT32*)(a)) +# define unaligned32(a) (*(UINT32*)(a)) #else -#ifdef __GNUC__ +# ifdef __GNUC__ static inline uint32_t unaligned32(const void *v) { struct Unaligned { uint32_t i; @@ -270,16 +272,16 @@ static inline uint32_t unaligned32(const void *v) { return ((const struct Unaligned *) v)->i; } -#elif defined(__DECC) +# elif defined(__DECC) static inline uint32_t unaligned32(const void *v) { return *(const __unaligned uint32_t *) v; } -#else +# else static inline uint32_t unaligned32(const void *v) { return *(const uint32_t *) v; } -#endif -#endif /* !ARCH_X86 */ +# endif +#endif //!ARCH_X86 #ifndef ALT_BITSTREAM_WRITER static inline void put_bits(PutBitContext *s, int n, unsigned int value) @@ -290,13 +292,13 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value) #ifdef STATS st_out_bit_counts[st_current_index] += n; #endif - /* printf("put_bits=%d %x\n", n, value); */ + // printf("put_bits=%d %x\n", n, value); assert(n == 32 || value < (1U << n)); - + bit_buf = s->bit_buf; bit_left = s->bit_left; - /* printf("n=%d value=%x cnt=%d buf=%x\n", n, value, bit_cnt, bit_buf); */ + // printf("n=%d value=%x cnt=%d buf=%x\n", n, value, bit_cnt, bit_buf); /* XXX: optimize */ if (n < bit_left) { bit_buf = (bit_buf<> (n - bit_left); *(UINT32 *)s->buf_ptr = be2me_32(bit_buf); - /* printf("bitbuf = %08x\n", bit_buf); */ + //printf("bitbuf = %08x\n", bit_buf); s->buf_ptr+=4; bit_left+=32 - n; bit_buf = value; @@ -320,8 +322,8 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value) #ifdef ALT_BITSTREAM_WRITER static inline void put_bits(PutBitContext *s, int n, unsigned int value) { -#ifdef ALIGNED_BITSTREAM_WRITER -#ifdef ARCH_X86 +# ifdef ALIGNED_BITSTREAM_WRITER +# ifdef ARCH_X86 asm volatile( "movl %0, %%ecx \n\t" "xorl %%eax, %%eax \n\t" @@ -339,20 +341,20 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value) : "r" (s->buf), "r" (n), "0" (s->index), "1" (value<<(-n)) 
: "%eax", "%ecx" ); -#else +# else int index= s->index; uint32_t *ptr= ((uint32_t *)s->buf)+(index>>5); - - value<<= 32-n; - + + value<<= 32-n; + ptr[0] |= be2me_32(value>>(index&31)); ptr[1] = be2me_32(value<<(32-(index&31))); -/* if(n>24) printf("%d %d\n", n, value); */ +//if(n>24) printf("%d %d\n", n, value); index+= n; s->index= index; -#endif -#else /* ALIGNED_BITSTREAM_WRITER */ -#ifdef ARCH_X86 +# endif +# else //ALIGNED_BITSTREAM_WRITER +# ifdef ARCH_X86 asm volatile( "movl $7, %%ecx \n\t" "andl %0, %%ecx \n\t" @@ -369,17 +371,17 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value) : "r" (s->buf), "r" (n), "0" (s->index), "1" (value) : "%ecx" ); -#else +# else int index= s->index; uint32_t *ptr= (uint32_t*)(((uint8_t *)s->buf)+(index>>3)); - + ptr[0] |= be2me_32(value<<(32-n-(index&7) )); ptr[1] = 0; -/* if(n>24) printf("%d %d\n", n, value); */ +//if(n>24) printf("%d %d\n", n, value); index+= n; s->index= index; -#endif -#endif /* !ALIGNED_BITSTREAM_WRITER */ +# endif +# endif //!ALIGNED_BITSTREAM_WRITER } #endif @@ -389,13 +391,13 @@ static inline void jput_bits(PutBitContext *s, int n, unsigned int value) { unsigned int bit_buf, b; int bit_left, i; - + assert(n == 32 || value < (1U << n)); bit_buf = s->bit_buf; bit_left = s->bit_left; - /* printf("n=%d value=%x cnt=%d buf=%x\n", n, value, bit_cnt, bit_buf); */ + //printf("n=%d value=%x cnt=%d buf=%x\n", n, value, bit_cnt, bit_buf); /* XXX: optimize */ if (n < bit_left) { bit_buf = (bit_buf<bit_buf = bit_buf; s->bit_left = bit_left; } @@ -428,15 +430,15 @@ static inline void jput_bits(PutBitContext *s, int n, int value) int index= s->index; uint32_t *ptr= (uint32_t*)(((uint8_t *)s->buf)+(index>>3)); int v= ptr[0]; -/* if(n>24) printf("%d %d\n", n, value); */ - +//if(n>24) printf("%d %d\n", n, value); + v |= be2me_32(value<<(32-n-(index&7) )); if(((v+0x01010101)^0xFFFFFFFF)&v&0x80808080) { /* handle idiotic (m)jpeg escapes */ uint8_t *bPtr= (uint8_t*)ptr; int numChecked= 
((index+n)>>3) - (index>>3); - + v= be2me_32(v); *(bPtr++)= v>>24; @@ -542,7 +544,7 @@ for examples see get_bits, show_bits, skip_bits, get_vlc # define SKIP_CACHE(name, gb, num)\ name##_cache <<= (num);\ -/* FIXME name? */ +// FIXME name? # define SKIP_COUNTER(name, gb, num)\ name##_index += (num);\ @@ -568,7 +570,7 @@ static inline int get_bits_count(GetBitContext *s){ return s->index; } #elif defined LIBMPEG2_BITSTREAM_READER -/* libmpeg2 like reader */ +//libmpeg2 like reader # define MIN_CACHE_BITS 16 @@ -699,12 +701,12 @@ static inline unsigned int show_bits(GetBitContext *s, int n){ OPEN_READER(re, s) UPDATE_CACHE(re, s) tmp= SHOW_UBITS(re, s, n); -/* CLOSE_READER(re, s) */ +// CLOSE_READER(re, s) return tmp; } static inline void skip_bits(GetBitContext *s, int n){ -/* Note gcc seems to optimize this to s->index+=n for the ALT_READER :)) */ + //Note gcc seems to optimize this to s->index+=n for the ALT_READER :)) OPEN_READER(re, s) UPDATE_CACHE(re, s) LAST_SKIP_BITS(re, s, n) @@ -797,16 +799,16 @@ void free_vlc(VLC *vlc); SKIP_BITS(name, gb, n)\ } -/* deprecated, dont use get_vlc for new code, use get_vlc2 instead or use GET_VLC directly */ +// deprecated, dont use get_vlc for new code, use get_vlc2 instead or use GET_VLC directly static inline int get_vlc(GetBitContext *s, VLC *vlc) { int code; VLC_TYPE (*table)[2]= vlc->table; - + OPEN_READER(re, s) UPDATE_CACHE(re, s) - GET_VLC(code, re, s, table, vlc->bits, 3) + GET_VLC(code, re, s, table, vlc->bits, 3) CLOSE_READER(re, s) return code; @@ -816,7 +818,7 @@ static always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2], int bits, int max_depth) { int code; - + OPEN_READER(re, s) UPDATE_CACHE(re, s) @@ -829,7 +831,7 @@ static always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2], /* define it to include statistics code (useful only for optimizing codec efficiency */ -/* #define STATS */ +//#define STATS #ifdef STATS @@ -918,9 +920,9 @@ static inline int ff_sqrt(int a) int ret=0; int s; 
int ret_sq=0; - + if(a<128) return ff_sqrt_tab[a]; - + for(s=15; s>=0; s--){ int b= ret_sq + (1<<(s*2)) + (ret<>31;\ + level= (level^mask)-mask; +#endif + #if __CPU__ >= 686 && !defined(RUNTIME_CPUDETECT) #define COPY3_IF_LT(x,y,a,b,c,d)\ diff --git a/src/libffmpeg/libavcodec/dsputil.c b/src/libffmpeg/libavcodec/dsputil.c index f98ad388a..a8578b5c7 100644 --- a/src/libffmpeg/libavcodec/dsputil.c +++ b/src/libffmpeg/libavcodec/dsputil.c @@ -20,16 +20,14 @@ */ #include "avcodec.h" #include "dsputil.h" -#include "simple_idct.h" -void (*ff_idct)(DCTELEM *block); -void (*ff_idct_put)(UINT8 *dest, int line_size, DCTELEM *block); -void (*ff_idct_add)(UINT8 *dest, int line_size, DCTELEM *block); void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); -void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); +void (*ff_gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); +void (*ff_gmc )(UINT8 *dst, UINT8 *src, int stride, int h, int ox, int oy, + int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); void (*clear_blocks)(DCTELEM *blocks); int (*pix_sum)(UINT8 * pix, int line_size); int (*pix_norm1)(UINT8 * pix, int line_size); @@ -49,16 +47,11 @@ int ff_bit_exact=0; UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; UINT32 squareTbl[512]; -extern INT16 ff_mpeg1_default_intra_matrix[64]; -extern INT16 ff_mpeg1_default_non_intra_matrix[64]; -extern INT16 ff_mpeg4_default_intra_matrix[64]; -extern INT16 ff_mpeg4_default_non_intra_matrix[64]; - -UINT8 zigzag_direct[64] = { - 0, 1, 8, 16, 9, 2, 3, 10, - 17, 24, 32, 25, 18, 11, 4, 5, +const UINT8 ff_zigzag_direct[64] = { + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 
48, 41, 34, - 27, 20, 13, 6, 7, 14, 21, 28, + 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, @@ -68,100 +61,64 @@ UINT8 zigzag_direct[64] = { /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ UINT16 __align8 inv_zigzag_direct16[64]; -/* not permutated zigzag_direct for MMX quantizer */ -UINT8 zigzag_direct_noperm[64]; - -UINT8 ff_alternate_horizontal_scan[64] = { - 0, 1, 2, 3, 8, 9, 16, 17, +const UINT8 ff_alternate_horizontal_scan[64] = { + 0, 1, 2, 3, 8, 9, 16, 17, 10, 11, 4, 5, 6, 7, 15, 14, - 13, 12, 19, 18, 24, 25, 32, 33, + 13, 12, 19, 18, 24, 25, 32, 33, 26, 27, 20, 21, 22, 23, 28, 29, - 30, 31, 34, 35, 40, 41, 48, 49, + 30, 31, 34, 35, 40, 41, 48, 49, 42, 43, 36, 37, 38, 39, 44, 45, - 46, 47, 50, 51, 56, 57, 58, 59, + 46, 47, 50, 51, 56, 57, 58, 59, 52, 53, 54, 55, 60, 61, 62, 63, }; -UINT8 ff_alternate_vertical_scan[64] = { - 0, 8, 16, 24, 1, 9, 2, 10, +const UINT8 ff_alternate_vertical_scan[64] = { + 0, 8, 16, 24, 1, 9, 2, 10, 17, 25, 32, 40, 48, 56, 57, 49, - 41, 33, 26, 18, 3, 11, 4, 12, + 41, 33, 26, 18, 3, 11, 4, 12, 19, 27, 34, 42, 50, 58, 35, 43, - 51, 59, 20, 28, 5, 13, 6, 14, + 51, 59, 20, 28, 5, 13, 6, 14, 21, 29, 36, 44, 52, 60, 37, 45, - 53, 61, 22, 30, 7, 15, 23, 31, + 53, 61, 22, 30, 7, 15, 23, 31, 38, 46, 54, 62, 39, 47, 55, 63, }; -#ifdef SIMPLE_IDCT - -/* Input permutation for the simple_idct_mmx */ -static UINT8 simple_mmx_permutation[64]={ - 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, - 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, - 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, - 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, - 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, - 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, - 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, - 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, -}; -#endif - /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ UINT32 inverse[256]={ - 0, 
4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, - 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, - 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, - 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333, - 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367, - 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283, - 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315, - 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085, - 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498, - 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675, - 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441, - 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183, - 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712, - 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400, - 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163, - 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641, - 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573, - 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737, - 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493, - 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373, - 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368, - 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671, - 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767, - 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 
22486740, - 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751, - 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635, - 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593, - 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944, - 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933, - 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, - 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, + 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, + 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, + 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, + 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333, + 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367, + 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283, + 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315, + 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085, + 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498, + 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675, + 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441, + 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183, + 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712, + 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400, + 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163, + 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641, + 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 
32051995, 31814573, + 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737, + 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493, + 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373, + 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368, + 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671, + 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767, + 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740, + 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751, + 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635, + 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593, + 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944, + 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933, + 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, + 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, }; -/* used to skip zeros at the end */ -UINT8 zigzag_end[64]; - -UINT8 permutation[64]; -/* UINT8 invPermutation[64]; */ - -static void build_zigzag_end(void) -{ - int lastIndex; - int lastIndexAfterPerm=0; - for(lastIndex=0; lastIndex<64; lastIndex++) - { - if(zigzag_direct[lastIndex] > lastIndexAfterPerm) - lastIndexAfterPerm= zigzag_direct[lastIndex]; - zigzag_end[lastIndex]= lastIndexAfterPerm + 1; - } -} - int pix_sum_c(UINT8 * pix, int line_size) { int s, i, j; @@ -253,7 +210,7 @@ void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, { int i; UINT8 *cm = cropTbl + MAX_NEG_CROP; - + /* read the pixels */ for(i=0;i<8;i++) { pixels[0] = cm[block[0]]; @@ -275,7 +232,7 @@ void add_pixels_clamped_c(const DCTELEM *block, 
UINT8 *restrict pixels, { int i; UINT8 *cm = cropTbl + MAX_NEG_CROP; - + /* read the pixels */ for(i=0;i<8;i++) { pixels[0] = cm[pixels[0] + block[0]]; @@ -458,7 +415,7 @@ void (*OPNAME ## _no_rnd_pixels_tab[2][4])(uint8_t *block, const uint8_t *pixels }; #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) -#else /* 64 bit variant */ +#else // 64 bit variant #define PIXOP2(OPNAME, OP) \ static void OPNAME ## _pixels8(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ @@ -867,6 +824,7 @@ PIXOP(uint8_t, put_no_rnd, op_put, line_size) #define avg2(a,b) ((a+b+1)>>1) #define avg4(a,b,c,d) ((a+b+c+d+2)>>2) + static void gmc1_c(UINT8 *dst, UINT8 *src, int stride, int h, int x16, int y16, int rounder) { const int A=(16-x16)*(16-y16); @@ -874,7 +832,6 @@ static void gmc1_c(UINT8 *dst, UINT8 *src, int stride, int h, int x16, int y16, const int C=(16-x16)*( y16); const int D=( x16)*( y16); int i; - rounder= 128 - rounder; for(i=0; i>16; + src_y= vy>>16; + frac_x= src_x&(s-1); + frac_y= src_y&(s-1); + src_x>>=shift; + src_y>>=shift; + + if((unsigned)src_x < width){ + if((unsigned)src_y < height){ + index= src_x + src_y*stride; + dst[y*stride + x]= ( ( src[index ]*(s-frac_x) + + src[index +1]* frac_x )*(s-frac_y) + + ( src[index+stride ]*(s-frac_x) + + src[index+stride+1]* frac_x )* frac_y + + r)>>(shift*2); + }else{ + index= src_x + clip(src_y, 0, height)*stride; + dst[y*stride + x]= ( ( src[index ]*(s-frac_x) + + src[index +1]* frac_x )*s + + r)>>(shift*2); + } + }else{ + if((unsigned)src_y < height){ + index= clip(src_x, 0, width) + src_y*stride; + dst[y*stride + x]= ( ( src[index ]*(s-frac_y) + + src[index+stride ]* frac_y )*s + + r)>>(shift*2); + }else{ + index= clip(src_x, 0, width) + clip(src_y, 0, height)*stride; + dst[y*stride + x]= src[index ]; + } + } + + vx+= dxx; + vy+= dyx; + } + ox += dxy; + oy += dyy; + } +} + static inline void copy_block17(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h) { int i; @@ -1332,7 
+1347,7 @@ qpel_mc_func OPNAME ## qpel_pixels_tab[2][16]={ \ QPEL_MC(0, put_ , _ , op_put) QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd) QPEL_MC(0, avg_ , _ , op_avg) -/* QPEL_MC(1, avg_no_rnd , _ , op_avg) */ +//QPEL_MC(1, avg_no_rnd , _ , op_avg) #undef op_avg #undef op_avg_no_rnd #undef op_put @@ -1538,67 +1553,35 @@ int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) return s; } -/* permute block according so that it corresponds to the MMX idct - order */ -#ifdef SIMPLE_IDCT - /* general permutation, but perhaps slightly slower */ -void block_permute(INT16 *block) -{ - int i; - INT16 temp[64]; - - for(i=0; i<64; i++) temp[ block_permute_op(i) ] = block[i]; - - for(i=0; i<64; i++) block[i] = temp[i]; -} -#else - -void block_permute(INT16 *block) +void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last) { - int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; int i; - - for(i=0;i<8;i++) { - tmp1 = block[1]; - tmp2 = block[2]; - tmp3 = block[3]; - tmp4 = block[4]; - tmp5 = block[5]; - tmp6 = block[6]; - block[1] = tmp2; - block[2] = tmp4; - block[3] = tmp6; - block[4] = tmp1; - block[5] = tmp3; - block[6] = tmp5; - block += 8; + INT16 temp[64]; + + if(last<=0) return; + if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms + + for(i=0; i<=last; i++){ + const int j= scantable[i]; + temp[j]= block[j]; + block[j]=0; + } + + for(i=0; i<=last; i++){ + const int j= scantable[i]; + const int perm_j= permutation[j]; + block[perm_j]= temp[j]; } } -#endif void clear_blocks_c(DCTELEM *blocks) { memset(blocks, 0, sizeof(DCTELEM)*6*64); } -/* XXX: those functions should be suppressed ASAP when all IDCTs are - converted */ -void gen_idct_put(UINT8 *dest, int line_size, DCTELEM *block) -{ - ff_idct (block); - put_pixels_clamped(block, dest, line_size); -} - -void gen_idct_add(UINT8 *dest, int line_size, DCTELEM *block) -{ - ff_idct (block); - add_pixels_clamped(block, dest, line_size); -} - void 
dsputil_init(void) { - int i, j; - int use_permuted_idct; + int i; for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; for(i=0;i> 1) | ((i & 1) << 2); -#endif - else - for(i=0; i<64; i++) permutation[i]=i; - - for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1; - for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i]; - - if (use_permuted_idct) { - /* permute for IDCT */ - for(i=0;i<64;i++) { - j = zigzag_direct[i]; - zigzag_direct[i] = block_permute_op(j); - j = ff_alternate_horizontal_scan[i]; - ff_alternate_horizontal_scan[i] = block_permute_op(j); - j = ff_alternate_vertical_scan[i]; - ff_alternate_vertical_scan[i] = block_permute_op(j); - } - block_permute(ff_mpeg1_default_intra_matrix); - block_permute(ff_mpeg1_default_non_intra_matrix); - block_permute(ff_mpeg4_default_intra_matrix); - block_permute(ff_mpeg4_default_non_intra_matrix); - } - - build_zigzag_end(); + for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; } /* remove any non bit exact operation (testing purpose) */ @@ -1712,14 +1650,14 @@ void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3], int quad, diff, x, y; UINT8 *orig, *coded; UINT32 *sq = squareTbl + 256; - + quad = 0; diff = 0; - + /* Luminance */ orig = orig_image[0]; coded = coded_image[0]; - + for (y=0;yheight;y++) { for (x=0;xwidth;x++) { diff = *(orig + x) - *(coded + x); @@ -1728,12 +1666,12 @@ void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3], orig += orig_linesize[0]; coded += coded_linesize; } - + avctx->psnr_y = (float) quad / (float) (avctx->width * avctx->height); - + if (avctx->psnr_y) { avctx->psnr_y = (float) (255 * 255) / avctx->psnr_y; - avctx->psnr_y = 10 * (float) log10 (avctx->psnr_y); + avctx->psnr_y = 10 * (float) log10 (avctx->psnr_y); } else avctx->psnr_y = 99.99; } diff --git a/src/libffmpeg/libavcodec/dsputil.h b/src/libffmpeg/libavcodec/dsputil.h index 899de027b..da78f54c8 100644 --- a/src/libffmpeg/libavcodec/dsputil.h +++ b/src/libffmpeg/libavcodec/dsputil.h @@ 
-32,15 +32,12 @@ void ff_jpeg_fdct_islow (DCTELEM *data); void j_rev_dct (DCTELEM *data); -void fdct_mmx(DCTELEM *block); +void ff_fdct_mmx(DCTELEM *block); /* encoding scans */ -extern UINT8 ff_alternate_horizontal_scan[64]; -extern UINT8 ff_alternate_vertical_scan[64]; -extern UINT8 zigzag_direct[64]; - -/* permutation table */ -extern UINT8 permutation[64]; +extern const UINT8 ff_alternate_horizontal_scan[64]; +extern const UINT8 ff_alternate_vertical_scan[64]; +extern const UINT8 ff_zigzag_direct[64]; /* pixel operations */ #define MAX_NEG_CROP 384 @@ -52,7 +49,7 @@ extern UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; void dsputil_init(void); /* minimum alignment rules ;) -if u notice errors in the align stuff, need more alignment for some asm code for some cpu +if u notice errors in the align stuff, need more alignment for some asm code for some cpu or need to use a function with less aligned data then send a mail to the ffmpeg-dev list, ... !warning these alignments might not match reallity, (missing attribute((align)) stuff somewhere possible) @@ -62,14 +59,13 @@ i (michael) didnt check them, these are just the alignents which i think could b */ /* pixel ops : interface with DCT */ -extern void (*ff_idct)(DCTELEM *block/*align 16*/); -extern void (*ff_idct_put)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); -extern void (*ff_idct_add)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); extern void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size); extern void (*diff_pixels)(DCTELEM *block/*align 16*/, const UINT8 *s1/*align 8*/, const UINT8 *s2/*align 8*/, int stride); extern void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size); extern void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size); -extern void (*gmc1)(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int srcStride, int h, int x16, int y16, 
int rounder); +extern void (*ff_gmc1)(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder); +extern void (*ff_gmc )(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int stride, int h, int ox, int oy, + int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); extern void (*clear_blocks)(DCTELEM *blocks/*align 16*/); extern int (*pix_sum)(UINT8 * pix, int line_size); extern int (*pix_norm1)(UINT8 * pix, int line_size); @@ -83,7 +79,7 @@ void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); void clear_blocks_c(DCTELEM *blocks); /* add and put pixel (decoding) */ -/* blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16 */ +// blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16 typedef void (*op_pixels_func)(UINT8 *block/*align width (8 or 16)*/, const UINT8 *pixels/*align 1*/, int line_size, int h); typedef void (*qpel_mc_func)(UINT8 *dst/*align width (8 or 16)*/, UINT8 *src/*align 1*/, int stride); @@ -120,15 +116,14 @@ int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx); int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx); int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx); -static inline int block_permute_op(int j) -{ - return permutation[j]; -} - -void block_permute(INT16 *block); +/** + * permute block according to permuatation. 
+ * @param last last non zero element in scantable order + */ +void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last); #if defined(ARCH_X86) -#define HAVE_MMX 1 +#define HAVE_MMX 1 #endif #if defined(HAVE_MMX) @@ -146,7 +141,7 @@ extern int mm_flags; /*int mm_support(void);*/ #define mm_support() xine_mm_accel() -#if 0 +#if 0 static inline void emms(void) { __asm __volatile ("emms;":::"memory"); @@ -172,16 +167,16 @@ void dsputil_set_bit_exact_mmx(void); line ptimizations */ #define __align8 __attribute__ ((aligned (4))) -void dsputil_init_armv4l(void); +void dsputil_init_armv4l(void); #elif defined(HAVE_MLIB) - + #define emms_c() /* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */ #define __align8 __attribute__ ((aligned (8))) -void dsputil_init_mlib(void); +void dsputil_init_mlib(void); #elif defined(ARCH_ALPHA) @@ -197,6 +192,14 @@ void dsputil_init_alpha(void); void dsputil_init_ppc(void); +#elif defined(HAVE_MMI) + +#define emms_c() + +#define __align8 __attribute__ ((aligned (16))) + +void dsputil_init_mmi(void); + #else #define emms_c() @@ -229,4 +232,51 @@ void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3], int orig_linesize[3], int coded_linesize, AVCodecContext *avctx); +/* FFT computation */ + +/* NOTE: soon integer code will be added, so you must use the + FFTSample type */ +typedef float FFTSample; + +typedef struct FFTComplex { + FFTSample re, im; +} FFTComplex; + +typedef struct FFTContext { + int nbits; + int inverse; + uint16_t *revtab; + FFTComplex *exptab; + FFTComplex *exptab1; /* only used by SSE code */ + void (*fft_calc)(struct FFTContext *s, FFTComplex *z); +} FFTContext; + +int fft_init(FFTContext *s, int nbits, int inverse); +void fft_permute(FFTContext *s, FFTComplex *z); +void fft_calc_c(FFTContext *s, FFTComplex *z); +void fft_calc_sse(FFTContext *s, FFTComplex *z); +static inline void fft_calc(FFTContext *s, FFTComplex *z) +{ + s->fft_calc(s, z); +} +void fft_end(FFTContext *s); + +/* MDCT 
computation */ + +typedef struct MDCTContext { + int n; /* size of MDCT (i.e. number of input data * 2) */ + int nbits; /* n = 2^nbits */ + /* pre/post rotation tables */ + FFTSample *tcos; + FFTSample *tsin; + FFTContext fft; +} MDCTContext; + +int mdct_init(MDCTContext *s, int nbits, int inverse); +void imdct_calc(MDCTContext *s, FFTSample *output, + const FFTSample *input, FFTSample *tmp); +void mdct_calc(MDCTContext *s, FFTSample *out, + const FFTSample *input, FFTSample *tmp); +void mdct_end(MDCTContext *s); + #endif diff --git a/src/libffmpeg/libavcodec/dv.c b/src/libffmpeg/libavcodec/dv.c new file mode 100644 index 000000000..327149068 --- /dev/null +++ b/src/libffmpeg/libavcodec/dv.c @@ -0,0 +1,673 @@ +/* + * DV decoder + * Copyright (c) 2002 Fabrice Bellard. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include "avcodec.h" +#include "dsputil.h" +#include "mpegvideo.h" +#include "simple_idct.h" + +#define NTSC_FRAME_SIZE 120000 +#define PAL_FRAME_SIZE 144000 + +#define TEX_VLC_BITS 9 + +typedef struct DVVideoDecodeContext { + AVCodecContext *avctx; + GetBitContext gb; + VLC *vlc; + int sampling_411; /* 0 = 420, 1 = 411 */ + int width, height; + UINT8 *current_picture[3]; /* picture structure */ + int linesize[3]; + DCTELEM block[5*6][64] __align8; + UINT8 dv_zigzag[2][64]; + UINT8 idct_permutation[64]; + /* XXX: move it to static storage ? */ + UINT8 dv_shift[2][22][64]; + void (*idct_put[2])(UINT8 *dest, int line_size, DCTELEM *block); +} DVVideoDecodeContext; + +#include "dvdata.h" + +static VLC dv_vlc; +/* XXX: also include quantization */ +static RL_VLC_ELEM *dv_rl_vlc[1]; + +static void dv_build_unquantize_tables(DVVideoDecodeContext *s) +{ + int i, q, j; + + /* NOTE: max left shift is 6 */ + for(q = 0; q < 22; q++) { + /* 88 unquant */ + for(i = 1; i < 64; i++) { + /* 88 table */ + j = s->idct_permutation[i]; + s->dv_shift[0][q][j] = + dv_quant_shifts[q][dv_88_areas[i]] + 1; + } + + /* 248 unquant */ + for(i = 1; i < 64; i++) { + /* 248 table */ + s->dv_shift[1][q][i] = + dv_quant_shifts[q][dv_248_areas[i]] + 1; + } + } +} + +static int dvvideo_decode_init(AVCodecContext *avctx) +{ + DVVideoDecodeContext *s = avctx->priv_data; + MpegEncContext s2; + static int done; + + if (!done) { + int i; + + done = 1; + + /* NOTE: as a trick, we use the fact the no codes are unused + to accelerate the parsing of partial codes */ + init_vlc(&dv_vlc, TEX_VLC_BITS, NB_DV_VLC, + dv_vlc_len, 1, 1, dv_vlc_bits, 2, 2); + + dv_rl_vlc[0] = av_malloc(dv_vlc.table_size * sizeof(RL_VLC_ELEM)); + for(i = 0; i < dv_vlc.table_size; i++){ + int code= 
dv_vlc.table[i][0]; + int len = dv_vlc.table[i][1]; + int level, run; + + if(len<0){ //more bits needed + run= 0; + level= code; + } else if (code == (NB_DV_VLC - 1)) { + /* EOB */ + run = 0; + level = 256; + } else { + run= dv_vlc_run[code] + 1; + level= dv_vlc_level[code]; + } + dv_rl_vlc[0][i].len = len; + dv_rl_vlc[0][i].level = level; + dv_rl_vlc[0][i].run = run; + } + } + + /* ugly way to get the idct & scantable */ + /* XXX: fix it */ + memset(&s2, 0, sizeof(MpegEncContext)); + s2.avctx = avctx; + if (DCT_common_init(&s2) < 0) + return -1; + + s->idct_put[0] = s2.idct_put; + memcpy(s->idct_permutation, s2.idct_permutation, 64); + memcpy(s->dv_zigzag[0], s2.intra_scantable.permutated, 64); + + /* XXX: use MMX also for idct248 */ + s->idct_put[1] = simple_idct248_put; + memcpy(s->dv_zigzag[1], dv_248_zigzag, 64); + + /* XXX: do it only for constant case */ + dv_build_unquantize_tables(s); + + return 0; +} + +//#define VLC_DEBUG + +typedef struct BlockInfo { + const UINT8 *shift_table; + const UINT8 *scan_table; + UINT8 pos; /* position in block */ + UINT8 eob_reached; /* true if EOB has been reached */ + UINT8 dct_mode; + UINT8 partial_bit_count; + UINT16 partial_bit_buffer; + int shift_offset; +} BlockInfo; + +/* block size in bits */ +static const UINT16 block_sizes[6] = { + 112, 112, 112, 112, 80, 80 +}; + +#ifndef ALT_BITSTREAM_READER +#error only works with ALT_BITSTREAM_READER +#endif + +/* decode ac coefs */ +static void dv_decode_ac(DVVideoDecodeContext *s, + BlockInfo *mb, INT16 *block, int last_index) +{ + int last_re_index; + int shift_offset = mb->shift_offset; + const UINT8 *scan_table = mb->scan_table; + const UINT8 *shift_table = mb->shift_table; + int pos = mb->pos; + int level, pos1, sign, run; + int partial_bit_count; + + OPEN_READER(re, &s->gb); + +#ifdef VLC_DEBUG + printf("start\n"); +#endif + + /* if we must parse a partial vlc, we do it here */ + partial_bit_count = mb->partial_bit_count; + if (partial_bit_count > 0) { + UINT8 buf[4]; + 
UINT32 v; + int l, l1; + GetBitContext gb1; + + /* build the dummy bit buffer */ + l = 16 - partial_bit_count; + UPDATE_CACHE(re, &s->gb); +#ifdef VLC_DEBUG + printf("show=%04x\n", SHOW_UBITS(re, &s->gb, 16)); +#endif + v = (mb->partial_bit_buffer << l) | SHOW_UBITS(re, &s->gb, l); + buf[0] = v >> 8; + buf[1] = v; +#ifdef VLC_DEBUG + printf("v=%04x cnt=%d %04x\n", + v, partial_bit_count, (mb->partial_bit_buffer << l)); +#endif + /* try to read the codeword */ + init_get_bits(&gb1, buf, 4); + { + OPEN_READER(re1, &gb1); + UPDATE_CACHE(re1, &gb1); + GET_RL_VLC(level, run, re1, &gb1, dv_rl_vlc[0], + TEX_VLC_BITS, 2); + l = re1_index; + CLOSE_READER(re1, &gb1); + } +#ifdef VLC_DEBUG + printf("****run=%d level=%d size=%d\n", run, level, l); +#endif + /* compute codeword length */ + l1 = (level != 256 && level != 0); + /* if too long, we cannot parse */ + l -= partial_bit_count; + if ((re_index + l + l1) > last_index) + return; + /* skip read bits */ + last_re_index = 0; /* avoid warning */ + re_index += l; + /* by definition, if we can read the vlc, all partial bits + will be read (otherwise we could have read the vlc before) */ + mb->partial_bit_count = 0; + UPDATE_CACHE(re, &s->gb); + goto handle_vlc; + } + + /* get the AC coefficients until last_index is reached */ + for(;;) { + UPDATE_CACHE(re, &s->gb); +#ifdef VLC_DEBUG + printf("%2d: bits=%04x index=%d\n", + pos, SHOW_UBITS(re, &s->gb, 16), re_index); +#endif + last_re_index = re_index; + GET_RL_VLC(level, run, re, &s->gb, dv_rl_vlc[0], + TEX_VLC_BITS, 2); + handle_vlc: +#ifdef VLC_DEBUG + printf("run=%d level=%d\n", run, level); +#endif + if (level == 256) { + if (re_index > last_index) { + cannot_read: + /* put position before read code */ + re_index = last_re_index; + mb->eob_reached = 0; + break; + } + /* EOB */ + mb->eob_reached = 1; + break; + } else if (level != 0) { + if ((re_index + 1) > last_index) + goto cannot_read; + sign = SHOW_SBITS(re, &s->gb, 1); + level = (level ^ sign) - sign; + 
LAST_SKIP_BITS(re, &s->gb, 1);
+            pos += run;
+            /* error */
+            if (pos >= 64) {
+                goto read_error;
+            }
+            pos1 = scan_table[pos];
+            level = level << (shift_table[pos1] + shift_offset);
+            block[pos1] = level;
+            //            printf("run=%d level=%d shift=%d\n", run, level, shift_table[pos1]);
+        } else {
+            if (re_index > last_index)
+                goto cannot_read;
+            /* level is zero: means run without coding. No
+               sign is coded */
+            pos += run;
+            /* error */
+            if (pos >= 64) {
+            read_error:
+#if defined(VLC_DEBUG) || 1
+                printf("error pos=%d\n", pos);
+#endif
+                /* for errors, we consider the eob is reached */
+                mb->eob_reached = 1;
+                break;
+            }
+        }
+    }
+    CLOSE_READER(re, &s->gb);
+    mb->pos = pos;
+}
+
+/* Copy 'bits_left' bits from the reader 'gb' to the writer 'pb', 16
+   bits at a time and then the remainder. Nothing is copied when
+   'bits_left' is zero or negative. */
+static inline void bit_copy(PutBitContext *pb, GetBitContext *gb, int bits_left)
+{
+    while (bits_left >= 16) {
+        put_bits(pb, 16, get_bits(gb, 16));
+        bits_left -= 16;
+    }
+    if (bits_left > 0) {
+        put_bits(pb, bits_left, get_bits(gb, bits_left));
+    }
+}
+
+/* Decode one video segment (5 macroblocks of 6 DCT blocks each) read
+   from 'buf_ptr1' and write the pixels into s->current_picture.
+
+   'mb_pos_ptr' gives one packed position per macroblock: x in the low
+   byte, y in the high byte. mb_x and mb_y are in units of 8 pixels.
+
+   The coefficients are read in three passes:
+   - pass 1: each block is decoded from its own area of
+     block_sizes[j] bits; the bits that finished blocks did not use
+     are gathered in a per macroblock buffer;
+   - pass 2: the unfinished blocks of the macroblock continue from
+     that buffer; what is still unused goes to a per segment buffer;
+   - pass 3: the blocks which are still unfinished continue from the
+     per segment buffer.
+   The IDCT is then applied and the blocks are placed in the picture. */
+static inline void dv_decode_video_segment(DVVideoDecodeContext *s,
+                                           UINT8 *buf_ptr1,
+                                           const UINT16 *mb_pos_ptr)
+{
+    int quant, dc, dct_mode, class1, j;
+    int mb_index, mb_x, mb_y, v, last_index;
+    DCTELEM *block, *block1;
+    int c_offset, bits_left;
+    UINT8 *y_ptr;
+    BlockInfo mb_data[5 * 6], *mb, *mb1;
+    void (*idct_put)(UINT8 *dest, int line_size, DCTELEM *block);
+    UINT8 *buf_ptr;
+    PutBitContext pb, vs_pb;
+    UINT8 mb_bit_buffer[80 + 4]; /* allow some slack */
+    int mb_bit_count;
+    UINT8 vs_bit_buffer[5 * 80 + 4]; /* allow some slack */
+    int vs_bit_count;
+
+    memset(s->block, 0, sizeof(s->block));
+
+    /* pass 1 : read DC and AC coefficients in blocks */
+    buf_ptr = buf_ptr1;
+    block1 = &s->block[0][0];
+    mb1 = mb_data;
+    init_put_bits(&vs_pb, vs_bit_buffer, 5 * 80, NULL, NULL);
+    vs_bit_count = 0;
+    for(mb_index = 0; mb_index < 5; mb_index++) {
+        /* skip header (4 bytes, the quantizer is in the low nibble of
+           the last one) */
+        quant = buf_ptr[3] & 0x0f;
+        buf_ptr += 4;
+        init_put_bits(&pb, mb_bit_buffer, 80, NULL, NULL);
+        mb_bit_count = 0;
+        mb = mb1;
+        block = block1;
+        for(j = 0;j < 6; j++) {
+            /* NOTE: size is not important here */
+            init_get_bits(&s->gb, buf_ptr, 14);
+
+            /* get the dc (9 bits, sign extended) */
+            dc = get_bits(&s->gb, 9);
+            dc = (dc << (32 - 9)) >> (32 - 9);
+            dct_mode = get_bits1(&s->gb);
+            mb->dct_mode = dct_mode;
+            mb->scan_table = s->dv_zigzag[dct_mode];
+            class1 = get_bits(&s->gb, 2);
+            mb->shift_offset = (class1 == 3);
+            mb->shift_table = s->dv_shift[dct_mode]
+                [quant + dv_quant_offset[class1]];
+            dc = dc << 2;
+            /* convert to unsigned because 128 is not added in the
+               standard IDCT */
+            dc += 1024;
+            block[0] = dc;
+            last_index = block_sizes[j];
+            buf_ptr += last_index >> 3;
+            mb->pos = 0;
+            mb->partial_bit_count = 0;
+
+            dv_decode_ac(s, mb, block, last_index);
+
+            /* write the remaining bits in a new buffer only if the
+               block is finished */
+            bits_left = last_index - s->gb.index;
+            if (mb->eob_reached) {
+                mb->partial_bit_count = 0;
+                mb_bit_count += bits_left;
+                bit_copy(&pb, &s->gb, bits_left);
+            } else {
+                /* should be < 16 bits otherwise a codeword could have
+                   been parsed */
+                mb->partial_bit_count = bits_left;
+                /* get_bits() is not meant to be called with a bit
+                   count <= 0: use the same guard as pass 2 and
+                   bit_copy() */
+                if (bits_left > 0)
+                    mb->partial_bit_buffer = get_bits(&s->gb, bits_left);
+                else
+                    mb->partial_bit_buffer = 0;
+            }
+            block += 64;
+            mb++;
+        }
+
+        flush_put_bits(&pb);
+
+        /* pass 2 : we can do it just after */
+#ifdef VLC_DEBUG
+        printf("***pass 2 size=%d\n", mb_bit_count);
+#endif
+        block = block1;
+        mb = mb1;
+        init_get_bits(&s->gb, mb_bit_buffer, 80);
+        for(j = 0;j < 6; j++) {
+            if (!mb->eob_reached && s->gb.index < mb_bit_count) {
+                dv_decode_ac(s, mb, block, mb_bit_count);
+                /* if still not finished, no need to parse other blocks */
+                if (!mb->eob_reached) {
+                    /* we could not parse the current AC coefficient,
+                       so we add the remaining bytes */
+                    bits_left = mb_bit_count - s->gb.index;
+                    if (bits_left > 0) {
+                        mb->partial_bit_count += bits_left;
+                        mb->partial_bit_buffer =
+                            (mb->partial_bit_buffer << bits_left) |
+                            get_bits(&s->gb, bits_left);
+                    }
+                    goto next_mb;
+                }
+            }
+            block += 64;
+            mb++;
+        }
+        /* all blocks are finished, so the extra bytes can be used at
+           the video segment level */
+        bits_left = mb_bit_count - s->gb.index;
+        vs_bit_count += bits_left;
+        bit_copy(&vs_pb, &s->gb, bits_left);
+    next_mb:
+        mb1 += 6;
+        block1 += 6 * 64;
+    }
+
+    /* pass 3 : we need a pass over the whole video segment */
+    flush_put_bits(&vs_pb);
+
+#ifdef VLC_DEBUG
+    printf("***pass 3 size=%d\n", vs_bit_count);
+#endif
+    block = &s->block[0][0];
+    mb = mb_data;
+    init_get_bits(&s->gb, vs_bit_buffer, 5 * 80);
+    for(mb_index = 0; mb_index < 5; mb_index++) {
+        for(j = 0;j < 6; j++) {
+            if (!mb->eob_reached) {
+#ifdef VLC_DEBUG
+                printf("start %d:%d\n", mb_index, j);
+#endif
+                dv_decode_ac(s, mb, block, vs_bit_count);
+            }
+            block += 64;
+            mb++;
+        }
+    }
+
+    /* compute idct and place blocks */
+    block = &s->block[0][0];
+    mb = mb_data;
+    for(mb_index = 0; mb_index < 5; mb_index++) {
+        v = *mb_pos_ptr++;
+        mb_x = v & 0xff;
+        mb_y = v >> 8;
+        y_ptr = s->current_picture[0] + (mb_y * s->linesize[0] * 8) + (mb_x * 8);
+        if (s->sampling_411)
+            c_offset = (mb_y * s->linesize[1] * 8) + ((mb_x >> 2) * 8);
+        else
+            c_offset = ((mb_y >> 1) * s->linesize[1] * 8) + ((mb_x >> 1) * 8);
+        for(j = 0;j < 6; j++) {
+            idct_put = s->idct_put[mb->dct_mode];
+            if (j < 4) {
+                /* blocks 0..3 go to plane 0 */
+                if (s->sampling_411 && mb_x < (704 / 8)) {
+                    /* 411: the four blocks are side by side.
+                       NOTE: at end of line, the macroblock is
+                       handled as 420 (else branch) */
+                    idct_put(y_ptr + (j * 8), s->linesize[0], block);
+                } else {
+                    /* the four blocks form a 2x2 square */
+                    idct_put(y_ptr + ((j & 1) * 8) + ((j >> 1) * 8 * s->linesize[0]),
+                             s->linesize[0], block);
+                }
+            } else {
+                /* block 4 goes to plane 2 and block 5 to plane 1 */
+                if (s->sampling_411 && mb_x >= (704 / 8)) {
+                    uint8_t pixels[64], *c_ptr, *c_ptr1, *ptr;
+                    int y, linesize;
+                    /* NOTE: at end of line, the macroblock is handled as 420 */
+                    idct_put(pixels, 8, block);
+                    linesize = s->linesize[6 - j];
+                    c_ptr = s->current_picture[6 - j] + c_offset;
+                    ptr = pixels;
+                    for(y = 0;y < 8; y++) {
+                        /* convert to 411P: average each pair of
+                           pixels and write the result on two lines */
+                        c_ptr1 = c_ptr + linesize;
+                        c_ptr1[0] = c_ptr[0] = (ptr[0] + ptr[1]) >> 1;
+                        c_ptr1[1] = c_ptr[1] = (ptr[2] + ptr[3]) >> 1;
+                        c_ptr1[2] = c_ptr[2] = (ptr[4] + ptr[5]) >> 1;
+                        c_ptr1[3] = c_ptr[3] = (ptr[6] + ptr[7]) >> 1;
+                        c_ptr += linesize * 2;
+                        ptr += 8;
+                    }
+                } else {
+                    /* don't ask me why they inverted Cb and Cr ! */
+                    idct_put(s->current_picture[6 - j] + c_offset,
+                             s->linesize[6 - j], block);
+                }
+            }
+            block += 64;
+            mb++;
+        }
+    }
+}
+
+
+/* Decode one DV frame from 'buf' into the three planes kept in the
+   context and return them in 'data' as an AVPicture ('*data_size' is
+   set to sizeof(AVPicture)). avctx width, height, pix_fmt and
+   frame_rate are updated from the header.
+
+   Return the frame size of the detected system (PAL_FRAME_SIZE or
+   NTSC_FRAME_SIZE), or -1 if the header is not accepted, if the
+   buffer is too short or if a plane cannot be allocated.
+
+   NOTE: exactly one frame must be given (120000 bytes for NTSC,
+   144000 bytes for PAL) */
+static int dvvideo_decode_frame(AVCodecContext *avctx,
+                                 void *data, int *data_size,
+                                 UINT8 *buf, int buf_size)
+{
+    DVVideoDecodeContext *s = avctx->priv_data;
+    int sct, dsf, apt, ds, nb_dif_segs, vs, size, width, height, i, packet_size;
+    UINT8 *buf_ptr;
+    const UINT16 *mb_pos_ptr;
+    AVPicture *picture;
+
+    /* every accepted frame is at least as long as the smaller of the
+       two frame sizes (see the packet_size test below), so reject
+       shorter input before the header is read from it */
+    if (buf_size < (NTSC_FRAME_SIZE < PAL_FRAME_SIZE ?
+                    NTSC_FRAME_SIZE : PAL_FRAME_SIZE))
+        return -1;
+
+    /* parse id */
+    init_get_bits(&s->gb, buf, buf_size);
+    sct = get_bits(&s->gb, 3);
+    if (sct != 0)
+        return -1;
+    skip_bits(&s->gb, 5);
+    get_bits(&s->gb, 4); /* dsn (sequence number) */
+    get_bits(&s->gb, 1); /* fsc (channel number) */
+    skip_bits(&s->gb, 3);
+    get_bits(&s->gb, 8); /* dbn (DIF block number 0-134) */
+
+    dsf = get_bits(&s->gb, 1); /* 0 = NTSC 1 = PAL */
+    if (get_bits(&s->gb, 1) != 0)
+        return -1;
+    skip_bits(&s->gb, 11);
+    apt = get_bits(&s->gb, 3); /* apt */
+
+    get_bits(&s->gb, 1); /* tf1 */
+    skip_bits(&s->gb, 4);
+    get_bits(&s->gb, 3); /* ap1 */
+
+    get_bits(&s->gb, 1); /* tf2 */
+    skip_bits(&s->gb, 4);
+    get_bits(&s->gb, 3); /* ap2 */
+
+    get_bits(&s->gb, 1); /* tf3 */
+    skip_bits(&s->gb, 4);
+    get_bits(&s->gb, 3); /* ap3 */
+
+    /* init size */
+    width = 720;
+    if (dsf) {
+        packet_size = PAL_FRAME_SIZE;
+        height = 576;
+        nb_dif_segs = 12;
+    } else {
+        packet_size = NTSC_FRAME_SIZE;
+        height = 480;
+        nb_dif_segs = 10;
+    }
+    /* NOTE: the buffer must hold at least one full frame */
+    if (buf_size < packet_size)
+        return -1;
+
+    /* XXX: is it correct to assume that 420 is always used in PAL
+       mode ? */
+    s->sampling_411 = !dsf;
+    if (s->sampling_411)
+        mb_pos_ptr = dv_place_411;
+    else
+        mb_pos_ptr = dv_place_420;
+
+    /* (re)alloc picture if needed */
+    if (s->width != width || s->height != height) {
+        for(i=0;i<3;i++)
+            av_freep(&s->current_picture[i]);
+        for(i=0;i<3;i++) {
+            size = width * height;
+            s->linesize[i] = width;
+            if (i >= 1) {
+                /* planes 1 and 2 hold a quarter of the samples of
+                   plane 0: a quarter of the width in 411, half of
+                   it otherwise */
+                size >>= 2;
+                s->linesize[i] >>= s->sampling_411 ? 2 : 1;
+            }
+            s->current_picture[i] = av_malloc(size);
+            /* s->width and s->height are left unchanged on failure,
+               so the next call frees and allocates again */
+            if (!s->current_picture[i])
+                return -1;
+        }
+        s->width = width;
+        s->height = height;
+    }
+
+    /* for each DIF segment */
+    buf_ptr = buf;
+    for (ds = 0; ds < nb_dif_segs; ds++) {
+        buf_ptr += 6 * 80; /* skip DIF segment header */
+
+        /* 27 video segments of 5 blocks of 80 bytes, with one audio
+           block in front of every third video segment */
+        for(vs = 0; vs < 27; vs++) {
+            if ((vs % 3) == 0) {
+                /* skip audio block */
+                buf_ptr += 80;
+            }
+            dv_decode_video_segment(s, buf_ptr, mb_pos_ptr);
+            buf_ptr += 5 * 80;
+            mb_pos_ptr += 5;
+        }
+    }
+
+    emms_c();
+
+    /* return image */
+    avctx->width = width;
+    avctx->height = height;
+    if (s->sampling_411)
+        avctx->pix_fmt = PIX_FMT_YUV411P;
+    else
+        avctx->pix_fmt = PIX_FMT_YUV420P;
+    if (dsf)
+        avctx->frame_rate = 25 * FRAME_RATE_BASE;
+    else
+        avctx->frame_rate = 30 * FRAME_RATE_BASE;
+    *data_size = sizeof(AVPicture);
+    picture = data;
+    for(i=0;i<3;i++) {
+        picture->data[i] = s->current_picture[i];
+        picture->linesize[i] = s->linesize[i];
+    }
+    return packet_size;
+}
+
+/* Free the three picture planes of the video decoder. */
+static int dvvideo_decode_end(AVCodecContext *avctx)
+{
+    DVVideoDecodeContext *s = avctx->priv_data;
+    int i;
+
+    for(i=0;i<3;i++)
+        av_freep(&s->current_picture[i]);
+    return 0;
+}
+
+/* DV video decoder entry. The initializer is positional: the meaning
+   of each slot is given by the AVCodec declaration, which is not in
+   this file. */
+AVCodec dvvideo_decoder = {
+    "dvvideo",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_DVVIDEO,
+    sizeof(DVVideoDecodeContext),
+    dvvideo_decode_init,
+    NULL,
+    dvvideo_decode_end,
+    dvvideo_decode_frame,
+    0,
+    NULL
+};
+
+/* Private context of the DV audio decoder (not used yet by the
+   functions below). */
+typedef struct DVAudioDecodeContext {
+    AVCodecContext *avctx;
+    GetBitContext gb;
+
+} DVAudioDecodeContext;
+
+/* Placeholder: nothing to set up yet. */
+static int dvaudio_decode_init(AVCodecContext *avctx)
+{
+    //    DVAudioDecodeContext *s = avctx->priv_data;
+    return 0;
+}
+
+/* Placeholder: the whole buffer is consumed and no sample is output.
+
+   NOTE: exactly one frame must be given (120000 bytes for NTSC,
+   144000 bytes for PAL) */
+static int dvaudio_decode_frame(AVCodecContext *avctx,
+                                 void *data, int *data_size,
+                                 UINT8 *buf, int buf_size)
+{
+    //    DVAudioDecodeContext *s = avctx->priv_data;
+    /* nothing is written to 'data': report an empty output instead
+       of leaving '*data_size' unset */
+    *data_size = 0;
+    return buf_size;
+}
+
+/* Placeholder: nothing to release yet. */
+static int dvaudio_decode_end(AVCodecContext *avctx)
+{
+    //    DVAudioDecodeContext *s = avctx->priv_data;
+    return 0;
+}
+
+/* DV audio decoder entry (same positional layout as dvvideo_decoder). */
+AVCodec dvaudio_decoder = {
+    "dvaudio",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_DVAUDIO,
+    sizeof(DVAudioDecodeContext),
+    dvaudio_decode_init,
+    NULL,
+    dvaudio_decode_end,
+    dvaudio_decode_frame,
+    0,
+    NULL
+};
diff --git a/src/libffmpeg/libavcodec/dvdata.h b/src/libffmpeg/libavcodec/dvdata.h
new file mode 100644
index 000000000..b5c1f5607
--- /dev/null
+++ b/src/libffmpeg/libavcodec/dvdata.h
@@ -0,0 +1,907 @@
+/*
+ * Constants for DV codec
+ * Copyright (c) 2002 Fabrice Bellard.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define NB_DV_VLC 409 + +static const UINT16 dv_vlc_bits[409] = { + 0x0000, 0x0002, 0x0007, 0x0008, 0x0009, 0x0014, 0x0015, 0x0016, + 0x0017, 0x0030, 0x0031, 0x0032, 0x0033, 0x0068, 0x0069, 0x006a, + 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 0x00e0, 0x00e1, 0x00e2, + 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, + 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 0x01e0, 0x01e1, 0x01e2, + 0x01e3, 0x01e4, 0x01e5, 0x01e6, 0x01e7, 0x01e8, 0x01e9, 0x01ea, + 0x01eb, 0x01ec, 0x01ed, 0x01ee, 0x01ef, 0x03e0, 0x03e1, 0x03e2, + 0x03e3, 0x03e4, 0x03e5, 0x03e6, 0x07ce, 0x07cf, 0x07d0, 0x07d1, + 0x07d2, 0x07d3, 0x07d4, 0x07d5, 0x0fac, 0x0fad, 0x0fae, 0x0faf, + 0x0fb0, 0x0fb1, 0x0fb2, 0x0fb3, 0x0fb4, 0x0fb5, 0x0fb6, 0x0fb7, + 0x0fb8, 0x0fb9, 0x0fba, 0x0fbb, 0x0fbc, 0x0fbd, 0x0fbe, 0x0fbf, + 0x1f80, 0x1f81, 0x1f82, 0x1f83, 0x1f84, 0x1f85, 0x1f86, 0x1f87, + 0x1f88, 0x1f89, 0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, + 0x1f90, 0x1f91, 0x1f92, 0x1f93, 0x1f94, 0x1f95, 0x1f96, 0x1f97, + 0x1f98, 0x1f99, 0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, + 0x1fa0, 0x1fa1, 0x1fa2, 0x1fa3, 0x1fa4, 0x1fa5, 0x1fa6, 0x1fa7, + 0x1fa8, 0x1fa9, 0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae, 0x1faf, + 0x1fb0, 0x1fb1, 0x1fb2, 0x1fb3, 0x1fb4, 0x1fb5, 0x1fb6, 0x1fb7, + 0x1fb8, 0x1fb9, 0x1fba, 0x1fbb, 0x1fbc, 0x1fbd, 0x1fbe, 0x1fbf, + 0x7f00, 0x7f01, 0x7f02, 0x7f03, 0x7f04, 0x7f05, 0x7f06, 0x7f07, + 0x7f08, 0x7f09, 0x7f0a, 0x7f0b, 0x7f0c, 0x7f0d, 0x7f0e, 0x7f0f, + 0x7f10, 0x7f11, 0x7f12, 0x7f13, 0x7f14, 0x7f15, 0x7f16, 0x7f17, + 0x7f18, 0x7f19, 0x7f1a, 0x7f1b, 0x7f1c, 0x7f1d, 0x7f1e, 0x7f1f, + 0x7f20, 0x7f21, 0x7f22, 0x7f23, 0x7f24, 0x7f25, 0x7f26, 0x7f27, + 0x7f28, 0x7f29, 0x7f2a, 0x7f2b, 0x7f2c, 0x7f2d, 0x7f2e, 0x7f2f, + 0x7f30, 0x7f31, 0x7f32, 0x7f33, 0x7f34, 0x7f35, 
0x7f36, 0x7f37, + 0x7f38, 0x7f39, 0x7f3a, 0x7f3b, 0x7f3c, 0x7f3d, 0x7f3e, 0x7f3f, + 0x7f40, 0x7f41, 0x7f42, 0x7f43, 0x7f44, 0x7f45, 0x7f46, 0x7f47, + 0x7f48, 0x7f49, 0x7f4a, 0x7f4b, 0x7f4c, 0x7f4d, 0x7f4e, 0x7f4f, + 0x7f50, 0x7f51, 0x7f52, 0x7f53, 0x7f54, 0x7f55, 0x7f56, 0x7f57, + 0x7f58, 0x7f59, 0x7f5a, 0x7f5b, 0x7f5c, 0x7f5d, 0x7f5e, 0x7f5f, + 0x7f60, 0x7f61, 0x7f62, 0x7f63, 0x7f64, 0x7f65, 0x7f66, 0x7f67, + 0x7f68, 0x7f69, 0x7f6a, 0x7f6b, 0x7f6c, 0x7f6d, 0x7f6e, 0x7f6f, + 0x7f70, 0x7f71, 0x7f72, 0x7f73, 0x7f74, 0x7f75, 0x7f76, 0x7f77, + 0x7f78, 0x7f79, 0x7f7a, 0x7f7b, 0x7f7c, 0x7f7d, 0x7f7e, 0x7f7f, + 0x7f80, 0x7f81, 0x7f82, 0x7f83, 0x7f84, 0x7f85, 0x7f86, 0x7f87, + 0x7f88, 0x7f89, 0x7f8a, 0x7f8b, 0x7f8c, 0x7f8d, 0x7f8e, 0x7f8f, + 0x7f90, 0x7f91, 0x7f92, 0x7f93, 0x7f94, 0x7f95, 0x7f96, 0x7f97, + 0x7f98, 0x7f99, 0x7f9a, 0x7f9b, 0x7f9c, 0x7f9d, 0x7f9e, 0x7f9f, + 0x7fa0, 0x7fa1, 0x7fa2, 0x7fa3, 0x7fa4, 0x7fa5, 0x7fa6, 0x7fa7, + 0x7fa8, 0x7fa9, 0x7faa, 0x7fab, 0x7fac, 0x7fad, 0x7fae, 0x7faf, + 0x7fb0, 0x7fb1, 0x7fb2, 0x7fb3, 0x7fb4, 0x7fb5, 0x7fb6, 0x7fb7, + 0x7fb8, 0x7fb9, 0x7fba, 0x7fbb, 0x7fbc, 0x7fbd, 0x7fbe, 0x7fbf, + 0x7fc0, 0x7fc1, 0x7fc2, 0x7fc3, 0x7fc4, 0x7fc5, 0x7fc6, 0x7fc7, + 0x7fc8, 0x7fc9, 0x7fca, 0x7fcb, 0x7fcc, 0x7fcd, 0x7fce, 0x7fcf, + 0x7fd0, 0x7fd1, 0x7fd2, 0x7fd3, 0x7fd4, 0x7fd5, 0x7fd6, 0x7fd7, + 0x7fd8, 0x7fd9, 0x7fda, 0x7fdb, 0x7fdc, 0x7fdd, 0x7fde, 0x7fdf, + 0x7fe0, 0x7fe1, 0x7fe2, 0x7fe3, 0x7fe4, 0x7fe5, 0x7fe6, 0x7fe7, + 0x7fe8, 0x7fe9, 0x7fea, 0x7feb, 0x7fec, 0x7fed, 0x7fee, 0x7fef, + 0x7ff0, 0x7ff1, 0x7ff2, 0x7ff3, 0x7ff4, 0x7ff5, 0x7ff6, 0x7ff7, + 0x7ff8, 0x7ff9, 0x7ffa, 0x7ffb, 0x7ffc, 0x7ffd, 0x7ffe, 0x7fff, + 0x0006, +}; + +static const UINT8 dv_vlc_len[409] = { + 2, 3, 4, 4, 4, 5, 5, 5, + 5, 6, 6, 6, 6, 7, 7, 7, + 7, 7, 7, 7, 7, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 10, 10, 10, + 10, 10, 10, 10, 11, 11, 11, 11, + 11, 11, 11, 11, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 
12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 4, +}; + +static const UINT8 dv_vlc_run[409] = { + 0, 0, 1, 0, 0, 2, 1, 0, + 0, 3, 4, 0, 0, 5, 6, 2, + 1, 1, 0, 0, 0, 7, 8, 9, + 10, 3, 4, 2, 1, 1, 1, 0, + 0, 0, 0, 0, 0, 11, 12, 13, + 14, 5, 6, 3, 4, 2, 2, 1, + 0, 0, 0, 0, 0, 5, 3, 3, + 2, 1, 1, 1, 0, 1, 6, 4, + 3, 1, 1, 1, 2, 3, 4, 5, + 7, 8, 9, 10, 7, 8, 4, 3, + 2, 2, 2, 2, 2, 1, 1, 1, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 
61, 62, 63, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, +}; + +static const UINT8 dv_vlc_level[409] = { + 1, 2, 1, 3, 4, 1, 2, 5, + 6, 1, 1, 7, 8, 1, 1, 2, + 3, 4, 9, 10, 11, 1, 1, 1, + 1, 2, 2, 3, 5, 6, 7, 12, + 13, 14, 15, 16, 17, 1, 1, 1, + 1, 2, 2, 3, 3, 4, 5, 8, + 18, 19, 20, 21, 22, 3, 4, 5, + 6, 9, 10, 11, 0, 0, 3, 4, + 6, 12, 13, 14, 0, 0, 0, 0, + 2, 2, 2, 2, 3, 3, 5, 7, + 7, 8, 9, 10, 11, 15, 16, 17, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, + 88, 89, 90, 91, 92, 93, 94, 95, + 96, 97, 98, 99, 100, 101, 102, 103, + 104, 105, 106, 107, 108, 109, 110, 111, + 112, 113, 114, 115, 116, 117, 118, 119, + 120, 121, 122, 123, 124, 125, 126, 127, + 128, 129, 130, 131, 
132, 133, 134, 135, + 136, 137, 138, 139, 140, 141, 142, 143, + 144, 145, 146, 147, 148, 149, 150, 151, + 152, 153, 154, 155, 156, 157, 158, 159, + 160, 161, 162, 163, 164, 165, 166, 167, + 168, 169, 170, 171, 172, 173, 174, 175, + 176, 177, 178, 179, 180, 181, 182, 183, + 184, 185, 186, 187, 188, 189, 190, 191, + 192, 193, 194, 195, 196, 197, 198, 199, + 200, 201, 202, 203, 204, 205, 206, 207, + 208, 209, 210, 211, 212, 213, 214, 215, + 216, 217, 218, 219, 220, 221, 222, 223, + 224, 225, 226, 227, 228, 229, 230, 231, + 232, 233, 234, 235, 236, 237, 238, 239, + 240, 241, 242, 243, 244, 245, 246, 247, + 248, 249, 250, 251, 252, 253, 254, 255, + 0, +}; + +/* Specific zigzag scan for 248 idct. NOTE that unlike the + specification, we interleave the fields */ +static const UINT8 dv_248_zigzag[64] = { + 0, 8, 1, 9, 16, 24, 2, 10, + 17, 25, 32, 40, 48, 56, 33, 41, + 18, 26, 3, 11, 4, 12, 19, 27, + 34, 42, 49, 57, 50, 58, 35, 43, + 20, 28, 5, 13, 6, 14, 21, 29, + 36, 44, 51, 59, 52, 60, 37, 45, + 22, 30, 7, 15, 23, 31, 38, 46, + 53, 61, 54, 62, 39, 47, 55, 63, +}; + +/* unquant tables (not used directly) */ +static const UINT8 dv_88_areas[64] = { + 0,0,0,1,1,1,2,2, + 0,0,1,1,1,2,2,2, + 0,1,1,1,2,2,2,3, + 1,1,1,2,2,2,3,3, + 1,1,2,2,2,3,3,3, + 1,2,2,2,3,3,3,3, + 2,2,2,3,3,3,3,3, + 2,2,3,3,3,3,3,3, +}; + +static const UINT8 dv_248_areas[64] = { + 0,0,1,1,1,2,2,3, + 0,0,1,1,2,2,2,3, + 0,1,1,2,2,2,3,3, + 0,1,1,2,2,2,3,3, + 1,1,2,2,2,3,3,3, + 1,1,2,2,2,3,3,3, + 1,2,2,2,3,3,3,3, + 1,2,2,3,3,3,3,3, +}; + +static UINT8 dv_quant_shifts[22][4] = { + { 3,3,4,4 }, + { 3,3,4,4 }, + { 2,3,3,4 }, + { 2,3,3,4 }, + { 2,2,3,3 }, + { 2,2,3,3 }, + { 1,2,2,3 }, + { 1,2,2,3 }, + { 1,1,2,2 }, + { 1,1,2,2 }, + { 0,1,1,2 }, + { 0,1,1,2 }, + { 0,0,1,1 }, + { 0,0,1,1 }, + { 0,0,0,1 }, + { 0,0,0,0 }, + { 0,0,0,0 }, + { 0,0,0,0 }, + { 0,0,0,0 }, + { 0,0,0,0 }, + { 0,0,0,0 }, + { 0,0,0,0 }, +}; + +static const UINT8 dv_quant_offset[4] = { 6, 3, 0, 1 }; + +/* NOTE: I prefer hardcoding the positionning 
of dv blocks, it is + simpler :-) */ + +static const UINT16 dv_place_420[1620] = { + 0x0c24, 0x2412, 0x3036, 0x0000, 0x1848, + 0x0e24, 0x2612, 0x3236, 0x0200, 0x1a48, + 0x1024, 0x2812, 0x3436, 0x0400, 0x1c48, + 0x1026, 0x2814, 0x3438, 0x0402, 0x1c4a, + 0x0e26, 0x2614, 0x3238, 0x0202, 0x1a4a, + 0x0c26, 0x2414, 0x3038, 0x0002, 0x184a, + 0x0c28, 0x2416, 0x303a, 0x0004, 0x184c, + 0x0e28, 0x2616, 0x323a, 0x0204, 0x1a4c, + 0x1028, 0x2816, 0x343a, 0x0404, 0x1c4c, + 0x102a, 0x2818, 0x343c, 0x0406, 0x1c4e, + 0x0e2a, 0x2618, 0x323c, 0x0206, 0x1a4e, + 0x0c2a, 0x2418, 0x303c, 0x0006, 0x184e, + 0x0c2c, 0x241a, 0x303e, 0x0008, 0x1850, + 0x0e2c, 0x261a, 0x323e, 0x0208, 0x1a50, + 0x102c, 0x281a, 0x343e, 0x0408, 0x1c50, + 0x102e, 0x281c, 0x3440, 0x040a, 0x1c52, + 0x0e2e, 0x261c, 0x3240, 0x020a, 0x1a52, + 0x0c2e, 0x241c, 0x3040, 0x000a, 0x1852, + 0x0c30, 0x241e, 0x3042, 0x000c, 0x1854, + 0x0e30, 0x261e, 0x3242, 0x020c, 0x1a54, + 0x1030, 0x281e, 0x3442, 0x040c, 0x1c54, + 0x1032, 0x2820, 0x3444, 0x040e, 0x1c56, + 0x0e32, 0x2620, 0x3244, 0x020e, 0x1a56, + 0x0c32, 0x2420, 0x3044, 0x000e, 0x1856, + 0x0c34, 0x2422, 0x3046, 0x0010, 0x1858, + 0x0e34, 0x2622, 0x3246, 0x0210, 0x1a58, + 0x1034, 0x2822, 0x3446, 0x0410, 0x1c58, + 0x1224, 0x2a12, 0x3636, 0x0600, 0x1e48, + 0x1424, 0x2c12, 0x3836, 0x0800, 0x2048, + 0x1624, 0x2e12, 0x3a36, 0x0a00, 0x2248, + 0x1626, 0x2e14, 0x3a38, 0x0a02, 0x224a, + 0x1426, 0x2c14, 0x3838, 0x0802, 0x204a, + 0x1226, 0x2a14, 0x3638, 0x0602, 0x1e4a, + 0x1228, 0x2a16, 0x363a, 0x0604, 0x1e4c, + 0x1428, 0x2c16, 0x383a, 0x0804, 0x204c, + 0x1628, 0x2e16, 0x3a3a, 0x0a04, 0x224c, + 0x162a, 0x2e18, 0x3a3c, 0x0a06, 0x224e, + 0x142a, 0x2c18, 0x383c, 0x0806, 0x204e, + 0x122a, 0x2a18, 0x363c, 0x0606, 0x1e4e, + 0x122c, 0x2a1a, 0x363e, 0x0608, 0x1e50, + 0x142c, 0x2c1a, 0x383e, 0x0808, 0x2050, + 0x162c, 0x2e1a, 0x3a3e, 0x0a08, 0x2250, + 0x162e, 0x2e1c, 0x3a40, 0x0a0a, 0x2252, + 0x142e, 0x2c1c, 0x3840, 0x080a, 0x2052, + 0x122e, 0x2a1c, 0x3640, 0x060a, 0x1e52, + 0x1230, 0x2a1e, 0x3642, 
0x060c, 0x1e54, + 0x1430, 0x2c1e, 0x3842, 0x080c, 0x2054, + 0x1630, 0x2e1e, 0x3a42, 0x0a0c, 0x2254, + 0x1632, 0x2e20, 0x3a44, 0x0a0e, 0x2256, + 0x1432, 0x2c20, 0x3844, 0x080e, 0x2056, + 0x1232, 0x2a20, 0x3644, 0x060e, 0x1e56, + 0x1234, 0x2a22, 0x3646, 0x0610, 0x1e58, + 0x1434, 0x2c22, 0x3846, 0x0810, 0x2058, + 0x1634, 0x2e22, 0x3a46, 0x0a10, 0x2258, + 0x1824, 0x3012, 0x3c36, 0x0c00, 0x2448, + 0x1a24, 0x3212, 0x3e36, 0x0e00, 0x2648, + 0x1c24, 0x3412, 0x4036, 0x1000, 0x2848, + 0x1c26, 0x3414, 0x4038, 0x1002, 0x284a, + 0x1a26, 0x3214, 0x3e38, 0x0e02, 0x264a, + 0x1826, 0x3014, 0x3c38, 0x0c02, 0x244a, + 0x1828, 0x3016, 0x3c3a, 0x0c04, 0x244c, + 0x1a28, 0x3216, 0x3e3a, 0x0e04, 0x264c, + 0x1c28, 0x3416, 0x403a, 0x1004, 0x284c, + 0x1c2a, 0x3418, 0x403c, 0x1006, 0x284e, + 0x1a2a, 0x3218, 0x3e3c, 0x0e06, 0x264e, + 0x182a, 0x3018, 0x3c3c, 0x0c06, 0x244e, + 0x182c, 0x301a, 0x3c3e, 0x0c08, 0x2450, + 0x1a2c, 0x321a, 0x3e3e, 0x0e08, 0x2650, + 0x1c2c, 0x341a, 0x403e, 0x1008, 0x2850, + 0x1c2e, 0x341c, 0x4040, 0x100a, 0x2852, + 0x1a2e, 0x321c, 0x3e40, 0x0e0a, 0x2652, + 0x182e, 0x301c, 0x3c40, 0x0c0a, 0x2452, + 0x1830, 0x301e, 0x3c42, 0x0c0c, 0x2454, + 0x1a30, 0x321e, 0x3e42, 0x0e0c, 0x2654, + 0x1c30, 0x341e, 0x4042, 0x100c, 0x2854, + 0x1c32, 0x3420, 0x4044, 0x100e, 0x2856, + 0x1a32, 0x3220, 0x3e44, 0x0e0e, 0x2656, + 0x1832, 0x3020, 0x3c44, 0x0c0e, 0x2456, + 0x1834, 0x3022, 0x3c46, 0x0c10, 0x2458, + 0x1a34, 0x3222, 0x3e46, 0x0e10, 0x2658, + 0x1c34, 0x3422, 0x4046, 0x1010, 0x2858, + 0x1e24, 0x3612, 0x4236, 0x1200, 0x2a48, + 0x2024, 0x3812, 0x4436, 0x1400, 0x2c48, + 0x2224, 0x3a12, 0x4636, 0x1600, 0x2e48, + 0x2226, 0x3a14, 0x4638, 0x1602, 0x2e4a, + 0x2026, 0x3814, 0x4438, 0x1402, 0x2c4a, + 0x1e26, 0x3614, 0x4238, 0x1202, 0x2a4a, + 0x1e28, 0x3616, 0x423a, 0x1204, 0x2a4c, + 0x2028, 0x3816, 0x443a, 0x1404, 0x2c4c, + 0x2228, 0x3a16, 0x463a, 0x1604, 0x2e4c, + 0x222a, 0x3a18, 0x463c, 0x1606, 0x2e4e, + 0x202a, 0x3818, 0x443c, 0x1406, 0x2c4e, + 0x1e2a, 0x3618, 0x423c, 0x1206, 0x2a4e, + 0x1e2c, 
0x361a, 0x423e, 0x1208, 0x2a50, + 0x202c, 0x381a, 0x443e, 0x1408, 0x2c50, + 0x222c, 0x3a1a, 0x463e, 0x1608, 0x2e50, + 0x222e, 0x3a1c, 0x4640, 0x160a, 0x2e52, + 0x202e, 0x381c, 0x4440, 0x140a, 0x2c52, + 0x1e2e, 0x361c, 0x4240, 0x120a, 0x2a52, + 0x1e30, 0x361e, 0x4242, 0x120c, 0x2a54, + 0x2030, 0x381e, 0x4442, 0x140c, 0x2c54, + 0x2230, 0x3a1e, 0x4642, 0x160c, 0x2e54, + 0x2232, 0x3a20, 0x4644, 0x160e, 0x2e56, + 0x2032, 0x3820, 0x4444, 0x140e, 0x2c56, + 0x1e32, 0x3620, 0x4244, 0x120e, 0x2a56, + 0x1e34, 0x3622, 0x4246, 0x1210, 0x2a58, + 0x2034, 0x3822, 0x4446, 0x1410, 0x2c58, + 0x2234, 0x3a22, 0x4646, 0x1610, 0x2e58, + 0x2424, 0x3c12, 0x0036, 0x1800, 0x3048, + 0x2624, 0x3e12, 0x0236, 0x1a00, 0x3248, + 0x2824, 0x4012, 0x0436, 0x1c00, 0x3448, + 0x2826, 0x4014, 0x0438, 0x1c02, 0x344a, + 0x2626, 0x3e14, 0x0238, 0x1a02, 0x324a, + 0x2426, 0x3c14, 0x0038, 0x1802, 0x304a, + 0x2428, 0x3c16, 0x003a, 0x1804, 0x304c, + 0x2628, 0x3e16, 0x023a, 0x1a04, 0x324c, + 0x2828, 0x4016, 0x043a, 0x1c04, 0x344c, + 0x282a, 0x4018, 0x043c, 0x1c06, 0x344e, + 0x262a, 0x3e18, 0x023c, 0x1a06, 0x324e, + 0x242a, 0x3c18, 0x003c, 0x1806, 0x304e, + 0x242c, 0x3c1a, 0x003e, 0x1808, 0x3050, + 0x262c, 0x3e1a, 0x023e, 0x1a08, 0x3250, + 0x282c, 0x401a, 0x043e, 0x1c08, 0x3450, + 0x282e, 0x401c, 0x0440, 0x1c0a, 0x3452, + 0x262e, 0x3e1c, 0x0240, 0x1a0a, 0x3252, + 0x242e, 0x3c1c, 0x0040, 0x180a, 0x3052, + 0x2430, 0x3c1e, 0x0042, 0x180c, 0x3054, + 0x2630, 0x3e1e, 0x0242, 0x1a0c, 0x3254, + 0x2830, 0x401e, 0x0442, 0x1c0c, 0x3454, + 0x2832, 0x4020, 0x0444, 0x1c0e, 0x3456, + 0x2632, 0x3e20, 0x0244, 0x1a0e, 0x3256, + 0x2432, 0x3c20, 0x0044, 0x180e, 0x3056, + 0x2434, 0x3c22, 0x0046, 0x1810, 0x3058, + 0x2634, 0x3e22, 0x0246, 0x1a10, 0x3258, + 0x2834, 0x4022, 0x0446, 0x1c10, 0x3458, + 0x2a24, 0x4212, 0x0636, 0x1e00, 0x3648, + 0x2c24, 0x4412, 0x0836, 0x2000, 0x3848, + 0x2e24, 0x4612, 0x0a36, 0x2200, 0x3a48, + 0x2e26, 0x4614, 0x0a38, 0x2202, 0x3a4a, + 0x2c26, 0x4414, 0x0838, 0x2002, 0x384a, + 0x2a26, 0x4214, 0x0638, 0x1e02, 
0x364a, + 0x2a28, 0x4216, 0x063a, 0x1e04, 0x364c, + 0x2c28, 0x4416, 0x083a, 0x2004, 0x384c, + 0x2e28, 0x4616, 0x0a3a, 0x2204, 0x3a4c, + 0x2e2a, 0x4618, 0x0a3c, 0x2206, 0x3a4e, + 0x2c2a, 0x4418, 0x083c, 0x2006, 0x384e, + 0x2a2a, 0x4218, 0x063c, 0x1e06, 0x364e, + 0x2a2c, 0x421a, 0x063e, 0x1e08, 0x3650, + 0x2c2c, 0x441a, 0x083e, 0x2008, 0x3850, + 0x2e2c, 0x461a, 0x0a3e, 0x2208, 0x3a50, + 0x2e2e, 0x461c, 0x0a40, 0x220a, 0x3a52, + 0x2c2e, 0x441c, 0x0840, 0x200a, 0x3852, + 0x2a2e, 0x421c, 0x0640, 0x1e0a, 0x3652, + 0x2a30, 0x421e, 0x0642, 0x1e0c, 0x3654, + 0x2c30, 0x441e, 0x0842, 0x200c, 0x3854, + 0x2e30, 0x461e, 0x0a42, 0x220c, 0x3a54, + 0x2e32, 0x4620, 0x0a44, 0x220e, 0x3a56, + 0x2c32, 0x4420, 0x0844, 0x200e, 0x3856, + 0x2a32, 0x4220, 0x0644, 0x1e0e, 0x3656, + 0x2a34, 0x4222, 0x0646, 0x1e10, 0x3658, + 0x2c34, 0x4422, 0x0846, 0x2010, 0x3858, + 0x2e34, 0x4622, 0x0a46, 0x2210, 0x3a58, + 0x3024, 0x0012, 0x0c36, 0x2400, 0x3c48, + 0x3224, 0x0212, 0x0e36, 0x2600, 0x3e48, + 0x3424, 0x0412, 0x1036, 0x2800, 0x4048, + 0x3426, 0x0414, 0x1038, 0x2802, 0x404a, + 0x3226, 0x0214, 0x0e38, 0x2602, 0x3e4a, + 0x3026, 0x0014, 0x0c38, 0x2402, 0x3c4a, + 0x3028, 0x0016, 0x0c3a, 0x2404, 0x3c4c, + 0x3228, 0x0216, 0x0e3a, 0x2604, 0x3e4c, + 0x3428, 0x0416, 0x103a, 0x2804, 0x404c, + 0x342a, 0x0418, 0x103c, 0x2806, 0x404e, + 0x322a, 0x0218, 0x0e3c, 0x2606, 0x3e4e, + 0x302a, 0x0018, 0x0c3c, 0x2406, 0x3c4e, + 0x302c, 0x001a, 0x0c3e, 0x2408, 0x3c50, + 0x322c, 0x021a, 0x0e3e, 0x2608, 0x3e50, + 0x342c, 0x041a, 0x103e, 0x2808, 0x4050, + 0x342e, 0x041c, 0x1040, 0x280a, 0x4052, + 0x322e, 0x021c, 0x0e40, 0x260a, 0x3e52, + 0x302e, 0x001c, 0x0c40, 0x240a, 0x3c52, + 0x3030, 0x001e, 0x0c42, 0x240c, 0x3c54, + 0x3230, 0x021e, 0x0e42, 0x260c, 0x3e54, + 0x3430, 0x041e, 0x1042, 0x280c, 0x4054, + 0x3432, 0x0420, 0x1044, 0x280e, 0x4056, + 0x3232, 0x0220, 0x0e44, 0x260e, 0x3e56, + 0x3032, 0x0020, 0x0c44, 0x240e, 0x3c56, + 0x3034, 0x0022, 0x0c46, 0x2410, 0x3c58, + 0x3234, 0x0222, 0x0e46, 0x2610, 0x3e58, + 0x3434, 0x0422, 
0x1046, 0x2810, 0x4058, + 0x3624, 0x0612, 0x1236, 0x2a00, 0x4248, + 0x3824, 0x0812, 0x1436, 0x2c00, 0x4448, + 0x3a24, 0x0a12, 0x1636, 0x2e00, 0x4648, + 0x3a26, 0x0a14, 0x1638, 0x2e02, 0x464a, + 0x3826, 0x0814, 0x1438, 0x2c02, 0x444a, + 0x3626, 0x0614, 0x1238, 0x2a02, 0x424a, + 0x3628, 0x0616, 0x123a, 0x2a04, 0x424c, + 0x3828, 0x0816, 0x143a, 0x2c04, 0x444c, + 0x3a28, 0x0a16, 0x163a, 0x2e04, 0x464c, + 0x3a2a, 0x0a18, 0x163c, 0x2e06, 0x464e, + 0x382a, 0x0818, 0x143c, 0x2c06, 0x444e, + 0x362a, 0x0618, 0x123c, 0x2a06, 0x424e, + 0x362c, 0x061a, 0x123e, 0x2a08, 0x4250, + 0x382c, 0x081a, 0x143e, 0x2c08, 0x4450, + 0x3a2c, 0x0a1a, 0x163e, 0x2e08, 0x4650, + 0x3a2e, 0x0a1c, 0x1640, 0x2e0a, 0x4652, + 0x382e, 0x081c, 0x1440, 0x2c0a, 0x4452, + 0x362e, 0x061c, 0x1240, 0x2a0a, 0x4252, + 0x3630, 0x061e, 0x1242, 0x2a0c, 0x4254, + 0x3830, 0x081e, 0x1442, 0x2c0c, 0x4454, + 0x3a30, 0x0a1e, 0x1642, 0x2e0c, 0x4654, + 0x3a32, 0x0a20, 0x1644, 0x2e0e, 0x4656, + 0x3832, 0x0820, 0x1444, 0x2c0e, 0x4456, + 0x3632, 0x0620, 0x1244, 0x2a0e, 0x4256, + 0x3634, 0x0622, 0x1246, 0x2a10, 0x4258, + 0x3834, 0x0822, 0x1446, 0x2c10, 0x4458, + 0x3a34, 0x0a22, 0x1646, 0x2e10, 0x4658, + 0x3c24, 0x0c12, 0x1836, 0x3000, 0x0048, + 0x3e24, 0x0e12, 0x1a36, 0x3200, 0x0248, + 0x4024, 0x1012, 0x1c36, 0x3400, 0x0448, + 0x4026, 0x1014, 0x1c38, 0x3402, 0x044a, + 0x3e26, 0x0e14, 0x1a38, 0x3202, 0x024a, + 0x3c26, 0x0c14, 0x1838, 0x3002, 0x004a, + 0x3c28, 0x0c16, 0x183a, 0x3004, 0x004c, + 0x3e28, 0x0e16, 0x1a3a, 0x3204, 0x024c, + 0x4028, 0x1016, 0x1c3a, 0x3404, 0x044c, + 0x402a, 0x1018, 0x1c3c, 0x3406, 0x044e, + 0x3e2a, 0x0e18, 0x1a3c, 0x3206, 0x024e, + 0x3c2a, 0x0c18, 0x183c, 0x3006, 0x004e, + 0x3c2c, 0x0c1a, 0x183e, 0x3008, 0x0050, + 0x3e2c, 0x0e1a, 0x1a3e, 0x3208, 0x0250, + 0x402c, 0x101a, 0x1c3e, 0x3408, 0x0450, + 0x402e, 0x101c, 0x1c40, 0x340a, 0x0452, + 0x3e2e, 0x0e1c, 0x1a40, 0x320a, 0x0252, + 0x3c2e, 0x0c1c, 0x1840, 0x300a, 0x0052, + 0x3c30, 0x0c1e, 0x1842, 0x300c, 0x0054, + 0x3e30, 0x0e1e, 0x1a42, 0x320c, 0x0254, + 
0x4030, 0x101e, 0x1c42, 0x340c, 0x0454, + 0x4032, 0x1020, 0x1c44, 0x340e, 0x0456, + 0x3e32, 0x0e20, 0x1a44, 0x320e, 0x0256, + 0x3c32, 0x0c20, 0x1844, 0x300e, 0x0056, + 0x3c34, 0x0c22, 0x1846, 0x3010, 0x0058, + 0x3e34, 0x0e22, 0x1a46, 0x3210, 0x0258, + 0x4034, 0x1022, 0x1c46, 0x3410, 0x0458, + 0x4224, 0x1212, 0x1e36, 0x3600, 0x0648, + 0x4424, 0x1412, 0x2036, 0x3800, 0x0848, + 0x4624, 0x1612, 0x2236, 0x3a00, 0x0a48, + 0x4626, 0x1614, 0x2238, 0x3a02, 0x0a4a, + 0x4426, 0x1414, 0x2038, 0x3802, 0x084a, + 0x4226, 0x1214, 0x1e38, 0x3602, 0x064a, + 0x4228, 0x1216, 0x1e3a, 0x3604, 0x064c, + 0x4428, 0x1416, 0x203a, 0x3804, 0x084c, + 0x4628, 0x1616, 0x223a, 0x3a04, 0x0a4c, + 0x462a, 0x1618, 0x223c, 0x3a06, 0x0a4e, + 0x442a, 0x1418, 0x203c, 0x3806, 0x084e, + 0x422a, 0x1218, 0x1e3c, 0x3606, 0x064e, + 0x422c, 0x121a, 0x1e3e, 0x3608, 0x0650, + 0x442c, 0x141a, 0x203e, 0x3808, 0x0850, + 0x462c, 0x161a, 0x223e, 0x3a08, 0x0a50, + 0x462e, 0x161c, 0x2240, 0x3a0a, 0x0a52, + 0x442e, 0x141c, 0x2040, 0x380a, 0x0852, + 0x422e, 0x121c, 0x1e40, 0x360a, 0x0652, + 0x4230, 0x121e, 0x1e42, 0x360c, 0x0654, + 0x4430, 0x141e, 0x2042, 0x380c, 0x0854, + 0x4630, 0x161e, 0x2242, 0x3a0c, 0x0a54, + 0x4632, 0x1620, 0x2244, 0x3a0e, 0x0a56, + 0x4432, 0x1420, 0x2044, 0x380e, 0x0856, + 0x4232, 0x1220, 0x1e44, 0x360e, 0x0656, + 0x4234, 0x1222, 0x1e46, 0x3610, 0x0658, + 0x4434, 0x1422, 0x2046, 0x3810, 0x0858, + 0x4634, 0x1622, 0x2246, 0x3a10, 0x0a58, + 0x0024, 0x1812, 0x2436, 0x3c00, 0x0c48, + 0x0224, 0x1a12, 0x2636, 0x3e00, 0x0e48, + 0x0424, 0x1c12, 0x2836, 0x4000, 0x1048, + 0x0426, 0x1c14, 0x2838, 0x4002, 0x104a, + 0x0226, 0x1a14, 0x2638, 0x3e02, 0x0e4a, + 0x0026, 0x1814, 0x2438, 0x3c02, 0x0c4a, + 0x0028, 0x1816, 0x243a, 0x3c04, 0x0c4c, + 0x0228, 0x1a16, 0x263a, 0x3e04, 0x0e4c, + 0x0428, 0x1c16, 0x283a, 0x4004, 0x104c, + 0x042a, 0x1c18, 0x283c, 0x4006, 0x104e, + 0x022a, 0x1a18, 0x263c, 0x3e06, 0x0e4e, + 0x002a, 0x1818, 0x243c, 0x3c06, 0x0c4e, + 0x002c, 0x181a, 0x243e, 0x3c08, 0x0c50, + 0x022c, 0x1a1a, 0x263e, 
0x3e08, 0x0e50, + 0x042c, 0x1c1a, 0x283e, 0x4008, 0x1050, + 0x042e, 0x1c1c, 0x2840, 0x400a, 0x1052, + 0x022e, 0x1a1c, 0x2640, 0x3e0a, 0x0e52, + 0x002e, 0x181c, 0x2440, 0x3c0a, 0x0c52, + 0x0030, 0x181e, 0x2442, 0x3c0c, 0x0c54, + 0x0230, 0x1a1e, 0x2642, 0x3e0c, 0x0e54, + 0x0430, 0x1c1e, 0x2842, 0x400c, 0x1054, + 0x0432, 0x1c20, 0x2844, 0x400e, 0x1056, + 0x0232, 0x1a20, 0x2644, 0x3e0e, 0x0e56, + 0x0032, 0x1820, 0x2444, 0x3c0e, 0x0c56, + 0x0034, 0x1822, 0x2446, 0x3c10, 0x0c58, + 0x0234, 0x1a22, 0x2646, 0x3e10, 0x0e58, + 0x0434, 0x1c22, 0x2846, 0x4010, 0x1058, + 0x0624, 0x1e12, 0x2a36, 0x4200, 0x1248, + 0x0824, 0x2012, 0x2c36, 0x4400, 0x1448, + 0x0a24, 0x2212, 0x2e36, 0x4600, 0x1648, + 0x0a26, 0x2214, 0x2e38, 0x4602, 0x164a, + 0x0826, 0x2014, 0x2c38, 0x4402, 0x144a, + 0x0626, 0x1e14, 0x2a38, 0x4202, 0x124a, + 0x0628, 0x1e16, 0x2a3a, 0x4204, 0x124c, + 0x0828, 0x2016, 0x2c3a, 0x4404, 0x144c, + 0x0a28, 0x2216, 0x2e3a, 0x4604, 0x164c, + 0x0a2a, 0x2218, 0x2e3c, 0x4606, 0x164e, + 0x082a, 0x2018, 0x2c3c, 0x4406, 0x144e, + 0x062a, 0x1e18, 0x2a3c, 0x4206, 0x124e, + 0x062c, 0x1e1a, 0x2a3e, 0x4208, 0x1250, + 0x082c, 0x201a, 0x2c3e, 0x4408, 0x1450, + 0x0a2c, 0x221a, 0x2e3e, 0x4608, 0x1650, + 0x0a2e, 0x221c, 0x2e40, 0x460a, 0x1652, + 0x082e, 0x201c, 0x2c40, 0x440a, 0x1452, + 0x062e, 0x1e1c, 0x2a40, 0x420a, 0x1252, + 0x0630, 0x1e1e, 0x2a42, 0x420c, 0x1254, + 0x0830, 0x201e, 0x2c42, 0x440c, 0x1454, + 0x0a30, 0x221e, 0x2e42, 0x460c, 0x1654, + 0x0a32, 0x2220, 0x2e44, 0x460e, 0x1656, + 0x0832, 0x2020, 0x2c44, 0x440e, 0x1456, + 0x0632, 0x1e20, 0x2a44, 0x420e, 0x1256, + 0x0634, 0x1e22, 0x2a46, 0x4210, 0x1258, + 0x0834, 0x2022, 0x2c46, 0x4410, 0x1458, + 0x0a34, 0x2222, 0x2e46, 0x4610, 0x1658, +}; + +static const UINT16 dv_place_411[1350] = { + 0x0c24, 0x2710, 0x3334, 0x0000, 0x1848, + 0x0d24, 0x2810, 0x3434, 0x0100, 0x1948, + 0x0e24, 0x2910, 0x3534, 0x0200, 0x1a48, + 0x0f24, 0x2914, 0x3538, 0x0300, 0x1b48, + 0x1024, 0x2814, 0x3438, 0x0400, 0x1c48, + 0x1124, 0x2714, 0x3338, 0x0500, 0x1d48, + 
0x1128, 0x2614, 0x3238, 0x0504, 0x1d4c, + 0x1028, 0x2514, 0x3138, 0x0404, 0x1c4c, + 0x0f28, 0x2414, 0x3038, 0x0304, 0x1b4c, + 0x0e28, 0x2418, 0x303c, 0x0204, 0x1a4c, + 0x0d28, 0x2518, 0x313c, 0x0104, 0x194c, + 0x0c28, 0x2618, 0x323c, 0x0004, 0x184c, + 0x0c2c, 0x2718, 0x333c, 0x0008, 0x1850, + 0x0d2c, 0x2818, 0x343c, 0x0108, 0x1950, + 0x0e2c, 0x2918, 0x353c, 0x0208, 0x1a50, + 0x0f2c, 0x291c, 0x3540, 0x0308, 0x1b50, + 0x102c, 0x281c, 0x3440, 0x0408, 0x1c50, + 0x112c, 0x271c, 0x3340, 0x0508, 0x1d50, + 0x1130, 0x261c, 0x3240, 0x050c, 0x1d54, + 0x1030, 0x251c, 0x3140, 0x040c, 0x1c54, + 0x0f30, 0x241c, 0x3040, 0x030c, 0x1b54, + 0x0e30, 0x2420, 0x3044, 0x020c, 0x1a54, + 0x0d30, 0x2520, 0x3144, 0x010c, 0x1954, + 0x0c30, 0x2620, 0x3244, 0x000c, 0x1854, + 0x0c34, 0x2720, 0x3344, 0x0010, 0x1858, + 0x0d34, 0x2820, 0x3444, 0x0110, 0x1a58, + 0x0e34, 0x2920, 0x3544, 0x0210, 0x1c58, + 0x1224, 0x2d10, 0x3934, 0x0600, 0x1e48, + 0x1324, 0x2e10, 0x3a34, 0x0700, 0x1f48, + 0x1424, 0x2f10, 0x3b34, 0x0800, 0x2048, + 0x1524, 0x2f14, 0x3b38, 0x0900, 0x2148, + 0x1624, 0x2e14, 0x3a38, 0x0a00, 0x2248, + 0x1724, 0x2d14, 0x3938, 0x0b00, 0x2348, + 0x1728, 0x2c14, 0x3838, 0x0b04, 0x234c, + 0x1628, 0x2b14, 0x3738, 0x0a04, 0x224c, + 0x1528, 0x2a14, 0x3638, 0x0904, 0x214c, + 0x1428, 0x2a18, 0x363c, 0x0804, 0x204c, + 0x1328, 0x2b18, 0x373c, 0x0704, 0x1f4c, + 0x1228, 0x2c18, 0x383c, 0x0604, 0x1e4c, + 0x122c, 0x2d18, 0x393c, 0x0608, 0x1e50, + 0x132c, 0x2e18, 0x3a3c, 0x0708, 0x1f50, + 0x142c, 0x2f18, 0x3b3c, 0x0808, 0x2050, + 0x152c, 0x2f1c, 0x3b40, 0x0908, 0x2150, + 0x162c, 0x2e1c, 0x3a40, 0x0a08, 0x2250, + 0x172c, 0x2d1c, 0x3940, 0x0b08, 0x2350, + 0x1730, 0x2c1c, 0x3840, 0x0b0c, 0x2354, + 0x1630, 0x2b1c, 0x3740, 0x0a0c, 0x2254, + 0x1530, 0x2a1c, 0x3640, 0x090c, 0x2154, + 0x1430, 0x2a20, 0x3644, 0x080c, 0x2054, + 0x1330, 0x2b20, 0x3744, 0x070c, 0x1f54, + 0x1230, 0x2c20, 0x3844, 0x060c, 0x1e54, + 0x1234, 0x2d20, 0x3944, 0x0610, 0x1e58, + 0x1334, 0x2e20, 0x3a44, 0x0710, 0x2058, + 0x1434, 0x2f20, 0x3b44, 
0x0810, 0x2258, + 0x1824, 0x3310, 0x0334, 0x0c00, 0x2448, + 0x1924, 0x3410, 0x0434, 0x0d00, 0x2548, + 0x1a24, 0x3510, 0x0534, 0x0e00, 0x2648, + 0x1b24, 0x3514, 0x0538, 0x0f00, 0x2748, + 0x1c24, 0x3414, 0x0438, 0x1000, 0x2848, + 0x1d24, 0x3314, 0x0338, 0x1100, 0x2948, + 0x1d28, 0x3214, 0x0238, 0x1104, 0x294c, + 0x1c28, 0x3114, 0x0138, 0x1004, 0x284c, + 0x1b28, 0x3014, 0x0038, 0x0f04, 0x274c, + 0x1a28, 0x3018, 0x003c, 0x0e04, 0x264c, + 0x1928, 0x3118, 0x013c, 0x0d04, 0x254c, + 0x1828, 0x3218, 0x023c, 0x0c04, 0x244c, + 0x182c, 0x3318, 0x033c, 0x0c08, 0x2450, + 0x192c, 0x3418, 0x043c, 0x0d08, 0x2550, + 0x1a2c, 0x3518, 0x053c, 0x0e08, 0x2650, + 0x1b2c, 0x351c, 0x0540, 0x0f08, 0x2750, + 0x1c2c, 0x341c, 0x0440, 0x1008, 0x2850, + 0x1d2c, 0x331c, 0x0340, 0x1108, 0x2950, + 0x1d30, 0x321c, 0x0240, 0x110c, 0x2954, + 0x1c30, 0x311c, 0x0140, 0x100c, 0x2854, + 0x1b30, 0x301c, 0x0040, 0x0f0c, 0x2754, + 0x1a30, 0x3020, 0x0044, 0x0e0c, 0x2654, + 0x1930, 0x3120, 0x0144, 0x0d0c, 0x2554, + 0x1830, 0x3220, 0x0244, 0x0c0c, 0x2454, + 0x1834, 0x3320, 0x0344, 0x0c10, 0x2458, + 0x1934, 0x3420, 0x0444, 0x0d10, 0x2658, + 0x1a34, 0x3520, 0x0544, 0x0e10, 0x2858, + 0x1e24, 0x3910, 0x0934, 0x1200, 0x2a48, + 0x1f24, 0x3a10, 0x0a34, 0x1300, 0x2b48, + 0x2024, 0x3b10, 0x0b34, 0x1400, 0x2c48, + 0x2124, 0x3b14, 0x0b38, 0x1500, 0x2d48, + 0x2224, 0x3a14, 0x0a38, 0x1600, 0x2e48, + 0x2324, 0x3914, 0x0938, 0x1700, 0x2f48, + 0x2328, 0x3814, 0x0838, 0x1704, 0x2f4c, + 0x2228, 0x3714, 0x0738, 0x1604, 0x2e4c, + 0x2128, 0x3614, 0x0638, 0x1504, 0x2d4c, + 0x2028, 0x3618, 0x063c, 0x1404, 0x2c4c, + 0x1f28, 0x3718, 0x073c, 0x1304, 0x2b4c, + 0x1e28, 0x3818, 0x083c, 0x1204, 0x2a4c, + 0x1e2c, 0x3918, 0x093c, 0x1208, 0x2a50, + 0x1f2c, 0x3a18, 0x0a3c, 0x1308, 0x2b50, + 0x202c, 0x3b18, 0x0b3c, 0x1408, 0x2c50, + 0x212c, 0x3b1c, 0x0b40, 0x1508, 0x2d50, + 0x222c, 0x3a1c, 0x0a40, 0x1608, 0x2e50, + 0x232c, 0x391c, 0x0940, 0x1708, 0x2f50, + 0x2330, 0x381c, 0x0840, 0x170c, 0x2f54, + 0x2230, 0x371c, 0x0740, 0x160c, 0x2e54, + 0x2130, 
0x361c, 0x0640, 0x150c, 0x2d54, + 0x2030, 0x3620, 0x0644, 0x140c, 0x2c54, + 0x1f30, 0x3720, 0x0744, 0x130c, 0x2b54, + 0x1e30, 0x3820, 0x0844, 0x120c, 0x2a54, + 0x1e34, 0x3920, 0x0944, 0x1210, 0x2a58, + 0x1f34, 0x3a20, 0x0a44, 0x1310, 0x2c58, + 0x2034, 0x3b20, 0x0b44, 0x1410, 0x2e58, + 0x2424, 0x0310, 0x0f34, 0x1800, 0x3048, + 0x2524, 0x0410, 0x1034, 0x1900, 0x3148, + 0x2624, 0x0510, 0x1134, 0x1a00, 0x3248, + 0x2724, 0x0514, 0x1138, 0x1b00, 0x3348, + 0x2824, 0x0414, 0x1038, 0x1c00, 0x3448, + 0x2924, 0x0314, 0x0f38, 0x1d00, 0x3548, + 0x2928, 0x0214, 0x0e38, 0x1d04, 0x354c, + 0x2828, 0x0114, 0x0d38, 0x1c04, 0x344c, + 0x2728, 0x0014, 0x0c38, 0x1b04, 0x334c, + 0x2628, 0x0018, 0x0c3c, 0x1a04, 0x324c, + 0x2528, 0x0118, 0x0d3c, 0x1904, 0x314c, + 0x2428, 0x0218, 0x0e3c, 0x1804, 0x304c, + 0x242c, 0x0318, 0x0f3c, 0x1808, 0x3050, + 0x252c, 0x0418, 0x103c, 0x1908, 0x3150, + 0x262c, 0x0518, 0x113c, 0x1a08, 0x3250, + 0x272c, 0x051c, 0x1140, 0x1b08, 0x3350, + 0x282c, 0x041c, 0x1040, 0x1c08, 0x3450, + 0x292c, 0x031c, 0x0f40, 0x1d08, 0x3550, + 0x2930, 0x021c, 0x0e40, 0x1d0c, 0x3554, + 0x2830, 0x011c, 0x0d40, 0x1c0c, 0x3454, + 0x2730, 0x001c, 0x0c40, 0x1b0c, 0x3354, + 0x2630, 0x0020, 0x0c44, 0x1a0c, 0x3254, + 0x2530, 0x0120, 0x0d44, 0x190c, 0x3154, + 0x2430, 0x0220, 0x0e44, 0x180c, 0x3054, + 0x2434, 0x0320, 0x0f44, 0x1810, 0x3058, + 0x2534, 0x0420, 0x1044, 0x1910, 0x3258, + 0x2634, 0x0520, 0x1144, 0x1a10, 0x3458, + 0x2a24, 0x0910, 0x1534, 0x1e00, 0x3648, + 0x2b24, 0x0a10, 0x1634, 0x1f00, 0x3748, + 0x2c24, 0x0b10, 0x1734, 0x2000, 0x3848, + 0x2d24, 0x0b14, 0x1738, 0x2100, 0x3948, + 0x2e24, 0x0a14, 0x1638, 0x2200, 0x3a48, + 0x2f24, 0x0914, 0x1538, 0x2300, 0x3b48, + 0x2f28, 0x0814, 0x1438, 0x2304, 0x3b4c, + 0x2e28, 0x0714, 0x1338, 0x2204, 0x3a4c, + 0x2d28, 0x0614, 0x1238, 0x2104, 0x394c, + 0x2c28, 0x0618, 0x123c, 0x2004, 0x384c, + 0x2b28, 0x0718, 0x133c, 0x1f04, 0x374c, + 0x2a28, 0x0818, 0x143c, 0x1e04, 0x364c, + 0x2a2c, 0x0918, 0x153c, 0x1e08, 0x3650, + 0x2b2c, 0x0a18, 0x163c, 0x1f08, 
0x3750, + 0x2c2c, 0x0b18, 0x173c, 0x2008, 0x3850, + 0x2d2c, 0x0b1c, 0x1740, 0x2108, 0x3950, + 0x2e2c, 0x0a1c, 0x1640, 0x2208, 0x3a50, + 0x2f2c, 0x091c, 0x1540, 0x2308, 0x3b50, + 0x2f30, 0x081c, 0x1440, 0x230c, 0x3b54, + 0x2e30, 0x071c, 0x1340, 0x220c, 0x3a54, + 0x2d30, 0x061c, 0x1240, 0x210c, 0x3954, + 0x2c30, 0x0620, 0x1244, 0x200c, 0x3854, + 0x2b30, 0x0720, 0x1344, 0x1f0c, 0x3754, + 0x2a30, 0x0820, 0x1444, 0x1e0c, 0x3654, + 0x2a34, 0x0920, 0x1544, 0x1e10, 0x3658, + 0x2b34, 0x0a20, 0x1644, 0x1f10, 0x3858, + 0x2c34, 0x0b20, 0x1744, 0x2010, 0x3a58, + 0x3024, 0x0f10, 0x1b34, 0x2400, 0x0048, + 0x3124, 0x1010, 0x1c34, 0x2500, 0x0148, + 0x3224, 0x1110, 0x1d34, 0x2600, 0x0248, + 0x3324, 0x1114, 0x1d38, 0x2700, 0x0348, + 0x3424, 0x1014, 0x1c38, 0x2800, 0x0448, + 0x3524, 0x0f14, 0x1b38, 0x2900, 0x0548, + 0x3528, 0x0e14, 0x1a38, 0x2904, 0x054c, + 0x3428, 0x0d14, 0x1938, 0x2804, 0x044c, + 0x3328, 0x0c14, 0x1838, 0x2704, 0x034c, + 0x3228, 0x0c18, 0x183c, 0x2604, 0x024c, + 0x3128, 0x0d18, 0x193c, 0x2504, 0x014c, + 0x3028, 0x0e18, 0x1a3c, 0x2404, 0x004c, + 0x302c, 0x0f18, 0x1b3c, 0x2408, 0x0050, + 0x312c, 0x1018, 0x1c3c, 0x2508, 0x0150, + 0x322c, 0x1118, 0x1d3c, 0x2608, 0x0250, + 0x332c, 0x111c, 0x1d40, 0x2708, 0x0350, + 0x342c, 0x101c, 0x1c40, 0x2808, 0x0450, + 0x352c, 0x0f1c, 0x1b40, 0x2908, 0x0550, + 0x3530, 0x0e1c, 0x1a40, 0x290c, 0x0554, + 0x3430, 0x0d1c, 0x1940, 0x280c, 0x0454, + 0x3330, 0x0c1c, 0x1840, 0x270c, 0x0354, + 0x3230, 0x0c20, 0x1844, 0x260c, 0x0254, + 0x3130, 0x0d20, 0x1944, 0x250c, 0x0154, + 0x3030, 0x0e20, 0x1a44, 0x240c, 0x0054, + 0x3034, 0x0f20, 0x1b44, 0x2410, 0x0058, + 0x3134, 0x1020, 0x1c44, 0x2510, 0x0258, + 0x3234, 0x1120, 0x1d44, 0x2610, 0x0458, + 0x3624, 0x1510, 0x2134, 0x2a00, 0x0648, + 0x3724, 0x1610, 0x2234, 0x2b00, 0x0748, + 0x3824, 0x1710, 0x2334, 0x2c00, 0x0848, + 0x3924, 0x1714, 0x2338, 0x2d00, 0x0948, + 0x3a24, 0x1614, 0x2238, 0x2e00, 0x0a48, + 0x3b24, 0x1514, 0x2138, 0x2f00, 0x0b48, + 0x3b28, 0x1414, 0x2038, 0x2f04, 0x0b4c, + 0x3a28, 0x1314, 
0x1f38, 0x2e04, 0x0a4c, + 0x3928, 0x1214, 0x1e38, 0x2d04, 0x094c, + 0x3828, 0x1218, 0x1e3c, 0x2c04, 0x084c, + 0x3728, 0x1318, 0x1f3c, 0x2b04, 0x074c, + 0x3628, 0x1418, 0x203c, 0x2a04, 0x064c, + 0x362c, 0x1518, 0x213c, 0x2a08, 0x0650, + 0x372c, 0x1618, 0x223c, 0x2b08, 0x0750, + 0x382c, 0x1718, 0x233c, 0x2c08, 0x0850, + 0x392c, 0x171c, 0x2340, 0x2d08, 0x0950, + 0x3a2c, 0x161c, 0x2240, 0x2e08, 0x0a50, + 0x3b2c, 0x151c, 0x2140, 0x2f08, 0x0b50, + 0x3b30, 0x141c, 0x2040, 0x2f0c, 0x0b54, + 0x3a30, 0x131c, 0x1f40, 0x2e0c, 0x0a54, + 0x3930, 0x121c, 0x1e40, 0x2d0c, 0x0954, + 0x3830, 0x1220, 0x1e44, 0x2c0c, 0x0854, + 0x3730, 0x1320, 0x1f44, 0x2b0c, 0x0754, + 0x3630, 0x1420, 0x2044, 0x2a0c, 0x0654, + 0x3634, 0x1520, 0x2144, 0x2a10, 0x0658, + 0x3734, 0x1620, 0x2244, 0x2b10, 0x0858, + 0x3834, 0x1720, 0x2344, 0x2c10, 0x0a58, + 0x0024, 0x1b10, 0x2734, 0x3000, 0x0c48, + 0x0124, 0x1c10, 0x2834, 0x3100, 0x0d48, + 0x0224, 0x1d10, 0x2934, 0x3200, 0x0e48, + 0x0324, 0x1d14, 0x2938, 0x3300, 0x0f48, + 0x0424, 0x1c14, 0x2838, 0x3400, 0x1048, + 0x0524, 0x1b14, 0x2738, 0x3500, 0x1148, + 0x0528, 0x1a14, 0x2638, 0x3504, 0x114c, + 0x0428, 0x1914, 0x2538, 0x3404, 0x104c, + 0x0328, 0x1814, 0x2438, 0x3304, 0x0f4c, + 0x0228, 0x1818, 0x243c, 0x3204, 0x0e4c, + 0x0128, 0x1918, 0x253c, 0x3104, 0x0d4c, + 0x0028, 0x1a18, 0x263c, 0x3004, 0x0c4c, + 0x002c, 0x1b18, 0x273c, 0x3008, 0x0c50, + 0x012c, 0x1c18, 0x283c, 0x3108, 0x0d50, + 0x022c, 0x1d18, 0x293c, 0x3208, 0x0e50, + 0x032c, 0x1d1c, 0x2940, 0x3308, 0x0f50, + 0x042c, 0x1c1c, 0x2840, 0x3408, 0x1050, + 0x052c, 0x1b1c, 0x2740, 0x3508, 0x1150, + 0x0530, 0x1a1c, 0x2640, 0x350c, 0x1154, + 0x0430, 0x191c, 0x2540, 0x340c, 0x1054, + 0x0330, 0x181c, 0x2440, 0x330c, 0x0f54, + 0x0230, 0x1820, 0x2444, 0x320c, 0x0e54, + 0x0130, 0x1920, 0x2544, 0x310c, 0x0d54, + 0x0030, 0x1a20, 0x2644, 0x300c, 0x0c54, + 0x0034, 0x1b20, 0x2744, 0x3010, 0x0c58, + 0x0134, 0x1c20, 0x2844, 0x3110, 0x0e58, + 0x0234, 0x1d20, 0x2944, 0x3210, 0x1058, + 0x0624, 0x2110, 0x2d34, 0x3600, 0x1248, + 
0x0724, 0x2210, 0x2e34, 0x3700, 0x1348, + 0x0824, 0x2310, 0x2f34, 0x3800, 0x1448, + 0x0924, 0x2314, 0x2f38, 0x3900, 0x1548, + 0x0a24, 0x2214, 0x2e38, 0x3a00, 0x1648, + 0x0b24, 0x2114, 0x2d38, 0x3b00, 0x1748, + 0x0b28, 0x2014, 0x2c38, 0x3b04, 0x174c, + 0x0a28, 0x1f14, 0x2b38, 0x3a04, 0x164c, + 0x0928, 0x1e14, 0x2a38, 0x3904, 0x154c, + 0x0828, 0x1e18, 0x2a3c, 0x3804, 0x144c, + 0x0728, 0x1f18, 0x2b3c, 0x3704, 0x134c, + 0x0628, 0x2018, 0x2c3c, 0x3604, 0x124c, + 0x062c, 0x2118, 0x2d3c, 0x3608, 0x1250, + 0x072c, 0x2218, 0x2e3c, 0x3708, 0x1350, + 0x082c, 0x2318, 0x2f3c, 0x3808, 0x1450, + 0x092c, 0x231c, 0x2f40, 0x3908, 0x1550, + 0x0a2c, 0x221c, 0x2e40, 0x3a08, 0x1650, + 0x0b2c, 0x211c, 0x2d40, 0x3b08, 0x1750, + 0x0b30, 0x201c, 0x2c40, 0x3b0c, 0x1754, + 0x0a30, 0x1f1c, 0x2b40, 0x3a0c, 0x1654, + 0x0930, 0x1e1c, 0x2a40, 0x390c, 0x1554, + 0x0830, 0x1e20, 0x2a44, 0x380c, 0x1454, + 0x0730, 0x1f20, 0x2b44, 0x370c, 0x1354, + 0x0630, 0x2020, 0x2c44, 0x360c, 0x1254, + 0x0634, 0x2120, 0x2d44, 0x3610, 0x1258, + 0x0734, 0x2220, 0x2e44, 0x3710, 0x1458, + 0x0834, 0x2320, 0x2f44, 0x3810, 0x1658, +}; + diff --git a/src/libffmpeg/libavcodec/error_resilience.c b/src/libffmpeg/libavcodec/error_resilience.c new file mode 100644 index 000000000..84b7bb8a0 --- /dev/null +++ b/src/libffmpeg/libavcodec/error_resilience.c @@ -0,0 +1,885 @@ +/* + * Error resilience / concealment + * + * Copyright (c) 2002 Michael Niedermayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "avcodec.h" +#include "dsputil.h" +#include "mpegvideo.h" + +/** + * replaces the current MB with a flat dc only version. + */ +static void put_dc(MpegEncContext *s, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, int mb_x, int mb_y) +{ + int dc, dcu, dcv, y, i; + for(i=0; i<4; i++){ + dc= s->dc_val[0][mb_x*2+1 + (i&1) + (mb_y*2+1 + (i>>1))*(s->mb_width*2+2)]; + if(dc<0) dc=0; + else if(dc>2040) dc=2040; + for(y=0; y<8; y++){ + int x; + for(x=0; x<8; x++){ + dest_y[x + (i&1)*8 + (y + (i>>1)*8)*s->linesize]= dc/8; + } + } + } + dcu = s->dc_val[1][mb_x+1 + (mb_y+1)*(s->mb_width+2)]; + dcv = s->dc_val[2][mb_x+1 + (mb_y+1)*(s->mb_width+2)]; + if (dcu<0 ) dcu=0; + else if(dcu>2040) dcu=2040; + if (dcv<0 ) dcv=0; + else if(dcv>2040) dcv=2040; + for(y=0; y<8; y++){ + int x; + for(x=0; x<8; x++){ + dest_cb[x + y*(s->uvlinesize)]= dcu/8; + dest_cr[x + y*(s->uvlinesize)]= dcv/8; + } + } +} + +static void filter181(INT16 *data, int width, int height, int stride){ + int x,y; + + /* horizontal filter */ + for(y=1; y>16; + prev_dc= data[x + y*stride]; + data[x + y*stride]= dc; + } + } + + /* vertical filter */ + for(x=1; x>16; + prev_dc= data[x + y*stride]; + data[x + y*stride]= dc; + } + } +} + +/** + * guess the dc of blocks which dont have a undamaged dc + * @param w width in 8 pixel blocks + * @param h height in 8 pixel blocks + */ +static void guess_dc(MpegEncContext *s, INT16 *dc, int w, int h, int stride, int is_luma){ + int b_x, b_y; + + for(b_y=0; b_y>is_luma) + (b_y>>is_luma)*s->mb_width; + + error= s->error_status_table[mb_index]; + + if(!(s->mb_type[mb_index]&MB_TYPE_INTRA)) continue; //inter + if(!(error&DC_ERROR)) continue; //dc-ok + + /* right block */ + for(j=b_x+1; j>is_luma) + (b_y>>is_luma)*s->mb_width; + int error_j= 
s->error_status_table[mb_index_j]; + int intra_j= s->mb_type[mb_index_j]&MB_TYPE_INTRA; + if(intra_j==0 || !(error_j&DC_ERROR)){ + color[0]= dc[j + b_y*stride]; + distance[0]= j-b_x; + break; + } + } + + /* left block */ + for(j=b_x-1; j>=0; j--){ + int mb_index_j= (j>>is_luma) + (b_y>>is_luma)*s->mb_width; + int error_j= s->error_status_table[mb_index_j]; + int intra_j= s->mb_type[mb_index_j]&MB_TYPE_INTRA; + if(intra_j==0 || !(error_j&DC_ERROR)){ + color[1]= dc[j + b_y*stride]; + distance[1]= b_x-j; + break; + } + } + + /* bottom block */ + for(j=b_y+1; j>is_luma) + (j>>is_luma)*s->mb_width; + int error_j= s->error_status_table[mb_index_j]; + int intra_j= s->mb_type[mb_index_j]&MB_TYPE_INTRA; + if(intra_j==0 || !(error_j&DC_ERROR)){ + color[2]= dc[b_x + j*stride]; + distance[2]= j-b_y; + break; + } + } + + /* top block */ + for(j=b_y-1; j>=0; j--){ + int mb_index_j= (b_x>>is_luma) + (j>>is_luma)*s->mb_width; + int error_j= s->error_status_table[mb_index_j]; + int intra_j= s->mb_type[mb_index_j]&MB_TYPE_INTRA; + if(intra_j==0 || !(error_j&DC_ERROR)){ + color[3]= dc[b_x + j*stride]; + distance[3]= b_y-j; + break; + } + } + + weight_sum=0; + guess=0; + for(j=0; j<4; j++){ + INT64 weight= 256*256*256*16/distance[j]; + guess+= weight*(INT64)color[j]; + weight_sum+= weight; + } + guess= (guess + weight_sum/2) / weight_sum; + + dc[b_x + b_y*stride]= guess; + } + } +} + +/** + * simple horizontal deblocking filter used for error resilience + * @param w width in 8 pixel blocks + * @param h height in 8 pixel blocks + */ +static void h_block_filter(MpegEncContext *s, UINT8 *dst, int w, int h, int stride, int is_luma){ + int b_x, b_y; + UINT8 *cm = cropTbl + MAX_NEG_CROP; + + for(b_y=0; b_yerror_status_table[( b_x >>is_luma) + (b_y>>is_luma)*s->mb_width]; + int right_status= s->error_status_table[((b_x+1)>>is_luma) + (b_y>>is_luma)*s->mb_width]; + int left_intra= s->mb_type [( b_x >>is_luma) + (b_y>>is_luma)*s->mb_width]&MB_TYPE_INTRA; + int right_intra= s->mb_type 
[((b_x+1)>>is_luma) + (b_y>>is_luma)*s->mb_width]&MB_TYPE_INTRA; + int left_damage = left_status&(DC_ERROR|AC_ERROR|MV_ERROR); + int right_damage= right_status&(DC_ERROR|AC_ERROR|MV_ERROR); + int offset= b_x*8 + b_y*stride*8; + INT16 *left_mv= s->motion_val[s->block_wrap[0]*((b_y<<(1-is_luma)) + 1) + ( b_x <<(1-is_luma))]; + INT16 *right_mv= s->motion_val[s->block_wrap[0]*((b_y<<(1-is_luma)) + 1) + ((b_x+1)<<(1-is_luma))]; + + if(!(left_damage||right_damage)) continue; // both undamaged + + if( (!left_intra) && (!right_intra) + && ABS(left_mv[0]-right_mv[0]) + ABS(left_mv[1]+right_mv[1]) < 2) continue; + + for(y=0; y<8; y++){ + int a,b,c,d; + + a= dst[offset + 7 + y*stride] - dst[offset + 6 + y*stride]; + b= dst[offset + 8 + y*stride] - dst[offset + 7 + y*stride]; + c= dst[offset + 9 + y*stride] - dst[offset + 8 + y*stride]; + + d= ABS(b) - ((ABS(a) + ABS(c) + 1)>>1); + d= MAX(d, 0); + if(b<0) d= -d; + + if(d==0) continue; + + if(!(left_damage && right_damage)) + d= d*16/9; + + if(left_damage){ + dst[offset + 7 + y*stride] = cm[dst[offset + 7 + y*stride] + ((d*7)>>4)]; + dst[offset + 6 + y*stride] = cm[dst[offset + 6 + y*stride] + ((d*5)>>4)]; + dst[offset + 5 + y*stride] = cm[dst[offset + 5 + y*stride] + ((d*3)>>4)]; + dst[offset + 4 + y*stride] = cm[dst[offset + 4 + y*stride] + ((d*1)>>4)]; + } + if(right_damage){ + dst[offset + 8 + y*stride] = cm[dst[offset + 8 + y*stride] - ((d*7)>>4)]; + dst[offset + 9 + y*stride] = cm[dst[offset + 9 + y*stride] - ((d*5)>>4)]; + dst[offset + 10+ y*stride] = cm[dst[offset +10 + y*stride] - ((d*3)>>4)]; + dst[offset + 11+ y*stride] = cm[dst[offset +11 + y*stride] - ((d*1)>>4)]; + } + } + } + } +} + +/** + * simple vertical deblocking filter used for error resilience + * @param w width in 8 pixel blocks + * @param h height in 8 pixel blocks + */ +static void v_block_filter(MpegEncContext *s, UINT8 *dst, int w, int h, int stride, int is_luma){ + int b_x, b_y; + UINT8 *cm = cropTbl + MAX_NEG_CROP; + + for(b_y=0; 
b_yerror_status_table[(b_x>>is_luma) + ( b_y >>is_luma)*s->mb_width]; + int bottom_status= s->error_status_table[(b_x>>is_luma) + ((b_y+1)>>is_luma)*s->mb_width]; + int top_intra= s->mb_type [(b_x>>is_luma) + ( b_y >>is_luma)*s->mb_width]&MB_TYPE_INTRA; + int bottom_intra= s->mb_type [(b_x>>is_luma) + ((b_y+1)>>is_luma)*s->mb_width]&MB_TYPE_INTRA; + int top_damage = top_status&(DC_ERROR|AC_ERROR|MV_ERROR); + int bottom_damage= bottom_status&(DC_ERROR|AC_ERROR|MV_ERROR); + int offset= b_x*8 + b_y*stride*8; + INT16 *top_mv= s->motion_val[s->block_wrap[0]*(( b_y <<(1-is_luma)) + 1) + (b_x<<(1-is_luma))]; + INT16 *bottom_mv= s->motion_val[s->block_wrap[0]*(((b_y+1)<<(1-is_luma)) + 1) + (b_x<<(1-is_luma))]; + + if(!(top_damage||bottom_damage)) continue; // both undamaged + + if( (!top_intra) && (!bottom_intra) + && ABS(top_mv[0]-bottom_mv[0]) + ABS(top_mv[1]+bottom_mv[1]) < 2) continue; + + for(x=0; x<8; x++){ + int a,b,c,d; + + a= dst[offset + x + 7*stride] - dst[offset + x + 6*stride]; + b= dst[offset + x + 8*stride] - dst[offset + x + 7*stride]; + c= dst[offset + x + 9*stride] - dst[offset + x + 8*stride]; + + d= ABS(b) - ((ABS(a) + ABS(c)+1)>>1); + d= MAX(d, 0); + if(b<0) d= -d; + + if(d==0) continue; + + if(!(top_damage && bottom_damage)) + d= d*16/9; + + if(top_damage){ + dst[offset + x + 7*stride] = cm[dst[offset + x + 7*stride] + ((d*7)>>4)]; + dst[offset + x + 6*stride] = cm[dst[offset + x + 6*stride] + ((d*5)>>4)]; + dst[offset + x + 5*stride] = cm[dst[offset + x + 5*stride] + ((d*3)>>4)]; + dst[offset + x + 4*stride] = cm[dst[offset + x + 4*stride] + ((d*1)>>4)]; + } + if(bottom_damage){ + dst[offset + x + 8*stride] = cm[dst[offset + x + 8*stride] - ((d*7)>>4)]; + dst[offset + x + 9*stride] = cm[dst[offset + x + 9*stride] - ((d*5)>>4)]; + dst[offset + x + 10*stride] = cm[dst[offset + x + 10*stride] - ((d*3)>>4)]; + dst[offset + x + 11*stride] = cm[dst[offset + x + 11*stride] - ((d*1)>>4)]; + } + } + } + } +} + +static void guess_mv(MpegEncContext *s){ + UINT8 
fixed[s->mb_num]; +#define MV_FROZEN 3 +#define MV_CHANGED 2 +#define MV_UNCHANGED 1 + const int mb_width = s->mb_width; + const int mb_height= s->mb_height; + int i, depth, num_avail; + + num_avail=0; + for(i=0; imb_num; i++){ + int f=0; + int error= s->error_status_table[i]; + + if(s->mb_type[i]&MB_TYPE_INTRA) f=MV_FROZEN; //intra //FIXME check + if(!(error&MV_ERROR)) f=MV_FROZEN; //inter with undamaged MV + + fixed[i]= f; + if(f==MV_FROZEN) + num_avail++; + } + + if((!(s->avctx->error_concealment&FF_EC_GUESS_MVS)) || num_avail <= mb_width/2){ + int mb_x, mb_y; + i= -1; + for(mb_y=0; mb_ymb_height; mb_y++){ + for(mb_x=0; mb_xmb_width; mb_x++){ + i++; + + if(s->mb_type[i]&MB_TYPE_INTRA) continue; + if(!(s->error_status_table[i]&MV_ERROR)) continue; + + s->mv_dir = MV_DIR_FORWARD; + s->mb_intra=0; + s->mv_type = MV_TYPE_16X16; + s->mb_skiped=0; + + clear_blocks(s->block[0]); + + s->mb_x= mb_x; + s->mb_y= mb_y; + s->mv[0][0][0]= 0; + s->mv[0][0][1]= 0; + MPV_decode_mb(s, s->block); + } + } + return; + } + + for(depth=0;; depth++){ + int changed, pass, none_left; + + none_left=1; + changed=1; + for(pass=0; (changed || pass<2) && pass<10; pass++){ + int i,mb_x, mb_y; +int score_sum=0; + + changed=0; + i= -1; + for(mb_y=0; mb_ymb_height; mb_y++){ + for(mb_x=0; mb_xmb_width; mb_x++){ + int mv_predictor[8][2]={{0}}; + int pred_count=0; + int j; + int best_score=256*256*256*64; + int best_pred=0; + const int mot_stride= mb_width*2+2; + const int mot_index= mb_x*2 + 1 + (mb_y*2+1)*mot_stride; + int prev_x= s->motion_val[mot_index][0]; + int prev_y= s->motion_val[mot_index][1]; + + i++; + if((mb_x^mb_y^pass)&1) continue; + + if(fixed[i]==MV_FROZEN) continue; + + j=0; + if(mb_x>0 && fixed[i-1 ]==MV_FROZEN) j=1; + if(mb_x+10 && fixed[i-mb_width]==MV_FROZEN) j=1; + if(mb_y+10 && fixed[i-1 ]==MV_CHANGED) j=1; + if(mb_x+10 && fixed[i-mb_width]==MV_CHANGED) j=1; + if(mb_y+11) continue; + + none_left=0; + + if(mb_x>0 && fixed[i-1]){ + mv_predictor[pred_count][0]= 
s->motion_val[mot_index - 2][0]; + mv_predictor[pred_count][1]= s->motion_val[mot_index - 2][1]; + pred_count++; + } + if(mb_x+1motion_val[mot_index + 2][0]; + mv_predictor[pred_count][1]= s->motion_val[mot_index + 2][1]; + pred_count++; + } + if(mb_y>0 && fixed[i-mb_width]){ + mv_predictor[pred_count][0]= s->motion_val[mot_index - mot_stride*2][0]; + mv_predictor[pred_count][1]= s->motion_val[mot_index - mot_stride*2][1]; + pred_count++; + } + if(mb_y+1motion_val[mot_index + mot_stride*2][0]; + mv_predictor[pred_count][1]= s->motion_val[mot_index + mot_stride*2][1]; + pred_count++; + } + if(pred_count==0) continue; + + if(pred_count>1){ + int sum_x=0, sum_y=0; + int max_x, max_y, min_x, min_y; + + for(j=0; j=3){ + min_y= min_x= 99999; + max_y= max_x=-99999; + }else{ + min_x=min_y=max_x=max_y=0; + } + for(j=0; jmotion_val[mot_index][0]; + mv_predictor[pred_count][1]= s->motion_val[mot_index][1]; + pred_count++; + + s->mv_dir = MV_DIR_FORWARD; + s->mb_intra=0; + s->mv_type = MV_TYPE_16X16; + s->mb_skiped=0; + + clear_blocks(s->block[0]); + + s->mb_x= mb_x; + s->mb_y= mb_y; + for(j=0; jcurrent_picture[0] + mb_x*16 + mb_y*16*s->linesize; + + s->motion_val[mot_index][0]= s->mv[0][0][0]= mv_predictor[j][0]; + s->motion_val[mot_index][1]= s->mv[0][0][1]= mv_predictor[j][1]; + MPV_decode_mb(s, s->block); + + if(mb_x>0 && fixed[i-1]){ + int k; + for(k=0; k<16; k++) + score += ABS(src[k*s->linesize-1 ]-src[k*s->linesize ]); + } + if(mb_x+1linesize+15]-src[k*s->linesize+16]); + } + if(mb_y>0 && fixed[i-mb_width]){ + int k; + for(k=0; k<16; k++) + score += ABS(src[k-s->linesize ]-src[k ]); + } + if(mb_y+1linesize*15]-src[k+s->linesize*16]); + } + + if(score <= best_score){ // <= will favor the last MV + best_score= score; + best_pred= j; + } + } +score_sum+= best_score; +//FIXME no need to set s->motion_val[mot_index][0] explicit + s->motion_val[mot_index][0]= s->mv[0][0][0]= mv_predictor[best_pred][0]; + s->motion_val[mot_index][1]= s->mv[0][0][1]= 
mv_predictor[best_pred][1]; + + MPV_decode_mb(s, s->block); + + + if(s->mv[0][0][0] != prev_x || s->mv[0][0][1] != prev_y){ + fixed[i]=MV_CHANGED; + changed++; + }else + fixed[i]=MV_UNCHANGED; + } + } + +// printf(".%d/%d", changed, score_sum); fflush(stdout); + } + + if(none_left) + return; + + for(i=0; imb_num; i++){ + if(fixed[i]) + fixed[i]=MV_FROZEN; + } +// printf(":"); fflush(stdout); + } +} + +static int is_intra_more_likely(MpegEncContext *s){ + int is_intra_likely, i, j, undamaged_count, skip_amount, mb_x, mb_y; + + undamaged_count=0; + for(i=0; imb_num; i++){ + int error= s->error_status_table[i]; + if(!((error&DC_ERROR) && (error&MV_ERROR))) + undamaged_count++; + } + + if(undamaged_count < 5) return 0; //allmost all MBs damaged -> use temporal prediction + + skip_amount= MAX(undamaged_count/50, 1); //check only upto 50 MBs + is_intra_likely=0; + + j=0; + i=-1; + for(mb_y= 0; mb_ymb_height-1; mb_y++){ + for(mb_x= 0; mb_xmb_width; mb_x++){ + int error; + + i++; + error= s->error_status_table[i]; + if((error&DC_ERROR) && (error&MV_ERROR)) + continue; //skip damaged + + j++; + if((j%skip_amount) != 0) continue; //skip a few to speed things up + + if(s->pict_type==I_TYPE){ + UINT8 *mb_ptr = s->current_picture[0] + mb_x*16 + mb_y*16*s->linesize; + UINT8 *last_mb_ptr= s->last_picture [0] + mb_x*16 + mb_y*16*s->linesize; + + is_intra_likely += pix_abs16x16(last_mb_ptr, mb_ptr , s->linesize); + is_intra_likely -= pix_abs16x16(last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize); + }else{ + if(s->mbintra_table[i]) //HACK (this is allways inited but we should use mb_type[]) + is_intra_likely++; + else + is_intra_likely--; + } + } + } +//printf("is_intra_likely: %d type:%d\n", is_intra_likely, s->pict_type); + return is_intra_likely > 0; +} + +void ff_error_resilience(MpegEncContext *s){ + int i, mb_x, mb_y, error, error_type; + int distance; + int threshold_part[4]= {100,100,100}; + int threshold= 50; + int is_intra_likely; + +#if 1 + /* handle overlapping 
slices */ + for(error_type=1; error_type<=3; error_type++){ + int end_ok=0; + + for(i=s->mb_num-1; i>=0; i--){ + int error= s->error_status_table[i]; + + if(error&(1<error_status_table[i]|= 1<partitioned_frame){ + int end_ok=0; + + for(i=s->mb_num-1; i>=0; i--){ + int error= s->error_status_table[i]; + + if(error&AC_END) + end_ok=0; + if((error&MV_END) || (error&DC_END) || (error&AC_ERROR)) + end_ok=1; + + if(!end_ok) + s->error_status_table[i]|= AC_ERROR; + + if(error&VP_START) + end_ok=0; + } + } +#endif + /* handle missing slices */ + if(s->error_resilience>=4){ + int end_ok=1; + + for(i=s->mb_num-2; i>=s->mb_width+100; i--){ //FIXME +100 hack + int error1= s->error_status_table[i ]; + int error2= s->error_status_table[i+1]; + + if(error1&VP_START) + end_ok=1; + + if( error2==(VP_START|DC_ERROR|AC_ERROR|MV_ERROR|AC_END|DC_END|MV_END) + && error1!=(VP_START|DC_ERROR|AC_ERROR|MV_ERROR|AC_END|DC_END|MV_END) + && ((error1&AC_END) || (error1&DC_END) || (error1&MV_END))){ //end & uninited + end_ok=0; + } + + if(!end_ok) + s->error_status_table[i]|= DC_ERROR|AC_ERROR|MV_ERROR; + } + } + +#if 1 + /* backward mark errors */ + distance=9999999; + for(error_type=1; error_type<=3; error_type++){ + for(i=s->mb_num-1; i>=0; i--){ + int error= s->error_status_table[i]; + + if(!s->mbskip_table[i]) //FIXME partition specific + distance++; + if(error&(1<partitioned_frame){ + if(distance < threshold_part[error_type-1]) + s->error_status_table[i]|= 1<error_status_table[i]|= 1<mb_num; i++){ + int old_error= s->error_status_table[i]; + + if(old_error&VP_START) + error= old_error& (DC_ERROR|AC_ERROR|MV_ERROR); + else{ + error|= old_error& (DC_ERROR|AC_ERROR|MV_ERROR); + s->error_status_table[i]|= error; + } + } +#if 1 + /* handle not partitioned case */ + if(!s->partitioned_frame){ + for(i=0; imb_num; i++){ + error= s->error_status_table[i]; + if(error&(AC_ERROR|DC_ERROR|MV_ERROR)) + error|= AC_ERROR|DC_ERROR|MV_ERROR; + s->error_status_table[i]= error; + } + } +#endif + 
is_intra_likely= is_intra_more_likely(s); + + /* set unknown mb-type to most likely */ + for(i=0; imb_num; i++){ + int intra; + error= s->error_status_table[i]; + if((error&DC_ERROR) && (error&MV_ERROR)) + intra= is_intra_likely; + else + intra= s->mbintra_table[i]; + + if(intra) + s->mb_type[i]|= MB_TYPE_INTRA; + else + s->mb_type[i]&= ~MB_TYPE_INTRA; + } + + /* handle inter blocks with damaged AC */ + i= -1; + for(mb_y=0; mb_ymb_height; mb_y++){ + for(mb_x=0; mb_xmb_width; mb_x++){ + i++; + error= s->error_status_table[i]; + + if(s->mb_type[i]&MB_TYPE_INTRA) continue; //intra + if(error&MV_ERROR) continue; //inter with damaged MV + if(!(error&AC_ERROR)) continue; //undamaged inter + + s->mv_dir = MV_DIR_FORWARD; + s->mb_intra=0; + s->mb_skiped=0; + if(s->mb_type[i]&MB_TYPE_INTER4V){ + int mb_index= mb_x*2+1 + (mb_y*2+1)*s->block_wrap[0]; + int j; + s->mv_type = MV_TYPE_8X8; + for(j=0; j<4; j++){ + s->mv[0][j][0] = s->motion_val[ mb_index + (j&1) + (j>>1)*s->block_wrap[0] ][0]; + s->mv[0][j][1] = s->motion_val[ mb_index + (j&1) + (j>>1)*s->block_wrap[0] ][1]; + } + }else{ + s->mv_type = MV_TYPE_16X16; + s->mv[0][0][0] = s->motion_val[ mb_x*2+1 + (mb_y*2+1)*s->block_wrap[0] ][0]; + s->mv[0][0][1] = s->motion_val[ mb_x*2+1 + (mb_y*2+1)*s->block_wrap[0] ][1]; + } + + clear_blocks(s->block[0]); + + s->mb_x= mb_x; + s->mb_y= mb_y; + MPV_decode_mb(s, s->block); + } + } + + /* guess MVs */ + if(s->pict_type==B_TYPE){ + i= -1; + for(mb_y=0; mb_ymb_height; mb_y++){ + for(mb_x=0; mb_xmb_width; mb_x++){ + int xy= mb_x*2+1 + (mb_y*2+1)*s->block_wrap[0]; + i++; + error= s->error_status_table[i]; + + if(s->mb_type[i]&MB_TYPE_INTRA) continue; //intra + if(!(error&MV_ERROR)) continue; //inter with undamaged MV + if(!(error&AC_ERROR)) continue; //undamaged inter + + s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD; + s->mb_intra=0; + s->mv_type = MV_TYPE_16X16; + s->mb_skiped=0; + + if(s->pp_time){ + int time_pp= s->pp_time; + int time_pb= s->pb_time; + + s->mv[0][0][0] = 
s->motion_val[xy][0]*time_pb/time_pp; + s->mv[0][0][1] = s->motion_val[xy][1]*time_pb/time_pp; + s->mv[1][0][0] = s->motion_val[xy][0]*(time_pb - time_pp)/time_pp; + s->mv[1][0][1] = s->motion_val[xy][1]*(time_pb - time_pp)/time_pp; + }else{ + s->mv[0][0][0]= 0; + s->mv[0][0][1]= 0; + s->mv[1][0][0]= 0; + s->mv[1][0][1]= 0; + } + + clear_blocks(s->block[0]); + s->mb_x= mb_x; + s->mb_y= mb_y; + MPV_decode_mb(s, s->block); + } + } + }else + guess_mv(s); + + /* fill DC for inter blocks */ + i= -1; + for(mb_y=0; mb_ymb_height; mb_y++){ + for(mb_x=0; mb_xmb_width; mb_x++){ + int dc, dcu, dcv, y, n; + INT16 *dc_ptr; + UINT8 *dest_y, *dest_cb, *dest_cr; + + i++; + error= s->error_status_table[i]; + + if(s->mb_type[i]&MB_TYPE_INTRA) continue; //intra +// if(error&MV_ERROR) continue; //inter data damaged FIXME is this good? + + dest_y = s->current_picture[0] + mb_x*16 + mb_y*16*s->linesize; + dest_cb= s->current_picture[1] + mb_x*8 + mb_y*8 *s->uvlinesize; + dest_cr= s->current_picture[2] + mb_x*8 + mb_y*8 *s->uvlinesize; + + dc_ptr= &s->dc_val[0][mb_x*2+1 + (mb_y*2+1)*(s->mb_width*2+2)]; + for(n=0; n<4; n++){ + dc=0; + for(y=0; y<8; y++){ + int x; + for(x=0; x<8; x++){ + dc+= dest_y[x + (n&1)*8 + (y + (n>>1)*8)*s->linesize]; + } + } + dc_ptr[(n&1) + (n>>1)*(s->mb_width*2+2)]= (dc+4)>>3; + } + + dcu=dcv=0; + for(y=0; y<8; y++){ + int x; + for(x=0; x<8; x++){ + dcu+=dest_cb[x + y*(s->uvlinesize)]; + dcv+=dest_cr[x + y*(s->uvlinesize)]; + } + } + s->dc_val[1][mb_x+1 + (mb_y+1)*(s->mb_width+2)]= (dcu+4)>>3; + s->dc_val[2][mb_x+1 + (mb_y+1)*(s->mb_width+2)]= (dcv+4)>>3; + } + } +#if 1 + /* guess DC for damaged blocks */ + guess_dc(s, s->dc_val[0] + s->mb_width*2+3, s->mb_width*2, s->mb_height*2, s->mb_width*2+2, 1); + guess_dc(s, s->dc_val[1] + s->mb_width +3, s->mb_width , s->mb_height , s->mb_width +2, 0); + guess_dc(s, s->dc_val[2] + s->mb_width +3, s->mb_width , s->mb_height , s->mb_width +2, 0); +#endif + /* filter luma DC */ + filter181(s->dc_val[0] + s->mb_width*2+3, 
s->mb_width*2, s->mb_height*2, s->mb_width*2+2); + +#if 1 + /* render DC only intra */ + i= -1; + for(mb_y=0; mb_ymb_height; mb_y++){ + for(mb_x=0; mb_xmb_width; mb_x++){ + UINT8 *dest_y, *dest_cb, *dest_cr; + + i++; + error= s->error_status_table[i]; + + if(!(s->mb_type[i]&MB_TYPE_INTRA)) continue; //inter + if(!(error&AC_ERROR)) continue; //undamaged + + dest_y = s->current_picture[0] + mb_x*16 + mb_y*16*s->linesize; + dest_cb= s->current_picture[1] + mb_x*8 + mb_y*8 *s->uvlinesize; + dest_cr= s->current_picture[2] + mb_x*8 + mb_y*8 *s->uvlinesize; + + put_dc(s, dest_y, dest_cb, dest_cr, mb_x, mb_y); + } + } +#endif + + if(s->avctx->error_concealment&FF_EC_DEBLOCK){ + /* filter horizontal block boundaries */ + h_block_filter(s, s->current_picture[0], s->mb_width*2, s->mb_height*2, s->linesize , 1); + h_block_filter(s, s->current_picture[1], s->mb_width , s->mb_height , s->uvlinesize, 0); + h_block_filter(s, s->current_picture[2], s->mb_width , s->mb_height , s->uvlinesize, 0); + + /* filter vertical block boundaries */ + v_block_filter(s, s->current_picture[0], s->mb_width*2, s->mb_height*2, s->linesize , 1); + v_block_filter(s, s->current_picture[1], s->mb_width , s->mb_height , s->uvlinesize, 0); + v_block_filter(s, s->current_picture[2], s->mb_width , s->mb_height , s->uvlinesize, 0); + } + + /* clean a few tables */ + for(i=0; imb_num; i++){ + int error= s->error_status_table[i]; + + if(s->pict_type!=B_TYPE && (error&(DC_ERROR|MV_ERROR|AC_ERROR))){ + s->mbskip_table[i]=0; + } + s->mbintra_table[i]=1; + } +} diff --git a/src/libffmpeg/libavcodec/eval.c b/src/libffmpeg/libavcodec/eval.c index df2f4e29f..bcaf4f59b 100644 --- a/src/libffmpeg/libavcodec/eval.c +++ b/src/libffmpeg/libavcodec/eval.c @@ -43,11 +43,11 @@ typedef struct Parser{ int stack_index; char *s; double *const_value; - char **const_name; /* NULL terminated */ - double (**func1)(void *, double a); /* NULL terminated */ - char **func1_name; /* NULL terminated */ - double (**func2)(void *, double 
a, double b); /* NULL terminated */ - char **func2_name; /* NULL terminated */ + char **const_name; // NULL terminated + double (**func1)(void *, double a); // NULL terminated + char **func1_name; // NULL terminated + double (**func2)(void *, double a, double b); // NULL terminated + char **func2_name; // NULL terminated void *opaque; } Parser; @@ -59,7 +59,7 @@ static void push(Parser *p, double d){ return; } p->stack[ p->stack_index++ ]= d; -/* printf("push %f\n", d); fflush(stdout); */ +//printf("push %f\n", d); fflush(stdout); } static double pop(Parser *p){ @@ -67,7 +67,7 @@ static double pop(Parser *p){ fprintf(stderr, "stack underflow in the parser\n"); return NAN; } -/* printf("pop\n"); fflush(stdout); */ +//printf("pop\n"); fflush(stdout); return p->stack[ --p->stack_index ]; } @@ -91,7 +91,7 @@ static void evalPrimary(Parser *p){ p->s= next; return; } - + /* named constants */ for(i=0; p->const_name[i]; i++){ if(strmatch(p->s, p->const_name[i])){ @@ -100,22 +100,22 @@ static void evalPrimary(Parser *p){ return; } } - + p->s= strchr(p->s, '('); if(p->s==NULL){ fprintf(stderr, "Parser: missing ( in \"%s\"\n", next); return; } - p->s++; /* "(" */ + p->s++; // "(" evalExpression(p); d= pop(p); - p->s++; /* ")" or "," */ + p->s++; // ")" or "," if(p->s[-1]== ','){ evalExpression(p); d2= pop(p); - p->s++; /* ")" */ + p->s++; // ")" } - + if( strmatch(next, "sinh" ) ) d= sinh(d); else if( strmatch(next, "cosh" ) ) d= cosh(d); else if( strmatch(next, "tanh" ) ) d= tanh(d); @@ -132,8 +132,8 @@ static void evalPrimary(Parser *p){ else if( strmatch(next, "gt" ) ) d= d > d2 ? 1.0 : 0.0; else if( strmatch(next, "lt" ) ) d= d > d2 ? 0.0 : 1.0; else if( strmatch(next, "eq" ) ) d= d == d2 ? 1.0 : 0.0; -/* else if( strmatch(next, "l1" ) ) d= 1 + d2*(d - 1); */ -/* else if( strmatch(next, "sq01" ) ) d= (d >= 0.0 && d <=1.0) ? 1.0 : 0.0; */ +// else if( strmatch(next, "l1" ) ) d= 1 + d2*(d - 1); +// else if( strmatch(next, "sq01" ) ) d= (d >= 0.0 && d <=1.0) ? 
1.0 : 0.0; else{ int error=1; for(i=0; p->func1_name && p->func1_name[i]; i++){ @@ -157,23 +157,23 @@ static void evalPrimary(Parser *p){ return; } } - + if(p->s[-1]!= ')'){ fprintf(stderr, "Parser: missing ) in \"%s\"\n", next); return; } push(p, d); -} - +} + static void evalPow(Parser *p){ int neg= 0; if(p->s[0]=='+') p->s++; - - if(p->s[0]=='-'){ + + if(p->s[0]=='-'){ neg= 1; p->s++; } - + if(p->s[0]=='('){ p->s++;; evalExpression(p); @@ -184,7 +184,7 @@ static void evalPow(Parser *p){ }else{ evalPrimary(p); } - + if(neg) push(p, -pop(p)); } @@ -229,11 +229,11 @@ static void evalExpression(Parser *p){ } double ff_eval(char *s, double *const_value, char **const_name, - double (**func1)(void *, double), char **func1_name, + double (**func1)(void *, double), char **func1_name, double (**func2)(void *, double, double), char **func2_name, void *opaque){ Parser p; - + p.stack_index=0; p.s= s; p.const_value= const_value; @@ -243,7 +243,7 @@ double ff_eval(char *s, double *const_value, char **const_name, p.func2 = func2; p.func2_name = func2_name; p.opaque = opaque; - + evalExpression(&p); return pop(&p); } diff --git a/src/libffmpeg/libavcodec/fft.c b/src/libffmpeg/libavcodec/fft.c new file mode 100644 index 000000000..0f5181ac3 --- /dev/null +++ b/src/libffmpeg/libavcodec/fft.c @@ -0,0 +1,229 @@ +/* + * FFT/IFFT transforms + * Copyright (c) 2002 Fabrice Bellard. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include "dsputil.h" + +/** + * The size of the FFT is 2^nbits. If inverse is TRUE, inverse FFT is + * done + */ +int fft_init(FFTContext *s, int nbits, int inverse) +{ + int i, j, m, n; + float alpha, c1, s1, s2; + + s->nbits = nbits; + n = 1 << nbits; + + s->exptab = av_malloc((n / 2) * sizeof(FFTComplex)); + if (!s->exptab) + goto fail; + s->revtab = av_malloc(n * sizeof(uint16_t)); + if (!s->revtab) + goto fail; + s->inverse = inverse; + + s2 = inverse ? 1.0 : -1.0; + + for(i=0;i<(n/2);i++) { + alpha = 2 * M_PI * (float)i / (float)n; + c1 = cos(alpha); + s1 = sin(alpha) * s2; + s->exptab[i].re = c1; + s->exptab[i].im = s1; + } + s->fft_calc = fft_calc_c; + s->exptab1 = NULL; + + /* compute constant table for HAVE_SSE version */ +#if defined(HAVE_MMX) && 0 + if (mm_flags & MM_SSE) { + int np, nblocks, np2, l; + FFTComplex *q; + + np = 1 << nbits; + nblocks = np >> 3; + np2 = np >> 1; + s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex)); + if (!s->exptab1) + goto fail; + q = s->exptab1; + do { + for(l = 0; l < np2; l += 2 * nblocks) { + *q++ = s->exptab[l]; + *q++ = s->exptab[l + nblocks]; + + q->re = -s->exptab[l].im; + q->im = s->exptab[l].re; + q++; + q->re = -s->exptab[l + nblocks].im; + q->im = s->exptab[l + nblocks].re; + q++; + } + nblocks = nblocks >> 1; + } while (nblocks != 0); + av_freep(&s->exptab); + } +#endif + + /* compute bit reverse table */ + + for(i=0;i> j) & 1) << (nbits-j-1); + } + s->revtab[i]=m; + } + return 0; + fail: + av_freep(&s->revtab); + av_freep(&s->exptab); + av_freep(&s->exptab1); + return -1; +} + +/* butter fly op */ +#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \ +{\ + FFTSample ax, ay, bx, by;\ + bx=pre1;\ + by=pim1;\ + ax=qre1;\ + ay=qim1;\ + pre = (bx + ax);\ + pim = (by + ay);\ + qre = (bx 
- ax);\ + qim = (by - ay);\ +} + +#define MUL16(a,b) ((a) * (b)) + +#define CMUL(pre, pim, are, aim, bre, bim) \ +{\ + pre = (MUL16(are, bre) - MUL16(aim, bim));\ + pim = (MUL16(are, bim) + MUL16(bre, aim));\ +} + +/** + * Do a complex FFT with the parameters defined in fft_init(). The + * input data must be permuted before with s->revtab table. No + * 1.0/sqrt(n) normalization is done. + */ +void fft_calc_c(FFTContext *s, FFTComplex *z) +{ + int ln = s->nbits; + int j, np, np2; + int nblocks, nloops; + register FFTComplex *p, *q; + FFTComplex *exptab = s->exptab; + int l; + FFTSample tmp_re, tmp_im; + + np = 1 << ln; + + /* pass 0 */ + + p=&z[0]; + j=(np >> 1); + do { + BF(p[0].re, p[0].im, p[1].re, p[1].im, + p[0].re, p[0].im, p[1].re, p[1].im); + p+=2; + } while (--j != 0); + + /* pass 1 */ + + + p=&z[0]; + j=np >> 2; + if (s->inverse) { + do { + BF(p[0].re, p[0].im, p[2].re, p[2].im, + p[0].re, p[0].im, p[2].re, p[2].im); + BF(p[1].re, p[1].im, p[3].re, p[3].im, + p[1].re, p[1].im, -p[3].im, p[3].re); + p+=4; + } while (--j != 0); + } else { + do { + BF(p[0].re, p[0].im, p[2].re, p[2].im, + p[0].re, p[0].im, p[2].re, p[2].im); + BF(p[1].re, p[1].im, p[3].re, p[3].im, + p[1].re, p[1].im, p[3].im, -p[3].re); + p+=4; + } while (--j != 0); + } + /* pass 2 .. 
ln-1 */ + + nblocks = np >> 3; + nloops = 1 << 2; + np2 = np >> 1; + do { + p = z; + q = z + nloops; + for (j = 0; j < nblocks; ++j) { + BF(p->re, p->im, q->re, q->im, + p->re, p->im, q->re, q->im); + + p++; + q++; + for(l = nblocks; l < np2; l += nblocks) { + CMUL(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q->re, q->im); + BF(p->re, p->im, q->re, q->im, + p->re, p->im, tmp_re, tmp_im); + p++; + q++; + } + + p += nloops; + q += nloops; + } + nblocks = nblocks >> 1; + nloops = nloops << 1; + } while (nblocks != 0); +} + +/** + * Do the permutation needed BEFORE calling fft_calc() + */ +void fft_permute(FFTContext *s, FFTComplex *z) +{ + int j, k, np; + FFTComplex tmp; + const uint16_t *revtab = s->revtab; + + /* reverse */ + np = 1 << s->nbits; + for(j=0;jrevtab); + av_freep(&s->exptab); + av_freep(&s->exptab1); +} + diff --git a/src/libffmpeg/libavcodec/h263.c b/src/libffmpeg/libavcodec/h263.c index 97327ffc4..8fb9c06b7 100644 --- a/src/libffmpeg/libavcodec/h263.c +++ b/src/libffmpeg/libavcodec/h263.c @@ -19,11 +19,11 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * ac prediction encoding, b-frame support, error resilience, optimizations, - * qpel decoding, gmc decoding, interlaced decoding, + * qpel decoding, gmc decoding, interlaced decoding, * by Michael Niedermayer */ - -/* #define DEBUG */ + +//#define DEBUG #include "common.h" #include "dsputil.h" #include "avcodec.h" @@ -31,11 +31,14 @@ #include "h263data.h" #include "mpeg4data.h" -/* rounded divison & shift */ -#define RSHIFT(a,b) ((a) > 0 ? 
((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b)) +//#undef NDEBUG +//#include +#if 1 #define PRINT_MB_TYPE(a) {} -/* #define PRINT_MB_TYPE(a) printf(a) */ +#else +#define PRINT_MB_TYPE(a) printf(a) +#endif #define INTRA_MCBPC_VLC_BITS 6 #define INTER_MCBPC_VLC_BITS 6 @@ -51,7 +54,7 @@ static void h263_encode_block(MpegEncContext * s, DCTELEM * block, static void h263_encode_motion(MpegEncContext * s, int val, int fcode); static void h263p_encode_umotion(MpegEncContext * s, int val); static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, - int n, int dc, UINT8 *scan_table, + int n, int dc, UINT8 *scan_table, PutBitContext *dc_pb, PutBitContext *ac_pb); static int h263_decode_motion(MpegEncContext * s, int pred, int fcode); static int h263p_decode_umotion(MpegEncContext * s, int pred); @@ -64,6 +67,8 @@ static int h263_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr); static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n, int dir); static void mpeg4_decode_sprite_trajectory(MpegEncContext * s); +static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *dir_ptr); + extern UINT32 inverse[256]; @@ -78,7 +83,7 @@ static UINT8 uni_mpeg4_intra_rl_len [64*64*2*2]; static UINT32 uni_mpeg4_inter_rl_bits[64*64*2*2]; static UINT8 uni_mpeg4_inter_rl_len [64*64*2*2]; #define UNI_MPEG4_ENC_INDEX(last,run,level) ((last)*128 + (run)*256 + (level)) -/* #define UNI_MPEG4_ENC_INDEX(last,run,level) ((last)*128*64 + (run) + (level)*64) */ +//#define UNI_MPEG4_ENC_INDEX(last,run,level) ((last)*128*64 + (run) + (level)*64) /* mpeg4 inter @@ -120,7 +125,7 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number) s->gob_number = 0; put_bits(&s->pb, 22, 0x20); /* PSC */ - put_bits(&s->pb, 8, (((INT64)s->picture_number * 30 * FRAME_RATE_BASE) / + put_bits(&s->pb, 8, (((INT64)s->picture_number * 30 * FRAME_RATE_BASE) / s->frame_rate) & 0xff); put_bits(&s->pb, 1, 1); /* marker */ @@ -128,7 +133,7 @@ 
void h263_encode_picture_header(MpegEncContext * s, int picture_number) put_bits(&s->pb, 1, 0); /* split screen off */ put_bits(&s->pb, 1, 0); /* camera off */ put_bits(&s->pb, 1, 0); /* freeze picture release off */ - + format = h263_get_picture_format(s->width, s->height); if (!s->h263_plus) { /* H.263v1 */ @@ -152,7 +157,7 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number) put_bits(&s->pb,3,6); /* Custom Source Format */ else put_bits(&s->pb, 3, format); - + put_bits(&s->pb,1,0); /* Custom PCF: off */ s->umvplus = (s->pict_type == P_TYPE) && s->unrestricted_mv; put_bits(&s->pb, 1, s->umvplus); /* Unrestricted Motion Vector */ @@ -167,9 +172,9 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number) put_bits(&s->pb,1,0); /* Modified Quantization: off */ put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */ put_bits(&s->pb,3,0); /* Reserved */ - + put_bits(&s->pb, 3, s->pict_type == P_TYPE); - + put_bits(&s->pb,1,0); /* Reference Picture Resampling: off */ put_bits(&s->pb,1,0); /* Reduced-Resolution Update: off */ if (s->pict_type == I_TYPE) @@ -179,13 +184,13 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number) put_bits(&s->pb,1,s->no_rounding); /* Rounding Type */ put_bits(&s->pb,2,0); /* Reserved */ put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */ - + /* This should be here if PLUSPTYPE */ put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */ - + if (format == 7) { /* Custom Picture Format (CPFMT) */ - + if (s->aspect_ratio_info) put_bits(&s->pb,4,s->aspect_ratio_info); else @@ -199,7 +204,7 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number) put_bits(&s->pb, 8, s->aspected_height); } } - + /* Unlimited Unrestricted Motion Vectors Indicator (UUI) */ if (s->umvplus) put_bits(&s->pb,1,1); /* Limited according tables of Annex D */ @@ -209,7 +214,7 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number) put_bits(&s->pb, 
1, 0); /* no PEI */ if(s->h263_aic){ - s->y_dc_scale_table= + s->y_dc_scale_table= s->c_dc_scale_table= h263_aic_dc_scale_table; }else{ s->y_dc_scale_table= @@ -217,51 +222,25 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number) } } +/** + * Encodes a group of blocks header. + */ int h263_encode_gob_header(MpegEncContext * s, int mb_line) { - int pdif=0; - - /* Check to see if we need to put a new GBSC */ - /* for RTP packetization */ - if (s->rtp_mode) { - pdif = pbBufPtr(&s->pb) - s->ptr_lastgob; - if (pdif >= s->rtp_payload_size) { - /* Bad luck, packet must be cut before */ - align_put_bits(&s->pb); - flush_put_bits(&s->pb); - /* Call the RTP callback to send the last GOB */ - if (s->rtp_callback) { - pdif = pbBufPtr(&s->pb) - s->ptr_lastgob; - s->rtp_callback(s->ptr_lastgob, pdif, s->gob_number); - } - s->ptr_lastgob = pbBufPtr(&s->pb); - put_bits(&s->pb, 17, 1); /* GBSC */ - s->gob_number = mb_line / s->gob_index; - put_bits(&s->pb, 5, s->gob_number); /* GN */ - put_bits(&s->pb, 2, s->pict_type == I_TYPE); /* GFID */ - put_bits(&s->pb, 5, s->qscale); /* GQUANT */ - /* fprintf(stderr,"\nGOB: %2d size: %d", s->gob_number - 1, pdif); */ - return pdif; - } else if (pdif + s->mb_line_avgsize >= s->rtp_payload_size) { - /* Cut the packet before we can't */ align_put_bits(&s->pb); flush_put_bits(&s->pb); /* Call the RTP callback to send the last GOB */ if (s->rtp_callback) { - pdif = pbBufPtr(&s->pb) - s->ptr_lastgob; + int pdif = pbBufPtr(&s->pb) - s->ptr_lastgob; s->rtp_callback(s->ptr_lastgob, pdif, s->gob_number); } - s->ptr_lastgob = pbBufPtr(&s->pb); put_bits(&s->pb, 17, 1); /* GBSC */ s->gob_number = mb_line / s->gob_index; put_bits(&s->pb, 5, s->gob_number); /* GN */ put_bits(&s->pb, 2, s->pict_type == I_TYPE); /* GFID */ put_bits(&s->pb, 5, s->qscale); /* GQUANT */ - /* fprintf(stderr,"\nGOB: %2d size: %d", s->gob_number - 1, pdif); */ - return pdif; - } - } - return 0; + //fprintf(stderr,"\nGOB: %2d size: %d", s->gob_number - 1, 
pdif); + return 0; } static inline int decide_ac_pred(MpegEncContext * s, DCTELEM block[6][64], int dir[6]) @@ -275,27 +254,114 @@ static inline int decide_ac_pred(MpegEncContext * s, DCTELEM block[6][64], int d ac_val = s->ac_val[0][0] + s->block_index[n] * 16; ac_val1= ac_val; if(dir[n]){ + const int xy= s->mb_x + s->mb_y*s->mb_width - s->mb_width; + /* top prediction */ ac_val-= s->block_wrap[n]*16; - for(i=1; i<8; i++){ - const int level= block[n][block_permute_op(i )]; - score0+= ABS(level); - score1+= ABS(level - ac_val[i+8]); - ac_val1[i ]= block[n][block_permute_op(i<<3)]; - ac_val1[i+8]= level; + if(s->mb_y==0 || s->qscale == s->qscale_table[xy] || n==2 || n==3){ + /* same qscale */ + for(i=1; i<8; i++){ + const int level= block[n][s->idct_permutation[i ]]; + score0+= ABS(level); + score1+= ABS(level - ac_val[i+8]); + ac_val1[i ]= block[n][s->idct_permutation[i<<3]]; + ac_val1[i+8]= level; + } + }else{ + /* different qscale, we must rescale */ + for(i=1; i<8; i++){ + const int level= block[n][s->idct_permutation[i ]]; + score0+= ABS(level); + score1+= ABS(level - ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale)); + ac_val1[i ]= block[n][s->idct_permutation[i<<3]]; + ac_val1[i+8]= level; + } } }else{ + const int xy= s->mb_x-1 + s->mb_y*s->mb_width; + /* left prediction */ ac_val-= 16; - for(i=1; i<8; i++){ - const int level= block[n][block_permute_op(i<<3)]; - score0+= ABS(level); - score1+= ABS(level - ac_val[i]); - ac_val1[i ]= level; - ac_val1[i+8]= block[n][block_permute_op(i )]; + if(s->mb_x==0 || s->qscale == s->qscale_table[xy] || n==1 || n==3){ + /* same qscale */ + for(i=1; i<8; i++){ + const int level= block[n][s->idct_permutation[i<<3]]; + score0+= ABS(level); + score1+= ABS(level - ac_val[i]); + ac_val1[i ]= level; + ac_val1[i+8]= block[n][s->idct_permutation[i ]]; + } + }else{ + /* different qscale, we must rescale */ + for(i=1; i<8; i++){ + const int level= block[n][s->idct_permutation[i<<3]]; + score0+= ABS(level); + score1+= 
ABS(level - ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale)); + ac_val1[i ]= level; + ac_val1[i+8]= block[n][s->idct_permutation[i ]]; + } } } } - return score0 > score1 ? 1 : 0; + return score0 > score1 ? 1 : 0; +} + +/** + * modify qscale so that encoding is acually possible in h263 (limit difference to -2..2) + */ +void ff_clean_h263_qscales(MpegEncContext *s){ + int i; + + for(i=1; imb_num; i++){ + if(s->qscale_table[i] - s->qscale_table[i-1] >2) + s->qscale_table[i]= s->qscale_table[i-1]+2; + } + for(i=s->mb_num-2; i>=0; i--){ + if(s->qscale_table[i] - s->qscale_table[i+1] >2) + s->qscale_table[i]= s->qscale_table[i+1]+2; + } +} + +/** + * modify mb_type & qscale so that encoding is acually possible in mpeg4 + */ +void ff_clean_mpeg4_qscales(MpegEncContext *s){ + int i; + + ff_clean_h263_qscales(s); + + for(i=1; imb_num; i++){ + if(s->qscale_table[i] != s->qscale_table[i-1] && (s->mb_type[i]&MB_TYPE_INTER4V)){ + s->mb_type[i]&= ~MB_TYPE_INTER4V; + s->mb_type[i]|= MB_TYPE_INTER; + } + } + + if(s->pict_type== B_TYPE){ + int odd=0; + /* ok, come on, this isnt funny anymore, theres more code for handling this mpeg4 mess than + for the actual adaptive quantization */ + + for(i=0; imb_num; i++){ + odd += s->qscale_table[i]&1; + } + + if(2*odd > s->mb_num) odd=1; + else odd=0; + + for(i=0; imb_num; i++){ + if((s->qscale_table[i]&1) != odd) + s->qscale_table[i]++; + if(s->qscale_table[i] > 31) + s->qscale_table[i]= 31; + } + + for(i=1; imb_num; i++){ + if(s->qscale_table[i] != s->qscale_table[i-1] && (s->mb_type[i]&MB_TYPE_DIRECT)){ + s->mb_type[i]&= ~MB_TYPE_DIRECT; + s->mb_type[i]|= MB_TYPE_BIDIR; + } + } + } } void mpeg4_encode_mb(MpegEncContext * s, @@ -308,8 +374,9 @@ void mpeg4_encode_mb(MpegEncContext * s, PutBitContext * const tex_pb = s->data_partitioning && s->pict_type!=B_TYPE ? &s->tex_pb : &s->pb; PutBitContext * const dc_pb = s->data_partitioning && s->pict_type!=I_TYPE ? 
&s->pb2 : &s->pb; const int interleaved_stats= (s->flags&CODEC_FLAG_PASS1) && !s->data_partitioning ? 1 : 0; - - /* printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y); */ + const int dquant_code[5]= {1,0,9,2,3}; + + // printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y); if (!s->mb_intra) { /* compute cbp */ int cbp = 0; @@ -321,27 +388,34 @@ void mpeg4_encode_mb(MpegEncContext * s, if(s->pict_type==B_TYPE){ static const int mb_type_table[8]= {-1, 2, 3, 1,-1,-1,-1, 0}; /* convert from mv_dir to type */ int mb_type= mb_type_table[s->mv_dir]; - + if(s->mb_x==0){ - s->last_mv[0][0][0]= - s->last_mv[0][0][1]= - s->last_mv[1][0][0]= + s->last_mv[0][0][0]= + s->last_mv[0][0][1]= + s->last_mv[1][0][0]= s->last_mv[1][0][1]= 0; } + + assert(s->dquant>=-2 && s->dquant<=2); + assert((s->dquant&1)==0); + assert(mb_type>=0); /* nothing to do if this MB was skiped in the next P Frame */ - if(s->mbskip_table[s->mb_y * s->mb_width + s->mb_x]){ + if(s->mbskip_table[s->mb_y * s->mb_width + s->mb_x]){ //FIXME avoid DCT & ... s->skip_count++; - s->mv[0][0][0]= - s->mv[0][0][1]= - s->mv[1][0][0]= + s->mv[0][0][0]= + s->mv[0][0][1]= + s->mv[1][0][0]= s->mv[1][0][1]= 0; - s->mv_dir= MV_DIR_FORWARD; /* doesnt matter */ + s->mv_dir= MV_DIR_FORWARD; //doesnt matter + s->qscale -= s->dquant; return; } if ((cbp | motion_x | motion_y | mb_type) ==0) { /* direct MB with MV={0,0} */ + assert(s->dquant==0); + put_bits(&s->pb, 1, 1); /* mb not coded modb1=1 */ if(interleaved_stats){ @@ -352,12 +426,24 @@ void mpeg4_encode_mb(MpegEncContext * s, return; } put_bits(&s->pb, 1, 0); /* mb coded modb1=0 */ - put_bits(&s->pb, 1, cbp ? 0 : 1); /* modb2 */ /* FIXME merge */ - put_bits(&s->pb, mb_type+1, 1); /* this table is so simple that we dont need it :) */ + put_bits(&s->pb, 1, cbp ? 
0 : 1); /* modb2 */ //FIXME merge + put_bits(&s->pb, mb_type+1, 1); // this table is so simple that we dont need it :) if(cbp) put_bits(&s->pb, 6, cbp); - - if(cbp && mb_type) - put_bits(&s->pb, 1, 0); /* no q-scale change */ + + if(cbp && mb_type){ + if(s->dquant) + put_bits(&s->pb, 2, (s->dquant>>2)+3); + else + put_bits(&s->pb, 1, 0); + }else + s->qscale -= s->dquant; + + if(!s->progressive_sequence){ + if(cbp) + put_bits(&s->pb, 1, s->interlaced_dct); + if(mb_type) // not diect mode + put_bits(&s->pb, 1, 0); // no interlaced ME yet + } if(interleaved_stats){ bits= get_bit_count(&s->pb); @@ -369,7 +455,7 @@ void mpeg4_encode_mb(MpegEncContext * s, { case 0: /* direct */ h263_encode_motion(s, motion_x, 1); - h263_encode_motion(s, motion_y, 1); + h263_encode_motion(s, motion_y, 1); s->b_count++; s->f_count++; break; @@ -412,7 +498,7 @@ void mpeg4_encode_mb(MpegEncContext * s, /* encode each block */ for (i = 0; i < 6; i++) { - mpeg4_encode_block(s, block[i], i, 0, zigzag_direct, NULL, &s->pb); + mpeg4_encode_block(s, block[i], i, 0, s->intra_scantable.permutated, NULL, &s->pb); } if(interleaved_stats){ @@ -421,8 +507,8 @@ void mpeg4_encode_mb(MpegEncContext * s, s->last_bits=bits; } }else{ /* s->pict_type==B_TYPE */ - if ((cbp | motion_x | motion_y) == 0 && s->mv_type==MV_TYPE_16X16) { - /* check if the B frames can skip it too, as we must skip it if we skip here + if ((cbp | motion_x | motion_y | s->dquant) == 0 && s->mv_type==MV_TYPE_16X16) { + /* check if the B frames can skip it too, as we must skip it if we skip here why didnt they just compress the skip-mb bits instead of reusing them ?! 
*/ if(s->max_b_frames>0){ int i; @@ -436,7 +522,7 @@ void mpeg4_encode_mb(MpegEncContext * s, offset= x + y*s->linesize; p_pic= s->new_picture[0] + offset; - + s->mb_skiped=1; for(i=0; imax_b_frames; i++){ uint8_t *b_pic; @@ -446,13 +532,13 @@ void mpeg4_encode_mb(MpegEncContext * s, b_pic= s->coded_order[i+1].picture[0] + offset; diff= pix_abs16x16(p_pic, b_pic, s->linesize); - if(diff>s->qscale*70){ /* FIXME check that 70 is optimal */ + if(diff>s->qscale*70){ //FIXME check that 70 is optimal s->mb_skiped=0; break; } } }else - s->mb_skiped=1; + s->mb_skiped=1; if(s->mb_skiped==1){ /* skip macroblock */ @@ -470,13 +556,23 @@ void mpeg4_encode_mb(MpegEncContext * s, put_bits(&s->pb, 1, 0); /* mb coded */ if(s->mv_type==MV_TYPE_16X16){ cbpc = cbp & 3; + if(s->dquant) cbpc+= 8; put_bits(&s->pb, inter_MCBPC_bits[cbpc], inter_MCBPC_code[cbpc]); + cbpy = cbp >> 2; cbpy ^= 0xf; put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); + if(s->dquant) + put_bits(pb2, 2, dquant_code[s->dquant+2]); + if(!s->progressive_sequence){ + if(cbp) + put_bits(pb2, 1, s->interlaced_dct); + put_bits(pb2, 1, 0); // no interlaced ME yet + } + if(interleaved_stats){ bits= get_bit_count(&s->pb); s->misc_bits+= bits - s->last_bits; @@ -485,7 +581,7 @@ void mpeg4_encode_mb(MpegEncContext * s, /* motion vectors: 16x16 mode */ h263_pred_motion(s, 0, &pred_x, &pred_y); - + h263_encode_motion(s, motion_x - pred_x, s->f_code); h263_encode_motion(s, motion_y - pred_y, s->f_code); }else{ @@ -497,6 +593,11 @@ void mpeg4_encode_mb(MpegEncContext * s, cbpy ^= 0xf; put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); + if(!s->progressive_sequence){ + if(cbp) + put_bits(pb2, 1, s->interlaced_dct); + } + if(interleaved_stats){ bits= get_bit_count(&s->pb); s->misc_bits+= bits - s->last_bits; @@ -512,7 +613,7 @@ void mpeg4_encode_mb(MpegEncContext * s, } } - if(interleaved_stats){ + if(interleaved_stats){ bits= get_bit_count(&s->pb); s->mv_bits+= bits - s->last_bits; s->last_bits=bits; @@ -520,7 +621,7 @@ void 
mpeg4_encode_mb(MpegEncContext * s, /* encode each block */ for (i = 0; i < 6; i++) { - mpeg4_encode_block(s, block[i], i, 0, zigzag_direct, NULL, tex_pb); + mpeg4_encode_block(s, block[i], i, 0, s->intra_scantable.permutated, NULL, tex_pb); } if(interleaved_stats){ @@ -532,8 +633,8 @@ void mpeg4_encode_mb(MpegEncContext * s, } } else { int cbp; - int dc_diff[6]; /* dc values with the dc prediction subtracted */ - int dir[6]; /* prediction direction */ + int dc_diff[6]; //dc values with the dc prediction subtracted + int dir[6]; //prediction direction int zigzag_last_index[6]; UINT8 *scan_table[6]; @@ -557,10 +658,10 @@ void mpeg4_encode_mb(MpegEncContext * s, int last_index; mpeg4_inv_pred_ac(s, block[i], i, dir[i]); - if (dir[i]==0) st = ff_alternate_vertical_scan; /* left */ - else st = ff_alternate_horizontal_scan; /* top */ + if (dir[i]==0) st = s->intra_v_scantable.permutated; /* left */ + else st = s->intra_h_scantable.permutated; /* top */ - for(last_index=63; last_index>=0; last_index--) /* FIXME optimize */ + for(last_index=63; last_index>=0; last_index--) //FIXME optimize if(block[i][st[last_index]]) break; zigzag_last_index[i]= s->block_last_index[i]; s->block_last_index[i]= last_index; @@ -568,7 +669,7 @@ void mpeg4_encode_mb(MpegEncContext * s, } }else{ for(i=0; i<6; i++) - scan_table[i]= zigzag_direct; + scan_table[i]= s->intra_scantable.permutated; } /* compute cbp */ @@ -580,10 +681,12 @@ void mpeg4_encode_mb(MpegEncContext * s, cbpc = cbp & 3; if (s->pict_type == I_TYPE) { + if(s->dquant) cbpc+=4; put_bits(&s->pb, intra_MCBPC_bits[cbpc], intra_MCBPC_code[cbpc]); } else { + if(s->dquant) cbpc+=8; put_bits(&s->pb, 1, 0); /* mb coded */ put_bits(&s->pb, inter_MCBPC_bits[cbpc + 4], @@ -592,6 +695,12 @@ void mpeg4_encode_mb(MpegEncContext * s, put_bits(pb2, 1, s->ac_pred); cbpy = cbp >> 2; put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); + if(s->dquant) + put_bits(dc_pb, 2, dquant_code[s->dquant+2]); + + if(!s->progressive_sequence){ + 
put_bits(dc_pb, 1, s->interlaced_dct); + } if(interleaved_stats){ bits= get_bit_count(&s->pb); @@ -614,17 +723,17 @@ void mpeg4_encode_mb(MpegEncContext * s, /* restore ac coeffs & last_index stuff if we messed them up with the prediction */ if(s->ac_pred){ for(i=0; i<6; i++){ - int j; + int j; INT16 *ac_val; ac_val = s->ac_val[0][0] + s->block_index[i] * 16; if(dir[i]){ - for(j=1; j<8; j++) - block[i][block_permute_op(j )]= ac_val[j+8]; + for(j=1; j<8; j++) + block[i][s->idct_permutation[j ]]= ac_val[j+8]; }else{ - for(j=1; j<8; j++) - block[i][block_permute_op(j<<3)]= ac_val[j ]; + for(j=1; j<8; j++) + block[i][s->idct_permutation[j<<3]]= ac_val[j ]; } s->block_last_index[i]= zigzag_last_index[i]; } @@ -640,8 +749,9 @@ void h263_encode_mb(MpegEncContext * s, INT16 pred_dc; INT16 rec_intradc[6]; UINT16 *dc_ptr[6]; - - /* printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y); */ + const int dquant_code[5]= {1,0,9,2,3}; + + //printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y); if (!s->mb_intra) { /* compute cbp */ cbp = 0; @@ -649,24 +759,27 @@ void h263_encode_mb(MpegEncContext * s, if (s->block_last_index[i] >= 0) cbp |= 1 << (5 - i); } - if ((cbp | motion_x | motion_y) == 0) { + if ((cbp | motion_x | motion_y | s->dquant) == 0) { /* skip macroblock */ put_bits(&s->pb, 1, 1); return; } put_bits(&s->pb, 1, 0); /* mb coded */ cbpc = cbp & 3; + if(s->dquant) cbpc+= 8; put_bits(&s->pb, inter_MCBPC_bits[cbpc], inter_MCBPC_code[cbpc]); cbpy = cbp >> 2; cbpy ^= 0xf; put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); + if(s->dquant) + put_bits(&s->pb, 2, dquant_code[s->dquant+2]); /* motion vectors: 16x16 mode only now */ h263_pred_motion(s, 0, &pred_x, &pred_y); - - if (!s->umvplus) { + + if (!s->umvplus) { h263_encode_motion(s, motion_x - pred_x, s->f_code); h263_encode_motion(s, motion_y - pred_y, s->f_code); } @@ -679,13 +792,13 @@ void h263_encode_mb(MpegEncContext * s, } } else { int li = s->h263_aic ? 
0 : 1; - + cbp = 0; for(i=0; i<6; i++) { /* Predict DC */ if (s->h263_aic && s->mb_intra) { INT16 level = block[i][0]; - + pred_dc = h263_pred_dc(s, i, &dc_ptr[i]); level -= pred_dc; /* Quant */ @@ -693,7 +806,7 @@ void h263_encode_mb(MpegEncContext * s, level = (level + (s->qscale >> 1))/(s->y_dc_scale); else level = (level - (s->qscale >> 1))/(s->y_dc_scale); - + /* AIC can change CBP */ if (level == 0 && s->block_last_index[i] == 0) s->block_last_index[i] = -1; @@ -701,20 +814,20 @@ void h263_encode_mb(MpegEncContext * s, level = -127; else if (level > 127) level = 127; - + block[i][0] = level; - /* Reconstruction */ + /* Reconstruction */ rec_intradc[i] = (s->y_dc_scale*level) + pred_dc; /* Oddify */ rec_intradc[i] |= 1; - /* if ((rec_intradc[i] % 2) == 0) */ - /* rec_intradc[i]++; */ + //if ((rec_intradc[i] % 2) == 0) + // rec_intradc[i]++; /* Clipping */ if (rec_intradc[i] < 0) rec_intradc[i] = 0; else if (rec_intradc[i] > 2047) rec_intradc[i] = 2047; - + /* Update AC/DC tables */ *dc_ptr[i] = rec_intradc[i]; } @@ -725,10 +838,12 @@ void h263_encode_mb(MpegEncContext * s, cbpc = cbp & 3; if (s->pict_type == I_TYPE) { + if(s->dquant) cbpc+=4; put_bits(&s->pb, intra_MCBPC_bits[cbpc], intra_MCBPC_code[cbpc]); } else { + if(s->dquant) cbpc+=8; put_bits(&s->pb, 1, 0); /* mb coded */ put_bits(&s->pb, inter_MCBPC_bits[cbpc + 4], @@ -740,16 +855,18 @@ void h263_encode_mb(MpegEncContext * s, } cbpy = cbp >> 2; put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); + if(s->dquant) + put_bits(&s->pb, 2, dquant_code[s->dquant+2]); } for(i=0; i<6; i++) { /* encode each block */ h263_encode_block(s, block[i], i); - + /* Update INTRADC for decoding */ if (s->h263_aic && s->mb_intra) { block[i][0] = rec_intradc[i]; - + } } } @@ -776,11 +893,11 @@ static int h263_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr) scale = s->c_dc_scale; } /* B C - * A X + * A X */ a = dc_val[(x - 1) + (y) * wrap]; c = dc_val[(x) + (y - 1) * wrap]; - + /* No prediction outside GOB 
boundary */ if (s->first_slice_line && ((n < 2) || (n > 3))) c = 1024; @@ -792,9 +909,9 @@ static int h263_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr) pred_dc = a; else pred_dc = c; - + /* we assume pred is positive */ - /* pred_dc = (pred_dc + (scale >> 1)) / scale; */ + //pred_dc = (pred_dc + (scale >> 1)) / scale; *dc_val_ptr = &dc_val[x + y * wrap]; return pred_dc; } @@ -821,16 +938,16 @@ void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n) ac_val = s->ac_val[n - 4 + 1][0]; scale = s->c_dc_scale; } - + ac_val += ((y) * wrap + (x)) * 16; ac_val1 = ac_val; - + /* B C - * A X + * A X */ a = dc_val[(x - 1) + (y) * wrap]; c = dc_val[(x) + (y - 1) * wrap]; - + /* No prediction outside GOB boundary */ if (s->first_slice_line && ((n < 2) || (n > 3))) c = 1024; @@ -841,7 +958,7 @@ void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n) if (a != 1024) { ac_val -= 16; for(i=1;i<8;i++) { - block[block_permute_op(i*8)] += ac_val[i]; + block[s->idct_permutation[i<<3]] += ac_val[i]; } pred_dc = a; } @@ -850,7 +967,7 @@ void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n) if (c != 1024) { ac_val -= 16 * wrap; for(i=1;i<8;i++) { - block[block_permute_op(i)] += ac_val[i + 8]; + block[s->idct_permutation[i ]] += ac_val[i + 8]; } pred_dc = c; } @@ -864,27 +981,27 @@ void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n) else pred_dc = c; } - + /* we assume pred is positive */ block[0]=block[0]*scale + pred_dc; - + if (block[0] < 0) block[0] = 0; else if (!(block[0] & 1)) block[0]++; - + /* Update AC/DC tables */ dc_val[(x) + (y) * wrap] = block[0]; - + /* left copy */ for(i=1;i<8;i++) - ac_val1[i] = block[block_permute_op(i * 8)]; + ac_val1[i ] = block[s->idct_permutation[i<<3]]; /* top copy */ for(i=1;i<8;i++) - ac_val1[8 + i] = block[block_permute_op(i)]; + ac_val1[8 + i] = block[s->idct_permutation[i ]]; } -INT16 *h263_pred_motion(MpegEncContext * s, int block, +INT16 *h263_pred_motion(MpegEncContext * s, int block, int *px, int *py) { int 
xy, wrap; @@ -898,13 +1015,13 @@ INT16 *h263_pred_motion(MpegEncContext * s, int block, A = s->motion_val[xy - 1]; /* special case for first (slice) line */ - if ((s->mb_y == 0 || s->first_slice_line) && block<3) { - /* we cant just change some MVs to simulate that as we need them for the B frames (and ME) */ - /* and if we ever support non rectangular objects than we need to do a few ifs here anyway :( */ - if(block==0){ /* most common case */ - if(s->mb_x == s->resync_mb_x){ /* rare */ + if (s->first_slice_line && block<3) { + // we cant just change some MVs to simulate that as we need them for the B frames (and ME) + // and if we ever support non rectangular objects than we need to do a few ifs here anyway :( + if(block==0){ //most common case + if(s->mb_x == s->resync_mb_x){ //rare *px= *py = 0; - }else if(s->mb_x + 1 == s->resync_mb_x){ /* rare */ + }else if(s->mb_x + 1 == s->resync_mb_x){ //rare C = s->motion_val[xy + off[block] - wrap]; if(s->mb_x==0){ *px = C[0]; @@ -918,7 +1035,7 @@ INT16 *h263_pred_motion(MpegEncContext * s, int block, *py = A[1]; } }else if(block==1){ - if(s->mb_x + 1 == s->resync_mb_x){ /* rare */ + if(s->mb_x + 1 == s->resync_mb_x){ //rare C = s->motion_val[xy + off[block] - wrap]; *px = mid_pred(A[0], 0, C[0]); *py = mid_pred(A[1], 0, C[1]); @@ -929,9 +1046,9 @@ INT16 *h263_pred_motion(MpegEncContext * s, int block, }else{ /* block==2*/ B = s->motion_val[xy - wrap]; C = s->motion_val[xy + off[block] - wrap]; - if(s->mb_x == s->resync_mb_x) /* rare */ + if(s->mb_x == s->resync_mb_x) //rare A[0]=A[1]=0; - + *px = mid_pred(A[0], B[0], C[0]); *py = mid_pred(A[1], B[1], C[1]); } @@ -946,7 +1063,7 @@ INT16 *h263_pred_motion(MpegEncContext * s, int block, static void h263_encode_motion(MpegEncContext * s, int val, int f_code) { - int range, l, m, bit_size, sign, code, bits; + int range, l, bit_size, sign, code, bits; if (val == 0) { /* zero vector */ @@ -957,40 +1074,51 @@ static void h263_encode_motion(MpegEncContext * s, int val, int 
f_code) range = 1 << bit_size; /* modulo encoding */ l = range * 32; - m = 2 * l; +#if 1 + val+= l; + val&= 2*l-1; + val-= l; + sign = val>>31; + val= (val^sign)-sign; + sign&=1; +#else if (val < -l) { - val += m; + val += 2*l; } else if (val >= l) { - val -= m; + val -= 2*l; } + assert(val>=-l && val= 0) { sign = 0; } else { val = -val; sign = 1; } +#endif val--; code = (val >> bit_size) + 1; bits = val & (range - 1); - put_bits(&s->pb, mvtab[code][1] + 1, (mvtab[code][0] << 1) | sign); + put_bits(&s->pb, mvtab[code][1] + 1, (mvtab[code][0] << 1) | sign); if (bit_size > 0) { put_bits(&s->pb, bit_size, bits); } } + } /* Encode MV differences on H.263+ with Unrestricted MV mode */ static void h263p_encode_umotion(MpegEncContext * s, int val) { - short sval = 0; + short sval = 0; short i = 0; short n_bits = 0; short temp_val; int code = 0; int tcode; - + if ( val == 0) put_bits(&s->pb, 1, 1); else if (val == 1) @@ -998,15 +1126,15 @@ static void h263p_encode_umotion(MpegEncContext * s, int val) else if (val == -1) put_bits(&s->pb, 3, 2); else { - + sval = ((val < 0) ? 
(short)(-val):(short)val); temp_val = sval; - + while (temp_val != 0) { temp_val = temp_val >> 1; n_bits++; } - + i = n_bits - 1; while (i > 0) { tcode = (sval & (1 << (i-1))) >> (i-1); @@ -1016,7 +1144,7 @@ static void h263p_encode_umotion(MpegEncContext * s, int val) } code = ((code << 1) | (val < 0)) << 1; put_bits(&s->pb, (2*n_bits)+1, code); - /* printf("\nVal = %d\tCode = %d", sval, code); */ + //printf("\nVal = %d\tCode = %d", sval, code); } } @@ -1036,7 +1164,7 @@ static void init_mv_penalty_and_fcode(MpegEncContext *s) range = 1 << bit_size; val=mv; - if (val < 0) + if (val < 0) val = -val; val--; code = (val >> bit_size) + 1; @@ -1099,7 +1227,7 @@ static void init_uni_dc_tab(void) /* chrominance */ uni_code= DCtab_chrom[size][0]; uni_len = DCtab_chrom[size][1]; - + if (size > 0) { uni_code<<=size; uni_code|=l; uni_len+=size; @@ -1116,7 +1244,7 @@ static void init_uni_dc_tab(void) static void init_uni_mpeg4_rl_tab(RLTable *rl, UINT32 *bits_tab, UINT8 *len_tab){ int slevel, run, last; - + assert(MAX_LEVEL >= 64); assert(MAX_RUN >= 63); @@ -1129,15 +1257,15 @@ static void init_uni_mpeg4_rl_tab(RLTable *rl, UINT32 *bits_tab, UINT8 *len_tab) int sign= slevel < 0 ? 
1 : 0; int bits, len, code; int level1, run1; - + len_tab[index]= 100; - + /* ESC0 */ code= get_rl_index(rl, last, run, level); bits= rl->table_vlc[code][0]; len= rl->table_vlc[code][1]; bits=bits*2+sign; len++; - + if(code!=rl->n && len < len_tab[index]){ bits_tab[index]= bits; len_tab [index]= len; @@ -1146,7 +1274,7 @@ static void init_uni_mpeg4_rl_tab(RLTable *rl, UINT32 *bits_tab, UINT8 *len_tab) /* ESC1 */ bits= rl->table_vlc[rl->n][0]; len= rl->table_vlc[rl->n][1]; - bits=bits*2; len++; /* esc1 */ + bits=bits*2; len++; //esc1 level1= level - rl->max_level[last][run]; if(level1>0){ code= get_rl_index(rl, last, run, level1); @@ -1154,18 +1282,18 @@ static void init_uni_mpeg4_rl_tab(RLTable *rl, UINT32 *bits_tab, UINT8 *len_tab) len += rl->table_vlc[code][1]; bits += rl->table_vlc[code][0]; bits=bits*2+sign; len++; - + if(code!=rl->n && len < len_tab[index]){ bits_tab[index]= bits; len_tab [index]= len; } } -#endif +#endif #if 1 /* ESC2 */ bits= rl->table_vlc[rl->n][0]; len= rl->table_vlc[rl->n][1]; - bits=bits*4+2; len+=2; /* esc2 */ + bits=bits*4+2; len+=2; //esc2 run1 = run - rl->max_run[last][level] - 1; if(run1>=0){ code= get_rl_index(rl, last, run1, level); @@ -1173,23 +1301,23 @@ static void init_uni_mpeg4_rl_tab(RLTable *rl, UINT32 *bits_tab, UINT8 *len_tab) len += rl->table_vlc[code][1]; bits += rl->table_vlc[code][0]; bits=bits*2+sign; len++; - + if(code!=rl->n && len < len_tab[index]){ bits_tab[index]= bits; len_tab [index]= len; } } -#endif - /* ESC3 */ +#endif + /* ESC3 */ bits= rl->table_vlc[rl->n][0]; len = rl->table_vlc[rl->n][1]; - bits=bits*4+3; len+=2; /* esc3 */ + bits=bits*4+3; len+=2; //esc3 bits=bits*2+last; len++; bits=bits*64+run; len+=6; - bits=bits*2+1; len++; /* marker */ + bits=bits*2+1; len++; //marker bits=bits*4096+(slevel&0xfff); len+=12; - bits=bits*2+1; len++; /* marker */ - + bits=bits*2+1; len++; //marker + if(len < len_tab[index]){ bits_tab[index]= bits; len_tab [index]= len; @@ -1211,15 +1339,15 @@ void 
h263_encode_init(MpegEncContext *s) init_rl(&rl_inter); init_rl(&rl_intra); init_rl(&rl_intra_aic); - + init_uni_mpeg4_rl_tab(&rl_intra, uni_mpeg4_intra_rl_bits, uni_mpeg4_intra_rl_len); init_uni_mpeg4_rl_tab(&rl_inter, uni_mpeg4_inter_rl_bits, uni_mpeg4_inter_rl_len); init_mv_penalty_and_fcode(s); } - s->mv_penalty= mv_penalty; /* FIXME exact table for msmpeg4 & h263p */ - - /* use fcodes >1 only for mpeg4 & h263 & h263p FIXME */ + s->mv_penalty= mv_penalty; //FIXME exact table for msmpeg4 & h263p + + // use fcodes >1 only for mpeg4 & h263 & h263p FIXME switch(s->codec_id){ case CODEC_ID_MPEG4: s->fcode_tab= fcode_tab; @@ -1231,8 +1359,8 @@ void h263_encode_init(MpegEncContext *s) s->min_qcoeff= -128; s->max_qcoeff= 127; break; - /* Note for mpeg4 & h263 the dc-scale table will be set per frame as needed later */ - default: /* nothing needed default table allready set in mpegvideo.c */ + //Note for mpeg4 & h263 the dc-scale table will be set per frame as needed later + default: //nothing needed default table allready set in mpegvideo.c s->min_qcoeff= -128; s->max_qcoeff= 127; s->y_dc_scale_table= @@ -1240,11 +1368,11 @@ void h263_encode_init(MpegEncContext *s) } if(s->mpeg_quant){ - s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); /* (a + x*3/8)/x */ + s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x s->inter_quant_bias= 0; }else{ s->intra_quant_bias=0; - s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); /* (a - x/4)/x */ + s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x } } @@ -1277,12 +1405,12 @@ static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n) if (s->h263_aic && s->mb_intra) rl = &rl_intra_aic; } - + /* AC coefs */ last_index = s->block_last_index[n]; last_non_zero = i - 1; for (; i <= last_index; i++) { - j = zigzag_direct[i]; + j = s->intra_scantable.permutated[i]; level = block[j]; if (level) { run = i - last_non_zero - 1; @@ -1308,7 +1436,9 @@ static void h263_encode_block(MpegEncContext * s, DCTELEM * block, 
int n) } /***************************************************/ - +/** + * add mpeg4 stuffing bits (01...1) + */ void ff_mpeg4_stuffing(PutBitContext * pbc) { int length; @@ -1321,13 +1451,13 @@ void ff_mpeg4_stuffing(PutBitContext * pbc) void ff_set_mpeg4_time(MpegEncContext * s, int picture_number){ int time_div, time_mod; - if(s->pict_type==I_TYPE){ /* we will encode a vol header */ + if(s->pict_type==I_TYPE){ //we will encode a vol header s->time_increment_resolution= s->frame_rate/ff_gcd(s->frame_rate, FRAME_RATE_BASE); if(s->time_increment_resolution>=256*256) s->time_increment_resolution= 256*128; s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1; } - + if(s->avctx->pts) s->time= (s->avctx->pts*s->time_increment_resolution + 500*1000)/(1000*1000); else @@ -1347,7 +1477,7 @@ void ff_set_mpeg4_time(MpegEncContext * s, int picture_number){ static void mpeg4_encode_vol_header(MpegEncContext * s) { - int vo_ver_id=1; /* must be 2 if we want GMC or q-pel */ + int vo_ver_id=1; //must be 2 if we want GMC or q-pel char buf[255]; s->vo_type= s->has_b_frames ? 
CORE_VO_TYPE : SIMPLE_VO_TYPE; @@ -1362,7 +1492,7 @@ static void mpeg4_encode_vol_header(MpegEncContext * s) put_bits(&s->pb, 1, 1); /* is obj layer id= yes */ put_bits(&s->pb, 4, vo_ver_id); /* is obj layer ver id */ put_bits(&s->pb, 3, 1); /* is obj layer priority */ - if(s->aspect_ratio_info) + if(s->aspect_ratio_info) put_bits(&s->pb, 4, s->aspect_ratio_info);/* aspect ratio info */ else put_bits(&s->pb, 4, 1); /* aspect ratio info= sqare pixel */ @@ -1383,7 +1513,7 @@ static void mpeg4_encode_vol_header(MpegEncContext * s) put_bits(&s->pb, 2, RECT_SHAPE); /* vol shape= rectangle */ put_bits(&s->pb, 1, 1); /* marker bit */ - + put_bits(&s->pb, 16, s->time_increment_resolution); if (s->time_increment_bits < 1) s->time_increment_bits = 1; @@ -1394,14 +1524,16 @@ static void mpeg4_encode_vol_header(MpegEncContext * s) put_bits(&s->pb, 1, 1); /* marker bit */ put_bits(&s->pb, 13, s->height); /* vol height */ put_bits(&s->pb, 1, 1); /* marker bit */ - put_bits(&s->pb, 1, 0); /* interlace */ + put_bits(&s->pb, 1, s->progressive_sequence ? 
0 : 1); put_bits(&s->pb, 1, 1); /* obmc disable */ if (vo_ver_id == 1) { put_bits(&s->pb, 1, s->vol_sprite_usage=0); /* sprite enable */ }else{ /* vo_ver_id == 2 */ put_bits(&s->pb, 2, s->vol_sprite_usage=0); /* sprite enable */ } - put_bits(&s->pb, 1, 0); /* not 8 bit */ + + s->quant_precision=5; + put_bits(&s->pb, 1, 0); /* not 8 bit == false */ put_bits(&s->pb, 1, s->mpeg_quant); /* quant type= (0=h263 style)*/ if(s->mpeg_quant) put_bits(&s->pb, 2, 0); /* no custom matrixes */ @@ -1420,7 +1552,7 @@ static void mpeg4_encode_vol_header(MpegEncContext * s) put_bits(&s->pb, 1, 0); /* reduced res vop */ } put_bits(&s->pb, 1, 0); /* scalability */ - + ff_mpeg4_stuffing(&s->pb); /* user data */ @@ -1438,15 +1570,17 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number) { int time_incr; int time_div, time_mod; - + if(s->pict_type==I_TYPE){ s->no_rounding=0; if(picture_number==0 || !s->strict_std_compliance) mpeg4_encode_vol_header(s); } + + s->partitioned_frame= s->data_partitioning && s->pict_type!=B_TYPE; -/* printf("num:%d rate:%d base:%d\n", s->picture_number, s->frame_rate, FRAME_RATE_BASE); */ - +//printf("num:%d rate:%d base:%d\n", s->picture_number, s->frame_rate, FRAME_RATE_BASE); + put_bits(&s->pb, 16, 0); /* vop header */ put_bits(&s->pb, 16, 0x1B6); /* vop header */ put_bits(&s->pb, 2, s->pict_type - 1); /* pict type: I = 0 , P = 1 */ @@ -1456,20 +1590,24 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number) time_incr= time_div - s->last_time_base; while(time_incr--) put_bits(&s->pb, 1, 1); - + put_bits(&s->pb, 1, 0); put_bits(&s->pb, 1, 1); /* marker */ put_bits(&s->pb, s->time_increment_bits, time_mod); /* time increment */ put_bits(&s->pb, 1, 1); /* marker */ put_bits(&s->pb, 1, 1); /* vop coded */ - if ( s->pict_type == P_TYPE + if ( s->pict_type == P_TYPE || (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE)) { s->no_rounding ^= 1; put_bits(&s->pb, 1, s->no_rounding); /* rounding type */ } 
put_bits(&s->pb, 3, 0); /* intra dc VLC threshold */ - /* FIXME sprite stuff */ + if(!s->progressive_sequence){ + put_bits(&s->pb, 1, s->top_field_first); + put_bits(&s->pb, 1, s->alternate_scan); + } + //FIXME sprite stuff put_bits(&s->pb, 5, s->qscale); @@ -1477,21 +1615,31 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number) put_bits(&s->pb, 3, s->f_code); /* fcode_for */ if (s->pict_type == B_TYPE) put_bits(&s->pb, 3, s->b_code); /* fcode_back */ - /* printf("****frame %d\n", picture_number); */ + // printf("****frame %d\n", picture_number); - s->y_dc_scale_table= ff_mpeg4_y_dc_scale_table; /* FIXME add short header support */ + s->y_dc_scale_table= ff_mpeg4_y_dc_scale_table; //FIXME add short header support s->c_dc_scale_table= ff_mpeg4_c_dc_scale_table; s->h_edge_pos= s->width; s->v_edge_pos= s->height; } -static void h263_dc_scale(MpegEncContext * s) +/** + * change qscale by given dquant and update qscale dependant variables. + */ +static void change_qscale(MpegEncContext * s, int dquant) { + s->qscale += dquant; + + if (s->qscale < 1) + s->qscale = 1; + else if (s->qscale > 31) + s->qscale = 31; + s->y_dc_scale= s->y_dc_scale_table[ s->qscale ]; s->c_dc_scale= s->c_dc_scale_table[ s->qscale ]; } -inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *dir_ptr) +static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *dir_ptr) { int a, b, c, wrap, pred, scale; UINT16 *dc_val; @@ -1507,12 +1655,22 @@ inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int dc_val = s->dc_val[0] + s->block_index[n]; /* B C - * A X + * A X */ a = dc_val[ - 1]; b = dc_val[ - 1 - wrap]; c = dc_val[ - wrap]; + /* outside slice handling (we cant do that by memset as we need the dc for error resilience) */ + if(s->first_slice_line && n!=3){ + if(n!=2) b=c= 1024; + if(n!=1 && s->mb_x == s->resync_mb_x) b=a= 1024; + } + if(s->mb_x == s->resync_mb_x && s->mb_y == s->resync_mb_y+1){ + 
if(n==0 || n==4 || n==5) + b=1024; + } + if (abs(a - b) < abs(b - c)) { pred = c; *dir_ptr = 1; /* top */ @@ -1552,16 +1710,16 @@ void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n, const int xy= s->mb_x-1 + s->mb_y*s->mb_width; /* left prediction */ ac_val -= 16; - + if(s->mb_x==0 || s->qscale == s->qscale_table[xy] || n==1 || n==3){ /* same qscale */ for(i=1;i<8;i++) { - block[block_permute_op(i*8)] += ac_val[i]; + block[s->idct_permutation[i<<3]] += ac_val[i]; } }else{ /* different qscale, we must rescale */ for(i=1;i<8;i++) { - block[block_permute_op(i*8)] += ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale); + block[s->idct_permutation[i<<3]] += ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale); } } } else { @@ -1572,23 +1730,23 @@ void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n, if(s->mb_y==0 || s->qscale == s->qscale_table[xy] || n==2 || n==3){ /* same qscale */ for(i=1;i<8;i++) { - block[block_permute_op(i)] += ac_val[i + 8]; + block[s->idct_permutation[i]] += ac_val[i + 8]; } }else{ /* different qscale, we must rescale */ for(i=1;i<8;i++) { - block[block_permute_op(i)] += ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale); + block[s->idct_permutation[i]] += ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale); } } } } /* left copy */ for(i=1;i<8;i++) - ac_val1[i] = block[block_permute_op(i * 8)]; + ac_val1[i ] = block[s->idct_permutation[i<<3]]; /* top copy */ for(i=1;i<8;i++) - ac_val1[8 + i] = block[block_permute_op(i)]; + ac_val1[8 + i] = block[s->idct_permutation[i ]]; } @@ -1600,18 +1758,36 @@ static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n, /* find prediction */ ac_val = s->ac_val[0][0] + s->block_index[n] * 16; - + if (dir == 0) { + const int xy= s->mb_x-1 + s->mb_y*s->mb_width; /* left prediction */ ac_val -= 16; - for(i=1;i<8;i++) { - block[block_permute_op(i*8)] -= ac_val[i]; + if(s->mb_x==0 || s->qscale == s->qscale_table[xy] || n==1 || n==3){ + /* same qscale */ + for(i=1;i<8;i++) { + 
block[s->idct_permutation[i<<3]] -= ac_val[i]; + } + }else{ + /* different qscale, we must rescale */ + for(i=1;i<8;i++) { + block[s->idct_permutation[i<<3]] -= ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale); + } } } else { + const int xy= s->mb_x + s->mb_y*s->mb_width - s->mb_width; /* top prediction */ ac_val -= 16 * s->block_wrap[n]; - for(i=1;i<8;i++) { - block[block_permute_op(i)] -= ac_val[i + 8]; + if(s->mb_y==0 || s->qscale == s->qscale_table[xy] || n==2 || n==3){ + /* same qscale */ + for(i=1;i<8;i++) { + block[s->idct_permutation[i]] -= ac_val[i + 8]; + } + }else{ + /* different qscale, we must rescale */ + for(i=1;i<8;i++) { + block[s->idct_permutation[i]] -= ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale); + } } } } @@ -1619,7 +1795,7 @@ static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n, static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n) { #if 1 -/* if(level<-255 || level>255) printf("dc overflow\n"); */ +// if(level<-255 || level>255) printf("dc overflow\n"); level+=256; if (n < 4) { /* luminance */ @@ -1657,17 +1833,19 @@ static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n) #endif } -static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int intra_dc, +static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int intra_dc, UINT8 *scan_table, PutBitContext *dc_pb, PutBitContext *ac_pb) { - int last, i, last_non_zero, sign; - int code; + int i, last_non_zero; +#if 0 //variables for the outcommented version + int code, sign, last; +#endif const RLTable *rl; UINT32 *bits_tab; UINT8 *len_tab; const int last_index = s->block_last_index[n]; - if (s->mb_intra) { /* Note gcc (3.2.1 at least) will optimize this away */ + if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away /* mpeg4 based DC predictor */ mpeg4_encode_dc(dc_pb, intra_dc, n); if(last_index<1) return; @@ -1694,7 +1872,7 @@ static inline void 
mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n if((level&(~127)) == 0){ const int index= UNI_MPEG4_ENC_INDEX(0, run, level); put_bits(ac_pb, len_tab[index], bits_tab[index]); - }else{ /* ESC3 */ + }else{ //ESC3 put_bits(ac_pb, 7+2+1+6+1+12+1, (3<<23)+(3<<21)+(0<<20)+(run<<14)+(1<<13)+(((level-64)&0xfff)<<1)+1); } last_non_zero = i; @@ -1707,7 +1885,7 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n if((level&(~127)) == 0){ const int index= UNI_MPEG4_ENC_INDEX(1, run, level); put_bits(ac_pb, len_tab[index], bits_tab[index]); - }else{ /* ESC3 */ + }else{ //ESC3 put_bits(ac_pb, 7+2+1+6+1+12+1, (3<<23)+(3<<21)+(1<<20)+(run<<14)+(1<<13)+(((level-64)&0xfff)<<1)+1); } } @@ -1729,7 +1907,7 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n if (code == rl->n) { int level1, run1; level1 = level - rl->max_level[last][run]; - if (level1 < 1) + if (level1 < 1) goto esc2; code = get_rl_index(rl, last, run, level1); if (code == rl->n) { @@ -1825,35 +2003,35 @@ void init_rl(RLTable *rl) void init_vlc_rl(RLTable *rl) { int i, q; - - init_vlc(&rl->vlc, 9, rl->n + 1, + + init_vlc(&rl->vlc, 9, rl->n + 1, &rl->table_vlc[0][1], 4, 2, &rl->table_vlc[0][0], 4, 2); - + for(q=0; q<32; q++){ int qmul= q*2; int qadd= (q-1)|1; - + if(q==0){ qmul=1; qadd=0; } - + rl->rl_vlc[q]= av_malloc(rl->vlc.table_size*sizeof(RL_VLC_ELEM)); for(i=0; ivlc.table_size; i++){ int code= rl->vlc.table[i][0]; int len = rl->vlc.table[i][1]; int level, run; - - if(len==0){ /* illegal code */ + + if(len==0){ // illegal code run= 66; level= MAX_LEVEL; - }else if(len<0){ /* more bits needed */ + }else if(len<0){ //more bits needed run= 0; level= code; }else{ - if(code==rl->n){ /* esc */ + if(code==rl->n){ //esc run= 66; level= 0; }else{ @@ -1879,10 +2057,10 @@ void h263_decode_init_vlc(MpegEncContext *s) if (!done) { done = 1; - init_vlc(&intra_MCBPC_vlc, INTRA_MCBPC_VLC_BITS, 8, + init_vlc(&intra_MCBPC_vlc, INTRA_MCBPC_VLC_BITS, 8, 
intra_MCBPC_bits, 1, 1, intra_MCBPC_code, 1, 1); - init_vlc(&inter_MCBPC_vlc, INTER_MCBPC_VLC_BITS, 25, + init_vlc(&inter_MCBPC_vlc, INTER_MCBPC_VLC_BITS, 25, inter_MCBPC_bits, 1, 1, inter_MCBPC_code, 1, 1); init_vlc(&cbpy_vlc, CBPY_VLC_BITS, 16, @@ -1910,33 +2088,58 @@ void h263_decode_init_vlc(MpegEncContext *s) &mb_type_b_tab[0][1], 2, 1, &mb_type_b_tab[0][0], 2, 1); } +} - s->progressive_sequence=1; /* set to most likely for the case of incomplete headers */ +/** + * Get the GOB height based on picture height. + */ +int ff_h263_get_gob_height(MpegEncContext *s){ + if (s->height <= 400) + return 1; + else if (s->height <= 800) + return 2; + else + return 4; } -int h263_decode_gob_header(MpegEncContext *s) +/** + * decodes the group of blocks header. + * @return <0 if an error occured + */ +static int h263_decode_gob_header(MpegEncContext *s) { unsigned int val, gfid; - + int left; + /* Check for GOB Start Code */ val = show_bits(&s->gb, 16); - if (val == 0) { + if(val) + return -1; + /* We have a GBSC probably with GSTUFF */ - skip_bits(&s->gb, 16); /* Drop the zeros */ - while (get_bits1(&s->gb) == 0); /* Seek the '1' bit */ + skip_bits(&s->gb, 16); /* Drop the zeros */ + left= s->gb.size*8 - get_bits_count(&s->gb); + //MN: we must check the bits left or we might end in a infinite loop (or segfault) + for(;left>13; left--){ + if(get_bits1(&s->gb)) break; /* Seek the '1' bit */ + } + if(left<=13) + return -1; + #ifdef DEBUG - fprintf(stderr,"\nGOB Start Code at MB %d\n", (s->mb_y * s->mb_width) + s->mb_x); + fprintf(stderr,"\nGOB Start Code at MB %d\n", (s->mb_y * s->mb_width) + s->mb_x); #endif - s->gob_number = get_bits(&s->gb, 5); /* GN */ - gfid = get_bits(&s->gb, 2); /* GFID */ - s->qscale = get_bits(&s->gb, 5); /* GQUANT */ + s->gob_number = get_bits(&s->gb, 5); /* GN */ + gfid = get_bits(&s->gb, 2); /* GFID */ + s->qscale = get_bits(&s->gb, 5); /* GQUANT */ + if(s->qscale==0) + return -1; + s->mb_x= 0; + s->mb_y= s->gob_index* s->gob_number; #ifdef DEBUG 
- fprintf(stderr, "\nGN: %u GFID: %u Quant: %u\n", s->gob_number, gfid, s->qscale); + fprintf(stderr, "\nGN: %u GFID: %u Quant: %u\n", s->gob_number, gfid, s->qscale); #endif - return 1; - } return 0; - } static inline void memsetw(short *tab, int val, int n) @@ -1964,7 +2167,7 @@ void ff_mpeg4_merge_partitions(MpegEncContext *s) s->i_tex_bits+= tex_pb_len; }else{ put_bits(&s->pb, 17, MOTION_MARKER); - s->misc_bits+=17 + pb2_len;; + s->misc_bits+=17 + pb2_len; s->mv_bits+= bits - s->last_bits; s->p_tex_bits+= tex_pb_len; } @@ -1977,130 +2180,157 @@ void ff_mpeg4_merge_partitions(MpegEncContext *s) s->last_bits= get_bit_count(&s->pb); } +int ff_mpeg4_get_video_packet_prefix_length(MpegEncContext *s){ + switch(s->pict_type){ + case I_TYPE: + return 16; + case P_TYPE: + case S_TYPE: + return s->f_code+15; + case B_TYPE: + return MAX(MAX(s->f_code, s->b_code)+15, 17); + default: + return -1; + } +} + void ff_mpeg4_encode_video_packet_header(MpegEncContext *s) { int mb_num_bits= av_log2(s->mb_num - 1) + 1; ff_mpeg4_stuffing(&s->pb); - if(s->pict_type==I_TYPE) - put_bits(&s->pb, 16, 0); - else if(s->pict_type==B_TYPE) - put_bits(&s->pb, MAX(MAX(s->f_code, s->b_code)+15, 17), 0); - else /* S/P_TYPE */ - put_bits(&s->pb, s->f_code+15, 0); + put_bits(&s->pb, ff_mpeg4_get_video_packet_prefix_length(s), 0); put_bits(&s->pb, 1, 1); - + put_bits(&s->pb, mb_num_bits, s->mb_x + s->mb_y*s->mb_width); - put_bits(&s->pb, 5, s->qscale); + put_bits(&s->pb, s->quant_precision, s->qscale); put_bits(&s->pb, 1, 0); /* no HEC */ } /** - * decodes the next video packet and sets s->next_qscale - * returns mb_num of the next packet or <0 if something went wrong + * check if the next stuff is a resync marker or the end. 
+ * @return 0 if not + */ +static inline int mpeg4_is_resync(MpegEncContext *s){ + const int bits_count= get_bits_count(&s->gb); + + if(s->workaround_bugs&FF_BUG_NO_PADDING){ + return 0; + } + + if(bits_count + 8 >= s->gb.size*8){ + int v= show_bits(&s->gb, 8); + v|= 0x7F >> (7-(bits_count&7)); + + if(v==0x7F) + return 1; + }else{ + if(show_bits(&s->gb, 16) == ff_mpeg4_resync_prefix[bits_count&7]){ + int len; + GetBitContext gb= s->gb; + + skip_bits(&s->gb, 1); + align_get_bits(&s->gb); + + for(len=0; len<32; len++){ + if(get_bits1(&s->gb)) break; + } + + s->gb= gb; + + if(len>=ff_mpeg4_get_video_packet_prefix_length(s)) + return 1; + } + } + return 0; +} + +/** + * decodes the next video packet. + * @return <0 if something went wrong */ -static int decode_video_packet_header(MpegEncContext *s, GetBitContext *gb) +static int mpeg4_decode_video_packet_header(MpegEncContext *s) { - int bits; int mb_num_bits= av_log2(s->mb_num - 1) + 1; - int header_extension=0, mb_num; -/* printf("%X\n", show_bits(&gb, 24)); */ -/* printf("parse_video_packet_header\n"); */ -/* if(show_aligned_bits(gb, 1, 16) != 0) return -1; */ - + int header_extension=0, mb_num, len; + /* is there enough space left for a video packet + header */ - if( get_bits_count(gb) > gb->size*8-20) return -1; + if( get_bits_count(&s->gb) > s->gb.size*8-20) return -1; -/* printf("resync at %d %d\n", s->mb_x, s->mb_y); */ -/* skip_bits(gb, 1); */ -/* align_get_bits(gb); */ - if(get_bits(gb, 16)!=0){ - printf("internal error while decoding video packet header\n"); + for(len=0; len<32; len++){ + if(get_bits1(&s->gb)) break; } -/* printf("%X\n", show_bits(gb, 24)); */ - bits=0; - while(!get_bits1(gb) && bits<30) bits++; - if((s->pict_type == P_TYPE || s->pict_type == S_TYPE) && bits != s->f_code-1){ - printf("marker does not match f_code (is: %d should be: %d pos: %d end %d x: %d y: %d)\n", - bits+1, s->f_code, get_bits_count(gb), gb->size*8, s->mb_x, s->mb_y); - return -1; - }else if(s->pict_type == I_TYPE && bits 
!= 0){ - printf("marker too long\n"); - return -1; - }else if(s->pict_type == B_TYPE && bits != MAX(MAX(s->f_code, s->b_code)-1, 1)){ - printf("marker does not match f/b_code\n"); + if(len!=ff_mpeg4_get_video_packet_prefix_length(s)){ + printf("marker does not match f_code\n"); return -1; } -/* printf("%X\n", show_bits(gb, 24)); */ - + if(s->shape != RECT_SHAPE){ - header_extension= get_bits1(gb); - /* FIXME more stuff here */ + header_extension= get_bits1(&s->gb); + //FIXME more stuff here } - mb_num= get_bits(gb, mb_num_bits); - if(mb_num < s->mb_x + s->mb_y*s->mb_width || mb_num>=s->mb_num){ - fprintf(stderr, "illegal mb_num in video packet (%d %d) \n", mb_num, s->mb_x + s->mb_y*s->mb_width); + mb_num= get_bits(&s->gb, mb_num_bits); + if(mb_num>=s->mb_num){ + fprintf(stderr, "illegal mb_num in video packet (%d %d) \n", mb_num, s->mb_num); return -1; } + s->mb_x= mb_num % s->mb_width; + s->mb_y= mb_num / s->mb_width; if(s->shape != BIN_ONLY_SHAPE){ - s->next_resync_qscale= get_bits(gb, 5); - if(s->next_resync_qscale==0) - s->next_resync_qscale= s->qscale; - if(s->next_resync_qscale==0){ - fprintf(stderr, "qscale==0\n"); - return -1; - } + int qscale= get_bits(&s->gb, s->quant_precision); + if(qscale) + s->qscale= qscale; } if(s->shape == RECT_SHAPE){ - header_extension= get_bits1(gb); + header_extension= get_bits1(&s->gb); } if(header_extension){ int time_increment; int time_incr=0; - while (get_bits1(gb) != 0) + while (get_bits1(&s->gb) != 0) time_incr++; - check_marker(gb, "before time_increment in video packed header"); - time_increment= get_bits(gb, s->time_increment_bits); - check_marker(gb, "before vop_coding_type in video packed header"); - - skip_bits(gb, 2); /* vop coding type */ - /* FIXME not rect stuff here */ + check_marker(&s->gb, "before time_increment in video packed header"); + time_increment= get_bits(&s->gb, s->time_increment_bits); + check_marker(&s->gb, "before vop_coding_type in video packed header"); + + skip_bits(&s->gb, 2); /* vop coding 
type */ + //FIXME not rect stuff here if(s->shape != BIN_ONLY_SHAPE){ - skip_bits(gb, 3); /* intra dc vlc threshold */ - + skip_bits(&s->gb, 3); /* intra dc vlc threshold */ +//FIXME dont just ignore everything if(s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE && s->num_sprite_warping_points){ mpeg4_decode_sprite_trajectory(s); fprintf(stderr, "untested\n"); } - /* FIXME reduced res stuff here */ - + //FIXME reduced res stuff here + if (s->pict_type != I_TYPE) { - int f_code = get_bits(gb, 3); /* fcode_for */ + int f_code = get_bits(&s->gb, 3); /* fcode_for */ if(f_code==0){ printf("Error, video packet header damaged (f_code=0)\n"); } } if (s->pict_type == B_TYPE) { - int b_code = get_bits(gb, 3); + int b_code = get_bits(&s->gb, 3); if(b_code==0){ printf("Error, video packet header damaged (b_code=0)\n"); } - } + } } } - /* FIXME new-pred stuff */ - -/* printf("parse ok %d %d %d %d\n", mb_num, s->mb_x + s->mb_y*s->mb_width, get_bits_count(gb), get_bits_count(&s->gb)); */ + //FIXME new-pred stuff + +//printf("parse ok %d %d %d %d\n", mb_num, s->mb_x + s->mb_y*s->mb_width, get_bits_count(gb), get_bits_count(&s->gb)); - return mb_num; + return 0; } void ff_mpeg4_clean_buffers(MpegEncContext *s) @@ -2112,10 +2342,12 @@ void ff_mpeg4_clean_buffers(MpegEncContext *s) c_wrap= s->block_wrap[4]; c_xy= s->mb_y*c_wrap + s->mb_x; +#if 0 /* clean DC */ memsetw(s->dc_val[0] + l_xy, 1024, l_wrap*2+1); memsetw(s->dc_val[1] + c_xy, 1024, c_wrap+1); memsetw(s->dc_val[2] + c_xy, 1024, c_wrap+1); +#endif /* clean AC */ memset(s->ac_val[0] + l_xy, 0, (l_wrap*2+1)*16*sizeof(INT16)); @@ -2123,156 +2355,184 @@ void ff_mpeg4_clean_buffers(MpegEncContext *s) memset(s->ac_val[2] + c_xy, 0, (c_wrap +1)*16*sizeof(INT16)); /* clean MV */ - /* we cant clear the MVs as they might be needed by a b frame */ -/* memset(s->motion_val + l_xy, 0, (l_wrap*2+1)*2*sizeof(INT16)); */ -/* memset(s->motion_val, 0, 2*sizeof(INT16)*(2 + s->mb_width*2)*(2 + s->mb_height*2)); */ + // we cant clear the 
MVs as they might be needed by a b frame +// memset(s->motion_val + l_xy, 0, (l_wrap*2+1)*2*sizeof(INT16)); +// memset(s->motion_val, 0, 2*sizeof(INT16)*(2 + s->mb_width*2)*(2 + s->mb_height*2)); s->last_mv[0][0][0]= s->last_mv[0][0][1]= s->last_mv[1][0][0]= s->last_mv[1][0][1]= 0; } -/* searches for the next resync marker clears ac,dc,mc, and sets s->next_resync_gb, s->mb_num_left */ -int ff_mpeg4_resync(MpegEncContext *s) -{ - GetBitContext gb; +/** + * decodes the group of blocks / video packet header. + * @return <0 if no resync found + */ +int ff_h263_resync(MpegEncContext *s){ + int left, ret; + + if(s->codec_id==CODEC_ID_MPEG4) + skip_bits1(&s->gb); + + align_get_bits(&s->gb); - /* search & parse next resync marker */ + if(show_bits(&s->gb, 16)==0){ + if(s->codec_id==CODEC_ID_MPEG4) + ret= mpeg4_decode_video_packet_header(s); + else + ret= h263_decode_gob_header(s); + if(ret>=0) + return 0; + } + //ok, its not where its supposed to be ... + s->gb= s->last_resync_gb; + align_get_bits(&s->gb); + left= s->gb.size*8 - get_bits_count(&s->gb); + + for(;left>16+1+5+5; left-=8){ + if(show_bits(&s->gb, 16)==0){ + GetBitContext bak= s->gb; + + if(s->codec_id==CODEC_ID_MPEG4) + ret= mpeg4_decode_video_packet_header(s); + else + ret= h263_decode_gob_header(s); + if(ret>=0) + return 0; - gb= s->next_resync_gb; - align_get_bits(&gb); -/* printf("mpeg4_resync %d next:%d \n", get_bits_count(&gb), get_bits_count(&s->next_resync_gb)); */ - for(;;) { - int v= show_bits(&gb, 24); - if( get_bits_count(&gb) >= gb.size*8-24 || v == 1 /* start-code */){ - s->mb_num_left= s->mb_num - s->mb_x - s->mb_y*s->mb_width; -/* printf("mpeg4_resync end\n"); */ - s->gb= s->next_resync_gb; /* continue at the next resync marker */ - return -1; - }else if(v>>8 == 0){ - int next; - s->next_resync_pos= get_bits_count(&gb); + s->gb= bak; + } + skip_bits(&s->gb, 8); + } + + return -1; +} - next= decode_video_packet_header(s, &gb); - if(next >= 0){ - s->mb_num_left= next - s->mb_x - 
s->mb_y*s->mb_width; - break; +/** + * @param n either 0 for the x component or 1 for y + * @returns the average MV for a GMC MB + */ +static inline int get_amv(MpegEncContext *s, int n){ + int x, y, mb_v, sum, dx, dy, shift; + int len = 1 << (s->f_code + 4); + const int a= s->sprite_warping_accuracy; + + if(s->real_sprite_warping_points==1){ + if(s->divx_version==500 && s->divx_build==413) + sum= s->sprite_offset[0][n] / (1<<(a - s->quarter_sample)); + else + sum= RSHIFT(s->sprite_offset[0][n]<quarter_sample, a); + }else{ + dx= s->sprite_delta[n][0]; + dy= s->sprite_delta[n][1]; + shift= s->sprite_shift[0]; + if(n) dy -= 1<<(shift + a + 1); + else dx -= 1<<(shift + a + 1); + mb_v= s->sprite_offset[0][n] + dx*s->mb_x*16 + dy*s->mb_y*16; + + sum=0; + for(y=0; y<16; y++){ + int v; + + v= mb_v + dy*y; + //XXX FIXME optimize + for(x=0; x<16; x++){ + sum+= v>>shift; + v+= dx; } - - align_get_bits(&gb); } - skip_bits(&gb, 8); + sum /= 256; + sum= RSHIFT(sum<quarter_sample, a); } - s->next_resync_gb=gb; - - return 0; -} -static inline void init_block_index(MpegEncContext *s) -{ - s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1 + s->mb_x*2; - s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1) + s->mb_x*2; - s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1 + s->mb_x*2; - s->block_index[3]= s->block_wrap[0]*(s->mb_y*2 + 2) + s->mb_x*2; - s->block_index[4]= s->block_wrap[4]*(s->mb_y + 1) + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x; - s->block_index[5]= s->block_wrap[4]*(s->mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x; -} + if (sum < -len) sum= -len; + else if (sum >= len) sum= len-1; -static inline void update_block_index(MpegEncContext *s) -{ - s->block_index[0]+=2; - s->block_index[1]+=2; - s->block_index[2]+=2; - s->block_index[3]+=2; - s->block_index[4]++; - s->block_index[5]++; + return sum; } /** - * decodes the first & second partition - * returns error type or 0 if no error + * decodes first partition. 
+ * @return number of MBs decoded or <0 if an error occured */ -int ff_mpeg4_decode_partitions(MpegEncContext *s) -{ - static const INT8 quant_tab[4] = { -1, -2, 1, 2 }; +static int mpeg4_decode_partition_a(MpegEncContext *s){ int mb_num; - + static const INT8 quant_tab[4] = { -1, -2, 1, 2 }; + /* decode first partition */ mb_num=0; s->first_slice_line=1; - s->mb_x= s->resync_mb_x; - for(s->mb_y= s->resync_mb_y; mb_num < s->mb_num_left; s->mb_y++){ - init_block_index(s); - for(; mb_num < s->mb_num_left && s->mb_xmb_width; s->mb_x++){ + for(; s->mb_ymb_height; s->mb_y++){ + ff_init_block_index(s); + for(; s->mb_xmb_width; s->mb_x++){ const int xy= s->mb_x + s->mb_y*s->mb_width; int cbpc; int dir=0; - + mb_num++; - update_block_index(s); + ff_update_block_index(s); if(s->mb_x == s->resync_mb_x && s->mb_y == s->resync_mb_y+1) s->first_slice_line=0; - + if(s->mb_x==0) PRINT_MB_TYPE("\n"); if(s->pict_type==I_TYPE){ int i; + if(show_bits(&s->gb, 19)==DC_MARKER){ + return mb_num-1; + } + PRINT_MB_TYPE("I"); cbpc = get_vlc2(&s->gb, intra_MCBPC_vlc.table, INTRA_MCBPC_VLC_BITS, 1); if (cbpc < 0){ + fprintf(stderr, "cbpc corrupted at %d %d\n", s->mb_x, s->mb_y); - return DECODING_DESYNC; + return -1; } s->cbp_table[xy]= cbpc & 3; s->mb_type[xy]= MB_TYPE_INTRA; s->mb_intra = 1; if(cbpc & 4) { - s->qscale += quant_tab[get_bits(&s->gb, 2)]; - if (s->qscale < 1) - s->qscale = 1; - else if (s->qscale > 31) - s->qscale = 31; - h263_dc_scale(s); + change_qscale(s, quant_tab[get_bits(&s->gb, 2)]); } s->qscale_table[xy]= s->qscale; s->mbintra_table[xy]= 1; for(i=0; i<6; i++){ int dc_pred_dir; - int dc= mpeg4_decode_dc(s, i, &dc_pred_dir); + int dc= mpeg4_decode_dc(s, i, &dc_pred_dir); if(dc < 0){ fprintf(stderr, "DC corrupted at %d %d\n", s->mb_x, s->mb_y); - return DECODING_DESYNC; + return -1; } dir<<=1; if(dc_pred_dir) dir|=1; } s->pred_dir_table[xy]= dir; + + s->error_status_table[xy]= AC_ERROR; }else{ /* P/S_TYPE */ - int mx, my, pred_x, pred_y; + int mx, my, pred_x, pred_y, 
bits; INT16 * const mot_val= s->motion_val[s->block_index[0]]; const int stride= s->block_wrap[0]*2; - if(get_bits1(&s->gb)){ + bits= show_bits(&s->gb, 17); + if(bits==MOTION_MARKER){ + return mb_num-1; + } + skip_bits1(&s->gb); + if(bits&0x10000){ /* skip mb */ s->mb_type[xy]= MB_TYPE_SKIPED; if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE){ - const int a= s->sprite_warping_accuracy; PRINT_MB_TYPE("G"); - if(s->divx_version==500 && s->divx_build==413){ - mx = s->sprite_offset[0][0] / (1<<(a-s->quarter_sample)); - my = s->sprite_offset[0][1] / (1<<(a-s->quarter_sample)); - }else{ - mx = RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample); - my = RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample); - s->mb_type[xy]= MB_TYPE_GMC | MB_TYPE_SKIPED; - } + mx= get_amv(s, 0); + my= get_amv(s, 1); }else{ PRINT_MB_TYPE("S"); - mx = 0; - my = 0; + mx=my=0; } mot_val[0 ]= mot_val[2 ]= mot_val[0+stride]= mot_val[2+stride]= mx; @@ -2282,29 +2542,31 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s) if(s->mbintra_table[xy]) ff_clean_intra_table_entries(s); + s->error_status_table[xy]= AC_ERROR; continue; } cbpc = get_vlc2(&s->gb, inter_MCBPC_vlc.table, INTER_MCBPC_VLC_BITS, 2); if (cbpc < 0){ fprintf(stderr, "cbpc corrupted at %d %d\n", s->mb_x, s->mb_y); - return DECODING_DESYNC; + return -1; } if (cbpc > 20) cbpc+=3; else if (cbpc == 20) fprintf(stderr, "Stuffing !"); - s->cbp_table[xy]= cbpc&(8+3); /* 8 is dquant */ - + s->cbp_table[xy]= cbpc&(8+3); //8 is dquant + s->mb_intra = ((cbpc & 4) != 0); - + if(s->mb_intra){ PRINT_MB_TYPE("I"); s->mbintra_table[xy]= 1; s->mb_type[xy]= MB_TYPE_INTRA; - mot_val[0 ]= mot_val[2 ]= + mot_val[0 ]= mot_val[2 ]= mot_val[0+stride]= mot_val[2+stride]= 0; mot_val[1 ]= mot_val[3 ]= mot_val[1+stride]= mot_val[3+stride]= 0; + s->error_status_table[xy]= DC_ERROR|AC_ERROR; }else{ if(s->mbintra_table[xy]) ff_clean_intra_table_entries(s); @@ -2312,38 +2574,26 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s) if(s->pict_type==S_TYPE && 
s->vol_sprite_usage==GMC_SPRITE && (cbpc & 16) == 0) s->mcsel= get_bits1(&s->gb); else s->mcsel= 0; - + if ((cbpc & 16) == 0) { PRINT_MB_TYPE("P"); /* 16x16 motion prediction */ s->mb_type[xy]= MB_TYPE_INTER; h263_pred_motion(s, 0, &pred_x, &pred_y); - if(!s->mcsel) - mx = h263_decode_motion(s, pred_x, s->f_code); - else { - const int a= s->sprite_warping_accuracy; - if(s->divx_version==500 && s->divx_build==413){ - mx = s->sprite_offset[0][0] / (1<<(a-s->quarter_sample)); - }else{ - mx = RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample); - } - } - if (mx >= 0xffff) - return DECODING_DESYNC; - - if(!s->mcsel) - my = h263_decode_motion(s, pred_y, s->f_code); - else{ - const int a= s->sprite_warping_accuracy; - if(s->divx_version==500 && s->divx_build==413){ - my = s->sprite_offset[0][1] / (1<<(a-s->quarter_sample)); - }else{ - my = RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample); - } + if(!s->mcsel){ + mx = h263_decode_motion(s, pred_x, s->f_code); + if (mx >= 0xffff) + return -1; + + my = h263_decode_motion(s, pred_y, s->f_code); + if (my >= 0xffff) + return -1; + } else { + mx = get_amv(s, 0); + my = get_amv(s, 1); } - if (my >= 0xffff) - return DECODING_DESYNC; + mot_val[0 ]= mot_val[2 ] = mot_val[0+stride]= mot_val[2+stride]= mx; mot_val[1 ]= mot_val[3 ]= @@ -2356,83 +2606,85 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s) INT16 *mot_val= h263_pred_motion(s, i, &pred_x, &pred_y); mx = h263_decode_motion(s, pred_x, s->f_code); if (mx >= 0xffff) - return DECODING_DESYNC; - + return -1; + my = h263_decode_motion(s, pred_y, s->f_code); if (my >= 0xffff) - return DECODING_DESYNC; + return -1; mot_val[0] = mx; mot_val[1] = my; } } + s->error_status_table[xy]= AC_ERROR; } } } s->mb_x= 0; } - if (s->pict_type==I_TYPE && get_bits(&s->gb, 19)!=DC_MARKER ) s->decoding_error= DECODING_DESYNC; - else if(s->pict_type!=I_TYPE && get_bits(&s->gb, 17)!=MOTION_MARKER) s->decoding_error= DECODING_DESYNC; - if(s->decoding_error== DECODING_DESYNC){ - fprintf(stderr, 
"marker missing after first partition at %d %d\n", s->mb_x, s->mb_y); - return DECODING_DESYNC; - } + return mb_num; +} + +/** + * decode second partition. + * @return <0 if an error occured + */ +static int mpeg4_decode_partition_b(MpegEncContext *s, int mb_count){ + int mb_num=0; + static const INT8 quant_tab[4] = { -1, -2, 1, 2 }; - /* decode second partition */ - mb_num=0; s->mb_x= s->resync_mb_x; - for(s->mb_y= s->resync_mb_y; mb_num < s->mb_num_left; s->mb_y++){ - init_block_index(s); - for(; mb_num < s->mb_num_left && s->mb_xmb_width; s->mb_x++){ + s->first_slice_line=1; + for(s->mb_y= s->resync_mb_y; mb_num < mb_count; s->mb_y++){ + ff_init_block_index(s); + for(; mb_num < mb_count && s->mb_xmb_width; s->mb_x++){ const int xy= s->mb_x + s->mb_y*s->mb_width; mb_num++; - update_block_index(s); - + ff_update_block_index(s); + if(s->mb_x == s->resync_mb_x && s->mb_y == s->resync_mb_y+1) + s->first_slice_line=0; + if(s->pict_type==I_TYPE){ int ac_pred= get_bits1(&s->gb); int cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1); if(cbpy<0){ fprintf(stderr, "cbpy corrupted at %d %d\n", s->mb_x, s->mb_y); - return DECODING_AC_LOST; + return -1; } - + s->cbp_table[xy]|= cbpy<<2; s->pred_dir_table[xy]|= ac_pred<<7; }else{ /* P || S_TYPE */ - if(s->mb_type[xy]&MB_TYPE_INTRA){ + if(s->mb_type[xy]&MB_TYPE_INTRA){ int dir=0,i; int ac_pred = get_bits1(&s->gb); int cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1); if(cbpy<0){ fprintf(stderr, "I cbpy corrupted at %d %d\n", s->mb_x, s->mb_y); - return DECODING_ACDC_LOST; + return -1; } - + if(s->cbp_table[xy] & 8) { - s->qscale += quant_tab[get_bits(&s->gb, 2)]; - if (s->qscale < 1) - s->qscale = 1; - else if (s->qscale > 31) - s->qscale = 31; - h263_dc_scale(s); + change_qscale(s, quant_tab[get_bits(&s->gb, 2)]); } s->qscale_table[xy]= s->qscale; for(i=0; i<6; i++){ int dc_pred_dir; - int dc= mpeg4_decode_dc(s, i, &dc_pred_dir); + int dc= mpeg4_decode_dc(s, i, &dc_pred_dir); if(dc < 0){ fprintf(stderr, "DC 
corrupted at %d %d\n", s->mb_x, s->mb_y); - return DECODING_ACDC_LOST; + return -1; } dir<<=1; if(dc_pred_dir) dir|=1; } - s->cbp_table[xy]&= 3; /* remove dquant */ + s->cbp_table[xy]&= 3; //remove dquant s->cbp_table[xy]|= cbpy<<2; s->pred_dir_table[xy]= dir | (ac_pred<<7); + s->error_status_table[xy]&= ~DC_ERROR; }else if(s->mb_type[xy]&MB_TYPE_SKIPED){ s->qscale_table[xy]= s->qscale; s->cbp_table[xy]= 0; @@ -2441,68 +2693,85 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s) if(cbpy<0){ fprintf(stderr, "P cbpy corrupted at %d %d\n", s->mb_x, s->mb_y); - return DECODING_ACDC_LOST; + return -1; } - + if(s->cbp_table[xy] & 8) { -/* fprintf(stderr, "dquant\n"); */ - s->qscale += quant_tab[get_bits(&s->gb, 2)]; - if (s->qscale < 1) - s->qscale = 1; - else if (s->qscale > 31) - s->qscale = 31; - h263_dc_scale(s); + change_qscale(s, quant_tab[get_bits(&s->gb, 2)]); } s->qscale_table[xy]= s->qscale; - s->cbp_table[xy]&= 3; /* remove dquant */ + s->cbp_table[xy]&= 3; //remove dquant s->cbp_table[xy]|= (cbpy^0xf)<<2; } } } + if(mb_num >= mb_count) return 0; s->mb_x= 0; } - - return 0; } -static int mpeg4_decode_partitioned_mb(MpegEncContext *s, - DCTELEM block[6][64]) +/** + * decodes the first & second partition + * @return <0 if error (and sets error type in the error_status_table) + */ +int ff_mpeg4_decode_partitions(MpegEncContext *s) { - int cbp, mb_type; - const int xy= s->mb_x + s->mb_y*s->mb_width; - - if(s->mb_x==s->resync_mb_x && s->mb_y==s->resync_mb_y){ /* Note resync_mb_{x,y}==0 at the start */ - int i; - int block_index_backup[6]; - int qscale= s->qscale; - - for(i=0; i<6; i++) block_index_backup[i]= s->block_index[i]; + int mb_num; + + mb_num= mpeg4_decode_partition_a(s); + if(mb_num<0) + return -1; + + if(s->resync_mb_x + s->resync_mb_y*s->mb_width + mb_num > s->mb_num){ + fprintf(stderr, "slice below monitor ...\n"); + return -1; + } - s->decoding_error= ff_mpeg4_decode_partitions(s); + s->mb_num_left= mb_num; + + if(s->pict_type==I_TYPE){ + 
if(get_bits(&s->gb, 19)!=DC_MARKER){ + fprintf(stderr, "marker missing after first I partition at %d %d\n", s->mb_x, s->mb_y); + return -1; + }else + s->error_status_table[s->mb_x + s->mb_y*s->mb_width-1]|= MV_END|DC_END; + }else{ + if(get_bits(&s->gb, 17)!=MOTION_MARKER){ + fprintf(stderr, "marker missing after first P partition at %d %d\n", s->mb_x, s->mb_y); + return -1; + }else + s->error_status_table[s->mb_x + s->mb_y*s->mb_width-1]|= MV_END; + } + + if( mpeg4_decode_partition_b(s, mb_num) < 0){ + return -1; + } + + s->error_status_table[s->mb_x + s->mb_y*s->mb_width-1]|= DC_END; - for(i=0; i<6; i++) s->block_index[i]= block_index_backup[i]; - s->first_slice_line=1; - s->mb_x= s->resync_mb_x; - s->mb_y= s->resync_mb_y; - s->qscale= qscale; - h263_dc_scale(s); + return 0; +} - if(s->decoding_error==DECODING_DESYNC) return -1; - } +/** + * decode partition C of one MB. + * @return <0 if an error occured + */ +static int mpeg4_decode_partitioned_mb(MpegEncContext *s, DCTELEM block[6][64]) +{ + int cbp, mb_type; + const int xy= s->mb_x + s->mb_y*s->mb_width; mb_type= s->mb_type[xy]; - if(s->decoding_error) - cbp=0; - else - cbp = s->cbp_table[xy]; + cbp = s->cbp_table[xy]; - if(s->decoding_error!=DECODING_ACDC_LOST && s->qscale_table[xy] != s->qscale){ + if(s->qscale_table[xy] != s->qscale){ s->qscale= s->qscale_table[xy]; - h263_dc_scale(s); + s->y_dc_scale= s->y_dc_scale_table[ s->qscale ]; + s->c_dc_scale= s->c_dc_scale_table[ s->qscale ]; } - + if (s->pict_type == P_TYPE || s->pict_type==S_TYPE) { int i; for(i=0; i<4; i++){ @@ -2524,114 +2793,78 @@ static int mpeg4_decode_partitioned_mb(MpegEncContext *s, s->mcsel=0; s->mb_skiped = 1; } - return 0; - }else if(s->mb_intra && s->decoding_error!=DECODING_ACDC_LOST){ + }else if(s->mb_intra){ s->ac_pred = s->pred_dir_table[xy]>>7; /* decode each block */ for (i = 0; i < 6; i++) { - int ret= mpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, 1); - if(ret==DECODING_AC_LOST){ - fprintf(stderr, "texture corrupted 
at %d %d (trying to continue with mc/dc only)\n", s->mb_x, s->mb_y); - s->decoding_error=DECODING_AC_LOST; - cbp=0; - }else if(ret==DECODING_ACDC_LOST){ - fprintf(stderr, "dc corrupted at %d %d (trying to continue with mc only)\n", s->mb_x, s->mb_y); - s->decoding_error=DECODING_ACDC_LOST; - break; + if(mpeg4_decode_block(s, block[i], i, cbp&32, 1) < 0){ + fprintf(stderr, "texture corrupted at %d %d\n", s->mb_x, s->mb_y); + return -1; } + cbp+=cbp; } }else if(!s->mb_intra){ -/* s->mcsel= 0; // FIXME do we need to init that */ - +// s->mcsel= 0; //FIXME do we need to init that + s->mv_dir = MV_DIR_FORWARD; if (mb_type&MB_TYPE_INTER4V) { s->mv_type = MV_TYPE_8X8; } else { s->mv_type = MV_TYPE_16X16; } - if(s->decoding_error==0 && cbp){ - /* decode each block */ - for (i = 0; i < 6; i++) { - int ret= mpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, 0); - if(ret==DECODING_AC_LOST){ - fprintf(stderr, "texture corrupted at %d %d (trying to continue with mc/dc only)\n", s->mb_x, s->mb_y); - s->decoding_error=DECODING_AC_LOST; - break; - } + /* decode each block */ + for (i = 0; i < 6; i++) { + if(mpeg4_decode_block(s, block[i], i, cbp&32, 0) < 0){ + fprintf(stderr, "texture corrupted at %d %d (trying to continue with mc/dc only)\n", s->mb_x, s->mb_y); + return -1; } + cbp+=cbp; } } } else { /* I-Frame */ int i; s->mb_intra = 1; s->ac_pred = s->pred_dir_table[xy]>>7; - + /* decode each block */ for (i = 0; i < 6; i++) { - int ret= mpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, 1); - if(ret==DECODING_AC_LOST){ + if(mpeg4_decode_block(s, block[i], i, cbp&32, 1) < 0){ fprintf(stderr, "texture corrupted at %d %d (trying to continue with dc only)\n", s->mb_x, s->mb_y); - s->decoding_error=DECODING_AC_LOST; - cbp=0; - }else if(ret==DECODING_ACDC_LOST){ - fprintf(stderr, "dc corrupted at %d %d\n", s->mb_x, s->mb_y); return -1; } + cbp+=cbp; } } - return 0; -} -#if 0 -static inline void decode_interlaced_info(MpegEncContext *s, int cbp, int mb_type){ - 
s->mv_type= 0; - if(!s->progressive_sequence){ - if(cbp || s->mb_intra) - s->interlaced_dct= get_bits1(&s->gb); - - if(!s->mb_intra){ - if( s->pict_type==P_TYPE /* FIXME check that 4MV is forbidden */ - || (s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE && !s->mcsel) - || (s->pict_type==B_TYPE && mb_type!=0) ){ + s->error_status_table[xy]&= ~AC_ERROR; - if(get_bits1(&s->gb)){ - s->mv_type= MV_TYPE_FIELD; + /* per-MB end of slice check */ - if( s->pict_type==P_TYPE - || (s->pict_type==B_TYPE && mb_type!=2)){ - s->field_select[0][0]= get_bits1(&s->gb); - s->field_select[0][1]= get_bits1(&s->gb); - } - if(s->pict_type==B_TYPE && mb_type!=3){ - s->field_select[1][0]= get_bits1(&s->gb); - s->field_select[1][1]= get_bits1(&s->gb); - } - }else - s->mv_type= 0; - } - } + if(--s->mb_num_left <= 0){ +//printf("%06X %d\n", show_bits(&s->gb, 24), s->gb.size*8 - get_bits_count(&s->gb)); + if(mpeg4_is_resync(s)) + return SLICE_END; + else + return SLICE_NOEND; + }else{ + if(s->cbp_table[xy+1] && mpeg4_is_resync(s)) + return SLICE_END; + else + return SLICE_OK; } } -#endif -int h263_decode_mb(MpegEncContext *s, - DCTELEM block[6][64]) +int ff_h263_decode_mb(MpegEncContext *s, + DCTELEM block[6][64]) { int cbpc, cbpy, i, cbp, pred_x, pred_y, mx, my, dquant; INT16 *mot_val; static INT8 quant_tab[4] = { -1, -2, 1, 2 }; - if(s->mb_x==0) PRINT_MB_TYPE("\n"); - - if(s->resync_marker){ - if(s->resync_mb_x == s->mb_x && s->resync_mb_y+1 == s->mb_y){ - s->first_slice_line=0; - } - } + s->error_status_table[s->mb_x + s->mb_y*s->mb_width]= 0; - if(s->data_partitioning && s->pict_type!=B_TYPE) - return mpeg4_decode_partitioned_mb(s, block); + if(s->mb_x==0) PRINT_MB_TYPE("\n"); if (s->pict_type == P_TYPE || s->pict_type==S_TYPE) { if (get_bits1(&s->gb)) { @@ -2642,21 +2875,10 @@ int h263_decode_mb(MpegEncContext *s, s->mv_dir = MV_DIR_FORWARD; s->mv_type = MV_TYPE_16X16; if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE){ - const int a= s->sprite_warping_accuracy; -/* int l 
= (1 << (s->f_code - 1)) * 32; */ PRINT_MB_TYPE("G"); s->mcsel=1; - if(s->divx_version==500 && s->divx_build==413){ - s->mv[0][0][0] = s->sprite_offset[0][0] / (1<<(a-s->quarter_sample)); - s->mv[0][0][1] = s->sprite_offset[0][1] / (1<<(a-s->quarter_sample)); - }else{ - s->mv[0][0][0] = RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample); - s->mv[0][0][1] = RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample); - } -/* if (s->mv[0][0][0] < -l) s->mv[0][0][0]= -l; - else if (s->mv[0][0][0] >= l) s->mv[0][0][0]= l-1; - if (s->mv[0][0][1] < -l) s->mv[0][0][1]= -l; - else if (s->mv[0][0][1] >= l) s->mv[0][0][1]= l-1;*/ + s->mv[0][0][0]= get_amv(s, 0); + s->mv[0][0][1]= get_amv(s, 1); s->mb_skiped = 0; }else{ @@ -2666,53 +2888,40 @@ int h263_decode_mb(MpegEncContext *s, s->mv[0][0][1] = 0; s->mb_skiped = 1; } - return 0; + goto end; } cbpc = get_vlc2(&s->gb, inter_MCBPC_vlc.table, INTER_MCBPC_VLC_BITS, 2); - /* fprintf(stderr, "\tCBPC: %d", cbpc); */ + //fprintf(stderr, "\tCBPC: %d", cbpc); if (cbpc < 0) return -1; if (cbpc > 20) cbpc+=3; else if (cbpc == 20) fprintf(stderr, "Stuffing !"); - + dquant = cbpc & 8; s->mb_intra = ((cbpc & 4) != 0); if (s->mb_intra) goto intra; - + if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE && (cbpc & 16) == 0) s->mcsel= get_bits1(&s->gb); else s->mcsel= 0; cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1); cbp = (cbpc & 3) | ((cbpy ^ 0xf) << 2); if (dquant) { - s->qscale += quant_tab[get_bits(&s->gb, 2)]; - if (s->qscale < 1) - s->qscale = 1; - else if (s->qscale > 31) - s->qscale = 31; - h263_dc_scale(s); + change_qscale(s, quant_tab[get_bits(&s->gb, 2)]); } - if((!s->progressive_sequence) && (cbp || s->workaround_bugs==2)) + if((!s->progressive_sequence) && (cbp || (s->workaround_bugs&FF_BUG_XVID_ILACE))) s->interlaced_dct= get_bits1(&s->gb); - + s->mv_dir = MV_DIR_FORWARD; if ((cbpc & 16) == 0) { if(s->mcsel){ - const int a= s->sprite_warping_accuracy; PRINT_MB_TYPE("G"); /* 16x16 global motion prediction */ 
s->mv_type = MV_TYPE_16X16; -/* int l = (1 << (s->f_code - 1)) * 32; */ - if(s->divx_version==500 && s->divx_build==413){ - mx = s->sprite_offset[0][0] / (1<<(a-s->quarter_sample)); - my = s->sprite_offset[0][1] / (1<<(a-s->quarter_sample)); - }else{ - mx = RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample); - my = RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample); - } -/* int l = (1 << (s->f_code - 1)) * 32; */ + mx= get_amv(s, 0); + my= get_amv(s, 1); s->mv[0][0][0] = mx; s->mv[0][0][1] = my; }else if((!s->progressive_sequence) && get_bits1(&s->gb)){ @@ -2724,12 +2933,12 @@ int h263_decode_mb(MpegEncContext *s, s->field_select[0][1]= get_bits1(&s->gb); h263_pred_motion(s, 0, &pred_x, &pred_y); - + for(i=0; i<2; i++){ mx = h263_decode_motion(s, pred_x, s->f_code); if (mx >= 0xffff) return -1; - + my = h263_decode_motion(s, pred_y/2, s->f_code); if (my >= 0xffff) return -1; @@ -2746,22 +2955,22 @@ int h263_decode_mb(MpegEncContext *s, mx = h263p_decode_umotion(s, pred_x); else mx = h263_decode_motion(s, pred_x, s->f_code); - + if (mx >= 0xffff) return -1; - + if (s->umvplus_dec) my = h263p_decode_umotion(s, pred_y); else my = h263_decode_motion(s, pred_y, s->f_code); - + if (my >= 0xffff) return -1; s->mv[0][0][0] = mx; s->mv[0][0][1] = my; if (s->umvplus_dec && (mx - pred_x) == 1 && (my - pred_y) == 1) - skip_bits1(&s->gb); /* Bit stuffing to prevent PSC */ + skip_bits1(&s->gb); /* Bit stuffing to prevent PSC */ } } else { PRINT_MB_TYPE("4"); @@ -2774,10 +2983,10 @@ int h263_decode_mb(MpegEncContext *s, mx = h263_decode_motion(s, pred_x, s->f_code); if (mx >= 0xffff) return -1; - + if (s->umvplus_dec) my = h263p_decode_umotion(s, pred_y); - else + else my = h263_decode_motion(s, pred_y, s->f_code); if (my >= 0xffff) return -1; @@ -2790,27 +2999,27 @@ int h263_decode_mb(MpegEncContext *s, } } } else if(s->pict_type==B_TYPE) { - int modb1; /* first bit of modb */ - int modb2; /* second bit of modb */ + int modb1; // first bit of modb + int modb2; // second bit of 
modb int mb_type; uint16_t time_pp; uint16_t time_pb; int xy; - s->mb_intra = 0; /* B-frames never contain intra blocks */ - s->mcsel=0; /* ... true gmc blocks */ + s->mb_intra = 0; //B-frames never contain intra blocks + s->mcsel=0; // ... true gmc blocks if(s->mb_x==0){ for(i=0; i<2; i++){ - s->last_mv[i][0][0]= - s->last_mv[i][0][1]= - s->last_mv[i][1][0]= + s->last_mv[i][0][0]= + s->last_mv[i][0][1]= + s->last_mv[i][1][0]= s->last_mv[i][1][1]= 0; } } /* if we skipped it in the future P Frame than skip it now too */ - s->mb_skiped= s->mbskip_table[s->mb_y * s->mb_width + s->mb_x]; /* Note, skiptab=0 if last was GMC */ + s->mb_skiped= s->mbskip_table[s->mb_y * s->mb_width + s->mb_x]; // Note, skiptab=0 if last was GMC if(s->mb_skiped){ /* skip mb */ @@ -2824,16 +3033,16 @@ int h263_decode_mb(MpegEncContext *s, s->mv[1][0][0] = 0; s->mv[1][0][1] = 0; PRINT_MB_TYPE("s"); - return 0; + goto end; } - modb1= get_bits1(&s->gb); + modb1= get_bits1(&s->gb); if(modb1){ - mb_type=4; /* like MB_TYPE_B_DIRECT but no vectors coded */ + mb_type=4; //like MB_TYPE_B_DIRECT but no vectors coded cbp=0; }else{ int field_mv; - + modb2= get_bits1(&s->gb); mb_type= get_vlc2(&s->gb, mb_type_b_vlc.table, MB_TYPE_B_VLC_BITS, 1); if(modb2) cbp= 0; @@ -2841,12 +3050,7 @@ int h263_decode_mb(MpegEncContext *s, if (mb_type!=MB_TYPE_B_DIRECT && cbp) { if(get_bits1(&s->gb)){ - s->qscale +=get_bits1(&s->gb)*4 - 2; - if (s->qscale < 1) - s->qscale = 1; - else if (s->qscale > 31) - s->qscale = 31; - h263_dc_scale(s); + change_qscale(s, get_bits1(&s->gb)*4 - 2); } } field_mv=0; @@ -2880,7 +3084,7 @@ int h263_decode_mb(MpegEncContext *s, s->last_mv[0][1][0]= s->last_mv[0][0][0]= s->mv[0][0][0] = mx; s->last_mv[0][1][1]= s->last_mv[0][0][1]= s->mv[0][0][1] = my; } - + if(mb_type!=MB_TYPE_B_FORW){ s->mv_dir |= MV_DIR_BACKWARD; @@ -2896,7 +3100,7 @@ int h263_decode_mb(MpegEncContext *s, if(mb_type!=MB_TYPE_B_BACKW){ s->mv_dir = MV_DIR_FORWARD; - + for(i=0; i<2; i++){ mx = h263_decode_motion(s, 
s->last_mv[0][i][0] , s->f_code); my = h263_decode_motion(s, s->last_mv[0][i][1]/2, s->f_code); @@ -2904,7 +3108,7 @@ int h263_decode_mb(MpegEncContext *s, s->last_mv[0][i][1]= (s->mv[0][i][1] = my)*2; } } - + if(mb_type!=MB_TYPE_B_FORW){ s->mv_dir |= MV_DIR_BACKWARD; @@ -2919,24 +3123,24 @@ int h263_decode_mb(MpegEncContext *s, PRINT_MB_TYPE(mb_type==MB_TYPE_B_FORW ? "f" : (mb_type==MB_TYPE_B_BACKW ? "b" : "t")); } } - + if(mb_type==4 || mb_type==MB_TYPE_B_DIRECT){ int mb_index= s->mb_x + s->mb_y*s->mb_width; int i; - + if(mb_type==4) mx=my=0; else{ mx = h263_decode_motion(s, 0, 1); my = h263_decode_motion(s, 0, 1); } - + s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT; xy= s->block_index[0]; time_pp= s->pp_time; time_pb= s->pb_time; - - /* FIXME avoid divides */ + + //FIXME avoid divides switch(s->co_located_type_table[mb_index]){ case 0: s->mv_type= MV_TYPE_16X16; @@ -2944,7 +3148,7 @@ int h263_decode_mb(MpegEncContext *s, s->mv[0][0][1] = s->motion_val[xy][1]*time_pb/time_pp + my; s->mv[1][0][0] = mx ? s->mv[0][0][0] - s->motion_val[xy][0] : s->motion_val[xy][0]*(time_pb - time_pp)/time_pp; - s->mv[1][0][1] = my ? s->mv[0][0][1] - s->motion_val[xy][1] + s->mv[1][0][1] = my ? s->mv[0][0][1] - s->motion_val[xy][1] : s->motion_val[xy][1]*(time_pb - time_pp)/time_pp; PRINT_MB_TYPE(mb_type==4 ? "D" : "S"); break; @@ -2956,7 +3160,7 @@ int h263_decode_mb(MpegEncContext *s, s->mv[0][i][1] = s->motion_val[xy][1]*time_pb/time_pp + my; s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->motion_val[xy][0] : s->motion_val[xy][0]*(time_pb - time_pp)/time_pp; - s->mv[1][i][1] = my ? s->mv[0][i][1] - s->motion_val[xy][1] + s->mv[1][i][1] = my ? s->mv[0][i][1] - s->motion_val[xy][1] : s->motion_val[xy][1]*(time_pb - time_pp)/time_pp; } PRINT_MB_TYPE("4"); @@ -2975,14 +3179,14 @@ int h263_decode_mb(MpegEncContext *s, s->mv[0][i][1] = s->field_mv_table[mb_index][i][1]*time_pb/time_pp + my; s->mv[1][i][0] = mx ? 
s->mv[0][i][0] - s->field_mv_table[mb_index][i][0] : s->field_mv_table[mb_index][i][0]*(time_pb - time_pp)/time_pp; - s->mv[1][i][1] = my ? s->mv[0][i][1] - s->field_mv_table[mb_index][i][1] + s->mv[1][i][1] = my ? s->mv[0][i][1] - s->field_mv_table[mb_index][i][1] : s->field_mv_table[mb_index][i][1]*(time_pb - time_pp)/time_pp; } PRINT_MB_TYPE("="); break; } } - + if(mb_type<0 || mb_type>4){ printf("illegal MB_type\n"); return -1; @@ -3001,61 +3205,81 @@ intra: s->h263_aic_dir = get_bits1(&s->gb); } PRINT_MB_TYPE(s->ac_pred ? "A" : "I"); - + cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1); if(cbpy<0) return -1; cbp = (cbpc & 3) | (cbpy << 2); if (dquant) { - s->qscale += quant_tab[get_bits(&s->gb, 2)]; - if (s->qscale < 1) - s->qscale = 1; - else if (s->qscale > 31) - s->qscale = 31; - h263_dc_scale(s); + change_qscale(s, quant_tab[get_bits(&s->gb, 2)]); } + if(!s->progressive_sequence) s->interlaced_dct= get_bits1(&s->gb); /* decode each block */ if (s->h263_pred) { for (i = 0; i < 6; i++) { - if (mpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, 1) < 0) + if (mpeg4_decode_block(s, block[i], i, cbp&32, 1) < 0) return -1; + cbp+=cbp; } } else { for (i = 0; i < 6; i++) { - if (h263_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0) + if (h263_decode_block(s, block[i], i, cbp&32) < 0) return -1; + cbp+=cbp; } } - return 0; + goto end; } /* decode each block */ if (s->h263_pred) { for (i = 0; i < 6; i++) { - if (mpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, 0) < 0) + if (mpeg4_decode_block(s, block[i], i, cbp&32, 0) < 0) return -1; + cbp+=cbp; } } else { for (i = 0; i < 6; i++) { - if (h263_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0) + if (h263_decode_block(s, block[i], i, cbp&32) < 0) return -1; + cbp+=cbp; } } - return 0; +end: + + /* per-MB end of slice check */ + if(s->codec_id==CODEC_ID_MPEG4){ + if(mpeg4_is_resync(s)){ + if(s->pict_type==B_TYPE && s->mbskip_table[s->mb_y * s->mb_width + s->mb_x+1]) + return SLICE_OK; 
+ return SLICE_END; + } + }else{ + if(get_bits_count(&s->gb) + 7 >= s->gb.size*8){ + int v= show_bits(&s->gb, 8) >> (((get_bits_count(&s->gb)-1)&7)+1); + if(v==0) + return SLICE_END; + }else{ + if(show_bits(&s->gb, 16)==0) + return SLICE_END; + } + } + + return SLICE_OK; } static int h263_decode_motion(MpegEncContext * s, int pred, int f_code) { int code, val, sign, shift, l; - code = get_vlc2(&s->gb, mv_vlc.table, MV_VLC_BITS, 2); if (code < 0) return 0xffff; if (code == 0) return pred; + sign = get_bits1(&s->gb); shift = f_code - 1; val = (code - 1) << shift; @@ -3068,7 +3292,7 @@ static int h263_decode_motion(MpegEncContext * s, int pred, int f_code) /* modulo decoding */ if (!s->h263_long_vectors) { - l = (1 << (f_code - 1)) * 32; + l = 1 << (f_code + 4); if (val < -l) { val += l<<1; } else if (val >= l) { @@ -3080,7 +3304,7 @@ static int h263_decode_motion(MpegEncContext * s, int pred, int f_code) val += 64; if (pred > 32 && val > 63) val -= 64; - + } return val; } @@ -3089,12 +3313,12 @@ static int h263_decode_motion(MpegEncContext * s, int pred, int f_code) static int h263p_decode_umotion(MpegEncContext * s, int pred) { int code = 0, sign; - + if (get_bits1(&s->gb)) /* Motion difference = 0 */ return pred; - + code = 2 + get_bits1(&s->gb); - + while (get_bits1(&s->gb)) { code <<= 1; @@ -3102,12 +3326,12 @@ static int h263p_decode_umotion(MpegEncContext * s, int pred) } sign = code & 1; code >>= 1; - + code = (sign) ? 
(pred - code) : (pred + code); #ifdef DEBUG fprintf(stderr,"H.263+ UMV Motion = %d\n", code); #endif - return code; + return code; } @@ -3118,15 +3342,15 @@ static int h263_decode_block(MpegEncContext * s, DCTELEM * block, RLTable *rl = &rl_inter; const UINT8 *scan_table; - scan_table = zigzag_direct; + scan_table = s->intra_scantable.permutated; if (s->h263_aic && s->mb_intra) { rl = &rl_intra_aic; i = 0; if (s->ac_pred) { - if (s->h263_aic_dir) - scan_table = ff_alternate_vertical_scan; /* left */ + if (s->h263_aic_dir) + scan_table = s->intra_v_scantable.permutated; /* left */ else - scan_table = ff_alternate_horizontal_scan; /* top */ + scan_table = s->intra_h_scantable.permutated; /* top */ } } else if (s->mb_intra) { /* DC coef */ @@ -3163,8 +3387,10 @@ static int h263_decode_block(MpegEncContext * s, DCTELEM * block, for(;;) { code = get_vlc2(&s->gb, rl->vlc.table, TEX_VLC_BITS, 2); - if (code < 0) + if (code < 0){ + fprintf(stderr, "illegal ac vlc code at %dx%d\n", s->mb_x, s->mb_y); return -1; + } if (code == rl->n) { /* escape */ last = get_bits1(&s->gb); @@ -3173,7 +3399,7 @@ static int h263_decode_block(MpegEncContext * s, DCTELEM * block, if (s->h263_rv10 && level == -128) { /* XXX: should patch encoder too */ level = get_bits(&s->gb, 12); - level= (level + ((-1)<<11)) ^ ((-1)<<11); /* sign extension */ + level= (level + ((-1)<<11)) ^ ((-1)<<11); //sign extension } } else { run = rl->table_run[code]; @@ -3183,15 +3409,17 @@ static int h263_decode_block(MpegEncContext * s, DCTELEM * block, level = -level; } i += run; - if (i >= 64) + if (i >= 64){ + fprintf(stderr, "run overflow at %dx%d\n", s->mb_x, s->mb_y); return -1; + } j = scan_table[i]; block[j] = level; if (last) break; i++; } -not_coded: +not_coded: if (s->mb_intra && s->h263_aic) { h263_pred_acdc(s, block, n); i = 63; @@ -3205,9 +3433,9 @@ static inline int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr) int level, pred, code; UINT16 *dc_val; - if (n < 4) + if (n < 4) code = 
get_vlc2(&s->gb, dc_lum.table, DC_VLC_BITS, 1); - else + else code = get_vlc2(&s->gb, dc_chrom.table, DC_VLC_BITS, 1); if (code < 0 || code > 9 /* && s->nbit<9 */){ fprintf(stderr, "illegal dc vlc\n"); @@ -3221,29 +3449,39 @@ static inline int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr) level = - (level ^ ((1 << code) - 1)); if (code > 8){ if(get_bits1(&s->gb)==0){ /* marker */ - fprintf(stderr, "dc marker bit missing\n"); - return -1; + if(s->error_resilience>=2){ + fprintf(stderr, "dc marker bit missing\n"); + return -1; + } } } } - pred = ff_mpeg4_pred_dc(s, n, &dc_val, dir_ptr); level += pred; - if (level < 0) + if (level < 0){ + if(s->error_resilience>=3){ + fprintf(stderr, "dc<0 at %dx%d\n", s->mb_x, s->mb_y); + return -1; + } level = 0; + } if (n < 4) { *dc_val = level * s->y_dc_scale; } else { *dc_val = level * s->c_dc_scale; } + if(s->error_resilience>=3){ + if(*dc_val > 2048 + s->y_dc_scale + s->c_dc_scale){ + fprintf(stderr, "dc overflow at %dx%d\n", s->mb_x, s->mb_y); + return -1; + } + } return level; } /** - * decode a block - * returns 0 if everything went ok - * returns DECODING_AC_LOST if an error was detected during AC decoding - * returns DECODING_ACDC_LOST if an error was detected during DC decoding + * decodes a block. 
+ * @return <0 if an error occured */ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, int n, int coded, int intra) @@ -3257,31 +3495,29 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, if(intra) { /* DC coef */ - if(s->data_partitioning && s->pict_type!=B_TYPE){ + if(s->partitioned_frame){ level = s->dc_val[0][ s->block_index[n] ]; - if(n<4) level= (level + (s->y_dc_scale>>1))/s->y_dc_scale; /* FIXME optimizs */ + if(n<4) level= (level + (s->y_dc_scale>>1))/s->y_dc_scale; //FIXME optimizs else level= (level + (s->c_dc_scale>>1))/s->c_dc_scale; dc_pred_dir= (s->pred_dir_table[s->mb_x + s->mb_y*s->mb_width]<alternate_scan) - scan_table = ff_alternate_vertical_scan; /* left */ - else if (s->ac_pred) { - if (dc_pred_dir == 0) - scan_table = ff_alternate_vertical_scan; /* left */ + if (s->ac_pred) { + if (dc_pred_dir == 0) + scan_table = s->intra_v_scantable.permutated; /* left */ else - scan_table = ff_alternate_horizontal_scan; /* top */ + scan_table = s->intra_h_scantable.permutated; /* top */ } else { - scan_table = zigzag_direct; + scan_table = s->intra_scantable.permutated; } qmul=1; qadd=0; @@ -3292,16 +3528,13 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, return 0; } rl = &rl_inter; - - if(s->alternate_scan) - scan_table = ff_alternate_vertical_scan; /* left */ - else - scan_table = zigzag_direct; + + scan_table = s->intra_scantable.permutated; if(s->mpeg_quant){ qmul=1; qadd=0; - rl_vlc = rl_inter.rl_vlc[0]; + rl_vlc = rl_inter.rl_vlc[0]; }else{ qmul = s->qscale << 1; qadd = (s->qscale - 1) | 1; @@ -3328,38 +3561,38 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, if(SHOW_UBITS(re, &s->gb, 1)==0){ fprintf(stderr, "1. marker bit missing in 3. esc\n"); - return DECODING_AC_LOST; + return -1; }; SKIP_CACHE(re, &s->gb, 1); - + level= SHOW_SBITS(re, &s->gb, 12); SKIP_CACHE(re, &s->gb, 12); - + if(SHOW_UBITS(re, &s->gb, 1)==0){ fprintf(stderr, "2. 
marker bit missing in 3. esc\n"); - return DECODING_AC_LOST; + return -1; }; LAST_SKIP_CACHE(re, &s->gb, 1); - + SKIP_COUNTER(re, &s->gb, 1+12+1); - + if(level*s->qscale>1024 || level*s->qscale<-1024){ fprintf(stderr, "|level| overflow in 3. esc, qp=%d\n", s->qscale); - return DECODING_AC_LOST; + return -1; } -#if 1 +#if 1 { const int abs_level= ABS(level); - if(abs_level<=MAX_LEVEL && run<=MAX_RUN && s->error_resilience>=0){ + if(abs_level<=MAX_LEVEL && run<=MAX_RUN && ((s->workaround_bugs&FF_BUG_AC_VLC)==0)){ const int run1= run - rl->max_run[last][abs_level] - 1; if(abs_level <= rl->max_level[last][run]){ fprintf(stderr, "illegal 3. esc, vlc encoding possible\n"); - return DECODING_AC_LOST; + return -1; } if(abs_level <= rl->max_level[last][run]*2){ fprintf(stderr, "illegal 3. esc, esc 1 encoding possible\n"); - return DECODING_AC_LOST; + return -1; } if(run1 >= 0 && abs_level <= rl->max_level[last][run1]){ fprintf(stderr, "illegal 3. esc, esc 2 encoding possible\n"); - return DECODING_AC_LOST; + return -1; } } } @@ -3378,7 +3611,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, SKIP_BITS(re, &s->gb, 2); #endif GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2); - i+= run + rl->max_run[run>>7][level/qmul] +1; /* FIXME opt indexing */ + i+= run + rl->max_run[run>>7][level/qmul] +1; //FIXME opt indexing level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); LAST_SKIP_BITS(re, &s->gb, 1); } @@ -3392,7 +3625,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, #endif GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2); i+= run; - level = level + rl->max_level[run>>7][(run-1)&63] * qmul;/* FIXME opt indexing */ + level = level + rl->max_level[run>>7][(run-1)&63] * qmul;//FIXME opt indexing level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); LAST_SKIP_BITS(re, &s->gb, 1); } @@ -3405,7 +3638,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM 
* block, i-= 192; if(i&(~63)){ fprintf(stderr, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y); - return DECODING_AC_LOST; + return -1; } block[scan_table[i]] = level; @@ -3440,7 +3673,7 @@ int h263_decode_picture_header(MpegEncContext *s) /* temporal reference */ s->picture_number = get_bits(&s->gb, 8); /* picture timestamp */ - /* PTYPE starts here */ + /* PTYPE starts here */ if (get_bits1(&s->gb) != 1) { /* marker */ fprintf(stderr, "Bad marker\n"); @@ -3456,7 +3689,7 @@ int h263_decode_picture_header(MpegEncContext *s) /* Reset GOB number */ s->gob_number = 0; - + format = get_bits(&s->gb, 3); /* 0 forbidden @@ -3472,12 +3705,12 @@ int h263_decode_picture_header(MpegEncContext *s) height = h263_format[format][1]; if (!width) return -1; - + s->width = width; s->height = height; s->pict_type = I_TYPE + get_bits1(&s->gb); - s->unrestricted_mv = get_bits1(&s->gb); + s->unrestricted_mv = get_bits1(&s->gb); s->h263_long_vectors = s->unrestricted_mv; if (get_bits1(&s->gb) != 0) { @@ -3486,8 +3719,8 @@ int h263_decode_picture_header(MpegEncContext *s) } if (get_bits1(&s->gb) != 0) { s->mv_type = MV_TYPE_8X8; /* Advanced prediction mode */ - } - + } + if (get_bits1(&s->gb) != 0) { fprintf(stderr, "H263 PB frame not supported\n"); return -1; /* not PB frame */ @@ -3496,14 +3729,14 @@ int h263_decode_picture_header(MpegEncContext *s) skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */ } else { int ufep; - + /* H.263v2 */ s->h263_plus = 1; ufep = get_bits(&s->gb, 3); /* Update Full Extended PTYPE */ - /* ufep other than 0 and 1 are reserved */ + /* ufep other than 0 and 1 are reserved */ if (ufep == 1) { - /* OPPTYPE */ + /* OPPTYPE */ format = get_bits(&s->gb, 3); dprintf("ufep=1, format: %d\n", format); skip_bits(&s->gb,1); /* Custom PCF */ @@ -3515,7 +3748,7 @@ int h263_decode_picture_header(MpegEncContext *s) if (get_bits1(&s->gb) != 0) { /* Advanced Intra Coding (AIC) */ s->h263_aic = 1; } - + skip_bits(&s->gb, 7); /* these are the 7 bits: (in order of 
appearence */ /* Deblocking Filter */ @@ -3531,7 +3764,7 @@ int h263_decode_picture_header(MpegEncContext *s) fprintf(stderr, "Bad UFEP type (%d)\n", ufep); return -1; } - + /* MPPTYPE */ s->pict_type = get_bits(&s->gb, 3) + I_TYPE; dprintf("pict_type: %d\n", s->pict_type); @@ -3542,7 +3775,7 @@ int h263_decode_picture_header(MpegEncContext *s) s->no_rounding = get_bits1(&s->gb); dprintf("RTYPE: %d\n", s->no_rounding); skip_bits(&s->gb, 4); - + /* Get the picture dimensions */ if (ufep) { if (format == 6) { @@ -3579,7 +3812,7 @@ int h263_decode_picture_header(MpegEncContext *s) skip_bits1(&s->gb); /* Unlimited Unrestricted Motion Vectors Indicator (UUI) */ } } - + s->qscale = get_bits(&s->gb, 5); } /* PEI */ @@ -3587,9 +3820,9 @@ int h263_decode_picture_header(MpegEncContext *s) skip_bits(&s->gb, 8); } s->f_code = 1; - + if(s->h263_aic){ - s->y_dc_scale_table= + s->y_dc_scale_table= s->c_dc_scale_table= h263_aic_dc_scale_table; }else{ s->y_dc_scale_table= @@ -3605,27 +3838,16 @@ static void mpeg4_decode_sprite_trajectory(MpegEncContext * s) int a= 2<sprite_warping_accuracy; int rho= 3-s->sprite_warping_accuracy; int r=16/a; -#if __GNUC__ && !__STRICT_ANSI__ - const int vop_ref[4][2]= {{0,0}, {s->width,0}, {0, s->height}, {s->width, s->height}}; /* only true for rectangle shapes */ + const int vop_ref[4][2]= {{0,0}, {s->width,0}, {0, s->height}, {s->width, s->height}}; // only true for rectangle shapes int d[4][2]={{0,0}, {0,0}, {0,0}, {0,0}}; -#else - int vop_ref[4][2]; - int d[4][2]; -#endif int sprite_ref[4][2]; int virtual_ref[2][2]; - int w2, h2; + int w2, h2, w3, h3; int alpha=0, beta=0; int w= s->width; int h= s->height; -#if !__GNUC__ || __STRICT_ANSI__ - vop_ref[0][0] = 0; vop_ref[0][1] = 0; - vop_ref[1][0] = s->width; vop_ref[1][1] = 0; - vop_ref[2][0] = 0; vop_ref[2][1] = s->height; - vop_ref[3][0] = s->width; vop_ref[3][1] = s->height; - memset(d, 0, sizeof(d)); -#endif -/* printf("SP %d\n", s->sprite_warping_accuracy); */ + int min_ab; + for(i=0; 
inum_sprite_warping_points; i++){ int length; int x=0, y=0; @@ -3633,33 +3855,31 @@ static void mpeg4_decode_sprite_trajectory(MpegEncContext * s) length= get_vlc(&s->gb, &sprite_trajectory); if(length){ x= get_bits(&s->gb, length); -/* printf("lx %d %d\n", length, x); */ + if ((x >> (length - 1)) == 0) /* if MSB not set it is negative*/ x = - (x ^ ((1 << length) - 1)); } if(!(s->divx_version==500 && s->divx_build==413)) skip_bits1(&s->gb); /* marker bit */ - + length= get_vlc(&s->gb, &sprite_trajectory); if(length){ y=get_bits(&s->gb, length); -/* printf("ly %d %d\n", length, y); */ + if ((y >> (length - 1)) == 0) /* if MSB not set it is negative*/ y = - (y ^ ((1 << length) - 1)); } skip_bits1(&s->gb); /* marker bit */ -/* printf("%d %d %d %d\n", x, y, i, s->sprite_warping_accuracy); */ -/* if(i>0 && (x!=0 || y!=0)) printf("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n"); */ -/* x=y=0; */ +//printf("%d %d %d %d\n", x, y, i, s->sprite_warping_accuracy); d[i][0]= x; d[i][1]= y; } while((1<divx_version==500 && s->divx_build==413){ sprite_ref[0][0]= a*vop_ref[0][0] + d[0][0]; sprite_ref[0][1]= a*vop_ref[0][1] + d[0][1]; @@ -3677,20 +3897,20 @@ static void mpeg4_decode_sprite_trajectory(MpegEncContext * s) } /* sprite_ref[3][0]= (a>>1)*(2*vop_ref[3][0] + d[0][0] + d[1][0] + d[2][0] + d[3][0]); sprite_ref[3][1]= (a>>1)*(2*vop_ref[3][1] + d[0][1] + d[1][1] + d[2][1] + d[3][1]); */ - -/* this is mostly identical to the mpeg4 std (and is totally unreadable because of that ...) */ -/* perhaps it should be reordered to be more readable ... */ -/* the idea behind this virtual_ref mess is to be able to use shifts later per pixel instead of divides */ -/* so the distance between points is converted from w&h based to w2&h2 based which are of the 2^x form */ - virtual_ref[0][0]= 16*(vop_ref[0][0] + w2) + +// this is mostly identical to the mpeg4 std (and is totally unreadable because of that ...) +// perhaps it should be reordered to be more readable ... 
+// the idea behind this virtual_ref mess is to be able to use shifts later per pixel instead of divides +// so the distance between points is converted from w&h based to w2&h2 based which are of the 2^x form + virtual_ref[0][0]= 16*(vop_ref[0][0] + w2) + ROUNDED_DIV(((w - w2)*(r*sprite_ref[0][0] - 16*vop_ref[0][0]) + w2*(r*sprite_ref[1][0] - 16*vop_ref[1][0])),w); - virtual_ref[0][1]= 16*vop_ref[0][1] + virtual_ref[0][1]= 16*vop_ref[0][1] + ROUNDED_DIV(((w - w2)*(r*sprite_ref[0][1] - 16*vop_ref[0][1]) + w2*(r*sprite_ref[1][1] - 16*vop_ref[1][1])),w); - virtual_ref[1][0]= 16*vop_ref[0][0] + virtual_ref[1][0]= 16*vop_ref[0][0] + ROUNDED_DIV(((h - h2)*(r*sprite_ref[0][0] - 16*vop_ref[0][0]) + h2*(r*sprite_ref[2][0] - 16*vop_ref[2][0])),h); - virtual_ref[1][1]= 16*(vop_ref[0][1] + h2) + virtual_ref[1][1]= 16*(vop_ref[0][1] + h2) + ROUNDED_DIV(((h - h2)*(r*sprite_ref[0][1] - 16*vop_ref[0][1]) + h2*(r*sprite_ref[2][1] - 16*vop_ref[2][1])),h); - + switch(s->num_sprite_warping_points) { case 0: @@ -3698,409 +3918,468 @@ static void mpeg4_decode_sprite_trajectory(MpegEncContext * s) s->sprite_offset[0][1]= 0; s->sprite_offset[1][0]= 0; s->sprite_offset[1][1]= 0; - s->sprite_delta[0][0][0]= a; - s->sprite_delta[0][0][1]= 0; - s->sprite_delta[0][1][0]= 0; - s->sprite_delta[0][1][1]= a; - s->sprite_delta[1][0][0]= a; - s->sprite_delta[1][0][1]= 0; - s->sprite_delta[1][1][0]= 0; - s->sprite_delta[1][1][1]= a; - s->sprite_shift[0][0]= 0; - s->sprite_shift[0][1]= 0; - s->sprite_shift[1][0]= 0; - s->sprite_shift[1][1]= 0; + s->sprite_delta[0][0]= a; + s->sprite_delta[0][1]= 0; + s->sprite_delta[1][0]= 0; + s->sprite_delta[1][1]= a; + s->sprite_shift[0]= 0; + s->sprite_shift[1]= 0; break; - case 1: /* GMC only */ + case 1: //GMC only s->sprite_offset[0][0]= sprite_ref[0][0] - a*vop_ref[0][0]; s->sprite_offset[0][1]= sprite_ref[0][1] - a*vop_ref[0][1]; s->sprite_offset[1][0]= ((sprite_ref[0][0]>>1)|(sprite_ref[0][0]&1)) - a*(vop_ref[0][0]/2); s->sprite_offset[1][1]= 
((sprite_ref[0][1]>>1)|(sprite_ref[0][1]&1)) - a*(vop_ref[0][1]/2); - s->sprite_delta[0][0][0]= a; - s->sprite_delta[0][0][1]= 0; - s->sprite_delta[0][1][0]= 0; - s->sprite_delta[0][1][1]= a; - s->sprite_delta[1][0][0]= a; - s->sprite_delta[1][0][1]= 0; - s->sprite_delta[1][1][0]= 0; - s->sprite_delta[1][1][1]= a; - s->sprite_shift[0][0]= 0; - s->sprite_shift[0][1]= 0; - s->sprite_shift[1][0]= 0; - s->sprite_shift[1][1]= 0; + s->sprite_delta[0][0]= a; + s->sprite_delta[0][1]= 0; + s->sprite_delta[1][0]= 0; + s->sprite_delta[1][1]= a; + s->sprite_shift[0]= 0; + s->sprite_shift[1]= 0; break; case 2: - case 3: /* FIXME */ s->sprite_offset[0][0]= (sprite_ref[0][0]<<(alpha+rho)) - + ((-r*sprite_ref[0][0] + virtual_ref[0][0])*(-vop_ref[0][0]) - +( r*sprite_ref[0][1] - virtual_ref[0][1])*(-vop_ref[0][1])); + + (-r*sprite_ref[0][0] + virtual_ref[0][0])*(-vop_ref[0][0]) + + ( r*sprite_ref[0][1] - virtual_ref[0][1])*(-vop_ref[0][1]) + + (1<<(alpha+rho-1)); s->sprite_offset[0][1]= (sprite_ref[0][1]<<(alpha+rho)) - + ((-r*sprite_ref[0][1] + virtual_ref[0][1])*(-vop_ref[0][0]) - +(-r*sprite_ref[0][0] + virtual_ref[0][0])*(-vop_ref[0][1])); - s->sprite_offset[1][0]= ((-r*sprite_ref[0][0] + virtual_ref[0][0])*(-2*vop_ref[0][0] + 1) - +( r*sprite_ref[0][1] - virtual_ref[0][1])*(-2*vop_ref[0][1] + 1) - +2*w2*r*sprite_ref[0][0] - 16*w2); - s->sprite_offset[1][1]= ((-r*sprite_ref[0][1] + virtual_ref[0][1])*(-2*vop_ref[0][0] + 1) - +(-r*sprite_ref[0][0] + virtual_ref[0][0])*(-2*vop_ref[0][1] + 1) - +2*w2*r*sprite_ref[0][1] - 16*w2); - s->sprite_delta[0][0][0]= (-r*sprite_ref[0][0] + virtual_ref[0][0]); - s->sprite_delta[0][0][1]= ( r*sprite_ref[0][1] - virtual_ref[0][1]); - s->sprite_delta[0][1][0]= (-r*sprite_ref[0][1] + virtual_ref[0][1]); - s->sprite_delta[0][1][1]= (-r*sprite_ref[0][0] + virtual_ref[0][0]); - s->sprite_delta[1][0][0]= 4*(-r*sprite_ref[0][0] + virtual_ref[0][0]); - s->sprite_delta[1][0][1]= 4*( r*sprite_ref[0][1] - virtual_ref[0][1]); - s->sprite_delta[1][1][0]= 
4*(-r*sprite_ref[0][1] + virtual_ref[0][1]); - s->sprite_delta[1][1][1]= 4*(-r*sprite_ref[0][0] + virtual_ref[0][0]); - s->sprite_shift[0][0]= alpha+rho; - s->sprite_shift[0][1]= alpha+rho; - s->sprite_shift[1][0]= alpha+rho+2; - s->sprite_shift[1][1]= alpha+rho+2; + + (-r*sprite_ref[0][1] + virtual_ref[0][1])*(-vop_ref[0][0]) + + (-r*sprite_ref[0][0] + virtual_ref[0][0])*(-vop_ref[0][1]) + + (1<<(alpha+rho-1)); + s->sprite_offset[1][0]= ( (-r*sprite_ref[0][0] + virtual_ref[0][0])*(-2*vop_ref[0][0] + 1) + +( r*sprite_ref[0][1] - virtual_ref[0][1])*(-2*vop_ref[0][1] + 1) + +2*w2*r*sprite_ref[0][0] + - 16*w2 + + (1<<(alpha+rho+1))); + s->sprite_offset[1][1]= ( (-r*sprite_ref[0][1] + virtual_ref[0][1])*(-2*vop_ref[0][0] + 1) + +(-r*sprite_ref[0][0] + virtual_ref[0][0])*(-2*vop_ref[0][1] + 1) + +2*w2*r*sprite_ref[0][1] + - 16*w2 + + (1<<(alpha+rho+1))); + s->sprite_delta[0][0]= (-r*sprite_ref[0][0] + virtual_ref[0][0]); + s->sprite_delta[0][1]= (+r*sprite_ref[0][1] - virtual_ref[0][1]); + s->sprite_delta[1][0]= (-r*sprite_ref[0][1] + virtual_ref[0][1]); + s->sprite_delta[1][1]= (-r*sprite_ref[0][0] + virtual_ref[0][0]); + + s->sprite_shift[0]= alpha+rho; + s->sprite_shift[1]= alpha+rho+2; break; -/* case 3: */ + case 3: + min_ab= MIN(alpha, beta); + w3= w2>>min_ab; + h3= h2>>min_ab; + s->sprite_offset[0][0]= (sprite_ref[0][0]<<(alpha+beta+rho-min_ab)) + + (-r*sprite_ref[0][0] + virtual_ref[0][0])*h3*(-vop_ref[0][0]) + + (-r*sprite_ref[0][0] + virtual_ref[1][0])*w3*(-vop_ref[0][1]) + + (1<<(alpha+beta+rho-min_ab-1)); + s->sprite_offset[0][1]= (sprite_ref[0][1]<<(alpha+beta+rho-min_ab)) + + (-r*sprite_ref[0][1] + virtual_ref[0][1])*h3*(-vop_ref[0][0]) + + (-r*sprite_ref[0][1] + virtual_ref[1][1])*w3*(-vop_ref[0][1]) + + (1<<(alpha+beta+rho-min_ab-1)); + s->sprite_offset[1][0]= (-r*sprite_ref[0][0] + virtual_ref[0][0])*h3*(-2*vop_ref[0][0] + 1) + + (-r*sprite_ref[0][0] + virtual_ref[1][0])*w3*(-2*vop_ref[0][1] + 1) + + 2*w2*h3*r*sprite_ref[0][0] + - 16*w2*h3 + + 
(1<<(alpha+beta+rho-min_ab+1)); + s->sprite_offset[1][1]= (-r*sprite_ref[0][1] + virtual_ref[0][1])*h3*(-2*vop_ref[0][0] + 1) + + (-r*sprite_ref[0][1] + virtual_ref[1][1])*w3*(-2*vop_ref[0][1] + 1) + + 2*w2*h3*r*sprite_ref[0][1] + - 16*w2*h3 + + (1<<(alpha+beta+rho-min_ab+1)); + s->sprite_delta[0][0]= (-r*sprite_ref[0][0] + virtual_ref[0][0])*h3; + s->sprite_delta[0][1]= (-r*sprite_ref[0][0] + virtual_ref[1][0])*w3; + s->sprite_delta[1][0]= (-r*sprite_ref[0][1] + virtual_ref[0][1])*h3; + s->sprite_delta[1][1]= (-r*sprite_ref[0][1] + virtual_ref[1][1])*w3; + + s->sprite_shift[0]= alpha + beta + rho - min_ab; + s->sprite_shift[1]= alpha + beta + rho - min_ab + 2; break; } -/*printf("%d %d\n", s->sprite_delta[0][0][0], a<sprite_shift[0][0]); -printf("%d %d\n", s->sprite_delta[0][0][1], 0); -printf("%d %d\n", s->sprite_delta[0][1][0], 0); -printf("%d %d\n", s->sprite_delta[0][1][1], a<sprite_shift[0][1]); -printf("%d %d\n", s->sprite_delta[1][0][0], a<sprite_shift[1][0]); -printf("%d %d\n", s->sprite_delta[1][0][1], 0); -printf("%d %d\n", s->sprite_delta[1][1][0], 0); -printf("%d %d\n", s->sprite_delta[1][1][1], a<sprite_shift[1][1]);*/ - /* try to simplify the situation */ - if( s->sprite_delta[0][0][0] == a<sprite_shift[0][0] - && s->sprite_delta[0][0][1] == 0 - && s->sprite_delta[0][1][0] == 0 - && s->sprite_delta[0][1][1] == a<sprite_shift[0][1] - && s->sprite_delta[1][0][0] == a<sprite_shift[1][0] - && s->sprite_delta[1][0][1] == 0 - && s->sprite_delta[1][1][0] == 0 - && s->sprite_delta[1][1][1] == a<sprite_shift[1][1]) + /* try to simplify the situation */ + if( s->sprite_delta[0][0] == a<sprite_shift[0] + && s->sprite_delta[0][1] == 0 + && s->sprite_delta[1][0] == 0 + && s->sprite_delta[1][1] == a<sprite_shift[0]) { - s->sprite_offset[0][0]>>=s->sprite_shift[0][0]; - s->sprite_offset[0][1]>>=s->sprite_shift[0][1]; - s->sprite_offset[1][0]>>=s->sprite_shift[1][0]; - s->sprite_offset[1][1]>>=s->sprite_shift[1][1]; - s->sprite_delta[0][0][0]= a; - 
s->sprite_delta[0][0][1]= 0; - s->sprite_delta[0][1][0]= 0; - s->sprite_delta[0][1][1]= a; - s->sprite_delta[1][0][0]= a; - s->sprite_delta[1][0][1]= 0; - s->sprite_delta[1][1][0]= 0; - s->sprite_delta[1][1][1]= a; - s->sprite_shift[0][0]= 0; - s->sprite_shift[0][1]= 0; - s->sprite_shift[1][0]= 0; - s->sprite_shift[1][1]= 0; + s->sprite_offset[0][0]>>=s->sprite_shift[0]; + s->sprite_offset[0][1]>>=s->sprite_shift[0]; + s->sprite_offset[1][0]>>=s->sprite_shift[1]; + s->sprite_offset[1][1]>>=s->sprite_shift[1]; + s->sprite_delta[0][0]= a; + s->sprite_delta[0][1]= 0; + s->sprite_delta[1][0]= 0; + s->sprite_delta[1][1]= a; + s->sprite_shift[0]= 0; + s->sprite_shift[1]= 0; s->real_sprite_warping_points=1; } - else + else{ + int shift_y= 16 - s->sprite_shift[0]; + int shift_c= 16 - s->sprite_shift[1]; +//printf("shifts %d %d\n", shift_y, shift_c); + for(i=0; i<2; i++){ + s->sprite_offset[0][i]<<= shift_y; + s->sprite_offset[1][i]<<= shift_c; + s->sprite_delta[0][i]<<= shift_y; + s->sprite_delta[1][i]<<= shift_y; + s->sprite_shift[i]= 16; + } s->real_sprite_warping_points= s->num_sprite_warping_points; - -/* printf("%d %d %d %d\n", d[0][0], d[0][1], s->sprite_offset[0][0], s->sprite_offset[0][1]); */ + } +#if 0 +printf("vop:%d:%d %d:%d %d:%d, sprite:%d:%d %d:%d %d:%d, virtual: %d:%d %d:%d\n", + vop_ref[0][0], vop_ref[0][1], + vop_ref[1][0], vop_ref[1][1], + vop_ref[2][0], vop_ref[2][1], + sprite_ref[0][0], sprite_ref[0][1], + sprite_ref[1][0], sprite_ref[1][1], + sprite_ref[2][0], sprite_ref[2][1], + virtual_ref[0][0], virtual_ref[0][1], + virtual_ref[1][0], virtual_ref[1][1] + ); + +printf("offset: %d:%d , delta: %d %d %d %d, shift %d\n", + s->sprite_offset[0][0], s->sprite_offset[0][1], + s->sprite_delta[0][0], s->sprite_delta[0][1], + s->sprite_delta[1][0], s->sprite_delta[1][1], + s->sprite_shift[0] + ); +#endif } -/* decode mpeg4 VOP header */ -int mpeg4_decode_picture_header(MpegEncContext * s) -{ - int time_incr, startcode, state, v; - int time_increment; +static 
int decode_vol_header(MpegEncContext *s, GetBitContext *gb){ + int width, height, vo_ver_id; - redo: - /* search next start code */ - align_get_bits(&s->gb); - state = 0xff; - for(;;) { - v = get_bits(&s->gb, 8); - if (state == 0x000001) { - state = ((state << 8) | v) & 0xffffff; - startcode = state; - break; - } - state = ((state << 8) | v) & 0xffffff; - if( get_bits_count(&s->gb) > s->gb.size*8-32){ - if(s->gb.size>50){ - printf("no VOP startcode found, frame size was=%d\n", s->gb.size); - return -1; - }else{ - printf("frame skip\n"); - return FRAME_SKIPED; - } - } + /* vol header */ + skip_bits(gb, 1); /* random access */ + s->vo_type= get_bits(gb, 8); + if (get_bits1(gb) != 0) { /* is_ol_id */ + vo_ver_id = get_bits(gb, 4); /* vo_ver_id */ + skip_bits(gb, 3); /* vo_priority */ + } else { + vo_ver_id = 1; + } +//printf("vo type:%d\n",s->vo_type); + s->aspect_ratio_info= get_bits(gb, 4); + if(s->aspect_ratio_info == FF_ASPECT_EXTENDED){ + s->aspected_width = get_bits(gb, 8); // par_width + s->aspected_height = get_bits(gb, 8); // par_height } -/* printf("startcode %X %d\n", startcode, get_bits_count(&s->gb)); */ - if (startcode == 0x120) { /* Video Object Layer */ - int width, height, vo_ver_id; - - /* vol header */ - skip_bits(&s->gb, 1); /* random access */ - s->vo_type= get_bits(&s->gb, 8); - if (get_bits1(&s->gb) != 0) { /* is_ol_id */ - vo_ver_id = get_bits(&s->gb, 4); /* vo_ver_id */ - skip_bits(&s->gb, 3); /* vo_priority */ - } else { - vo_ver_id = 1; - } -/* printf("vo type:%d\n",s->vo_type); */ - s->aspect_ratio_info= get_bits(&s->gb, 4); - if(s->aspect_ratio_info == FF_ASPECT_EXTENDED){ - s->aspected_width = get_bits(&s->gb, 8); /* par_width */ - s->aspected_height = get_bits(&s->gb, 8); /* par_height */ - } - if ((s->vol_control_parameters=get_bits1(&s->gb))) { /* vol control parameter */ - int chroma_format= get_bits(&s->gb, 2); - if(chroma_format!=1){ - printf("illegal chroma format\n"); - } - s->low_delay= get_bits1(&s->gb); - if(get_bits1(&s->gb)){ 
/* vbv parameters */ - get_bits(&s->gb, 15); /* first_half_bitrate */ - skip_bits1(&s->gb); /* marker */ - get_bits(&s->gb, 15); /* latter_half_bitrate */ - skip_bits1(&s->gb); /* marker */ - get_bits(&s->gb, 15); /* first_half_vbv_buffer_size */ - skip_bits1(&s->gb); /* marker */ - get_bits(&s->gb, 3); /* latter_half_vbv_buffer_size */ - get_bits(&s->gb, 11); /* first_half_vbv_occupancy */ - skip_bits1(&s->gb); /* marker */ - get_bits(&s->gb, 15); /* latter_half_vbv_occupancy */ - skip_bits1(&s->gb); /* marker */ - } - }else{ - /* set low delay flag only once so the smart? low delay detection wont be overriden */ - if(s->picture_number==0) - s->low_delay=0; + if ((s->vol_control_parameters=get_bits1(gb))) { /* vol control parameter */ + int chroma_format= get_bits(gb, 2); + if(chroma_format!=1){ + printf("illegal chroma format\n"); } - - s->shape = get_bits(&s->gb, 2); /* vol shape */ - if(s->shape != RECT_SHAPE) printf("only rectangular vol supported\n"); - if(s->shape == GRAY_SHAPE && vo_ver_id != 1){ - printf("Gray shape not supported\n"); - skip_bits(&s->gb, 4); /* video_object_layer_shape_extension */ + s->low_delay= get_bits1(gb); + if(get_bits1(gb)){ /* vbv parameters */ + get_bits(gb, 15); /* first_half_bitrate */ + skip_bits1(gb); /* marker */ + get_bits(gb, 15); /* latter_half_bitrate */ + skip_bits1(gb); /* marker */ + get_bits(gb, 15); /* first_half_vbv_buffer_size */ + skip_bits1(gb); /* marker */ + get_bits(gb, 3); /* latter_half_vbv_buffer_size */ + get_bits(gb, 11); /* first_half_vbv_occupancy */ + skip_bits1(gb); /* marker */ + get_bits(gb, 15); /* latter_half_vbv_occupancy */ + skip_bits1(gb); /* marker */ } + }else{ + // set low delay flag only once so the smart? 
low delay detection wont be overriden + if(s->picture_number==0) + s->low_delay=0; + } - skip_bits1(&s->gb); /* marker */ - - s->time_increment_resolution = get_bits(&s->gb, 16); - - s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1; - if (s->time_increment_bits < 1) - s->time_increment_bits = 1; - skip_bits1(&s->gb); /* marker */ + s->shape = get_bits(gb, 2); /* vol shape */ + if(s->shape != RECT_SHAPE) printf("only rectangular vol supported\n"); + if(s->shape == GRAY_SHAPE && vo_ver_id != 1){ + printf("Gray shape not supported\n"); + skip_bits(gb, 4); //video_object_layer_shape_extension + } - if (get_bits1(&s->gb) != 0) { /* fixed_vop_rate */ - skip_bits(&s->gb, s->time_increment_bits); - } + skip_bits1(gb); /* marker */ + + s->time_increment_resolution = get_bits(gb, 16); + + s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1; + if (s->time_increment_bits < 1) + s->time_increment_bits = 1; + skip_bits1(gb); /* marker */ - if (s->shape != BIN_ONLY_SHAPE) { - if (s->shape == RECT_SHAPE) { - skip_bits1(&s->gb); /* marker */ - width = get_bits(&s->gb, 13); - skip_bits1(&s->gb); /* marker */ - height = get_bits(&s->gb, 13); - skip_bits1(&s->gb); /* marker */ - if(width && height){ /* they should be non zero but who knows ... 
*/ - s->width = width; - s->height = height; -/* printf("width/height: %d %d\n", width, height); */ - } - } + if (get_bits1(gb) != 0) { /* fixed_vop_rate */ + skip_bits(gb, s->time_increment_bits); + } - s->progressive_sequence= get_bits1(&s->gb)^1; - if(!get_bits1(&s->gb)) printf("OBMC not supported (very likely buggy encoder)\n"); /* OBMC Disable */ - if (vo_ver_id == 1) { - s->vol_sprite_usage = get_bits1(&s->gb); /* vol_sprite_usage */ - } else { - s->vol_sprite_usage = get_bits(&s->gb, 2); /* vol_sprite_usage */ + if (s->shape != BIN_ONLY_SHAPE) { + if (s->shape == RECT_SHAPE) { + skip_bits1(gb); /* marker */ + width = get_bits(gb, 13); + skip_bits1(gb); /* marker */ + height = get_bits(gb, 13); + skip_bits1(gb); /* marker */ + if(width && height){ /* they should be non zero but who knows ... */ + s->width = width; + s->height = height; +// printf("width/height: %d %d\n", width, height); } - if(s->vol_sprite_usage==STATIC_SPRITE) printf("Static Sprites not supported\n"); - if(s->vol_sprite_usage==STATIC_SPRITE || s->vol_sprite_usage==GMC_SPRITE){ - if(s->vol_sprite_usage==STATIC_SPRITE){ - s->sprite_width = get_bits(&s->gb, 13); - skip_bits1(&s->gb); /* marker */ - s->sprite_height= get_bits(&s->gb, 13); - skip_bits1(&s->gb); /* marker */ - s->sprite_left = get_bits(&s->gb, 13); - skip_bits1(&s->gb); /* marker */ - s->sprite_top = get_bits(&s->gb, 13); - skip_bits1(&s->gb); /* marker */ - } - s->num_sprite_warping_points= get_bits(&s->gb, 6); - s->sprite_warping_accuracy = get_bits(&s->gb, 2); - s->sprite_brightness_change= get_bits1(&s->gb); - if(s->vol_sprite_usage==STATIC_SPRITE) - s->low_latency_sprite= get_bits1(&s->gb); + } + + s->progressive_sequence= get_bits1(gb)^1; + if(!get_bits1(gb)) printf("OBMC not supported (very likely buggy encoder)\n"); /* OBMC Disable */ + if (vo_ver_id == 1) { + s->vol_sprite_usage = get_bits1(gb); /* vol_sprite_usage */ + } else { + s->vol_sprite_usage = get_bits(gb, 2); /* vol_sprite_usage */ + } + 
if(s->vol_sprite_usage==STATIC_SPRITE) printf("Static Sprites not supported\n"); + if(s->vol_sprite_usage==STATIC_SPRITE || s->vol_sprite_usage==GMC_SPRITE){ + if(s->vol_sprite_usage==STATIC_SPRITE){ + s->sprite_width = get_bits(gb, 13); + skip_bits1(gb); /* marker */ + s->sprite_height= get_bits(gb, 13); + skip_bits1(gb); /* marker */ + s->sprite_left = get_bits(gb, 13); + skip_bits1(gb); /* marker */ + s->sprite_top = get_bits(gb, 13); + skip_bits1(gb); /* marker */ } - /* FIXME sadct disable bit if verid!=1 && shape not rect */ - - if (get_bits1(&s->gb) == 1) { /* not_8_bit */ - s->quant_precision = get_bits(&s->gb, 4); /* quant_precision */ - if(get_bits(&s->gb, 4)!=8) printf("N-bit not supported\n"); /* bits_per_pixel */ - if(s->quant_precision!=5) printf("quant precission %d\n", s->quant_precision); - } else { - s->quant_precision = 5; + s->num_sprite_warping_points= get_bits(gb, 6); + s->sprite_warping_accuracy = get_bits(gb, 2); + s->sprite_brightness_change= get_bits1(gb); + if(s->vol_sprite_usage==STATIC_SPRITE) + s->low_latency_sprite= get_bits1(gb); + } + // FIXME sadct disable bit if verid!=1 && shape not rect + + if (get_bits1(gb) == 1) { /* not_8_bit */ + s->quant_precision = get_bits(gb, 4); /* quant_precision */ + if(get_bits(gb, 4)!=8) printf("N-bit not supported\n"); /* bits_per_pixel */ + if(s->quant_precision!=5) printf("quant precission %d\n", s->quant_precision); + } else { + s->quant_precision = 5; + } + + // FIXME a bunch of grayscale shape things + + if((s->mpeg_quant=get_bits1(gb))){ /* vol_quant_type */ + int i, j, v; + + /* load default matrixes */ + for(i=0; i<64; i++){ + int j= s->idct_permutation[i]; + v= ff_mpeg4_default_intra_matrix[i]; + s->intra_matrix[j]= v; + s->chroma_intra_matrix[j]= v; + + v= ff_mpeg4_default_non_intra_matrix[i]; + s->inter_matrix[j]= v; + s->chroma_inter_matrix[j]= v; } - /* FIXME a bunch of grayscale shape things */ - - if((s->mpeg_quant=get_bits1(&s->gb))){ /* vol_quant_type */ - int i, j, v; - - /* load 
default matrixes */ + /* load custom intra matrix */ + if(get_bits1(gb)){ + int last=0; for(i=0; i<64; i++){ - v= ff_mpeg4_default_intra_matrix[i]; - s->intra_matrix[i]= v; - s->chroma_intra_matrix[i]= v; - - v= ff_mpeg4_default_non_intra_matrix[i]; - s->inter_matrix[i]= v; - s->chroma_inter_matrix[i]= v; + v= get_bits(gb, 8); + if(v==0) break; + + last= v; + j= s->idct_permutation[ ff_zigzag_direct[i] ]; + s->intra_matrix[j]= v; + s->chroma_intra_matrix[j]= v; } - /* load custom intra matrix */ - if(get_bits1(&s->gb)){ - for(i=0; i<64; i++){ - v= get_bits(&s->gb, 8); - if(v==0) break; - - j= zigzag_direct[i]; - s->intra_matrix[j]= v; - s->chroma_intra_matrix[j]= v; - } + /* replicate last value */ + for(; i<64; i++){ + j= s->idct_permutation[ ff_zigzag_direct[i] ]; + s->intra_matrix[j]= v; + s->chroma_intra_matrix[j]= v; } + } - /* load custom non intra matrix */ - if(get_bits1(&s->gb)){ - for(i=0; i<64; i++){ - v= get_bits(&s->gb, 8); - if(v==0) break; - - j= zigzag_direct[i]; - s->inter_matrix[j]= v; - s->chroma_inter_matrix[j]= v; - } + /* load custom non intra matrix */ + if(get_bits1(gb)){ + int last=0; + for(i=0; i<64; i++){ + v= get_bits(gb, 8); + if(v==0) break; - /* replicate last value */ - for(; i<64; i++){ - j= zigzag_direct[i]; - s->inter_matrix[j]= v; - s->chroma_inter_matrix[j]= v; - } + last= v; + j= s->idct_permutation[ ff_zigzag_direct[i] ]; + s->inter_matrix[j]= v; + s->chroma_inter_matrix[j]= v; } - /* FIXME a bunch of grayscale shape things */ + /* replicate last value */ + for(; i<64; i++){ + j= s->idct_permutation[ ff_zigzag_direct[i] ]; + s->inter_matrix[j]= last; + s->chroma_inter_matrix[j]= last; + } } - if(vo_ver_id != 1) - s->quarter_sample= get_bits1(&s->gb); - else s->quarter_sample=0; + // FIXME a bunch of grayscale shape things + } - if(!get_bits1(&s->gb)) printf("Complexity estimation not supported\n"); + if(vo_ver_id != 1) + s->quarter_sample= get_bits1(gb); + else s->quarter_sample=0; - s->resync_marker= !get_bits1(&s->gb); /* 
resync_marker_disabled */ + if(!get_bits1(gb)) printf("Complexity estimation not supported\n"); - s->data_partitioning= get_bits1(&s->gb); - if(s->data_partitioning){ - s->rvlc= get_bits1(&s->gb); - if(s->rvlc){ - printf("reversible vlc not supported\n"); - } - } + s->resync_marker= !get_bits1(gb); /* resync_marker_disabled */ - if(vo_ver_id != 1) { - s->new_pred= get_bits1(&s->gb); - if(s->new_pred){ - printf("new pred not supported\n"); - skip_bits(&s->gb, 2); /* requested upstream message type */ - skip_bits1(&s->gb); /* newpred segment type */ - } - s->reduced_res_vop= get_bits1(&s->gb); - if(s->reduced_res_vop) printf("reduced resolution VOP not supported\n"); + s->data_partitioning= get_bits1(gb); + if(s->data_partitioning){ + s->rvlc= get_bits1(gb); + if(s->rvlc){ + printf("reversible vlc not supported\n"); } - else{ - s->new_pred=0; - s->reduced_res_vop= 0; + } + + if(vo_ver_id != 1) { + s->new_pred= get_bits1(gb); + if(s->new_pred){ + printf("new pred not supported\n"); + skip_bits(gb, 2); /* requested upstream message type */ + skip_bits1(gb); /* newpred segment type */ } + s->reduced_res_vop= get_bits1(gb); + if(s->reduced_res_vop) printf("reduced resolution VOP not supported\n"); + } + else{ + s->new_pred=0; + s->reduced_res_vop= 0; + } - s->scalability= get_bits1(&s->gb); - if(s->workaround_bugs==1) s->scalability=0; - if (s->scalability) { - int dummy= s->hierachy_type= get_bits1(&s->gb); - int ref_layer_id= get_bits(&s->gb, 4); - int ref_layer_sampling_dir= get_bits1(&s->gb); - int h_sampling_factor_n= get_bits(&s->gb, 5); - int h_sampling_factor_m= get_bits(&s->gb, 5); - int v_sampling_factor_n= get_bits(&s->gb, 5); - int v_sampling_factor_m= get_bits(&s->gb, 5); - s->enhancement_type= get_bits1(&s->gb); - /* bin shape stuff FIXME */ + s->scalability= get_bits1(gb); + + if (s->scalability) { + GetBitContext bak= *gb; + int ref_layer_id; + int ref_layer_sampling_dir; + int h_sampling_factor_n; + int h_sampling_factor_m; + int v_sampling_factor_n; + 
int v_sampling_factor_m; + + s->hierachy_type= get_bits1(gb); + ref_layer_id= get_bits(gb, 4); + ref_layer_sampling_dir= get_bits1(gb); + h_sampling_factor_n= get_bits(gb, 5); + h_sampling_factor_m= get_bits(gb, 5); + v_sampling_factor_n= get_bits(gb, 5); + v_sampling_factor_m= get_bits(gb, 5); + s->enhancement_type= get_bits1(gb); + + if( h_sampling_factor_n==0 || h_sampling_factor_m==0 + || v_sampling_factor_n==0 || v_sampling_factor_m==0){ + +// fprintf(stderr, "illegal scalability header (VERY broken encoder), trying to workaround\n"); + s->scalability=0; + + *gb= bak; + }else printf("scalability not supported\n"); - } + + // bin shape stuff FIXME } -/* printf("end Data %X %d\n", show_bits(&s->gb, 32), get_bits_count(&s->gb)&0x7); */ - goto redo; - } else if (startcode == 0x1b2) { /* userdata */ - char buf[256]; - int i; - int e; - int ver, build; - -/* printf("user Data %X\n", show_bits(&s->gb, 32)); */ - buf[0]= show_bits(&s->gb, 8); - for(i=1; i<256; i++){ - buf[i]= show_bits(&s->gb, 16)&0xFF; - if(buf[i]==0) break; - skip_bits(&s->gb, 8); + } + return 0; +} + +static int decode_user_data(MpegEncContext *s, GetBitContext *gb){ + char buf[256]; + int i; + int e; + int ver, build, ver2, ver3; + + buf[0]= show_bits(gb, 8); + for(i=1; i<256; i++){ + buf[i]= show_bits(gb, 16)&0xFF; + if(buf[i]==0) break; + skip_bits(gb, 8); + } + buf[255]=0; + + /* divx detection */ + e=sscanf(buf, "DivX%dBuild%d", &ver, &build); + if(e!=2) + e=sscanf(buf, "DivX%db%d", &ver, &build); + if(e==2){ + s->divx_version= ver; + s->divx_build= build; + if(s->picture_number==0){ + printf("This file was encoded with DivX%d Build%d\n", ver, build); } - buf[255]=0; - e=sscanf(buf, "DivX%dBuild%d", &ver, &build); - if(e!=2) - e=sscanf(buf, "DivX%db%d", &ver, &build); - if(e==2){ - s->divx_version= ver; - s->divx_build= build; - if(s->picture_number==0){ - printf("This file was encoded with DivX%d Build%d\n", ver, build); - if(ver==500 && build==413){ - printf("WARNING: this version of DivX is 
not MPEG4 compatible, trying to workaround these bugs...\n"); - } - } + } + + /* ffmpeg detection */ + e=sscanf(buf, "FFmpeg%d.%d.%db%d", &ver, &ver2, &ver3, &build); + if(e!=4) + e=sscanf(buf, "FFmpeg v%d.%d.%d / libavcodec build: %d", &ver, &ver2, &ver3, &build); + if(e!=4){ + if(strcmp(buf, "ffmpeg")==0){ + s->ffmpeg_version= 0x000406; + s->lavc_build= 4600; } -/* printf("User Data: %s\n", buf); */ - goto redo; - } else if (startcode != 0x1b6) { /* VOP */ - goto redo; } + if(e==4){ + s->ffmpeg_version= ver*256*256 + ver2*256 + ver3; + s->lavc_build= build; + if(s->picture_number==0) + printf("This file was encoded with libavcodec build %d\n", build); + } + + /* xvid detection */ + e=sscanf(buf, "XviD%d", &build); + if(e==1){ + s->xvid_build= build; + if(s->picture_number==0) + printf("This file was encoded with XviD build %d\n", build); + } + +//printf("User Data: %s\n", buf); + return 0; +} + +static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){ + int time_incr, time_increment; - s->pict_type = get_bits(&s->gb, 2) + I_TYPE; /* pict type: I = 0 , P = 1 */ -/* if(s->pict_type!=I_TYPE) return FRAME_SKIPED; */ + s->pict_type = get_bits(gb, 2) + I_TYPE; /* pict type: I = 0 , P = 1 */ if(s->pict_type==B_TYPE && s->low_delay && s->vol_control_parameters==0){ printf("low_delay flag set, but shouldnt, clearing it\n"); s->low_delay=0; } -/* printf("pic: %d, qpel:%d part:%d resync:%d\n", s->pict_type, s->quarter_sample, s->data_partitioning, s->resync_marker); */ + + s->partitioned_frame= s->data_partitioning && s->pict_type!=B_TYPE; + if(s->partitioned_frame) + s->decode_mb= mpeg4_decode_partitioned_mb; + else + s->decode_mb= ff_h263_decode_mb; if(s->time_increment_resolution==0){ s->time_increment_resolution=1; -/* fprintf(stderr, "time_increment_resolution is illegal\n"); */ +// fprintf(stderr, "time_increment_resolution is illegal\n"); } time_incr=0; - while (get_bits1(&s->gb) != 0) + while (get_bits1(gb) != 0) time_incr++; - check_marker(&s->gb, 
"before time_increment"); - time_increment= get_bits(&s->gb, s->time_increment_bits); -/* printf(" type:%d incr:%d increment:%d\n", s->pict_type, time_incr, time_increment); */ + check_marker(gb, "before time_increment"); + time_increment= get_bits(gb, s->time_increment_bits); +//printf(" type:%d modulo_time_base:%d increment:%d\n", s->pict_type, time_incr, time_increment); if(s->pict_type!=B_TYPE){ s->last_time_base= s->time_base; s->time_base+= time_incr; s->time= s->time_base*s->time_increment_resolution + time_increment; - if(s->time < s->last_non_b_time && s->workaround_bugs==3){ - fprintf(stderr, "header is not mpeg4 compatible, broken encoder, trying to workaround\n"); - s->time_base++; - s->time+= s->time_increment_resolution; + if(s->workaround_bugs&FF_BUG_UMP4){ + if(s->time < s->last_non_b_time){ +// fprintf(stderr, "header is not mpeg4 compatible, broken encoder, trying to workaround\n"); + s->time_base++; + s->time+= s->time_increment_resolution; + } } s->pp_time= s->time - s->last_non_b_time; s->last_non_b_time= s->time; @@ -4108,73 +4387,87 @@ int mpeg4_decode_picture_header(MpegEncContext * s) s->time= (s->last_time_base + time_incr)*s->time_increment_resolution + time_increment; s->pb_time= s->pp_time - (s->last_non_b_time - s->time); if(s->pp_time <=s->pb_time || s->pp_time <= s->pp_time - s->pb_time || s->pp_time<=0){ -/* printf("messed up order, seeking?, skiping current b frame\n"); */ +// printf("messed up order, seeking?, skiping current b frame\n"); return FRAME_SKIPED; } - + if(s->t_frame==0) s->t_frame= s->time - s->last_time_base; - if(s->t_frame==0) s->t_frame=1; /* 1/0 protection */ -/* printf("%Ld %Ld %d %d\n", s->last_non_b_time, s->time, s->pp_time, s->t_frame); fflush(stdout); */ - s->pp_field_time= ( ROUNDED_DIV(s->last_non_b_time, s->t_frame) + if(s->t_frame==0) s->t_frame=1; // 1/0 protection +//printf("%Ld %Ld %d %d\n", s->last_non_b_time, s->time, s->pp_time, s->t_frame); fflush(stdout); + s->pp_field_time= ( 
ROUNDED_DIV(s->last_non_b_time, s->t_frame) - ROUNDED_DIV(s->last_non_b_time - s->pp_time, s->t_frame))*2; - s->pb_field_time= ( ROUNDED_DIV(s->time, s->t_frame) + s->pb_field_time= ( ROUNDED_DIV(s->time, s->t_frame) - ROUNDED_DIV(s->last_non_b_time - s->pp_time, s->t_frame))*2; } - + s->avctx->pts= s->time*1000LL*1000LL / s->time_increment_resolution; - - if(check_marker(&s->gb, "before vop_coded")==0 && s->picture_number==0){ + + if(check_marker(gb, "before vop_coded")==0 && s->picture_number==0){ printf("hmm, seems the headers arnt complete, trying to guess time_increment_bits\n"); for(s->time_increment_bits++ ;s->time_increment_bits<16; s->time_increment_bits++){ - if(get_bits1(&s->gb)) break; + if(get_bits1(gb)) break; } printf("my guess is %d bits ;)\n",s->time_increment_bits); } /* vop coded */ - if (get_bits1(&s->gb) != 1) - goto redo; -/* printf("time %d %d %d || %Ld %Ld %Ld\n", s->time_increment_bits, s->time_increment_resolution, s->time_base, */ -/* s->time, s->last_non_b_time, s->last_non_b_time - s->pp_time); */ + if (get_bits1(gb) != 1){ + printf("vop not coded\n"); + return FRAME_SKIPED; + } +//printf("time %d %d %d || %Ld %Ld %Ld\n", s->time_increment_bits, s->time_increment_resolution, s->time_base, +//s->time, s->last_non_b_time, s->last_non_b_time - s->pp_time); if (s->shape != BIN_ONLY_SHAPE && ( s->pict_type == P_TYPE || (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE))) { /* rounding type for motion estimation */ - s->no_rounding = get_bits1(&s->gb); + s->no_rounding = get_bits1(gb); } else { s->no_rounding = 0; } -/* FIXME reduced res stuff */ +//FIXME reduced res stuff if (s->shape != RECT_SHAPE) { if (s->vol_sprite_usage != 1 || s->pict_type != I_TYPE) { int width, height, hor_spat_ref, ver_spat_ref; - - width = get_bits(&s->gb, 13); - skip_bits1(&s->gb); /* marker */ - height = get_bits(&s->gb, 13); - skip_bits1(&s->gb); /* marker */ - hor_spat_ref = get_bits(&s->gb, 13); /* hor_spat_ref */ - skip_bits1(&s->gb); /* marker */ - 
ver_spat_ref = get_bits(&s->gb, 13); /* ver_spat_ref */ + + width = get_bits(gb, 13); + skip_bits1(gb); /* marker */ + height = get_bits(gb, 13); + skip_bits1(gb); /* marker */ + hor_spat_ref = get_bits(gb, 13); /* hor_spat_ref */ + skip_bits1(gb); /* marker */ + ver_spat_ref = get_bits(gb, 13); /* ver_spat_ref */ } - skip_bits1(&s->gb); /* change_CR_disable */ - - if (get_bits1(&s->gb) != 0) { - skip_bits(&s->gb, 8); /* constant_alpha_value */ + skip_bits1(gb); /* change_CR_disable */ + + if (get_bits1(gb) != 0) { + skip_bits(gb, 8); /* constant_alpha_value */ } } -/* FIXME complexity estimation stuff */ - +//FIXME complexity estimation stuff + if (s->shape != BIN_ONLY_SHAPE) { int t; - t=get_bits(&s->gb, 3); /* intra dc VLC threshold */ -/* printf("threshold %d\n", t); */ + t=get_bits(gb, 3); /* intra dc VLC threshold */ +//printf("threshold %d\n", t); if(!s->progressive_sequence){ - s->top_field_first= get_bits1(&s->gb); - s->alternate_scan= get_bits1(&s->gb); + s->top_field_first= get_bits1(gb); + s->alternate_scan= get_bits1(gb); }else s->alternate_scan= 0; } + if(s->alternate_scan){ + ff_init_scantable(s, &s->inter_scantable , ff_alternate_vertical_scan); + ff_init_scantable(s, &s->intra_scantable , ff_alternate_vertical_scan); + ff_init_scantable(s, &s->intra_h_scantable, ff_alternate_vertical_scan); + ff_init_scantable(s, &s->intra_v_scantable, ff_alternate_vertical_scan); + } else{ + ff_init_scantable(s, &s->inter_scantable , ff_zigzag_direct); + ff_init_scantable(s, &s->intra_scantable , ff_zigzag_direct); + ff_init_scantable(s, &s->intra_h_scantable, ff_alternate_horizontal_scan); + ff_init_scantable(s, &s->intra_v_scantable, ff_alternate_vertical_scan); + } + if(s->pict_type == S_TYPE && (s->vol_sprite_usage==STATIC_SPRITE || s->vol_sprite_usage==GMC_SPRITE)){ if(s->num_sprite_warping_points){ mpeg4_decode_sprite_trajectory(s); @@ -4184,59 +4477,57 @@ int mpeg4_decode_picture_header(MpegEncContext * s) } if (s->shape != BIN_ONLY_SHAPE) { - /* note: we 
do not use quant_precision to avoid problem if no - MPEG4 vol header as it is found on some old opendivx - movies */ - s->qscale = get_bits(&s->gb, 5); + s->qscale = get_bits(gb, s->quant_precision); if(s->qscale==0){ printf("Error, header damaged or not MPEG4 header (qscale=0)\n"); - return -1; /* makes no sense to continue, as there is nothing left from the image then */ + return -1; // makes no sense to continue, as there is nothing left from the image then } - + if (s->pict_type != I_TYPE) { - s->f_code = get_bits(&s->gb, 3); /* fcode_for */ + s->f_code = get_bits(gb, 3); /* fcode_for */ if(s->f_code==0){ printf("Error, header damaged or not MPEG4 header (f_code=0)\n"); - return -1; /* makes no sense to continue, as the MV decoding will break very quickly */ + return -1; // makes no sense to continue, as the MV decoding will break very quickly } }else s->f_code=1; - + if (s->pict_type == B_TYPE) { - s->b_code = get_bits(&s->gb, 3); + s->b_code = get_bits(gb, 3); }else s->b_code=1; #if 0 -printf("qp:%d fc:%d bc:%d type:%s size:%d pro:%d alt:%d top:%d qpel:%d part:%d resync:%d\n", - s->qscale, s->f_code, s->b_code, - s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? "B" : "S")), - s->gb.size,s->progressive_sequence, s->alternate_scan, s->top_field_first, - s->quarter_sample, s->data_partitioning, s->resync_marker); +printf("qp:%d fc:%d bc:%d type:%s size:%d pro:%d alt:%d top:%d qpel:%d part:%d resync:%d w:%d a:%d\n", + s->qscale, s->f_code, s->b_code, + s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? 
"B" : "S")), + gb->size,s->progressive_sequence, s->alternate_scan, s->top_field_first, + s->quarter_sample, s->data_partitioning, s->resync_marker, s->num_sprite_warping_points, + s->sprite_warping_accuracy); #endif if(!s->scalability){ if (s->shape!=RECT_SHAPE && s->pict_type!=I_TYPE) { - skip_bits1(&s->gb); /* vop shape coding type */ + skip_bits1(gb); // vop shape coding type } }else{ if(s->enhancement_type){ - int load_backward_shape= get_bits1(&s->gb); + int load_backward_shape= get_bits1(gb); if(load_backward_shape){ printf("load backward shape isnt supported\n"); } } - skip_bits(&s->gb, 2); /* ref_select_code */ + skip_bits(gb, 2); //ref_select_code } } /* detect buggy encoders which dont set the low_delay flag (divx4/xvid/opendivx)*/ - /* note we cannot detect divx5 without b-frames easyly (allthough its buggy too) */ + // note we cannot detect divx5 without b-frames easyly (allthough its buggy too) if(s->vo_type==0 && s->vol_control_parameters==0 && s->divx_version==0 && s->picture_number==0){ printf("looks like this file was encoded with (divx4/(old)xvid/opendivx) -> forcing low_delay flag\n"); s->low_delay=1; } - s->picture_number++; /* better than pic number==0 allways ;) */ + s->picture_number++; // better than pic number==0 allways ;) - s->y_dc_scale_table= ff_mpeg4_y_dc_scale_table; /* FIXME add short header support */ + s->y_dc_scale_table= ff_mpeg4_y_dc_scale_table; //FIXME add short header support s->c_dc_scale_table= ff_mpeg4_c_dc_scale_table; if(s->divx_version==0 || s->divx_version < 500){ @@ -4246,6 +4537,53 @@ printf("qp:%d fc:%d bc:%d type:%s size:%d pro:%d alt:%d top:%d qpel:%d part:%d r return 0; } +/** + * decode mpeg4 headers + * @return <0 if no VOP found (or a damaged one) + * FRAME_SKIPPED if a not coded VOP is found + * 0 if a VOP is found + */ +int ff_mpeg4_decode_picture_header(MpegEncContext * s, GetBitContext *gb) +{ + int startcode, v; + + /* search next start code */ + align_get_bits(gb); + startcode = 0xff; + for(;;) { + v = 
get_bits(gb, 8); + startcode = ((startcode << 8) | v) & 0xffffffff; + + if(get_bits_count(gb) >= gb->size*8){ + if(gb->size==1 && s->divx_version){ + printf("frame skip %d\n", gb->size); + return FRAME_SKIPED; //divx bug + }else + return -1; //end of stream + } + + if((startcode&0xFFFFFF00) != 0x100) + continue; //no startcode + + switch(startcode){ + case 0x120: + decode_vol_header(s, gb); + break; + case 0x1b2: + decode_user_data(s, gb); + break; + case 0x1b6: + return decode_vop_header(s, gb); + default: +// printf("startcode %X found\n", startcode); + break; + } + + align_get_bits(gb); + startcode = 0xff; + } +} + /* don't understand why they choose a different header ! */ int intel_h263_decode_picture_header(MpegEncContext *s) { @@ -4278,8 +4616,8 @@ int intel_h263_decode_picture_header(MpegEncContext *s) s->h263_plus = 0; s->pict_type = I_TYPE + get_bits1(&s->gb); - - s->unrestricted_mv = get_bits1(&s->gb); + + s->unrestricted_mv = get_bits1(&s->gb); s->h263_long_vectors = s->unrestricted_mv; if (get_bits1(&s->gb) != 0) { @@ -4306,6 +4644,10 @@ int intel_h263_decode_picture_header(MpegEncContext *s) skip_bits(&s->gb, 8); } s->f_code = 1; + + s->y_dc_scale_table= + s->c_dc_scale_table= ff_mpeg1_dc_scale_table; + return 0; } diff --git a/src/libffmpeg/libavcodec/h263dec.c b/src/libffmpeg/libavcodec/h263dec.c index ffecbc932..491d80454 100644 --- a/src/libffmpeg/libavcodec/h263dec.c +++ b/src/libffmpeg/libavcodec/h263dec.c @@ -20,6 +20,12 @@ #include "dsputil.h" #include "mpegvideo.h" +#if 1 +#define PRINT_QP(a, b) {} +#else +#define PRINT_QP(a, b) printf(a, b) +#endif + //#define DEBUG //#define PRINT_FRAME_TIME #ifdef PRINT_FRAME_TIME @@ -45,6 +51,11 @@ static int h263_decode_init(AVCodecContext *avctx) s->height = avctx->height; s->workaround_bugs= avctx->workaround_bugs; + // set defaults + s->quant_precision=5; + s->progressive_sequence=1; + s->decode_mb= ff_h263_decode_mb; + /* select sub codec */ switch(avctx->codec->id) { case CODEC_ID_H263: @@ -115,7 
+126,6 @@ static int h263_decode_end(AVCodecContext *avctx) */ static int get_consumed_bytes(MpegEncContext *s, int buf_size){ int pos= (get_bits_count(&s->gb)+7)>>3; - if(s->divx_version>=500){ //we would have to scan through the whole buf to handle the weird reordering ... return buf_size; @@ -127,12 +137,175 @@ static int get_consumed_bytes(MpegEncContext *s, int buf_size){ } } +static int decode_slice(MpegEncContext *s){ + s->last_resync_gb= s->gb; + s->first_slice_line= 1; + + s->resync_mb_x= s->mb_x; + s->resync_mb_y= s->mb_y; + + s->y_dc_scale= s->y_dc_scale_table[ s->qscale ]; + s->c_dc_scale= s->c_dc_scale_table[ s->qscale ]; + + if(s->partitioned_frame){ + const int qscale= s->qscale; + + if(s->codec_id==CODEC_ID_MPEG4){ + if(ff_mpeg4_decode_partitions(s) < 0) + return -1; + } + + /* restore variables which where modified */ + s->first_slice_line=1; + s->mb_x= s->resync_mb_x; + s->mb_y= s->resync_mb_y; + s->qscale= qscale; + s->y_dc_scale= s->y_dc_scale_table[ s->qscale ]; + s->c_dc_scale= s->c_dc_scale_table[ s->qscale ]; + } + + for(; s->mb_y < s->mb_height; s->mb_y++) { + /* per-row end of slice checks */ + if(s->msmpeg4_version){ + if(s->resync_mb_y + s->slice_height == s->mb_y){ + const int xy= s->mb_x + s->mb_y*s->mb_width; + s->error_status_table[xy-1]|= AC_END|DC_END|MV_END; + return 0; + } + } + + if(s->msmpeg4_version==1){ + s->last_dc[0]= + s->last_dc[1]= + s->last_dc[2]= 128; + } + + ff_init_block_index(s); + for(; s->mb_x < s->mb_width; s->mb_x++) { + int ret; + + ff_update_block_index(s); + + if(s->resync_mb_x == s->mb_x && s->resync_mb_y+1 == s->mb_y){ + s->first_slice_line=0; + } + + /* DCT & quantize */ + clear_blocks(s->block[0]); + + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_16X16; +//printf("%d %d %06X\n", ret, get_bits_count(&s->gb), show_bits(&s->gb, 24)); + ret= s->decode_mb(s, s->block); + + PRINT_QP("%2d", s->qscale); + MPV_decode_mb(s, s->block); + + if(ret<0){ + const int xy= s->mb_x + s->mb_y*s->mb_width; + 
if(ret==SLICE_END){ +//printf("%d %d %d %06X\n", s->mb_x, s->mb_y, s->gb.size*8 - get_bits_count(&s->gb), show_bits(&s->gb, 24)); + s->error_status_table[xy]|= AC_END; + if(!s->partitioned_frame) + s->error_status_table[xy]|= MV_END|DC_END; + + s->padding_bug_score--; + + if(++s->mb_x >= s->mb_width){ + s->mb_x=0; + ff_draw_horiz_band(s); + s->mb_y++; + } + return 0; + }else if(ret==SLICE_NOEND){ + fprintf(stderr,"Slice mismatch at MB: %d\n", xy); + return -1; + } + fprintf(stderr,"Error at MB: %d\n", xy); + s->error_status_table[xy]|= AC_ERROR; + if(!s->partitioned_frame) + s->error_status_table[xy]|= DC_ERROR|MV_ERROR; + + return -1; + } + } + + ff_draw_horiz_band(s); + + PRINT_QP("%s", "\n"); + + s->mb_x= 0; + } + + assert(s->mb_x==0 && s->mb_y==s->mb_height); + + /* try to detect the padding bug */ + if( s->codec_id==CODEC_ID_MPEG4 + && (s->workaround_bugs&FF_BUG_AUTODETECT) + && s->gb.size*8 - get_bits_count(&s->gb) >=0 + && s->gb.size*8 - get_bits_count(&s->gb) < 48 + && !s->resync_marker + && !s->data_partitioning){ + + const int bits_count= get_bits_count(&s->gb); + const int bits_left = s->gb.size*8 - bits_count; + + if(bits_left==0 || bits_left>8){ + s->padding_bug_score++; + } else { + int v= show_bits(&s->gb, 8); + v|= 0x7F >> (7-(bits_count&7)); + + if(v==0x7F) + s->padding_bug_score--; + else + s->padding_bug_score++; + } + + if(s->padding_bug_score > -2) + s->workaround_bugs |= FF_BUG_NO_PADDING; + else + s->workaround_bugs &= ~FF_BUG_NO_PADDING; + } + + // handle formats which dont have unique end markers + if(s->msmpeg4_version || (s->workaround_bugs&FF_BUG_NO_PADDING)){ //FIXME perhaps solve this more cleanly + int left= s->gb.size*8 - get_bits_count(&s->gb); + int max_extra=7; + + /* no markers in M$ crap */ + if(s->msmpeg4_version && s->pict_type==I_TYPE) + max_extra+= 17; + + /* buggy padding but the frame should still end approximately at the bitstream end */ + if((s->workaround_bugs&FF_BUG_NO_PADDING) && s->error_resilience>=3) + max_extra+= 
48; + else if((s->workaround_bugs&FF_BUG_NO_PADDING)) + max_extra+= 256*256*256*64; + + if(left>max_extra){ + fprintf(stderr, "discarding %d junk bits at end, next would be %X\n", left, show_bits(&s->gb, 24)); + } + else if(left<0){ + fprintf(stderr, "overreading %d bits\n", -left); + }else + s->error_status_table[s->mb_num-1]|= AC_END|MV_END|DC_END; + + return 0; + } + + fprintf(stderr, "slice end not reached but screenspace end (%d left %06X)\n", + s->gb.size*8 - get_bits_count(&s->gb), + show_bits(&s->gb, 24)); + return -1; +} + static int h263_decode_frame(AVCodecContext *avctx, void *data, int *data_size, UINT8 *buf, int buf_size) { MpegEncContext *s = avctx->priv_data; - int ret; + int ret,i; AVPicture *pict = data; #ifdef PRINT_FRAME_TIME uint64_t time= rdtsc(); @@ -144,7 +317,7 @@ uint64_t time= rdtsc(); s->hurry_up= avctx->hurry_up; s->error_resilience= avctx->error_resilience; - s->workaround_bugs= avctx->workaround_bugs; + s->flags= avctx->flags; *data_size = 0; @@ -154,17 +327,31 @@ uint64_t time= rdtsc(); return 0; } +retry: + if(s->bitstream_buffer_size && buf_size<20){ //divx 5.01+ frame reorder init_get_bits(&s->gb, s->bitstream_buffer, s->bitstream_buffer_size); }else init_get_bits(&s->gb, buf, buf_size); s->bitstream_buffer_size=0; + if (!s->context_initialized) { + if (MPV_common_init(s) < 0) //we need the idct permutaton for reading a custom matrix + return -1; + } + /* let's go :-) */ if (s->h263_msmpeg4) { ret = msmpeg4_decode_picture_header(s); } else if (s->h263_pred) { - ret = mpeg4_decode_picture_header(s); + if(s->avctx->extradata_size && s->picture_number==0){ + GetBitContext gb; + + init_get_bits(&gb, s->avctx->extradata, s->avctx->extradata_size); + ret = ff_mpeg4_decode_picture_header(s, &gb); + } + ret = ff_mpeg4_decode_picture_header(s, &s->gb); + s->has_b_frames= !s->low_delay; } else if (s->h263_intel) { ret = intel_h263_decode_picture_header(s); @@ -172,6 +359,50 @@ uint64_t time= rdtsc(); ret = h263_decode_picture_header(s); } 
avctx->has_b_frames= s->has_b_frames; + + if(s->workaround_bugs&FF_BUG_AUTODETECT){ + if(s->avctx->fourcc == ff_get_fourcc("XVIX")) + s->workaround_bugs|= FF_BUG_XVID_ILACE; + + if(s->avctx->fourcc == ff_get_fourcc("MP4S")) + s->workaround_bugs|= FF_BUG_AC_VLC; + + if(s->avctx->fourcc == ff_get_fourcc("M4S2")) + s->workaround_bugs|= FF_BUG_AC_VLC; + + if(s->avctx->fourcc == ff_get_fourcc("UMP4")){ + s->workaround_bugs|= FF_BUG_UMP4; + s->workaround_bugs|= FF_BUG_AC_VLC; + } + + if(s->divx_version){ + s->workaround_bugs|= FF_BUG_QPEL_CHROMA; + } + + if(s->avctx->fourcc == ff_get_fourcc("XVID") && s->xvid_build==0) + s->workaround_bugs|= FF_BUG_QPEL_CHROMA; + + if(s->xvid_build && s->xvid_build<=1) + s->workaround_bugs|= FF_BUG_QPEL_CHROMA; + +//printf("padding_bug_score: %d\n", s->padding_bug_score); +#if 0 + if(s->divx_version==500) + s->workaround_bugs|= FF_BUG_NO_PADDING; + + /* very ugly XVID padding bug detection FIXME/XXX solve this differently + * lets hope this at least works + */ + if( s->resync_marker==0 && s->data_partitioning==0 && s->divx_version==0 + && s->codec_id==CODEC_ID_MPEG4 && s->vo_type==0) + s->workaround_bugs|= FF_BUG_NO_PADDING; + + if(s->lavc_build && s->lavc_build<4609) //FIXME not sure about the version num but a 4609 file seems ok + s->workaround_bugs|= FF_BUG_NO_PADDING; +#endif + } + + #if 0 // dump bits per frame / qp / complexity { static FILE *f=NULL; @@ -184,7 +415,10 @@ uint64_t time= rdtsc(); /* and other parameters. 
So then we could init the picture */ /* FIXME: By the way H263 decoder is evolving it should have */ /* an H263EncContext */ - if (s->width != avctx->width || s->height != avctx->height) { + if ( s->width != avctx->width || s->height != avctx->height + || avctx->aspect_ratio_info != s->aspect_ratio_info + || avctx->aspected_width != s->aspected_width + || avctx->aspected_height != s->aspected_height) { /* H.263 could change picture size any time */ MPV_common_end(s); s->context_initialized=0; @@ -198,10 +432,13 @@ uint64_t time= rdtsc(); avctx->aspected_width = s->aspected_width; avctx->aspected_height = s->aspected_height; } - if (MPV_common_init(s) < 0) - return -1; + + goto retry; } + if((s->codec_id==CODEC_ID_H263 || s->codec_id==CODEC_ID_H263P)) + s->gob_index = ff_h263_get_gob_height(s); + if(ret==FRAME_SKIPED) return get_consumed_bytes(s, buf_size); /* skip if the header was thrashed */ if (ret < 0){ @@ -220,18 +457,16 @@ uint64_t time= rdtsc(); s->next_p_frame_damaged=0; } - MPV_frame_start(s, avctx); + if(MPV_frame_start(s, avctx) < 0) + return -1; #ifdef DEBUG printf("qscale=%d\n", s->qscale); #endif - /* init resync/ error resilience specific variables */ - s->next_resync_qscale= s->qscale; - s->next_resync_gb= s->gb; - if(s->resync_marker) s->mb_num_left= 0; - else s->mb_num_left= s->mb_num; - + if(s->error_resilience) + memset(s->error_status_table, MV_ERROR|AC_ERROR|DC_ERROR|VP_START|AC_END|DC_END|MV_END, s->mb_num*sizeof(UINT8)); + /* decode each macroblock */ s->block_wrap[0]= s->block_wrap[1]= @@ -239,140 +474,28 @@ uint64_t time= rdtsc(); s->block_wrap[3]= s->mb_width*2 + 2; s->block_wrap[4]= s->block_wrap[5]= s->mb_width + 2; - for(s->mb_y=0; s->mb_y < s->mb_height; s->mb_y++) { - /* Check for GOB headers on H.263 */ - /* FIXME: In the future H.263+ will have intra prediction */ - /* and we are gonna need another way to detect MPEG4 */ - if (s->mb_y && !s->h263_pred) { - s->first_slice_line = h263_decode_gob_header(s); + s->mb_x=0; + s->mb_y=0; + 
+ decode_slice(s); + s->error_status_table[0]|= VP_START; + while(s->mb_ymb_height && s->gb.size*8 - get_bits_count(&s->gb)>32){ + if(s->msmpeg4_version){ + if(s->mb_x!=0 || (s->mb_y%s->slice_height)!=0) + break; + }else{ + if(ff_h263_resync(s)<0) + break; } - if(s->msmpeg4_version==1){ - s->last_dc[0]= - s->last_dc[1]= - s->last_dc[2]= 128; - } - - s->y_dc_scale= s->y_dc_scale_table[ s->qscale ]; - s->c_dc_scale= s->c_dc_scale_table[ s->qscale ]; - - s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1; - s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1); - s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1; - s->block_index[3]= s->block_wrap[0]*(s->mb_y*2 + 2); - s->block_index[4]= s->block_wrap[4]*(s->mb_y + 1) + s->block_wrap[0]*(s->mb_height*2 + 2); - s->block_index[5]= s->block_wrap[4]*(s->mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2); - for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) { - s->block_index[0]+=2; - s->block_index[1]+=2; - s->block_index[2]+=2; - s->block_index[3]+=2; - s->block_index[4]++; - s->block_index[5]++; -#ifdef DEBUG - printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y); -#endif - - if(s->resync_marker){ - if(s->mb_num_left<=0){ - /* except the first block */ - if(s->mb_x!=0 || s->mb_y!=0){ - /* did we miss the next resync marker without noticing an error yet */ - if(((get_bits_count(&s->gb)+8)&(~7)) != s->next_resync_pos && s->decoding_error==0){ - fprintf(stderr, "slice end missmatch x:%d y:%d %d %d\n", - s->mb_x, s->mb_y, get_bits_count(&s->gb), s->next_resync_pos); - ff_conceal_past_errors(s, 1); - } - } - s->qscale= s->next_resync_qscale; - s->y_dc_scale= s->y_dc_scale_table[ s->qscale ]; - s->c_dc_scale= s->c_dc_scale_table[ s->qscale ]; - - s->gb= s->next_resync_gb; - s->resync_mb_x= s->mb_x; //we know that the marker is here cuz mb_num_left was the distance to it - s->resync_mb_y= s->mb_y; - s->first_slice_line=1; - - if(s->codec_id==CODEC_ID_MPEG4){ - ff_mpeg4_clean_buffers(s); - ff_mpeg4_resync(s); - 
} - } - - if( s->resync_mb_x==s->mb_x - && s->resync_mb_y==s->mb_y && s->decoding_error!=0){ - fprintf(stderr, "resynced at %d %d\n", s->mb_x, s->mb_y); - s->decoding_error= 0; - } - } + if(s->msmpeg4_version!=4 && s->h263_pred) + ff_mpeg4_clean_buffers(s); - //fprintf(stderr,"\nFrame: %d\tMB: %d",avctx->frame_number, (s->mb_y * s->mb_width) + s->mb_x); - /* DCT & quantize */ - if(s->decoding_error!=DECODING_DESYNC){ - int last_error= s->decoding_error; - clear_blocks(s->block[0]); - - s->mv_dir = MV_DIR_FORWARD; - s->mv_type = MV_TYPE_16X16; - if (s->h263_msmpeg4) { - if (msmpeg4_decode_mb(s, s->block) < 0) { - fprintf(stderr,"Error at MB: %d\n", (s->mb_y * s->mb_width) + s->mb_x); - s->decoding_error=DECODING_DESYNC; - } - } else { - if (h263_decode_mb(s, s->block) < 0) { - fprintf(stderr,"Error at MB: %d\n", (s->mb_y * s->mb_width) + s->mb_x); - s->decoding_error=DECODING_DESYNC; - } - } + decode_slice(s); - if(s->decoding_error!=last_error){ - ff_conceal_past_errors(s, 0); - } - } - - /* conceal errors */ - if( s->decoding_error==DECODING_DESYNC - || (s->decoding_error==DECODING_ACDC_LOST && s->mb_intra)){ - s->mv_dir = MV_DIR_FORWARD; - s->mv_type = MV_TYPE_16X16; - s->mb_skiped=0; - s->mb_intra=0; - s->mv[0][0][0]=0; //FIXME this is not optimal - s->mv[0][0][1]=0; - clear_blocks(s->block[0]); - }else if(s->decoding_error && !s->mb_intra){ - clear_blocks(s->block[0]); - } - //FIXME remove AC for intra - - MPV_decode_mb(s, s->block); - - s->mb_num_left--; - } - if ( avctx->draw_horiz_band - && (s->num_available_buffers>=1 || (!s->has_b_frames)) ) { - UINT8 *src_ptr[3]; - int y, h, offset; - y = s->mb_y * 16; - h = s->height - y; - if (h > 16) - h = 16; - offset = y * s->linesize; - if(s->pict_type==B_TYPE || (!s->has_b_frames)){ - src_ptr[0] = s->current_picture[0] + offset; - src_ptr[1] = s->current_picture[1] + (offset >> 2); - src_ptr[2] = s->current_picture[2] + (offset >> 2); - } else { - src_ptr[0] = s->last_picture[0] + offset; - src_ptr[1] = 
s->last_picture[1] + (offset >> 2); - src_ptr[2] = s->last_picture[2] + (offset >> 2); - } - avctx->draw_horiz_band(avctx, src_ptr, s->linesize, - y, s->width, h); - } + s->error_status_table[s->resync_mb_x + s->resync_mb_y*s->mb_width]|= VP_START; } - + if (s->h263_msmpeg4 && s->msmpeg4_version<4 && s->pict_type==I_TYPE) if(msmpeg4_decode_ext_header(s, buf_size) < 0) return -1; @@ -397,24 +520,37 @@ uint64_t time= rdtsc(); } } - if(s->bitstream_buffer_size==0 && s->error_resilience>0){ - int left= s->gb.size*8 - get_bits_count(&s->gb); - int max_extra=8; - - if(s->codec_id==CODEC_ID_MPEG4) max_extra+=32; - - if(left>max_extra){ - fprintf(stderr, "discarding %d junk bits at end, next would be %X\n", left, show_bits(&s->gb, 24)); - if(s->decoding_error==0) - ff_conceal_past_errors(s, 1); + if(s->error_resilience){ + int error=0, num_end_markers=0; + for(i=0; imb_num; i++){ + int status= s->error_status_table[i]; +#if 0 + if(i%s->mb_width == 0) printf("\n"); + printf("%2X ", status); +#endif + if(status==0) continue; + + if(status&(DC_ERROR|AC_ERROR|MV_ERROR)) + error=1; + if(status&VP_START){ + if(num_end_markers) + error=1; + num_end_markers=3; + } + if(status&AC_END) + num_end_markers--; + if(status&DC_END) + num_end_markers--; + if(status&MV_END) + num_end_markers--; } - if(left<0){ - fprintf(stderr, "overreading %d bits\n", -left); - if(s->decoding_error==0) - ff_conceal_past_errors(s, 1); + if(num_end_markers || error){ + fprintf(stderr, "concealing errors\n"); +//printf("type:%d\n", s->pict_type); + ff_error_resilience(s); } } - + MPV_frame_end(s); #if 0 //dirty show MVs, we should export the MV tables and write a filter to show them { diff --git a/src/libffmpeg/libavcodec/i386/Makefile.am b/src/libffmpeg/libavcodec/i386/Makefile.am index 15bec161e..53f1f6528 100644 --- a/src/libffmpeg/libavcodec/i386/Makefile.am +++ b/src/libffmpeg/libavcodec/i386/Makefile.am @@ -17,8 +17,9 @@ libavcodec_mmx_src = \ fdct_mmx.c \ idct_mmx.c \ motion_est_mmx.c \ - 
mpegvideo_mmx.c \ + mpegvideo_mmx.c \ simple_idct_mmx.c +# fft_sse.c - needs new header from gcc 3.1 libavcodec_mmx_dummy = libavcodec_mmx_dummy.c diff --git a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c index a524f96c8..708d0b091 100644 --- a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c +++ b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c @@ -20,7 +20,6 @@ */ #include "../dsputil.h" -#include "../simple_idct.h" int mm_flags; /* multimedia extension flags */ @@ -44,10 +43,6 @@ int pix_abs8x8_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); int pix_abs8x8_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); int pix_abs8x8_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); -/* external functions, from idct_mmx.c */ -void ff_mmx_idct(DCTELEM *block); -void ff_mmxext_idct(DCTELEM *block); - /* pixel operations */ static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL; static const uint64_t mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL; @@ -70,8 +65,8 @@ static const uint64_t mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002U #define MOVQ_BONE(regd) __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_bone)) #define MOVQ_WTWO(regd) __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_wtwo)) #else -/* for shared library it's better to use this way for accessing constants */ -/* pcmpeqd -> -1 */ +// for shared library it's better to use this way for accessing constants +// pcmpeqd -> -1 #define MOVQ_BONE(regd) \ __asm __volatile ( \ "pcmpeqd %%" #regd ", %%" #regd " \n\t" \ @@ -86,9 +81,9 @@ static const uint64_t mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002U #endif -/* using regr as temporary and for the output result */ -/* first argument is unmodifed and second is trashed */ -/* regfe is supposed to contain 0xfefefefefefefefe */ +// using regr as temporary and for the output result +// first argument is unmodifed and second is trashed +// regfe is supposed to contain 
0xfefefefefefefefe #define PAVGB_MMX_NO_RND(rega, regb, regr, regfe) \ "movq " #rega ", " #regr " \n\t"\ "pand " #regb ", " #regr " \n\t"\ @@ -105,7 +100,7 @@ static const uint64_t mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002U "psrlq $1, " #regb " \n\t"\ "psubb " #regb ", " #regr " \n\t" -/* mm6 is supposed to contain 0xfefefefefefefefe */ +// mm6 is supposed to contain 0xfefefefefefefefe #define PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp) \ "movq " #rega ", " #regr " \n\t"\ "movq " #regc ", " #regp " \n\t"\ @@ -192,7 +187,7 @@ static const uint64_t mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002U static void get_pixels_mmx(DCTELEM *block, const UINT8 *pixels, int line_size) { - __asm__ volatile( + asm volatile( "movl $-128, %%eax \n\t" "pxor %%mm7, %%mm7 \n\t" ".balign 16 \n\t" @@ -220,7 +215,7 @@ static void get_pixels_mmx(DCTELEM *block, const UINT8 *pixels, int line_size) static void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride) { - __asm__ volatile( + asm volatile( "pxor %%mm7, %%mm7 \n\t" "movl $-128, %%eax \n\t" ".balign 16 \n\t" @@ -278,9 +273,9 @@ static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line pix += line_size*4; p += 32; - /* if here would be an exact copy of the code above */ - /* compiler would generate some very strange code */ - /* thus using "r" */ + // if here would be an exact copy of the code above + // compiler would generate some very strange code + // thus using "r" __asm __volatile( "movq (%3), %%mm0\n\t" "movq 8(%3), %%mm1\n\t" @@ -420,6 +415,44 @@ static void clear_blocks_mmx(DCTELEM *blocks) ); } +static int pix_sum16_mmx(UINT8 * pix, int line_size){ + const int h=16; + int sum; + int index= -line_size*h; + + __asm __volatile( + "pxor %%mm7, %%mm7 \n\t" + "pxor %%mm6, %%mm6 \n\t" + "1: \n\t" + "movq (%2, %1), %%mm0 \n\t" + "movq (%2, %1), %%mm1 \n\t" + "movq 8(%2, %1), %%mm2 \n\t" + "movq 8(%2, %1), %%mm3 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" 
+ "punpckhbw %%mm7, %%mm1 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpckhbw %%mm7, %%mm3 \n\t" + "paddw %%mm0, %%mm1 \n\t" + "paddw %%mm2, %%mm3 \n\t" + "paddw %%mm1, %%mm3 \n\t" + "paddw %%mm3, %%mm6 \n\t" + "addl %3, %1 \n\t" + " js 1b \n\t" + "movq %%mm6, %%mm5 \n\t" + "psrlq $32, %%mm6 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "movq %%mm6, %%mm5 \n\t" + "psrlq $16, %%mm6 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "movd %%mm6, %0 \n\t" + "andl $0xFFFF, %0 \n\t" + : "=&r" (sum), "+r" (index) + : "r" (pix - index), "r" (line_size) + ); + + return sum; +} + #if 0 static void just_return() { return; } #endif @@ -448,6 +481,7 @@ void dsputil_init_mmx(void) put_pixels_clamped = put_pixels_clamped_mmx; add_pixels_clamped = add_pixels_clamped_mmx; clear_blocks= clear_blocks_mmx; + pix_sum= pix_sum16_mmx; pix_abs16x16 = pix_abs16x16_mmx; pix_abs16x16_x2 = pix_abs16x16_x2_mmx; @@ -477,7 +511,7 @@ void dsputil_init_mmx(void) avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_mmx; avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_mmx; avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_mmx; - + put_pixels_tab[1][0] = put_pixels8_mmx; put_pixels_tab[1][1] = put_pixels8_x2_mmx; put_pixels_tab[1][2] = put_pixels8_y2_mmx; @@ -538,7 +572,7 @@ void dsputil_init_mmx(void) avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow; avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow; avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow; - + put_pixels_tab[1][1] = put_pixels8_x2_3dnow; put_pixels_tab[1][2] = put_pixels8_y2_3dnow; put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow; @@ -549,21 +583,10 @@ void dsputil_init_mmx(void) avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow; avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; } - - /* idct */ - if (mm_flags & MM_MMXEXT) { - ff_idct = ff_mmxext_idct; - } else { - ff_idct = ff_mmx_idct; - } -#ifdef SIMPLE_IDCT -/* ff_idct = simple_idct; */ - ff_idct = simple_idct_mmx; -#endif } #if 0 - /* for speed testing */ + // for speed testing get_pixels = just_return; 
put_pixels_clamped = just_return; add_pixels_clamped = just_return; @@ -593,40 +616,18 @@ void dsputil_init_mmx(void) avg_no_rnd_pixels_tab[2] = just_return; avg_no_rnd_pixels_tab[3] = just_return; - /* av_fdct = just_return; */ - /* ff_idct = just_return; */ + //av_fdct = just_return; + //ff_idct = just_return; #endif } -void gen_idct_put(UINT8 *dest, int line_size, DCTELEM *block); - -/** - * this will send coeff matrixes which would have different results for the 16383 type MMX vs C IDCTs to the C IDCT - */ -void bit_exact_idct_put(UINT8 *dest, int line_size, INT16 *block){ - if( block[0]>1022 && block[1]==0 && block[4 ]==0 && block[5 ]==0 - && block[8]==0 && block[9]==0 && block[12]==0 && block[13]==0){ - int16_t tmp[64]; - int i; - - for(i=0; i<64; i++) - tmp[i]= block[i]; - for(i=0; i<64; i++) - block[i]= tmp[block_permute_op(i)]; - - simple_idct_put(dest, line_size, block); - } - else - gen_idct_put(dest, line_size, block); -} - /* remove any non bit exact operation (testing purpose). NOTE that this function should be kept as small as possible because it is always difficult to test automatically non bit exact cases. 
*/ void dsputil_set_bit_exact_mmx(void) { if (mm_flags & MM_MMX) { - + /* MMX2 & 3DNOW */ put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx; put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx; @@ -643,9 +644,5 @@ void dsputil_set_bit_exact_mmx(void) pix_abs8x8_y2 = pix_abs8x8_y2_mmx; pix_abs8x8_xy2= pix_abs8x8_xy2_mmx; } -#ifdef SIMPLE_IDCT - if(ff_idct_put==gen_idct_put && ff_idct == simple_idct_mmx) - ff_idct_put= bit_exact_idct_put; -#endif } } diff --git a/src/libffmpeg/libavcodec/i386/dsputil_mmx_avg.h b/src/libffmpeg/libavcodec/i386/dsputil_mmx_avg.h index 818ec0e6d..6873432ce 100644 --- a/src/libffmpeg/libavcodec/i386/dsputil_mmx_avg.h +++ b/src/libffmpeg/libavcodec/i386/dsputil_mmx_avg.h @@ -21,7 +21,7 @@ * mostly rewritten by Michael Niedermayer * and improved by Zdenek Kabelac */ - + /* XXX: we use explicit registers to avoid a gcc 2.95.2 register asm clobber bug - now it will work with 2.95.2 and also with -fPIC */ @@ -92,7 +92,7 @@ static void DEF(put_pixels16_x2)(UINT8 *block, const UINT8 *pixels, int line_siz :"r" (line_size) :"%eax", "memory"); } - + /* GL: this function does incorrect rounding if overflow */ static void DEF(put_no_rnd_pixels8_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) { @@ -293,7 +293,7 @@ static void DEF(avg_pixels8_y2)(UINT8 *block, const UINT8 *pixels, int line_size :"%eax", "memory"); } -/* Note this is not correctly rounded, but this function is only used for b frames so it doesnt matter */ +// Note this is not correctly rounded, but this function is only used for b frames so it doesnt matter static void DEF(avg_pixels8_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) { MOVQ_BONE(mm6); @@ -335,7 +335,7 @@ static void DEF(avg_pixels8_xy2)(UINT8 *block, const UINT8 *pixels, int line_siz :"%eax", "memory"); } -/* FIXME the following could be optimized too ... */ +//FIXME the following could be optimized too ... 
static void DEF(put_no_rnd_pixels16_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h){ DEF(put_no_rnd_pixels8_x2)(block , pixels , line_size, h); DEF(put_no_rnd_pixels8_x2)(block+8, pixels+8, line_size, h); diff --git a/src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h b/src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h index a6e84c199..3605e03f9 100644 --- a/src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h +++ b/src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h @@ -21,7 +21,7 @@ * and improved by Zdenek Kabelac */ -/* put_pixels */ +// put_pixels static void DEF(put, pixels8_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) { MOVQ_BFE(mm6); @@ -132,7 +132,7 @@ static void DEF(put, pixels8_y2)(UINT8 *block, const UINT8 *pixels, int line_siz static void DEF(put, pixels8_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) { MOVQ_ZERO(mm7); - SET_RND(mm6); /* =2 for rnd and =1 for no_rnd version */ + SET_RND(mm6); // =2 for rnd and =1 for no_rnd version __asm __volatile( "movq (%1), %%mm0 \n\t" "movq 1(%1), %%mm4 \n\t" @@ -168,7 +168,7 @@ static void DEF(put, pixels8_xy2)(UINT8 *block, const UINT8 *pixels, int line_si "movq %%mm4, (%2, %%eax) \n\t" "addl %3, %%eax \n\t" - "movq (%1, %%eax), %%mm2 \n\t" /* 0 <-> 2 1 <-> 3 */ + "movq (%1, %%eax), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 "movq 1(%1, %%eax), %%mm4 \n\t" "movq %%mm2, %%mm3 \n\t" "movq %%mm4, %%mm5 \n\t" @@ -195,8 +195,8 @@ static void DEF(put, pixels8_xy2)(UINT8 *block, const UINT8 *pixels, int line_si :"eax", "memory"); } -/* avg_pixels */ -/* in case more speed is needed - unroling would certainly help */ +// avg_pixels +// in case more speed is needed - unroling would certainly help static void DEF(avg, pixels8)(UINT8 *block, const UINT8 *pixels, int line_size, int h) { MOVQ_BFE(mm6); @@ -324,11 +324,11 @@ static void DEF(avg, pixels8_y2)(UINT8 *block, const UINT8 *pixels, int line_siz :"eax", "memory"); } -/* this routine is 'slightly' suboptimal but mostly unused */ +// this routine is 
'slightly' suboptimal but mostly unused static void DEF(avg, pixels8_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) { MOVQ_ZERO(mm7); - SET_RND(mm6); /* =2 for rnd and =1 for no_rnd version */ + SET_RND(mm6); // =2 for rnd and =1 for no_rnd version __asm __volatile( "movq (%1), %%mm0 \n\t" "movq 1(%1), %%mm4 \n\t" @@ -368,7 +368,7 @@ static void DEF(avg, pixels8_xy2)(UINT8 *block, const UINT8 *pixels, int line_si "movq %%mm5, (%2, %%eax) \n\t" "addl %3, %%eax \n\t" - "movq (%1, %%eax), %%mm2 \n\t" /* 0 <-> 2 1 <-> 3 */ + "movq (%1, %%eax), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 "movq 1(%1, %%eax), %%mm4 \n\t" "movq %%mm2, %%mm3 \n\t" "movq %%mm4, %%mm5 \n\t" @@ -399,7 +399,7 @@ static void DEF(avg, pixels8_xy2)(UINT8 *block, const UINT8 *pixels, int line_si :"eax", "memory"); } -/* FIXME optimize */ +//FIXME optimize static void DEF(put, pixels16_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h){ DEF(put, pixels8_y2)(block , pixels , line_size, h); DEF(put, pixels8_y2)(block+8, pixels+8, line_size, h); diff --git a/src/libffmpeg/libavcodec/i386/fdct_mmx.c b/src/libffmpeg/libavcodec/i386/fdct_mmx.c index 14d93370d..19f656afd 100644 --- a/src/libffmpeg/libavcodec/i386/fdct_mmx.c +++ b/src/libffmpeg/libavcodec/i386/fdct_mmx.c @@ -3,133 +3,131 @@ * The gcc porting is Copyright (c) 2001 Fabrice Bellard. * * from fdctam32.c - AP922 MMX(3D-Now) forward-DCT - * + * * Intel Application Note AP-922 - fast, precise implementation of DCT * http://developer.intel.com/vtune/cbts/appnotes.htm */ #include "../common.h" #include "mmx.h" -/* #define ATTR_ALIGN(align) __attribute__ ((__aligned__ (align))) */ +#define ATTR_ALIGN(align) __attribute__ ((__aligned__ (align))) -/* -*********************************** -* -* constants for the forward DCT -* ----------------------------- -* -* Be sure to check that your compiler is aligning all constants to QWORD -* (8-byte) memory boundaries! Otherwise the unaligned memory access will -* severely stall MMX execution. 
-* -*********************************** -*/ +////////////////////////////////////////////////////////////////////// +// +// constants for the forward DCT +// ----------------------------- +// +// Be sure to check that your compiler is aligning all constants to QWORD +// (8-byte) memory boundaries! Otherwise the unaligned memory access will +// severely stall MMX execution. +// +////////////////////////////////////////////////////////////////////// -#define BITS_FRW_ACC 3 /*; 2 or 3 for accuracy */ +#define BITS_FRW_ACC 3 //; 2 or 3 for accuracy #define SHIFT_FRW_COL BITS_FRW_ACC #define SHIFT_FRW_ROW (BITS_FRW_ACC + 17 - 3) -/* #define RND_FRW_ROW (262144 * (BITS_FRW_ACC - 1)) //; 1 << (SHIFT_FRW_ROW-1) */ +//#define RND_FRW_ROW (262144 * (BITS_FRW_ACC - 1)) //; 1 << (SHIFT_FRW_ROW-1) #define RND_FRW_ROW (1 << (SHIFT_FRW_ROW-1)) -/* #define RND_FRW_COL (2 * (BITS_FRW_ACC - 1)) //; 1 << (SHIFT_FRW_COL-1) */ +//#define RND_FRW_COL (2 * (BITS_FRW_ACC - 1)) //; 1 << (SHIFT_FRW_COL-1) #define RND_FRW_COL (1 << (SHIFT_FRW_COL-1)) -/* concatenated table, for forward DCT transformation */ +//concatenated table, for forward DCT transformation const int16_t fdct_tg_all_16[] ATTR_ALIGN(8) = { - 13036, 13036, 13036, 13036, /* tg * (2<<16) + 0.5 */ - 27146, 27146, 27146, 27146, /* tg * (2<<16) + 0.5 */ - -21746, -21746, -21746, -21746, /* tg * (2<<16) + 0.5 */ + 13036, 13036, 13036, 13036, // tg * (2<<16) + 0.5 + 27146, 27146, 27146, 27146, // tg * (2<<16) + 0.5 + -21746, -21746, -21746, -21746, // tg * (2<<16) + 0.5 }; const int16_t cos_4_16[4] = { - -19195, -19195, -19195, -19195, /* cos * (2<<16) + 0.5 */ + -19195, -19195, -19195, -19195, //cos * (2<<16) + 0.5 }; const int16_t ocos_4_16[4] = { - 23170, 23170, 23170, 23170, /* cos * (2<<15) + 0.5 */ + 23170, 23170, 23170, 23170, //cos * (2<<15) + 0.5 }; static const mmx_t fdct_one_corr = {0x0001000100010001LL}; static volatile mmx_t fdct_r_row = { d:{RND_FRW_ROW, RND_FRW_ROW} }; -const int16_t tab_frw_01234567[] ATTR_ALIGN(8) 
= { /* forward_dct coeff table */ - /* row0 */ - 16384, 16384, 21407, -8867, /* w09 w01 w08 w00 */ - 16384, 16384, 8867, -21407, /* w13 w05 w12 w04 */ - 16384, -16384, 8867, 21407, /* w11 w03 w10 w02 */ - -16384, 16384, -21407, -8867, /* w15 w07 w14 w06 */ - 22725, 12873, 19266, -22725, /* w22 w20 w18 w16 */ - 19266, 4520, -4520, -12873, /* w23 w21 w19 w17 */ - 12873, 4520, 4520, 19266, /* w30 w28 w26 w24 */ - -22725, 19266, -12873, -22725, /* w31 w29 w27 w25 */ +const int16_t tab_frw_01234567[] ATTR_ALIGN(8) = { // forward_dct coeff table + //row0 + 16384, 16384, 21407, -8867, // w09 w01 w08 w00 + 16384, 16384, 8867, -21407, // w13 w05 w12 w04 + 16384, -16384, 8867, 21407, // w11 w03 w10 w02 + -16384, 16384, -21407, -8867, // w15 w07 w14 w06 + 22725, 12873, 19266, -22725, // w22 w20 w18 w16 + 19266, 4520, -4520, -12873, // w23 w21 w19 w17 + 12873, 4520, 4520, 19266, // w30 w28 w26 w24 + -22725, 19266, -12873, -22725, // w31 w29 w27 w25 - /* row1 */ - 22725, 22725, 29692, -12299, /* w09 w01 w08 w00 */ - 22725, 22725, 12299, -29692, /* w13 w05 w12 w04 */ - 22725, -22725, 12299, 29692, /* w11 w03 w10 w02 */ - -22725, 22725, -29692, -12299, /* w15 w07 w14 w06 */ - 31521, 17855, 26722, -31521, /* w22 w20 w18 w16 */ - 26722, 6270, -6270, -17855, /* w23 w21 w19 w17 */ - 17855, 6270, 6270, 26722, /* w30 w28 w26 w24 */ - -31521, 26722, -17855, -31521, /* w31 w29 w27 w25 */ + //row1 + 22725, 22725, 29692, -12299, // w09 w01 w08 w00 + 22725, 22725, 12299, -29692, // w13 w05 w12 w04 + 22725, -22725, 12299, 29692, // w11 w03 w10 w02 + -22725, 22725, -29692, -12299, // w15 w07 w14 w06 + 31521, 17855, 26722, -31521, // w22 w20 w18 w16 + 26722, 6270, -6270, -17855, // w23 w21 w19 w17 + 17855, 6270, 6270, 26722, // w30 w28 w26 w24 + -31521, 26722, -17855, -31521, // w31 w29 w27 w25 - /* row2 */ - 21407, 21407, 27969, -11585, /* w09 w01 w08 w00 */ - 21407, 21407, 11585, -27969, /* w13 w05 w12 w04 */ - 21407, -21407, 11585, 27969, /* w11 w03 w10 w02 */ - -21407, 21407, -27969, 
-11585, /* w15 w07 w14 w06 */ - 29692, 16819, 25172, -29692, /* w22 w20 w18 w16 */ - 25172, 5906, -5906, -16819, /* w23 w21 w19 w17 */ - 16819, 5906, 5906, 25172, /* w30 w28 w26 w24 */ - -29692, 25172, -16819, -29692, /* w31 w29 w27 w25 */ + //row2 + 21407, 21407, 27969, -11585, // w09 w01 w08 w00 + 21407, 21407, 11585, -27969, // w13 w05 w12 w04 + 21407, -21407, 11585, 27969, // w11 w03 w10 w02 + -21407, 21407, -27969, -11585, // w15 w07 w14 w06 + 29692, 16819, 25172, -29692, // w22 w20 w18 w16 + 25172, 5906, -5906, -16819, // w23 w21 w19 w17 + 16819, 5906, 5906, 25172, // w30 w28 w26 w24 + -29692, 25172, -16819, -29692, // w31 w29 w27 w25 - /* row3 */ - 19266, 19266, 25172, -10426, /* w09 w01 w08 w00 */ - 19266, 19266, 10426, -25172, /* w13 w05 w12 w04 */ - 19266, -19266, 10426, 25172, /* w11 w03 w10 w02 */ - -19266, 19266, -25172, -10426, /* w15 w07 w14 w06, */ - 26722, 15137, 22654, -26722, /* w22 w20 w18 w16 */ - 22654, 5315, -5315, -15137, /* w23 w21 w19 w17 */ - 15137, 5315, 5315, 22654, /* w30 w28 w26 w24 */ - -26722, 22654, -15137, -26722, /* w31 w29 w27 w25, */ + //row3 + 19266, 19266, 25172, -10426, // w09 w01 w08 w00 + 19266, 19266, 10426, -25172, // w13 w05 w12 w04 + 19266, -19266, 10426, 25172, // w11 w03 w10 w02 + -19266, 19266, -25172, -10426, // w15 w07 w14 w06, + 26722, 15137, 22654, -26722, // w22 w20 w18 w16 + 22654, 5315, -5315, -15137, // w23 w21 w19 w17 + 15137, 5315, 5315, 22654, // w30 w28 w26 w24 + -26722, 22654, -15137, -26722, // w31 w29 w27 w25, - /* row4 */ - 16384, 16384, 21407, -8867, /* w09 w01 w08 w00 */ - 16384, 16384, 8867, -21407, /* w13 w05 w12 w04 */ - 16384, -16384, 8867, 21407, /* w11 w03 w10 w02 */ - -16384, 16384, -21407, -8867, /* w15 w07 w14 w06 */ - 22725, 12873, 19266, -22725, /* w22 w20 w18 w16 */ - 19266, 4520, -4520, -12873, /* w23 w21 w19 w17 */ - 12873, 4520, 4520, 19266, /* w30 w28 w26 w24 */ - -22725, 19266, -12873, -22725, /* w31 w29 w27 w25 */ + //row4 + 16384, 16384, 21407, -8867, // w09 w01 w08 w00 + 16384, 
16384, 8867, -21407, // w13 w05 w12 w04 + 16384, -16384, 8867, 21407, // w11 w03 w10 w02 + -16384, 16384, -21407, -8867, // w15 w07 w14 w06 + 22725, 12873, 19266, -22725, // w22 w20 w18 w16 + 19266, 4520, -4520, -12873, // w23 w21 w19 w17 + 12873, 4520, 4520, 19266, // w30 w28 w26 w24 + -22725, 19266, -12873, -22725, // w31 w29 w27 w25 - /* row5 */ - 19266, 19266, 25172, -10426, /* w09 w01 w08 w00 */ - 19266, 19266, 10426, -25172, /* w13 w05 w12 w04 */ - 19266, -19266, 10426, 25172, /* w11 w03 w10 w02 */ - -19266, 19266, -25172, -10426, /* w15 w07 w14 w06 */ - 26722, 15137, 22654, -26722, /* w22 w20 w18 w16 */ - 22654, 5315, -5315, -15137, /* w23 w21 w19 w17 */ - 15137, 5315, 5315, 22654, /* w30 w28 w26 w24 */ - -26722, 22654, -15137, -26722, /* w31 w29 w27 w25 */ + //row5 + 19266, 19266, 25172, -10426, // w09 w01 w08 w00 + 19266, 19266, 10426, -25172, // w13 w05 w12 w04 + 19266, -19266, 10426, 25172, // w11 w03 w10 w02 + -19266, 19266, -25172, -10426, // w15 w07 w14 w06 + 26722, 15137, 22654, -26722, // w22 w20 w18 w16 + 22654, 5315, -5315, -15137, // w23 w21 w19 w17 + 15137, 5315, 5315, 22654, // w30 w28 w26 w24 + -26722, 22654, -15137, -26722, // w31 w29 w27 w25 - /* row6 */ - 21407, 21407, 27969, -11585, /* w09 w01 w08 w00 */ - 21407, 21407, 11585, -27969, /* w13 w05 w12 w04 */ - 21407, -21407, 11585, 27969, /* w11 w03 w10 w02 */ - -21407, 21407, -27969, -11585, /* w15 w07 w14 w06, */ - 29692, 16819, 25172, -29692, /* w22 w20 w18 w16 */ - 25172, 5906, -5906, -16819, /* w23 w21 w19 w17 */ - 16819, 5906, 5906, 25172, /* w30 w28 w26 w24 */ - -29692, 25172, -16819, -29692, /* w31 w29 w27 w25, */ + //row6 + 21407, 21407, 27969, -11585, // w09 w01 w08 w00 + 21407, 21407, 11585, -27969, // w13 w05 w12 w04 + 21407, -21407, 11585, 27969, // w11 w03 w10 w02 + -21407, 21407, -27969, -11585, // w15 w07 w14 w06, + 29692, 16819, 25172, -29692, // w22 w20 w18 w16 + 25172, 5906, -5906, -16819, // w23 w21 w19 w17 + 16819, 5906, 5906, 25172, // w30 w28 w26 w24 + -29692, 25172, 
-16819, -29692, // w31 w29 w27 w25, - /* row7 */ - 22725, 22725, 29692, -12299, /* w09 w01 w08 w00 */ - 22725, 22725, 12299, -29692, /* w13 w05 w12 w04 */ - 22725, -22725, 12299, 29692, /* w11 w03 w10 w02 */ - -22725, 22725, -29692, -12299, /* w15 w07 w14 w06, */ - 31521, 17855, 26722, -31521, /* w22 w20 w18 w16 */ - 26722, 6270, -6270, -17855, /* w23 w21 w19 w17 */ - 17855, 6270, 6270, 26722, /* w30 w28 w26 w24 */ - -31521, 26722, -17855, -31521 /* w31 w29 w27 w25 */ + //row7 + 22725, 22725, 29692, -12299, // w09 w01 w08 w00 + 22725, 22725, 12299, -29692, // w13 w05 w12 w04 + 22725, -22725, 12299, 29692, // w11 w03 w10 w02 + -22725, 22725, -29692, -12299, // w15 w07 w14 w06, + 31521, 17855, 26722, -31521, // w22 w20 w18 w16 + 26722, 6270, -6270, -17855, // w23 w21 w19 w17 + 17855, 6270, 6270, 26722, // w30 w28 w26 w24 + -31521, 26722, -17855, -31521 // w31 w29 w27 w25 }; @@ -269,7 +267,7 @@ static inline void fdct_row(const int16_t *in, int16_t *out, const int16_t *tabl movq_r2m(mm6, *(out + 4)); } -void fdct_mmx(int16_t *block) +void ff_fdct_mmx(int16_t *block) { /* XXX: not thread safe */ static int16_t block_tmp[64] ATTR_ALIGN(8); diff --git a/src/libffmpeg/libavcodec/i386/fft_sse.c b/src/libffmpeg/libavcodec/i386/fft_sse.c new file mode 100644 index 000000000..8e8e36b0f --- /dev/null +++ b/src/libffmpeg/libavcodec/i386/fft_sse.c @@ -0,0 +1,128 @@ +/* + * FFT/MDCT transform with SSE optimizations + * Copyright (c) 2002 Fabrice Bellard. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include "../dsputil.h" +#include + +#include + +static const float p1p1p1m1[4] __attribute__((aligned(16))) = + { 1.0, 1.0, 1.0, -1.0 }; + +static const float p1p1m1m1[4] __attribute__((aligned(16))) = + { 1.0, 1.0, -1.0, -1.0 }; + +#if 0 +static void print_v4sf(const char *str, __m128 a) +{ + float *p = (float *)&a; + printf("%s: %f %f %f %f\n", + str, p[0], p[1], p[2], p[3]); +} +#endif + +/* XXX: handle reverse case */ +void fft_calc_sse(FFTContext *s, FFTComplex *z) +{ + int ln = s->nbits; + int j, np, np2; + int nblocks, nloops; + register FFTComplex *p, *q; + FFTComplex *cptr, *cptr1; + int k; + + np = 1 << ln; + + { + __m128 *r, a, b, a1, c1, c2; + + r = (__m128 *)&z[0]; + c1 = *(__m128 *)p1p1m1m1; + c2 = *(__m128 *)p1p1p1m1; + j = (np >> 2); + do { + a = r[0]; + b = _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 0, 3, 2)); + a = _mm_mul_ps(a, c1); + /* do the pass 0 butterfly */ + a = _mm_add_ps(a, b); + + a1 = r[1]; + b = _mm_shuffle_ps(a1, a1, _MM_SHUFFLE(1, 0, 3, 2)); + a1 = _mm_mul_ps(a1, c1); + /* do the pass 0 butterfly */ + b = _mm_add_ps(a1, b); + + /* multiply third by -i */ + b = _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 3, 1, 0)); + b = _mm_mul_ps(b, c2); + + /* do the pass 1 butterfly */ + r[0] = _mm_add_ps(a, b); + r[1] = _mm_sub_ps(a, b); + r += 2; + } while (--j != 0); + } + /* pass 2 .. 
ln-1 */ + + nblocks = np >> 3; + nloops = 1 << 2; + np2 = np >> 1; + + cptr1 = s->exptab1; + do { + p = z; + q = z + nloops; + j = nblocks; + do { + cptr = cptr1; + k = nloops >> 1; + do { + __m128 a, b, c, t1, t2; + + a = *(__m128 *)p; + b = *(__m128 *)q; + + /* complex mul */ + c = *(__m128 *)cptr; + /* cre*re cim*re */ + t1 = _mm_mul_ps(c, + _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 0, 0))); + c = *(__m128 *)(cptr + 2); + /* -cim*im cre*im */ + t2 = _mm_mul_ps(c, + _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 1, 1))); + b = _mm_add_ps(t1, t2); + + /* butterfly */ + *(__m128 *)p = _mm_add_ps(a, b); + *(__m128 *)q = _mm_sub_ps(a, b); + + p += 2; + q += 2; + cptr += 4; + } while (--k); + + p += nloops; + q += nloops; + } while (--j); + cptr1 += nloops * 2; + nblocks = nblocks >> 1; + nloops = nloops << 1; + } while (nblocks != 0); +} diff --git a/src/libffmpeg/libavcodec/i386/idct_mmx.c b/src/libffmpeg/libavcodec/i386/idct_mmx.c index 1225de5d2..298c8a8b0 100644 --- a/src/libffmpeg/libavcodec/i386/idct_mmx.c +++ b/src/libffmpeg/libavcodec/i386/idct_mmx.c @@ -87,99 +87,102 @@ static inline void idct_row (int16_t * row, int offset, static inline void mmxext_row_head (int16_t * row, int offset, int16_t * table) { - movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */ - movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */ + movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 - movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */ - movq_m2r (*table, mm3); /* mm3 = -C2 -C4 C2 C4 */ + movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 + movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 - movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */ - movq_m2r (*(table+4), mm4); /* mm4 = C6 C4 C6 C4 */ + movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4 + movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 - pmaddwd_r2r (mm0, mm3); /* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */ - pshufw_r2r (mm2, mm2, 0x4e); /* mm2 = x2 x0 x6 x4 */ + movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4 + pmaddwd_r2r (mm0, mm3); // mm3 = 
-C4*x4-C2*x6 C4*x0+C2*x2 + + pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4 } static inline void mmxext_row (int16_t * table, int32_t * rounder) { - movq_m2r (*(table+8), mm1); /* mm1 = -C5 -C1 C3 C1 */ - pmaddwd_r2r (mm2, mm4); /* mm4 = C4*x0+C6*x2 C4*x4+C6*x6 */ + movq_m2r (*(table+8), mm1); // mm1 = -C5 -C1 C3 C1 + pmaddwd_r2r (mm2, mm4); // mm4 = C4*x0+C6*x2 C4*x4+C6*x6 - pmaddwd_m2r (*(table+16), mm0); /* mm0 = C4*x4-C6*x6 C4*x0-C6*x2 */ - pshufw_r2r (mm6, mm6, 0x4e); /* mm6 = x3 x1 x7 x5 */ + pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x4-C6*x6 C4*x0-C6*x2 + pshufw_r2r (mm6, mm6, 0x4e); // mm6 = x3 x1 x7 x5 - movq_m2r (*(table+12), mm7); /* mm7 = -C7 C3 C7 C5 */ - pmaddwd_r2r (mm5, mm1); /* mm1 = -C1*x5-C5*x7 C1*x1+C3*x3 */ + movq_m2r (*(table+12), mm7); // mm7 = -C7 C3 C7 C5 + pmaddwd_r2r (mm5, mm1); // mm1 = -C1*x5-C5*x7 C1*x1+C3*x3 - paddd_m2r (*rounder, mm3); /* mm3 += rounder */ - pmaddwd_r2r (mm6, mm7); /* mm7 = C3*x1-C7*x3 C5*x5+C7*x7 */ + paddd_m2r (*rounder, mm3); // mm3 += rounder + pmaddwd_r2r (mm6, mm7); // mm7 = C3*x1-C7*x3 C5*x5+C7*x7 - pmaddwd_m2r (*(table+20), mm2); /* mm2 = C4*x0-C2*x2 -C4*x4+C2*x6 */ - paddd_r2r (mm4, mm3); /* mm3 = a1 a0 + rounder */ + pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x0-C2*x2 -C4*x4+C2*x6 + paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder - pmaddwd_m2r (*(table+24), mm5); /* mm5 = C3*x5-C1*x7 C5*x1-C1*x3 */ - movq_r2r (mm3, mm4); /* mm4 = a1 a0 + rounder */ + pmaddwd_m2r (*(table+24), mm5); // mm5 = C3*x5-C1*x7 C5*x1-C1*x3 + movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder - pmaddwd_m2r (*(table+28), mm6); /* mm6 = C7*x1-C5*x3 C7*x5+C3*x7 */ - paddd_r2r (mm7, mm1); /* mm1 = b1 b0 */ + pmaddwd_m2r (*(table+28), mm6); // mm6 = C7*x1-C5*x3 C7*x5+C3*x7 + paddd_r2r (mm7, mm1); // mm1 = b1 b0 - paddd_m2r (*rounder, mm0); /* mm0 += rounder */ - psubd_r2r (mm1, mm3); /* mm3 = a1-b1 a0-b0 + rounder */ + paddd_m2r (*rounder, mm0); // mm0 += rounder + psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder - psrad_i2r 
(ROW_SHIFT, mm3); /* mm3 = y6 y7 */ - paddd_r2r (mm4, mm1); /* mm1 = a1+b1 a0+b0 + rounder */ + psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7 + paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder - paddd_r2r (mm2, mm0); /* mm0 = a3 a2 + rounder */ - psrad_i2r (ROW_SHIFT, mm1); /* mm1 = y1 y0 */ + paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder + psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0 - paddd_r2r (mm6, mm5); /* mm5 = b3 b2 */ - movq_r2r (mm0, mm4); /* mm4 = a3 a2 + rounder */ + paddd_r2r (mm6, mm5); // mm5 = b3 b2 + movq_r2r (mm0, mm4); // mm4 = a3 a2 + rounder - paddd_r2r (mm5, mm0); /* mm0 = a3+b3 a2+b2 + rounder */ - psubd_r2r (mm5, mm4); /* mm4 = a3-b3 a2-b2 + rounder */ + paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder + psubd_r2r (mm5, mm4); // mm4 = a3-b3 a2-b2 + rounder } static inline void mmxext_row_tail (int16_t * row, int store) { - psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */ - psrad_i2r (ROW_SHIFT, mm4); /* mm4 = y4 y5 */ + psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 + + psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5 + + packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 - packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */ - packssdw_r2r (mm3, mm4); /* mm4 = y6 y7 y4 y5 */ + packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5 - movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */ - pshufw_r2r (mm4, mm4, 0xb1); /* mm4 = y7 y6 y5 y4 */ + movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 + pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4 /* slot */ - movq_r2m (mm4, *(row+store+4)); /* save y7 y6 y5 y4 */ + movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4 } static inline void mmxext_row_mid (int16_t * row, int store, int offset, int16_t * table) { - movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */ - psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */ + movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 + psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 - movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */ - psrad_i2r (ROW_SHIFT, mm4); /* mm4 = y4 y5 */ + movq_m2r 
(*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 + psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5 - packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */ - movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */ + packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 + movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 - packssdw_r2r (mm3, mm4); /* mm4 = y6 y7 y4 y5 */ - movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */ + packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5 + movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 - movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */ - pshufw_r2r (mm4, mm4, 0xb1); /* mm4 = y7 y6 y5 y4 */ + movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 + pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4 - movq_m2r (*table, mm3); /* mm3 = -C2 -C4 C2 C4 */ - movq_r2m (mm4, *(row+store+4)); /* save y7 y6 y5 y4 */ + movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4 + movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4 - pmaddwd_r2r (mm0, mm3); /* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */ - movq_m2r (*(table+4), mm4); /* mm4 = C6 C4 C6 C4 */ + pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 - pshufw_r2r (mm2, mm2, 0x4e); /* mm2 = x2 x0 x6 x4 */ + movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4 + pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4 } @@ -196,120 +199,123 @@ static inline void mmxext_row_mid (int16_t * row, int store, static inline void mmx_row_head (int16_t * row, int offset, int16_t * table) { - movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */ - movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */ + movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 - movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */ - movq_m2r (*table, mm3); /* mm3 = C6 C4 C2 C4 */ + movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 + movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 - movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */ - punpckldq_r2r (mm0, mm0); /* mm0 = x2 x0 x2 x0 */ + movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4 + movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 - movq_m2r (*(table+4), mm4); /* mm4 = -C2 
-C4 C6 C4 */ - pmaddwd_r2r (mm0, mm3); /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */ + punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0 - movq_m2r (*(table+8), mm1); /* mm1 = -C7 C3 C3 C1 */ - punpckhdq_r2r (mm2, mm2); /* mm2 = x6 x4 x6 x4 */ + movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4 + pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2 + + movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1 + punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4 } static inline void mmx_row (int16_t * table, int32_t * rounder) { - pmaddwd_r2r (mm2, mm4); /* mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 */ - punpckldq_r2r (mm5, mm5); /* mm5 = x3 x1 x3 x1 */ - - pmaddwd_m2r (*(table+16), mm0); /* mm0 = C4*x0-C2*x2 C4*x0-C6*x2 */ - punpckhdq_r2r (mm6, mm6); /* mm6 = x7 x5 x7 x5 */ + pmaddwd_r2r (mm2, mm4); // mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 + punpckldq_r2r (mm5, mm5); // mm5 = x3 x1 x3 x1 - movq_m2r (*(table+12), mm7); /* mm7 = -C5 -C1 C7 C5 */ - pmaddwd_r2r (mm5, mm1); /* mm1 = C3*x1-C7*x3 C1*x1+C3*x3 */ + pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x0-C2*x2 C4*x0-C6*x2 + punpckhdq_r2r (mm6, mm6); // mm6 = x7 x5 x7 x5 - paddd_m2r (*rounder, mm3); /* mm3 += rounder */ - pmaddwd_r2r (mm6, mm7); /* mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 */ + movq_m2r (*(table+12), mm7); // mm7 = -C5 -C1 C7 C5 + pmaddwd_r2r (mm5, mm1); // mm1 = C3*x1-C7*x3 C1*x1+C3*x3 - pmaddwd_m2r (*(table+20), mm2); /* mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 */ - paddd_r2r (mm4, mm3); /* mm3 = a1 a0 + rounder */ + paddd_m2r (*rounder, mm3); // mm3 += rounder + pmaddwd_r2r (mm6, mm7); // mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 - pmaddwd_m2r (*(table+24), mm5); /* mm5 = C7*x1-C5*x3 C5*x1-C1*x3 */ - movq_r2r (mm3, mm4); /* mm4 = a1 a0 + rounder */ + pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 + paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder - pmaddwd_m2r (*(table+28), mm6); /* mm6 = C3*x5-C1*x7 C7*x5+C3*x7 */ - paddd_r2r (mm7, mm1); /* mm1 = b1 b0 */ + pmaddwd_m2r (*(table+24), mm5); // mm5 = C7*x1-C5*x3 C5*x1-C1*x3 + movq_r2r (mm3, mm4); // mm4 
= a1 a0 + rounder - paddd_m2r (*rounder, mm0); /* mm0 += rounder */ - psubd_r2r (mm1, mm3); /* mm3 = a1-b1 a0-b0 + rounder */ + pmaddwd_m2r (*(table+28), mm6); // mm6 = C3*x5-C1*x7 C7*x5+C3*x7 + paddd_r2r (mm7, mm1); // mm1 = b1 b0 - psrad_i2r (ROW_SHIFT, mm3); /* mm3 = y6 y7 */ - paddd_r2r (mm4, mm1); /* mm1 = a1+b1 a0+b0 + rounder */ + paddd_m2r (*rounder, mm0); // mm0 += rounder + psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder - paddd_r2r (mm2, mm0); /* mm0 = a3 a2 + rounder */ - psrad_i2r (ROW_SHIFT, mm1); /* mm1 = y1 y0 */ + psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7 + paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder - paddd_r2r (mm6, mm5); /* mm5 = b3 b2 */ - movq_r2r (mm0, mm7); /* mm7 = a3 a2 + rounder */ + paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder + psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0 - paddd_r2r (mm5, mm0); /* mm0 = a3+b3 a2+b2 + rounder */ - psubd_r2r (mm5, mm7); /* mm7 = a3-b3 a2-b2 + rounder */ + paddd_r2r (mm6, mm5); // mm5 = b3 b2 + movq_r2r (mm0, mm7); // mm7 = a3 a2 + rounder + paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder + psubd_r2r (mm5, mm7); // mm7 = a3-b3 a2-b2 + rounder } static inline void mmx_row_tail (int16_t * row, int store) { - psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */ - psrad_i2r (ROW_SHIFT, mm7); /* mm7 = y4 y5 */ + psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 - packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */ - packssdw_r2r (mm3, mm7); /* mm7 = y6 y7 y4 y5 */ + psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5 - movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */ - movq_r2r (mm7, mm4); /* mm4 = y6 y7 y4 y5 */ + packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 - pslld_i2r (16, mm7); /* mm7 = y7 0 y5 0 */ - psrld_i2r (16, mm4); /* mm4 = 0 y6 0 y4 */ + packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5 - por_r2r (mm4, mm7); /* mm7 = y7 y6 y5 y4 */ + movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 + movq_r2r (mm7, mm4); // mm4 = y6 y7 y4 y5 + + pslld_i2r (16, mm7); // mm7 = y7 0 y5 0 + + psrld_i2r (16, mm4); // mm4 = 
0 y6 0 y4 + + por_r2r (mm4, mm7); // mm7 = y7 y6 y5 y4 /* slot */ - movq_r2m (mm7, *(row+store+4)); /* save y7 y6 y5 y4 */ + movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4 } static inline void mmx_row_mid (int16_t * row, int store, int offset, int16_t * table) { - movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */ - psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */ + movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 + psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 - movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */ - psrad_i2r (ROW_SHIFT, mm7); /* mm7 = y4 y5 */ + movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 + psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5 - packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */ - movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */ + packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 + movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 - packssdw_r2r (mm3, mm7); /* mm7 = y6 y7 y4 y5 */ - movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */ + packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5 + movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 - movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */ - movq_r2r (mm7, mm1); /* mm1 = y6 y7 y4 y5 */ + movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 + movq_r2r (mm7, mm1); // mm1 = y6 y7 y4 y5 - punpckldq_r2r (mm0, mm0); /* mm0 = x2 x0 x2 x0 */ - psrld_i2r (16, mm7); /* mm7 = 0 y6 0 y4 */ + punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0 + psrld_i2r (16, mm7); // mm7 = 0 y6 0 y4 - movq_m2r (*table, mm3); /* mm3 = C6 C4 C2 C4 */ - pslld_i2r (16, mm1); /* mm1 = y7 0 y5 0 */ + movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4 + pslld_i2r (16, mm1); // mm1 = y7 0 y5 0 - movq_m2r (*(table+4), mm4); /* mm4 = -C2 -C4 C6 C4 */ - por_r2r (mm1, mm7); /* mm7 = y7 y6 y5 y4 */ + movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4 + por_r2r (mm1, mm7); // mm7 = y7 y6 y5 y4 - movq_m2r (*(table+8), mm1); /* mm1 = -C7 C3 C3 C1 */ - punpckhdq_r2r (mm2, mm2); /* mm2 = x6 x4 x6 x4 */ + movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1 + 
punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4 - movq_r2m (mm7, *(row+store+4)); /* save y7 y6 y5 y4 */ - pmaddwd_r2r (mm0, mm3); /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */ + movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4 + pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2 } #if 0 -/* C column IDCT - its just here to document the MMXEXT and MMX versions */ +// C column IDCT - its just here to document the MMXEXT and MMX versions static inline void idct_col (int16_t * col, int offset) { /* multiplication - as implemented on mmx */ @@ -380,7 +386,7 @@ static inline void idct_col (int16_t * col, int offset) #endif -/* MMX column IDCT */ +// MMX column IDCT static inline void idct_col (int16_t * col, int offset) { #define T1 13036 @@ -396,131 +402,132 @@ static inline void idct_col (int16_t * col, int offset) /* column code adapted from peter gubanov */ /* http://www.elecard.com/peter/idct.shtml */ - movq_m2r (*_T1, mm0); /* mm0 = T1 */ - movq_m2r (*(col+offset+1*8), mm1); /* mm1 = x1 */ + movq_m2r (*_T1, mm0); // mm0 = T1 - movq_r2r (mm0, mm2); /* mm2 = T1 */ - movq_m2r (*(col+offset+7*8), mm4); /* mm4 = x7 */ + movq_m2r (*(col+offset+1*8), mm1); // mm1 = x1 + movq_r2r (mm0, mm2); // mm2 = T1 - pmulhw_r2r (mm1, mm0); /* mm0 = T1*x1 */ - movq_m2r (*_T3, mm5); /* mm5 = T3 */ + movq_m2r (*(col+offset+7*8), mm4); // mm4 = x7 + pmulhw_r2r (mm1, mm0); // mm0 = T1*x1 - pmulhw_r2r (mm4, mm2); /* mm2 = T1*x7 */ - movq_m2r (*(col+offset+5*8), mm6); /* mm6 = x5 */ + movq_m2r (*_T3, mm5); // mm5 = T3 + pmulhw_r2r (mm4, mm2); // mm2 = T1*x7 - movq_r2r (mm5, mm7); /* mm7 = T3-1 */ - movq_m2r (*(col+offset+3*8), mm3); /* mm3 = x3 */ + movq_m2r (*(col+offset+5*8), mm6); // mm6 = x5 + movq_r2r (mm5, mm7); // mm7 = T3-1 - psubsw_r2r (mm4, mm0); /* mm0 = v17 */ - movq_m2r (*_T2, mm4); /* mm4 = T2 */ + movq_m2r (*(col+offset+3*8), mm3); // mm3 = x3 + psubsw_r2r (mm4, mm0); // mm0 = v17 - pmulhw_r2r (mm3, mm5); /* mm5 = (T3-1)*x3 */ - paddsw_r2r (mm2, mm1); /* mm1 = u17 */ + movq_m2r 
(*_T2, mm4); // mm4 = T2 + pmulhw_r2r (mm3, mm5); // mm5 = (T3-1)*x3 - pmulhw_r2r (mm6, mm7); /* mm7 = (T3-1)*x5 */ + paddsw_r2r (mm2, mm1); // mm1 = u17 + pmulhw_r2r (mm6, mm7); // mm7 = (T3-1)*x5 /* slot */ - movq_r2r (mm4, mm2); /* mm2 = T2 */ - paddsw_r2r (mm3, mm5); /* mm5 = T3*x3 */ + movq_r2r (mm4, mm2); // mm2 = T2 + paddsw_r2r (mm3, mm5); // mm5 = T3*x3 + + pmulhw_m2r (*(col+offset+2*8), mm4);// mm4 = T2*x2 + paddsw_r2r (mm6, mm7); // mm7 = T3*x5 - pmulhw_m2r (*(col+offset+2*8), mm4);/* mm4 = T2*x2 */ - paddsw_r2r (mm6, mm7); /* mm7 = T3*x5 */ + psubsw_r2r (mm6, mm5); // mm5 = v35 + paddsw_r2r (mm3, mm7); // mm7 = u35 - psubsw_r2r (mm6, mm5); /* mm5 = v35 */ - paddsw_r2r (mm3, mm7); /* mm7 = u35 */ + movq_m2r (*(col+offset+6*8), mm3); // mm3 = x6 + movq_r2r (mm0, mm6); // mm6 = v17 - movq_m2r (*(col+offset+6*8), mm3); /* mm3 = x6 */ - movq_r2r (mm0, mm6); /* mm6 = v17 */ + pmulhw_r2r (mm3, mm2); // mm2 = T2*x6 + psubsw_r2r (mm5, mm0); // mm0 = b3 - pmulhw_r2r (mm3, mm2); /* mm2 = T2*x6 */ - psubsw_r2r (mm5, mm0); /* mm0 = b3 */ + psubsw_r2r (mm3, mm4); // mm4 = v26 + paddsw_r2r (mm6, mm5); // mm5 = v12 - psubsw_r2r (mm3, mm4); /* mm4 = v26 */ - paddsw_r2r (mm6, mm5); /* mm5 = v12 */ + movq_r2m (mm0, *(col+offset+3*8)); // save b3 in scratch0 + movq_r2r (mm1, mm6); // mm6 = u17 - movq_r2m (mm0, *(col+offset+3*8)); /* save b3 in scratch0 */ - movq_r2r (mm1, mm6); /* mm6 = u17 */ + paddsw_m2r (*(col+offset+2*8), mm2);// mm2 = u26 + paddsw_r2r (mm7, mm6); // mm6 = b0 - paddsw_m2r (*(col+offset+2*8), mm2);/* mm2 = u26 */ - paddsw_r2r (mm7, mm6); /* mm6 = b0 */ + psubsw_r2r (mm7, mm1); // mm1 = u12 + movq_r2r (mm1, mm7); // mm7 = u12 - psubsw_r2r (mm7, mm1); /* mm1 = u12 */ - movq_r2r (mm1, mm7); /* mm7 = u12 */ + movq_m2r (*(col+offset+0*8), mm3); // mm3 = x0 + paddsw_r2r (mm5, mm1); // mm1 = u12+v12 - movq_m2r (*(col+offset+0*8), mm3); /* mm3 = x0 */ - paddsw_r2r (mm5, mm1); /* mm1 = u12+v12 */ + movq_m2r (*_C4, mm0); // mm0 = C4/2 + psubsw_r2r (mm5, mm7); // 
mm7 = u12-v12 - movq_m2r (*_C4, mm0); /* mm0 = C4/2 */ - psubsw_r2r (mm5, mm7); /* mm7 = u12-v12 */ + movq_r2m (mm6, *(col+offset+5*8)); // save b0 in scratch1 + pmulhw_r2r (mm0, mm1); // mm1 = b1/2 - movq_r2m (mm6, *(col+offset+5*8)); /* save b0 in scratch1 */ - pmulhw_r2r (mm0, mm1); /* mm1 = b1/2 */ + movq_r2r (mm4, mm6); // mm6 = v26 + pmulhw_r2r (mm0, mm7); // mm7 = b2/2 - movq_r2r (mm4, mm6); /* mm6 = v26 */ - pmulhw_r2r (mm0, mm7); /* mm7 = b2/2 */ + movq_m2r (*(col+offset+4*8), mm5); // mm5 = x4 + movq_r2r (mm3, mm0); // mm0 = x0 - movq_m2r (*(col+offset+4*8), mm5); /* mm5 = x4 */ - movq_r2r (mm3, mm0); /* mm0 = x0 */ + psubsw_r2r (mm5, mm3); // mm3 = v04 + paddsw_r2r (mm5, mm0); // mm0 = u04 - psubsw_r2r (mm5, mm3); /* mm3 = v04 */ - paddsw_r2r (mm5, mm0); /* mm0 = u04 */ + paddsw_r2r (mm3, mm4); // mm4 = a1 + movq_r2r (mm0, mm5); // mm5 = u04 - paddsw_r2r (mm3, mm4); /* mm4 = a1 */ - movq_r2r (mm0, mm5); /* mm5 = u04 */ + psubsw_r2r (mm6, mm3); // mm3 = a2 + paddsw_r2r (mm2, mm5); // mm5 = a0 - psubsw_r2r (mm6, mm3); /* mm3 = a2 */ - paddsw_r2r (mm2, mm5); /* mm5 = a0 */ + paddsw_r2r (mm1, mm1); // mm1 = b1 + psubsw_r2r (mm2, mm0); // mm0 = a3 - paddsw_r2r (mm1, mm1); /* mm1 = b1 */ - psubsw_r2r (mm2, mm0); /* mm0 = a3 */ + paddsw_r2r (mm7, mm7); // mm7 = b2 + movq_r2r (mm3, mm2); // mm2 = a2 - paddsw_r2r (mm7, mm7); /* mm7 = b2 */ - movq_r2r (mm3, mm2); /* mm2 = a2 */ + movq_r2r (mm4, mm6); // mm6 = a1 + paddsw_r2r (mm7, mm3); // mm3 = a2+b2 - movq_r2r (mm4, mm6); /* mm6 = a1 */ - paddsw_r2r (mm7, mm3); /* mm3 = a2+b2 */ + psraw_i2r (COL_SHIFT, mm3); // mm3 = y2 + paddsw_r2r (mm1, mm4); // mm4 = a1+b1 - psraw_i2r (COL_SHIFT, mm3); /* mm3 = y2 */ - paddsw_r2r (mm1, mm4); /* mm4 = a1+b1 */ + psraw_i2r (COL_SHIFT, mm4); // mm4 = y1 + psubsw_r2r (mm1, mm6); // mm6 = a1-b1 - psraw_i2r (COL_SHIFT, mm4); /* mm4 = y1 */ - psubsw_r2r (mm1, mm6); /* mm6 = a1-b1 */ + movq_m2r (*(col+offset+5*8), mm1); // mm1 = b0 + psubsw_r2r (mm7, mm2); // mm2 = a2-b2 - movq_m2r 
(*(col+offset+5*8), mm1); /* mm1 = b0 */ - psubsw_r2r (mm7, mm2); /* mm2 = a2-b2 */ + psraw_i2r (COL_SHIFT, mm6); // mm6 = y6 + movq_r2r (mm5, mm7); // mm7 = a0 - psraw_i2r (COL_SHIFT, mm6); /* mm6 = y6 */ - movq_r2r (mm5, mm7); /* mm7 = a0 */ + movq_r2m (mm4, *(col+offset+1*8)); // save y1 + psraw_i2r (COL_SHIFT, mm2); // mm2 = y5 - movq_r2m (mm4, *(col+offset+1*8)); /* save y1 */ - psraw_i2r (COL_SHIFT, mm2); /* mm2 = y5 */ + movq_r2m (mm3, *(col+offset+2*8)); // save y2 + paddsw_r2r (mm1, mm5); // mm5 = a0+b0 - movq_r2m (mm3, *(col+offset+2*8)); /* save y2 */ - paddsw_r2r (mm1, mm5); /* mm5 = a0+b0 */ + movq_m2r (*(col+offset+3*8), mm4); // mm4 = b3 + psubsw_r2r (mm1, mm7); // mm7 = a0-b0 - movq_m2r (*(col+offset+3*8), mm4); /* mm4 = b3 */ - psubsw_r2r (mm1, mm7); /* mm7 = a0-b0 */ + psraw_i2r (COL_SHIFT, mm5); // mm5 = y0 + movq_r2r (mm0, mm3); // mm3 = a3 - psraw_i2r (COL_SHIFT, mm5); /* mm5 = y0 */ - movq_r2r (mm0, mm3); /* mm3 = a3 */ + movq_r2m (mm2, *(col+offset+5*8)); // save y5 + psubsw_r2r (mm4, mm3); // mm3 = a3-b3 - movq_r2m (mm2, *(col+offset+5*8)); /* save y5 */ - psubsw_r2r (mm4, mm3); /* mm3 = a3-b3 */ + psraw_i2r (COL_SHIFT, mm7); // mm7 = y7 + paddsw_r2r (mm0, mm4); // mm4 = a3+b3 - psraw_i2r (COL_SHIFT, mm7); /* mm7 = y7 */ - paddsw_r2r (mm0, mm4); /* mm4 = a3+b3 */ + movq_r2m (mm5, *(col+offset+0*8)); // save y0 + psraw_i2r (COL_SHIFT, mm3); // mm3 = y4 - movq_r2m (mm5, *(col+offset+0*8)); /* save y0 */ - psraw_i2r (COL_SHIFT, mm3); /* mm3 = y4 */ + movq_r2m (mm6, *(col+offset+6*8)); // save y6 + psraw_i2r (COL_SHIFT, mm4); // mm4 = y3 - movq_r2m (mm6, *(col+offset+6*8)); /* save y6 */ - psraw_i2r (COL_SHIFT, mm4); /* mm4 = y3 */ + movq_r2m (mm7, *(col+offset+7*8)); // save y7 - movq_r2m (mm7, *(col+offset+7*8)); /* save y7 */ - movq_r2m (mm3, *(col+offset+4*8)); /* save y4 */ + movq_r2m (mm3, *(col+offset+4*8)); // save y4 - movq_r2m (mm4, *(col+offset+3*8)); /* save y3 */ + movq_r2m (mm4, *(col+offset+3*8)); // save y3 #undef T1 #undef T2 
diff --git a/src/libffmpeg/libavcodec/i386/motion_est_mmx.c b/src/libffmpeg/libavcodec/i386/motion_est_mmx.c index 9a29df49a..9b76cdb07 100644 --- a/src/libffmpeg/libavcodec/i386/motion_est_mmx.c +++ b/src/libffmpeg/libavcodec/i386/motion_est_mmx.c @@ -31,7 +31,7 @@ static __attribute__ ((aligned(8))) uint64_t bone= 0x0101010101010101LL; static inline void sad8_mmx(UINT8 *blk1, UINT8 *blk2, int stride, int h) { int len= -(stride<pict_type == B_TYPE) { - ____asm____ volatile( - "movl (%1), %%eax\n\t" - "movl 4(%1), %%edx\n\t" - "movl 8(%1), %%ecx\n\t" - "movl %%eax, (%0)\n\t" - "movl %%edx, 4(%0)\n\t" - "movl %%ecx, 8(%0)\n\t" - : - :"r"(s->current_picture), "r"(s->aux_picture) - :"eax","edx","ecx","memory"); - } else { - /* swap next and last */ - ____asm____ volatile( - "movl (%1), %%eax\n\t" - "movl 4(%1), %%edx\n\t" - "movl 8(%1), %%ecx\n\t" - "xchgl (%0), %%eax\n\t" - "xchgl 4(%0), %%edx\n\t" - "xchgl 8(%0), %%ecx\n\t" - "movl %%eax, (%1)\n\t" - "movl %%edx, 4(%1)\n\t" - "movl %%ecx, 8(%1)\n\t" - "movl %%eax, (%2)\n\t" - "movl %%edx, 4(%2)\n\t" - "movl %%ecx, 8(%2)\n\t" - : - :"r"(s->last_picture), "r"(s->next_picture), "r"(s->current_picture) - :"eax","edx","ecx","memory"); - } -} -#endif - static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xffffffffffffffffULL; static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL; @@ -77,85 +36,77 @@ static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x000 static void dct_unquantize_h263_mmx(MpegEncContext *s, DCTELEM *block, int n, int qscale) { - int i, level, qmul, qadd, nCoeffs; + int level, qmul, qadd, nCoeffs; - qmul = s->qscale << 1; - if (s->h263_aic && s->mb_intra) - qadd = 0; - else - qadd = (s->qscale - 1) | 1; + qmul = qscale << 1; + qadd = (qscale - 1) | 1; + assert(s->block_last_index[n]>=0); + if (s->mb_intra) { if (!s->h263_aic) { if (n < 4) - block[0] = block[0] * s->y_dc_scale; + level = block[0] * s->y_dc_scale; 
else - block[0] = block[0] * s->c_dc_scale; - } - for(i=1; i<8; i++) { - level = block[i]; - if (level) { - if (level < 0) { - level = level * qmul - qadd; - } else { - level = level * qmul + qadd; - } - block[i] = level; - } + level = block[0] * s->c_dc_scale; + }else{ + qadd = 0; + level= block[0]; } - nCoeffs=64; + nCoeffs=63; } else { - i = 0; - nCoeffs= zigzag_end[ s->block_last_index[n] ]; + nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; } -/* printf("%d %d ", qmul, qadd); */ -__asm__ volatile( - "movd %1, %%mm6 \n\t" /* qmul */ - "packssdw %%mm6, %%mm6 \n\t" - "packssdw %%mm6, %%mm6 \n\t" - "movd %2, %%mm5 \n\t" /* qadd */ - "pxor %%mm7, %%mm7 \n\t" - "packssdw %%mm5, %%mm5 \n\t" - "packssdw %%mm5, %%mm5 \n\t" - "psubw %%mm5, %%mm7 \n\t" - "pxor %%mm4, %%mm4 \n\t" - ".balign 16\n\t" - "1: \n\t" - "movq (%0, %3), %%mm0 \n\t" - "movq 8(%0, %3), %%mm1 \n\t" - - "pmullw %%mm6, %%mm0 \n\t" - "pmullw %%mm6, %%mm1 \n\t" - - "movq (%0, %3), %%mm2 \n\t" - "movq 8(%0, %3), %%mm3 \n\t" - - "pcmpgtw %%mm4, %%mm2 \n\t" /* block[i] < 0 ? -1 : 0 */ - "pcmpgtw %%mm4, %%mm3 \n\t" /* block[i] < 0 ? -1 : 0 */ - - "pxor %%mm2, %%mm0 \n\t" - "pxor %%mm3, %%mm1 \n\t" - - "paddw %%mm7, %%mm0 \n\t" - "paddw %%mm7, %%mm1 \n\t" - - "pxor %%mm0, %%mm2 \n\t" - "pxor %%mm1, %%mm3 \n\t" - - "pcmpeqw %%mm7, %%mm0 \n\t" /* block[i] == 0 ? -1 : 0 */ - "pcmpeqw %%mm7, %%mm1 \n\t" /* block[i] == 0 ? 
-1 : 0 */ - - "pandn %%mm2, %%mm0 \n\t" - "pandn %%mm3, %%mm1 \n\t" - - "movq %%mm0, (%0, %3) \n\t" - "movq %%mm1, 8(%0, %3) \n\t" - - "addl $16, %3 \n\t" - "js 1b \n\t" - ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(i-nCoeffs)) - : "memory" +//printf("%d %d ", qmul, qadd); +asm volatile( + "movd %1, %%mm6 \n\t" //qmul + "packssdw %%mm6, %%mm6 \n\t" + "packssdw %%mm6, %%mm6 \n\t" + "movd %2, %%mm5 \n\t" //qadd + "pxor %%mm7, %%mm7 \n\t" + "packssdw %%mm5, %%mm5 \n\t" + "packssdw %%mm5, %%mm5 \n\t" + "psubw %%mm5, %%mm7 \n\t" + "pxor %%mm4, %%mm4 \n\t" + ".balign 16\n\t" + "1: \n\t" + "movq (%0, %3), %%mm0 \n\t" + "movq 8(%0, %3), %%mm1 \n\t" + + "pmullw %%mm6, %%mm0 \n\t" + "pmullw %%mm6, %%mm1 \n\t" + + "movq (%0, %3), %%mm2 \n\t" + "movq 8(%0, %3), %%mm3 \n\t" + + "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 + "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 + + "pxor %%mm2, %%mm0 \n\t" + "pxor %%mm3, %%mm1 \n\t" + + "paddw %%mm7, %%mm0 \n\t" + "paddw %%mm7, %%mm1 \n\t" + + "pxor %%mm0, %%mm2 \n\t" + "pxor %%mm1, %%mm3 \n\t" + + "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? 
-1 : 0 + + "pandn %%mm2, %%mm0 \n\t" + "pandn %%mm3, %%mm1 \n\t" + + "movq %%mm0, (%0, %3) \n\t" + "movq %%mm1, 8(%0, %3) \n\t" + + "addl $16, %3 \n\t" + "jng 1b \n\t" + ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs)) + : "memory" ); + if(s->mb_intra) + block[0]= level; } @@ -194,124 +145,126 @@ static void dct_unquantize_mpeg1_mmx(MpegEncContext *s, int nCoeffs; const UINT16 *quant_matrix; - if(s->alternate_scan) nCoeffs= 64; - else nCoeffs= zigzag_end[ s->block_last_index[n] ]; + assert(s->block_last_index[n]>=0); + + nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1; if (s->mb_intra) { int block0; - if (n < 4) + if (n < 4) block0 = block[0] * s->y_dc_scale; else block0 = block[0] * s->c_dc_scale; /* XXX: only mpeg1 */ quant_matrix = s->intra_matrix; -__asm__ volatile( - "pcmpeqw %%mm7, %%mm7 \n\t" - "psrlw $15, %%mm7 \n\t" - "movd %2, %%mm6 \n\t" - "packssdw %%mm6, %%mm6 \n\t" - "packssdw %%mm6, %%mm6 \n\t" +asm volatile( + "pcmpeqw %%mm7, %%mm7 \n\t" + "psrlw $15, %%mm7 \n\t" + "movd %2, %%mm6 \n\t" + "packssdw %%mm6, %%mm6 \n\t" + "packssdw %%mm6, %%mm6 \n\t" "movl %3, %%eax \n\t" - ".balign 16\n\t" - "1: \n\t" - "movq (%0, %%eax), %%mm0 \n\t" - "movq 8(%0, %%eax), %%mm1 \n\t" - "movq (%1, %%eax), %%mm4 \n\t" - "movq 8(%1, %%eax), %%mm5 \n\t" - "pmullw %%mm6, %%mm4 \n\t" /* q=qscale*quant_matrix[i] */ - "pmullw %%mm6, %%mm5 \n\t" /* q=qscale*quant_matrix[i] */ - "pxor %%mm2, %%mm2 \n\t" - "pxor %%mm3, %%mm3 \n\t" - "pcmpgtw %%mm0, %%mm2 \n\t" /* block[i] < 0 ? -1 : 0 */ - "pcmpgtw %%mm1, %%mm3 \n\t" /* block[i] < 0 ? -1 : 0 */ - "pxor %%mm2, %%mm0 \n\t" - "pxor %%mm3, %%mm1 \n\t" - "psubw %%mm2, %%mm0 \n\t" /* abs(block[i]) */ - "psubw %%mm3, %%mm1 \n\t" /* abs(block[i]) */ - "pmullw %%mm4, %%mm0 \n\t" /* abs(block[i])*q */ - "pmullw %%mm5, %%mm1 \n\t" /* abs(block[i])*q */ - "pxor %%mm4, %%mm4 \n\t" - "pxor %%mm5, %%mm5 \n\t" /* FIXME slow */ - "pcmpeqw (%0, %%eax), %%mm4 \n\t" /* block[i] == 0 ? 
-1 : 0 */ - "pcmpeqw 8(%0, %%eax), %%mm5 \n\t" /* block[i] == 0 ? -1 : 0 */ - "psraw $3, %%mm0 \n\t" - "psraw $3, %%mm1 \n\t" - "psubw %%mm7, %%mm0 \n\t" - "psubw %%mm7, %%mm1 \n\t" - "por %%mm7, %%mm0 \n\t" - "por %%mm7, %%mm1 \n\t" - "pxor %%mm2, %%mm0 \n\t" - "pxor %%mm3, %%mm1 \n\t" - "psubw %%mm2, %%mm0 \n\t" - "psubw %%mm3, %%mm1 \n\t" - "pandn %%mm0, %%mm4 \n\t" - "pandn %%mm1, %%mm5 \n\t" - "movq %%mm4, (%0, %%eax) \n\t" - "movq %%mm5, 8(%0, %%eax) \n\t" - - "addl $16, %%eax \n\t" - "js 1b \n\t" - ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) - : "%eax", "memory" - ); + ".balign 16\n\t" + "1: \n\t" + "movq (%0, %%eax), %%mm0 \n\t" + "movq 8(%0, %%eax), %%mm1 \n\t" + "movq (%1, %%eax), %%mm4 \n\t" + "movq 8(%1, %%eax), %%mm5 \n\t" + "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] + "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] + "pxor %%mm2, %%mm2 \n\t" + "pxor %%mm3, %%mm3 \n\t" + "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 + "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 + "pxor %%mm2, %%mm0 \n\t" + "pxor %%mm3, %%mm1 \n\t" + "psubw %%mm2, %%mm0 \n\t" // abs(block[i]) + "psubw %%mm3, %%mm1 \n\t" // abs(block[i]) + "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q + "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q + "pxor %%mm4, %%mm4 \n\t" + "pxor %%mm5, %%mm5 \n\t" // FIXME slow + "pcmpeqw (%0, %%eax), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw 8(%0, %%eax), %%mm5 \n\t" // block[i] == 0 ? 
-1 : 0 + "psraw $3, %%mm0 \n\t" + "psraw $3, %%mm1 \n\t" + "psubw %%mm7, %%mm0 \n\t" + "psubw %%mm7, %%mm1 \n\t" + "por %%mm7, %%mm0 \n\t" + "por %%mm7, %%mm1 \n\t" + "pxor %%mm2, %%mm0 \n\t" + "pxor %%mm3, %%mm1 \n\t" + "psubw %%mm2, %%mm0 \n\t" + "psubw %%mm3, %%mm1 \n\t" + "pandn %%mm0, %%mm4 \n\t" + "pandn %%mm1, %%mm5 \n\t" + "movq %%mm4, (%0, %%eax) \n\t" + "movq %%mm5, 8(%0, %%eax) \n\t" + + "addl $16, %%eax \n\t" + "js 1b \n\t" + ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) + : "%eax", "memory" + ); block[0]= block0; } else { quant_matrix = s->inter_matrix; -__asm__ volatile( - "pcmpeqw %%mm7, %%mm7 \n\t" - "psrlw $15, %%mm7 \n\t" - "movd %2, %%mm6 \n\t" - "packssdw %%mm6, %%mm6 \n\t" - "packssdw %%mm6, %%mm6 \n\t" +asm volatile( + "pcmpeqw %%mm7, %%mm7 \n\t" + "psrlw $15, %%mm7 \n\t" + "movd %2, %%mm6 \n\t" + "packssdw %%mm6, %%mm6 \n\t" + "packssdw %%mm6, %%mm6 \n\t" "movl %3, %%eax \n\t" - ".balign 16\n\t" - "1: \n\t" - "movq (%0, %%eax), %%mm0 \n\t" - "movq 8(%0, %%eax), %%mm1 \n\t" - "movq (%1, %%eax), %%mm4 \n\t" - "movq 8(%1, %%eax), %%mm5 \n\t" - "pmullw %%mm6, %%mm4 \n\t" /* q=qscale*quant_matrix[i] */ - "pmullw %%mm6, %%mm5 \n\t" /* q=qscale*quant_matrix[i] */ - "pxor %%mm2, %%mm2 \n\t" - "pxor %%mm3, %%mm3 \n\t" - "pcmpgtw %%mm0, %%mm2 \n\t" /* block[i] < 0 ? -1 : 0 */ - "pcmpgtw %%mm1, %%mm3 \n\t" /* block[i] < 0 ? 
-1 : 0 */ - "pxor %%mm2, %%mm0 \n\t" - "pxor %%mm3, %%mm1 \n\t" - "psubw %%mm2, %%mm0 \n\t" /* abs(block[i]) */ - "psubw %%mm3, %%mm1 \n\t" /* abs(block[i]) */ - "paddw %%mm0, %%mm0 \n\t" /* abs(block[i])*2 */ - "paddw %%mm1, %%mm1 \n\t" /* abs(block[i])*2 */ - "paddw %%mm7, %%mm0 \n\t" /* abs(block[i])*2 + 1 */ - "paddw %%mm7, %%mm1 \n\t" /* abs(block[i])*2 + 1 */ - "pmullw %%mm4, %%mm0 \n\t" /* (abs(block[i])*2 + 1)*q */ - "pmullw %%mm5, %%mm1 \n\t" /* (abs(block[i])*2 + 1)*q */ - "pxor %%mm4, %%mm4 \n\t" - "pxor %%mm5, %%mm5 \n\t" /* FIXME slow */ - "pcmpeqw (%0, %%eax), %%mm4 \n\t" /* block[i] == 0 ? -1 : 0 */ - "pcmpeqw 8(%0, %%eax), %%mm5 \n\t" /* block[i] == 0 ? -1 : 0 */ - "psraw $4, %%mm0 \n\t" - "psraw $4, %%mm1 \n\t" - "psubw %%mm7, %%mm0 \n\t" - "psubw %%mm7, %%mm1 \n\t" - "por %%mm7, %%mm0 \n\t" - "por %%mm7, %%mm1 \n\t" - "pxor %%mm2, %%mm0 \n\t" - "pxor %%mm3, %%mm1 \n\t" - "psubw %%mm2, %%mm0 \n\t" - "psubw %%mm3, %%mm1 \n\t" - "pandn %%mm0, %%mm4 \n\t" - "pandn %%mm1, %%mm5 \n\t" - "movq %%mm4, (%0, %%eax) \n\t" - "movq %%mm5, 8(%0, %%eax) \n\t" - - "addl $16, %%eax \n\t" - "js 1b \n\t" - ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) - : "%eax", "memory" + ".balign 16\n\t" + "1: \n\t" + "movq (%0, %%eax), %%mm0 \n\t" + "movq 8(%0, %%eax), %%mm1 \n\t" + "movq (%1, %%eax), %%mm4 \n\t" + "movq 8(%1, %%eax), %%mm5 \n\t" + "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] + "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] + "pxor %%mm2, %%mm2 \n\t" + "pxor %%mm3, %%mm3 \n\t" + "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 + "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? 
-1 : 0 + "pxor %%mm2, %%mm0 \n\t" + "pxor %%mm3, %%mm1 \n\t" + "psubw %%mm2, %%mm0 \n\t" // abs(block[i]) + "psubw %%mm3, %%mm1 \n\t" // abs(block[i]) + "paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2 + "paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2 + "paddw %%mm7, %%mm0 \n\t" // abs(block[i])*2 + 1 + "paddw %%mm7, %%mm1 \n\t" // abs(block[i])*2 + 1 + "pmullw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q + "pmullw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q + "pxor %%mm4, %%mm4 \n\t" + "pxor %%mm5, %%mm5 \n\t" // FIXME slow + "pcmpeqw (%0, %%eax), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw 8(%0, %%eax), %%mm5 \n\t" // block[i] == 0 ? -1 : 0 + "psraw $4, %%mm0 \n\t" + "psraw $4, %%mm1 \n\t" + "psubw %%mm7, %%mm0 \n\t" + "psubw %%mm7, %%mm1 \n\t" + "por %%mm7, %%mm0 \n\t" + "por %%mm7, %%mm1 \n\t" + "pxor %%mm2, %%mm0 \n\t" + "pxor %%mm3, %%mm1 \n\t" + "psubw %%mm2, %%mm0 \n\t" + "psubw %%mm3, %%mm1 \n\t" + "pandn %%mm0, %%mm4 \n\t" + "pandn %%mm1, %%mm5 \n\t" + "movq %%mm4, (%0, %%eax) \n\t" + "movq %%mm5, 8(%0, %%eax) \n\t" + + "addl $16, %%eax \n\t" + "js 1b \n\t" + ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) + : "%eax", "memory" ); } + } static void dct_unquantize_mpeg2_mmx(MpegEncContext *s, @@ -319,115 +272,117 @@ static void dct_unquantize_mpeg2_mmx(MpegEncContext *s, { int nCoeffs; const UINT16 *quant_matrix; + + assert(s->block_last_index[n]>=0); - if(s->alternate_scan) nCoeffs= 64; - else nCoeffs= zigzag_end[ s->block_last_index[n] ]; + if(s->alternate_scan) nCoeffs= 63; //FIXME + else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; if (s->mb_intra) { int block0; - if (n < 4) + if (n < 4) block0 = block[0] * s->y_dc_scale; else block0 = block[0] * s->c_dc_scale; quant_matrix = s->intra_matrix; -__asm__ volatile( - "pcmpeqw %%mm7, %%mm7 \n\t" - "psrlw $15, %%mm7 \n\t" - "movd %2, %%mm6 \n\t" - "packssdw %%mm6, %%mm6 \n\t" - "packssdw %%mm6, %%mm6 \n\t" +asm volatile( + "pcmpeqw %%mm7, %%mm7 \n\t" + 
"psrlw $15, %%mm7 \n\t" + "movd %2, %%mm6 \n\t" + "packssdw %%mm6, %%mm6 \n\t" + "packssdw %%mm6, %%mm6 \n\t" "movl %3, %%eax \n\t" - ".balign 16\n\t" - "1: \n\t" - "movq (%0, %%eax), %%mm0 \n\t" - "movq 8(%0, %%eax), %%mm1 \n\t" - "movq (%1, %%eax), %%mm4 \n\t" - "movq 8(%1, %%eax), %%mm5 \n\t" - "pmullw %%mm6, %%mm4 \n\t" /* q=qscale*quant_matrix[i] */ - "pmullw %%mm6, %%mm5 \n\t" /* q=qscale*quant_matrix[i] */ - "pxor %%mm2, %%mm2 \n\t" - "pxor %%mm3, %%mm3 \n\t" - "pcmpgtw %%mm0, %%mm2 \n\t" /* block[i] < 0 ? -1 : 0 */ - "pcmpgtw %%mm1, %%mm3 \n\t" /* block[i] < 0 ? -1 : 0 */ - "pxor %%mm2, %%mm0 \n\t" - "pxor %%mm3, %%mm1 \n\t" - "psubw %%mm2, %%mm0 \n\t" /* abs(block[i]) */ - "psubw %%mm3, %%mm1 \n\t" /* abs(block[i]) */ - "pmullw %%mm4, %%mm0 \n\t" /* abs(block[i])*q */ - "pmullw %%mm5, %%mm1 \n\t" /* abs(block[i])*q */ - "pxor %%mm4, %%mm4 \n\t" - "pxor %%mm5, %%mm5 \n\t" /* FIXME slow */ - "pcmpeqw (%0, %%eax), %%mm4 \n\t" /* block[i] == 0 ? -1 : 0 */ - "pcmpeqw 8(%0, %%eax), %%mm5 \n\t" /* block[i] == 0 ? -1 : 0 */ - "psraw $3, %%mm0 \n\t" - "psraw $3, %%mm1 \n\t" - "pxor %%mm2, %%mm0 \n\t" - "pxor %%mm3, %%mm1 \n\t" - "psubw %%mm2, %%mm0 \n\t" - "psubw %%mm3, %%mm1 \n\t" - "pandn %%mm0, %%mm4 \n\t" - "pandn %%mm1, %%mm5 \n\t" - "movq %%mm4, (%0, %%eax) \n\t" - "movq %%mm5, 8(%0, %%eax) \n\t" - - "addl $16, %%eax \n\t" - "js 1b \n\t" - ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) - : "%eax", "memory" - ); + ".balign 16\n\t" + "1: \n\t" + "movq (%0, %%eax), %%mm0 \n\t" + "movq 8(%0, %%eax), %%mm1 \n\t" + "movq (%1, %%eax), %%mm4 \n\t" + "movq 8(%1, %%eax), %%mm5 \n\t" + "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] + "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] + "pxor %%mm2, %%mm2 \n\t" + "pxor %%mm3, %%mm3 \n\t" + "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 + "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? 
-1 : 0 + "pxor %%mm2, %%mm0 \n\t" + "pxor %%mm3, %%mm1 \n\t" + "psubw %%mm2, %%mm0 \n\t" // abs(block[i]) + "psubw %%mm3, %%mm1 \n\t" // abs(block[i]) + "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q + "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q + "pxor %%mm4, %%mm4 \n\t" + "pxor %%mm5, %%mm5 \n\t" // FIXME slow + "pcmpeqw (%0, %%eax), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw 8(%0, %%eax), %%mm5 \n\t" // block[i] == 0 ? -1 : 0 + "psraw $3, %%mm0 \n\t" + "psraw $3, %%mm1 \n\t" + "pxor %%mm2, %%mm0 \n\t" + "pxor %%mm3, %%mm1 \n\t" + "psubw %%mm2, %%mm0 \n\t" + "psubw %%mm3, %%mm1 \n\t" + "pandn %%mm0, %%mm4 \n\t" + "pandn %%mm1, %%mm5 \n\t" + "movq %%mm4, (%0, %%eax) \n\t" + "movq %%mm5, 8(%0, %%eax) \n\t" + + "addl $16, %%eax \n\t" + "jng 1b \n\t" + ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) + : "%eax", "memory" + ); block[0]= block0; - /* Note, we dont do mismatch control for intra as errors cannot accumulate */ + //Note, we dont do mismatch control for intra as errors cannot accumulate } else { quant_matrix = s->inter_matrix; -__asm__ volatile( - "pcmpeqw %%mm7, %%mm7 \n\t" +asm volatile( + "pcmpeqw %%mm7, %%mm7 \n\t" "psrlq $48, %%mm7 \n\t" - "movd %2, %%mm6 \n\t" - "packssdw %%mm6, %%mm6 \n\t" - "packssdw %%mm6, %%mm6 \n\t" + "movd %2, %%mm6 \n\t" + "packssdw %%mm6, %%mm6 \n\t" + "packssdw %%mm6, %%mm6 \n\t" "movl %3, %%eax \n\t" - ".balign 16\n\t" - "1: \n\t" - "movq (%0, %%eax), %%mm0 \n\t" - "movq 8(%0, %%eax), %%mm1 \n\t" - "movq (%1, %%eax), %%mm4 \n\t" - "movq 8(%1, %%eax), %%mm5 \n\t" - "pmullw %%mm6, %%mm4 \n\t" /* q=qscale*quant_matrix[i] */ - "pmullw %%mm6, %%mm5 \n\t" /* q=qscale*quant_matrix[i] */ - "pxor %%mm2, %%mm2 \n\t" - "pxor %%mm3, %%mm3 \n\t" - "pcmpgtw %%mm0, %%mm2 \n\t" /* block[i] < 0 ? -1 : 0 */ - "pcmpgtw %%mm1, %%mm3 \n\t" /* block[i] < 0 ? 
-1 : 0 */ - "pxor %%mm2, %%mm0 \n\t" - "pxor %%mm3, %%mm1 \n\t" - "psubw %%mm2, %%mm0 \n\t" /* abs(block[i]) */ - "psubw %%mm3, %%mm1 \n\t" /* abs(block[i]) */ - "paddw %%mm0, %%mm0 \n\t" /* abs(block[i])*2 */ - "paddw %%mm1, %%mm1 \n\t" /* abs(block[i])*2 */ - "pmullw %%mm4, %%mm0 \n\t" /* abs(block[i])*2*q */ - "pmullw %%mm5, %%mm1 \n\t" /* abs(block[i])*2*q */ - "paddw %%mm4, %%mm0 \n\t" /* (abs(block[i])*2 + 1)*q */ - "paddw %%mm5, %%mm1 \n\t" /* (abs(block[i])*2 + 1)*q */ - "pxor %%mm4, %%mm4 \n\t" - "pxor %%mm5, %%mm5 \n\t" /* FIXME slow */ - "pcmpeqw (%0, %%eax), %%mm4 \n\t" /* block[i] == 0 ? -1 : 0 */ - "pcmpeqw 8(%0, %%eax), %%mm5 \n\t" /* block[i] == 0 ? -1 : 0 */ - "psrlw $4, %%mm0 \n\t" - "psrlw $4, %%mm1 \n\t" - "pxor %%mm2, %%mm0 \n\t" - "pxor %%mm3, %%mm1 \n\t" - "psubw %%mm2, %%mm0 \n\t" - "psubw %%mm3, %%mm1 \n\t" - "pandn %%mm0, %%mm4 \n\t" - "pandn %%mm1, %%mm5 \n\t" + ".balign 16\n\t" + "1: \n\t" + "movq (%0, %%eax), %%mm0 \n\t" + "movq 8(%0, %%eax), %%mm1 \n\t" + "movq (%1, %%eax), %%mm4 \n\t" + "movq 8(%1, %%eax), %%mm5 \n\t" + "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] + "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] + "pxor %%mm2, %%mm2 \n\t" + "pxor %%mm3, %%mm3 \n\t" + "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 + "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 + "pxor %%mm2, %%mm0 \n\t" + "pxor %%mm3, %%mm1 \n\t" + "psubw %%mm2, %%mm0 \n\t" // abs(block[i]) + "psubw %%mm3, %%mm1 \n\t" // abs(block[i]) + "paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2 + "paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2 + "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*2*q + "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*2*q + "paddw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q + "paddw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q + "pxor %%mm4, %%mm4 \n\t" + "pxor %%mm5, %%mm5 \n\t" // FIXME slow + "pcmpeqw (%0, %%eax), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw 8(%0, %%eax), %%mm5 \n\t" // block[i] == 0 ? 
-1 : 0 + "psrlw $4, %%mm0 \n\t" + "psrlw $4, %%mm1 \n\t" + "pxor %%mm2, %%mm0 \n\t" + "pxor %%mm3, %%mm1 \n\t" + "psubw %%mm2, %%mm0 \n\t" + "psubw %%mm3, %%mm1 \n\t" + "pandn %%mm0, %%mm4 \n\t" + "pandn %%mm1, %%mm5 \n\t" "pxor %%mm4, %%mm7 \n\t" "pxor %%mm5, %%mm7 \n\t" - "movq %%mm4, (%0, %%eax) \n\t" - "movq %%mm5, 8(%0, %%eax) \n\t" + "movq %%mm4, (%0, %%eax) \n\t" + "movq %%mm5, 8(%0, %%eax) \n\t" - "addl $16, %%eax \n\t" - "js 1b \n\t" + "addl $16, %%eax \n\t" + "jng 1b \n\t" "movd 124(%0, %3), %%mm0 \n\t" "movq %%mm7, %%mm6 \n\t" "psrlq $32, %%mm7 \n\t" @@ -439,14 +394,14 @@ __asm__ volatile( "psrlq $15, %%mm7 \n\t" "pxor %%mm7, %%mm0 \n\t" "movd %%mm0, 124(%0, %3) \n\t" - - ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs) - : "%eax", "memory" + + ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs) + : "%eax", "memory" ); } } -/* draw the edges of width 'w' of an image of size width, height +/* draw the edges of width 'w' of an image of size width, height this mmx version can only handle w==8 || w==16 */ static void draw_edges_mmx(UINT8 *buf, int wrap, int width, int height, int w) { @@ -458,88 +413,82 @@ static void draw_edges_mmx(UINT8 *buf, int wrap, int width, int height, int w) ptr = buf; if(w==8) { - __asm__ volatile( - "1: \n\t" - "movd (%0), %%mm0 \n\t" - "punpcklbw %%mm0, %%mm0 \n\t" - "punpcklwd %%mm0, %%mm0 \n\t" - "punpckldq %%mm0, %%mm0 \n\t" - "movq %%mm0, -8(%0) \n\t" - "movq -8(%0, %2), %%mm1 \n\t" - "punpckhbw %%mm1, %%mm1 \n\t" - "punpckhwd %%mm1, %%mm1 \n\t" - "punpckhdq %%mm1, %%mm1 \n\t" - "movq %%mm1, (%0, %2) \n\t" - "addl %1, %0 \n\t" - "cmpl %3, %0 \n\t" - " jb 1b \n\t" - : "+r" (ptr) - : "r" (wrap), "r" (width), "r" (ptr + wrap*height) + asm volatile( + "1: \n\t" + "movd (%0), %%mm0 \n\t" + "punpcklbw %%mm0, %%mm0 \n\t" + "punpcklwd %%mm0, %%mm0 \n\t" + "punpckldq %%mm0, %%mm0 \n\t" + "movq %%mm0, -8(%0) \n\t" + "movq -8(%0, %2), %%mm1 \n\t" + "punpckhbw %%mm1, %%mm1 
\n\t" + "punpckhwd %%mm1, %%mm1 \n\t" + "punpckhdq %%mm1, %%mm1 \n\t" + "movq %%mm1, (%0, %2) \n\t" + "addl %1, %0 \n\t" + "cmpl %3, %0 \n\t" + " jb 1b \n\t" + : "+r" (ptr) + : "r" (wrap), "r" (width), "r" (ptr + wrap*height) ); } else { - __asm__ volatile( - "1: \n\t" - "movd (%0), %%mm0 \n\t" - "punpcklbw %%mm0, %%mm0 \n\t" - "punpcklwd %%mm0, %%mm0 \n\t" - "punpckldq %%mm0, %%mm0 \n\t" - "movq %%mm0, -8(%0) \n\t" - "movq %%mm0, -16(%0) \n\t" - "movq -8(%0, %2), %%mm1 \n\t" - "punpckhbw %%mm1, %%mm1 \n\t" - "punpckhwd %%mm1, %%mm1 \n\t" - "punpckhdq %%mm1, %%mm1 \n\t" - "movq %%mm1, (%0, %2) \n\t" - "movq %%mm1, 8(%0, %2) \n\t" - "addl %1, %0 \n\t" - "cmpl %3, %0 \n\t" - " jb 1b \n\t" - : "+r" (ptr) - : "r" (wrap), "r" (width), "r" (ptr + wrap*height) + asm volatile( + "1: \n\t" + "movd (%0), %%mm0 \n\t" + "punpcklbw %%mm0, %%mm0 \n\t" + "punpcklwd %%mm0, %%mm0 \n\t" + "punpckldq %%mm0, %%mm0 \n\t" + "movq %%mm0, -8(%0) \n\t" + "movq %%mm0, -16(%0) \n\t" + "movq -8(%0, %2), %%mm1 \n\t" + "punpckhbw %%mm1, %%mm1 \n\t" + "punpckhwd %%mm1, %%mm1 \n\t" + "punpckhdq %%mm1, %%mm1 \n\t" + "movq %%mm1, (%0, %2) \n\t" + "movq %%mm1, 8(%0, %2) \n\t" + "addl %1, %0 \n\t" + "cmpl %3, %0 \n\t" + " jb 1b \n\t" + : "+r" (ptr) + : "r" (wrap), "r" (width), "r" (ptr + wrap*height) ); } - + for(i=0;iavctx->dct_algo; + int i; + const int dct_algo = s->avctx->dct_algo; + const int idct_algo= s->avctx->idct_algo; + s->dct_unquantize_h263 = dct_unquantize_h263_mmx; s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_mmx; s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_mmx; @@ -560,7 +539,7 @@ void MPV_common_init_mmx(MpegEncContext *s) draw_edges = draw_edges_mmx; if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){ - s->fdct = fdct_mmx; + s->fdct = ff_fdct_mmx; if(mm_flags & MM_MMXEXT){ s->dct_quantize= dct_quantize_MMX2; @@ -568,5 +547,20 @@ void MPV_common_init_mmx(MpegEncContext *s) s->dct_quantize= dct_quantize_MMX; } } + + if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){ + 
s->idct_put= ff_simple_idct_put_mmx; + s->idct_add= ff_simple_idct_add_mmx; + s->idct_permutation_type= FF_SIMPLE_IDCT_PERM; + }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ + if(mm_flags & MM_MMXEXT){ + s->idct_put= ff_libmpeg2mmx2_idct_put; + s->idct_add= ff_libmpeg2mmx2_idct_add; + }else{ + s->idct_put= ff_libmpeg2mmx_idct_put; + s->idct_add= ff_libmpeg2mmx_idct_add; + } + s->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; + } } } diff --git a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c index f10014837..799ff1666 100644 --- a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c +++ b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c @@ -36,12 +36,12 @@ static int RENAME(dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow) { - int level=0, last_non_zero_p1, q; /* =0 is cuz gcc says uninitalized ... */ + int level=0, last_non_zero_p1, q; //=0 is cuz gcc says uninitalized ... const UINT16 *qmat, *bias; static __align8 INT16 temp_block[64]; - /* s->fdct (block); */ - fdct_mmx (block); /* cant be anything else ... */ + //s->fdct (block); + ff_fdct_mmx (block); //cant be anything else ... 
if (s->mb_intra) { int dummy; @@ -52,14 +52,14 @@ static int RENAME(dct_quantize)(MpegEncContext *s, /* note: block[0] is assumed to be positive */ if (!s->h263_aic) { #if 1 - __asm__ volatile ( + asm volatile ( "xorl %%edx, %%edx \n\t" "mul %%ecx \n\t" : "=d" (level), "=a"(dummy) : "a" ((block[0]>>2) + q), "c" (inverse[q<<1]) ); #else - __asm__ volatile ( + asm volatile ( "xorl %%edx, %%edx \n\t" "divw %%cx \n\t" "movzwl %%ax, %%eax \n\t" @@ -71,9 +71,9 @@ static int RENAME(dct_quantize)(MpegEncContext *s, } else /* For AIC we skip quant/dequant of INTRADC */ level = (block[0] + 4)>>3; - - block[0]=0; /* avoid fake overflow */ -/* temp_block[0] = (block[0] + (q >> 1)) / q; */ + + block[0]=0; //avoid fake overflow +// temp_block[0] = (block[0] + (q >> 1)) / q; last_non_zero_p1 = 1; bias = s->q_intra_matrix16_bias[qscale]; qmat = s->q_intra_matrix16[qscale]; @@ -89,36 +89,36 @@ static int RENAME(dct_quantize)(MpegEncContext *s, to enable -fpic compilation. this patch has not been accepted on main ffmpeg cvs. */ - __asm__ volatile( - "movd %%eax, %%mm3 \n\t" /* last_non_zero_p1 */ + asm volatile( + "movd %%eax, %%mm3 \n\t" // last_non_zero_p1 SPREADW(%%mm3) - "pxor %%mm7, %%mm7 \n\t" /* 0 */ - "pxor %%mm4, %%mm4 \n\t" /* 0 */ - "movq (%1), %%mm5 \n\t" /* qmat[0] */ + "pxor %%mm7, %%mm7 \n\t" // 0 + "pxor %%mm4, %%mm4 \n\t" // 0 + "movq (%1), %%mm5 \n\t" // qmat[0] "pxor %%mm6, %%mm6 \n\t" - "psubw (%2), %%mm6 \n\t" /* -bias[0] */ + "psubw (%2), %%mm6 \n\t" // -bias[0] "movl $-128, %%eax \n\t" : "+a" (last_non_zero_p1) : "r" (qmat), "r" (bias) ); /* CORE */ - __asm__ volatile( + asm volatile( ".balign 16 \n\t" "1: \n\t" - "pxor %%mm1, %%mm1 \n\t" /* 0 */ - "movq (%1, %%eax), %%mm0 \n\t" /* block[i] */ - "pcmpgtw %%mm0, %%mm1 \n\t" /* block[i] <= 0 ? 
0xFF : 0x00 */ - "pxor %%mm1, %%mm0 \n\t" - "psubw %%mm1, %%mm0 \n\t" /* ABS(block[i]) */ - "psubusw %%mm6, %%mm0 \n\t" /* ABS(block[i]) + bias[0] */ - "pmulhw %%mm5, %%mm0 \n\t" /* (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16 */ - "por %%mm0, %%mm4 \n\t" - "pxor %%mm1, %%mm0 \n\t" - "psubw %%mm1, %%mm0 \n\t" /* out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) */ + "pxor %%mm1, %%mm1 \n\t" // 0 + "movq (%1, %%eax), %%mm0 \n\t" // block[i] + "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00 + "pxor %%mm1, %%mm0 \n\t" + "psubw %%mm1, %%mm0 \n\t" // ABS(block[i]) + "psubusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0] + "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16 + "por %%mm0, %%mm4 \n\t" + "pxor %%mm1, %%mm0 \n\t" + "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) "movq %%mm0, (%3, %%eax) \n\t" - "pcmpeqw %%mm7, %%mm0 \n\t" /* out==0 ? 0xFF : 0x00 */ - "movq (%4, %%eax), %%mm1 \n\t" - "movq %%mm7, (%1, %%eax) \n\t" /* 0 */ + "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 
0xFF : 0x00 + "movq (%4, %%eax), %%mm1 \n\t" + "movq %%mm7, (%1, %%eax) \n\t" // 0 "pandn %%mm1, %%mm0 \n\t" PMAXW(%%mm0, %%mm3) "addl $8, %%eax \n\t" @@ -127,7 +127,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s, : "r" (block+64), "r" (inv_zigzag_direct16+64), "r" (temp_block+64) ); /* EPILOGUE */ - __asm__ volatile( + asm volatile( "movq %%mm3, %%mm0 \n\t" "psrlq $32, %%mm3 \n\t" PMAXW(%%mm0, %%mm3) @@ -135,44 +135,44 @@ static int RENAME(dct_quantize)(MpegEncContext *s, "psrlq $16, %%mm3 \n\t" PMAXW(%%mm0, %%mm3) "movd %%mm3, %%eax \n\t" - "movzbl %%al, %%eax \n\t" /* last_non_zero_p1 */ - "movd %2, %%mm1 \n\t" /* max_qcoeff */ + "movzbl %%al, %%eax \n\t" // last_non_zero_p1 + "movd %2, %%mm1 \n\t" // max_qcoeff SPREADW(%%mm1) - "psubusw %%mm1, %%mm4 \n\t" + "psubusw %%mm1, %%mm4 \n\t" "packuswb %%mm4, %%mm4 \n\t" - "movd %%mm4, %1 \n\t" /* *overflow */ + "movd %%mm4, %1 \n\t" // *overflow : "+a" (last_non_zero_p1), "=r" (*overflow) : "r" (s->max_qcoeff) ); - }else{ /* FMT_H263 */ - __asm__ volatile( + }else{ // FMT_H263 + asm volatile( "pushl %%ebp \n\t" "pushl %%ebx \n\t" "movl %0, %%ebp \n\t" "movl (%%ebp), %%ebx \n\t" - "movd %%ebx, %%mm3 \n\t" /* last_non_zero_p1 */ + "movd %%ebx, %%mm3 \n\t" // last_non_zero_p1 SPREADW(%%mm3) - "pxor %%mm7, %%mm7 \n\t" /* 0 */ - "pxor %%mm4, %%mm4 \n\t" /* 0 */ + "pxor %%mm7, %%mm7 \n\t" // 0 + "pxor %%mm4, %%mm4 \n\t" // 0 "movl $-128, %%ebx \n\t" ".balign 16 \n\t" "1: \n\t" - "pxor %%mm1, %%mm1 \n\t" /* 0 */ - "movq (%1, %%ebx), %%mm0 \n\t" /* block[i] */ - "pcmpgtw %%mm0, %%mm1 \n\t" /* block[i] <= 0 ? 
0xFF : 0x00 */ - "pxor %%mm1, %%mm0 \n\t" - "psubw %%mm1, %%mm0 \n\t" /* ABS(block[i]) */ - "movq (%3, %%ebx), %%mm6 \n\t" /* bias[0] */ - "paddusw %%mm6, %%mm0 \n\t" /* ABS(block[i]) + bias[0] */ - "movq (%2, %%ebx), %%mm5 \n\t" /* qmat[i] */ - "pmulhw %%mm5, %%mm0 \n\t" /* (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16 */ - "por %%mm0, %%mm4 \n\t" - "pxor %%mm1, %%mm0 \n\t" - "psubw %%mm1, %%mm0 \n\t" /* out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) */ + "pxor %%mm1, %%mm1 \n\t" // 0 + "movq (%1, %%ebx), %%mm0 \n\t" // block[i] + "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00 + "pxor %%mm1, %%mm0 \n\t" + "psubw %%mm1, %%mm0 \n\t" // ABS(block[i]) + "movq (%3, %%ebx), %%mm6 \n\t" // bias[0] + "paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0] + "movq (%2, %%ebx), %%mm5 \n\t" // qmat[i] + "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16 + "por %%mm0, %%mm4 \n\t" + "pxor %%mm1, %%mm0 \n\t" + "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) "movq %%mm0, (%5, %%ebx) \n\t" - "pcmpeqw %%mm7, %%mm0 \n\t" /* out==0 ? 0xFF : 0x00 */ - "movq (%4, %%ebx), %%mm1 \n\t" - "movq %%mm7, (%1, %%ebx) \n\t" /* 0 */ + "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00 + "movq (%4, %%ebx), %%mm1 \n\t" + "movq %%mm7, (%1, %%ebx) \n\t" // 0 "pandn %%mm1, %%mm0 \n\t" PMAXW(%%mm0, %%mm3) "addl $8, %%ebx \n\t" @@ -184,7 +184,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s, "psrlq $16, %%mm3 \n\t" PMAXW(%%mm0, %%mm3) "movd %%mm3, %%ebx \n\t" - "movzbl %%bl, %%ebx \n\t" /* last_non_zero_p1 */ + "movzbl %%bl, %%ebx \n\t" // last_non_zero_p1 "movl %%ebx, (%%ebp) \n\t" "popl %%ebx \n\t" "popl %%ebp \n\t" @@ -193,43 +193,155 @@ static int RENAME(dct_quantize)(MpegEncContext *s, "r" (block+64), "r" (qmat+64), "r" (bias+64), "r" (inv_zigzag_direct16+64), "r" (temp_block+64) ); - /* note the __asm__ is split cuz gcc doesnt like that many operands ... 
*/ - __asm__ volatile( - "movd %1, %%mm1 \n\t" /* max_qcoeff */ + // note the asm is split cuz gcc doesnt like that many operands ... + asm volatile( + "movd %1, %%mm1 \n\t" // max_qcoeff SPREADW(%%mm1) - "psubusw %%mm1, %%mm4 \n\t" + "psubusw %%mm1, %%mm4 \n\t" "packuswb %%mm4, %%mm4 \n\t" - "movd %%mm4, %0 \n\t" /* *overflow */ + "movd %%mm4, %0 \n\t" // *overflow : "=r" (*overflow) : "r" (s->max_qcoeff) ); } - if(s->mb_intra) temp_block[0]= level; /* FIXME move after permute */ + if(s->mb_intra) block[0]= level; + else block[0]= temp_block[0]; -/* last_non_zero_p1=64; */ - /* permute for IDCT */ - __asm__ volatile( - "movl %0, %%eax \n\t" - "pushl %%ebp \n\t" - "movl %%esp, " MANGLE(esp_temp) "\n\t" - "1: \n\t" - "movzbl (%1, %%eax), %%ebx \n\t" - "movzbl 1(%1, %%eax), %%ebp \n\t" - "movw (%2, %%ebx, 2), %%cx \n\t" - "movw (%2, %%ebp, 2), %%sp \n\t" - "movzbl " MANGLE(permutation) "(%%ebx), %%ebx\n\t" - "movzbl " MANGLE(permutation) "(%%ebp), %%ebp\n\t" - "movw %%cx, (%3, %%ebx, 2) \n\t" - "movw %%sp, (%3, %%ebp, 2) \n\t" - "addl $2, %%eax \n\t" - " js 1b \n\t" - "movl " MANGLE(esp_temp) ", %%esp\n\t" - "popl %%ebp \n\t" - : - : "g" (-last_non_zero_p1), "d" (zigzag_direct_noperm+last_non_zero_p1), "S" (temp_block), "D" (block) - : "%eax", "%ebx", "%ecx" - ); + if(s->idct_permutation[1]==8){ + if(last_non_zero_p1 <= 1) goto end; + block[0x08] = temp_block[0x01]; block[0x10] = temp_block[0x08]; + block[0x20] = temp_block[0x10]; + if(last_non_zero_p1 <= 4) goto end; + block[0x18] = temp_block[0x09]; block[0x04] = temp_block[0x02]; + block[0x09] = temp_block[0x03]; + if(last_non_zero_p1 <= 7) goto end; + block[0x14] = temp_block[0x0A]; block[0x28] = temp_block[0x11]; + block[0x12] = temp_block[0x18]; block[0x02] = temp_block[0x20]; + if(last_non_zero_p1 <= 11) goto end; + block[0x1A] = temp_block[0x19]; block[0x24] = temp_block[0x12]; + block[0x19] = temp_block[0x0B]; block[0x01] = temp_block[0x04]; + block[0x0C] = temp_block[0x05]; + if(last_non_zero_p1 <= 16) goto 
end; + block[0x11] = temp_block[0x0C]; block[0x29] = temp_block[0x13]; + block[0x16] = temp_block[0x1A]; block[0x0A] = temp_block[0x21]; + block[0x30] = temp_block[0x28]; block[0x22] = temp_block[0x30]; + block[0x38] = temp_block[0x29]; block[0x06] = temp_block[0x22]; + if(last_non_zero_p1 <= 24) goto end; + block[0x1B] = temp_block[0x1B]; block[0x21] = temp_block[0x14]; + block[0x1C] = temp_block[0x0D]; block[0x05] = temp_block[0x06]; + block[0x0D] = temp_block[0x07]; block[0x15] = temp_block[0x0E]; + block[0x2C] = temp_block[0x15]; block[0x13] = temp_block[0x1C]; + if(last_non_zero_p1 <= 32) goto end; + block[0x0B] = temp_block[0x23]; block[0x34] = temp_block[0x2A]; + block[0x2A] = temp_block[0x31]; block[0x32] = temp_block[0x38]; + block[0x3A] = temp_block[0x39]; block[0x26] = temp_block[0x32]; + block[0x39] = temp_block[0x2B]; block[0x03] = temp_block[0x24]; + if(last_non_zero_p1 <= 40) goto end; + block[0x1E] = temp_block[0x1D]; block[0x25] = temp_block[0x16]; + block[0x1D] = temp_block[0x0F]; block[0x2D] = temp_block[0x17]; + block[0x17] = temp_block[0x1E]; block[0x0E] = temp_block[0x25]; + block[0x31] = temp_block[0x2C]; block[0x2B] = temp_block[0x33]; + if(last_non_zero_p1 <= 48) goto end; + block[0x36] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B]; + block[0x23] = temp_block[0x34]; block[0x3C] = temp_block[0x2D]; + block[0x07] = temp_block[0x26]; block[0x1F] = temp_block[0x1F]; + block[0x0F] = temp_block[0x27]; block[0x35] = temp_block[0x2E]; + if(last_non_zero_p1 <= 56) goto end; + block[0x2E] = temp_block[0x35]; block[0x33] = temp_block[0x3C]; + block[0x3E] = temp_block[0x3D]; block[0x27] = temp_block[0x36]; + block[0x3D] = temp_block[0x2F]; block[0x2F] = temp_block[0x37]; + block[0x37] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F]; + }else if(s->idct_permutation[1]==4){ + if(last_non_zero_p1 <= 1) goto end; + block[0x04] = temp_block[0x01]; + block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10]; + if(last_non_zero_p1 <= 4) goto end; 
+ block[0x0C] = temp_block[0x09]; block[0x01] = temp_block[0x02]; + block[0x05] = temp_block[0x03]; + if(last_non_zero_p1 <= 7) goto end; + block[0x09] = temp_block[0x0A]; block[0x14] = temp_block[0x11]; + block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20]; + if(last_non_zero_p1 <= 11) goto end; + block[0x1C] = temp_block[0x19]; + block[0x11] = temp_block[0x12]; block[0x0D] = temp_block[0x0B]; + block[0x02] = temp_block[0x04]; block[0x06] = temp_block[0x05]; + if(last_non_zero_p1 <= 16) goto end; + block[0x0A] = temp_block[0x0C]; block[0x15] = temp_block[0x13]; + block[0x19] = temp_block[0x1A]; block[0x24] = temp_block[0x21]; + block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30]; + block[0x2C] = temp_block[0x29]; block[0x21] = temp_block[0x22]; + if(last_non_zero_p1 <= 24) goto end; + block[0x1D] = temp_block[0x1B]; block[0x12] = temp_block[0x14]; + block[0x0E] = temp_block[0x0D]; block[0x03] = temp_block[0x06]; + block[0x07] = temp_block[0x07]; block[0x0B] = temp_block[0x0E]; + block[0x16] = temp_block[0x15]; block[0x1A] = temp_block[0x1C]; + if(last_non_zero_p1 <= 32) goto end; + block[0x25] = temp_block[0x23]; block[0x29] = temp_block[0x2A]; + block[0x34] = temp_block[0x31]; block[0x38] = temp_block[0x38]; + block[0x3C] = temp_block[0x39]; block[0x31] = temp_block[0x32]; + block[0x2D] = temp_block[0x2B]; block[0x22] = temp_block[0x24]; + if(last_non_zero_p1 <= 40) goto end; + block[0x1E] = temp_block[0x1D]; block[0x13] = temp_block[0x16]; + block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17]; + block[0x1B] = temp_block[0x1E]; block[0x26] = temp_block[0x25]; + block[0x2A] = temp_block[0x2C]; block[0x35] = temp_block[0x33]; + if(last_non_zero_p1 <= 48) goto end; + block[0x39] = temp_block[0x3A]; block[0x3D] = temp_block[0x3B]; + block[0x32] = temp_block[0x34]; block[0x2E] = temp_block[0x2D]; + block[0x23] = temp_block[0x26]; block[0x1F] = temp_block[0x1F]; + block[0x27] = temp_block[0x27]; block[0x2B] = temp_block[0x2E]; + 
if(last_non_zero_p1 <= 56) goto end; + block[0x36] = temp_block[0x35]; block[0x3A] = temp_block[0x3C]; + block[0x3E] = temp_block[0x3D]; block[0x33] = temp_block[0x36]; + block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37]; + block[0x3B] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F]; + }else{ + if(last_non_zero_p1 <= 1) goto end; + block[0x01] = temp_block[0x01]; + block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10]; + if(last_non_zero_p1 <= 4) goto end; + block[0x09] = temp_block[0x09]; block[0x02] = temp_block[0x02]; + block[0x03] = temp_block[0x03]; + if(last_non_zero_p1 <= 7) goto end; + block[0x0A] = temp_block[0x0A]; block[0x11] = temp_block[0x11]; + block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20]; + if(last_non_zero_p1 <= 11) goto end; + block[0x19] = temp_block[0x19]; + block[0x12] = temp_block[0x12]; block[0x0B] = temp_block[0x0B]; + block[0x04] = temp_block[0x04]; block[0x05] = temp_block[0x05]; + if(last_non_zero_p1 <= 16) goto end; + block[0x0C] = temp_block[0x0C]; block[0x13] = temp_block[0x13]; + block[0x1A] = temp_block[0x1A]; block[0x21] = temp_block[0x21]; + block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30]; + block[0x29] = temp_block[0x29]; block[0x22] = temp_block[0x22]; + if(last_non_zero_p1 <= 24) goto end; + block[0x1B] = temp_block[0x1B]; block[0x14] = temp_block[0x14]; + block[0x0D] = temp_block[0x0D]; block[0x06] = temp_block[0x06]; + block[0x07] = temp_block[0x07]; block[0x0E] = temp_block[0x0E]; + block[0x15] = temp_block[0x15]; block[0x1C] = temp_block[0x1C]; + if(last_non_zero_p1 <= 32) goto end; + block[0x23] = temp_block[0x23]; block[0x2A] = temp_block[0x2A]; + block[0x31] = temp_block[0x31]; block[0x38] = temp_block[0x38]; + block[0x39] = temp_block[0x39]; block[0x32] = temp_block[0x32]; + block[0x2B] = temp_block[0x2B]; block[0x24] = temp_block[0x24]; + if(last_non_zero_p1 <= 40) goto end; + block[0x1D] = temp_block[0x1D]; block[0x16] = temp_block[0x16]; + block[0x0F] = 
temp_block[0x0F]; block[0x17] = temp_block[0x17]; + block[0x1E] = temp_block[0x1E]; block[0x25] = temp_block[0x25]; + block[0x2C] = temp_block[0x2C]; block[0x33] = temp_block[0x33]; + if(last_non_zero_p1 <= 48) goto end; + block[0x3A] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B]; + block[0x34] = temp_block[0x34]; block[0x2D] = temp_block[0x2D]; + block[0x26] = temp_block[0x26]; block[0x1F] = temp_block[0x1F]; + block[0x27] = temp_block[0x27]; block[0x2E] = temp_block[0x2E]; + if(last_non_zero_p1 <= 56) goto end; + block[0x35] = temp_block[0x35]; block[0x3C] = temp_block[0x3C]; + block[0x3D] = temp_block[0x3D]; block[0x36] = temp_block[0x36]; + block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37]; + block[0x3E] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F]; + } + end: /* for(i=0; i 2x1. width and height are given for the source picture */ +static void conv411(UINT8 *dst, int dst_wrap, + UINT8 *src, int src_wrap, + int width, int height) +{ + int w, c; + UINT8 *s1, *s2, *d; + + for(;height > 0; height -= 2) { + s1 = src; + s2 = src + src_wrap; + d = dst; + for(w = width;w > 0; w--) { + c = (s1[0] + s2[0]) >> 1; + d[0] = c; + d[1] = c; + s1++; + s2++; + d += 2; + } + src += src_wrap * 2; + dst += dst_wrap; + } +} + static void img_copy(UINT8 *dst, int dst_wrap, UINT8 *src, int src_wrap, int width, int height) @@ -628,6 +653,17 @@ int img_convert(AVPicture *dst, int dst_pix_fmt, } else if (dst_pix_fmt == PIX_FMT_YUV420P) { switch(pix_fmt) { + case PIX_FMT_YUV411P: + img_copy(dst->data[0], dst->linesize[0], + src->data[0], src->linesize[0], + width, height); + conv411(dst->data[1], dst->linesize[1], + src->data[1], src->linesize[1], + width / 4, height); + conv411(dst->data[2], dst->linesize[2], + src->data[2], src->linesize[2], + width / 4, height); + break; case PIX_FMT_YUV410P: img_copy(dst->data[0], dst->linesize[0], src->data[0], src->linesize[0], diff --git a/src/libffmpeg/libavcodec/mdct.c b/src/libffmpeg/libavcodec/mdct.c new file mode 
100644 index 000000000..baab5d315 --- /dev/null +++ b/src/libffmpeg/libavcodec/mdct.c @@ -0,0 +1,170 @@ +/* + * MDCT/IMDCT transforms + * Copyright (c) 2002 Fabrice Bellard. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include "dsputil.h" + +/* + * init MDCT or IMDCT computation + */ +int mdct_init(MDCTContext *s, int nbits, int inverse) +{ + int n, n4, i; + float alpha; + + memset(s, 0, sizeof(*s)); + n = 1 << nbits; + s->nbits = nbits; + s->n = n; + n4 = n >> 2; + s->tcos = malloc(n4 * sizeof(FFTSample)); + if (!s->tcos) + goto fail; + s->tsin = malloc(n4 * sizeof(FFTSample)); + if (!s->tsin) + goto fail; + + for(i=0;itcos[i] = -cos(alpha); + s->tsin[i] = -sin(alpha); + } + if (fft_init(&s->fft, s->nbits - 2, inverse) < 0) + goto fail; + return 0; + fail: + av_freep(&s->tcos); + av_freep(&s->tsin); + return -1; +} + +/* complex multiplication: p = a * b */ +#define CMUL(pre, pim, are, aim, bre, bim) \ +{\ + float _are = (are);\ + float _aim = (aim);\ + float _bre = (bre);\ + float _bim = (bim);\ + (pre) = _are * _bre - _aim * _bim;\ + (pim) = _are * _bim + _aim * _bre;\ +} + +/** + * Compute inverse MDCT of size N = 2^nbits + * @param output N samples + * @param input N/2 samples + * @param tmp N/2 samples + */ +void imdct_calc(MDCTContext *s, FFTSample *output, + 
const FFTSample *input, FFTSample *tmp) +{ + int k, n8, n4, n2, n, j; + const uint16_t *revtab = s->fft.revtab; + const FFTSample *tcos = s->tcos; + const FFTSample *tsin = s->tsin; + const FFTSample *in1, *in2; + FFTComplex *z = (FFTComplex *)tmp; + + n = 1 << s->nbits; + n2 = n >> 1; + n4 = n >> 2; + n8 = n >> 3; + + /* pre rotation */ + in1 = input; + in2 = input + n2 - 1; + for(k = 0; k < n4; k++) { + j=revtab[k]; + CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]); + in1 += 2; + in2 -= 2; + } + fft_calc(&s->fft, z); + + /* post rotation + reordering */ + /* XXX: optimize */ + for(k = 0; k < n4; k++) { + CMUL(z[k].re, z[k].im, z[k].re, z[k].im, tcos[k], tsin[k]); + } + for(k = 0; k < n8; k++) { + output[2*k] = -z[n8 + k].im; + output[n2-1-2*k] = z[n8 + k].im; + + output[2*k+1] = z[n8-1-k].re; + output[n2-1-2*k-1] = -z[n8-1-k].re; + + output[n2 + 2*k]=-z[k+n8].re; + output[n-1- 2*k]=-z[k+n8].re; + + output[n2 + 2*k+1]=z[n8-k-1].im; + output[n-2 - 2 * k] = z[n8-k-1].im; + } +} + +/** + * Compute MDCT of size N = 2^nbits + * @param input N samples + * @param out N/2 samples + * @param tmp temporary storage of N/2 samples + */ +void mdct_calc(MDCTContext *s, FFTSample *out, + const FFTSample *input, FFTSample *tmp) +{ + int i, j, n, n8, n4, n2, n3; + FFTSample re, im, re1, im1; + const uint16_t *revtab = s->fft.revtab; + const FFTSample *tcos = s->tcos; + const FFTSample *tsin = s->tsin; + FFTComplex *x = (FFTComplex *)tmp; + + n = 1 << s->nbits; + n2 = n >> 1; + n4 = n >> 2; + n8 = n >> 3; + n3 = 3 * n4; + + /* pre rotation */ + for(i=0;ifft, x); + + /* post rotation */ + for(i=0;itcos); + av_freep(&s->tsin); + fft_end(&s->fft); +} diff --git a/src/libffmpeg/libavcodec/mjpeg.c b/src/libffmpeg/libavcodec/mjpeg.c index 6cfd83160..292cd7d0f 100644 --- a/src/libffmpeg/libavcodec/mjpeg.c +++ b/src/libffmpeg/libavcodec/mjpeg.c @@ -17,7 +17,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * Support for external huffman table, various 
fixes (AVID workaround), - * aspecting and various markers support + * aspecting and new decode_frame mechanism * by Alex Beregszaszi */ //#define DEBUG @@ -25,11 +25,7 @@ #include "dsputil.h" #include "mpegvideo.h" -#ifdef USE_FASTMEMCPY -#include "fastmemcpy.h" -#endif - -/* use two quantizer table (one for luminance and one for chrominance) */ +/* use two quantizer tables (one for luminance and one for chrominance) */ /* not yet working */ #undef TWOMATRIXES @@ -322,14 +318,14 @@ static void jpeg_table_header(MpegEncContext *s) put_bits(p, 4, 0); /* 8 bit precision */ put_bits(p, 4, 0); /* table 0 */ for(i=0;i<64;i++) { - j = zigzag_direct[i]; + j = s->intra_scantable.permutated[i]; put_bits(p, 8, s->intra_matrix[j]); } #ifdef TWOMATRIXES put_bits(p, 4, 0); /* 8 bit precision */ put_bits(p, 4, 1); /* table 1 */ for(i=0;i<64;i++) { - j = zigzag_direct[i]; + j = s->intra_scantable.permutated[i]; put_bits(p, 8, s->chroma_intra_matrix[j]); } #endif @@ -535,7 +531,7 @@ static void encode_block(MpegEncContext *s, DCTELEM *block, int n) run = 0; last_index = s->block_last_index[n]; for(i=1;i<=last_index;i++) { - j = zigzag_direct[i]; + j = s->intra_scantable.permutated[i]; val = block[j]; if (val == 0) { run++; @@ -582,19 +578,17 @@ void mjpeg_encode_mb(MpegEncContext *s, /******************************************/ /* decoding */ -/* compressed picture size */ -#define PICTURE_BUFFER_SIZE 100000 - #define MAX_COMPONENTS 4 typedef struct MJpegDecodeContext { AVCodecContext *avctx; GetBitContext gb; - UINT32 header_state; + int mpeg_enc_ctx_allocated; /* true if decoding context allocated */ + int start_code; /* current start code */ - UINT8 *buf_ptr; int buffer_size; - int mpeg_enc_ctx_allocated; /* true if decoding context allocated */ + UINT8 *buffer; + INT16 quant_matrixes[4][64]; VLC vlcs[2][4]; @@ -614,21 +608,16 @@ typedef struct MJpegDecodeContext { UINT8 *current_picture[MAX_COMPONENTS]; /* picture structure */ int linesize[MAX_COMPONENTS]; DCTELEM block[64] 
__align8; - UINT8 buffer[PICTURE_BUFFER_SIZE]; int buggy_avid; int restart_interval; int restart_count; - int interleaved_rows; + int interlace_polarity; + ScanTable scantable; + void (*idct_put)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); } MJpegDecodeContext; -#define SKIP_REMAINING(gb, len) { \ - dprintf("reamining %d bytes in marker\n", len); \ - if (len) while (--len) \ - skip_bits(gb, 8); \ -} - -static int mjpeg_decode_dht(MJpegDecodeContext *s, UINT8 *buf, int buf_size); +static int mjpeg_decode_dht(MJpegDecodeContext *s); static void build_vlc(VLC *vlc, const UINT8 *bits_table, const UINT8 *val_table, int nb_codes) @@ -645,39 +634,52 @@ static void build_vlc(VLC *vlc, const UINT8 *bits_table, const UINT8 *val_table, static int mjpeg_decode_init(AVCodecContext *avctx) { MJpegDecodeContext *s = avctx->priv_data; + MpegEncContext s2; s->avctx = avctx; - s->header_state = 0; + /* ugly way to get the idct & scantable */ + memset(&s2, 0, sizeof(MpegEncContext)); + s2.flags= avctx->flags; + s2.avctx= avctx; +// s2->out_format = FMT_MJPEG; + s2.width = 8; + s2.height = 8; + if (MPV_common_init(&s2) < 0) + return -1; + s->scantable= s2.intra_scantable; + s->idct_put= s2.idct_put; + MPV_common_end(&s2); + s->mpeg_enc_ctx_allocated = 0; - s->buffer_size = PICTURE_BUFFER_SIZE - 1; /* minus 1 to take into - account FF 00 case */ + s->buffer_size = 102400; /* smaller buffer should be enough, + but photojpg files could ahive bigger sizes */ + s->buffer = av_malloc(s->buffer_size); s->start_code = -1; - s->buf_ptr = s->buffer; s->first_picture = 1; s->org_width = avctx->width; s->org_height = avctx->height; - + build_vlc(&s->vlcs[0][0], bits_dc_luminance, val_dc_luminance, 12); build_vlc(&s->vlcs[0][1], bits_dc_chrominance, val_dc_chrominance, 12); build_vlc(&s->vlcs[1][0], bits_ac_luminance, val_ac_luminance, 251); build_vlc(&s->vlcs[1][1], bits_ac_chrominance, val_ac_chrominance, 251); - + if (avctx->flags & CODEC_FLAG_EXTERN_HUFF) { 
printf("mjpeg: using external huffman table\n"); - mjpeg_decode_dht(s, avctx->extradata, avctx->extradata_size); + init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size); + mjpeg_decode_dht(s); /* should check for error - but dunno */ } + return 0; } /* quantize tables */ -static int mjpeg_decode_dqt(MJpegDecodeContext *s, - UINT8 *buf, int buf_size) +static int mjpeg_decode_dqt(MJpegDecodeContext *s) { int len, index, i, j; - init_get_bits(&s->gb, buf, buf_size); len = get_bits(&s->gb, 16) - 2; @@ -694,29 +696,23 @@ static int mjpeg_decode_dqt(MJpegDecodeContext *s, dprintf("index=%d\n", index); /* read quant table */ for(i=0;i<64;i++) { - j = zigzag_direct[i]; + j = s->scantable.permutated[i]; s->quant_matrixes[index][j] = get_bits(&s->gb, 8); } len -= 65; } - SKIP_REMAINING(&s->gb, len); - return 0; } /* decode huffman tables and build VLC decoders */ -static int mjpeg_decode_dht(MJpegDecodeContext *s, - UINT8 *buf, int buf_size) +static int mjpeg_decode_dht(MJpegDecodeContext *s) { int len, index, i, class, n, v, code_max; UINT8 bits_table[17]; UINT8 val_table[256]; - init_get_bits(&s->gb, buf, buf_size); - - len = get_bits(&s->gb, 16); - len -= 2; + len = get_bits(&s->gb, 16) - 2; while (len > 0) { if (len < 17) @@ -754,13 +750,10 @@ static int mjpeg_decode_dht(MJpegDecodeContext *s, return 0; } -static int mjpeg_decode_sof0(MJpegDecodeContext *s, - UINT8 *buf, int buf_size) +static int mjpeg_decode_sof0(MJpegDecodeContext *s) { int len, nb_components, i, width, height; - init_get_bits(&s->gb, buf, buf_size); - /* XXX: verify len field validity */ len = get_bits(&s->gb, 16); /* only 8 bits/component accepted */ @@ -806,6 +799,7 @@ static int mjpeg_decode_sof0(MJpegDecodeContext *s, s->org_height != 0 && s->height < ((s->org_height * 3) / 4)) { s->interlaced = 1; +// s->bottom_field = (s->interlace_polarity) ? 
1 : 0; s->bottom_field = 0; } @@ -835,8 +829,11 @@ static int mjpeg_decode_sof0(MJpegDecodeContext *s, static inline int mjpeg_decode_dc(MJpegDecodeContext *s, int dc_index) { int code, diff; - +#if 1 + code = get_vlc2(&s->gb, s->vlcs[0][dc_index].table, 9, 2); +#else code = get_vlc(&s->gb, &s->vlcs[0][dc_index]); +#endif if (code < 0) { dprintf("mjpeg_decode_dc: bad vlc: %d:%d (%p)\n", 0, dc_index, @@ -876,7 +873,11 @@ static int decode_block(MJpegDecodeContext *s, DCTELEM *block, ac_vlc = &s->vlcs[1][ac_index]; i = 1; for(;;) { +#if 1 + code = get_vlc2(&s->gb, s->vlcs[1][ac_index].table, 9, 2); +#else code = get_vlc(&s->gb, ac_vlc); +#endif if (code < 0) { dprintf("error ac\n"); return -1; @@ -897,7 +898,7 @@ static int decode_block(MJpegDecodeContext *s, DCTELEM *block, dprintf("error count: %d\n", i); return -1; } - j = zigzag_direct[i]; + j = s->scantable.permutated[i]; block[j] = level * quant_matrix[j]; i++; if (i >= 64) @@ -907,8 +908,7 @@ static int decode_block(MJpegDecodeContext *s, DCTELEM *block, return 0; } -static int mjpeg_decode_sos(MJpegDecodeContext *s, - UINT8 *buf, int buf_size) +static int mjpeg_decode_sos(MJpegDecodeContext *s) { int len, nb_components, i, j, n, h, v, ret; int mb_width, mb_height, mb_x, mb_y, vmax, hmax, index, id; @@ -919,10 +919,14 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s, int h_count[4]; int v_count[4]; - init_get_bits(&s->gb, buf, buf_size); /* XXX: verify len field validity */ len = get_bits(&s->gb, 16); nb_components = get_bits(&s->gb, 8); + if (len != 6+2*nb_components) + { + dprintf("decode_sos: invalid len (%d)\n", len); + return -1; + } /* XXX: only interleaved scan accepted */ if (nb_components != 3) { @@ -1021,14 +1025,14 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s, (h * mb_x + x) * 8; if (s->interlaced && s->bottom_field) ptr += s->linesize[c] >> 1; - ff_idct_put(ptr, s->linesize[c], s->block); + s->idct_put(ptr, s->linesize[c], s->block); if (++x == h) { x = 0; y++; } } } - if 
(s->restart_interval && !--s->restart_count) { + if ((s->restart_interval <= 8) && !--s->restart_count) { align_get_bits(&s->gb); skip_bits(&s->gb, 16); /* skip RSTn */ for (j=0; jgb, buf, buf_size); - if (get_bits(&s->gb, 16) != 4) return -1; s->restart_interval = get_bits(&s->gb, 16); @@ -1058,26 +1059,22 @@ static int mjpeg_decode_dri(MJpegDecodeContext *s, return 0; } -#define FOURCC(a,b,c,d) ((a << 24) | (b << 16) | (c << 8) | d) -static int mjpeg_decode_app(MJpegDecodeContext *s, - UINT8 *buf, int buf_size, int start_code) +static int mjpeg_decode_app(MJpegDecodeContext *s) { int len, id; - init_get_bits(&s->gb, buf, buf_size); - /* XXX: verify len field validity */ len = get_bits(&s->gb, 16); if (len < 5) return -1; - id = (get_bits(&s->gb, 16) << 16) | get_bits(&s->gb, 16); + id = be2me_32((get_bits(&s->gb, 16) << 16) | get_bits(&s->gb, 16)); len -= 6; /* buggy AVID, it puts EOI only at every 10th frame */ /* also this fourcc is used by non-avid files too, it means interleaving, but it's always present in AVID files */ - if (id == FOURCC('A','V','I','1')) + if (id == ff_get_fourcc("AVI1")) { /* structure: 4bytes AVI1 @@ -1087,23 +1084,23 @@ static int mjpeg_decode_app(MJpegDecodeContext *s, 4bytes field_size_less_padding */ s->buggy_avid = 1; - if (s->first_picture) - printf("mjpeg: workarounding buggy AVID\n"); - s->interleaved_rows = get_bits(&s->gb, 8); +// if (s->first_picture) +// printf("mjpeg: workarounding buggy AVID\n"); + s->interlace_polarity = get_bits(&s->gb, 8); #if 0 skip_bits(&s->gb, 8); skip_bits(&s->gb, 32); skip_bits(&s->gb, 32); len -= 10; #endif - if (s->interleaved_rows) - printf("mjpeg: interleaved rows: %d\n", s->interleaved_rows); +// if (s->interlace_polarity) +// printf("mjpeg: interlace polarity: %d\n", s->interlace_polarity); goto out; } len -= 2; - if (id == FOURCC('J','F','I','F')) + if (id == ff_get_fourcc("JFIF")) { skip_bits(&s->gb, 8); /* the trailing zero-byte */ printf("mjpeg: JFIF header found (version: %x.%x)\n", @@ 
-1125,11 +1122,11 @@ static int mjpeg_decode_app(MJpegDecodeContext *s, } /* Apple MJPEG-A */ - if ((start_code == APP1) && (len > (0x28 - 8))) + if ((s->start_code == APP1) && (len > (0x28 - 8))) { - id = (get_bits(&s->gb, 16) << 16) | get_bits(&s->gb, 16); + id = be2me_32((get_bits(&s->gb, 16) << 16) | get_bits(&s->gb, 16)); len -= 4; - if (id == FOURCC('m','j','p','g')) /* Apple MJPEG-A */ + if (id == ff_get_fourcc("mjpg")) /* Apple MJPEG-A */ { #if 0 skip_bits(&s->gb, 32); /* field size */ @@ -1147,21 +1144,14 @@ static int mjpeg_decode_app(MJpegDecodeContext *s, } out: - /* should check for further values.. */ - SKIP_REMAINING(&s->gb, len); - return 0; } -#undef FOURCC -static int mjpeg_decode_com(MJpegDecodeContext *s, - UINT8 *buf, int buf_size) +static int mjpeg_decode_com(MJpegDecodeContext *s) { int len, i; UINT8 *cbuf; - init_get_bits(&s->gb, buf, buf_size); - /* XXX: verify len field validity */ len = get_bits(&s->gb, 16)-2; cbuf = av_malloc(len+1); @@ -1179,8 +1169,8 @@ static int mjpeg_decode_com(MJpegDecodeContext *s, if (!strcmp(cbuf, "AVID")) { s->buggy_avid = 1; - if (s->first_picture) - printf("mjpeg: workarounding buggy AVID\n"); +// if (s->first_picture) +// printf("mjpeg: workarounding buggy AVID\n"); } av_free(cbuf); @@ -1188,41 +1178,58 @@ static int mjpeg_decode_com(MJpegDecodeContext *s, return 0; } +#if 0 +static int valid_marker_list[] = +{ + /* 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f */ +/* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/* 1 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/* 2 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/* 3 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/* 4 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/* 5 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/* 6 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/* 7 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/* 8 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/* 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, +/* a */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/* b */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, +} +#endif + /* return the 8 bit start code value and update the search state. Return -1 if no start code found */ -static int find_marker(UINT8 **pbuf_ptr, UINT8 *buf_end, - UINT32 *header_state) +static int find_marker(UINT8 **pbuf_ptr, UINT8 *buf_end) { UINT8 *buf_ptr; - unsigned int state, v; + unsigned int v, v2; int val; +#ifdef DEBUG + int skipped=0; +#endif - state = *header_state; buf_ptr = *pbuf_ptr; -retry: - if (state) { - /* get marker */ - found: - if (buf_ptr < buf_end) { - val = *buf_ptr++; - state = 0; - if ((val >= RST0) && (val <= RST7)) - goto retry; - } else { - val = -1; - } - } else { - while (buf_ptr < buf_end) { - v = *buf_ptr++; - if (v == 0xff) { - state = 1; - goto found; - } + while (buf_ptr < buf_end) { + v = *buf_ptr++; + v2 = *buf_ptr; + if ((v == 0xff) && (v2 >= 0xc0) && (v2 <= 0xfe)) { + val = *buf_ptr++; + goto found; } - val = -1; +#ifdef DEBUG + skipped++; +#endif } + val = -1; +found: +#ifdef DEBUG + dprintf("find_marker skipped %d bytes\n", skipped); +#endif *pbuf_ptr = buf_ptr; - *header_state = state; return val; } @@ -1231,10 +1238,9 @@ static int mjpeg_decode_frame(AVCodecContext *avctx, UINT8 *buf, int buf_size) { MJpegDecodeContext *s = avctx->priv_data; - UINT8 *buf_end, *buf_ptr, *buf_start; - int len, code, input_size, i; + UINT8 *buf_end, *buf_ptr; + int i, start_code; AVPicture *picture = data; - unsigned int start_code; *data_size = 0; @@ -1245,49 +1251,80 @@ static int mjpeg_decode_frame(AVCodecContext *avctx, buf_ptr = buf; buf_end = buf + buf_size; while (buf_ptr < buf_end) { - buf_start = buf_ptr; /* find start next marker */ - code = find_marker(&buf_ptr, 
buf_end, &s->header_state); - /* copy to buffer */ - len = buf_ptr - buf_start; - if (len + (s->buf_ptr - s->buffer) > s->buffer_size) { - /* data too big : flush */ - s->buf_ptr = s->buffer; - if (code > 0) - s->start_code = code; - } else { - memcpy(s->buf_ptr, buf_start, len); - s->buf_ptr += len; - if (code < 0) { - /* nothing to do: wait next marker */ - } else if (code == 0 || code == 0xff) { - /* if we got FF 00, we copy FF to the stream to unescape FF 00 */ - /* valid marker code is between 00 and ff - alex */ - s->buf_ptr--; + start_code = find_marker(&buf_ptr, buf_end); + { + /* EOF */ + if (start_code < 0) { + goto the_end; } else { - /* prepare data for next start code */ - input_size = s->buf_ptr - s->buffer; - start_code = s->start_code; - s->buf_ptr = s->buffer; - s->start_code = code; - dprintf("marker=%x\n", start_code); + dprintf("marker=%x avail_size_in_buf=%d\n", start_code, buf_end - buf_ptr); + + if ((buf_end - buf_ptr) > s->buffer_size) + { + av_free(s->buffer); + s->buffer_size = buf_end-buf_ptr; + s->buffer = av_malloc(s->buffer_size); + dprintf("buffer too small, expanding to %d bytes\n", + s->buffer_size); + } + + /* unescape buffer of SOS */ + if (start_code == SOS) + { + UINT8 *src = buf_ptr; + UINT8 *dst = s->buffer; + + while (src= 0xd0 && x <= 0xd7) + *(dst++) = x; + else if (x) + break; + } + } + init_get_bits(&s->gb, s->buffer, dst - s->buffer); + } + else + init_get_bits(&s->gb, buf_ptr, buf_end - buf_ptr); + + s->start_code = start_code; + + /* process markers */ + if (start_code >= 0xd0 && start_code <= 0xd7) { + dprintf("restart marker: %d\n", start_code&0x0f); + } else if (s->first_picture) { + /* APP fields */ + if (start_code >= 0xe0 && start_code <= 0xef) + mjpeg_decode_app(s); + /* Comment */ + else if (start_code == COM) + mjpeg_decode_com(s); + } + switch(start_code) { case SOI: s->restart_interval = 0; /* nothing to do on SOI */ break; case DQT: - mjpeg_decode_dqt(s, s->buffer, input_size); + mjpeg_decode_dqt(s); break; 
case DHT: - mjpeg_decode_dht(s, s->buffer, input_size); + mjpeg_decode_dht(s); break; case SOF0: - mjpeg_decode_sof0(s, s->buffer, input_size); + mjpeg_decode_sof0(s); break; - case SOS: - mjpeg_decode_sos(s, s->buffer, input_size); - if (s->start_code == EOI || s->buggy_avid || s->restart_interval) { + case EOI: +eoi_parser: + { int l; if (s->interlaced) { s->bottom_field ^= 1; @@ -1297,10 +1334,15 @@ static int mjpeg_decode_frame(AVCodecContext *avctx, } for(i=0;i<3;i++) { picture->data[i] = s->current_picture[i]; +#if 1 l = s->linesize[i]; if (s->interlaced) l >>= 1; picture->linesize[i] = l; +#else + picture->linesize[i] = (s->interlaced) ? + s->linesize[i] >> 1 : s->linesize[i]; +#endif } *data_size = sizeof(AVPicture); avctx->height = s->height; @@ -1325,9 +1367,16 @@ static int mjpeg_decode_frame(AVCodecContext *avctx, avctx->quality = 3; goto the_end; } + break; + case SOS: + mjpeg_decode_sos(s); + /* buggy avid puts EOI every 10-20th frame */ + /* if restart period is over process EOI */ + if ((s->buggy_avid && !s->interlaced) || s->restart_interval) + goto eoi_parser; break; case DRI: - mjpeg_decode_dri(s, s->buffer, input_size); + mjpeg_decode_dri(s); break; case SOF1: case SOF2: @@ -1343,26 +1392,24 @@ static int mjpeg_decode_frame(AVCodecContext *avctx, case SOF15: case JPG: printf("mjpeg: unsupported coding type (%x)\n", start_code); - return -1; + break; +// default: +// printf("mjpeg: unsupported marker (%x)\n", start_code); +// break; } -#if 1 - if (start_code >= 0xd0 && start_code <= 0xd7) { - dprintf("restart marker: %d\n", start_code&0x0f); - } else if (s->first_picture) { - /* APP fields */ - if (start_code >= 0xe0 && start_code <= 0xef) - mjpeg_decode_app(s, s->buffer, input_size, start_code); - /* Comment */ - else if (start_code == COM) - mjpeg_decode_com(s, s->buffer, input_size); - } -#endif + +not_the_end: + /* eof process start code */ + buf_ptr += (get_bits_count(&s->gb)+7)/8; + dprintf("marker parser used %d bytes (%d bits)\n", + 
(get_bits_count(&s->gb)+7)/8, get_bits_count(&s->gb)); } } - not_the_end: - ; } - the_end: +the_end: + + dprintf("mjpeg decode frame unused %d bytes\n", buf_end - buf_ptr); +// return buf_end - buf_ptr; return buf_ptr - buf; } @@ -1371,6 +1418,7 @@ static int mjpeg_decode_end(AVCodecContext *avctx) MJpegDecodeContext *s = avctx->priv_data; int i, j; + av_free(s->buffer); for(i=0;icurrent_picture[i]); for(i=0;i<2;i++) { diff --git a/src/libffmpeg/libavcodec/mlib/dsputil_mlib.c b/src/libffmpeg/libavcodec/mlib/dsputil_mlib.c index c66653e83..445500c50 100644 --- a/src/libffmpeg/libavcodec/mlib/dsputil_mlib.c +++ b/src/libffmpeg/libavcodec/mlib/dsputil_mlib.c @@ -204,7 +204,6 @@ static void add_pixels_clamped_mlib(const DCTELEM *block, UINT8 *pixels, int lin } -#ifdef __notyet__ /* we'll probably need this after the next ffmeg sync */ /* XXX: those functions should be suppressed ASAP when all IDCTs are converted */ static void ff_idct_put_mlib(UINT8 *dest, int line_size, DCTELEM *data) @@ -218,12 +217,6 @@ static void ff_idct_add_mlib(UINT8 *dest, int line_size, DCTELEM *data) mlib_VideoIDCT8x8_S16_S16 (data, data); add_pixels_clamped(data, dest, line_size); } -#else -static void ff_idct_mlib(DCTELEM *data) -{ - mlib_VideoIDCT8x8_S16_S16 (data, data); -} -#endif static void ff_fdct_mlib(DCTELEM *data) { @@ -232,8 +225,6 @@ static void ff_fdct_mlib(DCTELEM *data) void dsputil_init_mlib(void) { - ff_idct = ff_idct_mlib; - put_pixels_tab[0][0] = put_pixels16_mlib; put_pixels_tab[0][1] = put_pixels16_x2_mlib; put_pixels_tab[0][2] = put_pixels16_y2_mlib; @@ -260,16 +251,15 @@ void dsputil_init_mlib(void) void MPV_common_init_mlib(MpegEncContext *s) { + int i; + if(s->avctx->dct_algo==FF_DCT_AUTO || s->avctx->dct_algo==FF_DCT_MLIB){ s->fdct = ff_fdct_mlib; } -#ifdef __notyet__ /* we'll probably need this after the next ffmeg sync */ + if(s->avctx->idct_algo==FF_IDCT_AUTO || s->avctx->idct_algo==FF_IDCT_MLIB){ - int i; - s->idct_put = ff_idct_put_mlib; - s->idct_add = 
ff_idct_add_mlib; - for(i=0; i<64; i++) - s->idct_permutation[i] = i; + s->idct_put= ff_idct_put_mlib; + s->idct_add= ff_idct_add_mlib; + s->idct_permutation_type= FF_NO_IDCT_PERM; } -#endif } diff --git a/src/libffmpeg/libavcodec/motion_est.c b/src/libffmpeg/libavcodec/motion_est.c index 032556a6d..f8064b126 100644 --- a/src/libffmpeg/libavcodec/motion_est.c +++ b/src/libffmpeg/libavcodec/motion_est.c @@ -1142,6 +1142,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, //printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout); s->mb_var [s->mb_width * mb_y + mb_x] = varc; s->mc_mb_var[s->mb_width * mb_y + mb_x] = vard; + s->mb_mean [s->mb_width * mb_y + mb_x] = (sum+7)>>4; s->mb_var_sum += varc; s->mc_mb_var_sum += vard; //printf("E%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout); @@ -1335,7 +1336,13 @@ static inline int check_bidir_mv(MpegEncContext * s, dxy = ((motion_fy & 1) << 1) | (motion_fx & 1); src_x = mb_x * 16 + (motion_fx >> 1); src_y = mb_y * 16 + (motion_fy >> 1); - + src_x = clip(src_x, -16, s->width); + if (src_x == s->width) + dxy&= 2; + src_y = clip(src_y, -16, s->height); + if (src_y == s->height) + dxy&= 1; + ptr = s->last_picture[0] + (src_y * s->linesize) + src_x; put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); @@ -1344,6 +1351,12 @@ static inline int check_bidir_mv(MpegEncContext * s, dxy = ((motion_by & 1) << 1) | (motion_bx & 1); src_x = mb_x * 16 + (motion_bx >> 1); src_y = mb_y * 16 + (motion_by >> 1); + src_x = clip(src_x, -16, s->width); + if (src_x == s->width) + dxy&= 2; + src_y = clip(src_y, -16, s->height); + if (src_y == s->height) + dxy&= 1; ptr = s->next_picture[0] + (src_y * s->linesize) + src_x; avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); @@ -1545,7 +1558,7 @@ void ff_estimate_b_frame_motion(MpegEncContext * s, score=fbmin; type= MB_TYPE_BIDIR; } - score= (score*score)>>8; + score= 
(score*score + 128*256)>>16; s->mc_mb_var_sum += score; s->mc_mb_var[mb_y*s->mb_width + mb_x] = score; //FIXME use SSD } @@ -1697,18 +1710,15 @@ void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, i int xy= (y+1)* (s->mb_width+2)+1; int i= y*s->mb_width; for(x=0; xmb_width; x++){ - if(s->mb_type[i]&type){ - if( fcode_tab[mv_table[xy][0] + MAX_MV] > f_code - || fcode_tab[mv_table[xy][0] + MAX_MV] == 0 - || fcode_tab[mv_table[xy][1] + MAX_MV] > f_code - || fcode_tab[mv_table[xy][1] + MAX_MV] == 0 ){ - if(s->mb_type[i]&(~type)) s->mb_type[i] &= ~type; - else{ - mv_table[xy][0] = 0; - mv_table[xy][1] = 0; - //this is certainly bad FIXME - } - } + if( fcode_tab[mv_table[xy][0] + MAX_MV] > f_code + || fcode_tab[mv_table[xy][0] + MAX_MV] == 0){ + if(mv_table[xy][0]>0) mv_table[xy][0]= (16< f_code + || fcode_tab[mv_table[xy][1] + MAX_MV] == 0){ + if(mv_table[xy][1]>0) mv_table[xy][1]= (16<pb); + ret= bits - s->last_bits; + s->last_bits=bits; + + return ret; +} + static void init_2d_vlc_rl(RLTable *rl) { int i; @@ -86,8 +105,8 @@ static void init_2d_vlc_rl(RLTable *rl) run= 65; level= 0; }else if(code==rl->n+1){ //eob - run= 192; - level= 1; + run= 0; + level= 127; }else{ run= rl->table_run [code] + 1; level= rl->table_level[code]; @@ -289,6 +308,10 @@ void mpeg1_encode_mb(MpegEncContext *s, (!((mb_x | mb_y) == 0 || (mb_x == s->mb_width - 1 && mb_y == s->mb_height - 1)))) { s->mb_incr++; + s->qscale -= s->dquant; + s->skip_count++; + s->misc_bits++; + s->last_bits++; } else { /* output mb incr */ mb_incr = s->mb_incr; @@ -301,26 +324,58 @@ void mpeg1_encode_mb(MpegEncContext *s, mbAddrIncrTable[mb_incr - 1][0]); if (s->pict_type == I_TYPE) { - put_bits(&s->pb, 1, 1); /* macroblock_type : macroblock_quant = 0 */ + if(s->dquant && cbp){ + put_bits(&s->pb, 2, 1); /* macroblock_type : macroblock_quant = 1 */ + put_bits(&s->pb, 5, s->qscale); + }else{ + put_bits(&s->pb, 1, 1); /* macroblock_type : macroblock_quant = 0 */ + s->qscale -= s->dquant; + } + 
s->misc_bits+= get_bits_diff(s); + s->i_count++; } else { if (s->mb_intra) { - put_bits(&s->pb, 5, 0x03); + if(s->dquant && cbp){ + put_bits(&s->pb, 6, 0x01); + put_bits(&s->pb, 5, s->qscale); + }else{ + put_bits(&s->pb, 5, 0x03); + s->qscale -= s->dquant; + } + s->misc_bits+= get_bits_diff(s); + s->i_count++; } else { if (cbp != 0) { if (motion_x == 0 && motion_y == 0) { - put_bits(&s->pb, 2, 1); /* macroblock_pattern only */ + if(s->dquant){ + put_bits(&s->pb, 5, 1); /* macroblock_pattern & quant */ + put_bits(&s->pb, 5, s->qscale); + }else{ + put_bits(&s->pb, 2, 1); /* macroblock_pattern only */ + } + s->misc_bits+= get_bits_diff(s); put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]); } else { - put_bits(&s->pb, 1, 1); /* motion + cbp */ + if(s->dquant){ + put_bits(&s->pb, 5, 2); /* motion + cbp */ + put_bits(&s->pb, 5, s->qscale); + }else{ + put_bits(&s->pb, 1, 1); /* motion + cbp */ + } + s->misc_bits+= get_bits_diff(s); mpeg1_encode_motion(s, motion_x - s->last_mv[0][0][0]); mpeg1_encode_motion(s, motion_y - s->last_mv[0][0][1]); + s->mv_bits+= get_bits_diff(s); put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]); } } else { put_bits(&s->pb, 3, 1); /* motion only */ mpeg1_encode_motion(s, motion_x - s->last_mv[0][0][0]); mpeg1_encode_motion(s, motion_y - s->last_mv[0][0][1]); + s->qscale -= s->dquant; + s->mv_bits+= get_bits_diff(s); } + s->f_count++; } } for(i=0;i<6;i++) { @@ -329,6 +384,10 @@ void mpeg1_encode_mb(MpegEncContext *s, } } s->mb_incr = 1; + if(s->mb_intra) + s->i_tex_bits+= get_bits_diff(s); + else + s->p_tex_bits+= get_bits_diff(s); } s->last_mv[0][0][0] = motion_x; s->last_mv[0][0][1] = motion_y; @@ -512,7 +571,7 @@ static void mpeg1_encode_block(MpegEncContext *s, last_non_zero = i - 1; for(;i<=last_index;i++) { - j = zigzag_direct[i]; + j = s->intra_scantable.permutated[i]; level = block[j]; next_coef: #if 0 @@ -522,26 +581,11 @@ static void mpeg1_encode_block(MpegEncContext *s, /* encode using VLC */ if (level != 
0) { run = i - last_non_zero - 1; -#ifdef ARCH_X86 - asm volatile( - "movl %2, %1 \n\t" - "movl %1, %0 \n\t" - "addl %1, %1 \n\t" - "sbbl %1, %1 \n\t" - "xorl %1, %0 \n\t" - "subl %1, %0 \n\t" - "andl $1, %1 \n\t" - : "=&r" (alevel), "=&r" (sign) - : "g" (level) - ); -#else - sign = 0; - alevel = level; - if (alevel < 0) { - sign = 1; - alevel = -alevel; - } -#endif + + alevel= level; + MASK_ABS(sign, alevel) + sign&=1; + // code = get_rl_index(rl, 0, run, alevel); if (alevel > mpeg1_max_level[0][run]) code= 111; /*rl->n*/ @@ -654,41 +698,8 @@ static inline int get_qscale(MpegEncContext *s) static int mpeg_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) { - int i, j, k, cbp, val, code, mb_type, motion_type; + int i, j, k, cbp, val, mb_type, motion_type; - /* skip mb handling */ - if (s->mb_incr == 0) { - /* read again increment */ - s->mb_incr = 1; - for(;;) { - code = get_vlc2(&s->gb, mbincr_vlc.table, MBINCR_VLC_BITS, 2); - if (code < 0) - return 1; /* error = end of slice */ - if (code >= 33) { - if (code == 33) { - s->mb_incr += 33; - } - /* otherwise, stuffing, nothing to do */ - } else { - s->mb_incr += code; - break; - } - } - } - if(s->mb_x==-1 /* first MB in a slice */ && s->mb_incr>1){ - s->mb_x+= (s->mb_incr - 1) % s->mb_width; - s->mb_y+= (s->mb_incr - 1) / s->mb_width; - s->mb_incr= 1; - } - - if (++s->mb_x >= s->mb_width) { - s->mb_x = 0; - if (s->mb_y >= (s->mb_height - 1)){ - fprintf(stderr, "slice too long\n"); - return -1; - } - s->mb_y++; - } dprintf("decode_mb: x=%d y=%d\n", s->mb_x, s->mb_y); if (--s->mb_incr != 0) { @@ -917,30 +928,35 @@ static int mpeg_decode_mb(MpegEncContext *s, if (s->mpeg2) { if (s->mb_intra) { for(i=0;i<6;i++) { - if (cbp & (1 << (5 - i))) { - if (mpeg2_decode_block_intra(s, block[i], i) < 0) - return -1; - } else { - s->block_last_index[i] = -1; - } + if (mpeg2_decode_block_intra(s, block[i], i) < 0) + return -1; } } else { for(i=0;i<6;i++) { - if (cbp & (1 << (5 - i))) { + if (cbp & 32) { if 
(mpeg2_decode_block_non_intra(s, block[i], i) < 0) return -1; } else { s->block_last_index[i] = -1; } + cbp+=cbp; } } } else { - for(i=0;i<6;i++) { - if (cbp & (1 << (5 - i))) { - if (mpeg1_decode_block(s, block[i], i) < 0) + if (s->mb_intra) { + for(i=0;i<6;i++) { + if (mpeg1_decode_block_intra(s, block[i], i) < 0) return -1; - } else { - s->block_last_index[i] = -1; + } + }else{ + for(i=0;i<6;i++) { + if (cbp & 32) { + if (mpeg1_decode_block_inter(s, block[i], i) < 0) + return -1; + } else { + s->block_last_index[i] = -1; + } + cbp+=cbp; } } } @@ -1003,185 +1019,256 @@ static inline int decode_dc(MpegEncContext *s, int component) return diff; } -static int mpeg1_decode_block(MpegEncContext *s, +static inline int mpeg1_decode_block_intra(MpegEncContext *s, DCTELEM *block, int n) { int level, dc, diff, i, j, run; - int code, component; + int component; RLTable *rl = &rl_mpeg1; + UINT8 * const scantable= s->intra_scantable.permutated; + const UINT16 *quant_matrix= s->intra_matrix; + const int qscale= s->qscale; - if (s->mb_intra) { - /* DC coef */ - component = (n <= 3 ? 0 : n - 4 + 1); - diff = decode_dc(s, component); - if (diff >= 0xffff) - return -1; - dc = s->last_dc[component]; - dc += diff; - s->last_dc[component] = dc; - block[0] = dc; - dprintf("dc=%d diff=%d\n", dc, diff); - i = 1; - } else { + /* DC coef */ + component = (n <= 3 ? 
0 : n - 4 + 1); + diff = decode_dc(s, component); + if (diff >= 0xffff) + return -1; + dc = s->last_dc[component]; + dc += diff; + s->last_dc[component] = dc; + block[0] = dc<<3; + dprintf("dc=%d diff=%d\n", dc, diff); + i = 0; + { + OPEN_READER(re, &s->gb); + /* now quantify & encode AC coefs */ + for(;;) { + UPDATE_CACHE(re, &s->gb); + GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2); + + if(level == 127){ + break; + } else if(level != 0) { + i += run; + j = scantable[i]; + level= (level*qscale*quant_matrix[j])>>3; + level= (level-1)|1; + level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); + LAST_SKIP_BITS(re, &s->gb, 1); + } else { + /* escape */ + run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6); + UPDATE_CACHE(re, &s->gb); + level = SHOW_SBITS(re, &s->gb, 8); SKIP_BITS(re, &s->gb, 8); + if (level == -128) { + level = SHOW_UBITS(re, &s->gb, 8) - 256; LAST_SKIP_BITS(re, &s->gb, 8); + } else if (level == 0) { + level = SHOW_UBITS(re, &s->gb, 8) ; LAST_SKIP_BITS(re, &s->gb, 8); + } + i += run; + j = scantable[i]; + if(level<0){ + level= -level; + level= (level*qscale*quant_matrix[j])>>3; + level= (level-1)|1; + level= -level; + }else{ + level= (level*qscale*quant_matrix[j])>>3; + level= (level-1)|1; + } + } + if (i > 63){ + fprintf(stderr, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + + block[j] = level; + } + CLOSE_READER(re, &s->gb); + } + s->block_last_index[n] = i; + return 0; +} + +static inline int mpeg1_decode_block_inter(MpegEncContext *s, + DCTELEM *block, + int n) +{ + int level, i, j, run; + RLTable *rl = &rl_mpeg1; + UINT8 * const scantable= s->intra_scantable.permutated; + const UINT16 *quant_matrix= s->inter_matrix; + const int qscale= s->qscale; + + { int v; OPEN_READER(re, &s->gb); - i = 0; + i = -1; /* special case for the first coef. 
no need to add a second vlc table */ UPDATE_CACHE(re, &s->gb); v= SHOW_UBITS(re, &s->gb, 2); if (v & 2) { - run = 0; - level = 1 - ((v & 1) << 1); - SKIP_BITS(re, &s->gb, 2); - CLOSE_READER(re, &s->gb); - goto add_coef; + LAST_SKIP_BITS(re, &s->gb, 2); + level= (3*qscale*quant_matrix[0])>>4; + level= (level-1)|1; + if(v&1) + level= -level; + block[0] = level; + i++; } - CLOSE_READER(re, &s->gb); - } - /* now quantify & encode AC coefs */ - for(;;) { - code = get_vlc2(&s->gb, rl->vlc.table, TEX_VLC_BITS, 2); - if (code < 0) { - return -1; - } - if (code == 112) { - break; - } else if (code == 111) { - /* escape */ - run = get_bits(&s->gb, 6); - level = get_bits(&s->gb, 8); - level= (level + ((-1)<<7)) ^ ((-1)<<7); //sign extension - if (level == -128) { - level = get_bits(&s->gb, 8) - 256; - } else if (level == 0) { - level = get_bits(&s->gb, 8); + /* now quantify & encode AC coefs */ + for(;;) { + UPDATE_CACHE(re, &s->gb); + GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2); + + if(level == 127){ + break; + } else if(level != 0) { + i += run; + j = scantable[i]; + level= ((level*2+1)*qscale*quant_matrix[j])>>4; + level= (level-1)|1; + level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); + LAST_SKIP_BITS(re, &s->gb, 1); + } else { + /* escape */ + run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6); + UPDATE_CACHE(re, &s->gb); + level = SHOW_SBITS(re, &s->gb, 8); SKIP_BITS(re, &s->gb, 8); + if (level == -128) { + level = SHOW_UBITS(re, &s->gb, 8) - 256; LAST_SKIP_BITS(re, &s->gb, 8); + } else if (level == 0) { + level = SHOW_UBITS(re, &s->gb, 8) ; LAST_SKIP_BITS(re, &s->gb, 8); + } + i += run; + j = scantable[i]; + if(level<0){ + level= -level; + level= ((level*2+1)*qscale*quant_matrix[j])>>4; + level= (level-1)|1; + level= -level; + }else{ + level= ((level*2+1)*qscale*quant_matrix[j])>>4; + level= (level-1)|1; + } } - } else { - run = rl->table_run[code]; - level = rl->table_level[code]; - if (get_bits1(&s->gb)) - 
level = -level; + if (i > 63){ + fprintf(stderr, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + + block[j] = level; } - i += run; - if (i >= 64) - return -1; - add_coef: - dprintf("%d: run=%d level=%d\n", n, run, level); - j = zigzag_direct[i]; - block[j] = level; - i++; + CLOSE_READER(re, &s->gb); } - s->block_last_index[n] = i-1; + s->block_last_index[n] = i; return 0; } /* Also does unquantization here, since I will never support mpeg2 encoding */ -static int mpeg2_decode_block_non_intra(MpegEncContext *s, - DCTELEM *block, - int n) +static inline int mpeg2_decode_block_non_intra(MpegEncContext *s, + DCTELEM *block, + int n) { int level, i, j, run; - int code; RLTable *rl = &rl_mpeg1; - const UINT8 *scan_table; - const UINT16 *matrix; + UINT8 * const scantable= s->intra_scantable.permutated; + const UINT16 *quant_matrix; + const int qscale= s->qscale; int mismatch; - if (s->alternate_scan) - scan_table = ff_alternate_vertical_scan; - else - scan_table = zigzag_direct; mismatch = 1; { int v; OPEN_READER(re, &s->gb); - i = 0; + i = -1; if (n < 4) - matrix = s->inter_matrix; + quant_matrix = s->inter_matrix; else - matrix = s->chroma_inter_matrix; + quant_matrix = s->chroma_inter_matrix; /* special case for the first coef. 
no need to add a second vlc table */ UPDATE_CACHE(re, &s->gb); v= SHOW_UBITS(re, &s->gb, 2); if (v & 2) { - run = 0; - level = 1 - ((v & 1) << 1); - SKIP_BITS(re, &s->gb, 2); - CLOSE_READER(re, &s->gb); - goto add_coef; + LAST_SKIP_BITS(re, &s->gb, 2); + level= (3*qscale*quant_matrix[0])>>5; + if(v&1) + level= -level; + block[0] = level; + mismatch ^= level; + i++; } - CLOSE_READER(re, &s->gb); - } - /* now quantify & encode AC coefs */ - for(;;) { - code = get_vlc2(&s->gb, rl->vlc.table, TEX_VLC_BITS, 2); - if (code < 0){ - fprintf(stderr, "invalid ac code at %d %d\n", s->mb_x, s->mb_y); - return -1; - } - if (code == 112) { - break; - } else if (code == 111) { - /* escape */ - run = get_bits(&s->gb, 6); - level = get_bits(&s->gb, 12); - level= (level + ((-1)<<11)) ^ ((-1)<<11); //sign extension - } else { - run = rl->table_run[code]; - level = rl->table_level[code]; - if (get_bits1(&s->gb)) - level = -level; - } - i += run; - if (i >= 64){ - fprintf(stderr, "run too long at %d %d\n", s->mb_x, s->mb_y); - return -1; - } - add_coef: - j = scan_table[i]; - dprintf("%d: run=%d level=%d\n", n, run, level); - /* XXX: optimize */ - if (level > 0) { - level = ((level * 2 + 1) * s->qscale * matrix[j]) >> 5; - } else { - level = ((-level * 2 + 1) * s->qscale * matrix[j]) >> 5; - level = -level; + /* now quantify & encode AC coefs */ + for(;;) { + UPDATE_CACHE(re, &s->gb); + GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2); + + if(level == 127){ + break; + } else if(level != 0) { + i += run; + j = scantable[i]; + level= ((level*2+1)*qscale*quant_matrix[j])>>5; + level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); + LAST_SKIP_BITS(re, &s->gb, 1); + } else { + /* escape */ + run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6); + UPDATE_CACHE(re, &s->gb); + level = SHOW_SBITS(re, &s->gb, 12); SKIP_BITS(re, &s->gb, 12); + + i += run; + j = scantable[i]; + if(level<0){ + level= ((-level*2+1)*qscale*quant_matrix[j])>>5; + level= 
-level; + }else{ + level= ((level*2+1)*qscale*quant_matrix[j])>>5; + } + } + if (i > 63){ + fprintf(stderr, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + + mismatch ^= level; + block[j] = level; } - /* XXX: is it really necessary to saturate since the encoder - knows whats going on ? */ - mismatch ^= level; - block[j] = level; - i++; + CLOSE_READER(re, &s->gb); } block[63] ^= (mismatch & 1); + s->block_last_index[n] = i; return 0; } -static int mpeg2_decode_block_intra(MpegEncContext *s, - DCTELEM *block, - int n) +static inline int mpeg2_decode_block_intra(MpegEncContext *s, + DCTELEM *block, + int n) { int level, dc, diff, i, j, run; - int code, component; + int component; RLTable *rl; - const UINT8 *scan_table; - const UINT16 *matrix; + UINT8 * const scantable= s->intra_scantable.permutated; + const UINT16 *quant_matrix; + const int qscale= s->qscale; int mismatch; - if (s->alternate_scan) - scan_table = ff_alternate_vertical_scan; - else - scan_table = zigzag_direct; - /* DC coef */ - component = (n <= 3 ? 
0 : n - 4 + 1); + if (n < 4){ + quant_matrix = s->intra_matrix; + component = 0; + }else{ + quant_matrix = s->chroma_intra_matrix; + component = n - 3; + } diff = decode_dc(s, component); if (diff >= 0xffff) return -1; @@ -1191,51 +1278,53 @@ static int mpeg2_decode_block_intra(MpegEncContext *s, block[0] = dc << (3 - s->intra_dc_precision); dprintf("dc=%d\n", block[0]); mismatch = block[0] ^ 1; - i = 1; + i = 0; if (s->intra_vlc_format) rl = &rl_mpeg2; else rl = &rl_mpeg1; - if (n < 4) - matrix = s->intra_matrix; - else - matrix = s->chroma_intra_matrix; - /* now quantify & encode AC coefs */ - for(;;) { - code = get_vlc2(&s->gb, rl->vlc.table, TEX_VLC_BITS, 2); - if (code < 0){ - fprintf(stderr, "invalid ac code at %d %d\n", s->mb_x, s->mb_y); - return -1; - } - if (code == 112) { - break; - } else if (code == 111) { - /* escape */ - run = get_bits(&s->gb, 6); - level = get_bits(&s->gb, 12); - level= (level + ((-1)<<11)) ^ ((-1)<<11); //sign extension - } else { - run = rl->table_run[code]; - level = rl->table_level[code]; - if (get_bits1(&s->gb)) - level = -level; - } - i += run; - if (i >= 64){ - fprintf(stderr, "run too long at %d %d\n", s->mb_x, s->mb_y); - return -1; + { + OPEN_READER(re, &s->gb); + /* now quantify & encode AC coefs */ + for(;;) { + UPDATE_CACHE(re, &s->gb); + GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2); + + if(level == 127){ + break; + } else if(level != 0) { + i += run; + j = scantable[i]; + level= (level*qscale*quant_matrix[j])>>4; + level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); + LAST_SKIP_BITS(re, &s->gb, 1); + } else { + /* escape */ + run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6); + UPDATE_CACHE(re, &s->gb); + level = SHOW_SBITS(re, &s->gb, 12); SKIP_BITS(re, &s->gb, 12); + i += run; + j = scantable[i]; + if(level<0){ + level= (-level*qscale*quant_matrix[j])>>4; + level= -level; + }else{ + level= (level*qscale*quant_matrix[j])>>4; + } + } + if (i > 63){ + 
fprintf(stderr, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + + mismatch^= level; + block[j] = level; } - j = scan_table[i]; - dprintf("%d: run=%d level=%d\n", n, run, level); - level = (level * s->qscale * matrix[j]) / 16; - /* XXX: is it really necessary to saturate since the encoder - knows whats going on ? */ - mismatch ^= level; - block[j] = level; - i++; + CLOSE_READER(re, &s->gb); } - block[63] ^= (mismatch & 1); + block[63]^= mismatch&1; + s->block_last_index[n] = i; return 0; } @@ -1372,7 +1461,7 @@ static void mpeg_decode_quant_matrix_extension(MpegEncContext *s) if (get_bits1(&s->gb)) { for(i=0;i<64;i++) { v = get_bits(&s->gb, 8); - j = zigzag_direct[i]; + j= s->idct_permutation[ ff_zigzag_direct[i] ]; s->intra_matrix[j] = v; s->chroma_intra_matrix[j] = v; } @@ -1380,7 +1469,7 @@ static void mpeg_decode_quant_matrix_extension(MpegEncContext *s) if (get_bits1(&s->gb)) { for(i=0;i<64;i++) { v = get_bits(&s->gb, 8); - j = zigzag_direct[i]; + j= s->idct_permutation[ ff_zigzag_direct[i] ]; s->inter_matrix[j] = v; s->chroma_inter_matrix[j] = v; } @@ -1388,14 +1477,14 @@ static void mpeg_decode_quant_matrix_extension(MpegEncContext *s) if (get_bits1(&s->gb)) { for(i=0;i<64;i++) { v = get_bits(&s->gb, 8); - j = zigzag_direct[i]; + j= s->idct_permutation[ ff_zigzag_direct[i] ]; s->chroma_intra_matrix[j] = v; } } if (get_bits1(&s->gb)) { for(i=0;i<64;i++) { v = get_bits(&s->gb, 8); - j = zigzag_direct[i]; + j= s->idct_permutation[ ff_zigzag_direct[i] ]; s->chroma_inter_matrix[j] = v; } } @@ -1419,6 +1508,19 @@ static void mpeg_decode_picture_coding_extension(MpegEncContext *s) s->repeat_first_field = get_bits1(&s->gb); s->chroma_420_type = get_bits1(&s->gb); s->progressive_frame = get_bits1(&s->gb); + + if(s->alternate_scan){ + ff_init_scantable(s, &s->inter_scantable , ff_alternate_vertical_scan); + ff_init_scantable(s, &s->intra_scantable , ff_alternate_vertical_scan); + ff_init_scantable(s, &s->intra_h_scantable, 
ff_alternate_vertical_scan); + ff_init_scantable(s, &s->intra_v_scantable, ff_alternate_vertical_scan); + }else{ + ff_init_scantable(s, &s->inter_scantable , ff_zigzag_direct); + ff_init_scantable(s, &s->intra_scantable , ff_zigzag_direct); + ff_init_scantable(s, &s->intra_h_scantable, ff_alternate_horizontal_scan); + ff_init_scantable(s, &s->intra_v_scantable, ff_alternate_vertical_scan); + } + /* composite display not parsed */ dprintf("intra_dc_precision=%d\n", s->intra_dc_precision); dprintf("picture_structure=%d\n", s->picture_structure); @@ -1476,13 +1578,11 @@ static int mpeg_decode_slice(AVCodecContext *avctx, s->last_dc[1] = s->last_dc[0]; s->last_dc[2] = s->last_dc[0]; memset(s->last_mv, 0, sizeof(s->last_mv)); - s->mb_x = -1; - s->mb_y = start_code; - s->mb_incr = 0; /* start frame decoding */ if (s->first_slice) { s->first_slice = 0; - MPV_frame_start(s, avctx); + if(MPV_frame_start(s, avctx) < 0) + return -1; } init_get_bits(&s->gb, buf, buf_size); @@ -1493,22 +1593,98 @@ static int mpeg_decode_slice(AVCodecContext *avctx, skip_bits(&s->gb, 8); } + s->mb_x=0; + for(;;) { + int code = get_vlc2(&s->gb, mbincr_vlc.table, MBINCR_VLC_BITS, 2); + if (code < 0) + return -1; /* error = end of slice, but empty slice is bad or?*/ + if (code >= 33) { + if (code == 33) { + s->mb_x += 33; + } + /* otherwise, stuffing, nothing to do */ + } else { + s->mb_x += code; + break; + } + } + s->mb_y = start_code; + s->mb_incr= 1; + for(;;) { clear_blocks(s->block[0]); emms_c(); + ret = mpeg_decode_mb(s, s->block); dprintf("ret=%d\n", ret); if (ret < 0) return -1; - if (ret == 1) - break; + MPV_decode_mb(s, s->block); + + if (++s->mb_x >= s->mb_width) { + if ( avctx->draw_horiz_band + && (s->num_available_buffers>=1 || (!s->has_b_frames)) ) { + UINT8 *src_ptr[3]; + int y, h, offset; + y = s->mb_y * 16; + h = s->height - y; + if (h > 16) + h = 16; + if(s->pict_type==B_TYPE) + offset = 0; + else + offset = y * s->linesize; + if(s->pict_type==B_TYPE || (!s->has_b_frames)){ + 
src_ptr[0] = s->current_picture[0] + offset; + src_ptr[1] = s->current_picture[1] + (offset >> 2); + src_ptr[2] = s->current_picture[2] + (offset >> 2); + } else { + src_ptr[0] = s->last_picture[0] + offset; + src_ptr[1] = s->last_picture[1] + (offset >> 2); + src_ptr[2] = s->last_picture[2] + (offset >> 2); + } + avctx->draw_horiz_band(avctx, src_ptr, s->linesize, + y, s->width, h); + } + + s->mb_x = 0; + s->mb_y++; + PRINT_QP("%s", "\n"); + } + PRINT_QP("%2d", s->qscale); + + /* skip mb handling */ + if (s->mb_incr == 0) { + /* read again increment */ + s->mb_incr = 1; + for(;;) { + int code = get_vlc2(&s->gb, mbincr_vlc.table, MBINCR_VLC_BITS, 2); + if (code < 0) + goto eos; /* error = end of slice */ + if (code >= 33) { + if (code == 33) { + s->mb_incr += 33; + } + /* otherwise, stuffing, nothing to do */ + } else { + s->mb_incr += code; + break; + } + } + } + if(s->mb_y >= s->mb_height){ + fprintf(stderr, "slice too long\n"); + return -1; + } } +eos: //end of slice + emms_c(); /* end of slice reached */ - if (s->mb_x == (s->mb_width - 1) && - s->mb_y == (s->mb_height - 1)) { + if (/*s->mb_x == 0 &&*/ + s->mb_y == s->mb_height) { /* end of image */ UINT8 **picture; @@ -1601,41 +1777,43 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx, if (get_bits1(&s->gb)) { for(i=0;i<64;i++) { v = get_bits(&s->gb, 8); - j = zigzag_direct[i]; + j = s->intra_scantable.permutated[i]; s->intra_matrix[j] = v; s->chroma_intra_matrix[j] = v; } #ifdef DEBUG dprintf("intra matrix present\n"); for(i=0;i<64;i++) - dprintf(" %d", s->intra_matrix[zigzag_direct[i]]); + dprintf(" %d", s->intra_matrix[s->intra_scantable.permutated[i]]); printf("\n"); #endif } else { for(i=0;i<64;i++) { + int j= s->idct_permutation[i]; v = ff_mpeg1_default_intra_matrix[i]; - s->intra_matrix[i] = v; - s->chroma_intra_matrix[i] = v; + s->intra_matrix[j] = v; + s->chroma_intra_matrix[j] = v; } } if (get_bits1(&s->gb)) { for(i=0;i<64;i++) { v = get_bits(&s->gb, 8); - j = zigzag_direct[i]; + j = 
s->intra_scantable.permutated[i]; s->inter_matrix[j] = v; s->chroma_inter_matrix[j] = v; } #ifdef DEBUG dprintf("non intra matrix present\n"); for(i=0;i<64;i++) - dprintf(" %d", s->inter_matrix[zigzag_direct[i]]); + dprintf(" %d", s->inter_matrix[s->intra_scantable.permutated[i]]); printf("\n"); #endif } else { for(i=0;i<64;i++) { + int j= s->idct_permutation[i]; v = ff_mpeg1_default_non_intra_matrix[i]; - s->inter_matrix[i] = v; - s->chroma_inter_matrix[i] = v; + s->inter_matrix[j] = v; + s->chroma_inter_matrix[j] = v; } } @@ -1805,5 +1983,5 @@ AVCodec mpeg_decoder = { NULL, mpeg_decode_end, mpeg_decode_frame, - CODEC_CAP_DR1, + CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, }; diff --git a/src/libffmpeg/libavcodec/mpeg12data.h b/src/libffmpeg/libavcodec/mpeg12data.h index 041708fd1..a1a7166be 100644 --- a/src/libffmpeg/libavcodec/mpeg12data.h +++ b/src/libffmpeg/libavcodec/mpeg12data.h @@ -2,7 +2,7 @@ * MPEG1/2 tables */ -INT16 ff_mpeg1_default_intra_matrix[64] = { +const INT16 ff_mpeg1_default_intra_matrix[64] = { 8, 16, 19, 22, 26, 27, 29, 34, 16, 16, 22, 24, 27, 29, 34, 37, 19, 22, 26, 27, 29, 34, 34, 38, @@ -13,7 +13,7 @@ INT16 ff_mpeg1_default_intra_matrix[64] = { 27, 29, 35, 38, 46, 56, 69, 83 }; -INT16 ff_mpeg1_default_non_intra_matrix[64] = { +const INT16 ff_mpeg1_default_non_intra_matrix[64] = { 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, diff --git a/src/libffmpeg/libavcodec/mpeg4data.h b/src/libffmpeg/libavcodec/mpeg4data.h index 88101a544..ef612c2fa 100644 --- a/src/libffmpeg/libavcodec/mpeg4data.h +++ b/src/libffmpeg/libavcodec/mpeg4data.h @@ -135,7 +135,7 @@ static const UINT16 pixel_aspect[16][2]={ }; /* these matrixes will be permuted for the idct */ -INT16 ff_mpeg4_default_intra_matrix[64] = { +const INT16 ff_mpeg4_default_intra_matrix[64] = { 8, 17, 18, 19, 21, 23, 25, 27, 17, 18, 19, 21, 23, 25, 27, 28, 20, 21, 22, 23, 24, 26, 28, 30, @@ -146,7 +146,7 @@ INT16 ff_mpeg4_default_intra_matrix[64] = { 
27, 28, 30, 32, 35, 38, 41, 45, }; -INT16 ff_mpeg4_default_non_intra_matrix[64] = { +const INT16 ff_mpeg4_default_non_intra_matrix[64] = { 16, 17, 18, 19, 20, 21, 22, 23, 17, 18, 19, 20, 21, 22, 23, 24, 18, 19, 20, 21, 22, 23, 24, 25, @@ -166,4 +166,6 @@ UINT8 ff_mpeg4_c_dc_scale_table[32]={ 0, 8, 8, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,20,21,22,23,24,25 }; - +const UINT16 ff_mpeg4_resync_prefix[8]={ + 0x7F00, 0x7E00, 0x7C00, 0x7800, 0x7000, 0x6000, 0x4000, 0x0000 +}; diff --git a/src/libffmpeg/libavcodec/mpegvideo.c b/src/libffmpeg/libavcodec/mpegvideo.c index 53534736a..4761b6137 100644 --- a/src/libffmpeg/libavcodec/mpegvideo.c +++ b/src/libffmpeg/libavcodec/mpegvideo.c @@ -18,26 +18,32 @@ * * 4MV & hq & b-frame encoding stuff by Michael Niedermayer */ + +#include #include "avcodec.h" #include "dsputil.h" #include "mpegvideo.h" +#include "simple_idct.h" #ifdef USE_FASTMEMCPY #include "fastmemcpy.h" #endif +//#undef NDEBUG +//#include + static void encode_picture(MpegEncContext *s, int picture_number); -static void dct_unquantize_mpeg1_c(MpegEncContext *s, +static void dct_unquantize_mpeg1_c(MpegEncContext *s, DCTELEM *block, int n, int qscale); static void dct_unquantize_mpeg2_c(MpegEncContext *s, DCTELEM *block, int n, int qscale); -static void dct_unquantize_h263_c(MpegEncContext *s, +static void dct_unquantize_h263_c(MpegEncContext *s, DCTELEM *block, int n, int qscale); static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w); static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow); void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w)= draw_edges_c; -static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h, +static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h, int src_x, int src_y, int w, int h); #define EDGE_WIDTH 16 @@ -63,6 +69,18 @@ static const unsigned short 
aanscales[64] = { 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 }; +/* Input permutation for the simple_idct_mmx */ +static const UINT8 simple_mmx_permutation[64]={ + 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, + 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, + 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, + 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, + 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, + 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, + 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, + 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, +}; + static UINT8 h263_chroma_roundtab[16] = { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, }; @@ -70,49 +88,48 @@ static UINT8 h263_chroma_roundtab[16] = { static UINT16 default_mv_penalty[MAX_FCODE+1][MAX_MV*2+1]; static UINT8 default_fcode_tab[MAX_MV*2+1]; -extern UINT8 zigzag_end[64]; - /* default motion estimation */ int motion_estimation_method = ME_EPZS; static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64], - const UINT16 *quant_matrix, int bias) + const UINT16 *quant_matrix, int bias, int qmin, int qmax) { int qscale; - for(qscale=1; qscale<32; qscale++){ + for(qscale=qmin; qscale<=qmax; qscale++){ int i; if (s->fdct == ff_jpeg_fdct_islow) { for(i=0;i<64;i++) { - const int j= block_permute_op(i); + const int j= s->idct_permutation[i]; /* 16 <= qscale * quant_matrix[i] <= 7905 */ /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */ /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */ - - qmat[qscale][j] = (int)((UINT64_C(1) << QMAT_SHIFT) / + + qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j])); } } else if (s->fdct == fdct_ifast) { for(i=0;i<64;i++) { - const int j= block_permute_op(i); + const int j= s->idct_permutation[i]; /* 16 <= qscale * quant_matrix[i] <= 7905 */ /* 19952 
<= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */ /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */ - - qmat[qscale][j] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / + + qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / (aanscales[i] * qscale * quant_matrix[j])); } } else { for(i=0;i<64;i++) { + const int j= s->idct_permutation[i]; /* We can safely suppose that 16 <= quant_matrix[i] <= 255 So 16 <= qscale * quant_matrix[i] <= 7905 so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905 so 32768 >= (1<<19) / (qscale * quant_matrix[i]) >= 67 */ qmat [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]); - qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[block_permute_op(i)]); + qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]); if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1; qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]); @@ -120,7 +137,7 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16 } } } -// move into common.c perhaps +// move into common.c perhaps #define CHECKED_ALLOCZ(p, size)\ {\ p= av_mallocz(size);\ @@ -130,11 +147,44 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16 }\ } -/* init common structure for both encoder and decoder */ -int MPV_common_init(MpegEncContext *s) +void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scantable){ + int i; + int end; + + st->scantable= src_scantable; + + for(i=0; i<64; i++){ + int j; + j = src_scantable[i]; + st->permutated[i] = s->idct_permutation[j]; + } + + end=-1; + for(i=0; i<64; i++){ + int j; + j = st->permutated[i]; + if(j>end) end=j; + st->raster_end[i]= end; + } +} + +/* XXX: those functions should be suppressed ASAP when all IDCTs are + converted */ 
+static void ff_jref_idct_put(UINT8 *dest, int line_size, DCTELEM *block) { - int c_size, i; - UINT8 *pict; + j_rev_dct (block); + put_pixels_clamped(block, dest, line_size); +} +static void ff_jref_idct_add(UINT8 *dest, int line_size, DCTELEM *block) +{ + j_rev_dct (block); + add_pixels_clamped(block, dest, line_size); +} + +/* init common dct for both encoder and decoder */ +int DCT_common_init(MpegEncContext *s) +{ + int i; s->dct_unquantize_h263 = dct_unquantize_h263_c; s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_c; @@ -144,69 +194,141 @@ int MPV_common_init(MpegEncContext *s) if(s->avctx->dct_algo==FF_DCT_FASTINT) s->fdct = fdct_ifast; else - s->fdct = ff_jpeg_fdct_islow; - + s->fdct = ff_jpeg_fdct_islow; //slow/accurate/default + + if(s->avctx->idct_algo==FF_IDCT_INT){ + s->idct_put= ff_jref_idct_put; + s->idct_add= ff_jref_idct_add; + s->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; + }else{ //accurate/default + s->idct_put= simple_idct_put; + s->idct_add= simple_idct_add; + s->idct_permutation_type= FF_NO_IDCT_PERM; + } + #ifdef HAVE_MMX MPV_common_init_mmx(s); #endif #ifdef ARCH_ALPHA MPV_common_init_axp(s); #endif -#if defined(HAVE_MLIB) && !defined(HAVE_MMX) +#ifdef HAVE_MLIB MPV_common_init_mlib(s); #endif +#ifdef HAVE_MMI + MPV_common_init_mmi(s); +#endif +#ifdef ARCH_ARMV4L + MPV_common_init_armv4l(); +#endif + + switch(s->idct_permutation_type){ + case FF_NO_IDCT_PERM: + for(i=0; i<64; i++) + s->idct_permutation[i]= i; + break; + case FF_LIBMPEG2_IDCT_PERM: + for(i=0; i<64; i++) + s->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); + break; + case FF_SIMPLE_IDCT_PERM: + for(i=0; i<64; i++) + s->idct_permutation[i]= simple_mmx_permutation[i]; + break; + case FF_TRANSPOSE_IDCT_PERM: + for(i=0; i<64; i++) + s->idct_permutation[i]= ((i&7)<<3) | (i>>3); + break; + default: + fprintf(stderr, "Internal error, IDCT permutation not set\n"); + return -1; + } + + + /* load & permutate scantables + note: only wmv uses differnt ones + */ + 
ff_init_scantable(s, &s->inter_scantable , ff_zigzag_direct); + ff_init_scantable(s, &s->intra_scantable , ff_zigzag_direct); + ff_init_scantable(s, &s->intra_h_scantable, ff_alternate_horizontal_scan); + ff_init_scantable(s, &s->intra_v_scantable, ff_alternate_vertical_scan); + + return 0; +} + +/* init common structure for both encoder and decoder */ +int MPV_common_init(MpegEncContext *s) +{ + UINT8 *pict; + int y_size, c_size, yc_size, i; + + DCT_common_init(s); + + s->flags= s->avctx->flags; s->mb_width = (s->width + 15) / 16; s->mb_height = (s->height + 15) / 16; - + + y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2); + c_size = (s->mb_width + 2) * (s->mb_height + 2); + yc_size = y_size + 2 * c_size; + /* set default edge pos, will be overriden in decode_header if needed */ s->h_edge_pos= s->mb_width*16; s->v_edge_pos= s->mb_height*16; + + /* convert fourcc to upper case */ + s->avctx->fourcc= toupper( s->avctx->fourcc &0xFF) + + (toupper((s->avctx->fourcc>>8 )&0xFF)<<8 ) + + (toupper((s->avctx->fourcc>>16)&0xFF)<<16) + + (toupper((s->avctx->fourcc>>24)&0xFF)<<24); s->mb_num = s->mb_width * s->mb_height; + if(!(s->flags&CODEC_FLAG_DR1)){ s->linesize = s->mb_width * 16 + 2 * EDGE_WIDTH; s->uvlinesize = s->mb_width * 8 + EDGE_WIDTH; for(i=0;i<3;i++) { - int w, h, shift, pict_start; + int w, h, shift, pict_start, size; w = s->linesize; h = s->mb_height * 16 + 2 * EDGE_WIDTH; shift = (i == 0) ? 
0 : 1; - c_size = (s->linesize>>shift) * (h >> shift); + size = (s->linesize>>shift) * (h >> shift); pict_start = (s->linesize>>shift) * (EDGE_WIDTH >> shift) + (EDGE_WIDTH >> shift); - CHECKED_ALLOCZ(pict, c_size) + CHECKED_ALLOCZ(pict, size) s->last_picture_base[i] = pict; s->last_picture[i] = pict + pict_start; - if(i>0) memset(s->last_picture_base[i], 128, c_size); - - CHECKED_ALLOCZ(pict, c_size) + if(i>0) memset(s->last_picture_base[i], 128, size); + + CHECKED_ALLOCZ(pict, size) s->next_picture_base[i] = pict; s->next_picture[i] = pict + pict_start; - if(i>0) memset(s->next_picture_base[i], 128, c_size); - + if(i>0) memset(s->next_picture_base[i], 128, size); + if (s->has_b_frames || s->codec_id==CODEC_ID_MPEG4) { - /* Note the MPEG4 stuff is here cuz of buggy encoders which dont set the low_delay flag but + /* Note the MPEG4 stuff is here cuz of buggy encoders which dont set the low_delay flag but do low-delay encoding, so we cant allways distinguish b-frame containing streams from low_delay streams */ - CHECKED_ALLOCZ(pict, c_size) + CHECKED_ALLOCZ(pict, size) s->aux_picture_base[i] = pict; s->aux_picture[i] = pict + pict_start; - if(i>0) memset(s->aux_picture_base[i], 128, c_size); + if(i>0) memset(s->aux_picture_base[i], 128, size); } } s->ip_buffer_count= 2; } - + CHECKED_ALLOCZ(s->edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance - + if (s->encoding) { int j; int mv_table_size= (s->mb_width+2)*(s->mb_height+2); - + CHECKED_ALLOCZ(s->mb_var , s->mb_num * sizeof(INT16)) CHECKED_ALLOCZ(s->mc_mb_var, s->mb_num * sizeof(INT16)) + CHECKED_ALLOCZ(s->mb_mean , s->mb_num * sizeof(INT8)) /* Allocate MV tables */ CHECKED_ALLOCZ(s->p_mv_table , mv_table_size * 2 * sizeof(INT16)) @@ -219,7 +341,7 @@ int MPV_common_init(MpegEncContext *s) CHECKED_ALLOCZ(s->b_direct_mv_table , mv_table_size * 2 * sizeof(INT16)) CHECKED_ALLOCZ(s->me_scratchpad, s->linesize*16*3*sizeof(uint8_t)) - + CHECKED_ALLOCZ(s->me_map , 
ME_MAP_SIZE*sizeof(uint32_t)) CHECKED_ALLOCZ(s->me_score_map, ME_MAP_SIZE*sizeof(uint16_t)) @@ -227,14 +349,14 @@ int MPV_common_init(MpegEncContext *s) for(j=0; jlinesize; h = s->mb_height * 16; shift = (i == 0) ? 0 : 1; - c_size = (w >> shift) * (h >> shift); + size = (w >> shift) * (h >> shift); - CHECKED_ALLOCZ(pict, c_size); + CHECKED_ALLOCZ(pict, size); s->picture_buffer[j][i] = pict; } } @@ -244,13 +366,15 @@ int MPV_common_init(MpegEncContext *s) CHECKED_ALLOCZ(s->tex_pb_buffer, PB_BUFFER_SIZE); CHECKED_ALLOCZ( s->pb2_buffer, PB_BUFFER_SIZE); } - + if(s->msmpeg4_version){ CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int)); } CHECKED_ALLOCZ(s->avctx->stats_out, 256); } - + + CHECKED_ALLOCZ(s->error_status_table, s->mb_num*sizeof(UINT8)) + if (s->out_format == FMT_H263 || s->encoding) { int size; /* Allocate MB type table */ @@ -262,53 +386,53 @@ int MPV_common_init(MpegEncContext *s) } if(s->codec_id==CODEC_ID_MPEG4){ - /* 4mv and interlaced direct mode decoding tables */ - CHECKED_ALLOCZ(s->co_located_type_table, s->mb_num * sizeof(UINT8)) + /* interlaced direct mode decoding tables */ CHECKED_ALLOCZ(s->field_mv_table, s->mb_num*2*2 * sizeof(INT16)) CHECKED_ALLOCZ(s->field_select_table, s->mb_num*2* sizeof(INT8)) } - - if (s->h263_pred || s->h263_plus) { - int y_size, c_size, i, size; - - /* dc values */ - - y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2); - c_size = (s->mb_width + 2) * (s->mb_height + 2); - size = y_size + 2 * c_size; - CHECKED_ALLOCZ(s->dc_val[0], size * sizeof(INT16)); - s->dc_val[1] = s->dc_val[0] + y_size; - s->dc_val[2] = s->dc_val[1] + c_size; - for(i=0;idc_val[0][i] = 1024; - + /* 4mv b frame decoding table */ + //note this is needed for h263 without b frames too (segfault on damaged streams otherwise) + CHECKED_ALLOCZ(s->co_located_type_table, s->mb_num * sizeof(UINT8)) + if (s->out_format == FMT_H263) { /* ac values */ - CHECKED_ALLOCZ(s->ac_val[0], size * sizeof(INT16) * 16); + 
CHECKED_ALLOCZ(s->ac_val[0], yc_size * sizeof(INT16) * 16); s->ac_val[1] = s->ac_val[0] + y_size; s->ac_val[2] = s->ac_val[1] + c_size; - + /* cbp values */ CHECKED_ALLOCZ(s->coded_block, y_size); - + /* divx501 bitstream reorder buffer */ CHECKED_ALLOCZ(s->bitstream_buffer, BITSTREAM_BUFFER_SIZE); - + /* cbp, ac_pred, pred_dir */ CHECKED_ALLOCZ(s->cbp_table , s->mb_num * sizeof(UINT8)) CHECKED_ALLOCZ(s->pred_dir_table, s->mb_num * sizeof(UINT8)) } - CHECKED_ALLOCZ(s->qscale_table , s->mb_num * sizeof(UINT8)) + + if (s->h263_pred || s->h263_plus || !s->encoding) { + /* dc values */ + //MN: we need these for error resilience of intra-frames + CHECKED_ALLOCZ(s->dc_val[0], yc_size * sizeof(INT16)); + s->dc_val[1] = s->dc_val[0] + y_size; + s->dc_val[2] = s->dc_val[1] + c_size; + for(i=0;idc_val[0][i] = 1024; + } + CHECKED_ALLOCZ(s->qscale_table , s->mb_num * sizeof(UINT8)) + /* which mb is a intra block */ CHECKED_ALLOCZ(s->mbintra_table, s->mb_num); memset(s->mbintra_table, 1, s->mb_num); - + /* default structure is frame */ s->picture_structure = PICT_FRAME; - + /* init macroblock skip table */ - CHECKED_ALLOCZ(s->mbskip_table, s->mb_num); - + CHECKED_ALLOCZ(s->mbskip_table, s->mb_num+1); + //Note the +1 is for a quicker mpeg4 slice_end detection + s->block= s->blocks[0]; s->context_initialized = 1; @@ -329,6 +453,7 @@ void MPV_common_end(MpegEncContext *s) av_freep(&s->mb_type); av_freep(&s->mb_var); av_freep(&s->mc_mb_var); + av_freep(&s->mb_mean); av_freep(&s->p_mv_table); av_freep(&s->b_forw_mv_table); av_freep(&s->b_back_mv_table); @@ -348,7 +473,7 @@ void MPV_common_end(MpegEncContext *s) av_freep(&s->me_scratchpad); av_freep(&s->me_map); av_freep(&s->me_score_map); - + av_freep(&s->mbskip_table); av_freep(&s->bitstream_buffer); av_freep(&s->tex_pb_buffer); @@ -359,7 +484,8 @@ void MPV_common_end(MpegEncContext *s) av_freep(&s->field_select_table); av_freep(&s->avctx->stats_out); av_freep(&s->ac_stats); - + av_freep(&s->error_status_table); + for(i=0;i<3;i++) 
{ int j; if(!(s->flags&CODEC_FLAG_DR1)){ @@ -441,6 +567,15 @@ int MPV_encode_init(AVCodecContext *avctx) /* Fixed QSCALE */ s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE); + + s->adaptive_quant= ( s->avctx->lumi_masking + || s->avctx->dark_masking + || s->avctx->temporal_cplx_masking + || s->avctx->spatial_cplx_masking + || s->avctx->p_masking) + && !s->fixed_qscale; + + s->progressive_sequence= !(avctx->flags & CODEC_FLAG_INTERLACED_DCT); switch(avctx->codec->id) { case CODEC_ID_MPEG1VIDEO: @@ -454,10 +589,10 @@ int MPV_encode_init(AVCodecContext *avctx) s->mjpeg_data_only_frames = 0; /* write all the needed headers */ s->mjpeg_vsample[0] = 2; /* set up default sampling factors */ s->mjpeg_vsample[1] = 1; /* the only currently supported values */ - s->mjpeg_vsample[2] = 1; + s->mjpeg_vsample[2] = 1; s->mjpeg_hsample[0] = 2; - s->mjpeg_hsample[1] = 1; - s->mjpeg_hsample[2] = 1; + s->mjpeg_hsample[1] = 1; + s->mjpeg_hsample[2] = 1; if (mjpeg_init(s) < 0) return -1; avctx->delay=0; @@ -472,12 +607,10 @@ int MPV_encode_init(AVCodecContext *avctx) break; case CODEC_ID_H263P: s->out_format = FMT_H263; - s->rtp_mode = 1; - s->rtp_payload_size = 1200; s->h263_plus = 1; s->unrestricted_mv = 1; s->h263_aic = 1; - + /* These are just to be sure */ s->umvplus = 0; s->umvplus_dec = 0; @@ -493,7 +626,7 @@ int MPV_encode_init(AVCodecContext *avctx) s->h263_pred = 1; s->unrestricted_mv = 1; s->has_b_frames= s->max_b_frames ? 1 : 0; - s->low_delay=0; + s->low_delay= !s->has_b_frames; avctx->delay= s->low_delay ? 
0 : (s->max_b_frames + 1); break; case CODEC_ID_MSMPEG4V1: @@ -539,7 +672,7 @@ int MPV_encode_init(AVCodecContext *avctx) default: return -1; } - + { /* set up some save defaults, some codecs might override them later */ static int done=0; if(!done){ @@ -557,14 +690,7 @@ int MPV_encode_init(AVCodecContext *avctx) s->fcode_tab= default_fcode_tab; s->y_dc_scale_table= s->c_dc_scale_table= ff_mpeg1_dc_scale_table; - - if (s->out_format == FMT_H263) - h263_encode_init(s); - else if (s->out_format == FMT_MPEG1) - ff_mpeg1_encode_init(s); - if(s->msmpeg4_version) - ff_msmpeg4_encode_init(s); - + /* dont use mv_penalty table for crap MV as it would be confused */ if (s->me_method < ME_EPZS) s->mv_penalty = default_mv_penalty; @@ -573,28 +699,36 @@ int MPV_encode_init(AVCodecContext *avctx) /* init */ if (MPV_common_init(s) < 0) return -1; + + if (s->out_format == FMT_H263) + h263_encode_init(s); + else if (s->out_format == FMT_MPEG1) + ff_mpeg1_encode_init(s); + if(s->msmpeg4_version) + ff_msmpeg4_encode_init(s); /* init default q matrix */ for(i=0;i<64;i++) { + int j= s->idct_permutation[i]; if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){ - s->intra_matrix[i] = ff_mpeg4_default_intra_matrix[i]; - s->inter_matrix[i] = ff_mpeg4_default_non_intra_matrix[i]; + s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i]; + s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i]; }else if(s->out_format == FMT_H263){ - s->intra_matrix[i] = - s->inter_matrix[i] = ff_mpeg1_default_non_intra_matrix[i]; + s->intra_matrix[j] = + s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i]; }else{ /* mpeg1 */ - s->intra_matrix[i] = ff_mpeg1_default_intra_matrix[i]; - s->inter_matrix[i] = ff_mpeg1_default_non_intra_matrix[i]; + s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i]; + s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i]; } } /* precompute matrix */ /* for mjpeg, we do include qscale in the matrix */ if (s->out_format != FMT_MJPEG) { - convert_matrix(s, 
s->q_intra_matrix, s->q_intra_matrix16, s->q_intra_matrix16_bias, - s->intra_matrix, s->intra_quant_bias); - convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16, s->q_inter_matrix16_bias, - s->inter_matrix, s->inter_quant_bias); + convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, s->q_intra_matrix16_bias, + s->intra_matrix, s->intra_quant_bias, 1, 31); + convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16, s->q_inter_matrix16_bias, + s->inter_matrix, s->inter_quant_bias, 1, 31); } if(ff_rate_control_init(s) < 0) @@ -623,7 +757,7 @@ int MPV_encode_end(AVCodecContext *avctx) MPV_common_end(s); if (s->out_format == FMT_MJPEG) mjpeg_close(s); - + return 0; } @@ -657,29 +791,31 @@ static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w) } /* generic function for encode/decode called before a frame is coded/decoded */ -void MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx) +int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx) { int i; UINT8 *tmp; s->mb_skiped = 0; - s->decoding_error=0; avctx->mbskip_table= s->mbskip_table; if(avctx->flags&CODEC_FLAG_DR1){ - avctx->get_buffer_callback(avctx, s->width, s->height, s->pict_type); + if(avctx->get_buffer_callback(avctx, s->width, s->height, s->pict_type) < 0){ + fprintf(stderr, "get_buffer() failed\n"); + return -1; + } s->linesize = avctx->dr_stride; s->uvlinesize= avctx->dr_uvstride; s->ip_buffer_count= avctx->dr_ip_buffer_count; } avctx->dr_ip_buffer_count= s->ip_buffer_count; - + if (s->pict_type == B_TYPE) { for(i=0;i<3;i++) { if(avctx->flags&CODEC_FLAG_DR1) s->aux_picture[i]= avctx->dr_buffer[i]; - + //FIXME the following should never be needed, the decoder should drop b frames if no reference is available if(s->next_picture[i]==NULL) s->next_picture[i]= s->aux_picture[i]; @@ -719,8 +855,10 @@ void MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx) s->dct_unquantize = s->dct_unquantize_mpeg2; else s->dct_unquantize = s->dct_unquantize_h263; - }else + }else 
s->dct_unquantize = s->dct_unquantize_mpeg1; + + return 0; } /* generic function for encode/decode called after a frame has been coded/decoded */ @@ -736,7 +874,7 @@ void MPV_frame_end(MpegEncContext *s) draw_edges(s->current_picture[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2); } emms_c(); - + s->last_pict_type = s->pict_type; if(s->pict_type!=B_TYPE){ s->last_non_b_pict_type= s->pict_type; @@ -749,7 +887,7 @@ void MPV_frame_end(MpegEncContext *s) void reorder_input(MpegEncContext *s, AVPicture *pict) { int i, j, index; - + if(s->max_b_frames > FF_MAX_B_FRAMES) s->max_b_frames= FF_MAX_B_FRAMES; // delay= s->max_b_frames+1; (or 0 if no b frames cuz decoder diff) @@ -761,17 +899,17 @@ void reorder_input(MpegEncContext *s, AVPicture *pict) s->coded_order[j].pict_type=0; switch(s->input_pict_type){ - default: + default: case I_TYPE: case S_TYPE: case P_TYPE: index= s->max_b_frames - s->b_frames_since_non_b; s->b_frames_since_non_b=0; - break; + break; case B_TYPE: index= s->max_b_frames + 1; s->b_frames_since_non_b++; - break; + break; } //printf("index:%d type:%d strides: %d %d\n", index, s->input_pict_type, pict->linesize[0], s->linesize); if( (index==0 || (s->flags&CODEC_FLAG_INPUT_PRESERVED)) @@ -861,9 +999,9 @@ int MPV_encode_picture(AVCodecContext *avctx, if(s->input_pict_type==I_TYPE) s->input_picture_in_gop_number=0; - + reorder_input(s, pict); - + /* output? 
*/ if(s->coded_order[0].picture[0]){ @@ -877,7 +1015,7 @@ int MPV_encode_picture(AVCodecContext *avctx, MPV_frame_start(s, avctx); encode_picture(s, s->picture_number); - + avctx->real_pict_num = s->picture_number; avctx->header_bits = s->header_bits; avctx->mv_bits = s->mv_bits; @@ -893,11 +1031,12 @@ int MPV_encode_picture(AVCodecContext *avctx, if (s->out_format == FMT_MJPEG) mjpeg_picture_trailer(s); - avctx->quality = s->qscale; - + if(!s->fixed_qscale) + avctx->quality = s->qscale; + if(s->flags&CODEC_FLAG_PASS1) ff_write_pass1_stats(s); - + } s->input_picture_number++; @@ -905,10 +1044,10 @@ int MPV_encode_picture(AVCodecContext *avctx, flush_put_bits(&s->pb); s->frame_bits = (pbBufPtr(&s->pb) - s->pb.buf) * 8; - + s->total_bits += s->frame_bits; avctx->frame_bits = s->frame_bits; -//printf("fcode: %d, type: %d, head: %d, mv: %d, misc: %d, frame: %d, itex: %d, ptex: %d\n", +//printf("fcode: %d, type: %d, head: %d, mv: %d, misc: %d, frame: %d, itex: %d, ptex: %d\n", //s->f_code, avctx->key_frame, s->header_bits, s->mv_bits, s->misc_bits, s->frame_bits, s->i_tex_bits, s->p_tex_bits); #if 0 //dump some stats to stats.txt for testing/debuging if(s->max_b_frames==0) @@ -934,15 +1073,13 @@ if(s->max_b_frames==0) static inline void gmc1_motion(MpegEncContext *s, UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr, int dest_offset, - UINT8 **ref_picture, int src_offset, - int h) + UINT8 **ref_picture, int src_offset) { UINT8 *ptr; int offset, src_x, src_y, linesize, uvlinesize; int motion_x, motion_y; int emu=0; - if(s->real_sprite_warping_points>1) printf("more than 1 warp point isnt supported\n"); motion_x= s->sprite_offset[0][0]; motion_y= s->sprite_offset[0][1]; src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1)); @@ -958,19 +1095,34 @@ static inline void gmc1_motion(MpegEncContext *s, linesize = s->linesize; uvlinesize = s->uvlinesize; + ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset; dest_y+=dest_offset; 
if(s->flags&CODEC_FLAG_EMU_EDGE){ if(src_x<0 || src_y<0 || src_x + (motion_x&15) + 16 > s->h_edge_pos - || src_y + (motion_y&15) + h > s->v_edge_pos){ - emulated_edge_mc(s, ptr, linesize, 17, h+1, src_x, src_y, s->h_edge_pos, s->v_edge_pos); + || src_y + (motion_y&15) + 16 > s->v_edge_pos){ + emulated_edge_mc(s, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos); ptr= s->edge_emu_buffer; emu=1; } } - gmc1(dest_y , ptr , linesize, h, motion_x&15, motion_y&15, s->no_rounding); - gmc1(dest_y+8, ptr+8, linesize, h, motion_x&15, motion_y&15, s->no_rounding); + + if((motion_x|motion_y)&7){ + ff_gmc1(dest_y , ptr , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding); + ff_gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding); + }else{ + int dxy; + + dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2); + if (s->no_rounding){ + put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16); + }else{ + put_pixels_tab [0][dxy](dest_y, ptr, linesize, 16); + } + } + + if(s->flags&CODEC_FLAG_GRAY) return; motion_x= s->sprite_offset[1][0]; motion_y= s->sprite_offset[1][1]; @@ -988,27 +1140,91 @@ static inline void gmc1_motion(MpegEncContext *s, offset = (src_y * uvlinesize) + src_x + (src_offset>>1); ptr = ref_picture[1] + offset; if(emu){ - emulated_edge_mc(s, ptr, uvlinesize, 9, (h>>1)+1, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1); + emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1); ptr= s->edge_emu_buffer; } - gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, h>>1, motion_x&15, motion_y&15, s->no_rounding); - + ff_gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding); + ptr = ref_picture[2] + offset; if(emu){ - emulated_edge_mc(s, ptr, uvlinesize, 9, (h>>1)+1, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1); + emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1); ptr= s->edge_emu_buffer; } - 
gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, h>>1, motion_x&15, motion_y&15, s->no_rounding); - + ff_gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding); + return; } -static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h, +static inline void gmc_motion(MpegEncContext *s, + UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr, + int dest_offset, + UINT8 **ref_picture, int src_offset) +{ + UINT8 *ptr; + int linesize, uvlinesize; + const int a= s->sprite_warping_accuracy; + int ox, oy; + + linesize = s->linesize; + uvlinesize = s->uvlinesize; + + ptr = ref_picture[0] + src_offset; + + dest_y+=dest_offset; + + ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16; + oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16; + + ff_gmc(dest_y, ptr, linesize, 16, + ox, + oy, + s->sprite_delta[0][0], s->sprite_delta[0][1], + s->sprite_delta[1][0], s->sprite_delta[1][1], + a+1, (1<<(2*a+1)) - s->no_rounding, + s->h_edge_pos, s->v_edge_pos); + ff_gmc(dest_y+8, ptr, linesize, 16, + ox + s->sprite_delta[0][0]*8, + oy + s->sprite_delta[1][0]*8, + s->sprite_delta[0][0], s->sprite_delta[0][1], + s->sprite_delta[1][0], s->sprite_delta[1][1], + a+1, (1<<(2*a+1)) - s->no_rounding, + s->h_edge_pos, s->v_edge_pos); + + if(s->flags&CODEC_FLAG_GRAY) return; + + + dest_cb+=dest_offset>>1; + dest_cr+=dest_offset>>1; + + ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8; + oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8; + + ptr = ref_picture[1] + (src_offset>>1); + ff_gmc(dest_cb, ptr, uvlinesize, 8, + ox, + oy, + s->sprite_delta[0][0], s->sprite_delta[0][1], + s->sprite_delta[1][0], s->sprite_delta[1][1], + a+1, (1<<(2*a+1)) - s->no_rounding, + s->h_edge_pos>>1, s->v_edge_pos>>1); + + ptr = ref_picture[2] + (src_offset>>1); + 
ff_gmc(dest_cr, ptr, uvlinesize, 8, + ox, + oy, + s->sprite_delta[0][0], s->sprite_delta[0][1], + s->sprite_delta[1][0], s->sprite_delta[1][1], + a+1, (1<<(2*a+1)) - s->no_rounding, + s->h_edge_pos>>1, s->v_edge_pos>>1); +} + + +static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h, int src_x, int src_y, int w, int h){ int x, y; int start_y, start_x, end_y, end_x; UINT8 *buf= s->edge_emu_buffer; - + if(src_y>= h){ src+= (h-1-src_y)*linesize; src_y=h-1; @@ -1049,13 +1265,13 @@ static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int bl buf[x + y*linesize]= buf[x + (end_y-1)*linesize]; } } - + for(y=0; yquarter_sample) { motion_x>>=1; @@ -1085,7 +1301,7 @@ if(s->quarter_sample) dxy = ((motion_y & 1) << 1) | (motion_x & 1); src_x = s->mb_x * 16 + (motion_x >> 1); src_y = s->mb_y * (16 >> field_based) + (motion_y >> 1); - + /* WARNING: do no forget half pels */ height = s->height >> field_based; v_edge_pos = s->v_edge_pos >> field_based; @@ -1103,8 +1319,9 @@ if(s->quarter_sample) if(s->flags&CODEC_FLAG_EMU_EDGE){ if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 16 > s->h_edge_pos || src_y + (motion_y&1) + h > v_edge_pos){ - emulated_edge_mc(s, ptr, linesize, 17, h+1, src_x, src_y, s->h_edge_pos, v_edge_pos); - ptr= s->edge_emu_buffer; + emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based, + src_x, src_y<h_edge_pos, s->v_edge_pos); + ptr= s->edge_emu_buffer + src_offset; emu=1; } } @@ -1127,7 +1344,7 @@ if(s->quarter_sample) mx >>= 1; my >>= 1; } - + src_x = s->mb_x * 8 + mx; src_y = s->mb_y * (8 >> field_based) + my; src_x = clip(src_x, -8, s->width >> 1); @@ -1139,15 +1356,17 @@ if(s->quarter_sample) offset = (src_y * uvlinesize) + src_x + (src_offset >> 1); ptr = ref_picture[1] + offset; if(emu){ - emulated_edge_mc(s, ptr, uvlinesize, 9, (h>>1)+1, src_x, src_y, s->h_edge_pos>>1, v_edge_pos>>1); - ptr= s->edge_emu_buffer; + emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 
9+field_based, + src_x, src_y<h_edge_pos>>1, s->v_edge_pos>>1); + ptr= s->edge_emu_buffer + (src_offset >> 1); } pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr, uvlinesize, h >> 1); ptr = ref_picture[2] + offset; if(emu){ - emulated_edge_mc(s, ptr, uvlinesize, 9, (h>>1)+1, src_x, src_y, s->h_edge_pos>>1, v_edge_pos>>1); - ptr= s->edge_emu_buffer; + emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, + src_x, src_y<h_edge_pos>>1, s->v_edge_pos>>1); + ptr= s->edge_emu_buffer + (src_offset >> 1); } pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr, uvlinesize, h >> 1); } @@ -1181,12 +1400,13 @@ static inline void qpel_motion(MpegEncContext *s, ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset; dest_y += dest_offset; //printf("%d %d %d\n", src_x, src_y, dxy); - + if(s->flags&CODEC_FLAG_EMU_EDGE){ if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 16 > s->h_edge_pos || src_y + (motion_y&3) + h > v_edge_pos){ - emulated_edge_mc(s, ptr, linesize, 17, h+1, src_x, src_y, s->h_edge_pos, v_edge_pos); - ptr= s->edge_emu_buffer; + emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based, + src_x, src_y<h_edge_pos, s->v_edge_pos); + ptr= s->edge_emu_buffer + src_offset; emu=1; } } @@ -1204,7 +1424,7 @@ static inline void qpel_motion(MpegEncContext *s, if(field_based){ mx= motion_x/2; my= motion_y>>1; - }else if(s->divx_version){ + }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){ mx= (motion_x>>1)|(motion_x&1); my= (motion_y>>1)|(motion_y&1); }else{ @@ -1229,23 +1449,25 @@ static inline void qpel_motion(MpegEncContext *s, offset = (src_y * uvlinesize) + src_x + (src_offset >> 1); ptr = ref_picture[1] + offset; if(emu){ - emulated_edge_mc(s, ptr, uvlinesize, 9, (h>>1)+1, src_x, src_y, s->h_edge_pos>>1, v_edge_pos>>1); - ptr= s->edge_emu_buffer; + emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, + src_x, src_y<h_edge_pos>>1, s->v_edge_pos>>1); + ptr= s->edge_emu_buffer + (src_offset >> 1); } 
pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr, uvlinesize, h >> 1); - + ptr = ref_picture[2] + offset; if(emu){ - emulated_edge_mc(s, ptr, uvlinesize, 9, (h>>1)+1, src_x, src_y, s->h_edge_pos>>1, v_edge_pos>>1); - ptr= s->edge_emu_buffer; + emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, + src_x, src_y<h_edge_pos>>1, s->v_edge_pos>>1); + ptr= s->edge_emu_buffer + (src_offset >> 1); } pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr, uvlinesize, h >> 1); } -static inline void MPV_motion(MpegEncContext *s, +static inline void MPV_motion(MpegEncContext *s, UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr, - int dir, UINT8 **ref_picture, + int dir, UINT8 **ref_picture, op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16]) { int dxy, offset, mx, my, src_x, src_y, motion_x, motion_y; @@ -1259,9 +1481,13 @@ static inline void MPV_motion(MpegEncContext *s, switch(s->mv_type) { case MV_TYPE_16X16: if(s->mcsel){ - gmc1_motion(s, dest_y, dest_cb, dest_cr, 0, - ref_picture, 0, - 16); + if(s->real_sprite_warping_points==1){ + gmc1_motion(s, dest_y, dest_cb, dest_cr, 0, + ref_picture, 0); + }else{ + gmc_motion(s, dest_y, dest_cb, dest_cr, 0, + ref_picture, 0); + } }else if(s->quarter_sample){ qpel_motion(s, dest_y, dest_cb, dest_cr, 0, ref_picture, 0, @@ -1272,7 +1498,7 @@ static inline void MPV_motion(MpegEncContext *s, ref_picture, 0, 0, pix_op, s->mv[dir][0][0], s->mv[dir][0][1], 16); - } + } break; case MV_TYPE_8X8: mx = 0; @@ -1285,7 +1511,7 @@ static inline void MPV_motion(MpegEncContext *s, dxy = ((motion_y & 3) << 2) | (motion_x & 3); src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8; src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8; - + /* WARNING: do no forget half pels */ src_x = clip(src_x, -16, s->width); if (src_x == s->width) @@ -1293,7 +1519,7 @@ static inline void MPV_motion(MpegEncContext *s, src_y = clip(src_y, -16, s->height); if (src_y == s->height) dxy &= ~12; - + ptr = ref_picture[0] + (src_y * s->linesize) + (src_x); 
if(s->flags&CODEC_FLAG_EMU_EDGE){ if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 8 > s->h_edge_pos @@ -1316,7 +1542,7 @@ static inline void MPV_motion(MpegEncContext *s, dxy = ((motion_y & 1) << 1) | (motion_x & 1); src_x = mb_x * 16 + (motion_x >> 1) + (i & 1) * 8; src_y = mb_y * 16 + (motion_y >> 1) + (i >>1) * 8; - + /* WARNING: do no forget half pels */ src_x = clip(src_x, -16, s->width); if (src_x == s->width) @@ -1324,7 +1550,7 @@ static inline void MPV_motion(MpegEncContext *s, src_y = clip(src_y, -16, s->height); if (src_y == s->height) dxy &= ~2; - + ptr = ref_picture[0] + (src_y * s->linesize) + (src_x); if(s->flags&CODEC_FLAG_EMU_EDGE){ if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 8 > s->h_edge_pos @@ -1370,7 +1596,7 @@ static inline void MPV_motion(MpegEncContext *s, src_y = clip(src_y, -8, s->height/2); if (src_y == s->height/2) dxy &= ~2; - + offset = (src_y * (s->uvlinesize)) + src_x; ptr = ref_picture[1] + offset; if(s->flags&CODEC_FLAG_EMU_EDGE){ @@ -1404,7 +1630,7 @@ static inline void MPV_motion(MpegEncContext *s, 1, pix_op, qpix_op, s->mv[dir][1][0], s->mv[dir][1][1], 8); }else{ - /* top field */ + /* top field */ mpeg_motion(s, dest_y, dest_cb, dest_cr, 0, ref_picture, s->field_select[dir][0] ? 
s->linesize : 0, 1, pix_op, @@ -1416,7 +1642,7 @@ static inline void MPV_motion(MpegEncContext *s, s->mv[dir][1][0], s->mv[dir][1][1], 8); } } else { - + } break; @@ -1425,30 +1651,29 @@ static inline void MPV_motion(MpegEncContext *s, /* put block[] to dest[] */ -static inline void put_dct(MpegEncContext *s, +static inline void put_dct(MpegEncContext *s, DCTELEM *block, int i, UINT8 *dest, int line_size) { - if (!s->mpeg2) - s->dct_unquantize(s, block, i, s->qscale); - ff_idct_put (dest, line_size, block); + s->dct_unquantize(s, block, i, s->qscale); + s->idct_put (dest, line_size, block); } /* add block[] to dest[] */ -static inline void add_dct(MpegEncContext *s, +static inline void add_dct(MpegEncContext *s, DCTELEM *block, int i, UINT8 *dest, int line_size) { if (s->block_last_index[i] >= 0) { - ff_idct_add (dest, line_size, block); + s->idct_add (dest, line_size, block); } } -static inline void add_dequant_dct(MpegEncContext *s, +static inline void add_dequant_dct(MpegEncContext *s, DCTELEM *block, int i, UINT8 *dest, int line_size) { if (s->block_last_index[i] >= 0) { s->dct_unquantize(s, block, i, s->qscale); - ff_idct_add (dest, line_size, block); + s->idct_add (dest, line_size, block); } } @@ -1459,9 +1684,9 @@ void ff_clean_intra_table_entries(MpegEncContext *s) { int wrap = s->block_wrap[0]; int xy = s->block_index[0]; - - s->dc_val[0][xy ] = - s->dc_val[0][xy + 1 ] = + + s->dc_val[0][xy ] = + s->dc_val[0][xy + 1 ] = s->dc_val[0][xy + wrap] = s->dc_val[0][xy + 1 + wrap] = 1024; /* ac pred */ @@ -1481,7 +1706,7 @@ void ff_clean_intra_table_entries(MpegEncContext *s) /* ac pred */ memset(s->ac_val[1][xy], 0, 16 * sizeof(INT16)); memset(s->ac_val[2][xy], 0, 16 * sizeof(INT16)); - + s->mbintra_table[s->mb_x + s->mb_y*s->mb_width]= 0; } @@ -1529,7 +1754,7 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) /* update motion predictor, not for B-frames as they need the motion_val from the last P/S-Frame */ if (s->out_format == FMT_H263 && 
s->pict_type!=B_TYPE) { //FIXME move into h263.c if possible, format specific stuff shouldnt be here - + const int wrap = s->block_wrap[0]; const int xy = s->block_index[0]; const int mb_index= s->mb_x + s->mb_y*s->mb_width; @@ -1570,14 +1795,14 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) s->motion_val[xy + 1 + wrap][1] = motion_y; } } - + if (!(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { UINT8 *dest_y, *dest_cb, *dest_cr; int dct_linesize, dct_offset; op_pixels_func (*op_pix)[4]; qpel_mc_func (*op_qpix)[16]; - /* avoid copy if macroblock skipped in last frame too + /* avoid copy if macroblock skipped in last frame too dont touch it for B-frames as they need the skip info from the next p-frame */ if (s->pict_type != B_TYPE) { UINT8 *mbskip_ptr = &s->mbskip_table[mb_xy]; @@ -1587,18 +1812,24 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) (*mbskip_ptr) ++; /* indicate that this time we skiped it */ if(*mbskip_ptr >99) *mbskip_ptr= 99; - /* if previous was skipped too, then nothing to do ! + /* if previous was skipped too, then nothing to do ! 
skip only during decoding as we might trash the buffers during encoding a bit */ - if (*mbskip_ptr >= s->ip_buffer_count && !s->encoding) + if (*mbskip_ptr >= s->ip_buffer_count && !s->encoding) goto the_end; } else { *mbskip_ptr = 0; /* not skipped */ } } - dest_y = s->current_picture [0] + (mb_y * 16* s->linesize ) + mb_x * 16; - dest_cb = s->current_picture[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8; - dest_cr = s->current_picture[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8; + if(s->pict_type==B_TYPE && s->avctx->draw_horiz_band){ + dest_y = s->current_picture [0] + mb_x * 16; + dest_cb = s->current_picture[1] + mb_x * 8; + dest_cr = s->current_picture[2] + mb_x * 8; + }else{ + dest_y = s->current_picture [0] + (mb_y * 16* s->linesize ) + mb_x * 16; + dest_cb = s->current_picture[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8; + dest_cr = s->current_picture[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8; + } if (s->interlaced_dct) { dct_linesize = s->linesize * 2; @@ -1612,7 +1843,7 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) /* motion handling */ /* decoding or more than one mb_type (MC was allready done otherwise) */ if((!s->encoding) || (s->mb_type[mb_xy]&(s->mb_type[mb_xy]-1))){ - if ((!s->no_rounding) || s->pict_type==B_TYPE){ + if ((!s->no_rounding) || s->pict_type==B_TYPE){ op_pix = put_pixels_tab; op_qpix= put_qpel_pixels_tab; }else{ @@ -1634,7 +1865,8 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) if(s->hurry_up>1) goto the_end; /* add dct residue */ - if(s->encoding || !(s->mpeg2 || s->h263_msmpeg4 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){ + if(s->encoding || !( s->mpeg2 || s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO + || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){ add_dequant_dct(s, block[0], 0, dest_y, dct_linesize); add_dequant_dct(s, block[1], 1, dest_y + 8, dct_linesize); add_dequant_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize); @@ -1657,14 +1889,26 @@ void MPV_decode_mb(MpegEncContext 
*s, DCTELEM block[6][64]) } } else { /* dct only in intra block */ - put_dct(s, block[0], 0, dest_y, dct_linesize); - put_dct(s, block[1], 1, dest_y + 8, dct_linesize); - put_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize); - put_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize); - - if(!(s->flags&CODEC_FLAG_GRAY)){ - put_dct(s, block[4], 4, dest_cb, s->uvlinesize); - put_dct(s, block[5], 5, dest_cr, s->uvlinesize); + if(s->encoding || !(s->mpeg2 || s->codec_id==CODEC_ID_MPEG1VIDEO)){ + put_dct(s, block[0], 0, dest_y, dct_linesize); + put_dct(s, block[1], 1, dest_y + 8, dct_linesize); + put_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize); + put_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize); + + if(!(s->flags&CODEC_FLAG_GRAY)){ + put_dct(s, block[4], 4, dest_cb, s->uvlinesize); + put_dct(s, block[5], 5, dest_cr, s->uvlinesize); + } + }else{ + s->idct_put(dest_y , dct_linesize, block[0]); + s->idct_put(dest_y + 8, dct_linesize, block[1]); + s->idct_put(dest_y + dct_offset , dct_linesize, block[2]); + s->idct_put(dest_y + dct_offset + 8, dct_linesize, block[3]); + + if(!(s->flags&CODEC_FLAG_GRAY)){ + s->idct_put(dest_cb, s->uvlinesize, block[4]); + s->idct_put(dest_cr, s->uvlinesize, block[5]); + } } } } @@ -1700,7 +1944,7 @@ static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int th if(last_index<=skip_dc - 1) return; for(i=0; i<=last_index; i++){ - const int j = zigzag_direct[i]; + const int j = s->intra_scantable.permutated[i]; const int level = ABS(block[j]); if(level==1){ if(skip_dc && i==0) continue; @@ -1714,7 +1958,7 @@ static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int th } if(score >= threshold) return; for(i=skip_dc; i<=last_index; i++){ - const int j = zigzag_direct[i]; + const int j = s->intra_scantable.permutated[i]; block[j]=0; } if(block[0]) s->block_last_index[n]= 0; @@ -1726,69 +1970,267 @@ static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int 
last_index int i; const int maxlevel= s->max_qcoeff; const int minlevel= s->min_qcoeff; - - for(i=0;i<=last_index; i++){ - const int j = zigzag_direct[i]; + + if(s->mb_intra){ + i=1; //skip clipping of intra dc + }else + i=0; + + for(;i<=last_index; i++){ + const int j= s->intra_scantable.permutated[i]; int level = block[j]; - + if (level>maxlevel) level=maxlevel; else if(levelmb_intra){ + i=1; //skip clipping of intra dc + //FIXME requantize, note (mpeg1/h263/h263p-aic dont need it,...) + }else + i=0; + + for(;i<=s->block_last_index[n]; i++){ + const int j = s->intra_scantable.permutated[i]; + int level = block[j]; + + block[j]= ROUNDED_DIV(level*oldq, newq); + } + + for(i=s->block_last_index[n]; i>=0; i--){ + const int j = s->intra_scantable.permutated[i]; + if(block[j]) break; + } + s->block_last_index[n]= i; +} + +static inline void auto_requantize_coeffs(MpegEncContext *s, DCTELEM block[6][64]) +{ + int i,n, newq; + const int maxlevel= s->max_qcoeff; + const int minlevel= s->min_qcoeff; + int largest=0, smallest=0; + + assert(s->adaptive_quant); + + for(n=0; n<6; n++){ + if(s->mb_intra){ + i=1; //skip clipping of intra dc + //FIXME requantize, note (mpeg1/h263/h263p-aic dont need it,...) 
+ }else + i=0; + + for(;i<=s->block_last_index[n]; i++){ + const int j = s->intra_scantable.permutated[i]; + int level = block[n][j]; + if(largest < level) largest = level; + if(smallest > level) smallest= level; + } + } + + for(newq=s->qscale+1; newq<32; newq++){ + if( ROUNDED_DIV(smallest*s->qscale, newq) >= minlevel + && ROUNDED_DIV(largest *s->qscale, newq) <= maxlevel) + break; + } + + if(s->out_format==FMT_H263){ + /* h263 like formats cannot change qscale by more than 2 easiely */ + if(s->avctx->qmin + 2 < newq) + newq= s->avctx->qmin + 2; + } + + for(n=0; n<6; n++){ + requantize_coeffs(s, block[n], s->qscale, newq, n); + clip_coeffs(s, block[n], s->block_last_index[n]); + } + + s->dquant+= newq - s->qscale; + s->qscale= newq; +} +#if 0 +static int pix_vcmp16x8(UINT8 *s, int stride){ //FIXME move to dsputil & optimize + int score=0; + int x,y; + + for(y=0; y<7; y++){ + for(x=0; x<16; x+=4){ + score+= ABS(s[x ] - s[x +stride]) + ABS(s[x+1] - s[x+1+stride]) + +ABS(s[x+2] - s[x+2+stride]) + ABS(s[x+3] - s[x+3+stride]); + } + s+= stride; + } + + return score; +} + +static int pix_diff_vcmp16x8(UINT8 *s1, UINT8*s2, int stride){ //FIXME move to dsputil & optimize + int score=0; + int x,y; + + for(y=0; y<7; y++){ + for(x=0; x<16; x++){ + score+= ABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]); + } + s1+= stride; + s2+= stride; + } + + return score; +} +#else +#define SQ(a) ((a)*(a)) + +static int pix_vcmp16x8(UINT8 *s, int stride){ //FIXME move to dsputil & optimize + int score=0; + int x,y; + + for(y=0; y<7; y++){ + for(x=0; x<16; x+=4){ + score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) + +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); + } + s+= stride; + } + + return score; +} + +static int pix_diff_vcmp16x8(UINT8 *s1, UINT8*s2, int stride){ //FIXME move to dsputil & optimize + int score=0; + int x,y; + + for(y=0; y<7; y++){ + for(x=0; x<16; x++){ + score+= SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]); + } + s1+= stride; + s2+= 
stride; + } + + return score; +} + +#endif + +void ff_draw_horiz_band(MpegEncContext *s){ + if ( s->avctx->draw_horiz_band + && (s->num_available_buffers>=1 || (!s->has_b_frames)) ) { + UINT8 *src_ptr[3]; + int y, h, offset; + y = s->mb_y * 16; + h = s->height - y; + if (h > 16) + h = 16; + + if(s->pict_type==B_TYPE) + offset = 0; + else + offset = y * s->linesize; + + if(s->pict_type==B_TYPE || (!s->has_b_frames)){ + src_ptr[0] = s->current_picture[0] + offset; + src_ptr[1] = s->current_picture[1] + (offset >> 2); + src_ptr[2] = s->current_picture[2] + (offset >> 2); + } else { + src_ptr[0] = s->last_picture[0] + offset; + src_ptr[1] = s->last_picture[1] + (offset >> 2); + src_ptr[2] = s->last_picture[2] + (offset >> 2); + } + s->avctx->draw_horiz_band(s->avctx, src_ptr, s->linesize, + y, s->width, h); + } +} + static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) { const int mb_x= s->mb_x; const int mb_y= s->mb_y; int i; int skip_dct[6]; -#if 0 - if (s->interlaced_dct) { - dct_linesize = s->linesize * 2; - dct_offset = s->linesize; - } else { - dct_linesize = s->linesize; - dct_offset = s->linesize * 8; - } -#endif + int dct_offset = s->linesize*8; //default for progressive frames + for(i=0; i<6; i++) skip_dct[i]=0; + + if(s->adaptive_quant){ + s->dquant= s->qscale_table[mb_x + mb_y*s->mb_width] - s->qscale; + + if(s->out_format==FMT_H263){ + if (s->dquant> 2) s->dquant= 2; + else if(s->dquant<-2) s->dquant=-2; + } + + if(s->codec_id==CODEC_ID_MPEG4){ + if(!s->mb_intra){ + assert(s->dquant==0 || s->mv_type!=MV_TYPE_8X8); + + if(s->mv_dir&MV_DIRECT) + s->dquant=0; + } + } + s->qscale+= s->dquant; + s->y_dc_scale= s->y_dc_scale_table[ s->qscale ]; + s->c_dc_scale= s->c_dc_scale_table[ s->qscale ]; + } if (s->mb_intra) { UINT8 *ptr; - int wrap; + int wrap_y; int emu=0; - wrap = s->linesize; - ptr = s->new_picture[0] + (mb_y * 16 * wrap) + mb_x * 16; + wrap_y = s->linesize; + ptr = s->new_picture[0] + (mb_y * 16 * wrap_y) + mb_x * 16; + if(mb_x*16+16 > 
s->width || mb_y*16+16 > s->height){ - emulated_edge_mc(s, ptr, wrap, 16, 16, mb_x*16, mb_y*16, s->width, s->height); + emulated_edge_mc(s, ptr, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height); ptr= s->edge_emu_buffer; emu=1; } - get_pixels(s->block[0], ptr , wrap); - get_pixels(s->block[1], ptr + 8, wrap); - get_pixels(s->block[2], ptr + 8 * wrap , wrap); - get_pixels(s->block[3], ptr + 8 * wrap + 8, wrap); + + if(s->flags&CODEC_FLAG_INTERLACED_DCT){ + int progressive_score, interlaced_score; + + progressive_score= pix_vcmp16x8(ptr, wrap_y ) + pix_vcmp16x8(ptr + wrap_y*8, wrap_y ); + interlaced_score = pix_vcmp16x8(ptr, wrap_y*2) + pix_vcmp16x8(ptr + wrap_y , wrap_y*2); + + if(progressive_score > interlaced_score + 100){ + s->interlaced_dct=1; + + dct_offset= wrap_y; + wrap_y<<=1; + }else + s->interlaced_dct=0; + } + + get_pixels(s->block[0], ptr , wrap_y); + get_pixels(s->block[1], ptr + 8, wrap_y); + get_pixels(s->block[2], ptr + dct_offset , wrap_y); + get_pixels(s->block[3], ptr + dct_offset + 8, wrap_y); if(s->flags&CODEC_FLAG_GRAY){ skip_dct[4]= 1; skip_dct[5]= 1; }else{ - wrap >>=1; - ptr = s->new_picture[1] + (mb_y * 8 * wrap) + mb_x * 8; + int wrap_c = s->uvlinesize; + ptr = s->new_picture[1] + (mb_y * 8 * wrap_c) + mb_x * 8; if(emu){ - emulated_edge_mc(s, ptr, wrap, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); + emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); ptr= s->edge_emu_buffer; } - get_pixels(s->block[4], ptr, wrap); + get_pixels(s->block[4], ptr, wrap_c); - ptr = s->new_picture[2] + (mb_y * 8 * wrap) + mb_x * 8; + ptr = s->new_picture[2] + (mb_y * 8 * wrap_c) + mb_x * 8; if(emu){ - emulated_edge_mc(s, ptr, wrap, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); + emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); ptr= s->edge_emu_buffer; } - get_pixels(s->block[5], ptr, wrap); + get_pixels(s->block[5], ptr, wrap_c); } }else{ op_pixels_func (*op_pix)[4]; @@ -1802,7 
+2244,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) dest_cb = s->current_picture[1] + (mb_y * 8 * (s->uvlinesize)) + mb_x * 8; dest_cr = s->current_picture[2] + (mb_y * 8 * (s->uvlinesize)) + mb_x * 8; wrap_y = s->linesize; - wrap_c = wrap_y>>1; + wrap_c = s->uvlinesize; ptr_y = s->new_picture[0] + (mb_y * 16 * wrap_y) + mb_x * 16; ptr_cb = s->new_picture[1] + (mb_y * 8 * wrap_c) + mb_x * 8; ptr_cr = s->new_picture[2] + (mb_y * 8 * wrap_c) + mb_x * 8; @@ -1829,11 +2271,29 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) ptr_y= s->edge_emu_buffer; emu=1; } + + if(s->flags&CODEC_FLAG_INTERLACED_DCT){ + int progressive_score, interlaced_score; + + progressive_score= pix_diff_vcmp16x8(ptr_y , dest_y , wrap_y ) + + pix_diff_vcmp16x8(ptr_y + wrap_y*8, dest_y + wrap_y*8, wrap_y ); + interlaced_score = pix_diff_vcmp16x8(ptr_y , dest_y , wrap_y*2) + + pix_diff_vcmp16x8(ptr_y + wrap_y , dest_y + wrap_y , wrap_y*2); + + if(progressive_score > interlaced_score + 600){ + s->interlaced_dct=1; + + dct_offset= wrap_y; + wrap_y<<=1; + }else + s->interlaced_dct=0; + } + diff_pixels(s->block[0], ptr_y , dest_y , wrap_y); diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y); - diff_pixels(s->block[2], ptr_y + 8 * wrap_y , dest_y + 8 * wrap_y , wrap_y); - diff_pixels(s->block[3], ptr_y + 8 * wrap_y + 8, dest_y + 8 * wrap_y + 8, wrap_y); - + diff_pixels(s->block[2], ptr_y + dct_offset , dest_y + dct_offset , wrap_y); + diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y); + if(s->flags&CODEC_FLAG_GRAY){ skip_dct[4]= 1; skip_dct[5]= 1; @@ -1850,12 +2310,13 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c); } - /* pre quantization */ + /* pre quantization */ if(s->mc_mb_var[s->mb_width*mb_y+ mb_x]<2*s->qscale*s->qscale){ + //FIXME optimize if(pix_abs8x8(ptr_y , dest_y , wrap_y) < 20*s->qscale) skip_dct[0]= 1; if(pix_abs8x8(ptr_y + 8, 
dest_y + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1; - if(pix_abs8x8(ptr_y + 8*wrap_y , dest_y + 8*wrap_y , wrap_y) < 20*s->qscale) skip_dct[2]= 1; - if(pix_abs8x8(ptr_y + 8*wrap_y + 8, dest_y + 8*wrap_y + 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1; + if(pix_abs8x8(ptr_y +dct_offset , dest_y +dct_offset , wrap_y) < 20*s->qscale) skip_dct[2]= 1; + if(pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1; if(pix_abs8x8(ptr_cb , dest_cb , wrap_y) < 20*s->qscale) skip_dct[4]= 1; if(pix_abs8x8(ptr_cr , dest_cr , wrap_y) < 20*s->qscale) skip_dct[5]= 1; #if 0 @@ -1865,7 +2326,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) for(i=0; i<6; i++) if(skip_dct[i]) num++; stat[num]++; - + if(s->mb_x==0 && s->mb_y==0){ for(i=0; i<7; i++){ printf("%6d %1d\n", stat[i], i); @@ -1876,16 +2337,16 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) } } - + #if 0 { float adap_parm; - + adap_parm = ((s->avg_mb_var << 1) + s->mb_var[s->mb_width*mb_y+mb_x] + 1.0) / ((s->mb_var[s->mb_width*mb_y+mb_x] << 1) + s->avg_mb_var + 1.0); - - printf("\ntype=%c qscale=%2d adap=%0.2f dquant=%4.2f var=%4d avgvar=%4d", - (s->mb_type[s->mb_width*mb_y+mb_x] > 0) ? 'I' : 'P', + + printf("\ntype=%c qscale=%2d adap=%0.2f dquant=%4.2f var=%4d avgvar=%4d", + (s->mb_type[s->mb_width*mb_y+mb_x] > 0) ? 
'I' : 'P', s->qscale, adap_parm, s->qscale*adap_parm, s->mb_var[s->mb_width*mb_y+mb_x], s->avg_mb_var); } @@ -1964,7 +2425,7 @@ static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext d->mb_incr= s->mb_incr; for(i=0; i<3; i++) d->last_dc[i]= s->last_dc[i]; - + /* statistics */ d->mv_bits= s->mv_bits; d->i_tex_bits= s->i_tex_bits; @@ -1982,14 +2443,14 @@ static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){ int i; - memcpy(d->mv, s->mv, 2*4*2*sizeof(int)); + memcpy(d->mv, s->mv, 2*4*2*sizeof(int)); memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop? - + /* mpeg1 */ d->mb_incr= s->mb_incr; for(i=0; i<3; i++) d->last_dc[i]= s->last_dc[i]; - + /* statistics */ d->mv_bits= s->mv_bits; d->i_tex_bits= s->i_tex_bits; @@ -2012,14 +2473,15 @@ static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext * d->block= s->block; for(i=0; i<6; i++) d->block_last_index[i]= s->block_last_index[i]; + d->interlaced_dct= s->interlaced_dct; } -static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type, +static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type, PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2], int *dmin, int *next_block, int motion_x, int motion_y) { int bits_count; - + copy_context_before_encode(s, backup, type); s->block= s->blocks[*next_block]; @@ -2047,7 +2509,7 @@ static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegE static void encode_picture(MpegEncContext *s, int picture_number) { - int mb_x, mb_y, last_gob, pdif = 0; + int mb_x, mb_y, pdif = 0; int i; int bits; MpegEncContext best_s, backup_s; @@ -2070,16 +2532,18 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->block_wrap[3]= s->mb_width*2 + 2; 
s->block_wrap[4]= s->block_wrap[5]= s->mb_width + 2; - + /* Reset the average MB variance */ s->mb_var_sum = 0; s->mc_mb_var_sum = 0; /* we need to initialize some time vars before we can encode b-frames */ if (s->h263_pred && !s->h263_msmpeg4) - ff_set_mpeg4_time(s, s->picture_number); + ff_set_mpeg4_time(s, s->picture_number); s->scene_change_score=0; + + s->qscale= (int)(s->frame_qscale + 0.5); //FIXME qscale / ... stuff for ME ratedistoration /* Estimate motion for every MB */ if(s->pict_type != I_TYPE){ @@ -2111,7 +2575,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) memset(s->motion_val[0], 0, sizeof(INT16)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2); memset(s->p_mv_table , 0, sizeof(INT16)*(s->mb_width+2)*(s->mb_height+2)*2); memset(s->mb_type , MB_TYPE_INTRA, sizeof(UINT8)*s->mb_width*s->mb_height); - + if(!s->fixed_qscale){ /* finding spatial complexity for I-frame rate control */ for(mb_y=0; mb_y < s->mb_height; mb_y++) { @@ -2121,11 +2585,12 @@ static void encode_picture(MpegEncContext *s, int picture_number) uint8_t *pix = s->new_picture[0] + (yy * s->linesize) + xx; int varc; int sum = pix_sum(pix, s->linesize); - + sum= (sum+8)>>4; varc = (pix_norm1(pix, s->linesize) - sum*sum + 500 + 128)>>8; - s->mb_var[s->mb_width * mb_y + mb_x] = varc; + s->mb_var [s->mb_width * mb_y + mb_x] = varc; + s->mb_mean[s->mb_width * mb_y + mb_x] = (sum+7)>>4; s->mb_var_sum += varc; } } @@ -2140,8 +2605,8 @@ static void encode_picture(MpegEncContext *s, int picture_number) } //printf("Scene change detected, encoding as I Frame %d %d\n", s->mb_var_sum, s->mc_mb_var_sum); } - - if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) + + if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) s->f_code= ff_get_best_fcode(s, s->p_mv_table, MB_TYPE_INTER); ff_fix_long_p_mvs(s); if(s->pict_type==B_TYPE){ @@ -2153,20 +2618,37 @@ static void encode_picture(MpegEncContext *s, int picture_number) ff_fix_long_b_mvs(s, s->b_bidir_forw_mv_table, s->f_code, MB_TYPE_BIDIR); 
ff_fix_long_b_mvs(s, s->b_bidir_back_mv_table, s->b_code, MB_TYPE_BIDIR); } + + if (s->fixed_qscale) + s->frame_qscale = s->avctx->quality; + else + s->frame_qscale = ff_rate_estimate_qscale(s); -//printf("f_code %d ///\n", s->f_code); - -// printf("%d %d\n", s->avg_mb_var, s->mc_mb_var); - if (!s->fixed_qscale) - s->qscale = ff_rate_estimate_qscale(s); + if(s->adaptive_quant){ + switch(s->codec_id){ + case CODEC_ID_MPEG4: + ff_clean_mpeg4_qscales(s); + break; + case CODEC_ID_H263: + case CODEC_ID_H263P: + ff_clean_h263_qscales(s); + break; + } + s->qscale= s->qscale_table[0]; + }else + s->qscale= (int)(s->frame_qscale + 0.5); + if (s->out_format == FMT_MJPEG) { /* for mjpeg, we do include qscale in the matrix */ s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0]; - for(i=1;i<64;i++) - s->intra_matrix[i] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3); - convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, - s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias); + for(i=1;i<64;i++){ + int j= s->idct_permutation[i]; + + s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3); + } + convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, + s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias, 8, 8); } s->last_bits= get_bit_count(&s->pb); @@ -2175,11 +2657,11 @@ static void encode_picture(MpegEncContext *s, int picture_number) mjpeg_picture_header(s); break; case FMT_H263: - if (s->h263_msmpeg4) + if (s->h263_msmpeg4) msmpeg4_encode_picture_header(s, picture_number); else if (s->h263_pred) mpeg4_encode_picture_header(s, picture_number); - else if (s->h263_rv10) + else if (s->h263_rv10) rv10_encode_picture_header(s, picture_number); else h263_encode_picture_header(s, picture_number); @@ -2209,46 +2691,21 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->last_mv[0][0][0] = 0; s->last_mv[0][0][1] = 0; - /* Get the GOB height based on picture height */ - if 
(s->out_format == FMT_H263 && !s->h263_pred && !s->h263_msmpeg4) { - if (s->height <= 400) - s->gob_index = 1; - else if (s->height <= 800) - s->gob_index = 2; - else - s->gob_index = 4; - }else if(s->codec_id==CODEC_ID_MPEG4){ - s->gob_index = 1; - } + if (s->codec_id==CODEC_ID_H263 || s->codec_id==CODEC_ID_H263P) + s->gob_index = ff_h263_get_gob_height(s); - if(s->codec_id==CODEC_ID_MPEG4 && s->data_partitioning && s->pict_type!=B_TYPE) + if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame) ff_mpeg4_init_partitions(s); s->resync_mb_x=0; s->resync_mb_y=0; + s->first_slice_line = 1; + s->ptr_lastgob = s->pb.buf; + s->ptr_last_mb_line = s->pb.buf; for(mb_y=0; mb_y < s->mb_height; mb_y++) { - /* Put GOB header based on RTP MTU for formats which support it per line (H263*)*/ - /* TODO: Put all this stuff in a separate generic function */ - if (s->rtp_mode) { - if (!mb_y) { - s->ptr_lastgob = s->pb.buf; - s->ptr_last_mb_line = s->pb.buf; - } else if (s->out_format == FMT_H263 && !s->h263_pred && !s->h263_msmpeg4 && !(mb_y % s->gob_index)) { - // MN: we could move the space check from h263 -> here, as its not h263 specific - last_gob = h263_encode_gob_header(s, mb_y); - if (last_gob) { - s->first_slice_line = 1; - }else{ - /*MN: we reset it here instead at the end of each line cuz mpeg4 can have - slice lines starting & ending in the middle*/ - s->first_slice_line = 0; - } - } - } - s->y_dc_scale= s->y_dc_scale_table[ s->qscale ]; s->c_dc_scale= s->c_dc_scale_table[ s->qscale ]; - + s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1; s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1); s->block_index[2]= s->block_wrap[0]*(mb_y*2 + 2) - 1; @@ -2270,14 +2727,18 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->block_index[4]++; s->block_index[5]++; - /* write gob / video packet header for formats which support it at any MB (MPEG4) */ - if(s->rtp_mode && s->mb_y>0 && s->codec_id==CODEC_ID_MPEG4){ - int pdif= pbBufPtr(&s->pb) - s->ptr_lastgob; - - 
//the *2 is there so we stay below the requested size - if(pdif + s->mb_line_avgsize/s->mb_width >= s->rtp_payload_size){ - if(s->codec_id==CODEC_ID_MPEG4){ - if(s->data_partitioning && s->pict_type!=B_TYPE){ + /* write gob / video packet header */ + if(s->rtp_mode){ + int current_packet_size, is_gob_start; + + current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob; + is_gob_start=0; + + if(s->codec_id==CODEC_ID_MPEG4){ + if(current_packet_size + s->mb_line_avgsize/s->mb_width >= s->rtp_payload_size + && s->mb_y + s->mb_x>0){ + + if(s->partitioned_frame){ ff_mpeg4_merge_partitions(s); ff_mpeg4_init_partitions(s); } @@ -2289,17 +2750,28 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->last_bits= bits; } ff_mpeg4_clean_buffers(s); + is_gob_start=1; } + }else{ + if(current_packet_size + s->mb_line_avgsize*s->gob_index >= s->rtp_payload_size + && s->mb_x==0 && s->mb_y>0 && s->mb_y%s->gob_index==0){ + + h263_encode_gob_header(s, mb_y); + is_gob_start=1; + } + } + + if(is_gob_start){ s->ptr_lastgob = pbBufPtr(&s->pb); s->first_slice_line=1; s->resync_mb_x=mb_x; s->resync_mb_y=mb_y; } + } - if( (s->resync_mb_x == s->mb_x) - && s->resync_mb_y+1 == s->mb_y){ - s->first_slice_line=0; - } + if( (s->resync_mb_x == s->mb_x) + && s->resync_mb_y+1 == s->mb_y){ + s->first_slice_line=0; } if(mb_type & (mb_type-1)){ // more than 1 MB type possible @@ -2309,6 +2781,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) copy_context_before_encode(&backup_s, s, -1); backup_s.pb= s->pb; best_s.data_partitioning= s->data_partitioning; + best_s.partitioned_frame= s->partitioned_frame; if(s->data_partitioning){ backup_s.pb2= s->pb2; backup_s.tex_pb= s->tex_pb; @@ -2320,10 +2793,10 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->mb_intra= 0; s->mv[0][0][0] = s->p_mv_table[xy][0]; s->mv[0][0][1] = s->p_mv_table[xy][1]; - encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER, pb, pb2, tex_pb, + encode_mb_hq(s, &backup_s, &best_s, 
MB_TYPE_INTER, pb, pb2, tex_pb, &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]); } - if(mb_type&MB_TYPE_INTER4V){ + if(mb_type&MB_TYPE_INTER4V){ s->mv_dir = MV_DIR_FORWARD; s->mv_type = MV_TYPE_8X8; s->mb_intra= 0; @@ -2331,7 +2804,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->mv[0][i][0] = s->motion_val[s->block_index[i]][0]; s->mv[0][i][1] = s->motion_val[s->block_index[i]][1]; } - encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER4V, pb, pb2, tex_pb, + encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER4V, pb, pb2, tex_pb, &dmin, &next_block, 0, 0); } if(mb_type&MB_TYPE_FORWARD){ @@ -2340,7 +2813,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->mb_intra= 0; s->mv[0][0][0] = s->b_forw_mv_table[xy][0]; s->mv[0][0][1] = s->b_forw_mv_table[xy][1]; - encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_FORWARD, pb, pb2, tex_pb, + encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_FORWARD, pb, pb2, tex_pb, &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]); } if(mb_type&MB_TYPE_BACKWARD){ @@ -2349,7 +2822,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->mb_intra= 0; s->mv[1][0][0] = s->b_back_mv_table[xy][0]; s->mv[1][0][1] = s->b_back_mv_table[xy][1]; - encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BACKWARD, pb, pb2, tex_pb, + encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BACKWARD, pb, pb2, tex_pb, &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]); } if(mb_type&MB_TYPE_BIDIR){ @@ -2360,7 +2833,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1]; s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0]; s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1]; - encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BIDIR, pb, pb2, tex_pb, + encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BIDIR, pb, pb2, tex_pb, &dmin, &next_block, 0, 0); } if(mb_type&MB_TYPE_DIRECT){ @@ -2371,7 +2844,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->mv[0][0][1] = 
s->b_direct_forw_mv_table[xy][1]; s->mv[1][0][0] = s->b_direct_back_mv_table[xy][0]; s->mv[1][0][1] = s->b_direct_back_mv_table[xy][1]; - encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_DIRECT, pb, pb2, tex_pb, + encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_DIRECT, pb, pb2, tex_pb, &dmin, &next_block, s->b_direct_mv_table[xy][0], s->b_direct_mv_table[xy][1]); } if(mb_type&MB_TYPE_INTRA){ @@ -2380,25 +2853,25 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->mb_intra= 1; s->mv[0][0][0] = 0; s->mv[0][0][1] = 0; - encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTRA, pb, pb2, tex_pb, + encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTRA, pb, pb2, tex_pb, &dmin, &next_block, 0, 0); /* force cleaning of ac/dc pred stuff if needed ... */ if(s->h263_pred || s->h263_aic) s->mbintra_table[mb_x + mb_y*s->mb_width]=1; } copy_context_after_encode(s, &best_s, -1); - + pb_bits_count= get_bit_count(&s->pb); flush_put_bits(&s->pb); ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count); s->pb= backup_s.pb; - + if(s->data_partitioning){ pb2_bits_count= get_bit_count(&s->pb2); flush_put_bits(&s->pb2); ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count); s->pb2= backup_s.pb2; - + tex_pb_bits_count= get_bit_count(&s->tex_pb); flush_put_bits(&s->tex_pb); ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count); @@ -2482,34 +2955,31 @@ static void encode_picture(MpegEncContext *s, int picture_number) } - /* Obtain average GOB size for RTP */ + /* Obtain average mb_row size for RTP */ if (s->rtp_mode) { - if (!mb_y) + if (mb_y==0) s->mb_line_avgsize = pbBufPtr(&s->pb) - s->ptr_last_mb_line; - else if (!(mb_y % s->gob_index)) { + else { s->mb_line_avgsize = (s->mb_line_avgsize + pbBufPtr(&s->pb) - s->ptr_last_mb_line) >> 1; - s->ptr_last_mb_line = pbBufPtr(&s->pb); } - //fprintf(stderr, "\nMB line: %d\tSize: %u\tAvg. 
Size: %u", s->mb_y, - // (s->pb.buf_ptr - s->ptr_last_mb_line), s->mb_line_avgsize); - if(s->codec_id!=CODEC_ID_MPEG4) s->first_slice_line = 0; //FIXME clean + s->ptr_last_mb_line = pbBufPtr(&s->pb); } } emms_c(); - if(s->codec_id==CODEC_ID_MPEG4 && s->data_partitioning && s->pict_type!=B_TYPE) + if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame) ff_mpeg4_merge_partitions(s); if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE) msmpeg4_encode_ext_header(s); - if(s->codec_id==CODEC_ID_MPEG4) + if(s->codec_id==CODEC_ID_MPEG4) ff_mpeg4_stuffing(&s->pb); //if (s->gob_number) // fprintf(stderr,"\nNumber of GOB: %d", s->gob_number); - - /* Send the last GOB if RTP */ + + /* Send the last GOB if RTP */ if (s->rtp_mode) { flush_put_bits(&s->pb); pdif = pbBufPtr(&s->pb) - s->ptr_lastgob; @@ -2521,22 +2991,19 @@ static void encode_picture(MpegEncContext *s, int picture_number) } } -static int dct_quantize_c(MpegEncContext *s, +static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow) { int i, j, level, last_non_zero, q; const int *qmat; + const UINT8 *scantable= s->intra_scantable.scantable; int bias; int max=0; unsigned int threshold1, threshold2; - + s->fdct (block); - /* we need this permutation so that we correct the IDCT - permutation. 
will be moved into DCT code */ - block_permute(block); - if (s->mb_intra) { if (!s->h263_aic) { if (n < 4) @@ -2547,7 +3014,7 @@ static int dct_quantize_c(MpegEncContext *s, } else /* For AIC we skip quant/dequant of INTRADC */ q = 1 << 3; - + /* note: block[0] is assumed to be positive */ block[0] = (block[0] + (q >> 1)) / q; i = 1; @@ -2564,7 +3031,7 @@ static int dct_quantize_c(MpegEncContext *s, threshold2= (threshold1<<1); for(;i<64;i++) { - j = zigzag_direct[i]; + j = scantable[i]; level = block[j]; level = level * qmat[j]; @@ -2585,28 +3052,30 @@ static int dct_quantize_c(MpegEncContext *s, } } *overflow= s->max_qcoeff < max; //overflow might have happend + + /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */ + ff_block_permute(block, s->idct_permutation, scantable, last_non_zero); return last_non_zero; } -static void dct_unquantize_mpeg1_c(MpegEncContext *s, +static void dct_unquantize_mpeg1_c(MpegEncContext *s, DCTELEM *block, int n, int qscale) { int i, level, nCoeffs; const UINT16 *quant_matrix; - if(s->alternate_scan) nCoeffs= 64; - else nCoeffs= s->block_last_index[n]+1; - + nCoeffs= s->block_last_index[n]; + if (s->mb_intra) { - if (n < 4) + if (n < 4) block[0] = block[0] * s->y_dc_scale; else block[0] = block[0] * s->c_dc_scale; /* XXX: only mpeg1 */ quant_matrix = s->intra_matrix; - for(i=1;iintra_scantable.permutated[i]; level = block[j]; if (level) { if (level < 0) { @@ -2628,8 +3097,8 @@ static void dct_unquantize_mpeg1_c(MpegEncContext *s, } else { i = 0; quant_matrix = s->inter_matrix; - for(;iintra_scantable.permutated[i]; level = block[j]; if (level) { if (level < 0) { @@ -2653,23 +3122,23 @@ static void dct_unquantize_mpeg1_c(MpegEncContext *s, } } -static void dct_unquantize_mpeg2_c(MpegEncContext *s, +static void dct_unquantize_mpeg2_c(MpegEncContext *s, DCTELEM *block, int n, int qscale) { int i, level, nCoeffs; const UINT16 *quant_matrix; - if(s->alternate_scan) nCoeffs= 64; - else nCoeffs= 
s->block_last_index[n]+1; - + if(s->alternate_scan) nCoeffs= 63; + else nCoeffs= s->block_last_index[n]; + if (s->mb_intra) { - if (n < 4) + if (n < 4) block[0] = block[0] * s->y_dc_scale; else block[0] = block[0] * s->c_dc_scale; quant_matrix = s->intra_matrix; - for(i=1;iintra_scantable.permutated[i]; level = block[j]; if (level) { if (level < 0) { @@ -2690,8 +3159,8 @@ static void dct_unquantize_mpeg2_c(MpegEncContext *s, int sum=-1; i = 0; quant_matrix = s->inter_matrix; - for(;iintra_scantable.permutated[i]; level = block[j]; if (level) { if (level < 0) { @@ -2716,33 +3185,33 @@ static void dct_unquantize_mpeg2_c(MpegEncContext *s, } -static void dct_unquantize_h263_c(MpegEncContext *s, +static void dct_unquantize_h263_c(MpegEncContext *s, DCTELEM *block, int n, int qscale) { int i, level, qmul, qadd; int nCoeffs; - + + assert(s->block_last_index[n]>=0); + + qadd = (qscale - 1) | 1; + qmul = qscale << 1; + if (s->mb_intra) { if (!s->h263_aic) { - if (n < 4) + if (n < 4) block[0] = block[0] * s->y_dc_scale; else block[0] = block[0] * s->c_dc_scale; - } + }else + qadd = 0; i = 1; - nCoeffs= 64; //does not allways use zigzag table + nCoeffs= 63; //does not allways use zigzag table } else { i = 0; - nCoeffs= zigzag_end[ s->block_last_index[n] ]; + nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; } - qmul = s->qscale << 1; - if (s->h263_aic && s->mb_intra) - qadd = 0; - else - qadd = (s->qscale - 1) | 1; - - for(;idc_val[0][mb_x*2+1 + (i&1) + (mb_y*2+1 + (i>>1))*(s->mb_width*2+2)]; - for(y=0; y<8; y++){ - int x; - for(x=0; x<8; x++){ - dest_y[x + (i&1)*8 + (y + (i>>1)*8)*s->linesize]= dc/8; - } - } - } - dcb = s->dc_val[1][mb_x+1 + (mb_y+1)*(s->mb_width+2)]; - dcr= s->dc_val[2][mb_x+1 + (mb_y+1)*(s->mb_width+2)]; - for(y=0; y<8; y++){ - int x; - for(x=0; x<8; x++){ - dest_cb[x + y*(s->uvlinesize)]= dcb/8; - dest_cr[x + y*(s->uvlinesize)]= dcr/8; - } - } -} - -/** - * will conceal past errors, and allso drop b frames if needed - * - */ -void 
ff_conceal_past_errors(MpegEncContext *s, int unknown_pos) -{ - int mb_x= s->mb_x; - int mb_y= s->mb_y; - int mb_dist=0; - int i, intra_count=0, inter_count=0; - int intra_conceal= s->msmpeg4_version ? 50 : 50; //FIXME finetune - int inter_conceal= s->msmpeg4_version ? 50 : 50; - - // for last block - if(mb_x>=s->mb_width) mb_x= s->mb_width -1; - if(mb_y>=s->mb_height) mb_y= s->mb_height-1; - - if(s->decoding_error==0 && unknown_pos){ - if(s->data_partitioning && s->pict_type!=B_TYPE) - s->decoding_error= DECODING_AC_LOST; - else - s->decoding_error= DECODING_DESYNC; - } - - if(s->decoding_error==DECODING_DESYNC && s->pict_type!=B_TYPE) s->next_p_frame_damaged=1; - - for(i=mb_x + mb_y*s->mb_width; i>=0; i--){ - if(s->mbintra_table[i]) intra_count++; - else inter_count++; - } - - if(s->decoding_error==DECODING_AC_LOST){ - intra_conceal*=2; - inter_conceal*=2; - }else if(s->decoding_error==DECODING_ACDC_LOST){ - intra_conceal*=2; - inter_conceal*=2; - } - - if(unknown_pos && (intra_countmb_num; -// printf("%d %d\n",intra_count, inter_count); - } - - fprintf(stderr, "concealing errors\n"); - - /* for all MBs from the current one back until the last resync marker */ - for(; mb_y>=0 && mb_y>=s->resync_mb_y; mb_y--){ - for(; mb_x>=0; mb_x--){ - uint8_t *dest_y = s->current_picture[0] + (mb_y * 16* s->linesize ) + mb_x * 16; - uint8_t *dest_cb = s->current_picture[1] + (mb_y * 8 * (s->uvlinesize)) + mb_x * 8; - uint8_t *dest_cr = s->current_picture[2] + (mb_y * 8 * (s->uvlinesize)) + mb_x * 8; - int mb_x_backup= s->mb_x; //FIXME pass xy to mpeg_motion - int mb_y_backup= s->mb_y; - s->mb_x=mb_x; - s->mb_y=mb_y; - if(s->mbintra_table[mb_y*s->mb_width + mb_x] && mb_distdecoding_error==DECODING_AC_LOST){ - remove_ac(s, dest_y, dest_cb, dest_cr, mb_x, mb_y); -// printf("remove ac to %d %d\n", mb_x, mb_y); - }else{ - mpeg_motion(s, dest_y, dest_cb, dest_cr, 0, - s->last_picture, 0, 0, put_pixels_tab, - 0/*mx*/, 0/*my*/, 16); - } - } - else if(!s->mbintra_table[mb_y*s->mb_width 
+ mb_x] && mb_distdecoding_error!=DECODING_DESYNC){ - int xy= mb_x*2+1 + (mb_y*2+1)*(s->mb_width*2+2); - mx= s->motion_val[ xy ][0]; - my= s->motion_val[ xy ][1]; - } - - mpeg_motion(s, dest_y, dest_cb, dest_cr, 0, - s->last_picture, 0, 0, put_pixels_tab, - mx, my, 16); - } - s->mb_x= mb_x_backup; - s->mb_y= mb_y_backup; - - if(mb_x== s->resync_mb_x && mb_y== s->resync_mb_y) return; - if(!s->mbskip_table[mb_x + mb_y*s->mb_width]) mb_dist++; - } - mb_x=s->mb_width-1; - } -} - AVCodec mpeg1video_encoder = { "mpeg1video", CODEC_TYPE_VIDEO, diff --git a/src/libffmpeg/libavcodec/mpegvideo.h b/src/libffmpeg/libavcodec/mpegvideo.h index 962672005..d50d1ad0e 100644 --- a/src/libffmpeg/libavcodec/mpegvideo.h +++ b/src/libffmpeg/libavcodec/mpegvideo.h @@ -20,12 +20,12 @@ #ifndef AVCODEC_MPEGVIDEO_H #define AVCODEC_MPEGVIDEO_H -#define FRAME_SKIPED 100 /* return value for header parsers if frame is not coded */ +#define FRAME_SKIPED 100 // return value for header parsers if frame is not coded enum OutputFormat { FMT_MPEG1, FMT_H263, - FMT_MJPEG, + FMT_MJPEG, }; #define MPEG_BUF_SIZE (16 * 1024) @@ -53,7 +53,7 @@ typedef struct Predictor{ typedef struct RateControlEntry{ int pict_type; - int qscale; + float qscale; int mv_bits; int i_tex_bits; int p_tex_bits; @@ -76,12 +76,10 @@ typedef struct RateControlContext{ Predictor pred[5]; double short_term_qsum; /* sum of recent qscales */ double short_term_qcount; /* count of recent qscales */ - double pass1_bits; /* bits outputted by the pass1 code (including complexity init) */ + double pass1_rc_eq_output_sum;/* sum of the output of the rc equation, this is used for normalization */ double pass1_wanted_bits; /* bits which should have been outputed by the pass1 code (including complexity init) */ double last_qscale; - double last_qscale_for[5]; /* last qscale for a specific pict type */ - double next_non_b_qscale; - double next_p_qscale; + double last_qscale_for[5]; /* last qscale for a specific pict type, used for max_diff & ipb 
factor stuff */ int last_mc_mb_var_sum; int last_mb_var_sum; UINT64 i_cplx_sum[5]; @@ -89,6 +87,7 @@ typedef struct RateControlContext{ UINT64 mv_bits_sum[5]; UINT64 qscale_sum[5]; int frame_count[5]; + int last_non_b_pict_type; }RateControlContext; typedef struct ReorderBuffer{ @@ -100,6 +99,12 @@ typedef struct ReorderBuffer{ int picture_in_gop_number; } ReorderBuffer; +typedef struct ScanTable{ + const UINT8 *scantable; + UINT8 permutated[64]; + UINT8 raster_end[64]; +} ScanTable; + typedef struct MpegEncContext { struct AVCodecContext *avctx; /* the following parameters must be initialized before encoding */ @@ -117,7 +122,7 @@ typedef struct MpegEncContext { int h263_rv10; /* use RV10 variation for H263 */ int h263_msmpeg4; /* generate MSMPEG4 compatible stream (deprecated, use msmpeg4_version instead)*/ int h263_intel; /* use I263 intel h263 header */ - + int codec_id; /* see CODEC_ID_xxx */ int fixed_qscale; /* fixed qscale if non zero */ float qcompress; /* amount of qscale change between easy & hard scenes (0.0-1.0) */ @@ -177,7 +182,7 @@ typedef struct MpegEncContext { INT16 (*ac_val[3])[16]; /* used for for mpeg4 AC prediction, all 3 arrays must be continuous */ int ac_pred; int mb_skiped; /* MUST BE SET only during DECODING */ - UINT8 *mbskip_table; /* used to avoid copy if macroblock skipped (for black regions for example) + UINT8 *mbskip_table; /* used to avoid copy if macroblock skipped (for black regions for example) and used for b-frame encoding & decoding (contains skip table of next P Frame) */ UINT8 *mbintra_table; /* used to avoid setting {ac, dc, cbp}-pred stuff to zero on inter MB decoding */ UINT8 *cbp_table; /* used to store cbp, ac_pred for partitioned decoding */ @@ -189,6 +194,9 @@ typedef struct MpegEncContext { int input_pict_type; /* pict_type prior to reordering of frames */ int force_type; /* 0= no force, otherwise I_TYPE, P_TYPE, ... 
*/ int qscale; /* QP */ + float frame_qscale; /* qscale from the frame level rc */ + int adaptive_quant; /* use adaptive quantization */ + int dquant; /* qscale difference to prev qscale */ int pict_type; /* I_TYPE, P_TYPE, B_TYPE, ... */ int last_pict_type; int last_non_b_pict_type; /* used for mpeg4 gmc b-frames & ratecontrol */ @@ -218,14 +226,14 @@ typedef struct MpegEncContext { int mv_dir; #define MV_DIR_BACKWARD 1 #define MV_DIR_FORWARD 2 -#define MV_DIRECT 4 /* bidirectional mode where the difference equals the MV of the last P/S/I-Frame (mpeg4) */ +#define MV_DIRECT 4 // bidirectional mode where the difference equals the MV of the last P/S/I-Frame (mpeg4) int mv_type; #define MV_TYPE_16X16 0 /* 1 vector for the whole mb */ #define MV_TYPE_8X8 1 /* 4 vectors (h263, mpeg4 4MV) */ -#define MV_TYPE_16X8 2 /* 2 vectors, one per 16x8 block */ -#define MV_TYPE_FIELD 3 /* 2 vectors, one per field */ +#define MV_TYPE_16X8 2 /* 2 vectors, one per 16x8 block */ +#define MV_TYPE_FIELD 3 /* 2 vectors, one per field */ #define MV_TYPE_DMV 4 /* 2 vectors, special mpeg2 Dual Prime Vectors */ - /* motion vectors for a macroblock + /* motion vectors for a macroblock first coordinate : 0 = forward 1 = backward second " : depend on type third " : 0 = x, 1 = y @@ -237,18 +245,19 @@ typedef struct MpegEncContext { UINT8 *fcode_tab; /* smallest fcode needed for each MV */ int has_b_frames; - int no_rounding; /* apply no rounding to motion compensation (MPEG4, msmpeg4, ...) + int no_rounding; /* apply no rounding to motion compensation (MPEG4, msmpeg4, ...) 
for b-frames rounding mode is allways 0 */ int hurry_up; /* when set to 1 during decoding, b frames will be skiped when set to 2 idct/dequant will be skipped too */ - + /* macroblock layer */ int mb_x, mb_y; int mb_incr; int mb_intra; UINT16 *mb_var; /* Table for MB variances */ UINT16 *mc_mb_var; /* Table for motion compensated MB variances */ + UINT8 *mb_mean; /* Table for MB luminance */ UINT8 *mb_type; /* Table for MB type */ #define MB_TYPE_INTRA 0x01 #define MB_TYPE_INTER 0x02 @@ -283,17 +292,28 @@ typedef struct MpegEncContext { UINT16 __align8 q_intra_matrix16_bias[32][64]; UINT16 __align8 q_inter_matrix16_bias[32][64]; int block_last_index[6]; /* last non zero coefficient in block */ + /* scantables */ + ScanTable intra_scantable; + ScanTable intra_h_scantable; + ScanTable intra_v_scantable; + ScanTable inter_scantable; // if inter == intra then intra should be used to reduce tha cache usage + UINT8 idct_permutation[64]; + int idct_permutation_type; +#define FF_NO_IDCT_PERM 1 +#define FF_LIBMPEG2_IDCT_PERM 2 +#define FF_SIMPLE_IDCT_PERM 3 +#define FF_TRANSPOSE_IDCT_PERM 4 void *opaque; /* private data for the user */ /* bit rate control */ - int I_frame_bits; /* FIXME used in mpeg12 ... */ + int I_frame_bits; //FIXME used in mpeg12 ... 
int mb_var_sum; /* sum of MB variance for current frame */ int mc_mb_var_sum; /* motion compensated MB variance for current frame */ INT64 wanted_bits; INT64 total_bits; int frame_bits; /* bits used for the current frame */ - RateControlContext rc_context; /* contains stuff only accessed in ratecontrol.c */ + RateControlContext rc_context; // contains stuff only accessed in ratecontrol.c /* statistics, used for 2-pass encoding */ int mv_bits; @@ -304,39 +324,43 @@ typedef struct MpegEncContext { int f_count; int b_count; int skip_count; - int misc_bits; /* cbp, mb_type */ - int last_bits; /* temp var used for calculating the above vars */ - + int misc_bits; // cbp, mb_type + int last_bits; //temp var used for calculating the above vars + /* error concealment / resync */ + UINT8 *error_status_table; /* table of the error status of each MB */ +#define VP_START 1 /* current MB is the first after a resync marker */ +#define AC_ERROR 2 +#define DC_ERROR 4 +#define MV_ERROR 8 +#define AC_END 16 +#define DC_END 32 +#define MV_END 64 +//FIXME some prefix? 
+ int resync_mb_x; /* x position of last resync marker */ int resync_mb_y; /* y position of last resync marker */ - int mb_num_left; /* number of MBs left in this video packet */ - GetBitContext next_resync_gb; /* starts at the next resync marker */ - int next_resync_qscale; /* qscale of next resync marker */ - int next_resync_pos; /* bitstream position of next resync marker */ -#define DECODING_AC_LOST -1 -#define DECODING_ACDC_LOST -2 -#define DECODING_DESYNC -3 - int decoding_error; + GetBitContext last_resync_gb; /* used to serach for the next resync marker */ + int mb_num_left; /* number of MBs left in this video packet (for partitioned Slices only)*/ int next_p_frame_damaged; /* set if the next p frame is damaged, to avoid showing trashed b frames */ int error_resilience; /* H.263 specific */ int gob_number; int gob_index; - + /* H.263+ specific */ int umvplus; int umvplus_dec; int h263_aic; /* Advanded INTRA Coding (AIC) */ int h263_aic_dir; /* AIC direction: 0 = left, 1 = top */ - + /* mpeg4 specific */ int time_increment_resolution; int time_increment_bits; /* number of bits to represent the fractional part of time */ int last_time_base; int time_base; /* time in seconds of last I,P,S Frame */ - INT64 time; /* time of current frame */ + INT64 time; /* time of current frame */ INT64 last_non_b_time; UINT16 pp_time; /* time distance between the last 2 p,s,i frames */ UINT16 pb_time; /* time distance between the last b and p,s,i frame */ @@ -351,12 +375,12 @@ typedef struct MpegEncContext { int sprite_brightness_change; int num_sprite_warping_points; int real_sprite_warping_points; - int sprite_offset[2][2]; - int sprite_delta[2][2][2]; - int sprite_shift[2][2]; + int sprite_offset[2][2]; /* sprite offset[isChroma][isMVY] */ + int sprite_delta[2][2]; /* sprite_delta [isY][isMVY] */ + int sprite_shift[2]; /* sprite shift [isChroma] */ int mcsel; int quant_precision; - int quarter_sample; /* 1->qpel, 0->half pel ME/MC */ + int quarter_sample; /* 1->qpel, 
0->half pel ME/MC */ int scalability; int hierachy_type; int enhancement_type; @@ -367,7 +391,8 @@ typedef struct MpegEncContext { int aspected_height; int sprite_warping_accuracy; int low_latency_sprite; - int data_partitioning; + int data_partitioning; /* data partitioning flag from header */ + int partitioned_frame; /* is current frame partitioned */ int rvlc; /* reversible vlc */ int resync_marker; /* could this stream contain resync markers*/ int low_delay; /* no reordering needed / has no b-frames */ @@ -376,7 +401,7 @@ typedef struct MpegEncContext { PutBitContext tex_pb; /* used for data partitioned VOPs */ PutBitContext pb2; /* used for data partitioned VOPs */ #define PB_BUFFER_SIZE 1024*256 - uint8_t *tex_pb_buffer; + uint8_t *tex_pb_buffer; uint8_t *pb2_buffer; int mpeg_quant; #define CO_LOCATED_TYPE_4MV 1 @@ -385,18 +410,25 @@ typedef struct MpegEncContext { INT16 (*field_mv_table)[2][2]; /* used for interlaced b frame decoding */ INT8 (*field_select_table)[2]; /* wtf, no really another table for interlaced b frames */ int t_frame; /* time distance of first I -> B, used for interlaced b frames */ + int padding_bug_score; /* used to detect the VERY common padding bug in MPEG4 */ /* divx specific, used to workaround (many) bugs in divx5 */ int divx_version; int divx_build; #define BITSTREAM_BUFFER_SIZE 1024*256 - UINT8 *bitstream_buffer; /* Divx 5.01 puts several frames in a single one, this is used to reorder them */ + UINT8 *bitstream_buffer; //Divx 5.01 puts several frames in a single one, this is used to reorder them int bitstream_buffer_size; - + + int xvid_build; + + /* lavc specific stuff, used to workaround bugs in libavcodec */ + int ffmpeg_version; + int lavc_build; + /* RV10 specific */ int rv10_version; /* RV10 version: 0 or 3 */ int rv10_first_dc_coded[3]; - + /* MJPEG specific */ struct MJpegContext *mjpeg_ctx; int mjpeg_vsample[3]; /* vertical sampling factors, default = {2, 1, 1} */ @@ -418,10 +450,6 @@ typedef struct MpegEncContext { int 
per_mb_rl_table; int esc3_level_length; int esc3_run_length; - UINT8 *inter_scantable; - UINT8 *intra_scantable; - UINT8 *intra_v_scantable; - UINT8 *intra_h_scantable; /* [mb_intra][isChroma][level][run][last] */ int (*ac_stats)[2][MAX_LEVEL+1][MAX_RUN+1][2]; int inter_intra_pred; @@ -453,7 +481,7 @@ typedef struct MpegEncContext { int interlaced_dct; int last_qscale; int first_slice; - + /* RTP specific */ /* These are explained on avcodec.h */ int rtp_mode; @@ -462,25 +490,35 @@ typedef struct MpegEncContext { UINT8 *ptr_lastgob; UINT8 *ptr_last_mb_line; UINT32 mb_line_avgsize; - + DCTELEM (*block)[64]; /* points to one of the following blocks */ - DCTELEM blocks[2][6][64] __align8; /* for HQ mode we need to keep the best block */ - void (*dct_unquantize_mpeg1)(struct MpegEncContext *s, + DCTELEM blocks[2][6][64] __align8; // for HQ mode we need to keep the best block + int (*decode_mb)(struct MpegEncContext *s, DCTELEM block[6][64]); // used by some codecs to avoid a switch() +#define SLICE_OK 0 +#define SLICE_ERROR -1 +#define SLICE_END -2 //end marker found +#define SLICE_NOEND -3 //no end marker or error found but mb count exceeded + + void (*dct_unquantize_mpeg1)(struct MpegEncContext *s, DCTELEM *block, int n, int qscale); - void (*dct_unquantize_mpeg2)(struct MpegEncContext *s, + void (*dct_unquantize_mpeg2)(struct MpegEncContext *s, DCTELEM *block, int n, int qscale); - void (*dct_unquantize_h263)(struct MpegEncContext *s, + void (*dct_unquantize_h263)(struct MpegEncContext *s, DCTELEM *block, int n, int qscale); - void (*dct_unquantize)(struct MpegEncContext *s, /* unquantizer to use (mpeg4 can use both) */ + void (*dct_unquantize)(struct MpegEncContext *s, // unquantizer to use (mpeg4 can use both) DCTELEM *block, int n, int qscale); int (*dct_quantize)(struct MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow); - void (*fdct)(DCTELEM *block); + void (*fdct)(DCTELEM *block/* align 16*/); + void (*idct_put)(UINT8 *dest/*align 8*/, int 
line_size, DCTELEM *block/*align 16*/); + void (*idct_add)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); } MpegEncContext; + +int DCT_common_init(MpegEncContext *s); int MPV_common_init(MpegEncContext *s); void MPV_common_end(MpegEncContext *s); void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); -void MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx); +int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx); void MPV_frame_end(MpegEncContext *s); #ifdef HAVE_MMX void MPV_common_init_mmx(MpegEncContext *s); @@ -491,13 +529,38 @@ void MPV_common_init_axp(MpegEncContext *s); #ifdef HAVE_MLIB void MPV_common_init_mlib(MpegEncContext *s); #endif +#ifdef HAVE_MMI +void MPV_common_init_mmi(MpegEncContext *s); +#endif extern void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w); void ff_conceal_past_errors(MpegEncContext *s, int conceal_all); void ff_copy_bits(PutBitContext *pb, UINT8 *src, int length); void ff_clean_intra_table_entries(MpegEncContext *s); +void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scantable); +void ff_error_resilience(MpegEncContext *s); +void ff_draw_horiz_band(MpegEncContext *s); extern int ff_bit_exact; +static inline void ff_init_block_index(MpegEncContext *s){ + s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1 + s->mb_x*2; + s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1) + s->mb_x*2; + s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1 + s->mb_x*2; + s->block_index[3]= s->block_wrap[0]*(s->mb_y*2 + 2) + s->mb_x*2; + s->block_index[4]= s->block_wrap[4]*(s->mb_y + 1) + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x; + s->block_index[5]= s->block_wrap[4]*(s->mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x; +} + +static inline void ff_update_block_index(MpegEncContext *s){ + s->block_index[0]+=2; + s->block_index[1]+=2; + s->block_index[2]+=2; + s->block_index[3]+=2; + s->block_index[4]++; + s->block_index[5]++; 
+} + + /* motion_est.c */ void ff_estimate_p_frame_motion(MpegEncContext * s, int mb_x, int mb_y); @@ -507,9 +570,10 @@ int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type); void ff_fix_long_p_mvs(MpegEncContext * s); void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type); + /* mpeg12.c */ -extern INT16 ff_mpeg1_default_intra_matrix[64]; -extern INT16 ff_mpeg1_default_non_intra_matrix[64]; +extern const INT16 ff_mpeg1_default_intra_matrix[64]; +extern const INT16 ff_mpeg1_default_non_intra_matrix[64]; extern UINT8 ff_mpeg1_dc_scale_table[128]; void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number); @@ -518,6 +582,7 @@ void mpeg1_encode_mb(MpegEncContext *s, int motion_x, int motion_y); void ff_mpeg1_encode_init(MpegEncContext *s); + /* h263enc.c */ typedef struct RLTable { int n; /* number of entries of table_vlc minus 1 */ @@ -548,76 +613,79 @@ static inline int get_rl_index(const RLTable *rl, int last, int run, int level) extern UINT8 ff_mpeg4_y_dc_scale_table[32]; extern UINT8 ff_mpeg4_c_dc_scale_table[32]; -extern INT16 ff_mpeg4_default_intra_matrix[64]; -extern INT16 ff_mpeg4_default_non_intra_matrix[64]; - -void h263_encode_mb(MpegEncContext *s, +extern const INT16 ff_mpeg4_default_intra_matrix[64]; +extern const INT16 ff_mpeg4_default_non_intra_matrix[64]; +void h263_encode_mb(MpegEncContext *s, DCTELEM block[6][64], int motion_x, int motion_y); -void mpeg4_encode_mb(MpegEncContext *s, +void mpeg4_encode_mb(MpegEncContext *s, DCTELEM block[6][64], int motion_x, int motion_y); void h263_encode_picture_header(MpegEncContext *s, int picture_number); int h263_encode_gob_header(MpegEncContext * s, int mb_line); -INT16 *h263_pred_motion(MpegEncContext * s, int block, +INT16 *h263_pred_motion(MpegEncContext * s, int block, int *px, int *py); -void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n, +void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n, int dir); void 
ff_set_mpeg4_time(MpegEncContext * s, int picture_number); void mpeg4_encode_picture_header(MpegEncContext *s, int picture_number); void h263_encode_init(MpegEncContext *s); - void h263_decode_init_vlc(MpegEncContext *s); int h263_decode_picture_header(MpegEncContext *s); -int h263_decode_gob_header(MpegEncContext *s); -int mpeg4_decode_picture_header(MpegEncContext * s); +int ff_h263_decode_gob_header(MpegEncContext *s); +int ff_mpeg4_decode_picture_header(MpegEncContext * s, GetBitContext *gb); + + int intel_h263_decode_picture_header(MpegEncContext *s); -int h263_decode_mb(MpegEncContext *s, - DCTELEM block[6][64]); +int ff_h263_decode_mb(MpegEncContext *s, + DCTELEM block[6][64]); int h263_get_picture_format(int width, int height); -int ff_mpeg4_decode_video_packet_header(MpegEncContext *s); -int ff_mpeg4_resync(MpegEncContext *s); void ff_mpeg4_encode_video_packet_header(MpegEncContext *s); void ff_mpeg4_clean_buffers(MpegEncContext *s); void ff_mpeg4_stuffing(PutBitContext * pbc); void ff_mpeg4_init_partitions(MpegEncContext *s); void ff_mpeg4_merge_partitions(MpegEncContext *s); -extern inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *dir_ptr); +void ff_clean_mpeg4_qscales(MpegEncContext *s); +void ff_clean_h263_qscales(MpegEncContext *s); +int ff_mpeg4_decode_partitions(MpegEncContext *s); +int ff_mpeg4_get_video_packet_prefix_length(MpegEncContext *s); +int ff_h263_resync(MpegEncContext *s); +int ff_h263_get_gob_height(MpegEncContext *s); + /* rv10.c */ void rv10_encode_picture_header(MpegEncContext *s, int picture_number); int rv_decode_dc(MpegEncContext *s, int n); + /* msmpeg4.c */ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number); void msmpeg4_encode_ext_header(MpegEncContext * s); -void msmpeg4_encode_mb(MpegEncContext * s, +void msmpeg4_encode_mb(MpegEncContext * s, DCTELEM block[6][64], int motion_x, int motion_y); int msmpeg4_decode_picture_header(MpegEncContext * s); int 
msmpeg4_decode_ext_header(MpegEncContext * s, int buf_size); -int msmpeg4_decode_mb(MpegEncContext *s, - DCTELEM block[6][64]); int ff_msmpeg4_decode_init(MpegEncContext *s); void ff_msmpeg4_encode_init(MpegEncContext *s); -/* mjpegenc.c */ +/* mjpegenc.c */ int mjpeg_init(MpegEncContext *s); void mjpeg_close(MpegEncContext *s); -void mjpeg_encode_mb(MpegEncContext *s, +void mjpeg_encode_mb(MpegEncContext *s, DCTELEM block[6][64]); void mjpeg_picture_header(MpegEncContext *s); void mjpeg_picture_trailer(MpegEncContext *s); + /* rate control */ int ff_rate_control_init(MpegEncContext *s); -int ff_rate_estimate_qscale(MpegEncContext *s); -int ff_rate_estimate_qscale_pass2(MpegEncContext *s); +float ff_rate_estimate_qscale(MpegEncContext *s); void ff_write_pass1_stats(MpegEncContext *s); void ff_rate_control_uninit(MpegEncContext *s); double ff_eval(char *s, double *const_value, char **const_name, - double (**func1)(void *, double), char **func1_name, + double (**func1)(void *, double), char **func1_name, double (**func2)(void *, double, double), char **func2_name, void *opaque); diff --git a/src/libffmpeg/libavcodec/msmpeg4.c b/src/libffmpeg/libavcodec/msmpeg4.c index 6972ae806..81dc91fbb 100644 --- a/src/libffmpeg/libavcodec/msmpeg4.c +++ b/src/libffmpeg/libavcodec/msmpeg4.c @@ -61,7 +61,8 @@ static void msmpeg4v2_encode_motion(MpegEncContext * s, int val); static void init_h263_dc_for_msmpeg4(void); static inline void msmpeg4_memsetw(short *tab, int val, int n); static int get_size_of_code(MpegEncContext * s, RLTable *rl, int last, int run, int level, int intra); - +static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); +static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); extern UINT32 inverse[256]; @@ -73,7 +74,7 @@ int frame_count = 0; #include "msmpeg4data.h" -static int rl_length[2][NB_RL_TABLES][MAX_LEVEL+1][MAX_RUN+1][2]; +static UINT8 rl_length[NB_RL_TABLES][MAX_LEVEL+1][MAX_RUN+1][2]; #ifdef STATS @@ -164,32 +165,19 
@@ static void common_init(MpegEncContext * s) break; } + if(s->msmpeg4_version==4){ - s->intra_scantable = wmv1_scantable[1]; - s->intra_h_scantable= wmv1_scantable[2]; - s->intra_v_scantable= wmv1_scantable[3]; - s->inter_scantable = wmv1_scantable[0]; - }else{ - s->intra_scantable = zigzag_direct; - s->intra_h_scantable= ff_alternate_horizontal_scan; - s->intra_v_scantable= ff_alternate_vertical_scan; - s->inter_scantable = zigzag_direct; + ff_init_scantable(s, &s->intra_scantable , wmv1_scantable[1]); + ff_init_scantable(s, &s->intra_h_scantable, wmv1_scantable[2]); + ff_init_scantable(s, &s->intra_v_scantable, wmv1_scantable[3]); + ff_init_scantable(s, &s->inter_scantable , wmv1_scantable[0]); } + //Note the default tables are set in common_init in mpegvideo.c if(!inited){ - int i; inited=1; init_h263_dc_for_msmpeg4(); - - /* permute for IDCT */ - for(i=0; iac_stats[1][1][level][run][last]; if(s->pict_type==I_TYPE){ - size += intra_luma_count *rl_length[1][i ][level][run][last]; - chroma_size+= intra_chroma_count*rl_length[1][i+3][level][run][last]; + size += intra_luma_count *rl_length[i ][level][run][last]; + chroma_size+= intra_chroma_count*rl_length[i+3][level][run][last]; }else{ - size+= intra_luma_count *rl_length[1][i ][level][run][last] - +intra_chroma_count*rl_length[1][i+3][level][run][last] - +inter_count *rl_length[0][i+3][level][run][last]; + size+= intra_luma_count *rl_length[i ][level][run][last] + +intra_chroma_count*rl_length[i+3][level][run][last] + +inter_count *rl_length[i+3][level][run][last]; } } if(last_size == size+chroma_size) break; @@ -381,7 +368,8 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number) s->mv_table_index = 1; /* only if P frame */ s->use_skip_mb_code = 1; /* only if P frame */ s->per_mb_rl_table = 0; - s->inter_intra_pred= (s->width*s->height < 320*240 && s->bit_rate<=II_BITRATE && s->pict_type==P_TYPE); + if(s->msmpeg4_version==4) + s->inter_intra_pred= (s->width*s->height < 320*240 && 
s->bit_rate<=II_BITRATE && s->pict_type==P_TYPE); if (s->pict_type == I_TYPE) { s->no_rounding = 1; @@ -517,26 +505,7 @@ static inline void handle_slices(MpegEncContext *s){ if (s->mb_x == 0) { if (s->slice_height && (s->mb_y % s->slice_height) == 0) { if(s->msmpeg4_version != 4){ - int wrap; - /* reset DC pred (set previous line to 1024) */ - wrap = 2 * s->mb_width + 2; - msmpeg4_memsetw(&s->dc_val[0][(1) + (2 * s->mb_y) * wrap], - 1024, 2 * s->mb_width); - wrap = s->mb_width + 2; - msmpeg4_memsetw(&s->dc_val[1][(1) + (s->mb_y) * wrap], - 1024, s->mb_width); - msmpeg4_memsetw(&s->dc_val[2][(1) + (s->mb_y) * wrap], - 1024, s->mb_width); - - /* reset AC pred (set previous line to 0) */ - wrap = s->mb_width * 2 + 2; - msmpeg4_memsetw(s->ac_val[0][0] + (1 + (2 * s->mb_y) * wrap)*16, - 0, 2 * s->mb_width*16); - wrap = s->mb_width + 2; - msmpeg4_memsetw(s->ac_val[1][0] + (1 + (s->mb_y) * wrap)*16, - 0, s->mb_width*16); - msmpeg4_memsetw(s->ac_val[2][0] + (1 + (s->mb_y) * wrap)*16, - 0, s->mb_width*16); + ff_mpeg4_clean_buffers(s); } s->first_slice_line = 1; } else { @@ -723,6 +692,10 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n, a = dc_val[ - 1]; b = dc_val[ - 1 - wrap]; c = dc_val[ - wrap]; + + if(s->first_slice_line && (n&2)==0 && s->msmpeg4_version!=4){ + b=c=1024; + } /* XXX: the following solution consumes divisions, but it does not necessitate to modify mpegvideo.c. 
The problem comes from the @@ -936,7 +909,7 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int rl = &rl_table[3 + s->rl_chroma_table_index]; } run_diff = 0; - scantable= s->intra_scantable; + scantable= s->intra_scantable.permutated; set_stat(ST_INTRA_AC); } else { i = 0; @@ -945,15 +918,16 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int run_diff = 0; else run_diff = 1; - scantable= s->inter_scantable; + scantable= s->inter_scantable.permutated; set_stat(ST_INTER_AC); } /* recalculate block_last_index for M$ wmv1 */ - if(scantable!=zigzag_direct && s->block_last_index[n]>0){ + if(s->msmpeg4_version==4 && s->block_last_index[n]>0){ for(last_index=63; last_index>=0; last_index--){ if(block[scantable[last_index]]) break; } + s->block_last_index[n]= last_index; }else last_index = s->block_last_index[n]; /* AC coefs */ @@ -1183,6 +1157,20 @@ int ff_msmpeg4_decode_init(MpegEncContext *s) &table_inter_intra[0][1], 2, 1, &table_inter_intra[0][0], 2, 1); } + + switch(s->msmpeg4_version){ + case 1: + case 2: + s->decode_mb= msmpeg4v12_decode_mb; + break; + case 3: + case 4: + s->decode_mb= msmpeg4v34_decode_mb; + break; + } + + s->slice_height= s->mb_height; //to avoid 1/0 if the first frame isnt a keyframe + return 0; } @@ -1457,11 +1445,12 @@ static int msmpeg4v2_decode_motion(MpegEncContext * s, int pred, int f_code) return val; } - -static int msmpeg4v12_decode_mb(MpegEncContext *s, - DCTELEM block[6][64]) +static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) { int cbp, code, i; + + s->error_status_table[s->mb_x + s->mb_y*s->mb_width]= 0; + if (s->pict_type == P_TYPE) { if (s->use_skip_mb_code) { if (get_bits1(&s->gb)) { @@ -1543,8 +1532,7 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, return 0; } -int msmpeg4_decode_mb(MpegEncContext *s, - DCTELEM block[6][64]) +static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) { int cbp, code, i; UINT8 *coded_val; @@ 
-1555,10 +1543,8 @@ if(s->mb_x==0){ if(s->mb_y==0) printf("\n"); } #endif - /* special slice handling */ - handle_slices(s); - if(s->msmpeg4_version<=2) return msmpeg4v12_decode_mb(s, block); //FIXME export function & call from outside perhaps + s->error_status_table[s->mb_x + s->mb_y*s->mb_width]= 0; if (s->pict_type == P_TYPE) { set_stat(ST_INTER_MB); @@ -1704,11 +1690,11 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, } if (s->ac_pred) { if (dc_pred_dir == 0) - scan_table = s->intra_v_scantable; /* left */ + scan_table = s->intra_v_scantable.permutated; /* left */ else - scan_table = s->intra_h_scantable; /* top */ + scan_table = s->intra_h_scantable.permutated; /* top */ } else { - scan_table = s->intra_scantable; + scan_table = s->intra_scantable.permutated; } set_stat(ST_INTRA_AC); rl_vlc= rl->rl_vlc[0]; @@ -1727,7 +1713,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, s->block_last_index[n] = i; return 0; } - scan_table = s->inter_scantable; + scan_table = s->inter_scantable.permutated; set_stat(ST_INTER_AC); rl_vlc= rl->rl_vlc[s->qscale]; } @@ -1879,7 +1865,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, i-= 192; if(i&(~63)){ const int left= s->gb.size*8 - get_bits_count(&s->gb); - if(((i+192 == 64 && level/qmul==-1) || s->error_resilience<0) && left>=0){ + if(((i+192 == 64 && level/qmul==-1) || s->error_resilience<=1) && left>=0){ fprintf(stderr, "ignoring overflow at %d %d\n", s->mb_x, s->mb_y); break; }else{ diff --git a/src/libffmpeg/libavcodec/msmpeg4data.h b/src/libffmpeg/libavcodec/msmpeg4data.h index cab8f04dd..93a72c54c 100644 --- a/src/libffmpeg/libavcodec/msmpeg4data.h +++ b/src/libffmpeg/libavcodec/msmpeg4data.h @@ -1819,7 +1819,7 @@ static UINT8 old_ff_c_dc_scale_table[32]={ #define WMV1_SCANTABLE_COUNT 4 -static UINT8 wmv1_scantable00[64]= { +static const UINT8 wmv1_scantable00[64]= { 0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11, 0x0A, 0x03, 0x04, 
0x0B, 0x12, 0x19, 0x20, 0x28, 0x30, 0x38, 0x29, 0x21, 0x1A, 0x13, 0x0C, 0x05, @@ -1829,7 +1829,7 @@ static UINT8 wmv1_scantable00[64]= { 0x2C, 0x25, 0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x35, 0x3D, 0x3E, 0x36, 0x2E, 0x27, 0x2F, 0x37, 0x3F, }; -static UINT8 wmv1_scantable01[64]= { +static const UINT8 wmv1_scantable01[64]= { 0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11, 0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28, 0x21, 0x30, 0x1A, 0x13, 0x0C, 0x05, 0x06, 0x0D, @@ -1839,7 +1839,7 @@ static UINT8 wmv1_scantable01[64]= { 0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3C, 0x35, 0x3D, 0x2E, 0x27, 0x2F, 0x36, 0x3E, 0x37, 0x3F, }; -static UINT8 wmv1_scantable02[64]= { +static const UINT8 wmv1_scantable02[64]= { 0x00, 0x01, 0x08, 0x02, 0x03, 0x09, 0x10, 0x18, 0x11, 0x0A, 0x04, 0x05, 0x0B, 0x12, 0x19, 0x20, 0x28, 0x30, 0x21, 0x1A, 0x13, 0x0C, 0x06, 0x07, @@ -1849,7 +1849,7 @@ static UINT8 wmv1_scantable02[64]= { 0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3B, 0x3C, 0x35, 0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F, }; -static UINT8 wmv1_scantable03[64]= { +static const UINT8 wmv1_scantable03[64]= { 0x00, 0x08, 0x10, 0x01, 0x18, 0x20, 0x28, 0x09, 0x02, 0x03, 0x0A, 0x11, 0x19, 0x30, 0x38, 0x29, 0x21, 0x1A, 0x12, 0x0B, 0x04, 0x05, 0x0C, 0x13, @@ -1860,7 +1860,7 @@ static UINT8 wmv1_scantable03[64]= { 0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F, }; -static UINT8 *wmv1_scantable[WMV1_SCANTABLE_COUNT+1]={ +static const UINT8 *wmv1_scantable[WMV1_SCANTABLE_COUNT+1]={ wmv1_scantable00, wmv1_scantable01, wmv1_scantable02, diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c b/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c index 1311cc61b..d9c8ba86e 100644 --- a/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c +++ b/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c @@ -1,4 +1,3 @@ -#include "../../config.h" #include "../dsputil.h" #ifdef HAVE_ALTIVEC diff --git a/src/libffmpeg/libavcodec/ratecontrol.c b/src/libffmpeg/libavcodec/ratecontrol.c index 77af3c93c..99e46dbad 100644 --- 
a/src/libffmpeg/libavcodec/ratecontrol.c +++ b/src/libffmpeg/libavcodec/ratecontrol.c @@ -38,9 +38,9 @@ static int init_pass2(MpegEncContext *s); static double get_qscale(MpegEncContext *s, RateControlEntry *rce, double rate_factor, int frame_num); void ff_write_pass1_stats(MpegEncContext *s){ - sprintf(s->avctx->stats_out, "in:%d out:%d type:%d q:%d itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%d var:%d icount:%d;\n", + sprintf(s->avctx->stats_out, "in:%d out:%d type:%d q:%f itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%d var:%d icount:%d;\n", s->picture_number, s->input_picture_number - s->max_b_frames, s->pict_type, - s->qscale, s->i_tex_bits, s->p_tex_bits, s->mv_bits, s->misc_bits, + s->frame_qscale, s->i_tex_bits, s->p_tex_bits, s->mv_bits, s->misc_bits, s->f_code, s->b_code, s->mc_mb_var_sum, s->mb_var_sum, s->i_count); } @@ -64,9 +64,6 @@ int ff_rate_control_init(MpegEncContext *s) } rcc->buffer_index= s->avctx->rc_buffer_size/2; - rcc->next_non_b_qscale=10; - rcc->next_p_qscale=10; - if(s->flags&CODEC_FLAG_PASS2){ int i; char *p; @@ -108,7 +105,7 @@ int ff_rate_control_init(MpegEncContext *s) assert(picture_number < rcc->num_entries); rce= &rcc->entry[picture_number]; - e+=sscanf(p, " in:%*d out:%*d type:%d q:%d itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%d var:%d icount:%d", + e+=sscanf(p, " in:%*d out:%*d type:%d q:%f itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%d var:%d icount:%d", &rce->pict_type, &rce->qscale, &rce->i_tex_bits, &rce->p_tex_bits, &rce->mv_bits, &rce->misc_bits, &rce->f_code, &rce->b_code, &rce->mc_mb_var_sum, &rce->mb_var_sum, &rce->i_count); if(e!=12){ @@ -126,7 +123,7 @@ int ff_rate_control_init(MpegEncContext *s) rcc->short_term_qsum=0.001; rcc->short_term_qcount=0.001; - rcc->pass1_bits =0.001; + rcc->pass1_rc_eq_output_sum= 0.001; rcc->pass1_wanted_bits=0.001; /* init stuff with the user specified complexity */ @@ -166,7 +163,7 @@ int ff_rate_control_init(MpegEncContext *s) bits= 
rce.i_tex_bits + rce.p_tex_bits; - q= get_qscale(s, &rce, rcc->pass1_wanted_bits/rcc->pass1_bits, i); + q= get_qscale(s, &rce, rcc->pass1_wanted_bits/rcc->pass1_rc_eq_output_sum, i); rcc->pass1_wanted_bits+= s->bit_rate/(s->frame_rate / (double)FRAME_RATE_BASE); } } @@ -231,7 +228,6 @@ static double get_qscale(MpegEncContext *s, RateControlEntry *rce, double rate_f const int pict_type= rce->new_pict_type; const double mb_num= s->mb_num; int i; - const double last_q= rcc->last_qscale_for[pict_type]; double const_values[]={ M_PI, @@ -288,8 +284,8 @@ static double get_qscale(MpegEncContext *s, RateControlEntry *rce, double rate_f NULL }; static double (*func1[])(void *, double)={ - bits2qp, - qp2bits, + (void *)bits2qp, + (void *)qp2bits, NULL }; char *func1_names[]={ @@ -300,7 +296,7 @@ static double get_qscale(MpegEncContext *s, RateControlEntry *rce, double rate_f bits= ff_eval(s->avctx->rc_eq, const_values, const_names, func1, func1_names, NULL, NULL, rce); - rcc->pass1_bits+= bits; + rcc->pass1_rc_eq_output_sum+= bits; bits*=rate_factor; if(bits<0.0) bits=0.0; bits+= 1.0; //avoid 1/0 issues @@ -324,13 +320,34 @@ static double get_qscale(MpegEncContext *s, RateControlEntry *rce, double rate_f q= -q*s->avctx->i_quant_factor + s->avctx->i_quant_offset; else if(pict_type==B_TYPE && s->avctx->b_quant_factor<0.0) q= -q*s->avctx->b_quant_factor + s->avctx->b_quant_offset; - + + return q; +} + +static double get_diff_limited_q(MpegEncContext *s, RateControlEntry *rce, double q){ + RateControlContext *rcc= &s->rc_context; + AVCodecContext *a= s->avctx; + const int pict_type= rce->new_pict_type; + const double last_p_q = rcc->last_qscale_for[P_TYPE]; + const double last_non_b_q= rcc->last_qscale_for[rcc->last_non_b_pict_type]; + + if (pict_type==I_TYPE && (a->i_quant_factor>0.0 || rcc->last_non_b_pict_type==P_TYPE)) + q= last_p_q *ABS(a->i_quant_factor) + a->i_quant_offset; + else if(pict_type==B_TYPE && a->b_quant_factor>0.0) + q= last_non_b_q* a->b_quant_factor + 
a->b_quant_offset; + /* last qscale / qdiff stuff */ - if (q > last_q + s->max_qdiff) q= last_q + s->max_qdiff; - else if(q < last_q - s->max_qdiff) q= last_q - s->max_qdiff; + if(rcc->last_non_b_pict_type==pict_type || pict_type!=I_TYPE){ + double last_q= rcc->last_qscale_for[pict_type]; + if (q > last_q + a->max_qdiff) q= last_q + a->max_qdiff; + else if(q < last_q - a->max_qdiff) q= last_q - a->max_qdiff; + } rcc->last_qscale_for[pict_type]= q; //Note we cant do that after blurring + if(pict_type!=B_TYPE) + rcc->last_non_b_pict_type= pict_type; + return q; } @@ -377,24 +394,30 @@ static double modify_qscale(MpegEncContext *s, RateControlEntry *rce, double q, q*= s->avctx->rc_qmod_amp; bits= qp2bits(rce, q); - +//printf("q:%f\n", q); /* buffer overflow/underflow protection */ if(buffer_size){ - double expected_size= rcc->buffer_index - bits; + double expected_size= rcc->buffer_index; if(min_rate){ - double d= 2*(buffer_size - (expected_size + min_rate))/buffer_size; + double d= 2*(buffer_size - expected_size)/buffer_size; if(d>1.0) d=1.0; - q/= pow(d, 1.0/s->avctx->rc_buffer_aggressivity); + else if(d<0.0001) d=0.0001; + q*= pow(d, 1.0/s->avctx->rc_buffer_aggressivity); + + q= MIN(q, bits2qp(rce, MAX((min_rate - buffer_size + rcc->buffer_index)*2, 1))); } if(max_rate){ double d= 2*expected_size/buffer_size; if(d>1.0) d=1.0; - q*= pow(d, 1.0/s->avctx->rc_buffer_aggressivity); + else if(d<0.0001) d=0.0001; + q/= pow(d, 1.0/s->avctx->rc_buffer_aggressivity); + + q= MAX(q, bits2qp(rce, MAX(rcc->buffer_index/2, 1))); } } - +//printf("q:%f max:%f min:%f size:%f index:%d bits:%f agr:%f\n", q,max_rate, min_rate, buffer_size, rcc->buffer_index, bits, s->avctx->rc_buffer_aggressivity); if(s->avctx->rc_qsquish==0.0 || qmin==qmax){ if (qqmax) q=qmax; @@ -410,7 +433,7 @@ static double modify_qscale(MpegEncContext *s, RateControlEntry *rce, double q, q= exp(q); } - + return q; } @@ -439,10 +462,94 @@ static void update_predictor(Predictor *p, double q, double var, double size) 
p->coeff+= new_coeff; } -int ff_rate_estimate_qscale(MpegEncContext *s) +static void adaptive_quantization(MpegEncContext *s, double q){ + int i; + const float lumi_masking= s->avctx->lumi_masking / (128.0*128.0); + const float dark_masking= s->avctx->dark_masking / (128.0*128.0); + const float temp_cplx_masking= s->avctx->temporal_cplx_masking; + const float spatial_cplx_masking = s->avctx->spatial_cplx_masking; + const float p_masking = s->avctx->p_masking; + float bits_sum= 0.0; + float cplx_sum= 0.0; + float cplx_tab[s->mb_num]; + float bits_tab[s->mb_num]; + const int qmin= 2; //s->avctx->mb_qmin; + const int qmax= 31; //s->avctx->mb_qmax; + + for(i=0; imb_num; i++){ + float temp_cplx= sqrt(s->mc_mb_var[i]); + float spat_cplx= sqrt(s->mb_var[i]); + const int lumi= s->mb_mean[i]; + float bits, cplx, factor; + + if(spat_cplx < q/3) spat_cplx= q/3; //FIXME finetune + if(temp_cplx < q/3) temp_cplx= q/3; //FIXME finetune + + if((s->mb_type[i]&MB_TYPE_INTRA)){//FIXME hq mode + cplx= spat_cplx; + factor= 1.0 + p_masking; + }else{ + cplx= temp_cplx; + factor= pow(temp_cplx, - temp_cplx_masking); + } + factor*=pow(spat_cplx, - spatial_cplx_masking); + + if(lumi>127) + factor*= (1.0 - (lumi-128)*(lumi-128)*lumi_masking); + else + factor*= (1.0 - (lumi-128)*(lumi-128)*dark_masking); + + if(factor<0.00001) factor= 0.00001; + + bits= cplx*factor; + cplx_sum+= cplx; + bits_sum+= bits; + cplx_tab[i]= cplx; + bits_tab[i]= bits; + } + + /* handle qmin/qmax cliping */ + if(s->flags&CODEC_FLAG_NORMALIZE_AQP){ + for(i=0; imb_num; i++){ + float newq= q*cplx_tab[i]/bits_tab[i]; + newq*= bits_sum/cplx_sum; + + if (newq > qmax){ + bits_sum -= bits_tab[i]; + cplx_sum -= cplx_tab[i]*q/qmax; + } + else if(newq < qmin){ + bits_sum -= bits_tab[i]; + cplx_sum -= cplx_tab[i]*q/qmin; + } + } + } + + for(i=0; imb_num; i++){ + float newq= q*cplx_tab[i]/bits_tab[i]; + int intq; + + if(s->flags&CODEC_FLAG_NORMALIZE_AQP){ + newq*= bits_sum/cplx_sum; + } + + if(i && ABS(s->qscale_table[i-1] - 
newq)<0.75) + intq= s->qscale_table[i-1]; + else + intq= (int)(newq + 0.5); + + if (intq > qmax) intq= qmax; + else if(intq < qmin) intq= qmin; +//if(i%s->mb_width==0) printf("\n"); +//printf("%2d%3d ", intq, ff_sqrt(s->mc_mb_var[i])); + s->qscale_table[i]= intq; + } +} + +float ff_rate_estimate_qscale(MpegEncContext *s) { float q; - int qscale, qmin, qmax; + int qmin, qmax; float br_compensation; double diff; double short_term_q; @@ -460,7 +567,7 @@ int ff_rate_estimate_qscale(MpegEncContext *s) get_qminmax(&qmin, &qmax, s, pict_type); fps= (double)s->frame_rate / FRAME_RATE_BASE; -//printf("input_picture_number:%d picture_number:%d\n", s->input_picture_number, s->picture_number); +//printf("input_pic_num:%d pic_num:%d frame_rate:%d\n", s->input_picture_number, s->picture_number, s->frame_rate); /* update predictors */ if(picture_number>2){ const int last_var= s->last_pict_type == I_TYPE ? rcc->last_mb_var_sum : rcc->last_mc_mb_var_sum; @@ -521,16 +628,13 @@ int ff_rate_estimate_qscale(MpegEncContext *s) rcc->frame_count[pict_type] ++; bits= rce->i_tex_bits + rce->p_tex_bits; - rate_factor= rcc->pass1_wanted_bits/rcc->pass1_bits * br_compensation; + rate_factor= rcc->pass1_wanted_bits/rcc->pass1_rc_eq_output_sum * br_compensation; q= get_qscale(s, rce, rate_factor, picture_number); assert(q>0.0); //printf("%f ", q); - if (pict_type==I_TYPE && s->avctx->i_quant_factor>0.0) - q= rcc->next_p_qscale*s->avctx->i_quant_factor + s->avctx->i_quant_offset; - else if(pict_type==B_TYPE && s->avctx->b_quant_factor>0.0) - q= rcc->next_non_b_qscale*s->avctx->b_quant_factor + s->avctx->b_quant_offset; + q= get_diff_limited_q(s, rce, q); //printf("%f ", q); assert(q>0.0); @@ -544,14 +648,13 @@ int ff_rate_estimate_qscale(MpegEncContext *s) q= short_term_q= rcc->short_term_qsum/rcc->short_term_qcount; //printf("%f ", q); } + assert(q>0.0); + q= modify_qscale(s, rce, q, picture_number); rcc->pass1_wanted_bits+= s->bit_rate/fps; assert(q>0.0); - - if(pict_type != B_TYPE) 
rcc->next_non_b_qscale= q; - if(pict_type == P_TYPE) rcc->next_p_qscale= q; } //printf("qmin:%d, qmax:%d, q:%f\n", qmin, qmax, q); @@ -561,19 +664,21 @@ int ff_rate_estimate_qscale(MpegEncContext *s) // printf("%f %d %d %d\n", q, picture_number, (int)wanted_bits, (int)s->total_bits); - //printf("%f %f %f\n", q, br_compensation, short_term_q); - qscale= (int)(q + 0.5); -//printf("%d ", qscale); - -//printf("q:%d diff:%d comp:%f rate_q:%d st_q:%f fvar:%d last_size:%d\n", qscale, (int)diff, br_compensation, -// rate_q, short_term_q, s->mc_mb_var, s->frame_bits); + +//printf("q:%d diff:%d comp:%f st_q:%f last_size:%d type:%d\n", qscale, (int)diff, br_compensation, +// short_term_q, s->frame_bits, pict_type); //printf("%d %d\n", s->bit_rate, (int)fps); - rcc->last_qscale= qscale; + if(s->adaptive_quant) + adaptive_quantization(s, q); + else + q= (int)(q + 0.5); + + rcc->last_qscale= q; rcc->last_mc_mb_var_sum= s->mc_mb_var_sum; rcc->last_mb_var_sum= s->mb_var_sum; - return qscale; + return q; } //---------------------------------------------- @@ -689,21 +794,10 @@ static int init_pass2(MpegEncContext *s) assert(filter_size%2==1); /* fixed I/B QP relative to P mode */ - rcc->next_non_b_qscale= 10; - rcc->next_p_qscale= 10; for(i=rcc->num_entries-1; i>=0; i--){ RateControlEntry *rce= &rcc->entry[i]; - const int pict_type= rce->new_pict_type; - - if (pict_type==I_TYPE && s->avctx->i_quant_factor>0.0) - qscale[i]= rcc->next_p_qscale*s->avctx->i_quant_factor + s->avctx->i_quant_offset; - else if(pict_type==B_TYPE && s->avctx->b_quant_factor>0.0) - qscale[i]= rcc->next_non_b_qscale*s->avctx->b_quant_factor + s->avctx->b_quant_offset; - - if(pict_type!=B_TYPE) - rcc->next_non_b_qscale= qscale[i]; - if(pict_type==P_TYPE) - rcc->next_p_qscale= qscale[i]; + + qscale[i]= get_diff_limited_q(s, rce, qscale[i]); } /* smooth curve */ diff --git a/src/libffmpeg/libavcodec/rv10.c b/src/libffmpeg/libavcodec/rv10.c index 72a412eb5..5932126c8 100644 --- a/src/libffmpeg/libavcodec/rv10.c 
+++ b/src/libffmpeg/libavcodec/rv10.c @@ -223,18 +223,10 @@ int rv_decode_dc(MpegEncContext *s, int n) /* write RV 1.0 compatible frame header */ void rv10_encode_picture_header(MpegEncContext *s, int picture_number) { - int full_frame= 1; + int full_frame= 0; align_put_bits(&s->pb); - if(full_frame){ - put_bits(&s->pb, 8, 0xc0); /* packet header */ - put_bits(&s->pb, 16, 0x4000); /* len */ - put_bits(&s->pb, 16, 0x4000); /* pos */ - } - - put_bits(&s->pb, 8, picture_number&0xFF); - put_bits(&s->pb, 1, 1); /* marker */ put_bits(&s->pb, 1, (s->pict_type == P_TYPE)); @@ -273,35 +265,17 @@ static int get_num(GetBitContext *gb) /* read RV 1.0 compatible frame header */ static int rv10_decode_picture_header(MpegEncContext *s) { - int mb_count, pb_frame, marker, h, full_frame; - int pic_num, unk; + int mb_count, pb_frame, marker, full_frame, unk; - /* skip packet header */ - h = get_bits(&s->gb, 8); - if ((h & 0xc0) == 0xc0) { - int len, pos; - full_frame = 1; - len = get_num(&s->gb); - pos = get_num(&s->gb); -//printf("pos:%d\n",len); - } else { - int seq, frame_size, pos; - full_frame = 0; - seq = get_bits(&s->gb, 8); - frame_size = get_num(&s->gb); - pos = get_num(&s->gb); -//printf("seq:%d, size:%d, pos:%d\n",seq,frame_size,pos); - } - /* picture number */ - pic_num= get_bits(&s->gb, 8); - + full_frame= s->avctx->slice_count==1; +//printf("ff:%d\n", full_frame); marker = get_bits(&s->gb, 1); if (get_bits(&s->gb, 1)) s->pict_type = P_TYPE; else s->pict_type = I_TYPE; -//printf("h:%d ver:%d\n",h,s->rv10_version); +//printf("h:%X ver:%d\n",h,s->rv10_version); if(!marker) printf("marker missing\n"); pb_frame = get_bits(&s->gb, 1); @@ -336,7 +310,7 @@ static int rv10_decode_picture_header(MpegEncContext *s) } /* if multiple packets per frame are sent, the position at which to display the macro blocks is coded here */ - if (!full_frame) { + if ((!full_frame) || show_bits(&s->gb, 12)==0) { s->mb_x = get_bits(&s->gb, 6); /* mb_x */ s->mb_y = get_bits(&s->gb, 6); /* mb_y */ 
mb_count = get_bits(&s->gb, 12); @@ -365,28 +339,23 @@ static int rv10_decode_init(AVCodecContext *avctx) s->height = avctx->height; s->h263_rv10 = 1; - if(avctx->extradata_size >= 8){ - switch(((uint32_t*)avctx->extradata)[1]){ - case 0x10000000: - s->rv10_version= 0; - s->h263_long_vectors=0; - break; - case 0x10003000: - s->rv10_version= 3; - s->h263_long_vectors=1; - break; - case 0x10003001: - s->rv10_version= 3; - s->h263_long_vectors=0; - break; - default: - fprintf(stderr, "unknown header %X\n", ((uint32_t*)avctx->extradata)[1]); - } - }else{ - // for backward compatibility - s->rv10_version= avctx->sub_id; + switch(avctx->sub_id){ + case 0x10000000: + s->rv10_version= 0; + s->h263_long_vectors=0; + break; + case 0x10003000: + s->rv10_version= 3; + s->h263_long_vectors=1; + break; + case 0x10003001: + s->rv10_version= 3; + s->h263_long_vectors=0; + break; + default: + fprintf(stderr, "unknown header %X\n", avctx->sub_id); } - +//printf("ver:%X\n", avctx->sub_id); s->flags= avctx->flags; if (MPV_common_init(s) < 0) @@ -396,6 +365,7 @@ static int rv10_decode_init(AVCodecContext *avctx) s->y_dc_scale_table= s->c_dc_scale_table= ff_mpeg1_dc_scale_table; + s->progressive_sequence=1; /* init rv vlc */ if (!done) { @@ -419,27 +389,14 @@ static int rv10_decode_end(AVCodecContext *avctx) return 0; } -static int rv10_decode_frame(AVCodecContext *avctx, - void *data, int *data_size, +static int rv10_decode_packet(AVCodecContext *avctx, UINT8 *buf, int buf_size) { MpegEncContext *s = avctx->priv_data; int i, mb_count, mb_pos, left; - DCTELEM block[6][64]; - AVPicture *pict = data; - -#ifdef DEBUG - printf("*****frame %d size=%d\n", avctx->frame_number, buf_size); -#endif - - /* no supplementary picture */ - if (buf_size == 0) { - *data_size = 0; - return 0; - } init_get_bits(&s->gb, buf, buf_size); - + mb_count = rv10_decode_picture_header(s); if (mb_count < 0) { fprintf(stderr, "HEADER ERROR\n"); @@ -459,7 +416,8 @@ static int rv10_decode_frame(AVCodecContext *avctx, 
} if (s->mb_x == 0 && s->mb_y == 0) { - MPV_frame_start(s, avctx); + if(MPV_frame_start(s, avctx) < 0) + return -1; } #ifdef DEBUG @@ -473,52 +431,78 @@ static int rv10_decode_frame(AVCodecContext *avctx, s->rv10_first_dc_coded[1] = 0; s->rv10_first_dc_coded[2] = 0; + if(s->mb_y==0) s->first_slice_line=1; + s->block_wrap[0]= s->block_wrap[1]= s->block_wrap[2]= s->block_wrap[3]= s->mb_width*2 + 2; s->block_wrap[4]= s->block_wrap[5]= s->mb_width + 2; - s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1 + s->mb_x*2; - s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1) + s->mb_x*2; - s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1 + s->mb_x*2; - s->block_index[3]= s->block_wrap[0]*(s->mb_y*2 + 2) + s->mb_x*2; - s->block_index[4]= s->block_wrap[4]*(s->mb_y + 1) + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x; - s->block_index[5]= s->block_wrap[4]*(s->mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x; + ff_init_block_index(s); /* decode each macroblock */ for(i=0;iblock_index[0]+=2; - s->block_index[1]+=2; - s->block_index[2]+=2; - s->block_index[3]+=2; - s->block_index[4]++; - s->block_index[5]++; + ff_update_block_index(s); #ifdef DEBUG printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y); #endif - memset(block, 0, sizeof(block)); + clear_blocks(s->block[0]); s->mv_dir = MV_DIR_FORWARD; s->mv_type = MV_TYPE_16X16; - if (h263_decode_mb(s, block) < 0) { + if (ff_h263_decode_mb(s, s->block) == SLICE_ERROR) { fprintf(stderr, "ERROR at MB %d %d\n", s->mb_x, s->mb_y); return -1; } - MPV_decode_mb(s, block); + MPV_decode_mb(s, s->block); if (++s->mb_x == s->mb_width) { s->mb_x = 0; s->mb_y++; - s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1; - s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1); - s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1; - s->block_index[3]= s->block_wrap[0]*(s->mb_y*2 + 2); - s->block_index[4]= s->block_wrap[4]*(s->mb_y + 1) + s->block_wrap[0]*(s->mb_height*2 + 2); - s->block_index[5]= 
s->block_wrap[4]*(s->mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2); + ff_init_block_index(s); + s->first_slice_line=0; + } + } + + return buf_size; +} + +static int rv10_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + UINT8 *buf, int buf_size) +{ + MpegEncContext *s = avctx->priv_data; + int i; + AVPicture *pict = data; + +#ifdef DEBUG + printf("*****frame %d size=%d\n", avctx->frame_number, buf_size); +#endif + + /* no supplementary picture */ + if (buf_size == 0) { + *data_size = 0; + return 0; + } + + if(avctx->slice_count){ + for(i=0; islice_count; i++){ + int offset= avctx->slice_offset[i]; + int size; + + if(i+1 == avctx->slice_count) + size= buf_size - offset; + else + size= avctx->slice_offset[i+1] - offset; + + if( rv10_decode_packet(avctx, buf+offset, size) < 0 ) + return -1; } + }else{ + if( rv10_decode_packet(avctx, buf, buf_size) < 0 ) + return -1; } - if (s->mb_x == 0 && - s->mb_y == s->mb_height) { + if(s->mb_y>=s->mb_height){ MPV_frame_end(s); pict->data[0] = s->current_picture[0]; @@ -527,12 +511,13 @@ static int rv10_decode_frame(AVCodecContext *avctx, pict->linesize[0] = s->linesize; pict->linesize[1] = s->uvlinesize; pict->linesize[2] = s->uvlinesize; - + avctx->quality = s->qscale; *data_size = sizeof(AVPicture); - } else { + }else{ *data_size = 0; } + return buf_size; } diff --git a/src/libffmpeg/libavcodec/simple_idct.c b/src/libffmpeg/libavcodec/simple_idct.c index ccebd67a9..ad27ac594 100644 --- a/src/libffmpeg/libavcodec/simple_idct.c +++ b/src/libffmpeg/libavcodec/simple_idct.c @@ -25,8 +25,6 @@ #include "dsputil.h" #include "simple_idct.h" -//#define ARCH_ALPHA - #if 0 #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ @@ -49,10 +47,6 @@ #define COL_SHIFT 20 // 6 #endif -#ifdef ARCH_ALPHA -#define FAST_64BIT -#endif - #if defined(ARCH_POWERPC_405) /* signed 16x16 -> 32 multiply add accumulate */ @@ -73,180 +67,6 @@ #endif -#ifdef ARCH_ALPHA -/* 0: 
all entries 0, 1: only first entry nonzero, 2: otherwise */ -static inline int idctRowCondDC(int16_t *row) -{ - int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3; - uint64_t *lrow = (uint64_t *) row; - - if (lrow[1] == 0) { - if (lrow[0] == 0) - return 0; - if ((lrow[0] & ~0xffffULL) == 0) { - uint64_t v; -#if 1 //is ok if |a0| < 1024 than theres an +-1 error (for the *W4 case for W4=16383 !!!) - a0 = row[0]<<3; -#else - a0 = W4 * row[0]; - a0 += 1 << (ROW_SHIFT - 1); - a0 >>= ROW_SHIFT; -#endif - v = (uint16_t) a0; - v += v << 16; - v += v << 32; - lrow[0] = v; - lrow[1] = v; - - return 1; - } - } - - a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); - a1 = a0; - a2 = a0; - a3 = a0; - - if (row[2]) { - a0 += W2 * row[2]; - a1 += W6 * row[2]; - a2 -= W6 * row[2]; - a3 -= W2 * row[2]; - } - - if (row[4]) { - a0 += W4 * row[4]; - a1 -= W4 * row[4]; - a2 -= W4 * row[4]; - a3 += W4 * row[4]; - } - - if (row[6]) { - a0 += W6 * row[6]; - a1 -= W2 * row[6]; - a2 += W2 * row[6]; - a3 -= W6 * row[6]; - } - - if (row[1]) { - b0 = W1 * row[1]; - b1 = W3 * row[1]; - b2 = W5 * row[1]; - b3 = W7 * row[1]; - } else { - b0 = 0; - b1 = 0; - b2 = 0; - b3 = 0; - } - - if (row[3]) { - b0 += W3 * row[3]; - b1 -= W7 * row[3]; - b2 -= W1 * row[3]; - b3 -= W5 * row[3]; - } - - if (row[5]) { - b0 += W5 * row[5]; - b1 -= W1 * row[5]; - b2 += W7 * row[5]; - b3 += W3 * row[5]; - } - - if (row[7]) { - b0 += W7 * row[7]; - b1 -= W5 * row[7]; - b2 += W3 * row[7]; - b3 -= W1 * row[7]; - } - - row[0] = (a0 + b0) >> ROW_SHIFT; - row[1] = (a1 + b1) >> ROW_SHIFT; - row[2] = (a2 + b2) >> ROW_SHIFT; - row[3] = (a3 + b3) >> ROW_SHIFT; - row[4] = (a3 - b3) >> ROW_SHIFT; - row[5] = (a2 - b2) >> ROW_SHIFT; - row[6] = (a1 - b1) >> ROW_SHIFT; - row[7] = (a0 - b0) >> ROW_SHIFT; - - return 2; -} - -inline static void idctSparseCol2(int16_t *col) -{ - int a0, a1, a2, a3, b0, b1, b2, b3; - - col[0] += (1 << (COL_SHIFT - 1)) / W4; - - a0 = W4 * col[8 * 0]; - a1 = W4 * col[8 * 0]; - a2 = W4 * col[8 * 0]; - a3 = W4 * col[8 * 
0]; - - if (col[8 * 2]) { - a0 += W2 * col[8 * 2]; - a1 += W6 * col[8 * 2]; - a2 -= W6 * col[8 * 2]; - a3 -= W2 * col[8 * 2]; - } - - if (col[8 * 4]) { - a0 += W4 * col[8 * 4]; - a1 -= W4 * col[8 * 4]; - a2 -= W4 * col[8 * 4]; - a3 += W4 * col[8 * 4]; - } - - if (col[8 * 6]) { - a0 += W6 * col[8 * 6]; - a1 -= W2 * col[8 * 6]; - a2 += W2 * col[8 * 6]; - a3 -= W6 * col[8 * 6]; - } - - if (col[8 * 1]) { - b0 = W1 * col[8 * 1]; - b1 = W3 * col[8 * 1]; - b2 = W5 * col[8 * 1]; - b3 = W7 * col[8 * 1]; - } else { - b0 = b1 = b2 = b3 = 0; - } - - if (col[8 * 3]) { - b0 += W3 * col[8 * 3]; - b1 -= W7 * col[8 * 3]; - b2 -= W1 * col[8 * 3]; - b3 -= W5 * col[8 * 3]; - } - - if (col[8 * 5]) { - b0 += W5 * col[8 * 5]; - b1 -= W1 * col[8 * 5]; - b2 += W7 * col[8 * 5]; - b3 += W3 * col[8 * 5]; - } - - if (col[8 * 7]) { - b0 += W7 * col[8 * 7]; - b1 -= W5 * col[8 * 7]; - b2 += W3 * col[8 * 7]; - b3 -= W1 * col[8 * 7]; - } - - col[8 * 0] = (a0 + b0) >> COL_SHIFT; - col[8 * 7] = (a0 - b0) >> COL_SHIFT; - col[8 * 1] = (a1 + b1) >> COL_SHIFT; - col[8 * 6] = (a1 - b1) >> COL_SHIFT; - col[8 * 2] = (a2 + b2) >> COL_SHIFT; - col[8 * 5] = (a2 - b2) >> COL_SHIFT; - col[8 * 3] = (a3 + b3) >> COL_SHIFT; - col[8 * 4] = (a3 - b3) >> COL_SHIFT; -} - -#else /* not ARCH_ALPHA */ - static inline void idctRowCondDC (int16_t * row) { int a0, a1, a2, a3, b0, b1, b2, b3; @@ -337,7 +157,6 @@ static inline void idctRowCondDC (int16_t * row) row[3] = (a3 + b3) >> ROW_SHIFT; row[4] = (a3 - b3) >> ROW_SHIFT; } -#endif /* not ARCH_ALPHA */ static inline void idctSparseColPut (UINT8 *dest, int line_size, int16_t * col) @@ -546,87 +365,6 @@ static inline void idctSparseCol (int16_t * col) col[56] = ((a0 - b0) >> COL_SHIFT); } - -#ifdef ARCH_ALPHA -/* If all rows but the first one are zero after row transformation, - all rows will be identical after column transformation. 
*/ -static inline void idctCol2(int16_t *col) -{ - int i; - uint64_t l, r; - uint64_t *lcol = (uint64_t *) col; - - for (i = 0; i < 8; ++i) { - int a0 = col[0] + (1 << (COL_SHIFT - 1)) / W4; - - a0 *= W4; - col[0] = a0 >> COL_SHIFT; - ++col; - } - - l = lcol[0]; - r = lcol[1]; - lcol[ 2] = l; lcol[ 3] = r; - lcol[ 4] = l; lcol[ 5] = r; - lcol[ 6] = l; lcol[ 7] = r; - lcol[ 8] = l; lcol[ 9] = r; - lcol[10] = l; lcol[11] = r; - lcol[12] = l; lcol[13] = r; - lcol[14] = l; lcol[15] = r; -} - -void simple_idct (short *block) -{ - - int i; - int rowsZero = 1; /* all rows except row 0 zero */ - int rowsConstant = 1; /* all rows consist of a constant value */ - - for (i = 0; i < 8; i++) { - int sparseness = idctRowCondDC(block + 8 * i); - - if (i > 0 && sparseness > 0) - rowsZero = 0; - if (sparseness == 2) - rowsConstant = 0; - } - - if (rowsZero) { - idctCol2(block); - } else if (rowsConstant) { - uint64_t *lblock = (uint64_t *) block; - - idctSparseCol2(block); - for (i = 0; i < 8; i++) { - uint64_t v = (uint16_t) block[i * 8]; - - v += v << 16; - v += v << 32; - lblock[0] = v; - lblock[1] = v; - lblock += 2; - } - } else { - for (i = 0; i < 8; i++) - idctSparseCol2(block + i); - } -} - -/* XXX: suppress this mess */ -void simple_idct_put(UINT8 *dest, int line_size, DCTELEM *block) -{ - simple_idct(block); - put_pixels_clamped(block, dest, line_size); -} - -void simple_idct_add(UINT8 *dest, int line_size, DCTELEM *block) -{ - simple_idct(block); - add_pixels_clamped(block, dest, line_size); -} - -#else - void simple_idct_put(UINT8 *dest, int line_size, INT16 *block) { int i; @@ -657,6 +395,81 @@ void simple_idct(INT16 *block) idctSparseCol(block + i); } -#endif +/* 2x4x8 idct */ + +#define CN_SHIFT 12 +#define C_FIX(x) ((int)((x) * (1 << CN_SHIFT) + 0.5)) +#define C1 C_FIX(0.6532814824) +#define C2 C_FIX(0.2705980501) + +/* row idct is multiplied by 16 * sqrt(2.0), col idct4 is normalized, + and the butterfly must be multiplied by 0.5 * sqrt(2.0) */ +#define C_SHIFT 
(4+1+12) + +static inline void idct4col(UINT8 *dest, int line_size, const INT16 *col) +{ + int c0, c1, c2, c3, a0, a1, a2, a3; + const UINT8 *cm = cropTbl + MAX_NEG_CROP; + + a0 = col[8*0]; + a1 = col[8*2]; + a2 = col[8*4]; + a3 = col[8*6]; + c0 = ((a0 + a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1)); + c2 = ((a0 - a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1)); + c1 = a1 * C1 + a3 * C2; + c3 = a1 * C2 - a3 * C1; + dest[0] = cm[(c0 + c1) >> C_SHIFT]; + dest += line_size; + dest[0] = cm[(c2 + c3) >> C_SHIFT]; + dest += line_size; + dest[0] = cm[(c2 - c3) >> C_SHIFT]; + dest += line_size; + dest[0] = cm[(c0 - c1) >> C_SHIFT]; +} + +#define BF(k) \ +{\ + int a0, a1;\ + a0 = ptr[k];\ + a1 = ptr[8 + k];\ + ptr[k] = a0 + a1;\ + ptr[8 + k] = a0 - a1;\ +} + +/* only used by the DV codec. The input must be interlaced. 128 is added + to the pixels before clamping to avoid a systematic error; a + (1024*sqrt(2)) offset would be needed otherwise. */ +/* XXX: I think a 1.0/sqrt(2) normalization should be needed to + compensate for the extra butterfly stage - I don't have the full DV + specification */ +void simple_idct248_put(UINT8 *dest, int line_size, INT16 *block) +{ + int i; + INT16 *ptr; + + /* butterfly */ + ptr = block; + for(i=0;i<4;i++) { + BF(0); + BF(1); + BF(2); + BF(3); + BF(4); + BF(5); + BF(6); + BF(7); + ptr += 2 * 8; + } + + /* IDCT8 on each line */ + for(i=0; i<8; i++) { + idctRowCondDC(block + i*8); + } -#undef COL_SHIFT + /* IDCT4 and store */ + for(i=0;i<8;i++) { + idct4col(dest + i, 2 * line_size, block + i); + idct4col(dest + line_size + i, 2 * line_size, block + 8 + i); + } +} diff --git a/src/libffmpeg/libavcodec/simple_idct.h b/src/libffmpeg/libavcodec/simple_idct.h index b26754225..6c6b4f011 100644 --- a/src/libffmpeg/libavcodec/simple_idct.h +++ b/src/libffmpeg/libavcodec/simple_idct.h @@ -20,5 +20,9 @@ void simple_idct_put(UINT8 *dest, int line_size, INT16 *block); void simple_idct_add(UINT8 *dest, int line_size, INT16 *block); -void simple_idct_mmx(short *block); 
+void ff_simple_idct_mmx(short *block); +void ff_simple_idct_add_mmx(UINT8 *dest, int line_size, INT16 *block); +void ff_simple_idct_put_mmx(UINT8 *dest, int line_size, INT16 *block); void simple_idct(short *block); + +void simple_idct248_put(UINT8 *dest, int line_size, INT16 *block); diff --git a/src/libffmpeg/libavcodec/svq1.c b/src/libffmpeg/libavcodec/svq1.c index 741bef217..d1df89bd8 100644 --- a/src/libffmpeg/libavcodec/svq1.c +++ b/src/libffmpeg/libavcodec/svq1.c @@ -1085,7 +1085,8 @@ static int svq1_decode_frame(AVCodecContext *avctx, result = svq1_decode_frame_header (&s->gb, s); - MPV_frame_start(s, avctx); + if(MPV_frame_start(s, avctx) < 0) + return -1; if (result != 0) { diff --git a/src/libffmpeg/libavcodec/utils.c b/src/libffmpeg/libavcodec/utils.c index 943614da6..fcd65364b 100644 --- a/src/libffmpeg/libavcodec/utils.c +++ b/src/libffmpeg/libavcodec/utils.c @@ -49,6 +49,40 @@ void register_avcodec(AVCodec *format) format->next = NULL; } +void avcodec_get_context_defaults(AVCodecContext *s){ + s->bit_rate= 800*1000; + s->bit_rate_tolerance= s->bit_rate*10; + s->qmin= 2; + s->qmax= 31; + s->rc_eq= "tex^qComp"; + s->qcompress= 0.5; + s->max_qdiff= 3; + s->b_quant_factor=1.25; + s->b_quant_offset=1.25; + s->i_quant_factor=-0.8; + s->i_quant_offset=0.0; + s->error_concealment= 3; + s->error_resilience= 1; + s->workaround_bugs= FF_BUG_AUTODETECT; + s->frame_rate = 25 * FRAME_RATE_BASE; + s->gop_size= 50; + s->me_method= ME_EPZS; +} + +/** + * allocates an AVCodecContext and sets it to defaults. 
+ * this can be deallocated by simply calling free() + */ +AVCodecContext *avcodec_alloc_context(void){ + AVCodecContext *avctx= av_mallocz(sizeof(AVCodecContext)); + + if(avctx==NULL) return NULL; + + avcodec_get_context_defaults(avctx); + + return avctx; +} + int avcodec_open(AVCodecContext *avctx, AVCodec *codec) { int ret; @@ -192,7 +226,6 @@ AVCodec *avcodec_find(enum CodecID id) } const char *pix_fmt_str[] = { - "??", "yuv420p", "yuv422", "rgb24", @@ -201,9 +234,10 @@ const char *pix_fmt_str[] = { "yuv444p", "rgba32", "bgra32", - "yuv410p" + "yuv410p", + "yuv411p", }; - + void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode) { const char *codec_name; @@ -251,9 +285,10 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode) enc->width, enc->height, (float)enc->frame_rate / FRAME_RATE_BASE); } - snprintf(buf + strlen(buf), buf_size - strlen(buf), - ", q=%d-%d", enc->qmin, enc->qmax); - + if (encode) { + snprintf(buf + strlen(buf), buf_size - strlen(buf), + ", q=%d-%d", enc->qmin, enc->qmax); + } bitrate = enc->bit_rate; break; case CODEC_TYPE_AUDIO: @@ -303,6 +338,14 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode) default: av_abort(); } + if (encode) { + if (enc->flags & CODEC_FLAG_PASS1) + snprintf(buf + strlen(buf), buf_size - strlen(buf), + ", pass 1"); + if (enc->flags & CODEC_FLAG_PASS2) + snprintf(buf + strlen(buf), buf_size - strlen(buf), + ", pass 2"); + } if (bitrate != 0) { snprintf(buf + strlen(buf), buf_size - strlen(buf), ", %d kb/s", bitrate / 1000); diff --git a/src/libffmpeg/libavcodec/wmadata.h b/src/libffmpeg/libavcodec/wmadata.h new file mode 100644 index 000000000..12050b2f3 --- /dev/null +++ b/src/libffmpeg/libavcodec/wmadata.h @@ -0,0 +1,1409 @@ +/* Various WMA tables */ + +static const uint16_t wma_critical_freqs[25] = { + 100, 200, 300, 400, 510, 630, 770, 920, + 1080, 1270, 1480, 1720, 2000, 2320, 2700, 3150, + 3700, 4400, 5300, 6400, 7700, 9500, 12000, 
15500, + 24500, +}; + +/* first value is number of bands */ +static const uint8_t exponent_band_22050[3][25] = { + { 10, 4, 8, 4, 8, 8, 12, 20, 24, 24, 16, }, + { 14, 4, 8, 8, 4, 12, 12, 16, 24, 16, 20, 24, 32, 40, 36, }, + { 23, 4, 4, 4, 8, 4, 4, 8, 8, 8, 8, 8, 12, 12, 16, 16, 24, 24, 32, 44, 48, 60, 84, 72, }, +}; + +static const uint8_t exponent_band_32000[3][25] = { + { 11, 4, 4, 8, 4, 4, 12, 16, 24, 20, 28, 4, }, + { 15, 4, 8, 4, 4, 8, 8, 16, 20, 12, 20, 20, 28, 40, 56, 8, }, + { 16, 8, 4, 8, 8, 12, 16, 20, 24, 40, 32, 32, 44, 56, 80, 112, 16, }, +}; + +static const uint8_t exponent_band_44100[3][25] = { + { 12, 4, 4, 4, 4, 4, 8, 8, 8, 12, 16, 20, 36, }, + { 15, 4, 8, 4, 8, 8, 4, 8, 8, 12, 12, 12, 24, 28, 40, 76, }, + { 17, 4, 8, 8, 4, 12, 12, 8, 8, 24, 16, 20, 24, 32, 40, 60, 80, 152, }, +}; + +static const uint16_t hgain_huffcodes[37] = { + 0x00003, 0x002e7, 0x00001, 0x005cd, 0x0005d, 0x005c9, 0x0005e, 0x00003, + 0x00016, 0x0000b, 0x00001, 0x00006, 0x00001, 0x00006, 0x00004, 0x00005, + 0x00004, 0x00007, 0x00003, 0x00007, 0x00004, 0x0000a, 0x0000a, 0x00002, + 0x00003, 0x00000, 0x00005, 0x00002, 0x0005f, 0x00004, 0x00003, 0x00002, + 0x005c8, 0x000b8, 0x005ca, 0x005cb, 0x005cc, +}; + +static const uint8_t hgain_huffbits[37] = { + 10, 12, 10, 13, 9, 13, 9, 8, + 7, 5, 5, 4, 4, 3, 3, 3, + 4, 3, 4, 4, 5, 5, 6, 8, + 7, 10, 8, 10, 9, 8, 9, 9, + 13, 10, 13, 13, 13, +}; + +static const float lsp_codebook[NB_LSP_COEFS][16] = { + { 1.98732877, 1.97944528, 1.97179088, 1.96260549, 1.95038374, 1.93336114, 1.90719232, 1.86191415, }, + { 1.97260000, 1.96083160, 1.94982586, 1.93806164, 1.92516608, 1.91010199, 1.89232331, 1.87149812, + 1.84564818, 1.81358067, 1.77620070, 1.73265264, 1.67907855, 1.60959081, 1.50829650, 1.33120330, }, + { 1.90109110, 1.86482426, 1.83419671, 1.80168452, 1.76650116, 1.72816320, 1.68502700, 1.63738256, + 1.58501580, 1.51795181, 1.43679906, 1.33950585, 1.24176208, 1.12260729, 0.96749668, 0.74048265, }, + { 1.76943864, 1.67822463, 1.59946365, 
1.53560582, 1.47470796, 1.41210167, 1.34509536, 1.27339507, + 1.19303814, 1.09765169, 0.98818722, 0.87239446, 0.74369172, 0.59768184, 0.43168630, 0.17977021, }, + { 1.43428349, 1.32038354, 1.21074086, 1.10577988, 1.00561746, 0.90335924, 0.80437489, 0.70709671, + 0.60427395, 0.49814048, 0.38509539, 0.27106800, 0.14407416, 0.00219910, -0.16725141, -0.36936085, }, + { 0.99895687, 0.84188166, 0.70753739, 0.57906595, 0.47055563, 0.36966965, 0.26826648, 0.17163380, + 0.07208392, -0.03062936, -1.40037388, -0.25128968, -0.37213937, -0.51075646, -0.64887512, -0.80308031, }, + { 0.26515280, 0.06313551, -0.08872080, -0.21103548, -0.31069678, -0.39680323, -0.47223474, -0.54167135, + -0.61444740, -0.68943343, -0.76580211, -0.85170082, -0.95289061, -1.06514703, -1.20510707, -1.37617746, }, + { -0.53940301, -0.73770929, -0.88424876, -1.01117930, -1.13389091, -1.26830073, -1.42041987, -1.62033919, + -1.10158808, -1.16512566, -1.23337128, -1.30414401, -1.37663312, -1.46853845, -1.57625798, -1.66893638, }, + { -0.38601997, -0.56009350, -0.66978483, -0.76028471, -0.83846064, -0.90868087, -0.97408881, -1.03694962, }, + { -1.56144989, -1.65944032, -1.72689685, -1.77857740, -1.82203011, -1.86220079, -1.90283983, -1.94820479, }, +}; + +static const uint32_t scale_huffcodes[121] = { + 0x3ffe8, 0x3ffe6, 0x3ffe7, 0x3ffe5, 0x7fff5, 0x7fff1, 0x7ffed, 0x7fff6, + 0x7ffee, 0x7ffef, 0x7fff0, 0x7fffc, 0x7fffd, 0x7ffff, 0x7fffe, 0x7fff7, + 0x7fff8, 0x7fffb, 0x7fff9, 0x3ffe4, 0x7fffa, 0x3ffe3, 0x1ffef, 0x1fff0, + 0x0fff5, 0x1ffee, 0x0fff2, 0x0fff3, 0x0fff4, 0x0fff1, 0x07ff6, 0x07ff7, + 0x03ff9, 0x03ff5, 0x03ff7, 0x03ff3, 0x03ff6, 0x03ff2, 0x01ff7, 0x01ff5, + 0x00ff9, 0x00ff7, 0x00ff6, 0x007f9, 0x00ff4, 0x007f8, 0x003f9, 0x003f7, + 0x003f5, 0x001f8, 0x001f7, 0x000fa, 0x000f8, 0x000f6, 0x00079, 0x0003a, + 0x00038, 0x0001a, 0x0000b, 0x00004, 0x00000, 0x0000a, 0x0000c, 0x0001b, + 0x00039, 0x0003b, 0x00078, 0x0007a, 0x000f7, 0x000f9, 0x001f6, 0x001f9, + 0x003f4, 0x003f6, 0x003f8, 0x007f5, 0x007f4, 
0x007f6, 0x007f7, 0x00ff5, + 0x00ff8, 0x01ff4, 0x01ff6, 0x01ff8, 0x03ff8, 0x03ff4, 0x0fff0, 0x07ff4, + 0x0fff6, 0x07ff5, 0x3ffe2, 0x7ffd9, 0x7ffda, 0x7ffdb, 0x7ffdc, 0x7ffdd, + 0x7ffde, 0x7ffd8, 0x7ffd2, 0x7ffd3, 0x7ffd4, 0x7ffd5, 0x7ffd6, 0x7fff2, + 0x7ffdf, 0x7ffe7, 0x7ffe8, 0x7ffe9, 0x7ffea, 0x7ffeb, 0x7ffe6, 0x7ffe0, + 0x7ffe1, 0x7ffe2, 0x7ffe3, 0x7ffe4, 0x7ffe5, 0x7ffd7, 0x7ffec, 0x7fff4, + 0x7fff3, +}; + +static const uint8_t scale_huffbits[121] = { + 18, 18, 18, 18, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 18, 19, 18, 17, 17, + 16, 17, 16, 16, 16, 16, 15, 15, + 14, 14, 14, 14, 14, 14, 13, 13, + 12, 12, 12, 11, 12, 11, 10, 10, + 10, 9, 9, 8, 8, 8, 7, 6, + 6, 5, 4, 3, 1, 4, 4, 5, + 6, 6, 7, 7, 8, 8, 9, 9, + 10, 10, 10, 11, 11, 11, 11, 12, + 12, 13, 13, 13, 14, 14, 16, 15, + 16, 15, 18, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, +}; + +static const uint32_t coef0_huffcodes[666] = { + 0x00258, 0x0003d, 0x00000, 0x00005, 0x00008, 0x00008, 0x0000c, 0x0001b, + 0x0001f, 0x00015, 0x00024, 0x00032, 0x0003a, 0x00026, 0x0002c, 0x0002f, + 0x0004a, 0x0004d, 0x00061, 0x00070, 0x00073, 0x00048, 0x00052, 0x0005a, + 0x0005d, 0x0006e, 0x00099, 0x0009e, 0x000c1, 0x000ce, 0x000e4, 0x000f0, + 0x00093, 0x0009e, 0x000a2, 0x000a1, 0x000b8, 0x000d2, 0x000d3, 0x0012e, + 0x00130, 0x000de, 0x0012d, 0x0019b, 0x001e4, 0x00139, 0x0013a, 0x0013f, + 0x0014f, 0x0016d, 0x001a2, 0x0027c, 0x0027e, 0x00332, 0x0033c, 0x0033f, + 0x0038b, 0x00396, 0x003c5, 0x00270, 0x0027c, 0x0025a, 0x00395, 0x00248, + 0x004bd, 0x004fb, 0x00662, 0x00661, 0x0071b, 0x004e6, 0x004ff, 0x00666, + 0x0071c, 0x0071a, 0x0071f, 0x00794, 0x00536, 0x004e2, 0x0078e, 0x004ee, + 0x00518, 0x00535, 0x004fb, 0x0078d, 0x00530, 0x00680, 0x0068f, 0x005cb, + 0x00965, 0x006a6, 0x00967, 0x0097f, 0x00682, 0x006ae, 0x00cd0, 0x00e28, + 0x00f13, 0x00f1f, 0x009f5, 0x00cd3, 0x00f11, 0x00926, 0x00964, 0x00f32, + 0x00f12, 0x00f30, 0x00966, 
0x00d0b, 0x00a68, 0x00b91, 0x009c7, 0x00b73, + 0x012fa, 0x0131d, 0x013f9, 0x01ca0, 0x0199c, 0x01c7a, 0x0198c, 0x01248, + 0x01c74, 0x01c64, 0x0139e, 0x012fd, 0x00a77, 0x012fc, 0x01c7b, 0x012ca, + 0x014cc, 0x014d2, 0x014e3, 0x014dc, 0x012dc, 0x03344, 0x02598, 0x0263c, + 0x0333b, 0x025e6, 0x01a1c, 0x01e3c, 0x014e2, 0x033d4, 0x01a11, 0x03349, + 0x03cce, 0x014e1, 0x01a34, 0x0273e, 0x02627, 0x0273f, 0x038ee, 0x03971, + 0x03c67, 0x03c61, 0x0333d, 0x038c2, 0x0263f, 0x038cd, 0x02638, 0x02e41, + 0x0351f, 0x03348, 0x03c66, 0x03562, 0x02989, 0x027d5, 0x0333c, 0x02e4f, + 0x0343b, 0x02ddf, 0x04bc8, 0x029c0, 0x02e57, 0x04c72, 0x025b7, 0x03547, + 0x03540, 0x029d3, 0x04c45, 0x025bb, 0x06600, 0x04c73, 0x04bce, 0x0357b, + 0x029a6, 0x029d2, 0x0263e, 0x0298a, 0x07183, 0x06602, 0x07958, 0x04b66, + 0x0537d, 0x05375, 0x04fe9, 0x04b67, 0x0799f, 0x04bc9, 0x051fe, 0x06a3b, + 0x05bb6, 0x04fa8, 0x0728f, 0x05376, 0x0492c, 0x0537e, 0x0795a, 0x06a3c, + 0x0e515, 0x07887, 0x0683a, 0x051f9, 0x051fd, 0x0cc6a, 0x06a8a, 0x0cc6d, + 0x05bb3, 0x0683b, 0x051fc, 0x05378, 0x0728e, 0x07886, 0x05bb7, 0x0f2a4, + 0x0795b, 0x0683c, 0x09fc1, 0x0683d, 0x0b752, 0x09678, 0x0a3e8, 0x06ac7, + 0x051f0, 0x0b759, 0x06af3, 0x04b6b, 0x0f2a0, 0x0f2ad, 0x096c3, 0x0e518, + 0x0b75c, 0x0d458, 0x0cc6b, 0x0537c, 0x067aa, 0x04fea, 0x0343a, 0x0cc71, + 0x0967f, 0x09fc4, 0x096c2, 0x0e516, 0x0f2a1, 0x0d45c, 0x0d45d, 0x0d45e, + 0x12fb9, 0x0967e, 0x1982f, 0x09883, 0x096c4, 0x0b753, 0x12fb8, 0x0f2a8, + 0x1ca21, 0x096c5, 0x0e51a, 0x1ca27, 0x12f3c, 0x0d471, 0x0f2aa, 0x0b75b, + 0x12fbb, 0x0f2a9, 0x0f2ac, 0x0d45a, 0x0b74f, 0x096c8, 0x16e91, 0x096ca, + 0x12fbf, 0x0d0a7, 0x13103, 0x0d516, 0x16e99, 0x12cbd, 0x0a3ea, 0x19829, + 0x0b755, 0x29ba7, 0x1ca28, 0x29ba5, 0x16e93, 0x1982c, 0x19828, 0x25994, + 0x0a3eb, 0x1ca29, 0x16e90, 0x1ca25, 0x1982d, 0x1ca26, 0x16e9b, 0x0b756, + 0x0967c, 0x25997, 0x0b75f, 0x198d3, 0x0b757, 0x19a2a, 0x0d45b, 0x0e517, + 0x1ca24, 0x1ca23, 0x1ca22, 0x0b758, 0x16e97, 0x0cd14, 0x13100, 0x00007, + 0x0003b, 0x0006b, 0x00097, 
0x00138, 0x00125, 0x00173, 0x00258, 0x00335, + 0x0028e, 0x004c6, 0x00715, 0x00729, 0x004ef, 0x00519, 0x004ed, 0x00532, + 0x0068c, 0x00686, 0x00978, 0x00e5d, 0x00e31, 0x009f4, 0x00b92, 0x012f8, + 0x00d06, 0x00a67, 0x00d44, 0x00a76, 0x00d59, 0x012cd, 0x01c78, 0x01c75, + 0x0199f, 0x0198f, 0x01c67, 0x014c6, 0x01c79, 0x01c76, 0x00b94, 0x00d1b, + 0x01e32, 0x01e31, 0x01ab0, 0x01a05, 0x01aa1, 0x0333a, 0x025e5, 0x02626, + 0x03541, 0x03544, 0x03421, 0x03546, 0x02e55, 0x02e56, 0x0492d, 0x02dde, + 0x0299b, 0x02ddc, 0x0357a, 0x0249c, 0x0668b, 0x1c77f, 0x1ca20, 0x0d45f, + 0x09886, 0x16e9a, 0x0f2a7, 0x0b751, 0x0a3ee, 0x0cf59, 0x0cf57, 0x0b754, + 0x0d0a6, 0x16e98, 0x0b760, 0x06ac6, 0x0a3f0, 0x12fbe, 0x13104, 0x0f2a5, + 0x0a3ef, 0x0d472, 0x12cba, 0x1982e, 0x16e9c, 0x1c77e, 0x198d0, 0x13105, + 0x16e92, 0x0b75d, 0x0d459, 0x0001a, 0x000c0, 0x0016c, 0x003cd, 0x00350, + 0x0067b, 0x0051e, 0x006a9, 0x009f4, 0x00b72, 0x00d09, 0x01249, 0x01e3d, + 0x01ca1, 0x01a1f, 0x01721, 0x01a8a, 0x016e8, 0x03347, 0x01a35, 0x0249d, + 0x0299a, 0x02596, 0x02e4e, 0x0298b, 0x07182, 0x04c46, 0x025ba, 0x02e40, + 0x027d6, 0x04fe8, 0x06607, 0x05310, 0x09884, 0x072e1, 0x06a3d, 0x04b6a, + 0x04c7a, 0x06603, 0x04c7b, 0x03428, 0x06605, 0x09664, 0x09fc0, 0x071de, + 0x06601, 0x05bb2, 0x09885, 0x0a3e2, 0x1c61f, 0x12cbb, 0x0b750, 0x0cf58, + 0x0967d, 0x25995, 0x668ad, 0x0b75a, 0x09fc2, 0x0537f, 0x0b75e, 0x13fae, + 0x12fbc, 0x00031, 0x001c4, 0x004c5, 0x005b8, 0x00cf4, 0x0096f, 0x00d46, + 0x01e57, 0x01a04, 0x02625, 0x03346, 0x028f9, 0x04c47, 0x072e0, 0x04b69, + 0x03420, 0x07957, 0x06639, 0x0799e, 0x07959, 0x07881, 0x04b68, 0x09fc3, + 0x09fd6, 0x0cc70, 0x0a3f1, 0x12cbe, 0x0e30e, 0x0e51b, 0x06af2, 0x12cbc, + 0x1c77d, 0x0f2ab, 0x12fbd, 0x1aa2f, 0x0a3ec, 0x0d473, 0x05377, 0x0a3e9, + 0x1982b, 0x0e300, 0x12f3f, 0x0cf5f, 0x096c0, 0x38c3c, 0x16e94, 0x16e95, + 0x12f3d, 0x29ba4, 0x29ba6, 0x1c77c, 0x6a8ba, 0x3545c, 0x33457, 0x668ac, + 0x6a8bb, 0x16e9d, 0x0e519, 0x25996, 0x12f3e, 0x00036, 0x0033e, 0x006ad, + 0x00d03, 0x012c8, 0x0124a, 
0x03c42, 0x03ccd, 0x06606, 0x07880, 0x06852, + 0x06a3a, 0x05bb4, 0x0f2a2, 0x09fc7, 0x12cb9, 0x0cc6c, 0x0a6e8, 0x096c1, + 0x0004a, 0x00355, 0x012f9, 0x014e8, 0x01abe, 0x025b6, 0x0492e, 0x09fc6, + 0x051ff, 0x0cc6f, 0x096cb, 0x0d071, 0x198d1, 0x12cb8, 0x38c3d, 0x13faf, + 0x096c9, 0x0009d, 0x00539, 0x012ce, 0x0341f, 0x029c1, 0x04b33, 0x0a3e3, + 0x0d070, 0x16e96, 0x0b763, 0x000a0, 0x009ce, 0x038cc, 0x0343d, 0x051fa, + 0x09888, 0x12fba, 0x000df, 0x00a75, 0x029a7, 0x09fc5, 0x0e301, 0x0967b, + 0x001e7, 0x012c9, 0x051fb, 0x09889, 0x0f2a6, 0x0016f, 0x01cb9, 0x0cf5a, + 0x12cbf, 0x09679, 0x00272, 0x01a15, 0x0967a, 0x003cb, 0x025f6, 0x0b762, + 0x0028d, 0x03c60, 0x0cf5e, 0x00352, 0x03ccc, 0x0072f, 0x07186, 0x004ec, + 0x05379, 0x0068e, 0x09887, 0x006a7, 0x06af1, 0x00e29, 0x0cf5b, 0x00f31, + 0x0d470, 0x009c6, 0x013fb, 0x13102, 0x019a5, 0x13101, 0x01983, 0x01c65, + 0x0124f, 0x014c7, 0x01726, 0x01abf, 0x03304, 0x02624, 0x03c41, 0x027d7, + 0x02ddd, 0x02e54, 0x0343c, 0x06604, 0x07181, 0x0663a, 0x04fa9, 0x0663b, + 0x05311, 0x0537a, 0x06839, 0x05bb5, 0x0492f, 0x06af0, 0x096c7, 0x0cc6e, + 0x0537b, 0x0cf5c, 0x0cf56, 0x198d2, 0x0cf5d, 0x0a3ed, 0x0f2a3, 0x1982a, + 0x0b761, 0x096c6, +}; + +static const uint8_t coef0_huffbits[666] = { + 11, 6, 2, 3, 4, 5, 5, 5, + 5, 6, 6, 6, 6, 7, 7, 7, + 7, 7, 7, 7, 7, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 11, 11, 11, 10, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 12, 12, 11, 12, + 12, 12, 12, 11, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 13, 13, 12, + 12, 12, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 14, + 13, 13, 13, 13, 13, 13, 13, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 13, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 15, + 15, 14, 14, 15, 15, 15, 14, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 14, 15, 15, 15, 15, 16, 
+ 16, 16, 15, 16, 15, 15, 16, 16, + 16, 16, 15, 16, 16, 16, 15, 16, + 16, 15, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 15, 15, 16, 16, + 15, 16, 16, 16, 17, 17, 17, 16, + 16, 17, 16, 16, 16, 16, 17, 16, + 17, 17, 16, 16, 15, 15, 15, 16, + 17, 16, 17, 16, 16, 17, 17, 17, + 17, 17, 17, 16, 17, 17, 17, 16, + 17, 17, 16, 17, 17, 17, 16, 17, + 17, 16, 16, 17, 17, 17, 18, 17, + 17, 17, 17, 17, 18, 18, 17, 17, + 17, 19, 17, 19, 18, 17, 17, 18, + 17, 17, 18, 17, 17, 17, 18, 17, + 17, 18, 17, 17, 17, 17, 17, 16, + 17, 17, 17, 17, 18, 16, 17, 4, + 6, 8, 9, 9, 10, 10, 10, 10, + 11, 11, 11, 11, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 14, 13, 13, 13, 13, + 13, 13, 14, 14, 14, 14, 14, 14, + 15, 15, 15, 15, 15, 15, 16, 15, + 15, 15, 15, 15, 15, 17, 17, 17, + 16, 18, 16, 17, 17, 16, 16, 17, + 17, 18, 17, 16, 17, 17, 17, 16, + 17, 17, 18, 17, 18, 17, 17, 17, + 18, 17, 17, 5, 8, 10, 10, 11, + 11, 12, 12, 12, 13, 13, 14, 13, + 13, 14, 14, 14, 14, 14, 14, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 16, 16, 15, 16, 16, + 15, 15, 15, 15, 15, 16, 16, 15, + 15, 16, 16, 17, 17, 18, 17, 16, + 17, 18, 19, 17, 16, 16, 17, 17, + 17, 6, 9, 11, 12, 12, 13, 13, + 13, 14, 14, 14, 15, 15, 15, 16, + 15, 15, 15, 15, 15, 15, 16, 16, + 16, 16, 17, 18, 16, 16, 16, 18, + 17, 16, 17, 18, 17, 17, 16, 17, + 17, 16, 17, 16, 17, 18, 18, 18, + 17, 19, 19, 17, 20, 19, 18, 19, + 20, 18, 16, 18, 17, 7, 10, 12, + 13, 13, 14, 14, 14, 15, 15, 16, + 16, 16, 16, 16, 18, 16, 17, 17, + 8, 11, 13, 14, 14, 15, 16, 16, + 16, 16, 17, 17, 17, 18, 18, 17, + 17, 8, 12, 14, 15, 15, 15, 17, + 17, 18, 17, 9, 12, 14, 15, 16, + 16, 17, 9, 13, 15, 16, 16, 17, + 9, 13, 16, 16, 16, 10, 13, 16, + 18, 17, 10, 14, 17, 10, 14, 17, + 11, 14, 16, 11, 14, 11, 15, 12, + 16, 12, 16, 12, 16, 12, 16, 12, + 17, 13, 13, 17, 13, 17, 13, 13, + 14, 14, 14, 14, 14, 14, 14, 15, + 15, 15, 15, 15, 15, 15, 16, 15, + 16, 16, 16, 16, 16, 16, 17, 16, + 16, 16, 16, 17, 16, 17, 16, 17, + 17, 
17, +}; + +static const uint32_t coef1_huffcodes[555] = { + 0x00115, 0x00002, 0x00001, 0x00000, 0x0000d, 0x00007, 0x00013, 0x0001d, + 0x00008, 0x0000c, 0x00023, 0x0002b, 0x0003f, 0x00017, 0x0001b, 0x00043, + 0x00049, 0x00050, 0x00055, 0x00054, 0x00067, 0x00064, 0x0007b, 0x0002d, + 0x00028, 0x0002a, 0x00085, 0x00089, 0x0002b, 0x00035, 0x00090, 0x00091, + 0x00094, 0x00088, 0x000c1, 0x000c6, 0x000f2, 0x000e3, 0x000c5, 0x000e2, + 0x00036, 0x000f0, 0x000a7, 0x000cd, 0x000fb, 0x00059, 0x00116, 0x00103, + 0x00108, 0x0012b, 0x0012d, 0x00188, 0x0012e, 0x0014c, 0x001c3, 0x00187, + 0x001e7, 0x0006f, 0x00094, 0x00069, 0x001e6, 0x001ca, 0x00147, 0x00195, + 0x000a7, 0x00213, 0x00209, 0x00303, 0x00295, 0x00289, 0x0028c, 0x0028d, + 0x00312, 0x00330, 0x0029b, 0x00308, 0x00328, 0x0029a, 0x0025e, 0x003c5, + 0x00384, 0x0039f, 0x00397, 0x00296, 0x0032e, 0x00332, 0x003c6, 0x003e6, + 0x0012d, 0x000d1, 0x00402, 0x000dd, 0x00161, 0x0012b, 0x00127, 0x0045d, + 0x00601, 0x004ab, 0x0045f, 0x00410, 0x004bf, 0x00528, 0x0045c, 0x00424, + 0x00400, 0x00511, 0x00618, 0x0073d, 0x0063a, 0x00614, 0x0073c, 0x007c0, + 0x007cf, 0x00802, 0x00966, 0x00964, 0x00951, 0x008a0, 0x00346, 0x00803, + 0x00a52, 0x0024a, 0x007c1, 0x0063f, 0x00126, 0x00406, 0x00789, 0x008a2, + 0x00960, 0x00967, 0x00c05, 0x00c70, 0x00c79, 0x00a5d, 0x00c26, 0x00c4d, + 0x00372, 0x008a5, 0x00c08, 0x002c5, 0x00f11, 0x00cc4, 0x00f8e, 0x00e16, + 0x00496, 0x00e77, 0x00f9c, 0x00c25, 0x00f1e, 0x00c27, 0x00f1f, 0x00e17, + 0x00ccd, 0x00355, 0x00c09, 0x00c78, 0x00f90, 0x00521, 0x00357, 0x00356, + 0x0068e, 0x00f9d, 0x00c04, 0x00e58, 0x00a20, 0x00a2c, 0x00c4c, 0x0052f, + 0x00f8d, 0x01178, 0x01053, 0x01097, 0x0180f, 0x0180d, 0x012fb, 0x012aa, + 0x0202a, 0x00a40, 0x018ed, 0x01ceb, 0x01455, 0x018e3, 0x012a1, 0x00354, + 0x00353, 0x00f1c, 0x00c7b, 0x00c37, 0x0101d, 0x012cb, 0x01142, 0x0197d, + 0x01095, 0x01e3b, 0x0186b, 0x00588, 0x01c2a, 0x014b8, 0x01e3a, 0x018ec, + 0x01f46, 0x012fa, 0x00a53, 0x01ce8, 0x00a55, 0x01c29, 0x0117b, 0x01052, + 0x012a0, 
0x00589, 0x00950, 0x01c2b, 0x00a50, 0x0208b, 0x0180e, 0x02027, + 0x02556, 0x01e20, 0x006e7, 0x01c28, 0x0197a, 0x00684, 0x020a2, 0x01f22, + 0x03018, 0x039cf, 0x03e25, 0x02557, 0x0294c, 0x028a6, 0x00d11, 0x028a9, + 0x02979, 0x00d46, 0x00a56, 0x039ce, 0x030cc, 0x0329a, 0x0149d, 0x0510f, + 0x0451c, 0x02028, 0x03299, 0x01ced, 0x014b9, 0x00f85, 0x00c7a, 0x01800, + 0x00341, 0x012ca, 0x039c8, 0x0329d, 0x00d0d, 0x03e20, 0x05144, 0x00d45, + 0x030d0, 0x0186d, 0x030d5, 0x00d0f, 0x00d40, 0x04114, 0x020a1, 0x0297f, + 0x03e24, 0x032f1, 0x04047, 0x030d4, 0x028a8, 0x00d0e, 0x0451d, 0x04044, + 0x0297e, 0x04042, 0x030d2, 0x030cf, 0x03e21, 0x03e26, 0x028a5, 0x0451a, + 0x00d48, 0x01a16, 0x00d44, 0x04518, 0x0149b, 0x039ca, 0x01498, 0x0403d, + 0x0451b, 0x0149c, 0x032f3, 0x030cb, 0x08073, 0x03e22, 0x0529a, 0x020aa, + 0x039cc, 0x0738a, 0x06530, 0x07389, 0x06193, 0x08071, 0x04043, 0x030ce, + 0x05147, 0x07388, 0x05145, 0x08072, 0x04521, 0x00d47, 0x0297c, 0x030cd, + 0x030ca, 0x0000b, 0x0000c, 0x00083, 0x000e4, 0x00048, 0x00102, 0x001cc, + 0x001f5, 0x00097, 0x0020b, 0x00124, 0x00453, 0x00627, 0x00639, 0x00605, + 0x00517, 0x001b8, 0x00663, 0x00667, 0x007c3, 0x00823, 0x00961, 0x00963, + 0x00e5a, 0x00e59, 0x00a2b, 0x00cbf, 0x00292, 0x00a2d, 0x007d0, 0x00953, + 0x00cc5, 0x00f84, 0x004ab, 0x014a7, 0x0068a, 0x0117a, 0x0052e, 0x01442, + 0x0052c, 0x00c77, 0x00f8f, 0x004aa, 0x01094, 0x01801, 0x012c4, 0x0297b, + 0x00952, 0x01f19, 0x006a5, 0x01149, 0x012c5, 0x01803, 0x022f2, 0x0329b, + 0x04520, 0x0149e, 0x00d13, 0x01f16, 0x01ce9, 0x0101c, 0x006e6, 0x039c9, + 0x06191, 0x07c8e, 0x06192, 0x0ca63, 0x039cd, 0x06190, 0x06884, 0x06885, + 0x07382, 0x00d49, 0x00d41, 0x0450c, 0x0149a, 0x030d1, 0x08077, 0x03e23, + 0x01a15, 0x0e701, 0x0e702, 0x08079, 0x0822a, 0x0a218, 0x07887, 0x0403f, + 0x0520b, 0x0529b, 0x0e700, 0x04519, 0x00007, 0x000e0, 0x000d0, 0x0039b, + 0x003e5, 0x00163, 0x0063e, 0x007c9, 0x00806, 0x00954, 0x01044, 0x01f44, + 0x0197c, 0x01f45, 0x00a51, 0x01f47, 0x00951, 0x0052d, 0x02291, 0x0092f, + 0x00a54, 
0x00d12, 0x0297d, 0x00d0c, 0x01499, 0x0329e, 0x032f0, 0x02025, + 0x039c6, 0x00a57, 0x03e46, 0x00d42, 0x0738b, 0x05146, 0x04046, 0x08078, + 0x0510e, 0x07886, 0x02904, 0x04156, 0x04157, 0x06032, 0x030d3, 0x08bce, + 0x04040, 0x0403e, 0x0a414, 0x10457, 0x08075, 0x06887, 0x07c8f, 0x039c7, + 0x07387, 0x08070, 0x08bcf, 0x1482a, 0x10456, 0x1482b, 0x01a17, 0x06886, + 0x0450d, 0x00013, 0x0006b, 0x00615, 0x0080b, 0x0082b, 0x00952, 0x00e5b, + 0x018e2, 0x0186c, 0x01f18, 0x0329f, 0x00d43, 0x03e29, 0x05140, 0x05141, + 0x0ca62, 0x06033, 0x03c42, 0x03e28, 0x0450f, 0x0a21a, 0x07384, 0x0a219, + 0x0e703, 0x0a21b, 0x01a14, 0x07383, 0x045e6, 0x0007a, 0x0012c, 0x00ccc, + 0x0068f, 0x01802, 0x00a52, 0x00953, 0x04045, 0x01a20, 0x0451f, 0x000a4, + 0x00735, 0x01cec, 0x02029, 0x020a3, 0x0451e, 0x00069, 0x00c24, 0x02024, + 0x032f2, 0x05142, 0x00196, 0x00523, 0x000a6, 0x0197b, 0x0030b, 0x0092e, + 0x003e9, 0x03e27, 0x00160, 0x05143, 0x00652, 0x04041, 0x00734, 0x028a7, + 0x0080f, 0x01483, 0x0097c, 0x00340, 0x0068b, 0x00522, 0x01054, 0x01096, + 0x01f17, 0x0202b, 0x01cea, 0x020a0, 0x02978, 0x02026, 0x0297a, 0x039cb, + 0x03e2b, 0x0149f, 0x0329c, 0x07385, 0x08074, 0x0450e, 0x03e2a, 0x05149, + 0x08076, 0x07386, 0x05148, +}; + +static const uint8_t coef1_huffbits[555] = { + 9, 5, 2, 4, 4, 5, 5, 5, + 6, 6, 6, 6, 6, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 9, 8, 8, 8, 8, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 10, 10, 10, 9, 9, 9, 9, + 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 11, 11, 11, 11, 11, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 13, 12, 12, 12, 12, 12, 12, 12, + 13, 12, 12, 12, 12, 12, 12, 12, + 12, 13, 12, 12, 12, 13, 13, 13, + 13, 12, 12, 12, 12, 12, 12, 13, + 12, 13, 13, 13, 13, 13, 13, 13, + 14, 14, 13, 13, 13, 13, 13, 13, + 13, 12, 12, 12, 13, 
13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 14, 13, 14, 13, 13, 13, + 13, 13, 14, 13, 14, 14, 13, 14, + 14, 13, 14, 13, 13, 14, 14, 13, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 15, 14, 14, 14, 14, 15, 15, + 15, 14, 14, 13, 13, 12, 12, 13, + 13, 13, 14, 14, 15, 14, 15, 15, + 14, 13, 14, 15, 15, 15, 14, 14, + 14, 14, 15, 14, 14, 15, 15, 15, + 14, 15, 14, 14, 14, 14, 14, 15, + 15, 16, 15, 15, 15, 14, 15, 15, + 15, 15, 14, 14, 16, 14, 15, 14, + 14, 15, 15, 15, 15, 16, 15, 14, + 15, 15, 15, 16, 15, 15, 14, 14, + 14, 4, 7, 8, 8, 9, 9, 9, + 9, 10, 10, 11, 11, 11, 11, 11, + 11, 12, 11, 11, 11, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 11, 12, + 12, 12, 13, 13, 13, 13, 13, 13, + 13, 12, 12, 13, 13, 13, 13, 14, + 14, 13, 14, 13, 13, 13, 14, 14, + 15, 15, 14, 13, 13, 13, 14, 14, + 15, 15, 15, 16, 14, 15, 17, 17, + 15, 15, 15, 15, 15, 14, 16, 14, + 16, 16, 16, 16, 16, 16, 15, 15, + 17, 15, 16, 15, 6, 8, 10, 10, + 10, 11, 11, 11, 12, 12, 13, 13, + 13, 13, 14, 13, 14, 13, 14, 14, + 14, 14, 14, 15, 15, 14, 14, 14, + 14, 14, 14, 15, 15, 15, 15, 16, + 15, 15, 16, 15, 15, 15, 14, 16, + 15, 15, 18, 17, 16, 17, 15, 14, + 15, 16, 16, 19, 17, 19, 16, 17, + 15, 7, 10, 11, 12, 12, 12, 12, + 13, 13, 13, 14, 15, 14, 15, 15, + 16, 15, 14, 14, 15, 16, 15, 16, + 16, 16, 16, 15, 15, 7, 11, 12, + 13, 13, 14, 14, 15, 15, 15, 8, + 11, 13, 14, 14, 15, 9, 12, 14, + 14, 15, 9, 13, 10, 13, 10, 14, + 10, 14, 11, 15, 11, 15, 11, 14, + 12, 15, 12, 13, 13, 13, 13, 13, + 13, 14, 13, 14, 14, 14, 14, 14, + 14, 15, 14, 15, 16, 15, 14, 15, + 16, 15, 15, +}; + +static const uint32_t coef2_huffcodes[1336] = { + 0x003e6, 0x000f6, 0x00000, 0x00002, 0x00006, 0x0000f, 0x0001b, 0x00028, + 0x00039, 0x0003f, 0x0006b, 0x00076, 0x000b7, 0x000e8, 0x000ef, 0x00169, + 0x001a7, 0x001d4, 0x001dc, 0x002c4, 0x00349, 0x00355, 0x00391, 0x003dc, + 0x00581, 0x005b2, 0x00698, 0x0070c, 0x00755, 0x0073a, 0x00774, 0x007cf, + 0x00b0a, 0x00b66, 0x00d2e, 0x00d5e, 0x00e1b, 0x00eac, 0x00e5a, 0x00f7e, + 0x00fa1, 0x0163e, 0x01a37, 
0x01a52, 0x01c39, 0x01ab3, 0x01d5f, 0x01cb6, + 0x01f52, 0x01dd9, 0x02c04, 0x02c2e, 0x02c2d, 0x02c23, 0x03467, 0x034a3, + 0x0351b, 0x03501, 0x03a5d, 0x0351c, 0x03875, 0x03dea, 0x0397b, 0x039db, + 0x03df1, 0x039d8, 0x03bb4, 0x0580a, 0x0584d, 0x05842, 0x05b13, 0x058ea, + 0x0697d, 0x06a06, 0x068cc, 0x06ac7, 0x06a96, 0x072f4, 0x07543, 0x072b4, + 0x07d20, 0x0b003, 0x073b5, 0x07be6, 0x0d180, 0x07bd1, 0x07cb8, 0x07d06, + 0x07d25, 0x0d2f2, 0x0d19a, 0x0d334, 0x0e1dc, 0x0d529, 0x0d584, 0x0e1d2, + 0x0e5e3, 0x0eec4, 0x0e564, 0x0fa49, 0x16001, 0x0eedc, 0x0f7fa, 0x1a32c, + 0x16131, 0x16003, 0x0f9c8, 0x1ef80, 0x1d2a0, 0x1aa4b, 0x0f7ce, 0x1abfe, + 0x1aa50, 0x1a458, 0x1a816, 0x1cae4, 0x1d2fe, 0x1d52e, 0x1aa4c, 0x2c245, + 0x1d2a1, 0x1a35d, 0x1ca1b, 0x1d5d8, 0x1f531, 0x1ca1c, 0x1f389, 0x1f4af, + 0x3a5e7, 0x351fb, 0x2c24b, 0x34bce, 0x2c24d, 0x2c249, 0x2c24a, 0x72dfc, + 0x357ef, 0x35002, 0x3a5e6, 0x39431, 0x5843b, 0x34a77, 0x58431, 0x3a5f3, + 0x3a5dd, 0x3e5e5, 0x356bd, 0x3976e, 0x6a3d2, 0x3500d, 0x694c4, 0x580bd, + 0x3e5e8, 0x74b95, 0x34a6e, 0x3977c, 0x39432, 0x5b0d2, 0x6a3d8, 0x580b8, + 0x5b0cb, 0x5b0d7, 0x72dee, 0x72ded, 0x72dec, 0x74b9c, 0x3977f, 0x72dea, + 0x74b9e, 0x7be7d, 0x580bf, 0x5b0d5, 0x7cba8, 0x74b91, 0x3e5dd, 0xb6171, + 0xd46b3, 0xd46b9, 0x7cba1, 0x74b9f, 0x72de1, 0xe59f5, 0x3e5eb, 0x00004, + 0x00015, 0x00038, 0x00075, 0x000e8, 0x001d3, 0x00347, 0x0039c, 0x00690, + 0x0074a, 0x00b60, 0x00e93, 0x00f74, 0x0163d, 0x01a5a, 0x01d24, 0x01cbe, + 0x01f4b, 0x03468, 0x03562, 0x03947, 0x03e82, 0x05804, 0x05b12, 0x05803, + 0x0696d, 0x06a9e, 0x0697c, 0x06978, 0x06afb, 0x074b2, 0x072f5, 0x073c0, + 0x07541, 0x06944, 0x074b7, 0x070d3, 0x07ba9, 0x0b0b1, 0x0d1af, 0x0e1dd, + 0x0e5e2, 0x0e1a3, 0x0eec3, 0x1612f, 0x0e961, 0x0eeda, 0x0e78e, 0x0fa48, + 0x1612c, 0x0e511, 0x0e565, 0x0e953, 0x1aa4a, 0x0e59d, 0x1d52c, 0x1a811, + 0x1cae7, 0x1abfc, 0x1d52d, 0x1cacf, 0x1cf05, 0x2c254, 0x34a72, 0x1f4ac, + 0x3976b, 0x34a71, 0x2c6d9, 0x2d873, 0x34a6a, 0x357e7, 0x3464c, 0x3e5f5, + 0x58433, 0x1f53a, 0x3500a, 
0x357ea, 0x34a73, 0x3942f, 0x357e5, 0x39775, + 0x694cd, 0x39772, 0x7cba5, 0x6a3ef, 0x35483, 0x74b98, 0x5b0c1, 0x39770, + 0x3a5d7, 0x39433, 0x39434, 0x694ce, 0x580be, 0x3e5ff, 0x6a3ec, 0xb616f, + 0xd46b1, 0x6a3d1, 0x72de5, 0x74b6e, 0x72de9, 0x3e700, 0xd46b6, 0x6a3e9, + 0x74b69, 0xe5675, 0xd46b8, 0x7cbaa, 0x3a5d1, 0x0000c, 0x0003c, 0x000eb, + 0x001f1, 0x003a4, 0x006a8, 0x007d5, 0x00d43, 0x00e77, 0x016c5, 0x01cb1, + 0x02c5d, 0x03a55, 0x03a56, 0x03e51, 0x03bb5, 0x05b0a, 0x06a9f, 0x074b8, + 0x07d28, 0x0d187, 0x0d40e, 0x0d52e, 0x0d425, 0x0eae3, 0x0e1d3, 0x1612e, + 0x0e59e, 0x0eec2, 0x0e578, 0x0e51a, 0x0e579, 0x0e515, 0x0e960, 0x0d183, + 0x0d220, 0x0d2cb, 0x0e512, 0x16c3e, 0x16002, 0x16c42, 0x1cae9, 0x3461a, + 0x1d2fa, 0x1a308, 0x1a849, 0x1cf07, 0x1f38f, 0x34b65, 0x2c253, 0x1ef9e, + 0x1cbc3, 0x1cbc1, 0x2c255, 0x1f384, 0x58435, 0x2c5cd, 0x3a5f7, 0x2c252, + 0x3959c, 0x2c6d8, 0x3a5d3, 0x6ad78, 0x6a3f2, 0x7cba9, 0xb6176, 0x72deb, + 0x39764, 0x3e5f6, 0x3a5d8, 0x74a8c, 0x6a3e6, 0x694d1, 0x6ad79, 0x1a4592, + 0xe59fb, 0x7cbb3, 0x5b0cd, 0x00017, 0x000b5, 0x002c3, 0x005b7, 0x00b1c, + 0x00e5c, 0x0163f, 0x01ab2, 0x01efa, 0x0348a, 0x0396e, 0x058da, 0x06963, + 0x06a30, 0x072cd, 0x073cf, 0x07ce7, 0x0d2ca, 0x0d2d8, 0x0e764, 0x0e794, + 0x16008, 0x16167, 0x1617e, 0x1aa49, 0x1a30b, 0x1a813, 0x2c6da, 0x1a580, + 0x1cbc2, 0x0f9ca, 0x1617f, 0x1d2fe, 0x0f7fc, 0x16c40, 0x0e513, 0x0eec5, + 0x0f7c3, 0x1d508, 0x1a81e, 0x1d2fd, 0x39430, 0x35486, 0x3e5fd, 0x2c24c, + 0x2c75a, 0x34a74, 0x3a5f4, 0x3464d, 0x694ca, 0x3a5f1, 0x1d509, 0x1d5c0, + 0x34648, 0x3464e, 0x6a3d5, 0x6a3e8, 0x6a3e7, 0x5b0c3, 0x2c248, 0x1f38a, + 0x3a5f2, 0x6a3e5, 0x00029, 0x00168, 0x0058c, 0x00b67, 0x00f9d, 0x01c3d, + 0x01cbf, 0x02c20, 0x0351d, 0x03df6, 0x06af9, 0x072b5, 0x0b1d7, 0x0b0b2, + 0x0d40a, 0x0d52b, 0x0e952, 0x0e797, 0x163c3, 0x1c3a0, 0x1f386, 0x1ca21, + 0x34655, 0x2c247, 0x1f53b, 0x2c250, 0x2c24f, 0x1f385, 0x1ef5d, 0x1cf15, + 0x1caea, 0x1ab0a, 0x1cf19, 0x1f53d, 0x1d5c2, 0x1d2fb, 0x1ef58, 0x34a78, + 0x357ec, 0x1f533, 0x3a5e1, 
0x694d2, 0x58482, 0x3a5ee, 0x2c6dc, 0x357eb, + 0x5b0c4, 0x39778, 0x6a3e1, 0x7cbb4, 0x3a5e1, 0x74b68, 0x3a5ef, 0x3a5d2, + 0x39424, 0x72de2, 0xe59f6, 0xe59f7, 0x3e702, 0x3e5ec, 0x1f38b, 0x0003b, + 0x001f0, 0x00777, 0x00fa8, 0x01cb2, 0x02d84, 0x03a57, 0x03dd6, 0x06917, + 0x06a11, 0x07d07, 0x0eae2, 0x0e796, 0x0f9c9, 0x0f7fb, 0x16166, 0x16160, + 0x1ab1b, 0x1abfa, 0x2d87b, 0x1d2f7, 0x39768, 0x1f38c, 0x34653, 0x34651, + 0x6a3d9, 0x35001, 0x3abbd, 0x38742, 0x39426, 0x34a76, 0x3a5ec, 0x34a75, + 0x35000, 0x35488, 0x1cf10, 0x2c6db, 0x357ed, 0x357e8, 0x357e9, 0x3a5f0, + 0x694c2, 0xb6178, 0x72df5, 0x39425, 0x3942b, 0x74b6d, 0x74b6f, 0xb6177, + 0xb6179, 0x74b6a, 0xb6172, 0x58487, 0x3e5ee, 0x3e5ed, 0x72df2, 0x72df4, + 0x7cbae, 0x6a3ca, 0x70e86, 0x34bcf, 0x6a3c8, 0x00059, 0x00384, 0x00d5b, + 0x01c38, 0x03560, 0x0395b, 0x0584e, 0x06964, 0x073cd, 0x0b1e7, 0x0e798, + 0x0e78d, 0x0fa43, 0x1a848, 0x1a32f, 0x1aa4e, 0x3464a, 0x1f4ab, 0x1f38d, + 0x3a5eb, 0x3a5d4, 0x3548a, 0x6a3c7, 0x5b0d0, 0x6a3c5, 0x7cbb0, 0x694cb, + 0x3a5e5, 0x3e5e2, 0x3942c, 0x2d872, 0x1f4ae, 0x3a5d5, 0x694d3, 0x58481, + 0x35009, 0x39774, 0x58432, 0xb616c, 0x5b0db, 0x3548b, 0xb6174, 0x1d5d95, + 0xb004c, 0x7cbb2, 0x3a5e5, 0x74a8f, 0xe59f9, 0x72df6, 0xe59fd, 0x7cbad, + 0xd427d, 0x72cff, 0x3977a, 0x5b0d9, 0xb616d, 0xb616b, 0x1a4593, 0x7cbaf, + 0x5b0da, 0x00071, 0x003eb, 0x01603, 0x02c6c, 0x03961, 0x068c8, 0x06a31, + 0x072bd, 0x0d2c2, 0x0e51b, 0x0e5e6, 0x1abfb, 0x1d2ff, 0x1cae5, 0x1ef5c, + 0x1ef5e, 0x1cf13, 0x34a6d, 0x3976d, 0xb616a, 0x3e5f2, 0x6a3c4, 0xb6169, + 0x3e5dc, 0x580b9, 0x74b99, 0x75764, 0x58434, 0x3a5d9, 0x6945a, 0x69459, + 0x3548c, 0x3a5e9, 0x69457, 0x72df1, 0x6945e, 0x6a35e, 0x3e701, 0xb6168, + 0x5b0dd, 0x3a5de, 0x6a3c2, 0xd4278, 0x6a3cc, 0x72dfd, 0xb6165, 0x16009a, + 0x7cbb1, 0xd427c, 0xb6162, 0xe765e, 0x1cecbe, 0x7cbb6, 0x69454, 0xb6160, + 0xd427a, 0x1d5d96, 0xb1d6d, 0xe59f4, 0x72de8, 0x3a5db, 0x0007a, 0x006ae, + 0x01c3c, 0x03aba, 0x058e9, 0x072cc, 0x0d2dd, 0x0d22d, 0x0eec1, 0x0eedb, + 0x1d2a2, 0x1ef5b, 
0x357e2, 0x3abbf, 0x1d2f9, 0x35004, 0x3a5dc, 0x351fc, + 0x3976c, 0x6a3c6, 0x6a3cb, 0x3e5ea, 0xe59f3, 0x6a3ce, 0x69452, 0xe59f0, + 0x74b90, 0xd4279, 0xd427b, 0x7cbb5, 0x5b0c5, 0x3a5e3, 0x3a5e2, 0x000d0, + 0x00775, 0x01efe, 0x03dd5, 0x0728c, 0x07cb9, 0x0e1a2, 0x0ea85, 0x0eed8, + 0x1a30a, 0x1aa4f, 0x3a5df, 0x35008, 0x3a5e0, 0x3e5f4, 0x3e5f7, 0xb1d6c, + 0x5843e, 0x34a70, 0x72df8, 0x74b6b, 0xd427f, 0x72df0, 0x5b0bf, 0x5b0c0, + 0xd46b0, 0x72def, 0xe59f8, 0x162e64, 0xb1d6f, 0x3a5e0, 0x39427, 0x69166, + 0x6a3e2, 0x6a3e3, 0x74a8d, 0xd427e, 0x1d5d97, 0xd46b4, 0x5b0d8, 0x6a3d3, + 0x000e0, 0x00b63, 0x034cc, 0x06a33, 0x073c9, 0x0e1a0, 0x0f7fd, 0x0f9cc, + 0x1617d, 0x1caeb, 0x1f4a9, 0x3abb3, 0x69450, 0x39420, 0x39777, 0x3e5e0, + 0x6a3d4, 0x6a3ed, 0xb6166, 0xe59f1, 0xb1d6e, 0xe5676, 0x6a3ea, 0xe5674, + 0xb6163, 0xd46b7, 0x7cba6, 0xd46ba, 0x1d5d94, 0xb6164, 0x6a3f1, 0x7cba2, + 0x69451, 0x72dfa, 0xd46bb, 0x72df7, 0x74b94, 0x1cecbf, 0xe59fa, 0x16009b, + 0x6a3e4, 0x000e6, 0x00e94, 0x03876, 0x070ef, 0x0d52a, 0x16015, 0x16014, + 0x1abf9, 0x1cf17, 0x34a79, 0x34650, 0x3e705, 0x6a3d0, 0x58430, 0x74b9d, + 0x7be7e, 0x5b0be, 0x39773, 0x6a3de, 0x000fb, 0x00f7b, 0x03dd7, 0x07bd0, + 0x0e59c, 0x0f9cd, 0x1cf18, 0x1d2ff, 0x34a7a, 0x39429, 0x3500c, 0x72de0, + 0x69456, 0x7be7c, 0xd46b5, 0xd46b2, 0x6a3dd, 0x001a2, 0x0163b, 0x06913, + 0x0b016, 0x0fa42, 0x1a32d, 0x1cf06, 0x34a7c, 0x34a7d, 0xb6161, 0x35481, + 0x3e5fa, 0x7cba0, 0x7be7f, 0x7cba3, 0x7cba7, 0x5b0d3, 0x72de6, 0x6a3dc, + 0x001a9, 0x01ab4, 0x06a34, 0x0d46a, 0x16130, 0x1ef5f, 0x1f532, 0x1f536, + 0x3942e, 0x58436, 0x6a3db, 0x6945b, 0x001c9, 0x01ca0, 0x0728b, 0x0eed9, + 0x1f539, 0x1ca1d, 0x39765, 0x39766, 0x58439, 0x6945d, 0x39767, 0x001d3, + 0x01f2c, 0x07bfc, 0x16161, 0x34652, 0x3a5ed, 0x3548d, 0x58438, 0x6a3da, + 0x002c1, 0x02c5e, 0x0d335, 0x1ab1a, 0x2d874, 0x35006, 0x35484, 0x5b0cc, + 0x74b9a, 0x72df3, 0x6a3d6, 0x002da, 0x034b3, 0x0d5ae, 0x1caee, 0x2d871, + 0x357e3, 0x74b97, 0x72df9, 0x580ba, 0x5b0d4, 0x0034d, 0x0354e, 0x0f750, + 0x1cbc0, 
0x3a5e7, 0x3a5e4, 0x00385, 0x03a58, 0x16c41, 0x2c5cf, 0x3e5e1, + 0x74b6c, 0xe5677, 0x6a3df, 0x00390, 0x03e50, 0x163c2, 0x2d876, 0x35482, + 0x5b0d6, 0x5843a, 0x0039f, 0x0585e, 0x1a583, 0x3500f, 0x74b93, 0x39771, + 0x003e4, 0x06912, 0x16c43, 0x357e1, 0x0058a, 0x0696f, 0x1f538, 0x5b0c9, + 0x6a3cf, 0x005b6, 0x06af8, 0x1f534, 0x58483, 0x6a3e0, 0x00695, 0x07d02, + 0x1cae8, 0x58485, 0x006a2, 0x0754a, 0x357ee, 0x3977b, 0x00748, 0x074b2, + 0x34a7b, 0x00729, 0x0b1e0, 0x34649, 0x3e5e3, 0x0073d, 0x0d2c4, 0x3e5e6, + 0x007bb, 0x0b099, 0x39762, 0x5b0ce, 0x6945f, 0x007d1, 0x0d5ab, 0x39779, + 0x007d3, 0x0d52f, 0x39763, 0x6945c, 0x00b1a, 0x0d2c5, 0x35489, 0x00d23, + 0x0eaed, 0x3e5f8, 0x00d32, 0x16016, 0x3e5fb, 0x00d41, 0x0e768, 0x3a5ed, + 0x00e1f, 0x16017, 0x58027, 0x00ead, 0x0fa07, 0x69455, 0x00e54, 0x1612b, + 0x00e55, 0x1a581, 0x00f78, 0x1a32b, 0x580bc, 0x6a3ee, 0x00f79, 0x1abfd, + 0x00f95, 0x1ab18, 0x6a3f0, 0x01637, 0x1aa4d, 0x0162d, 0x1f53c, 0x6a3f3, + 0x01a31, 0x1a810, 0x39769, 0x01a50, 0x1caef, 0x01a36, 0x1a32e, 0x01a67, + 0x1f38e, 0x01a85, 0x1ef59, 0x01aa6, 0x1ef83, 0x01d51, 0x2c012, 0x01d53, + 0x2d879, 0x01d5e, 0x35005, 0x01cba, 0x1cf04, 0x69453, 0x01d2d, 0x351ff, + 0x01f2d, 0x2d86f, 0x01f29, 0x35007, 0x02c22, 0x351fa, 0x02c03, 0x3a5ec, + 0x02c5f, 0x3a5eb, 0x02c58, 0x34a6b, 0x03469, 0x356be, 0x02c59, 0x34a6c, + 0x0346a, 0x3a5ea, 0x034bd, 0x034bf, 0x356bf, 0x0386a, 0x03ab9, 0x5843f, + 0x0386b, 0x3a5f5, 0x03a4b, 0x39421, 0x03aa4, 0x3a5e9, 0x03a5a, 0x03960, + 0x3977e, 0x03de9, 0x03958, 0x03df7, 0x039e1, 0x3e5e4, 0x0395f, 0x69458, + 0x03e91, 0x03df2, 0x39428, 0x058f2, 0x03e80, 0x6a3c3, 0x03e93, 0x694c0, + 0x058b8, 0x5b0ca, 0x0584f, 0x694c1, 0x058f1, 0x068d6, 0x06a10, 0x06ac3, + 0x06a32, 0x070d2, 0x06911, 0x074b1, 0x07494, 0x06ad4, 0x06ad6, 0x072b8, + 0x06afa, 0x074b3, 0x07540, 0x073ce, 0x0b005, 0x074b3, 0x07495, 0x074b9, + 0x0d336, 0x07bff, 0x07763, 0x073c8, 0x07d29, 0x0b622, 0x0d221, 0x0d181, + 0x0b1d1, 0x074b8, 0x0b1d0, 0x0d19b, 0x0d2c3, 0x0b172, 0x0d2dc, 0x0b623, + 0x0d5aa, 
0x0d426, 0x0d182, 0x0e795, 0x0e1d1, 0x0d337, 0x0e96c, 0x0e5e4, + 0x0e514, 0x0eaee, 0x16000, 0x0e767, 0x0e1a1, 0x0e78f, 0x16004, 0x0f7c2, + 0x0e799, 0x0e5e7, 0x0e566, 0x0e769, 0x0f751, 0x0eede, 0x0fa06, 0x16005, + 0x0fa9f, 0x1a5e6, 0x0e766, 0x1636f, 0x0eedd, 0x0eec0, 0x1a309, 0x1ceca, + 0x163cd, 0x0f9cb, 0x0eedf, 0x1a582, 0x1612d, 0x0e5e5, 0x1abf8, 0x1a30c, + 0x1ca1f, 0x163cc, 0x1a35c, 0x1ca1e, 0x1aa51, 0x163ac, 0x1a84e, 0x1a53f, + 0x1cf16, 0x1d2fc, 0x1a5b3, 0x1ab19, 0x1a81f, 0x1d5c3, 0x16c3f, 0x1d5c1, + 0x1d2fc, 0x1f4aa, 0x1a812, 0x1f535, 0x1cf12, 0x1a817, 0x1617c, 0x1ab0b, + 0x1d2f8, 0x1ef82, 0x2d87a, 0x1d52f, 0x1f530, 0x1aa48, 0x35487, 0x1d2fd, + 0x1f4ad, 0x1cf11, 0x3461b, 0x35485, 0x1ca20, 0x1caed, 0x1cae6, 0x1abff, + 0x3464f, 0x34a6f, 0x1ef81, 0x3464b, 0x39d96, 0x1f383, 0x1f537, 0x1cf14, + 0x2c5ce, 0x3500e, 0x2c251, 0x1caec, 0x1f387, 0x34654, 0x357e4, 0x2d878, + 0x3500b, 0x35480, 0x3a5e8, 0x3548e, 0x34b64, 0x1f4a8, 0x35003, 0x3e5df, + 0x2d870, 0x357e6, 0x3e5f0, 0x1ef5a, 0x3a5ea, 0x1f388, 0x3e703, 0x2c24e, + 0x3a5e2, 0x351fd, 0x2c6dd, 0x3e704, 0x351fe, 0x2d875, 0x5b0c7, 0x3976a, + 0x3a5e6, 0x39423, 0x58480, 0x2c246, 0x3a5e3, 0x2d877, 0x3e5f1, 0x3abbe, + 0x58489, 0x3e5f9, 0x357e0, 0x3abbc, 0x5b0c6, 0x69167, 0x69165, 0x3e5e9, + 0x39422, 0x3976f, 0x3977d, 0x3e5de, 0x6a3c9, 0x58b98, 0x3a5f6, 0x3a5d0, + 0x58486, 0x6a3c1, 0x3e5fc, 0x5b0dc, 0x3548f, 0x3942d, 0x694c9, 0x58484, + 0x3a5e8, 0x74b9b, 0x74b96, 0x694d0, 0x58488, 0x3a5e4, 0x3942a, 0x72ec2, + 0x39776, 0x5b0d1, 0x5b0cf, 0x3a5d6, 0xe59fc, 0x5b0c8, 0x3e5e7, 0x7cbb7, + 0x70e87, 0x7cbab, 0x5b0c2, 0x694c3, 0x74a8e, 0x3e5f3, 0x6a3cd, 0x72dfe, + 0x73b2e, 0x72ec0, 0x694c5, 0x58437, 0x694c8, 0x72dff, 0x39435, 0x5843d, + 0x6a3d7, 0x72ec1, 0xd22c8, 0x694cf, 0xb6173, 0x3e5fe, 0x580bb, 0xe59f2, + 0xb616e, 0xb6175, 0x3a5da, 0x5b0bd, 0x694cc, 0x5843c, 0x694c7, 0x74b92, + 0x72ec3, 0x694c6, 0xb6170, 0x7cbac, 0xb1733, 0x7cba4, 0xb6167, 0x72de7, + 0x72de4, 0x6a3c0, 0x3e5ef, 0x162e65, 0x72de3, 0x72dfb, 0x6a35f, 0x6a3eb, +}; + 
+static const uint8_t coef2_huffbits[1336] = { + 11, 9, 2, 3, 4, 4, 5, 6, + 6, 7, 7, 8, 8, 8, 9, 9, + 9, 9, 10, 10, 10, 10, 11, 11, + 11, 11, 11, 11, 11, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 16, 15, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 18, 17, 17, 17, 17, + 17, 17, 17, 18, 18, 17, 17, 18, + 17, 17, 18, 17, 18, 18, 18, 18, + 19, 18, 18, 18, 18, 18, 18, 20, + 18, 18, 18, 19, 19, 18, 19, 18, + 19, 19, 18, 19, 19, 18, 19, 19, + 19, 19, 18, 19, 19, 19, 19, 19, + 19, 19, 20, 20, 20, 19, 19, 20, + 19, 20, 19, 19, 20, 19, 19, 20, + 20, 20, 20, 19, 20, 21, 19, 3, + 5, 7, 8, 9, 9, 10, 11, 11, + 12, 12, 12, 13, 13, 13, 13, 14, + 14, 14, 14, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 16, 16, + 15, 15, 15, 15, 16, 16, 16, 16, + 17, 16, 17, 17, 16, 17, 17, 17, + 17, 17, 17, 16, 17, 17, 17, 17, + 18, 17, 17, 18, 18, 18, 18, 18, + 19, 18, 18, 18, 18, 18, 18, 19, + 19, 18, 18, 18, 18, 19, 18, 19, + 19, 19, 20, 19, 18, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 20, + 20, 19, 20, 19, 20, 19, 20, 19, + 19, 21, 20, 20, 19, 4, 7, 8, + 10, 11, 11, 12, 12, 13, 13, 14, + 14, 14, 14, 15, 15, 15, 15, 15, + 16, 16, 16, 16, 16, 16, 16, 17, + 17, 17, 17, 17, 17, 17, 16, 16, + 16, 16, 17, 17, 17, 17, 18, 18, + 18, 17, 17, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 19, 18, 18, 18, + 19, 18, 19, 19, 19, 20, 20, 20, + 19, 19, 19, 19, 19, 19, 19, 21, + 21, 20, 19, 5, 8, 10, 11, 12, + 13, 13, 13, 14, 14, 15, 15, 15, + 15, 16, 16, 16, 16, 16, 17, 17, + 17, 17, 17, 17, 17, 17, 18, 17, + 18, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 19, 18, 19, 18, + 18, 18, 18, 18, 19, 18, 17, 17, + 18, 18, 19, 19, 19, 19, 18, 18, + 18, 19, 6, 9, 11, 12, 13, 13, + 14, 14, 14, 15, 15, 16, 16, 16, + 16, 16, 16, 17, 17, 17, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 17, 
18, 18, 17, 18, 18, 18, + 18, 18, 18, 19, 19, 18, 18, 18, + 19, 19, 19, 20, 19, 19, 18, 19, + 19, 20, 21, 21, 19, 19, 18, 6, + 10, 12, 13, 14, 14, 14, 15, 15, + 15, 16, 16, 17, 17, 17, 17, 17, + 17, 17, 18, 18, 19, 18, 18, 18, + 19, 18, 18, 18, 19, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 19, 20, 20, 19, 19, 19, 19, 20, + 20, 19, 20, 19, 19, 19, 20, 20, + 20, 19, 19, 18, 19, 7, 10, 12, + 13, 14, 15, 15, 15, 16, 16, 17, + 17, 17, 17, 17, 17, 18, 18, 18, + 18, 19, 18, 19, 19, 19, 20, 19, + 18, 19, 19, 18, 18, 19, 19, 19, + 18, 19, 19, 20, 19, 18, 20, 21, + 20, 20, 19, 19, 21, 20, 21, 20, + 20, 20, 19, 19, 20, 20, 21, 20, + 19, 7, 11, 13, 14, 15, 15, 15, + 16, 16, 17, 17, 17, 17, 18, 18, + 18, 18, 18, 19, 20, 19, 19, 20, + 19, 19, 19, 19, 19, 19, 19, 19, + 18, 18, 19, 20, 19, 19, 19, 20, + 19, 19, 19, 20, 19, 20, 20, 21, + 20, 20, 20, 21, 22, 20, 19, 20, + 20, 21, 20, 21, 20, 19, 8, 11, + 13, 14, 15, 16, 16, 16, 17, 17, + 17, 18, 18, 18, 18, 18, 19, 18, + 19, 19, 19, 19, 21, 19, 19, 21, + 19, 20, 20, 20, 19, 18, 18, 8, + 12, 14, 15, 16, 16, 16, 16, 17, + 17, 17, 19, 18, 18, 19, 19, 20, + 19, 18, 20, 19, 20, 20, 19, 19, + 20, 20, 21, 21, 20, 19, 19, 19, + 19, 19, 19, 20, 21, 20, 19, 19, + 8, 12, 14, 15, 16, 16, 17, 17, + 17, 18, 18, 18, 19, 19, 19, 19, + 19, 19, 20, 21, 20, 21, 19, 21, + 20, 20, 20, 20, 21, 20, 19, 20, + 19, 20, 20, 20, 19, 22, 21, 21, + 19, 9, 12, 14, 15, 16, 17, 17, + 17, 18, 18, 18, 19, 19, 19, 19, + 20, 19, 19, 19, 9, 13, 15, 16, + 17, 17, 18, 18, 18, 19, 18, 20, + 19, 20, 20, 20, 19, 9, 13, 15, + 16, 17, 17, 18, 18, 18, 20, 18, + 19, 20, 20, 20, 20, 19, 20, 19, + 9, 13, 15, 16, 17, 18, 18, 18, + 19, 19, 19, 19, 10, 14, 16, 17, + 18, 18, 19, 19, 19, 19, 19, 10, + 14, 16, 17, 18, 18, 18, 19, 19, + 10, 14, 16, 17, 18, 18, 18, 19, + 19, 20, 19, 10, 14, 16, 18, 18, + 18, 19, 20, 19, 19, 10, 14, 17, + 18, 18, 18, 10, 15, 17, 18, 19, + 19, 21, 19, 11, 15, 17, 18, 18, + 19, 19, 11, 15, 17, 18, 19, 19, + 11, 15, 17, 18, 11, 15, 18, 19, + 19, 11, 15, 
18, 19, 19, 11, 16, + 18, 19, 11, 15, 18, 19, 11, 16, + 18, 12, 16, 18, 19, 12, 16, 19, + 12, 16, 19, 19, 19, 12, 16, 19, + 12, 16, 19, 19, 12, 16, 18, 12, + 16, 19, 12, 17, 19, 12, 17, 19, + 12, 17, 19, 12, 17, 19, 13, 17, + 13, 17, 13, 17, 19, 19, 13, 17, + 13, 17, 19, 13, 17, 13, 18, 19, + 13, 17, 19, 13, 18, 13, 17, 13, + 18, 13, 18, 13, 18, 13, 18, 13, + 18, 13, 18, 14, 18, 19, 14, 18, + 14, 18, 14, 18, 14, 18, 14, 19, + 14, 19, 14, 18, 14, 18, 14, 18, + 14, 19, 14, 14, 18, 14, 14, 19, + 14, 18, 14, 19, 14, 19, 14, 15, + 19, 15, 15, 15, 15, 19, 15, 19, + 15, 15, 19, 15, 15, 19, 15, 19, + 15, 19, 15, 19, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 16, + 15, 15, 15, 16, 16, 16, 15, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 17, 16, 16, 16, 17, + 17, 16, 17, 17, 16, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 18, + 17, 17, 17, 17, 17, 17, 17, 17, + 18, 17, 17, 18, 17, 17, 17, 17, + 18, 18, 17, 17, 17, 17, 17, 17, + 17, 18, 17, 18, 18, 17, 17, 17, + 18, 18, 18, 17, 18, 17, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 17, + 18, 18, 18, 18, 19, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 19, + 18, 18, 19, 18, 18, 18, 19, 18, + 19, 18, 18, 19, 18, 18, 19, 19, + 19, 19, 19, 18, 19, 18, 19, 18, + 19, 19, 18, 18, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 18, 19, + 19, 19, 19, 19, 18, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 20, + 19, 19, 19, 19, 21, 19, 19, 20, + 19, 20, 19, 19, 19, 19, 19, 20, + 20, 20, 19, 19, 19, 20, 19, 19, + 19, 20, 20, 19, 20, 19, 19, 21, + 20, 20, 19, 19, 19, 19, 19, 19, + 20, 19, 20, 20, 20, 20, 20, 20, + 20, 19, 19, 21, 20, 20, 19, 19, +}; + +static const uint32_t coef3_huffcodes[1072] = { + 0x001b2, 0x00069, 0x00000, 0x00004, 0x00006, 0x0000e, 0x00014, 0x00019, + 0x00016, 0x0002b, 0x00030, 0x0003d, 0x0003c, 0x0005a, 0x0005f, 0x0006d, + 0x0007e, 0x0005f, 0x0007f, 0x000b6, 0x000bc, 0x000d8, 0x000f2, 0x000fe, + 0x000bc, 0x000fc, 0x00161, 
0x0016e, 0x00174, 0x00176, 0x001a2, 0x001e3, + 0x001f3, 0x00174, 0x0017a, 0x001ea, 0x002a8, 0x002c4, 0x002e6, 0x00314, + 0x00346, 0x00367, 0x003e9, 0x002e5, 0x002ee, 0x003d6, 0x00555, 0x00554, + 0x00557, 0x005c3, 0x005d6, 0x006e0, 0x0062f, 0x006e2, 0x00799, 0x00789, + 0x007fa, 0x005ce, 0x007fe, 0x005ec, 0x007cc, 0x007af, 0x00aa7, 0x00b19, + 0x00b94, 0x00b85, 0x00b9f, 0x00c48, 0x00c45, 0x00dd8, 0x00c4c, 0x00c4b, + 0x00d99, 0x00d1f, 0x00dc2, 0x00f95, 0x00fa2, 0x00bb5, 0x00b9f, 0x00f5d, + 0x00bbf, 0x00f47, 0x0154a, 0x00fd5, 0x00f45, 0x00f7f, 0x0160d, 0x01889, + 0x01757, 0x01722, 0x018b3, 0x0172d, 0x01a39, 0x01a18, 0x01bb3, 0x01b30, + 0x01e63, 0x0173c, 0x01b35, 0x01723, 0x01e80, 0x01fee, 0x01761, 0x01ffc, + 0x01f7f, 0x02c7c, 0x01fa1, 0x0177b, 0x01755, 0x0175a, 0x01fa6, 0x02eab, + 0x0310a, 0x02c69, 0x03669, 0x03127, 0x03103, 0x02e43, 0x03662, 0x03165, + 0x03124, 0x0313b, 0x03111, 0x03668, 0x0343b, 0x03c52, 0x03efc, 0x02e6c, + 0x03fda, 0x03ef8, 0x02e7b, 0x03ee2, 0x03cc5, 0x03d72, 0x058c0, 0x03df8, + 0x02ea9, 0x03e7e, 0x0556d, 0x05c82, 0x03d71, 0x03e7b, 0x03c42, 0x058d7, + 0x03f4e, 0x06200, 0x03d70, 0x05cb2, 0x05c96, 0x05cb0, 0x03f45, 0x05cb1, + 0x02e6d, 0x03110, 0x02f68, 0x05c90, 0x07ca6, 0x07c88, 0x06204, 0x062c8, + 0x078a6, 0x07986, 0x079d5, 0x0b1ad, 0x07989, 0x0b079, 0x05cdd, 0x0aad4, + 0x05de8, 0x07dcd, 0x07987, 0x05d67, 0x05d99, 0x0b91d, 0x07cf1, 0x05d9b, + 0x079d7, 0x0b07b, 0x05c85, 0x05d9a, 0x07dcc, 0x07ebf, 0x07dce, 0x07dfb, + 0x07ec0, 0x07d1a, 0x07a07, 0x05c84, 0x0c471, 0x07cf2, 0x0baef, 0x0b9d2, + 0x05deb, 0x07bd6, 0x0b845, 0x05d98, 0x0b91a, 0x0bae8, 0x0c4e0, 0x0dc31, + 0x0f93d, 0x0bbce, 0x0d1d2, 0x0f7a9, 0x0d9b9, 0x0bbcb, 0x0b900, 0x0aad7, + 0x0babd, 0x0c4e1, 0x0f46f, 0x0c588, 0x0c58b, 0x160e6, 0x0bbcf, 0x0bac3, + 0x0f945, 0x0f7a3, 0x0d1c1, 0x0fb8e, 0x0f7a4, 0x0fb8c, 0x0f40c, 0x0c473, + 0x0fd72, 0x0bbcd, 0x0fffa, 0x0f940, 0x0bbc9, 0x0f7a8, 0x1a1ed, 0x0bbc5, + 0x1f26f, 0x163fd, 0x160c7, 0x1a1f5, 0x0f947, 0x163fc, 0x154b3, 0x0fff6, + 0x163f6, 0x160e9, 0x1a1f0, 
0x0bab9, 0x0baba, 0x17086, 0x0b903, 0x0fd75, + 0x0f308, 0x176f3, 0x163ff, 0x0fd7d, 0x1bb78, 0x163fb, 0x188db, 0x1a1f7, + 0x154b2, 0x172fd, 0x163f4, 0x1bb73, 0x172ff, 0x0babc, 0x0f97d, 0x1a1f3, + 0x1bb6d, 0x1ffd5, 0x1a1f4, 0x1f272, 0x17380, 0x17382, 0x1ffe7, 0x0bac8, + 0x0bbc4, 0x188d3, 0x160e0, 0x0fd7b, 0x1725f, 0x172f5, 0x1bb79, 0x1fad9, + 0x1f269, 0x188d0, 0x0bac4, 0x0bac5, 0x31185, 0x188d2, 0x188cc, 0x31187, + 0x3e7fe, 0x188d1, 0x1bb6c, 0x1f268, 0x1fad2, 0x1ffd9, 0x1a1ea, 0x1bb68, + 0x1facb, 0x3fdb2, 0x1e81a, 0x188ce, 0x172fb, 0x1a1ef, 0x1face, 0x1bb70, + 0x0bac1, 0x1bb6b, 0x172f8, 0x1bb66, 0x1ffdf, 0x1bb6a, 0x1ffd7, 0x1f266, + 0x176f8, 0x37653, 0x1fa7e, 0x31182, 0x1fac8, 0x2c7e3, 0x370ee, 0x176ec, + 0x176e9, 0x2e4bc, 0x160c5, 0x3765a, 0x3ce9c, 0x17373, 0x176e8, 0x188d4, + 0x176f1, 0x176ef, 0x37659, 0x1bb7c, 0x1ffde, 0x176f2, 0x3118b, 0x2c7d4, + 0x37651, 0x5ce9f, 0x37650, 0x31191, 0x3f4f6, 0x3f4f5, 0x7a06c, 0x1fac1, + 0x5c97b, 0x2c7e0, 0x79d3a, 0x3e7fd, 0x2c7df, 0x3f4f0, 0x7a06d, 0x376c1, + 0x79d3b, 0x00004, 0x00014, 0x00059, 0x000ab, 0x000b8, 0x00177, 0x001f5, + 0x001f2, 0x00315, 0x003fc, 0x005bd, 0x0062d, 0x006e8, 0x007dd, 0x00b04, + 0x007cd, 0x00b1e, 0x00d1e, 0x00f15, 0x00f3b, 0x00f41, 0x01548, 0x018b0, + 0x0173b, 0x01884, 0x01a1c, 0x01bb4, 0x01f25, 0x017b5, 0x0176d, 0x01ef8, + 0x02e73, 0x03107, 0x03125, 0x03105, 0x02e49, 0x03ce8, 0x03ef9, 0x03e5e, + 0x02e72, 0x03471, 0x03fd9, 0x0623f, 0x078a0, 0x06867, 0x05cb3, 0x06272, + 0x068ec, 0x06e9a, 0x079d4, 0x06e98, 0x0b1aa, 0x06e1a, 0x07985, 0x068ee, + 0x06e9b, 0x05c88, 0x0b1ac, 0x07dfa, 0x05d65, 0x07cf0, 0x07cbf, 0x0c475, + 0x160eb, 0x1bb7e, 0x0f7a6, 0x1fedd, 0x160e3, 0x0fffb, 0x0fb8d, 0x0fff9, + 0x0d1c0, 0x0c58c, 0x1a1e9, 0x0bab8, 0x0f5cf, 0x0fff5, 0x376c5, 0x1a1ec, + 0x160ed, 0x1fede, 0x1fac9, 0x1a1eb, 0x1f224, 0x176ee, 0x0fd79, 0x17080, + 0x17387, 0x1bb7a, 0x1ffe9, 0x176f7, 0x17385, 0x17781, 0x2c7d5, 0x17785, + 0x1ffe3, 0x163f5, 0x1fac2, 0x3e7f9, 0x3118d, 0x3fdb1, 0x1ffe2, 0x1f226, + 0x3118a, 0x2c7d9, 0x31190, 
0x3118c, 0x3f4f3, 0x1bb7f, 0x1bb72, 0x31184, + 0xb92f4, 0x3e7fb, 0x6e1d9, 0x1faca, 0x62300, 0x3fdb8, 0x3d037, 0x3e7fc, + 0x62301, 0x3f4f2, 0x1f26a, 0x0000e, 0x00063, 0x000f8, 0x001ee, 0x00377, + 0x003f7, 0x006e3, 0x005cc, 0x00b05, 0x00dd2, 0x00fd4, 0x0172e, 0x0172a, + 0x01e23, 0x01f2d, 0x01763, 0x01769, 0x0176c, 0x02e75, 0x03104, 0x02ec1, + 0x03e58, 0x0583f, 0x03f62, 0x03f44, 0x058c5, 0x0623c, 0x05cf4, 0x07bd7, + 0x05d9d, 0x0aad2, 0x05d66, 0x0b1a9, 0x0b078, 0x07cfe, 0x0b918, 0x0c46f, + 0x0b919, 0x0b847, 0x06e1b, 0x0b84b, 0x0aad8, 0x0fd74, 0x172f4, 0x17081, + 0x0f97c, 0x1f273, 0x0f7a0, 0x0fd7c, 0x172f7, 0x0fd7a, 0x1bb77, 0x172fe, + 0x1f270, 0x0fd73, 0x1bb7b, 0x1a1bc, 0x1bb7d, 0x0bbc3, 0x172f6, 0x0baeb, + 0x0fb8f, 0x3f4f4, 0x3fdb4, 0x376c8, 0x3e7fa, 0x1ffd0, 0x62303, 0xb92f5, + 0x1f261, 0x31189, 0x3fdb5, 0x2c7db, 0x376c9, 0x1fad6, 0x1fad1, 0x00015, + 0x000f0, 0x002e0, 0x0058e, 0x005d7, 0x00c4d, 0x00fa1, 0x00bdb, 0x01756, + 0x01f70, 0x02c19, 0x0313c, 0x0370f, 0x03cc0, 0x02ea8, 0x058c6, 0x058c7, + 0x02eb7, 0x058d0, 0x07d18, 0x0aa58, 0x0b848, 0x05d9e, 0x05d6c, 0x0b84c, + 0x0c589, 0x0b901, 0x163f8, 0x0bac9, 0x0b9c5, 0x0f93c, 0x188d8, 0x0bbc7, + 0x160ec, 0x0fd6f, 0x188d9, 0x160ea, 0x0f7a7, 0x0f944, 0x0baab, 0x0dc3a, + 0x188cf, 0x176fb, 0x2c7d8, 0x2c7d7, 0x1bb75, 0x5ce9e, 0x62302, 0x370ed, + 0x176f4, 0x1ffd1, 0x370ef, 0x3f4f8, 0x376c7, 0x1ffe1, 0x376c6, 0x176ff, + 0x6e1d8, 0x176f6, 0x17087, 0x0f5cd, 0x00035, 0x001a0, 0x0058b, 0x00aac, + 0x00b9a, 0x0175f, 0x01e22, 0x01e8c, 0x01fb2, 0x0310b, 0x058d1, 0x0552e, + 0x05c27, 0x0686e, 0x07ca7, 0x0c474, 0x0dc33, 0x07bf2, 0x05de9, 0x07a35, + 0x0baaa, 0x0b9eb, 0x0fb95, 0x0b9b8, 0x17381, 0x1f262, 0x188cd, 0x17088, + 0x172fa, 0x0f7a2, 0x1fad3, 0x0bac0, 0x3765c, 0x1fedf, 0x1f225, 0x1fad4, + 0x2c7da, 0x5ce9d, 0x3e7f8, 0x1e203, 0x188d7, 0x00054, 0x002c0, 0x007a1, + 0x00f78, 0x01b36, 0x01fa3, 0x0313a, 0x03436, 0x0343a, 0x07d1d, 0x07bd8, + 0x05cdf, 0x0b846, 0x0b189, 0x0d9b8, 0x0fff8, 0x0d9be, 0x0c58a, 0x05dea, + 0x0d1d3, 0x160e4, 0x1f26b, 
0x188da, 0x1e202, 0x2c7d2, 0x163fe, 0x31193, + 0x17782, 0x376c2, 0x2c7d1, 0x3fdb0, 0x3765d, 0x2c7d0, 0x1fad0, 0x1e201, + 0x188dd, 0x2c7e2, 0x37657, 0x37655, 0x376c4, 0x376c0, 0x176ea, 0x0006f, + 0x003cf, 0x00dd5, 0x01f23, 0x02c61, 0x02ed0, 0x05d54, 0x0552d, 0x07883, + 0x0b1a8, 0x0b91c, 0x0babf, 0x0b902, 0x0f7aa, 0x0f7a5, 0x1a1e8, 0x1ffd6, + 0x0babe, 0x1a1bf, 0x163f3, 0x1ffd8, 0x1fad7, 0x1f275, 0x1ffdc, 0x0007d, + 0x005bc, 0x01549, 0x02a99, 0x03def, 0x06273, 0x079d6, 0x07d1b, 0x0aad3, + 0x0d0fc, 0x2c7dd, 0x188d6, 0x0bac2, 0x2c7e1, 0x1bb76, 0x1a1bd, 0x31186, + 0x0fd78, 0x1a1be, 0x31183, 0x3fdb6, 0x3f4f1, 0x37652, 0x1fad5, 0x3f4f9, + 0x3e7ff, 0x5ce9c, 0x3765b, 0x31188, 0x17372, 0x000bd, 0x0078b, 0x01f21, + 0x03c43, 0x03ded, 0x0aad6, 0x07ec1, 0x0f942, 0x05c86, 0x17089, 0x0babb, + 0x1ffe8, 0x2c7de, 0x1f26e, 0x1fac4, 0x3f4f7, 0x37656, 0x1fa7d, 0x376c3, + 0x3fdb3, 0x3118f, 0x1fac6, 0x000f8, 0x007ed, 0x01efd, 0x03e7a, 0x05c91, + 0x0aad9, 0x0baec, 0x0dc32, 0x0f46e, 0x1e200, 0x176fa, 0x3765e, 0x3fdb7, + 0x2c7d6, 0x3fdb9, 0x37654, 0x37658, 0x3118e, 0x1ffdb, 0x000f6, 0x00c43, + 0x03106, 0x068ef, 0x0b84d, 0x0b188, 0x0bbcc, 0x1f264, 0x1bb69, 0x17386, + 0x1fac0, 0x00171, 0x00f39, 0x03e41, 0x068ed, 0x0d9bc, 0x0f7a1, 0x1bb67, + 0x1ffdd, 0x176f9, 0x001b9, 0x00f7d, 0x03f63, 0x0d0fd, 0x0b9ea, 0x188dc, + 0x1fac3, 0x1a1f2, 0x31192, 0x1ffe4, 0x001f6, 0x01754, 0x06865, 0x0f309, + 0x160e5, 0x176f5, 0x3765f, 0x1facc, 0x001e9, 0x01a1a, 0x06201, 0x0f105, + 0x176f0, 0x002df, 0x01756, 0x05d6d, 0x163fa, 0x176ed, 0x00342, 0x02e40, + 0x0d0ff, 0x17082, 0x003cd, 0x02a98, 0x0fffc, 0x2c7dc, 0x1fa7f, 0x003fe, + 0x03764, 0x0fffd, 0x176fc, 0x1fac5, 0x002f7, 0x02ed1, 0x0fb97, 0x0058a, + 0x02edc, 0x0bbc8, 0x005d4, 0x0623d, 0x160e8, 0x0062e, 0x05830, 0x163f9, + 0x006eb, 0x06205, 0x1f274, 0x007de, 0x062c9, 0x1f265, 0x005c9, 0x05cde, + 0x1ffd3, 0x005d4, 0x07988, 0x007ce, 0x0b849, 0x00b1b, 0x05c89, 0x1fac7, + 0x00b93, 0x05c83, 0x00b9e, 0x0f14f, 0x00c4a, 0x0b9c7, 0x00dd4, 0x0c470, + 0x1f271, 0x00f38, 0x0fb96, 
0x176eb, 0x00fa0, 0x163f7, 0x00bb2, 0x0b91b, + 0x00bbe, 0x0f102, 0x00f44, 0x0f946, 0x1facd, 0x00f79, 0x0d9bd, 0x0154d, + 0x0bbc6, 0x00fd2, 0x160e7, 0x0172b, 0x188cb, 0x0175e, 0x0fd76, 0x0175c, + 0x1bb71, 0x0189f, 0x1a1ee, 0x01f24, 0x1a1f6, 0x01ba7, 0x0bbca, 0x01f7d, + 0x0ffff, 0x01f2e, 0x1bb65, 0x01bb5, 0x172f9, 0x01fef, 0x1f26c, 0x01f3e, + 0x0fd77, 0x01762, 0x1bb6e, 0x01ef9, 0x172fc, 0x01fa0, 0x02ab7, 0x02e4a, + 0x1f267, 0x01fb3, 0x1ffda, 0x02e42, 0x03101, 0x17780, 0x0313d, 0x03475, + 0x17784, 0x03126, 0x1facf, 0x03c51, 0x17783, 0x03e40, 0x1ffe5, 0x03663, + 0x1ffe0, 0x03e8f, 0x1f26d, 0x0343c, 0x03cc1, 0x176fd, 0x03e45, 0x02ec0, + 0x03f61, 0x03dee, 0x03fd8, 0x0583e, 0x02e45, 0x03e59, 0x03d02, 0x05ce8, + 0x05568, 0x176fe, 0x02f69, 0x1fad8, 0x058c1, 0x05c83, 0x1ffe6, 0x06271, + 0x06e1c, 0x062c7, 0x068e1, 0x0552f, 0x06864, 0x06866, 0x06e99, 0x05cbc, + 0x07ca5, 0x078a1, 0x05c82, 0x07dcf, 0x0623b, 0x0623e, 0x068e8, 0x07a36, + 0x05d9c, 0x0b077, 0x07cf3, 0x07a34, 0x07ca4, 0x07d19, 0x079d2, 0x07d1c, + 0x07bd9, 0x0b84a, 0x0fb94, 0x0aad5, 0x0dc30, 0x07bf3, 0x0baee, 0x0b07a, + 0x0c472, 0x0b91e, 0x0d9ba, 0x05d9f, 0x0d0fe, 0x0b9c6, 0x05c87, 0x0f14e, + 0x0baed, 0x0b92e, 0x0f103, 0x0b9c4, 0x0fb91, 0x0d9bb, 0x0b1ab, 0x0c58d, + 0x0fffe, 0x0f93b, 0x0f941, 0x0baea, 0x0b91f, 0x0f5cc, 0x0d9bf, 0x0f943, + 0x0f104, 0x1f260, 0x0fb92, 0x0f93f, 0x0f3a6, 0x0bac7, 0x0f7ab, 0x0bac6, + 0x17383, 0x0fd6d, 0x0bae9, 0x0fd6e, 0x1e74f, 0x188ca, 0x1f227, 0x0fb93, + 0x0fb90, 0x0fff7, 0x17085, 0x17083, 0x160e1, 0x17084, 0x0f93e, 0x160e2, + 0x160c6, 0x1a1f1, 0x1bb6f, 0x17384, 0x0fd70, 0x1f263, 0x188d5, 0x173a6, + 0x0f5ce, 0x163f2, 0x0fd71, 0x1ffd2, 0x160c4, 0x1ffd4, 0x2c7d3, 0x1bb74, +}; + +static const uint8_t coef3_huffbits[1072] = { + 9, 7, 2, 3, 4, 4, 5, 5, + 6, 6, 6, 6, 7, 7, 7, 7, + 7, 8, 8, 8, 8, 8, 8, 8, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 12, 11, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 
+ 12, 12, 12, 12, 12, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 14, 13, 14, 14, 13, 14, 13, + 13, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 15, + 14, 14, 15, 14, 14, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 14, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 14, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 16, 15, 16, 16, 16, + 16, 15, 15, 16, 16, 16, 16, 16, + 15, 16, 16, 16, 15, 16, 15, 15, + 16, 15, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 17, 16, 17, 16, 17, 17, 16, + 17, 16, 17, 16, 16, 17, 17, 17, + 16, 17, 16, 16, 17, 16, 17, 16, + 17, 17, 16, 16, 17, 17, 17, 17, + 17, 17, 17, 17, 16, 17, 17, 16, + 17, 17, 17, 17, 17, 17, 17, 17, + 16, 18, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 16, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 18, + 17, 17, 17, 17, 18, 17, 17, 18, + 19, 17, 17, 17, 18, 17, 17, 17, + 18, 18, 18, 17, 17, 17, 18, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 17, 18, 18, 18, 18, 17, + 18, 18, 18, 17, 17, 18, 18, 18, + 18, 19, 18, 18, 19, 19, 20, 18, + 19, 18, 19, 19, 18, 19, 20, 18, + 19, 4, 6, 7, 8, 9, 9, 9, + 10, 10, 10, 11, 11, 11, 11, 12, + 12, 12, 12, 12, 12, 13, 13, 13, + 13, 13, 13, 13, 13, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 16, 15, 15, 15, + 15, 16, 16, 15, 16, 16, 15, 16, + 17, 17, 17, 17, 17, 16, 16, 16, + 16, 16, 17, 17, 17, 16, 18, 17, + 17, 17, 18, 17, 17, 18, 17, 17, + 17, 17, 17, 18, 17, 18, 18, 18, + 17, 17, 18, 19, 18, 18, 17, 17, + 18, 18, 18, 18, 19, 17, 17, 18, + 20, 19, 19, 18, 19, 18, 19, 19, + 19, 19, 17, 5, 7, 9, 10, 10, + 11, 11, 12, 12, 12, 13, 13, 13, + 13, 13, 14, 14, 14, 14, 14, 15, + 14, 15, 15, 15, 15, 15, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 15, 16, 16, 17, 17, 17, + 16, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 16, + 16, 19, 18, 18, 19, 17, 19, 20, + 
17, 18, 18, 18, 18, 18, 18, 6, + 8, 10, 11, 12, 12, 12, 13, 13, + 13, 14, 14, 14, 14, 15, 15, 15, + 15, 15, 15, 16, 16, 16, 16, 16, + 16, 17, 17, 17, 16, 16, 17, 17, + 17, 17, 17, 17, 17, 16, 16, 16, + 17, 18, 18, 18, 17, 19, 19, 18, + 18, 17, 18, 19, 18, 17, 18, 18, + 19, 18, 17, 17, 6, 9, 11, 12, + 13, 13, 13, 14, 14, 14, 15, 15, + 15, 15, 15, 16, 16, 16, 16, 16, + 16, 17, 16, 17, 17, 17, 17, 17, + 17, 17, 18, 17, 18, 17, 17, 18, + 18, 19, 19, 17, 17, 7, 10, 12, + 13, 13, 14, 14, 14, 14, 15, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 17, 17, 17, 17, 18, 17, 18, + 18, 18, 18, 18, 18, 18, 18, 17, + 17, 18, 18, 18, 18, 18, 18, 7, + 10, 12, 13, 14, 15, 15, 15, 15, + 16, 16, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 18, 17, 17, 8, + 11, 13, 14, 15, 15, 15, 15, 16, + 16, 18, 17, 17, 18, 17, 17, 18, + 17, 17, 18, 18, 19, 18, 18, 19, + 19, 19, 18, 18, 18, 8, 11, 13, + 14, 15, 16, 16, 16, 16, 17, 17, + 17, 18, 17, 18, 19, 18, 18, 18, + 18, 18, 18, 8, 12, 14, 15, 15, + 16, 16, 16, 17, 17, 18, 18, 18, + 18, 18, 18, 18, 18, 17, 9, 12, + 14, 15, 16, 16, 17, 17, 17, 17, + 18, 9, 12, 14, 15, 16, 17, 17, + 17, 18, 9, 13, 15, 16, 17, 17, + 18, 17, 18, 17, 9, 13, 15, 16, + 17, 18, 18, 18, 10, 13, 15, 16, + 18, 10, 14, 16, 17, 18, 10, 14, + 16, 17, 10, 14, 16, 18, 18, 10, + 14, 16, 18, 18, 11, 15, 16, 11, + 15, 17, 11, 15, 17, 11, 15, 17, + 11, 15, 17, 11, 15, 17, 12, 16, + 17, 12, 15, 12, 16, 12, 16, 18, + 12, 16, 12, 16, 12, 16, 12, 16, + 17, 12, 16, 18, 12, 17, 13, 16, + 13, 16, 13, 16, 18, 13, 16, 13, + 17, 13, 17, 13, 17, 13, 17, 13, + 17, 13, 17, 13, 17, 13, 17, 13, + 16, 13, 17, 13, 17, 13, 17, 14, + 17, 14, 17, 14, 17, 14, 14, 14, + 17, 14, 17, 14, 14, 18, 14, 14, + 18, 14, 18, 14, 18, 14, 17, 14, + 17, 14, 17, 14, 14, 18, 14, 15, + 15, 15, 14, 15, 15, 14, 15, 15, + 15, 18, 15, 18, 15, 15, 17, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 16, 15, 15, 15, 15, 16, + 16, 16, 16, 16, 15, 15, 15, 15, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 
16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 17, 16, 16, + 16, 17, 16, 16, 16, 17, 17, 17, + 17, 17, 16, 17, 17, 17, 17, 16, + 16, 16, 17, 17, 17, 17, 16, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 18, 17, +}; + +static const uint32_t coef4_huffcodes[476] = { + 0x00f01, 0x0001e, 0x00000, 0x00004, 0x00006, 0x0000d, 0x0000a, 0x00017, + 0x0001d, 0x00017, 0x0002c, 0x00031, 0x00039, 0x0003e, 0x00039, 0x0005a, + 0x00066, 0x00070, 0x0007b, 0x00070, 0x00077, 0x000af, 0x000c9, 0x000f2, + 0x000f4, 0x000b2, 0x000e3, 0x0015b, 0x0015d, 0x00181, 0x0019d, 0x001e3, + 0x001c5, 0x002b5, 0x002db, 0x00338, 0x003c3, 0x003cc, 0x003f0, 0x002cd, + 0x003fa, 0x003a1, 0x005b4, 0x00657, 0x007ab, 0x0074d, 0x0074c, 0x00ac1, + 0x00ac5, 0x0076b, 0x00ca8, 0x00f04, 0x00f00, 0x00fe3, 0x00f3c, 0x00f10, + 0x00f39, 0x00fe6, 0x00e26, 0x00e90, 0x016c5, 0x01827, 0x01954, 0x015c5, + 0x01958, 0x01f8a, 0x01c4a, 0x02b0f, 0x02b41, 0x02b0e, 0x033c6, 0x03050, + 0x01c4f, 0x02d88, 0x0305c, 0x03c18, 0x02b4f, 0x02cc2, 0x03a47, 0x05680, + 0x0569d, 0x06442, 0x06443, 0x06446, 0x0656e, 0x06444, 0x07120, 0x0748a, + 0x0c1ba, 0x07e22, 0x07aa6, 0x07f25, 0x07aa7, 0x07e20, 0x0c11b, 0x0c118, + 0x07aa5, 0x0ad0a, 0x0f389, 0x19ebb, 0x0caad, 0x0fe42, 0x0fe40, 0x16c34, + 0x2b4e5, 0x33d65, 0x16c30, 0x1e7ae, 0x1e25c, 0x18370, 0x1e703, 0x19eba, + 0x16c37, 0x0e234, 0x16c6e, 0x00004, 0x0002a, 0x00061, 0x00075, 0x000cb, + 0x000ff, 0x00190, 0x001eb, 0x001d1, 0x002b9, 0x00307, 0x00339, 0x0033f, + 0x003fb, 0x003b4, 0x0060c, 0x00679, 0x00645, 0x0067d, 0x0078a, 0x007e3, + 0x00749, 0x00ac4, 0x00ad2, 0x00ae3, 0x00c10, 0x00c16, 0x00ad1, 0x00cf4, + 0x00fe2, 0x01586, 0x00e9d, 0x019f1, 0x01664, 0x01e26, 0x01d38, 0x02b4d, + 0x033c5, 0x01fc2, 0x01fc3, 0x01d28, 0x03c1d, 0x0598e, 0x0f094, 0x07aa4, + 0x0ad38, 0x0ac0c, 0x0c11a, 0x079ea, 0x0c881, 0x0fe44, 0x0b635, 0x0ac0d, + 0x0b61e, 0x05987, 0x07121, 0x0f382, 0x0f387, 0x0e237, 0x0fe47, 0x0f383, + 0x0f091, 0x0f385, 0x0e233, 0x182ee, 0x19eb8, 0x1663e, 0x0f093, 0x00014, + 0x00058, 
0x00159, 0x00167, 0x00300, 0x003d4, 0x005b5, 0x0079d, 0x0076a, + 0x00b67, 0x00b60, 0x00f05, 0x00cf0, 0x00f17, 0x00e95, 0x01822, 0x01913, + 0x016c2, 0x0182f, 0x01959, 0x01fcb, 0x01e27, 0x01c40, 0x033c7, 0x01e7b, + 0x01c49, 0x02d89, 0x01e23, 0x01660, 0x03f12, 0x02cc6, 0x033e1, 0x05b34, + 0x0609a, 0x06569, 0x07488, 0x07e21, 0x0cf5f, 0x0712c, 0x0389d, 0x067cf, + 0x07f28, 0x1663f, 0x33d67, 0x1663d, 0x1e25d, 0x3c1ab, 0x15c44, 0x16c36, + 0x0001f, 0x000ec, 0x00323, 0x005b2, 0x0079f, 0x00ac2, 0x00f16, 0x00e9e, + 0x01956, 0x01e0f, 0x019ea, 0x01666, 0x02b89, 0x02b02, 0x02d8c, 0x03c1b, + 0x03c19, 0x032b5, 0x03f9c, 0x02ccf, 0x03897, 0x05b35, 0x0ad02, 0x07f29, + 0x06441, 0x03884, 0x07888, 0x0784e, 0x06568, 0x0c1bb, 0x05986, 0x067cc, + 0x0fe49, 0x0fe48, 0x0c1bc, 0x0fe41, 0x18371, 0x1663c, 0x0e231, 0x0711e, + 0x0ad09, 0x0f092, 0x0002d, 0x001db, 0x00781, 0x00c1a, 0x00f55, 0x01580, + 0x01ea8, 0x02d9b, 0x032af, 0x03f16, 0x03c1c, 0x07834, 0x03c45, 0x0389c, + 0x067ce, 0x06445, 0x0c1b9, 0x07889, 0x07f3a, 0x0784f, 0x07f2b, 0x0ad0b, + 0x0f090, 0x0c11d, 0x0e94e, 0x0711f, 0x0e9f1, 0x0f38e, 0x079e9, 0x0ad03, + 0x0f09b, 0x0caae, 0x0fe46, 0x2b4e6, 0x0e9f0, 0x19eb6, 0x67ac1, 0x67ac0, + 0x33d66, 0x0f388, 0x00071, 0x003a0, 0x00ca9, 0x01829, 0x01d39, 0x02b43, + 0x02cc4, 0x06554, 0x0f09a, 0x0b61f, 0x067cd, 0x0711c, 0x0b636, 0x07f2a, + 0x0b634, 0x0c11f, 0x0cf5e, 0x0b61d, 0x0f06b, 0x0caab, 0x0c1be, 0x0e94c, + 0x0f099, 0x182ed, 0x0e94f, 0x0c119, 0x0e232, 0x2b4e4, 0x0f38a, 0x19eb4, + 0x1e25f, 0x0e94d, 0x000b7, 0x00785, 0x016cc, 0x03051, 0x033c4, 0x0656f, + 0x03891, 0x0711d, 0x0caaf, 0x0f097, 0x07489, 0x0f098, 0x0c880, 0x0caaa, + 0x0f386, 0x19eb7, 0x16c6f, 0x0f384, 0x182e8, 0x182e9, 0x0e230, 0x1e700, + 0x33d62, 0x33d63, 0x33d64, 0x16c33, 0x0e216, 0x000fd, 0x00c15, 0x01665, + 0x03c4a, 0x07f3b, 0x07896, 0x0c11c, 0x0e215, 0x16c32, 0x0f38b, 0x0f38d, + 0x182ea, 0x1e701, 0x712df, 0x15c46, 0x00194, 0x00fe0, 0x03f13, 0x0748b, + 0x0f096, 0x0cf80, 0x1e25e, 0xe25bd, 0x33d61, 0x16c31, 0x001f9, 0x01912, + 0x05710, 
0x0f3d0, 0x0c1bf, 0x00301, 0x01e24, 0x0ad08, 0x003cd, 0x01c41, + 0x0c1bd, 0x00563, 0x03a52, 0x0f3d1, 0x00570, 0x02cce, 0x0e217, 0x0067b, + 0x0655d, 0x0074b, 0x06447, 0x00c12, 0x074fb, 0x00f08, 0x0b61c, 0x00e22, + 0x0fe43, 0x016c7, 0x01836, 0x019f2, 0x01c43, 0x01d3f, 0x01fcf, 0x02b4c, + 0x0304c, 0x032b6, 0x03a46, 0x05607, 0x03f17, 0x02cc5, 0x0609b, 0x0655c, + 0x07e23, 0x067c1, 0x07f26, 0x07f27, 0x0f095, 0x0e9f3, 0x0cf81, 0x0c11e, + 0x0caac, 0x0f38f, 0x0e9f2, 0x074fa, 0x0e236, 0x0fe45, 0x1c428, 0x0e235, + 0x182ef, 0x19eb5, 0x0f3d6, 0x182ec, 0x16c35, 0x0f38c, 0x2b4e7, 0x15c47, + 0xe25bc, 0x1e702, 0x1c4b6, 0x0e25a, 0x3c1aa, 0x15c45, 0x1c429, 0x19eb9, + 0x1e7af, 0x182eb, 0x1e0d4, 0x3896e, +}; + +static const uint8_t coef4_huffbits[476] = { + 12, 6, 2, 3, 4, 4, 5, 5, + 5, 6, 6, 6, 6, 6, 7, 7, + 7, 7, 7, 8, 8, 8, 8, 8, + 8, 9, 9, 9, 9, 9, 9, 9, + 10, 10, 10, 10, 10, 10, 10, 11, + 10, 11, 11, 11, 11, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 13, 13, 13, 13, 13, 13, + 13, 13, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 16, 16, + 16, 15, 15, 15, 15, 15, 16, 16, + 15, 16, 16, 17, 16, 16, 16, 17, + 18, 18, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 4, 6, 7, 8, 8, + 8, 9, 9, 10, 10, 10, 10, 10, + 10, 11, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 13, 13, 13, 14, 13, 14, 14, + 14, 13, 13, 14, 14, 16, 16, 15, + 16, 16, 16, 15, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 17, 16, 16, + 16, 16, 17, 17, 17, 18, 16, 5, + 8, 9, 10, 10, 10, 11, 11, 12, + 12, 12, 12, 12, 12, 13, 13, 13, + 13, 13, 13, 13, 13, 14, 14, 13, + 14, 14, 13, 14, 14, 15, 14, 15, + 15, 15, 16, 15, 16, 16, 15, 15, + 15, 18, 18, 18, 17, 18, 17, 17, + 6, 9, 10, 11, 11, 12, 12, 13, + 13, 13, 13, 14, 14, 14, 14, 14, + 14, 14, 14, 15, 15, 15, 16, 15, + 15, 15, 15, 15, 15, 16, 16, 15, + 16, 16, 16, 16, 17, 18, 17, 16, + 16, 16, 7, 10, 11, 12, 12, 13, + 13, 14, 14, 14, 14, 15, 14, 15, + 15, 15, 16, 15, 15, 15, 15, 16, + 16, 16, 17, 16, 17, 16, 15, 16, + 16, 
16, 16, 18, 17, 17, 19, 19, + 18, 16, 7, 11, 12, 13, 14, 14, + 15, 15, 16, 16, 15, 16, 16, 15, + 16, 16, 16, 16, 16, 16, 16, 17, + 16, 17, 17, 16, 17, 18, 16, 17, + 17, 17, 8, 11, 13, 14, 14, 15, + 15, 16, 16, 16, 16, 16, 16, 16, + 16, 17, 17, 16, 17, 17, 17, 17, + 18, 18, 18, 17, 17, 8, 12, 14, + 14, 15, 15, 16, 17, 17, 16, 16, + 17, 17, 20, 17, 9, 12, 14, 16, + 16, 16, 17, 21, 18, 17, 9, 13, + 15, 16, 16, 10, 13, 16, 10, 14, + 16, 11, 15, 16, 11, 15, 17, 11, + 15, 12, 15, 12, 16, 12, 16, 13, + 16, 13, 13, 13, 14, 14, 13, 14, + 14, 14, 15, 15, 14, 15, 15, 15, + 15, 15, 15, 15, 16, 17, 16, 16, + 16, 16, 17, 16, 17, 16, 18, 17, + 17, 17, 16, 17, 17, 16, 18, 17, + 21, 17, 18, 17, 18, 17, 18, 17, + 17, 17, 17, 19, +}; + +static const uint32_t coef5_huffcodes[435] = { + 0x00347, 0x0000b, 0x00001, 0x00001, 0x0000c, 0x00004, 0x00010, 0x00015, + 0x0001f, 0x0000b, 0x00023, 0x00026, 0x00029, 0x00035, 0x00037, 0x00001, + 0x00015, 0x0001a, 0x0001d, 0x0001c, 0x0001e, 0x0004e, 0x00049, 0x00051, + 0x00078, 0x00004, 0x00000, 0x00008, 0x0000d, 0x0007b, 0x00005, 0x00032, + 0x00095, 0x00091, 0x00096, 0x000a1, 0x000d9, 0x00003, 0x00019, 0x00061, + 0x00066, 0x00060, 0x00017, 0x0000e, 0x00063, 0x001a0, 0x001b7, 0x001e6, + 0x001e7, 0x001b6, 0x00018, 0x001e8, 0x00038, 0x00031, 0x00005, 0x0003d, + 0x00027, 0x001ea, 0x0001a, 0x000c5, 0x000f9, 0x000ff, 0x000db, 0x00250, + 0x000fc, 0x0025c, 0x00008, 0x00075, 0x003d7, 0x003d3, 0x001b0, 0x0007c, + 0x003ca, 0x00036, 0x00189, 0x004a6, 0x004a2, 0x004fb, 0x000c0, 0x0007f, + 0x0009a, 0x00311, 0x0006e, 0x0009b, 0x0068c, 0x006c0, 0x00484, 0x00012, + 0x000c3, 0x0094f, 0x00979, 0x009f9, 0x00d09, 0x00da6, 0x00da8, 0x00901, + 0x000c1, 0x00373, 0x00d08, 0x009fa, 0x00d8b, 0x00d85, 0x00d86, 0x000df, + 0x006e2, 0x000ce, 0x00f24, 0x009fe, 0x001f7, 0x007c1, 0x000cf, 0x009fc, + 0x009ff, 0x00d89, 0x00da9, 0x009fd, 0x001f8, 0x01a36, 0x0128c, 0x0129d, + 0x01a37, 0x00196, 0x003ea, 0x00f8b, 0x00d93, 0x01e45, 0x01e58, 0x01e4b, + 0x01e59, 0x013f1, 0x00309, 0x00265, 
0x00308, 0x0243a, 0x027e1, 0x00f89, + 0x00324, 0x03cbc, 0x03c86, 0x03695, 0x0243c, 0x0243b, 0x0243e, 0x01e4a, + 0x003a5, 0x03468, 0x03428, 0x03c84, 0x027e0, 0x025e2, 0x01880, 0x00197, + 0x00325, 0x03cb7, 0x0791e, 0x007ec, 0x06c75, 0x004c8, 0x04bc7, 0x004c6, + 0x00983, 0x0481e, 0x01b53, 0x0251b, 0x01b58, 0x00984, 0x04fa8, 0x03cbb, + 0x00f8a, 0x00322, 0x0346a, 0x0243d, 0x00326, 0x03469, 0x0481f, 0x0481d, + 0x00746, 0x09032, 0x01b50, 0x01d13, 0x0d8e4, 0x0481b, 0x06c74, 0x0796b, + 0x07969, 0x00985, 0x0d8e3, 0x00986, 0x00fa2, 0x01301, 0x06c7c, 0x00987, + 0x03cb8, 0x0f4af, 0x00e88, 0x1b1c0, 0x00fce, 0x033eb, 0x03f6a, 0x03f69, + 0x00fcf, 0x0791f, 0x004c9, 0x04871, 0x00fcd, 0x00982, 0x00fcc, 0x00fa3, + 0x01d12, 0x0796c, 0x01b47, 0x00321, 0x0796a, 0x0d8e2, 0x04872, 0x04873, + 0x0000e, 0x00014, 0x0000a, 0x000a0, 0x00012, 0x0007d, 0x001a2, 0x0003b, + 0x0025f, 0x000dd, 0x0027c, 0x00343, 0x00368, 0x0036b, 0x0003e, 0x001fa, + 0x00485, 0x001b3, 0x0007f, 0x001b1, 0x0019e, 0x004ba, 0x007ad, 0x00339, + 0x00066, 0x007a4, 0x00793, 0x006c6, 0x0007e, 0x000f1, 0x00372, 0x009fb, + 0x00d83, 0x00d8a, 0x00947, 0x009f4, 0x001d0, 0x01b09, 0x01b4b, 0x007ec, + 0x003e1, 0x000ca, 0x003ec, 0x02539, 0x04fa9, 0x01b57, 0x03429, 0x03d2a, + 0x00d97, 0x003a7, 0x00dc0, 0x00d96, 0x00dc1, 0x007eb, 0x03cba, 0x00c43, + 0x00c41, 0x01b52, 0x007ef, 0x00323, 0x03cb9, 0x03c83, 0x007d0, 0x007ed, + 0x06c7f, 0x09033, 0x03f6c, 0x36383, 0x1e95d, 0x06c78, 0x00747, 0x01b51, + 0x00022, 0x00016, 0x00039, 0x00252, 0x00079, 0x00486, 0x00338, 0x00369, + 0x00d88, 0x00026, 0x00d87, 0x00f4b, 0x00d82, 0x00027, 0x001e1, 0x01a15, + 0x007c7, 0x012f0, 0x001e0, 0x006d0, 0x01a16, 0x01e44, 0x01e5f, 0x03690, + 0x00d90, 0x00c42, 0x00daf, 0x00d92, 0x00f80, 0x00cfb, 0x0342f, 0x0487f, + 0x01b46, 0x07968, 0x00d95, 0x00d91, 0x01b55, 0x03f68, 0x04bc6, 0x03cbd, + 0x00f81, 0x00320, 0x00069, 0x000fe, 0x006d5, 0x0033f, 0x000de, 0x007c6, + 0x01e40, 0x00d94, 0x00f88, 0x03c8e, 0x03694, 0x00dae, 0x00dad, 0x00267, + 0x003a6, 0x00327, 0x0487e, 0x007ee, 
0x00749, 0x004c7, 0x03692, 0x01b56, + 0x00fd1, 0x07a56, 0x06c77, 0x09031, 0x00748, 0x06c7a, 0x0796d, 0x033ea, + 0x06c76, 0x00fd0, 0x36382, 0x1e417, 0x00745, 0x04faf, 0x0d8e1, 0x03f6b, + 0x1e95c, 0x04fad, 0x0009e, 0x004bd, 0x0067c, 0x01b08, 0x003eb, 0x01b45, + 0x03691, 0x0d8e5, 0x07904, 0x00981, 0x007ea, 0x019f4, 0x06c7d, 0x04fab, + 0x04fac, 0x06c7e, 0x01300, 0x06c7b, 0x0006f, 0x003f7, 0x03c85, 0x004c4, + 0x0001e, 0x006e1, 0x03693, 0x01b44, 0x00241, 0x01e46, 0x0019d, 0x00266, + 0x004bb, 0x02538, 0x007ac, 0x01b54, 0x00902, 0x04870, 0x00da7, 0x00900, + 0x00185, 0x06c79, 0x006e3, 0x003e9, 0x01e94, 0x003ed, 0x003f2, 0x0342e, + 0x0346b, 0x0251a, 0x004c5, 0x01881, 0x0481c, 0x01b59, 0x03c87, 0x04fae, + 0x007e9, 0x03f6d, 0x0f20a, 0x09030, 0x04faa, 0x0d8e6, 0x03f6f, 0x0481a, + 0x03f6e, 0x1e416, 0x0d8e7, +}; + +static const uint8_t coef5_huffbits[435] = { + 10, 4, 2, 4, 4, 5, 5, 5, + 5, 6, 6, 6, 6, 6, 6, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 8, 8, 8, 8, 7, 8, 8, + 8, 8, 8, 8, 8, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 10, 9, 10, 10, 10, 10, + 10, 9, 10, 10, 10, 10, 10, 10, + 10, 10, 11, 11, 10, 10, 11, 11, + 10, 11, 11, 11, 11, 11, 12, 12, + 12, 12, 12, 12, 11, 11, 11, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 13, + 13, 13, 12, 12, 13, 13, 13, 12, + 12, 12, 12, 12, 13, 13, 13, 13, + 13, 14, 14, 14, 14, 13, 13, 13, + 13, 13, 14, 14, 14, 14, 14, 14, + 15, 14, 14, 14, 14, 14, 14, 13, + 14, 14, 14, 14, 14, 14, 15, 14, + 15, 14, 15, 15, 15, 15, 15, 15, + 16, 15, 15, 14, 15, 16, 15, 14, + 14, 15, 14, 14, 15, 14, 15, 15, + 15, 16, 15, 17, 16, 15, 15, 15, + 15, 16, 16, 16, 16, 17, 15, 16, + 14, 16, 16, 17, 16, 16, 16, 16, + 16, 15, 15, 15, 16, 16, 16, 16, + 17, 15, 15, 15, 15, 16, 15, 15, + 4, 7, 8, 8, 9, 9, 9, 10, + 10, 10, 10, 10, 10, 10, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 12, + 12, 11, 11, 11, 12, 12, 12, 12, + 12, 12, 12, 12, 13, 13, 13, 13, + 12, 13, 14, 14, 15, 15, 14, 14, + 14, 14, 14, 14, 14, 15, 14, 14, + 14, 15, 15, 15, 14, 14, 15, 15, + 15, 16, 
16, 18, 17, 15, 15, 15, + 6, 9, 10, 10, 11, 11, 12, 12, + 12, 13, 12, 12, 12, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 14, + 14, 14, 14, 14, 14, 14, 14, 15, + 15, 15, 14, 14, 15, 16, 15, 14, + 14, 15, 7, 10, 11, 12, 13, 13, + 13, 14, 14, 14, 14, 14, 14, 14, + 14, 15, 15, 15, 15, 15, 14, 15, + 16, 15, 15, 16, 15, 15, 15, 16, + 15, 16, 18, 17, 15, 15, 16, 16, + 17, 15, 8, 11, 13, 13, 14, 15, + 14, 16, 15, 16, 15, 15, 15, 15, + 15, 15, 17, 15, 9, 12, 14, 15, + 10, 13, 14, 15, 10, 13, 11, 14, + 11, 14, 11, 15, 12, 15, 12, 12, + 13, 15, 13, 14, 13, 14, 14, 14, + 14, 14, 15, 15, 15, 15, 14, 15, + 15, 16, 16, 16, 15, 16, 16, 15, + 16, 17, 16, +}; + +static const uint16_t levels0[60] = { +317, 92, 62, 60, 19, 17, 10, 7, + 6, 5, 5, 3, 3, 3, 2, 2, + 2, 2, 2, 2, 2, 1, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, +}; + +static const uint16_t levels1[40] = { +311, 91, 61, 28, 10, 6, 5, 2, + 2, 2, 2, 2, 2, 2, 2, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static const uint16_t levels2[340] = { +181,110, 78, 63, 61, 62, 60, 61, + 33, 41, 41, 19, 17, 19, 12, 11, + 9, 11, 10, 6, 8, 7, 6, 4, + 5, 5, 4, 4, 3, 4, 3, 5, + 3, 4, 3, 3, 3, 3, 3, 3, + 2, 2, 4, 2, 3, 2, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, + 3, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 1, 2, 1, 2, 2, + 2, 2, 1, 2, 1, 1, 1, 2, + 2, 1, 2, 1, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 
1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, +}; + +static const uint16_t levels3[180] = { +351,122, 76, 61, 41, 42, 24, 30, + 22, 19, 11, 9, 10, 8, 5, 5, + 4, 5, 5, 3, 3, 3, 3, 3, + 3, 3, 2, 2, 3, 2, 2, 2, + 3, 3, 2, 2, 2, 3, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 1, 1, + 2, 2, 1, 2, 1, 2, 2, 2, + 2, 2, 2, 1, 2, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 2, + 2, 1, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, +}; + +static const uint16_t levels4[70] = { +113, 68, 49, 42, 40, 32, 27, 15, + 10, 5, 3, 3, 3, 3, 2, 2, + 2, 2, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, +}; + +static const uint16_t levels5[40] = { +214, 72, 42, 40, 18, 4, 4, 2, + 2, 2, 2, 2, 1, 1, 2, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static const CoefVLCTable coef_vlcs[6] = { + { + sizeof(coef0_huffbits), coef0_huffcodes, coef0_huffbits, levels0, + }, + { + sizeof(coef1_huffbits), coef1_huffcodes, coef1_huffbits, levels1, + }, + { + sizeof(coef2_huffbits), coef2_huffcodes, coef2_huffbits, levels2, + }, + { + sizeof(coef3_huffbits), coef3_huffcodes, coef3_huffbits, levels3, + }, + { + sizeof(coef4_huffbits), coef4_huffcodes, coef4_huffbits, levels4, + }, + { + sizeof(coef5_huffbits), coef5_huffcodes, coef5_huffbits, levels5, + }, +}; diff --git a/src/libffmpeg/libavcodec/wmadec.c b/src/libffmpeg/libavcodec/wmadec.c new file mode 100644 index 000000000..0eef66c6b --- 
/dev/null +++ b/src/libffmpeg/libavcodec/wmadec.c @@ -0,0 +1,1339 @@ +/* + * WMA compatible decoder + * Copyright (c) 2002 The FFmpeg Project. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include "avcodec.h" +#include "dsputil.h" + +//#define DEBUG_PARAMS +//#define DEBUG_TRACE + +/* size of blocks */ +#define BLOCK_MIN_BITS 7 +#define BLOCK_MAX_BITS 11 +#define BLOCK_MAX_SIZE (1 << BLOCK_MAX_BITS) + +#define BLOCK_NB_SIZES (BLOCK_MAX_BITS - BLOCK_MIN_BITS + 1) + +/* XXX: find exact max size */ +#define HIGH_BAND_MAX_SIZE 16 + +#define NB_LSP_COEFS 10 + +/* XXX: is it a suitable value ? 
*/ +#define MAX_CODED_SUPERFRAME_SIZE 4096 + +#define MAX_CHANNELS 2 + +#define NOISE_TAB_SIZE 8192 + +#define LSP_POW_BITS 7 + +typedef struct WMADecodeContext { + GetBitContext gb; + int sample_rate; + int nb_channels; + int bit_rate; + int version; /* 1 = 0x160 (WMAV1), 2 = 0x161 (WMAV2) */ + int block_align; + int use_bit_reservoir; + int use_variable_block_len; + int use_exp_vlc; /* exponent coding: 0 = lsp, 1 = vlc + delta */ + int use_noise_coding; /* true if perceptual noise is added */ + int byte_offset_bits; + VLC exp_vlc; + int exponent_sizes[BLOCK_NB_SIZES]; + uint16_t exponent_bands[BLOCK_NB_SIZES][25]; + int high_band_start[BLOCK_NB_SIZES]; /* index of first coef in high band */ + int coefs_start; /* first coded coef */ + int coefs_end[BLOCK_NB_SIZES]; /* max number of coded coefficients */ + int exponent_high_sizes[BLOCK_NB_SIZES]; + int exponent_high_bands[BLOCK_NB_SIZES][HIGH_BAND_MAX_SIZE]; + VLC hgain_vlc; + + /* coded values in high bands */ + int high_band_coded[MAX_CHANNELS][HIGH_BAND_MAX_SIZE]; + int high_band_values[MAX_CHANNELS][HIGH_BAND_MAX_SIZE]; + + /* there are two possible tables for spectral coefficients */ + VLC coef_vlc[2]; + uint16_t *run_table[2]; + uint16_t *level_table[2]; + /* frame info */ + int frame_len; /* frame length in samples */ + int frame_len_bits; /* frame_len = 1 << frame_len_bits */ + int nb_block_sizes; /* number of block sizes */ + /* block info */ + int reset_block_lengths; + int block_len_bits; /* log2 of current block length */ + int next_block_len_bits; /* log2 of next block length */ + int prev_block_len_bits; /* log2 of prev block length */ + int block_len; /* block length in samples */ + int block_num; /* block number in current frame */ + int block_pos; /* current position in frame */ + uint8_t ms_stereo; /* true if mid/side stereo mode */ + uint8_t channel_coded[MAX_CHANNELS]; /* true if channel is coded */ + float exponents[MAX_CHANNELS][BLOCK_MAX_SIZE]; + float max_exponent[MAX_CHANNELS]; + int16_t 
coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE]; + float coefs[MAX_CHANNELS][BLOCK_MAX_SIZE]; + MDCTContext mdct_ctx[BLOCK_NB_SIZES]; + float *windows[BLOCK_NB_SIZES]; + FFTSample mdct_tmp[BLOCK_MAX_SIZE]; /* temporary storage for imdct */ + /* output buffer for one frame and the last for IMDCT windowing */ + float frame_out[MAX_CHANNELS][BLOCK_MAX_SIZE * 2]; + /* last frame info */ + uint8_t last_superframe[MAX_CODED_SUPERFRAME_SIZE + 4]; /* padding added */ + int last_bitoffset; + int last_superframe_len; + float noise_table[NOISE_TAB_SIZE]; + int noise_index; + float noise_mult; /* XXX: suppress that and integrate it in the noise array */ + /* lsp_to_curve tables */ + float lsp_cos_table[BLOCK_MAX_SIZE]; + float lsp_pow_e_table[256]; + float lsp_pow_m_table1[(1 << LSP_POW_BITS)]; + float lsp_pow_m_table2[(1 << LSP_POW_BITS)]; +} WMADecodeContext; + +typedef struct CoefVLCTable { + int n; /* total number of codes */ + const uint32_t *huffcodes; /* VLC bit values */ + const uint8_t *huffbits; /* VLC bit size */ + const uint16_t *levels; /* table to build run/level tables */ +} CoefVLCTable; + +static void wma_lsp_to_curve_init(WMADecodeContext *s, int frame_len); + +#include "wmadata.h" + +#ifdef DEBUG_TRACE + +int frame_count; + +static FILE *flog; + +void trace(const char *fmt, ...) 
+{ + va_list ap; + + + if (!flog) { + flog = fopen("/tmp/out.log", "w"); + setlinebuf(flog); + } + + va_start(ap, fmt); + vfprintf(flog, fmt, ap); + va_end(ap); +} + +#define get_bits(s, n) get_bits_trace(s, n) +#define get_vlc(s, vlc) get_vlc_trace(s, vlc) + +unsigned int get_bits_trace(GetBitContext *s, int n) +{ + unsigned int val; + val = (get_bits)(s, n); + trace("get_bits(%d) : 0x%x\n", n, val); + return val; +} + +static int get_vlc_trace(GetBitContext *s, VLC *vlc) +{ + int code; + code = (get_vlc)(s, vlc); + trace("get_vlc() : %d\n", code); + return code; +} + +static void dump_shorts(const char *name, const short *tab, int n) +{ + int i; + + trace("%s[%d]:\n", name, n); + for(i=0;in; + const uint8_t *table_bits = vlc_table->huffbits; + const uint32_t *table_codes = vlc_table->huffcodes; + const uint16_t *levels_table = vlc_table->levels; + uint16_t *run_table, *level_table; + const uint16_t *p; + int i, l, j, level; + + init_vlc(vlc, 9, n, table_bits, 1, 1, table_codes, 4, 4); + + run_table = malloc(n * sizeof(uint16_t)); + level_table = malloc(n * sizeof(uint16_t)); + p = levels_table; + i = 2; + level = 1; + while (i < n) { + l = *p++; + for(j=0;jpriv_data; + int i, flags1, flags2; + float *window; + uint8_t *extradata; + float bps1, high_freq, bps; + int sample_rate1; + int coef_vlc_table; + + s->sample_rate = avctx->sample_rate; + s->nb_channels = avctx->channels; + s->bit_rate = avctx->bit_rate; + s->block_align = avctx->block_align; + + if (avctx->codec_id == CODEC_ID_WMAV1) { + s->version = 1; + } else { + s->version = 2; + } + + /* extract flag infos */ + flags1 = 0; + flags2 = 0; + extradata = avctx->extradata; + if (s->version == 1 && avctx->extradata_size >= 4) { + flags1 = extradata[0] | (extradata[1] << 8); + flags2 = extradata[2] | (extradata[3] << 8); + } else if (s->version == 2 && avctx->extradata_size >= 6) { + flags1 = extradata[0] | (extradata[1] << 8) | + (extradata[2] << 16) | (extradata[3] << 24); + flags2 = extradata[4] | 
(extradata[5] << 8); + } + s->use_exp_vlc = flags2 & 0x0001; + s->use_bit_reservoir = flags2 & 0x0002; + s->use_variable_block_len = flags2 & 0x0004; + + /* compute MDCT block size */ + if (s->sample_rate <= 16000) { + s->frame_len_bits = 9; + } else if (s->sample_rate <= 32000 && s->version == 1) { + s->frame_len_bits = 10; + } else { + s->frame_len_bits = 11; + } + s->frame_len = 1 << s->frame_len_bits; + if (s->use_variable_block_len) { + s->nb_block_sizes = s->frame_len_bits - BLOCK_MIN_BITS + 1; + } else { + s->nb_block_sizes = 1; + } + + /* init rate dependant parameters */ + s->use_noise_coding = 1; + high_freq = s->sample_rate * 0.5; + + /* if version 2, then the rates are normalized */ + sample_rate1 = s->sample_rate; + if (s->version == 2) { + if (sample_rate1 >= 44100) + sample_rate1 = 44100; + else if (sample_rate1 >= 22050) + sample_rate1 = 22050; + else if (sample_rate1 >= 16000) + sample_rate1 = 16000; + else if (sample_rate1 >= 11025) + sample_rate1 = 11025; + else if (sample_rate1 >= 8000) + sample_rate1 = 8000; + } + + bps = (float)s->bit_rate / (float)(s->nb_channels * s->sample_rate); + s->byte_offset_bits = av_log2((int)(bps * s->frame_len / 8.0)) + 2; + + /* compute high frequency value and choose if noise coding should + be activated */ + bps1 = bps; + if (s->nb_channels == 2) + bps1 = bps * 1.6; + if (sample_rate1 == 44100) { + if (bps1 >= 0.61) + s->use_noise_coding = 0; + else + high_freq = high_freq * 0.4; + } else if (sample_rate1 == 22050) { + if (bps1 >= 1.16) + s->use_noise_coding = 0; + else if (bps1 >= 0.72) + high_freq = high_freq * 0.7; + else + high_freq = high_freq * 0.6; + } else if (sample_rate1 == 16000) { + if (bps > 0.5) + high_freq = high_freq * 0.5; + else + high_freq = high_freq * 0.3; + } else if (sample_rate1 == 11025) { + high_freq = high_freq * 0.7; + } else if (sample_rate1 == 8000) { + if (bps <= 0.625) { + high_freq = high_freq * 0.5; + } else if (bps > 0.75) { + s->use_noise_coding = 0; + } else { + high_freq = 
high_freq * 0.65; + } + } else { + if (bps >= 0.8) { + high_freq = high_freq * 0.75; + } else if (bps >= 0.6) { + high_freq = high_freq * 0.6; + } else { + high_freq = high_freq * 0.5; + } + } +#ifdef DEBUG_PARAMS + printf("flags1=0x%x flags2=0x%x\n", flags1, flags2); + printf("version=%d channels=%d sample_rate=%d bitrate=%d block_align=%d\n", + s->version, s->nb_channels, s->sample_rate, s->bit_rate, + s->block_align); + printf("bps=%f bps1=%f high_freq=%f bitoffset=%d\n", + bps, bps1, high_freq, s->byte_offset_bits); + printf("use_noise_coding=%d use_exp_vlc=%d\n", + s->use_noise_coding, s->use_exp_vlc); +#endif + + /* compute the scale factor band sizes for each MDCT block size */ + { + int a, b, pos, lpos, k, block_len, i, j, n; + const uint8_t *table; + + if (s->version == 1) { + s->coefs_start = 3; + } else { + s->coefs_start = 0; + } + for(k = 0; k < s->nb_block_sizes; k++) { + block_len = s->frame_len >> k; + + if (s->version == 1) { + lpos = 0; + for(i=0;i<25;i++) { + a = wma_critical_freqs[i]; + b = s->sample_rate; + pos = ((block_len * 2 * a) + (b >> 1)) / b; + if (pos > block_len) + pos = block_len; + s->exponent_bands[0][i] = pos - lpos; + if (pos >= block_len) { + i++; + break; + } + lpos = pos; + } + s->exponent_sizes[0] = i; + } else { + /* hardcoded tables */ + table = NULL; + a = s->frame_len_bits - BLOCK_MIN_BITS - k; + if (a < 3) { + if (s->sample_rate >= 44100) + table = exponent_band_44100[a]; + else if (s->sample_rate >= 32000) + table = exponent_band_32000[a]; + else if (s->sample_rate >= 22050) + table = exponent_band_22050[a]; + } + if (table) { + n = *table++; + for(i=0;iexponent_bands[k][i] = table[i]; + s->exponent_sizes[k] = n; + } else { + j = 0; + lpos = 0; + for(i=0;i<25;i++) { + a = wma_critical_freqs[i]; + b = s->sample_rate; + pos = ((block_len * 2 * a) + (b << 1)) / (4 * b); + pos <<= 2; + if (pos > block_len) + pos = block_len; + if (pos > lpos) + s->exponent_bands[k][j++] = pos - lpos; + if (pos >= block_len) + break; + lpos 
= pos; + } + s->exponent_sizes[k] = j; + } + } + + /* max number of coefs */ + s->coefs_end[k] = (s->frame_len - ((s->frame_len * 9) / 100)) >> k; + /* high freq computation */ + s->high_band_start[k] = (int)((block_len * 2 * high_freq) / + s->sample_rate + 0.5); + n = s->exponent_sizes[k]; + j = 0; + pos = 0; + for(i=0;iexponent_bands[k][i]; + end = pos; + if (start < s->high_band_start[k]) + start = s->high_band_start[k]; + if (end > s->coefs_end[k]) + end = s->coefs_end[k]; + if (end > start) + s->exponent_high_bands[k][j++] = end - start; + } + s->exponent_high_sizes[k] = j; +#if 0 + trace("%5d: coefs_end=%d high_band_start=%d nb_high_bands=%d: ", + s->frame_len >> k, + s->coefs_end[k], + s->high_band_start[k], + s->exponent_high_sizes[k]); + for(j=0;jexponent_high_sizes[k];j++) + trace(" %d", s->exponent_high_bands[k][j]); + trace("\n"); +#endif + } + } + +#ifdef DEBUG_TRACE + { + int i, j; + for(i = 0; i < s->nb_block_sizes; i++) { + trace("%5d: n=%2d:", + s->frame_len >> i, + s->exponent_sizes[i]); + for(j=0;jexponent_sizes[i];j++) + trace(" %d", s->exponent_bands[i][j]); + trace("\n"); + } + } +#endif + + /* init MDCT */ + for(i = 0; i < s->nb_block_sizes; i++) + mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1); + + /* init MDCT windows : simple sinus window */ + for(i = 0; i < s->nb_block_sizes; i++) { + int n, j; + float alpha; + n = 1 << (s->frame_len_bits - i); + window = av_malloc(sizeof(float) * n); + alpha = M_PI / (2.0 * n); + for(j=0;jwindows[i] = window; + } + + s->reset_block_lengths = 1; + + if (s->use_noise_coding) { + + /* init the noise generator */ + if (s->use_exp_vlc) + s->noise_mult = 0.02; + else + s->noise_mult = 0.04; + +#if defined(DEBUG_TRACE) + for(i=0;inoise_table[i] = 1.0 * s->noise_mult; +#else + { + unsigned int seed; + float norm; + seed = 1; + norm = (1.0 / (float)(1LL << 31)) * sqrt(3) * s->noise_mult; + for(i=0;inoise_table[i] = (float)((int)seed) * norm; + } + } +#endif + init_vlc(&s->hgain_vlc, 9, 
sizeof(hgain_huffbits), + hgain_huffbits, 1, 1, + hgain_huffcodes, 2, 2); + } + + if (s->use_exp_vlc) { + init_vlc(&s->exp_vlc, 9, sizeof(scale_huffbits), + scale_huffbits, 1, 1, + scale_huffcodes, 4, 4); + } else { + wma_lsp_to_curve_init(s, s->frame_len); + } + + /* choose the VLC tables for the coefficients */ + coef_vlc_table = 2; + if (s->sample_rate >= 32000) { + if (bps1 < 0.72) + coef_vlc_table = 0; + else if (bps1 < 1.16) + coef_vlc_table = 1; + } + + init_coef_vlc(&s->coef_vlc[0], &s->run_table[0], &s->level_table[0], + &coef_vlcs[coef_vlc_table * 2]); + init_coef_vlc(&s->coef_vlc[1], &s->run_table[1], &s->level_table[1], + &coef_vlcs[coef_vlc_table * 2 + 1]); + return 0; +} + +/* interpolate values for a bigger or smaller block. The block must + have multiple sizes */ +static void interpolate_array(float *scale, int old_size, int new_size) +{ + int i, j, jincr, k; + float v; + + if (new_size > old_size) { + jincr = new_size / old_size; + j = new_size; + for(i = old_size - 1; i >=0; i--) { + v = scale[i]; + k = jincr; + do { + scale[--j] = v; + } while (--k); + } + } else if (new_size < old_size) { + j = 0; + jincr = old_size / new_size; + for(i = 0; i < new_size; i++) { + scale[i] = scale[j]; + j += jincr; + } + } +} + +/* compute x^-0.25 with an exponent and mantissa table. We use linear + interpolation to reduce the mantissa table size at a small speed + expense (linear interpolation approximately doubles the number of + bits of precision). */ +static inline float pow_m1_4(WMADecodeContext *s, float x) +{ + union { + float f; + unsigned int v; + } u, t; + unsigned int e, m; + float a, b; + + u.f = x; + e = u.v >> 23; + m = (u.v >> (23 - LSP_POW_BITS)) & ((1 << LSP_POW_BITS) - 1); + /* build interpolation scale: 1 <= t < 2. 
*/ + t.v = ((u.v << LSP_POW_BITS) & ((1 << 23) - 1)) | (127 << 23); + a = s->lsp_pow_m_table1[m]; + b = s->lsp_pow_m_table2[m]; + return s->lsp_pow_e_table[e] * (a + b * t.f); +} + +static void wma_lsp_to_curve_init(WMADecodeContext *s, int frame_len) +{ + float wdel, a, b; + int i, e, m; + + wdel = M_PI / frame_len; + for(i=0;ilsp_cos_table[i] = 2.0f * cos(wdel * i); + + /* tables for x^-0.25 computation */ + for(i=0;i<256;i++) { + e = i - 126; + s->lsp_pow_e_table[i] = pow(2.0, e * -0.25); + } + + /* NOTE: these two tables are needed to avoid two operations in + pow_m1_4 */ + b = 1.0; + for(i=(1 << LSP_POW_BITS) - 1;i>=0;i--) { + m = (1 << LSP_POW_BITS) + i; + a = (float)m * (0.5 / (1 << LSP_POW_BITS)); + a = pow(a, -0.25); + s->lsp_pow_m_table1[i] = 2 * a - b; + s->lsp_pow_m_table2[i] = b - a; + b = a; + } +#if 0 + for(i=1;i<20;i++) { + float v, r1, r2; + v = 5.0 / i; + r1 = pow_m1_4(s, v); + r2 = pow(v,-0.25); + printf("%f^-0.25=%f e=%f\n", v, r1, r2 - r1); + } +#endif +} + +/* NOTE: We use the same code as Vorbis here */ +/* XXX: optimize it further with SSE/3Dnow */ +static void wma_lsp_to_curve(WMADecodeContext *s, + float *out, float *val_max_ptr, + int n, float *lsp) +{ + int i, j; + float p, q, w, v, val_max; + + val_max = 0; + for(i=0;ilsp_cos_table[i]; + for(j=1;j val_max) + val_max = v; + out[i] = v; + } + *val_max_ptr = val_max; +} + +/* decode exponents coded with LSP coefficients (same idea as Vorbis) */ +static void decode_exp_lsp(WMADecodeContext *s, int ch) +{ + float lsp_coefs[NB_LSP_COEFS]; + int val, i; + + for(i = 0; i < NB_LSP_COEFS; i++) { + if (i == 0 || i >= 8) + val = get_bits(&s->gb, 3); + else + val = get_bits(&s->gb, 4); + lsp_coefs[i] = lsp_codebook[i][val]; + } + + wma_lsp_to_curve(s, s->exponents[ch], &s->max_exponent[ch], + s->block_len, lsp_coefs); +} + +/* decode exponents coded with VLC codes */ +static int decode_exp_vlc(WMADecodeContext *s, int ch) +{ + int last_exp, n, code; + const uint16_t *ptr, *band_ptr; + float v, *q, 
max_scale, *q_end; + + band_ptr = s->exponent_bands[s->frame_len_bits - s->block_len_bits]; + ptr = band_ptr; + q = s->exponents[ch]; + q_end = q + s->block_len; + max_scale = 0; + if (s->version == 1) { + last_exp = get_bits(&s->gb, 5) + 10; + /* XXX: use a table */ + v = pow(10, last_exp * (1.0 / 16.0)); + max_scale = v; + n = *ptr++; + do { + *q++ = v; + } while (--n); + } + last_exp = 36; + while (q < q_end) { + code = get_vlc(&s->gb, &s->exp_vlc); + if (code < 0) + return -1; + /* NOTE: this offset is the same as MPEG4 AAC ! */ + last_exp += code - 60; + /* XXX: use a table */ + v = pow(10, last_exp * (1.0 / 16.0)); + if (v > max_scale) + max_scale = v; + n = *ptr++; + do { + *q++ = v; + } while (--n); + } + s->max_exponent[ch] = max_scale; + return 0; +} + +/* return 0 if OK. return 1 if last block of frame. return -1 if + unrecorrable error. */ +static int wma_decode_block(WMADecodeContext *s) +{ + int n, v, a, ch, code, bsize; + int coef_nb_bits, total_gain, parse_exponents; + float window[BLOCK_MAX_SIZE * 2]; + int nb_coefs[MAX_CHANNELS]; + float mdct_norm; + + trace("***decode_block: %d:%d\n", frame_count - 1, s->block_num); + + /* compute current block length */ + if (s->use_variable_block_len) { + n = av_log2(s->nb_block_sizes - 1) + 1; + + if (s->reset_block_lengths) { + s->reset_block_lengths = 0; + v = get_bits(&s->gb, n); + if (v >= s->nb_block_sizes) + return -1; + s->prev_block_len_bits = s->frame_len_bits - v; + v = get_bits(&s->gb, n); + if (v >= s->nb_block_sizes) + return -1; + s->block_len_bits = s->frame_len_bits - v; + } else { + /* update block lengths */ + s->prev_block_len_bits = s->block_len_bits; + s->block_len_bits = s->next_block_len_bits; + } + v = get_bits(&s->gb, n); + if (v >= s->nb_block_sizes) + return -1; + s->next_block_len_bits = s->frame_len_bits - v; + } else { + /* fixed block len */ + s->next_block_len_bits = s->frame_len_bits; + s->prev_block_len_bits = s->frame_len_bits; + s->block_len_bits = s->frame_len_bits; + } + + 
/* now check if the block length is coherent with the frame length */ + s->block_len = 1 << s->block_len_bits; + if ((s->block_pos + s->block_len) > s->frame_len) + return -1; + + if (s->nb_channels == 2) { + s->ms_stereo = get_bits(&s->gb, 1); + } + v = 0; + for(ch = 0; ch < s->nb_channels; ch++) { + a = get_bits(&s->gb, 1); + s->channel_coded[ch] = a; + v |= a; + } + /* if no channel coded, no need to go further */ + /* XXX: fix potential framing problems */ + if (!v) + goto next; + + bsize = s->frame_len_bits - s->block_len_bits; + + /* read total gain and extract corresponding number of bits for + coef escape coding */ + total_gain = 1; + for(;;) { + a = get_bits(&s->gb, 7); + total_gain += a; + if (a != 127) + break; + } + + if (total_gain < 15) + coef_nb_bits = 13; + else if (total_gain < 32) + coef_nb_bits = 12; + else if (total_gain < 40) + coef_nb_bits = 11; + else if (total_gain < 45) + coef_nb_bits = 10; + else + coef_nb_bits = 9; + + /* compute number of coefficients */ + n = s->coefs_end[bsize] - s->coefs_start; + for(ch = 0; ch < s->nb_channels; ch++) + nb_coefs[ch] = n; + + /* complex coding */ + if (s->use_noise_coding) { + + for(ch = 0; ch < s->nb_channels; ch++) { + if (s->channel_coded[ch]) { + int i, n, a; + n = s->exponent_high_sizes[bsize]; + for(i=0;igb, 1); + s->high_band_coded[ch][i] = a; + /* if noise coding, the coefficients are not transmitted */ + if (a) + nb_coefs[ch] -= s->exponent_high_bands[bsize][i]; + } + } + } + for(ch = 0; ch < s->nb_channels; ch++) { + if (s->channel_coded[ch]) { + int i, n, val, code; + + n = s->exponent_high_sizes[bsize]; + val = (int)0x80000000; + for(i=0;ihigh_band_coded[ch][i]) { + if (val == (int)0x80000000) { + val = get_bits(&s->gb, 7) - 19; + } else { + code = get_vlc(&s->gb, &s->hgain_vlc); + if (code < 0) + return -1; + val += code - 18; + } + s->high_band_values[ch][i] = val; + } + } + } + } + } + + /* exposant can be interpolated in short blocks. 
*/ + parse_exponents = 1; + if (s->block_len_bits != s->frame_len_bits) { + parse_exponents = get_bits(&s->gb, 1); + } + + if (parse_exponents) { + for(ch = 0; ch < s->nb_channels; ch++) { + if (s->channel_coded[ch]) { + if (s->use_exp_vlc) { + if (decode_exp_vlc(s, ch) < 0) + return -1; + } else { + decode_exp_lsp(s, ch); + } + } + } + } else { + for(ch = 0; ch < s->nb_channels; ch++) { + if (s->channel_coded[ch]) { + interpolate_array(s->exponents[ch], 1 << s->prev_block_len_bits, + s->block_len); + } + } + } + + /* parse spectral coefficients : just RLE encoding */ + for(ch = 0; ch < s->nb_channels; ch++) { + if (s->channel_coded[ch]) { + VLC *coef_vlc; + int level, run, sign, tindex; + int16_t *ptr, *eptr; + const int16_t *level_table, *run_table; + + /* special VLC tables are used for ms stereo because + there is potentially less energy there */ + tindex = (ch == 1 && s->ms_stereo); + coef_vlc = &s->coef_vlc[tindex]; + run_table = s->run_table[tindex]; + level_table = s->level_table[tindex]; + /* XXX: optimize */ + ptr = &s->coefs1[ch][0]; + eptr = ptr + nb_coefs[ch]; + memset(ptr, 0, s->block_len * sizeof(int16_t)); + for(;;) { + code = get_vlc(&s->gb, coef_vlc); + if (code < 0) + return -1; + if (code == 1) { + /* EOB */ + break; + } else if (code == 0) { + /* escape */ + level = get_bits(&s->gb, coef_nb_bits); + /* NOTE: this is rather suboptimal. 
reading + block_len_bits would be better */ + run = get_bits(&s->gb, s->frame_len_bits); + } else { + /* normal code */ + run = run_table[code]; + level = level_table[code]; + } + sign = get_bits(&s->gb, 1); + if (!sign) + level = -level; + ptr += run; + if (ptr >= eptr) + return -1; + *ptr++ = level; + /* NOTE: EOB can be omitted */ + if (ptr >= eptr) + break; + } + } + if (s->version == 1 && s->nb_channels >= 2) { + align_get_bits(&s->gb); + } + } + + /* normalize */ + { + int n4 = s->block_len / 2; + mdct_norm = 1.0 / (float)n4; + if (s->version == 1) { + mdct_norm *= sqrt(n4); + } + } + + /* finally compute the MDCT coefficients */ + for(ch = 0; ch < s->nb_channels; ch++) { + if (s->channel_coded[ch]) { + int16_t *coefs1; + float *coefs, *exponents, mult, mult1, noise, *exp_ptr; + int i, j, n, n1, last_high_band; + float exp_power[HIGH_BAND_MAX_SIZE]; + + coefs1 = s->coefs1[ch]; + exponents = s->exponents[ch]; + mult = pow(10, total_gain * 0.05) / s->max_exponent[ch]; + mult *= mdct_norm; + coefs = s->coefs[ch]; + if (s->use_noise_coding) { + mult1 = mult; + /* very low freqs : noise */ + for(i = 0;i < s->coefs_start; i++) { + *coefs++ = s->noise_table[s->noise_index] * (*exponents++) * mult1; + s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1); + } + + n1 = s->exponent_high_sizes[bsize]; + + /* compute power of high bands */ + exp_ptr = exponents + + s->high_band_start[bsize] - + s->coefs_start; + last_high_band = 0; /* avoid warning */ + for(j=0;jexponent_high_bands[s->frame_len_bits - + s->block_len_bits][j]; + if (s->high_band_coded[ch][j]) { + float e2, v; + e2 = 0; + for(i = 0;i < n; i++) { + v = exp_ptr[i]; + e2 += v * v; + } + exp_power[j] = e2 / n; + last_high_band = j; + trace("%d: power=%f (%d)\n", j, exp_power[j], n); + } + exp_ptr += n; + } + + /* main freqs and high freqs */ + for(j=-1;jhigh_band_start[bsize] - + s->coefs_start; + } else { + n = s->exponent_high_bands[s->frame_len_bits - + s->block_len_bits][j]; + } + if (j >= 0 && 
s->high_band_coded[ch][j]) { + /* use noise with specified power */ + mult1 = sqrt(exp_power[j] / exp_power[last_high_band]); + /* XXX: use a table */ + mult1 = mult1 * pow(10, s->high_band_values[ch][j] * 0.05); + mult1 = mult1 / (s->max_exponent[ch] * s->noise_mult); + mult1 *= mdct_norm; + for(i = 0;i < n; i++) { + noise = s->noise_table[s->noise_index]; + s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1); + *coefs++ = (*exponents++) * noise * mult1; + } + } else { + /* coded values + small noise */ + for(i = 0;i < n; i++) { + noise = s->noise_table[s->noise_index]; + s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1); + *coefs++ = ((*coefs1++) + noise) * (*exponents++) * mult; + } + } + } + + /* very high freqs : noise */ + n = s->block_len - s->coefs_end[bsize]; + mult1 = mult * exponents[-1]; + for(i = 0; i < n; i++) { + *coefs++ = s->noise_table[s->noise_index] * mult1; + s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1); + } + } else { + /* XXX: optimize more */ + for(i = 0;i < s->coefs_start; i++) + *coefs++ = 0.0; + n = nb_coefs[ch]; + for(i = 0;i < n; i++) { + *coefs++ = coefs1[i] * exponents[i] * mult; + } + n = s->block_len - s->coefs_end[bsize]; + for(i = 0;i < n; i++) + *coefs++ = 0.0; + } + } + } + +#ifdef DEBUG_TRACE + for(ch = 0; ch < s->nb_channels; ch++) { + if (s->channel_coded[ch]) { + dump_floats("exponents", 3, s->exponents[ch], s->block_len); + dump_floats("coefs", 1, s->coefs[ch], s->block_len); + } + } +#endif + + if (s->ms_stereo && s->channel_coded[1]) { + float a, b; + int i; + + /* nominal case for ms stereo: we do it before mdct */ + /* no need to optimize this case because it should almost + never happen */ + if (!s->channel_coded[0]) { +#ifdef DEBUG_TRACE + trace("rare ms-stereo case happened\n"); +#endif + memset(s->coefs[0], 0, sizeof(float) * s->block_len); + s->channel_coded[0] = 1; + } + + for(i = 0; i < s->block_len; i++) { + a = s->coefs[0][i]; + b = s->coefs[1][i]; + s->coefs[0][i] = a + b; + 
s->coefs[1][i] = a - b; + } + } + + /* build the window : we ensure that when the windows overlap + their squared sum is always 1 (MDCT reconstruction rule) */ + /* XXX: merge with output */ + { + int i, next_block_len, block_len, prev_block_len, n; + float *wptr; + + block_len = s->block_len; + prev_block_len = 1 << s->prev_block_len_bits; + next_block_len = 1 << s->next_block_len_bits; + + /* right part */ + wptr = window + block_len; + if (block_len <= next_block_len) { + for(i=0;iwindows[bsize][i]; + } else { + /* overlap */ + n = (block_len / 2) - (next_block_len / 2); + for(i=0;iwindows[s->frame_len_bits - s->next_block_len_bits][i]; + for(i=0;iwindows[bsize][i]; + } else { + /* overlap */ + n = (block_len / 2) - (prev_block_len / 2); + for(i=0;iwindows[s->frame_len_bits - s->prev_block_len_bits][i]; + for(i=0;inb_channels; ch++) { + if (s->channel_coded[ch]) { + FFTSample output[BLOCK_MAX_SIZE * 2]; + float *ptr; + int i, n4, index, n; + + n = s->block_len; + n4 = s->block_len / 2; + imdct_calc(&s->mdct_ctx[bsize], + output, s->coefs[ch], s->mdct_tmp); + + /* XXX: optimize all that by build the window and + multipying/adding at the same time */ + /* multiply by the window */ + for(i=0;iframe_len / 2) + s->block_pos - n4; + ptr = &s->frame_out[ch][index]; + for(i=0;ims_stereo && !s->channel_coded[1]) { + ptr = &s->frame_out[1][index]; + for(i=0;iblock_num++; + s->block_pos += s->block_len; + if (s->block_pos >= s->frame_len) + return 1; + else + return 0; +} + +/* decode a frame of frame_len samples */ /* Resets block_num and block_pos, then calls wma_decode_block() until it returns non-zero (a negative result aborts with -1). The integer samples of channel ch are then stored at samples[ch], samples[ch + nb_channels], ... so the channels end up interleaved; each value is clamped to [-32768, 32767]. Afterwards the second frame_len floats of every frame_out[ch] are moved to the start of that buffer and the vacated part is zeroed. Returns 0 on success, -1 on error. */ +static int wma_decode_frame(WMADecodeContext *s, int16_t *samples) +{ + int ret, i, n, a, ch, incr; + int16_t *ptr; + float *iptr; + + trace("***decode_frame: %d size=%d\n", frame_count++, s->frame_len); + + /* read each block */ + s->block_num = 0; + s->block_pos = 0; + for(;;) { + ret = wma_decode_block(s); + if (ret < 0) + return -1; + if (ret) + break; + } + + /* convert frame to integer */ + n = s->frame_len; + incr = s->nb_channels; + for(ch = 0; ch < 
s->nb_channels; ch++) { + ptr = samples + ch; + iptr = s->frame_out[ch]; + + /* NOTE(review): the next loop is damaged in this copy of the patch; the text between the loop condition and the upper clamp test is missing, so the loop bound and the float-to-integer conversion are not visible here. Only the clamp of a and the strided store through ptr remain. */ for(i=0;i 32767) + a = 32767; + else if (a < -32768) + a = -32768; + *ptr = a; + ptr += incr; + } + /* prepare for next block */ + memmove(&s->frame_out[ch][0], &s->frame_out[ch][s->frame_len], + s->frame_len * sizeof(float)); + /* XXX: suppress this */ + memset(&s->frame_out[ch][s->frame_len], 0, + s->frame_len * sizeof(float)); + } + +#ifdef DEBUG_TRACE + dump_shorts("samples", samples, n * s->nb_channels); +#endif + return 0; +} + /* Decodes one input packet (buf, buf_size) into int16_t samples written at data. Without use_bit_reservoir a single frame is decoded with the bit reader set up on buf. With use_bit_reservoir a header is read first: a 4 bit super frame index (discarded), a 4 bit count giving nb_frames = count - 1, and bit_offset (byte_offset_bits + 3 bits). If bytes were kept from the previous call (last_superframe_len > 0), data from this packet is appended to them and one frame is decoded from that saved buffer; frames are then read from buf starting at bit_offset, and the bytes of buf from the final read position onwards are copied into last_superframe for the next call. On success *data_size receives the number of bytes written to data and s->block_align is returned; -1 is returned on error. */ +static int wma_decode_superframe(AVCodecContext *avctx, + void *data, int *data_size, + UINT8 *buf, int buf_size) +{ + WMADecodeContext *s = avctx->priv_data; + int nb_frames, bit_offset, i, pos, len; + uint8_t *q; + int16_t *samples; + + trace("***decode_superframe:\n"); + + samples = data; + + init_get_bits(&s->gb, buf, buf_size); + + if (s->use_bit_reservoir) { + /* read super frame header */ + get_bits(&s->gb, 4); /* super frame index */ + nb_frames = get_bits(&s->gb, 4) - 1; + + bit_offset = get_bits(&s->gb, s->byte_offset_bits + 3); + + if (s->last_superframe_len > 0) { + // printf("skip=%d\n", s->last_bitoffset); + /* add bit_offset bits to last frame */ + if ((s->last_superframe_len + ((bit_offset + 7) >> 3)) > + MAX_CODED_SUPERFRAME_SIZE) + return -1; + q = s->last_superframe + s->last_superframe_len; + len = bit_offset; + /* NOTE(review): this loop only stops once len <= 0, so the "if (len > 0)" that follows can never be taken and a final partial byte is fetched as a full 8 bits - confirm the intended loop condition */ while (len > 0) { + *q++ = (get_bits)(&s->gb, 8); + len -= 8; + } + if (len > 0) { + *q++ = (get_bits)(&s->gb, len) << (8 - len); + } + + /* XXX: bit_offset bits into last frame */ + init_get_bits(&s->gb, s->last_superframe, MAX_CODED_SUPERFRAME_SIZE); + /* skip unused bits */ + if (s->last_bitoffset > 0) + skip_bits(&s->gb, s->last_bitoffset); + /* this frame is stored in the last superframe and in the + current one */ + if (wma_decode_frame(s, samples) < 0) + return -1; + samples += s->nb_channels * s->frame_len; + } + + /* read each frame starting from bit_offset */ + pos = bit_offset + 4 + 4 + s->byte_offset_bits + 3; + 
init_get_bits(&s->gb, buf + (pos >> 3), MAX_CODED_SUPERFRAME_SIZE - (pos >> 3)); /* NOTE(review): the size given here is computed from MAX_CODED_SUPERFRAME_SIZE, while the call at the top of this function passes buf_size for the same buffer - confirm which bound is intended */ + len = pos & 7; + if (len > 0) + skip_bits(&s->gb, len); + + s->reset_block_lengths = 1; + /* NOTE(review): the frame loop below is damaged in this copy of the patch; the text between its loop condition and the sample pointer update is missing, so the loop bound and the per-frame call are not visible here */ for(i=0;inb_channels * s->frame_len; + } + + /* we copy the end of the frame in the last frame buffer */ + pos = get_bits_count(&s->gb) + ((bit_offset + 4 + 4 + s->byte_offset_bits + 3) & ~7); + s->last_bitoffset = pos & 7; + pos >>= 3; + len = buf_size - pos; + /* NOTE(review): only the upper bound of len is checked; if pos lies beyond buf_size, len is negative and still reaches memcpy() - confirm this cannot happen */ if (len > MAX_CODED_SUPERFRAME_SIZE) { + return -1; + } + s->last_superframe_len = len; + memcpy(s->last_superframe, buf + pos, len); + } else { + /* single frame decode */ + if (wma_decode_frame(s, samples) < 0) + return -1; + samples += s->nb_channels * s->frame_len; + } + /* bytes written = distance between the advanced sample pointer and the start of data */ *data_size = (int8_t *)samples - (int8_t *)data; + return s->block_align; +} + /* Teardown routine referenced from both decoder descriptors below. Calls mdct_end() on each of the nb_block_sizes entries of mdct_ctx and av_free() on the matching windows[] entries, frees exp_vlc when use_exp_vlc is set and hgain_vlc when use_noise_coding is set, then frees both coef_vlc entries together with their run_table and level_table arrays. Always returns 0. */ +static int wma_decode_end(AVCodecContext *avctx) +{ + WMADecodeContext *s = avctx->priv_data; + int i; + + for(i = 0; i < s->nb_block_sizes; i++) + mdct_end(&s->mdct_ctx[i]); + for(i = 0; i < s->nb_block_sizes; i++) + av_free(s->windows[i]); + + if (s->use_exp_vlc) { + free_vlc(&s->exp_vlc); + } + if (s->use_noise_coding) { + free_vlc(&s->hgain_vlc); + } + for(i = 0;i < 2; i++) { + free_vlc(&s->coef_vlc[i]); + av_free(s->run_table[i]); + av_free(s->level_table[i]); + } + + return 0; +} + /* Decoder descriptor named "wmav1" with id CODEC_ID_WMAV1. The private context size is sizeof(WMADecodeContext) and the functions listed are wma_decode_init, wma_decode_end and wma_decode_superframe. NOTE(review): the NULL entry is presumably an unused encode slot - confirm against the AVCodec declaration. */ +AVCodec wmav1_decoder = +{ + "wmav1", + CODEC_TYPE_AUDIO, + CODEC_ID_WMAV1, + sizeof(WMADecodeContext), + wma_decode_init, + NULL, + wma_decode_end, + wma_decode_superframe, +}; + /* Identical to wmav1_decoder except for the name "wmav2" and the id CODEC_ID_WMAV2. */ +AVCodec wmav2_decoder = +{ + "wmav2", + CODEC_TYPE_AUDIO, + CODEC_ID_WMAV2, + sizeof(WMADecodeContext), + wma_decode_init, + NULL, + wma_decode_end, + wma_decode_superframe, +}; -- cgit v1.2.3