diff options
author | Guenter Bartsch <guenter@users.sourceforge.net> | 2002-04-06 20:51:22 +0000 |
---|---|---|
committer | Guenter Bartsch <guenter@users.sourceforge.net> | 2002-04-06 20:51:22 +0000 |
commit | 55e772ec62ef638f8a0b44e379da663f78245355 (patch) | |
tree | 3b90a73ab2e800ed32f68e24f125164de7a655b3 /src | |
parent | 0176e107fd9b6672d87f75a9eb5d83e163e0179f (diff) | |
download | xine-lib-55e772ec62ef638f8a0b44e379da663f78245355.tar.gz xine-lib-55e772ec62ef638f8a0b44e379da663f78245355.tar.bz2 |
update to latest ffmpeg cvs, this should add mpeg-4 bframe support :-) (BTW: ffmpeg-guys: impressive work\!\!)
CVS patchset: 1690
CVS date: 2002/04/06 20:51:22
Diffstat (limited to 'src')
-rw-r--r-- | src/libffmpeg/libavcodec/avcodec.h | 41 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/common.c | 9 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/common.h | 96 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/dsputil.c | 214 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/dsputil.h | 19 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/h263.c | 1056 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/h263data.h | 4 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/h263dec.c | 138 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/i386/dsputil_mmx.c | 331 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/i386/motion_est_mmx.c | 514 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/motion_est.c | 703 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/mpeg12.c | 90 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/mpeg4data.h | 27 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/mpegvideo.c | 860 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/mpegvideo.h | 103 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/msmpeg4.c | 531 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/msmpeg4data.h | 16 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/utils.c | 41 |
18 files changed, 3807 insertions, 986 deletions
diff --git a/src/libffmpeg/libavcodec/avcodec.h b/src/libffmpeg/libavcodec/avcodec.h index 3c27d99ea..05b27d8c2 100644 --- a/src/libffmpeg/libavcodec/avcodec.h +++ b/src/libffmpeg/libavcodec/avcodec.h @@ -14,11 +14,14 @@ enum CodecID { CODEC_ID_MJPEG, CODEC_ID_MPEG4, CODEC_ID_RAWVIDEO, - CODEC_ID_MSMPEG4, + CODEC_ID_MSMPEG4V1, + CODEC_ID_MSMPEG4V2, + CODEC_ID_MSMPEG4V3, CODEC_ID_H263P, CODEC_ID_H263I, }; +#define CODEC_ID_MSMPEG4 CODEC_ID_MSMPEG4V3 enum CodecType { CODEC_TYPE_VIDEO, @@ -48,11 +51,19 @@ extern int motion_estimation_method; #define ME_FULL 1 #define ME_LOG 2 #define ME_PHODS 3 +#define ME_EPZS 4 +#define ME_X1 5 /* encoding support */ +/* note not everything is supported yet */ #define CODEC_FLAG_HQ 0x0001 /* high quality (non real time) encoding */ #define CODEC_FLAG_QSCALE 0x0002 /* use fixed qscale */ +#define CODEC_FLAG_4MV 0x0004 /* 4 MV per MB allowed */ +#define CODEC_FLAG_B 0x0008 /* use B frames */ +#define CODEC_FLAG_QPEL 0x0010 /* use qpel MC */ +#define CODEC_FLAG_GMC 0x0020 /* use GMC */ +#define CODEC_FLAG_TYPE 0x0040 /* fixed I/P frame type, from avctx->key_frame */ /* codec capabilities */ @@ -63,12 +74,19 @@ extern int motion_estimation_method; typedef struct AVCodecContext { int bit_rate; + int bit_rate_tolerance; /* amount of +- bits (>0)*/ int flags; int sub_id; /* some codecs needs additionnal format info. It is stored there */ /* video only */ int frame_rate; /* frames per sec multiplied by FRAME_RATE_BASE */ int width, height; + int aspect_ratio_info; +#define FF_ASPECT_SQUARE 1 +#define FF_ASPECT_4_3_625 2 +#define FF_ASPECT_4_3_525 3 +#define FF_ASPECT_16_9_625 4 +#define FF_ASPECT_16_9_525 5 int gop_size; /* 0 = intra only */ int pix_fmt; /* pixel format, see PIX_FMT_xxx */ @@ -92,6 +110,12 @@ typedef struct AVCodecContext { a key frame (intra, or seekable) */ int quality; /* quality of the previous encoded frame (between 1 (good) and 31 (bad)) */ + float qcompress; /* amount of qscale change between easy & hard scenes (0.0-1.0)*/ + float qblur; /* amount of qscale smoothing over time (0.0-1.0) */ + int qmin; /* min qscale */ + int qmax; /* max qscale */ + int max_qdiff; /* max qscale difference between frames */ + struct AVCodec *codec; void *priv_data; @@ -122,6 +146,17 @@ typedef struct AVCodecContext { float psnr_cb; float psnr_cr; + /* statistics, used for 2-pass encoding */ + int mv_bits; + int header_bits; + int i_tex_bits; + int p_tex_bits; + int i_count; + int p_count; + int skip_count; + int misc_bits; // cbp, mb_type + int frame_bits; + /* the following fields are ignored */ void *opaque; /* can be used to carry app specific stuff */ char codec_name[32]; @@ -152,7 +187,9 @@ typedef struct AVPicture { extern AVCodec h263_decoder; extern AVCodec mpeg4_decoder; -extern AVCodec msmpeg4_decoder; +extern AVCodec msmpeg4v1_decoder; +extern AVCodec msmpeg4v2_decoder; +extern AVCodec msmpeg4v3_decoder; extern AVCodec mpeg_decoder; extern AVCodec h263i_decoder; extern AVCodec rv10_decoder; diff --git a/src/libffmpeg/libavcodec/common.c b/src/libffmpeg/libavcodec/common.c index 77f94689e..96d8a303a 100644 --- a/src/libffmpeg/libavcodec/common.c +++ b/src/libffmpeg/libavcodec/common.c @@ -128,6 +128,7 @@ void init_get_bits(GetBitContext *s, s->bit_cnt += 8; } #endif + s->size= buffer_size; } #ifndef ALT_BITSTREAM_READER @@ -201,6 +202,14 @@ void align_get_bits(GetBitContext *s) #endif } +int check_marker(GetBitContext *s, char *msg) +{ + int bit= get_bits1(s); + if(!bit) printf("Marker bit missing %s\n", msg); + + return bit; +} + #ifndef ALT_BITSTREAM_READER /* This function is identical to get_bits_long(), the */ /* only diference is that it doesn't touch the buffer */ diff --git a/src/libffmpeg/libavcodec/common.h b/src/libffmpeg/libavcodec/common.h index fd4bba129..707dbbc8f 100644 --- a/src/libffmpeg/libavcodec/common.h +++ b/src/libffmpeg/libavcodec/common.h @@ -13,6 +13,7 @@ //#define ALT_BITSTREAM_READER //#define ALIGNED_BITSTREAM #define FAST_GET_FIRST_VLC +//#define DUMP_STREAM // only works with the ALT_BITSTREAM_READER #ifdef HAVE_AV_CONFIG_H /* only include the following when compiling package */ @@ -197,8 +198,11 @@ typedef struct GetBitContext { int bit_cnt; UINT8 *buf, *buf_ptr, *buf_end; #endif + int size; } GetBitContext; +static inline int get_bits_count(GetBitContext *s); + typedef struct VLC { int bits; INT16 *table_codes; @@ -466,6 +470,13 @@ static inline unsigned int get_bits(GetBitContext *s, int n){ result>>= 32 - n; index+= n; s->index= index; +#ifdef DUMP_STREAM + while(n){ + printf("%d", (result>>(n-1))&1); + n--; + } + printf(" "); +#endif return result; #endif //!ALIGNED_BITSTREAM @@ -492,6 +503,9 @@ static inline unsigned int get_bits1(GetBitContext *s){ result>>= 8 - 1; index++; s->index= index; +#ifdef DUMP_STREAM + printf("%d ", result); +#endif return result; #else @@ -550,9 +564,54 @@ static inline unsigned int show_bits(GetBitContext *s, int n) #endif //!ALT_BITSTREAM_READER } +static inline int show_aligned_bits(GetBitContext *s, int offset, int n) +{ +#ifdef ALT_BITSTREAM_READER +#ifdef ALIGNED_BITSTREAM + int index= (s->index + offset + 7)&(~7); + uint32_t result1= be2me_32( ((uint32_t *)s->buffer)[index>>5] ); + uint32_t result2= be2me_32( ((uint32_t *)s->buffer)[(index>>5) + 1] ); +#ifdef ARCH_X86 + asm ("shldl %%cl, %2, %0\n\t" + : "=r" (result1) + : "0" (result1), "r" (result2), "c" (index)); +#else + result1<<= (index&0x1F); + result2= (result2>>1) >> (31-(index&0x1F)); + result1|= result2; +#endif + result1>>= 32 - n; + + return result1; +#else //ALIGNED_BITSTREAM + int index= (s->index + offset + 7)>>3; + uint32_t result= be2me_32( unaligned32( ((uint8_t *)s->buffer)+index ) ); + + result>>= 32 - n; + + return result; +#endif //!ALIGNED_BITSTREAM +#else //ALT_BITSTREAM_READER + int index= (get_bits_count(s) + offset + 7)>>3; + uint32_t result= be2me_32( unaligned32( ((uint8_t *)s->buf)+index ) ); + + result>>= 32 - n; +//printf(" %X %X %d \n", (int)(((uint8_t *)s->buf)+index ), (int)s->buf_ptr, s->bit_cnt); + return result; +#endif //!ALT_BITSTREAM_READER +} + static inline void skip_bits(GetBitContext *s, int n){ #ifdef ALT_BITSTREAM_READER s->index+= n; +#ifdef DUMP_STREAM + { + int result; + s->index-= n; + result= get_bits(s, n); + } +#endif + #else if(s->bit_cnt>=n){ /* most common case here */ @@ -570,6 +629,10 @@ static inline void skip_bits(GetBitContext *s, int n){ static inline void skip_bits1(GetBitContext *s){ #ifdef ALT_BITSTREAM_READER s->index++; +#ifdef DUMP_STREAM + s->index--; + printf("%d ", get_bits1(s)); +#endif #else if(s->bit_cnt>0){ /* most common case here */ @@ -593,6 +656,7 @@ static inline int get_bits_count(GetBitContext *s) #endif } +int check_marker(GetBitContext *s, char *msg); void align_get_bits(GetBitContext *s); int init_vlc(VLC *vlc, int nb_bits, int nb_codes, const void *bits, int bits_wrap, int bits_size, @@ -694,6 +758,13 @@ static inline int get_vlc(GetBitContext *s, VLC *vlc) if (n > 0) { /* most common case (90%)*/ FLUSH_BITS(n); +#ifdef DUMP_STREAM + { + int n= bit_cnt - s->index; + skip_bits(s, n); + RESTORE_BITS(s); + } +#endif RESTORE_BITS(s); return code; } else if (n == 0) { @@ -728,6 +799,13 @@ static inline int get_vlc(GetBitContext *s, VLC *vlc) table_bits = vlc->table_bits + code; } } +#ifdef DUMP_STREAM + { + int n= bit_cnt - s->index; + skip_bits(s, n); + RESTORE_BITS(s); + } +#endif RESTORE_BITS(s); return code; } @@ -786,6 +864,24 @@ static inline int av_log2(unsigned int v) return n; } +/* median of 3 */ +static inline int mid_pred(int a, int b, int c) +{ + int vmin, vmax; + vmax = vmin = a; + if (b < vmin) + vmin = b; + else + vmax = b; + + if (c < vmin) + vmin = c; + else if (c > vmax) + vmax = c; + + return a + b + c - vmin - vmax; +} + /* memory */ void *av_mallocz(int size); diff --git a/src/libffmpeg/libavcodec/dsputil.c b/src/libffmpeg/libavcodec/dsputil.c index 0e698f35c..dcfad05a5 100644 --- a/src/libffmpeg/libavcodec/dsputil.c +++ b/src/libffmpeg/libavcodec/dsputil.c @@ -30,12 +30,18 @@ void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); +void (*clear_blocks)(DCTELEM *blocks); op_pixels_abs_func pix_abs16x16; op_pixels_abs_func pix_abs16x16_x2; op_pixels_abs_func pix_abs16x16_y2; op_pixels_abs_func pix_abs16x16_xy2; +op_pixels_abs_func pix_abs8x8; +op_pixels_abs_func pix_abs8x8_x2; +op_pixels_abs_func pix_abs8x8_y2; +op_pixels_abs_func pix_abs8x8_xy2; + UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; UINT32 squareTbl[512]; @@ -377,14 +383,14 @@ static void qpel_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int i; for(i=0; i<h; i++) { - dst[0]= cm[(((src[0]+src[1])*160 - (src[0]+src[2])*48 + (src[1]+src[3])*24 - (src[2]+src[4])*8 + r)>>8)]; - dst[1]= cm[(((src[1]+src[2])*160 - (src[0]+src[3])*48 + (src[0]+src[4])*24 - (src[1]+src[5])*8 + r)>>8)]; - dst[2]= cm[(((src[2]+src[3])*160 - (src[1]+src[4])*48 + (src[0]+src[5])*24 - (src[0]+src[6])*8 + r)>>8)]; - dst[3]= cm[(((src[3]+src[4])*160 - (src[2]+src[5])*48 + (src[1]+src[6])*24 - (src[0]+src[7])*8 + r)>>8)]; - dst[4]= cm[(((src[4]+src[5])*160 - (src[3]+src[6])*48 + (src[2]+src[7])*24 - (src[1]+src[8])*8 + r)>>8)]; - dst[5]= cm[(((src[5]+src[6])*160 - (src[4]+src[7])*48 + (src[3]+src[8])*24 - (src[2]+src[8])*8 + r)>>8)]; - dst[6]= cm[(((src[6]+src[7])*160 - (src[5]+src[8])*48 + (src[4]+src[8])*24 - (src[3]+src[7])*8 + r)>>8)]; - dst[7]= cm[(((src[7]+src[8])*160 - (src[6]+src[8])*48 + (src[5]+src[7])*24 - (src[4]+src[6])*8 + r)>>8)]; + dst[0]= cm[(((src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]) + r)>>5)]; + dst[1]= cm[(((src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]) + r)>>5)]; + dst[2]= cm[(((src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]) + r)>>5)]; + dst[3]= cm[(((src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]) + r)>>5)]; + dst[4]= cm[(((src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]) + r)>>5)]; + dst[5]= cm[(((src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]) + r)>>5)]; + dst[6]= cm[(((src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]) + r)>>5)]; + dst[7]= cm[(((src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]) + r)>>5)]; dst+=dstStride; src+=srcStride; } @@ -405,14 +411,14 @@ static void qpel_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, const int src6= src[6*srcStride]; const int src7= src[7*srcStride]; const int src8= src[8*srcStride]; - dst[0*dstStride]= cm[(((src0+src1)*160 - (src0+src2)*48 + (src1+src3)*24 - (src2+src4)*8 + r)>>8)]; - dst[1*dstStride]= cm[(((src1+src2)*160 - (src0+src3)*48 + (src0+src4)*24 - (src1+src5)*8 + r)>>8)]; - dst[2*dstStride]= cm[(((src2+src3)*160 - (src1+src4)*48 + (src0+src5)*24 - (src0+src6)*8 + r)>>8)]; - dst[3*dstStride]= cm[(((src3+src4)*160 - (src2+src5)*48 + (src1+src6)*24 - (src0+src7)*8 + r)>>8)]; - dst[4*dstStride]= cm[(((src4+src5)*160 - (src3+src6)*48 + (src2+src7)*24 - (src1+src8)*8 + r)>>8)]; - dst[5*dstStride]= cm[(((src5+src6)*160 - (src4+src7)*48 + (src3+src8)*24 - (src2+src8)*8 + r)>>8)]; - dst[6*dstStride]= cm[(((src6+src7)*160 - (src5+src8)*48 + (src4+src8)*24 - (src3+src7)*8 + r)>>8)]; - dst[7*dstStride]= cm[(((src7+src8)*160 - (src6+src8)*48 + (src5+src7)*24 - (src4+src6)*8 + r)>>8)]; + dst[0*dstStride]= cm[(((src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4) + r)>>5)]; + dst[1*dstStride]= cm[(((src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5) + r)>>5)]; + dst[2*dstStride]= cm[(((src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6) + r)>>5)]; + dst[3*dstStride]= cm[(((src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7) + r)>>5)]; + dst[4*dstStride]= cm[(((src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8) + r)>>5)]; + dst[5*dstStride]= cm[(((src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8) + r)>>5)]; + dst[6*dstStride]= cm[(((src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7) + r)>>5)]; + dst[7*dstStride]= cm[(((src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6) + r)>>5)]; dst++; src++; } @@ -485,38 +491,38 @@ static void qpel_mc00_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcS static void qpel_mc10_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ {\ UINT8 half[64];\ - qpel_h_lowpass(half, src, 8, srcStride, 8, 128-r);\ + qpel_h_lowpass(half, src, 8, srcStride, 8, 16-r);\ avg2_block(dst, src, half, dstStride, srcStride, 1-r);\ }\ \ static void qpel_mc20_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ {\ - qpel_h_lowpass(dst, src, dstStride, srcStride, 8, 128-r);\ + qpel_h_lowpass(dst, src, dstStride, srcStride, 8, 16-r);\ }\ \ static void qpel_mc30_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ {\ UINT8 half[64];\ - qpel_h_lowpass(half, src, 8, srcStride, 8, 128-r);\ + qpel_h_lowpass(half, src, 8, srcStride, 8, 16-r);\ avg2_block(dst, src+1, half, dstStride, srcStride, 1-r);\ }\ \ static void qpel_mc01_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ {\ UINT8 half[64];\ - qpel_v_lowpass(half, src, 8, srcStride, 8, 128-r);\ + qpel_v_lowpass(half, src, 8, srcStride, 8, 16-r);\ avg2_block(dst, src, half, dstStride, srcStride, 1-r);\ }\ \ static void qpel_mc02_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ {\ - qpel_v_lowpass(dst, src, dstStride, srcStride, 8, 128-r);\ + qpel_v_lowpass(dst, src, dstStride, srcStride, 8, 16-r);\ }\ \ static void qpel_mc03_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ {\ UINT8 half[64];\ - qpel_v_lowpass(half, src, 8, srcStride, 8, 128-r);\ + qpel_v_lowpass(half, src, 8, srcStride, 8, 16-r);\ avg2_block(dst, src+srcStride, half, dstStride, srcStride, 1-r);\ }\ static void qpel_mc11_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ @@ -524,9 +530,9 @@ static void qpel_mc11_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcS UINT8 halfH[72];\ UINT8 halfV[64];\ UINT8 halfHV[64];\ - qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ - qpel_v_lowpass(halfV, src, 8, srcStride, 8, 128-r);\ - qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ + qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ + qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\ + qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ avg4_block(dst, src, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\ }\ static void qpel_mc31_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ @@ -534,9 +540,9 @@ static void qpel_mc31_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcS UINT8 halfH[72];\ UINT8 halfV[64];\ UINT8 halfHV[64];\ - qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ - qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 128-r);\ - qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ + qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ + qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\ + qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ avg4_block(dst, src+1, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\ }\ static void qpel_mc13_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ @@ -544,9 +550,9 @@ static void qpel_mc13_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcS UINT8 halfH[72];\ UINT8 halfV[64];\ UINT8 halfHV[64];\ - qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ - qpel_v_lowpass(halfV, src, 8, srcStride, 8, 128-r);\ - qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ + qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ + qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\ + qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ avg4_block(dst, src+srcStride, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\ }\ static void qpel_mc33_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ @@ -554,25 +560,25 @@ static void qpel_mc33_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcS UINT8 halfH[72];\ UINT8 halfV[64];\ UINT8 halfHV[64];\ - qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ - qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 128-r);\ - qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ + qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ + qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\ + qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ avg4_block(dst, src+srcStride+1, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\ }\ static void qpel_mc21_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ {\ UINT8 halfH[72];\ UINT8 halfHV[64];\ - qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ - qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ + qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ + qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ avg2_block(dst, halfH, halfHV, dstStride, 8, 1-r);\ }\ static void qpel_mc23_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ {\ UINT8 halfH[72];\ UINT8 halfHV[64];\ - qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ - qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ + qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ + qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ avg2_block(dst, halfH+8, halfHV, dstStride, 8, 1-r);\ }\ static void qpel_mc12_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ @@ -580,9 +586,9 @@ static void qpel_mc12_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcS UINT8 halfH[72];\ UINT8 halfV[64];\ UINT8 halfHV[64];\ - qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ - qpel_v_lowpass(halfV, src, 8, srcStride, 8, 128-r);\ - qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ + qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ + qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\ + qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\ }\ static void qpel_mc32_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ @@ -590,16 +596,16 @@ static void qpel_mc32_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcS UINT8 halfH[72];\ UINT8 halfV[64];\ UINT8 halfHV[64];\ - qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ - qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 128-r);\ - qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ + qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ + qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\ + qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\ }\ static void qpel_mc22_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ {\ UINT8 halfH[72];\ - qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ - qpel_v_lowpass(dst, halfH, dstStride, 8, 8, 128-r);\ + qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ + qpel_v_lowpass(dst, halfH, dstStride, 8, 8, 16-r);\ }\ qpel_mc_func qpel_mc ## name ## _tab[16]={ \ qpel_mc00_c ## name, \ @@ -623,12 +629,12 @@ qpel_mc_func qpel_mc ## name ## _tab[16]={ \ QPEL_MC(0, _rnd) QPEL_MC(1, _no_rnd) -int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h) +int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size) { int s, i; s = 0; - for(i=0;i<h;i++) { + for(i=0;i<16;i++) { s += abs(pix1[0] - pix2[0]); s += abs(pix1[1] - pix2[1]); s += abs(pix1[2] - pix2[2]); @@ -651,12 +657,12 @@ int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h) return s; } -int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h) +int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) { int s, i; s = 0; - for(i=0;i<h;i++) { + for(i=0;i<16;i++) { s += abs(pix1[0] - avg2(pix2[0], pix2[1])); s += abs(pix1[1] - avg2(pix2[1], pix2[2])); s += abs(pix1[2] - avg2(pix2[2], pix2[3])); @@ -679,13 +685,13 @@ int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h) return s; } -int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h) +int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) { int s, i; UINT8 *pix3 = pix2 + line_size; s = 0; - for(i=0;i<h;i++) { + for(i=0;i<16;i++) { s += abs(pix1[0] - avg2(pix2[0], pix3[0])); s += abs(pix1[1] - avg2(pix2[1], pix3[1])); s += abs(pix1[2] - avg2(pix2[2], pix3[2])); @@ -709,13 +715,13 @@ int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h) return s; } -int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h) +int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) { int s, i; UINT8 *pix3 = pix2 + line_size; s = 0; - for(i=0;i<h;i++) { + for(i=0;i<16;i++) { s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1])); s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2])); s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3])); @@ -739,6 +745,90 @@ int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h) return s; } +int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size) +{ + int s, i; + + s = 0; + for(i=0;i<8;i++) { + s += abs(pix1[0] - pix2[0]); + s += abs(pix1[1] - pix2[1]); + s += abs(pix1[2] - pix2[2]); + s += abs(pix1[3] - pix2[3]); + s += abs(pix1[4] - pix2[4]); + s += abs(pix1[5] - pix2[5]); + s += abs(pix1[6] - pix2[6]); + s += abs(pix1[7] - pix2[7]); + pix1 += line_size; + pix2 += line_size; + } + return s; +} + +int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) +{ + int s, i; + + s = 0; + for(i=0;i<8;i++) { + s += abs(pix1[0] - avg2(pix2[0], pix2[1])); + s += abs(pix1[1] - avg2(pix2[1], pix2[2])); + s += abs(pix1[2] - avg2(pix2[2], pix2[3])); + s += abs(pix1[3] - avg2(pix2[3], pix2[4])); + s += abs(pix1[4] - avg2(pix2[4], pix2[5])); + s += abs(pix1[5] - avg2(pix2[5], pix2[6])); + s += abs(pix1[6] - avg2(pix2[6], pix2[7])); + s += abs(pix1[7] - avg2(pix2[7], pix2[8])); + pix1 += line_size; + pix2 += line_size; + } + return s; +} + +int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) +{ + int s, i; + UINT8 *pix3 = pix2 + line_size; + + s = 0; + for(i=0;i<8;i++) { + s += abs(pix1[0] - avg2(pix2[0], pix3[0])); + s += abs(pix1[1] - avg2(pix2[1], pix3[1])); + s += abs(pix1[2] - avg2(pix2[2], pix3[2])); + s += abs(pix1[3] - avg2(pix2[3], pix3[3])); + s += abs(pix1[4] - avg2(pix2[4], pix3[4])); + s += abs(pix1[5] - avg2(pix2[5], pix3[5])); + s += abs(pix1[6] - avg2(pix2[6], pix3[6])); + s += abs(pix1[7] - avg2(pix2[7], pix3[7])); + pix1 += line_size; + pix2 += line_size; + pix3 += line_size; + } + return s; +} + +int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) +{ + int s, i; + UINT8 *pix3 = pix2 + line_size; + + s = 0; + for(i=0;i<8;i++) { + s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1])); + s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2])); + s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3])); + s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4])); + s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5])); + s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6])); + s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7])); + s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8])); + pix1 += line_size; + pix2 += line_size; + pix3 += line_size; + } + return s; +} + /* permute block according so that it corresponds to the MMX idct order */ #ifdef SIMPLE_IDCT @@ -777,6 +867,11 @@ void block_permute(INT16 *block) } #endif +void clear_blocks_c(DCTELEM *blocks) +{ + memset(blocks, 0, sizeof(DCTELEM)*6*64); +} + void dsputil_init(void) { int i, j; @@ -801,11 +896,16 @@ void dsputil_init(void) put_pixels_clamped = put_pixels_clamped_c; add_pixels_clamped = add_pixels_clamped_c; gmc1= gmc1_c; + clear_blocks= clear_blocks_c; - pix_abs16x16 = pix_abs16x16_c; - pix_abs16x16_x2 = pix_abs16x16_x2_c; - pix_abs16x16_y2 = pix_abs16x16_y2_c; + pix_abs16x16 = pix_abs16x16_c; + pix_abs16x16_x2 = pix_abs16x16_x2_c; + pix_abs16x16_y2 = pix_abs16x16_y2_c; pix_abs16x16_xy2 = pix_abs16x16_xy2_c; + pix_abs8x8 = pix_abs8x8_c; + pix_abs8x8_x2 = pix_abs8x8_x2_c; + pix_abs8x8_y2 = pix_abs8x8_y2_c; + pix_abs8x8_xy2 = pix_abs8x8_xy2_c; av_fdct = jpeg_fdct_ifast; use_permuted_idct = 1; diff --git a/src/libffmpeg/libavcodec/dsputil.h b/src/libffmpeg/libavcodec/dsputil.h index d0a6e68ba..dc63f06f1 100644 --- a/src/libffmpeg/libavcodec/dsputil.h +++ b/src/libffmpeg/libavcodec/dsputil.h @@ -41,11 +41,13 @@ extern void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); extern void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); extern void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); +extern void (*clear_blocks)(DCTELEM *blocks); void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size); void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); +void clear_blocks_c(DCTELEM *blocks); /* add and put pixel (decoding) */ typedef void (*op_pixels_func)(UINT8 *block, const UINT8 *pixels, int line_size, int h); @@ -67,17 +69,21 @@ extern void (*sub_pixels_tab[4])(DCTELEM *block, const UINT8 *pixels, int line_s /* motion estimation */ -typedef int (*op_pixels_abs_func)(UINT8 *blk1, UINT8 *blk2, int line_size, int h); +typedef int (*op_pixels_abs_func)(UINT8 *blk1, UINT8 *blk2, int line_size); extern op_pixels_abs_func pix_abs16x16; extern op_pixels_abs_func pix_abs16x16_x2; extern op_pixels_abs_func pix_abs16x16_y2; extern op_pixels_abs_func pix_abs16x16_xy2; +extern op_pixels_abs_func pix_abs8x8; +extern op_pixels_abs_func pix_abs8x8_x2; +extern op_pixels_abs_func pix_abs8x8_y2; +extern op_pixels_abs_func pix_abs8x8_xy2; -int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx, int h); -int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx, int h); -int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx, int h); -int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx, int h); +int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx); static inline int block_permute_op(int j) { @@ -102,7 +108,8 @@ void block_permute(INT16 *block); extern int mm_flags; -int mm_support(void); +/* int mm_support(void); */ +#define mm_support() xine_mm_accel() #if 0 static inline void emms(void) diff --git a/src/libffmpeg/libavcodec/h263.c b/src/libffmpeg/libavcodec/h263.c index 79b74631d..52127aaad 100644 --- a/src/libffmpeg/libavcodec/h263.c +++ b/src/libffmpeg/libavcodec/h263.c @@ -17,6 +17,8 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * ac prediction encoding by Michael Niedermayer <michaelni@gmx.at> */ #include "common.h" #include "dsputil.h" @@ -28,19 +30,35 @@ //rounded divison & shift #define RDIV(a,b) ((a) > 0 ? ((a)+((b)>>1))/(b) : ((a)-((b)>>1))/(b)) #define RSHIFT(a,b) ((a) > 0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b)) +#define ABS(a) (((a)>=0)?(a):(-(a))) +#define MAX(a,b) ((a) > (b) ? (a) : (b)) +#define MIN(a,b) ((a) < (b) ? (a) : (b)) static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n); static void h263_encode_motion(MpegEncContext * s, int val); static void h263p_encode_umotion(MpegEncContext * s, int val); static void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, - int n); -static int h263_decode_motion(MpegEncContext * s, int pred); + int n, int dc, UINT8 *scan_table); +static int h263_decode_motion(MpegEncContext * s, int pred, int fcode); static int h263p_decode_umotion(MpegEncContext * s, int pred); static int h263_decode_block(MpegEncContext * s, DCTELEM * block, int n, int coded); static int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, int n, int coded); +static inline int mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *dir_ptr); +static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n, + int dir); +static void mpeg4_decode_sprite_trajectory(MpegEncContext * s); + +extern UINT32 inverse[256]; + +static UINT16 mv_penalty[MAX_FCODE+1][MAX_MV*2+1]; +static UINT8 fcode_tab[MAX_MV*2+1]; +static UINT8 umv_fcode_tab[MAX_MV*2+1]; + +static UINT16 uni_DCtab_lum [512][2]; +static UINT16 uni_DCtab_chrom[512][2]; int h263_get_picture_format(int width, int height) { @@ -195,7 +213,213 @@ int h263_encode_gob_header(MpegEncContext * s, int mb_line) } return 0; } + +static inline int decide_ac_pred(MpegEncContext * s, DCTELEM block[6][64], int dir[6]) +{ + int score0=0, score1=0; + int i, n; + + for(n=0; n<6; n++){ + INT16 *ac_val, *ac_val1; + + ac_val = s->ac_val[0][0] + s->block_index[n] * 16; + ac_val1= ac_val; + if(dir[n]){ + ac_val-= s->block_wrap[n]*16; + for(i=1; i<8; i++){ + const int level= block[n][block_permute_op(i )]; + score0+= ABS(level); + score1+= ABS(level - ac_val[i+8]); + ac_val1[i ]= block[n][block_permute_op(i<<3)]; + ac_val1[i+8]= level; + } + }else{ + ac_val-= 16; + for(i=1; i<8; i++){ + const int level= block[n][block_permute_op(i<<3)]; + score0+= ABS(level); + score1+= ABS(level - ac_val[i]); + ac_val1[i ]= level; + ac_val1[i+8]= block[n][block_permute_op(i )]; + } + } + } + + return score0 > score1 ? 1 : 0; +} + +void mpeg4_encode_mb(MpegEncContext * s, + DCTELEM block[6][64], + int motion_x, int motion_y) +{ + int cbpc, cbpy, i, cbp, pred_x, pred_y; + int bits; + // printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y); + if (!s->mb_intra) { + /* compute cbp */ + cbp = 0; + for (i = 0; i < 6; i++) { + if (s->block_last_index[i] >= 0) + cbp |= 1 << (5 - i); + } + if ((cbp | motion_x | motion_y) == 0 && s->mv_type==MV_TYPE_16X16) { + /* skip macroblock */ + put_bits(&s->pb, 1, 1); + s->misc_bits++; + s->last_bits++; + s->skip_count++; + return; + } + put_bits(&s->pb, 1, 0); /* mb coded */ + if(s->mv_type==MV_TYPE_16X16){ + cbpc = cbp & 3; + put_bits(&s->pb, + inter_MCBPC_bits[cbpc], + inter_MCBPC_code[cbpc]); + cbpy = cbp >> 2; + cbpy ^= 0xf; + put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); + + bits= get_bit_count(&s->pb); + s->misc_bits+= bits - s->last_bits; + s->last_bits=bits; + + /* motion vectors: 16x16 mode */ + h263_pred_motion(s, 0, &pred_x, &pred_y); + + h263_encode_motion(s, motion_x - pred_x); + h263_encode_motion(s, motion_y - pred_y); + }else{ + cbpc = (cbp & 3)+16; + put_bits(&s->pb, + inter_MCBPC_bits[cbpc], + inter_MCBPC_code[cbpc]); + cbpy = cbp >> 2; + cbpy ^= 0xf; + put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); + + bits= get_bit_count(&s->pb); + s->misc_bits+= bits - s->last_bits; + s->last_bits=bits; + + for(i=0; i<4; i++){ + /* motion vectors: 8x8 mode*/ + h263_pred_motion(s, i, &pred_x, &pred_y); + + h263_encode_motion(s, s->motion_val[ s->block_index[i] ][0] - pred_x); + h263_encode_motion(s, s->motion_val[ s->block_index[i] ][1] - pred_y); + } + } + bits= get_bit_count(&s->pb); + s->mv_bits+= bits - s->last_bits; + s->last_bits=bits; + + /* encode each block */ + for (i = 0; i < 6; i++) { + mpeg4_encode_block(s, block[i], i, 0, zigzag_direct); + } + bits= get_bit_count(&s->pb); + s->p_tex_bits+= bits - s->last_bits; + s->last_bits=bits; + s->p_count++; + } else { + int dc_diff[6]; //dc values with the dc prediction subtracted + int dir[6]; //prediction direction + int zigzag_last_index[6]; + UINT8 *scan_table[6]; + + for(i=0; i<6; i++){ + const int level= block[i][0]; + UINT16 *dc_ptr; + + dc_diff[i]= level - mpeg4_pred_dc(s, i, &dc_ptr, &dir[i]); + if (i < 4) { + *dc_ptr = level * s->y_dc_scale; + } else { + *dc_ptr = level * s->c_dc_scale; + } + } + + s->ac_pred= decide_ac_pred(s, block, dir); + + if(s->ac_pred){ + for(i=0; i<6; i++){ + UINT8 *st; + int last_index; + + mpeg4_inv_pred_ac(s, block[i], i, dir[i]); + if (dir[i]==0) st = ff_alternate_vertical_scan; /* left */ + else st = ff_alternate_horizontal_scan; /* top */ + + for(last_index=63; last_index>=0; last_index--) //FIXME optimize + if(block[i][st[last_index]]) break; + zigzag_last_index[i]= s->block_last_index[i]; + s->block_last_index[i]= last_index; + scan_table[i]= st; + } + }else{ + for(i=0; i<6; i++) + scan_table[i]= zigzag_direct; + } + + /* compute cbp */ + cbp = 0; + for (i = 0; i < 6; i++) { + if (s->block_last_index[i] >= 1) + cbp |= 1 << (5 - i); + } + + cbpc = cbp & 3; + if (s->pict_type == I_TYPE) { + put_bits(&s->pb, + intra_MCBPC_bits[cbpc], + intra_MCBPC_code[cbpc]); + } else { + put_bits(&s->pb, 1, 0); /* mb coded */ + put_bits(&s->pb, + inter_MCBPC_bits[cbpc + 4], + inter_MCBPC_code[cbpc + 4]); + } + put_bits(&s->pb, 1, s->ac_pred); + cbpy = cbp >> 2; + put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); + + bits= get_bit_count(&s->pb); + s->misc_bits+= bits - s->last_bits; + s->last_bits=bits; + + /* encode each block */ + for (i = 0; i < 6; i++) { + mpeg4_encode_block(s, block[i], i, dc_diff[i], scan_table[i]); + } + + bits= get_bit_count(&s->pb); + s->i_tex_bits+= bits - s->last_bits; + s->last_bits=bits; + s->i_count++; + + /* restore ac coeffs & last_index stuff if we messed them up with the prediction */ + if(s->ac_pred){ + for(i=0; i<6; i++){ + int j; + INT16 *ac_val; + + ac_val = s->ac_val[0][0] + s->block_index[i] * 16; + + if(dir[i]){ + for(j=1; j<8; j++) + block[i][block_permute_op(j )]= ac_val[j+8]; + }else{ + for(j=1; j<8; j++) + block[i][block_permute_op(j<<3)]= ac_val[j ]; + } + s->block_last_index[i]= zigzag_last_index[i]; + } + } + } +} + void h263_encode_mb(MpegEncContext * s, DCTELEM block[6][64], int motion_x, int motion_y) @@ -266,18 +490,11 @@ void h263_encode_mb(MpegEncContext * s, } /* encode each block */ - if (s->h263_pred) { - for (i = 0; i < 6; i++) { - mpeg4_encode_block(s, block[i], i); - } - } else { - for (i = 0; i < 6; i++) { - h263_encode_block(s, block[i], i); - } + for (i = 0; i < 6; i++) { + h263_encode_block(s, block[i], i); } } - void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n) { int x, y, wrap, a, c, pred_dc, scale, i; @@ -359,69 +576,33 @@ void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n) ac_val1[8 + i] = block[block_permute_op(i)]; } - -static inline int mid_pred(int a, int b, int c) -{ - int vmin, vmax; - vmax = vmin = a; - if (b < vmin) - vmin = b; - else - vmax = b; - - if (c < vmin) - vmin = c; - else if (c > vmax) - vmax = c; - - return a + b + c - vmin - vmax; -} - INT16 *h263_pred_motion(MpegEncContext * s, int block, int *px, int *py) { - int xy, y, wrap; + int xy, wrap; INT16 *A, *B, *C, *mot_val; + static const int off[4]= {2, 1, 1, -1}; - wrap = 2 * s->mb_width + 2; - y = xy = 2 * s->mb_y + 1 + ((block >> 1) & 1); // y - xy *= wrap; // y * wrap - xy += 2 * s->mb_x + 1 + (block & 1); // x + y * wrap + wrap = s->block_wrap[0]; + xy = s->block_index[block]; mot_val = s->motion_val[xy]; /* special case for first line */ - if (y == 1 || s->first_slice_line || s->first_gob_line) { + if ((s->mb_y == 0 || s->first_slice_line || s->first_gob_line) && block<2) { A = s->motion_val[xy - 1]; *px = A[0]; *py = A[1]; } else { - switch(block) { - default: - case 0: - A = s->motion_val[xy - 1]; - B = s->motion_val[xy - wrap]; - C = s->motion_val[xy + 2 - wrap]; - break; - case 1: - case 2: - A = s->motion_val[xy - 1]; - B = s->motion_val[xy - wrap]; - C = s->motion_val[xy + 1 - wrap]; - break; - case 3: - A = s->motion_val[xy - 1]; - B = s->motion_val[xy - 1 - wrap]; - C = s->motion_val[xy - wrap]; - break; - } + A = s->motion_val[xy - 1]; + B = s->motion_val[xy - wrap]; + C = s->motion_val[xy + off[block] - wrap]; *px = mid_pred(A[0], B[0], C[0]); *py = mid_pred(A[1], B[1], C[1]); } return mot_val; } - static void h263_encode_motion(MpegEncContext * s, int val) { int range, l, m, bit_size, sign, code, bits; @@ -501,15 +682,119 @@ static void h263p_encode_umotion(MpegEncContext * s, int val) } } -void h263_encode_init_vlc(MpegEncContext *s) +static void init_mv_penalty_and_fcode(MpegEncContext *s) +{ + int f_code; + int mv; + for(f_code=1; f_code<=MAX_FCODE; f_code++){ + for(mv=-MAX_MV; mv<=MAX_MV; mv++){ + int len; + + if(mv==0) len= mvtab[0][1]; + else{ + int val, bit_size, range, code; + + bit_size = s->f_code - 1; + range = 1 << bit_size; + + val=mv; + if (val < 0) + val = -val; + val--; + code = (val >> bit_size) + 1; + if(code<33){ + len= mvtab[code][1] + 1 + bit_size; + }else{ + len= mvtab[32][1] + 2 + bit_size; + } + } + + mv_penalty[f_code][mv+MAX_MV]= len; + } + } + + for(f_code=MAX_FCODE; f_code>0; f_code--){ + for(mv=-(16<<f_code); mv<(16<<f_code); mv++){ + fcode_tab[mv+MAX_MV]= f_code; + } + } + + for(mv=0; mv<MAX_MV*2+1; mv++){ + umv_fcode_tab[mv]= 1; + } +} + +static void init_uni_dc_tab() +{ + int level, uni_code, uni_len; + + for(level=-255; level<256; level++){ + int size, v, l; + /* find number of bits */ + size = 0; + v = abs(level); + while (v) { + v >>= 1; + size++; + } + + if (level < 0) + l= (-level) ^ ((1 << size) - 1); + else + l= level; + + /* luminance */ + uni_code= DCtab_lum[size][0]; + uni_len = DCtab_lum[size][1]; + + if (size > 0) { + uni_code<<=size; uni_code|=l; + uni_len+=size; + if (size > 8){ + uni_code<<=1; uni_code|=1; + uni_len++; + } + } + uni_DCtab_lum[level+256][0]= uni_code; + uni_DCtab_lum[level+256][1]= uni_len; + + /* chrominance */ + uni_code= DCtab_chrom[size][0]; + uni_len = DCtab_chrom[size][1]; + + if (size > 0) { + uni_code<<=size; uni_code|=l; + uni_len+=size; + if (size > 8){ + uni_code<<=1; uni_code|=1; + uni_len++; + } + } + uni_DCtab_chrom[level+256][0]= uni_code; + uni_DCtab_chrom[level+256][1]= uni_len; + + } +} + +void h263_encode_init(MpegEncContext *s) { static int done = 0; if (!done) { done = 1; + + init_uni_dc_tab(); + init_rl(&rl_inter); init_rl(&rl_intra); + + init_mv_penalty_and_fcode(s); } + s->mv_penalty= mv_penalty; //FIXME exact table for msmpeg4 & h263p + + // use fcodes >1 only for mpeg4 & h263 & h263p FIXME + if(s->h263_plus) s->fcode_tab= umv_fcode_tab; + else if(s->h263_pred && !s->h263_msmpeg4) s->fcode_tab= fcode_tab; } static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n) @@ -571,11 +856,90 @@ static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n) /***************************************************/ +static void mpeg4_stuffing(PutBitContext * pbc) +{ + int length; + put_bits(pbc, 1, 0); + length= (-get_bit_count(pbc))&7; + put_bits(pbc, length, (1<<length)-1); +} + +static void put_string(PutBitContext * pbc, char *s) +{ + while(*s){ + put_bits(pbc, 8, *s); + s++; + } + put_bits(pbc, 8, 0); +} + +static void mpeg4_encode_vol_header(MpegEncContext * s) +{ + int vo_ver_id=1; //must be 2 if we want GMC or q-pel + + if(get_bit_count(&s->pb)!=0) mpeg4_stuffing(&s->pb); + put_bits(&s->pb, 16, 0); + put_bits(&s->pb, 16, 0x100); /* video obj */ + put_bits(&s->pb, 16, 0); + put_bits(&s->pb, 16, 0x120); /* video obj layer */ + + put_bits(&s->pb, 1, 0); /* random access vol */ + put_bits(&s->pb, 8, 1); /* video obj type indication= simple obj */ + put_bits(&s->pb, 1, 1); /* is obj layer id= yes */ + put_bits(&s->pb, 4, vo_ver_id); /* is obj layer ver id */ + put_bits(&s->pb, 3, 1); /* is obj layer priority */ + if(s->aspect_ratio_info) + put_bits(&s->pb, 4, s->aspect_ratio_info);/* aspect ratio info */ + else + put_bits(&s->pb, 4, 1); /* aspect ratio info= sqare pixel */ + put_bits(&s->pb, 1, 0); /* vol control parameters= no */ + put_bits(&s->pb, 2, RECT_SHAPE); /* vol shape= rectangle */ + put_bits(&s->pb, 1, 1); /* marker bit */ + put_bits(&s->pb, 16, s->time_increment_resolution=30000); + s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1; + if (s->time_increment_bits < 1) + s->time_increment_bits = 1; + put_bits(&s->pb, 1, 1); /* marker bit */ + put_bits(&s->pb, 1, 0); /* fixed vop rate=no */ + put_bits(&s->pb, 1, 1); /* marker bit */ + put_bits(&s->pb, 13, s->width); /* vol width */ + put_bits(&s->pb, 1, 1); /* marker bit */ + put_bits(&s->pb, 13, s->height); /* vol height */ + put_bits(&s->pb, 1, 1); /* marker bit */ + put_bits(&s->pb, 1, 0); /* interlace */ + put_bits(&s->pb, 1, 1); /* obmc disable */ + if (vo_ver_id == 1) { + put_bits(&s->pb, 1, s->vol_sprite_usage=0); /* sprite enable */ + }else{ /* vo_ver_id == 2 */ + put_bits(&s->pb, 2, s->vol_sprite_usage=0); /* sprite enable */ + } + put_bits(&s->pb, 1, 0); /* not 8 bit */ + put_bits(&s->pb, 1, 0); /* quant type= h263 style*/ + if (vo_ver_id != 1) + put_bits(&s->pb, 1, s->quarter_sample=0); + put_bits(&s->pb, 1, 1); /* complexity estimation disable */ + put_bits(&s->pb, 1, 1); /* resync marker disable */ + put_bits(&s->pb, 1, 0); /* data partitioned */ + if (vo_ver_id != 1){ + put_bits(&s->pb, 1, 0); /* newpred */ + put_bits(&s->pb, 1, 0); /* reduced res vop */ + } + put_bits(&s->pb, 1, 0); /* scalability */ + + mpeg4_stuffing(&s->pb); + put_bits(&s->pb, 16, 0); + put_bits(&s->pb, 16, 0x1B2); /* user_data */ + put_string(&s->pb, "ffmpeg"); //FIXME append some version ... + + s->no_rounding = 0; +} + /* write mpeg4 VOP header */ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number) { - align_put_bits(&s->pb); + if(s->pict_type==I_TYPE) mpeg4_encode_vol_header(s); + if(get_bit_count(&s->pb)!=0) mpeg4_stuffing(&s->pb); put_bits(&s->pb, 16, 0); /* vop header */ put_bits(&s->pb, 16, 0x1B6); /* vop header */ put_bits(&s->pb, 2, s->pict_type - 1); /* pict type: I = 0 , P = 1 */ @@ -584,26 +948,41 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number) put_bits(&s->pb, 1, 0); put_bits(&s->pb, 1, 1); /* marker */ - put_bits(&s->pb, 4, 1); /* XXX: correct time increment */ + put_bits(&s->pb, s->time_increment_bits, 1); /* XXX: correct time increment */ put_bits(&s->pb, 1, 1); /* marker */ put_bits(&s->pb, 1, 1); /* vop coded */ - if (s->pict_type == P_TYPE) { - s->no_rounding = 0; + if ( s->pict_type == P_TYPE + || (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE)) { + s->no_rounding ^= 1; put_bits(&s->pb, 1, s->no_rounding); /* rounding type */ } put_bits(&s->pb, 3, 0); /* intra dc VLC threshold */ + //FIXME sprite stuff put_bits(&s->pb, 5, s->qscale); if (s->pict_type != I_TYPE) put_bits(&s->pb, 3, s->f_code); /* fcode_for */ + if (s->pict_type == B_TYPE) + put_bits(&s->pb, 3, s->b_code); /* fcode_back */ // printf("****frame %d\n", picture_number); } void h263_dc_scale(MpegEncContext * s) { +#if 1 + const static UINT8 y_tab[32]={ + // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 + 0, 8, 8, 8, 8,10,12,14,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,34,36,38,40,42,44,46 + }; + const static UINT8 c_tab[32]={ + // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 + 0, 8, 8, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,20,21,22,23,24,25 + }; + s->y_dc_scale = y_tab[s->qscale]; + s->c_dc_scale = c_tab[s->qscale]; +#else int quant; - quant = s->qscale; /* luminance */ if (quant < 5) @@ -621,36 +1000,30 @@ void h263_dc_scale(MpegEncContext * s) s->c_dc_scale = ((quant + 13) / 2); else s->c_dc_scale = (quant - 6); +#endif } -static int mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *dir_ptr) +static inline int mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *dir_ptr) { - int a, b, c, xy, wrap, pred, scale; + int a, b, c, wrap, pred, scale; UINT16 *dc_val; + int dummy; /* find prediction */ if (n < 4) { - wrap = s->mb_width * 2 + 2; - xy = 2 * s->mb_y + 1 + ((n & 2) >> 1); - xy *= wrap; - xy += 2 * s->mb_x + 1 + (n & 1); - dc_val = s->dc_val[0]; scale = s->y_dc_scale; } else { - wrap = s->mb_width + 2; - xy = s->mb_y + 1; - xy *= wrap; - xy += s->mb_x + 1; - dc_val = s->dc_val[n - 4 + 1]; scale = s->c_dc_scale; } + wrap= s->block_wrap[n]; + dc_val = s->dc_val[0] + s->block_index[n]; /* B C * A X */ - a = dc_val[xy - 1]; - b = dc_val[xy - 1 - wrap]; - c = dc_val[xy - wrap]; + a = dc_val[ - 1]; + b = dc_val[ - 1 - wrap]; + c = dc_val[ - wrap]; if (abs(a - b) < abs(b - c)) { pred = c; @@ -660,10 +1033,19 @@ static int mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *di *dir_ptr = 0; /* left */ } /* we assume pred is positive */ +#ifdef ARCH_X86 + asm volatile ( + "xorl %%edx, %%edx \n\t" + "mul %%ecx \n\t" + : "=d" (pred), "=a"(dummy) + : "a" (pred + (scale >> 1)), "c" (inverse[scale]) + ); +#else pred = (pred + (scale >> 1)) / scale; +#endif /* prepare address for prediction update */ - *dc_val_ptr = &dc_val[xy]; + *dc_val_ptr = &dc_val[0]; return pred; } @@ -671,22 +1053,11 @@ static int mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *di void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n, int dir) { - int x, y, wrap, i; + int i; INT16 *ac_val, *ac_val1; /* find prediction */ - if (n < 4) { - x = 2 * s->mb_x + 1 + (n & 1); - y = 2 * s->mb_y + 1 + ((n & 2) >> 1); - wrap = s->mb_width * 2 + 2; - ac_val = s->ac_val[0][0]; - } else { - x = s->mb_x + 1; - y = s->mb_y + 1; - wrap = s->mb_width + 2; - ac_val = s->ac_val[n - 4 + 1][0]; - } - ac_val += ((y) * wrap + (x)) * 16; + ac_val = s->ac_val[0][0] + s->block_index[n] * 16; ac_val1 = ac_val; if (s->ac_pred) { if (dir == 0) { @@ -697,7 +1068,7 @@ void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n, } } else { /* top prediction */ - ac_val -= 16 * wrap; + ac_val -= 16 * s->block_wrap[n]; for(i=1;i<8;i++) { block[block_permute_op(i)] += ac_val[i + 8]; } @@ -711,20 +1082,43 @@ void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n, ac_val1[8 + i] = block[block_permute_op(i)]; } -static inline void mpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr) +static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n, + int dir) { - int size, v, pred; - UINT16 *dc_val; + int i; + INT16 *ac_val; - pred = mpeg4_pred_dc(s, n, &dc_val, dir_ptr); - if (n < 4) { - *dc_val = level * s->y_dc_scale; + /* find prediction */ + ac_val = s->ac_val[0][0] + s->block_index[n] * 16; + + if (dir == 0) { + /* left prediction */ + ac_val -= 16; + for(i=1;i<8;i++) { + block[block_permute_op(i*8)] -= ac_val[i]; + } } else { - *dc_val = level * s->c_dc_scale; + /* top prediction */ + ac_val -= 16 * s->block_wrap[n]; + for(i=1;i<8;i++) { + block[block_permute_op(i)] -= ac_val[i + 8]; + } } +} - /* do the prediction */ - level -= pred; +static inline void mpeg4_encode_dc(MpegEncContext * s, int level, int n) +{ +#if 1 + level+=256; + if (n < 4) { + /* luminance */ + put_bits(&s->pb, uni_DCtab_lum[level][1], uni_DCtab_lum[level][0]); + } else { + /* chrominance */ + put_bits(&s->pb, uni_DCtab_chrom[level][1], uni_DCtab_chrom[level][0]); + } +#else + int size, v; /* find number of bits */ size = 0; v = abs(level); @@ -749,17 +1143,18 @@ static inline void mpeg4_encode_dc(MpegEncContext * s, int level, int n, int *di if (size > 8) put_bits(&s->pb, 1, 1); } +#endif } -static void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n) +static void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int intra_dc, UINT8 *scan_table) { int level, run, last, i, j, last_index, last_non_zero, sign, slevel; - int code, dc_pred_dir; + int code; const RLTable *rl; if (s->mb_intra) { /* mpeg4 based DC predictor */ - mpeg4_encode_dc(s, block[0], n, &dc_pred_dir); + mpeg4_encode_dc(s, intra_dc, n); i = 1; rl = &rl_intra; } else { @@ -771,7 +1166,7 @@ static void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n) last_index = s->block_last_index[n]; last_non_zero = i - 1; for (; i <= last_index; i++) { - j = zigzag_direct[i]; + j = scan_table[i]; level = block[j]; if (level) { run = i - last_non_zero - 1; @@ -839,6 +1234,7 @@ static VLC cbpy_vlc; static VLC mv_vlc; static VLC dc_lum, dc_chrom; static VLC sprite_trajectory; +static VLC mb_type_b_vlc; void init_rl(RLTable *rl) { @@ -922,6 +1318,9 @@ void h263_decode_init_vlc(MpegEncContext *s) init_vlc(&sprite_trajectory, 9, 15, &sprite_trajectory_tab[0][1], 4, 2, &sprite_trajectory_tab[0][0], 4, 2); + init_vlc(&mb_type_b_vlc, 4, 4, + &mb_type_b_tab[0][1], 2, 1, + &mb_type_b_tab[0][0], 2, 1); } } @@ -950,13 +1349,163 @@ int h263_decode_gob_header(MpegEncContext *s) } +static inline void memsetw(short *tab, int val, int n) +{ + int i; + for(i=0;i<n;i++) + tab[i] = val; +} + +static int mpeg4_resync(MpegEncContext *s) +{ + int state, v, bits; + int mb_num_bits= av_log2(s->mb_num - 1) + 1; + int header_extension=0, mb_num; + int c_wrap, c_xy, l_wrap, l_xy; +//printf("resync at %d %d\n", s->mb_x, s->mb_y); +//printf("%X\n", show_bits(&s->gb, 24)); + + if( get_bits_count(&s->gb) > s->gb.size*8-32) + return 0; + + align_get_bits(&s->gb); + state = 0xff; + for(;;) { + v = get_bits(&s->gb, 8); +//printf("%X ", v); + state = ((state << 8) | v) & 0xffff; + if (state == 0) break; + if( get_bits_count(&s->gb) > s->gb.size*8-32){ + printf("resync failed\n"); + return -1; + } + } +//printf("%X\n", show_bits(&s->gb, 24)); + bits=0; + while(!get_bits1(&s->gb) && bits<30) bits++; + if(s->pict_type == P_TYPE && bits != s->f_code-1) + printf("marker does not match f_code\n"); + //FIXME check bits for B-framess +//printf("%X\n", show_bits(&s->gb, 24)); + + if(s->shape != RECT_SHAPE){ + header_extension= get_bits1(&s->gb); + //FIXME more stuff here + } + + mb_num= get_bits(&s->gb, mb_num_bits); + if(mb_num != s->mb_x + s->mb_y*s->mb_width){ + printf("MB-num change not supported %d %d\n", mb_num, s->mb_x + s->mb_y*s->mb_width); +// s->mb_x= mb_num % s->mb_width; +// s->mb_y= mb_num / s->mb_width; + //FIXME many vars are wrong now + } + + if(s->shape != BIN_ONLY_SHAPE){ + s->qscale= get_bits(&s->gb, 5); + h263_dc_scale(s); + } + + if(s->shape == RECT_SHAPE){ + header_extension= get_bits1(&s->gb); + } + if(header_extension){ + int time_incr=0; + printf("header extension not really supported\n"); + while (get_bits1(&s->gb) != 0) + time_incr++; + + check_marker(&s->gb, "before time_increment in video packed header"); + s->time_increment= get_bits(&s->gb, s->time_increment_bits); + if(s->pict_type!=B_TYPE){ + s->time_base+= time_incr; + s->last_non_b_time[1]= s->last_non_b_time[0]; + s->last_non_b_time[0]= s->time_base*s->time_increment_resolution + s->time_increment; + }else{ + s->time= (s->last_non_b_time[1]/s->time_increment_resolution + time_incr)*s->time_increment_resolution; + s->time+= s->time_increment; + } + check_marker(&s->gb, "before vop_coding_type in video packed header"); + + skip_bits(&s->gb, 2); /* vop coding type */ + //FIXME not rect stuff here + + if(s->shape != BIN_ONLY_SHAPE){ + skip_bits(&s->gb, 3); /* intra dc vlc threshold */ + + if(s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE && s->num_sprite_warping_points){ + mpeg4_decode_sprite_trajectory(s); + } + + //FIXME reduced res stuff here + + if (s->pict_type != I_TYPE) { + s->f_code = get_bits(&s->gb, 3); /* fcode_for */ + if(s->f_code==0){ + printf("Error, video packet header damaged or not MPEG4 header (f_code=0)\n"); + return -1; // makes no sense to continue, as the MV decoding will break very quickly + } + } + if (s->pict_type == B_TYPE) { + s->b_code = get_bits(&s->gb, 3); + } + } + + } + //FIXME new-pred stuff + + l_wrap= s->block_wrap[0]; + l_xy= s->mb_y*l_wrap*2; + c_wrap= s->block_wrap[4]; + c_xy= s->mb_y*c_wrap; + + /* clean DC */ + memsetw(s->dc_val[0] + l_xy, 1024, l_wrap*3); + memsetw(s->dc_val[1] + c_xy, 1024, c_wrap*2); + memsetw(s->dc_val[2] + c_xy, 1024, c_wrap*2); + + /* clean AC */ + memset(s->ac_val[0] + l_xy, 0, l_wrap*3*16*sizeof(INT16)); + memset(s->ac_val[1] + c_xy, 0, c_wrap*2*16*sizeof(INT16)); + memset(s->ac_val[2] + c_xy, 0, c_wrap*2*16*sizeof(INT16)); + + /* clean MV */ + memset(s->motion_val + l_xy, 0, l_wrap*3*2*sizeof(INT16)); +// memset(s->motion_val, 0, 2*sizeof(INT16)*(2 + s->mb_width*2)*(2 + s->mb_height*2)); + s->resync_x_pos= s->mb_x; + s->first_slice_line=1; + + return 0; +} + int h263_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) { int cbpc, cbpy, i, cbp, pred_x, pred_y, mx, my, dquant; INT16 *mot_val; static INT8 quant_tab[4] = { -1, -2, 1, 2 }; - + + if(s->resync_marker){ + if( s->resync_x_pos == s->mb_x+1 + || s->resync_x_pos == s->mb_x){ + /* f*ck mpeg4 + this is here so we dont need to slowdown h263_pred_motion with it */ + if(s->resync_x_pos == s->mb_x+1 && s->mb_x==0){ + int xy= s->block_index[0] - s->block_wrap[0]; + s->motion_val[xy][0]= s->motion_val[xy+2][0]; + s->motion_val[xy][1]= s->motion_val[xy+2][1]; + } + + s->first_slice_line=0; + s->resync_x_pos=0; // isnt needed but for cleanness sake ;) + } + + if(show_aligned_bits(&s->gb, 1, 16) == 0){ + if( mpeg4_resync(s) < 0 ) return -1; + + } + } + if (s->pict_type == P_TYPE || s->pict_type==S_TYPE) { if (get_bits1(&s->gb)) { /* skip mb */ @@ -970,8 +1519,13 @@ int h263_decode_mb(MpegEncContext *s, // int l = (1 << (s->f_code - 1)) * 32; s->mcsel=1; - s->mv[0][0][0] = RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample); - s->mv[0][0][1] = RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample); + if(s->divx_version==500 && s->divx_build==413){ + s->mv[0][0][0] = s->sprite_offset[0][0] / (1<<(a-s->quarter_sample)); + s->mv[0][0][1] = s->sprite_offset[0][1] / (1<<(a-s->quarter_sample)); + }else{ + s->mv[0][0][0] = RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample); + s->mv[0][0][1] = RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample); + } /* if (s->mv[0][0][0] < -l) s->mv[0][0][0]= -l; else if (s->mv[0][0][0] >= l) s->mv[0][0][0]= l-1; if (s->mv[0][0][1] < -l) s->mv[0][0][1]= -l; @@ -997,15 +1551,8 @@ int h263_decode_mb(MpegEncContext *s, dquant = cbpc & 8; s->mb_intra = ((cbpc & 4) != 0); - } else { - cbpc = get_vlc(&s->gb, &intra_MCBPC_vlc); - if (cbpc < 0) - return -1; - dquant = cbpc & 4; - s->mb_intra = 1; - } - - if (!s->mb_intra) { + if (s->mb_intra) goto intra; + if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE && (cbpc & 16) == 0) s->mcsel= get_bits1(&s->gb); else s->mcsel= 0; @@ -1017,6 +1564,7 @@ int h263_decode_mb(MpegEncContext *s, s->qscale = 1; else if (s->qscale > 31) s->qscale = 31; + h263_dc_scale(s); } s->mv_dir = MV_DIR_FORWARD; if ((cbpc & 16) == 0) { @@ -1026,13 +1574,17 @@ int h263_decode_mb(MpegEncContext *s, if (s->umvplus_dec) mx = h263p_decode_umotion(s, pred_x); else if(!s->mcsel) - mx = h263_decode_motion(s, pred_x); + mx = h263_decode_motion(s, pred_x, s->f_code); else { const int a= s->sprite_warping_accuracy; // int l = (1 << (s->f_code - 1)) * 32; - mx= RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample); -// if (mx < -l) mx= -l; -// else if (mx >= l) mx= l-1; + if(s->divx_version==500 && s->divx_build==413){ + mx = s->sprite_offset[0][0] / (1<<(a-s->quarter_sample)); + }else{ + mx = RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample); + } +// if (mx < -l) mx= -l, printf("C"); +// else if (mx >= l) mx= l-1, printf("C"); } if (mx >= 0xffff) return -1; @@ -1040,13 +1592,17 @@ int h263_decode_mb(MpegEncContext *s, if (s->umvplus_dec) my = h263p_decode_umotion(s, pred_y); else if(!s->mcsel) - my = h263_decode_motion(s, pred_y); + my = h263_decode_motion(s, pred_y, s->f_code); else{ const int a= s->sprite_warping_accuracy; // int l = (1 << (s->f_code - 1)) * 32; - my= RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample); -// if (my < -l) my= -l; -// else if (my >= l) my= l-1; + if(s->divx_version==500 && s->divx_build==413){ + my = s->sprite_offset[0][1] / (1<<(a-s->quarter_sample)); + }else{ + my = RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample); + } +// if (my < -l) my= -l, printf("C"); +// else if (my >= l) my= l-1, printf("C"); } if (my >= 0xffff) return -1; @@ -1065,14 +1621,14 @@ int h263_decode_mb(MpegEncContext *s, if (s->umvplus_dec) mx = h263p_decode_umotion(s, pred_x); else - mx = h263_decode_motion(s, pred_x); + mx = h263_decode_motion(s, pred_x, s->f_code); if (mx >= 0xffff) return -1; if (s->umvplus_dec) my = h263p_decode_umotion(s, pred_y); else - my = h263_decode_motion(s, pred_y); + my = h263_decode_motion(s, pred_y, s->f_code); if (my >= 0xffff) return -1; s->mv[0][i][0] = mx; @@ -1083,7 +1639,126 @@ int h263_decode_mb(MpegEncContext *s, mot_val[1] = my; } } - } else { + } else if(s->pict_type==B_TYPE) { + int modb1; // first bit of modb + int modb2; // second bit of modb + int mb_type; + int time_pp; + int time_pb; + int xy; + + s->mb_intra = 0; //B-frames never contain intra blocks + s->mcsel=0; // ... true gmc blocks + + if(s->mb_x==0){ + s->last_mv[0][0][0]= + s->last_mv[0][0][1]= + s->last_mv[1][0][0]= + s->last_mv[1][0][1]= 0; + } + + /* if we skipped it in the future P Frame than skip it now too */ + s->mb_skiped= s->mbskip_table[s->mb_y * s->mb_width + s->mb_x]; // Note, skiptab=0 if last was GMC + + if(s->mb_skiped){ + /* skip mb */ + for(i=0;i<6;i++) + s->block_last_index[i] = -1; + + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_16X16; + s->mv[0][0][0] = 0; + s->mv[0][0][1] = 0; + s->mv[1][0][0] = 0; + s->mv[1][0][1] = 0; +//FIXME is this correct? +/* s->last_mv[0][0][0]= + s->last_mv[0][0][1]=0;*/ + s->mb_skiped = 1; + return 0; + } + + modb1= get_bits1(&s->gb); + if(modb1==0){ + modb2= get_bits1(&s->gb); + mb_type= get_vlc(&s->gb, &mb_type_b_vlc); + if(modb2==0) cbp= get_bits(&s->gb, 6); + else cbp=0; + if (mb_type && cbp) { + if(get_bits1(&s->gb)){ + s->qscale +=get_bits1(&s->gb)*4 - 2; + if (s->qscale < 1) + s->qscale = 1; + else if (s->qscale > 31) + s->qscale = 31; + h263_dc_scale(s); + } + } + }else{ + mb_type=4; //like 0 but no vectors coded + cbp=0; + } + s->mv_type = MV_TYPE_16X16; // we'll switch to 8x8 only if the last P frame had 8x8 for this MB and mb_type=0 here + mx=my=0; //for case 4, we could put this to the mb_type=4 but than gcc compains about uninitalized mx/my + switch(mb_type) + { + case 0: + mx = h263_decode_motion(s, 0, 1); + my = h263_decode_motion(s, 0, 1); + case 4: + s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT; + xy= s->block_index[0]; + time_pp= s->last_non_b_time[0] - s->last_non_b_time[1]; + time_pb= s->time - s->last_non_b_time[1]; +//if(time_pp>3000 )printf("%d %d ", time_pp, time_pb); + //FIXME 4MV + //FIXME avoid divides + s->mv[0][0][0] = s->motion_val[xy][0]*time_pb/time_pp + mx; + s->mv[0][0][1] = s->motion_val[xy][1]*time_pb/time_pp + my; + s->mv[1][0][0] = mx ? s->mv[0][0][0] - s->motion_val[xy][0] + : s->motion_val[xy][0]*(time_pb - time_pp)/time_pp + mx; + s->mv[1][0][1] = my ? s->mv[0][0][1] - s->motion_val[xy][1] + : s->motion_val[xy][1]*(time_pb - time_pp)/time_pp + my; +/* s->mv[0][0][0] = + s->mv[0][0][1] = + s->mv[1][0][0] = + s->mv[1][0][1] = 1000;*/ + break; + case 1: + s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD; + mx = h263_decode_motion(s, s->last_mv[0][0][0], s->f_code); + my = h263_decode_motion(s, s->last_mv[0][0][1], s->f_code); + s->last_mv[0][0][0]= s->mv[0][0][0] = mx; + s->last_mv[0][0][1]= s->mv[0][0][1] = my; + + mx = h263_decode_motion(s, s->last_mv[1][0][0], s->b_code); + my = h263_decode_motion(s, s->last_mv[1][0][1], s->b_code); + s->last_mv[1][0][0]= s->mv[1][0][0] = mx; + s->last_mv[1][0][1]= s->mv[1][0][1] = my; + break; + case 2: + s->mv_dir = MV_DIR_BACKWARD; + mx = h263_decode_motion(s, s->last_mv[1][0][0], s->b_code); + my = h263_decode_motion(s, s->last_mv[1][0][1], s->b_code); + s->last_mv[1][0][0]= s->mv[1][0][0] = mx; + s->last_mv[1][0][1]= s->mv[1][0][1] = my; + break; + case 3: + s->mv_dir = MV_DIR_FORWARD; + mx = h263_decode_motion(s, s->last_mv[0][0][0], s->f_code); + my = h263_decode_motion(s, s->last_mv[0][0][1], s->f_code); + s->last_mv[0][0][0]= s->mv[0][0][0] = mx; + s->last_mv[0][0][1]= s->mv[0][0][1] = my; + break; + default: return -1; + } + } else { /* I-Frame */ + cbpc = get_vlc(&s->gb, &intra_MCBPC_vlc); + if (cbpc < 0) + return -1; + dquant = cbpc & 4; + s->mb_intra = 1; +intra: s->ac_pred = 0; if (s->h263_pred || s->h263_aic) { s->ac_pred = get_bits1(&s->gb); @@ -1102,6 +1777,7 @@ int h263_decode_mb(MpegEncContext *s, s->qscale = 1; else if (s->qscale > 31) s->qscale = 31; + h263_dc_scale(s); } } @@ -1120,7 +1796,7 @@ int h263_decode_mb(MpegEncContext *s, return 0; } -static int h263_decode_motion(MpegEncContext * s, int pred) +static int h263_decode_motion(MpegEncContext * s, int pred, int f_code) { int code, val, sign, shift, l, m; @@ -1131,7 +1807,7 @@ static int h263_decode_motion(MpegEncContext * s, int pred) if (code == 0) return pred; sign = get_bits1(&s->gb); - shift = s->f_code - 1; + shift = f_code - 1; val = (code - 1) << shift; if (shift > 0) val |= get_bits(&s->gb, shift); @@ -1142,7 +1818,7 @@ static int h263_decode_motion(MpegEncContext * s, int pred) /* modulo decoding */ if (!s->h263_long_vectors) { - l = (1 << (s->f_code - 1)) * 32; + l = (1 << (f_code - 1)) * 32; m = 2 * l; if (val < -l) { val += m; @@ -1269,7 +1945,7 @@ static int h263_decode_block(MpegEncContext * s, DCTELEM * block, not_coded: if (s->mb_intra && s->h263_aic) { h263_pred_acdc(s, block, n); - i = 64; + i = 63; } s->block_last_index[n] = i; return 0; @@ -1577,21 +2253,21 @@ static void mpeg4_decode_sprite_trajectory(MpegEncContext * s) h2= 1<<beta; // Note, the 4th point isnt used for GMC -/* - sprite_ref[0][0]= (a>>1)*(2*vop_ref[0][0] + d[0][0]); - sprite_ref[0][1]= (a>>1)*(2*vop_ref[0][1] + d[0][1]); - sprite_ref[1][0]= (a>>1)*(2*vop_ref[1][0] + d[0][0] + d[1][0]); - sprite_ref[1][1]= (a>>1)*(2*vop_ref[1][1] + d[0][1] + d[1][1]); - sprite_ref[2][0]= (a>>1)*(2*vop_ref[2][0] + d[0][0] + d[2][0]); - sprite_ref[2][1]= (a>>1)*(2*vop_ref[2][1] + d[0][1] + d[2][1]); -*/ -//FIXME DIVX5 vs. mpeg4 ? - sprite_ref[0][0]= a*vop_ref[0][0] + d[0][0]; - sprite_ref[0][1]= a*vop_ref[0][1] + d[0][1]; - sprite_ref[1][0]= a*vop_ref[1][0] + d[0][0] + d[1][0]; - sprite_ref[1][1]= a*vop_ref[1][1] + d[0][1] + d[1][1]; - sprite_ref[2][0]= a*vop_ref[2][0] + d[0][0] + d[2][0]; - sprite_ref[2][1]= a*vop_ref[2][1] + d[0][1] + d[2][1]; + if(s->divx_version==500 && s->divx_build==413){ + sprite_ref[0][0]= a*vop_ref[0][0] + d[0][0]; + sprite_ref[0][1]= a*vop_ref[0][1] + d[0][1]; + sprite_ref[1][0]= a*vop_ref[1][0] + d[0][0] + d[1][0]; + sprite_ref[1][1]= a*vop_ref[1][1] + d[0][1] + d[1][1]; + sprite_ref[2][0]= a*vop_ref[2][0] + d[0][0] + d[2][0]; + sprite_ref[2][1]= a*vop_ref[2][1] + d[0][1] + d[2][1]; + } else { + sprite_ref[0][0]= (a>>1)*(2*vop_ref[0][0] + d[0][0]); + sprite_ref[0][1]= (a>>1)*(2*vop_ref[0][1] + d[0][1]); + sprite_ref[1][0]= (a>>1)*(2*vop_ref[1][0] + d[0][0] + d[1][0]); + sprite_ref[1][1]= (a>>1)*(2*vop_ref[1][1] + d[0][1] + d[1][1]); + sprite_ref[2][0]= (a>>1)*(2*vop_ref[2][0] + d[0][0] + d[2][0]); + sprite_ref[2][1]= (a>>1)*(2*vop_ref[2][1] + d[0][1] + d[2][1]); + } /* sprite_ref[3][0]= (a>>1)*(2*vop_ref[3][0] + d[0][0] + d[1][0] + d[2][0] + d[3][0]); sprite_ref[3][1]= (a>>1)*(2*vop_ref[3][1] + d[0][1] + d[1][1] + d[2][1] + d[3][1]); */ @@ -1715,7 +2391,7 @@ printf("%d %d\n", s->sprite_delta[1][1][1], a<<s->sprite_shift[1][1]);*/ else s->real_sprite_warping_points= s->num_sprite_warping_points; -//FIXME convert stuff if accurace != 3 +//printf("%d %d %d %d\n", d[0][0], d[0][1], s->sprite_offset[0][0], s->sprite_offset[0][1]); } /* decode mpeg4 VOP header */ @@ -1735,13 +2411,14 @@ int mpeg4_decode_picture_header(MpegEncContext * s) break; } state = ((state << 8) | v) & 0xffffff; - /* XXX: really detect end of frame */ - if (state == 0) + if( get_bits_count(&s->gb) > s->gb.size*8-32){ + printf("no VOP startcode found\n"); return -1; + } } //printf("startcode %X %d\n", startcode, get_bits_count(&s->gb)); if (startcode == 0x120) { // Video Object Layer - int time_increment_resolution, width, height, vo_ver_id; + int width, height, vo_ver_id; /* vol header */ skip_bits(&s->gb, 1); /* random access */ @@ -1758,11 +2435,13 @@ int mpeg4_decode_picture_header(MpegEncContext * s) skip_bits(&s->gb, 8); //par_width skip_bits(&s->gb, 8); // par_height } + if(get_bits1(&s->gb)){ /* vol control parameter */ printf("vol control parameter not supported\n"); return -1; } s->shape = get_bits(&s->gb, 2); /* vol shape */ + if(s->shape != RECT_SHAPE) printf("only rectangular vol supported\n"); if(s->shape == GRAY_SHAPE && vo_ver_id != 1){ printf("Gray shape not supported\n"); skip_bits(&s->gb, 4); //video_object_layer_shape_extension @@ -1770,8 +2449,8 @@ int mpeg4_decode_picture_header(MpegEncContext * s) skip_bits1(&s->gb); /* marker */ - time_increment_resolution = get_bits(&s->gb, 16); - s->time_increment_bits = av_log2(time_increment_resolution - 1) + 1; + s->time_increment_resolution = get_bits(&s->gb, 16); + s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1; if (s->time_increment_bits < 1) s->time_increment_bits = 1; skip_bits1(&s->gb); /* marker */ @@ -1787,9 +2466,14 @@ int mpeg4_decode_picture_header(MpegEncContext * s) skip_bits1(&s->gb); /* marker */ height = get_bits(&s->gb, 13); skip_bits1(&s->gb); /* marker */ + if(width && height){ /* they should be non zero but who knows ... */ + s->width = width; + s->height = height; +// printf("%d %d\n", width, height); + } } - skip_bits1(&s->gb); /* interlaced */ + if(get_bits1(&s->gb)) printf("interlaced not supported\n"); /* interlaced */ if(!get_bits1(&s->gb)) printf("OBMC not supported\n"); /* OBMC Disable */ if (vo_ver_id == 1) { s->vol_sprite_usage = get_bits1(&s->gb); /* vol_sprite_usage */ @@ -1818,7 +2502,8 @@ int mpeg4_decode_picture_header(MpegEncContext * s) if (get_bits1(&s->gb) == 1) { /* not_8_bit */ s->quant_precision = get_bits(&s->gb, 4); /* quant_precision */ - skip_bits(&s->gb, 4); /* bits_per_pixel */ + if(get_bits(&s->gb, 4)!=8) printf("N-bit not supported\n"); /* bits_per_pixel */ + if(s->quant_precision!=5) printf("quant precission %d\n", s->quant_precision); } else { s->quant_precision = 5; } @@ -1828,13 +2513,11 @@ int mpeg4_decode_picture_header(MpegEncContext * s) if(vo_ver_id != 1) s->quarter_sample= get_bits1(&s->gb); else s->quarter_sample=0; -#if 0 - if(get_bits1(&s->gb)) printf("Complexity est disabled\n"); - if(get_bits1(&s->gb)) printf("resync disable\n"); -#else - skip_bits1(&s->gb); /* complexity_estimation_disabled */ - skip_bits1(&s->gb); /* resync_marker_disabled */ -#endif + + if(!get_bits1(&s->gb)) printf("Complexity estimation not supported\n"); + + s->resync_marker= !get_bits1(&s->gb); /* resync_marker_disabled */ + s->data_partioning= get_bits1(&s->gb); if(s->data_partioning){ printf("data partitioning not supported\n"); @@ -1858,8 +2541,7 @@ int mpeg4_decode_picture_header(MpegEncContext * s) s->scalability= get_bits1(&s->gb); if (s->scalability) { - printf("bad scalability!!!\n"); - return -1; + printf("scalability not supported\n"); } } //printf("end Data %X %d\n", show_bits(&s->gb, 32), get_bits_count(&s->gb)&0x7); @@ -1899,24 +2581,34 @@ int mpeg4_decode_picture_header(MpegEncContext * s) } s->pict_type = get_bits(&s->gb, 2) + 1; /* pict type: I = 0 , P = 1 */ - if(s->pict_type == B_TYPE) - { - printf("B-VOP\n"); - return -1; - } - - /* XXX: parse time base */ - time_incr = 0; +//printf("pic: %d, qpel:%d\n", s->pict_type, s->quarter_sample); + time_incr=0; while (get_bits1(&s->gb) != 0) time_incr++; - skip_bits1(&s->gb); /* marker */ - skip_bits(&s->gb, s->time_increment_bits); - skip_bits1(&s->gb); /* marker */ + check_marker(&s->gb, "before time_increment"); + s->time_increment= get_bits(&s->gb, s->time_increment_bits); + if(s->pict_type!=B_TYPE){ + s->time_base+= time_incr; + s->last_non_b_time[1]= s->last_non_b_time[0]; + s->last_non_b_time[0]= s->time_base*s->time_increment_resolution + s->time_increment; + }else{ + s->time= (s->last_non_b_time[1]/s->time_increment_resolution + time_incr)*s->time_increment_resolution; + s->time+= s->time_increment; + } + + if(check_marker(&s->gb, "before vop_coded")==0 && s->picture_number==0){ + printf("hmm, seems the headers arnt complete, trying to guess time_increment_bits\n"); + for(s->time_increment_bits++ ;s->time_increment_bits<16; s->time_increment_bits++){ + if(get_bits1(&s->gb)) break; + } + printf("my guess is %d bits ;)\n",s->time_increment_bits); + } /* vop coded */ if (get_bits1(&s->gb) != 1) goto redo; - +//printf("time %d %d %d || %d %d %d\n", s->time_increment_bits, s->time_increment, s->time_base, +//s->time, s->last_non_b_time[0], s->last_non_b_time[1]); if (s->shape != BIN_ONLY_SHAPE && ( s->pict_type == P_TYPE || (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE))) { /* rounding type for motion estimation */ @@ -1947,7 +2639,9 @@ int mpeg4_decode_picture_header(MpegEncContext * s) //FIXME complexity estimation stuff if (s->shape != BIN_ONLY_SHAPE) { - skip_bits(&s->gb, 3); /* intra dc VLC threshold */ + int t; + t=get_bits(&s->gb, 3); /* intra dc VLC threshold */ +//printf("threshold %d\n", t); //FIXME interlaced specific bits } @@ -1964,12 +2658,21 @@ int mpeg4_decode_picture_header(MpegEncContext * s) MPEG4 vol header as it is found on some old opendivx movies */ s->qscale = get_bits(&s->gb, 5); + if(s->qscale==0){ + printf("Error, header damaged or not MPEG4 header (qscale=0)\n"); + return -1; // makes no sense to continue, as there is nothing left from the image then + } if (s->pict_type != I_TYPE) { s->f_code = get_bits(&s->gb, 3); /* fcode_for */ + if(s->f_code==0){ + printf("Error, header damaged or not MPEG4 header (f_code=0)\n"); + return -1; // makes no sense to continue, as the MV decoding will break very quickly + } } if (s->pict_type == B_TYPE) { s->b_code = get_bits(&s->gb, 3); +//printf("b-code %d\n", s->b_code); } //printf("quant:%d fcode:%d\n", s->qscale, s->f_code); if(!s->scalability){ @@ -1978,7 +2681,6 @@ int mpeg4_decode_picture_header(MpegEncContext * s) } } } -//printf("end Data %X %d\n", show_bits(&s->gb, 32), get_bits_count(&s->gb)&0x7); s->picture_number++; // better than pic number==0 allways ;) return 0; } diff --git a/src/libffmpeg/libavcodec/h263data.h b/src/libffmpeg/libavcodec/h263data.h index 88e456ba2..a129fd6bf 100644 --- a/src/libffmpeg/libavcodec/h263data.h +++ b/src/libffmpeg/libavcodec/h263data.h @@ -40,13 +40,13 @@ static const UINT8 inter_MCBPC_bits[20] = { 3, 7, 7, 8, };*/ -static const UINT8 cbpy_tab[16][2] = +const UINT8 cbpy_tab[16][2] = { {3,4}, {5,5}, {4,5}, {9,4}, {3,5}, {7,4}, {2,6}, {11,4}, {2,5}, {3,6}, {5,4}, {10,4}, {4,4}, {8,4}, {6,4}, {3,2} }; -static const UINT8 mvtab[33][2] = +const UINT8 mvtab[33][2] = { {1,1}, {1,2}, {1,3}, {1,4}, {3,6}, {5,7}, {4,7}, {3,7}, {11,9}, {10,9}, {9,9}, {17,10}, {16,10}, {15,10}, {14,10}, {13,10}, diff --git a/src/libffmpeg/libavcodec/h263dec.c b/src/libffmpeg/libavcodec/h263dec.c index 3733ed565..e909ac56e 100644 --- a/src/libffmpeg/libavcodec/h263dec.c +++ b/src/libffmpeg/libavcodec/h263dec.c @@ -47,10 +47,22 @@ static int h263_decode_init(AVCodecContext *avctx) case CODEC_ID_MPEG4: s->time_increment_bits = 4; /* default value for broken headers */ s->h263_pred = 1; + s->has_b_frames = 1; break; - case CODEC_ID_MSMPEG4: + case CODEC_ID_MSMPEG4V1: s->h263_msmpeg4 = 1; s->h263_pred = 1; + s->msmpeg4_version=1; + break; + case CODEC_ID_MSMPEG4V2: + s->h263_msmpeg4 = 1; + s->h263_pred = 1; + s->msmpeg4_version=2; + break; + case CODEC_ID_MSMPEG4V3: + s->h263_msmpeg4 = 1; + s->h263_pred = 1; + s->msmpeg4_version=3; break; case CODEC_ID_H263I: s->h263_intel = 1; @@ -60,7 +72,7 @@ static int h263_decode_init(AVCodecContext *avctx) } /* for h263, we allocate the images after having read the header */ - if (avctx->codec->id != CODEC_ID_H263) + if (avctx->codec->id != CODEC_ID_H263 && avctx->codec->id != CODEC_ID_MPEG4) if (MPV_common_init(s) < 0) return -1; @@ -115,22 +127,25 @@ static int h263_decode_frame(AVCodecContext *avctx, ret = intel_h263_decode_picture_header(s); } else { ret = h263_decode_picture_header(s); - /* After H263 header decode we have the height, width, */ + } + + /* After H263 & mpeg4 header decode we have the height, width,*/ /* and other parameters. So then we could init the picture */ /* FIXME: By the way H263 decoder is evolving it should have */ /* an H263EncContext */ - if (!s->context_initialized) { - avctx->width = s->width; - avctx->height = s->height; - if (MPV_common_init(s) < 0) - return -1; - } else if (s->width != avctx->width || s->height != avctx->height) { - /* H.263 could change picture size any time */ - MPV_common_end(s); - if (MPV_common_init(s) < 0) - return -1; - } + if (!s->context_initialized) { + avctx->width = s->width; + avctx->height = s->height; + avctx->aspect_ratio_info= s->aspect_ratio_info; + if (MPV_common_init(s) < 0) + return -1; + } else if (s->width != avctx->width || s->height != avctx->height) { + /* H.263 could change picture size any time */ + MPV_common_end(s); + if (MPV_common_init(s) < 0) + return -1; } + if (ret < 0) return -1; @@ -141,6 +156,12 @@ static int h263_decode_frame(AVCodecContext *avctx, #endif /* decode each macroblock */ + s->block_wrap[0]= + s->block_wrap[1]= + s->block_wrap[2]= + s->block_wrap[3]= s->mb_width*2 + 2; + s->block_wrap[4]= + s->block_wrap[5]= s->mb_width + 2; for(s->mb_y=0; s->mb_y < s->mb_height; s->mb_y++) { /* Check for GOB headers on H.263 */ /* FIXME: In the future H.263+ will have intra prediction */ @@ -148,7 +169,20 @@ static int h263_decode_frame(AVCodecContext *avctx, if (s->mb_y && !s->h263_pred) { s->first_gob_line = h263_decode_gob_header(s); } + + s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1; + s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1); + s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1; + s->block_index[3]= s->block_wrap[0]*(s->mb_y*2 + 2); + s->block_index[4]= s->block_wrap[4]*(s->mb_y + 1) + s->block_wrap[0]*(s->mb_height*2 + 2); + s->block_index[5]= s->block_wrap[4]*(s->mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2); for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) { + s->block_index[0]+=2; + s->block_index[1]+=2; + s->block_index[2]+=2; + s->block_index[3]+=2; + s->block_index[4]++; + s->block_index[5]++; #ifdef DEBUG printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y); #endif @@ -163,28 +197,8 @@ static int h263_decode_frame(AVCodecContext *avctx, s->y_dc_scale = 8; s->c_dc_scale = 8; } - -#ifdef HAVE_MMX - if (mm_flags & MM_MMX) { - asm volatile( - "pxor %%mm7, %%mm7 \n\t" - "movl $-128*6, %%eax \n\t" - "1: \n\t" - "movq %%mm7, (%0, %%eax) \n\t" - "movq %%mm7, 8(%0, %%eax) \n\t" - "movq %%mm7, 16(%0, %%eax) \n\t" - "movq %%mm7, 24(%0, %%eax) \n\t" - "addl $32, %%eax \n\t" - " js 1b \n\t" - : : "r" (((int)s->block)+128*6) - : "%eax" - ); - }else{ - memset(s->block, 0, sizeof(s->block)); - } -#else - memset(s->block, 0, sizeof(s->block)); -#endif + clear_blocks(s->block[0]); + s->mv_dir = MV_DIR_FORWARD; s->mv_type = MV_TYPE_16X16; if (s->h263_msmpeg4) { @@ -208,9 +222,15 @@ static int h263_decode_frame(AVCodecContext *avctx, if (h > 16) h = 16; offset = y * s->linesize; - src_ptr[0] = s->current_picture[0] + offset; - src_ptr[1] = s->current_picture[1] + (offset >> 2); - src_ptr[2] = s->current_picture[2] + (offset >> 2); + if(s->pict_type==B_TYPE || (!s->has_b_frames)){ + src_ptr[0] = s->current_picture[0] + offset; + src_ptr[1] = s->current_picture[1] + (offset >> 2); + src_ptr[2] = s->current_picture[2] + (offset >> 2); + } else { + src_ptr[0] = s->last_picture[0] + offset; + src_ptr[1] = s->last_picture[1] + (offset >> 2); + src_ptr[2] = s->last_picture[2] + (offset >> 2); + } avctx->draw_horiz_band(avctx, src_ptr, s->linesize, y, s->width, h); } @@ -221,9 +241,15 @@ static int h263_decode_frame(AVCodecContext *avctx, MPV_frame_end(s); - pict->data[0] = s->current_picture[0]; - pict->data[1] = s->current_picture[1]; - pict->data[2] = s->current_picture[2]; + if(s->pict_type==B_TYPE || (!s->has_b_frames)){ + pict->data[0] = s->current_picture[0]; + pict->data[1] = s->current_picture[1]; + pict->data[2] = s->current_picture[2]; + } else { + pict->data[0] = s->last_picture[0]; + pict->data[1] = s->last_picture[1]; + pict->data[2] = s->last_picture[2]; + } pict->linesize[0] = s->linesize; pict->linesize[1] = s->linesize / 2; pict->linesize[2] = s->linesize / 2; @@ -262,10 +288,34 @@ AVCodec h263_decoder = { CODEC_CAP_DRAW_HORIZ_BAND, }; -AVCodec msmpeg4_decoder = { +AVCodec msmpeg4v1_decoder = { + "msmpeg4v1", + CODEC_TYPE_VIDEO, + CODEC_ID_MSMPEG4V1, + sizeof(MpegEncContext), + h263_decode_init, + NULL, + h263_decode_end, + h263_decode_frame, + CODEC_CAP_DRAW_HORIZ_BAND, +}; + +AVCodec msmpeg4v2_decoder = { + "msmpeg4v2", + CODEC_TYPE_VIDEO, + CODEC_ID_MSMPEG4V2, + sizeof(MpegEncContext), + h263_decode_init, + NULL, + h263_decode_end, + h263_decode_frame, + CODEC_CAP_DRAW_HORIZ_BAND, +}; + +AVCodec msmpeg4v3_decoder = { "msmpeg4", CODEC_TYPE_VIDEO, - CODEC_ID_MSMPEG4, + CODEC_ID_MSMPEG4V3, sizeof(MpegEncContext), h263_decode_init, NULL, diff --git a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c index 37716a983..2c71850ee 100644 --- a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c +++ b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c @@ -25,22 +25,58 @@ int mm_flags; /* multimedia extension flags */ -int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); -int pix_abs16x16_sse(UINT8 *blk1, UINT8 *blk2, int lx, int h); -int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); -int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); -int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); +int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); + +int pix_abs16x16_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs16x16_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs16x16_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs16x16_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); + +int pix_abs8x8_mmx(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs8x8_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs8x8_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs8x8_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); + +int pix_abs8x8_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs8x8_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs8x8_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs8x8_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); + /* external functions, from idct_mmx.c */ void ff_mmx_idct(DCTELEM *block); void ff_mmxext_idct(DCTELEM *block); /* pixel operations */ -static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001; -static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002; +static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001LL; +static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002LL; //static const unsigned short mm_wone[4] __attribute__ ((aligned(8))) = { 0x1, 0x1, 0x1, 0x1 }; //static const unsigned short mm_wtwo[4] __attribute__ ((aligned(8))) = { 0x2, 0x2, 0x2, 0x2 }; +#define JUMPALIGN() __asm __volatile (".balign 8"::) +#define MOVQ_ZERO(regd) __asm __volatile ("pxor %%" #regd ", %%" #regd ::) + +#ifndef PIC +#define MOVQ_WONE(regd) __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_wone)) +#define MOVQ_WTWO(regd) __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_wtwo)) +#else +// for shared library it's better to use this way for accessing constants +// pcmpeqd -> -1 +#define MOVQ_WONE(regd) \ + __asm __volatile ( \ + "pcmpeqd %%" #regd ", %%" #regd " \n\t" \ + "psrlw $15, %%" #regd ::) + +#define MOVQ_WTWO(regd) \ + __asm __volatile ( \ + "pcmpeqd %%" #regd ", %%" #regd " \n\t" \ + "psrlw $15, %%" #regd " \n\t" \ + "psllw $1, %%" #regd ::) +#endif + /***********************************/ /* 3Dnow specific */ @@ -78,7 +114,7 @@ static void get_pixels_mmx(DCTELEM *block, const UINT8 *pixels, int line_size) /* read the pixels */ p = block; pix = pixels; - __asm __volatile("pxor %%mm7, %%mm7":); + MOVQ_ZERO(mm7); for(i=0;i<4;i++) { __asm __volatile( "movq %1, %%mm0\n\t" @@ -105,12 +141,11 @@ static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line { const DCTELEM *p; UINT8 *pix; - int i; /* read the pixels */ p = block; pix = pixels; - for(i=0;i<2;i++) { + /* unrolled loop */ __asm __volatile( "movq %3, %%mm0\n\t" "movq 8%3, %%mm1\n\t" @@ -132,7 +167,29 @@ static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line :"memory"); pix += line_size*4; p += 32; - } + + // if here would be an exact copy of the code above + // compiler would generate some very strange code + // thus using "r" + __asm __volatile( + "movq (%3), %%mm0\n\t" + "movq 8(%3), %%mm1\n\t" + "movq 16(%3), %%mm2\n\t" + "movq 24(%3), %%mm3\n\t" + "movq 32(%3), %%mm4\n\t" + "movq 40(%3), %%mm5\n\t" + "movq 48(%3), %%mm6\n\t" + "movq 56(%3), %%mm7\n\t" + "packuswb %%mm1, %%mm0\n\t" + "packuswb %%mm3, %%mm2\n\t" + "packuswb %%mm5, %%mm4\n\t" + "packuswb %%mm7, %%mm6\n\t" + "movq %%mm0, (%0)\n\t" + "movq %%mm2, (%0, %1)\n\t" + "movq %%mm4, (%0, %1, 2)\n\t" + "movq %%mm6, (%0, %2)\n\t" + ::"r" (pix), "r" (line_size), "r" (line_size*3), "r"(p) + :"memory"); } static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) @@ -144,8 +201,9 @@ static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line /* read the pixels */ p = block; pix = pixels; - __asm __volatile("pxor %%mm7, %%mm7":); - for(i=0;i<4;i++) { + MOVQ_ZERO(mm7); + i = 4; + while (i) { __asm __volatile( "movq %2, %%mm0\n\t" "movq 8%2, %%mm1\n\t" @@ -172,19 +230,47 @@ static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line :"memory"); pix += line_size*2; p += 16; - } + i--; + }; } static void put_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) { - int dh, hh; + int hh; UINT8 *p; const UINT8 *pix; + p = block; - pix = pixels; + pix = pixels; // 2s +#if 0 + do { + __asm __volatile( + "movq %1, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix) + :"memory"); + pix += line_size; + p += line_size; + } while (--h); +#else + // this optimized code is not very usefull + // the above loop is definitely faster + // at least on Celeron 500MHz + hh = h & 3; + while (hh) { + __asm __volatile( + "movq %1, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix) + :"memory"); + pix += line_size; + p += line_size; + hh--; + } hh=h>>2; - dh=h&3; - while(hh--) { + while (hh) { __asm __volatile( "movq (%1), %%mm0 \n\t" "movq (%1, %2), %%mm1 \n\t" @@ -196,19 +282,11 @@ static void put_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int "movq %%mm3, (%0, %3) \n\t" ::"r"(p), "r"(pix), "r"(line_size), "r"(line_size*3) :"memory"); - pix = pix + line_size*4; - p = p + line_size*4; - } - while(dh--) { - __asm __volatile( - "movq %1, %%mm0\n\t" - "movq %%mm0, %0\n\t" - :"=m"(*p) - :"m"(*pix) - :"memory"); - pix = pix + line_size; - p = p + line_size; + pix += line_size*4; + p += line_size*4; + hh--; } +#endif } static void put_pixels_x2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) @@ -217,10 +295,9 @@ static void put_pixels_x2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, const UINT8 *pix; p = block; pix = pixels; - __asm __volatile( - "pxor %%mm7, %%mm7\n\t" - "movq %0, %%mm4\n\t" - ::"m"(mm_wone)); + MOVQ_ZERO(mm7); + MOVQ_WONE(mm4); + JUMPALIGN(); do { __asm __volatile( "movq %1, %%mm0\n\t" @@ -252,10 +329,9 @@ static void put_pixels_y2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, const UINT8 *pix; p = block; pix = pixels; - __asm __volatile( - "pxor %%mm7, %%mm7\n\t" - "movq %0, %%mm4\n\t" - ::"m"(mm_wone)); + MOVQ_ZERO(mm7); + MOVQ_WONE(mm4); + JUMPALIGN(); do { __asm __volatile( "movq %1, %%mm0\n\t" @@ -288,11 +364,10 @@ static void put_pixels_xy2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, UINT8 *p; const UINT8 *pix; p = block; - pix = pixels; - __asm __volatile( - "pxor %%mm7, %%mm7\n\t" - "movq %0, %%mm6\n\t" - ::"m"(mm_wtwo)); + pix = pixels; // 1s + MOVQ_ZERO(mm7); + MOVQ_WTWO(mm6); + JUMPALIGN(); do { __asm __volatile( "movq %1, %%mm0\n\t" @@ -338,7 +413,7 @@ static void put_no_rnd_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int const UINT8 *pix; p = block; pix = pixels; - __asm __volatile("pxor %%mm7, %%mm7\n\t":); + MOVQ_ZERO(mm7); do { __asm __volatile( "movq %1, %%mm0\n\t" @@ -369,7 +444,8 @@ static void put_no_rnd_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int li const UINT8 *pix; p = block; pix = pixels; - __asm __volatile("pxor %%mm7, %%mm7\n\t":); + MOVQ_ZERO(mm7); + JUMPALIGN(); do { __asm __volatile( "movq %1, %%mm0\n\t" @@ -401,10 +477,9 @@ static void put_no_rnd_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int const UINT8 *pix; p = block; pix = pixels; - __asm __volatile( - "pxor %%mm7, %%mm7\n\t" - "movq %0, %%mm6\n\t" - ::"m"(mm_wone)); + MOVQ_ZERO(mm7); + MOVQ_WONE(mm6); + JUMPALIGN(); do { __asm __volatile( "movq %1, %%mm0\n\t" @@ -450,10 +525,9 @@ static void avg_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int const UINT8 *pix; p = block; pix = pixels; - __asm __volatile( - "pxor %%mm7, %%mm7\n\t" - "movq %0, %%mm6\n\t" - ::"m"(mm_wone)); + MOVQ_ZERO(mm7); + MOVQ_WONE(mm6); + JUMPALIGN(); do { __asm __volatile( "movq %0, %%mm0\n\t" @@ -487,10 +561,9 @@ static void avg_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int line_si const UINT8 *pix; p = block; pix = pixels; - __asm __volatile( - "pxor %%mm7, %%mm7\n\t" - "movq %0, %%mm6\n\t" - ::"m"(mm_wone)); + MOVQ_ZERO(mm7); + MOVQ_WONE(mm6); + JUMPALIGN(); do { __asm __volatile( "movq %1, %%mm1\n\t" @@ -533,10 +606,9 @@ static void avg_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int line_si const UINT8 *pix; p = block; pix = pixels; - __asm __volatile( - "pxor %%mm7, %%mm7\n\t" - "movq %0, %%mm6\n\t" - ::"m"(mm_wone)); + MOVQ_ZERO(mm7); + MOVQ_WONE(mm6); + JUMPALIGN(); do { __asm __volatile( "movq %1, %%mm1\n\t" @@ -579,10 +651,10 @@ static void avg_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int line_s const UINT8 *pix; p = block; pix = pixels; - __asm __volatile( - "pxor %%mm7, %%mm7\n\t" - "movq %0, %%mm6\n\t" - ::"m"(mm_wtwo)); + MOVQ_ZERO(mm7); + // this doesn't seem to be used offten - so + // the inside usage of mm_wone is not optimized + MOVQ_WTWO(mm6); do { __asm __volatile( "movq %1, %%mm0\n\t" @@ -639,7 +711,7 @@ static void avg_no_rnd_pixels_mmx( UINT8 *block, const UINT8 *pixels, int line_ const UINT8 *pix; p = block; pix = pixels; - __asm __volatile("pxor %%mm7, %%mm7\n\t":); + MOVQ_ZERO(mm7); do { __asm __volatile( "movq %1, %%mm0\n\t" @@ -670,8 +742,7 @@ static void avg_no_rnd_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int const UINT8 *pix; p = block; pix = pixels; - __asm __volatile( - "pxor %%mm7, %%mm7\n\t":); + MOVQ_ZERO(mm7); do { __asm __volatile( "movq %1, %%mm0\n\t" @@ -710,8 +781,7 @@ static void avg_no_rnd_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int const UINT8 *pix; p = block; pix = pixels; - __asm __volatile( - "pxor %%mm7, %%mm7\n\t":); + MOVQ_ZERO(mm7); do { __asm __volatile( "movq %1, %%mm0\n\t" @@ -750,10 +820,9 @@ static void avg_no_rnd_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int const UINT8 *pix; p = block; pix = pixels; - __asm __volatile( - "pxor %%mm7, %%mm7\n\t" - "movq %0, %%mm6\n\t" - ::"m"(mm_wone)); + MOVQ_ZERO(mm7); + MOVQ_WONE(mm6); + JUMPALIGN(); do { __asm __volatile( "movq %1, %%mm0\n\t" @@ -807,7 +876,7 @@ static void sub_pixels_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, const UINT8 *pix; p = block; pix = pixels; - __asm __volatile("pxor %%mm7, %%mm7":); + MOVQ_ZERO(mm7); do { __asm __volatile( "movq %0, %%mm0\n\t" @@ -834,10 +903,9 @@ static void sub_pixels_x2_mmx( DCTELEM *block, const UINT8 *pixels, int line_si const UINT8 *pix; p = block; pix = pixels; - __asm __volatile( - "pxor %%mm7, %%mm7\n\t" - "movq %0, %%mm6" - ::"m"(mm_wone)); + MOVQ_ZERO(mm7); + MOVQ_WONE(mm6); + JUMPALIGN(); do { __asm __volatile( "movq %0, %%mm0\n\t" @@ -874,10 +942,8 @@ static void sub_pixels_y2_mmx( DCTELEM *block, const UINT8 *pixels, int line_si const UINT8 *pix; p = block; pix = pixels; - __asm __volatile( - "pxor %%mm7, %%mm7\n\t" - "movq %0, %%mm6" - ::"m"(mm_wone)); + MOVQ_ZERO(mm7); + MOVQ_WONE(mm6); do { __asm __volatile( "movq %0, %%mm0\n\t" @@ -914,10 +980,9 @@ static void sub_pixels_xy2_mmx( DCTELEM *block, const UINT8 *pixels, int line const UINT8 *pix; p = block; pix = pixels; - __asm __volatile( - "pxor %%mm7, %%mm7\n\t" - "movq %0, %%mm6\n\t" - ::"m"(mm_wtwo)); + MOVQ_ZERO(mm7); + MOVQ_WTWO(mm6); + JUMPALIGN(); do { __asm __volatile( "movq %1, %%mm0\n\t" @@ -961,11 +1026,30 @@ static void sub_pixels_xy2_mmx( DCTELEM *block, const UINT8 *pixels, int line } while(--h); } +static void clear_blocks_mmx(DCTELEM *blocks) +{ + asm volatile( + "pxor %%mm7, %%mm7 \n\t" + "movl $-128*6, %%eax \n\t" + "1: \n\t" + "movq %%mm7, (%0, %%eax) \n\t" + "movq %%mm7, 8(%0, %%eax) \n\t" + "movq %%mm7, 16(%0, %%eax) \n\t" + "movq %%mm7, 24(%0, %%eax) \n\t" + "addl $32, %%eax \n\t" + " js 1b \n\t" + : : "r" (((int)blocks)+128*6) + : "%eax" + ); +} + +static void just_return() { return; } + void dsputil_init_mmx(void) { - mm_flags = xine_mm_accel(); -#if 0 - printf("CPU flags:"); + mm_flags = mm_support(); +#if 1 + printf("libavcodec: CPU flags:"); if (mm_flags & MM_MMX) printf(" mmx"); if (mm_flags & MM_MMXEXT) @@ -983,11 +1067,16 @@ void dsputil_init_mmx(void) get_pixels = get_pixels_mmx; put_pixels_clamped = put_pixels_clamped_mmx; add_pixels_clamped = add_pixels_clamped_mmx; - - pix_abs16x16 = pix_abs16x16_mmx; - pix_abs16x16_x2 = pix_abs16x16_x2_mmx; - pix_abs16x16_y2 = pix_abs16x16_y2_mmx; + clear_blocks= clear_blocks_mmx; + + pix_abs16x16 = pix_abs16x16_mmx; + pix_abs16x16_x2 = pix_abs16x16_x2_mmx; + pix_abs16x16_y2 = pix_abs16x16_y2_mmx; pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; + pix_abs8x8 = pix_abs8x8_mmx; + pix_abs8x8_x2 = pix_abs8x8_x2_mmx; + pix_abs8x8_y2 = pix_abs8x8_y2_mmx; + pix_abs8x8_xy2= pix_abs8x8_xy2_mmx; av_fdct = fdct_mmx; put_pixels_tab[0] = put_pixels_mmx; @@ -1016,10 +1105,16 @@ void dsputil_init_mmx(void) sub_pixels_tab[3] = sub_pixels_xy2_mmx; if (mm_flags & MM_MMXEXT) { - pix_abs16x16 = pix_abs16x16_sse; - } - - if (mm_flags & MM_SSE) { + pix_abs16x16 = pix_abs16x16_mmx2; + pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; + pix_abs16x16_y2 = pix_abs16x16_y2_mmx2; + pix_abs16x16_xy2= pix_abs16x16_xy2_mmx2; + + pix_abs8x8 = pix_abs8x8_mmx2; + pix_abs8x8_x2 = pix_abs8x8_x2_mmx2; + pix_abs8x8_y2 = pix_abs8x8_y2_mmx2; + pix_abs8x8_xy2= pix_abs8x8_xy2_mmx2; + put_pixels_tab[1] = put_pixels_x2_sse; put_pixels_tab[2] = put_pixels_y2_sse; @@ -1054,4 +1149,44 @@ void dsputil_init_mmx(void) ff_idct = simple_idct_mmx; #endif } + +#if 0 + // for speed testing + get_pixels = just_return; + put_pixels_clamped = just_return; + add_pixels_clamped = just_return; + + pix_abs16x16 = just_return; + pix_abs16x16_x2 = just_return; + pix_abs16x16_y2 = just_return; + pix_abs16x16_xy2 = just_return; + + put_pixels_tab[0] = just_return; + put_pixels_tab[1] = just_return; + put_pixels_tab[2] = just_return; + put_pixels_tab[3] = just_return; + + put_no_rnd_pixels_tab[0] = just_return; + put_no_rnd_pixels_tab[1] = just_return; + put_no_rnd_pixels_tab[2] = just_return; + put_no_rnd_pixels_tab[3] = just_return; + + avg_pixels_tab[0] = just_return; + avg_pixels_tab[1] = just_return; + avg_pixels_tab[2] = just_return; + avg_pixels_tab[3] = just_return; + + avg_no_rnd_pixels_tab[0] = just_return; + avg_no_rnd_pixels_tab[1] = just_return; + avg_no_rnd_pixels_tab[2] = just_return; + avg_no_rnd_pixels_tab[3] = just_return; + + sub_pixels_tab[0] = just_return; + sub_pixels_tab[1] = just_return; + sub_pixels_tab[2] = just_return; + sub_pixels_tab[3] = just_return; + + //av_fdct = just_return; + //ff_idct = just_return; +#endif } diff --git a/src/libffmpeg/libavcodec/i386/motion_est_mmx.c b/src/libffmpeg/libavcodec/i386/motion_est_mmx.c index 35b16b711..e704c4219 100644 --- a/src/libffmpeg/libavcodec/i386/motion_est_mmx.c +++ b/src/libffmpeg/libavcodec/i386/motion_est_mmx.c @@ -16,229 +16,347 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * + * mostly by Michael Niedermayer <michaelni@gmx.at> */ #include "../dsputil.h" -#include "mmx.h" -static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001; -static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002; +static const __attribute__ ((aligned(8))) UINT64 round_tab[3]={ +0x0000000000000000, +0x0001000100010001, +0x0002000200020002, +}; -/* mm7 is accumulator, mm6 is zero */ -static inline void sad_add(const UINT8 *p1, const UINT8 *p2) +static inline void sad8_mmx(UINT8 *blk1, UINT8 *blk2, int stride, int h) { - movq_m2r(*p1, mm0); - movq_m2r(*p2, mm1); - movq_r2r(mm0, mm2); - psubusb_r2r(mm1, mm0); - psubusb_r2r(mm2, mm1); - por_r2r(mm1, mm0); /* mm0 is absolute value */ - - movq_r2r(mm0, mm1); - punpcklbw_r2r(mm6, mm0); - punpckhbw_r2r(mm6, mm1); - paddusw_r2r(mm0, mm7); - paddusw_r2r(mm1, mm7); + int len= -(stride<<h); + asm volatile( + ".balign 16 \n\t" + "1: \n\t" + "movq (%1, %%eax), %%mm0 \n\t" + "movq (%2, %%eax), %%mm2 \n\t" + "movq (%2, %%eax), %%mm4 \n\t" + "addl %3, %%eax \n\t" + "psubusb %%mm0, %%mm2 \n\t" + "psubusb %%mm4, %%mm0 \n\t" + "movq (%1, %%eax), %%mm1 \n\t" + "movq (%2, %%eax), %%mm3 \n\t" + "movq (%2, %%eax), %%mm5 \n\t" + "psubusb %%mm1, %%mm3 \n\t" + "psubusb %%mm5, %%mm1 \n\t" + "por %%mm2, %%mm0 \n\t" + "por %%mm1, %%mm3 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm3, %%mm2 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpckhbw %%mm7, %%mm1 \n\t" + "punpcklbw %%mm7, %%mm3 \n\t" + "punpckhbw %%mm7, %%mm2 \n\t" + "paddw %%mm1, %%mm0 \n\t" + "paddw %%mm3, %%mm2 \n\t" + "paddw %%mm2, %%mm0 \n\t" + "paddw %%mm0, %%mm6 \n\t" + "addl %3, %%eax \n\t" + " js 1b \n\t" + : "+a" (len) + : "r" (blk1 - len), "r" (blk2 - len), "r" (stride) + ); } -/* convert mm7 to value */ -static inline int sad_end(void) +static inline void sad8_mmx2(UINT8 *blk1, UINT8 *blk2, int stride, int h) { - int res; - - movq_r2r(mm7, mm0); - psrlq_i2r(32, mm7); - paddusw_r2r(mm0, mm7); - - movq_r2r(mm7, mm0); - psrlq_i2r(16, mm7); - paddusw_r2r(mm0, mm7); - __asm __volatile ("movd %%mm7, %0" : "=a" (res)); - return res & 0xffff; + int len= -(stride<<h); + asm volatile( + ".balign 16 \n\t" + "1: \n\t" + "movq (%1, %%eax), %%mm0 \n\t" + "movq (%2, %%eax), %%mm2 \n\t" + "psadbw %%mm2, %%mm0 \n\t" + "addl %3, %%eax \n\t" + "movq (%1, %%eax), %%mm1 \n\t" + "movq (%2, %%eax), %%mm3 \n\t" + "psadbw %%mm1, %%mm3 \n\t" + "paddw %%mm3, %%mm0 \n\t" + "paddw %%mm0, %%mm6 \n\t" + "addl %3, %%eax \n\t" + " js 1b \n\t" + : "+a" (len) + : "r" (blk1 - len), "r" (blk2 - len), "r" (stride) + ); } -int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h) +static inline void sad8_2_mmx2(UINT8 *blk1a, UINT8 *blk1b, UINT8 *blk2, int stride, int h) { - const UINT8 *p1, *p2; - - h >>= 1; - p1 = blk1; - p2 = blk2; - pxor_r2r(mm7, mm7); /* mm7 is accumulator */ - pxor_r2r(mm6, mm6); /* mm7 is zero constant */ - do { - sad_add(p1, p2); - sad_add(p1 + 8, p2 + 8); - p1 += lx; - p2 += lx; - sad_add(p1, p2); - sad_add(p1 + 8, p2 + 8); - p1 += lx; - p2 += lx; - } while (--h); - return sad_end(); + int len= -(stride<<h); + asm volatile( + ".balign 16 \n\t" + "1: \n\t" + "movq (%1, %%eax), %%mm0 \n\t" + "movq (%2, %%eax), %%mm2 \n\t" + "pavgb %%mm2, %%mm0 \n\t" + "movq (%3, %%eax), %%mm2 \n\t" + "psadbw %%mm2, %%mm0 \n\t" + "addl %4, %%eax \n\t" + "movq (%1, %%eax), %%mm1 \n\t" + "movq (%2, %%eax), %%mm3 \n\t" + "pavgb %%mm1, %%mm3 \n\t" + "movq (%3, %%eax), %%mm1 \n\t" + "psadbw %%mm1, %%mm3 \n\t" + "paddw %%mm3, %%mm0 \n\t" + "paddw %%mm0, %%mm6 \n\t" + "addl %4, %%eax \n\t" + " js 1b \n\t" + : "+a" (len) + : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" (stride) + ); } -/* please test it ! */ -static inline void sad_add_sse(const UINT8 *p1, const UINT8 *p2) -{ - movq_m2r(*(p1 + 0), mm0); - movq_m2r(*(p1 + 8), mm1); - psadbw_m2r(*(p2 + 0), mm0); - psadbw_m2r(*(p2 + 8), mm1); - paddusw_r2r(mm0, mm7); - paddusw_r2r(mm1, mm7); +static inline void sad8_4_mmx2(UINT8 *blk1, UINT8 *blk2, int stride, int h) +{ //FIXME reuse src + int len= -(stride<<h); + asm volatile( + ".balign 16 \n\t" + "1: \n\t" + "movq (%1, %%eax), %%mm0 \n\t" + "movq (%2, %%eax), %%mm2 \n\t" + "movq 1(%1, %%eax), %%mm1 \n\t" + "movq 1(%2, %%eax), %%mm3 \n\t" + "pavgb %%mm2, %%mm0 \n\t" + "pavgb %%mm1, %%mm3 \n\t" + "pavgb %%mm3, %%mm0 \n\t" + "movq (%3, %%eax), %%mm2 \n\t" + "psadbw %%mm2, %%mm0 \n\t" + "addl %4, %%eax \n\t" + "movq (%1, %%eax), %%mm1 \n\t" + "movq (%2, %%eax), %%mm3 \n\t" + "movq 1(%1, %%eax), %%mm2 \n\t" + "movq 1(%2, %%eax), %%mm4 \n\t" + "pavgb %%mm3, %%mm1 \n\t" + "pavgb %%mm4, %%mm2 \n\t" + "pavgb %%mm1, %%mm2 \n\t" + "movq (%3, %%eax), %%mm1 \n\t" + "psadbw %%mm1, %%mm2 \n\t" + "paddw %%mm2, %%mm0 \n\t" + "paddw %%mm0, %%mm6 \n\t" + "addl %4, %%eax \n\t" + " js 1b \n\t" + : "+a" (len) + : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" (stride) + ); } -int pix_abs16x16_sse(UINT8 *blk1, UINT8 *blk2, int lx, int h) +static inline void sad8_2_mmx(UINT8 *blk1a, UINT8 *blk1b, UINT8 *blk2, int stride, int h) { - const UINT8 *p1, *p2; - - h >>= 1; - p1 = blk1; - p2 = blk2; - pxor_r2r(mm7, mm7); /* mm7 is accumulator */ - do { - sad_add_sse(p1, p2); - p1 += lx; - p2 += lx; - sad_add_sse(p1, p2); - p1 += lx; - p2 += lx; - } while (--h); - return sad_end(); + int len= -(stride<<h); + asm volatile( + ".balign 16 \n\t" + "1: \n\t" + "movq (%1, %%eax), %%mm0 \n\t" + "movq (%2, %%eax), %%mm1 \n\t" + "movq (%1, %%eax), %%mm2 \n\t" + "movq (%2, %%eax), %%mm3 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm1 \n\t" + "punpckhbw %%mm7, %%mm2 \n\t" + "punpckhbw %%mm7, %%mm3 \n\t" + "paddw %%mm0, %%mm1 \n\t" + "paddw %%mm2, %%mm3 \n\t" + "movq (%3, %%eax), %%mm4 \n\t" + "movq (%3, %%eax), %%mm2 \n\t" + "paddw %%mm5, %%mm1 \n\t" + "paddw %%mm5, %%mm3 \n\t" + "psrlw $1, %%mm1 \n\t" + "psrlw $1, %%mm3 \n\t" + "packuswb %%mm3, %%mm1 \n\t" + "psubusb %%mm1, %%mm4 \n\t" + "psubusb %%mm2, %%mm1 \n\t" + "por %%mm4, %%mm1 \n\t" + "movq %%mm1, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpckhbw %%mm7, %%mm1 \n\t" + "paddw %%mm1, %%mm0 \n\t" + "paddw %%mm0, %%mm6 \n\t" + "addl %4, %%eax \n\t" + " js 1b \n\t" + : "+a" (len) + : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" (stride) + ); } -#define DUMP(reg) { mmx_t tmp; movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq); } - -/* mm7 is accumulator, mm6 is zero */ -static inline void sad_add_x2(const UINT8 *p1, const UINT8 *p2, const UINT8 *p3) +static inline void sad8_4_mmx(UINT8 *blk1, UINT8 *blk2, int stride, int h) { - movq_m2r(*(p2 + 0), mm0); - movq_m2r(*(p3 + 0), mm1); - movq_r2r(mm0, mm2); - movq_r2r(mm1, mm3); - punpcklbw_r2r(mm6, mm0); /* extract 4 bytes low */ - punpcklbw_r2r(mm6, mm1); - punpckhbw_r2r(mm6, mm2); /* high */ - punpckhbw_r2r(mm6, mm3); - paddusw_r2r(mm1, mm0); - paddusw_r2r(mm3, mm2); - movq_m2r(*(p1 + 0), mm1); /* mm1 : other value */ - paddusw_r2r(mm5, mm0); /* + 1 */ - paddusw_r2r(mm5, mm2); /* + 1 */ - psrlw_i2r(1, mm0); - psrlw_i2r(1, mm2); - packuswb_r2r(mm2, mm0); /* average is in mm0 */ - - movq_r2r(mm1, mm2); - psubusb_r2r(mm0, mm1); - psubusb_r2r(mm2, mm0); - por_r2r(mm1, mm0); /* mm0 is absolute value */ - - movq_r2r(mm0, mm1); - punpcklbw_r2r(mm6, mm0); - punpckhbw_r2r(mm6, mm1); - paddusw_r2r(mm0, mm7); - paddusw_r2r(mm1, mm7); + int len= -(stride<<h); + asm volatile( + ".balign 16 \n\t" + "1: \n\t" + "movq (%1, %%eax), %%mm0 \n\t" + "movq (%2, %%eax), %%mm1 \n\t" + "movq %%mm0, %%mm4 \n\t" + "movq %%mm1, %%mm2 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm1 \n\t" + "punpckhbw %%mm7, %%mm4 \n\t" + "punpckhbw %%mm7, %%mm2 \n\t" + "paddw %%mm1, %%mm0 \n\t" + "paddw %%mm2, %%mm4 \n\t" + "movq 1(%1, %%eax), %%mm2 \n\t" + "movq 1(%2, %%eax), %%mm3 \n\t" + "movq %%mm2, %%mm1 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpckhbw %%mm7, %%mm1 \n\t" + "paddw %%mm0, %%mm2 \n\t" + "paddw %%mm4, %%mm1 \n\t" + "movq %%mm3, %%mm4 \n\t" + "punpcklbw %%mm7, %%mm3 \n\t" + "punpckhbw %%mm7, %%mm4 \n\t" + "paddw %%mm3, %%mm2 \n\t" + "paddw %%mm4, %%mm1 \n\t" + "movq (%3, %%eax), %%mm3 \n\t" + "movq (%3, %%eax), %%mm4 \n\t" + "paddw %%mm5, %%mm2 \n\t" + "paddw %%mm5, %%mm1 \n\t" + "psrlw $2, %%mm2 \n\t" + "psrlw $2, %%mm1 \n\t" + "packuswb %%mm1, %%mm2 \n\t" + "psubusb %%mm2, %%mm3 \n\t" + "psubusb %%mm4, %%mm2 \n\t" + "por %%mm3, %%mm2 \n\t" + "movq %%mm2, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpckhbw %%mm7, %%mm2 \n\t" + "paddw %%mm2, %%mm0 \n\t" + "paddw %%mm0, %%mm6 \n\t" + "addl %4, %%eax \n\t" + " js 1b \n\t" + : "+a" (len) + : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" (stride) + ); } -int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h) +static inline int sum_mmx() { - const UINT8 *p1, *p2; - - p1 = blk1; - p2 = blk2; - pxor_r2r(mm7, mm7); /* mm7 is accumulator */ - pxor_r2r(mm6, mm6); /* mm7 is zero constant */ - movq_m2r(mm_wone, mm5); /* one constant */ - do { - sad_add_x2(p1, p2, p2 + 1); - sad_add_x2(p1 + 8, p2 + 8, p2 + 9); - p1 += lx; - p2 += lx; - } while (--h); - return sad_end(); + int ret; + asm volatile( + "movq %%mm6, %%mm0 \n\t" + "psrlq $32, %%mm6 \n\t" + "paddw %%mm0, %%mm6 \n\t" + "movq %%mm6, %%mm0 \n\t" + "psrlq $16, %%mm6 \n\t" + "paddw %%mm0, %%mm6 \n\t" + "movd %%mm6, %0 \n\t" + : "=r" (ret) + ); + return ret&0xFFFF; } -int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h) +static inline int sum_mmx2() { - const UINT8 *p1, *p2; - - p1 = blk1; - p2 = blk2; - pxor_r2r(mm7, mm7); /* mm7 is accumulator */ - pxor_r2r(mm6, mm6); /* mm7 is zero constant */ - movq_m2r(mm_wone, mm5); /* one constant */ - do { - sad_add_x2(p1, p2, p2 + lx); - sad_add_x2(p1 + 8, p2 + 8, p2 + 8 + lx); - p1 += lx; - p2 += lx; - } while (--h); - return sad_end(); + int ret; + asm volatile( + "movd %%mm6, %0 \n\t" + : "=r" (ret) + ); + return ret; } -/* mm7 is accumulator, mm6 is zero */ -static inline void sad_add_xy2(const UINT8 *p1, const UINT8 *p2, const UINT8 *p3) -{ - movq_m2r(*(p2 + 0), mm0); - movq_m2r(*(p3 + 0), mm1); - movq_r2r(mm0, mm2); - movq_r2r(mm1, mm3); - punpcklbw_r2r(mm6, mm0); /* extract 4 bytes low */ - punpcklbw_r2r(mm6, mm1); - punpckhbw_r2r(mm6, mm2); /* high */ - punpckhbw_r2r(mm6, mm3); - paddusw_r2r(mm1, mm0); - paddusw_r2r(mm3, mm2); - - movq_m2r(*(p2 + 1), mm1); - movq_m2r(*(p3 + 1), mm3); - movq_r2r(mm1, mm4); - punpcklbw_r2r(mm6, mm1); /* low */ - punpckhbw_r2r(mm6, mm4); /* high */ - paddusw_r2r(mm1, mm0); - paddusw_r2r(mm4, mm2); - movq_r2r(mm3, mm4); - punpcklbw_r2r(mm6, mm3); /* low */ - punpckhbw_r2r(mm6, mm4); /* high */ - paddusw_r2r(mm3, mm0); - paddusw_r2r(mm4, mm2); - - movq_m2r(*(p1 + 0), mm1); /* mm1 : other value */ - paddusw_r2r(mm5, mm0); /* + 2 */ - paddusw_r2r(mm5, mm2); /* + 2 */ - psrlw_i2r(2, mm0); - psrlw_i2r(2, mm2); - packuswb_r2r(mm2, mm0); /* average is in mm0 */ +#define PIX_SAD(suf)\ +int pix_abs8x8_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\ +{\ + asm volatile("pxor %%mm7, %%mm7 \n\t"\ + "pxor %%mm6, %%mm6 \n\t":);\ +\ + sad8_ ## suf(blk1, blk2, stride, 3);\ +\ + return sum_ ## suf();\ +}\ +\ +int pix_abs8x8_x2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\ +{\ + asm volatile("pxor %%mm7, %%mm7 \n\t"\ + "pxor %%mm6, %%mm6 \n\t"\ + "movq %0, %%mm5 \n\t"\ + :: "m"(round_tab[1]) \ + );\ +\ + sad8_2_ ## suf(blk1, blk2+1, blk2, stride, 3);\ +\ + return sum_ ## suf();\ +}\ +\ +int pix_abs8x8_y2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\ +{\ + asm volatile("pxor %%mm7, %%mm7 \n\t"\ + "pxor %%mm6, %%mm6 \n\t"\ + "movq %0, %%mm5 \n\t"\ + :: "m"(round_tab[1]) \ + );\ +\ + sad8_2_ ## suf(blk1, blk1+stride, blk2, stride, 3);\ +\ + return sum_ ## suf();\ +}\ +\ +int pix_abs8x8_xy2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\ +{\ + asm volatile("pxor %%mm7, %%mm7 \n\t"\ + "pxor %%mm6, %%mm6 \n\t"\ + "movq %0, %%mm5 \n\t"\ + :: "m"(round_tab[2]) \ + );\ +\ + sad8_4_ ## suf(blk1, blk2, stride, 3);\ +\ + return sum_ ## suf();\ +}\ +\ +int pix_abs16x16_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\ +{\ + asm volatile("pxor %%mm7, %%mm7 \n\t"\ + "pxor %%mm6, %%mm6 \n\t":);\ +\ + sad8_ ## suf(blk1 , blk2 , stride, 4);\ + sad8_ ## suf(blk1+8, blk2+8, stride, 4);\ +\ + return sum_ ## suf();\ +}\ +int pix_abs16x16_x2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\ +{\ + asm volatile("pxor %%mm7, %%mm7 \n\t"\ + "pxor %%mm6, %%mm6 \n\t"\ + "movq %0, %%mm5 \n\t"\ + :: "m"(round_tab[1]) \ + );\ +\ + sad8_2_ ## suf(blk1 , blk1+1, blk2 , stride, 4);\ + sad8_2_ ## suf(blk1+8, blk1+9, blk2+8, stride, 4);\ +\ + return sum_ ## suf();\ +}\ +int pix_abs16x16_y2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\ +{\ + asm volatile("pxor %%mm7, %%mm7 \n\t"\ + "pxor %%mm6, %%mm6 \n\t"\ + "movq %0, %%mm5 \n\t"\ + :: "m"(round_tab[1]) \ + );\ +\ + sad8_2_ ## suf(blk1 , blk1+stride, blk2 , stride, 4);\ + sad8_2_ ## suf(blk1+8, blk1+stride+8,blk2+8, stride, 4);\ +\ + return sum_ ## suf();\ +}\ +int pix_abs16x16_xy2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\ +{\ + asm volatile("pxor %%mm7, %%mm7 \n\t"\ + "pxor %%mm6, %%mm6 \n\t"\ + "movq %0, %%mm5 \n\t"\ + :: "m"(round_tab[2]) \ + );\ +\ + sad8_4_ ## suf(blk1 , blk2 , stride, 4);\ + sad8_4_ ## suf(blk1+8, blk2+8, stride, 4);\ +\ + return sum_ ## suf();\ +}\ - movq_r2r(mm1, mm2); - psubusb_r2r(mm0, mm1); - psubusb_r2r(mm2, mm0); - por_r2r(mm1, mm0); /* mm0 is absolute value */ - - movq_r2r(mm0, mm1); - punpcklbw_r2r(mm6, mm0); - punpckhbw_r2r(mm6, mm1); - paddusw_r2r(mm0, mm7); - paddusw_r2r(mm1, mm7); -} - -int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h) -{ - const UINT8 *p1, *p2, *p3; - - p1 = blk1; - p2 = blk2; - p3 = blk2 + lx; - pxor_r2r(mm7, mm7); /* mm7 is accumulator */ - pxor_r2r(mm6, mm6); /* mm7 is zero constant */ - movq_m2r(mm_wtwo, mm5); /* one constant */ - do { - sad_add_xy2(p1, p2, p2 + lx); - sad_add_xy2(p1 + 8, p2 + 8, p2 + 8 + lx); - p1 += lx; - p2 += lx; - } while (--h); - return sad_end(); -} +PIX_SAD(mmx) +PIX_SAD(mmx2) diff --git a/src/libffmpeg/libavcodec/motion_est.c b/src/libffmpeg/libavcodec/motion_est.c index 084eb6038..92724ac87 100644 --- a/src/libffmpeg/libavcodec/motion_est.c +++ b/src/libffmpeg/libavcodec/motion_est.c @@ -16,6 +16,8 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * new Motion Estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at> */ #include "config.h" #include "xine-utils/xineutils.h" @@ -25,9 +27,14 @@ #include "dsputil.h" #include "mpegvideo.h" +#define ABS(a) ((a)>0 ? (a) : -(a)) +#define MAX(a,b) ((a) > (b) ? (a) : (b)) +#define INTER_BIAS 257 + static void halfpel_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr, int dmin, - int xmin, int ymin, int xmax, int ymax); + int xmin, int ymin, int xmax, int ymax, + int pred_x, int pred_y); /* config it to test motion vector encoding (send random vectors) */ //#define CONFIG_TEST_MV_ENCODE @@ -54,6 +61,28 @@ static int pix_sum(UINT8 * pix, int line_size) return s; } +static int pix_dev(UINT8 * pix, int line_size, int mean) +{ + int s, i, j; + + s = 0; + for (i = 0; i < 16; i++) { + for (j = 0; j < 16; j += 8) { + s += ABS(pix[0]-mean); + s += ABS(pix[1]-mean); + s += ABS(pix[2]-mean); + s += ABS(pix[3]-mean); + s += ABS(pix[4]-mean); + s += ABS(pix[5]-mean); + s += ABS(pix[6]-mean); + s += ABS(pix[7]-mean); + pix += 8; + } + pix += line_size - 16; + } + return s; +} + static int pix_norm1(UINT8 * pix, int line_size) { int s, i, j; @@ -138,7 +167,7 @@ static int full_motion_search(MpegEncContext * s, for (y = y1; y <= y2; y++) { for (x = x1; x <= x2; x++) { d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, - s->linesize, 16); + s->linesize); if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < @@ -202,7 +231,7 @@ static int log_motion_search(MpegEncContext * s, do { for (y = y1; y <= y2; y += range) { for (x = x1; x <= x2; x += range) { - d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize, 16); + d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize); if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { dmin = d; mx = x; @@ -282,7 +311,7 @@ static int phods_motion_search(MpegEncContext * s, lastx = x; for (x = x1; x <= x2; x += range) { - d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize, 16); + d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize); if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { dminx = d; mx = x; @@ -291,7 +320,7 @@ static int phods_motion_search(MpegEncContext * s, x = lastx; for (y = y1; y <= y2; y += range) { - d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize, 16); + d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize); if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { dminy = d; my = y; @@ -330,78 +359,474 @@ static int phods_motion_search(MpegEncContext * s, return dminy; } + +#define Z_THRESHOLD 256 + +#define CHECK_MV(x,y)\ +{\ + d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\ + d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\ + if(d<dmin){\ + best[0]=x;\ + best[1]=y;\ + dmin=d;\ + }\ +} + +#define CHECK_MV_DIR(x,y,new_dir)\ +{\ + d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\ + d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\ + if(d<dmin){\ + best[0]=x;\ + best[1]=y;\ + dmin=d;\ + next_dir= new_dir;\ + }\ +} + +#define CHECK_MV4(x,y)\ +{\ + d = pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\ + d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\ + if(d<dmin){\ + best[0]=x;\ + best[1]=y;\ + dmin=d;\ + }\ +} + +#define CHECK_MV4_DIR(x,y,new_dir)\ +{\ + d = pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\ + d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\ + if(d<dmin){\ + best[0]=x;\ + best[1]=y;\ + dmin=d;\ + next_dir= new_dir;\ + }\ +} + + +#define check(x,y,S,v)\ +if( (x)<(xmin<<(S)) ) printf("%d %d %d %d xmin" #v, (x), (y), s->mb_x, s->mb_y);\ +if( (x)>(xmax<<(S)) ) printf("%d %d %d %d xmax" #v, (x), (y), s->mb_x, s->mb_y);\ +if( (y)<(ymin<<(S)) ) printf("%d %d %d %d ymin" #v, (x), (y), s->mb_x, s->mb_y);\ +if( (y)>(ymax<<(S)) ) printf("%d %d %d %d ymax" #v, (x), (y), s->mb_x, s->mb_y);\ + + +static inline int small_diamond_search(MpegEncContext * s, int *best, int dmin, + UINT8 *new_pic, UINT8 *old_pic, int pic_stride, + int pred_x, int pred_y, UINT16 *mv_penalty, int quant, + int xmin, int ymin, int xmax, int ymax, int shift) +{ + int next_dir=-1; + + for(;;){ + int d; + const int dir= next_dir; + const int x= best[0]; + const int y= best[1]; + next_dir=-1; + +//printf("%d", dir); + if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y , 0) + if(dir!=3 && y>ymin) CHECK_MV_DIR(x , y-1, 1) + if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y , 2) + if(dir!=1 && y<ymax) CHECK_MV_DIR(x , y+1, 3) + + if(next_dir==-1){ + return dmin; + } + } + +/* for(;;){ + int d; + const int x= best[0]; + const int y= best[1]; + const int last_min=dmin; + if(x>xmin) CHECK_MV(x-1, y ) + if(y>xmin) CHECK_MV(x , y-1) + if(x<xmax) CHECK_MV(x+1, y ) + if(y<xmax) CHECK_MV(x , y+1) + if(x>xmin && y>ymin) CHECK_MV(x-1, y-1) + if(x>xmin && y<ymax) CHECK_MV(x-1, y+1) + if(x<xmax && y>ymin) CHECK_MV(x+1, y-1) + if(x<xmax && y<ymax) CHECK_MV(x+1, y+1) + if(x-1>xmin) CHECK_MV(x-2, y ) + if(y-1>xmin) CHECK_MV(x , y-2) + if(x+1<xmax) CHECK_MV(x+2, y ) + if(y+1<xmax) CHECK_MV(x , y+2) + if(x-1>xmin && y-1>ymin) CHECK_MV(x-2, y-2) + if(x-1>xmin && y+1<ymax) CHECK_MV(x-2, y+2) + if(x+1<xmax && y-1>ymin) CHECK_MV(x+2, y-2) + if(x+1<xmax && y+1<ymax) CHECK_MV(x+2, y+2) + if(dmin==last_min) return dmin; + } + */ +} + +static inline int small_diamond_search4MV(MpegEncContext * s, int *best, int dmin, + UINT8 *new_pic, UINT8 *old_pic, int pic_stride, + int pred_x, int pred_y, UINT16 *mv_penalty, int quant, + int xmin, int ymin, int xmax, int ymax, int shift) +{ + int next_dir=-1; + + for(;;){ + int d; + const int dir= next_dir; + const int x= best[0]; + const int y= best[1]; + next_dir=-1; + +//printf("%d", dir); + if(dir!=2 && x>xmin) CHECK_MV4_DIR(x-1, y , 0) + if(dir!=3 && y>ymin) CHECK_MV4_DIR(x , y-1, 1) + if(dir!=0 && x<xmax) CHECK_MV4_DIR(x+1, y , 2) + if(dir!=1 && y<ymax) CHECK_MV4_DIR(x , y+1, 3) + + if(next_dir==-1){ + return dmin; + } + } +} + +static inline int snake_search(MpegEncContext * s, int *best, int dmin, + UINT8 *new_pic, UINT8 *old_pic, int pic_stride, + int pred_x, int pred_y, UINT16 *mv_penalty, int quant, + int xmin, int ymin, int xmax, int ymax, int shift) +{ + int dir=0; + int c=1; + static int x_dir[8]= {1,1,0,-1,-1,-1, 0, 1}; + static int y_dir[8]= {0,1,1, 1, 0,-1,-1,-1}; + int fails=0; + int last_d[2]={dmin, dmin}; + +/*static int good=0; +static int bad=0; +static int point=0; + +point++; +if(256*256*256*64%point==0) +{ + printf("%d %d %d\n", good, bad, point); +}*/ + + for(;;){ + int x= best[0]; + int y= best[1]; + int d; + x+=x_dir[dir]; + y+=y_dir[dir]; + if(x>=xmin && x<=xmax && y>=ymin && y<=ymax){ + d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride); + d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant; + }else{ + d = dmin + 10000; //FIXME smarter boundary handling + } + if(d<dmin){ + best[0]=x; + best[1]=y; + dmin=d; + + if(last_d[1] - last_d[0] > last_d[0] - d) c= -c; + dir+=c; + + fails=0; +//good++; + last_d[1]=last_d[0]; + last_d[0]=d; + }else{ +//bad++; + if(fails){ + if(fails>=3) return dmin; + }else{ + c= -c; + } + dir+=c*2; + fails++; + } + dir&=7; + } +} + +static int epzs_motion_search(MpegEncContext * s, + int *mx_ptr, int *my_ptr, + int P[5][2], int pred_x, int pred_y, + int xmin, int ymin, int xmax, int ymax) +{ + int best[2]={0, 0}; + int d, dmin; + UINT8 *new_pic, *old_pic; + const int pic_stride= s->linesize; + const int pic_xy= (s->mb_y*pic_stride + s->mb_x)*16; + UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame + int quant= s->qscale; // qscale of the prev frame + const int shift= 1+s->quarter_sample; + + new_pic = s->new_picture[0] + pic_xy; + old_pic = s->last_picture[0] + pic_xy; + + dmin = pix_abs16x16(new_pic, old_pic, pic_stride); + if(dmin<Z_THRESHOLD){ + *mx_ptr= 0; + *my_ptr= 0; +//printf("Z"); + return dmin; + } + + /* first line */ + if ((s->mb_y == 0 || s->first_slice_line || s->first_gob_line)) { + CHECK_MV(P[1][0]>>shift, P[1][1]>>shift) + }else{ + CHECK_MV(P[4][0]>>shift, P[4][1]>>shift) + if(dmin<Z_THRESHOLD){ + *mx_ptr= P[4][0]>>shift; + *my_ptr= P[4][1]>>shift; +//printf("M\n"); + return dmin; + } + CHECK_MV(P[1][0]>>shift, P[1][1]>>shift) + CHECK_MV(P[2][0]>>shift, P[2][1]>>shift) + CHECK_MV(P[3][0]>>shift, P[3][1]>>shift) + } + CHECK_MV(P[0][0]>>shift, P[0][1]>>shift) + +//check(best[0],best[1],0, b0) + if(s->full_search==ME_EPZS) + dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride, + pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, shift); + else + dmin= snake_search(s, best, dmin, new_pic, old_pic, pic_stride, + pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, shift); +//check(best[0],best[1],0, b1) + *mx_ptr= best[0]; + *my_ptr= best[1]; + +// printf("%d %d %d \n", best[0], best[1], dmin); + return dmin; +} + +static int epzs_motion_search4(MpegEncContext * s, int block, + int *mx_ptr, int *my_ptr, + int P[6][2], int pred_x, int pred_y, + int xmin, int ymin, int xmax, int ymax) +{ + int best[2]={0, 0}; + int d, dmin; + UINT8 *new_pic, *old_pic; + const int pic_stride= s->linesize; + const int pic_xy= ((s->mb_y*2 + (block>>1))*pic_stride + s->mb_x*2 + (block&1))*8; + UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame + int quant= s->qscale; // qscale of the prev frame + const int shift= 1+s->quarter_sample; + + new_pic = s->new_picture[0] + pic_xy; + old_pic = s->last_picture[0] + pic_xy; + + dmin = pix_abs8x8(new_pic, old_pic, pic_stride); + + /* first line */ + if ((s->mb_y == 0 || s->first_slice_line || s->first_gob_line) && block<2) { + CHECK_MV4(P[1][0]>>shift, P[1][1]>>shift) + }else{ + CHECK_MV4(P[4][0]>>shift, P[4][1]>>shift) + if(dmin<Z_THRESHOLD){ + *mx_ptr= P[4][0]>>shift; + *my_ptr= P[4][1]>>shift; +//printf("M\n"); + return dmin; + } + CHECK_MV4(P[1][0]>>shift, P[1][1]>>shift) + CHECK_MV4(P[2][0]>>shift, P[2][1]>>shift) + CHECK_MV4(P[3][0]>>shift, P[3][1]>>shift) + } + CHECK_MV4(P[0][0]>>shift, P[0][1]>>shift) + CHECK_MV4(P[5][0]>>shift, P[5][1]>>shift) + +//check(best[0],best[1],0, b0) + dmin= small_diamond_search4MV(s, best, dmin, new_pic, old_pic, pic_stride, + pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, shift); +//check(best[0],best[1],0, b1) + *mx_ptr= best[0]; + *my_ptr= best[1]; + +// printf("%d %d %d \n", best[0], best[1], dmin); + return dmin; +} + +#define CHECK_HALF_MV(suffix, x, y) \ + d= pix_abs16x16_ ## suffix(pix, ptr+((x)>>1), s->linesize);\ + d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*quant;\ + if(d<dminh){\ + dminh= d;\ + mx= mx1 + x;\ + my= my1 + y;\ + } + +#define CHECK_HALF_MV4(suffix, x, y) \ + d= pix_abs8x8_ ## suffix(pix, ptr+((x)>>1), s->linesize);\ + d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*quant;\ + if(d<dminh){\ + dminh= d;\ + mx= mx1 + x;\ + my= my1 + y;\ + } + /* The idea would be to make half pel ME after Inter/Intra decision to save time. */ -static void halfpel_motion_search(MpegEncContext * s, +static inline void halfpel_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr, int dmin, - int xmin, int ymin, int xmax, int ymax) + int xmin, int ymin, int xmax, int ymax, + int pred_x, int pred_y) { + UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame + const int quant= s->qscale; + int pen_x, pen_y; int mx, my, mx1, my1, d, xx, yy, dminh; - UINT8 *pix; + UINT8 *pix, *ptr; - mx = *mx_ptr << 1; - my = *my_ptr << 1; + mx = *mx_ptr; + my = *my_ptr; + ptr = s->last_picture[0] + (my * s->linesize) + mx; xx = 16 * s->mb_x; yy = 16 * s->mb_y; - + pix = s->new_picture[0] + (yy * s->linesize) + xx; + dminh = dmin; - /* Half pixel search */ - mx1 = mx; - my1 = my; + if (mx > xmin && mx < xmax && + my > ymin && my < ymax) { - pix = s->new_picture[0] + (yy * s->linesize) + xx; + mx= mx1= 2*(mx - xx); + my= my1= 2*(my - yy); + if(dmin < Z_THRESHOLD && mx==0 && my==0){ + *mx_ptr = 0; + *my_ptr = 0; + return; + } + + pen_x= pred_x + mx; + pen_y= pred_y + my; + + ptr-= s->linesize; + CHECK_HALF_MV(xy2, -1, -1) + CHECK_HALF_MV(y2 , 0, -1) + CHECK_HALF_MV(xy2, +1, -1) + + ptr+= s->linesize; + CHECK_HALF_MV(x2 , -1, 0) + CHECK_HALF_MV(x2 , +1, 0) + CHECK_HALF_MV(xy2, -1, +1) + CHECK_HALF_MV(y2 , 0, +1) + CHECK_HALF_MV(xy2, +1, +1) + + }else{ + mx= 2*(mx - xx); + my= 2*(my - yy); + } - if ((mx > (xmin << 1)) && mx < (xmax << 1) && - (my > (ymin << 1)) && my < (ymax << 1)) { - int dx, dy, px, py; - UINT8 *ptr; - for (dy = -1; dy <= 1; dy++) { - for (dx = -1; dx <= 1; dx++) { - if (dx != 0 || dy != 0) { - px = mx1 + dx; - py = my1 + dy; - ptr = s->last_picture[0] + ((py >> 1) * s->linesize) + (px >> 1); - switch (((py & 1) << 1) | (px & 1)) { - default: - case 0: - d = pix_abs16x16(pix, ptr, s->linesize, 16); - break; - case 1: - d = pix_abs16x16_x2(pix, ptr, s->linesize, 16); - break; - case 2: - d = pix_abs16x16_y2(pix, ptr, s->linesize, 16); - break; - case 3: - d = pix_abs16x16_xy2(pix, ptr, s->linesize, 16); - break; - } - if (d < dminh) { - dminh = d; - mx = px; - my = py; - } - } - } + *mx_ptr = mx; + *my_ptr = my; +} + +static inline void halfpel_motion_search4(MpegEncContext * s, + int *mx_ptr, int *my_ptr, int dmin, + int xmin, int ymin, int xmax, int ymax, + int pred_x, int pred_y, int block_x, int block_y) +{ + UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame + const int quant= s->qscale; + int pen_x, pen_y; + int mx, my, mx1, my1, d, xx, yy, dminh; + UINT8 *pix, *ptr; + + xx = 8 * block_x; + yy = 8 * block_y; + pix = s->new_picture[0] + (yy * s->linesize) + xx; + + mx = *mx_ptr; + my = *my_ptr; + ptr = s->last_picture[0] + ((yy+my) * s->linesize) + xx + mx; + + dminh = dmin; + + if (mx > xmin && mx < xmax && + my > ymin && my < ymax) { + + mx= mx1= 2*mx; + my= my1= 2*my; + if(dmin < Z_THRESHOLD && mx==0 && my==0){ + *mx_ptr = 0; + *my_ptr = 0; + return; } + + pen_x= pred_x + mx; + pen_y= pred_y + my; + + ptr-= s->linesize; + CHECK_HALF_MV4(xy2, -1, -1) + CHECK_HALF_MV4(y2 , 0, -1) + CHECK_HALF_MV4(xy2, +1, -1) + + ptr+= s->linesize; + CHECK_HALF_MV4(x2 , -1, 0) + CHECK_HALF_MV4(x2 , +1, 0) + CHECK_HALF_MV4(xy2, -1, +1) + CHECK_HALF_MV4(y2 , 0, +1) + CHECK_HALF_MV4(xy2, +1, +1) + + }else{ + mx*=2; + my*=2; } - *mx_ptr = mx - (xx << 1); - *my_ptr = my - (yy << 1); - //fprintf(stderr,"half - MX: %d\tMY: %d\n",*mx_ptr ,*my_ptr); + *mx_ptr = mx; + *my_ptr = my; +} + +static inline void set_mv_tables(MpegEncContext * s, int mx, int my) +{ + const int xy= s->mb_x + s->mb_y*s->mb_width; + + s->mv_table[0][xy] = mx; + s->mv_table[1][xy] = my; + + /* has allready been set to the 4 MV if 4MV is done */ + if(!(s->flags&CODEC_FLAG_4MV)){ + int mot_xy= s->block_index[0]; + + s->motion_val[mot_xy ][0]= mx; + s->motion_val[mot_xy ][1]= my; + s->motion_val[mot_xy+1][0]= mx; + s->motion_val[mot_xy+1][1]= my; + + mot_xy += s->block_wrap[0]; + s->motion_val[mot_xy ][0]= mx; + s->motion_val[mot_xy ][1]= my; + s->motion_val[mot_xy+1][0]= mx; + s->motion_val[mot_xy+1][1]= my; + } } #ifndef CONFIG_TEST_MV_ENCODE -int estimate_motion(MpegEncContext * s, - int mb_x, int mb_y, - int *mx_ptr, int *my_ptr) +void estimate_motion(MpegEncContext * s, + int mb_x, int mb_y) { UINT8 *pix, *ppix; int sum, varc, vard, mx, my, range, dmin, xx, yy; int xmin, ymin, xmax, ymax; + int rel_xmin, rel_ymin, rel_xmax, rel_ymax; + int pred_x=0, pred_y=0; + int P[6][2]; + const int shift= 1+s->quarter_sample; + int mb_type=0; range = 8 * (1 << (s->f_code - 1)); /* XXX: temporary kludge to avoid overflow for msmpeg4 */ @@ -411,6 +836,8 @@ int estimate_motion(MpegEncContext * s, if (s->unrestricted_mv) { xmin = -16; ymin = -16; + if (s->h263_plus) + range *= 2; if(s->avctx==NULL || s->avctx->codec->id!=CODEC_ID_MPEG4){ xmax = s->mb_width*16; ymax = s->mb_height*16; @@ -426,7 +853,6 @@ int estimate_motion(MpegEncContext * s, xmax = s->mb_width*16 - 16; ymax = s->mb_height*16 - 16; } - switch(s->full_search) { case ME_ZERO: default: @@ -442,8 +868,116 @@ int estimate_motion(MpegEncContext * s, case ME_PHODS: dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax); break; + case ME_X1: + case ME_EPZS: + { + const int mot_stride = s->block_wrap[0]; + const int mot_xy = s->block_index[0]; + + rel_xmin= xmin - mb_x*16; + rel_xmax= xmax - mb_x*16; + rel_ymin= ymin - mb_y*16; + rel_ymax= ymax - mb_y*16; + + P[0][0] = s->motion_val[mot_xy ][0]; + P[0][1] = s->motion_val[mot_xy ][1]; + P[1][0] = s->motion_val[mot_xy - 1][0]; + P[1][1] = s->motion_val[mot_xy - 1][1]; + if(P[1][0] > (rel_xmax<<shift)) P[1][0]= (rel_xmax<<shift); + + /* special case for first line */ + if ((mb_y == 0 || s->first_slice_line || s->first_gob_line)) { + P[4][0] = P[1][0]; + P[4][1] = P[1][1]; + } else { + P[2][0] = s->motion_val[mot_xy - mot_stride ][0]; + P[2][1] = s->motion_val[mot_xy - mot_stride ][1]; + P[3][0] = s->motion_val[mot_xy - mot_stride + 2 ][0]; + P[3][1] = s->motion_val[mot_xy - mot_stride + 2 ][1]; + if(P[2][1] > (rel_ymax<<shift)) P[2][1]= (rel_ymax<<shift); + if(P[3][0] < (rel_xmin<<shift)) P[3][0]= (rel_xmin<<shift); + if(P[3][1] > (rel_ymax<<shift)) P[3][1]= (rel_ymax<<shift); + + P[4][0]= mid_pred(P[1][0], P[2][0], P[3][0]); + P[4][1]= mid_pred(P[1][1], P[2][1], P[3][1]); + } + if(s->out_format == FMT_H263){ + pred_x = P[4][0]; + pred_y = P[4][1]; + }else { /* mpeg1 at least */ + pred_x= P[1][0]; + pred_y= P[1][1]; + } + } + dmin = epzs_motion_search(s, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax); + + mx+= mb_x*16; + my+= mb_y*16; + break; + } + + if(s->flags&CODEC_FLAG_4MV){ + int block; + + mb_type|= MB_TYPE_INTER4V; + + for(block=0; block<4; block++){ + int mx4, my4; + int pred_x4, pred_y4; + int dmin4; + static const int off[4]= {2, 1, 1, -1}; + const int mot_stride = s->block_wrap[0]; + const int mot_xy = s->block_index[block]; + const int block_x= mb_x*2 + (block&1); + const int block_y= mb_y*2 + (block>>1); + + const int rel_xmin4= xmin - block_x*8; + const int rel_xmax4= xmax - block_x*8 + 8; + const int rel_ymin4= ymin - block_y*8; + const int rel_ymax4= ymax - block_y*8 + 8; + + P[0][0] = s->motion_val[mot_xy ][0]; + P[0][1] = s->motion_val[mot_xy ][1]; + P[1][0] = s->motion_val[mot_xy - 1][0]; + P[1][1] = s->motion_val[mot_xy - 1][1]; + if(P[1][0] > (rel_xmax4<<shift)) P[1][0]= (rel_xmax4<<shift); + + /* special case for first line */ + if ((mb_y == 0 || s->first_slice_line || s->first_gob_line) && block<2) { + P[4][0] = P[1][0]; + P[4][1] = P[1][1]; + } else { + P[2][0] = s->motion_val[mot_xy - mot_stride ][0]; + P[2][1] = s->motion_val[mot_xy - mot_stride ][1]; + P[3][0] = s->motion_val[mot_xy - mot_stride + off[block]][0]; + P[3][1] = s->motion_val[mot_xy - mot_stride + off[block]][1]; + if(P[2][1] > (rel_ymax4<<shift)) P[2][1]= (rel_ymax4<<shift); + if(P[3][0] < (rel_xmin4<<shift)) P[3][0]= (rel_xmin4<<shift); + if(P[3][0] > (rel_xmax4<<shift)) P[3][0]= (rel_xmax4<<shift); + if(P[3][1] > (rel_ymax4<<shift)) P[3][1]= (rel_ymax4<<shift); + + P[4][0]= mid_pred(P[1][0], P[2][0], P[3][0]); + P[4][1]= mid_pred(P[1][1], P[2][1], P[3][1]); + } + if(s->out_format == FMT_H263){ + pred_x4 = P[4][0]; + pred_y4 = P[4][1]; + }else { /* mpeg1 at least */ + pred_x4= P[1][0]; + pred_y4= P[1][1]; + } + P[5][0]= mx - mb_x*16; + P[5][1]= my - mb_y*16; + + dmin4 = epzs_motion_search4(s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4); + + halfpel_motion_search4(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, + pred_x4, pred_y4, block_x, block_y); + + s->motion_val[ s->block_index[block] ][0]= mx4; + s->motion_val[ s->block_index[block] ][1]= my4; + } } - emms_c(); /* intra / predictive decision */ xx = mb_x * 16; @@ -452,36 +986,53 @@ int estimate_motion(MpegEncContext * s, pix = s->new_picture[0] + (yy * s->linesize) + xx; /* At this point (mx,my) are full-pell and the absolute displacement */ ppix = s->last_picture[0] + (my * s->linesize) + mx; - + sum = pix_sum(pix, s->linesize); - varc = pix_norm1(pix, s->linesize); - vard = pix_norm(pix, ppix, s->linesize); +#if 0 + varc = pix_dev(pix, s->linesize, (sum+128)>>8) + INTER_BIAS; + vard = pix_abs16x16(pix, ppix, s->linesize); +#else + sum= (sum+8)>>4; + varc = ((pix_norm1(pix, s->linesize) - sum*sum + 128 + 500)>>8); + vard = (pix_norm(pix, ppix, s->linesize)+128)>>8; +#endif - vard = vard >> 8; - sum = sum >> 8; - varc = (varc >> 8) - (sum * sum); s->mb_var[s->mb_width * mb_y + mb_x] = varc; - s->avg_mb_var += varc; - + s->avg_mb_var+= varc; + s->mc_mb_var += vard; + #if 0 printf("varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n", varc, s->avg_mb_var, sum, vard, mx - xx, my - yy); #endif - if (vard <= 64 || vard < varc) { - if (s->full_search != ME_ZERO) { - halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax); - } else { - mx -= 16 * s->mb_x; - my -= 16 * s->mb_y; + if(s->flags&CODEC_FLAG_HQ){ + if (vard*2 + 200 > varc) + mb_type|= MB_TYPE_INTRA; + if (varc*2 + 200 > vard){ + mb_type|= MB_TYPE_INTER; + halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, pred_x, pred_y); + }else{ + mx = mx*2 - mb_x*32; + my = my*2 - mb_y*32; + } + }else{ + if (vard <= 64 || vard < varc) { + mb_type|= MB_TYPE_INTER; + if (s->full_search != ME_ZERO) { + halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, pred_x, pred_y); + } else { + mx -= 16 * mb_x; + my -= 16 * mb_y; + } + }else{ + mb_type|= MB_TYPE_INTRA; + mx = 0;//mx*2 - 32 * mb_x; + my = 0;//my*2 - 32 * mb_y; } - *mx_ptr = mx; - *my_ptr = my; - return 0; - } else { - *mx_ptr = 0; - *my_ptr = 0; - return 1; } + + s->mb_type[mb_y*s->mb_width + mb_x]= mb_type; + set_mv_tables(s, mx, my); } #else diff --git a/src/libffmpeg/libavcodec/mpeg12.c b/src/libffmpeg/libavcodec/mpeg12.c index 41bf524e4..ac614d5ce 100644 --- a/src/libffmpeg/libavcodec/mpeg12.c +++ b/src/libffmpeg/libavcodec/mpeg12.c @@ -20,6 +20,7 @@ #include "avcodec.h" #include "dsputil.h" #include "mpegvideo.h" +#include "xineutils.h" #include "mpeg12data.h" @@ -51,6 +52,9 @@ static int mpeg2_decode_block_intra(MpegEncContext *s, int n); static int mpeg_decode_motion(MpegEncContext *s, int fcode, int pred); +static UINT16 mv_penalty[MAX_FCODE+1][MAX_MV*2+1]; +static UINT8 fcode_tab[MAX_MV*2+1]; + static void put_header(MpegEncContext *s, int header) { align_put_bits(&s->pb); @@ -66,7 +70,7 @@ static void mpeg1_encode_sequence_header(MpegEncContext *s) int n; UINT64 time_code; - if ((s->picture_number % s->gop_size) == 0) { + if (s->picture_in_gop_number == 0) { /* mpeg1 header repeated every gop */ put_header(s, SEQ_START_CODE); @@ -129,7 +133,6 @@ static void mpeg1_encode_sequence_header(MpegEncContext *s) } } - s->fake_picture_number++; } @@ -226,6 +229,7 @@ void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number) /* temporal reference */ put_bits(&s->pb, 10, (s->fake_picture_number - s->gop_picture_number) & 0x3ff); + s->fake_picture_number++; put_bits(&s->pb, 3, s->pict_type); put_bits(&s->pb, 16, 0xffff); /* non constant bit rate */ @@ -353,6 +357,53 @@ static void mpeg1_encode_motion(MpegEncContext *s, int val) } } +void mpeg1_encode_init(MpegEncContext *s) +{ + static int done=0; + if(!done){ + int f_code; + int mv; + + done=1; + for(f_code=1; f_code<=MAX_FCODE; f_code++){ + for(mv=-MAX_MV; mv<=MAX_MV; mv++){ + int len; + + if(mv==0) len= mbMotionVectorTable[0][1]; + else{ + int val, bit_size, range, code; + + bit_size = s->f_code - 1; + range = 1 << bit_size; + + val=mv; + if (val < 0) + val = -val; + val--; + code = (val >> bit_size) + 1; + if(code<17){ + len= mbMotionVectorTable[code][1] + 1 + bit_size; + }else{ + len= mbMotionVectorTable[16][1] + 2 + bit_size; + } + } + + mv_penalty[f_code][mv+MAX_MV]= len; + } + } + + + for(f_code=MAX_FCODE; f_code>0; f_code--){ + for(mv=-(8<<f_code); mv<(8<<f_code); mv++){ + fcode_tab[mv+MAX_MV]= f_code; + } + } + } + s->mv_penalty= mv_penalty; + + s->fcode_tab= fcode_tab; +} + static inline void encode_dc(MpegEncContext *s, int diff, int component) { if (component == 0) { @@ -1119,6 +1170,7 @@ typedef struct Mpeg1Context { UINT8 *buf_ptr; int buffer_size; int mpeg_enc_ctx_allocated; /* true if decoding context allocated */ + int repeat_field; /* true if we must repeat the field */ } Mpeg1Context; static int mpeg_decode_init(AVCodecContext *avctx) @@ -1131,6 +1183,7 @@ static int mpeg_decode_init(AVCodecContext *avctx) s->start_code = -1; s->buf_ptr = s->buffer; s->mpeg_enc_ctx.picture_number = 0; + s->repeat_field = 0; return 0; } @@ -1203,7 +1256,7 @@ static void mpeg_decode_sequence_extension(MpegEncContext *s) int frame_rate_ext_n, frame_rate_ext_d; skip_bits(&s->gb, 8); /* profil and level */ - skip_bits(&s->gb, 1); /* progressive_sequence */ + s->progressive_sequence = get_bits1(&s->gb); /* progressive_sequence */ skip_bits(&s->gb, 2); /* chroma_format */ horiz_size_ext = get_bits(&s->gb, 2); vert_size_ext = get_bits(&s->gb, 2); @@ -1279,12 +1332,13 @@ static void mpeg_decode_picture_coding_extension(MpegEncContext *s) s->chroma_420_type = get_bits1(&s->gb); s->progressive_frame = get_bits1(&s->gb); /* composite display not parsed */ - dprintf("intra_dc_precion=%d\n", s->intra_dc_precision); + dprintf("intra_dc_precision=%d\n", s->intra_dc_precision); dprintf("picture_structure=%d\n", s->picture_structure); dprintf("conceal=%d\n", s->concealment_motion_vectors); dprintf("intra_vlc_format=%d\n", s->intra_vlc_format); dprintf("alternate_scan=%d\n", s->alternate_scan); dprintf("frame_pred_frame_dct=%d\n", s->frame_pred_frame_dct); + dprintf("progressive_frame=%d\n", s->progressive_frame); } static void mpeg_decode_extension(AVCodecContext *avctx, @@ -1349,7 +1403,7 @@ static int mpeg_decode_slice(AVCodecContext *avctx, } for(;;) { - memset(s->block, 0, sizeof(s->block)); + clear_blocks(s->block[0]); ret = mpeg_decode_mb(s, s->block); dprintf("ret=%d\n", ret); if (ret < 0) @@ -1358,7 +1412,8 @@ static int mpeg_decode_slice(AVCodecContext *avctx, break; MPV_decode_mb(s, s->block); } - + emms_c(); + /* end of slice reached */ if (s->mb_x == (s->mb_width - 1) && s->mb_y == (s->mb_height - 1)) { @@ -1434,6 +1489,7 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx, avctx->width = width; avctx->height = height; avctx->frame_rate = frame_rate_tab[s->frame_rate_index]; + s->frame_rate = avctx->frame_rate; avctx->bit_rate = s->bit_rate; if (MPV_common_init(s) < 0) @@ -1505,13 +1561,14 @@ static int mpeg_decode_frame(AVCodecContext *avctx, UINT8 *buf_end, *buf_ptr, *buf_start; int len, start_code_found, ret, code, start_code, input_size; AVPicture *picture = data; - + MpegEncContext *s2 = &s->mpeg_enc_ctx; + dprintf("fill_buffer\n"); *data_size = 0; + /* special case for last picture */ if (buf_size == 0) { - MpegEncContext *s2 = &s->mpeg_enc_ctx; if (s2->picture_number > 0) { picture->data[0] = s2->next_picture[0]; picture->data[1] = s2->next_picture[1]; @@ -1526,6 +1583,15 @@ static int mpeg_decode_frame(AVCodecContext *avctx, buf_ptr = buf; buf_end = buf + buf_size; + + if (s->repeat_field % 2 == 1) { + s->repeat_field++; + //fprintf(stderr,"\nRepeating last frame: %d -> %d! pict: %d %d", avctx->frame_number-1, avctx->frame_number, + // s2->picture_number, s->repeat_field); + *data_size = sizeof(AVPicture); + goto the_end; + } + while (buf_ptr < buf_end) { buf_start = buf_ptr; /* find start next code */ @@ -1574,6 +1640,14 @@ static int mpeg_decode_frame(AVCodecContext *avctx, start_code, s->buffer, input_size); if (ret == 1) { /* got a picture: exit */ + /* first check if we must repeat the frame */ + if (s2->progressive_frame && s2->repeat_first_field) { + //fprintf(stderr,"\nRepeat this frame: %d! pict: %d",avctx->frame_number,s2->picture_number); + s2->repeat_first_field = 0; + s2->progressive_frame = 0; + if (++s->repeat_field > 2) + s->repeat_field = 0; + } *data_size = sizeof(AVPicture); goto the_end; } diff --git a/src/libffmpeg/libavcodec/mpeg4data.h b/src/libffmpeg/libavcodec/mpeg4data.h index 4eed75654..91b99625f 100644 --- a/src/libffmpeg/libavcodec/mpeg4data.h +++ b/src/libffmpeg/libavcodec/mpeg4data.h @@ -12,13 +12,13 @@ #define GMC_SPRITE 2 /* dc encoding for mpeg4 */ -static const UINT8 DCtab_lum[13][2] = +const UINT8 DCtab_lum[13][2] = { {3,3}, {3,2}, {2,2}, {2,3}, {1,3}, {1,4}, {1,5}, {1,6}, {1,7}, {1,8}, {1,9}, {1,10}, {1,11}, }; -static const UINT8 DCtab_chrom[13][2] = +const UINT8 DCtab_chrom[13][2] = { {3,2}, {2,2}, {1,2}, {1,3}, {1,4}, {1,5}, {1,6}, {1,7}, {1,8}, {1,9}, {1,10}, {1,11}, {1,12}, @@ -99,3 +99,26 @@ static const UINT16 sprite_trajectory_tab[15][2] = { {0x0E, 4}, {0x1E, 5}, {0x3E, 6}, {0x7E, 7}, {0xFE, 8}, {0x1FE, 9},{0x3FE, 10},{0x7FE, 11},{0xFFE, 12}, }; + +static const UINT8 mb_type_b_tab[4][2] = { + {1, 1}, {1, 2}, {1, 3}, {1, 4}, +}; + +static const UINT16 pixel_aspect[16][2]={ + {0, 0}, + {1, 1}, + {12, 11}, + {10, 11}, + {16, 11}, + {40, 33}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, +}; diff --git a/src/libffmpeg/libavcodec/mpegvideo.c b/src/libffmpeg/libavcodec/mpegvideo.c index a8297a2c1..9f572c3d9 100644 --- a/src/libffmpeg/libavcodec/mpegvideo.c +++ b/src/libffmpeg/libavcodec/mpegvideo.c @@ -15,19 +15,20 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * 4MV & hq encoding stuff by Michael Niedermayer <michaelni@gmx.at> */ #include <stdlib.h> #include <stdio.h> #include <math.h> #include <string.h> - -#include "config.h" -#include "xine-utils/xineutils.h" - #include "avcodec.h" #include "dsputil.h" #include "mpegvideo.h" +#include "config.h" +#include "xine-utils/xineutils.h" + #ifdef USE_FASTMEMCPY #include "fastmemcpy.h" #endif @@ -71,6 +72,9 @@ static UINT8 h263_chroma_roundtab[16] = { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, }; +static UINT16 default_mv_penalty[MAX_FCODE+1][MAX_MV*2+1]; +static UINT8 default_fcode_tab[MAX_MV*2+1]; + /* default motion estimation */ int motion_estimation_method = ME_LOG; @@ -175,7 +179,7 @@ int MPV_common_init(MpegEncContext *s) } } - if (s->out_format == FMT_H263) { + if (s->out_format == FMT_H263 || s->encoding) { int size; /* MV prediction */ size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2); @@ -228,6 +232,8 @@ int MPV_common_init(MpegEncContext *s) if (!s->mbskip_table) goto fail; } + + s->block= s->intra_block; s->context_initialized = 1; return 0; @@ -280,6 +286,7 @@ int MPV_encode_init(AVCodecContext *avctx) int i; s->bit_rate = avctx->bit_rate; + s->bit_rate_tolerance = avctx->bit_rate_tolerance; s->frame_rate = avctx->frame_rate; s->width = avctx->width; s->height = avctx->height; @@ -288,7 +295,14 @@ int MPV_encode_init(AVCodecContext *avctx) s->rtp_payload_size = avctx->rtp_payload_size; if (avctx->rtp_callback) s->rtp_callback = avctx->rtp_callback; + s->qmin= avctx->qmin; + s->qmax= avctx->qmax; + s->max_qdiff= avctx->max_qdiff; + s->qcompress= avctx->qcompress; + s->qblur= avctx->qblur; s->avctx = avctx; + s->aspect_ratio_info= avctx->aspect_ratio_info; + s->flags= avctx->flags; if (s->gop_size <= 1) { s->intra_only = 1; @@ -344,18 +358,59 @@ int MPV_encode_init(AVCodecContext *avctx) s->h263_pred = 1; s->unrestricted_mv = 1; break; - case CODEC_ID_MSMPEG4: + case CODEC_ID_MSMPEG4V1: + s->out_format = FMT_H263; + s->h263_msmpeg4 = 1; + s->h263_pred = 1; + s->unrestricted_mv = 1; + s->msmpeg4_version= 1; + break; + case CODEC_ID_MSMPEG4V2: s->out_format = FMT_H263; s->h263_msmpeg4 = 1; s->h263_pred = 1; s->unrestricted_mv = 1; + s->msmpeg4_version= 2; + break; + case CODEC_ID_MSMPEG4V3: + s->out_format = FMT_H263; + s->h263_msmpeg4 = 1; + s->h263_pred = 1; + s->unrestricted_mv = 1; + s->msmpeg4_version= 3; break; default: return -1; } + + if((s->flags&CODEC_FLAG_4MV) && !(s->flags&CODEC_FLAG_HQ)){ + printf("4MV is currently only supported in HQ mode\n"); + return -1; + } + + { /* set up some save defaults, some codecs might override them later */ + static int done=0; + if(!done){ + int i; + done=1; + memset(default_mv_penalty, 0, sizeof(UINT16)*(MAX_FCODE+1)*(2*MAX_MV+1)); + memset(default_fcode_tab , 0, sizeof(UINT8)*(2*MAX_MV+1)); + + for(i=-16; i<16; i++){ + default_fcode_tab[i + MAX_MV]= 1; + } + } + } + s->mv_penalty= default_mv_penalty; + s->fcode_tab= default_fcode_tab; if (s->out_format == FMT_H263) - h263_encode_init_vlc(s); + h263_encode_init(s); + else if (s->out_format == FMT_MPEG1) + mpeg1_encode_init(s); + + /* dont use mv_penalty table for crap MV as it would be confused */ + if(s->full_search<4) s->mv_penalty= default_mv_penalty; s->encoding = 1; @@ -373,6 +428,7 @@ int MPV_encode_init(AVCodecContext *avctx) rate_control_init(s); s->picture_number = 0; + s->picture_in_gop_number = 0; s->fake_picture_number = 0; /* motion detector init */ s->f_code = 1; @@ -434,6 +490,7 @@ void MPV_frame_start(MpegEncContext *s) s->current_picture[i] = s->aux_picture[i]; } } else { + s->last_non_b_pict_type= s->pict_type; for(i=0;i<3;i++) { /* swap next and last */ tmp = s->last_picture[i]; @@ -475,16 +532,18 @@ int MPV_encode_picture(AVCodecContext *avctx, init_put_bits(&s->pb, buf, buf_size, NULL, NULL); + s->force_type= (avctx->flags&CODEC_FLAG_TYPE) ? + (avctx->key_frame ? I_TYPE : P_TYPE) : 0; if (!s->intra_only) { /* first picture of GOP is intra */ - if ((s->picture_number % s->gop_size) == 0) + if (s->picture_in_gop_number % s->gop_size==0 || s->force_type==I_TYPE){ + s->picture_in_gop_number=0; s->pict_type = I_TYPE; - else + }else s->pict_type = P_TYPE; } else { s->pict_type = I_TYPE; } - avctx->key_frame = (s->pict_type == I_TYPE); MPV_frame_start(s); @@ -515,15 +574,30 @@ int MPV_encode_picture(AVCodecContext *avctx, } encode_picture(s, s->picture_number); - + avctx->key_frame = (s->pict_type == I_TYPE); + avctx->header_bits = s->header_bits; + avctx->mv_bits = s->mv_bits; + avctx->misc_bits = s->misc_bits; + avctx->i_tex_bits = s->i_tex_bits; + avctx->p_tex_bits = s->p_tex_bits; + avctx->i_count = s->i_count; + avctx->p_count = s->p_count; + avctx->skip_count = s->skip_count; + MPV_frame_end(s); s->picture_number++; + s->picture_in_gop_number++; if (s->out_format == FMT_MJPEG) mjpeg_picture_trailer(s); flush_put_bits(&s->pb); - s->total_bits += (pbBufPtr(&s->pb) - s->pb.buf) * 8; + s->last_frame_bits= s->frame_bits; + s->frame_bits = (pbBufPtr(&s->pb) - s->pb.buf) * 8; + s->total_bits += s->frame_bits; + avctx->frame_bits = s->frame_bits; +//printf("fcode: %d, type: %d, head: %d, mv: %d, misc: %d, frame: %d, itex: %d, ptex: %d\n", +//s->f_code, avctx->key_frame, s->header_bits, s->mv_bits, s->misc_bits, s->frame_bits, s->i_tex_bits, s->p_tex_bits); avctx->quality = s->qscale; if (avctx->get_psnr) { @@ -555,7 +629,7 @@ static inline void gmc1_motion(MpegEncContext *s, int dxy, offset, mx, my, src_x, src_y, height, linesize; int motion_x, motion_y; - if(s->real_sprite_warping_points>1) printf("Oops, thats bad, contact the developers\n"); + if(s->real_sprite_warping_points>1) printf("more than 1 warp point isnt supported\n"); motion_x= s->sprite_offset[0][0]; motion_y= s->sprite_offset[0][1]; src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1)); @@ -749,7 +823,7 @@ static inline void MPV_motion(MpegEncContext *s, ref_picture, 0, 16); #endif - }else if(s->quarter_sample){ + }else if(s->quarter_sample && dir==0){ //FIXME qpel_motion(s, dest_y, dest_cb, dest_cr, 0, ref_picture, 0, 0, pix_op, qpix_op, @@ -768,7 +842,7 @@ static inline void MPV_motion(MpegEncContext *s, dxy = ((motion_y & 1) << 1) | (motion_x & 1); src_x = mb_x * 16 + (motion_x >> 1) + (i & 1) * 8; - src_y = mb_y * 16 + (motion_y >> 1) + ((i >> 1) & 1) * 8; + src_y = mb_y * 16 + (motion_y >> 1) + (i >>1) * 8; /* WARNING: do no forget half pels */ src_x = clip(src_x, -16, s->width); @@ -934,8 +1008,9 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) else if (s->h263_pred || s->h263_aic) s->mbintra_table[mb_x + mb_y*s->mb_width]=1; - /* update motion predictor */ + /* update motion predictor, not for B-frames as they need the motion_val from the last P/S-Frame */ if (s->out_format == FMT_H263) { + if(s->pict_type!=B_TYPE){ int xy, wrap, motion_x, motion_y; wrap = 2 * s->mb_width + 2; @@ -958,6 +1033,7 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) s->motion_val[xy + 1 + wrap][0] = motion_x; s->motion_val[xy + 1 + wrap][1] = motion_y; } + } } if (!s->intra_only) { @@ -1031,16 +1107,326 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) } } the_end: - emms_c(); + emms_c(); //FIXME remove } -static void encode_picture(MpegEncContext *s, int picture_number) +static void encode_mb(MpegEncContext *s) { - int mb_x, mb_y, wrap, last_gob, pdif = 0; + int wrap; + const int mb_x= s->mb_x; + const int mb_y= s->mb_y; UINT8 *ptr; - int i, motion_x, motion_y; + const int motion_x= s->mv[0][0][0]; + const int motion_y= s->mv[0][0][1]; + int i; + + /* get the pixels */ + wrap = s->linesize; + ptr = s->new_picture[0] + (mb_y * 16 * wrap) + mb_x * 16; + get_pixels(s->block[0], ptr, wrap); + get_pixels(s->block[1], ptr + 8, wrap); + get_pixels(s->block[2], ptr + 8 * wrap, wrap); + get_pixels(s->block[3], ptr + 8 * wrap + 8, wrap); + wrap = s->linesize >> 1; + ptr = s->new_picture[1] + (mb_y * 8 * wrap) + mb_x * 8; + get_pixels(s->block[4], ptr, wrap); + + wrap = s->linesize >> 1; + ptr = s->new_picture[2] + (mb_y * 8 * wrap) + mb_x * 8; + get_pixels(s->block[5], ptr, wrap); + + /* subtract previous frame if non intra */ + if (!s->mb_intra) { + int dxy, offset, mx, my; + + if(s->mv_type==MV_TYPE_16X16){ + dxy = ((motion_y & 1) << 1) | (motion_x & 1); + ptr = s->last_picture[0] + + ((mb_y * 16 + (motion_y >> 1)) * s->linesize) + + (mb_x * 16 + (motion_x >> 1)); + + sub_pixels_2(s->block[0], ptr, s->linesize, dxy); + sub_pixels_2(s->block[1], ptr + 8, s->linesize, dxy); + sub_pixels_2(s->block[2], ptr + s->linesize * 8, s->linesize, dxy); + sub_pixels_2(s->block[3], ptr + 8 + s->linesize * 8, s->linesize ,dxy); + + if (s->out_format == FMT_H263) { + /* special rounding for h263 */ + dxy = 0; + if ((motion_x & 3) != 0) + dxy |= 1; + if ((motion_y & 3) != 0) + dxy |= 2; + mx = motion_x >> 2; + my = motion_y >> 2; + } else { + mx = motion_x / 2; + my = motion_y / 2; + dxy = ((my & 1) << 1) | (mx & 1); + mx >>= 1; + my >>= 1; + } + offset = ((mb_y * 8 + my) * (s->linesize >> 1)) + (mb_x * 8 + mx); + ptr = s->last_picture[1] + offset; + sub_pixels_2(s->block[4], ptr, s->linesize >> 1, dxy); + ptr = s->last_picture[2] + offset; + sub_pixels_2(s->block[5], ptr, s->linesize >> 1, dxy); + }else{ + int src_x, src_y; + + for(i=0;i<4;i++) { + int motion_x = s->mv[0][i][0]; + int motion_y = s->mv[0][i][1]; + + dxy = ((motion_y & 1) << 1) | (motion_x & 1); + src_x = mb_x * 16 + (motion_x >> 1) + (i & 1) * 8; + src_y = mb_y * 16 + (motion_y >> 1) + (i >>1) * 8; + + ptr = s->last_picture[0] + (src_y * s->linesize) + (src_x); + sub_pixels_2(s->block[i], ptr, s->linesize, dxy); + } + /* In case of 8X8, we construct a single chroma motion vector + with a special rounding */ + mx = 0; + my = 0; + for(i=0;i<4;i++) { + mx += s->mv[0][i][0]; + my += s->mv[0][i][1]; + } + if (mx >= 0) + mx = (h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1)); + else { + mx = -mx; + mx = -(h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1)); + } + if (my >= 0) + my = (h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1)); + else { + my = -my; + my = -(h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1)); + } + dxy = ((my & 1) << 1) | (mx & 1); + mx >>= 1; + my >>= 1; + + src_x = mb_x * 8 + mx; + src_y = mb_y * 8 + my; + src_x = clip(src_x, -8, s->width/2); + if (src_x == s->width/2) + dxy &= ~1; + src_y = clip(src_y, -8, s->height/2); + if (src_y == s->height/2) + dxy &= ~2; + + offset = (src_y * (s->linesize >> 1)) + src_x; + ptr = s->last_picture[1] + offset; + sub_pixels_2(s->block[4], ptr, s->linesize >> 1, dxy); + ptr = s->last_picture[2] + offset; + sub_pixels_2(s->block[5], ptr, s->linesize >> 1, dxy); + } + } + +#if 0 + { + float adap_parm; + + adap_parm = ((s->avg_mb_var << 1) + s->mb_var[s->mb_width*mb_y+mb_x] + 1.0) / + ((s->mb_var[s->mb_width*mb_y+mb_x] << 1) + s->avg_mb_var + 1.0); + + printf("\ntype=%c qscale=%2d adap=%0.2f dquant=%4.2f var=%4d avgvar=%4d", + (s->mb_type[s->mb_width*mb_y+mb_x] > 0) ? 'I' : 'P', + s->qscale, adap_parm, s->qscale*adap_parm, + s->mb_var[s->mb_width*mb_y+mb_x], s->avg_mb_var); + } +#endif + /* DCT & quantize */ + if (s->h263_msmpeg4) { + msmpeg4_dc_scale(s); + } else if (s->h263_pred) { + h263_dc_scale(s); + } else { + /* default quantization values */ + s->y_dc_scale = 8; + s->c_dc_scale = 8; + } + for(i=0;i<6;i++) { + s->block_last_index[i] = dct_quantize(s, s->block[i], i, s->qscale); + } + + /* huffman encode */ + switch(s->out_format) { + case FMT_MPEG1: + mpeg1_encode_mb(s, s->block, motion_x, motion_y); + break; + case FMT_H263: + if (s->h263_msmpeg4) + msmpeg4_encode_mb(s, s->block, motion_x, motion_y); + else if(s->h263_pred) + mpeg4_encode_mb(s, s->block, motion_x, motion_y); + else + h263_encode_mb(s, s->block, motion_x, motion_y); + break; + case FMT_MJPEG: + mjpeg_encode_mb(s, s->block); + break; + } +} + +static void copy_bits(PutBitContext *pb, UINT8 *src, int length) +{ + int bytes= length>>3; + int bits= length&7; + int i; + + for(i=0; i<bytes; i++) put_bits(pb, 8, src[i]); + put_bits(pb, bits, src[i]>>(8-bits)); +} + +static void encode_picture(MpegEncContext *s, int picture_number) +{ + int mb_x, mb_y, last_gob, pdif = 0; + int i; + int bits; + MpegEncContext best_s; + UINT8 bit_buf[4][3000]; //FIXME check that this is ALLWAYS large enogh for a MB s->picture_number = picture_number; + + s->block_wrap[0]= + s->block_wrap[1]= + s->block_wrap[2]= + s->block_wrap[3]= s->mb_width*2 + 2; + s->block_wrap[4]= + s->block_wrap[5]= s->mb_width + 2; + + s->last_mc_mb_var = s->mc_mb_var; + /* Reset the average MB variance */ + s->avg_mb_var = 0; + s->mc_mb_var = 0; + /* Estimate motion for every MB */ + if(s->pict_type == P_TYPE){ + for(mb_y=0; mb_y < s->mb_height; mb_y++) { + s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1; + s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1); + s->block_index[2]= s->block_wrap[0]*(mb_y*2 + 2) - 1; + s->block_index[3]= s->block_wrap[0]*(mb_y*2 + 2); + for(mb_x=0; mb_x < s->mb_width; mb_x++) { + s->mb_x = mb_x; + s->mb_y = mb_y; + s->block_index[0]+=2; + s->block_index[1]+=2; + s->block_index[2]+=2; + s->block_index[3]+=2; + + /* compute motion vector & mb_type and store in context */ + estimate_motion(s, mb_x, mb_y); +// s->mb_type[mb_y*s->mb_width + mb_x]=MB_TYPE_INTER; + } + } + emms_c(); + }else{ + /* I-Frame */ + //FIXME do we need to zero them? + memset(s->motion_val[0], 0, sizeof(INT16)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2); + memset(s->mv_table[0] , 0, sizeof(INT16)*s->mb_width*s->mb_height); + memset(s->mv_table[1] , 0, sizeof(INT16)*s->mb_width*s->mb_height); + memset(s->mb_type , MB_TYPE_INTRA, sizeof(UINT8)*s->mb_width*s->mb_height); + } + + if(s->avg_mb_var < s->mc_mb_var && s->pict_type != B_TYPE && (!s->force_type)){ //FIXME subtract MV bits + s->pict_type= I_TYPE; + s->picture_in_gop_number=0; + memset(s->mb_type , MB_TYPE_INTRA, sizeof(UINT8)*s->mb_width*s->mb_height); +//printf("Scene change detected, encoding as I Frame\n"); + } + + /* find best f_code for ME which do unlimited searches */ + if(s->pict_type==P_TYPE && s->full_search>3){ + int mv_num[8]; + int i; + int loose=0; + UINT8 * fcode_tab= s->fcode_tab; + + for(i=0; i<8; i++) mv_num[i]=0; + + for(i=0; i<s->mb_num; i++){ + if(s->mb_type[i] & MB_TYPE_INTER){ + mv_num[ fcode_tab[s->mv_table[0][i] + MAX_MV] ]++; + mv_num[ fcode_tab[s->mv_table[1][i] + MAX_MV] ]++; +//printf("%d %d %d\n", s->mv_table[0][i], fcode_tab[s->mv_table[0][i] + MAX_MV], i); + } +//else printf("I"); + } + + for(i=MAX_FCODE; i>1; i--){ + loose+= mv_num[i]; + if(loose > 10) break; //FIXME this is pretty ineffective + } + s->f_code= i; +/* for(i=0; i<=MAX_FCODE; i++){ + printf("%d ", mv_num[i]); + } + printf("\n");*/ + }else{ + s->f_code= 1; + } + +//printf("f_code %d ///\n", s->f_code); + /* convert MBs with too long MVs to I-Blocks */ + if(s->pict_type==P_TYPE){ + int i, x, y; + const int f_code= s->f_code; + UINT8 * fcode_tab= s->fcode_tab; +//FIXME try to clip instead of intra izing ;) + /* clip / convert to intra 16x16 type MVs */ + for(i=0; i<s->mb_num; i++){ + if(s->mb_type[i]&MB_TYPE_INTER){ + if( fcode_tab[s->mv_table[0][i] + MAX_MV] > f_code + || fcode_tab[s->mv_table[0][i] + MAX_MV] == 0 + || fcode_tab[s->mv_table[1][i] + MAX_MV] > f_code + || fcode_tab[s->mv_table[1][i] + MAX_MV] == 0 ){ + s->mb_type[i] &= ~MB_TYPE_INTER; + s->mb_type[i] |= MB_TYPE_INTRA; + s->mv_table[0][i] = 0; + s->mv_table[1][i] = 0; + } + } + } + + if(s->flags&CODEC_FLAG_4MV){ + int wrap= 2+ s->mb_width*2; + + /* clip / convert to intra 8x8 type MVs */ + for(y=0; y<s->mb_height; y++){ + int xy= (y*2 + 1)*wrap + 1; + i= y*s->mb_width; + + for(x=0; x<s->mb_width; x++){ + if(s->mb_type[i]&MB_TYPE_INTER4V){ + int block; + for(block=0; block<4; block++){ + int off= (block& 1) + (block>>1)*wrap; + int mx= s->motion_val[ xy + off ][0]; + int my= s->motion_val[ xy + off ][1]; + + if( fcode_tab[mx + MAX_MV] > f_code + || fcode_tab[mx + MAX_MV] == 0 + || fcode_tab[my + MAX_MV] > f_code + || fcode_tab[my + MAX_MV] == 0 ){ + s->mb_type[i] &= ~MB_TYPE_INTER4V; + s->mb_type[i] |= MB_TYPE_INTRA; + } + } + xy+=2; + i++; + } + } + } + } + } + +// printf("%d %d\n", s->avg_mb_var, s->mc_mb_var); + if (!s->fixed_qscale) s->qscale = rate_estimate_qscale(s); @@ -1056,6 +1442,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) convert_matrix(s->q_non_intra_matrix, s->q_non_intra_matrix16, s->non_intra_matrix, s->qscale); } + s->last_bits= get_bit_count(&s->pb); switch(s->out_format) { case FMT_MJPEG: mjpeg_picture_header(s); @@ -1074,7 +1461,17 @@ static void encode_picture(MpegEncContext *s, int picture_number) mpeg1_encode_picture_header(s, picture_number); break; } - + bits= get_bit_count(&s->pb); + s->header_bits= bits - s->last_bits; + s->last_bits= bits; + s->mv_bits=0; + s->misc_bits=0; + s->i_tex_bits=0; + s->p_tex_bits=0; + s->i_count=0; + s->p_count=0; + s->skip_count=0; + /* init last dc values */ /* note: quant matrix value (8) is implied here */ s->last_dc[0] = 128; @@ -1083,8 +1480,6 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->mb_incr = 1; s->last_mv[0][0][0] = 0; s->last_mv[0][0][1] = 0; - s->mv_type = MV_TYPE_16X16; - s->mv_dir = MV_DIR_FORWARD; /* Get the GOB height based on picture height */ if (s->out_format == FMT_H263 && !s->h263_pred && !s->h263_msmpeg4) { @@ -1095,33 +1490,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) else s->gob_index = 4; } - - /* Reset the average MB variance */ - s->avg_mb_var = 0; - - /* Estimate motion for every MB */ - for(mb_y=0; mb_y < s->mb_height; mb_y++) { - for(mb_x=0; mb_x < s->mb_width; mb_x++) { - s->mb_x = mb_x; - s->mb_y = mb_y; - - /* compute motion vector and macro block type (intra or non intra) */ - motion_x = 0; - motion_y = 0; - if (s->pict_type == P_TYPE) { - s->mb_intra = estimate_motion(s, mb_x, mb_y, - &motion_x, - &motion_y); - } else { - s->mb_intra = 1; - } - /* Store MB type and MV */ - s->mb_type[mb_y * s->mb_width + mb_x] = s->mb_intra; - s->mv_table[0][mb_y * s->mb_width + mb_x] = motion_x; - s->mv_table[1][mb_y * s->mb_width + mb_x] = motion_y; - } - } - + s->avg_mb_var = s->avg_mb_var / s->mb_num; for(mb_y=0; mb_y < s->mb_height; mb_y++) { @@ -1139,127 +1508,134 @@ static void encode_picture(MpegEncContext *s, int picture_number) } } + s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1; + s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1); + s->block_index[2]= s->block_wrap[0]*(mb_y*2 + 2) - 1; + s->block_index[3]= s->block_wrap[0]*(mb_y*2 + 2); + s->block_index[4]= s->block_wrap[4]*(mb_y + 1) + s->block_wrap[0]*(s->mb_height*2 + 2); + s->block_index[5]= s->block_wrap[4]*(mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2); for(mb_x=0; mb_x < s->mb_width; mb_x++) { + const int mb_type= s->mb_type[mb_y * s->mb_width + mb_x]; + PutBitContext pb; + int d; + int dmin=10000000; + int best=0; s->mb_x = mb_x; s->mb_y = mb_y; -#if 0 - /* compute motion vector and macro block type (intra or non intra) */ - motion_x = 0; - motion_y = 0; - if (s->pict_type == P_TYPE) { - s->mb_intra = estimate_motion(s, mb_x, mb_y, - &motion_x, - &motion_y); - } else { - s->mb_intra = 1; - } -#endif - - s->mb_intra = s->mb_type[mb_y * s->mb_width + mb_x]; - motion_x = s->mv_table[0][mb_y * s->mb_width + mb_x]; - motion_y = s->mv_table[1][mb_y * s->mb_width + mb_x]; - - /* get the pixels */ - wrap = s->linesize; - ptr = s->new_picture[0] + (mb_y * 16 * wrap) + mb_x * 16; - get_pixels(s->block[0], ptr, wrap); - get_pixels(s->block[1], ptr + 8, wrap); - get_pixels(s->block[2], ptr + 8 * wrap, wrap); - get_pixels(s->block[3], ptr + 8 * wrap + 8, wrap); - wrap = s->linesize >> 1; - ptr = s->new_picture[1] + (mb_y * 8 * wrap) + mb_x * 8; - get_pixels(s->block[4], ptr, wrap); - - wrap = s->linesize >> 1; - ptr = s->new_picture[2] + (mb_y * 8 * wrap) + mb_x * 8; - get_pixels(s->block[5], ptr, wrap); - - /* subtract previous frame if non intra */ - if (!s->mb_intra) { - int dxy, offset, mx, my; - - dxy = ((motion_y & 1) << 1) | (motion_x & 1); - ptr = s->last_picture[0] + - ((mb_y * 16 + (motion_y >> 1)) * s->linesize) + - (mb_x * 16 + (motion_x >> 1)); - - sub_pixels_2(s->block[0], ptr, s->linesize, dxy); - sub_pixels_2(s->block[1], ptr + 8, s->linesize, dxy); - sub_pixels_2(s->block[2], ptr + s->linesize * 8, s->linesize, dxy); - sub_pixels_2(s->block[3], ptr + 8 + s->linesize * 8, s->linesize ,dxy); - - if (s->out_format == FMT_H263) { - /* special rounding for h263 */ - dxy = 0; - if ((motion_x & 3) != 0) - dxy |= 1; - if ((motion_y & 3) != 0) - dxy |= 2; - mx = motion_x >> 2; - my = motion_y >> 2; - } else { - mx = motion_x / 2; - my = motion_y / 2; - dxy = ((my & 1) << 1) | (mx & 1); - mx >>= 1; - my >>= 1; + s->block_index[0]+=2; + s->block_index[1]+=2; + s->block_index[2]+=2; + s->block_index[3]+=2; + s->block_index[4]++; + s->block_index[5]++; + + s->mv_dir = MV_DIR_FORWARD; + if(mb_type & (mb_type-1)){ // more than 1 MB type possible + pb= s->pb; + if(mb_type&MB_TYPE_INTER){ + s->mv_type = MV_TYPE_16X16; + s->mb_intra= 0; + s->mv[0][0][0] = s->mv_table[0][mb_y * s->mb_width + mb_x]; + s->mv[0][0][1] = s->mv_table[1][mb_y * s->mb_width + mb_x]; + init_put_bits(&s->pb, bit_buf[1], 3000, NULL, NULL); + s->block= s->inter_block; + + encode_mb(s); + d= get_bit_count(&s->pb); + if(d<dmin){ + flush_put_bits(&s->pb); + dmin=d; + best_s.mv[0][0][0]= s->mv[0][0][0]; + best_s.mv[0][0][1]= s->mv[0][0][1]; + best_s.mb_intra= 0; + best_s.mv_type = MV_TYPE_16X16; + best_s.pb=s->pb; + best_s.block= s->block; + best=1; + for(i=0; i<6; i++) + best_s.block_last_index[i]= s->block_last_index[i]; + } } - offset = ((mb_y * 8 + my) * (s->linesize >> 1)) + (mb_x * 8 + mx); - ptr = s->last_picture[1] + offset; - sub_pixels_2(s->block[4], ptr, s->linesize >> 1, dxy); - ptr = s->last_picture[2] + offset; - sub_pixels_2(s->block[5], ptr, s->linesize >> 1, dxy); - } - emms_c(); - -#if 0 - { - float adap_parm; - - adap_parm = ((s->avg_mb_var << 1) + s->mb_var[s->mb_width*mb_y+mb_x] + 1.0) / - ((s->mb_var[s->mb_width*mb_y+mb_x] << 1) + s->avg_mb_var + 1.0); - - printf("\ntype=%c qscale=%2d adap=%0.2f dquant=%4.2f var=%4d avgvar=%4d", - (s->mb_type[s->mb_width*mb_y+mb_x] > 0) ? 'I' : 'P', - s->qscale, adap_parm, s->qscale*adap_parm, - s->mb_var[s->mb_width*mb_y+mb_x], s->avg_mb_var); - } -#endif - /* DCT & quantize */ - if (s->h263_msmpeg4) { - msmpeg4_dc_scale(s); - } else if (s->h263_pred) { - h263_dc_scale(s); + if(mb_type&MB_TYPE_INTER4V){ + s->mv_type = MV_TYPE_8X8; + s->mb_intra= 0; + for(i=0; i<4; i++){ + s->mv[0][i][0] = s->motion_val[s->block_index[i]][0]; + s->mv[0][i][1] = s->motion_val[s->block_index[i]][1]; + } + init_put_bits(&s->pb, bit_buf[2], 3000, NULL, NULL); + s->block= s->inter4v_block; + + encode_mb(s); + d= get_bit_count(&s->pb); + if(d<dmin){ + flush_put_bits(&s->pb); + dmin=d; + for(i=0; i<4; i++){ + best_s.mv[0][i][0] = s->mv[0][i][0]; + best_s.mv[0][i][1] = s->mv[0][i][1]; + } + best_s.mb_intra= 0; + best_s.mv_type = MV_TYPE_8X8; + best_s.pb=s->pb; + best_s.block= s->block; + best=2; + for(i=0; i<6; i++) + best_s.block_last_index[i]= s->block_last_index[i]; + } + } + if(mb_type&MB_TYPE_INTRA){ + s->mv_type = MV_TYPE_16X16; + s->mb_intra= 1; + s->mv[0][0][0] = 0; + s->mv[0][0][1] = 0; + init_put_bits(&s->pb, bit_buf[0], 3000, NULL, NULL); + s->block= s->intra_block; + + encode_mb(s); + d= get_bit_count(&s->pb); + if(d<dmin){ + flush_put_bits(&s->pb); + dmin=d; + best_s.mv[0][0][0]= 0; + best_s.mv[0][0][1]= 0; + best_s.mb_intra= 1; + best_s.mv_type = MV_TYPE_16X16; + best_s.pb=s->pb; + best_s.block= s->block; + for(i=0; i<6; i++) + best_s.block_last_index[i]= s->block_last_index[i]; + best=0; + } + /* force cleaning of ac/dc if needed ... */ + s->mbintra_table[mb_x + mb_y*s->mb_width]=1; + } + for(i=0; i<4; i++){ + s->mv[0][i][0] = best_s.mv[0][i][0]; + s->mv[0][i][1] = best_s.mv[0][i][1]; + } + s->mb_intra= best_s.mb_intra; + s->mv_type= best_s.mv_type; + for(i=0; i<6; i++) + s->block_last_index[i]= best_s.block_last_index[i]; + copy_bits(&pb, bit_buf[best], dmin); + s->block= best_s.block; + s->pb= pb; } else { - /* default quantization values */ - s->y_dc_scale = 8; - s->c_dc_scale = 8; - } - for(i=0;i<6;i++) { - s->block_last_index[i] = dct_quantize(s, s->block[i], i, s->qscale); - } - - /* huffman encode */ - switch(s->out_format) { - case FMT_MPEG1: - mpeg1_encode_mb(s, s->block, motion_x, motion_y); - break; - case FMT_H263: - if (s->h263_msmpeg4) - msmpeg4_encode_mb(s, s->block, motion_x, motion_y); - else - h263_encode_mb(s, s->block, motion_x, motion_y); - break; - case FMT_MJPEG: - mjpeg_encode_mb(s, s->block); - break; + // only one MB-Type possible + if(mb_type&MB_TYPE_INTRA){ + s->mb_intra= 1; + s->mv[0][0][0] = 0; + s->mv[0][0][1] = 0; + }else{ + s->mb_intra= 0; + s->mv[0][0][0] = s->mv_table[0][mb_y * s->mb_width + mb_x]; + s->mv[0][0][1] = s->mv_table[1][mb_y * s->mb_width + mb_x]; + } + encode_mb(s); } - /* decompress blocks so that we keep the state of the decoder */ - s->mv[0][0][0] = motion_x; - s->mv[0][0][1] = motion_y; - MPV_decode_mb(s, s->block); } @@ -1277,7 +1653,8 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->first_gob_line = 0; } } - + emms_c(); + if (s->h263_msmpeg4 && s->pict_type == I_TYPE) msmpeg4_encode_ext_header(s); @@ -1294,7 +1671,6 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->ptr_lastgob = pbBufPtr(&s->pb); //fprintf(stderr,"\nGOB: %2d size: %d (last)", s->gob_number, pdif); } - } static int dct_quantize_c(MpegEncContext *s, @@ -1523,6 +1899,22 @@ static void dct_unquantize_h263_c(MpegEncContext *s, static void rate_control_init(MpegEncContext *s) { +#if 1 + emms_c(); + + //initial values, they dont really matter as they will be totally different within a few frames + s->i_pred.coeff= s->p_pred.coeff= 7.0; + s->i_pred.count= s->p_pred.count= 1.0; + + s->i_pred.decay= s->p_pred.decay= 0.4; + + // use more bits at the beginning, otherwise high motion at the begin will look like shit + s->qsum=100; + s->qcount=100; + + s->short_term_qsum=0.001; + s->short_term_qcount=0.001; +#else s->wanted_bits = 0; if (s->intra_only) { @@ -1533,24 +1925,123 @@ static void rate_control_init(MpegEncContext *s) (float)((float)s->frame_rate / FRAME_RATE_BASE * (I_FRAME_SIZE_RATIO + s->gop_size - 1))); s->I_frame_bits = (int)(s->P_frame_bits * I_FRAME_SIZE_RATIO); } - + #if defined(DEBUG) printf("I_frame_size=%d P_frame_size=%d\n", s->I_frame_bits, s->P_frame_bits); #endif +#endif +} + +static double predict(Predictor *p, double q, double var) +{ + return p->coeff*var / (q*p->count); } +static void update_predictor(Predictor *p, double q, double var, double size) +{ + double new_coeff= size*q / (var + 1); + if(var<1000) return; +/*{ +int pred= predict(p, q, var); +int error= abs(pred-size); +static double sum=0; +static int count=0; +if(count>5) sum+=error; +count++; +if(256*256*256*64%count==0){ + printf("%d %f %f\n", count, sum/count, p->coeff); +} +}*/ + p->count*= p->decay; + p->coeff*= p->decay; + p->count++; + p->coeff+= new_coeff; +} -/* - * This heuristic is rather poor, but at least we do not have to - * change the qscale at every macroblock. - */ static int rate_estimate_qscale(MpegEncContext *s) { - INT64 diff, total_bits = s->total_bits; +#if 1 + int qmin= s->qmin; + int qmax= s->qmax; + int rate_q=5; float q; - int qscale, qmin; + int qscale; + float br_compensation; + double diff; + double short_term_q; + double long_term_q; + int last_qscale= s->qscale; + double fps; + INT64 wanted_bits; + emms_c(); + + fps= (double)s->frame_rate / FRAME_RATE_BASE; + wanted_bits= s->bit_rate*(double)s->picture_number/fps; + + + if(s->picture_number>2){ + /* update predictors */ + if(s->last_pict_type == I_TYPE){ + //FIXME + }else{ //P Frame +//printf("%d %d %d %f\n", s->qscale, s->last_mc_mb_var, s->frame_bits, s->p_pred.coeff); + update_predictor(&s->p_pred, s->qscale, s->last_mc_mb_var, s->frame_bits); + } + } + + if(s->pict_type == I_TYPE){ + //FIXME + rate_q= s->qsum/s->qcount; + }else{ //P Frame + int i; + int diff, best_diff=1000000000; + for(i=1; i<=31; i++){ + diff= predict(&s->p_pred, i, s->mc_mb_var) - (double)s->bit_rate/fps; + if(diff<0) diff= -diff; + if(diff<best_diff){ + best_diff= diff; + rate_q= i; + } + } + } + + s->short_term_qsum*=s->qblur; + s->short_term_qcount*=s->qblur; + + s->short_term_qsum+= rate_q; + s->short_term_qcount++; + short_term_q= s->short_term_qsum/s->short_term_qcount; + + long_term_q= s->qsum/s->qcount*(s->total_bits+1)/(wanted_bits+1); //+1 to avoid nan & 0 + +// q= (long_term_q - short_term_q)*s->qcompress + short_term_q; + q= 1/((1/long_term_q - 1/short_term_q)*s->qcompress + 1/short_term_q); + + diff= s->total_bits - wanted_bits; + br_compensation= (s->bit_rate_tolerance - diff)/s->bit_rate_tolerance; + if(br_compensation<=0.0) br_compensation=0.001; + q/=br_compensation; + qscale= (int)(q + 0.5); + if (qscale<qmin) qscale=qmin; + else if(qscale>qmax) qscale=qmax; + + if (qscale<last_qscale-s->max_qdiff) qscale=last_qscale-s->max_qdiff; + else if(qscale>last_qscale+s->max_qdiff) qscale=last_qscale+s->max_qdiff; + + s->qsum+= qscale; + s->qcount++; + + s->last_pict_type= s->pict_type; +//printf("q:%d diff:%d comp:%f rate_q:%d st_q:%f fvar:%d last_size:%d\n", qscale, (int)diff, br_compensation, +// rate_q, short_term_q, s->mc_mb_var, s->frame_bits); +//printf("%d %d\n", s->bit_rate, (int)fps); + return qscale; +#else + INT64 diff, total_bits = s->total_bits; + float q; + int qscale; if (s->pict_type == I_TYPE) { s->wanted_bits += s->I_frame_bits; } else { @@ -1581,6 +2072,7 @@ static int rate_estimate_qscale(MpegEncContext *s) (int)diff, q); #endif return qscale; +#endif } AVCodec mpeg1video_encoder = { @@ -1643,10 +2135,30 @@ AVCodec mpeg4_encoder = { MPV_encode_end, }; -AVCodec msmpeg4_encoder = { +AVCodec msmpeg4v1_encoder = { + "msmpeg4v1", + CODEC_TYPE_VIDEO, + CODEC_ID_MSMPEG4V1, + sizeof(MpegEncContext), + MPV_encode_init, + MPV_encode_picture, + MPV_encode_end, +}; + +AVCodec msmpeg4v2_encoder = { + "msmpeg4v2", + CODEC_TYPE_VIDEO, + CODEC_ID_MSMPEG4V2, + sizeof(MpegEncContext), + MPV_encode_init, + MPV_encode_picture, + MPV_encode_end, +}; + +AVCodec msmpeg4v3_encoder = { "msmpeg4", CODEC_TYPE_VIDEO, - CODEC_ID_MSMPEG4, + CODEC_ID_MSMPEG4V3, sizeof(MpegEncContext), MPV_encode_init, MPV_encode_picture, diff --git a/src/libffmpeg/libavcodec/mpegvideo.h b/src/libffmpeg/libavcodec/mpegvideo.h index 01e477865..f809a1255 100644 --- a/src/libffmpeg/libavcodec/mpegvideo.h +++ b/src/libffmpeg/libavcodec/mpegvideo.h @@ -34,6 +34,15 @@ enum OutputFormat { #define QMAT_SHIFT_MMX 19 #define QMAT_SHIFT 25 +#define MAX_FCODE 7 +#define MAX_MV 2048 + +typedef struct Predictor{ + double coeff; + double count; + double decay; +} Predictor; + typedef struct MpegEncContext { struct AVCodecContext *avctx; /* the following parameters must be initialized before encoding */ @@ -42,6 +51,7 @@ typedef struct MpegEncContext { int frame_rate; /* number of frames per second */ int intra_only; /* if true, only intra pictures are generated */ int bit_rate; /* wanted bit rate */ + int bit_rate_tolerance; /* amount of +- bits (>0)*/ enum OutputFormat out_format; /* output format */ int h263_plus; /* h263 plus headers */ int h263_rv10; /* use RV10 variation for H263 */ @@ -49,7 +59,14 @@ typedef struct MpegEncContext { int h263_msmpeg4; /* generate MSMPEG4 compatible stream */ int h263_intel; /* use I263 intel h263 header */ int fixed_qscale; /* fixed qscale if non zero */ + float qcompress; /* amount of qscale change between easy & hard scenes (0.0-1.0) */ + float qblur; /* amount of qscale smoothing over time (0.0-1.0) */ + int qmin; /* min qscale */ + int qmax; /* max qscale */ + int max_qdiff; /* max qscale difference between frames */ int encoding; /* true if we are encoding (vs decoding) */ + int flags; /* AVCodecContext.flags (HQ, MV4, ...) */ + int force_type; /* 0= no force, otherwise I_TYPE, P_TYPE, ... */ /* the following fields are managed internally by the encoder */ /* bit output */ @@ -59,7 +76,8 @@ typedef struct MpegEncContext { int context_initialized; int picture_number; int fake_picture_number; /* picture number at the bitstream frame rate */ - int gop_picture_number; /* index of the first picture of a GOP */ + int gop_picture_number; /* index of the first picture of a GOP based on fake_pic_num & mpeg1 specific */ + int picture_in_gop_number; /* 0-> first pic in gop, ... */ int mb_width, mb_height; int mb_num; /* number of MBs of a picture */ int linesize; /* line size, in bytes, may be different from width */ @@ -72,10 +90,10 @@ typedef struct MpegEncContext { UINT8 *aux_picture_base[3]; /* real start of the picture */ UINT8 *current_picture[3]; /* buffer to store the decompressed current picture */ int last_dc[3]; /* last DC values for MPEG1 */ - INT16 *dc_val[3]; /* used for mpeg4 DC prediction */ + INT16 *dc_val[3]; /* used for mpeg4 DC prediction, all 3 arrays must be continuous */ int y_dc_scale, c_dc_scale; UINT8 *coded_block; /* used for coded block pattern prediction */ - INT16 (*ac_val[3])[16]; /* used for for mpeg4 AC prediction */ + INT16 (*ac_val[3])[16]; /* used for for mpeg4 AC prediction, all 3 arrays must be continuous */ int ac_pred; int mb_skiped; /* MUST BE SET only during DECODING */ UINT8 *mbskip_table; /* used to avoid copy if macroblock @@ -84,22 +102,25 @@ typedef struct MpegEncContext { int qscale; int pict_type; + int last_non_b_pict_type; /* used for mpeg4 gmc b-frames */ + int last_pict_type; /* used for bit rate stuff (needs that to update the right predictor) */ int frame_rate_index; /* motion compensation */ int unrestricted_mv; int h263_long_vectors; /* use horrible h263v1 long vector mode */ int f_code; /* resolution */ - int b_code; /* resolution for B Frames*/ - INT16 *mv_table[2]; /* MV table */ - INT16 (*motion_val)[2]; /* used for MV prediction */ + int b_code; /* backward resolution for B Frames (mpeg4) */ + INT16 *mv_table[2]; /* MV table (1MV per MB)*/ + INT16 (*motion_val)[2]; /* used for MV prediction (4MV per MB)*/ int full_search; int mv_dir; #define MV_DIR_BACKWARD 1 #define MV_DIR_FORWARD 2 +#define MV_DIRECT 4 // bidirectional mode where the difference equals the MV of the last P/S/I-Frame (mpeg4) int mv_type; #define MV_TYPE_16X16 0 /* 1 vector for the whole mb */ -#define MV_TYPE_8X8 1 /* 4 vectors (h263) */ +#define MV_TYPE_8X8 1 /* 4 vectors (h263, mpeg4 4MV) */ #define MV_TYPE_16X8 2 /* 2 vectors, one per 16x8 block */ #define MV_TYPE_FIELD 3 /* 2 vectors, one per field */ #define MV_TYPE_DMV 4 /* 2 vectors, special mpeg2 Dual Prime Vectors */ @@ -111,6 +132,8 @@ typedef struct MpegEncContext { int mv[2][4][2]; int field_select[2][2]; int last_mv[2][2][2]; + UINT16 (*mv_penalty)[MAX_MV*2+1]; /* amount of bits needed to encode a MV, used for ME */ + UINT8 *fcode_tab; /* smallest fcode needed for each MV */ int has_b_frames; int no_rounding; /* apply no rounding to motion estimation (MPEG4) */ @@ -119,9 +142,20 @@ typedef struct MpegEncContext { int mb_x, mb_y; int mb_incr; int mb_intra; - INT16 *mb_var; /* Table for MB variances */ - char *mb_type; /* Table for MB type */ - + UINT16 *mb_var; /* Table for MB variances */ + UINT8 *mb_type; /* Table for MB type */ +#define MB_TYPE_INTRA 0x01 +#define MB_TYPE_INTER 0x02 +#define MB_TYPE_INTER4V 0x04 +#define MB_TYPE_SKIPED 0x08 +#define MB_TYPE_DIRECT 0x10 +#define MB_TYPE_FORWARD 0x20 +#define MB_TYPE_BACKWAD 0x40 +#define MB_TYPE_BIDIR 0x80 + + int block_index[6]; + int block_wrap[6]; + /* matrix transmitted in the bitstream */ UINT16 intra_matrix[64]; UINT16 chroma_intra_matrix[64]; @@ -141,9 +175,30 @@ typedef struct MpegEncContext { int I_frame_bits; /* wanted number of bits per I frame */ int P_frame_bits; /* same for P frame */ int avg_mb_var; /* average MB variance for current frame */ + int mc_mb_var; /* motion compensated MB variance for current frame */ + int last_mc_mb_var; /* motion compensated MB variance for last frame */ INT64 wanted_bits; INT64 total_bits; - + int frame_bits; /* bits used for the current frame */ + int last_frame_bits; /* bits used for the last frame */ + Predictor i_pred; + Predictor p_pred; + double qsum; /* sum of qscales */ + double qcount; /* count of qscales */ + double short_term_qsum; /* sum of recent qscales */ + double short_term_qcount; /* count of recent qscales */ + + /* statistics, used for 2-pass encoding */ + int mv_bits; + int header_bits; + int i_tex_bits; + int p_tex_bits; + int i_count; + int p_count; + int skip_count; + int misc_bits; // cbp, mb_type + int last_bits; //temp var used for calculating the above vars + /* H.263 specific */ int gob_number; int gob_index; @@ -156,7 +211,12 @@ typedef struct MpegEncContext { int h263_aic_dir; /* AIC direction: 0 = left, 1 = top */ /* mpeg4 specific */ + int time_increment_resolution; int time_increment_bits; + int time_increment; + int time_base; + int time; + int last_non_b_time[2]; int shape; int vol_sprite_usage; int sprite_width; @@ -179,6 +239,8 @@ typedef struct MpegEncContext { int sprite_warping_accuracy; int low_latency_sprite; int data_partioning; + int resync_marker; + int resync_x_pos; /* divx specific, used to workaround (many) bugs in divx5 */ int divx_version; @@ -202,9 +264,10 @@ typedef struct MpegEncContext { int dc_table_index; int use_skip_mb_code; int slice_height; /* in macroblocks */ - int first_slice_line; + int first_slice_line; /* used in mpeg4 too to handle resync markers */ int flipflop_rounding; int bitrate; + int msmpeg4_version; /* 1=mp41, 2=mp42, 3=mp43/divx3 */ /* decompression specific */ GetBitContext gb; @@ -242,7 +305,10 @@ typedef struct MpegEncContext { UINT8 *ptr_last_mb_line; UINT32 mb_line_avgsize; - DCTELEM block[6][64] __align8; + DCTELEM (*block)[64]; /* points to one of the following blocks */ + DCTELEM intra_block[6][64] __align8; + DCTELEM inter_block[6][64] __align8; + DCTELEM inter4v_block[6][64] __align8; void (*dct_unquantize)(struct MpegEncContext *s, DCTELEM *block, int n, int qscale); } MpegEncContext; @@ -258,9 +324,8 @@ void MPV_common_init_mmx(MpegEncContext *s); /* motion_est.c */ -int estimate_motion(MpegEncContext *s, - int mb_x, int mb_y, - int *mx_ptr, int *my_ptr); +void estimate_motion(MpegEncContext *s, + int mb_x, int mb_y); /* mpeg12.c */ extern INT16 default_intra_matrix[64]; @@ -270,6 +335,7 @@ void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number); void mpeg1_encode_mb(MpegEncContext *s, DCTELEM block[6][64], int motion_x, int motion_y); +void mpeg1_encode_init(MpegEncContext *s); /* h263enc.c */ @@ -306,6 +372,9 @@ static inline int get_rl_index(const RLTable *rl, int last, int run, int level) void h263_encode_mb(MpegEncContext *s, DCTELEM block[6][64], int motion_x, int motion_y); +void mpeg4_encode_mb(MpegEncContext *s, + DCTELEM block[6][64], + int motion_x, int motion_y); void h263_encode_picture_header(MpegEncContext *s, int picture_number); int h263_encode_gob_header(MpegEncContext * s, int mb_line); void h263_dc_scale(MpegEncContext *s); @@ -314,7 +383,7 @@ INT16 *h263_pred_motion(MpegEncContext * s, int block, void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n, int dir); void mpeg4_encode_picture_header(MpegEncContext *s, int picture_number); -void h263_encode_init_vlc(MpegEncContext *s); +void h263_encode_init(MpegEncContext *s); void h263_decode_init_vlc(MpegEncContext *s); int h263_decode_picture_header(MpegEncContext *s); diff --git a/src/libffmpeg/libavcodec/msmpeg4.c b/src/libffmpeg/libavcodec/msmpeg4.c index 8fa9aefaa..66fc5255e 100644 --- a/src/libffmpeg/libavcodec/msmpeg4.c +++ b/src/libffmpeg/libavcodec/msmpeg4.c @@ -29,7 +29,6 @@ * TODO: * - (encoding) select best mv table (two choices) * - (encoding) select best vlc/dc table - * - (decoding) handle slice indication */ //#define DEBUG @@ -44,12 +43,18 @@ typedef struct MVTable { VLC vlc; /* decoding: vlc */ } MVTable; +static UINT32 v2_dc_lum_table[512][2]; +static UINT32 v2_dc_chroma_table[512][2]; + static void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n); static int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, int n, int coded); static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr); static int msmpeg4_decode_motion(MpegEncContext * s, int *mx_ptr, int *my_ptr); +static void msmpeg4v2_encode_motion(MpegEncContext * s, int val); +static void init_h263_dc_for_msmpeg4(); + extern UINT32 inverse[256]; @@ -166,7 +171,11 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number) put_bits(&s->pb, 5, s->qscale); s->rl_table_index = 2; - s->rl_chroma_table_index = 1; /* only for I frame */ + if(s->msmpeg4_version==2) + s->rl_chroma_table_index = 2; /* only for I frame */ + else + s->rl_chroma_table_index = 1; /* only for I frame */ + s->dc_table_index = 1; s->mv_table_index = 1; /* only if P frame */ s->use_skip_mb_code = 1; /* only if P frame */ @@ -174,21 +183,25 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number) if (s->pict_type == I_TYPE) { put_bits(&s->pb, 5, 0x17); /* indicate only one "slice" */ - code012(&s->pb, s->rl_chroma_table_index); - code012(&s->pb, s->rl_table_index); + if(s->msmpeg4_version!=2){ + code012(&s->pb, s->rl_chroma_table_index); + code012(&s->pb, s->rl_table_index); - put_bits(&s->pb, 1, s->dc_table_index); + put_bits(&s->pb, 1, s->dc_table_index); + } s->no_rounding = 1; } else { put_bits(&s->pb, 1, s->use_skip_mb_code); s->rl_chroma_table_index = s->rl_table_index; - code012(&s->pb, s->rl_table_index); + if(s->msmpeg4_version!=2){ + code012(&s->pb, s->rl_table_index); - put_bits(&s->pb, 1, s->dc_table_index); + put_bits(&s->pb, 1, s->dc_table_index); + + put_bits(&s->pb, 1, s->mv_table_index); + } - put_bits(&s->pb, 1, s->mv_table_index); - if(s->flipflop_rounding){ s->no_rounding ^= 1; }else{ @@ -203,6 +216,8 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number) init_mv_table(&mv_tables[1]); for(i=0;i<NB_RL_TABLES;i++) init_rl(&rl_table[i]); + + init_h263_dc_for_msmpeg4(); } #ifdef DEBUG @@ -226,18 +241,17 @@ void msmpeg4_encode_ext_header(MpegEncContext * s) /* predict coded block */ static inline int coded_block_pred(MpegEncContext * s, int n, UINT8 **coded_block_ptr) { - int x, y, wrap, pred, a, b, c; + int xy, wrap, pred, a, b, c; - x = 2 * s->mb_x + 1 + (n & 1); - y = 2 * s->mb_y + 1 + ((n & 2) >> 1); - wrap = s->mb_width * 2 + 2; + xy = s->block_index[n]; + wrap = s->block_wrap[0]; /* B C * A X */ - a = s->coded_block[(x - 1) + (y) * wrap]; - b = s->coded_block[(x - 1) + (y - 1) * wrap]; - c = s->coded_block[(x) + (y - 1) * wrap]; + a = s->coded_block[xy - 1 ]; + b = s->coded_block[xy - 1 - wrap]; + c = s->coded_block[xy - wrap]; if (b == c) { pred = a; @@ -246,7 +260,7 @@ static inline int coded_block_pred(MpegEncContext * s, int n, UINT8 **coded_bloc } /* store value */ - *coded_block_ptr = &s->coded_block[(x) + (y) * wrap]; + *coded_block_ptr = &s->coded_block[xy]; return pred; } @@ -314,14 +328,30 @@ void msmpeg4_encode_mb(MpegEncContext * s, if (s->use_skip_mb_code) put_bits(&s->pb, 1, 0); /* mb coded */ - put_bits(&s->pb, - table_mb_non_intra[cbp + 64][1], - table_mb_non_intra[cbp + 64][0]); - - /* motion vector */ - h263_pred_motion(s, 0, &pred_x, &pred_y); - msmpeg4_encode_motion(s, motion_x - pred_x, - motion_y - pred_y); + if(s->msmpeg4_version==2){ + put_bits(&s->pb, + v2_mb_type[cbp&3][1], + v2_mb_type[cbp&3][0]); + if((cbp&3) != 3) coded_cbp= cbp ^ 0x3C; + else coded_cbp= cbp; + + put_bits(&s->pb, + cbpy_tab[coded_cbp>>2][1], + cbpy_tab[coded_cbp>>2][0]); + + h263_pred_motion(s, 0, &pred_x, &pred_y); + msmpeg4v2_encode_motion(s, motion_x - pred_x); + msmpeg4v2_encode_motion(s, motion_y - pred_y); + }else{ + put_bits(&s->pb, + table_mb_non_intra[cbp + 64][1], + table_mb_non_intra[cbp + 64][0]); + + /* motion vector */ + h263_pred_motion(s, 0, &pred_x, &pred_y); + msmpeg4_encode_motion(s, motion_x - pred_x, + motion_y - pred_y); + } } else { /* compute cbp */ cbp = 0; @@ -343,19 +373,36 @@ void msmpeg4_encode_mb(MpegEncContext * s, printf("cbp=%x %x\n", cbp, coded_cbp); #endif - if (s->pict_type == I_TYPE) { - set_stat(ST_INTRA_MB); - put_bits(&s->pb, - table_mb_intra[coded_cbp][1], table_mb_intra[coded_cbp][0]); - } else { - if (s->use_skip_mb_code) - put_bits(&s->pb, 1, 0); /* mb coded */ + if(s->msmpeg4_version==2){ + if (s->pict_type == I_TYPE) { + put_bits(&s->pb, + v2_intra_cbpc[cbp&3][1], v2_intra_cbpc[cbp&3][0]); + } else { + if (s->use_skip_mb_code) + put_bits(&s->pb, 1, 0); /* mb coded */ + put_bits(&s->pb, + v2_mb_type[(cbp&3) + 4][1], + v2_mb_type[(cbp&3) + 4][0]); + } + put_bits(&s->pb, 1, 0); /* no AC prediction yet */ put_bits(&s->pb, - table_mb_non_intra[cbp][1], - table_mb_non_intra[cbp][0]); + cbpy_tab[cbp>>2][1], + cbpy_tab[cbp>>2][0]); + }else{ + if (s->pict_type == I_TYPE) { + set_stat(ST_INTRA_MB); + put_bits(&s->pb, + table_mb_intra[coded_cbp][1], table_mb_intra[coded_cbp][0]); + } else { + if (s->use_skip_mb_code) + put_bits(&s->pb, 1, 0); /* mb coded */ + put_bits(&s->pb, + table_mb_non_intra[cbp][1], + table_mb_non_intra[cbp][0]); + } + set_stat(ST_INTRA_MB); + put_bits(&s->pb, 1, 0); /* no AC prediction yet */ } - set_stat(ST_INTRA_MB); - put_bits(&s->pb, 1, 0); /* no AC prediction yet */ } for (i = 0; i < 6; i++) { @@ -367,10 +414,9 @@ void msmpeg4_encode_mb(MpegEncContext * s, /* strongly inspirated from MPEG4, but not exactly the same ! */ void msmpeg4_dc_scale(MpegEncContext * s) { - if (s->qscale < 5){ + if (s->qscale < 5 || s->msmpeg4_version==2){ s->y_dc_scale = 8; s->c_dc_scale = 8; -// s->c_dc_scale = (s->qscale + 13)>>1; }else if (s->qscale < 9){ s->y_dc_scale = 2 * s->qscale; s->c_dc_scale = (s->qscale + 13)>>1; @@ -378,56 +424,30 @@ void msmpeg4_dc_scale(MpegEncContext * s) s->y_dc_scale = s->qscale + 8; s->c_dc_scale = (s->qscale + 13)>>1; } - // this differs for quant >24 from mpeg4 - -// if(s->qscale==13) s->c_dc_scale=14; - -// if(s->qscale>=6) -// printf("%d", s->qscale); - - /* s->c_dc_scale values (found by Michael Nidermayer) - qscale=2 -> 8 (yes iam sure about that) - qscale=3 -> 8 - qscale=4 -> 8 - qscale=5 -> 9 - qscale=6 -> 9 - qscale=7 -> 10 - qscale=8 -> 10 - qscale=9 -> 11 - qscale=10-> 11 - */ } /* dir = 0: left, dir = 1: top prediction */ static int msmpeg4_pred_dc(MpegEncContext * s, int n, INT16 **dc_val_ptr, int *dir_ptr) { - int a, b, c, xy, wrap, pred, scale; + int a, b, c, wrap, pred, scale; INT16 *dc_val; /* find prediction */ if (n < 4) { - wrap = s->mb_width * 2 + 2; - xy = 2 * s->mb_y + 1 + ((n & 2) >> 1); - xy *= wrap; - xy += 2 * s->mb_x + 1 + (n & 1); - dc_val = s->dc_val[0]; scale = s->y_dc_scale; } else { - wrap = s->mb_width + 2; - xy = s->mb_y + 1; - xy *= wrap; - xy += s->mb_x + 1; - dc_val = s->dc_val[n - 4 + 1]; scale = s->c_dc_scale; } + wrap = s->block_wrap[n]; + dc_val= s->dc_val[0] + s->block_index[n]; /* B C * A X */ - a = dc_val[xy - 1]; - b = dc_val[xy - 1 - wrap]; - c = dc_val[xy - wrap]; + a = dc_val[ - 1]; + b = dc_val[ - 1 - wrap]; + c = dc_val[ - wrap]; /* XXX: the following solution consumes divisions, but it does not necessitate to modify mpegvideo.c. The problem comes from the @@ -478,7 +498,7 @@ static int msmpeg4_pred_dc(MpegEncContext * s, int n, } /* update predictor */ - *dc_val_ptr = &dc_val[xy]; + *dc_val_ptr = &dc_val[0]; return pred; } @@ -502,35 +522,46 @@ static void msmpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr /* do the prediction */ level -= pred; - sign = 0; - if (level < 0) { - level = -level; - sign = 1; - } - - code = level; - if (code > DC_MAX) - code = DC_MAX; - - if (s->dc_table_index == 0) { + if(s->msmpeg4_version==2){ if (n < 4) { - put_bits(&s->pb, table0_dc_lum[code][1], table0_dc_lum[code][0]); - } else { - put_bits(&s->pb, table0_dc_chroma[code][1], table0_dc_chroma[code][0]); + put_bits(&s->pb, + v2_dc_lum_table[level+256][1], + v2_dc_lum_table[level+256][0]); + }else{ + put_bits(&s->pb, + v2_dc_chroma_table[level+256][1], + v2_dc_chroma_table[level+256][0]); } - } else { - if (n < 4) { - put_bits(&s->pb, table1_dc_lum[code][1], table1_dc_lum[code][0]); + }else{ + sign = 0; + if (level < 0) { + level = -level; + sign = 1; + } + code = level; + if (code > DC_MAX) + code = DC_MAX; + + if (s->dc_table_index == 0) { + if (n < 4) { + put_bits(&s->pb, table0_dc_lum[code][1], table0_dc_lum[code][0]); + } else { + put_bits(&s->pb, table0_dc_chroma[code][1], table0_dc_chroma[code][0]); + } } else { - put_bits(&s->pb, table1_dc_chroma[code][1], table1_dc_chroma[code][0]); + if (n < 4) { + put_bits(&s->pb, table1_dc_lum[code][1], table1_dc_lum[code][0]); + } else { + put_bits(&s->pb, table1_dc_chroma[code][1], table1_dc_chroma[code][0]); + } + } + + if (code == DC_MAX) + put_bits(&s->pb, 8, level); + + if (level != 0) { + put_bits(&s->pb, 1, sign); } - } - - if (code == DC_MAX) - put_bits(&s->pb, 8, level); - - if (level != 0) { - put_bits(&s->pb, 1, sign); } } @@ -558,7 +589,10 @@ static void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n) } else { i = 0; rl = &rl_table[3 + s->rl_table_index]; - run_diff = 1; + if(s->msmpeg4_version==2) + run_diff = 0; + else + run_diff = 1; set_stat(ST_INTER_AC); } @@ -629,6 +663,72 @@ static VLC mb_non_intra_vlc; static VLC mb_intra_vlc; static VLC dc_lum_vlc[2]; static VLC dc_chroma_vlc[2]; +static VLC v2_dc_lum_vlc; +static VLC v2_dc_chroma_vlc; +static VLC cbpy_vlc; +static VLC v2_intra_cbpc_vlc; +static VLC v2_mb_type_vlc; +static VLC v2_mv_vlc; + +/* this table is practically identical to the one from h263 except that its inverted */ +static void init_h263_dc_for_msmpeg4() +{ + static int inited=0; + + if(!inited){ + int level, uni_code, uni_len; + inited=1; + + for(level=-256; level<256; level++){ + int size, v, l; + /* find number of bits */ + size = 0; + v = abs(level); + while (v) { + v >>= 1; + size++; + } + + if (level < 0) + l= (-level) ^ ((1 << size) - 1); + else + l= level; + + /* luminance h263 */ + uni_code= DCtab_lum[size][0]; + uni_len = DCtab_lum[size][1]; + uni_code ^= (1<<uni_len)-1; //M$ doesnt like compatibility + + if (size > 0) { + uni_code<<=size; uni_code|=l; + uni_len+=size; + if (size > 8){ + uni_code<<=1; uni_code|=1; + uni_len++; + } + } + v2_dc_lum_table[level+256][0]= uni_code; + v2_dc_lum_table[level+256][1]= uni_len; + + /* chrominance h263 */ + uni_code= DCtab_chrom[size][0]; + uni_len = DCtab_chrom[size][1]; + uni_code ^= (1<<uni_len)-1; //M$ doesnt like compatibility + + if (size > 0) { + uni_code<<=size; uni_code|=l; + uni_len+=size; + if (size > 8){ + uni_code<<=1; uni_code|=1; + uni_len++; + } + } + v2_dc_chroma_table[level+256][0]= uni_code; + v2_dc_chroma_table[level+256][1]= uni_len; + + } + } +} /* init all vlc decoding tables */ int msmpeg4_decode_init_vlc(MpegEncContext *s) @@ -659,6 +759,27 @@ int msmpeg4_decode_init_vlc(MpegEncContext *s) init_vlc(&dc_chroma_vlc[1], 9, 120, &table1_dc_chroma[0][1], 8, 4, &table1_dc_chroma[0][0], 8, 4); + + init_h263_dc_for_msmpeg4(); + init_vlc(&v2_dc_lum_vlc, 9, 512, + &v2_dc_lum_table[0][1], 8, 4, + &v2_dc_lum_table[0][0], 8, 4); + init_vlc(&v2_dc_chroma_vlc, 9, 512, + &v2_dc_chroma_table[0][1], 8, 4, + &v2_dc_chroma_table[0][0], 8, 4); + + init_vlc(&cbpy_vlc, 6, 16, + &cbpy_tab[0][1], 2, 1, + &cbpy_tab[0][0], 2, 1); + init_vlc(&v2_intra_cbpc_vlc, 3, 4, + &v2_intra_cbpc[0][1], 2, 1, + &v2_intra_cbpc[0][0], 2, 1); + init_vlc(&v2_mb_type_vlc, 5, 8, + &v2_mb_type[0][1], 2, 1, + &v2_mb_type[0][0], 2, 1); + init_vlc(&v2_mv_vlc, 9, 33, + &mvtab[0][1], 2, 1, + &mvtab[0][0], 2, 1); init_vlc(&mb_non_intra_vlc, 9, 128, &table_mb_non_intra[0][1], 8, 4, @@ -692,16 +813,21 @@ int msmpeg4_decode_picture_header(MpegEncContext * s) if (s->pict_type == I_TYPE) { code = get_bits(&s->gb, 5); - /* 0x17: one slice, 0x18: three slices */ - /* XXX: implement it */ - //printf("%d %d %d\n", code, s->slice_height, s->first_slice_line); + /* 0x17: one slice, 0x18: two slices */ if (code < 0x17) return -1; s->slice_height = s->mb_height / (code - 0x16); - s->rl_chroma_table_index = decode012(&s->gb); - s->rl_table_index = decode012(&s->gb); + if(s->msmpeg4_version==2){ + s->rl_chroma_table_index = 2; + s->rl_table_index = 2; + + s->dc_table_index = 0; //not used + }else{ + s->rl_chroma_table_index = decode012(&s->gb); + s->rl_table_index = decode012(&s->gb); - s->dc_table_index = get_bits1(&s->gb); + s->dc_table_index = get_bits1(&s->gb); + } s->no_rounding = 1; /* printf(" %d %d %d %d \n", s->qscale, @@ -711,12 +837,21 @@ int msmpeg4_decode_picture_header(MpegEncContext * s) } else { s->use_skip_mb_code = get_bits1(&s->gb); - s->rl_table_index = decode012(&s->gb); - s->rl_chroma_table_index = s->rl_table_index; + if(s->msmpeg4_version==2){ + s->rl_table_index = 2; + s->rl_chroma_table_index = s->rl_table_index; + + s->dc_table_index = 0; //not used - s->dc_table_index = get_bits1(&s->gb); + s->mv_table_index = 0; + }else{ + s->rl_table_index = decode012(&s->gb); + s->rl_chroma_table_index = s->rl_table_index; - s->mv_table_index = get_bits1(&s->gb); + s->dc_table_index = get_bits1(&s->gb); + + s->mv_table_index = get_bits1(&s->gb); + } /* printf(" %d %d %d %d %d \n", s->use_skip_mb_code, s->rl_table_index, @@ -731,7 +866,18 @@ int msmpeg4_decode_picture_header(MpegEncContext * s) // printf("%d", s->no_rounding); } - +#if 0 +if(s->msmpeg4_version==2) +{ +int i; +for(i=0; i<s->gb.size*8; i++) +// printf("%d", get_bits1(&s->gb)); + get_bits1(&s->gb); +printf("END\n"); +return -1; +} +#endif + #ifdef DEBUG printf("*****frame %d:\n", frame_count++); #endif @@ -767,6 +913,127 @@ static inline void memsetw(short *tab, int val, int n) tab[i] = val; } +static void msmpeg4v2_encode_motion(MpegEncContext * s, int val) +{ + int range, bit_size, sign, code, bits; + + if (val == 0) { + /* zero vector */ + code = 0; + put_bits(&s->pb, mvtab[code][1], mvtab[code][0]); + } else { + bit_size = s->f_code - 1; + range = 1 << bit_size; + if (val <= -64) + val += 64; + else if (val >= 64) + val -= 64; + + if (val >= 0) { + sign = 0; + } else { + val = -val; + sign = 1; + } + val--; + code = (val >> bit_size) + 1; + bits = val & (range - 1); + + put_bits(&s->pb, mvtab[code][1] + 1, (mvtab[code][0] << 1) | sign); + if (bit_size > 0) { + put_bits(&s->pb, bit_size, bits); + } + } +} + +/* this is identical to h263 except that its range is multiplied by 2 */ +static int msmpeg4v2_decode_motion(MpegEncContext * s, int pred, int f_code) +{ + int code, val, sign, shift; + + code = get_vlc(&s->gb, &v2_mv_vlc); + if (code < 0) + return 0xffff; + + if (code == 0) + return pred; + sign = get_bits1(&s->gb); + shift = f_code - 1; + val = (code - 1) << shift; + if (shift > 0) + val |= get_bits(&s->gb, shift); + val++; + if (sign) + val = -val; + val += pred; + + if (val <= -64) + val += 64; + else if (val >= 64) + val -= 64; + + return val; +} + + +int msmpeg4v2_decode_mb(MpegEncContext *s, + DCTELEM block[6][64]) +{ + int cbp, code, i; + if (s->pict_type == P_TYPE) { + if (s->use_skip_mb_code) { + if (get_bits1(&s->gb)) { + /* skip mb */ + s->mb_intra = 0; + for(i=0;i<6;i++) + s->block_last_index[i] = -1; + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_16X16; + s->mv[0][0][0] = 0; + s->mv[0][0][1] = 0; + s->mb_skiped = 1; + return 0; + } + } + + code = get_vlc(&s->gb, &v2_mb_type_vlc); + s->mb_intra = code >>2; + + cbp = code & 0x3; + } else { + s->mb_intra = 1; + cbp= get_vlc(&s->gb, &v2_intra_cbpc_vlc); + } + + if (!s->mb_intra) { + int mx, my; + + cbp|= get_vlc(&s->gb, &cbpy_vlc)<<2; + if((cbp&3) != 3) cbp^= 0x3C; + + h263_pred_motion(s, 0, &mx, &my); + mx= msmpeg4v2_decode_motion(s, mx, 1); + my= msmpeg4v2_decode_motion(s, my, 1); + + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_16X16; + s->mv[0][0][0] = mx; + s->mv[0][0][1] = my; + } else { + s->ac_pred = get_bits1(&s->gb); + cbp|= get_vlc(&s->gb, &cbpy_vlc)<<2; + } + + for (i = 0; i < 6; i++) { + if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0) + { + fprintf(stderr,"\nIgnoring error while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i); + return -1; + } + } + return 0; +} + int msmpeg4_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) { @@ -803,6 +1070,8 @@ int msmpeg4_decode_mb(MpegEncContext *s, } } + if(s->msmpeg4_version==2) return msmpeg4v2_decode_mb(s, block); //FIXME merge if possible + if (s->pict_type == P_TYPE) { set_stat(ST_INTER_MB); if (s->use_skip_mb_code) { @@ -915,7 +1184,12 @@ static int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, qadd = (s->qscale - 1) | 1; i = 0; rl = &rl_table[3 + s->rl_table_index]; - run_diff = 1; + + if(s->msmpeg4_version==2) + run_diff = 0; + else + run_diff = 1; + if (!coded) { s->block_last_index[n] = i - 1; return 0; @@ -999,21 +1273,32 @@ static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr) int level, pred; INT16 *dc_val; - if (n < 4) { - level = get_vlc(&s->gb, &dc_lum_vlc[s->dc_table_index]); - } else { - level = get_vlc(&s->gb, &dc_chroma_vlc[s->dc_table_index]); - } - if (level < 0) - return -1; + if(s->msmpeg4_version==2){ + if (n < 4) { + level = get_vlc(&s->gb, &v2_dc_lum_vlc); + } else { + level = get_vlc(&s->gb, &v2_dc_chroma_vlc); + } + if (level < 0) + return -1; + level-=256; + }else{ //FIXME optimize use unified tables & index + if (n < 4) { + level = get_vlc(&s->gb, &dc_lum_vlc[s->dc_table_index]); + } else { + level = get_vlc(&s->gb, &dc_chroma_vlc[s->dc_table_index]); + } + if (level < 0) + return -1; - if (level == DC_MAX) { - level = get_bits(&s->gb, 8); - if (get_bits1(&s->gb)) - level = -level; - } else if (level != 0) { - if (get_bits1(&s->gb)) - level = -level; + if (level == DC_MAX) { + level = get_bits(&s->gb, 8); + if (get_bits1(&s->gb)) + level = -level; + } else if (level != 0) { + if (get_bits1(&s->gb)) + level = -level; + } } pred = msmpeg4_pred_dc(s, n, &dc_val, dir_ptr); diff --git a/src/libffmpeg/libavcodec/msmpeg4data.h b/src/libffmpeg/libavcodec/msmpeg4data.h index 03a261211..9dcb8276f 100644 --- a/src/libffmpeg/libavcodec/msmpeg4data.h +++ b/src/libffmpeg/libavcodec/msmpeg4data.h @@ -569,6 +569,13 @@ extern const UINT16 intra_vlc[103][2]; extern const INT8 intra_level[102]; extern const INT8 intra_run[102]; +extern const UINT8 DCtab_lum[13][2]; +extern const UINT8 DCtab_chrom[13][2]; + +extern const UINT8 cbpy_tab[16][2]; +extern const UINT8 mvtab[33][2]; + + #define NB_RL_TABLES 6 static RLTable rl_table[NB_RL_TABLES] = { @@ -1765,3 +1772,12 @@ static MVTable mv_tables[2] = { table1_mvy, } }; + +static const UINT8 v2_mb_type[8][2] = { + {1, 1}, {0 , 2}, {3 , 3}, {9 , 5}, + {5, 4}, {0x21, 7}, {0x20, 7}, {0x11, 6}, +}; + +static const UINT8 v2_intra_cbpc[4][2] = { + {1, 1}, {0, 3}, {1, 3}, {1, 2}, +}; diff --git a/src/libffmpeg/libavcodec/utils.c b/src/libffmpeg/libavcodec/utils.c index f84b17e63..180712314 100644 --- a/src/libffmpeg/libavcodec/utils.c +++ b/src/libffmpeg/libavcodec/utils.c @@ -113,7 +113,8 @@ int avcodec_decode_video(AVCodecContext *avctx, AVPicture *picture, ret = avctx->codec->decode(avctx, picture, got_picture_ptr, buf, buf_size); - avctx->frame_number++; + if (*got_picture_ptr) + avctx->frame_number++; return ret; } @@ -367,21 +368,57 @@ int avpicture_get_size(int pix_fmt, int width, int height) /* must be called before any other functions */ void avcodec_init(void) { + static int inited = 0; + + if (inited != 0) + return; + inited = 1; + dsputil_init(); } /* simple call to use all the codecs */ void avcodec_register_all(void) { + static int inited = 0; + + if (inited != 0) + return; + inited = 1; + + /* encoders */ +#ifdef CONFIG_ENCODERS + register_avcodec(&ac3_encoder); + register_avcodec(&mp2_encoder); +#ifdef CONFIG_MP3LAME + register_avcodec(&mp3lame_encoder); +#endif + register_avcodec(&mpeg1video_encoder); + register_avcodec(&h263_encoder); + register_avcodec(&h263p_encoder); + register_avcodec(&rv10_encoder); + register_avcodec(&mjpeg_encoder); + register_avcodec(&mpeg4_encoder); + register_avcodec(&msmpeg4v1_encoder); + register_avcodec(&msmpeg4v2_encoder); + register_avcodec(&msmpeg4v3_encoder); +#endif /* CONFIG_ENCODERS */ + register_avcodec(&rawvideo_codec); + /* decoders */ #ifdef CONFIG_DECODERS register_avcodec(&h263_decoder); register_avcodec(&mpeg4_decoder); - register_avcodec(&msmpeg4_decoder); + register_avcodec(&msmpeg4v1_decoder); + register_avcodec(&msmpeg4v2_decoder); + register_avcodec(&msmpeg4v3_decoder); register_avcodec(&mpeg_decoder); register_avcodec(&h263i_decoder); register_avcodec(&rv10_decoder); register_avcodec(&mjpeg_decoder); +#ifdef CONFIG_AC3 + register_avcodec(&ac3_decoder); +#endif #endif /* CONFIG_DECODERS */ } |