summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Guenter Bartsch <guenter@users.sourceforge.net> 2002-04-06 20:51:22 +0000
committer: Guenter Bartsch <guenter@users.sourceforge.net> 2002-04-06 20:51:22 +0000
commit: 55e772ec62ef638f8a0b44e379da663f78245355 (patch)
tree: 3b90a73ab2e800ed32f68e24f125164de7a655b3 /src
parent: 0176e107fd9b6672d87f75a9eb5d83e163e0179f (diff)
download: xine-lib-55e772ec62ef638f8a0b44e379da663f78245355.tar.gz
download: xine-lib-55e772ec62ef638f8a0b44e379da663f78245355.tar.bz2
update to latest ffmpeg cvs, this should add mpeg-4 bframe support :-) (BTW: ffmpeg-guys: impressive work!!)
CVS patchset: 1690 CVS date: 2002/04/06 20:51:22
Diffstat (limited to 'src')
-rw-r--r-- src/libffmpeg/libavcodec/avcodec.h 41
-rw-r--r-- src/libffmpeg/libavcodec/common.c 9
-rw-r--r-- src/libffmpeg/libavcodec/common.h 96
-rw-r--r-- src/libffmpeg/libavcodec/dsputil.c 214
-rw-r--r-- src/libffmpeg/libavcodec/dsputil.h 19
-rw-r--r-- src/libffmpeg/libavcodec/h263.c 1056
-rw-r--r-- src/libffmpeg/libavcodec/h263data.h 4
-rw-r--r-- src/libffmpeg/libavcodec/h263dec.c 138
-rw-r--r-- src/libffmpeg/libavcodec/i386/dsputil_mmx.c 331
-rw-r--r-- src/libffmpeg/libavcodec/i386/motion_est_mmx.c 514
-rw-r--r-- src/libffmpeg/libavcodec/motion_est.c 703
-rw-r--r-- src/libffmpeg/libavcodec/mpeg12.c 90
-rw-r--r-- src/libffmpeg/libavcodec/mpeg4data.h 27
-rw-r--r-- src/libffmpeg/libavcodec/mpegvideo.c 860
-rw-r--r-- src/libffmpeg/libavcodec/mpegvideo.h 103
-rw-r--r-- src/libffmpeg/libavcodec/msmpeg4.c 531
-rw-r--r-- src/libffmpeg/libavcodec/msmpeg4data.h 16
-rw-r--r-- src/libffmpeg/libavcodec/utils.c 41
18 files changed, 3807 insertions, 986 deletions
diff --git a/src/libffmpeg/libavcodec/avcodec.h b/src/libffmpeg/libavcodec/avcodec.h
index 3c27d99ea..05b27d8c2 100644
--- a/src/libffmpeg/libavcodec/avcodec.h
+++ b/src/libffmpeg/libavcodec/avcodec.h
@@ -14,11 +14,14 @@ enum CodecID {
CODEC_ID_MJPEG,
CODEC_ID_MPEG4,
CODEC_ID_RAWVIDEO,
- CODEC_ID_MSMPEG4,
+ CODEC_ID_MSMPEG4V1,
+ CODEC_ID_MSMPEG4V2,
+ CODEC_ID_MSMPEG4V3,
CODEC_ID_H263P,
CODEC_ID_H263I,
};
+#define CODEC_ID_MSMPEG4 CODEC_ID_MSMPEG4V3
enum CodecType {
CODEC_TYPE_VIDEO,
@@ -48,11 +51,19 @@ extern int motion_estimation_method;
#define ME_FULL 1
#define ME_LOG 2
#define ME_PHODS 3
+#define ME_EPZS 4
+#define ME_X1 5
/* encoding support */
+/* note not everything is supported yet */
#define CODEC_FLAG_HQ 0x0001 /* high quality (non real time) encoding */
#define CODEC_FLAG_QSCALE 0x0002 /* use fixed qscale */
+#define CODEC_FLAG_4MV 0x0004 /* 4 MV per MB allowed */
+#define CODEC_FLAG_B 0x0008 /* use B frames */
+#define CODEC_FLAG_QPEL 0x0010 /* use qpel MC */
+#define CODEC_FLAG_GMC 0x0020 /* use GMC */
+#define CODEC_FLAG_TYPE 0x0040 /* fixed I/P frame type, from avctx->key_frame */
/* codec capabilities */
@@ -63,12 +74,19 @@ extern int motion_estimation_method;
typedef struct AVCodecContext {
int bit_rate;
+ int bit_rate_tolerance; /* amount of +- bits (>0)*/
int flags;
int sub_id; /* some codecs needs additionnal format info. It is
stored there */
/* video only */
int frame_rate; /* frames per sec multiplied by FRAME_RATE_BASE */
int width, height;
+ int aspect_ratio_info;
+#define FF_ASPECT_SQUARE 1
+#define FF_ASPECT_4_3_625 2
+#define FF_ASPECT_4_3_525 3
+#define FF_ASPECT_16_9_625 4
+#define FF_ASPECT_16_9_525 5
int gop_size; /* 0 = intra only */
int pix_fmt; /* pixel format, see PIX_FMT_xxx */
@@ -92,6 +110,12 @@ typedef struct AVCodecContext {
a key frame (intra, or seekable) */
int quality; /* quality of the previous encoded frame
(between 1 (good) and 31 (bad)) */
+ float qcompress; /* amount of qscale change between easy & hard scenes (0.0-1.0)*/
+ float qblur; /* amount of qscale smoothing over time (0.0-1.0) */
+ int qmin; /* min qscale */
+ int qmax; /* max qscale */
+ int max_qdiff; /* max qscale difference between frames */
+
struct AVCodec *codec;
void *priv_data;
@@ -122,6 +146,17 @@ typedef struct AVCodecContext {
float psnr_cb;
float psnr_cr;
+ /* statistics, used for 2-pass encoding */
+ int mv_bits;
+ int header_bits;
+ int i_tex_bits;
+ int p_tex_bits;
+ int i_count;
+ int p_count;
+ int skip_count;
+ int misc_bits; // cbp, mb_type
+ int frame_bits;
+
/* the following fields are ignored */
void *opaque; /* can be used to carry app specific stuff */
char codec_name[32];
@@ -152,7 +187,9 @@ typedef struct AVPicture {
extern AVCodec h263_decoder;
extern AVCodec mpeg4_decoder;
-extern AVCodec msmpeg4_decoder;
+extern AVCodec msmpeg4v1_decoder;
+extern AVCodec msmpeg4v2_decoder;
+extern AVCodec msmpeg4v3_decoder;
extern AVCodec mpeg_decoder;
extern AVCodec h263i_decoder;
extern AVCodec rv10_decoder;
diff --git a/src/libffmpeg/libavcodec/common.c b/src/libffmpeg/libavcodec/common.c
index 77f94689e..96d8a303a 100644
--- a/src/libffmpeg/libavcodec/common.c
+++ b/src/libffmpeg/libavcodec/common.c
@@ -128,6 +128,7 @@ void init_get_bits(GetBitContext *s,
s->bit_cnt += 8;
}
#endif
+ s->size= buffer_size;
}
#ifndef ALT_BITSTREAM_READER
@@ -201,6 +202,14 @@ void align_get_bits(GetBitContext *s)
#endif
}
+int check_marker(GetBitContext *s, char *msg) /* read one bit from s; if it is 0, print a warning that ends with msg */
+{
+ int bit= get_bits1(s); /* takes exactly one bit from the stream */
+ if(!bit) printf("Marker bit missing %s\n", msg); /* report only: nothing is aborted or flagged here */
+
+ return bit; /* the bit that was read (non-zero means the marker was present) */
+}
+
#ifndef ALT_BITSTREAM_READER
/* This function is identical to get_bits_long(), the */
/* only diference is that it doesn't touch the buffer */
diff --git a/src/libffmpeg/libavcodec/common.h b/src/libffmpeg/libavcodec/common.h
index fd4bba129..707dbbc8f 100644
--- a/src/libffmpeg/libavcodec/common.h
+++ b/src/libffmpeg/libavcodec/common.h
@@ -13,6 +13,7 @@
//#define ALT_BITSTREAM_READER
//#define ALIGNED_BITSTREAM
#define FAST_GET_FIRST_VLC
+//#define DUMP_STREAM // only works with the ALT_BITSTREAM_READER
#ifdef HAVE_AV_CONFIG_H
/* only include the following when compiling package */
@@ -197,8 +198,11 @@ typedef struct GetBitContext {
int bit_cnt;
UINT8 *buf, *buf_ptr, *buf_end;
#endif
+ int size;
} GetBitContext;
+static inline int get_bits_count(GetBitContext *s);
+
typedef struct VLC {
int bits;
INT16 *table_codes;
@@ -466,6 +470,13 @@ static inline unsigned int get_bits(GetBitContext *s, int n){
result>>= 32 - n;
index+= n;
s->index= index;
+#ifdef DUMP_STREAM
+ while(n){
+ printf("%d", (result>>(n-1))&1);
+ n--;
+ }
+ printf(" ");
+#endif
return result;
#endif //!ALIGNED_BITSTREAM
@@ -492,6 +503,9 @@ static inline unsigned int get_bits1(GetBitContext *s){
result>>= 8 - 1;
index++;
s->index= index;
+#ifdef DUMP_STREAM
+ printf("%d ", result);
+#endif
return result;
#else
@@ -550,9 +564,54 @@ static inline unsigned int show_bits(GetBitContext *s, int n)
#endif //!ALT_BITSTREAM_READER
}
+static inline int show_aligned_bits(GetBitContext *s, int offset, int n) /* peek n bits starting at the first byte boundary at or after (current bit position + offset); no field of s is written here */
+{
+#ifdef ALT_BITSTREAM_READER
+#ifdef ALIGNED_BITSTREAM
+ int index= (s->index + offset + 7)&(~7); /* bit index rounded up to a multiple of 8 */
+ uint32_t result1= be2me_32( ((uint32_t *)s->buffer)[index>>5] ); /* 32-bit word that contains bit 'index' */
+ uint32_t result2= be2me_32( ((uint32_t *)s->buffer)[(index>>5) + 1] ); /* the word after it, source of the bits shifted in from the right */
+#ifdef ARCH_X86
+ asm ("shldl %%cl, %2, %0\n\t"
+ : "=r" (result1)
+ : "0" (result1), "r" (result2), "c" (index)); /* x86 double shift; NOTE(review): presumably equivalent to the portable #else code below - confirm */
+#else
+ result1<<= (index&0x1F); /* discard the bits that precede the target position inside the word */
+ result2= (result2>>1) >> (31-(index&0x1F)); /* shift split in two steps so no single shift reaches 32 when index&0x1F is 0 */
+ result1|= result2;
+#endif
+ result1>>= 32 - n; /* keep only the top n bits; n == 0 would make this a shift by 32 */
+
+ return result1;
+#else //ALIGNED_BITSTREAM
+ int index= (s->index + offset + 7)>>3; /* byte index, rounded up to the next whole byte */
+ uint32_t result= be2me_32( unaligned32( ((uint8_t *)s->buffer)+index ) ); /* 32 bits loaded from that byte address */
+
+ result>>= 32 - n; /* keep only the top n bits */
+
+ return result;
+#endif //!ALIGNED_BITSTREAM
+#else //ALT_BITSTREAM_READER
+ int index= (get_bits_count(s) + offset + 7)>>3; /* same byte rounding, position taken from get_bits_count() */
+ uint32_t result= be2me_32( unaligned32( ((uint8_t *)s->buf)+index ) ); /* this reader keeps its data in s->buf rather than s->buffer */
+
+ result>>= 32 - n; /* keep only the top n bits */
+//printf(" %X %X %d \n", (int)(((uint8_t *)s->buf)+index ), (int)s->buf_ptr, s->bit_cnt);
+ return result;
+#endif //!ALT_BITSTREAM_READER
+}
+
static inline void skip_bits(GetBitContext *s, int n){
#ifdef ALT_BITSTREAM_READER
s->index+= n;
+#ifdef DUMP_STREAM
+ {
+ int result;
+ s->index-= n;
+ result= get_bits(s, n);
+ }
+#endif
+
#else
if(s->bit_cnt>=n){
/* most common case here */
@@ -570,6 +629,10 @@ static inline void skip_bits(GetBitContext *s, int n){
static inline void skip_bits1(GetBitContext *s){
#ifdef ALT_BITSTREAM_READER
s->index++;
+#ifdef DUMP_STREAM
+ s->index--;
+ printf("%d ", get_bits1(s));
+#endif
#else
if(s->bit_cnt>0){
/* most common case here */
@@ -593,6 +656,7 @@ static inline int get_bits_count(GetBitContext *s)
#endif
}
+int check_marker(GetBitContext *s, char *msg);
void align_get_bits(GetBitContext *s);
int init_vlc(VLC *vlc, int nb_bits, int nb_codes,
const void *bits, int bits_wrap, int bits_size,
@@ -694,6 +758,13 @@ static inline int get_vlc(GetBitContext *s, VLC *vlc)
if (n > 0) {
/* most common case (90%)*/
FLUSH_BITS(n);
+#ifdef DUMP_STREAM
+ {
+ int n= bit_cnt - s->index;
+ skip_bits(s, n);
+ RESTORE_BITS(s);
+ }
+#endif
RESTORE_BITS(s);
return code;
} else if (n == 0) {
@@ -728,6 +799,13 @@ static inline int get_vlc(GetBitContext *s, VLC *vlc)
table_bits = vlc->table_bits + code;
}
}
+#ifdef DUMP_STREAM
+ {
+ int n= bit_cnt - s->index;
+ skip_bits(s, n);
+ RESTORE_BITS(s);
+ }
+#endif
RESTORE_BITS(s);
return code;
}
@@ -786,6 +864,24 @@ static inline int av_log2(unsigned int v)
return n;
}
+/* median of 3: returns the middle value of a, b and c */
+static inline int mid_pred(int a, int b, int c)
+{
+ int vmin, vmax;
+ vmax = vmin = a;
+ if (b < vmin)
+ vmin = b; /* b < a: vmin = b, vmax stays a */
+ else
+ vmax = b; /* b >= a: vmin stays a, vmax = b */
+
+ if (c < vmin) /* from here vmin/vmax are min/max of a and b; widen the range with c */
+ vmin = c;
+ else if (c > vmax)
+ vmax = c;
+
+ return a + b + c - vmin - vmax; /* sum minus smallest and largest leaves the middle value */
+}
+
/* memory */
void *av_mallocz(int size);
diff --git a/src/libffmpeg/libavcodec/dsputil.c b/src/libffmpeg/libavcodec/dsputil.c
index 0e698f35c..dcfad05a5 100644
--- a/src/libffmpeg/libavcodec/dsputil.c
+++ b/src/libffmpeg/libavcodec/dsputil.c
@@ -30,12 +30,18 @@ void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
+void (*clear_blocks)(DCTELEM *blocks);
op_pixels_abs_func pix_abs16x16;
op_pixels_abs_func pix_abs16x16_x2;
op_pixels_abs_func pix_abs16x16_y2;
op_pixels_abs_func pix_abs16x16_xy2;
+op_pixels_abs_func pix_abs8x8;
+op_pixels_abs_func pix_abs8x8_x2;
+op_pixels_abs_func pix_abs8x8_y2;
+op_pixels_abs_func pix_abs8x8_xy2;
+
UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
UINT32 squareTbl[512];
@@ -377,14 +383,14 @@ static void qpel_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride,
int i;
for(i=0; i<h; i++)
{
- dst[0]= cm[(((src[0]+src[1])*160 - (src[0]+src[2])*48 + (src[1]+src[3])*24 - (src[2]+src[4])*8 + r)>>8)];
- dst[1]= cm[(((src[1]+src[2])*160 - (src[0]+src[3])*48 + (src[0]+src[4])*24 - (src[1]+src[5])*8 + r)>>8)];
- dst[2]= cm[(((src[2]+src[3])*160 - (src[1]+src[4])*48 + (src[0]+src[5])*24 - (src[0]+src[6])*8 + r)>>8)];
- dst[3]= cm[(((src[3]+src[4])*160 - (src[2]+src[5])*48 + (src[1]+src[6])*24 - (src[0]+src[7])*8 + r)>>8)];
- dst[4]= cm[(((src[4]+src[5])*160 - (src[3]+src[6])*48 + (src[2]+src[7])*24 - (src[1]+src[8])*8 + r)>>8)];
- dst[5]= cm[(((src[5]+src[6])*160 - (src[4]+src[7])*48 + (src[3]+src[8])*24 - (src[2]+src[8])*8 + r)>>8)];
- dst[6]= cm[(((src[6]+src[7])*160 - (src[5]+src[8])*48 + (src[4]+src[8])*24 - (src[3]+src[7])*8 + r)>>8)];
- dst[7]= cm[(((src[7]+src[8])*160 - (src[6]+src[8])*48 + (src[5]+src[7])*24 - (src[4]+src[6])*8 + r)>>8)];
+ dst[0]= cm[(((src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]) + r)>>5)];
+ dst[1]= cm[(((src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]) + r)>>5)];
+ dst[2]= cm[(((src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]) + r)>>5)];
+ dst[3]= cm[(((src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]) + r)>>5)];
+ dst[4]= cm[(((src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]) + r)>>5)];
+ dst[5]= cm[(((src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]) + r)>>5)];
+ dst[6]= cm[(((src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]) + r)>>5)];
+ dst[7]= cm[(((src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]) + r)>>5)];
dst+=dstStride;
src+=srcStride;
}
@@ -405,14 +411,14 @@ static void qpel_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride,
const int src6= src[6*srcStride];
const int src7= src[7*srcStride];
const int src8= src[8*srcStride];
- dst[0*dstStride]= cm[(((src0+src1)*160 - (src0+src2)*48 + (src1+src3)*24 - (src2+src4)*8 + r)>>8)];
- dst[1*dstStride]= cm[(((src1+src2)*160 - (src0+src3)*48 + (src0+src4)*24 - (src1+src5)*8 + r)>>8)];
- dst[2*dstStride]= cm[(((src2+src3)*160 - (src1+src4)*48 + (src0+src5)*24 - (src0+src6)*8 + r)>>8)];
- dst[3*dstStride]= cm[(((src3+src4)*160 - (src2+src5)*48 + (src1+src6)*24 - (src0+src7)*8 + r)>>8)];
- dst[4*dstStride]= cm[(((src4+src5)*160 - (src3+src6)*48 + (src2+src7)*24 - (src1+src8)*8 + r)>>8)];
- dst[5*dstStride]= cm[(((src5+src6)*160 - (src4+src7)*48 + (src3+src8)*24 - (src2+src8)*8 + r)>>8)];
- dst[6*dstStride]= cm[(((src6+src7)*160 - (src5+src8)*48 + (src4+src8)*24 - (src3+src7)*8 + r)>>8)];
- dst[7*dstStride]= cm[(((src7+src8)*160 - (src6+src8)*48 + (src5+src7)*24 - (src4+src6)*8 + r)>>8)];
+ dst[0*dstStride]= cm[(((src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4) + r)>>5)];
+ dst[1*dstStride]= cm[(((src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5) + r)>>5)];
+ dst[2*dstStride]= cm[(((src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6) + r)>>5)];
+ dst[3*dstStride]= cm[(((src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7) + r)>>5)];
+ dst[4*dstStride]= cm[(((src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8) + r)>>5)];
+ dst[5*dstStride]= cm[(((src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8) + r)>>5)];
+ dst[6*dstStride]= cm[(((src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7) + r)>>5)];
+ dst[7*dstStride]= cm[(((src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6) + r)>>5)];
dst++;
src++;
}
@@ -485,38 +491,38 @@ static void qpel_mc00_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcS
static void qpel_mc10_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
{\
UINT8 half[64];\
- qpel_h_lowpass(half, src, 8, srcStride, 8, 128-r);\
+ qpel_h_lowpass(half, src, 8, srcStride, 8, 16-r);\
avg2_block(dst, src, half, dstStride, srcStride, 1-r);\
}\
\
static void qpel_mc20_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
{\
- qpel_h_lowpass(dst, src, dstStride, srcStride, 8, 128-r);\
+ qpel_h_lowpass(dst, src, dstStride, srcStride, 8, 16-r);\
}\
\
static void qpel_mc30_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
{\
UINT8 half[64];\
- qpel_h_lowpass(half, src, 8, srcStride, 8, 128-r);\
+ qpel_h_lowpass(half, src, 8, srcStride, 8, 16-r);\
avg2_block(dst, src+1, half, dstStride, srcStride, 1-r);\
}\
\
static void qpel_mc01_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
{\
UINT8 half[64];\
- qpel_v_lowpass(half, src, 8, srcStride, 8, 128-r);\
+ qpel_v_lowpass(half, src, 8, srcStride, 8, 16-r);\
avg2_block(dst, src, half, dstStride, srcStride, 1-r);\
}\
\
static void qpel_mc02_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
{\
- qpel_v_lowpass(dst, src, dstStride, srcStride, 8, 128-r);\
+ qpel_v_lowpass(dst, src, dstStride, srcStride, 8, 16-r);\
}\
\
static void qpel_mc03_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
{\
UINT8 half[64];\
- qpel_v_lowpass(half, src, 8, srcStride, 8, 128-r);\
+ qpel_v_lowpass(half, src, 8, srcStride, 8, 16-r);\
avg2_block(dst, src+srcStride, half, dstStride, srcStride, 1-r);\
}\
static void qpel_mc11_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
@@ -524,9 +530,9 @@ static void qpel_mc11_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcS
UINT8 halfH[72];\
UINT8 halfV[64];\
UINT8 halfHV[64];\
- qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
- qpel_v_lowpass(halfV, src, 8, srcStride, 8, 128-r);\
- qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
+ qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
+ qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\
+ qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
avg4_block(dst, src, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\
}\
static void qpel_mc31_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
@@ -534,9 +540,9 @@ static void qpel_mc31_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcS
UINT8 halfH[72];\
UINT8 halfV[64];\
UINT8 halfHV[64];\
- qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
- qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 128-r);\
- qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
+ qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
+ qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\
+ qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
avg4_block(dst, src+1, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\
}\
static void qpel_mc13_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
@@ -544,9 +550,9 @@ static void qpel_mc13_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcS
UINT8 halfH[72];\
UINT8 halfV[64];\
UINT8 halfHV[64];\
- qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
- qpel_v_lowpass(halfV, src, 8, srcStride, 8, 128-r);\
- qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
+ qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
+ qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\
+ qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
avg4_block(dst, src+srcStride, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\
}\
static void qpel_mc33_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
@@ -554,25 +560,25 @@ static void qpel_mc33_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcS
UINT8 halfH[72];\
UINT8 halfV[64];\
UINT8 halfHV[64];\
- qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
- qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 128-r);\
- qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
+ qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
+ qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\
+ qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
avg4_block(dst, src+srcStride+1, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\
}\
static void qpel_mc21_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
{\
UINT8 halfH[72];\
UINT8 halfHV[64];\
- qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
- qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
+ qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
+ qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
avg2_block(dst, halfH, halfHV, dstStride, 8, 1-r);\
}\
static void qpel_mc23_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
{\
UINT8 halfH[72];\
UINT8 halfHV[64];\
- qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
- qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
+ qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
+ qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
avg2_block(dst, halfH+8, halfHV, dstStride, 8, 1-r);\
}\
static void qpel_mc12_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
@@ -580,9 +586,9 @@ static void qpel_mc12_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcS
UINT8 halfH[72];\
UINT8 halfV[64];\
UINT8 halfHV[64];\
- qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
- qpel_v_lowpass(halfV, src, 8, srcStride, 8, 128-r);\
- qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
+ qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
+ qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\
+ qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\
}\
static void qpel_mc32_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
@@ -590,16 +596,16 @@ static void qpel_mc32_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcS
UINT8 halfH[72];\
UINT8 halfV[64];\
UINT8 halfHV[64];\
- qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
- qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 128-r);\
- qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
+ qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
+ qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\
+ qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\
}\
static void qpel_mc22_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
{\
UINT8 halfH[72];\
- qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
- qpel_v_lowpass(dst, halfH, dstStride, 8, 8, 128-r);\
+ qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
+ qpel_v_lowpass(dst, halfH, dstStride, 8, 8, 16-r);\
}\
qpel_mc_func qpel_mc ## name ## _tab[16]={ \
qpel_mc00_c ## name, \
@@ -623,12 +629,12 @@ qpel_mc_func qpel_mc ## name ## _tab[16]={ \
QPEL_MC(0, _rnd)
QPEL_MC(1, _no_rnd)
-int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
+int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
int s, i;
s = 0;
- for(i=0;i<h;i++) {
+ for(i=0;i<16;i++) {
s += abs(pix1[0] - pix2[0]);
s += abs(pix1[1] - pix2[1]);
s += abs(pix1[2] - pix2[2]);
@@ -651,12 +657,12 @@ int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
return s;
}
-int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
+int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
int s, i;
s = 0;
- for(i=0;i<h;i++) {
+ for(i=0;i<16;i++) {
s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
@@ -679,13 +685,13 @@ int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
return s;
}
-int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
+int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
int s, i;
UINT8 *pix3 = pix2 + line_size;
s = 0;
- for(i=0;i<h;i++) {
+ for(i=0;i<16;i++) {
s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
@@ -709,13 +715,13 @@ int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
return s;
}
-int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
+int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
int s, i;
UINT8 *pix3 = pix2 + line_size;
s = 0;
- for(i=0;i<h;i++) {
+ for(i=0;i<16;i++) {
s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
@@ -739,6 +745,90 @@ int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
return s;
}
+int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size) /* sum of absolute differences between two 8x8 pixel blocks; line_size is the step between rows for both */
+{
+ int s, i;
+
+ s = 0;
+ for(i=0;i<8;i++) { /* one iteration per row, 8 rows */
+ s += abs(pix1[0] - pix2[0]);
+ s += abs(pix1[1] - pix2[1]);
+ s += abs(pix1[2] - pix2[2]);
+ s += abs(pix1[3] - pix2[3]);
+ s += abs(pix1[4] - pix2[4]);
+ s += abs(pix1[5] - pix2[5]);
+ s += abs(pix1[6] - pix2[6]);
+ s += abs(pix1[7] - pix2[7]);
+ pix1 += line_size; /* advance both blocks to their next row */
+ pix2 += line_size;
+ }
+ return s; /* accumulated absolute difference over all 64 pixels */
+}
+
+int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) /* like pix_abs8x8_c, but each pix1 pixel is compared with avg2() of two horizontally adjacent pix2 pixels */
+{
+ int s, i;
+
+ s = 0;
+ for(i=0;i<8;i++) { /* one iteration per row, 8 rows */
+ s += abs(pix1[0] - avg2(pix2[0], pix2[1])); /* avg2 is defined outside this hunk; its rounding is not visible here */
+ s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
+ s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
+ s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
+ s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
+ s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
+ s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
+ s += abs(pix1[7] - avg2(pix2[7], pix2[8])); /* reads pix2[8]: 9 pixels of pix2 are needed per row */
+ pix1 += line_size; /* advance both blocks to their next row */
+ pix2 += line_size;
+ }
+ return s;
+}
+
+int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) /* like pix_abs8x8_c, but each pix1 pixel is compared with avg2() of two vertically adjacent pix2 pixels */
+{
+ int s, i;
+ UINT8 *pix3 = pix2 + line_size; /* the pix2 row directly below the current one */
+
+ s = 0;
+ for(i=0;i<8;i++) { /* 8 rows of pix1; pix3 runs one row ahead, so 9 rows of pix2 are read in total */
+ s += abs(pix1[0] - avg2(pix2[0], pix3[0])); /* avg2 is defined outside this hunk; its rounding is not visible here */
+ s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
+ s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
+ s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
+ s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
+ s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
+ s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
+ s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
+ pix1 += line_size; /* advance all three row pointers together */
+ pix2 += line_size;
+ pix3 += line_size;
+ }
+ return s;
+}
+
+int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) /* like pix_abs8x8_c, but each pix1 pixel is compared with avg4() of a 2x2 neighbourhood of pix2 */
+{
+ int s, i;
+ UINT8 *pix3 = pix2 + line_size; /* the pix2 row directly below the current one */
+
+ s = 0;
+ for(i=0;i<8;i++) { /* 8 rows of pix1; pix3 runs one row ahead, so 9 rows of pix2 are read in total */
+ s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1])); /* avg4 is defined outside this hunk; its rounding is not visible here */
+ s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
+ s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
+ s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
+ s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
+ s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
+ s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
+ s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8])); /* reads index 8: 9 pixels per pix2 row are needed */
+ pix1 += line_size; /* advance all three row pointers together */
+ pix2 += line_size;
+ pix3 += line_size;
+ }
+ return s;
+}
+
/* permute block according so that it corresponds to the MMX idct
order */
#ifdef SIMPLE_IDCT
@@ -777,6 +867,11 @@ void block_permute(INT16 *block)
}
#endif
+void clear_blocks_c(DCTELEM *blocks) /* plain C version installed into the clear_blocks function pointer by dsputil_init() */
+{
+ memset(blocks, 0, sizeof(DCTELEM)*6*64); /* zeroes 6*64 DCTELEM values starting at blocks; the caller must provide at least that many */
+}
+
void dsputil_init(void)
{
int i, j;
@@ -801,11 +896,16 @@ void dsputil_init(void)
put_pixels_clamped = put_pixels_clamped_c;
add_pixels_clamped = add_pixels_clamped_c;
gmc1= gmc1_c;
+ clear_blocks= clear_blocks_c;
- pix_abs16x16 = pix_abs16x16_c;
- pix_abs16x16_x2 = pix_abs16x16_x2_c;
- pix_abs16x16_y2 = pix_abs16x16_y2_c;
+ pix_abs16x16 = pix_abs16x16_c;
+ pix_abs16x16_x2 = pix_abs16x16_x2_c;
+ pix_abs16x16_y2 = pix_abs16x16_y2_c;
pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
+ pix_abs8x8 = pix_abs8x8_c;
+ pix_abs8x8_x2 = pix_abs8x8_x2_c;
+ pix_abs8x8_y2 = pix_abs8x8_y2_c;
+ pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
av_fdct = jpeg_fdct_ifast;
use_permuted_idct = 1;
diff --git a/src/libffmpeg/libavcodec/dsputil.h b/src/libffmpeg/libavcodec/dsputil.h
index d0a6e68ba..dc63f06f1 100644
--- a/src/libffmpeg/libavcodec/dsputil.h
+++ b/src/libffmpeg/libavcodec/dsputil.h
@@ -41,11 +41,13 @@ extern void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
extern void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
extern void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
+extern void (*clear_blocks)(DCTELEM *blocks);
void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size);
void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
+void clear_blocks_c(DCTELEM *blocks);
/* add and put pixel (decoding) */
typedef void (*op_pixels_func)(UINT8 *block, const UINT8 *pixels, int line_size, int h);
@@ -67,17 +69,21 @@ extern void (*sub_pixels_tab[4])(DCTELEM *block, const UINT8 *pixels, int line_s
/* motion estimation */
-typedef int (*op_pixels_abs_func)(UINT8 *blk1, UINT8 *blk2, int line_size, int h);
+typedef int (*op_pixels_abs_func)(UINT8 *blk1, UINT8 *blk2, int line_size);
extern op_pixels_abs_func pix_abs16x16;
extern op_pixels_abs_func pix_abs16x16_x2;
extern op_pixels_abs_func pix_abs16x16_y2;
extern op_pixels_abs_func pix_abs16x16_xy2;
+extern op_pixels_abs_func pix_abs8x8;
+extern op_pixels_abs_func pix_abs8x8_x2;
+extern op_pixels_abs_func pix_abs8x8_y2;
+extern op_pixels_abs_func pix_abs8x8_xy2;
-int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx, int h);
-int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx, int h);
-int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx, int h);
-int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx, int h);
+int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx);
+int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx);
+int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx);
+int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx);
static inline int block_permute_op(int j)
{
@@ -102,7 +108,8 @@ void block_permute(INT16 *block);
extern int mm_flags;
-int mm_support(void);
+/* int mm_support(void); */
+#define mm_support() xine_mm_accel()
#if 0
static inline void emms(void)
diff --git a/src/libffmpeg/libavcodec/h263.c b/src/libffmpeg/libavcodec/h263.c
index 79b74631d..52127aaad 100644
--- a/src/libffmpeg/libavcodec/h263.c
+++ b/src/libffmpeg/libavcodec/h263.c
@@ -17,6 +17,8 @@
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * ac prediction encoding by Michael Niedermayer <michaelni@gmx.at>
*/
#include "common.h"
#include "dsputil.h"
@@ -28,19 +30,35 @@
//rounded divison & shift
#define RDIV(a,b) ((a) > 0 ? ((a)+((b)>>1))/(b) : ((a)-((b)>>1))/(b))
#define RSHIFT(a,b) ((a) > 0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b))
+#define ABS(a) (((a)>=0)?(a):(-(a))) /* absolute value; 'a' is evaluated twice, so avoid arguments with side effects */
+#define MAX(a,b) ((a) > (b) ? (a) : (b)) /* larger of a and b; the selected argument is evaluated twice */
+#define MIN(a,b) ((a) < (b) ? (a) : (b)) /* smaller of a and b; the selected argument is evaluated twice */
static void h263_encode_block(MpegEncContext * s, DCTELEM * block,
int n);
static void h263_encode_motion(MpegEncContext * s, int val);
static void h263p_encode_umotion(MpegEncContext * s, int val);
static void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block,
- int n);
-static int h263_decode_motion(MpegEncContext * s, int pred);
+ int n, int dc, UINT8 *scan_table);
+static int h263_decode_motion(MpegEncContext * s, int pred, int fcode);
static int h263p_decode_umotion(MpegEncContext * s, int pred);
static int h263_decode_block(MpegEncContext * s, DCTELEM * block,
int n, int coded);
static int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
int n, int coded);
+static inline int mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *dir_ptr);
+static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n,
+ int dir);
+static void mpeg4_decode_sprite_trajectory(MpegEncContext * s);
+
+extern UINT32 inverse[256];
+
+static UINT16 mv_penalty[MAX_FCODE+1][MAX_MV*2+1];
+static UINT8 fcode_tab[MAX_MV*2+1];
+static UINT8 umv_fcode_tab[MAX_MV*2+1];
+
+static UINT16 uni_DCtab_lum [512][2];
+static UINT16 uni_DCtab_chrom[512][2];
int h263_get_picture_format(int width, int height)
{
@@ -195,7 +213,213 @@ int h263_encode_gob_header(MpegEncContext * s, int mb_line)
}
return 0;
}
+
+static inline int decide_ac_pred(MpegEncContext * s, DCTELEM block[6][64], int dir[6])
+{
+    /* Decide whether AC prediction pays off for the six blocks of an intra
+     * macroblock: returns 1 when the summed magnitude of the predicted
+     * first row/column coefficients is strictly smaller than that of the
+     * raw ones, 0 otherwise.  As a side effect the current first column
+     * (slots 1..7) and first row (slots 9..15) of each block are stored in
+     * s->ac_val for the current block position. */
+    int raw_cost = 0;
+    int pred_cost = 0;
+    int blk;
+
+    for (blk = 0; blk < 6; blk++) {
+        INT16 *cur = s->ac_val[0][0] + s->block_index[blk] * 16;
+        /* dir != 0: predict the first row from the block above (its slots
+         * 9..15); dir == 0: predict the first column from the left block. */
+        const INT16 *ref = dir[blk] ? cur - s->block_wrap[blk] * 16 + 8
+                                    : cur - 16;
+        int k;
+
+        for (k = 1; k < 8; k++) {
+            const int row = block[blk][block_permute_op(k)];
+            const int col = block[blk][block_permute_op(k << 3)];
+            const int level = dir[blk] ? row : col;
+
+            raw_cost  += ABS(level);
+            pred_cost += ABS(level - ref[k]);
+            cur[k]     = col;
+            cur[k + 8] = row;
+        }
+    }
+    return raw_cost > pred_cost;
+}
+
+void mpeg4_encode_mb(MpegEncContext * s, /* writes one macroblock to s->pb and updates the bit/count statistics in s */
+ DCTELEM block[6][64], /* coefficients of the six blocks; with AC prediction the intra coeffs are changed and restored again */
+ int motion_x, int motion_y) /* 16x16 motion vector of an inter macroblock */
+{
+ int cbpc, cbpy, i, cbp, pred_x, pred_y;
+ int bits;
+ // printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
+ if (!s->mb_intra) {
+ /* compute cbp */
+ cbp = 0;
+ for (i = 0; i < 6; i++) {
+ if (s->block_last_index[i] >= 0)
+ cbp |= 1 << (5 - i); /* block 0 is the most significant of the 6 bits */
+ }
+ if ((cbp | motion_x | motion_y) == 0 && s->mv_type==MV_TYPE_16X16) {
+ /* skip macroblock */
+ put_bits(&s->pb, 1, 1);
+ s->misc_bits++; /* the single skip bit is accounted as misc */
+ s->last_bits++;
+ s->skip_count++;
+ return;
+ }
+ put_bits(&s->pb, 1, 0); /* mb coded */
+ if(s->mv_type==MV_TYPE_16X16){
+ cbpc = cbp & 3; /* low two bits: blocks 4 and 5 */
+ put_bits(&s->pb,
+ inter_MCBPC_bits[cbpc],
+ inter_MCBPC_code[cbpc]);
+ cbpy = cbp >> 2; /* remaining four bits: blocks 0..3 */
+ cbpy ^= 0xf; /* inverted for inter macroblocks (the intra path below does not invert) */
+ put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+
+ bits= get_bit_count(&s->pb);
+ s->misc_bits+= bits - s->last_bits;
+ s->last_bits=bits;
+
+ /* motion vectors: 16x16 mode */
+ h263_pred_motion(s, 0, &pred_x, &pred_y);
+
+ h263_encode_motion(s, motion_x - pred_x);
+ h263_encode_motion(s, motion_y - pred_y);
+ }else{
+ cbpc = (cbp & 3)+16; /* entries 16.. of the inter MCBPC tables are used when mv_type is not 16x16 */
+ put_bits(&s->pb,
+ inter_MCBPC_bits[cbpc],
+ inter_MCBPC_code[cbpc]);
+ cbpy = cbp >> 2;
+ cbpy ^= 0xf;
+ put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+
+ bits= get_bit_count(&s->pb);
+ s->misc_bits+= bits - s->last_bits;
+ s->last_bits=bits;
+
+ for(i=0; i<4; i++){
+ /* motion vectors: 8x8 mode*/
+ h263_pred_motion(s, i, &pred_x, &pred_y);
+
+ h263_encode_motion(s, s->motion_val[ s->block_index[i] ][0] - pred_x);
+ h263_encode_motion(s, s->motion_val[ s->block_index[i] ][1] - pred_y);
+ }
+ }
+ bits= get_bit_count(&s->pb);
+ s->mv_bits+= bits - s->last_bits; /* everything written since the header counts as motion vector bits */
+ s->last_bits=bits;
+
+ /* encode each block */
+ for (i = 0; i < 6; i++) {
+ mpeg4_encode_block(s, block[i], i, 0, zigzag_direct); /* inter: dc argument is 0, plain zigzag scan */
+ }
+ bits= get_bit_count(&s->pb);
+ s->p_tex_bits+= bits - s->last_bits;
+ s->last_bits=bits;
+ s->p_count++;
+ } else {
+ int dc_diff[6]; //dc values with the dc prediction subtracted
+ int dir[6]; //prediction direction
+ int zigzag_last_index[6]; //block_last_index values saved while AC prediction overrides them
+ UINT8 *scan_table[6]; //scan order used for each block
+
+ for(i=0; i<6; i++){
+ const int level= block[i][0];
+ UINT16 *dc_ptr;
+
+ dc_diff[i]= level - mpeg4_pred_dc(s, i, &dc_ptr, &dir[i]);
+ if (i < 4) {
+ *dc_ptr = level * s->y_dc_scale; /* store the scaled DC for later predictions (luma scale) */
+ } else {
+ *dc_ptr = level * s->c_dc_scale; /* same with the chroma scale */
+ }
+ }
+
+ s->ac_pred= decide_ac_pred(s, block, dir);
+
+ if(s->ac_pred){
+ for(i=0; i<6; i++){
+ UINT8 *st;
+ int last_index;
+
+ mpeg4_inv_pred_ac(s, block[i], i, dir[i]); /* subtracts the AC prediction from block[i] in place */
+ if (dir[i]==0) st = ff_alternate_vertical_scan; /* left */
+ else st = ff_alternate_horizontal_scan; /* top */
+
+ for(last_index=63; last_index>=0; last_index--) //FIXME optimize
+ if(block[i][st[last_index]]) break;
+ zigzag_last_index[i]= s->block_last_index[i]; /* saved, restored at the end of this function */
+ s->block_last_index[i]= last_index; /* last nonzero position in the new scan order, -1 if none */
+ scan_table[i]= st;
+ }
+ }else{
+ for(i=0; i<6; i++)
+ scan_table[i]= zigzag_direct;
+ }
+
+ /* compute cbp */
+ cbp = 0;
+ for (i = 0; i < 6; i++) {
+ if (s->block_last_index[i] >= 1) /* position 0 (the DC) alone does not set the bit */
+ cbp |= 1 << (5 - i);
+ }
+
+ cbpc = cbp & 3;
+ if (s->pict_type == I_TYPE) {
+ put_bits(&s->pb,
+ intra_MCBPC_bits[cbpc],
+ intra_MCBPC_code[cbpc]);
+ } else {
+ put_bits(&s->pb, 1, 0); /* mb coded */
+ put_bits(&s->pb,
+ inter_MCBPC_bits[cbpc + 4], /* entries 4.. are used for intra macroblocks in non-I pictures */
+ inter_MCBPC_code[cbpc + 4]);
+ }
+ put_bits(&s->pb, 1, s->ac_pred); /* AC prediction flag */
+ cbpy = cbp >> 2;
+ put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+
+ bits= get_bit_count(&s->pb);
+ s->misc_bits+= bits - s->last_bits;
+ s->last_bits=bits;
+
+ /* encode each block */
+ for (i = 0; i < 6; i++) {
+ mpeg4_encode_block(s, block[i], i, dc_diff[i], scan_table[i]);
+ }
+
+ bits= get_bit_count(&s->pb);
+ s->i_tex_bits+= bits - s->last_bits;
+ s->last_bits=bits;
+ s->i_count++;
+
+ /* restore ac coeffs & last_index stuff if we messed them up with the prediction */
+ if(s->ac_pred){
+ for(i=0; i<6; i++){
+ int j;
+ INT16 *ac_val;
+
+ ac_val = s->ac_val[0][0] + s->block_index[i] * 16; /* values stored for this block by decide_ac_pred */
+
+ if(dir[i]){
+ for(j=1; j<8; j++)
+ block[i][block_permute_op(j )]= ac_val[j+8];
+ }else{
+ for(j=1; j<8; j++)
+ block[i][block_permute_op(j<<3)]= ac_val[j ];
+ }
+ s->block_last_index[i]= zigzag_last_index[i];
+ }
+ }
+ }
+}
+
void h263_encode_mb(MpegEncContext * s,
DCTELEM block[6][64],
int motion_x, int motion_y)
@@ -266,18 +490,11 @@ void h263_encode_mb(MpegEncContext * s,
}
/* encode each block */
- if (s->h263_pred) {
- for (i = 0; i < 6; i++) {
- mpeg4_encode_block(s, block[i], i);
- }
- } else {
- for (i = 0; i < 6; i++) {
- h263_encode_block(s, block[i], i);
- }
+ for (i = 0; i < 6; i++) {
+ h263_encode_block(s, block[i], i);
}
}
-
void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n)
{
int x, y, wrap, a, c, pred_dc, scale, i;
@@ -359,69 +576,33 @@ void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n)
ac_val1[8 + i] = block[block_permute_op(i)];
}
-
-static inline int mid_pred(int a, int b, int c)
-{
- int vmin, vmax;
- vmax = vmin = a;
- if (b < vmin)
- vmin = b;
- else
- vmax = b;
-
- if (c < vmin)
- vmin = c;
- else if (c > vmax)
- vmax = c;
-
- return a + b + c - vmin - vmax;
-}
-
INT16 *h263_pred_motion(MpegEncContext * s, int block,
int *px, int *py)
{
- int xy, y, wrap;
+ int xy, wrap;
INT16 *A, *B, *C, *mot_val;
+ static const int off[4]= {2, 1, 1, -1};
- wrap = 2 * s->mb_width + 2;
- y = xy = 2 * s->mb_y + 1 + ((block >> 1) & 1); // y
- xy *= wrap; // y * wrap
- xy += 2 * s->mb_x + 1 + (block & 1); // x + y * wrap
+ wrap = s->block_wrap[0];
+ xy = s->block_index[block];
mot_val = s->motion_val[xy];
/* special case for first line */
- if (y == 1 || s->first_slice_line || s->first_gob_line) {
+ if ((s->mb_y == 0 || s->first_slice_line || s->first_gob_line) && block<2) {
A = s->motion_val[xy - 1];
*px = A[0];
*py = A[1];
} else {
- switch(block) {
- default:
- case 0:
- A = s->motion_val[xy - 1];
- B = s->motion_val[xy - wrap];
- C = s->motion_val[xy + 2 - wrap];
- break;
- case 1:
- case 2:
- A = s->motion_val[xy - 1];
- B = s->motion_val[xy - wrap];
- C = s->motion_val[xy + 1 - wrap];
- break;
- case 3:
- A = s->motion_val[xy - 1];
- B = s->motion_val[xy - 1 - wrap];
- C = s->motion_val[xy - wrap];
- break;
- }
+ A = s->motion_val[xy - 1];
+ B = s->motion_val[xy - wrap];
+ C = s->motion_val[xy + off[block] - wrap];
*px = mid_pred(A[0], B[0], C[0]);
*py = mid_pred(A[1], B[1], C[1]);
}
return mot_val;
}
-
static void h263_encode_motion(MpegEncContext * s, int val)
{
int range, l, m, bit_size, sign, code, bits;
@@ -501,15 +682,119 @@ static void h263p_encode_umotion(MpegEncContext * s, int val)
}
}
-void h263_encode_init_vlc(MpegEncContext *s)
+static void init_mv_penalty_and_fcode(MpegEncContext *s)
+{
+    /* Fills the static tables mv_penalty[f_code][mv+MAX_MV] (bit cost of
+     * coding mv with that f_code), fcode_tab (smallest f_code whose loop
+     * range covers the vector) and umv_fcode_tab (always 1).  s is kept
+     * for interface compatibility and no longer read. */
+    int f_code;
+    int mv;
+
+    for(f_code=1; f_code<=MAX_FCODE; f_code++){
+        /* must come from the row being filled, not from s->f_code: the
+         * tables are built once and s->f_code describes one picture only */
+        const int bit_size = f_code - 1;
+
+        for(mv=-MAX_MV; mv<=MAX_MV; mv++){
+            int len;
+
+            if(mv==0){
+                len= mvtab[0][1];
+            }else{
+                const int val= (mv < 0 ? -mv : mv) - 1;
+                const int code= (val >> bit_size) + 1;
+
+                if(code<33) len= mvtab[code][1] + 1 + bit_size;
+                else        len= mvtab[32][1] + 2 + bit_size;
+            }
+            mv_penalty[f_code][mv+MAX_MV]= len;
+        }
+    }
+
+    /* descending order so that smaller f_codes overwrite the centre */
+    for(f_code=MAX_FCODE; f_code>0; f_code--){
+        for(mv=-(16<<f_code); mv<(16<<f_code); mv++){
+            fcode_tab[mv+MAX_MV]= f_code;
+        }
+    }
+
+    for(mv=0; mv<MAX_MV*2+1; mv++){
+        umv_fcode_tab[mv]= 1;
+    }
+}
+
+static void init_uni_dc_tab()
+{
+    /* Precomputes, for every DC difference in -255..255, the complete
+     * code word and its length for luminance (uni_DCtab_lum) and
+     * chrominance (uni_DCtab_chrom).  Entries are indexed by level+256;
+     * element [0] holds the code bits and element [1] the bit count. */
+    int level;
+
+    for(level=-255; level<256; level++){
+        const int magnitude= level < 0 ? -level : level;
+        int size= 0;
+        int rest, l, chroma;
+
+        /* size = number of bits needed to represent the magnitude */
+        for(rest=magnitude; rest; rest>>=1)
+            size++;
+
+        /* negative values are sent as the bitwise inverse of the
+         * magnitude within size bits */
+        l= level < 0 ? magnitude ^ ((1 << size) - 1) : level;
+
+        for(chroma=0; chroma<2; chroma++){
+            int uni_code, uni_len;
+            /* start from the code for the size category ... */
+            if(chroma){
+                uni_code= DCtab_chrom[size][0];
+                uni_len = DCtab_chrom[size][1];
+            }else{
+                uni_code= DCtab_lum[size][0];
+                uni_len = DCtab_lum[size][1];
+            }
+            /* ... then append the size value bits, plus one trailing
+             * 1 bit when more than 8 value bits were written */
+            if(size > 0){
+                uni_code= (uni_code << size) | l;
+                uni_len+= size;
+                if(size > 8){
+                    uni_code= (uni_code << 1) | 1;
+                    uni_len++;
+                }
+            }
+            if(chroma){
+                uni_DCtab_chrom[level+256][0]= uni_code;
+                uni_DCtab_chrom[level+256][1]= uni_len;
+            }else{
+                uni_DCtab_lum[level+256][0]= uni_code;
+                uni_DCtab_lum[level+256][1]= uni_len;
+            }
+        }
+    }
+}
+
+void h263_encode_init(MpegEncContext *s)
{
static int done = 0;
if (!done) {
done = 1;
+
+ init_uni_dc_tab();
+
init_rl(&rl_inter);
init_rl(&rl_intra);
+
+ init_mv_penalty_and_fcode(s);
}
+ s->mv_penalty= mv_penalty; //FIXME exact table for msmpeg4 & h263p
+
+ // use fcodes >1 only for mpeg4 & h263 & h263p FIXME
+ if(s->h263_plus) s->fcode_tab= umv_fcode_tab;
+ else if(s->h263_pred && !s->h263_msmpeg4) s->fcode_tab= fcode_tab;
}
static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
@@ -571,11 +856,90 @@ static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
/***************************************************/
+static void mpeg4_stuffing(PutBitContext * pbc)
+{   /* Pads the stream to a byte boundary with a 0 bit followed by 1 bits. */
+    int length;
+    put_bits(pbc, 1, 0);
+    length= (-get_bit_count(pbc))&7; /* bits missing up to the next byte boundary */
+    if(length) put_bits(pbc, length, (1<<length)-1); /* no zero-width write when already aligned */
+}
+
+static void put_string(PutBitContext * pbc, char *s)
+{
+    /* Emits the bytes of s as 8-bit fields, followed by a zero byte. */
+    const char *p;
+    for(p=s; *p; p++)
+        put_bits(pbc, 8, *p);
+    put_bits(pbc, 8, 0);
+}
+
+static void mpeg4_encode_vol_header(MpegEncContext * s)
+{ /* writes the video object / video object layer headers and a user_data block to s->pb */
+ int vo_ver_id=1; //must be 2 if we want GMC or q-pel
+
+ if(get_bit_count(&s->pb)!=0) mpeg4_stuffing(&s->pb); /* pad first unless nothing has been written yet */
+ put_bits(&s->pb, 16, 0);
+ put_bits(&s->pb, 16, 0x100); /* video obj */
+ put_bits(&s->pb, 16, 0);
+ put_bits(&s->pb, 16, 0x120); /* video obj layer */
+
+ put_bits(&s->pb, 1, 0); /* random access vol */
+ put_bits(&s->pb, 8, 1); /* video obj type indication= simple obj */
+ put_bits(&s->pb, 1, 1); /* is obj layer id= yes */
+ put_bits(&s->pb, 4, vo_ver_id); /* is obj layer ver id */
+ put_bits(&s->pb, 3, 1); /* is obj layer priority */
+ if(s->aspect_ratio_info)
+ put_bits(&s->pb, 4, s->aspect_ratio_info);/* aspect ratio info */
+ else
+ put_bits(&s->pb, 4, 1); /* aspect ratio info= square pixel */
+ put_bits(&s->pb, 1, 0); /* vol control parameters= no */
+ put_bits(&s->pb, 2, RECT_SHAPE); /* vol shape= rectangle */
+ put_bits(&s->pb, 1, 1); /* marker bit */
+ put_bits(&s->pb, 16, s->time_increment_resolution=30000); /* also stores 30000 in s->time_increment_resolution */
+ s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1; /* field width used for the time increment in the picture header */
+ if (s->time_increment_bits < 1)
+ s->time_increment_bits = 1;
+ put_bits(&s->pb, 1, 1); /* marker bit */
+ put_bits(&s->pb, 1, 0); /* fixed vop rate=no */
+ put_bits(&s->pb, 1, 1); /* marker bit */
+ put_bits(&s->pb, 13, s->width); /* vol width */
+ put_bits(&s->pb, 1, 1); /* marker bit */
+ put_bits(&s->pb, 13, s->height); /* vol height */
+ put_bits(&s->pb, 1, 1); /* marker bit */
+ put_bits(&s->pb, 1, 0); /* interlace */
+ put_bits(&s->pb, 1, 1); /* obmc disable */
+ if (vo_ver_id == 1) {
+ put_bits(&s->pb, 1, s->vol_sprite_usage=0); /* sprite enable */
+ }else{ /* vo_ver_id == 2 */
+ put_bits(&s->pb, 2, s->vol_sprite_usage=0); /* sprite enable */
+ }
+ put_bits(&s->pb, 1, 0); /* not 8 bit */
+ put_bits(&s->pb, 1, 0); /* quant type= h263 style*/
+ if (vo_ver_id != 1)
+ put_bits(&s->pb, 1, s->quarter_sample=0); /* quarter sample flag, written (and cleared in s) only for version != 1 */
+ put_bits(&s->pb, 1, 1); /* complexity estimation disable */
+ put_bits(&s->pb, 1, 1); /* resync marker disable */
+ put_bits(&s->pb, 1, 0); /* data partitioned */
+ if (vo_ver_id != 1){
+ put_bits(&s->pb, 1, 0); /* newpred */
+ put_bits(&s->pb, 1, 0); /* reduced res vop */
+ }
+ put_bits(&s->pb, 1, 0); /* scalability */
+
+ mpeg4_stuffing(&s->pb);
+ put_bits(&s->pb, 16, 0);
+ put_bits(&s->pb, 16, 0x1B2); /* user_data */
+ put_string(&s->pb, "ffmpeg"); //FIXME append some version ...
+
+ s->no_rounding = 0; /* reset; mpeg4_encode_picture_header toggles it for P (and GMC S) pictures */
+}
+
/* write mpeg4 VOP header */
void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
{
- align_put_bits(&s->pb);
+ if(s->pict_type==I_TYPE) mpeg4_encode_vol_header(s);
+ if(get_bit_count(&s->pb)!=0) mpeg4_stuffing(&s->pb);
put_bits(&s->pb, 16, 0); /* vop header */
put_bits(&s->pb, 16, 0x1B6); /* vop header */
put_bits(&s->pb, 2, s->pict_type - 1); /* pict type: I = 0 , P = 1 */
@@ -584,26 +948,41 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
put_bits(&s->pb, 1, 0);
put_bits(&s->pb, 1, 1); /* marker */
- put_bits(&s->pb, 4, 1); /* XXX: correct time increment */
+ put_bits(&s->pb, s->time_increment_bits, 1); /* XXX: correct time increment */
put_bits(&s->pb, 1, 1); /* marker */
put_bits(&s->pb, 1, 1); /* vop coded */
- if (s->pict_type == P_TYPE) {
- s->no_rounding = 0;
+ if ( s->pict_type == P_TYPE
+ || (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE)) {
+ s->no_rounding ^= 1;
put_bits(&s->pb, 1, s->no_rounding); /* rounding type */
}
put_bits(&s->pb, 3, 0); /* intra dc VLC threshold */
+ //FIXME sprite stuff
put_bits(&s->pb, 5, s->qscale);
if (s->pict_type != I_TYPE)
put_bits(&s->pb, 3, s->f_code); /* fcode_for */
+ if (s->pict_type == B_TYPE)
+ put_bits(&s->pb, 3, s->b_code); /* fcode_back */
// printf("****frame %d\n", picture_number);
}
void h263_dc_scale(MpegEncContext * s)
{
+#if 1
+ const static UINT8 y_tab[32]={
+ // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+ 0, 8, 8, 8, 8,10,12,14,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,34,36,38,40,42,44,46
+ };
+ const static UINT8 c_tab[32]={
+ // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+ 0, 8, 8, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,20,21,22,23,24,25
+ };
+ s->y_dc_scale = y_tab[s->qscale];
+ s->c_dc_scale = c_tab[s->qscale];
+#else
int quant;
-
quant = s->qscale;
/* luminance */
if (quant < 5)
@@ -621,36 +1000,30 @@ void h263_dc_scale(MpegEncContext * s)
s->c_dc_scale = ((quant + 13) / 2);
else
s->c_dc_scale = (quant - 6);
+#endif
}
-static int mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *dir_ptr)
+static inline int mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *dir_ptr)
{
- int a, b, c, xy, wrap, pred, scale;
+ int a, b, c, wrap, pred, scale;
UINT16 *dc_val;
+ int dummy;
/* find prediction */
if (n < 4) {
- wrap = s->mb_width * 2 + 2;
- xy = 2 * s->mb_y + 1 + ((n & 2) >> 1);
- xy *= wrap;
- xy += 2 * s->mb_x + 1 + (n & 1);
- dc_val = s->dc_val[0];
scale = s->y_dc_scale;
} else {
- wrap = s->mb_width + 2;
- xy = s->mb_y + 1;
- xy *= wrap;
- xy += s->mb_x + 1;
- dc_val = s->dc_val[n - 4 + 1];
scale = s->c_dc_scale;
}
+ wrap= s->block_wrap[n];
+ dc_val = s->dc_val[0] + s->block_index[n];
/* B C
* A X
*/
- a = dc_val[xy - 1];
- b = dc_val[xy - 1 - wrap];
- c = dc_val[xy - wrap];
+ a = dc_val[ - 1];
+ b = dc_val[ - 1 - wrap];
+ c = dc_val[ - wrap];
if (abs(a - b) < abs(b - c)) {
pred = c;
@@ -660,10 +1033,19 @@ static int mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *di
*dir_ptr = 0; /* left */
}
/* we assume pred is positive */
+#ifdef ARCH_X86
+ asm volatile (
+ "xorl %%edx, %%edx \n\t"
+ "mul %%ecx \n\t"
+ : "=d" (pred), "=a"(dummy)
+ : "a" (pred + (scale >> 1)), "c" (inverse[scale])
+ );
+#else
pred = (pred + (scale >> 1)) / scale;
+#endif
/* prepare address for prediction update */
- *dc_val_ptr = &dc_val[xy];
+ *dc_val_ptr = &dc_val[0];
return pred;
}
@@ -671,22 +1053,11 @@ static int mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *di
void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n,
int dir)
{
- int x, y, wrap, i;
+ int i;
INT16 *ac_val, *ac_val1;
/* find prediction */
- if (n < 4) {
- x = 2 * s->mb_x + 1 + (n & 1);
- y = 2 * s->mb_y + 1 + ((n & 2) >> 1);
- wrap = s->mb_width * 2 + 2;
- ac_val = s->ac_val[0][0];
- } else {
- x = s->mb_x + 1;
- y = s->mb_y + 1;
- wrap = s->mb_width + 2;
- ac_val = s->ac_val[n - 4 + 1][0];
- }
- ac_val += ((y) * wrap + (x)) * 16;
+ ac_val = s->ac_val[0][0] + s->block_index[n] * 16;
ac_val1 = ac_val;
if (s->ac_pred) {
if (dir == 0) {
@@ -697,7 +1068,7 @@ void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n,
}
} else {
/* top prediction */
- ac_val -= 16 * wrap;
+ ac_val -= 16 * s->block_wrap[n];
for(i=1;i<8;i++) {
block[block_permute_op(i)] += ac_val[i + 8];
}
@@ -711,20 +1082,43 @@ void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n,
ac_val1[8 + i] = block[block_permute_op(i)];
}
-static inline void mpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr)
+static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n,
+ int dir)
{
- int size, v, pred;
- UINT16 *dc_val;
+ int i;
+ INT16 *ac_val;
- pred = mpeg4_pred_dc(s, n, &dc_val, dir_ptr);
- if (n < 4) {
- *dc_val = level * s->y_dc_scale;
+ /* find prediction */
+ ac_val = s->ac_val[0][0] + s->block_index[n] * 16;
+
+ if (dir == 0) {
+ /* left prediction */
+ ac_val -= 16;
+ for(i=1;i<8;i++) {
+ block[block_permute_op(i*8)] -= ac_val[i];
+ }
} else {
- *dc_val = level * s->c_dc_scale;
+ /* top prediction */
+ ac_val -= 16 * s->block_wrap[n];
+ for(i=1;i<8;i++) {
+ block[block_permute_op(i)] -= ac_val[i + 8];
+ }
}
+}
- /* do the prediction */
- level -= pred;
+static inline void mpeg4_encode_dc(MpegEncContext * s, int level, int n)
+{
+#if 1
+ level+=256;
+ if (n < 4) {
+ /* luminance */
+ put_bits(&s->pb, uni_DCtab_lum[level][1], uni_DCtab_lum[level][0]);
+ } else {
+ /* chrominance */
+ put_bits(&s->pb, uni_DCtab_chrom[level][1], uni_DCtab_chrom[level][0]);
+ }
+#else
+ int size, v;
/* find number of bits */
size = 0;
v = abs(level);
@@ -749,17 +1143,18 @@ static inline void mpeg4_encode_dc(MpegEncContext * s, int level, int n, int *di
if (size > 8)
put_bits(&s->pb, 1, 1);
}
+#endif
}
-static void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n)
+static void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int intra_dc, UINT8 *scan_table)
{
int level, run, last, i, j, last_index, last_non_zero, sign, slevel;
- int code, dc_pred_dir;
+ int code;
const RLTable *rl;
if (s->mb_intra) {
/* mpeg4 based DC predictor */
- mpeg4_encode_dc(s, block[0], n, &dc_pred_dir);
+ mpeg4_encode_dc(s, intra_dc, n);
i = 1;
rl = &rl_intra;
} else {
@@ -771,7 +1166,7 @@ static void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n)
last_index = s->block_last_index[n];
last_non_zero = i - 1;
for (; i <= last_index; i++) {
- j = zigzag_direct[i];
+ j = scan_table[i];
level = block[j];
if (level) {
run = i - last_non_zero - 1;
@@ -839,6 +1234,7 @@ static VLC cbpy_vlc;
static VLC mv_vlc;
static VLC dc_lum, dc_chrom;
static VLC sprite_trajectory;
+static VLC mb_type_b_vlc;
void init_rl(RLTable *rl)
{
@@ -922,6 +1318,9 @@ void h263_decode_init_vlc(MpegEncContext *s)
init_vlc(&sprite_trajectory, 9, 15,
&sprite_trajectory_tab[0][1], 4, 2,
&sprite_trajectory_tab[0][0], 4, 2);
+ init_vlc(&mb_type_b_vlc, 4, 4,
+ &mb_type_b_tab[0][1], 2, 1,
+ &mb_type_b_tab[0][0], 2, 1);
}
}
@@ -950,13 +1349,163 @@ int h263_decode_gob_header(MpegEncContext *s)
}
+static inline void memsetw(short *tab, int val, int n)
+{   /* Stores val into the first n entries of tab; does nothing when n <= 0. */
+    int k = 0;
+    while (k < n)
+        tab[k++] = val;
+}
+
+static int mpeg4_resync(MpegEncContext *s) /* parses a resync (video packet) header from s->gb; returns 0, or -1 on failure */
+{
+ int state, v, bits;
+ int mb_num_bits= av_log2(s->mb_num - 1) + 1; /* width of the macroblock number field read below */
+ int header_extension=0, mb_num;
+ int c_wrap, c_xy, l_wrap, l_xy;
+//printf("resync at %d %d\n", s->mb_x, s->mb_y);
+//printf("%X\n", show_bits(&s->gb, 24));
+
+ if( get_bits_count(&s->gb) > s->gb.size*8-32) /* too close to the end of the buffer: report success without parsing */
+ return 0;
+
+ align_get_bits(&s->gb);
+ state = 0xff;
+ for(;;) { /* read byte by byte until the last two bytes read are both zero */
+ v = get_bits(&s->gb, 8);
+//printf("%X ", v);
+ state = ((state << 8) | v) & 0xffff;
+ if (state == 0) break;
+ if( get_bits_count(&s->gb) > s->gb.size*8-32){
+ printf("resync failed\n");
+ return -1;
+ }
+ }
+//printf("%X\n", show_bits(&s->gb, 24));
+ bits=0;
+ while(!get_bits1(&s->gb) && bits<30) bits++; /* count the zero bits in front of the marker's 1 bit */
+ if(s->pict_type == P_TYPE && bits != s->f_code-1)
+ printf("marker does not match f_code\n"); /* only reported, parsing continues */
+ //FIXME check bits for B-frames
+//printf("%X\n", show_bits(&s->gb, 24));
+
+ if(s->shape != RECT_SHAPE){
+ header_extension= get_bits1(&s->gb);
+ //FIXME more stuff here
+ }
+
+ mb_num= get_bits(&s->gb, mb_num_bits);
+ if(mb_num != s->mb_x + s->mb_y*s->mb_width){ /* a jump to another macroblock is reported but not acted upon */
+ printf("MB-num change not supported %d %d\n", mb_num, s->mb_x + s->mb_y*s->mb_width);
+// s->mb_x= mb_num % s->mb_width;
+// s->mb_y= mb_num / s->mb_width;
+ //FIXME many vars are wrong now
+ }
+
+ if(s->shape != BIN_ONLY_SHAPE){
+ s->qscale= get_bits(&s->gb, 5);
+ h263_dc_scale(s); /* keep the DC scalers in sync with the new qscale */
+ }
+
+ if(s->shape == RECT_SHAPE){
+ header_extension= get_bits1(&s->gb);
+ }
+ if(header_extension){
+ int time_incr=0;
+ printf("header extension not really supported\n");
+ while (get_bits1(&s->gb) != 0) /* count of 1 bits in front of the terminating 0 bit */
+ time_incr++;
+
+ check_marker(&s->gb, "before time_increment in video packed header");
+ s->time_increment= get_bits(&s->gb, s->time_increment_bits);
+ if(s->pict_type!=B_TYPE){
+ s->time_base+= time_incr;
+ s->last_non_b_time[1]= s->last_non_b_time[0];
+ s->last_non_b_time[0]= s->time_base*s->time_increment_resolution + s->time_increment;
+ }else{
+ s->time= (s->last_non_b_time[1]/s->time_increment_resolution + time_incr)*s->time_increment_resolution;
+ s->time+= s->time_increment;
+ }
+ check_marker(&s->gb, "before vop_coding_type in video packed header");
+
+ skip_bits(&s->gb, 2); /* vop coding type */
+ //FIXME not rect stuff here
+
+ if(s->shape != BIN_ONLY_SHAPE){
+ skip_bits(&s->gb, 3); /* intra dc vlc threshold */
+
+ if(s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE && s->num_sprite_warping_points){
+ mpeg4_decode_sprite_trajectory(s);
+ }
+
+ //FIXME reduced res stuff here
+
+ if (s->pict_type != I_TYPE) {
+ s->f_code = get_bits(&s->gb, 3); /* fcode_for */
+ if(s->f_code==0){
+ printf("Error, video packet header damaged or not MPEG4 header (f_code=0)\n");
+ return -1; // makes no sense to continue, as the MV decoding will break very quickly
+ }
+ }
+ if (s->pict_type == B_TYPE) {
+ s->b_code = get_bits(&s->gb, 3); /* fcode_back */
+ }
+ }
+
+ }
+ //FIXME new-pred stuff
+
+ l_wrap= s->block_wrap[0]; /* wrap (row stride) of the luma prediction arrays */
+ l_xy= s->mb_y*l_wrap*2; /* offset of this macroblock row: two luma block rows per macroblock row */
+ c_wrap= s->block_wrap[4]; /* wrap of the chroma prediction arrays */
+ c_xy= s->mb_y*c_wrap;
+
+ /* clean DC */
+ memsetw(s->dc_val[0] + l_xy, 1024, l_wrap*3);
+ memsetw(s->dc_val[1] + c_xy, 1024, c_wrap*2);
+ memsetw(s->dc_val[2] + c_xy, 1024, c_wrap*2);
+
+ /* clean AC */
+ memset(s->ac_val[0] + l_xy, 0, l_wrap*3*16*sizeof(INT16));
+ memset(s->ac_val[1] + c_xy, 0, c_wrap*2*16*sizeof(INT16));
+ memset(s->ac_val[2] + c_xy, 0, c_wrap*2*16*sizeof(INT16));
+
+ /* clean MV */
+ memset(s->motion_val + l_xy, 0, l_wrap*3*2*sizeof(INT16));
+// memset(s->motion_val, 0, 2*sizeof(INT16)*(2 + s->mb_width*2)*(2 + s->mb_height*2));
+ s->resync_x_pos= s->mb_x; /* h263_decode_mb compares this against mb_x to clear first_slice_line again */
+ s->first_slice_line=1;
+
+ return 0;
+}
+
int h263_decode_mb(MpegEncContext *s,
DCTELEM block[6][64])
{
int cbpc, cbpy, i, cbp, pred_x, pred_y, mx, my, dquant;
INT16 *mot_val;
static INT8 quant_tab[4] = { -1, -2, 1, 2 };
-
+
+ if(s->resync_marker){
+ if( s->resync_x_pos == s->mb_x+1
+ || s->resync_x_pos == s->mb_x){
+ /* f*ck mpeg4
+ this is here so we dont need to slowdown h263_pred_motion with it */
+ if(s->resync_x_pos == s->mb_x+1 && s->mb_x==0){
+ int xy= s->block_index[0] - s->block_wrap[0];
+ s->motion_val[xy][0]= s->motion_val[xy+2][0];
+ s->motion_val[xy][1]= s->motion_val[xy+2][1];
+ }
+
+ s->first_slice_line=0;
+ s->resync_x_pos=0; // isnt needed but for cleanness sake ;)
+ }
+
+ if(show_aligned_bits(&s->gb, 1, 16) == 0){
+ if( mpeg4_resync(s) < 0 ) return -1;
+
+ }
+ }
+
if (s->pict_type == P_TYPE || s->pict_type==S_TYPE) {
if (get_bits1(&s->gb)) {
/* skip mb */
@@ -970,8 +1519,13 @@ int h263_decode_mb(MpegEncContext *s,
// int l = (1 << (s->f_code - 1)) * 32;
s->mcsel=1;
- s->mv[0][0][0] = RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample);
- s->mv[0][0][1] = RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample);
+ if(s->divx_version==500 && s->divx_build==413){
+ s->mv[0][0][0] = s->sprite_offset[0][0] / (1<<(a-s->quarter_sample));
+ s->mv[0][0][1] = s->sprite_offset[0][1] / (1<<(a-s->quarter_sample));
+ }else{
+ s->mv[0][0][0] = RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample);
+ s->mv[0][0][1] = RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample);
+ }
/* if (s->mv[0][0][0] < -l) s->mv[0][0][0]= -l;
else if (s->mv[0][0][0] >= l) s->mv[0][0][0]= l-1;
if (s->mv[0][0][1] < -l) s->mv[0][0][1]= -l;
@@ -997,15 +1551,8 @@ int h263_decode_mb(MpegEncContext *s,
dquant = cbpc & 8;
s->mb_intra = ((cbpc & 4) != 0);
- } else {
- cbpc = get_vlc(&s->gb, &intra_MCBPC_vlc);
- if (cbpc < 0)
- return -1;
- dquant = cbpc & 4;
- s->mb_intra = 1;
- }
-
- if (!s->mb_intra) {
+ if (s->mb_intra) goto intra;
+
if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE && (cbpc & 16) == 0)
s->mcsel= get_bits1(&s->gb);
else s->mcsel= 0;
@@ -1017,6 +1564,7 @@ int h263_decode_mb(MpegEncContext *s,
s->qscale = 1;
else if (s->qscale > 31)
s->qscale = 31;
+ h263_dc_scale(s);
}
s->mv_dir = MV_DIR_FORWARD;
if ((cbpc & 16) == 0) {
@@ -1026,13 +1574,17 @@ int h263_decode_mb(MpegEncContext *s,
if (s->umvplus_dec)
mx = h263p_decode_umotion(s, pred_x);
else if(!s->mcsel)
- mx = h263_decode_motion(s, pred_x);
+ mx = h263_decode_motion(s, pred_x, s->f_code);
else {
const int a= s->sprite_warping_accuracy;
// int l = (1 << (s->f_code - 1)) * 32;
- mx= RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample);
-// if (mx < -l) mx= -l;
-// else if (mx >= l) mx= l-1;
+ if(s->divx_version==500 && s->divx_build==413){
+ mx = s->sprite_offset[0][0] / (1<<(a-s->quarter_sample));
+ }else{
+ mx = RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample);
+ }
+// if (mx < -l) mx= -l, printf("C");
+// else if (mx >= l) mx= l-1, printf("C");
}
if (mx >= 0xffff)
return -1;
@@ -1040,13 +1592,17 @@ int h263_decode_mb(MpegEncContext *s,
if (s->umvplus_dec)
my = h263p_decode_umotion(s, pred_y);
else if(!s->mcsel)
- my = h263_decode_motion(s, pred_y);
+ my = h263_decode_motion(s, pred_y, s->f_code);
else{
const int a= s->sprite_warping_accuracy;
// int l = (1 << (s->f_code - 1)) * 32;
- my= RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample);
-// if (my < -l) my= -l;
-// else if (my >= l) my= l-1;
+ if(s->divx_version==500 && s->divx_build==413){
+ my = s->sprite_offset[0][1] / (1<<(a-s->quarter_sample));
+ }else{
+ my = RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample);
+ }
+// if (my < -l) my= -l, printf("C");
+// else if (my >= l) my= l-1, printf("C");
}
if (my >= 0xffff)
return -1;
@@ -1065,14 +1621,14 @@ int h263_decode_mb(MpegEncContext *s,
if (s->umvplus_dec)
mx = h263p_decode_umotion(s, pred_x);
else
- mx = h263_decode_motion(s, pred_x);
+ mx = h263_decode_motion(s, pred_x, s->f_code);
if (mx >= 0xffff)
return -1;
if (s->umvplus_dec)
my = h263p_decode_umotion(s, pred_y);
else
- my = h263_decode_motion(s, pred_y);
+ my = h263_decode_motion(s, pred_y, s->f_code);
if (my >= 0xffff)
return -1;
s->mv[0][i][0] = mx;
@@ -1083,7 +1639,126 @@ int h263_decode_mb(MpegEncContext *s,
mot_val[1] = my;
}
}
- } else {
+ } else if(s->pict_type==B_TYPE) {
+ int modb1; // first bit of modb
+ int modb2; // second bit of modb
+ int mb_type;
+ int time_pp;
+ int time_pb;
+ int xy;
+
+ s->mb_intra = 0; //B-frames never contain intra blocks
+ s->mcsel=0; // ... true gmc blocks
+
+ if(s->mb_x==0){
+ s->last_mv[0][0][0]=
+ s->last_mv[0][0][1]=
+ s->last_mv[1][0][0]=
+ s->last_mv[1][0][1]= 0;
+ }
+
+ /* if we skipped it in the future P Frame then skip it now too */
+ s->mb_skiped= s->mbskip_table[s->mb_y * s->mb_width + s->mb_x]; // Note, skiptab=0 if last was GMC
+
+ if(s->mb_skiped){
+ /* skip mb */
+ for(i=0;i<6;i++)
+ s->block_last_index[i] = -1;
+
+ s->mv_dir = MV_DIR_FORWARD;
+ s->mv_type = MV_TYPE_16X16;
+ s->mv[0][0][0] = 0;
+ s->mv[0][0][1] = 0;
+ s->mv[1][0][0] = 0;
+ s->mv[1][0][1] = 0;
+//FIXME is this correct?
+/* s->last_mv[0][0][0]=
+ s->last_mv[0][0][1]=0;*/
+ s->mb_skiped = 1;
+ return 0;
+ }
+
+ modb1= get_bits1(&s->gb);
+ if(modb1==0){
+ modb2= get_bits1(&s->gb);
+ mb_type= get_vlc(&s->gb, &mb_type_b_vlc);
+ if(modb2==0) cbp= get_bits(&s->gb, 6);
+ else cbp=0;
+ if (mb_type && cbp) {
+ if(get_bits1(&s->gb)){
+ s->qscale +=get_bits1(&s->gb)*4 - 2;
+ if (s->qscale < 1)
+ s->qscale = 1;
+ else if (s->qscale > 31)
+ s->qscale = 31;
+ h263_dc_scale(s);
+ }
+ }
+ }else{
+ mb_type=4; //like 0 but no vectors coded
+ cbp=0;
+ }
+ s->mv_type = MV_TYPE_16X16; // we'll switch to 8x8 only if the last P frame had 8x8 for this MB and mb_type=0 here
+ mx=my=0; //for case 4, we could put this to the mb_type=4 but than gcc compains about uninitalized mx/my
+ switch(mb_type)
+ {
+ case 0:
+ mx = h263_decode_motion(s, 0, 1);
+ my = h263_decode_motion(s, 0, 1);
+ case 4:
+ s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
+ xy= s->block_index[0];
+ time_pp= s->last_non_b_time[0] - s->last_non_b_time[1];
+ time_pb= s->time - s->last_non_b_time[1];
+//if(time_pp>3000 )printf("%d %d ", time_pp, time_pb);
+ //FIXME 4MV
+ //FIXME avoid divides
+ s->mv[0][0][0] = s->motion_val[xy][0]*time_pb/time_pp + mx;
+ s->mv[0][0][1] = s->motion_val[xy][1]*time_pb/time_pp + my;
+ s->mv[1][0][0] = mx ? s->mv[0][0][0] - s->motion_val[xy][0]
+ : s->motion_val[xy][0]*(time_pb - time_pp)/time_pp + mx;
+ s->mv[1][0][1] = my ? s->mv[0][0][1] - s->motion_val[xy][1]
+ : s->motion_val[xy][1]*(time_pb - time_pp)/time_pp + my;
+/* s->mv[0][0][0] =
+ s->mv[0][0][1] =
+ s->mv[1][0][0] =
+ s->mv[1][0][1] = 1000;*/
+ break;
+ case 1:
+ s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
+ mx = h263_decode_motion(s, s->last_mv[0][0][0], s->f_code);
+ my = h263_decode_motion(s, s->last_mv[0][0][1], s->f_code);
+ s->last_mv[0][0][0]= s->mv[0][0][0] = mx;
+ s->last_mv[0][0][1]= s->mv[0][0][1] = my;
+
+ mx = h263_decode_motion(s, s->last_mv[1][0][0], s->b_code);
+ my = h263_decode_motion(s, s->last_mv[1][0][1], s->b_code);
+ s->last_mv[1][0][0]= s->mv[1][0][0] = mx;
+ s->last_mv[1][0][1]= s->mv[1][0][1] = my;
+ break;
+ case 2:
+ s->mv_dir = MV_DIR_BACKWARD;
+ mx = h263_decode_motion(s, s->last_mv[1][0][0], s->b_code);
+ my = h263_decode_motion(s, s->last_mv[1][0][1], s->b_code);
+ s->last_mv[1][0][0]= s->mv[1][0][0] = mx;
+ s->last_mv[1][0][1]= s->mv[1][0][1] = my;
+ break;
+ case 3:
+ s->mv_dir = MV_DIR_FORWARD;
+ mx = h263_decode_motion(s, s->last_mv[0][0][0], s->f_code);
+ my = h263_decode_motion(s, s->last_mv[0][0][1], s->f_code);
+ s->last_mv[0][0][0]= s->mv[0][0][0] = mx;
+ s->last_mv[0][0][1]= s->mv[0][0][1] = my;
+ break;
+ default: return -1;
+ }
+ } else { /* I-Frame */
+ cbpc = get_vlc(&s->gb, &intra_MCBPC_vlc);
+ if (cbpc < 0)
+ return -1;
+ dquant = cbpc & 4;
+ s->mb_intra = 1;
+intra:
s->ac_pred = 0;
if (s->h263_pred || s->h263_aic) {
s->ac_pred = get_bits1(&s->gb);
@@ -1102,6 +1777,7 @@ int h263_decode_mb(MpegEncContext *s,
s->qscale = 1;
else if (s->qscale > 31)
s->qscale = 31;
+ h263_dc_scale(s);
}
}
@@ -1120,7 +1796,7 @@ int h263_decode_mb(MpegEncContext *s,
return 0;
}
-static int h263_decode_motion(MpegEncContext * s, int pred)
+static int h263_decode_motion(MpegEncContext * s, int pred, int f_code)
{
int code, val, sign, shift, l, m;
@@ -1131,7 +1807,7 @@ static int h263_decode_motion(MpegEncContext * s, int pred)
if (code == 0)
return pred;
sign = get_bits1(&s->gb);
- shift = s->f_code - 1;
+ shift = f_code - 1;
val = (code - 1) << shift;
if (shift > 0)
val |= get_bits(&s->gb, shift);
@@ -1142,7 +1818,7 @@ static int h263_decode_motion(MpegEncContext * s, int pred)
/* modulo decoding */
if (!s->h263_long_vectors) {
- l = (1 << (s->f_code - 1)) * 32;
+ l = (1 << (f_code - 1)) * 32;
m = 2 * l;
if (val < -l) {
val += m;
@@ -1269,7 +1945,7 @@ static int h263_decode_block(MpegEncContext * s, DCTELEM * block,
not_coded:
if (s->mb_intra && s->h263_aic) {
h263_pred_acdc(s, block, n);
- i = 64;
+ i = 63;
}
s->block_last_index[n] = i;
return 0;
@@ -1577,21 +2253,21 @@ static void mpeg4_decode_sprite_trajectory(MpegEncContext * s)
h2= 1<<beta;
// Note, the 4th point isnt used for GMC
-/*
- sprite_ref[0][0]= (a>>1)*(2*vop_ref[0][0] + d[0][0]);
- sprite_ref[0][1]= (a>>1)*(2*vop_ref[0][1] + d[0][1]);
- sprite_ref[1][0]= (a>>1)*(2*vop_ref[1][0] + d[0][0] + d[1][0]);
- sprite_ref[1][1]= (a>>1)*(2*vop_ref[1][1] + d[0][1] + d[1][1]);
- sprite_ref[2][0]= (a>>1)*(2*vop_ref[2][0] + d[0][0] + d[2][0]);
- sprite_ref[2][1]= (a>>1)*(2*vop_ref[2][1] + d[0][1] + d[2][1]);
-*/
-//FIXME DIVX5 vs. mpeg4 ?
- sprite_ref[0][0]= a*vop_ref[0][0] + d[0][0];
- sprite_ref[0][1]= a*vop_ref[0][1] + d[0][1];
- sprite_ref[1][0]= a*vop_ref[1][0] + d[0][0] + d[1][0];
- sprite_ref[1][1]= a*vop_ref[1][1] + d[0][1] + d[1][1];
- sprite_ref[2][0]= a*vop_ref[2][0] + d[0][0] + d[2][0];
- sprite_ref[2][1]= a*vop_ref[2][1] + d[0][1] + d[2][1];
+ if(s->divx_version==500 && s->divx_build==413){
+ sprite_ref[0][0]= a*vop_ref[0][0] + d[0][0];
+ sprite_ref[0][1]= a*vop_ref[0][1] + d[0][1];
+ sprite_ref[1][0]= a*vop_ref[1][0] + d[0][0] + d[1][0];
+ sprite_ref[1][1]= a*vop_ref[1][1] + d[0][1] + d[1][1];
+ sprite_ref[2][0]= a*vop_ref[2][0] + d[0][0] + d[2][0];
+ sprite_ref[2][1]= a*vop_ref[2][1] + d[0][1] + d[2][1];
+ } else {
+ sprite_ref[0][0]= (a>>1)*(2*vop_ref[0][0] + d[0][0]);
+ sprite_ref[0][1]= (a>>1)*(2*vop_ref[0][1] + d[0][1]);
+ sprite_ref[1][0]= (a>>1)*(2*vop_ref[1][0] + d[0][0] + d[1][0]);
+ sprite_ref[1][1]= (a>>1)*(2*vop_ref[1][1] + d[0][1] + d[1][1]);
+ sprite_ref[2][0]= (a>>1)*(2*vop_ref[2][0] + d[0][0] + d[2][0]);
+ sprite_ref[2][1]= (a>>1)*(2*vop_ref[2][1] + d[0][1] + d[2][1]);
+ }
/* sprite_ref[3][0]= (a>>1)*(2*vop_ref[3][0] + d[0][0] + d[1][0] + d[2][0] + d[3][0]);
sprite_ref[3][1]= (a>>1)*(2*vop_ref[3][1] + d[0][1] + d[1][1] + d[2][1] + d[3][1]); */
@@ -1715,7 +2391,7 @@ printf("%d %d\n", s->sprite_delta[1][1][1], a<<s->sprite_shift[1][1]);*/
else
s->real_sprite_warping_points= s->num_sprite_warping_points;
-//FIXME convert stuff if accurace != 3
+//printf("%d %d %d %d\n", d[0][0], d[0][1], s->sprite_offset[0][0], s->sprite_offset[0][1]);
}
/* decode mpeg4 VOP header */
@@ -1735,13 +2411,14 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
break;
}
state = ((state << 8) | v) & 0xffffff;
- /* XXX: really detect end of frame */
- if (state == 0)
+ if( get_bits_count(&s->gb) > s->gb.size*8-32){
+ printf("no VOP startcode found\n");
return -1;
+ }
}
//printf("startcode %X %d\n", startcode, get_bits_count(&s->gb));
if (startcode == 0x120) { // Video Object Layer
- int time_increment_resolution, width, height, vo_ver_id;
+ int width, height, vo_ver_id;
/* vol header */
skip_bits(&s->gb, 1); /* random access */
@@ -1758,11 +2435,13 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
skip_bits(&s->gb, 8); //par_width
skip_bits(&s->gb, 8); // par_height
}
+
if(get_bits1(&s->gb)){ /* vol control parameter */
printf("vol control parameter not supported\n");
return -1;
}
s->shape = get_bits(&s->gb, 2); /* vol shape */
+ if(s->shape != RECT_SHAPE) printf("only rectangular vol supported\n");
if(s->shape == GRAY_SHAPE && vo_ver_id != 1){
printf("Gray shape not supported\n");
skip_bits(&s->gb, 4); //video_object_layer_shape_extension
@@ -1770,8 +2449,8 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
skip_bits1(&s->gb); /* marker */
- time_increment_resolution = get_bits(&s->gb, 16);
- s->time_increment_bits = av_log2(time_increment_resolution - 1) + 1;
+ s->time_increment_resolution = get_bits(&s->gb, 16);
+ s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1;
if (s->time_increment_bits < 1)
s->time_increment_bits = 1;
skip_bits1(&s->gb); /* marker */
@@ -1787,9 +2466,14 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
skip_bits1(&s->gb); /* marker */
height = get_bits(&s->gb, 13);
skip_bits1(&s->gb); /* marker */
+ if(width && height){ /* they should be non zero but who knows ... */
+ s->width = width;
+ s->height = height;
+// printf("%d %d\n", width, height);
+ }
}
- skip_bits1(&s->gb); /* interlaced */
+ if(get_bits1(&s->gb)) printf("interlaced not supported\n"); /* interlaced */
if(!get_bits1(&s->gb)) printf("OBMC not supported\n"); /* OBMC Disable */
if (vo_ver_id == 1) {
s->vol_sprite_usage = get_bits1(&s->gb); /* vol_sprite_usage */
@@ -1818,7 +2502,8 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
if (get_bits1(&s->gb) == 1) { /* not_8_bit */
s->quant_precision = get_bits(&s->gb, 4); /* quant_precision */
- skip_bits(&s->gb, 4); /* bits_per_pixel */
+ if(get_bits(&s->gb, 4)!=8) printf("N-bit not supported\n"); /* bits_per_pixel */
+ if(s->quant_precision!=5) printf("quant precission %d\n", s->quant_precision);
} else {
s->quant_precision = 5;
}
@@ -1828,13 +2513,11 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
if(vo_ver_id != 1)
s->quarter_sample= get_bits1(&s->gb);
else s->quarter_sample=0;
-#if 0
- if(get_bits1(&s->gb)) printf("Complexity est disabled\n");
- if(get_bits1(&s->gb)) printf("resync disable\n");
-#else
- skip_bits1(&s->gb); /* complexity_estimation_disabled */
- skip_bits1(&s->gb); /* resync_marker_disabled */
-#endif
+
+ if(!get_bits1(&s->gb)) printf("Complexity estimation not supported\n");
+
+ s->resync_marker= !get_bits1(&s->gb); /* resync_marker_disabled */
+
s->data_partioning= get_bits1(&s->gb);
if(s->data_partioning){
printf("data partitioning not supported\n");
@@ -1858,8 +2541,7 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
s->scalability= get_bits1(&s->gb);
if (s->scalability) {
- printf("bad scalability!!!\n");
- return -1;
+ printf("scalability not supported\n");
}
}
//printf("end Data %X %d\n", show_bits(&s->gb, 32), get_bits_count(&s->gb)&0x7);
@@ -1899,24 +2581,34 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
}
s->pict_type = get_bits(&s->gb, 2) + 1; /* pict type: I = 0 , P = 1 */
- if(s->pict_type == B_TYPE)
- {
- printf("B-VOP\n");
- return -1;
- }
-
- /* XXX: parse time base */
- time_incr = 0;
+//printf("pic: %d, qpel:%d\n", s->pict_type, s->quarter_sample);
+ time_incr=0;
while (get_bits1(&s->gb) != 0)
time_incr++;
- skip_bits1(&s->gb); /* marker */
- skip_bits(&s->gb, s->time_increment_bits);
- skip_bits1(&s->gb); /* marker */
+ check_marker(&s->gb, "before time_increment");
+ s->time_increment= get_bits(&s->gb, s->time_increment_bits);
+ if(s->pict_type!=B_TYPE){
+ s->time_base+= time_incr;
+ s->last_non_b_time[1]= s->last_non_b_time[0];
+ s->last_non_b_time[0]= s->time_base*s->time_increment_resolution + s->time_increment;
+ }else{
+ s->time= (s->last_non_b_time[1]/s->time_increment_resolution + time_incr)*s->time_increment_resolution;
+ s->time+= s->time_increment;
+ }
+
+ if(check_marker(&s->gb, "before vop_coded")==0 && s->picture_number==0){
+ printf("hmm, seems the headers arnt complete, trying to guess time_increment_bits\n");
+ for(s->time_increment_bits++ ;s->time_increment_bits<16; s->time_increment_bits++){
+ if(get_bits1(&s->gb)) break;
+ }
+ printf("my guess is %d bits ;)\n",s->time_increment_bits);
+ }
/* vop coded */
if (get_bits1(&s->gb) != 1)
goto redo;
-
+//printf("time %d %d %d || %d %d %d\n", s->time_increment_bits, s->time_increment, s->time_base,
+//s->time, s->last_non_b_time[0], s->last_non_b_time[1]);
if (s->shape != BIN_ONLY_SHAPE && ( s->pict_type == P_TYPE
|| (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE))) {
/* rounding type for motion estimation */
@@ -1947,7 +2639,9 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
//FIXME complexity estimation stuff
if (s->shape != BIN_ONLY_SHAPE) {
- skip_bits(&s->gb, 3); /* intra dc VLC threshold */
+ int t;
+ t=get_bits(&s->gb, 3); /* intra dc VLC threshold */
+//printf("threshold %d\n", t);
//FIXME interlaced specific bits
}
@@ -1964,12 +2658,21 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
MPEG4 vol header as it is found on some old opendivx
movies */
s->qscale = get_bits(&s->gb, 5);
+ if(s->qscale==0){
+ printf("Error, header damaged or not MPEG4 header (qscale=0)\n");
+ return -1; // makes no sense to continue, as there is nothing left from the image then
+ }
if (s->pict_type != I_TYPE) {
s->f_code = get_bits(&s->gb, 3); /* fcode_for */
+ if(s->f_code==0){
+ printf("Error, header damaged or not MPEG4 header (f_code=0)\n");
+ return -1; // makes no sense to continue, as the MV decoding will break very quickly
+ }
}
if (s->pict_type == B_TYPE) {
s->b_code = get_bits(&s->gb, 3);
+//printf("b-code %d\n", s->b_code);
}
//printf("quant:%d fcode:%d\n", s->qscale, s->f_code);
if(!s->scalability){
@@ -1978,7 +2681,6 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
}
}
}
-//printf("end Data %X %d\n", show_bits(&s->gb, 32), get_bits_count(&s->gb)&0x7);
s->picture_number++; // better than pic number==0 allways ;)
return 0;
}
diff --git a/src/libffmpeg/libavcodec/h263data.h b/src/libffmpeg/libavcodec/h263data.h
index 88e456ba2..a129fd6bf 100644
--- a/src/libffmpeg/libavcodec/h263data.h
+++ b/src/libffmpeg/libavcodec/h263data.h
@@ -40,13 +40,13 @@ static const UINT8 inter_MCBPC_bits[20] = {
3, 7, 7, 8,
};*/
-static const UINT8 cbpy_tab[16][2] =
+const UINT8 cbpy_tab[16][2] =
{
{3,4}, {5,5}, {4,5}, {9,4}, {3,5}, {7,4}, {2,6}, {11,4},
{2,5}, {3,6}, {5,4}, {10,4}, {4,4}, {8,4}, {6,4}, {3,2}
};
-static const UINT8 mvtab[33][2] =
+const UINT8 mvtab[33][2] =
{
{1,1}, {1,2}, {1,3}, {1,4}, {3,6}, {5,7}, {4,7}, {3,7},
{11,9}, {10,9}, {9,9}, {17,10}, {16,10}, {15,10}, {14,10}, {13,10},
diff --git a/src/libffmpeg/libavcodec/h263dec.c b/src/libffmpeg/libavcodec/h263dec.c
index 3733ed565..e909ac56e 100644
--- a/src/libffmpeg/libavcodec/h263dec.c
+++ b/src/libffmpeg/libavcodec/h263dec.c
@@ -47,10 +47,22 @@ static int h263_decode_init(AVCodecContext *avctx)
case CODEC_ID_MPEG4:
s->time_increment_bits = 4; /* default value for broken headers */
s->h263_pred = 1;
+ s->has_b_frames = 1;
break;
- case CODEC_ID_MSMPEG4:
+ case CODEC_ID_MSMPEG4V1:
s->h263_msmpeg4 = 1;
s->h263_pred = 1;
+ s->msmpeg4_version=1;
+ break;
+ case CODEC_ID_MSMPEG4V2:
+ s->h263_msmpeg4 = 1;
+ s->h263_pred = 1;
+ s->msmpeg4_version=2;
+ break;
+ case CODEC_ID_MSMPEG4V3:
+ s->h263_msmpeg4 = 1;
+ s->h263_pred = 1;
+ s->msmpeg4_version=3;
break;
case CODEC_ID_H263I:
s->h263_intel = 1;
@@ -60,7 +72,7 @@ static int h263_decode_init(AVCodecContext *avctx)
}
/* for h263, we allocate the images after having read the header */
- if (avctx->codec->id != CODEC_ID_H263)
+ if (avctx->codec->id != CODEC_ID_H263 && avctx->codec->id != CODEC_ID_MPEG4)
if (MPV_common_init(s) < 0)
return -1;
@@ -115,22 +127,25 @@ static int h263_decode_frame(AVCodecContext *avctx,
ret = intel_h263_decode_picture_header(s);
} else {
ret = h263_decode_picture_header(s);
- /* After H263 header decode we have the height, width, */
+ }
+
+ /* After H263 & mpeg4 header decode we have the height, width,*/
/* and other parameters. So then we could init the picture */
/* FIXME: By the way H263 decoder is evolving it should have */
/* an H263EncContext */
- if (!s->context_initialized) {
- avctx->width = s->width;
- avctx->height = s->height;
- if (MPV_common_init(s) < 0)
- return -1;
- } else if (s->width != avctx->width || s->height != avctx->height) {
- /* H.263 could change picture size any time */
- MPV_common_end(s);
- if (MPV_common_init(s) < 0)
- return -1;
- }
+ if (!s->context_initialized) {
+ avctx->width = s->width;
+ avctx->height = s->height;
+ avctx->aspect_ratio_info= s->aspect_ratio_info;
+ if (MPV_common_init(s) < 0)
+ return -1;
+ } else if (s->width != avctx->width || s->height != avctx->height) {
+ /* H.263 could change picture size any time */
+ MPV_common_end(s);
+ if (MPV_common_init(s) < 0)
+ return -1;
}
+
if (ret < 0)
return -1;
@@ -141,6 +156,12 @@ static int h263_decode_frame(AVCodecContext *avctx,
#endif
/* decode each macroblock */
+ s->block_wrap[0]=
+ s->block_wrap[1]=
+ s->block_wrap[2]=
+ s->block_wrap[3]= s->mb_width*2 + 2;
+ s->block_wrap[4]=
+ s->block_wrap[5]= s->mb_width + 2;
for(s->mb_y=0; s->mb_y < s->mb_height; s->mb_y++) {
/* Check for GOB headers on H.263 */
/* FIXME: In the future H.263+ will have intra prediction */
@@ -148,7 +169,20 @@ static int h263_decode_frame(AVCodecContext *avctx,
if (s->mb_y && !s->h263_pred) {
s->first_gob_line = h263_decode_gob_header(s);
}
+
+ s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1;
+ s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1);
+ s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1;
+ s->block_index[3]= s->block_wrap[0]*(s->mb_y*2 + 2);
+ s->block_index[4]= s->block_wrap[4]*(s->mb_y + 1) + s->block_wrap[0]*(s->mb_height*2 + 2);
+ s->block_index[5]= s->block_wrap[4]*(s->mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2);
for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
+ s->block_index[0]+=2;
+ s->block_index[1]+=2;
+ s->block_index[2]+=2;
+ s->block_index[3]+=2;
+ s->block_index[4]++;
+ s->block_index[5]++;
#ifdef DEBUG
printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
#endif
@@ -163,28 +197,8 @@ static int h263_decode_frame(AVCodecContext *avctx,
s->y_dc_scale = 8;
s->c_dc_scale = 8;
}
-
-#ifdef HAVE_MMX
- if (mm_flags & MM_MMX) {
- asm volatile(
- "pxor %%mm7, %%mm7 \n\t"
- "movl $-128*6, %%eax \n\t"
- "1: \n\t"
- "movq %%mm7, (%0, %%eax) \n\t"
- "movq %%mm7, 8(%0, %%eax) \n\t"
- "movq %%mm7, 16(%0, %%eax) \n\t"
- "movq %%mm7, 24(%0, %%eax) \n\t"
- "addl $32, %%eax \n\t"
- " js 1b \n\t"
- : : "r" (((int)s->block)+128*6)
- : "%eax"
- );
- }else{
- memset(s->block, 0, sizeof(s->block));
- }
-#else
- memset(s->block, 0, sizeof(s->block));
-#endif
+ clear_blocks(s->block[0]);
+
s->mv_dir = MV_DIR_FORWARD;
s->mv_type = MV_TYPE_16X16;
if (s->h263_msmpeg4) {
@@ -208,9 +222,15 @@ static int h263_decode_frame(AVCodecContext *avctx,
if (h > 16)
h = 16;
offset = y * s->linesize;
- src_ptr[0] = s->current_picture[0] + offset;
- src_ptr[1] = s->current_picture[1] + (offset >> 2);
- src_ptr[2] = s->current_picture[2] + (offset >> 2);
+ if(s->pict_type==B_TYPE || (!s->has_b_frames)){
+ src_ptr[0] = s->current_picture[0] + offset;
+ src_ptr[1] = s->current_picture[1] + (offset >> 2);
+ src_ptr[2] = s->current_picture[2] + (offset >> 2);
+ } else {
+ src_ptr[0] = s->last_picture[0] + offset;
+ src_ptr[1] = s->last_picture[1] + (offset >> 2);
+ src_ptr[2] = s->last_picture[2] + (offset >> 2);
+ }
avctx->draw_horiz_band(avctx, src_ptr, s->linesize,
y, s->width, h);
}
@@ -221,9 +241,15 @@ static int h263_decode_frame(AVCodecContext *avctx,
MPV_frame_end(s);
- pict->data[0] = s->current_picture[0];
- pict->data[1] = s->current_picture[1];
- pict->data[2] = s->current_picture[2];
+ if(s->pict_type==B_TYPE || (!s->has_b_frames)){
+ pict->data[0] = s->current_picture[0];
+ pict->data[1] = s->current_picture[1];
+ pict->data[2] = s->current_picture[2];
+ } else {
+ pict->data[0] = s->last_picture[0];
+ pict->data[1] = s->last_picture[1];
+ pict->data[2] = s->last_picture[2];
+ }
pict->linesize[0] = s->linesize;
pict->linesize[1] = s->linesize / 2;
pict->linesize[2] = s->linesize / 2;
@@ -262,10 +288,34 @@ AVCodec h263_decoder = {
CODEC_CAP_DRAW_HORIZ_BAND,
};
-AVCodec msmpeg4_decoder = {
+AVCodec msmpeg4v1_decoder = {
+ "msmpeg4v1",
+ CODEC_TYPE_VIDEO,
+ CODEC_ID_MSMPEG4V1,
+ sizeof(MpegEncContext),
+ h263_decode_init,
+ NULL,
+ h263_decode_end,
+ h263_decode_frame,
+ CODEC_CAP_DRAW_HORIZ_BAND,
+};
+
+AVCodec msmpeg4v2_decoder = {
+ "msmpeg4v2",
+ CODEC_TYPE_VIDEO,
+ CODEC_ID_MSMPEG4V2,
+ sizeof(MpegEncContext),
+ h263_decode_init,
+ NULL,
+ h263_decode_end,
+ h263_decode_frame,
+ CODEC_CAP_DRAW_HORIZ_BAND,
+};
+
+AVCodec msmpeg4v3_decoder = {
"msmpeg4",
CODEC_TYPE_VIDEO,
- CODEC_ID_MSMPEG4,
+ CODEC_ID_MSMPEG4V3,
sizeof(MpegEncContext),
h263_decode_init,
NULL,
diff --git a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c
index 37716a983..2c71850ee 100644
--- a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c
+++ b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c
@@ -25,22 +25,58 @@
int mm_flags; /* multimedia extension flags */
-int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h);
-int pix_abs16x16_sse(UINT8 *blk1, UINT8 *blk2, int lx, int h);
-int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h);
-int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h);
-int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h);
+int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
+int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
+int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
+int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
+
+int pix_abs16x16_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
+int pix_abs16x16_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
+int pix_abs16x16_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
+int pix_abs16x16_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
+
+int pix_abs8x8_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
+int pix_abs8x8_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
+int pix_abs8x8_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
+int pix_abs8x8_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
+
+int pix_abs8x8_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
+int pix_abs8x8_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
+int pix_abs8x8_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
+int pix_abs8x8_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
+
/* external functions, from idct_mmx.c */
void ff_mmx_idct(DCTELEM *block);
void ff_mmxext_idct(DCTELEM *block);
/* pixel operations */
-static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001;
-static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002;
+static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001LL;
+static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002LL;
//static const unsigned short mm_wone[4] __attribute__ ((aligned(8))) = { 0x1, 0x1, 0x1, 0x1 };
//static const unsigned short mm_wtwo[4] __attribute__ ((aligned(8))) = { 0x2, 0x2, 0x2, 0x2 };
+#define JUMPALIGN() __asm __volatile (".balign 8"::)
+#define MOVQ_ZERO(regd) __asm __volatile ("pxor %%" #regd ", %%" #regd ::)
+
+#ifndef PIC
+#define MOVQ_WONE(regd) __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_wone))
+#define MOVQ_WTWO(regd) __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_wtwo))
+#else
+// in a shared library (PIC) it is better to build these constants in the register
+// pcmpeqd reg,reg sets all bits (-1); psrlw $15 then leaves 1 in each 16-bit word
+#define MOVQ_WONE(regd) \
+ __asm __volatile ( \
+ "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
+ "psrlw $15, %%" #regd ::)
+
+#define MOVQ_WTWO(regd) \
+ __asm __volatile ( \
+ "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
+ "psrlw $15, %%" #regd " \n\t" \
+ "psllw $1, %%" #regd ::)
+#endif
+
/***********************************/
/* 3Dnow specific */
@@ -78,7 +114,7 @@ static void get_pixels_mmx(DCTELEM *block, const UINT8 *pixels, int line_size)
/* read the pixels */
p = block;
pix = pixels;
- __asm __volatile("pxor %%mm7, %%mm7":);
+ MOVQ_ZERO(mm7);
for(i=0;i<4;i++) {
__asm __volatile(
"movq %1, %%mm0\n\t"
@@ -105,12 +141,11 @@ static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line
{
const DCTELEM *p;
UINT8 *pix;
- int i;
/* read the pixels */
p = block;
pix = pixels;
- for(i=0;i<2;i++) {
+ /* unrolled loop */
__asm __volatile(
"movq %3, %%mm0\n\t"
"movq 8%3, %%mm1\n\t"
@@ -132,7 +167,29 @@ static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line
:"memory");
pix += line_size*4;
p += 32;
- }
+
+ // an exact copy of the asm block above would make the
+ // compiler generate some very strange code here,
+ // thus the block pointer is passed via an "r" operand
+ __asm __volatile(
+ "movq (%3), %%mm0\n\t"
+ "movq 8(%3), %%mm1\n\t"
+ "movq 16(%3), %%mm2\n\t"
+ "movq 24(%3), %%mm3\n\t"
+ "movq 32(%3), %%mm4\n\t"
+ "movq 40(%3), %%mm5\n\t"
+ "movq 48(%3), %%mm6\n\t"
+ "movq 56(%3), %%mm7\n\t"
+ "packuswb %%mm1, %%mm0\n\t"
+ "packuswb %%mm3, %%mm2\n\t"
+ "packuswb %%mm5, %%mm4\n\t"
+ "packuswb %%mm7, %%mm6\n\t"
+ "movq %%mm0, (%0)\n\t"
+ "movq %%mm2, (%0, %1)\n\t"
+ "movq %%mm4, (%0, %1, 2)\n\t"
+ "movq %%mm6, (%0, %2)\n\t"
+ ::"r" (pix), "r" (line_size), "r" (line_size*3), "r"(p)
+ :"memory");
}
static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size)
@@ -144,8 +201,9 @@ static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line
/* read the pixels */
p = block;
pix = pixels;
- __asm __volatile("pxor %%mm7, %%mm7":);
- for(i=0;i<4;i++) {
+ MOVQ_ZERO(mm7);
+ i = 4;
+ while (i) {
__asm __volatile(
"movq %2, %%mm0\n\t"
"movq 8%2, %%mm1\n\t"
@@ -172,19 +230,47 @@ static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line
:"memory");
pix += line_size*2;
p += 16;
- }
+ i--;
+ };
}
static void put_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h)
{
- int dh, hh;
+ int hh;
UINT8 *p;
const UINT8 *pix;
+
p = block;
- pix = pixels;
+ pix = pixels; // 2s
+#if 0
+ do {
+ __asm __volatile(
+ "movq %1, %%mm0\n\t"
+ "movq %%mm0, %0\n\t"
+ :"=m"(*p)
+ :"m"(*pix)
+ :"memory");
+ pix += line_size;
+ p += line_size;
+ } while (--h);
+#else
+ // this optimized code is not very useful
+ // the above loop is definitely faster
+ // at least on Celeron 500MHz
+ hh = h & 3;
+ while (hh) {
+ __asm __volatile(
+ "movq %1, %%mm0\n\t"
+ "movq %%mm0, %0\n\t"
+ :"=m"(*p)
+ :"m"(*pix)
+ :"memory");
+ pix += line_size;
+ p += line_size;
+ hh--;
+ }
hh=h>>2;
- dh=h&3;
- while(hh--) {
+ while (hh) {
__asm __volatile(
"movq (%1), %%mm0 \n\t"
"movq (%1, %2), %%mm1 \n\t"
@@ -196,19 +282,11 @@ static void put_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int
"movq %%mm3, (%0, %3) \n\t"
::"r"(p), "r"(pix), "r"(line_size), "r"(line_size*3)
:"memory");
- pix = pix + line_size*4;
- p = p + line_size*4;
- }
- while(dh--) {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix)
- :"memory");
- pix = pix + line_size;
- p = p + line_size;
+ pix += line_size*4;
+ p += line_size*4;
+ hh--;
}
+#endif
}
static void put_pixels_x2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h)
@@ -217,10 +295,9 @@ static void put_pixels_x2_mmx(UINT8 *block, const UINT8 *pixels, int line_size,
const UINT8 *pix;
p = block;
pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm4\n\t"
- ::"m"(mm_wone));
+ MOVQ_ZERO(mm7);
+ MOVQ_WONE(mm4);
+ JUMPALIGN();
do {
__asm __volatile(
"movq %1, %%mm0\n\t"
@@ -252,10 +329,9 @@ static void put_pixels_y2_mmx(UINT8 *block, const UINT8 *pixels, int line_size,
const UINT8 *pix;
p = block;
pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm4\n\t"
- ::"m"(mm_wone));
+ MOVQ_ZERO(mm7);
+ MOVQ_WONE(mm4);
+ JUMPALIGN();
do {
__asm __volatile(
"movq %1, %%mm0\n\t"
@@ -288,11 +364,10 @@ static void put_pixels_xy2_mmx(UINT8 *block, const UINT8 *pixels, int line_size,
UINT8 *p;
const UINT8 *pix;
p = block;
- pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm6\n\t"
- ::"m"(mm_wtwo));
+ pix = pixels; // 1s
+ MOVQ_ZERO(mm7);
+ MOVQ_WTWO(mm6);
+ JUMPALIGN();
do {
__asm __volatile(
"movq %1, %%mm0\n\t"
@@ -338,7 +413,7 @@ static void put_no_rnd_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int
const UINT8 *pix;
p = block;
pix = pixels;
- __asm __volatile("pxor %%mm7, %%mm7\n\t":);
+ MOVQ_ZERO(mm7);
do {
__asm __volatile(
"movq %1, %%mm0\n\t"
@@ -369,7 +444,8 @@ static void put_no_rnd_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int li
const UINT8 *pix;
p = block;
pix = pixels;
- __asm __volatile("pxor %%mm7, %%mm7\n\t":);
+ MOVQ_ZERO(mm7);
+ JUMPALIGN();
do {
__asm __volatile(
"movq %1, %%mm0\n\t"
@@ -401,10 +477,9 @@ static void put_no_rnd_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int
const UINT8 *pix;
p = block;
pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm6\n\t"
- ::"m"(mm_wone));
+ MOVQ_ZERO(mm7);
+ MOVQ_WONE(mm6);
+ JUMPALIGN();
do {
__asm __volatile(
"movq %1, %%mm0\n\t"
@@ -450,10 +525,9 @@ static void avg_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int
const UINT8 *pix;
p = block;
pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm6\n\t"
- ::"m"(mm_wone));
+ MOVQ_ZERO(mm7);
+ MOVQ_WONE(mm6);
+ JUMPALIGN();
do {
__asm __volatile(
"movq %0, %%mm0\n\t"
@@ -487,10 +561,9 @@ static void avg_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int line_si
const UINT8 *pix;
p = block;
pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm6\n\t"
- ::"m"(mm_wone));
+ MOVQ_ZERO(mm7);
+ MOVQ_WONE(mm6);
+ JUMPALIGN();
do {
__asm __volatile(
"movq %1, %%mm1\n\t"
@@ -533,10 +606,9 @@ static void avg_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int line_si
const UINT8 *pix;
p = block;
pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm6\n\t"
- ::"m"(mm_wone));
+ MOVQ_ZERO(mm7);
+ MOVQ_WONE(mm6);
+ JUMPALIGN();
do {
__asm __volatile(
"movq %1, %%mm1\n\t"
@@ -579,10 +651,10 @@ static void avg_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int line_s
const UINT8 *pix;
p = block;
pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm6\n\t"
- ::"m"(mm_wtwo));
+ MOVQ_ZERO(mm7);
+ // this doesn't seem to be used often - so
+ // the inside usage of mm_wone is not optimized
+ MOVQ_WTWO(mm6);
do {
__asm __volatile(
"movq %1, %%mm0\n\t"
@@ -639,7 +711,7 @@ static void avg_no_rnd_pixels_mmx( UINT8 *block, const UINT8 *pixels, int line_
const UINT8 *pix;
p = block;
pix = pixels;
- __asm __volatile("pxor %%mm7, %%mm7\n\t":);
+ MOVQ_ZERO(mm7);
do {
__asm __volatile(
"movq %1, %%mm0\n\t"
@@ -670,8 +742,7 @@ static void avg_no_rnd_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int
const UINT8 *pix;
p = block;
pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t":);
+ MOVQ_ZERO(mm7);
do {
__asm __volatile(
"movq %1, %%mm0\n\t"
@@ -710,8 +781,7 @@ static void avg_no_rnd_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int
const UINT8 *pix;
p = block;
pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t":);
+ MOVQ_ZERO(mm7);
do {
__asm __volatile(
"movq %1, %%mm0\n\t"
@@ -750,10 +820,9 @@ static void avg_no_rnd_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int
const UINT8 *pix;
p = block;
pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm6\n\t"
- ::"m"(mm_wone));
+ MOVQ_ZERO(mm7);
+ MOVQ_WONE(mm6);
+ JUMPALIGN();
do {
__asm __volatile(
"movq %1, %%mm0\n\t"
@@ -807,7 +876,7 @@ static void sub_pixels_mmx( DCTELEM *block, const UINT8 *pixels, int line_size,
const UINT8 *pix;
p = block;
pix = pixels;
- __asm __volatile("pxor %%mm7, %%mm7":);
+ MOVQ_ZERO(mm7);
do {
__asm __volatile(
"movq %0, %%mm0\n\t"
@@ -834,10 +903,9 @@ static void sub_pixels_x2_mmx( DCTELEM *block, const UINT8 *pixels, int line_si
const UINT8 *pix;
p = block;
pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm6"
- ::"m"(mm_wone));
+ MOVQ_ZERO(mm7);
+ MOVQ_WONE(mm6);
+ JUMPALIGN();
do {
__asm __volatile(
"movq %0, %%mm0\n\t"
@@ -874,10 +942,8 @@ static void sub_pixels_y2_mmx( DCTELEM *block, const UINT8 *pixels, int line_si
const UINT8 *pix;
p = block;
pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm6"
- ::"m"(mm_wone));
+ MOVQ_ZERO(mm7);
+ MOVQ_WONE(mm6);
do {
__asm __volatile(
"movq %0, %%mm0\n\t"
@@ -914,10 +980,9 @@ static void sub_pixels_xy2_mmx( DCTELEM *block, const UINT8 *pixels, int line
const UINT8 *pix;
p = block;
pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm6\n\t"
- ::"m"(mm_wtwo));
+ MOVQ_ZERO(mm7);
+ MOVQ_WTWO(mm6);
+ JUMPALIGN();
do {
__asm __volatile(
"movq %1, %%mm0\n\t"
@@ -961,11 +1026,30 @@ static void sub_pixels_xy2_mmx( DCTELEM *block, const UINT8 *pixels, int line
} while(--h);
}
+static void clear_blocks_mmx(DCTELEM *blocks)
+{
+ asm volatile(
+ "pxor %%mm7, %%mm7 \n\t"
+ "movl $-128*6, %%eax \n\t"
+ "1: \n\t"
+ "movq %%mm7, (%0, %%eax) \n\t"
+ "movq %%mm7, 8(%0, %%eax) \n\t"
+ "movq %%mm7, 16(%0, %%eax) \n\t"
+ "movq %%mm7, 24(%0, %%eax) \n\t"
+ "addl $32, %%eax \n\t"
+ " js 1b \n\t"
+ : : "r" (((int)blocks)+128*6)
+ : "%eax"
+ );
+}
+
+static void just_return() { return; }
+
void dsputil_init_mmx(void)
{
- mm_flags = xine_mm_accel();
-#if 0
- printf("CPU flags:");
+ mm_flags = mm_support();
+#if 1
+ printf("libavcodec: CPU flags:");
if (mm_flags & MM_MMX)
printf(" mmx");
if (mm_flags & MM_MMXEXT)
@@ -983,11 +1067,16 @@ void dsputil_init_mmx(void)
get_pixels = get_pixels_mmx;
put_pixels_clamped = put_pixels_clamped_mmx;
add_pixels_clamped = add_pixels_clamped_mmx;
-
- pix_abs16x16 = pix_abs16x16_mmx;
- pix_abs16x16_x2 = pix_abs16x16_x2_mmx;
- pix_abs16x16_y2 = pix_abs16x16_y2_mmx;
+ clear_blocks= clear_blocks_mmx;
+
+ pix_abs16x16 = pix_abs16x16_mmx;
+ pix_abs16x16_x2 = pix_abs16x16_x2_mmx;
+ pix_abs16x16_y2 = pix_abs16x16_y2_mmx;
pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
+ pix_abs8x8 = pix_abs8x8_mmx;
+ pix_abs8x8_x2 = pix_abs8x8_x2_mmx;
+ pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
+ pix_abs8x8_xy2= pix_abs8x8_xy2_mmx;
av_fdct = fdct_mmx;
put_pixels_tab[0] = put_pixels_mmx;
@@ -1016,10 +1105,16 @@ void dsputil_init_mmx(void)
sub_pixels_tab[3] = sub_pixels_xy2_mmx;
if (mm_flags & MM_MMXEXT) {
- pix_abs16x16 = pix_abs16x16_sse;
- }
-
- if (mm_flags & MM_SSE) {
+ pix_abs16x16 = pix_abs16x16_mmx2;
+ pix_abs16x16_x2 = pix_abs16x16_x2_mmx2;
+ pix_abs16x16_y2 = pix_abs16x16_y2_mmx2;
+ pix_abs16x16_xy2= pix_abs16x16_xy2_mmx2;
+
+ pix_abs8x8 = pix_abs8x8_mmx2;
+ pix_abs8x8_x2 = pix_abs8x8_x2_mmx2;
+ pix_abs8x8_y2 = pix_abs8x8_y2_mmx2;
+ pix_abs8x8_xy2= pix_abs8x8_xy2_mmx2;
+
put_pixels_tab[1] = put_pixels_x2_sse;
put_pixels_tab[2] = put_pixels_y2_sse;
@@ -1054,4 +1149,44 @@ void dsputil_init_mmx(void)
ff_idct = simple_idct_mmx;
#endif
}
+
+#if 0
+ // for speed testing
+ get_pixels = just_return;
+ put_pixels_clamped = just_return;
+ add_pixels_clamped = just_return;
+
+ pix_abs16x16 = just_return;
+ pix_abs16x16_x2 = just_return;
+ pix_abs16x16_y2 = just_return;
+ pix_abs16x16_xy2 = just_return;
+
+ put_pixels_tab[0] = just_return;
+ put_pixels_tab[1] = just_return;
+ put_pixels_tab[2] = just_return;
+ put_pixels_tab[3] = just_return;
+
+ put_no_rnd_pixels_tab[0] = just_return;
+ put_no_rnd_pixels_tab[1] = just_return;
+ put_no_rnd_pixels_tab[2] = just_return;
+ put_no_rnd_pixels_tab[3] = just_return;
+
+ avg_pixels_tab[0] = just_return;
+ avg_pixels_tab[1] = just_return;
+ avg_pixels_tab[2] = just_return;
+ avg_pixels_tab[3] = just_return;
+
+ avg_no_rnd_pixels_tab[0] = just_return;
+ avg_no_rnd_pixels_tab[1] = just_return;
+ avg_no_rnd_pixels_tab[2] = just_return;
+ avg_no_rnd_pixels_tab[3] = just_return;
+
+ sub_pixels_tab[0] = just_return;
+ sub_pixels_tab[1] = just_return;
+ sub_pixels_tab[2] = just_return;
+ sub_pixels_tab[3] = just_return;
+
+ //av_fdct = just_return;
+ //ff_idct = just_return;
+#endif
}
diff --git a/src/libffmpeg/libavcodec/i386/motion_est_mmx.c b/src/libffmpeg/libavcodec/i386/motion_est_mmx.c
index 35b16b711..e704c4219 100644
--- a/src/libffmpeg/libavcodec/i386/motion_est_mmx.c
+++ b/src/libffmpeg/libavcodec/i386/motion_est_mmx.c
@@ -16,229 +16,347 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
+ * mostly by Michael Niedermayer <michaelni@gmx.at>
*/
#include "../dsputil.h"
-#include "mmx.h"
-static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001;
-static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002;
+static const __attribute__ ((aligned(8))) UINT64 round_tab[3]={ /* rounding constants, one 16-bit value per MMX word lane; the PIX_SAD wrappers load an entry into mm5 */
+0x0000000000000000, /* [0]: no rounding; not referenced in the visible part of this file */
+0x0001000100010001, /* [1]: +1, added before the >>1 of the two-sample (x2 / y2) average */
+0x0002000200020002, /* [2]: +2, added before the >>2 of the four-sample (xy2) average */
+};
-/* mm7 is accumulator, mm6 is zero */
-static inline void sad_add(const UINT8 *p1, const UINT8 *p2)
+static inline void sad8_mmx(UINT8 *blk1, UINT8 *blk2, int stride, int h)
{
- movq_m2r(*p1, mm0);
- movq_m2r(*p2, mm1);
- movq_r2r(mm0, mm2);
- psubusb_r2r(mm1, mm0);
- psubusb_r2r(mm2, mm1);
- por_r2r(mm1, mm0); /* mm0 is absolute value */
-
- movq_r2r(mm0, mm1);
- punpcklbw_r2r(mm6, mm0);
- punpckhbw_r2r(mm6, mm1);
- paddusw_r2r(mm0, mm7);
- paddusw_r2r(mm1, mm7);
+ int len= -(stride<<h);
+ asm volatile(
+ ".balign 16 \n\t"
+ "1: \n\t"
+ "movq (%1, %%eax), %%mm0 \n\t"
+ "movq (%2, %%eax), %%mm2 \n\t"
+ "movq (%2, %%eax), %%mm4 \n\t"
+ "addl %3, %%eax \n\t"
+ "psubusb %%mm0, %%mm2 \n\t"
+ "psubusb %%mm4, %%mm0 \n\t"
+ "movq (%1, %%eax), %%mm1 \n\t"
+ "movq (%2, %%eax), %%mm3 \n\t"
+ "movq (%2, %%eax), %%mm5 \n\t"
+ "psubusb %%mm1, %%mm3 \n\t"
+ "psubusb %%mm5, %%mm1 \n\t"
+ "por %%mm2, %%mm0 \n\t"
+ "por %%mm1, %%mm3 \n\t"
+ "movq %%mm0, %%mm1 \n\t"
+ "movq %%mm3, %%mm2 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "punpckhbw %%mm7, %%mm1 \n\t"
+ "punpcklbw %%mm7, %%mm3 \n\t"
+ "punpckhbw %%mm7, %%mm2 \n\t"
+ "paddw %%mm1, %%mm0 \n\t"
+ "paddw %%mm3, %%mm2 \n\t"
+ "paddw %%mm2, %%mm0 \n\t"
+ "paddw %%mm0, %%mm6 \n\t"
+ "addl %3, %%eax \n\t"
+ " js 1b \n\t"
+ : "+a" (len)
+ : "r" (blk1 - len), "r" (blk2 - len), "r" (stride)
+ );
}
-/* convert mm7 to value */
-static inline int sad_end(void)
+static inline void sad8_mmx2(UINT8 *blk1, UINT8 *blk2, int stride, int h)
{
- int res;
-
- movq_r2r(mm7, mm0);
- psrlq_i2r(32, mm7);
- paddusw_r2r(mm0, mm7);
-
- movq_r2r(mm7, mm0);
- psrlq_i2r(16, mm7);
- paddusw_r2r(mm0, mm7);
- __asm __volatile ("movd %%mm7, %0" : "=a" (res));
- return res & 0xffff;
+ int len= -(stride<<h);
+ asm volatile(
+ ".balign 16 \n\t"
+ "1: \n\t"
+ "movq (%1, %%eax), %%mm0 \n\t"
+ "movq (%2, %%eax), %%mm2 \n\t"
+ "psadbw %%mm2, %%mm0 \n\t"
+ "addl %3, %%eax \n\t"
+ "movq (%1, %%eax), %%mm1 \n\t"
+ "movq (%2, %%eax), %%mm3 \n\t"
+ "psadbw %%mm1, %%mm3 \n\t"
+ "paddw %%mm3, %%mm0 \n\t"
+ "paddw %%mm0, %%mm6 \n\t"
+ "addl %3, %%eax \n\t"
+ " js 1b \n\t"
+ : "+a" (len)
+ : "r" (blk1 - len), "r" (blk2 - len), "r" (stride)
+ );
}
-int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h)
+static inline void sad8_2_mmx2(UINT8 *blk1a, UINT8 *blk1b, UINT8 *blk2, int stride, int h)
{
- const UINT8 *p1, *p2;
-
- h >>= 1;
- p1 = blk1;
- p2 = blk2;
- pxor_r2r(mm7, mm7); /* mm7 is accumulator */
- pxor_r2r(mm6, mm6); /* mm7 is zero constant */
- do {
- sad_add(p1, p2);
- sad_add(p1 + 8, p2 + 8);
- p1 += lx;
- p2 += lx;
- sad_add(p1, p2);
- sad_add(p1 + 8, p2 + 8);
- p1 += lx;
- p2 += lx;
- } while (--h);
- return sad_end();
+ int len= -(stride<<h);
+ asm volatile(
+ ".balign 16 \n\t"
+ "1: \n\t"
+ "movq (%1, %%eax), %%mm0 \n\t"
+ "movq (%2, %%eax), %%mm2 \n\t"
+ "pavgb %%mm2, %%mm0 \n\t"
+ "movq (%3, %%eax), %%mm2 \n\t"
+ "psadbw %%mm2, %%mm0 \n\t"
+ "addl %4, %%eax \n\t"
+ "movq (%1, %%eax), %%mm1 \n\t"
+ "movq (%2, %%eax), %%mm3 \n\t"
+ "pavgb %%mm1, %%mm3 \n\t"
+ "movq (%3, %%eax), %%mm1 \n\t"
+ "psadbw %%mm1, %%mm3 \n\t"
+ "paddw %%mm3, %%mm0 \n\t"
+ "paddw %%mm0, %%mm6 \n\t"
+ "addl %4, %%eax \n\t"
+ " js 1b \n\t"
+ : "+a" (len)
+ : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" (stride)
+ );
}
-/* please test it ! */
-static inline void sad_add_sse(const UINT8 *p1, const UINT8 *p2)
-{
- movq_m2r(*(p1 + 0), mm0);
- movq_m2r(*(p1 + 8), mm1);
- psadbw_m2r(*(p2 + 0), mm0);
- psadbw_m2r(*(p2 + 8), mm1);
- paddusw_r2r(mm0, mm7);
- paddusw_r2r(mm1, mm7);
+static inline void sad8_4_mmx2(UINT8 *blk1, UINT8 *blk2, int stride, int h)
+{ //FIXME reuse src
+ int len= -(stride<<h);
+ asm volatile(
+ ".balign 16 \n\t"
+ "1: \n\t"
+ "movq (%1, %%eax), %%mm0 \n\t"
+ "movq (%2, %%eax), %%mm2 \n\t"
+ "movq 1(%1, %%eax), %%mm1 \n\t"
+ "movq 1(%2, %%eax), %%mm3 \n\t"
+ "pavgb %%mm2, %%mm0 \n\t"
+ "pavgb %%mm1, %%mm3 \n\t"
+ "pavgb %%mm3, %%mm0 \n\t"
+ "movq (%3, %%eax), %%mm2 \n\t"
+ "psadbw %%mm2, %%mm0 \n\t"
+ "addl %4, %%eax \n\t"
+ "movq (%1, %%eax), %%mm1 \n\t"
+ "movq (%2, %%eax), %%mm3 \n\t"
+ "movq 1(%1, %%eax), %%mm2 \n\t"
+ "movq 1(%2, %%eax), %%mm4 \n\t"
+ "pavgb %%mm3, %%mm1 \n\t"
+ "pavgb %%mm4, %%mm2 \n\t"
+ "pavgb %%mm1, %%mm2 \n\t"
+ "movq (%3, %%eax), %%mm1 \n\t"
+ "psadbw %%mm1, %%mm2 \n\t"
+ "paddw %%mm2, %%mm0 \n\t"
+ "paddw %%mm0, %%mm6 \n\t"
+ "addl %4, %%eax \n\t"
+ " js 1b \n\t"
+ : "+a" (len)
+ : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" (stride)
+ );
}
-int pix_abs16x16_sse(UINT8 *blk1, UINT8 *blk2, int lx, int h)
+static inline void sad8_2_mmx(UINT8 *blk1a, UINT8 *blk1b, UINT8 *blk2, int stride, int h)
{
- const UINT8 *p1, *p2;
-
- h >>= 1;
- p1 = blk1;
- p2 = blk2;
- pxor_r2r(mm7, mm7); /* mm7 is accumulator */
- do {
- sad_add_sse(p1, p2);
- p1 += lx;
- p2 += lx;
- sad_add_sse(p1, p2);
- p1 += lx;
- p2 += lx;
- } while (--h);
- return sad_end();
+ int len= -(stride<<h);
+ asm volatile(
+ ".balign 16 \n\t"
+ "1: \n\t"
+ "movq (%1, %%eax), %%mm0 \n\t"
+ "movq (%2, %%eax), %%mm1 \n\t"
+ "movq (%1, %%eax), %%mm2 \n\t"
+ "movq (%2, %%eax), %%mm3 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "punpcklbw %%mm7, %%mm1 \n\t"
+ "punpckhbw %%mm7, %%mm2 \n\t"
+ "punpckhbw %%mm7, %%mm3 \n\t"
+ "paddw %%mm0, %%mm1 \n\t"
+ "paddw %%mm2, %%mm3 \n\t"
+ "movq (%3, %%eax), %%mm4 \n\t"
+ "movq (%3, %%eax), %%mm2 \n\t"
+ "paddw %%mm5, %%mm1 \n\t"
+ "paddw %%mm5, %%mm3 \n\t"
+ "psrlw $1, %%mm1 \n\t"
+ "psrlw $1, %%mm3 \n\t"
+ "packuswb %%mm3, %%mm1 \n\t"
+ "psubusb %%mm1, %%mm4 \n\t"
+ "psubusb %%mm2, %%mm1 \n\t"
+ "por %%mm4, %%mm1 \n\t"
+ "movq %%mm1, %%mm0 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "punpckhbw %%mm7, %%mm1 \n\t"
+ "paddw %%mm1, %%mm0 \n\t"
+ "paddw %%mm0, %%mm6 \n\t"
+ "addl %4, %%eax \n\t"
+ " js 1b \n\t"
+ : "+a" (len)
+ : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" (stride)
+ );
}
-#define DUMP(reg) { mmx_t tmp; movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq); }
-
-/* mm7 is accumulator, mm6 is zero */
-static inline void sad_add_x2(const UINT8 *p1, const UINT8 *p2, const UINT8 *p3)
+static inline void sad8_4_mmx(UINT8 *blk1, UINT8 *blk2, int stride, int h)
{
- movq_m2r(*(p2 + 0), mm0);
- movq_m2r(*(p3 + 0), mm1);
- movq_r2r(mm0, mm2);
- movq_r2r(mm1, mm3);
- punpcklbw_r2r(mm6, mm0); /* extract 4 bytes low */
- punpcklbw_r2r(mm6, mm1);
- punpckhbw_r2r(mm6, mm2); /* high */
- punpckhbw_r2r(mm6, mm3);
- paddusw_r2r(mm1, mm0);
- paddusw_r2r(mm3, mm2);
- movq_m2r(*(p1 + 0), mm1); /* mm1 : other value */
- paddusw_r2r(mm5, mm0); /* + 1 */
- paddusw_r2r(mm5, mm2); /* + 1 */
- psrlw_i2r(1, mm0);
- psrlw_i2r(1, mm2);
- packuswb_r2r(mm2, mm0); /* average is in mm0 */
-
- movq_r2r(mm1, mm2);
- psubusb_r2r(mm0, mm1);
- psubusb_r2r(mm2, mm0);
- por_r2r(mm1, mm0); /* mm0 is absolute value */
-
- movq_r2r(mm0, mm1);
- punpcklbw_r2r(mm6, mm0);
- punpckhbw_r2r(mm6, mm1);
- paddusw_r2r(mm0, mm7);
- paddusw_r2r(mm1, mm7);
+ int len= -(stride<<h);
+ asm volatile(
+ ".balign 16 \n\t"
+ "1: \n\t"
+ "movq (%1, %%eax), %%mm0 \n\t"
+ "movq (%2, %%eax), %%mm1 \n\t"
+ "movq %%mm0, %%mm4 \n\t"
+ "movq %%mm1, %%mm2 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "punpcklbw %%mm7, %%mm1 \n\t"
+ "punpckhbw %%mm7, %%mm4 \n\t"
+ "punpckhbw %%mm7, %%mm2 \n\t"
+ "paddw %%mm1, %%mm0 \n\t"
+ "paddw %%mm2, %%mm4 \n\t"
+ "movq 1(%1, %%eax), %%mm2 \n\t"
+ "movq 1(%2, %%eax), %%mm3 \n\t"
+ "movq %%mm2, %%mm1 \n\t"
+ "punpcklbw %%mm7, %%mm2 \n\t"
+ "punpckhbw %%mm7, %%mm1 \n\t"
+ "paddw %%mm0, %%mm2 \n\t"
+ "paddw %%mm4, %%mm1 \n\t"
+ "movq %%mm3, %%mm4 \n\t"
+ "punpcklbw %%mm7, %%mm3 \n\t"
+ "punpckhbw %%mm7, %%mm4 \n\t"
+ "paddw %%mm3, %%mm2 \n\t"
+ "paddw %%mm4, %%mm1 \n\t"
+ "movq (%3, %%eax), %%mm3 \n\t"
+ "movq (%3, %%eax), %%mm4 \n\t"
+ "paddw %%mm5, %%mm2 \n\t"
+ "paddw %%mm5, %%mm1 \n\t"
+ "psrlw $2, %%mm2 \n\t"
+ "psrlw $2, %%mm1 \n\t"
+ "packuswb %%mm1, %%mm2 \n\t"
+ "psubusb %%mm2, %%mm3 \n\t"
+ "psubusb %%mm4, %%mm2 \n\t"
+ "por %%mm3, %%mm2 \n\t"
+ "movq %%mm2, %%mm0 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "punpckhbw %%mm7, %%mm2 \n\t"
+ "paddw %%mm2, %%mm0 \n\t"
+ "paddw %%mm0, %%mm6 \n\t"
+ "addl %4, %%eax \n\t"
+ " js 1b \n\t"
+ : "+a" (len)
+ : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" (stride)
+ );
}
-int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h)
+static inline int sum_mmx()
{
- const UINT8 *p1, *p2;
-
- p1 = blk1;
- p2 = blk2;
- pxor_r2r(mm7, mm7); /* mm7 is accumulator */
- pxor_r2r(mm6, mm6); /* mm7 is zero constant */
- movq_m2r(mm_wone, mm5); /* one constant */
- do {
- sad_add_x2(p1, p2, p2 + 1);
- sad_add_x2(p1 + 8, p2 + 8, p2 + 9);
- p1 += lx;
- p2 += lx;
- } while (--h);
- return sad_end();
+ int ret;
+ asm volatile(
+ "movq %%mm6, %%mm0 \n\t"
+ "psrlq $32, %%mm6 \n\t"
+ "paddw %%mm0, %%mm6 \n\t"
+ "movq %%mm6, %%mm0 \n\t"
+ "psrlq $16, %%mm6 \n\t"
+ "paddw %%mm0, %%mm6 \n\t"
+ "movd %%mm6, %0 \n\t"
+ : "=r" (ret)
+ );
+ return ret&0xFFFF;
}
-int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h)
+static inline int sum_mmx2()
{
- const UINT8 *p1, *p2;
-
- p1 = blk1;
- p2 = blk2;
- pxor_r2r(mm7, mm7); /* mm7 is accumulator */
- pxor_r2r(mm6, mm6); /* mm7 is zero constant */
- movq_m2r(mm_wone, mm5); /* one constant */
- do {
- sad_add_x2(p1, p2, p2 + lx);
- sad_add_x2(p1 + 8, p2 + 8, p2 + 8 + lx);
- p1 += lx;
- p2 += lx;
- } while (--h);
- return sad_end();
+ int ret;
+ asm volatile(
+ "movd %%mm6, %0 \n\t"
+ : "=r" (ret)
+ );
+ return ret;
}
-/* mm7 is accumulator, mm6 is zero */
-static inline void sad_add_xy2(const UINT8 *p1, const UINT8 *p2, const UINT8 *p3)
-{
- movq_m2r(*(p2 + 0), mm0);
- movq_m2r(*(p3 + 0), mm1);
- movq_r2r(mm0, mm2);
- movq_r2r(mm1, mm3);
- punpcklbw_r2r(mm6, mm0); /* extract 4 bytes low */
- punpcklbw_r2r(mm6, mm1);
- punpckhbw_r2r(mm6, mm2); /* high */
- punpckhbw_r2r(mm6, mm3);
- paddusw_r2r(mm1, mm0);
- paddusw_r2r(mm3, mm2);
-
- movq_m2r(*(p2 + 1), mm1);
- movq_m2r(*(p3 + 1), mm3);
- movq_r2r(mm1, mm4);
- punpcklbw_r2r(mm6, mm1); /* low */
- punpckhbw_r2r(mm6, mm4); /* high */
- paddusw_r2r(mm1, mm0);
- paddusw_r2r(mm4, mm2);
- movq_r2r(mm3, mm4);
- punpcklbw_r2r(mm6, mm3); /* low */
- punpckhbw_r2r(mm6, mm4); /* high */
- paddusw_r2r(mm3, mm0);
- paddusw_r2r(mm4, mm2);
-
- movq_m2r(*(p1 + 0), mm1); /* mm1 : other value */
- paddusw_r2r(mm5, mm0); /* + 2 */
- paddusw_r2r(mm5, mm2); /* + 2 */
- psrlw_i2r(2, mm0);
- psrlw_i2r(2, mm2);
- packuswb_r2r(mm2, mm0); /* average is in mm0 */
+/*
+ * PIX_SAD(suf) expands to the eight SAD entry points
+ * pix_abs{8x8,16x16}{,_x2,_y2,_xy2}_<suf>.
+ *
+ * In every wrapper blk2 is the block being compared and blk1 the block it is
+ * compared against: as is (plain), averaged with its right neighbour (x2),
+ * averaged with the row below (y2), or averaged over all four of those
+ * samples (xy2).  Each wrapper clears mm7 (zero register of the plain-MMX
+ * kernels) and mm6 (accumulator), loads the rounding constant into mm5 where
+ * an average is taken, runs the sad8_* kernels on 8-pixel wide columns (the
+ * last argument is log2 of the row count) and reads the total back with
+ * sum_<suf>().
+ *
+ * pix_abs8x8_x2 averages blk1 with blk1+1, like pix_abs16x16_x2 does; it
+ * previously passed blk2+1, which mixed the compared block into the
+ * prediction.
+ */
+#define PIX_SAD(suf)\
+int pix_abs8x8_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
+{\
+ asm volatile("pxor %%mm7, %%mm7 \n\t"\
+ "pxor %%mm6, %%mm6 \n\t":);\
+\
+ sad8_ ## suf(blk1, blk2, stride, 3);\
+\
+ return sum_ ## suf();\
+}\
+\
+int pix_abs8x8_x2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
+{\
+ asm volatile("pxor %%mm7, %%mm7 \n\t"\
+ "pxor %%mm6, %%mm6 \n\t"\
+ "movq %0, %%mm5 \n\t"\
+ :: "m"(round_tab[1]) \
+ );\
+\
+ sad8_2_ ## suf(blk1, blk1+1, blk2, stride, 3);\
+\
+ return sum_ ## suf();\
+}\
+\
+int pix_abs8x8_y2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
+{\
+ asm volatile("pxor %%mm7, %%mm7 \n\t"\
+ "pxor %%mm6, %%mm6 \n\t"\
+ "movq %0, %%mm5 \n\t"\
+ :: "m"(round_tab[1]) \
+ );\
+\
+ sad8_2_ ## suf(blk1, blk1+stride, blk2, stride, 3);\
+\
+ return sum_ ## suf();\
+}\
+\
+int pix_abs8x8_xy2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
+{\
+ asm volatile("pxor %%mm7, %%mm7 \n\t"\
+ "pxor %%mm6, %%mm6 \n\t"\
+ "movq %0, %%mm5 \n\t"\
+ :: "m"(round_tab[2]) \
+ );\
+\
+ sad8_4_ ## suf(blk1, blk2, stride, 3);\
+\
+ return sum_ ## suf();\
+}\
+\
+int pix_abs16x16_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
+{\
+ asm volatile("pxor %%mm7, %%mm7 \n\t"\
+ "pxor %%mm6, %%mm6 \n\t":);\
+\
+ sad8_ ## suf(blk1 , blk2 , stride, 4);\
+ sad8_ ## suf(blk1+8, blk2+8, stride, 4);\
+\
+ return sum_ ## suf();\
+}\
+int pix_abs16x16_x2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
+{\
+ asm volatile("pxor %%mm7, %%mm7 \n\t"\
+ "pxor %%mm6, %%mm6 \n\t"\
+ "movq %0, %%mm5 \n\t"\
+ :: "m"(round_tab[1]) \
+ );\
+\
+ sad8_2_ ## suf(blk1 , blk1+1, blk2 , stride, 4);\
+ sad8_2_ ## suf(blk1+8, blk1+9, blk2+8, stride, 4);\
+\
+ return sum_ ## suf();\
+}\
+int pix_abs16x16_y2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
+{\
+ asm volatile("pxor %%mm7, %%mm7 \n\t"\
+ "pxor %%mm6, %%mm6 \n\t"\
+ "movq %0, %%mm5 \n\t"\
+ :: "m"(round_tab[1]) \
+ );\
+\
+ sad8_2_ ## suf(blk1 , blk1+stride, blk2 , stride, 4);\
+ sad8_2_ ## suf(blk1+8, blk1+stride+8,blk2+8, stride, 4);\
+\
+ return sum_ ## suf();\
+}\
+int pix_abs16x16_xy2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
+{\
+ asm volatile("pxor %%mm7, %%mm7 \n\t"\
+ "pxor %%mm6, %%mm6 \n\t"\
+ "movq %0, %%mm5 \n\t"\
+ :: "m"(round_tab[2]) \
+ );\
+\
+ sad8_4_ ## suf(blk1 , blk2 , stride, 4);\
+ sad8_4_ ## suf(blk1+8, blk2+8, stride, 4);\
+\
+ return sum_ ## suf();\
+}\
- movq_r2r(mm1, mm2);
- psubusb_r2r(mm0, mm1);
- psubusb_r2r(mm2, mm0);
- por_r2r(mm1, mm0); /* mm0 is absolute value */
-
- movq_r2r(mm0, mm1);
- punpcklbw_r2r(mm6, mm0);
- punpckhbw_r2r(mm6, mm1);
- paddusw_r2r(mm0, mm7);
- paddusw_r2r(mm1, mm7);
-}
-
-int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h)
-{
- const UINT8 *p1, *p2, *p3;
-
- p1 = blk1;
- p2 = blk2;
- p3 = blk2 + lx;
- pxor_r2r(mm7, mm7); /* mm7 is accumulator */
- pxor_r2r(mm6, mm6); /* mm7 is zero constant */
- movq_m2r(mm_wtwo, mm5); /* one constant */
- do {
- sad_add_xy2(p1, p2, p2 + lx);
- sad_add_xy2(p1 + 8, p2 + 8, p2 + 8 + lx);
- p1 += lx;
- p2 += lx;
- } while (--h);
- return sad_end();
-}
+PIX_SAD(mmx)
+PIX_SAD(mmx2)
diff --git a/src/libffmpeg/libavcodec/motion_est.c b/src/libffmpeg/libavcodec/motion_est.c
index 084eb6038..92724ac87 100644
--- a/src/libffmpeg/libavcodec/motion_est.c
+++ b/src/libffmpeg/libavcodec/motion_est.c
@@ -16,6 +16,8 @@
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * new Motion Estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
*/
#include "config.h"
#include "xine-utils/xineutils.h"
@@ -25,9 +27,14 @@
#include "dsputil.h"
#include "mpegvideo.h"
+#define ABS(a) ((a)>0 ? (a) : -(a)) /* absolute value; the argument is evaluated twice */
+#define MAX(a,b) ((a) > (b) ? (a) : (b)) /* larger of a and b; the selected argument is evaluated twice */
+#define INTER_BIAS 257 /* NOTE(review): not used in the visible part of this file; presumably a bias for the inter/intra decision - confirm */
+
static void halfpel_motion_search(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin,
- int xmin, int ymin, int xmax, int ymax);
+ int xmin, int ymin, int xmax, int ymax,
+ int pred_x, int pred_y);
/* config it to test motion vector encoding (send random vectors) */
//#define CONFIG_TEST_MV_ENCODE
@@ -54,6 +61,28 @@ static int pix_sum(UINT8 * pix, int line_size)
return s;
}
+/*
+ * Sum of |pixel - mean| over the 16x16 block whose top-left pixel is at
+ * 'pix'; consecutive rows of the block are 'line_size' bytes apart.
+ */
+static int pix_dev(UINT8 * pix, int line_size, int mean)
+{
+    int row, col;
+    int total = 0;
+
+    for (row = 0; row < 16; row++) {
+        const UINT8 *line = pix + row * line_size;
+
+        for (col = 0; col < 16; col++)
+            total += ABS(line[col] - mean);
+    }
+    return total;
+}
+
static int pix_norm1(UINT8 * pix, int line_size)
{
int s, i, j;
@@ -138,7 +167,7 @@ static int full_motion_search(MpegEncContext * s,
for (y = y1; y <= y2; y++) {
for (x = x1; x <= x2; x++) {
d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x,
- s->linesize, 16);
+ s->linesize);
if (d < dmin ||
(d == dmin &&
(abs(x - xx) + abs(y - yy)) <
@@ -202,7 +231,7 @@ static int log_motion_search(MpegEncContext * s,
do {
for (y = y1; y <= y2; y += range) {
for (x = x1; x <= x2; x += range) {
- d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize, 16);
+ d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize);
if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
dmin = d;
mx = x;
@@ -282,7 +311,7 @@ static int phods_motion_search(MpegEncContext * s,
lastx = x;
for (x = x1; x <= x2; x += range) {
- d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize, 16);
+ d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize);
if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
dminx = d;
mx = x;
@@ -291,7 +320,7 @@ static int phods_motion_search(MpegEncContext * s,
x = lastx;
for (y = y1; y <= y2; y += range) {
- d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize, 16);
+ d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize);
if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
dminy = d;
my = y;
@@ -330,78 +359,474 @@ static int phods_motion_search(MpegEncContext * s,
return dminy;
}
+
+#define Z_THRESHOLD 256 /* cost below which the EPZS and half-pel searches accept the current vector and stop early */
+
+#define CHECK_MV(x,y)/* Try the full-pel candidate (x,y) for a 16x16 block.
+ * Cost = pix_abs16x16() of new_pic against old_pic displaced by (x,y), plus
+ * the two mv_penalty[] entries indexed by the candidate (shifted left by
+ * shift) minus the predictor (pred_x, pred_y), times quant.  When the cost is
+ * below dmin, best[] and dmin are updated.  Uses these names from the calling
+ * scope: d, dmin, best, new_pic, old_pic, pic_stride, mv_penalty, pred_x,
+ * pred_y, shift, quant.  x and y are expanded several times. */\
+{\
+ d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
+ d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
+ if(d<dmin){\
+ best[0]=x;\
+ best[1]=y;\
+ dmin=d;\
+ }\
+}
+
+#define CHECK_MV_DIR(x,y,new_dir)/* Same cost test as a plain 16x16 candidate
+ * check (pix_abs16x16 plus mv_penalty[] terms times quant); on a new minimum
+ * it updates best[] and dmin and also records new_dir in next_dir.  Uses d,
+ * dmin, best, next_dir, new_pic, old_pic, pic_stride, mv_penalty, pred_x,
+ * pred_y, shift and quant from the calling scope. */\
+{\
+ d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
+ d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
+ if(d<dmin){\
+ best[0]=x;\
+ best[1]=y;\
+ dmin=d;\
+ next_dir= new_dir;\
+ }\
+}
+
+#define CHECK_MV4(x,y)/* Try the full-pel candidate (x,y) for an 8x8 block:
+ * cost = pix_abs8x8() of new_pic against old_pic displaced by (x,y), plus the
+ * mv_penalty[] terms times quant; updates best[] and dmin on a new minimum.
+ * Uses d, dmin, best, new_pic, old_pic, pic_stride, mv_penalty, pred_x,
+ * pred_y, shift and quant from the calling scope. */\
+{\
+ d = pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
+ d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
+ if(d<dmin){\
+ best[0]=x;\
+ best[1]=y;\
+ dmin=d;\
+ }\
+}
+
+#define CHECK_MV4_DIR(x,y,new_dir)/* 8x8 candidate check (pix_abs8x8 plus
+ * mv_penalty[] terms times quant) that, on a new minimum, updates best[] and
+ * dmin and records new_dir in next_dir.  Uses d, dmin, best, next_dir,
+ * new_pic, old_pic, pic_stride, mv_penalty, pred_x, pred_y, shift and quant
+ * from the calling scope. */\
+{\
+ d = pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
+ d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
+ if(d<dmin){\
+ best[0]=x;\
+ best[1]=y;\
+ dmin=d;\
+ next_dir= new_dir;\
+ }\
+}
+
+
+#define check(x,y,S,v)/* Debug aid: for each bound of xmin..xmax / ymin..ymax
+ * (shifted left by S) that (x,y) violates, prints x, y, s->mb_x, s->mb_y and
+ * the bound's name followed by the tag v.  Its only visible uses are
+ * commented out.  Every line ends in a continuation backslash, so the
+ * definition also absorbs the line that follows it. */\
+if( (x)<(xmin<<(S)) ) printf("%d %d %d %d xmin" #v, (x), (y), s->mb_x, s->mb_y);\
+if( (x)>(xmax<<(S)) ) printf("%d %d %d %d xmax" #v, (x), (y), s->mb_x, s->mb_y);\
+if( (y)<(ymin<<(S)) ) printf("%d %d %d %d ymin" #v, (x), (y), s->mb_x, s->mb_y);\
+if( (y)>(ymax<<(S)) ) printf("%d %d %d %d ymax" #v, (x), (y), s->mb_x, s->mb_y);\
+
+
+static inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
+ UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
+ int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
+ int xmin, int ymin, int xmax, int ymax, int shift)
+{ /* Greedy full-pel refinement around best[] for a 16x16 block.  Each pass
+ * probes the left, upper, right and lower neighbours of the pass's centre
+ * with CHECK_MV_DIR, never leaving xmin..xmax / ymin..ymax, and the cheapest
+ * improving neighbour becomes the next centre.  Returns dmin, with best[]
+ * updated in place, after the first pass that brings no improvement.
+ * The remaining parameters are consumed by the CHECK_MV_DIR expansion. */
+ int next_dir=-1;
+
+ for(;;){
+ int d;
+ const int dir= next_dir; // direction of the step that led to this centre (-1: none yet)
+ const int x= best[0];
+ const int y= best[1];
+ next_dir=-1;
+
+//printf("%d", dir);
+ if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y , 0) // skipped after a step in direction 2: that neighbour is the previous centre
+ if(dir!=3 && y>ymin) CHECK_MV_DIR(x , y-1, 1)
+ if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y , 2)
+ if(dir!=1 && y<ymax) CHECK_MV_DIR(x , y+1, 3)
+
+ if(next_dir==-1){ // no neighbour was cheaper: done
+ return dmin;
+ }
+ }
+
+/* for(;;){
+ int d;
+ const int x= best[0];
+ const int y= best[1];
+ const int last_min=dmin;
+ if(x>xmin) CHECK_MV(x-1, y )
+ if(y>xmin) CHECK_MV(x , y-1)
+ if(x<xmax) CHECK_MV(x+1, y )
+ if(y<xmax) CHECK_MV(x , y+1)
+ if(x>xmin && y>ymin) CHECK_MV(x-1, y-1)
+ if(x>xmin && y<ymax) CHECK_MV(x-1, y+1)
+ if(x<xmax && y>ymin) CHECK_MV(x+1, y-1)
+ if(x<xmax && y<ymax) CHECK_MV(x+1, y+1)
+ if(x-1>xmin) CHECK_MV(x-2, y )
+ if(y-1>xmin) CHECK_MV(x , y-2)
+ if(x+1<xmax) CHECK_MV(x+2, y )
+ if(y+1<xmax) CHECK_MV(x , y+2)
+ if(x-1>xmin && y-1>ymin) CHECK_MV(x-2, y-2)
+ if(x-1>xmin && y+1<ymax) CHECK_MV(x-2, y+2)
+ if(x+1<xmax && y-1>ymin) CHECK_MV(x+2, y-2)
+ if(x+1<xmax && y+1<ymax) CHECK_MV(x+2, y+2)
+ if(dmin==last_min) return dmin;
+ }
+ */
+}
+
+static inline int small_diamond_search4MV(MpegEncContext * s, int *best, int dmin,
+ UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
+ int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
+ int xmin, int ymin, int xmax, int ymax, int shift)
+{ /* Greedy full-pel refinement around best[] for an 8x8 block.  Each pass
+ * probes the left, upper, right and lower neighbours of the pass's centre
+ * with CHECK_MV4_DIR, never leaving xmin..xmax / ymin..ymax, and the cheapest
+ * improving neighbour becomes the next centre.  Returns dmin, with best[]
+ * updated in place, after the first pass that brings no improvement.
+ * The remaining parameters are consumed by the CHECK_MV4_DIR expansion. */
+ int next_dir=-1;
+
+ for(;;){
+ int d;
+ const int dir= next_dir; // direction of the step that led to this centre (-1: none yet)
+ const int x= best[0];
+ const int y= best[1];
+ next_dir=-1;
+
+//printf("%d", dir);
+ if(dir!=2 && x>xmin) CHECK_MV4_DIR(x-1, y , 0) // skipped after a step in direction 2: that neighbour is the previous centre
+ if(dir!=3 && y>ymin) CHECK_MV4_DIR(x , y-1, 1)
+ if(dir!=0 && x<xmax) CHECK_MV4_DIR(x+1, y , 2)
+ if(dir!=1 && y<ymax) CHECK_MV4_DIR(x , y+1, 3)
+
+ if(next_dir==-1){ // no neighbour was cheaper: done
+ return dmin;
+ }
+ }
+}
+
+static inline int snake_search(MpegEncContext * s, int *best, int dmin,
+ UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
+ int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
+ int xmin, int ymin, int xmax, int ymax, int shift)
+{ /* Full-pel refinement for a 16x16 block that probes one neighbour of
+ * best[] per iteration, choosing the neighbour by a rotating direction
+ * index.  A cheaper neighbour becomes the new best[] and the direction turns
+ * by one step; a failed probe turns it by two steps.  Positions outside
+ * xmin..xmax / ymin..ymax count as failures.  Returns dmin (best[] updated
+ * in place) on the fourth consecutive failed probe. */
+ int dir=0; // index into x_dir/y_dir of the neighbour probed next
+ int c=1; // turning sense, +1 or -1
+ static int x_dir[8]= {1,1,0,-1,-1,-1, 0, 1}; // x/y offsets of the eight neighbours, in rotational order
+ static int y_dir[8]= {0,1,1, 1, 0,-1,-1,-1};
+ int fails=0; // consecutive probes that did not lower dmin
+ int last_d[2]={dmin, dmin}; // costs of the two most recently accepted positions, newest first
+
+/*static int good=0;
+static int bad=0;
+static int point=0;
+
+point++;
+if(256*256*256*64%point==0)
+{
+ printf("%d %d %d\n", good, bad, point);
+}*/
+
+ for(;;){
+ int x= best[0];
+ int y= best[1];
+ int d;
+ x+=x_dir[dir];
+ y+=y_dir[dir];
+ if(x>=xmin && x<=xmax && y>=ymin && y<=ymax){
+ d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);
+ d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;
+ }else{
+ d = dmin + 10000; //FIXME smarter boundary handling
+ }
+ if(d<dmin){
+ best[0]=x;
+ best[1]=y;
+ dmin=d;
+
+ if(last_d[1] - last_d[0] > last_d[0] - d) c= -c; // this gain is smaller than the previous one: reverse the turning sense
+ dir+=c;
+
+ fails=0;
+//good++;
+ last_d[1]=last_d[0];
+ last_d[0]=d;
+ }else{
+//bad++;
+ if(fails){
+ if(fails>=3) return dmin;
+ }else{
+ c= -c; // first failure after a success: reverse the turning sense
+ }
+ dir+=c*2;
+ fails++;
+ }
+ dir&=7; // keep dir a valid index into x_dir/y_dir
+ }
+}
+
+static int epzs_motion_search(MpegEncContext * s,
+ int *mx_ptr, int *my_ptr,
+ int P[5][2], int pred_x, int pred_y,
+ int xmin, int ymin, int xmax, int ymax)
+{ /* Predictor-driven full-pel search for the 16x16 macroblock at
+ * (s->mb_x, s->mb_y).  Starts from the zero vector, tests the candidate
+ * vectors in P[] (each shifted right by 'shift' to full-pel units) with
+ * CHECK_MV, then refines the winner with small_diamond_search() when
+ * s->full_search is ME_EPZS and with snake_search() otherwise.  Returns
+ * early as soon as the cost drops below Z_THRESHOLD.
+ *
+ * The chosen vector, relative to the macroblock position, is stored in
+ * *mx_ptr / *my_ptr; the return value is its cost.  xmin..ymax bound the
+ * refinement step; the P[] candidates themselves are not range-checked
+ * here. */
+ int best[2]={0, 0};
+ int d, dmin;
+ UINT8 *new_pic, *old_pic;
+ const int pic_stride= s->linesize;
+ const int pic_xy= (s->mb_y*pic_stride + s->mb_x)*16; // offset of the macroblock's top-left pixel
+ UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
+ int quant= s->qscale; // qscale of the prev frame
+ const int shift= 1+s->quarter_sample;
+
+ new_pic = s->new_picture[0] + pic_xy;
+ old_pic = s->last_picture[0] + pic_xy;
+
+ dmin = pix_abs16x16(new_pic, old_pic, pic_stride); // zero vector; no mv penalty is added for it
+ if(dmin<Z_THRESHOLD){
+ *mx_ptr= 0;
+ *my_ptr= 0;
+//printf("Z");
+ return dmin;
+ }
+
+ /* first line */
+ if ((s->mb_y == 0 || s->first_slice_line || s->first_gob_line)) {
+ CHECK_MV(P[1][0]>>shift, P[1][1]>>shift)
+ }else{
+ CHECK_MV(P[4][0]>>shift, P[4][1]>>shift)
+ if(dmin<Z_THRESHOLD){ // dmin was >= Z_THRESHOLD before this check, so P[4] is what lowered it
+ *mx_ptr= P[4][0]>>shift;
+ *my_ptr= P[4][1]>>shift;
+//printf("M\n");
+ return dmin;
+ }
+ CHECK_MV(P[1][0]>>shift, P[1][1]>>shift)
+ CHECK_MV(P[2][0]>>shift, P[2][1]>>shift)
+ CHECK_MV(P[3][0]>>shift, P[3][1]>>shift)
+ }
+ CHECK_MV(P[0][0]>>shift, P[0][1]>>shift)
+
+//check(best[0],best[1],0, b0)
+ if(s->full_search==ME_EPZS)
+ dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride,
+ pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, shift);
+ else
+ dmin= snake_search(s, best, dmin, new_pic, old_pic, pic_stride,
+ pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, shift);
+//check(best[0],best[1],0, b1)
+ *mx_ptr= best[0];
+ *my_ptr= best[1];
+
+// printf("%d %d %d \n", best[0], best[1], dmin);
+ return dmin;
+}
+
+/*
+ * Predictor-driven full-pel search for one 8x8 block of the macroblock at
+ * (s->mb_x, s->mb_y); 'block' (0..3) selects the block in raster order.
+ *
+ * Starts from the zero vector, tests the candidate vectors in P[] (each
+ * shifted right by 'shift' to full-pel units) with CHECK_MV4 and refines the
+ * winner with small_diamond_search4MV().  The chosen vector, relative to the
+ * block position, is stored in *mx_ptr / *my_ptr and its cost is returned.
+ * xmin..ymax bound the refinement step; the P[] candidates themselves are
+ * not range-checked here.
+ */
+static int epzs_motion_search4(MpegEncContext * s, int block,
+                               int *mx_ptr, int *my_ptr,
+                               int P[6][2], int pred_x, int pred_y,
+                               int xmin, int ymin, int xmax, int ymax)
+{
+    int best[2]={0, 0};
+    int d, dmin;
+    UINT8 *new_pic, *old_pic;
+    const int pic_stride= s->linesize;
+    /* offset of the 8x8 block's top-left pixel */
+    const int pic_xy= ((s->mb_y*2 + (block>>1))*pic_stride + s->mb_x*2 + (block&1))*8;
+    UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
+    int quant= s->qscale; // qscale of the prev frame
+    const int shift= 1+s->quarter_sample;
+
+    new_pic = s->new_picture[0] + pic_xy;
+    old_pic = s->last_picture[0] + pic_xy;
+
+    /* zero vector; no mv penalty is added for it */
+    dmin = pix_abs8x8(new_pic, old_pic, pic_stride);
+
+    /* first line */
+    if ((s->mb_y == 0 || s->first_slice_line || s->first_gob_line) && block<2) {
+        CHECK_MV4(P[1][0]>>shift, P[1][1]>>shift)
+    }else{
+        CHECK_MV4(P[4][0]>>shift, P[4][1]>>shift)
+        if(dmin<Z_THRESHOLD){
+            /* dmin may still be the zero vector's cost here (it is not
+               tested against Z_THRESHOLD before P[4] is tried), so report
+               the vector dmin actually belongs to rather than P[4]. */
+            *mx_ptr= best[0];
+            *my_ptr= best[1];
+//printf("M\n");
+            return dmin;
+        }
+        CHECK_MV4(P[1][0]>>shift, P[1][1]>>shift)
+        CHECK_MV4(P[2][0]>>shift, P[2][1]>>shift)
+        CHECK_MV4(P[3][0]>>shift, P[3][1]>>shift)
+    }
+    CHECK_MV4(P[0][0]>>shift, P[0][1]>>shift)
+    CHECK_MV4(P[5][0]>>shift, P[5][1]>>shift)
+
+//check(best[0],best[1],0, b0)
+    dmin= small_diamond_search4MV(s, best, dmin, new_pic, old_pic, pic_stride,
+                                  pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, shift);
+//check(best[0],best[1],0, b1)
+    *mx_ptr= best[0];
+    *my_ptr= best[1];
+
+// printf("%d %d %d \n", best[0], best[1], dmin);
+    return dmin;
+}
+
+#define CHECK_HALF_MV(suffix, x, y) /* Try the half-pel offset (x,y) around a
+ * full-pel vector for a 16x16 block, using the interpolating SAD
+ * pix_abs16x16_<suffix>.  Only the horizontal offset is applied to ptr here,
+ * as (x)>>1; the caller moves ptr to the right row before invoking the
+ * macro.  Adds (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*quant and, on
+ * a new minimum, stores the cost in dminh and the vector in mx,my (mx1,my1
+ * plus the offset).  Expands to bare statements without enclosing braces and
+ * uses d, dminh, mx, my, mx1, my1, pen_x, pen_y, pix, ptr, s, mv_penalty and
+ * quant from the calling scope.
+ * NOTE(review): (x)>>1 with x == -1 presumably relies on an arithmetic right
+ * shift yielding -1 - confirm for the target compilers. */\
+ d= pix_abs16x16_ ## suffix(pix, ptr+((x)>>1), s->linesize);\
+ d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*quant;\
+ if(d<dminh){\
+ dminh= d;\
+ mx= mx1 + x;\
+ my= my1 + y;\
+ }
+
+#define CHECK_HALF_MV4(suffix, x, y) /* Try the half-pel offset (x,y) around a
+ * full-pel vector for an 8x8 block, using the interpolating SAD
+ * pix_abs8x8_<suffix>.  Only the horizontal offset is applied to ptr here,
+ * as (x)>>1; the caller moves ptr to the right row before invoking the
+ * macro.  Adds (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*quant and, on
+ * a new minimum, stores the cost in dminh and the vector in mx,my (mx1,my1
+ * plus the offset).  Expands to bare statements without enclosing braces and
+ * uses d, dminh, mx, my, mx1, my1, pen_x, pen_y, pix, ptr, s, mv_penalty and
+ * quant from the calling scope. */\
+ d= pix_abs8x8_ ## suffix(pix, ptr+((x)>>1), s->linesize);\
+ d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*quant;\
+ if(d<dminh){\
+ dminh= d;\
+ mx= mx1 + x;\
+ my= my1 + y;\
+ }
+
/* The idea would be to make half pel ME after Inter/Intra decision to
save time. */
-static void halfpel_motion_search(MpegEncContext * s,
+static inline void halfpel_motion_search(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin,
- int xmin, int ymin, int xmax, int ymax)
+ int xmin, int ymin, int xmax, int ymax,
+ int pred_x, int pred_y)
{
+ UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
+ const int quant= s->qscale;
+ int pen_x, pen_y;
int mx, my, mx1, my1, d, xx, yy, dminh;
- UINT8 *pix;
+ UINT8 *pix, *ptr;
- mx = *mx_ptr << 1;
- my = *my_ptr << 1;
+ mx = *mx_ptr;
+ my = *my_ptr;
+ ptr = s->last_picture[0] + (my * s->linesize) + mx;
xx = 16 * s->mb_x;
yy = 16 * s->mb_y;
-
+ pix = s->new_picture[0] + (yy * s->linesize) + xx;
+
dminh = dmin;
- /* Half pixel search */
- mx1 = mx;
- my1 = my;
+ if (mx > xmin && mx < xmax &&
+ my > ymin && my < ymax) {
- pix = s->new_picture[0] + (yy * s->linesize) + xx;
+ mx= mx1= 2*(mx - xx);
+ my= my1= 2*(my - yy);
+ if(dmin < Z_THRESHOLD && mx==0 && my==0){
+ *mx_ptr = 0;
+ *my_ptr = 0;
+ return;
+ }
+
+ pen_x= pred_x + mx;
+ pen_y= pred_y + my;
+
+ ptr-= s->linesize;
+ CHECK_HALF_MV(xy2, -1, -1)
+ CHECK_HALF_MV(y2 , 0, -1)
+ CHECK_HALF_MV(xy2, +1, -1)
+
+ ptr+= s->linesize;
+ CHECK_HALF_MV(x2 , -1, 0)
+ CHECK_HALF_MV(x2 , +1, 0)
+ CHECK_HALF_MV(xy2, -1, +1)
+ CHECK_HALF_MV(y2 , 0, +1)
+ CHECK_HALF_MV(xy2, +1, +1)
+
+ }else{
+ mx= 2*(mx - xx);
+ my= 2*(my - yy);
+ }
- if ((mx > (xmin << 1)) && mx < (xmax << 1) &&
- (my > (ymin << 1)) && my < (ymax << 1)) {
- int dx, dy, px, py;
- UINT8 *ptr;
- for (dy = -1; dy <= 1; dy++) {
- for (dx = -1; dx <= 1; dx++) {
- if (dx != 0 || dy != 0) {
- px = mx1 + dx;
- py = my1 + dy;
- ptr = s->last_picture[0] + ((py >> 1) * s->linesize) + (px >> 1);
- switch (((py & 1) << 1) | (px & 1)) {
- default:
- case 0:
- d = pix_abs16x16(pix, ptr, s->linesize, 16);
- break;
- case 1:
- d = pix_abs16x16_x2(pix, ptr, s->linesize, 16);
- break;
- case 2:
- d = pix_abs16x16_y2(pix, ptr, s->linesize, 16);
- break;
- case 3:
- d = pix_abs16x16_xy2(pix, ptr, s->linesize, 16);
- break;
- }
- if (d < dminh) {
- dminh = d;
- mx = px;
- my = py;
- }
- }
- }
+ *mx_ptr = mx;
+ *my_ptr = my;
+}
+
+static inline void halfpel_motion_search4(MpegEncContext * s,
+ int *mx_ptr, int *my_ptr, int dmin,
+ int xmin, int ymin, int xmax, int ymax,
+ int pred_x, int pred_y, int block_x, int block_y)
+{
+ UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
+ const int quant= s->qscale;
+ int pen_x, pen_y;
+ int mx, my, mx1, my1, d, xx, yy, dminh;
+ UINT8 *pix, *ptr;
+
+ xx = 8 * block_x;
+ yy = 8 * block_y;
+ pix = s->new_picture[0] + (yy * s->linesize) + xx;
+
+ mx = *mx_ptr;
+ my = *my_ptr;
+ ptr = s->last_picture[0] + ((yy+my) * s->linesize) + xx + mx;
+
+ dminh = dmin;
+
+ if (mx > xmin && mx < xmax &&
+ my > ymin && my < ymax) {
+
+ mx= mx1= 2*mx;
+ my= my1= 2*my;
+ if(dmin < Z_THRESHOLD && mx==0 && my==0){
+ *mx_ptr = 0;
+ *my_ptr = 0;
+ return;
}
+
+ pen_x= pred_x + mx;
+ pen_y= pred_y + my;
+
+ ptr-= s->linesize;
+ CHECK_HALF_MV4(xy2, -1, -1)
+ CHECK_HALF_MV4(y2 , 0, -1)
+ CHECK_HALF_MV4(xy2, +1, -1)
+
+ ptr+= s->linesize;
+ CHECK_HALF_MV4(x2 , -1, 0)
+ CHECK_HALF_MV4(x2 , +1, 0)
+ CHECK_HALF_MV4(xy2, -1, +1)
+ CHECK_HALF_MV4(y2 , 0, +1)
+ CHECK_HALF_MV4(xy2, +1, +1)
+
+ }else{
+ mx*=2;
+ my*=2;
}
- *mx_ptr = mx - (xx << 1);
- *my_ptr = my - (yy << 1);
- //fprintf(stderr,"half - MX: %d\tMY: %d\n",*mx_ptr ,*my_ptr);
+ *mx_ptr = mx;
+ *my_ptr = my;
+}
+
+/*
+ * Record the vector (mx, my) chosen for the current macroblock: always in
+ * s->mv_table[] at the macroblock's index and, unless 4MV is enabled, also
+ * in the four s->motion_val[] entries of the macroblock's 2x2 luma blocks.
+ */
+static inline void set_mv_tables(MpegEncContext * s, int mx, int my)
+{
+    const int mb_index= s->mb_y*s->mb_width + s->mb_x;
+    int base, row, col;
+
+    s->mv_table[0][mb_index] = mx;
+    s->mv_table[1][mb_index] = my;
+
+    /* with 4MV the per-block entries have already been set to the 4 MVs */
+    if(s->flags&CODEC_FLAG_4MV)
+        return;
+
+    base= s->block_index[0];
+    for(row=0; row<2; row++){
+        for(col=0; col<2; col++){
+            s->motion_val[base+col][0]= mx;
+            s->motion_val[base+col][1]= my;
+        }
+        /* step to the entry below in the motion_val grid */
+        base += s->block_wrap[0];
+    }
+}
#ifndef CONFIG_TEST_MV_ENCODE
-int estimate_motion(MpegEncContext * s,
- int mb_x, int mb_y,
- int *mx_ptr, int *my_ptr)
+void estimate_motion(MpegEncContext * s,
+ int mb_x, int mb_y)
{
UINT8 *pix, *ppix;
int sum, varc, vard, mx, my, range, dmin, xx, yy;
int xmin, ymin, xmax, ymax;
+ int rel_xmin, rel_ymin, rel_xmax, rel_ymax;
+ int pred_x=0, pred_y=0;
+ int P[6][2];
+ const int shift= 1+s->quarter_sample;
+ int mb_type=0;
range = 8 * (1 << (s->f_code - 1));
/* XXX: temporary kludge to avoid overflow for msmpeg4 */
@@ -411,6 +836,8 @@ int estimate_motion(MpegEncContext * s,
if (s->unrestricted_mv) {
xmin = -16;
ymin = -16;
+ if (s->h263_plus)
+ range *= 2;
if(s->avctx==NULL || s->avctx->codec->id!=CODEC_ID_MPEG4){
xmax = s->mb_width*16;
ymax = s->mb_height*16;
@@ -426,7 +853,6 @@ int estimate_motion(MpegEncContext * s,
xmax = s->mb_width*16 - 16;
ymax = s->mb_height*16 - 16;
}
-
switch(s->full_search) {
case ME_ZERO:
default:
@@ -442,8 +868,116 @@ int estimate_motion(MpegEncContext * s,
case ME_PHODS:
dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax);
break;
+ case ME_X1:
+ case ME_EPZS:
+ {
+ const int mot_stride = s->block_wrap[0];
+ const int mot_xy = s->block_index[0];
+
+ rel_xmin= xmin - mb_x*16;
+ rel_xmax= xmax - mb_x*16;
+ rel_ymin= ymin - mb_y*16;
+ rel_ymax= ymax - mb_y*16;
+
+ P[0][0] = s->motion_val[mot_xy ][0];
+ P[0][1] = s->motion_val[mot_xy ][1];
+ P[1][0] = s->motion_val[mot_xy - 1][0];
+ P[1][1] = s->motion_val[mot_xy - 1][1];
+ if(P[1][0] > (rel_xmax<<shift)) P[1][0]= (rel_xmax<<shift);
+
+ /* special case for first line */
+ if ((mb_y == 0 || s->first_slice_line || s->first_gob_line)) {
+ P[4][0] = P[1][0];
+ P[4][1] = P[1][1];
+ } else {
+ P[2][0] = s->motion_val[mot_xy - mot_stride ][0];
+ P[2][1] = s->motion_val[mot_xy - mot_stride ][1];
+ P[3][0] = s->motion_val[mot_xy - mot_stride + 2 ][0];
+ P[3][1] = s->motion_val[mot_xy - mot_stride + 2 ][1];
+ if(P[2][1] > (rel_ymax<<shift)) P[2][1]= (rel_ymax<<shift);
+ if(P[3][0] < (rel_xmin<<shift)) P[3][0]= (rel_xmin<<shift);
+ if(P[3][1] > (rel_ymax<<shift)) P[3][1]= (rel_ymax<<shift);
+
+ P[4][0]= mid_pred(P[1][0], P[2][0], P[3][0]);
+ P[4][1]= mid_pred(P[1][1], P[2][1], P[3][1]);
+ }
+ if(s->out_format == FMT_H263){
+ pred_x = P[4][0];
+ pred_y = P[4][1];
+ }else { /* mpeg1 at least */
+ pred_x= P[1][0];
+ pred_y= P[1][1];
+ }
+ }
+ dmin = epzs_motion_search(s, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax);
+
+ mx+= mb_x*16;
+ my+= mb_y*16;
+ break;
+ }
+
+ if(s->flags&CODEC_FLAG_4MV){
+ int block;
+
+ mb_type|= MB_TYPE_INTER4V;
+
+ for(block=0; block<4; block++){
+ int mx4, my4;
+ int pred_x4, pred_y4;
+ int dmin4;
+ static const int off[4]= {2, 1, 1, -1};
+ const int mot_stride = s->block_wrap[0];
+ const int mot_xy = s->block_index[block];
+ const int block_x= mb_x*2 + (block&1);
+ const int block_y= mb_y*2 + (block>>1);
+
+ const int rel_xmin4= xmin - block_x*8;
+ const int rel_xmax4= xmax - block_x*8 + 8;
+ const int rel_ymin4= ymin - block_y*8;
+ const int rel_ymax4= ymax - block_y*8 + 8;
+
+ P[0][0] = s->motion_val[mot_xy ][0];
+ P[0][1] = s->motion_val[mot_xy ][1];
+ P[1][0] = s->motion_val[mot_xy - 1][0];
+ P[1][1] = s->motion_val[mot_xy - 1][1];
+ if(P[1][0] > (rel_xmax4<<shift)) P[1][0]= (rel_xmax4<<shift);
+
+ /* special case for first line */
+ if ((mb_y == 0 || s->first_slice_line || s->first_gob_line) && block<2) {
+ P[4][0] = P[1][0];
+ P[4][1] = P[1][1];
+ } else {
+ P[2][0] = s->motion_val[mot_xy - mot_stride ][0];
+ P[2][1] = s->motion_val[mot_xy - mot_stride ][1];
+ P[3][0] = s->motion_val[mot_xy - mot_stride + off[block]][0];
+ P[3][1] = s->motion_val[mot_xy - mot_stride + off[block]][1];
+ if(P[2][1] > (rel_ymax4<<shift)) P[2][1]= (rel_ymax4<<shift);
+ if(P[3][0] < (rel_xmin4<<shift)) P[3][0]= (rel_xmin4<<shift);
+ if(P[3][0] > (rel_xmax4<<shift)) P[3][0]= (rel_xmax4<<shift);
+ if(P[3][1] > (rel_ymax4<<shift)) P[3][1]= (rel_ymax4<<shift);
+
+ P[4][0]= mid_pred(P[1][0], P[2][0], P[3][0]);
+ P[4][1]= mid_pred(P[1][1], P[2][1], P[3][1]);
+ }
+ if(s->out_format == FMT_H263){
+ pred_x4 = P[4][0];
+ pred_y4 = P[4][1];
+ }else { /* mpeg1 at least */
+ pred_x4= P[1][0];
+ pred_y4= P[1][1];
+ }
+ P[5][0]= mx - mb_x*16;
+ P[5][1]= my - mb_y*16;
+
+ dmin4 = epzs_motion_search4(s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4);
+
+ halfpel_motion_search4(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4,
+ pred_x4, pred_y4, block_x, block_y);
+
+ s->motion_val[ s->block_index[block] ][0]= mx4;
+ s->motion_val[ s->block_index[block] ][1]= my4;
+ }
}
- emms_c();
/* intra / predictive decision */
xx = mb_x * 16;
@@ -452,36 +986,53 @@ int estimate_motion(MpegEncContext * s,
pix = s->new_picture[0] + (yy * s->linesize) + xx;
/* At this point (mx,my) are full-pell and the absolute displacement */
ppix = s->last_picture[0] + (my * s->linesize) + mx;
-
+
sum = pix_sum(pix, s->linesize);
- varc = pix_norm1(pix, s->linesize);
- vard = pix_norm(pix, ppix, s->linesize);
+#if 0
+ varc = pix_dev(pix, s->linesize, (sum+128)>>8) + INTER_BIAS;
+ vard = pix_abs16x16(pix, ppix, s->linesize);
+#else
+ sum= (sum+8)>>4;
+ varc = ((pix_norm1(pix, s->linesize) - sum*sum + 128 + 500)>>8);
+ vard = (pix_norm(pix, ppix, s->linesize)+128)>>8;
+#endif
- vard = vard >> 8;
- sum = sum >> 8;
- varc = (varc >> 8) - (sum * sum);
s->mb_var[s->mb_width * mb_y + mb_x] = varc;
- s->avg_mb_var += varc;
-
+ s->avg_mb_var+= varc;
+ s->mc_mb_var += vard;
+
#if 0
printf("varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n",
varc, s->avg_mb_var, sum, vard, mx - xx, my - yy);
#endif
- if (vard <= 64 || vard < varc) {
- if (s->full_search != ME_ZERO) {
- halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax);
- } else {
- mx -= 16 * s->mb_x;
- my -= 16 * s->mb_y;
+ if(s->flags&CODEC_FLAG_HQ){
+ if (vard*2 + 200 > varc)
+ mb_type|= MB_TYPE_INTRA;
+ if (varc*2 + 200 > vard){
+ mb_type|= MB_TYPE_INTER;
+ halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, pred_x, pred_y);
+ }else{
+ mx = mx*2 - mb_x*32;
+ my = my*2 - mb_y*32;
+ }
+ }else{
+ if (vard <= 64 || vard < varc) {
+ mb_type|= MB_TYPE_INTER;
+ if (s->full_search != ME_ZERO) {
+ halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, pred_x, pred_y);
+ } else {
+ mx -= 16 * mb_x;
+ my -= 16 * mb_y;
+ }
+ }else{
+ mb_type|= MB_TYPE_INTRA;
+ mx = 0;//mx*2 - 32 * mb_x;
+ my = 0;//my*2 - 32 * mb_y;
}
- *mx_ptr = mx;
- *my_ptr = my;
- return 0;
- } else {
- *mx_ptr = 0;
- *my_ptr = 0;
- return 1;
}
+
+ s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
+ set_mv_tables(s, mx, my);
}
#else
diff --git a/src/libffmpeg/libavcodec/mpeg12.c b/src/libffmpeg/libavcodec/mpeg12.c
index 41bf524e4..ac614d5ce 100644
--- a/src/libffmpeg/libavcodec/mpeg12.c
+++ b/src/libffmpeg/libavcodec/mpeg12.c
@@ -20,6 +20,7 @@
#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"
+#include "xineutils.h"
#include "mpeg12data.h"
@@ -51,6 +52,9 @@ static int mpeg2_decode_block_intra(MpegEncContext *s,
int n);
static int mpeg_decode_motion(MpegEncContext *s, int fcode, int pred);
+static UINT16 mv_penalty[MAX_FCODE+1][MAX_MV*2+1];
+static UINT8 fcode_tab[MAX_MV*2+1];
+
static void put_header(MpegEncContext *s, int header)
{
align_put_bits(&s->pb);
@@ -66,7 +70,7 @@ static void mpeg1_encode_sequence_header(MpegEncContext *s)
int n;
UINT64 time_code;
- if ((s->picture_number % s->gop_size) == 0) {
+ if (s->picture_in_gop_number == 0) {
/* mpeg1 header repeated every gop */
put_header(s, SEQ_START_CODE);
@@ -129,7 +133,6 @@ static void mpeg1_encode_sequence_header(MpegEncContext *s)
}
}
- s->fake_picture_number++;
}
@@ -226,6 +229,7 @@ void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number)
/* temporal reference */
put_bits(&s->pb, 10, (s->fake_picture_number -
s->gop_picture_number) & 0x3ff);
+ s->fake_picture_number++;
put_bits(&s->pb, 3, s->pict_type);
put_bits(&s->pb, 16, 0xffff); /* non constant bit rate */
@@ -353,6 +357,53 @@ static void mpeg1_encode_motion(MpegEncContext *s, int val)
}
}
+/**
+ * Build the MPEG-1 motion vector cost tables (first call only) and attach
+ * them to the encoder context.
+ *
+ * mv_penalty[f_code][mv+MAX_MV] receives a bit length derived from
+ * mbMotionVectorTable for the vector component mv coded with that f_code.
+ * fcode_tab[mv+MAX_MV] receives the smallest f_code whose range
+ * [-(8<<f_code), 8<<f_code) contains mv; entries never written stay 0.
+ *
+ * @param s encoder context; its mv_penalty and fcode_tab pointers are set.
+ */
+void mpeg1_encode_init(MpegEncContext *s)
+{
+    static int done=0;
+    if(!done){
+        int f_code;
+        int mv;
+
+        done=1;
+        for(f_code=1; f_code<=MAX_FCODE; f_code++){
+            /* must follow the table row being filled, not s->f_code:
+               the context value is a single number and may not even be
+               initialised yet when this runs */
+            const int bit_size = f_code - 1;
+
+            for(mv=-MAX_MV; mv<=MAX_MV; mv++){
+                int len;
+
+                if(mv==0) len= mbMotionVectorTable[0][1];
+                else{
+                    int val, code;
+
+                    val=mv;
+                    if (val < 0)
+                        val = -val;
+                    val--;
+                    code = (val >> bit_size) + 1;
+                    if(code<17){
+                        len= mbMotionVectorTable[code][1] + 1 + bit_size;
+                    }else{
+                        /* beyond the table: charge the longest entry plus one extra bit */
+                        len= mbMotionVectorTable[16][1] + 2 + bit_size;
+                    }
+                }
+
+                mv_penalty[f_code][mv+MAX_MV]= len;
+            }
+        }
+
+
+        /* walk from the largest f_code down so smaller codes overwrite the inner ranges */
+        for(f_code=MAX_FCODE; f_code>0; f_code--){
+            for(mv=-(8<<f_code); mv<(8<<f_code); mv++){
+                fcode_tab[mv+MAX_MV]= f_code;
+            }
+        }
+    }
+    s->mv_penalty= mv_penalty;
+
+    s->fcode_tab= fcode_tab;
+}
+
static inline void encode_dc(MpegEncContext *s, int diff, int component)
{
if (component == 0) {
@@ -1119,6 +1170,7 @@ typedef struct Mpeg1Context {
UINT8 *buf_ptr;
int buffer_size;
int mpeg_enc_ctx_allocated; /* true if decoding context allocated */
+ int repeat_field; /* true if we must repeat the field */
} Mpeg1Context;
static int mpeg_decode_init(AVCodecContext *avctx)
@@ -1131,6 +1183,7 @@ static int mpeg_decode_init(AVCodecContext *avctx)
s->start_code = -1;
s->buf_ptr = s->buffer;
s->mpeg_enc_ctx.picture_number = 0;
+ s->repeat_field = 0;
return 0;
}
@@ -1203,7 +1256,7 @@ static void mpeg_decode_sequence_extension(MpegEncContext *s)
int frame_rate_ext_n, frame_rate_ext_d;
skip_bits(&s->gb, 8); /* profil and level */
- skip_bits(&s->gb, 1); /* progressive_sequence */
+ s->progressive_sequence = get_bits1(&s->gb); /* progressive_sequence */
skip_bits(&s->gb, 2); /* chroma_format */
horiz_size_ext = get_bits(&s->gb, 2);
vert_size_ext = get_bits(&s->gb, 2);
@@ -1279,12 +1332,13 @@ static void mpeg_decode_picture_coding_extension(MpegEncContext *s)
s->chroma_420_type = get_bits1(&s->gb);
s->progressive_frame = get_bits1(&s->gb);
/* composite display not parsed */
- dprintf("intra_dc_precion=%d\n", s->intra_dc_precision);
+ dprintf("intra_dc_precision=%d\n", s->intra_dc_precision);
dprintf("picture_structure=%d\n", s->picture_structure);
dprintf("conceal=%d\n", s->concealment_motion_vectors);
dprintf("intra_vlc_format=%d\n", s->intra_vlc_format);
dprintf("alternate_scan=%d\n", s->alternate_scan);
dprintf("frame_pred_frame_dct=%d\n", s->frame_pred_frame_dct);
+ dprintf("progressive_frame=%d\n", s->progressive_frame);
}
static void mpeg_decode_extension(AVCodecContext *avctx,
@@ -1349,7 +1403,7 @@ static int mpeg_decode_slice(AVCodecContext *avctx,
}
for(;;) {
- memset(s->block, 0, sizeof(s->block));
+ clear_blocks(s->block[0]);
ret = mpeg_decode_mb(s, s->block);
dprintf("ret=%d\n", ret);
if (ret < 0)
@@ -1358,7 +1412,8 @@ static int mpeg_decode_slice(AVCodecContext *avctx,
break;
MPV_decode_mb(s, s->block);
}
-
+ emms_c();
+
/* end of slice reached */
if (s->mb_x == (s->mb_width - 1) &&
s->mb_y == (s->mb_height - 1)) {
@@ -1434,6 +1489,7 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
avctx->width = width;
avctx->height = height;
avctx->frame_rate = frame_rate_tab[s->frame_rate_index];
+ s->frame_rate = avctx->frame_rate;
avctx->bit_rate = s->bit_rate;
if (MPV_common_init(s) < 0)
@@ -1505,13 +1561,14 @@ static int mpeg_decode_frame(AVCodecContext *avctx,
UINT8 *buf_end, *buf_ptr, *buf_start;
int len, start_code_found, ret, code, start_code, input_size;
AVPicture *picture = data;
-
+ MpegEncContext *s2 = &s->mpeg_enc_ctx;
+
dprintf("fill_buffer\n");
*data_size = 0;
+
/* special case for last picture */
if (buf_size == 0) {
- MpegEncContext *s2 = &s->mpeg_enc_ctx;
if (s2->picture_number > 0) {
picture->data[0] = s2->next_picture[0];
picture->data[1] = s2->next_picture[1];
@@ -1526,6 +1583,15 @@ static int mpeg_decode_frame(AVCodecContext *avctx,
buf_ptr = buf;
buf_end = buf + buf_size;
+
+ if (s->repeat_field % 2 == 1) {
+ s->repeat_field++;
+ //fprintf(stderr,"\nRepeating last frame: %d -> %d! pict: %d %d", avctx->frame_number-1, avctx->frame_number,
+ // s2->picture_number, s->repeat_field);
+ *data_size = sizeof(AVPicture);
+ goto the_end;
+ }
+
while (buf_ptr < buf_end) {
buf_start = buf_ptr;
/* find start next code */
@@ -1574,6 +1640,14 @@ static int mpeg_decode_frame(AVCodecContext *avctx,
start_code, s->buffer, input_size);
if (ret == 1) {
/* got a picture: exit */
+ /* first check if we must repeat the frame */
+ if (s2->progressive_frame && s2->repeat_first_field) {
+ //fprintf(stderr,"\nRepeat this frame: %d! pict: %d",avctx->frame_number,s2->picture_number);
+ s2->repeat_first_field = 0;
+ s2->progressive_frame = 0;
+ if (++s->repeat_field > 2)
+ s->repeat_field = 0;
+ }
*data_size = sizeof(AVPicture);
goto the_end;
}
diff --git a/src/libffmpeg/libavcodec/mpeg4data.h b/src/libffmpeg/libavcodec/mpeg4data.h
index 4eed75654..91b99625f 100644
--- a/src/libffmpeg/libavcodec/mpeg4data.h
+++ b/src/libffmpeg/libavcodec/mpeg4data.h
@@ -12,13 +12,13 @@
#define GMC_SPRITE 2
/* dc encoding for mpeg4 */
-static const UINT8 DCtab_lum[13][2] =
+const UINT8 DCtab_lum[13][2] =
{
{3,3}, {3,2}, {2,2}, {2,3}, {1,3}, {1,4}, {1,5}, {1,6}, {1,7},
{1,8}, {1,9}, {1,10}, {1,11},
};
-static const UINT8 DCtab_chrom[13][2] =
+const UINT8 DCtab_chrom[13][2] =
{
{3,2}, {2,2}, {1,2}, {1,3}, {1,4}, {1,5}, {1,6}, {1,7}, {1,8},
{1,9}, {1,10}, {1,11}, {1,12},
@@ -99,3 +99,26 @@ static const UINT16 sprite_trajectory_tab[15][2] = {
{0x0E, 4}, {0x1E, 5}, {0x3E, 6}, {0x7E, 7}, {0xFE, 8},
{0x1FE, 9},{0x3FE, 10},{0x7FE, 11},{0xFFE, 12},
};
+
+static const UINT8 mb_type_b_tab[4][2] = { /* NOTE(review): looks like {code, bit length} pairs in the style of the other tables in this header - confirm against the user of this table */
+ {1, 1}, {1, 2}, {1, 3}, {1, 4},
+};
+
+static const UINT16 pixel_aspect[16][2]={ /* 16 pairs; NOTE(review): presumably {width, height} of the pixel aspect ratio, indexed by a 4-bit aspect_ratio_info code - confirm against the user of this table */
+ {0, 0},
+ {1, 1},
+ {12, 11},
+ {10, 11},
+ {16, 11},
+ {40, 33},
+ {0, 0}, /* this and all following entries are {0, 0} */
+ {0, 0},
+ {0, 0},
+ {0, 0},
+ {0, 0},
+ {0, 0},
+ {0, 0},
+ {0, 0},
+ {0, 0},
+ {0, 0},
+};
diff --git a/src/libffmpeg/libavcodec/mpegvideo.c b/src/libffmpeg/libavcodec/mpegvideo.c
index a8297a2c1..9f572c3d9 100644
--- a/src/libffmpeg/libavcodec/mpegvideo.c
+++ b/src/libffmpeg/libavcodec/mpegvideo.c
@@ -15,19 +15,20 @@
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * 4MV & hq encoding stuff by Michael Niedermayer <michaelni@gmx.at>
*/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
-
-#include "config.h"
-#include "xine-utils/xineutils.h"
-
#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"
+#include "config.h"
+#include "xine-utils/xineutils.h"
+
#ifdef USE_FASTMEMCPY
#include "fastmemcpy.h"
#endif
@@ -71,6 +72,9 @@ static UINT8 h263_chroma_roundtab[16] = {
0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
};
+static UINT16 default_mv_penalty[MAX_FCODE+1][MAX_MV*2+1];
+static UINT8 default_fcode_tab[MAX_MV*2+1];
+
/* default motion estimation */
int motion_estimation_method = ME_LOG;
@@ -175,7 +179,7 @@ int MPV_common_init(MpegEncContext *s)
}
}
- if (s->out_format == FMT_H263) {
+ if (s->out_format == FMT_H263 || s->encoding) {
int size;
/* MV prediction */
size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
@@ -228,6 +232,8 @@ int MPV_common_init(MpegEncContext *s)
if (!s->mbskip_table)
goto fail;
}
+
+ s->block= s->intra_block;
s->context_initialized = 1;
return 0;
@@ -280,6 +286,7 @@ int MPV_encode_init(AVCodecContext *avctx)
int i;
s->bit_rate = avctx->bit_rate;
+ s->bit_rate_tolerance = avctx->bit_rate_tolerance;
s->frame_rate = avctx->frame_rate;
s->width = avctx->width;
s->height = avctx->height;
@@ -288,7 +295,14 @@ int MPV_encode_init(AVCodecContext *avctx)
s->rtp_payload_size = avctx->rtp_payload_size;
if (avctx->rtp_callback)
s->rtp_callback = avctx->rtp_callback;
+ s->qmin= avctx->qmin;
+ s->qmax= avctx->qmax;
+ s->max_qdiff= avctx->max_qdiff;
+ s->qcompress= avctx->qcompress;
+ s->qblur= avctx->qblur;
s->avctx = avctx;
+ s->aspect_ratio_info= avctx->aspect_ratio_info;
+ s->flags= avctx->flags;
if (s->gop_size <= 1) {
s->intra_only = 1;
@@ -344,18 +358,59 @@ int MPV_encode_init(AVCodecContext *avctx)
s->h263_pred = 1;
s->unrestricted_mv = 1;
break;
- case CODEC_ID_MSMPEG4:
+ case CODEC_ID_MSMPEG4V1:
+ s->out_format = FMT_H263;
+ s->h263_msmpeg4 = 1;
+ s->h263_pred = 1;
+ s->unrestricted_mv = 1;
+ s->msmpeg4_version= 1;
+ break;
+ case CODEC_ID_MSMPEG4V2:
s->out_format = FMT_H263;
s->h263_msmpeg4 = 1;
s->h263_pred = 1;
s->unrestricted_mv = 1;
+ s->msmpeg4_version= 2;
+ break;
+ case CODEC_ID_MSMPEG4V3:
+ s->out_format = FMT_H263;
+ s->h263_msmpeg4 = 1;
+ s->h263_pred = 1;
+ s->unrestricted_mv = 1;
+ s->msmpeg4_version= 3;
break;
default:
return -1;
}
+
+ if((s->flags&CODEC_FLAG_4MV) && !(s->flags&CODEC_FLAG_HQ)){
+ printf("4MV is currently only supported in HQ mode\n");
+ return -1;
+ }
+
+ { /* set up some save defaults, some codecs might override them later */
+ static int done=0;
+ if(!done){
+ int i;
+ done=1;
+ memset(default_mv_penalty, 0, sizeof(UINT16)*(MAX_FCODE+1)*(2*MAX_MV+1));
+ memset(default_fcode_tab , 0, sizeof(UINT8)*(2*MAX_MV+1));
+
+ for(i=-16; i<16; i++){
+ default_fcode_tab[i + MAX_MV]= 1;
+ }
+ }
+ }
+ s->mv_penalty= default_mv_penalty;
+ s->fcode_tab= default_fcode_tab;
if (s->out_format == FMT_H263)
- h263_encode_init_vlc(s);
+ h263_encode_init(s);
+ else if (s->out_format == FMT_MPEG1)
+ mpeg1_encode_init(s);
+
+ /* dont use mv_penalty table for crap MV as it would be confused */
+ if(s->full_search<4) s->mv_penalty= default_mv_penalty;
s->encoding = 1;
@@ -373,6 +428,7 @@ int MPV_encode_init(AVCodecContext *avctx)
rate_control_init(s);
s->picture_number = 0;
+ s->picture_in_gop_number = 0;
s->fake_picture_number = 0;
/* motion detector init */
s->f_code = 1;
@@ -434,6 +490,7 @@ void MPV_frame_start(MpegEncContext *s)
s->current_picture[i] = s->aux_picture[i];
}
} else {
+ s->last_non_b_pict_type= s->pict_type;
for(i=0;i<3;i++) {
/* swap next and last */
tmp = s->last_picture[i];
@@ -475,16 +532,18 @@ int MPV_encode_picture(AVCodecContext *avctx,
init_put_bits(&s->pb, buf, buf_size, NULL, NULL);
+ s->force_type= (avctx->flags&CODEC_FLAG_TYPE) ?
+ (avctx->key_frame ? I_TYPE : P_TYPE) : 0;
if (!s->intra_only) {
/* first picture of GOP is intra */
- if ((s->picture_number % s->gop_size) == 0)
+ if (s->picture_in_gop_number % s->gop_size==0 || s->force_type==I_TYPE){
+ s->picture_in_gop_number=0;
s->pict_type = I_TYPE;
- else
+ }else
s->pict_type = P_TYPE;
} else {
s->pict_type = I_TYPE;
}
- avctx->key_frame = (s->pict_type == I_TYPE);
MPV_frame_start(s);
@@ -515,15 +574,30 @@ int MPV_encode_picture(AVCodecContext *avctx,
}
encode_picture(s, s->picture_number);
-
+ avctx->key_frame = (s->pict_type == I_TYPE);
+ avctx->header_bits = s->header_bits;
+ avctx->mv_bits = s->mv_bits;
+ avctx->misc_bits = s->misc_bits;
+ avctx->i_tex_bits = s->i_tex_bits;
+ avctx->p_tex_bits = s->p_tex_bits;
+ avctx->i_count = s->i_count;
+ avctx->p_count = s->p_count;
+ avctx->skip_count = s->skip_count;
+
MPV_frame_end(s);
s->picture_number++;
+ s->picture_in_gop_number++;
if (s->out_format == FMT_MJPEG)
mjpeg_picture_trailer(s);
flush_put_bits(&s->pb);
- s->total_bits += (pbBufPtr(&s->pb) - s->pb.buf) * 8;
+ s->last_frame_bits= s->frame_bits;
+ s->frame_bits = (pbBufPtr(&s->pb) - s->pb.buf) * 8;
+ s->total_bits += s->frame_bits;
+ avctx->frame_bits = s->frame_bits;
+//printf("fcode: %d, type: %d, head: %d, mv: %d, misc: %d, frame: %d, itex: %d, ptex: %d\n",
+//s->f_code, avctx->key_frame, s->header_bits, s->mv_bits, s->misc_bits, s->frame_bits, s->i_tex_bits, s->p_tex_bits);
avctx->quality = s->qscale;
if (avctx->get_psnr) {
@@ -555,7 +629,7 @@ static inline void gmc1_motion(MpegEncContext *s,
int dxy, offset, mx, my, src_x, src_y, height, linesize;
int motion_x, motion_y;
- if(s->real_sprite_warping_points>1) printf("Oops, thats bad, contact the developers\n");
+ if(s->real_sprite_warping_points>1) printf("more than 1 warp point isnt supported\n");
motion_x= s->sprite_offset[0][0];
motion_y= s->sprite_offset[0][1];
src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
@@ -749,7 +823,7 @@ static inline void MPV_motion(MpegEncContext *s,
ref_picture, 0,
16);
#endif
- }else if(s->quarter_sample){
+ }else if(s->quarter_sample && dir==0){ //FIXME
qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
ref_picture, 0,
0, pix_op, qpix_op,
@@ -768,7 +842,7 @@ static inline void MPV_motion(MpegEncContext *s,
dxy = ((motion_y & 1) << 1) | (motion_x & 1);
src_x = mb_x * 16 + (motion_x >> 1) + (i & 1) * 8;
- src_y = mb_y * 16 + (motion_y >> 1) + ((i >> 1) & 1) * 8;
+ src_y = mb_y * 16 + (motion_y >> 1) + (i >>1) * 8;
/* WARNING: do no forget half pels */
src_x = clip(src_x, -16, s->width);
@@ -934,8 +1008,9 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
else if (s->h263_pred || s->h263_aic)
s->mbintra_table[mb_x + mb_y*s->mb_width]=1;
- /* update motion predictor */
+ /* update motion predictor, not for B-frames as they need the motion_val from the last P/S-Frame */
if (s->out_format == FMT_H263) {
+ if(s->pict_type!=B_TYPE){
int xy, wrap, motion_x, motion_y;
wrap = 2 * s->mb_width + 2;
@@ -958,6 +1033,7 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
s->motion_val[xy + 1 + wrap][0] = motion_x;
s->motion_val[xy + 1 + wrap][1] = motion_y;
}
+ }
}
if (!s->intra_only) {
@@ -1031,16 +1107,326 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
}
}
the_end:
- emms_c();
+ emms_c(); //FIXME remove
}
-static void encode_picture(MpegEncContext *s, int picture_number)
+static void encode_mb(MpegEncContext *s) /* loads the macroblock at (s->mb_x, s->mb_y) into s->block, subtracts the reference picture when !s->mb_intra, quantizes the six blocks and calls the format-specific bitstream writer */
{
- int mb_x, mb_y, wrap, last_gob, pdif = 0;
+ int wrap;
+ const int mb_x= s->mb_x;
+ const int mb_y= s->mb_y;
UINT8 *ptr;
- int i, motion_x, motion_y;
+ const int motion_x= s->mv[0][0][0]; /* vector of block 0: used for the 16x16 case and handed to the bitstream writers below */
+ const int motion_y= s->mv[0][0][1];
+ int i;
+
+ /* get the pixels */
+ wrap = s->linesize;
+ ptr = s->new_picture[0] + (mb_y * 16 * wrap) + mb_x * 16;
+ get_pixels(s->block[0], ptr, wrap);
+ get_pixels(s->block[1], ptr + 8, wrap);
+ get_pixels(s->block[2], ptr + 8 * wrap, wrap);
+ get_pixels(s->block[3], ptr + 8 * wrap + 8, wrap);
+ wrap = s->linesize >> 1; /* planes 1 and 2 are addressed with half the plane-0 stride */
+ ptr = s->new_picture[1] + (mb_y * 8 * wrap) + mb_x * 8;
+ get_pixels(s->block[4], ptr, wrap);
+
+ wrap = s->linesize >> 1;
+ ptr = s->new_picture[2] + (mb_y * 8 * wrap) + mb_x * 8;
+ get_pixels(s->block[5], ptr, wrap);
+
+ /* subtract previous frame if non intra */
+ if (!s->mb_intra) {
+ int dxy, offset, mx, my;
+
+ if(s->mv_type==MV_TYPE_16X16){ /* a single vector covers all four plane-0 blocks */
+ dxy = ((motion_y & 1) << 1) | (motion_x & 1); /* bit 0: low bit of the x component, bit 1: low bit of the y component */
+ ptr = s->last_picture[0] +
+ ((mb_y * 16 + (motion_y >> 1)) * s->linesize) +
+ (mb_x * 16 + (motion_x >> 1));
+
+ sub_pixels_2(s->block[0], ptr, s->linesize, dxy);
+ sub_pixels_2(s->block[1], ptr + 8, s->linesize, dxy);
+ sub_pixels_2(s->block[2], ptr + s->linesize * 8, s->linesize, dxy);
+ sub_pixels_2(s->block[3], ptr + 8 + s->linesize * 8, s->linesize ,dxy);
+
+ if (s->out_format == FMT_H263) {
+ /* special rounding for h263 */
+ dxy = 0;
+ if ((motion_x & 3) != 0)
+ dxy |= 1;
+ if ((motion_y & 3) != 0)
+ dxy |= 2;
+ mx = motion_x >> 2;
+ my = motion_y >> 2;
+ } else {
+ mx = motion_x / 2;
+ my = motion_y / 2;
+ dxy = ((my & 1) << 1) | (mx & 1);
+ mx >>= 1;
+ my >>= 1;
+ }
+ offset = ((mb_y * 8 + my) * (s->linesize >> 1)) + (mb_x * 8 + mx);
+ ptr = s->last_picture[1] + offset;
+ sub_pixels_2(s->block[4], ptr, s->linesize >> 1, dxy);
+ ptr = s->last_picture[2] + offset;
+ sub_pixels_2(s->block[5], ptr, s->linesize >> 1, dxy);
+ }else{ /* any other mv_type: one vector per 8x8 block, read from s->mv[0][i] */
+ int src_x, src_y;
+
+ for(i=0;i<4;i++) {
+ int motion_x = s->mv[0][i][0]; /* shadows the function-level motion_x / motion_y */
+ int motion_y = s->mv[0][i][1];
+
+ dxy = ((motion_y & 1) << 1) | (motion_x & 1);
+ src_x = mb_x * 16 + (motion_x >> 1) + (i & 1) * 8;
+ src_y = mb_y * 16 + (motion_y >> 1) + (i >>1) * 8;
+
+ ptr = s->last_picture[0] + (src_y * s->linesize) + (src_x);
+ sub_pixels_2(s->block[i], ptr, s->linesize, dxy);
+ }
+ /* In case of 8X8, we construct a single chroma motion vector
+ with a special rounding */
+ mx = 0;
+ my = 0;
+ for(i=0;i<4;i++) {
+ mx += s->mv[0][i][0];
+ my += s->mv[0][i][1];
+ }
+ if (mx >= 0) /* the same table-based rounding is applied to the magnitude for either sign */
+ mx = (h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1));
+ else {
+ mx = -mx;
+ mx = -(h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1));
+ }
+ if (my >= 0)
+ my = (h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1));
+ else {
+ my = -my;
+ my = -(h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1));
+ }
+ dxy = ((my & 1) << 1) | (mx & 1);
+ mx >>= 1;
+ my >>= 1;
+
+ src_x = mb_x * 8 + mx;
+ src_y = mb_y * 8 + my;
+ src_x = clip(src_x, -8, s->width/2);
+ if (src_x == s->width/2) /* at the clip limit the x interpolation bit is dropped */
+ dxy &= ~1;
+ src_y = clip(src_y, -8, s->height/2);
+ if (src_y == s->height/2) /* likewise for the y interpolation bit */
+ dxy &= ~2;
+
+ offset = (src_y * (s->linesize >> 1)) + src_x;
+ ptr = s->last_picture[1] + offset;
+ sub_pixels_2(s->block[4], ptr, s->linesize >> 1, dxy);
+ ptr = s->last_picture[2] + offset;
+ sub_pixels_2(s->block[5], ptr, s->linesize >> 1, dxy);
+ }
+ }
+
+#if 0
+ {
+ float adap_parm;
+
+ adap_parm = ((s->avg_mb_var << 1) + s->mb_var[s->mb_width*mb_y+mb_x] + 1.0) /
+ ((s->mb_var[s->mb_width*mb_y+mb_x] << 1) + s->avg_mb_var + 1.0);
+
+ printf("\ntype=%c qscale=%2d adap=%0.2f dquant=%4.2f var=%4d avgvar=%4d",
+ (s->mb_type[s->mb_width*mb_y+mb_x] > 0) ? 'I' : 'P',
+ s->qscale, adap_parm, s->qscale*adap_parm,
+ s->mb_var[s->mb_width*mb_y+mb_x], s->avg_mb_var);
+ }
+#endif
+ /* DCT & quantize */
+ if (s->h263_msmpeg4) {
+ msmpeg4_dc_scale(s);
+ } else if (s->h263_pred) {
+ h263_dc_scale(s);
+ } else {
+ /* default quantization values */
+ s->y_dc_scale = 8;
+ s->c_dc_scale = 8;
+ }
+ for(i=0;i<6;i++) { /* blocks 0-3 were loaded from plane 0, blocks 4 and 5 from planes 1 and 2 */
+ s->block_last_index[i] = dct_quantize(s, s->block[i], i, s->qscale);
+ }
+
+ /* huffman encode */
+ switch(s->out_format) {
+ case FMT_MPEG1:
+ mpeg1_encode_mb(s, s->block, motion_x, motion_y);
+ break;
+ case FMT_H263:
+ if (s->h263_msmpeg4)
+ msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
+ else if(s->h263_pred)
+ mpeg4_encode_mb(s, s->block, motion_x, motion_y);
+ else
+ h263_encode_mb(s, s->block, motion_x, motion_y);
+ break;
+ case FMT_MJPEG:
+ mjpeg_encode_mb(s, s->block);
+ break;
+ }
+}
+
+/**
+ * Append the first `length` bits stored at src to the bit writer pb.
+ * Whole bytes are written 8 bits at a time; a trailing partial byte
+ * contributes its top (length & 7) bits.
+ */
+static void copy_bits(PutBitContext *pb, UINT8 *src, int length)
+{
+    int bytes= length>>3;
+    int bits= length&7;
+    int i;
+
+    for(i=0; i<bytes; i++) put_bits(pb, 8, src[i]);
+    /* only touch src[bytes] when a partial byte remains: for a byte-aligned
+       length that byte lies past the data the caller asked to copy */
+    if(bits) put_bits(pb, bits, src[i]>>(8-bits));
+}
+
+static void encode_picture(MpegEncContext *s, int picture_number)
+{
+ int mb_x, mb_y, last_gob, pdif = 0;
+ int i;
+ int bits;
+ MpegEncContext best_s;
+ UINT8 bit_buf[4][3000]; //FIXME check that this is ALLWAYS large enogh for a MB
s->picture_number = picture_number;
+
+ s->block_wrap[0]=
+ s->block_wrap[1]=
+ s->block_wrap[2]=
+ s->block_wrap[3]= s->mb_width*2 + 2;
+ s->block_wrap[4]=
+ s->block_wrap[5]= s->mb_width + 2;
+
+ s->last_mc_mb_var = s->mc_mb_var;
+ /* Reset the average MB variance */
+ s->avg_mb_var = 0;
+ s->mc_mb_var = 0;
+ /* Estimate motion for every MB */
+ if(s->pict_type == P_TYPE){
+ for(mb_y=0; mb_y < s->mb_height; mb_y++) {
+ s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1;
+ s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1);
+ s->block_index[2]= s->block_wrap[0]*(mb_y*2 + 2) - 1;
+ s->block_index[3]= s->block_wrap[0]*(mb_y*2 + 2);
+ for(mb_x=0; mb_x < s->mb_width; mb_x++) {
+ s->mb_x = mb_x;
+ s->mb_y = mb_y;
+ s->block_index[0]+=2;
+ s->block_index[1]+=2;
+ s->block_index[2]+=2;
+ s->block_index[3]+=2;
+
+ /* compute motion vector & mb_type and store in context */
+ estimate_motion(s, mb_x, mb_y);
+// s->mb_type[mb_y*s->mb_width + mb_x]=MB_TYPE_INTER;
+ }
+ }
+ emms_c();
+ }else{
+ /* I-Frame */
+ //FIXME do we need to zero them?
+ memset(s->motion_val[0], 0, sizeof(INT16)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2);
+ memset(s->mv_table[0] , 0, sizeof(INT16)*s->mb_width*s->mb_height);
+ memset(s->mv_table[1] , 0, sizeof(INT16)*s->mb_width*s->mb_height);
+ memset(s->mb_type , MB_TYPE_INTRA, sizeof(UINT8)*s->mb_width*s->mb_height);
+ }
+
+ if(s->avg_mb_var < s->mc_mb_var && s->pict_type != B_TYPE && (!s->force_type)){ //FIXME subtract MV bits
+ s->pict_type= I_TYPE;
+ s->picture_in_gop_number=0;
+ memset(s->mb_type , MB_TYPE_INTRA, sizeof(UINT8)*s->mb_width*s->mb_height);
+//printf("Scene change detected, encoding as I Frame\n");
+ }
+
+ /* find best f_code for ME which do unlimited searches */
+ if(s->pict_type==P_TYPE && s->full_search>3){
+ int mv_num[8];
+ int i;
+ int loose=0;
+ UINT8 * fcode_tab= s->fcode_tab;
+
+ for(i=0; i<8; i++) mv_num[i]=0;
+
+ for(i=0; i<s->mb_num; i++){
+ if(s->mb_type[i] & MB_TYPE_INTER){
+ mv_num[ fcode_tab[s->mv_table[0][i] + MAX_MV] ]++;
+ mv_num[ fcode_tab[s->mv_table[1][i] + MAX_MV] ]++;
+//printf("%d %d %d\n", s->mv_table[0][i], fcode_tab[s->mv_table[0][i] + MAX_MV], i);
+ }
+//else printf("I");
+ }
+
+ for(i=MAX_FCODE; i>1; i--){
+ loose+= mv_num[i];
+ if(loose > 10) break; //FIXME this is pretty ineffective
+ }
+ s->f_code= i;
+/* for(i=0; i<=MAX_FCODE; i++){
+ printf("%d ", mv_num[i]);
+ }
+ printf("\n");*/
+ }else{
+ s->f_code= 1;
+ }
+
+//printf("f_code %d ///\n", s->f_code);
+ /* convert MBs with too long MVs to I-Blocks */
+ if(s->pict_type==P_TYPE){
+ int i, x, y;
+ const int f_code= s->f_code;
+ UINT8 * fcode_tab= s->fcode_tab;
+//FIXME try to clip instead of intra izing ;)
+ /* clip / convert to intra 16x16 type MVs */
+ for(i=0; i<s->mb_num; i++){
+ if(s->mb_type[i]&MB_TYPE_INTER){
+ if( fcode_tab[s->mv_table[0][i] + MAX_MV] > f_code
+ || fcode_tab[s->mv_table[0][i] + MAX_MV] == 0
+ || fcode_tab[s->mv_table[1][i] + MAX_MV] > f_code
+ || fcode_tab[s->mv_table[1][i] + MAX_MV] == 0 ){
+ s->mb_type[i] &= ~MB_TYPE_INTER;
+ s->mb_type[i] |= MB_TYPE_INTRA;
+ s->mv_table[0][i] = 0;
+ s->mv_table[1][i] = 0;
+ }
+ }
+ }
+
+ if(s->flags&CODEC_FLAG_4MV){
+ int wrap= 2+ s->mb_width*2;
+
+ /* clip / convert to intra 8x8 type MVs */
+ for(y=0; y<s->mb_height; y++){
+ int xy= (y*2 + 1)*wrap + 1;
+ i= y*s->mb_width;
+
+ for(x=0; x<s->mb_width; x++){
+ if(s->mb_type[i]&MB_TYPE_INTER4V){
+ int block;
+ for(block=0; block<4; block++){
+ int off= (block& 1) + (block>>1)*wrap;
+ int mx= s->motion_val[ xy + off ][0];
+ int my= s->motion_val[ xy + off ][1];
+
+ if( fcode_tab[mx + MAX_MV] > f_code
+ || fcode_tab[mx + MAX_MV] == 0
+ || fcode_tab[my + MAX_MV] > f_code
+ || fcode_tab[my + MAX_MV] == 0 ){
+ s->mb_type[i] &= ~MB_TYPE_INTER4V;
+ s->mb_type[i] |= MB_TYPE_INTRA;
+ }
+ }
+ xy+=2;
+ i++;
+ }
+ }
+ }
+ }
+ }
+
+// printf("%d %d\n", s->avg_mb_var, s->mc_mb_var);
+
if (!s->fixed_qscale)
s->qscale = rate_estimate_qscale(s);
@@ -1056,6 +1442,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
convert_matrix(s->q_non_intra_matrix, s->q_non_intra_matrix16, s->non_intra_matrix, s->qscale);
}
+ s->last_bits= get_bit_count(&s->pb);
switch(s->out_format) {
case FMT_MJPEG:
mjpeg_picture_header(s);
@@ -1074,7 +1461,17 @@ static void encode_picture(MpegEncContext *s, int picture_number)
mpeg1_encode_picture_header(s, picture_number);
break;
}
-
+ bits= get_bit_count(&s->pb);
+ s->header_bits= bits - s->last_bits;
+ s->last_bits= bits;
+ s->mv_bits=0;
+ s->misc_bits=0;
+ s->i_tex_bits=0;
+ s->p_tex_bits=0;
+ s->i_count=0;
+ s->p_count=0;
+ s->skip_count=0;
+
/* init last dc values */
/* note: quant matrix value (8) is implied here */
s->last_dc[0] = 128;
@@ -1083,8 +1480,6 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->mb_incr = 1;
s->last_mv[0][0][0] = 0;
s->last_mv[0][0][1] = 0;
- s->mv_type = MV_TYPE_16X16;
- s->mv_dir = MV_DIR_FORWARD;
/* Get the GOB height based on picture height */
if (s->out_format == FMT_H263 && !s->h263_pred && !s->h263_msmpeg4) {
@@ -1095,33 +1490,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
else
s->gob_index = 4;
}
-
- /* Reset the average MB variance */
- s->avg_mb_var = 0;
-
- /* Estimate motion for every MB */
- for(mb_y=0; mb_y < s->mb_height; mb_y++) {
- for(mb_x=0; mb_x < s->mb_width; mb_x++) {
- s->mb_x = mb_x;
- s->mb_y = mb_y;
-
- /* compute motion vector and macro block type (intra or non intra) */
- motion_x = 0;
- motion_y = 0;
- if (s->pict_type == P_TYPE) {
- s->mb_intra = estimate_motion(s, mb_x, mb_y,
- &motion_x,
- &motion_y);
- } else {
- s->mb_intra = 1;
- }
- /* Store MB type and MV */
- s->mb_type[mb_y * s->mb_width + mb_x] = s->mb_intra;
- s->mv_table[0][mb_y * s->mb_width + mb_x] = motion_x;
- s->mv_table[1][mb_y * s->mb_width + mb_x] = motion_y;
- }
- }
-
+
s->avg_mb_var = s->avg_mb_var / s->mb_num;
for(mb_y=0; mb_y < s->mb_height; mb_y++) {
@@ -1139,127 +1508,134 @@ static void encode_picture(MpegEncContext *s, int picture_number)
}
}
+ s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1;
+ s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1);
+ s->block_index[2]= s->block_wrap[0]*(mb_y*2 + 2) - 1;
+ s->block_index[3]= s->block_wrap[0]*(mb_y*2 + 2);
+ s->block_index[4]= s->block_wrap[4]*(mb_y + 1) + s->block_wrap[0]*(s->mb_height*2 + 2);
+ s->block_index[5]= s->block_wrap[4]*(mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2);
for(mb_x=0; mb_x < s->mb_width; mb_x++) {
+ const int mb_type= s->mb_type[mb_y * s->mb_width + mb_x];
+ PutBitContext pb;
+ int d;
+ int dmin=10000000;
+ int best=0;
s->mb_x = mb_x;
s->mb_y = mb_y;
-#if 0
- /* compute motion vector and macro block type (intra or non intra) */
- motion_x = 0;
- motion_y = 0;
- if (s->pict_type == P_TYPE) {
- s->mb_intra = estimate_motion(s, mb_x, mb_y,
- &motion_x,
- &motion_y);
- } else {
- s->mb_intra = 1;
- }
-#endif
-
- s->mb_intra = s->mb_type[mb_y * s->mb_width + mb_x];
- motion_x = s->mv_table[0][mb_y * s->mb_width + mb_x];
- motion_y = s->mv_table[1][mb_y * s->mb_width + mb_x];
-
- /* get the pixels */
- wrap = s->linesize;
- ptr = s->new_picture[0] + (mb_y * 16 * wrap) + mb_x * 16;
- get_pixels(s->block[0], ptr, wrap);
- get_pixels(s->block[1], ptr + 8, wrap);
- get_pixels(s->block[2], ptr + 8 * wrap, wrap);
- get_pixels(s->block[3], ptr + 8 * wrap + 8, wrap);
- wrap = s->linesize >> 1;
- ptr = s->new_picture[1] + (mb_y * 8 * wrap) + mb_x * 8;
- get_pixels(s->block[4], ptr, wrap);
-
- wrap = s->linesize >> 1;
- ptr = s->new_picture[2] + (mb_y * 8 * wrap) + mb_x * 8;
- get_pixels(s->block[5], ptr, wrap);
-
- /* subtract previous frame if non intra */
- if (!s->mb_intra) {
- int dxy, offset, mx, my;
-
- dxy = ((motion_y & 1) << 1) | (motion_x & 1);
- ptr = s->last_picture[0] +
- ((mb_y * 16 + (motion_y >> 1)) * s->linesize) +
- (mb_x * 16 + (motion_x >> 1));
-
- sub_pixels_2(s->block[0], ptr, s->linesize, dxy);
- sub_pixels_2(s->block[1], ptr + 8, s->linesize, dxy);
- sub_pixels_2(s->block[2], ptr + s->linesize * 8, s->linesize, dxy);
- sub_pixels_2(s->block[3], ptr + 8 + s->linesize * 8, s->linesize ,dxy);
-
- if (s->out_format == FMT_H263) {
- /* special rounding for h263 */
- dxy = 0;
- if ((motion_x & 3) != 0)
- dxy |= 1;
- if ((motion_y & 3) != 0)
- dxy |= 2;
- mx = motion_x >> 2;
- my = motion_y >> 2;
- } else {
- mx = motion_x / 2;
- my = motion_y / 2;
- dxy = ((my & 1) << 1) | (mx & 1);
- mx >>= 1;
- my >>= 1;
+ s->block_index[0]+=2;
+ s->block_index[1]+=2;
+ s->block_index[2]+=2;
+ s->block_index[3]+=2;
+ s->block_index[4]++;
+ s->block_index[5]++;
+
+ s->mv_dir = MV_DIR_FORWARD;
+ if(mb_type & (mb_type-1)){ // more than 1 MB type possible
+ pb= s->pb;
+ if(mb_type&MB_TYPE_INTER){
+ s->mv_type = MV_TYPE_16X16;
+ s->mb_intra= 0;
+ s->mv[0][0][0] = s->mv_table[0][mb_y * s->mb_width + mb_x];
+ s->mv[0][0][1] = s->mv_table[1][mb_y * s->mb_width + mb_x];
+ init_put_bits(&s->pb, bit_buf[1], 3000, NULL, NULL);
+ s->block= s->inter_block;
+
+ encode_mb(s);
+ d= get_bit_count(&s->pb);
+ if(d<dmin){
+ flush_put_bits(&s->pb);
+ dmin=d;
+ best_s.mv[0][0][0]= s->mv[0][0][0];
+ best_s.mv[0][0][1]= s->mv[0][0][1];
+ best_s.mb_intra= 0;
+ best_s.mv_type = MV_TYPE_16X16;
+ best_s.pb=s->pb;
+ best_s.block= s->block;
+ best=1;
+ for(i=0; i<6; i++)
+ best_s.block_last_index[i]= s->block_last_index[i];
+ }
}
- offset = ((mb_y * 8 + my) * (s->linesize >> 1)) + (mb_x * 8 + mx);
- ptr = s->last_picture[1] + offset;
- sub_pixels_2(s->block[4], ptr, s->linesize >> 1, dxy);
- ptr = s->last_picture[2] + offset;
- sub_pixels_2(s->block[5], ptr, s->linesize >> 1, dxy);
- }
- emms_c();
-
-#if 0
- {
- float adap_parm;
-
- adap_parm = ((s->avg_mb_var << 1) + s->mb_var[s->mb_width*mb_y+mb_x] + 1.0) /
- ((s->mb_var[s->mb_width*mb_y+mb_x] << 1) + s->avg_mb_var + 1.0);
-
- printf("\ntype=%c qscale=%2d adap=%0.2f dquant=%4.2f var=%4d avgvar=%4d",
- (s->mb_type[s->mb_width*mb_y+mb_x] > 0) ? 'I' : 'P',
- s->qscale, adap_parm, s->qscale*adap_parm,
- s->mb_var[s->mb_width*mb_y+mb_x], s->avg_mb_var);
- }
-#endif
- /* DCT & quantize */
- if (s->h263_msmpeg4) {
- msmpeg4_dc_scale(s);
- } else if (s->h263_pred) {
- h263_dc_scale(s);
+ if(mb_type&MB_TYPE_INTER4V){
+ s->mv_type = MV_TYPE_8X8;
+ s->mb_intra= 0;
+ for(i=0; i<4; i++){
+ s->mv[0][i][0] = s->motion_val[s->block_index[i]][0];
+ s->mv[0][i][1] = s->motion_val[s->block_index[i]][1];
+ }
+ init_put_bits(&s->pb, bit_buf[2], 3000, NULL, NULL);
+ s->block= s->inter4v_block;
+
+ encode_mb(s);
+ d= get_bit_count(&s->pb);
+ if(d<dmin){
+ flush_put_bits(&s->pb);
+ dmin=d;
+ for(i=0; i<4; i++){
+ best_s.mv[0][i][0] = s->mv[0][i][0];
+ best_s.mv[0][i][1] = s->mv[0][i][1];
+ }
+ best_s.mb_intra= 0;
+ best_s.mv_type = MV_TYPE_8X8;
+ best_s.pb=s->pb;
+ best_s.block= s->block;
+ best=2;
+ for(i=0; i<6; i++)
+ best_s.block_last_index[i]= s->block_last_index[i];
+ }
+ }
+ if(mb_type&MB_TYPE_INTRA){
+ s->mv_type = MV_TYPE_16X16;
+ s->mb_intra= 1;
+ s->mv[0][0][0] = 0;
+ s->mv[0][0][1] = 0;
+ init_put_bits(&s->pb, bit_buf[0], 3000, NULL, NULL);
+ s->block= s->intra_block;
+
+ encode_mb(s);
+ d= get_bit_count(&s->pb);
+ if(d<dmin){
+ flush_put_bits(&s->pb);
+ dmin=d;
+ best_s.mv[0][0][0]= 0;
+ best_s.mv[0][0][1]= 0;
+ best_s.mb_intra= 1;
+ best_s.mv_type = MV_TYPE_16X16;
+ best_s.pb=s->pb;
+ best_s.block= s->block;
+ for(i=0; i<6; i++)
+ best_s.block_last_index[i]= s->block_last_index[i];
+ best=0;
+ }
+ /* force cleaning of ac/dc if needed ... */
+ s->mbintra_table[mb_x + mb_y*s->mb_width]=1;
+ }
+ for(i=0; i<4; i++){
+ s->mv[0][i][0] = best_s.mv[0][i][0];
+ s->mv[0][i][1] = best_s.mv[0][i][1];
+ }
+ s->mb_intra= best_s.mb_intra;
+ s->mv_type= best_s.mv_type;
+ for(i=0; i<6; i++)
+ s->block_last_index[i]= best_s.block_last_index[i];
+ copy_bits(&pb, bit_buf[best], dmin);
+ s->block= best_s.block;
+ s->pb= pb;
} else {
- /* default quantization values */
- s->y_dc_scale = 8;
- s->c_dc_scale = 8;
- }
- for(i=0;i<6;i++) {
- s->block_last_index[i] = dct_quantize(s, s->block[i], i, s->qscale);
- }
-
- /* huffman encode */
- switch(s->out_format) {
- case FMT_MPEG1:
- mpeg1_encode_mb(s, s->block, motion_x, motion_y);
- break;
- case FMT_H263:
- if (s->h263_msmpeg4)
- msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
- else
- h263_encode_mb(s, s->block, motion_x, motion_y);
- break;
- case FMT_MJPEG:
- mjpeg_encode_mb(s, s->block);
- break;
+ // only one MB-Type possible
+ if(mb_type&MB_TYPE_INTRA){
+ s->mb_intra= 1;
+ s->mv[0][0][0] = 0;
+ s->mv[0][0][1] = 0;
+ }else{
+ s->mb_intra= 0;
+ s->mv[0][0][0] = s->mv_table[0][mb_y * s->mb_width + mb_x];
+ s->mv[0][0][1] = s->mv_table[1][mb_y * s->mb_width + mb_x];
+ }
+ encode_mb(s);
}
- /* decompress blocks so that we keep the state of the decoder */
- s->mv[0][0][0] = motion_x;
- s->mv[0][0][1] = motion_y;
-
MPV_decode_mb(s, s->block);
}
@@ -1277,7 +1653,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->first_gob_line = 0;
}
}
-
+ emms_c();
+
if (s->h263_msmpeg4 && s->pict_type == I_TYPE)
msmpeg4_encode_ext_header(s);
@@ -1294,7 +1671,6 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->ptr_lastgob = pbBufPtr(&s->pb);
//fprintf(stderr,"\nGOB: %2d size: %d (last)", s->gob_number, pdif);
}
-
}
static int dct_quantize_c(MpegEncContext *s,
@@ -1523,6 +1899,22 @@ static void dct_unquantize_h263_c(MpegEncContext *s,
static void rate_control_init(MpegEncContext *s)
{
+#if 1
+ emms_c();
+
+ //initial values, they don't really matter as they will be totally different within a few frames
+ s->i_pred.coeff= s->p_pred.coeff= 7.0;
+ s->i_pred.count= s->p_pred.count= 1.0;
+
+ s->i_pred.decay= s->p_pred.decay= 0.4;
+
+ // use more bits at the beginning, otherwise high motion at the beginning will look bad
+ s->qsum=100;
+ s->qcount=100;
+
+ s->short_term_qsum=0.001;
+ s->short_term_qcount=0.001;
+#else
s->wanted_bits = 0;
if (s->intra_only) {
@@ -1533,24 +1925,123 @@ static void rate_control_init(MpegEncContext *s)
(float)((float)s->frame_rate / FRAME_RATE_BASE * (I_FRAME_SIZE_RATIO + s->gop_size - 1)));
s->I_frame_bits = (int)(s->P_frame_bits * I_FRAME_SIZE_RATIO);
}
-
+
#if defined(DEBUG)
printf("I_frame_size=%d P_frame_size=%d\n",
s->I_frame_bits, s->P_frame_bits);
#endif
+#endif
+}
+
+static double predict(Predictor *p, double q, double var) /* predicted frame size (bits) for quantizer q and motion compensated variance var */
+{
+ return p->coeff*var / (q*p->count); /* coeff/count is the decayed average of size*q/(var+1) kept by update_predictor() */
}
+static void update_predictor(Predictor *p, double q, double var, double size) /* feed one observed (q, var, size) sample into predictor p */
+{
+ double new_coeff= size*q / (var + 1); /* coefficient of this sample; the +1 avoids a division by zero when var is 0 */
+ if(var<1000) return; /* samples with var below 1000 leave the predictor unchanged */
+/*{
+int pred= predict(p, q, var);
+int error= abs(pred-size);
+static double sum=0;
+static int count=0;
+if(count>5) sum+=error;
+count++;
+if(256*256*256*64%count==0){
+ printf("%d %f %f\n", count, sum/count, p->coeff);
+}
+}*/
+ p->count*= p->decay; /* scale down the accumulated history by the decay factor ... */
+ p->coeff*= p->decay;
+ p->count++; /* ... then add the new sample with weight 1 */
+ p->coeff+= new_coeff;
+}
-/*
- * This heuristic is rather poor, but at least we do not have to
- * change the qscale at every macroblock.
- */
static int rate_estimate_qscale(MpegEncContext *s)
{
- INT64 diff, total_bits = s->total_bits;
+#if 1
+ int qmin= s->qmin;
+ int qmax= s->qmax;
+ int rate_q=5;
float q;
- int qscale, qmin;
+ int qscale;
+ float br_compensation;
+ double diff;
+ double short_term_q;
+ double long_term_q;
+ int last_qscale= s->qscale;
+ double fps;
+ INT64 wanted_bits;
+ emms_c();
+
+ fps= (double)s->frame_rate / FRAME_RATE_BASE;
+ wanted_bits= s->bit_rate*(double)s->picture_number/fps;
+
+
+ if(s->picture_number>2){
+ /* update predictors */
+ if(s->last_pict_type == I_TYPE){
+ //FIXME
+ }else{ //P Frame
+//printf("%d %d %d %f\n", s->qscale, s->last_mc_mb_var, s->frame_bits, s->p_pred.coeff);
+ update_predictor(&s->p_pred, s->qscale, s->last_mc_mb_var, s->frame_bits);
+ }
+ }
+
+ if(s->pict_type == I_TYPE){
+ //FIXME
+ rate_q= s->qsum/s->qcount;
+ }else{ //P Frame
+ int i;
+ int diff, best_diff=1000000000;
+ for(i=1; i<=31; i++){
+ diff= predict(&s->p_pred, i, s->mc_mb_var) - (double)s->bit_rate/fps;
+ if(diff<0) diff= -diff;
+ if(diff<best_diff){
+ best_diff= diff;
+ rate_q= i;
+ }
+ }
+ }
+
+ s->short_term_qsum*=s->qblur;
+ s->short_term_qcount*=s->qblur;
+
+ s->short_term_qsum+= rate_q;
+ s->short_term_qcount++;
+ short_term_q= s->short_term_qsum/s->short_term_qcount;
+
+ long_term_q= s->qsum/s->qcount*(s->total_bits+1)/(wanted_bits+1); //+1 to avoid nan & 0
+
+// q= (long_term_q - short_term_q)*s->qcompress + short_term_q;
+ q= 1/((1/long_term_q - 1/short_term_q)*s->qcompress + 1/short_term_q);
+
+ diff= s->total_bits - wanted_bits;
+ br_compensation= (s->bit_rate_tolerance - diff)/s->bit_rate_tolerance;
+ if(br_compensation<=0.0) br_compensation=0.001;
+ q/=br_compensation;
+ qscale= (int)(q + 0.5);
+ if (qscale<qmin) qscale=qmin;
+ else if(qscale>qmax) qscale=qmax;
+
+ if (qscale<last_qscale-s->max_qdiff) qscale=last_qscale-s->max_qdiff;
+ else if(qscale>last_qscale+s->max_qdiff) qscale=last_qscale+s->max_qdiff;
+
+ s->qsum+= qscale;
+ s->qcount++;
+
+ s->last_pict_type= s->pict_type;
+//printf("q:%d diff:%d comp:%f rate_q:%d st_q:%f fvar:%d last_size:%d\n", qscale, (int)diff, br_compensation,
+// rate_q, short_term_q, s->mc_mb_var, s->frame_bits);
+//printf("%d %d\n", s->bit_rate, (int)fps);
+ return qscale;
+#else
+ INT64 diff, total_bits = s->total_bits;
+ float q;
+ int qscale;
if (s->pict_type == I_TYPE) {
s->wanted_bits += s->I_frame_bits;
} else {
@@ -1581,6 +2072,7 @@ static int rate_estimate_qscale(MpegEncContext *s)
(int)diff, q);
#endif
return qscale;
+#endif
}
AVCodec mpeg1video_encoder = {
@@ -1643,10 +2135,30 @@ AVCodec mpeg4_encoder = {
MPV_encode_end,
};
-AVCodec msmpeg4_encoder = {
+AVCodec msmpeg4v1_encoder = { /* encoder entry "msmpeg4v1" for CODEC_ID_MSMPEG4V1 */
+ "msmpeg4v1",
+ CODEC_TYPE_VIDEO,
+ CODEC_ID_MSMPEG4V1,
+ sizeof(MpegEncContext),
+ MPV_encode_init, /* same MPV_encode_* functions as the other encoder entries in this hunk */
+ MPV_encode_picture,
+ MPV_encode_end,
+};
+
+AVCodec msmpeg4v2_encoder = { /* encoder entry "msmpeg4v2" for CODEC_ID_MSMPEG4V2 */
+ "msmpeg4v2",
+ CODEC_TYPE_VIDEO,
+ CODEC_ID_MSMPEG4V2,
+ sizeof(MpegEncContext),
+ MPV_encode_init, /* same MPV_encode_* functions as the other encoder entries in this hunk */
+ MPV_encode_picture,
+ MPV_encode_end,
+};
+
+AVCodec msmpeg4v3_encoder = {
"msmpeg4",
CODEC_TYPE_VIDEO,
- CODEC_ID_MSMPEG4,
+ CODEC_ID_MSMPEG4V3,
sizeof(MpegEncContext),
MPV_encode_init,
MPV_encode_picture,
diff --git a/src/libffmpeg/libavcodec/mpegvideo.h b/src/libffmpeg/libavcodec/mpegvideo.h
index 01e477865..f809a1255 100644
--- a/src/libffmpeg/libavcodec/mpegvideo.h
+++ b/src/libffmpeg/libavcodec/mpegvideo.h
@@ -34,6 +34,15 @@ enum OutputFormat {
#define QMAT_SHIFT_MMX 19
#define QMAT_SHIFT 25
+#define MAX_FCODE 7
+#define MAX_MV 2048
+
+typedef struct Predictor{ /* decayed running average used by the rate control to predict frame sizes */
+ double coeff; /* decayed sum of the per-frame size*q/(var+1) samples */
+ double count; /* decayed number of samples accumulated in coeff */
+ double decay; /* factor applied to coeff and count before each new sample is added */
+} Predictor;
+
typedef struct MpegEncContext {
struct AVCodecContext *avctx;
/* the following parameters must be initialized before encoding */
@@ -42,6 +51,7 @@ typedef struct MpegEncContext {
int frame_rate; /* number of frames per second */
int intra_only; /* if true, only intra pictures are generated */
int bit_rate; /* wanted bit rate */
+ int bit_rate_tolerance; /* amount of +- bits (>0)*/
enum OutputFormat out_format; /* output format */
int h263_plus; /* h263 plus headers */
int h263_rv10; /* use RV10 variation for H263 */
@@ -49,7 +59,14 @@ typedef struct MpegEncContext {
int h263_msmpeg4; /* generate MSMPEG4 compatible stream */
int h263_intel; /* use I263 intel h263 header */
int fixed_qscale; /* fixed qscale if non zero */
+ float qcompress; /* amount of qscale change between easy & hard scenes (0.0-1.0) */
+ float qblur; /* amount of qscale smoothing over time (0.0-1.0) */
+ int qmin; /* min qscale */
+ int qmax; /* max qscale */
+ int max_qdiff; /* max qscale difference between frames */
int encoding; /* true if we are encoding (vs decoding) */
+ int flags; /* AVCodecContext.flags (HQ, MV4, ...) */
+ int force_type; /* 0= no force, otherwise I_TYPE, P_TYPE, ... */
/* the following fields are managed internally by the encoder */
/* bit output */
@@ -59,7 +76,8 @@ typedef struct MpegEncContext {
int context_initialized;
int picture_number;
int fake_picture_number; /* picture number at the bitstream frame rate */
- int gop_picture_number; /* index of the first picture of a GOP */
+ int gop_picture_number; /* index of the first picture of a GOP based on fake_pic_num & mpeg1 specific */
+ int picture_in_gop_number; /* 0-> first pic in gop, ... */
int mb_width, mb_height;
int mb_num; /* number of MBs of a picture */
int linesize; /* line size, in bytes, may be different from width */
@@ -72,10 +90,10 @@ typedef struct MpegEncContext {
UINT8 *aux_picture_base[3]; /* real start of the picture */
UINT8 *current_picture[3]; /* buffer to store the decompressed current picture */
int last_dc[3]; /* last DC values for MPEG1 */
- INT16 *dc_val[3]; /* used for mpeg4 DC prediction */
+ INT16 *dc_val[3]; /* used for mpeg4 DC prediction, all 3 arrays must be contiguous */
int y_dc_scale, c_dc_scale;
UINT8 *coded_block; /* used for coded block pattern prediction */
- INT16 (*ac_val[3])[16]; /* used for for mpeg4 AC prediction */
+ INT16 (*ac_val[3])[16]; /* used for mpeg4 AC prediction, all 3 arrays must be contiguous */
int ac_pred;
int mb_skiped; /* MUST BE SET only during DECODING */
UINT8 *mbskip_table; /* used to avoid copy if macroblock
@@ -84,22 +102,25 @@ typedef struct MpegEncContext {
int qscale;
int pict_type;
+ int last_non_b_pict_type; /* used for mpeg4 gmc b-frames */
+ int last_pict_type; /* used for bit rate stuff (needs that to update the right predictor) */
int frame_rate_index;
/* motion compensation */
int unrestricted_mv;
int h263_long_vectors; /* use horrible h263v1 long vector mode */
int f_code; /* resolution */
- int b_code; /* resolution for B Frames*/
- INT16 *mv_table[2]; /* MV table */
- INT16 (*motion_val)[2]; /* used for MV prediction */
+ int b_code; /* backward resolution for B Frames (mpeg4) */
+ INT16 *mv_table[2]; /* MV table (1MV per MB)*/
+ INT16 (*motion_val)[2]; /* used for MV prediction (4MV per MB)*/
int full_search;
int mv_dir;
#define MV_DIR_BACKWARD 1
#define MV_DIR_FORWARD 2
+#define MV_DIRECT 4 // bidirectional mode where the difference equals the MV of the last P/S/I-Frame (mpeg4)
int mv_type;
#define MV_TYPE_16X16 0 /* 1 vector for the whole mb */
-#define MV_TYPE_8X8 1 /* 4 vectors (h263) */
+#define MV_TYPE_8X8 1 /* 4 vectors (h263, mpeg4 4MV) */
#define MV_TYPE_16X8 2 /* 2 vectors, one per 16x8 block */
#define MV_TYPE_FIELD 3 /* 2 vectors, one per field */
#define MV_TYPE_DMV 4 /* 2 vectors, special mpeg2 Dual Prime Vectors */
@@ -111,6 +132,8 @@ typedef struct MpegEncContext {
int mv[2][4][2];
int field_select[2][2];
int last_mv[2][2][2];
+ UINT16 (*mv_penalty)[MAX_MV*2+1]; /* amount of bits needed to encode a MV, used for ME */
+ UINT8 *fcode_tab; /* smallest fcode needed for each MV */
int has_b_frames;
int no_rounding; /* apply no rounding to motion estimation (MPEG4) */
@@ -119,9 +142,20 @@ typedef struct MpegEncContext {
int mb_x, mb_y;
int mb_incr;
int mb_intra;
- INT16 *mb_var; /* Table for MB variances */
- char *mb_type; /* Table for MB type */
-
+ UINT16 *mb_var; /* Table for MB variances */
+ UINT8 *mb_type; /* Table for MB type */
+#define MB_TYPE_INTRA 0x01
+#define MB_TYPE_INTER 0x02
+#define MB_TYPE_INTER4V 0x04
+#define MB_TYPE_SKIPED 0x08
+#define MB_TYPE_DIRECT 0x10
+#define MB_TYPE_FORWARD 0x20
+#define MB_TYPE_BACKWAD 0x40
+#define MB_TYPE_BIDIR 0x80
+
+ int block_index[6];
+ int block_wrap[6];
+
/* matrix transmitted in the bitstream */
UINT16 intra_matrix[64];
UINT16 chroma_intra_matrix[64];
@@ -141,9 +175,30 @@ typedef struct MpegEncContext {
int I_frame_bits; /* wanted number of bits per I frame */
int P_frame_bits; /* same for P frame */
int avg_mb_var; /* average MB variance for current frame */
+ int mc_mb_var; /* motion compensated MB variance for current frame */
+ int last_mc_mb_var; /* motion compensated MB variance for last frame */
INT64 wanted_bits;
INT64 total_bits;
-
+ int frame_bits; /* bits used for the current frame */
+ int last_frame_bits; /* bits used for the last frame */
+ Predictor i_pred;
+ Predictor p_pred;
+ double qsum; /* sum of qscales */
+ double qcount; /* count of qscales */
+ double short_term_qsum; /* sum of recent qscales */
+ double short_term_qcount; /* count of recent qscales */
+
+ /* statistics, used for 2-pass encoding */
+ int mv_bits;
+ int header_bits;
+ int i_tex_bits;
+ int p_tex_bits;
+ int i_count;
+ int p_count;
+ int skip_count;
+ int misc_bits; // cbp, mb_type
+ int last_bits; //temp var used for calculating the above vars
+
/* H.263 specific */
int gob_number;
int gob_index;
@@ -156,7 +211,12 @@ typedef struct MpegEncContext {
int h263_aic_dir; /* AIC direction: 0 = left, 1 = top */
/* mpeg4 specific */
+ int time_increment_resolution;
int time_increment_bits;
+ int time_increment;
+ int time_base;
+ int time;
+ int last_non_b_time[2];
int shape;
int vol_sprite_usage;
int sprite_width;
@@ -179,6 +239,8 @@ typedef struct MpegEncContext {
int sprite_warping_accuracy;
int low_latency_sprite;
int data_partioning;
+ int resync_marker;
+ int resync_x_pos;
/* divx specific, used to workaround (many) bugs in divx5 */
int divx_version;
@@ -202,9 +264,10 @@ typedef struct MpegEncContext {
int dc_table_index;
int use_skip_mb_code;
int slice_height; /* in macroblocks */
- int first_slice_line;
+ int first_slice_line; /* used in mpeg4 too to handle resync markers */
int flipflop_rounding;
int bitrate;
+ int msmpeg4_version; /* 1=mp41, 2=mp42, 3=mp43/divx3 */
/* decompression specific */
GetBitContext gb;
@@ -242,7 +305,10 @@ typedef struct MpegEncContext {
UINT8 *ptr_last_mb_line;
UINT32 mb_line_avgsize;
- DCTELEM block[6][64] __align8;
+ DCTELEM (*block)[64]; /* points to one of the following blocks */
+ DCTELEM intra_block[6][64] __align8;
+ DCTELEM inter_block[6][64] __align8;
+ DCTELEM inter4v_block[6][64] __align8;
void (*dct_unquantize)(struct MpegEncContext *s,
DCTELEM *block, int n, int qscale);
} MpegEncContext;
@@ -258,9 +324,8 @@ void MPV_common_init_mmx(MpegEncContext *s);
/* motion_est.c */
-int estimate_motion(MpegEncContext *s,
- int mb_x, int mb_y,
- int *mx_ptr, int *my_ptr);
+void estimate_motion(MpegEncContext *s,
+ int mb_x, int mb_y);
/* mpeg12.c */
extern INT16 default_intra_matrix[64];
@@ -270,6 +335,7 @@ void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number);
void mpeg1_encode_mb(MpegEncContext *s,
DCTELEM block[6][64],
int motion_x, int motion_y);
+void mpeg1_encode_init(MpegEncContext *s);
/* h263enc.c */
@@ -306,6 +372,9 @@ static inline int get_rl_index(const RLTable *rl, int last, int run, int level)
void h263_encode_mb(MpegEncContext *s,
DCTELEM block[6][64],
int motion_x, int motion_y);
+void mpeg4_encode_mb(MpegEncContext *s,
+ DCTELEM block[6][64],
+ int motion_x, int motion_y);
void h263_encode_picture_header(MpegEncContext *s, int picture_number);
int h263_encode_gob_header(MpegEncContext * s, int mb_line);
void h263_dc_scale(MpegEncContext *s);
@@ -314,7 +383,7 @@ INT16 *h263_pred_motion(MpegEncContext * s, int block,
void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n,
int dir);
void mpeg4_encode_picture_header(MpegEncContext *s, int picture_number);
-void h263_encode_init_vlc(MpegEncContext *s);
+void h263_encode_init(MpegEncContext *s);
void h263_decode_init_vlc(MpegEncContext *s);
int h263_decode_picture_header(MpegEncContext *s);
diff --git a/src/libffmpeg/libavcodec/msmpeg4.c b/src/libffmpeg/libavcodec/msmpeg4.c
index 8fa9aefaa..66fc5255e 100644
--- a/src/libffmpeg/libavcodec/msmpeg4.c
+++ b/src/libffmpeg/libavcodec/msmpeg4.c
@@ -29,7 +29,6 @@
* TODO:
* - (encoding) select best mv table (two choices)
* - (encoding) select best vlc/dc table
- * - (decoding) handle slice indication
*/
//#define DEBUG
@@ -44,12 +43,18 @@ typedef struct MVTable {
VLC vlc; /* decoding: vlc */
} MVTable;
+static UINT32 v2_dc_lum_table[512][2];
+static UINT32 v2_dc_chroma_table[512][2];
+
static void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n);
static int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
int n, int coded);
static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr);
static int msmpeg4_decode_motion(MpegEncContext * s,
int *mx_ptr, int *my_ptr);
+static void msmpeg4v2_encode_motion(MpegEncContext * s, int val);
+static void init_h263_dc_for_msmpeg4();
+
extern UINT32 inverse[256];
@@ -166,7 +171,11 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
put_bits(&s->pb, 5, s->qscale);
s->rl_table_index = 2;
- s->rl_chroma_table_index = 1; /* only for I frame */
+ if(s->msmpeg4_version==2)
+ s->rl_chroma_table_index = 2; /* only for I frame */
+ else
+ s->rl_chroma_table_index = 1; /* only for I frame */
+
s->dc_table_index = 1;
s->mv_table_index = 1; /* only if P frame */
s->use_skip_mb_code = 1; /* only if P frame */
@@ -174,21 +183,25 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
if (s->pict_type == I_TYPE) {
put_bits(&s->pb, 5, 0x17); /* indicate only one "slice" */
- code012(&s->pb, s->rl_chroma_table_index);
- code012(&s->pb, s->rl_table_index);
+ if(s->msmpeg4_version!=2){
+ code012(&s->pb, s->rl_chroma_table_index);
+ code012(&s->pb, s->rl_table_index);
- put_bits(&s->pb, 1, s->dc_table_index);
+ put_bits(&s->pb, 1, s->dc_table_index);
+ }
s->no_rounding = 1;
} else {
put_bits(&s->pb, 1, s->use_skip_mb_code);
s->rl_chroma_table_index = s->rl_table_index;
- code012(&s->pb, s->rl_table_index);
+ if(s->msmpeg4_version!=2){
+ code012(&s->pb, s->rl_table_index);
- put_bits(&s->pb, 1, s->dc_table_index);
+ put_bits(&s->pb, 1, s->dc_table_index);
+
+ put_bits(&s->pb, 1, s->mv_table_index);
+ }
- put_bits(&s->pb, 1, s->mv_table_index);
-
if(s->flipflop_rounding){
s->no_rounding ^= 1;
}else{
@@ -203,6 +216,8 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
init_mv_table(&mv_tables[1]);
for(i=0;i<NB_RL_TABLES;i++)
init_rl(&rl_table[i]);
+
+ init_h263_dc_for_msmpeg4();
}
#ifdef DEBUG
@@ -226,18 +241,17 @@ void msmpeg4_encode_ext_header(MpegEncContext * s)
/* predict coded block */
static inline int coded_block_pred(MpegEncContext * s, int n, UINT8 **coded_block_ptr)
{
- int x, y, wrap, pred, a, b, c;
+ int xy, wrap, pred, a, b, c;
- x = 2 * s->mb_x + 1 + (n & 1);
- y = 2 * s->mb_y + 1 + ((n & 2) >> 1);
- wrap = s->mb_width * 2 + 2;
+ xy = s->block_index[n];
+ wrap = s->block_wrap[0];
/* B C
* A X
*/
- a = s->coded_block[(x - 1) + (y) * wrap];
- b = s->coded_block[(x - 1) + (y - 1) * wrap];
- c = s->coded_block[(x) + (y - 1) * wrap];
+ a = s->coded_block[xy - 1 ];
+ b = s->coded_block[xy - 1 - wrap];
+ c = s->coded_block[xy - wrap];
if (b == c) {
pred = a;
@@ -246,7 +260,7 @@ static inline int coded_block_pred(MpegEncContext * s, int n, UINT8 **coded_bloc
}
/* store value */
- *coded_block_ptr = &s->coded_block[(x) + (y) * wrap];
+ *coded_block_ptr = &s->coded_block[xy];
return pred;
}
@@ -314,14 +328,30 @@ void msmpeg4_encode_mb(MpegEncContext * s,
if (s->use_skip_mb_code)
put_bits(&s->pb, 1, 0); /* mb coded */
- put_bits(&s->pb,
- table_mb_non_intra[cbp + 64][1],
- table_mb_non_intra[cbp + 64][0]);
-
- /* motion vector */
- h263_pred_motion(s, 0, &pred_x, &pred_y);
- msmpeg4_encode_motion(s, motion_x - pred_x,
- motion_y - pred_y);
+ if(s->msmpeg4_version==2){
+ put_bits(&s->pb,
+ v2_mb_type[cbp&3][1],
+ v2_mb_type[cbp&3][0]);
+ if((cbp&3) != 3) coded_cbp= cbp ^ 0x3C;
+ else coded_cbp= cbp;
+
+ put_bits(&s->pb,
+ cbpy_tab[coded_cbp>>2][1],
+ cbpy_tab[coded_cbp>>2][0]);
+
+ h263_pred_motion(s, 0, &pred_x, &pred_y);
+ msmpeg4v2_encode_motion(s, motion_x - pred_x);
+ msmpeg4v2_encode_motion(s, motion_y - pred_y);
+ }else{
+ put_bits(&s->pb,
+ table_mb_non_intra[cbp + 64][1],
+ table_mb_non_intra[cbp + 64][0]);
+
+ /* motion vector */
+ h263_pred_motion(s, 0, &pred_x, &pred_y);
+ msmpeg4_encode_motion(s, motion_x - pred_x,
+ motion_y - pred_y);
+ }
} else {
/* compute cbp */
cbp = 0;
@@ -343,19 +373,36 @@ void msmpeg4_encode_mb(MpegEncContext * s,
printf("cbp=%x %x\n", cbp, coded_cbp);
#endif
- if (s->pict_type == I_TYPE) {
- set_stat(ST_INTRA_MB);
- put_bits(&s->pb,
- table_mb_intra[coded_cbp][1], table_mb_intra[coded_cbp][0]);
- } else {
- if (s->use_skip_mb_code)
- put_bits(&s->pb, 1, 0); /* mb coded */
+ if(s->msmpeg4_version==2){
+ if (s->pict_type == I_TYPE) {
+ put_bits(&s->pb,
+ v2_intra_cbpc[cbp&3][1], v2_intra_cbpc[cbp&3][0]);
+ } else {
+ if (s->use_skip_mb_code)
+ put_bits(&s->pb, 1, 0); /* mb coded */
+ put_bits(&s->pb,
+ v2_mb_type[(cbp&3) + 4][1],
+ v2_mb_type[(cbp&3) + 4][0]);
+ }
+ put_bits(&s->pb, 1, 0); /* no AC prediction yet */
put_bits(&s->pb,
- table_mb_non_intra[cbp][1],
- table_mb_non_intra[cbp][0]);
+ cbpy_tab[cbp>>2][1],
+ cbpy_tab[cbp>>2][0]);
+ }else{
+ if (s->pict_type == I_TYPE) {
+ set_stat(ST_INTRA_MB);
+ put_bits(&s->pb,
+ table_mb_intra[coded_cbp][1], table_mb_intra[coded_cbp][0]);
+ } else {
+ if (s->use_skip_mb_code)
+ put_bits(&s->pb, 1, 0); /* mb coded */
+ put_bits(&s->pb,
+ table_mb_non_intra[cbp][1],
+ table_mb_non_intra[cbp][0]);
+ }
+ set_stat(ST_INTRA_MB);
+ put_bits(&s->pb, 1, 0); /* no AC prediction yet */
}
- set_stat(ST_INTRA_MB);
- put_bits(&s->pb, 1, 0); /* no AC prediction yet */
}
for (i = 0; i < 6; i++) {
@@ -367,10 +414,9 @@ void msmpeg4_encode_mb(MpegEncContext * s,
/* strongly inspirated from MPEG4, but not exactly the same ! */
void msmpeg4_dc_scale(MpegEncContext * s)
{
- if (s->qscale < 5){
+ if (s->qscale < 5 || s->msmpeg4_version==2){
s->y_dc_scale = 8;
s->c_dc_scale = 8;
-// s->c_dc_scale = (s->qscale + 13)>>1;
}else if (s->qscale < 9){
s->y_dc_scale = 2 * s->qscale;
s->c_dc_scale = (s->qscale + 13)>>1;
@@ -378,56 +424,30 @@ void msmpeg4_dc_scale(MpegEncContext * s)
s->y_dc_scale = s->qscale + 8;
s->c_dc_scale = (s->qscale + 13)>>1;
}
- // this differs for quant >24 from mpeg4
-
-// if(s->qscale==13) s->c_dc_scale=14;
-
-// if(s->qscale>=6)
-// printf("%d", s->qscale);
-
- /* s->c_dc_scale values (found by Michael Nidermayer)
- qscale=2 -> 8 (yes iam sure about that)
- qscale=3 -> 8
- qscale=4 -> 8
- qscale=5 -> 9
- qscale=6 -> 9
- qscale=7 -> 10
- qscale=8 -> 10
- qscale=9 -> 11
- qscale=10-> 11
- */
}
/* dir = 0: left, dir = 1: top prediction */
static int msmpeg4_pred_dc(MpegEncContext * s, int n,
INT16 **dc_val_ptr, int *dir_ptr)
{
- int a, b, c, xy, wrap, pred, scale;
+ int a, b, c, wrap, pred, scale;
INT16 *dc_val;
/* find prediction */
if (n < 4) {
- wrap = s->mb_width * 2 + 2;
- xy = 2 * s->mb_y + 1 + ((n & 2) >> 1);
- xy *= wrap;
- xy += 2 * s->mb_x + 1 + (n & 1);
- dc_val = s->dc_val[0];
scale = s->y_dc_scale;
} else {
- wrap = s->mb_width + 2;
- xy = s->mb_y + 1;
- xy *= wrap;
- xy += s->mb_x + 1;
- dc_val = s->dc_val[n - 4 + 1];
scale = s->c_dc_scale;
}
+ wrap = s->block_wrap[n];
+ dc_val= s->dc_val[0] + s->block_index[n];
/* B C
* A X
*/
- a = dc_val[xy - 1];
- b = dc_val[xy - 1 - wrap];
- c = dc_val[xy - wrap];
+ a = dc_val[ - 1];
+ b = dc_val[ - 1 - wrap];
+ c = dc_val[ - wrap];
/* XXX: the following solution consumes divisions, but it does not
necessitate to modify mpegvideo.c. The problem comes from the
@@ -478,7 +498,7 @@ static int msmpeg4_pred_dc(MpegEncContext * s, int n,
}
/* update predictor */
- *dc_val_ptr = &dc_val[xy];
+ *dc_val_ptr = &dc_val[0];
return pred;
}
@@ -502,35 +522,46 @@ static void msmpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr
/* do the prediction */
level -= pred;
- sign = 0;
- if (level < 0) {
- level = -level;
- sign = 1;
- }
-
- code = level;
- if (code > DC_MAX)
- code = DC_MAX;
-
- if (s->dc_table_index == 0) {
+ if(s->msmpeg4_version==2){
if (n < 4) {
- put_bits(&s->pb, table0_dc_lum[code][1], table0_dc_lum[code][0]);
- } else {
- put_bits(&s->pb, table0_dc_chroma[code][1], table0_dc_chroma[code][0]);
+ put_bits(&s->pb,
+ v2_dc_lum_table[level+256][1],
+ v2_dc_lum_table[level+256][0]);
+ }else{
+ put_bits(&s->pb,
+ v2_dc_chroma_table[level+256][1],
+ v2_dc_chroma_table[level+256][0]);
}
- } else {
- if (n < 4) {
- put_bits(&s->pb, table1_dc_lum[code][1], table1_dc_lum[code][0]);
+ }else{
+ sign = 0;
+ if (level < 0) {
+ level = -level;
+ sign = 1;
+ }
+ code = level;
+ if (code > DC_MAX)
+ code = DC_MAX;
+
+ if (s->dc_table_index == 0) {
+ if (n < 4) {
+ put_bits(&s->pb, table0_dc_lum[code][1], table0_dc_lum[code][0]);
+ } else {
+ put_bits(&s->pb, table0_dc_chroma[code][1], table0_dc_chroma[code][0]);
+ }
} else {
- put_bits(&s->pb, table1_dc_chroma[code][1], table1_dc_chroma[code][0]);
+ if (n < 4) {
+ put_bits(&s->pb, table1_dc_lum[code][1], table1_dc_lum[code][0]);
+ } else {
+ put_bits(&s->pb, table1_dc_chroma[code][1], table1_dc_chroma[code][0]);
+ }
+ }
+
+ if (code == DC_MAX)
+ put_bits(&s->pb, 8, level);
+
+ if (level != 0) {
+ put_bits(&s->pb, 1, sign);
}
- }
-
- if (code == DC_MAX)
- put_bits(&s->pb, 8, level);
-
- if (level != 0) {
- put_bits(&s->pb, 1, sign);
}
}
@@ -558,7 +589,10 @@ static void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n)
} else {
i = 0;
rl = &rl_table[3 + s->rl_table_index];
- run_diff = 1;
+ if(s->msmpeg4_version==2)
+ run_diff = 0;
+ else
+ run_diff = 1;
set_stat(ST_INTER_AC);
}
@@ -629,6 +663,72 @@ static VLC mb_non_intra_vlc;
static VLC mb_intra_vlc;
static VLC dc_lum_vlc[2];
static VLC dc_chroma_vlc[2];
+static VLC v2_dc_lum_vlc;
+static VLC v2_dc_chroma_vlc;
+static VLC cbpy_vlc;
+static VLC v2_intra_cbpc_vlc;
+static VLC v2_mb_type_vlc;
+static VLC v2_mv_vlc;
+
+/* Build v2_dc_lum_table / v2_dc_chroma_table: for every DC difference in -256..255 store the code in [level+256][0] and its length in bits in [level+256][1]. The codes are practically identical to the h263 ones (DCtab_lum / DCtab_chrom) except that the bits of the size prefix are inverted. */
+static void init_h263_dc_for_msmpeg4()
+{
+ static int inited=0; /* the tables are filled on the first call only */
+
+ if(!inited){
+ int level, uni_code, uni_len;
+ inited=1;
+
+ for(level=-256; level<256; level++){
+ int size, v, l;
+ /* find number of bits */
+ size = 0;
+ v = abs(level);
+ while (v) {
+ v >>= 1;
+ size++;
+ }
+
+ if (level < 0)
+ l= (-level) ^ ((1 << size) - 1); /* negative levels are stored as the inverted low size bits of |level| */
+ else
+ l= level;
+
+ /* luminance h263 */
+ uni_code= DCtab_lum[size][0];
+ uni_len = DCtab_lum[size][1];
+ uni_code ^= (1<<uni_len)-1; // invert all uni_len bits of the h263 size code
+
+ if (size > 0) {
+ uni_code<<=size; uni_code|=l; /* append the size value bits after the prefix */
+ uni_len+=size;
+ if (size > 8){
+ uni_code<<=1; uni_code|=1; /* values longer than 8 bits are followed by a single 1 bit */
+ uni_len++;
+ }
+ }
+ v2_dc_lum_table[level+256][0]= uni_code;
+ v2_dc_lum_table[level+256][1]= uni_len;
+
+ /* chrominance h263 */
+ uni_code= DCtab_chrom[size][0];
+ uni_len = DCtab_chrom[size][1];
+ uni_code ^= (1<<uni_len)-1; // invert all uni_len bits of the h263 size code
+
+ if (size > 0) {
+ uni_code<<=size; uni_code|=l; /* same value/marker layout as for luminance */
+ uni_len+=size;
+ if (size > 8){
+ uni_code<<=1; uni_code|=1;
+ uni_len++;
+ }
+ }
+ v2_dc_chroma_table[level+256][0]= uni_code;
+ v2_dc_chroma_table[level+256][1]= uni_len;
+
+ }
+ }
+}
/* init all vlc decoding tables */
int msmpeg4_decode_init_vlc(MpegEncContext *s)
@@ -659,6 +759,27 @@ int msmpeg4_decode_init_vlc(MpegEncContext *s)
init_vlc(&dc_chroma_vlc[1], 9, 120,
&table1_dc_chroma[0][1], 8, 4,
&table1_dc_chroma[0][0], 8, 4);
+
+ init_h263_dc_for_msmpeg4();
+ init_vlc(&v2_dc_lum_vlc, 9, 512,
+ &v2_dc_lum_table[0][1], 8, 4,
+ &v2_dc_lum_table[0][0], 8, 4);
+ init_vlc(&v2_dc_chroma_vlc, 9, 512,
+ &v2_dc_chroma_table[0][1], 8, 4,
+ &v2_dc_chroma_table[0][0], 8, 4);
+
+ init_vlc(&cbpy_vlc, 6, 16,
+ &cbpy_tab[0][1], 2, 1,
+ &cbpy_tab[0][0], 2, 1);
+ init_vlc(&v2_intra_cbpc_vlc, 3, 4,
+ &v2_intra_cbpc[0][1], 2, 1,
+ &v2_intra_cbpc[0][0], 2, 1);
+ init_vlc(&v2_mb_type_vlc, 5, 8,
+ &v2_mb_type[0][1], 2, 1,
+ &v2_mb_type[0][0], 2, 1);
+ init_vlc(&v2_mv_vlc, 9, 33,
+ &mvtab[0][1], 2, 1,
+ &mvtab[0][0], 2, 1);
init_vlc(&mb_non_intra_vlc, 9, 128,
&table_mb_non_intra[0][1], 8, 4,
@@ -692,16 +813,21 @@ int msmpeg4_decode_picture_header(MpegEncContext * s)
if (s->pict_type == I_TYPE) {
code = get_bits(&s->gb, 5);
- /* 0x17: one slice, 0x18: three slices */
- /* XXX: implement it */
- //printf("%d %d %d\n", code, s->slice_height, s->first_slice_line);
+ /* 0x17: one slice, 0x18: two slices */
if (code < 0x17)
return -1;
s->slice_height = s->mb_height / (code - 0x16);
- s->rl_chroma_table_index = decode012(&s->gb);
- s->rl_table_index = decode012(&s->gb);
+ if(s->msmpeg4_version==2){
+ s->rl_chroma_table_index = 2;
+ s->rl_table_index = 2;
+
+ s->dc_table_index = 0; //not used
+ }else{
+ s->rl_chroma_table_index = decode012(&s->gb);
+ s->rl_table_index = decode012(&s->gb);
- s->dc_table_index = get_bits1(&s->gb);
+ s->dc_table_index = get_bits1(&s->gb);
+ }
s->no_rounding = 1;
/* printf(" %d %d %d %d \n",
s->qscale,
@@ -711,12 +837,21 @@ int msmpeg4_decode_picture_header(MpegEncContext * s)
} else {
s->use_skip_mb_code = get_bits1(&s->gb);
- s->rl_table_index = decode012(&s->gb);
- s->rl_chroma_table_index = s->rl_table_index;
+ if(s->msmpeg4_version==2){
+ s->rl_table_index = 2;
+ s->rl_chroma_table_index = s->rl_table_index;
+
+ s->dc_table_index = 0; //not used
- s->dc_table_index = get_bits1(&s->gb);
+ s->mv_table_index = 0;
+ }else{
+ s->rl_table_index = decode012(&s->gb);
+ s->rl_chroma_table_index = s->rl_table_index;
- s->mv_table_index = get_bits1(&s->gb);
+ s->dc_table_index = get_bits1(&s->gb);
+
+ s->mv_table_index = get_bits1(&s->gb);
+ }
/* printf(" %d %d %d %d %d \n",
s->use_skip_mb_code,
s->rl_table_index,
@@ -731,7 +866,18 @@ int msmpeg4_decode_picture_header(MpegEncContext * s)
// printf("%d", s->no_rounding);
}
-
+#if 0
+if(s->msmpeg4_version==2)
+{
+int i;
+for(i=0; i<s->gb.size*8; i++)
+// printf("%d", get_bits1(&s->gb));
+ get_bits1(&s->gb);
+printf("END\n");
+return -1;
+}
+#endif
+
#ifdef DEBUG
printf("*****frame %d:\n", frame_count++);
#endif
@@ -767,6 +913,127 @@ static inline void memsetw(short *tab, int val, int n)
tab[i] = val;
}
+static void msmpeg4v2_encode_motion(MpegEncContext * s, int val) /* writes one motion value `val` to s->pb using the mvtab codes */
+{
+ int range, bit_size, sign, code, bits;
+ /* val == 0 is a single mvtab[0] code; anything else is mvtab code + sign bit + (s->f_code - 1) raw low bits */
+ if (val == 0) {
+ /* zero vector */
+ code = 0;
+ put_bits(&s->pb, mvtab[code][1], mvtab[code][0]);
+ } else {
+ bit_size = s->f_code - 1;
+ range = 1 << bit_size;
+ if (val <= -64) /* fold values at or beyond +/-64 back by 64 before coding */
+ val += 64;
+ else if (val >= 64)
+ val -= 64;
+ /* split into sign and magnitude */
+ if (val >= 0) {
+ sign = 0;
+ } else {
+ val = -val;
+ sign = 1;
+ }
+ val--; /* the magnitude is coded minus one */
+ code = (val >> bit_size) + 1; /* high part selects the mvtab entry (entry 0 is reserved for zero) */
+ bits = val & (range - 1); /* low bit_size bits are written raw below */
+
+ put_bits(&s->pb, mvtab[code][1] + 1, (mvtab[code][0] << 1) | sign); /* mvtab code with the sign bit appended */
+ if (bit_size > 0) {
+ put_bits(&s->pb, bit_size, bits);
+ }
+ }
+}
+
+/* Reads one motion value from s->gb and adds it to `pred`; returns 0xffff when the VLC read fails. Original note: this is identical to h263 except that its range is multiplied by 2 */
+static int msmpeg4v2_decode_motion(MpegEncContext * s, int pred, int f_code)
+{
+ int code, val, sign, shift;
+
+ code = get_vlc(&s->gb, &v2_mv_vlc);
+ if (code < 0)
+ return 0xffff; /* error sentinel; callers have to test for it explicitly */
+
+ if (code == 0) /* code 0 means no difference from the prediction */
+ return pred;
+ sign = get_bits1(&s->gb);
+ shift = f_code - 1; /* number of raw low-order bits that follow */
+ val = (code - 1) << shift;
+ if (shift > 0)
+ val |= get_bits(&s->gb, shift);
+ val++; /* undo the minus-one applied to the magnitude by the encoder */
+ if (sign)
+ val = -val;
+ val += pred;
+ /* fold the result back by 64 when it reaches +/-64, mirroring msmpeg4v2_encode_motion */
+ if (val <= -64)
+ val += 64;
+ else if (val >= 64)
+ val -= 64;
+
+ return val;
+}
+
+
+int msmpeg4v2_decode_mb(MpegEncContext *s, /* decodes one msmpeg4 v2 macroblock: header fields, then its 6 blocks */
+ DCTELEM block[6][64]) /* returns 0 on success or skip, -1 when a block fails to decode */
+{
+ int cbp, code, i;
+ if (s->pict_type == P_TYPE) {
+ if (s->use_skip_mb_code) {
+ if (get_bits1(&s->gb)) {
+ /* skip mb: no coded blocks, zero forward 16x16 vector */
+ s->mb_intra = 0;
+ for(i=0;i<6;i++)
+ s->block_last_index[i] = -1;
+ s->mv_dir = MV_DIR_FORWARD;
+ s->mv_type = MV_TYPE_16X16;
+ s->mv[0][0][0] = 0;
+ s->mv[0][0][1] = 0;
+ s->mb_skiped = 1;
+ return 0;
+ }
+ }
+
+ code = get_vlc(&s->gb, &v2_mb_type_vlc); /* NOTE(review): not checked for < 0, unlike the get_vlc call in msmpeg4v2_decode_motion */
+ s->mb_intra = code >>2; /* bit 2 of the v2_mb_type index is the intra flag */
+
+ cbp = code & 0x3; /* low two bits become cbp bits 0-1 (blocks 4 and 5 in the loop below) */
+ } else {
+ s->mb_intra = 1;
+ cbp= get_vlc(&s->gb, &v2_intra_cbpc_vlc); /* NOTE(review): result is used without a < 0 check */
+ }
+
+ if (!s->mb_intra) {
+ int mx, my;
+
+ cbp|= get_vlc(&s->gb, &cbpy_vlc)<<2; /* cbpy fills cbp bits 2-5 (blocks 0-3); NOTE(review): no < 0 check */
+ if((cbp&3) != 3) cbp^= 0x3C; /* inverts the four cbpy bits unless both low bits are set */
+
+ h263_pred_motion(s, 0, &mx, &my);
+ mx= msmpeg4v2_decode_motion(s, mx, 1); /* NOTE(review): the 0xffff error return is not tested here */
+ my= msmpeg4v2_decode_motion(s, my, 1);
+
+ s->mv_dir = MV_DIR_FORWARD;
+ s->mv_type = MV_TYPE_16X16;
+ s->mv[0][0][0] = mx;
+ s->mv[0][0][1] = my;
+ } else {
+ s->ac_pred = get_bits1(&s->gb);
+ cbp|= get_vlc(&s->gb, &cbpy_vlc)<<2; /* intra path: cbpy bits are used as read, without the inversion above */
+ }
+ /* block i is flagged as coded by cbp bit (5 - i) */
+ for (i = 0; i < 6; i++) {
+ if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0)
+ {
+ fprintf(stderr,"\nIgnoring error while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i); /* NOTE(review): message says "Ignoring" but the error is returned */
+ return -1;
+ }
+ }
+ return 0;
+}
+
int msmpeg4_decode_mb(MpegEncContext *s,
DCTELEM block[6][64])
{
@@ -803,6 +1070,8 @@ int msmpeg4_decode_mb(MpegEncContext *s,
}
}
+ if(s->msmpeg4_version==2) return msmpeg4v2_decode_mb(s, block); //FIXME merge if possible
+
if (s->pict_type == P_TYPE) {
set_stat(ST_INTER_MB);
if (s->use_skip_mb_code) {
@@ -915,7 +1184,12 @@ static int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
qadd = (s->qscale - 1) | 1;
i = 0;
rl = &rl_table[3 + s->rl_table_index];
- run_diff = 1;
+
+ if(s->msmpeg4_version==2)
+ run_diff = 0;
+ else
+ run_diff = 1;
+
if (!coded) {
s->block_last_index[n] = i - 1;
return 0;
@@ -999,21 +1273,32 @@ static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr)
int level, pred;
INT16 *dc_val;
- if (n < 4) {
- level = get_vlc(&s->gb, &dc_lum_vlc[s->dc_table_index]);
- } else {
- level = get_vlc(&s->gb, &dc_chroma_vlc[s->dc_table_index]);
- }
- if (level < 0)
- return -1;
+ if(s->msmpeg4_version==2){
+ if (n < 4) {
+ level = get_vlc(&s->gb, &v2_dc_lum_vlc);
+ } else {
+ level = get_vlc(&s->gb, &v2_dc_chroma_vlc);
+ }
+ if (level < 0)
+ return -1;
+ level-=256;
+ }else{ //FIXME optimize use unified tables & index
+ if (n < 4) {
+ level = get_vlc(&s->gb, &dc_lum_vlc[s->dc_table_index]);
+ } else {
+ level = get_vlc(&s->gb, &dc_chroma_vlc[s->dc_table_index]);
+ }
+ if (level < 0)
+ return -1;
- if (level == DC_MAX) {
- level = get_bits(&s->gb, 8);
- if (get_bits1(&s->gb))
- level = -level;
- } else if (level != 0) {
- if (get_bits1(&s->gb))
- level = -level;
+ if (level == DC_MAX) {
+ level = get_bits(&s->gb, 8);
+ if (get_bits1(&s->gb))
+ level = -level;
+ } else if (level != 0) {
+ if (get_bits1(&s->gb))
+ level = -level;
+ }
}
pred = msmpeg4_pred_dc(s, n, &dc_val, dir_ptr);
diff --git a/src/libffmpeg/libavcodec/msmpeg4data.h b/src/libffmpeg/libavcodec/msmpeg4data.h
index 03a261211..9dcb8276f 100644
--- a/src/libffmpeg/libavcodec/msmpeg4data.h
+++ b/src/libffmpeg/libavcodec/msmpeg4data.h
@@ -569,6 +569,13 @@ extern const UINT16 intra_vlc[103][2];
extern const INT8 intra_level[102];
extern const INT8 intra_run[102];
+extern const UINT8 DCtab_lum[13][2];
+extern const UINT8 DCtab_chrom[13][2];
+
+extern const UINT8 cbpy_tab[16][2];
+extern const UINT8 mvtab[33][2];
+
+
#define NB_RL_TABLES 6
static RLTable rl_table[NB_RL_TABLES] = {
@@ -1765,3 +1772,12 @@ static MVTable mv_tables[2] = {
table1_mvy,
}
};
+
+static const UINT8 v2_mb_type[8][2] = { /* passed to init_vlc for v2_mb_type_vlc; pairs look like {code, bit length} judging by that call - TODO confirm */
+ {1, 1}, {0 , 2}, {3 , 3}, {9 , 5}, /* indices 0-3: msmpeg4v2_decode_mb derives mb_intra = index>>2 = 0, cbp low bits = index&3 */
+ {5, 4}, {0x21, 7}, {0x20, 7}, {0x11, 6}, /* indices 4-7: mb_intra = 1 */
+};
+
+static const UINT8 v2_intra_cbpc[4][2] = { /* passed to init_vlc for v2_intra_cbpc_vlc; pairs look like {code, bit length} judging by that call - TODO confirm */
+ {1, 1}, {0, 3}, {1, 3}, {1, 2}, /* the decoded index is used directly as the low cbp bits for non-P pictures in msmpeg4v2_decode_mb */
+};
diff --git a/src/libffmpeg/libavcodec/utils.c b/src/libffmpeg/libavcodec/utils.c
index f84b17e63..180712314 100644
--- a/src/libffmpeg/libavcodec/utils.c
+++ b/src/libffmpeg/libavcodec/utils.c
@@ -113,7 +113,8 @@ int avcodec_decode_video(AVCodecContext *avctx, AVPicture *picture,
ret = avctx->codec->decode(avctx, picture, got_picture_ptr,
buf, buf_size);
- avctx->frame_number++;
+ if (*got_picture_ptr)
+ avctx->frame_number++;
return ret;
}
@@ -367,21 +368,57 @@ int avpicture_get_size(int pix_fmt, int width, int height)
/* must be called before any other functions */
void avcodec_init(void)
{
+ static int inited = 0; /* remembers that initialisation already ran */
+ /* repeated calls return immediately, so dsputil_init() runs once; NOTE(review): plain static flag, no locking visible here */
+ if (inited != 0)
+ return;
+ inited = 1;
+
dsputil_init();
}
/* simple call to use all the codecs */
void avcodec_register_all(void)
{
+ static int inited = 0; /* remembers that registration already ran */
+ /* repeated calls return immediately, so each codec is registered once; NOTE(review): plain static flag, no locking visible here */
+ if (inited != 0)
+ return;
+ inited = 1;
+
+ /* encoders */
+#ifdef CONFIG_ENCODERS
+ register_avcodec(&ac3_encoder);
+ register_avcodec(&mp2_encoder);
+#ifdef CONFIG_MP3LAME
+ register_avcodec(&mp3lame_encoder);
+#endif
+ register_avcodec(&mpeg1video_encoder);
+ register_avcodec(&h263_encoder);
+ register_avcodec(&h263p_encoder);
+ register_avcodec(&rv10_encoder);
+ register_avcodec(&mjpeg_encoder);
+ register_avcodec(&mpeg4_encoder);
+ register_avcodec(&msmpeg4v1_encoder);
+ register_avcodec(&msmpeg4v2_encoder);
+ register_avcodec(&msmpeg4v3_encoder);
+#endif /* CONFIG_ENCODERS */
+ register_avcodec(&rawvideo_codec); /* sits outside both the CONFIG_ENCODERS and CONFIG_DECODERS guards */
+
/* decoders */
#ifdef CONFIG_DECODERS
register_avcodec(&h263_decoder);
register_avcodec(&mpeg4_decoder);
- register_avcodec(&msmpeg4_decoder);
+ register_avcodec(&msmpeg4v1_decoder);
+ register_avcodec(&msmpeg4v2_decoder);
+ register_avcodec(&msmpeg4v3_decoder);
register_avcodec(&mpeg_decoder);
register_avcodec(&h263i_decoder);
register_avcodec(&rv10_decoder);
register_avcodec(&mjpeg_decoder);
+#ifdef CONFIG_AC3
+ register_avcodec(&ac3_decoder);
+#endif
#endif /* CONFIG_DECODERS */
}