summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMiguel Freitas <miguelfreitas@users.sourceforge.net>2003-01-31 18:29:43 +0000
committerMiguel Freitas <miguelfreitas@users.sourceforge.net>2003-01-31 18:29:43 +0000
commit5350f2b7701f01bc4f234d3971fb8a623a8cd72a (patch)
tree5f6cd350778863ad8d2612bce4ac2f6270919115
parent8b0e8647a0d0c279b6a355362452dff4bd6f5c05 (diff)
downloadxine-lib-5350f2b7701f01bc4f234d3971fb8a623a8cd72a.tar.gz
xine-lib-5350f2b7701f01bc4f234d3971fb8a623a8cd72a.tar.bz2
update ffmpeg
CVS patchset: 4068 CVS date: 2003/01/31 18:29:43
-rw-r--r--src/libffmpeg/libavcodec/alpha/dsputil_alpha.c12
-rw-r--r--src/libffmpeg/libavcodec/avcodec.h55
-rw-r--r--src/libffmpeg/libavcodec/common.c21
-rw-r--r--src/libffmpeg/libavcodec/common.h78
-rw-r--r--src/libffmpeg/libavcodec/dsputil.c397
-rw-r--r--src/libffmpeg/libavcodec/dsputil.h37
-rw-r--r--src/libffmpeg/libavcodec/dv.c130
-rw-r--r--src/libffmpeg/libavcodec/dvdata.h39
-rw-r--r--src/libffmpeg/libavcodec/fft.c8
-rw-r--r--src/libffmpeg/libavcodec/h263.c133
-rw-r--r--src/libffmpeg/libavcodec/h263dec.c207
-rw-r--r--src/libffmpeg/libavcodec/huffyuv.c16
-rw-r--r--src/libffmpeg/libavcodec/i386/dsputil_mmx.c256
-rw-r--r--src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h268
-rw-r--r--src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c3
-rw-r--r--src/libffmpeg/libavcodec/imgconvert.c1661
-rw-r--r--src/libffmpeg/libavcodec/mem.c28
-rw-r--r--src/libffmpeg/libavcodec/mjpeg.c18
-rw-r--r--src/libffmpeg/libavcodec/motion_est.c219
-rw-r--r--src/libffmpeg/libavcodec/motion_est_template.c83
-rw-r--r--src/libffmpeg/libavcodec/mpeg12.c19
-rw-r--r--src/libffmpeg/libavcodec/mpegaudiodec.c6
-rw-r--r--src/libffmpeg/libavcodec/mpegvideo.c299
-rw-r--r--src/libffmpeg/libavcodec/mpegvideo.h25
-rw-r--r--src/libffmpeg/libavcodec/msmpeg4.c8
-rw-r--r--src/libffmpeg/libavcodec/msmpeg4data.h5
-rw-r--r--src/libffmpeg/libavcodec/ppc/dsputil_altivec.c902
-rw-r--r--src/libffmpeg/libavcodec/ppc/dsputil_altivec.h65
-rw-r--r--src/libffmpeg/libavcodec/ppc/dsputil_ppc.c195
-rw-r--r--src/libffmpeg/libavcodec/ppc/fft_altivec.c149
-rw-r--r--src/libffmpeg/libavcodec/ppc/idct_altivec.c37
-rw-r--r--src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c169
-rw-r--r--src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c7
-rw-r--r--src/libffmpeg/libavcodec/ratecontrol.c8
-rw-r--r--src/libffmpeg/libavcodec/rv10.c2
-rw-r--r--src/libffmpeg/libavcodec/simple_idct.c80
-rw-r--r--src/libffmpeg/libavcodec/simple_idct.h18
-rw-r--r--src/libffmpeg/libavcodec/svq1.c6
-rw-r--r--src/libffmpeg/libavcodec/utils.c180
-rw-r--r--src/libffmpeg/libavcodec/wmadec.c12
-rw-r--r--src/libffmpeg/libavcodec/wmv2.c19
-rw-r--r--src/libffmpeg/xine_decoder.c20
-rw-r--r--src/libmad/xine_decoder.c4
-rw-r--r--src/libxinevdec/svq1.c4
-rw-r--r--src/xine-engine/buffer.h3
-rw-r--r--src/xine-engine/buffer_types.c9
46 files changed, 4210 insertions, 1710 deletions
diff --git a/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c b/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c
index 706462a59..5cbc00167 100644
--- a/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c
+++ b/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c
@@ -285,6 +285,16 @@ void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
put_pixels_axp_asm(block + 8, pixels + 8, line_size, h);
}
+static int sad16x16_mvi(void *s, uint8_t *a, uint8_t *b, int stride)
+{
+ return pix_abs16x16_mvi_asm(a, b, stride);
+}
+
+static int sad8x8_mvi(void *s, uint8_t *a, uint8_t *b, int stride)
+{
+ return pix_abs8x8_mvi(a, b, stride);
+}
+
void dsputil_init_alpha(DSPContext* c, unsigned mask)
{
c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
@@ -336,6 +346,8 @@ void dsputil_init_alpha(DSPContext* c, unsigned mask)
c->get_pixels = get_pixels_mvi;
c->diff_pixels = diff_pixels_mvi;
+ c->sad[0] = sad16x16_mvi;
+ c->sad[1] = sad8x8_mvi;
c->pix_abs8x8 = pix_abs8x8_mvi;
c->pix_abs16x16 = pix_abs16x16_mvi_asm;
c->pix_abs16x16_x2 = pix_abs16x16_x2_mvi;
diff --git a/src/libffmpeg/libavcodec/avcodec.h b/src/libffmpeg/libavcodec/avcodec.h
index fd7eafbd4..6ee2b84cd 100644
--- a/src/libffmpeg/libavcodec/avcodec.h
+++ b/src/libffmpeg/libavcodec/avcodec.h
@@ -1,12 +1,16 @@
#ifndef AVCODEC_H
#define AVCODEC_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#include "common.h"
#define LIBAVCODEC_VERSION_INT 0x000406
#define LIBAVCODEC_VERSION "0.4.6"
-#define LIBAVCODEC_BUILD 4652
-#define LIBAVCODEC_BUILD_STR "4652"
+#define LIBAVCODEC_BUILD 4654
+#define LIBAVCODEC_BUILD_STR "4654"
enum CodecID {
CODEC_ID_NONE,
@@ -62,21 +66,19 @@ enum CodecType {
enum PixelFormat {
PIX_FMT_YUV420P,
PIX_FMT_YUV422,
- PIX_FMT_RGB24,
- PIX_FMT_BGR24,
+ PIX_FMT_RGB24, /* 3 bytes, R is first */
+ PIX_FMT_BGR24, /* 3 bytes, B is first */
PIX_FMT_YUV422P,
PIX_FMT_YUV444P,
- PIX_FMT_RGBA32,
- PIX_FMT_BGRA32,
+ PIX_FMT_RGBA32, /* always stored in cpu endianness */
PIX_FMT_YUV410P,
PIX_FMT_YUV411P,
- PIX_FMT_RGB565,
- PIX_FMT_RGB555,
-// PIX_FMT_RGB5551,
- PIX_FMT_BGR565,
- PIX_FMT_BGR555,
-// PIX_FMT_GBR565,
-// PIX_FMT_GBR555
+ PIX_FMT_RGB565, /* always stored in cpu endianness */
+ PIX_FMT_RGB555, /* always stored in cpu endianness, most significant bit to 1 */
+ PIX_FMT_GRAY8,
+ PIX_FMT_MONOWHITE, /* 0 is white */
+ PIX_FMT_MONOBLACK, /* 0 is black */
+ PIX_FMT_NB,
};
/* currently unused, may be used if 24/32 bits samples ever supported */
@@ -520,6 +522,7 @@ typedef struct AVCodecContext {
#define FF_BUG_NO_PADDING 16
#define FF_BUG_AC_VLC 32
#define FF_BUG_QPEL_CHROMA 64
+#define FF_BUG_STD_QPEL 128
//#define FF_BUG_FAKE_SCALABILITY 16 //autodetection should work 100%
/**
@@ -924,6 +927,16 @@ typedef struct AVCodecContext {
*/
int me_subpel_quality;
+ /**
+ * callback to negotiate the pixelFormat
+ * @param fmt is the list of formats which are supported by the codec,
+ * it is terminated by -1 as 0 is a valid format, the formats are ordered by quality
+ * the first is always the native one
+ * @return the chosen format
+ * encoding: unused
+ * decoding: set by user, if not set then the native format will always be chosen
+ */
+ enum PixelFormat (*get_format)(struct AVCodecContext *s, enum PixelFormat * fmt);
} AVCodecContext;
typedef struct AVCodec {
@@ -1048,10 +1061,11 @@ void img_resample(ImgReSampleContext *s,
void img_resample_close(ImgReSampleContext *s);
-void avpicture_fill(AVPicture *picture, UINT8 *ptr,
- int pix_fmt, int width, int height);
+int avpicture_fill(AVPicture *picture, UINT8 *ptr,
+ int pix_fmt, int width, int height);
int avpicture_get_size(int pix_fmt, int width, int height);
-void avcodec_get_chroma_sub_sample(int fmt, int *h_shift, int *v_shift);
+void avcodec_get_chroma_sub_sample(int pix_fmt, int *h_shift, int *v_shift);
+const char *avcodec_get_pix_fmt_name(int pix_fmt);
/* convert among pixel formats */
int img_convert(AVPicture *dst, int dst_pix_fmt,
@@ -1142,7 +1156,7 @@ typedef struct {
const char* supported;
} avc_config_t;
-void avcodec_getopt(AVCodecContext* avctx, char* str, avc_config_t** config);
+void avcodec_getopt(AVCodecContext* avctx, const char* str, avc_config_t** config);
/**
* Interface for 0.5.0 version
@@ -1223,13 +1237,20 @@ int avcodec(void* handle, avc_cmd_t cmd, void* pin, void* pout);
/* memory */
void *av_malloc(unsigned int size);
void *av_mallocz(unsigned int size);
+void *av_realloc(void *ptr, unsigned int size);
void av_free(void *ptr);
+char *av_strdup(const char *s);
void __av_freep(void **ptr);
#define av_freep(p) __av_freep((void **)(p))
+void *av_fast_realloc(void *ptr, int *size, int min_size);
/* for static data only */
/* call av_free_static to release all staticaly allocated tables */
void av_free_static(void);
void *__av_mallocz_static(void** location, unsigned int size);
#define av_mallocz_static(p, s) __av_mallocz_static((void **)(p), s)
+#ifdef __cplusplus
+}
+#endif
+
#endif /* AVCODEC_H */
diff --git a/src/libffmpeg/libavcodec/common.c b/src/libffmpeg/libavcodec/common.c
index 40ba49811..aa766280b 100644
--- a/src/libffmpeg/libavcodec/common.c
+++ b/src/libffmpeg/libavcodec/common.c
@@ -27,6 +27,17 @@ const UINT8 ff_sqrt_tab[128]={
9, 9, 9, 9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11
};
+const uint8_t ff_log2_tab[256]={
+ 0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+ 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+ 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+ 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
+};
+
void init_put_bits(PutBitContext *s,
UINT8 *buffer, int buffer_size,
void *opaque,
@@ -99,10 +110,12 @@ void put_string(PutBitContext * pbc, char *s)
/* bit input functions */
void init_get_bits(GetBitContext *s,
- UINT8 *buffer, int buffer_size)
+ UINT8 *buffer, int bit_size)
{
+ const int buffer_size= (bit_size+7)>>3;
+
s->buffer= buffer;
- s->size= buffer_size;
+ s->size_in_bits= bit_size;
s->buffer_end= buffer + buffer_size;
#ifdef ALT_BITSTREAM_READER
s->index=0;
@@ -169,8 +182,8 @@ static int alloc_table(VLC *vlc, int size)
vlc->table_size += size;
if (vlc->table_size > vlc->table_allocated) {
vlc->table_allocated += (1 << vlc->bits);
- vlc->table = realloc(vlc->table,
- sizeof(VLC_TYPE) * 2 * vlc->table_allocated);
+ vlc->table = av_realloc(vlc->table,
+ sizeof(VLC_TYPE) * 2 * vlc->table_allocated);
if (!vlc->table)
return -1;
}
diff --git a/src/libffmpeg/libavcodec/common.h b/src/libffmpeg/libavcodec/common.h
index 405ddaa09..c2305b45e 100644
--- a/src/libffmpeg/libavcodec/common.h
+++ b/src/libffmpeg/libavcodec/common.h
@@ -88,8 +88,40 @@ typedef INT64 int64_t;
# endif
# define snprintf _snprintf
+# define vsnprintf _vsnprintf
-#else /* CONFIG_WIN32 */
+/* CONFIG_WIN32 end */
+#elif defined (CONFIG_OS2)
+/* OS/2 EMX */
+
+#include <inttypes.h>
+
+typedef unsigned char UINT8;
+typedef unsigned short UINT16;
+typedef unsigned int UINT32;
+typedef unsigned long long UINT64;
+typedef signed char INT8;
+typedef signed short INT16;
+typedef signed int INT32;
+typedef signed long long INT64;
+
+#ifdef HAVE_AV_CONFIG_H
+
+#ifndef INT64_C
+#define INT64_C(c) (c ## LL)
+#define UINT64_C(c) (c ## ULL)
+#endif
+
+#ifdef USE_FASTMEMCPY
+#include "fastmemcpy.h"
+#endif
+
+#include <float.h>
+
+#endif /* HAVE_AV_CONFIG_H */
+
+/* CONFIG_OS2 end */
+#else
/* unix */
@@ -119,7 +151,7 @@ typedef signed long long INT64;
# endif
# endif /* HAVE_AV_CONFIG_H */
-#endif /* !CONFIG_WIN32 */
+#endif /* !CONFIG_WIN32 && !CONFIG_OS2 */
#ifdef HAVE_AV_CONFIG_H
@@ -238,7 +270,7 @@ typedef struct GetBitContext {
UINT32 cache1;
int bit_count;
#endif
- int size;
+ int size_in_bits;
} GetBitContext;
static inline int get_bits_count(GetBitContext *s);
@@ -667,6 +699,12 @@ int init_vlc(VLC *vlc, int nb_bits, int nb_codes,
const void *codes, int codes_wrap, int codes_size);
void free_vlc(VLC *vlc);
+/**
+ *
+ * if the vlc code is invalid and max_depth=1 then no bits will be removed
+ * if the vlc code is invalid and max_depth>1 then the number of bits removed
+ * is undefined
+ */
#define GET_VLC(code, name, gb, table, bits, max_depth)\
{\
int n, index, nb_bits;\
@@ -775,6 +813,7 @@ void print_stats(void);
#endif
/* misc math functions */
+extern const uint8_t ff_log2_tab[256];
static inline int av_log2(unsigned int v)
{
@@ -789,20 +828,26 @@ static inline int av_log2(unsigned int v)
v >>= 8;
n += 8;
}
- if (v & 0xf0) {
- v >>= 4;
- n += 4;
- }
- if (v & 0xc) {
- v >>= 2;
- n += 2;
- }
- if (v & 0x2) {
- n++;
+ n += ff_log2_tab[v];
+
+ return n;
+}
+
+static inline int av_log2_16bit(unsigned int v)
+{
+ int n;
+
+ n = 0;
+ if (v & 0xff00) {
+ v >>= 8;
+ n += 8;
}
+ n += ff_log2_tab[v];
+
return n;
}
+
/* median of 3 */
static inline int mid_pred(int a, int b, int c)
{
@@ -832,7 +877,7 @@ static inline int clip(int a, int amin, int amax)
}
/* math */
-extern const UINT8 ff_sqrt_tab[128];
+extern const uint8_t ff_sqrt_tab[128];
int ff_gcd(int a, int b);
@@ -902,6 +947,11 @@ if((y)<(x)){\
#define CLAMP_TO_8BIT(d) ((d > 0xff) ? 0xff : (d < 0) ? 0 : d)
+/* avoid usage of various functions */
+#define malloc please_use_av_malloc
+#define free please_use_av_free
+#define realloc please_use_av_realloc
+
#endif /* HAVE_AV_CONFIG_H */
#endif /* COMMON_H */
diff --git a/src/libffmpeg/libavcodec/dsputil.c b/src/libffmpeg/libavcodec/dsputil.c
index 0d7556f65..06da93ba7 100644
--- a/src/libffmpeg/libavcodec/dsputil.c
+++ b/src/libffmpeg/libavcodec/dsputil.c
@@ -129,6 +129,7 @@ static int pix_norm1_c(UINT8 * pix, int line_size)
s = 0;
for (i = 0; i < 16; i++) {
for (j = 0; j < 16; j += 8) {
+#if 0
s += sq[pix[0]];
s += sq[pix[1]];
s += sq[pix[2]];
@@ -137,6 +138,30 @@ static int pix_norm1_c(UINT8 * pix, int line_size)
s += sq[pix[5]];
s += sq[pix[6]];
s += sq[pix[7]];
+#else
+#if LONG_MAX > 2147483647
+ register uint64_t x=*(uint64_t*)pix;
+ s += sq[x&0xff];
+ s += sq[(x>>8)&0xff];
+ s += sq[(x>>16)&0xff];
+ s += sq[(x>>24)&0xff];
+ s += sq[(x>>32)&0xff];
+ s += sq[(x>>40)&0xff];
+ s += sq[(x>>48)&0xff];
+ s += sq[(x>>56)&0xff];
+#else
+ register uint32_t x=*(uint32_t*)pix;
+ s += sq[x&0xff];
+ s += sq[(x>>8)&0xff];
+ s += sq[(x>>16)&0xff];
+ s += sq[(x>>24)&0xff];
+ x=*(uint32_t*)(pix+4);
+ s += sq[x&0xff];
+ s += sq[(x>>8)&0xff];
+ s += sq[(x>>16)&0xff];
+ s += sq[(x>>24)&0xff];
+#endif
+#endif
pix += 8;
}
pix += line_size - 16;
@@ -166,27 +191,32 @@ static int sse8_c(void *v, UINT8 * pix1, UINT8 * pix2, int line_size)
return s;
}
-static int sse16_c(void *v, UINT8 * pix1, UINT8 * pix2, int line_size)
+static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)
{
- int s, i, j;
- UINT32 *sq = squareTbl + 256;
+ int s, i;
+ uint32_t *sq = squareTbl + 256;
s = 0;
for (i = 0; i < 16; i++) {
- for (j = 0; j < 16; j += 8) {
- s += sq[pix1[0] - pix2[0]];
- s += sq[pix1[1] - pix2[1]];
- s += sq[pix1[2] - pix2[2]];
- s += sq[pix1[3] - pix2[3]];
- s += sq[pix1[4] - pix2[4]];
- s += sq[pix1[5] - pix2[5]];
- s += sq[pix1[6] - pix2[6]];
- s += sq[pix1[7] - pix2[7]];
- pix1 += 8;
- pix2 += 8;
- }
- pix1 += line_size - 16;
- pix2 += line_size - 16;
+ s += sq[pix1[ 0] - pix2[ 0]];
+ s += sq[pix1[ 1] - pix2[ 1]];
+ s += sq[pix1[ 2] - pix2[ 2]];
+ s += sq[pix1[ 3] - pix2[ 3]];
+ s += sq[pix1[ 4] - pix2[ 4]];
+ s += sq[pix1[ 5] - pix2[ 5]];
+ s += sq[pix1[ 6] - pix2[ 6]];
+ s += sq[pix1[ 7] - pix2[ 7]];
+ s += sq[pix1[ 8] - pix2[ 8]];
+ s += sq[pix1[ 9] - pix2[ 9]];
+ s += sq[pix1[10] - pix2[10]];
+ s += sq[pix1[11] - pix2[11]];
+ s += sq[pix1[12] - pix2[12]];
+ s += sq[pix1[13] - pix2[13]];
+ s += sq[pix1[14] - pix2[14]];
+ s += sq[pix1[15] - pix2[15]];
+
+ pix1 += line_size;
+ pix2 += line_size;
}
return s;
}
@@ -801,7 +831,8 @@ static void OPNAME ## mpeg4_qpel8_h_lowpass(UINT8 *dst, UINT8 *src, int dstStrid
}\
}\
\
-static void OPNAME ## mpeg4_qpel8_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int w){\
+static void OPNAME ## mpeg4_qpel8_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride){\
+ const int w=8;\
UINT8 *cm = cropTbl + MAX_NEG_CROP;\
int i;\
for(i=0; i<w; i++)\
@@ -923,107 +954,163 @@ static void OPNAME ## qpel8_mc01_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[16*9];\
UINT8 half[64];\
copy_block9(full, src, 16, stride, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16, 8);\
+ put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc02_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[16*9];\
copy_block9(full, src, 16, stride, 9);\
- OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16, 8);\
+ OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
}\
\
static void OPNAME ## qpel8_mc03_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[16*9];\
UINT8 half[64];\
copy_block9(full, src, 16, stride, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16, 8);\
+ put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
}\
-static void OPNAME ## qpel8_mc11_c(UINT8 *dst, UINT8 *src, int stride){\
+void ff_ ## OPNAME ## qpel8_mc11_old_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[16*9];\
UINT8 halfH[72];\
UINT8 halfV[64];\
UINT8 halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16, 8);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
-static void OPNAME ## qpel8_mc31_c(UINT8 *dst, UINT8 *src, int stride){\
+static void OPNAME ## qpel8_mc11_c(UINT8 *dst, UINT8 *src, int stride){\
+ UINT8 full[16*9];\
+ UINT8 halfH[72];\
+ UINT8 halfHV[64];\
+ copy_block9(full, src, 16, stride, 9);\
+ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+ put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+ OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc31_old_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[16*9];\
UINT8 halfH[72];\
UINT8 halfV[64];\
UINT8 halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16, 8);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
-static void OPNAME ## qpel8_mc13_c(UINT8 *dst, UINT8 *src, int stride){\
+static void OPNAME ## qpel8_mc31_c(UINT8 *dst, UINT8 *src, int stride){\
+ UINT8 full[16*9];\
+ UINT8 halfH[72];\
+ UINT8 halfHV[64];\
+ copy_block9(full, src, 16, stride, 9);\
+ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+ put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+ OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc13_old_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[16*9];\
UINT8 halfH[72];\
UINT8 halfV[64];\
UINT8 halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16, 8);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
-static void OPNAME ## qpel8_mc33_c(UINT8 *dst, UINT8 *src, int stride){\
+static void OPNAME ## qpel8_mc13_c(UINT8 *dst, UINT8 *src, int stride){\
+ UINT8 full[16*9];\
+ UINT8 halfH[72];\
+ UINT8 halfHV[64];\
+ copy_block9(full, src, 16, stride, 9);\
+ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+ put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+ OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc33_old_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[16*9];\
UINT8 halfH[72];\
UINT8 halfV[64];\
UINT8 halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16, 8);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
+static void OPNAME ## qpel8_mc33_c(UINT8 *dst, UINT8 *src, int stride){\
+ UINT8 full[16*9];\
+ UINT8 halfH[72];\
+ UINT8 halfHV[64];\
+ copy_block9(full, src, 16, stride, 9);\
+ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+ put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+ OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+}\
static void OPNAME ## qpel8_mc21_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 halfH[72];\
UINT8 halfHV[64];\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc23_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 halfH[72];\
UINT8 halfHV[64];\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
-static void OPNAME ## qpel8_mc12_c(UINT8 *dst, UINT8 *src, int stride){\
+void ff_ ## OPNAME ## qpel8_mc12_old_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[16*9];\
UINT8 halfH[72];\
UINT8 halfV[64];\
UINT8 halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16, 8);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
-static void OPNAME ## qpel8_mc32_c(UINT8 *dst, UINT8 *src, int stride){\
+static void OPNAME ## qpel8_mc12_c(UINT8 *dst, UINT8 *src, int stride){\
+ UINT8 full[16*9];\
+ UINT8 halfH[72];\
+ copy_block9(full, src, 16, stride, 9);\
+ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+ put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
+ OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc32_old_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[16*9];\
UINT8 halfH[72];\
UINT8 halfV[64];\
UINT8 halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16, 8);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
+static void OPNAME ## qpel8_mc32_c(UINT8 *dst, UINT8 *src, int stride){\
+ UINT8 full[16*9];\
+ UINT8 halfH[72];\
+ copy_block9(full, src, 16, stride, 9);\
+ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+ put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
+ OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
+}\
static void OPNAME ## qpel8_mc22_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 halfH[72];\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
- OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8, 8);\
+ OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
static void OPNAME ## qpel16_mc00_c (UINT8 *dst, UINT8 *src, int stride){\
OPNAME ## pixels16_c(dst, src, stride, 16);\
@@ -1066,7 +1153,7 @@ static void OPNAME ## qpel16_mc03_c(UINT8 *dst, UINT8 *src, int stride){\
put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
}\
-static void OPNAME ## qpel16_mc11_c(UINT8 *dst, UINT8 *src, int stride){\
+void ff_ ## OPNAME ## qpel16_mc11_old_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[24*17];\
UINT8 halfH[272];\
UINT8 halfV[256];\
@@ -1077,7 +1164,17 @@ static void OPNAME ## qpel16_mc11_c(UINT8 *dst, UINT8 *src, int stride){\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
-static void OPNAME ## qpel16_mc31_c(UINT8 *dst, UINT8 *src, int stride){\
+static void OPNAME ## qpel16_mc11_c(UINT8 *dst, UINT8 *src, int stride){\
+ UINT8 full[24*17];\
+ UINT8 halfH[272];\
+ UINT8 halfHV[256];\
+ copy_block17(full, src, 24, stride, 17);\
+ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+ put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
+ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+ OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc31_old_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[24*17];\
UINT8 halfH[272];\
UINT8 halfV[256];\
@@ -1088,7 +1185,17 @@ static void OPNAME ## qpel16_mc31_c(UINT8 *dst, UINT8 *src, int stride){\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
-static void OPNAME ## qpel16_mc13_c(UINT8 *dst, UINT8 *src, int stride){\
+static void OPNAME ## qpel16_mc31_c(UINT8 *dst, UINT8 *src, int stride){\
+ UINT8 full[24*17];\
+ UINT8 halfH[272];\
+ UINT8 halfHV[256];\
+ copy_block17(full, src, 24, stride, 17);\
+ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+ put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
+ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+ OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc13_old_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[24*17];\
UINT8 halfH[272];\
UINT8 halfV[256];\
@@ -1099,7 +1206,17 @@ static void OPNAME ## qpel16_mc13_c(UINT8 *dst, UINT8 *src, int stride){\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
-static void OPNAME ## qpel16_mc33_c(UINT8 *dst, UINT8 *src, int stride){\
+static void OPNAME ## qpel16_mc13_c(UINT8 *dst, UINT8 *src, int stride){\
+ UINT8 full[24*17];\
+ UINT8 halfH[272];\
+ UINT8 halfHV[256];\
+ copy_block17(full, src, 24, stride, 17);\
+ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+ put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
+ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+ OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc33_old_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[24*17];\
UINT8 halfH[272];\
UINT8 halfV[256];\
@@ -1110,6 +1227,16 @@ static void OPNAME ## qpel16_mc33_c(UINT8 *dst, UINT8 *src, int stride){\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
+static void OPNAME ## qpel16_mc33_c(UINT8 *dst, UINT8 *src, int stride){\
+ UINT8 full[24*17];\
+ UINT8 halfH[272];\
+ UINT8 halfHV[256];\
+ copy_block17(full, src, 24, stride, 17);\
+ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+ put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
+ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+ OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+}\
static void OPNAME ## qpel16_mc21_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 halfH[272];\
UINT8 halfHV[256];\
@@ -1124,7 +1251,7 @@ static void OPNAME ## qpel16_mc23_c(UINT8 *dst, UINT8 *src, int stride){\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
-static void OPNAME ## qpel16_mc12_c(UINT8 *dst, UINT8 *src, int stride){\
+void ff_ ## OPNAME ## qpel16_mc12_old_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[24*17];\
UINT8 halfH[272];\
UINT8 halfV[256];\
@@ -1135,7 +1262,15 @@ static void OPNAME ## qpel16_mc12_c(UINT8 *dst, UINT8 *src, int stride){\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
-static void OPNAME ## qpel16_mc32_c(UINT8 *dst, UINT8 *src, int stride){\
+static void OPNAME ## qpel16_mc12_c(UINT8 *dst, UINT8 *src, int stride){\
+ UINT8 full[24*17];\
+ UINT8 halfH[272];\
+ copy_block17(full, src, 24, stride, 17);\
+ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+ put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
+ OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc32_old_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[24*17];\
UINT8 halfH[272];\
UINT8 halfV[256];\
@@ -1146,6 +1281,14 @@ static void OPNAME ## qpel16_mc32_c(UINT8 *dst, UINT8 *src, int stride){\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
+static void OPNAME ## qpel16_mc32_c(UINT8 *dst, UINT8 *src, int stride){\
+ UINT8 full[24*17];\
+ UINT8 halfH[272];\
+ copy_block17(full, src, 24, stride, 17);\
+ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+ put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
+ OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
+}\
static void OPNAME ## qpel16_mc22_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 halfH[272];\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
@@ -1498,7 +1641,7 @@ static void clear_blocks_c(DCTELEM *blocks)
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
int i;
- for(i=0; i+7<w; i++){
+ for(i=0; i+7<w; i+=8){
dst[i+0] += src[i+0];
dst[i+1] += src[i+1];
dst[i+2] += src[i+2];
@@ -1514,7 +1657,7 @@ static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
int i;
- for(i=0; i+7<w; i++){
+ for(i=0; i+7<w; i+=8){
dst[i+0] = src1[i+0]-src2[i+0];
dst[i+1] = src1[i+1]-src2[i+1];
dst[i+2] = src1[i+2]-src2[i+2];
@@ -1639,7 +1782,8 @@ static int hadamard8_abs_c(uint8_t *src, int stride, int mean){
static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
MpegEncContext * const s= (MpegEncContext *)c;
- DCTELEM temp[64];
+ uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
+ DCTELEM * const temp= (DCTELEM*)aligned_temp;
int sum=0, i;
s->dsp.diff_pixels(temp, src1, src2, stride);
@@ -1651,11 +1795,13 @@ static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2
return sum;
}
-void simple_idct(INT16 *block); //FIXME
+void simple_idct(DCTELEM *block); //FIXME
static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
MpegEncContext * const s= (MpegEncContext *)c;
- DCTELEM temp[64], bak[64];
+ uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64*2/8];
+ DCTELEM * const temp= (DCTELEM*)aligned_temp;
+ DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
int sum=0, i;
s->mb_intra=0;
@@ -1664,7 +1810,7 @@ static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s
memcpy(bak, temp, 64*sizeof(DCTELEM));
- s->dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
+ s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
s->dct_unquantize(s, temp, 0, s->qscale);
simple_idct(temp); //FIXME
@@ -1674,9 +1820,144 @@ static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s
return sum;
}
+static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){ /*
+ * Rate-distortion comparison function for one 8x8 block.
+ * The difference block produced by diff_pixels() is run through
+ * s->fast_dct_quantize(), the number of VLC bits needed for the resulting
+ * coefficients is summed from the encoder's length tables, and the block is
+ * then dequantized and added (idct_add) onto a private copy of src2 so the
+ * error against src1 can be measured with sse[1].
+ * c is really a MpegEncContext*; stride is the distance in bytes between rows.
+ * Returns distortion + ((bits*qscale*qscale*109 + 64)>>7).
+ */
+ MpegEncContext * const s= (MpegEncContext *)c;
+ const UINT8 *scantable= s->intra_scantable.permutated; /* NOTE(review): the intra scantable is used for inter blocks as well */
+ uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; /* storage for 64 DCTELEMs, declared as uint64_t to get the alignment */
+ uint64_t __align8 aligned_bak[stride]; /* stride*8 bytes: room for the 8 rows written at bak + i*stride below (needs stride >= 8) */
+ DCTELEM * const temp= (DCTELEM*)aligned_temp;
+ uint8_t * const bak= (uint8_t*)aligned_bak;
+ int i, last, run, bits, level, distoration, start_i;
+ const int esc_length= s->ac_esc_length; /* bit cost charged when a level does not fit the length tables */
+ uint8_t * length; /* bit lengths for every coefficient before the last one */
+ uint8_t * last_length; /* bit lengths for the last coefficient */
+ /* copy the 8x8 block at src2 into bak, 8 bytes (two 32 bit words) per row */
+ for(i=0; i<8; i++){
+ ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
+ ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
+ }
+
+ s->dsp.diff_pixels(temp, src1, src2, stride);
+ /* the returned last index is kept both in the context and in the local "last" */
+ s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
+
+ bits=0;
+ /* intra: the DC coefficient is costed separately and the AC scan starts at 1 */
+ if (s->mb_intra) {
+ start_i = 1;
+ length = s->intra_ac_vlc_length;
+ last_length= s->intra_ac_vlc_last_length;
+ bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma -- NOTE(review): temp[0] + 256 is not range checked before indexing the table
+ } else {
+ start_i = 0;
+ length = s->inter_ac_vlc_length;
+ last_length= s->inter_ac_vlc_last_length;
+ }
+
+ if(last>=start_i){
+ run=0; /* number of zero coefficients seen since the previous nonzero one */
+ for(i=start_i; i<last; i++){ /* every scan position before the last coefficient */
+ int j= scantable[i];
+ level= temp[j];
+
+ if(level){
+ level+=64; /* bias the level so that -64..63 becomes 0..127 */
+ if((level&(~127)) == 0){ /* biased level lies in 0..127: use the table */
+ bits+= length[UNI_AC_ENC_INDEX(run, level)];
+ }else
+ bits+= esc_length;
+ run=0;
+ }else
+ run++;
+ }
+ i= scantable[last]; /* i now holds the block position of the last coefficient */
+
+ level= temp[i] + 64;
+
+ assert(level - 64); /* the coefficient at "last" is expected to be nonzero */
+
+ if((level&(~127)) == 0){
+ bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
+ }else
+ bits+= esc_length;
+
+ }
+ /* dequantize only when the quantizer reported a last index >= 0 */
+ if(last>=0){
+ s->dct_unquantize(s, temp, 0, s->qscale);
+ }
+ /* bak (the copy of src2) receives the output of idct_add and is then compared with src1 */
+ s->idct_add(bak, stride, temp);
+
+ distoration= s->dsp.sse[1](NULL, bak, src1, stride);
+ /* the bit count is weighted by qscale*qscale*109/128, rounded */
+ return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7);
+}
+
+static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){ /*
+ * Bit-count comparison function for one 8x8 block.
+ * The difference block produced by diff_pixels() is run through
+ * s->fast_dct_quantize() and the number of VLC bits needed for the resulting
+ * coefficients is summed from the encoder's length tables.
+ * c is really a MpegEncContext*; stride is passed on to diff_pixels().
+ * Returns the accumulated bit count.
+ */
+ MpegEncContext * const s= (MpegEncContext *)c;
+ const UINT8 *scantable= s->intra_scantable.permutated; /* NOTE(review): the intra scantable is used for inter blocks as well */
+ uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; /* storage for 64 DCTELEMs, declared as uint64_t to get the alignment */
+ DCTELEM * const temp= (DCTELEM*)aligned_temp;
+ int i, last, run, bits, level, start_i;
+ const int esc_length= s->ac_esc_length; /* bit cost charged when a level does not fit the length tables */
+ uint8_t * length; /* bit lengths for every coefficient before the last one */
+ uint8_t * last_length; /* bit lengths for the last coefficient */
+
+ s->dsp.diff_pixels(temp, src1, src2, stride);
+ /* the returned last index is kept both in the context and in the local "last" */
+ s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
+
+ bits=0;
+ /* intra: the DC coefficient is costed separately and the AC scan starts at 1 */
+ if (s->mb_intra) {
+ start_i = 1;
+ length = s->intra_ac_vlc_length;
+ last_length= s->intra_ac_vlc_last_length;
+ bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma -- NOTE(review): temp[0] + 256 is not range checked before indexing the table
+ } else {
+ start_i = 0;
+ length = s->inter_ac_vlc_length;
+ last_length= s->inter_ac_vlc_last_length;
+ }
+
+ if(last>=start_i){
+ run=0; /* number of zero coefficients seen since the previous nonzero one */
+ for(i=start_i; i<last; i++){ /* every scan position before the last coefficient */
+ int j= scantable[i];
+ level= temp[j];
+
+ if(level){
+ level+=64; /* bias the level so that -64..63 becomes 0..127 */
+ if((level&(~127)) == 0){ /* biased level lies in 0..127: use the table */
+ bits+= length[UNI_AC_ENC_INDEX(run, level)];
+ }else
+ bits+= esc_length;
+ run=0;
+ }else
+ run++;
+ }
+ i= scantable[last]; /* i now holds the block position of the last coefficient */
+
+ level= temp[i] + 64;
+
+ assert(level - 64); /* the coefficient at "last" is expected to be nonzero */
+
+ if((level&(~127)) == 0){
+ bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
+ }else
+ bits+= esc_length;
+ }
+
+ return bits;
+}
+
+
WARPER88_1616(hadamard8_diff_c, hadamard8_diff16_c)
WARPER88_1616(dct_sad8x8_c, dct_sad16x16_c)
WARPER88_1616(quant_psnr8x8_c, quant_psnr16x16_c)
+WARPER88_1616(rd8x8_c, rd16x16_c)
+WARPER88_1616(bit8x8_c, bit16x16_c)
void dsputil_init(DSPContext* c, unsigned mask)
{
@@ -1790,7 +2071,13 @@ void dsputil_init(DSPContext* c, unsigned mask)
c->quant_psnr[0]= quant_psnr16x16_c;
c->quant_psnr[1]= quant_psnr8x8_c;
-
+
+ c->rd[0]= rd16x16_c;
+ c->rd[1]= rd8x8_c;
+
+ c->bit[0]= bit16x16_c;
+ c->bit[1]= bit8x8_c;
+
c->add_bytes= add_bytes_c;
c->diff_bytes= diff_bytes_c;
diff --git a/src/libffmpeg/libavcodec/dsputil.h b/src/libffmpeg/libavcodec/dsputil.h
index f34a8f078..2220b4871 100644
--- a/src/libffmpeg/libavcodec/dsputil.h
+++ b/src/libffmpeg/libavcodec/dsputil.h
@@ -30,6 +30,7 @@
#undef DEBUG
/* dct code */
typedef short DCTELEM;
+//typedef int DCTELEM;
void fdct_ifast (DCTELEM *data);
void ff_jpeg_fdct_islow (DCTELEM *data);
@@ -74,7 +75,23 @@ void clear_blocks_c(DCTELEM *blocks);
typedef void (*op_pixels_func)(UINT8 *block/*align width (8 or 16)*/, const UINT8 *pixels/*align 1*/, int line_size, int h);
typedef void (*qpel_mc_func)(UINT8 *dst/*align width (8 or 16)*/, UINT8 *src/*align 1*/, int stride);
-
+#define DEF_OLD_QPEL(name)\
+void ff_put_ ## name (UINT8 *dst/*align width (8 or 16)*/, UINT8 *src/*align 1*/, int stride);\
+void ff_put_no_rnd_ ## name (UINT8 *dst/*align width (8 or 16)*/, UINT8 *src/*align 1*/, int stride);\
+void ff_avg_ ## name (UINT8 *dst/*align width (8 or 16)*/, UINT8 *src/*align 1*/, int stride);
+
+DEF_OLD_QPEL(qpel16_mc11_old_c)
+DEF_OLD_QPEL(qpel16_mc31_old_c)
+DEF_OLD_QPEL(qpel16_mc12_old_c)
+DEF_OLD_QPEL(qpel16_mc32_old_c)
+DEF_OLD_QPEL(qpel16_mc13_old_c)
+DEF_OLD_QPEL(qpel16_mc33_old_c)
+DEF_OLD_QPEL(qpel8_mc11_old_c)
+DEF_OLD_QPEL(qpel8_mc31_old_c)
+DEF_OLD_QPEL(qpel8_mc12_old_c)
+DEF_OLD_QPEL(qpel8_mc32_old_c)
+DEF_OLD_QPEL(qpel8_mc13_old_c)
+DEF_OLD_QPEL(qpel8_mc33_old_c)
#define CALL_2X_PIXELS(a, b, n)\
static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
@@ -105,6 +122,8 @@ typedef struct DSPContext {
me_cmp_func hadamard8_diff[2];
me_cmp_func dct_sad[2];
me_cmp_func quant_psnr[2];
+ me_cmp_func bit[2];
+ me_cmp_func rd[2];
int (*hadamard8_abs )(uint8_t *src, int stride, int mean);
me_cmp_func me_pre_cmp[11];
@@ -143,10 +162,14 @@ void dsputil_init(DSPContext* p, unsigned mask);
* permute block according to permuatation.
* @param last last non zero element in scantable order
*/
-void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last);
+void ff_block_permute(DCTELEM *block, UINT8 *permutation, const UINT8 *scantable, int last);
#define emms_c()
+/* should be defined by architectures supporting
+ one or more MultiMedia extension */
+int mm_support(void);
+
#if defined(HAVE_MMX)
#undef emms_c
@@ -161,7 +184,6 @@ void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable,
extern int mm_flags;
-int mm_support(void);
void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size);
void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size);
@@ -211,6 +233,10 @@ void dsputil_init_alpha(DSPContext* c, unsigned mask);
extern int mm_flags;
+#if defined(HAVE_ALTIVEC) && !defined(CONFIG_DARWIN)
+#include <altivec.h>
+#endif
+
#define __align8 __attribute__ ((aligned (16)))
void dsputil_init_ppc(DSPContext* c, unsigned mask);
@@ -314,7 +340,12 @@ static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int st
/* btw, rintf() is existing on fbsd too -- alex */
static inline long int lrintf(float x) /* local lrintf(); presumably a fallback for platforms lacking it (the enclosing #if is not visible here) */
{
+#ifdef CONFIG_WIN32
+ /* XXX: incorrect, but make it compile */
+ return (int)(x); /* plain cast: drops the fractional part instead of rounding */
+#else
return (int)(rint(x)); /* round with rint(), then narrow to int */
+#endif
}
#endif
diff --git a/src/libffmpeg/libavcodec/dv.c b/src/libffmpeg/libavcodec/dv.c
index f436caf12..554b593e7 100644
--- a/src/libffmpeg/libavcodec/dv.c
+++ b/src/libffmpeg/libavcodec/dv.c
@@ -157,7 +157,7 @@ static const UINT16 block_sizes[6] = {
/* decode ac coefs */
static void dv_decode_ac(DVVideoDecodeContext *s,
- BlockInfo *mb, INT16 *block, int last_index)
+ BlockInfo *mb, DCTELEM *block, int last_index)
{
int last_re_index;
int shift_offset = mb->shift_offset;
@@ -195,7 +195,7 @@ static void dv_decode_ac(DVVideoDecodeContext *s,
v, partial_bit_count, (mb->partial_bit_buffer << l));
#endif
/* try to read the codeword */
- init_get_bits(&gb1, buf, 4);
+ init_get_bits(&gb1, buf, 4*8);
{
OPEN_READER(re1, &gb1);
UPDATE_CACHE(re1, &gb1);
@@ -333,7 +333,7 @@ static inline void dv_decode_video_segment(DVVideoDecodeContext *s,
block = block1;
for(j = 0;j < 6; j++) {
/* NOTE: size is not important here */
- init_get_bits(&s->gb, buf_ptr, 14);
+ init_get_bits(&s->gb, buf_ptr, 14*8);
/* get the dc */
dc = get_bits(&s->gb, 9);
@@ -382,7 +382,7 @@ static inline void dv_decode_video_segment(DVVideoDecodeContext *s,
#endif
block = block1;
mb = mb1;
- init_get_bits(&s->gb, mb_bit_buffer, 80);
+ init_get_bits(&s->gb, mb_bit_buffer, 80*8);
for(j = 0;j < 6; j++) {
if (!mb->eob_reached && s->gb.index < mb_bit_count) {
dv_decode_ac(s, mb, block, mb_bit_count);
@@ -421,7 +421,7 @@ static inline void dv_decode_video_segment(DVVideoDecodeContext *s,
#endif
block = &s->block[0][0];
mb = mb_data;
- init_get_bits(&s->gb, vs_bit_buffer, 5 * 80);
+ init_get_bits(&s->gb, vs_bit_buffer, 5 * 80*8);
for(mb_index = 0; mb_index < 5; mb_index++) {
for(j = 0;j < 6; j++) {
if (!mb->eob_reached) {
@@ -501,7 +501,7 @@ static int dvvideo_decode_frame(AVCodecContext *avctx,
const UINT16 *mb_pos_ptr;
/* parse id */
- init_get_bits(&s->gb, buf, buf_size);
+ init_get_bits(&s->gb, buf, buf_size*8);
sct = get_bits(&s->gb, 3);
if (sct != 0)
return -1;
@@ -634,7 +634,6 @@ AVCodec dvvideo_decoder = {
typedef struct DVAudioDecodeContext {
AVCodecContext *avctx;
GetBitContext gb;
-
} DVAudioDecodeContext;
static int dvaudio_decode_init(AVCodecContext *avctx)
@@ -643,13 +642,126 @@ static int dvaudio_decode_init(AVCodecContext *avctx)
return 0;
}
+static UINT16 dv_audio_12to16(UINT16 sample) /* expands one 12 bit nonlinear DV audio sample to a 16 bit value */
+{
+ UINT16 shift, result;
+ /* sign extend: values with bit 11 set get the top four bits set as well */
+ sample = (sample < 0x800) ? sample : sample | 0xf000;
+ shift = (sample & 0xf00) >> 8; /* bits 8..11 select the segment */
+ /* segments 0, 1, 0xe and 0xf are passed through unchanged */
+ if (shift < 0x2 || shift > 0xd) {
+ result = sample;
+ } else if (shift < 0x8) { /* segments 2..7 (bit 11 clear) */
+ shift--;
+ result = (sample - (256 * shift)) << shift;
+ } else { /* segments 8..0xd (bit 11 set) */
+ shift = 0xe - shift;
+ result = ((sample + ((256 * shift) + 1)) << shift) - 1;
+ }
+
+ return result;
+}
+
/* NOTE: exactly one frame must be given (120000 bytes for NTSC,
- 144000 bytes for PAL) */
+ 144000 bytes for PAL)
+
+ There's a couple of assumptions being made here:
+ 1. We don't do any kind of audio error correction. It means,
+ that erroneous samples 0x8000 are being passed upwards.
+ Do we need to silence erroneous samples ? Average them ?
+ 2. We don't do software emphasis.
+ 3. We are not checking for 'speed' argument being valid.
+ 4. Audio is always returned as 16bit linear samples: 12bit
+ nonlinear samples are converted into 16bit linear ones.
+*/
static int dvaudio_decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
UINT8 *buf, int buf_size)
{
- // DVAudioDecodeContext *s = avctx->priv_data;
+ DVVideoDecodeContext *s = avctx->priv_data; /* NOTE(review): cast to the video context type although DVAudioDecodeContext is declared above; only s->gb is used here -- confirm both layouts place gb identically */
+ const UINT16 (*unshuffle)[9];
+ int smpls, freq, quant, sys, stride, difseg, ad, dp, nb_dif_segs, i;
+ UINT16 lc, rc;
+ UINT8 *buf_ptr;
+ /* the 5 bytes (40 bits) starting at buf[AAUX_OFFSET] are parsed field by field below */
+ /* parse id */
+ init_get_bits(&s->gb, &buf[AAUX_OFFSET], 5*8);
+ i = get_bits(&s->gb, 8);
+ if (i != 0x50) { /* No audio ? */
+ *data_size = 0;
+ return buf_size;
+ }
+
+ get_bits(&s->gb, 1); /* 0 - locked audio, 1 - unlocked audio */
+ skip_bits(&s->gb, 1);
+ smpls = get_bits(&s->gb, 6); /* samples in this frame - min. samples */
+
+ skip_bits(&s->gb, 8);
+
+ skip_bits(&s->gb, 2);
+ sys = get_bits(&s->gb, 1); /* 0 - 60 fields, 1 = 50 fields */
+ skip_bits(&s->gb, 5);
+
+ get_bits(&s->gb, 1); /* 0 - emphasis on, 1 - emphasis off */
+ get_bits(&s->gb, 1); /* 0 - reserved, 1 - emphasis time constant 50/15us */
+ freq = get_bits(&s->gb, 3); /* 0 - 48KHz, 1 - 44,1kHz, 2 - 32 kHz */
+ quant = get_bits(&s->gb, 3); /* 0 - 16bit linear, 1 - 12bit nonlinear */
+
+ if (quant > 1)
+ return -1; /* Unsupported quantization */
+ /* NOTE(review): freq is a 3 bit field (0..7) but dv_audio_frequency and dv_audio_min_samples only have 3 columns; values above 2 are not rejected before indexing */
+ avctx->sample_rate = dv_audio_frequency[freq];
+ // What about:
+ // avctx->bit_rate =
+ // avctx->frame_size =
+ /* output size in bytes: samples in this frame * channels * 2 bytes per sample */
+ *data_size = (dv_audio_min_samples[sys][freq] + smpls) *
+ avctx->channels * 2;
+ /* pick the segment count, output stride and placement table for the field rate */
+ if (sys) {
+ nb_dif_segs = 12;
+ stride = 108;
+ unshuffle = dv_place_audio50;
+ } else {
+ nb_dif_segs = 10;
+ stride = 90;
+ unshuffle = dv_place_audio60;
+ }
+
+ /* for each DIF segment */
+ buf_ptr = buf;
+ for (difseg = 0; difseg < nb_dif_segs; difseg++) {
+ buf_ptr += 6 * 80; /* skip DIF segment header */
+ for (ad = 0; ad < 9; ad++) { /* 9 audio blocks per segment; buf_ptr points at the current one */
+ /* sample data occupies bytes 8..79 of the block */
+ for (dp = 8; dp < 80; dp+=2) {
+ if (quant == 0) { /* 16bit quantization */
+ i = unshuffle[difseg][ad] + (dp - 8)/2 * stride;
+ ((short *)data)[i] = (buf_ptr[dp] << 8) | buf_ptr[dp+1]; /* high byte first */
+ } else { /* 12bit quantization */
+ if (difseg >= nb_dif_segs/2)
+ goto out; /* We're not doing 4ch at this time */
+ /* three bytes carry two samples: byte dp and dp+1 hold the upper 8 bits, byte dp+2 holds both low nibbles */
+ lc = ((UINT16)buf_ptr[dp] << 4) |
+ ((UINT16)buf_ptr[dp+2] >> 4);
+ rc = ((UINT16)buf_ptr[dp+1] << 4) |
+ ((UINT16)buf_ptr[dp+2] & 0x0f);
+ lc = dv_audio_12to16(lc);
+ rc = dv_audio_12to16(rc);
+ /* lc uses this segment's row of the table, rc the row nb_dif_segs/2 further down */
+ i = unshuffle[difseg][ad] + (dp - 8)/3 * stride;
+ ((short *)data)[i] = lc;
+ i = unshuffle[difseg+nb_dif_segs/2][ad] + (dp - 8)/3 * stride;
+ ((short *)data)[i] = rc;
+ ++dp; /* together with dp+=2 this advances 3 bytes per sample pair */
+ }
+ }
+
+ buf_ptr += 16 * 80; /* 15 Video DIFs + 1 Audio DIF */
+ }
+ }
+
+out:
return buf_size;
}
diff --git a/src/libffmpeg/libavcodec/dvdata.h b/src/libffmpeg/libavcodec/dvdata.h
index b5c1f5607..4e1fc39c7 100644
--- a/src/libffmpeg/libavcodec/dvdata.h
+++ b/src/libffmpeg/libavcodec/dvdata.h
@@ -18,6 +18,7 @@
*/
#define NB_DV_VLC 409
+#define AAUX_OFFSET (80*6 + 80*16*3 + 3)
static const UINT16 dv_vlc_bits[409] = {
0x0000, 0x0002, 0x0007, 0x0008, 0x0009, 0x0014, 0x0015, 0x0016,
@@ -905,3 +906,41 @@ static const UINT16 dv_place_411[1350] = {
0x0834, 0x2320, 0x2f44, 0x3810, 0x1658,
};
+static const UINT16 dv_place_audio60[10][9] = { /* indexed [DIF segment][audio block]: base output sample index, used with stride 90 by dvaudio_decode_frame for 60 field systems */
+ { 0, 30, 60, 20, 50, 80, 10, 40, 70 }, /* 1st channel */
+ { 6, 36, 66, 26, 56, 86, 16, 46, 76 },
+ { 12, 42, 72, 2, 32, 62, 22, 52, 82 },
+ { 18, 48, 78, 8, 38, 68, 28, 58, 88 },
+ { 24, 54, 84, 14, 44, 74, 4, 34, 64 },
+
+ { 1, 31, 61, 21, 51, 81, 11, 41, 71 }, /* 2nd channel */
+ { 7, 37, 67, 27, 57, 87, 17, 47, 77 },
+ { 13, 43, 73, 3, 33, 63, 23, 53, 83 },
+ { 19, 49, 79, 9, 39, 69, 29, 59, 89 },
+ { 25, 55, 85, 15, 45, 75, 5, 35, 65 },
+};
+
+static const UINT16 dv_place_audio50[12][9] = { /* indexed [DIF segment][audio block]: base output sample index, used with stride 108 by dvaudio_decode_frame for 50 field systems */
+ { 0, 36, 72, 26, 62, 98, 16, 52, 88}, /* 1st channel */
+ { 6, 42, 78, 32, 68, 104, 22, 58, 94},
+ { 12, 48, 84, 2, 38, 74, 28, 64, 100},
+ { 18, 54, 90, 8, 44, 80, 34, 70, 106},
+ { 24, 60, 96, 14, 50, 86, 4, 40, 76},
+ { 30, 66, 102, 20, 56, 92, 10, 46, 82},
+
+ { 1, 37, 73, 27, 63, 99, 17, 53, 89}, /* 2nd channel */
+ { 7, 43, 79, 33, 69, 105, 23, 59, 95},
+ { 13, 49, 85, 3, 39, 75, 29, 65, 101},
+ { 19, 55, 91, 9, 45, 81, 35, 71, 107},
+ { 25, 61, 97, 15, 51, 87, 5, 41, 77},
+ { 31, 67, 103, 21, 57, 93, 11, 47, 83},
+};
+
+static const int dv_audio_frequency[3] = { /* sample rate in Hz, indexed by the freq field read in dvaudio_decode_frame (only codes 0..2 have an entry) */
+ 48000, 44100, 32000,
+};
+
+static const int dv_audio_min_samples[2][3] = { /* minimum samples per frame, indexed [sys][freq]; the smpls field is added on top of this */
+ { 1580, 1452, 1053 }, /* 60 fields */
+ { 1896, 1742, 1264 }, /* 50 fields */
+};
diff --git a/src/libffmpeg/libavcodec/fft.c b/src/libffmpeg/libavcodec/fft.c
index f060992f4..65eb575f3 100644
--- a/src/libffmpeg/libavcodec/fft.c
+++ b/src/libffmpeg/libavcodec/fft.c
@@ -53,13 +53,13 @@ int fft_init(FFTContext *s, int nbits, int inverse)
/* compute constant table for HAVE_SSE version */
#if (defined(HAVE_MMX) && defined(HAVE_BUILTIN_VECTOR)) || defined(HAVE_ALTIVEC)
{
- int has_vectors;
+ int has_vectors = 0;
#if defined(HAVE_MMX)
has_vectors = mm_support() & MM_SSE;
-#else
- /* XXX: should also use mm_support() ? */
- has_vectors = has_altivec() & MM_ALTIVEC;
+#endif
+#if defined(HAVE_ALTIVEC) && !defined(ALTIVEC_USE_REFERENCE_C_CODE)
+ has_vectors = mm_support() & MM_ALTIVEC;
#endif
if (has_vectors) {
int np, nblocks, np2, l;
diff --git a/src/libffmpeg/libavcodec/h263.c b/src/libffmpeg/libavcodec/h263.c
index bc21e0cd8..63bf19059 100644
--- a/src/libffmpeg/libavcodec/h263.c
+++ b/src/libffmpeg/libavcodec/h263.c
@@ -22,6 +22,12 @@
* qpel decoding, gmc decoding, interlaced decoding,
* by Michael Niedermayer <michaelni@gmx.at>
*/
+
+/**
+ * @file h263.c
+ * @brief h263/mpeg4 codec
+ *
+ */
//#define DEBUG
#include "common.h"
@@ -67,15 +73,17 @@ static inline int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr);
static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
int n, int coded, int intra);
static int h263_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr);
-static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n,
+static void mpeg4_inv_pred_ac(MpegEncContext * s, DCTELEM *block, int n,
int dir);
static void mpeg4_decode_sprite_trajectory(MpegEncContext * s);
static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *dir_ptr);
extern UINT32 inverse[256];
-static UINT16 uni_DCtab_lum [512][2];
-static UINT16 uni_DCtab_chrom[512][2];
+static UINT8 uni_DCtab_lum_len[512];
+static UINT8 uni_DCtab_chrom_len[512];
+static UINT16 uni_DCtab_lum_bits[512];
+static UINT16 uni_DCtab_chrom_bits[512];
#ifdef CONFIG_ENCODERS
static UINT16 (*mv_penalty)[MAX_MV*2+1]= NULL;
@@ -999,7 +1007,7 @@ static int h263_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr)
}
-void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n)
+void h263_pred_acdc(MpegEncContext * s, DCTELEM *block, int n)
{
int x, y, wrap, a, c, pred_dc, scale, i;
INT16 *dc_val, *ac_val, *ac_val1;
@@ -1309,8 +1317,8 @@ static void init_uni_dc_tab(void)
uni_len++;
}
}
- uni_DCtab_lum[level+256][0]= uni_code;
- uni_DCtab_lum[level+256][1]= uni_len;
+ uni_DCtab_lum_bits[level+256]= uni_code;
+ uni_DCtab_lum_len [level+256]= uni_len;
/* chrominance */
uni_code= DCtab_chrom[size][0];
@@ -1324,8 +1332,8 @@ static void init_uni_dc_tab(void)
uni_len++;
}
}
- uni_DCtab_chrom[level+256][0]= uni_code;
- uni_DCtab_chrom[level+256][1]= uni_len;
+ uni_DCtab_chrom_bits[level+256]= uni_code;
+ uni_DCtab_chrom_len [level+256]= uni_len;
}
}
@@ -1446,6 +1454,8 @@ void h263_encode_init(MpegEncContext *s)
s->intra_ac_vlc_last_length= uni_mpeg4_intra_rl_len + 128*64;
s->inter_ac_vlc_length = uni_mpeg4_inter_rl_len;
s->inter_ac_vlc_last_length= uni_mpeg4_inter_rl_len + 128*64;
+ s->luma_dc_vlc_length= uni_DCtab_lum_len;
+ s->chroma_dc_vlc_length= uni_DCtab_chrom_len;
s->ac_esc_length= 7+2+1+6+1+12+1;
break;
case CODEC_ID_H263P:
@@ -1470,6 +1480,11 @@ void h263_encode_init(MpegEncContext *s)
}
}
+/**
+ * encodes an 8x8 block.
+ * @param block the 8x8 block
+ * @param n block index (0-3 are luma, 4-5 are chroma)
+ */
static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
{
int level, run, last, i, j, last_index, last_non_zero, sign, slevel, code;
@@ -1795,6 +1810,13 @@ static void change_qscale(MpegEncContext * s, int dquant)
s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
}
+/**
+ * predicts the dc.
+ * @param n block index (0-3 are luma, 4-5 are chroma)
+ * @param dc_val_ptr a pointer to the dc_val entry for the current MB will be stored here
+ * @param dir_ptr pointer to an integer where the prediction direction will be stored
+ * @return the quantized predicted dc
+ */
static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *dir_ptr)
{
int a, b, c, wrap, pred, scale;
@@ -1852,7 +1874,12 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_pt
return pred;
}
-void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n,
+/**
+ * predicts the ac.
+ * @param n block index (0-3 are luma, 4-5 are chroma)
+ * @param dir the ac prediction direction
+ */
+void mpeg4_pred_ac(MpegEncContext * s, DCTELEM *block, int n,
int dir)
{
int i;
@@ -1907,7 +1934,7 @@ void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n,
}
-static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n,
+static void mpeg4_inv_pred_ac(MpegEncContext * s, DCTELEM *block, int n,
int dir)
{
int i;
@@ -1950,6 +1977,10 @@ static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n,
}
}
+/**
+ * encodes the dc value.
+ * @param n block index (0-3 are luma, 4-5 are chroma)
+ */
static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n)
{
#if 1
@@ -1957,10 +1988,10 @@ static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n)
level+=256;
if (n < 4) {
/* luminance */
- put_bits(s, uni_DCtab_lum[level][1], uni_DCtab_lum[level][0]);
+ put_bits(s, uni_DCtab_lum_len[level], uni_DCtab_lum_bits[level]);
} else {
/* chrominance */
- put_bits(s, uni_DCtab_chrom[level][1], uni_DCtab_chrom[level][0]);
+ put_bits(s, uni_DCtab_chrom_len[level], uni_DCtab_chrom_bits[level]);
}
#else
int size, v;
@@ -1991,6 +2022,10 @@ static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n)
#endif
}
#ifdef CONFIG_ENCODERS
+/**
+ * encodes an 8x8 block
+ * @param n block index (0-3 are luma, 4-5 are chroma)
+ */
static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int intra_dc,
UINT8 *scan_table, PutBitContext *dc_pb, PutBitContext *ac_pb)
{
@@ -2175,44 +2210,6 @@ static VLC dc_lum, dc_chrom;
static VLC sprite_trajectory;
static VLC mb_type_b_vlc;
-void init_rl(RLTable *rl)
-{
- INT8 max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
- UINT8 index_run[MAX_RUN+1];
- int last, run, level, start, end, i;
-
- /* compute max_level[], max_run[] and index_run[] */
- for(last=0;last<2;last++) {
- if (last == 0) {
- start = 0;
- end = rl->last;
- } else {
- start = rl->last;
- end = rl->n;
- }
-
- memset(max_level, 0, MAX_RUN + 1);
- memset(max_run, 0, MAX_LEVEL + 1);
- memset(index_run, rl->n, MAX_RUN + 1);
- for(i=start;i<end;i++) {
- run = rl->table_run[i];
- level = rl->table_level[i];
- if (index_run[run] == rl->n)
- index_run[run] = i;
- if (level > max_level[run])
- max_level[run] = level;
- if (run > max_run[level])
- max_run[level] = run;
- }
- rl->max_level[last] = av_malloc(MAX_RUN + 1);
- memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
- rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
- memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
- rl->index_run[last] = av_malloc(MAX_RUN + 1);
- memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
- }
-}
-
void init_vlc_rl(RLTable *rl)
{
int i, q;
@@ -2331,7 +2328,7 @@ static int h263_decode_gob_header(MpegEncContext *s)
/* We have a GBSC probably with GSTUFF */
skip_bits(&s->gb, 16); /* Drop the zeros */
- left= s->gb.size*8 - get_bits_count(&s->gb);
+ left= s->gb.size_in_bits - get_bits_count(&s->gb);
//MN: we must check the bits left or we might end in a infinite loop (or segfault)
for(;left>13; left--){
if(get_bits1(&s->gb)) break; /* Seek the '1' bit */
@@ -2431,7 +2428,7 @@ static inline int mpeg4_is_resync(MpegEncContext *s){
return 0;
}
- if(bits_count + 8 >= s->gb.size*8){
+ if(bits_count + 8 >= s->gb.size_in_bits){
int v= show_bits(&s->gb, 8);
v|= 0x7F >> (7-(bits_count&7));
@@ -2468,7 +2465,7 @@ static int mpeg4_decode_video_packet_header(MpegEncContext *s)
int header_extension=0, mb_num, len;
/* is there enough space left for a video packet + header */
- if( get_bits_count(&s->gb) > s->gb.size*8-20) return -1;
+ if( get_bits_count(&s->gb) > s->gb.size_in_bits-20) return -1;
for(len=0; len<32; len++){
if(get_bits1(&s->gb)) break;
@@ -2600,7 +2597,7 @@ int ff_h263_resync(MpegEncContext *s){
//ok, its not where its supposed to be ...
s->gb= s->last_resync_gb;
align_get_bits(&s->gb);
- left= s->gb.size*8 - get_bits_count(&s->gb);
+ left= s->gb.size_in_bits - get_bits_count(&s->gb);
for(;left>16+1+5+5; left-=8){
if(show_bits(&s->gb, 16)==0){
@@ -2622,6 +2619,7 @@ int ff_h263_resync(MpegEncContext *s){
}
/**
+ * gets the average motion vector for a GMC MB.
* @param n either 0 for the x component or 1 for y
* @returns the average MV for a GMC MB
*/
@@ -2654,8 +2652,7 @@ static inline int get_amv(MpegEncContext *s, int n){
v+= dx;
}
}
- sum /= 256;
- sum= RSHIFT(sum<<s->quarter_sample, a);
+ sum= RSHIFT(sum, a+8-s->quarter_sample);
}
if (sum < -len) sum= -len;
@@ -3055,7 +3052,7 @@ static int mpeg4_decode_partitioned_mb(MpegEncContext *s, DCTELEM block[6][64])
/* per-MB end of slice check */
if(--s->mb_num_left <= 0){
-//printf("%06X %d\n", show_bits(&s->gb, 24), s->gb.size*8 - get_bits_count(&s->gb));
+//printf("%06X %d\n", show_bits(&s->gb, 24), s->gb.size_in_bits - get_bits_count(&s->gb));
if(mpeg4_is_resync(s))
return SLICE_END;
else
@@ -3419,8 +3416,8 @@ end:
}else{
int v= show_bits(&s->gb, 16);
- if(get_bits_count(&s->gb) + 16 > s->gb.size*8){
- v>>= get_bits_count(&s->gb) + 16 - s->gb.size*8;
+ if(get_bits_count(&s->gb) + 16 > s->gb.size_in_bits){
+ v>>= get_bits_count(&s->gb) + 16 - s->gb.size_in_bits;
}
if(v==0)
@@ -3588,6 +3585,12 @@ not_coded:
return 0;
}
+/**
+ * decodes the dc value.
+ * @param n block index (0-3 are luma, 4-5 are chroma)
+ * @param dir_ptr the prediction direction will be stored here
+ * @return the quantized dc
+ */
static inline int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr)
{
int level, pred, code;
@@ -4473,6 +4476,10 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
return 0;
}
+/**
+ * decodes the user data stuff in the header.
+ * also initializes divx/xvid/lavc_version/build
+ */
static int decode_user_data(MpegEncContext *s, GetBitContext *gb){
char buf[256];
int i;
@@ -4685,7 +4692,7 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
printf("qp:%d fc:%d,%d %s size:%d pro:%d alt:%d top:%d %spel part:%d resync:%d w:%d a:%d\n",
s->qscale, s->f_code, s->b_code,
s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? "B" : "S")),
- gb->size,s->progressive_sequence, s->alternate_scan, s->top_field_first,
+ gb->size_in_bits,s->progressive_sequence, s->alternate_scan, s->top_field_first,
s->quarter_sample ? "q" : "h", s->data_partitioning, s->resync_marker, s->num_sprite_warping_points,
s->sprite_warping_accuracy);
}
@@ -4740,9 +4747,9 @@ int ff_mpeg4_decode_picture_header(MpegEncContext * s, GetBitContext *gb)
v = get_bits(gb, 8);
startcode = ((startcode << 8) | v) & 0xffffffff;
- if(get_bits_count(gb) >= gb->size*8){
- if(gb->size==1 && s->divx_version){
- printf("frame skip %d\n", gb->size);
+ if(get_bits_count(gb) >= gb->size_in_bits){
+ if(gb->size_in_bits==8 && s->divx_version){
+ printf("frame skip %d\n", gb->size_in_bits);
return FRAME_SKIPED; //divx bug
}else
return -1; //end of stream
diff --git a/src/libffmpeg/libavcodec/h263dec.c b/src/libffmpeg/libavcodec/h263dec.c
index 93a14a06e..a5dadeec4 100644
--- a/src/libffmpeg/libavcodec/h263dec.c
+++ b/src/libffmpeg/libavcodec/h263dec.c
@@ -249,15 +249,17 @@ static int decode_slice(MpegEncContext *s){
/* try to detect the padding bug */
if( s->codec_id==CODEC_ID_MPEG4
&& (s->workaround_bugs&FF_BUG_AUTODETECT)
- && s->gb.size*8 - get_bits_count(&s->gb) >=0
- && s->gb.size*8 - get_bits_count(&s->gb) < 48
+ && s->gb.size_in_bits - get_bits_count(&s->gb) >=0
+ && s->gb.size_in_bits - get_bits_count(&s->gb) < 48
// && !s->resync_marker
&& !s->data_partitioning){
const int bits_count= get_bits_count(&s->gb);
- const int bits_left = s->gb.size*8 - bits_count;
+ const int bits_left = s->gb.size_in_bits - bits_count;
- if(bits_left==0 || bits_left>8){
+ if(bits_left==0){
+ s->padding_bug_score+=16;
+ }else if(bits_left>8){
s->padding_bug_score++;
} else if(bits_left != 1){
int v= show_bits(&s->gb, 8);
@@ -267,17 +269,12 @@ static int decode_slice(MpegEncContext *s){
s->padding_bug_score--;
else
s->padding_bug_score++;
- }
-
- if(s->padding_bug_score > -2)
- s->workaround_bugs |= FF_BUG_NO_PADDING;
- else
- s->workaround_bugs &= ~FF_BUG_NO_PADDING;
+ }
}
// handle formats which dont have unique end markers
if(s->msmpeg4_version || (s->workaround_bugs&FF_BUG_NO_PADDING)){ //FIXME perhaps solve this more cleanly
- int left= s->gb.size*8 - get_bits_count(&s->gb);
+ int left= s->gb.size_in_bits - get_bits_count(&s->gb);
int max_extra=7;
/* no markers in M$ crap */
@@ -302,7 +299,7 @@ static int decode_slice(MpegEncContext *s){
}
fprintf(stderr, "slice end not reached but screenspace end (%d left %06X)\n",
- s->gb.size*8 - get_bits_count(&s->gb),
+ s->gb.size_in_bits - get_bits_count(&s->gb),
show_bits(&s->gb, 24));
return -1;
}
@@ -344,6 +341,61 @@ static int mpeg4_find_frame_end(MpegEncContext *s, UINT8 *buf, int buf_size){
return -1;
}
+static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){ /*
+ * Draws a line from (sx,sy) to (ex,ey) into buf by adding color to every
+ * byte the line touches. stride is the distance in bytes between rows.
+ * The end point is clipped to 0..w-1 / 0..h-1.
+ * NOTE(review): the start point is never clipped, so callers must pass an
+ * (sx,sy) that already lies inside the picture.
+ */
+ int t, x, y, f;
+
+ ex= clip(ex, 0, w-1);
+ ey= clip(ey, 0, h-1);
+
+ buf[sy*stride + sx]+= color; /* the loops below also start at offset 0 from one end point, so a pixel may receive color more than once */
+
+ if(ABS(ex - sx) > ABS(ey - sy)){ /* mostly horizontal: step along x */
+ if(sx > ex){ /* swap the end points so that x runs upwards */
+ t=sx; sx=ex; ex=t;
+ t=sy; sy=ey; ey=t;
+ }
+ buf+= sx + sy*stride;
+ ex-= sx; /* ex is now the x extent; it is nonzero in this branch because |ex-sx| > |ey-sy| >= 0 */
+ f= ((ey-sy)<<16)/ex; /* slope dy/dx in 16.16 fixed point */
+ for(x= 0; x <= ex; x++){
+ y= ((x*f) + (1<<15))>>16; /* add one half, then drop the 16 fractional bits */
+ buf[y*stride + x]+= color;
+ }
+ }else{ /* mostly vertical (or a single point): step along y */
+ if(sy > ey){ /* swap the end points so that y runs upwards */
+ t=sx; sx=ex; ex=t;
+ t=sy; sy=ey; ey=t;
+ }
+ buf+= sx + sy*stride;
+ ey-= sy; /* ey is now the y extent and may be 0 */
+ if(ey) f= ((ex-sx)<<16)/ey; /* slope dx/dy in 16.16 fixed point */
+ else f= 0; /* zero extent: avoid the division */
+ for(y= 0; y <= ey; y++){
+ x= ((y*f) + (1<<15))>>16;
+ buf[y*stride + x]+= color;
+ }
+ }
+}
+
+static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){ /*
+ * Draws the line (sx,sy)-(ex,ey) with draw_line() and, when the two points
+ * are more than 3 pixels apart, two short head strokes that start at (sx,sy).
+ * The remaining arguments are passed through to draw_line().
+ */
+ int dx= ex - sx;
+ int dy= ey - sy;
+
+ if(dx*dx + dy*dy > 3*3){ /* squared distance compared with 3 squared */
+ int rx= dx + dy; /* (rx,ry) is (dx,dy) turned by 45 degrees (and scaled) */
+ int ry= -dx + dy;
+ int length= ff_sqrt((rx*rx + ry*ry)<<8); /* presumably 16 * |(rx,ry)|, assuming ff_sqrt is an integer square root */
+
+ //FIXME subpixel accuracy
+ rx= ROUNDED_DIV(rx*3<<4, length); /* the <<4 matches the factor 16 in length, leaving a vector of about 3 pixels */
+ ry= ROUNDED_DIV(ry*3<<4, length);
+
+ draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
+ draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color); /* (-ry,rx) is (rx,ry) turned by a further 90 degrees */
+ }
+ draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
+}
+
int ff_h263_decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
UINT8 *buf, int buf_size)
@@ -368,49 +420,27 @@ uint64_t time= rdtsc();
if (buf_size == 0) {
return 0;
}
-
+
if(s->flags&CODEC_FLAG_TRUNCATED){
int next;
- ParseContext *pc= &s->parse_context;
- pc->last_index= pc->index;
-
if(s->codec_id==CODEC_ID_MPEG4){
next= mpeg4_find_frame_end(s, buf, buf_size);
}else{
fprintf(stderr, "this codec doesnt support truncated bitstreams\n");
return -1;
}
- if(next==-1){
- if(buf_size + FF_INPUT_BUFFER_PADDING_SIZE + pc->index > pc->buffer_size){
- pc->buffer_size= buf_size + pc->index + 10*1024;
- pc->buffer= realloc(pc->buffer, pc->buffer_size);
- }
-
- memcpy(&pc->buffer[pc->index], buf, buf_size);
- pc->index += buf_size;
+
+ if( ff_combine_frame(s, next, &buf, &buf_size) < 0 )
return buf_size;
- }
-
- if(pc->index){
- if(next + FF_INPUT_BUFFER_PADDING_SIZE + pc->index > pc->buffer_size){
- pc->buffer_size= next + pc->index + 10*1024;
- pc->buffer= realloc(pc->buffer, pc->buffer_size);
- }
-
- memcpy(&pc->buffer[pc->index], buf, next + FF_INPUT_BUFFER_PADDING_SIZE );
- pc->index = 0;
- buf= pc->buffer;
- buf_size= pc->last_index + next;
- }
}
retry:
if(s->bitstream_buffer_size && buf_size<20){ //divx 5.01+ frame reorder
- init_get_bits(&s->gb, s->bitstream_buffer, s->bitstream_buffer_size);
+ init_get_bits(&s->gb, s->bitstream_buffer, s->bitstream_buffer_size*8);
}else
- init_get_bits(&s->gb, buf, buf_size);
+ init_get_bits(&s->gb, buf, buf_size*8);
s->bitstream_buffer_size=0;
if (!s->context_initialized) {
@@ -427,7 +457,7 @@ retry:
if(s->avctx->extradata_size && s->picture_number==0){
GetBitContext gb;
- init_get_bits(&gb, s->avctx->extradata, s->avctx->extradata_size);
+ init_get_bits(&gb, s->avctx->extradata, s->avctx->extradata_size*8);
ret = ff_mpeg4_decode_picture_header(s, &gb);
}
ret = ff_mpeg4_decode_picture_header(s, &s->gb);
@@ -442,6 +472,11 @@ retry:
avctx->has_b_frames= !s->low_delay;
if(s->workaround_bugs&FF_BUG_AUTODETECT){
+ if(s->padding_bug_score > -2 && !s->data_partitioning)
+ s->workaround_bugs |= FF_BUG_NO_PADDING;
+ else
+ s->workaround_bugs &= ~FF_BUG_NO_PADDING;
+
if(s->avctx->fourcc == ff_get_fourcc("XVIX"))
s->workaround_bugs|= FF_BUG_XVID_ILACE;
#if 0
@@ -472,6 +507,14 @@ retry:
if(s->xvid_build && s->xvid_build<=1)
s->workaround_bugs|= FF_BUG_QPEL_CHROMA;
+#define SET_QPEL_FUNC(postfix1, postfix2) \
+ s->dsp.put_ ## postfix1 = ff_put_ ## postfix2;\
+ s->dsp.put_no_rnd_ ## postfix1 = ff_put_no_rnd_ ## postfix2;\
+ s->dsp.avg_ ## postfix1 = ff_avg_ ## postfix2;
+
+ if(s->lavc_build && s->lavc_build<4653)
+ s->workaround_bugs|= FF_BUG_STD_QPEL;
+
//printf("padding_bug_score: %d\n", s->padding_bug_score);
#if 0
if(s->divx_version==500)
@@ -489,6 +532,21 @@ retry:
#endif
}
+ if(s->workaround_bugs& FF_BUG_STD_QPEL){
+ SET_QPEL_FUNC(qpel_pixels_tab[0][ 5], qpel16_mc11_old_c)
+ SET_QPEL_FUNC(qpel_pixels_tab[0][ 7], qpel16_mc31_old_c)
+ SET_QPEL_FUNC(qpel_pixels_tab[0][ 9], qpel16_mc12_old_c)
+ SET_QPEL_FUNC(qpel_pixels_tab[0][11], qpel16_mc32_old_c)
+ SET_QPEL_FUNC(qpel_pixels_tab[0][13], qpel16_mc13_old_c)
+ SET_QPEL_FUNC(qpel_pixels_tab[0][15], qpel16_mc33_old_c)
+
+ SET_QPEL_FUNC(qpel_pixels_tab[1][ 5], qpel8_mc11_old_c)
+ SET_QPEL_FUNC(qpel_pixels_tab[1][ 7], qpel8_mc31_old_c)
+ SET_QPEL_FUNC(qpel_pixels_tab[1][ 9], qpel8_mc12_old_c)
+ SET_QPEL_FUNC(qpel_pixels_tab[1][11], qpel8_mc32_old_c)
+ SET_QPEL_FUNC(qpel_pixels_tab[1][13], qpel8_mc13_old_c)
+ SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_old_c)
+ }
#if 0 // dump bits per frame / qp / complexity
{
@@ -571,7 +629,7 @@ retry:
decode_slice(s);
s->error_status_table[0]|= VP_START;
- while(s->mb_y<s->mb_height && s->gb.size*8 - get_bits_count(&s->gb)>16){
+ while(s->mb_y<s->mb_height && s->gb.size_in_bits - get_bits_count(&s->gb)>16){
if(s->msmpeg4_version){
if(s->mb_x!=0 || (s->mb_y%s->slice_height)!=0)
break;
@@ -580,7 +638,7 @@ retry:
break;
}
- if(s->msmpeg4_version!=4 && s->h263_pred)
+ if(s->msmpeg4_version<4 && s->h263_pred)
ff_mpeg4_clean_buffers(s);
decode_slice(s);
@@ -645,41 +703,40 @@ retry:
}
MPV_frame_end(s);
-#if 0 //dirty show MVs, we should export the MV tables and write a filter to show them
-{
- int mb_y;
- s->has_b_frames=1;
- for(mb_y=0; mb_y<s->mb_height; mb_y++){
- int mb_x;
- int y= mb_y*16 + 8;
- for(mb_x=0; mb_x<s->mb_width; mb_x++){
- int x= mb_x*16 + 8;
- uint8_t *ptr= s->last_picture.data[0];
- int xy= 1 + mb_x*2 + (mb_y*2 + 1)*(s->mb_width*2 + 2);
- int mx= (s->motion_val[xy][0]>>1) + x;
- int my= (s->motion_val[xy][1]>>1) + y;
- int i;
- int max;
-
- if(mx<0) mx=0;
- if(my<0) my=0;
- if(mx>=s->width) mx= s->width -1;
- if(my>=s->height) my= s->height-1;
- max= ABS(mx-x);
- if(ABS(my-y) > max) max= ABS(my-y);
- /* the ugliest linedrawing routine ... */
- for(i=0; i<max; i++){
- int x1= x + (mx-x)*i/max;
- int y1= y + (my-y)*i/max;
- ptr[y1*s->linesize + x1]+=100;
- }
- ptr[y*s->linesize + x]+=100;
- s->mbskip_table[mb_x + mb_y*s->mb_width]=0;
+
+ if((avctx->debug&FF_DEBUG_VIS_MV) && s->last_picture.data[0]){
+ const int shift= 1 + s->quarter_sample;
+ int mb_y;
+ uint8_t *ptr= s->last_picture.data[0];
+ s->low_delay=0; //needed to see the vectors without trashing the buffers
+
+ for(mb_y=0; mb_y<s->mb_height; mb_y++){
+ int mb_x;
+ for(mb_x=0; mb_x<s->mb_width; mb_x++){
+ const int mb_index= mb_x + mb_y*s->mb_width;
+ if(s->co_located_type_table[mb_index] == MV_TYPE_8X8){
+ int i;
+ for(i=0; i<4; i++){
+ int sx= mb_x*16 + 4 + 8*(i&1);
+ int sy= mb_y*16 + 4 + 8*(i>>1);
+ int xy= 1 + mb_x*2 + (i&1) + (mb_y*2 + 1 + (i>>1))*(s->mb_width*2 + 2);
+ int mx= (s->motion_val[xy][0]>>shift) + sx;
+ int my= (s->motion_val[xy][1]>>shift) + sy;
+ draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
+ }
+ }else{
+ int sx= mb_x*16 + 8;
+ int sy= mb_y*16 + 8;
+ int xy= 1 + mb_x*2 + (mb_y*2 + 1)*(s->mb_width*2 + 2);
+ int mx= (s->motion_val[xy][0]>>shift) + sx;
+ int my= (s->motion_val[xy][1]>>shift) + sy;
+ draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
+ }
+ s->mbskip_table[mb_index]=0;
+ }
+ }
}
- }
-}
-#endif
if(s->pict_type==B_TYPE || s->low_delay){
*pict= *(AVFrame*)&s->current_picture;
diff --git a/src/libffmpeg/libavcodec/huffyuv.c b/src/libffmpeg/libavcodec/huffyuv.c
index 0eb701037..cff642d11 100644
--- a/src/libffmpeg/libavcodec/huffyuv.c
+++ b/src/libffmpeg/libavcodec/huffyuv.c
@@ -1,7 +1,7 @@
/*
* huffyuv codec for libavcodec
*
- * Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2002-2003 Michael Niedermayer <michaelni@gmx.at>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -271,7 +271,7 @@ static int read_huffman_tables(HYuvContext *s, uint8_t *src, int length){
GetBitContext gb;
int i;
- init_get_bits(&gb, src, length);
+ init_get_bits(&gb, src, length*8);
for(i=0; i<3; i++){
read_len_table(s->len[i], &gb);
@@ -295,9 +295,9 @@ static int read_old_huffman_tables(HYuvContext *s){
GetBitContext gb;
int i;
- init_get_bits(&gb, classic_shift_luma, sizeof(classic_shift_luma));
+ init_get_bits(&gb, classic_shift_luma, sizeof(classic_shift_luma)*8);
read_len_table(s->len[0], &gb);
- init_get_bits(&gb, classic_shift_chroma, sizeof(classic_shift_chroma));
+ init_get_bits(&gb, classic_shift_chroma, sizeof(classic_shift_chroma)*8);
read_len_table(s->len[1], &gb);
for(i=0; i<256; i++) s->bits[0][i] = classic_add_luma [i];
@@ -403,7 +403,7 @@ s->bgr32=1;
case 24:
case 32:
if(s->bgr32){
- avctx->pix_fmt = PIX_FMT_BGRA32;
+ avctx->pix_fmt = PIX_FMT_RGBA32;
}else{
avctx->pix_fmt = PIX_FMT_BGR24;
}
@@ -461,8 +461,6 @@ static int encode_init(AVCodecContext *avctx)
s->version=2;
avctx->coded_frame= &s->picture;
- s->picture.pict_type= FF_I_TYPE;
- s->picture.key_frame= 1;
switch(avctx->pix_fmt){
case PIX_FMT_YUV420P:
@@ -682,7 +680,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
bswap_buf((uint32_t*)s->bitstream_buffer, (uint32_t*)buf, buf_size/4);
- init_get_bits(&s->gb, s->bitstream_buffer, buf_size);
+ init_get_bits(&s->gb, s->bitstream_buffer, buf_size*8);
p->reference= 0;
if(avctx->get_buffer(avctx, p) < 0){
@@ -933,6 +931,8 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
init_put_bits(&s->pb, buf, buf_size, NULL, NULL);
*p = *pict;
+ p->pict_type= FF_I_TYPE;
+ p->key_frame= 1;
if(avctx->pix_fmt == PIX_FMT_YUV422P || avctx->pix_fmt == PIX_FMT_YUV420P){
int lefty, leftu, leftv, y, cy;
diff --git a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c
index 5fce7f914..857f1d398 100644
--- a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c
+++ b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c
@@ -485,6 +485,107 @@ static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
dst[i+0] += src[i+0];
}
+static int pix_norm1_mmx(uint8_t *pix, int line_size) { /* returns the sum of pix[x]^2 over 16 rows of 16 bytes; rows are line_size bytes apart */
+ int tmp;
+ asm volatile (
+ "movl $16,%%ecx\n" /* ecx = row counter (16 rows) */
+ "pxor %%mm0,%%mm0\n" /* mm0 = 0, used to zero-extend bytes to words */
+ "pxor %%mm7,%%mm7\n" /* mm7 = running sum, kept as two dwords */
+ "1:\n"
+ "movq (%0),%%mm2\n" /* mm2 = pix[0-7] */
+ "movq 8(%0),%%mm3\n" /* mm3 = pix[8-15] */
+
+ "movq %%mm2,%%mm1\n" /* mm1 = mm2 = pix[0-7] */
+
+ "punpckhbw %%mm0,%%mm1\n" /* mm1 = [pix4-7] */
+ "punpcklbw %%mm0,%%mm2\n" /* mm2 = [pix0-3] */
+
+ "movq %%mm3,%%mm4\n" /* mm4 = mm3 = pix[8-15] */
+ "punpckhbw %%mm0,%%mm3\n" /* mm3 = [pix12-15] */
+ "punpcklbw %%mm0,%%mm4\n" /* mm4 = [pix8-11] */
+
+ "pmaddwd %%mm1,%%mm1\n" /* mm1 = (pix4^2+pix5^2,pix6^2+pix7^2) */
+ "pmaddwd %%mm2,%%mm2\n" /* mm2 = (pix0^2+pix1^2,pix2^2+pix3^2) */
+
+ "pmaddwd %%mm3,%%mm3\n" /* mm3 = (pix12^2+pix13^2,pix14^2+pix15^2) */
+ "pmaddwd %%mm4,%%mm4\n" /* mm4 = (pix8^2+pix9^2,pix10^2+pix11^2) */
+
+ "paddd %%mm1,%%mm2\n" /* mm2 = (pix0^2+pix1^2+pix4^2+pix5^2,
+ pix2^2+pix3^2+pix6^2+pix7^2) */
+ "paddd %%mm3,%%mm4\n" /* mm4 = same pairing for pix8-15 */
+ "paddd %%mm2,%%mm7\n" /* accumulate left half of the row */
+
+ "addl %2, %0\n" /* pix += line_size (next row) */
+ "paddd %%mm4,%%mm7\n" /* accumulate right half of the row */
+ "dec %%ecx\n"
+ "jnz 1b\n"
+
+ "movq %%mm7,%%mm1\n"
+ "psrlq $32, %%mm7\n" /* shift hi dword to lo */
+ "paddd %%mm7,%%mm1\n" /* low dword of mm1 = total of both partial sums */
+ "movd %%mm1,%1\n" /* tmp = total */
+ : "+r" (pix), "=r"(tmp) : "r" (line_size) : "%ecx" ); /* NOTE(review): mm0-mm4 and mm7 are modified but not listed as clobbers */
+ return tmp;
+}
+
+static int sse16_mmx(void *v, UINT8 * pix1, UINT8 * pix2, int line_size) { /* returns the sum of (pix1[x]-pix2[x])^2 over 16 rows of 16 bytes; v is not used here */
+ int tmp;
+ asm volatile (
+ "movl $16,%%ecx\n" /* ecx = row counter (16 rows) */
+ "pxor %%mm0,%%mm0\n" /* mm0 = 0 */
+ "pxor %%mm7,%%mm7\n" /* mm7 holds the sum */
+ "1:\n"
+ "movq (%0),%%mm1\n" /* mm1 = pix1[0-7] */
+ "movq (%1),%%mm2\n" /* mm2 = pix2[0-7] */
+ "movq 8(%0),%%mm3\n" /* mm3 = pix1[8-15] */
+ "movq 8(%1),%%mm4\n" /* mm4 = pix2[8-15] */
+
+ /* todo: mm1-mm2, mm3-mm4 */
+ /* algo: subtract mm1 from mm2 with saturation and vice versa */
+ /* OR the results to get absolute difference */
+ "movq %%mm1,%%mm5\n"
+ "movq %%mm3,%%mm6\n"
+ "psubusb %%mm2,%%mm1\n"
+ "psubusb %%mm4,%%mm3\n"
+ "psubusb %%mm5,%%mm2\n"
+ "psubusb %%mm6,%%mm4\n"
+
+ "por %%mm1,%%mm2\n" /* mm2 = |pix1-pix2| for bytes 0-7 */
+ "por %%mm3,%%mm4\n" /* mm4 = |pix1-pix2| for bytes 8-15 */
+
+ /* now convert to 16-bit vectors so we can square them */
+ "movq %%mm2,%%mm1\n"
+ "movq %%mm4,%%mm3\n"
+
+ "punpckhbw %%mm0,%%mm2\n"
+ "punpckhbw %%mm0,%%mm4\n"
+ "punpcklbw %%mm0,%%mm1\n" /* differences 0-7 now spread as words over (mm1,mm2) */
+ "punpcklbw %%mm0,%%mm3\n" /* differences 8-15 now spread as words over (mm3,mm4) */
+
+ "pmaddwd %%mm2,%%mm2\n" /* square the words and add adjacent pairs into dwords */
+ "pmaddwd %%mm4,%%mm4\n"
+ "pmaddwd %%mm1,%%mm1\n"
+ "pmaddwd %%mm3,%%mm3\n"
+
+ "addl %3,%0\n" /* pix1 += line_size */
+ "addl %3,%1\n" /* pix2 += line_size */
+
+ "paddd %%mm2,%%mm1\n"
+ "paddd %%mm4,%%mm3\n"
+ "paddd %%mm1,%%mm7\n" /* accumulate left half of the row */
+ "paddd %%mm3,%%mm7\n" /* accumulate right half of the row */
+
+ "decl %%ecx\n"
+ "jnz 1b\n"
+
+ "movq %%mm7,%%mm1\n"
+ "psrlq $32, %%mm7\n" /* shift hi dword to lo */
+ "paddd %%mm7,%%mm1\n" /* low dword of mm1 = total of both partial sums */
+ "movd %%mm1,%2\n" /* tmp = total */
+ : "+r" (pix1), "+r" (pix2), "=r"(tmp) : "r" (line_size) : "ecx"); /* NOTE(review): mm0-mm7 are modified but not listed as clobbers */
+ return tmp;
+}
+
static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
int i=0;
asm volatile(
@@ -1085,7 +1186,7 @@ static void OPNAME ## qpel8_mc00_ ## MMX (UINT8 *dst, UINT8 *src, int stride){\
}\
\
static void OPNAME ## qpel8_mc10_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
- uint64_t temp[32];\
+ uint64_t temp[8];\
uint8_t * const half= (uint8_t*)temp;\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\
OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\
@@ -1096,14 +1197,14 @@ static void OPNAME ## qpel8_mc20_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
}\
\
static void OPNAME ## qpel8_mc30_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
- uint64_t temp[32];\
+ uint64_t temp[8];\
uint8_t * const half= (uint8_t*)temp;\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\
OPNAME ## pixels8_l2_mmx(dst, src+1, half, stride, stride, 8);\
}\
\
static void OPNAME ## qpel8_mc01_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
- uint64_t temp[32];\
+ uint64_t temp[8];\
uint8_t * const half= (uint8_t*)temp;\
put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\
OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\
@@ -1114,53 +1215,49 @@ static void OPNAME ## qpel8_mc02_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
}\
\
static void OPNAME ## qpel8_mc03_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
- uint64_t temp[32];\
+ uint64_t temp[8];\
uint8_t * const half= (uint8_t*)temp;\
put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\
OPNAME ## pixels8_l2_mmx(dst, src+stride, half, stride, stride, 8);\
}\
static void OPNAME ## qpel8_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
- uint64_t half[8*2 + 8*2 + 18*2];\
- uint8_t * const halfH= ((uint8_t*)half) + 2*64 + 8;\
- uint8_t * const halfV= ((uint8_t*)half);\
- uint8_t * const halfHV= ((uint8_t*)half) + 64;\
+ uint64_t half[8 + 9];\
+ uint8_t * const halfH= ((uint8_t*)half) + 64;\
+ uint8_t * const halfHV= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src, 8, stride);\
+ put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l4_mmx(dst, src, (uint8_t*)half, stride, 8);\
+ OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\
}\
static void OPNAME ## qpel8_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
- uint64_t half[8*2 + 8*2 + 18*2];\
- uint8_t * const halfH= ((uint8_t*)half) + 2*64 + 8;\
- uint8_t * const halfV= ((uint8_t*)half);\
- uint8_t * const halfHV= ((uint8_t*)half) + 64;\
+ uint64_t half[8 + 9];\
+ uint8_t * const halfH= ((uint8_t*)half) + 64;\
+ uint8_t * const halfHV= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src+1, 8, stride);\
+ put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l4_mmx(dst, src+1, (uint8_t*)half, stride, 8);\
+ OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\
}\
static void OPNAME ## qpel8_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
- uint64_t half[8*2 + 8*2 + 9*2];\
- uint8_t * const halfH= ((uint8_t*)half) + 2*64;\
- uint8_t * const halfV= ((uint8_t*)half);\
- uint8_t * const halfHV= ((uint8_t*)half) + 64;\
+ uint64_t half[8 + 9];\
+ uint8_t * const halfH= ((uint8_t*)half) + 64;\
+ uint8_t * const halfHV= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src, 8, stride);\
+ put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l4_mmx(dst, src+stride, (uint8_t*)half, stride, 8);\
+ OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\
}\
static void OPNAME ## qpel8_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
- uint64_t half[8*2 + 8*2 + 9*2];\
- uint8_t * const halfH= ((uint8_t*)half) + 2*64;\
- uint8_t * const halfV= ((uint8_t*)half);\
- uint8_t * const halfHV= ((uint8_t*)half) + 64;\
- put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src , 8, stride, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src+1, 8, stride);\
+ uint64_t half[8 + 9];\
+ uint8_t * const halfH= ((uint8_t*)half) + 64;\
+ uint8_t * const halfHV= ((uint8_t*)half);\
+ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
+ put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l4_mmx(dst, src+stride+1, (uint8_t*)half, stride, 8);\
+ OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\
}\
static void OPNAME ## qpel8_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
- uint64_t half[8*2 + 9*2];\
+ uint64_t half[8 + 9];\
uint8_t * const halfH= ((uint8_t*)half) + 64;\
uint8_t * const halfHV= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
@@ -1168,7 +1265,7 @@ static void OPNAME ## qpel8_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\
}\
static void OPNAME ## qpel8_mc23_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
- uint64_t half[8*2 + 9*2];\
+ uint64_t half[8 + 9];\
uint8_t * const halfH= ((uint8_t*)half) + 64;\
uint8_t * const halfHV= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
@@ -1176,27 +1273,21 @@ static void OPNAME ## qpel8_mc23_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\
}\
static void OPNAME ## qpel8_mc12_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
- uint64_t half[8*2 + 8*2 + 9*2];\
- uint8_t * const halfH= ((uint8_t*)half) + 2*64;\
- uint8_t * const halfV= ((uint8_t*)half);\
- uint8_t * const halfHV= ((uint8_t*)half) + 64;\
+ uint64_t half[8 + 9];\
+ uint8_t * const halfH= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src, 8, stride);\
- put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l2_mmx(dst, halfV, halfHV, stride, 8, 8);\
+ put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\
+ OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
}\
static void OPNAME ## qpel8_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
- uint64_t half[8*2 + 8*2 + 9*2];\
- uint8_t * const halfH= ((uint8_t*)half) + 2*64;\
- uint8_t * const halfV= ((uint8_t*)half);\
- uint8_t * const halfHV= ((uint8_t*)half) + 64;\
+ uint64_t half[8 + 9];\
+ uint8_t * const halfH= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src+1, 8, stride);\
- put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l2_mmx(dst, halfV, halfHV, stride, 8, 8);\
+ put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\
+ OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
}\
static void OPNAME ## qpel8_mc22_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
- uint64_t half[9*2];\
+ uint64_t half[9];\
uint8_t * const halfH= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
@@ -1241,44 +1332,40 @@ static void OPNAME ## qpel16_mc03_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
OPNAME ## pixels16_l2_mmx(dst, src+stride, half, stride, stride, 16);\
}\
static void OPNAME ## qpel16_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
- uint64_t half[16*2 + 16*2 + 18*2];\
- uint8_t * const halfH= ((uint8_t*)half) + 2*256 + 16;\
- uint8_t * const halfV= ((uint8_t*)half);\
- uint8_t * const halfHV= ((uint8_t*)half) + 256;\
+ uint64_t half[16*2 + 17*2];\
+ uint8_t * const halfH= ((uint8_t*)half) + 256;\
+ uint8_t * const halfHV= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
- put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src, 16, stride);\
+ put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l4_mmx(dst, src, (uint8_t*)half, stride, 16);\
+ OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\
}\
static void OPNAME ## qpel16_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
- uint64_t half[16*2 + 16*2 + 18*2];\
- uint8_t * const halfH= ((uint8_t*)half) + 2*256 + 16;\
- uint8_t * const halfV= ((uint8_t*)half);\
- uint8_t * const halfHV= ((uint8_t*)half) + 256;\
+ uint64_t half[16*2 + 17*2];\
+ uint8_t * const halfH= ((uint8_t*)half) + 256;\
+ uint8_t * const halfHV= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
- put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src+1, 16, stride);\
+ put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l4_mmx(dst, src+1, (uint8_t*)half, stride, 16);\
+ OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\
}\
static void OPNAME ## qpel16_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
- uint64_t half[16*2 + 16*2 + 17*2];\
- uint8_t * const halfH= ((uint8_t*)half) + 2*256;\
- uint8_t * const halfV= ((uint8_t*)half);\
- uint8_t * const halfHV= ((uint8_t*)half) + 256;\
+ uint64_t half[16*2 + 17*2];\
+ uint8_t * const halfH= ((uint8_t*)half) + 256;\
+ uint8_t * const halfHV= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
- put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src, 16, stride);\
+ put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l4_mmx(dst, src+stride, (uint8_t*)half, stride, 16);\
+ OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\
}\
static void OPNAME ## qpel16_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
- uint64_t half[16*2 + 16*2 + 17*2];\
- uint8_t * const halfH= ((uint8_t*)half) + 2*256;\
- uint8_t * const halfV= ((uint8_t*)half);\
- uint8_t * const halfHV= ((uint8_t*)half) + 256;\
- put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src , 16, stride, 17);\
- put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src+1, 16, stride);\
+ uint64_t half[16*2 + 17*2];\
+ uint8_t * const halfH= ((uint8_t*)half) + 256;\
+ uint8_t * const halfHV= ((uint8_t*)half);\
+ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
+ put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l4_mmx(dst, src+stride+1, (uint8_t*)half, stride, 16);\
+ OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\
}\
static void OPNAME ## qpel16_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t half[16*2 + 17*2];\
@@ -1297,24 +1384,18 @@ static void OPNAME ## qpel16_mc23_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\
}\
static void OPNAME ## qpel16_mc12_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
- uint64_t half[16*2 + 16*2 + 17*2];\
- uint8_t * const halfH= ((uint8_t*)half) + 2*256;\
- uint8_t * const halfV= ((uint8_t*)half);\
- uint8_t * const halfHV= ((uint8_t*)half) + 256;\
+ uint64_t half[17*2];\
+ uint8_t * const halfH= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
- put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src, 16, stride);\
- put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l2_mmx(dst, halfV, halfHV, stride, 16, 16);\
+ put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\
+ OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\
}\
static void OPNAME ## qpel16_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
- uint64_t half[16*2 + 16*2 + 17*2];\
- uint8_t * const halfH= ((uint8_t*)half) + 2*256;\
- uint8_t * const halfV= ((uint8_t*)half);\
- uint8_t * const halfHV= ((uint8_t*)half) + 256;\
+ uint64_t half[17*2];\
+ uint8_t * const halfH= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
- put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src+1, 16, stride);\
- put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l2_mmx(dst, halfV, halfHV, stride, 16, 16);\
+ put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\
+ OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\
}\
static void OPNAME ## qpel16_mc22_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t half[17*2];\
@@ -1436,6 +1517,9 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask)
c->sad[0]= sad16x16_mmx;
c->sad[1]= sad8x8_mmx;
+
+ c->pix_norm1 = pix_norm1_mmx;
+ c->sse[0] = sse16_mmx;
if (mm_flags & MM_MMXEXT) {
c->pix_abs16x16 = pix_abs16x16_mmx2;
@@ -1525,7 +1609,7 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask)
c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow;
c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow;
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
-
+
SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_3dnow)
SET_QPEL_FUNC(qpel_pixels_tab[0][ 1], qpel16_mc10_3dnow)
SET_QPEL_FUNC(qpel_pixels_tab[0][ 2], qpel16_mc20_3dnow)
diff --git a/src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h b/src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h
index 0ae1cd99d..956edf798 100644
--- a/src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h
+++ b/src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h
@@ -58,6 +58,16 @@ static void DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
{
MOVQ_BFE(mm6);
__asm __volatile(
+ "testl $1, %0 \n\t"
+ " jz 1f \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%2), %%mm1 \n\t"
+ "addl %4, %1 \n\t"
+ "addl $8, %2 \n\t"
+ PAVGB(%%mm0, %%mm1, %%mm4, %%mm6)
+ "movq %%mm4, (%3) \n\t"
+ "addl %5, %3 \n\t"
+ "decl %0 \n\t"
".balign 8 \n\t"
"1: \n\t"
"movq (%1), %%mm0 \n\t"
@@ -144,6 +154,19 @@ static void DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, in
{
MOVQ_BFE(mm6);
__asm __volatile(
+ "testl $1, %0 \n\t"
+ " jz 1f \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%2), %%mm1 \n\t"
+ "movq 8(%1), %%mm2 \n\t"
+ "movq 8(%2), %%mm3 \n\t"
+ "addl %4, %1 \n\t"
+ "addl $16, %2 \n\t"
+ PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
+ "movq %%mm4, (%3) \n\t"
+ "movq %%mm5, 8(%3) \n\t"
+ "addl %5, %3 \n\t"
+ "decl %0 \n\t"
".balign 8 \n\t"
"1: \n\t"
"movq (%1), %%mm0 \n\t"
@@ -271,124 +294,6 @@ static void DEF(put, pixels8_xy2)(UINT8 *block, const UINT8 *pixels, int line_si
:"eax", "memory");
}
-static void DEF(put, pixels8_l4)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int stride, int h)
-{
- MOVQ_ZERO(mm7);
- SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
- __asm __volatile(
- ".balign 8 \n\t"
- "1: \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%2), %%mm1 \n\t"
- "movq 64(%2), %%mm2 \n\t"
- "movq 136(%2), %%mm3 \n\t"
- "punpcklbw %%mm7, %%mm0 \n\t"
- "punpcklbw %%mm7, %%mm1 \n\t"
- "punpcklbw %%mm7, %%mm2 \n\t"
- "punpcklbw %%mm7, %%mm3 \n\t"
- "paddusw %%mm6, %%mm0 \n\t"
- "paddusw %%mm0, %%mm1 \n\t"
- "paddusw %%mm2, %%mm3 \n\t"
- "paddusw %%mm1, %%mm3 \n\t"
- "psrlw $2, %%mm3 \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%2), %%mm1 \n\t"
- "movq 64(%2), %%mm2 \n\t"
- "movq 136(%2), %%mm4 \n\t"
- "punpckhbw %%mm7, %%mm0 \n\t"
- "punpckhbw %%mm7, %%mm1 \n\t"
- "punpckhbw %%mm7, %%mm2 \n\t"
- "punpckhbw %%mm7, %%mm4 \n\t"
- "paddusw %%mm6, %%mm0 \n\t"
- "paddusw %%mm0, %%mm1 \n\t"
- "paddusw %%mm2, %%mm4 \n\t"
- "paddusw %%mm1, %%mm4 \n\t"
- "psrlw $2, %%mm4 \n\t"
- "packuswb %%mm4, %%mm3 \n\t"
- "movq %%mm3, (%0) \n\t"
- "addl %4, %0 \n\t"
- "addl %4, %1 \n\t"
- "addl $8, %2 \n\t"
- "decl %3 \n\t"
- "jnz 1b \n\t"
- :"+r"(dst), "+r"(src1), "+r"(src2), "+r"(h)
- :"r"(stride)
- :"memory");
-}
-
-static void DEF(put, pixels16_l4)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int stride, int h)
-{
- MOVQ_ZERO(mm7);
- SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
- __asm __volatile(
- ".balign 8 \n\t"
- "1: \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%2), %%mm1 \n\t"
- "movq 256(%2), %%mm2 \n\t"
- "movq 528(%2), %%mm3 \n\t"
- "punpcklbw %%mm7, %%mm0 \n\t"
- "punpcklbw %%mm7, %%mm1 \n\t"
- "punpcklbw %%mm7, %%mm2 \n\t"
- "punpcklbw %%mm7, %%mm3 \n\t"
- "paddusw %%mm6, %%mm0 \n\t"
- "paddusw %%mm0, %%mm1 \n\t"
- "paddusw %%mm2, %%mm3 \n\t"
- "paddusw %%mm1, %%mm3 \n\t"
- "psrlw $2, %%mm3 \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%2), %%mm1 \n\t"
- "movq 256(%2), %%mm2 \n\t"
- "movq 528(%2), %%mm4 \n\t"
- "punpckhbw %%mm7, %%mm0 \n\t"
- "punpckhbw %%mm7, %%mm1 \n\t"
- "punpckhbw %%mm7, %%mm2 \n\t"
- "punpckhbw %%mm7, %%mm4 \n\t"
- "paddusw %%mm6, %%mm0 \n\t"
- "paddusw %%mm0, %%mm1 \n\t"
- "paddusw %%mm2, %%mm4 \n\t"
- "paddusw %%mm1, %%mm4 \n\t"
- "psrlw $2, %%mm4 \n\t"
- "packuswb %%mm4, %%mm3 \n\t"
- "movq %%mm3, (%0) \n\t"
- "movq 8(%1), %%mm0 \n\t"
- "movq 8(%2), %%mm1 \n\t"
- "movq 264(%2), %%mm2 \n\t"
- "movq 536(%2), %%mm3 \n\t"
- "punpcklbw %%mm7, %%mm0 \n\t"
- "punpcklbw %%mm7, %%mm1 \n\t"
- "punpcklbw %%mm7, %%mm2 \n\t"
- "punpcklbw %%mm7, %%mm3 \n\t"
- "paddusw %%mm6, %%mm0 \n\t"
- "paddusw %%mm0, %%mm1 \n\t"
- "paddusw %%mm2, %%mm3 \n\t"
- "paddusw %%mm1, %%mm3 \n\t"
- "psrlw $2, %%mm3 \n\t"
- "movq 8(%1), %%mm0 \n\t"
- "movq 8(%2), %%mm1 \n\t"
- "movq 264(%2), %%mm2 \n\t"
- "movq 536(%2), %%mm4 \n\t"
- "punpckhbw %%mm7, %%mm0 \n\t"
- "punpckhbw %%mm7, %%mm1 \n\t"
- "punpckhbw %%mm7, %%mm2 \n\t"
- "punpckhbw %%mm7, %%mm4 \n\t"
- "paddusw %%mm6, %%mm0 \n\t"
- "paddusw %%mm0, %%mm1 \n\t"
- "paddusw %%mm2, %%mm4 \n\t"
- "paddusw %%mm1, %%mm4 \n\t"
- "psrlw $2, %%mm4 \n\t"
- "packuswb %%mm4, %%mm3 \n\t"
- "movq %%mm3, 8(%0) \n\t"
- "addl %4, %0 \n\t"
- "addl %4, %1 \n\t"
- "addl $16, %2 \n\t"
- "decl %3 \n\t"
- "jnz 1b \n\t"
- :"+r"(dst), "+r"(src1), "+r"(src2), "+r"(h)
- :"r"(stride)
- :"memory");
-}
-
// avg_pixels
// in case more speed is needed - unroling would certainly help
static void DEF(avg, pixels8)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
@@ -641,133 +546,6 @@ static void DEF(avg, pixels8_xy2)(UINT8 *block, const UINT8 *pixels, int line_si
:"eax", "memory");
}
-static void DEF(avg, pixels8_l4)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int stride, int h)
-{
- MOVQ_ZERO(mm7);
- SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
- MOVQ_BFE(mm5);
- __asm __volatile(
- ".balign 8 \n\t"
- "1: \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%2), %%mm1 \n\t"
- "movq 64(%2), %%mm2 \n\t"
- "movq 136(%2), %%mm3 \n\t"
- "punpcklbw %%mm7, %%mm0 \n\t"
- "punpcklbw %%mm7, %%mm1 \n\t"
- "punpcklbw %%mm7, %%mm2 \n\t"
- "punpcklbw %%mm7, %%mm3 \n\t"
- "paddusw %%mm6, %%mm0 \n\t"
- "paddusw %%mm0, %%mm1 \n\t"
- "paddusw %%mm2, %%mm3 \n\t"
- "paddusw %%mm1, %%mm3 \n\t"
- "psrlw $2, %%mm3 \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%2), %%mm1 \n\t"
- "movq 64(%2), %%mm2 \n\t"
- "movq 136(%2), %%mm4 \n\t"
- "punpckhbw %%mm7, %%mm0 \n\t"
- "punpckhbw %%mm7, %%mm1 \n\t"
- "punpckhbw %%mm7, %%mm2 \n\t"
- "punpckhbw %%mm7, %%mm4 \n\t"
- "paddusw %%mm6, %%mm0 \n\t"
- "paddusw %%mm0, %%mm1 \n\t"
- "paddusw %%mm2, %%mm4 \n\t"
- "paddusw %%mm1, %%mm4 \n\t"
- "psrlw $2, %%mm4 \n\t"
- "packuswb %%mm4, %%mm3 \n\t"
- "movq (%0), %%mm4 \n\t"
- PAVGB(%%mm3, %%mm4, %%mm0, %%mm5)
- "movq %%mm0, (%0) \n\t"
- "addl %4, %0 \n\t"
- "addl %4, %1 \n\t"
- "addl $8, %2 \n\t"
- "decl %3 \n\t"
- "jnz 1b \n\t"
- :"+r"(dst), "+r"(src1), "+r"(src2), "+r"(h)
- :"r"(stride)
- :"memory");
-}
-
-static void DEF(avg, pixels16_l4)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int stride, int h)
-{
- MOVQ_ZERO(mm7);
- SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
- MOVQ_BFE(mm5);
- __asm __volatile(
- ".balign 8 \n\t"
- "1: \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%2), %%mm1 \n\t"
- "movq 256(%2), %%mm2 \n\t"
- "movq 528(%2), %%mm3 \n\t"
- "punpcklbw %%mm7, %%mm0 \n\t"
- "punpcklbw %%mm7, %%mm1 \n\t"
- "punpcklbw %%mm7, %%mm2 \n\t"
- "punpcklbw %%mm7, %%mm3 \n\t"
- "paddusw %%mm6, %%mm0 \n\t"
- "paddusw %%mm0, %%mm1 \n\t"
- "paddusw %%mm2, %%mm3 \n\t"
- "paddusw %%mm1, %%mm3 \n\t"
- "psrlw $2, %%mm3 \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%2), %%mm1 \n\t"
- "movq 256(%2), %%mm2 \n\t"
- "movq 528(%2), %%mm4 \n\t"
- "punpckhbw %%mm7, %%mm0 \n\t"
- "punpckhbw %%mm7, %%mm1 \n\t"
- "punpckhbw %%mm7, %%mm2 \n\t"
- "punpckhbw %%mm7, %%mm4 \n\t"
- "paddusw %%mm6, %%mm0 \n\t"
- "paddusw %%mm0, %%mm1 \n\t"
- "paddusw %%mm2, %%mm4 \n\t"
- "paddusw %%mm1, %%mm4 \n\t"
- "psrlw $2, %%mm4 \n\t"
- "packuswb %%mm4, %%mm3 \n\t"
- "movq (%0), %%mm4 \n\t"
- PAVGB(%%mm3, %%mm4, %%mm0, %%mm5)
- "movq %%mm0, (%0) \n\t"
- "movq 8(%1), %%mm0 \n\t"
- "movq 8(%2), %%mm1 \n\t"
- "movq 264(%2), %%mm2 \n\t"
- "movq 536(%2), %%mm3 \n\t"
- "punpcklbw %%mm7, %%mm0 \n\t"
- "punpcklbw %%mm7, %%mm1 \n\t"
- "punpcklbw %%mm7, %%mm2 \n\t"
- "punpcklbw %%mm7, %%mm3 \n\t"
- "paddusw %%mm6, %%mm0 \n\t"
- "paddusw %%mm0, %%mm1 \n\t"
- "paddusw %%mm2, %%mm3 \n\t"
- "paddusw %%mm1, %%mm3 \n\t"
- "psrlw $2, %%mm3 \n\t"
- "movq 8(%1), %%mm0 \n\t"
- "movq 8(%2), %%mm1 \n\t"
- "movq 264(%2), %%mm2 \n\t"
- "movq 536(%2), %%mm4 \n\t"
- "punpckhbw %%mm7, %%mm0 \n\t"
- "punpckhbw %%mm7, %%mm1 \n\t"
- "punpckhbw %%mm7, %%mm2 \n\t"
- "punpckhbw %%mm7, %%mm4 \n\t"
- "paddusw %%mm6, %%mm0 \n\t"
- "paddusw %%mm0, %%mm1 \n\t"
- "paddusw %%mm2, %%mm4 \n\t"
- "paddusw %%mm1, %%mm4 \n\t"
- "psrlw $2, %%mm4 \n\t"
- "packuswb %%mm4, %%mm3 \n\t"
- "movq 8(%0), %%mm4 \n\t"
- PAVGB(%%mm3, %%mm4, %%mm0, %%mm5)
- "movq %%mm0, 8(%0) \n\t"
- "addl %4, %0 \n\t"
- "addl %4, %1 \n\t"
- "addl $16, %2 \n\t"
- "decl %3 \n\t"
- "jnz 1b \n\t"
- :"+r"(dst), "+r"(src1), "+r"(src2), "+r"(h)
- :"r"(stride)
- :"memory");
-}
-
-
//FIXME optimize
static void DEF(put, pixels16_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h){
DEF(put, pixels8_y2)(block , pixels , line_size, h);
diff --git a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c
index 799ff1666..ead30ed31 100644
--- a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c
+++ b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c
@@ -53,8 +53,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
if (!s->h263_aic) {
#if 1
asm volatile (
- "xorl %%edx, %%edx \n\t"
- "mul %%ecx \n\t"
+ "imul %%ecx \n\t"
: "=d" (level), "=a"(dummy)
: "a" ((block[0]>>2) + q), "c" (inverse[q<<1])
);
diff --git a/src/libffmpeg/libavcodec/imgconvert.c b/src/libffmpeg/libavcodec/imgconvert.c
index bdf6fe65d..2304092fd 100644
--- a/src/libffmpeg/libavcodec/imgconvert.c
+++ b/src/libffmpeg/libavcodec/imgconvert.c
@@ -1,6 +1,6 @@
/*
* Misc image convertion routines
- * Copyright (c) 2001, 2002 Fabrice Bellard.
+ * Copyright (c) 2001, 2002, 2003 Fabrice Bellard.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -26,14 +26,220 @@
#ifdef HAVE_MMX
#include "i386/mmx.h"
#endif
+
+/* Static description of one pixel format; one entry of pix_fmt_info. */
+typedef struct PixFmtInfo {
+ const char *name; /* short name, as returned by avcodec_get_pix_fmt_name() */
+ UINT8 nb_components; /* number of components in AVPicture array */
+ UINT8 is_yuv : 1; /* true if YUV instead of RGB color space */
+ UINT8 is_packed : 1; /* true if multiple components in same word */
+ UINT8 is_paletted : 1; /* true if paletted */
+ UINT8 is_alpha : 1; /* true if alpha can be specified */
+ UINT8 is_gray : 1; /* true if gray or monochrome format */
+ UINT8 x_chroma_shift; /* X chroma subsampling factor is 2 ^ shift */
+ UINT8 y_chroma_shift; /* Y chroma subsampling factor is 2 ^ shift */
+} PixFmtInfo;
+
+/* this table describes each pixel format; it is indexed by PIX_FMT_xxx */
+static PixFmtInfo pix_fmt_info[PIX_FMT_NB] = {
+ /* YUV formats */
+ [PIX_FMT_YUV420P] = {
+ .name = "yuv420p",
+ .nb_components = 3, .is_yuv = 1,
+ .x_chroma_shift = 1, .y_chroma_shift = 1,
+ },
+ [PIX_FMT_YUV422P] = {
+ .name = "yuv422p",
+ .nb_components = 3, .is_yuv = 1,
+ .x_chroma_shift = 1, .y_chroma_shift = 0,
+ },
+ [PIX_FMT_YUV444P] = {
+ .name = "yuv444p",
+ .nb_components = 3, .is_yuv = 1,
+ .x_chroma_shift = 0, .y_chroma_shift = 0,
+ },
+ [PIX_FMT_YUV422] = {
+ .name = "yuv422",
+ .nb_components = 1, .is_yuv = 1, .is_packed = 1,
+ .x_chroma_shift = 1, .y_chroma_shift = 0,
+ },
+ [PIX_FMT_YUV410P] = {
+ .name = "yuv410p",
+ .nb_components = 3, .is_yuv = 1,
+ .x_chroma_shift = 2, .y_chroma_shift = 2,
+ },
+ [PIX_FMT_YUV411P] = {
+ .name = "yuv411p",
+ .nb_components = 3, .is_yuv = 1,
+ .x_chroma_shift = 2, .y_chroma_shift = 0,
+ },
+
+ /* RGB formats */
+ [PIX_FMT_RGB24] = {
+ .name = "rgb24",
+ .nb_components = 1, .is_packed = 1,
+ },
+ [PIX_FMT_BGR24] = {
+ .name = "bgr24",
+ .nb_components = 1, .is_packed = 1,
+ },
+ [PIX_FMT_RGBA32] = {
+ .name = "rgba32",
+ .nb_components = 1, .is_packed = 1, .is_alpha = 1,
+ },
+ [PIX_FMT_RGB565] = {
+ .name = "rgb565",
+ .nb_components = 1, .is_packed = 1,
+ },
+ [PIX_FMT_RGB555] = {
+ .name = "rgb555",
+ .nb_components = 1, .is_packed = 1, .is_alpha = 1,
+ },
+
+ /* gray / mono formats */
+ [PIX_FMT_GRAY8] = {
+ .name = "gray",
+ .nb_components = 1, .is_gray = 1,
+ },
+ [PIX_FMT_MONOWHITE] = {
+ .name = "monow",
+ .nb_components = 1, .is_packed = 1, .is_gray = 1,
+ },
+ [PIX_FMT_MONOBLACK] = {
+ .name = "monob",
+ .nb_components = 1, .is_packed = 1, .is_gray = 1,
+ },
+};
+
+/* Report the chroma subsampling of a pixel format: *h_shift and *v_shift
+   receive log2 of the horizontal and vertical subsampling factors.
+   Both are set to 0 for non YUV formats, and also for a pix_fmt outside
+   [0, PIX_FMT_NB), which would otherwise index past pix_fmt_info. */
+void avcodec_get_chroma_sub_sample(int pix_fmt, int *h_shift, int *v_shift)
+{
+    if (pix_fmt >= 0 && pix_fmt < PIX_FMT_NB &&
+        pix_fmt_info[pix_fmt].is_yuv) {
+        *h_shift = pix_fmt_info[pix_fmt].x_chroma_shift;
+        *v_shift = pix_fmt_info[pix_fmt].y_chroma_shift;
+    } else {
+        *h_shift = 0;
+        *v_shift = 0;
+    }
+}
+
+/* Return the name stored in pix_fmt_info for pix_fmt, or "???" when
+   pix_fmt lies outside [0, PIX_FMT_NB). */
+const char *avcodec_get_pix_fmt_name(int pix_fmt)
+{
+    if (pix_fmt >= 0 && pix_fmt < PIX_FMT_NB)
+        return pix_fmt_info[pix_fmt].name;
+    return "???";
+}
+
+/* Set the plane pointers and line sizes of 'picture' for an image of the
+   given format and dimensions stored contiguously at 'ptr'.
+   Returns the image size in bytes, or -1 (with the three data pointers
+   set to NULL) when pix_fmt is not handled.
+   For single-plane formats only linesize[0] is written. */
+int avpicture_fill(AVPicture *picture, UINT8 *ptr,
+                   int pix_fmt, int width, int height)
+{
+    int luma_size, total;
+    int planar = 0;        /* non-zero: planes 1 and 2 are used as well */
+    int chroma_size = 0;   /* bytes in one chroma plane (planar only) */
+    int chroma_stride = 0; /* bytes per chroma line (planar only) */
+    int stride0 = width;   /* bytes per line of plane 0 */
+
+    luma_size = width * height;
+
+    /* first pick the per-format numbers ... */
+    switch(pix_fmt) {
+    case PIX_FMT_YUV420P:
+        planar = 1;
+        chroma_size = luma_size / 4;
+        chroma_stride = width / 2;
+        total = (luma_size * 3) / 2;
+        break;
+    case PIX_FMT_YUV422P:
+        planar = 1;
+        chroma_size = luma_size / 2;
+        chroma_stride = width / 2;
+        total = luma_size * 2;
+        break;
+    case PIX_FMT_YUV444P:
+        planar = 1;
+        chroma_size = luma_size;
+        chroma_stride = width;
+        total = luma_size * 3;
+        break;
+    case PIX_FMT_YUV410P:
+        planar = 1;
+        chroma_size = luma_size / 16;
+        chroma_stride = width / 4;
+        total = luma_size + (luma_size / 8);
+        break;
+    case PIX_FMT_YUV411P:
+        planar = 1;
+        chroma_size = luma_size / 4;
+        chroma_stride = width / 4;
+        total = luma_size + (luma_size / 2);
+        break;
+    case PIX_FMT_RGB24:
+    case PIX_FMT_BGR24:
+        stride0 = width * 3;
+        total = luma_size * 3;
+        break;
+    case PIX_FMT_RGBA32:
+        stride0 = width * 4;
+        total = luma_size * 4;
+        break;
+    case PIX_FMT_RGB555:
+    case PIX_FMT_RGB565:
+    case PIX_FMT_YUV422:
+        stride0 = width * 2;
+        total = luma_size * 2;
+        break;
+    case PIX_FMT_GRAY8:
+        total = luma_size;
+        break;
+    case PIX_FMT_MONOWHITE:
+    case PIX_FMT_MONOBLACK:
+        /* one bit per pixel, each line padded to a whole byte */
+        stride0 = (width + 7) >> 3;
+        total = stride0 * height;
+        break;
+    default:
+        picture->data[0] = NULL;
+        picture->data[1] = NULL;
+        picture->data[2] = NULL;
+        return -1;
+    }
+
+    /* ... then lay the planes out one after the other */
+    picture->data[0] = ptr;
+    if (planar) {
+        picture->data[1] = picture->data[0] + luma_size;
+        picture->data[2] = picture->data[1] + chroma_size;
+        picture->linesize[0] = width;
+        picture->linesize[1] = chroma_stride;
+        picture->linesize[2] = chroma_stride;
+    } else {
+        picture->data[1] = NULL;
+        picture->data[2] = NULL;
+        picture->linesize[0] = stride0;
+    }
+    return total;
+}
+
+/* Return the byte size avpicture_fill() reports for an image of the given
+   format and dimensions (-1 if the format is not handled). Only the size
+   is wanted, so a throw-away picture and a NULL base pointer are used. */
+int avpicture_get_size(int pix_fmt, int width, int height)
+{
+    AVPicture scratch;
+
+    return avpicture_fill(&scratch, NULL, pix_fmt, width, height);
+}
+
+
/* XXX: totally non optimized */
-static void yuv422_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
- UINT8 *src, int width, int height)
+static void yuv422_to_yuv420p(AVPicture *dst, AVPicture *src,
+ int width, int height)
{
+ UINT8 *lum, *cb, *cr;
int x, y;
- UINT8 *p = src;
-
+ const UINT8 *p;
+
+ lum = dst->data[0];
+ cb = dst->data[1];
+ cr = dst->data[2];
+ p = src->data[0];
+
for(y=0;y<height;y+=2) {
for(x=0;x<width;x+=2) {
lum[0] = p[0];
@@ -58,342 +264,6 @@ static void yuv422_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
#define ONE_HALF (1 << (SCALEBITS - 1))
#define FIX(x) ((int) ((x) * (1L<<SCALEBITS) + 0.5))
-static void rgb24_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
- UINT8 *src, int width, int height)
-{
- int wrap, wrap3, x, y;
- int r, g, b, r1, g1, b1;
- UINT8 *p;
-
- wrap = width;
- wrap3 = width * 3;
- p = src;
- for(y=0;y<height;y+=2) {
- for(x=0;x<width;x+=2) {
- r = p[0];
- g = p[1];
- b = p[2];
- r1 = r;
- g1 = g;
- b1 = b;
- lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
- FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
- r = p[3];
- g = p[4];
- b = p[5];
- r1 += r;
- g1 += g;
- b1 += b;
- lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
- FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
- p += wrap3;
- lum += wrap;
-
- r = p[0];
- g = p[1];
- b = p[2];
- r1 += r;
- g1 += g;
- b1 += b;
- lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
- FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
- r = p[3];
- g = p[4];
- b = p[5];
- r1 += r;
- g1 += g;
- b1 += b;
- lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
- FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
-
- cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
- FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
- cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
- FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
-
- cb++;
- cr++;
- p += -wrap3 + 2 * 3;
- lum += -wrap + 2;
- }
- p += wrap3;
- lum += wrap;
- }
-}
-
-static void rgba32_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
- UINT8 *src, int width, int height)
-{
- int wrap, wrap4, x, y;
- int r, g, b, r1, g1, b1;
- UINT8 *p;
-
- wrap = width;
- wrap4 = width * 4;
- p = src;
- for(y=0;y<height;y+=2) {
- for(x=0;x<width;x+=2) {
- r = p[0];
- g = p[1];
- b = p[2];
- r1 = r;
- g1 = g;
- b1 = b;
- lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
- FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
- r = p[4];
- g = p[5];
- b = p[6];
- r1 += r;
- g1 += g;
- b1 += b;
- lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
- FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
- p += wrap4;
- lum += wrap;
-
- r = p[0];
- g = p[1];
- b = p[2];
- r1 += r;
- g1 += g;
- b1 += b;
- lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
- FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
- r = p[4];
- g = p[5];
- b = p[6];
- r1 += r;
- g1 += g;
- b1 += b;
- lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
- FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
-
- cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
- FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
- cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
- FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
-
- cb++;
- cr++;
- p += -wrap4 + 2 * 4;
- lum += -wrap + 2;
- }
- p += wrap4;
- lum += wrap;
- }
-}
-
-#define rgb565_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0800,31, 0x0020,63,0x0001,31)
-#define rgb555_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0400,31, 0x0020,31,0x0001,31)
-#define rgb5551_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0800,31, 0x0040,31,0x0002,31)
-#define bgr565_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0001,31, 0x0020,63,0x0800,31)
-#define bgr555_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0001,31, 0x0020,31,0x0400,31)
-#define gbr565_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0001,31, 0x0800,31,0x0040,63)
-#define gbr555_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0001,31, 0x0400,31,0x0020,31)
-
-static void rgbmisc_to_yuv420p
- (UINT8 *lum, UINT8 *cb, UINT8 *cr,
- UINT8 *src, int width, int height,
-
- UINT16 R_LOWMASK, UINT16 R_MAX,
- UINT16 G_LOWMASK, UINT16 G_MAX,
- UINT16 B_LOWMASK, UINT16 B_MAX
- )
-{
- int wrap, wrap2, x, y;
- int r, g, b, r1, g1, b1;
- UINT8 *p;
- UINT16 pixel;
-
- wrap = width;
- wrap2 = width * 2;
- p = src;
- for(y=0;y<height;y+=2) {
- for(x=0;x<width;x+=2) {
- pixel = p[0] | (p[1]<<8);
- r = (((pixel/R_LOWMASK) & R_MAX) * (0x100 / (R_MAX+1)));
- g = (((pixel/G_LOWMASK) & G_MAX) * (0x100 / (G_MAX+1)));
- b = (((pixel/B_LOWMASK) & B_MAX) * (0x100 / (B_MAX+1)));
- r1 = r;
- g1 = g;
- b1 = b;
- lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
- FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
-
- pixel = p[2] | (p[3]<<8);
- r = (((pixel/R_LOWMASK) & R_MAX) * (0x100 / (R_MAX+1)));
- g = (((pixel/G_LOWMASK) & G_MAX) * (0x100 / (G_MAX+1)));
- b = (((pixel/B_LOWMASK) & B_MAX) * (0x100 / (B_MAX+1)));
- r1 += r;
- g1 += g;
- b1 += b;
- lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
- FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
- p += wrap2;
- lum += wrap;
-
- pixel = p[0] | (p[1]<<8);
- r = (((pixel/R_LOWMASK) & R_MAX) * (0x100 / (R_MAX+1)));
- g = (((pixel/G_LOWMASK) & G_MAX) * (0x100 / (G_MAX+1)));
- b = (((pixel/B_LOWMASK) & B_MAX) * (0x100 / (B_MAX+1)));
- r1 += r;
- g1 += g;
- b1 += b;
- lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
- FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
- pixel = p[2] | (p[3]<<8);
- r = (((pixel/R_LOWMASK) & R_MAX) * (0x100 / (R_MAX+1)));
- g = (((pixel/G_LOWMASK) & G_MAX) * (0x100 / (G_MAX+1)));
- b = (((pixel/B_LOWMASK) & B_MAX) * (0x100 / (B_MAX+1)));
- r1 += r;
- g1 += g;
- b1 += b;
- lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
- FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
-
- cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
- FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
- cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
- FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
-
- cb++;
- cr++;
- p += -wrap2 + 2 * 2;
- lum += -wrap + 2;
- }
- p += wrap2;
- lum += wrap;
- }
-}
-
-
-static void bgr24_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
- UINT8 *src, int width, int height)
-{
- int wrap, wrap3, x, y;
- int r, g, b, r1, g1, b1;
- UINT8 *p;
-
- wrap = width;
- wrap3 = width * 3;
- p = src;
- for(y=0;y<height;y+=2) {
- for(x=0;x<width;x+=2) {
- b = p[0];
- g = p[1];
- r = p[2];
- r1 = r;
- g1 = g;
- b1 = b;
- lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
- FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
- b = p[3];
- g = p[4];
- r = p[5];
- r1 += r;
- g1 += g;
- b1 += b;
- lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
- FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
- p += wrap3;
- lum += wrap;
-
- b = p[0];
- g = p[1];
- r = p[2];
- r1 += r;
- g1 += g;
- b1 += b;
- lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
- FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
- b = p[3];
- g = p[4];
- r = p[5];
- r1 += r;
- g1 += g;
- b1 += b;
- lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
- FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
-
- cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
- FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
- cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
- FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
-
- cb++;
- cr++;
- p += -wrap3 + 2 * 3;
- lum += -wrap + 2;
- }
- p += wrap3;
- lum += wrap;
- }
-}
-
-static void bgra32_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
- UINT8 *src, int width, int height)
-{
- int wrap, wrap4, x, y;
- int r, g, b, r1, g1, b1;
- UINT8 *p;
-
- wrap = width;
- wrap4 = width * 4;
- p = src;
- for(y=0;y<height;y+=2) {
- for(x=0;x<width;x+=2) {
- b = p[0];
- g = p[1];
- r = p[2];
- r1 = r;
- g1 = g;
- b1 = b;
- lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
- FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
- b = p[4];
- g = p[5];
- r = p[6];
- r1 += r;
- g1 += g;
- b1 += b;
- lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
- FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
- p += wrap4;
- lum += wrap;
-
- b = p[0];
- g = p[1];
- r = p[2];
- r1 += r;
- g1 += g;
- b1 += b;
- lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
- FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
- b = p[4];
- g = p[5];
- r = p[6];
- r1 += r;
- g1 += g;
- b1 += b;
- lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
- FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
-
- cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
- FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
- cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
- FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
-
- cb++;
- cr++;
- p += -wrap4 + 2 * 4;
- lum += -wrap + 2;
- }
- p += wrap4;
- lum += wrap;
- }
-}
-
/* XXX: use generic filter ? */
/* 1x2 -> 1x1 */
static void shrink2(UINT8 *dst, int dst_wrap,
@@ -487,7 +357,7 @@ static void grow22(UINT8 *dst, int dst_wrap,
}
}
-/* 1x2 -> 2x1. width and height are given for the source picture */
+/* 1x2 -> 2x1 */
static void conv411(UINT8 *dst, int dst_wrap,
UINT8 *src, int src_wrap,
int width, int height)
@@ -495,7 +365,7 @@ static void conv411(UINT8 *dst, int dst_wrap,
int w, c;
UINT8 *s1, *s2, *d;
- for(;height > 0; height -= 2) {
+ for(;height > 0; height--) {
s1 = src;
s2 = src + src_wrap;
d = dst;
@@ -531,7 +401,7 @@ static void img_copy(UINT8 *dst, int dst_wrap,
#define C_GU (13954 >> (16 - SCALE_BITS))
#define C_GV (34903 >> (16 - SCALE_BITS))
-#define RGBOUT(r, g, b, y1)\
+#define YUV_TO_RGB2(r, g, b, y1)\
{\
y = (y1 - 16) * C_Y;\
r = cm[(y + r_add) >> SCALE_BITS];\
@@ -540,340 +410,816 @@ static void img_copy(UINT8 *dst, int dst_wrap,
}
/* XXX: no chroma interpolating is done */
-static void yuv420p_to_bgra32(AVPicture *dst, AVPicture *src,
- int width, int height)
+#define RGB_FUNCTIONS(rgb_name) \
+ \
+/* Convert the planar YUV picture 'src' (data[0] = Y, data[1] = Cb, */ \
+/* data[2] = Cr) to packed pixels written with RGB_OUT into dst->data[0]. */ \
+/* Two lines are produced per outer iteration and every Cb/Cr sample is */ \
+/* reused for a 2x2 block of pixels. */ \
+/* NOTE(review): the stepping uses width >> 1 and height -= 2, so this */ \
+/* looks like it expects even width and height - confirm with callers. */ \
+static void yuv420p_to_ ## rgb_name (AVPicture *dst, AVPicture *src, \
+ int width, int height) \
+{ \
+ UINT8 *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr, *d, *d1, *d2; \
+ int w, y, cb, cr, r_add, g_add, b_add, width2; \
+ UINT8 *cm = cropTbl + MAX_NEG_CROP; \
+ unsigned int r, g, b; \
+ \
+ d = dst->data[0]; \
+ y1_ptr = src->data[0]; \
+ cb_ptr = src->data[1]; \
+ cr_ptr = src->data[2]; \
+ width2 = width >> 1; \
+ for(;height > 0; height -= 2) { \
+ /* d1/d2 and y1_ptr/y2_ptr walk the upper and lower line of the pair */ \
+ d1 = d; \
+ d2 = d + dst->linesize[0]; \
+ y2_ptr = y1_ptr + src->linesize[0]; \
+ for(w = width2; w > 0; w --) { \
+ /* chroma contribution, shared by the four pixels below; */ \
+ /* YUV_TO_RGB2 reads r_add, g_add, b_add, cm and writes y */ \
+ cb = cb_ptr[0] - 128; \
+ cr = cr_ptr[0] - 128; \
+ r_add = C_RV * cr + (1 << (SCALE_BITS - 1)); \
+ g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1)); \
+ b_add = C_BU * cb + (1 << (SCALE_BITS - 1)); \
+ \
+ /* output 4 pixels */ \
+ YUV_TO_RGB2(r, g, b, y1_ptr[0]); \
+ RGB_OUT(d1, r, g, b); \
+ \
+ YUV_TO_RGB2(r, g, b, y1_ptr[1]); \
+ RGB_OUT(d1 + BPP, r, g, b); \
+ \
+ YUV_TO_RGB2(r, g, b, y2_ptr[0]); \
+ RGB_OUT(d2, r, g, b); \
+ \
+ YUV_TO_RGB2(r, g, b, y2_ptr[1]); \
+ RGB_OUT(d2 + BPP, r, g, b); \
+ \
+ d1 += 2 * BPP; \
+ d2 += 2 * BPP; \
+ \
+ y1_ptr += 2; \
+ y2_ptr += 2; \
+ cb_ptr++; \
+ cr_ptr++; \
+ } \
+ /* skip two output and two luma lines, one line of each chroma plane */ \
+ d += 2 * dst->linesize[0]; \
+ y1_ptr += 2 * src->linesize[0] - width; \
+ cb_ptr += src->linesize[1] - width2; \
+ cr_ptr += src->linesize[2] - width2; \
+ } \
+} \
+ \
+/* Convert the planar YUV picture 'src' (data[0] = Y, data[1] = Cb, */ \
+/* data[2] = Cr, one chroma line per luma line) to packed pixels written */ \
+/* with RGB_OUT into dst->data[0]. Every Cb/Cr sample is reused for two */ \
+/* horizontally adjacent pixels. */ \
+/* XXX: no chroma interpolating is done */ \
+static void yuv422p_to_ ## rgb_name (AVPicture *dst, AVPicture *src, \
+                                     int width, int height) \
+{ \
+    UINT8 *y1_ptr, *cb_ptr, *cr_ptr, *d, *d1; \
+    int w, y, cb, cr, r_add, g_add, b_add, width2; \
+    UINT8 *cm = cropTbl + MAX_NEG_CROP; \
+    unsigned int r, g, b; \
+ \
+    d = dst->data[0]; \
+    y1_ptr = src->data[0]; \
+    cb_ptr = src->data[1]; \
+    cr_ptr = src->data[2]; \
+    width2 = width >> 1; \
+    for(;height > 0; height --) { \
+        /* d1 walks the current output line while d stays at its start, */ \
+        /* so that adding linesize below lands on the next line */ \
+        d1 = d; \
+        for(w = width2; w > 0; w --) { \
+            /* chroma contribution, shared by the two pixels below; */ \
+            /* YUV_TO_RGB2 reads r_add, g_add, b_add, cm and writes y */ \
+            cb = cb_ptr[0] - 128; \
+            cr = cr_ptr[0] - 128; \
+            r_add = C_RV * cr + (1 << (SCALE_BITS - 1)); \
+            g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1)); \
+            b_add = C_BU * cb + (1 << (SCALE_BITS - 1)); \
+ \
+            /* output 2 pixels */ \
+            YUV_TO_RGB2(r, g, b, y1_ptr[0]); \
+            RGB_OUT(d1, r, g, b); \
+ \
+            YUV_TO_RGB2(r, g, b, y1_ptr[1]); \
+            RGB_OUT(d1 + BPP, r, g, b); \
+ \
+            d1 += 2 * BPP; \
+ \
+            y1_ptr += 2; \
+            cb_ptr++; \
+            cr_ptr++; \
+        } \
+        d += dst->linesize[0]; \
+        y1_ptr += src->linesize[0] - width; \
+        cb_ptr += src->linesize[1] - width2; \
+        cr_ptr += src->linesize[2] - width2; \
+    } \
+} \
+ \
+/* Convert the packed picture 'src' (pixels read with RGB_IN from */ \
+/* data[0]) to planar YUV with chroma subsampled by two in both */ \
+/* directions: dst->data[0] = Y, data[1] = Cb, data[2] = Cr. */ \
+/* Each Cb/Cr sample is computed from the sum of a 2x2 pixel block. */ \
+/* Lines are addressed through the linesize fields of both pictures, */ \
+/* like the other converters of this macro, so padded lines work. */ \
+/* NOTE(review): a 2x2 block is read on every step, so this looks like */ \
+/* it expects even width and height - confirm with callers. */ \
+static void rgb_name ## _to_yuv420p(AVPicture *dst, AVPicture *src, \
+                                    int width, int height) \
+{ \
+    int src_wrap, lum_wrap, x, y; \
+    int r, g, b, r1, g1, b1; \
+    UINT8 *lum, *cb, *cr; \
+    UINT8 *lum_row, *cb_row, *cr_row; \
+    const UINT8 *p, *src_row; \
+ \
+    lum_row = dst->data[0]; \
+    cb_row = dst->data[1]; \
+    cr_row = dst->data[2]; \
+    src_row = src->data[0]; \
+ \
+    lum_wrap = dst->linesize[0]; \
+    src_wrap = src->linesize[0]; \
+    for(y=0;y<height;y+=2) { \
+        p = src_row; \
+        lum = lum_row; \
+        cb = cb_row; \
+        cr = cr_row; \
+        for(x=0;x<width;x+=2) { \
+            /* upper line of the 2x2 block */ \
+            RGB_IN(r, g, b, p); \
+            r1 = r; \
+            g1 = g; \
+            b1 = b; \
+            lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g + \
+                      FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; \
+            RGB_IN(r, g, b, p + BPP); \
+            r1 += r; \
+            g1 += g; \
+            b1 += b; \
+            lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g + \
+                      FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; \
+ \
+            /* lower line of the 2x2 block */ \
+            RGB_IN(r, g, b, p + src_wrap); \
+            r1 += r; \
+            g1 += g; \
+            b1 += b; \
+            lum[lum_wrap] = (FIX(0.29900) * r + FIX(0.58700) * g + \
+                             FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; \
+ \
+            RGB_IN(r, g, b, p + src_wrap + BPP); \
+            r1 += r; \
+            g1 += g; \
+            b1 += b; \
+            lum[lum_wrap + 1] = (FIX(0.29900) * r + FIX(0.58700) * g + \
+                                 FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; \
+ \
+            /* r1, g1, b1 hold sums of four pixels, hence the extra */ \
+            /* two bits in the rounding term and in the shift */ \
+            cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 + \
+                      FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> \
+                     (SCALEBITS + 2)) + 128; \
+            cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 - \
+                      FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> \
+                     (SCALEBITS + 2)) + 128; \
+ \
+            cb++; \
+            cr++; \
+            p += 2 * BPP; \
+            lum += 2; \
+        } \
+        /* two source and luma lines, one line of each chroma plane */ \
+        src_row += 2 * src_wrap; \
+        lum_row += 2 * lum_wrap; \
+        cb_row += dst->linesize[1]; \
+        cr_row += dst->linesize[2]; \
+    } \
+} \
+ \
+/* Convert the packed picture 'src' (pixels read with RGB_IN) to one */ \
+/* gray byte per pixel in dst->data[0], using the same fixed point */ \
+/* weights as the luma of the YUV converters. */ \
+static void rgb_name ## _to_gray(AVPicture *dst, AVPicture *src, \
+                                 int width, int height) \
+{ \
+    const unsigned char *in_row = src->data[0]; \
+    unsigned char *out_row = dst->data[0]; \
+    int r, g, b; \
+    int row, col; \
+ \
+    for(row = 0; row < height; row++) { \
+        const unsigned char *in = in_row; \
+        unsigned char *out = out_row; \
+ \
+        for(col = 0; col < width; col++) { \
+            RGB_IN(r, g, b, in); \
+            *out++ = (FIX(0.29900) * r + FIX(0.58700) * g + \
+                      FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; \
+            in += BPP; \
+        } \
+        /* step by whole lines instead of adding the padding */ \
+        in_row += src->linesize[0]; \
+        out_row += dst->linesize[0]; \
+    } \
+} \
+ \
+/* Convert one gray byte per pixel in src->data[0] to packed pixels in */ \
+/* dst->data[0]: the gray value is passed to RGB_OUT as red, green and */ \
+/* blue alike. */ \
+static void gray_to_ ## rgb_name(AVPicture *dst, AVPicture *src, \
+                                 int width, int height) \
+{ \
+    const unsigned char *in_row = src->data[0]; \
+    unsigned char *out_row = dst->data[0]; \
+    int level; \
+    int row, col; \
+ \
+    for(row = 0; row < height; row++) { \
+        const unsigned char *in = in_row; \
+        unsigned char *out = out_row; \
+ \
+        for(col = 0; col < width; col++) { \
+            level = *in++; \
+            RGB_OUT(out, level, level, level); \
+            out += BPP; \
+        } \
+        /* step by whole lines instead of adding the padding */ \
+        in_row += src->linesize[0]; \
+        out_row += dst->linesize[0]; \
+    } \
+}
+
+/* copy bit n to bits 0 ... n - 1; bits n ... 7 of 'a' are kept and
+   everything above bit 7 is cleared. The RGB_IN macros below use this to
+   widen a 5 or 6 bit colour component, already shifted to the top of a
+   byte, to 8 bits. */
+static inline unsigned int bitcopy_n(unsigned int a, int n)
{
- UINT8 *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr, *d, *d1, *d2;
- int w, y, cb, cr, r_add, g_add, b_add, width2;
- UINT8 *cm = cropTbl + MAX_NEG_CROP;
+ int mask;
+ /* the n low bits that get overwritten */
+ mask = (1 << n) - 1;
+ /* -(bit n) is all ones when bit n is set, zero otherwise */
+ return (a & (0xff & ~mask)) | ((-((a >> n) & 1)) & mask);
+}
- d = dst->data[0];
- y1_ptr = src->data[0];
- cb_ptr = src->data[1];
- cr_ptr = src->data[2];
- width2 = width >> 1;
- for(;height > 0; height -= 2) {
- d1 = d;
- d2 = d + dst->linesize[0];
- y2_ptr = y1_ptr + src->linesize[0];
- for(w = width2; w > 0; w --) {
- cb = cb_ptr[0] - 128;
- cr = cr_ptr[0] - 128;
- r_add = C_RV * cr + (1 << (SCALE_BITS - 1));
- g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));
- b_add = C_BU * cb + (1 << (SCALE_BITS - 1));
-
- /* output 4 pixels */
- RGBOUT(d1[2], d1[1], d1[0], y1_ptr[0]);
- RGBOUT(d1[6], d1[5], d1[4], y1_ptr[1]);
- RGBOUT(d2[2], d2[1], d2[0], y2_ptr[0]);
- RGBOUT(d2[6], d2[5], d2[4], y2_ptr[1]);
-
- d1[3] = d1[7] = d2[3] = d2[7] = 255;
-
- d1 += 8;
- d2 += 8;
- y1_ptr += 2;
- y2_ptr += 2;
- cb_ptr++;
- cr_ptr++;
+/* rgb555 handling */
+/* A pixel is one 16 bit word read/written through a UINT16 pointer:
+   bits 10-14 red, bits 5-9 green, bits 0-4 blue. */
+
+/* read one pixel at 's'; each 5 bit component is moved to bits 3-7 and
+   its low bits are filled by bitcopy_n */
+#define RGB_IN(r, g, b, s)\
+{\
+ unsigned int v = ((UINT16 *)(s))[0];\
+ r = bitcopy_n(v >> (10 - 3), 3);\
+ g = bitcopy_n(v >> (5 - 3), 3);\
+ b = bitcopy_n(v << 3, 3);\
+}
+
+/* write one pixel at 'd' from 8 bit components; bit 15 is always set */
+#define RGB_OUT(d, r, g, b)\
+{\
+ ((UINT16 *)(d))[0] = ((r >> 3) << 10) | ((g >> 3) << 5) | (b >> 3) | 0x8000;\
+}
+
+/* bytes per pixel */
+#define BPP 2
+
+/* instantiate the rgb555 converters declared by RGB_FUNCTIONS */
+RGB_FUNCTIONS(rgb555)
+
+#undef RGB_IN
+#undef RGB_OUT
+#undef BPP
+
+/* rgb565 handling */
+/* A pixel is one 16 bit word read/written through a UINT16 pointer:
+   bits 11-15 red, bits 5-10 green (6 bits), bits 0-4 blue. */
+
+/* read one pixel at 's'; red and blue are moved to bits 3-7, green to
+   bits 2-7, and the remaining low bits are filled by bitcopy_n */
+#define RGB_IN(r, g, b, s)\
+{\
+ unsigned int v = ((UINT16 *)(s))[0];\
+ r = bitcopy_n(v >> (11 - 3), 3);\
+ g = bitcopy_n(v >> (5 - 2), 2);\
+ b = bitcopy_n(v << 3, 3);\
+}
+
+/* write one pixel at 'd' from 8 bit components */
+#define RGB_OUT(d, r, g, b)\
+{\
+ ((UINT16 *)(d))[0] = ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);\
+}
+
+/* bytes per pixel */
+#define BPP 2
+
+/* instantiate the rgb565 converters declared by RGB_FUNCTIONS */
+RGB_FUNCTIONS(rgb565)
+
+#undef RGB_IN
+#undef RGB_OUT
+#undef BPP
+
+/* bgr24 handling */
+/* A pixel is three consecutive bytes: blue, green, red. */
+
+/* read one pixel at 's' */
+#define RGB_IN(r, g, b, s)\
+{\
+ b = (s)[0];\
+ g = (s)[1];\
+ r = (s)[2];\
+}
+
+/* write one pixel at 'd' */
+#define RGB_OUT(d, r, g, b)\
+{\
+ (d)[0] = b;\
+ (d)[1] = g;\
+ (d)[2] = r;\
+}
+
+/* bytes per pixel */
+#define BPP 3
+
+/* instantiate the bgr24 converters declared by RGB_FUNCTIONS */
+RGB_FUNCTIONS(bgr24)
+
+#undef RGB_IN
+#undef RGB_OUT
+#undef BPP
+
+/* rgb24 handling */
+/* A pixel is three consecutive bytes: red, green, blue. */
+
+/* read one pixel at 's' */
+#define RGB_IN(r, g, b, s)\
+{\
+ r = (s)[0];\
+ g = (s)[1];\
+ b = (s)[2];\
+}
+
+/* write one pixel at 'd' */
+#define RGB_OUT(d, r, g, b)\
+{\
+ (d)[0] = r;\
+ (d)[1] = g;\
+ (d)[2] = b;\
+}
+
+/* bytes per pixel */
+#define BPP 3
+
+/* instantiate the rgb24 converters declared by RGB_FUNCTIONS */
+RGB_FUNCTIONS(rgb24)
+
+#undef RGB_IN
+#undef RGB_OUT
+#undef BPP
+
+/* rgba32 handling */
+/* A pixel is one 32 bit word read/written through a UINT32 pointer:
+   bits 24-31 alpha, bits 16-23 red, bits 8-15 green, bits 0-7 blue. */
+
+/* read one pixel at 's'; the alpha byte is ignored */
+#define RGB_IN(r, g, b, s)\
+{\
+    unsigned int v = ((UINT32 *)(s))[0];\
+    r = (v >> 16) & 0xff;\
+    g = (v >> 8) & 0xff;\
+    b = v & 0xff;\
+}
+
+/* write one pixel at 'd' with alpha forced to 0xff; the alpha constant is
+   unsigned because shifting the int 0xff left by 24 overflows a 32 bit
+   signed int */
+#define RGB_OUT(d, r, g, b)\
+{\
+    ((UINT32 *)(d))[0] = (0xffU << 24) | (r << 16) | (g << 8) | b;\
+}
+
+/* bytes per pixel */
+#define BPP 4
+
+/* instantiate the rgba32 converters declared by RGB_FUNCTIONS */
+RGB_FUNCTIONS(rgba32)
+
+#undef RGB_IN
+#undef RGB_OUT
+#undef BPP
+
+
+static void rgb24_to_rgb565(AVPicture *dst, AVPicture *src,
+ int width, int height)
+{
+ const unsigned char *p;
+ unsigned char *q;
+ int r, g, b, dst_wrap, src_wrap;
+ int x, y;
+
+ p = src->data[0];
+ src_wrap = src->linesize[0] - 3 * width;
+
+ q = dst->data[0];
+ dst_wrap = dst->linesize[0] - 2 * width;
+
+ for(y=0;y<height;y++) {
+ for(x=0;x<width;x++) {
+ r = p[0];
+ g = p[1];
+ b = p[2];
+
+ ((unsigned short *)q)[0] =
+ ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);
+ q += 2;
+ p += 3;
}
- d += 2 * dst->linesize[0];
- y1_ptr += 2 * src->linesize[0] - width;
- cb_ptr += src->linesize[1] - width2;
- cr_ptr += src->linesize[2] - width2;
+ p += src_wrap;
+ q += dst_wrap;
}
}
-/* XXX: no chroma interpolating is done */
-static void yuv420p_to_rgba32(AVPicture *dst, AVPicture *src,
- int width, int height)
+/* NOTE: we also add a dummy alpha bit */
+static void rgb24_to_rgb555(AVPicture *dst, AVPicture *src,
+ int width, int height)
{
- UINT8 *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr, *d, *d1, *d2;
- int w, y, cb, cr, r_add, g_add, b_add, width2;
- UINT8 *cm = cropTbl + MAX_NEG_CROP;
+ const unsigned char *p;
+ unsigned char *q;
+ int r, g, b, dst_wrap, src_wrap;
+ int x, y;
- d = dst->data[0];
- y1_ptr = src->data[0];
- cb_ptr = src->data[1];
- cr_ptr = src->data[2];
- width2 = width >> 1;
- for(;height > 0; height -= 2) {
- d1 = d;
- d2 = d + dst->linesize[0];
- y2_ptr = y1_ptr + src->linesize[0];
- for(w = width2; w > 0; w --) {
- cb = cb_ptr[0] - 128;
- cr = cr_ptr[0] - 128;
- r_add = C_RV * cr + (1 << (SCALE_BITS - 1));
- g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));
- b_add = C_BU * cb + (1 << (SCALE_BITS - 1));
-
- /* output 4 pixels */
- RGBOUT(d1[0], d1[1], d1[2], y1_ptr[0]);
- RGBOUT(d1[4], d1[5], d1[6], y1_ptr[1]);
- RGBOUT(d2[0], d2[1], d2[2], y2_ptr[0]);
- RGBOUT(d2[4], d2[5], d2[6], y2_ptr[1]);
-
- d1[3] = d1[7] = d2[3] = d2[7] = 255;
-
- d1 += 8;
- d2 += 8;
- y1_ptr += 2;
- y2_ptr += 2;
- cb_ptr++;
- cr_ptr++;
+ p = src->data[0];
+ src_wrap = src->linesize[0] - 3 * width;
+
+ q = dst->data[0];
+ dst_wrap = dst->linesize[0] - 2 * width;
+
+ for(y=0;y<height;y++) {
+ for(x=0;x<width;x++) {
+ r = p[0];
+ g = p[1];
+ b = p[2];
+
+ ((unsigned short *)q)[0] =
+ ((r >> 3) << 10) | ((g >> 3) << 5) | (b >> 3) | 0x8000;
+ q += 2;
+ p += 3;
}
- d += 2 * dst->linesize[0];
- y1_ptr += 2 * src->linesize[0] - width;
- cb_ptr += src->linesize[1] - width2;
- cr_ptr += src->linesize[2] - width2;
+ p += src_wrap;
+ q += dst_wrap;
}
}
-/* XXX: no chroma interpolating is done */
-static void yuv420p_to_rgb24(AVPicture *dst, AVPicture *src,
- int width, int height)
+static void mono_to_gray(AVPicture *dst, AVPicture *src,
+ int width, int height, int xor_mask)
{
- UINT8 *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr, *d, *d1, *d2;
- int w, y, cb, cr, r_add, g_add, b_add, width2;
- UINT8 *cm = cropTbl + MAX_NEG_CROP;
-
- d = dst->data[0];
- y1_ptr = src->data[0];
- cb_ptr = src->data[1];
- cr_ptr = src->data[2];
- width2 = width >> 1;
- for(;height > 0; height -= 2) {
- d1 = d;
- d2 = d + dst->linesize[0];
- y2_ptr = y1_ptr + src->linesize[0];
- for(w = width2; w > 0; w --) {
- cb = cb_ptr[0] - 128;
- cr = cr_ptr[0] - 128;
- r_add = C_RV * cr + (1 << (SCALE_BITS - 1));
- g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));
- b_add = C_BU * cb + (1 << (SCALE_BITS - 1));
-
- /* output 4 pixels */
- RGBOUT(d1[0], d1[1], d1[2], y1_ptr[0]);
- RGBOUT(d1[3], d1[4], d1[5], y1_ptr[1]);
- RGBOUT(d2[0], d2[1], d2[2], y2_ptr[0]);
- RGBOUT(d2[3], d2[4], d2[5], y2_ptr[1]);
-
- d1 += 6;
- d2 += 6;
- y1_ptr += 2;
- y2_ptr += 2;
- cb_ptr++;
- cr_ptr++;
+ const unsigned char *p;
+ unsigned char *q;
+ int v, dst_wrap, src_wrap;
+ int y, w;
+
+ p = src->data[0];
+ src_wrap = src->linesize[0] - ((width + 7) >> 3);
+
+ q = dst->data[0];
+ dst_wrap = dst->linesize[0] - width;
+ for(y=0;y<height;y++) {
+ w = width;
+ while (w >= 8) {
+ v = *p++ ^ xor_mask;
+ q[0] = -(v >> 7);
+ q[1] = -((v >> 6) & 1);
+ q[2] = -((v >> 5) & 1);
+ q[3] = -((v >> 4) & 1);
+ q[4] = -((v >> 3) & 1);
+ q[5] = -((v >> 2) & 1);
+ q[6] = -((v >> 1) & 1);
+ q[7] = -((v >> 0) & 1);
+ w -= 8;
+ q += 8;
+ }
+ if (w > 0) {
+ v = *p++ ^ xor_mask;
+ do {
+ q[0] = -((v >> 7) & 1);
+ q++;
+ v <<= 1;
+ } while (--w);
}
- d += 2 * dst->linesize[0];
- y1_ptr += 2 * src->linesize[0] - width;
- cb_ptr += src->linesize[1] - width2;
- cr_ptr += src->linesize[2] - width2;
+ p += src_wrap;
+ q += dst_wrap;
}
}
-/* XXX: no chroma interpolating is done */
-static void yuv422p_to_rgb24(AVPicture *dst, AVPicture *src,
- int width, int height)
+/* PIX_FMT_MONOWHITE to gray: every source byte is inverted (xor 0xff)
+   before its bits are expanded, so a 0 bit yields 0xff and a 1 bit 0x00 */
+static void monowhite_to_gray(AVPicture *dst, AVPicture *src,
+ int width, int height)
{
- UINT8 *y1_ptr, *cb_ptr, *cr_ptr, *d, *d1;
- int w, y, cb, cr, r_add, g_add, b_add, width2;
- UINT8 *cm = cropTbl + MAX_NEG_CROP;
+ mono_to_gray(dst, src, width, height, 0xff);
+}
+
+/* PIX_FMT_MONOBLACK to gray: source bits are expanded as they are, so a
+   1 bit yields 0xff and a 0 bit 0x00 */
+static void monoblack_to_gray(AVPicture *dst, AVPicture *src,
+                              int width, int height)
+{
+    const int keep_polarity = 0x00; /* xor mask that changes no bit */
+
+    mono_to_gray(dst, src, width, height, keep_polarity);
+}
+
+/* Pack one gray byte per pixel from src->data[0] into one bit per pixel
+   in dst->data[0], most significant bit first. A pixel contributes its
+   top bit (set for gray values >= 128); every packed byte is xor-ed with
+   xor_mask before it is stored. Each line starts on a new byte; a final
+   partial byte is filled from its most significant bit down. */
+static void gray_to_mono(AVPicture *dst, AVPicture *src,
+ int width, int height, int xor_mask)
+{
+ int n;
+ const UINT8 *s;
+ UINT8 *d;
+ int j, b, v, n1, src_wrap, dst_wrap, y;
+
+ s = src->data[0];
+ src_wrap = src->linesize[0] - width;
d = dst->data[0];
- y1_ptr = src->data[0];
- cb_ptr = src->data[1];
- cr_ptr = src->data[2];
- width2 = width >> 1;
- for(;height > 0; height --) {
- d1 = d;
- for(w = width2; w > 0; w --) {
- cb = cb_ptr[0] - 128;
- cr = cr_ptr[0] - 128;
- r_add = C_RV * cr + (1 << (SCALE_BITS - 1));
- g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));
- b_add = C_BU * cb + (1 << (SCALE_BITS - 1));
-
- /* output 2 pixels */
- RGBOUT(d1[0], d1[1], d1[2], y1_ptr[0]);
- RGBOUT(d1[3], d1[4], d1[5], y1_ptr[1]);
-
- d1 += 6;
- y1_ptr += 2;
- cb_ptr++;
- cr_ptr++;
+ dst_wrap = dst->linesize[0] - ((width + 7) >> 3);
+
+ for(y=0;y<height;y++) {
+ n = width;
+ /* whole bytes: eight pixels each */
+ while (n >= 8) {
+ v = 0;
+ for(j=0;j<8;j++) {
+ b = s[0];
+ s++;
+ v = (v << 1) | (b >> 7);
+ }
+ d[0] = v ^ xor_mask;
+ d++;
+ n -= 8;
+ }
+ /* remaining 1 to 7 pixels of the line */
+ if (n > 0) {
+ n1 = n;
+ v = 0;
+ while (n > 0) {
+ b = s[0];
+ s++;
+ v = (v << 1) | (b >> 7);
+ n--;
+ }
+ /* move the n1 collected bits to the top of the byte */
+ d[0] = (v << (8 - (n1 & 7))) ^ xor_mask;
+ d++;
}
- d += dst->linesize[0];
- y1_ptr += src->linesize[0] - width;
- cb_ptr += src->linesize[1] - width2;
- cr_ptr += src->linesize[2] - width2;
+ s += src_wrap;
+ d += dst_wrap;
}
}
+/* gray to PIX_FMT_MONOWHITE: the packed bits are inverted (xor 0xff), so
+   gray values >= 128 become 0 bits */
+static void gray_to_monowhite(AVPicture *dst, AVPicture *src,
+                              int width, int height)
+{
+    const int invert_all = 0xff; /* xor mask that flips every bit */
+
+    gray_to_mono(dst, src, width, height, invert_all);
+}
+
+/* gray to PIX_FMT_MONOBLACK: the packed bits are stored as they are, so
+   gray values >= 128 become 1 bits */
+static void gray_to_monoblack(AVPicture *dst, AVPicture *src,
+                              int width, int height)
+{
+    const int keep_polarity = 0x00; /* xor mask that changes no bit */
+
+    gray_to_mono(dst, src, width, height, keep_polarity);
+}
+
+/* One cell of convert_table: the function converting between one pair of
+   pixel formats. Cells not listed in the table initializer are zero
+   filled, i.e. their convert pointer is NULL. */
+typedef struct ConvertEntry {
+ void (*convert)(AVPicture *dst, AVPicture *src, int width, int height);
+} ConvertEntry;
+
+/* Add each new conversion function to this table, which is indexed as
+   convert_table[source format][destination format]. */
+/* constraints:
+ - all non YUV modes must convert at least to and from PIX_FMT_RGB24
+*/
+static ConvertEntry convert_table[PIX_FMT_NB][PIX_FMT_NB] = {
+ [PIX_FMT_YUV420P] = {
+ [PIX_FMT_RGB555] = {
+ .convert = yuv420p_to_rgb555
+ },
+ [PIX_FMT_RGB565] = {
+ .convert = yuv420p_to_rgb565
+ },
+ [PIX_FMT_BGR24] = {
+ .convert = yuv420p_to_bgr24
+ },
+ [PIX_FMT_RGB24] = {
+ .convert = yuv420p_to_rgb24
+ },
+ [PIX_FMT_RGBA32] = {
+ .convert = yuv420p_to_rgba32
+ },
+ },
+ [PIX_FMT_YUV422P] = {
+ [PIX_FMT_RGB555] = {
+ .convert = yuv422p_to_rgb555
+ },
+ [PIX_FMT_RGB565] = {
+ .convert = yuv422p_to_rgb565
+ },
+ [PIX_FMT_BGR24] = {
+ .convert = yuv422p_to_bgr24
+ },
+ [PIX_FMT_RGB24] = {
+ .convert = yuv422p_to_rgb24
+ },
+ [PIX_FMT_RGBA32] = {
+ .convert = yuv422p_to_rgba32
+ },
+ },
+ [PIX_FMT_YUV422] = {
+ [PIX_FMT_YUV420P] = {
+ .convert = yuv422_to_yuv420p,
+ },
+ },
+
+ [PIX_FMT_RGB24] = {
+ [PIX_FMT_YUV420P] = {
+ .convert = rgb24_to_yuv420p
+ },
+ [PIX_FMT_RGB565] = {
+ .convert = rgb24_to_rgb565
+ },
+ [PIX_FMT_RGB555] = {
+ .convert = rgb24_to_rgb555
+ },
+ [PIX_FMT_GRAY8] = {
+ .convert = rgb24_to_gray
+ },
+ },
+ [PIX_FMT_RGBA32] = {
+ [PIX_FMT_YUV420P] = {
+ .convert = rgba32_to_yuv420p
+ },
+ [PIX_FMT_GRAY8] = {
+ .convert = rgba32_to_gray
+ },
+ },
+ [PIX_FMT_BGR24] = {
+ [PIX_FMT_YUV420P] = {
+ .convert = bgr24_to_yuv420p
+ },
+ [PIX_FMT_GRAY8] = {
+ .convert = bgr24_to_gray
+ },
+ },
+ [PIX_FMT_RGB555] = {
+ [PIX_FMT_YUV420P] = {
+ .convert = rgb555_to_yuv420p
+ },
+ [PIX_FMT_GRAY8] = {
+ .convert = rgb555_to_gray
+ },
+ },
+ [PIX_FMT_RGB565] = {
+ [PIX_FMT_YUV420P] = {
+ .convert = rgb565_to_yuv420p
+ },
+ [PIX_FMT_GRAY8] = {
+ .convert = rgb565_to_gray
+ },
+ },
+ [PIX_FMT_GRAY8] = {
+ [PIX_FMT_RGB555] = {
+ .convert = gray_to_rgb555
+ },
+ [PIX_FMT_RGB565] = {
+ .convert = gray_to_rgb565
+ },
+ [PIX_FMT_RGB24] = {
+ .convert = gray_to_rgb24
+ },
+ [PIX_FMT_BGR24] = {
+ .convert = gray_to_bgr24
+ },
+ [PIX_FMT_RGBA32] = {
+ .convert = gray_to_rgba32
+ },
+ [PIX_FMT_MONOWHITE] = {
+ .convert = gray_to_monowhite
+ },
+ [PIX_FMT_MONOBLACK] = {
+ .convert = gray_to_monoblack
+ },
+ },
+ [PIX_FMT_MONOWHITE] = {
+ [PIX_FMT_GRAY8] = {
+ .convert = monowhite_to_gray
+ },
+ },
+ [PIX_FMT_MONOBLACK] = {
+ [PIX_FMT_GRAY8] = {
+ .convert = monoblack_to_gray
+ },
+ },
+};
+
+/* Allocate a single buffer for an image of the given format and
+   dimensions and point 'picture' into it with avpicture_fill().
+   Returns 0 on success. If the size cannot be computed or the allocation
+   fails, 'picture' is zero filled and -1 is returned. */
+static int avpicture_alloc(AVPicture *picture,
+                           int pix_fmt, int width, int height)
+{
+    int nbytes = avpicture_get_size(pix_fmt, width, height);
+    void *buf = NULL;
+
+    /* a negative size means the format is not handled: do not allocate */
+    if (nbytes >= 0)
+        buf = av_malloc(nbytes);
+    if (!buf) {
+        memset(picture, 0, sizeof(AVPicture));
+        return -1;
+    }
+    avpicture_fill(picture, buf, pix_fmt, width, height);
+    return 0;
+}
+
+/* Release a picture obtained from avpicture_alloc(). Only data[0] is
+   freed: it is the start of the single buffer that avpicture_alloc()
+   hands to avpicture_fill(). */
+static void avpicture_free(AVPicture *picture)
+{
+    void *base = picture->data[0];
+
+    av_free(base);
+}
+
/* XXX: always use linesize. Return -1 if not supported */
int img_convert(AVPicture *dst, int dst_pix_fmt,
- AVPicture *src, int pix_fmt,
- int width, int height)
+ AVPicture *src, int src_pix_fmt,
+ int src_width, int src_height)
{
- int i;
+ int i, ret, dst_width, dst_height, int_pix_fmt;
+ PixFmtInfo *src_pix, *dst_pix;
+ ConvertEntry *ce;
+ AVPicture tmp1, *tmp = &tmp1;
- if (dst_pix_fmt == pix_fmt) {
- switch(pix_fmt) {
- case PIX_FMT_YUV420P:
- for(i=0;i<3;i++) {
- if (i == 1) {
- width >>= 1;
- height >>= 1;
- }
- img_copy(dst->data[i], dst->linesize[i],
- src->data[i], src->linesize[i],
- width, height);
+ if (src_pix_fmt < 0 || src_pix_fmt >= PIX_FMT_NB ||
+ dst_pix_fmt < 0 || dst_pix_fmt >= PIX_FMT_NB)
+ return -1;
+ if (src_width <= 0 || src_height <= 0)
+ return 0;
+
+ dst_width = src_width;
+ dst_height = src_height;
+
+ dst_pix = &pix_fmt_info[dst_pix_fmt];
+ src_pix = &pix_fmt_info[src_pix_fmt];
+ if (src_pix_fmt == dst_pix_fmt) {
+ /* XXX: incorrect */
+ /* same format: just copy */
+ for(i = 0; i < dst_pix->nb_components; i++) {
+ int w, h;
+ w = dst_width;
+ h = dst_height;
+ if (dst_pix->is_yuv && (i == 1 || i == 2)) {
+ w >>= dst_pix->x_chroma_shift;
+ h >>= dst_pix->y_chroma_shift;
}
- break;
- default:
- return -1;
+ img_copy(dst->data[i], dst->linesize[i],
+ src->data[i], src->linesize[i],
+ w, h);
}
- } else if (dst_pix_fmt == PIX_FMT_YUV420P) {
-
- switch(pix_fmt) {
- case PIX_FMT_YUV411P:
- img_copy(dst->data[0], dst->linesize[0],
- src->data[0], src->linesize[0],
- width, height);
- conv411(dst->data[1], dst->linesize[1],
- src->data[1], src->linesize[1],
- width / 4, height);
- conv411(dst->data[2], dst->linesize[2],
- src->data[2], src->linesize[2],
- width / 4, height);
- break;
- case PIX_FMT_YUV410P:
- img_copy(dst->data[0], dst->linesize[0],
- src->data[0], src->linesize[0],
- width, height);
- grow22(dst->data[1], dst->linesize[1],
- src->data[1], src->linesize[1],
- width/2, height/2);
- grow22(dst->data[2], dst->linesize[2],
- src->data[2], src->linesize[2],
- width/2, height/2);
- break;
- case PIX_FMT_YUV420P:
- for(i=0;i<3;i++) {
- img_copy(dst->data[i], dst->linesize[i],
- src->data[i], src->linesize[i],
- width, height);
- }
- break;
- case PIX_FMT_YUV422P:
- img_copy(dst->data[0], dst->linesize[0],
- src->data[0], src->linesize[0],
- width, height);
- width >>= 1;
- height >>= 1;
- for(i=1;i<3;i++) {
- shrink2(dst->data[i], dst->linesize[i],
- src->data[i], src->linesize[i],
- width, height);
- }
- break;
- case PIX_FMT_YUV444P:
- img_copy(dst->data[0], dst->linesize[0],
- src->data[0], src->linesize[0],
- width, height);
- width >>= 1;
- height >>= 1;
- for(i=1;i<3;i++) {
- shrink22(dst->data[i], dst->linesize[i],
- src->data[i], src->linesize[i],
- width, height);
+ return 0;
+ }
+
+ ce = &convert_table[src_pix_fmt][dst_pix_fmt];
+ if (ce->convert) {
+ /* specific conversion routine */
+ ce->convert(dst, src, dst_width, dst_height);
+ return 0;
+ }
+
+ /* gray to YUV */
+ if (dst_pix->is_yuv && src_pix_fmt == PIX_FMT_GRAY8) {
+ int w, h, y;
+ uint8_t *d;
+
+ img_copy(dst->data[0], dst->linesize[0],
+ src->data[0], src->linesize[0],
+ dst_width, dst_height);
+ /* fill U and V with 128 */
+ w = dst_width;
+ h = dst_height;
+ w >>= dst_pix->x_chroma_shift;
+ h >>= dst_pix->y_chroma_shift;
+ for(i = 1; i <= 2; i++) {
+ d = dst->data[i];
+ for(y = 0; y< h; y++) {
+ memset(d, 128, w);
+ d += dst->linesize[i];
}
- break;
- case PIX_FMT_YUV422:
- yuv422_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
- src->data[0], width, height);
- break;
- case PIX_FMT_RGB24:
- rgb24_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
- src->data[0], width, height);
- break;
- case PIX_FMT_RGBA32:
- rgba32_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
- src->data[0], width, height);
- break;
- case PIX_FMT_BGR24:
- bgr24_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
- src->data[0], width, height);
- break;
- case PIX_FMT_BGRA32:
- bgra32_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
- src->data[0], width, height);
- break;
- case PIX_FMT_RGB565:
- rgb565_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
- src->data[0], width, height);
- break;
- case PIX_FMT_RGB555:
- rgb555_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
- src->data[0], width, height);
- break;
-/* case PIX_FMT_RGB5551:
- rgb5551_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
- src->data[0], width, height);
- break;*/
- case PIX_FMT_BGR565:
- bgr565_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
- src->data[0], width, height);
- break;
- case PIX_FMT_BGR555:
- bgr555_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
- src->data[0], width, height);
- break;
-/* case PIX_FMT_GBR565:
- gbr565_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
- src->data[0], width, height);
- break;
- case PIX_FMT_GBR555:
- gbr555_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
- src->data[0], width, height);
- break;*/
- default:
- return -1;
- }
- } else if (dst_pix_fmt == PIX_FMT_RGB24) {
- switch(pix_fmt) {
- case PIX_FMT_YUV420P:
- yuv420p_to_rgb24(dst, src, width, height);
- break;
- case PIX_FMT_YUV422P:
- yuv422p_to_rgb24(dst, src, width, height);
- break;
- default:
- return -1;
- }
- } else if (dst_pix_fmt == PIX_FMT_RGBA32) {
- switch(pix_fmt) {
- case PIX_FMT_YUV420P:
- yuv420p_to_rgba32(dst, src, width, height);
- break;
- default:
- return -1;
}
- } else if (dst_pix_fmt == PIX_FMT_BGRA32) {
- switch(pix_fmt) {
- case PIX_FMT_YUV420P:
- yuv420p_to_bgra32(dst, src, width, height);
- break;
- default:
+ return 0;
+ }
+
+ /* YUV to gray */
+ if (src_pix->is_yuv && dst_pix_fmt == PIX_FMT_GRAY8) {
+ img_copy(dst->data[0], dst->linesize[0],
+ src->data[0], src->linesize[0],
+ dst_width, dst_height);
+ return 0;
+ }
+
+ /* YUV to YUV */
+ if (dst_pix->is_yuv && src_pix->is_yuv) {
+ int x_shift, y_shift, w, h;
+ void (*resize_func)(UINT8 *dst, int dst_wrap,
+ UINT8 *src, int src_wrap,
+ int width, int height);
+
+ /* compute chroma size of the smallest dimensions */
+ w = dst_width;
+ h = dst_height;
+ if (dst_pix->x_chroma_shift >= src_pix->x_chroma_shift)
+ w >>= dst_pix->x_chroma_shift;
+ else
+ w >>= src_pix->x_chroma_shift;
+ if (dst_pix->y_chroma_shift >= src_pix->y_chroma_shift)
+ h >>= dst_pix->y_chroma_shift;
+ else
+ h >>= src_pix->y_chroma_shift;
+
+ x_shift = (dst_pix->x_chroma_shift - src_pix->x_chroma_shift);
+ y_shift = (dst_pix->y_chroma_shift - src_pix->y_chroma_shift);
+ if (x_shift == 0 && y_shift == 0) {
+ resize_func = img_copy; /* should never happen */
+ } else if (x_shift == 0 && y_shift == 1) {
+ resize_func = shrink2;
+ } else if (x_shift == 1 && y_shift == 1) {
+ resize_func = shrink22;
+ } else if (x_shift == -1 && y_shift == -1) {
+ resize_func = grow22;
+ } else if (x_shift == -1 && y_shift == 1) {
+ resize_func = conv411;
+ } else {
+ /* currently not handled */
return -1;
}
+
+ img_copy(dst->data[0], dst->linesize[0],
+ src->data[0], src->linesize[0],
+ dst_width, dst_height);
+
+ for(i = 1;i <= 2; i++)
+ resize_func(dst->data[i], dst->linesize[i],
+ src->data[i], src->linesize[i],
+ w, h);
+ return 0;
+ }
+
+ /* try to use an intermediate format */
+ if (src_pix_fmt == PIX_FMT_MONOWHITE ||
+ src_pix_fmt == PIX_FMT_MONOBLACK ||
+ dst_pix_fmt == PIX_FMT_MONOWHITE ||
+ dst_pix_fmt == PIX_FMT_MONOBLACK) {
+ int_pix_fmt = PIX_FMT_GRAY8;
} else {
- return -1;
+ int_pix_fmt = PIX_FMT_RGB24;
}
- return 0;
+ if (avpicture_alloc(tmp, int_pix_fmt, dst_width, dst_height) < 0)
+ return -1;
+ ret = -1;
+ if (img_convert(tmp, int_pix_fmt,
+ src, src_pix_fmt, src_width, src_height) < 0)
+ goto fail1;
+ if (img_convert(dst, dst_pix_fmt,
+ tmp, int_pix_fmt, dst_width, dst_height) < 0)
+ goto fail1;
+ ret = 0;
+ fail1:
+ avpicture_free(tmp);
+ return ret;
}
@@ -948,6 +1294,15 @@ static void deinterlace_line(UINT8 *dst, UINT8 *lum_m4, UINT8 *lum_m3, UINT8 *lu
}
#else
+ {
+ mmx_t rounder;
+ rounder.uw[0]=4;
+ rounder.uw[1]=4;
+ rounder.uw[2]=4;
+ rounder.uw[3]=4;
+ pxor_r2r(mm7,mm7);
+ movq_m2r(rounder,mm6);
+ }
for (;size > 3; size-=4) {
DEINT_LINE_LUM
lum_m4+=4;
@@ -982,6 +1337,15 @@ static void deinterlace_line_inplace(UINT8 *lum_m4, UINT8 *lum_m3, UINT8 *lum_m2
}
#else
+ {
+ mmx_t rounder;
+ rounder.uw[0]=4;
+ rounder.uw[1]=4;
+ rounder.uw[2]=4;
+ rounder.uw[3]=4;
+ pxor_r2r(mm7,mm7);
+ movq_m2r(rounder,mm6);
+ }
for (;size > 3; size-=4) {
DEINT_INPLACE_LINE_LUM
lum_m4+=4;
@@ -1064,19 +1428,6 @@ int avpicture_deinterlace(AVPicture *dst, AVPicture *src,
if ((width & 3) != 0 || (height & 3) != 0)
return -1;
-#ifdef HAVE_MMX
- {
- mmx_t rounder;
- rounder.uw[0]=4;
- rounder.uw[1]=4;
- rounder.uw[2]=4;
- rounder.uw[3]=4;
- pxor_r2r(mm7,mm7);
- movq_m2r(rounder,mm6);
- }
-#endif
-
-
for(i=0;i<3;i++) {
if (i == 1) {
switch(pix_fmt) {
diff --git a/src/libffmpeg/libavcodec/mem.c b/src/libffmpeg/libavcodec/mem.c
index a9b5e0afa..a36952fd7 100644
--- a/src/libffmpeg/libavcodec/mem.c
+++ b/src/libffmpeg/libavcodec/mem.c
@@ -17,6 +17,12 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "avcodec.h"
+
+/* here we can use OS-dependent allocation functions */
+#undef malloc
+#undef free
+#undef realloc
+
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
@@ -25,10 +31,15 @@
memory allocator. You do not need to suppress this file because the
linker will do it automatically */
-/* memory alloc */
+/**
+ * Allocate size bytes with alignment suitable for all
+ * memory accesses (including vectors if available on the
+ * CPU). av_malloc(0) must return a non-NULL pointer.
+ */
void *av_malloc(unsigned int size)
{
void *ptr;
+
#if defined (HAVE_MEMALIGN)
ptr = memalign(16,size);
/* Why 64?
@@ -60,14 +71,19 @@ void *av_malloc(unsigned int size)
#else
ptr = malloc(size);
#endif
- if (!ptr)
- return NULL;
-//fprintf(stderr, "%X %d\n", (int)ptr, size);
- /* NOTE: this memset should not be present */
- memset(ptr, 0, size);
return ptr;
}
+/**
+ * av_realloc semantics (same as glibc): if ptr is NULL and size > 0,
+ * identical to malloc(size). If size is zero, it is identical to
+ * free(ptr) and NULL is returned.
+ *
+ * This is a direct call to realloc(); unlike the HAVE_MEMALIGN path of
+ * av_malloc(), no explicit alignment is requested here.
+ * NOTE(review): confirm that callers do not rely on av_malloc()'s
+ * alignment for reallocated blocks.
+ */
+void *av_realloc(void *ptr, unsigned int size)
+{
+ return realloc(ptr, size);
+}
+
/* NOTE: ptr = NULL is explicetly allowed */
void av_free(void *ptr)
{
diff --git a/src/libffmpeg/libavcodec/mjpeg.c b/src/libffmpeg/libavcodec/mjpeg.c
index 9b4943582..9617816bb 100644
--- a/src/libffmpeg/libavcodec/mjpeg.c
+++ b/src/libffmpeg/libavcodec/mjpeg.c
@@ -735,7 +735,7 @@ static int mjpeg_decode_init(AVCodecContext *avctx)
if (avctx->flags & CODEC_FLAG_EXTERN_HUFF)
{
printf("mjpeg: using external huffman table\n");
- init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size);
+ init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size*8);
mjpeg_decode_dht(s);
/* should check for error - but dunno */
}
@@ -1404,13 +1404,13 @@ static int mjpeg_decode_frame(AVCodecContext *avctx,
break;
}
}
- init_get_bits(&s->gb, s->buffer, dst - s->buffer);
+ init_get_bits(&s->gb, s->buffer, (dst - s->buffer)*8);
dprintf("escaping removed %d bytes\n",
(buf_end - buf_ptr) - (dst - s->buffer));
}
else
- init_get_bits(&s->gb, buf_ptr, buf_end - buf_ptr);
+ init_get_bits(&s->gb, buf_ptr, (buf_end - buf_ptr)*8);
s->start_code = start_code;
@@ -1548,7 +1548,7 @@ read_header:
/* reset on every SOI */
s->restart_interval = 0;
- init_get_bits(&hgb, buf_ptr, /*buf_size*/buf_end - buf_ptr);
+ init_get_bits(&hgb, buf_ptr, /*buf_size*/(buf_end - buf_ptr)*8);
skip_bits(&hgb, 32); /* reserved zeros */
@@ -1570,7 +1570,7 @@ read_header:
dprintf("dqt offs: 0x%x\n", dqt_offs);
if (dqt_offs)
{
- init_get_bits(&s->gb, buf+dqt_offs, buf_end - (buf+dqt_offs));
+ init_get_bits(&s->gb, buf+dqt_offs, (buf_end - (buf+dqt_offs))*8);
s->start_code = DQT;
mjpeg_decode_dqt(s);
}
@@ -1579,7 +1579,7 @@ read_header:
dprintf("dht offs: 0x%x\n", dht_offs);
if (dht_offs)
{
- init_get_bits(&s->gb, buf+dht_offs, buf_end - (buf+dht_offs));
+ init_get_bits(&s->gb, buf+dht_offs, (buf_end - (buf+dht_offs))*8);
s->start_code = DHT;
mjpeg_decode_dht(s);
}
@@ -1588,7 +1588,7 @@ read_header:
dprintf("sof offs: 0x%x\n", sof_offs);
if (sof_offs)
{
- init_get_bits(&s->gb, buf+sof_offs, buf_end - (buf+sof_offs));
+ init_get_bits(&s->gb, buf+sof_offs, (buf_end - (buf+sof_offs))*8);
s->start_code = SOF0;
if (mjpeg_decode_sof0(s) < 0)
return -1;
@@ -1598,8 +1598,8 @@ read_header:
dprintf("sos offs: 0x%x\n", sos_offs);
if (sos_offs)
{
-// init_get_bits(&s->gb, buf+sos_offs, buf_end - (buf+sos_offs));
- init_get_bits(&s->gb, buf+sos_offs, field_size);
+// init_get_bits(&s->gb, buf+sos_offs, (buf_end - (buf+sos_offs))*8);
+ init_get_bits(&s->gb, buf+sos_offs, field_size*8);
s->start_code = SOS;
mjpeg_decode_sos(s);
}
diff --git a/src/libffmpeg/libavcodec/motion_est.c b/src/libffmpeg/libavcodec/motion_est.c
index 8310db8d5..e4b67b22f 100644
--- a/src/libffmpeg/libavcodec/motion_est.c
+++ b/src/libffmpeg/libavcodec/motion_est.c
@@ -1,7 +1,7 @@
/*
* Motion estimation
* Copyright (c) 2000,2001 Fabrice Bellard.
- * Copyright (c) 2002 Michael Niedermayer
+ * Copyright (c) 2002-2003 Michael Niedermayer
*
*
* This library is free software; you can redistribute it and/or
@@ -286,6 +286,14 @@ static void set_cmp(MpegEncContext *s, me_cmp_func *cmp, int type){
cmp[0]= c->quant_psnr[0];
cmp[1]= c->quant_psnr[1];
break;
+ case FF_CMP_BIT:
+ cmp[0]= c->bit[0];
+ cmp[1]= c->bit[1];
+ break;
+ case FF_CMP_RD:
+ cmp[0]= c->rd[0];
+ cmp[1]= c->rd[1];
+ break;
case FF_CMP_ZERO:
for(i=0; i<7; i++){
cmp[i]= zero_cmp;
@@ -294,19 +302,24 @@ static void set_cmp(MpegEncContext *s, me_cmp_func *cmp, int type){
default:
fprintf(stderr,"internal error in cmp function selection\n");
}
-};
+}
static inline int get_penalty_factor(MpegEncContext *s, int type){
-
- switch(type){
+ switch(type&0xFF){
default:
case FF_CMP_SAD:
- return s->qscale;
- case FF_CMP_SSE:
-// return s->qscale*8;
+ return s->qscale*2;
case FF_CMP_DCT:
+ return s->qscale*3;
case FF_CMP_SATD:
- return s->qscale*8;
+ return s->qscale*6;
+ case FF_CMP_SSE:
+ return s->qscale*s->qscale*2;
+ case FF_CMP_BIT:
+ return 1;
+ case FF_CMP_RD:
+ case FF_CMP_PSNR:
+ return (s->qscale*s->qscale*185 + 64)>>7;
}
}
@@ -324,7 +337,9 @@ void ff_init_me(MpegEncContext *s){
}else{
if(s->avctx->me_sub_cmp&FF_CMP_CHROMA)
s->me.sub_motion_search= simple_chroma_hpel_motion_search;
- else if(s->avctx->me_sub_cmp == FF_CMP_SAD && s->avctx->me_cmp == FF_CMP_SAD)
+ else if( s->avctx->me_sub_cmp == FF_CMP_SAD
+ && s->avctx-> me_cmp == FF_CMP_SAD
+ && s->avctx-> mb_cmp == FF_CMP_SAD)
s->me.sub_motion_search= sad_hpel_motion_search;
else
s->me.sub_motion_search= simple_hpel_motion_search;
@@ -343,6 +358,18 @@ void ff_init_me(MpegEncContext *s){
}else{
s->me.pre_motion_search= simple_epzs_motion_search;
}
+
+ if(s->flags&CODEC_FLAG_QPEL){
+ if(s->avctx->mb_cmp&FF_CMP_CHROMA)
+ s->me.get_mb_score= simple_chroma_qpel_get_mb_score;
+ else
+ s->me.get_mb_score= simple_qpel_get_mb_score;
+ }else{
+ if(s->avctx->mb_cmp&FF_CMP_CHROMA)
+ s->me.get_mb_score= simple_chroma_hpel_get_mb_score;
+ else
+ s->me.get_mb_score= simple_hpel_get_mb_score;
+ }
}
static int pix_dev(UINT8 * pix, int line_size, int mean)
@@ -776,12 +803,11 @@ static inline void get_limits(MpegEncContext *s, int *range, int *xmin, int *ymi
}
}
-static inline int mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, int ymax, int mx, int my, int shift)
+static inline int h263_mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, int ymax, int mx, int my, int shift)
{
int block;
int P[10][2];
- uint8_t *ref_picture= s->last_picture.data[0];
- int dmin_sum=0;
+ int dmin_sum=0, mx4_sum=0, my4_sum=0;
uint16_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;
for(block=0; block<4; block++){
@@ -826,13 +852,15 @@ static inline int mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, in
P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
- if(s->out_format == FMT_H263){
+// if(s->out_format == FMT_H263){
pred_x4 = P_MEDIAN[0];
pred_y4 = P_MEDIAN[1];
+#if 0
}else { /* mpeg1 at least */
pred_x4= P_LEFT[0];
pred_y4= P_LEFT[1];
}
+#endif
}
P_MV1[0]= mx;
P_MV1[1]= my;
@@ -842,12 +870,80 @@ static inline int mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, in
dmin4= s->me.sub_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4,
pred_x4, pred_y4, &s->last_picture, block, 1, mv_penalty);
-
+
+ if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
+ int dxy;
+ const int offset= ((block&1) + (block>>1)*s->linesize)*8;
+ uint8_t *dest_y = s->me.scratchpad + offset;
+
+ if(s->quarter_sample){
+ uint8_t *ref= s->last_picture.data[0] + (s->mb_x*16 + (mx4>>2)) + (s->mb_y*16 + (my4>>2))*s->linesize + offset;
+ dxy = ((my4 & 3) << 2) | (mx4 & 3);
+
+ if(s->no_rounding)
+ s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y , ref , s->linesize);
+ else
+ s->dsp.put_qpel_pixels_tab [1][dxy](dest_y , ref , s->linesize);
+ }else{
+ uint8_t *ref= s->last_picture.data[0] + (s->mb_x*16 + (mx4>>1)) + (s->mb_y*16 + (my4>>1))*s->linesize + offset;
+ dxy = ((my4 & 1) << 1) | (mx4 & 1);
+
+ if(s->no_rounding)
+ s->dsp.put_no_rnd_pixels_tab[1][dxy](dest_y , ref , s->linesize, 8);
+ else
+ s->dsp.put_pixels_tab [1][dxy](dest_y , ref , s->linesize, 8);
+ }
+ dmin_sum+= (mv_penalty[mx4-pred_x4] + mv_penalty[my4-pred_y4])*s->me.mb_penalty_factor;
+ }else
+ dmin_sum+= dmin4;
+
+ if(s->quarter_sample){
+ mx4_sum+= mx4/2;
+ my4_sum+= my4/2;
+ }else{
+ mx4_sum+= mx4;
+ my4_sum+= my4;
+ }
+
s->motion_val[ s->block_index[block] ][0]= mx4;
s->motion_val[ s->block_index[block] ][1]= my4;
- dmin_sum+= dmin4;
}
- return dmin_sum;
+
+ if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
+ dmin_sum += s->dsp.mb_cmp[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*16*s->linesize, s->me.scratchpad, s->linesize);
+ }
+
+ if(s->avctx->mb_cmp&FF_CMP_CHROMA){
+ int dxy;
+ int mx, my;
+ int offset;
+
+ mx= ff_h263_round_chroma(mx4_sum);
+ my= ff_h263_round_chroma(my4_sum);
+ dxy = ((my & 1) << 1) | (mx & 1);
+
+ offset= (s->mb_x*8 + (mx>>1)) + (s->mb_y*8 + (my>>1))*s->uvlinesize;
+
+ if(s->no_rounding){
+ s->dsp.put_no_rnd_pixels_tab[1][dxy](s->me.scratchpad , s->last_picture.data[1] + offset, s->uvlinesize, 8);
+ s->dsp.put_no_rnd_pixels_tab[1][dxy](s->me.scratchpad+8 , s->last_picture.data[2] + offset, s->uvlinesize, 8);
+ }else{
+ s->dsp.put_pixels_tab [1][dxy](s->me.scratchpad , s->last_picture.data[1] + offset, s->uvlinesize, 8);
+ s->dsp.put_pixels_tab [1][dxy](s->me.scratchpad+8 , s->last_picture.data[2] + offset, s->uvlinesize, 8);
+ }
+
+ dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad , s->uvlinesize);
+ dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad+8, s->uvlinesize);
+ }
+
+ switch(s->avctx->mb_cmp&0xFF){
+ /*case FF_CMP_SSE:
+ return dmin_sum+ 32*s->qscale*s->qscale;*/
+ case FF_CMP_RD:
+ return dmin_sum;
+ default:
+ return dmin_sum+ 11*s->me.mb_penalty_factor;
+ }
}
void ff_estimate_p_frame_motion(MpegEncContext * s,
@@ -869,6 +965,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
s->me.penalty_factor = get_penalty_factor(s, s->avctx->me_cmp);
s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp);
+ s->me.mb_penalty_factor = get_penalty_factor(s, s->avctx->mb_cmp);
get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, s->f_code);
rel_xmin= xmin - mb_x*16;
@@ -959,6 +1056,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
pic->mb_var [s->mb_width * mb_y + mb_x] = varc;
pic->mc_mb_var[s->mb_width * mb_y + mb_x] = vard;
pic->mb_mean [s->mb_width * mb_y + mb_x] = (sum+128)>>8;
+// pic->mb_cmp_score[s->mb_width * mb_y + mb_x] = dmin;
pic->mb_var_sum += varc;
pic->mc_mb_var_sum += vard;
//printf("E%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout);
@@ -985,44 +1083,36 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
}
if((s->flags&CODEC_FLAG_4MV)
&& !s->me.skip && varc>50 && vard>10){
- mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift);
+ h263_mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift);
mb_type|=MB_TYPE_INTER4V;
set_p_mv_tables(s, mx, my, 0);
}else
set_p_mv_tables(s, mx, my, 1);
}else{
- if (vard <= 64 || vard < varc) {
-// if (sadP <= 32 || sadP < sadI + 500) {
- s->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
- mb_type|= MB_TYPE_INTER;
- if (s->me_method != ME_ZERO) {
- dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
- pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty);
- if((s->flags&CODEC_FLAG_4MV)
- && !s->me.skip && varc>50 && vard>10){
- int dmin4= mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift);
- if(dmin4 + 128 <dmin)
- mb_type= MB_TYPE_INTER4V;
- }
- set_p_mv_tables(s, mx, my, mb_type!=MB_TYPE_INTER4V);
+ mb_type= MB_TYPE_INTER;
- } else {
- mx <<=shift;
- my <<=shift;
- }
-#if 0
- if (vard < 10) {
- skip++;
- fprintf(stderr,"\nEarly skip: %d vard: %2d varc: %5d dmin: %d",
- skip, vard, varc, dmin);
+ dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
+ pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty);
+
+ if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip)
+ dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, &s->last_picture, mv_penalty);
+
+ if((s->flags&CODEC_FLAG_4MV)
+ && !s->me.skip && varc>50 && vard>10){
+ int dmin4= h263_mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift);
+ if(dmin4 < dmin){
+ mb_type= MB_TYPE_INTER4V;
+ dmin=dmin4;
}
-#endif
+ }
+ pic->mb_cmp_score[s->mb_width * mb_y + mb_x] = dmin;
+ set_p_mv_tables(s, mx, my, mb_type!=MB_TYPE_INTER4V);
+
+ if (vard <= 64 || vard < varc) {
+ s->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
}else{
- s->scene_change_score+= 20;
- mb_type|= MB_TYPE_INTRA;
- mx = 0;
- my = 0;
+ s->scene_change_score+= s->qscale;
}
}
@@ -1105,6 +1195,7 @@ int ff_estimate_motion_b(MpegEncContext * s,
s->me.penalty_factor = get_penalty_factor(s, s->avctx->me_cmp);
s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp);
+ s->me.mb_penalty_factor = get_penalty_factor(s, s->avctx->mb_cmp);
get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, f_code);
rel_xmin= xmin - mb_x*16;
@@ -1174,6 +1265,10 @@ int ff_estimate_motion_b(MpegEncContext * s,
dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
pred_x, pred_y, picture, 0, 0, mv_penalty);
+
+ if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip)
+ dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, picture, mv_penalty);
+
//printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my);
// s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
mv_table[mot_xy][0]= mx;
@@ -1237,10 +1332,14 @@ static inline int check_bidir_mv(MpegEncContext * s,
s->dsp.avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16);
}
- fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*s->me.sub_penalty_factor
- +(mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->me.sub_penalty_factor;
- + s->dsp.me_sub_cmp[0](s, s->new_picture.data[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize);
-
+ fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*s->me.mb_penalty_factor
+ +(mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->me.mb_penalty_factor
+ + s->dsp.mb_cmp[0](s, s->new_picture.data[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize);
+
+ if(s->avctx->mb_cmp&FF_CMP_CHROMA){
+ }
+ //FIXME CHROMA !!!
+
return fbmin;
}
@@ -1344,17 +1443,24 @@ static inline int direct_search(MpegEncContext * s,
P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
}
-
+
+ //FIXME direct_search ptr in context!!! (needed for chroma anyway or this will get messy)
if(s->flags&CODEC_FLAG_QPEL){
dmin = simple_direct_qpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax,
&s->last_picture, mv_table, 1<<14, mv_penalty);
dmin = simple_direct_qpel_qpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax,
0, 0, &s->last_picture, 0, 0, mv_penalty);
+
+ if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip)
+ dmin= simple_direct_qpel_qpel_get_mb_score(s, mx, my, 0, 0, &s->last_picture, mv_penalty);
}else{
dmin = simple_direct_hpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax,
&s->last_picture, mv_table, 1<<15, mv_penalty);
dmin = simple_direct_hpel_hpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax,
0, 0, &s->last_picture, 0, 0, mv_penalty);
+
+ if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip)
+ dmin= simple_direct_hpel_hpel_get_mb_score(s, mx, my, 0, 0, &s->last_picture, mv_penalty);
}
s->b_direct_mv_table[mot_xy][0]= mx;
@@ -1365,18 +1471,18 @@ static inline int direct_search(MpegEncContext * s,
void ff_estimate_b_frame_motion(MpegEncContext * s,
int mb_x, int mb_y)
{
- const int penalty_factor= s->me.penalty_factor;
+ const int penalty_factor= s->me.mb_penalty_factor;
int fmin, bmin, dmin, fbmin;
int type=0;
dmin= direct_search(s, mb_x, mb_y);
- fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, &s->last_picture, s->f_code);
- bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, &s->next_picture, s->b_code) - penalty_factor;
+ fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, &s->last_picture, s->f_code) + 3*penalty_factor;
+ bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, &s->next_picture, s->b_code) + 2*penalty_factor;
//printf(" %d %d ", s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]);
- fbmin= bidir_refine(s, mb_x, mb_y);
-
+ fbmin= bidir_refine(s, mb_x, mb_y) + penalty_factor;
+//printf("%d %d %d %d\n", dmin, fmin, bmin, fbmin);
{
int score= dmin;
type=MB_TYPE_DIRECT;
@@ -1393,9 +1499,10 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
score=fbmin;
type= MB_TYPE_BIDIR;
}
+
score= ((unsigned)(score*score + 128*256))>>16;
s->current_picture.mc_mb_var_sum += score;
- s->current_picture.mc_mb_var[mb_y*s->mb_width + mb_x] = score; //FIXME use SSD
+ s->current_picture.mc_mb_var[mb_y*s->mb_width + mb_x] = score; //FIXME use SSE
}
if(s->flags&CODEC_FLAG_HQ){
diff --git a/src/libffmpeg/libavcodec/motion_est_template.c b/src/libffmpeg/libavcodec/motion_est_template.c
index d1ca6e7fb..4725ed994 100644
--- a/src/libffmpeg/libavcodec/motion_est_template.c
+++ b/src/libffmpeg/libavcodec/motion_est_template.c
@@ -39,7 +39,7 @@
qpel_mc_func (*qpel_put)[16];\
qpel_mc_func (*qpel_avg)[16]= &s->dsp.avg_qpel_pixels_tab[size];\
const __attribute__((unused)) int unu= time_pp + time_pb + (int)src_u + (int)src_v + (int)ref_u + (int)ref_v\
- + (int)ref2_y + (int)hpel_avg + (int)qpel_avg;\
+ + (int)ref2_y + (int)hpel_avg + (int)qpel_avg + (int)score_map;\
if(s->no_rounding /*FIXME b_type*/){\
hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];\
chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];\
@@ -144,6 +144,7 @@ static int RENAME(hpel_motion_search)(MpegEncContext * s,
const int my = *my_ptr;
const int penalty_factor= s->me.sub_penalty_factor;
me_cmp_func cmp_sub, chroma_cmp_sub;
+ int bx=2*mx, by=2*my;
LOAD_COMMON(xx, yy);
@@ -160,13 +161,12 @@ static int RENAME(hpel_motion_search)(MpegEncContext * s,
if(s->avctx->me_cmp != s->avctx->me_sub_cmp){
CMP_HPEL(dmin, 0, 0, mx, my, size);
- if(mx || my)
+ if(mx || my || size>0)
dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
}
if (mx > xmin && mx < xmax &&
my > ymin && my < ymax) {
- int bx=2*mx, by=2*my;
int d= dmin;
const int index= (my<<ME_MAP_SHIFT) + mx;
const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
@@ -178,7 +178,7 @@ static int RENAME(hpel_motion_search)(MpegEncContext * s,
const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
+ (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*s->me.penalty_factor;
-#if 0
+#if 1
int key;
int map_generation= s->me.map_generation;
uint32_t *map= s->me.map;
@@ -231,20 +231,50 @@ static int RENAME(hpel_motion_search)(MpegEncContext * s,
CHECK_HALF_MV(0, 1, mx , my)
}
assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
-
- *mx_ptr = bx;
- *my_ptr = by;
- }else{
- *mx_ptr =2*mx;
- *my_ptr =2*my;
}
+ *mx_ptr = bx;
+ *my_ptr = by;
+
return dmin;
}
#endif
+static int RENAME(hpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, Picture *ref_picture,
+ uint16_t * const mv_penalty)
+{ /* Score the vector (mx,my) with the mb_cmp comparison functions and mb_penalty_factor; returns that score. */
+// const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp;
+ const int size= 0;
+ const int xx = 16 * s->mb_x;
+ const int yy = 16 * s->mb_y;
+ const int penalty_factor= s->me.mb_penalty_factor;
+ const int xmin= -256*256, ymin= -256*256, xmax= 256*256, ymax= 256*256; //assume that the caller checked these
+ const __attribute__((unused)) int unu2= xmin + xmax +ymin + ymax; //only here to avoid unused-variable warnings
+ me_cmp_func cmp_sub, chroma_cmp_sub;
+ int d;
+ /* LOAD_COMMON and CMP_HPEL are macros defined outside this function; cmp_sub and chroma_cmp_sub are presumably consumed by CMP_HPEL -- TODO confirm */
+ LOAD_COMMON(xx, yy);
+
+ //FIXME factorize
+ /* size is 0, so these are mb_cmp[0] and mb_cmp[1] */
+ cmp_sub= s->dsp.mb_cmp[size];
+ chroma_cmp_sub= s->dsp.mb_cmp[size+1];
+ /* preconditions: the macroblock is not skipped and mb_cmp differs from me_sub_cmp */
+ assert(!s->me.skip);
+ assert(s->avctx->me_sub_cmp != s->avctx->mb_cmp);
+ /* the low bit of mx/my and the remaining bits (mx>>1, my>>1) are passed separately; presumably half-pel fraction and full-pel position */
+ CMP_HPEL(d, mx&1, my&1, mx>>1, my>>1, size);
+ //FIXME check cbp before adding penalty for (0,0) vector
+ if(mx || my || size>0) /* size is 0 here, so only a non-zero vector gets the penalty */
+ d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
+
+ return d;
+}
+
#endif /* CMP_HPEL */
+
+
#ifdef CMP_QPEL
#define CHECK_QUARTER_MV(dx, dy, x, y)\
@@ -290,7 +320,7 @@ static int RENAME(qpel_motion_search)(MpegEncContext * s,
if(s->avctx->me_cmp != s->avctx->me_sub_cmp){
CMP_QPEL(dmin, 0, 0, mx, my, size);
- if(mx || my)
+ if(mx || my || size>0)
dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
}
@@ -477,6 +507,37 @@ static int RENAME(qpel_motion_search)(MpegEncContext * s,
return dmin;
}
+static int RENAME(qpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, Picture *ref_picture,
+ uint16_t * const mv_penalty)
+{ /* Score the vector (mx,my) with the mb_cmp comparison functions and mb_penalty_factor; returns that score. */
+ const int size= 0;
+ const int xx = 16 * s->mb_x;
+ const int yy = 16 * s->mb_y;
+ const int penalty_factor= s->me.mb_penalty_factor;
+ const int xmin= -256*256, ymin= -256*256, xmax= 256*256, ymax= 256*256; //assume that the caller checked these
+ const __attribute__((unused)) int unu2= xmin + xmax +ymin + ymax; //only here to avoid unused-variable warnings
+ me_cmp_func cmp_sub, chroma_cmp_sub;
+ int d;
+ /* LOAD_COMMON and CMP_QPEL are macros defined outside this function; cmp_sub and chroma_cmp_sub are presumably consumed by CMP_QPEL -- TODO confirm */
+ LOAD_COMMON(xx, yy);
+
+ //FIXME factorize
+ /* size is 0, so these are mb_cmp[0] and mb_cmp[1] */
+ cmp_sub= s->dsp.mb_cmp[size];
+ chroma_cmp_sub= s->dsp.mb_cmp[size+1];
+ /* preconditions: the macroblock is not skipped and mb_cmp differs from me_sub_cmp */
+ assert(!s->me.skip);
+ assert(s->avctx->me_sub_cmp != s->avctx->mb_cmp);
+ /* the low two bits of mx/my and the remaining bits (mx>>2, my>>2) are passed separately; presumably quarter-pel fraction and full-pel position */
+ CMP_QPEL(d, mx&3, my&3, mx>>2, my>>2, size);
+ //FIXME check cbp before adding penalty for (0,0) vector
+ if(mx || my || size>0) /* size is 0 here, so only a non-zero vector gets the penalty */
+ d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
+
+ return d;
+}
+
+
#endif /* CMP_QPEL */
#define CHECK_MV(x,y)\
diff --git a/src/libffmpeg/libavcodec/mpeg12.c b/src/libffmpeg/libavcodec/mpeg12.c
index 10abf1024..fecb097bd 100644
--- a/src/libffmpeg/libavcodec/mpeg12.c
+++ b/src/libffmpeg/libavcodec/mpeg12.c
@@ -504,7 +504,7 @@ static void mpeg1_encode_motion(MpegEncContext *s, int val)
void ff_mpeg1_encode_init(MpegEncContext *s)
{
-#ifdef CONFIG_ENCODERS
+#if 0
static int done=0;
common_init(s);
@@ -769,6 +769,8 @@ static int mpeg_decode_mb(MpegEncContext *s,
dprintf("decode_mb: x=%d y=%d\n", s->mb_x, s->mb_y);
+ assert(s->mb_skiped==0);
+
if (--s->mb_incr != 0) {
/* skip mb */
s->mb_intra = 0;
@@ -781,15 +783,18 @@ static int mpeg_decode_mb(MpegEncContext *s,
s->mv[0][0][0] = s->mv[0][0][1] = 0;
s->last_mv[0][0][0] = s->last_mv[0][0][1] = 0;
s->last_mv[0][1][0] = s->last_mv[0][1][1] = 0;
+ s->mb_skiped = 1;
} else {
/* if B type, reuse previous vectors and directions */
s->mv[0][0][0] = s->last_mv[0][0][0];
s->mv[0][0][1] = s->last_mv[0][0][1];
s->mv[1][0][0] = s->last_mv[1][0][0];
s->mv[1][0][1] = s->last_mv[1][0][1];
+
+ if((s->mv[0][0][0]|s->mv[0][0][1]|s->mv[1][0][0]|s->mv[1][0][1])==0)
+ s->mb_skiped = 1;
}
- s->mb_skiped = 1;
return 0;
}
@@ -1464,7 +1469,7 @@ static int mpeg1_decode_picture(AVCodecContext *avctx,
MpegEncContext *s = &s1->mpeg_enc_ctx;
int ref, f_code;
- init_get_bits(&s->gb, buf, buf_size);
+ init_get_bits(&s->gb, buf, buf_size*8);
ref = get_bits(&s->gb, 10); /* temporal ref */
s->pict_type = get_bits(&s->gb, 3);
@@ -1616,7 +1621,7 @@ static void mpeg_decode_extension(AVCodecContext *avctx,
MpegEncContext *s = &s1->mpeg_enc_ctx;
int ext_type;
- init_get_bits(&s->gb, buf, buf_size);
+ init_get_bits(&s->gb, buf, buf_size*8);
ext_type = get_bits(&s->gb, 4);
switch(ext_type) {
@@ -1672,7 +1677,7 @@ static int mpeg_decode_slice(AVCodecContext *avctx,
return DECODE_SLICE_FATAL_ERROR;
if(s->avctx->debug&FF_DEBUG_PICT_INFO){
- printf("qp:%d fc:%d%d%d%d %s %s %s %s dc:%d pstruct:%d fdct:%d cmv:%d qtype:%d ivlc:%d rff:%d %s\n",
+ printf("qp:%d fc:%2d%2d%2d%2d %s %s %s %s dc:%d pstruct:%d fdct:%d cmv:%d qtype:%d ivlc:%d rff:%d %s\n",
s->qscale, s->mpeg_f_code[0][0],s->mpeg_f_code[0][1],s->mpeg_f_code[1][0],s->mpeg_f_code[1][1],
s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? "B" : "S")),
s->progressive_sequence ? "pro" :"", s->alternate_scan ? "alt" :"", s->top_field_first ? "top" :"",
@@ -1681,7 +1686,7 @@ static int mpeg_decode_slice(AVCodecContext *avctx,
}
}
- init_get_bits(&s->gb, buf, buf_size);
+ init_get_bits(&s->gb, buf, buf_size*8);
s->qscale = get_qscale(s);
/* extra slice info */
@@ -1790,7 +1795,7 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
int width, height, i, v, j;
float aspect;
- init_get_bits(&s->gb, buf, buf_size);
+ init_get_bits(&s->gb, buf, buf_size*8);
width = get_bits(&s->gb, 12);
height = get_bits(&s->gb, 12);
diff --git a/src/libffmpeg/libavcodec/mpegaudiodec.c b/src/libffmpeg/libavcodec/mpegaudiodec.c
index b2c0966aa..9a066c905 100644
--- a/src/libffmpeg/libavcodec/mpegaudiodec.c
+++ b/src/libffmpeg/libavcodec/mpegaudiodec.c
@@ -507,7 +507,7 @@ static int decode_init(AVCodecContext * avctx)
return 0;
}
-/* tab[i][j] = 1.0 / (2.0 * cos(pi*(2*k+1) / 2^(6 - j))) */;
+/* tab[i][j] = 1.0 / (2.0 * cos(pi*(2*k+1) / 2^(6 - j))) */
/* cos(i*pi/64) */
@@ -1460,7 +1460,7 @@ static void seek_to_maindata(MPADecodeContext *s, long backstep)
memcpy(ptr, s->inbuf1[s->inbuf_index ^ 1] +
BACKSTEP_SIZE + s->old_frame_size - backstep, backstep);
/* init get bits again */
- init_get_bits(&s->gb, ptr, s->frame_size + backstep);
+ init_get_bits(&s->gb, ptr, (s->frame_size + backstep)*8);
/* prepare next buffer */
s->inbuf_index ^= 1;
@@ -2280,7 +2280,7 @@ static int mp_decode_frame(MPADecodeContext *s,
short *samples_ptr;
init_get_bits(&s->gb, s->inbuf + HEADER_SIZE,
- s->inbuf_ptr - s->inbuf - HEADER_SIZE);
+ (s->inbuf_ptr - s->inbuf - HEADER_SIZE)*8);
/* skip error protection field */
if (s->error_protection)
diff --git a/src/libffmpeg/libavcodec/mpegvideo.c b/src/libffmpeg/libavcodec/mpegvideo.c
index 8206df470..d721647a5 100644
--- a/src/libffmpeg/libavcodec/mpegvideo.c
+++ b/src/libffmpeg/libavcodec/mpegvideo.c
@@ -20,6 +20,7 @@
*/
#include <ctype.h>
+#include <limits.h>
#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"
@@ -80,12 +81,15 @@ static const uint8_t simple_mmx_permutation[64]={
};
static const uint8_t h263_chroma_roundtab[16] = {
+// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
};
static UINT16 (*default_mv_penalty)[MAX_MV*2+1]=NULL;
static UINT8 default_fcode_tab[MAX_MV*2+1];
+enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
+
static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64],
const UINT16 *quant_matrix, int bias, int qmin, int qmax)
{
@@ -230,6 +234,8 @@ int DCT_common_init(MpegEncContext *s)
MPV_common_init_ppc(s);
#endif
+ s->fast_dct_quantize= s->dct_quantize;
+
if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
}
@@ -286,7 +292,7 @@ static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
if(r<0 || !pic->age || !pic->type || !pic->data[0]){
- fprintf(stderr, "get_buffer() failed (%d %d %d %X)\n", r, pic->age, pic->type, (int)pic->data[0]);
+ fprintf(stderr, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
return -1;
}
@@ -309,6 +315,7 @@ static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
CHECKED_ALLOCZ(pic->mb_var , s->mb_num * sizeof(INT16))
CHECKED_ALLOCZ(pic->mc_mb_var, s->mb_num * sizeof(INT16))
CHECKED_ALLOCZ(pic->mb_mean , s->mb_num * sizeof(INT8))
+ CHECKED_ALLOCZ(pic->mb_cmp_score, s->mb_num * sizeof(int32_t))
}
CHECKED_ALLOCZ(pic->mbskip_table , s->mb_num * sizeof(UINT8)+1) //the +1 is for the slice end check
@@ -316,6 +323,12 @@ static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
pic->qstride= s->mb_width;
}
+ //it might be nicer if the application kept track of these, but that would require an API change
+ memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
+ s->prev_pict_types[0]= s->pict_type;
+ if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
+ pic->age= INT_MAX; // skiped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway
+
return 0;
fail: //for the CHECKED_ALLOCZ macro
return -1;
@@ -334,6 +347,7 @@ static void free_picture(MpegEncContext *s, Picture *pic){
av_freep(&pic->mb_var);
av_freep(&pic->mc_mb_var);
av_freep(&pic->mb_mean);
+ av_freep(&pic->mb_cmp_score);
av_freep(&pic->mbskip_table);
av_freep(&pic->qscale_table);
@@ -472,6 +486,7 @@ int MPV_common_init(MpegEncContext *s)
/* init macroblock skip table */
CHECKED_ALLOCZ(s->mbskip_table, s->mb_num+1);
//Note the +1 is for a quicker mpeg4 slice_end detection
+ CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
s->block= s->blocks[0];
@@ -511,6 +526,7 @@ void MPV_common_end(MpegEncContext *s)
av_freep(&s->me.score_map);
av_freep(&s->mbskip_table);
+ av_freep(&s->prev_pict_types);
av_freep(&s->bitstream_buffer);
av_freep(&s->tex_pb_buffer);
av_freep(&s->pb2_buffer);
@@ -609,6 +625,7 @@ int MPV_encode_init(AVCodecContext *avctx)
avctx->delay=0;
s->low_delay=1;
break;
+#ifdef CONFIG_RISKY
case CODEC_ID_H263:
if (h263_get_picture_format(s->width, s->height) == 7) {
printf("Input picture size isn't suitable for h263 codec! try h263+\n");
@@ -688,6 +705,7 @@ int MPV_encode_init(AVCodecContext *avctx)
avctx->delay=0;
s->low_delay=1;
break;
+#endif
default:
return -1;
}
@@ -725,24 +743,29 @@ int MPV_encode_init(AVCodecContext *avctx)
ff_init_me(s);
#ifdef CONFIG_ENCODERS
+#ifdef CONFIG_RISKY
if (s->out_format == FMT_H263)
h263_encode_init(s);
- else if (s->out_format == FMT_MPEG1)
- ff_mpeg1_encode_init(s);
if(s->msmpeg4_version)
ff_msmpeg4_encode_init(s);
#endif
+ if (s->out_format == FMT_MPEG1)
+ ff_mpeg1_encode_init(s);
+#endif
/* init default q matrix */
for(i=0;i<64;i++) {
int j= s->idct_permutation[i];
+#ifdef CONFIG_RISKY
if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
}else if(s->out_format == FMT_H263){
s->intra_matrix[j] =
s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
- }else{ /* mpeg1 */
+ }else
+#endif
+ { /* mpeg1 */
s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
}
@@ -787,6 +810,44 @@ int MPV_encode_end(AVCodecContext *avctx)
return 0;
}
+/**
+ * Builds the helper tables of a run/level table.
+ * The entries [0, rl->last) are processed for last==0 and the entries
+ * [rl->last, rl->n) for last==1; for each half newly av_malloc()ed copies of
+ * max_level[], max_run[] and index_run[] are stored in rl.
+ */
+void init_rl(RLTable *rl)
+{
+    INT8 level_max[MAX_RUN+1], run_max[MAX_LEVEL+1];
+    UINT8 first_index[MAX_RUN+1];
+    int last;
+
+    for(last=0; last<2; last++){
+        /* select the half of the table that belongs to this "last" value */
+        const int start= last ? rl->last : 0;
+        const int end  = last ? rl->n    : rl->last;
+        int i;
+
+        memset(level_max, 0, MAX_RUN + 1);
+        memset(run_max, 0, MAX_LEVEL + 1);
+        /* every byte is preset to rl->n, which is tested below as "not set yet" */
+        memset(first_index, rl->n, MAX_RUN + 1);
+
+        for(i=start; i<end; i++){
+            const int run  = rl->table_run[i];
+            const int level= rl->table_level[i];
+
+            /* remember the first table index that uses this run */
+            if(first_index[run] == rl->n)
+                first_index[run]= i;
+            /* largest level seen for this run */
+            if(level > level_max[run])
+                level_max[run]= level;
+            /* largest run seen for this level */
+            if(run > run_max[level])
+                run_max[level]= run;
+        }
+
+        rl->max_level[last]= av_malloc(MAX_RUN + 1);
+        memcpy(rl->max_level[last], level_max, MAX_RUN + 1);
+        rl->max_run[last]= av_malloc(MAX_LEVEL + 1);
+        memcpy(rl->max_run[last], run_max, MAX_LEVEL + 1);
+        rl->index_run[last]= av_malloc(MAX_RUN + 1);
+        memcpy(rl->index_run[last], first_index, MAX_RUN + 1);
+    }
+}
+
/* draw the edges of width 'w' of an image of size width, height */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w)
@@ -1292,11 +1353,10 @@ static inline void gmc1_motion(MpegEncContext *s,
dest_y+=dest_offset;
if(s->flags&CODEC_FLAG_EMU_EDGE){
- if(src_x<0 || src_y<0 || src_x + (motion_x&15) + 16 > s->h_edge_pos
- || src_y + (motion_y&15) + 16 > s->v_edge_pos){
+ if(src_x<0 || src_y<0 || src_x + 17 >= s->h_edge_pos
+ || src_y + 17 >= s->v_edge_pos){
ff_emulated_edge_mc(s, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
ptr= s->edge_emu_buffer;
- emu=1;
}
}
@@ -1331,9 +1391,13 @@ static inline void gmc1_motion(MpegEncContext *s,
offset = (src_y * uvlinesize) + src_x + (src_offset>>1);
ptr = ref_picture[1] + offset;
- if(emu){
- ff_emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
- ptr= s->edge_emu_buffer;
+ if(s->flags&CODEC_FLAG_EMU_EDGE){
+ if(src_x<0 || src_y<0 || src_x + 9 >= s->h_edge_pos>>1
+ || src_y + 9 >= s->v_edge_pos>>1){
+ ff_emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
+ ptr= s->edge_emu_buffer;
+ emu=1;
+ }
}
s->dsp.gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
@@ -1656,6 +1720,14 @@ static inline void qpel_motion(MpegEncContext *s,
pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
}
+/**
+ * Applies the chroma rounding table to one motion vector component.
+ * The low 4 bits of the magnitude select an entry of h263_chroma_roundtab,
+ * the remaining bits contribute (|x| >> 3) & ~1, and the sign of x is kept.
+ *
+ * Deliberately not defined "inline": the function has external linkage and is
+ * declared in mpegvideo.h, and with C99 inline semantics an inline definition
+ * without "extern" emits no out-of-line symbol for callers to link against.
+ *
+ * @param x motion vector component to round
+ * @returns the rounded component, with the same sign as x
+ */
+int ff_h263_round_chroma(int x){
+    if (x >= 0)
+        return (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
+    else {
+        /* round the magnitude so the result is symmetric around zero */
+        x = -x;
+        return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
+    }
+}
static inline void MPV_motion(MpegEncContext *s,
UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
@@ -1672,6 +1744,7 @@ static inline void MPV_motion(MpegEncContext *s,
switch(s->mv_type) {
case MV_TYPE_16X16:
+#ifdef CONFIG_RISKY
if(s->mcsel){
if(s->real_sprite_warping_points==1){
gmc1_motion(s, dest_y, dest_cb, dest_cr, 0,
@@ -1689,7 +1762,9 @@ static inline void MPV_motion(MpegEncContext *s,
ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
ref_picture, pix_op,
s->mv[dir][0][0], s->mv[dir][0][1], 16);
- }else{
+ }else
+#endif
+ {
mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
ref_picture, 0,
0, pix_op,
@@ -1766,20 +1841,8 @@ static inline void MPV_motion(MpegEncContext *s,
if(s->flags&CODEC_FLAG_GRAY) break;
/* In case of 8X8, we construct a single chroma motion vector
with a special rounding */
- for(i=0;i<4;i++) {
- }
- if (mx >= 0)
- mx = (h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1));
- else {
- mx = -mx;
- mx = -(h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1));
- }
- if (my >= 0)
- my = (h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1));
- else {
- my = -my;
- my = -(h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1));
- }
+ mx= ff_h263_round_chroma(mx);
+ my= ff_h263_round_chroma(my);
dxy = ((my & 1) << 1) | (mx & 1);
mx >>= 1;
my >>= 1;
@@ -2010,14 +2073,13 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
if(*mbskip_ptr >99) *mbskip_ptr= 99;
/* if previous was skipped too, then nothing to do ! */
- if (*mbskip_ptr >= age){
-//if(s->pict_type!=B_TYPE && s->mb_x==0) printf("\n");
-//if(s->pict_type!=B_TYPE) printf("%d%d ", *mbskip_ptr, age);
- if(s->pict_type!=B_TYPE) return;
- if(s->avctx->draw_horiz_band==NULL && *mbskip_ptr > age) return;
- /* we dont draw complete frames here so we cant skip */
+ if (*mbskip_ptr >= age && s->current_picture.reference){
+ return;
}
- } else {
+ } else if(!s->current_picture.reference){
+ (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
+ if(*mbskip_ptr >99) *mbskip_ptr= 99;
+ } else{
*mbskip_ptr = 0; /* not skipped */
}
}else
@@ -2088,9 +2150,12 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
add_dct(s, block[4], 4, dest_cb, s->uvlinesize);
add_dct(s, block[5], 5, dest_cr, s->uvlinesize);
}
- } else{
+ }
+#ifdef CONFIG_RISKY
+ else{
ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
}
+#endif
} else {
/* dct only in intra block */
if(s->encoding || !(s->mpeg2 || s->codec_id==CODEC_ID_MPEG1VIDEO)){
@@ -2585,7 +2650,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
s->block_last_index[4]=
s->block_last_index[5]= 0;
s->block[4][0]=
- s->block[5][0]= 128;
+ s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
}
#ifdef CONFIG_ENCODERS
@@ -2593,6 +2658,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
switch(s->codec_id){ //FIXME funct ptr could be slightly faster
case CODEC_ID_MPEG1VIDEO:
mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
+#ifdef CONFIG_RISKY
case CODEC_ID_MPEG4:
mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
case CODEC_ID_MSMPEG4V2:
@@ -2601,18 +2667,48 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
case CODEC_ID_WMV2:
ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
- case CODEC_ID_MJPEG:
- mjpeg_encode_mb(s, s->block); break;
case CODEC_ID_H263:
case CODEC_ID_H263P:
case CODEC_ID_RV10:
h263_encode_mb(s, s->block, motion_x, motion_y); break;
+#endif
+ case CODEC_ID_MJPEG:
+ mjpeg_encode_mb(s, s->block); break;
default:
assert(0);
}
#endif
}
+/**
+ * combines the (truncated) bitstream to a complete frame
+ * @param next     offset in *buf at which the current frame ends, or -1 if
+ *                 the end of the frame is not contained in *buf
+ * @param buf      input data; redirected to the internal buffer when earlier
+ *                 data had to be prepended
+ * @param buf_size size of *buf; updated together with *buf
+ * @returns -1 if no complete frame could be created (this includes a failure
+ *          to enlarge the internal buffer), 0 otherwise
+ */
+int ff_combine_frame( MpegEncContext *s, int next, uint8_t **buf, int *buf_size){
+    ParseContext *pc= &s->parse_context;
+    void *new_buffer;
+
+    pc->last_index= pc->index;
+
+    if(next==-1){
+        /* frame end not seen yet: append the whole input and wait for more */
+        new_buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, (*buf_size) + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
+        if(!new_buffer){
+            /* NOTE(review): assumes the old block stays valid on failure, as with realloc() - confirm.
+               Keep it, drop the buffered data and force a fresh allocation next time. */
+            pc->buffer_size= 0;
+            pc->index= 0;
+            return -1;
+        }
+        pc->buffer= new_buffer;
+
+        memcpy(&pc->buffer[pc->index], *buf, *buf_size);
+        pc->index += *buf_size;
+        return -1;
+    }
+
+    if(pc->index){
+        /* data from earlier calls is pending: append the rest of the frame and hand out the internal buffer */
+        new_buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, next + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
+        if(!new_buffer){
+            pc->buffer_size= 0;
+            pc->index= 0;
+            return -1;
+        }
+        pc->buffer= new_buffer;
+
+        /* copies FF_INPUT_BUFFER_PADDING_SIZE bytes beyond the frame end as well */
+        memcpy(&pc->buffer[pc->index], *buf, next + FF_INPUT_BUFFER_PADDING_SIZE );
+        pc->index = 0;
+        *buf= pc->buffer;
+        *buf_size= pc->last_index + next;
+    }
+
+    return 0;
+}
+
void ff_copy_bits(PutBitContext *pb, UINT8 *src, int length)
{
int bytes= length>>4;
@@ -2769,10 +2865,12 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->current_picture.mb_var_sum = 0;
s->current_picture.mc_mb_var_sum = 0;
+#ifdef CONFIG_RISKY
/* we need to initialize some time vars before we can encode b-frames */
if (s->h263_pred && !s->h263_msmpeg4)
ff_set_mpeg4_time(s, s->picture_number);
-
+#endif
+
s->scene_change_score=0;
s->qscale= (int)(s->frame_qscale + 0.5); //FIXME qscale / ... stuff for ME ratedistoration
@@ -2789,6 +2887,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->no_rounding ^= 1;
}
/* Estimate motion for every MB */
+ s->mb_intra=0; //for the rate distoration & bit compare functions
if(s->pict_type != I_TYPE){
if(s->pict_type != B_TYPE){
if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
@@ -2880,6 +2979,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->frame_qscale = ff_rate_estimate_qscale(s);
if(s->adaptive_quant){
+#ifdef CONFIG_RISKY
switch(s->codec_id){
case CODEC_ID_MPEG4:
ff_clean_mpeg4_qscales(s);
@@ -2889,6 +2989,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
ff_clean_h263_qscales(s);
break;
}
+#endif
s->qscale= s->current_picture.qscale_table[0];
}else
@@ -2918,6 +3019,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
case FMT_MJPEG:
mjpeg_picture_header(s);
break;
+#ifdef CONFIG_RISKY
case FMT_H263:
if (s->codec_id == CODEC_ID_WMV2)
ff_wmv2_encode_picture_header(s, picture_number);
@@ -2930,6 +3032,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
else
h263_encode_picture_header(s, picture_number);
break;
+#endif
case FMT_MPEG1:
mpeg1_encode_picture_header(s, picture_number);
break;
@@ -2957,11 +3060,13 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->last_mv[0][0][0] = 0;
s->last_mv[0][0][1] = 0;
+#ifdef CONFIG_RISKY
if (s->codec_id==CODEC_ID_H263 || s->codec_id==CODEC_ID_H263P)
s->gob_index = ff_h263_get_gob_height(s);
if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame)
ff_mpeg4_init_partitions(s);
+#endif
s->resync_mb_x=0;
s->resync_mb_y=0;
@@ -2979,7 +3084,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->block_index[4]= s->block_wrap[4]*(mb_y + 1) + s->block_wrap[0]*(s->mb_height*2 + 2);
s->block_index[5]= s->block_wrap[4]*(mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2);
for(mb_x=0; mb_x < s->mb_width; mb_x++) {
- const int mb_type= s->mb_type[mb_y * s->mb_width + mb_x];
+ int mb_type= s->mb_type[mb_y * s->mb_width + mb_x];
const int xy= (mb_y+1) * (s->mb_width+2) + mb_x + 1;
// int d;
int dmin=10000000;
@@ -2994,6 +3099,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->block_index[5]++;
/* write gob / video packet header */
+#ifdef CONFIG_RISKY
if(s->rtp_mode){
int current_packet_size, is_gob_start;
@@ -3034,6 +3140,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->resync_mb_y=mb_y;
}
}
+#endif
if( (s->resync_mb_x == s->mb_x)
&& s->resync_mb_y+1 == s->mb_y){
@@ -3108,7 +3215,9 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
s->mb_intra= 0;
+#ifdef CONFIG_RISKY
ff_mpeg4_set_direct_mv(s, mx, my);
+#endif
encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_DIRECT, pb, pb2, tex_pb,
&dmin, &next_block, mx, my);
}
@@ -3145,8 +3254,93 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->last_bits= get_bit_count(&s->pb);
} else {
int motion_x, motion_y;
+ int intra_score;
+ int inter_score= s->current_picture.mb_cmp_score[mb_x + mb_y*s->mb_width];
+
+ if(!(s->flags&CODEC_FLAG_HQ) && s->pict_type==P_TYPE){
+ /* get luma score */
+ if((s->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
+ intra_score= (s->current_picture.mb_var[mb_x + mb_y*s->mb_width]<<8) - 500; //FIXME dont scale it down so we dont have to fix it
+ }else{
+ uint8_t *dest_y;
+
+ int mean= s->current_picture.mb_mean[mb_x + mb_y*s->mb_width]; //FIXME
+ mean*= 0x01010101;
+
+ dest_y = s->new_picture.data[0] + (mb_y * 16 * s->linesize ) + mb_x * 16;
+
+ for(i=0; i<16; i++){
+ *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 0]) = mean;
+ *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 4]) = mean;
+ *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 8]) = mean;
+ *(uint32_t*)(&s->me.scratchpad[i*s->linesize+12]) = mean;
+ }
+
+ s->mb_intra=1;
+ intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, dest_y, s->linesize);
+
+/* printf("intra:%7d inter:%7d var:%7d mc_var.%7d\n", intra_score>>8, inter_score>>8,
+ s->current_picture.mb_var[mb_x + mb_y*s->mb_width],
+ s->current_picture.mc_mb_var[mb_x + mb_y*s->mb_width]);*/
+ }
+
+ /* get chroma score */
+ if(s->avctx->mb_cmp&FF_CMP_CHROMA){
+     int plane;
+
+     s->mb_intra=1;
+     /* data[1] and data[2] of new_picture are compared, one after the other */
+     for(plane=1; plane<3; plane++){
+         uint8_t *dest_c;
+         int mean;
+         int row; /* must be distinct from the plane index: sharing one variable ends the plane loop after plane 1 */
+
+         if(s->out_format == FMT_H263){
+             mean= (s->dc_val[plane][mb_x + (mb_y+1)*(s->mb_width+2)] + 4)>>3; //FIXME not exact but simple ;)
+         }else{
+             mean= (s->last_dc[plane] + 4)>>3;
+         }
+         dest_c = s->new_picture.data[plane] + (mb_y * 8 * (s->uvlinesize)) + mb_x * 8;
+
+         /* fill an 8x8 area of the scratchpad with the mean, replicated into all 4 bytes of each word */
+         mean*= 0x01010101;
+         for(row=0; row<8; row++){
+             *(uint32_t*)(&s->me.scratchpad[row*s->uvlinesize+ 0]) = mean;
+             *(uint32_t*)(&s->me.scratchpad[row*s->uvlinesize+ 4]) = mean;
+         }
+
+         intra_score+= s->dsp.mb_cmp[1](s, s->me.scratchpad, dest_c, s->uvlinesize);
+     }
+ }
+
+ /* bias */
+ switch(s->avctx->mb_cmp&0xFF){
+ default:
+ case FF_CMP_SAD:
+ intra_score+= 32*s->qscale;
+ break;
+ case FF_CMP_SSE:
+ intra_score+= 24*s->qscale*s->qscale;
+ break;
+ case FF_CMP_SATD:
+ intra_score+= 96*s->qscale;
+ break;
+ case FF_CMP_DCT:
+ intra_score+= 48*s->qscale;
+ break;
+ case FF_CMP_BIT:
+ intra_score+= 16;
+ break;
+ case FF_CMP_PSNR:
+ case FF_CMP_RD:
+ intra_score+= (s->qscale*s->qscale*109*8 + 64)>>7;
+ break;
+ }
+
+ if(intra_score < inter_score)
+ mb_type= MB_TYPE_INTRA;
+ }
+
s->mv_type=MV_TYPE_16X16;
// only one MB-Type possible
+
switch(mb_type){
case MB_TYPE_INTRA:
s->mv_dir = MV_DIR_FORWARD;
@@ -3175,7 +3369,9 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->mb_intra= 0;
motion_x=s->b_direct_mv_table[xy][0];
motion_y=s->b_direct_mv_table[xy][1];
+#ifdef CONFIG_RISKY
ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
+#endif
break;
case MB_TYPE_BIDIR:
s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
@@ -3253,6 +3449,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
}
emms_c();
+#ifdef CONFIG_RISKY
if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame)
ff_mpeg4_merge_partitions(s);
@@ -3261,6 +3458,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
if(s->codec_id==CODEC_ID_MPEG4)
ff_mpeg4_stuffing(&s->pb);
+#endif
//if (s->gob_number)
// fprintf(stderr,"\nNumber of GOB: %d", s->gob_number);
@@ -3376,7 +3574,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
return last_non_zero;
}
- lambda= (qscale*qscale*64*82 + 50)/100; //FIXME finetune
+ lambda= (qscale*qscale*64*105 + 64)>>7; //FIXME finetune
score_tab[0]= 0;
for(i=0; i<=last_non_zero - start_i; i++){
@@ -3783,6 +3981,8 @@ AVCodec mpeg1video_encoder = {
MPV_encode_end,
};
+#ifdef CONFIG_RISKY
+
AVCodec h263_encoder = {
"h263",
CODEC_TYPE_VIDEO,
@@ -3813,16 +4013,6 @@ AVCodec rv10_encoder = {
MPV_encode_end,
};
-AVCodec mjpeg_encoder = {
- "mjpeg",
- CODEC_TYPE_VIDEO,
- CODEC_ID_MJPEG,
- sizeof(MpegEncContext),
- MPV_encode_init,
- MPV_encode_picture,
- MPV_encode_end,
-};
-
AVCodec mpeg4_encoder = {
"mpeg4",
CODEC_TYPE_VIDEO,
@@ -3873,3 +4063,14 @@ AVCodec wmv1_encoder = {
MPV_encode_end,
};
+#endif
+
+AVCodec mjpeg_encoder = {
+ "mjpeg",
+ CODEC_TYPE_VIDEO,
+ CODEC_ID_MJPEG,
+ sizeof(MpegEncContext),
+ MPV_encode_init,
+ MPV_encode_picture,
+ MPV_encode_end,
+};
diff --git a/src/libffmpeg/libavcodec/mpegvideo.h b/src/libffmpeg/libavcodec/mpegvideo.h
index e6909817a..7ecc6fd38 100644
--- a/src/libffmpeg/libavcodec/mpegvideo.h
+++ b/src/libffmpeg/libavcodec/mpegvideo.h
@@ -98,7 +98,6 @@ typedef struct RateControlContext{
int last_non_b_pict_type;
}RateControlContext;
-
typedef struct ScanTable{
const UINT8 *scantable;
UINT8 permutated[64];
@@ -117,6 +116,7 @@ typedef struct Picture{
uint16_t *mb_var; /* Table for MB variances */
uint16_t *mc_mb_var; /* Table for motion compensated MB variances */
uint8_t *mb_mean; /* Table for MB luminance */
+ int32_t *mb_cmp_score; /* Table for MB cmp scores, for mb decission */
int b_frame_score; /* */
} Picture;
@@ -142,6 +142,7 @@ typedef struct MotionEstContext{
int pre_penalty_factor;
int penalty_factor;
int sub_penalty_factor;
+ int mb_penalty_factor;
int pre_pass; /* = 1 for the pre pass */
int dia_size;
UINT16 (*mv_penalty)[MAX_MV*2+1]; /* amount of bits needed to encode a MV */
@@ -160,6 +161,8 @@ typedef struct MotionEstContext{
int P[10][2], int pred_x, int pred_y,
int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, int16_t (*last_mv)[2],
int ref_mv_scale, uint16_t * const mv_penalty);
+ int (*get_mb_score)(struct MpegEncContext * s, int mx, int my, int pred_x, int pred_y, Picture *ref_picture,
+ uint16_t * const mv_penalty);
}MotionEstContext;
typedef struct MpegEncContext {
@@ -226,6 +229,8 @@ typedef struct MpegEncContext {
UINT8 *coded_block; /* used for coded block pattern prediction (msmpeg4v3, wmv1)*/
INT16 (*ac_val[3])[16]; /* used for for mpeg4 AC prediction, all 3 arrays must be continuous */
int ac_pred;
+ uint8_t *prev_pict_types; /* previous picture types in bitstream order, used for mb skip */
+#define PREV_PICT_TYPES_BUFFER_SIZE 256
int mb_skiped; /* MUST BE SET only during DECODING */
UINT8 *mbskip_table; /* used to avoid copy if macroblock skipped (for black regions for example)
and used for b-frame encoding & decoding (contains skip table of next P Frame) */
@@ -321,6 +326,8 @@ typedef struct MpegEncContext {
uint8_t *intra_ac_vlc_last_length;
uint8_t *inter_ac_vlc_length;
uint8_t *inter_ac_vlc_last_length;
+ uint8_t *luma_dc_vlc_length;
+ uint8_t *chroma_dc_vlc_length;
#define UNI_AC_ENC_INDEX(run,level) ((run)*128 + (level))
/* precomputed matrix (combine qscale and DCT renorm) */
@@ -544,14 +551,15 @@ typedef struct MpegEncContext {
#define SLICE_NOEND -3 //no end marker or error found but mb count exceeded
void (*dct_unquantize_mpeg1)(struct MpegEncContext *s,
- DCTELEM *block, int n, int qscale);
+ DCTELEM *block/*align 16*/, int n, int qscale);
void (*dct_unquantize_mpeg2)(struct MpegEncContext *s,
- DCTELEM *block, int n, int qscale);
+ DCTELEM *block/*align 16*/, int n, int qscale);
void (*dct_unquantize_h263)(struct MpegEncContext *s,
- DCTELEM *block, int n, int qscale);
+ DCTELEM *block/*align 16*/, int n, int qscale);
void (*dct_unquantize)(struct MpegEncContext *s, // unquantizer to use (mpeg4 can use both)
- DCTELEM *block, int n, int qscale);
- int (*dct_quantize)(struct MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
+ DCTELEM *block/*align 16*/, int n, int qscale);
+ int (*dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int *overflow);
+ int (*fast_dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int *overflow);
void (*fdct)(DCTELEM *block/* align 16*/);
void (*idct_put)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
void (*idct_add)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
@@ -596,7 +604,9 @@ void ff_draw_horiz_band(MpegEncContext *s);
void ff_emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h,
int src_x, int src_y, int w, int h);
char ff_get_pict_type_char(int pict_type);
+int ff_combine_frame( MpegEncContext *s, int next, uint8_t **buf, int *buf_size);
+extern enum PixelFormat ff_yuv420p_list[2];
extern int ff_bit_exact;
@@ -690,7 +700,7 @@ void h263_encode_picture_header(MpegEncContext *s, int picture_number);
int h263_encode_gob_header(MpegEncContext * s, int mb_line);
INT16 *h263_pred_motion(MpegEncContext * s, int block,
int *px, int *py);
-void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n,
+void mpeg4_pred_ac(MpegEncContext * s, DCTELEM *block, int n,
int dir);
void ff_set_mpeg4_time(MpegEncContext * s, int picture_number);
void mpeg4_encode_picture_header(MpegEncContext *s, int picture_number);
@@ -717,6 +727,7 @@ int ff_mpeg4_get_video_packet_prefix_length(MpegEncContext *s);
int ff_h263_resync(MpegEncContext *s);
int ff_h263_get_gob_height(MpegEncContext *s);
void ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my);
+inline int ff_h263_round_chroma(int x);
/* rv10.c */
diff --git a/src/libffmpeg/libavcodec/msmpeg4.c b/src/libffmpeg/libavcodec/msmpeg4.c
index a08418874..2c524a067 100644
--- a/src/libffmpeg/libavcodec/msmpeg4.c
+++ b/src/libffmpeg/libavcodec/msmpeg4.c
@@ -502,7 +502,7 @@ static void msmpeg4_encode_motion(MpegEncContext * s,
static inline void handle_slices(MpegEncContext *s){
if (s->mb_x == 0) {
if (s->slice_height && (s->mb_y % s->slice_height) == 0) {
- if(s->msmpeg4_version != 4){
+ if(s->msmpeg4_version < 4){
ff_mpeg4_clean_buffers(s);
}
s->first_slice_line = 1;
@@ -691,7 +691,7 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n,
b = dc_val[ - 1 - wrap];
c = dc_val[ - wrap];
- if(s->first_slice_line && (n&2)==0 && s->msmpeg4_version!=4){
+ if(s->first_slice_line && (n&2)==0 && s->msmpeg4_version<4){
b=c=1024;
}
@@ -1195,7 +1195,7 @@ int msmpeg4_decode_picture_header(MpegEncContext * s)
#if 0
{
int i;
-for(i=0; i<s->gb.size*8; i++)
+for(i=0; i<s->gb.size_in_bits; i++)
printf("%d", get_bits1(&s->gb));
// get_bits1(&s->gb);
printf("END\n");
@@ -1869,7 +1869,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
if (i > 62){
i-= 192;
if(i&(~63)){
- const int left= s->gb.size*8 - get_bits_count(&s->gb);
+ const int left= s->gb.size_in_bits - get_bits_count(&s->gb);
if(((i+192 == 64 && level/qmul==-1) || s->error_resilience<=1) && left>=0){
fprintf(stderr, "ignoring overflow at %d %d\n", s->mb_x, s->mb_y);
break;
diff --git a/src/libffmpeg/libavcodec/msmpeg4data.h b/src/libffmpeg/libavcodec/msmpeg4data.h
index 3490fc08c..2c3a28f0e 100644
--- a/src/libffmpeg/libavcodec/msmpeg4data.h
+++ b/src/libffmpeg/libavcodec/msmpeg4data.h
@@ -1868,7 +1868,10 @@ static const uint8_t *wmv1_scantable[WMV1_SCANTABLE_COUNT+1]={
};
static const uint8_t table_inter_intra[4][2]={
- {0,1},{2,2},{6,3},{7,3}
+ {0,1} /*Luma-Left Chroma-Left*/,
+ {2,2} /*Luma-Top Chroma-Left*/,
+ {6,3} /*luma-Left Chroma-Top */,
+ {7,3} /*luma-Top Chroma-Top */
};
#define WMV2_INTER_CBP_TABLE_COUNT 4
diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c
index 5f14ed0eb..dc62e70f4 100644
--- a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c
+++ b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2002 Brian Foley
* Copyright (c) 2002 Dieter Shirley
+ * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -20,21 +21,39 @@
#include "../dsputil.h"
#include "dsputil_altivec.h"
-#if CONFIG_DARWIN
+#ifdef CONFIG_DARWIN
#include <sys/sysctl.h>
-#endif
+#else /* CONFIG_DARWIN */
+#include <signal.h>
+#include <setjmp.h>
+
+static sigjmp_buf jmpbuf;
+static volatile sig_atomic_t canjump = 0;
+
+/**
+ * Signal handler that jumps back to the context saved in jmpbuf.
+ * If no jump target is armed (canjump == 0) the default disposition is
+ * restored and the signal is raised again instead.
+ */
+static void sigill_handler (int sig)
+{
+    if (!canjump) {
+        signal (sig, SIG_DFL);
+        raise (sig);
+        /* raise() returns if sig is blocked while this handler runs; jmpbuf is
+           not valid here, so return and let the pending signal take its
+           default action instead of jumping */
+        return;
+    }
+
+    /* disarm before jumping so that a second signal is not routed to a stale jmpbuf */
+    canjump = 0;
+    siglongjmp (jmpbuf, 1);
+}
+#endif /* CONFIG_DARWIN */
int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
{
- int s, i;
- vector unsigned char *tv, zero;
+ int i;
+ int s __attribute__((aligned(16)));
+ const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);
+ vector unsigned char *tv;
vector unsigned char pix1v, pix2v, pix2iv, avgv, t5;
vector unsigned int sad;
vector signed int sumdiffs;
s = 0;
- zero = vec_splat_u8(0);
- sad = vec_splat_u32(0);
+ sad = (vector unsigned int)vec_splat_u32(0);
for(i=0;i<16;i++) {
/*
Read unaligned pixels into our vectors. The vectors are as follows:
@@ -72,16 +91,17 @@ int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
{
- int s, i;
- vector unsigned char *tv, zero;
+ int i;
+ int s __attribute__((aligned(16)));
+ const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);
+ vector unsigned char *tv;
vector unsigned char pix1v, pix2v, pix3v, avgv, t5;
vector unsigned int sad;
vector signed int sumdiffs;
uint8_t *pix3 = pix2 + line_size;
s = 0;
- zero = vec_splat_u8(0);
- sad = vec_splat_u32(0);
+ sad = (vector unsigned int)vec_splat_u32(0);
/*
Due to the fact that pix3 = pix2 + line_size, the pix3 of one
@@ -131,20 +151,21 @@ int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
int pix_abs16x16_xy2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
{
- int s, i;
+ int i;
+ int s __attribute__((aligned(16)));
uint8_t *pix3 = pix2 + line_size;
- vector unsigned char *tv, avgv, t5, zero;
+ const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);
+ const vector unsigned short two = (const vector unsigned short)vec_splat_u16(2);
+ vector unsigned char *tv, avgv, t5;
vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv;
vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv;
vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv;
- vector unsigned short avghv, avglv, two;
+ vector unsigned short avghv, avglv;
vector unsigned short t1, t2, t3, t4;
vector unsigned int sad;
vector signed int sumdiffs;
- zero = vec_splat_u8(0);
- two = vec_splat_u16(2);
- sad = vec_splat_u32(0);
+ sad = (vector unsigned int)vec_splat_u32(0);
s = 0;
@@ -231,14 +252,15 @@ int pix_abs16x16_xy2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
{
- int i, s;
+ int i;
+ int s __attribute__((aligned(16)));
+ const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);
vector unsigned char perm1, perm2, *pix1v, *pix2v;
vector unsigned char t1, t2, t3,t4, t5;
- vector unsigned int sad, zero;
+ vector unsigned int sad;
vector signed int sumdiffs;
- zero = (vector unsigned int) (0);
- sad = (vector unsigned int) (0);
+ sad = (vector unsigned int)vec_splat_u32(0);
for(i=0;i<16;i++) {
@@ -272,15 +294,20 @@ int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
{
- int i, s;
+ int i;
+ int s __attribute__((aligned(16)));
+ const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);
vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v;
vector unsigned char t1, t2, t3,t4, t5;
- vector unsigned int sad, zero;
+ vector unsigned int sad;
vector signed int sumdiffs;
- zero = (vector unsigned int) (0);
- sad = (vector unsigned int) (0);
- permclear = (vector unsigned char) (255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);
+ sad = (vector unsigned int)vec_splat_u32(0);
+#ifdef CONFIG_DARWIN
+ permclear = (vector unsigned char)(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);
+#else
+ permclear = (vector unsigned char){255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0};
+#endif
for(i=0;i<8;i++) {
/* Read potentially unaligned pixels into t1 and t2
@@ -315,14 +342,15 @@ int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
int pix_norm1_altivec(uint8_t *pix, int line_size)
{
- int s, i;
- vector unsigned char *tv, zero;
+ int i;
+ int s __attribute__((aligned(16)));
+ const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);
+ vector unsigned char *tv;
vector unsigned char pixv;
vector unsigned int sv;
vector signed int sum;
-
- zero = vec_splat_u8(0);
- sv = vec_splat_u32(0);
+
+ sv = (vector unsigned int)vec_splat_u32(0);
s = 0;
for (i = 0; i < 16; i++) {
@@ -343,18 +371,127 @@ int pix_norm1_altivec(uint8_t *pix, int line_size)
return s;
}
-int pix_sum_altivec(UINT8 * pix, int line_size)
+/**
+ * Sum of Squared Errors for a 8x8 block.
+ * AltiVec-enhanced.
+ * It's the pix_abs8x8_altivec code above w/ squaring added.
+ *
+ * @param v         not referenced by this implementation
+ * @param pix1      first block, may be unaligned
+ * @param pix2      second block, may be unaligned
+ * @param line_size byte distance between two consecutive rows (used for both blocks)
+ * @return the sum over 8 rows x 8 columns of (pix1 - pix2)^2
+ */
+int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)
{
+ int i;
+ /* scalar result, filled in by vec_ste at the end */
+ int s __attribute__((aligned(16)));
+ const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);
+ vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v;
+ vector unsigned char t1, t2, t3,t4, t5;
+ vector unsigned int sum;
+ vector signed int sumsqr;
+
+ sum = (vector unsigned int)vec_splat_u32(0);
+ /* mask that keeps the first 8 bytes of a vector and clears the last 8;
+ the two branches differ only in the vector-literal syntax */
+#ifdef CONFIG_DARWIN
+ permclear = (vector unsigned char)(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);
+#else
+ permclear = (vector unsigned char){255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0};
+#endif
+
+ for(i=0;i<8;i++) {
+ /* Read potentially unaligned pixels into t1 and t2
+ Since we're reading 16 pixels, and actually only want 8,
+ mask out the last 8 pixels. The 0s don't change the sum. */
+ /* Note: element [1] of each vector pointer is always loaded, so the
+ 16 bytes following the first loaded vector are read on every row. */
+ perm1 = vec_lvsl(0, pix1);
+ pix1v = (vector unsigned char *) pix1;
+ perm2 = vec_lvsl(0, pix2);
+ pix2v = (vector unsigned char *) pix2;
+ t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear);
+ t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear);
+ /*
+ Since we want to use unsigned chars, we can take advantage
+ of the fact that abs(a-b)^2 = (a-b)^2.
+ */
+
+ /* Calculate abs differences vector */
+ t3 = vec_max(t1, t2);
+ t4 = vec_min(t1, t2);
+ t5 = vec_sub(t3, t4);
+
+ /* Square the values and add them to our sum */
+ sum = vec_msum(t5, t5, sum);
+
+ pix1 += line_size;
+ pix2 += line_size;
+ }
+
+ /* Sum up the four partial sums, and put the result into s */
+ /* vec_sums leaves the total in element 3; it is splatted to all four
+ elements before vec_ste stores one of them into s */
+ sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero);
+ sumsqr = vec_splat(sumsqr, 3);
+ vec_ste(sumsqr, 0, &s);
+
+ return s;
+}
+
+/**
+ * Sum of Squared Errors for a 16x16 block.
+ * AltiVec-enhanced.
+ * It's the pix_abs16x16_altivec code above w/ squaring added.
+ *
+ * @param v         not referenced by this implementation
+ * @param pix1      first block, may be unaligned
+ * @param pix2      second block, may be unaligned
+ * @param line_size byte distance between two consecutive rows (used for both blocks)
+ * @return the sum over 16 rows x 16 columns of (pix1 - pix2)^2
+ */
+int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)
+{
+ int i;
+ /* scalar result, filled in by vec_ste at the end */
+ int s __attribute__((aligned(16)));
+ const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);
+ vector unsigned char perm1, perm2, *pix1v, *pix2v;
+ vector unsigned char t1, t2, t3,t4, t5;
+ vector unsigned int sum;
+ vector signed int sumsqr;
+
+ sum = (vector unsigned int)vec_splat_u32(0);
+
+ for(i=0;i<16;i++) {
+ /* Read potentially unaligned pixels into t1 and t2 */
+ /* Note: element [1] of each vector pointer is always loaded, so the
+ 16 bytes following the first loaded vector are read on every row. */
+ perm1 = vec_lvsl(0, pix1);
+ pix1v = (vector unsigned char *) pix1;
+ perm2 = vec_lvsl(0, pix2);
+ pix2v = (vector unsigned char *) pix2;
+ t1 = vec_perm(pix1v[0], pix1v[1], perm1);
+ t2 = vec_perm(pix2v[0], pix2v[1], perm2);
+
+ /*
+ Since we want to use unsigned chars, we can take advantage
+ of the fact that abs(a-b)^2 = (a-b)^2.
+ */
+
+ /* Calculate abs differences vector */
+ t3 = vec_max(t1, t2);
+ t4 = vec_min(t1, t2);
+ t5 = vec_sub(t3, t4);
+
+ /* Square the values and add them to our sum */
+ sum = vec_msum(t5, t5, sum);
+
+ pix1 += line_size;
+ pix2 += line_size;
+ }
+
+ /* Sum up the four partial sums, and put the result into s */
+ /* vec_sums leaves the total in element 3; it is splatted to all four
+ elements before vec_ste stores one of them into s */
+ sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero);
+ sumsqr = vec_splat(sumsqr, 3);
+ vec_ste(sumsqr, 0, &s);
+
+ return s;
+}
+
+int pix_sum_altivec(UINT8 * pix, int line_size)
+{
+ const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);
vector unsigned char perm, *pixv;
vector unsigned char t1;
- vector unsigned int sad, zero;
+ vector unsigned int sad;
vector signed int sumdiffs;
- int s, i;
-
- zero = (vector unsigned int) (0);
- sad = (vector unsigned int) (0);
+ int i;
+ int s __attribute__((aligned(16)));
+
+ sad = (vector unsigned int)vec_splat_u32(0);
for (i = 0; i < 16; i++) {
/* Read the potentially unaligned 16 pixels into t1 */
@@ -380,7 +517,7 @@ void get_pixels_altivec(DCTELEM *restrict block, const UINT8 *pixels, int line_s
{
int i;
vector unsigned char perm, bytes, *pixv;
- vector unsigned char zero = (vector unsigned char) (0);
+ const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);
vector signed short shorts;
for(i=0;i<8;i++)
@@ -407,7 +544,7 @@ void diff_pixels_altivec(DCTELEM *restrict block, const UINT8 *s1,
{
int i;
vector unsigned char perm, bytes, *pixv;
- vector unsigned char zero = (vector unsigned char) (0);
+ const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);
vector signed short shorts1, shorts2;
for(i=0;i<4;i++)
@@ -474,10 +611,675 @@ void diff_pixels_altivec(DCTELEM *restrict block, const UINT8 *s1,
}
}
+/* Adapter exposing pix_abs16x16_altivec through a four-argument
+ (context, a, b, stride) signature; the context pointer s is ignored. */
+int sad16x16_altivec(void *s, uint8_t *a, uint8_t *b, int stride) {
+ return pix_abs16x16_altivec(a,b,stride);
+}
+
+/* Adapter exposing pix_abs8x8_altivec through a four-argument
+ (context, a, b, stride) signature; the context pointer s is ignored. */
+int sad8x8_altivec(void *s, uint8_t *a, uint8_t *b, int stride) {
+ return pix_abs8x8_altivec(a,b,stride);
+}
+
+/**
+ * Add src to dst byte by byte (modulo 256): dst[i] += src[i] for 0 <= i < w.
+ *
+ * @param dst destination buffer, updated in place
+ * @param src bytes to add
+ * @param w   number of bytes to process
+ *
+ * The AltiVec path uses aligned vector loads/stores, so it relies on dst
+ * and src being 16-byte aligned (stated as guaranteed by the original
+ * author -- TODO confirm against callers).
+ */
+void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) {
+#ifdef ALTIVEC_USE_REFERENCE_C_CODE
+ int i;
+ /* unrolled by 8: step by 8 so that every byte is added exactly once */
+ for(i=0; i+7<w; i+=8){
+ dst[i+0] += src[i+0];
+ dst[i+1] += src[i+1];
+ dst[i+2] += src[i+2];
+ dst[i+3] += src[i+3];
+ dst[i+4] += src[i+4];
+ dst[i+5] += src[i+5];
+ dst[i+6] += src[i+6];
+ dst[i+7] += src[i+7];
+ }
+ for(; i<w; i++)
+ dst[i+0] += src[i+0];
+#else /* ALTIVEC_USE_REFERENCE_C_CODE */
+ register int i;
+ register vector unsigned char vdst, vsrc;
+
+ /* dst and src are 16 bytes-aligned (guaranteed) */
+ /* i is a byte offset: one 16-byte vector is handled per iteration */
+ for(i = 0 ; (i + 15) < w ; i += 16)
+ {
+ vdst = vec_ld(i, (unsigned char*)dst);
+ vsrc = vec_ld(i, (unsigned char*)src);
+ vdst = vec_add(vsrc, vdst);
+ vec_st(vdst, i, (unsigned char*)dst);
+ }
+ /* if w is not a multiple of 16, add the remaining bytes one by one */
+ for (; (i < w) ; i++)
+ {
+ dst[i] += src[i];
+ }
+#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
+}
+
+/**
+ * Copy a 16-pixel-wide, h-row block from pixels to block.
+ * Assumes that ((line_size % 16) == 0).
+ *
+ * @param block     destination; written with aligned vector stores in the
+ *                  AltiVec path (assumes 16-byte alignment -- TODO confirm)
+ * @param pixels    source, may be unaligned
+ * @param line_size byte distance between rows, used for both buffers
+ * @param h         number of rows
+ */
+void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+/* POWERPC_TBL_* are profiling macros defined outside this file */
+POWERPC_TBL_DECLARE(altivec_put_pixels16_num, 1);
+#ifdef ALTIVEC_USE_REFERENCE_C_CODE
+ int i;
+
+POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1);
+
+ /* plain C fallback: four 32-bit copies per row */
+ for(i=0; i<h; i++) {
+ *((uint32_t*)(block )) = (((const struct unaligned_32 *) (pixels))->l);
+ *((uint32_t*)(block+4)) = (((const struct unaligned_32 *) (pixels+4))->l);
+ *((uint32_t*)(block+8)) = (((const struct unaligned_32 *) (pixels+8))->l);
+ *((uint32_t*)(block+12)) = (((const struct unaligned_32 *) (pixels+12))->l);
+ pixels+=line_size;
+ block +=line_size;
+ }
+
+POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1);
+
+#else /* ALTIVEC_USE_REFERENCE_C_CODE */
+ register vector unsigned char pixelsv1, pixelsv2;
+ /* computed once: with line_size a multiple of 16 the misalignment of
+ pixels is the same on every row */
+ register vector unsigned char perm = vec_lvsl(0, pixels);
+ int i;
+
+POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1);
+
+ for(i=0; i<h; i++) {
+ /* two aligned loads + permute = one unaligned 16-byte load */
+ pixelsv1 = vec_ld(0, (unsigned char*)pixels);
+ pixelsv2 = vec_ld(16, (unsigned char*)pixels);
+ vec_st(vec_perm(pixelsv1, pixelsv2, perm),
+ 0, (unsigned char*)block);
+ pixels+=line_size;
+ block +=line_size;
+ }
+
+POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1);
+
+#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
+}
+
+/**
+ * Average a 16-pixel-wide, h-row block from pixels into block:
+ * each destination byte becomes the rounded-up mean of itself and the
+ * corresponding source byte.
+ * Assumes that ((line_size % 16) == 0).
+ *
+ * op_avg is the 32-bit SWAR form of that average, (a|b) - (((a^b)&0xFE..)>>1),
+ * used by the plain C fallback.
+ */
+#define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
+void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+/* POWERPC_TBL_* are profiling macros defined outside this file */
+POWERPC_TBL_DECLARE(altivec_avg_pixels16_num, 1);
+#ifdef ALTIVEC_USE_REFERENCE_C_CODE
+ int i;
+
+POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1);
+
+ /* plain C fallback: four 32-bit averages per row */
+ for(i=0; i<h; i++) {
+ op_avg(*((uint32_t*)(block)),(((const struct unaligned_32 *)(pixels))->l));
+ op_avg(*((uint32_t*)(block+4)),(((const struct unaligned_32 *)(pixels+4))->l));
+ op_avg(*((uint32_t*)(block+8)),(((const struct unaligned_32 *)(pixels+8))->l));
+ op_avg(*((uint32_t*)(block+12)),(((const struct unaligned_32 *)(pixels+12))->l));
+ pixels+=line_size;
+ block +=line_size;
+ }
+
+POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1);
+
+#else /* ALTIVEC_USE_REFERENCE_C_CODE */
+ register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
+ /* computed once: with line_size a multiple of 16 the misalignment of
+ pixels is the same on every row */
+ register vector unsigned char perm = vec_lvsl(0, pixels);
+ int i;
+
+POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1);
+
+ for(i=0; i<h; i++) {
+ /* two aligned loads + permute = one unaligned 16-byte load */
+ pixelsv1 = vec_ld(0, (unsigned char*)pixels);
+ pixelsv2 = vec_ld(16, (unsigned char*)pixels);
+ /* block is loaded/stored with aligned accesses (assumes 16-byte
+ alignment -- TODO confirm) */
+ blockv = vec_ld(0, block);
+ pixelsv = vec_perm(pixelsv1, pixelsv2, perm);
+ blockv = vec_avg(blockv,pixelsv);
+ vec_st(blockv, 0, (unsigned char*)block);
+ pixels+=line_size;
+ block +=line_size;
+ }
+
+POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1);
+
+#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
+}
+
+/**
+ * Average an 8-pixel-wide, h-row block from pixels into block:
+ * each destination byte becomes the rounded-up mean of itself and the
+ * corresponding source byte.
+ * Assumes that ((line_size % 8) == 0).
+ *
+ * The AltiVec path reads and rewrites the whole 16-byte vector that
+ * contains the 8 destination bytes; the other half is written back
+ * unchanged.
+ */
+void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
+{
+/* POWERPC_TBL_* are profiling macros defined outside this file */
+POWERPC_TBL_DECLARE(altivec_avg_pixels8_num, 1);
+#ifdef ALTIVEC_USE_REFERENCE_C_CODE
+ int i;
+POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1);
+ /* plain C fallback: two 32-bit SWAR averages, (a|b) - (((a^b)&0xFE..)>>1) */
+ for (i = 0; i < h; i++) {
+ *((uint32_t *) (block)) =
+ (((*((uint32_t *) (block))) |
+ ((((const struct unaligned_32 *) (pixels))->l))) -
+ ((((*((uint32_t *) (block))) ^
+ ((((const struct unaligned_32 *) (pixels))->
+ l))) & 0xFEFEFEFEUL) >> 1));
+ *((uint32_t *) (block + 4)) =
+ (((*((uint32_t *) (block + 4))) |
+ ((((const struct unaligned_32 *) (pixels + 4))->l))) -
+ ((((*((uint32_t *) (block + 4))) ^
+ ((((const struct unaligned_32 *) (pixels +
+ 4))->
+ l))) & 0xFEFEFEFEUL) >> 1));
+ pixels += line_size;
+ block += line_size;
+ }
+POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1);
+
+#else /* ALTIVEC_USE_REFERENCE_C_CODE */
+ register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
+ int i;
+
+POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1);
+
+ for (i = 0; i < h; i++) {
+ /*
+ block is 8 bytes-aligned, so we're either in the
+ left block (16 bytes-aligned) or in the right block (not)
+ */
+ int rightside = ((unsigned long)block & 0x0000000F);
+
+ blockv = vec_ld(0, block);
+ /* two aligned loads + permute = one unaligned 16-byte load */
+ pixelsv1 = vec_ld(0, (unsigned char*)pixels);
+ pixelsv2 = vec_ld(16, (unsigned char*)pixels);
+ pixelsv = vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels));
+
+ /* Move the 8 source pixels into the half of the vector occupied by
+ the destination and fill the other half with blockv itself, so
+ that averaging leaves that other half unchanged. */
+ if (rightside)
+ {
+ pixelsv = vec_perm(blockv, pixelsv, vcprm(0,1,s0,s1));
+ }
+ else
+ {
+ pixelsv = vec_perm(blockv, pixelsv, vcprm(s0,s1,2,3));
+ }
+
+ blockv = vec_avg(blockv, pixelsv);
+
+ vec_st(blockv, 0, block);
+
+ pixels += line_size;
+ block += line_size;
+ }
+
+POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1);
+
+#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
+}
+
+/**
+ * Write an 8-pixel-wide, h-row block where each output pixel is the
+ * rounded average of a 2x2 source neighbourhood:
+ * (p[y][x] + p[y][x+1] + p[y+1][x] + p[y+1][x+1] + 2) >> 2.
+ * Assumes that ((line_size % 8) == 0).
+ *
+ * Rows 0..h of pixels are read (one more row than is written).
+ */
+void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+/* POWERPC_TBL_* are profiling macros defined outside this file */
+POWERPC_TBL_DECLARE(altivec_put_pixels8_xy2_num, 1);
+#ifdef ALTIVEC_USE_REFERENCE_C_CODE
+ int j;
+POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1);
+ /* plain C fallback: two passes, each producing a 4-byte-wide column;
+ every byte is split into its low 2 bits (l*) and high 6 bits (h*)
+ so four pixels can be summed in one 32-bit word without carries
+ crossing byte boundaries */
+ for (j = 0; j < 2; j++) {
+ int i;
+ const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
+ const uint32_t b =
+ (((const struct unaligned_32 *) (pixels + 1))->l);
+ uint32_t l0 =
+ (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;
+ uint32_t h0 =
+ ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
+ uint32_t l1, h1;
+ pixels += line_size;
+ /* two output rows per iteration */
+ for (i = 0; i < h; i += 2) {
+ uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
+ uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l);
+ l1 = (a & 0x03030303UL) + (b & 0x03030303UL);
+ h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
+ *((uint32_t *) block) =
+ h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
+ pixels += line_size;
+ block += line_size;
+ a = (((const struct unaligned_32 *) (pixels))->l);
+ b = (((const struct unaligned_32 *) (pixels + 1))->l);
+ l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;
+ h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
+ *((uint32_t *) block) =
+ h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
+ pixels += line_size;
+ block += line_size;
+ } pixels += 4 - line_size * (h + 1);
+ block += 4 - line_size * h;
+ }
+
+POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
+
+#else /* ALTIVEC_USE_REFERENCE_C_CODE */
+ register int i;
+ register vector unsigned char
+ pixelsv1, pixelsv2,
+ pixelsavg;
+ register vector unsigned char
+ blockv, temp1, temp2;
+ register vector unsigned short
+ pixelssum1, pixelssum2, temp3;
+ register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
+ register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
+
+ /* Prime pixelssum1 with row 0: p[x] + p[x+1] + 2 as 16-bit values */
+ temp1 = vec_ld(0, pixels);
+ temp2 = vec_ld(16, pixels);
+ pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
+ /* when pixels+1 is 16-byte aligned, vec_lvsl(1, pixels) yields a zero
+ shift and the permute would select temp1; the wanted data is
+ exactly temp2 */
+ if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F)
+ {
+ pixelsv2 = temp2;
+ }
+ else
+ {
+ pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
+ }
+ /* interleave with zero bytes: widens the first 8 pixels to 16 bits */
+ pixelsv1 = vec_mergeh(vczero, pixelsv1);
+ pixelsv2 = vec_mergeh(vczero, pixelsv2);
+ pixelssum1 = vec_add((vector unsigned short)pixelsv1,
+ (vector unsigned short)pixelsv2);
+ pixelssum1 = vec_add(pixelssum1, vctwo);
+
+POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1);
+ for (i = 0; i < h ; i++) {
+ /* non-zero when block sits in the upper 8 bytes of its 16-byte vector */
+ int rightside = ((unsigned long)block & 0x0000000F);
+ blockv = vec_ld(0, block);
+
+ /* horizontal pair sums of the next source row */
+ temp1 = vec_ld(line_size, pixels);
+ temp2 = vec_ld(line_size + 16, pixels);
+ pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
+ if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F)
+ {
+ pixelsv2 = temp2;
+ }
+ else
+ {
+ pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
+ }
+
+ pixelsv1 = vec_mergeh(vczero, pixelsv1);
+ pixelsv2 = vec_mergeh(vczero, pixelsv2);
+ pixelssum2 = vec_add((vector unsigned short)pixelsv1,
+ (vector unsigned short)pixelsv2);
+ /* (previous row sum + 2 + this row sum) >> 2 */
+ temp3 = vec_add(pixelssum1, pixelssum2);
+ temp3 = vec_sra(temp3, vctwo);
+ /* this row (plus rounding) becomes the "previous row" of the next pass */
+ pixelssum1 = vec_add(pixelssum2, vctwo);
+ pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);
+
+ /* replace only the 8 destination bytes, keep the other half of blockv */
+ if (rightside)
+ {
+ blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
+ }
+ else
+ {
+ blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
+ }
+
+ vec_st(blockv, 0, block);
+
+ block += line_size;
+ pixels += line_size;
+ }
+
+POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
+#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
+}
+
+/**
+ * Write an 8-pixel-wide, h-row block where each output pixel is the
+ * average of a 2x2 source neighbourhood with the smaller ("no rounding")
+ * bias: (p[y][x] + p[y][x+1] + p[y+1][x] + p[y+1][x+1] + 1) >> 2.
+ * Assumes that ((line_size % 8) == 0).
+ *
+ * Rows 0..h of pixels are read (one more row than is written).
+ */
+void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+/* POWERPC_TBL_* are profiling macros defined outside this file */
+POWERPC_TBL_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1);
+#ifdef ALTIVEC_USE_REFERENCE_C_CODE
+ int j;
+POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
+ /* plain C fallback: two passes, each producing a 4-byte-wide column;
+ every byte is split into its low 2 bits (l*) and high 6 bits (h*)
+ so four pixels can be summed in one 32-bit word without carries
+ crossing byte boundaries */
+ for (j = 0; j < 2; j++) {
+ int i;
+ const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
+ const uint32_t b =
+ (((const struct unaligned_32 *) (pixels + 1))->l);
+ uint32_t l0 =
+ (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL;
+ uint32_t h0 =
+ ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
+ uint32_t l1, h1;
+ pixels += line_size;
+ /* two output rows per iteration */
+ for (i = 0; i < h; i += 2) {
+ uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
+ uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l);
+ l1 = (a & 0x03030303UL) + (b & 0x03030303UL);
+ h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
+ *((uint32_t *) block) =
+ h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
+ pixels += line_size;
+ block += line_size;
+ a = (((const struct unaligned_32 *) (pixels))->l);
+ b = (((const struct unaligned_32 *) (pixels + 1))->l);
+ l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL;
+ h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
+ *((uint32_t *) block) =
+ h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
+ pixels += line_size;
+ block += line_size;
+ } pixels += 4 - line_size * (h + 1);
+ block += 4 - line_size * h;
+ }
+
+POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
+
+#else /* ALTIVEC_USE_REFERENCE_C_CODE */
+ register int i;
+ register vector unsigned char
+ pixelsv1, pixelsv2,
+ pixelsavg;
+ register vector unsigned char
+ blockv, temp1, temp2;
+ register vector unsigned short
+ pixelssum1, pixelssum2, temp3;
+ register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
+ /* vcone is the bias added to each row sum, vctwo is the shift count */
+ register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);
+ register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
+
+ /* Prime pixelssum1 with row 0: p[x] + p[x+1] + 1 as 16-bit values */
+ temp1 = vec_ld(0, pixels);
+ temp2 = vec_ld(16, pixels);
+ pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
+ /* when pixels+1 is 16-byte aligned, vec_lvsl(1, pixels) yields a zero
+ shift and the permute would select temp1; the wanted data is
+ exactly temp2 */
+ if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F)
+ {
+ pixelsv2 = temp2;
+ }
+ else
+ {
+ pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
+ }
+ /* interleave with zero bytes: widens the first 8 pixels to 16 bits */
+ pixelsv1 = vec_mergeh(vczero, pixelsv1);
+ pixelsv2 = vec_mergeh(vczero, pixelsv2);
+ pixelssum1 = vec_add((vector unsigned short)pixelsv1,
+ (vector unsigned short)pixelsv2);
+ pixelssum1 = vec_add(pixelssum1, vcone);
+
+POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
+ for (i = 0; i < h ; i++) {
+ /* non-zero when block sits in the upper 8 bytes of its 16-byte vector */
+ int rightside = ((unsigned long)block & 0x0000000F);
+ blockv = vec_ld(0, block);
+
+ /* horizontal pair sums of the next source row */
+ temp1 = vec_ld(line_size, pixels);
+ temp2 = vec_ld(line_size + 16, pixels);
+ pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
+ if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F)
+ {
+ pixelsv2 = temp2;
+ }
+ else
+ {
+ pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
+ }
+
+ pixelsv1 = vec_mergeh(vczero, pixelsv1);
+ pixelsv2 = vec_mergeh(vczero, pixelsv2);
+ pixelssum2 = vec_add((vector unsigned short)pixelsv1,
+ (vector unsigned short)pixelsv2);
+ /* (previous row sum + 1 + this row sum) >> 2 */
+ temp3 = vec_add(pixelssum1, pixelssum2);
+ temp3 = vec_sra(temp3, vctwo);
+ /* this row (plus bias) becomes the "previous row" of the next pass */
+ pixelssum1 = vec_add(pixelssum2, vcone);
+ pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);
+
+ /* replace only the 8 destination bytes, keep the other half of blockv */
+ if (rightside)
+ {
+ blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
+ }
+ else
+ {
+ blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
+ }
+
+ vec_st(blockv, 0, block);
+
+ block += line_size;
+ pixels += line_size;
+ }
+
+POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
+#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
+}
+
+/**
+ * Write a 16-pixel-wide, h-row block where each output pixel is the
+ * rounded average of a 2x2 source neighbourhood:
+ * (p[y][x] + p[y][x+1] + p[y+1][x] + p[y+1][x+1] + 2) >> 2.
+ * Assumes that ((line_size % 16) == 0).
+ *
+ * Rows 0..h of pixels are read (one more row than is written). The
+ * AltiVec path stores whole vectors to block (assumes 16-byte
+ * alignment -- TODO confirm).
+ */
+void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
+{
+/* POWERPC_TBL_* are profiling macros defined outside this file */
+POWERPC_TBL_DECLARE(altivec_put_pixels16_xy2_num, 1);
+#ifdef ALTIVEC_USE_REFERENCE_C_CODE
+ int j;
+POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1);
+ /* plain C fallback: four passes, each producing a 4-byte-wide column;
+ every byte is split into its low 2 bits (l*) and high 6 bits (h*)
+ so four pixels can be summed in one 32-bit word without carries
+ crossing byte boundaries */
+ for (j = 0; j < 4; j++) {
+ int i;
+ const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
+ const uint32_t b =
+ (((const struct unaligned_32 *) (pixels + 1))->l);
+ uint32_t l0 =
+ (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;
+ uint32_t h0 =
+ ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
+ uint32_t l1, h1;
+ pixels += line_size;
+ /* two output rows per iteration */
+ for (i = 0; i < h; i += 2) {
+ uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
+ uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l);
+ l1 = (a & 0x03030303UL) + (b & 0x03030303UL);
+ h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
+ *((uint32_t *) block) =
+ h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
+ pixels += line_size;
+ block += line_size;
+ a = (((const struct unaligned_32 *) (pixels))->l);
+ b = (((const struct unaligned_32 *) (pixels + 1))->l);
+ l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;
+ h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
+ *((uint32_t *) block) =
+ h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
+ pixels += line_size;
+ block += line_size;
+ } pixels += 4 - line_size * (h + 1);
+ block += 4 - line_size * h;
+ }
+
+POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
+
+#else /* ALTIVEC_USE_REFERENCE_C_CODE */
+ register int i;
+ register vector unsigned char
+ pixelsv1, pixelsv2, pixelsv3, pixelsv4;
+ register vector unsigned char
+ blockv, temp1, temp2;
+ register vector unsigned short
+ pixelssum1, pixelssum2, temp3,
+ pixelssum3, pixelssum4, temp4;
+ register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
+ register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
+
+ /* Prime the row-0 sums p[x] + p[x+1] + 2: pixelssum1 holds the first
+ 8 columns, pixelssum3 the last 8, both as 16-bit values */
+ temp1 = vec_ld(0, pixels);
+ temp2 = vec_ld(16, pixels);
+ pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
+ /* when pixels+1 is 16-byte aligned, vec_lvsl(1, pixels) yields a zero
+ shift and the permute would select temp1; the wanted data is
+ exactly temp2 */
+ if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F)
+ {
+ pixelsv2 = temp2;
+ }
+ else
+ {
+ pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
+ }
+ /* interleave with zero bytes to widen to 16 bits:
+ mergel takes the last 8 pixels, mergeh the first 8 */
+ pixelsv3 = vec_mergel(vczero, pixelsv1);
+ pixelsv4 = vec_mergel(vczero, pixelsv2);
+ pixelsv1 = vec_mergeh(vczero, pixelsv1);
+ pixelsv2 = vec_mergeh(vczero, pixelsv2);
+ pixelssum3 = vec_add((vector unsigned short)pixelsv3,
+ (vector unsigned short)pixelsv4);
+ pixelssum3 = vec_add(pixelssum3, vctwo);
+ pixelssum1 = vec_add((vector unsigned short)pixelsv1,
+ (vector unsigned short)pixelsv2);
+ pixelssum1 = vec_add(pixelssum1, vctwo);
+
+POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1);
+ for (i = 0; i < h ; i++) {
+ /* all 16 destination bytes are overwritten, so the destination
+ does not need to be loaded first */
+
+ /* horizontal pair sums of the next source row */
+ temp1 = vec_ld(line_size, pixels);
+ temp2 = vec_ld(line_size + 16, pixels);
+ pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
+ if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F)
+ {
+ pixelsv2 = temp2;
+ }
+ else
+ {
+ pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
+ }
+
+ pixelsv3 = vec_mergel(vczero, pixelsv1);
+ pixelsv4 = vec_mergel(vczero, pixelsv2);
+ pixelsv1 = vec_mergeh(vczero, pixelsv1);
+ pixelsv2 = vec_mergeh(vczero, pixelsv2);
+
+ pixelssum4 = vec_add((vector unsigned short)pixelsv3,
+ (vector unsigned short)pixelsv4);
+ pixelssum2 = vec_add((vector unsigned short)pixelsv1,
+ (vector unsigned short)pixelsv2);
+ /* (previous row sum + 2 + this row sum) >> 2, for both halves */
+ temp4 = vec_add(pixelssum3, pixelssum4);
+ temp4 = vec_sra(temp4, vctwo);
+ temp3 = vec_add(pixelssum1, pixelssum2);
+ temp3 = vec_sra(temp3, vctwo);
+
+ /* this row (plus rounding) becomes the "previous row" of the next pass */
+ pixelssum3 = vec_add(pixelssum4, vctwo);
+ pixelssum1 = vec_add(pixelssum2, vctwo);
+
+ /* narrow both halves back to 16 bytes */
+ blockv = vec_packsu(temp3, temp4);
+
+ vec_st(blockv, 0, block);
+
+ block += line_size;
+ pixels += line_size;
+ }
+
+POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
+#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
+}
+
+/**
+ * Write a 16-pixel-wide, h-row block where each output pixel is the
+ * average of a 2x2 source neighbourhood with the smaller ("no rounding")
+ * bias: (p[y][x] + p[y][x+1] + p[y+1][x] + p[y+1][x+1] + 1) >> 2.
+ * Assumes that ((line_size % 16) == 0).
+ *
+ * Rows 0..h of pixels are read (one more row than is written). The
+ * AltiVec path stores whole vectors to block (assumes 16-byte
+ * alignment -- TODO confirm).
+ */
+void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
+{
+/* POWERPC_TBL_* are profiling macros defined outside this file */
+POWERPC_TBL_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1);
+#ifdef ALTIVEC_USE_REFERENCE_C_CODE
+ int j;
+POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
+ /* plain C fallback: four passes, each producing a 4-byte-wide column;
+ every byte is split into its low 2 bits (l*) and high 6 bits (h*)
+ so four pixels can be summed in one 32-bit word without carries
+ crossing byte boundaries */
+ for (j = 0; j < 4; j++) {
+ int i;
+ const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
+ const uint32_t b =
+ (((const struct unaligned_32 *) (pixels + 1))->l);
+ uint32_t l0 =
+ (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL;
+ uint32_t h0 =
+ ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
+ uint32_t l1, h1;
+ pixels += line_size;
+ /* two output rows per iteration */
+ for (i = 0; i < h; i += 2) {
+ uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
+ uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l);
+ l1 = (a & 0x03030303UL) + (b & 0x03030303UL);
+ h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
+ *((uint32_t *) block) =
+ h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
+ pixels += line_size;
+ block += line_size;
+ a = (((const struct unaligned_32 *) (pixels))->l);
+ b = (((const struct unaligned_32 *) (pixels + 1))->l);
+ l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL;
+ h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
+ *((uint32_t *) block) =
+ h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
+ pixels += line_size;
+ block += line_size;
+ } pixels += 4 - line_size * (h + 1);
+ block += 4 - line_size * h;
+ }
+
+POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
+
+#else /* ALTIVEC_USE_REFERENCE_C_CODE */
+ register int i;
+ register vector unsigned char
+ pixelsv1, pixelsv2, pixelsv3, pixelsv4;
+ register vector unsigned char
+ blockv, temp1, temp2;
+ register vector unsigned short
+ pixelssum1, pixelssum2, temp3,
+ pixelssum3, pixelssum4, temp4;
+ register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
+ /* vcone is the bias added to each row sum, vctwo is the shift count */
+ register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);
+ register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
+
+ /* Prime the row-0 sums p[x] + p[x+1] + 1: pixelssum1 holds the first
+ 8 columns, pixelssum3 the last 8, both as 16-bit values */
+ temp1 = vec_ld(0, pixels);
+ temp2 = vec_ld(16, pixels);
+ pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
+ /* when pixels+1 is 16-byte aligned, vec_lvsl(1, pixels) yields a zero
+ shift and the permute would select temp1; the wanted data is
+ exactly temp2 */
+ if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F)
+ {
+ pixelsv2 = temp2;
+ }
+ else
+ {
+ pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
+ }
+ /* interleave with zero bytes to widen to 16 bits:
+ mergel takes the last 8 pixels, mergeh the first 8 */
+ pixelsv3 = vec_mergel(vczero, pixelsv1);
+ pixelsv4 = vec_mergel(vczero, pixelsv2);
+ pixelsv1 = vec_mergeh(vczero, pixelsv1);
+ pixelsv2 = vec_mergeh(vczero, pixelsv2);
+ pixelssum3 = vec_add((vector unsigned short)pixelsv3,
+ (vector unsigned short)pixelsv4);
+ pixelssum3 = vec_add(pixelssum3, vcone);
+ pixelssum1 = vec_add((vector unsigned short)pixelsv1,
+ (vector unsigned short)pixelsv2);
+ pixelssum1 = vec_add(pixelssum1, vcone);
+
+POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
+ for (i = 0; i < h ; i++) {
+ /* all 16 destination bytes are overwritten, so the destination
+ does not need to be loaded first */
+
+ /* horizontal pair sums of the next source row */
+ temp1 = vec_ld(line_size, pixels);
+ temp2 = vec_ld(line_size + 16, pixels);
+ pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
+ if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F)
+ {
+ pixelsv2 = temp2;
+ }
+ else
+ {
+ pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
+ }
+
+ pixelsv3 = vec_mergel(vczero, pixelsv1);
+ pixelsv4 = vec_mergel(vczero, pixelsv2);
+ pixelsv1 = vec_mergeh(vczero, pixelsv1);
+ pixelsv2 = vec_mergeh(vczero, pixelsv2);
+
+ pixelssum4 = vec_add((vector unsigned short)pixelsv3,
+ (vector unsigned short)pixelsv4);
+ pixelssum2 = vec_add((vector unsigned short)pixelsv1,
+ (vector unsigned short)pixelsv2);
+ /* (previous row sum + 1 + this row sum) >> 2, for both halves */
+ temp4 = vec_add(pixelssum3, pixelssum4);
+ temp4 = vec_sra(temp4, vctwo);
+ temp3 = vec_add(pixelssum1, pixelssum2);
+ temp3 = vec_sra(temp3, vctwo);
+
+ /* this row (plus bias) becomes the "previous row" of the next pass */
+ pixelssum3 = vec_add(pixelssum4, vcone);
+ pixelssum1 = vec_add(pixelssum2, vcone);
+
+ /* narrow both halves back to 16 bytes */
+ blockv = vec_packsu(temp3, temp4);
+
+ vec_st(blockv, 0, block);
+
+ block += line_size;
+ pixels += line_size;
+ }
+
+POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
+#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
+}
int has_altivec(void)
{
-#if CONFIG_DARWIN
+#ifdef CONFIG_DARWIN
int sels[2] = {CTL_HW, HW_VECTORUNIT};
int has_vu = 0;
size_t len = sizeof(has_vu);
@@ -486,7 +1288,25 @@ int has_altivec(void)
err = sysctl(sels, 2, &has_vu, &len, NULL, 0);
if (err == 0) return (has_vu != 0);
-#endif
+#else /* CONFIG_DARWIN */
+/* no Darwin, do it the brute-force way */
+/* this is borrowed from the libmpeg2 library */
+ {
+ signal (SIGILL, sigill_handler);
+ if (sigsetjmp (jmpbuf, 1)) {
+ signal (SIGILL, SIG_DFL);
+ } else {
+ canjump = 1;
+
+ asm volatile ("mtspr 256, %0\n\t"
+ "vand %%v0, %%v0, %%v0"
+ :
+ : "r" (-1));
+
+ signal (SIGILL, SIG_DFL);
+ return 1;
+ }
+ }
+#endif /* CONFIG_DARWIN */
return 0;
}
-
diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.h b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.h
index d4d259d9e..61dbec548 100644
--- a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.h
+++ b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.h
@@ -17,14 +17,79 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#ifndef _DSPUTIL_ALTIVEC_
+#define _DSPUTIL_ALTIVEC_
+
+#include "dsputil_ppc.h"
+
+#ifdef HAVE_ALTIVEC
+
extern int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size);
extern int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size);
extern int pix_abs16x16_xy2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size);
extern int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size);
extern int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size);
+extern int sad16x16_altivec(void *s, uint8_t *a, uint8_t *b, int stride);
+extern int sad8x8_altivec(void *s, uint8_t *a, uint8_t *b, int stride);
extern int pix_norm1_altivec(uint8_t *pix, int line_size);
+extern int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size);
+extern int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size);
extern int pix_sum_altivec(UINT8 * pix, int line_size);
extern void diff_pixels_altivec(DCTELEM* block, const UINT8* s1, const UINT8* s2, int stride);
extern void get_pixels_altivec(DCTELEM* block, const UINT8 * pixels, int line_size);
+extern void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w);
+extern void put_pixels_clamped_altivec(const DCTELEM *block, UINT8 *restrict pixels, int line_size);
+extern void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h);
+extern void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h);
+extern void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h);
+extern void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h);
+extern void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h);
+extern void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h);
+extern void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h);
+
+extern void gmc1_altivec(UINT8 *dst, UINT8 *src, int stride, int h, int x16, int y16, int rounder);
+
extern int has_altivec(void);
+
+// used to build registers permutation vectors (vcprm)
+// the 's' are for words in the _s_econd vector
+#define WORD_0 0x00,0x01,0x02,0x03
+#define WORD_1 0x04,0x05,0x06,0x07
+#define WORD_2 0x08,0x09,0x0a,0x0b
+#define WORD_3 0x0c,0x0d,0x0e,0x0f
+#define WORD_s0 0x10,0x11,0x12,0x13
+#define WORD_s1 0x14,0x15,0x16,0x17
+#define WORD_s2 0x18,0x19,0x1a,0x1b
+#define WORD_s3 0x1c,0x1d,0x1e,0x1f
+
+#ifdef CONFIG_DARWIN
+#define vcprm(a,b,c,d) (const vector unsigned char)(WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d)
+#else
+#define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d}
+#endif
+
+// vcprmle is used to keep the same word indices as in the SSE version.
+// It is the same as vcprm, with the four indices in reverse order
+// ('le' stands for Little Endian).
+#define vcprmle(a,b,c,d) vcprm(d,c,b,a)
+
+// used to build inverse/identity vectors (vcii)
+// n is _n_egative, p is _p_ositive
+#define FLOAT_n -1.
+#define FLOAT_p 1.
+
+
+#ifdef CONFIG_DARWIN
+#define vcii(a,b,c,d) (const vector float)(FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d)
+#else
+#define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d}
+#endif
+
+#else /* HAVE_ALTIVEC */
+#ifdef ALTIVEC_USE_REFERENCE_C_CODE
+#error "I can't use ALTIVEC_USE_REFERENCE_C_CODE if I don't use HAVE_ALTIVEC"
+#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
+#endif /* HAVE_ALTIVEC */
+
+#endif /* _DSPUTIL_ALTIVEC_ */
diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c b/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c
index 733d0c156..c502f5819 100644
--- a/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c
+++ b/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c
@@ -19,18 +19,168 @@
#include "../dsputil.h"
+#include "dsputil_ppc.h"
+
#ifdef HAVE_ALTIVEC
#include "dsputil_altivec.h"
#endif
int mm_flags = 0;
+/*
+ * Report the SIMD extensions usable on this machine as a mask of MM_* flags.
+ * Returns MM_ALTIVEC when AltiVec support is compiled in (HAVE_ALTIVEC) and
+ * has_altivec() confirms it at run time, 0 otherwise.
+ */
+int mm_support(void)
+{
+#if HAVE_ALTIVEC
+    if (has_altivec())
+        return MM_ALTIVEC;
+#endif /* HAVE_ALTIVEC */
+    return 0;
+}
+
+#ifdef POWERPC_TBL_PERFORMANCE_REPORT
+unsigned long long perfdata[powerpc_perf_total][powerpc_data_total];
+/*
+ * Printable name of every profiled function, indexed like the first
+ * dimension of perfdata[].
+ * The list below must match the enum in dsputil_ppc.h.
+ * The entries are string literals printed with "%s", so the table is made
+ * of read-only plain char pointers.
+ */
+static const char* const perfname[] = {
+    "fft_calc_altivec",
+    "gmc1_altivec",
+    "dct_unquantize_h263_altivec",
+    "idct_add_altivec",
+    "idct_put_altivec",
+    "put_pixels16_altivec",
+    "avg_pixels16_altivec",
+    "avg_pixels8_altivec",
+    "put_pixels8_xy2_altivec",
+    "put_no_rnd_pixels8_xy2_altivec",
+    "put_pixels16_xy2_altivec",
+    "put_no_rnd_pixels16_xy2_altivec",
+    "clear_blocks_dcbz32_ppc"
+};
+#ifdef POWERPC_PERF_USE_PMC
+unsigned long long perfdata_miss[powerpc_perf_total][powerpc_data_total];
+#endif
+#include <stdio.h>
+#endif
+
+#ifdef POWERPC_TBL_PERFORMANCE_REPORT
+/*
+ * Print the collected performance counters to stderr.
+ * For every profiled function that was sampled at least once, the minimum,
+ * maximum and average (sum / number of samples) values are printed, followed
+ * by the number of samples in parentheses. With POWERPC_PERF_USE_PMC the
+ * same figures are also printed from perfdata_miss[] (labelled "pmc2").
+ */
+void powerpc_display_perf_report(void)
+{
+ int i;
+#ifndef POWERPC_PERF_USE_PMC
+ fprintf(stderr, "PowerPC performance report\n Values are from the Time Base register, and represent 4 bus cycles.\n");
+#else /* POWERPC_PERF_USE_PMC */
+ fprintf(stderr, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n");
+#endif /* POWERPC_PERF_USE_PMC */
+ for(i = 0 ; i < powerpc_perf_total ; i++)
+ {
+ /* skip functions that were never sampled (also avoids dividing by zero) */
+ if (perfdata[i][powerpc_data_num] != (unsigned long long)0)
+ fprintf(stderr, " Function \"%s\" (pmc1):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
+ perfname[i],
+ perfdata[i][powerpc_data_min],
+ perfdata[i][powerpc_data_max],
+ (double)perfdata[i][powerpc_data_sum] /
+ (double)perfdata[i][powerpc_data_num],
+ perfdata[i][powerpc_data_num]);
+#ifdef POWERPC_PERF_USE_PMC
+ /* same report for the second set of counters */
+ if (perfdata_miss[i][powerpc_data_num] != (unsigned long long)0)
+ fprintf(stderr, " Function \"%s\" (pmc2):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
+ perfname[i],
+ perfdata_miss[i][powerpc_data_min],
+ perfdata_miss[i][powerpc_data_max],
+ (double)perfdata_miss[i][powerpc_data_sum] /
+ (double)perfdata_miss[i][powerpc_data_num],
+ perfdata_miss[i][powerpc_data_num]);
+#endif
+ }
+}
+#endif /* POWERPC_TBL_PERFORMANCE_REPORT */
+
+/* ***** WARNING ***** WARNING ***** WARNING ***** */
+/*
+  clear_blocks_dcbz32_ppc will not work properly
+  on PowerPC processors with a cache line size
+  not equal to 32 bytes.
+  Fortunately all processors used by Apple up to
+  at least the 7450 (aka second generation G4)
+  use 32-byte cache lines.
+  This is due to the use of the 'dcbz' instruction.
+  It simply clears a single cache line to zero,
+  so you need to know the cache line size to use it!
+  It's absurd, but it's fast...
+
+  The function zeroes the sizeof(DCTELEM)*6*64 bytes starting at `blocks'.
+  Only bit 4 of the address is tested, so `blocks' is presumably always
+  at least 16-byte aligned (NOTE(review): confirm against the callers).
+  When it sits in the second half of a cache line, the first and last
+  16 bytes are cleared with plain stores and 'dcbz' is only applied to the
+  cache lines that lie entirely inside the buffer.
+*/
+void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
+{
+POWERPC_TBL_DECLARE(powerpc_clear_blocks_dcbz32, 1);
+    /* number of bytes to clear */
+    const unsigned int total = sizeof(DCTELEM)*6*64;
+    /* non-zero when `blocks' starts 16 bytes into a 32-byte cache line */
+    register int misal = ((unsigned long)blocks & 0x00000010);
+    register unsigned int i = 0;
+POWERPC_TBL_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
+#if 1
+    if (misal) {
+        /* 32-bit stores: four of them are 16 bytes whatever sizeof(long) is */
+        uint32_t *head = (uint32_t*)blocks;
+        head[0] = 0;
+        head[1] = 0;
+        head[2] = 0;
+        head[3] = 0;
+        i += 16;
+    }
+    /* The "-31" stops before a cache line that would extend past the end of
+       the buffer: in the misaligned case the line at offset total-16 is only
+       half ours and must not be cleared with dcbz.
+       The base operand uses the "b" constraint because dcbz reads r0 in
+       that position as the constant 0, not as a register. */
+    for ( ; i < total-31 ; i += 32) {
+        asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
+    }
+    if (misal) {
+        /* remaining 16 bytes at the end of the buffer */
+        uint32_t *tail = (uint32_t*)((uint8_t*)blocks + total - 16);
+        tail[0] = 0;
+        tail[1] = 0;
+        tail[2] = 0;
+        tail[3] = 0;
+    }
+#else
+    memset(blocks, 0, total);
+#endif
+POWERPC_TBL_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1);
+}
+
+/*
+ * Report how many bytes are set to 0 by a single 'dcbz' instruction.
+ * A 1024-byte scratch buffer is filled with 0xFF, one dcbz is issued on its
+ * middle, and the bytes that became 0 are counted.
+ * Returns that count, or 0 when the scratch buffer cannot be allocated.
+ */
+long check_dcbz_effect(void)
+{
+    register char *fakedata = (char*)av_malloc(1024);
+    register char *fakedata_middle;
+    register long zero = 0;
+    register long i = 0;
+    long count = 0;
+
+    if (!fakedata)
+    {
+        return 0L;
+    }
+
+    fakedata_middle = (fakedata + 512);
+
+    memset(fakedata, 0xFF, 1024);
+
+    /* "b": the base operand must not be r0, which dcbz reads as the
+       constant 0. "memory": tells the compiler that the buffer content
+       changed, so the loop below really re-reads it after the memset. */
+    asm volatile("dcbz %0, %1" : : "b" (fakedata_middle), "r" (zero) : "memory");
+
+    for (i = 0; i < 1024 ; i ++)
+    {
+        if (fakedata[i] == (char)0)
+            count++;
+    }
+
+    av_free(fakedata);
+
+    return count;
+}
+
void dsputil_init_ppc(DSPContext* c, unsigned mask)
{
// Common optimisations whether Altivec or not
- // ... pending ...
-
+ switch (check_dcbz_effect()) {
+ case 32:
+ c->clear_blocks = clear_blocks_dcbz32_ppc;
+ break;
+ default:
+ break;
+ }
+
#if HAVE_ALTIVEC
if (has_altivec()) {
mm_flags |= MM_ALTIVEC;
@@ -41,12 +191,51 @@ void dsputil_init_ppc(DSPContext* c, unsigned mask)
c->pix_abs16x16_xy2 = pix_abs16x16_xy2_altivec;
c->pix_abs16x16 = pix_abs16x16_altivec;
c->pix_abs8x8 = pix_abs8x8_altivec;
+ c->sad[0]= sad16x16_altivec;
+ c->sad[1]= sad8x8_altivec;
c->pix_norm1 = pix_norm1_altivec;
+ c->sse[1]= sse8_altivec;
+ c->sse[0]= sse16_altivec;
c->pix_sum = pix_sum_altivec;
c->diff_pixels = diff_pixels_altivec;
c->get_pixels = get_pixels_altivec;
+// next one disabled as it's untested.
+#if 0
+ c->add_bytes= add_bytes_altivec;
+#endif /* 0 */
+ c->put_pixels_tab[0][0] = put_pixels16_altivec;
+ c->avg_pixels_tab[0][0] = avg_pixels16_altivec;
+// next one disabled as it's untested.
+#if 0
+ c->avg_pixels_tab[1][0] = avg_pixels8_altivec;
+#endif /* 0 */
+ c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec;
+ c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;
+ c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;
+ c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec;
+
+ c->gmc1 = gmc1_altivec;
+
+#ifdef POWERPC_TBL_PERFORMANCE_REPORT
+ {
+ int i;
+ for (i = 0 ; i < powerpc_perf_total ; i++)
+ {
+ perfdata[i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFF;
+ perfdata[i][powerpc_data_max] = 0x0000000000000000;
+ perfdata[i][powerpc_data_sum] = 0x0000000000000000;
+ perfdata[i][powerpc_data_num] = 0x0000000000000000;
+#ifdef POWERPC_PERF_USE_PMC
+ perfdata_miss[i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFF;
+ perfdata_miss[i][powerpc_data_max] = 0x0000000000000000;
+ perfdata_miss[i][powerpc_data_sum] = 0x0000000000000000;
+ perfdata_miss[i][powerpc_data_num] = 0x0000000000000000;
+#endif /* POWERPC_PERF_USE_PMC */
+ }
+ }
+#endif /* POWERPC_TBL_PERFORMANCE_REPORT */
} else
-#endif
+#endif /* HAVE_ALTIVEC */
{
// Non-AltiVec PPC optimisations
diff --git a/src/libffmpeg/libavcodec/ppc/fft_altivec.c b/src/libffmpeg/libavcodec/ppc/fft_altivec.c
index 1a926b77c..992be5b8e 100644
--- a/src/libffmpeg/libavcodec/ppc/fft_altivec.c
+++ b/src/libffmpeg/libavcodec/ppc/fft_altivec.c
@@ -1,7 +1,7 @@
/*
* FFT/IFFT transforms
* AltiVec-enabled
- * Copyright (c) 2002 Romain Dolbeau <romain@dolbeau.org>
+ * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org>
* Based on code Copyright (c) 2002 Fabrice Bellard.
*
* This library is free software; you can redistribute it and/or
@@ -22,30 +22,30 @@
#include "dsputil_altivec.h"
-// used to build registers permutation vectors (vcprm)
-// the 's' are for words in the _s_econd vector
-#define WORD_0 0x00,0x01,0x02,0x03
-#define WORD_1 0x04,0x05,0x06,0x07
-#define WORD_2 0x08,0x09,0x0a,0x0b
-#define WORD_3 0x0c,0x0d,0x0e,0x0f
-#define WORD_s0 0x10,0x11,0x12,0x13
-#define WORD_s1 0x14,0x15,0x16,0x17
-#define WORD_s2 0x18,0x19,0x1a,0x1b
-#define WORD_s3 0x1c,0x1d,0x1e,0x1f
-
-#define vcprm(a,b,c,d) (const vector unsigned char)(WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d)
-
-// vcprmle is used to keep the same index as in the SSE version.
-// it's the same as vcprm, with the index inversed
-// ('le' is Little Endian)
-#define vcprmle(a,b,c,d) vcprm(d,c,b,a)
-
-// used to build inverse/identity vectors (vcii)
-// n is _n_egative, p is _p_ositive
-#define FLOAT_n -1.
-#define FLOAT_p 1.
+/*
+ These three macros (BF, MUL16, CMUL) are from libavcodec/fft.c
+ and are required for the reference C code.
+ BF and CMUL expand to a brace-enclosed block, so they can only be used
+ where a statement is allowed.
+*/
+/* butterfly op: p = p1 + q1 and q = p1 - q1, on (re, im) pairs.
+   The inputs are copied to temporaries first, so the outputs may be
+   the same lvalues as the inputs. */
+#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \
+{\
+ FFTSample ax, ay, bx, by;\
+ bx=pre1;\
+ by=pim1;\
+ ax=qre1;\
+ ay=qim1;\
+ pre = (bx + ax);\
+ pim = (by + ay);\
+ qre = (bx - ax);\
+ qim = (by - ay);\
+}
+/* plain multiplication */
+#define MUL16(a,b) ((a) * (b))
+/* complex multiplication: (pre + i*pim) = (are + i*aim) * (bre + i*bim) */
+#define CMUL(pre, pim, are, aim, bre, bim) \
+{\
+ pre = (MUL16(are, bre) - MUL16(aim, bim));\
+ pim = (MUL16(are, bim) + MUL16(bre, aim));\
+}
-#define vcii(a,b,c,d) (const vector float)(FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d)
/**
* Do a complex FFT with the parameters defined in fft_init(). The
@@ -55,20 +55,94 @@
* This code assumes that the 'z' pointer is 16 bytes-aligned
* It also assumes all FFTComplex are 8 bytes-aligned pair of float
* The code is exactly the same as the SSE version, except
- * that successive MUL + ADD/SUB have been fusionned into
+ * that successive MUL + ADD/SUB have been merged into
* fused multiply-add ('vec_madd' in altivec)
- *
- * To test this code you can use fft-test in libavcodec ; use
- * the following line in libavcodec to compile (MacOS X):
- * #####
- * gcc -I. -Ippc -no-cpp-precomp -pipe -O3 -fomit-frame-pointer -mdynamic-no-pic -Wall
- * -faltivec -DARCH_POWERPC -DHAVE_ALTIVEC -DCONFIG_DARWIN fft-test.c fft.c
- * ppc/fft_altivec.c ppc/dsputil_altivec.c mdct.c -DHAVE_LRINTF -o fft-test
- * #####
*/
void fft_calc_altivec(FFTContext *s, FFTComplex *z)
{
- register const vector float vczero = (vector float)( 0., 0., 0., 0.);
+POWERPC_TBL_DECLARE(altivec_fft_num, s->nbits >= 6);
+#ifdef ALTIVEC_USE_REFERENCE_C_CODE
+ int ln = s->nbits;
+ int j, np, np2;
+ int nblocks, nloops;
+ register FFTComplex *p, *q;
+ FFTComplex *exptab = s->exptab;
+ int l;
+ FFTSample tmp_re, tmp_im;
+
+POWERPC_TBL_START_COUNT(altivec_fft_num, s->nbits >= 6);
+
+ np = 1 << ln;
+
+ /* pass 0 */
+
+ p=&z[0];
+ j=(np >> 1);
+ do {
+ BF(p[0].re, p[0].im, p[1].re, p[1].im,
+ p[0].re, p[0].im, p[1].re, p[1].im);
+ p+=2;
+ } while (--j != 0);
+
+ /* pass 1 */
+
+
+ p=&z[0];
+ j=np >> 2;
+ if (s->inverse) {
+ do {
+ BF(p[0].re, p[0].im, p[2].re, p[2].im,
+ p[0].re, p[0].im, p[2].re, p[2].im);
+ BF(p[1].re, p[1].im, p[3].re, p[3].im,
+ p[1].re, p[1].im, -p[3].im, p[3].re);
+ p+=4;
+ } while (--j != 0);
+ } else {
+ do {
+ BF(p[0].re, p[0].im, p[2].re, p[2].im,
+ p[0].re, p[0].im, p[2].re, p[2].im);
+ BF(p[1].re, p[1].im, p[3].re, p[3].im,
+ p[1].re, p[1].im, p[3].im, -p[3].re);
+ p+=4;
+ } while (--j != 0);
+ }
+ /* pass 2 .. ln-1 */
+
+ nblocks = np >> 3;
+ nloops = 1 << 2;
+ np2 = np >> 1;
+ do {
+ p = z;
+ q = z + nloops;
+ for (j = 0; j < nblocks; ++j) {
+ BF(p->re, p->im, q->re, q->im,
+ p->re, p->im, q->re, q->im);
+
+ p++;
+ q++;
+ for(l = nblocks; l < np2; l += nblocks) {
+ CMUL(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q->re, q->im);
+ BF(p->re, p->im, q->re, q->im,
+ p->re, p->im, tmp_re, tmp_im);
+ p++;
+ q++;
+ }
+
+ p += nloops;
+ q += nloops;
+ }
+ nblocks = nblocks >> 1;
+ nloops = nloops << 1;
+ } while (nblocks != 0);
+
+POWERPC_TBL_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
+
+#else /* ALTIVEC_USE_REFERENCE_C_CODE */
+#ifdef CONFIG_DARWIN
+ register const vector float vczero = (const vector float)(0.);
+#else
+ register const vector float vczero = (const vector float){0.,0.,0.,0.};
+#endif
int ln = s->nbits;
int j, np, np2;
@@ -77,6 +151,8 @@ void fft_calc_altivec(FFTContext *s, FFTComplex *z)
FFTComplex *cptr, *cptr1;
int k;
+POWERPC_TBL_START_COUNT(altivec_fft_num, s->nbits >= 6);
+
np = 1 << ln;
{
@@ -162,5 +238,8 @@ void fft_calc_altivec(FFTContext *s, FFTComplex *z)
nblocks = nblocks >> 1;
nloops = nloops << 1;
} while (nblocks != 0);
-}
+POWERPC_TBL_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
+
+#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
+}
diff --git a/src/libffmpeg/libavcodec/ppc/idct_altivec.c b/src/libffmpeg/libavcodec/ppc/idct_altivec.c
index 8036d403f..1619f1731 100644
--- a/src/libffmpeg/libavcodec/ppc/idct_altivec.c
+++ b/src/libffmpeg/libavcodec/ppc/idct_altivec.c
@@ -38,6 +38,7 @@
#include <stdlib.h> /* malloc(), free() */
#include <string.h>
#include "../dsputil.h"
+#include "dsputil_altivec.h"
#define vector_s16_t vector signed short
#define vector_u16_t vector unsigned short
@@ -150,6 +151,8 @@
vx6 = vec_sra (vy6, shift); \
vx7 = vec_sra (vy7, shift);
+
+#ifdef CONFIG_DARWIN
static const vector_s16_t constants[5] = {
(vector_s16_t)(23170, 13573, 6518, 21895, -23170, -21895, 32, 31),
(vector_s16_t)(16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725),
@@ -157,11 +160,30 @@ static const vector_s16_t constants[5] = {
(vector_s16_t)(21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692),
(vector_s16_t)(19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722)
};
+#else
+// broken gcc
+static const vector_s16_t constants[5] = {
+ (vector_s16_t){23170, 13573, 6518, 21895, -23170, -21895, 32, 31},
+ (vector_s16_t){16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725},
+ (vector_s16_t){22725, 31521, 29692, 26722, 22725, 26722, 29692, 31521},
+ (vector_s16_t){21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692},
+ (vector_s16_t){19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722}
+};
+#endif
void idct_put_altivec(uint8_t* dest, int stride, vector_s16_t* block)
{
+POWERPC_TBL_DECLARE(altivec_idct_put_num, 1);
+#ifdef ALTIVEC_USE_REFERENCE_C_CODE
+POWERPC_TBL_START_COUNT(altivec_idct_put_num, 1);
+ void simple_idct_put(UINT8 *dest, int line_size, INT16 *block);
+ simple_idct_put(dest, stride, (INT16*)block);
+POWERPC_TBL_STOP_COUNT(altivec_idct_put_num, 1);
+#else /* ALTIVEC_USE_REFERENCE_C_CODE */
vector_u8_t tmp;
+POWERPC_TBL_START_COUNT(altivec_idct_put_num, 1);
+
IDCT
#define COPY(dest,src) \
@@ -177,16 +199,28 @@ void idct_put_altivec(uint8_t* dest, int stride, vector_s16_t* block)
COPY (dest, vx5) dest += stride;
COPY (dest, vx6) dest += stride;
COPY (dest, vx7)
+
+POWERPC_TBL_STOP_COUNT(altivec_idct_put_num, 1);
+#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
}
void idct_add_altivec(uint8_t* dest, int stride, vector_s16_t* block)
{
+POWERPC_TBL_DECLARE(altivec_idct_add_num, 1);
+#ifdef ALTIVEC_USE_REFERENCE_C_CODE
+POWERPC_TBL_START_COUNT(altivec_idct_add_num, 1);
+ void simple_idct_add(UINT8 *dest, int line_size, INT16 *block);
+ simple_idct_add(dest, stride, (INT16*)block);
+POWERPC_TBL_STOP_COUNT(altivec_idct_add_num, 1);
+#else /* ALTIVEC_USE_REFERENCE_C_CODE */
vector_u8_t tmp;
vector_s16_t tmp2, tmp3;
vector_u8_t perm0;
vector_u8_t perm1;
vector_u8_t p0, p1, p;
+POWERPC_TBL_START_COUNT(altivec_idct_add_num, 1);
+
IDCT
p0 = vec_lvsl (0, dest);
@@ -212,5 +246,8 @@ void idct_add_altivec(uint8_t* dest, int stride, vector_s16_t* block)
ADD (dest, vx5, perm1) dest += stride;
ADD (dest, vx6, perm0) dest += stride;
ADD (dest, vx7, perm1)
+
+POWERPC_TBL_STOP_COUNT(altivec_idct_add_num, 1);
+#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
}
diff --git a/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c b/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c
index bcbc1e6ba..dd898e158 100644
--- a/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c
+++ b/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c
@@ -20,10 +20,7 @@
#include <stdio.h>
#include "../dsputil.h"
#include "../mpegvideo.h"
-
-
-// Used when initializing constant vectors
-#define FOUR_INSTANCES(x) x,x,x,x
+#include "dsputil_altivec.h"
// Swaps two variables (used for altivec registers)
#define SWAP(a,b) \
@@ -93,6 +90,13 @@ do { \
vec = vec_splat(vec, 0); \
}
+
+#ifdef CONFIG_DARWIN
+#define FOUROF(a) (a)
+#else
+// slower, for dumb non-apple GCC
+#define FOUROF(a) {a,a,a,a}
+#endif
int dct_quantize_altivec(MpegEncContext* s,
DCTELEM* data, int n,
int qscale, int* overflow)
@@ -100,7 +104,7 @@ int dct_quantize_altivec(MpegEncContext* s,
int lastNonZero;
vector float row0, row1, row2, row3, row4, row5, row6, row7;
vector float alt0, alt1, alt2, alt3, alt4, alt5, alt6, alt7;
- const vector float zero = {FOUR_INSTANCES(0.0f)};
+ const vector float zero = (const vector float)FOUROF(0.);
// Load the data into the row/alt vectors
{
@@ -144,18 +148,18 @@ int dct_quantize_altivec(MpegEncContext* s,
// in the vector local variables, as floats, which we'll use during the
// quantize step...
{
- const vector float vec_0_298631336 = {FOUR_INSTANCES(0.298631336f)};
- const vector float vec_0_390180644 = {FOUR_INSTANCES(-0.390180644f)};
- const vector float vec_0_541196100 = {FOUR_INSTANCES(0.541196100f)};
- const vector float vec_0_765366865 = {FOUR_INSTANCES(0.765366865f)};
- const vector float vec_0_899976223 = {FOUR_INSTANCES(-0.899976223f)};
- const vector float vec_1_175875602 = {FOUR_INSTANCES(1.175875602f)};
- const vector float vec_1_501321110 = {FOUR_INSTANCES(1.501321110f)};
- const vector float vec_1_847759065 = {FOUR_INSTANCES(-1.847759065f)};
- const vector float vec_1_961570560 = {FOUR_INSTANCES(-1.961570560f)};
- const vector float vec_2_053119869 = {FOUR_INSTANCES(2.053119869f)};
- const vector float vec_2_562915447 = {FOUR_INSTANCES(-2.562915447f)};
- const vector float vec_3_072711026 = {FOUR_INSTANCES(3.072711026f)};
+ const vector float vec_0_298631336 = (vector float)FOUROF(0.298631336f);
+ const vector float vec_0_390180644 = (vector float)FOUROF(-0.390180644f);
+ const vector float vec_0_541196100 = (vector float)FOUROF(0.541196100f);
+ const vector float vec_0_765366865 = (vector float)FOUROF(0.765366865f);
+ const vector float vec_0_899976223 = (vector float)FOUROF(-0.899976223f);
+ const vector float vec_1_175875602 = (vector float)FOUROF(1.175875602f);
+ const vector float vec_1_501321110 = (vector float)FOUROF(1.501321110f);
+ const vector float vec_1_847759065 = (vector float)FOUROF(-1.847759065f);
+ const vector float vec_1_961570560 = (vector float)FOUROF(-1.961570560f);
+ const vector float vec_2_053119869 = (vector float)FOUROF(2.053119869f);
+ const vector float vec_2_562915447 = (vector float)FOUROF(-2.562915447f);
+ const vector float vec_3_072711026 = (vector float)FOUROF(3.072711026f);
int whichPass, whichHalf;
@@ -309,7 +313,7 @@ int dct_quantize_altivec(MpegEncContext* s,
// rounding when we convert to int, instead of flooring.)
{
vector signed int biasInt;
- const vector float negOneFloat = (vector float)(FOUR_INSTANCES(-1.0f));
+ const vector float negOneFloat = (vector float)FOUROF(-1.0f);
LOAD4(biasInt, biasAddr);
bias = vec_ctf(biasInt, QUANT_BIAS_SHIFT);
negBias = vec_madd(bias, negOneFloat, zero);
@@ -506,4 +510,133 @@ int dct_quantize_altivec(MpegEncContext* s,
return lastNonZero;
}
+#undef FOUROF
+
+/*
+ AltiVec version of dct_unquantize_h263
+ this code assumes `block' is 16 bytes-aligned
+
+ Every non-zero coefficient `level' of the block is replaced by
+ level * qmul + qadd when positive and level * qmul - qadd when negative,
+ with qmul = 2 * qscale and qadd = (qscale - 1) | 1 (qadd is 0 for intra
+ blocks when s->h263_aic is set). For intra blocks the DC coefficient
+ block[0] is handled separately: it is scaled by the luma (n < 4) or
+ chroma DC scale unless s->h263_aic is set.
+*/
+void dct_unquantize_h263_altivec(MpegEncContext *s,
+ DCTELEM *block, int n, int qscale)
+{
+POWERPC_TBL_DECLARE(altivec_dct_unquantize_h263_num, 1);
+ int i, level, qmul, qadd;
+ int nCoeffs;
+
+ assert(s->block_last_index[n]>=0);
+
+POWERPC_TBL_START_COUNT(altivec_dct_unquantize_h263_num, 1);
+
+ qadd = (qscale - 1) | 1;
+ qmul = qscale << 1;
+
+ if (s->mb_intra) {
+ if (!s->h263_aic) {
+ if (n < 4)
+ block[0] = block[0] * s->y_dc_scale;
+ else
+ block[0] = block[0] * s->c_dc_scale;
+ }else
+ qadd = 0;
+ // i is the index of the first coefficient to dequantize: skip the DC
+ i = 1;
+ nCoeffs= 63; //does not always use zigzag table
+ } else {
+ i = 0;
+ nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
+ }
+#ifdef ALTIVEC_USE_REFERENCE_C_CODE
+ // scalar reference implementation
+ for(;i<=nCoeffs;i++) {
+ level = block[i];
+ if (level) {
+ if (level < 0) {
+ level = level * qmul - qadd;
+ } else {
+ level = level * qmul + qadd;
+ }
+ block[i] = level;
+ }
+ }
+#else /* ALTIVEC_USE_REFERENCE_C_CODE */
+ {
+ register const vector short vczero = (const vector short)vec_splat_s16(0);
+ // qmul, qadd and -qadd replicated 8 times in 16 bytes-aligned arrays,
+ // so that they can be loaded into vector registers with vec_ld
+ short __attribute__ ((aligned(16))) qmul8[] =
+ {
+ qmul, qmul, qmul, qmul,
+ qmul, qmul, qmul, qmul
+ };
+ short __attribute__ ((aligned(16))) qadd8[] =
+ {
+ qadd, qadd, qadd, qadd,
+ qadd, qadd, qadd, qadd
+ };
+ short __attribute__ ((aligned(16))) nqadd8[] =
+ {
+ -qadd, -qadd, -qadd, -qadd,
+ -qadd, -qadd, -qadd, -qadd
+ };
+ register vector short blockv, qmulv, qaddv, nqaddv, temp1;
+ register vector bool short blockv_null, blockv_neg;
+ // the vector loop always starts at element 0, so block[0] is saved
+ // here (after the DC scaling above) and restored at the end when it
+ // must not be dequantized (i == 1)
+ register short backup_0 = block[0];
+ register int j = 0;
+
+ qmulv = vec_ld(0, qmul8);
+ qaddv = vec_ld(0, qadd8);
+ nqaddv = vec_ld(0, nqadd8);
+
+#if 0 // block *is* 16 bytes-aligned, it seems.
+ // first make sure block[j] is 16 bytes-aligned
+ for(j = 0; (j <= nCoeffs) && ((((unsigned long)block) + (j << 1)) & 0x0000000F) ; j++) {
+ level = block[j];
+ if (level) {
+ if (level < 0) {
+ level = level * qmul - qadd;
+ } else {
+ level = level * qmul + qadd;
+ }
+ block[j] = level;
+ }
+ }
+#endif
+
+ // vectorize all the 16 bytes-aligned blocks
+ // of 8 elements
+ // (j << 1 is the byte offset of element j)
+ for(; (j + 7) <= nCoeffs ; j+=8)
+ {
+ blockv = vec_ld(j << 1, block);
+ blockv_neg = vec_cmplt(blockv, vczero);
+ blockv_null = vec_cmpeq(blockv, vczero);
+ // choose between +qadd or -qadd as the third operand
+ temp1 = vec_sel(qaddv, nqaddv, blockv_neg);
+ // multiply & add (block{i,i+7} * qmul [+-] qadd)
+ temp1 = vec_mladd(blockv, qmulv, temp1);
+ // put 0 where block{i,i+7} used to have 0
+ blockv = vec_sel(temp1, blockv, blockv_null);
+ vec_st(blockv, j << 1, block);
+ }
+
+ // if nCoeffs isn't a multiple of 8, finish the job
+ // using good old scalar units.
+ // (we could do it using a truncated vector,
+ // but I'm not sure it's worth the hassle)
+ for(; j <= nCoeffs ; j++) {
+ level = block[j];
+ if (level) {
+ if (level < 0) {
+ level = level * qmul - qadd;
+ } else {
+ level = level * qmul + qadd;
+ }
+ block[j] = level;
+ }
+ }
+
+ if (i == 1)
+ { // cheat. this avoids special-casing the first iteration
+ block[0] = backup_0;
+ }
+ }
+#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
+
+POWERPC_TBL_STOP_COUNT(altivec_dct_unquantize_h263_num, nCoeffs == 63);
+}
diff --git a/src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c b/src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c
index 94d608b63..9757f5f39 100644
--- a/src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c
+++ b/src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c
@@ -27,6 +27,8 @@
extern int dct_quantize_altivec(MpegEncContext *s,
DCTELEM *block, int n,
int qscale, int *overflow);
+extern void dct_unquantize_h263_altivec(MpegEncContext *s,
+ DCTELEM *block, int n, int qscale);
extern void idct_put_altivec(UINT8 *dest, int line_size, INT16 *block);
extern void idct_add_altivec(UINT8 *dest, int line_size, INT16 *block);
@@ -42,7 +44,11 @@ void MPV_common_init_ppc(MpegEncContext *s)
{
s->idct_put = idct_put_altivec;
s->idct_add = idct_add_altivec;
+#ifndef ALTIVEC_USE_REFERENCE_C_CODE
s->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
+#else /* ALTIVEC_USE_REFERENCE_C_CODE */
+ s->idct_permutation_type = FF_NO_IDCT_PERM;
+#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
}
// Test to make sure that the dct required alignments are met.
@@ -66,6 +72,7 @@ void MPV_common_init_ppc(MpegEncContext *s)
(s->avctx->dct_algo == FF_DCT_ALTIVEC))
{
s->dct_quantize = dct_quantize_altivec;
+ s->dct_unquantize_h263 = dct_unquantize_h263_altivec;
}
} else
#endif
diff --git a/src/libffmpeg/libavcodec/ratecontrol.c b/src/libffmpeg/libavcodec/ratecontrol.c
index bda408dfe..6bcbe1c67 100644
--- a/src/libffmpeg/libavcodec/ratecontrol.c
+++ b/src/libffmpeg/libavcodec/ratecontrol.c
@@ -751,8 +751,8 @@ static int init_pass2(MpegEncContext *s)
}
//printf("%lld %lld %lld %lld\n", available_bits[I_TYPE], available_bits[P_TYPE], available_bits[B_TYPE], all_available_bits);
- qscale= malloc(sizeof(double)*rcc->num_entries);
- blured_qscale= malloc(sizeof(double)*rcc->num_entries);
+ qscale= av_malloc(sizeof(double)*rcc->num_entries);
+ blured_qscale= av_malloc(sizeof(double)*rcc->num_entries);
for(step=256*256; step>0.0000001; step*=0.5){
expected_bits=0;
@@ -809,8 +809,8 @@ static int init_pass2(MpegEncContext *s)
// printf("%f %d %f\n", expected_bits, (int)all_available_bits, rate_factor);
if(expected_bits > all_available_bits) rate_factor-= step;
}
- free(qscale);
- free(blured_qscale);
+ av_free(qscale);
+ av_free(blured_qscale);
if(abs(expected_bits/all_available_bits - 1.0) > 0.01 ){
fprintf(stderr, "Error: 2pass curve failed to converge\n");
diff --git a/src/libffmpeg/libavcodec/rv10.c b/src/libffmpeg/libavcodec/rv10.c
index 4907c2347..012b1dc5c 100644
--- a/src/libffmpeg/libavcodec/rv10.c
+++ b/src/libffmpeg/libavcodec/rv10.c
@@ -395,7 +395,7 @@ static int rv10_decode_packet(AVCodecContext *avctx,
MpegEncContext *s = avctx->priv_data;
int i, mb_count, mb_pos, left;
- init_get_bits(&s->gb, buf, buf_size);
+ init_get_bits(&s->gb, buf, buf_size*8);
mb_count = rv10_decode_picture_header(s);
if (mb_count < 0) {
diff --git a/src/libffmpeg/libavcodec/simple_idct.c b/src/libffmpeg/libavcodec/simple_idct.c
index 8c9ce7b93..703e94f21 100644
--- a/src/libffmpeg/libavcodec/simple_idct.c
+++ b/src/libffmpeg/libavcodec/simple_idct.c
@@ -67,7 +67,7 @@
#endif
-static inline void idctRowCondDC (int16_t * row)
+static inline void idctRowCondDC (DCTELEM * row)
{
int a0, a1, a2, a3, b0, b1, b2, b3;
#ifdef FAST_64BIT
@@ -82,26 +82,40 @@ static inline void idctRowCondDC (int16_t * row)
#else
#define ROW0_MASK 0xffffLL
#endif
- if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) |
- ((uint64_t *)row)[1]) == 0) {
- temp = (row[0] << 3) & 0xffff;
- temp += temp << 16;
- temp += temp << 32;
- ((uint64_t *)row)[0] = temp;
- ((uint64_t *)row)[1] = temp;
- return;
- }
+ if(sizeof(DCTELEM)==2){
+ if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) |
+ ((uint64_t *)row)[1]) == 0) {
+ temp = (row[0] << 3) & 0xffff;
+ temp += temp << 16;
+ temp += temp << 32;
+ ((uint64_t *)row)[0] = temp;
+ ((uint64_t *)row)[1] = temp;
+ return;
+ }
+ }else{
+ if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) {
+ row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3;
+ return;
+ }
+ }
#else
- if (!(((uint32_t*)row)[1] |
- ((uint32_t*)row)[2] |
- ((uint32_t*)row)[3] |
- row[1])) {
- temp = (row[0] << 3) & 0xffff;
- temp += temp << 16;
- ((uint32_t*)row)[0]=((uint32_t*)row)[1] =
- ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp;
- return;
- }
+ if(sizeof(DCTELEM)==2){
+ if (!(((uint32_t*)row)[1] |
+ ((uint32_t*)row)[2] |
+ ((uint32_t*)row)[3] |
+ row[1])) {
+ temp = (row[0] << 3) & 0xffff;
+ temp += temp << 16;
+ ((uint32_t*)row)[0]=((uint32_t*)row)[1] =
+ ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp;
+ return;
+ }
+ }else{
+ if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) {
+ row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3;
+ return;
+ }
+ }
#endif
a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
@@ -159,7 +173,7 @@ static inline void idctRowCondDC (int16_t * row)
}
static inline void idctSparseColPut (UINT8 *dest, int line_size,
- int16_t * col)
+ DCTELEM * col)
{
int a0, a1, a2, a3, b0, b1, b2, b3;
UINT8 *cm = cropTbl + MAX_NEG_CROP;
@@ -231,7 +245,7 @@ static inline void idctSparseColPut (UINT8 *dest, int line_size,
}
static inline void idctSparseColAdd (UINT8 *dest, int line_size,
- int16_t * col)
+ DCTELEM * col)
{
int a0, a1, a2, a3, b0, b1, b2, b3;
UINT8 *cm = cropTbl + MAX_NEG_CROP;
@@ -302,7 +316,7 @@ static inline void idctSparseColAdd (UINT8 *dest, int line_size,
dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)];
}
-static inline void idctSparseCol (int16_t * col)
+static inline void idctSparseCol (DCTELEM * col)
{
int a0, a1, a2, a3, b0, b1, b2, b3;
@@ -365,7 +379,7 @@ static inline void idctSparseCol (int16_t * col)
col[56] = ((a0 - b0) >> COL_SHIFT);
}
-void simple_idct_put(UINT8 *dest, int line_size, INT16 *block)
+void simple_idct_put(UINT8 *dest, int line_size, DCTELEM *block)
{
int i;
for(i=0; i<8; i++)
@@ -375,7 +389,7 @@ void simple_idct_put(UINT8 *dest, int line_size, INT16 *block)
idctSparseColPut(dest + i, line_size, block + i);
}
-void simple_idct_add(UINT8 *dest, int line_size, INT16 *block)
+void simple_idct_add(UINT8 *dest, int line_size, DCTELEM *block)
{
int i;
for(i=0; i<8; i++)
@@ -385,7 +399,7 @@ void simple_idct_add(UINT8 *dest, int line_size, INT16 *block)
idctSparseColAdd(dest + i, line_size, block + i);
}
-void simple_idct(INT16 *block)
+void simple_idct(DCTELEM *block)
{
int i;
for(i=0; i<8; i++)
@@ -406,7 +420,7 @@ void simple_idct(INT16 *block)
and the butterfly must be multiplied by 0.5 * sqrt(2.0) */
#define C_SHIFT (4+1+12)
-static inline void idct4col(UINT8 *dest, int line_size, const INT16 *col)
+static inline void idct4col(UINT8 *dest, int line_size, const DCTELEM *col)
{
int c0, c1, c2, c3, a0, a1, a2, a3;
const UINT8 *cm = cropTbl + MAX_NEG_CROP;
@@ -443,10 +457,10 @@ static inline void idct4col(UINT8 *dest, int line_size, const INT16 *col)
/* XXX: I think a 1.0/sqrt(2) normalization should be needed to
compensate the extra butterfly stage - I don't have the full DV
specification */
-void simple_idct248_put(UINT8 *dest, int line_size, INT16 *block)
+void simple_idct248_put(UINT8 *dest, int line_size, DCTELEM *block)
{
int i;
- INT16 *ptr;
+ DCTELEM *ptr;
/* butterfly */
ptr = block;
@@ -486,7 +500,7 @@ void simple_idct248_put(UINT8 *dest, int line_size, INT16 *block)
#define C2 C_FIX(0.2705980501)
#define C3 C_FIX(0.5)
#define C_SHIFT (4+1+12)
-static inline void idct4col_add(UINT8 *dest, int line_size, const INT16 *col)
+static inline void idct4col_add(UINT8 *dest, int line_size, const DCTELEM *col)
{
int c0, c1, c2, c3, a0, a1, a2, a3;
const UINT8 *cm = cropTbl + MAX_NEG_CROP;
@@ -514,7 +528,7 @@ static inline void idct4col_add(UINT8 *dest, int line_size, const INT16 *col)
#define R2 R_FIX(0.2705980501)
#define R3 R_FIX(0.5)
#define R_SHIFT 11
-static inline void idct4row(INT16 *row)
+static inline void idct4row(DCTELEM *row)
{
int c0, c1, c2, c3, a0, a1, a2, a3;
const UINT8 *cm = cropTbl + MAX_NEG_CROP;
@@ -533,7 +547,7 @@ static inline void idct4row(INT16 *row)
row[3]= (c0 - c1) >> R_SHIFT;
}
-void simple_idct84_add(UINT8 *dest, int line_size, INT16 *block)
+void simple_idct84_add(UINT8 *dest, int line_size, DCTELEM *block)
{
int i;
@@ -548,7 +562,7 @@ void simple_idct84_add(UINT8 *dest, int line_size, INT16 *block)
}
}
-void simple_idct48_add(UINT8 *dest, int line_size, INT16 *block)
+void simple_idct48_add(UINT8 *dest, int line_size, DCTELEM *block)
{
int i;
diff --git a/src/libffmpeg/libavcodec/simple_idct.h b/src/libffmpeg/libavcodec/simple_idct.h
index 428c6072c..0ee1e05ed 100644
--- a/src/libffmpeg/libavcodec/simple_idct.h
+++ b/src/libffmpeg/libavcodec/simple_idct.h
@@ -18,14 +18,14 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-void simple_idct_put(UINT8 *dest, int line_size, INT16 *block);
-void simple_idct_add(UINT8 *dest, int line_size, INT16 *block);
-void ff_simple_idct_mmx(short *block);
-void ff_simple_idct_add_mmx(UINT8 *dest, int line_size, INT16 *block);
-void ff_simple_idct_put_mmx(UINT8 *dest, int line_size, INT16 *block);
-void simple_idct(short *block);
+void simple_idct_put(UINT8 *dest, int line_size, DCTELEM *block);
+void simple_idct_add(UINT8 *dest, int line_size, DCTELEM *block);
+void ff_simple_idct_mmx(int16_t *block);
+void ff_simple_idct_add_mmx(UINT8 *dest, int line_size, int16_t *block);
+void ff_simple_idct_put_mmx(UINT8 *dest, int line_size, int16_t *block);
+void simple_idct(DCTELEM *block);
-void simple_idct248_put(UINT8 *dest, int line_size, INT16 *block);
+void simple_idct248_put(UINT8 *dest, int line_size, DCTELEM *block);
-void simple_idct84_add(UINT8 *dest, int line_size, INT16 *block);
-void simple_idct48_add(UINT8 *dest, int line_size, INT16 *block);
+void simple_idct84_add(UINT8 *dest, int line_size, DCTELEM *block);
+void simple_idct48_add(UINT8 *dest, int line_size, DCTELEM *block);
diff --git a/src/libffmpeg/libavcodec/svq1.c b/src/libffmpeg/libavcodec/svq1.c
index 77035f1f9..5a9a290b8 100644
--- a/src/libffmpeg/libavcodec/svq1.c
+++ b/src/libffmpeg/libavcodec/svq1.c
@@ -1066,7 +1066,7 @@ static int svq1_decode_frame(AVCodecContext *avctx,
AVFrame *pict = data;
/* initialize bit buffer */
- init_get_bits(&s->gb,buf,buf_size);
+ init_get_bits(&s->gb,buf,buf_size*8);
/* decode frame header */
s->f_code = get_bits (&s->gb, 22);
@@ -1093,6 +1093,10 @@ static int svq1_decode_frame(AVCodecContext *avctx,
return result;
}
+ //FIXME this avoids some confusion for "B frames" without 2 references
+ //this should be removed once libavcodec can handle more flexible picture types & ordering
+ if(s->pict_type==B_TYPE && s->last_picture.data[0]==NULL) return buf_size;
+
if(avctx->hurry_up && s->pict_type==B_TYPE) return buf_size;
if(MPV_frame_start(s, avctx) < 0)
diff --git a/src/libffmpeg/libavcodec/utils.c b/src/libffmpeg/libavcodec/utils.c
index af6ba986b..ca71807f7 100644
--- a/src/libffmpeg/libavcodec/utils.c
+++ b/src/libffmpeg/libavcodec/utils.c
@@ -24,8 +24,6 @@ void *av_mallocz(unsigned int size)
{
void *ptr;
- if(size == 0) fprintf(stderr, "Warning, allocating 0 bytes\n");
-
ptr = av_malloc(size);
if (!ptr)
return NULL;
@@ -33,6 +31,32 @@ void *av_mallocz(unsigned int size)
return ptr;
}
+char *av_strdup(const char *s)
+{
+ char *ptr;
+ int len;
+ len = strlen(s) + 1;
+ ptr = av_malloc(len);
+ if (!ptr)
+ return NULL;
+ memcpy(ptr, s, len);
+ return ptr;
+}
+
+/**
+ * realloc which does nothing if the block is large enough
+ */
+void *av_fast_realloc(void *ptr, int *size, int min_size)
+{
+ if(min_size < *size)
+ return ptr;
+
+ *size= min_size + 10*1024;
+
+ return av_realloc(ptr, *size);
+}
+
+
/* allocation of static arrays - do not use for normal allocation */
static unsigned int last_static = 0;
static char*** array_static = NULL;
@@ -47,7 +71,7 @@ void *__av_mallocz_static(void** location, unsigned int size)
if (location)
{
if (l > last_static)
- array_static = realloc(array_static, l);
+ array_static = av_realloc(array_static, l);
array_static[last_static++] = (char**) location;
*location = ptr;
}
@@ -61,10 +85,10 @@ void av_free_static()
unsigned i;
for (i = 0; i < last_static; i++)
{
- free(*array_static[i]);
+ av_free(*array_static[i]);
*array_static[i] = NULL;
}
- free(array_static);
+ av_free(array_static);
array_static = 0;
}
last_static = 0;
@@ -89,32 +113,6 @@ void register_avcodec(AVCodec *format)
format->next = NULL;
}
-void avcodec_get_chroma_sub_sample(int fmt, int *h_shift, int *v_shift){
- switch(fmt){
- case PIX_FMT_YUV410P:
- *h_shift=2;
- *v_shift=2;
- break;
- case PIX_FMT_YUV420P:
- *h_shift=1;
- *v_shift=1;
- break;
- case PIX_FMT_YUV411P:
- *h_shift=2;
- *v_shift=0;
- break;
- case PIX_FMT_YUV422P:
- case PIX_FMT_YUV422:
- *h_shift=1;
- *v_shift=0;
- break;
- default: //RGB/...
- *h_shift=0;
- *v_shift=0;
- break;
- }
-}
-
typedef struct DefaultPicOpaque{
int last_pic_num;
uint8_t *data[4];
@@ -125,10 +123,10 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
const int width = s->width;
const int height= s->height;
DefaultPicOpaque *opaque;
-
+/*
assert(pic->data[0]==NULL);
- /* assert(pic->type==0 || pic->type==FF_TYPE_INTERNAL); */
-
+ assert(pic->type==0 || pic->type==FF_TYPE_INTERNAL);
+*/
if(pic->opaque){
opaque= (DefaultPicOpaque *)pic->opaque;
for(i=0; i<3; i++)
@@ -152,7 +150,6 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
case PIX_FMT_BGR24:
pixel_size=3;
break;
- case PIX_FMT_BGRA32:
case PIX_FMT_RGBA32:
pixel_size=4;
break;
@@ -212,6 +209,10 @@ void avcodec_default_release_buffer(AVCodecContext *s, AVFrame *pic){
//printf("R%X\n", pic->opaque);
}
+enum PixelFormat avcodec_default_get_format(struct AVCodecContext *s, enum PixelFormat * fmt){
+ return fmt[0];
+}
+
void avcodec_get_context_defaults(AVCodecContext *s){
s->bit_rate= 800*1000;
s->bit_rate_tolerance= s->bit_rate*10;
@@ -234,6 +235,7 @@ void avcodec_get_context_defaults(AVCodecContext *s){
s->me_method= ME_EPZS;
s->get_buffer= avcodec_default_get_buffer;
s->release_buffer= avcodec_default_release_buffer;
+ s->get_format= avcodec_default_get_format;
s->me_subpel_quality=8;
}
@@ -410,19 +412,6 @@ AVCodec *avcodec_find(enum CodecID id)
return NULL;
}
-const char *pix_fmt_str[] = {
- "yuv420p",
- "yuv422",
- "rgb24",
- "bgr24",
- "yuv422p",
- "yuv444p",
- "rgba32",
- "bgra32",
- "yuv410p",
- "yuv411p",
-};
-
void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
{
const char *codec_name;
@@ -462,7 +451,7 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
if (enc->codec_id == CODEC_ID_RAWVIDEO) {
snprintf(buf + strlen(buf), buf_size - strlen(buf),
", %s",
- pix_fmt_str[enc->pix_fmt]);
+ avcodec_get_pix_fmt_name(enc->pix_fmt));
}
if (enc->width) {
snprintf(buf + strlen(buf), buf_size - strlen(buf),
@@ -537,99 +526,6 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
}
}
-/* Picture field are filled with 'ptr' addresses */
-void avpicture_fill(AVPicture *picture, UINT8 *ptr,
- int pix_fmt, int width, int height)
-{
- int size;
-
- size = width * height;
- switch(pix_fmt) {
- case PIX_FMT_YUV420P:
- picture->data[0] = ptr;
- picture->data[1] = picture->data[0] + size;
- picture->data[2] = picture->data[1] + size / 4;
- picture->linesize[0] = width;
- picture->linesize[1] = width / 2;
- picture->linesize[2] = width / 2;
- break;
- case PIX_FMT_YUV422P:
- picture->data[0] = ptr;
- picture->data[1] = picture->data[0] + size;
- picture->data[2] = picture->data[1] + size / 2;
- picture->linesize[0] = width;
- picture->linesize[1] = width / 2;
- picture->linesize[2] = width / 2;
- break;
- case PIX_FMT_YUV444P:
- picture->data[0] = ptr;
- picture->data[1] = picture->data[0] + size;
- picture->data[2] = picture->data[1] + size;
- picture->linesize[0] = width;
- picture->linesize[1] = width;
- picture->linesize[2] = width;
- break;
- case PIX_FMT_RGB24:
- case PIX_FMT_BGR24:
- picture->data[0] = ptr;
- picture->data[1] = NULL;
- picture->data[2] = NULL;
- picture->linesize[0] = width * 3;
- break;
- case PIX_FMT_RGBA32:
- case PIX_FMT_BGRA32:
- picture->data[0] = ptr;
- picture->data[1] = NULL;
- picture->data[2] = NULL;
- picture->linesize[0] = width * 4;
- break;
- case PIX_FMT_YUV422:
- picture->data[0] = ptr;
- picture->data[1] = NULL;
- picture->data[2] = NULL;
- picture->linesize[0] = width * 2;
- break;
- default:
- picture->data[0] = NULL;
- picture->data[1] = NULL;
- picture->data[2] = NULL;
- break;
- }
-}
-
-int avpicture_get_size(int pix_fmt, int width, int height)
-{
- int size;
-
- size = width * height;
- switch(pix_fmt) {
- case PIX_FMT_YUV420P:
- size = (size * 3) / 2;
- break;
- case PIX_FMT_YUV422P:
- size = (size * 2);
- break;
- case PIX_FMT_YUV444P:
- size = (size * 3);
- break;
- case PIX_FMT_RGB24:
- case PIX_FMT_BGR24:
- size = (size * 3);
- break;
- case PIX_FMT_RGBA32:
- case PIX_FMT_BGRA32:
- size = (size * 4);
- break;
- case PIX_FMT_YUV422:
- size = (size * 2);
- break;
- default:
- size = -1;
- break;
- }
- return size;
-}
-
unsigned avcodec_version( void )
{
return LIBAVCODEC_VERSION_INT;
diff --git a/src/libffmpeg/libavcodec/wmadec.c b/src/libffmpeg/libavcodec/wmadec.c
index a6fa2f8b2..5305e1c5d 100644
--- a/src/libffmpeg/libavcodec/wmadec.c
+++ b/src/libffmpeg/libavcodec/wmadec.c
@@ -92,7 +92,7 @@ typedef struct WMADecodeContext {
int16_t coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE];
float coefs[MAX_CHANNELS][BLOCK_MAX_SIZE] __attribute__((aligned(16)));
MDCTContext mdct_ctx[BLOCK_NB_SIZES];
- float *windows[BLOCK_NB_SIZES] __attribute__((aligned(16)));
+ float *windows[BLOCK_NB_SIZES];
FFTSample mdct_tmp[BLOCK_MAX_SIZE] __attribute__((aligned(16))); /* temporary storage for imdct */
/* output buffer for one frame and the last for IMDCT windowing */
float frame_out[MAX_CHANNELS][BLOCK_MAX_SIZE * 2] __attribute__((aligned(16)));
@@ -212,8 +212,8 @@ static void init_coef_vlc(VLC *vlc,
init_vlc(vlc, 9, n, table_bits, 1, 1, table_codes, 4, 4);
- run_table = malloc(n * sizeof(uint16_t));
- level_table = malloc(n * sizeof(uint16_t));
+ run_table = av_malloc(n * sizeof(uint16_t));
+ level_table = av_malloc(n * sizeof(uint16_t));
p = levels_table;
i = 2;
level = 1;
@@ -1226,7 +1226,7 @@ static int wma_decode_superframe(AVCodecContext *avctx,
samples = data;
- init_get_bits(&s->gb, buf, buf_size);
+ init_get_bits(&s->gb, buf, buf_size*8);
if (s->use_bit_reservoir) {
/* read super frame header */
@@ -1252,7 +1252,7 @@ static int wma_decode_superframe(AVCodecContext *avctx,
}
/* XXX: bit_offset bits into last frame */
- init_get_bits(&s->gb, s->last_superframe, MAX_CODED_SUPERFRAME_SIZE);
+ init_get_bits(&s->gb, s->last_superframe, MAX_CODED_SUPERFRAME_SIZE*8);
/* skip unused bits */
if (s->last_bitoffset > 0)
skip_bits(&s->gb, s->last_bitoffset);
@@ -1265,7 +1265,7 @@ static int wma_decode_superframe(AVCodecContext *avctx,
/* read each frame starting from bit_offset */
pos = bit_offset + 4 + 4 + s->byte_offset_bits + 3;
- init_get_bits(&s->gb, buf + (pos >> 3), MAX_CODED_SUPERFRAME_SIZE - (pos >> 3));
+ init_get_bits(&s->gb, buf + (pos >> 3), (MAX_CODED_SUPERFRAME_SIZE - (pos >> 3))*8);
len = pos & 7;
if (len > 0)
skip_bits(&s->gb, len);
diff --git a/src/libffmpeg/libavcodec/wmv2.c b/src/libffmpeg/libavcodec/wmv2.c
index d25b7a5f1..6def6f2a8 100644
--- a/src/libffmpeg/libavcodec/wmv2.c
+++ b/src/libffmpeg/libavcodec/wmv2.c
@@ -313,7 +313,7 @@ static int decode_ext_header(Wmv2Context *w){
if(s->avctx->extradata_size<4) return -1;
- init_get_bits(&gb, s->avctx->extradata, s->avctx->extradata_size);
+ init_get_bits(&gb, s->avctx->extradata, s->avctx->extradata_size*8);
fps = get_bits(&gb, 5);
s->bit_rate = get_bits(&gb, 11)*1024;
@@ -330,8 +330,9 @@ static int decode_ext_header(Wmv2Context *w){
s->slice_height = s->mb_height / code;
if(s->avctx->debug&FF_DEBUG_PICT_INFO){
- printf("fps:%d, br:%d, qpbit:%d, abt_flag:%d, j_type_bit:%d, tl_mv_flag:%d, mbrl_bit:%d, code:%d, flag3:%d\n",
- fps, s->bit_rate, w->mspel_bit, w->abt_flag, w->j_type_bit, w->top_left_mv_flag, w->per_mb_rl_bit, code, w->flag3);
+ printf("fps:%d, br:%d, qpbit:%d, abt_flag:%d, j_type_bit:%d, tl_mv_flag:%d, mbrl_bit:%d, code:%d, flag3:%d, slices:%d\n",
+ fps, s->bit_rate, w->mspel_bit, w->abt_flag, w->j_type_bit, w->top_left_mv_flag, w->per_mb_rl_bit, code, w->flag3,
+ code);
}
return 0;
}
@@ -503,8 +504,7 @@ static int16_t *wmv2_pred_motion(Wmv2Context *w, int *px, int *py){
diff= FFMAX(ABS(A[0] - B[0]), ABS(A[1] - B[1]));
- if(s->mb_x && s->mb_y && !s->mspel && w->top_left_mv_flag && diff >= 8)
- //FIXME top/left bit too if y=!0 && first_slice_line?
+ if(s->mb_x && !s->first_slice_line && !s->mspel && w->top_left_mv_flag && diff >= 8)
type= get_bits1(&s->gb);
else
type= 2;
@@ -577,16 +577,7 @@ static void wmv2_add_block(Wmv2Context *w, DCTELEM *block1, uint8_t *dst, int st
MpegEncContext * const s= &w->s;
uint8_t temp[2][64];
int i;
-
- if(w->abt_type_table[n] && 0){
- int a,b;
- a= block1[0];
- b= w->abt_block2[n][0];
- block1[0]= a+b;
- w->abt_block2[n][0]= a-b;
- }
-
switch(w->abt_type_table[n]){
case 0:
if (s->block_last_index[n] >= 0) {
diff --git a/src/libffmpeg/xine_decoder.c b/src/libffmpeg/xine_decoder.c
index 8be7844c2..fe1aca44d 100644
--- a/src/libffmpeg/xine_decoder.c
+++ b/src/libffmpeg/xine_decoder.c
@@ -17,7 +17,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
- * $Id: xine_decoder.c,v 1.87 2003/01/10 23:33:09 holstsn Exp $
+ * $Id: xine_decoder.c,v 1.88 2003/01/31 18:29:43 miguelfreitas Exp $
*
* xine decoder plugin using ffmpeg
*
@@ -667,7 +667,7 @@ void avcodec_register_all(void)
register_avcodec(&rv10_decoder);
register_avcodec(&svq1_decoder);
register_avcodec(&dvvideo_decoder);
- // register_avcodec(&dvaudio_decoder);
+ register_avcodec(&dvaudio_decoder);
register_avcodec(&mjpeg_decoder);
register_avcodec(&mjpegb_decoder);
register_avcodec(&mp2_decoder);
@@ -810,6 +810,16 @@ static void ff_audio_decode_data (audio_decoder_t *this_gen, buf_element_t *buf)
this->stream->meta_info[XINE_META_INFO_AUDIOCODEC]
= strdup ("Windows Media Audio v2 (ffmpeg)");
break;
+ case BUF_AUDIO_DV:
+ this->codec = avcodec_find_decoder (CODEC_ID_DVAUDIO);
+ this->stream->meta_info[XINE_META_INFO_AUDIOCODEC]
+ = strdup ("DV Audio (ffmpeg)");
+ break;
+ case BUF_AUDIO_MPEG:
+ this->codec = avcodec_find_decoder (CODEC_ID_MP3LAME);
+ this->stream->meta_info[XINE_META_INFO_AUDIOCODEC]
+ = strdup ("MP3 (ffmpeg)");
+ break;
}
if (!this->codec) {
@@ -1034,14 +1044,14 @@ static uint32_t supported_video_types[] = {
BUF_VIDEO_MSMPEG4_V2,
BUF_VIDEO_MSMPEG4_V3,
BUF_VIDEO_WMV7,
- /*BUF_VIDEO_WMV8,*/
+ /* BUF_VIDEO_WMV8, */
BUF_VIDEO_MPEG4,
BUF_VIDEO_XVID,
BUF_VIDEO_DIVX5,
BUF_VIDEO_MJPEG,
BUF_VIDEO_H263,
BUF_VIDEO_RV10,
- /* BUF_VIDEO_SORENSON_V1, -- ffmpeg svq1 decoder is segfaulting */
+ BUF_VIDEO_SORENSON_V1,
BUF_VIDEO_JPEG,
BUF_VIDEO_MPEG,
BUF_VIDEO_DV,
@@ -1051,6 +1061,8 @@ static uint32_t supported_video_types[] = {
static uint32_t supported_audio_types[] = {
BUF_AUDIO_WMAV1,
BUF_AUDIO_WMAV2,
+ BUF_AUDIO_DV,
+ /* BUF_AUDIO_MPEG, */
0
};
diff --git a/src/libmad/xine_decoder.c b/src/libmad/xine_decoder.c
index b53682387..a4f30eee3 100644
--- a/src/libmad/xine_decoder.c
+++ b/src/libmad/xine_decoder.c
@@ -17,7 +17,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
- * $Id: xine_decoder.c,v 1.40 2003/01/11 11:29:22 esnel Exp $
+ * $Id: xine_decoder.c,v 1.41 2003/01/31 18:29:47 miguelfreitas Exp $
*
* stuff needed to turn libmad into a xine decoder plugin
*/
@@ -355,7 +355,7 @@ static uint32_t audio_types[] = {
static decoder_info_t dec_info_audio = {
audio_types, /* supported types */
- 5 /* priority */
+ 6 /* priority */
};
plugin_info_t xine_plugin_info[] = {
diff --git a/src/libxinevdec/svq1.c b/src/libxinevdec/svq1.c
index 935172ff2..c92795b6a 100644
--- a/src/libxinevdec/svq1.c
+++ b/src/libxinevdec/svq1.c
@@ -17,7 +17,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
- * $Id: svq1.c,v 1.23 2003/01/08 01:02:32 miguelfreitas Exp $
+ * $Id: svq1.c,v 1.24 2003/01/31 18:29:47 miguelfreitas Exp $
*/
#include <stdio.h>
@@ -1495,7 +1495,7 @@ static uint32_t video_types[] = {
static decoder_info_t dec_info_video = {
video_types, /* supported types */
- 4 /* priority */
+ 6 /* priority */
};
plugin_info_t xine_plugin_info[] = {
diff --git a/src/xine-engine/buffer.h b/src/xine-engine/buffer.h
index 64bbaa8d2..057e3222b 100644
--- a/src/xine-engine/buffer.h
+++ b/src/xine-engine/buffer.h
@@ -17,7 +17,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
- * $Id: buffer.h,v 1.98 2003/01/26 23:36:46 f1rmb Exp $
+ * $Id: buffer.h,v 1.99 2003/01/31 18:29:47 miguelfreitas Exp $
*
*
* contents:
@@ -193,6 +193,7 @@ extern "C" {
#define BUF_AUDIO_DIALOGIC_IMA 0x032A0000
#define BUF_AUDIO_NSF 0x032B0000
#define BUF_AUDIO_FLAC 0x032C0000
+#define BUF_AUDIO_DV 0x032D0000
/* spu buffer types: */
diff --git a/src/xine-engine/buffer_types.c b/src/xine-engine/buffer_types.c
index 5b82579ed..9850e4a68 100644
--- a/src/xine-engine/buffer_types.c
+++ b/src/xine-engine/buffer_types.c
@@ -17,7 +17,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
- * $Id: buffer_types.c,v 1.52 2003/01/23 16:12:19 miguelfreitas Exp $
+ * $Id: buffer_types.c,v 1.53 2003/01/31 18:29:47 miguelfreitas Exp $
*
*
* contents:
@@ -802,6 +802,13 @@ static audio_db_t audio_db[] = {
BUF_AUDIO_FLAC,
"Free Lossless Audio Codec (FLAC)"
},
+{
+ {
+ 0
+ },
+ BUF_AUDIO_DV,
+ "DV Audio"
+},
{ { 0 }, 0, "last entry" }
};