| author | Miguel Freitas <miguelfreitas@users.sourceforge.net> | 2003-01-31 18:29:43 +0000 |
|---|---|---|
| committer | Miguel Freitas <miguelfreitas@users.sourceforge.net> | 2003-01-31 18:29:43 +0000 |
| commit | 5350f2b7701f01bc4f234d3971fb8a623a8cd72a (patch) | |
| tree | 5f6cd350778863ad8d2612bce4ac2f6270919115 | |
| parent | 8b0e8647a0d0c279b6a355362452dff4bd6f5c05 (diff) | |
| download | xine-lib-5350f2b7701f01bc4f234d3971fb8a623a8cd72a.tar.gz, xine-lib-5350f2b7701f01bc4f234d3971fb8a623a8cd72a.tar.bz2 | |
update ffmpeg
CVS patchset: 4068
CVS date: 2003/01/31 18:29:43
46 files changed, 4210 insertions, 1710 deletions
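A note on the most invasive API change carried by this update (a minimal caller-side sketch, not part of the diff itself): `init_get_bits()` now takes the buffer size in bits instead of bytes, and the `GetBitContext.size` field becomes `size_in_bits`, which is why every caller in the diff below multiplies its byte count by 8. The `parse_header()` wrapper and its arguments are hypothetical, used only to show the adjustment:

```c
#include "common.h"   /* libavcodec's GetBitContext / init_get_bits() */

/* Hypothetical caller illustrating the LIBAVCODEC_BUILD 4652 -> 4654 change. */
static int parse_header(UINT8 *buf, int buf_size /* in bytes */)
{
    GetBitContext gb;

    /* before this update (build 4652): size was passed in bytes */
    /* init_get_bits(&gb, buf, buf_size); */

    /* after this update (build 4654): size is passed in bits */
    init_get_bits(&gb, buf, buf_size * 8);

    return get_bits(&gb, 8);   /* e.g. dv.c reads its section id byte this way */
}
```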
diff --git a/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c b/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c index 706462a59..5cbc00167 100644 --- a/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c +++ b/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c @@ -285,6 +285,16 @@ void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels, put_pixels_axp_asm(block + 8, pixels + 8, line_size, h); } +static int sad16x16_mvi(void *s, uint8_t *a, uint8_t *b, int stride) +{ + return pix_abs16x16_mvi_asm(a, b, stride); +} + +static int sad8x8_mvi(void *s, uint8_t *a, uint8_t *b, int stride) +{ + return pix_abs8x8_mvi(a, b, stride); +} + void dsputil_init_alpha(DSPContext* c, unsigned mask) { c->put_pixels_tab[0][0] = put_pixels16_axp_asm; @@ -336,6 +346,8 @@ void dsputil_init_alpha(DSPContext* c, unsigned mask) c->get_pixels = get_pixels_mvi; c->diff_pixels = diff_pixels_mvi; + c->sad[0] = sad16x16_mvi; + c->sad[1] = sad8x8_mvi; c->pix_abs8x8 = pix_abs8x8_mvi; c->pix_abs16x16 = pix_abs16x16_mvi_asm; c->pix_abs16x16_x2 = pix_abs16x16_x2_mvi; diff --git a/src/libffmpeg/libavcodec/avcodec.h b/src/libffmpeg/libavcodec/avcodec.h index fd7eafbd4..6ee2b84cd 100644 --- a/src/libffmpeg/libavcodec/avcodec.h +++ b/src/libffmpeg/libavcodec/avcodec.h @@ -1,12 +1,16 @@ #ifndef AVCODEC_H #define AVCODEC_H +#ifdef __cplusplus +extern "C" { +#endif + #include "common.h" #define LIBAVCODEC_VERSION_INT 0x000406 #define LIBAVCODEC_VERSION "0.4.6" -#define LIBAVCODEC_BUILD 4652 -#define LIBAVCODEC_BUILD_STR "4652" +#define LIBAVCODEC_BUILD 4654 +#define LIBAVCODEC_BUILD_STR "4654" enum CodecID { CODEC_ID_NONE, @@ -62,21 +66,19 @@ enum CodecType { enum PixelFormat { PIX_FMT_YUV420P, PIX_FMT_YUV422, - PIX_FMT_RGB24, - PIX_FMT_BGR24, + PIX_FMT_RGB24, /* 3 bytes, R is first */ + PIX_FMT_BGR24, /* 3 bytes, B is first */ PIX_FMT_YUV422P, PIX_FMT_YUV444P, - PIX_FMT_RGBA32, - PIX_FMT_BGRA32, + PIX_FMT_RGBA32, /* always stored in cpu endianness */ PIX_FMT_YUV410P, PIX_FMT_YUV411P, - PIX_FMT_RGB565, - PIX_FMT_RGB555, -// PIX_FMT_RGB5551, - PIX_FMT_BGR565, - PIX_FMT_BGR555, -// PIX_FMT_GBR565, -// PIX_FMT_GBR555 + PIX_FMT_RGB565, /* always stored in cpu endianness */ + PIX_FMT_RGB555, /* always stored in cpu endianness, most significant bit to 1 */ + PIX_FMT_GRAY8, + PIX_FMT_MONOWHITE, /* 0 is white */ + PIX_FMT_MONOBLACK, /* 0 is black */ + PIX_FMT_NB, }; /* currently unused, may be used if 24/32 bits samples ever supported */ @@ -520,6 +522,7 @@ typedef struct AVCodecContext { #define FF_BUG_NO_PADDING 16 #define FF_BUG_AC_VLC 32 #define FF_BUG_QPEL_CHROMA 64 +#define FF_BUG_STD_QPEL 128 //#define FF_BUG_FAKE_SCALABILITY 16 //autodetection should work 100% /** @@ -924,6 +927,16 @@ typedef struct AVCodecContext { */ int me_subpel_quality; + /** + * callback to negotiate the pixelFormat + * @param fmt is the list of formats which are supported by the codec, + * its terminated by -1 as 0 is a valid format, the formats are ordered by quality + * the first is allways the native one + * @return the choosen format + * encoding: unused + * decoding: set by user, if not set then the native format will always be choosen + */ + enum PixelFormat (*get_format)(struct AVCodecContext *s, enum PixelFormat * fmt); } AVCodecContext; typedef struct AVCodec { @@ -1048,10 +1061,11 @@ void img_resample(ImgReSampleContext *s, void img_resample_close(ImgReSampleContext *s); -void avpicture_fill(AVPicture *picture, UINT8 *ptr, - int pix_fmt, int width, int height); +int avpicture_fill(AVPicture *picture, UINT8 *ptr, + int pix_fmt, int width, int height); 
int avpicture_get_size(int pix_fmt, int width, int height); -void avcodec_get_chroma_sub_sample(int fmt, int *h_shift, int *v_shift); +void avcodec_get_chroma_sub_sample(int pix_fmt, int *h_shift, int *v_shift); +const char *avcodec_get_pix_fmt_name(int pix_fmt); /* convert among pixel formats */ int img_convert(AVPicture *dst, int dst_pix_fmt, @@ -1142,7 +1156,7 @@ typedef struct { const char* supported; } avc_config_t; -void avcodec_getopt(AVCodecContext* avctx, char* str, avc_config_t** config); +void avcodec_getopt(AVCodecContext* avctx, const char* str, avc_config_t** config); /** * Interface for 0.5.0 version @@ -1223,13 +1237,20 @@ int avcodec(void* handle, avc_cmd_t cmd, void* pin, void* pout); /* memory */ void *av_malloc(unsigned int size); void *av_mallocz(unsigned int size); +void *av_realloc(void *ptr, unsigned int size); void av_free(void *ptr); +char *av_strdup(const char *s); void __av_freep(void **ptr); #define av_freep(p) __av_freep((void **)(p)) +void *av_fast_realloc(void *ptr, int *size, int min_size); /* for static data only */ /* call av_free_static to release all staticaly allocated tables */ void av_free_static(void); void *__av_mallocz_static(void** location, unsigned int size); #define av_mallocz_static(p, s) __av_mallocz_static((void **)(p), s) +#ifdef __cplusplus +} +#endif + #endif /* AVCODEC_H */ diff --git a/src/libffmpeg/libavcodec/common.c b/src/libffmpeg/libavcodec/common.c index 40ba49811..aa766280b 100644 --- a/src/libffmpeg/libavcodec/common.c +++ b/src/libffmpeg/libavcodec/common.c @@ -27,6 +27,17 @@ const UINT8 ff_sqrt_tab[128]={ 9, 9, 9, 9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11 }; +const uint8_t ff_log2_tab[256]={ + 0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 +}; + void init_put_bits(PutBitContext *s, UINT8 *buffer, int buffer_size, void *opaque, @@ -99,10 +110,12 @@ void put_string(PutBitContext * pbc, char *s) /* bit input functions */ void init_get_bits(GetBitContext *s, - UINT8 *buffer, int buffer_size) + UINT8 *buffer, int bit_size) { + const int buffer_size= (bit_size+7)>>3; + s->buffer= buffer; - s->size= buffer_size; + s->size_in_bits= bit_size; s->buffer_end= buffer + buffer_size; #ifdef ALT_BITSTREAM_READER s->index=0; @@ -169,8 +182,8 @@ static int alloc_table(VLC *vlc, int size) vlc->table_size += size; if (vlc->table_size > vlc->table_allocated) { vlc->table_allocated += (1 << vlc->bits); - vlc->table = realloc(vlc->table, - sizeof(VLC_TYPE) * 2 * vlc->table_allocated); + vlc->table = av_realloc(vlc->table, + sizeof(VLC_TYPE) * 2 * vlc->table_allocated); if (!vlc->table) return -1; } diff --git a/src/libffmpeg/libavcodec/common.h b/src/libffmpeg/libavcodec/common.h index 405ddaa09..c2305b45e 100644 --- a/src/libffmpeg/libavcodec/common.h +++ b/src/libffmpeg/libavcodec/common.h @@ -88,8 +88,40 @@ typedef INT64 int64_t; # endif # define snprintf _snprintf +# define vsnprintf _vsnprintf -#else /* CONFIG_WIN32 */ +/* CONFIG_WIN32 end */ +#elif defined (CONFIG_OS2) +/* OS/2 EMX */ + +#include <inttypes.h> + +typedef 
unsigned char UINT8; +typedef unsigned short UINT16; +typedef unsigned int UINT32; +typedef unsigned long long UINT64; +typedef signed char INT8; +typedef signed short INT16; +typedef signed int INT32; +typedef signed long long INT64; + +#ifdef HAVE_AV_CONFIG_H + +#ifndef INT64_C +#define INT64_C(c) (c ## LL) +#define UINT64_C(c) (c ## ULL) +#endif + +#ifdef USE_FASTMEMCPY +#include "fastmemcpy.h" +#endif + +#include <float.h> + +#endif /* HAVE_AV_CONFIG_H */ + +/* CONFIG_OS2 end */ +#else /* unix */ @@ -119,7 +151,7 @@ typedef signed long long INT64; # endif # endif /* HAVE_AV_CONFIG_H */ -#endif /* !CONFIG_WIN32 */ +#endif /* !CONFIG_WIN32 && !CONFIG_OS2 */ #ifdef HAVE_AV_CONFIG_H @@ -238,7 +270,7 @@ typedef struct GetBitContext { UINT32 cache1; int bit_count; #endif - int size; + int size_in_bits; } GetBitContext; static inline int get_bits_count(GetBitContext *s); @@ -667,6 +699,12 @@ int init_vlc(VLC *vlc, int nb_bits, int nb_codes, const void *codes, int codes_wrap, int codes_size); void free_vlc(VLC *vlc); +/** + * + * if the vlc code is invalid and max_depth=1 than no bits will be removed + * if the vlc code is invalid and max_depth>1 than the number of bits removed + * is undefined + */ #define GET_VLC(code, name, gb, table, bits, max_depth)\ {\ int n, index, nb_bits;\ @@ -775,6 +813,7 @@ void print_stats(void); #endif /* misc math functions */ +extern const uint8_t ff_log2_tab[256]; static inline int av_log2(unsigned int v) { @@ -789,20 +828,26 @@ static inline int av_log2(unsigned int v) v >>= 8; n += 8; } - if (v & 0xf0) { - v >>= 4; - n += 4; - } - if (v & 0xc) { - v >>= 2; - n += 2; - } - if (v & 0x2) { - n++; + n += ff_log2_tab[v]; + + return n; +} + +static inline int av_log2_16bit(unsigned int v) +{ + int n; + + n = 0; + if (v & 0xff00) { + v >>= 8; + n += 8; } + n += ff_log2_tab[v]; + return n; } + /* median of 3 */ static inline int mid_pred(int a, int b, int c) { @@ -832,7 +877,7 @@ static inline int clip(int a, int amin, int amax) } /* math */ -extern const UINT8 ff_sqrt_tab[128]; +extern const uint8_t ff_sqrt_tab[128]; int ff_gcd(int a, int b); @@ -902,6 +947,11 @@ if((y)<(x)){\ #define CLAMP_TO_8BIT(d) ((d > 0xff) ? 0xff : (d < 0) ? 
0 : d) +/* avoid usage of various functions */ +#define malloc please_use_av_malloc +#define free please_use_av_free +#define realloc please_use_av_realloc + #endif /* HAVE_AV_CONFIG_H */ #endif /* COMMON_H */ diff --git a/src/libffmpeg/libavcodec/dsputil.c b/src/libffmpeg/libavcodec/dsputil.c index 0d7556f65..06da93ba7 100644 --- a/src/libffmpeg/libavcodec/dsputil.c +++ b/src/libffmpeg/libavcodec/dsputil.c @@ -129,6 +129,7 @@ static int pix_norm1_c(UINT8 * pix, int line_size) s = 0; for (i = 0; i < 16; i++) { for (j = 0; j < 16; j += 8) { +#if 0 s += sq[pix[0]]; s += sq[pix[1]]; s += sq[pix[2]]; @@ -137,6 +138,30 @@ static int pix_norm1_c(UINT8 * pix, int line_size) s += sq[pix[5]]; s += sq[pix[6]]; s += sq[pix[7]]; +#else +#if LONG_MAX > 2147483647 + register uint64_t x=*(uint64_t*)pix; + s += sq[x&0xff]; + s += sq[(x>>8)&0xff]; + s += sq[(x>>16)&0xff]; + s += sq[(x>>24)&0xff]; + s += sq[(x>>32)&0xff]; + s += sq[(x>>40)&0xff]; + s += sq[(x>>48)&0xff]; + s += sq[(x>>56)&0xff]; +#else + register uint32_t x=*(uint32_t*)pix; + s += sq[x&0xff]; + s += sq[(x>>8)&0xff]; + s += sq[(x>>16)&0xff]; + s += sq[(x>>24)&0xff]; + x=*(uint32_t*)(pix+4); + s += sq[x&0xff]; + s += sq[(x>>8)&0xff]; + s += sq[(x>>16)&0xff]; + s += sq[(x>>24)&0xff]; +#endif +#endif pix += 8; } pix += line_size - 16; @@ -166,27 +191,32 @@ static int sse8_c(void *v, UINT8 * pix1, UINT8 * pix2, int line_size) return s; } -static int sse16_c(void *v, UINT8 * pix1, UINT8 * pix2, int line_size) +static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size) { - int s, i, j; - UINT32 *sq = squareTbl + 256; + int s, i; + uint32_t *sq = squareTbl + 256; s = 0; for (i = 0; i < 16; i++) { - for (j = 0; j < 16; j += 8) { - s += sq[pix1[0] - pix2[0]]; - s += sq[pix1[1] - pix2[1]]; - s += sq[pix1[2] - pix2[2]]; - s += sq[pix1[3] - pix2[3]]; - s += sq[pix1[4] - pix2[4]]; - s += sq[pix1[5] - pix2[5]]; - s += sq[pix1[6] - pix2[6]]; - s += sq[pix1[7] - pix2[7]]; - pix1 += 8; - pix2 += 8; - } - pix1 += line_size - 16; - pix2 += line_size - 16; + s += sq[pix1[ 0] - pix2[ 0]]; + s += sq[pix1[ 1] - pix2[ 1]]; + s += sq[pix1[ 2] - pix2[ 2]]; + s += sq[pix1[ 3] - pix2[ 3]]; + s += sq[pix1[ 4] - pix2[ 4]]; + s += sq[pix1[ 5] - pix2[ 5]]; + s += sq[pix1[ 6] - pix2[ 6]]; + s += sq[pix1[ 7] - pix2[ 7]]; + s += sq[pix1[ 8] - pix2[ 8]]; + s += sq[pix1[ 9] - pix2[ 9]]; + s += sq[pix1[10] - pix2[10]]; + s += sq[pix1[11] - pix2[11]]; + s += sq[pix1[12] - pix2[12]]; + s += sq[pix1[13] - pix2[13]]; + s += sq[pix1[14] - pix2[14]]; + s += sq[pix1[15] - pix2[15]]; + + pix1 += line_size; + pix2 += line_size; } return s; } @@ -801,7 +831,8 @@ static void OPNAME ## mpeg4_qpel8_h_lowpass(UINT8 *dst, UINT8 *src, int dstStrid }\ }\ \ -static void OPNAME ## mpeg4_qpel8_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int w){\ +static void OPNAME ## mpeg4_qpel8_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride){\ + const int w=8;\ UINT8 *cm = cropTbl + MAX_NEG_CROP;\ int i;\ for(i=0; i<w; i++)\ @@ -923,107 +954,163 @@ static void OPNAME ## qpel8_mc01_c(UINT8 *dst, UINT8 *src, int stride){\ UINT8 full[16*9];\ UINT8 half[64];\ copy_block9(full, src, 16, stride, 9);\ - put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16, 8);\ + put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\ }\ \ static void OPNAME ## qpel8_mc02_c(UINT8 *dst, UINT8 *src, int stride){\ UINT8 full[16*9];\ copy_block9(full, src, 16, stride, 9);\ - OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16, 
8);\ + OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\ }\ \ static void OPNAME ## qpel8_mc03_c(UINT8 *dst, UINT8 *src, int stride){\ UINT8 full[16*9];\ UINT8 half[64];\ copy_block9(full, src, 16, stride, 9);\ - put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16, 8);\ + put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\ }\ -static void OPNAME ## qpel8_mc11_c(UINT8 *dst, UINT8 *src, int stride){\ +void ff_ ## OPNAME ## qpel8_mc11_old_c(UINT8 *dst, UINT8 *src, int stride){\ UINT8 full[16*9];\ UINT8 halfH[72];\ UINT8 halfV[64];\ UINT8 halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ - put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16, 8);\ - put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\ + put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ + put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ }\ -static void OPNAME ## qpel8_mc31_c(UINT8 *dst, UINT8 *src, int stride){\ +static void OPNAME ## qpel8_mc11_c(UINT8 *dst, UINT8 *src, int stride){\ + UINT8 full[16*9];\ + UINT8 halfH[72];\ + UINT8 halfHV[64];\ + copy_block9(full, src, 16, stride, 9);\ + put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ + put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ + put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ + OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ +}\ +void ff_ ## OPNAME ## qpel8_mc31_old_c(UINT8 *dst, UINT8 *src, int stride){\ UINT8 full[16*9];\ UINT8 halfH[72];\ UINT8 halfV[64];\ UINT8 halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ - put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16, 8);\ - put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\ + put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ + put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ }\ -static void OPNAME ## qpel8_mc13_c(UINT8 *dst, UINT8 *src, int stride){\ +static void OPNAME ## qpel8_mc31_c(UINT8 *dst, UINT8 *src, int stride){\ + UINT8 full[16*9];\ + UINT8 halfH[72];\ + UINT8 halfHV[64];\ + copy_block9(full, src, 16, stride, 9);\ + put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ + put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ + put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ + OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ +}\ +void ff_ ## OPNAME ## qpel8_mc13_old_c(UINT8 *dst, UINT8 *src, int stride){\ UINT8 full[16*9];\ UINT8 halfH[72];\ UINT8 halfV[64];\ UINT8 halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ - put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16, 8);\ - put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\ + put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ + put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ }\ -static void OPNAME ## qpel8_mc33_c(UINT8 *dst, UINT8 *src, int stride){\ +static void OPNAME ## qpel8_mc13_c(UINT8 *dst, UINT8 *src, int stride){\ + UINT8 full[16*9];\ + UINT8 halfH[72];\ + UINT8 halfHV[64];\ + copy_block9(full, src, 16, stride, 9);\ + put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 
8, 16, 9);\ + put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ + put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ + OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ +}\ +void ff_ ## OPNAME ## qpel8_mc33_old_c(UINT8 *dst, UINT8 *src, int stride){\ UINT8 full[16*9];\ UINT8 halfH[72];\ UINT8 halfV[64];\ UINT8 halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\ - put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16, 8);\ - put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\ + put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ + put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ }\ +static void OPNAME ## qpel8_mc33_c(UINT8 *dst, UINT8 *src, int stride){\ + UINT8 full[16*9];\ + UINT8 halfH[72];\ + UINT8 halfHV[64];\ + copy_block9(full, src, 16, stride, 9);\ + put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ + put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ + put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ + OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ +}\ static void OPNAME ## qpel8_mc21_c(UINT8 *dst, UINT8 *src, int stride){\ UINT8 halfH[72];\ UINT8 halfHV[64];\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ - put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\ + put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ }\ static void OPNAME ## qpel8_mc23_c(UINT8 *dst, UINT8 *src, int stride){\ UINT8 halfH[72];\ UINT8 halfHV[64];\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ - put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\ + put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ }\ -static void OPNAME ## qpel8_mc12_c(UINT8 *dst, UINT8 *src, int stride){\ +void ff_ ## OPNAME ## qpel8_mc12_old_c(UINT8 *dst, UINT8 *src, int stride){\ UINT8 full[16*9];\ UINT8 halfH[72];\ UINT8 halfV[64];\ UINT8 halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ - put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16, 8);\ - put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\ + put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ + put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\ }\ -static void OPNAME ## qpel8_mc32_c(UINT8 *dst, UINT8 *src, int stride){\ +static void OPNAME ## qpel8_mc12_c(UINT8 *dst, UINT8 *src, int stride){\ + UINT8 full[16*9];\ + UINT8 halfH[72];\ + copy_block9(full, src, 16, stride, 9);\ + put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ + put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ + OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ +}\ +void ff_ ## OPNAME ## qpel8_mc32_old_c(UINT8 *dst, UINT8 *src, int stride){\ UINT8 full[16*9];\ UINT8 halfH[72];\ UINT8 halfV[64];\ UINT8 halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ - put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16, 8);\ - put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\ + put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ + put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l2(dst, halfV, 
halfHV, stride, 8, 8, 8);\ }\ +static void OPNAME ## qpel8_mc32_c(UINT8 *dst, UINT8 *src, int stride){\ + UINT8 full[16*9];\ + UINT8 halfH[72];\ + copy_block9(full, src, 16, stride, 9);\ + put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ + put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ + OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ +}\ static void OPNAME ## qpel8_mc22_c(UINT8 *dst, UINT8 *src, int stride){\ UINT8 halfH[72];\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ - OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8, 8);\ + OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ }\ static void OPNAME ## qpel16_mc00_c (UINT8 *dst, UINT8 *src, int stride){\ OPNAME ## pixels16_c(dst, src, stride, 16);\ @@ -1066,7 +1153,7 @@ static void OPNAME ## qpel16_mc03_c(UINT8 *dst, UINT8 *src, int stride){\ put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\ }\ -static void OPNAME ## qpel16_mc11_c(UINT8 *dst, UINT8 *src, int stride){\ +void ff_ ## OPNAME ## qpel16_mc11_old_c(UINT8 *dst, UINT8 *src, int stride){\ UINT8 full[24*17];\ UINT8 halfH[272];\ UINT8 halfV[256];\ @@ -1077,7 +1164,17 @@ static void OPNAME ## qpel16_mc11_c(UINT8 *dst, UINT8 *src, int stride){\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ }\ -static void OPNAME ## qpel16_mc31_c(UINT8 *dst, UINT8 *src, int stride){\ +static void OPNAME ## qpel16_mc11_c(UINT8 *dst, UINT8 *src, int stride){\ + UINT8 full[24*17];\ + UINT8 halfH[272];\ + UINT8 halfHV[256];\ + copy_block17(full, src, 24, stride, 17);\ + put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ + put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ + put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ + OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ +}\ +void ff_ ## OPNAME ## qpel16_mc31_old_c(UINT8 *dst, UINT8 *src, int stride){\ UINT8 full[24*17];\ UINT8 halfH[272];\ UINT8 halfV[256];\ @@ -1088,7 +1185,17 @@ static void OPNAME ## qpel16_mc31_c(UINT8 *dst, UINT8 *src, int stride){\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ }\ -static void OPNAME ## qpel16_mc13_c(UINT8 *dst, UINT8 *src, int stride){\ +static void OPNAME ## qpel16_mc31_c(UINT8 *dst, UINT8 *src, int stride){\ + UINT8 full[24*17];\ + UINT8 halfH[272];\ + UINT8 halfHV[256];\ + copy_block17(full, src, 24, stride, 17);\ + put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ + put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ + put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ + OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ +}\ +void ff_ ## OPNAME ## qpel16_mc13_old_c(UINT8 *dst, UINT8 *src, int stride){\ UINT8 full[24*17];\ UINT8 halfH[272];\ UINT8 halfV[256];\ @@ -1099,7 +1206,17 @@ static void OPNAME ## qpel16_mc13_c(UINT8 *dst, UINT8 *src, int stride){\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ }\ -static void OPNAME ## qpel16_mc33_c(UINT8 *dst, UINT8 *src, int stride){\ +static void OPNAME ## qpel16_mc13_c(UINT8 *dst, UINT8 *src, int stride){\ + UINT8 full[24*17];\ + UINT8 halfH[272];\ + UINT8 halfHV[256];\ + copy_block17(full, src, 24, stride, 17);\ + put ## 
RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ + put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ + put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ + OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ +}\ +void ff_ ## OPNAME ## qpel16_mc33_old_c(UINT8 *dst, UINT8 *src, int stride){\ UINT8 full[24*17];\ UINT8 halfH[272];\ UINT8 halfV[256];\ @@ -1110,6 +1227,16 @@ static void OPNAME ## qpel16_mc33_c(UINT8 *dst, UINT8 *src, int stride){\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ }\ +static void OPNAME ## qpel16_mc33_c(UINT8 *dst, UINT8 *src, int stride){\ + UINT8 full[24*17];\ + UINT8 halfH[272];\ + UINT8 halfHV[256];\ + copy_block17(full, src, 24, stride, 17);\ + put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ + put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ + put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ + OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ +}\ static void OPNAME ## qpel16_mc21_c(UINT8 *dst, UINT8 *src, int stride){\ UINT8 halfH[272];\ UINT8 halfHV[256];\ @@ -1124,7 +1251,7 @@ static void OPNAME ## qpel16_mc23_c(UINT8 *dst, UINT8 *src, int stride){\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ }\ -static void OPNAME ## qpel16_mc12_c(UINT8 *dst, UINT8 *src, int stride){\ +void ff_ ## OPNAME ## qpel16_mc12_old_c(UINT8 *dst, UINT8 *src, int stride){\ UINT8 full[24*17];\ UINT8 halfH[272];\ UINT8 halfV[256];\ @@ -1135,7 +1262,15 @@ static void OPNAME ## qpel16_mc12_c(UINT8 *dst, UINT8 *src, int stride){\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\ }\ -static void OPNAME ## qpel16_mc32_c(UINT8 *dst, UINT8 *src, int stride){\ +static void OPNAME ## qpel16_mc12_c(UINT8 *dst, UINT8 *src, int stride){\ + UINT8 full[24*17];\ + UINT8 halfH[272];\ + copy_block17(full, src, 24, stride, 17);\ + put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ + put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ + OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ +}\ +void ff_ ## OPNAME ## qpel16_mc32_old_c(UINT8 *dst, UINT8 *src, int stride){\ UINT8 full[24*17];\ UINT8 halfH[272];\ UINT8 halfV[256];\ @@ -1146,6 +1281,14 @@ static void OPNAME ## qpel16_mc32_c(UINT8 *dst, UINT8 *src, int stride){\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\ }\ +static void OPNAME ## qpel16_mc32_c(UINT8 *dst, UINT8 *src, int stride){\ + UINT8 full[24*17];\ + UINT8 halfH[272];\ + copy_block17(full, src, 24, stride, 17);\ + put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ + put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ + OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ +}\ static void OPNAME ## qpel16_mc22_c(UINT8 *dst, UINT8 *src, int stride){\ UINT8 halfH[272];\ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ @@ -1498,7 +1641,7 @@ static void clear_blocks_c(DCTELEM *blocks) static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){ int i; - for(i=0; i+7<w; i++){ + for(i=0; i+7<w; i+=8){ dst[i+0] += src[i+0]; dst[i+1] += src[i+1]; dst[i+2] += src[i+2]; @@ -1514,7 +1657,7 @@ static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){ static void 
diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ int i; - for(i=0; i+7<w; i++){ + for(i=0; i+7<w; i+=8){ dst[i+0] = src1[i+0]-src2[i+0]; dst[i+1] = src1[i+1]-src2[i+1]; dst[i+2] = src1[i+2]-src2[i+2]; @@ -1639,7 +1782,8 @@ static int hadamard8_abs_c(uint8_t *src, int stride, int mean){ static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){ MpegEncContext * const s= (MpegEncContext *)c; - DCTELEM temp[64]; + uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; + DCTELEM * const temp= (DCTELEM*)aligned_temp; int sum=0, i; s->dsp.diff_pixels(temp, src1, src2, stride); @@ -1651,11 +1795,13 @@ static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2 return sum; } -void simple_idct(INT16 *block); //FIXME +void simple_idct(DCTELEM *block); //FIXME static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){ MpegEncContext * const s= (MpegEncContext *)c; - DCTELEM temp[64], bak[64]; + uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64*2/8]; + DCTELEM * const temp= (DCTELEM*)aligned_temp; + DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64; int sum=0, i; s->mb_intra=0; @@ -1664,7 +1810,7 @@ static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s memcpy(bak, temp, 64*sizeof(DCTELEM)); - s->dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); + s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); s->dct_unquantize(s, temp, 0, s->qscale); simple_idct(temp); //FIXME @@ -1674,9 +1820,144 @@ static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s return sum; } +static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){ + MpegEncContext * const s= (MpegEncContext *)c; + const UINT8 *scantable= s->intra_scantable.permutated; + uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; + uint64_t __align8 aligned_bak[stride]; + DCTELEM * const temp= (DCTELEM*)aligned_temp; + uint8_t * const bak= (uint8_t*)aligned_bak; + int i, last, run, bits, level, distoration, start_i; + const int esc_length= s->ac_esc_length; + uint8_t * length; + uint8_t * last_length; + + for(i=0; i<8; i++){ + ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0]; + ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1]; + } + + s->dsp.diff_pixels(temp, src1, src2, stride); + + s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); + + bits=0; + + if (s->mb_intra) { + start_i = 1; + length = s->intra_ac_vlc_length; + last_length= s->intra_ac_vlc_last_length; + bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma + } else { + start_i = 0; + length = s->inter_ac_vlc_length; + last_length= s->inter_ac_vlc_last_length; + } + + if(last>=start_i){ + run=0; + for(i=start_i; i<last; i++){ + int j= scantable[i]; + level= temp[j]; + + if(level){ + level+=64; + if((level&(~127)) == 0){ + bits+= length[UNI_AC_ENC_INDEX(run, level)]; + }else + bits+= esc_length; + run=0; + }else + run++; + } + i= scantable[last]; + + level= temp[i] + 64; + + assert(level - 64); + + if((level&(~127)) == 0){ + bits+= last_length[UNI_AC_ENC_INDEX(run, level)]; + }else + bits+= esc_length; + + } + + if(last>=0){ + s->dct_unquantize(s, temp, 0, s->qscale); + } + + s->idct_add(bak, stride, temp); + + distoration= s->dsp.sse[1](NULL, bak, src1, stride); + + return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7); +} + +static int bit8x8_c(/*MpegEncContext*/ void *c, 
uint8_t *src1, uint8_t *src2, int stride){ + MpegEncContext * const s= (MpegEncContext *)c; + const UINT8 *scantable= s->intra_scantable.permutated; + uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; + DCTELEM * const temp= (DCTELEM*)aligned_temp; + int i, last, run, bits, level, start_i; + const int esc_length= s->ac_esc_length; + uint8_t * length; + uint8_t * last_length; + + s->dsp.diff_pixels(temp, src1, src2, stride); + + s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); + + bits=0; + + if (s->mb_intra) { + start_i = 1; + length = s->intra_ac_vlc_length; + last_length= s->intra_ac_vlc_last_length; + bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma + } else { + start_i = 0; + length = s->inter_ac_vlc_length; + last_length= s->inter_ac_vlc_last_length; + } + + if(last>=start_i){ + run=0; + for(i=start_i; i<last; i++){ + int j= scantable[i]; + level= temp[j]; + + if(level){ + level+=64; + if((level&(~127)) == 0){ + bits+= length[UNI_AC_ENC_INDEX(run, level)]; + }else + bits+= esc_length; + run=0; + }else + run++; + } + i= scantable[last]; + + level= temp[i] + 64; + + assert(level - 64); + + if((level&(~127)) == 0){ + bits+= last_length[UNI_AC_ENC_INDEX(run, level)]; + }else + bits+= esc_length; + } + + return bits; +} + + WARPER88_1616(hadamard8_diff_c, hadamard8_diff16_c) WARPER88_1616(dct_sad8x8_c, dct_sad16x16_c) WARPER88_1616(quant_psnr8x8_c, quant_psnr16x16_c) +WARPER88_1616(rd8x8_c, rd16x16_c) +WARPER88_1616(bit8x8_c, bit16x16_c) void dsputil_init(DSPContext* c, unsigned mask) { @@ -1790,7 +2071,13 @@ void dsputil_init(DSPContext* c, unsigned mask) c->quant_psnr[0]= quant_psnr16x16_c; c->quant_psnr[1]= quant_psnr8x8_c; - + + c->rd[0]= rd16x16_c; + c->rd[1]= rd8x8_c; + + c->bit[0]= bit16x16_c; + c->bit[1]= bit8x8_c; + c->add_bytes= add_bytes_c; c->diff_bytes= diff_bytes_c; diff --git a/src/libffmpeg/libavcodec/dsputil.h b/src/libffmpeg/libavcodec/dsputil.h index f34a8f078..2220b4871 100644 --- a/src/libffmpeg/libavcodec/dsputil.h +++ b/src/libffmpeg/libavcodec/dsputil.h @@ -30,6 +30,7 @@ #undef DEBUG /* dct code */ typedef short DCTELEM; +//typedef int DCTELEM; void fdct_ifast (DCTELEM *data); void ff_jpeg_fdct_islow (DCTELEM *data); @@ -74,7 +75,23 @@ void clear_blocks_c(DCTELEM *blocks); typedef void (*op_pixels_func)(UINT8 *block/*align width (8 or 16)*/, const UINT8 *pixels/*align 1*/, int line_size, int h); typedef void (*qpel_mc_func)(UINT8 *dst/*align width (8 or 16)*/, UINT8 *src/*align 1*/, int stride); - +#define DEF_OLD_QPEL(name)\ +void ff_put_ ## name (UINT8 *dst/*align width (8 or 16)*/, UINT8 *src/*align 1*/, int stride);\ +void ff_put_no_rnd_ ## name (UINT8 *dst/*align width (8 or 16)*/, UINT8 *src/*align 1*/, int stride);\ +void ff_avg_ ## name (UINT8 *dst/*align width (8 or 16)*/, UINT8 *src/*align 1*/, int stride); + +DEF_OLD_QPEL(qpel16_mc11_old_c) +DEF_OLD_QPEL(qpel16_mc31_old_c) +DEF_OLD_QPEL(qpel16_mc12_old_c) +DEF_OLD_QPEL(qpel16_mc32_old_c) +DEF_OLD_QPEL(qpel16_mc13_old_c) +DEF_OLD_QPEL(qpel16_mc33_old_c) +DEF_OLD_QPEL(qpel8_mc11_old_c) +DEF_OLD_QPEL(qpel8_mc31_old_c) +DEF_OLD_QPEL(qpel8_mc12_old_c) +DEF_OLD_QPEL(qpel8_mc32_old_c) +DEF_OLD_QPEL(qpel8_mc13_old_c) +DEF_OLD_QPEL(qpel8_mc33_old_c) #define CALL_2X_PIXELS(a, b, n)\ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ @@ -105,6 +122,8 @@ typedef struct DSPContext { me_cmp_func hadamard8_diff[2]; me_cmp_func dct_sad[2]; me_cmp_func quant_psnr[2]; + me_cmp_func bit[2]; + me_cmp_func rd[2]; int (*hadamard8_abs 
)(uint8_t *src, int stride, int mean); me_cmp_func me_pre_cmp[11]; @@ -143,10 +162,14 @@ void dsputil_init(DSPContext* p, unsigned mask); * permute block according to permuatation. * @param last last non zero element in scantable order */ -void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last); +void ff_block_permute(DCTELEM *block, UINT8 *permutation, const UINT8 *scantable, int last); #define emms_c() +/* should be defined by architectures supporting + one or more MultiMedia extension */ +int mm_support(void); + #if defined(HAVE_MMX) #undef emms_c @@ -161,7 +184,6 @@ void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, extern int mm_flags; -int mm_support(void); void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size); void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size); @@ -211,6 +233,10 @@ void dsputil_init_alpha(DSPContext* c, unsigned mask); extern int mm_flags; +#if defined(HAVE_ALTIVEC) && !defined(CONFIG_DARWIN) +#include <altivec.h> +#endif + #define __align8 __attribute__ ((aligned (16))) void dsputil_init_ppc(DSPContext* c, unsigned mask); @@ -314,7 +340,12 @@ static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int st /* btw, rintf() is existing on fbsd too -- alex */ static inline long int lrintf(float x) { +#ifdef CONFIG_WIN32 + /* XXX: incorrect, but make it compile */ + return (int)(x); +#else return (int)(rint(x)); +#endif } #endif diff --git a/src/libffmpeg/libavcodec/dv.c b/src/libffmpeg/libavcodec/dv.c index f436caf12..554b593e7 100644 --- a/src/libffmpeg/libavcodec/dv.c +++ b/src/libffmpeg/libavcodec/dv.c @@ -157,7 +157,7 @@ static const UINT16 block_sizes[6] = { /* decode ac coefs */ static void dv_decode_ac(DVVideoDecodeContext *s, - BlockInfo *mb, INT16 *block, int last_index) + BlockInfo *mb, DCTELEM *block, int last_index) { int last_re_index; int shift_offset = mb->shift_offset; @@ -195,7 +195,7 @@ static void dv_decode_ac(DVVideoDecodeContext *s, v, partial_bit_count, (mb->partial_bit_buffer << l)); #endif /* try to read the codeword */ - init_get_bits(&gb1, buf, 4); + init_get_bits(&gb1, buf, 4*8); { OPEN_READER(re1, &gb1); UPDATE_CACHE(re1, &gb1); @@ -333,7 +333,7 @@ static inline void dv_decode_video_segment(DVVideoDecodeContext *s, block = block1; for(j = 0;j < 6; j++) { /* NOTE: size is not important here */ - init_get_bits(&s->gb, buf_ptr, 14); + init_get_bits(&s->gb, buf_ptr, 14*8); /* get the dc */ dc = get_bits(&s->gb, 9); @@ -382,7 +382,7 @@ static inline void dv_decode_video_segment(DVVideoDecodeContext *s, #endif block = block1; mb = mb1; - init_get_bits(&s->gb, mb_bit_buffer, 80); + init_get_bits(&s->gb, mb_bit_buffer, 80*8); for(j = 0;j < 6; j++) { if (!mb->eob_reached && s->gb.index < mb_bit_count) { dv_decode_ac(s, mb, block, mb_bit_count); @@ -421,7 +421,7 @@ static inline void dv_decode_video_segment(DVVideoDecodeContext *s, #endif block = &s->block[0][0]; mb = mb_data; - init_get_bits(&s->gb, vs_bit_buffer, 5 * 80); + init_get_bits(&s->gb, vs_bit_buffer, 5 * 80*8); for(mb_index = 0; mb_index < 5; mb_index++) { for(j = 0;j < 6; j++) { if (!mb->eob_reached) { @@ -501,7 +501,7 @@ static int dvvideo_decode_frame(AVCodecContext *avctx, const UINT16 *mb_pos_ptr; /* parse id */ - init_get_bits(&s->gb, buf, buf_size); + init_get_bits(&s->gb, buf, buf_size*8); sct = get_bits(&s->gb, 3); if (sct != 0) return -1; @@ -634,7 +634,6 @@ AVCodec dvvideo_decoder = { typedef struct DVAudioDecodeContext { AVCodecContext *avctx; 
GetBitContext gb; - } DVAudioDecodeContext; static int dvaudio_decode_init(AVCodecContext *avctx) @@ -643,13 +642,126 @@ static int dvaudio_decode_init(AVCodecContext *avctx) return 0; } +static UINT16 dv_audio_12to16(UINT16 sample) +{ + UINT16 shift, result; + + sample = (sample < 0x800) ? sample : sample | 0xf000; + shift = (sample & 0xf00) >> 8; + + if (shift < 0x2 || shift > 0xd) { + result = sample; + } else if (shift < 0x8) { + shift--; + result = (sample - (256 * shift)) << shift; + } else { + shift = 0xe - shift; + result = ((sample + ((256 * shift) + 1)) << shift) - 1; + } + + return result; +} + /* NOTE: exactly one frame must be given (120000 bytes for NTSC, - 144000 bytes for PAL) */ + 144000 bytes for PAL) + + There's a couple of assumptions being made here: + 1. We don't do any kind of audio error correction. It means, + that erroneous samples 0x8000 are being passed upwards. + Do we need to silence erroneous samples ? Average them ? + 2. We don't do software emphasis. + 3. We are not checking for 'speed' argument being valid. + 4. Audio is always returned as 16bit linear samples: 12bit + nonlinear samples are converted into 16bit linear ones. +*/ static int dvaudio_decode_frame(AVCodecContext *avctx, void *data, int *data_size, UINT8 *buf, int buf_size) { - // DVAudioDecodeContext *s = avctx->priv_data; + DVVideoDecodeContext *s = avctx->priv_data; + const UINT16 (*unshuffle)[9]; + int smpls, freq, quant, sys, stride, difseg, ad, dp, nb_dif_segs, i; + UINT16 lc, rc; + UINT8 *buf_ptr; + + /* parse id */ + init_get_bits(&s->gb, &buf[AAUX_OFFSET], 5*8); + i = get_bits(&s->gb, 8); + if (i != 0x50) { /* No audio ? */ + *data_size = 0; + return buf_size; + } + + get_bits(&s->gb, 1); /* 0 - locked audio, 1 - unlocked audio */ + skip_bits(&s->gb, 1); + smpls = get_bits(&s->gb, 6); /* samples in this frame - min. 
samples */ + + skip_bits(&s->gb, 8); + + skip_bits(&s->gb, 2); + sys = get_bits(&s->gb, 1); /* 0 - 60 fields, 1 = 50 fields */ + skip_bits(&s->gb, 5); + + get_bits(&s->gb, 1); /* 0 - emphasis on, 1 - emphasis off */ + get_bits(&s->gb, 1); /* 0 - reserved, 1 - emphasis time constant 50/15us */ + freq = get_bits(&s->gb, 3); /* 0 - 48KHz, 1 - 44,1kHz, 2 - 32 kHz */ + quant = get_bits(&s->gb, 3); /* 0 - 16bit linear, 1 - 12bit nonlinear */ + + if (quant > 1) + return -1; /* Unsupported quantization */ + + avctx->sample_rate = dv_audio_frequency[freq]; + // What about: + // avctx->bit_rate = + // avctx->frame_size = + + *data_size = (dv_audio_min_samples[sys][freq] + smpls) * + avctx->channels * 2; + + if (sys) { + nb_dif_segs = 12; + stride = 108; + unshuffle = dv_place_audio50; + } else { + nb_dif_segs = 10; + stride = 90; + unshuffle = dv_place_audio60; + } + + /* for each DIF segment */ + buf_ptr = buf; + for (difseg = 0; difseg < nb_dif_segs; difseg++) { + buf_ptr += 6 * 80; /* skip DIF segment header */ + for (ad = 0; ad < 9; ad++) { + + for (dp = 8; dp < 80; dp+=2) { + if (quant == 0) { /* 16bit quantization */ + i = unshuffle[difseg][ad] + (dp - 8)/2 * stride; + ((short *)data)[i] = (buf_ptr[dp] << 8) | buf_ptr[dp+1]; + } else { /* 12bit quantization */ + if (difseg >= nb_dif_segs/2) + goto out; /* We're not doing 4ch at this time */ + + lc = ((UINT16)buf_ptr[dp] << 4) | + ((UINT16)buf_ptr[dp+2] >> 4); + rc = ((UINT16)buf_ptr[dp+1] << 4) | + ((UINT16)buf_ptr[dp+2] & 0x0f); + lc = dv_audio_12to16(lc); + rc = dv_audio_12to16(rc); + + i = unshuffle[difseg][ad] + (dp - 8)/3 * stride; + ((short *)data)[i] = lc; + i = unshuffle[difseg+nb_dif_segs/2][ad] + (dp - 8)/3 * stride; + ((short *)data)[i] = rc; + ++dp; + } + } + + buf_ptr += 16 * 80; /* 15 Video DIFs + 1 Audio DIF */ + } + } + +out: return buf_size; } diff --git a/src/libffmpeg/libavcodec/dvdata.h b/src/libffmpeg/libavcodec/dvdata.h index b5c1f5607..4e1fc39c7 100644 --- a/src/libffmpeg/libavcodec/dvdata.h +++ b/src/libffmpeg/libavcodec/dvdata.h @@ -18,6 +18,7 @@ */ #define NB_DV_VLC 409 +#define AAUX_OFFSET (80*6 + 80*16*3 + 3) static const UINT16 dv_vlc_bits[409] = { 0x0000, 0x0002, 0x0007, 0x0008, 0x0009, 0x0014, 0x0015, 0x0016, @@ -905,3 +906,41 @@ static const UINT16 dv_place_411[1350] = { 0x0834, 0x2320, 0x2f44, 0x3810, 0x1658, }; +static const UINT16 dv_place_audio60[10][9] = { + { 0, 30, 60, 20, 50, 80, 10, 40, 70 }, /* 1st channel */ + { 6, 36, 66, 26, 56, 86, 16, 46, 76 }, + { 12, 42, 72, 2, 32, 62, 22, 52, 82 }, + { 18, 48, 78, 8, 38, 68, 28, 58, 88 }, + { 24, 54, 84, 14, 44, 74, 4, 34, 64 }, + + { 1, 31, 61, 21, 51, 81, 11, 41, 71 }, /* 2nd channel */ + { 7, 37, 67, 27, 57, 87, 17, 47, 77 }, + { 13, 43, 73, 3, 33, 63, 23, 53, 83 }, + { 19, 49, 79, 9, 39, 69, 29, 59, 89 }, + { 25, 55, 85, 15, 45, 75, 5, 35, 65 }, +}; + +static const UINT16 dv_place_audio50[12][9] = { + { 0, 36, 72, 26, 62, 98, 16, 52, 88}, /* 1st channel */ + { 6, 42, 78, 32, 68, 104, 22, 58, 94}, + { 12, 48, 84, 2, 38, 74, 28, 64, 100}, + { 18, 54, 90, 8, 44, 80, 34, 70, 106}, + { 24, 60, 96, 14, 50, 86, 4, 40, 76}, + { 30, 66, 102, 20, 56, 92, 10, 46, 82}, + + { 1, 37, 73, 27, 63, 99, 17, 53, 89}, /* 2nd channel */ + { 7, 43, 79, 33, 69, 105, 23, 59, 95}, + { 13, 49, 85, 3, 39, 75, 29, 65, 101}, + { 19, 55, 91, 9, 45, 81, 35, 71, 107}, + { 25, 61, 97, 15, 51, 87, 5, 41, 77}, + { 31, 67, 103, 21, 57, 93, 11, 47, 83}, +}; + +static const int dv_audio_frequency[3] = { + 48000, 44100, 32000, +}; + +static const int dv_audio_min_samples[2][3] = { + { 1580, 
1452, 1053 }, /* 60 fields */ + { 1896, 1742, 1264 }, /* 50 fileds */ +}; diff --git a/src/libffmpeg/libavcodec/fft.c b/src/libffmpeg/libavcodec/fft.c index f060992f4..65eb575f3 100644 --- a/src/libffmpeg/libavcodec/fft.c +++ b/src/libffmpeg/libavcodec/fft.c @@ -53,13 +53,13 @@ int fft_init(FFTContext *s, int nbits, int inverse) /* compute constant table for HAVE_SSE version */ #if (defined(HAVE_MMX) && defined(HAVE_BUILTIN_VECTOR)) || defined(HAVE_ALTIVEC) { - int has_vectors; + int has_vectors = 0; #if defined(HAVE_MMX) has_vectors = mm_support() & MM_SSE; -#else - /* XXX: should also use mm_support() ? */ - has_vectors = has_altivec() & MM_ALTIVEC; +#endif +#if defined(HAVE_ALTIVEC) && !defined(ALTIVEC_USE_REFERENCE_C_CODE) + has_vectors = mm_support() & MM_ALTIVEC; #endif if (has_vectors) { int np, nblocks, np2, l; diff --git a/src/libffmpeg/libavcodec/h263.c b/src/libffmpeg/libavcodec/h263.c index bc21e0cd8..63bf19059 100644 --- a/src/libffmpeg/libavcodec/h263.c +++ b/src/libffmpeg/libavcodec/h263.c @@ -22,6 +22,12 @@ * qpel decoding, gmc decoding, interlaced decoding, * by Michael Niedermayer <michaelni@gmx.at> */ + +/** + * @file h263.c + * @brief h263/mpeg4 codec + * + */ //#define DEBUG #include "common.h" @@ -67,15 +73,17 @@ static inline int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr); static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, int n, int coded, int intra); static int h263_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr); -static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n, +static void mpeg4_inv_pred_ac(MpegEncContext * s, DCTELEM *block, int n, int dir); static void mpeg4_decode_sprite_trajectory(MpegEncContext * s); static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *dir_ptr); extern UINT32 inverse[256]; -static UINT16 uni_DCtab_lum [512][2]; -static UINT16 uni_DCtab_chrom[512][2]; +static UINT8 uni_DCtab_lum_len[512]; +static UINT8 uni_DCtab_chrom_len[512]; +static UINT16 uni_DCtab_lum_bits[512]; +static UINT16 uni_DCtab_chrom_bits[512]; #ifdef CONFIG_ENCODERS static UINT16 (*mv_penalty)[MAX_MV*2+1]= NULL; @@ -999,7 +1007,7 @@ static int h263_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr) } -void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n) +void h263_pred_acdc(MpegEncContext * s, DCTELEM *block, int n) { int x, y, wrap, a, c, pred_dc, scale, i; INT16 *dc_val, *ac_val, *ac_val1; @@ -1309,8 +1317,8 @@ static void init_uni_dc_tab(void) uni_len++; } } - uni_DCtab_lum[level+256][0]= uni_code; - uni_DCtab_lum[level+256][1]= uni_len; + uni_DCtab_lum_bits[level+256]= uni_code; + uni_DCtab_lum_len [level+256]= uni_len; /* chrominance */ uni_code= DCtab_chrom[size][0]; @@ -1324,8 +1332,8 @@ static void init_uni_dc_tab(void) uni_len++; } } - uni_DCtab_chrom[level+256][0]= uni_code; - uni_DCtab_chrom[level+256][1]= uni_len; + uni_DCtab_chrom_bits[level+256]= uni_code; + uni_DCtab_chrom_len [level+256]= uni_len; } } @@ -1446,6 +1454,8 @@ void h263_encode_init(MpegEncContext *s) s->intra_ac_vlc_last_length= uni_mpeg4_intra_rl_len + 128*64; s->inter_ac_vlc_length = uni_mpeg4_inter_rl_len; s->inter_ac_vlc_last_length= uni_mpeg4_inter_rl_len + 128*64; + s->luma_dc_vlc_length= uni_DCtab_lum_len; + s->chroma_dc_vlc_length= uni_DCtab_chrom_len; s->ac_esc_length= 7+2+1+6+1+12+1; break; case CODEC_ID_H263P: @@ -1470,6 +1480,11 @@ void h263_encode_init(MpegEncContext *s) } } +/** + * encodes a 8x8 block. 
+ * @param block the 8x8 block + * @param n block index (0-3 are luma, 4-5 are chroma) + */ static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n) { int level, run, last, i, j, last_index, last_non_zero, sign, slevel, code; @@ -1795,6 +1810,13 @@ static void change_qscale(MpegEncContext * s, int dquant) s->c_dc_scale= s->c_dc_scale_table[ s->qscale ]; } +/** + * predicts the dc. + * @param n block index (0-3 are luma, 4-5 are chroma) + * @param dc_val_ptr a pointer to the dc_val entry for the current MB will be stored here + * @param dir_ptr pointer to an integer where the prediction direction will be stored + * @return the quantized predicted dc + */ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *dir_ptr) { int a, b, c, wrap, pred, scale; @@ -1852,7 +1874,12 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_pt return pred; } -void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n, +/** + * predicts the ac. + * @param n block index (0-3 are luma, 4-5 are chroma) + * @param dir the ac prediction direction + */ +void mpeg4_pred_ac(MpegEncContext * s, DCTELEM *block, int n, int dir) { int i; @@ -1907,7 +1934,7 @@ void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n, } -static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n, +static void mpeg4_inv_pred_ac(MpegEncContext * s, DCTELEM *block, int n, int dir) { int i; @@ -1950,6 +1977,10 @@ static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n, } } +/** + * encodes the dc value. + * @param n block index (0-3 are luma, 4-5 are chroma) + */ static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n) { #if 1 @@ -1957,10 +1988,10 @@ static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n) level+=256; if (n < 4) { /* luminance */ - put_bits(s, uni_DCtab_lum[level][1], uni_DCtab_lum[level][0]); + put_bits(s, uni_DCtab_lum_len[level], uni_DCtab_lum_bits[level]); } else { /* chrominance */ - put_bits(s, uni_DCtab_chrom[level][1], uni_DCtab_chrom[level][0]); + put_bits(s, uni_DCtab_chrom_len[level], uni_DCtab_chrom_bits[level]); } #else int size, v; @@ -1991,6 +2022,10 @@ static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n) #endif } #ifdef CONFIG_ENCODERS +/** + * encodes a 8x8 block + * @param n block index (0-3 are luma, 4-5 are chroma) + */ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int intra_dc, UINT8 *scan_table, PutBitContext *dc_pb, PutBitContext *ac_pb) { @@ -2175,44 +2210,6 @@ static VLC dc_lum, dc_chrom; static VLC sprite_trajectory; static VLC mb_type_b_vlc; -void init_rl(RLTable *rl) -{ - INT8 max_level[MAX_RUN+1], max_run[MAX_LEVEL+1]; - UINT8 index_run[MAX_RUN+1]; - int last, run, level, start, end, i; - - /* compute max_level[], max_run[] and index_run[] */ - for(last=0;last<2;last++) { - if (last == 0) { - start = 0; - end = rl->last; - } else { - start = rl->last; - end = rl->n; - } - - memset(max_level, 0, MAX_RUN + 1); - memset(max_run, 0, MAX_LEVEL + 1); - memset(index_run, rl->n, MAX_RUN + 1); - for(i=start;i<end;i++) { - run = rl->table_run[i]; - level = rl->table_level[i]; - if (index_run[run] == rl->n) - index_run[run] = i; - if (level > max_level[run]) - max_level[run] = level; - if (run > max_run[level]) - max_run[level] = run; - } - rl->max_level[last] = av_malloc(MAX_RUN + 1); - memcpy(rl->max_level[last], max_level, MAX_RUN + 1); - rl->max_run[last] = av_malloc(MAX_LEVEL + 1); - memcpy(rl->max_run[last], max_run, 
MAX_LEVEL + 1); - rl->index_run[last] = av_malloc(MAX_RUN + 1); - memcpy(rl->index_run[last], index_run, MAX_RUN + 1); - } -} - void init_vlc_rl(RLTable *rl) { int i, q; @@ -2331,7 +2328,7 @@ static int h263_decode_gob_header(MpegEncContext *s) /* We have a GBSC probably with GSTUFF */ skip_bits(&s->gb, 16); /* Drop the zeros */ - left= s->gb.size*8 - get_bits_count(&s->gb); + left= s->gb.size_in_bits - get_bits_count(&s->gb); //MN: we must check the bits left or we might end in a infinite loop (or segfault) for(;left>13; left--){ if(get_bits1(&s->gb)) break; /* Seek the '1' bit */ @@ -2431,7 +2428,7 @@ static inline int mpeg4_is_resync(MpegEncContext *s){ return 0; } - if(bits_count + 8 >= s->gb.size*8){ + if(bits_count + 8 >= s->gb.size_in_bits){ int v= show_bits(&s->gb, 8); v|= 0x7F >> (7-(bits_count&7)); @@ -2468,7 +2465,7 @@ static int mpeg4_decode_video_packet_header(MpegEncContext *s) int header_extension=0, mb_num, len; /* is there enough space left for a video packet + header */ - if( get_bits_count(&s->gb) > s->gb.size*8-20) return -1; + if( get_bits_count(&s->gb) > s->gb.size_in_bits-20) return -1; for(len=0; len<32; len++){ if(get_bits1(&s->gb)) break; @@ -2600,7 +2597,7 @@ int ff_h263_resync(MpegEncContext *s){ //ok, its not where its supposed to be ... s->gb= s->last_resync_gb; align_get_bits(&s->gb); - left= s->gb.size*8 - get_bits_count(&s->gb); + left= s->gb.size_in_bits - get_bits_count(&s->gb); for(;left>16+1+5+5; left-=8){ if(show_bits(&s->gb, 16)==0){ @@ -2622,6 +2619,7 @@ int ff_h263_resync(MpegEncContext *s){ } /** + * gets the average motion vector for a GMC MB. * @param n either 0 for the x component or 1 for y * @returns the average MV for a GMC MB */ @@ -2654,8 +2652,7 @@ static inline int get_amv(MpegEncContext *s, int n){ v+= dx; } } - sum /= 256; - sum= RSHIFT(sum<<s->quarter_sample, a); + sum= RSHIFT(sum, a+8-s->quarter_sample); } if (sum < -len) sum= -len; @@ -3055,7 +3052,7 @@ static int mpeg4_decode_partitioned_mb(MpegEncContext *s, DCTELEM block[6][64]) /* per-MB end of slice check */ if(--s->mb_num_left <= 0){ -//printf("%06X %d\n", show_bits(&s->gb, 24), s->gb.size*8 - get_bits_count(&s->gb)); +//printf("%06X %d\n", show_bits(&s->gb, 24), s->gb.size_in_bits - get_bits_count(&s->gb)); if(mpeg4_is_resync(s)) return SLICE_END; else @@ -3419,8 +3416,8 @@ end: }else{ int v= show_bits(&s->gb, 16); - if(get_bits_count(&s->gb) + 16 > s->gb.size*8){ - v>>= get_bits_count(&s->gb) + 16 - s->gb.size*8; + if(get_bits_count(&s->gb) + 16 > s->gb.size_in_bits){ + v>>= get_bits_count(&s->gb) + 16 - s->gb.size_in_bits; } if(v==0) @@ -3588,6 +3585,12 @@ not_coded: return 0; } +/** + * decodes the dc value. + * @param n block index (0-3 are luma, 4-5 are chroma) + * @param dir_ptr the prediction direction will be stored here + * @return the quantized dc + */ static inline int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr) { int level, pred, code; @@ -4473,6 +4476,10 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){ return 0; } +/** + * decodes the user data stuff in the header. + * allso inits divx/xvid/lavc_version/build + */ static int decode_user_data(MpegEncContext *s, GetBitContext *gb){ char buf[256]; int i; @@ -4685,7 +4692,7 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){ printf("qp:%d fc:%d,%d %s size:%d pro:%d alt:%d top:%d %spel part:%d resync:%d w:%d a:%d\n", s->qscale, s->f_code, s->b_code, s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? 
"B" : "S")), - gb->size,s->progressive_sequence, s->alternate_scan, s->top_field_first, + gb->size_in_bits,s->progressive_sequence, s->alternate_scan, s->top_field_first, s->quarter_sample ? "q" : "h", s->data_partitioning, s->resync_marker, s->num_sprite_warping_points, s->sprite_warping_accuracy); } @@ -4740,9 +4747,9 @@ int ff_mpeg4_decode_picture_header(MpegEncContext * s, GetBitContext *gb) v = get_bits(gb, 8); startcode = ((startcode << 8) | v) & 0xffffffff; - if(get_bits_count(gb) >= gb->size*8){ - if(gb->size==1 && s->divx_version){ - printf("frame skip %d\n", gb->size); + if(get_bits_count(gb) >= gb->size_in_bits){ + if(gb->size_in_bits==8 && s->divx_version){ + printf("frame skip %d\n", gb->size_in_bits); return FRAME_SKIPED; //divx bug }else return -1; //end of stream diff --git a/src/libffmpeg/libavcodec/h263dec.c b/src/libffmpeg/libavcodec/h263dec.c index 93a14a06e..a5dadeec4 100644 --- a/src/libffmpeg/libavcodec/h263dec.c +++ b/src/libffmpeg/libavcodec/h263dec.c @@ -249,15 +249,17 @@ static int decode_slice(MpegEncContext *s){ /* try to detect the padding bug */ if( s->codec_id==CODEC_ID_MPEG4 && (s->workaround_bugs&FF_BUG_AUTODETECT) - && s->gb.size*8 - get_bits_count(&s->gb) >=0 - && s->gb.size*8 - get_bits_count(&s->gb) < 48 + && s->gb.size_in_bits - get_bits_count(&s->gb) >=0 + && s->gb.size_in_bits - get_bits_count(&s->gb) < 48 // && !s->resync_marker && !s->data_partitioning){ const int bits_count= get_bits_count(&s->gb); - const int bits_left = s->gb.size*8 - bits_count; + const int bits_left = s->gb.size_in_bits - bits_count; - if(bits_left==0 || bits_left>8){ + if(bits_left==0){ + s->padding_bug_score+=16; + }else if(bits_left>8){ s->padding_bug_score++; } else if(bits_left != 1){ int v= show_bits(&s->gb, 8); @@ -267,17 +269,12 @@ static int decode_slice(MpegEncContext *s){ s->padding_bug_score--; else s->padding_bug_score++; - } - - if(s->padding_bug_score > -2) - s->workaround_bugs |= FF_BUG_NO_PADDING; - else - s->workaround_bugs &= ~FF_BUG_NO_PADDING; + } } // handle formats which dont have unique end markers if(s->msmpeg4_version || (s->workaround_bugs&FF_BUG_NO_PADDING)){ //FIXME perhaps solve this more cleanly - int left= s->gb.size*8 - get_bits_count(&s->gb); + int left= s->gb.size_in_bits - get_bits_count(&s->gb); int max_extra=7; /* no markers in M$ crap */ @@ -302,7 +299,7 @@ static int decode_slice(MpegEncContext *s){ } fprintf(stderr, "slice end not reached but screenspace end (%d left %06X)\n", - s->gb.size*8 - get_bits_count(&s->gb), + s->gb.size_in_bits - get_bits_count(&s->gb), show_bits(&s->gb, 24)); return -1; } @@ -344,6 +341,61 @@ static int mpeg4_find_frame_end(MpegEncContext *s, UINT8 *buf, int buf_size){ return -1; } +static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){ + int t, x, y, f; + + ex= clip(ex, 0, w-1); + ey= clip(ey, 0, h-1); + + buf[sy*stride + sx]+= color; + + if(ABS(ex - sx) > ABS(ey - sy)){ + if(sx > ex){ + t=sx; sx=ex; ex=t; + t=sy; sy=ey; ey=t; + } + buf+= sx + sy*stride; + ex-= sx; + f= ((ey-sy)<<16)/ex; + for(x= 0; x <= ex; x++){ + y= ((x*f) + (1<<15))>>16; + buf[y*stride + x]+= color; + } + }else{ + if(sy > ey){ + t=sx; sx=ex; ex=t; + t=sy; sy=ey; ey=t; + } + buf+= sx + sy*stride; + ey-= sy; + if(ey) f= ((ex-sx)<<16)/ey; + else f= 0; + for(y= 0; y <= ey; y++){ + x= ((y*f) + (1<<15))>>16; + buf[y*stride + x]+= color; + } + } +} + +static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){ + int dx= ex - sx; + int dy= ey - sy; 
+ + if(dx*dx + dy*dy > 3*3){ + int rx= dx + dy; + int ry= -dx + dy; + int length= ff_sqrt((rx*rx + ry*ry)<<8); + + //FIXME subpixel accuracy + rx= ROUNDED_DIV(rx*3<<4, length); + ry= ROUNDED_DIV(ry*3<<4, length); + + draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color); + draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color); + } + draw_line(buf, sx, sy, ex, ey, w, h, stride, color); +} + int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *data_size, UINT8 *buf, int buf_size) @@ -368,49 +420,27 @@ uint64_t time= rdtsc(); if (buf_size == 0) { return 0; } - + if(s->flags&CODEC_FLAG_TRUNCATED){ int next; - ParseContext *pc= &s->parse_context; - pc->last_index= pc->index; - if(s->codec_id==CODEC_ID_MPEG4){ next= mpeg4_find_frame_end(s, buf, buf_size); }else{ fprintf(stderr, "this codec doesnt support truncated bitstreams\n"); return -1; } - if(next==-1){ - if(buf_size + FF_INPUT_BUFFER_PADDING_SIZE + pc->index > pc->buffer_size){ - pc->buffer_size= buf_size + pc->index + 10*1024; - pc->buffer= realloc(pc->buffer, pc->buffer_size); - } - - memcpy(&pc->buffer[pc->index], buf, buf_size); - pc->index += buf_size; + + if( ff_combine_frame(s, next, &buf, &buf_size) < 0 ) return buf_size; - } - - if(pc->index){ - if(next + FF_INPUT_BUFFER_PADDING_SIZE + pc->index > pc->buffer_size){ - pc->buffer_size= next + pc->index + 10*1024; - pc->buffer= realloc(pc->buffer, pc->buffer_size); - } - - memcpy(&pc->buffer[pc->index], buf, next + FF_INPUT_BUFFER_PADDING_SIZE ); - pc->index = 0; - buf= pc->buffer; - buf_size= pc->last_index + next; - } } retry: if(s->bitstream_buffer_size && buf_size<20){ //divx 5.01+ frame reorder - init_get_bits(&s->gb, s->bitstream_buffer, s->bitstream_buffer_size); + init_get_bits(&s->gb, s->bitstream_buffer, s->bitstream_buffer_size*8); }else - init_get_bits(&s->gb, buf, buf_size); + init_get_bits(&s->gb, buf, buf_size*8); s->bitstream_buffer_size=0; if (!s->context_initialized) { @@ -427,7 +457,7 @@ retry: if(s->avctx->extradata_size && s->picture_number==0){ GetBitContext gb; - init_get_bits(&gb, s->avctx->extradata, s->avctx->extradata_size); + init_get_bits(&gb, s->avctx->extradata, s->avctx->extradata_size*8); ret = ff_mpeg4_decode_picture_header(s, &gb); } ret = ff_mpeg4_decode_picture_header(s, &s->gb); @@ -442,6 +472,11 @@ retry: avctx->has_b_frames= !s->low_delay; if(s->workaround_bugs&FF_BUG_AUTODETECT){ + if(s->padding_bug_score > -2 && !s->data_partitioning) + s->workaround_bugs |= FF_BUG_NO_PADDING; + else + s->workaround_bugs &= ~FF_BUG_NO_PADDING; + if(s->avctx->fourcc == ff_get_fourcc("XVIX")) s->workaround_bugs|= FF_BUG_XVID_ILACE; #if 0 @@ -472,6 +507,14 @@ retry: if(s->xvid_build && s->xvid_build<=1) s->workaround_bugs|= FF_BUG_QPEL_CHROMA; +#define SET_QPEL_FUNC(postfix1, postfix2) \ + s->dsp.put_ ## postfix1 = ff_put_ ## postfix2;\ + s->dsp.put_no_rnd_ ## postfix1 = ff_put_no_rnd_ ## postfix2;\ + s->dsp.avg_ ## postfix1 = ff_avg_ ## postfix2; + + if(s->lavc_build && s->lavc_build<4653) + s->workaround_bugs|= FF_BUG_STD_QPEL; + //printf("padding_bug_score: %d\n", s->padding_bug_score); #if 0 if(s->divx_version==500) @@ -489,6 +532,21 @@ retry: #endif } + if(s->workaround_bugs& FF_BUG_STD_QPEL){ + SET_QPEL_FUNC(qpel_pixels_tab[0][ 5], qpel16_mc11_old_c) + SET_QPEL_FUNC(qpel_pixels_tab[0][ 7], qpel16_mc31_old_c) + SET_QPEL_FUNC(qpel_pixels_tab[0][ 9], qpel16_mc12_old_c) + SET_QPEL_FUNC(qpel_pixels_tab[0][11], qpel16_mc32_old_c) + SET_QPEL_FUNC(qpel_pixels_tab[0][13], qpel16_mc13_old_c) + SET_QPEL_FUNC(qpel_pixels_tab[0][15], 
qpel16_mc33_old_c) + + SET_QPEL_FUNC(qpel_pixels_tab[1][ 5], qpel8_mc11_old_c) + SET_QPEL_FUNC(qpel_pixels_tab[1][ 7], qpel8_mc31_old_c) + SET_QPEL_FUNC(qpel_pixels_tab[1][ 9], qpel8_mc12_old_c) + SET_QPEL_FUNC(qpel_pixels_tab[1][11], qpel8_mc32_old_c) + SET_QPEL_FUNC(qpel_pixels_tab[1][13], qpel8_mc13_old_c) + SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_old_c) + } #if 0 // dump bits per frame / qp / complexity { @@ -571,7 +629,7 @@ retry: decode_slice(s); s->error_status_table[0]|= VP_START; - while(s->mb_y<s->mb_height && s->gb.size*8 - get_bits_count(&s->gb)>16){ + while(s->mb_y<s->mb_height && s->gb.size_in_bits - get_bits_count(&s->gb)>16){ if(s->msmpeg4_version){ if(s->mb_x!=0 || (s->mb_y%s->slice_height)!=0) break; @@ -580,7 +638,7 @@ retry: break; } - if(s->msmpeg4_version!=4 && s->h263_pred) + if(s->msmpeg4_version<4 && s->h263_pred) ff_mpeg4_clean_buffers(s); decode_slice(s); @@ -645,41 +703,40 @@ retry: } MPV_frame_end(s); -#if 0 //dirty show MVs, we should export the MV tables and write a filter to show them -{ - int mb_y; - s->has_b_frames=1; - for(mb_y=0; mb_y<s->mb_height; mb_y++){ - int mb_x; - int y= mb_y*16 + 8; - for(mb_x=0; mb_x<s->mb_width; mb_x++){ - int x= mb_x*16 + 8; - uint8_t *ptr= s->last_picture.data[0]; - int xy= 1 + mb_x*2 + (mb_y*2 + 1)*(s->mb_width*2 + 2); - int mx= (s->motion_val[xy][0]>>1) + x; - int my= (s->motion_val[xy][1]>>1) + y; - int i; - int max; - - if(mx<0) mx=0; - if(my<0) my=0; - if(mx>=s->width) mx= s->width -1; - if(my>=s->height) my= s->height-1; - max= ABS(mx-x); - if(ABS(my-y) > max) max= ABS(my-y); - /* the ugliest linedrawing routine ... */ - for(i=0; i<max; i++){ - int x1= x + (mx-x)*i/max; - int y1= y + (my-y)*i/max; - ptr[y1*s->linesize + x1]+=100; - } - ptr[y*s->linesize + x]+=100; - s->mbskip_table[mb_x + mb_y*s->mb_width]=0; + + if((avctx->debug&FF_DEBUG_VIS_MV) && s->last_picture.data[0]){ + const int shift= 1 + s->quarter_sample; + int mb_y; + uint8_t *ptr= s->last_picture.data[0]; + s->low_delay=0; //needed to see the vectors without trashing the buffers + + for(mb_y=0; mb_y<s->mb_height; mb_y++){ + int mb_x; + for(mb_x=0; mb_x<s->mb_width; mb_x++){ + const int mb_index= mb_x + mb_y*s->mb_width; + if(s->co_located_type_table[mb_index] == MV_TYPE_8X8){ + int i; + for(i=0; i<4; i++){ + int sx= mb_x*16 + 4 + 8*(i&1); + int sy= mb_y*16 + 4 + 8*(i>>1); + int xy= 1 + mb_x*2 + (i&1) + (mb_y*2 + 1 + (i>>1))*(s->mb_width*2 + 2); + int mx= (s->motion_val[xy][0]>>shift) + sx; + int my= (s->motion_val[xy][1]>>shift) + sy; + draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100); + } + }else{ + int sx= mb_x*16 + 8; + int sy= mb_y*16 + 8; + int xy= 1 + mb_x*2 + (mb_y*2 + 1)*(s->mb_width*2 + 2); + int mx= (s->motion_val[xy][0]>>shift) + sx; + int my= (s->motion_val[xy][1]>>shift) + sy; + draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100); + } + s->mbskip_table[mb_index]=0; + } + } } - } -} -#endif if(s->pict_type==B_TYPE || s->low_delay){ *pict= *(AVFrame*)&s->current_picture; diff --git a/src/libffmpeg/libavcodec/huffyuv.c b/src/libffmpeg/libavcodec/huffyuv.c index 0eb701037..cff642d11 100644 --- a/src/libffmpeg/libavcodec/huffyuv.c +++ b/src/libffmpeg/libavcodec/huffyuv.c @@ -1,7 +1,7 @@ /* * huffyuv codec for libavcodec * - * Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at> + * Copyright (c) 2002-2003 Michael Niedermayer <michaelni@gmx.at> * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -271,7 +271,7 @@ static 
int read_huffman_tables(HYuvContext *s, uint8_t *src, int length){ GetBitContext gb; int i; - init_get_bits(&gb, src, length); + init_get_bits(&gb, src, length*8); for(i=0; i<3; i++){ read_len_table(s->len[i], &gb); @@ -295,9 +295,9 @@ static int read_old_huffman_tables(HYuvContext *s){ GetBitContext gb; int i; - init_get_bits(&gb, classic_shift_luma, sizeof(classic_shift_luma)); + init_get_bits(&gb, classic_shift_luma, sizeof(classic_shift_luma)*8); read_len_table(s->len[0], &gb); - init_get_bits(&gb, classic_shift_chroma, sizeof(classic_shift_chroma)); + init_get_bits(&gb, classic_shift_chroma, sizeof(classic_shift_chroma)*8); read_len_table(s->len[1], &gb); for(i=0; i<256; i++) s->bits[0][i] = classic_add_luma [i]; @@ -403,7 +403,7 @@ s->bgr32=1; case 24: case 32: if(s->bgr32){ - avctx->pix_fmt = PIX_FMT_BGRA32; + avctx->pix_fmt = PIX_FMT_RGBA32; }else{ avctx->pix_fmt = PIX_FMT_BGR24; } @@ -461,8 +461,6 @@ static int encode_init(AVCodecContext *avctx) s->version=2; avctx->coded_frame= &s->picture; - s->picture.pict_type= FF_I_TYPE; - s->picture.key_frame= 1; switch(avctx->pix_fmt){ case PIX_FMT_YUV420P: @@ -682,7 +680,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 bswap_buf((uint32_t*)s->bitstream_buffer, (uint32_t*)buf, buf_size/4); - init_get_bits(&s->gb, s->bitstream_buffer, buf_size); + init_get_bits(&s->gb, s->bitstream_buffer, buf_size*8); p->reference= 0; if(avctx->get_buffer(avctx, p) < 0){ @@ -933,6 +931,8 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, init_put_bits(&s->pb, buf, buf_size, NULL, NULL); *p = *pict; + p->pict_type= FF_I_TYPE; + p->key_frame= 1; if(avctx->pix_fmt == PIX_FMT_YUV422P || avctx->pix_fmt == PIX_FMT_YUV420P){ int lefty, leftu, leftv, y, cy; diff --git a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c index 5fce7f914..857f1d398 100644 --- a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c +++ b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c @@ -485,6 +485,107 @@ static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ dst[i+0] += src[i+0]; } +static int pix_norm1_mmx(uint8_t *pix, int line_size) { + int tmp; + asm volatile ( + "movl $16,%%ecx\n" + "pxor %%mm0,%%mm0\n" + "pxor %%mm7,%%mm7\n" + "1:\n" + "movq (%0),%%mm2\n" /* mm2 = pix[0-7] */ + "movq 8(%0),%%mm3\n" /* mm3 = pix[8-15] */ + + "movq %%mm2,%%mm1\n" /* mm1 = mm2 = pix[0-7] */ + + "punpckhbw %%mm0,%%mm1\n" /* mm1 = [pix4-7] */ + "punpcklbw %%mm0,%%mm2\n" /* mm2 = [pix0-3] */ + + "movq %%mm3,%%mm4\n" /* mm4 = mm3 = pix[8-15] */ + "punpckhbw %%mm0,%%mm3\n" /* mm3 = [pix12-15] */ + "punpcklbw %%mm0,%%mm4\n" /* mm4 = [pix8-11] */ + + "pmaddwd %%mm1,%%mm1\n" /* mm1 = (pix0^2+pix1^2,pix2^2+pix3^2) */ + "pmaddwd %%mm2,%%mm2\n" /* mm2 = (pix4^2+pix5^2,pix6^2+pix7^2) */ + + "pmaddwd %%mm3,%%mm3\n" + "pmaddwd %%mm4,%%mm4\n" + + "paddd %%mm1,%%mm2\n" /* mm2 = (pix0^2+pix1^2+pix4^2+pix5^2, + pix2^2+pix3^2+pix6^2+pix7^2) */ + "paddd %%mm3,%%mm4\n" + "paddd %%mm2,%%mm7\n" + + "addl %2, %0\n" + "paddd %%mm4,%%mm7\n" + "dec %%ecx\n" + "jnz 1b\n" + + "movq %%mm7,%%mm1\n" + "psrlq $32, %%mm7\n" /* shift hi dword to lo */ + "paddd %%mm7,%%mm1\n" + "movd %%mm1,%1\n" + : "+r" (pix), "=r"(tmp) : "r" (line_size) : "%ecx" ); + return tmp; +} + +static int sse16_mmx(void *v, UINT8 * pix1, UINT8 * pix2, int line_size) { + int tmp; + asm volatile ( + "movl $16,%%ecx\n" + "pxor %%mm0,%%mm0\n" /* mm0 = 0 */ + "pxor %%mm7,%%mm7\n" /* mm7 holds the sum */ + "1:\n" + "movq (%0),%%mm1\n" /* mm1 = pix1[0-7] */ + 
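/* annotation: sse16_mmx returns the sum of squared differences between
+       two 16x16 blocks: each pass computes |pix1-pix2| per byte with the
+       two saturated subtractions or-ed together, widens to 16 bits,
+       squares and pair-sums via pmaddwd, and accumulates the 32-bit
+       partial sums in mm7, which is folded into a scalar at the end. */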
"movq (%1),%%mm2\n" /* mm2 = pix2[0-7] */ + "movq 8(%0),%%mm3\n" /* mm3 = pix1[8-15] */ + "movq 8(%1),%%mm4\n" /* mm4 = pix2[8-15] */ + + /* todo: mm1-mm2, mm3-mm4 */ + /* algo: substract mm1 from mm2 with saturation and vice versa */ + /* OR the results to get absolute difference */ + "movq %%mm1,%%mm5\n" + "movq %%mm3,%%mm6\n" + "psubusb %%mm2,%%mm1\n" + "psubusb %%mm4,%%mm3\n" + "psubusb %%mm5,%%mm2\n" + "psubusb %%mm6,%%mm4\n" + + "por %%mm1,%%mm2\n" + "por %%mm3,%%mm4\n" + + /* now convert to 16-bit vectors so we can square them */ + "movq %%mm2,%%mm1\n" + "movq %%mm4,%%mm3\n" + + "punpckhbw %%mm0,%%mm2\n" + "punpckhbw %%mm0,%%mm4\n" + "punpcklbw %%mm0,%%mm1\n" /* mm1 now spread over (mm1,mm2) */ + "punpcklbw %%mm0,%%mm3\n" /* mm4 now spread over (mm3,mm4) */ + + "pmaddwd %%mm2,%%mm2\n" + "pmaddwd %%mm4,%%mm4\n" + "pmaddwd %%mm1,%%mm1\n" + "pmaddwd %%mm3,%%mm3\n" + + "addl %3,%0\n" + "addl %3,%1\n" + + "paddd %%mm2,%%mm1\n" + "paddd %%mm4,%%mm3\n" + "paddd %%mm1,%%mm7\n" + "paddd %%mm3,%%mm7\n" + + "decl %%ecx\n" + "jnz 1b\n" + + "movq %%mm7,%%mm1\n" + "psrlq $32, %%mm7\n" /* shift hi dword to lo */ + "paddd %%mm7,%%mm1\n" + "movd %%mm1,%2\n" + : "+r" (pix1), "+r" (pix2), "=r"(tmp) : "r" (line_size) : "ecx"); + return tmp; +} + static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ int i=0; asm volatile( @@ -1085,7 +1186,7 @@ static void OPNAME ## qpel8_mc00_ ## MMX (UINT8 *dst, UINT8 *src, int stride){\ }\ \ static void OPNAME ## qpel8_mc10_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ - uint64_t temp[32];\ + uint64_t temp[8];\ uint8_t * const half= (uint8_t*)temp;\ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\ OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\ @@ -1096,14 +1197,14 @@ static void OPNAME ## qpel8_mc20_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ }\ \ static void OPNAME ## qpel8_mc30_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ - uint64_t temp[32];\ + uint64_t temp[8];\ uint8_t * const half= (uint8_t*)temp;\ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\ OPNAME ## pixels8_l2_mmx(dst, src+1, half, stride, stride, 8);\ }\ \ static void OPNAME ## qpel8_mc01_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ - uint64_t temp[32];\ + uint64_t temp[8];\ uint8_t * const half= (uint8_t*)temp;\ put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\ OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\ @@ -1114,53 +1215,49 @@ static void OPNAME ## qpel8_mc02_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ }\ \ static void OPNAME ## qpel8_mc03_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ - uint64_t temp[32];\ + uint64_t temp[8];\ uint8_t * const half= (uint8_t*)temp;\ put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\ OPNAME ## pixels8_l2_mmx(dst, src+stride, half, stride, stride, 8);\ }\ static void OPNAME ## qpel8_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ - uint64_t half[8*2 + 8*2 + 18*2];\ - uint8_t * const halfH= ((uint8_t*)half) + 2*64 + 8;\ - uint8_t * const halfV= ((uint8_t*)half);\ - uint8_t * const halfHV= ((uint8_t*)half) + 64;\ + uint64_t half[8 + 9];\ + uint8_t * const halfH= ((uint8_t*)half) + 64;\ + uint8_t * const halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ - put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src, 8, stride);\ + put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ - OPNAME ## 
pixels8_l4_mmx(dst, src, (uint8_t*)half, stride, 8);\ + OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ }\ static void OPNAME ## qpel8_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ - uint64_t half[8*2 + 8*2 + 18*2];\ - uint8_t * const halfH= ((uint8_t*)half) + 2*64 + 8;\ - uint8_t * const halfV= ((uint8_t*)half);\ - uint8_t * const halfHV= ((uint8_t*)half) + 64;\ + uint64_t half[8 + 9];\ + uint8_t * const halfH= ((uint8_t*)half) + 64;\ + uint8_t * const halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ - put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src+1, 8, stride);\ + put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l4_mmx(dst, src+1, (uint8_t*)half, stride, 8);\ + OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ }\ static void OPNAME ## qpel8_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ - uint64_t half[8*2 + 8*2 + 9*2];\ - uint8_t * const halfH= ((uint8_t*)half) + 2*64;\ - uint8_t * const halfV= ((uint8_t*)half);\ - uint8_t * const halfHV= ((uint8_t*)half) + 64;\ + uint64_t half[8 + 9];\ + uint8_t * const halfH= ((uint8_t*)half) + 64;\ + uint8_t * const halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ - put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src, 8, stride);\ + put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l4_mmx(dst, src+stride, (uint8_t*)half, stride, 8);\ + OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ }\ static void OPNAME ## qpel8_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ - uint64_t half[8*2 + 8*2 + 9*2];\ - uint8_t * const halfH= ((uint8_t*)half) + 2*64;\ - uint8_t * const halfV= ((uint8_t*)half);\ - uint8_t * const halfHV= ((uint8_t*)half) + 64;\ - put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src , 8, stride, 9);\ - put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src+1, 8, stride);\ + uint64_t half[8 + 9];\ + uint8_t * const halfH= ((uint8_t*)half) + 64;\ + uint8_t * const halfHV= ((uint8_t*)half);\ + put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ + put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l4_mmx(dst, src+stride+1, (uint8_t*)half, stride, 8);\ + OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ }\ static void OPNAME ## qpel8_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ - uint64_t half[8*2 + 9*2];\ + uint64_t half[8 + 9];\ uint8_t * const halfH= ((uint8_t*)half) + 64;\ uint8_t * const halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ @@ -1168,7 +1265,7 @@ static void OPNAME ## qpel8_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ }\ static void OPNAME ## qpel8_mc23_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ - uint64_t half[8*2 + 9*2];\ + uint64_t half[8 + 9];\ uint8_t * const halfH= ((uint8_t*)half) + 64;\ uint8_t * const halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ @@ -1176,27 +1273,21 @@ static void OPNAME ## qpel8_mc23_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ }\ static void OPNAME ## qpel8_mc12_ ## 
MMX(UINT8 *dst, UINT8 *src, int stride){\ - uint64_t half[8*2 + 8*2 + 9*2];\ - uint8_t * const halfH= ((uint8_t*)half) + 2*64;\ - uint8_t * const halfV= ((uint8_t*)half);\ - uint8_t * const halfHV= ((uint8_t*)half) + 64;\ + uint64_t half[8 + 9];\ + uint8_t * const halfH= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ - put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src, 8, stride);\ - put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l2_mmx(dst, halfV, halfHV, stride, 8, 8);\ + put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ + OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ }\ static void OPNAME ## qpel8_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ - uint64_t half[8*2 + 8*2 + 9*2];\ - uint8_t * const halfH= ((uint8_t*)half) + 2*64;\ - uint8_t * const halfV= ((uint8_t*)half);\ - uint8_t * const halfHV= ((uint8_t*)half) + 64;\ + uint64_t half[8 + 9];\ + uint8_t * const halfH= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ - put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src+1, 8, stride);\ - put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l2_mmx(dst, halfV, halfHV, stride, 8, 8);\ + put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ + OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ }\ static void OPNAME ## qpel8_mc22_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ - uint64_t half[9*2];\ + uint64_t half[9];\ uint8_t * const halfH= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ @@ -1241,44 +1332,40 @@ static void OPNAME ## qpel16_mc03_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ OPNAME ## pixels16_l2_mmx(dst, src+stride, half, stride, stride, 16);\ }\ static void OPNAME ## qpel16_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ - uint64_t half[16*2 + 16*2 + 18*2];\ - uint8_t * const halfH= ((uint8_t*)half) + 2*256 + 16;\ - uint8_t * const halfV= ((uint8_t*)half);\ - uint8_t * const halfHV= ((uint8_t*)half) + 256;\ + uint64_t half[16*2 + 17*2];\ + uint8_t * const halfH= ((uint8_t*)half) + 256;\ + uint8_t * const halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ - put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src, 16, stride);\ + put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l4_mmx(dst, src, (uint8_t*)half, stride, 16);\ + OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\ }\ static void OPNAME ## qpel16_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ - uint64_t half[16*2 + 16*2 + 18*2];\ - uint8_t * const halfH= ((uint8_t*)half) + 2*256 + 16;\ - uint8_t * const halfV= ((uint8_t*)half);\ - uint8_t * const halfHV= ((uint8_t*)half) + 256;\ + uint64_t half[16*2 + 17*2];\ + uint8_t * const halfH= ((uint8_t*)half) + 256;\ + uint8_t * const halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ - put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src+1, 16, stride);\ + put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l4_mmx(dst, src+1, (uint8_t*)half, stride, 16);\ + OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, 
stride, 16, 16);\ }\ static void OPNAME ## qpel16_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ - uint64_t half[16*2 + 16*2 + 17*2];\ - uint8_t * const halfH= ((uint8_t*)half) + 2*256;\ - uint8_t * const halfV= ((uint8_t*)half);\ - uint8_t * const halfHV= ((uint8_t*)half) + 256;\ + uint64_t half[16*2 + 17*2];\ + uint8_t * const halfH= ((uint8_t*)half) + 256;\ + uint8_t * const halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ - put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src, 16, stride);\ + put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l4_mmx(dst, src+stride, (uint8_t*)half, stride, 16);\ + OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ }\ static void OPNAME ## qpel16_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ - uint64_t half[16*2 + 16*2 + 17*2];\ - uint8_t * const halfH= ((uint8_t*)half) + 2*256;\ - uint8_t * const halfV= ((uint8_t*)half);\ - uint8_t * const halfHV= ((uint8_t*)half) + 256;\ - put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src , 16, stride, 17);\ - put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src+1, 16, stride);\ + uint64_t half[16*2 + 17*2];\ + uint8_t * const halfH= ((uint8_t*)half) + 256;\ + uint8_t * const halfHV= ((uint8_t*)half);\ + put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ + put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l4_mmx(dst, src+stride+1, (uint8_t*)half, stride, 16);\ + OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ }\ static void OPNAME ## qpel16_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ uint64_t half[16*2 + 17*2];\ @@ -1297,24 +1384,18 @@ static void OPNAME ## qpel16_mc23_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ }\ static void OPNAME ## qpel16_mc12_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ - uint64_t half[16*2 + 16*2 + 17*2];\ - uint8_t * const halfH= ((uint8_t*)half) + 2*256;\ - uint8_t * const halfV= ((uint8_t*)half);\ - uint8_t * const halfHV= ((uint8_t*)half) + 256;\ + uint64_t half[17*2];\ + uint8_t * const halfH= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ - put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src, 16, stride);\ - put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l2_mmx(dst, halfV, halfHV, stride, 16, 16);\ + put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ + OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\ }\ static void OPNAME ## qpel16_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ - uint64_t half[16*2 + 16*2 + 17*2];\ - uint8_t * const halfH= ((uint8_t*)half) + 2*256;\ - uint8_t * const halfV= ((uint8_t*)half);\ - uint8_t * const halfHV= ((uint8_t*)half) + 256;\ + uint64_t half[17*2];\ + uint8_t * const halfH= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ - put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src+1, 16, stride);\ - put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l2_mmx(dst, halfV, halfHV, stride, 16, 16);\ + put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ + OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 
16);\ }\ static void OPNAME ## qpel16_mc22_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ uint64_t half[17*2];\ @@ -1436,6 +1517,9 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask) c->sad[0]= sad16x16_mmx; c->sad[1]= sad8x8_mmx; + + c->pix_norm1 = pix_norm1_mmx; + c->sse[0] = sse16_mmx; if (mm_flags & MM_MMXEXT) { c->pix_abs16x16 = pix_abs16x16_mmx2; @@ -1525,7 +1609,7 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask) c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow; c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow; c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; - + SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_3dnow) SET_QPEL_FUNC(qpel_pixels_tab[0][ 1], qpel16_mc10_3dnow) SET_QPEL_FUNC(qpel_pixels_tab[0][ 2], qpel16_mc20_3dnow) diff --git a/src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h b/src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h index 0ae1cd99d..956edf798 100644 --- a/src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h +++ b/src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h @@ -58,6 +58,16 @@ static void DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int { MOVQ_BFE(mm6); __asm __volatile( + "testl $1, %0 \n\t" + " jz 1f \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%2), %%mm1 \n\t" + "addl %4, %1 \n\t" + "addl $8, %2 \n\t" + PAVGB(%%mm0, %%mm1, %%mm4, %%mm6) + "movq %%mm4, (%3) \n\t" + "addl %5, %3 \n\t" + "decl %0 \n\t" ".balign 8 \n\t" "1: \n\t" "movq (%1), %%mm0 \n\t" @@ -144,6 +154,19 @@ static void DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, in { MOVQ_BFE(mm6); __asm __volatile( + "testl $1, %0 \n\t" + " jz 1f \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%2), %%mm1 \n\t" + "movq 8(%1), %%mm2 \n\t" + "movq 8(%2), %%mm3 \n\t" + "addl %4, %1 \n\t" + "addl $16, %2 \n\t" + PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) + "movq %%mm4, (%3) \n\t" + "movq %%mm5, 8(%3) \n\t" + "addl %5, %3 \n\t" + "decl %0 \n\t" ".balign 8 \n\t" "1: \n\t" "movq (%1), %%mm0 \n\t" @@ -271,124 +294,6 @@ static void DEF(put, pixels8_xy2)(UINT8 *block, const UINT8 *pixels, int line_si :"eax", "memory"); } -static void DEF(put, pixels8_l4)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int stride, int h) -{ - MOVQ_ZERO(mm7); - SET_RND(mm6); // =2 for rnd and =1 for no_rnd version - __asm __volatile( - ".balign 8 \n\t" - "1: \n\t" - "movq (%1), %%mm0 \n\t" - "movq (%2), %%mm1 \n\t" - "movq 64(%2), %%mm2 \n\t" - "movq 136(%2), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "paddusw %%mm6, %%mm0 \n\t" - "paddusw %%mm0, %%mm1 \n\t" - "paddusw %%mm2, %%mm3 \n\t" - "paddusw %%mm1, %%mm3 \n\t" - "psrlw $2, %%mm3 \n\t" - "movq (%1), %%mm0 \n\t" - "movq (%2), %%mm1 \n\t" - "movq 64(%2), %%mm2 \n\t" - "movq 136(%2), %%mm4 \n\t" - "punpckhbw %%mm7, %%mm0 \n\t" - "punpckhbw %%mm7, %%mm1 \n\t" - "punpckhbw %%mm7, %%mm2 \n\t" - "punpckhbw %%mm7, %%mm4 \n\t" - "paddusw %%mm6, %%mm0 \n\t" - "paddusw %%mm0, %%mm1 \n\t" - "paddusw %%mm2, %%mm4 \n\t" - "paddusw %%mm1, %%mm4 \n\t" - "psrlw $2, %%mm4 \n\t" - "packuswb %%mm4, %%mm3 \n\t" - "movq %%mm3, (%0) \n\t" - "addl %4, %0 \n\t" - "addl %4, %1 \n\t" - "addl $8, %2 \n\t" - "decl %3 \n\t" - "jnz 1b \n\t" - :"+r"(dst), "+r"(src1), "+r"(src2), "+r"(h) - :"r"(stride) - :"memory"); -} - -static void DEF(put, pixels16_l4)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int stride, int h) -{ - MOVQ_ZERO(mm7); - SET_RND(mm6); // =2 for rnd and =1 for no_rnd version - __asm __volatile( - ".balign 8 \n\t" - "1: \n\t" - "movq (%1), %%mm0 \n\t" - "movq (%2), 
%%mm1 \n\t" - "movq 256(%2), %%mm2 \n\t" - "movq 528(%2), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "paddusw %%mm6, %%mm0 \n\t" - "paddusw %%mm0, %%mm1 \n\t" - "paddusw %%mm2, %%mm3 \n\t" - "paddusw %%mm1, %%mm3 \n\t" - "psrlw $2, %%mm3 \n\t" - "movq (%1), %%mm0 \n\t" - "movq (%2), %%mm1 \n\t" - "movq 256(%2), %%mm2 \n\t" - "movq 528(%2), %%mm4 \n\t" - "punpckhbw %%mm7, %%mm0 \n\t" - "punpckhbw %%mm7, %%mm1 \n\t" - "punpckhbw %%mm7, %%mm2 \n\t" - "punpckhbw %%mm7, %%mm4 \n\t" - "paddusw %%mm6, %%mm0 \n\t" - "paddusw %%mm0, %%mm1 \n\t" - "paddusw %%mm2, %%mm4 \n\t" - "paddusw %%mm1, %%mm4 \n\t" - "psrlw $2, %%mm4 \n\t" - "packuswb %%mm4, %%mm3 \n\t" - "movq %%mm3, (%0) \n\t" - "movq 8(%1), %%mm0 \n\t" - "movq 8(%2), %%mm1 \n\t" - "movq 264(%2), %%mm2 \n\t" - "movq 536(%2), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "paddusw %%mm6, %%mm0 \n\t" - "paddusw %%mm0, %%mm1 \n\t" - "paddusw %%mm2, %%mm3 \n\t" - "paddusw %%mm1, %%mm3 \n\t" - "psrlw $2, %%mm3 \n\t" - "movq 8(%1), %%mm0 \n\t" - "movq 8(%2), %%mm1 \n\t" - "movq 264(%2), %%mm2 \n\t" - "movq 536(%2), %%mm4 \n\t" - "punpckhbw %%mm7, %%mm0 \n\t" - "punpckhbw %%mm7, %%mm1 \n\t" - "punpckhbw %%mm7, %%mm2 \n\t" - "punpckhbw %%mm7, %%mm4 \n\t" - "paddusw %%mm6, %%mm0 \n\t" - "paddusw %%mm0, %%mm1 \n\t" - "paddusw %%mm2, %%mm4 \n\t" - "paddusw %%mm1, %%mm4 \n\t" - "psrlw $2, %%mm4 \n\t" - "packuswb %%mm4, %%mm3 \n\t" - "movq %%mm3, 8(%0) \n\t" - "addl %4, %0 \n\t" - "addl %4, %1 \n\t" - "addl $16, %2 \n\t" - "decl %3 \n\t" - "jnz 1b \n\t" - :"+r"(dst), "+r"(src1), "+r"(src2), "+r"(h) - :"r"(stride) - :"memory"); -} - // avg_pixels // in case more speed is needed - unroling would certainly help static void DEF(avg, pixels8)(UINT8 *block, const UINT8 *pixels, int line_size, int h) @@ -641,133 +546,6 @@ static void DEF(avg, pixels8_xy2)(UINT8 *block, const UINT8 *pixels, int line_si :"eax", "memory"); } -static void DEF(avg, pixels8_l4)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int stride, int h) -{ - MOVQ_ZERO(mm7); - SET_RND(mm6); // =2 for rnd and =1 for no_rnd version - MOVQ_BFE(mm5); - __asm __volatile( - ".balign 8 \n\t" - "1: \n\t" - "movq (%1), %%mm0 \n\t" - "movq (%2), %%mm1 \n\t" - "movq 64(%2), %%mm2 \n\t" - "movq 136(%2), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "paddusw %%mm6, %%mm0 \n\t" - "paddusw %%mm0, %%mm1 \n\t" - "paddusw %%mm2, %%mm3 \n\t" - "paddusw %%mm1, %%mm3 \n\t" - "psrlw $2, %%mm3 \n\t" - "movq (%1), %%mm0 \n\t" - "movq (%2), %%mm1 \n\t" - "movq 64(%2), %%mm2 \n\t" - "movq 136(%2), %%mm4 \n\t" - "punpckhbw %%mm7, %%mm0 \n\t" - "punpckhbw %%mm7, %%mm1 \n\t" - "punpckhbw %%mm7, %%mm2 \n\t" - "punpckhbw %%mm7, %%mm4 \n\t" - "paddusw %%mm6, %%mm0 \n\t" - "paddusw %%mm0, %%mm1 \n\t" - "paddusw %%mm2, %%mm4 \n\t" - "paddusw %%mm1, %%mm4 \n\t" - "psrlw $2, %%mm4 \n\t" - "packuswb %%mm4, %%mm3 \n\t" - "movq (%0), %%mm4 \n\t" - PAVGB(%%mm3, %%mm4, %%mm0, %%mm5) - "movq %%mm0, (%0) \n\t" - "addl %4, %0 \n\t" - "addl %4, %1 \n\t" - "addl $8, %2 \n\t" - "decl %3 \n\t" - "jnz 1b \n\t" - :"+r"(dst), "+r"(src1), "+r"(src2), "+r"(h) - :"r"(stride) - :"memory"); -} - -static void DEF(avg, pixels16_l4)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int stride, int h) -{ - MOVQ_ZERO(mm7); - SET_RND(mm6); // =2 for rnd and =1 for no_rnd version - 
MOVQ_BFE(mm5); - __asm __volatile( - ".balign 8 \n\t" - "1: \n\t" - "movq (%1), %%mm0 \n\t" - "movq (%2), %%mm1 \n\t" - "movq 256(%2), %%mm2 \n\t" - "movq 528(%2), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "paddusw %%mm6, %%mm0 \n\t" - "paddusw %%mm0, %%mm1 \n\t" - "paddusw %%mm2, %%mm3 \n\t" - "paddusw %%mm1, %%mm3 \n\t" - "psrlw $2, %%mm3 \n\t" - "movq (%1), %%mm0 \n\t" - "movq (%2), %%mm1 \n\t" - "movq 256(%2), %%mm2 \n\t" - "movq 528(%2), %%mm4 \n\t" - "punpckhbw %%mm7, %%mm0 \n\t" - "punpckhbw %%mm7, %%mm1 \n\t" - "punpckhbw %%mm7, %%mm2 \n\t" - "punpckhbw %%mm7, %%mm4 \n\t" - "paddusw %%mm6, %%mm0 \n\t" - "paddusw %%mm0, %%mm1 \n\t" - "paddusw %%mm2, %%mm4 \n\t" - "paddusw %%mm1, %%mm4 \n\t" - "psrlw $2, %%mm4 \n\t" - "packuswb %%mm4, %%mm3 \n\t" - "movq (%0), %%mm4 \n\t" - PAVGB(%%mm3, %%mm4, %%mm0, %%mm5) - "movq %%mm0, (%0) \n\t" - "movq 8(%1), %%mm0 \n\t" - "movq 8(%2), %%mm1 \n\t" - "movq 264(%2), %%mm2 \n\t" - "movq 536(%2), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "paddusw %%mm6, %%mm0 \n\t" - "paddusw %%mm0, %%mm1 \n\t" - "paddusw %%mm2, %%mm3 \n\t" - "paddusw %%mm1, %%mm3 \n\t" - "psrlw $2, %%mm3 \n\t" - "movq 8(%1), %%mm0 \n\t" - "movq 8(%2), %%mm1 \n\t" - "movq 264(%2), %%mm2 \n\t" - "movq 536(%2), %%mm4 \n\t" - "punpckhbw %%mm7, %%mm0 \n\t" - "punpckhbw %%mm7, %%mm1 \n\t" - "punpckhbw %%mm7, %%mm2 \n\t" - "punpckhbw %%mm7, %%mm4 \n\t" - "paddusw %%mm6, %%mm0 \n\t" - "paddusw %%mm0, %%mm1 \n\t" - "paddusw %%mm2, %%mm4 \n\t" - "paddusw %%mm1, %%mm4 \n\t" - "psrlw $2, %%mm4 \n\t" - "packuswb %%mm4, %%mm3 \n\t" - "movq 8(%0), %%mm4 \n\t" - PAVGB(%%mm3, %%mm4, %%mm0, %%mm5) - "movq %%mm0, 8(%0) \n\t" - "addl %4, %0 \n\t" - "addl %4, %1 \n\t" - "addl $16, %2 \n\t" - "decl %3 \n\t" - "jnz 1b \n\t" - :"+r"(dst), "+r"(src1), "+r"(src2), "+r"(h) - :"r"(stride) - :"memory"); -} - - //FIXME optimize static void DEF(put, pixels16_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h){ DEF(put, pixels8_y2)(block , pixels , line_size, h); diff --git a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c index 799ff1666..ead30ed31 100644 --- a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c +++ b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c @@ -53,8 +53,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s, if (!s->h263_aic) { #if 1 asm volatile ( - "xorl %%edx, %%edx \n\t" - "mul %%ecx \n\t" + "imul %%ecx \n\t" : "=d" (level), "=a"(dummy) : "a" ((block[0]>>2) + q), "c" (inverse[q<<1]) ); diff --git a/src/libffmpeg/libavcodec/imgconvert.c b/src/libffmpeg/libavcodec/imgconvert.c index bdf6fe65d..2304092fd 100644 --- a/src/libffmpeg/libavcodec/imgconvert.c +++ b/src/libffmpeg/libavcodec/imgconvert.c @@ -1,6 +1,6 @@ /* * Misc image convertion routines - * Copyright (c) 2001, 2002 Fabrice Bellard. + * Copyright (c) 2001, 2002, 2003 Fabrice Bellard. 
* * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -26,14 +26,220 @@ #ifdef HAVE_MMX #include "i386/mmx.h" #endif + +typedef struct PixFmtInfo { + const char *name; + UINT8 nb_components; /* number of components in AVPicture array */ + UINT8 is_yuv : 1; /* true if YUV instead of RGB color space */ + UINT8 is_packed : 1; /* true if multiple components in same word */ + UINT8 is_paletted : 1; /* true if paletted */ + UINT8 is_alpha : 1; /* true if alpha can be specified */ + UINT8 is_gray : 1; /* true if gray or monochrome format */ + UINT8 x_chroma_shift; /* X chroma subsampling factor is 2 ^ shift */ + UINT8 y_chroma_shift; /* Y chroma subsampling factor is 2 ^ shift */ +} PixFmtInfo; + +/* this table gives more information about formats */ +static PixFmtInfo pix_fmt_info[PIX_FMT_NB] = { + /* YUV formats */ + [PIX_FMT_YUV420P] = { + .name = "yuv420p", + .nb_components = 3, .is_yuv = 1, + .x_chroma_shift = 1, .y_chroma_shift = 1, + }, + [PIX_FMT_YUV422P] = { + .name = "yuv422p", + .nb_components = 3, .is_yuv = 1, + .x_chroma_shift = 1, .y_chroma_shift = 0, + }, + [PIX_FMT_YUV444P] = { + .name = "yuv444p", + .nb_components = 3, .is_yuv = 1, + .x_chroma_shift = 0, .y_chroma_shift = 0, + }, + [PIX_FMT_YUV422] = { + .name = "yuv422", + .nb_components = 1, .is_yuv = 1, .is_packed = 1, + .x_chroma_shift = 1, .y_chroma_shift = 0, + }, + [PIX_FMT_YUV410P] = { + .name = "yuv410p", + .nb_components = 3, .is_yuv = 1, + .x_chroma_shift = 2, .y_chroma_shift = 2, + }, + [PIX_FMT_YUV411P] = { + .name = "yuv411p", + .nb_components = 3, .is_yuv = 1, + .x_chroma_shift = 2, .y_chroma_shift = 0, + }, + + /* RGB formats */ + [PIX_FMT_RGB24] = { + .name = "rgb24", + .nb_components = 1, .is_packed = 1, + }, + [PIX_FMT_BGR24] = { + .name = "bgr24", + .nb_components = 1, .is_packed = 1, + }, + [PIX_FMT_RGBA32] = { + .name = "rgba32", + .nb_components = 1, .is_packed = 1, .is_alpha = 1, + }, + [PIX_FMT_RGB565] = { + .name = "rgb565", + .nb_components = 1, .is_packed = 1, + }, + [PIX_FMT_RGB555] = { + .name = "rgb555", + .nb_components = 1, .is_packed = 1, .is_alpha = 1, + }, + + /* gray / mono formats */ + [PIX_FMT_GRAY8] = { + .name = "gray", + .nb_components = 1, .is_gray = 1, + }, + [PIX_FMT_MONOWHITE] = { + .name = "monow", + .nb_components = 1, .is_packed = 1, .is_gray = 1, + }, + [PIX_FMT_MONOBLACK] = { + .name = "monob", + .nb_components = 1, .is_packed = 1, .is_gray = 1, + }, +}; + +void avcodec_get_chroma_sub_sample(int pix_fmt, int *h_shift, int *v_shift) +{ + if (pix_fmt_info[pix_fmt].is_yuv) { + *h_shift = pix_fmt_info[pix_fmt].x_chroma_shift; + *v_shift = pix_fmt_info[pix_fmt].y_chroma_shift; + } else { + *h_shift=0; + *v_shift=0; + } +} + +const char *avcodec_get_pix_fmt_name(int pix_fmt) +{ + if (pix_fmt < 0 || pix_fmt >= PIX_FMT_NB) + return "???"; + else + return pix_fmt_info[pix_fmt].name; +} + +/* Picture field are filled with 'ptr' addresses. 
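   (Worked example, added as an illustration: for PIX_FMT_YUV420P with a
   16x16 picture, data[0]=ptr, data[1]=ptr+256, data[2]=ptr+320,
   linesize[] = {16, 8, 8}, and the return value is 384 = 16*16*3/2 bytes.)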
Also return size */ +int avpicture_fill(AVPicture *picture, UINT8 *ptr, + int pix_fmt, int width, int height) +{ + int size; + + size = width * height; + switch(pix_fmt) { + case PIX_FMT_YUV420P: + picture->data[0] = ptr; + picture->data[1] = picture->data[0] + size; + picture->data[2] = picture->data[1] + size / 4; + picture->linesize[0] = width; + picture->linesize[1] = width / 2; + picture->linesize[2] = width / 2; + return (size * 3) / 2; + case PIX_FMT_RGB24: + case PIX_FMT_BGR24: + picture->data[0] = ptr; + picture->data[1] = NULL; + picture->data[2] = NULL; + picture->linesize[0] = width * 3; + return size * 3; + case PIX_FMT_YUV422P: + picture->data[0] = ptr; + picture->data[1] = picture->data[0] + size; + picture->data[2] = picture->data[1] + size / 2; + picture->linesize[0] = width; + picture->linesize[1] = width / 2; + picture->linesize[2] = width / 2; + return (size * 2); + case PIX_FMT_YUV444P: + picture->data[0] = ptr; + picture->data[1] = picture->data[0] + size; + picture->data[2] = picture->data[1] + size; + picture->linesize[0] = width; + picture->linesize[1] = width; + picture->linesize[2] = width; + return size * 3; + case PIX_FMT_RGBA32: + picture->data[0] = ptr; + picture->data[1] = NULL; + picture->data[2] = NULL; + picture->linesize[0] = width * 4; + return size * 4; + case PIX_FMT_YUV410P: + picture->data[0] = ptr; + picture->data[1] = picture->data[0] + size; + picture->data[2] = picture->data[1] + size / 16; + picture->linesize[0] = width; + picture->linesize[1] = width / 4; + picture->linesize[2] = width / 4; + return size + (size / 8); + case PIX_FMT_YUV411P: + picture->data[0] = ptr; + picture->data[1] = picture->data[0] + size; + picture->data[2] = picture->data[1] + size / 4; + picture->linesize[0] = width; + picture->linesize[1] = width / 4; + picture->linesize[2] = width / 4; + return size + (size / 2); + case PIX_FMT_RGB555: + case PIX_FMT_RGB565: + case PIX_FMT_YUV422: + picture->data[0] = ptr; + picture->data[1] = NULL; + picture->data[2] = NULL; + picture->linesize[0] = width * 2; + return size * 2; + case PIX_FMT_GRAY8: + picture->data[0] = ptr; + picture->data[1] = NULL; + picture->data[2] = NULL; + picture->linesize[0] = width; + return size; + case PIX_FMT_MONOWHITE: + case PIX_FMT_MONOBLACK: + picture->data[0] = ptr; + picture->data[1] = NULL; + picture->data[2] = NULL; + picture->linesize[0] = (width + 7) >> 3; + return picture->linesize[0] * height; + default: + picture->data[0] = NULL; + picture->data[1] = NULL; + picture->data[2] = NULL; + return -1; + } +} + +int avpicture_get_size(int pix_fmt, int width, int height) +{ + AVPicture dummy_pict; + return avpicture_fill(&dummy_pict, NULL, pix_fmt, width, height); +} + + /* XXX: totally non optimized */ -static void yuv422_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr, - UINT8 *src, int width, int height) +static void yuv422_to_yuv420p(AVPicture *dst, AVPicture *src, + int width, int height) { + UINT8 *lum, *cb, *cr; int x, y; - UINT8 *p = src; - + const UINT8 *p; + + lum = dst->data[0]; + cb = dst->data[1]; + cr = dst->data[2]; + p = src->data[0]; + for(y=0;y<height;y+=2) { for(x=0;x<width;x+=2) { lum[0] = p[0]; @@ -58,342 +264,6 @@ static void yuv422_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr, #define ONE_HALF (1 << (SCALEBITS - 1)) #define FIX(x) ((int) ((x) * (1L<<SCALEBITS) + 0.5)) -static void rgb24_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr, - UINT8 *src, int width, int height) -{ - int wrap, wrap3, x, y; - int r, g, b, r1, g1, b1; - UINT8 *p; - - wrap = width; - wrap3 = width * 3; - p = 
src; - for(y=0;y<height;y+=2) { - for(x=0;x<width;x+=2) { - r = p[0]; - g = p[1]; - b = p[2]; - r1 = r; - g1 = g; - b1 = b; - lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g + - FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; - r = p[3]; - g = p[4]; - b = p[5]; - r1 += r; - g1 += g; - b1 += b; - lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g + - FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; - p += wrap3; - lum += wrap; - - r = p[0]; - g = p[1]; - b = p[2]; - r1 += r; - g1 += g; - b1 += b; - lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g + - FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; - r = p[3]; - g = p[4]; - b = p[5]; - r1 += r; - g1 += g; - b1 += b; - lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g + - FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; - - cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 + - FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128; - cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 - - FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128; - - cb++; - cr++; - p += -wrap3 + 2 * 3; - lum += -wrap + 2; - } - p += wrap3; - lum += wrap; - } -} - -static void rgba32_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr, - UINT8 *src, int width, int height) -{ - int wrap, wrap4, x, y; - int r, g, b, r1, g1, b1; - UINT8 *p; - - wrap = width; - wrap4 = width * 4; - p = src; - for(y=0;y<height;y+=2) { - for(x=0;x<width;x+=2) { - r = p[0]; - g = p[1]; - b = p[2]; - r1 = r; - g1 = g; - b1 = b; - lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g + - FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; - r = p[4]; - g = p[5]; - b = p[6]; - r1 += r; - g1 += g; - b1 += b; - lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g + - FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; - p += wrap4; - lum += wrap; - - r = p[0]; - g = p[1]; - b = p[2]; - r1 += r; - g1 += g; - b1 += b; - lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g + - FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; - r = p[4]; - g = p[5]; - b = p[6]; - r1 += r; - g1 += g; - b1 += b; - lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g + - FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; - - cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 + - FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128; - cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 - - FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128; - - cb++; - cr++; - p += -wrap4 + 2 * 4; - lum += -wrap + 2; - } - p += wrap4; - lum += wrap; - } -} - -#define rgb565_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0800,31, 0x0020,63,0x0001,31) -#define rgb555_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0400,31, 0x0020,31,0x0001,31) -#define rgb5551_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0800,31, 0x0040,31,0x0002,31) -#define bgr565_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0001,31, 0x0020,63,0x0800,31) -#define bgr555_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0001,31, 0x0020,31,0x0400,31) -#define gbr565_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0001,31, 0x0800,31,0x0040,63) -#define gbr555_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0001,31, 0x0400,31,0x0020,31) - -static void rgbmisc_to_yuv420p - (UINT8 *lum, UINT8 *cb, UINT8 *cr, - UINT8 *src, int width, int height, - - UINT16 R_LOWMASK, UINT16 R_MAX, - 
UINT16 G_LOWMASK, UINT16 G_MAX, - UINT16 B_LOWMASK, UINT16 B_MAX - ) -{ - int wrap, wrap2, x, y; - int r, g, b, r1, g1, b1; - UINT8 *p; - UINT16 pixel; - - wrap = width; - wrap2 = width * 2; - p = src; - for(y=0;y<height;y+=2) { - for(x=0;x<width;x+=2) { - pixel = p[0] | (p[1]<<8); - r = (((pixel/R_LOWMASK) & R_MAX) * (0x100 / (R_MAX+1))); - g = (((pixel/G_LOWMASK) & G_MAX) * (0x100 / (G_MAX+1))); - b = (((pixel/B_LOWMASK) & B_MAX) * (0x100 / (B_MAX+1))); - r1 = r; - g1 = g; - b1 = b; - lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g + - FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; - - pixel = p[2] | (p[3]<<8); - r = (((pixel/R_LOWMASK) & R_MAX) * (0x100 / (R_MAX+1))); - g = (((pixel/G_LOWMASK) & G_MAX) * (0x100 / (G_MAX+1))); - b = (((pixel/B_LOWMASK) & B_MAX) * (0x100 / (B_MAX+1))); - r1 += r; - g1 += g; - b1 += b; - lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g + - FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; - p += wrap2; - lum += wrap; - - pixel = p[0] | (p[1]<<8); - r = (((pixel/R_LOWMASK) & R_MAX) * (0x100 / (R_MAX+1))); - g = (((pixel/G_LOWMASK) & G_MAX) * (0x100 / (G_MAX+1))); - b = (((pixel/B_LOWMASK) & B_MAX) * (0x100 / (B_MAX+1))); - r1 += r; - g1 += g; - b1 += b; - lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g + - FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; - pixel = p[2] | (p[3]<<8); - r = (((pixel/R_LOWMASK) & R_MAX) * (0x100 / (R_MAX+1))); - g = (((pixel/G_LOWMASK) & G_MAX) * (0x100 / (G_MAX+1))); - b = (((pixel/B_LOWMASK) & B_MAX) * (0x100 / (B_MAX+1))); - r1 += r; - g1 += g; - b1 += b; - lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g + - FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; - - cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 + - FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128; - cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 - - FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128; - - cb++; - cr++; - p += -wrap2 + 2 * 2; - lum += -wrap + 2; - } - p += wrap2; - lum += wrap; - } -} - - -static void bgr24_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr, - UINT8 *src, int width, int height) -{ - int wrap, wrap3, x, y; - int r, g, b, r1, g1, b1; - UINT8 *p; - - wrap = width; - wrap3 = width * 3; - p = src; - for(y=0;y<height;y+=2) { - for(x=0;x<width;x+=2) { - b = p[0]; - g = p[1]; - r = p[2]; - r1 = r; - g1 = g; - b1 = b; - lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g + - FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; - b = p[3]; - g = p[4]; - r = p[5]; - r1 += r; - g1 += g; - b1 += b; - lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g + - FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; - p += wrap3; - lum += wrap; - - b = p[0]; - g = p[1]; - r = p[2]; - r1 += r; - g1 += g; - b1 += b; - lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g + - FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; - b = p[3]; - g = p[4]; - r = p[5]; - r1 += r; - g1 += g; - b1 += b; - lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g + - FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; - - cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 + - FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128; - cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 - - FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128; - - cb++; - cr++; - p += -wrap3 + 2 * 3; - lum += -wrap + 2; - } - p += wrap3; - lum += wrap; - } -} - -static void bgra32_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr, - UINT8 *src, int width, int height) -{ - int wrap, wrap4, x, y; - int r, g, b, r1, g1, b1; - UINT8 *p; - - wrap = width; - wrap4 = width * 4; - p = src; - for(y=0;y<height;y+=2) { - for(x=0;x<width;x+=2) { - b = p[0]; - g = 
p[1]; - r = p[2]; - r1 = r; - g1 = g; - b1 = b; - lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g + - FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; - b = p[4]; - g = p[5]; - r = p[6]; - r1 += r; - g1 += g; - b1 += b; - lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g + - FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; - p += wrap4; - lum += wrap; - - b = p[0]; - g = p[1]; - r = p[2]; - r1 += r; - g1 += g; - b1 += b; - lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g + - FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; - b = p[4]; - g = p[5]; - r = p[6]; - r1 += r; - g1 += g; - b1 += b; - lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g + - FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; - - cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 + - FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128; - cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 - - FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128; - - cb++; - cr++; - p += -wrap4 + 2 * 4; - lum += -wrap + 2; - } - p += wrap4; - lum += wrap; - } -} - /* XXX: use generic filter ? */ /* 1x2 -> 1x1 */ static void shrink2(UINT8 *dst, int dst_wrap, @@ -487,7 +357,7 @@ static void grow22(UINT8 *dst, int dst_wrap, } } -/* 1x2 -> 2x1. width and height are given for the source picture */ +/* 1x2 -> 2x1 */ static void conv411(UINT8 *dst, int dst_wrap, UINT8 *src, int src_wrap, int width, int height) @@ -495,7 +365,7 @@ static void conv411(UINT8 *dst, int dst_wrap, int w, c; UINT8 *s1, *s2, *d; - for(;height > 0; height -= 2) { + for(;height > 0; height--) { s1 = src; s2 = src + src_wrap; d = dst; @@ -531,7 +401,7 @@ static void img_copy(UINT8 *dst, int dst_wrap, #define C_GU (13954 >> (16 - SCALE_BITS)) #define C_GV (34903 >> (16 - SCALE_BITS)) -#define RGBOUT(r, g, b, y1)\ +#define YUV_TO_RGB2(r, g, b, y1)\ {\ y = (y1 - 16) * C_Y;\ r = cm[(y + r_add) >> SCALE_BITS];\ @@ -540,340 +410,816 @@ static void img_copy(UINT8 *dst, int dst_wrap, } /* XXX: no chroma interpolating is done */ -static void yuv420p_to_bgra32(AVPicture *dst, AVPicture *src, - int width, int height) +#define RGB_FUNCTIONS(rgb_name) \ + \ +static void yuv420p_to_ ## rgb_name (AVPicture *dst, AVPicture *src, \ + int width, int height) \ +{ \ + UINT8 *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr, *d, *d1, *d2; \ + int w, y, cb, cr, r_add, g_add, b_add, width2; \ + UINT8 *cm = cropTbl + MAX_NEG_CROP; \ + unsigned int r, g, b; \ + \ + d = dst->data[0]; \ + y1_ptr = src->data[0]; \ + cb_ptr = src->data[1]; \ + cr_ptr = src->data[2]; \ + width2 = width >> 1; \ + for(;height > 0; height -= 2) { \ + d1 = d; \ + d2 = d + dst->linesize[0]; \ + y2_ptr = y1_ptr + src->linesize[0]; \ + for(w = width2; w > 0; w --) { \ + cb = cb_ptr[0] - 128; \ + cr = cr_ptr[0] - 128; \ + r_add = C_RV * cr + (1 << (SCALE_BITS - 1)); \ + g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1)); \ + b_add = C_BU * cb + (1 << (SCALE_BITS - 1)); \ + \ + /* output 4 pixels */ \ + YUV_TO_RGB2(r, g, b, y1_ptr[0]); \ + RGB_OUT(d1, r, g, b); \ + \ + YUV_TO_RGB2(r, g, b, y1_ptr[1]); \ + RGB_OUT(d1 + BPP, r, g, b); \ + \ + YUV_TO_RGB2(r, g, b, y2_ptr[0]); \ + RGB_OUT(d2, r, g, b); \ + \ + YUV_TO_RGB2(r, g, b, y2_ptr[1]); \ + RGB_OUT(d2 + BPP, r, g, b); \ + \ + d1 += 2 * BPP; \ + d2 += 2 * BPP; \ + \ + y1_ptr += 2; \ + y2_ptr += 2; \ + cb_ptr++; \ + cr_ptr++; \ + } \ + d += 2 * dst->linesize[0]; \ + y1_ptr += 2 * src->linesize[0] - width; \ + cb_ptr += src->linesize[1] - width2; \ + cr_ptr += src->linesize[2] - width2; \ + } \ +} \ + \ +/* XXX: no chroma interpolating is done */ \ +static void yuv422p_to_ ## rgb_name (AVPicture *dst, 
AVPicture *src, \ + int width, int height) \ +{ \ + UINT8 *y1_ptr, *cb_ptr, *cr_ptr, *d, *d1; \ + int w, y, cb, cr, r_add, g_add, b_add, width2; \ + UINT8 *cm = cropTbl + MAX_NEG_CROP; \ + unsigned int r, g, b; \ + \ + d = dst->data[0]; \ + y1_ptr = src->data[0]; \ + cb_ptr = src->data[1]; \ + cr_ptr = src->data[2]; \ + width2 = width >> 1; \ + for(;height > 0; height --) { \ + d1 = d; \ + for(w = width2; w > 0; w --) { \ + cb = cb_ptr[0] - 128; \ + cr = cr_ptr[0] - 128; \ + r_add = C_RV * cr + (1 << (SCALE_BITS - 1)); \ + g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1)); \ + b_add = C_BU * cb + (1 << (SCALE_BITS - 1)); \ + \ + /* output 2 pixels */ \ + YUV_TO_RGB2(r, g, b, y1_ptr[0]); \ + RGB_OUT(d, r, g, b); \ + \ + YUV_TO_RGB2(r, g, b, y1_ptr[1]); \ + RGB_OUT(d + BPP, r, g, b); \ + \ + d += 2 * BPP; \ + \ + y1_ptr += 2; \ + cb_ptr++; \ + cr_ptr++; \ + } \ + d += dst->linesize[0]; \ + y1_ptr += src->linesize[0] - width; \ + cb_ptr += src->linesize[1] - width2; \ + cr_ptr += src->linesize[2] - width2; \ + } \ +} \ + \ +static void rgb_name ## _to_yuv420p(AVPicture *dst, AVPicture *src, \ + int width, int height) \ +{ \ + int wrap, wrap3, x, y; \ + int r, g, b, r1, g1, b1; \ + UINT8 *lum, *cb, *cr; \ + const UINT8 *p; \ + \ + lum = dst->data[0]; \ + cb = dst->data[1]; \ + cr = dst->data[2]; \ + \ + wrap = width; \ + wrap3 = width * BPP; \ + p = src->data[0]; \ + for(y=0;y<height;y+=2) { \ + for(x=0;x<width;x+=2) { \ + RGB_IN(r, g, b, p); \ + r1 = r; \ + g1 = g; \ + b1 = b; \ + lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g + \ + FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; \ + RGB_IN(r, g, b, p + BPP); \ + r1 += r; \ + g1 += g; \ + b1 += b; \ + lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g + \ + FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; \ + p += wrap3; \ + lum += wrap; \ + \ + RGB_IN(r, g, b, p); \ + r1 += r; \ + g1 += g; \ + b1 += b; \ + lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g + \ + FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; \ + \ + RGB_IN(r, g, b, p + BPP); \ + r1 += r; \ + g1 += g; \ + b1 += b; \ + lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g + \ + FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; \ + \ + cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 + \ + FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> \ + (SCALEBITS + 2)) + 128; \ + cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 - \ + FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> \ + (SCALEBITS + 2)) + 128; \ + \ + cb++; \ + cr++; \ + p += -wrap3 + 2 * BPP; \ + lum += -wrap + 2; \ + } \ + p += wrap3; \ + lum += wrap; \ + } \ +} \ + \ +static void rgb_name ## _to_gray(AVPicture *dst, AVPicture *src, \ + int width, int height) \ +{ \ + const unsigned char *p; \ + unsigned char *q; \ + int r, g, b, dst_wrap, src_wrap; \ + int x, y; \ + \ + p = src->data[0]; \ + src_wrap = src->linesize[0] - BPP * width; \ + \ + q = dst->data[0]; \ + dst_wrap = dst->linesize[0] - width; \ + \ + for(y=0;y<height;y++) { \ + for(x=0;x<width;x++) { \ + RGB_IN(r, g, b, p); \ + q[0] = (FIX(0.29900) * r + FIX(0.58700) * g + \ + FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; \ + q++; \ + p += BPP; \ + } \ + p += src_wrap; \ + q += dst_wrap; \ + } \ +} \ + \ +static void gray_to_ ## rgb_name(AVPicture *dst, AVPicture *src, \ + int width, int height) \ +{ \ + const unsigned char *p; \ + unsigned char *q; \ + int r, dst_wrap, src_wrap; \ + int x, y; \ + \ + p = src->data[0]; \ + src_wrap = src->linesize[0] - width; \ + \ + q = dst->data[0]; \ + dst_wrap = dst->linesize[0] - BPP * width; \ + \ + for(y=0;y<height;y++) { \ + for(x=0;x<width;x++) { \ + r = p[0]; \ + 
RGB_OUT(q, r, r, r); \ + q += BPP; \ + p ++; \ + } \ + p += src_wrap; \ + q += dst_wrap; \ + } \ +} + +/* copy bit n to bits 0 ... n - 1 */ +static inline unsigned int bitcopy_n(unsigned int a, int n) { - UINT8 *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr, *d, *d1, *d2; - int w, y, cb, cr, r_add, g_add, b_add, width2; - UINT8 *cm = cropTbl + MAX_NEG_CROP; + int mask; + mask = (1 << n) - 1; + return (a & (0xff & ~mask)) | ((-((a >> n) & 1)) & mask); +} - d = dst->data[0]; - y1_ptr = src->data[0]; - cb_ptr = src->data[1]; - cr_ptr = src->data[2]; - width2 = width >> 1; - for(;height > 0; height -= 2) { - d1 = d; - d2 = d + dst->linesize[0]; - y2_ptr = y1_ptr + src->linesize[0]; - for(w = width2; w > 0; w --) { - cb = cb_ptr[0] - 128; - cr = cr_ptr[0] - 128; - r_add = C_RV * cr + (1 << (SCALE_BITS - 1)); - g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1)); - b_add = C_BU * cb + (1 << (SCALE_BITS - 1)); - - /* output 4 pixels */ - RGBOUT(d1[2], d1[1], d1[0], y1_ptr[0]); - RGBOUT(d1[6], d1[5], d1[4], y1_ptr[1]); - RGBOUT(d2[2], d2[1], d2[0], y2_ptr[0]); - RGBOUT(d2[6], d2[5], d2[4], y2_ptr[1]); - - d1[3] = d1[7] = d2[3] = d2[7] = 255; - - d1 += 8; - d2 += 8; - y1_ptr += 2; - y2_ptr += 2; - cb_ptr++; - cr_ptr++; +/* rgb555 handling */ + +#define RGB_IN(r, g, b, s)\ +{\ + unsigned int v = ((UINT16 *)(s))[0];\ + r = bitcopy_n(v >> (10 - 3), 3);\ + g = bitcopy_n(v >> (5 - 3), 3);\ + b = bitcopy_n(v << 3, 3);\ +} + +#define RGB_OUT(d, r, g, b)\ +{\ + ((UINT16 *)(d))[0] = ((r >> 3) << 10) | ((g >> 3) << 5) | (b >> 3) | 0x8000;\ +} + +#define BPP 2 + +RGB_FUNCTIONS(rgb555) + +#undef RGB_IN +#undef RGB_OUT +#undef BPP + +/* rgb565 handling */ + +#define RGB_IN(r, g, b, s)\ +{\ + unsigned int v = ((UINT16 *)(s))[0];\ + r = bitcopy_n(v >> (11 - 3), 3);\ + g = bitcopy_n(v >> (5 - 2), 2);\ + b = bitcopy_n(v << 3, 3);\ +} + +#define RGB_OUT(d, r, g, b)\ +{\ + ((UINT16 *)(d))[0] = ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);\ +} + +#define BPP 2 + +RGB_FUNCTIONS(rgb565) + +#undef RGB_IN +#undef RGB_OUT +#undef BPP + +/* bgr24 handling */ + +#define RGB_IN(r, g, b, s)\ +{\ + b = (s)[0];\ + g = (s)[1];\ + r = (s)[2];\ +} + +#define RGB_OUT(d, r, g, b)\ +{\ + (d)[0] = b;\ + (d)[1] = g;\ + (d)[2] = r;\ +} + +#define BPP 3 + +RGB_FUNCTIONS(bgr24) + +#undef RGB_IN +#undef RGB_OUT +#undef BPP + +/* rgb24 handling */ + +#define RGB_IN(r, g, b, s)\ +{\ + r = (s)[0];\ + g = (s)[1];\ + b = (s)[2];\ +} + +#define RGB_OUT(d, r, g, b)\ +{\ + (d)[0] = r;\ + (d)[1] = g;\ + (d)[2] = b;\ +} + +#define BPP 3 + +RGB_FUNCTIONS(rgb24) + +#undef RGB_IN +#undef RGB_OUT +#undef BPP + +/* rgba32 handling */ + +#define RGB_IN(r, g, b, s)\ +{\ + unsigned int v = ((UINT32 *)(s))[0];\ + r = (v >> 16) & 0xff;\ + g = (v >> 8) & 0xff;\ + b = v & 0xff;\ +} + +#define RGB_OUT(d, r, g, b)\ +{\ + ((UINT32 *)(d))[0] = (0xff << 24) | (r << 16) | (g << 8) | b;\ +} + +#define BPP 4 + +RGB_FUNCTIONS(rgba32) + +#undef RGB_IN +#undef RGB_OUT +#undef BPP + + +static void rgb24_to_rgb565(AVPicture *dst, AVPicture *src, + int width, int height) +{ + const unsigned char *p; + unsigned char *q; + int r, g, b, dst_wrap, src_wrap; + int x, y; + + p = src->data[0]; + src_wrap = src->linesize[0] - 3 * width; + + q = dst->data[0]; + dst_wrap = dst->linesize[0] - 2 * width; + + for(y=0;y<height;y++) { + for(x=0;x<width;x++) { + r = p[0]; + g = p[1]; + b = p[2]; + + ((unsigned short *)q)[0] = + ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3); + q += 2; + p += 3; } - d += 2 * dst->linesize[0]; - y1_ptr += 2 * src->linesize[0] - width; - cb_ptr += src->linesize[1] - 
width2; - cr_ptr += src->linesize[2] - width2; + p += src_wrap; + q += dst_wrap; } } -/* XXX: no chroma interpolating is done */ -static void yuv420p_to_rgba32(AVPicture *dst, AVPicture *src, - int width, int height) +/* NOTE: we also add a dummy alpha bit */ +static void rgb24_to_rgb555(AVPicture *dst, AVPicture *src, + int width, int height) { - UINT8 *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr, *d, *d1, *d2; - int w, y, cb, cr, r_add, g_add, b_add, width2; - UINT8 *cm = cropTbl + MAX_NEG_CROP; + const unsigned char *p; + unsigned char *q; + int r, g, b, dst_wrap, src_wrap; + int x, y; - d = dst->data[0]; - y1_ptr = src->data[0]; - cb_ptr = src->data[1]; - cr_ptr = src->data[2]; - width2 = width >> 1; - for(;height > 0; height -= 2) { - d1 = d; - d2 = d + dst->linesize[0]; - y2_ptr = y1_ptr + src->linesize[0]; - for(w = width2; w > 0; w --) { - cb = cb_ptr[0] - 128; - cr = cr_ptr[0] - 128; - r_add = C_RV * cr + (1 << (SCALE_BITS - 1)); - g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1)); - b_add = C_BU * cb + (1 << (SCALE_BITS - 1)); - - /* output 4 pixels */ - RGBOUT(d1[0], d1[1], d1[2], y1_ptr[0]); - RGBOUT(d1[4], d1[5], d1[6], y1_ptr[1]); - RGBOUT(d2[0], d2[1], d2[2], y2_ptr[0]); - RGBOUT(d2[4], d2[5], d2[6], y2_ptr[1]); - - d1[3] = d1[7] = d2[3] = d2[7] = 255; - - d1 += 8; - d2 += 8; - y1_ptr += 2; - y2_ptr += 2; - cb_ptr++; - cr_ptr++; + p = src->data[0]; + src_wrap = src->linesize[0] - 3 * width; + + q = dst->data[0]; + dst_wrap = dst->linesize[0] - 2 * width; + + for(y=0;y<height;y++) { + for(x=0;x<width;x++) { + r = p[0]; + g = p[1]; + b = p[2]; + + ((unsigned short *)q)[0] = + ((r >> 3) << 10) | ((g >> 3) << 5) | (b >> 3) | 0x8000; + q += 2; + p += 3; } - d += 2 * dst->linesize[0]; - y1_ptr += 2 * src->linesize[0] - width; - cb_ptr += src->linesize[1] - width2; - cr_ptr += src->linesize[2] - width2; + p += src_wrap; + q += dst_wrap; } } -/* XXX: no chroma interpolating is done */ -static void yuv420p_to_rgb24(AVPicture *dst, AVPicture *src, - int width, int height) +static void mono_to_gray(AVPicture *dst, AVPicture *src, + int width, int height, int xor_mask) { - UINT8 *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr, *d, *d1, *d2; - int w, y, cb, cr, r_add, g_add, b_add, width2; - UINT8 *cm = cropTbl + MAX_NEG_CROP; - - d = dst->data[0]; - y1_ptr = src->data[0]; - cb_ptr = src->data[1]; - cr_ptr = src->data[2]; - width2 = width >> 1; - for(;height > 0; height -= 2) { - d1 = d; - d2 = d + dst->linesize[0]; - y2_ptr = y1_ptr + src->linesize[0]; - for(w = width2; w > 0; w --) { - cb = cb_ptr[0] - 128; - cr = cr_ptr[0] - 128; - r_add = C_RV * cr + (1 << (SCALE_BITS - 1)); - g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1)); - b_add = C_BU * cb + (1 << (SCALE_BITS - 1)); - - /* output 4 pixels */ - RGBOUT(d1[0], d1[1], d1[2], y1_ptr[0]); - RGBOUT(d1[3], d1[4], d1[5], y1_ptr[1]); - RGBOUT(d2[0], d2[1], d2[2], y2_ptr[0]); - RGBOUT(d2[3], d2[4], d2[5], y2_ptr[1]); - - d1 += 6; - d2 += 6; - y1_ptr += 2; - y2_ptr += 2; - cb_ptr++; - cr_ptr++; + const unsigned char *p; + unsigned char *q; + int v, dst_wrap, src_wrap; + int y, w; + + p = src->data[0]; + src_wrap = src->linesize[0] - ((width + 7) >> 3); + + q = dst->data[0]; + dst_wrap = dst->linesize[0] - width; + for(y=0;y<height;y++) { + w = width; + while (w >= 8) { + v = *p++ ^ xor_mask; + q[0] = -(v >> 7); + q[1] = -((v >> 6) & 1); + q[2] = -((v >> 5) & 1); + q[3] = -((v >> 4) & 1); + q[4] = -((v >> 3) & 1); + q[5] = -((v >> 2) & 1); + q[6] = -((v >> 1) & 1); + q[7] = -((v >> 0) & 1); + w -= 8; + q += 8; + } + if (w > 0) { + v = *p++ ^ 
xor_mask; + do { + q[0] = -((v >> 7) & 1); + q++; + v <<= 1; + } while (--w); } - d += 2 * dst->linesize[0]; - y1_ptr += 2 * src->linesize[0] - width; - cb_ptr += src->linesize[1] - width2; - cr_ptr += src->linesize[2] - width2; + p += src_wrap; + q += dst_wrap; } } -/* XXX: no chroma interpolating is done */ -static void yuv422p_to_rgb24(AVPicture *dst, AVPicture *src, - int width, int height) +static void monowhite_to_gray(AVPicture *dst, AVPicture *src, + int width, int height) { - UINT8 *y1_ptr, *cb_ptr, *cr_ptr, *d, *d1; - int w, y, cb, cr, r_add, g_add, b_add, width2; - UINT8 *cm = cropTbl + MAX_NEG_CROP; + mono_to_gray(dst, src, width, height, 0xff); +} + +static void monoblack_to_gray(AVPicture *dst, AVPicture *src, + int width, int height) +{ + mono_to_gray(dst, src, width, height, 0x00); +} + +static void gray_to_mono(AVPicture *dst, AVPicture *src, + int width, int height, int xor_mask) +{ + int n; + const UINT8 *s; + UINT8 *d; + int j, b, v, n1, src_wrap, dst_wrap, y; + + s = src->data[0]; + src_wrap = src->linesize[0] - width; d = dst->data[0]; - y1_ptr = src->data[0]; - cb_ptr = src->data[1]; - cr_ptr = src->data[2]; - width2 = width >> 1; - for(;height > 0; height --) { - d1 = d; - for(w = width2; w > 0; w --) { - cb = cb_ptr[0] - 128; - cr = cr_ptr[0] - 128; - r_add = C_RV * cr + (1 << (SCALE_BITS - 1)); - g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1)); - b_add = C_BU * cb + (1 << (SCALE_BITS - 1)); - - /* output 2 pixels */ - RGBOUT(d1[0], d1[1], d1[2], y1_ptr[0]); - RGBOUT(d1[3], d1[4], d1[5], y1_ptr[1]); - - d1 += 6; - y1_ptr += 2; - cb_ptr++; - cr_ptr++; + dst_wrap = dst->linesize[0] - ((width + 7) >> 3); + printf("%d %d\n", width, height); + + for(y=0;y<height;y++) { + n = width; + while (n >= 8) { + v = 0; + for(j=0;j<8;j++) { + b = s[0]; + s++; + v = (v << 1) | (b >> 7); + } + d[0] = v ^ xor_mask; + d++; + n -= 8; + } + if (n > 0) { + n1 = n; + v = 0; + while (n > 0) { + b = s[0]; + s++; + v = (v << 1) | (b >> 7); + n--; + } + d[0] = (v << (8 - (n1 & 7))) ^ xor_mask; + d++; } - d += dst->linesize[0]; - y1_ptr += src->linesize[0] - width; - cb_ptr += src->linesize[1] - width2; - cr_ptr += src->linesize[2] - width2; + s += src_wrap; + d += dst_wrap; } } +static void gray_to_monowhite(AVPicture *dst, AVPicture *src, + int width, int height) +{ + gray_to_mono(dst, src, width, height, 0xff); +} + +static void gray_to_monoblack(AVPicture *dst, AVPicture *src, + int width, int height) +{ + gray_to_mono(dst, src, width, height, 0x00); +} + +typedef struct ConvertEntry { + void (*convert)(AVPicture *dst, AVPicture *src, int width, int height); +} ConvertEntry; + +/* add each new convertion function in this table */ +/* constraints; + - all non YUV modes must convert at least to and from PIX_FMT_RGB24 +*/ +static ConvertEntry convert_table[PIX_FMT_NB][PIX_FMT_NB] = { + [PIX_FMT_YUV420P] = { + [PIX_FMT_RGB555] = { + .convert = yuv420p_to_rgb555 + }, + [PIX_FMT_RGB565] = { + .convert = yuv420p_to_rgb565 + }, + [PIX_FMT_BGR24] = { + .convert = yuv420p_to_bgr24 + }, + [PIX_FMT_RGB24] = { + .convert = yuv420p_to_rgb24 + }, + [PIX_FMT_RGBA32] = { + .convert = yuv420p_to_rgba32 + }, + }, + [PIX_FMT_YUV422P] = { + [PIX_FMT_RGB555] = { + .convert = yuv422p_to_rgb555 + }, + [PIX_FMT_RGB565] = { + .convert = yuv422p_to_rgb565 + }, + [PIX_FMT_BGR24] = { + .convert = yuv422p_to_bgr24 + }, + [PIX_FMT_RGB24] = { + .convert = yuv422p_to_rgb24 + }, + [PIX_FMT_RGBA32] = { + .convert = yuv422p_to_rgba32 + }, + }, + [PIX_FMT_YUV422] = { + [PIX_FMT_YUV420P] = { + .convert = 
yuv422_to_yuv420p, + }, + }, + + [PIX_FMT_RGB24] = { + [PIX_FMT_YUV420P] = { + .convert = rgb24_to_yuv420p + }, + [PIX_FMT_RGB565] = { + .convert = rgb24_to_rgb565 + }, + [PIX_FMT_RGB555] = { + .convert = rgb24_to_rgb555 + }, + [PIX_FMT_GRAY8] = { + .convert = rgb24_to_gray + }, + }, + [PIX_FMT_RGBA32] = { + [PIX_FMT_YUV420P] = { + .convert = rgba32_to_yuv420p + }, + [PIX_FMT_GRAY8] = { + .convert = rgba32_to_gray + }, + }, + [PIX_FMT_BGR24] = { + [PIX_FMT_YUV420P] = { + .convert = bgr24_to_yuv420p + }, + [PIX_FMT_GRAY8] = { + .convert = bgr24_to_gray + }, + }, + [PIX_FMT_RGB555] = { + [PIX_FMT_YUV420P] = { + .convert = rgb555_to_yuv420p + }, + [PIX_FMT_GRAY8] = { + .convert = rgb555_to_gray + }, + }, + [PIX_FMT_RGB565] = { + [PIX_FMT_YUV420P] = { + .convert = rgb565_to_yuv420p + }, + [PIX_FMT_GRAY8] = { + .convert = rgb565_to_gray + }, + }, + [PIX_FMT_GRAY8] = { + [PIX_FMT_RGB555] = { + .convert = gray_to_rgb555 + }, + [PIX_FMT_RGB565] = { + .convert = gray_to_rgb565 + }, + [PIX_FMT_RGB24] = { + .convert = gray_to_rgb24 + }, + [PIX_FMT_BGR24] = { + .convert = gray_to_bgr24 + }, + [PIX_FMT_RGBA32] = { + .convert = gray_to_rgba32 + }, + [PIX_FMT_MONOWHITE] = { + .convert = gray_to_monowhite + }, + [PIX_FMT_MONOBLACK] = { + .convert = gray_to_monoblack + }, + }, + [PIX_FMT_MONOWHITE] = { + [PIX_FMT_GRAY8] = { + .convert = monowhite_to_gray + }, + }, + [PIX_FMT_MONOBLACK] = { + [PIX_FMT_GRAY8] = { + .convert = monoblack_to_gray + }, + }, +}; + +static int avpicture_alloc(AVPicture *picture, + int pix_fmt, int width, int height) +{ + int size; + void *ptr; + + size = avpicture_get_size(pix_fmt, width, height); + if (size < 0) + goto fail; + ptr = av_malloc(size); + if (!ptr) + goto fail; + avpicture_fill(picture, ptr, pix_fmt, width, height); + return 0; + fail: + memset(picture, 0, sizeof(AVPicture)); + return -1; +} + +static void avpicture_free(AVPicture *picture) +{ + av_free(picture->data[0]); +} + /* XXX: always use linesize. 
Return -1 if not supported */ int img_convert(AVPicture *dst, int dst_pix_fmt, - AVPicture *src, int pix_fmt, - int width, int height) + AVPicture *src, int src_pix_fmt, + int src_width, int src_height) { - int i; + int i, ret, dst_width, dst_height, int_pix_fmt; + PixFmtInfo *src_pix, *dst_pix; + ConvertEntry *ce; + AVPicture tmp1, *tmp = &tmp1; - if (dst_pix_fmt == pix_fmt) { - switch(pix_fmt) { - case PIX_FMT_YUV420P: - for(i=0;i<3;i++) { - if (i == 1) { - width >>= 1; - height >>= 1; - } - img_copy(dst->data[i], dst->linesize[i], - src->data[i], src->linesize[i], - width, height); + if (src_pix_fmt < 0 || src_pix_fmt >= PIX_FMT_NB || + dst_pix_fmt < 0 || dst_pix_fmt >= PIX_FMT_NB) + return -1; + if (src_width <= 0 || src_height <= 0) + return 0; + + dst_width = src_width; + dst_height = src_height; + + dst_pix = &pix_fmt_info[dst_pix_fmt]; + src_pix = &pix_fmt_info[src_pix_fmt]; + if (src_pix_fmt == dst_pix_fmt) { + /* XXX: incorrect */ + /* same format: just copy */ + for(i = 0; i < dst_pix->nb_components; i++) { + int w, h; + w = dst_width; + h = dst_height; + if (dst_pix->is_yuv && (i == 1 || i == 2)) { + w >>= dst_pix->x_chroma_shift; + h >>= dst_pix->y_chroma_shift; } - break; - default: - return -1; + img_copy(dst->data[i], dst->linesize[i], + src->data[i], src->linesize[i], + w, h); } - } else if (dst_pix_fmt == PIX_FMT_YUV420P) { - - switch(pix_fmt) { - case PIX_FMT_YUV411P: - img_copy(dst->data[0], dst->linesize[0], - src->data[0], src->linesize[0], - width, height); - conv411(dst->data[1], dst->linesize[1], - src->data[1], src->linesize[1], - width / 4, height); - conv411(dst->data[2], dst->linesize[2], - src->data[2], src->linesize[2], - width / 4, height); - break; - case PIX_FMT_YUV410P: - img_copy(dst->data[0], dst->linesize[0], - src->data[0], src->linesize[0], - width, height); - grow22(dst->data[1], dst->linesize[1], - src->data[1], src->linesize[1], - width/2, height/2); - grow22(dst->data[2], dst->linesize[2], - src->data[2], src->linesize[2], - width/2, height/2); - break; - case PIX_FMT_YUV420P: - for(i=0;i<3;i++) { - img_copy(dst->data[i], dst->linesize[i], - src->data[i], src->linesize[i], - width, height); - } - break; - case PIX_FMT_YUV422P: - img_copy(dst->data[0], dst->linesize[0], - src->data[0], src->linesize[0], - width, height); - width >>= 1; - height >>= 1; - for(i=1;i<3;i++) { - shrink2(dst->data[i], dst->linesize[i], - src->data[i], src->linesize[i], - width, height); - } - break; - case PIX_FMT_YUV444P: - img_copy(dst->data[0], dst->linesize[0], - src->data[0], src->linesize[0], - width, height); - width >>= 1; - height >>= 1; - for(i=1;i<3;i++) { - shrink22(dst->data[i], dst->linesize[i], - src->data[i], src->linesize[i], - width, height); + return 0; + } + + ce = &convert_table[src_pix_fmt][dst_pix_fmt]; + if (ce->convert) { + /* specific convertion routine */ + ce->convert(dst, src, dst_width, dst_height); + return 0; + } + + /* gray to YUV */ + if (dst_pix->is_yuv && src_pix_fmt == PIX_FMT_GRAY8) { + int w, h, y; + uint8_t *d; + + img_copy(dst->data[0], dst->linesize[0], + src->data[0], src->linesize[0], + dst_width, dst_height); + /* fill U and V with 128 */ + w = dst_width; + h = dst_height; + w >>= dst_pix->x_chroma_shift; + h >>= dst_pix->y_chroma_shift; + for(i = 1; i <= 2; i++) { + d = dst->data[i]; + for(y = 0; y< h; y++) { + memset(d, 128, w); + d += dst->linesize[i]; } - break; - case PIX_FMT_YUV422: - yuv422_to_yuv420p(dst->data[0], dst->data[1], dst->data[2], - src->data[0], width, height); - break; - case PIX_FMT_RGB24: - 
rgb24_to_yuv420p(dst->data[0], dst->data[1], dst->data[2], - src->data[0], width, height); - break; - case PIX_FMT_RGBA32: - rgba32_to_yuv420p(dst->data[0], dst->data[1], dst->data[2], - src->data[0], width, height); - break; - case PIX_FMT_BGR24: - bgr24_to_yuv420p(dst->data[0], dst->data[1], dst->data[2], - src->data[0], width, height); - break; - case PIX_FMT_BGRA32: - bgra32_to_yuv420p(dst->data[0], dst->data[1], dst->data[2], - src->data[0], width, height); - break; - case PIX_FMT_RGB565: - rgb565_to_yuv420p(dst->data[0], dst->data[1], dst->data[2], - src->data[0], width, height); - break; - case PIX_FMT_RGB555: - rgb555_to_yuv420p(dst->data[0], dst->data[1], dst->data[2], - src->data[0], width, height); - break; -/* case PIX_FMT_RGB5551: - rgb5551_to_yuv420p(dst->data[0], dst->data[1], dst->data[2], - src->data[0], width, height); - break;*/ - case PIX_FMT_BGR565: - bgr565_to_yuv420p(dst->data[0], dst->data[1], dst->data[2], - src->data[0], width, height); - break; - case PIX_FMT_BGR555: - bgr555_to_yuv420p(dst->data[0], dst->data[1], dst->data[2], - src->data[0], width, height); - break; -/* case PIX_FMT_GBR565: - gbr565_to_yuv420p(dst->data[0], dst->data[1], dst->data[2], - src->data[0], width, height); - break; - case PIX_FMT_GBR555: - gbr555_to_yuv420p(dst->data[0], dst->data[1], dst->data[2], - src->data[0], width, height); - break;*/ - default: - return -1; - } - } else if (dst_pix_fmt == PIX_FMT_RGB24) { - switch(pix_fmt) { - case PIX_FMT_YUV420P: - yuv420p_to_rgb24(dst, src, width, height); - break; - case PIX_FMT_YUV422P: - yuv422p_to_rgb24(dst, src, width, height); - break; - default: - return -1; - } - } else if (dst_pix_fmt == PIX_FMT_RGBA32) { - switch(pix_fmt) { - case PIX_FMT_YUV420P: - yuv420p_to_rgba32(dst, src, width, height); - break; - default: - return -1; } - } else if (dst_pix_fmt == PIX_FMT_BGRA32) { - switch(pix_fmt) { - case PIX_FMT_YUV420P: - yuv420p_to_bgra32(dst, src, width, height); - break; - default: + return 0; + } + + /* YUV to gray */ + if (src_pix->is_yuv && dst_pix_fmt == PIX_FMT_GRAY8) { + img_copy(dst->data[0], dst->linesize[0], + src->data[0], src->linesize[0], + dst_width, dst_height); + return 0; + } + + /* YUV to YUV */ + if (dst_pix->is_yuv && src_pix->is_yuv) { + int x_shift, y_shift, w, h; + void (*resize_func)(UINT8 *dst, int dst_wrap, + UINT8 *src, int src_wrap, + int width, int height); + + /* compute chroma size of the smallest dimensions */ + w = dst_width; + h = dst_height; + if (dst_pix->x_chroma_shift >= src_pix->x_chroma_shift) + w >>= dst_pix->x_chroma_shift; + else + w >>= src_pix->x_chroma_shift; + if (dst_pix->y_chroma_shift >= src_pix->y_chroma_shift) + h >>= dst_pix->y_chroma_shift; + else + h >>= src_pix->y_chroma_shift; + + x_shift = (dst_pix->x_chroma_shift - src_pix->x_chroma_shift); + y_shift = (dst_pix->y_chroma_shift - src_pix->y_chroma_shift); + if (x_shift == 0 && y_shift == 0) { + resize_func = img_copy; /* should never happen */ + } else if (x_shift == 0 && y_shift == 1) { + resize_func = shrink2; + } else if (x_shift == 1 && y_shift == 1) { + resize_func = shrink22; + } else if (x_shift == -1 && y_shift == -1) { + resize_func = grow22; + } else if (x_shift == -1 && y_shift == 1) { + resize_func = conv411; + } else { + /* currently not handled */ return -1; } + + img_copy(dst->data[0], dst->linesize[0], + src->data[0], src->linesize[0], + dst_width, dst_height); + + for(i = 1;i <= 2; i++) + resize_func(dst->data[i], dst->linesize[i], + src->data[i], src->linesize[i], + w, h); + return 0; + } + + /* try to use an 
intermediate format */ + if (src_pix_fmt == PIX_FMT_MONOWHITE || + src_pix_fmt == PIX_FMT_MONOBLACK || + dst_pix_fmt == PIX_FMT_MONOWHITE || + dst_pix_fmt == PIX_FMT_MONOBLACK) { + int_pix_fmt = PIX_FMT_GRAY8; } else { - return -1; + int_pix_fmt = PIX_FMT_RGB24; } - return 0; + if (avpicture_alloc(tmp, int_pix_fmt, dst_width, dst_height) < 0) + return -1; + ret = -1; + if (img_convert(tmp, int_pix_fmt, + src, src_pix_fmt, src_width, src_height) < 0) + goto fail1; + if (img_convert(dst, dst_pix_fmt, + tmp, int_pix_fmt, dst_width, dst_height) < 0) + goto fail1; + ret = 0; + fail1: + avpicture_free(tmp); + return ret; } @@ -948,6 +1294,15 @@ static void deinterlace_line(UINT8 *dst, UINT8 *lum_m4, UINT8 *lum_m3, UINT8 *lu } #else + { + mmx_t rounder; + rounder.uw[0]=4; + rounder.uw[1]=4; + rounder.uw[2]=4; + rounder.uw[3]=4; + pxor_r2r(mm7,mm7); + movq_m2r(rounder,mm6); + } for (;size > 3; size-=4) { DEINT_LINE_LUM lum_m4+=4; @@ -982,6 +1337,15 @@ static void deinterlace_line_inplace(UINT8 *lum_m4, UINT8 *lum_m3, UINT8 *lum_m2 } #else + { + mmx_t rounder; + rounder.uw[0]=4; + rounder.uw[1]=4; + rounder.uw[2]=4; + rounder.uw[3]=4; + pxor_r2r(mm7,mm7); + movq_m2r(rounder,mm6); + } for (;size > 3; size-=4) { DEINT_INPLACE_LINE_LUM lum_m4+=4; @@ -1064,19 +1428,6 @@ int avpicture_deinterlace(AVPicture *dst, AVPicture *src, if ((width & 3) != 0 || (height & 3) != 0) return -1; -#ifdef HAVE_MMX - { - mmx_t rounder; - rounder.uw[0]=4; - rounder.uw[1]=4; - rounder.uw[2]=4; - rounder.uw[3]=4; - pxor_r2r(mm7,mm7); - movq_m2r(rounder,mm6); - } -#endif - - for(i=0;i<3;i++) { if (i == 1) { switch(pix_fmt) { diff --git a/src/libffmpeg/libavcodec/mem.c b/src/libffmpeg/libavcodec/mem.c index a9b5e0afa..a36952fd7 100644 --- a/src/libffmpeg/libavcodec/mem.c +++ b/src/libffmpeg/libavcodec/mem.c @@ -17,6 +17,12 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include "avcodec.h" + +/* here we can use OS dependant allocation functions */ +#undef malloc +#undef free +#undef realloc + #ifdef HAVE_MALLOC_H #include <malloc.h> #endif @@ -25,10 +31,15 @@ memory allocator. You do not need to suppress this file because the linker will do it automatically */ -/* memory alloc */ +/** + * Memory allocation of size byte with alignment suitable for all + * memory accesses (including vectors if available on the + * CPU). av_malloc(0) must return a non NULL pointer. + */ void *av_malloc(unsigned int size) { void *ptr; + #if defined (HAVE_MEMALIGN) ptr = memalign(16,size); /* Why 64? @@ -60,14 +71,19 @@ void *av_malloc(unsigned int size) #else ptr = malloc(size); #endif - if (!ptr) - return NULL; -//fprintf(stderr, "%X %d\n", (int)ptr, size); - /* NOTE: this memset should not be present */ - memset(ptr, 0, size); return ptr; } +/** + * av_realloc semantics (same as glibc): if ptr is NULL and size > 0, + * identical to malloc(size). If size is zero, it is identical to + * free(ptr) and NULL is returned. 
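Since av_realloc() above is documented to follow glibc realloc() semantics, a caller can fold allocation and growth into one code path. A minimal usage sketch under that assumption (the helper below is illustrative and not part of the patch; it only relies on the av_malloc/av_realloc entry points shown here):

    /* grow a byte buffer on demand; av_realloc(NULL, n) behaves like av_malloc(n) */
    static uint8_t *grow_buffer(uint8_t *buf, unsigned int *cur_size, unsigned int needed)
    {
        uint8_t *tmp;
        if (needed <= *cur_size)
            return buf;
        tmp = av_realloc(buf, needed);
        if (!tmp)
            return buf;          /* reallocation failed, the old buffer is still valid */
        *cur_size = needed;
        return tmp;
    }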
+ */ +void *av_realloc(void *ptr, unsigned int size) +{ + return realloc(ptr, size); +} + /* NOTE: ptr = NULL is explicetly allowed */ void av_free(void *ptr) { diff --git a/src/libffmpeg/libavcodec/mjpeg.c b/src/libffmpeg/libavcodec/mjpeg.c index 9b4943582..9617816bb 100644 --- a/src/libffmpeg/libavcodec/mjpeg.c +++ b/src/libffmpeg/libavcodec/mjpeg.c @@ -735,7 +735,7 @@ static int mjpeg_decode_init(AVCodecContext *avctx) if (avctx->flags & CODEC_FLAG_EXTERN_HUFF) { printf("mjpeg: using external huffman table\n"); - init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size); + init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size*8); mjpeg_decode_dht(s); /* should check for error - but dunno */ } @@ -1404,13 +1404,13 @@ static int mjpeg_decode_frame(AVCodecContext *avctx, break; } } - init_get_bits(&s->gb, s->buffer, dst - s->buffer); + init_get_bits(&s->gb, s->buffer, (dst - s->buffer)*8); dprintf("escaping removed %d bytes\n", (buf_end - buf_ptr) - (dst - s->buffer)); } else - init_get_bits(&s->gb, buf_ptr, buf_end - buf_ptr); + init_get_bits(&s->gb, buf_ptr, (buf_end - buf_ptr)*8); s->start_code = start_code; @@ -1548,7 +1548,7 @@ read_header: /* reset on every SOI */ s->restart_interval = 0; - init_get_bits(&hgb, buf_ptr, /*buf_size*/buf_end - buf_ptr); + init_get_bits(&hgb, buf_ptr, /*buf_size*/(buf_end - buf_ptr)*8); skip_bits(&hgb, 32); /* reserved zeros */ @@ -1570,7 +1570,7 @@ read_header: dprintf("dqt offs: 0x%x\n", dqt_offs); if (dqt_offs) { - init_get_bits(&s->gb, buf+dqt_offs, buf_end - (buf+dqt_offs)); + init_get_bits(&s->gb, buf+dqt_offs, (buf_end - (buf+dqt_offs))*8); s->start_code = DQT; mjpeg_decode_dqt(s); } @@ -1579,7 +1579,7 @@ read_header: dprintf("dht offs: 0x%x\n", dht_offs); if (dht_offs) { - init_get_bits(&s->gb, buf+dht_offs, buf_end - (buf+dht_offs)); + init_get_bits(&s->gb, buf+dht_offs, (buf_end - (buf+dht_offs))*8); s->start_code = DHT; mjpeg_decode_dht(s); } @@ -1588,7 +1588,7 @@ read_header: dprintf("sof offs: 0x%x\n", sof_offs); if (sof_offs) { - init_get_bits(&s->gb, buf+sof_offs, buf_end - (buf+sof_offs)); + init_get_bits(&s->gb, buf+sof_offs, (buf_end - (buf+sof_offs))*8); s->start_code = SOF0; if (mjpeg_decode_sof0(s) < 0) return -1; @@ -1598,8 +1598,8 @@ read_header: dprintf("sos offs: 0x%x\n", sos_offs); if (sos_offs) { -// init_get_bits(&s->gb, buf+sos_offs, buf_end - (buf+sos_offs)); - init_get_bits(&s->gb, buf+sos_offs, field_size); +// init_get_bits(&s->gb, buf+sos_offs, (buf_end - (buf+sos_offs))*8); + init_get_bits(&s->gb, buf+sos_offs, field_size*8); s->start_code = SOS; mjpeg_decode_sos(s); } diff --git a/src/libffmpeg/libavcodec/motion_est.c b/src/libffmpeg/libavcodec/motion_est.c index 8310db8d5..e4b67b22f 100644 --- a/src/libffmpeg/libavcodec/motion_est.c +++ b/src/libffmpeg/libavcodec/motion_est.c @@ -1,7 +1,7 @@ /* * Motion estimation * Copyright (c) 2000,2001 Fabrice Bellard. 
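The repeated "*8" edits in the mjpeg.c hunks above (and again in the mpeg12 and mpegaudio hunks further down) reflect a change of calling convention: init_get_bits() now takes the buffer length in bits rather than bytes. A hedged sketch of the adjusted call, with everything except the GetBitContext argument treated as illustrative:

    /* old convention (bytes): init_get_bits(&gb, buf, buf_size);     */
    /* new convention (bits):  init_get_bits(&gb, buf, buf_size * 8); */
    static void bitstream_start(GetBitContext *gb, const uint8_t *buf, int buf_size_in_bytes)
    {
        init_get_bits(gb, buf, buf_size_in_bytes * 8);  /* length is now expressed in bits */
    }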
- * Copyright (c) 2002 Michael Niedermayer + * Copyright (c) 2002-2003 Michael Niedermayer * * * This library is free software; you can redistribute it and/or @@ -286,6 +286,14 @@ static void set_cmp(MpegEncContext *s, me_cmp_func *cmp, int type){ cmp[0]= c->quant_psnr[0]; cmp[1]= c->quant_psnr[1]; break; + case FF_CMP_BIT: + cmp[0]= c->bit[0]; + cmp[1]= c->bit[1]; + break; + case FF_CMP_RD: + cmp[0]= c->rd[0]; + cmp[1]= c->rd[1]; + break; case FF_CMP_ZERO: for(i=0; i<7; i++){ cmp[i]= zero_cmp; @@ -294,19 +302,24 @@ static void set_cmp(MpegEncContext *s, me_cmp_func *cmp, int type){ default: fprintf(stderr,"internal error in cmp function selection\n"); } -}; +} static inline int get_penalty_factor(MpegEncContext *s, int type){ - - switch(type){ + switch(type&0xFF){ default: case FF_CMP_SAD: - return s->qscale; - case FF_CMP_SSE: -// return s->qscale*8; + return s->qscale*2; case FF_CMP_DCT: + return s->qscale*3; case FF_CMP_SATD: - return s->qscale*8; + return s->qscale*6; + case FF_CMP_SSE: + return s->qscale*s->qscale*2; + case FF_CMP_BIT: + return 1; + case FF_CMP_RD: + case FF_CMP_PSNR: + return (s->qscale*s->qscale*185 + 64)>>7; } } @@ -324,7 +337,9 @@ void ff_init_me(MpegEncContext *s){ }else{ if(s->avctx->me_sub_cmp&FF_CMP_CHROMA) s->me.sub_motion_search= simple_chroma_hpel_motion_search; - else if(s->avctx->me_sub_cmp == FF_CMP_SAD && s->avctx->me_cmp == FF_CMP_SAD) + else if( s->avctx->me_sub_cmp == FF_CMP_SAD + && s->avctx-> me_cmp == FF_CMP_SAD + && s->avctx-> mb_cmp == FF_CMP_SAD) s->me.sub_motion_search= sad_hpel_motion_search; else s->me.sub_motion_search= simple_hpel_motion_search; @@ -343,6 +358,18 @@ void ff_init_me(MpegEncContext *s){ }else{ s->me.pre_motion_search= simple_epzs_motion_search; } + + if(s->flags&CODEC_FLAG_QPEL){ + if(s->avctx->mb_cmp&FF_CMP_CHROMA) + s->me.get_mb_score= simple_chroma_qpel_get_mb_score; + else + s->me.get_mb_score= simple_qpel_get_mb_score; + }else{ + if(s->avctx->mb_cmp&FF_CMP_CHROMA) + s->me.get_mb_score= simple_chroma_hpel_get_mb_score; + else + s->me.get_mb_score= simple_hpel_get_mb_score; + } } static int pix_dev(UINT8 * pix, int line_size, int mean) @@ -776,12 +803,11 @@ static inline void get_limits(MpegEncContext *s, int *range, int *xmin, int *ymi } } -static inline int mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, int ymax, int mx, int my, int shift) +static inline int h263_mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, int ymax, int mx, int my, int shift) { int block; int P[10][2]; - uint8_t *ref_picture= s->last_picture.data[0]; - int dmin_sum=0; + int dmin_sum=0, mx4_sum=0, my4_sum=0; uint16_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; for(block=0; block<4; block++){ @@ -826,13 +852,15 @@ static inline int mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, in P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); - if(s->out_format == FMT_H263){ +// if(s->out_format == FMT_H263){ pred_x4 = P_MEDIAN[0]; pred_y4 = P_MEDIAN[1]; +#if 0 }else { /* mpeg1 at least */ pred_x4= P_LEFT[0]; pred_y4= P_LEFT[1]; } +#endif } P_MV1[0]= mx; P_MV1[1]= my; @@ -842,12 +870,80 @@ static inline int mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, in dmin4= s->me.sub_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, pred_x4, pred_y4, &s->last_picture, block, 1, mv_penalty); - + + if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){ + int dxy; + const int offset= ((block&1) + (block>>1)*s->linesize)*8; + 
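The reworked get_penalty_factor() scales the motion-vector coding cost to whichever comparison metric is active (roughly 2*qscale for SAD up to qscale-squared terms for SSE and RD), so distortion and vector cost stay in comparable units. The vector cost itself is applied the same way throughout this file; a reduced sketch of that pattern, using the mv_penalty table and penalty factor fields that appear in the patch (the helper name is illustrative):

    /* cost of coding (mx,my) relative to the predictor, weighted by the active metric;
       mv_penalty is assumed to point at the centre of the table, as in the patch (... + MAX_MV) */
    static inline int mv_cost(const uint16_t *mv_penalty, int penalty_factor,
                              int mx, int my, int pred_x, int pred_y)
    {
        return (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y]) * penalty_factor;
    }

    /* e.g. dmin_sum += mv_cost(mv_penalty, s->me.mb_penalty_factor, mx4, my4, pred_x4, pred_y4); */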
uint8_t *dest_y = s->me.scratchpad + offset; + + if(s->quarter_sample){ + uint8_t *ref= s->last_picture.data[0] + (s->mb_x*16 + (mx4>>2)) + (s->mb_y*16 + (my4>>2))*s->linesize + offset; + dxy = ((my4 & 3) << 2) | (mx4 & 3); + + if(s->no_rounding) + s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y , ref , s->linesize); + else + s->dsp.put_qpel_pixels_tab [1][dxy](dest_y , ref , s->linesize); + }else{ + uint8_t *ref= s->last_picture.data[0] + (s->mb_x*16 + (mx4>>1)) + (s->mb_y*16 + (my4>>1))*s->linesize + offset; + dxy = ((my4 & 1) << 1) | (mx4 & 1); + + if(s->no_rounding) + s->dsp.put_no_rnd_pixels_tab[1][dxy](dest_y , ref , s->linesize, 8); + else + s->dsp.put_pixels_tab [1][dxy](dest_y , ref , s->linesize, 8); + } + dmin_sum+= (mv_penalty[mx4-pred_x4] + mv_penalty[my4-pred_y4])*s->me.mb_penalty_factor; + }else + dmin_sum+= dmin4; + + if(s->quarter_sample){ + mx4_sum+= mx4/2; + my4_sum+= my4/2; + }else{ + mx4_sum+= mx4; + my4_sum+= my4; + } + s->motion_val[ s->block_index[block] ][0]= mx4; s->motion_val[ s->block_index[block] ][1]= my4; - dmin_sum+= dmin4; } - return dmin_sum; + + if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){ + dmin_sum += s->dsp.mb_cmp[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*16*s->linesize, s->me.scratchpad, s->linesize); + } + + if(s->avctx->mb_cmp&FF_CMP_CHROMA){ + int dxy; + int mx, my; + int offset; + + mx= ff_h263_round_chroma(mx4_sum); + my= ff_h263_round_chroma(my4_sum); + dxy = ((my & 1) << 1) | (mx & 1); + + offset= (s->mb_x*8 + (mx>>1)) + (s->mb_y*8 + (my>>1))*s->uvlinesize; + + if(s->no_rounding){ + s->dsp.put_no_rnd_pixels_tab[1][dxy](s->me.scratchpad , s->last_picture.data[1] + offset, s->uvlinesize, 8); + s->dsp.put_no_rnd_pixels_tab[1][dxy](s->me.scratchpad+8 , s->last_picture.data[2] + offset, s->uvlinesize, 8); + }else{ + s->dsp.put_pixels_tab [1][dxy](s->me.scratchpad , s->last_picture.data[1] + offset, s->uvlinesize, 8); + s->dsp.put_pixels_tab [1][dxy](s->me.scratchpad+8 , s->last_picture.data[2] + offset, s->uvlinesize, 8); + } + + dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad , s->uvlinesize); + dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad+8, s->uvlinesize); + } + + switch(s->avctx->mb_cmp&0xFF){ + /*case FF_CMP_SSE: + return dmin_sum+ 32*s->qscale*s->qscale;*/ + case FF_CMP_RD: + return dmin_sum; + default: + return dmin_sum+ 11*s->me.mb_penalty_factor; + } } void ff_estimate_p_frame_motion(MpegEncContext * s, @@ -869,6 +965,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, s->me.penalty_factor = get_penalty_factor(s, s->avctx->me_cmp); s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp); + s->me.mb_penalty_factor = get_penalty_factor(s, s->avctx->mb_cmp); get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, s->f_code); rel_xmin= xmin - mb_x*16; @@ -959,6 +1056,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, pic->mb_var [s->mb_width * mb_y + mb_x] = varc; pic->mc_mb_var[s->mb_width * mb_y + mb_x] = vard; pic->mb_mean [s->mb_width * mb_y + mb_x] = (sum+128)>>8; +// pic->mb_cmp_score[s->mb_width * mb_y + mb_x] = dmin; pic->mb_var_sum += varc; pic->mc_mb_var_sum += vard; //printf("E%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout); @@ -985,44 +1083,36 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, } if((s->flags&CODEC_FLAG_4MV) && !s->me.skip && varc>50 && vard>10){ - mv4_search(s, rel_xmin, rel_ymin, rel_xmax, 
rel_ymax, mx, my, shift); + h263_mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift); mb_type|=MB_TYPE_INTER4V; set_p_mv_tables(s, mx, my, 0); }else set_p_mv_tables(s, mx, my, 1); }else{ - if (vard <= 64 || vard < varc) { -// if (sadP <= 32 || sadP < sadI + 500) { - s->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc); - mb_type|= MB_TYPE_INTER; - if (s->me_method != ME_ZERO) { - dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, - pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty); - if((s->flags&CODEC_FLAG_4MV) - && !s->me.skip && varc>50 && vard>10){ - int dmin4= mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift); - if(dmin4 + 128 <dmin) - mb_type= MB_TYPE_INTER4V; - } - set_p_mv_tables(s, mx, my, mb_type!=MB_TYPE_INTER4V); + mb_type= MB_TYPE_INTER; - } else { - mx <<=shift; - my <<=shift; - } -#if 0 - if (vard < 10) { - skip++; - fprintf(stderr,"\nEarly skip: %d vard: %2d varc: %5d dmin: %d", - skip, vard, varc, dmin); + dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, + pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty); + + if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip) + dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, &s->last_picture, mv_penalty); + + if((s->flags&CODEC_FLAG_4MV) + && !s->me.skip && varc>50 && vard>10){ + int dmin4= h263_mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift); + if(dmin4 < dmin){ + mb_type= MB_TYPE_INTER4V; + dmin=dmin4; } -#endif + } + pic->mb_cmp_score[s->mb_width * mb_y + mb_x] = dmin; + set_p_mv_tables(s, mx, my, mb_type!=MB_TYPE_INTER4V); + + if (vard <= 64 || vard < varc) { + s->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc); }else{ - s->scene_change_score+= 20; - mb_type|= MB_TYPE_INTRA; - mx = 0; - my = 0; + s->scene_change_score+= s->qscale; } } @@ -1105,6 +1195,7 @@ int ff_estimate_motion_b(MpegEncContext * s, s->me.penalty_factor = get_penalty_factor(s, s->avctx->me_cmp); s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp); + s->me.mb_penalty_factor = get_penalty_factor(s, s->avctx->mb_cmp); get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, f_code); rel_xmin= xmin - mb_x*16; @@ -1174,6 +1265,10 @@ int ff_estimate_motion_b(MpegEncContext * s, dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, pred_x, pred_y, picture, 0, 0, mv_penalty); + + if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip) + dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, picture, mv_penalty); + //printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my); // s->mb_type[mb_y*s->mb_width + mb_x]= mb_type; mv_table[mot_xy][0]= mx; @@ -1237,10 +1332,14 @@ static inline int check_bidir_mv(MpegEncContext * s, s->dsp.avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); } - fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*s->me.sub_penalty_factor - +(mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->me.sub_penalty_factor; - + s->dsp.me_sub_cmp[0](s, s->new_picture.data[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize); - + fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*s->me.mb_penalty_factor + +(mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->me.mb_penalty_factor + + s->dsp.mb_cmp[0](s, s->new_picture.data[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize); + + if(s->avctx->mb_cmp&FF_CMP_CHROMA){ + } + //FIXME CHROMA !!! 
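check_bidir_mv() above builds the bidirectional candidate by averaging the forward and backward predictions (via avg_pixels_tab) and then charges the macroblock comparison score plus both vector costs, all weighted by mb_penalty_factor. A reduced stand-alone sketch of that scoring step, assuming a plain SAD as the comparison function; everything below is illustrative rather than the dsputil call the patch actually uses:

    /* score a 16x16 bidirectional prediction: SAD against the rounded average of fwd/bwd */
    static int bidir_sad(const uint8_t *src, const uint8_t *fwd, const uint8_t *bwd,
                         int stride, int mv_bits_cost)
    {
        int x, y, score = mv_bits_cost;                  /* vector cost, already weighted */
        for (y = 0; y < 16; y++) {
            for (x = 0; x < 16; x++) {
                int pred = (fwd[x] + bwd[x] + 1) >> 1;   /* rounded average, like avg_pixels */
                int d    = src[x] - pred;
                score   += d < 0 ? -d : d;
            }
            src += stride; fwd += stride; bwd += stride;
        }
        return score;
    }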
+ return fbmin; } @@ -1344,17 +1443,24 @@ static inline int direct_search(MpegEncContext * s, P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); } - + + //FIXME direct_search ptr in context!!! (needed for chroma anyway or this will get messy) if(s->flags&CODEC_FLAG_QPEL){ dmin = simple_direct_qpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax, &s->last_picture, mv_table, 1<<14, mv_penalty); dmin = simple_direct_qpel_qpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, 0, 0, &s->last_picture, 0, 0, mv_penalty); + + if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip) + dmin= simple_direct_qpel_qpel_get_mb_score(s, mx, my, 0, 0, &s->last_picture, mv_penalty); }else{ dmin = simple_direct_hpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax, &s->last_picture, mv_table, 1<<15, mv_penalty); dmin = simple_direct_hpel_hpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, 0, 0, &s->last_picture, 0, 0, mv_penalty); + + if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip) + dmin= simple_direct_hpel_hpel_get_mb_score(s, mx, my, 0, 0, &s->last_picture, mv_penalty); } s->b_direct_mv_table[mot_xy][0]= mx; @@ -1365,18 +1471,18 @@ static inline int direct_search(MpegEncContext * s, void ff_estimate_b_frame_motion(MpegEncContext * s, int mb_x, int mb_y) { - const int penalty_factor= s->me.penalty_factor; + const int penalty_factor= s->me.mb_penalty_factor; int fmin, bmin, dmin, fbmin; int type=0; dmin= direct_search(s, mb_x, mb_y); - fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, &s->last_picture, s->f_code); - bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, &s->next_picture, s->b_code) - penalty_factor; + fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, &s->last_picture, s->f_code) + 3*penalty_factor; + bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, &s->next_picture, s->b_code) + 2*penalty_factor; //printf(" %d %d ", s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]); - fbmin= bidir_refine(s, mb_x, mb_y); - + fbmin= bidir_refine(s, mb_x, mb_y) + penalty_factor; +//printf("%d %d %d %d\n", dmin, fmin, bmin, fbmin); { int score= dmin; type=MB_TYPE_DIRECT; @@ -1393,9 +1499,10 @@ void ff_estimate_b_frame_motion(MpegEncContext * s, score=fbmin; type= MB_TYPE_BIDIR; } + score= ((unsigned)(score*score + 128*256))>>16; s->current_picture.mc_mb_var_sum += score; - s->current_picture.mc_mb_var[mb_y*s->mb_width + mb_x] = score; //FIXME use SSD + s->current_picture.mc_mb_var[mb_y*s->mb_width + mb_x] = score; //FIXME use SSE } if(s->flags&CODEC_FLAG_HQ){ diff --git a/src/libffmpeg/libavcodec/motion_est_template.c b/src/libffmpeg/libavcodec/motion_est_template.c index d1ca6e7fb..4725ed994 100644 --- a/src/libffmpeg/libavcodec/motion_est_template.c +++ b/src/libffmpeg/libavcodec/motion_est_template.c @@ -39,7 +39,7 @@ qpel_mc_func (*qpel_put)[16];\ qpel_mc_func (*qpel_avg)[16]= &s->dsp.avg_qpel_pixels_tab[size];\ const __attribute__((unused)) int unu= time_pp + time_pb + (int)src_u + (int)src_v + (int)ref_u + (int)ref_v\ - + (int)ref2_y + (int)hpel_avg + (int)qpel_avg;\ + + (int)ref2_y + (int)hpel_avg + (int)qpel_avg + (int)score_map;\ if(s->no_rounding /*FIXME b_type*/){\ hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];\ chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];\ @@ -144,6 +144,7 @@ static int RENAME(hpel_motion_search)(MpegEncContext * s, const int my = *my_ptr; const int penalty_factor= s->me.sub_penalty_factor; 
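In ff_estimate_b_frame_motion() the winning mode's score is now squared and rescaled before being accumulated into mc_mb_var_sum (the FIXME in the same hunk notes that a true SSE would be the better measure). The mapping is small enough to show in isolation, with a worked value added for illustration:

    /* map a SAD-like mode score onto a compressed scale, as done for B-frame macroblocks */
    static inline unsigned squash_score(unsigned score)
    {
        return (score * score + 128 * 256) >> 16;
    }

    /* squash_score(1000) == (1000000 + 32768) >> 16 == 15 */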
me_cmp_func cmp_sub, chroma_cmp_sub; + int bx=2*mx, by=2*my; LOAD_COMMON(xx, yy); @@ -160,13 +161,12 @@ static int RENAME(hpel_motion_search)(MpegEncContext * s, if(s->avctx->me_cmp != s->avctx->me_sub_cmp){ CMP_HPEL(dmin, 0, 0, mx, my, size); - if(mx || my) + if(mx || my || size>0) dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor; } if (mx > xmin && mx < xmax && my > ymin && my < ymax) { - int bx=2*mx, by=2*my; int d= dmin; const int index= (my<<ME_MAP_SHIFT) + mx; const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] @@ -178,7 +178,7 @@ static int RENAME(hpel_motion_search)(MpegEncContext * s, const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] + (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*s->me.penalty_factor; -#if 0 +#if 1 int key; int map_generation= s->me.map_generation; uint32_t *map= s->me.map; @@ -231,20 +231,50 @@ static int RENAME(hpel_motion_search)(MpegEncContext * s, CHECK_HALF_MV(0, 1, mx , my) } assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2); - - *mx_ptr = bx; - *my_ptr = by; - }else{ - *mx_ptr =2*mx; - *my_ptr =2*my; } + *mx_ptr = bx; + *my_ptr = by; + return dmin; } #endif +static int RENAME(hpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, Picture *ref_picture, + uint16_t * const mv_penalty) +{ +// const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp; + const int size= 0; + const int xx = 16 * s->mb_x; + const int yy = 16 * s->mb_y; + const int penalty_factor= s->me.mb_penalty_factor; + const int xmin= -256*256, ymin= -256*256, xmax= 256*256, ymax= 256*256; //assume that the caller checked these + const __attribute__((unused)) int unu2= xmin + xmax +ymin + ymax; //no unused warning shit + me_cmp_func cmp_sub, chroma_cmp_sub; + int d; + + LOAD_COMMON(xx, yy); + + //FIXME factorize + + cmp_sub= s->dsp.mb_cmp[size]; + chroma_cmp_sub= s->dsp.mb_cmp[size+1]; + + assert(!s->me.skip); + assert(s->avctx->me_sub_cmp != s->avctx->mb_cmp); + + CMP_HPEL(d, mx&1, my&1, mx>>1, my>>1, size); + //FIXME check cbp before adding penalty for (0,0) vector + if(mx || my || size>0) + d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor; + + return d; +} + #endif /* CMP_HPEL */ + + #ifdef CMP_QPEL #define CHECK_QUARTER_MV(dx, dy, x, y)\ @@ -290,7 +320,7 @@ static int RENAME(qpel_motion_search)(MpegEncContext * s, if(s->avctx->me_cmp != s->avctx->me_sub_cmp){ CMP_QPEL(dmin, 0, 0, mx, my, size); - if(mx || my) + if(mx || my || size>0) dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor; } @@ -477,6 +507,37 @@ static int RENAME(qpel_motion_search)(MpegEncContext * s, return dmin; } +static int RENAME(qpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, Picture *ref_picture, + uint16_t * const mv_penalty) +{ + const int size= 0; + const int xx = 16 * s->mb_x; + const int yy = 16 * s->mb_y; + const int penalty_factor= s->me.mb_penalty_factor; + const int xmin= -256*256, ymin= -256*256, xmax= 256*256, ymax= 256*256; //assume that the caller checked these + const __attribute__((unused)) int unu2= xmin + xmax +ymin + ymax; //no unused warning shit + me_cmp_func cmp_sub, chroma_cmp_sub; + int d; + + LOAD_COMMON(xx, yy); + + //FIXME factorize + + cmp_sub= s->dsp.mb_cmp[size]; + chroma_cmp_sub= s->dsp.mb_cmp[size+1]; + + assert(!s->me.skip); + assert(s->avctx->me_sub_cmp != s->avctx->mb_cmp); + + CMP_QPEL(d, mx&3, my&3, mx>>2, my>>2, size); + //FIXME check cbp before adding penalty for (0,0) vector + if(mx || my || 
size>0) + d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor; + + return d; +} + + #endif /* CMP_QPEL */ #define CHECK_MV(x,y)\ diff --git a/src/libffmpeg/libavcodec/mpeg12.c b/src/libffmpeg/libavcodec/mpeg12.c index 10abf1024..fecb097bd 100644 --- a/src/libffmpeg/libavcodec/mpeg12.c +++ b/src/libffmpeg/libavcodec/mpeg12.c @@ -504,7 +504,7 @@ static void mpeg1_encode_motion(MpegEncContext *s, int val) void ff_mpeg1_encode_init(MpegEncContext *s) { -#ifdef CONFIG_ENCODERS +#if 0 static int done=0; common_init(s); @@ -769,6 +769,8 @@ static int mpeg_decode_mb(MpegEncContext *s, dprintf("decode_mb: x=%d y=%d\n", s->mb_x, s->mb_y); + assert(s->mb_skiped==0); + if (--s->mb_incr != 0) { /* skip mb */ s->mb_intra = 0; @@ -781,15 +783,18 @@ static int mpeg_decode_mb(MpegEncContext *s, s->mv[0][0][0] = s->mv[0][0][1] = 0; s->last_mv[0][0][0] = s->last_mv[0][0][1] = 0; s->last_mv[0][1][0] = s->last_mv[0][1][1] = 0; + s->mb_skiped = 1; } else { /* if B type, reuse previous vectors and directions */ s->mv[0][0][0] = s->last_mv[0][0][0]; s->mv[0][0][1] = s->last_mv[0][0][1]; s->mv[1][0][0] = s->last_mv[1][0][0]; s->mv[1][0][1] = s->last_mv[1][0][1]; + + if((s->mv[0][0][0]|s->mv[0][0][1]|s->mv[1][0][0]|s->mv[1][0][1])==0) + s->mb_skiped = 1; } - s->mb_skiped = 1; return 0; } @@ -1464,7 +1469,7 @@ static int mpeg1_decode_picture(AVCodecContext *avctx, MpegEncContext *s = &s1->mpeg_enc_ctx; int ref, f_code; - init_get_bits(&s->gb, buf, buf_size); + init_get_bits(&s->gb, buf, buf_size*8); ref = get_bits(&s->gb, 10); /* temporal ref */ s->pict_type = get_bits(&s->gb, 3); @@ -1616,7 +1621,7 @@ static void mpeg_decode_extension(AVCodecContext *avctx, MpegEncContext *s = &s1->mpeg_enc_ctx; int ext_type; - init_get_bits(&s->gb, buf, buf_size); + init_get_bits(&s->gb, buf, buf_size*8); ext_type = get_bits(&s->gb, 4); switch(ext_type) { @@ -1672,7 +1677,7 @@ static int mpeg_decode_slice(AVCodecContext *avctx, return DECODE_SLICE_FATAL_ERROR; if(s->avctx->debug&FF_DEBUG_PICT_INFO){ - printf("qp:%d fc:%d%d%d%d %s %s %s %s dc:%d pstruct:%d fdct:%d cmv:%d qtype:%d ivlc:%d rff:%d %s\n", + printf("qp:%d fc:%2d%2d%2d%2d %s %s %s %s dc:%d pstruct:%d fdct:%d cmv:%d qtype:%d ivlc:%d rff:%d %s\n", s->qscale, s->mpeg_f_code[0][0],s->mpeg_f_code[0][1],s->mpeg_f_code[1][0],s->mpeg_f_code[1][1], s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? "B" : "S")), s->progressive_sequence ? "pro" :"", s->alternate_scan ? "alt" :"", s->top_field_first ? 
"top" :"", @@ -1681,7 +1686,7 @@ static int mpeg_decode_slice(AVCodecContext *avctx, } } - init_get_bits(&s->gb, buf, buf_size); + init_get_bits(&s->gb, buf, buf_size*8); s->qscale = get_qscale(s); /* extra slice info */ @@ -1790,7 +1795,7 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx, int width, height, i, v, j; float aspect; - init_get_bits(&s->gb, buf, buf_size); + init_get_bits(&s->gb, buf, buf_size*8); width = get_bits(&s->gb, 12); height = get_bits(&s->gb, 12); diff --git a/src/libffmpeg/libavcodec/mpegaudiodec.c b/src/libffmpeg/libavcodec/mpegaudiodec.c index b2c0966aa..9a066c905 100644 --- a/src/libffmpeg/libavcodec/mpegaudiodec.c +++ b/src/libffmpeg/libavcodec/mpegaudiodec.c @@ -507,7 +507,7 @@ static int decode_init(AVCodecContext * avctx) return 0; } -/* tab[i][j] = 1.0 / (2.0 * cos(pi*(2*k+1) / 2^(6 - j))) */; +/* tab[i][j] = 1.0 / (2.0 * cos(pi*(2*k+1) / 2^(6 - j))) */ /* cos(i*pi/64) */ @@ -1460,7 +1460,7 @@ static void seek_to_maindata(MPADecodeContext *s, long backstep) memcpy(ptr, s->inbuf1[s->inbuf_index ^ 1] + BACKSTEP_SIZE + s->old_frame_size - backstep, backstep); /* init get bits again */ - init_get_bits(&s->gb, ptr, s->frame_size + backstep); + init_get_bits(&s->gb, ptr, (s->frame_size + backstep)*8); /* prepare next buffer */ s->inbuf_index ^= 1; @@ -2280,7 +2280,7 @@ static int mp_decode_frame(MPADecodeContext *s, short *samples_ptr; init_get_bits(&s->gb, s->inbuf + HEADER_SIZE, - s->inbuf_ptr - s->inbuf - HEADER_SIZE); + (s->inbuf_ptr - s->inbuf - HEADER_SIZE)*8); /* skip error protection field */ if (s->error_protection) diff --git a/src/libffmpeg/libavcodec/mpegvideo.c b/src/libffmpeg/libavcodec/mpegvideo.c index 8206df470..d721647a5 100644 --- a/src/libffmpeg/libavcodec/mpegvideo.c +++ b/src/libffmpeg/libavcodec/mpegvideo.c @@ -20,6 +20,7 @@ */ #include <ctype.h> +#include <limits.h> #include "avcodec.h" #include "dsputil.h" #include "mpegvideo.h" @@ -80,12 +81,15 @@ static const uint8_t simple_mmx_permutation[64]={ }; static const uint8_t h263_chroma_roundtab[16] = { +// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, }; static UINT16 (*default_mv_penalty)[MAX_MV*2+1]=NULL; static UINT8 default_fcode_tab[MAX_MV*2+1]; +enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1}; + static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64], const UINT16 *quant_matrix, int bias, int qmin, int qmax) { @@ -230,6 +234,8 @@ int DCT_common_init(MpegEncContext *s) MPV_common_init_ppc(s); #endif + s->fast_dct_quantize= s->dct_quantize; + if(s->flags&CODEC_FLAG_TRELLIS_QUANT){ s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_* } @@ -286,7 +292,7 @@ static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){ r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic); if(r<0 || !pic->age || !pic->type || !pic->data[0]){ - fprintf(stderr, "get_buffer() failed (%d %d %d %X)\n", r, pic->age, pic->type, (int)pic->data[0]); + fprintf(stderr, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]); return -1; } @@ -309,6 +315,7 @@ static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){ CHECKED_ALLOCZ(pic->mb_var , s->mb_num * sizeof(INT16)) CHECKED_ALLOCZ(pic->mc_mb_var, s->mb_num * sizeof(INT16)) CHECKED_ALLOCZ(pic->mb_mean , s->mb_num * sizeof(INT8)) + CHECKED_ALLOCZ(pic->mb_cmp_score, s->mb_num * sizeof(int32_t)) } CHECKED_ALLOCZ(pic->mbskip_table , s->mb_num * sizeof(UINT8)+1) //the +1 is for the slice end check 
@@ -316,6 +323,12 @@ static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){ pic->qstride= s->mb_width; } + //it might be nicer if the application would keep track of these but it would require a API change + memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1); + s->prev_pict_types[0]= s->pict_type; + if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE) + pic->age= INT_MAX; // skiped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway + return 0; fail: //for the CHECKED_ALLOCZ macro return -1; @@ -334,6 +347,7 @@ static void free_picture(MpegEncContext *s, Picture *pic){ av_freep(&pic->mb_var); av_freep(&pic->mc_mb_var); av_freep(&pic->mb_mean); + av_freep(&pic->mb_cmp_score); av_freep(&pic->mbskip_table); av_freep(&pic->qscale_table); @@ -472,6 +486,7 @@ int MPV_common_init(MpegEncContext *s) /* init macroblock skip table */ CHECKED_ALLOCZ(s->mbskip_table, s->mb_num+1); //Note the +1 is for a quicker mpeg4 slice_end detection + CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE); s->block= s->blocks[0]; @@ -511,6 +526,7 @@ void MPV_common_end(MpegEncContext *s) av_freep(&s->me.score_map); av_freep(&s->mbskip_table); + av_freep(&s->prev_pict_types); av_freep(&s->bitstream_buffer); av_freep(&s->tex_pb_buffer); av_freep(&s->pb2_buffer); @@ -609,6 +625,7 @@ int MPV_encode_init(AVCodecContext *avctx) avctx->delay=0; s->low_delay=1; break; +#ifdef CONFIG_RISKY case CODEC_ID_H263: if (h263_get_picture_format(s->width, s->height) == 7) { printf("Input picture size isn't suitable for h263 codec! try h263+\n"); @@ -688,6 +705,7 @@ int MPV_encode_init(AVCodecContext *avctx) avctx->delay=0; s->low_delay=1; break; +#endif default: return -1; } @@ -725,24 +743,29 @@ int MPV_encode_init(AVCodecContext *avctx) ff_init_me(s); #ifdef CONFIG_ENCODERS +#ifdef CONFIG_RISKY if (s->out_format == FMT_H263) h263_encode_init(s); - else if (s->out_format == FMT_MPEG1) - ff_mpeg1_encode_init(s); if(s->msmpeg4_version) ff_msmpeg4_encode_init(s); #endif + if (s->out_format == FMT_MPEG1) + ff_mpeg1_encode_init(s); +#endif /* init default q matrix */ for(i=0;i<64;i++) { int j= s->idct_permutation[i]; +#ifdef CONFIG_RISKY if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){ s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i]; s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i]; }else if(s->out_format == FMT_H263){ s->intra_matrix[j] = s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i]; - }else{ /* mpeg1 */ + }else +#endif + { /* mpeg1 */ s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i]; s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i]; } @@ -787,6 +810,44 @@ int MPV_encode_end(AVCodecContext *avctx) return 0; } +void init_rl(RLTable *rl) +{ + INT8 max_level[MAX_RUN+1], max_run[MAX_LEVEL+1]; + UINT8 index_run[MAX_RUN+1]; + int last, run, level, start, end, i; + + /* compute max_level[], max_run[] and index_run[] */ + for(last=0;last<2;last++) { + if (last == 0) { + start = 0; + end = rl->last; + } else { + start = rl->last; + end = rl->n; + } + + memset(max_level, 0, MAX_RUN + 1); + memset(max_run, 0, MAX_LEVEL + 1); + memset(index_run, rl->n, MAX_RUN + 1); + for(i=start;i<end;i++) { + run = rl->table_run[i]; + level = rl->table_level[i]; + if (index_run[run] == rl->n) + index_run[run] = i; + if (level > max_level[run]) + max_level[run] = level; + if (run > max_run[level]) + max_run[level] = run; + } + rl->max_level[last] = av_malloc(MAX_RUN + 1); + memcpy(rl->max_level[last], 
max_level, MAX_RUN + 1); + rl->max_run[last] = av_malloc(MAX_LEVEL + 1); + memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1); + rl->index_run[last] = av_malloc(MAX_RUN + 1); + memcpy(rl->index_run[last], index_run, MAX_RUN + 1); + } +} + /* draw the edges of width 'w' of an image of size width, height */ //FIXME check that this is ok for mpeg4 interlaced static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w) @@ -1292,11 +1353,10 @@ static inline void gmc1_motion(MpegEncContext *s, dest_y+=dest_offset; if(s->flags&CODEC_FLAG_EMU_EDGE){ - if(src_x<0 || src_y<0 || src_x + (motion_x&15) + 16 > s->h_edge_pos - || src_y + (motion_y&15) + 16 > s->v_edge_pos){ + if(src_x<0 || src_y<0 || src_x + 17 >= s->h_edge_pos + || src_y + 17 >= s->v_edge_pos){ ff_emulated_edge_mc(s, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos); ptr= s->edge_emu_buffer; - emu=1; } } @@ -1331,9 +1391,13 @@ static inline void gmc1_motion(MpegEncContext *s, offset = (src_y * uvlinesize) + src_x + (src_offset>>1); ptr = ref_picture[1] + offset; - if(emu){ - ff_emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1); - ptr= s->edge_emu_buffer; + if(s->flags&CODEC_FLAG_EMU_EDGE){ + if(src_x<0 || src_y<0 || src_x + 9 >= s->h_edge_pos>>1 + || src_y + 9 >= s->v_edge_pos>>1){ + ff_emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1); + ptr= s->edge_emu_buffer; + emu=1; + } } s->dsp.gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding); @@ -1656,6 +1720,14 @@ static inline void qpel_motion(MpegEncContext *s, pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr, uvlinesize, h >> 1); } +inline int ff_h263_round_chroma(int x){ + if (x >= 0) + return (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1)); + else { + x = -x; + return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1)); + } +} static inline void MPV_motion(MpegEncContext *s, UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr, @@ -1672,6 +1744,7 @@ static inline void MPV_motion(MpegEncContext *s, switch(s->mv_type) { case MV_TYPE_16X16: +#ifdef CONFIG_RISKY if(s->mcsel){ if(s->real_sprite_warping_points==1){ gmc1_motion(s, dest_y, dest_cb, dest_cr, 0, @@ -1689,7 +1762,9 @@ static inline void MPV_motion(MpegEncContext *s, ff_mspel_motion(s, dest_y, dest_cb, dest_cr, ref_picture, pix_op, s->mv[dir][0][0], s->mv[dir][0][1], 16); - }else{ + }else +#endif + { mpeg_motion(s, dest_y, dest_cb, dest_cr, 0, ref_picture, 0, 0, pix_op, @@ -1766,20 +1841,8 @@ static inline void MPV_motion(MpegEncContext *s, if(s->flags&CODEC_FLAG_GRAY) break; /* In case of 8X8, we construct a single chroma motion vector with a special rounding */ - for(i=0;i<4;i++) { - } - if (mx >= 0) - mx = (h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1)); - else { - mx = -mx; - mx = -(h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1)); - } - if (my >= 0) - my = (h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1)); - else { - my = -my; - my = -(h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1)); - } + mx= ff_h263_round_chroma(mx); + my= ff_h263_round_chroma(my); dxy = ((my & 1) << 1) | (mx & 1); mx >>= 1; my >>= 1; @@ -2010,14 +2073,13 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) if(*mbskip_ptr >99) *mbskip_ptr= 99; /* if previous was skipped too, then nothing to do ! 
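ff_h263_round_chroma() above factors out the rounding that MPV_motion previously open-coded when deriving the single chroma vector from four luma vectors: the quotient of the sum is forced even and the low four bits are mapped through h263_chroma_roundtab. A worked illustration under that reading, with arbitrarily chosen values:

    /* four 8x8 luma vectors, half-pel units */
    int mx_sum = 4 + 5 + 6 + 5;                             /* = 20                       */
    int even   = (mx_sum >> 3) & ~1;                        /* 20 >> 3 = 2, already even  */
    int mx_c   = even + h263_chroma_roundtab[mx_sum & 0xf]; /* 2 + tab[4] = 2 + 1 = 3     */
    /* equivalent to mx_c = ff_h263_round_chroma(mx_sum); negative sums mirror the sign */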
*/ - if (*mbskip_ptr >= age){ -//if(s->pict_type!=B_TYPE && s->mb_x==0) printf("\n"); -//if(s->pict_type!=B_TYPE) printf("%d%d ", *mbskip_ptr, age); - if(s->pict_type!=B_TYPE) return; - if(s->avctx->draw_horiz_band==NULL && *mbskip_ptr > age) return; - /* we dont draw complete frames here so we cant skip */ + if (*mbskip_ptr >= age && s->current_picture.reference){ + return; } - } else { + } else if(!s->current_picture.reference){ + (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */ + if(*mbskip_ptr >99) *mbskip_ptr= 99; + } else{ *mbskip_ptr = 0; /* not skipped */ } }else @@ -2088,9 +2150,12 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) add_dct(s, block[4], 4, dest_cb, s->uvlinesize); add_dct(s, block[5], 5, dest_cr, s->uvlinesize); } - } else{ + } +#ifdef CONFIG_RISKY + else{ ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr); } +#endif } else { /* dct only in intra block */ if(s->encoding || !(s->mpeg2 || s->codec_id==CODEC_ID_MPEG1VIDEO)){ @@ -2585,7 +2650,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) s->block_last_index[4]= s->block_last_index[5]= 0; s->block[4][0]= - s->block[5][0]= 128; + s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale; } #ifdef CONFIG_ENCODERS @@ -2593,6 +2658,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) switch(s->codec_id){ //FIXME funct ptr could be slightly faster case CODEC_ID_MPEG1VIDEO: mpeg1_encode_mb(s, s->block, motion_x, motion_y); break; +#ifdef CONFIG_RISKY case CODEC_ID_MPEG4: mpeg4_encode_mb(s, s->block, motion_x, motion_y); break; case CODEC_ID_MSMPEG4V2: @@ -2601,18 +2667,48 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break; case CODEC_ID_WMV2: ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break; - case CODEC_ID_MJPEG: - mjpeg_encode_mb(s, s->block); break; case CODEC_ID_H263: case CODEC_ID_H263P: case CODEC_ID_RV10: h263_encode_mb(s, s->block, motion_x, motion_y); break; +#endif + case CODEC_ID_MJPEG: + mjpeg_encode_mb(s, s->block); break; default: assert(0); } #endif } +/** + * combines the (truncated) bitstream to a complete frame + * @returns -1 if no complete frame could be created + */ +int ff_combine_frame( MpegEncContext *s, int next, uint8_t **buf, int *buf_size){ + ParseContext *pc= &s->parse_context; + + pc->last_index= pc->index; + + if(next==-1){ + pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, (*buf_size) + pc->index + FF_INPUT_BUFFER_PADDING_SIZE); + + memcpy(&pc->buffer[pc->index], *buf, *buf_size); + pc->index += *buf_size; + return -1; + } + + if(pc->index){ + pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, next + pc->index + FF_INPUT_BUFFER_PADDING_SIZE); + + memcpy(&pc->buffer[pc->index], *buf, next + FF_INPUT_BUFFER_PADDING_SIZE ); + pc->index = 0; + *buf= pc->buffer; + *buf_size= pc->last_index + next; + } + + return 0; +} + void ff_copy_bits(PutBitContext *pb, UINT8 *src, int length) { int bytes= length>>4; @@ -2769,10 +2865,12 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->current_picture.mb_var_sum = 0; s->current_picture.mc_mb_var_sum = 0; +#ifdef CONFIG_RISKY /* we need to initialize some time vars before we can encode b-frames */ if (s->h263_pred && !s->h263_msmpeg4) ff_set_mpeg4_time(s, s->picture_number); - +#endif + s->scene_change_score=0; s->qscale= (int)(s->frame_qscale + 0.5); //FIXME qscale / ... 
stuff for ME ratedistoration @@ -2789,6 +2887,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->no_rounding ^= 1; } /* Estimate motion for every MB */ + s->mb_intra=0; //for the rate distoration & bit compare functions if(s->pict_type != I_TYPE){ if(s->pict_type != B_TYPE){ if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){ @@ -2880,6 +2979,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->frame_qscale = ff_rate_estimate_qscale(s); if(s->adaptive_quant){ +#ifdef CONFIG_RISKY switch(s->codec_id){ case CODEC_ID_MPEG4: ff_clean_mpeg4_qscales(s); @@ -2889,6 +2989,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) ff_clean_h263_qscales(s); break; } +#endif s->qscale= s->current_picture.qscale_table[0]; }else @@ -2918,6 +3019,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) case FMT_MJPEG: mjpeg_picture_header(s); break; +#ifdef CONFIG_RISKY case FMT_H263: if (s->codec_id == CODEC_ID_WMV2) ff_wmv2_encode_picture_header(s, picture_number); @@ -2930,6 +3032,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) else h263_encode_picture_header(s, picture_number); break; +#endif case FMT_MPEG1: mpeg1_encode_picture_header(s, picture_number); break; @@ -2957,11 +3060,13 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->last_mv[0][0][0] = 0; s->last_mv[0][0][1] = 0; +#ifdef CONFIG_RISKY if (s->codec_id==CODEC_ID_H263 || s->codec_id==CODEC_ID_H263P) s->gob_index = ff_h263_get_gob_height(s); if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame) ff_mpeg4_init_partitions(s); +#endif s->resync_mb_x=0; s->resync_mb_y=0; @@ -2979,7 +3084,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->block_index[4]= s->block_wrap[4]*(mb_y + 1) + s->block_wrap[0]*(s->mb_height*2 + 2); s->block_index[5]= s->block_wrap[4]*(mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2); for(mb_x=0; mb_x < s->mb_width; mb_x++) { - const int mb_type= s->mb_type[mb_y * s->mb_width + mb_x]; + int mb_type= s->mb_type[mb_y * s->mb_width + mb_x]; const int xy= (mb_y+1) * (s->mb_width+2) + mb_x + 1; // int d; int dmin=10000000; @@ -2994,6 +3099,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->block_index[5]++; /* write gob / video packet header */ +#ifdef CONFIG_RISKY if(s->rtp_mode){ int current_packet_size, is_gob_start; @@ -3034,6 +3140,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->resync_mb_y=mb_y; } } +#endif if( (s->resync_mb_x == s->mb_x) && s->resync_mb_y+1 == s->mb_y){ @@ -3108,7 +3215,9 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT; s->mb_intra= 0; +#ifdef CONFIG_RISKY ff_mpeg4_set_direct_mv(s, mx, my); +#endif encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_DIRECT, pb, pb2, tex_pb, &dmin, &next_block, mx, my); } @@ -3145,8 +3254,93 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->last_bits= get_bit_count(&s->pb); } else { int motion_x, motion_y; + int intra_score; + int inter_score= s->current_picture.mb_cmp_score[mb_x + mb_y*s->mb_width]; + + if(!(s->flags&CODEC_FLAG_HQ) && s->pict_type==P_TYPE){ + /* get luma score */ + if((s->avctx->mb_cmp&0xFF)==FF_CMP_SSE){ + intra_score= (s->current_picture.mb_var[mb_x + mb_y*s->mb_width]<<8) - 500; //FIXME dont scale it down so we dont have to fix it + }else{ + uint8_t *dest_y; + + int mean= s->current_picture.mb_mean[mb_x + mb_y*s->mb_width]; 
//FIXME + mean*= 0x01010101; + + dest_y = s->new_picture.data[0] + (mb_y * 16 * s->linesize ) + mb_x * 16; + + for(i=0; i<16; i++){ + *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 0]) = mean; + *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 4]) = mean; + *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 8]) = mean; + *(uint32_t*)(&s->me.scratchpad[i*s->linesize+12]) = mean; + } + + s->mb_intra=1; + intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, dest_y, s->linesize); + +/* printf("intra:%7d inter:%7d var:%7d mc_var.%7d\n", intra_score>>8, inter_score>>8, + s->current_picture.mb_var[mb_x + mb_y*s->mb_width], + s->current_picture.mc_mb_var[mb_x + mb_y*s->mb_width]);*/ + } + + /* get chroma score */ + if(s->avctx->mb_cmp&FF_CMP_CHROMA){ + int i; + + s->mb_intra=1; + for(i=1; i<3; i++){ + uint8_t *dest_c; + int mean; + + if(s->out_format == FMT_H263){ + mean= (s->dc_val[i][mb_x + (mb_y+1)*(s->mb_width+2)] + 4)>>3; //FIXME not exact but simple ;) + }else{ + mean= (s->last_dc[i] + 4)>>3; + } + dest_c = s->new_picture.data[i] + (mb_y * 8 * (s->uvlinesize)) + mb_x * 8; + + mean*= 0x01010101; + for(i=0; i<8; i++){ + *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 0]) = mean; + *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 4]) = mean; + } + + intra_score+= s->dsp.mb_cmp[1](s, s->me.scratchpad, dest_c, s->uvlinesize); + } + } + + /* bias */ + switch(s->avctx->mb_cmp&0xFF){ + default: + case FF_CMP_SAD: + intra_score+= 32*s->qscale; + break; + case FF_CMP_SSE: + intra_score+= 24*s->qscale*s->qscale; + break; + case FF_CMP_SATD: + intra_score+= 96*s->qscale; + break; + case FF_CMP_DCT: + intra_score+= 48*s->qscale; + break; + case FF_CMP_BIT: + intra_score+= 16; + break; + case FF_CMP_PSNR: + case FF_CMP_RD: + intra_score+= (s->qscale*s->qscale*109*8 + 64)>>7; + break; + } + + if(intra_score < inter_score) + mb_type= MB_TYPE_INTRA; + } + s->mv_type=MV_TYPE_16X16; // only one MB-Type possible + switch(mb_type){ case MB_TYPE_INTRA: s->mv_dir = MV_DIR_FORWARD; @@ -3175,7 +3369,9 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->mb_intra= 0; motion_x=s->b_direct_mv_table[xy][0]; motion_y=s->b_direct_mv_table[xy][1]; +#ifdef CONFIG_RISKY ff_mpeg4_set_direct_mv(s, motion_x, motion_y); +#endif break; case MB_TYPE_BIDIR: s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD; @@ -3253,6 +3449,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) } emms_c(); +#ifdef CONFIG_RISKY if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame) ff_mpeg4_merge_partitions(s); @@ -3261,6 +3458,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) if(s->codec_id==CODEC_ID_MPEG4) ff_mpeg4_stuffing(&s->pb); +#endif //if (s->gob_number) // fprintf(stderr,"\nNumber of GOB: %d", s->gob_number); @@ -3376,7 +3574,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s, return last_non_zero; } - lambda= (qscale*qscale*64*82 + 50)/100; //FIXME finetune + lambda= (qscale*qscale*64*105 + 64)>>7; //FIXME finetune score_tab[0]= 0; for(i=0; i<=last_non_zero - start_i; i++){ @@ -3783,6 +3981,8 @@ AVCodec mpeg1video_encoder = { MPV_encode_end, }; +#ifdef CONFIG_RISKY + AVCodec h263_encoder = { "h263", CODEC_TYPE_VIDEO, @@ -3813,16 +4013,6 @@ AVCodec rv10_encoder = { MPV_encode_end, }; -AVCodec mjpeg_encoder = { - "mjpeg", - CODEC_TYPE_VIDEO, - CODEC_ID_MJPEG, - sizeof(MpegEncContext), - MPV_encode_init, - MPV_encode_picture, - MPV_encode_end, -}; - AVCodec mpeg4_encoder = { "mpeg4", CODEC_TYPE_VIDEO, @@ -3873,3 +4063,14 @@ AVCodec wmv1_encoder = { MPV_encode_end, }; +#endif + +AVCodec 
mjpeg_encoder = { + "mjpeg", + CODEC_TYPE_VIDEO, + CODEC_ID_MJPEG, + sizeof(MpegEncContext), + MPV_encode_init, + MPV_encode_picture, + MPV_encode_end, +}; diff --git a/src/libffmpeg/libavcodec/mpegvideo.h b/src/libffmpeg/libavcodec/mpegvideo.h index e6909817a..7ecc6fd38 100644 --- a/src/libffmpeg/libavcodec/mpegvideo.h +++ b/src/libffmpeg/libavcodec/mpegvideo.h @@ -98,7 +98,6 @@ typedef struct RateControlContext{ int last_non_b_pict_type; }RateControlContext; - typedef struct ScanTable{ const UINT8 *scantable; UINT8 permutated[64]; @@ -117,6 +116,7 @@ typedef struct Picture{ uint16_t *mb_var; /* Table for MB variances */ uint16_t *mc_mb_var; /* Table for motion compensated MB variances */ uint8_t *mb_mean; /* Table for MB luminance */ + int32_t *mb_cmp_score; /* Table for MB cmp scores, for mb decission */ int b_frame_score; /* */ } Picture; @@ -142,6 +142,7 @@ typedef struct MotionEstContext{ int pre_penalty_factor; int penalty_factor; int sub_penalty_factor; + int mb_penalty_factor; int pre_pass; /* = 1 for the pre pass */ int dia_size; UINT16 (*mv_penalty)[MAX_MV*2+1]; /* amount of bits needed to encode a MV */ @@ -160,6 +161,8 @@ typedef struct MotionEstContext{ int P[10][2], int pred_x, int pred_y, int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, int16_t (*last_mv)[2], int ref_mv_scale, uint16_t * const mv_penalty); + int (*get_mb_score)(struct MpegEncContext * s, int mx, int my, int pred_x, int pred_y, Picture *ref_picture, + uint16_t * const mv_penalty); }MotionEstContext; typedef struct MpegEncContext { @@ -226,6 +229,8 @@ typedef struct MpegEncContext { UINT8 *coded_block; /* used for coded block pattern prediction (msmpeg4v3, wmv1)*/ INT16 (*ac_val[3])[16]; /* used for for mpeg4 AC prediction, all 3 arrays must be continuous */ int ac_pred; + uint8_t *prev_pict_types; /* previous picture types in bitstream order, used for mb skip */ +#define PREV_PICT_TYPES_BUFFER_SIZE 256 int mb_skiped; /* MUST BE SET only during DECODING */ UINT8 *mbskip_table; /* used to avoid copy if macroblock skipped (for black regions for example) and used for b-frame encoding & decoding (contains skip table of next P Frame) */ @@ -321,6 +326,8 @@ typedef struct MpegEncContext { uint8_t *intra_ac_vlc_last_length; uint8_t *inter_ac_vlc_length; uint8_t *inter_ac_vlc_last_length; + uint8_t *luma_dc_vlc_length; + uint8_t *chroma_dc_vlc_length; #define UNI_AC_ENC_INDEX(run,level) ((run)*128 + (level)) /* precomputed matrix (combine qscale and DCT renorm) */ @@ -544,14 +551,15 @@ typedef struct MpegEncContext { #define SLICE_NOEND -3 //no end marker or error found but mb count exceeded void (*dct_unquantize_mpeg1)(struct MpegEncContext *s, - DCTELEM *block, int n, int qscale); + DCTELEM *block/*align 16*/, int n, int qscale); void (*dct_unquantize_mpeg2)(struct MpegEncContext *s, - DCTELEM *block, int n, int qscale); + DCTELEM *block/*align 16*/, int n, int qscale); void (*dct_unquantize_h263)(struct MpegEncContext *s, - DCTELEM *block, int n, int qscale); + DCTELEM *block/*align 16*/, int n, int qscale); void (*dct_unquantize)(struct MpegEncContext *s, // unquantizer to use (mpeg4 can use both) - DCTELEM *block, int n, int qscale); - int (*dct_quantize)(struct MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow); + DCTELEM *block/*align 16*/, int n, int qscale); + int (*dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int *overflow); + int (*fast_dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int 
*overflow); void (*fdct)(DCTELEM *block/* align 16*/); void (*idct_put)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); void (*idct_add)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); @@ -596,7 +604,9 @@ void ff_draw_horiz_band(MpegEncContext *s); void ff_emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h, int src_x, int src_y, int w, int h); char ff_get_pict_type_char(int pict_type); +int ff_combine_frame( MpegEncContext *s, int next, uint8_t **buf, int *buf_size); +extern enum PixelFormat ff_yuv420p_list[2]; extern int ff_bit_exact; @@ -690,7 +700,7 @@ void h263_encode_picture_header(MpegEncContext *s, int picture_number); int h263_encode_gob_header(MpegEncContext * s, int mb_line); INT16 *h263_pred_motion(MpegEncContext * s, int block, int *px, int *py); -void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n, +void mpeg4_pred_ac(MpegEncContext * s, DCTELEM *block, int n, int dir); void ff_set_mpeg4_time(MpegEncContext * s, int picture_number); void mpeg4_encode_picture_header(MpegEncContext *s, int picture_number); @@ -717,6 +727,7 @@ int ff_mpeg4_get_video_packet_prefix_length(MpegEncContext *s); int ff_h263_resync(MpegEncContext *s); int ff_h263_get_gob_height(MpegEncContext *s); void ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my); +inline int ff_h263_round_chroma(int x); /* rv10.c */ diff --git a/src/libffmpeg/libavcodec/msmpeg4.c b/src/libffmpeg/libavcodec/msmpeg4.c index a08418874..2c524a067 100644 --- a/src/libffmpeg/libavcodec/msmpeg4.c +++ b/src/libffmpeg/libavcodec/msmpeg4.c @@ -502,7 +502,7 @@ static void msmpeg4_encode_motion(MpegEncContext * s, static inline void handle_slices(MpegEncContext *s){ if (s->mb_x == 0) { if (s->slice_height && (s->mb_y % s->slice_height) == 0) { - if(s->msmpeg4_version != 4){ + if(s->msmpeg4_version < 4){ ff_mpeg4_clean_buffers(s); } s->first_slice_line = 1; @@ -691,7 +691,7 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n, b = dc_val[ - 1 - wrap]; c = dc_val[ - wrap]; - if(s->first_slice_line && (n&2)==0 && s->msmpeg4_version!=4){ + if(s->first_slice_line && (n&2)==0 && s->msmpeg4_version<4){ b=c=1024; } @@ -1195,7 +1195,7 @@ int msmpeg4_decode_picture_header(MpegEncContext * s) #if 0 { int i; -for(i=0; i<s->gb.size*8; i++) +for(i=0; i<s->gb.size_in_bits; i++) printf("%d", get_bits1(&s->gb)); // get_bits1(&s->gb); printf("END\n"); @@ -1869,7 +1869,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, if (i > 62){ i-= 192; if(i&(~63)){ - const int left= s->gb.size*8 - get_bits_count(&s->gb); + const int left= s->gb.size_in_bits - get_bits_count(&s->gb); if(((i+192 == 64 && level/qmul==-1) || s->error_resilience<=1) && left>=0){ fprintf(stderr, "ignoring overflow at %d %d\n", s->mb_x, s->mb_y); break; diff --git a/src/libffmpeg/libavcodec/msmpeg4data.h b/src/libffmpeg/libavcodec/msmpeg4data.h index 3490fc08c..2c3a28f0e 100644 --- a/src/libffmpeg/libavcodec/msmpeg4data.h +++ b/src/libffmpeg/libavcodec/msmpeg4data.h @@ -1868,7 +1868,10 @@ static const uint8_t *wmv1_scantable[WMV1_SCANTABLE_COUNT+1]={ }; static const uint8_t table_inter_intra[4][2]={ - {0,1},{2,2},{6,3},{7,3} + {0,1} /*Luma-Left Chroma-Left*/, + {2,2} /*Luma-Top Chroma-Left*/, + {6,3} /*luma-Left Chroma-Top */, + {7,3} /*luma-Top Chroma-Top */ }; #define WMV2_INTER_CBP_TABLE_COUNT 4 diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c index 5f14ed0eb..dc62e70f4 100644 --- 
a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c +++ b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2002 Brian Foley * Copyright (c) 2002 Dieter Shirley + * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org> * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -20,21 +21,39 @@ #include "../dsputil.h" #include "dsputil_altivec.h" -#if CONFIG_DARWIN +#ifdef CONFIG_DARWIN #include <sys/sysctl.h> -#endif +#else /* CONFIG_DARWIN */ +#include <signal.h> +#include <setjmp.h> + +static sigjmp_buf jmpbuf; +static volatile sig_atomic_t canjump = 0; + +static void sigill_handler (int sig) +{ + if (!canjump) { + signal (sig, SIG_DFL); + raise (sig); + } + + canjump = 0; + siglongjmp (jmpbuf, 1); +} +#endif /* CONFIG_DARWIN */ int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) { - int s, i; - vector unsigned char *tv, zero; + int i; + int s __attribute__((aligned(16))); + const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); + vector unsigned char *tv; vector unsigned char pix1v, pix2v, pix2iv, avgv, t5; vector unsigned int sad; vector signed int sumdiffs; s = 0; - zero = vec_splat_u8(0); - sad = vec_splat_u32(0); + sad = (vector unsigned int)vec_splat_u32(0); for(i=0;i<16;i++) { /* Read unaligned pixels into our vectors. The vectors are as follows: @@ -72,16 +91,17 @@ int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) { - int s, i; - vector unsigned char *tv, zero; + int i; + int s __attribute__((aligned(16))); + const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); + vector unsigned char *tv; vector unsigned char pix1v, pix2v, pix3v, avgv, t5; vector unsigned int sad; vector signed int sumdiffs; uint8_t *pix3 = pix2 + line_size; s = 0; - zero = vec_splat_u8(0); - sad = vec_splat_u32(0); + sad = (vector unsigned int)vec_splat_u32(0); /* Due to the fact that pix3 = pix2 + line_size, the pix3 of one @@ -131,20 +151,21 @@ int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) int pix_abs16x16_xy2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) { - int s, i; + int i; + int s __attribute__((aligned(16))); uint8_t *pix3 = pix2 + line_size; - vector unsigned char *tv, avgv, t5, zero; + const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); + const vector unsigned short two = (const vector unsigned short)vec_splat_u16(2); + vector unsigned char *tv, avgv, t5; vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv; vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv; vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv; - vector unsigned short avghv, avglv, two; + vector unsigned short avghv, avglv; vector unsigned short t1, t2, t3, t4; vector unsigned int sad; vector signed int sumdiffs; - zero = vec_splat_u8(0); - two = vec_splat_u16(2); - sad = vec_splat_u32(0); + sad = (vector unsigned int)vec_splat_u32(0); s = 0; @@ -231,14 +252,15 @@ int pix_abs16x16_xy2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) { - int i, s; + int i; + int s __attribute__((aligned(16))); + const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); vector unsigned char perm1, perm2, *pix1v, *pix2v; vector unsigned char t1, t2, t3,t4, t5; - vector unsigned int sad, zero; + vector unsigned int sad; vector 
signed int sumdiffs; - zero = (vector unsigned int) (0); - sad = (vector unsigned int) (0); + sad = (vector unsigned int)vec_splat_u32(0); for(i=0;i<16;i++) { @@ -272,15 +294,20 @@ int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) { - int i, s; + int i; + int s __attribute__((aligned(16))); + const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; vector unsigned char t1, t2, t3,t4, t5; - vector unsigned int sad, zero; + vector unsigned int sad; vector signed int sumdiffs; - zero = (vector unsigned int) (0); - sad = (vector unsigned int) (0); - permclear = (vector unsigned char) (255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0); + sad = (vector unsigned int)vec_splat_u32(0); +#ifdef CONFIG_DARWIN + permclear = (vector unsigned char)(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0); +#else + permclear = (vector unsigned char){255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0}; +#endif for(i=0;i<8;i++) { /* Read potentially unaligned pixels into t1 and t2 @@ -315,14 +342,15 @@ int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) int pix_norm1_altivec(uint8_t *pix, int line_size) { - int s, i; - vector unsigned char *tv, zero; + int i; + int s __attribute__((aligned(16))); + const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); + vector unsigned char *tv; vector unsigned char pixv; vector unsigned int sv; vector signed int sum; - - zero = vec_splat_u8(0); - sv = vec_splat_u32(0); + + sv = (vector unsigned int)vec_splat_u32(0); s = 0; for (i = 0; i < 16; i++) { @@ -343,18 +371,127 @@ int pix_norm1_altivec(uint8_t *pix, int line_size) return s; } -int pix_sum_altivec(UINT8 * pix, int line_size) +/** + * Sum of Squared Errors for a 8x8 block. + * AltiVec-enhanced. + * It's the pix_abs8x8_altivec code above w/ squaring added. + */ +int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size) { + int i; + int s __attribute__((aligned(16))); + const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); + vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; + vector unsigned char t1, t2, t3,t4, t5; + vector unsigned int sum; + vector signed int sumsqr; + + sum = (vector unsigned int)vec_splat_u32(0); +#ifdef CONFIG_DARWIN + permclear = (vector unsigned char)(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0); +#else + permclear = (vector unsigned char){255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0}; +#endif + + for(i=0;i<8;i++) { + /* Read potentially unaligned pixels into t1 and t2 + Since we're reading 16 pixels, and actually only want 8, + mask out the last 8 pixels. The 0s don't change the sum. */ + perm1 = vec_lvsl(0, pix1); + pix1v = (vector unsigned char *) pix1; + perm2 = vec_lvsl(0, pix2); + pix2v = (vector unsigned char *) pix2; + t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear); + t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear); + /* + Since we want to use unsigned chars, we can take advantage + of the fact that abs(a-b)^2 = (a-b)^2. 
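      Because the operands are unsigned bytes, |a-b| is formed below as
      vec_sub(vec_max(t1,t2), vec_min(t1,t2)) with no risk of wrap-around;
      vec_msum then multiplies each byte difference by itself and adds groups
      of four products into the 32-bit partial sums, which vec_sums folds into
      a single total at the end.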
+ */ + + /* Calculate abs differences vector */ + t3 = vec_max(t1, t2); + t4 = vec_min(t1, t2); + t5 = vec_sub(t3, t4); + + /* Square the values and add them to our sum */ + sum = vec_msum(t5, t5, sum); + + pix1 += line_size; + pix2 += line_size; + } + + /* Sum up the four partial sums, and put the result into s */ + sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero); + sumsqr = vec_splat(sumsqr, 3); + vec_ste(sumsqr, 0, &s); + + return s; +} + +/** + * Sum of Squared Errors for a 16x16 block. + * AltiVec-enhanced. + * It's the pix_abs16x16_altivec code above w/ squaring added. + */ +int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size) +{ + int i; + int s __attribute__((aligned(16))); + const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); + vector unsigned char perm1, perm2, *pix1v, *pix2v; + vector unsigned char t1, t2, t3,t4, t5; + vector unsigned int sum; + vector signed int sumsqr; + + sum = (vector unsigned int)vec_splat_u32(0); + + for(i=0;i<16;i++) { + /* Read potentially unaligned pixels into t1 and t2 */ + perm1 = vec_lvsl(0, pix1); + pix1v = (vector unsigned char *) pix1; + perm2 = vec_lvsl(0, pix2); + pix2v = (vector unsigned char *) pix2; + t1 = vec_perm(pix1v[0], pix1v[1], perm1); + t2 = vec_perm(pix2v[0], pix2v[1], perm2); + + /* + Since we want to use unsigned chars, we can take advantage + of the fact that abs(a-b)^2 = (a-b)^2. + */ + + /* Calculate abs differences vector */ + t3 = vec_max(t1, t2); + t4 = vec_min(t1, t2); + t5 = vec_sub(t3, t4); + + /* Square the values and add them to our sum */ + sum = vec_msum(t5, t5, sum); + + pix1 += line_size; + pix2 += line_size; + } + + /* Sum up the four partial sums, and put the result into s */ + sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero); + sumsqr = vec_splat(sumsqr, 3); + vec_ste(sumsqr, 0, &s); + + return s; +} + +int pix_sum_altivec(UINT8 * pix, int line_size) +{ + const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); vector unsigned char perm, *pixv; vector unsigned char t1; - vector unsigned int sad, zero; + vector unsigned int sad; vector signed int sumdiffs; - int s, i; - - zero = (vector unsigned int) (0); - sad = (vector unsigned int) (0); + int i; + int s __attribute__((aligned(16))); + + sad = (vector unsigned int)vec_splat_u32(0); for (i = 0; i < 16; i++) { /* Read the potentially unaligned 16 pixels into t1 */ @@ -380,7 +517,7 @@ void get_pixels_altivec(DCTELEM *restrict block, const UINT8 *pixels, int line_s { int i; vector unsigned char perm, bytes, *pixv; - vector unsigned char zero = (vector unsigned char) (0); + const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); vector signed short shorts; for(i=0;i<8;i++) @@ -407,7 +544,7 @@ void diff_pixels_altivec(DCTELEM *restrict block, const UINT8 *s1, { int i; vector unsigned char perm, bytes, *pixv; - vector unsigned char zero = (vector unsigned char) (0); + const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); vector signed short shorts1, shorts2; for(i=0;i<4;i++) @@ -474,10 +611,675 @@ void diff_pixels_altivec(DCTELEM *restrict block, const UINT8 *s1, } } +int sad16x16_altivec(void *s, uint8_t *a, uint8_t *b, int stride) { + return pix_abs16x16_altivec(a,b,stride); +} + +int sad8x8_altivec(void *s, uint8_t *a, uint8_t *b, int stride) { + return pix_abs8x8_altivec(a,b,stride); +} + +void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) { +#ifdef ALTIVEC_USE_REFERENCE_C_CODE + int i; + for(i=0; i+7<w; 
i++){ + dst[i+0] += src[i+0]; + dst[i+1] += src[i+1]; + dst[i+2] += src[i+2]; + dst[i+3] += src[i+3]; + dst[i+4] += src[i+4]; + dst[i+5] += src[i+5]; + dst[i+6] += src[i+6]; + dst[i+7] += src[i+7]; + } + for(; i<w; i++) + dst[i+0] += src[i+0]; +#else /* ALTIVEC_USE_REFERENCE_C_CODE */ + register int i; + register vector unsigned char vdst, vsrc; + + /* dst and src are 16 bytes-aligned (guaranteed) */ + for(i = 0 ; (i + 15) < w ; i++) + { + vdst = vec_ld(i << 4, (unsigned char*)dst); + vsrc = vec_ld(i << 4, (unsigned char*)src); + vdst = vec_add(vsrc, vdst); + vec_st(vdst, i << 4, (unsigned char*)dst); + } + /* if w is not a multiple of 16 */ + for (; (i < w) ; i++) + { + dst[i] = src[i]; + } +#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ +} + +/* next one assumes that ((line_size % 16) == 0) */ +void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ +POWERPC_TBL_DECLARE(altivec_put_pixels16_num, 1); +#ifdef ALTIVEC_USE_REFERENCE_C_CODE + int i; + +POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1); + + for(i=0; i<h; i++) { + *((uint32_t*)(block )) = (((const struct unaligned_32 *) (pixels))->l); + *((uint32_t*)(block+4)) = (((const struct unaligned_32 *) (pixels+4))->l); + *((uint32_t*)(block+8)) = (((const struct unaligned_32 *) (pixels+8))->l); + *((uint32_t*)(block+12)) = (((const struct unaligned_32 *) (pixels+12))->l); + pixels+=line_size; + block +=line_size; + } + +POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1); + +#else /* ALTIVEC_USE_REFERENCE_C_CODE */ + register vector unsigned char pixelsv1, pixelsv2; + register vector unsigned char perm = vec_lvsl(0, pixels); + int i; + +POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1); + + for(i=0; i<h; i++) { + pixelsv1 = vec_ld(0, (unsigned char*)pixels); + pixelsv2 = vec_ld(16, (unsigned char*)pixels); + vec_st(vec_perm(pixelsv1, pixelsv2, perm), + 0, (unsigned char*)block); + pixels+=line_size; + block +=line_size; + } + +POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1); + +#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ +} + +/* next one assumes that ((line_size % 16) == 0) */ +#define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) ) +void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ +POWERPC_TBL_DECLARE(altivec_avg_pixels16_num, 1); +#ifdef ALTIVEC_USE_REFERENCE_C_CODE + int i; + +POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1); + + for(i=0; i<h; i++) { + op_avg(*((uint32_t*)(block)),(((const struct unaligned_32 *)(pixels))->l)); + op_avg(*((uint32_t*)(block+4)),(((const struct unaligned_32 *)(pixels+4))->l)); + op_avg(*((uint32_t*)(block+8)),(((const struct unaligned_32 *)(pixels+8))->l)); + op_avg(*((uint32_t*)(block+12)),(((const struct unaligned_32 *)(pixels+12))->l)); + pixels+=line_size; + block +=line_size; + } + +POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1); + +#else /* ALTIVEC_USE_REFERENCE_C_CODE */ + register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; + register vector unsigned char perm = vec_lvsl(0, pixels); + int i; + +POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1); + + for(i=0; i<h; i++) { + pixelsv1 = vec_ld(0, (unsigned char*)pixels); + pixelsv2 = vec_ld(16, (unsigned char*)pixels); + blockv = vec_ld(0, block); + pixelsv = vec_perm(pixelsv1, pixelsv2, perm); + blockv = vec_avg(blockv,pixelsv); + vec_st(blockv, 0, (unsigned char*)block); + pixels+=line_size; + block +=line_size; + } + +POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1); + +#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ +} + +/* next 
one assumes that ((line_size % 8) == 0) */ +void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) +{ +POWERPC_TBL_DECLARE(altivec_avg_pixels8_num, 1); +#ifdef ALTIVEC_USE_REFERENCE_C_CODE + int i; +POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1); + for (i = 0; i < h; i++) { + *((uint32_t *) (block)) = + (((*((uint32_t *) (block))) | + ((((const struct unaligned_32 *) (pixels))->l))) - + ((((*((uint32_t *) (block))) ^ + ((((const struct unaligned_32 *) (pixels))-> + l))) & 0xFEFEFEFEUL) >> 1)); + *((uint32_t *) (block + 4)) = + (((*((uint32_t *) (block + 4))) | + ((((const struct unaligned_32 *) (pixels + 4))->l))) - + ((((*((uint32_t *) (block + 4))) ^ + ((((const struct unaligned_32 *) (pixels + + 4))-> + l))) & 0xFEFEFEFEUL) >> 1)); + pixels += line_size; + block += line_size; + } +POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1); + +#else /* ALTIVEC_USE_REFERENCE_C_CODE */ + register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; + int i; + +POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1); + + for (i = 0; i < h; i++) { + /* + block is 8 bytes-aligned, so we're either in the + left block (16 bytes-aligned) or in the right block (not) + */ + int rightside = ((unsigned long)block & 0x0000000F); + + blockv = vec_ld(0, block); + pixelsv1 = vec_ld(0, (unsigned char*)pixels); + pixelsv2 = vec_ld(16, (unsigned char*)pixels); + pixelsv = vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels)); + + if (rightside) + { + pixelsv = vec_perm(blockv, pixelsv, vcprm(0,1,s0,s1)); + } + else + { + pixelsv = vec_perm(blockv, pixelsv, vcprm(s0,s1,2,3)); + } + + blockv = vec_avg(blockv, pixelsv); + + vec_st(blockv, 0, block); + + pixels += line_size; + block += line_size; + } + +POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1); + +#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ +} + +/* next one assumes that ((line_size % 8) == 0) */ +void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ +POWERPC_TBL_DECLARE(altivec_put_pixels8_xy2_num, 1); +#ifdef ALTIVEC_USE_REFERENCE_C_CODE + int j; +POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1); + for (j = 0; j < 2; j++) { + int i; + const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); + const uint32_t b = + (((const struct unaligned_32 *) (pixels + 1))->l); + uint32_t l0 = + (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; + uint32_t h0 = + ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + uint32_t l1, h1; + pixels += line_size; + for (i = 0; i < h; i += 2) { + uint32_t a = (((const struct unaligned_32 *) (pixels))->l); + uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l); + l1 = (a & 0x03030303UL) + (b & 0x03030303UL); + h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + *((uint32_t *) block) = + h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); + pixels += line_size; + block += line_size; + a = (((const struct unaligned_32 *) (pixels))->l); + b = (((const struct unaligned_32 *) (pixels + 1))->l); + l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; + h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + *((uint32_t *) block) = + h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); + pixels += line_size; + block += line_size; + } pixels += 4 - line_size * (h + 1); + block += 4 - line_size * h; + } + +POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); + +#else /* ALTIVEC_USE_REFERENCE_C_CODE */ + register int i; + register vector unsigned char + pixelsv1, pixelsv2, + pixelsavg; + register vector unsigned char + 
blockv, temp1, temp2; + register vector unsigned short + pixelssum1, pixelssum2, temp3; + register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); + register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); + + temp1 = vec_ld(0, pixels); + temp2 = vec_ld(16, pixels); + pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); + if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) + { + pixelsv2 = temp2; + } + else + { + pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels)); + } + pixelsv1 = vec_mergeh(vczero, pixelsv1); + pixelsv2 = vec_mergeh(vczero, pixelsv2); + pixelssum1 = vec_add((vector unsigned short)pixelsv1, + (vector unsigned short)pixelsv2); + pixelssum1 = vec_add(pixelssum1, vctwo); + +POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1); + for (i = 0; i < h ; i++) { + int rightside = ((unsigned long)block & 0x0000000F); + blockv = vec_ld(0, block); + + temp1 = vec_ld(line_size, pixels); + temp2 = vec_ld(line_size + 16, pixels); + pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels)); + if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) + { + pixelsv2 = temp2; + } + else + { + pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels)); + } + + pixelsv1 = vec_mergeh(vczero, pixelsv1); + pixelsv2 = vec_mergeh(vczero, pixelsv2); + pixelssum2 = vec_add((vector unsigned short)pixelsv1, + (vector unsigned short)pixelsv2); + temp3 = vec_add(pixelssum1, pixelssum2); + temp3 = vec_sra(temp3, vctwo); + pixelssum1 = vec_add(pixelssum2, vctwo); + pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero); + + if (rightside) + { + blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1)); + } + else + { + blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3)); + } + + vec_st(blockv, 0, block); + + block += line_size; + pixels += line_size; + } + +POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); +#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ +} + +/* next one assumes that ((line_size % 8) == 0) */ +void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ +POWERPC_TBL_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1); +#ifdef ALTIVEC_USE_REFERENCE_C_CODE + int j; +POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); + for (j = 0; j < 2; j++) { + int i; + const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); + const uint32_t b = + (((const struct unaligned_32 *) (pixels + 1))->l); + uint32_t l0 = + (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL; + uint32_t h0 = + ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + uint32_t l1, h1; + pixels += line_size; + for (i = 0; i < h; i += 2) { + uint32_t a = (((const struct unaligned_32 *) (pixels))->l); + uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l); + l1 = (a & 0x03030303UL) + (b & 0x03030303UL); + h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + *((uint32_t *) block) = + h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); + pixels += line_size; + block += line_size; + a = (((const struct unaligned_32 *) (pixels))->l); + b = (((const struct unaligned_32 *) (pixels + 1))->l); + l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL; + h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + *((uint32_t *) block) = + h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); + pixels += line_size; + block += line_size; + } pixels += 4 - line_size * (h + 1); + block += 4 - line_size * h; + } + 
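/* Note (not a patch line): the scalar fallback above is the usual packed
   half-pel average. Each 32-bit word holds four pixels; the low two bits
   (& 0x03030303) and the high six bits (& 0xFCFCFCFC, then >> 2) of the
   neighbouring samples are summed separately, so four (a+b+c+d+round)>>2
   averages are produced per word without carries spilling between bytes.
   This no_rnd variant adds 0x01010101 where the rounding version above adds
   0x02020202. */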
+POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); + +#else /* ALTIVEC_USE_REFERENCE_C_CODE */ + register int i; + register vector unsigned char + pixelsv1, pixelsv2, + pixelsavg; + register vector unsigned char + blockv, temp1, temp2; + register vector unsigned short + pixelssum1, pixelssum2, temp3; + register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); + register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1); + register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); + + temp1 = vec_ld(0, pixels); + temp2 = vec_ld(16, pixels); + pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); + if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) + { + pixelsv2 = temp2; + } + else + { + pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels)); + } + pixelsv1 = vec_mergeh(vczero, pixelsv1); + pixelsv2 = vec_mergeh(vczero, pixelsv2); + pixelssum1 = vec_add((vector unsigned short)pixelsv1, + (vector unsigned short)pixelsv2); + pixelssum1 = vec_add(pixelssum1, vcone); + +POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); + for (i = 0; i < h ; i++) { + int rightside = ((unsigned long)block & 0x0000000F); + blockv = vec_ld(0, block); + + temp1 = vec_ld(line_size, pixels); + temp2 = vec_ld(line_size + 16, pixels); + pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels)); + if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) + { + pixelsv2 = temp2; + } + else + { + pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels)); + } + + pixelsv1 = vec_mergeh(vczero, pixelsv1); + pixelsv2 = vec_mergeh(vczero, pixelsv2); + pixelssum2 = vec_add((vector unsigned short)pixelsv1, + (vector unsigned short)pixelsv2); + temp3 = vec_add(pixelssum1, pixelssum2); + temp3 = vec_sra(temp3, vctwo); + pixelssum1 = vec_add(pixelssum2, vcone); + pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero); + + if (rightside) + { + blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1)); + } + else + { + blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3)); + } + + vec_st(blockv, 0, block); + + block += line_size; + pixels += line_size; + } + +POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); +#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ +} + +/* next one assumes that ((line_size % 16) == 0) */ +void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) +{ +POWERPC_TBL_DECLARE(altivec_put_pixels16_xy2_num, 1); +#ifdef ALTIVEC_USE_REFERENCE_C_CODE + int j; +POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1); + for (j = 0; j < 4; j++) { + int i; + const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); + const uint32_t b = + (((const struct unaligned_32 *) (pixels + 1))->l); + uint32_t l0 = + (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; + uint32_t h0 = + ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + uint32_t l1, h1; + pixels += line_size; + for (i = 0; i < h; i += 2) { + uint32_t a = (((const struct unaligned_32 *) (pixels))->l); + uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l); + l1 = (a & 0x03030303UL) + (b & 0x03030303UL); + h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + *((uint32_t *) block) = + h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); + pixels += line_size; + block += line_size; + a = (((const struct unaligned_32 *) (pixels))->l); + b = (((const struct unaligned_32 *) (pixels + 1))->l); + l0 = (a & 0x03030303UL) + (b & 0x03030303UL) 
+ 0x02020202UL; + h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + *((uint32_t *) block) = + h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); + pixels += line_size; + block += line_size; + } pixels += 4 - line_size * (h + 1); + block += 4 - line_size * h; + } + +POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); + +#else /* ALTIVEC_USE_REFERENCE_C_CODE */ + register int i; + register vector unsigned char + pixelsv1, pixelsv2, pixelsv3, pixelsv4; + register vector unsigned char + blockv, temp1, temp2; + register vector unsigned short + pixelssum1, pixelssum2, temp3, + pixelssum3, pixelssum4, temp4; + register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); + register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); + + temp1 = vec_ld(0, pixels); + temp2 = vec_ld(16, pixels); + pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); + if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) + { + pixelsv2 = temp2; + } + else + { + pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels)); + } + pixelsv3 = vec_mergel(vczero, pixelsv1); + pixelsv4 = vec_mergel(vczero, pixelsv2); + pixelsv1 = vec_mergeh(vczero, pixelsv1); + pixelsv2 = vec_mergeh(vczero, pixelsv2); + pixelssum3 = vec_add((vector unsigned short)pixelsv3, + (vector unsigned short)pixelsv4); + pixelssum3 = vec_add(pixelssum3, vctwo); + pixelssum1 = vec_add((vector unsigned short)pixelsv1, + (vector unsigned short)pixelsv2); + pixelssum1 = vec_add(pixelssum1, vctwo); + +POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1); + for (i = 0; i < h ; i++) { + blockv = vec_ld(0, block); + + temp1 = vec_ld(line_size, pixels); + temp2 = vec_ld(line_size + 16, pixels); + pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels)); + if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) + { + pixelsv2 = temp2; + } + else + { + pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels)); + } + + pixelsv3 = vec_mergel(vczero, pixelsv1); + pixelsv4 = vec_mergel(vczero, pixelsv2); + pixelsv1 = vec_mergeh(vczero, pixelsv1); + pixelsv2 = vec_mergeh(vczero, pixelsv2); + + pixelssum4 = vec_add((vector unsigned short)pixelsv3, + (vector unsigned short)pixelsv4); + pixelssum2 = vec_add((vector unsigned short)pixelsv1, + (vector unsigned short)pixelsv2); + temp4 = vec_add(pixelssum3, pixelssum4); + temp4 = vec_sra(temp4, vctwo); + temp3 = vec_add(pixelssum1, pixelssum2); + temp3 = vec_sra(temp3, vctwo); + + pixelssum3 = vec_add(pixelssum4, vctwo); + pixelssum1 = vec_add(pixelssum2, vctwo); + + blockv = vec_packsu(temp3, temp4); + + vec_st(blockv, 0, block); + + block += line_size; + pixels += line_size; + } + +POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); +#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ +} + +/* next one assumes that ((line_size % 16) == 0) */ +void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) +{ +POWERPC_TBL_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1); +#ifdef ALTIVEC_USE_REFERENCE_C_CODE + int j; +POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); + for (j = 0; j < 4; j++) { + int i; + const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); + const uint32_t b = + (((const struct unaligned_32 *) (pixels + 1))->l); + uint32_t l0 = + (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL; + uint32_t h0 = + ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + uint32_t l1, h1; + pixels += line_size; + for (i = 0; i < h; i += 2) { + uint32_t a = 
(((const struct unaligned_32 *) (pixels))->l); + uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l); + l1 = (a & 0x03030303UL) + (b & 0x03030303UL); + h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + *((uint32_t *) block) = + h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); + pixels += line_size; + block += line_size; + a = (((const struct unaligned_32 *) (pixels))->l); + b = (((const struct unaligned_32 *) (pixels + 1))->l); + l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL; + h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + *((uint32_t *) block) = + h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); + pixels += line_size; + block += line_size; + } pixels += 4 - line_size * (h + 1); + block += 4 - line_size * h; + } + +POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); + +#else /* ALTIVEC_USE_REFERENCE_C_CODE */ + register int i; + register vector unsigned char + pixelsv1, pixelsv2, pixelsv3, pixelsv4; + register vector unsigned char + blockv, temp1, temp2; + register vector unsigned short + pixelssum1, pixelssum2, temp3, + pixelssum3, pixelssum4, temp4; + register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); + register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1); + register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); + + temp1 = vec_ld(0, pixels); + temp2 = vec_ld(16, pixels); + pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); + if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) + { + pixelsv2 = temp2; + } + else + { + pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels)); + } + pixelsv3 = vec_mergel(vczero, pixelsv1); + pixelsv4 = vec_mergel(vczero, pixelsv2); + pixelsv1 = vec_mergeh(vczero, pixelsv1); + pixelsv2 = vec_mergeh(vczero, pixelsv2); + pixelssum3 = vec_add((vector unsigned short)pixelsv3, + (vector unsigned short)pixelsv4); + pixelssum3 = vec_add(pixelssum3, vcone); + pixelssum1 = vec_add((vector unsigned short)pixelsv1, + (vector unsigned short)pixelsv2); + pixelssum1 = vec_add(pixelssum1, vcone); + +POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); + for (i = 0; i < h ; i++) { + blockv = vec_ld(0, block); + + temp1 = vec_ld(line_size, pixels); + temp2 = vec_ld(line_size + 16, pixels); + pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels)); + if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) + { + pixelsv2 = temp2; + } + else + { + pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels)); + } + + pixelsv3 = vec_mergel(vczero, pixelsv1); + pixelsv4 = vec_mergel(vczero, pixelsv2); + pixelsv1 = vec_mergeh(vczero, pixelsv1); + pixelsv2 = vec_mergeh(vczero, pixelsv2); + + pixelssum4 = vec_add((vector unsigned short)pixelsv3, + (vector unsigned short)pixelsv4); + pixelssum2 = vec_add((vector unsigned short)pixelsv1, + (vector unsigned short)pixelsv2); + temp4 = vec_add(pixelssum3, pixelssum4); + temp4 = vec_sra(temp4, vctwo); + temp3 = vec_add(pixelssum1, pixelssum2); + temp3 = vec_sra(temp3, vctwo); + + pixelssum3 = vec_add(pixelssum4, vcone); + pixelssum1 = vec_add(pixelssum2, vcone); + + blockv = vec_packsu(temp3, temp4); + + vec_st(blockv, 0, block); + + block += line_size; + pixels += line_size; + } + +POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); +#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ +} int has_altivec(void) { -#if CONFIG_DARWIN +#ifdef CONFIG_DARWIN int sels[2] = {CTL_HW, HW_VECTORUNIT}; int has_vu = 0; size_t len = 
sizeof(has_vu); @@ -486,7 +1288,25 @@ int has_altivec(void) err = sysctl(sels, 2, &has_vu, &len, NULL, 0); if (err == 0) return (has_vu != 0); -#endif +#else /* CONFIG_DARWIN */ +/* no Darwin, do it the brute-force way */ +/* this is borrowed from the libmpeg2 library */ + { + signal (SIGILL, sigill_handler); + if (sigsetjmp (jmpbuf, 1)) { + signal (SIGILL, SIG_DFL); + } else { + canjump = 1; + + asm volatile ("mtspr 256, %0\n\t" + "vand %%v0, %%v0, %%v0" + : + : "r" (-1)); + + signal (SIGILL, SIG_DFL); + return 1; + } + } +#endif /* CONFIG_DARWIN */ return 0; } - diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.h b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.h index d4d259d9e..61dbec548 100644 --- a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.h +++ b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.h @@ -17,14 +17,79 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#ifndef _DSPUTIL_ALTIVEC_ +#define _DSPUTIL_ALTIVEC_ + +#include "dsputil_ppc.h" + +#ifdef HAVE_ALTIVEC + extern int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size); extern int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size); extern int pix_abs16x16_xy2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size); extern int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size); extern int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size); +extern int sad16x16_altivec(void *s, uint8_t *a, uint8_t *b, int stride); +extern int sad8x8_altivec(void *s, uint8_t *a, uint8_t *b, int stride); extern int pix_norm1_altivec(uint8_t *pix, int line_size); +extern int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size); +extern int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size); extern int pix_sum_altivec(UINT8 * pix, int line_size); extern void diff_pixels_altivec(DCTELEM* block, const UINT8* s1, const UINT8* s2, int stride); extern void get_pixels_altivec(DCTELEM* block, const UINT8 * pixels, int line_size); +extern void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w); +extern void put_pixels_clamped_altivec(const DCTELEM *block, UINT8 *restrict pixels, int line_size); +extern void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h); +extern void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h); +extern void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h); +extern void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h); +extern void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h); +extern void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h); +extern void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h); + +extern void gmc1_altivec(UINT8 *dst, UINT8 *src, int stride, int h, int x16, int y16, int rounder); + extern int has_altivec(void); + +// used to build registers permutation vectors (vcprm) +// the 's' are for words in the _s_econd vector +#define WORD_0 0x00,0x01,0x02,0x03 +#define WORD_1 0x04,0x05,0x06,0x07 +#define WORD_2 0x08,0x09,0x0a,0x0b +#define WORD_3 0x0c,0x0d,0x0e,0x0f +#define WORD_s0 0x10,0x11,0x12,0x13 +#define WORD_s1 0x14,0x15,0x16,0x17 +#define WORD_s2 0x18,0x19,0x1a,0x1b +#define WORD_s3 0x1c,0x1d,0x1e,0x1f + +#ifdef CONFIG_DARWIN +#define vcprm(a,b,c,d) (const vector unsigned char)(WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ 
## d) +#else +#define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d} +#endif + +// vcprmle is used to keep the same index as in the SSE version. +// it's the same as vcprm, with the index inversed +// ('le' is Little Endian) +#define vcprmle(a,b,c,d) vcprm(d,c,b,a) + +// used to build inverse/identity vectors (vcii) +// n is _n_egative, p is _p_ositive +#define FLOAT_n -1. +#define FLOAT_p 1. + + +#ifdef CONFIG_DARWIN +#define vcii(a,b,c,d) (const vector float)(FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d) +#else +#define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d} +#endif + +#else /* HAVE_ALTIVEC */ +#ifdef ALTIVEC_USE_REFERENCE_C_CODE +#error "I can't use ALTIVEC_USE_REFERENCE_C_CODE if I don't use HAVE_ALTIVEC" +#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ +#endif /* HAVE_ALTIVEC */ + +#endif /* _DSPUTIL_ALTIVEC_ */ diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c b/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c index 733d0c156..c502f5819 100644 --- a/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c +++ b/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c @@ -19,18 +19,168 @@ #include "../dsputil.h" +#include "dsputil_ppc.h" + #ifdef HAVE_ALTIVEC #include "dsputil_altivec.h" #endif int mm_flags = 0; +int mm_support(void) +{ + int result = 0; +#if HAVE_ALTIVEC + if (has_altivec()) { + result |= MM_ALTIVEC; + } +#endif /* result */ + return result; +} + +#ifdef POWERPC_TBL_PERFORMANCE_REPORT +unsigned long long perfdata[powerpc_perf_total][powerpc_data_total]; +/* list below must match enum in dsputil_ppc.h */ +static unsigned char* perfname[] = { + "fft_calc_altivec", + "gmc1_altivec", + "dct_unquantize_h263_altivec", + "idct_add_altivec", + "idct_put_altivec", + "put_pixels16_altivec", + "avg_pixels16_altivec", + "avg_pixels8_altivec", + "put_pixels8_xy2_altivec", + "put_no_rnd_pixels8_xy2_altivec", + "put_pixels16_xy2_altivec", + "put_no_rnd_pixels16_xy2_altivec", + "clear_blocks_dcbz32_ppc" +}; +#ifdef POWERPC_PERF_USE_PMC +unsigned long long perfdata_miss[powerpc_perf_total][powerpc_data_total]; +#endif +#include <stdio.h> +#endif + +#ifdef POWERPC_TBL_PERFORMANCE_REPORT +void powerpc_display_perf_report(void) +{ + int i; +#ifndef POWERPC_PERF_USE_PMC + fprintf(stderr, "PowerPC performance report\n Values are from the Time Base register, and represent 4 bus cycles.\n"); +#else /* POWERPC_PERF_USE_PMC */ + fprintf(stderr, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n"); +#endif /* POWERPC_PERF_USE_PMC */ + for(i = 0 ; i < powerpc_perf_total ; i++) + { + if (perfdata[i][powerpc_data_num] != (unsigned long long)0) + fprintf(stderr, " Function \"%s\" (pmc1):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n", + perfname[i], + perfdata[i][powerpc_data_min], + perfdata[i][powerpc_data_max], + (double)perfdata[i][powerpc_data_sum] / + (double)perfdata[i][powerpc_data_num], + perfdata[i][powerpc_data_num]); +#ifdef POWERPC_PERF_USE_PMC + if (perfdata_miss[i][powerpc_data_num] != (unsigned long long)0) + fprintf(stderr, " Function \"%s\" (pmc2):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n", + perfname[i], + perfdata_miss[i][powerpc_data_min], + perfdata_miss[i][powerpc_data_max], + (double)perfdata_miss[i][powerpc_data_sum] / + (double)perfdata_miss[i][powerpc_data_num], + perfdata_miss[i][powerpc_data_num]); +#endif + } +} +#endif /* POWERPC_TBL_PERFORMANCE_REPORT */ + +/* ***** WARNING ***** WARNING ***** WARNING ***** */ +/* + 
clear_blocks_dcbz32_ppc will not work properly + on PowerPC processors with a cache line size + not equal to 32 bytes. + Fortunately all processor used by Apple up to + at least the 7450 (aka second generation G4) + use 32 bytes cache line. + This is due to the use of the 'dcbz' instruction. + It simply clear to zero a single cache line, + so you need to know the cache line size to use it ! + It's absurd, but it's fast... +*/ +void clear_blocks_dcbz32_ppc(DCTELEM *blocks) +{ +POWERPC_TBL_DECLARE(powerpc_clear_blocks_dcbz32, 1); + register int misal = ((unsigned long)blocks & 0x00000010); + register int i = 0; +POWERPC_TBL_START_COUNT(powerpc_clear_blocks_dcbz32, 1); +#if 1 + if (misal) { + ((unsigned long*)blocks)[0] = 0L; + ((unsigned long*)blocks)[1] = 0L; + ((unsigned long*)blocks)[2] = 0L; + ((unsigned long*)blocks)[3] = 0L; + i += 16; + } + for ( ; i < sizeof(DCTELEM)*6*64 ; i += 32) { + asm volatile("dcbz %0,%1" : : "r" (blocks), "r" (i) : "memory"); + } + if (misal) { + ((unsigned long*)blocks)[188] = 0L; + ((unsigned long*)blocks)[189] = 0L; + ((unsigned long*)blocks)[190] = 0L; + ((unsigned long*)blocks)[191] = 0L; + i += 16; + } +#else + memset(blocks, 0, sizeof(DCTELEM)*6*64); +#endif +POWERPC_TBL_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1); +} + +/* check dcbz report how many bytes are set to 0 by dcbz */ +long check_dcbz_effect(void) +{ + register char *fakedata = (char*)av_malloc(1024); + register char *fakedata_middle; + register long zero = 0; + register long i = 0; + long count = 0; + + if (!fakedata) + { + return 0L; + } + + fakedata_middle = (fakedata + 512); + + memset(fakedata, 0xFF, 1024); + + asm volatile("dcbz %0, %1" : : "r" (fakedata_middle), "r" (zero)); + + for (i = 0; i < 1024 ; i ++) + { + if (fakedata[i] == (char)0) + count++; + } + + av_free(fakedata); + + return count; +} + void dsputil_init_ppc(DSPContext* c, unsigned mask) { // Common optimisations whether Altivec or not - // ... pending ... - + switch (check_dcbz_effect()) { + case 32: + c->clear_blocks = clear_blocks_dcbz32_ppc; + break; + default: + break; + } + #if HAVE_ALTIVEC if (has_altivec()) { mm_flags |= MM_ALTIVEC; @@ -41,12 +191,51 @@ void dsputil_init_ppc(DSPContext* c, unsigned mask) c->pix_abs16x16_xy2 = pix_abs16x16_xy2_altivec; c->pix_abs16x16 = pix_abs16x16_altivec; c->pix_abs8x8 = pix_abs8x8_altivec; + c->sad[0]= sad16x16_altivec; + c->sad[1]= sad8x8_altivec; c->pix_norm1 = pix_norm1_altivec; + c->sse[1]= sse8_altivec; + c->sse[0]= sse16_altivec; c->pix_sum = pix_sum_altivec; c->diff_pixels = diff_pixels_altivec; c->get_pixels = get_pixels_altivec; +// next one disabled as it's untested. +#if 0 + c->add_bytes= add_bytes_altivec; +#endif /* 0 */ + c->put_pixels_tab[0][0] = put_pixels16_altivec; + c->avg_pixels_tab[0][0] = avg_pixels16_altivec; +// next one disabled as it's untested. 
+#if 0 + c->avg_pixels_tab[1][0] = avg_pixels8_altivec; +#endif /* 0 */ + c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec; + c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec; + c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec; + c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec; + + c->gmc1 = gmc1_altivec; + +#ifdef POWERPC_TBL_PERFORMANCE_REPORT + { + int i; + for (i = 0 ; i < powerpc_perf_total ; i++) + { + perfdata[i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFF; + perfdata[i][powerpc_data_max] = 0x0000000000000000; + perfdata[i][powerpc_data_sum] = 0x0000000000000000; + perfdata[i][powerpc_data_num] = 0x0000000000000000; +#ifdef POWERPC_PERF_USE_PMC + perfdata_miss[i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFF; + perfdata_miss[i][powerpc_data_max] = 0x0000000000000000; + perfdata_miss[i][powerpc_data_sum] = 0x0000000000000000; + perfdata_miss[i][powerpc_data_num] = 0x0000000000000000; +#endif /* POWERPC_PERF_USE_PMC */ + } + } +#endif /* POWERPC_TBL_PERFORMANCE_REPORT */ } else -#endif +#endif /* HAVE_ALTIVEC */ { // Non-AltiVec PPC optimisations diff --git a/src/libffmpeg/libavcodec/ppc/fft_altivec.c b/src/libffmpeg/libavcodec/ppc/fft_altivec.c index 1a926b77c..992be5b8e 100644 --- a/src/libffmpeg/libavcodec/ppc/fft_altivec.c +++ b/src/libffmpeg/libavcodec/ppc/fft_altivec.c @@ -1,7 +1,7 @@ /* * FFT/IFFT transforms * AltiVec-enabled - * Copyright (c) 2002 Romain Dolbeau <romain@dolbeau.org> + * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org> * Based on code Copyright (c) 2002 Fabrice Bellard. * * This library is free software; you can redistribute it and/or @@ -22,30 +22,30 @@ #include "dsputil_altivec.h" -// used to build registers permutation vectors (vcprm) -// the 's' are for words in the _s_econd vector -#define WORD_0 0x00,0x01,0x02,0x03 -#define WORD_1 0x04,0x05,0x06,0x07 -#define WORD_2 0x08,0x09,0x0a,0x0b -#define WORD_3 0x0c,0x0d,0x0e,0x0f -#define WORD_s0 0x10,0x11,0x12,0x13 -#define WORD_s1 0x14,0x15,0x16,0x17 -#define WORD_s2 0x18,0x19,0x1a,0x1b -#define WORD_s3 0x1c,0x1d,0x1e,0x1f - -#define vcprm(a,b,c,d) (const vector unsigned char)(WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d) - -// vcprmle is used to keep the same index as in the SSE version. -// it's the same as vcprm, with the index inversed -// ('le' is Little Endian) -#define vcprmle(a,b,c,d) vcprm(d,c,b,a) - -// used to build inverse/identity vectors (vcii) -// n is _n_egative, p is _p_ositive -#define FLOAT_n -1. -#define FLOAT_p 1. +/* + those three macros are from libavcodec/fft.c + and are required for the reference C code +*/ +/* butter fly op */ +#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \ +{\ + FFTSample ax, ay, bx, by;\ + bx=pre1;\ + by=pim1;\ + ax=qre1;\ + ay=qim1;\ + pre = (bx + ax);\ + pim = (by + ay);\ + qre = (bx - ax);\ + qim = (by - ay);\ +} +#define MUL16(a,b) ((a) * (b)) +#define CMUL(pre, pim, are, aim, bre, bim) \ +{\ + pre = (MUL16(are, bre) - MUL16(aim, bim));\ + pim = (MUL16(are, bim) + MUL16(bre, aim));\ +} -#define vcii(a,b,c,d) (const vector float)(FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d) /** * Do a complex FFT with the parameters defined in fft_init(). 
The @@ -55,20 +55,94 @@ * This code assumes that the 'z' pointer is 16 bytes-aligned * It also assumes all FFTComplex are 8 bytes-aligned pair of float * The code is exactly the same as the SSE version, except - * that successive MUL + ADD/SUB have been fusionned into + * that successive MUL + ADD/SUB have been merged into * fused multiply-add ('vec_madd' in altivec) - * - * To test this code you can use fft-test in libavcodec ; use - * the following line in libavcodec to compile (MacOS X): - * ##### - * gcc -I. -Ippc -no-cpp-precomp -pipe -O3 -fomit-frame-pointer -mdynamic-no-pic -Wall - * -faltivec -DARCH_POWERPC -DHAVE_ALTIVEC -DCONFIG_DARWIN fft-test.c fft.c - * ppc/fft_altivec.c ppc/dsputil_altivec.c mdct.c -DHAVE_LRINTF -o fft-test - * ##### */ void fft_calc_altivec(FFTContext *s, FFTComplex *z) { - register const vector float vczero = (vector float)( 0., 0., 0., 0.); +POWERPC_TBL_DECLARE(altivec_fft_num, s->nbits >= 6); +#ifdef ALTIVEC_USE_REFERENCE_C_CODE + int ln = s->nbits; + int j, np, np2; + int nblocks, nloops; + register FFTComplex *p, *q; + FFTComplex *exptab = s->exptab; + int l; + FFTSample tmp_re, tmp_im; + +POWERPC_TBL_START_COUNT(altivec_fft_num, s->nbits >= 6); + + np = 1 << ln; + + /* pass 0 */ + + p=&z[0]; + j=(np >> 1); + do { + BF(p[0].re, p[0].im, p[1].re, p[1].im, + p[0].re, p[0].im, p[1].re, p[1].im); + p+=2; + } while (--j != 0); + + /* pass 1 */ + + + p=&z[0]; + j=np >> 2; + if (s->inverse) { + do { + BF(p[0].re, p[0].im, p[2].re, p[2].im, + p[0].re, p[0].im, p[2].re, p[2].im); + BF(p[1].re, p[1].im, p[3].re, p[3].im, + p[1].re, p[1].im, -p[3].im, p[3].re); + p+=4; + } while (--j != 0); + } else { + do { + BF(p[0].re, p[0].im, p[2].re, p[2].im, + p[0].re, p[0].im, p[2].re, p[2].im); + BF(p[1].re, p[1].im, p[3].re, p[3].im, + p[1].re, p[1].im, p[3].im, -p[3].re); + p+=4; + } while (--j != 0); + } + /* pass 2 .. 
ln-1 */ + + nblocks = np >> 3; + nloops = 1 << 2; + np2 = np >> 1; + do { + p = z; + q = z + nloops; + for (j = 0; j < nblocks; ++j) { + BF(p->re, p->im, q->re, q->im, + p->re, p->im, q->re, q->im); + + p++; + q++; + for(l = nblocks; l < np2; l += nblocks) { + CMUL(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q->re, q->im); + BF(p->re, p->im, q->re, q->im, + p->re, p->im, tmp_re, tmp_im); + p++; + q++; + } + + p += nloops; + q += nloops; + } + nblocks = nblocks >> 1; + nloops = nloops << 1; + } while (nblocks != 0); + +POWERPC_TBL_STOP_COUNT(altivec_fft_num, s->nbits >= 6); + +#else /* ALTIVEC_USE_REFERENCE_C_CODE */ +#ifdef CONFIG_DARWIN + register const vector float vczero = (const vector float)(0.); +#else + register const vector float vczero = (const vector float){0.,0.,0.,0.}; +#endif int ln = s->nbits; int j, np, np2; @@ -77,6 +151,8 @@ void fft_calc_altivec(FFTContext *s, FFTComplex *z) FFTComplex *cptr, *cptr1; int k; +POWERPC_TBL_START_COUNT(altivec_fft_num, s->nbits >= 6); + np = 1 << ln; { @@ -162,5 +238,8 @@ void fft_calc_altivec(FFTContext *s, FFTComplex *z) nblocks = nblocks >> 1; nloops = nloops << 1; } while (nblocks != 0); -} +POWERPC_TBL_STOP_COUNT(altivec_fft_num, s->nbits >= 6); + +#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ +} diff --git a/src/libffmpeg/libavcodec/ppc/idct_altivec.c b/src/libffmpeg/libavcodec/ppc/idct_altivec.c index 8036d403f..1619f1731 100644 --- a/src/libffmpeg/libavcodec/ppc/idct_altivec.c +++ b/src/libffmpeg/libavcodec/ppc/idct_altivec.c @@ -38,6 +38,7 @@ #include <stdlib.h> /* malloc(), free() */ #include <string.h> #include "../dsputil.h" +#include "dsputil_altivec.h" #define vector_s16_t vector signed short #define vector_u16_t vector unsigned short @@ -150,6 +151,8 @@ vx6 = vec_sra (vy6, shift); \ vx7 = vec_sra (vy7, shift); + +#ifdef CONFIG_DARWIN static const vector_s16_t constants[5] = { (vector_s16_t)(23170, 13573, 6518, 21895, -23170, -21895, 32, 31), (vector_s16_t)(16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725), @@ -157,11 +160,30 @@ static const vector_s16_t constants[5] = { (vector_s16_t)(21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692), (vector_s16_t)(19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722) }; +#else +// broken gcc +static const vector_s16_t constants[5] = { + (vector_s16_t){23170, 13573, 6518, 21895, -23170, -21895, 32, 31}, + (vector_s16_t){16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725}, + (vector_s16_t){22725, 31521, 29692, 26722, 22725, 26722, 29692, 31521}, + (vector_s16_t){21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692}, + (vector_s16_t){19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722} +}; +#endif void idct_put_altivec(uint8_t* dest, int stride, vector_s16_t* block) { +POWERPC_TBL_DECLARE(altivec_idct_put_num, 1); +#ifdef ALTIVEC_USE_REFERENCE_C_CODE +POWERPC_TBL_START_COUNT(altivec_idct_put_num, 1); + void simple_idct_put(UINT8 *dest, int line_size, INT16 *block); + simple_idct_put(dest, stride, (INT16*)block); +POWERPC_TBL_STOP_COUNT(altivec_idct_put_num, 1); +#else /* ALTIVEC_USE_REFERENCE_C_CODE */ vector_u8_t tmp; +POWERPC_TBL_START_COUNT(altivec_idct_put_num, 1); + IDCT #define COPY(dest,src) \ @@ -177,16 +199,28 @@ void idct_put_altivec(uint8_t* dest, int stride, vector_s16_t* block) COPY (dest, vx5) dest += stride; COPY (dest, vx6) dest += stride; COPY (dest, vx7) + +POWERPC_TBL_STOP_COUNT(altivec_idct_put_num, 1); +#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } void idct_add_altivec(uint8_t* dest, int stride, vector_s16_t* block) { 
+POWERPC_TBL_DECLARE(altivec_idct_add_num, 1); +#ifdef ALTIVEC_USE_REFERENCE_C_CODE +POWERPC_TBL_START_COUNT(altivec_idct_add_num, 1); + void simple_idct_add(UINT8 *dest, int line_size, INT16 *block); + simple_idct_add(dest, stride, (INT16*)block); +POWERPC_TBL_STOP_COUNT(altivec_idct_add_num, 1); +#else /* ALTIVEC_USE_REFERENCE_C_CODE */ vector_u8_t tmp; vector_s16_t tmp2, tmp3; vector_u8_t perm0; vector_u8_t perm1; vector_u8_t p0, p1, p; +POWERPC_TBL_START_COUNT(altivec_idct_add_num, 1); + IDCT p0 = vec_lvsl (0, dest); @@ -212,5 +246,8 @@ void idct_add_altivec(uint8_t* dest, int stride, vector_s16_t* block) ADD (dest, vx5, perm1) dest += stride; ADD (dest, vx6, perm0) dest += stride; ADD (dest, vx7, perm1) + +POWERPC_TBL_STOP_COUNT(altivec_idct_add_num, 1); +#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } diff --git a/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c b/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c index bcbc1e6ba..dd898e158 100644 --- a/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c +++ b/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c @@ -20,10 +20,7 @@ #include <stdio.h> #include "../dsputil.h" #include "../mpegvideo.h" - - -// Used when initializing constant vectors -#define FOUR_INSTANCES(x) x,x,x,x +#include "dsputil_altivec.h" // Swaps two variables (used for altivec registers) #define SWAP(a,b) \ @@ -93,6 +90,13 @@ do { \ vec = vec_splat(vec, 0); \ } + +#ifdef CONFIG_DARWIN +#define FOUROF(a) (a) +#else +// slower, for dumb non-apple GCC +#define FOUROF(a) {a,a,a,a} +#endif int dct_quantize_altivec(MpegEncContext* s, DCTELEM* data, int n, int qscale, int* overflow) @@ -100,7 +104,7 @@ int dct_quantize_altivec(MpegEncContext* s, int lastNonZero; vector float row0, row1, row2, row3, row4, row5, row6, row7; vector float alt0, alt1, alt2, alt3, alt4, alt5, alt6, alt7; - const vector float zero = {FOUR_INSTANCES(0.0f)}; + const vector float zero = (const vector float)FOUROF(0.); // Load the data into the row/alt vectors { @@ -144,18 +148,18 @@ int dct_quantize_altivec(MpegEncContext* s, // in the vector local variables, as floats, which we'll use during the // quantize step... 
{ - const vector float vec_0_298631336 = {FOUR_INSTANCES(0.298631336f)}; - const vector float vec_0_390180644 = {FOUR_INSTANCES(-0.390180644f)}; - const vector float vec_0_541196100 = {FOUR_INSTANCES(0.541196100f)}; - const vector float vec_0_765366865 = {FOUR_INSTANCES(0.765366865f)}; - const vector float vec_0_899976223 = {FOUR_INSTANCES(-0.899976223f)}; - const vector float vec_1_175875602 = {FOUR_INSTANCES(1.175875602f)}; - const vector float vec_1_501321110 = {FOUR_INSTANCES(1.501321110f)}; - const vector float vec_1_847759065 = {FOUR_INSTANCES(-1.847759065f)}; - const vector float vec_1_961570560 = {FOUR_INSTANCES(-1.961570560f)}; - const vector float vec_2_053119869 = {FOUR_INSTANCES(2.053119869f)}; - const vector float vec_2_562915447 = {FOUR_INSTANCES(-2.562915447f)}; - const vector float vec_3_072711026 = {FOUR_INSTANCES(3.072711026f)}; + const vector float vec_0_298631336 = (vector float)FOUROF(0.298631336f); + const vector float vec_0_390180644 = (vector float)FOUROF(-0.390180644f); + const vector float vec_0_541196100 = (vector float)FOUROF(0.541196100f); + const vector float vec_0_765366865 = (vector float)FOUROF(0.765366865f); + const vector float vec_0_899976223 = (vector float)FOUROF(-0.899976223f); + const vector float vec_1_175875602 = (vector float)FOUROF(1.175875602f); + const vector float vec_1_501321110 = (vector float)FOUROF(1.501321110f); + const vector float vec_1_847759065 = (vector float)FOUROF(-1.847759065f); + const vector float vec_1_961570560 = (vector float)FOUROF(-1.961570560f); + const vector float vec_2_053119869 = (vector float)FOUROF(2.053119869f); + const vector float vec_2_562915447 = (vector float)FOUROF(-2.562915447f); + const vector float vec_3_072711026 = (vector float)FOUROF(3.072711026f); int whichPass, whichHalf; @@ -309,7 +313,7 @@ int dct_quantize_altivec(MpegEncContext* s, // rounding when we convert to int, instead of flooring.) 
{ vector signed int biasInt; - const vector float negOneFloat = (vector float)(FOUR_INSTANCES(-1.0f)); + const vector float negOneFloat = (vector float)FOUROF(-1.0f); LOAD4(biasInt, biasAddr); bias = vec_ctf(biasInt, QUANT_BIAS_SHIFT); negBias = vec_madd(bias, negOneFloat, zero); @@ -506,4 +510,133 @@ int dct_quantize_altivec(MpegEncContext* s, return lastNonZero; } +#undef FOUROF + +/* + AltiVec version of dct_unquantize_h263 + this code assumes `block' is 16 bytes-aligned +*/ +void dct_unquantize_h263_altivec(MpegEncContext *s, + DCTELEM *block, int n, int qscale) +{ +POWERPC_TBL_DECLARE(altivec_dct_unquantize_h263_num, 1); + int i, level, qmul, qadd; + int nCoeffs; + + assert(s->block_last_index[n]>=0); + +POWERPC_TBL_START_COUNT(altivec_dct_unquantize_h263_num, 1); + + qadd = (qscale - 1) | 1; + qmul = qscale << 1; + + if (s->mb_intra) { + if (!s->h263_aic) { + if (n < 4) + block[0] = block[0] * s->y_dc_scale; + else + block[0] = block[0] * s->c_dc_scale; + }else + qadd = 0; + i = 1; + nCoeffs= 63; //does not allways use zigzag table + } else { + i = 0; + nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; + } +#ifdef ALTIVEC_USE_REFERENCE_C_CODE + for(;i<=nCoeffs;i++) { + level = block[i]; + if (level) { + if (level < 0) { + level = level * qmul - qadd; + } else { + level = level * qmul + qadd; + } + block[i] = level; + } + } +#else /* ALTIVEC_USE_REFERENCE_C_CODE */ + { + register const vector short vczero = (const vector short)vec_splat_s16(0); + short __attribute__ ((aligned(16))) qmul8[] = + { + qmul, qmul, qmul, qmul, + qmul, qmul, qmul, qmul + }; + short __attribute__ ((aligned(16))) qadd8[] = + { + qadd, qadd, qadd, qadd, + qadd, qadd, qadd, qadd + }; + short __attribute__ ((aligned(16))) nqadd8[] = + { + -qadd, -qadd, -qadd, -qadd, + -qadd, -qadd, -qadd, -qadd + }; + register vector short blockv, qmulv, qaddv, nqaddv, temp1; + register vector bool short blockv_null, blockv_neg; + register short backup_0 = block[0]; + register int j = 0; + + qmulv = vec_ld(0, qmul8); + qaddv = vec_ld(0, qadd8); + nqaddv = vec_ld(0, nqadd8); + +#if 0 // block *is* 16 bytes-aligned, it seems. + // first make sure block[j] is 16 bytes-aligned + for(j = 0; (j <= nCoeffs) && ((((unsigned long)block) + (j << 1)) & 0x0000000F) ; j++) { + level = block[j]; + if (level) { + if (level < 0) { + level = level * qmul - qadd; + } else { + level = level * qmul + qadd; + } + block[j] = level; + } + } +#endif + + // vectorize all the 16 bytes-aligned blocks + // of 8 elements + for(; (j + 7) <= nCoeffs ; j+=8) + { + blockv = vec_ld(j << 1, block); + blockv_neg = vec_cmplt(blockv, vczero); + blockv_null = vec_cmpeq(blockv, vczero); + // choose between +qadd or -qadd as the third operand + temp1 = vec_sel(qaddv, nqaddv, blockv_neg); + // multiply & add (block{i,i+7} * qmul [+-] qadd) + temp1 = vec_mladd(blockv, qmulv, temp1); + // put 0 where block[{i,i+7} used to have 0 + blockv = vec_sel(temp1, blockv, blockv_null); + vec_st(blockv, j << 1, block); + } + + // if nCoeffs isn't a multiple of 8, finish the job + // using good old scalar units. + // (we could do it using a truncated vector, + // but I'm not sure it's worth the hassle) + for(; j <= nCoeffs ; j++) { + level = block[j]; + if (level) { + if (level < 0) { + level = level * qmul - qadd; + } else { + level = level * qmul + qadd; + } + block[j] = level; + } + } + + if (i == 1) + { // cheat. 
this avoid special-casing the first iteration
+      block[0] = backup_0;
+    }
+  }
+#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
+
+POWERPC_TBL_STOP_COUNT(altivec_dct_unquantize_h263_num, nCoeffs == 63);
+}
diff --git a/src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c b/src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c
index 94d608b63..9757f5f39 100644
--- a/src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c
+++ b/src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c
@@ -27,6 +27,8 @@ extern int dct_quantize_altivec(MpegEncContext *s,
DCTELEM *block, int n,
int qscale, int *overflow);
+extern void dct_unquantize_h263_altivec(MpegEncContext *s,
+                                        DCTELEM *block, int n, int qscale);
extern void idct_put_altivec(UINT8 *dest, int line_size, INT16 *block);
extern void idct_add_altivec(UINT8 *dest, int line_size, INT16 *block);
@@ -42,7 +44,11 @@ void MPV_common_init_ppc(MpegEncContext *s) {
s->idct_put = idct_put_altivec;
s->idct_add = idct_add_altivec;
+#ifndef ALTIVEC_USE_REFERENCE_C_CODE
s->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
+#else /* ALTIVEC_USE_REFERENCE_C_CODE */
+        s->idct_permutation_type = FF_NO_IDCT_PERM;
+#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
}
// Test to make sure that the dct required alignments are met.
@@ -66,6 +72,7 @@ void MPV_common_init_ppc(MpegEncContext *s)
        (s->avctx->dct_algo == FF_DCT_ALTIVEC))
{
s->dct_quantize = dct_quantize_altivec;
+            s->dct_unquantize_h263 = dct_unquantize_h263_altivec;
}
} else
#endif
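
For context on dct_unquantize_h263_altivec, which the mpegvideo_ppc.c hunk above wires into MpegEncContext, the following is a minimal standalone sketch of the scalar H.263 dequantization that the AltiVec version vectorizes with vec_mladd and vec_sel. It restates the ALTIVEC_USE_REFERENCE_C_CODE branch of the patch (the intra DC scaling and AC-prediction special case are omitted); the name dequant_h263_scalar, the main() harness and the sample coefficients are illustrative additions, not code from the commit.

/*
 * Standalone sketch of the scalar H.263 dequantization that
 * dct_unquantize_h263_altivec vectorizes.  Mirrors the reference
 * C path in the patch; the demo harness is illustrative only.
 */
#include <stdio.h>

typedef short DCTELEM;   /* local stand-in for libavcodec's DCTELEM */

static void dequant_h263_scalar(DCTELEM *block, int first, int last, int qscale)
{
    /* same derivation as in the patch: qadd = (qscale - 1) | 1, qmul = 2*qscale */
    int qmul = qscale << 1;
    int qadd = (qscale - 1) | 1;
    int i;

    for (i = first; i <= last; i++) {
        int level = block[i];
        if (level) {
            /* negative levels get -qadd, positive ones +qadd; zeros stay zero */
            if (level < 0)
                level = level * qmul - qadd;
            else
                level = level * qmul + qadd;
            block[i] = level;
        }
    }
}

int main(void)
{
    DCTELEM block[8] = { 12, -3, 0, 1, 0, 0, -1, 2 };
    int i;

    dequant_h263_scalar(block, 0, 7, 4);   /* qscale = 4 -> qmul = 8, qadd = 3 */
    for (i = 0; i < 8; i++)
        printf("%d ", block[i]);
    printf("\n");
    return 0;
}

With qscale = 4 the sketch prints 99 -27 0 11 0 0 -11 19, i.e. level*qmul plus or minus qadd with qmul = 8 and qadd = 3, which is exactly what the vectorized loop computes eight coefficients at a time.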
diff --git a/src/libffmpeg/libavcodec/ratecontrol.c b/src/libffmpeg/libavcodec/ratecontrol.c index bda408dfe..6bcbe1c67 100644 --- a/src/libffmpeg/libavcodec/ratecontrol.c +++ b/src/libffmpeg/libavcodec/ratecontrol.c @@ -751,8 +751,8 @@ static int init_pass2(MpegEncContext *s) } //printf("%lld %lld %lld %lld\n", available_bits[I_TYPE], available_bits[P_TYPE], available_bits[B_TYPE], all_available_bits); - qscale= malloc(sizeof(double)*rcc->num_entries); - blured_qscale= malloc(sizeof(double)*rcc->num_entries); + qscale= av_malloc(sizeof(double)*rcc->num_entries); + blured_qscale= av_malloc(sizeof(double)*rcc->num_entries); for(step=256*256; step>0.0000001; step*=0.5){ expected_bits=0; @@ -809,8 +809,8 @@ static int init_pass2(MpegEncContext *s) // printf("%f %d %f\n", expected_bits, (int)all_available_bits, rate_factor); if(expected_bits > all_available_bits) rate_factor-= step; } - free(qscale); - free(blured_qscale); + av_free(qscale); + av_free(blured_qscale); if(abs(expected_bits/all_available_bits - 1.0) > 0.01 ){ fprintf(stderr, "Error: 2pass curve failed to converge\n"); diff --git a/src/libffmpeg/libavcodec/rv10.c b/src/libffmpeg/libavcodec/rv10.c index 4907c2347..012b1dc5c 100644 --- a/src/libffmpeg/libavcodec/rv10.c +++ b/src/libffmpeg/libavcodec/rv10.c @@ -395,7 +395,7 @@ static int rv10_decode_packet(AVCodecContext *avctx, MpegEncContext *s = avctx->priv_data; int i, mb_count, mb_pos, left; - init_get_bits(&s->gb, buf, buf_size); + init_get_bits(&s->gb, buf, buf_size*8); mb_count = rv10_decode_picture_header(s); if (mb_count < 0) { diff --git a/src/libffmpeg/libavcodec/simple_idct.c b/src/libffmpeg/libavcodec/simple_idct.c index 8c9ce7b93..703e94f21 100644 --- a/src/libffmpeg/libavcodec/simple_idct.c +++ b/src/libffmpeg/libavcodec/simple_idct.c @@ -67,7 +67,7 @@ #endif -static inline void idctRowCondDC (int16_t * row) +static inline void idctRowCondDC (DCTELEM * row) { int a0, a1, a2, a3, b0, b1, b2, b3; #ifdef FAST_64BIT @@ -82,26 +82,40 @@ static inline void idctRowCondDC (int16_t * row) #else #define ROW0_MASK 0xffffLL #endif - if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) | - ((uint64_t *)row)[1]) == 0) { - temp = (row[0] << 3) & 0xffff; - temp += temp << 16; - temp += temp << 32; - ((uint64_t *)row)[0] = temp; - ((uint64_t *)row)[1] = temp; - return; - } + if(sizeof(DCTELEM)==2){ + if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) | + ((uint64_t *)row)[1]) == 0) { + temp = (row[0] << 3) & 0xffff; + temp += temp << 16; + temp += temp << 32; + ((uint64_t *)row)[0] = temp; + ((uint64_t *)row)[1] = temp; + return; + } + }else{ + if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) { + row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3; + return; + } + } #else - if (!(((uint32_t*)row)[1] | - ((uint32_t*)row)[2] | - ((uint32_t*)row)[3] | - row[1])) { - temp = (row[0] << 3) & 0xffff; - temp += temp << 16; - ((uint32_t*)row)[0]=((uint32_t*)row)[1] = - ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; - return; - } + if(sizeof(DCTELEM)==2){ + if (!(((uint32_t*)row)[1] | + ((uint32_t*)row)[2] | + ((uint32_t*)row)[3] | + row[1])) { + temp = (row[0] << 3) & 0xffff; + temp += temp << 16; + ((uint32_t*)row)[0]=((uint32_t*)row)[1] = + ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; + return; + } + }else{ + if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) { + row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3; + return; + } + } #endif a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); @@ -159,7 +173,7 @@ static inline void idctRowCondDC (int16_t * row) } static 
inline void idctSparseColPut (UINT8 *dest, int line_size, - int16_t * col) + DCTELEM * col) { int a0, a1, a2, a3, b0, b1, b2, b3; UINT8 *cm = cropTbl + MAX_NEG_CROP; @@ -231,7 +245,7 @@ static inline void idctSparseColPut (UINT8 *dest, int line_size, } static inline void idctSparseColAdd (UINT8 *dest, int line_size, - int16_t * col) + DCTELEM * col) { int a0, a1, a2, a3, b0, b1, b2, b3; UINT8 *cm = cropTbl + MAX_NEG_CROP; @@ -302,7 +316,7 @@ static inline void idctSparseColAdd (UINT8 *dest, int line_size, dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)]; } -static inline void idctSparseCol (int16_t * col) +static inline void idctSparseCol (DCTELEM * col) { int a0, a1, a2, a3, b0, b1, b2, b3; @@ -365,7 +379,7 @@ static inline void idctSparseCol (int16_t * col) col[56] = ((a0 - b0) >> COL_SHIFT); } -void simple_idct_put(UINT8 *dest, int line_size, INT16 *block) +void simple_idct_put(UINT8 *dest, int line_size, DCTELEM *block) { int i; for(i=0; i<8; i++) @@ -375,7 +389,7 @@ void simple_idct_put(UINT8 *dest, int line_size, INT16 *block) idctSparseColPut(dest + i, line_size, block + i); } -void simple_idct_add(UINT8 *dest, int line_size, INT16 *block) +void simple_idct_add(UINT8 *dest, int line_size, DCTELEM *block) { int i; for(i=0; i<8; i++) @@ -385,7 +399,7 @@ void simple_idct_add(UINT8 *dest, int line_size, INT16 *block) idctSparseColAdd(dest + i, line_size, block + i); } -void simple_idct(INT16 *block) +void simple_idct(DCTELEM *block) { int i; for(i=0; i<8; i++) @@ -406,7 +420,7 @@ void simple_idct(INT16 *block) and the butterfly must be multiplied by 0.5 * sqrt(2.0) */ #define C_SHIFT (4+1+12) -static inline void idct4col(UINT8 *dest, int line_size, const INT16 *col) +static inline void idct4col(UINT8 *dest, int line_size, const DCTELEM *col) { int c0, c1, c2, c3, a0, a1, a2, a3; const UINT8 *cm = cropTbl + MAX_NEG_CROP; @@ -443,10 +457,10 @@ static inline void idct4col(UINT8 *dest, int line_size, const INT16 *col) /* XXX: I think a 1.0/sqrt(2) normalization should be needed to compensate the extra butterfly stage - I don't have the full DV specification */ -void simple_idct248_put(UINT8 *dest, int line_size, INT16 *block) +void simple_idct248_put(UINT8 *dest, int line_size, DCTELEM *block) { int i; - INT16 *ptr; + DCTELEM *ptr; /* butterfly */ ptr = block; @@ -486,7 +500,7 @@ void simple_idct248_put(UINT8 *dest, int line_size, INT16 *block) #define C2 C_FIX(0.2705980501) #define C3 C_FIX(0.5) #define C_SHIFT (4+1+12) -static inline void idct4col_add(UINT8 *dest, int line_size, const INT16 *col) +static inline void idct4col_add(UINT8 *dest, int line_size, const DCTELEM *col) { int c0, c1, c2, c3, a0, a1, a2, a3; const UINT8 *cm = cropTbl + MAX_NEG_CROP; @@ -514,7 +528,7 @@ static inline void idct4col_add(UINT8 *dest, int line_size, const INT16 *col) #define R2 R_FIX(0.2705980501) #define R3 R_FIX(0.5) #define R_SHIFT 11 -static inline void idct4row(INT16 *row) +static inline void idct4row(DCTELEM *row) { int c0, c1, c2, c3, a0, a1, a2, a3; const UINT8 *cm = cropTbl + MAX_NEG_CROP; @@ -533,7 +547,7 @@ static inline void idct4row(INT16 *row) row[3]= (c0 - c1) >> R_SHIFT; } -void simple_idct84_add(UINT8 *dest, int line_size, INT16 *block) +void simple_idct84_add(UINT8 *dest, int line_size, DCTELEM *block) { int i; @@ -548,7 +562,7 @@ void simple_idct84_add(UINT8 *dest, int line_size, INT16 *block) } } -void simple_idct48_add(UINT8 *dest, int line_size, INT16 *block) +void simple_idct48_add(UINT8 *dest, int line_size, DCTELEM *block) { int i; diff --git 
a/src/libffmpeg/libavcodec/simple_idct.h b/src/libffmpeg/libavcodec/simple_idct.h index 428c6072c..0ee1e05ed 100644 --- a/src/libffmpeg/libavcodec/simple_idct.h +++ b/src/libffmpeg/libavcodec/simple_idct.h @@ -18,14 +18,14 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -void simple_idct_put(UINT8 *dest, int line_size, INT16 *block); -void simple_idct_add(UINT8 *dest, int line_size, INT16 *block); -void ff_simple_idct_mmx(short *block); -void ff_simple_idct_add_mmx(UINT8 *dest, int line_size, INT16 *block); -void ff_simple_idct_put_mmx(UINT8 *dest, int line_size, INT16 *block); -void simple_idct(short *block); +void simple_idct_put(UINT8 *dest, int line_size, DCTELEM *block); +void simple_idct_add(UINT8 *dest, int line_size, DCTELEM *block); +void ff_simple_idct_mmx(int16_t *block); +void ff_simple_idct_add_mmx(UINT8 *dest, int line_size, int16_t *block); +void ff_simple_idct_put_mmx(UINT8 *dest, int line_size, int16_t *block); +void simple_idct(DCTELEM *block); -void simple_idct248_put(UINT8 *dest, int line_size, INT16 *block); +void simple_idct248_put(UINT8 *dest, int line_size, DCTELEM *block); -void simple_idct84_add(UINT8 *dest, int line_size, INT16 *block); -void simple_idct48_add(UINT8 *dest, int line_size, INT16 *block); +void simple_idct84_add(UINT8 *dest, int line_size, DCTELEM *block); +void simple_idct48_add(UINT8 *dest, int line_size, DCTELEM *block); diff --git a/src/libffmpeg/libavcodec/svq1.c b/src/libffmpeg/libavcodec/svq1.c index 77035f1f9..5a9a290b8 100644 --- a/src/libffmpeg/libavcodec/svq1.c +++ b/src/libffmpeg/libavcodec/svq1.c @@ -1066,7 +1066,7 @@ static int svq1_decode_frame(AVCodecContext *avctx, AVFrame *pict = data; /* initialize bit buffer */ - init_get_bits(&s->gb,buf,buf_size); + init_get_bits(&s->gb,buf,buf_size*8); /* decode frame header */ s->f_code = get_bits (&s->gb, 22); @@ -1093,6 +1093,10 @@ static int svq1_decode_frame(AVCodecContext *avctx, return result; } + //FIXME this avoids some confusion for "B frames" without 2 references + //this should be removed after libavcodec can handle more flaxible picture types & ordering + if(s->pict_type==B_TYPE && s->last_picture.data[0]==NULL) return buf_size; + if(avctx->hurry_up && s->pict_type==B_TYPE) return buf_size; if(MPV_frame_start(s, avctx) < 0) diff --git a/src/libffmpeg/libavcodec/utils.c b/src/libffmpeg/libavcodec/utils.c index af6ba986b..ca71807f7 100644 --- a/src/libffmpeg/libavcodec/utils.c +++ b/src/libffmpeg/libavcodec/utils.c @@ -24,8 +24,6 @@ void *av_mallocz(unsigned int size) { void *ptr; - if(size == 0) fprintf(stderr, "Warning, allocating 0 bytes\n"); - ptr = av_malloc(size); if (!ptr) return NULL; @@ -33,6 +31,32 @@ void *av_mallocz(unsigned int size) return ptr; } +char *av_strdup(const char *s) +{ + char *ptr; + int len; + len = strlen(s) + 1; + ptr = av_malloc(len); + if (!ptr) + return NULL; + memcpy(ptr, s, len); + return ptr; +} + +/** + * realloc which does nothing if the block is large enough + */ +void *av_fast_realloc(void *ptr, int *size, int min_size) +{ + if(min_size < *size) + return ptr; + + *size= min_size + 10*1024; + + return av_realloc(ptr, *size); +} + + /* allocation of static arrays - do not use for normal allocation */ static unsigned int last_static = 0; static char*** array_static = NULL; @@ -47,7 +71,7 @@ void *__av_mallocz_static(void** location, unsigned int size) if (location) { if (l > last_static) - array_static = realloc(array_static, l); + array_static = av_realloc(array_static, l); array_static[last_static++] = (char**) 
location; *location = ptr; } @@ -61,10 +85,10 @@ void av_free_static() unsigned i; for (i = 0; i < last_static; i++) { - free(*array_static[i]); + av_free(*array_static[i]); *array_static[i] = NULL; } - free(array_static); + av_free(array_static); array_static = 0; } last_static = 0; @@ -89,32 +113,6 @@ void register_avcodec(AVCodec *format) format->next = NULL; } -void avcodec_get_chroma_sub_sample(int fmt, int *h_shift, int *v_shift){ - switch(fmt){ - case PIX_FMT_YUV410P: - *h_shift=2; - *v_shift=2; - break; - case PIX_FMT_YUV420P: - *h_shift=1; - *v_shift=1; - break; - case PIX_FMT_YUV411P: - *h_shift=2; - *v_shift=0; - break; - case PIX_FMT_YUV422P: - case PIX_FMT_YUV422: - *h_shift=1; - *v_shift=0; - break; - default: //RGB/... - *h_shift=0; - *v_shift=0; - break; - } -} - typedef struct DefaultPicOpaque{ int last_pic_num; uint8_t *data[4]; @@ -125,10 +123,10 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){ const int width = s->width; const int height= s->height; DefaultPicOpaque *opaque; - +/* assert(pic->data[0]==NULL); - /* assert(pic->type==0 || pic->type==FF_TYPE_INTERNAL); */ - + assert(pic->type==0 || pic->type==FF_TYPE_INTERNAL); +*/ if(pic->opaque){ opaque= (DefaultPicOpaque *)pic->opaque; for(i=0; i<3; i++) @@ -152,7 +150,6 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){ case PIX_FMT_BGR24: pixel_size=3; break; - case PIX_FMT_BGRA32: case PIX_FMT_RGBA32: pixel_size=4; break; @@ -212,6 +209,10 @@ void avcodec_default_release_buffer(AVCodecContext *s, AVFrame *pic){ //printf("R%X\n", pic->opaque); } +enum PixelFormat avcodec_default_get_format(struct AVCodecContext *s, enum PixelFormat * fmt){ + return fmt[0]; +} + void avcodec_get_context_defaults(AVCodecContext *s){ s->bit_rate= 800*1000; s->bit_rate_tolerance= s->bit_rate*10; @@ -234,6 +235,7 @@ void avcodec_get_context_defaults(AVCodecContext *s){ s->me_method= ME_EPZS; s->get_buffer= avcodec_default_get_buffer; s->release_buffer= avcodec_default_release_buffer; + s->get_format= avcodec_default_get_format; s->me_subpel_quality=8; } @@ -410,19 +412,6 @@ AVCodec *avcodec_find(enum CodecID id) return NULL; } -const char *pix_fmt_str[] = { - "yuv420p", - "yuv422", - "rgb24", - "bgr24", - "yuv422p", - "yuv444p", - "rgba32", - "bgra32", - "yuv410p", - "yuv411p", -}; - void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode) { const char *codec_name; @@ -462,7 +451,7 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode) if (enc->codec_id == CODEC_ID_RAWVIDEO) { snprintf(buf + strlen(buf), buf_size - strlen(buf), ", %s", - pix_fmt_str[enc->pix_fmt]); + avcodec_get_pix_fmt_name(enc->pix_fmt)); } if (enc->width) { snprintf(buf + strlen(buf), buf_size - strlen(buf), @@ -537,99 +526,6 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode) } } -/* Picture field are filled with 'ptr' addresses */ -void avpicture_fill(AVPicture *picture, UINT8 *ptr, - int pix_fmt, int width, int height) -{ - int size; - - size = width * height; - switch(pix_fmt) { - case PIX_FMT_YUV420P: - picture->data[0] = ptr; - picture->data[1] = picture->data[0] + size; - picture->data[2] = picture->data[1] + size / 4; - picture->linesize[0] = width; - picture->linesize[1] = width / 2; - picture->linesize[2] = width / 2; - break; - case PIX_FMT_YUV422P: - picture->data[0] = ptr; - picture->data[1] = picture->data[0] + size; - picture->data[2] = picture->data[1] + size / 2; - picture->linesize[0] = width; - picture->linesize[1] = width / 2; - 
picture->linesize[2] = width / 2; - break; - case PIX_FMT_YUV444P: - picture->data[0] = ptr; - picture->data[1] = picture->data[0] + size; - picture->data[2] = picture->data[1] + size; - picture->linesize[0] = width; - picture->linesize[1] = width; - picture->linesize[2] = width; - break; - case PIX_FMT_RGB24: - case PIX_FMT_BGR24: - picture->data[0] = ptr; - picture->data[1] = NULL; - picture->data[2] = NULL; - picture->linesize[0] = width * 3; - break; - case PIX_FMT_RGBA32: - case PIX_FMT_BGRA32: - picture->data[0] = ptr; - picture->data[1] = NULL; - picture->data[2] = NULL; - picture->linesize[0] = width * 4; - break; - case PIX_FMT_YUV422: - picture->data[0] = ptr; - picture->data[1] = NULL; - picture->data[2] = NULL; - picture->linesize[0] = width * 2; - break; - default: - picture->data[0] = NULL; - picture->data[1] = NULL; - picture->data[2] = NULL; - break; - } -} - -int avpicture_get_size(int pix_fmt, int width, int height) -{ - int size; - - size = width * height; - switch(pix_fmt) { - case PIX_FMT_YUV420P: - size = (size * 3) / 2; - break; - case PIX_FMT_YUV422P: - size = (size * 2); - break; - case PIX_FMT_YUV444P: - size = (size * 3); - break; - case PIX_FMT_RGB24: - case PIX_FMT_BGR24: - size = (size * 3); - break; - case PIX_FMT_RGBA32: - case PIX_FMT_BGRA32: - size = (size * 4); - break; - case PIX_FMT_YUV422: - size = (size * 2); - break; - default: - size = -1; - break; - } - return size; -} - unsigned avcodec_version( void ) { return LIBAVCODEC_VERSION_INT; diff --git a/src/libffmpeg/libavcodec/wmadec.c b/src/libffmpeg/libavcodec/wmadec.c index a6fa2f8b2..5305e1c5d 100644 --- a/src/libffmpeg/libavcodec/wmadec.c +++ b/src/libffmpeg/libavcodec/wmadec.c @@ -92,7 +92,7 @@ typedef struct WMADecodeContext { int16_t coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE]; float coefs[MAX_CHANNELS][BLOCK_MAX_SIZE] __attribute__((aligned(16))); MDCTContext mdct_ctx[BLOCK_NB_SIZES]; - float *windows[BLOCK_NB_SIZES] __attribute__((aligned(16))); + float *windows[BLOCK_NB_SIZES]; FFTSample mdct_tmp[BLOCK_MAX_SIZE] __attribute__((aligned(16))); /* temporary storage for imdct */ /* output buffer for one frame and the last for IMDCT windowing */ float frame_out[MAX_CHANNELS][BLOCK_MAX_SIZE * 2] __attribute__((aligned(16))); @@ -212,8 +212,8 @@ static void init_coef_vlc(VLC *vlc, init_vlc(vlc, 9, n, table_bits, 1, 1, table_codes, 4, 4); - run_table = malloc(n * sizeof(uint16_t)); - level_table = malloc(n * sizeof(uint16_t)); + run_table = av_malloc(n * sizeof(uint16_t)); + level_table = av_malloc(n * sizeof(uint16_t)); p = levels_table; i = 2; level = 1; @@ -1226,7 +1226,7 @@ static int wma_decode_superframe(AVCodecContext *avctx, samples = data; - init_get_bits(&s->gb, buf, buf_size); + init_get_bits(&s->gb, buf, buf_size*8); if (s->use_bit_reservoir) { /* read super frame header */ @@ -1252,7 +1252,7 @@ static int wma_decode_superframe(AVCodecContext *avctx, } /* XXX: bit_offset bits into last frame */ - init_get_bits(&s->gb, s->last_superframe, MAX_CODED_SUPERFRAME_SIZE); + init_get_bits(&s->gb, s->last_superframe, MAX_CODED_SUPERFRAME_SIZE*8); /* skip unused bits */ if (s->last_bitoffset > 0) skip_bits(&s->gb, s->last_bitoffset); @@ -1265,7 +1265,7 @@ static int wma_decode_superframe(AVCodecContext *avctx, /* read each frame starting from bit_offset */ pos = bit_offset + 4 + 4 + s->byte_offset_bits + 3; - init_get_bits(&s->gb, buf + (pos >> 3), MAX_CODED_SUPERFRAME_SIZE - (pos >> 3)); + init_get_bits(&s->gb, buf + (pos >> 3), (MAX_CODED_SUPERFRAME_SIZE - (pos >> 3))*8); len = pos & 7; if (len > 0) 
skip_bits(&s->gb, len); diff --git a/src/libffmpeg/libavcodec/wmv2.c b/src/libffmpeg/libavcodec/wmv2.c index d25b7a5f1..6def6f2a8 100644 --- a/src/libffmpeg/libavcodec/wmv2.c +++ b/src/libffmpeg/libavcodec/wmv2.c @@ -313,7 +313,7 @@ static int decode_ext_header(Wmv2Context *w){ if(s->avctx->extradata_size<4) return -1; - init_get_bits(&gb, s->avctx->extradata, s->avctx->extradata_size); + init_get_bits(&gb, s->avctx->extradata, s->avctx->extradata_size*8); fps = get_bits(&gb, 5); s->bit_rate = get_bits(&gb, 11)*1024; @@ -330,8 +330,9 @@ static int decode_ext_header(Wmv2Context *w){ s->slice_height = s->mb_height / code; if(s->avctx->debug&FF_DEBUG_PICT_INFO){ - printf("fps:%d, br:%d, qpbit:%d, abt_flag:%d, j_type_bit:%d, tl_mv_flag:%d, mbrl_bit:%d, code:%d, flag3:%d\n", - fps, s->bit_rate, w->mspel_bit, w->abt_flag, w->j_type_bit, w->top_left_mv_flag, w->per_mb_rl_bit, code, w->flag3); + printf("fps:%d, br:%d, qpbit:%d, abt_flag:%d, j_type_bit:%d, tl_mv_flag:%d, mbrl_bit:%d, code:%d, flag3:%d, slices:%d\n", + fps, s->bit_rate, w->mspel_bit, w->abt_flag, w->j_type_bit, w->top_left_mv_flag, w->per_mb_rl_bit, code, w->flag3, + code); } return 0; } @@ -503,8 +504,7 @@ static int16_t *wmv2_pred_motion(Wmv2Context *w, int *px, int *py){ diff= FFMAX(ABS(A[0] - B[0]), ABS(A[1] - B[1])); - if(s->mb_x && s->mb_y && !s->mspel && w->top_left_mv_flag && diff >= 8) - //FIXME top/left bit too if y=!0 && first_slice_line? + if(s->mb_x && !s->first_slice_line && !s->mspel && w->top_left_mv_flag && diff >= 8) type= get_bits1(&s->gb); else type= 2; @@ -577,16 +577,7 @@ static void wmv2_add_block(Wmv2Context *w, DCTELEM *block1, uint8_t *dst, int st MpegEncContext * const s= &w->s; uint8_t temp[2][64]; int i; - - if(w->abt_type_table[n] && 0){ - int a,b; - a= block1[0]; - b= w->abt_block2[n][0]; - block1[0]= a+b; - w->abt_block2[n][0]= a-b; - } - switch(w->abt_type_table[n]){ case 0: if (s->block_last_index[n] >= 0) { diff --git a/src/libffmpeg/xine_decoder.c b/src/libffmpeg/xine_decoder.c index 8be7844c2..fe1aca44d 100644 --- a/src/libffmpeg/xine_decoder.c +++ b/src/libffmpeg/xine_decoder.c @@ -17,7 +17,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA * - * $Id: xine_decoder.c,v 1.87 2003/01/10 23:33:09 holstsn Exp $ + * $Id: xine_decoder.c,v 1.88 2003/01/31 18:29:43 miguelfreitas Exp $ * * xine decoder plugin using ffmpeg * @@ -667,7 +667,7 @@ void avcodec_register_all(void) register_avcodec(&rv10_decoder); register_avcodec(&svq1_decoder); register_avcodec(&dvvideo_decoder); - // register_avcodec(&dvaudio_decoder); + register_avcodec(&dvaudio_decoder); register_avcodec(&mjpeg_decoder); register_avcodec(&mjpegb_decoder); register_avcodec(&mp2_decoder); @@ -810,6 +810,16 @@ static void ff_audio_decode_data (audio_decoder_t *this_gen, buf_element_t *buf) this->stream->meta_info[XINE_META_INFO_AUDIOCODEC] = strdup ("Windows Media Audio v2 (ffmpeg)"); break; + case BUF_AUDIO_DV: + this->codec = avcodec_find_decoder (CODEC_ID_DVAUDIO); + this->stream->meta_info[XINE_META_INFO_AUDIOCODEC] + = strdup ("DV Audio (ffmpeg)"); + break; + case BUF_AUDIO_MPEG: + this->codec = avcodec_find_decoder (CODEC_ID_MP3LAME); + this->stream->meta_info[XINE_META_INFO_AUDIOCODEC] + = strdup ("MP3 (ffmpeg)"); + break; } if (!this->codec) { @@ -1034,14 +1044,14 @@ static uint32_t supported_video_types[] = { BUF_VIDEO_MSMPEG4_V2, BUF_VIDEO_MSMPEG4_V3, BUF_VIDEO_WMV7, - /*BUF_VIDEO_WMV8,*/ + /* BUF_VIDEO_WMV8, */ BUF_VIDEO_MPEG4, 
BUF_VIDEO_XVID, BUF_VIDEO_DIVX5, BUF_VIDEO_MJPEG, BUF_VIDEO_H263, BUF_VIDEO_RV10, - /* BUF_VIDEO_SORENSON_V1, -- ffmpeg svq1 decoder is segfaulting */ + BUF_VIDEO_SORENSON_V1, BUF_VIDEO_JPEG, BUF_VIDEO_MPEG, BUF_VIDEO_DV, @@ -1051,6 +1061,8 @@ static uint32_t supported_video_types[] = { static uint32_t supported_audio_types[] = { BUF_AUDIO_WMAV1, BUF_AUDIO_WMAV2, + BUF_AUDIO_DV, + /* BUF_AUDIO_MPEG, */ 0 }; diff --git a/src/libmad/xine_decoder.c b/src/libmad/xine_decoder.c index b53682387..a4f30eee3 100644 --- a/src/libmad/xine_decoder.c +++ b/src/libmad/xine_decoder.c @@ -17,7 +17,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA * - * $Id: xine_decoder.c,v 1.40 2003/01/11 11:29:22 esnel Exp $ + * $Id: xine_decoder.c,v 1.41 2003/01/31 18:29:47 miguelfreitas Exp $ * * stuff needed to turn libmad into a xine decoder plugin */ @@ -355,7 +355,7 @@ static uint32_t audio_types[] = { static decoder_info_t dec_info_audio = { audio_types, /* supported types */ - 5 /* priority */ + 6 /* priority */ }; plugin_info_t xine_plugin_info[] = { diff --git a/src/libxinevdec/svq1.c b/src/libxinevdec/svq1.c index 935172ff2..c92795b6a 100644 --- a/src/libxinevdec/svq1.c +++ b/src/libxinevdec/svq1.c @@ -17,7 +17,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA * - * $Id: svq1.c,v 1.23 2003/01/08 01:02:32 miguelfreitas Exp $ + * $Id: svq1.c,v 1.24 2003/01/31 18:29:47 miguelfreitas Exp $ */ #include <stdio.h> @@ -1495,7 +1495,7 @@ static uint32_t video_types[] = { static decoder_info_t dec_info_video = { video_types, /* supported types */ - 4 /* priority */ + 6 /* priority */ }; plugin_info_t xine_plugin_info[] = { diff --git a/src/xine-engine/buffer.h b/src/xine-engine/buffer.h index 64bbaa8d2..057e3222b 100644 --- a/src/xine-engine/buffer.h +++ b/src/xine-engine/buffer.h @@ -17,7 +17,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA * - * $Id: buffer.h,v 1.98 2003/01/26 23:36:46 f1rmb Exp $ + * $Id: buffer.h,v 1.99 2003/01/31 18:29:47 miguelfreitas Exp $ * * * contents: @@ -193,6 +193,7 @@ extern "C" { #define BUF_AUDIO_DIALOGIC_IMA 0x032A0000 #define BUF_AUDIO_NSF 0x032B0000 #define BUF_AUDIO_FLAC 0x032C0000 +#define BUF_AUDIO_DV 0x032D0000 /* spu buffer types: */ diff --git a/src/xine-engine/buffer_types.c b/src/xine-engine/buffer_types.c index 5b82579ed..9850e4a68 100644 --- a/src/xine-engine/buffer_types.c +++ b/src/xine-engine/buffer_types.c @@ -17,7 +17,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA * - * $Id: buffer_types.c,v 1.52 2003/01/23 16:12:19 miguelfreitas Exp $ + * $Id: buffer_types.c,v 1.53 2003/01/31 18:29:47 miguelfreitas Exp $ * * * contents: @@ -802,6 +802,13 @@ static audio_db_t audio_db[] = { BUF_AUDIO_FLAC, "Free Lossless Audio Codec (FLAC)" }, +{ + { + 0 + }, + BUF_AUDIO_DV, + "DV Audio" +}, { { 0 }, 0, "last entry" } }; |
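
A closing note on the utils.c hunk earlier in this patch: the new av_fast_realloc() only reallocates when the current buffer is too small, and over-allocates by 10 KB so repeated small growth requests stay cheap. The sketch below restates that logic with plain realloc() so it compiles on its own; the name fast_realloc_sketch and the buffer-growth scenario in main() are illustrative and do not appear in the commit.

/*
 * Usage sketch for the av_fast_realloc() helper added in utils.c.
 * The helper copies the patch's logic but uses plain realloc()
 * instead of av_realloc() so the sketch is self-contained.
 */
#include <stdlib.h>
#include <stdio.h>

static void *fast_realloc_sketch(void *ptr, int *size, int min_size)
{
    if (min_size < *size)
        return ptr;                 /* current block is already big enough */
    *size = min_size + 10 * 1024;   /* over-allocate to avoid frequent growth */
    return realloc(ptr, *size);
}

int main(void)
{
    void *buf = NULL;
    int buf_size = 0;

    buf = fast_realloc_sketch(buf, &buf_size, 4000);   /* grows to 14240      */
    printf("after first call:  size=%d\n", buf_size);

    buf = fast_realloc_sketch(buf, &buf_size, 6000);   /* fits, no realloc    */
    printf("after second call: size=%d\n", buf_size);

    buf = fast_realloc_sketch(buf, &buf_size, 20000);  /* grows again, 30240  */
    printf("after third call:  size=%d\n", buf_size);

    free(buf);
    return 0;
}

The second call asks for 6000 bytes but the buffer already holds 14240, so the pointer and size come back unchanged; only the third call, asking for 20000 bytes, triggers another reallocation.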