summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/libffmpeg/libavcodec/Makefile.am34
-rw-r--r--src/libffmpeg/libavcodec/avcodec.h67
-rw-r--r--src/libffmpeg/libavcodec/common.c46
-rw-r--r--src/libffmpeg/libavcodec/common.h61
-rw-r--r--src/libffmpeg/libavcodec/dsputil.c91
-rw-r--r--src/libffmpeg/libavcodec/dsputil.h59
-rw-r--r--src/libffmpeg/libavcodec/dsputil_mlib.c136
-rw-r--r--src/libffmpeg/libavcodec/dsputil_mmx.c1057
-rw-r--r--src/libffmpeg/libavcodec/dsputil_mmx_avg.h344
-rw-r--r--src/libffmpeg/libavcodec/h263.c353
-rw-r--r--src/libffmpeg/libavcodec/h263data.h23
-rw-r--r--src/libffmpeg/libavcodec/h263dec.c45
-rw-r--r--src/libffmpeg/libavcodec/idct_mlib.c38
-rw-r--r--src/libffmpeg/libavcodec/idct_mmx.c597
-rw-r--r--src/libffmpeg/libavcodec/imgconvert.c548
-rw-r--r--src/libffmpeg/libavcodec/imgresample.c619
-rw-r--r--src/libffmpeg/libavcodec/mjpeg.c25
-rw-r--r--src/libffmpeg/libavcodec/mpeg12.c25
-rw-r--r--src/libffmpeg/libavcodec/mpegvideo.c65
-rw-r--r--src/libffmpeg/libavcodec/mpegvideo.h28
-rw-r--r--src/libffmpeg/libavcodec/mpegvideo_mmx.c232
-rw-r--r--src/libffmpeg/libavcodec/msmpeg4.c4
-rw-r--r--src/libffmpeg/libavcodec/rv10.c2
-rw-r--r--src/libffmpeg/libavcodec/sad_mmx.s799
-rw-r--r--src/libffmpeg/libavcodec/simple_idct.c231
-rw-r--r--src/libffmpeg/libavcodec/simple_idct.h20
-rw-r--r--src/libffmpeg/libavcodec/utils.c91
27 files changed, 2183 insertions, 3457 deletions
diff --git a/src/libffmpeg/libavcodec/Makefile.am b/src/libffmpeg/libavcodec/Makefile.am
index c2ffc84bc..8e2a6a4df 100644
--- a/src/libffmpeg/libavcodec/Makefile.am
+++ b/src/libffmpeg/libavcodec/Makefile.am
@@ -2,6 +2,8 @@
## Process this file with automake to produce Makefile.in
##
+SUBDIRS = armv4l i386 mlib
+
#CFLAGS = -D_FILE_OFFSET_BITS=64 @GLOBAL_CFLAGS@ -DCONFIG_DECODERS -DHAVE_AV_CONFIG_H
CFLAGS = @GLOBAL_CFLAGS@ @LIBFFMPEG_CFLAGS@ -DCONFIG_DECODERS -DHAVE_AV_CONFIG_H
@@ -11,28 +13,22 @@ LIBTOOL = $(SHELL) $(top_builddir)/libtool-nofpic
noinst_LTLIBRARIES = libavcodec.la
-if HAVE_FFMMX
-mmx_modules = mpegvideo_mmx.c sad_mmx.s dsputil_mmx.c idct_mmx.c
-#mmx_modules = mpegvideo_mmx.c sad_mmx.s
-endif
-
-if HAVE_MLIB
-mlib_modules = dsputil_mlib.c idct_mlib.c
-endif
-
-EXTRA_DIST = mpegvideo_mmx.c sad_mmx.s dsputil_mmx.c idct_mmx.c \
- dsputil_mlib.c idct_mlib.c
-
-libavcodec_la_SOURCES = dsputil.c fdctref.c jfdctfst.c mpeg12.c \
- utils.c rv10.c h263.c jrevdct.c \
- common.c h263dec.c msmpeg4.c \
- mpegvideo.c mjpeg.c motion_est.c \
- $(mmx_modules) $(mlib_modules)
+libavcodec_la_SOURCES = common.c utils.c mpegvideo.c h263.c jrevdct.c jfdctfst.c \
+ mjpeg.c dsputil.c \
+ motion_est.c imgconvert.c msmpeg4.c \
+ mpeg12.c h263dec.c rv10.c simple_idct.c
+#imgresample.c
+
+libavcodec_la_LDFLAGS = \
+ $(top_builddir)/src/libffmpeg/libavcodec/armv4l/libavcodec_armv4l.la \
+ $(top_builddir)/src/libffmpeg/libavcodec/i386/libavcodec_mmx.la \
+ $(top_builddir)/src/libffmpeg/libavcodec/mlib/libavcodec_mlib.la \
+ -avoid-version -module
-noinst_HEADERS = avcodec.h dsputil.h mpegvideo.h dsputil_mmx_avg.h\
+noinst_HEADERS = avcodec.h dsputil.h mpegvideo.h \
common.h h263data.h mpeg4data.h msmpeg4data.h \
- mpeg12data.h
+ mpeg12data.h simple_idct.h
.s.lo:
$(ASCOMPILE) -o $@ `test -f $< || echo '$(srcdir)/'`$<
diff --git a/src/libffmpeg/libavcodec/avcodec.h b/src/libffmpeg/libavcodec/avcodec.h
index 5155c8fd2..864ebb3ba 100644
--- a/src/libffmpeg/libavcodec/avcodec.h
+++ b/src/libffmpeg/libavcodec/avcodec.h
@@ -1,3 +1,6 @@
+#ifndef AVCODEC_H
+#define AVCODEC_H
+
#include "common.h"
enum CodecID {
@@ -9,11 +12,20 @@ enum CodecID {
CODEC_ID_AC3,
CODEC_ID_MJPEG,
CODEC_ID_MPEG4,
- CODEC_ID_PCM,
CODEC_ID_RAWVIDEO,
CODEC_ID_MSMPEG4,
CODEC_ID_H263P,
CODEC_ID_H263I,
+
+ /* various pcm "codecs" */
+ CODEC_ID_PCM_S16LE,
+ CODEC_ID_PCM_S16BE,
+ CODEC_ID_PCM_U16LE,
+ CODEC_ID_PCM_U16BE,
+ CODEC_ID_PCM_S8,
+ CODEC_ID_PCM_U8,
+ CODEC_ID_PCM_MULAW,
+ CODEC_ID_PCM_ALAW,
};
enum CodecType {
@@ -30,6 +42,11 @@ enum PixelFormat {
PIX_FMT_YUV444P,
};
+/* currently unused, may be used if 24/32 bits samples ever supported */
+enum SampleFormat {
+ SAMPLE_FMT_S16 = 0, /* signed 16 bits */
+};
+
/* in bytes */
#define AVCODEC_MAX_AUDIO_FRAME_SIZE 18432
@@ -74,6 +91,7 @@ typedef struct AVCodecContext {
/* audio only */
int sample_rate; /* samples per sec */
int channels;
+ int sample_fmt; /* sample format, currently unused */
/* the following data should not be initialized */
int frame_size; /* in samples, initialized when calling 'init' */
@@ -85,6 +103,19 @@ typedef struct AVCodecContext {
struct AVCodec *codec;
void *priv_data;
+ /* The following data is for RTP-friendly coding. */
+ /* For now only the H.263/H.263+ coder honours this. */
+ int rtp_mode; /* 1 to activate RTP-friendly mode; */
+ /* higher numbers represent more error-prone */
+ /* environments, for now only "1" exists */
+
+ int rtp_payload_size; /* The size of the RTP payload; the coder will */
+ /* do its best to deliver a chunk with size */
+ /* below rtp_payload_size; the chunk will start */
+ /* with a start code on some codecs like H.263. */
+ /* This doesn't take account of any particular */
+ /* headers inside the transmitted RTP payload */
+
/* the following fields are ignored */
void *opaque; /* can be used to carry app specific stuff */
char codec_name[32];
@@ -130,13 +161,30 @@ extern AVCodec mpeg_decoder;
extern AVCodec h263i_decoder;
extern AVCodec rv10_decoder;
extern AVCodec mjpeg_decoder;
+#ifdef FF_AUDIO_CODECS
+extern AVCodec mp3_decoder;
+
+/* pcm codecs */
+#define PCM_CODEC(id, name) \
+extern AVCodec name ## _decoder; \
+extern AVCodec name ## _encoder;
-/* dummy raw codecs */
-extern AVCodec pcm_codec;
+PCM_CODEC(CODEC_ID_PCM_S16LE, pcm_s16le);
+PCM_CODEC(CODEC_ID_PCM_S16BE, pcm_s16be);
+PCM_CODEC(CODEC_ID_PCM_U16LE, pcm_u16le);
+PCM_CODEC(CODEC_ID_PCM_U16BE, pcm_u16be);
+PCM_CODEC(CODEC_ID_PCM_S8, pcm_s8);
+PCM_CODEC(CODEC_ID_PCM_U8, pcm_u8);
+PCM_CODEC(CODEC_ID_PCM_ALAW, pcm_alaw);
+PCM_CODEC(CODEC_ID_PCM_MULAW, pcm_mulaw);
+
+#undef PCM_CODEC
+#endif
+
+/* dummy raw video codec */
extern AVCodec rawvideo_codec;
/* the following codecs use external GPL libs */
-extern AVCodec mp3_decoder;
extern AVCodec ac3_decoder;
/* resample.c */
@@ -184,6 +232,7 @@ void avcodec_init(void);
void register_avcodec(AVCodec *format);
AVCodec *avcodec_find_encoder(enum CodecID id);
+AVCodec *avcodec_find_encoder_by_name(const char *name);
AVCodec *avcodec_find_decoder(enum CodecID id);
AVCodec *avcodec_find_decoder_by_name(const char *name);
void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode);
@@ -203,3 +252,13 @@ int avcodec_encode_video(AVCodecContext *avctx, UINT8 *buf, int buf_size,
int avcodec_close(AVCodecContext *avctx);
void avcodec_register_all(void);
+
+#ifdef FF_POSTPROCESS
+#ifndef MBC
+#define MBC 48
+#define MBR 36
+#endif
+extern int quant_store[MBR+1][MBC+1]; // [Review]
+#endif
+
+#endif /* AVCODEC_H */
diff --git a/src/libffmpeg/libavcodec/common.c b/src/libffmpeg/libavcodec/common.c
index b3ce07c05..8f0fdc1bb 100644
--- a/src/libffmpeg/libavcodec/common.c
+++ b/src/libffmpeg/libavcodec/common.c
@@ -250,6 +250,52 @@ void align_get_bits(GetBitContext *s)
get_bits(s, n);
}
}
+/* This function is identical to get_bits_long(); the */
+/* only difference is that it doesn't touch the buffer. */
+/* It is useful for peeking at the upcoming bits. */
+
+unsigned int show_bits_long(GetBitContext *s, int n)
+{
+ unsigned int val;
+ int bit_cnt;
+ unsigned int bit_buf;
+ UINT8 *buf_ptr;
+
+ bit_buf = s->bit_buf; /* work on local copies only: nothing is written back to *s */
+ bit_cnt = s->bit_cnt - n; /* presumably negative here: show_bits() only calls this when s->bit_cnt < n */
+
+ val = bit_buf >> (32 - n); /* NOTE(review): 32-bit word assumed; n == 0 would shift by 32 - confirm callers never pass it */
+ buf_ptr = s->buf_ptr;
+ buf_ptr += 4; /* local pointer; s->buf_ptr itself is left unchanged */
+
+ /* handle common case: we can read everything */
+ if (buf_ptr <= s->buf_end) {
+#ifdef ARCH_X86
+ bit_buf = bswap_32(*((unsigned long*)(&buf_ptr[-4]))); /* NOTE(review): load through unsigned long* - assumes a 32-bit unsigned long and tolerates unaligned access; confirm */
+#else
+ bit_buf = (buf_ptr[-4] << 24) | /* assemble the next four bytes, first byte most significant */
+ (buf_ptr[-3] << 16) |
+ (buf_ptr[-2] << 8) |
+ (buf_ptr[-1]);
+#endif
+ } else {
+ buf_ptr -= 4; /* fewer than four bytes left before buf_end: read them one at a time */
+ bit_buf = 0; /* bytes at or past buf_end contribute zero bits */
+ if (buf_ptr < s->buf_end)
+ bit_buf |= *buf_ptr++ << 24;
+ if (buf_ptr < s->buf_end)
+ bit_buf |= *buf_ptr++ << 16;
+ if (buf_ptr < s->buf_end)
+ bit_buf |= *buf_ptr++ << 8;
+ if (buf_ptr < s->buf_end)
+ bit_buf |= *buf_ptr++;
+ }
+ val |= bit_buf >> (32 + bit_cnt); /* merge in the top bits of the freshly loaded word */
+ bit_buf <<= - bit_cnt; /* dead store: local bit_buf is not read again before the return */
+ bit_cnt += 32; /* dead store: local bit_cnt is not read again before the return */
+
+ return val;
+}
/* VLC decoding */
diff --git a/src/libffmpeg/libavcodec/common.h b/src/libffmpeg/libavcodec/common.h
index 8ea58b2e7..be9678cf4 100644
--- a/src/libffmpeg/libavcodec/common.h
+++ b/src/libffmpeg/libavcodec/common.h
@@ -1,10 +1,13 @@
#ifndef COMMON_H
#define COMMON_H
-#define FFMPEG_VERSION_INT 0x000405
-#define FFMPEG_VERSION "0.4.5"
+#define FFMPEG_VERSION_INT 0x000406
+#define FFMPEG_VERSION "0.4.6"
+/* CVS version as of 26-12-2001 */
-#ifdef WIN32
+#undef DEBUG
+
+#if defined(WIN32) && !defined(__MINGW32__)
#define CONFIG_WIN32
#endif
@@ -43,6 +46,7 @@ typedef INT16 int16_t;
typedef UINT32 uint32_t;
typedef INT32 int32_t;
+#ifndef __MINGW32__
#define INT64_C(c) (c ## i64)
#define UINT64_C(c) (c ## i64)
@@ -56,6 +60,11 @@ typedef INT32 int32_t;
#pragma warning( disable : 4244 )
#pragma warning( disable : 4305 )
+#else
+#define INT64_C(c) (c ## LL)
+#define UINT64_C(c) (c ## ULL)
+#endif /* __MINGW32__ */
+
#define M_PI 3.14159265358979323846
#define M_SQRT2 1.41421356237309504880 /* sqrt(2) */
@@ -71,10 +80,12 @@ typedef INT32 int32_t;
#define snprintf _snprintf
+#ifndef __MINGW32__
+/* no config.h with VC */
#define CONFIG_ENCODERS 1
#define CONFIG_DECODERS 1
#define CONFIG_AC3 1
-#define CONFIG_MPGLIB 1
+#endif
#else
@@ -112,15 +123,35 @@ typedef signed long long INT64;
#include "fastmemcpy.h"
#endif
+#endif /* HAVE_AV_CONFIG_H */
+
+#endif /* !CONFIG_WIN32 */
+
+/* debug stuff */
+#ifdef HAVE_AV_CONFIG_H
+
#ifndef DEBUG
#define NDEBUG
#endif
#include <assert.h>
-#endif /* HAVE_AV_CONFIG_H */
+/* dprintf macros */
+#if defined(CONFIG_WIN32) && !defined(__MINGW32__)
+
+inline void dprintf(const char* fmt,...) {}
+
+#else
+
+#ifdef DEBUG
+#define dprintf(fmt,args...) printf(fmt, ## args)
+#else
+#define dprintf(fmt,args...)
+#endif
#endif /* !CONFIG_WIN32 */
+#endif /* HAVE_AV_CONFIG_H */
+
/* bit output */
struct PutBitContext;
@@ -141,7 +172,7 @@ void init_put_bits(PutBitContext *s,
void *opaque,
void (*write_data)(void *, UINT8 *, int));
void put_bits(PutBitContext *s, int n, unsigned int value);
-INT64 get_bit_count(PutBitContext *s);
+INT64 get_bit_count(PutBitContext *s); /* XXX: change function name */
void align_put_bits(PutBitContext *s);
void flush_put_bits(PutBitContext *s);
@@ -168,6 +199,7 @@ void init_get_bits(GetBitContext *s,
UINT8 *buffer, int buffer_size);
unsigned int get_bits_long(GetBitContext *s, int n);
+unsigned int show_bits_long(GetBitContext *s, int n);
static inline unsigned int get_bits(GetBitContext *s, int n){
if(s->bit_cnt>=n){
@@ -197,6 +229,19 @@ static inline unsigned int get_bits1(GetBitContext *s){
return get_bits_long(s,1);
}
+/* This function is identical to get_bits(); the only */
+/* difference is that it doesn't touch the buffer. */
+/* It is useful for peeking at the upcoming bits. */
+static inline unsigned int show_bits(GetBitContext *s, int n)
+{
+ if(s->bit_cnt>=n) {
+ /* most common case here */
+ unsigned int val = s->bit_buf >> (32 - n); /* NOTE(review): n == 0 would shift by 32 - presumably callers pass n >= 1; confirm */
+ return val;
+ }
+ return show_bits_long(s,n); /* s->bit_cnt < n: defer to the out-of-line slow path */
+}
+
static inline void skip_bits(GetBitContext *s, int n){
if(s->bit_cnt>=n){
/* most common case here */
@@ -223,6 +268,10 @@ static inline void skip_bits1(GetBitContext *s){
}
}
+static inline int get_bits_count(GetBitContext *s) /* bit position derived from the read pointer and bit_cnt */
+{
+ return (s->buf_ptr - s->buf) * 8 - s->bit_cnt; /* bits spanned by buf..buf_ptr minus bit_cnt (presumably bits loaded but not yet consumed - confirm) */
+}
void align_get_bits(GetBitContext *s);
int init_vlc(VLC *vlc, int nb_bits, int nb_codes,
diff --git a/src/libffmpeg/libavcodec/dsputil.c b/src/libffmpeg/libavcodec/dsputil.c
index e68833b7c..f699b2ef6 100644
--- a/src/libffmpeg/libavcodec/dsputil.c
+++ b/src/libffmpeg/libavcodec/dsputil.c
@@ -20,7 +20,7 @@
#include <stdio.h>
#include "avcodec.h"
#include "dsputil.h"
-#include "xineutils.h"
+#include "simple_idct.h"
void (*ff_idct)(DCTELEM *block);
void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
@@ -389,6 +389,27 @@ int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
/* permute block according so that it corresponds to the MMX idct
order */
+#ifdef SIMPLE_IDCT
+void block_permute(INT16 *block) /* SIMPLE_IDCT build: reorder the 64 entries of block in place */
+{
+ int i;
+ INT16 temp[64];
+
+// disabled gather form: for(i=0; i<64; i++) temp[i] = block[ block_permute_op(i) ];
+ for(i=0; i<64; i++) temp[ block_permute_op(i) ] = block[i]; /* scatter: entry i moves to index block_permute_op(i); assumes that mapping is a permutation of 0..63, otherwise some temp[] slots stay uninitialized */
+
+ for(i=0; i<64; i++) block[i] = temp[i]; /* copy the reordered entries back into the caller's block */
+/* disabled debug dump of the permuted block, eight values per row:
+ for(i=0; i<64; i++)
+ {
+ if((i&7)==0) printf("\n");
+ printf("%2d ", block[i]);
+ }
+*/
+}
+
+#else
+
void block_permute(INT16 *block)
{
int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
@@ -410,16 +431,12 @@ void block_permute(INT16 *block)
block += 8;
}
}
+#endif
void dsputil_init(void)
{
int i, j;
-#ifdef ARCH_X86
- uint32_t mm = xine_mm_accel();
-#endif
- int use_permuted_mmx_idct;
- int accel_dsputil;
-
+ int use_permuted_idct;
for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
for(i=0;i<MAX_NEG_CROP;i++) {
@@ -431,7 +448,11 @@ void dsputil_init(void)
squareTbl[i] = (i - 256) * (i - 256);
}
+#ifdef SIMPLE_IDCT
+ ff_idct = simple_idct;
+#else
ff_idct = j_rev_dct;
+#endif
get_pixels = get_pixels_c;
put_pixels_clamped = put_pixels_clamped_c;
add_pixels_clamped = add_pixels_clamped_c;
@@ -442,46 +463,34 @@ void dsputil_init(void)
pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
av_fdct = jpeg_fdct_ifast;
- use_permuted_mmx_idct = 1;
- accel_dsputil = 0;
+ use_permuted_idct = 1;
-#ifdef ARCH_X86
- if (!accel_dsputil && (mm & MM_ACCEL_X86_MMX)) {
- dsputil_init_mmx();
- accel_dsputil = 1;
- /* printf("AVCODEC: Using mmx idct\n"); */
- }
+#ifdef HAVE_MMX
+ dsputil_init_mmx();
#endif
#ifdef ARCH_ARMV4L
- if (!accel_dsputil) {
- dsputil_init_armv4l();
- accel_dsputil = 1;
- /* printf("AVCODEC: Using armv4l idct\n"); */
- }
+ dsputil_init_armv4l();
#endif
#ifdef HAVE_MLIB
- if (!accel_dsputil) {
- dsputil_init_mlib();
- accel_dsputil = 1;
- use_permuted_mmx_idct = 0;
- /* printf("AVCODEC: Using mediaLib idct\n"); */
- }
+ dsputil_init_mlib();
+ use_permuted_idct = 0;
#endif
- if (!accel_dsputil) {
- /* printf("AVCODEC: Using C idct\n"); */
- }
- if (use_permuted_mmx_idct) {
- /* permute for IDCT */
- for(i=0;i<64;i++) {
- j = zigzag_direct[i];
- zigzag_direct[i] = block_permute_op(j);
- j = ff_alternate_horizontal_scan[i];
- ff_alternate_horizontal_scan[i] = block_permute_op(j);
- j = ff_alternate_vertical_scan[i];
- ff_alternate_vertical_scan[i] = block_permute_op(j);
- }
- block_permute(default_intra_matrix);
- block_permute(default_non_intra_matrix);
+#ifdef SIMPLE_IDCT
+ if(ff_idct == simple_idct) use_permuted_idct=0;
+#endif
+
+ if (use_permuted_idct) {
+ /* permute for IDCT */
+ for(i=0;i<64;i++) {
+ j = zigzag_direct[i];
+ zigzag_direct[i] = block_permute_op(j);
+ j = ff_alternate_horizontal_scan[i];
+ ff_alternate_horizontal_scan[i] = block_permute_op(j);
+ j = ff_alternate_vertical_scan[i];
+ ff_alternate_vertical_scan[i] = block_permute_op(j);
+ }
+ block_permute(default_intra_matrix);
+ block_permute(default_non_intra_matrix);
}
}
diff --git a/src/libffmpeg/libavcodec/dsputil.h b/src/libffmpeg/libavcodec/dsputil.h
index e401065a0..182b1e706 100644
--- a/src/libffmpeg/libavcodec/dsputil.h
+++ b/src/libffmpeg/libavcodec/dsputil.h
@@ -67,19 +67,53 @@ int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx, int h);
int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx, int h);
int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx, int h);
+#if defined (SIMPLE_IDCT) && defined (ARCH_X86)
+static inline int block_permute_op(int j) /* SIMPLE_IDCT && ARCH_X86 variant: table-driven index mapping */
+{
+static const int table[64]={ /* destination index for each source index 0..63 */
+ 0x00, 0x08, 0x01, 0x09, 0x04, 0x0C, 0x05, 0x0D,
+ 0x10, 0x18, 0x11, 0x19, 0x14, 0x1C, 0x15, 0x1D,
+ 0x02, 0x0A, 0x03, 0x0B, 0x06, 0x0E, 0x07, 0x0F,
+ 0x12, 0x1A, 0x13, 0x1B, 0x16, 0x1E, 0x17, 0x1F,
+ 0x20, 0x28, 0x21, 0x29, 0x24, 0x2C, 0x25, 0x2D,
+ 0x30, 0x38, 0x31, 0x39, 0x34, 0x3C, 0x35, 0x3D,
+ 0x22, 0x2A, 0x23, 0x2B, 0x26, 0x2E, 0x27, 0x2F,
+ 0x32, 0x3A, 0x33, 0x3B, 0x36, 0x3E, 0x37, 0x3F,
+};
+
+ return table[j]; /* no bounds check: j must be in 0..63 */
+}
+#elif defined (SIMPLE_IDCT)
+static inline int block_permute_op(int j) /* SIMPLE_IDCT without ARCH_X86: identity mapping */
+{
+ return j; /* index is returned unchanged, so no reordering takes place */
+}
+#else
static inline int block_permute_op(int j)
{
return (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2);
}
+#endif
void block_permute(INT16 *block);
#if defined(ARCH_X86)
+#define HAVE_MMX 1
+
#include "xineutils.h"
extern int mm_flags;
+#define mm_support() xine_mm_accel()
+
+#if 0
+static inline void emms(void)
+{
+ __asm __volatile ("emms;":::"memory");
+}
+#endif
+
#define emms_c() \
{\
if (mm_flags & MM_MMX)\
@@ -92,36 +126,29 @@ void dsputil_init_mmx(void);
#elif defined(ARCH_ARMV4L)
+#define emms_c()
+
/* This is to use 4 bytes read to the IDCT pointers for some 'zero'
line ptimizations */
#define __align8 __attribute__ ((aligned (4)))
void dsputil_init_armv4l(void);
-#endif
-
-
-
-#if defined(HAVE_MLIB)
+#elif defined(HAVE_MLIB)
+
+#define emms_c()
/* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */
#define __align8 __attribute__ ((aligned (8)))
-void dsputil_init_mlib(void);
+void dsputil_init_mlib(void);
-#endif /* HAVE_MLIB */
+#else
+#define emms_c()
-/*
- * provide empty defaults, if the target specific accelerated dsputils did
- * not define these:
- */
-#ifndef __align8
-#define __align8
-#endif
+#define __align8
-#ifndef emms_c
-#define emms_c()
#endif
#endif
diff --git a/src/libffmpeg/libavcodec/dsputil_mlib.c b/src/libffmpeg/libavcodec/dsputil_mlib.c
deleted file mode 100644
index 4be97d0c7..000000000
--- a/src/libffmpeg/libavcodec/dsputil_mlib.c
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Sun mediaLib optimized DSP utils
- * Copyright (c) 2001 Juergen Keil.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include "dsputil.h"
-
-#include <mlib_types.h>
-#include <mlib_status.h>
-#include <mlib_sys.h>
-#include <mlib_video.h>
-
-
-static void put_pixels_mlib (uint8_t * dest, const uint8_t * ref,
- int stride, int height)
-{
- assert(height == 16 || height == 8);
- if (height == 16)
- mlib_VideoCopyRef_U8_U8_8x16(dest, (uint8_t *)ref, stride);
- else
- mlib_VideoCopyRef_U8_U8_8x8 (dest, (uint8_t *)ref, stride);
-}
-
-static void put_pixels_x2_mlib (uint8_t * dest, const uint8_t * ref,
- int stride, int height)
-{
- assert(height == 16 || height == 8);
- if (height == 16)
- mlib_VideoInterpX_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
- else
- mlib_VideoInterpX_U8_U8_8x8 (dest, (uint8_t *)ref, stride, stride);
-}
-
-static void put_pixels_y2_mlib (uint8_t * dest, const uint8_t * ref,
- int stride, int height)
-{
- assert(height == 16 || height == 8);
- if (height == 16)
- mlib_VideoInterpY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
- else
- mlib_VideoInterpY_U8_U8_8x8 (dest, (uint8_t *)ref, stride, stride);
-}
-
-static void put_pixels_xy2_mlib(uint8_t * dest, const uint8_t * ref,
- int stride, int height)
-{
- assert(height == 16 || height == 8);
- if (height == 16)
- mlib_VideoInterpXY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
- else
- mlib_VideoInterpXY_U8_U8_8x8 (dest, (uint8_t *)ref, stride, stride);
-}
-
-static void avg_pixels_mlib (uint8_t * dest, const uint8_t * ref,
- int stride, int height)
-{
- assert(height == 16 || height == 8);
- if (height == 16)
- mlib_VideoCopyRefAve_U8_U8_8x16(dest, (uint8_t *)ref, stride);
- else
- mlib_VideoCopyRefAve_U8_U8_8x8 (dest, (uint8_t *)ref, stride);
-}
-
-static void avg_pixels_x2_mlib (uint8_t * dest, const uint8_t * ref,
- int stride, int height)
-{
- assert(height == 16 || height == 8);
- if (height == 16)
- mlib_VideoInterpAveX_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
- else
- mlib_VideoInterpAveX_U8_U8_8x8 (dest, (uint8_t *)ref, stride, stride);
-}
-
-static void avg_pixels_y2_mlib (uint8_t * dest, const uint8_t * ref,
- int stride, int height)
-{
- assert(height == 16 || height == 8);
- if (height == 16)
- mlib_VideoInterpAveY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
- else
- mlib_VideoInterpAveY_U8_U8_8x8 (dest, (uint8_t *)ref, stride, stride);
-}
-
-static void avg_pixels_xy2_mlib (uint8_t * dest, const uint8_t * ref,
- int stride, int height)
-{
- assert(height == 16 || height == 8);
- if (height == 16)
- mlib_VideoInterpAveXY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
- else
- mlib_VideoInterpAveXY_U8_U8_8x8 (dest, (uint8_t *)ref, stride, stride);
-}
-
-
-static void add_pixels_clamped_mlib(const DCTELEM *block, UINT8 *pixels, int line_size)
-{
- mlib_VideoAddBlock_U8_S16(pixels, (mlib_s16 *)block, line_size);
-}
-
-
-extern void ff_fdct_mlib(DCTELEM *data);
-extern void ff_idct_mlib(DCTELEM *data);
-
-void dsputil_init_mlib(void)
-{
- av_fdct = ff_fdct_mlib;
- ff_idct = ff_idct_mlib;
-
- put_pixels_tab[0] = put_pixels_mlib;
- put_pixels_tab[1] = put_pixels_x2_mlib;
- put_pixels_tab[2] = put_pixels_y2_mlib;
- put_pixels_tab[3] = put_pixels_xy2_mlib;
-
- avg_pixels_tab[0] = avg_pixels_mlib;
- avg_pixels_tab[1] = avg_pixels_x2_mlib;
- avg_pixels_tab[2] = avg_pixels_y2_mlib;
- avg_pixels_tab[3] = avg_pixels_xy2_mlib;
-
- put_no_rnd_pixels_tab[0] = put_pixels_mlib;
-
- add_pixels_clamped = add_pixels_clamped_mlib;
-}
diff --git a/src/libffmpeg/libavcodec/dsputil_mmx.c b/src/libffmpeg/libavcodec/dsputil_mmx.c
deleted file mode 100644
index b806c34e5..000000000
--- a/src/libffmpeg/libavcodec/dsputil_mmx.c
+++ /dev/null
@@ -1,1057 +0,0 @@
-/*
- * MMX optimized DSP utils
- * Copyright (c) 2000, 2001 Gerard Lantau.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
- */
-
-#include "dsputil.h"
-#include "xineutils.h"
-
-int mm_flags; /* multimedia extension flags */
-
-int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h);
-int pix_abs16x16_sse(UINT8 *blk1, UINT8 *blk2, int lx, int h);
-int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h);
-int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h);
-int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h);
-
-/* external functions, from idct_mmx.c */
-void ff_mmx_idct(DCTELEM *block);
-void ff_mmxext_idct(DCTELEM *block);
-
-/* pixel operations */
-static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001;
-static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002;
-//static const unsigned short mm_wone[4] __attribute__ ((aligned(8))) = { 0x1, 0x1, 0x1, 0x1 };
-//static const unsigned short mm_wtwo[4] __attribute__ ((aligned(8))) = { 0x2, 0x2, 0x2, 0x2 };
-
-/***********************************/
-/* 3Dnow specific */
-
-#define DEF(x) x ## _3dnow
-/* for Athlons PAVGUSB is prefered */
-#define PAVGB "pavgusb"
-
-#include "dsputil_mmx_avg.h"
-
-#undef DEF
-#undef PAVGB
-
-/***********************************/
-/* MMX2 specific */
-
-#define DEF(x) x ## _sse
-
-/* Introduced only in MMX2 set */
-#define PAVGB "pavgb"
-
-#include "dsputil_mmx_avg.h"
-
-#undef DEF
-#undef PAVGB
-
-/***********************************/
-/* standard MMX */
-
-static void get_pixels_mmx(DCTELEM *block, const UINT8 *pixels, int line_size)
-{
- DCTELEM *p;
- const UINT8 *pix;
- int i;
-
- /* read the pixels */
- p = block;
- pix = pixels;
- __asm __volatile("pxor %%mm7, %%mm7":::"memory");
- for(i=0;i<4;i++) {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %2, %%mm1\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "movq %%mm0, %0\n\t"
- "movq %%mm2, 8%0\n\t"
- "movq %%mm1, 16%0\n\t"
- "movq %%mm3, 24%0\n\t"
- :"=m"(*p)
- :"m"(*pix), "m"(*(pix+line_size))
- :"memory");
- pix += line_size*2;
- p += 16;
- }
-}
-
-static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size)
-{
- const DCTELEM *p;
- UINT8 *pix;
- int i;
- static int p_inc = 32; /* hack to avoid gcc-2.95.2 loop unrolling bug */
-
- /* read the pixels */
- p = block;
- pix = pixels;
- for(i=0;i<2;i++) {
- __asm __volatile(
- "movq %4, %%mm0\n\t"
- "movq 8%4, %%mm1\n\t"
- "movq 16%4, %%mm2\n\t"
- "movq 24%4, %%mm3\n\t"
- "movq 32%4, %%mm4\n\t"
- "movq 40%4, %%mm5\n\t"
- "movq 48%4, %%mm6\n\t"
- "movq 56%4, %%mm7\n\t"
- "packuswb %%mm1, %%mm0\n\t"
- "packuswb %%mm3, %%mm2\n\t"
- "packuswb %%mm5, %%mm4\n\t"
- "packuswb %%mm7, %%mm6\n\t"
- "movq %%mm0, %0\n\t"
- "movq %%mm2, %1\n\t"
- "movq %%mm4, %2\n\t"
- "movq %%mm6, %3\n\t"
- :"=m"(*pix), "=m"(*(pix+line_size))
- ,"=m"(*(pix+line_size*2)), "=m"(*(pix+line_size*3))
- :"m"(*p)
- :"memory");
- pix += line_size*4;
- p += p_inc;
- }
-}
-
-static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size)
-{
- const DCTELEM *p;
- UINT8 *pix;
- int i;
- static int p_inc = 16; /* hack to avoid gcc-2.95.2 loop unrolling bug */
-
- /* read the pixels */
- p = block;
- pix = pixels;
- __asm __volatile("pxor %%mm7, %%mm7":::"memory");
- for(i=0;i<4;i++) {
- __asm __volatile(
- "movq %2, %%mm0\n\t"
- "movq 8%2, %%mm1\n\t"
- "movq 16%2, %%mm2\n\t"
- "movq 24%2, %%mm3\n\t"
- "movq %0, %%mm4\n\t"
- "movq %1, %%mm6\n\t"
- "movq %%mm4, %%mm5\n\t"
- "punpcklbw %%mm7, %%mm4\n\t"
- "punpckhbw %%mm7, %%mm5\n\t"
- "paddsw %%mm4, %%mm0\n\t"
- "paddsw %%mm5, %%mm1\n\t"
- "movq %%mm6, %%mm5\n\t"
- "punpcklbw %%mm7, %%mm6\n\t"
- "punpckhbw %%mm7, %%mm5\n\t"
- "paddsw %%mm6, %%mm2\n\t"
- "paddsw %%mm5, %%mm3\n\t"
- "packuswb %%mm1, %%mm0\n\t"
- "packuswb %%mm3, %%mm2\n\t"
- "movq %%mm0, %0\n\t"
- "movq %%mm2, %1\n\t"
- :"=m"(*pix), "=m"(*(pix+line_size))
- :"m"(*p)
- :"memory");
- pix += line_size*2;
- p += p_inc;
- }
-}
-
-static void put_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- int dh, hh;
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- hh=h>>2;
- dh=h&3;
- while(hh--) {
- __asm __volatile(
- "movq %4, %%mm0\n\t"
- "movq %5, %%mm1\n\t"
- "movq %6, %%mm2\n\t"
- "movq %7, %%mm3\n\t"
- "movq %%mm0, %0\n\t"
- "movq %%mm1, %1\n\t"
- "movq %%mm2, %2\n\t"
- "movq %%mm3, %3\n\t"
- :"=m"(*p), "=m"(*(p+line_size)), "=m"(*(p+line_size*2)), "=m"(*(p+line_size*3))
- :"m"(*pix), "m"(*(pix+line_size)), "m"(*(pix+line_size*2)), "m"(*(pix+line_size*3))
- :"memory");
- pix = pix + line_size*4;
- p = p + line_size*4;
- }
- while(dh--) {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix)
- :"memory");
- pix = pix + line_size;
- p = p + line_size;
- }
-}
-
-static void put_pixels_x2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm4\n\t"
- ::"m"(mm_wone):"memory");
- do {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq 1%1, %%mm1\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "paddusw %%mm4, %%mm0\n\t"
- "paddusw %%mm4, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix)
- :"memory");
- pix += line_size; p += line_size;
- } while (--h);
-}
-
-static void put_pixels_y2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm4\n\t"
- ::"m"(mm_wone):"memory");
- do {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %2, %%mm1\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "paddusw %%mm4, %%mm0\n\t"
- "paddusw %%mm4, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix),
- "m"(*(pix+line_size))
- :"memory");
- pix += line_size;
- p += line_size;
- } while (--h);
-}
-
-static void put_pixels_xy2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm6\n\t"
- ::"m"(mm_wtwo):"memory");
- do {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %2, %%mm1\n\t"
- "movq 1%1, %%mm4\n\t"
- "movq 1%2, %%mm5\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "movq %%mm4, %%mm1\n\t"
- "movq %%mm5, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm4\n\t"
- "punpcklbw %%mm7, %%mm5\n\t"
- "punpckhbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm5, %%mm4\n\t"
- "paddusw %%mm3, %%mm1\n\t"
- "paddusw %%mm6, %%mm4\n\t"
- "paddusw %%mm6, %%mm1\n\t"
- "paddusw %%mm4, %%mm0\n\t"
- "paddusw %%mm1, %%mm2\n\t"
- "psrlw $2, %%mm0\n\t"
- "psrlw $2, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix),
- "m"(*(pix+line_size))
- :"memory");
- pix += line_size;
- p += line_size;
- } while(--h);
-}
-
-static void put_no_rnd_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- __asm __volatile("pxor %%mm7, %%mm7\n\t":::"memory");
- do {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq 1%1, %%mm1\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix)
- :"memory");
- pix += line_size;
- p += line_size;
- } while (--h);
-}
-
-static void put_no_rnd_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- __asm __volatile("pxor %%mm7, %%mm7\n\t":::"memory");
- do {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %2, %%mm1\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix),
- "m"(*(pix+line_size))
- :"memory");
- pix += line_size;
- p += line_size;
- } while(--h);
-}
-
-static void put_no_rnd_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm6\n\t"
- ::"m"(mm_wone):"memory");
- do {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %2, %%mm1\n\t"
- "movq 1%1, %%mm4\n\t"
- "movq 1%2, %%mm5\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "movq %%mm4, %%mm1\n\t"
- "movq %%mm5, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm4\n\t"
- "punpcklbw %%mm7, %%mm5\n\t"
- "punpckhbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm5, %%mm4\n\t"
- "paddusw %%mm3, %%mm1\n\t"
- "paddusw %%mm6, %%mm4\n\t"
- "paddusw %%mm6, %%mm1\n\t"
- "paddusw %%mm4, %%mm0\n\t"
- "paddusw %%mm1, %%mm2\n\t"
- "psrlw $2, %%mm0\n\t"
- "psrlw $2, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix),
- "m"(*(pix+line_size))
- :"memory");
- pix += line_size;
- p += line_size;
- } while(--h);
-}
-
-static void avg_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm6\n\t"
- ::"m"(mm_wone):"memory");
- do {
- __asm __volatile(
- "movq %0, %%mm0\n\t"
- "movq %1, %%mm1\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "paddusw %%mm6, %%mm0\n\t"
- "paddusw %%mm6, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix)
- :"memory");
- pix += line_size;
- p += line_size;
- }
- while (--h);
-}
-
-static void avg_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm6\n\t"
- ::"m"(mm_wone):"memory");
- do {
- __asm __volatile(
- "movq %1, %%mm1\n\t"
- "movq %0, %%mm0\n\t"
- "movq 1%1, %%mm4\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "movq %%mm4, %%mm5\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm4\n\t"
- "punpckhbw %%mm7, %%mm5\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "paddusw %%mm4, %%mm1\n\t"
- "paddusw %%mm5, %%mm3\n\t"
- "paddusw %%mm6, %%mm1\n\t"
- "paddusw %%mm6, %%mm3\n\t"
- "psrlw $1, %%mm1\n\t"
- "psrlw $1, %%mm3\n\t"
- "paddusw %%mm6, %%mm0\n\t"
- "paddusw %%mm6, %%mm2\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix)
- :"memory");
- pix += line_size;
- p += line_size;
- } while (--h);
-}
-
-static void avg_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm6\n\t"
- ::"m"(mm_wone):"memory");
- do {
- __asm __volatile(
- "movq %1, %%mm1\n\t"
- "movq %0, %%mm0\n\t"
- "movq %2, %%mm4\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "movq %%mm4, %%mm5\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm4\n\t"
- "punpckhbw %%mm7, %%mm5\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "paddusw %%mm4, %%mm1\n\t"
- "paddusw %%mm5, %%mm3\n\t"
- "paddusw %%mm6, %%mm1\n\t"
- "paddusw %%mm6, %%mm3\n\t"
- "psrlw $1, %%mm1\n\t"
- "psrlw $1, %%mm3\n\t"
- "paddusw %%mm6, %%mm0\n\t"
- "paddusw %%mm6, %%mm2\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix), "m"(*(pix+line_size))
- :"memory");
- pix += line_size;
- p += line_size ;
- } while(--h);
-}
-
-static void avg_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm6\n\t"
- ::"m"(mm_wtwo):"memory");
- do {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %2, %%mm1\n\t"
- "movq 1%1, %%mm4\n\t"
- "movq 1%2, %%mm5\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "movq %%mm4, %%mm1\n\t"
- "movq %%mm5, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm4\n\t"
- "punpcklbw %%mm7, %%mm5\n\t"
- "punpckhbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm5, %%mm4\n\t"
- "paddusw %%mm3, %%mm1\n\t"
- "paddusw %%mm6, %%mm4\n\t"
- "paddusw %%mm6, %%mm1\n\t"
- "paddusw %%mm4, %%mm0\n\t"
- "paddusw %%mm1, %%mm2\n\t"
- "movq %3, %%mm5\n\t"
- "psrlw $2, %%mm0\n\t"
- "movq %0, %%mm1\n\t"
- "psrlw $2, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "paddusw %%mm5, %%mm0\n\t"
- "paddusw %%mm5, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix),
- "m"(*(pix+line_size)), "m"(mm_wone)
- :"memory");
- pix += line_size;
- p += line_size ;
- } while(--h);
-}
-
-static void avg_no_rnd_pixels_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- __asm __volatile("pxor %%mm7, %%mm7\n\t":::"memory");
- do {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %0, %%mm1\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix)
- :"memory");
- pix += line_size;
- p += line_size ;
- } while (--h);
-}
-
-static void avg_no_rnd_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t":::"memory");
- do {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq 1%1, %%mm1\n\t"
- "movq %0, %%mm4\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "movq %%mm4, %%mm5\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm4\n\t"
- "punpckhbw %%mm7, %%mm5\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "paddusw %%mm4, %%mm0\n\t"
- "paddusw %%mm5, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix)
- :"memory");
- pix += line_size;
- p += line_size;
- } while (--h);
-}
-
-static void avg_no_rnd_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t":::"memory");
- do {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %2, %%mm1\n\t"
- "movq %0, %%mm4\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "movq %%mm4, %%mm5\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm4\n\t"
- "punpckhbw %%mm7, %%mm5\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "paddusw %%mm4, %%mm0\n\t"
- "paddusw %%mm5, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix), "m"(*(pix+line_size))
- :"memory");
- pix += line_size;
- p += line_size ;
- } while(--h);
-}
-
-static void avg_no_rnd_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm6\n\t"
- ::"m"(mm_wone):"memory");
- do {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %2, %%mm1\n\t"
- "movq 1%1, %%mm4\n\t"
- "movq 1%2, %%mm5\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "movq %%mm4, %%mm1\n\t"
- "movq %%mm5, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm4\n\t"
- "punpcklbw %%mm7, %%mm5\n\t"
- "punpckhbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm5, %%mm4\n\t"
- "paddusw %%mm3, %%mm1\n\t"
- "paddusw %%mm6, %%mm4\n\t"
- "paddusw %%mm6, %%mm1\n\t"
- "paddusw %%mm4, %%mm0\n\t"
- "paddusw %%mm1, %%mm2\n\t"
- "movq %0, %%mm1\n\t"
- "psrlw $2, %%mm0\n\t"
- "movq %%mm1, %%mm3\n\t"
- "psrlw $2, %%mm2\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix),
- "m"(*(pix+line_size))
- :"memory");
- pix += line_size;
- p += line_size;
- } while(--h);
-}
-
-static void sub_pixels_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h)
-{
- DCTELEM *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- __asm __volatile("pxor %%mm7, %%mm7":::"memory");
- do {
- __asm __volatile(
- "movq %0, %%mm0\n\t"
- "movq %1, %%mm2\n\t"
- "movq 8%0, %%mm1\n\t"
- "movq %%mm2, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "psubsw %%mm2, %%mm0\n\t"
- "psubsw %%mm3, %%mm1\n\t"
- "movq %%mm0, %0\n\t"
- "movq %%mm1, 8%0\n\t"
- :"=m"(*p)
- :"m"(*pix)
- :"memory");
- pix += line_size;
- p += 8;
- } while (--h);
-}
-
-static void sub_pixels_x2_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h)
-{
- DCTELEM *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm6"
- ::"m"(mm_wone):"memory");
- do {
- __asm __volatile(
- "movq %0, %%mm0\n\t"
- "movq %1, %%mm2\n\t"
- "movq 8%0, %%mm1\n\t"
- "movq 1%1, %%mm4\n\t"
- "movq %%mm2, %%mm3\n\t"
- "movq %%mm4, %%mm5\n\t"
- "punpcklbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm4\n\t"
- "punpckhbw %%mm7, %%mm5\n\t"
- "paddusw %%mm4, %%mm2\n\t"
- "paddusw %%mm5, %%mm3\n\t"
- "paddusw %%mm6, %%mm2\n\t"
- "paddusw %%mm6, %%mm3\n\t"
- "psrlw $1, %%mm2\n\t"
- "psrlw $1, %%mm3\n\t"
- "psubsw %%mm2, %%mm0\n\t"
- "psubsw %%mm3, %%mm1\n\t"
- "movq %%mm0, %0\n\t"
- "movq %%mm1, 8%0\n\t"
- :"=m"(*p)
- :"m"(*pix)
- :"memory");
- pix += line_size;
- p += 8;
- } while (--h);
-}
-
-static void sub_pixels_y2_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h)
-{
- DCTELEM *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm6"
- ::"m"(mm_wone):"memory");
- do {
- __asm __volatile(
- "movq %0, %%mm0\n\t"
- "movq %1, %%mm2\n\t"
- "movq 8%0, %%mm1\n\t"
- "movq %2, %%mm4\n\t"
- "movq %%mm2, %%mm3\n\t"
- "movq %%mm4, %%mm5\n\t"
- "punpcklbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm4\n\t"
- "punpckhbw %%mm7, %%mm5\n\t"
- "paddusw %%mm4, %%mm2\n\t"
- "paddusw %%mm5, %%mm3\n\t"
- "paddusw %%mm6, %%mm2\n\t"
- "paddusw %%mm6, %%mm3\n\t"
- "psrlw $1, %%mm2\n\t"
- "psrlw $1, %%mm3\n\t"
- "psubsw %%mm2, %%mm0\n\t"
- "psubsw %%mm3, %%mm1\n\t"
- "movq %%mm0, %0\n\t"
- "movq %%mm1, 8%0\n\t"
- :"=m"(*p)
- :"m"(*pix), "m"(*(pix+line_size))
- :"memory");
- pix += line_size;
- p += 8;
- } while (--h);
-}
-
-static void sub_pixels_xy2_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h)
-{
- DCTELEM *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm6\n\t"
- ::"m"(mm_wtwo):"memory");
- do {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %2, %%mm1\n\t"
- "movq 1%1, %%mm4\n\t"
- "movq 1%2, %%mm5\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "movq %%mm4, %%mm1\n\t"
- "movq %%mm5, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm4\n\t"
- "punpcklbw %%mm7, %%mm5\n\t"
- "punpckhbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm5, %%mm4\n\t"
- "paddusw %%mm3, %%mm1\n\t"
- "paddusw %%mm6, %%mm4\n\t"
- "paddusw %%mm6, %%mm1\n\t"
- "paddusw %%mm4, %%mm0\n\t"
- "paddusw %%mm1, %%mm2\n\t"
- "movq %0, %%mm1\n\t"
- "movq 8%0, %%mm3\n\t"
- "psrlw $2, %%mm0\n\t"
- "psrlw $2, %%mm2\n\t"
- "psubsw %%mm0, %%mm1\n\t"
- "psubsw %%mm2, %%mm3\n\t"
- "movq %%mm1, %0\n\t"
- "movq %%mm3, 8%0\n\t"
- :"=m"(*p)
- :"m"(*pix),
- "m"(*(pix+line_size))
- :"memory");
- pix += line_size;
- p += 8 ;
- } while(--h);
-}
-
-void dsputil_init_mmx(void)
-{
- mm_flags = xine_mm_accel();
-#if 0
- printf("CPU flags:");
- if (mm_flags & MM_MMX)
- printf(" mmx");
- if (mm_flags & MM_MMXEXT)
- printf(" mmxext");
- if (mm_flags & MM_3DNOW)
- printf(" 3dnow");
- if (mm_flags & MM_SSE)
- printf(" sse");
- if (mm_flags & MM_SSE2)
- printf(" sse2");
- printf("\n");
-#endif
-
- if (mm_flags & MM_MMX) {
- get_pixels = get_pixels_mmx;
- put_pixels_clamped = put_pixels_clamped_mmx;
- add_pixels_clamped = add_pixels_clamped_mmx;
-
- pix_abs16x16 = pix_abs16x16_mmx;
- pix_abs16x16_x2 = pix_abs16x16_x2_mmx;
- pix_abs16x16_y2 = pix_abs16x16_y2_mmx;
- pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
- /* av_fdct = fdct_mmx; */
-
- put_pixels_tab[0] = put_pixels_mmx;
- put_pixels_tab[1] = put_pixels_x2_mmx;
- put_pixels_tab[2] = put_pixels_y2_mmx;
- put_pixels_tab[3] = put_pixels_xy2_mmx;
-
- put_no_rnd_pixels_tab[0] = put_pixels_mmx;
- put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx;
- put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx;
- put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_mmx;
-
- avg_pixels_tab[0] = avg_pixels_mmx;
- avg_pixels_tab[1] = avg_pixels_x2_mmx;
- avg_pixels_tab[2] = avg_pixels_y2_mmx;
- avg_pixels_tab[3] = avg_pixels_xy2_mmx;
-
- avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels_mmx;
- avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels_x2_mmx;
- avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels_y2_mmx;
- avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels_xy2_mmx;
-
- sub_pixels_tab[0] = sub_pixels_mmx;
- sub_pixels_tab[1] = sub_pixels_x2_mmx;
- sub_pixels_tab[2] = sub_pixels_y2_mmx;
- sub_pixels_tab[3] = sub_pixels_xy2_mmx;
-
- if (mm_flags & MM_MMXEXT) {
- pix_abs16x16 = pix_abs16x16_sse;
- }
-
- if (mm_flags & MM_SSE) {
- put_pixels_tab[1] = put_pixels_x2_sse;
- put_pixels_tab[2] = put_pixels_y2_sse;
-
- avg_pixels_tab[0] = avg_pixels_sse;
- avg_pixels_tab[1] = avg_pixels_x2_sse;
- avg_pixels_tab[2] = avg_pixels_y2_sse;
- avg_pixels_tab[3] = avg_pixels_xy2_sse;
-
- sub_pixels_tab[1] = sub_pixels_x2_sse;
- sub_pixels_tab[2] = sub_pixels_y2_sse;
- } else if (mm_flags & MM_3DNOW) {
- put_pixels_tab[1] = put_pixels_x2_3dnow;
- put_pixels_tab[2] = put_pixels_y2_3dnow;
-
- avg_pixels_tab[0] = avg_pixels_3dnow;
- avg_pixels_tab[1] = avg_pixels_x2_3dnow;
- avg_pixels_tab[2] = avg_pixels_y2_3dnow;
- avg_pixels_tab[3] = avg_pixels_xy2_3dnow;
-
- sub_pixels_tab[1] = sub_pixels_x2_3dnow;
- sub_pixels_tab[2] = sub_pixels_y2_3dnow;
- }
-
- /* idct */
- if (mm_flags & MM_MMXEXT) {
- ff_idct = ff_mmxext_idct;
- } else {
- ff_idct = ff_mmx_idct;
- }
- }
-}
diff --git a/src/libffmpeg/libavcodec/dsputil_mmx_avg.h b/src/libffmpeg/libavcodec/dsputil_mmx_avg.h
deleted file mode 100644
index 5cd640f71..000000000
--- a/src/libffmpeg/libavcodec/dsputil_mmx_avg.h
+++ /dev/null
@@ -1,344 +0,0 @@
-/*
- * DSP utils : average functions are compiled twice for 3dnow/mmx2
- * Copyright (c) 2000, 2001 Gerard Lantau.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
- */
-
-static void DEF(put_pixels_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- int dh, hh;
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- hh=h>>2;
- dh=h&3;
- while(hh--) {
- __asm __volatile(
- "movq %4, %%mm0\n\t"
- "movq 1%4, %%mm1\n\t"
- "movq %5, %%mm2\n\t"
- "movq 1%5, %%mm3\n\t"
- "movq %6, %%mm4\n\t"
- "movq 1%6, %%mm5\n\t"
- "movq %7, %%mm6\n\t"
- "movq 1%7, %%mm7\n\t"
- PAVGB" %%mm1, %%mm0\n\t"
- PAVGB" %%mm3, %%mm2\n\t"
- PAVGB" %%mm5, %%mm4\n\t"
- PAVGB" %%mm7, %%mm6\n\t"
- "movq %%mm0, %0\n\t"
- "movq %%mm2, %1\n\t"
- "movq %%mm4, %2\n\t"
- "movq %%mm6, %3\n\t"
- :"=m"(*p), "=m"(*(p+line_size)), "=m"(*(p+line_size*2)), "=m"(*(p+line_size*3))
- :"m"(*pix), "m"(*(pix+line_size)), "m"(*(pix+line_size*2)), "m"(*(pix+line_size*3))
- :"memory");
- pix += line_size*4; p += line_size*4;
- }
- while(dh--) {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq 1%1, %%mm1\n\t"
- PAVGB" %%mm1, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix)
- :"memory");
- pix += line_size; p += line_size;
- }
-}
-
-static void DEF(put_pixels_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- int dh, hh;
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
-
- hh=h>>1;
- dh=h&1;
- while(hh--) {
- __asm __volatile(
- "movq %2, %%mm0\n\t"
- "movq %3, %%mm1\n\t"
- "movq %4, %%mm2\n\t"
- PAVGB" %%mm1, %%mm0\n\t"
- PAVGB" %%mm2, %%mm1\n\t"
- "movq %%mm0, %0\n\t"
- "movq %%mm1, %1\n\t"
- :"=m"(*p), "=m"(*(p+line_size))
- :"m"(*pix), "m"(*(pix+line_size)),
- "m"(*(pix+line_size*2))
- :"memory");
- pix += line_size*2;
- p += line_size*2;
- }
- if(dh) {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %2, %%mm1\n\t"
- PAVGB" %%mm1, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix),
- "m"(*(pix+line_size))
- :"memory");
- }
-}
-
-static void DEF(avg_pixels)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- int dh, hh;
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- hh=h>>2;
- dh=h&3;
- while(hh--) {
- __asm __volatile(
- "movq %0, %%mm0\n\t"
- "movq %4, %%mm1\n\t"
- "movq %1, %%mm2\n\t"
- "movq %5, %%mm3\n\t"
- "movq %2, %%mm4\n\t"
- "movq %6, %%mm5\n\t"
- "movq %3, %%mm6\n\t"
- "movq %7, %%mm7\n\t"
- PAVGB" %%mm1, %%mm0\n\t"
- PAVGB" %%mm3, %%mm2\n\t"
- PAVGB" %%mm5, %%mm4\n\t"
- PAVGB" %%mm7, %%mm6\n\t"
- "movq %%mm0, %0\n\t"
- "movq %%mm2, %1\n\t"
- "movq %%mm4, %2\n\t"
- "movq %%mm6, %3\n\t"
- :"=m"(*p), "=m"(*(p+line_size)), "=m"(*(p+line_size*2)), "=m"(*(p+line_size*3))
- :"m"(*pix), "m"(*(pix+line_size)), "m"(*(pix+line_size*2)), "m"(*(pix+line_size*3))
- :"memory");
- pix += line_size*4; p += line_size*4;
- }
- while(dh--) {
- __asm __volatile(
- "movq %0, %%mm0\n\t"
- "movq %1, %%mm1\n\t"
- PAVGB" %%mm1, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix)
- :"memory");
- pix += line_size; p += line_size;
- }
-}
-
-static void DEF(avg_pixels_x2)( UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- int dh, hh;
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- hh=h>>1;
- dh=h&1;
- while(hh--) {
- __asm __volatile(
- "movq %2, %%mm2\n\t"
- "movq 1%2, %%mm3\n\t"
- "movq %3, %%mm4\n\t"
- "movq 1%3, %%mm5\n\t"
- "movq %0, %%mm0\n\t"
- "movq %1, %%mm1\n\t"
- PAVGB" %%mm3, %%mm2\n\t"
- PAVGB" %%mm2, %%mm0\n\t"
- PAVGB" %%mm5, %%mm4\n\t"
- PAVGB" %%mm4, %%mm1\n\t"
- "movq %%mm0, %0\n\t"
- "movq %%mm1, %1\n\t"
- :"=m"(*p), "=m"(*(p+line_size))
- :"m"(*pix), "m"(*(pix+line_size))
- :"memory");
- pix += line_size*2;
- p += line_size*2;
- }
- if(dh) {
- __asm __volatile(
- "movq %1, %%mm1\n\t"
- "movq 1%1, %%mm2\n\t"
- "movq %0, %%mm0\n\t"
- PAVGB" %%mm2, %%mm1\n\t"
- PAVGB" %%mm1, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix)
- :"memory");
- }
-}
-
-static void DEF(avg_pixels_y2)( UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- int dh, hh;
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- hh=h>>1;
- dh=h&1;
- while(hh--) {
- __asm __volatile(
- "movq %2, %%mm2\n\t"
- "movq %3, %%mm3\n\t"
- "movq %3, %%mm4\n\t"
- "movq %4, %%mm5\n\t"
- "movq %0, %%mm0\n\t"
- "movq %1, %%mm1\n\t"
- PAVGB" %%mm3, %%mm2\n\t"
- PAVGB" %%mm2, %%mm0\n\t"
- PAVGB" %%mm5, %%mm4\n\t"
- PAVGB" %%mm4, %%mm1\n\t"
- "movq %%mm0, %0\n\t"
- "movq %%mm1, %1\n\t"
- :"=m"(*p), "=m"(*(p+line_size))
- :"m"(*pix), "m"(*(pix+line_size)), "m"(*(pix+line_size*2))
- :"memory");
- pix += line_size*2;
- p += line_size*2;
- }
- if(dh) {
- __asm __volatile(
- "movq %1, %%mm1\n\t"
- "movq %2, %%mm2\n\t"
- "movq %0, %%mm0\n\t"
- PAVGB" %%mm2, %%mm1\n\t"
- PAVGB" %%mm1, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix), "m"(*(pix+line_size))
- :"memory");
- }
-}
-
-static void DEF(avg_pixels_xy2)( UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm6\n\t"
- ::"m"(mm_wtwo):"memory");
- do {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %2, %%mm1\n\t"
- "movq 1%1, %%mm4\n\t"
- "movq 1%2, %%mm5\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "movq %%mm4, %%mm1\n\t"
- "movq %%mm5, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm4\n\t"
- "punpcklbw %%mm7, %%mm5\n\t"
- "punpckhbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm5, %%mm4\n\t"
- "paddusw %%mm3, %%mm1\n\t"
- "paddusw %%mm6, %%mm4\n\t"
- "paddusw %%mm6, %%mm1\n\t"
- "paddusw %%mm4, %%mm0\n\t"
- "paddusw %%mm1, %%mm2\n\t"
- "psrlw $2, %%mm0\n\t"
- "psrlw $2, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- PAVGB" %0, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix),
- "m"(*(pix+line_size))
- :"memory");
- pix += line_size;
- p += line_size ;
- } while(--h);
-}
-
-static void DEF(sub_pixels_x2)( DCTELEM *block, const UINT8 *pixels, int line_size, int h)
-{
- DCTELEM *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7":::"memory");
- do {
- __asm __volatile(
- "movq 1%1, %%mm2\n\t"
- "movq %0, %%mm0\n\t"
- PAVGB" %1, %%mm2\n\t"
- "movq 8%0, %%mm1\n\t"
- "movq %%mm2, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "psubsw %%mm2, %%mm0\n\t"
- "psubsw %%mm3, %%mm1\n\t"
- "movq %%mm0, %0\n\t"
- "movq %%mm1, 8%0\n\t"
- :"=m"(*p)
- :"m"(*pix)
- :"memory");
- pix += line_size;
- p += 8;
- } while (--h);
-}
-
-static void DEF(sub_pixels_y2)( DCTELEM *block, const UINT8 *pixels, int line_size, int h)
-{
- DCTELEM *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7":::"memory");
- do {
- __asm __volatile(
- "movq %2, %%mm2\n\t"
- "movq %0, %%mm0\n\t"
- PAVGB" %1, %%mm2\n\t"
- "movq 8%0, %%mm1\n\t"
- "movq %%mm2, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "psubsw %%mm2, %%mm0\n\t"
- "psubsw %%mm3, %%mm1\n\t"
- "movq %%mm0, %0\n\t"
- "movq %%mm1, 8%0\n\t"
- :"=m"(*p)
- :"m"(*pix), "m"(*(pix+line_size))
- :"memory");
- pix += line_size;
- p += 8;
- } while (--h);
-}
-
diff --git a/src/libffmpeg/libavcodec/h263.c b/src/libffmpeg/libavcodec/h263.c
index 29e6ab191..da694411f 100644
--- a/src/libffmpeg/libavcodec/h263.c
+++ b/src/libffmpeg/libavcodec/h263.c
@@ -1,7 +1,7 @@
/*
* H263/MPEG4 backend for ffmpeg encoder and decoder
* Copyright (c) 2000,2001 Gerard Lantau.
- * H263+ support for custom picture format.
+ * H263+ support.
* Copyright (c) 2001 Juan J. Sierralta P.
*
* This program is free software; you can redistribute it and/or modify
@@ -28,9 +28,11 @@
static void h263_encode_block(MpegEncContext * s, DCTELEM * block,
int n);
static void h263_encode_motion(MpegEncContext * s, int val);
+static void h263p_encode_umotion(MpegEncContext * s, int val);
static void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block,
int n);
static int h263_decode_motion(MpegEncContext * s, int pred);
+static int h263p_decode_umotion(MpegEncContext * s, int pred);
static int h263_decode_block(MpegEncContext * s, DCTELEM * block,
int n, int coded);
static int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
@@ -57,7 +59,7 @@ int h263_get_picture_format(int width, int height)
void h263_encode_picture_header(MpegEncContext * s, int picture_number)
{
- int format, umvplus;
+ int format;
align_put_bits(&s->pb);
put_bits(&s->pb, 22, 0x20);
@@ -69,10 +71,10 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
put_bits(&s->pb, 1, 0); /* split screen off */
put_bits(&s->pb, 1, 0); /* camera off */
put_bits(&s->pb, 1, 0); /* freeze picture release off */
-
+
+ format = h263_get_picture_format(s->width, s->height);
if (!s->h263_plus) {
/* H.263v1 */
- format = h263_get_picture_format(s->width, s->height);
put_bits(&s->pb, 3, format);
put_bits(&s->pb, 1, (s->pict_type == P_TYPE));
/* By now UMV IS DISABLED ON H.263v1, since the restrictions
@@ -89,10 +91,14 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
/* H.263 Plus PTYPE */
put_bits(&s->pb, 3, 7);
put_bits(&s->pb,3,1); /* Update Full Extended PTYPE */
- put_bits(&s->pb,3,6); /* Custom Source Format */
+ if (format == 7)
+ put_bits(&s->pb,3,6); /* Custom Source Format */
+ else
+ put_bits(&s->pb, 3, format);
+
put_bits(&s->pb,1,0); /* Custom PCF: off */
- umvplus = (s->pict_type == P_TYPE) && s->unrestricted_mv;
- put_bits(&s->pb, 1, umvplus); /* Unrestricted Motion Vector */
+ s->umvplus = (s->pict_type == P_TYPE) && s->unrestricted_mv;
+ put_bits(&s->pb, 1, s->umvplus); /* Unrestricted Motion Vector */
put_bits(&s->pb,1,0); /* SAC: off */
put_bits(&s->pb,1,0); /* Advanced Prediction Mode: off */
put_bits(&s->pb,1,0); /* Advanced Intra Coding: off */
@@ -116,14 +122,17 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
/* This should be here if PLUSPTYPE */
put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */
- /* Custom Picture Format (CPFMT) */
+ if (format == 7) {
+ /* Custom Picture Format (CPFMT) */
- put_bits(&s->pb,4,2); /* Aspect ratio: CIF 12:11 (4:3) picture */
- put_bits(&s->pb,9,(s->width >> 2) - 1);
- put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */
- put_bits(&s->pb,9,(s->height >> 2));
+ put_bits(&s->pb,4,2); /* Aspect ratio: CIF 12:11 (4:3) picture */
+ put_bits(&s->pb,9,(s->width >> 2) - 1);
+ put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */
+ put_bits(&s->pb,9,(s->height >> 2));
+ }
+
/* Unlimited Unrestricted Motion Vectors Indicator (UUI) */
- if (umvplus)
+ if (s->umvplus)
put_bits(&s->pb,1,1); /* Limited according tables of Annex D */
put_bits(&s->pb, 5, s->qscale);
}
@@ -131,40 +140,82 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
put_bits(&s->pb, 1, 0); /* no PEI */
}
+int h263_encode_gob_header(MpegEncContext * s, int mb_line)
+{
+ int pdif=0;
+
+ /* Check to see if we need to put a new GBSC */
+ /* for RTP packetization */
+ if (s->rtp_mode) {
+ pdif = s->pb.buf_ptr - s->ptr_lastgob;
+ if (pdif >= s->rtp_payload_size) {
+ /* Bad luck, packet must be cut before */
+ align_put_bits(&s->pb);
+ s->ptr_lastgob = s->pb.buf_ptr;
+ put_bits(&s->pb, 17, 1); /* GBSC */
+ s->gob_number = mb_line;
+ put_bits(&s->pb, 5, s->gob_number); /* GN */
+ put_bits(&s->pb, 2, 1); /* GFID */
+ put_bits(&s->pb, 5, s->qscale); /* GQUANT */
+ return pdif;
+ } else if (pdif + s->mb_line_avgsize >= s->rtp_payload_size) {
+ /* Cut the packet before we can't */
+ align_put_bits(&s->pb);
+ s->ptr_lastgob = s->pb.buf_ptr;
+ put_bits(&s->pb, 17, 1); /* GBSC */
+ s->gob_number = mb_line;
+ put_bits(&s->pb, 5, s->gob_number); /* GN */
+ put_bits(&s->pb, 2, 1); /* GFID */
+ put_bits(&s->pb, 5, s->qscale); /* GQUANT */
+ return pdif;
+ }
+ }
+ return 0;
+}
+
void h263_encode_mb(MpegEncContext * s,
DCTELEM block[6][64],
int motion_x, int motion_y)
{
int cbpc, cbpy, i, cbp, pred_x, pred_y;
-
+
// printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
- if (!s->mb_intra) {
- /* compute cbp */
- cbp = 0;
- for (i = 0; i < 6; i++) {
- if (s->block_last_index[i] >= 0)
- cbp |= 1 << (5 - i);
- }
- if ((cbp | motion_x | motion_y) == 0) {
- /* skip macroblock */
- put_bits(&s->pb, 1, 1);
- return;
- }
- put_bits(&s->pb, 1, 0); /* mb coded */
- cbpc = cbp & 3;
- put_bits(&s->pb,
- inter_MCBPC_bits[cbpc],
- inter_MCBPC_code[cbpc]);
- cbpy = cbp >> 2;
- cbpy ^= 0xf;
- put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
-
- /* motion vectors: 16x16 mode only now */
- h263_pred_motion(s, 0, &pred_x, &pred_y);
-
- h263_encode_motion(s, motion_x - pred_x);
- h263_encode_motion(s, motion_y - pred_y);
- } else {
+ if (!s->mb_intra) {
+ /* compute cbp */
+ cbp = 0;
+ for (i = 0; i < 6; i++) {
+ if (s->block_last_index[i] >= 0)
+ cbp |= 1 << (5 - i);
+ }
+ if ((cbp | motion_x | motion_y) == 0) {
+ /* skip macroblock */
+ put_bits(&s->pb, 1, 1);
+ return;
+ }
+ put_bits(&s->pb, 1, 0); /* mb coded */
+ cbpc = cbp & 3;
+ put_bits(&s->pb,
+ inter_MCBPC_bits[cbpc],
+ inter_MCBPC_code[cbpc]);
+ cbpy = cbp >> 2;
+ cbpy ^= 0xf;
+ put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+
+ /* motion vectors: 16x16 mode only now */
+ h263_pred_motion(s, 0, &pred_x, &pred_y);
+
+ if (!s->umvplus) {
+ h263_encode_motion(s, motion_x - pred_x);
+ h263_encode_motion(s, motion_y - pred_y);
+ }
+ else {
+ h263p_encode_umotion(s, motion_x - pred_x);
+ h263p_encode_umotion(s, motion_y - pred_y);
+ if (((motion_x - pred_x) == 1) && ((motion_y - pred_y) == 1))
+ /* To prevent Start Code emulation */
+ put_bits(&s->pb,1,1);
+ }
+ } else {
/* compute cbp */
cbp = 0;
for (i = 0; i < 6; i++) {
@@ -234,7 +285,7 @@ INT16 *h263_pred_motion(MpegEncContext * s, int block,
mot_val = s->motion_val[(x) + (y) * wrap];
/* special case for first line */
- if (y == 1 || s->first_slice_line) {
+ if (y == 1 || s->first_slice_line || s->first_gob_line) {
A = s->motion_val[(x-1) + (y) * wrap];
*px = A[0];
*py = A[1];
@@ -305,6 +356,45 @@ static void h263_encode_motion(MpegEncContext * s, int val)
}
}
+/* Encode MV differences on H.263+ with Unrestricted MV mode */
+static void h263p_encode_umotion(MpegEncContext * s, int val)
+{
+ short sval = 0;
+ short i = 0;
+ short n_bits = 0;
+ short temp_val;
+ int code = 0;
+ int tcode;
+
+ if ( val == 0)
+ put_bits(&s->pb, 1, 1);
+ else if (val == 1)
+ put_bits(&s->pb, 3, 0);
+ else if (val == -1)
+ put_bits(&s->pb, 3, 2);
+ else {
+
+ sval = ((val < 0) ? (short)(-val):(short)val);
+ temp_val = sval;
+
+ while (temp_val != 0) {
+ temp_val = temp_val >> 1;
+ n_bits++;
+ }
+
+ i = n_bits - 1;
+ while (i > 0) {
+ tcode = (sval & (1 << (i-1))) >> (i-1);
+ tcode = (tcode << 1) | 1;
+ code = (code << 2) | tcode;
+ i--;
+ }
+ code = ((code << 1) | (val < 0)) << 1;
+ put_bits(&s->pb, (2*n_bits)+1, code);
+ //printf("\nVal = %d\tCode = %d", sval, code);
+ }
+}
+
void h263_encode_init_vlc(MpegEncContext *s)
{
static int done = 0;
@@ -464,7 +554,7 @@ static int mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *di
return pred;
}
-void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n,
+void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n,
int dir)
{
int x, y, wrap, i;
@@ -489,22 +579,22 @@ void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n,
/* left prediction */
ac_val -= 16;
for(i=1;i<8;i++) {
- block[i*8] += ac_val[i];
+ block[block_permute_op(i*8)] += ac_val[i];
}
} else {
/* top prediction */
ac_val -= 16 * wrap;
for(i=1;i<8;i++) {
- block[i] += ac_val[i + 8];
+ block[block_permute_op(i)] += ac_val[i + 8];
}
}
}
/* left copy */
for(i=1;i<8;i++)
- ac_val1[i] = block[i * 8];
+ ac_val1[i] = block[block_permute_op(i * 8)];
/* top copy */
for(i=1;i<8;i++)
- ac_val1[8 + i] = block[i];
+ ac_val1[8 + i] = block[block_permute_op(i)];
}
static inline void mpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr)
@@ -693,7 +783,7 @@ void h263_decode_init_vlc(MpegEncContext *s)
init_vlc(&intra_MCBPC_vlc, 6, 8,
intra_MCBPC_bits, 1, 1,
intra_MCBPC_code, 1, 1);
- init_vlc(&inter_MCBPC_vlc, 9, 20,
+ init_vlc(&inter_MCBPC_vlc, 9, 25,
inter_MCBPC_bits, 1, 1,
inter_MCBPC_code, 1, 1);
init_vlc(&cbpy_vlc, 6, 16,
@@ -715,13 +805,38 @@ void h263_decode_init_vlc(MpegEncContext *s)
}
}
+/*
+ * Try to read an H.263 GOB (Group Of Blocks) header.
+ *
+ * If the next 16 bits are all zero they are taken to be the start of a GOB
+ * start code (possibly preceded by stuffing): the zeros and the terminating
+ * '1' bit are consumed, then GN (5 bits) is stored in s->gob_number, GFID
+ * (2 bits) is read and only reported in DEBUG builds, and GQUANT (5 bits)
+ * is stored in s->qscale.
+ *
+ * Returns 1 when a GOB header was read, 0 otherwise.
+ */
+int h263_decode_gob_header(MpegEncContext *s)
+{
+    unsigned int val, gfid;
+
+    /* Check for GOB Start Code */
+    val = show_bits(&s->gb, 16);
+    if (val == 0) {
+        /* We have a GBSC probably with GSTUFF */
+        skip_bits(&s->gb, 16); /* Drop the zeros */
+        /* NOTE(review): this loop has no bound; confirm how get_bits1()
+           behaves at the end of the buffer on a truncated stream. */
+        while (get_bits1(&s->gb) == 0); /* Seek the '1' bit */
+#ifdef DEBUG
+        fprintf(stderr,"\nGOB Start Code at MB %d\n", (s->mb_y * s->mb_width) + s->mb_x);
+#endif
+        s->gob_number = get_bits(&s->gb, 5); /* GN */
+        gfid = get_bits(&s->gb, 2); /* GFID */
+        s->qscale = get_bits(&s->gb, 5); /* GQUANT */
+#ifdef DEBUG
+        /* GN is kept in s->gob_number; there is no local named 'gn'. */
+        fprintf(stderr, "\nGN: %u GFID: %u Quant: %u\n",
+                (unsigned int)s->gob_number, gfid, (unsigned int)s->qscale);
+#endif
+        return 1;
+    }
+    return 0;
+
+}
+
int h263_decode_mb(MpegEncContext *s,
DCTELEM block[6][64])
{
int cbpc, cbpy, i, cbp, pred_x, pred_y, mx, my, dquant;
INT16 *mot_val;
static INT8 quant_tab[4] = { -1, -2, 1, 2 };
-
+
if (s->pict_type == P_TYPE) {
if (get_bits1(&s->gb)) {
/* skip mb */
@@ -736,8 +851,14 @@ int h263_decode_mb(MpegEncContext *s,
return 0;
}
cbpc = get_vlc(&s->gb, &inter_MCBPC_vlc);
+ //fprintf(stderr, "\tCBPC: %d", cbpc);
if (cbpc < 0)
return -1;
+ if (cbpc > 20)
+ cbpc+=3;
+ else if (cbpc == 20)
+ fprintf(stderr, "Stuffing !");
+
dquant = cbpc & 8;
s->mb_intra = ((cbpc & 4) != 0);
} else {
@@ -763,33 +884,55 @@ int h263_decode_mb(MpegEncContext *s,
/* 16x16 motion prediction */
s->mv_type = MV_TYPE_16X16;
h263_pred_motion(s, 0, &pred_x, &pred_y);
- mx = h263_decode_motion(s, pred_x);
+ if (s->umvplus_dec)
+ mx = h263p_decode_umotion(s, pred_x);
+ else
+ mx = h263_decode_motion(s, pred_x);
if (mx >= 0xffff)
return -1;
- my = h263_decode_motion(s, pred_y);
+
+ if (s->umvplus_dec)
+ my = h263p_decode_umotion(s, pred_y);
+ else
+ my = h263_decode_motion(s, pred_y);
if (my >= 0xffff)
return -1;
s->mv[0][0][0] = mx;
s->mv[0][0][1] = my;
+ /*fprintf(stderr, "\n MB %d", (s->mb_y * s->mb_width) + s->mb_x);
+ fprintf(stderr, "\n\tmvx: %d\t\tpredx: %d", mx, pred_x);
+ fprintf(stderr, "\n\tmvy: %d\t\tpredy: %d", my, pred_y);*/
+ if (s->umvplus_dec && (mx - pred_x) == 1 && (my - pred_y) == 1)
+ skip_bits1(&s->gb); /* Bit stuffing to prevent PSC */
+
} else {
s->mv_type = MV_TYPE_8X8;
for(i=0;i<4;i++) {
mot_val = h263_pred_motion(s, i, &pred_x, &pred_y);
- mx = h263_decode_motion(s, pred_x);
+ if (s->umvplus_dec)
+ mx = h263p_decode_umotion(s, pred_x);
+ else
+ mx = h263_decode_motion(s, pred_x);
if (mx >= 0xffff)
return -1;
- my = h263_decode_motion(s, pred_y);
+
+ if (s->umvplus_dec)
+ my = h263p_decode_umotion(s, pred_y);
+ else
+ my = h263_decode_motion(s, pred_y);
if (my >= 0xffff)
return -1;
s->mv[0][i][0] = mx;
s->mv[0][i][1] = my;
+ if (s->umvplus_dec && (mx - pred_x) == 1 && (my - pred_y) == 1)
+ skip_bits1(&s->gb); /* Bit stuffing to prevent PSC */
mot_val[0] = mx;
mot_val[1] = my;
}
}
} else {
s->ac_pred = 0;
- if (s->h263_pred) {
+ if (s->h263_pred) {
s->ac_pred = get_bits1(&s->gb);
}
cbpy = get_vlc(&s->gb, &cbpy_vlc);
@@ -853,10 +996,37 @@ static int h263_decode_motion(MpegEncContext * s, int pred)
val += 64;
if (pred > 32 && val > 63)
val -= 64;
+
}
return val;
}
+/*
+ * Decodes RVLC of H.263+ UMV
+ *
+ * s    - decoder context; bits are read from s->gb
+ * pred - predicted value of this motion vector component
+ *
+ * A leading '1' bit means the difference is zero and pred is returned
+ * unchanged.  Otherwise data bits are accumulated for as long as each one
+ * is followed by a '1' continuation bit; the last accumulated bit is the
+ * sign (1 = negative).
+ *
+ * Returns pred plus/minus the decoded difference, or 0xffff when the code
+ * word is too long to be valid.  The callers in h263_decode_mb() treat any
+ * value >= 0xffff as a decoding error.
+ */
+static int h263p_decode_umotion(MpegEncContext * s, int pred)
+{
+    int code = 0, sign;
+
+    if (get_bits1(&s->gb)) /* Motion difference = 0 */
+        return pred;
+
+    code = 2 + get_bits1(&s->gb);
+
+    while (get_bits1(&s->gb))
+    {
+        code <<= 1;
+        code += get_bits1(&s->gb);
+        /* A damaged stream can supply continuation bits indefinitely;
+           stop before the signed left shift overflows. */
+        if (code >= 32768)
+            return 0xffff;
+    }
+    sign = code & 1;
+    code >>= 1;
+
+    code = (sign) ? (pred - code) : (pred + code);
+#ifdef DEBUG
+    fprintf(stderr,"H.263+ UMV Motion = %d\n", code);
+#endif
+    return code;
+
+}
+
static int h263_decode_block(MpegEncContext * s, DCTELEM * block,
int n, int coded)
{
@@ -1081,16 +1251,21 @@ int h263_decode_picture_header(MpegEncContext *s)
skip_bits1(&s->gb); /* camera off */
skip_bits1(&s->gb); /* freeze picture release off */
+ /* Reset GOB number */
+ s->gob_number = 0;
+
format = get_bits(&s->gb, 3);
- if (format != 7) {
+ if (format != 7 && format != 6) {
s->h263_plus = 0;
/* H.263v1 */
width = h263_format[format][0];
height = h263_format[format][1];
if (!width)
return -1;
-
+
+ s->width = width;
+ s->height = height;
s->pict_type = I_TYPE + get_bits1(&s->gb);
s->unrestricted_mv = get_bits1(&s->gb);
@@ -1098,33 +1273,68 @@ int h263_decode_picture_header(MpegEncContext *s)
if (get_bits1(&s->gb) != 0)
return -1; /* SAC: off */
- if (get_bits1(&s->gb) != 0)
- return -1; /* advanced prediction mode: off */
+ if (get_bits1(&s->gb) != 0) {
+ s->mv_type = MV_TYPE_8X8; /* Advanced prediction mode */
+ }
+
if (get_bits1(&s->gb) != 0)
return -1; /* not PB frame */
s->qscale = get_bits(&s->gb, 5);
skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */
} else {
- s->h263_plus = 1;
+ int ufep;
+
/* H.263v2 */
- if (get_bits(&s->gb, 3) != 1)
- return -1;
- if (get_bits(&s->gb, 3) != 6) /* custom source format */
+ s->h263_plus = 1;
+ ufep = get_bits(&s->gb, 3); /* Update Full Extended PTYPE */
+
+ if (ufep == 1) {
+ /* OPPTYPE */
+ format = get_bits(&s->gb, 3);
+ skip_bits(&s->gb,1); /* Custom PCF */
+ s->umvplus_dec = get_bits(&s->gb, 1); /* Unrestricted Motion Vector */
+ skip_bits1(&s->gb); /* Syntax-based Arithmetic Coding (SAC) */
+ if (get_bits1(&s->gb) != 0) {
+ s->mv_type = MV_TYPE_8X8; /* Advanced prediction mode */
+ }
+ skip_bits(&s->gb, 8);
+ skip_bits(&s->gb, 3); /* Reserved */
+ } else if (ufep != 0)
return -1;
- skip_bits(&s->gb, 12);
- skip_bits(&s->gb, 3);
+
+ /* MPPTYPE */
s->pict_type = get_bits(&s->gb, 3) + 1;
if (s->pict_type != I_TYPE &&
s->pict_type != P_TYPE)
return -1;
skip_bits(&s->gb, 7);
- skip_bits(&s->gb, 4); /* aspect ratio */
- width = (get_bits(&s->gb, 9) + 1) * 4;
- skip_bits1(&s->gb);
- height = get_bits(&s->gb, 9) * 4;
- if (height == 0)
- return -1;
+
+ /* Get the picture dimensions */
+ if (ufep) {
+ if (format == 6) {
+ /* Custom Picture Format (CPFMT) */
+ skip_bits(&s->gb, 4); /* aspect ratio */
+ width = (get_bits(&s->gb, 9) + 1) * 4;
+ skip_bits1(&s->gb);
+ height = get_bits(&s->gb, 9) * 4;
+#ifdef DEBUG
+ fprintf(stderr,"\nH.263+ Custom picture: %dx%d\n",width,height);
+#endif
+ }
+ else {
+ width = h263_format[format][0];
+ height = h263_format[format][1];
+ }
+ if ((width == 0) || (height == 0))
+ return -1;
+ s->width = width;
+ s->height = height;
+ if (s->umvplus_dec) {
+ skip_bits1(&s->gb); /* Unlimited Unrestricted Motion Vectors Indicator (UUI) */
+ }
+ }
+
s->qscale = get_bits(&s->gb, 5);
}
/* PEI */
@@ -1132,8 +1342,6 @@ int h263_decode_picture_header(MpegEncContext *s)
skip_bits(&s->gb, 8);
}
s->f_code = 1;
- s->width = width;
- s->height = height;
return 0;
}
@@ -1335,3 +1543,4 @@ int intel_h263_decode_picture_header(MpegEncContext *s)
s->f_code = 1;
return 0;
}
+
diff --git a/src/libffmpeg/libavcodec/h263data.h b/src/libffmpeg/libavcodec/h263data.h
index 4fd9d3629..d41996a3b 100644
--- a/src/libffmpeg/libavcodec/h263data.h
+++ b/src/libffmpeg/libavcodec/h263data.h
@@ -4,6 +4,27 @@ static const UINT8 intra_MCBPC_code[8] = { 1, 1, 2, 3, 1, 1, 2, 3 };
static const UINT8 intra_MCBPC_bits[8] = { 1, 3, 3, 3, 4, 6, 6, 6 };
/* inter MCBPC, mb_type = (inter), (intra), (interq), (intraq), (inter4v) */
+/* Changed the tables for interq and inter4v+q, following the standard ** Juanjo ** */
+/* Both 25-entry tables are handed to init_vlc() for inter_MCBPC_vlc in
+ * h263_decode_init_vlc().  Entry 20 is the stuffing code; h263_decode_mb()
+ * adds 3 to decoded indices above 20 before testing the mb_type bits. */
+/* Code word values, one per table index. */
+static const UINT8 inter_MCBPC_code[25] = {
+ 1, 3, 2, 5,
+ 3, 4, 3, 3,
+ 3, 7, 6, 5,
+ 4, 4, 3, 2,
+ 2, 5, 4, 5,
+ 1, /* Stuffing */
+ 2, 12, 14, 15,
+};
+/* Presumably the length in bits of the matching inter_MCBPC_code entry
+ * (TODO confirm against init_vlc()). */
+static const UINT8 inter_MCBPC_bits[25] = {
+ 1, 4, 4, 6,
+ 5, 8, 8, 7,
+ 3, 7, 7, 9,
+ 6, 9, 9, 9,
+ 3, 7, 7, 8,
+ 9, /* Stuffing */
+ 11, 13, 13, 13,
+};
+
+/* This is the old table
static const UINT8 inter_MCBPC_code[20] = {
1, 3, 2, 5,
3, 4, 3, 3,
@@ -17,7 +38,7 @@ static const UINT8 inter_MCBPC_bits[20] = {
12, 12, 12, 12,
6, 9, 9, 9,
3, 7, 7, 8,
-};
+};*/
static const UINT8 cbpy_tab[16][2] =
{
diff --git a/src/libffmpeg/libavcodec/h263dec.c b/src/libffmpeg/libavcodec/h263dec.c
index ec14c09e2..be76e3030 100644
--- a/src/libffmpeg/libavcodec/h263dec.c
+++ b/src/libffmpeg/libavcodec/h263dec.c
@@ -23,7 +23,7 @@
#include "avcodec.h"
#include "mpegvideo.h"
-#undef DEBUG
+//#define DEBUG
static int h263_decode_init(AVCodecContext *avctx)
{
@@ -39,6 +39,8 @@ static int h263_decode_init(AVCodecContext *avctx)
/* select sub codec */
switch(avctx->codec->id) {
case CODEC_ID_H263:
+ s->gob_number = 0;
+ s->first_gob_line = 0;
break;
case CODEC_ID_MPEG4:
s->time_increment_bits = 4; /* default value for broken headers */
@@ -56,8 +58,9 @@ static int h263_decode_init(AVCodecContext *avctx)
}
/* for h263, we allocate the images after having read the header */
- if (MPV_common_init(s) < 0)
- return -1;
+ if (avctx->codec->id != CODEC_ID_H263)
+ if (MPV_common_init(s) < 0)
+ return -1;
/* XXX: suppress this matrix init, only needed because using mpeg1
dequantize in mmx case */
@@ -92,7 +95,7 @@ static int h263_decode_frame(AVCodecContext *avctx,
printf("*****frame %d size=%d\n", avctx->frame_number, buf_size);
printf("bytes=%x %x %x %x\n", buf[0], buf[1], buf[2], buf[3]);
#endif
-
+
/* no supplementary picture */
if (buf_size == 0) {
*data_size = 0;
@@ -110,18 +113,25 @@ static int h263_decode_frame(AVCodecContext *avctx,
ret = intel_h263_decode_picture_header(s);
} else {
ret = h263_decode_picture_header(s);
+ /* After H263 header decode we have the height, width, */
+ /* and other parameters. So then we could init the picture */
+ /* FIXME: By the way H263 decoder is evolving it should have */
+ /* an H263EncContext */
+ if (!s->context_initialized) {
+ avctx->width = s->width;
+ avctx->height = s->height;
+ if (MPV_common_init(s) < 0)
+ return -1;
+ } else if (s->width != avctx->width || s->height != avctx->height) {
+ /* H.263 could change picture size any time */
+ MPV_common_end(s);
+ if (MPV_common_init(s) < 0)
+ return -1;
+ }
}
if (ret < 0)
return -1;
- /* make sure we start with an I-frame */
- if (s->waiting_for_keyframe) {
- if (s->pict_type != I_TYPE)
- return -1;
- else
- s->waiting_for_keyframe = 0;
- }
-
MPV_frame_start(s);
#ifdef DEBUG
@@ -130,10 +140,17 @@ static int h263_decode_frame(AVCodecContext *avctx,
/* decode each macroblock */
for(s->mb_y=0; s->mb_y < s->mb_height; s->mb_y++) {
+ /* Check for GOB headers on H.263 */
+ /* FIXME: In the future H.263+ will have intra prediction */
+ /* and we are gonna need another way to detect MPEG4 */
+ if (s->mb_y && !s->h263_pred) {
+ s->first_gob_line = h263_decode_gob_header(s);
+ }
for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
#ifdef DEBUG
printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
#endif
+ //fprintf(stderr,"\nFrame: %d\tMB: %d",avctx->frame_number, (s->mb_y * s->mb_width) + s->mb_x);
/* DCT & quantize */
if (s->h263_msmpeg4) {
msmpeg4_dc_scale(s);
@@ -152,8 +169,10 @@ static int h263_decode_frame(AVCodecContext *avctx,
if (msmpeg4_decode_mb(s, s->block) < 0)
return -1;
} else {
- if (h263_decode_mb(s, s->block) < 0)
+ if (h263_decode_mb(s, s->block) < 0) {
+ fprintf(stderr,"\nError at MB: %d\n", (s->mb_y * s->mb_width) + s->mb_x);
return -1;
+ }
}
MPV_decode_mb(s, s->block);
}
diff --git a/src/libffmpeg/libavcodec/idct_mlib.c b/src/libffmpeg/libavcodec/idct_mlib.c
deleted file mode 100644
index 63421273f..000000000
--- a/src/libffmpeg/libavcodec/idct_mlib.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Sun mediaLib optimized DSP utils
- * Copyright (c) 2001 Juergen Keil.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-#include "dsputil.h"
-
-#include <mlib_types.h>
-#include <mlib_status.h>
-#include <mlib_sys.h>
-#include <mlib_video.h>
-
-
-void ff_idct_mlib(DCTELEM *data)
-{
- mlib_VideoIDCT8x8_S16_S16 (data, data);
-}
-
-
-void ff_fdct_mlib(DCTELEM *data)
-{
- mlib_VideoDCT8x8_S16_S16 (data, data);
-}
diff --git a/src/libffmpeg/libavcodec/idct_mmx.c b/src/libffmpeg/libavcodec/idct_mmx.c
deleted file mode 100644
index 7141347ec..000000000
--- a/src/libffmpeg/libavcodec/idct_mmx.c
+++ /dev/null
@@ -1,597 +0,0 @@
-/*
- * Note: For libavcodec, this code can also be used under the LGPL license
- */
-/*
- * idct_mmx.c
- * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <inttypes.h>
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#include "xineutils.h"
-
-#ifdef ATTR_ALIGN
-#undef ATTR_ALIGN
-#endif
-#define ATTR_ALIGN(align) __attribute__ ((__aligned__ (align)))
-
-#define ROW_SHIFT 11
-#define COL_SHIFT 6
-
-#define round(bias) ((int)(((bias)+0.5) * (1<<ROW_SHIFT)))
-#define rounder(bias) {round (bias), round (bias)}
-
-#if 0
-/* C row IDCT - its just here to document the MMXEXT and MMX versions */
-static inline void idct_row (int16_t * row, int offset,
- int16_t * table, int32_t * rounder)
-{
- int C1, C2, C3, C4, C5, C6, C7;
- int a0, a1, a2, a3, b0, b1, b2, b3;
-
- row += offset;
-
- C1 = table[1];
- C2 = table[2];
- C3 = table[3];
- C4 = table[4];
- C5 = table[5];
- C6 = table[6];
- C7 = table[7];
-
- a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + *rounder;
- a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + *rounder;
- a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + *rounder;
- a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + *rounder;
-
- b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
- b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
- b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
- b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
-
- row[0] = (a0 + b0) >> ROW_SHIFT;
- row[1] = (a1 + b1) >> ROW_SHIFT;
- row[2] = (a2 + b2) >> ROW_SHIFT;
- row[3] = (a3 + b3) >> ROW_SHIFT;
- row[4] = (a3 - b3) >> ROW_SHIFT;
- row[5] = (a2 - b2) >> ROW_SHIFT;
- row[6] = (a1 - b1) >> ROW_SHIFT;
- row[7] = (a0 - b0) >> ROW_SHIFT;
-}
-#endif
-
-
-/* MMXEXT row IDCT */
-
-#define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \
- c4, c6, c4, c6, \
- c1, c3, -c1, -c5, \
- c5, c7, c3, -c7, \
- c4, -c6, c4, -c6, \
- -c4, c2, c4, -c2, \
- c5, -c1, c3, -c1, \
- c7, c3, c7, -c5 }
-
-static inline void mmxext_row_head (int16_t * row, int offset, int16_t * table)
-{
- movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
-
- movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
- movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
-
- movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4
- movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
-
- movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4
- pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
-
- pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4
-}
-
-static inline void mmxext_row (int16_t * table, int32_t * rounder)
-{
- movq_m2r (*(table+8), mm1); // mm1 = -C5 -C1 C3 C1
- pmaddwd_r2r (mm2, mm4); // mm4 = C4*x0+C6*x2 C4*x4+C6*x6
-
- pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x4-C6*x6 C4*x0-C6*x2
- pshufw_r2r (mm6, mm6, 0x4e); // mm6 = x3 x1 x7 x5
-
- movq_m2r (*(table+12), mm7); // mm7 = -C7 C3 C7 C5
- pmaddwd_r2r (mm5, mm1); // mm1 = -C1*x5-C5*x7 C1*x1+C3*x3
-
- paddd_m2r (*rounder, mm3); // mm3 += rounder
- pmaddwd_r2r (mm6, mm7); // mm7 = C3*x1-C7*x3 C5*x5+C7*x7
-
- pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x0-C2*x2 -C4*x4+C2*x6
- paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder
-
- pmaddwd_m2r (*(table+24), mm5); // mm5 = C3*x5-C1*x7 C5*x1-C1*x3
- movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder
-
- pmaddwd_m2r (*(table+28), mm6); // mm6 = C7*x1-C5*x3 C7*x5+C3*x7
- paddd_r2r (mm7, mm1); // mm1 = b1 b0
-
- paddd_m2r (*rounder, mm0); // mm0 += rounder
- psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder
-
- psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7
- paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder
-
- paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder
- psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0
-
- paddd_r2r (mm6, mm5); // mm5 = b3 b2
- movq_r2r (mm0, mm4); // mm4 = a3 a2 + rounder
-
- paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder
- psubd_r2r (mm5, mm4); // mm4 = a3-b3 a2-b2 + rounder
-}
-
-static inline void mmxext_row_tail (int16_t * row, int store)
-{
- psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
-
- psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5
-
- packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
-
- packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5
-
- movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
- pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4
-
- /* slot */
-
- movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4
-}
-
-static inline void mmxext_row_mid (int16_t * row, int store,
- int offset, int16_t * table)
-{
- movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
- psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
-
- movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
- psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5
-
- packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
- movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
-
- packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5
- movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
-
- movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
- pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4
-
- movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4
- movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4
-
- pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
-
- movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4
- pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4
-}
-
-
-/* MMX row IDCT */
-
-#define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \
- c4, c6, -c4, -c2, \
- c1, c3, c3, -c7, \
- c5, c7, -c1, -c5, \
- c4, -c6, c4, -c2, \
- -c4, c2, c4, -c6, \
- c5, -c1, c7, -c5, \
- c7, c3, c3, -c1 }
-
-static inline void mmx_row_head (int16_t * row, int offset, int16_t * table)
-{
- movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
-
- movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
- movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
-
- movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4
- movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
-
- punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0
-
- movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4
- pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
-
- movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1
- punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4
-}
-
-static inline void mmx_row (int16_t * table, int32_t * rounder)
-{
- pmaddwd_r2r (mm2, mm4); // mm4 = -C4*x4-C2*x6 C4*x4+C6*x6
- punpckldq_r2r (mm5, mm5); // mm5 = x3 x1 x3 x1
-
- pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x0-C2*x2 C4*x0-C6*x2
- punpckhdq_r2r (mm6, mm6); // mm6 = x7 x5 x7 x5
-
- movq_m2r (*(table+12), mm7); // mm7 = -C5 -C1 C7 C5
- pmaddwd_r2r (mm5, mm1); // mm1 = C3*x1-C7*x3 C1*x1+C3*x3
-
- paddd_m2r (*rounder, mm3); // mm3 += rounder
- pmaddwd_r2r (mm6, mm7); // mm7 = -C1*x5-C5*x7 C5*x5+C7*x7
-
- pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x4-C6*x6 -C4*x4+C2*x6
- paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder
-
- pmaddwd_m2r (*(table+24), mm5); // mm5 = C7*x1-C5*x3 C5*x1-C1*x3
- movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder
-
- pmaddwd_m2r (*(table+28), mm6); // mm6 = C3*x5-C1*x7 C7*x5+C3*x7
- paddd_r2r (mm7, mm1); // mm1 = b1 b0
-
- paddd_m2r (*rounder, mm0); // mm0 += rounder
- psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder
-
- psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7
- paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder
-
- paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder
- psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0
-
- paddd_r2r (mm6, mm5); // mm5 = b3 b2
- movq_r2r (mm0, mm7); // mm7 = a3 a2 + rounder
-
- paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder
- psubd_r2r (mm5, mm7); // mm7 = a3-b3 a2-b2 + rounder
-}
-
-static inline void mmx_row_tail (int16_t * row, int store)
-{
- psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
-
- psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5
-
- packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
-
- packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5
-
- movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
- movq_r2r (mm7, mm4); // mm4 = y6 y7 y4 y5
-
- pslld_i2r (16, mm7); // mm7 = y7 0 y5 0
-
- psrld_i2r (16, mm4); // mm4 = 0 y6 0 y4
-
- por_r2r (mm4, mm7); // mm7 = y7 y6 y5 y4
-
- /* slot */
-
- movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4
-}
-
-static inline void mmx_row_mid (int16_t * row, int store,
- int offset, int16_t * table)
-{
- movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
- psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
-
- movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
- psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5
-
- packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
- movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
-
- packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5
- movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
-
- movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
- movq_r2r (mm7, mm1); // mm1 = y6 y7 y4 y5
-
- punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0
- psrld_i2r (16, mm7); // mm7 = 0 y6 0 y4
-
- movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4
- pslld_i2r (16, mm1); // mm1 = y7 0 y5 0
-
- movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4
- por_r2r (mm1, mm7); // mm7 = y7 y6 y5 y4
-
- movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1
- punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4
-
- movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4
- pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
-}
-
-
-#if 0
-// C column IDCT - its just here to document the MMXEXT and MMX versions
-static inline void idct_col (int16_t * col, int offset)
-{
-/* multiplication - as implemented on mmx */
-#define F(c,x) (((c) * (x)) >> 16)
-
-/* saturation - it helps us handle torture test cases */
-#define S(x) (((x)>32767) ? 32767 : ((x)<-32768) ? -32768 : (x))
-
- int16_t x0, x1, x2, x3, x4, x5, x6, x7;
- int16_t y0, y1, y2, y3, y4, y5, y6, y7;
- int16_t a0, a1, a2, a3, b0, b1, b2, b3;
- int16_t u04, v04, u26, v26, u17, v17, u35, v35, u12, v12;
-
- col += offset;
-
- x0 = col[0*8];
- x1 = col[1*8];
- x2 = col[2*8];
- x3 = col[3*8];
- x4 = col[4*8];
- x5 = col[5*8];
- x6 = col[6*8];
- x7 = col[7*8];
-
- u04 = S (x0 + x4);
- v04 = S (x0 - x4);
- u26 = S (F (T2, x6) + x2);
- v26 = S (F (T2, x2) - x6);
-
- a0 = S (u04 + u26);
- a1 = S (v04 + v26);
- a2 = S (v04 - v26);
- a3 = S (u04 - u26);
-
- u17 = S (F (T1, x7) + x1);
- v17 = S (F (T1, x1) - x7);
- u35 = S (F (T3, x5) + x3);
- v35 = S (F (T3, x3) - x5);
-
- b0 = S (u17 + u35);
- b3 = S (v17 - v35);
- u12 = S (u17 - u35);
- v12 = S (v17 + v35);
- u12 = S (2 * F (C4, u12));
- v12 = S (2 * F (C4, v12));
- b1 = S (u12 + v12);
- b2 = S (u12 - v12);
-
- y0 = S (a0 + b0) >> COL_SHIFT;
- y1 = S (a1 + b1) >> COL_SHIFT;
- y2 = S (a2 + b2) >> COL_SHIFT;
- y3 = S (a3 + b3) >> COL_SHIFT;
-
- y4 = S (a3 - b3) >> COL_SHIFT;
- y5 = S (a2 - b2) >> COL_SHIFT;
- y6 = S (a1 - b1) >> COL_SHIFT;
- y7 = S (a0 - b0) >> COL_SHIFT;
-
- col[0*8] = y0;
- col[1*8] = y1;
- col[2*8] = y2;
- col[3*8] = y3;
- col[4*8] = y4;
- col[5*8] = y5;
- col[6*8] = y6;
- col[7*8] = y7;
-}
-#endif
-
-
-// MMX column IDCT
-static inline void idct_col (int16_t * col, int offset)
-{
-#define T1 13036
-#define T2 27146
-#define T3 43790
-#define C4 23170
-
- static short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1};
- static short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2};
- static short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3};
- static short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4};
-
- /* column code adapted from peter gubanov */
- /* http://www.elecard.com/peter/idct.shtml */
-
- movq_m2r (*_T1, mm0); // mm0 = T1
-
- movq_m2r (*(col+offset+1*8), mm1); // mm1 = x1
- movq_r2r (mm0, mm2); // mm2 = T1
-
- movq_m2r (*(col+offset+7*8), mm4); // mm4 = x7
- pmulhw_r2r (mm1, mm0); // mm0 = T1*x1
-
- movq_m2r (*_T3, mm5); // mm5 = T3
- pmulhw_r2r (mm4, mm2); // mm2 = T1*x7
-
- movq_m2r (*(col+offset+5*8), mm6); // mm6 = x5
- movq_r2r (mm5, mm7); // mm7 = T3-1
-
- movq_m2r (*(col+offset+3*8), mm3); // mm3 = x3
- psubsw_r2r (mm4, mm0); // mm0 = v17
-
- movq_m2r (*_T2, mm4); // mm4 = T2
- pmulhw_r2r (mm3, mm5); // mm5 = (T3-1)*x3
-
- paddsw_r2r (mm2, mm1); // mm1 = u17
- pmulhw_r2r (mm6, mm7); // mm7 = (T3-1)*x5
-
- /* slot */
-
- movq_r2r (mm4, mm2); // mm2 = T2
- paddsw_r2r (mm3, mm5); // mm5 = T3*x3
-
- pmulhw_m2r (*(col+offset+2*8), mm4);// mm4 = T2*x2
- paddsw_r2r (mm6, mm7); // mm7 = T3*x5
-
- psubsw_r2r (mm6, mm5); // mm5 = v35
- paddsw_r2r (mm3, mm7); // mm7 = u35
-
- movq_m2r (*(col+offset+6*8), mm3); // mm3 = x6
- movq_r2r (mm0, mm6); // mm6 = v17
-
- pmulhw_r2r (mm3, mm2); // mm2 = T2*x6
- psubsw_r2r (mm5, mm0); // mm0 = b3
-
- psubsw_r2r (mm3, mm4); // mm4 = v26
- paddsw_r2r (mm6, mm5); // mm5 = v12
-
- movq_r2m (mm0, *(col+offset+3*8)); // save b3 in scratch0
- movq_r2r (mm1, mm6); // mm6 = u17
-
- paddsw_m2r (*(col+offset+2*8), mm2);// mm2 = u26
- paddsw_r2r (mm7, mm6); // mm6 = b0
-
- psubsw_r2r (mm7, mm1); // mm1 = u12
- movq_r2r (mm1, mm7); // mm7 = u12
-
- movq_m2r (*(col+offset+0*8), mm3); // mm3 = x0
- paddsw_r2r (mm5, mm1); // mm1 = u12+v12
-
- movq_m2r (*_C4, mm0); // mm0 = C4/2
- psubsw_r2r (mm5, mm7); // mm7 = u12-v12
-
- movq_r2m (mm6, *(col+offset+5*8)); // save b0 in scratch1
- pmulhw_r2r (mm0, mm1); // mm1 = b1/2
-
- movq_r2r (mm4, mm6); // mm6 = v26
- pmulhw_r2r (mm0, mm7); // mm7 = b2/2
-
- movq_m2r (*(col+offset+4*8), mm5); // mm5 = x4
- movq_r2r (mm3, mm0); // mm0 = x0
-
- psubsw_r2r (mm5, mm3); // mm3 = v04
- paddsw_r2r (mm5, mm0); // mm0 = u04
-
- paddsw_r2r (mm3, mm4); // mm4 = a1
- movq_r2r (mm0, mm5); // mm5 = u04
-
- psubsw_r2r (mm6, mm3); // mm3 = a2
- paddsw_r2r (mm2, mm5); // mm5 = a0
-
- paddsw_r2r (mm1, mm1); // mm1 = b1
- psubsw_r2r (mm2, mm0); // mm0 = a3
-
- paddsw_r2r (mm7, mm7); // mm7 = b2
- movq_r2r (mm3, mm2); // mm2 = a2
-
- movq_r2r (mm4, mm6); // mm6 = a1
- paddsw_r2r (mm7, mm3); // mm3 = a2+b2
-
- psraw_i2r (COL_SHIFT, mm3); // mm3 = y2
- paddsw_r2r (mm1, mm4); // mm4 = a1+b1
-
- psraw_i2r (COL_SHIFT, mm4); // mm4 = y1
- psubsw_r2r (mm1, mm6); // mm6 = a1-b1
-
- movq_m2r (*(col+offset+5*8), mm1); // mm1 = b0
- psubsw_r2r (mm7, mm2); // mm2 = a2-b2
-
- psraw_i2r (COL_SHIFT, mm6); // mm6 = y6
- movq_r2r (mm5, mm7); // mm7 = a0
-
- movq_r2m (mm4, *(col+offset+1*8)); // save y1
- psraw_i2r (COL_SHIFT, mm2); // mm2 = y5
-
- movq_r2m (mm3, *(col+offset+2*8)); // save y2
- paddsw_r2r (mm1, mm5); // mm5 = a0+b0
-
- movq_m2r (*(col+offset+3*8), mm4); // mm4 = b3
- psubsw_r2r (mm1, mm7); // mm7 = a0-b0
-
- psraw_i2r (COL_SHIFT, mm5); // mm5 = y0
- movq_r2r (mm0, mm3); // mm3 = a3
-
- movq_r2m (mm2, *(col+offset+5*8)); // save y5
- psubsw_r2r (mm4, mm3); // mm3 = a3-b3
-
- psraw_i2r (COL_SHIFT, mm7); // mm7 = y7
- paddsw_r2r (mm0, mm4); // mm4 = a3+b3
-
- movq_r2m (mm5, *(col+offset+0*8)); // save y0
- psraw_i2r (COL_SHIFT, mm3); // mm3 = y4
-
- movq_r2m (mm6, *(col+offset+6*8)); // save y6
- psraw_i2r (COL_SHIFT, mm4); // mm4 = y3
-
- movq_r2m (mm7, *(col+offset+7*8)); // save y7
-
- movq_r2m (mm3, *(col+offset+4*8)); // save y4
-
- movq_r2m (mm4, *(col+offset+3*8)); // save y3
-}
-
-
-static int32_t rounder0[] ATTR_ALIGN(8) =
- rounder ((1 << (COL_SHIFT - 1)) - 0.5);
-static int32_t rounder4[] ATTR_ALIGN(8) = rounder (0);
-static int32_t rounder1[] ATTR_ALIGN(8) =
- rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */
-static int32_t rounder7[] ATTR_ALIGN(8) =
- rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */
-static int32_t rounder2[] ATTR_ALIGN(8) =
- rounder (0.60355339059); /* C2 * (C6+C2)/2 */
-static int32_t rounder6[] ATTR_ALIGN(8) =
- rounder (-0.25); /* C2 * (C6-C2)/2 */
-static int32_t rounder3[] ATTR_ALIGN(8) =
- rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */
-static int32_t rounder5[] ATTR_ALIGN(8) =
- rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */
-
-
-#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \
-void idct (int16_t * block) \
-{ \
- static int16_t table04[] ATTR_ALIGN(16) = \
- table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \
- static int16_t table17[] ATTR_ALIGN(16) = \
- table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \
- static int16_t table26[] ATTR_ALIGN(16) = \
- table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \
- static int16_t table35[] ATTR_ALIGN(16) = \
- table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \
- \
- idct_row_head (block, 0*8, table04); \
- idct_row (table04, rounder0); \
- idct_row_mid (block, 0*8, 4*8, table04); \
- idct_row (table04, rounder4); \
- idct_row_mid (block, 4*8, 1*8, table17); \
- idct_row (table17, rounder1); \
- idct_row_mid (block, 1*8, 7*8, table17); \
- idct_row (table17, rounder7); \
- idct_row_mid (block, 7*8, 2*8, table26); \
- idct_row (table26, rounder2); \
- idct_row_mid (block, 2*8, 6*8, table26); \
- idct_row (table26, rounder6); \
- idct_row_mid (block, 6*8, 3*8, table35); \
- idct_row (table35, rounder3); \
- idct_row_mid (block, 3*8, 5*8, table35); \
- idct_row (table35, rounder5); \
- idct_row_tail (block, 5*8); \
- \
- idct_col (block, 0); \
- idct_col (block, 4); \
-}
-
-
-declare_idct (ff_mmxext_idct, mmxext_table,
- mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid)
-
-declare_idct (ff_mmx_idct, mmx_table,
- mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid)
diff --git a/src/libffmpeg/libavcodec/imgconvert.c b/src/libffmpeg/libavcodec/imgconvert.c
new file mode 100644
index 000000000..d39b6c1e9
--- /dev/null
+++ b/src/libffmpeg/libavcodec/imgconvert.c
@@ -0,0 +1,548 @@
+/*
+ * Misc image conversion routines
+ * Copyright (c) 2001 Gerard Lantau.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include "avcodec.h"
+#include "dsputil.h"
+
+#ifdef USE_FASTMEMCPY
+#include "fastmemcpy.h"
+#endif
+/* XXX: totally non optimized */
+
+static void yuv422_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
+ UINT8 *src, int width, int height)
+{
+ int x, y;
+ UINT8 *p = src;
+
+ for(y=0;y<height;y+=2) {
+ for(x=0;x<width;x+=2) {
+ lum[0] = p[0];
+ cb[0] = p[1];
+ lum[1] = p[2];
+ cr[0] = p[3];
+ p += 4;
+ lum += 2;
+ cb++;
+ cr++;
+ }
+ for(x=0;x<width;x+=2) {
+ lum[0] = p[0];
+ lum[1] = p[2];
+ p += 4;
+ lum += 2;
+ }
+ }
+}
+
+#define SCALEBITS 8
+#define ONE_HALF (1 << (SCALEBITS - 1))
+#define FIX(x) ((int) ((x) * (1L<<SCALEBITS) + 0.5))
+
+static void rgb24_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
+ UINT8 *src, int width, int height)
+{
+ int wrap, wrap3, x, y;
+ int r, g, b, r1, g1, b1;
+ UINT8 *p;
+
+ wrap = width;
+ wrap3 = width * 3;
+ p = src;
+ for(y=0;y<height;y+=2) {
+ for(x=0;x<width;x+=2) {
+ r = p[0];
+ g = p[1];
+ b = p[2];
+ r1 = r;
+ g1 = g;
+ b1 = b;
+ lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
+ FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
+ r = p[3];
+ g = p[4];
+ b = p[5];
+ r1 += r;
+ g1 += g;
+ b1 += b;
+ lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
+ FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
+ p += wrap3;
+ lum += wrap;
+
+ r = p[0];
+ g = p[1];
+ b = p[2];
+ r1 += r;
+ g1 += g;
+ b1 += b;
+ lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
+ FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
+ r = p[3];
+ g = p[4];
+ b = p[5];
+ r1 += r;
+ g1 += g;
+ b1 += b;
+ lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
+ FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
+
+ cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
+ FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
+ cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
+ FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
+
+ cb++;
+ cr++;
+ p += -wrap3 + 2 * 3;
+ lum += -wrap + 2;
+ }
+ p += wrap3;
+ lum += wrap;
+ }
+}
+
+static void bgr24_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
+ UINT8 *src, int width, int height)
+{
+ int wrap, wrap3, x, y;
+ int r, g, b, r1, g1, b1;
+ UINT8 *p;
+
+ wrap = width;
+ wrap3 = width * 3;
+ p = src;
+ for(y=0;y<height;y+=2) {
+ for(x=0;x<width;x+=2) {
+ b = p[0];
+ g = p[1];
+ r = p[2];
+ r1 = r;
+ g1 = g;
+ b1 = b;
+ lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
+ FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
+ b = p[3];
+ g = p[4];
+ r = p[5];
+ r1 += r;
+ g1 += g;
+ b1 += b;
+ lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
+ FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
+ p += wrap3;
+ lum += wrap;
+
+ b = p[0];
+ g = p[1];
+ r = p[2];
+ r1 += r;
+ g1 += g;
+ b1 += b;
+ lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
+ FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
+ b = p[3];
+ g = p[4];
+ r = p[5];
+ r1 += r;
+ g1 += g;
+ b1 += b;
+ lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
+ FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
+
+ cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
+ FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
+ cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
+ FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
+
+ cb++;
+ cr++;
+ p += -wrap3 + 2 * 3;
+ lum += -wrap + 2;
+ }
+ p += wrap3;
+ lum += wrap;
+ }
+}
+
+/* XXX: use generic filter ? */
+/* 1x2 -> 1x1 */
+static void shrink2(UINT8 *dst, int dst_wrap,
+ UINT8 *src, int src_wrap,
+ int width, int height)
+{
+ int w;
+ UINT8 *s1, *s2, *d;
+
+ for(;height > 0; height--) {
+ s1 = src;
+ s2 = s1 + src_wrap;
+ d = dst;
+ for(w = width;w >= 4; w-=4) {
+ d[0] = (s1[0] + s2[0]) >> 1;
+ d[1] = (s1[1] + s2[1]) >> 1;
+ d[2] = (s1[2] + s2[2]) >> 1;
+ d[3] = (s1[3] + s2[3]) >> 1;
+ s1 += 4;
+ s2 += 4;
+ d += 4;
+ }
+ for(;w > 0; w--) {
+ d[0] = (s1[0] + s2[0]) >> 1;
+ s1++;
+ s2++;
+ d++;
+ }
+ src += 2 * src_wrap;
+ dst += dst_wrap;
+ }
+}
+
+/* 2x2 -> 1x1 */
+static void shrink22(UINT8 *dst, int dst_wrap,
+ UINT8 *src, int src_wrap,
+ int width, int height)
+{
+ int w;
+ UINT8 *s1, *s2, *d;
+
+ for(;height > 0; height--) {
+ s1 = src;
+ s2 = s1 + src_wrap;
+ d = dst;
+ for(w = width;w >= 4; w-=4) {
+ d[0] = (s1[0] + s1[1] + s2[0] + s2[1] + 2) >> 1;
+ d[1] = (s1[2] + s1[3] + s2[2] + s2[3] + 2) >> 1;
+ d[2] = (s1[4] + s1[5] + s2[4] + s2[5] + 2) >> 1;
+ d[3] = (s1[6] + s1[7] + s2[6] + s2[7] + 2) >> 1;
+ s1 += 8;
+ s2 += 8;
+ d += 4;
+ }
+ for(;w > 0; w--) {
+ d[0] = (s1[0] + s1[1] + s2[0] + s2[1] + 2) >> 1;
+ s1 += 2;
+ s2 += 2;
+ d++;
+ }
+ src += 2 * src_wrap;
+ dst += dst_wrap;
+ }
+}
+
+static void img_copy(UINT8 *dst, int dst_wrap,
+ UINT8 *src, int src_wrap,
+ int width, int height)
+{
+ for(;height > 0; height--) {
+ memcpy(dst, src, width);
+ dst += dst_wrap;
+ src += src_wrap;
+ }
+}
+
+#define SCALE_BITS 10
+
+#define C_Y (76309 >> (16 - SCALE_BITS))
+#define C_RV (117504 >> (16 - SCALE_BITS))
+#define C_BU (138453 >> (16 - SCALE_BITS))
+#define C_GU (13954 >> (16 - SCALE_BITS))
+#define C_GV (34903 >> (16 - SCALE_BITS))
+
+#define RGBOUT(r, g, b, y1)\
+{\
+ y = (y1 - 16) * C_Y;\
+ r = cm[(y + r_add) >> SCALE_BITS];\
+ g = cm[(y + g_add) >> SCALE_BITS];\
+ b = cm[(y + b_add) >> SCALE_BITS];\
+}
+
+/* XXX: no chroma interpolation is done */
+static void yuv420p_to_rgb24(AVPicture *dst, AVPicture *src,
+ int width, int height)
+{
+ UINT8 *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr, *d, *d1, *d2;
+ int w, y, cb, cr, r_add, g_add, b_add, width2;
+ UINT8 *cm = cropTbl + MAX_NEG_CROP;
+
+ d = dst->data[0];
+ y1_ptr = src->data[0];
+ cb_ptr = src->data[1];
+ cr_ptr = src->data[2];
+ width2 = width >> 1;
+ for(;height > 0; height -= 2) {
+ d1 = d;
+ d2 = d + dst->linesize[0];
+ y2_ptr = y1_ptr + src->linesize[0];
+ for(w = width2; w > 0; w --) {
+ cb = cb_ptr[0] - 128;
+ cr = cr_ptr[0] - 128;
+ r_add = C_RV * cr + (1 << (SCALE_BITS - 1));
+ g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));
+ b_add = C_BU * cb + (1 << (SCALE_BITS - 1));
+
+ /* output 4 pixels */
+ RGBOUT(d1[0], d1[1], d1[2], y1_ptr[0]);
+ RGBOUT(d1[3], d1[4], d1[5], y1_ptr[1]);
+ RGBOUT(d2[0], d2[1], d2[2], y2_ptr[0]);
+ RGBOUT(d2[3], d2[4], d2[5], y2_ptr[1]);
+
+ d1 += 6;
+ d2 += 6;
+ y1_ptr += 2;
+ y2_ptr += 2;
+ cb_ptr++;
+ cr_ptr++;
+ }
+ d += 2 * dst->linesize[0];
+ y1_ptr += 2 * src->linesize[0] - width;
+ cb_ptr += src->linesize[1] - width2;
+ cr_ptr += src->linesize[2] - width2;
+ }
+}
+
+/* XXX: no chroma interpolation is done */
+static void yuv422p_to_rgb24(AVPicture *dst, AVPicture *src,
+ int width, int height)
+{
+ UINT8 *y1_ptr, *cb_ptr, *cr_ptr, *d, *d1;
+ int w, y, cb, cr, r_add, g_add, b_add, width2;
+ UINT8 *cm = cropTbl + MAX_NEG_CROP;
+
+ d = dst->data[0];
+ y1_ptr = src->data[0];
+ cb_ptr = src->data[1];
+ cr_ptr = src->data[2];
+ width2 = width >> 1;
+ for(;height > 0; height --) {
+ d1 = d;
+ for(w = width2; w > 0; w --) {
+ cb = cb_ptr[0] - 128;
+ cr = cr_ptr[0] - 128;
+ r_add = C_RV * cr + (1 << (SCALE_BITS - 1));
+ g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));
+ b_add = C_BU * cb + (1 << (SCALE_BITS - 1));
+
+ /* output 2 pixels */
+ RGBOUT(d1[0], d1[1], d1[2], y1_ptr[0]);
+ RGBOUT(d1[3], d1[4], d1[5], y1_ptr[1]);
+
+ d1 += 6;
+ y1_ptr += 2;
+ cb_ptr++;
+ cr_ptr++;
+ }
+ d += dst->linesize[0];
+ y1_ptr += src->linesize[0] - width;
+ cb_ptr += src->linesize[1] - width2;
+ cr_ptr += src->linesize[2] - width2;
+ }
+}
+
+/* XXX: always use linesize. Return -1 if not supported */
+int img_convert(AVPicture *dst, int dst_pix_fmt,
+ AVPicture *src, int pix_fmt,
+ int width, int height)
+{
+ int i;
+
+ if (dst_pix_fmt == pix_fmt) {
+ switch(pix_fmt) {
+ case PIX_FMT_YUV420P:
+ for(i=0;i<3;i++) {
+ if (i == 1) {
+ width >>= 1;
+ height >>= 1;
+ }
+ img_copy(dst->data[i], dst->linesize[i],
+ src->data[i], src->linesize[i],
+ width, height);
+ }
+ break;
+ default:
+ return -1;
+ }
+ } else if (dst_pix_fmt == PIX_FMT_YUV420P) {
+
+ switch(pix_fmt) {
+ case PIX_FMT_YUV420P:
+ for(i=0;i<3;i++) {
+ img_copy(dst->data[i], dst->linesize[i],
+ src->data[i], src->linesize[i],
+ width, height);
+ }
+ break;
+ case PIX_FMT_YUV422P:
+ img_copy(dst->data[0], dst->linesize[0],
+ src->data[0], src->linesize[0],
+ width, height);
+ width >>= 1;
+ height >>= 1;
+ for(i=1;i<3;i++) {
+ shrink2(dst->data[i], dst->linesize[i],
+ src->data[i], src->linesize[i],
+ width, height);
+ }
+ break;
+ case PIX_FMT_YUV444P:
+ img_copy(dst->data[0], dst->linesize[0],
+ src->data[0], src->linesize[0],
+ width, height);
+ width >>= 1;
+ height >>= 1;
+ for(i=1;i<3;i++) {
+ shrink22(dst->data[i], dst->linesize[i],
+ src->data[i], src->linesize[i],
+ width, height);
+ }
+ break;
+ case PIX_FMT_YUV422:
+ yuv422_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
+ src->data[0], width, height);
+ break;
+ case PIX_FMT_RGB24:
+ rgb24_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
+ src->data[0], width, height);
+ break;
+ case PIX_FMT_BGR24:
+ bgr24_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
+ src->data[0], width, height);
+ break;
+ default:
+ return -1;
+ }
+ } else if (dst_pix_fmt == PIX_FMT_RGB24) {
+ switch(pix_fmt) {
+ case PIX_FMT_YUV420P:
+ yuv420p_to_rgb24(dst, src, width, height);
+ break;
+ case PIX_FMT_YUV422P:
+ yuv422p_to_rgb24(dst, src, width, height);
+ break;
+ default:
+ return -1;
+ }
+ } else {
+ return -1;
+ }
+ return 0;
+}
+
+/* filter parameters: [-1 4 2 4 -1] // 8 */
+static void deinterlace_line(UINT8 *dst, UINT8 *src, int src_wrap,
+ int size)
+{
+ UINT8 *cm = cropTbl + MAX_NEG_CROP;
+ int sum;
+ UINT8 *s;
+
+ for(;size > 0;size--) {
+ s = src;
+ sum = -s[0];
+ s += src_wrap;
+ sum += s[0] << 2;
+ s += src_wrap;
+ sum += s[0] << 1;
+ s += src_wrap;
+ sum += s[0] << 2;
+ s += src_wrap;
+ sum += -s[0];
+ dst[0] = cm[(sum + 4) >> 3];
+ dst++;
+ src++;
+ }
+}
+
+/* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The
+ top field is copied as is, but the bottom field is deinterlaced
+ against the top field. */
+static void deinterlace_bottom_field(UINT8 *dst, int dst_wrap,
+ UINT8 *src1, int src_wrap,
+ int width, int height)
+{
+ UINT8 *src, *ptr;
+ int y, y1, i;
+ UINT8 *buf;
+
+ buf= (UINT8*) malloc(5 * width);
+
+ src = src1;
+ for(y=0;y<height;y+=2) {
+ /* copy top field line */
+ memcpy(dst, src, width);
+ dst += dst_wrap;
+ src += (1 - 2) * src_wrap;
+ y1 = y - 2;
+ if (y1 >= 0 && (y1 + 4) < height) {
+ /* fast case : no edges */
+ deinterlace_line(dst, src, src_wrap, width);
+ } else {
+ /* in order to use the same function, we use an intermediate buffer */
+ ptr = buf;
+ for(i=0;i<5;i++) {
+ if (y1 < 0)
+ memcpy(ptr, src1, width);
+ else if (y1 >= height)
+ memcpy(ptr, src1 + (height - 1) * src_wrap, width);
+ else
+ memcpy(ptr, src1 + y1 * src_wrap, width);
+ y1++;
+ ptr += width;
+ }
+ deinterlace_line(dst, buf, width, width);
+ }
+ dst += dst_wrap;
+ src += (2 + 1) * src_wrap;
+ }
+ free(buf);
+}
+
+
+/* deinterlace, return -1 if format not handled */
+int avpicture_deinterlace(AVPicture *dst, AVPicture *src,
+ int pix_fmt, int width, int height)
+{
+ int i;
+
+ if (pix_fmt != PIX_FMT_YUV420P &&
+ pix_fmt != PIX_FMT_YUV422P &&
+ pix_fmt != PIX_FMT_YUV444P)
+ return -1;
+ if ((width & 1) != 0 || (height & 3) != 0)
+ return -1;
+
+ for(i=0;i<3;i++) {
+ if (i == 1) {
+ switch(pix_fmt) {
+ case PIX_FMT_YUV420P:
+ width >>= 1;
+ height >>= 1;
+ break;
+ case PIX_FMT_YUV422P:
+ width >>= 1;
+ break;
+ default:
+ break;
+ }
+ }
+ deinterlace_bottom_field(dst->data[i], dst->linesize[i],
+ src->data[i], src->linesize[i],
+ width, height);
+ }
+ return 0;
+}
diff --git a/src/libffmpeg/libavcodec/imgresample.c b/src/libffmpeg/libavcodec/imgresample.c
new file mode 100644
index 000000000..8c69de2de
--- /dev/null
+++ b/src/libffmpeg/libavcodec/imgresample.c
@@ -0,0 +1,619 @@
+/*
+ * High quality image resampling with polyphase filters
+ * Copyright (c) 2001 Gerard Lantau.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include "dsputil.h"
+#include "avcodec.h"
+
+#ifdef USE_FASTMEMCPY
+#include "fastmemcpy.h"
+#endif
+
+
+#define NB_COMPONENTS 3
+
+#define PHASE_BITS 4
+#define NB_PHASES (1 << PHASE_BITS)
+#define NB_TAPS 4
+#define FCENTER 1 /* index of the center of the filter */
+
+#define POS_FRAC_BITS 16
+#define POS_FRAC (1 << POS_FRAC_BITS)
+/* 6 bits precision is needed for MMX */
+#define FILTER_BITS 8
+
+#define LINE_BUF_HEIGHT (NB_TAPS * 4)
+
+struct ImgReSampleContext {
+ int iwidth, iheight, owidth, oheight;
+ int h_incr, v_incr;
+ INT16 h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */
+ INT16 v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */
+ UINT8 *line_buf;
+};
+
+static inline int get_phase(int pos)
+{
+ return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
+}
+
+/* This function must be optimized */
+static void h_resample_fast(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
+ int src_start, int src_incr, INT16 *filters)
+{
+ int src_pos, phase, sum, i;
+ UINT8 *s;
+ INT16 *filter;
+
+ src_pos = src_start;
+ for(i=0;i<dst_width;i++) {
+#ifdef TEST
+ /* test */
+ if ((src_pos >> POS_FRAC_BITS) < 0 ||
+ (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
+ abort();
+#endif
+ s = src + (src_pos >> POS_FRAC_BITS);
+ phase = get_phase(src_pos);
+ filter = filters + phase * NB_TAPS;
+#if NB_TAPS == 4
+ sum = s[0] * filter[0] +
+ s[1] * filter[1] +
+ s[2] * filter[2] +
+ s[3] * filter[3];
+#else
+ {
+ int j;
+ sum = 0;
+ for(j=0;j<NB_TAPS;j++)
+ sum += s[j] * filter[j];
+ }
+#endif
+ sum = sum >> FILTER_BITS;
+ if (sum < 0)
+ sum = 0;
+ else if (sum > 255)
+ sum = 255;
+ dst[0] = sum;
+ src_pos += src_incr;
+ dst++;
+ }
+}
+
+/* This function must be optimized */
+static void v_resample(UINT8 *dst, int dst_width, UINT8 *src, int wrap,
+ INT16 *filter)
+{
+ int sum, i;
+ UINT8 *s;
+
+ s = src;
+ for(i=0;i<dst_width;i++) {
+#if NB_TAPS == 4
+ sum = s[0 * wrap] * filter[0] +
+ s[1 * wrap] * filter[1] +
+ s[2 * wrap] * filter[2] +
+ s[3 * wrap] * filter[3];
+#else
+ {
+ int j;
+ UINT8 *s1 = s;
+
+ sum = 0;
+ for(j=0;j<NB_TAPS;j++) {
+ sum += s1[0] * filter[j];
+ s1 += wrap;
+ }
+ }
+#endif
+ sum = sum >> FILTER_BITS;
+ if (sum < 0)
+ sum = 0;
+ else if (sum > 255)
+ sum = 255;
+ dst[0] = sum;
+ dst++;
+ s++;
+ }
+}
+
+#ifdef HAVE_MMX
+
+#include "i386/mmx.h"
+
+#define FILTER4(reg) \
+{\
+ s = src + (src_pos >> POS_FRAC_BITS);\
+ phase = get_phase(src_pos);\
+ filter = filters + phase * NB_TAPS;\
+ movq_m2r(*s, reg);\
+ punpcklbw_r2r(mm7, reg);\
+ movq_m2r(*filter, mm6);\
+ pmaddwd_r2r(reg, mm6);\
+ movq_r2r(mm6, reg);\
+ psrlq_i2r(32, reg);\
+ paddd_r2r(mm6, reg);\
+ psrad_i2r(FILTER_BITS, reg);\
+ src_pos += src_incr;\
+}
+
+#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);
+
+/* XXX: do four pixels at a time */
+static void h_resample_fast4_mmx(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
+ int src_start, int src_incr, INT16 *filters)
+{
+ int src_pos, phase;
+ UINT8 *s;
+ INT16 *filter;
+ mmx_t tmp;
+
+ src_pos = src_start;
+ pxor_r2r(mm7, mm7);
+
+ while (dst_width >= 4) {
+
+ FILTER4(mm0);
+ FILTER4(mm1);
+ FILTER4(mm2);
+ FILTER4(mm3);
+
+ packuswb_r2r(mm7, mm0);
+ packuswb_r2r(mm7, mm1);
+ packuswb_r2r(mm7, mm3);
+ packuswb_r2r(mm7, mm2);
+ movq_r2m(mm0, tmp);
+ dst[0] = tmp.ub[0];
+ movq_r2m(mm1, tmp);
+ dst[1] = tmp.ub[0];
+ movq_r2m(mm2, tmp);
+ dst[2] = tmp.ub[0];
+ movq_r2m(mm3, tmp);
+ dst[3] = tmp.ub[0];
+ dst += 4;
+ dst_width -= 4;
+ }
+ while (dst_width > 0) {
+ FILTER4(mm0);
+ packuswb_r2r(mm7, mm0);
+ movq_r2m(mm0, tmp);
+ dst[0] = tmp.ub[0];
+ dst++;
+ dst_width--;
+ }
+ emms();
+}
+
+static void v_resample4_mmx(UINT8 *dst, int dst_width, UINT8 *src, int wrap,
+ INT16 *filter)
+{
+ int sum, i, v;
+ UINT8 *s;
+ mmx_t tmp;
+ mmx_t coefs[4];
+
+ for(i=0;i<4;i++) {
+ v = filter[i];
+ coefs[i].uw[0] = v;
+ coefs[i].uw[1] = v;
+ coefs[i].uw[2] = v;
+ coefs[i].uw[3] = v;
+ }
+
+ pxor_r2r(mm7, mm7);
+ s = src;
+ while (dst_width >= 4) {
+ movq_m2r(s[0 * wrap], mm0);
+ punpcklbw_r2r(mm7, mm0);
+ movq_m2r(s[1 * wrap], mm1);
+ punpcklbw_r2r(mm7, mm1);
+ movq_m2r(s[2 * wrap], mm2);
+ punpcklbw_r2r(mm7, mm2);
+ movq_m2r(s[3 * wrap], mm3);
+ punpcklbw_r2r(mm7, mm3);
+
+ pmullw_m2r(coefs[0], mm0);
+ pmullw_m2r(coefs[1], mm1);
+ pmullw_m2r(coefs[2], mm2);
+ pmullw_m2r(coefs[3], mm3);
+
+ paddw_r2r(mm1, mm0);
+ paddw_r2r(mm3, mm2);
+ paddw_r2r(mm2, mm0);
+ psraw_i2r(FILTER_BITS, mm0);
+
+ packuswb_r2r(mm7, mm0);
+ movq_r2m(mm0, tmp);
+
+ *(UINT32 *)dst = tmp.ud[0];
+ dst += 4;
+ s += 4;
+ dst_width -= 4;
+ }
+ while (dst_width > 0) {
+ sum = s[0 * wrap] * filter[0] +
+ s[1 * wrap] * filter[1] +
+ s[2 * wrap] * filter[2] +
+ s[3 * wrap] * filter[3];
+ sum = sum >> FILTER_BITS;
+ if (sum < 0)
+ sum = 0;
+ else if (sum > 255)
+ sum = 255;
+ dst[0] = sum;
+ dst++;
+ s++;
+ dst_width--;
+ }
+ emms();
+}
+#endif
+
+/* slow version to handle limit cases. Does not need optimisation */
+static void h_resample_slow(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
+ int src_start, int src_incr, INT16 *filters)
+{
+ int src_pos, phase, sum, j, v, i;
+ UINT8 *s, *src_end;
+ INT16 *filter;
+
+ src_end = src + src_width;
+ src_pos = src_start;
+ for(i=0;i<dst_width;i++) {
+ s = src + (src_pos >> POS_FRAC_BITS);
+ phase = get_phase(src_pos);
+ filter = filters + phase * NB_TAPS;
+ sum = 0;
+ for(j=0;j<NB_TAPS;j++) {
+ if (s < src)
+ v = src[0];
+ else if (s >= src_end)
+ v = src_end[-1];
+ else
+ v = s[0];
+ sum += v * filter[j];
+ s++;
+ }
+ sum = sum >> FILTER_BITS;
+ if (sum < 0)
+ sum = 0;
+ else if (sum > 255)
+ sum = 255;
+ dst[0] = sum;
+ src_pos += src_incr;
+ dst++;
+ }
+}
+
+static void h_resample(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
+ int src_start, int src_incr, INT16 *filters)
+{
+ int n, src_end;
+
+ if (src_start < 0) {
+ n = (0 - src_start + src_incr - 1) / src_incr;
+ h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
+ dst += n;
+ dst_width -= n;
+ src_start += n * src_incr;
+ }
+ src_end = src_start + dst_width * src_incr;
+ if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
+ n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
+ src_incr;
+ } else {
+ n = dst_width;
+ }
+#ifdef HAVE_MMX
+ if ((mm_flags & MM_MMX) && NB_TAPS == 4)
+ h_resample_fast4_mmx(dst, n,
+ src, src_width, src_start, src_incr, filters);
+ else
+#endif
+ h_resample_fast(dst, n,
+ src, src_width, src_start, src_incr, filters);
+ if (n < dst_width) {
+ dst += n;
+ dst_width -= n;
+ src_start += n * src_incr;
+ h_resample_slow(dst, dst_width,
+ src, src_width, src_start, src_incr, filters);
+ }
+}
+
+static void component_resample(ImgReSampleContext *s,
+ UINT8 *output, int owrap, int owidth, int oheight,
+ UINT8 *input, int iwrap, int iwidth, int iheight)
+{
+ int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
+ UINT8 *new_line, *src_line;
+
+ last_src_y = - FCENTER - 1;
+ /* position of the bottom of the filter in the source image */
+ src_y = (last_src_y + NB_TAPS) * POS_FRAC;
+ ring_y = NB_TAPS; /* position in ring buffer */
+ for(y=0;y<oheight;y++) {
+ /* apply horizontal filter on new lines from input if needed */
+ src_y1 = src_y >> POS_FRAC_BITS;
+ while (last_src_y < src_y1) {
+ if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
+ ring_y = NB_TAPS;
+ last_src_y++;
+ /* handle limit conditions : replicate line (slightly
+ inefficient because we filter multiple times) */
+ y1 = last_src_y;
+ if (y1 < 0) {
+ y1 = 0;
+ } else if (y1 >= iheight) {
+ y1 = iheight - 1;
+ }
+ src_line = input + y1 * iwrap;
+ new_line = s->line_buf + ring_y * owidth;
+ /* apply filter and handle limit cases correctly */
+ h_resample(new_line, owidth,
+ src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
+ &s->h_filters[0][0]);
+ /* handle ring buffer wrapping */
+ if (ring_y >= LINE_BUF_HEIGHT) {
+ memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
+ new_line, owidth);
+ }
+ }
+ /* apply vertical filter */
+ phase_y = get_phase(src_y);
+#ifdef HAVE_MMX
+ /* MMX deactivated because of loss of precision */
+ if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
+ v_resample4_mmx(output, owidth,
+ s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
+ &s->v_filters[phase_y][0]);
+ else
+#endif
+ v_resample(output, owidth,
+ s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
+ &s->v_filters[phase_y][0]);
+
+ src_y += s->v_incr;
+ output += owrap;
+ }
+}
+
+/* XXX: the following filter is quite naive, but it seems to suffice
+ for 4 taps */
+static void build_filter(INT16 *filter, float factor)
+{
+ int ph, i, v;
+ float x, y, tab[NB_TAPS], norm, mult;
+
+ /* if upsampling, only need to interpolate, no filter */
+ if (factor > 1.0)
+ factor = 1.0;
+
+ for(ph=0;ph<NB_PHASES;ph++) {
+ norm = 0;
+ for(i=0;i<NB_TAPS;i++) {
+
+ x = M_PI * ((float)(i - FCENTER) - (float)ph / NB_PHASES) * factor;
+ if (x == 0)
+ y = 1.0;
+ else
+ y = sin(x) / x;
+ tab[i] = y;
+ norm += y;
+ }
+
+ /* normalize so that a uniform color remains the same */
+ mult = (float)(1 << FILTER_BITS) / norm;
+ for(i=0;i<NB_TAPS;i++) {
+ v = (int)(tab[i] * mult);
+ filter[ph * NB_TAPS + i] = v;
+ }
+ }
+}
+
+ImgReSampleContext *img_resample_init(int owidth, int oheight,
+ int iwidth, int iheight)
+{
+ ImgReSampleContext *s;
+
+ s = av_mallocz(sizeof(ImgReSampleContext));
+ if (!s)
+ return NULL;
+ s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
+ if (!s->line_buf)
+ goto fail;
+
+ s->owidth = owidth;
+ s->oheight = oheight;
+ s->iwidth = iwidth;
+ s->iheight = iheight;
+
+ s->h_incr = (iwidth * POS_FRAC) / owidth;
+ s->v_incr = (iheight * POS_FRAC) / oheight;
+
+ build_filter(&s->h_filters[0][0], (float)owidth / (float)iwidth);
+ build_filter(&s->v_filters[0][0], (float)oheight / (float)iheight);
+
+ return s;
+ fail:
+ free(s);
+ return NULL;
+}
+
+void img_resample(ImgReSampleContext *s,
+ AVPicture *output, AVPicture *input)
+{
+ int i, shift;
+
+ for(i=0;i<3;i++) {
+ shift = (i == 0) ? 0 : 1;
+ component_resample(s, output->data[i], output->linesize[i],
+ s->owidth >> shift, s->oheight >> shift,
+ input->data[i], input->linesize[i],
+ s->iwidth >> shift, s->iheight >> shift);
+ }
+}
+
+void img_resample_close(ImgReSampleContext *s)
+{
+ free(s->line_buf);
+ free(s);
+}
+
+#ifdef TEST
+
+void *av_mallocz(int size)
+{
+ void *ptr;
+ ptr = malloc(size);
+ memset(ptr, 0, size);
+ return ptr;
+}
+
+/* input */
+#define XSIZE 256
+#define YSIZE 256
+UINT8 img[XSIZE * YSIZE];
+
+/* output */
+#define XSIZE1 512
+#define YSIZE1 512
+UINT8 img1[XSIZE1 * YSIZE1];
+UINT8 img2[XSIZE1 * YSIZE1];
+
+void save_pgm(const char *filename, UINT8 *img, int xsize, int ysize)
+{
+ FILE *f;
+ f=fopen(filename,"w");
+ fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
+ fwrite(img,1, xsize * ysize,f);
+ fclose(f);
+}
+
+static void dump_filter(INT16 *filter)
+{
+ int i, ph;
+
+ for(ph=0;ph<NB_PHASES;ph++) {
+ printf("%2d: ", ph);
+ for(i=0;i<NB_TAPS;i++) {
+ printf(" %5.2f", filter[ph * NB_TAPS + i] / 256.0);
+ }
+ printf("\n");
+ }
+}
+
+#ifdef HAVE_MMX
+int mm_flags;
+#endif
+
+int main(int argc, char **argv)
+{
+ int x, y, v, i, xsize, ysize;
+ ImgReSampleContext *s;
+ float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
+ char buf[256];
+
+ /* build test image */
+ for(y=0;y<YSIZE;y++) {
+ for(x=0;x<XSIZE;x++) {
+ if (x < XSIZE/2 && y < YSIZE/2) {
+ if (x < XSIZE/4 && y < YSIZE/4) {
+ if ((x % 10) <= 6 &&
+ (y % 10) <= 6)
+ v = 0xff;
+ else
+ v = 0x00;
+ } else if (x < XSIZE/4) {
+ if (x & 1)
+ v = 0xff;
+ else
+ v = 0;
+ } else if (y < XSIZE/4) {
+ if (y & 1)
+ v = 0xff;
+ else
+ v = 0;
+ } else {
+ if (y < YSIZE*3/8) {
+ if ((y+x) & 1)
+ v = 0xff;
+ else
+ v = 0;
+ } else {
+ if (((x+3) % 4) <= 1 &&
+ ((y+3) % 4) <= 1)
+ v = 0xff;
+ else
+ v = 0x00;
+ }
+ }
+ } else if (x < XSIZE/2) {
+ v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
+ } else if (y < XSIZE/2) {
+ v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
+ } else {
+ v = ((x + y - XSIZE) * 255) / XSIZE;
+ }
+ img[y * XSIZE + x] = v;
+ }
+ }
+ save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
+ for(i=0;i<sizeof(factors)/sizeof(float);i++) {
+ fact = factors[i];
+ xsize = (int)(XSIZE * fact);
+ ysize = (int)(YSIZE * fact);
+ s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
+ printf("Factor=%0.2f\n", fact);
+ dump_filter(&s->h_filters[0][0]);
+ component_resample(s, img1, xsize, xsize, ysize,
+ img, XSIZE, XSIZE, YSIZE);
+ img_resample_close(s);
+
+ sprintf(buf, "/tmp/out%d.pgm", i);
+ save_pgm(buf, img1, xsize, ysize);
+ }
+
+ /* mmx test */
+#ifdef HAVE_MMX
+ printf("MMX test\n");
+ fact = 0.72;
+ xsize = (int)(XSIZE * fact);
+ ysize = (int)(YSIZE * fact);
+ mm_flags = MM_MMX;
+ s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
+ component_resample(s, img1, xsize, xsize, ysize,
+ img, XSIZE, XSIZE, YSIZE);
+
+ mm_flags = 0;
+ s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
+ component_resample(s, img2, xsize, xsize, ysize,
+ img, XSIZE, XSIZE, YSIZE);
+ if (memcmp(img1, img2, xsize * ysize) != 0) {
+ fprintf(stderr, "mmx error\n");
+ exit(1);
+ }
+ printf("MMX OK\n");
+#endif
+ return 0;
+}
+
+#endif
diff --git a/src/libffmpeg/libavcodec/mjpeg.c b/src/libffmpeg/libavcodec/mjpeg.c
index 66d018e69..08281a501 100644
--- a/src/libffmpeg/libavcodec/mjpeg.c
+++ b/src/libffmpeg/libavcodec/mjpeg.c
@@ -16,6 +16,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+//#define DEBUG
#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"
@@ -419,30 +420,6 @@ void mjpeg_encode_mb(MpegEncContext *s,
/******************************************/
/* decoding */
-//#define DEBUG
-
-#ifndef CONFIG_WIN32
-
-#ifdef DEBUG
-# if __GNUC__
-# define dprintf(fmt,args...) printf(fmt, ## args)
-# else
-# define dprintf(...) printf(__VA_ARGS__)
-# endif
-#else
-# if __GNUC__
-# define dprintf(fmt,args...)
-# else
-# define dprintf(...)
-# endif
-#endif
-
-#else
-
-inline void dprintf(const char* fmt,...) {}
-
-#endif
-
/* compressed picture size */
#define PICTURE_BUFFER_SIZE 100000
diff --git a/src/libffmpeg/libavcodec/mpeg12.c b/src/libffmpeg/libavcodec/mpeg12.c
index 013e1b935..758124eea 100644
--- a/src/libffmpeg/libavcodec/mpeg12.c
+++ b/src/libffmpeg/libavcodec/mpeg12.c
@@ -16,36 +16,13 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+//#define DEBUG
#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"
#include "mpeg12data.h"
-//#define DEBUG
-
-#ifndef CONFIG_WIN32
-
-#ifdef DEBUG
-# if __GNUC__
-# define dprintf(fmt,args...) printf(fmt, ## args)
-# else
-# define dprintf(...) printf(__VA_ARGS__)
-# endif
-#else
-# if __GNUC__
-# define dprintf(fmt,args...)
-# else
-# define dprintf(...)
-# endif
-#endif
-
-#else
-
-inline void dprintf(const char* fmt,...) {}
-
-#endif
-
/* Start codes. */
#define SEQ_END_CODE 0x000001b7
#define SEQ_START_CODE 0x000001b3
diff --git a/src/libffmpeg/libavcodec/mpegvideo.c b/src/libffmpeg/libavcodec/mpegvideo.c
index 9f7236f98..80a35635d 100644
--- a/src/libffmpeg/libavcodec/mpegvideo.c
+++ b/src/libffmpeg/libavcodec/mpegvideo.c
@@ -101,14 +101,12 @@ int MPV_common_init(MpegEncContext *s)
int c_size, i;
UINT8 *pict;
- s->waiting_for_keyframe = 1;
-
if (s->out_format == FMT_H263)
s->dct_unquantize = dct_unquantize_h263_c;
else
s->dct_unquantize = dct_unquantize_mpeg1_c;
-#ifdef ARCH_X86
+#ifdef HAVE_MMX
MPV_common_init_mmx(s);
#endif
s->mb_width = (s->width + 15) / 16;
@@ -214,7 +212,6 @@ int MPV_common_init(MpegEncContext *s)
if (s->aux_picture_base[i])
free(s->aux_picture_base[i]);
}
-
return -1;
}
@@ -252,6 +249,9 @@ int MPV_encode_init(AVCodecContext *avctx)
s->width = avctx->width;
s->height = avctx->height;
s->gop_size = avctx->gop_size;
+ s->rtp_mode = avctx->rtp_mode;
+ s->rtp_payload_size = avctx->rtp_payload_size;
+
if (s->gop_size <= 1) {
s->intra_only = 1;
s->gop_size = 12;
@@ -273,14 +273,22 @@ int MPV_encode_init(AVCodecContext *avctx)
return -1;
break;
case CODEC_ID_H263:
- if (h263_get_picture_format(s->width, s->height) == 7)
+ if (h263_get_picture_format(s->width, s->height) == 7){
+ printf("Input picture size isn't suitable for h263 codec! try h263+\n");
return -1;
+ }
s->out_format = FMT_H263;
break;
case CODEC_ID_H263P:
s->out_format = FMT_H263;
+ s->rtp_mode = 1;
+ s->rtp_payload_size = 1200;
s->h263_plus = 1;
- /* XXX: not unrectricted mv yet */
+ s->unrestricted_mv = 1;
+
+ /* These are just to be sure */
+ s->umvplus = 0;
+ s->umvplus_dec = 0;
break;
case CODEC_ID_RV10:
s->out_format = FMT_H263;
@@ -395,9 +403,15 @@ void MPV_frame_end(MpegEncContext *s)
{
/* draw edge for correct motion prediction if outside */
if (s->pict_type != B_TYPE) {
+#if 1
+ draw_edges(s->current_picture[0], s->linesize, s->mb_width*16, s->mb_height*16, EDGE_WIDTH);
+ draw_edges(s->current_picture[1], s->linesize/2, s->mb_width*8, s->mb_height*8, EDGE_WIDTH/2);
+ draw_edges(s->current_picture[2], s->linesize/2, s->mb_width*8, s->mb_height*8, EDGE_WIDTH/2);
+#else
draw_edges(s->current_picture[0], s->linesize, s->width, s->height, EDGE_WIDTH);
draw_edges(s->current_picture[1], s->linesize/2, s->width/2, s->height/2, EDGE_WIDTH/2);
draw_edges(s->current_picture[2], s->linesize/2, s->width/2, s->height/2, EDGE_WIDTH/2);
+#endif
}
}
@@ -674,6 +688,11 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
mb_x = s->mb_x;
mb_y = s->mb_y;
+#ifdef FF_POSTPROCESS
+ quant_store[mb_y][mb_x]=s->qscale;
+ //printf("[%02d][%02d] %d\n",mb_x,mb_y,s->qscale);
+#endif
+
/* update DC predictors for P macroblocks */
if (!s->mb_intra) {
if (s->h263_pred) {
@@ -813,7 +832,7 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
static void encode_picture(MpegEncContext *s, int picture_number)
{
- int mb_x, mb_y, wrap;
+ int mb_x, mb_y, wrap, last_gob;
UINT8 *ptr;
int i, motion_x, motion_y;
@@ -863,7 +882,29 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->mv_type = MV_TYPE_16X16;
s->mv_dir = MV_DIR_FORWARD;
+ /* Get the GOB height based on picture height */
+ if (s->out_format == FMT_H263 && s->h263_plus) {
+ if (s->height <= 400)
+ s->gob_index = 1;
+ else if (s->height <= 800)
+ s->gob_index = 2;
+ else
+ s->gob_index = 4;
+ }
+
for(mb_y=0; mb_y < s->mb_height; mb_y++) {
+ /* Put GOB header based on RTP MTU */
+ if (!mb_y) {
+ s->ptr_lastgob = s->pb.buf_ptr;
+ s->ptr_last_mb_line = s->pb.buf_ptr;
+ } else if (s->out_format == FMT_H263 && s->h263_plus) {
+ last_gob = h263_encode_gob_header(s, mb_y);
+ if (last_gob) {
+ //fprintf(stderr,"\nLast GOB size: %d", last_gob);
+ s->first_gob_line = 1;
+ } else
+ s->first_gob_line = 0;
+ }
for(mb_x=0; mb_x < s->mb_width; mb_x++) {
s->mb_x = mb_x;
@@ -975,7 +1016,17 @@ static void encode_picture(MpegEncContext *s, int picture_number)
MPV_decode_mb(s, s->block);
}
+ /* Obtain average MB line size for RTP */
+ if (!mb_y)
+ s->mb_line_avgsize = s->pb.buf_ptr - s->ptr_last_mb_line;
+ else
+ s->mb_line_avgsize = (s->mb_line_avgsize + s->pb.buf_ptr - s->ptr_last_mb_line) >> 1;
+ //fprintf(stderr, "\nMB line: %d\tSize: %u\tAvg. Size: %u", s->mb_y,
+ // (s->pb.buf_ptr - s->ptr_last_mb_line), s->mb_line_avgsize);
+ s->ptr_last_mb_line = s->pb.buf_ptr;
}
+ //if (s->gob_number)
+ // fprintf(stderr,"\nNumber of GOB: %d", s->gob_number);
}
static int dct_quantize(MpegEncContext *s,
diff --git a/src/libffmpeg/libavcodec/mpegvideo.h b/src/libffmpeg/libavcodec/mpegvideo.h
index 9abe75a7c..e653edb9b 100644
--- a/src/libffmpeg/libavcodec/mpegvideo.h
+++ b/src/libffmpeg/libavcodec/mpegvideo.h
@@ -128,7 +128,16 @@ typedef struct MpegEncContext {
int P_frame_bits; /* same for P frame */
INT64 wanted_bits;
INT64 total_bits;
-
+
+ /* H.263 specific */
+ int gob_number;
+ int gob_index;
+ int first_gob_line;
+
+ /* H.263+ specific */
+ int umvplus;
+ int umvplus_dec;
+
/* mpeg4 specific */
int time_increment_bits;
int shape;
@@ -177,12 +186,17 @@ typedef struct MpegEncContext {
int interlaced_dct;
int last_qscale;
int first_slice;
-
+
+ /* RTP specific */
+ int rtp_mode;
+ int rtp_payload_size;
+ UINT8 *ptr_lastgob;
+ UINT8 *ptr_last_mb_line;
+ UINT32 mb_line_avgsize;
+
DCTELEM block[6][64] __align8;
void (*dct_unquantize)(struct MpegEncContext *s,
DCTELEM *block, int n, int qscale);
-
- int waiting_for_keyframe;
} MpegEncContext;
int MPV_common_init(MpegEncContext *s);
@@ -190,7 +204,7 @@ void MPV_common_end(MpegEncContext *s);
void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
void MPV_frame_start(MpegEncContext *s);
void MPV_frame_end(MpegEncContext *s);
-#ifdef ARCH_X86
+#ifdef HAVE_MMX
void MPV_common_init_mmx(MpegEncContext *s);
#endif
@@ -230,7 +244,7 @@ typedef struct RLTable {
void init_rl(RLTable *rl);
void init_vlc_rl(RLTable *rl);
-static inline int get_rl_index(const RLTable *rl, int last, int run, int level)
+extern inline int get_rl_index(const RLTable *rl, int last, int run, int level)
{
int index;
index = rl->index_run[last][run];
@@ -245,6 +259,7 @@ void h263_encode_mb(MpegEncContext *s,
DCTELEM block[6][64],
int motion_x, int motion_y);
void h263_encode_picture_header(MpegEncContext *s, int picture_number);
+int h263_encode_gob_header(MpegEncContext * s, int mb_line);
void h263_dc_scale(MpegEncContext *s);
INT16 *h263_pred_motion(MpegEncContext * s, int block,
int *px, int *py);
@@ -255,6 +270,7 @@ void h263_encode_init_vlc(MpegEncContext *s);
void h263_decode_init_vlc(MpegEncContext *s);
int h263_decode_picture_header(MpegEncContext *s);
+int h263_decode_gob_header(MpegEncContext *s);
int mpeg4_decode_picture_header(MpegEncContext * s);
int intel_h263_decode_picture_header(MpegEncContext *s);
int h263_decode_mb(MpegEncContext *s,
diff --git a/src/libffmpeg/libavcodec/mpegvideo_mmx.c b/src/libffmpeg/libavcodec/mpegvideo_mmx.c
deleted file mode 100644
index 7b3ba8aff..000000000
--- a/src/libffmpeg/libavcodec/mpegvideo_mmx.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * The simplest mpeg encoder (well, it was the simplest!)
- * Copyright (c) 2000,2001 Gerard Lantau.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Optimized for ia32 cpus by Nick Kurshev <nickols_k@mail.ru>
- */
-
-#include "dsputil.h"
-#include "mpegvideo.h"
-#include "xineutils.h"
-
-#if 0
-
-/* XXX: GL: I don't understand why this function needs optimization
- (it is called only once per frame!), so I disabled it */
-
-void MPV_frame_start(MpegEncContext *s)
-{
- if (s->pict_type == B_TYPE) {
- __asm __volatile(
- "movl (%1), %%eax\n\t"
- "movl 4(%1), %%edx\n\t"
- "movl 8(%1), %%ecx\n\t"
- "movl %%eax, (%0)\n\t"
- "movl %%edx, 4(%0)\n\t"
- "movl %%ecx, 8(%0)\n\t"
- :
- :"r"(s->current_picture), "r"(s->aux_picture)
- :"eax","edx","ecx","memory");
- } else {
- /* swap next and last */
- __asm __volatile(
- "movl (%1), %%eax\n\t"
- "movl 4(%1), %%edx\n\t"
- "movl 8(%1), %%ecx\n\t"
- "xchgl (%0), %%eax\n\t"
- "xchgl 4(%0), %%edx\n\t"
- "xchgl 8(%0), %%ecx\n\t"
- "movl %%eax, (%1)\n\t"
- "movl %%edx, 4(%1)\n\t"
- "movl %%ecx, 8(%1)\n\t"
- "movl %%eax, (%2)\n\t"
- "movl %%edx, 4(%2)\n\t"
- "movl %%ecx, 8(%2)\n\t"
- :
- :"r"(s->last_picture), "r"(s->next_picture), "r"(s->current_picture)
- :"eax","edx","ecx","memory");
- }
-}
-#endif
-
-static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xffffffffffffffffULL;
-static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
-
-/*
- NK:
- Note: looking at PARANOID:
- "enable all paranoid tests for rounding, overflows, etc..."
-
-#ifdef PARANOID
- if (level < -2048 || level > 2047)
- fprintf(stderr, "unquant error %d %d\n", i, level);
-#endif
- We can suppose that result of two multiplications can't be greate of 0xFFFF
- i.e. is 16-bit, so we use here only PMULLW instruction and can avoid
- a complex multiplication.
-=====================================================
- Full formula for multiplication of 2 integer numbers
- which are represent as high:low words:
- input: value1 = high1:low1
- value2 = high2:low2
- output: value3 = value1*value2
- value3=high3:low3 (on overflow: modulus 2^32 wrap-around)
- this mean that for 0x123456 * 0x123456 correct result is 0x766cb0ce4
- but this algorithm will compute only 0x66cb0ce4
- this limited by 16-bit size of operands
- ---------------------------------
- tlow1 = high1*low2
- tlow2 = high2*low1
- tlow1 = tlow1 + tlow2
- high3:low3 = low1*low2
- high3 += tlow1
-*/
-static void dct_unquantize_mpeg1_mmx(MpegEncContext *s,
- DCTELEM *block, int n, int qscale)
-{
- int i, level;
- const UINT16 *quant_matrix;
- if (s->mb_intra) {
- if (n < 4)
- block[0] = block[0] * s->y_dc_scale;
- else
- block[0] = block[0] * s->c_dc_scale;
- if (s->out_format == FMT_H263) {
- i = 1;
- goto unquant_even;
- }
- /* XXX: only mpeg1 */
- quant_matrix = s->intra_matrix;
- i=1;
- /* Align on 4 elements boundary */
- while(i&3)
- {
- level = block[i];
- if (level) {
- if (level < 0) level = -level;
- level = (int)(level * qscale * quant_matrix[i]) >> 3;
- level = (level - 1) | 1;
- if (block[i] < 0) level = -level;
- block[i] = level;
- }
- i++;
- }
- __asm __volatile(
- "movd %0, %%mm6\n\t" /* mm6 = qscale | 0 */
- "punpckldq %%mm6, %%mm6\n\t" /* mm6 = qscale | qscale */
- "movq %2, %%mm4\n\t"
- "movq %%mm6, %%mm7\n\t"
- "movq %1, %%mm5\n\t"
- "packssdw %%mm6, %%mm7\n\t" /* mm7 = qscale | qscale | qscale | qscale */
- "pxor %%mm6, %%mm6\n\t"
- ::"g"(qscale),"m"(mm_wone),"m"(mm_wabs):"memory");
- for(;i<64;i+=4) {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %%mm7, %%mm1\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm0, %%mm3\n\t"
- "pcmpgtw %%mm6, %%mm2\n\t"
- "pmullw %2, %%mm1\n\t"
- "pandn %%mm4, %%mm2\n\t"
- "por %%mm5, %%mm2\n\t"
- "pmullw %%mm2, %%mm0\n\t" /* mm0 = abs(block[i]). */
-
- "pcmpeqw %%mm6, %%mm3\n\t"
- "pmullw %%mm0, %%mm1\n\t"
- "psraw $3, %%mm1\n\t"
- "psubw %%mm5, %%mm1\n\t" /* block[i] --; */
- "pandn %%mm4, %%mm3\n\t" /* fake of pcmpneqw : mm0 != 0 then mm1 = -1 */
- "por %%mm5, %%mm1\n\t" /* block[i] |= 1 */
- "pmullw %%mm2, %%mm1\n\t" /* change signs again */
-
- "pand %%mm3, %%mm1\n\t" /* nullify if was zero */
- "movq %%mm1, %0"
- :"=m"(block[i])
- :"m"(block[i]), "m"(quant_matrix[i])
- :"memory");
- }
- } else {
- i = 0;
- unquant_even:
- quant_matrix = s->non_intra_matrix;
- /* Align on 4 elements boundary */
- while(i&3)
- {
- level = block[i];
- if (level) {
- if (level < 0) level = -level;
- level = (((level << 1) + 1) * qscale *
- ((int) quant_matrix[i])) >> 4;
- level = (level - 1) | 1;
- if(block[i] < 0) level = -level;
- block[i] = level;
- }
- i++;
- }
- __asm __volatile(
- "movd %0, %%mm6\n\t" /* mm6 = qscale | 0 */
- "punpckldq %%mm6, %%mm6\n\t" /* mm6 = qscale | qscale */
- "movq %2, %%mm4\n\t"
- "movq %%mm6, %%mm7\n\t"
- "movq %1, %%mm5\n\t"
- "packssdw %%mm6, %%mm7\n\t" /* mm7 = qscale | qscale | qscale | qscale */
- "pxor %%mm6, %%mm6\n\t"
- ::"g"(qscale),"m"(mm_wone),"m"(mm_wabs):"memory");
- for(;i<64;i+=4) {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %%mm7, %%mm1\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm0, %%mm3\n\t"
- "pcmpgtw %%mm6, %%mm2\n\t"
- "pmullw %2, %%mm1\n\t"
- "pandn %%mm4, %%mm2\n\t"
- "por %%mm5, %%mm2\n\t"
- "pmullw %%mm2, %%mm0\n\t" /* mm0 = abs(block[i]). */
- "psllw $1, %%mm0\n\t" /* block[i] <<= 1 */
- "paddw %%mm5, %%mm0\n\t" /* block[i] ++ */
-
- "pmullw %%mm0, %%mm1\n\t"
- "psraw $4, %%mm1\n\t"
- "pcmpeqw %%mm6, %%mm3\n\t"
- "psubw %%mm5, %%mm1\n\t" /* block[i] --; */
- "pandn %%mm4, %%mm3\n\t" /* fake of pcmpneqw : mm0 != 0 then mm1 = -1 */
- "por %%mm5, %%mm1\n\t" /* block[i] |= 1 */
- "pmullw %%mm2, %%mm1\n\t" /* change signs again */
-
- "pand %%mm3, %%mm1\n\t" /* nullify if was zero */
- "movq %%mm1, %0"
- :"=m"(block[i])
- :"m"(block[i]), "m"(quant_matrix[i])
- :"memory");
- }
- }
-}
-
-void MPV_common_init_mmx(MpegEncContext *s)
-{
- int mm_flags;
-
- mm_flags = xine_mm_accel();
-
- if (mm_flags & MM_ACCEL_X86_MMX) {
- /* XXX: should include h263 optimization too. It would go even
- faster! */
- s->dct_unquantize = dct_unquantize_mpeg1_mmx;
- }
-}
diff --git a/src/libffmpeg/libavcodec/msmpeg4.c b/src/libffmpeg/libavcodec/msmpeg4.c
index 63f111bdd..01e3d5438 100644
--- a/src/libffmpeg/libavcodec/msmpeg4.c
+++ b/src/libffmpeg/libavcodec/msmpeg4.c
@@ -30,7 +30,7 @@
* - (encoding) select best vlc/dc table
* - (decoding) handle slice indication
*/
-#undef DEBUG
+//#define DEBUG
/* motion vector table */
typedef struct MVTable {
@@ -664,7 +664,7 @@ int msmpeg4_decode_mb(MpegEncContext *s,
/* special slice handling */
if (s->mb_x == 0) {
- if ((s->mb_y % s->slice_height) == 0) {
+ if (s->slice_height && (s->mb_y % s->slice_height) == 0) {
int wrap;
/* reset DC pred (set previous line to 1024) */
wrap = 2 * s->mb_width + 2;
diff --git a/src/libffmpeg/libavcodec/rv10.c b/src/libffmpeg/libavcodec/rv10.c
index 6d9376c54..f4ebc9016 100644
--- a/src/libffmpeg/libavcodec/rv10.c
+++ b/src/libffmpeg/libavcodec/rv10.c
@@ -24,7 +24,7 @@
#include "avcodec.h"
#include "mpegvideo.h"
-#undef DEBUG
+//#define DEBUG
static const UINT16 rv_lum_code[256] =
{
diff --git a/src/libffmpeg/libavcodec/sad_mmx.s b/src/libffmpeg/libavcodec/sad_mmx.s
deleted file mode 100644
index 42c7ade59..000000000
--- a/src/libffmpeg/libavcodec/sad_mmx.s
+++ /dev/null
@@ -1,799 +0,0 @@
-# MMX/SSE optimized routines for SAD of 16*16 macroblocks
-# Copyright (C) Juan J. Sierralta P. <juanjo@atmlab.utfsm.cl>
-#
-# dist1_* Original Copyright (C) 2000 Chris Atenasio <chris@crud.net>
-# Enhancements and rest Copyright (C) 2000 Andrew Stevens <as@comlab.ox.ac.uk>
-
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
-# of the License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-#
-
-.global pix_abs16x16_mmx
-
-# int pix_abs16x16_mmx(unsigned char *pix1,unsigned char *pix2, int lx, int h);
-# esi = p1 (init: blk1)
-# edi = p2 (init: blk2)
-# ecx = rowsleft (init: h)
-# edx = lx;
-
-# mm0 = distance accumulators (4 words)
-# mm1 = distance accumulators (4 words)
-# mm2 = temp
-# mm3 = temp
-# mm4 = temp
-# mm5 = temp
-# mm6 = 0
-# mm7 = temp
-
-
-.align 32
-pix_abs16x16_mmx:
- pushl %ebp # save frame pointer
- movl %esp,%ebp
-
- pushl %ebx # Saves registers (called saves convention in
- pushl %ecx # x86 GCC it seems)
- pushl %edx #
- pushl %esi
- pushl %edi
-
- pxor %mm0,%mm0 # zero acculumators
- pxor %mm1,%mm1
- pxor %mm6,%mm6
- movl 8(%ebp),%esi # get pix1
- movl 12(%ebp),%edi # get pix2
- movl 16(%ebp),%edx # get lx
- movl 20(%ebp),%ecx # get rowsleft
- jmp pix_abs16x16_mmx.nextrow
-.align 32
-
-pix_abs16x16_mmx.nextrow:
- # First 8 bytes of the row
-
- movq (%edi),%mm4 # load first 8 bytes of pix2 row
- movq (%esi),%mm5 # load first 8 bytes of pix1 row
- movq %mm4,%mm3 # mm4 := abs(mm4-mm5)
- movq 8(%esi),%mm2 # load last 8 bytes of pix1 row
- psubusb %mm5,%mm4
- movq 8(%edi),%mm7 # load last 8 bytes of pix2 row
- psubusb %mm3,%mm5
- por %mm5,%mm4
-
- # Last 8 bytes of the row
-
- movq %mm7,%mm3 # mm7 := abs(mm7-mm2)
- psubusb %mm2,%mm7
- psubusb %mm3,%mm2
- por %mm2,%mm7
-
- # Now mm4 and mm7 have 16 absdiffs to add
-
- # First 8 bytes of the row2
-
-
- addl %edx,%edi
- movq (%edi),%mm2 # load first 8 bytes of pix2 row
- addl %edx,%esi
- movq (%esi),%mm5 # load first 8 bytes of pix1 row
-
-
-
- movq %mm2,%mm3 # mm2 := abs(mm2-mm5)
- psubusb %mm5,%mm2
- movq 8(%esi),%mm6 # load last 8 bytes of pix1 row
- psubusb %mm3,%mm5
- por %mm5,%mm2
-
- # Last 8 bytes of the row2
-
- movq 8(%edi),%mm5 # load last 8 bytes of pix2 row
-
-
- movq %mm5,%mm3 # mm5 := abs(mm5-mm6)
- psubusb %mm6,%mm5
- psubusb %mm3,%mm6
- por %mm6,%mm5
-
- # Now mm2, mm4, mm5, mm7 have 32 absdiffs
-
- movq %mm7,%mm3
-
- pxor %mm6,%mm6 # Zero mm6
-
- punpcklbw %mm6,%mm3 # Unpack to words and add
- punpckhbw %mm6,%mm7
- paddusw %mm3,%mm7
-
- movq %mm5,%mm3
-
- punpcklbw %mm6,%mm3 # Unpack to words and add
- punpckhbw %mm6,%mm5
- paddusw %mm3,%mm5
-
- paddusw %mm7,%mm0 # Add to the acumulator (mm0)
- paddusw %mm5,%mm1 # Add to the acumulator (mm1)
-
- movq %mm4,%mm3
-
- punpcklbw %mm6,%mm3 # Unpack to words and add
- punpckhbw %mm6,%mm4
- movq %mm2,%mm5
- paddusw %mm3,%mm4
-
-
-
- punpcklbw %mm6,%mm5 # Unpack to words and add
- punpckhbw %mm6,%mm2
- paddusw %mm5,%mm2
-
- # Loop termination
-
- addl %edx,%esi # update pointers to next row
- paddusw %mm4,%mm0 # Add to the acumulator (mm0)
- addl %edx,%edi
- subl $2,%ecx
- paddusw %mm2,%mm1 # Add to the acumulator (mm1)
- testl %ecx,%ecx # check rowsleft
- jnz pix_abs16x16_mmx.nextrow
-
- paddusw %mm1,%mm0
- movq %mm0,%mm2 # Copy mm0 to mm2
- psrlq $32,%mm2
- paddusw %mm2,%mm0 # Add
- movq %mm0,%mm3
- psrlq $16,%mm3
- paddusw %mm3,%mm0
- movd %mm0,%eax # Store return value
- andl $0xffff,%eax
-
- popl %edi
- popl %esi
- popl %edx
- popl %ecx
- popl %ebx
-
- popl %ebp # restore stack pointer
-
- #emms ; clear mmx registers
- ret # return
-
-.global pix_abs16x16_sse
-
-# int pix_abs16x16_mmx(unsigned char *pix1,unsigned char *pix2, int lx, int h);
-# esi = p1 (init: blk1)
-# edi = p2 (init: blk2)
-# ecx = rowsleft (init: h)
-# edx = lx;
-
-# mm0 = distance accumulators (4 words)
-# mm1 = distance accumulators (4 words)
-# mm2 = temp
-# mm3 = temp
-# mm4 = temp
-# mm5 = temp
-# mm6 = temp
-# mm7 = temp
-
-
-.align 32
-pix_abs16x16_sse:
- pushl %ebp # save frame pointer
- movl %esp,%ebp
-
- pushl %ebx # Saves registers (called saves convention in
- pushl %ecx # x86 GCC it seems)
- pushl %edx #
- pushl %esi
- pushl %edi
-
- pxor %mm0,%mm0 # zero acculumators
- pxor %mm1,%mm1
- movl 8(%ebp),%esi # get pix1
- movl 12(%ebp),%edi # get pix2
- movl 16(%ebp),%edx # get lx
- movl 20(%ebp),%ecx # get rowsleft
- jmp pix_abs16x16_sse.next4row
-.align 32
-
-pix_abs16x16_sse.next4row:
- # First row
-
- movq (%edi),%mm4 # load first 8 bytes of pix2 row
- movq 8(%edi),%mm5 # load last 8 bytes of pix2 row
- psadbw (%esi),%mm4 # SAD of first 8 bytes
- psadbw 8(%esi),%mm5 # SAD of last 8 bytes
- paddw %mm4,%mm0 # Add to acumulators
- paddw %mm5,%mm1
-
- # Second row
-
- addl %edx,%edi #
- addl %edx,%esi #
-
- movq (%edi),%mm6 # load first 8 bytes of pix2 row
- movq 8(%edi),%mm7 # load last 8 bytes of pix2 row
- psadbw (%esi),%mm6 # SAD of first 8 bytes
- psadbw 8(%esi),%mm7 # SAD of last 8 bytes
- paddw %mm6,%mm0 # Add to acumulators
- paddw %mm7,%mm1
-
- # Third row
-
- addl %edx,%edi #
- addl %edx,%esi #
-
- movq (%edi),%mm4 # load first 8 bytes of pix2 row
- movq 8(%edi),%mm5 # load last 8 bytes of pix2 row
- psadbw (%esi),%mm4 # SAD of first 8 bytes
- psadbw 8(%esi),%mm5 # SAD of last 8 bytes
- paddw %mm4,%mm0 # Add to acumulators
- paddw %mm5,%mm1
-
- # Fourth row
-
- addl %edx,%edi #
- addl %edx,%esi #
-
- movq (%edi),%mm6 # load first 8 bytes of pix2 row
- movq 8(%edi),%mm7 # load last 8 bytes of pix2 row
- psadbw (%esi),%mm6 # SAD of first 8 bytes
- psadbw 8(%esi),%mm7 # SAD of last 8 bytes
- paddw %mm6,%mm0 # Add to acumulators
- paddw %mm7,%mm1
-
- # Loop termination
-
- addl %edx,%esi # update pointers to next row
- addl %edx,%edi
- subl $4,%ecx
- testl %ecx,%ecx # check rowsleft
- jnz pix_abs16x16_sse.next4row
-
- paddd %mm1,%mm0 # Sum acumulators
- movd %mm0,%eax # Store return value
-
- popl %edi
- popl %esi
- popl %edx
- popl %ecx
- popl %ebx
-
- popl %ebp # restore stack pointer
-
- #emms ; clear mmx registers
- ret # return
-
-.global pix_abs16x16_x2_mmx
-
-# int pix_abs16x16_x2_mmx(unsigned char *pix1,unsigned char *pix2, int lx, int h);
-# esi = p1 (init: blk1)
-# edi = p2 (init: blk2)
-# ecx = rowsleft (init: h)
-# edx = lx;
-
-# mm0 = distance accumulators (4 words)
-# mm1 = distance accumulators (4 words)
-# mm2 = temp
-# mm3 = temp
-# mm4 = temp
-# mm5 = temp
-# mm6 = 0
-# mm7 = temp
-
-
-.align 32
-pix_abs16x16_x2_mmx:
- pushl %ebp # save frame pointer
- movl %esp,%ebp
-
- pushl %ebx # Saves registers (called saves convention in
- pushl %ecx # x86 GCC it seems)
- pushl %edx #
- pushl %esi
- pushl %edi
-
- pxor %mm0,%mm0 # zero acculumators
- pxor %mm1,%mm1
- pxor %mm6,%mm6
- movl 8(%ebp),%esi # get pix1
- movl 12(%ebp),%edi # get pix2
- movl 16(%ebp),%edx # get lx
- movl 20(%ebp),%ecx # get rowsleft
- jmp pix_abs16x16_x2_mmx.nextrow_x2
-.align 32
-
-pix_abs16x16_x2_mmx.nextrow_x2:
- # First 8 bytes of the row
-
- movq (%edi),%mm4 # load first 8 bytes of pix2 row
- movq 1(%edi),%mm5 # load bytes 1-8 of pix2 row
-
- movq %mm4,%mm2 # copy mm4 on mm2
- movq %mm5,%mm3 # copy mm5 on mm3
- punpcklbw %mm6,%mm4 # first 4 bytes of [edi] on mm4
- punpcklbw %mm6,%mm5 # first 4 bytes of [edi+1] on mm5
- paddusw %mm5,%mm4 # mm4 := first 4 bytes interpolated in words
- psrlw $1,%mm4
-
- punpckhbw %mm6,%mm2 # last 4 bytes of [edi] on mm2
- punpckhbw %mm6,%mm3 # last 4 bytes of [edi+1] on mm3
- paddusw %mm3,%mm2 # mm2 := last 4 bytes interpolated in words
- psrlw $1,%mm2
-
- packuswb %mm2,%mm4 # pack 8 bytes interpolated on mm4
- movq (%esi),%mm5 # load first 8 bytes of pix1 row
-
- movq %mm4,%mm3 # mm4 := abs(mm4-mm5)
- psubusb %mm5,%mm4
- psubusb %mm3,%mm5
- por %mm5,%mm4
-
- # Last 8 bytes of the row
-
- movq 8(%edi),%mm7 # load last 8 bytes of pix2 row
- movq 9(%edi),%mm5 # load bytes 10-17 of pix2 row
-
- movq %mm7,%mm2 # copy mm7 on mm2
- movq %mm5,%mm3 # copy mm5 on mm3
- punpcklbw %mm6,%mm7 # first 4 bytes of [edi+8] on mm7
- punpcklbw %mm6,%mm5 # first 4 bytes of [edi+9] on mm5
- paddusw %mm5,%mm7 # mm1 := first 4 bytes interpolated in words
- psrlw $1,%mm7
-
- punpckhbw %mm6,%mm2 # last 4 bytes of [edi] on mm2
- punpckhbw %mm6,%mm3 # last 4 bytes of [edi+1] on mm3
- paddusw %mm3,%mm2 # mm2 := last 4 bytes interpolated in words
- psrlw $1,%mm2
-
- packuswb %mm2,%mm7 # pack 8 bytes interpolated on mm1
- movq 8(%esi),%mm5 # load last 8 bytes of pix1 row
-
- movq %mm7,%mm3 # mm7 := abs(mm1-mm5)
- psubusb %mm5,%mm7
- psubusb %mm3,%mm5
- por %mm5,%mm7
-
- # Now mm4 and mm7 have 16 absdiffs to add
-
- movq %mm4,%mm3 # Make copies of these bytes
- movq %mm7,%mm2
-
- punpcklbw %mm6,%mm4 # Unpack to words and add
- punpcklbw %mm6,%mm7
- paddusw %mm7,%mm4
- paddusw %mm4,%mm0 # Add to the acumulator (mm0)
-
- punpckhbw %mm6,%mm3 # Unpack to words and add
- punpckhbw %mm6,%mm2
- paddusw %mm2,%mm3
- paddusw %mm3,%mm1 # Add to the acumulator (mm1)
-
- # Loop termination
-
- addl %edx,%esi # update pointers to next row
- addl %edx,%edi
-
- subl $1,%ecx
- testl %ecx,%ecx # check rowsleft
- jnz pix_abs16x16_x2_mmx.nextrow_x2
-
- paddusw %mm1,%mm0
-
- movq %mm0,%mm1 # Copy mm0 to mm1
- psrlq $32,%mm1
- paddusw %mm1,%mm0 # Add
- movq %mm0,%mm2
- psrlq $16,%mm2
- paddusw %mm2,%mm0
- movd %mm0,%eax # Store return value
- andl $0xffff,%eax
-
- popl %edi
- popl %esi
- popl %edx
- popl %ecx
- popl %ebx
-
- popl %ebp # restore stack pointer
-
- emms # clear mmx registers
- ret # return
-
-.global pix_abs16x16_y2_mmx
-
-# int pix_abs16x16_y2_mmx(unsigned char *pix1,unsigned char *pix2, int lx, int h);
-# esi = p1 (init: blk1)
-# edi = p2 (init: blk2)
-# ebx = p2 + lx
-# ecx = rowsleft (init: h)
-# edx = lx;
-
-# mm0 = distance accumulators (4 words)
-# mm1 = distance accumulators (4 words)
-# mm2 = temp
-# mm3 = temp
-# mm4 = temp
-# mm5 = temp
-# mm6 = 0
-# mm7 = temp
-
-
-.align 32
-pix_abs16x16_y2_mmx:
- pushl %ebp # save frame pointer
- movl %esp,%ebp
-
- pushl %ebx # Saves registers (called saves convention in
- pushl %ecx # x86 GCC it seems)
- pushl %edx #
- pushl %esi
- pushl %edi
-
- pxor %mm0,%mm0 # zero acculumators
- pxor %mm1,%mm1
- pxor %mm6,%mm6
- movl 8(%ebp),%esi # get pix1
- movl 12(%ebp),%edi # get pix2
- movl 16(%ebp),%edx # get lx
- movl 20(%ebp),%ecx # get rowsleft
- movl %edi,%ebx
- addl %edx,%ebx
- jmp pix_abs16x16_y2_mmx.nextrow_y2
-.align 32
-
-pix_abs16x16_y2_mmx.nextrow_y2:
- # First 8 bytes of the row
-
- movq (%edi),%mm4 # load first 8 bytes of pix2 row
- movq (%ebx),%mm5 # load bytes 1-8 of pix2 row
-
- movq %mm4,%mm2 # copy mm4 on mm2
- movq %mm5,%mm3 # copy mm5 on mm3
- punpcklbw %mm6,%mm4 # first 4 bytes of [edi] on mm4
- punpcklbw %mm6,%mm5 # first 4 bytes of [ebx] on mm5
- paddusw %mm5,%mm4 # mm4 := first 4 bytes interpolated in words
- psrlw $1,%mm4
-
- punpckhbw %mm6,%mm2 # last 4 bytes of [edi] on mm2
- punpckhbw %mm6,%mm3 # last 4 bytes of [edi+1] on mm3
- paddusw %mm3,%mm2 # mm2 := last 4 bytes interpolated in words
- psrlw $1,%mm2
-
- packuswb %mm2,%mm4 # pack 8 bytes interpolated on mm4
- movq (%esi),%mm5 # load first 8 bytes of pix1 row
-
- movq %mm4,%mm3 # mm4 := abs(mm4-mm5)
- psubusb %mm5,%mm4
- psubusb %mm3,%mm5
- por %mm5,%mm4
-
- # Last 8 bytes of the row
-
- movq 8(%edi),%mm7 # load last 8 bytes of pix2 row
- movq 8(%ebx),%mm5 # load bytes 10-17 of pix2 row
-
- movq %mm7,%mm2 # copy mm7 on mm2
- movq %mm5,%mm3 # copy mm5 on mm3
- punpcklbw %mm6,%mm7 # first 4 bytes of [edi+8] on mm7
- punpcklbw %mm6,%mm5 # first 4 bytes of [ebx+8] on mm5
- paddusw %mm5,%mm7 # mm1 := first 4 bytes interpolated in words
- psrlw $1,%mm7
-
- punpckhbw %mm6,%mm2 # last 4 bytes of [edi+8] on mm2
- punpckhbw %mm6,%mm3 # last 4 bytes of [ebx+8] on mm3
- paddusw %mm3,%mm2 # mm2 := last 4 bytes interpolated in words
- psrlw $1,%mm2
-
- packuswb %mm2,%mm7 # pack 8 bytes interpolated on mm1
- movq 8(%esi),%mm5 # load last 8 bytes of pix1 row
-
- movq %mm7,%mm3 # mm7 := abs(mm1-mm5)
- psubusb %mm5,%mm7
- psubusb %mm3,%mm5
- por %mm5,%mm7
-
- # Now mm4 and mm7 have 16 absdiffs to add
-
- movq %mm4,%mm3 # Make copies of these bytes
- movq %mm7,%mm2
-
- punpcklbw %mm6,%mm4 # Unpack to words and add
- punpcklbw %mm6,%mm7
- paddusw %mm7,%mm4
- paddusw %mm4,%mm0 # Add to the acumulator (mm0)
-
- punpckhbw %mm6,%mm3 # Unpack to words and add
- punpckhbw %mm6,%mm2
- paddusw %mm2,%mm3
- paddusw %mm3,%mm1 # Add to the acumulator (mm1)
-
- # Loop termination
-
- addl %edx,%esi # update pointers to next row
- addl %edx,%edi
- addl %edx,%ebx
- subl $1,%ecx
- testl %ecx,%ecx # check rowsleft
- jnz pix_abs16x16_y2_mmx.nextrow_y2
-
- paddusw %mm1,%mm0
-
- movq %mm0,%mm1 # Copy mm0 to mm1
- psrlq $32,%mm1
- paddusw %mm1,%mm0 # Add
- movq %mm0,%mm2
- psrlq $16,%mm2
- paddusw %mm2,%mm0
- movd %mm0,%eax # Store return value
- andl $0xffff,%eax
-
- popl %edi
- popl %esi
- popl %edx
- popl %ecx
- popl %ebx
-
- popl %ebp # restore stack pointer
-
- emms # clear mmx registers
- ret # return
-
-.global pix_abs16x16_xy2_mmx
-
-# int pix_abs16x16_xy2_mmx(unsigned char *p1,unsigned char *p2,int lx,int h);
-
-# esi = p1 (init: blk1)
-# edi = p2 (init: blk2)
-# ebx = p1+lx
-# ecx = rowsleft (init: h)
-# edx = lx;
-
-# mm0 = distance accumulators (4 words)
-# mm1 = bytes p2
-# mm2 = bytes p1
-# mm3 = bytes p1+lx
-# I'd love to find someplace to stash p1+1 and p1+lx+1's bytes
-# but I don't think thats going to happen in iA32-land...
-# mm4 = temp 4 bytes in words interpolating p1, p1+1
-# mm5 = temp 4 bytes in words from p2
-# mm6 = temp comparison bit mask p1,p2
-# mm7 = temp comparison bit mask p2,p1
-
-
-.align 32
-pix_abs16x16_xy2_mmx:
- pushl %ebp # save stack pointer
- movl %esp,%ebp # so that we can do this
-
- pushl %ebx # Saves registers (called saves convention in
- pushl %ecx # x86 GCC it seems)
- pushl %edx #
- pushl %esi
- pushl %edi
-
- pxor %mm0,%mm0 # zero acculumators
-
- movl 12(%ebp),%esi # get p1
- movl 8(%ebp),%edi # get p2
- movl 16(%ebp),%edx # get lx
- movl 20(%ebp),%ecx # rowsleft := h
- movl %esi,%ebx
- addl %edx,%ebx
- jmp pix_abs16x16_xy2_mmx.nextrowmm11 # snap to it
-.align 32
-pix_abs16x16_xy2_mmx.nextrowmm11:
-
- ##
- ## First 8 bytes of row
- ##
-
- ## First 4 bytes of 8
-
- movq (%esi),%mm4 # mm4 := first 4 bytes p1
- pxor %mm7,%mm7
- movq %mm4,%mm2 # mm2 records all 8 bytes
- punpcklbw %mm7,%mm4 # First 4 bytes p1 in Words...
-
- movq (%ebx),%mm6 # mm6 := first 4 bytes p1+lx
- movq %mm6,%mm3 # mm3 records all 8 bytes
- punpcklbw %mm7,%mm6
- paddw %mm6,%mm4
-
-
- movq 1(%esi),%mm5 # mm5 := first 4 bytes p1+1
- punpcklbw %mm7,%mm5 # First 4 bytes p1 in Words...
- paddw %mm5,%mm4
- movq 1(%ebx),%mm6 # mm6 := first 4 bytes p1+lx+1
- punpcklbw %mm7,%mm6
- paddw %mm6,%mm4
-
- psrlw $2,%mm4 # mm4 := First 4 bytes interpolated in words
-
- movq (%edi),%mm5 # mm5:=first 4 bytes of p2 in words
- movq %mm5,%mm1
- punpcklbw %mm7,%mm5
-
- movq %mm4,%mm7
- pcmpgtw %mm5,%mm7 # mm7 := [i : W0..3,mm4>mm5]
-
- movq %mm4,%mm6 # mm6 := [i : W0..3, (mm4-mm5)*(mm4-mm5 > 0)]
- psubw %mm5,%mm6
- pand %mm7,%mm6
-
- paddw %mm6,%mm0 # Add to accumulator
-
- movq %mm5,%mm6 # mm6 := [i : W0..3,mm5>mm4]
- pcmpgtw %mm4,%mm6
- psubw %mm4,%mm5 # mm5 := [i : B0..7, (mm5-mm4)*(mm5-mm4 > 0)]
- pand %mm6,%mm5
-
- paddw %mm5,%mm0 # Add to accumulator
-
- ## Second 4 bytes of 8
-
- movq %mm2,%mm4 # mm4 := Second 4 bytes p1 in words
- pxor %mm7,%mm7
- punpckhbw %mm7,%mm4
- movq %mm3,%mm6 # mm6 := Second 4 bytes p1+1 in words
- punpckhbw %mm7,%mm6
- paddw %mm6,%mm4
-
- movq 1(%esi),%mm5 # mm5 := first 4 bytes p1+1
- punpckhbw %mm7,%mm5 # First 4 bytes p1 in Words...
- paddw %mm5,%mm4
- movq 1(%ebx),%mm6 # mm6 := first 4 bytes p1+lx+1
- punpckhbw %mm7,%mm6
- paddw %mm6,%mm4
-
- psrlw $2,%mm4 # mm4 := First 4 bytes interpolated in words
-
- movq %mm1,%mm5 # mm5:= second 4 bytes of p2 in words
- punpckhbw %mm7,%mm5
-
- movq %mm4,%mm7
- pcmpgtw %mm5,%mm7 # mm7 := [i : W0..3,mm4>mm5]
-
- movq %mm4,%mm6 # mm6 := [i : W0..3, (mm4-mm5)*(mm4-mm5 > 0)]
- psubw %mm5,%mm6
- pand %mm7,%mm6
-
- paddw %mm6,%mm0 # Add to accumulator
-
- movq %mm5,%mm6 # mm6 := [i : W0..3,mm5>mm4]
- pcmpgtw %mm4,%mm6
- psubw %mm4,%mm5 # mm5 := [i : B0..7, (mm5-mm4)*(mm5-mm4 > 0)]
- pand %mm6,%mm5
-
- paddw %mm5,%mm0 # Add to accumulator
-
-
- ##
- ## Second 8 bytes of row
- ##
- ## First 4 bytes of 8
-
- movq 8(%esi),%mm4 # mm4 := first 4 bytes p1+8
- pxor %mm7,%mm7
- movq %mm4,%mm2 # mm2 records all 8 bytes
- punpcklbw %mm7,%mm4 # First 4 bytes p1 in Words...
-
- movq 8(%ebx),%mm6 # mm6 := first 4 bytes p1+lx+8
- movq %mm6,%mm3 # mm3 records all 8 bytes
- punpcklbw %mm7,%mm6
- paddw %mm6,%mm4
-
-
- movq 9(%esi),%mm5 # mm5 := first 4 bytes p1+9
- punpcklbw %mm7,%mm5 # First 4 bytes p1 in Words...
- paddw %mm5,%mm4
- movq 9(%ebx),%mm6 # mm6 := first 4 bytes p1+lx+9
- punpcklbw %mm7,%mm6
- paddw %mm6,%mm4
-
- psrlw $2,%mm4 # mm4 := First 4 bytes interpolated in words
-
- movq 8(%edi),%mm5 # mm5:=first 4 bytes of p2+8 in words
- movq %mm5,%mm1
- punpcklbw %mm7,%mm5
-
- movq %mm4,%mm7
- pcmpgtw %mm5,%mm7 # mm7 := [i : W0..3,mm4>mm5]
-
- movq %mm4,%mm6 # mm6 := [i : W0..3, (mm4-mm5)*(mm4-mm5 > 0)]
- psubw %mm5,%mm6
- pand %mm7,%mm6
-
- paddw %mm6,%mm0 # Add to accumulator
-
- movq %mm5,%mm6 # mm6 := [i : W0..3,mm5>mm4]
- pcmpgtw %mm4,%mm6
- psubw %mm4,%mm5 # mm5 := [i : B0..7, (mm5-mm4)*(mm5-mm4 > 0)]
- pand %mm6,%mm5
-
- paddw %mm5,%mm0 # Add to accumulator
-
- ## Second 4 bytes of 8
-
- movq %mm2,%mm4 # mm4 := Second 4 bytes p1 in words
- pxor %mm7,%mm7
- punpckhbw %mm7,%mm4
- movq %mm3,%mm6 # mm6 := Second 4 bytes p1+1 in words
- punpckhbw %mm7,%mm6
- paddw %mm6,%mm4
-
- movq 9(%esi),%mm5 # mm5 := first 4 bytes p1+1
- punpckhbw %mm7,%mm5 # First 4 bytes p1 in Words...
- paddw %mm5,%mm4
- movq 9(%ebx),%mm6 # mm6 := first 4 bytes p1+lx+1
- punpckhbw %mm7,%mm6
- paddw %mm6,%mm4
-
- psrlw $2,%mm4 # mm4 := First 4 bytes interpolated in words
-
- movq %mm1,%mm5 # mm5:= second 4 bytes of p2 in words
- punpckhbw %mm7,%mm5
-
- movq %mm4,%mm7
- pcmpgtw %mm5,%mm7 # mm7 := [i : W0..3,mm4>mm5]
-
- movq %mm4,%mm6 # mm6 := [i : W0..3, (mm4-mm5)*(mm4-mm5 > 0)]
- psubw %mm5,%mm6
- pand %mm7,%mm6
-
- paddw %mm6,%mm0 # Add to accumulator
-
- movq %mm5,%mm6 # mm6 := [i : W0..3,mm5>mm4]
- pcmpgtw %mm4,%mm6
- psubw %mm4,%mm5 # mm5 := [i : B0..7, (mm5-mm4)*(mm5-mm4 > 0)]
- pand %mm6,%mm5
-
- paddw %mm5,%mm0 # Add to accumulator
-
-
- ##
- ## Loop termination condition... and stepping
- ##
-
- addl %edx,%esi # update pointer to next row
- addl %edx,%edi # ditto
- addl %edx,%ebx
-
- subl $1,%ecx
- testl %ecx,%ecx # check rowsleft
- jnz pix_abs16x16_xy2_mmx.nextrowmm11
-
- ## Sum the Accumulators
- movq %mm0,%mm4
- psrlq $32,%mm4
- paddw %mm4,%mm0
- movq %mm0,%mm6
- psrlq $16,%mm6
- paddw %mm6,%mm0
- movd %mm0,%eax # store return value
- andl $0xffff,%eax
-
- popl %edi
- popl %esi
- popl %edx
- popl %ecx
- popl %ebx
-
- popl %ebp # restore stack pointer
-
- emms # clear mmx registers
- ret # we now return you to your regular programming
-
-
-
diff --git a/src/libffmpeg/libavcodec/simple_idct.c b/src/libffmpeg/libavcodec/simple_idct.c
new file mode 100644
index 000000000..5459b81b3
--- /dev/null
+++ b/src/libffmpeg/libavcodec/simple_idct.c
@@ -0,0 +1,231 @@
+/*
+ Copyright (C) 2001 Michael Niedermayer (michaelni@gmx.at)
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+/*
+ based upon some commented-out C code from mpeg2dec (idct_mmx.c, written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
+*/
+
+#include <inttypes.h>
+
+#include "simple_idct.h"
+
+#if 0 /* disabled coefficient set: Wi = 2048*sqrt(2)*cos(i*pi/16) */
+#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
+#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
+#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
+#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
+#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
+#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
+#define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */
+#define ROW_SHIFT 8
+#define COL_SHIFT 17
+#else /* active coefficient set: Wi = cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5, truncated to an integer */
+#define W1 22725 //cos(1*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W2 21407 //cos(2*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W3 19266 //cos(3*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W4 16384 //cos(4*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W5 12873 //cos(5*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W6 8867 //cos(6*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W7 4520 //cos(7*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define ROW_SHIFT 11 // result shift used by idctRow
+#define COL_SHIFT 20 // 6; result shift used by idctCol
+#endif
+#if 1
+static void inline idctRow (int16_t * row) /* in-place 1-D inverse DCT of the 8 coefficients row[0..7] */
+{
+ int a0, a1, a2, a3, b0, b1, b2, b3; /* a*: even-index terms plus rounding bias; b*: odd-index terms */
+ const int C1 =W1;
+ const int C2 =W2;
+ const int C3 =W3;
+ const int C4 =W4;
+ const int C5 =W5;
+ const int C6 =W6;
+ const int C7 =W7;
+ /* Shortcut: when row[1..7] are all zero, every output is row[0]<<3, the same value the general path yields for that input. */
+ if( !(row[1] | row[2] |row[3] |row[4] |row[5] |row[6] | row[7])) {
+ row[0] = row[1] = row[2] = row[3] = row[4] =
+ row[5] = row[6] = row[7] = row[0]<<3;
+ return;
+ }
+ /* Even part: weighted sums of row[0], row[2], row[4], row[6]; 1<<(ROW_SHIFT-1) rounds the later >> ROW_SHIFT. */
+ a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + (1<<(ROW_SHIFT-1));
+ a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + (1<<(ROW_SHIFT-1));
+ a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + (1<<(ROW_SHIFT-1));
+ a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + (1<<(ROW_SHIFT-1));
+ /* Odd part: weighted sums of row[1], row[3], row[5], row[7]. */
+ b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
+ b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
+ b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
+ b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
+ /* Butterfly: outputs k and 7-k are a + b and a - b, shifted down by ROW_SHIFT and stored back into row[] as int16_t. */
+ row[0] = (a0 + b0) >> ROW_SHIFT;
+ row[1] = (a1 + b1) >> ROW_SHIFT;
+ row[2] = (a2 + b2) >> ROW_SHIFT;
+ row[3] = (a3 + b3) >> ROW_SHIFT;
+ row[4] = (a3 - b3) >> ROW_SHIFT;
+ row[5] = (a2 - b2) >> ROW_SHIFT;
+ row[6] = (a1 - b1) >> ROW_SHIFT;
+ row[7] = (a0 - b0) >> ROW_SHIFT;
+}
+
+static void inline idctCol (int16_t * col) /* in-place 1-D inverse DCT of the 8 values col[8*k], k = 0..7 (stride 8) */
+{
+ int a0, a1, a2, a3, b0, b1, b2, b3; /* a*: even-index terms; b*: odd-index terms */
+ const int C1 =W1;
+ const int C2 =W2;
+ const int C3 =W3;
+ const int C4 =W4;
+ const int C5 =W5;
+ const int C6 =W6;
+ const int C7 =W7;
+/* Disabled all-zero shortcut, kept for reference:
+ if( !(col[8*1] | col[8*2] |col[8*3] |col[8*4] |col[8*5] |col[8*6] | col[8*7])) {
+ col[8*0] = col[8*1] = col[8*2] = col[8*3] = col[8*4] =
+ col[8*5] = col[8*6] = col[8*7] = col[8*0]<<3;
+ return;
+ }*/
+ col[0] += (1<<(COL_SHIFT-1))/W4; /* rounding: C4*col[0] appears in a0..a3, so biasing col[0] adds 1<<(COL_SHIFT-1) to every output before the shift. NOTE(review): the sum is stored back into int16_t and can exceed its range when col[0] is close to INT16_MAX */
+ a0 = C4*col[8*0] + C2*col[8*2] + C4*col[8*4] + C6*col[8*6];
+ a1 = C4*col[8*0] + C6*col[8*2] - C4*col[8*4] - C2*col[8*6];
+ a2 = C4*col[8*0] - C6*col[8*2] - C4*col[8*4] + C2*col[8*6];
+ a3 = C4*col[8*0] - C2*col[8*2] + C4*col[8*4] - C6*col[8*6];
+ /* Odd part: weighted sums of col[8*1], col[8*3], col[8*5], col[8*7]. */
+ b0 = C1*col[8*1] + C3*col[8*3] + C5*col[8*5] + C7*col[8*7];
+ b1 = C3*col[8*1] - C7*col[8*3] - C1*col[8*5] - C5*col[8*7];
+ b2 = C5*col[8*1] - C1*col[8*3] + C7*col[8*5] + C3*col[8*7];
+ b3 = C7*col[8*1] - C5*col[8*3] + C3*col[8*5] - C1*col[8*7];
+ /* Butterfly: outputs k and 7-k are a + b and a - b, shifted down by COL_SHIFT and stored back into the column. */
+ col[8*0] = (a0 + b0) >> COL_SHIFT;
+ col[8*1] = (a1 + b1) >> COL_SHIFT;
+ col[8*2] = (a2 + b2) >> COL_SHIFT;
+ col[8*3] = (a3 + b3) >> COL_SHIFT;
+ col[8*4] = (a3 - b3) >> COL_SHIFT;
+ col[8*5] = (a2 - b2) >> COL_SHIFT;
+ col[8*6] = (a1 - b1) >> COL_SHIFT;
+ col[8*7] = (a0 - b0) >> COL_SHIFT;
+}
+
+void simple_idct (short *block) /* in-place 2-D inverse DCT of an 8x8 block: block[8*r + c], 64 values; the helpers take int16_t*, so short is assumed to be 16 bits wide */
+{
+ int i;
+ for(i=0; i<8; i++) /* pass 1: transform each of the 8 rows (8 consecutive values each) */
+ idctRow(block + 8*i);
+ /* pass 2: transform each of the 8 columns (stride 8) of the row-transformed data */
+ for(i=0; i<8; i++)
+ idctCol(block + i);
+
+}
+
+#else
+
+#define W1 22725 //cos(1*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W2 21407 //cos(2*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W3 19266 //cos(3*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W4 16384 //cos(4*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W5 12873 //cos(5*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W6 8867 //cos(6*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W7 4520 //cos(7*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define COL_SHIFT 31 // 6; NOTE(review): COL_SHIFT is already #defined earlier in this file with a different value and no #undef is visible - confirm before enabling this branch
+
+static void inline idctRow (int32_t *out, int16_t * row) /* 1-D inverse DCT of row[0..7] written to out[0..7]; results are stored unshifted and unrounded */
+{
+ int a0, a1, a2, a3, b0, b1, b2, b3; /* a*: even-index terms; b*: odd-index terms */
+ const int C1 =W1;
+ const int C2 =W2;
+ const int C3 =W3;
+ const int C4 =W4;
+ const int C5 =W5;
+ const int C6 =W6;
+ const int C7 =W7;
+/* Disabled all-zero shortcut, kept for reference:
+ if( !(row[1] | row[2] |row[3] |row[4] |row[5] |row[6] | row[7])) {
+ row[0] = row[1] = row[2] = row[3] = row[4] =
+ row[5] = row[6] = row[7] = row[0]<<14;
+ return;
+ }
+*/
+ a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6];
+ a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6];
+ a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6];
+ a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6];
+ /* Odd part: weighted sums of row[1], row[3], row[5], row[7]. */
+ b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
+ b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
+ b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
+ b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
+ /* Butterfly: out[k] and out[7-k] are a + b and a - b; row[] itself is left untouched. */
+ out[0] = (a0 + b0);
+ out[1] = (a1 + b1);
+ out[2] = (a2 + b2);
+ out[3] = (a3 + b3);
+ out[4] = (a3 - b3);
+ out[5] = (a2 - b2);
+ out[6] = (a1 - b1);
+ out[7] = (a0 - b0);
+}
+
+static void inline idctCol (int32_t *in, int16_t * col) /* 1-D inverse DCT down one column: reads in[8*k], writes col[8*k], k = 0..7 */
+{
+ int64_t a0, a1, a2, a3, b0, b1, b2, b3; /* 64-bit terms: the C* factors are int64_t, so the products below are formed in 64 bits */
+ const int64_t C1 =W1;
+ const int64_t C2 =W2;
+ const int64_t C3 =W3;
+ const int64_t C4 =W4;
+ const int64_t C5 =W5;
+ const int64_t C6 =W6;
+ const int64_t C7 =W7;
+/* Disabled all-zero shortcut, kept for reference:
+ if( !(col[8*1] | col[8*2] |col[8*3] |col[8*4] |col[8*5] |col[8*6] | col[8*7])) {
+ col[8*0] = col[8*1] = col[8*2] = col[8*3] = col[8*4] =
+ col[8*5] = col[8*6] = col[8*7] = col[8*0]<<3;
+ return;
+ }*/
+ in[0] += (1<<(COL_SHIFT-1))/W4; /* rounding: C4*in[0] appears in a0..a3, so biasing in[0] adds 1<<(COL_SHIFT-1) to every output before the shift; this modifies the caller's in[] buffer */
+ a0 = C4*in[8*0] + C2*in[8*2] + C4*in[8*4] + C6*in[8*6];
+ a1 = C4*in[8*0] + C6*in[8*2] - C4*in[8*4] - C2*in[8*6];
+ a2 = C4*in[8*0] - C6*in[8*2] - C4*in[8*4] + C2*in[8*6];
+ a3 = C4*in[8*0] - C2*in[8*2] + C4*in[8*4] - C6*in[8*6];
+ /* Odd part: weighted sums of in[8*1], in[8*3], in[8*5], in[8*7]. */
+ b0 = C1*in[8*1] + C3*in[8*3] + C5*in[8*5] + C7*in[8*7];
+ b1 = C3*in[8*1] - C7*in[8*3] - C1*in[8*5] - C5*in[8*7];
+ b2 = C5*in[8*1] - C1*in[8*3] + C7*in[8*5] + C3*in[8*7];
+ b3 = C7*in[8*1] - C5*in[8*3] + C3*in[8*5] - C1*in[8*7];
+ /* Butterfly: outputs k and 7-k are a + b and a - b, shifted down by COL_SHIFT and narrowed to int16_t in col[]. */
+ col[8*0] = (a0 + b0) >> COL_SHIFT;
+ col[8*1] = (a1 + b1) >> COL_SHIFT;
+ col[8*2] = (a2 + b2) >> COL_SHIFT;
+ col[8*3] = (a3 + b3) >> COL_SHIFT;
+ col[8*4] = (a3 - b3) >> COL_SHIFT;
+ col[8*5] = (a2 - b2) >> COL_SHIFT;
+ col[8*6] = (a1 - b1) >> COL_SHIFT;
+ col[8*7] = (a0 - b0) >> COL_SHIFT;
+}
+
+void simple_idct (short *block) /* 2-D inverse DCT of an 8x8 block (block[8*r + c]); result is written back into block */
+{
+ int i;
+ int32_t temp[64]; /* row-pass results, kept at 32 bits between the two passes */
+ for(i=0; i<8; i++)
+ idctRow(temp+8*i, block + 8*i);
+ /* pass 2: each column is read from temp[] and its shifted result is stored into block[] */
+ for(i=0; i<8; i++)
+ idctCol(temp+i, block + i);
+
+}
+
+#endif
diff --git a/src/libffmpeg/libavcodec/simple_idct.h b/src/libffmpeg/libavcodec/simple_idct.h
new file mode 100644
index 000000000..54dff7396
--- /dev/null
+++ b/src/libffmpeg/libavcodec/simple_idct.h
@@ -0,0 +1,20 @@
+/*
+ Copyright (C) 2001 Michael Niedermayer (michaelni@gmx.at)
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+void simple_idct(short *block);
+void simple_idct_mmx(short *block);
diff --git a/src/libffmpeg/libavcodec/utils.c b/src/libffmpeg/libavcodec/utils.c
index 26510d87c..aef27e342 100644
--- a/src/libffmpeg/libavcodec/utils.c
+++ b/src/libffmpeg/libavcodec/utils.c
@@ -16,19 +16,36 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include "common.h"
#include "dsputil.h"
#include "avcodec.h"
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#else
+#include <stdlib.h>
+#endif
/* memory alloc */
void *av_mallocz(int size)
{
void *ptr;
+#if defined ( ARCH_X86 ) && defined ( HAVE_MEMALIGN )
+ ptr = memalign(64,size);
+ /* Why 64?
+ Indeed, we should align it:
+ on 4 for 386
+ on 16 for 486
+ on 32 for 586, PPro - k6-III
+ on 64 for K7 (maybe for P3 too).
+ Because L1 and L2 caches are aligned on those values.
+ But I don't want to code such logic here!
+ */
+#else
ptr = malloc(size);
+#endif
if (!ptr)
return NULL;
memset(ptr, 0, size);
@@ -138,6 +155,18 @@ AVCodec *avcodec_find_encoder(enum CodecID id)
return NULL;
}
+AVCodec *avcodec_find_encoder_by_name(const char *name) /* returns the first codec in the first_avcodec list that has an encode callback and whose name equals `name`, or NULL if none matches */
+{
+ AVCodec *p;
+ p = first_avcodec; /* head of the list; entries are linked through ->next */
+ while (p) {
+ if (p->encode != NULL && strcmp(name,p->name) == 0) /* exact, case-sensitive comparison; `name` is passed to strcmp unchecked, so it must not be NULL */
+ return p;
+ p = p->next;
+ }
+ return NULL;
+}
+
AVCodec *avcodec_find_decoder(enum CodecID id)
{
AVCodec *p;
@@ -188,6 +217,7 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
const char *codec_name;
AVCodec *p;
char buf1[32];
+ int bitrate;
if (encode)
p = avcodec_find_encoder(enc->codec_id);
@@ -228,6 +258,7 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
enc->width, enc->height,
(float)enc->frame_rate / FRAME_RATE_BASE);
}
+ bitrate = enc->bit_rate;
break;
case CODEC_TYPE_AUDIO:
snprintf(buf, buf_size,
@@ -239,13 +270,31 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
enc->sample_rate,
enc->channels == 2 ? "stereo" : "mono");
}
+ /* for PCM codecs, compute bitrate directly */
+ switch(enc->codec_id) {
+ case CODEC_ID_PCM_S16LE:
+ case CODEC_ID_PCM_S16BE:
+ case CODEC_ID_PCM_U16LE:
+ case CODEC_ID_PCM_U16BE:
+ bitrate = enc->sample_rate * enc->channels * 16;
+ break;
+ case CODEC_ID_PCM_S8:
+ case CODEC_ID_PCM_U8:
+ case CODEC_ID_PCM_ALAW:
+ case CODEC_ID_PCM_MULAW:
+ bitrate = enc->sample_rate * enc->channels * 8;
+ break;
+ default:
+ bitrate = enc->bit_rate;
+ break;
+ }
break;
default:
abort();
}
- if (enc->bit_rate != 0) {
+ if (bitrate != 0) {
snprintf(buf + strlen(buf), buf_size - strlen(buf),
- ", %d kb/s", enc->bit_rate / 1000);
+ ", %d kb/s", bitrate / 1000);
}
}
@@ -353,7 +402,6 @@ void avcodec_register_all(void)
register_avcodec(&mpeg4_encoder);
register_avcodec(&msmpeg4_encoder);
#endif /* CONFIG_ENCODERS */
- register_avcodec(&pcm_codec);
register_avcodec(&rawvideo_codec);
/* decoders */
@@ -365,13 +413,32 @@ void avcodec_register_all(void)
register_avcodec(&h263i_decoder);
register_avcodec(&rv10_decoder);
register_avcodec(&mjpeg_decoder);
-#ifdef CONFIG_MPGLIB
+#ifdef FF_AUDIO_CODECS
register_avcodec(&mp3_decoder);
-#endif
#ifdef CONFIG_AC3
register_avcodec(&ac3_decoder);
#endif
+#endif
#endif /* CONFIG_DECODERS */
+
+#ifdef FF_AUDIO_CODECS
+ /* pcm codecs */
+
+#define PCM_CODEC(id, name) \
+ register_avcodec(& name ## _encoder); \
+ register_avcodec(& name ## _decoder); \
+
+PCM_CODEC(CODEC_ID_PCM_S16LE, pcm_s16le);
+PCM_CODEC(CODEC_ID_PCM_S16BE, pcm_s16be);
+PCM_CODEC(CODEC_ID_PCM_U16LE, pcm_u16le);
+PCM_CODEC(CODEC_ID_PCM_U16BE, pcm_u16be);
+PCM_CODEC(CODEC_ID_PCM_S8, pcm_s8);
+PCM_CODEC(CODEC_ID_PCM_U8, pcm_u8);
+PCM_CODEC(CODEC_ID_PCM_ALAW, pcm_alaw);
+PCM_CODEC(CODEC_ID_PCM_MULAW, pcm_mulaw);
+
+#undef PCM_CODEC
+#endif
}
static int encode_init(AVCodecContext *s)
@@ -392,18 +459,6 @@ static int encode_frame(AVCodecContext *avctx,
return -1;
}
-/* dummy pcm codec */
-AVCodec pcm_codec = {
- "pcm",
- CODEC_TYPE_AUDIO,
- CODEC_ID_PCM,
- 0,
- encode_init,
- encode_frame,
- NULL,
- decode_frame,
-};
-
AVCodec rawvideo_codec = {
"rawvideo",
CODEC_TYPE_VIDEO,