Diffstat (limited to 'src')
60 files changed, 2764 insertions, 1771 deletions
diff --git a/src/libffmpeg/libavcodec/Makefile.am b/src/libffmpeg/libavcodec/Makefile.am index 3b80eb07c..00dbe4f2a 100644 --- a/src/libffmpeg/libavcodec/Makefile.am +++ b/src/libffmpeg/libavcodec/Makefile.am @@ -53,6 +53,7 @@ libavcodec_la_SOURCES = \ msvideo1.c \ opts.c \ pcm.c \ + qtrle.c \ ra144.c \ ra288.c \ ratecontrol.c \ @@ -68,6 +69,7 @@ libavcodec_la_SOURCES = \ vcr1.c \ vmdav.c \ vp3.c \ + vp3dsp.c \ vqavideo.c \ wmadec.c \ xan.c diff --git a/src/libffmpeg/libavcodec/adpcm.c b/src/libffmpeg/libavcodec/adpcm.c index 7aee84f65..2ce7dc87f 100644 --- a/src/libffmpeg/libavcodec/adpcm.c +++ b/src/libffmpeg/libavcodec/adpcm.c @@ -45,13 +45,6 @@ #define BLKSIZE 1024 -#define BE_16(x) ((((uint8_t*)(x))[0] << 8) | ((uint8_t*)(x))[1]) -#define LE_16(x) ((((uint8_t*)(x))[1] << 8) | ((uint8_t*)(x))[0]) -#define LE_32(x) ((((uint8_t*)(x))[3] << 24) | \ - (((uint8_t*)(x))[2] << 16) | \ - (((uint8_t*)(x))[1] << 8) | \ - ((uint8_t*)(x))[0]) - #define CLAMP_TO_SHORT(value) \ if (value > 32767) \ value = 32767; \ @@ -153,8 +146,9 @@ static int adpcm_encode_init(AVCodecContext *avctx) /* seems frame_size isn't taken into account... have to buffer the samples :-( */ break; case CODEC_ID_ADPCM_MS: - av_log(avctx, AV_LOG_ERROR, "ADPCM: codec adpcm_ms unsupported for encoding !\n"); - return -1; + avctx->frame_size = (BLKSIZE - 7 * avctx->channels) * 2 / avctx->channels + 2; /* each 16 bits sample gives one nibble */ + /* and we have 7 bytes per channel overhead */ + avctx->block_align = BLKSIZE; break; default: return -1; @@ -223,16 +217,42 @@ static inline unsigned char adpcm_ima_compress_sample(ADPCMChannelStatus *c, sho return nibble; } +static inline unsigned char adpcm_ms_compress_sample(ADPCMChannelStatus *c, short sample) +{ + int predictor, nibble, bias; + + predictor = (((c->sample1) * (c->coeff1)) + ((c->sample2) * (c->coeff2))) / 256; + + nibble= sample - predictor; + if(nibble>=0) bias= c->idelta/2; + else bias=-c->idelta/2; + + nibble= (nibble + bias) / c->idelta; + nibble= clip(nibble, -8, 7)&0x0F; + + predictor += (signed)((nibble & 0x08)?(nibble - 0x10):(nibble)) * c->idelta; + CLAMP_TO_SHORT(predictor); + + c->sample2 = c->sample1; + c->sample1 = predictor; + + c->idelta = (AdaptationTable[(int)nibble] * c->idelta) >> 8; + if (c->idelta < 16) c->idelta = 16; + + return nibble; +} + static int adpcm_encode_frame(AVCodecContext *avctx, unsigned char *frame, int buf_size, void *data) { - int n; + int n, i, st; short *samples; unsigned char *dst; ADPCMContext *c = avctx->priv_data; dst = frame; samples = (short *)data; + st= avctx->channels == 2; /* n = (BLKSIZE - 4 * avctx->channels) / (2 * 8 * avctx->channels); */ switch(avctx->codec->id) { @@ -289,6 +309,41 @@ static int adpcm_encode_frame(AVCodecContext *avctx, samples += 8 * avctx->channels; } break; + case CODEC_ID_ADPCM_MS: + for(i=0; i<avctx->channels; i++){ + int predictor=0; + + *dst++ = predictor; + c->status[i].coeff1 = AdaptCoeff1[predictor]; + c->status[i].coeff2 = AdaptCoeff2[predictor]; + } + for(i=0; i<avctx->channels; i++){ + if (c->status[i].idelta < 16) + c->status[i].idelta = 16; + + *dst++ = c->status[i].idelta & 0xFF; + *dst++ = c->status[i].idelta >> 8; + } + for(i=0; i<avctx->channels; i++){ + c->status[i].sample1= *samples++; + + *dst++ = c->status[i].sample1 & 0xFF; + *dst++ = c->status[i].sample1 >> 8; + } + for(i=0; i<avctx->channels; i++){ + c->status[i].sample2= *samples++; + + *dst++ = c->status[i].sample2 & 0xFF; + *dst++ = c->status[i].sample2 >> 8; + } + + for(i=7*avctx->channels; i<avctx->block_align; 
i++) { + int nibble; + nibble = adpcm_ms_compress_sample(&c->status[ 0], *samples++)<<4; + nibble|= adpcm_ms_compress_sample(&c->status[st], *samples++); + *dst++ = nibble; + } + break; default: return -1; } @@ -350,7 +405,7 @@ static inline short adpcm_ms_expand_nibble(ADPCMChannelStatus *c, char nibble) c->sample2 = c->sample1; c->sample1 = predictor; - c->idelta = (AdaptationTable[(int)nibble] * c->idelta) / 256; + c->idelta = (AdaptationTable[(int)nibble] * c->idelta) >> 8; if (c->idelta < 16) c->idelta = 16; return (short)predictor; @@ -585,22 +640,16 @@ static int adpcm_decode_frame(AVCodecContext *avctx, n = buf_size - 7 * avctx->channels; if (n < 0) return -1; - block_predictor[0] = (*src++); /* should be bound */ - block_predictor[0] = (block_predictor[0] < 0)?(0):((block_predictor[0] > 7)?(7):(block_predictor[0])); + block_predictor[0] = clip(*src++, 0, 7); block_predictor[1] = 0; if (st) - block_predictor[1] = (*src++); - block_predictor[1] = (block_predictor[1] < 0)?(0):((block_predictor[1] > 7)?(7):(block_predictor[1])); - c->status[0].idelta = ((*src & 0xFF) | ((src[1] << 8) & 0xFF00)); - if (c->status[0].idelta & 0x08000) - c->status[0].idelta -= 0x10000; + block_predictor[1] = clip(*src++, 0, 7); + c->status[0].idelta = (int16_t)((*src & 0xFF) | ((src[1] << 8) & 0xFF00)); src+=2; - if (st) - c->status[1].idelta = ((*src & 0xFF) | ((src[1] << 8) & 0xFF00)); - if (st && c->status[1].idelta & 0x08000) - c->status[1].idelta |= 0xFFFF0000; - if (st) + if (st){ + c->status[1].idelta = (int16_t)((*src & 0xFF) | ((src[1] << 8) & 0xFF00)); src+=2; + } c->status[0].coeff1 = AdaptCoeff1[block_predictor[0]]; c->status[0].coeff2 = AdaptCoeff2[block_predictor[0]]; c->status[1].coeff1 = AdaptCoeff1[block_predictor[1]]; @@ -629,18 +678,14 @@ static int adpcm_decode_frame(AVCodecContext *avctx, if (avctx->block_align != 0 && buf_size > avctx->block_align) buf_size = avctx->block_align; - c->status[0].predictor = (src[0] | (src[1] << 8)); + c->status[0].predictor = (int16_t)(src[0] | (src[1] << 8)); c->status[0].step_index = src[2]; src += 4; - if(c->status[0].predictor & 0x8000) - c->status[0].predictor -= 0x10000; *samples++ = c->status[0].predictor; if (st) { - c->status[1].predictor = (src[0] | (src[1] << 8)); + c->status[1].predictor = (int16_t)(src[0] | (src[1] << 8)); c->status[1].step_index = src[2]; src += 4; - if(c->status[1].predictor & 0x8000) - c->status[1].predictor -= 0x10000; *samples++ = c->status[1].predictor; } while (src < buf + buf_size) { @@ -665,15 +710,11 @@ static int adpcm_decode_frame(AVCodecContext *avctx, if (avctx->block_align != 0 && buf_size > avctx->block_align) buf_size = avctx->block_align; - c->status[0].predictor = (src[10] | (src[11] << 8)); - c->status[1].predictor = (src[12] | (src[13] << 8)); + c->status[0].predictor = (int16_t)(src[10] | (src[11] << 8)); + c->status[1].predictor = (int16_t)(src[12] | (src[13] << 8)); c->status[0].step_index = src[14]; c->status[1].step_index = src[15]; /* sign extend the predictors */ - if(c->status[0].predictor & 0x8000) - c->status[0].predictor -= 0x10000; - if(c->status[1].predictor & 0x8000) - c->status[1].predictor -= 0x10000; src += 16; diff_channel = c->status[1].predictor; diff --git a/src/libffmpeg/libavcodec/asv1.c b/src/libffmpeg/libavcodec/asv1.c index b84b02475..87b13c637 100644 --- a/src/libffmpeg/libavcodec/asv1.c +++ b/src/libffmpeg/libavcodec/asv1.c @@ -519,10 +519,10 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, emms_c(); align_put_bits(&a->pb); - 
while(get_bit_count(&a->pb)&31) + while(put_bits_count(&a->pb)&31) put_bits(&a->pb, 8, 0); - size= get_bit_count(&a->pb)/32; + size= put_bits_count(&a->pb)/32; if(avctx->codec_id == CODEC_ID_ASV1) a->dsp.bswap_buf((uint32_t*)buf, (uint32_t*)buf, size); diff --git a/src/libffmpeg/libavcodec/avcodec.h b/src/libffmpeg/libavcodec/avcodec.h index 2e884e834..510bd41d2 100644 --- a/src/libffmpeg/libavcodec/avcodec.h +++ b/src/libffmpeg/libavcodec/avcodec.h @@ -24,7 +24,7 @@ extern "C" { #define FFMPEG_VERSION_INT 0x000408 #define FFMPEG_VERSION "0.4.8" -#define LIBAVCODEC_BUILD 4699 +#define LIBAVCODEC_BUILD 4707 #define LIBAVCODEC_VERSION_INT FFMPEG_VERSION_INT #define LIBAVCODEC_VERSION FFMPEG_VERSION @@ -33,6 +33,9 @@ extern "C" { #define AV_TOSTRING(s) #s #define LIBAVCODEC_IDENT "FFmpeg" LIBAVCODEC_VERSION "b" AV_STRINGIFY(LIBAVCODEC_BUILD) +#define AV_NOPTS_VALUE int64_t_C(0x8000000000000000) +#define AV_TIME_BASE 1000000 + enum CodecID { CODEC_ID_NONE, CODEC_ID_MPEG1VIDEO, @@ -100,6 +103,7 @@ enum CodecID { CODEC_ID_VMDAUDIO, CODEC_ID_MSZH, CODEC_ID_ZLIB, + CODEC_ID_QTRLE, /* various pcm "codecs" */ CODEC_ID_PCM_S16LE, @@ -123,6 +127,7 @@ enum CodecID { CODEC_ID_ADPCM_XA, CODEC_ID_ADPCM_ADX, CODEC_ID_ADPCM_EA, + CODEC_ID_ADPCM_G726, /* AMR */ CODEC_ID_AMR_NB, @@ -137,6 +142,8 @@ enum CodecID { CODEC_ID_INTERPLAY_DPCM, CODEC_ID_XAN_DPCM, + CODEC_ID_FLAC, + CODEC_ID_MPEG2TS, /* _FAKE_ codec to indicate a raw MPEG2 transport stream (only used by libavformat) */ }; @@ -230,12 +237,6 @@ typedef struct RcOverride{ /* only for ME compatiblity with old apps */ extern int motion_estimation_method; -/* ME algos sorted by quality */ -//FIXME remove IMHO -static const __attribute__((unused)) int Motion_Est_QTab[] = - { ME_ZERO, ME_PHODS, ME_LOG, ME_X1, ME_EPZS, ME_FULL }; - - #define FF_MAX_B_FRAMES 8 /* encoding support @@ -297,6 +298,8 @@ static const __attribute__((unused)) int Motion_Est_QTab[] = used */ #define CODEC_CAP_PARSE_ONLY 0x0004 #define CODEC_CAP_TRUNCATED 0x0008 +/* codec can export data for HW decoding (XvMC) */ +#define CODEC_CAP_HWACCEL 0x0010 //the following defines might change, so dont expect compatibility if u use them #define MB_TYPE_INTRA4x4 0x0001 @@ -381,8 +384,8 @@ typedef struct AVPanScan{ int pict_type;\ \ /**\ - * presentation timestamp in micro seconds (time when frame should be shown to user)\ - * if 0 then the frame_rate will be used as reference\ + * presentation timestamp in AV_TIME_BASE (=micro seconds currently) (time when frame should be shown to user)\ + * if AV_NOPTS_VALUE then the frame_rate will be used as reference\ * - encoding: MUST be set by user\ * - decoding: set by lavc\ */\ @@ -535,6 +538,13 @@ typedef struct AVPanScan{ * - decoding: set by lavc (before get_buffer() call))\ */\ int buffer_hints;\ +\ + /**\ + * DCT coeffitients\ + * - encoding: unused\ + * - decoding: set by lavc\ + */\ + short *dct_coeff;\ #define FF_QSCALE_TYPE_MPEG1 0 #define FF_QSCALE_TYPE_MPEG2 1 @@ -567,10 +577,27 @@ typedef struct AVFrame { #define DEFAULT_FRAME_RATE_BASE 1001000 /** + * Used by av_log + */ +typedef struct AVCLASS AVClass; +struct AVCLASS { + const char* class_name; + const char* (*item_name)(void*); /* actually passing a pointer to an AVCodecContext + or AVFormatContext, which begin with an AVClass. + Needed because av_log is in libavcodec and has no visibility + of AVIn/OutputFormat */ +}; + +/** * main external api structure. 
*/ typedef struct AVCodecContext { /** + * Info on struct for av_log + * - set by avcodec_alloc_context + */ + AVClass *av_class; + /** * the average bitrate. * - encoding: set by user. unused for constant quantizer encoding * - decoding: set by lavc. 0 or some bitrate if this info is available in the stream @@ -634,11 +661,6 @@ typedef struct AVCodecContext { */ int width, height; -#define FF_ASPECT_SQUARE 1 -#define FF_ASPECT_4_3_625 2 -#define FF_ASPECT_4_3_525 3 -#define FF_ASPECT_16_9_625 4 -#define FF_ASPECT_16_9_525 5 #define FF_ASPECT_EXTENDED 15 /** @@ -1149,7 +1171,7 @@ typedef struct AVCodecContext { #define FF_DEBUG_MB_TYPE 8 #define FF_DEBUG_QP 16 #define FF_DEBUG_MV 32 -//#define FF_DEBUG_VIS_MV 0x00000040 +#define FF_DEBUG_DCT_COEFF 0x00000040 #define FF_DEBUG_SKIP 0x00000080 #define FF_DEBUG_STARTCODE 0x00000100 #define FF_DEBUG_PTS 0x00000200 @@ -1277,7 +1299,7 @@ typedef struct AVCodecContext { * - encoding: unused * - decoding: set by user, if not set then the native format will always be choosen */ - enum PixelFormat (*get_format)(struct AVCodecContext *s, enum PixelFormat * fmt); + enum PixelFormat (*get_format)(struct AVCodecContext *s, const enum PixelFormat * fmt); /** * DTG active format information (additionnal aspect ratio @@ -1513,6 +1535,38 @@ typedef struct AVCodecContext { #define FF_AA_FASTINT 1 //not implemented yet #define FF_AA_INT 2 #define FF_AA_FLOAT 3 + /** + * Quantizer noise shaping. + * - encoding: set by user + * - decoding: unused + */ + int quantizer_noise_shaping; + + /** + * Thread count. + * is used to decide how many independant tasks should be passed to execute() + * - encoding: set by user + * - decoding: set by user + */ + int thread_count; + + /** + * the codec may call this to execute several independant things. it will return only after + * finishing all tasks, the user may replace this with some multithreaded implementation, the + * default implementation will execute the parts serially + * @param count the number of things to execute + * - encoding: set by lavc, user can override + * - decoding: set by lavc, user can override + */ + int (*execute)(struct AVCodecContext *c, int (*func)(struct AVCodecContext *c2, void *arg), void **arg2, int *ret, int count); + + /** + * Thread opaque. + * can be used by execute() to store some per AVCodecContext stuff. 
+ * - encoding: set by execute() + * - decoding: set by execute() + */ + void *thread_opaque; } AVCodecContext; @@ -1577,6 +1631,7 @@ typedef struct AVCodec { const AVOption *options; struct AVCodec *next; void (*flush)(AVCodecContext *); + const AVRational *supported_framerates; ///array of supported framerates, or NULL if any, array is terminated by {0,0} } AVCodec; /** @@ -1707,6 +1762,8 @@ extern AVCodec ra_288_decoder; extern AVCodec roq_dpcm_decoder; extern AVCodec interplay_dpcm_decoder; extern AVCodec xan_dpcm_decoder; +extern AVCodec qtrle_decoder; +extern AVCodec flac_decoder; /* pcm codecs */ #define PCM_CODEC(id, name) \ @@ -1735,6 +1792,7 @@ PCM_CODEC(CODEC_ID_ADPCM_4XM, adpcm_4xm); PCM_CODEC(CODEC_ID_ADPCM_XA, adpcm_xa); PCM_CODEC(CODEC_ID_ADPCM_ADX, adpcm_adx); PCM_CODEC(CODEC_ID_ADPCM_EA, adpcm_ea); +PCM_CODEC(CODEC_ID_ADPCM_G726, adpcm_g726); #undef PCM_CODEC @@ -1849,6 +1907,11 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic); void avcodec_default_release_buffer(AVCodecContext *s, AVFrame *pic); void avcodec_default_free_buffers(AVCodecContext *s); +int avcodec_thread_init(AVCodecContext *s, int thread_count); +void avcodec_thread_free(AVCodecContext *s); +int avcodec_thread_execute(AVCodecContext *s, int (*func)(AVCodecContext *c2, void *arg2),void **arg, int *ret, int count); +//FIXME func typedef + /** * opens / inits the AVCodecContext. * not thread save! @@ -2034,8 +2097,7 @@ void *av_mallocz(unsigned int size); void *av_realloc(void *ptr, unsigned int size); void av_free(void *ptr); char *av_strdup(const char *s); -void __av_freep(void **ptr); -#define av_freep(p) __av_freep((void **)(p)) +void av_freep(void *ptr); void *av_fast_realloc(void *ptr, unsigned int *size, unsigned int min_size); /* for static data only */ /* call av_free_static to release all staticaly allocated tables */ @@ -2057,19 +2119,23 @@ void img_copy(AVPicture *dst, const AVPicture *src, #define AV_LOG_INFO 1 #define AV_LOG_DEBUG 2 -extern void av_log(AVCodecContext*, int level, const char *fmt, ...) __attribute__ ((__format__ (__printf__, 3, 4))); -extern void av_vlog(AVCodecContext*, int level, const char *fmt, va_list); +extern void av_log(void*, int level, const char *fmt, ...) 
__attribute__ ((__format__ (__printf__, 3, 4))); +extern void av_vlog(void*, int level, const char *fmt, va_list); extern int av_log_get_level(void); extern void av_log_set_level(int); -extern void av_log_set_callback(void (*)(AVCodecContext*, int, const char*, va_list)); - -#undef AV_LOG_TRAP_PRINTF -#ifdef AV_LOG_TRAP_PRINTF -#define printf DO NOT USE -#define fprintf DO NOT USE -#undef stderr -#define stderr DO NOT USE -#endif +extern void av_log_set_callback(void (*)(void*, int, const char*, va_list)); + +/* endian macros */ +#define BE_16(x) ((((uint8_t*)(x))[0] << 8) | ((uint8_t*)(x))[1]) +#define BE_32(x) ((((uint8_t*)(x))[0] << 24) | \ + (((uint8_t*)(x))[1] << 16) | \ + (((uint8_t*)(x))[2] << 8) | \ + ((uint8_t*)(x))[3]) +#define LE_16(x) ((((uint8_t*)(x))[1] << 8) | ((uint8_t*)(x))[0]) +#define LE_32(x) ((((uint8_t*)(x))[3] << 24) | \ + (((uint8_t*)(x))[2] << 16) | \ + (((uint8_t*)(x))[1] << 8) | \ + ((uint8_t*)(x))[0]) #ifdef __cplusplus } diff --git a/src/libffmpeg/libavcodec/cabac.h b/src/libffmpeg/libavcodec/cabac.h index a49e15631..852d47ebe 100644 --- a/src/libffmpeg/libavcodec/cabac.h +++ b/src/libffmpeg/libavcodec/cabac.h @@ -165,7 +165,7 @@ static inline int put_cabac_terminate(CABACContext *c, int bit){ c->symCount++; #endif - return (get_bit_count(&c->pb)+7)>>3; + return (put_bits_count(&c->pb)+7)>>3; } /** diff --git a/src/libffmpeg/libavcodec/cinepak.c b/src/libffmpeg/libavcodec/cinepak.c index e8d3efc3c..412db7f7e 100644 --- a/src/libffmpeg/libavcodec/cinepak.c +++ b/src/libffmpeg/libavcodec/cinepak.c @@ -37,12 +37,6 @@ #define PALETTE_COUNT 256 -#define BE_16(x) ((((uint8_t*)(x))[0] << 8) | ((uint8_t*)(x))[1]) -#define BE_32(x) ((((uint8_t*)(x))[0] << 24) | \ - (((uint8_t*)(x))[1] << 16) | \ - (((uint8_t*)(x))[2] << 8) | \ - ((uint8_t*)(x))[3]) - typedef struct { uint8_t y0, y1, y2, y3; uint8_t u, v; diff --git a/src/libffmpeg/libavcodec/common.c b/src/libffmpeg/libavcodec/common.c index 79c6f52a0..72172fa4f 100644 --- a/src/libffmpeg/libavcodec/common.c +++ b/src/libffmpeg/libavcodec/common.c @@ -45,34 +45,6 @@ const uint8_t ff_log2_tab[256]={ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 }; -void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size) -{ - s->buf = buffer; - s->buf_end = s->buf + buffer_size; -#ifdef ALT_BITSTREAM_WRITER - s->index=0; - ((uint32_t*)(s->buf))[0]=0; -// memset(buffer, 0, buffer_size); -#else - s->buf_ptr = s->buf; - s->bit_left=32; - s->bit_buf=0; -#endif -} - -//#ifdef CONFIG_ENCODERS -#if 1 - -/* return the number of bits output */ -int get_bit_count(PutBitContext *s) -{ -#ifdef ALT_BITSTREAM_WRITER - return s->index; -#else - return (s->buf_ptr - s->buf) * 8 + 32 - s->bit_left; -#endif -} - void align_put_bits(PutBitContext *s) { #ifdef ALT_BITSTREAM_WRITER @@ -82,88 +54,18 @@ void align_put_bits(PutBitContext *s) #endif } -#endif //CONFIG_ENCODERS - -/* pad the end of the output stream with zeros */ -void flush_put_bits(PutBitContext *s) -{ -#ifdef ALT_BITSTREAM_WRITER - align_put_bits(s); -#else - s->bit_buf<<= s->bit_left; - while (s->bit_left < 32) { - /* XXX: should test end of buffer */ - *s->buf_ptr++=s->bit_buf >> 24; - s->bit_buf<<=8; - s->bit_left+=8; - } - s->bit_left=32; - s->bit_buf=0; -#endif -} - -#if defined(CONFIG_ENCODERS) || defined(XINE_MPEG_ENCODER) - -void put_string(PutBitContext * pbc, char *s) +void put_string(PutBitContext * pbc, char *s, int put_zero) { while(*s){ put_bits(pbc, 8, *s); s++; } - put_bits(pbc, 8, 0); + if(put_zero) + put_bits(pbc, 8, 0); } /* bit input 
functions */ -#endif //CONFIG_ENCODERS - -/** - * init GetBitContext. - * @param buffer bitstream buffer, must be FF_INPUT_BUFFER_PADDING_SIZE bytes larger then the actual read bits - * because some optimized bitstream readers read 32 or 64 bit at once and could read over the end - * @param bit_size the size of the buffer in bits - */ -void init_get_bits(GetBitContext *s, - const uint8_t *buffer, int bit_size) -{ - const int buffer_size= (bit_size+7)>>3; - - s->buffer= buffer; - s->size_in_bits= bit_size; - s->buffer_end= buffer + buffer_size; -#ifdef ALT_BITSTREAM_READER - s->index=0; -#elif defined LIBMPEG2_BITSTREAM_READER -#ifdef LIBMPEG2_BITSTREAM_READER_HACK - if ((int)buffer&1) { - /* word alignment */ - s->cache = (*buffer++)<<24; - s->buffer_ptr = buffer; - s->bit_count = 16-8; - } else -#endif - { - s->buffer_ptr = buffer; - s->bit_count = 16; - s->cache = 0; - } -#elif defined A32_BITSTREAM_READER - s->buffer_ptr = (uint32_t*)buffer; - s->bit_count = 32; - s->cache0 = 0; - s->cache1 = 0; -#endif - { - OPEN_READER(re, s) - UPDATE_CACHE(re, s) - UPDATE_CACHE(re, s) - CLOSE_READER(re, s) - } -#ifdef A32_BITSTREAM_READER - s->cache1 = 0; -#endif -} - /** * reads 0-32 bits. */ diff --git a/src/libffmpeg/libavcodec/common.h b/src/libffmpeg/libavcodec/common.h index 5f51ba25d..a5c67d3a7 100644 --- a/src/libffmpeg/libavcodec/common.h +++ b/src/libffmpeg/libavcodec/common.h @@ -82,6 +82,12 @@ extern const struct AVOption avoptions_workaround_bug[11]; # define always_inline inline #endif +#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0) +# define attribute_used __attribute__((used)) +#else +# define attribute_used +#endif + #ifndef EMULATE_INTTYPES # include <inttypes.h> #else @@ -102,7 +108,7 @@ extern const struct AVOption avoptions_workaround_bug[11]; #endif /* HAVE_INTTYPES_H */ #ifndef INT64_MAX -#define INT64_MAX 9223372036854775807LL +#define INT64_MAX int64_t_C(9223372036854775807) #endif #ifdef EMULATE_FAST_INT @@ -296,12 +302,56 @@ typedef struct PutBitContext { #endif } PutBitContext; -void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size); +static inline void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size) +{ + s->buf = buffer; + s->buf_end = s->buf + buffer_size; +#ifdef ALT_BITSTREAM_WRITER + s->index=0; + ((uint32_t*)(s->buf))[0]=0; +// memset(buffer, 0, buffer_size); +#else + s->buf_ptr = s->buf; + s->bit_left=32; + s->bit_buf=0; +#endif +} + +/* return the number of bits output */ +static inline int put_bits_count(PutBitContext *s) +{ +#ifdef ALT_BITSTREAM_WRITER + return s->index; +#else + return (s->buf_ptr - s->buf) * 8 + 32 - s->bit_left; +#endif +} + +static inline int put_bits_left(PutBitContext* s) +{ + return (s->buf_end - s->buf) * 8 - put_bits_count(s); +} + +/* pad the end of the output stream with zeros */ +static inline void flush_put_bits(PutBitContext *s) +{ +#ifdef ALT_BITSTREAM_WRITER + align_put_bits(s); +#else + s->bit_buf<<= s->bit_left; + while (s->bit_left < 32) { + /* XXX: should test end of buffer */ + *s->buf_ptr++=s->bit_buf >> 24; + s->bit_buf<<=8; + s->bit_left+=8; + } + s->bit_left=32; + s->bit_buf=0; +#endif +} -int get_bit_count(PutBitContext *s); /* XXX: change function name */ void align_put_bits(PutBitContext *s); -void flush_put_bits(PutBitContext *s); -void put_string(PutBitContext * pbc, char *s); +void put_string(PutBitContext * pbc, char *s, int put_zero); /* bit input */ @@ -322,8 +372,6 @@ typedef struct GetBitContext { int size_in_bits; } GetBitContext; -static inline 
int get_bits_count(GetBitContext *s); - #define VLC_TYPE int16_t typedef struct VLC { @@ -485,6 +533,28 @@ static inline uint8_t* pbBufPtr(PutBitContext *s) #endif } +/** + * + * PutBitContext must be flushed & aligned to a byte boundary before calling this. + */ +static inline void skip_put_bytes(PutBitContext *s, int n){ + assert((put_bits_count(s)&7)==0); +#ifdef ALT_BITSTREAM_WRITER + FIXME may need some cleaning of the buffer + s->index += n<<3; +#else + assert(s->bit_left==32); + s->buf_ptr += n; +#endif +} + +/** + * Changes the end of the buffer. + */ +static inline void set_put_bits_buffer_size(PutBitContext *s, int size){ + s->buf_end= s->buf + size; +} + /* Bitstream reader API docs: name abritary name which is used as prefix for the internal variables @@ -807,8 +877,57 @@ static inline void skip_bits1(GetBitContext *s){ skip_bits(s, 1); } -void init_get_bits(GetBitContext *s, - const uint8_t *buffer, int buffer_size); +/** + * init GetBitContext. + * @param buffer bitstream buffer, must be FF_INPUT_BUFFER_PADDING_SIZE bytes larger then the actual read bits + * because some optimized bitstream readers read 32 or 64 bit at once and could read over the end + * @param bit_size the size of the buffer in bits + */ +static inline void init_get_bits(GetBitContext *s, + const uint8_t *buffer, int bit_size) +{ + const int buffer_size= (bit_size+7)>>3; + + s->buffer= buffer; + s->size_in_bits= bit_size; + s->buffer_end= buffer + buffer_size; +#ifdef ALT_BITSTREAM_READER + s->index=0; +#elif defined LIBMPEG2_BITSTREAM_READER +#ifdef LIBMPEG2_BITSTREAM_READER_HACK + if ((int)buffer&1) { + /* word alignment */ + s->cache = (*buffer++)<<24; + s->buffer_ptr = buffer; + s->bit_count = 16-8; + } else +#endif + { + s->buffer_ptr = buffer; + s->bit_count = 16; + s->cache = 0; + } +#elif defined A32_BITSTREAM_READER + s->buffer_ptr = (uint32_t*)buffer; + s->bit_count = 32; + s->cache0 = 0; + s->cache1 = 0; +#endif + { + OPEN_READER(re, s) + UPDATE_CACHE(re, s) + UPDATE_CACHE(re, s) + CLOSE_READER(re, s) + } +#ifdef A32_BITSTREAM_READER + s->cache1 = 0; +#endif +} + +static inline int get_bits_left(GetBitContext *s) +{ + return s->size_in_bits - get_bits_count(s); +} int check_marker(GetBitContext *s, const char *msg); void align_get_bits(GetBitContext *s); @@ -964,7 +1083,7 @@ static inline int get_xbits_trace(GetBitContext *s, int n, char *file, char *fun #define tprintf printf #else //TRACE -#define tprintf(_arg...) {} +#define tprintf(...) 
{} #endif /* define it to include statistics code (useful only for optimizing @@ -1144,21 +1263,23 @@ static inline long long rdtsc() } #define START_TIMER \ -static uint64_t tsum=0;\ -static int tcount=0;\ -static int tskip_count=0;\ uint64_t tend;\ uint64_t tstart= rdtsc();\ #define STOP_TIMER(id) \ tend= rdtsc();\ -if(tcount<2 || tend - tstart < 4*tsum/tcount){\ - tsum+= tend - tstart;\ - tcount++;\ -}else\ - tskip_count++;\ -if(256*256*256*64%(tcount+tskip_count)==0){\ - fprintf(stderr, "%Ld dezicycles in %s, %d runs, %d skips\n", tsum*10/tcount, id, tcount, tskip_count);\ +{\ + static uint64_t tsum=0;\ + static int tcount=0;\ + static int tskip_count=0;\ + if(tcount<2 || tend - tstart < 8*tsum/tcount){\ + tsum+= tend - tstart;\ + tcount++;\ + }else\ + tskip_count++;\ + if(256*256*256*64%(tcount+tskip_count)==0){\ + av_log(NULL, AV_LOG_DEBUG, "%Ld dezicycles in %s, %d runs, %d skips\n", tsum*10/tcount, id, tcount, tskip_count);\ + }\ } #endif @@ -1168,6 +1289,10 @@ if(256*256*256*64%(tcount+tskip_count)==0){\ #define malloc please_use_av_malloc #define free please_use_av_free #define realloc please_use_av_realloc +#if !(defined(LIBAVFORMAT_BUILD) || defined(_FRAMEHOOK_H)) +#define printf please_use_av_log +#define fprintf please_use_av_log +#endif #define CHECKED_ALLOCZ(p, size)\ {\ diff --git a/src/libffmpeg/libavcodec/dpcm.c b/src/libffmpeg/libavcodec/dpcm.c index b59a9cd6b..b80604e5f 100644 --- a/src/libffmpeg/libavcodec/dpcm.c +++ b/src/libffmpeg/libavcodec/dpcm.c @@ -44,11 +44,6 @@ typedef struct DPCMContext { #define SATURATE_S16(x) if (x < -32768) x = -32768; \ else if (x > 32767) x = 32767; #define SE_16BIT(x) if (x & 0x8000) x -= 0x10000; -#define LE_16(x) ((((uint8_t*)(x))[1] << 8) | ((uint8_t*)(x))[0]) -#define LE_32(x) ((((uint8_t*)(x))[3] << 24) | \ - (((uint8_t*)(x))[2] << 16) | \ - (((uint8_t*)(x))[1] << 8) | \ - ((uint8_t*)(x))[0]) static int interplay_delta_table[] = { 0, 1, 2, 3, 4, 5, 6, 7, diff --git a/src/libffmpeg/libavcodec/dsputil.c b/src/libffmpeg/libavcodec/dsputil.c index 114d67b50..7f26bd98a 100644 --- a/src/libffmpeg/libavcodec/dsputil.c +++ b/src/libffmpeg/libavcodec/dsputil.c @@ -874,6 +874,13 @@ PIXOP2(put, op_put) #define avg2(a,b) ((a+b+1)>>1) #define avg4(a,b,c,d) ((a+b+c+d+2)>>2) +static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){ + put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h); +} + +static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){ + put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h); +} static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder) { @@ -2532,6 +2539,29 @@ static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, return s; } +static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){ + int i; + unsigned int sum=0; + + for(i=0; i<8*8; i++){ + int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT)); + int w= weight[i]; + b>>= RECON_SHIFT; + assert(-512<b && b<512); + + sum += (w*b)*(w*b)>>4; + } + return sum>>2; +} + +static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){ + int i; + + for(i=0; i<8*8; i++){ + rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT); + } +} + /** * permutes an 8x8 block. 
* @param block the block which will be permuted according to the given permutation vector @@ -3094,6 +3124,11 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->idct_permutation_type= FF_NO_IDCT_PERM; } + /* VP3 DSP support */ + c->vp3_dsp_init = vp3_dsp_init_c; + c->vp3_idct_put = vp3_idct_put_c; + c->vp3_idct_add = vp3_idct_add_c; + c->get_pixels = get_pixels_c; c->diff_pixels = diff_pixels_c; c->put_pixels_clamped = put_pixels_clamped_c; @@ -3135,6 +3170,9 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) dspfunc(avg, 3, 2); #undef dspfunc + c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c; + c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c; + c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c; c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c; c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c; @@ -3235,6 +3273,9 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->h263_h_loop_filter= h263_h_loop_filter_c; c->h263_v_loop_filter= h263_v_loop_filter_c; + + c->try_8x8basis= try_8x8basis_c; + c->add_8x8basis= add_8x8basis_c; #ifdef HAVE_MMX dsputil_init_mmx(c, avctx); diff --git a/src/libffmpeg/libavcodec/dsputil.h b/src/libffmpeg/libavcodec/dsputil.h index d8346d509..35e965db0 100644 --- a/src/libffmpeg/libavcodec/dsputil.h +++ b/src/libffmpeg/libavcodec/dsputil.h @@ -63,6 +63,19 @@ extern const uint8_t ff_zigzag248_direct[64]; extern uint32_t squareTbl[512]; extern uint8_t cropTbl[256 + 2 * MAX_NEG_CROP]; +/* VP3 DSP functions */ +void vp3_dsp_init_c(void); +void vp3_idct_put_c(int16_t *input_data, int16_t *dequant_matrix, + int coeff_count, uint8_t *dest, int stride); +void vp3_idct_add_c(int16_t *input_data, int16_t *dequant_matrix, + int coeff_count, uint8_t *dest, int stride); + +void vp3_dsp_init_mmx(void); +void vp3_idct_put_mmx(int16_t *input_data, int16_t *dequant_matrix, + int coeff_count, uint8_t *dest, int stride); +void vp3_idct_add_mmx(int16_t *input_data, int16_t *dequant_matrix, + int coeff_count, uint8_t *dest, int stride); + /* minimum alignment rules ;) if u notice errors in the align stuff, need more alignment for some asm code for some cpu @@ -207,6 +220,8 @@ typedef struct DSPContext { */ op_pixels_func avg_no_rnd_pixels_tab[2][4]; + void (*put_no_rnd_pixels_l2[2])(uint8_t *block/*align width (8 or 16)*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h); + /** * Thirdpel motion compensation with rounding (a+b+1)>>1. * this is an array[12] of motion compensation funcions for the 9 thirdpel positions<br> @@ -288,6 +303,45 @@ typedef struct DSPContext { #define FF_SIMPLE_IDCT_PERM 3 #define FF_TRANSPOSE_IDCT_PERM 4 + int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale); + void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale); +#define BASIS_SHIFT 16 +#define RECON_SHIFT 6 + + /** + * This function handles any initialization for the VP3 DSP functions. + */ + void (*vp3_dsp_init)(void); + + /** + * This function is responsible for taking a block of zigzag'd, + * quantized DCT coefficients, reconstructing the original block of + * samples, and placing it into the output. 
+ * @param input_data 64 zigzag'd, quantized DCT coefficients + * @param dequant_matrix 64 zigzag'd quantizer coefficients + * @param coeff_count index of the last coefficient + * @param dest the final output location where the transformed samples + * are to be placed + * @param stride the width in 8-bit samples of a line on this plane + */ + void (*vp3_idct_put)(int16_t *input_data, int16_t *dequant_matrix, + int coeff_count, uint8_t *dest, int stride); + + /** + * This function is responsible for taking a block of zigzag'd, + * quantized DCT coefficients, reconstructing the original block of + * samples, and adding the transformed samples to an existing block of + * samples in the output. + * @param input_data 64 zigzag'd, quantized DCT coefficients + * @param dequant_matrix 64 zigzag'd quantizer coefficients + * @param coeff_count index of the last coefficient + * @param dest the final output location where the transformed samples + * are to be placed + * @param stride the width in 8-bit samples of a line on this plane + */ + void (*vp3_idct_add)(int16_t *input_data, int16_t *dequant_matrix, + int coeff_count, uint8_t *dest, int stride); + } DSPContext; void dsputil_static_init(void); @@ -457,17 +511,17 @@ typedef struct FFTContext { void (*fft_calc)(struct FFTContext *s, FFTComplex *z); } FFTContext; -int fft_init(FFTContext *s, int nbits, int inverse); -void fft_permute(FFTContext *s, FFTComplex *z); -void fft_calc_c(FFTContext *s, FFTComplex *z); -void fft_calc_sse(FFTContext *s, FFTComplex *z); -void fft_calc_altivec(FFTContext *s, FFTComplex *z); +int ff_fft_init(FFTContext *s, int nbits, int inverse); +void ff_fft_permute(FFTContext *s, FFTComplex *z); +void ff_fft_calc_c(FFTContext *s, FFTComplex *z); +void ff_fft_calc_sse(FFTContext *s, FFTComplex *z); +void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z); -static inline void fft_calc(FFTContext *s, FFTComplex *z) +static inline void ff_fft_calc(FFTContext *s, FFTComplex *z) { s->fft_calc(s, z); } -void fft_end(FFTContext *s); +void ff_fft_end(FFTContext *s); /* MDCT computation */ diff --git a/src/libffmpeg/libavcodec/dv.c b/src/libffmpeg/libavcodec/dv.c index 8e041b503..08be11d45 100644 --- a/src/libffmpeg/libavcodec/dv.c +++ b/src/libffmpeg/libavcodec/dv.c @@ -642,12 +642,6 @@ typedef struct EncBlockInfo { uint32_t partial_bit_buffer; /* we can't use uint16_t here */ } EncBlockInfo; -static always_inline int dv_bits_left(PutBitContext* s) -{ - return (s->buf_end - s->buf) * 8 - - ((s->buf_ptr - s->buf) * 8 + 32 - (int64_t)s->bit_left); -} - static always_inline void dv_encode_ac(EncBlockInfo* bi, PutBitContext* pb_pool, int pb_size) { @@ -660,7 +654,7 @@ static always_inline void dv_encode_ac(EncBlockInfo* bi, PutBitContext* pb_pool, bi->partial_bit_count = bi->partial_bit_buffer = 0; vlc_loop: /* Find suitable storage space */ - for (; size > (bits_left = dv_bits_left(pb)); pb++) { + for (; size > (bits_left = put_bits_left(pb)); pb++) { if (bits_left) { size -= bits_left; put_bits(pb, bits_left, vlc >> size); diff --git a/src/libffmpeg/libavcodec/error_resilience.c b/src/libffmpeg/libavcodec/error_resilience.c index 5067a248f..c6b10a79c 100644 --- a/src/libffmpeg/libavcodec/error_resilience.c +++ b/src/libffmpeg/libavcodec/error_resilience.c @@ -652,7 +652,7 @@ void ff_er_add_slice(MpegEncContext *s, int startx, int starty, int endx, int en s->error_status_table[start_xy] |= VP_START; - if(start_xy > 0){ + if(start_xy > 0 && s->avctx->thread_count <= 1){ int prev_status= s->error_status_table[ s->mb_index2xy[start_i - 1] 
]; prev_status &= ~ VP_START; diff --git a/src/libffmpeg/libavcodec/eval.c b/src/libffmpeg/libavcodec/eval.c index 714ba046c..aead600e8 100644 --- a/src/libffmpeg/libavcodec/eval.c +++ b/src/libffmpeg/libavcodec/eval.c @@ -115,12 +115,16 @@ static void evalPrimary(Parser *p){ p->s++; // "(" evalExpression(p); d= pop(p); - p->s++; // ")" or "," - if(p->s[-1]== ','){ + if(p->s[0]== ','){ + p->s++; // "," evalExpression(p); d2= pop(p); - p->s++; // ")" } + if(p->s[0] != ')'){ + av_log(NULL, AV_LOG_ERROR, "Parser: missing ) in \"%s\"\n", next); + return; + } + p->s++; // ")" if( strmatch(next, "sinh" ) ) d= sinh(d); else if( strmatch(next, "cosh" ) ) d= cosh(d); @@ -136,7 +140,9 @@ static void evalPrimary(Parser *p){ else if( strmatch(next, "max" ) ) d= d > d2 ? d : d2; else if( strmatch(next, "min" ) ) d= d < d2 ? d : d2; else if( strmatch(next, "gt" ) ) d= d > d2 ? 1.0 : 0.0; + else if( strmatch(next, "gte" ) ) d= d >= d2 ? 1.0 : 0.0; else if( strmatch(next, "lt" ) ) d= d > d2 ? 0.0 : 1.0; + else if( strmatch(next, "lte" ) ) d= d >= d2 ? 0.0 : 1.0; else if( strmatch(next, "eq" ) ) d= d == d2 ? 1.0 : 0.0; // else if( strmatch(next, "l1" ) ) d= 1 + d2*(d - 1); // else if( strmatch(next, "sq01" ) ) d= (d >= 0.0 && d <=1.0) ? 1.0 : 0.0; @@ -164,10 +170,6 @@ static void evalPrimary(Parser *p){ } } - if(p->s[-1]!= ')'){ - av_log(NULL, AV_LOG_ERROR, "Parser: missing ) in \"%s\"\n", next); - return; - } push(p, d); } diff --git a/src/libffmpeg/libavcodec/fft.c b/src/libffmpeg/libavcodec/fft.c index 3b5244a07..912a2edd6 100644 --- a/src/libffmpeg/libavcodec/fft.c +++ b/src/libffmpeg/libavcodec/fft.c @@ -28,7 +28,7 @@ * The size of the FFT is 2^nbits. If inverse is TRUE, inverse FFT is * done */ -int fft_init(FFTContext *s, int nbits, int inverse) +int ff_fft_init(FFTContext *s, int nbits, int inverse) { int i, j, m, n; float alpha, c1, s1, s2; @@ -53,7 +53,7 @@ int fft_init(FFTContext *s, int nbits, int inverse) s->exptab[i].re = c1; s->exptab[i].im = s1; } - s->fft_calc = fft_calc_c; + s->fft_calc = ff_fft_calc_c; s->exptab1 = NULL; /* compute constant table for HAVE_SSE version */ @@ -94,9 +94,9 @@ int fft_init(FFTContext *s, int nbits, int inverse) } while (nblocks != 0); av_freep(&s->exptab); #if defined(HAVE_MMX) - s->fft_calc = fft_calc_sse; + s->fft_calc = ff_fft_calc_sse; #else - s->fft_calc = fft_calc_altivec; + s->fft_calc = ff_fft_calc_altivec; #endif } } @@ -142,11 +142,11 @@ int fft_init(FFTContext *s, int nbits, int inverse) } /** - * Do a complex FFT with the parameters defined in fft_init(). The + * Do a complex FFT with the parameters defined in ff_fft_init(). The * input data must be permuted before with s->revtab table. No * 1.0/sqrt(n) normalization is done. 
*/ -void fft_calc_c(FFTContext *s, FFTComplex *z) +void ff_fft_calc_c(FFTContext *s, FFTComplex *z) { int ln = s->nbits; int j, np, np2; @@ -221,9 +221,9 @@ void fft_calc_c(FFTContext *s, FFTComplex *z) } /** - * Do the permutation needed BEFORE calling fft_calc() + * Do the permutation needed BEFORE calling ff_fft_calc() */ -void fft_permute(FFTContext *s, FFTComplex *z) +void ff_fft_permute(FFTContext *s, FFTComplex *z) { int j, k, np; FFTComplex tmp; @@ -241,7 +241,7 @@ void fft_permute(FFTContext *s, FFTComplex *z) } } -void fft_end(FFTContext *s) +void ff_fft_end(FFTContext *s) { av_freep(&s->revtab); av_freep(&s->exptab); diff --git a/src/libffmpeg/libavcodec/flicvideo.c b/src/libffmpeg/libavcodec/flicvideo.c index 248fc843a..99825cebc 100644 --- a/src/libffmpeg/libavcodec/flicvideo.c +++ b/src/libffmpeg/libavcodec/flicvideo.c @@ -51,12 +51,6 @@ #define FLI_COPY 16 #define FLI_MINI 18 -#define LE_16(x) ((((uint8_t*)(x))[1] << 8) | ((uint8_t*)(x))[0]) -#define LE_32(x) ((((uint8_t*)(x))[3] << 24) | \ - (((uint8_t*)(x))[2] << 16) | \ - (((uint8_t*)(x))[1] << 8) | \ - ((uint8_t*)(x))[0]) - typedef struct FlicDecodeContext { AVCodecContext *avctx; AVFrame frame; diff --git a/src/libffmpeg/libavcodec/golomb.h b/src/libffmpeg/libavcodec/golomb.h index 5ebebe94c..cd8bdd38d 100644 --- a/src/libffmpeg/libavcodec/golomb.h +++ b/src/libffmpeg/libavcodec/golomb.h @@ -256,6 +256,14 @@ static inline int get_ur_golomb_jpegls(GetBitContext *gb, int k, int limit, int } } +/** + * read unsigned golomb rice code (flac). + */ +static inline int get_sr_golomb_flac(GetBitContext *gb, int k, int limit, int esc_len){ + int v= get_ur_golomb_jpegls(gb, k, limit, esc_len); + return (v>>1) ^ -(v&1); +} + #ifdef TRACE static inline int get_ue(GetBitContext *s, char *file, char *func, int line){ diff --git a/src/libffmpeg/libavcodec/h263.c b/src/libffmpeg/libavcodec/h263.c index de9110a4e..8a60ff08b 100644 --- a/src/libffmpeg/libavcodec/h263.c +++ b/src/libffmpeg/libavcodec/h263.c @@ -197,15 +197,36 @@ void ff_flv_encode_picture_header(MpegEncContext * s, int picture_number) void h263_encode_picture_header(MpegEncContext * s, int picture_number) { - int format; + int format, coded_frame_rate, coded_frame_rate_base, i, temp_ref; + int best_clock_code=1; + int best_divisor=60; + int best_error= INT_MAX; + + if(s->h263_plus){ + for(i=0; i<2; i++){ + int div, error; + div= (s->avctx->frame_rate_base*1800000LL + 500LL*s->avctx->frame_rate) / ((1000LL+i)*s->avctx->frame_rate); + div= clip(1, div, 127); + error= ABS(s->avctx->frame_rate_base*1800000LL - (1000LL+i)*s->avctx->frame_rate*div); + if(error < best_error){ + best_error= error; + best_divisor= div; + best_clock_code= i; + } + } + } + s->custom_pcf= best_clock_code!=1 || best_divisor!=60; + coded_frame_rate= 1800000; + coded_frame_rate_base= (1000+best_clock_code)*best_divisor; align_put_bits(&s->pb); /* Update the pointer to last GOB */ s->ptr_lastgob = pbBufPtr(&s->pb); put_bits(&s->pb, 22, 0x20); /* PSC */ - put_bits(&s->pb, 8, (((int64_t)s->picture_number * 30 * s->avctx->frame_rate_base) / - s->avctx->frame_rate) & 0xff); + temp_ref= s->picture_number * (int64_t)coded_frame_rate * s->avctx->frame_rate_base / + (coded_frame_rate_base * (int64_t)s->avctx->frame_rate); + put_bits(&s->pb, 8, temp_ref & 0xff); /* TemporalReference */ put_bits(&s->pb, 1, 1); /* marker */ put_bits(&s->pb, 1, 0); /* h263 id */ @@ -228,16 +249,18 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number) put_bits(&s->pb, 5, s->qscale); put_bits(&s->pb, 1, 0); /* 
Continuous Presence Multipoint mode: off */ } else { + int ufep=1; /* H.263v2 */ /* H.263 Plus PTYPE */ + put_bits(&s->pb, 3, 7); - put_bits(&s->pb,3,1); /* Update Full Extended PTYPE */ + put_bits(&s->pb,3,ufep); /* Update Full Extended PTYPE */ if (format == 7) put_bits(&s->pb,3,6); /* Custom Source Format */ else put_bits(&s->pb, 3, format); - put_bits(&s->pb,1,0); /* Custom PCF: off */ + put_bits(&s->pb,1, s->custom_pcf); put_bits(&s->pb,1, s->umvplus); /* Unrestricted Motion Vector */ put_bits(&s->pb,1,0); /* SAC: off */ put_bits(&s->pb,1,s->obmc); /* Advanced Prediction Mode */ @@ -275,6 +298,13 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number) put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.den); } } + if(s->custom_pcf){ + if(ufep){ + put_bits(&s->pb, 1, best_clock_code); + put_bits(&s->pb, 7, best_divisor); + } + put_bits(&s->pb, 2, (temp_ref>>8)&3); + } /* Unlimited Unrestricted Motion Vectors Indicator (UUI) */ if (s->umvplus) @@ -1123,7 +1153,7 @@ void h263_encode_mb(MpegEncContext * s, /* compute cbp */ cbp= get_p_cbp(s, block, motion_x, motion_y); - if ((cbp | motion_x | motion_y | s->dquant) == 0) { + if ((cbp | motion_x | motion_y | s->dquant | (s->mv_type - MV_TYPE_16X16)) == 0) { /* skip macroblock */ put_bits(&s->pb, 1, 1); if(interleaved_stats){ @@ -1961,7 +1991,9 @@ void h263_encode_init(MpegEncContext *s) s->luma_dc_vlc_length= uni_DCtab_lum_len; s->chroma_dc_vlc_length= uni_DCtab_chrom_len; s->ac_esc_length= 7+2+1+6+1+12+1; - + s->y_dc_scale_table= ff_mpeg4_y_dc_scale_table; + s->c_dc_scale_table= ff_mpeg4_c_dc_scale_table; + if(s->flags & CODEC_FLAG_GLOBAL_HEADER){ s->avctx->extradata= av_malloc(1024); @@ -1972,7 +2004,7 @@ void h263_encode_init(MpegEncContext *s) // ff_mpeg4_stuffing(&s->pb); ? flush_put_bits(&s->pb); - s->avctx->extradata_size= (get_bit_count(&s->pb)+7)>>3; + s->avctx->extradata_size= (put_bits_count(&s->pb)+7)>>3; } break; @@ -2146,7 +2178,7 @@ void ff_mpeg4_stuffing(PutBitContext * pbc) { int length; put_bits(pbc, 1, 0); - length= (-get_bit_count(pbc))&7; + length= (-put_bits_count(pbc))&7; if(length) put_bits(pbc, length, (1<<length)-1); } @@ -2154,10 +2186,9 @@ void ff_mpeg4_stuffing(PutBitContext * pbc) void ff_set_mpeg4_time(MpegEncContext * s, int picture_number){ int time_div, time_mod; - if(s->current_picture_ptr->pts) - s->time= (s->current_picture_ptr->pts*s->time_increment_resolution + 500*1000)/(1000*1000); - else - s->time= av_rescale(picture_number*(int64_t)s->avctx->frame_rate_base, s->time_increment_resolution, s->avctx->frame_rate); + assert(s->current_picture_ptr->pts != AV_NOPTS_VALUE); + s->time= (s->current_picture_ptr->pts*s->time_increment_resolution + AV_TIME_BASE/2)/AV_TIME_BASE; + time_div= s->time/s->time_increment_resolution; time_mod= s->time%s->time_increment_resolution; @@ -2178,11 +2209,10 @@ static void mpeg4_encode_gop_header(MpegEncContext * s){ put_bits(&s->pb, 16, 0); put_bits(&s->pb, 16, GOP_STARTCODE); - if(s->current_picture_ptr->pts && s->reordered_input_picture[1]){ - time= FFMIN(s->reordered_input_picture[1]->pts, s->current_picture_ptr->pts); - time= (time*s->time_increment_resolution + 500*1000)/(1000*1000); - }else - time= av_rescale(s->current_picture_ptr->coded_picture_number*(int64_t)s->avctx->frame_rate_base, s->time_increment_resolution, s->avctx->frame_rate); + time= s->current_picture_ptr->pts; + if(s->reordered_input_picture[1]) + time= FFMIN(time, s->reordered_input_picture[1]->pts); + time= (time*s->time_increment_resolution + AV_TIME_BASE/2)/AV_TIME_BASE; seconds= 
time/s->time_increment_resolution; minutes= seconds/60; seconds %= 60; @@ -2290,12 +2320,11 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n put_bits(&s->pb, 1, s->progressive_sequence ? 0 : 1); put_bits(&s->pb, 1, 1); /* obmc disable */ if (vo_ver_id == 1) { - put_bits(&s->pb, 1, s->vol_sprite_usage=0); /* sprite enable */ + put_bits(&s->pb, 1, s->vol_sprite_usage); /* sprite enable */ }else{ - put_bits(&s->pb, 2, s->vol_sprite_usage=0); /* sprite enable */ + put_bits(&s->pb, 2, s->vol_sprite_usage); /* sprite enable */ } - s->quant_precision=5; put_bits(&s->pb, 1, 0); /* not 8 bit == false */ put_bits(&s->pb, 1, s->mpeg_quant); /* quant type= (0=h263 style)*/ @@ -2326,8 +2355,7 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n if(!(s->flags & CODEC_FLAG_BITEXACT)){ put_bits(&s->pb, 16, 0); put_bits(&s->pb, 16, 0x1B2); /* user_data */ - put_string(&s->pb, LIBAVCODEC_IDENT); - ff_mpeg4_stuffing(&s->pb); + put_string(&s->pb, LIBAVCODEC_IDENT, 0); } } @@ -2385,9 +2413,6 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number) if (s->pict_type == B_TYPE) put_bits(&s->pb, 3, s->b_code); /* fcode_back */ // printf("****frame %d\n", picture_number); - - s->y_dc_scale_table= ff_mpeg4_y_dc_scale_table; //FIXME add short header support - s->c_dc_scale_table= ff_mpeg4_c_dc_scale_table; } #endif //CONFIG_ENCODERS @@ -2966,15 +2991,23 @@ static inline void memsetw(short *tab, int val, int n) void ff_mpeg4_init_partitions(MpegEncContext *s) { - init_put_bits(&s->tex_pb, s->tex_pb_buffer, PB_BUFFER_SIZE); - init_put_bits(&s->pb2 , s->pb2_buffer , PB_BUFFER_SIZE); + uint8_t *start= pbBufPtr(&s->pb); + uint8_t *end= s->pb.buf_end; + int size= end - start; + int pb_size = size/3; + int pb2_size= size/3; + int tex_size= size - pb_size - pb2_size; + + set_put_bits_buffer_size(&s->pb, pb_size); + init_put_bits(&s->tex_pb, start + pb_size , tex_size); + init_put_bits(&s->pb2 , start + pb_size + tex_size, pb2_size); } void ff_mpeg4_merge_partitions(MpegEncContext *s) { - const int pb2_len = get_bit_count(&s->pb2 ); - const int tex_pb_len= get_bit_count(&s->tex_pb); - const int bits= get_bit_count(&s->pb); + const int pb2_len = put_bits_count(&s->pb2 ); + const int tex_pb_len= put_bits_count(&s->tex_pb); + const int bits= put_bits_count(&s->pb); if(s->pict_type==I_TYPE){ put_bits(&s->pb, 19, DC_MARKER); @@ -2990,9 +3023,10 @@ void ff_mpeg4_merge_partitions(MpegEncContext *s) flush_put_bits(&s->pb2); flush_put_bits(&s->tex_pb); - ff_copy_bits(&s->pb, s->pb2_buffer , pb2_len); - ff_copy_bits(&s->pb, s->tex_pb_buffer, tex_pb_len); - s->last_bits= get_bit_count(&s->pb); + set_put_bits_buffer_size(&s->pb, s->pb2.buf_end - s->pb.buf); + ff_copy_bits(&s->pb, s->pb2.buf , pb2_len); + ff_copy_bits(&s->pb, s->tex_pb.buf, tex_pb_len); + s->last_bits= put_bits_count(&s->pb); } #endif //CONFIG_ENCODERS @@ -3302,18 +3336,18 @@ static int mpeg4_decode_partition_a(MpegEncContext *s){ if(s->pict_type==I_TYPE){ int i; - if(show_bits_long(&s->gb, 19)==DC_MARKER){ - return mb_num-1; - } - do{ + if(show_bits_long(&s->gb, 19)==DC_MARKER){ + return mb_num-1; + } + cbpc = get_vlc2(&s->gb, intra_MCBPC_vlc.table, INTRA_MCBPC_VLC_BITS, 2); if (cbpc < 0){ av_log(s->avctx, AV_LOG_ERROR, "cbpc corrupted at %d %d\n", s->mb_x, s->mb_y); return -1; } }while(cbpc == 8); - + s->cbp_table[xy]= cbpc & 3; s->current_picture.mb_type[xy]= MB_TYPE_INTRA; s->mb_intra = 1; @@ -3340,7 +3374,7 @@ static int mpeg4_decode_partition_a(MpegEncContext *s){ int16_t * const 
mot_val= s->current_picture.motion_val[0][s->block_index[0]]; const int stride= s->block_wrap[0]*2; -// do{ //FIXME +try_again: bits= show_bits(&s->gb, 17); if(bits==MOTION_MARKER){ return mb_num-1; @@ -3371,7 +3405,8 @@ static int mpeg4_decode_partition_a(MpegEncContext *s){ av_log(s->avctx, AV_LOG_ERROR, "cbpc corrupted at %d %d\n", s->mb_x, s->mb_y); return -1; } -// }while(cbpc == 20); + if(cbpc == 20) + goto try_again; s->cbp_table[xy]= cbpc&(8+3); //8 is dquant @@ -3552,11 +3587,15 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s) s->mb_num_left= mb_num; if(s->pict_type==I_TYPE){ + while(show_bits(&s->gb, 9) == 1) + skip_bits(&s->gb, 9); if(get_bits_long(&s->gb, 19)!=DC_MARKER){ av_log(s->avctx, AV_LOG_ERROR, "marker missing after first I partition at %d %d\n", s->mb_x, s->mb_y); return -1; } }else{ + while(show_bits(&s->gb, 10) == 1) + skip_bits(&s->gb, 10); if(get_bits(&s->gb, 17)!=MOTION_MARKER){ av_log(s->avctx, AV_LOG_ERROR, "marker missing after first P partition at %d %d\n", s->mb_x, s->mb_y); return -1; @@ -3867,8 +3906,15 @@ int ff_h263_decode_mb(MpegEncContext *s, } } + /* decode each block */ + for (i = 0; i < 6; i++) { + if (h263_decode_block(s, block[i], i, cbp&32) < 0) + return -1; + cbp+=cbp; + } + if(s->obmc){ - if(s->pict_type == P_TYPE && s->mb_x+1<s->mb_width) + if(s->pict_type == P_TYPE && s->mb_x+1<s->mb_width && s->mb_num_left != 1) preview_obmc(s); } } else if(s->pict_type==B_TYPE) { @@ -3958,6 +4004,13 @@ int ff_h263_decode_mb(MpegEncContext *s, } s->current_picture.mb_type[xy]= mb_type; + + /* decode each block */ + for (i = 0; i < 6; i++) { + if (h263_decode_block(s, block[i], i, cbp&32) < 0) + return -1; + cbp+=cbp; + } } else { /* I-Frame */ do{ cbpc = get_vlc2(&s->gb, intra_MCBPC_vlc.table, INTRA_MCBPC_VLC_BITS, 2); @@ -3990,13 +4043,13 @@ intra: if (dquant) { h263_decode_dquant(s); } - } - /* decode each block */ - for (i = 0; i < 6; i++) { - if (h263_decode_block(s, block[i], i, cbp&32) < 0) - return -1; - cbp+=cbp; + /* decode each block */ + for (i = 0; i < 6; i++) { + if (h263_decode_block(s, block[i], i, cbp&32) < 0) + return -1; + cbp+=cbp; + } } end: @@ -4873,7 +4926,10 @@ int h263_decode_picture_header(MpegEncContext *s) return -1; } /* temporal reference */ - s->picture_number = get_bits(&s->gb, 8); /* picture timestamp */ + i = get_bits(&s->gb, 8); /* picture timestamp */ + if( (s->picture_number&~0xFF)+i < s->picture_number) + i+= 256; + s->picture_number= (s->picture_number&~0xFF) + i; /* PTYPE starts here */ if (get_bits1(&s->gb) != 1) { @@ -4925,6 +4981,9 @@ int h263_decode_picture_header(MpegEncContext *s) s->width = width; s->height = height; + s->avctx->sample_aspect_ratio= (AVRational){12,11}; + s->avctx->frame_rate = 30000; + s->avctx->frame_rate_base= 1001; } else { int ufep; @@ -4937,7 +4996,7 @@ int h263_decode_picture_header(MpegEncContext *s) /* OPPTYPE */ format = get_bits(&s->gb, 3); dprintf("ufep=1, format: %d\n", format); - skip_bits(&s->gb,1); /* Custom PCF */ + s->custom_pcf= get_bits1(&s->gb); s->umvplus = get_bits(&s->gb, 1); /* Unrestricted Motion Vector */ if (get_bits1(&s->gb) != 0) { av_log(s->avctx, AV_LOG_ERROR, "Syntax-based Arithmetic Coding (SAC) not supported\n"); @@ -5010,11 +5069,37 @@ int h263_decode_picture_header(MpegEncContext *s) } else { width = h263_format[format][0]; height = h263_format[format][1]; + s->avctx->sample_aspect_ratio= (AVRational){12,11}; } if ((width == 0) || (height == 0)) return -1; s->width = width; s->height = height; + + if(s->custom_pcf){ + int gcd; + s->avctx->frame_rate= 
1800000; + s->avctx->frame_rate_base= 1000 + get_bits1(&s->gb); + s->avctx->frame_rate_base*= get_bits(&s->gb, 7); + if(s->avctx->frame_rate_base == 0){ + av_log(s, AV_LOG_ERROR, "zero framerate\n"); + return -1; + } + gcd= ff_gcd(s->avctx->frame_rate, s->avctx->frame_rate_base); + s->avctx->frame_rate /= gcd; + s->avctx->frame_rate_base /= gcd; +// av_log(s->avctx, AV_LOG_DEBUG, "%d/%d\n", s->avctx->frame_rate, s->avctx->frame_rate_base); + }else{ + s->avctx->frame_rate = 30000; + s->avctx->frame_rate_base= 1001; + } + } + + if(s->custom_pcf){ + skip_bits(&s->gb, 2); //extended Temporal reference + } + + if (ufep) { if (s->umvplus) { if(get_bits1(&s->gb)==0) /* Unlimited Unrestricted Motion Vectors Indicator (UUI) */ skip_bits1(&s->gb); @@ -5061,7 +5146,7 @@ int h263_decode_picture_header(MpegEncContext *s) } if(s->avctx->debug&FF_DEBUG_PICT_INFO){ - av_log(s->avctx, AV_LOG_DEBUG, "qp:%d %c size:%d rnd:%d%s%s%s%s%s%s%s%s%s\n", + av_log(s->avctx, AV_LOG_DEBUG, "qp:%d %c size:%d rnd:%d%s%s%s%s%s%s%s%s%s %d/%d\n", s->qscale, av_get_pict_type_char(s->pict_type), s->gb.size_in_bits, 1-s->no_rounding, s->obmc ? " AP" : "", @@ -5072,7 +5157,8 @@ int h263_decode_picture_header(MpegEncContext *s) s->alt_inter_vlc ? " AIV" : "", s->modified_quant ? " MQ" : "", s->loop_filter ? " LOOP" : "", - s->h263_slice_structured ? " SS" : "" + s->h263_slice_structured ? " SS" : "", + s->avctx->frame_rate, s->avctx->frame_rate_base ); } #if 1 @@ -5690,9 +5776,9 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){ - ROUNDED_DIV(s->last_non_b_time - s->pp_time, s->t_frame))*2; } - s->current_picture_ptr->pts= s->time*1000LL*1000LL / s->time_increment_resolution; + s->current_picture_ptr->pts= s->time*(int64_t)AV_TIME_BASE / s->time_increment_resolution; if(s->avctx->debug&FF_DEBUG_PTS) - av_log(s->avctx, AV_LOG_DEBUG, "MPEG4 PTS: %f\n", s->current_picture_ptr->pts/(1000.0*1000.0)); + av_log(s->avctx, AV_LOG_DEBUG, "MPEG4 PTS: %f\n", s->current_picture_ptr->pts/(float)AV_TIME_BASE); check_marker(gb, "before vop_coded"); diff --git a/src/libffmpeg/libavcodec/h264.c b/src/libffmpeg/libavcodec/h264.c index 43ed13e99..3f60e35e8 100644 --- a/src/libffmpeg/libavcodec/h264.c +++ b/src/libffmpeg/libavcodec/h264.c @@ -1070,7 +1070,7 @@ static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, in static void encode_rbsp_trailing(PutBitContext *pb){ int length; put_bits(pb, 1, 1); - length= (-get_bit_count(pb))&7; + length= (-put_bits_count(pb))&7; if(length) put_bits(pb, length, 0); } diff --git a/src/libffmpeg/libavcodec/huffyuv.c b/src/libffmpeg/libavcodec/huffyuv.c index d180d6aee..4047a6a93 100644 --- a/src/libffmpeg/libavcodec/huffyuv.c +++ b/src/libffmpeg/libavcodec/huffyuv.c @@ -1049,7 +1049,7 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, } emms_c(); - size= (get_bit_count(&s->pb)+31)/32; + size= (put_bits_count(&s->pb)+31)/32; if((s->flags&CODEC_FLAG_PASS1) && (s->picture_number&31)==0){ int j; diff --git a/src/libffmpeg/libavcodec/i386/Makefile.am b/src/libffmpeg/libavcodec/i386/Makefile.am index c9d90d951..d7b2bb4f8 100644 --- a/src/libffmpeg/libavcodec/i386/Makefile.am +++ b/src/libffmpeg/libavcodec/i386/Makefile.am @@ -17,7 +17,8 @@ libavcodec_mmx_src = \ idct_mmx.c \ motion_est_mmx.c \ mpegvideo_mmx.c \ - simple_idct_mmx.c + simple_idct_mmx.c \ + vp3dsp_mmx.c libavcodec_mmx_dummy = libavcodec_mmx_dummy.c diff --git a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c index efa022557..15dc8eec2 
100644 --- a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c +++ b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c @@ -31,16 +31,16 @@ extern const uint8_t ff_h263_loop_filter_strength[32]; int mm_flags; /* multimedia extension flags */ /* pixel operations */ -static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL; -static const uint64_t mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL; -static const uint64_t mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002ULL; +static const uint64_t mm_bone attribute_used __attribute__ ((aligned(8))) = 0x0101010101010101ULL; +static const uint64_t mm_wone attribute_used __attribute__ ((aligned(8))) = 0x0001000100010001ULL; +static const uint64_t mm_wtwo attribute_used __attribute__ ((aligned(8))) = 0x0002000200020002ULL; -static const uint64_t ff_pw_20 __attribute__ ((aligned(8))) = 0x0014001400140014ULL; -static const uint64_t ff_pw_3 __attribute__ ((aligned(8))) = 0x0003000300030003ULL; -static const uint64_t ff_pw_16 __attribute__ ((aligned(8))) = 0x0010001000100010ULL; -static const uint64_t ff_pw_15 __attribute__ ((aligned(8))) = 0x000F000F000F000FULL; +static const uint64_t ff_pw_20 attribute_used __attribute__ ((aligned(8))) = 0x0014001400140014ULL; +static const uint64_t ff_pw_3 attribute_used __attribute__ ((aligned(8))) = 0x0003000300030003ULL; +static const uint64_t ff_pw_16 attribute_used __attribute__ ((aligned(8))) = 0x0010001000100010ULL; +static const uint64_t ff_pw_15 attribute_used __attribute__ ((aligned(8))) = 0x000F000F000F000FULL; -static const uint64_t ff_pb_FC __attribute__ ((aligned(8))) = 0xFCFCFCFCFCFCFCFCULL; +static const uint64_t ff_pb_FC attribute_used __attribute__ ((aligned(8))) = 0xFCFCFCFCFCFCFCFCULL; #define JUMPALIGN() __asm __volatile (".balign 8"::) #define MOVQ_ZERO(regd) __asm __volatile ("pxor %%" #regd ", %%" #regd ::) @@ -1973,6 +1973,92 @@ static void just_return() { return; } c->put_no_rnd_ ## postfix1 = put_no_rnd_ ## postfix2;\ c->avg_ ## postfix1 = avg_ ## postfix2; +static int try_8x8basis_mmx(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){ + int i=0; + + assert(ABS(scale) < 256); + scale<<= 16 + 1 - BASIS_SHIFT + RECON_SHIFT; + + asm volatile( + "pcmpeqw %%mm6, %%mm6 \n\t" // -1w + "psrlw $15, %%mm6 \n\t" // 1w + "pxor %%mm7, %%mm7 \n\t" + "movd %4, %%mm5 \n\t" + "punpcklwd %%mm5, %%mm5 \n\t" + "punpcklwd %%mm5, %%mm5 \n\t" + "1: \n\t" + "movq (%1, %0), %%mm0 \n\t" + "movq 8(%1, %0), %%mm1 \n\t" + "pmulhw %%mm5, %%mm0 \n\t" + "pmulhw %%mm5, %%mm1 \n\t" + "paddw %%mm6, %%mm0 \n\t" + "paddw %%mm6, %%mm1 \n\t" + "psraw $1, %%mm0 \n\t" + "psraw $1, %%mm1 \n\t" + "paddw (%2, %0), %%mm0 \n\t" + "paddw 8(%2, %0), %%mm1 \n\t" + "psraw $6, %%mm0 \n\t" + "psraw $6, %%mm1 \n\t" + "pmullw (%3, %0), %%mm0 \n\t" + "pmullw 8(%3, %0), %%mm1 \n\t" + "pmaddwd %%mm0, %%mm0 \n\t" + "pmaddwd %%mm1, %%mm1 \n\t" + "paddd %%mm1, %%mm0 \n\t" + "psrld $4, %%mm0 \n\t" + "paddd %%mm0, %%mm7 \n\t" + "addl $16, %0 \n\t" + "cmpl $128, %0 \n\t" //FIXME optimize & bench + " jb 1b \n\t" + "movq %%mm7, %%mm6 \n\t" + "psrlq $32, %%mm7 \n\t" + "paddd %%mm6, %%mm7 \n\t" + "psrld $2, %%mm7 \n\t" + "movd %%mm7, %0 \n\t" + + : "+r" (i) + : "r"(basis), "r"(rem), "r"(weight), "g"(scale) + ); + return i; +} + +static void add_8x8basis_mmx(int16_t rem[64], int16_t basis[64], int scale){ + int i=0; + + if(ABS(scale) < 256){ + scale<<= 16 + 1 - BASIS_SHIFT + RECON_SHIFT; + asm volatile( + "pcmpeqw %%mm6, %%mm6 \n\t" // -1w + "psrlw $15, %%mm6 \n\t" // 1w + "movd %3, %%mm5 \n\t" + "punpcklwd 
%%mm5, %%mm5 \n\t" + "punpcklwd %%mm5, %%mm5 \n\t" + "1: \n\t" + "movq (%1, %0), %%mm0 \n\t" + "movq 8(%1, %0), %%mm1 \n\t" + "pmulhw %%mm5, %%mm0 \n\t" + "pmulhw %%mm5, %%mm1 \n\t" + "paddw %%mm6, %%mm0 \n\t" + "paddw %%mm6, %%mm1 \n\t" + "psraw $1, %%mm0 \n\t" + "psraw $1, %%mm1 \n\t" + "paddw (%2, %0), %%mm0 \n\t" + "paddw 8(%2, %0), %%mm1 \n\t" + "movq %%mm0, (%2, %0) \n\t" + "movq %%mm1, 8(%2, %0) \n\t" + "addl $16, %0 \n\t" + "cmpl $128, %0 \n\t" //FIXME optimize & bench + " jb 1b \n\t" + + : "+r" (i) + : "r"(basis), "r"(rem), "g"(scale) + ); + }else{ + for(i=0; i<8*8; i++){ + rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT); + } + } +} + /* external functions, from idct_mmx.c */ void ff_mmx_idct(DCTELEM *block); void ff_mmxext_idct(DCTELEM *block); @@ -2012,18 +2098,18 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) } #if 0 - fprintf(stderr, "libavcodec: CPU flags:"); + av_log(avctx, AV_LOG_INFO, "libavcodec: CPU flags:"); if (mm_flags & MM_MMX) - fprintf(stderr, " mmx"); + av_log(avctx, AV_LOG_INFO, " mmx"); if (mm_flags & MM_MMXEXT) - fprintf(stderr, " mmxext"); + av_log(avctx, AV_LOG_INFO, " mmxext"); if (mm_flags & MM_3DNOW) - fprintf(stderr, " 3dnow"); + av_log(avctx, AV_LOG_INFO, " 3dnow"); if (mm_flags & MM_SSE) - fprintf(stderr, " sse"); + av_log(avctx, AV_LOG_INFO, " sse"); if (mm_flags & MM_SSE2) - fprintf(stderr, " sse2"); - fprintf(stderr, "\n"); + av_log(avctx, AV_LOG_INFO, " sse2"); + av_log(avctx, AV_LOG_INFO, "\n"); #endif if (mm_flags & MM_MMX) { @@ -2059,6 +2145,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) } c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; } + + /* VP3 optimized DSP functions */ + c->vp3_dsp_init = vp3_dsp_init_mmx; + c->vp3_idct_put = vp3_idct_put_mmx; + c->vp3_idct_add = vp3_idct_add_mmx; #ifdef CONFIG_ENCODERS c->get_pixels = get_pixels_mmx; @@ -2125,10 +2216,16 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ c->vsad[0] = vsad16_mmx; } + + if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ + c->try_8x8basis= try_8x8basis_mmx; + } + c->add_8x8basis= add_8x8basis_mmx; + #endif //CONFIG_ENCODERS c->h263_v_loop_filter= h263_v_loop_filter_mmx; - c->h263_h_loop_filter= h263_h_loop_filter_mmx; + c->h263_h_loop_filter= h263_h_loop_filter_mmx; if (mm_flags & MM_MMXEXT) { c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; diff --git a/src/libffmpeg/libavcodec/i386/fft_sse.c b/src/libffmpeg/libavcodec/i386/fft_sse.c index 175cea506..d07c943e9 100644 --- a/src/libffmpeg/libavcodec/i386/fft_sse.c +++ b/src/libffmpeg/libavcodec/i386/fft_sse.c @@ -42,7 +42,7 @@ static void print_v4sf(const char *str, __m128 a) #endif /* XXX: handle reverse case */ -void fft_calc_sse(FFTContext *s, FFTComplex *z) +void ff_fft_calc_sse(FFTContext *s, FFTComplex *z) { int ln = s->nbits; int j, np, np2; diff --git a/src/libffmpeg/libavcodec/i386/motion_est_mmx.c b/src/libffmpeg/libavcodec/i386/motion_est_mmx.c index f32afae0b..39246d905 100644 --- a/src/libffmpeg/libavcodec/i386/motion_est_mmx.c +++ b/src/libffmpeg/libavcodec/i386/motion_est_mmx.c @@ -27,7 +27,7 @@ static const __attribute__ ((aligned(8))) uint64_t round_tab[3]={ 0x0002000200020002ULL, }; -static __attribute__ ((aligned(8), unused)) uint64_t bone= 0x0101010101010101LL; +static attribute_used __attribute__ ((aligned(8))) uint64_t bone= 0x0101010101010101LL; static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) { diff --git 
a/src/libffmpeg/libavcodec/i386/simple_idct_mmx.c b/src/libffmpeg/libavcodec/i386/simple_idct_mmx.c index 626c1f565..b005f9d82 100644 --- a/src/libffmpeg/libavcodec/i386/simple_idct_mmx.c +++ b/src/libffmpeg/libavcodec/i386/simple_idct_mmx.c @@ -45,8 +45,8 @@ #define ROW_SHIFT 11 #define COL_SHIFT 20 // 6 -static const uint64_t __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000ULL; -static const uint64_t __attribute__((aligned(8))) d40000= 0x0000000000040000ULL; +static const uint64_t attribute_used __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000ULL; +static const uint64_t attribute_used __attribute__((aligned(8))) d40000= 0x0000000000040000ULL; static const int16_t __attribute__((aligned(8))) coeffs[]= { 1<<(ROW_SHIFT-1), 0, 1<<(ROW_SHIFT-1), 0, diff --git a/src/libffmpeg/libavcodec/indeo3.c b/src/libffmpeg/libavcodec/indeo3.c index cad8e982b..12f4ced6d 100644 --- a/src/libffmpeg/libavcodec/indeo3.c +++ b/src/libffmpeg/libavcodec/indeo3.c @@ -1056,6 +1056,12 @@ static int indeo3_decode_frame(AVCodecContext *avctx, unsigned char *src, *dest; int y; + /* no supplementary picture */ + if (buf_size == 0) { + *data_size = 0; + return 0; + } + iv_decode_frame(s, buf, buf_size); if(s->frame.data[0]) diff --git a/src/libffmpeg/libavcodec/libpostproc/Makefile.am b/src/libffmpeg/libavcodec/libpostproc/Makefile.am index 26631d570..c9bb6ad4a 100644 --- a/src/libffmpeg/libavcodec/libpostproc/Makefile.am +++ b/src/libffmpeg/libavcodec/libpostproc/Makefile.am @@ -1,6 +1,6 @@ include $(top_srcdir)/misc/Makefile.common -AM_CFLAGS = $(LIBFFMPEG_CFLAGS) +AM_CFLAGS = $(LIBFFMPEG_CFLAGS) -I../ ASFLAGS = noinst_LTLIBRARIES = libpostprocess.la diff --git a/src/libffmpeg/libavcodec/libpostproc/postprocess.c b/src/libffmpeg/libavcodec/libpostproc/postprocess.c index 093d94aea..9ac18eaf2 100644 --- a/src/libffmpeg/libavcodec/libpostproc/postprocess.c +++ b/src/libffmpeg/libavcodec/libpostproc/postprocess.c @@ -81,7 +81,7 @@ try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks //#undef ARCH_X86 //#define DEBUG_BRIGHTNESS #ifdef USE_FASTMEMCPY -#include "../fastmemcpy.h" +#include "fastmemcpy.h" #endif #include "postprocess.h" #include "postprocess_internal.h" @@ -103,14 +103,20 @@ try to unroll inner for(x=0 ... loop to avoid these damn if(x ... 
checks #define TEMP_STRIDE 8 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet +#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0) +# define attribute_used __attribute__((used)) +#else +# define attribute_used +#endif + #ifdef ARCH_X86 -static uint64_t __attribute__((aligned(8))) w05= 0x0005000500050005LL; -static uint64_t __attribute__((aligned(8))) w20= 0x0020002000200020LL; -static uint64_t __attribute__((aligned(8))) b00= 0x0000000000000000LL; -static uint64_t __attribute__((aligned(8))) b01= 0x0101010101010101LL; -static uint64_t __attribute__((aligned(8))) b02= 0x0202020202020202LL; -static uint64_t __attribute__((aligned(8))) b08= 0x0808080808080808LL; -static uint64_t __attribute__((aligned(8))) b80= 0x8080808080808080LL; +static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL; +static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL; +static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL; +static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL; +static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL; +static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL; +static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL; #endif @@ -119,7 +125,7 @@ static uint8_t * const clip_tab= clip_table + 256; static int verbose= 0; -static const int deringThreshold= 20; +static const int attribute_used deringThreshold= 20; static struct PPFilter filters[]= diff --git a/src/libffmpeg/libavcodec/mdct.c b/src/libffmpeg/libavcodec/mdct.c index a0f567177..6628958b6 100644 --- a/src/libffmpeg/libavcodec/mdct.c +++ b/src/libffmpeg/libavcodec/mdct.c @@ -48,7 +48,7 @@ int ff_mdct_init(MDCTContext *s, int nbits, int inverse) s->tcos[i] = -cos(alpha); s->tsin[i] = -sin(alpha); } - if (fft_init(&s->fft, s->nbits - 2, inverse) < 0) + if (ff_fft_init(&s->fft, s->nbits - 2, inverse) < 0) goto fail; return 0; fail: @@ -98,7 +98,7 @@ void ff_imdct_calc(MDCTContext *s, FFTSample *output, in1 += 2; in2 -= 2; } - fft_calc(&s->fft, z); + ff_fft_calc(&s->fft, z); /* post rotation + reordering */ /* XXX: optimize */ @@ -155,7 +155,7 @@ void ff_mdct_calc(MDCTContext *s, FFTSample *out, CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]); } - fft_calc(&s->fft, x); + ff_fft_calc(&s->fft, x); /* post rotation */ for(i=0;i<n4;i++) { @@ -171,5 +171,5 @@ void ff_mdct_end(MDCTContext *s) { av_freep(&s->tcos); av_freep(&s->tsin); - fft_end(&s->fft); + ff_fft_end(&s->fft); } diff --git a/src/libffmpeg/libavcodec/mjpeg.c b/src/libffmpeg/libavcodec/mjpeg.c index 1a948aa56..30029d40c 100644 --- a/src/libffmpeg/libavcodec/mjpeg.c +++ b/src/libffmpeg/libavcodec/mjpeg.c @@ -378,7 +378,7 @@ static void jpeg_put_comments(MpegEncContext *s) /* JFIF header */ put_marker(p, APP0); put_bits(p, 16, 16); - put_string(p, "JFIF"); /* this puts the trailing zero-byte too */ + put_string(p, "JFIF", 1); /* this puts the trailing zero-byte too */ put_bits(p, 16, 0x0201); /* v 1.02 */ put_bits(p, 8, 0); /* units type: 0 - aspect ratio */ put_bits(p, 16, s->avctx->sample_aspect_ratio.num); @@ -393,7 +393,7 @@ static void jpeg_put_comments(MpegEncContext *s) flush_put_bits(p); ptr = pbBufPtr(p); put_bits(p, 16, 0); /* patched later */ - put_string(p, LIBAVCODEC_IDENT); + put_string(p, LIBAVCODEC_IDENT, 1); size = strlen(LIBAVCODEC_IDENT)+3; ptr[0] = size >> 8; ptr[1] = size; @@ -477,7 +477,7 @@ void 
mjpeg_picture_header(MpegEncContext *s) static void escape_FF(MpegEncContext *s, int start) { - int size= get_bit_count(&s->pb) - start*8; + int size= put_bits_count(&s->pb) - start*8; int i, ff_count; uint8_t *buf= s->pb.buf + start; int align= (-(size_t)(buf))&3; @@ -531,11 +531,16 @@ static void escape_FF(MpegEncContext *s, int start) } } +void ff_mjpeg_stuffing(PutBitContext * pbc) +{ + int length; + length= (-put_bits_count(pbc))&7; + if(length) put_bits(pbc, length, (1<<length)-1); +} + void mjpeg_picture_trailer(MpegEncContext *s) { - int pad= (-get_bit_count(&s->pb))&7; - - put_bits(&s->pb, pad,0xFF>>(8-pad)); + ff_mjpeg_stuffing(&s->pb); flush_put_bits(&s->pb); assert((s->header_bits&7)==0); @@ -651,7 +656,7 @@ static int encode_picture_lossless(AVCodecContext *avctx, unsigned char *buf, in mjpeg_picture_header(s); - s->header_bits= get_bit_count(&s->pb); + s->header_bits= put_bits_count(&s->pb); if(avctx->pix_fmt == PIX_FMT_RGBA32){ int x, y, i; @@ -770,7 +775,7 @@ static int encode_picture_lossless(AVCodecContext *avctx, unsigned char *buf, in flush_put_bits(&s->pb); return pbBufPtr(&s->pb) - s->pb.buf; -// return (get_bit_count(&f->pb)+7)/8; +// return (put_bits_count(&f->pb)+7)/8; } #endif //CONFIG_ENCODERS diff --git a/src/libffmpeg/libavcodec/mlib/dsputil_mlib.c b/src/libffmpeg/libavcodec/mlib/dsputil_mlib.c index 278682534..0d56c576e 100644 --- a/src/libffmpeg/libavcodec/mlib/dsputil_mlib.c +++ b/src/libffmpeg/libavcodec/mlib/dsputil_mlib.c @@ -62,7 +62,7 @@ static void add_pixels_clamped_mlib(const DCTELEM *block, uint8_t *pixels, int l /* put block, width 16 pixel, height 8/16 */ -static void put_pixels16_mlib(uint8_t * dest, const uint8_t * ref, +static void put_pixels16_mlib (uint8_t * dest, const uint8_t * ref, int stride, int height) { switch (height) { @@ -79,7 +79,7 @@ static void put_pixels16_mlib(uint8_t * dest, const uint8_t * ref, } } -static void put_pixels16_x2_mlib(uint8_t * dest, const uint8_t * ref, +static void put_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref, int stride, int height) { switch (height) { @@ -96,7 +96,7 @@ static void put_pixels16_x2_mlib(uint8_t * dest, const uint8_t * ref, } } -static void put_pixels16_y2_mlib(uint8_t * dest, const uint8_t * ref, +static void put_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref, int stride, int height) { switch (height) { @@ -132,7 +132,7 @@ static void put_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref, /* put block, width 8 pixel, height 4/8/16 */ -static void put_pixels8_mlib(uint8_t * dest, const uint8_t * ref, +static void put_pixels8_mlib (uint8_t * dest, const uint8_t * ref, int stride, int height) { switch (height) { @@ -153,7 +153,7 @@ static void put_pixels8_mlib(uint8_t * dest, const uint8_t * ref, } } -static void put_pixels8_x2_mlib(uint8_t * dest, const uint8_t * ref, +static void put_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref, int stride, int height) { switch (height) { @@ -174,7 +174,7 @@ static void put_pixels8_x2_mlib(uint8_t * dest, const uint8_t * ref, } } -static void put_pixels8_y2_mlib(uint8_t * dest, const uint8_t * ref, +static void put_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref, int stride, int height) { switch (height) { @@ -218,7 +218,7 @@ static void put_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref, /* average block, width 16 pixel, height 8/16 */ -static void avg_pixels16_mlib(uint8_t * dest, const uint8_t * ref, +static void avg_pixels16_mlib (uint8_t * dest, const uint8_t * ref, int stride, int height) { switch (height) { @@ 
-235,7 +235,7 @@ static void avg_pixels16_mlib(uint8_t * dest, const uint8_t * ref, } } -static void avg_pixels16_x2_mlib(uint8_t * dest, const uint8_t * ref, +static void avg_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref, int stride, int height) { switch (height) { @@ -252,7 +252,7 @@ static void avg_pixels16_x2_mlib(uint8_t * dest, const uint8_t * ref, } } -static void avg_pixels16_y2_mlib(uint8_t * dest, const uint8_t * ref, +static void avg_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref, int stride, int height) { switch (height) { @@ -288,7 +288,7 @@ static void avg_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref, /* average block, width 8 pixel, height 4/8/16 */ -static void avg_pixels8_mlib(uint8_t * dest, const uint8_t * ref, +static void avg_pixels8_mlib (uint8_t * dest, const uint8_t * ref, int stride, int height) { switch (height) { @@ -309,7 +309,7 @@ static void avg_pixels8_mlib(uint8_t * dest, const uint8_t * ref, } } -static void avg_pixels8_x2_mlib(uint8_t * dest, const uint8_t * ref, +static void avg_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref, int stride, int height) { switch (height) { @@ -330,7 +330,7 @@ static void avg_pixels8_x2_mlib(uint8_t * dest, const uint8_t * ref, } } -static void avg_pixels8_y2_mlib(uint8_t * dest, const uint8_t * ref, +static void avg_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref, int stride, int height) { switch (height) { @@ -386,9 +386,9 @@ static void ff_idct_put_mlib(uint8_t *dest, int line_size, DCTELEM *data) int i; uint8_t *cm = cropTbl + MAX_NEG_CROP; - mlib_VideoIDCT8x8_S16_S16(data, data); + mlib_VideoIDCT8x8_S16_S16 (data, data); - for (i=0;i<8;i++) { + for(i=0;i<8;i++) { dest[0] = cm[data[0]]; dest[1] = cm[data[1]]; dest[2] = cm[data[2]]; @@ -401,22 +401,23 @@ static void ff_idct_put_mlib(uint8_t *dest, int line_size, DCTELEM *data) dest += line_size; data += 8; } + } } static void ff_idct_add_mlib(uint8_t *dest, int line_size, DCTELEM *data) { - mlib_VideoIDCT8x8_S16_S16(data, data); + mlib_VideoIDCT8x8_S16_S16 (data, data); mlib_VideoAddBlock_U8_S16(dest, (mlib_s16 *)data, line_size); } -static void ff_idct_mlib(DCTELEM *data) +static void ff_idct_mlib(uint8_t *dest, int line_size, DCTELEM *data) { - mlib_VideoIDCT8x8_S16_S16(data, data); + mlib_VideoIDCT8x8_S16_S16 (data, data); } static void ff_fdct_mlib(DCTELEM *data) { - mlib_VideoDCT8x8_S16_S16(data, data); + mlib_VideoDCT8x8_S16_S16 (data, data); } void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx) @@ -454,15 +455,15 @@ void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx) void MPV_common_init_mlib(MpegEncContext *s) { if (xine_mm_accel() & MM_ACCEL_MLIB) { - if ((s->avctx->dct_algo == FF_DCT_AUTO) || (s->avctx->dct_algo==FF_DCT_MLIB)) { + if(s->avctx->dct_algo==FF_DCT_AUTO || s->avctx->dct_algo==FF_DCT_MLIB){ s->dsp.fdct = ff_fdct_mlib; } - if ((s->avctx->idct_algo == FF_IDCT_AUTO) || (s->avctx->idct_algo == FF_IDCT_MLIB)) { - s->dsp.idct = ff_idct_mlib; - s->dsp.idct_put = ff_idct_put_mlib; - s->dsp.idct_add = ff_idct_add_mlib; - s->dsp.idct_permutation_type = FF_NO_IDCT_PERM; + if(s->avctx->idct_algo==FF_IDCT_AUTO || s->avctx->idct_algo==FF_IDCT_MLIB){ + s->dsp.idct_put= ff_idct_put_mlib; + s->dsp.idct_add= ff_idct_add_mlib; + s->dsp.idct = ff_idct_mlib; + s->dsp.idct_permutation_type= FF_NO_IDCT_PERM; } } } diff --git a/src/libffmpeg/libavcodec/motion_est.c b/src/libffmpeg/libavcodec/motion_est.c index e8641790b..5132487cf 100644 --- a/src/libffmpeg/libavcodec/motion_est.c +++ b/src/libffmpeg/libavcodec/motion_est.c 
@@ -805,7 +805,7 @@ static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift) if(P_LEFT[0] > (s->me.xmax<<shift)) P_LEFT[0] = (s->me.xmax<<shift); /* special case for first line */ - if (s->mb_y == 0 && block<2) { + if (s->first_slice_line && block<2) { pred_x4= P_LEFT[0]; pred_y4= P_LEFT[1]; } else { @@ -845,13 +845,12 @@ static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift) int dxy; const int offset= ((block&1) + (block>>1)*stride)*8; uint8_t *dest_y = s->me.scratchpad + offset; - if(s->quarter_sample){ uint8_t *ref= ref_data[0] + (mx4>>2) + (my4>>2)*stride; dxy = ((my4 & 3) << 2) | (mx4 & 3); if(s->no_rounding) - s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y , ref , s->linesize); + s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y , ref , stride); else s->dsp.put_qpel_pixels_tab [1][dxy](dest_y , ref , stride); }else{ @@ -966,7 +965,7 @@ static int interlaced_search(MpegEncContext *s, uint8_t *frame_src_data[3], uint pred_x= P_LEFT[0]; pred_y= P_LEFT[1]; - if(s->mb_y){ + if(!s->first_slice_line){ P_TOP[0] = mv_table[xy - mot_stride][0]; P_TOP[1] = mv_table[xy - mot_stride][1]; P_TOPRIGHT[0] = mv_table[xy - mot_stride + 1][0]; @@ -1115,7 +1114,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, if(P_LEFT[0] > (s->me.xmax<<shift)) P_LEFT[0] = (s->me.xmax<<shift); - if(mb_y) { + if(!s->first_slice_line) { P_TOP[0] = s->current_picture.motion_val[0][mot_xy - mot_stride ][0]; P_TOP[1] = s->current_picture.motion_val[0][mot_xy - mot_stride ][1]; P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][0]; @@ -1164,8 +1163,8 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, pic->mc_mb_var[s->mb_stride * mb_y + mb_x] = vard; pic->mb_mean [s->mb_stride * mb_y + mb_x] = (sum+128)>>8; // pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin; - pic->mb_var_sum += varc; - pic->mc_mb_var_sum += vard; + s->mb_var_sum_temp += varc; + s->mc_mb_var_sum_temp += vard; //printf("E%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout); #if 0 @@ -1326,7 +1325,7 @@ int ff_pre_estimate_p_frame_motion(MpegEncContext * s, if(P_LEFT[0] < (s->me.xmin<<shift)) P_LEFT[0] = (s->me.xmin<<shift); /* special case for first line */ - if (mb_y == s->mb_height-1) { + if (s->first_slice_line) { pred_x= P_LEFT[0]; pred_y= P_LEFT[1]; P_TOP[0]= P_TOPRIGHT[0]= P_MEDIAN[0]= @@ -1409,7 +1408,7 @@ static int ff_estimate_motion_b(MpegEncContext * s, if(P_LEFT[0] > (s->me.xmax<<shift)) P_LEFT[0] = (s->me.xmax<<shift); /* special case for first line */ - if (mb_y) { + if (!s->first_slice_line) { P_TOP[0] = mv_table[mot_xy - mot_stride ][0]; P_TOP[1] = mv_table[mot_xy - mot_stride ][1]; P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1 ][0]; @@ -1610,7 +1609,7 @@ static inline int direct_search(MpegEncContext * s, uint8_t *src_data[3], uint8_ P_LEFT[1] = clip(mv_table[mot_xy - 1][1], ymin<<shift, ymax<<shift); /* special case for first line */ - if (mb_y) { + if (!s->first_slice_line) { //FIXME maybe allow this over thread boundary as its cliped P_TOP[0] = clip(mv_table[mot_xy - mot_stride ][0], xmin<<shift, xmax<<shift); P_TOP[1] = clip(mv_table[mot_xy - mot_stride ][1], ymin<<shift, ymax<<shift); P_TOPRIGHT[0] = clip(mv_table[mot_xy - mot_stride + 1 ][0], xmin<<shift, xmax<<shift); @@ -1727,7 +1726,7 @@ void ff_estimate_b_frame_motion(MpegEncContext * s, } score= ((unsigned)(score*score + 128*256))>>16; - s->current_picture.mc_mb_var_sum += score; + s->mc_mb_var_sum_temp += score; 
s->current_picture.mc_mb_var[mb_y*s->mb_stride + mb_x] = score; //FIXME use SSE } diff --git a/src/libffmpeg/libavcodec/motion_est_template.c b/src/libffmpeg/libavcodec/motion_est_template.c index 18203ec06..49c2e57b5 100644 --- a/src/libffmpeg/libavcodec/motion_est_template.c +++ b/src/libffmpeg/libavcodec/motion_est_template.c @@ -557,9 +557,11 @@ static int RENAME(qpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pre #define CHECK_CLIPED_MV(ax,ay)\ {\ - const int x= FFMAX(xmin, FFMIN(ax, xmax));\ - const int y= FFMAX(ymin, FFMIN(ay, ymax));\ - CHECK_MV(x, y)\ + const int x= ax;\ + const int y= ay;\ + const int x2= FFMAX(xmin, FFMIN(x, xmax));\ + const int y2= FFMAX(ymin, FFMIN(y, ymax));\ + CHECK_MV(x2, y2)\ } #define CHECK_MV_DIR(x,y,new_dir)\ @@ -912,7 +914,7 @@ static int RENAME(epzs_motion_search)(MpegEncContext * s, score_map[0]= dmin; /* first line */ - if (s->mb_y == 0) { + if (s->first_slice_line) { CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) @@ -938,13 +940,15 @@ static int RENAME(epzs_motion_search)(MpegEncContext * s, if(s->me.pre_pass){ CHECK_CLIPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16, (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16) - CHECK_CLIPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, - (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) + if(!s->first_slice_line) + CHECK_CLIPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, + (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) }else{ CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16, (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16) - CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, - (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) + if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line + CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, + (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) } } @@ -1024,7 +1028,7 @@ static int RENAME(epzs_motion_search4)(MpegEncContext * s, dmin = 1000000; //printf("%d %d %d %d //",xmin, ymin, xmax, ymax); /* first line */ - if (s->mb_y == 0/* && block<2*/) { + if (s->first_slice_line) { CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) @@ -1044,8 +1048,9 @@ static int RENAME(epzs_motion_search4)(MpegEncContext * s, if(dmin>64*4){ CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16, (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16) - CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, - (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) + if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line + CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, + (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) } if(s->me.dia_size==-1) @@ -1102,7 +1107,7 @@ static int RENAME(epzs_motion_search2)(MpegEncContext * s, dmin = 1000000; //printf("%d %d %d %d //",xmin, ymin, xmax, ymax); /* first line */ - if (s->mb_y == 0) { + if (s->first_slice_line) { CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, (last_mv[ref_mv_xy][1]*ref_mv_scale + 
(1<<15))>>16) @@ -1122,8 +1127,9 @@ static int RENAME(epzs_motion_search2)(MpegEncContext * s, if(dmin>64*4){ CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16, (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16) - CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, - (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) + if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line + CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, + (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) } if(s->me.dia_size==-1) diff --git a/src/libffmpeg/libavcodec/mpeg12.c b/src/libffmpeg/libavcodec/mpeg12.c index aecfd35f1..e39356c9d 100644 --- a/src/libffmpeg/libavcodec/mpeg12.c +++ b/src/libffmpeg/libavcodec/mpeg12.c @@ -34,6 +34,13 @@ //#include <assert.h> +/* if xine's MPEG encoder is enabled, enable the encoding features in + * this particular module */ +#ifdef XINE_MPEG_ENCODER +#define CONFIG_ENCODERS +#endif + + /* Start codes. */ #define SEQ_END_CODE 0x000001b7 #define SEQ_START_CODE 0x000001b3 @@ -52,7 +59,7 @@ #define MB_BTYPE_VLC_BITS 6 #define TEX_VLC_BITS 9 -#if defined(CONFIG_ENCODERS) || defined(XINE_MPEG_ENCODER) +#ifdef CONFIG_ENCODERS static void mpeg1_encode_block(MpegEncContext *s, DCTELEM *block, int component); @@ -80,7 +87,14 @@ extern void XVMC_pack_pblocks(MpegEncContext *s,int cbp); extern void XVMC_init_block(MpegEncContext *s);//set s->block #endif -#if defined(CONFIG_ENCODERS) || defined(XINE_MPEG_ENCODER) +const enum PixelFormat pixfmt_yuv_420[]= {PIX_FMT_YUV420P,-1}; +const enum PixelFormat pixfmt_yuv_422[]= {PIX_FMT_YUV422P,-1}; +const enum PixelFormat pixfmt_yuv_444[]= {PIX_FMT_YUV444P,-1}; +const enum PixelFormat pixfmt_xvmc_mpg2_420[] = { + PIX_FMT_XVMC_MPEG2_IDCT, + PIX_FMT_XVMC_MPEG2_MC, + -1}; +#ifdef CONFIG_ENCODERS static uint8_t (*mv_penalty)[MAX_MV*2+1]= NULL; static uint8_t fcode_tab[MAX_MV*2+1]; @@ -134,7 +148,7 @@ static void init_2d_vlc_rl(RLTable *rl) } } -#if defined(CONFIG_ENCODERS) || defined(XINE_MPEG_ENCODER) +#ifdef CONFIG_ENCODERS static void init_uni_ac_vlc(RLTable *rl, uint32_t *uni_ac_vlc_bits, uint8_t *uni_ac_vlc_len){ int i; @@ -188,9 +202,11 @@ static int find_frame_rate_index(MpegEncContext *s){ int64_t d; for(i=1;i<14;i++) { + int64_t n0= 1001LL/frame_rate_tab[i].den*frame_rate_tab[i].num*s->avctx->frame_rate_base; + int64_t n1= 1001LL*s->avctx->frame_rate; if(s->avctx->strict_std_compliance >= 0 && i>=9) break; - - d = ABS(MPEG1_FRAME_RATE_BASE*(int64_t)s->avctx->frame_rate - frame_rate_tab[i]*(int64_t)s->avctx->frame_rate_base); + + d = ABS(n0 - n1); if(d < dmin){ dmin=d; s->frame_rate_index= i; @@ -242,6 +258,8 @@ static void mpeg1_encode_sequence_header(MpegEncContext *s) if(aspect_ratio==0.0) aspect_ratio= 1.0; //pixel aspect 1:1 (VGA) if (s->current_picture.key_frame) { + AVRational framerate= frame_rate_tab[s->frame_rate_index]; + /* mpeg1 header repeated every gop */ put_header(s, SEQ_START_CODE); @@ -288,8 +306,8 @@ static void mpeg1_encode_sequence_header(MpegEncContext *s) constraint_parameter_flag= s->width <= 768 && s->height <= 576 && s->mb_width * s->mb_height <= 396 && - s->mb_width * s->mb_height * frame_rate_tab[s->frame_rate_index] <= MPEG1_FRAME_RATE_BASE*396*25 && - frame_rate_tab[s->frame_rate_index] <= MPEG1_FRAME_RATE_BASE*30 && + s->mb_width * s->mb_height * framerate.num <= framerate.den*396*25 && + framerate.num <= framerate.den*30 && vbv_buffer_size <= 20 && v <= 1856000/400 && s->codec_id == 
CODEC_ID_MPEG1VIDEO; @@ -321,7 +339,7 @@ static void mpeg1_encode_sequence_header(MpegEncContext *s) put_bits(&s->pb, 1, 0); /* do drop frame */ /* time code : we must convert from the real frame rate to a fake mpeg frame rate in case of low frame rate */ - fps = (frame_rate_tab[s->frame_rate_index] + MPEG1_FRAME_RATE_BASE/2)/ MPEG1_FRAME_RATE_BASE; + fps = (framerate.num + framerate.den/2)/ framerate.den; time_code = s->current_picture_ptr->coded_picture_number; s->gop_picture_number = time_code; @@ -358,7 +376,7 @@ void ff_mpeg1_clean_buffers(MpegEncContext *s){ memset(s->last_mv, 0, sizeof(s->last_mv)); } -#if defined(CONFIG_ENCODERS) || defined(XINE_MPEG_ENCODER) +#ifdef CONFIG_ENCODERS void ff_mpeg1_encode_slice_header(MpegEncContext *s){ put_header(s, SLICE_MIN_START_CODE + s->mb_y); @@ -379,7 +397,7 @@ void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number) s->gop_picture_number) & 0x3ff); put_bits(&s->pb, 3, s->pict_type); - s->vbv_delay_ptr= s->pb.buf + get_bit_count(&s->pb)/8; + s->vbv_delay_ptr= s->pb.buf + put_bits_count(&s->pb)/8; put_bits(&s->pb, 16, 0xFFFF); /* vbv_delay */ // RAL: Forward f_code also needed for B frames @@ -419,7 +437,9 @@ void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number) put_bits(&s->pb, 8, 255); } put_bits(&s->pb, 2, s->intra_dc_precision); - put_bits(&s->pb, 2, s->picture_structure= PICT_FRAME); + + assert(s->picture_structure == PICT_FRAME); + put_bits(&s->pb, 2, s->picture_structure); if (s->progressive_sequence) { put_bits(&s->pb, 1, 0); /* no repeat */ } else { @@ -583,7 +603,7 @@ void mpeg1_encode_mb(MpegEncContext *s, s->mv_bits+= get_bits_diff(s); } if(cbp) - put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]); + put_bits(&s->pb, mbPatTable[cbp][1], mbPatTable[cbp][0]); s->f_count++; } else{ static const int mb_type_len[4]={0,3,4,2}; //bak,for,bi @@ -662,7 +682,7 @@ void mpeg1_encode_mb(MpegEncContext *s, } s->mv_bits += get_bits_diff(s); if(cbp) - put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]); + put_bits(&s->pb, mbPatTable[cbp][1], mbPatTable[cbp][0]); } for(i=0;i<6;i++) { if (cbp & (1 << (5 - i))) { @@ -954,7 +974,7 @@ static void init_vlcs() init_vlc(&mbincr_vlc, MBINCR_VLC_BITS, 36, &mbAddrIncrTable[0][1], 2, 1, &mbAddrIncrTable[0][0], 2, 1); - init_vlc(&mb_pat_vlc, MB_PAT_VLC_BITS, 63, + init_vlc(&mb_pat_vlc, MB_PAT_VLC_BITS, 64, &mbPatTable[0][1], 2, 1, &mbPatTable[0][0], 2, 1); @@ -997,7 +1017,7 @@ static inline int get_qscale(MpegEncContext *s) #define MT_DMV 3 static int mpeg_decode_mb(MpegEncContext *s, - DCTELEM block[6][64]) + DCTELEM block[12][64]) { int i, j, k, cbp, val, mb_type, motion_type; @@ -1013,15 +1033,19 @@ static int mpeg_decode_mb(MpegEncContext *s, /* skip mb */ s->mb_intra = 0; - for(i=0;i<6;i++) + for(i=0;i<12;i++) s->block_last_index[i] = -1; - s->mv_type = MV_TYPE_16X16; + if(s->picture_structure == PICT_FRAME) + s->mv_type = MV_TYPE_16X16; + else + s->mv_type = MV_TYPE_FIELD; if (s->pict_type == P_TYPE) { /* if P type, zero motion vector is implied */ s->mv_dir = MV_DIR_FORWARD; s->mv[0][0][0] = s->mv[0][0][1] = 0; s->last_mv[0][0][0] = s->last_mv[0][0][1] = 0; s->last_mv[0][1][0] = s->last_mv[0][1][1] = 0; + s->field_select[0][0]= s->picture_structure - 1; s->mb_skiped = 1; s->current_picture.mb_type[ s->mb_x + s->mb_y*s->mb_stride ]= MB_TYPE_SKIP | MB_TYPE_L0 | MB_TYPE_16x16; } else { @@ -1109,7 +1133,7 @@ static int mpeg_decode_mb(MpegEncContext *s, #endif if (s->codec_id == CODEC_ID_MPEG2VIDEO) { - for(i=0;i<6;i++) { + 
for(i=0;i<4+(1<<s->chroma_format);i++) { if (mpeg2_decode_block_intra(s, s->pblocks[i], i) < 0) return -1; } @@ -1133,7 +1157,13 @@ static int mpeg_decode_mb(MpegEncContext *s, s->qscale = get_qscale(s); s->mv_dir = MV_DIR_FORWARD; - s->mv_type = MV_TYPE_16X16; + if(s->picture_structure == PICT_FRAME) + s->mv_type = MV_TYPE_16X16; + else{ + s->mv_type = MV_TYPE_FIELD; + mb_type |= MB_TYPE_INTERLACED; + s->field_select[0][0]= s->picture_structure - 1; + } s->last_mv[0][0][0] = 0; s->last_mv[0][0][1] = 0; s->last_mv[0][1][0] = 0; @@ -1283,11 +1313,16 @@ static int mpeg_decode_mb(MpegEncContext *s, if (HAS_CBP(mb_type)) { cbp = get_vlc2(&s->gb, mb_pat_vlc.table, MB_PAT_VLC_BITS, 1); - if (cbp < 0){ + if (cbp < 0 || ((cbp == 0) && (s->chroma_format < 2)) ){ av_log(s->avctx, AV_LOG_ERROR, "invalid cbp at %d %d\n", s->mb_x, s->mb_y); return -1; } - cbp++; + if(s->chroma_format == 2){//CHROMA422 + cbp|= ( get_bits(&s->gb,2) ) << 6; + }else + if(s->chroma_format > 2){//CHROMA444 + cbp|= ( get_bits(&s->gb,6) ) << 6; + } #ifdef HAVE_XVMC //on 1 we memcpy blocks in xvmcvideo @@ -1301,13 +1336,33 @@ static int mpeg_decode_mb(MpegEncContext *s, if (s->codec_id == CODEC_ID_MPEG2VIDEO) { for(i=0;i<6;i++) { - if (cbp & 32) { + if (cbp & (1<<(5-i)) ) { if (mpeg2_decode_block_non_intra(s, s->pblocks[i], i) < 0) return -1; } else { s->block_last_index[i] = -1; } - cbp+=cbp; + } + if (s->chroma_format >= 2) { + if (s->chroma_format == 2) {//CHROMA_422) + for(i=6;i<8;i++) { + if (cbp & (1<<(6+7-i)) ) { + if (mpeg2_decode_block_non_intra(s, s->pblocks[i], i) < 0) + return -1; + } else { + s->block_last_index[i] = -1; + } + } + }else{ /*CHROMA_444*/ + for(i=6;i<12;i++) { + if (cbp & (1<<(6+11-i)) ) { + if (mpeg2_decode_block_non_intra(s, s->pblocks[i], i) < 0) + return -1; + } else { + s->block_last_index[i] = -1; + } + } + } } } else { for(i=0;i<6;i++) { @@ -1631,7 +1686,7 @@ static inline int mpeg2_decode_block_intra(MpegEncContext *s, component = 0; }else{ quant_matrix = s->chroma_intra_matrix; - component = n - 3; + component = (n&1) + 1; } diff = decode_dc(&s->gb, component); if (diff >= 0xffff) @@ -1698,6 +1753,7 @@ typedef struct Mpeg1Context { int mpeg_enc_ctx_allocated; /* true if decoding context allocated */ int repeat_field; /* true if we must repeat the field */ AVPanScan pan_scan; /** some temporary storage for the panscan */ + int slice_count; } Mpeg1Context; static int mpeg_decode_init(AVCodecContext *avctx) @@ -1719,30 +1775,33 @@ static int mpeg_decode_init(AVCodecContext *avctx) /* return the 8 bit start code value and update the search state. 
Return -1 if no start code found */ -static int find_start_code(uint8_t **pbuf_ptr, uint8_t *buf_end) +static int find_start_code(const uint8_t **pbuf_ptr, const uint8_t *buf_end) { - uint8_t *buf_ptr; - unsigned int state=0xFFFFFFFF, v; - int val; + const uint8_t *buf_ptr= *pbuf_ptr; + + buf_ptr++; //gurantees that -1 is within the array + buf_end -= 2; // gurantees that +2 is within the array - buf_ptr = *pbuf_ptr; while (buf_ptr < buf_end) { - v = *buf_ptr++; - if (state == 0x000001) { - state = ((state << 8) | v) & 0xffffff; - val = state; - goto found; + if(*buf_ptr==0){ + while(buf_ptr < buf_end && buf_ptr[1]==0) + buf_ptr++; + + if(buf_ptr[-1] == 0 && buf_ptr[1] == 1){ + *pbuf_ptr = buf_ptr+3; + return buf_ptr[2] + 0x100; + } } - state = ((state << 8) | v) & 0xffffff; + buf_ptr += 2; } - val = -1; - found: - *pbuf_ptr = buf_ptr; - return val; + buf_end += 2; //undo the hack above + + *pbuf_ptr = buf_end; + return -1; } static int mpeg1_decode_picture(AVCodecContext *avctx, - uint8_t *buf, int buf_size) + const uint8_t *buf, int buf_size) { Mpeg1Context *s1 = avctx->priv_data; MpegEncContext *s = &s1->mpeg_enc_ctx; @@ -1793,13 +1852,13 @@ static void mpeg_decode_sequence_extension(MpegEncContext *s) profile= get_bits(&s->gb, 3); level= get_bits(&s->gb, 4); s->progressive_sequence = get_bits1(&s->gb); /* progressive_sequence */ - skip_bits(&s->gb, 2); /* chroma_format */ + s->chroma_format = get_bits(&s->gb, 2); /* chroma_format 1=420, 2=422, 3=444 */ horiz_size_ext = get_bits(&s->gb, 2); vert_size_ext = get_bits(&s->gb, 2); s->width |= (horiz_size_ext << 12); s->height |= (vert_size_ext << 12); bit_rate_ext = get_bits(&s->gb, 12); /* XXX: handle it */ - s->bit_rate = ((s->bit_rate / 400) | (bit_rate_ext << 12)) * 400; + s->bit_rate += (bit_rate_ext << 12) * 400; skip_bits1(&s->gb); /* marker */ s->avctx->rc_buffer_size += get_bits(&s->gb, 8)*1024*16<<10; @@ -1811,8 +1870,8 @@ static void mpeg_decode_sequence_extension(MpegEncContext *s) av_reduce( &s->avctx->frame_rate, &s->avctx->frame_rate_base, - frame_rate_tab[s->frame_rate_index] * (frame_rate_ext_n+1), - MPEG1_FRAME_RATE_BASE * (frame_rate_ext_d+1), + frame_rate_tab[s->frame_rate_index].num * (frame_rate_ext_n+1), + frame_rate_tab[s->frame_rate_index].den * (frame_rate_ext_d+1), 1<<30); dprintf("sequence extension\n"); @@ -1970,7 +2029,7 @@ static void mpeg_decode_picture_coding_extension(MpegEncContext *s) } static void mpeg_decode_extension(AVCodecContext *avctx, - uint8_t *buf, int buf_size) + const uint8_t *buf, int buf_size) { Mpeg1Context *s1 = avctx->priv_data; MpegEncContext *s = &s1->mpeg_enc_ctx; @@ -2006,43 +2065,14 @@ short * tmp; s->pblocks[5] = tmp; } -#define DECODE_SLICE_FATAL_ERROR -2 -#define DECODE_SLICE_ERROR -1 -#define DECODE_SLICE_OK 0 +static int mpeg_field_start(MpegEncContext *s){ + AVCodecContext *avctx= s->avctx; + Mpeg1Context *s1 = (Mpeg1Context*)s; -/** - * decodes a slice. 
- * @return DECODE_SLICE_FATAL_ERROR if a non recoverable error occured<br> - * DECODE_SLICE_ERROR if the slice is damaged<br> - * DECODE_SLICE_OK if this slice is ok<br> - */ -static int mpeg_decode_slice(AVCodecContext *avctx, - AVFrame *pict, - int start_code, - uint8_t **buf, int buf_size) -{ - Mpeg1Context *s1 = avctx->priv_data; - MpegEncContext *s = &s1->mpeg_enc_ctx; - int ret; - const int field_pic= s->picture_structure != PICT_FRAME; - - s->resync_mb_x= s->mb_x = - s->resync_mb_y= s->mb_y = -1; - - start_code = (start_code - 1) & 0xff; - if (start_code >= s->mb_height){ - av_log(s->avctx, AV_LOG_ERROR, "slice below image (%d >= %d)\n", start_code, s->mb_height); - return -1; - } - - ff_mpeg1_clean_buffers(s); - s->interlaced_dct = 0; - /* start frame decoding */ - if (s->first_slice) { - if(s->first_field || s->picture_structure==PICT_FRAME){ + if(s->first_field || s->picture_structure==PICT_FRAME){ if(MPV_frame_start(s, avctx) < 0) - return DECODE_SLICE_FATAL_ERROR; + return -1; ff_er_frame_start(s); @@ -2060,7 +2090,7 @@ static int mpeg_decode_slice(AVCodecContext *avctx, } *s->current_picture_ptr->pan_scan= s1->pan_scan; - }else{ //second field + }else{ //second field int i; if(!s->current_picture_ptr){ @@ -2074,30 +2104,48 @@ static int mpeg_decode_slice(AVCodecContext *avctx, s->current_picture.data[i] += s->current_picture_ptr->linesize[i]; } } - } + } #ifdef HAVE_XVMC // MPV_frame_start will call this function too, // but we need to call it on every field - if(s->avctx->xvmc_acceleration) + if(s->avctx->xvmc_acceleration) XVMC_field_start(s,avctx); #endif - }//fi(s->first_slice) + return 0; +} + +#define DECODE_SLICE_ERROR -1 +#define DECODE_SLICE_OK 0 + +/** + * decodes a slice. MpegEncContext.mb_y must be set to the MB row from the startcode + * @return DECODE_SLICE_ERROR if the slice is damaged<br> + * DECODE_SLICE_OK if this slice is ok<br> + */ +static int mpeg_decode_slice(Mpeg1Context *s1, int mb_y, + const uint8_t **buf, int buf_size) +{ + MpegEncContext *s = &s1->mpeg_enc_ctx; + AVCodecContext *avctx= s->avctx; + int ret; + const int field_pic= s->picture_structure != PICT_FRAME; + + s->resync_mb_x= + s->resync_mb_y= -1; + + if (mb_y >= s->mb_height){ + av_log(s->avctx, AV_LOG_ERROR, "slice below image (%d >= %d)\n", s->mb_y, s->mb_height); + return -1; + } + init_get_bits(&s->gb, *buf, buf_size*8); + ff_mpeg1_clean_buffers(s); + s->interlaced_dct = 0; + s->qscale = get_qscale(s); - if (s->first_slice && (s->first_field || s->picture_structure==PICT_FRAME)) { - if(s->avctx->debug&FF_DEBUG_PICT_INFO){ - av_log(s->avctx, AV_LOG_DEBUG, "qp:%d fc:%2d%2d%2d%2d %s %s %s %s %s dc:%d pstruct:%d fdct:%d cmv:%d qtype:%d ivlc:%d rff:%d %s\n", - s->qscale, s->mpeg_f_code[0][0],s->mpeg_f_code[0][1],s->mpeg_f_code[1][0],s->mpeg_f_code[1][1], - s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? "B" : "S")), - s->progressive_sequence ? "ps" :"", s->progressive_frame ? "pf" : "", s->alternate_scan ? "alt" :"", s->top_field_first ? "top" :"", - s->intra_dc_precision, s->picture_structure, s->frame_pred_frame_dct, s->concealment_motion_vectors, - s->q_scale_type, s->intra_vlc_format, s->repeat_first_field, s->chroma_420_type ? 
"420" :""); - } - } - s->first_slice = 0; if(s->qscale == 0){ av_log(s->avctx, AV_LOG_ERROR, "qscale == 0\n"); return -1; @@ -2126,12 +2174,23 @@ static int mpeg_decode_slice(AVCodecContext *avctx, break; } } - + s->resync_mb_x= s->mb_x; - s->resync_mb_y= s->mb_y = start_code; + s->resync_mb_y= s->mb_y= mb_y; s->mb_skip_run= 0; ff_init_block_index(s); + if (s->mb_y==0 && s->mb_x==0 && (s->first_field || s->picture_structure==PICT_FRAME)) { + if(s->avctx->debug&FF_DEBUG_PICT_INFO){ + av_log(s->avctx, AV_LOG_DEBUG, "qp:%d fc:%2d%2d%2d%2d %s %s %s %s %s dc:%d pstruct:%d fdct:%d cmv:%d qtype:%d ivlc:%d rff:%d %s\n", + s->qscale, s->mpeg_f_code[0][0],s->mpeg_f_code[0][1],s->mpeg_f_code[1][0],s->mpeg_f_code[1][1], + s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? "B" : "S")), + s->progressive_sequence ? "ps" :"", s->progressive_frame ? "pf" : "", s->alternate_scan ? "alt" :"", s->top_field_first ? "top" :"", + s->intra_dc_precision, s->picture_structure, s->frame_pred_frame_dct, s->concealment_motion_vectors, + s->q_scale_type, s->intra_vlc_format, s->repeat_first_field, s->chroma_420_type ? "420" :""); + } + } + for(;;) { #ifdef HAVE_XVMC //one 1 we memcpy blocks in xvmcvideo @@ -2151,51 +2210,29 @@ static int mpeg_decode_slice(AVCodecContext *avctx, if(s->current_picture.motion_val[0] && !s->encoding){ //note motion_val is normally NULL unless we want to extract the MVs const int wrap = field_pic ? 2*s->block_wrap[0] : s->block_wrap[0]; int xy = s->mb_x*2 + 1 + (s->mb_y*2 +1)*wrap; - int motion_for_top_x, motion_for_top_y, motion_back_top_x, motion_back_top_y; - int motion_for_bottom_x, motion_for_bottom_y, motion_back_bottom_x, motion_back_bottom_y; + int motion_x, motion_y, dir, i; if(field_pic && !s->first_field) xy += wrap/2; - if (s->mb_intra) { - motion_for_top_x = motion_for_top_y = motion_back_top_x = motion_back_top_y = - motion_for_bottom_x = motion_for_bottom_y = motion_back_bottom_x = motion_back_bottom_y = 0; - }else if (s->mv_type == MV_TYPE_16X16){ - motion_for_top_x = motion_for_bottom_x = s->mv[0][0][0]; - motion_for_top_y = motion_for_bottom_y = s->mv[0][0][1]; - motion_back_top_x = motion_back_bottom_x = s->mv[1][0][0]; - motion_back_top_y = motion_back_bottom_y = s->mv[1][0][1]; - } else /*if ((s->mv_type == MV_TYPE_FIELD) || (s->mv_type == MV_TYPE_16X8))*/ { - motion_for_top_x = s->mv[0][0][0]; - motion_for_top_y = s->mv[0][0][1]; - motion_for_bottom_x = s->mv[0][1][0]; - motion_for_bottom_y = s->mv[0][1][1]; - motion_back_top_x = s->mv[1][0][0]; - motion_back_top_y = s->mv[1][0][1]; - motion_back_bottom_x = s->mv[1][1][0]; - motion_back_bottom_y = s->mv[1][1][1]; - } - - s->current_picture.motion_val[0][xy][0] = motion_for_top_x; - s->current_picture.motion_val[0][xy][1] = motion_for_top_y; - s->current_picture.motion_val[0][xy + 1][0] = motion_for_top_x; - s->current_picture.motion_val[0][xy + 1][1] = motion_for_top_y; - s->current_picture.motion_val[0][xy + wrap][0] = motion_for_bottom_x; - s->current_picture.motion_val[0][xy + wrap][1] = motion_for_bottom_y; - s->current_picture.motion_val[0][xy + 1 + wrap][0] = motion_for_bottom_x; - s->current_picture.motion_val[0][xy + 1 + wrap][1] = motion_for_bottom_y; - - if(s->pict_type != B_TYPE){ - motion_back_top_x = motion_back_top_y = motion_back_bottom_x = motion_back_bottom_y = 0; + for(i=0; i<2; i++){ + for(dir=0; dir<2; dir++){ + if (s->mb_intra || (dir==1 && s->pict_type != B_TYPE)) { + motion_x = motion_y = 0; + }else if (s->mv_type == MV_TYPE_16X16){ + motion_x = 
s->mv[dir][0][0]; + motion_y = s->mv[dir][0][1]; + } else /*if ((s->mv_type == MV_TYPE_FIELD) || (s->mv_type == MV_TYPE_16X8))*/ { + motion_x = s->mv[dir][i][0]; + motion_y = s->mv[dir][i][1]; + } + + s->current_picture.motion_val[dir][xy ][0] = motion_x; + s->current_picture.motion_val[dir][xy ][1] = motion_y; + s->current_picture.motion_val[dir][xy + 1][0] = motion_x; + s->current_picture.motion_val[dir][xy + 1][1] = motion_y; + } + xy += wrap; } - - s->current_picture.motion_val[1][xy][0] = motion_back_top_x; - s->current_picture.motion_val[1][xy][1] = motion_back_top_y; - s->current_picture.motion_val[1][xy + 1][0] = motion_back_top_x; - s->current_picture.motion_val[1][xy + 1][1] = motion_back_top_y; - s->current_picture.motion_val[1][xy + wrap][0] = motion_back_bottom_x; - s->current_picture.motion_val[1][xy + wrap][1] = motion_back_bottom_y; - s->current_picture.motion_val[1][xy + 1 + wrap][0] = motion_back_bottom_x; - s->current_picture.motion_val[1][xy + 1 + wrap][1] = motion_back_bottom_y; } s->dest[0] += 16; @@ -2259,6 +2296,39 @@ eos: // end of slice return 0; } +static int slice_decode_thread(AVCodecContext *c, void *arg){ + MpegEncContext *s= arg; + const uint8_t *buf= s->gb.buffer; + int mb_y= s->start_mb_y; + + s->error_count= 3*(s->end_mb_y - s->start_mb_y)*s->mb_width; + + for(;;){ + int start_code, ret; + + ret= mpeg_decode_slice((Mpeg1Context*)s, mb_y, &buf, s->gb.buffer_end - buf); + emms_c(); +//av_log(c, AV_LOG_DEBUG, "ret:%d resync:%d/%d mb:%d/%d ts:%d/%d ec:%d\n", +//ret, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, s->start_mb_y, s->end_mb_y, s->error_count); + if(ret < 0){ + if(s->resync_mb_x>=0 && s->resync_mb_y>=0) + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, AC_ERROR|DC_ERROR|MV_ERROR); + }else{ + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, AC_END|DC_END|MV_END); + } + + if(s->mb_y == s->end_mb_y) + return 0; + + start_code = find_start_code(&buf, s->gb.buffer_end); + mb_y= start_code - SLICE_MIN_START_CODE; + if(mb_y < 0 || mb_y >= s->end_mb_y) + return -1; + } + + return 0; //not reached +} + /** * handles slice ends. 
* @return 1 if it seems to be the last slice of @@ -2305,7 +2375,7 @@ static int slice_end(AVCodecContext *avctx, AVFrame *pict) } static int mpeg1_decode_sequence(AVCodecContext *avctx, - uint8_t *buf, int buf_size) + const uint8_t *buf, int buf_size) { Mpeg1Context *s1 = avctx->priv_data; MpegEncContext *s = &s1->mpeg_enc_ctx; @@ -2323,7 +2393,7 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx, avctx->sample_aspect_ratio= av_d2q(aspect, 255); s->frame_rate_index = get_bits(&s->gb, 4); - if (s->frame_rate_index == 0) + if (s->frame_rate_index == 0 || s->frame_rate_index > 13) return -1; s->bit_rate = get_bits(&s->gb, 18) * 400; if (get_bits1(&s->gb) == 0) /* marker */ @@ -2343,17 +2413,16 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx, avctx->has_b_frames= 1; avctx->width = width; avctx->height = height; - av_reduce( - &avctx->frame_rate, - &avctx->frame_rate_base, - frame_rate_tab[s->frame_rate_index], - MPEG1_FRAME_RATE_BASE, //FIXME store in allready reduced form - 1<<30 - ); + avctx->frame_rate = frame_rate_tab[s->frame_rate_index].num; + avctx->frame_rate_base= frame_rate_tab[s->frame_rate_index].den; avctx->bit_rate = s->bit_rate; - //get_format() or set_video(width,height,aspect,pix_fmt); - //until then pix_fmt may be changed right after codec init + if(avctx->xvmc_acceleration){ + avctx->pix_fmt = avctx->get_format(avctx,pixfmt_xvmc_mpg2_420); + }else{ + avctx->pix_fmt = avctx->get_format(avctx,pixfmt_yuv_420); + } + if( avctx->pix_fmt == PIX_FMT_XVMC_MPEG2_IDCT ) if( avctx->idct_algo == FF_IDCT_AUTO ) avctx->idct_algo = FF_IDCT_SIMPLE; @@ -2371,6 +2440,10 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx, if (get_bits1(&s->gb)) { for(i=0;i<64;i++) { v = get_bits(&s->gb, 8); + if(v==0){ + av_log(s->avctx, AV_LOG_ERROR, "intra matrix damaged\n"); + return -1; + } j = s->intra_scantable.permutated[i]; s->intra_matrix[j] = v; s->chroma_intra_matrix[j] = v; @@ -2392,6 +2465,10 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx, if (get_bits1(&s->gb)) { for(i=0;i<64;i++) { v = get_bits(&s->gb, 8); + if(v==0){ + av_log(s->avctx, AV_LOG_ERROR, "inter matrix damaged\n"); + return -1; + } j = s->intra_scantable.permutated[i]; s->inter_matrix[j] = v; s->chroma_inter_matrix[j] = v; @@ -2410,12 +2487,18 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx, s->chroma_inter_matrix[j] = v; } } + + if(show_bits(&s->gb, 23) != 0){ + av_log(s->avctx, AV_LOG_ERROR, "sequence header damaged\n"); + return -1; + } /* we set mpeg2 parameters so that it emulates mpeg1 */ s->progressive_sequence = 1; s->progressive_frame = 1; s->picture_structure = PICT_FRAME; s->frame_pred_frame_dct = 1; + s->chroma_format = 1; s->codec_id= s->avctx->codec_id= CODEC_ID_MPEG1VIDEO; avctx->sub_id = 1; /* indicates mpeg1 */ if(s->flags & CODEC_FLAG_LOW_DELAY) s->low_delay=1; @@ -2443,8 +2526,12 @@ static int vcr2_init_sequence(AVCodecContext *avctx) avctx->has_b_frames= 0; //true? 
s->low_delay= 1; - //get_format() or set_video(width,height,aspect,pix_fmt); - //until then pix_fmt may be changed right after codec init + if(avctx->xvmc_acceleration){ + avctx->pix_fmt = avctx->get_format(avctx,pixfmt_xvmc_mpg2_420); + }else{ + avctx->pix_fmt = avctx->get_format(avctx,pixfmt_yuv_420); + } + if( avctx->pix_fmt == PIX_FMT_XVMC_MPEG2_IDCT ) if( avctx->idct_algo == FF_IDCT_AUTO ) avctx->idct_algo = FF_IDCT_SIMPLE; @@ -2470,6 +2557,7 @@ static int vcr2_init_sequence(AVCodecContext *avctx) s->progressive_frame = 1; s->picture_structure = PICT_FRAME; s->frame_pred_frame_dct = 1; + s->chroma_format = 1; s->codec_id= s->avctx->codec_id= CODEC_ID_MPEG2VIDEO; avctx->sub_id = 2; /* indicates mpeg2 */ return 0; @@ -2550,7 +2638,8 @@ static int mpeg_decode_frame(AVCodecContext *avctx, uint8_t *buf, int buf_size) { Mpeg1Context *s = avctx->priv_data; - uint8_t *buf_end, *buf_ptr; + const uint8_t *buf_end; + const uint8_t *buf_ptr; int ret, start_code, input_size; AVFrame *picture = data; MpegEncContext *s2 = &s->mpeg_enc_ctx; @@ -2591,12 +2680,21 @@ static int mpeg_decode_frame(AVCodecContext *avctx, if(s->mpeg_enc_ctx_allocated==0 && avctx->codec_tag == ff_get_fourcc("VCR2")) vcr2_init_sequence(avctx); - + + s->slice_count= 0; + for(;;) { /* find start next code */ start_code = find_start_code(&buf_ptr, buf_end); if (start_code < 0){ if(s2->pict_type != B_TYPE || avctx->hurry_up==0){ + if(avctx->thread_count > 1){ + int i; + + avctx->execute(avctx, slice_decode_thread, (void**)&(s2->thread_context[0]), NULL, s->slice_count); + for(i=0; i<s->slice_count; i++) + s2->error_count += s2->thread_context[i]->error_count; + } if (slice_end(avctx, picture)) { if(s2->last_picture_ptr || s2->low_delay) //FIXME merge with the stuff in mpeg_decode_slice *data_size = sizeof(AVPicture); @@ -2615,13 +2713,13 @@ static int mpeg_decode_frame(AVCodecContext *avctx, switch(start_code) { case SEQ_START_CODE: mpeg1_decode_sequence(avctx, buf_ptr, - input_size); + input_size); break; case PICTURE_START_CODE: /* we have a complete image : we try to decompress it */ mpeg1_decode_picture(avctx, - buf_ptr, input_size); + buf_ptr, input_size); break; case EXT_START_CODE: mpeg_decode_extension(avctx, @@ -2637,6 +2735,7 @@ static int mpeg_decode_frame(AVCodecContext *avctx, default: if (start_code >= SLICE_MIN_START_CODE && start_code <= SLICE_MAX_START_CODE) { + int mb_y= start_code - SLICE_MIN_START_CODE; /* skip b frames if we dont have reference frames */ if(s2->last_picture_ptr==NULL && s2->pict_type==B_TYPE) break; @@ -2646,17 +2745,38 @@ static int mpeg_decode_frame(AVCodecContext *avctx, if(avctx->hurry_up>=5) break; if (!s->mpeg_enc_ctx_allocated) break; - - ret = mpeg_decode_slice(avctx, picture, - start_code, &buf_ptr, input_size); - emms_c(); - - if(ret < 0){ - if(s2->resync_mb_x>=0 && s2->resync_mb_y>=0) - ff_er_add_slice(s2, s2->resync_mb_x, s2->resync_mb_y, s2->mb_x, s2->mb_y, AC_ERROR|DC_ERROR|MV_ERROR); - if(ret==DECODE_SLICE_FATAL_ERROR) return -1; + + if(s2->first_slice){ + s2->first_slice=0; + if(mpeg_field_start(s2) < 0) + return -1; + } + + if(avctx->thread_count > 1){ + int threshold= (s2->mb_height*s->slice_count + avctx->thread_count/2) / avctx->thread_count; + if(threshold <= mb_y){ + MpegEncContext *thread_context= s2->thread_context[s->slice_count]; + + thread_context->start_mb_y= mb_y; + thread_context->end_mb_y = s2->mb_height; + if(s->slice_count){ + s2->thread_context[s->slice_count-1]->end_mb_y= mb_y; + ff_update_duplicate_context(thread_context, s2); + } + 
init_get_bits(&thread_context->gb, buf_ptr, input_size*8); + s->slice_count++; + } + buf_ptr += 2; //FIXME add minimum num of bytes per slice }else{ - ff_er_add_slice(s2, s2->resync_mb_x, s2->resync_mb_y, s2->mb_x-1, s2->mb_y, AC_END|DC_END|MV_END); + ret = mpeg_decode_slice(s, mb_y, &buf_ptr, input_size); + emms_c(); + + if(ret < 0){ + if(s2->resync_mb_x>=0 && s2->resync_mb_y>=0) + ff_er_add_slice(s2, s2->resync_mb_x, s2->resync_mb_y, s2->mb_x, s2->mb_y, AC_ERROR|DC_ERROR|MV_ERROR); + }else{ + ff_er_add_slice(s2, s2->resync_mb_x, s2->resync_mb_y, s2->mb_x-1, s2->mb_y, AC_END|DC_END|MV_END); + } } } break; @@ -2713,7 +2833,7 @@ AVCodec mpegvideo_decoder = { .flush= ff_mpeg_flush, }; -#if defined(CONFIG_ENCODERS) || defined(XINE_MPEG_ENCODER) +#ifdef CONFIG_ENCODERS AVCodec mpeg1video_encoder = { "mpeg1video", @@ -2723,6 +2843,7 @@ AVCodec mpeg1video_encoder = { encode_init, MPV_encode_picture, MPV_encode_end, + .supported_framerates= frame_rate_tab+1, }; #ifdef CONFIG_RISKY @@ -2735,6 +2856,7 @@ AVCodec mpeg2video_encoder = { encode_init, MPV_encode_picture, MPV_encode_end, + .supported_framerates= frame_rate_tab+1, }; #endif #endif @@ -2766,7 +2888,8 @@ AVCodec mpeg_xvmc_decoder = { NULL, mpeg_decode_end, mpeg_decode_frame, - CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED, + CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED| CODEC_CAP_HWACCEL, + .flush= ff_mpeg_flush, }; #endif diff --git a/src/libffmpeg/libavcodec/mpeg12data.h b/src/libffmpeg/libavcodec/mpeg12data.h index 42b3d49a1..4ee460ccc 100644 --- a/src/libffmpeg/libavcodec/mpeg12data.h +++ b/src/libffmpeg/libavcodec/mpeg12data.h @@ -217,7 +217,8 @@ static const uint8_t mbAddrIncrTable[36][2] = { {0x0, 8}, /* end (and 15 more 0 bits should follow) */ }; -static const uint8_t mbPatTable[63][2] = { +static const uint8_t mbPatTable[64][2] = { + {0x1, 9}, {0xb, 5}, {0x9, 5}, {0xd, 6}, @@ -354,28 +355,24 @@ static const uint8_t mbMotionVectorTable[17][2] = { { 0xc, 10 }, }; -#define MPEG1_FRAME_RATE_BASE 1001 - -static const int frame_rate_tab[16] = { - 0, - 24000, - 24024, - 25025, - 30000, - 30030, - 50050, - 60000, - 60060, +static const AVRational frame_rate_tab[] = { + { 0, 0}, + {24000, 1001}, + { 24, 1}, + { 25, 1}, + {30000, 1001}, + { 30, 1}, + { 50, 1}, + {60000, 1001}, + { 60, 1}, // Xing's 15fps: (9) - 15015, + { 15, 1}, // libmpeg3's "Unofficial economy rates": (10-13) - 5005, - 10010, - 12012, - 15015, - // random, just to avoid segfault !never encode these - 25025, - 25025, + { 5, 1}, + { 10, 1}, + { 12, 1}, + { 15, 1}, + { 0, 0}, }; static const uint8_t non_linear_qscale[32] = { diff --git a/src/libffmpeg/libavcodec/mpegaudiodec.c b/src/libffmpeg/libavcodec/mpegaudiodec.c index 09e9b8cdb..58caf6510 100644 --- a/src/libffmpeg/libavcodec/mpegaudiodec.c +++ b/src/libffmpeg/libavcodec/mpegaudiodec.c @@ -1231,6 +1231,7 @@ static int decode_header(MPADecodeContext *s, uint32_t header) int mpa_decode_header(AVCodecContext *avctx, uint32_t head) { MPADecodeContext s1, *s = &s1; + memset( s, 0, sizeof(MPADecodeContext) ); if (check_header(head) != 0) return -1; @@ -1373,6 +1374,10 @@ static int mp_decode_layer2(MPADecodeContext *s) bound = sblimit; dprintf("bound=%d sblimit=%d\n", bound, sblimit); + + /* sanity check */ + if( bound > sblimit ) bound = sblimit; + /* parse bit allocation */ j = 0; for(i=0;i<bound;i++) { diff --git a/src/libffmpeg/libavcodec/mpegvideo.c b/src/libffmpeg/libavcodec/mpegvideo.c index 883c21260..32a92917c 100644 --- a/src/libffmpeg/libavcodec/mpegvideo.c +++ 
b/src/libffmpeg/libavcodec/mpegvideo.c @@ -26,6 +26,7 @@ */ #include <limits.h> +#include <math.h> //for PI #include "avcodec.h" #include "dsputil.h" #include "mpegvideo.h" @@ -38,7 +39,15 @@ //#undef NDEBUG //#include <assert.h> -#if defined(CONFIG_ENCODERS) || defined(XINE_MPEG_ENCODER) + +/* if xine's MPEG encoder is enabled, enable the encoding features in + * this particular module */ +#ifdef XINE_MPEG_ENCODER +#define CONFIG_ENCODERS +#endif + + +#ifdef CONFIG_ENCODERS static void encode_picture(MpegEncContext *s, int picture_number); #endif //CONFIG_ENCODERS static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s, @@ -54,9 +63,10 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s, static void dct_unquantize_h263_inter_c(MpegEncContext *s, DCTELEM *block, int n, int qscale); static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w); -#if defined(CONFIG_ENCODERS) || defined(XINE_MPEG_ENCODER) +#ifdef CONFIG_ENCODERS static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow); static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow); +static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale); static int sse_mb(MpegEncContext *s); static void denoise_dct_c(MpegEncContext *s, DCTELEM *block); #endif //CONFIG_ENCODERS @@ -101,7 +111,7 @@ static const uint8_t ff_default_chroma_qscale_table[32]={ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 }; -#if defined(CONFIG_ENCODERS) || defined(XINE_MPEG_ENCODER) +#ifdef CONFIG_ENCODERS static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL; static uint8_t default_fcode_tab[MAX_MV*2+1]; @@ -195,7 +205,7 @@ void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_s } } -#if defined(CONFIG_ENCODERS) || defined(XINE_MPEG_ENCODER) +#ifdef CONFIG_ENCODERS void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){ int i; @@ -219,7 +229,7 @@ int DCT_common_init(MpegEncContext *s) s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c; s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c; -#if defined(CONFIG_ENCODERS) || defined(XINE_MPEG_ENCODER) +#ifdef CONFIG_ENCODERS s->dct_quantize= dct_quantize_c; s->denoise_dct= denoise_dct_c; #endif @@ -243,7 +253,7 @@ int DCT_common_init(MpegEncContext *s) MPV_common_init_ppc(s); #endif -#if defined(CONFIG_ENCODERS) || defined(XINE_MPEG_ENCODER) +#ifdef CONFIG_ENCODERS s->fast_dct_quantize= s->dct_quantize; if(s->flags&CODEC_FLAG_TRELLIS_QUANT){ @@ -265,8 +275,6 @@ int DCT_common_init(MpegEncContext *s) ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan); ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan); - s->picture_structure= PICT_FRAME; - return 0; } @@ -340,18 +348,21 @@ static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){ pic->mb_type= pic->mb_type_base + s->mb_stride+1; if(s->out_format == FMT_H264){ for(i=0; i<2; i++){ - CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+1) * sizeof(int16_t)) - pic->motion_val[i]= pic->motion_val_base[i]+1; + CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+2) * sizeof(int16_t)) + pic->motion_val[i]= pic->motion_val_base[i]+2; CHECKED_ALLOCZ(pic->ref_index[i] , b8_array_size * sizeof(uint8_t)) } pic->motion_subsample_log2= 2; }else if(s->out_format == FMT_H263 || s->encoding || 
(s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){ for(i=0; i<2; i++){ - CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+1) * sizeof(int16_t)*2) //FIXME - pic->motion_val[i]= pic->motion_val_base[i]+1; + CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+2) * sizeof(int16_t)*2) //FIXME + pic->motion_val[i]= pic->motion_val_base[i]+2; } pic->motion_subsample_log2= 3; } + if(s->avctx->debug&FF_DEBUG_DCT_COEFF) { + CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6) + } pic->qstride= s->mb_stride; CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan)) } @@ -383,6 +394,7 @@ static void free_picture(MpegEncContext *s, Picture *pic){ av_freep(&pic->mbskip_table); av_freep(&pic->qscale_table); av_freep(&pic->mb_type_base); + av_freep(&pic->dct_coeff); av_freep(&pic->pan_scan); pic->mb_type= NULL; for(i=0; i<2; i++){ @@ -399,6 +411,105 @@ static void free_picture(MpegEncContext *s, Picture *pic){ } } +static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){ + int i; + + // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264) + CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance + s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17; + + //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer() + CHECKED_ALLOCZ(s->me.scratchpad, (s->width+64)*2*16*2*sizeof(uint8_t)) + s->rd_scratchpad= s->me.scratchpad; + s->b_scratchpad= s->me.scratchpad; + s->obmc_scratchpad= s->me.scratchpad + 16; + if (s->encoding) { + CHECKED_ALLOCZ(s->me.map , ME_MAP_SIZE*sizeof(uint32_t)) + CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t)) + if(s->avctx->noise_reduction){ + CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int)) + } + } + CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM)) + s->block= s->blocks[0]; + + for(i=0;i<12;i++){ + s->pblocks[i] = (short *)(&s->block[i]); + } + return 0; +fail: + return -1; //free() through MPV_common_end() +} + +static void free_duplicate_context(MpegEncContext *s){ + if(s==NULL) return; + + av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL; + av_freep(&s->me.scratchpad); + s->rd_scratchpad= + s->b_scratchpad= + s->obmc_scratchpad= NULL; + + av_freep(&s->dct_error_sum); + av_freep(&s->me.map); + av_freep(&s->me.score_map); + av_freep(&s->blocks); + s->block= NULL; +} + +static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){ +#define COPY(a) bak->a= src->a + COPY(allocated_edge_emu_buffer); + COPY(edge_emu_buffer); + COPY(me.scratchpad); + COPY(rd_scratchpad); + COPY(b_scratchpad); + COPY(obmc_scratchpad); + COPY(me.map); + COPY(me.score_map); + COPY(blocks); + COPY(block); + COPY(start_mb_y); + COPY(end_mb_y); + COPY(me.map_generation); + COPY(pb); + COPY(dct_error_sum); + COPY(dct_count[0]); + COPY(dct_count[1]); +#undef COPY +} + +void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){ + MpegEncContext bak; + int i; + //FIXME copy only needed parts +//START_TIMER + backup_duplicate_context(&bak, dst); + memcpy(dst, src, sizeof(MpegEncContext)); + backup_duplicate_context(dst, &bak); + for(i=0;i<12;i++){ + dst->pblocks[i] = (short *)(&dst->block[i]); + } +//STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads +} + +static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){ +#define COPY(a) dst->a= src->a + COPY(pict_type); 
+ COPY(current_picture); + COPY(f_code); + COPY(b_code); + COPY(qscale); + COPY(lambda); + COPY(lambda2); + COPY(picture_in_gop_number); + COPY(gop_picture_number); + COPY(frame_pred_frame_dct); //FIXME dont set in encode_header + COPY(progressive_frame); //FIXME dont set in encode_header + COPY(partitioned_frame); //FIXME dont set in encode_header +#undef COPY +} + /* init common structure for both encoder and decoder */ int MPV_common_init(MpegEncContext *s) { @@ -434,9 +545,18 @@ int MPV_common_init(MpegEncContext *s) s->y_dc_scale_table= s->c_dc_scale_table= ff_mpeg1_dc_scale_table; s->chroma_qscale_table= ff_default_chroma_qscale_table; - if (!s->encoding) - s->progressive_sequence= 1; - s->progressive_frame= 1; + if( s->codec_id != CODEC_ID_MPEG1VIDEO && + s->codec_id != CODEC_ID_MPEG2VIDEO) + { + /* default structure is frame */ + s->progressive_frame= 1; + s->picture_structure= PICT_FRAME; + + s->y_dc_scale_table= + s->c_dc_scale_table= ff_mpeg1_dc_scale_table; + if (!s->encoding) + s->progressive_sequence= 1; + } s->coded_picture_number = 0; y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2); @@ -454,9 +574,6 @@ int MPV_common_init(MpegEncContext *s) + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16) + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24); - CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance - s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17; - s->avctx->coded_frame= (AVFrame*)&s->current_picture; CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this @@ -482,17 +599,6 @@ int MPV_common_init(MpegEncContext *s) s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1; s->b_direct_mv_table = s->b_direct_mv_table_base + s->mb_stride + 1; - //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer() - CHECKED_ALLOCZ(s->me.scratchpad, s->width*2*16*3*sizeof(uint8_t)) - - CHECKED_ALLOCZ(s->me.map , ME_MAP_SIZE*sizeof(uint32_t)) - CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t)) - - if(s->codec_id==CODEC_ID_MPEG4){ - CHECKED_ALLOCZ(s->tex_pb_buffer, PB_BUFFER_SIZE); - CHECKED_ALLOCZ( s->pb2_buffer, PB_BUFFER_SIZE); - } - if(s->msmpeg4_version){ CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int)); } @@ -511,12 +617,9 @@ int MPV_common_init(MpegEncContext *s) CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*)) if(s->avctx->noise_reduction){ - CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int)) CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t)) } } - CHECKED_ALLOCZ(s->blocks, 64*6*2 * sizeof(DCTELEM)) - CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture)) CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t)) @@ -576,12 +679,6 @@ int MPV_common_init(MpegEncContext *s) //Note the +1 is for a quicker mpeg4 slice_end detection CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE); - s->block= s->blocks[0]; - - for(i=0;i<12;i++){ - s->pblocks[i] = (short *)(&s->block[i]); - } - s->parse_context.state= -1; if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){ s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH); @@ -590,20 +687,38 @@ int MPV_common_init(MpegEncContext *s) } s->context_initialized = 1; + + s->thread_context[0]= s; + for(i=1; i<s->avctx->thread_count; i++){ + s->thread_context[i]= 
av_malloc(sizeof(MpegEncContext)); + memcpy(s->thread_context[i], s, sizeof(MpegEncContext)); + } + + for(i=0; i<s->avctx->thread_count; i++){ + if(init_duplicate_context(s->thread_context[i], s) < 0) + goto fail; + s->thread_context[i]->start_mb_y= (s->mb_height*(i ) + s->avctx->thread_count/2) / s->avctx->thread_count; + s->thread_context[i]->end_mb_y = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count; + } + return 0; fail: MPV_common_end(s); return -1; } - -//extern int sads; - /* init common structure for both encoder and decoder */ void MPV_common_end(MpegEncContext *s) { int i, j, k; + for(i=0; i<s->avctx->thread_count; i++){ + free_duplicate_context(s->thread_context[i]); + } + for(i=1; i<s->avctx->thread_count; i++){ + av_freep(&s->thread_context[i]); + } + av_freep(&s->parse_context.buffer); s->parse_context.buffer_size=0; @@ -639,16 +754,10 @@ void MPV_common_end(MpegEncContext *s) av_freep(&s->mbintra_table); av_freep(&s->cbp_table); av_freep(&s->pred_dir_table); - av_freep(&s->me.scratchpad); - av_freep(&s->me.map); - av_freep(&s->me.score_map); av_freep(&s->mbskip_table); av_freep(&s->prev_pict_types); av_freep(&s->bitstream_buffer); - av_freep(&s->tex_pb_buffer); - av_freep(&s->pb2_buffer); - av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL; av_freep(&s->avctx->stats_out); av_freep(&s->ac_stats); av_freep(&s->error_status_table); @@ -658,10 +767,8 @@ void MPV_common_end(MpegEncContext *s) av_freep(&s->q_inter_matrix); av_freep(&s->q_intra_matrix16); av_freep(&s->q_inter_matrix16); - av_freep(&s->blocks); av_freep(&s->input_picture); av_freep(&s->reordered_input_picture); - av_freep(&s->dct_error_sum); av_freep(&s->dct_offset); if(s->picture){ @@ -680,7 +787,7 @@ void MPV_common_end(MpegEncContext *s) av_free(s->visualization_buffer[i]); } -#if defined(CONFIG_ENCODERS) || defined(XINE_MPEG_ENCODER) +#ifdef CONFIG_ENCODERS /* init video encoder */ int MPV_encode_init(AVCodecContext *avctx) @@ -746,7 +853,7 @@ int MPV_encode_init(AVCodecContext *avctx) } if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4 - && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){ + && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){ av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n"); return -1; } @@ -795,6 +902,21 @@ int MPV_encode_init(AVCodecContext *avctx) av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet\n"); return -1; } + + if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4 + && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO + && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){ + av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n"); + return -1; + } + + if(s->avctx->thread_count > MAX_THREADS || 16*s->avctx->thread_count > s->height){ + av_log(avctx, AV_LOG_ERROR, "too many threads\n"); + return -1; + } + + if(s->avctx->thread_count > 1) + s->rtp_mode= 1; i= ff_gcd(avctx->frame_rate, avctx->frame_rate_base); if(i > 1){ @@ -831,7 +953,8 @@ int MPV_encode_init(AVCodecContext *avctx) s->low_delay= 0; //s->max_b_frames ? 0 : 1; avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1); break; -#if defined(CONFIG_ENCODERS) || !defined(XINE_MPEG_ENCODER) +/* xine: this is never used in either decode or MPEG-1 encode mode */ +#if 0 case CODEC_ID_MPEG2VIDEO: s->out_format = FMT_MPEG1; s->low_delay= 0; //s->max_b_frames ? 
0 : 1; @@ -953,7 +1076,7 @@ int MPV_encode_init(AVCodecContext *avctx) s->low_delay=1; break; #endif -#endif +#endif /* #if 0 */ default: return -1; } @@ -990,14 +1113,18 @@ int MPV_encode_init(AVCodecContext *avctx) s->chroma_qscale_table= ff_h263_chroma_qscale_table; s->progressive_frame= s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME)); + s->quant_precision=5; ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp); -#if defined(CONFIG_ENCODERS) || !defined(XINE_MPEG_ENCODER) +/* xine: do not need this for decode or MPEG-1 encoding modes */ +#if 0 ff_init_me(s); -#endif +#endif /* #if 0 */ -#if defined(CONFIG_ENCODERS) || !defined(XINE_MPEG_ENCODER) +/* xine: do not need this for decode or MPEG-1 encoding modes */ +#if 0 +#ifdef CONFIG_ENCODERS #ifdef CONFIG_RISKY if (s->out_format == FMT_H263) h263_encode_init(s); @@ -1005,6 +1132,8 @@ int MPV_encode_init(AVCodecContext *avctx) ff_msmpeg4_encode_init(s); #endif #endif +#endif /* #if 0 */ +/* xine: we do want this for MPEG-1 encoding */ if (s->out_format == FMT_MPEG1) ff_mpeg1_encode_init(s); @@ -1062,11 +1191,12 @@ int MPV_encode_end(AVCodecContext *avctx) ff_rate_control_uninit(s); +/* xine: do not need this for decode or MPEG-1 encoding modes */ +#if 0 MPV_common_end(s); -#if defined(CONFIG_ENCODERS) || !defined(XINE_MPEG_ENCODER) if (s->out_format == FMT_MJPEG) mjpeg_close(s); -#endif +#endif /* #if 0 */ av_freep(&avctx->extradata); @@ -1650,7 +1780,7 @@ v= (int)(128 + r*sin(theta*3.141592/180)); } } -#if defined(CONFIG_ENCODERS) || defined(XINE_MPEG_ENCODER) +#ifdef CONFIG_ENCODERS static int get_sae(uint8_t *src, int ref, int stride){ int x,y; @@ -1753,8 +1883,18 @@ static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){ copy_picture_attributes(pic, pic_arg); pic->display_picture_number= s->input_picture_number++; + if(pic->pts != AV_NOPTS_VALUE){ + s->user_specified_pts= pic->pts; + }else{ + if(s->user_specified_pts){ + pic->pts= s->user_specified_pts + AV_TIME_BASE*(int64_t)s->avctx->frame_rate_base / s->avctx->frame_rate; + av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? 
trying to guess (%Ld)\n", pic->pts); + }else{ + pic->pts= av_rescale(pic->display_picture_number*(int64_t)s->avctx->frame_rate_base, AV_TIME_BASE, s->avctx->frame_rate); + } + } } - + /* shift buffer entries */ for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++) s->input_picture[i-1]= s->input_picture[i]; @@ -1907,7 +2047,15 @@ int MPV_encode_picture(AVCodecContext *avctx, return -1; } - init_put_bits(&s->pb, buf, buf_size); + for(i=0; i<avctx->thread_count; i++){ + int start_y= s->thread_context[i]->start_mb_y; + int end_y= s->thread_context[i]-> end_mb_y; + int h= s->mb_height; + uint8_t *start= buf + buf_size*start_y/h; + uint8_t *end = buf + buf_size* end_y/h; + + init_put_bits(&s->thread_context[i]->pb, start, end - start); + } s->picture_in_gop_number++; @@ -1936,10 +2084,11 @@ int MPV_encode_picture(AVCodecContext *avctx, MPV_frame_end(s); -#if defined(CONFIG_ENCODERS) || !defined(XINE_MPEG_ENCODER) +/* xine: do not need this for decode or MPEG-1 encoding modes */ +#if 0 if (s->out_format == FMT_MJPEG) mjpeg_picture_trailer(s); -#endif +#endif /* #if 0 */ if(s->flags&CODEC_FLAG_PASS1) ff_write_pass1_stats(s); @@ -1949,7 +2098,7 @@ int MPV_encode_picture(AVCodecContext *avctx, } flush_put_bits(&s->pb); - s->frame_bits = (pbBufPtr(&s->pb) - s->pb.buf) * 8; + s->frame_bits = put_bits_count(&s->pb); stuffing_count= ff_vbv_update(s, s->frame_bits); if(stuffing_count){ @@ -1972,11 +2121,11 @@ int MPV_encode_picture(AVCodecContext *avctx, av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n"); } flush_put_bits(&s->pb); - s->frame_bits = (pbBufPtr(&s->pb) - s->pb.buf) * 8; + s->frame_bits = put_bits_count(&s->pb); } /* update mpeg1/2 vbv_delay for CBR */ - if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate){ + if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1){ int vbv_delay; assert(s->repeat_first_field==0); @@ -2005,8 +2154,7 @@ int MPV_encode_picture(AVCodecContext *avctx, static inline void gmc1_motion(MpegEncContext *s, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, - int dest_offset, - uint8_t **ref_picture, int src_offset) + uint8_t **ref_picture) { uint8_t *ptr; int offset, src_x, src_y, linesize, uvlinesize; @@ -2029,9 +2177,8 @@ static inline void gmc1_motion(MpegEncContext *s, linesize = s->linesize; uvlinesize = s->uvlinesize; - ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset; + ptr = ref_picture[0] + (src_y * linesize) + src_x; - dest_y+=dest_offset; if(s->flags&CODEC_FLAG_EMU_EDGE){ if( (unsigned)src_x >= s->h_edge_pos - 17 || (unsigned)src_y >= s->v_edge_pos - 17){ @@ -2069,7 +2216,7 @@ static inline void gmc1_motion(MpegEncContext *s, if (src_y == s->height>>1) motion_y =0; - offset = (src_y * uvlinesize) + src_x + (src_offset>>1); + offset = (src_y * uvlinesize) + src_x; ptr = ref_picture[1] + offset; if(s->flags&CODEC_FLAG_EMU_EDGE){ if( (unsigned)src_x >= (s->h_edge_pos>>1) - 9 @@ -2079,22 +2226,21 @@ static inline void gmc1_motion(MpegEncContext *s, emu=1; } } - s->dsp.gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding); + s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding); ptr = ref_picture[2] + offset; if(emu){ ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1); ptr= s->edge_emu_buffer; } - s->dsp.gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding); + 
s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding); return; } static inline void gmc_motion(MpegEncContext *s, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, - int dest_offset, - uint8_t **ref_picture, int src_offset) + uint8_t **ref_picture) { uint8_t *ptr; int linesize, uvlinesize; @@ -2104,10 +2250,8 @@ static inline void gmc_motion(MpegEncContext *s, linesize = s->linesize; uvlinesize = s->uvlinesize; - ptr = ref_picture[0] + src_offset; + ptr = ref_picture[0]; - dest_y+=dest_offset; - ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16; oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16; @@ -2128,14 +2272,10 @@ static inline void gmc_motion(MpegEncContext *s, if(s->flags&CODEC_FLAG_GRAY) return; - - dest_cb+=dest_offset>>1; - dest_cr+=dest_offset>>1; - ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8; oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8; - ptr = ref_picture[1] + (src_offset>>1); + ptr = ref_picture[1]; s->dsp.gmc(dest_cb, ptr, uvlinesize, 8, ox, oy, @@ -2144,7 +2284,7 @@ static inline void gmc_motion(MpegEncContext *s, a+1, (1<<(2*a+1)) - s->no_rounding, s->h_edge_pos>>1, s->v_edge_pos>>1); - ptr = ref_picture[2] + (src_offset>>1); + ptr = ref_picture[2]; s->dsp.gmc(dest_cr, ptr, uvlinesize, 8, ox, oy, @@ -2226,7 +2366,8 @@ void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, } static inline int hpel_motion(MpegEncContext *s, - uint8_t *dest, uint8_t *src, + uint8_t *dest, uint8_t *src, + int field_based, int field_select, int src_x, int src_y, int width, int height, int stride, int h_edge_pos, int v_edge_pos, @@ -2252,12 +2393,14 @@ static inline int hpel_motion(MpegEncContext *s, if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){ if( (unsigned)src_x > h_edge_pos - (motion_x&1) - w || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){ - ff_emulated_edge_mc(s->edge_emu_buffer, src, stride, w+1, h+1, - src_x, src_y, h_edge_pos, v_edge_pos); + ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based, + src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos); src= s->edge_emu_buffer; emu=1; } } + if(field_select) + src += s->linesize; pix_op[dxy](dest, src, stride, h); return emu; } @@ -2265,14 +2408,13 @@ static inline int hpel_motion(MpegEncContext *s, /* apply one mpeg motion vector to the three components */ static inline void mpeg_motion(MpegEncContext *s, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, - int dest_offset, - uint8_t **ref_picture, int src_offset, - int field_based, op_pixels_func (*pix_op)[4], + int field_based, int bottom_field, int field_select, + uint8_t **ref_picture, op_pixels_func (*pix_op)[4], int motion_x, int motion_y, int h) { - uint8_t *ptr; - int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, uvlinesize; - int emu=0; + uint8_t *ptr_y, *ptr_cb, *ptr_cr; + int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize; + #if 0 if(s->quarter_sample) { @@ -2281,61 +2423,64 @@ if(s->quarter_sample) } #endif - height = s->height >> field_based; v_edge_pos = s->v_edge_pos >> field_based; + linesize = s->current_picture.linesize[0] << field_based; uvlinesize = s->current_picture.linesize[1] << field_based; - emu= hpel_motion(s, - dest_y + dest_offset, ref_picture[0] + src_offset, - s->mb_x * 16, s->mb_y * (16 >> field_based), - 
s->width, height, s->current_picture.linesize[0] << field_based, - s->h_edge_pos, v_edge_pos, - 16, h, pix_op[0], - motion_x, motion_y); - - - if(s->flags&CODEC_FLAG_GRAY) return; + dxy = ((motion_y & 1) << 1) | (motion_x & 1); + src_x = s->mb_x* 16 + (motion_x >> 1); + src_y = s->mb_y*(16>>field_based) + (motion_y >> 1); if (s->out_format == FMT_H263) { - dxy = 0; - if ((motion_x & 3) != 0) - dxy |= 1; - if ((motion_y & 3) != 0) - dxy |= 2; - mx = motion_x >> 2; - my = motion_y >> 2; + uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1); + uvsrc_x = src_x>>1; + uvsrc_y = src_y>>1; } else { mx = motion_x / 2; my = motion_y / 2; - dxy = ((my & 1) << 1) | (mx & 1); - mx >>= 1; - my >>= 1; + uvdxy = ((my & 1) << 1) | (mx & 1); + uvsrc_x = s->mb_x* 8 + (mx >> 1); + uvsrc_y = s->mb_y*(8>>field_based) + (my >> 1); } - - src_x = s->mb_x * 8 + mx; - src_y = s->mb_y * (8 >> field_based) + my; - src_x = clip(src_x, -8, s->width >> 1); - if (src_x == (s->width >> 1)) - dxy &= ~1; - src_y = clip(src_y, -8, height >> 1); - if (src_y == (height >> 1)) - dxy &= ~2; - offset = (src_y * uvlinesize) + src_x + (src_offset >> 1); - ptr = ref_picture[1] + offset; - if(emu){ - ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, - src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1); - ptr= s->edge_emu_buffer + (src_offset >> 1); + + ptr_y = ref_picture[0] + src_y * linesize + src_x; + ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x; + ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x; + + if( (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16 + || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){ + ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based, + src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos); + ptr_y = s->edge_emu_buffer; + if(!(s->flags&CODEC_FLAG_GRAY)){ + uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize; + ff_emulated_edge_mc(uvbuf , ptr_cb, s->uvlinesize, 9, 9+field_based, + uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1); + ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based, + uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1); + ptr_cb= uvbuf; + ptr_cr= uvbuf+16; + } } - pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr, uvlinesize, h >> 1); - ptr = ref_picture[2] + offset; - if(emu){ - ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, - src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1); - ptr= s->edge_emu_buffer + (src_offset >> 1); + if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data + dest_y += s->linesize; + dest_cb+= s->uvlinesize; + dest_cr+= s->uvlinesize; + } + + if(field_select){ + ptr_y += s->linesize; + ptr_cb+= s->uvlinesize; + ptr_cr+= s->uvlinesize; + } + + pix_op[0][dxy](dest_y, ptr_y, linesize, h); + + if(!(s->flags&CODEC_FLAG_GRAY)){ + pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1); + pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1); } - pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr, uvlinesize, h >> 1); } //FIXME move to dsputil, avg variant, 16x16 version static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){ @@ -2406,8 +2551,8 @@ static inline void obmc_motion(MpegEncContext *s, if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){ ptr[i]= ptr[MID]; }else{ - ptr[i]= s->edge_emu_buffer + 16 + 8*(i&1) + s->linesize*8*(i>>1); - hpel_motion(s, ptr[i], src, + ptr[i]= 
s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1); + hpel_motion(s, ptr[i], src, 0, 0, src_x, src_y, s->width, s->height, s->linesize, s->h_edge_pos, s->v_edge_pos, @@ -2421,54 +2566,22 @@ static inline void obmc_motion(MpegEncContext *s, static inline void qpel_motion(MpegEncContext *s, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, - int dest_offset, - uint8_t **ref_picture, int src_offset, - int field_based, op_pixels_func (*pix_op)[4], + int field_based, int bottom_field, int field_select, + uint8_t **ref_picture, op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16], int motion_x, int motion_y, int h) { - uint8_t *ptr; - int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize; - int emu=0; + uint8_t *ptr_y, *ptr_cb, *ptr_cr; + int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize; dxy = ((motion_y & 3) << 2) | (motion_x & 3); - src_x = s->mb_x * 16 + (motion_x >> 2); + src_x = s->mb_x * 16 + (motion_x >> 2); src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2); - height = s->height >> field_based; v_edge_pos = s->v_edge_pos >> field_based; - src_x = clip(src_x, -16, s->width); - if (src_x == s->width) - dxy &= ~3; - src_y = clip(src_y, -16, height); - if (src_y == height) - dxy &= ~12; linesize = s->linesize << field_based; uvlinesize = s->uvlinesize << field_based; - ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset; - dest_y += dest_offset; -//printf("%d %d %d\n", src_x, src_y, dxy); - if(s->flags&CODEC_FLAG_EMU_EDGE){ - if( (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16 - || (unsigned)src_y > v_edge_pos - (motion_y&3) - h ){ - ff_emulated_edge_mc(s->edge_emu_buffer, ptr - src_offset, s->linesize, 17, 17+field_based, - src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos); - ptr= s->edge_emu_buffer + src_offset; - emu=1; - } - } - if(!field_based) - qpix_op[0][dxy](dest_y, ptr, linesize); - else{ - //damn interlaced mode - //FIXME boundary mirroring is not exactly correct here - qpix_op[1][dxy](dest_y , ptr , linesize); - qpix_op[1][dxy](dest_y+8, ptr+8, linesize); - } - - if(s->flags&CODEC_FLAG_GRAY) return; - if(field_based){ mx= motion_x/2; my= motion_y>>1; @@ -2486,35 +2599,56 @@ static inline void qpel_motion(MpegEncContext *s, mx= (mx>>1)|(mx&1); my= (my>>1)|(my&1); - dxy= (mx&1) | ((my&1)<<1); + uvdxy= (mx&1) | ((my&1)<<1); mx>>=1; my>>=1; - src_x = s->mb_x * 8 + mx; - src_y = s->mb_y * (8 >> field_based) + my; - src_x = clip(src_x, -8, s->width >> 1); - if (src_x == (s->width >> 1)) - dxy &= ~1; - src_y = clip(src_y, -8, height >> 1); - if (src_y == (height >> 1)) - dxy &= ~2; + uvsrc_x = s->mb_x * 8 + mx; + uvsrc_y = s->mb_y * (8 >> field_based) + my; + + ptr_y = ref_picture[0] + src_y * linesize + src_x; + ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x; + ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x; + + if( (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16 + || (unsigned)src_y > v_edge_pos - (motion_y&3) - h ){ + ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based, + src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos); + ptr_y= s->edge_emu_buffer; + if(!(s->flags&CODEC_FLAG_GRAY)){ + uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize; + ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based, + uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1); + ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based, + uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1); + 
ptr_cb= uvbuf; + ptr_cr= uvbuf + 16; + } + } - offset = (src_y * uvlinesize) + src_x + (src_offset >> 1); - ptr = ref_picture[1] + offset; - if(emu){ - ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, - src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1); - ptr= s->edge_emu_buffer + (src_offset >> 1); + if(!field_based) + qpix_op[0][dxy](dest_y, ptr_y, linesize); + else{ + if(bottom_field){ + dest_y += s->linesize; + dest_cb+= s->uvlinesize; + dest_cr+= s->uvlinesize; + } + + if(field_select){ + ptr_y += s->linesize; + ptr_cb += s->uvlinesize; + ptr_cr += s->uvlinesize; + } + //damn interlaced mode + //FIXME boundary mirroring is not exactly correct here + qpix_op[1][dxy](dest_y , ptr_y , linesize); + qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize); } - pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr, uvlinesize, h >> 1); - - ptr = ref_picture[2] + offset; - if(emu){ - ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, - src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1); - ptr= s->edge_emu_buffer + (src_offset >> 1); + if(!(s->flags&CODEC_FLAG_GRAY)){ + pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1); + pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1); } - pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr, uvlinesize, h >> 1); } inline int ff_h263_round_chroma(int x){ @@ -2665,16 +2799,16 @@ static inline void MPV_motion(MpegEncContext *s, #ifdef CONFIG_RISKY if(s->mcsel){ if(s->real_sprite_warping_points==1){ - gmc1_motion(s, dest_y, dest_cb, dest_cr, 0, - ref_picture, 0); + gmc1_motion(s, dest_y, dest_cb, dest_cr, + ref_picture); }else{ - gmc_motion(s, dest_y, dest_cb, dest_cr, 0, - ref_picture, 0); + gmc_motion(s, dest_y, dest_cb, dest_cr, + ref_picture); } }else if(s->quarter_sample){ - qpel_motion(s, dest_y, dest_cb, dest_cr, 0, - ref_picture, 0, - 0, pix_op, qpix_op, + qpel_motion(s, dest_y, dest_cb, dest_cr, + 0, 0, 0, + ref_picture, pix_op, qpix_op, s->mv[dir][0][0], s->mv[dir][0][1], 16); }else if(s->mspel){ ff_mspel_motion(s, dest_y, dest_cb, dest_cr, @@ -2683,9 +2817,9 @@ static inline void MPV_motion(MpegEncContext *s, }else #endif { - mpeg_motion(s, dest_y, dest_cb, dest_cr, 0, - ref_picture, 0, - 0, pix_op, + mpeg_motion(s, dest_y, dest_cb, dest_cr, + 0, 0, 0, + ref_picture, pix_op, s->mv[dir][0][0], s->mv[dir][0][1], 16); } break; @@ -2726,7 +2860,7 @@ static inline void MPV_motion(MpegEncContext *s, }else{ for(i=0;i<4;i++) { hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize, - ref_picture[0], + ref_picture[0], 0, 0, mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8, s->width, s->height, s->linesize, s->h_edge_pos, s->v_edge_pos, @@ -2744,140 +2878,83 @@ static inline void MPV_motion(MpegEncContext *s, case MV_TYPE_FIELD: if (s->picture_structure == PICT_FRAME) { if(s->quarter_sample){ - /* top field */ - qpel_motion(s, dest_y, dest_cb, dest_cr, 0, - ref_picture, s->field_select[dir][0] ? s->linesize : 0, - 1, pix_op, qpix_op, - s->mv[dir][0][0], s->mv[dir][0][1], 8); - /* bottom field */ - qpel_motion(s, dest_y, dest_cb, dest_cr, s->linesize, - ref_picture, s->field_select[dir][1] ? 
s->linesize : 0, - 1, pix_op, qpix_op, - s->mv[dir][1][0], s->mv[dir][1][1], 8); + for(i=0; i<2; i++){ + qpel_motion(s, dest_y, dest_cb, dest_cr, + 1, i, s->field_select[dir][i], + ref_picture, pix_op, qpix_op, + s->mv[dir][i][0], s->mv[dir][i][1], 8); + } }else{ /* top field */ - mpeg_motion(s, dest_y, dest_cb, dest_cr, 0, - ref_picture, s->field_select[dir][0] ? s->linesize : 0, - 1, pix_op, + mpeg_motion(s, dest_y, dest_cb, dest_cr, + 1, 0, s->field_select[dir][0], + ref_picture, pix_op, s->mv[dir][0][0], s->mv[dir][0][1], 8); /* bottom field */ - mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize, - ref_picture, s->field_select[dir][1] ? s->linesize : 0, - 1, pix_op, + mpeg_motion(s, dest_y, dest_cb, dest_cr, + 1, 1, s->field_select[dir][1], + ref_picture, pix_op, s->mv[dir][1][0], s->mv[dir][1][1], 8); } } else { - int offset; - if(s->picture_structure == s->field_select[dir][0] + 1 || s->pict_type == B_TYPE || s->first_field){ - offset= s->field_select[dir][0] ? s->linesize : 0; - }else{ - ref_picture= s->current_picture.data; - offset= s->field_select[dir][0] ? s->linesize : -s->linesize; + if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){ + ref_picture= s->current_picture_ptr->data; } - mpeg_motion(s, dest_y, dest_cb, dest_cr, 0, - ref_picture, offset, - 0, pix_op, + mpeg_motion(s, dest_y, dest_cb, dest_cr, + 0, 0, s->field_select[dir][0], + ref_picture, pix_op, s->mv[dir][0][0], s->mv[dir][0][1], 16); } break; - case MV_TYPE_16X8:{ - int offset; - uint8_t ** ref2picture; + case MV_TYPE_16X8: + for(i=0; i<2; i++){ + uint8_t ** ref2picture; - if(s->picture_structure == s->field_select[dir][0] + 1 || s->pict_type == B_TYPE || s->first_field){ + if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){ ref2picture= ref_picture; - offset= s->field_select[dir][0] ? s->linesize : 0; }else{ - ref2picture= s->current_picture.data; - offset= s->field_select[dir][0] ? s->linesize : -s->linesize; + ref2picture= s->current_picture_ptr->data; } - mpeg_motion(s, dest_y, dest_cb, dest_cr, 0, - ref2picture, offset, - 0, pix_op, - s->mv[dir][0][0], s->mv[dir][0][1], 8); - - - if(s->picture_structure == s->field_select[dir][1] + 1 || s->pict_type == B_TYPE || s->first_field){ - ref2picture= ref_picture; - offset= s->field_select[dir][1] ? s->linesize : 0; - }else{ - ref2picture= s->current_picture.data; - offset= s->field_select[dir][1] ? 
s->linesize : -s->linesize; - } - // I know it is ugly but this is the only way to fool emu_edge without rewrite mpeg_motion - mpeg_motion(s, dest_y+16*s->linesize, dest_cb+8*s->uvlinesize, dest_cr+8*s->uvlinesize, - 0, - ref2picture, offset, - 0, pix_op, - s->mv[dir][1][0], s->mv[dir][1][1]+16, 8); - } - + mpeg_motion(s, dest_y, dest_cb, dest_cr, + 0, 0, s->field_select[dir][i], + ref2picture, pix_op, + s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8); + + dest_y += 16*s->linesize; + dest_cb+= 8*s->uvlinesize; + dest_cr+= 8*s->uvlinesize; + } break; case MV_TYPE_DMV: - { - op_pixels_func (*dmv_pix_op)[4]; - int offset; - - dmv_pix_op = s->dsp.put_pixels_tab; - if(s->picture_structure == PICT_FRAME){ - //put top field from top field - mpeg_motion(s, dest_y, dest_cb, dest_cr, 0, - ref_picture, 0, - 1, dmv_pix_op, - s->mv[dir][0][0], s->mv[dir][0][1], 8); - //put bottom field from bottom field - mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize, - ref_picture, s->linesize, - 1, dmv_pix_op, - s->mv[dir][0][0], s->mv[dir][0][1], 8); - - dmv_pix_op = s->dsp.avg_pixels_tab; - - //avg top field from bottom field - mpeg_motion(s, dest_y, dest_cb, dest_cr, 0, - ref_picture, s->linesize, - 1, dmv_pix_op, - s->mv[dir][2][0], s->mv[dir][2][1], 8); - //avg bottom field from top field - mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize, - ref_picture, 0, - 1, dmv_pix_op, - s->mv[dir][3][0], s->mv[dir][3][1], 8); - + for(i=0; i<2; i++){ + int j; + for(j=0; j<2; j++){ + mpeg_motion(s, dest_y, dest_cb, dest_cr, + 1, j, j^i, + ref_picture, pix_op, + s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8); + } + pix_op = s->dsp.avg_pixels_tab; + } }else{ - offset=(s->picture_structure == PICT_BOTTOM_FIELD)? - s->linesize : 0; - - //put field from the same parity - //same parity is never in the same frame - mpeg_motion(s, dest_y, dest_cb, dest_cr, 0, - ref_picture,offset, - 0,dmv_pix_op, - s->mv[dir][0][0],s->mv[dir][0][1],16); - - // after put we make avg of the same block - dmv_pix_op=s->dsp.avg_pixels_tab; - - //opposite parity is always in the same frame if this is second field - if(!s->first_field){ - ref_picture = s->current_picture.data; - //top field is one linesize from frame beginig - offset=(s->picture_structure == PICT_BOTTOM_FIELD)? - -s->linesize : s->linesize; - }else - offset=(s->picture_structure == PICT_BOTTOM_FIELD)? 
- 0 : s->linesize; - - //avg field from the opposite parity - mpeg_motion(s, dest_y, dest_cb, dest_cr,0, - ref_picture, offset, - 0,dmv_pix_op, - s->mv[dir][2][0],s->mv[dir][2][1],16); + for(i=0; i<2; i++){ + mpeg_motion(s, dest_y, dest_cb, dest_cr, + 0, 0, s->picture_structure != i+1, + ref_picture, pix_op, + s->mv[dir][2*i][0],s->mv[dir][2*i][1],16); + + // after put we make avg of the same block + pix_op=s->dsp.avg_pixels_tab; + + //opposite parity is always in the same frame if this is second field + if(!s->first_field){ + ref_picture = s->current_picture_ptr->data; + } + } } - } break; default: assert(0); } @@ -2968,6 +3045,15 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) mb_x = s->mb_x; mb_y = s->mb_y; + if(s->avctx->debug&FF_DEBUG_DCT_COEFF) { + /* save DCT coefficients */ + int i,j; + DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6]; + for(i=0; i<6; i++) + for(j=0; j<64; j++) + *dct++ = block[i][s->dsp.idct_permutation[j]]; + } + s->current_picture.qscale_table[mb_xy]= s->qscale; /* update DC predictors for P macroblocks */ @@ -3032,9 +3118,9 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) dest_cb= s->dest[1]; dest_cr= s->dest[2]; }else{ - dest_y = s->edge_emu_buffer+32; //FIXME cleanup scratchpad pointers - dest_cb= s->edge_emu_buffer+48; - dest_cr= s->edge_emu_buffer+56; + dest_y = s->b_scratchpad; + dest_cb= s->b_scratchpad+16*linesize; + dest_cr= s->b_scratchpad+16*linesize+8; } if (!s->mb_intra) { /* motion handling */ @@ -3121,7 +3207,7 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) } } -#if defined(CONFIG_ENCODERS) || defined(XINE_MPEG_ENCODER) +#ifdef CONFIG_ENCODERS static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold) { @@ -3270,15 +3356,43 @@ void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename } } -#if defined(CONFIG_ENCODERS) || defined(XINE_MPEG_ENCODER) +#ifdef CONFIG_ENCODERS + +static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){ + int x, y; +//FIXME optimize + for(y=0; y<8; y++){ + for(x=0; x<8; x++){ + int x2, y2; + int sum=0; + int sqr=0; + int count=0; + + for(y2= FFMAX(y-1, 0); y2 < FFMIN(8, y+2); y2++){ + for(x2= FFMAX(x-1, 0); x2 < FFMIN(8, x+2); x2++){ + int v= ptr[x2 + y2*stride]; + sum += v; + sqr += v*v; + count++; + } + } + weight[x + 8*y]= (36*ff_sqrt(count*sqr - sum*sum)) / count; + } + } +} static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) { + int16_t weight[6][64]; + DCTELEM orig[6][64]; const int mb_x= s->mb_x; const int mb_y= s->mb_y; int i; int skip_dct[6]; int dct_offset = s->linesize*8; //default for progressive frames + uint8_t *ptr_y, *ptr_cb, *ptr_cr; + int wrap_y, wrap_c; + int emu=0; for(i=0; i<6; i++) skip_dct[i]=0; @@ -3292,43 +3406,52 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) if(!(s->flags&CODEC_FLAG_QP_RD)){ s->dquant= s->qscale - last_qp; - if(s->out_format==FMT_H263) + if(s->out_format==FMT_H263){ s->dquant= clip(s->dquant, -2, 2); //FIXME RD - if(s->codec_id==CODEC_ID_MPEG4){ - if(!s->mb_intra){ - if((s->mv_dir&MV_DIRECT) || s->mv_type==MV_TYPE_8X8) - s->dquant=0; + if(s->codec_id==CODEC_ID_MPEG4){ + if(!s->mb_intra){ + if(s->pict_type == B_TYPE){ + if(s->dquant&1) + s->dquant= (s->dquant/2)*2; + if(s->mv_dir&MV_DIRECT) + s->dquant= 0; + } + if(s->mv_type==MV_TYPE_8X8) + s->dquant=0; + } } } } ff_set_qscale(s, last_qp + s->dquant); } - if (s->mb_intra) { - uint8_t *ptr; - int wrap_y; - int emu=0; + wrap_y = s->linesize; + wrap_c = s->uvlinesize; + ptr_y = 
s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16; + ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8; + ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8; - wrap_y = s->linesize; - ptr = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16; + if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){ + ff_emulated_edge_mc(s->edge_emu_buffer , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width , s->height); + ptr_y= s->edge_emu_buffer; + ff_emulated_edge_mc(s->edge_emu_buffer+18*wrap_y , ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); + ptr_cb= s->edge_emu_buffer+18*wrap_y; + ff_emulated_edge_mc(s->edge_emu_buffer+18*wrap_y+9, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); + ptr_cr= s->edge_emu_buffer+18*wrap_y+9; + } - if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){ - ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height); - ptr= s->edge_emu_buffer; - emu=1; - } - + if (s->mb_intra) { if(s->flags&CODEC_FLAG_INTERLACED_DCT){ int progressive_score, interlaced_score; s->interlaced_dct=0; - progressive_score= s->dsp.ildct_cmp[4](s, ptr , NULL, wrap_y, 8) - +s->dsp.ildct_cmp[4](s, ptr + wrap_y*8, NULL, wrap_y, 8) - 400; + progressive_score= s->dsp.ildct_cmp[4](s, ptr_y , NULL, wrap_y, 8) + +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400; if(progressive_score > 0){ - interlaced_score = s->dsp.ildct_cmp[4](s, ptr , NULL, wrap_y*2, 8) - +s->dsp.ildct_cmp[4](s, ptr + wrap_y , NULL, wrap_y*2, 8); + interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y , NULL, wrap_y*2, 8) + +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y , NULL, wrap_y*2, 8); if(progressive_score > interlaced_score){ s->interlaced_dct=1; @@ -3338,46 +3461,26 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) } } - s->dsp.get_pixels(s->block[0], ptr , wrap_y); - s->dsp.get_pixels(s->block[1], ptr + 8, wrap_y); - s->dsp.get_pixels(s->block[2], ptr + dct_offset , wrap_y); - s->dsp.get_pixels(s->block[3], ptr + dct_offset + 8, wrap_y); + s->dsp.get_pixels(s->block[0], ptr_y , wrap_y); + s->dsp.get_pixels(s->block[1], ptr_y + 8, wrap_y); + s->dsp.get_pixels(s->block[2], ptr_y + dct_offset , wrap_y); + s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y); if(s->flags&CODEC_FLAG_GRAY){ skip_dct[4]= 1; skip_dct[5]= 1; }else{ - int wrap_c = s->uvlinesize; - ptr = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8; - if(emu){ - ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); - ptr= s->edge_emu_buffer; - } - s->dsp.get_pixels(s->block[4], ptr, wrap_c); - - ptr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8; - if(emu){ - ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); - ptr= s->edge_emu_buffer; - } - s->dsp.get_pixels(s->block[5], ptr, wrap_c); + s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c); + s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c); } }else{ op_pixels_func (*op_pix)[4]; qpel_mc_func (*op_qpix)[16]; uint8_t *dest_y, *dest_cb, *dest_cr; - uint8_t *ptr_y, *ptr_cb, *ptr_cr; - int wrap_y, wrap_c; - int emu=0; dest_y = s->dest[0]; dest_cb = s->dest[1]; dest_cr = s->dest[2]; - wrap_y = s->linesize; - wrap_c = s->uvlinesize; - ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16; - ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8; - ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8; if ((!s->no_rounding) || 
s->pict_type==B_TYPE){ op_pix = s->dsp.put_pixels_tab; @@ -3396,12 +3499,6 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix); } - if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){ - ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height); - ptr_y= s->edge_emu_buffer; - emu=1; - } - if(s->flags&CODEC_FLAG_INTERLACED_DCT){ int progressive_score, interlaced_score; @@ -3433,15 +3530,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) skip_dct[4]= 1; skip_dct[5]= 1; }else{ - if(emu){ - ff_emulated_edge_mc(s->edge_emu_buffer, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); - ptr_cb= s->edge_emu_buffer; - } s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c); - if(emu){ - ff_emulated_edge_mc(s->edge_emu_buffer, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); - ptr_cr= s->edge_emu_buffer; - } s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c); } /* pre quantization */ @@ -3453,33 +3542,22 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1; if(s->dsp.sad[1](NULL, ptr_cb , dest_cb , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1; if(s->dsp.sad[1](NULL, ptr_cr , dest_cr , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1; -#if 0 -{ - static int stat[7]; - int num=0; - for(i=0; i<6; i++) - if(skip_dct[i]) num++; - stat[num]++; - - if(s->mb_x==0 && s->mb_y==0){ - for(i=0; i<7; i++){ - printf("%6d %1d\n", stat[i], i); - } - } -} -#endif } + } + if(s->avctx->quantizer_noise_shaping){ + if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y , wrap_y); + if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y + 8, wrap_y); + if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset , wrap_y); + if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y); + if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb , wrap_c); + if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr , wrap_c); + memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*6); } /* DCT & quantize */ - if(s->out_format==FMT_MJPEG){ - for(i=0;i<6;i++) { - int overflow; - s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, 8, &overflow); - if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]); - } - }else{ + assert(s->out_format!=FMT_MJPEG || s->qscale==8); + { for(i=0;i<6;i++) { if(!skip_dct[i]){ int overflow; @@ -3491,6 +3569,13 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) }else s->block_last_index[i]= -1; } + if(s->avctx->quantizer_noise_shaping){ + for(i=0;i<6;i++) { + if(!skip_dct[i]){ + s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale); + } + } + } if(s->luma_elim_threshold && !s->mb_intra) for(i=0; i<4; i++) @@ -3532,7 +3617,8 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) case CODEC_ID_MPEG1VIDEO: case CODEC_ID_MPEG2VIDEO: mpeg1_encode_mb(s, s->block, motion_x, motion_y); break; -#if defined(CONFIG_ENCODERS) || !defined(XINE_MPEG_ENCODER) +/* xine: do not need this for decode or MPEG-1 encoding modes */ +#if 0 #ifdef CONFIG_RISKY case CODEC_ID_MPEG4: mpeg4_encode_mb(s, s->block, motion_x, motion_y); break; @@ -3550,7 +3636,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) #endif case CODEC_ID_MJPEG: mjpeg_encode_mb(s, s->block); break; -#endif 
+#endif /* #if 0 */ default: assert(0); } @@ -3636,19 +3722,32 @@ void ff_mpeg_flush(AVCodecContext *avctx){ s->parse_context.overread_index= 0; s->parse_context.index= 0; s->parse_context.last_index= 0; + s->bitstream_buffer_size=0; } -#if defined(CONFIG_ENCODERS) || defined(XINE_MPEG_ENCODER) +#ifdef CONFIG_ENCODERS void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length) { - int bytes= length>>4; + const uint16_t *srcw= (uint16_t*)src; + int words= length>>4; int bits= length&15; int i; if(length==0) return; - - for(i=0; i<bytes; i++) put_bits(pb, 16, be2me_16(((uint16_t*)src)[i])); - put_bits(pb, bits, be2me_16(((uint16_t*)src)[i])>>(16-bits)); + + if(words < 16){ + for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i])); + }else if(put_bits_count(pb)&7){ + for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i])); + }else{ + for(i=0; put_bits_count(pb)&31; i++) + put_bits(pb, 8, src[i]); + flush_put_bits(pb); + memcpy(pbBufPtr(pb), src+i, 2*words-i); + skip_put_bytes(pb, 2*words-i); + } + + put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits)); } static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){ @@ -3732,19 +3831,18 @@ static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegE if(*next_block){ memcpy(dest_backup, s->dest, sizeof(s->dest)); - s->dest[0] = s->me.scratchpad; - s->dest[1] = s->me.scratchpad + 16; - s->dest[2] = s->me.scratchpad + 16 + 8; - assert(2*s->uvlinesize == s->linesize); //should be no prob for encoding - assert(s->linesize >= 64); //FIXME + s->dest[0] = s->rd_scratchpad; + s->dest[1] = s->rd_scratchpad + 16*s->linesize; + s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8; + assert(s->linesize >= 32); //FIXME } encode_mb(s, motion_x, motion_y); - score= get_bit_count(&s->pb); + score= put_bits_count(&s->pb); if(s->data_partitioning){ - score+= get_bit_count(&s->pb2); - score+= get_bit_count(&s->tex_pb); + score+= put_bits_count(&s->pb2); + score+= put_bits_count(&s->tex_pb); } if(s->avctx->mb_decision == FF_MB_DECISION_RD){ @@ -3804,270 +3902,108 @@ static int sse_mb(MpegEncContext *s){ +sse(s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize); } -static void encode_picture(MpegEncContext *s, int picture_number) -{ - int mb_x, mb_y, pdif = 0; - int i, j; - int bits; - MpegEncContext best_s, backup_s; - uint8_t bit_buf[2][3000]; - uint8_t bit_buf2[2][3000]; - uint8_t bit_buf_tex[2][3000]; - PutBitContext pb[2], pb2[2], tex_pb[2]; +static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){ + MpegEncContext *s= arg; - for(i=0; i<2; i++){ - init_put_bits(&pb [i], bit_buf [i], 3000); - init_put_bits(&pb2 [i], bit_buf2 [i], 3000); - init_put_bits(&tex_pb[i], bit_buf_tex[i], 3000); - } - - s->picture_number = picture_number; - - /* Reset the average MB variance */ - s->current_picture.mb_var_sum = 0; - s->current_picture.mc_mb_var_sum = 0; - -#if defined(CONFIG_ENCODERS) || !defined(XINE_MPEG_ENCODER) -#ifdef CONFIG_RISKY - /* we need to initialize some time vars before we can encode b-frames */ - // RAL: Condition added for MPEG1VIDEO - if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4)) - ff_set_mpeg4_time(s, s->picture_number); -#endif -#endif - - s->scene_change_score=0; - s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... 
stuff for ME ratedistoration - - if(s->pict_type==I_TYPE){ - if(s->msmpeg4_version >= 3) s->no_rounding=1; - else s->no_rounding=0; - }else if(s->pict_type!=B_TYPE){ - if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4) - s->no_rounding ^= 1; - } - - /* Estimate motion for every MB */ - s->mb_intra=0; //for the rate distoration & bit compare functions - if(s->pict_type != I_TYPE){ -#if defined(CONFIG_ENCODERS) || !defined(XINE_MPEG_ENCODER) - if(s->pict_type != B_TYPE){ - if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){ - s->me.pre_pass=1; - s->me.dia_size= s->avctx->pre_dia_size; - - for(mb_y=s->mb_height-1; mb_y >=0 ; mb_y--) { - s->mb_y = mb_y; - for(mb_x=s->mb_width-1; mb_x >=0 ; mb_x--) { - s->mb_x = mb_x; - ff_pre_estimate_p_frame_motion(s, mb_x, mb_y); - } - } - s->me.pre_pass=0; - } - } - - s->me.dia_size= s->avctx->dia_size; - for(mb_y=0; mb_y < s->mb_height; mb_y++) { - s->mb_y = mb_y; - s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1; - s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1); - s->block_index[2]= s->block_wrap[0]*(mb_y*2 + 2) - 1; - s->block_index[3]= s->block_wrap[0]*(mb_y*2 + 2); - for(mb_x=0; mb_x < s->mb_width; mb_x++) { - s->mb_x = mb_x; - s->block_index[0]+=2; - s->block_index[1]+=2; - s->block_index[2]+=2; - s->block_index[3]+=2; - - /* compute motion vector & mb_type and store in context */ - if(s->pict_type==B_TYPE) - ff_estimate_b_frame_motion(s, mb_x, mb_y); - else - ff_estimate_p_frame_motion(s, mb_x, mb_y); - } - } -#endif - }else /* if(s->pict_type == I_TYPE) */{ - /* I-Frame */ - for(i=0; i<s->mb_stride*s->mb_height; i++) - s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA; - - if(!s->fixed_qscale){ - /* finding spatial complexity for I-frame rate control */ - for(mb_y=0; mb_y < s->mb_height; mb_y++) { - for(mb_x=0; mb_x < s->mb_width; mb_x++) { - int xx = mb_x * 16; - int yy = mb_y * 16; - uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx; - int varc; - int sum = s->dsp.pix_sum(pix, s->linesize); - - varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8; - - s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc; - s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8; - s->current_picture.mb_var_sum += varc; - } - } + s->me.pre_pass=1; + s->me.dia_size= s->avctx->pre_dia_size; + s->first_slice_line=1; + for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) { + for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) { + ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y); } + s->first_slice_line=0; } - emms_c(); - - if(s->scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){ - s->pict_type= I_TYPE; - for(i=0; i<s->mb_stride*s->mb_height; i++) - s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA; -//printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum); - } + + s->me.pre_pass=0; + + return 0; +} -#if defined(CONFIG_ENCODERS) || !defined(XINE_MPEG_ENCODER) - if(!s->umvplus){ - if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) { - s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER); +static int estimate_motion_thread(AVCodecContext *c, void *arg){ + MpegEncContext *s= arg; - if(s->flags & CODEC_FLAG_INTERLACED_ME){ - int a,b; - a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select - b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I); - 
s->f_code= FFMAX(s->f_code, FFMAX(a,b)); - } - - ff_fix_long_p_mvs(s); - ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0); - if(s->flags & CODEC_FLAG_INTERLACED_ME){ - for(i=0; i<2; i++){ - for(j=0; j<2; j++) - ff_fix_long_mvs(s, s->p_field_select_table[i], j, - s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0); - } - } + s->me.dia_size= s->avctx->dia_size; + s->first_slice_line=1; + for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) { + s->mb_x=0; //for block init below + ff_init_block_index(s); + for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) { + s->block_index[0]+=2; + s->block_index[1]+=2; + s->block_index[2]+=2; + s->block_index[3]+=2; + + /* compute motion vector & mb_type and store in context */ + if(s->pict_type==B_TYPE) + ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y); + else + ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y); } + s->first_slice_line=0; + } + return 0; +} - if(s->pict_type==B_TYPE){ - int a, b; - - a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD); - b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR); - s->f_code = FFMAX(a, b); +static int mb_var_thread(AVCodecContext *c, void *arg){ + MpegEncContext *s= arg; + int mb_x, mb_y; - a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD); - b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR); - s->b_code = FFMAX(a, b); + for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) { + for(mb_x=0; mb_x < s->mb_width; mb_x++) { + int xx = mb_x * 16; + int yy = mb_y * 16; + uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx; + int varc; + int sum = s->dsp.pix_sum(pix, s->linesize); + + varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8; - ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1); - ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1); - ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1); - ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1); - if(s->flags & CODEC_FLAG_INTERLACED_ME){ - int dir; - for(dir=0; dir<2; dir++){ - for(i=0; i<2; i++){ - for(j=0; j<2; j++){ - int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I) - : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I); - ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j, - s->b_field_mv_table[dir][i][j], dir ? 
s->b_code : s->f_code, type, 1); - } - } - } - } + s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc; + s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8; + s->mb_var_sum_temp += varc; } } -#endif - - if (!s->fixed_qscale) - s->current_picture.quality = ff_rate_estimate_qscale(s); + return 0; +} - if(s->adaptive_quant){ -#if defined(CONFIG_ENCODERS) || !defined(XINE_MPEG_ENCODER) -#ifdef CONFIG_RISKY - switch(s->codec_id){ - case CODEC_ID_MPEG4: - ff_clean_mpeg4_qscales(s); - break; - case CODEC_ID_H263: - case CODEC_ID_H263P: - case CODEC_ID_FLV1: - ff_clean_h263_qscales(s); - break; +static void write_slice_end(MpegEncContext *s){ +/* xine: do not need this for decode or MPEG-1 encoding modes */ +#if 0 + if(s->codec_id==CODEC_ID_MPEG4){ + if(s->partitioned_frame){ + ff_mpeg4_merge_partitions(s); } -#endif -#endif - - s->lambda= s->lambda_table[0]; - //FIXME broken - }else - s->lambda= s->current_picture.quality; -//printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality); - update_qscale(s); - if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE)) - s->qscale= 3; //reduce cliping problems - - if (s->out_format == FMT_MJPEG) { - /* for mjpeg, we do include qscale in the matrix */ - s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0]; - for(i=1;i<64;i++){ - int j= s->dsp.idct_permutation[i]; - - s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3); - } - convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, - s->intra_matrix, s->intra_quant_bias, 8, 8); + ff_mpeg4_stuffing(&s->pb); + }else if(s->out_format == FMT_MJPEG){ + ff_mjpeg_stuffing(&s->pb); } - - //FIXME var duplication - s->current_picture.key_frame= s->pict_type == I_TYPE; - s->current_picture.pict_type= s->pict_type; +#endif /* #if 0 */ - if(s->current_picture.key_frame) - s->picture_in_gop_number=0; + align_put_bits(&s->pb); + flush_put_bits(&s->pb); +} - s->last_bits= get_bit_count(&s->pb); - switch(s->out_format) { -#if defined(CONFIG_ENCODERS) || !defined(XINE_MPEG_ENCODER) - case FMT_MJPEG: - mjpeg_picture_header(s); - break; -#ifdef CONFIG_RISKY - case FMT_H263: - if (s->codec_id == CODEC_ID_WMV2) - ff_wmv2_encode_picture_header(s, picture_number); - else if (s->h263_msmpeg4) - msmpeg4_encode_picture_header(s, picture_number); - else if (s->h263_pred) - mpeg4_encode_picture_header(s, picture_number); - else if (s->codec_id == CODEC_ID_RV10) - rv10_encode_picture_header(s, picture_number); - else if (s->codec_id == CODEC_ID_FLV1) - ff_flv_encode_picture_header(s, picture_number); - else - h263_encode_picture_header(s, picture_number); - break; -#else - case FMT_H263: - break; -#endif -#else - case FMT_MJPEG: - break; - case FMT_H263: - break; -#endif - case FMT_MPEG1: - mpeg1_encode_picture_header(s, picture_number); - break; - case FMT_H264: - break; - default: - assert(0); +static int encode_thread(AVCodecContext *c, void *arg){ + MpegEncContext *s= arg; + int mb_x, mb_y, pdif = 0; + int i, j; + MpegEncContext best_s, backup_s; + uint8_t bit_buf[2][3000]; + uint8_t bit_buf2[2][3000]; + uint8_t bit_buf_tex[2][3000]; + PutBitContext pb[2], pb2[2], tex_pb[2]; +//printf("%d->%d\n", s->resync_mb_y, s->end_mb_y); + + for(i=0; i<2; i++){ + init_put_bits(&pb [i], bit_buf [i], 3000); + init_put_bits(&pb2 [i], bit_buf2 [i], 3000); + init_put_bits(&tex_pb[i], bit_buf_tex[i], 3000); } - bits= get_bit_count(&s->pb); - s->header_bits= bits - s->last_bits; - s->last_bits= bits; + + s->last_bits= 
put_bits_count(&s->pb); s->mv_bits=0; s->misc_bits=0; s->i_tex_bits=0; @@ -4089,7 +4025,6 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->last_mv_dir = 0; -#if defined(CONFIG_ENCODERS) || !defined(XINE_MPEG_ENCODER) #ifdef CONFIG_RISKY switch(s->codec_id){ case CODEC_ID_H263: @@ -4097,19 +4032,22 @@ static void encode_picture(MpegEncContext *s, int picture_number) case CODEC_ID_FLV1: s->gob_index = ff_h263_get_gob_height(s); break; +/* xine: do not need this for decode or MPEG-1 encoding modes */ +#if 0 case CODEC_ID_MPEG4: if(s->partitioned_frame) ff_mpeg4_init_partitions(s); break; +#endif /* #if 0 */ } #endif -#endif s->resync_mb_x=0; - s->resync_mb_y=0; + s->resync_mb_y=0; s->first_slice_line = 1; s->ptr_lastgob = s->pb.buf; - for(mb_y=0; mb_y < s->mb_height; mb_y++) { + for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) { +// printf("row %d at %X\n", s->mb_y, (int)s); s->mb_x=0; s->mb_y= mb_y; @@ -4127,15 +4065,18 @@ static void encode_picture(MpegEncContext *s, int picture_number) ff_update_block_index(s); /* write gob / video packet header */ -#if defined(CONFIG_ENCODERS) || !defined(XINE_MPEG_ENCODER) +/* xine: do not need this for decode or MPEG-1 encoding modes */ +#if 0 #ifdef CONFIG_RISKY if(s->rtp_mode){ int current_packet_size, is_gob_start; - current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob; + current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf); is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0; + if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1; + switch(s->codec_id){ case CODEC_ID_H263: case CODEC_ID_H263P: @@ -4148,24 +4089,24 @@ static void encode_picture(MpegEncContext *s, int picture_number) if(s->mb_skip_run) is_gob_start=0; break; } - + if(is_gob_start){ - if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){ - ff_mpeg4_merge_partitions(s); - ff_mpeg4_init_partitions(s); + if(s->start_mb_y != mb_y || mb_x!=0){ + write_slice_end(s); + +/* xine: do not need this for decode or MPEG-1 encoding modes */ +#if 0 + if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){ + ff_mpeg4_init_partitions(s); + } +#endif /* #if 0 */ } - if(s->codec_id==CODEC_ID_MPEG4) - ff_mpeg4_stuffing(&s->pb); - - align_put_bits(&s->pb); - flush_put_bits(&s->pb); - - assert((get_bit_count(&s->pb)&7) == 0); + assert((put_bits_count(&s->pb)&7) == 0); current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob; if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){ - int r= get_bit_count(&s->pb)/8 + s->picture_number + s->codec_id + s->mb_x + s->mb_y; + int r= put_bits_count(&s->pb)/8 + s->picture_number + s->codec_id + s->mb_x + s->mb_y; int d= 100 / s->avctx->error_rate; if(r % d == 0){ current_packet_size=0; @@ -4196,7 +4137,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) } if(s->flags&CODEC_FLAG_PASS1){ - int bits= get_bit_count(&s->pb); + int bits= put_bits_count(&s->pb); s->misc_bits+= bits - s->last_bits; s->last_bits= bits; } @@ -4208,7 +4149,8 @@ static void encode_picture(MpegEncContext *s, int picture_number) } } #endif -#endif +#endif /* #if 0 */ + if( (s->resync_mb_x == s->mb_x) && s->resync_mb_y+1 == s->mb_y){ @@ -4307,11 +4249,12 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT; s->mb_intra= 0; -#if defined(CONFIG_ENCODERS) || !defined(XINE_MPEG_ENCODER) +/* xine: do not need this for decode or MPEG-1 encoding modes */ +#if 0 #ifdef 
CONFIG_RISKY ff_mpeg4_set_direct_mv(s, mx, my); #endif -#endif +#endif /* #if 0 */ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb, &dmin, &next_block, mx, my); } @@ -4424,33 +4367,33 @@ static void encode_picture(MpegEncContext *s, int picture_number) copy_context_after_encode(s, &best_s, -1); - pb_bits_count= get_bit_count(&s->pb); + pb_bits_count= put_bits_count(&s->pb); flush_put_bits(&s->pb); ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count); s->pb= backup_s.pb; if(s->data_partitioning){ - pb2_bits_count= get_bit_count(&s->pb2); + pb2_bits_count= put_bits_count(&s->pb2); flush_put_bits(&s->pb2); ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count); s->pb2= backup_s.pb2; - tex_pb_bits_count= get_bit_count(&s->tex_pb); + tex_pb_bits_count= put_bits_count(&s->tex_pb); flush_put_bits(&s->tex_pb); ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count); s->tex_pb= backup_s.tex_pb; } - s->last_bits= get_bit_count(&s->pb); + s->last_bits= put_bits_count(&s->pb); #ifdef CONFIG_RISKY if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE) ff_h263_update_motion_val(s); #endif - if(next_block==0){ - s->dsp.put_pixels_tab[0][0](s->dest[0], s->me.scratchpad , s->linesize ,16); - s->dsp.put_pixels_tab[1][0](s->dest[1], s->me.scratchpad + 16, s->uvlinesize, 8); - s->dsp.put_pixels_tab[1][0](s->dest[2], s->me.scratchpad + 24, s->uvlinesize, 8); + if(next_block==0){ //FIXME 16 vs linesize16 + s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad , s->linesize ,16); + s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize , s->uvlinesize, 8); + s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8); } if(s->avctx->mb_decision == FF_MB_DECISION_BITS) @@ -4499,11 +4442,12 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->mb_intra= 0; motion_x=s->b_direct_mv_table[xy][0]; motion_y=s->b_direct_mv_table[xy][1]; -#if defined(CONFIG_ENCODERS) || !defined(XINE_MPEG_ENCODER) +/* xine: do not need this for decode or MPEG-1 encoding modes */ +#if 0 #ifdef CONFIG_RISKY ff_mpeg4_set_direct_mv(s, motion_x, motion_y); #endif -#endif +#endif /* #if 0 */ break; case CANDIDATE_MB_TYPE_BIDIR: s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD; @@ -4606,31 +4550,296 @@ static void encode_picture(MpegEncContext *s, int picture_number) } if(s->loop_filter) ff_h263_loop_filter(s); -//printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, get_bit_count(&s->pb)); +//printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb)); } } - emms_c(); -#if defined(CONFIG_ENCODERS) || !defined(XINE_MPEG_ENCODER) +/* xine: do not need this for decode or MPEG-1 encoding modes */ +#if 0 #ifdef CONFIG_RISKY - if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame) - ff_mpeg4_merge_partitions(s); - + //not beautifull here but we must write it before flushing so it has to be here if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE) msmpeg4_encode_ext_header(s); - - if(s->codec_id==CODEC_ID_MPEG4) - ff_mpeg4_stuffing(&s->pb); -#endif #endif +#endif /* #if 0 */ + + write_slice_end(s); /* Send the last GOB if RTP */ if (s->avctx->rtp_callback) { - flush_put_bits(&s->pb); pdif = pbBufPtr(&s->pb) - s->ptr_lastgob; /* Call the RTP callback to send the last GOB */ + emms_c(); s->avctx->rtp_callback(s->ptr_lastgob, pdif, 0); } + + return 0; +} + +#define MERGE(field) dst->field += src->field; src->field=0 +static void 
merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){ + MERGE(scene_change_score); + MERGE(mc_mb_var_sum_temp); + MERGE(mb_var_sum_temp); +} + +static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){ + int i; + + MERGE(dct_count[0]); //note, the other dct vars are not part of the context + MERGE(dct_count[1]); + MERGE(mv_bits); + MERGE(header_bits); + MERGE(i_tex_bits); + MERGE(p_tex_bits); + MERGE(i_count); + MERGE(f_count); + MERGE(b_count); + MERGE(skip_count); + MERGE(misc_bits); + MERGE(error_count); + MERGE(padding_bug_score); + + if(dst->avctx->noise_reduction){ + for(i=0; i<64; i++){ + MERGE(dct_error_sum[0][i]); + MERGE(dct_error_sum[1][i]); + } + } + + assert(put_bits_count(&src->pb) % 8 ==0); + assert(put_bits_count(&dst->pb) % 8 ==0); + ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb)); + flush_put_bits(&dst->pb); +} + +static void encode_picture(MpegEncContext *s, int picture_number) +{ + int mb_x, mb_y; + int i, j; + int bits; + + s->picture_number = picture_number; + + /* Reset the average MB variance */ + s->mb_var_sum_temp = + s->mc_mb_var_sum_temp = 0; + +/* xine: do not need this for decode or MPEG-1 encoding modes */ +#if 0 +#ifdef CONFIG_RISKY + /* we need to initialize some time vars before we can encode b-frames */ + // RAL: Condition added for MPEG1VIDEO + if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4)) + ff_set_mpeg4_time(s, s->picture_number); //FIXME rename and use has_b_frames or similar +#endif +#endif /* #if 0 */ + + s->scene_change_score=0; + + s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME ratedistoration + + if(s->pict_type==I_TYPE){ + if(s->msmpeg4_version >= 3) s->no_rounding=1; + else s->no_rounding=0; + }else if(s->pict_type!=B_TYPE){ + if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4) + s->no_rounding ^= 1; + } + + s->mb_intra=0; //for the rate distoration & bit compare functions + for(i=1; i<s->avctx->thread_count; i++){ + ff_update_duplicate_context(s->thread_context[i], s); + } + + /* Estimate motion for every MB */ + if(s->pict_type != I_TYPE){ +/* xine: do not need this for decode or MPEG-1 encoding modes */ +#if 0 + if(s->pict_type != B_TYPE){ + if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){ + s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count); + } + } + + s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count); +#endif /* #if 0 */ + }else /* if(s->pict_type == I_TYPE) */{ + /* I-Frame */ + for(i=0; i<s->mb_stride*s->mb_height; i++) + s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA; + + if(!s->fixed_qscale){ + /* finding spatial complexity for I-frame rate control */ + s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count); + } + } + for(i=1; i<s->avctx->thread_count; i++){ + merge_context_after_me(s, s->thread_context[i]); + } + s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->mc_mb_var_sum_temp; + s->current_picture. 
mb_var_sum= s->current_picture_ptr-> mb_var_sum= s-> mb_var_sum_temp; + emms_c(); + + if(s->scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){ + s->pict_type= I_TYPE; + for(i=0; i<s->mb_stride*s->mb_height; i++) + s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA; +//printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum); + } + +/* xine: do not need this for decode or MPEG-1 encoding modes */ +#if 0 + if(!s->umvplus){ + if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) { + s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER); + + if(s->flags & CODEC_FLAG_INTERLACED_ME){ + int a,b; + a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select + b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I); + s->f_code= FFMAX(s->f_code, FFMAX(a,b)); + } + + ff_fix_long_p_mvs(s); + ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0); + if(s->flags & CODEC_FLAG_INTERLACED_ME){ + for(i=0; i<2; i++){ + for(j=0; j<2; j++) + ff_fix_long_mvs(s, s->p_field_select_table[i], j, + s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0); + } + } + } + + if(s->pict_type==B_TYPE){ + int a, b; + + a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD); + b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR); + s->f_code = FFMAX(a, b); + + a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD); + b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR); + s->b_code = FFMAX(a, b); + + ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1); + ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1); + ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1); + ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1); + if(s->flags & CODEC_FLAG_INTERLACED_ME){ + int dir; + for(dir=0; dir<2; dir++){ + for(i=0; i<2; i++){ + for(j=0; j<2; j++){ + int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I) + : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I); + ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j, + s->b_field_mv_table[dir][i][j], dir ? 
s->b_code : s->f_code, type, 1); + } + } + } + } + } + } +#endif /* #if 0 */ + + if (!s->fixed_qscale) + s->current_picture.quality = ff_rate_estimate_qscale(s); //FIXME pic_ptr + + if(s->adaptive_quant){ +/* xine: do not need this for decode or MPEG-1 encoding modes */ +#if 0 +#ifdef CONFIG_RISKY + switch(s->codec_id){ + case CODEC_ID_MPEG4: + ff_clean_mpeg4_qscales(s); + break; + case CODEC_ID_H263: + case CODEC_ID_H263P: + case CODEC_ID_FLV1: + ff_clean_h263_qscales(s); + break; + } +#endif +#endif /* #if 0 */ + + s->lambda= s->lambda_table[0]; + //FIXME broken + }else + s->lambda= s->current_picture.quality; +//printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality); + update_qscale(s); + + if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE)) + s->qscale= 3; //reduce cliping problems + + if (s->out_format == FMT_MJPEG) { + /* for mjpeg, we do include qscale in the matrix */ + s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0]; + for(i=1;i<64;i++){ + int j= s->dsp.idct_permutation[i]; + + s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3); + } + convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, + s->intra_matrix, s->intra_quant_bias, 8, 8); + s->qscale= 8; + } + + //FIXME var duplication + s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr + s->current_picture.pict_type= s->pict_type; + + if(s->current_picture.key_frame) + s->picture_in_gop_number=0; + + s->last_bits= put_bits_count(&s->pb); + switch(s->out_format) { +/* xine: do not need this for decode or MPEG-1 encoding modes */ +#if 0 + case FMT_MJPEG: + mjpeg_picture_header(s); + break; +#ifdef CONFIG_RISKY + case FMT_H263: + if (s->codec_id == CODEC_ID_WMV2) + ff_wmv2_encode_picture_header(s, picture_number); + else if (s->h263_msmpeg4) + msmpeg4_encode_picture_header(s, picture_number); + else if (s->h263_pred) + mpeg4_encode_picture_header(s, picture_number); + else if (s->codec_id == CODEC_ID_RV10) + rv10_encode_picture_header(s, picture_number); + else if (s->codec_id == CODEC_ID_FLV1) + ff_flv_encode_picture_header(s, picture_number); + else + h263_encode_picture_header(s, picture_number); + break; +#endif +#endif /* #if 0 */ + case FMT_MPEG1: + mpeg1_encode_picture_header(s, picture_number); + break; +/* xine: do not need this for decode or MPEG-1 encoding modes */ +#if 0 + case FMT_H264: + break; +#endif /* #if 0 */ + default: + assert(0); + } + bits= put_bits_count(&s->pb); + s->header_bits= bits - s->last_bits; + + for(i=1; i<s->avctx->thread_count; i++){ + update_duplicate_context_after_me(s->thread_context[i], s); + } + s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count); + for(i=1; i<s->avctx->thread_count; i++){ + merge_context_after_encode(s, s->thread_context[i]); + } + emms_c(); } #endif //CONFIG_ENCODERS @@ -4659,7 +4868,7 @@ static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){ } } -#if defined(CONFIG_ENCODERS) || defined(XINE_MPEG_ENCODER) +#ifdef CONFIG_ENCODERS static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, @@ -4949,6 +5158,406 @@ static int dct_quantize_trellis_c(MpegEncContext *s, return last_non_zero; } +//#define REFINE_STATS 1 +static int16_t basis[64][64]; + +static void build_basis(uint8_t *perm){ + int i, j, x, y; + emms_c(); + for(i=0; i<8; i++){ + for(j=0; j<8; j++){ + for(y=0; y<8; y++){ + for(x=0; x<8; x++){ + double s= 0.25*(1<<BASIS_SHIFT); + int index= 8*i + j; + int 
perm_index= perm[index]; + if(i==0) s*= sqrt(0.5); + if(j==0) s*= sqrt(0.5); + basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5))); + } + } + } + } +} + +static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise? + DCTELEM *block, int16_t *weight, DCTELEM *orig, + int n, int qscale){ + int16_t rem[64]; + DCTELEM d1[64]; + const int *qmat; + const uint8_t *scantable= s->intra_scantable.scantable; + const uint8_t *perm_scantable= s->intra_scantable.permutated; +// unsigned int threshold1, threshold2; +// int bias=0; + int run_tab[65]; + int prev_run=0; + int prev_level=0; + int qmul, qadd, start_i, last_non_zero, i, dc; + const int esc_length= s->ac_esc_length; + uint8_t * length; + uint8_t * last_length; + int lambda; + int rle_index, run, q, sum; +#ifdef REFINE_STATS +static int count=0; +static int after_last=0; +static int to_zero=0; +static int from_zero=0; +static int raise=0; +static int lower=0; +static int messed_sign=0; +#endif + + if(basis[0][0] == 0) + build_basis(s->dsp.idct_permutation); + + qmul= qscale*2; + qadd= (qscale-1)|1; + if (s->mb_intra) { + if (!s->h263_aic) { + if (n < 4) + q = s->y_dc_scale; + else + q = s->c_dc_scale; + } else{ + /* For AIC we skip quant/dequant of INTRADC */ + q = 1; + qadd=0; + } + q <<= RECON_SHIFT-3; + /* note: block[0] is assumed to be positive */ + dc= block[0]*q; +// block[0] = (block[0] + (q >> 1)) / q; + start_i = 1; + qmat = s->q_intra_matrix[qscale]; +// if(s->mpeg_quant || s->out_format == FMT_MPEG1) +// bias= 1<<(QMAT_SHIFT-1); + length = s->intra_ac_vlc_length; + last_length= s->intra_ac_vlc_last_length; + } else { + dc= 0; + start_i = 0; + qmat = s->q_inter_matrix[qscale]; + length = s->inter_ac_vlc_length; + last_length= s->inter_ac_vlc_last_length; + } + last_non_zero = s->block_last_index[n]; + +#ifdef REFINE_STATS +{START_TIMER +#endif + dc += (1<<(RECON_SHIFT-1)); + for(i=0; i<64; i++){ + rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig dirrectly insteadof copying to rem[] + } +#ifdef REFINE_STATS +STOP_TIMER("memset rem[]")} +#endif + sum=0; + for(i=0; i<64; i++){ + int one= 36; + int qns=4; + int w; + + w= ABS(weight[i]) + qns*one; + w= 15 + (48*qns*one + w/2)/w; // 16 .. 
63 + + weight[i] = w; +// w=weight[i] = (63*qns + (w/2)) / w; + + assert(w>0); + assert(w<(1<<6)); + sum += w*w; + } + lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6); +#ifdef REFINE_STATS +{START_TIMER +#endif + run=0; + rle_index=0; + for(i=start_i; i<=last_non_zero; i++){ + int j= perm_scantable[i]; + const int level= block[j]; + int coeff; + + if(level){ + if(level<0) coeff= qmul*level - qadd; + else coeff= qmul*level + qadd; + run_tab[rle_index++]=run; + run=0; + + s->dsp.add_8x8basis(rem, basis[j], coeff); + }else{ + run++; + } + } +#ifdef REFINE_STATS +if(last_non_zero>0){ +STOP_TIMER("init rem[]") +} +} + +{START_TIMER +#endif + for(;;){ + int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0); + int nochange_score= best_score; + int best_coeff=0; + int best_change=0; + int run2, best_unquant_change, analyze_gradient; +#ifdef REFINE_STATS +{START_TIMER +#endif + analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3; + + if(analyze_gradient){ +#ifdef REFINE_STATS +{START_TIMER +#endif + for(i=0; i<64; i++){ + int w= weight[i]; + + d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12); + } +#ifdef REFINE_STATS +STOP_TIMER("rem*w*w")} +{START_TIMER +#endif + s->dsp.fdct(d1); +#ifdef REFINE_STATS +STOP_TIMER("dct")} +#endif + } + + if(start_i){ + const int level= block[0]; + int change, old_coeff; + + assert(s->mb_intra); + + old_coeff= q*level; + + for(change=-1; change<=1; change+=2){ + int new_level= level + change; + int score, new_coeff; + + new_coeff= q*new_level; + if(new_coeff >= 2048 || new_coeff < 0) + continue; + + score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff); + if(score<best_score){ + best_score= score; + best_coeff= 0; + best_change= change; + best_unquant_change= new_coeff - old_coeff; + } + } + } + + run=0; + rle_index=0; + run2= run_tab[rle_index++]; + prev_level=0; + prev_run=0; + + for(i=start_i; i<64; i++){ + int j= perm_scantable[i]; + const int level= block[j]; + int change, old_coeff; + + if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1) + break; + + if(level){ + if(level<0) old_coeff= qmul*level - qadd; + else old_coeff= qmul*level + qadd; + run2= run_tab[rle_index++]; //FIXME ! 
maybe after last + }else{ + old_coeff=0; + run2--; + assert(run2>=0 || i >= last_non_zero ); + } + + for(change=-1; change<=1; change+=2){ + int new_level= level + change; + int score, new_coeff, unquant_change; + + score=0; + if(s->avctx->quantizer_noise_shaping < 2 && ABS(new_level) > ABS(level)) + continue; + + if(new_level){ + if(new_level<0) new_coeff= qmul*new_level - qadd; + else new_coeff= qmul*new_level + qadd; + if(new_coeff >= 2048 || new_coeff <= -2048) + continue; + //FIXME check for overflow + + if(level){ + if(level < 63 && level > -63){ + if(i < last_non_zero) + score += length[UNI_AC_ENC_INDEX(run, new_level+64)] + - length[UNI_AC_ENC_INDEX(run, level+64)]; + else + score += last_length[UNI_AC_ENC_INDEX(run, new_level+64)] + - last_length[UNI_AC_ENC_INDEX(run, level+64)]; + } + }else{ + assert(ABS(new_level)==1); + + if(analyze_gradient){ + int g= d1[ scantable[i] ]; + if(g && (g^new_level) >= 0) + continue; + } + + if(i < last_non_zero){ + int next_i= i + run2 + 1; + int next_level= block[ perm_scantable[next_i] ] + 64; + + if(next_level&(~127)) + next_level= 0; + + if(next_i < last_non_zero) + score += length[UNI_AC_ENC_INDEX(run, 65)] + + length[UNI_AC_ENC_INDEX(run2, next_level)] + - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]; + else + score += length[UNI_AC_ENC_INDEX(run, 65)] + + last_length[UNI_AC_ENC_INDEX(run2, next_level)] + - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]; + }else{ + score += last_length[UNI_AC_ENC_INDEX(run, 65)]; + if(prev_level){ + score += length[UNI_AC_ENC_INDEX(prev_run, prev_level)] + - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]; + } + } + } + }else{ + new_coeff=0; + assert(ABS(level)==1); + + if(i < last_non_zero){ + int next_i= i + run2 + 1; + int next_level= block[ perm_scantable[next_i] ] + 64; + + if(next_level&(~127)) + next_level= 0; + + if(next_i < last_non_zero) + score += length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)] + - length[UNI_AC_ENC_INDEX(run2, next_level)] + - length[UNI_AC_ENC_INDEX(run, 65)]; + else + score += last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)] + - last_length[UNI_AC_ENC_INDEX(run2, next_level)] + - length[UNI_AC_ENC_INDEX(run, 65)]; + }else{ + score += -last_length[UNI_AC_ENC_INDEX(run, 65)]; + if(prev_level){ + score += last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)] + - length[UNI_AC_ENC_INDEX(prev_run, prev_level)]; + } + } + } + + score *= lambda; + + unquant_change= new_coeff - old_coeff; + assert((score < 100*lambda && score > -100*lambda) || lambda==0); + + score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change); + if(score<best_score){ + best_score= score; + best_coeff= i; + best_change= change; + best_unquant_change= unquant_change; + } + } + if(level){ + prev_level= level + 64; + if(prev_level&(~127)) + prev_level= 0; + prev_run= run; + run=0; + }else{ + run++; + } + } +#ifdef REFINE_STATS +STOP_TIMER("iterative step")} +#endif + + if(best_change){ + int j= perm_scantable[ best_coeff ]; + + block[j] += best_change; + + if(best_coeff > last_non_zero){ + last_non_zero= best_coeff; + assert(block[j]); +#ifdef REFINE_STATS +after_last++; +#endif + }else{ +#ifdef REFINE_STATS +if(block[j]){ + if(block[j] - best_change){ + if(ABS(block[j]) > ABS(block[j] - best_change)){ + raise++; + }else{ + lower++; + } + }else{ + from_zero++; + } +}else{ + to_zero++; +} +#endif + for(; last_non_zero>=start_i; last_non_zero--){ + if(block[perm_scantable[last_non_zero]]) + break; + } + } +#ifdef REFINE_STATS +count++; +if(256*256*256*64 % count == 0){ + 
printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number); +} +#endif + run=0; + rle_index=0; + for(i=start_i; i<=last_non_zero; i++){ + int j= perm_scantable[i]; + const int level= block[j]; + + if(level){ + run_tab[rle_index++]=run; + run=0; + }else{ + run++; + } + } + + s->dsp.add_8x8basis(rem, basis[j], best_unquant_change); + }else{ + break; + } + } +#ifdef REFINE_STATS +if(last_non_zero>0){ +STOP_TIMER("iterative search") +} +} +#endif + + return last_non_zero; +} + static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow) @@ -5290,7 +5899,7 @@ static const AVOption mpeg4_options[] = AVOPTION_END() }; -#if defined(CONFIG_ENCODERS) || defined(XINE_MPEG_ENCODER) +#ifdef CONFIG_ENCODERS #ifdef CONFIG_RISKY AVCodec h263_encoder = { "h263", diff --git a/src/libffmpeg/libavcodec/mpegvideo.h b/src/libffmpeg/libavcodec/mpegvideo.h index 88e2f6d50..171d66d83 100644 --- a/src/libffmpeg/libavcodec/mpegvideo.h +++ b/src/libffmpeg/libavcodec/mpegvideo.h @@ -47,6 +47,8 @@ enum OutputFormat { #define MAX_FCODE 7 #define MAX_MV 2048 +#define MAX_THREADS 8 + #define MAX_PICTURE_COUNT 15 #define ME_MAP_SIZE 64 @@ -273,6 +275,7 @@ typedef struct MpegEncContext { int picture_number; //FIXME remove, unclear definition int picture_in_gop_number; ///< 0-> first pic in gop, ... int b_frames_since_non_b; ///< used for encoding, relative to not yet reordered input + int64_t user_specified_pts;///< last non zero pts from AVFrame which was passed into avcodec_encode_video() int mb_width, mb_height; ///< number of MBs horizontally & vertically int mb_stride; ///< mb_width+1 used for some arrays to allow simple addressng of left & top MBs withoutt sig11 int b8_stride; ///< 2*mb_width+1 used for some 8x8 block arrays to allow simple addressng @@ -285,6 +288,10 @@ typedef struct MpegEncContext { Picture **input_picture; ///< next pictures on display order for encoding Picture **reordered_input_picture; ///< pointer to the next pictures in codedorder for encoding + int start_mb_y; ///< start mb_y of this thread (so current thread should process start_mb_y <= row < end_mb_y) + int end_mb_y; ///< end mb_y of this thread (so current thread should process start_mb_y <= row < end_mb_y) + struct MpegEncContext *thread_context[MAX_THREADS]; + /** * copy of the previous picture structure. 
* note, linesize & data, might not match the previous picture (for field pictures) @@ -332,7 +339,10 @@ typedef struct MpegEncContext { uint8_t *cbp_table; ///< used to store cbp, ac_pred for partitioned decoding uint8_t *pred_dir_table; ///< used to store pred_dir for partitioned decoding uint8_t *allocated_edge_emu_buffer; - uint8_t *edge_emu_buffer; ///< points into the middle of allocated_edge_emu_buffer + uint8_t *edge_emu_buffer; ///< points into the middle of allocated_edge_emu_buffer + uint8_t *rd_scratchpad; ///< scartchpad for rate distortion mb decission + uint8_t *obmc_scratchpad; + uint8_t *b_scratchpad; ///< scratchpad used for writing into write only buffers int qscale; ///< QP int chroma_qscale; ///< chroma QP @@ -345,6 +355,9 @@ typedef struct MpegEncContext { int last_pict_type; int last_non_b_pict_type; ///< used for mpeg4 gmc b-frames & ratecontrol int frame_rate_index; + int frame_rate_ext_n; ///< MPEG-2 specific framerate modificators (numerator) + int frame_rate_ext_d; ///< MPEG-2 specific framerate modificators (denominator) + /* motion compensation */ int unrestricted_mv; ///< mv can point outside of the coded picture int h263_long_vectors; ///< use horrible h263v1 long vector mode @@ -455,7 +468,7 @@ typedef struct MpegEncContext { /** identical to the above but for MMX & these are not permutated, second 64 entries are bias*/ uint16_t (*q_intra_matrix16)[2][64]; uint16_t (*q_inter_matrix16)[2][64]; - int block_last_index[6]; ///< last non zero coefficient in block + int block_last_index[12]; ///< last non zero coefficient in block /* scantables */ ScanTable __align8 intra_scantable; ScanTable intra_h_scantable; @@ -487,6 +500,10 @@ typedef struct MpegEncContext { int misc_bits; ///< cbp, mb_type int last_bits; ///< temp var used for calculating the above vars + /* temp variables for picture complexity calculation */ + int mc_mb_var_sum_temp; + int mb_var_sum_temp; + /* error concealment / resync */ int error_count; uint8_t *error_status_table; ///< table of the error status of each MB @@ -520,6 +537,7 @@ typedef struct MpegEncContext { int alt_inter_vlc; ///< alternative inter vlc int modified_quant; int loop_filter; + int custom_pcf; /* mpeg4 specific */ int time_increment_resolution; @@ -565,9 +583,6 @@ typedef struct MpegEncContext { int intra_dc_threshold; ///< QP above whch the ac VLC should be used for intra dc PutBitContext tex_pb; ///< used for data partitioned VOPs PutBitContext pb2; ///< used for data partitioned VOPs -#define PB_BUFFER_SIZE 1024*256 - uint8_t *tex_pb_buffer; - uint8_t *pb2_buffer; int mpeg_quant; int t_frame; ///< time distance of first I -> B, used for interlaced b frames int padding_bug_score; ///< used to detect the VERY common padding bug in MPEG4 @@ -642,6 +657,11 @@ typedef struct MpegEncContext { int alternate_scan; int repeat_first_field; int chroma_420_type; + int chroma_format; +#define CHROMA_420 1 +#define CHROMA_422 2 +#define CHROMA_444 3 + int progressive_frame; int full_pel[2]; int interlaced_dct; @@ -726,6 +746,7 @@ void ff_print_debug_info(MpegEncContext *s, AVFrame *pict); void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix); int ff_find_unused_picture(MpegEncContext *s, int shared); void ff_denoise_dct(MpegEncContext *s, DCTELEM *block); +void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src); void ff_er_frame_start(MpegEncContext *s); void ff_er_frame_end(MpegEncContext *s); @@ -749,7 +770,7 @@ static inline void ff_update_block_index(MpegEncContext *s){ } static inline int 
get_bits_diff(MpegEncContext *s){ - const int bits= get_bit_count(&s->pb); + const int bits= put_bits_count(&s->pb); const int last= s->last_bits; s->last_bits = bits; @@ -908,6 +929,7 @@ void mjpeg_encode_mb(MpegEncContext *s, DCTELEM block[6][64]); void mjpeg_picture_header(MpegEncContext *s); void mjpeg_picture_trailer(MpegEncContext *s); +void ff_mjpeg_stuffing(PutBitContext * pbc); /* rate control */ diff --git a/src/libffmpeg/libavcodec/msvideo1.c b/src/libffmpeg/libavcodec/msvideo1.c index 3190efb9e..b88bdab5d 100644 --- a/src/libffmpeg/libavcodec/msvideo1.c +++ b/src/libffmpeg/libavcodec/msvideo1.c @@ -39,7 +39,6 @@ #include "dsputil.h" #define PALETTE_COUNT 256 -#define LE_16(x) ((((uint8_t*)(x))[1] << 8) | ((uint8_t*)(x))[0]) #define CHECK_STREAM_PTR(n) \ if ((stream_ptr + n) > s->size ) { \ av_log(s->avctx, AV_LOG_ERROR, " MS Video-1 warning: stream_ptr out of bounds (%d >= %d)\n", \ diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c index d1a294365..633cae68b 100644 --- a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c +++ b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c @@ -49,7 +49,7 @@ int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h { int i; int s __attribute__((aligned(16))); - const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); + const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0); vector unsigned char *tv; vector unsigned char pix1v, pix2v, pix2iv, avgv, t5; vector unsigned int sad; @@ -96,7 +96,7 @@ int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h { int i; int s __attribute__((aligned(16))); - const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); + const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0); vector unsigned char *tv; vector unsigned char pix1v, pix2v, pix3v, avgv, t5; vector unsigned int sad; @@ -157,8 +157,8 @@ int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int int i; int s __attribute__((aligned(16))); uint8_t *pix3 = pix2 + line_size; - const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); - const vector unsigned short two = (const vector unsigned short)vec_splat_u16(2); + const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0); + const_vector unsigned short two = (const_vector unsigned short)vec_splat_u16(2); vector unsigned char *tv, avgv, t5; vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv; vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv; @@ -257,7 +257,7 @@ int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int i; int s __attribute__((aligned(16))); - const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); + const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0); vector unsigned char perm1, perm2, *pix1v, *pix2v; vector unsigned char t1, t2, t3,t4, t5; vector unsigned int sad; @@ -299,7 +299,7 @@ int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int i; int s __attribute__((aligned(16))); - const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); + const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0); vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; vector unsigned char t1, t2, t3,t4, t5; vector unsigned int sad; @@ -344,7 +344,7 @@ int pix_norm1_altivec(uint8_t *pix, int line_size) { 
int i; int s __attribute__((aligned(16))); - const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); + const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0); vector unsigned char *tv; vector unsigned char pixv; vector unsigned int sv; @@ -380,7 +380,7 @@ int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int i; int s __attribute__((aligned(16))); - const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); + const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0); vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; vector unsigned char t1, t2, t3,t4, t5; vector unsigned int sum; @@ -436,7 +436,7 @@ int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int i; int s __attribute__((aligned(16))); - const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); + const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0); vector unsigned char perm1, perm2, *pix1v, *pix2v; vector unsigned char t1, t2, t3,t4, t5; vector unsigned int sum; @@ -480,7 +480,7 @@ int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) int pix_sum_altivec(uint8_t * pix, int line_size) { - const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); + const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0); vector unsigned char perm, *pixv; vector unsigned char t1; vector unsigned int sad; @@ -515,7 +515,7 @@ void get_pixels_altivec(DCTELEM *restrict block, const uint8_t *pixels, int line { int i; vector unsigned char perm, bytes, *pixv; - const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); + const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0); vector signed short shorts; for(i=0;i<8;i++) @@ -542,7 +542,7 @@ void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1, { int i; vector unsigned char perm, bytes, *pixv; - const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); + const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0); vector signed short shorts1, shorts2; for(i=0;i<4;i++) @@ -654,10 +654,10 @@ POWERPC_PERF_DECLARE(altivec_put_pixels16_num, 1); POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1); for(i=0; i<h; i++) { - *((uint32_t*)(block )) = (((const struct unaligned_32 *) (pixels))->l); - *((uint32_t*)(block+4)) = (((const struct unaligned_32 *) (pixels+4))->l); - *((uint32_t*)(block+8)) = (((const struct unaligned_32 *) (pixels+8))->l); - *((uint32_t*)(block+12)) = (((const struct unaligned_32 *) (pixels+12))->l); + *((uint32_t*)(block)) = LD32(pixels); + *((uint32_t*)(block+4)) = LD32(pixels+4); + *((uint32_t*)(block+8)) = LD32(pixels+8); + *((uint32_t*)(block+12)) = LD32(pixels+12); pixels+=line_size; block +=line_size; } @@ -729,10 +729,10 @@ POWERPC_PERF_DECLARE(altivec_avg_pixels16_num, 1); POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1); for(i=0; i<h; i++) { - op_avg(*((uint32_t*)(block)),(((const struct unaligned_32 *)(pixels))->l)); - op_avg(*((uint32_t*)(block+4)),(((const struct unaligned_32 *)(pixels+4))->l)); - op_avg(*((uint32_t*)(block+8)),(((const struct unaligned_32 *)(pixels+8))->l)); - op_avg(*((uint32_t*)(block+12)),(((const struct unaligned_32 *)(pixels+12))->l)); + op_avg(*((uint32_t*)(block)),LD32(pixels)); + op_avg(*((uint32_t*)(block+4)),LD32(pixels+4)); + op_avg(*((uint32_t*)(block+8)),LD32(pixels+8)); + 
op_avg(*((uint32_t*)(block+12)),LD32(pixels+12)); pixels+=line_size; block +=line_size; } @@ -878,8 +878,8 @@ POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); blockv, temp1, temp2; register vector unsigned short pixelssum1, pixelssum2, temp3; - register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); - register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); + register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0); + register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2); temp1 = vec_ld(0, pixels); temp2 = vec_ld(16, pixels); @@ -993,9 +993,9 @@ POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); blockv, temp1, temp2; register vector unsigned short pixelssum1, pixelssum2, temp3; - register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); - register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1); - register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); + register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0); + register const_vector unsigned short vcone = (const_vector unsigned short)vec_splat_u16(1); + register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2); temp1 = vec_ld(0, pixels); temp2 = vec_ld(16, pixels); @@ -1109,8 +1109,8 @@ POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); register vector unsigned short pixelssum1, pixelssum2, temp3, pixelssum3, pixelssum4, temp4; - register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); - register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); + register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0); + register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2); POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1); @@ -1230,9 +1230,9 @@ POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); register vector unsigned short pixelssum1, pixelssum2, temp3, pixelssum3, pixelssum4, temp4; - register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); - register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1); - register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); + register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0); + register const_vector unsigned short vcone = (const_vector unsigned short)vec_splat_u16(1); + register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2); POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c b/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c index 9882e401f..b8372e51e 100644 --- a/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c +++ b/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c @@ -46,7 +46,7 @@ int mm_support(void) unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total]; /* list below must match enum in dsputil_ppc.h */ static unsigned char* perfname[] = { - "fft_calc_altivec", + "ff_fft_calc_altivec", "gmc1_altivec", "dct_unquantize_h263_altivec", "fdct_altivec", @@ -69,13 +69,13 @@ static unsigned char* perfname[] = { void powerpc_display_perf_report(void) { int i, j; - 
fprintf(stderr, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n"); + av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n"); for(i = 0 ; i < powerpc_perf_total ; i++) { for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) { if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0) - fprintf(stderr, + av_log(NULL, AV_LOG_INFO, " Function \"%s\" (pmc%d):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n", perfname[i], j+1, @@ -129,7 +129,11 @@ POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1); i += 16; } for ( ; i < sizeof(DCTELEM)*6*64 ; i += 32) { +#ifndef __MWERKS__ asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory"); +#else + __dcbz( blocks, i ); +#endif } if (misal) { ((unsigned long*)blocks)[188] = 0L; diff --git a/src/libffmpeg/libavcodec/ppc/fft_altivec.c b/src/libffmpeg/libavcodec/ppc/fft_altivec.c index e39c9dbb7..29d85e87d 100644 --- a/src/libffmpeg/libavcodec/ppc/fft_altivec.c +++ b/src/libffmpeg/libavcodec/ppc/fft_altivec.c @@ -50,7 +50,7 @@ /** - * Do a complex FFT with the parameters defined in fft_init(). The + * Do a complex FFT with the parameters defined in ff_fft_init(). The * input data must be permuted before with s->revtab table. No * 1.0/sqrt(n) normalization is done. * AltiVec-enabled @@ -60,7 +60,7 @@ * that successive MUL + ADD/SUB have been merged into * fused multiply-add ('vec_madd' in altivec) */ -void fft_calc_altivec(FFTContext *s, FFTComplex *z) +void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z) { POWERPC_PERF_DECLARE(altivec_fft_num, s->nbits >= 6); #ifdef ALTIVEC_USE_REFERENCE_C_CODE diff --git a/src/libffmpeg/libavcodec/ppc/gcc_fixes.h b/src/libffmpeg/libavcodec/ppc/gcc_fixes.h index a8e92cb2f..13d4ff12e 100644 --- a/src/libffmpeg/libavcodec/ppc/gcc_fixes.h +++ b/src/libffmpeg/libavcodec/ppc/gcc_fixes.h @@ -12,7 +12,11 @@ #endif #ifdef CONFIG_DARWIN -#define AVV(x...) (x) +# ifndef __MWERKS__ +# define AVV(x...) (x) +# else +# define AVV +# endif #else #define AVV(x...) 
{x} #if (__GNUC__ * 100 + __GNUC_MINOR__ < 303) @@ -81,4 +85,10 @@ __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \ #endif /* CONFIG_DARWIN */ +#ifndef __MWERKS__ +#define const_vector const vector +#else +#define const_vector vector +#endif + #endif /* _GCC_FIXES_ */ diff --git a/src/libffmpeg/libavcodec/ppc/gmc_altivec.c b/src/libffmpeg/libavcodec/ppc/gmc_altivec.c index 671ee110a..344821685 100644 --- a/src/libffmpeg/libavcodec/ppc/gmc_altivec.c +++ b/src/libffmpeg/libavcodec/ppc/gmc_altivec.c @@ -69,8 +69,8 @@ POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND); ( x16)*( y16), /* D */ 0, 0, 0, 0 /* padding */ }; - register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); - register const vector unsigned short vcsr8 = (const vector unsigned short)vec_splat_u16(8); + register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0); + register const_vector unsigned short vcsr8 = (const_vector unsigned short)vec_splat_u16(8); register vector unsigned char dstv, dstv2, src_0, src_1, srcvA, srcvB, srcvC, srcvD; register vector unsigned short Av, Bv, Cv, Dv, rounderV, tempA, tempB, tempC, tempD; int i; diff --git a/src/libffmpeg/libavcodec/ppc/idct_altivec.c b/src/libffmpeg/libavcodec/ppc/idct_altivec.c index d821ecd22..3445adadd 100644 --- a/src/libffmpeg/libavcodec/ppc/idct_altivec.c +++ b/src/libffmpeg/libavcodec/ppc/idct_altivec.c @@ -44,6 +44,7 @@ #include "dsputil_altivec.h" #define vector_s16_t vector signed short +#define const_vector_s16_t const_vector signed short #define vector_u16_t vector unsigned short #define vector_s8_t vector signed char #define vector_u8_t vector unsigned char @@ -155,7 +156,7 @@ vx7 = vec_sra (vy7, shift); -static const vector_s16_t constants[5] = { +static const_vector_s16_t constants[5] = { (vector_s16_t) AVV(23170, 13573, 6518, 21895, -23170, -21895, 32, 31), (vector_s16_t) AVV(16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725), (vector_s16_t) AVV(22725, 31521, 29692, 26722, 22725, 26722, 29692, 31521), @@ -174,8 +175,9 @@ POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1); #else /* ALTIVEC_USE_REFERENCE_C_CODE */ vector_u8_t tmp; +#ifdef POWERPC_PERFORMANCE_REPORT POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1); - +#endif IDCT #define COPY(dest,src) \ @@ -211,7 +213,9 @@ POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1); vector_u8_t perm1; vector_u8_t p0, p1, p; +#ifdef POWERPC_PERFORMANCE_REPORT POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1); +#endif IDCT diff --git a/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c b/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c index ae3170d91..51b387792 100644 --- a/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c +++ b/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c @@ -107,7 +107,9 @@ int dct_quantize_altivec(MpegEncContext* s, int lastNonZero; vector float row0, row1, row2, row3, row4, row5, row6, row7; vector float alt0, alt1, alt2, alt3, alt4, alt5, alt6, alt7; - const vector float zero = (const vector float)FOUROF(0.); + const_vector float zero = (const_vector float)FOUROF(0.); + // used after quantise step + int oldBaseValue = 0; // Load the data into the row/alt vectors { @@ -284,9 +286,6 @@ int dct_quantize_altivec(MpegEncContext* s, } } - // used after quantise step - int oldBaseValue = 0; - // perform the quantise step, using the floating point data // still in the row/alt registers { @@ -414,21 +413,23 @@ int dct_quantize_altivec(MpegEncContext* s, data7 = vec_max(vec_min(data7, max_q), min_q); } + { 
vector bool char zero_01, zero_23, zero_45, zero_67; vector signed char scanIndices_01, scanIndices_23, scanIndices_45, scanIndices_67; vector signed char negOne = vec_splat_s8(-1); vector signed char* scanPtr = (vector signed char*)(s->intra_scantable.inverse); + signed char lastNonZeroChar; // Determine the largest non-zero index. - zero_01 = vec_pack(vec_cmpeq(data0, (vector short)zero), - vec_cmpeq(data1, (vector short)zero)); - zero_23 = vec_pack(vec_cmpeq(data2, (vector short)zero), - vec_cmpeq(data3, (vector short)zero)); - zero_45 = vec_pack(vec_cmpeq(data4, (vector short)zero), - vec_cmpeq(data5, (vector short)zero)); - zero_67 = vec_pack(vec_cmpeq(data6, (vector short)zero), - vec_cmpeq(data7, (vector short)zero)); + zero_01 = vec_pack(vec_cmpeq(data0, (vector signed short)zero), + vec_cmpeq(data1, (vector signed short)zero)); + zero_23 = vec_pack(vec_cmpeq(data2, (vector signed short)zero), + vec_cmpeq(data3, (vector signed short)zero)); + zero_45 = vec_pack(vec_cmpeq(data4, (vector signed short)zero), + vec_cmpeq(data5, (vector signed short)zero)); + zero_67 = vec_pack(vec_cmpeq(data6, (vector signed short)zero), + vec_cmpeq(data7, (vector signed short)zero)); // 64 biggest values scanIndices_01 = vec_sel(scanPtr[0], negOne, zero_01); @@ -461,7 +462,6 @@ int dct_quantize_altivec(MpegEncContext* s, scanIndices_01 = vec_splat(scanIndices_01, 0); - signed char lastNonZeroChar; vec_ste(scanIndices_01, 0, &lastNonZeroChar); @@ -484,6 +484,7 @@ int dct_quantize_altivec(MpegEncContext* s, vec_st(data5, 80, data); vec_st(data6, 96, data); vec_st(data7, 112, data); + } } // special handling of block[0] @@ -562,7 +563,7 @@ POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1); } #else /* ALTIVEC_USE_REFERENCE_C_CODE */ { - register const vector short vczero = (const vector short)vec_splat_s16(0); + register const_vector signed short vczero = (const_vector signed short)vec_splat_s16(0); short __attribute__ ((aligned(16))) qmul8[] = { qmul, qmul, qmul, qmul, @@ -578,7 +579,7 @@ POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1); -qadd, -qadd, -qadd, -qadd, -qadd, -qadd, -qadd, -qadd }; - register vector short blockv, qmulv, qaddv, nqaddv, temp1; + register vector signed short blockv, qmulv, qaddv, nqaddv, temp1; register vector bool short blockv_null, blockv_neg; register short backup_0 = block[0]; register int j = 0; diff --git a/src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c b/src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c index a83a0aa5c..ce4bf8a47 100644 --- a/src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c +++ b/src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c @@ -55,14 +55,14 @@ void MPV_common_init_ppc(MpegEncContext *s) if ((((long)(s->q_intra_matrix) & 0x0f) != 0) ||
(((long)(s->q_inter_matrix) & 0x0f) != 0))
{
- fprintf(stderr, "Internal Error: q-matrix blocks must be 16-byte aligned "
+ av_log(s->avctx, AV_LOG_INFO, "Internal Error: q-matrix blocks must be 16-byte aligned "
"to use Altivec DCT. Reverting to non-altivec version.\n");
return;
}
if (((long)(s->intra_scantable.inverse) & 0x0f) != 0)
{
- fprintf(stderr, "Internal Error: scan table blocks must be 16-byte aligned "
+ av_log(s->avctx, AV_LOG_INFO, "Internal Error: scan table blocks must be 16-byte aligned "
"to use Altivec DCT. Reverting to non-altivec version.\n");
return;
}
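
The MPV_common_init_ppc() hunk above keeps the AltiVec DCT enabled only when the quantization matrices and the inverse scan table start on 16-byte boundaries, because AltiVec loads and stores (vec_ld/vec_st) operate on aligned 16-byte blocks. A minimal, standalone sketch of that alignment test follows; the helper name and the small test driver are illustrative only and are not part of this patch:

#include <stdint.h>
#include <stdio.h>

/* AltiVec loads and stores move whole 16-byte blocks, so any table handed to
 * the vector DCT must begin on a 16-byte boundary. */
static int is_16byte_aligned(const void *p)
{
    return ((uintptr_t)p & 0x0f) == 0;
}

int main(void)
{
    /* request 16-byte alignment, as the real q-matrix blocks are expected to have */
    short q_matrix[64] __attribute__ ((aligned(16)));

    if (is_16byte_aligned(q_matrix))
        printf("q-matrix is 16-byte aligned, AltiVec DCT usable\n");
    else
        printf("q-matrix misaligned, reverting to the C DCT\n");
    return 0;
}

The same hunk also replaces the fprintf(stderr, ...) diagnostics with av_log(s->avctx, AV_LOG_INFO, ...), consistent with the "(f)printf() usage is forbidden in libavcodec, use av_log" rule applied to other files in this change.
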
diff --git a/src/libffmpeg/libavcodec/ra144.c b/src/libffmpeg/libavcodec/ra144.c index 2d882f744..65829b6a3 100644 --- a/src/libffmpeg/libavcodec/ra144.c +++ b/src/libffmpeg/libavcodec/ra144.c @@ -424,7 +424,7 @@ static void dec2(Real144_internal *glob, int *data, int *inp, int n, int f, int /* Uncompress one block (20 bytes -> 160*2 bytes) */ static int ra144_decode_frame(AVCodecContext * avctx, - void *data, int *data_size, + void *vdata, int *data_size, uint8_t * buf, int buf_size) { unsigned int a,b,c; @@ -432,9 +432,13 @@ static int ra144_decode_frame(AVCodecContext * avctx, signed short *shptr; unsigned int *lptr,*temp; const short **dptr; - void *datao; + int16_t *datao; + int16_t *data = vdata; Real144_internal *glob=avctx->priv_data; + if(buf_size==0) + return 0; + datao = data; unpack_input(buf,glob->unpacked); @@ -480,10 +484,10 @@ static int ra144_decode_frame(AVCodecContext * avctx, shptr=glob->output_buffer; while (shptr<glob->output_buffer+BLOCKSIZE) { s=*(shptr++)<<2; - *((int16_t *)data)=s; - if (s>32767) *((int16_t *)data)=32767; - if (s<-32767) *((int16_t *)data)=-32768; - ((int16_t *)data)++; + *data=s; + if (s>32767) *data=32767; + if (s<-32767) *data=-32768; + data++; } b+=30; } @@ -495,7 +499,7 @@ static int ra144_decode_frame(AVCodecContext * avctx, temp=glob->swapbuf2alt; glob->swapbuf2alt=glob->swapbuf2; glob->swapbuf2=temp; - *data_size=data-datao; + *data_size=(data-datao)*sizeof(*data); return 20; } diff --git a/src/libffmpeg/libavcodec/ratecontrol.c b/src/libffmpeg/libavcodec/ratecontrol.c index 6c90b1b6c..473645def 100644 --- a/src/libffmpeg/libavcodec/ratecontrol.c +++ b/src/libffmpeg/libavcodec/ratecontrol.c @@ -545,9 +545,10 @@ static void adaptive_quantization(MpegEncContext *s, double q){ /* handle qmin/qmax cliping */ if(s->flags&CODEC_FLAG_NORMALIZE_AQP){ + float factor= bits_sum/cplx_sum; for(i=0; i<s->mb_num; i++){ float newq= q*cplx_tab[i]/bits_tab[i]; - newq*= bits_sum/cplx_sum; + newq*= factor; if (newq > qmax){ bits_sum -= bits_tab[i]; @@ -558,6 +559,8 @@ static void adaptive_quantization(MpegEncContext *s, double q){ cplx_sum -= cplx_tab[i]*q/qmin; } } + if(bits_sum < 0.001) bits_sum= 0.001; + if(cplx_sum < 0.001) cplx_sum= 0.001; } for(i=0; i<s->mb_num; i++){ diff --git a/src/libffmpeg/libavcodec/rpza.c b/src/libffmpeg/libavcodec/rpza.c index 965fb729b..1c429f624 100644 --- a/src/libffmpeg/libavcodec/rpza.c +++ b/src/libffmpeg/libavcodec/rpza.c @@ -53,12 +53,6 @@ typedef struct RpzaContext { } RpzaContext; -#define BE_16(x) ((((uint8_t*)(x))[0] << 8) | ((uint8_t*)(x))[1]) -#define BE_32(x) ((((uint8_t*)(x))[0] << 24) | \ - (((uint8_t*)(x))[1] << 16) | \ - (((uint8_t*)(x))[2] << 8) | \ - ((uint8_t*)(x))[3]) - #define ADVANCE_BLOCK() \ { \ pixel_ptr += 4; \ diff --git a/src/libffmpeg/libavcodec/rv10.c b/src/libffmpeg/libavcodec/rv10.c index 3b6346465..11c9734fc 100644 --- a/src/libffmpeg/libavcodec/rv10.c +++ b/src/libffmpeg/libavcodec/rv10.c @@ -360,6 +360,11 @@ static int rv20_decode_picture_header(MpegEncContext *s) return -1; } + if(s->last_picture_ptr==NULL && s->pict_type==B_TYPE){ + av_log(s->avctx, AV_LOG_ERROR, "early B pix\n"); + return -1; + } + if (get_bits(&s->gb, 1)){ av_log(s->avctx, AV_LOG_ERROR, "unknown bit set\n"); return -1; @@ -403,7 +408,7 @@ static int rv20_decode_picture_header(MpegEncContext *s) s->time= seq; s->pb_time= s->pp_time - (s->last_non_b_time - s->time); if(s->pp_time <=s->pb_time || s->pp_time <= s->pp_time - s->pb_time || s->pp_time<=0){ - printf("messed up order, seeking?, skiping current b frame\n"); + 
av_log(s->avctx, AV_LOG_DEBUG, "messed up order, seeking?, skiping current b frame\n"); return FRAME_SKIPED; } } @@ -449,6 +454,12 @@ static int rv10_decode_init(AVCodecContext *avctx) s->h263_long_vectors=0; s->low_delay=1; break; + case 0x10002000: + s->rv10_version= 3; + s->h263_long_vectors=1; + s->low_delay=1; + s->obmc=1; + break; case 0x10003000: s->rv10_version= 3; s->h263_long_vectors=1; @@ -586,13 +597,14 @@ static int rv10_decode_packet(AVCodecContext *avctx, s->block_wrap[5]= s->mb_width + 2; ff_init_block_index(s); /* decode each macroblock */ - for(i=0;i<mb_count;i++) { + + for(s->mb_num_left= mb_count; s->mb_num_left>0; s->mb_num_left--) { int ret; ff_update_block_index(s); #ifdef DEBUG printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y); #endif - + s->dsp.clear_blocks(s->block[0]); s->mv_dir = MV_DIR_FORWARD; s->mv_type = MV_TYPE_16X16; diff --git a/src/libffmpeg/libavcodec/smc.c b/src/libffmpeg/libavcodec/smc.c index be02b162d..87db50005 100644 --- a/src/libffmpeg/libavcodec/smc.c +++ b/src/libffmpeg/libavcodec/smc.c @@ -36,6 +36,9 @@ #include "avcodec.h" #include "dsputil.h" +#define printf(...) {} //(f)printf() usage is forbidden in libavcodec, use av_log +#define fprintf(...) {} + #define CPAIR 2 #define CQUAD 4 #define COCTET 8 @@ -58,11 +61,6 @@ typedef struct SmcContext { } SmcContext; -#define BE_16(x) ((((uint8_t*)(x))[0] << 8) | ((uint8_t*)(x))[1]) -#define BE_32(x) ((((uint8_t*)(x))[0] << 24) | \ - (((uint8_t*)(x))[1] << 16) | \ - (((uint8_t*)(x))[2] << 8) | \ - ((uint8_t*)(x))[3]) #define GET_BLOCK_COUNT() \ (opcode & 0x10) ? (1 + s->buf[stream_ptr++]) : 1 + (opcode & 0x0F); diff --git a/src/libffmpeg/libavcodec/svq3.c b/src/libffmpeg/libavcodec/svq3.c index c8720c07a..413cc8963 100644 --- a/src/libffmpeg/libavcodec/svq3.c +++ b/src/libffmpeg/libavcodec/svq3.c @@ -766,6 +766,8 @@ static int svq3_decode_frame (AVCodecContext *avctx, MpegEncContext *const s = avctx->priv_data; H264Context *const h = avctx->priv_data; int m, mb_type; + unsigned char *extradata; + unsigned int size; *data_size = 0; @@ -790,13 +792,21 @@ static int svq3_decode_frame (AVCodecContext *avctx, alloc_tables (h); - if (avctx->extradata && avctx->extradata_size >= 0x64 - && !memcmp (avctx->extradata, "SVQ3", 4)) { + /* prowl for the "SEQH" marker in the extradata */ + extradata = (unsigned char *)avctx->extradata; + for (m = 0; m < avctx->extradata_size; m++) { + if (!memcmp (extradata, "SEQH", 4)) + break; + extradata++; + } + + /* if a match was found, parse the extra data */ + if (!memcmp (extradata, "SEQH", 4)) { GetBitContext gb; - init_get_bits (&gb, (uint8_t *) avctx->extradata + 0x62, - 8*(avctx->extradata_size - 0x62)); + size = BE_32(&extradata[4]); + init_get_bits (&gb, extradata + 8, size); /* 'frame size code' and optional 'width, height' */ if (get_bits (&gb, 3) == 7) { diff --git a/src/libffmpeg/libavcodec/truemotion1.c b/src/libffmpeg/libavcodec/truemotion1.c index ebd0ad522..35bf3a788 100644 --- a/src/libffmpeg/libavcodec/truemotion1.c +++ b/src/libffmpeg/libavcodec/truemotion1.c @@ -36,9 +36,10 @@ #include "avcodec.h" #include "dsputil.h" -#include "truemotion1data.h" +#define printf(...) {} //(f)printf() usage is forbidden in libavcodec, use av_log +#define fprintf(...) 
{} -#define LE_16(x) ((((uint8_t*)(x))[1] << 8) | ((uint8_t*)(x))[0]) +#include "truemotion1data.h" typedef struct TrueMotion1Context { AVCodecContext *avctx; diff --git a/src/libffmpeg/libavcodec/utils.c b/src/libffmpeg/libavcodec/utils.c index 2fb82c347..b6b6059e9 100644 --- a/src/libffmpeg/libavcodec/utils.c +++ b/src/libffmpeg/libavcodec/utils.c @@ -103,9 +103,13 @@ void av_free_static(void) last_static = 0; } -/* cannot call it directly because of 'void **' casting is not automatic */ -void __av_freep(void **ptr) +/** + * Frees memory and sets the pointer to NULL. + * @param arg pointer to the pointer which should be freed + */ +void av_freep(void *arg) { + void **ptr= (void**)arg; av_free(*ptr); *ptr = NULL; } @@ -238,7 +242,8 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){ const int h_shift= i==0 ? 0 : h_chroma_shift; const int v_shift= i==0 ? 0 : v_chroma_shift; - buf->linesize[i]= ALIGN(pixel_size*w>>h_shift, s_align); + //FIXME next ensures that linesize= 2^x uvlinesize, thats needed because some MC code assumes it + buf->linesize[i]= ALIGN(pixel_size*w>>h_shift, s_align<<(h_chroma_shift-h_shift)); buf->base[i]= av_mallocz((buf->linesize[i]*h>>v_shift)+16); //FIXME 16 if(buf->base[i]==NULL) return -1; @@ -323,11 +328,35 @@ int avcodec_default_reget_buffer(AVCodecContext *s, AVFrame *pic){ return 0; } -enum PixelFormat avcodec_default_get_format(struct AVCodecContext *s, enum PixelFormat * fmt){ +int avcodec_default_execute(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2),void **arg, int *ret, int count){ + int i; + + for(i=0; i<count; i++){ + int r= func(c, arg[i]); + if(ret) ret[i]= r; + } + return 0; +} + +enum PixelFormat avcodec_default_get_format(struct AVCodecContext *s, const enum PixelFormat * fmt){ return fmt[0]; } +static const char* context_to_name(void* ptr) { + AVCodecContext *avc= ptr; + + if(avc && avc->codec && avc->codec->name) + return avc->codec->name; + else + return "NULL"; +} + +static AVClass av_codec_context_class = { "AVCodecContext", context_to_name }; + void avcodec_get_context_defaults(AVCodecContext *s){ + memset(s, 0, sizeof(AVCodecContext)); + + s->av_class= &av_codec_context_class; s->bit_rate= 800*1000; s->bit_rate_tolerance= s->bit_rate*10; s->qmin= 2; @@ -351,6 +380,8 @@ void avcodec_get_context_defaults(AVCodecContext *s){ s->get_buffer= avcodec_default_get_buffer; s->release_buffer= avcodec_default_release_buffer; s->get_format= avcodec_default_get_format; + s->execute= avcodec_default_execute; + s->thread_count=1; s->me_subpel_quality=8; s->lmin= FF_QP2LAMBDA * s->qmin; s->lmax= FF_QP2LAMBDA * s->qmax; @@ -368,7 +399,7 @@ void avcodec_get_context_defaults(AVCodecContext *s){ * this can be deallocated by simply calling free() */ AVCodecContext *avcodec_alloc_context(void){ - AVCodecContext *avctx= av_mallocz(sizeof(AVCodecContext)); + AVCodecContext *avctx= av_malloc(sizeof(AVCodecContext)); if(avctx==NULL) return NULL; @@ -377,12 +408,22 @@ AVCodecContext *avcodec_alloc_context(void){ return avctx; } +void avcodec_get_frame_defaults(AVFrame *pic){ + memset(pic, 0, sizeof(AVFrame)); + + pic->pts= AV_NOPTS_VALUE; +} + /** * allocates a AVPFrame and set it to defaults. 
* this can be deallocated by simply calling free() */ AVFrame *avcodec_alloc_frame(void){ - AVFrame *pic= av_mallocz(sizeof(AVFrame)); + AVFrame *pic= av_malloc(sizeof(AVFrame)); + + if(pic==NULL) return NULL; + + avcodec_get_frame_defaults(pic); return pic; } @@ -806,40 +847,38 @@ int64_t av_rescale(int64_t a, int b, int c){ /* av_log API */ -#ifdef AV_LOG_TRAP_PRINTF -#undef stderr -#undef fprintf -#endif - static int av_log_level = AV_LOG_DEBUG; -static void av_log_default_callback(AVCodecContext* avctx, int level, const char* fmt, va_list vl) +static void av_log_default_callback(void* ptr, int level, const char* fmt, va_list vl) { static int print_prefix=1; - + AVClass* avc= ptr ? *(AVClass**)ptr : NULL; if(level>av_log_level) - return; - if(avctx && print_prefix) - fprintf(stderr, "[%s @ %p]", avctx->codec ? avctx->codec->name : "?", avctx); + return; +#undef fprintf + if(print_prefix && avc) { + fprintf(stderr, "[%s @ %p]", avc->item_name(ptr), avc); + } +#define fprintf please_use_av_log - print_prefix= (int)strstr(fmt, "\n"); + print_prefix= strstr(fmt, "\n") != NULL; vfprintf(stderr, fmt, vl); } -static void (*av_log_callback)(AVCodecContext*, int, const char*, va_list) = av_log_default_callback; +static void (*av_log_callback)(void*, int, const char*, va_list) = av_log_default_callback; -void av_log(AVCodecContext* avctx, int level, const char *fmt, ...) +void av_log(void* avcl, int level, const char *fmt, ...) { va_list vl; va_start(vl, fmt); - av_vlog(avctx, level, fmt, vl); + av_vlog(avcl, level, fmt, vl); va_end(vl); } -void av_vlog(AVCodecContext* avctx, int level, const char *fmt, va_list vl) +void av_vlog(void* avcl, int level, const char *fmt, va_list vl) { - av_log_callback(avctx, level, fmt, vl); + av_log_callback(avcl, level, fmt, vl); } int av_log_get_level(void) @@ -852,7 +891,7 @@ void av_log_set_level(int level) av_log_level = level; } -void av_log_set_callback(void (*callback)(AVCodecContext*, int, const char*, va_list)) +void av_log_set_callback(void (*callback)(void*, int, const char*, va_list)) { av_log_callback = callback; } diff --git a/src/libffmpeg/libavcodec/vmdav.c b/src/libffmpeg/libavcodec/vmdav.c index a5c7f450c..47c77513d 100644 --- a/src/libffmpeg/libavcodec/vmdav.c +++ b/src/libffmpeg/libavcodec/vmdav.c @@ -22,6 +22,8 @@ * @file vmdvideo.c * Sierra VMD audio & video decoders * by Vladimir "VAG" Gneushev (vagsoft at mail.ru) + * for more information on the Sierra VMD format, visit: + * http://www.pcisys.net/~melanson/codecs/ * * The video decoder outputs PAL8 colorspace data. The decoder expects * a 0x330-byte VMD file header to be transmitted via extradata during @@ -30,7 +32,7 @@ * information record from the VMD file. * * The audio decoder, like the video decoder, expects each encoded data - * chunk to be prepended with the approriate 16-byte frame information + * chunk to be prepended with the appropriate 16-byte frame information * record from the VMD file. It does not require the 0x330-byte VMD file * header, but it does need the audio setup parameters passed in through * normal libavcodec API means. @@ -45,15 +47,12 @@ #include "avcodec.h" #include "dsputil.h" +#define printf(...) {} //(f)printf() usage is forbidden in libavcodec, use av_log +#define fprintf(...) 
{} + #define VMD_HEADER_SIZE 0x330 #define PALETTE_COUNT 256 -#define LE_16(x) ((((uint8_t*)(x))[1] << 8) | ((uint8_t*)(x))[0]) -#define LE_32(x) ((((uint8_t*)(x))[3] << 24) | \ - (((uint8_t*)(x))[2] << 16) | \ - (((uint8_t*)(x))[1] << 8) | \ - ((uint8_t*)(x))[0]) - /* * Video Decoder */ @@ -272,7 +271,7 @@ static void vmd_decode(VmdVideoContext *s) if (len & 0x80) { len = (len & 0x7F) + 1; if (*pb++ == 0xFF) - len = rle_unpack(pb, dp, len); + len = rle_unpack(pb, &dp[ofs], len); else memcpy(&dp[ofs], pb, len); pb += len; @@ -346,6 +345,9 @@ static int vmdvideo_decode_frame(AVCodecContext *avctx, s->buf = buf; s->size = buf_size; + if (buf_size < 16) + return buf_size; + s->frame.reference = 1; if (avctx->get_buffer(avctx, &s->frame)) { printf (" VMD Video: get_buffer() failed\n"); @@ -405,8 +407,8 @@ static int vmdaudio_decode_init(AVCodecContext *avctx) s->bits = avctx->bits_per_sample; s->block_align = avctx->block_align; -printf (" %d channels, %d bits/sample, block align = %d\n", - s->channels, s->bits, s->block_align); +printf (" %d channels, %d bits/sample, block align = %d, sample rate = %d\n", + s->channels, s->bits, s->block_align, avctx->sample_rate); /* set up the steps8 and steps16 tables */ for (i = 0; i < 8; i++) { @@ -457,10 +459,17 @@ static void vmdaudio_decode_audio(VmdAudioContext *s, unsigned char *data, } -static void vmdaudio_loadsound(VmdAudioContext *s, unsigned char *data, +static int vmdaudio_loadsound(VmdAudioContext *s, unsigned char *data, uint8_t *buf, int silence) { + int bytes_decoded = 0; + int i; + +if (silence) + printf (" silent block!\n"); if (s->channels == 2) { + + /* stereo handling */ if ((s->block_align & 0x01) == 0) { if (silence) memset(data, 0, s->block_align * 2); @@ -469,11 +478,34 @@ static void vmdaudio_loadsound(VmdAudioContext *s, unsigned char *data, } else { if (silence) memset(data, 0, s->block_align * 2); -// else -// vmdaudio_decode_audio(s, data, buf, 1); + else + vmdaudio_decode_audio(s, data, buf, 1); } } else { + + /* mono handling */ + if (silence) { + if (s->bits == 16) { + memset(data, 0, s->block_align * 2); + bytes_decoded = s->block_align * 2; + } else { +// memset(data, 0x00, s->block_align); +// bytes_decoded = s->block_align; +memset(data, 0x00, s->block_align * 2); +bytes_decoded = s->block_align * 2; + } + } else { + if (s->bits == 16) { + } else { + /* copy the data but convert it to signed */ + for (i = 0; i < s->block_align; i++) + data[i * 2 + 1] = buf[i] + 0x80; + bytes_decoded = s->block_align * 2; + } + } } + + return bytes_decoded; } static int vmdaudio_decode_frame(AVCodecContext *avctx, @@ -488,10 +520,16 @@ static int vmdaudio_decode_frame(AVCodecContext *avctx, unsigned char *p = buf + 16; unsigned char *p_end = buf + buf_size; +printf (" processing audio frame with %d bytes\n", buf_size); + if (buf_size < 16) + return buf_size; + + *data_size = 0; if (buf[6] == 1) { /* the chunk contains audio */ - vmdaudio_loadsound(s, output_samples, p, 0); + *data_size = vmdaudio_loadsound(s, output_samples, p, 0); } else if (buf[6] == 2) { +printf (" hey! 
audio case #2\n"); /* the chunk contains audio and silence mixed together */ sound_flags = LE_32(p); p += 4; @@ -500,22 +538,24 @@ static int vmdaudio_decode_frame(AVCodecContext *avctx, while (p < p_end) { if (sound_flags & 0x01) - /* audio */ - vmdaudio_loadsound(s, output_samples, p, 1); - else /* silence */ - vmdaudio_loadsound(s, output_samples, p, 0); - p += s->block_align; + *data_size += vmdaudio_loadsound(s, output_samples, p, 1); + else { + /* audio */ + *data_size += vmdaudio_loadsound(s, output_samples, p, 0); + p += s->block_align; + } output_samples += (s->block_align * s->bits / 8); sound_flags >>= 1; } } else if (buf[6] == 3) { +printf (" hey! audio case #3\n"); /* silent chunk */ - vmdaudio_loadsound(s, output_samples, p, 1); + *data_size = vmdaudio_loadsound(s, output_samples, p, 1); } - -// *datasize = ; +printf (" final sample count = %d, byte count = %d\n", (*data_size) / 2, + *data_size); return buf_size; } diff --git a/src/libffmpeg/libavcodec/vp3.c b/src/libffmpeg/libavcodec/vp3.c index 636d2dfd8..eadfd39b9 100644 --- a/src/libffmpeg/libavcodec/vp3.c +++ b/src/libffmpeg/libavcodec/vp3.c @@ -1,6 +1,5 @@ /* - * - * Copyright (C) 2003 the ffmpeg project + * Copyright (C) 2003-2004 the ffmpeg project * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -38,7 +37,6 @@ #include "avcodec.h" #include "dsputil.h" #include "mpegvideo.h" -#include "dsputil.h" #include "vp3data.h" @@ -253,7 +251,7 @@ typedef struct Vp3DecodeContext { /* tables */ uint16_t coded_dc_scale_factor[64]; - uint32_t coded_quality_threshold[64]; + uint32_t coded_ac_scale_factor[64]; uint16_t coded_intra_y_dequant[64]; uint16_t coded_intra_c_dequant[64]; uint16_t coded_inter_dequant[64]; @@ -307,307 +305,6 @@ static int theora_decode_comments(AVCodecContext *avctx, GetBitContext gb); static int theora_decode_tables(AVCodecContext *avctx, GetBitContext gb); /************************************************************************ - * VP3 I/DCT - ************************************************************************/ - -#define IdctAdjustBeforeShift 8 -#define xC1S7 64277 -#define xC2S6 60547 -#define xC3S5 54491 -#define xC4S4 46341 -#define xC5S3 36410 -#define xC6S2 25080 -#define xC7S1 12785 - -void vp3_idct_c(int16_t *input_data, int16_t *dequant_matrix, - int16_t *output_data) -{ - int32_t intermediate_data[64]; - int32_t *ip = intermediate_data; - int16_t *op = output_data; - - int32_t A_, B_, C_, D_, _Ad, _Bd, _Cd, _Dd, E_, F_, G_, H_; - int32_t _Ed, _Gd, _Add, _Bdd, _Fd, _Hd; - int32_t t1, t2; - - int i, j; - - debug_idct("raw coefficient block:\n"); - for (i = 0; i < 8; i++) { - for (j = 0; j < 8; j++) { - debug_idct(" %5d", input_data[i * 8 + j]); - } - debug_idct("\n"); - } - debug_idct("\n"); - - for (i = 0; i < 64; i++) { - j = dezigzag_index[i]; - intermediate_data[j] = dequant_matrix[i] * input_data[i]; - } - - debug_idct("dequantized block:\n"); - for (i = 0; i < 8; i++) { - for (j = 0; j < 8; j++) { - debug_idct(" %5d", intermediate_data[i * 8 + j]); - } - debug_idct("\n"); - } - debug_idct("\n"); - - /* Inverse DCT on the rows now */ - for (i = 0; i < 8; i++) { - /* Check for non-zero values */ - if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) { - t1 = (int32_t)(xC1S7 * ip[1]); - t2 = (int32_t)(xC7S1 * ip[7]); - t1 >>= 16; - t2 >>= 16; - A_ = t1 + t2; - - t1 = (int32_t)(xC7S1 * ip[1]); - t2 = (int32_t)(xC1S7 * ip[7]); - t1 >>= 16; - t2 >>= 16; - B_ = t1 - t2; - - t1 = (int32_t)(xC3S5 * 
ip[3]); - t2 = (int32_t)(xC5S3 * ip[5]); - t1 >>= 16; - t2 >>= 16; - C_ = t1 + t2; - - t1 = (int32_t)(xC3S5 * ip[5]); - t2 = (int32_t)(xC5S3 * ip[3]); - t1 >>= 16; - t2 >>= 16; - D_ = t1 - t2; - - - t1 = (int32_t)(xC4S4 * (A_ - C_)); - t1 >>= 16; - _Ad = t1; - - t1 = (int32_t)(xC4S4 * (B_ - D_)); - t1 >>= 16; - _Bd = t1; - - - _Cd = A_ + C_; - _Dd = B_ + D_; - - t1 = (int32_t)(xC4S4 * (ip[0] + ip[4])); - t1 >>= 16; - E_ = t1; - - t1 = (int32_t)(xC4S4 * (ip[0] - ip[4])); - t1 >>= 16; - F_ = t1; - - t1 = (int32_t)(xC2S6 * ip[2]); - t2 = (int32_t)(xC6S2 * ip[6]); - t1 >>= 16; - t2 >>= 16; - G_ = t1 + t2; - - t1 = (int32_t)(xC6S2 * ip[2]); - t2 = (int32_t)(xC2S6 * ip[6]); - t1 >>= 16; - t2 >>= 16; - H_ = t1 - t2; - - - _Ed = E_ - G_; - _Gd = E_ + G_; - - _Add = F_ + _Ad; - _Bdd = _Bd - H_; - - _Fd = F_ - _Ad; - _Hd = _Bd + H_; - - /* Final sequence of operations over-write original inputs. */ - ip[0] = (int16_t)((_Gd + _Cd ) >> 0); - ip[7] = (int16_t)((_Gd - _Cd ) >> 0); - - ip[1] = (int16_t)((_Add + _Hd ) >> 0); - ip[2] = (int16_t)((_Add - _Hd ) >> 0); - - ip[3] = (int16_t)((_Ed + _Dd ) >> 0); - ip[4] = (int16_t)((_Ed - _Dd ) >> 0); - - ip[5] = (int16_t)((_Fd + _Bdd ) >> 0); - ip[6] = (int16_t)((_Fd - _Bdd ) >> 0); - - } - - ip += 8; /* next row */ - } - - ip = intermediate_data; - - for ( i = 0; i < 8; i++) { - /* Check for non-zero values (bitwise or faster than ||) */ - if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] | - ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) { - - t1 = (int32_t)(xC1S7 * ip[1*8]); - t2 = (int32_t)(xC7S1 * ip[7*8]); - t1 >>= 16; - t2 >>= 16; - A_ = t1 + t2; - - t1 = (int32_t)(xC7S1 * ip[1*8]); - t2 = (int32_t)(xC1S7 * ip[7*8]); - t1 >>= 16; - t2 >>= 16; - B_ = t1 - t2; - - t1 = (int32_t)(xC3S5 * ip[3*8]); - t2 = (int32_t)(xC5S3 * ip[5*8]); - t1 >>= 16; - t2 >>= 16; - C_ = t1 + t2; - - t1 = (int32_t)(xC3S5 * ip[5*8]); - t2 = (int32_t)(xC5S3 * ip[3*8]); - t1 >>= 16; - t2 >>= 16; - D_ = t1 - t2; - - - t1 = (int32_t)(xC4S4 * (A_ - C_)); - t1 >>= 16; - _Ad = t1; - - t1 = (int32_t)(xC4S4 * (B_ - D_)); - t1 >>= 16; - _Bd = t1; - - - _Cd = A_ + C_; - _Dd = B_ + D_; - - t1 = (int32_t)(xC4S4 * (ip[0*8] + ip[4*8])); - t1 >>= 16; - E_ = t1; - - t1 = (int32_t)(xC4S4 * (ip[0*8] - ip[4*8])); - t1 >>= 16; - F_ = t1; - - t1 = (int32_t)(xC2S6 * ip[2*8]); - t2 = (int32_t)(xC6S2 * ip[6*8]); - t1 >>= 16; - t2 >>= 16; - G_ = t1 + t2; - - t1 = (int32_t)(xC6S2 * ip[2*8]); - t2 = (int32_t)(xC2S6 * ip[6*8]); - t1 >>= 16; - t2 >>= 16; - H_ = t1 - t2; - - - _Ed = E_ - G_; - _Gd = E_ + G_; - - _Add = F_ + _Ad; - _Bdd = _Bd - H_; - - _Fd = F_ - _Ad; - _Hd = _Bd + H_; - - _Gd += IdctAdjustBeforeShift; - _Add += IdctAdjustBeforeShift; - _Ed += IdctAdjustBeforeShift; - _Fd += IdctAdjustBeforeShift; - - /* Final sequence of operations over-write original inputs. 
*/ - op[0*8] = (int16_t)((_Gd + _Cd ) >> 4); - op[7*8] = (int16_t)((_Gd - _Cd ) >> 4); - - op[1*8] = (int16_t)((_Add + _Hd ) >> 4); - op[2*8] = (int16_t)((_Add - _Hd ) >> 4); - - op[3*8] = (int16_t)((_Ed + _Dd ) >> 4); - op[4*8] = (int16_t)((_Ed - _Dd ) >> 4); - - op[5*8] = (int16_t)((_Fd + _Bdd ) >> 4); - op[6*8] = (int16_t)((_Fd - _Bdd ) >> 4); - - } else { - - op[0*8] = 0; - op[7*8] = 0; - op[1*8] = 0; - op[2*8] = 0; - op[3*8] = 0; - op[4*8] = 0; - op[5*8] = 0; - op[6*8] = 0; - } - - ip++; /* next column */ - op++; - } -} - -void vp3_idct_put(int16_t *input_data, int16_t *dequant_matrix, - uint8_t *dest, int stride) -{ - int16_t transformed_data[64]; - int16_t *op; - int i, j; - - vp3_idct_c(input_data, dequant_matrix, transformed_data); - - /* place in final output */ - op = transformed_data; - for (i = 0; i < 8; i++) { - for (j = 0; j < 8; j++) { - if (*op < -128) - *dest = 0; - else if (*op > 127) - *dest = 255; - else - *dest = (uint8_t)(*op + 128); - op++; - dest++; - } - dest += (stride - 8); - } -} - -void vp3_idct_add(int16_t *input_data, int16_t *dequant_matrix, - uint8_t *dest, int stride) -{ - int16_t transformed_data[64]; - int16_t *op; - int i, j; - int16_t sample; - - vp3_idct_c(input_data, dequant_matrix, transformed_data); - - /* place in final output */ - op = transformed_data; - for (i = 0; i < 8; i++) { - for (j = 0; j < 8; j++) { - sample = *dest + *op; - if (sample < 0) - *dest = 0; - else if (sample > 255) - *dest = 255; - else - *dest = (uint8_t)(sample & 0xFF); - op++; - dest++; - } - dest += (stride - 8); - } -} - -/************************************************************************ * VP3 specific functions ************************************************************************/ @@ -1145,7 +842,7 @@ s->all_fragments[i].motion_y = 0xbeef; static void init_dequantizer(Vp3DecodeContext *s) { - int quality_scale = s->coded_quality_threshold[s->quality_index]; + int ac_scale_factor = s->coded_ac_scale_factor[s->quality_index]; int dc_scale_factor = s->coded_dc_scale_factor[s->quality_index]; int i, j; @@ -1159,7 +856,7 @@ static void init_dequantizer(Vp3DecodeContext *s) * 100 * * where sf = dc_scale_factor for DC quantizer - * or quality_scale for AC quantizer + * or ac_scale_factor for AC quantizer * * Then, saturate the result to a lower limit of MIN_DEQUANT_VAL. */ @@ -1187,17 +884,17 @@ static void init_dequantizer(Vp3DecodeContext *s) j = zigzag_index[i]; - s->intra_y_dequant[j] = s->coded_intra_y_dequant[i] * quality_scale / 100; + s->intra_y_dequant[j] = s->coded_intra_y_dequant[i] * ac_scale_factor / 100; if (s->intra_y_dequant[j] < MIN_DEQUANT_VAL) s->intra_y_dequant[j] = MIN_DEQUANT_VAL; s->intra_y_dequant[j] *= SCALER; - s->intra_c_dequant[j] = s->coded_intra_c_dequant[i] * quality_scale / 100; + s->intra_c_dequant[j] = s->coded_intra_c_dequant[i] * ac_scale_factor / 100; if (s->intra_c_dequant[j] < MIN_DEQUANT_VAL) s->intra_c_dequant[j] = MIN_DEQUANT_VAL; s->intra_c_dequant[j] *= SCALER; - s->inter_dequant[j] = s->coded_inter_dequant[i] * quality_scale / 100; + s->inter_dequant[j] = s->coded_inter_dequant[i] * ac_scale_factor / 100; if (s->inter_dequant[j] < MIN_DEQUANT_VAL * 2) s->inter_dequant[j] = MIN_DEQUANT_VAL * 2; s->inter_dequant[j] *= SCALER; @@ -2451,15 +2148,24 @@ av_log(s->avctx, AV_LOG_ERROR, " help! got beefy vector! 
(%X, %X)\n", motion_x, motion_source= temp; } } + /* first, take care of copying a block from either the * previous or the golden frame */ if (s->all_fragments[i].coding_method != MODE_INTRA) { - - s->dsp.put_no_rnd_pixels_tab[1][motion_halfpel_index]( - output_plane + s->all_fragments[i].first_pixel, - motion_source, - stride, 8); + //Note, it is possible to implement all MC cases with put_no_rnd_pixels_l2 which would look more like the VP3 source but this would be slower as put_no_rnd_pixels_tab is better optimzed + if(motion_halfpel_index != 3){ + s->dsp.put_no_rnd_pixels_tab[1][motion_halfpel_index]( + output_plane + s->all_fragments[i].first_pixel, + motion_source, stride, 8); + }else{ + int d= (motion_x ^ motion_y)>>31; // d is 0 if motion_x and _y have the same sign, else -1 + s->dsp.put_no_rnd_pixels_l2[1]( + output_plane + s->all_fragments[i].first_pixel, + motion_source - d, + motion_source + stride + 1 + d, + stride, 8); + } } /* dequantize the DCT coefficients */ @@ -2469,11 +2175,15 @@ av_log(s->avctx, AV_LOG_ERROR, " help! got beefy vector! (%X, %X)\n", motion_x, /* invert DCT and place (or add) in final output */ if (s->all_fragments[i].coding_method == MODE_INTRA) { - vp3_idct_put(s->all_fragments[i].coeffs, dequantizer, + s->dsp.vp3_idct_put(s->all_fragments[i].coeffs, + dequantizer, + s->all_fragments[i].coeff_count, output_plane + s->all_fragments[i].first_pixel, stride); } else { - vp3_idct_add(s->all_fragments[i].coeffs, dequantizer, + s->dsp.vp3_idct_add(s->all_fragments[i].coeffs, + dequantizer, + s->all_fragments[i].coeff_count, output_plane + s->all_fragments[i].first_pixel, stride); } @@ -2632,6 +2342,7 @@ static int vp3_decode_init(AVCodecContext *avctx) avctx->pix_fmt = PIX_FMT_YUV420P; avctx->has_b_frames = 0; dsputil_init(&s->dsp, avctx); + s->dsp.vp3_dsp_init(); /* initialize to an impossible value which will force a recalculation * in the first frame decode */ @@ -2691,7 +2402,7 @@ static int vp3_decode_init(AVCodecContext *avctx) for (i = 0; i < 64; i++) s->coded_dc_scale_factor[i] = vp31_dc_scale_factor[i]; for (i = 0; i < 64; i++) - s->coded_quality_threshold[i] = vp31_quality_threshold[i]; + s->coded_ac_scale_factor[i] = vp31_ac_scale_factor[i]; for (i = 0; i < 64; i++) s->coded_intra_y_dequant[i] = vp31_intra_y_dequant[i]; for (i = 0; i < 64; i++) @@ -3032,7 +2743,7 @@ static int theora_decode_tables(AVCodecContext *avctx, GetBitContext gb) /* quality threshold table */ for (i = 0; i < 64; i++) - s->coded_quality_threshold[i] = get_bits(&gb, 16); + s->coded_ac_scale_factor[i] = get_bits(&gb, 16); /* dc scale factor table */ for (i = 0; i < 64; i++) diff --git a/src/libffmpeg/libavcodec/vp3data.h b/src/libffmpeg/libavcodec/vp3data.h index 0021049fc..a25c9f06e 100644 --- a/src/libffmpeg/libavcodec/vp3data.h +++ b/src/libffmpeg/libavcodec/vp3data.h @@ -1,7 +1,8 @@ #ifndef VP3DATA_H #define VP3DATA_H -/* these coefficients dequantize intraframe Y plane coefficients */ +/* these coefficients dequantize intraframe Y plane coefficients + * (note: same as JPEG) */ static int16_t vp31_intra_y_dequant[64] = { 16, 11, 10, 16, 24, 40, 51, 61, 12, 12, 14, 19, 26, 58, 60, 55, @@ -13,7 +14,8 @@ static int16_t vp31_intra_y_dequant[64] = 72, 92, 95, 98, 112, 100, 103, 99 }; -/* these coefficients dequantize intraframe C plane coefficients */ +/* these coefficients dequantize intraframe C plane coefficients + * (note: same as JPEG) */ static int16_t vp31_intra_c_dequant[64] = { 17, 18, 24, 47, 99, 99, 99, 99, 18, 21, 26, 66, 99, 99, 99, 99, @@ -48,7 +50,7 @@ static 
int16_t vp31_dc_scale_factor[64] =
 20, 10, 10, 10, 10, 10, 10, 10
};
-static uint32_t vp31_quality_threshold[64] =
+static uint32_t vp31_ac_scale_factor[64] =
{ 500, 450, 400, 370, 340, 310, 285, 265,
 245, 225, 210, 195, 185, 180, 170, 160,
 150, 145, 135, 130, 125, 115, 110, 107,
diff --git a/src/libffmpeg/libavcodec/vqavideo.c b/src/libffmpeg/libavcodec/vqavideo.c
index 97cbbd6d9..ea86fb108 100644
--- a/src/libffmpeg/libavcodec/vqavideo.c
+++ b/src/libffmpeg/libavcodec/vqavideo.c
@@ -82,26 +82,13 @@
#define MAX_VECTORS (MAX_CODEBOOK_VECTORS + SOLID_PIXEL_VECTORS)
#define MAX_CODEBOOK_SIZE (MAX_VECTORS * 4 * 4)
-#define LE_16(x) ((((uint8_t*)(x))[1] << 8) | ((uint8_t*)(x))[0])
-#define BE_16(x) ((((uint8_t*)(x))[0] << 8) | ((uint8_t*)(x))[1])
-#define BE_32(x) ((((uint8_t*)(x))[0] << 24) | \
-               (((uint8_t*)(x))[1] << 16) | \
-               (((uint8_t*)(x))[2] << 8) | \
-               ((uint8_t*)(x))[3])
-
-#define FOURCC_TAG( ch0, ch1, ch2, ch3 ) \
-        ( (long)(unsigned char)(ch3) | \
-        ( (long)(unsigned char)(ch2) << 8 ) | \
-        ( (long)(unsigned char)(ch1) << 16 ) | \
-        ( (long)(unsigned char)(ch0) << 24 ) )
-
-#define CBF0_TAG FOURCC_TAG('C', 'B', 'F', '0')
-#define CBFZ_TAG FOURCC_TAG('C', 'B', 'F', 'Z')
-#define CBP0_TAG FOURCC_TAG('C', 'B', 'P', '0')
-#define CBPZ_TAG FOURCC_TAG('C', 'B', 'P', 'Z')
-#define CPL0_TAG FOURCC_TAG('C', 'P', 'L', '0')
-#define CPLZ_TAG FOURCC_TAG('C', 'P', 'L', 'Z')
-#define VPTZ_TAG FOURCC_TAG('V', 'P', 'T', 'Z')
+#define CBF0_TAG MKBETAG('C', 'B', 'F', '0')
+#define CBFZ_TAG MKBETAG('C', 'B', 'F', 'Z')
+#define CBP0_TAG MKBETAG('C', 'B', 'P', '0')
+#define CBPZ_TAG MKBETAG('C', 'B', 'P', 'Z')
+#define CPL0_TAG MKBETAG('C', 'P', 'L', '0')
+#define CPLZ_TAG MKBETAG('C', 'P', 'L', 'Z')
+#define VPTZ_TAG MKBETAG('V', 'P', 'T', 'Z')
#define VQA_DEBUG 0
diff --git a/src/libffmpeg/libavcodec/xan.c b/src/libffmpeg/libavcodec/xan.c
index 4dad2e214..f98a06bca 100644
--- a/src/libffmpeg/libavcodec/xan.c
+++ b/src/libffmpeg/libavcodec/xan.c
@@ -58,13 +58,6 @@ typedef struct XanContext {
} XanContext;
-#define BE_16(x) ((((uint8_t*)(x))[0] << 8) | ((uint8_t*)(x))[1])
-#define LE_16(x) ((((uint8_t*)(x))[1] << 8) | ((uint8_t*)(x))[0])
-#define LE_32(x) ((((uint8_t*)(x))[3] << 24) | \
-               (((uint8_t*)(x))[2] << 16) | \
-               (((uint8_t*)(x))[1] << 8) | \
-               ((uint8_t*)(x))[0])
-
/* RGB -> YUV conversion stuff */
#define SCALEFACTOR 65536
#define CENTERSAMPLE 128
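
The svq3.c hunk above stops assuming a fixed extradata layout and instead scans the blob for a "SEQH" marker, then reads a 32-bit big-endian payload size before handing the data to init_get_bits(). A minimal, self-contained sketch of that kind of marker scan follows; find_marker(), its bounds checks and its return convention are assumptions made for illustration, not libavcodec API.

#include <stdint.h>
#include <string.h>
#include <stddef.h>

/* Locate a 4-byte marker (e.g. "SEQH") inside an opaque extradata blob and
 * return a pointer to its payload; the payload length is stored as a 32-bit
 * big-endian value directly after the marker. */
static const uint8_t *find_marker(const uint8_t *buf, size_t size,
                                  const char tag[4], uint32_t *payload_size)
{
    size_t i;

    for (i = 0; i + 8 <= size; i++) {
        if (!memcmp(buf + i, tag, 4)) {
            *payload_size = ((uint32_t)buf[i + 4] << 24) |
                            ((uint32_t)buf[i + 5] << 16) |
                            ((uint32_t)buf[i + 6] <<  8) |
                             (uint32_t)buf[i + 7];
            return buf + i + 8;              /* payload follows tag + size */
        }
    }
    return NULL;                             /* marker not present */
}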
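
The utils.c hunk above changes the free helper to take a plain void* and cast internally, so callers can write av_freep(&some_ptr) for any pointer type without a cast. A sketch of the same free-and-clear idiom, with free() standing in for av_free() and an illustrative helper name:

#include <stdlib.h>

/* Free a heap block through the address of its owning pointer, then clear
 * that pointer so it cannot dangle. */
static void freep(void *arg)
{
    void **ptr = (void **)arg;

    free(*ptr);
    *ptr = NULL;
}

/* usage: char *buf = malloc(100); ...; freep(&buf);  buf is now NULL */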
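
avcodec_default_execute(), added in the same utils.c hunk, is the single-threaded fallback behind the new execute() hook: it calls the supplied function once per work item and records the per-item return codes. A standalone sketch of that dispatch loop; WorkContext and execute_serial are placeholder names, not the real types:

/* Serial work dispatcher: run func() over each item in order, optionally
 * storing each return value.  A threaded variant would hand the same calls
 * to worker threads and join before returning. */
typedef struct WorkContext WorkContext;

static int execute_serial(WorkContext *c,
                          int (*func)(WorkContext *c2, void *arg),
                          void **args, int *ret, int count)
{
    int i;

    for (i = 0; i < count; i++) {
        int r = func(c, args[i]);
        if (ret)
            ret[i] = r;               /* per-item result, as in the hunk */
    }
    return 0;
}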
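
The av_log() rework above replaces the AVCodecContext* argument with a void* whose first member is expected to point to an AVClass, letting the default callback print a name via item_name(). A rough sketch of that first-member convention with invented names (LogClass, FakeContext, my_log); only the pattern, not the exact API, is taken from the hunk:

#include <stdarg.h>
#include <stdio.h>

typedef struct LogClass {
    const char *class_name;
    const char *(*item_name)(void *ctx);
} LogClass;

typedef struct FakeContext {
    const LogClass *log_class;   /* must be the first member */
    const char *codec_name;
} FakeContext;

static const char *fake_item_name(void *ctx)
{
    return ((FakeContext *)ctx)->codec_name;
}

static const LogClass fake_class = { "FakeContext", fake_item_name };

static void my_log(void *ptr, const char *fmt, ...)
{
    const LogClass *cls = ptr ? *(const LogClass **)ptr : NULL;
    va_list vl;

    if (cls)
        fprintf(stderr, "[%s @ %p] ", cls->item_name(ptr), ptr);
    va_start(vl, fmt);
    vfprintf(stderr, fmt, vl);   /* same formatting path as the default callback */
    va_end(vl);
}

/* usage: FakeContext ctx = { &fake_class, "svq3" }; my_log(&ctx, "hi %d\n", 1); */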
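
In the vmdav.c hunk, the mono 8-bit path of vmdaudio_loadsound() now widens each unsigned sample to signed 16-bit output by storing sample + 0x80 in the upper output byte, and the function reports how many bytes it produced. A hedged sketch of that conversion; the helper name, return value and the assumption of host-order int16_t output are illustrative:

#include <stdint.h>
#include <stddef.h>

/* Widen unsigned 8-bit samples (silence at 0x80) to signed 16-bit PCM by
 * flipping the sign bit and moving the byte into the upper half of the
 * word, which is what the byte-wrapped "+ 0x80" store in the patch amounts
 * to on a little-endian layout. */
static size_t expand_u8_to_s16(int16_t *dst, const uint8_t *src, size_t n)
{
    size_t i;

    for (i = 0; i < n; i++)
        dst[i] = (int16_t)((src[i] ^ 0x80) << 8);  /* 0x80 -> 0, 0x00 -> -32768 */
    return n * 2;                                  /* bytes produced */
}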
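
The vp3.c motion-compensation change above keeps the optimized put_no_rnd_pixels_tab call for the simple half-pel cases and uses put_no_rnd_pixels_l2 only when both components sit at half-pel positions, averaging two diagonal sources selected by d = (motion_x ^ motion_y) >> 31 (0 when the signs match, -1 otherwise). A plain-C stand-in for that no-rounding two-source average, not the dsputil routine itself:

#include <stdint.h>

/* No-rounding average of two 8x8 source blocks, the operation performed for
 * the diagonal half-pel prediction case. */
static void avg_no_rnd_8x8(uint8_t *dst, const uint8_t *src1,
                           const uint8_t *src2, int stride)
{
    int x, y;

    for (y = 0; y < 8; y++) {
        for (x = 0; x < 8; x++)
            dst[x] = (uint8_t)((src1[x] + src2[x]) >> 1);  /* no +1 rounding term */
        dst  += stride;
        src1 += stride;
        src2 += stride;
    }
}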
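
init_dequantizer(), also touched above, scales each base matrix entry by a quality-dependent percentage (dc_scale_factor for the DC term, the renamed ac_scale_factor for the AC terms) and saturates the result to a lower limit before multiplying by SCALER. A tiny illustration of that formula; the constant below is a stand-in, not the real MIN_DEQUANT_VAL:

#define MIN_DEQUANT 2

/* scaled = base * scale_factor / 100, clamped from below. */
static int scale_quant_entry(int base, int scale_factor)
{
    int q = base * scale_factor / 100;

    return q < MIN_DEQUANT ? MIN_DEQUANT : q;
}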
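
The vqavideo.c hunk above drops the file-local FOURCC_TAG macro in favour of the shared MKBETAG, which packs four characters big-endian style so a chunk id loaded with a big-endian 32-bit read compares directly against the constant. A sketch of an equivalent macro; MAKE_BE_TAG is an illustrative name, not the libavcodec one:

#include <stdint.h>

/* Pack four characters with the first one in the high byte. */
#define MAKE_BE_TAG(a, b, c, d)                                        \
    ((uint32_t)(uint8_t)(d)        | ((uint32_t)(uint8_t)(c) <<  8) |  \
    ((uint32_t)(uint8_t)(b) << 16) | ((uint32_t)(uint8_t)(a) << 24))

enum {
    TAG_CPL0 = MAKE_BE_TAG('C', 'P', 'L', '0'),   /* palette chunk       */
    TAG_CBF0 = MAKE_BE_TAG('C', 'B', 'F', '0')    /* full codebook chunk */
};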