author     Miguel Freitas <miguelfreitas@users.sourceforge.net>    2007-01-13 21:19:52 +0000
committer  Miguel Freitas <miguelfreitas@users.sourceforge.net>    2007-01-13 21:19:52 +0000
commit     6e8ff6e5c232de4b8235626af31ab85345120a93 (patch)
tree       25930156aa9f4f2014bf6fe3d65c183262626b8d /src
parent     2f5905081ee2040537f043fe4afabbb66d26354e (diff)
download   xine-lib-6e8ff6e5c232de4b8235626af31ab85345120a93.tar.gz
           xine-lib-6e8ff6e5c232de4b8235626af31ab85345120a93.tar.bz2
* ffmpeg update to 51.28.0
* Work around buggy ffmpeg codecs that don't release their DR1 frames
  (see the illustrative sketch below).
* Fix several segfaults and a freezing problem with H.264 streams that use
  a lot of reference frames (e.g. 15).
* Initial support for enabling/disabling ffmpeg codecs. Codecs may be
  disabled in groups with --disable-ffmpeg-uncommon-codecs and
  --disable-ffmpeg-popular-codecs. Think of "uncommon" codecs as the ones
  people would never want to play on their PDAs (removing them saves
  memory); a sketch of the gating mechanism follows this message.
  Note: currently both uncommon and popular codecs are _built_ but
  disabled; that is, the build system still needs some improvements to
  really save memory.
  Warning: a non-autoconf guru is playing with the build system, so
  breakage is likely.
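An aside on the DR1 workaround above: the underlying idea is that xine, not the codec, must ultimately account for every direct-rendering frame it loans out through the old get_buffer()/release_buffer() callback API. The following is a purely illustrative, compilable sketch of that bookkeeping — the actual hunk is not part of this /src diffstat excerpt, and every identifier here (dr1_tracker_t, dr1_remember, and so on) is a hypothetical stand-in, not xine's real code:

#include <stdlib.h>
#include <stdio.h>

typedef struct AVFrame AVFrame;        /* opaque stand-in for the libavcodec type */

#define MAX_DR1_FRAMES 32

typedef struct {
  AVFrame *dr1_frames[MAX_DR1_FRAMES]; /* frames currently loaned to libavcodec */
  int      dr1_frames_used;
} dr1_tracker_t;

/* hypothetical hook for xine's get_buffer() callback */
static void dr1_remember(dr1_tracker_t *t, AVFrame *frame) {
  if (t->dr1_frames_used < MAX_DR1_FRAMES)
    t->dr1_frames[t->dr1_frames_used++] = frame;
}

/* hypothetical hook for the release_buffer() callback (well-behaved codecs) */
static void dr1_forget(dr1_tracker_t *t, AVFrame *frame) {
  int i;
  for (i = 0; i < t->dr1_frames_used; i++)
    if (t->dr1_frames[i] == frame)
      t->dr1_frames[i] = NULL;
}

/* on stream close: release whatever a buggy codec never gave back */
static void dr1_release_leaked(dr1_tracker_t *t, void (*release)(AVFrame *)) {
  int i;
  for (i = 0; i < t->dr1_frames_used; i++)
    if (t->dr1_frames[i])
      release(t->dr1_frames[i]);
  t->dr1_frames_used = 0;
}

static void release_stub(AVFrame *f) { free(f); puts("released a leaked DR1 frame"); }

int main(void) {
  dr1_tracker_t t = { {0}, 0 };
  AVFrame *a = malloc(16), *b = malloc(16);  /* pretend frame allocations */
  dr1_remember(&t, a);
  dr1_remember(&t, b);
  dr1_forget(&t, a);                    /* the codec released this one properly */
  free(a);
  dr1_release_leaked(&t, release_stub); /* ...but leaked the other */
  return 0;
}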
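The codec-group gating works by including a second, generated header. Below is a minimal sketch of the pattern, matching the CONFIG_*_DECODER #ifdef lists visible in the audio_decoder.c hunk further down; the exact defines emitted into ffmpeg_config.h and the BUF_AUDIO_* values are assumptions made only so the sketch compiles:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical excerpt of the generated src/libffmpeg/ffmpeg_config.h:
 * --disable-ffmpeg-uncommon-codecs would simply omit the defines for
 * the codecs in that group. */
#define CONFIG_WMAV1_DECODER 1
#define CONFIG_WMAV2_DECODER 1
/* CONFIG_ROQ_DPCM_DECODER intentionally left undefined ("uncommon") */

/* Stand-ins for xine's buffer-type constants, just so this compiles. */
#define BUF_AUDIO_WMAV1 0x0301
#define BUF_AUDIO_WMAV2 0x0302
#define BUF_AUDIO_ROQ   0x0303

/* The decoder table is then assembled conditionally, exactly as the
 * audio_decoder.c hunk below does for the full codec list. */
static uint32_t supported_audio_types[] = {
#ifdef CONFIG_WMAV1_DECODER
  BUF_AUDIO_WMAV1,
#endif
#ifdef CONFIG_WMAV2_DECODER
  BUF_AUDIO_WMAV2,
#endif
#ifdef CONFIG_ROQ_DPCM_DECODER
  BUF_AUDIO_ROQ,       /* compiled out when its group is disabled */
#endif
  0 /* terminator */
};

int main(void) {
  int n = 0;
  while (supported_audio_types[n]) n++;
  printf("%d audio decoders enabled\n", n);
  return 0;
}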
CVS patchset: 8499
CVS date: 2007/01/13 21:19:52
Diffstat (limited to 'src')
75 files changed, 9773 insertions, 873 deletions
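One fix carried in diff_to_ffmpeg_cvs.txt below deserves a standalone explanation: with a C99 variadic macro, dprintf("msg\n") expands to av_log(..., fmt, ) — a trailing comma, hence a syntax error — whereas GCC's named-varargs form with ##args deletes that comma. A condensed, compilable sketch of the same pattern, where fprintf stands in for av_log(NULL, AV_LOG_DEBUG, ...) only to keep it self-contained:

#include <stdio.h>

#define DEBUG  /* enable the logging branch for this demo */

/* Condensed from the libavutil/internal.h hunk below: prefer GCC's
 * named-varargs form, because ##args swallows the trailing comma when
 * the macro is invoked with no variadic arguments at all. */
#ifdef DEBUG
# ifdef __GNUC__
#  define dprintf(fmt, args...) fprintf(stderr, fmt, ##args)
# else
#  define dprintf(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__)
# endif
#else
# define dprintf(fmt, ...)
#endif

int main(void) {
  dprintf("plain message, no varargs\n"); /* breaks with the C99-only form */
  dprintf("with an argument: %d\n", 42);
  return 0;
}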
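Similarly, the avcodec.h hunk below plays a preprocessor trick worth unpacking: since the encoder half of ffmpeg is not compiled, many static helpers are defined but never called. Rather than annotate each one, the patch redefines the static keyword itself (self-referential macros are not re-expanded, so this does not recurse). A self-contained, GCC-only sketch of the idea:

#include <stdio.h>

/* Trick from the avcodec.h hunk: every subsequent `static` picks up
 * __attribute__((__unused__)), silencing -Wunused-function for helpers
 * that the disabled encoder paths would have called.  Note the system
 * header is included first, before the keyword is redefined. */
#if defined(__GNUC__) && !defined(DEBUG)
# define static static __attribute__((__unused__))
#endif

static int never_called(int x) { return x + 1; }  /* no warning emitted */

int main(void) {
  printf("built without unused-function warnings\n");
  return 0;
}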
diff --git a/src/libffmpeg/audio_decoder.c b/src/libffmpeg/audio_decoder.c index 22f567e9c..8f0425775 100644 --- a/src/libffmpeg/audio_decoder.c +++ b/src/libffmpeg/audio_decoder.c @@ -17,7 +17,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA * - * $Id: audio_decoder.c,v 1.31 2006/12/26 03:20:12 dgp85 Exp $ + * $Id: audio_decoder.c,v 1.32 2007/01/13 21:19:52 miguelfreitas Exp $ * * xine audio decoder plugin using ffmpeg * @@ -25,6 +25,7 @@ #ifdef HAVE_CONFIG_H #include "config.h" +#include "ffmpeg_config.h" #endif #include <stdlib.h> @@ -107,8 +108,8 @@ static const ff_codec_t ff_audio_lookup[] = { {BUF_AUDIO_TRUESPEECH, CODEC_ID_TRUESPEECH, "TrueSpeech (ffmpeg)"}, {BUF_AUDIO_TTA, CODEC_ID_TTA, "True Audio Lossless (ffmpeg)"}, {BUF_AUDIO_SMACKER, CODEC_ID_SMACKAUDIO, "Smacker (ffmpeg)"}, - {BUF_AUDIO_FLVADPCM, CODEC_ID_ADPCM_SWF, "Flash ADPCM (ffmpeg)"}, - {BUF_AUDIO_WAVPACK, CODEC_ID_WAVPACK, "WavPack (ffmpeg)"}, + {BUF_AUDIO_FLVADPCM, CODEC_ID_ADPCM_SWF, "Flash ADPCM (ffmpeg)"}, + {BUF_AUDIO_WAVPACK, CODEC_ID_WAVPACK, "WavPack (ffmpeg)"}, }; @@ -443,39 +444,106 @@ void *init_audio_plugin (xine_t *xine, void *data) { } static uint32_t supported_audio_types[] = { + #ifdef CONFIG_WMAV1_DECODER BUF_AUDIO_WMAV1, + #endif + #ifdef CONFIG_WMAV2_DECODER BUF_AUDIO_WMAV2, + #endif + #ifdef CONFIG_RA_144_DECODER BUF_AUDIO_14_4, + #endif + #ifdef CONFIG_RA_288_DECODER BUF_AUDIO_28_8, - BUF_AUDIO_MULAW, - BUF_AUDIO_ALAW, + #endif + #ifdef CONFIG_MP3_DECODER + BUF_AUDIO_MPEG, + #endif + #ifdef CONFIG_ADPCM_MS_DECODER BUF_AUDIO_MSADPCM, + #endif + #ifdef CONFIG_ADPCM_IMA_QT_DECODER BUF_AUDIO_QTIMAADPCM, + #endif + #ifdef CONFIG_ADPCM_IMA_WAV_DECODER BUF_AUDIO_MSIMAADPCM, + #endif + #ifdef CONFIG_ADPCM_IMA_DK3_DECODER BUF_AUDIO_DK3ADPCM, + #endif + #ifdef CONFIG_ADPCM_IMA_DK4_DECODER BUF_AUDIO_DK4ADPCM, + #endif + #ifdef CONFIG_ADPCM_IMA_WS_DECODER + BUF_AUDIO_VQA_IMA, + #endif + #ifdef CONFIG_ADPCM_IMA_SMJPEG_DECODER + BUF_AUDIO_SMJPEG_IMA, + #endif + #ifdef CONFIG_ADPCM_XA_DECODER BUF_AUDIO_XA_ADPCM, + #endif + #ifdef CONFIG_ADPCM_4XM_DECODER + BUF_AUDIO_4X_ADPCM, + #endif + #ifdef CONFIG_ADPCM_EA_DECODER + BUF_AUDIO_EA_ADPCM, + #endif + #ifdef CONFIG_PCM_MULAW_DECODER + BUF_AUDIO_MULAW, + #endif + #ifdef CONFIG_PCM_ALAW_DECODER + BUF_AUDIO_ALAW, + #endif + #ifdef CONFIG_ROQ_DPCM_DECODER BUF_AUDIO_ROQ, + #endif + #ifdef CONFIG_INTERPLAY_DPCM_DECODER BUF_AUDIO_INTERPLAY, - BUF_AUDIO_VQA_IMA, - BUF_AUDIO_4X_ADPCM, + #endif + #ifdef CONFIG_MACE3_DECODER BUF_AUDIO_MAC3, + #endif + #ifdef CONFIG_MACE6_DECODER BUF_AUDIO_MAC6, + #endif + #ifdef CONFIG_XAN_DPCM_DECODER BUF_AUDIO_XAN_DPCM, + #endif + #ifdef CONFIG_VMDAUDIO_DECODER BUF_AUDIO_VMD, - BUF_AUDIO_EA_ADPCM, - BUF_AUDIO_SMJPEG_IMA, + #endif + #ifdef CONFIG_FLAC_DECODER BUF_AUDIO_FLAC, - BUF_AUDIO_ALAC, + #endif + #ifdef CONFIG_SHORTEN_DECODER BUF_AUDIO_SHORTEN, - BUF_AUDIO_MPEG, + #endif + #ifdef CONFIG_ALAC_DECODER + BUF_AUDIO_ALAC, + #endif + #ifdef CONFIG_QDM2_DECODER BUF_AUDIO_QDESIGN2, + #endif + #ifdef CONFIG_COOK_DECODER BUF_AUDIO_COOK, + #endif + #ifdef CONFIG_TRUESPEECH_DECODER BUF_AUDIO_TRUESPEECH, + #endif + #ifdef CONFIG_TTA_DECODER BUF_AUDIO_TTA, + #endif + #ifdef CONFIG_SMACKAUDIO_DECODER BUF_AUDIO_SMACKER, + #endif + #ifdef CONFIG_ADPCM_SWF_DECODER BUF_AUDIO_FLVADPCM, + #endif + #ifdef CONFIG_WAVPACK_DECODER BUF_AUDIO_WAVPACK, + #endif + 0 }; diff --git a/src/libffmpeg/diff_to_ffmpeg_cvs.txt b/src/libffmpeg/diff_to_ffmpeg_cvs.txt index 
7e19e643c..b813b3ab2 100644 --- a/src/libffmpeg/diff_to_ffmpeg_cvs.txt +++ b/src/libffmpeg/diff_to_ffmpeg_cvs.txt @@ -1,74 +1,79 @@ -Index: libavcodec/avcodec.h +Index: libavutil/internal.h =================================================================== ---- libavcodec/avcodec.h (revision 7221) -+++ libavcodec/avcodec.h (working copy) -@@ -47,6 +47,13 @@ - #define AV_TIME_BASE 1000000 - #define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE} +--- libavutil/internal.h (revision 7433) ++++ libavutil/internal.h (working copy) +@@ -181,11 +181,15 @@ + #include <assert.h> -+/* FIXME: We cannot use ffmpeg's XvMC capabilities, since that would require -+ * linking the ffmpeg plugin against XvMC libraries, which is a bad thing, -+ * since they are output dependend. -+ * The correct fix would be to reimplement the XvMC functions libavcodec uses -+ * and do the necessary talking with our XvMC output plugin there. */ -+#undef HAVE_XVMC -+ - enum CodecID { - CODEC_ID_NONE, - CODEC_ID_MPEG1VIDEO, -@@ -2686,6 +2693,13 @@ + /* dprintf macros */ +-#ifdef DEBUG +-# define dprintf(fmt,...) av_log(NULL, AV_LOG_DEBUG, fmt, __VA_ARGS__) +-#else +-# define dprintf(fmt,...) +-#endif ++# ifdef DEBUG ++# ifdef __GNUC__ ++# define dprintf(fmt,args...) av_log(NULL, AV_LOG_DEBUG, fmt, ##args) ++# else ++# define dprintf(fmt,...) av_log(NULL, AV_LOG_DEBUG, fmt, __VA_ARGS__) ++# endif ++# else ++# define dprintf(fmt,...) ++# endif - extern unsigned int av_xiphlacing(unsigned char *s, unsigned int v); + #define av_abort() do { av_log(NULL, AV_LOG_ERROR, "Abort at %s:%d\n", __FILE__, __LINE__); abort(); } while (0) -+/* unused static macro */ -+#if defined(__GNUC__) && !defined(DEBUG) -+/* since we do not compile the encoder part of ffmpeg, some static -+ * functions will be unused; this is ok, the compiler will take care */ -+# define static static __attribute__((__unused__)) -+#endif -+ - #ifdef __cplusplus - } - #endif -Index: libavcodec/dsputil.h +Index: libavutil/integer.c =================================================================== ---- libavcodec/dsputil.h (revision 7221) -+++ libavcodec/dsputil.h (working copy) -@@ -33,6 +33,9 @@ - #include "common.h" - #include "avcodec.h" - -+#if defined(ARCH_X86) || defined(ARCH_X86_64) -+#define HAVE_MMX 1 -+#endif +--- libavutil/integer.c (revision 7433) ++++ libavutil/integer.c (working copy) +@@ -126,8 +126,8 @@ + AVInteger quot_temp; + if(!quot) quot = "_temp; - //#define DEBUG - /* dct code */ -Index: libavcodec/motion_est.c +- assert((int16_t)a[AV_INTEGER_SIZE-1] >= 0 && (int16_t)b[AV_INTEGER_SIZE-1] >= 0); +- assert(av_log2(b)>=0); ++ assert((int16_t)a.v[AV_INTEGER_SIZE-1] >= 0 && (int16_t)b.v[AV_INTEGER_SIZE-1] >= 0); ++ assert(av_log2_i(b)>=0); + + if(i > 0) + b= av_shr_i(b, -i); +Index: libavutil/common.h =================================================================== ---- libavcodec/motion_est.c (revision 7221) -+++ libavcodec/motion_est.c (working copy) -@@ -23,6 +23,9 @@ - * new Motion Estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at> - */ +--- libavutil/common.h (revision 7433) ++++ libavutil/common.h (working copy) +@@ -345,4 +345,27 @@ + char *av_strdup(const char *s); + void av_freep(void *ptr); -+/* motion estimation only needed for encoders */ -+#ifdef CONFIG_ENCODERS ++/* xine: inline causes trouble for debug compiling */ ++#ifdef DISABLE_INLINE ++# ifdef inline ++# undef inline ++# endif ++# ifdef always_inline ++# undef always_inline ++# endif ++# define inline ++# define always_inline ++#endif + - /** - * @file motion_est.c - * 
Motion estimation. -@@ -2112,3 +2115,5 @@ - } - } - } ++/* xine: define ASMALIGN here since it's cleaner that generating it in the configure */ ++#if HAVE_ASMALIGN_POT ++# define ASMALIGN(ZEROBITS) ".align " #ZEROBITS "\n\t" ++#else ++# define ASMALIGN(ZEROBITS) ".align 1<<" #ZEROBITS "\n\t" ++#endif ++ ++/* xine: another config.h with codecs to use */ ++#include "ffmpeg_config.h" ++ + #endif /* COMMON_H */ + -+#endif /* CONFIG_ENCODERS */ Index: libavcodec/mjpeg.c =================================================================== -diff -u -r1.38 mjpeg.c ---- libavcodec/mjpeg.c 4 Dec 2006 22:25:19 -0000 1.38 -+++ libavcodec/mjpeg.c 30 Dec 2006 22:21:34 -0000 +--- libavcodec/mjpeg.c (revision 7433) ++++ libavcodec/mjpeg.c (working copy) @@ -38,6 +38,13 @@ #include "mpegvideo.h" #include "bytestream.h" @@ -83,27 +88,61 @@ diff -u -r1.38 mjpeg.c /* use two quantizer tables (one for luminance and one for chrominance) */ /* not yet working */ #undef TWOMATRIXES -Index: libavcodec/mpeg12.c +Index: libavcodec/i386/dsputil_mmx.c =================================================================== ---- libavcodec/mpeg12.c (revision 7221) -+++ libavcodec/mpeg12.c (working copy) -@@ -36,6 +36,13 @@ - //#include <assert.h> - - -+/* if xine's MPEG encoder is enabled, enable the encoding features in -+ * this particular module */ -+#if defined(XINE_MPEG_ENCODER) && !defined(CONFIG_ENCODERS) -+#define CONFIG_ENCODERS -+#endif +--- libavcodec/i386/dsputil_mmx.c (revision 7433) ++++ libavcodec/i386/dsputil_mmx.c (working copy) +@@ -2545,33 +2545,39 @@ + "pmullw %%mm5, %%mm2 \n\t" // (s-dx)*dy + "pmullw %%mm4, %%mm1 \n\t" // dx*(s-dy) + +- "movd %4, %%mm5 \n\t" +- "movd %3, %%mm4 \n\t" ++ "movd %3, %%mm5 \n\t" ++ "movd %2, %%mm4 \n\t" + "punpcklbw %%mm7, %%mm5 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + "pmullw %%mm5, %%mm3 \n\t" // src[1,1] * dx*dy + "pmullw %%mm4, %%mm2 \n\t" // src[0,1] * (s-dx)*dy + +- "movd %2, %%mm5 \n\t" +- "movd %1, %%mm4 \n\t" ++ "movd %1, %%mm5 \n\t" ++ "movd %0, %%mm4 \n\t" + "punpcklbw %%mm7, %%mm5 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + "pmullw %%mm5, %%mm1 \n\t" // src[1,0] * dx*(s-dy) + "pmullw %%mm4, %%mm0 \n\t" // src[0,0] * (s-dx)*(s-dy) +- "paddw %5, %%mm1 \n\t" ++ "paddw %4, %%mm1 \n\t" + "paddw %%mm3, %%mm2 \n\t" + "paddw %%mm1, %%mm0 \n\t" + "paddw %%mm2, %%mm0 \n\t" + +- "psrlw %6, %%mm0 \n\t" ++ "psrlw %5, %%mm0 \n\t" + "packuswb %%mm0, %%mm0 \n\t" +- "movd %%mm0, %0 \n\t" + +- : "=m"(dst[x+y*stride]) ++ : + : "m"(src[0]), "m"(src[1]), + "m"(src[stride]), "m"(src[stride+1]), + "m"(*r4), "m"(shift2) + ); ++ ++ asm volatile( ++ "movd %%mm0, %0 \n\t" + -+ - /* Start codes. 
*/ - #define SEQ_END_CODE 0x000001b7 - #define SEQ_START_CODE 0x000001b3 ++ : "=m"(dst[x+y*stride]) ++ : ++ ); + src += stride; + } + src += 4-h*stride; Index: libavcodec/mpegvideo.c =================================================================== ---- libavcodec/mpegvideo.c (revision 7221) +--- libavcodec/mpegvideo.c (revision 7433) +++ libavcodec/mpegvideo.c (working copy) @@ -40,6 +40,14 @@ //#undef NDEBUG @@ -163,7 +202,7 @@ Index: libavcodec/mpegvideo.c if(avctx->rc_buffer_size){ RateControlContext *rcc= &s->rc_context; -@@ -4575,6 +4593,8 @@ +@@ -4574,6 +4592,8 @@ case CODEC_ID_MPEG1VIDEO: case CODEC_ID_MPEG2VIDEO: mpeg1_encode_mb(s, s->block, motion_x, motion_y); break; @@ -172,7 +211,7 @@ Index: libavcodec/mpegvideo.c case CODEC_ID_MPEG4: mpeg4_encode_mb(s, s->block, motion_x, motion_y); break; case CODEC_ID_MSMPEG4V2: -@@ -4595,6 +4615,7 @@ +@@ -4594,6 +4614,7 @@ h263_encode_mb(s, s->block, motion_x, motion_y); break; case CODEC_ID_MJPEG: mjpeg_encode_mb(s, s->block); break; @@ -180,7 +219,7 @@ Index: libavcodec/mpegvideo.c default: assert(0); } -@@ -4816,6 +4837,8 @@ +@@ -4815,6 +4836,8 @@ +sse(s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize); } @@ -189,7 +228,7 @@ Index: libavcodec/mpegvideo.c static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){ MpegEncContext *s= arg; -@@ -4859,6 +4882,7 @@ +@@ -4860,6 +4883,7 @@ } return 0; } @@ -197,7 +236,7 @@ Index: libavcodec/mpegvideo.c static int mb_var_thread(AVCodecContext *c, void *arg){ MpegEncContext *s= arg; -@@ -4883,6 +4907,8 @@ +@@ -4886,6 +4910,8 @@ } static void write_slice_end(MpegEncContext *s){ @@ -206,7 +245,7 @@ Index: libavcodec/mpegvideo.c if(s->codec_id==CODEC_ID_MPEG4){ if(s->partitioned_frame){ ff_mpeg4_merge_partitions(s); -@@ -4892,6 +4918,7 @@ +@@ -4895,6 +4921,7 @@ }else if(s->out_format == FMT_MJPEG){ ff_mjpeg_stuffing(&s->pb); } @@ -214,7 +253,7 @@ Index: libavcodec/mpegvideo.c align_put_bits(&s->pb); flush_put_bits(&s->pb); -@@ -4945,10 +4972,13 @@ +@@ -4950,10 +4977,13 @@ case CODEC_ID_FLV1: s->gob_index = ff_h263_get_gob_height(s); break; @@ -228,7 +267,7 @@ Index: libavcodec/mpegvideo.c } s->resync_mb_x=0; -@@ -5021,9 +5051,12 @@ +@@ -5026,9 +5056,12 @@ if(s->start_mb_y != mb_y || mb_x!=0){ write_slice_end(s); @@ -241,7 +280,7 @@ Index: libavcodec/mpegvideo.c } assert((put_bits_count(&s->pb)&7) == 0); -@@ -5047,19 +5080,25 @@ +@@ -5052,19 +5085,25 @@ } switch(s->codec_id){ @@ -267,18 +306,18 @@ Index: libavcodec/mpegvideo.c } if(s->flags&CODEC_FLAG_PASS1){ -@@ -5172,7 +5211,10 @@ - +@@ -5286,7 +5325,10 @@ + backup_s.dquant = 0; s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT; s->mb_intra= 0; +/* xine: do not need this for decode or MPEG-1 encoding modes */ +#if 0 - ff_mpeg4_set_direct_mv(s, mx, my); + ff_mpeg4_set_direct_mv(s, 0, 0); +#endif /* #if 0 */ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb, - &dmin, &next_block, mx, my); + &dmin, &next_block, 0, 0); } -@@ -5354,7 +5396,10 @@ +@@ -5400,7 +5442,10 @@ s->mb_intra= 0; motion_x=s->b_direct_mv_table[xy][0]; motion_y=s->b_direct_mv_table[xy][1]; @@ -287,9 +326,9 @@ Index: libavcodec/mpegvideo.c ff_mpeg4_set_direct_mv(s, motion_x, motion_y); +#endif /* #if 0 */ break; - case CANDIDATE_MB_TYPE_BIDIR: - s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD; -@@ -5462,8 +5507,11 @@ + case CANDIDATE_MB_TYPE_DIRECT0: + s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT; +@@ -5513,8 +5558,11 @@ } //not beautiful here but we must write it before flushing 
so it has to be here @@ -301,7 +340,7 @@ Index: libavcodec/mpegvideo.c write_slice_end(s); -@@ -5531,6 +5579,8 @@ +@@ -5582,6 +5630,8 @@ } if(s->adaptive_quant){ @@ -310,7 +349,7 @@ Index: libavcodec/mpegvideo.c switch(s->codec_id){ case CODEC_ID_MPEG4: ff_clean_mpeg4_qscales(s); -@@ -5541,6 +5591,7 @@ +@@ -5592,6 +5642,7 @@ ff_clean_h263_qscales(s); break; } @@ -318,7 +357,7 @@ Index: libavcodec/mpegvideo.c s->lambda= s->lambda_table[0]; //FIXME broken -@@ -5562,10 +5613,13 @@ +@@ -5613,10 +5664,13 @@ s->me.mb_var_sum_temp = s->me.mc_mb_var_sum_temp = 0; @@ -332,7 +371,7 @@ Index: libavcodec/mpegvideo.c s->me.scene_change_score=0; -@@ -5596,6 +5650,8 @@ +@@ -5647,6 +5701,8 @@ ff_update_duplicate_context(s->thread_context[i], s); } @@ -341,7 +380,7 @@ Index: libavcodec/mpegvideo.c ff_init_me(s); /* Estimate motion for every MB */ -@@ -5610,6 +5666,8 @@ +@@ -5661,6 +5717,8 @@ s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count); }else /* if(s->pict_type == I_TYPE) */{ @@ -350,7 +389,7 @@ Index: libavcodec/mpegvideo.c /* I-Frame */ for(i=0; i<s->mb_stride*s->mb_height; i++) s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA; -@@ -5633,6 +5691,8 @@ +@@ -5684,6 +5742,8 @@ //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum); } @@ -359,7 +398,7 @@ Index: libavcodec/mpegvideo.c if(!s->umvplus){ if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) { s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER); -@@ -5686,6 +5746,7 @@ +@@ -5737,6 +5797,7 @@ } } } @@ -367,7 +406,7 @@ Index: libavcodec/mpegvideo.c if (estimate_qp(s, 0) < 0) return -1; -@@ -5717,6 +5778,8 @@ +@@ -5768,6 +5829,8 @@ s->last_bits= put_bits_count(&s->pb); switch(s->out_format) { @@ -376,7 +415,7 @@ Index: libavcodec/mpegvideo.c case FMT_MJPEG: mjpeg_picture_header(s); break; -@@ -5745,11 +5808,15 @@ +@@ -5796,11 +5859,15 @@ else h263_encode_picture_header(s, picture_number); break; @@ -392,11 +431,49 @@ Index: libavcodec/mpegvideo.c default: assert(0); } +Index: libavcodec/mpeg12.c +=================================================================== +--- libavcodec/mpeg12.c (revision 7433) ++++ libavcodec/mpeg12.c (working copy) +@@ -36,6 +36,13 @@ + //#include <assert.h> + + ++/* if xine's MPEG encoder is enabled, enable the encoding features in ++ * this particular module */ ++#if defined(XINE_MPEG_ENCODER) && !defined(CONFIG_ENCODERS) ++#define CONFIG_ENCODERS ++#endif ++ ++ + /* Start codes. */ + #define SEQ_END_CODE 0x000001b7 + #define SEQ_START_CODE 0x000001b3 +Index: libavcodec/motion_est.c +=================================================================== +--- libavcodec/motion_est.c (revision 7433) ++++ libavcodec/motion_est.c (working copy) +@@ -23,6 +23,9 @@ + * new Motion Estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at> + */ + ++/* motion estimation only needed for encoders */ ++#ifdef CONFIG_ENCODERS ++ + /** + * @file motion_est.c + * Motion estimation. 
+@@ -2142,3 +2145,5 @@ + } + } + } ++ ++#endif /* CONFIG_ENCODERS */ Index: libavcodec/snow.c =================================================================== ---- libavcodec/snow.c (revision 7221) +--- libavcodec/snow.c (revision 7433) +++ libavcodec/snow.c (working copy) -@@ -1977,6 +1977,7 @@ +@@ -1982,6 +1982,7 @@ #define P_MV1 P[9] #define FLAG_QPEL 1 //must be 1 @@ -404,15 +481,15 @@ Index: libavcodec/snow.c static int encode_q_branch(SnowContext *s, int level, int x, int y){ uint8_t p_buffer[1024]; uint8_t i_buffer[1024]; -@@ -2205,6 +2206,7 @@ +@@ -2210,6 +2211,7 @@ return score; } } +#endif - static always_inline int same_block(BlockNode *a, BlockNode *b){ + static av_always_inline int same_block(BlockNode *a, BlockNode *b){ if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){ -@@ -2319,6 +2321,7 @@ +@@ -2322,6 +2324,7 @@ } } @@ -420,7 +497,7 @@ Index: libavcodec/snow.c static void encode_blocks(SnowContext *s, int search){ int x, y; int w= s->b_width; -@@ -2340,6 +2343,7 @@ +@@ -2343,6 +2346,7 @@ } } } @@ -428,7 +505,7 @@ Index: libavcodec/snow.c static void decode_blocks(SnowContext *s){ int x, y; -@@ -3910,6 +3914,7 @@ +@@ -3931,6 +3935,7 @@ } } @@ -436,7 +513,7 @@ Index: libavcodec/snow.c static int encode_init(AVCodecContext *avctx) { SnowContext *s = avctx->priv_data; -@@ -3997,6 +4002,7 @@ +@@ -4018,6 +4023,7 @@ return 0; } @@ -444,7 +521,7 @@ Index: libavcodec/snow.c static int frame_start(SnowContext *s){ AVFrame tmp; -@@ -4035,6 +4041,7 @@ +@@ -4056,6 +4062,7 @@ return 0; } @@ -452,7 +529,7 @@ Index: libavcodec/snow.c static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ SnowContext *s = avctx->priv_data; RangeCoder * const c= &s->c; -@@ -4288,6 +4295,7 @@ +@@ -4308,6 +4315,7 @@ return ff_rac_terminate(c); } @@ -460,7 +537,7 @@ Index: libavcodec/snow.c static void common_end(SnowContext *s){ int plane_index, level, orientation, i; -@@ -4319,6 +4327,7 @@ +@@ -4339,6 +4347,7 @@ } } @@ -468,7 +545,7 @@ Index: libavcodec/snow.c static int encode_end(AVCodecContext *avctx) { SnowContext *s = avctx->priv_data; -@@ -4328,6 +4337,7 @@ +@@ -4348,6 +4357,7 @@ return 0; } @@ -476,86 +553,9 @@ Index: libavcodec/snow.c static int decode_init(AVCodecContext *avctx) { -Index: libavutil/common.h -=================================================================== ---- libavutil/common.h (revision 7221) -+++ libavutil/common.h (working copy) -@@ -375,7 +375,7 @@ - ); - return (d << 32) | (a & 0xffffffff); - } --#elif defined(ARCH_X86_32) -+#elif defined(ARCH_X86) - static inline long long read_time(void) - { - long long l; -@@ -446,4 +446,23 @@ - char *av_strdup(const char *s); - void av_freep(void *ptr); - -+/* xine: inline causes trouble for debug compiling */ -+#ifdef DISABLE_INLINE -+# ifdef inline -+# undef inline -+# endif -+# ifdef always_inline -+# undef always_inline -+# endif -+# define inline -+# define always_inline -+#endif -+ -+/* xine: define ASMALIGN here since it's cleaner that generating it in the configure */ -+#if HAVE_ASMALIGN_POT -+# define ASMALIGN(ZEROBITS) ".align " #ZEROBITS "\n\t" -+#else -+# define ASMALIGN(ZEROBITS) ".align 1<<" #ZEROBITS "\n\t" -+#endif -+ - #endif /* COMMON_H */ -Index: libavutil/integer.c -=================================================================== ---- libavutil/integer.c (revision 7221) -+++ libavutil/integer.c (working copy) -@@ -126,8 +126,8 @@ - AVInteger quot_temp; - if(!quot) quot = "_temp; - -- assert((int16_t)a[AV_INTEGER_SIZE-1] >= 0 && (int16_t)b[AV_INTEGER_SIZE-1] >= 0); -- 
assert(av_log2(b)>=0); -+ assert((int16_t)a.v[AV_INTEGER_SIZE-1] >= 0 && (int16_t)b.v[AV_INTEGER_SIZE-1] >= 0); -+ assert(av_log2_i(b)>=0); - - if(i > 0) - b= av_shr_i(b, -i); -Index: libavutil/internal.h -=================================================================== ---- libavutil/internal.h (revision 7221) -+++ libavutil/internal.h (working copy) -@@ -93,11 +93,15 @@ - #include <assert.h> - - /* dprintf macros */ --#ifdef DEBUG --# define dprintf(fmt,...) av_log(NULL, AV_LOG_DEBUG, fmt, __VA_ARGS__) --#else --# define dprintf(fmt,...) --#endif -+# ifdef DEBUG -+# ifdef __GNUC__ -+# define dprintf(fmt,args...) av_log(NULL, AV_LOG_DEBUG, fmt, ##args) -+# else -+# define dprintf(fmt,...) av_log(NULL, AV_LOG_DEBUG, fmt, __VA_ARGS__) -+# endif -+# else -+# define dprintf(fmt,...) -+# endif - - #define av_abort() do { av_log(NULL, AV_LOG_ERROR, "Abort at %s:%d\n", __FILE__, __LINE__); abort(); } while (0) - Index: libavcodec/mlib/dsputil_mlib.c =================================================================== ---- libavcodec/mlib/dsputil_mlib.c (revision 7221) +--- libavcodec/mlib/dsputil_mlib.c (revision 7433) +++ libavcodec/mlib/dsputil_mlib.c (working copy) @@ -22,6 +22,8 @@ #include "../dsputil.h" @@ -566,3 +566,35 @@ Index: libavcodec/mlib/dsputil_mlib.c #include <mlib_types.h> #include <mlib_status.h> #include <mlib_sys.h> +Index: libavcodec/avcodec.h +=================================================================== +--- libavcodec/avcodec.h (revision 7433) ++++ libavcodec/avcodec.h (working copy) +@@ -47,6 +47,13 @@ + #define AV_TIME_BASE 1000000 + #define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE} + ++/* FIXME: We cannot use ffmpeg's XvMC capabilities, since that would require ++ * linking the ffmpeg plugin against XvMC libraries, which is a bad thing, ++ * since they are output dependend. ++ * The correct fix would be to reimplement the XvMC functions libavcodec uses ++ * and do the necessary talking with our XvMC output plugin there. 
*/ ++#undef HAVE_XVMC ++ + enum CodecID { + CODEC_ID_NONE, + CODEC_ID_MPEG1VIDEO, +@@ -2688,6 +2695,13 @@ + + extern unsigned int av_xiphlacing(unsigned char *s, unsigned int v); + ++/* unused static macro */ ++#if defined(__GNUC__) && !defined(DEBUG) ++/* since we do not compile the encoder part of ffmpeg, some static ++ * functions will be unused; this is ok, the compiler will take care */ ++# define static static __attribute__((__unused__)) ++#endif ++ + #ifdef __cplusplus + } + #endif diff --git a/src/libffmpeg/libavcodec/Makefile.am b/src/libffmpeg/libavcodec/Makefile.am index cf34b0d28..cae72eeff 100644 --- a/src/libffmpeg/libavcodec/Makefile.am +++ b/src/libffmpeg/libavcodec/Makefile.am @@ -4,14 +4,15 @@ SUBDIRS = armv4l i386 mlib alpha ppc sparc libpostproc # some of ffmpeg's decoders are not used by xine yet EXTRA_DIST = motion_est_template.c \ - adx.c cljr.c fdctref.c ffv1.c g726.c jpeg_ls.c mdec.c raw.c snow.c svq3.c wmv2.c + adx.c cljr.c fdctref.c ffv1.c g726.c jpeg_ls.c mdec.c raw.c svq3.c wmv2.c # we need to compile everything in debug mode, including the encoders, # otherwise we get unresolved symbols, because some unsatisfied function calls # are not optimized away with debug optimization -AM_CFLAGS = `test "$(CFLAGS)" = "$(DEBUG_CFLAGS)" && echo -DCONFIG_ENCODERS` -fno-strict-aliasing -DCONFIG_VC1_DECODER +#AM_CFLAGS = `test "$(CFLAGS)" = "$(DEBUG_CFLAGS)" && echo -DCONFIG_ENCODERS` -fno-strict-aliasing +AM_CFLAGS = `test "$(CFLAGS)" = "$(DEBUG_CFLAGS)"` -fno-strict-aliasing AM_CPPFLAGS = $(ZLIB_CPPFLAGS) $(LIBFFMPEG_CPPFLAGS) \ - -I$(top_srcdir)/src/libffmpeg/libavutil + -I$(top_srcdir)/src/libffmpeg/libavutil -I$(top_srcdir)/src/libffmpeg ASFLAGS = noinst_LTLIBRARIES = libavcodec.la @@ -94,6 +95,7 @@ libavcodec_la_SOURCES = \ simple_idct.c \ smacker.c \ smc.c \ + snow.c \ svq1.c \ tscc.c \ truemotion1.c \ @@ -110,7 +112,12 @@ libavcodec_la_SOURCES = \ vorbis_data.c \ vp3.c \ vp3dsp.c \ + vp5.c \ + vp56.c \ + vp56data.c \ + vp6.c \ vqavideo.c \ + wavpack.c \ wmadec.c \ wnv1.c \ xan.c \ @@ -175,4 +182,8 @@ noinst_HEADERS = \ vc1acdata.h \ vc1data.h \ vp3data.h \ + vp56.h \ + vp56data.h \ + vp5data.h \ + vp6data.h \ wmadata.h diff --git a/src/libffmpeg/libavcodec/armv4l/Makefile.am b/src/libffmpeg/libavcodec/armv4l/Makefile.am index 0f3d230f6..33e0882c9 100644 --- a/src/libffmpeg/libavcodec/armv4l/Makefile.am +++ b/src/libffmpeg/libavcodec/armv4l/Makefile.am @@ -6,7 +6,12 @@ ASFLAGS = noinst_LTLIBRARIES = libavcodec_armv4l.la -libavcodec_armv4l_src = dsputil_arm.c jrevdct_arm.S mpegvideo_arm.c simple_idct_arm.S +libavcodec_armv4l_src = dsputil_arm.c jrevdct_arm.S mpegvideo_arm.c simple_idct_arm.S \ + dsputil_arm_s.S dsputil_iwmmxt.c dsputil_iwmmxt_rnd.h \ + mpegvideo_armv5te.c mpegvideo_iwmmxt.c simple_idct_armv5te.S + +noinst_HEADERS = mathops.h + libavcodec_armv4l_dummy = libavcodec_armv4l_dummy.c EXTRA_DIST = $(libavcodec_armv4l_src) $(libavcodec_armv4l_dummy) diff --git a/src/libffmpeg/libavcodec/armv4l/dsputil_arm_s.S b/src/libffmpeg/libavcodec/armv4l/dsputil_arm_s.S new file mode 100644 index 000000000..2a3ee9c50 --- /dev/null +++ b/src/libffmpeg/libavcodec/armv4l/dsputil_arm_s.S @@ -0,0 +1,696 @@ +@ +@ ARMv4L optimized DSP utils +@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp> +@ +@ This file is part of FFmpeg. 
+@ +@ FFmpeg is free software; you can redistribute it and/or +@ modify it under the terms of the GNU Lesser General Public +@ License as published by the Free Software Foundation; either +@ version 2.1 of the License, or (at your option) any later version. +@ +@ FFmpeg is distributed in the hope that it will be useful, +@ but WITHOUT ANY WARRANTY; without even the implied warranty of +@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +@ Lesser General Public License for more details. +@ +@ You should have received a copy of the GNU Lesser General Public +@ License along with FFmpeg; if not, write to the Free Software +@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +@ + +.macro ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4 + mov \Rd0, \Rn0, lsr #(\shift * 8) + mov \Rd1, \Rn1, lsr #(\shift * 8) + mov \Rd2, \Rn2, lsr #(\shift * 8) + mov \Rd3, \Rn3, lsr #(\shift * 8) + orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8) + orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8) + orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8) + orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8) +.endm +.macro ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2 + mov \R0, \R0, lsr #(\shift * 8) + orr \R0, \R0, \R1, lsl #(32 - \shift * 8) + mov \R1, \R1, lsr #(\shift * 8) + orr \R1, \R1, \R2, lsl #(32 - \shift * 8) +.endm +.macro ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2 + mov \Rdst0, \Rsrc0, lsr #(\shift * 8) + mov \Rdst1, \Rsrc1, lsr #(\shift * 8) + orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8)) + orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8)) +.endm + +.macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask + @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1) + @ Rmask = 0xFEFEFEFE + @ Rn = destroy + eor \Rd0, \Rn0, \Rm0 + eor \Rd1, \Rn1, \Rm1 + orr \Rn0, \Rn0, \Rm0 + orr \Rn1, \Rn1, \Rm1 + and \Rd0, \Rd0, \Rmask + and \Rd1, \Rd1, \Rmask + sub \Rd0, \Rn0, \Rd0, lsr #1 + sub \Rd1, \Rn1, \Rd1, lsr #1 +.endm + +.macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask + @ Rd = (Rn & Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1) + @ Rmask = 0xFEFEFEFE + @ Rn = destroy + eor \Rd0, \Rn0, \Rm0 + eor \Rd1, \Rn1, \Rm1 + and \Rn0, \Rn0, \Rm0 + and \Rn1, \Rn1, \Rm1 + and \Rd0, \Rd0, \Rmask + and \Rd1, \Rd1, \Rmask + add \Rd0, \Rn0, \Rd0, lsr #1 + add \Rd1, \Rn1, \Rd1, lsr #1 +.endm + +@ ---------------------------------------------------------------- + .align 8 + .global put_pixels16_arm +put_pixels16_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned + pld [r1] + stmfd sp!, {r4-r11, lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 + bic r1, r1, #3 + add r5, r5, r4, lsl #2 + ldrne pc, [r5] +1: + ldmia r1, {r4-r7} + add r1, r1, r2 + stmia r0, {r4-r7} + pld [r1] + subs r3, r3, #1 + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r11, pc} + .align 8 +2: + ldmia r1, {r4-r8} + add r1, r1, r2 + ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8 + pld [r1] + subs r3, r3, #1 + stmia r0, {r9-r12} + add r0, r0, r2 + bne 2b + ldmfd sp!, {r4-r11, pc} + .align 8 +3: + ldmia r1, {r4-r8} + add r1, r1, r2 + ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8 + pld [r1] + subs r3, r3, #1 + stmia r0, {r9-r12} + add r0, r0, r2 + bne 3b + ldmfd sp!, {r4-r11, pc} + .align 8 +4: + ldmia r1, {r4-r8} + add r1, r1, r2 + ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8 + pld [r1] + subs r3, r3, #1 + stmia r0, {r9-r12} + add r0, r0, r2 + bne 4b + ldmfd sp!, {r4-r11,pc} + .align 8 
+5: + .word 1b + .word 2b + .word 3b + .word 4b + +@ ---------------------------------------------------------------- + .align 8 + .global put_pixels8_arm +put_pixels8_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned + pld [r1] + stmfd sp!, {r4-r5,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 + bic r1, r1, #3 + add r5, r5, r4, lsl #2 + ldrne pc, [r5] +1: + ldmia r1, {r4-r5} + add r1, r1, r2 + subs r3, r3, #1 + pld [r1] + stmia r0, {r4-r5} + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r5,pc} + .align 8 +2: + ldmia r1, {r4-r5, r12} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12 + pld [r1] + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 + bne 2b + ldmfd sp!, {r4-r5,pc} + .align 8 +3: + ldmia r1, {r4-r5, r12} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12 + pld [r1] + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 + bne 3b + ldmfd sp!, {r4-r5,pc} + .align 8 +4: + ldmia r1, {r4-r5, r12} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12 + pld [r1] + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 + bne 4b + ldmfd sp!, {r4-r5,pc} + .align 8 +5: + .word 1b + .word 2b + .word 3b + .word 4b + +@ ---------------------------------------------------------------- + .align 8 + .global put_pixels8_x2_arm +put_pixels8_x2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned + pld [r1] + stmfd sp!, {r4-r10,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 + ldr r12, [r5] + add r5, r5, r4, lsl #2 + bic r1, r1, #3 + ldrne pc, [r5] +1: + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 + pld [r1] + RND_AVG32 r8, r9, r4, r5, r6, r7, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r10,pc} + .align 8 +2: + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 + ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10 + pld [r1] + RND_AVG32 r4, r5, r6, r7, r8, r9, r12 + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 + bne 2b + ldmfd sp!, {r4-r10,pc} + .align 8 +3: + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10 + ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10 + pld [r1] + RND_AVG32 r4, r5, r6, r7, r8, r9, r12 + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 + bne 3b + ldmfd sp!, {r4-r10,pc} + .align 8 +4: + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10 + pld [r1] + RND_AVG32 r8, r9, r6, r7, r5, r10, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} + add r0, r0, r2 + bne 4b + ldmfd sp!, {r4-r10,pc} @@ update PC with LR content. 
+ .align 8 +5: + .word 0xFEFEFEFE + .word 2b + .word 3b + .word 4b + + .align 8 + .global put_no_rnd_pixels8_x2_arm +put_no_rnd_pixels8_x2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned + pld [r1] + stmfd sp!, {r4-r10,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 + ldr r12, [r5] + add r5, r5, r4, lsl #2 + bic r1, r1, #3 + ldrne pc, [r5] +1: + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 + pld [r1] + NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r10,pc} + .align 8 +2: + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 + ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10 + pld [r1] + NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 + bne 2b + ldmfd sp!, {r4-r10,pc} + .align 8 +3: + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10 + ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10 + pld [r1] + NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 + bne 3b + ldmfd sp!, {r4-r10,pc} + .align 8 +4: + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10 + pld [r1] + NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} + add r0, r0, r2 + bne 4b + ldmfd sp!, {r4-r10,pc} @@ update PC with LR content. + .align 8 +5: + .word 0xFEFEFEFE + .word 2b + .word 3b + .word 4b + + +@ ---------------------------------------------------------------- + .align 8 + .global put_pixels8_y2_arm +put_pixels8_y2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned + pld [r1] + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 + mov r3, r3, lsr #1 + ldr r12, [r5] + add r5, r5, r4, lsl #2 + bic r1, r1, #3 + ldrne pc, [r5] +1: + ldmia r1, {r4-r5} + add r1, r1, r2 +6: ldmia r1, {r6-r7} + add r1, r1, r2 + pld [r1] + RND_AVG32 r8, r9, r4, r5, r6, r7, r12 + ldmia r1, {r4-r5} + add r1, r1, r2 + stmia r0, {r8-r9} + add r0, r0, r2 + pld [r1] + RND_AVG32 r8, r9, r6, r7, r4, r5, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} + .align 8 +2: + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 +6: ldmia r1, {r7-r9} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9 + RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 + subs r3, r3, #1 + RND_AVG32 r10, r11, r7, r8, r4, r5, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} + .align 8 +3: + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 +6: ldmia r1, {r7-r9} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9 + RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 + subs r3, r3, #1 + RND_AVG32 r10, r11, r7, r8, r4, r5, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} + .align 8 +4: + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 +6: ldmia r1, {r7-r9} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9 + 
RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 + subs r3, r3, #1 + RND_AVG32 r10, r11, r7, r8, r4, r5, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} + + .align 8 +5: + .word 0xFEFEFEFE + .word 2b + .word 3b + .word 4b + + .align 8 + .global put_no_rnd_pixels8_y2_arm +put_no_rnd_pixels8_y2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned + pld [r1] + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 + mov r3, r3, lsr #1 + ldr r12, [r5] + add r5, r5, r4, lsl #2 + bic r1, r1, #3 + ldrne pc, [r5] +1: + ldmia r1, {r4-r5} + add r1, r1, r2 +6: ldmia r1, {r6-r7} + add r1, r1, r2 + pld [r1] + NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 + ldmia r1, {r4-r5} + add r1, r1, r2 + stmia r0, {r8-r9} + add r0, r0, r2 + pld [r1] + NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} + .align 8 +2: + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 +6: ldmia r1, {r7-r9} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9 + NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 + subs r3, r3, #1 + NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} + .align 8 +3: + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 +6: ldmia r1, {r7-r9} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9 + NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 + subs r3, r3, #1 + NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} + .align 8 +4: + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 +6: ldmia r1, {r7-r9} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9 + NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 + subs r3, r3, #1 + NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} + .align 8 +5: + .word 0xFEFEFEFE + .word 2b + .word 3b + .word 4b + +@ ---------------------------------------------------------------- +.macro RND_XY2_IT align, rnd + @ l1= (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202) + @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2) +.if \align == 0 + ldmia r1, {r6-r8} +.elseif \align == 3 + ldmia r1, {r5-r7} +.else + ldmia r1, {r8-r10} +.endif + add r1, r1, r2 + pld [r1] +.if \align == 0 + ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8 +.elseif \align == 1 + ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r8, r9, r10 + ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r8, r9, r10 +.elseif \align == 2 + ADJ_ALIGN_DOUBLEWORD_D 2, r4, r5, r8, r9, r10 + ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r8, r9, r10 +.elseif \align == 3 + ADJ_ALIGN_DOUBLEWORD_D 3, r4, r5, r5, r6, r7 +.endif + ldr r14, [r12, #0] @ 0x03030303 + tst r3, #1 + and r8, r4, r14 + and r9, r5, r14 + and r10, r6, r14 + and r11, r7, r14 +.if \rnd == 1 + ldreq r14, [r12, #16] @ 0x02020202 
+.else + ldreq r14, [r12, #28] @ 0x01010101 +.endif + add r8, r8, r10 + add r9, r9, r11 + addeq r8, r8, r14 + addeq r9, r9, r14 + ldr r14, [r12, #20] @ 0xFCFCFCFC >> 2 + and r4, r14, r4, lsr #2 + and r5, r14, r5, lsr #2 + and r6, r14, r6, lsr #2 + and r7, r14, r7, lsr #2 + add r10, r4, r6 + add r11, r5, r7 +.endm + +.macro RND_XY2_EXPAND align, rnd + RND_XY2_IT \align, \rnd +6: stmfd sp!, {r8-r11} + RND_XY2_IT \align, \rnd + ldmfd sp!, {r4-r7} + add r4, r4, r8 + add r5, r5, r9 + add r6, r6, r10 + add r7, r7, r11 + ldr r14, [r12, #24] @ 0x0F0F0F0F + and r4, r14, r4, lsr #2 + and r5, r14, r5, lsr #2 + add r4, r4, r6 + add r5, r5, r7 + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} +.endm + + .align 8 + .global put_pixels8_xy2_arm +put_pixels8_xy2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned + pld [r1] + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + adrl r12, 5f + ands r4, r1, #3 + add r5, r12, r4, lsl #2 + bic r1, r1, #3 + ldrne pc, [r5] +1: + RND_XY2_EXPAND 0, 1 + + .align 8 +2: + RND_XY2_EXPAND 1, 1 + + .align 8 +3: + RND_XY2_EXPAND 2, 1 + + .align 8 +4: + RND_XY2_EXPAND 3, 1 + +5: + .word 0x03030303 + .word 2b + .word 3b + .word 4b + .word 0x02020202 + .word 0xFCFCFCFC >> 2 + .word 0x0F0F0F0F + .word 0x01010101 + + .align 8 + .global put_no_rnd_pixels8_xy2_arm +put_no_rnd_pixels8_xy2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned + pld [r1] + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + adrl r12, 5f + ands r4, r1, #3 + add r5, r12, r4, lsl #2 + bic r1, r1, #3 + ldrne pc, [r5] +1: + RND_XY2_EXPAND 0, 0 + + .align 8 +2: + RND_XY2_EXPAND 1, 0 + + .align 8 +3: + RND_XY2_EXPAND 2, 0 + + .align 8 +4: + RND_XY2_EXPAND 3, 0 + +5: + .word 0x03030303 + .word 2b + .word 3b + .word 4b + .word 0x02020202 + .word 0xFCFCFCFC >> 2 + .word 0x0F0F0F0F + .word 0x01010101 diff --git a/src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt.c b/src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt.c new file mode 100644 index 000000000..d7401e760 --- /dev/null +++ b/src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt.c @@ -0,0 +1,188 @@ +/* + * iWMMXt optimized DSP utils + * Copyright (c) 2004 AGAWA Koji + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "../dsputil.h" + +#define DEF(x, y) x ## _no_rnd_ ## y ##_iwmmxt +#define SET_RND(regd) __asm__ __volatile__ ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12"); +#define WAVG2B "wavg2b" +#include "dsputil_iwmmxt_rnd.h" +#undef DEF +#undef SET_RND +#undef WAVG2B + +#define DEF(x, y) x ## _ ## y ##_iwmmxt +#define SET_RND(regd) __asm__ __volatile__ ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12"); +#define WAVG2B "wavg2br" +#include "dsputil_iwmmxt_rnd.h" +#undef DEF +#undef SET_RND +#undef WAVG2BR + +// need scheduling +#define OP(AVG) \ + asm volatile ( \ + /* alignment */ \ + "and r12, %[pixels], #7 \n\t" \ + "bic %[pixels], %[pixels], #7 \n\t" \ + "tmcr wcgr1, r12 \n\t" \ + \ + "wldrd wr0, [%[pixels]] \n\t" \ + "wldrd wr1, [%[pixels], #8] \n\t" \ + "add %[pixels], %[pixels], %[line_size] \n\t" \ + "walignr1 wr4, wr0, wr1 \n\t" \ + \ + "1: \n\t" \ + \ + "wldrd wr2, [%[pixels]] \n\t" \ + "wldrd wr3, [%[pixels], #8] \n\t" \ + "add %[pixels], %[pixels], %[line_size] \n\t" \ + "pld [%[pixels]] \n\t" \ + "walignr1 wr5, wr2, wr3 \n\t" \ + AVG " wr6, wr4, wr5 \n\t" \ + "wstrd wr6, [%[block]] \n\t" \ + "add %[block], %[block], %[line_size] \n\t" \ + \ + "wldrd wr0, [%[pixels]] \n\t" \ + "wldrd wr1, [%[pixels], #8] \n\t" \ + "add %[pixels], %[pixels], %[line_size] \n\t" \ + "walignr1 wr4, wr0, wr1 \n\t" \ + "pld [%[pixels]] \n\t" \ + AVG " wr6, wr4, wr5 \n\t" \ + "wstrd wr6, [%[block]] \n\t" \ + "add %[block], %[block], %[line_size] \n\t" \ + \ + "subs %[h], %[h], #2 \n\t" \ + "bne 1b \n\t" \ + : [block]"+r"(block), [pixels]"+r"(pixels), [h]"+r"(h) \ + : [line_size]"r"(line_size) \ + : "memory", "r12"); +void put_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + OP("wavg2br"); +} +void put_no_rnd_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + OP("wavg2b"); +} +#undef OP + +void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size) +{ + uint8_t *pixels2 = pixels + line_size; + + __asm__ __volatile__ ( + "mov r12, #4 \n\t" + "1: \n\t" + "pld [%[pixels], %[line_size2]] \n\t" + "pld [%[pixels2], %[line_size2]] \n\t" + "wldrd wr4, [%[pixels]] \n\t" + "wldrd wr5, [%[pixels2]] \n\t" + "pld [%[block], #32] \n\t" + "wunpckelub wr6, wr4 \n\t" + "wldrd wr0, [%[block]] \n\t" + "wunpckehub wr7, wr4 \n\t" + "wldrd wr1, [%[block], #8] \n\t" + "wunpckelub wr8, wr5 \n\t" + "wldrd wr2, [%[block], #16] \n\t" + "wunpckehub wr9, wr5 \n\t" + "wldrd wr3, [%[block], #24] \n\t" + "add %[block], %[block], #32 \n\t" + "waddhss wr10, wr0, wr6 \n\t" + "waddhss wr11, wr1, wr7 \n\t" + "waddhss wr12, wr2, wr8 \n\t" + "waddhss wr13, wr3, wr9 \n\t" + "wpackhus wr14, wr10, wr11 \n\t" + "wpackhus wr15, wr12, wr13 \n\t" + "wstrd wr14, [%[pixels]] \n\t" + "add %[pixels], %[pixels], %[line_size2] \n\t" + "subs r12, r12, #1 \n\t" + "wstrd wr15, [%[pixels2]] \n\t" + "add %[pixels2], %[pixels2], %[line_size2] \n\t" + "bne 1b \n\t" + : [block]"+r"(block), [pixels]"+r"(pixels), [pixels2]"+r"(pixels2) + : [line_size2]"r"(line_size << 1) + : "cc", "memory", "r12"); +} + +static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + return; +} + +int mm_flags; /* multimedia extension flags */ + +int mm_support(void) +{ + return 0; /* TODO, implement proper detection */ +} + +void 
dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx) +{ + mm_flags = mm_support(); + + if (avctx->dsp_mask) { + if (avctx->dsp_mask & FF_MM_FORCE) + mm_flags |= (avctx->dsp_mask & 0xffff); + else + mm_flags &= ~(avctx->dsp_mask & 0xffff); + } + + if (!(mm_flags & MM_IWMMXT)) return; + + c->add_pixels_clamped = add_pixels_clamped_iwmmxt; + + c->put_pixels_tab[0][0] = put_pixels16_iwmmxt; + c->put_pixels_tab[0][1] = put_pixels16_x2_iwmmxt; + c->put_pixels_tab[0][2] = put_pixels16_y2_iwmmxt; + c->put_pixels_tab[0][3] = put_pixels16_xy2_iwmmxt; + c->put_no_rnd_pixels_tab[0][0] = put_pixels16_iwmmxt; + c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_iwmmxt; + c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_iwmmxt; + c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_iwmmxt; + + c->put_pixels_tab[1][0] = put_pixels8_iwmmxt; + c->put_pixels_tab[1][1] = put_pixels8_x2_iwmmxt; + c->put_pixels_tab[1][2] = put_pixels8_y2_iwmmxt; + c->put_pixels_tab[1][3] = put_pixels8_xy2_iwmmxt; + c->put_no_rnd_pixels_tab[1][0] = put_pixels8_iwmmxt; + c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_iwmmxt; + c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_iwmmxt; + c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_iwmmxt; + + c->avg_pixels_tab[0][0] = avg_pixels16_iwmmxt; + c->avg_pixels_tab[0][1] = avg_pixels16_x2_iwmmxt; + c->avg_pixels_tab[0][2] = avg_pixels16_y2_iwmmxt; + c->avg_pixels_tab[0][3] = avg_pixels16_xy2_iwmmxt; + c->avg_no_rnd_pixels_tab[0][0] = avg_pixels16_iwmmxt; + c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_iwmmxt; + c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_iwmmxt; + c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_iwmmxt; + + c->avg_pixels_tab[1][0] = avg_pixels8_iwmmxt; + c->avg_pixels_tab[1][1] = avg_pixels8_x2_iwmmxt; + c->avg_pixels_tab[1][2] = avg_pixels8_y2_iwmmxt; + c->avg_pixels_tab[1][3] = avg_pixels8_xy2_iwmmxt; + c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_iwmmxt; + c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_iwmmxt; + c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_iwmmxt; + c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_iwmmxt; +} diff --git a/src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt_rnd.h b/src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt_rnd.h new file mode 100644 index 000000000..51ba61c47 --- /dev/null +++ b/src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt_rnd.h @@ -0,0 +1,1114 @@ +/* + * iWMMXt optimized DSP utils + * copyright (c) 2004 AGAWA Koji + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + __asm__ __volatile__ ( + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r4, %[pixels], %[line_size] \n\t" + "add r5, %[block], %[line_size] \n\t" + "mov %[line_size], %[line_size], lsl #1 \n\t" + "1: \n\t" + "wldrd wr0, [%[pixels]] \n\t" + "subs %[h], %[h], #2 \n\t" + "wldrd wr1, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "wldrd wr3, [r4] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "wldrd wr4, [r4, #8] \n\t" + "add r4, r4, %[line_size] \n\t" + "walignr1 wr8, wr0, wr1 \n\t" + "pld [r4] \n\t" + "pld [r4, #32] \n\t" + "walignr1 wr10, wr3, wr4 \n\t" + "wstrd wr8, [%[block]] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "wstrd wr10, [r5] \n\t" + "add r5, r5, %[line_size] \n\t" + "bne 1b \n\t" + : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) + : + : "memory", "r4", "r5", "r12"); +} + +void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + __asm__ __volatile__ ( + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r4, %[pixels], %[line_size] \n\t" + "add r5, %[block], %[line_size] \n\t" + "mov %[line_size], %[line_size], lsl #1 \n\t" + "1: \n\t" + "wldrd wr0, [%[pixels]] \n\t" + "subs %[h], %[h], #2 \n\t" + "wldrd wr1, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "wldrd wr3, [r4] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "wldrd wr4, [r4, #8] \n\t" + "add r4, r4, %[line_size] \n\t" + "walignr1 wr8, wr0, wr1 \n\t" + "wldrd wr0, [%[block]] \n\t" + "wldrd wr2, [r5] \n\t" + "pld [r4] \n\t" + "pld [r4, #32] \n\t" + "walignr1 wr10, wr3, wr4 \n\t" + WAVG2B" wr8, wr8, wr0 \n\t" + WAVG2B" wr10, wr10, wr2 \n\t" + "wstrd wr8, [%[block]] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "wstrd wr10, [r5] \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "add r5, r5, %[line_size] \n\t" + "pld [r5] \n\t" + "pld [r5, #32] \n\t" + "bne 1b \n\t" + : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) + : + : "memory", "r4", "r5", "r12"); +} + +void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + __asm__ __volatile__ ( + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r4, %[pixels], %[line_size] \n\t" + "add r5, %[block], %[line_size] \n\t" + "mov %[line_size], %[line_size], lsl #1 \n\t" + "1: \n\t" + "wldrd wr0, [%[pixels]] \n\t" + "wldrd wr1, [%[pixels], #8] \n\t" + "subs %[h], %[h], #2 \n\t" + "wldrd wr2, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "wldrd wr3, [r4] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr8, wr0, wr1 \n\t" + "wldrd wr4, [r4, #8] \n\t" + "walignr1 wr9, wr1, wr2 \n\t" + "wldrd wr5, [r4, #16] \n\t" + "add r4, r4, %[line_size] \n\t" + "pld [r4] \n\t" + "pld [r4, #32] \n\t" + "walignr1 wr10, wr3, wr4 \n\t" + "wstrd wr8, [%[block]] \n\t" + "walignr1 wr11, wr4, wr5 \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], 
%[line_size] \n\t" + "wstrd wr10, [r5] \n\t" + "wstrd wr11, [r5, #8] \n\t" + "add r5, r5, %[line_size] \n\t" + "bne 1b \n\t" + : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) + : + : "memory", "r4", "r5", "r12"); +} + +void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + __asm__ __volatile__ ( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r4, %[pixels], %[line_size]\n\t" + "add r5, %[block], %[line_size] \n\t" + "mov %[line_size], %[line_size], lsl #1 \n\t" + "1: \n\t" + "wldrd wr0, [%[pixels]] \n\t" + "wldrd wr1, [%[pixels], #8] \n\t" + "subs %[h], %[h], #2 \n\t" + "wldrd wr2, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "wldrd wr3, [r4] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr8, wr0, wr1 \n\t" + "wldrd wr4, [r4, #8] \n\t" + "walignr1 wr9, wr1, wr2 \n\t" + "wldrd wr5, [r4, #16] \n\t" + "add r4, r4, %[line_size] \n\t" + "wldrd wr0, [%[block]] \n\t" + "pld [r4] \n\t" + "wldrd wr1, [%[block], #8] \n\t" + "pld [r4, #32] \n\t" + "wldrd wr2, [r5] \n\t" + "walignr1 wr10, wr3, wr4 \n\t" + "wldrd wr3, [r5, #8] \n\t" + WAVG2B" wr8, wr8, wr0 \n\t" + WAVG2B" wr9, wr9, wr1 \n\t" + WAVG2B" wr10, wr10, wr2 \n\t" + "wstrd wr8, [%[block]] \n\t" + "walignr1 wr11, wr4, wr5 \n\t" + WAVG2B" wr11, wr11, wr3 \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "wstrd wr10, [r5] \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "wstrd wr11, [r5, #8] \n\t" + "add r5, r5, %[line_size] \n\t" + "pld [r5] \n\t" + "pld [r5, #32] \n\t" + "bne 1b \n\t" + : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) + : + : "memory", "r4", "r5", "r12"); +} + +void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r12, r12, #1 \n\t" + "add r4, %[pixels], %[line_size]\n\t" + "tmcr wcgr2, r12 \n\t" + "add r5, %[block], %[line_size] \n\t" + "mov %[line_size], %[line_size], lsl #1 \n\t" + + "1: \n\t" + "wldrd wr10, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "wldrd wr13, [r4] \n\t" + "pld [%[pixels]] \n\t" + "wldrd wr14, [r4, #8] \n\t" + "pld [%[pixels], #32] \n\t" + "add r4, r4, %[line_size] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "pld [r4] \n\t" + "pld [r4, #32] \n\t" + "walignr1 wr2, wr13, wr14 \n\t" + "wmoveq wr4, wr11 \n\t" + "wmoveq wr6, wr14 \n\t" + "walignr2ne wr4, wr10, wr11 \n\t" + "walignr2ne wr6, wr13, wr14 \n\t" + WAVG2B" wr0, wr0, wr4 \n\t" + WAVG2B" wr2, wr2, wr6 \n\t" + "wstrd wr0, [%[block]] \n\t" + "subs %[h], %[h], #2 \n\t" + "wstrd wr2, [r5] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "add r5, r5, %[line_size] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) + : + : "r4", "r5", "r12", "memory"); +} + +void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ 
+ int stride = line_size; + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r12, r12, #1 \n\t" + "add r4, %[pixels], %[line_size]\n\t" + "tmcr wcgr2, r12 \n\t" + "add r5, %[block], %[line_size] \n\t" + "mov %[line_size], %[line_size], lsl #1 \n\t" + + "1: \n\t" + "wldrd wr10, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "wldrd wr12, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "wldrd wr13, [r4] \n\t" + "pld [%[pixels]] \n\t" + "wldrd wr14, [r4, #8] \n\t" + "pld [%[pixels], #32] \n\t" + "wldrd wr15, [r4, #16] \n\t" + "add r4, r4, %[line_size] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "pld [r4] \n\t" + "pld [r4, #32] \n\t" + "walignr1 wr1, wr11, wr12 \n\t" + "walignr1 wr2, wr13, wr14 \n\t" + "walignr1 wr3, wr14, wr15 \n\t" + "wmoveq wr4, wr11 \n\t" + "wmoveq wr5, wr12 \n\t" + "wmoveq wr6, wr14 \n\t" + "wmoveq wr7, wr15 \n\t" + "walignr2ne wr4, wr10, wr11 \n\t" + "walignr2ne wr5, wr11, wr12 \n\t" + "walignr2ne wr6, wr13, wr14 \n\t" + "walignr2ne wr7, wr14, wr15 \n\t" + WAVG2B" wr0, wr0, wr4 \n\t" + WAVG2B" wr1, wr1, wr5 \n\t" + "wstrd wr0, [%[block]] \n\t" + WAVG2B" wr2, wr2, wr6 \n\t" + "wstrd wr1, [%[block], #8] \n\t" + WAVG2B" wr3, wr3, wr7 \n\t" + "add %[block], %[block], %[line_size] \n\t" + "wstrd wr2, [r5] \n\t" + "subs %[h], %[h], #2 \n\t" + "wstrd wr3, [r5, #8] \n\t" + "add r5, r5, %[line_size] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) + : + : "r4", "r5", "r12", "memory"); +} + +void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r12, r12, #1 \n\t" + "add r4, %[pixels], %[line_size]\n\t" + "tmcr wcgr2, r12 \n\t" + "add r5, %[block], %[line_size] \n\t" + "mov %[line_size], %[line_size], lsl #1 \n\t" + "pld [r5] \n\t" + "pld [r5, #32] \n\t" + + "1: \n\t" + "wldrd wr10, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "wldrd wr13, [r4] \n\t" + "pld [%[pixels]] \n\t" + "wldrd wr14, [r4, #8] \n\t" + "pld [%[pixels], #32] \n\t" + "add r4, r4, %[line_size] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "pld [r4] \n\t" + "pld [r4, #32] \n\t" + "walignr1 wr2, wr13, wr14 \n\t" + "wmoveq wr4, wr11 \n\t" + "wmoveq wr6, wr14 \n\t" + "walignr2ne wr4, wr10, wr11 \n\t" + "wldrd wr10, [%[block]] \n\t" + "walignr2ne wr6, wr13, wr14 \n\t" + "wldrd wr12, [r5] \n\t" + WAVG2B" wr0, wr0, wr4 \n\t" + WAVG2B" wr2, wr2, wr6 \n\t" + WAVG2B" wr0, wr0, wr10 \n\t" + WAVG2B" wr2, wr2, wr12 \n\t" + "wstrd wr0, [%[block]] \n\t" + "subs %[h], %[h], #2 \n\t" + "wstrd wr2, [r5] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "add r5, r5, %[line_size] \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "pld [r5] \n\t" + "pld [r5, #32] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), 
[line_size]"+r"(stride) + : + : "r4", "r5", "r12", "memory"); +} + +void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r12, r12, #1 \n\t" + "add r4, %[pixels], %[line_size]\n\t" + "tmcr wcgr2, r12 \n\t" + "add r5, %[block], %[line_size] \n\t" + "mov %[line_size], %[line_size], lsl #1 \n\t" + "pld [r5] \n\t" + "pld [r5, #32] \n\t" + + "1: \n\t" + "wldrd wr10, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "wldrd wr12, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "wldrd wr13, [r4] \n\t" + "pld [%[pixels]] \n\t" + "wldrd wr14, [r4, #8] \n\t" + "pld [%[pixels], #32] \n\t" + "wldrd wr15, [r4, #16] \n\t" + "add r4, r4, %[line_size] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "pld [r4] \n\t" + "pld [r4, #32] \n\t" + "walignr1 wr1, wr11, wr12 \n\t" + "walignr1 wr2, wr13, wr14 \n\t" + "walignr1 wr3, wr14, wr15 \n\t" + "wmoveq wr4, wr11 \n\t" + "wmoveq wr5, wr12 \n\t" + "wmoveq wr6, wr14 \n\t" + "wmoveq wr7, wr15 \n\t" + "walignr2ne wr4, wr10, wr11 \n\t" + "walignr2ne wr5, wr11, wr12 \n\t" + "walignr2ne wr6, wr13, wr14 \n\t" + "walignr2ne wr7, wr14, wr15 \n\t" + "wldrd wr10, [%[block]] \n\t" + WAVG2B" wr0, wr0, wr4 \n\t" + "wldrd wr11, [%[block], #8] \n\t" + WAVG2B" wr1, wr1, wr5 \n\t" + "wldrd wr12, [r5] \n\t" + WAVG2B" wr2, wr2, wr6 \n\t" + "wldrd wr13, [r5, #8] \n\t" + WAVG2B" wr3, wr3, wr7 \n\t" + WAVG2B" wr0, wr0, wr10 \n\t" + WAVG2B" wr1, wr1, wr11 \n\t" + WAVG2B" wr2, wr2, wr12 \n\t" + WAVG2B" wr3, wr3, wr13 \n\t" + "wstrd wr0, [%[block]] \n\t" + "subs %[h], %[h], #2 \n\t" + "wstrd wr1, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "wstrd wr2, [r5] \n\t" + "pld [%[block]] \n\t" + "wstrd wr3, [r5, #8] \n\t" + "add r5, r5, %[line_size] \n\t" + "pld [%[block], #32] \n\t" + "pld [r5] \n\t" + "pld [r5, #32] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) + : + :"r4", "r5", "r12", "memory"); +} + +void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "and r12, %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "pld [%[block]] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + + "1: \n\t" + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr4, wr10, wr11 \n\t" + "wldrd wr10, [%[block]] \n\t" + WAVG2B" wr8, wr0, wr4 \n\t" + WAVG2B" wr8, wr8, wr10 \n\t" + "wstrd wr8, [%[block]] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "pld [%[block]] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld 
[%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "wldrd wr10, [%[block]] \n\t" + WAVG2B" wr8, wr0, wr4 \n\t" + WAVG2B" wr8, wr8, wr10 \n\t" + "wstrd wr8, [%[block]] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + "subs %[h], %[h], #2 \n\t" + "pld [%[block]] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) + : + : "cc", "memory", "r12"); +} + +void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "and r12, %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "wldrd wr12, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "walignr1 wr1, wr11, wr12 \n\t" + + "1: \n\t" + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "wldrd wr12, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr4, wr10, wr11 \n\t" + "walignr1 wr5, wr11, wr12 \n\t" + WAVG2B" wr8, wr0, wr4 \n\t" + WAVG2B" wr9, wr1, wr5 \n\t" + "wstrd wr8, [%[block]] \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "wldrd wr12, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "walignr1 wr1, wr11, wr12 \n\t" + WAVG2B" wr8, wr0, wr4 \n\t" + WAVG2B" wr9, wr1, wr5 \n\t" + "wstrd wr8, [%[block]] \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + "subs %[h], %[h], #2 \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) + : + : "r4", "r5", "r12", "memory"); +} + +void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "and r12, %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "pld [%[block]] \n\t" + "wldrd wr12, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "walignr1 wr1, wr11, wr12 \n\t" + + "1: \n\t" + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "wldrd wr12, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr4, wr10, wr11 \n\t" + "walignr1 wr5, wr11, wr12 \n\t" + "wldrd wr10, [%[block]] \n\t" + "wldrd wr11, [%[block], #8] \n\t" + WAVG2B" wr8, wr0, wr4 \n\t" + WAVG2B" wr9, wr1, wr5 \n\t" + WAVG2B" wr8, wr8, wr10 \n\t" + WAVG2B" wr9, wr9, wr11 \n\t" + "wstrd wr8, [%[block]] \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "pld 
[%[block]] \n\t" + "wldrd wr12, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "walignr1 wr1, wr11, wr12 \n\t" + "wldrd wr10, [%[block]] \n\t" + "wldrd wr11, [%[block], #8] \n\t" + WAVG2B" wr8, wr0, wr4 \n\t" + WAVG2B" wr9, wr1, wr5 \n\t" + WAVG2B" wr8, wr8, wr10 \n\t" + WAVG2B" wr9, wr9, wr11 \n\t" + "wstrd wr8, [%[block]] \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + "subs %[h], %[h], #2 \n\t" + "pld [%[block]] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) + : + : "r4", "r5", "r12", "memory"); +} + +void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "mov r12, #2 \n\t" + "pld [%[pixels], #32] \n\t" + "tmcr wcgr0, r12 \n\t" /* for shift value */ + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + + // [wr0 wr1 wr2 wr3] <= * + // [wr4 wr5 wr6 wr7] + "wldrd wr12, [%[pixels]] \n\t" + "add r12, r12, #1 \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "tmcr wcgr2, r12 \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "cmp r12, #8 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr2, wr12, wr13 \n\t" + "wmoveq wr10, wr13 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "wunpckelub wr0, wr2 \n\t" + "wunpckehub wr1, wr2 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "waddhus wr0, wr0, wr8 \n\t" + "waddhus wr1, wr1, wr9 \n\t" + + "1: \n\t" + // [wr0 wr1 wr2 wr3] + // [wr4 wr5 wr6 wr7] <= * + "wldrd wr12, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr6, wr12, wr13 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "wmoveq wr10, wr13 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "wunpckelub wr4, wr6 \n\t" + "wunpckehub wr5, wr6 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "waddhus wr4, wr4, wr8 \n\t" + "waddhus wr5, wr5, wr9 \n\t" + "waddhus wr8, wr0, wr4 \n\t" + "waddhus wr9, wr1, wr5 \n\t" + "waddhus wr8, wr8, wr15 \n\t" + "waddhus wr9, wr9, wr15 \n\t" + "wsrlhg wr8, wr8, wcgr0 \n\t" + "wsrlhg wr9, wr9, wcgr0 \n\t" + "wpackhus wr8, wr8, wr9 \n\t" + "wstrd wr8, [%[block]] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + // [wr0 wr1 wr2 wr3] <= * + // [wr4 wr5 wr6 wr7] + "wldrd wr12, [%[pixels]] \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr2, wr12, wr13 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "wmoveq wr10, wr13 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "wunpckelub wr0, wr2 \n\t" + "wunpckehub wr1, wr2 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "waddhus wr0, wr0, wr8 \n\t" + "waddhus wr1, wr1, wr9 \n\t" + "waddhus wr8, wr0, wr4 \n\t" + "waddhus wr9, wr1, wr5 \n\t" + "waddhus wr8, wr8, wr15 \n\t" + "waddhus wr9, wr9, wr15 \n\t" + "wsrlhg wr8, wr8, wcgr0 \n\t" + "wsrlhg wr9, wr9, wcgr0 \n\t" + "wpackhus wr8, wr8, wr9 \n\t" + "subs %[h], %[h], #2 \n\t" + "wstrd wr8, [%[block]] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block) + : [line_size]"r"(line_size) + : 
"r12", "memory"); +} + +void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "mov r12, #2 \n\t" + "pld [%[pixels], #32] \n\t" + "tmcr wcgr0, r12 \n\t" /* for shift value */ + /* alignment */ + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r12, r12, #1 \n\t" + "tmcr wcgr2, r12 \n\t" + + // [wr0 wr1 wr2 wr3] <= * + // [wr4 wr5 wr6 wr7] + "wldrd wr12, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "wldrd wr14, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "walignr1 wr2, wr12, wr13 \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr3, wr13, wr14 \n\t" + "wmoveq wr10, wr13 \n\t" + "wmoveq wr11, wr14 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "walignr2ne wr11, wr13, wr14 \n\t" + "wunpckelub wr0, wr2 \n\t" + "wunpckehub wr1, wr2 \n\t" + "wunpckelub wr2, wr3 \n\t" + "wunpckehub wr3, wr3 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "wunpckelub wr10, wr11 \n\t" + "wunpckehub wr11, wr11 \n\t" + "waddhus wr0, wr0, wr8 \n\t" + "waddhus wr1, wr1, wr9 \n\t" + "waddhus wr2, wr2, wr10 \n\t" + "waddhus wr3, wr3, wr11 \n\t" + + "1: \n\t" + // [wr0 wr1 wr2 wr3] + // [wr4 wr5 wr6 wr7] <= * + "wldrd wr12, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "wldrd wr14, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr6, wr12, wr13 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr7, wr13, wr14 \n\t" + "wmoveq wr10, wr13 \n\t" + "wmoveq wr11, wr14 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "walignr2ne wr11, wr13, wr14 \n\t" + "wunpckelub wr4, wr6 \n\t" + "wunpckehub wr5, wr6 \n\t" + "wunpckelub wr6, wr7 \n\t" + "wunpckehub wr7, wr7 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "wunpckelub wr10, wr11 \n\t" + "wunpckehub wr11, wr11 \n\t" + "waddhus wr4, wr4, wr8 \n\t" + "waddhus wr5, wr5, wr9 \n\t" + "waddhus wr6, wr6, wr10 \n\t" + "waddhus wr7, wr7, wr11 \n\t" + "waddhus wr8, wr0, wr4 \n\t" + "waddhus wr9, wr1, wr5 \n\t" + "waddhus wr10, wr2, wr6 \n\t" + "waddhus wr11, wr3, wr7 \n\t" + "waddhus wr8, wr8, wr15 \n\t" + "waddhus wr9, wr9, wr15 \n\t" + "waddhus wr10, wr10, wr15 \n\t" + "waddhus wr11, wr11, wr15 \n\t" + "wsrlhg wr8, wr8, wcgr0 \n\t" + "wsrlhg wr9, wr9, wcgr0 \n\t" + "wsrlhg wr10, wr10, wcgr0 \n\t" + "wsrlhg wr11, wr11, wcgr0 \n\t" + "wpackhus wr8, wr8, wr9 \n\t" + "wpackhus wr9, wr10, wr11 \n\t" + "wstrd wr8, [%[block]] \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + // [wr0 wr1 wr2 wr3] <= * + // [wr4 wr5 wr6 wr7] + "wldrd wr12, [%[pixels]] \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "wldrd wr14, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr2, wr12, wr13 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr3, wr13, wr14 \n\t" + "wmoveq wr10, wr13 \n\t" + "wmoveq wr11, wr14 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "walignr2ne wr11, wr13, wr14 \n\t" + "wunpckelub wr0, wr2 \n\t" + "wunpckehub wr1, wr2 \n\t" + "wunpckelub wr2, wr3 \n\t" + "wunpckehub wr3, wr3 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "wunpckelub wr10, wr11 \n\t" + "wunpckehub wr11, wr11 \n\t" + "waddhus wr0, 
wr0, wr8 \n\t" + "waddhus wr1, wr1, wr9 \n\t" + "waddhus wr2, wr2, wr10 \n\t" + "waddhus wr3, wr3, wr11 \n\t" + "waddhus wr8, wr0, wr4 \n\t" + "waddhus wr9, wr1, wr5 \n\t" + "waddhus wr10, wr2, wr6 \n\t" + "waddhus wr11, wr3, wr7 \n\t" + "waddhus wr8, wr8, wr15 \n\t" + "waddhus wr9, wr9, wr15 \n\t" + "waddhus wr10, wr10, wr15 \n\t" + "waddhus wr11, wr11, wr15 \n\t" + "wsrlhg wr8, wr8, wcgr0 \n\t" + "wsrlhg wr9, wr9, wcgr0 \n\t" + "wsrlhg wr10, wr10, wcgr0 \n\t" + "wsrlhg wr11, wr11, wcgr0 \n\t" + "wpackhus wr8, wr8, wr9 \n\t" + "wpackhus wr9, wr10, wr11 \n\t" + "wstrd wr8, [%[block]] \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + "subs %[h], %[h], #2 \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block) + : [line_size]"r"(line_size) + : "r12", "memory"); +} + +void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version + __asm__ __volatile__( + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "pld [%[pixels]] \n\t" + "mov r12, #2 \n\t" + "pld [%[pixels], #32] \n\t" + "tmcr wcgr0, r12 \n\t" /* for shift value */ + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + + // [wr0 wr1 wr2 wr3] <= * + // [wr4 wr5 wr6 wr7] + "wldrd wr12, [%[pixels]] \n\t" + "add r12, r12, #1 \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "tmcr wcgr2, r12 \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "cmp r12, #8 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr2, wr12, wr13 \n\t" + "wmoveq wr10, wr13 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "wunpckelub wr0, wr2 \n\t" + "wunpckehub wr1, wr2 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "waddhus wr0, wr0, wr8 \n\t" + "waddhus wr1, wr1, wr9 \n\t" + + "1: \n\t" + // [wr0 wr1 wr2 wr3] + // [wr4 wr5 wr6 wr7] <= * + "wldrd wr12, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr6, wr12, wr13 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "wmoveq wr10, wr13 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "wunpckelub wr4, wr6 \n\t" + "wunpckehub wr5, wr6 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "waddhus wr4, wr4, wr8 \n\t" + "waddhus wr5, wr5, wr9 \n\t" + "waddhus wr8, wr0, wr4 \n\t" + "waddhus wr9, wr1, wr5 \n\t" + "waddhus wr8, wr8, wr15 \n\t" + "waddhus wr9, wr9, wr15 \n\t" + "wldrd wr12, [%[block]] \n\t" + "wsrlhg wr8, wr8, wcgr0 \n\t" + "wsrlhg wr9, wr9, wcgr0 \n\t" + "wpackhus wr8, wr8, wr9 \n\t" + WAVG2B" wr8, wr8, wr12 \n\t" + "wstrd wr8, [%[block]] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "wldrd wr12, [%[pixels]] \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + + // [wr0 wr1 wr2 wr3] <= * + // [wr4 wr5 wr6 wr7] + "wldrd wr13, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr2, wr12, wr13 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "wmoveq wr10, wr13 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "wunpckelub wr0, wr2 \n\t" + "wunpckehub wr1, wr2 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "waddhus wr0, wr0, wr8 \n\t" + "waddhus wr1, wr1, wr9 \n\t" + "waddhus wr8, wr0, wr4 \n\t" + "waddhus wr9, wr1, wr5 \n\t" + "waddhus wr8, wr8, wr15 \n\t" + "waddhus wr9, wr9, wr15 \n\t" + "wldrd wr12, [%[block]] 
\n\t" + "wsrlhg wr8, wr8, wcgr0 \n\t" + "wsrlhg wr9, wr9, wcgr0 \n\t" + "wpackhus wr8, wr8, wr9 \n\t" + "subs %[h], %[h], #2 \n\t" + WAVG2B" wr8, wr8, wr12 \n\t" + "wstrd wr8, [%[block]] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block) + : [line_size]"r"(line_size) + : "r12", "memory"); +} + +void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version + __asm__ __volatile__( + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "pld [%[pixels]] \n\t" + "mov r12, #2 \n\t" + "pld [%[pixels], #32] \n\t" + "tmcr wcgr0, r12 \n\t" /* for shift value */ + /* alignment */ + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r12, r12, #1 \n\t" + "tmcr wcgr2, r12 \n\t" + + // [wr0 wr1 wr2 wr3] <= * + // [wr4 wr5 wr6 wr7] + "wldrd wr12, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "wldrd wr14, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "walignr1 wr2, wr12, wr13 \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr3, wr13, wr14 \n\t" + "wmoveq wr10, wr13 \n\t" + "wmoveq wr11, wr14 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "walignr2ne wr11, wr13, wr14 \n\t" + "wunpckelub wr0, wr2 \n\t" + "wunpckehub wr1, wr2 \n\t" + "wunpckelub wr2, wr3 \n\t" + "wunpckehub wr3, wr3 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "wunpckelub wr10, wr11 \n\t" + "wunpckehub wr11, wr11 \n\t" + "waddhus wr0, wr0, wr8 \n\t" + "waddhus wr1, wr1, wr9 \n\t" + "waddhus wr2, wr2, wr10 \n\t" + "waddhus wr3, wr3, wr11 \n\t" + + "1: \n\t" + // [wr0 wr1 wr2 wr3] + // [wr4 wr5 wr6 wr7] <= * + "wldrd wr12, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "wldrd wr14, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr6, wr12, wr13 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr7, wr13, wr14 \n\t" + "wmoveq wr10, wr13 \n\t" + "wmoveq wr11, wr14 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "walignr2ne wr11, wr13, wr14 \n\t" + "wunpckelub wr4, wr6 \n\t" + "wunpckehub wr5, wr6 \n\t" + "wunpckelub wr6, wr7 \n\t" + "wunpckehub wr7, wr7 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "wunpckelub wr10, wr11 \n\t" + "wunpckehub wr11, wr11 \n\t" + "waddhus wr4, wr4, wr8 \n\t" + "waddhus wr5, wr5, wr9 \n\t" + "waddhus wr6, wr6, wr10 \n\t" + "waddhus wr7, wr7, wr11 \n\t" + "waddhus wr8, wr0, wr4 \n\t" + "waddhus wr9, wr1, wr5 \n\t" + "waddhus wr10, wr2, wr6 \n\t" + "waddhus wr11, wr3, wr7 \n\t" + "waddhus wr8, wr8, wr15 \n\t" + "waddhus wr9, wr9, wr15 \n\t" + "waddhus wr10, wr10, wr15 \n\t" + "waddhus wr11, wr11, wr15 \n\t" + "wsrlhg wr8, wr8, wcgr0 \n\t" + "wsrlhg wr9, wr9, wcgr0 \n\t" + "wldrd wr12, [%[block]] \n\t" + "wldrd wr13, [%[block], #8] \n\t" + "wsrlhg wr10, wr10, wcgr0 \n\t" + "wsrlhg wr11, wr11, wcgr0 \n\t" + "wpackhus wr8, wr8, wr9 \n\t" + "wpackhus wr9, wr10, wr11 \n\t" + WAVG2B" wr8, wr8, wr12 \n\t" + WAVG2B" wr9, wr9, wr13 \n\t" + "wstrd wr8, [%[block]] \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + // [wr0 wr1 wr2 wr3] <= * + // [wr4 wr5 wr6 wr7] + "wldrd wr12, [%[pixels]] \n\t" + "pld [%[block]] \n\t" + "wldrd wr13, 
[%[pixels], #8] \n\t" + "pld [%[block], #32] \n\t" + "wldrd wr14, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr2, wr12, wr13 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr3, wr13, wr14 \n\t" + "wmoveq wr10, wr13 \n\t" + "wmoveq wr11, wr14 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "walignr2ne wr11, wr13, wr14 \n\t" + "wunpckelub wr0, wr2 \n\t" + "wunpckehub wr1, wr2 \n\t" + "wunpckelub wr2, wr3 \n\t" + "wunpckehub wr3, wr3 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "wunpckelub wr10, wr11 \n\t" + "wunpckehub wr11, wr11 \n\t" + "waddhus wr0, wr0, wr8 \n\t" + "waddhus wr1, wr1, wr9 \n\t" + "waddhus wr2, wr2, wr10 \n\t" + "waddhus wr3, wr3, wr11 \n\t" + "waddhus wr8, wr0, wr4 \n\t" + "waddhus wr9, wr1, wr5 \n\t" + "waddhus wr10, wr2, wr6 \n\t" + "waddhus wr11, wr3, wr7 \n\t" + "waddhus wr8, wr8, wr15 \n\t" + "waddhus wr9, wr9, wr15 \n\t" + "waddhus wr10, wr10, wr15 \n\t" + "waddhus wr11, wr11, wr15 \n\t" + "wsrlhg wr8, wr8, wcgr0 \n\t" + "wsrlhg wr9, wr9, wcgr0 \n\t" + "wldrd wr12, [%[block]] \n\t" + "wldrd wr13, [%[block], #8] \n\t" + "wsrlhg wr10, wr10, wcgr0 \n\t" + "wsrlhg wr11, wr11, wcgr0 \n\t" + "wpackhus wr8, wr8, wr9 \n\t" + "wpackhus wr9, wr10, wr11 \n\t" + WAVG2B" wr8, wr8, wr12 \n\t" + WAVG2B" wr9, wr9, wr13 \n\t" + "wstrd wr8, [%[block]] \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "subs %[h], %[h], #2 \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block) + : [line_size]"r"(line_size) + : "r12", "memory"); +} diff --git a/src/libffmpeg/libavcodec/armv4l/mathops.h b/src/libffmpeg/libavcodec/armv4l/mathops.h new file mode 100644 index 000000000..7ddd0ec6e --- /dev/null +++ b/src/libffmpeg/libavcodec/armv4l/mathops.h @@ -0,0 +1,49 @@ +/* + * simple math operations + * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifdef FRAC_BITS
+#   define MULL(a, b) \
+        ({  int lo, hi;\
+         asm("smull %0, %1, %2, %3 \n\t"\
+             "mov   %0, %0, lsr %4\n\t"\
+             "add   %1, %0, %1, lsl %5\n\t"\
+             : "=&r"(lo), "=&r"(hi)\
+             : "r"(b), "r"(a), "i"(FRAC_BITS), "i"(32-FRAC_BITS));\
+         hi; })
+#endif
+
+#define MULH(a, b) \
+    ({ int lo, hi;\
+     asm ("smull %0, %1, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(b), "r"(a));\
+     hi; })
+
+#if defined(HAVE_ARMV5TE)
+
+/* signed 16x16 -> 32 multiply add accumulate */
+#   define MAC16(rt, ra, rb) \
+        asm ("smlabb %0, %2, %3, %0" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
+/* signed 16x16 -> 32 multiply */
+#   define MUL16(ra, rb) \
+        ({ int __rt; \
+         asm ("smulbb %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); \
+         __rt; })
+
+#endif
diff --git a/src/libffmpeg/libavcodec/armv4l/mpegvideo_armv5te.c b/src/libffmpeg/libavcodec/armv4l/mpegvideo_armv5te.c
new file mode 100644
index 000000000..a8d09b8ce
--- /dev/null
+++ b/src/libffmpeg/libavcodec/armv4l/mpegvideo_armv5te.c
@@ -0,0 +1,213 @@
+/*
+ * Optimization of some functions from mpegvideo.c for armv5te
+ * Copyright (c) 2007 Siarhei Siamashka <ssvb@users.sourceforge.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Some useful links for those who may be interested in optimizing code for ARM.
+ * ARM Architecture Reference Manual: http://www.arm.com/community/academy/resources.html
+ * Instructions timings and optimization guide for ARM9E: http://www.arm.com/pdfs/DDI0222B_9EJS_r1p2.pdf
+ */
+
+#include "../dsputil.h"
+#include "../mpegvideo.h"
+#include "../avcodec.h"
+
+
+#ifdef ENABLE_ARM_TESTS
+/**
+ * h263 dequantizer supplementary function; it is performance-critical and
+ * needs an optimized implementation for each architecture. It is also used
+ * as a reference implementation in regression tests.
+ */
+static inline void dct_unquantize_h263_helper_c(DCTELEM *block, int qmul, int qadd, int count)
+{
+    int i, level;
+    for (i = 0; i < count; i++) {
+        level = block[i];
+        if (level) {
+            if (level < 0) {
+                level = level * qmul - qadd;
+            } else {
+                level = level * qmul + qadd;
+            }
+            block[i] = level;
+        }
+    }
+}
+#endif
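[Editor's note — illustrative only, not part of the patch: the reference helper
above implements the H.263 inverse-quantization rule level*qmul + sign(level)*qadd
for nonzero coefficients. A minimal self-contained sketch of that rule, assuming
DCTELEM is a 16-bit integer as in this libavcodec tree:

    #include <assert.h>
    #include <stdint.h>

    static void ref_unquantize(int16_t *block, int qscale, int count)
    {
        int i, qmul = qscale << 1, qadd = (qscale - 1) | 1;  /* qadd forced odd */
        for (i = 0; i < count; i++)
            if (block[i])  /* zero coefficients stay zero */
                block[i] = block[i] * qmul + (block[i] < 0 ? -qadd : qadd);
    }

    int main(void)
    {
        int16_t b[4] = { 3, -3, 0, 1 };
        ref_unquantize(b, 5, 4);  /* qscale 5 gives qmul = 10, qadd = 5 */
        assert(b[0] == 35 && b[1] == -35 && b[2] == 0 && b[3] == 15);
        return 0;
    }
]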
+/* GCC 3.1 or higher is required to support symbolic names in assembly code */
+#if (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1))
+
+/**
+ * Special optimized version of dct_unquantize_h263_helper_c; it requires the
+ * block to be at least 8-byte aligned and may process more elements than
+ * requested. But it is guaranteed never to process more than 64 elements
+ * provided that the xxcount argument is <= 64, so it is safe. The macro is
+ * optimized for the common distribution of nCoeffs values (mostly a multiple
+ * of 8 plus one or two extra elements), so it processes data eight elements
+ * per loop iteration, with optional processing of two elements at the end.
+ *
+ * The inner loop should take 6 cycles per element on arm926ej-s (Nokia 770).
+ */
+#define dct_unquantize_h263_special_helper_armv5te(xxblock, xxqmul, xxqadd, xxcount) \
+({ DCTELEM *xblock = xxblock; \
+   int xqmul = xxqmul, xqadd = xxqadd, xcount = xxcount, xtmp; \
+   int xdata1, xdata2; \
+__asm__ __volatile__( \
+        "subs %[count], #2                       \n\t" \
+        "ble 2f                                  \n\t" \
+        "ldrd r4, [%[block], #0]                 \n\t" \
+    "1:                                          \n\t" \
+        "ldrd r6, [%[block], #8]                 \n\t" \
+\
+        "rsbs %[data1], %[zero], r4, asr #16     \n\t" \
+        "addgt %[data1], %[qadd], #0             \n\t" \
+        "rsblt %[data1], %[qadd], #0             \n\t" \
+        "smlatbne %[data1], r4, %[qmul], %[data1] \n\t" \
+\
+        "rsbs %[data2], %[zero], r5, asr #16     \n\t" \
+        "addgt %[data2], %[qadd], #0             \n\t" \
+        "rsblt %[data2], %[qadd], #0             \n\t" \
+        "smlatbne %[data2], r5, %[qmul], %[data2] \n\t" \
+\
+        "rsbs %[tmp], %[zero], r4, asl #16       \n\t" \
+        "addgt %[tmp], %[qadd], #0               \n\t" \
+        "rsblt %[tmp], %[qadd], #0               \n\t" \
+        "smlabbne r4, r4, %[qmul], %[tmp]        \n\t" \
+\
+        "rsbs %[tmp], %[zero], r5, asl #16       \n\t" \
+        "addgt %[tmp], %[qadd], #0               \n\t" \
+        "rsblt %[tmp], %[qadd], #0               \n\t" \
+        "smlabbne r5, r5, %[qmul], %[tmp]        \n\t" \
+\
+        "strh r4, [%[block]], #2                 \n\t" \
+        "strh %[data1], [%[block]], #2           \n\t" \
+        "strh r5, [%[block]], #2                 \n\t" \
+        "strh %[data2], [%[block]], #2           \n\t" \
+\
+        "rsbs %[data1], %[zero], r6, asr #16     \n\t" \
+        "addgt %[data1], %[qadd], #0             \n\t" \
+        "rsblt %[data1], %[qadd], #0             \n\t" \
+        "smlatbne %[data1], r6, %[qmul], %[data1] \n\t" \
+\
+        "rsbs %[data2], %[zero], r7, asr #16     \n\t" \
+        "addgt %[data2], %[qadd], #0             \n\t" \
+        "rsblt %[data2], %[qadd], #0             \n\t" \
+        "smlatbne %[data2], r7, %[qmul], %[data2] \n\t" \
+\
+        "rsbs %[tmp], %[zero], r6, asl #16       \n\t" \
+        "addgt %[tmp], %[qadd], #0               \n\t" \
+        "rsblt %[tmp], %[qadd], #0               \n\t" \
+        "smlabbne r6, r6, %[qmul], %[tmp]        \n\t" \
+\
+        "rsbs %[tmp], %[zero], r7, asl #16       \n\t" \
+        "addgt %[tmp], %[qadd], #0               \n\t" \
+        "rsblt %[tmp], %[qadd], #0               \n\t" \
+        "smlabbne r7, r7, %[qmul], %[tmp]        \n\t" \
+\
+        "strh r6, [%[block]], #2                 \n\t" \
+        "strh %[data1], [%[block]], #2           \n\t" \
+        "strh r7, [%[block]], #2                 \n\t" \
+        "strh %[data2], [%[block]], #2           \n\t" \
+\
+        "subs %[count], #8                       \n\t" \
+        "ldrgtd r4, [%[block], #0]               \n\t" /* load data early to avoid load/use pipeline stall */ \
+        "bgt 1b                                  \n\t" \
+\
+        "adds %[count], #2                       \n\t" \
+        "ble 3f                                  \n\t" \
+    "2:                                          \n\t" \
+        "ldrsh %[data1], [%[block], #0]          \n\t" \
+        "ldrsh %[data2], [%[block], #2]          \n\t" \
+        "mov %[tmp], %[qadd]                     \n\t" \
+        "cmp %[data1], #0                        \n\t" \
+        "rsblt %[tmp], %[qadd], #0               \n\t" \
+        "smlabbne %[data1], %[data1], %[qmul], %[tmp] \n\t" \
+        "mov %[tmp], %[qadd]                     \n\t" \
+        "cmp %[data2], #0                        \n\t" \
+        "rsblt %[tmp], %[qadd], #0               \n\t" \
+        "smlabbne %[data2], %[data2], %[qmul], %[tmp] \n\t" \
+        "strh %[data1], [%[block]], #2           \n\t" \
+        "strh %[data2], [%[block]], #2           \n\t" \
+    "3:                                          \n\t" \
+        : [block] "+&r" (xblock), [count] "+&r" (xcount), [tmp] "=&r" (xtmp), \
+          [data1] "=&r" (xdata1), [data2] "=&r" (xdata2) \
+        : [qmul] "r" (xqmul), [qadd] "r" (xqadd), [zero] "r" (0) \
+        : "r4", "r5", "r6", "r7", "cc", "memory" \
+); \
+})
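[Editor's note: the macro above computes the same per-coefficient update as
dct_unquantize_h263_helper_c — block[i]*qmul plus qadd, or minus qadd when the
coefficient is negative — but consumes coefficients in 32-bit pairs. Each rsbs
sets the flags from one 16-bit half, addgt/rsblt then select +qadd or -qadd,
and the conditional smlatbne/smlabbne multiply-accumulate is skipped entirely
when that half is zero. Eight coefficients are handled per loop pass, with the
"2:" tail covering a trailing pair.]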
+
+static void dct_unquantize_h263_intra_armv5te(MpegEncContext *s,
+                                  DCTELEM *block, int n, int qscale)
+{
+    int i, level, qmul, qadd;
+    int nCoeffs;
+
+    assert(s->block_last_index[n]>=0);
+
+    qmul = qscale << 1;
+
+    if (!s->h263_aic) {
+        if (n < 4)
+            level = block[0] * s->y_dc_scale;
+        else
+            level = block[0] * s->c_dc_scale;
+        qadd = (qscale - 1) | 1;
+    }else{
+        qadd = 0;
+        level = block[0];
+    }
+    if(s->ac_pred)
+        nCoeffs=63;
+    else
+        nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
+
+    dct_unquantize_h263_special_helper_armv5te(block, qmul, qadd, nCoeffs + 1);
+    block[0] = level;
+}
+
+static void dct_unquantize_h263_inter_armv5te(MpegEncContext *s,
+                                  DCTELEM *block, int n, int qscale)
+{
+    int i, level, qmul, qadd;
+    int nCoeffs;
+
+    assert(s->block_last_index[n]>=0);
+
+    qadd = (qscale - 1) | 1;
+    qmul = qscale << 1;
+
+    nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
+
+    dct_unquantize_h263_special_helper_armv5te(block, qmul, qadd, nCoeffs + 1);
+}
+
+#define HAVE_DCT_UNQUANTIZE_H263_ARMV5TE_OPTIMIZED
+
+#endif
+
+void MPV_common_init_armv5te(MpegEncContext *s)
+{
+#ifdef HAVE_DCT_UNQUANTIZE_H263_ARMV5TE_OPTIMIZED
+    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_armv5te;
+    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_armv5te;
+#endif
+}
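[Editor's note: MPV_common_init_armv5te above follows the pattern used by every
per-architecture init hook in this patch — compile-time (or run-time, as in the
IWMMXT file that follows) capability detection, then overriding function
pointers in MpegEncContext. A minimal sketch of that dispatch idea, with
hypothetical names:

    #include <stdint.h>

    typedef struct {
        void (*dct_unquantize)(int16_t *block, int qmul, int qadd, int count);
    } DspHooks;

    static void dct_unquantize_c(int16_t *block, int qmul, int qadd, int count)
    {
        /* portable reference implementation, always available */
        int i;
        for (i = 0; i < count; i++)
            if (block[i])
                block[i] = block[i] * qmul + (block[i] < 0 ? -qadd : qadd);
    }

    #ifdef HAVE_ARMV5TE
    /* provided by an arch-specific file when building for armv5te */
    void dct_unquantize_armv5te(int16_t *block, int qmul, int qadd, int count);
    #endif

    void dsp_hooks_init(DspHooks *h)
    {
        h->dct_unquantize = dct_unquantize_c;         /* safe default */
    #ifdef HAVE_ARMV5TE
        h->dct_unquantize = dct_unquantize_armv5te;   /* faster path if built in */
    #endif
    }
]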
diff --git a/src/libffmpeg/libavcodec/armv4l/mpegvideo_iwmmxt.c b/src/libffmpeg/libavcodec/armv4l/mpegvideo_iwmmxt.c
new file mode 100644
index 000000000..1336ac5f8
--- /dev/null
+++ b/src/libffmpeg/libavcodec/armv4l/mpegvideo_iwmmxt.c
@@ -0,0 +1,119 @@
+/*
+ * copyright (c) 2004 AGAWA Koji
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../dsputil.h"
+#include "../mpegvideo.h"
+#include "../avcodec.h"
+
+static void dct_unquantize_h263_intra_iwmmxt(MpegEncContext *s,
+                                             DCTELEM *block, int n, int qscale)
+{
+    int level, qmul, qadd;
+    int nCoeffs;
+    DCTELEM *block_orig = block;
+
+    assert(s->block_last_index[n]>=0);
+
+    qmul = qscale << 1;
+
+    if (!s->h263_aic) {
+        if (n < 4)
+            level = block[0] * s->y_dc_scale;
+        else
+            level = block[0] * s->c_dc_scale;
+        qadd = (qscale - 1) | 1;
+    }else{
+        qadd = 0;
+        level = block[0];
+    }
+    if(s->ac_pred)
+        nCoeffs=63;
+    else
+        nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
+
+    __asm__ __volatile__ (
+/*      "movd %1, %%mm6                  \n\t" //qmul */
+/*      "packssdw %%mm6, %%mm6           \n\t" */
+/*      "packssdw %%mm6, %%mm6           \n\t" */
+        "tbcsth wr6, %[qmul]             \n\t"
+/*      "movd %2, %%mm5                  \n\t" //qadd */
+/*      "packssdw %%mm5, %%mm5           \n\t" */
+/*      "packssdw %%mm5, %%mm5           \n\t" */
+        "tbcsth wr5, %[qadd]             \n\t"
+        "wzero wr7                       \n\t" /* "pxor %%mm7, %%mm7     \n\t" */
+        "wzero wr4                       \n\t" /* "pxor %%mm4, %%mm4     \n\t" */
+        "wsubh wr7, wr5, wr7             \n\t" /* "psubw %%mm5, %%mm7    \n\t" */
+        "1:                              \n\t"
+        "wldrd wr2, [%[block]]           \n\t" /* "movq (%0, %3), %%mm0  \n\t" */
+        "wldrd wr3, [%[block], #8]       \n\t" /* "movq 8(%0, %3), %%mm1 \n\t" */
+        "wmulsl wr0, wr6, wr2            \n\t" /* "pmullw %%mm6, %%mm0   \n\t" */
+        "wmulsl wr1, wr6, wr3            \n\t" /* "pmullw %%mm6, %%mm1   \n\t" */
+/*      "movq (%0, %3), %%mm2            \n\t" */
+/*      "movq 8(%0, %3), %%mm3           \n\t" */
+        "wcmpgtsh wr2, wr4, wr2          \n\t" /* "pcmpgtw %%mm4, %%mm2  \n\t" // block[i] < 0 ? -1 : 0 */
+        "wcmpgtsh wr3, wr4, wr3          \n\t" /* "pcmpgtw %%mm4, %%mm3  \n\t" // block[i] < 0 ? -1 : 0 */
+        "wxor wr0, wr2, wr0              \n\t" /* "pxor %%mm2, %%mm0     \n\t" */
+        "wxor wr1, wr3, wr1              \n\t" /* "pxor %%mm3, %%mm1     \n\t" */
+        "waddh wr0, wr7, wr0             \n\t" /* "paddw %%mm7, %%mm0    \n\t" */
+        "waddh wr1, wr7, wr1             \n\t" /* "paddw %%mm7, %%mm1    \n\t" */
+        "wxor wr2, wr0, wr2              \n\t" /* "pxor %%mm0, %%mm2     \n\t" */
+        "wxor wr3, wr1, wr3              \n\t" /* "pxor %%mm1, %%mm3     \n\t" */
+        "wcmpeqh wr0, wr7, wr0           \n\t" /* "pcmpeqw %%mm7, %%mm0  \n\t" // block[i] == 0 ? -1 : 0 */
+        "wcmpeqh wr1, wr7, wr1           \n\t" /* "pcmpeqw %%mm7, %%mm1  \n\t" // block[i] == 0 ?
-1 : 0 */ + "wandn wr0, wr2, wr0 \n\t" /* "pandn %%mm2, %%mm0 \n\t" */ + "wandn wr1, wr3, wr1 \n\t" /* "pandn %%mm3, %%mm1 \n\t" */ + "wstrd wr0, [%[block]] \n\t" /* "movq %%mm0, (%0, %3) \n\t" */ + "wstrd wr1, [%[block], #8] \n\t" /* "movq %%mm1, 8(%0, %3) \n\t" */ + "add %[block], %[block], #16 \n\t" /* "addl $16, %3 \n\t" */ + "subs %[i], %[i], #1 \n\t" + "bne 1b \n\t" /* "jng 1b \n\t" */ + :[block]"+r"(block) + :[i]"r"((nCoeffs + 8) / 8), [qmul]"r"(qmul), [qadd]"r"(qadd) + :"memory"); + + block_orig[0] = level; +} + +#if 0 +static void dct_unquantize_h263_inter_iwmmxt(MpegEncContext *s, + DCTELEM *block, int n, int qscale) +{ + int nCoeffs; + + assert(s->block_last_index[n]>=0); + + if(s->ac_pred) + nCoeffs=63; + else + nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; + + ippiQuantInvInter_Compact_H263_16s_I(block, nCoeffs+1, qscale); +} +#endif + +void MPV_common_init_iwmmxt(MpegEncContext *s) +{ + if (!(mm_flags & MM_IWMMXT)) return; + + s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_iwmmxt; +#if 0 + s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_iwmmxt; +#endif +} diff --git a/src/libffmpeg/libavcodec/armv4l/simple_idct_armv5te.S b/src/libffmpeg/libavcodec/armv4l/simple_idct_armv5te.S new file mode 100644 index 000000000..28bee0643 --- /dev/null +++ b/src/libffmpeg/libavcodec/armv4l/simple_idct_armv5te.S @@ -0,0 +1,718 @@ +/* + * Simple IDCT + * + * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> + * Copyright (c) 2006 Mans Rullgard <mru@inprovide.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define W1 22725 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ +#define W2 21407 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ +#define W3 19266 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ +#define W4 16383 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ +#define W5 12873 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ +#define W6 8867 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ +#define W7 4520 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ +#define ROW_SHIFT 11 +#define COL_SHIFT 20 + +#define W13 (W1 | (W3 << 16)) +#define W26 (W2 | (W6 << 16)) +#define W57 (W5 | (W7 << 16)) + + .text + .align +w13: .long W13 +w26: .long W26 +w57: .long W57 + + .align + .func idct_row_armv5te +idct_row_armv5te: + str lr, [sp, #-4]! 
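 @ (Editorial note: the four loads below fetch the whole 8x16-bit coefficient
 @ row; if coefficients 1..7 are all zero, the row IDCT degenerates to
 @ replicating row[0] << 3 across the row, handled at row_dc_only.)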
+ + ldrd v1, [a1, #8] + ldrd a3, [a1] /* a3 = row[1:0], a4 = row[3:2] */ + orrs v1, v1, v2 + cmpeq v1, a4 + cmpeq v1, a3, lsr #16 + beq row_dc_only + + mov v1, #(1<<(ROW_SHIFT-1)) + mov ip, #16384 + sub ip, ip, #1 /* ip = W4 */ + smlabb v1, ip, a3, v1 /* v1 = W4*row[0]+(1<<(RS-1)) */ + ldr ip, [pc, #(w26-.-8)] /* ip = W2 | (W6 << 16) */ + smultb a2, ip, a4 + smulbb lr, ip, a4 + add v2, v1, a2 + sub v3, v1, a2 + sub v4, v1, lr + add v1, v1, lr + + ldr ip, [pc, #(w13-.-8)] /* ip = W1 | (W3 << 16) */ + ldr lr, [pc, #(w57-.-8)] /* lr = W5 | (W7 << 16) */ + smulbt v5, ip, a3 + smultt v6, lr, a4 + smlatt v5, ip, a4, v5 + smultt a2, ip, a3 + smulbt v7, lr, a3 + sub v6, v6, a2 + smulbt a2, ip, a4 + smultt fp, lr, a3 + sub v7, v7, a2 + smulbt a2, lr, a4 + ldrd a3, [a1, #8] /* a3=row[5:4] a4=row[7:6] */ + sub fp, fp, a2 + + orrs a2, a3, a4 + beq 1f + + smlabt v5, lr, a3, v5 + smlabt v6, ip, a3, v6 + smlatt v5, lr, a4, v5 + smlabt v6, lr, a4, v6 + smlatt v7, lr, a3, v7 + smlatt fp, ip, a3, fp + smulbt a2, ip, a4 + smlatt v7, ip, a4, v7 + sub fp, fp, a2 + + ldr ip, [pc, #(w26-.-8)] /* ip = W2 | (W6 << 16) */ + mov a2, #16384 + sub a2, a2, #1 /* a2 = W4 */ + smulbb a2, a2, a3 /* a2 = W4*row[4] */ + smultb lr, ip, a4 /* lr = W6*row[6] */ + add v1, v1, a2 /* v1 += W4*row[4] */ + add v1, v1, lr /* v1 += W6*row[6] */ + add v4, v4, a2 /* v4 += W4*row[4] */ + sub v4, v4, lr /* v4 -= W6*row[6] */ + smulbb lr, ip, a4 /* lr = W2*row[6] */ + sub v2, v2, a2 /* v2 -= W4*row[4] */ + sub v2, v2, lr /* v2 -= W2*row[6] */ + sub v3, v3, a2 /* v3 -= W4*row[4] */ + add v3, v3, lr /* v3 += W2*row[6] */ + +1: add a2, v1, v5 + mov a3, a2, lsr #11 + bic a3, a3, #0x1f0000 + sub a2, v2, v6 + mov a2, a2, lsr #11 + add a3, a3, a2, lsl #16 + add a2, v3, v7 + mov a4, a2, lsr #11 + bic a4, a4, #0x1f0000 + add a2, v4, fp + mov a2, a2, lsr #11 + add a4, a4, a2, lsl #16 + strd a3, [a1] + + sub a2, v4, fp + mov a3, a2, lsr #11 + bic a3, a3, #0x1f0000 + sub a2, v3, v7 + mov a2, a2, lsr #11 + add a3, a3, a2, lsl #16 + add a2, v2, v6 + mov a4, a2, lsr #11 + bic a4, a4, #0x1f0000 + sub a2, v1, v5 + mov a2, a2, lsr #11 + add a4, a4, a2, lsl #16 + strd a3, [a1, #8] + + ldr pc, [sp], #4 + +row_dc_only: + orr a3, a3, a3, lsl #16 + bic a3, a3, #0xe000 + mov a3, a3, lsl #3 + mov a4, a3 + strd a3, [a1] + strd a3, [a1, #8] + + ldr pc, [sp], #4 + .endfunc + + .macro idct_col + ldr a4, [a1] /* a4 = col[1:0] */ + mov ip, #16384 + sub ip, ip, #1 /* ip = W4 */ +#if 0 + mov v1, #(1<<(COL_SHIFT-1)) + smlabt v2, ip, a4, v1 /* v2 = W4*col[1] + (1<<(COL_SHIFT-1)) */ + smlabb v1, ip, a4, v1 /* v1 = W4*col[0] + (1<<(COL_SHIFT-1)) */ + ldr a4, [a1, #(16*4)] +#else + mov v1, #((1<<(COL_SHIFT-1))/W4) /* this matches the C version */ + add v2, v1, a4, asr #16 + rsb v2, v2, v2, lsl #14 + mov a4, a4, lsl #16 + add v1, v1, a4, asr #16 + ldr a4, [a1, #(16*4)] + rsb v1, v1, v1, lsl #14 +#endif + + smulbb lr, ip, a4 + smulbt a3, ip, a4 + sub v3, v1, lr + sub v5, v1, lr + add v7, v1, lr + add v1, v1, lr + sub v4, v2, a3 + sub v6, v2, a3 + add fp, v2, a3 + ldr ip, [pc, #(w26-.-8)] + ldr a4, [a1, #(16*2)] + add v2, v2, a3 + + smulbb lr, ip, a4 + smultb a3, ip, a4 + add v1, v1, lr + sub v7, v7, lr + add v3, v3, a3 + sub v5, v5, a3 + smulbt lr, ip, a4 + smultt a3, ip, a4 + add v2, v2, lr + sub fp, fp, lr + add v4, v4, a3 + ldr a4, [a1, #(16*6)] + sub v6, v6, a3 + + smultb lr, ip, a4 + smulbb a3, ip, a4 + add v1, v1, lr + sub v7, v7, lr + sub v3, v3, a3 + add v5, v5, a3 + smultt lr, ip, a4 + smulbt a3, ip, a4 + add v2, v2, lr + sub fp, fp, lr + sub v4, v4, a3 + add v6, v6, 
a3 + + stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp} + + ldr ip, [pc, #(w13-.-8)] + ldr a4, [a1, #(16*1)] + ldr lr, [pc, #(w57-.-8)] + smulbb v1, ip, a4 + smultb v3, ip, a4 + smulbb v5, lr, a4 + smultb v7, lr, a4 + smulbt v2, ip, a4 + smultt v4, ip, a4 + smulbt v6, lr, a4 + smultt fp, lr, a4 + rsb v4, v4, #0 + ldr a4, [a1, #(16*3)] + rsb v3, v3, #0 + + smlatb v1, ip, a4, v1 + smlatb v3, lr, a4, v3 + smulbb a3, ip, a4 + smulbb a2, lr, a4 + sub v5, v5, a3 + sub v7, v7, a2 + smlatt v2, ip, a4, v2 + smlatt v4, lr, a4, v4 + smulbt a3, ip, a4 + smulbt a2, lr, a4 + sub v6, v6, a3 + ldr a4, [a1, #(16*5)] + sub fp, fp, a2 + + smlabb v1, lr, a4, v1 + smlabb v3, ip, a4, v3 + smlatb v5, lr, a4, v5 + smlatb v7, ip, a4, v7 + smlabt v2, lr, a4, v2 + smlabt v4, ip, a4, v4 + smlatt v6, lr, a4, v6 + ldr a3, [a1, #(16*7)] + smlatt fp, ip, a4, fp + + smlatb v1, lr, a3, v1 + smlabb v3, lr, a3, v3 + smlatb v5, ip, a3, v5 + smulbb a4, ip, a3 + smlatt v2, lr, a3, v2 + sub v7, v7, a4 + smlabt v4, lr, a3, v4 + smulbt a4, ip, a3 + smlatt v6, ip, a3, v6 + sub fp, fp, a4 + .endm + + .align + .func idct_col_armv5te +idct_col_armv5te: + str lr, [sp, #-4]! + + idct_col + + ldmfd sp!, {a3, a4} + adds a2, a3, v1 + mov a2, a2, lsr #20 + orrmi a2, a2, #0xf000 + add ip, a4, v2 + mov ip, ip, asr #20 + orr a2, a2, ip, lsl #16 + str a2, [a1] + subs a3, a3, v1 + mov a2, a3, lsr #20 + orrmi a2, a2, #0xf000 + sub a4, a4, v2 + mov a4, a4, asr #20 + orr a2, a2, a4, lsl #16 + ldmfd sp!, {a3, a4} + str a2, [a1, #(16*7)] + + subs a2, a3, v3 + mov a2, a2, lsr #20 + orrmi a2, a2, #0xf000 + sub ip, a4, v4 + mov ip, ip, asr #20 + orr a2, a2, ip, lsl #16 + str a2, [a1, #(16*1)] + adds a3, a3, v3 + mov a2, a3, lsr #20 + orrmi a2, a2, #0xf000 + add a4, a4, v4 + mov a4, a4, asr #20 + orr a2, a2, a4, lsl #16 + ldmfd sp!, {a3, a4} + str a2, [a1, #(16*6)] + + adds a2, a3, v5 + mov a2, a2, lsr #20 + orrmi a2, a2, #0xf000 + add ip, a4, v6 + mov ip, ip, asr #20 + orr a2, a2, ip, lsl #16 + str a2, [a1, #(16*2)] + subs a3, a3, v5 + mov a2, a3, lsr #20 + orrmi a2, a2, #0xf000 + sub a4, a4, v6 + mov a4, a4, asr #20 + orr a2, a2, a4, lsl #16 + ldmfd sp!, {a3, a4} + str a2, [a1, #(16*5)] + + adds a2, a3, v7 + mov a2, a2, lsr #20 + orrmi a2, a2, #0xf000 + add ip, a4, fp + mov ip, ip, asr #20 + orr a2, a2, ip, lsl #16 + str a2, [a1, #(16*3)] + subs a3, a3, v7 + mov a2, a3, lsr #20 + orrmi a2, a2, #0xf000 + sub a4, a4, fp + mov a4, a4, asr #20 + orr a2, a2, a4, lsl #16 + str a2, [a1, #(16*4)] + + ldr pc, [sp], #4 + .endfunc + + .align + .func idct_col_put_armv5te +idct_col_put_armv5te: + str lr, [sp, #-4]! + + idct_col + + ldmfd sp!, {a3, a4} + ldr lr, [sp, #32] + add a2, a3, v1 + movs a2, a2, asr #20 + movmi a2, #0 + cmp a2, #255 + movgt a2, #255 + add ip, a4, v2 + movs ip, ip, asr #20 + movmi ip, #0 + cmp ip, #255 + movgt ip, #255 + orr a2, a2, ip, lsl #8 + sub a3, a3, v1 + movs a3, a3, asr #20 + movmi a3, #0 + cmp a3, #255 + movgt a3, #255 + sub a4, a4, v2 + movs a4, a4, asr #20 + movmi a4, #0 + cmp a4, #255 + ldr v1, [sp, #28] + movgt a4, #255 + strh a2, [v1] + add a2, v1, #2 + str a2, [sp, #28] + orr a2, a3, a4, lsl #8 + rsb v2, lr, lr, lsl #3 + ldmfd sp!, {a3, a4} + strh a2, [v2, v1]! + + sub a2, a3, v3 + movs a2, a2, asr #20 + movmi a2, #0 + cmp a2, #255 + movgt a2, #255 + sub ip, a4, v4 + movs ip, ip, asr #20 + movmi ip, #0 + cmp ip, #255 + movgt ip, #255 + orr a2, a2, ip, lsl #8 + strh a2, [v1, lr]! 
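 @ (Editorial note: each movs/movmi #0 ... cmp #255/movgt #255 group above and
 @ below clamps one column result, shifted down by COL_SHIFT, into the 0..255
 @ pixel range before two results are packed into each 16-bit store.)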
+ add a3, a3, v3 + movs a2, a3, asr #20 + movmi a2, #0 + cmp a2, #255 + movgt a2, #255 + add a4, a4, v4 + movs a4, a4, asr #20 + movmi a4, #0 + cmp a4, #255 + movgt a4, #255 + orr a2, a2, a4, lsl #8 + ldmfd sp!, {a3, a4} + strh a2, [v2, -lr]! + + add a2, a3, v5 + movs a2, a2, asr #20 + movmi a2, #0 + cmp a2, #255 + movgt a2, #255 + add ip, a4, v6 + movs ip, ip, asr #20 + movmi ip, #0 + cmp ip, #255 + movgt ip, #255 + orr a2, a2, ip, lsl #8 + strh a2, [v1, lr]! + sub a3, a3, v5 + movs a2, a3, asr #20 + movmi a2, #0 + cmp a2, #255 + movgt a2, #255 + sub a4, a4, v6 + movs a4, a4, asr #20 + movmi a4, #0 + cmp a4, #255 + movgt a4, #255 + orr a2, a2, a4, lsl #8 + ldmfd sp!, {a3, a4} + strh a2, [v2, -lr]! + + add a2, a3, v7 + movs a2, a2, asr #20 + movmi a2, #0 + cmp a2, #255 + movgt a2, #255 + add ip, a4, fp + movs ip, ip, asr #20 + movmi ip, #0 + cmp ip, #255 + movgt ip, #255 + orr a2, a2, ip, lsl #8 + strh a2, [v1, lr] + sub a3, a3, v7 + movs a2, a3, asr #20 + movmi a2, #0 + cmp a2, #255 + movgt a2, #255 + sub a4, a4, fp + movs a4, a4, asr #20 + movmi a4, #0 + cmp a4, #255 + movgt a4, #255 + orr a2, a2, a4, lsl #8 + strh a2, [v2, -lr] + + ldr pc, [sp], #4 + .endfunc + + .align + .func idct_col_add_armv5te +idct_col_add_armv5te: + str lr, [sp, #-4]! + + idct_col + + ldr lr, [sp, #36] + + ldmfd sp!, {a3, a4} + ldrh ip, [lr] + add a2, a3, v1 + mov a2, a2, asr #20 + sub a3, a3, v1 + and v1, ip, #255 + adds a2, a2, v1 + movmi a2, #0 + cmp a2, #255 + movgt a2, #255 + add v1, a4, v2 + mov v1, v1, asr #20 + adds v1, v1, ip, lsr #8 + movmi v1, #0 + cmp v1, #255 + movgt v1, #255 + orr a2, a2, v1, lsl #8 + ldr v1, [sp, #32] + sub a4, a4, v2 + rsb v2, v1, v1, lsl #3 + ldrh ip, [v2, lr]! + strh a2, [lr] + mov a3, a3, asr #20 + and a2, ip, #255 + adds a3, a3, a2 + movmi a3, #0 + cmp a3, #255 + movgt a3, #255 + mov a4, a4, asr #20 + adds a4, a4, ip, lsr #8 + movmi a4, #0 + cmp a4, #255 + movgt a4, #255 + add a2, lr, #2 + str a2, [sp, #28] + orr a2, a3, a4, lsl #8 + strh a2, [v2] + + ldmfd sp!, {a3, a4} + ldrh ip, [lr, v1]! + sub a2, a3, v3 + mov a2, a2, asr #20 + add a3, a3, v3 + and v3, ip, #255 + adds a2, a2, v3 + movmi a2, #0 + cmp a2, #255 + movgt a2, #255 + sub v3, a4, v4 + mov v3, v3, asr #20 + adds v3, v3, ip, lsr #8 + movmi v3, #0 + cmp v3, #255 + movgt v3, #255 + orr a2, a2, v3, lsl #8 + add a4, a4, v4 + ldrh ip, [v2, -v1]! + strh a2, [lr] + mov a3, a3, asr #20 + and a2, ip, #255 + adds a3, a3, a2 + movmi a3, #0 + cmp a3, #255 + movgt a3, #255 + mov a4, a4, asr #20 + adds a4, a4, ip, lsr #8 + movmi a4, #0 + cmp a4, #255 + movgt a4, #255 + orr a2, a3, a4, lsl #8 + strh a2, [v2] + + ldmfd sp!, {a3, a4} + ldrh ip, [lr, v1]! + add a2, a3, v5 + mov a2, a2, asr #20 + sub a3, a3, v5 + and v3, ip, #255 + adds a2, a2, v3 + movmi a2, #0 + cmp a2, #255 + movgt a2, #255 + add v3, a4, v6 + mov v3, v3, asr #20 + adds v3, v3, ip, lsr #8 + movmi v3, #0 + cmp v3, #255 + movgt v3, #255 + orr a2, a2, v3, lsl #8 + sub a4, a4, v6 + ldrh ip, [v2, -v1]! + strh a2, [lr] + mov a3, a3, asr #20 + and a2, ip, #255 + adds a3, a3, a2 + movmi a3, #0 + cmp a3, #255 + movgt a3, #255 + mov a4, a4, asr #20 + adds a4, a4, ip, lsr #8 + movmi a4, #0 + cmp a4, #255 + movgt a4, #255 + orr a2, a3, a4, lsl #8 + strh a2, [v2] + + ldmfd sp!, {a3, a4} + ldrh ip, [lr, v1]! 
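 @ (Editorial note: the add variant differs from put only in that each pair of
 @ destination pixels is first loaded with ldrh, the two column results are
 @ added to them, and the clamped sums are stored back.)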
+ add a2, a3, v7 + mov a2, a2, asr #20 + sub a3, a3, v7 + and v3, ip, #255 + adds a2, a2, v3 + movmi a2, #0 + cmp a2, #255 + movgt a2, #255 + add v3, a4, fp + mov v3, v3, asr #20 + adds v3, v3, ip, lsr #8 + movmi v3, #0 + cmp v3, #255 + movgt v3, #255 + orr a2, a2, v3, lsl #8 + sub a4, a4, fp + ldrh ip, [v2, -v1]! + strh a2, [lr] + mov a3, a3, asr #20 + and a2, ip, #255 + adds a3, a3, a2 + movmi a3, #0 + cmp a3, #255 + movgt a3, #255 + mov a4, a4, asr #20 + adds a4, a4, ip, lsr #8 + movmi a4, #0 + cmp a4, #255 + movgt a4, #255 + orr a2, a3, a4, lsl #8 + strh a2, [v2] + + ldr pc, [sp], #4 + .endfunc + + .align + .global simple_idct_armv5te + .func simple_idct_armv5te +simple_idct_armv5te: + stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, lr} + + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + + sub a1, a1, #(16*7) + + bl idct_col_armv5te + add a1, a1, #4 + bl idct_col_armv5te + add a1, a1, #4 + bl idct_col_armv5te + add a1, a1, #4 + bl idct_col_armv5te + + ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc} + .endfunc + + .align + .global simple_idct_add_armv5te + .func simple_idct_add_armv5te +simple_idct_add_armv5te: + stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr} + + mov a1, a3 + + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + + sub a1, a1, #(16*7) + + bl idct_col_add_armv5te + add a1, a1, #4 + bl idct_col_add_armv5te + add a1, a1, #4 + bl idct_col_add_armv5te + add a1, a1, #4 + bl idct_col_add_armv5te + + add sp, sp, #8 + ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc} + .endfunc + + .align + .global simple_idct_put_armv5te + .func simple_idct_put_armv5te +simple_idct_put_armv5te: + stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr} + + mov a1, a3 + + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + + sub a1, a1, #(16*7) + + bl idct_col_put_armv5te + add a1, a1, #4 + bl idct_col_put_armv5te + add a1, a1, #4 + bl idct_col_put_armv5te + add a1, a1, #4 + bl idct_col_put_armv5te + + add sp, sp, #8 + ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc} + .endfunc diff --git a/src/libffmpeg/libavcodec/avcodec.h b/src/libffmpeg/libavcodec/avcodec.h index d8090ed32..7d7678455 100644 --- a/src/libffmpeg/libavcodec/avcodec.h +++ b/src/libffmpeg/libavcodec/avcodec.h @@ -37,13 +37,13 @@ extern "C" { #define AV_STRINGIFY(s) AV_TOSTRING(s) #define AV_TOSTRING(s) #s -#define LIBAVCODEC_VERSION_INT ((51<<16)+(25<<8)+0) -#define LIBAVCODEC_VERSION 51.25.0 +#define LIBAVCODEC_VERSION_INT ((51<<16)+(28<<8)+0) +#define LIBAVCODEC_VERSION 51.28.0 #define LIBAVCODEC_BUILD LIBAVCODEC_VERSION_INT #define LIBAVCODEC_IDENT "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION) -#define AV_NOPTS_VALUE int64_t_C(0x8000000000000000) +#define AV_NOPTS_VALUE INT64_C(0x8000000000000000) #define AV_TIME_BASE 1000000 #define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE} @@ -156,6 
+156,7 @@ enum CodecID { CODEC_ID_TIERTEXSEQVIDEO, CODEC_ID_TIFF, CODEC_ID_GIF, + CODEC_ID_FFH264, /* various pcm "codecs" */ CODEC_ID_PCM_S16LE= 0x10000, @@ -243,6 +244,7 @@ enum CodecID { CODEC_ID_WAVPACK, CODEC_ID_DSICINAUDIO, CODEC_ID_IMC, + CODEC_ID_MUSEPACK7, /* subtitle codecs */ CODEC_ID_DVD_SUBTITLE= 0x17000, @@ -372,7 +374,7 @@ typedef struct RcOverride{ #define CODEC_FLAG2_LOCAL_HEADER 0x00000008 ///< place global headers at every keyframe instead of in extradata #define CODEC_FLAG2_BPYRAMID 0x00000010 ///< H.264 allow b-frames to be used as references #define CODEC_FLAG2_WPRED 0x00000020 ///< H.264 weighted biprediction for b-frames -#define CODEC_FLAG2_MIXED_REFS 0x00000040 ///< H.264 multiple references per partition +#define CODEC_FLAG2_MIXED_REFS 0x00000040 ///< H.264 one reference per partition, as opposed to one reference per macroblock #define CODEC_FLAG2_8X8DCT 0x00000080 ///< H.264 high profile 8x8 transform #define CODEC_FLAG2_FASTPSKIP 0x00000100 ///< H.264 fast pskip #define CODEC_FLAG2_AUD 0x00000200 ///< H.264 access unit delimiters @@ -380,6 +382,7 @@ typedef struct RcOverride{ #define CODEC_FLAG2_INTRA_VLC 0x00000800 ///< use MPEG-2 intra VLC table #define CODEC_FLAG2_MEMC_ONLY 0x00001000 ///< only do ME/MC (I frames -> ref, P frame -> ME+MC) #define CODEC_FLAG2_DROP_FRAME_TIMECODE 0x00002000 ///< timecode is in drop frame format +#define CODEC_FLAG2_SKIP_RD 0x00004000 ///< RD optimal MB level residual skiping /* Unsupported options : * Syntax Arithmetic coding (SAC) @@ -2090,9 +2093,6 @@ typedef struct AVCodec { int (*decode)(AVCodecContext *, void *outdata, int *outdata_size, uint8_t *buf, int buf_size); int capabilities; -#if LIBAVCODEC_VERSION_INT < ((50<<16)+(0<<8)+0) - void *dummy; // FIXME remove next time we break binary compatibility -#endif struct AVCodec *next; void (*flush)(AVCodecContext *); const AVRational *supported_framerates; ///array of supported framerates, or NULL if any, array is terminated by {0,0} @@ -2310,6 +2310,7 @@ extern AVCodec libgsm_decoder; extern AVCodec bmp_decoder; extern AVCodec mmvideo_decoder; extern AVCodec zmbv_decoder; +extern AVCodec zmbv_encoder; extern AVCodec avs_decoder; extern AVCodec smacker_decoder; extern AVCodec smackaud_decoder; @@ -2324,6 +2325,7 @@ extern AVCodec dsicinaudio_decoder; extern AVCodec tiertexseqvideo_decoder; extern AVCodec tiff_decoder; extern AVCodec imc_decoder; +extern AVCodec mpc7_decoder; /* pcm codecs */ #define PCM_CODEC(id, name) \ @@ -2691,20 +2693,6 @@ int img_crop(AVPicture *dst, const AVPicture *src, int img_pad(AVPicture *dst, const AVPicture *src, int height, int width, int pix_fmt, int padtop, int padbottom, int padleft, int padright, int *color); -/* endian macros */ -#if !defined(BE_16) || !defined(BE_32) || !defined(LE_16) || !defined(LE_32) -#define BE_16(x) ((((uint8_t*)(x))[0] << 8) | ((uint8_t*)(x))[1]) -#define BE_32(x) ((((uint8_t*)(x))[0] << 24) | \ - (((uint8_t*)(x))[1] << 16) | \ - (((uint8_t*)(x))[2] << 8) | \ - ((uint8_t*)(x))[3]) -#define LE_16(x) ((((uint8_t*)(x))[1] << 8) | ((uint8_t*)(x))[0]) -#define LE_32(x) ((((uint8_t*)(x))[3] << 24) | \ - (((uint8_t*)(x))[2] << 16) | \ - (((uint8_t*)(x))[1] << 8) | \ - ((uint8_t*)(x))[0]) -#endif - extern unsigned int av_xiphlacing(unsigned char *s, unsigned int v); /* unused static macro */ diff --git a/src/libffmpeg/libavcodec/bitstream.h b/src/libffmpeg/libavcodec/bitstream.h index af25b6dcf..29e0f441e 100644 --- a/src/libffmpeg/libavcodec/bitstream.h +++ b/src/libffmpeg/libavcodec/bitstream.h @@ -187,12 +187,12 @@ 
static inline uint##x##_t unaligned##x(const void *v) { \ } # elif defined(__DECC) # define unaligned(x) \ -static inline uint##x##_t unaligned##x##(const void *v) { \ +static inline uint##x##_t unaligned##x(const void *v) { \ return *(const __unaligned uint##x##_t *) v; \ } # else # define unaligned(x) \ -static inline uint##x##_t unaligned##x##(const void *v) { \ +static inline uint##x##_t unaligned##x(const void *v) { \ return *(const uint##x##_t *) v; \ } # endif @@ -877,7 +877,7 @@ void free_vlc(VLC *vlc); * read the longest vlc code * = (max_vlc_length + bits - 1) / bits */ -static always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2], +static av_always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2], int bits, int max_depth) { int code; diff --git a/src/libffmpeg/libavcodec/bytestream.h b/src/libffmpeg/libavcodec/bytestream.h index 25c457fe4..a742fa1c1 100644 --- a/src/libffmpeg/libavcodec/bytestream.h +++ b/src/libffmpeg/libavcodec/bytestream.h @@ -22,32 +22,32 @@ #ifndef FFMPEG_BYTESTREAM_H #define FFMPEG_BYTESTREAM_H -static always_inline unsigned int bytestream_get_le32(uint8_t **b) +static av_always_inline unsigned int bytestream_get_le32(uint8_t **b) { (*b) += 4; return LE_32(*b - 4); } -static always_inline unsigned int bytestream_get_le16(uint8_t **b) +static av_always_inline unsigned int bytestream_get_le16(uint8_t **b) { (*b) += 2; return LE_16(*b - 2); } -static always_inline unsigned int bytestream_get_byte(uint8_t **b) +static av_always_inline unsigned int bytestream_get_byte(uint8_t **b) { (*b)++; return (*b)[-1]; } -static always_inline unsigned int bytestream_get_buffer(uint8_t **b, uint8_t *dst, unsigned int size) +static av_always_inline unsigned int bytestream_get_buffer(uint8_t **b, uint8_t *dst, unsigned int size) { memcpy(dst, *b, size); (*b) += size; return size; } -static always_inline void bytestream_put_be32(uint8_t **b, const unsigned int value) +static av_always_inline void bytestream_put_be32(uint8_t **b, const unsigned int value) { *(*b)++ = value >> 24; *(*b)++ = value >> 16; @@ -55,13 +55,13 @@ static always_inline void bytestream_put_be32(uint8_t **b, const unsigned int va *(*b)++ = value; }; -static always_inline void bytestream_put_be16(uint8_t **b, const unsigned int value) +static av_always_inline void bytestream_put_be16(uint8_t **b, const unsigned int value) { *(*b)++ = value >> 8; *(*b)++ = value; } -static always_inline void bytestream_put_le32(uint8_t **b, const unsigned int value) +static av_always_inline void bytestream_put_le32(uint8_t **b, const unsigned int value) { *(*b)++ = value; *(*b)++ = value >> 8; @@ -69,18 +69,18 @@ static always_inline void bytestream_put_le32(uint8_t **b, const unsigned int va *(*b)++ = value >> 24; } -static always_inline void bytestream_put_le16(uint8_t **b, const unsigned int value) +static av_always_inline void bytestream_put_le16(uint8_t **b, const unsigned int value) { *(*b)++ = value; *(*b)++ = value >> 8; } -static always_inline void bytestream_put_byte(uint8_t **b, const unsigned int value) +static av_always_inline void bytestream_put_byte(uint8_t **b, const unsigned int value) { *(*b)++ = value; } -static always_inline void bytestream_put_buffer(uint8_t **b, const uint8_t *src, unsigned int size) +static av_always_inline void bytestream_put_buffer(uint8_t **b, const uint8_t *src, unsigned int size) { memcpy(*b, src, size); (*b) += size; diff --git a/src/libffmpeg/libavcodec/cabac.h b/src/libffmpeg/libavcodec/cabac.h index 43fe78e3b..f47406a9e 100644 --- 
a/src/libffmpeg/libavcodec/cabac.h +++ b/src/libffmpeg/libavcodec/cabac.h @@ -363,7 +363,7 @@ static inline void renorm_cabac_decoder_once(CABACContext *c){ refill(c); } -static int always_inline get_cabac_inline(CABACContext *c, uint8_t * const state){ +static int av_always_inline get_cabac_inline(CABACContext *c, uint8_t * const state){ //FIXME gcc generates duplicate load/stores for c->low and c->range #define LOW "0" #define RANGE "4" @@ -631,7 +631,7 @@ static int get_cabac_bypass(CABACContext *c){ } -static always_inline int get_cabac_bypass_sign(CABACContext *c, int val){ +static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val){ #if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) asm volatile( "movl "RANGE "(%1), %%ebx \n\t" diff --git a/src/libffmpeg/libavcodec/cinepak.c b/src/libffmpeg/libavcodec/cinepak.c index e137377e5..fd95b739e 100644 --- a/src/libffmpeg/libavcodec/cinepak.c +++ b/src/libffmpeg/libavcodec/cinepak.c @@ -26,6 +26,8 @@ * by Ewald Snel <ewald@rambo.its.tudelft.nl> * For more information on the Cinepak algorithm, visit: * http://www.csse.monash.edu.au/~timf/ + * For more information on the quirky data inside Sega FILM/CPK files, visit: + * http://wiki.multimedia.cx/index.php?title=Sega_FILM */ #include <stdio.h> @@ -67,6 +69,8 @@ typedef struct CinepakContext { int palette_video; cvid_strip_t strips[MAX_STRIPS]; + int sega_film_skip_bytes; + } CinepakContext; static void cinepak_decode_codebook (cvid_codebook_t *codebook, @@ -319,8 +323,6 @@ static int cinepak_decode (CinepakContext *s) int i, result, strip_size, frame_flags, num_strips; int y0 = 0; int encoded_buf_size; - /* if true, Cinepak data is from a Sega FILM/CPK file */ - int sega_film_data = 0; if (s->size < 10) return -1; @@ -328,12 +330,29 @@ static int cinepak_decode (CinepakContext *s) frame_flags = s->data[0]; num_strips = BE_16 (&s->data[8]); encoded_buf_size = ((s->data[1] << 16) | BE_16 (&s->data[2])); - if (encoded_buf_size != s->size) - sega_film_data = 1; - if (sega_film_data) - s->data += 12; - else - s->data += 10; + + /* if this is the first frame, check for deviant Sega FILM data */ + if (s->sega_film_skip_bytes == -1) { + if (encoded_buf_size != s->size) { + /* If the encoded frame size differs from the frame size as indicated + * by the container file, this data likely comes from a Sega FILM/CPK file. + * If the frame header is followed by the bytes FE 00 00 06 00 00 then + * this is probably one of the two known files that have 6 extra bytes + * after the frame header. Else, assume 2 extra bytes. 
*/ + if ((s->data[10] == 0xFE) && + (s->data[11] == 0x00) && + (s->data[12] == 0x00) && + (s->data[13] == 0x06) && + (s->data[14] == 0x00) && + (s->data[15] == 0x00)) + s->sega_film_skip_bytes = 6; + else + s->sega_film_skip_bytes = 2; + } else + s->sega_film_skip_bytes = 0; + } + + s->data += 10 + s->sega_film_skip_bytes; if (num_strips > MAX_STRIPS) num_strips = MAX_STRIPS; @@ -377,6 +396,7 @@ static int cinepak_decode_init(AVCodecContext *avctx) s->avctx = avctx; s->width = (avctx->width + 3) & ~3; s->height = (avctx->height + 3) & ~3; + s->sega_film_skip_bytes = -1; /* uninitialized state */ // check for paletted data if ((avctx->palctrl == NULL) || (avctx->bits_per_sample == 40)) { diff --git a/src/libffmpeg/libavcodec/cook.c b/src/libffmpeg/libavcodec/cook.c index 47d9ce2c3..943addb89 100644 --- a/src/libffmpeg/libavcodec/cook.c +++ b/src/libffmpeg/libavcodec/cook.c @@ -312,7 +312,7 @@ static int cook_decode_close(AVCodecContext *avctx) { int i; COOKContext *q = avctx->priv_data; - av_log(NULL,AV_LOG_DEBUG, "Deallocating memory.\n"); + av_log(avctx,AV_LOG_DEBUG, "Deallocating memory.\n"); /* Free allocated memory buffers. */ av_free(q->mlt_window); @@ -1160,12 +1160,12 @@ static int cook_decode_init(AVCodecContext *avctx) /* Take care of the codec specific extradata. */ if (avctx->extradata_size <= 0) { - av_log(NULL,AV_LOG_ERROR,"Necessary extradata missing!\n"); + av_log(avctx,AV_LOG_ERROR,"Necessary extradata missing!\n"); return -1; } else { /* 8 for mono, 16 for stereo, ? for multichannel Swap to right endianness so we don't need to care later on. */ - av_log(NULL,AV_LOG_DEBUG,"codecdata_length=%d\n",avctx->extradata_size); + av_log(avctx,AV_LOG_DEBUG,"codecdata_length=%d\n",avctx->extradata_size); if (avctx->extradata_size >= 8){ e->cookversion = be2me_32(e->cookversion); e->samples_per_frame = be2me_16(e->samples_per_frame); @@ -1201,24 +1201,24 @@ static int cook_decode_init(AVCodecContext *avctx) switch (e->cookversion) { case MONO_COOK1: if (q->nb_channels != 1) { - av_log(NULL,AV_LOG_ERROR,"Container channels != 1, report sample!\n"); + av_log(avctx,AV_LOG_ERROR,"Container channels != 1, report sample!\n"); return -1; } - av_log(NULL,AV_LOG_DEBUG,"MONO_COOK1\n"); + av_log(avctx,AV_LOG_DEBUG,"MONO_COOK1\n"); break; case MONO_COOK2: if (q->nb_channels != 1) { q->joint_stereo = 0; q->bits_per_subpacket = q->bits_per_subpacket/2; } - av_log(NULL,AV_LOG_DEBUG,"MONO_COOK2\n"); + av_log(avctx,AV_LOG_DEBUG,"MONO_COOK2\n"); break; case JOINT_STEREO: if (q->nb_channels != 2) { - av_log(NULL,AV_LOG_ERROR,"Container channels != 2, report sample!\n"); + av_log(avctx,AV_LOG_ERROR,"Container channels != 2, report sample!\n"); return -1; } - av_log(NULL,AV_LOG_DEBUG,"JOINT_STEREO\n"); + av_log(avctx,AV_LOG_DEBUG,"JOINT_STEREO\n"); if (avctx->extradata_size >= 16){ q->total_subbands = q->subbands + e->js_subband_start; q->js_subband_start = e->js_subband_start; @@ -1233,11 +1233,11 @@ static int cook_decode_init(AVCodecContext *avctx) } break; case MC_COOK: - av_log(NULL,AV_LOG_ERROR,"MC_COOK not supported!\n"); + av_log(avctx,AV_LOG_ERROR,"MC_COOK not supported!\n"); return -1; break; default: - av_log(NULL,AV_LOG_ERROR,"Unknown Cook version, report sample!\n"); + av_log(avctx,AV_LOG_ERROR,"Unknown Cook version, report sample!\n"); return -1; break; } @@ -1280,16 +1280,16 @@ static int cook_decode_init(AVCodecContext *avctx) /* Try to catch some obviously faulty streams, othervise it might be exploitable */ if (q->total_subbands > 53) { - av_log(NULL,AV_LOG_ERROR,"total_subbands > 53, 
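
Condensed, the first-frame heuristic the cinepak.c hunk above introduces: the 10-byte Cinepak frame header carries a 24-bit encoded size; when it disagrees with the buffer size reported by the container, the data comes from a Sega FILM/CPK file and carries 2 extra bytes after the header, or 6 in the two known files flagged by the byte pattern FE 00 00 06 00 00. A sketch with a hypothetical helper name:

#include <stdint.h>

/* returns the number of extra bytes to skip after the 10-byte header */
static int detect_sega_film_skip(const uint8_t *d, int size)
{
    int encoded = (d[1] << 16) | (d[2] << 8) | d[3];  /* 24-bit size field */

    if (encoded == size)
        return 0;                       /* sizes agree: plain Cinepak */
    if (size >= 16 &&
        d[10] == 0xFE && d[11] == 0x00 && d[12] == 0x00 &&
        d[13] == 0x06 && d[14] == 0x00 && d[15] == 0x00)
        return 6;                       /* the two known 6-byte files */
    return 2;                           /* usual Sega FILM/CPK padding */
}
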
report sample!\n"); + av_log(avctx,AV_LOG_ERROR,"total_subbands > 53, report sample!\n"); return -1; } if (q->subbands > 50) { - av_log(NULL,AV_LOG_ERROR,"subbands > 50, report sample!\n"); + av_log(avctx,AV_LOG_ERROR,"subbands > 50, report sample!\n"); return -1; } if ((q->samples_per_channel == 256) || (q->samples_per_channel == 512) || (q->samples_per_channel == 1024)) { } else { - av_log(NULL,AV_LOG_ERROR,"unknown amount of samples_per_channel = %d, report sample!\n",q->samples_per_channel); + av_log(avctx,AV_LOG_ERROR,"unknown amount of samples_per_channel = %d, report sample!\n",q->samples_per_channel); return -1; } diff --git a/src/libffmpeg/libavcodec/cscd.c b/src/libffmpeg/libavcodec/cscd.c index e4257f4c0..d8733d6dd 100644 --- a/src/libffmpeg/libavcodec/cscd.c +++ b/src/libffmpeg/libavcodec/cscd.c @@ -220,12 +220,12 @@ static int decode_init(AVCodecContext *avctx) { } avctx->has_b_frames = 0; switch (avctx->bits_per_sample) { - case 16: avctx->pix_fmt = PIX_FMT_RGB565; break; + case 16: avctx->pix_fmt = PIX_FMT_RGB555; break; case 24: avctx->pix_fmt = PIX_FMT_BGR24; break; case 32: avctx->pix_fmt = PIX_FMT_RGBA32; break; default: av_log(avctx, AV_LOG_ERROR, - "CamStudio codec error: unvalid depth %i bpp\n", + "CamStudio codec error: invalid depth %i bpp\n", avctx->bits_per_sample); return 1; } diff --git a/src/libffmpeg/libavcodec/dsputil.c b/src/libffmpeg/libavcodec/dsputil.c index 51eddbc60..916d8658c 100644 --- a/src/libffmpeg/libavcodec/dsputil.c +++ b/src/libffmpeg/libavcodec/dsputil.c @@ -2549,6 +2549,11 @@ void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) { } #endif /* CONFIG_VC1_DECODER||CONFIG_WMV3_DECODER */ +#if defined(CONFIG_H264_ENCODER) +/* H264 specific */ +void ff_h264dsp_init(DSPContext* c, AVCodecContext *avctx); +#endif /* CONFIG_H264_ENCODER */ + static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; int i; @@ -3801,11 +3806,31 @@ void dsputil_static_init(void) for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; } +int ff_check_alignment(void){ + static int did_fail=0; + DECLARE_ALIGNED_16(int, aligned); + + if((int)&aligned & 15){ + if(!did_fail){ +#if defined(HAVE_MMX) || defined(HAVE_ALTIVEC) + av_log(NULL, AV_LOG_ERROR, + "Compiler did not align stack variables. Libavcodec has been miscompiled\n" + "and may be very slow or crash. This is not a bug in libavcodec,\n" + "but in the compiler. 
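
The cscd.c change above matters because 16-bpp CamStudio frames store 5 bits per channel (xRGB1555); decoding them as RGB565 would shift red into the padding bit and read a 6-bit green, corrupting every pixel. The two layouts side by side (helper names are illustrative):

#include <stdint.h>

static void unpack_rgb555(uint16_t px, int *r, int *g, int *b)
{
    *r = (px >> 10) & 0x1f;   /* xRRRRRGG GGGBBBBB */
    *g = (px >>  5) & 0x1f;
    *b =  px        & 0x1f;
}

static void unpack_rgb565(uint16_t px, int *r, int *g, int *b)
{
    *r = (px >> 11) & 0x1f;   /* RRRRRGGG GGGBBBBB: 6-bit green, no pad bit */
    *g = (px >>  5) & 0x3f;
    *b =  px        & 0x1f;
}
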
Do not report crashes to FFmpeg developers.\n"); +#endif + did_fail=1; + } + return -1; + } + return 0; +} void dsputil_init(DSPContext* c, AVCodecContext *avctx) { int i; + ff_check_alignment(); + #ifdef CONFIG_ENCODERS if(avctx->dct_algo==FF_DCT_FASTINT) { c->fdct = fdct_ifast; @@ -4006,6 +4031,9 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) #if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER) ff_vc1dsp_init(c,avctx); #endif +#if defined(CONFIG_H264_ENCODER) + ff_h264dsp_init(c,avctx); +#endif c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c; c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c; diff --git a/src/libffmpeg/libavcodec/dsputil.h b/src/libffmpeg/libavcodec/dsputil.h index de3c1d564..78109f7b9 100644 --- a/src/libffmpeg/libavcodec/dsputil.h +++ b/src/libffmpeg/libavcodec/dsputil.h @@ -33,9 +33,6 @@ #include "common.h" #include "avcodec.h" -#if defined(ARCH_X86) || defined(ARCH_X86_64) -#define HAVE_MMX 1 -#endif //#define DEBUG /* dct code */ @@ -381,10 +378,12 @@ typedef struct DSPContext { #define BASIS_SHIFT 16 #define RECON_SHIFT 6 + /* h264 functions */ void (*h264_idct_add)(uint8_t *dst, DCTELEM *block, int stride); void (*h264_idct8_add)(uint8_t *dst, DCTELEM *block, int stride); void (*h264_idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride); void (*h264_idct8_dc_add)(uint8_t *dst, DCTELEM *block, int stride); + void (*h264_dct)(DCTELEM block[4][4]); /* snow wavelet */ void (*vertical_compose97i)(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); @@ -411,6 +410,8 @@ typedef struct DSPContext { void dsputil_static_init(void); void dsputil_init(DSPContext* p, AVCodecContext *avctx); +int ff_check_alignment(void); + /** * permute block according to permuatation. * @param last last non zero element in scantable order @@ -483,6 +484,7 @@ int mm_support(void); #define MM_SSE2 0x0010 /* PIV SSE2 functions */ #define MM_3DNOWEXT 0x0020 /* AMD 3DNowExt */ #define MM_SSE3 0x0040 /* Prescott SSE3 functions */ +#define MM_SSSE3 0x0080 /* Conroe SSSE3 functions */ extern int mm_flags; @@ -593,30 +595,6 @@ void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx); #endif -#ifdef __GNUC__ - -struct unaligned_64 { uint64_t l; } __attribute__((packed)); -struct unaligned_32 { uint32_t l; } __attribute__((packed)); -struct unaligned_16 { uint16_t l; } __attribute__((packed)); - -#define LD16(a) (((const struct unaligned_16 *) (a))->l) -#define LD32(a) (((const struct unaligned_32 *) (a))->l) -#define LD64(a) (((const struct unaligned_64 *) (a))->l) - -#define ST16(a, b) (((struct unaligned_16 *) (a))->l) = (b) -#define ST32(a, b) (((struct unaligned_32 *) (a))->l) = (b) - -#else /* __GNUC__ */ - -#define LD16(a) (*((uint16_t*)(a))) -#define LD32(a) (*((uint32_t*)(a))) -#define LD64(a) (*((uint64_t*)(a))) - -#define ST16(a, b) *((uint16_t*)(a)) = (b) -#define ST32(a, b) *((uint32_t*)(a)) = (b) - -#endif /* !__GNUC__ */ - /* PSNR */ void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3], int orig_linesize[3], int coded_linesize, diff --git a/src/libffmpeg/libavcodec/dv.c b/src/libffmpeg/libavcodec/dv.c index 76095a481..803d3502d 100644 --- a/src/libffmpeg/libavcodec/dv.c +++ b/src/libffmpeg/libavcodec/dv.c @@ -560,7 +560,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s, #ifdef DV_CODEC_TINY_TARGET /* Converts run and level (where level != 0) pair into vlc, returning bit size */ -static always_inline int dv_rl2vlc(int run, int level, int sign, uint32_t* vlc) +static av_always_inline int dv_rl2vlc(int run, 
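
ff_check_alignment(), added above, boils down to taking the address of a stack variable that was declared 16-byte aligned and testing its low four bits; if the compiler failed to honour the request, SIMD code relying on aligned loads would crash or crawl. A reduced version, assuming DECLARE_ALIGNED_16 expands to gcc's aligned attribute (uintptr_t is used here for the address test):

#include <stdint.h>

static int check_alignment(void)
{
    __attribute__((aligned(16))) int probe;   /* request a 16-byte aligned slot */

    if ((uintptr_t)&probe & 15)
        return -1;   /* stack not aligned: miscompiled for MMX/AltiVec use */
    return 0;
}
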
int level, int sign, uint32_t* vlc) { int size; if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) { @@ -585,7 +585,7 @@ static always_inline int dv_rl2vlc(int run, int level, int sign, uint32_t* vlc) return size; } -static always_inline int dv_rl2vlc_size(int run, int level) +static av_always_inline int dv_rl2vlc_size(int run, int level) { int size; @@ -601,13 +601,13 @@ static always_inline int dv_rl2vlc_size(int run, int level) return size; } #else -static always_inline int dv_rl2vlc(int run, int l, int sign, uint32_t* vlc) +static av_always_inline int dv_rl2vlc(int run, int l, int sign, uint32_t* vlc) { *vlc = dv_vlc_map[run][l].vlc | sign; return dv_vlc_map[run][l].size; } -static always_inline int dv_rl2vlc_size(int run, int l) +static av_always_inline int dv_rl2vlc_size(int run, int l) { return dv_vlc_map[run][l].size; } @@ -627,7 +627,7 @@ typedef struct EncBlockInfo { uint32_t partial_bit_buffer; /* we can't use uint16_t here */ } EncBlockInfo; -static always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext* pb_pool, +static av_always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext* pb_pool, PutBitContext* pb_end) { int prev; @@ -670,7 +670,7 @@ static always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext return pb; } -static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi, +static av_always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi, const uint8_t* zigzag_scan, const int *weight, int bias) { int i, area; @@ -742,7 +742,7 @@ static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi, //FIXME replace this by dsputil #define SC(x, y) ((s[x] - s[y]) ^ ((s[x] - s[y]) >> 7)) -static always_inline int dv_guess_dct_mode(DCTELEM *blk) { +static av_always_inline int dv_guess_dct_mode(DCTELEM *blk) { DCTELEM *s; int score88 = 0; int score248 = 0; diff --git a/src/libffmpeg/libavcodec/faandct.c b/src/libffmpeg/libavcodec/faandct.c index e3c0d84a2..6f73ee5e9 100644 --- a/src/libffmpeg/libavcodec/faandct.c +++ b/src/libffmpeg/libavcodec/faandct.c @@ -70,7 +70,7 @@ B6*B0, B6*B1, B6*B2, B6*B3, B6*B4, B6*B5, B6*B6, B6*B7, B7*B0, B7*B1, B7*B2, B7*B3, B7*B4, B7*B5, B7*B6, B7*B7, }; -static always_inline void row_fdct(FLOAT temp[64], DCTELEM * data) +static av_always_inline void row_fdct(FLOAT temp[64], DCTELEM * data) { FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; FLOAT tmp10, tmp11, tmp12, tmp13; diff --git a/src/libffmpeg/libavcodec/ffv1.c b/src/libffmpeg/libavcodec/ffv1.c index 62623e591..1ca18a4e8 100644 --- a/src/libffmpeg/libavcodec/ffv1.c +++ b/src/libffmpeg/libavcodec/ffv1.c @@ -186,7 +186,7 @@ typedef struct FFV1Context{ DSPContext dsp; }FFV1Context; -static always_inline int fold(int diff, int bits){ +static av_always_inline int fold(int diff, int bits){ if(bits==8) diff= (int8_t)diff; else{ diff --git a/src/libffmpeg/libavcodec/h263.c b/src/libffmpeg/libavcodec/h263.c index ba51c245a..af5fa50e6 100644 --- a/src/libffmpeg/libavcodec/h263.c +++ b/src/libffmpeg/libavcodec/h263.c @@ -487,12 +487,28 @@ static inline void restore_ac_coeffs(MpegEncContext * s, DCTELEM block[6][64], i } /** + * init s->current_picture.qscale_table from s->lambda_table + */ +static void ff_init_qscale_tab(MpegEncContext *s){ + int8_t * const qscale_table= s->current_picture.qscale_table; + int i; + + for(i=0; i<s->mb_num; i++){ + unsigned int lam= s->lambda_table[ s->mb_index2xy[i] ]; + int qp= (lam*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7); + qscale_table[ 
s->mb_index2xy[i] ]= clip(qp, s->avctx->qmin, s->avctx->qmax); + } +} + +/** * modify qscale so that encoding is acually possible in h263 (limit difference to -2..2) */ void ff_clean_h263_qscales(MpegEncContext *s){ int i; int8_t * const qscale_table= s->current_picture.qscale_table; + ff_init_qscale_tab(s); + for(i=1; i<s->mb_num; i++){ if(qscale_table[ s->mb_index2xy[i] ] - qscale_table[ s->mb_index2xy[i-1] ] >2) qscale_table[ s->mb_index2xy[i] ]= qscale_table[ s->mb_index2xy[i-1] ]+2; @@ -507,7 +523,6 @@ void ff_clean_h263_qscales(MpegEncContext *s){ int mb_xy= s->mb_index2xy[i]; if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_INTER4V)){ - s->mb_type[mb_xy]&= ~CANDIDATE_MB_TYPE_INTER4V; s->mb_type[mb_xy]|= CANDIDATE_MB_TYPE_INTER; } } @@ -546,7 +561,6 @@ void ff_clean_mpeg4_qscales(MpegEncContext *s){ for(i=1; i<s->mb_num; i++){ int mb_xy= s->mb_index2xy[i]; if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_DIRECT)){ - s->mb_type[mb_xy]&= ~CANDIDATE_MB_TYPE_DIRECT; s->mb_type[mb_xy]|= CANDIDATE_MB_TYPE_BIDIR; } } diff --git a/src/libffmpeg/libavcodec/h264.c b/src/libffmpeg/libavcodec/h264.c index ad23ae120..d7c48bd4a 100644 --- a/src/libffmpeg/libavcodec/h264.c +++ b/src/libffmpeg/libavcodec/h264.c @@ -165,20 +165,6 @@ typedef struct H264Context{ MpegEncContext s; int nal_ref_idc; int nal_unit_type; -#define NAL_SLICE 1 -#define NAL_DPA 2 -#define NAL_DPB 3 -#define NAL_DPC 4 -#define NAL_IDR_SLICE 5 -#define NAL_SEI 6 -#define NAL_SPS 7 -#define NAL_PPS 8 -#define NAL_AUD 9 -#define NAL_END_SEQUENCE 10 -#define NAL_END_STREAM 11 -#define NAL_FILLER_DATA 12 -#define NAL_SPS_EXT 13 -#define NAL_AUXILIARY_SLICE 19 uint8_t *rbsp_buffer; unsigned int rbsp_buffer_size; @@ -414,7 +400,7 @@ static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, in static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); -static always_inline uint32_t pack16to32(int a, int b){ +static av_always_inline uint32_t pack16to32(int a, int b){ #ifdef WORDS_BIGENDIAN return (b&0xFFFF) + (a<<16); #else @@ -422,13 +408,22 @@ static always_inline uint32_t pack16to32(int a, int b){ #endif } +const uint8_t ff_rem6[52]={ +0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, +}; + +const uint8_t ff_div6[52]={ +0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, +}; + + /** * fill a rectangle. 
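
The ff_rem6/ff_div6 tables moved into h264.c above simply memoize q%6 and q/6 for the 52 H.264 QP values: the dequantiser scale doubles every six QP steps, so a coefficient is rescaled as base[q%6] << (q/6), and the two lookups replace a division and a modulo per use. A sketch (the base[] values follow the 4x4 scaling list visible in the dequant init code above; treat them as illustrative):

#include <assert.h>
#include <stdint.h>

static const int base[6] = { 10, 11, 13, 14, 16, 18 };  /* per-QP%6 scale */

static int dequant_level(int level, int qp)
{
    return (level * base[qp % 6]) << (qp / 6);  /* tables memoize % and / */
}

/* the tables agree with plain arithmetic for all 52 QPs */
static void check_tables(const uint8_t *rem6, const uint8_t *div6)
{
    for (int q = 0; q < 52; q++) {
        assert(rem6[q] == q % 6);
        assert(div6[q] == q / 6);
    }
}
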
* @param h height of the rectangle, should be a constant * @param w width of the rectangle, should be a constant * @param size the size of val (1 or 4), should be a constant */ -static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ +static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ uint8_t *p= (uint8_t*)vp; assert(size==1 || size==4); assert(w<=4); @@ -1808,81 +1803,6 @@ static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *c return dst; } -#if 0 -/** - * @param src the data which should be escaped - * @param dst the target buffer, dst+1 == src is allowed as a special case - * @param length the length of the src data - * @param dst_length the length of the dst array - * @returns length of escaped data in bytes or -1 if an error occured - */ -static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){ - int i, escape_count, si, di; - uint8_t *temp; - - assert(length>=0); - assert(dst_length>0); - - dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type; - - if(length==0) return 1; - - escape_count= 0; - for(i=0; i<length; i+=2){ - if(src[i]) continue; - if(i>0 && src[i-1]==0) - i--; - if(i+2<length && src[i+1]==0 && src[i+2]<=3){ - escape_count++; - i+=2; - } - } - - if(escape_count==0){ - if(dst+1 != src) - memcpy(dst+1, src, length); - return length + 1; - } - - if(length + escape_count + 1> dst_length) - return -1; - - //this should be damn rare (hopefully) - - h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count); - temp= h->rbsp_buffer; -//printf("encoding esc\n"); - - si= 0; - di= 0; - while(si < length){ - if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){ - temp[di++]= 0; si++; - temp[di++]= 0; si++; - temp[di++]= 3; - temp[di++]= src[si++]; - } - else - temp[di++]= src[si++]; - } - memcpy(dst+1, temp, length+escape_count); - - assert(di == length+escape_count); - - return di + 1; -} - -/** - * write 1,10,100,1000,... 
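
The #if 0 block deleted above was an unused RBSP escaper: H.264 forbids the byte pattern 00 00 0x (x <= 3) inside a NAL payload, so an emulation-prevention byte 03 is inserted after any pair of zero bytes that would otherwise be followed by such a value. The core rule in isolation, a sketch (escape_rbsp is a hypothetical name and dst is assumed large enough):

#include <stdint.h>

static int escape_rbsp(uint8_t *dst, const uint8_t *src, int len)
{
    int si = 0, di = 0, zeros = 0;

    while (si < len) {
        if (zeros >= 2 && src[si] <= 3) {
            dst[di++] = 3;           /* emulation-prevention byte */
            zeros = 0;
        }
        zeros = (src[si] == 0) ? zeros + 1 : 0;
        dst[di++] = src[si++];
    }
    return di;                       /* escaped length */
}
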
for alignment, yes its exactly inverse to mpeg4 - */ -static void encode_rbsp_trailing(PutBitContext *pb){ - int length; - put_bits(pb, 1, 1); - length= (-put_bits_count(pb))&7; - if(length) put_bits(pb, length, 0); -} -#endif - /** * identifies the exact end of the bitstream * @return the length of the trailing, or 0 if damaged @@ -2035,42 +1955,6 @@ static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){ return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)]; } - -#if 0 -static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){ - int i; - //FIXME try int temp instead of block - - for(i=0; i<4; i++){ - const int d0= src1[0 + i*stride] - src2[0 + i*stride]; - const int d1= src1[1 + i*stride] - src2[1 + i*stride]; - const int d2= src1[2 + i*stride] - src2[2 + i*stride]; - const int d3= src1[3 + i*stride] - src2[3 + i*stride]; - const int z0= d0 + d3; - const int z3= d0 - d3; - const int z1= d1 + d2; - const int z2= d1 - d2; - - block[0 + 4*i]= z0 + z1; - block[1 + 4*i]= 2*z3 + z2; - block[2 + 4*i]= z0 - z1; - block[3 + 4*i]= z3 - 2*z2; - } - - for(i=0; i<4; i++){ - const int z0= block[0*4 + i] + block[3*4 + i]; - const int z3= block[0*4 + i] - block[3*4 + i]; - const int z1= block[1*4 + i] + block[2*4 + i]; - const int z2= block[1*4 + i] - block[2*4 + i]; - - block[0*4 + i]= z0 + z1; - block[1*4 + i]= 2*z3 + z2; - block[2*4 + i]= z0 - z1; - block[3*4 + i]= z3 - 2*z2; - } -} -#endif - //FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away) static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){ @@ -2357,7 +2241,7 @@ static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int strid src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2; } -static void pred16x16_vertical_c(uint8_t *src, int stride){ +void ff_pred16x16_vertical_c(uint8_t *src, int stride){ int i; const uint32_t a= ((uint32_t*)(src-stride))[0]; const uint32_t b= ((uint32_t*)(src-stride))[1]; @@ -2372,7 +2256,7 @@ static void pred16x16_vertical_c(uint8_t *src, int stride){ } } -static void pred16x16_horizontal_c(uint8_t *src, int stride){ +void ff_pred16x16_horizontal_c(uint8_t *src, int stride){ int i; for(i=0; i<16; i++){ @@ -2383,7 +2267,7 @@ static void pred16x16_horizontal_c(uint8_t *src, int stride){ } } -static void pred16x16_dc_c(uint8_t *src, int stride){ +void ff_pred16x16_dc_c(uint8_t *src, int stride){ int i, dc=0; for(i=0;i<16; i++){ @@ -2437,7 +2321,7 @@ static void pred16x16_top_dc_c(uint8_t *src, int stride){ } } -static void pred16x16_128_dc_c(uint8_t *src, int stride){ +void ff_pred16x16_128_dc_c(uint8_t *src, int stride){ int i; for(i=0; i<16; i++){ @@ -2488,11 +2372,11 @@ static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int } } -static void pred16x16_plane_c(uint8_t *src, int stride){ +void ff_pred16x16_plane_c(uint8_t *src, int stride){ pred16x16_plane_compat_c(src, stride, 0); } -static void pred8x8_vertical_c(uint8_t *src, int stride){ +void ff_pred8x8_vertical_c(uint8_t *src, int stride){ int i; const uint32_t a= ((uint32_t*)(src-stride))[0]; const uint32_t b= ((uint32_t*)(src-stride))[1]; @@ -2503,7 +2387,7 @@ static void pred8x8_vertical_c(uint8_t *src, int stride){ } } -static void pred8x8_horizontal_c(uint8_t *src, int stride){ +void ff_pred8x8_horizontal_c(uint8_t *src, int stride){ int i; for(i=0; i<8; i++){ @@ -2512,7 +2396,7 @@ static void 
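
Several of the 16x16 and 8x8 intra predictors above lose their static qualifier and gain an ff_ prefix so the new H.264 encoder DSP code can reach them. For orientation, the simplest of them, DC prediction, fills the block with the rounded mean of the 16 neighbours above and the 16 to the left; roughly what ff_pred16x16_dc_c computes when both neighbours are available:

#include <stdint.h>

static void pred16x16_dc(uint8_t *src, int stride)
{
    int dc = 0;

    for (int i = 0; i < 16; i++)                 /* left column + top row */
        dc += src[-1 + i * stride] + src[i - stride];

    dc = (dc + 16) >> 5;                         /* round and divide by 32 */

    for (int y = 0; y < 16; y++)
        for (int x = 0; x < 16; x++)
            src[x + y * stride] = (uint8_t)dc;
}
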
pred8x8_horizontal_c(uint8_t *src, int stride){ } } -static void pred8x8_128_dc_c(uint8_t *src, int stride){ +void ff_pred8x8_128_dc_c(uint8_t *src, int stride){ int i; for(i=0; i<8; i++){ @@ -2566,7 +2450,7 @@ static void pred8x8_top_dc_c(uint8_t *src, int stride){ } -static void pred8x8_dc_c(uint8_t *src, int stride){ +void ff_pred8x8_dc_c(uint8_t *src, int stride){ int i; int dc0, dc1, dc2, dc3; @@ -2591,7 +2475,7 @@ static void pred8x8_dc_c(uint8_t *src, int stride){ } } -static void pred8x8_plane_c(uint8_t *src, int stride){ +void ff_pred8x8_plane_c(uint8_t *src, int stride){ int j, k; int a; uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; @@ -3220,21 +3104,21 @@ static void init_pred_ptrs(H264Context *h){ h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c; h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c; - h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c; - h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c; - h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c; - h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c; + h->pred8x8[DC_PRED8x8 ]= ff_pred8x8_dc_c; + h->pred8x8[VERT_PRED8x8 ]= ff_pred8x8_vertical_c; + h->pred8x8[HOR_PRED8x8 ]= ff_pred8x8_horizontal_c; + h->pred8x8[PLANE_PRED8x8 ]= ff_pred8x8_plane_c; h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c; h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c; - h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c; + h->pred8x8[DC_128_PRED8x8 ]= ff_pred8x8_128_dc_c; - h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c; - h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c; - h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c; - h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c; + h->pred16x16[DC_PRED8x8 ]= ff_pred16x16_dc_c; + h->pred16x16[VERT_PRED8x8 ]= ff_pred16x16_vertical_c; + h->pred16x16[HOR_PRED8x8 ]= ff_pred16x16_horizontal_c; + h->pred16x16[PLANE_PRED8x8 ]= ff_pred16x16_plane_c; h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c; h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c; - h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c; + h->pred16x16[DC_128_PRED8x8 ]= ff_pred16x16_128_dc_c; } static void free_tables(H264Context *h){ @@ -3269,8 +3153,8 @@ static void init_dequant8_coeff_table(H264Context *h){ } for(q=0; q<52; q++){ - int shift = div6[q]; - int idx = rem6[q]; + int shift = ff_div6[q]; + int idx = ff_rem6[q]; for(x=0; x<64; x++) h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] = ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * @@ -3294,8 +3178,8 @@ static void init_dequant4_coeff_table(H264Context *h){ continue; for(q=0; q<52; q++){ - int shift = div6[q] + 2; - int idx = rem6[q]; + int shift = ff_div6[q] + 2; + int idx = ff_rem6[q]; for(x=0; x<16; x++) h->dequant4_coeff[i][q][transpose ? 
(x>>2)|((x<<2)&0xF) : x] = ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] * @@ -4972,6 +4856,10 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in if(total_coeff==0) return 0; + if(total_coeff<0) { + av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff<0)\n", s->mb_x, s->mb_y); + return -1; + } trailing_ones= coeff_token&3; tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff); diff --git a/src/libffmpeg/libavcodec/h264data.h b/src/libffmpeg/libavcodec/h264data.h index 2dea3580f..74e720421 100644 --- a/src/libffmpeg/libavcodec/h264data.h +++ b/src/libffmpeg/libavcodec/h264data.h @@ -53,6 +53,24 @@ #define EXTENDED_SAR 255 +/* NAL unit types */ +enum { +NAL_SLICE=1, +NAL_DPA, +NAL_DPB, +NAL_DPC, +NAL_IDR_SLICE, +NAL_SEI, +NAL_SPS, +NAL_PPS, +NAL_AUD, +NAL_END_SEQUENCE, +NAL_END_STREAM, +NAL_FILLER_DATA, +NAL_SPS_EXT, +NAL_AUXILIARY_SLICE=19 +}; + static const AVRational pixel_aspect[14]={ {0, 1}, {1, 1}, @@ -488,15 +506,6 @@ static const PMbInfo b_sub_mb_type_info[13]={ {MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 4, }, }; - -static const uint8_t rem6[52]={ -0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, -}; - -static const uint8_t div6[52]={ -0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, -}; - static const uint8_t default_scaling4[2][16]={ { 6,13,20,28, 13,20,28,32, diff --git a/src/libffmpeg/libavcodec/h264idct.c b/src/libffmpeg/libavcodec/h264idct.c index 3506418ad..a6a56d33a 100755 --- a/src/libffmpeg/libavcodec/h264idct.c +++ b/src/libffmpeg/libavcodec/h264idct.c @@ -28,7 +28,7 @@ #include "dsputil.h" -static always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int stride, int block_stride, int shift, int add){ +static av_always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int stride, int block_stride, int shift, int add){ int i; uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; diff --git a/src/libffmpeg/libavcodec/i386/Makefile.am b/src/libffmpeg/libavcodec/i386/Makefile.am index 15ab4db89..ee170efd5 100644 --- a/src/libffmpeg/libavcodec/i386/Makefile.am +++ b/src/libffmpeg/libavcodec/i386/Makefile.am @@ -6,7 +6,7 @@ AM_CFLAGS = -fomit-frame-pointer -fno-strict-aliasing # CFLAGS is here to filter out -funroll-loops because it causes bad # behavior of libavcodec CFLAGS := `echo @CFLAGS@ | sed -e 's/-funroll-loops//g'` -AM_CPPFLAGS = $(LIBFFMPEG_CPPFLAGS) -I$(top_srcdir)/src/libffmpeg/libavutil +AM_CPPFLAGS = $(LIBFFMPEG_CPPFLAGS) -I$(top_srcdir)/src/libffmpeg/libavutil -I$(top_srcdir)/src/libffmpeg # Avoid "can't find register" failures with -O1 and higher dsputil_mmx.o dsputil_mmx.lo: CFLAGS=$(shell echo @CFLAGS@ | sed -e 's/-funroll-loops//g; s/$$/ -Os/') @@ -42,10 +42,10 @@ EXTRA_DIST = \ h264dsp_mmx.c \ mpegvideo_mmx_template.c -if HAVE_FFMMX +if HAVE_MMX mmx_modules = $(libavcodec_mmx_src) endif libavcodec_mmx_la_SOURCES = $(mmx_modules) $(libavcodec_mmx_dummy) -noinst_HEADERS = dsputil_mmx_avg.h dsputil_mmx_rnd.h mmx.h +noinst_HEADERS = dsputil_mmx_avg.h dsputil_mmx_rnd.h mathops.h mmx.h diff --git a/src/libffmpeg/libavcodec/i386/cputest.c b/src/libffmpeg/libavcodec/i386/cputest.c index 262786b71..0705ab3e5 100644 --- a/src/libffmpeg/libavcodec/i386/cputest.c +++ b/src/libffmpeg/libavcodec/i386/cputest.c @@ -87,6 +87,8 @@ int mm_support(void) rval |= MM_SSE2; 
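
The cputest.c hunk above keys the new MM_SSSE3 flag off ECX bit 9 of CPUID leaf 1, which is where SSSE3 support is reported. The same probe, standalone, assuming GCC's <cpuid.h> helper:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
    unsigned eax, ebx, ecx, edx;

    if (__get_cpuid(1, &eax, &ebx, &ecx, &edx))   /* leaf 1: feature bits */
        printf("SSSE3: %s\n", (ecx & (1 << 9)) ? "yes" : "no");
    return 0;
}
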
if (ecx & 1) rval |= MM_SSE3; + if (ecx & 0x00000200 ) + rval |= MM_SSSE3; } cpuid(0x80000000, max_ext_level, ebx, ecx, edx); @@ -104,11 +106,13 @@ int mm_support(void) } #if 0 - av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s\n", + av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s%s%s\n", (rval&MM_MMX) ? "MMX ":"", (rval&MM_MMXEXT) ? "MMX2 ":"", (rval&MM_SSE) ? "SSE ":"", (rval&MM_SSE2) ? "SSE2 ":"", + (rval&MM_SSE3) ? "SSE3 ":"", + (rval&MM_SSSE3) ? "SSSE3 ":"", (rval&MM_3DNOW) ? "3DNow ":"", (rval&MM_3DNOWEXT) ? "3DNowExt ":""); #endif diff --git a/src/libffmpeg/libavcodec/i386/fdct_mmx.c b/src/libffmpeg/libavcodec/i386/fdct_mmx.c index 2ffbfecf6..7e2682a4a 100644 --- a/src/libffmpeg/libavcodec/i386/fdct_mmx.c +++ b/src/libffmpeg/libavcodec/i386/fdct_mmx.c @@ -284,7 +284,7 @@ TABLE_SSE2 }}; -static always_inline void fdct_col(const int16_t *in, int16_t *out, int offset) +static av_always_inline void fdct_col(const int16_t *in, int16_t *out, int offset) { movq_m2r(*(in + offset + 1 * 8), mm0); movq_m2r(*(in + offset + 6 * 8), mm1); @@ -364,7 +364,7 @@ static always_inline void fdct_col(const int16_t *in, int16_t *out, int offset) } -static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out) +static av_always_inline void fdct_row_sse2(const int16_t *in, int16_t *out) { asm volatile( #define FDCT_ROW_SSE2_H1(i,t) \ @@ -426,7 +426,7 @@ static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out) ); } -static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table) +static av_always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table) { pshufw_m2r(*(in + 4), mm5, 0x1B); movq_m2r(*(in + 0), mm0); @@ -469,7 +469,7 @@ static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const i movq_r2m(mm7, *(out + 4)); } -static always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const int16_t *table) +static av_always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const int16_t *table) { //FIXME reorder (i dont have a old mmx only cpu here to benchmark ...) movd_m2r(*(in + 6), mm1); diff --git a/src/libffmpeg/libavcodec/i386/mathops.h b/src/libffmpeg/libavcodec/i386/mathops.h new file mode 100644 index 000000000..3553a4025 --- /dev/null +++ b/src/libffmpeg/libavcodec/i386/mathops.h @@ -0,0 +1,41 @@ +/* + * simple math operations + * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifdef FRAC_BITS +# define MULL(ra, rb) \ + ({ int rt, dummy; asm (\ + "imull %3 \n\t"\ + "shrdl %4, %%edx, %%eax \n\t"\ + : "=a"(rt), "=d"(dummy)\ + : "a" (ra), "rm" (rb), "i"(FRAC_BITS));\ + rt; }) +#endif + +#define MULH(ra, rb) \ + ({ int rt, dummy;\ + asm ("imull %3\n\t" : "=d"(rt), "=a"(dummy): "a" (ra), "rm" (rb));\ + rt; }) + +#define MUL64(ra, rb) \ + ({ int64_t rt;\ + asm ("imull %2\n\t" : "=A"(rt) : "a" (ra), "g" (rb));\ + rt; }) + diff --git a/src/libffmpeg/libavcodec/jfdctfst.c b/src/libffmpeg/libavcodec/jfdctfst.c index 38424563d..a9dcfab82 100644 --- a/src/libffmpeg/libavcodec/jfdctfst.c +++ b/src/libffmpeg/libavcodec/jfdctfst.c @@ -145,7 +145,7 @@ #define MULTIPLY(var,const) ((DCTELEM) DESCALE((var) * (const), CONST_BITS)) -static always_inline void row_fdct(DCTELEM * data){ +static av_always_inline void row_fdct(DCTELEM * data){ int_fast16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int_fast16_t tmp10, tmp11, tmp12, tmp13; int_fast16_t z1, z2, z3, z4, z5, z11, z13; diff --git a/src/libffmpeg/libavcodec/jfdctint.c b/src/libffmpeg/libavcodec/jfdctint.c index 58f3a1446..250312467 100644 --- a/src/libffmpeg/libavcodec/jfdctint.c +++ b/src/libffmpeg/libavcodec/jfdctint.c @@ -181,7 +181,7 @@ #endif -static always_inline void row_fdct(DCTELEM * data){ +static av_always_inline void row_fdct(DCTELEM * data){ int_fast32_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int_fast32_t tmp10, tmp11, tmp12, tmp13; int_fast32_t z1, z2, z3, z4, z5; diff --git a/src/libffmpeg/libavcodec/jpeg_ls.c b/src/libffmpeg/libavcodec/jpeg_ls.c index 1b4df2b1a..4629176ad 100644 --- a/src/libffmpeg/libavcodec/jpeg_ls.c +++ b/src/libffmpeg/libavcodec/jpeg_ls.c @@ -804,11 +804,16 @@ static int encode_picture_ls(AVCodecContext *avctx, unsigned char *buf, int buf_ av_free(zero); av_free(state); + // the specification says that after doing 0xff escaping unused bits in the + // last byte must be set to 0, so just append 7 "optional" zero-bits to + // avoid special-casing. + put_bits(&pb2, 7, 0); + size = put_bits_count(&pb2); flush_put_bits(&pb2); /* do escape coding */ - size = put_bits_count(&pb2) >> 3; init_get_bits(&gb, buf2, size); - while(get_bits_count(&gb) < size * 8){ + size -= 7; + while(get_bits_count(&gb) < size){ int v; v = get_bits(&gb, 8); put_bits(&pb, 8, v); diff --git a/src/libffmpeg/libavcodec/mathops.h b/src/libffmpeg/libavcodec/mathops.h index 9ae34d71b..c6ec70597 100644 --- a/src/libffmpeg/libavcodec/mathops.h +++ b/src/libffmpeg/libavcodec/mathops.h @@ -46,7 +46,7 @@ //gcc 3.4 creates an incredibly bloated mess out of this //# define MULH(a,b) (((int64_t)(a) * (int64_t)(b))>>32) -static always_inline int MULH(int a, int b){ +static av_always_inline int MULH(int a, int b){ return ((int64_t)(a) * (int64_t)(b))>>32; } #endif diff --git a/src/libffmpeg/libavcodec/motion_est.c b/src/libffmpeg/libavcodec/motion_est.c index 0e1504147..a11787bac 100644 --- a/src/libffmpeg/libavcodec/motion_est.c +++ b/src/libffmpeg/libavcodec/motion_est.c @@ -106,7 +106,7 @@ static int get_flags(MotionEstContext *c, int direct, int chroma){ + (chroma ? 
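
Both mathops.h variants above implement MULH, the high 32 bits of a signed 32x32-bit multiply: the new i386 version gets it from a single imull (the high half lands in EDX), while the generic version shown above widens to 64 bits. A portable reference with a worked case:

#include <stdint.h>

static inline int mulh(int a, int b)
{
    return (int)(((int64_t)a * b) >> 32);   /* high half of the product */
}

/* example: mulh(x, 1 << 30) == x >> 2, i.e. floor(x / 4), for any int x */
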
FLAG_CHROMA : 0); } -static always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby, +static av_always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby, const int size, const int h, int ref_index, int src_index, me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){ MotionEstContext * const c= &s->me; @@ -122,6 +122,7 @@ static always_inline int cmp(MpegEncContext *s, const int x, const int y, const int d; //FIXME check chroma 4mv, (no crashes ...) if(flags&FLAG_DIRECT){ + assert(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1)); if(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1)){ const int time_pp= s->pp_time; const int time_pb= s->pb_time; @@ -233,8 +234,14 @@ static void zero_hpel(uint8_t *a, const uint8_t *b, int stride, int h){ void ff_init_me(MpegEncContext *s){ MotionEstContext * const c= &s->me; + int cache_size= FFMIN(ME_MAP_SIZE>>ME_MAP_SHIFT, 1<<ME_MAP_SHIFT); + int dia_size= FFMAX(FFABS(s->avctx->dia_size)&255, FFABS(s->avctx->pre_dia_size)&255); c->avctx= s->avctx; + if(cache_size < 2*dia_size && !c->stride){ + av_log(s->avctx, AV_LOG_INFO, "ME_MAP size may be a little small for the selected diamond size\n"); + } + ff_set_cmp(&s->dsp, s->dsp.me_pre_cmp, c->avctx->me_pre_cmp); ff_set_cmp(&s->dsp, s->dsp.me_cmp, c->avctx->me_cmp); ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, c->avctx->me_sub_cmp); @@ -692,6 +699,7 @@ static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4) static inline void get_limits(MpegEncContext *s, int x, int y) { MotionEstContext * const c= &s->me; + int range= c->avctx->me_range >> (1 + !!(c->flags&FLAG_QPEL)); /* if(c->avctx->me_range) c->range= c->avctx->me_range >> 1; else c->range= 16; @@ -713,6 +721,12 @@ static inline void get_limits(MpegEncContext *s, int x, int y) c->xmax = - x + s->mb_width *16 - 16; c->ymax = - y + s->mb_height*16 - 16; } + if(range){ + c->xmin = FFMAX(c->xmin,-range); + c->xmax = FFMIN(c->xmax, range); + c->ymin = FFMAX(c->ymin,-range); + c->ymax = FFMIN(c->ymax, range); + } } static inline void init_mv4_ref(MotionEstContext *c){ @@ -1148,7 +1162,9 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, { MotionEstContext * const c= &s->me; uint8_t *pix, *ppix; - int sum, varc, vard, mx, my, dmin; + int sum, mx, my, dmin; + int varc; ///< the variance of the block (sum of squared (p[y][x]-average)) + int vard; ///< sum of squared differences with the estimated motion vector int P[10][2]; const int shift= 1+s->quarter_sample; int mb_type=0; @@ -1810,8 +1826,8 @@ static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y) get_limits(s, 16*mb_x, 16*mb_y); //restore c->?min/max, maybe not needed - s->b_direct_mv_table[mot_xy][0]= mx; - s->b_direct_mv_table[mot_xy][1]= my; + mv_table[mot_xy][0]= mx; + mv_table[mot_xy][1]= my; c->flags &= ~FLAG_DIRECT; c->sub_flags &= ~FLAG_DIRECT; @@ -1831,6 +1847,18 @@ void ff_estimate_b_frame_motion(MpegEncContext * s, get_limits(s, 16*mb_x, 16*mb_y); c->skip=0; + + if(s->codec_id == CODEC_ID_MPEG4 && s->next_picture.mbskip_table[xy]){ + int score= direct_search(s, mb_x, mb_y); //FIXME just check 0,0 + + score= ((unsigned)(score*score + 128*256))>>16; + c->mc_mb_var_sum_temp += score; + s->current_picture.mc_mb_var[mb_y*s->mb_stride + mb_x] = score; //FIXME use SSE + s->mb_type[mb_y*s->mb_stride + mb_x]= CANDIDATE_MB_TYPE_DIRECT0; + + return; + } + if(c->avctx->me_threshold){ int vard= 
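
get_limits() above now also intersects the search window with the user-supplied me_range: the range is halved once because vectors are stored in half-pel units, and once more under quarter-pel. The clamping step on its own, with paraphrased field names:

static void clamp_search_window(int *xmin, int *xmax, int *ymin, int *ymax,
                                int me_range, int qpel)
{
    int range = me_range >> (1 + !!qpel);   /* scale to full-pel units */

    if (range) {                            /* 0 means "unlimited" */
        if (*xmin < -range) *xmin = -range;
        if (*xmax >  range) *xmax =  range;
        if (*ymin < -range) *ymin = -range;
        if (*ymax >  range) *ymax =  range;
    }
}
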
check_input_motion(s, mb_x, mb_y, 0); @@ -1953,6 +1981,8 @@ void ff_estimate_b_frame_motion(MpegEncContext * s, } //FIXME something smarter if(dmin>256*256*16) type&= ~CANDIDATE_MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB + if(s->codec_id == CODEC_ID_MPEG4 && type&CANDIDATE_MB_TYPE_DIRECT && s->flags&CODEC_FLAG_MV0 && *(uint32_t*)s->b_direct_mv_table[xy]) + type |= CANDIDATE_MB_TYPE_DIRECT0; #if 0 if(s->out_format == FMT_MPEG1) type |= CANDIDATE_MB_TYPE_INTRA; diff --git a/src/libffmpeg/libavcodec/motion_est_template.c b/src/libffmpeg/libavcodec/motion_est_template.c index d8feaff5a..897c08e3d 100644 --- a/src/libffmpeg/libavcodec/motion_est_template.c +++ b/src/libffmpeg/libavcodec/motion_est_template.c @@ -555,7 +555,7 @@ if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, const int qpel= flags&FLAG_QPEL;\ const int shift= 1+qpel;\ -static always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin, +static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin, int src_index, int ref_index, int const penalty_factor, int size, int h, int flags) { @@ -667,31 +667,28 @@ static int hex_search(MpegEncContext * s, int *best, int dmin, LOAD_COMMON LOAD_COMMON2 int map_generation= c->map_generation; - int x,y,i,d; - static const int hex[6][2]={{-2, 0}, { 2,0}, {-1,-2}, {1,-2}, {-1,2},{1,2}}; + int x,y,d; + const int dec= dia_size & (dia_size-1); cmpf= s->dsp.me_cmp[size]; chroma_cmpf= s->dsp.me_cmp[size+1]; - for(;dia_size; dia_size--){ + for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){ do{ x= best[0]; y= best[1]; - for(i=0; i<6; i++){ - CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size); + + CHECK_CLIPPED_MV(x -dia_size , y); + CHECK_CLIPPED_MV(x+ dia_size , y); + CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size); + CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size); + if(dia_size>1){ + CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size); + CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size); } }while(best[0] != x || best[1] != y); } - do{ - x= best[0]; - y= best[1]; - CHECK_CLIPPED_MV(x+1, y); - CHECK_CLIPPED_MV(x, y+1); - CHECK_CLIPPED_MV(x-1, y); - CHECK_CLIPPED_MV(x, y-1); - }while(best[0] != x || best[1] != y); - return dmin; } @@ -704,14 +701,16 @@ static int l2s_dia_search(MpegEncContext * s, int *best, int dmin, LOAD_COMMON LOAD_COMMON2 int map_generation= c->map_generation; - int x,y,i,d, dia_size; + int x,y,i,d; + int dia_size= c->dia_size&0xFF; + const int dec= dia_size & (dia_size-1); static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1}, { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}}; cmpf= s->dsp.me_cmp[size]; chroma_cmpf= s->dsp.me_cmp[size+1]; - for(dia_size= c->dia_size&0xFF; dia_size; dia_size--){ + for(; dia_size; dia_size= dec ? 
dia_size-1 : dia_size>>1){ do{ x= best[0]; y= best[1]; @@ -775,7 +774,7 @@ static int umh_search(MpegEncContext * s, int *best, int dmin, } } - return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 1); + return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2); } #define SAB_CHECK_MV(ax,ay)\ @@ -824,20 +823,27 @@ static int sab_diamond_search(MpegEncContext * s, int *best, int dmin, cmpf= s->dsp.me_cmp[size]; chroma_cmpf= s->dsp.me_cmp[size+1]; - for(j=i=0; i<ME_MAP_SIZE; i++){ + /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can + become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map + */ + for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){ uint32_t key= map[i]; key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1)); if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue; - assert(j<MAX_SAB_SIZE); //max j = number of predictors - minima[j].height= score_map[i]; minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS; minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1); minima[j].x-= (1<<(ME_MAP_MV_BITS-1)); minima[j].y-= (1<<(ME_MAP_MV_BITS-1)); + + // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space + if( minima[j].x > xmax || minima[j].x < xmin + || minima[j].y > ymax || minima[j].y < ymin) + continue; + minima[j].checked=0; if(minima[j].x || minima[j].y) minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor; @@ -965,7 +971,7 @@ if(256*256*256*64 % (stats[0]+1)==0){ return dmin; } -static always_inline int diamond_search(MpegEncContext * s, int *best, int dmin, +static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin, int src_index, int ref_index, int const penalty_factor, int size, int h, int flags){ MotionEstContext * const c= &s->me; @@ -985,7 +991,7 @@ static always_inline int diamond_search(MpegEncContext * s, int *best, int dmin, return var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags); } -static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr, +static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr, int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2], int ref_mv_scale, int flags, int size, int h) { @@ -1018,6 +1024,10 @@ static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx map[0]= map_generation; score_map[0]= dmin; + //FIXME precalc first term below? 
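
The rewritten hex_search()/l2s_dia_search() loops above replace the fixed dia_size-- with dia_size = dec ? dia_size-1 : dia_size>>1, where dec = dia_size & (dia_size-1) is computed once from the starting size: a power-of-two size now halves each round, while any other size still steps down linearly. A few lines to print the resulting schedules:

#include <stdio.h>

int main(void)
{
    for (int start = 1; start <= 8; start++) {
        int dec = start & (start - 1);   /* 0 iff start is a power of two */

        printf("%d:", start);
        for (int d = start; d; d = dec ? d - 1 : d >> 1)
            printf(" %d", d);            /* e.g. 8: 8 4 2 1   and  5: 5 4 3 2 1 */
        printf("\n");
    }
    return 0;
}
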
+ if((s->pict_type == B_TYPE && !(c->flags & FLAG_DIRECT)) || s->flags&CODEC_FLAG_MV0) + dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor; + /* first line */ if (s->first_slice_line) { CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) diff --git a/src/libffmpeg/libavcodec/mpeg12.c b/src/libffmpeg/libavcodec/mpeg12.c index e3a4c2da5..8af7bdfa7 100644 --- a/src/libffmpeg/libavcodec/mpeg12.c +++ b/src/libffmpeg/libavcodec/mpeg12.c @@ -515,7 +515,7 @@ static inline void put_mb_modes(MpegEncContext *s, int n, int bits, } } -static always_inline void mpeg1_encode_mb_internal(MpegEncContext *s, +static av_always_inline void mpeg1_encode_mb_internal(MpegEncContext *s, DCTELEM block[6][64], int motion_x, int motion_y, int mb_block_count) diff --git a/src/libffmpeg/libavcodec/mpegaudiodec.c b/src/libffmpeg/libavcodec/mpegaudiodec.c index 54bcee3b0..367400581 100644 --- a/src/libffmpeg/libavcodec/mpegaudiodec.c +++ b/src/libffmpeg/libavcodec/mpegaudiodec.c @@ -327,7 +327,7 @@ static int decode_init(AVCodecContext * avctx) for(i=0;i<15;i++) { int n, norm; n = i + 2; - norm = ((int64_t_C(1) << n) * FRAC_ONE) / ((1 << n) - 1); + norm = ((INT64_C(1) << n) * FRAC_ONE) / ((1 << n) - 1); scale_factor_mult[i][0] = MULL(FIXR(1.0 * 2.0), norm); scale_factor_mult[i][1] = MULL(FIXR(0.7937005259 * 2.0), norm); scale_factor_mult[i][2] = MULL(FIXR(0.6299605249 * 2.0), norm); @@ -1749,7 +1749,7 @@ static int huffman_decode(MPADecodeContext *s, GranuleDef *g, /* skip extension bits */ bits_left = end_pos - get_bits_count(&s->gb); //av_log(NULL, AV_LOG_ERROR, "left:%d buf:%p\n", bits_left, s->in_gb.buffer); - if (bits_left < 0 || bits_left > 16) { + if (bits_left < 0 || bits_left > 500) { av_log(NULL, AV_LOG_ERROR, "bits_left=%d\n", bits_left); s_index=0; }else if(bits_left > 0 && s->error_resilience >= FF_ER_AGGRESSIVE){ diff --git a/src/libffmpeg/libavcodec/mpegvideo.c b/src/libffmpeg/libavcodec/mpegvideo.c index a9d877fff..a33485549 100644 --- a/src/libffmpeg/libavcodec/mpegvideo.c +++ b/src/libffmpeg/libavcodec/mpegvideo.c @@ -140,7 +140,7 @@ static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[ /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */ - qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / + qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j])); } } else if (dsp->fdct == fdct_ifast @@ -155,7 +155,7 @@ static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[ /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */ - qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) / + qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / (aanscales[i] * qscale * quant_matrix[j])); } } else { @@ -166,7 +166,7 @@ static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[ so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905 so 32768 >= (1<<19) / (qscale * quant_matrix[i]) >= 67 */ - qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j])); + qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j])); // qmat [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]); qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]); @@ -2964,7 +2964,7 @@ static inline int 
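
The mpegaudiodec.c change above is more than renaming int64_t_C to the standard INT64_C: the 1 must be a 64-bit constant because with n up to 16 the product (1 << n) * FRAC_ONE reaches 2^39, which overflows a 32-bit int before the division happens. A sketch, assuming FRAC_ONE is 1 << 23 as elsewhere in mpegaudiodec:

#include <stdint.h>
#include <stdio.h>

#define FRAC_BITS 23                 /* assumption: mpegaudiodec's value */
#define FRAC_ONE  (1 << FRAC_BITS)

int main(void)
{
    for (int i = 0; i < 15; i++) {
        int n = i + 2;               /* up to 16: 2^16 * 2^23 = 2^39 */
        int64_t norm = ((INT64_C(1) << n) * FRAC_ONE) / ((1 << n) - 1);

        printf("n=%2d norm=%lld\n", n, (long long)norm);
    }
    return 0;
}
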
hpel_motion_lowres(MpegEncContext *s, } /* apply one mpeg motion vector to the three components */ -static always_inline void mpeg_motion(MpegEncContext *s, +static av_always_inline void mpeg_motion(MpegEncContext *s, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, int field_based, int bottom_field, int field_select, uint8_t **ref_picture, op_pixels_func (*pix_op)[4], @@ -3081,7 +3081,7 @@ if(s->quarter_sample) } /* apply one mpeg motion vector to the three components */ -static always_inline void mpeg_motion_lowres(MpegEncContext *s, +static av_always_inline void mpeg_motion_lowres(MpegEncContext *s, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, int field_based, int bottom_field, int field_select, uint8_t **ref_picture, h264_chroma_mc_func *pix_op, @@ -3913,7 +3913,7 @@ void ff_clean_intra_table_entries(MpegEncContext *s) s->mv : motion vector s->interlaced_dct : true if interlaced dct used (mpeg2) */ -static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag) +static av_always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag) { int mb_x, mb_y; const int mb_xy = s->mb_y * s->mb_stride + s->mb_x; @@ -4336,7 +4336,7 @@ static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){ } } -static always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count) +static av_always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count) { int16_t weight[8][64]; DCTELEM orig[8][64]; @@ -4348,7 +4348,7 @@ static always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, in uint8_t *ptr_y, *ptr_cb, *ptr_cr; int wrap_y, wrap_c; - for(i=0; i<mb_block_count; i++) skip_dct[i]=0; + for(i=0; i<mb_block_count; i++) skip_dct[i]=s->skipdct; if(s->adaptive_quant){ const int last_qp= s->qscale; @@ -4358,17 +4358,16 @@ static always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, in update_qscale(s); if(!(s->flags&CODEC_FLAG_QP_RD)){ + s->qscale= s->current_picture_ptr->qscale_table[mb_xy]; s->dquant= s->qscale - last_qp; if(s->out_format==FMT_H263){ - s->dquant= clip(s->dquant, -2, 2); //FIXME RD + s->dquant= clip(s->dquant, -2, 2); if(s->codec_id==CODEC_ID_MPEG4){ if(!s->mb_intra){ if(s->pict_type == B_TYPE){ - if(s->dquant&1) - s->dquant= (s->dquant/2)*2; - if(s->mv_dir&MV_DIRECT) + if(s->dquant&1 || s->mv_dir&MV_DIRECT) s->dquant= 0; } if(s->mv_type==MV_TYPE_8X8) @@ -4621,7 +4620,7 @@ static always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, in } } -static always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y) +static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y) { if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y, 8, 6); else encode_mb_internal(s, motion_x, motion_y, 16, 8); @@ -4861,6 +4860,8 @@ static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){ static int estimate_motion_thread(AVCodecContext *c, void *arg){ MpegEncContext *s= arg; + ff_check_alignment(); + s->me.dia_size= s->avctx->dia_size; s->first_slice_line=1; for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) { @@ -4888,6 +4889,8 @@ static int mb_var_thread(AVCodecContext *c, void *arg){ MpegEncContext *s= arg; int mb_x, mb_y; + ff_check_alignment(); + for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) { for(mb_x=0; mb_x < s->mb_width; mb_x++) { int xx = 
mb_x * 16; @@ -4938,6 +4941,8 @@ static int encode_thread(AVCodecContext *c, void *arg){ PutBitContext pb[2], pb2[2], tex_pb[2]; //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y); + ff_check_alignment(); + for(i=0; i<2; i++){ init_put_bits(&pb [i], bit_buf [i], MAX_MB_BYTES); init_put_bits(&pb2 [i], bit_buf2 [i], MAX_MB_BYTES); @@ -5205,19 +5210,6 @@ static int encode_thread(AVCodecContext *c, void *arg){ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb, &dmin, &next_block, 0, 0); } - if(mb_type&CANDIDATE_MB_TYPE_DIRECT){ - int mx= s->b_direct_mv_table[xy][0]; - int my= s->b_direct_mv_table[xy][1]; - - s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT; - s->mb_intra= 0; -/* xine: do not need this for decode or MPEG-1 encoding modes */ -#if 0 - ff_mpeg4_set_direct_mv(s, mx, my); -#endif /* #if 0 */ - encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb, - &dmin, &next_block, mx, my); - } if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){ s->mv_dir = MV_DIR_FORWARD; s->mv_type = MV_TYPE_FIELD; @@ -5272,8 +5264,8 @@ static int encode_thread(AVCodecContext *c, void *arg){ } } - if(s->flags & CODEC_FLAG_QP_RD){ - if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){ + if((s->flags & CODEC_FLAG_QP_RD) && dmin < INT_MAX){ + if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD const int last_qp= backup_s.qscale; int qpi, qp, dc[6]; DCTELEM ac[6][16]; @@ -5316,10 +5308,64 @@ static int encode_thread(AVCodecContext *c, void *arg){ } } } - qp= best_s.qscale; - s->current_picture.qscale_table[xy]= qp; } } + if(mb_type&CANDIDATE_MB_TYPE_DIRECT){ + int mx= s->b_direct_mv_table[xy][0]; + int my= s->b_direct_mv_table[xy][1]; + + backup_s.dquant = 0; + s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT; + s->mb_intra= 0; + ff_mpeg4_set_direct_mv(s, mx, my); + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb, + &dmin, &next_block, mx, my); + } + if(mb_type&CANDIDATE_MB_TYPE_DIRECT0){ + backup_s.dquant = 0; + s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT; + s->mb_intra= 0; +/* xine: do not need this for decode or MPEG-1 encoding modes */ +#if 0 + ff_mpeg4_set_direct_mv(s, 0, 0); +#endif /* #if 0 */ + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb, + &dmin, &next_block, 0, 0); + } + if(!best_s.mb_intra && s->flags2&CODEC_FLAG2_SKIP_RD){ + int coded=0; + for(i=0; i<6; i++) + coded |= s->block_last_index[i]; + if(coded){ + int mx,my; + memcpy(s->mv, best_s.mv, sizeof(s->mv)); + if(best_s.mv_dir & MV_DIRECT){ + mx=my=0; //FIXME find the one we actually used + ff_mpeg4_set_direct_mv(s, mx, my); + }else if(best_s.mv_dir&MV_DIR_BACKWARD){ + mx= s->mv[1][0][0]; + my= s->mv[1][0][1]; + }else{ + mx= s->mv[0][0][0]; + my= s->mv[0][0][1]; + } + + s->mv_dir= best_s.mv_dir; + s->mv_type = best_s.mv_type; + s->mb_intra= 0; +/* s->mv[0][0][0] = best_s.mv[0][0][0]; + s->mv[0][0][1] = best_s.mv[0][0][1]; + s->mv[1][0][0] = best_s.mv[1][0][0]; + s->mv[1][0][1] = best_s.mv[1][0][1];*/ + backup_s.dquant= 0; + s->skipdct=1; + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb, + &dmin, &next_block, mx, my); + s->skipdct=0; + } + } + + s->current_picture.qscale_table[xy]= best_s.qscale; copy_context_after_encode(s, &best_s, -1); @@ -5401,6 +5447,11 @@ static int encode_thread(AVCodecContext *c, void *arg){ ff_mpeg4_set_direct_mv(s, motion_x, motion_y); #endif /* #if 0 */ break; + case CANDIDATE_MB_TYPE_DIRECT0: + s->mv_dir = 
MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT; + s->mb_intra= 0; + ff_mpeg4_set_direct_mv(s, 0, 0); + break; case CANDIDATE_MB_TYPE_BIDIR: s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD; s->mb_intra= 0; diff --git a/src/libffmpeg/libavcodec/mpegvideo.h b/src/libffmpeg/libavcodec/mpegvideo.h index 011678a42..ed02759ae 100644 --- a/src/libffmpeg/libavcodec/mpegvideo.h +++ b/src/libffmpeg/libavcodec/mpegvideo.h @@ -324,6 +324,7 @@ typedef struct MpegEncContext { int dropable; int frame_rate_index; int last_lambda_for[5]; ///< last lambda for a specific pict type + int skipdct; ///< skip dct and code zero residual /* motion compensation */ int unrestricted_mv; ///< mv can point outside of the coded picture @@ -402,6 +403,8 @@ typedef struct MpegEncContext { #define CANDIDATE_MB_TYPE_BACKWARD_I 0x400 #define CANDIDATE_MB_TYPE_BIDIR_I 0x800 +#define CANDIDATE_MB_TYPE_DIRECT0 0x1000 + int block_index[6]; ///< index to current MB in block based arrays with edges int block_wrap[6]; uint8_t *dest[3]; diff --git a/src/libffmpeg/libavcodec/parser.c b/src/libffmpeg/libavcodec/parser.c index 72a3e55a3..740ad855c 100644 --- a/src/libffmpeg/libavcodec/parser.c +++ b/src/libffmpeg/libavcodec/parser.c @@ -91,7 +91,8 @@ AVCodecParserContext *av_parser_init(int codec_id) * in_data += len; * in_len -= len; * - * decode_frame(data, size); + * if(size) + * decode_frame(data, size); * } * @endcode */ diff --git a/src/libffmpeg/libavcodec/ppc/Makefile.am b/src/libffmpeg/libavcodec/ppc/Makefile.am index 00e796f6d..d52cc481e 100644 --- a/src/libffmpeg/libavcodec/ppc/Makefile.am +++ b/src/libffmpeg/libavcodec/ppc/Makefile.am @@ -12,14 +12,17 @@ noinst_LTLIBRARIES = libavcodec_ppc.la libavcodec_ppc_src = dsputil_altivec.c \ dsputil_ppc.c \ - dsputil_h264_altivec.c \ - dsputil_h264_template_altivec.c \ + h264_altivec.c \ + h264_template_altivec.c \ fdct_altivec.c \ fft_altivec.c \ + float_altivec.c \ idct_altivec.c \ gmc_altivec.c \ mpegvideo_altivec.c \ - mpegvideo_ppc.c + mpegvideo_ppc.c \ + snow_altivec.c \ + vc1dsp_altivec.c libavcodec_ppc_dummy = libavcodec_ppc_dummy.c EXTRA_DIST = $(libavcodec_ppc_src) $(libavcodec_ppc_dummy) @@ -28,7 +31,6 @@ EXTRA_DIST = $(libavcodec_ppc_src) $(libavcodec_ppc_dummy) #ppc_modules = $(libavcodec_ppc_src) #endif - libavcodec_ppc_la_SOURCES = $(ppc_modules) $(libavcodec_ppc_dummy) -noinst_HEADERS = dsputil_altivec.h dsputil_ppc.h gcc_fixes.h +noinst_HEADERS = dsputil_altivec.h dsputil_ppc.h gcc_fixes.h mathops.h types_altivec.h diff --git a/src/libffmpeg/libavcodec/ppc/float_altivec.c b/src/libffmpeg/libavcodec/ppc/float_altivec.c new file mode 100644 index 000000000..c6e43dec2 --- /dev/null +++ b/src/libffmpeg/libavcodec/ppc/float_altivec.c @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
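
The parser.c hunk above corrects the usage example in the av_parser_parse() documentation: a parser can consume input while a frame is still incomplete, returning size == 0, and the old example decoded unconditionally. The fixed loop, essentially as the comment now reads (myparser, avctx, decode_frame and the pts/dts variables are the documentation's own placeholders):

while (in_len) {
    uint8_t *data;
    int size;
    int len = av_parser_parse(myparser, avctx, &data, &size,
                              in_data, in_len, pts, dts);
    in_data += len;               /* step past the consumed input */
    in_len  -= len;

    if (size)                     /* only decode once a frame is complete */
        decode_frame(data, size);
}
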
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "../dsputil.h" + +#include "gcc_fixes.h" + +#include "dsputil_altivec.h" + +static void vector_fmul_altivec(float *dst, const float *src, int len) +{ + int i; + vector float d0, d1, s, zero = (vector float)vec_splat_u32(0); + for(i=0; i<len-7; i+=8) { + d0 = vec_ld(0, dst+i); + s = vec_ld(0, src+i); + d1 = vec_ld(16, dst+i); + d0 = vec_madd(d0, s, zero); + d1 = vec_madd(d1, vec_ld(16,src+i), zero); + vec_st(d0, 0, dst+i); + vec_st(d1, 16, dst+i); + } +} + +static void vector_fmul_reverse_altivec(float *dst, const float *src0, + const float *src1, int len) +{ + int i; + vector float d, s0, s1, h0, l0, + s2, s3, zero = (vector float)vec_splat_u32(0); + src1 += len-4; + for(i=0; i<len-7; i+=8) { + s1 = vec_ld(0, src1-i); // [a,b,c,d] + s0 = vec_ld(0, src0+i); + l0 = vec_mergel(s1, s1); // [c,c,d,d] + s3 = vec_ld(-16, src1-i); + h0 = vec_mergeh(s1, s1); // [a,a,b,b] + s2 = vec_ld(16, src0+i); + s1 = vec_mergeh(vec_mergel(l0,h0), // [d,b,d,b] + vec_mergeh(l0,h0)); // [c,a,c,a] + // [d,c,b,a] + l0 = vec_mergel(s3, s3); + d = vec_madd(s0, s1, zero); + h0 = vec_mergeh(s3, s3); + vec_st(d, 0, dst+i); + s3 = vec_mergeh(vec_mergel(l0,h0), + vec_mergeh(l0,h0)); + d = vec_madd(s2, s3, zero); + vec_st(d, 16, dst+i); + } +} + +static void vector_fmul_add_add_altivec(float *dst, const float *src0, + const float *src1, const float *src2, + int src3, int len, int step) +{ + int i; + vector float d, s0, s1, s2, t0, t1, edges; + vector unsigned char align = vec_lvsr(0,dst), + mask = vec_lvsl(0, dst); + + t0 = vec_ld(0, dst); +#if 0 //FIXME: there is still something wrong + if (step == 2) { + int y; + vector float d0, d1, s3, t2; + vector unsigned int sel = + vec_mergeh(vec_splat_u32(-1), vec_splat_u32(0)); + t1 = vec_ld(16, dst); + for (i=0,y=0; i<len-3; i+=4,y+=8) { + + s0 = vec_ld(0,src0+i); + s1 = vec_ld(0,src1+i); + s2 = vec_ld(0,src2+i); + +// t0 = vec_ld(0, dst+y); //[x x x|a] +// t1 = vec_ld(16, dst+y); //[b c d|e] + t2 = vec_ld(31, dst+y); //[f g h|x] + + d = vec_madd(s0,s1,s2); // [A B C D] + + // [A A B B] + + // [C C D D] + + d0 = vec_perm(t0, t1, mask); // [a b c d] + + d0 = vec_sel(vec_mergeh(d, d), d0, sel); // [A b B d] + + edges = vec_perm(t1, t0, mask); + + t0 = vec_perm(edges, d0, align); // [x x x|A] + + t1 = vec_perm(d0, edges, align); // [b B d|e] + + vec_stl(t0, 0, dst+y); + + d1 = vec_perm(t1, t2, mask); // [e f g h] + + d1 = vec_sel(vec_mergel(d, d), d1, sel); // [C f D h] + + edges = vec_perm(t2, t1, mask); + + t1 = vec_perm(edges, d1, align); // [b B d|C] + + t2 = vec_perm(d1, edges, align); // [f D h|x] + + vec_stl(t1, 16, dst+y); + + t0 = t1; + + vec_stl(t2, 31, dst+y); + + t1 = t2; + } + } else + #endif + if (step == 1 && src3 == 0) + for (i=0; i<len-3; i+=4) { + t1 = vec_ld(15, dst+i); + s0 = vec_ld(0, src0+i); + s1 = vec_ld(0, src1+i); + s2 = vec_ld(0, src2+i); + edges = vec_perm(t1 ,t0, mask); + d = vec_madd(s0,s1,s2); + t1 = vec_perm(d, edges, align); + t0 = vec_perm(edges, d, align); + vec_st(t1, 15, dst+i); + vec_st(t0, 0, dst+i); + t0 = t1; + } + else + ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step); +} + +void float_to_int16_altivec(int16_t *dst, const float *src, int len) +{ + int i; + vector float s0, s1; + vector signed int t0, t1; + vector signed short d0, d1, d; + vector unsigned char align; + if(((long)dst)&15) 
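/* Scalar contracts implemented by this file, per the C fallbacks in
 * dsputil.c (annotation, not part of the patch):
 *   vector_fmul:         dst[i] *= src[i]
 *   vector_fmul_reverse: dst[i]  = src0[i] * src1[len-1-i]
 *   vector_fmul_add_add: dst[i*step] = src0[i]*src1[i] + src2[i] + src3
 * float_to_int16 below is roughly, assuming vec_cts() truncates and
 * vec_packs() saturates to the int16 range:
 *   int v = (int)src[i];
 *   dst[i] = v > 32767 ? 32767 : (v < -32768 ? -32768 : v);
 * The two loops differ only in how an unaligned dst is stored. */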
//FIXME + for(i=0; i<len-7; i+=8) { + s0 = vec_ld(0, src+i); + s1 = vec_ld(16, src+i); + t0 = vec_cts(s0, 0); + d0 = vec_ld(0, dst+i); + t1 = vec_cts(s1, 0); + d1 = vec_ld(15, dst+i); + d = vec_packs(t0,t1); + d1 = vec_perm(d1, d0, vec_lvsl(0,dst+i)); + align = vec_lvsr(0, dst+i); + d0 = vec_perm(d1, d, align); + d1 = vec_perm(d, d1, align); + vec_st(d0, 0, dst+i); + vec_st(d1,15, dst+i); + } + else + for(i=0; i<len-7; i+=8) { + s0 = vec_ld(0, src+i); + s1 = vec_ld(16, src+i); + t0 = vec_cts(s0, 0); + t1 = vec_cts(s1, 0); + d = vec_packs(t0,t1); + vec_st(d, 0, dst+i); + } +} + +void float_init_altivec(DSPContext* c, AVCodecContext *avctx) +{ + c->vector_fmul = vector_fmul_altivec; + c->vector_fmul_reverse = vector_fmul_reverse_altivec; + c->vector_fmul_add_add = vector_fmul_add_add_altivec; + if(!(avctx->flags & CODEC_FLAG_BITEXACT)) + c->float_to_int16 = float_to_int16_altivec; +} diff --git a/src/libffmpeg/libavcodec/ppc/h264_altivec.c b/src/libffmpeg/libavcodec/ppc/h264_altivec.c new file mode 100644 index 000000000..bac620e82 --- /dev/null +++ b/src/libffmpeg/libavcodec/ppc/h264_altivec.c @@ -0,0 +1,565 @@ +/* + * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "../dsputil.h" + +#include "gcc_fixes.h" + +#include "dsputil_altivec.h" +#include "types_altivec.h" + +#define PUT_OP_U8_ALTIVEC(d, s, dst) d = s +#define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s) + +#define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC +#define PREFIX_h264_chroma_mc8_altivec put_h264_chroma_mc8_altivec +#define PREFIX_h264_chroma_mc8_num altivec_put_h264_chroma_mc8_num +#define PREFIX_h264_qpel16_h_lowpass_altivec put_h264_qpel16_h_lowpass_altivec +#define PREFIX_h264_qpel16_h_lowpass_num altivec_put_h264_qpel16_h_lowpass_num +#define PREFIX_h264_qpel16_v_lowpass_altivec put_h264_qpel16_v_lowpass_altivec +#define PREFIX_h264_qpel16_v_lowpass_num altivec_put_h264_qpel16_v_lowpass_num +#define PREFIX_h264_qpel16_hv_lowpass_altivec put_h264_qpel16_hv_lowpass_altivec +#define PREFIX_h264_qpel16_hv_lowpass_num altivec_put_h264_qpel16_hv_lowpass_num +#include "h264_template_altivec.c" +#undef OP_U8_ALTIVEC +#undef PREFIX_h264_chroma_mc8_altivec +#undef PREFIX_h264_chroma_mc8_num +#undef PREFIX_h264_qpel16_h_lowpass_altivec +#undef PREFIX_h264_qpel16_h_lowpass_num +#undef PREFIX_h264_qpel16_v_lowpass_altivec +#undef PREFIX_h264_qpel16_v_lowpass_num +#undef PREFIX_h264_qpel16_hv_lowpass_altivec +#undef PREFIX_h264_qpel16_hv_lowpass_num + +#define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC +#define PREFIX_h264_chroma_mc8_altivec avg_h264_chroma_mc8_altivec +#define PREFIX_h264_chroma_mc8_num altivec_avg_h264_chroma_mc8_num +#define PREFIX_h264_qpel16_h_lowpass_altivec avg_h264_qpel16_h_lowpass_altivec +#define PREFIX_h264_qpel16_h_lowpass_num 
altivec_avg_h264_qpel16_h_lowpass_num +#define PREFIX_h264_qpel16_v_lowpass_altivec avg_h264_qpel16_v_lowpass_altivec +#define PREFIX_h264_qpel16_v_lowpass_num altivec_avg_h264_qpel16_v_lowpass_num +#define PREFIX_h264_qpel16_hv_lowpass_altivec avg_h264_qpel16_hv_lowpass_altivec +#define PREFIX_h264_qpel16_hv_lowpass_num altivec_avg_h264_qpel16_hv_lowpass_num +#include "h264_template_altivec.c" +#undef OP_U8_ALTIVEC +#undef PREFIX_h264_chroma_mc8_altivec +#undef PREFIX_h264_chroma_mc8_num +#undef PREFIX_h264_qpel16_h_lowpass_altivec +#undef PREFIX_h264_qpel16_h_lowpass_num +#undef PREFIX_h264_qpel16_v_lowpass_altivec +#undef PREFIX_h264_qpel16_v_lowpass_num +#undef PREFIX_h264_qpel16_hv_lowpass_altivec +#undef PREFIX_h264_qpel16_hv_lowpass_num + +#define H264_MC(OPNAME, SIZE, CODETYPE) \ +static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uint8_t *src, int stride){\ + OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){ \ + DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\ + put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\ + put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\ + DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\ + put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\ + DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\ + put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## 
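/* Map of the 16 quarter-pel cases this H264_MC macro expands to
 * (annotation): mc00 is a plain copy; mc20/mc02 are the 6-tap
 * horizontal/vertical half-pel filters; mc10/mc30 (resp. mc01/mc03)
 * average the half-pel plane with the nearest full-pel samples;
 * mc22 is the combined hv filter; the remaining diagonal cases average
 * two of the halfH/halfV/halfHV intermediate planes, matching the
 * scalar H264_MC macro elsewhere in libavcodec. */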
CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\ + DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\ + put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\ + DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\ + put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\ + OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\ + DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\ + DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\ + put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ + put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\ + DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\ + DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\ + put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\ + put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\ + DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\ + DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\ + put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\ + DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\ + DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\ + put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\ +}\ + +/* this code assume that stride % 16 == 0 */ +void put_no_rnd_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) { + signed int ABCD[4] 
__attribute__((aligned(16))) = + {((8 - x) * (8 - y)), + ((x) * (8 - y)), + ((8 - x) * (y)), + ((x) * (y))}; + register int i; + vector unsigned char fperm; + const vector signed int vABCD = vec_ld(0, ABCD); + const vector signed short vA = vec_splat((vector signed short)vABCD, 1); + const vector signed short vB = vec_splat((vector signed short)vABCD, 3); + const vector signed short vC = vec_splat((vector signed short)vABCD, 5); + const vector signed short vD = vec_splat((vector signed short)vABCD, 7); + const vector signed int vzero = vec_splat_s32(0); + const vector signed short v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4)); + const vector unsigned short v6us = vec_splat_u16(6); + register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1; + register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0; + + vector unsigned char vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1; + vector unsigned char vsrc0uc, vsrc1uc; + vector signed short vsrc0ssH, vsrc1ssH; + vector unsigned char vsrcCuc, vsrc2uc, vsrc3uc; + vector signed short vsrc2ssH, vsrc3ssH, psum; + vector unsigned char vdst, ppsum, fsum; + + if (((unsigned long)dst) % 16 == 0) { + fperm = (vector unsigned char)AVV(0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17, + 0x08, 0x09, 0x0A, 0x0B, + 0x0C, 0x0D, 0x0E, 0x0F); + } else { + fperm = (vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, + 0x18, 0x19, 0x1A, 0x1B, + 0x1C, 0x1D, 0x1E, 0x1F); + } + + vsrcAuc = vec_ld(0, src); + + if (loadSecond) + vsrcBuc = vec_ld(16, src); + vsrcperm0 = vec_lvsl(0, src); + vsrcperm1 = vec_lvsl(1, src); + + vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0); + if (reallyBadAlign) + vsrc1uc = vsrcBuc; + else + vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1); + + vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc0uc); + vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc1uc); + + if (!loadSecond) {// -> !reallyBadAlign + for (i = 0 ; i < h ; i++) { + + + vsrcCuc = vec_ld(stride + 0, src); + + vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); + vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1); + + vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc2uc); + vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc3uc); + + psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0)); + psum = vec_mladd(vB, vsrc1ssH, psum); + psum = vec_mladd(vC, vsrc2ssH, psum); + psum = vec_mladd(vD, vsrc3ssH, psum); + psum = vec_add(v28ss, psum); + psum = vec_sra(psum, v6us); + + vdst = vec_ld(0, dst); + ppsum = (vector unsigned char)vec_packsu(psum, psum); + fsum = vec_perm(vdst, ppsum, fperm); + + vec_st(fsum, 0, dst); + + vsrc0ssH = vsrc2ssH; + vsrc1ssH = vsrc3ssH; + + dst += stride; + src += stride; + } + } else { + vector unsigned char vsrcDuc; + for (i = 0 ; i < h ; i++) { + vsrcCuc = vec_ld(stride + 0, src); + vsrcDuc = vec_ld(stride + 16, src); + + vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); + if (reallyBadAlign) + vsrc3uc = vsrcDuc; + else + vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1); + + vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc2uc); + vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc3uc); + + psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0)); + psum = vec_mladd(vB, vsrc1ssH, psum); + psum = vec_mladd(vC, 
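/* Per pixel, the vec_mladd() chain evaluates the H.264 eighth-pel
 * bilinear chroma filter (x, y in 0..7):
 *   dst = (A*s[0] + B*s[1] + C*s[stride] + D*s[stride+1] + bias) >> 6
 * with A=(8-x)*(8-y), B=x*(8-y), C=(8-x)*y, D=x*y -- the ABCD[] table
 * above. In this no-rounding variant the bias is 28 (v28ss = 32 - 4)
 * rather than the usual 32. (Annotation, not part of the patch.) */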
vsrc2ssH, psum); + psum = vec_mladd(vD, vsrc3ssH, psum); + psum = vec_add(v28ss, psum); + psum = vec_sr(psum, v6us); + + vdst = vec_ld(0, dst); + ppsum = (vector unsigned char)vec_pack(psum, psum); + fsum = vec_perm(vdst, ppsum, fperm); + + vec_st(fsum, 0, dst); + + vsrc0ssH = vsrc2ssH; + vsrc1ssH = vsrc3ssH; + + dst += stride; + src += stride; + } + } +} + +static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1, + const uint8_t * src2, int dst_stride, + int src_stride1, int h) +{ + int i; + vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align; + + mask_ = vec_lvsl(0, src2); + + for (i = 0; i < h; i++) { + + tmp1 = vec_ld(i * src_stride1, src1); + mask = vec_lvsl(i * src_stride1, src1); + tmp2 = vec_ld(i * src_stride1 + 15, src1); + + a = vec_perm(tmp1, tmp2, mask); + + tmp1 = vec_ld(i * 16, src2); + tmp2 = vec_ld(i * 16 + 15, src2); + + b = vec_perm(tmp1, tmp2, mask_); + + tmp1 = vec_ld(0, dst); + mask = vec_lvsl(0, dst); + tmp2 = vec_ld(15, dst); + + d = vec_avg(a, b); + + edges = vec_perm(tmp2, tmp1, mask); + + align = vec_lvsr(0, dst); + + tmp2 = vec_perm(d, edges, align); + tmp1 = vec_perm(edges, d, align); + + vec_st(tmp2, 15, dst); + vec_st(tmp1, 0 , dst); + + dst += dst_stride; + } +} + +static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1, + const uint8_t * src2, int dst_stride, + int src_stride1, int h) +{ + int i; + vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align; + + mask_ = vec_lvsl(0, src2); + + for (i = 0; i < h; i++) { + + tmp1 = vec_ld(i * src_stride1, src1); + mask = vec_lvsl(i * src_stride1, src1); + tmp2 = vec_ld(i * src_stride1 + 15, src1); + + a = vec_perm(tmp1, tmp2, mask); + + tmp1 = vec_ld(i * 16, src2); + tmp2 = vec_ld(i * 16 + 15, src2); + + b = vec_perm(tmp1, tmp2, mask_); + + tmp1 = vec_ld(0, dst); + mask = vec_lvsl(0, dst); + tmp2 = vec_ld(15, dst); + + d = vec_avg(vec_perm(tmp1, tmp2, mask), vec_avg(a, b)); + + edges = vec_perm(tmp2, tmp1, mask); + + align = vec_lvsr(0, dst); + + tmp2 = vec_perm(d, edges, align); + tmp1 = vec_perm(edges, d, align); + + vec_st(tmp2, 15, dst); + vec_st(tmp1, 0 , dst); + + dst += dst_stride; + } +} + +/* Implemented but could be faster +#define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h) +#define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h) + */ + + H264_MC(put_, 16, altivec) + H264_MC(avg_, 16, altivec) + + +/**************************************************************************** + * IDCT transform: + ****************************************************************************/ + +#define IDCT8_1D_ALTIVEC(s0, s1, s2, s3, s4, s5, s6, s7, d0, d1, d2, d3, d4, d5, d6, d7) {\ + /* a0 = SRC(0) + SRC(4); */ \ + vec_s16_t a0v = vec_add(s0, s4); \ + /* a2 = SRC(0) - SRC(4); */ \ + vec_s16_t a2v = vec_sub(s0, s4); \ + /* a4 = (SRC(2)>>1) - SRC(6); */ \ + vec_s16_t a4v = vec_sub(vec_sra(s2, onev), s6); \ + /* a6 = (SRC(6)>>1) + SRC(2); */ \ + vec_s16_t a6v = vec_add(vec_sra(s6, onev), s2); \ + /* b0 = a0 + a6; */ \ + vec_s16_t b0v = vec_add(a0v, a6v); \ + /* b2 = a2 + a4; */ \ + vec_s16_t b2v = vec_add(a2v, a4v); \ + /* b4 = a2 - a4; */ \ + vec_s16_t b4v = vec_sub(a2v, a4v); \ + /* b6 = a0 - a6; */ \ + vec_s16_t b6v = vec_sub(a0v, a6v); \ + /* a1 = SRC(5) - SRC(3) - SRC(7) - (SRC(7)>>1); */ \ + /* a1 = (SRC(5)-SRC(3)) - (SRC(7) + (SRC(7)>>1)); */ \ + vec_s16_t a1v = vec_sub( vec_sub(s5, s3), vec_add(s7, vec_sra(s7, onev)) ); \ + /* a3 = SRC(7) + SRC(1) - SRC(3) - (SRC(3)>>1); */ \ 
+ /* a3 = (SRC(7)+SRC(1)) - (SRC(3) + (SRC(3)>>1)); */ \ + vec_s16_t a3v = vec_sub( vec_add(s7, s1), vec_add(s3, vec_sra(s3, onev)) );\ + /* a5 = SRC(7) - SRC(1) + SRC(5) + (SRC(5)>>1); */ \ + /* a5 = (SRC(7)-SRC(1)) + SRC(5) + (SRC(5)>>1); */ \ + vec_s16_t a5v = vec_add( vec_sub(s7, s1), vec_add(s5, vec_sra(s5, onev)) );\ + /* a7 = SRC(5)+SRC(3) + SRC(1) + (SRC(1)>>1); */ \ + vec_s16_t a7v = vec_add( vec_add(s5, s3), vec_add(s1, vec_sra(s1, onev)) );\ + /* b1 = (a7>>2) + a1; */ \ + vec_s16_t b1v = vec_add( vec_sra(a7v, twov), a1v); \ + /* b3 = a3 + (a5>>2); */ \ + vec_s16_t b3v = vec_add(a3v, vec_sra(a5v, twov)); \ + /* b5 = (a3>>2) - a5; */ \ + vec_s16_t b5v = vec_sub( vec_sra(a3v, twov), a5v); \ + /* b7 = a7 - (a1>>2); */ \ + vec_s16_t b7v = vec_sub( a7v, vec_sra(a1v, twov)); \ + /* DST(0, b0 + b7); */ \ + d0 = vec_add(b0v, b7v); \ + /* DST(1, b2 + b5); */ \ + d1 = vec_add(b2v, b5v); \ + /* DST(2, b4 + b3); */ \ + d2 = vec_add(b4v, b3v); \ + /* DST(3, b6 + b1); */ \ + d3 = vec_add(b6v, b1v); \ + /* DST(4, b6 - b1); */ \ + d4 = vec_sub(b6v, b1v); \ + /* DST(5, b4 - b3); */ \ + d5 = vec_sub(b4v, b3v); \ + /* DST(6, b2 - b5); */ \ + d6 = vec_sub(b2v, b5v); \ + /* DST(7, b0 - b7); */ \ + d7 = vec_sub(b0v, b7v); \ +} + +#define ALTIVEC_STORE_SUM_CLIP(dest, idctv, perm_ldv, perm_stv, sel) { \ + /* unaligned load */ \ + vec_u8_t hv = vec_ld( 0, dest ); \ + vec_u8_t lv = vec_ld( 7, dest ); \ + vec_u8_t dstv = vec_perm( hv, lv, (vec_u8_t)perm_ldv ); \ + vec_s16_t idct_sh6 = vec_sra(idctv, sixv); \ + vec_u16_t dst16 = (vec_u16_t)vec_mergeh(zero_u8v, dstv); \ + vec_s16_t idstsum = vec_adds(idct_sh6, (vec_s16_t)dst16); \ + vec_u8_t idstsum8 = vec_packsu(zero_s16v, idstsum); \ + vec_u8_t edgehv; \ + /* unaligned store */ \ + vec_u8_t bodyv = vec_perm( idstsum8, idstsum8, perm_stv );\ + vec_u8_t edgelv = vec_perm( sel, zero_u8v, perm_stv ); \ + lv = vec_sel( lv, bodyv, edgelv ); \ + vec_st( lv, 7, dest ); \ + hv = vec_ld( 0, dest ); \ + edgehv = vec_perm( zero_u8v, sel, perm_stv ); \ + hv = vec_sel( hv, bodyv, edgehv ); \ + vec_st( hv, 0, dest ); \ + } + +void ff_h264_idct8_add_altivec( uint8_t *dst, DCTELEM *dct, int stride ) { + vec_s16_t s0, s1, s2, s3, s4, s5, s6, s7; + vec_s16_t d0, d1, d2, d3, d4, d5, d6, d7; + vec_s16_t idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7; + + vec_u8_t perm_ldv = vec_lvsl(0, dst); + vec_u8_t perm_stv = vec_lvsr(8, dst); + + const vec_u16_t onev = vec_splat_u16(1); + const vec_u16_t twov = vec_splat_u16(2); + const vec_u16_t sixv = vec_splat_u16(6); + + const vec_u8_t sel = (vec_u8_t) AVV(0,0,0,0,0,0,0,0, + -1,-1,-1,-1,-1,-1,-1,-1); + LOAD_ZERO; + + dct[0] += 32; // rounding for the >>6 at the end + + s0 = vec_ld(0x00, (int16_t*)dct); + s1 = vec_ld(0x10, (int16_t*)dct); + s2 = vec_ld(0x20, (int16_t*)dct); + s3 = vec_ld(0x30, (int16_t*)dct); + s4 = vec_ld(0x40, (int16_t*)dct); + s5 = vec_ld(0x50, (int16_t*)dct); + s6 = vec_ld(0x60, (int16_t*)dct); + s7 = vec_ld(0x70, (int16_t*)dct); + + IDCT8_1D_ALTIVEC(s0, s1, s2, s3, s4, s5, s6, s7, + d0, d1, d2, d3, d4, d5, d6, d7); + + TRANSPOSE8( d0, d1, d2, d3, d4, d5, d6, d7 ); + + IDCT8_1D_ALTIVEC(d0, d1, d2, d3, d4, d5, d6, d7, + idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7); + + ALTIVEC_STORE_SUM_CLIP(&dst[0*stride], idct0, perm_ldv, perm_stv, sel); + ALTIVEC_STORE_SUM_CLIP(&dst[1*stride], idct1, perm_ldv, perm_stv, sel); + ALTIVEC_STORE_SUM_CLIP(&dst[2*stride], idct2, perm_ldv, perm_stv, sel); + ALTIVEC_STORE_SUM_CLIP(&dst[3*stride], idct3, perm_ldv, perm_stv, sel); + ALTIVEC_STORE_SUM_CLIP(&dst[4*stride], 
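/* Scalar shape of one IDCT8_1D pass, restating the comments inside
 * IDCT8_1D_ALTIVEC above (annotation, not part of the patch):
 *   a0 = s0 + s4;        a2 = s0 - s4;
 *   a4 = (s2>>1) - s6;   a6 = (s6>>1) + s2;
 *   b0 = a0+a6;  b2 = a2+a4;  b4 = a2-a4;  b6 = a0-a6;
 *   a1 = (s5-s3) - (s7 + (s7>>1));   a3 = (s7+s1) - (s3 + (s3>>1));
 *   a5 = (s7-s1) + (s5 + (s5>>1));   a7 = (s5+s3) + (s1 + (s1>>1));
 *   b1 = (a7>>2)+a1;  b3 = a3+(a5>>2);  b5 = (a3>>2)-a5;  b7 = a7-(a1>>2);
 *   d0..d7 = b0+b7, b2+b5, b4+b3, b6+b1, b6-b1, b4-b3, b2-b5, b0-b7
 * The pass runs on rows, the result is transposed, the pass runs again,
 * and the +32 added to dct[0] provides rounding for the final >>6. */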
idct4, perm_ldv, perm_stv, sel); + ALTIVEC_STORE_SUM_CLIP(&dst[5*stride], idct5, perm_ldv, perm_stv, sel); + ALTIVEC_STORE_SUM_CLIP(&dst[6*stride], idct6, perm_ldv, perm_stv, sel); + ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel); +} + +void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) { + +#ifdef HAVE_ALTIVEC + if (has_altivec()) { + c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec; + c->put_no_rnd_h264_chroma_pixels_tab[0] = put_no_rnd_h264_chroma_mc8_altivec; + c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec; + c->h264_idct8_add = ff_h264_idct8_add_altivec; + +#define dspfunc(PFX, IDX, NUM) \ + c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \ + c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_altivec; \ + c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_altivec; \ + c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_altivec; \ + c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_altivec; \ + c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_altivec; \ + c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_altivec; \ + c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_altivec; \ + c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_altivec; \ + c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_altivec; \ + c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_altivec; \ + c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_altivec; \ + c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_altivec; \ + c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \ + c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \ + c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec + + dspfunc(put_h264_qpel, 0, 16); + dspfunc(avg_h264_qpel, 0, 16); +#undef dspfunc + + } else +#endif /* HAVE_ALTIVEC */ + { + // Non-AltiVec PPC optimisations + + // ... pending ... + } +} diff --git a/src/libffmpeg/libavcodec/ppc/h264_template_altivec.c b/src/libffmpeg/libavcodec/ppc/h264_template_altivec.c new file mode 100644 index 000000000..e8ad67f2f --- /dev/null +++ b/src/libffmpeg/libavcodec/ppc/h264_template_altivec.c @@ -0,0 +1,719 @@ +/* + * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
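/* Annotation: h264_template_altivec.c, which begins above, is included
 * twice by h264_altivec.c -- once with OP_U8_ALTIVEC defined to
 * PUT_OP_U8_ALTIVEC (a plain store) and once to AVG_OP_U8_ALTIVEC
 * (d = vec_avg(dst, s)) -- so each function body yields both a put_*
 * and an avg_* variant under the PREFIX_* names. */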
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* this code assume that stride % 16 == 0 */ +void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) { + POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1); + signed int ABCD[4] __attribute__((aligned(16))) = + {((8 - x) * (8 - y)), + ((x) * (8 - y)), + ((8 - x) * (y)), + ((x) * (y))}; + register int i; + vector unsigned char fperm; + const vector signed int vABCD = vec_ld(0, ABCD); + const vector signed short vA = vec_splat((vector signed short)vABCD, 1); + const vector signed short vB = vec_splat((vector signed short)vABCD, 3); + const vector signed short vC = vec_splat((vector signed short)vABCD, 5); + const vector signed short vD = vec_splat((vector signed short)vABCD, 7); + const vector signed int vzero = vec_splat_s32(0); + const vector signed short v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5)); + const vector unsigned short v6us = vec_splat_u16(6); + register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1; + register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0; + + vector unsigned char vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1; + vector unsigned char vsrc0uc, vsrc1uc; + vector signed short vsrc0ssH, vsrc1ssH; + vector unsigned char vsrcCuc, vsrc2uc, vsrc3uc; + vector signed short vsrc2ssH, vsrc3ssH, psum; + vector unsigned char vdst, ppsum, vfdst, fsum; + + POWERPC_PERF_START_COUNT(PREFIX_h264_chroma_mc8_num, 1); + + if (((unsigned long)dst) % 16 == 0) { + fperm = (vector unsigned char)AVV(0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17, + 0x08, 0x09, 0x0A, 0x0B, + 0x0C, 0x0D, 0x0E, 0x0F); + } else { + fperm = (vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, + 0x18, 0x19, 0x1A, 0x1B, + 0x1C, 0x1D, 0x1E, 0x1F); + } + + vsrcAuc = vec_ld(0, src); + + if (loadSecond) + vsrcBuc = vec_ld(16, src); + vsrcperm0 = vec_lvsl(0, src); + vsrcperm1 = vec_lvsl(1, src); + + vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0); + if (reallyBadAlign) + vsrc1uc = vsrcBuc; + else + vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1); + + vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc0uc); + vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc1uc); + + if (!loadSecond) {// -> !reallyBadAlign + for (i = 0 ; i < h ; i++) { + + + vsrcCuc = vec_ld(stride + 0, src); + + vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); + vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1); + + vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc2uc); + vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc3uc); + + psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0)); + psum = vec_mladd(vB, vsrc1ssH, psum); + psum = vec_mladd(vC, vsrc2ssH, psum); + psum = vec_mladd(vD, vsrc3ssH, psum); + psum = vec_add(v32ss, psum); + psum = vec_sra(psum, v6us); + + vdst = vec_ld(0, dst); + ppsum = (vector unsigned char)vec_packsu(psum, psum); + vfdst = vec_perm(vdst, ppsum, fperm); + + OP_U8_ALTIVEC(fsum, vfdst, vdst); + + vec_st(fsum, 0, dst); + + vsrc0ssH = vsrc2ssH; + vsrc1ssH = vsrc3ssH; + + dst += stride; + src += stride; + } + } else { + vector unsigned char vsrcDuc; + for (i = 0 ; i < h ; i++) { + vsrcCuc = vec_ld(stride + 0, src); + 
vsrcDuc = vec_ld(stride + 16, src); + + vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); + if (reallyBadAlign) + vsrc3uc = vsrcDuc; + else + vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1); + + vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc2uc); + vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc3uc); + + psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0)); + psum = vec_mladd(vB, vsrc1ssH, psum); + psum = vec_mladd(vC, vsrc2ssH, psum); + psum = vec_mladd(vD, vsrc3ssH, psum); + psum = vec_add(v32ss, psum); + psum = vec_sr(psum, v6us); + + vdst = vec_ld(0, dst); + ppsum = (vector unsigned char)vec_pack(psum, psum); + vfdst = vec_perm(vdst, ppsum, fperm); + + OP_U8_ALTIVEC(fsum, vfdst, vdst); + + vec_st(fsum, 0, dst); + + vsrc0ssH = vsrc2ssH; + vsrc1ssH = vsrc3ssH; + + dst += stride; + src += stride; + } + } + POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1); +} + +/* this code assume stride % 16 == 0 */ +static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) { + POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_h_lowpass_num, 1); + register int i; + + const vector signed int vzero = vec_splat_s32(0); + const vector unsigned char permM2 = vec_lvsl(-2, src); + const vector unsigned char permM1 = vec_lvsl(-1, src); + const vector unsigned char permP0 = vec_lvsl(+0, src); + const vector unsigned char permP1 = vec_lvsl(+1, src); + const vector unsigned char permP2 = vec_lvsl(+2, src); + const vector unsigned char permP3 = vec_lvsl(+3, src); + const vector signed short v5ss = vec_splat_s16(5); + const vector unsigned short v5us = vec_splat_u16(5); + const vector signed short v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2)); + const vector signed short v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4)); + const vector unsigned char dstperm = vec_lvsr(0, dst); + const vector unsigned char neg1 = + (const vector unsigned char) vec_splat_s8(-1); + + const vector unsigned char dstmask = + vec_perm((const vector unsigned char)vzero, + neg1, dstperm); + + vector unsigned char srcM2, srcM1, srcP0, srcP1, srcP2, srcP3; + + register int align = ((((unsigned long)src) - 2) % 16); + + vector signed short srcP0A, srcP0B, srcP1A, srcP1B, + srcP2A, srcP2B, srcP3A, srcP3B, + srcM1A, srcM1B, srcM2A, srcM2B, + sum1A, sum1B, sum2A, sum2B, sum3A, sum3B, + pp1A, pp1B, pp2A, pp2B, pp3A, pp3B, + psumA, psumB, sumA, sumB; + + vector unsigned char sum, dst1, dst2, vdst, fsum, + rsum, fdst1, fdst2; + + POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1); + + for (i = 0 ; i < 16 ; i ++) { + vector unsigned char srcR1 = vec_ld(-2, src); + vector unsigned char srcR2 = vec_ld(14, src); + + switch (align) { + default: { + srcM2 = vec_perm(srcR1, srcR2, permM2); + srcM1 = vec_perm(srcR1, srcR2, permM1); + srcP0 = vec_perm(srcR1, srcR2, permP0); + srcP1 = vec_perm(srcR1, srcR2, permP1); + srcP2 = vec_perm(srcR1, srcR2, permP2); + srcP3 = vec_perm(srcR1, srcR2, permP3); + } break; + case 11: { + srcM2 = vec_perm(srcR1, srcR2, permM2); + srcM1 = vec_perm(srcR1, srcR2, permM1); + srcP0 = vec_perm(srcR1, srcR2, permP0); + srcP1 = vec_perm(srcR1, srcR2, permP1); + srcP2 = vec_perm(srcR1, srcR2, permP2); + srcP3 = srcR2; + } break; + case 12: { + vector unsigned char srcR3 = vec_ld(30, src); + srcM2 = vec_perm(srcR1, srcR2, permM2); + srcM1 = vec_perm(srcR1, srcR2, permM1); + srcP0 = vec_perm(srcR1, srcR2, permP0); + srcP1 = vec_perm(srcR1, srcR2, permP1); + srcP2 = srcR2; + 
srcP3 = vec_perm(srcR2, srcR3, permP3); + } break; + case 13: { + vector unsigned char srcR3 = vec_ld(30, src); + srcM2 = vec_perm(srcR1, srcR2, permM2); + srcM1 = vec_perm(srcR1, srcR2, permM1); + srcP0 = vec_perm(srcR1, srcR2, permP0); + srcP1 = srcR2; + srcP2 = vec_perm(srcR2, srcR3, permP2); + srcP3 = vec_perm(srcR2, srcR3, permP3); + } break; + case 14: { + vector unsigned char srcR3 = vec_ld(30, src); + srcM2 = vec_perm(srcR1, srcR2, permM2); + srcM1 = vec_perm(srcR1, srcR2, permM1); + srcP0 = srcR2; + srcP1 = vec_perm(srcR2, srcR3, permP1); + srcP2 = vec_perm(srcR2, srcR3, permP2); + srcP3 = vec_perm(srcR2, srcR3, permP3); + } break; + case 15: { + vector unsigned char srcR3 = vec_ld(30, src); + srcM2 = vec_perm(srcR1, srcR2, permM2); + srcM1 = srcR2; + srcP0 = vec_perm(srcR2, srcR3, permP0); + srcP1 = vec_perm(srcR2, srcR3, permP1); + srcP2 = vec_perm(srcR2, srcR3, permP2); + srcP3 = vec_perm(srcR2, srcR3, permP3); + } break; + } + + srcP0A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP0); + srcP0B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP0); + srcP1A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP1); + srcP1B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP1); + + srcP2A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP2); + srcP2B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP2); + srcP3A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP3); + srcP3B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP3); + + srcM1A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcM1); + srcM1B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcM1); + srcM2A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcM2); + srcM2B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcM2); + + sum1A = vec_adds(srcP0A, srcP1A); + sum1B = vec_adds(srcP0B, srcP1B); + sum2A = vec_adds(srcM1A, srcP2A); + sum2B = vec_adds(srcM1B, srcP2B); + sum3A = vec_adds(srcM2A, srcP3A); + sum3B = vec_adds(srcM2B, srcP3B); + + pp1A = vec_mladd(sum1A, v20ss, v16ss); + pp1B = vec_mladd(sum1B, v20ss, v16ss); + + pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero); + pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero); + + pp3A = vec_add(sum3A, pp1A); + pp3B = vec_add(sum3B, pp1B); + + psumA = vec_sub(pp3A, pp2A); + psumB = vec_sub(pp3B, pp2B); + + sumA = vec_sra(psumA, v5us); + sumB = vec_sra(psumB, v5us); + + sum = vec_packsu(sumA, sumB); + + dst1 = vec_ld(0, dst); + dst2 = vec_ld(16, dst); + vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst)); + + OP_U8_ALTIVEC(fsum, sum, vdst); + + rsum = vec_perm(fsum, fsum, dstperm); + fdst1 = vec_sel(dst1, rsum, dstmask); + fdst2 = vec_sel(rsum, dst2, dstmask); + + vec_st(fdst1, 0, dst); + vec_st(fdst2, 16, dst); + + src += srcStride; + dst += dstStride; + } +POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1); +} + +/* this code assume stride % 16 == 0 */ +static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) { + POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_v_lowpass_num, 1); + + register int i; + + const vector signed int vzero = vec_splat_s32(0); + const vector unsigned char perm = vec_lvsl(0, src); + const vector signed short v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2)); + const vector unsigned short v5us = vec_splat_u16(5); + const vector signed short v5ss = 
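/* Both the horizontal pass above and the vertical pass starting here
 * evaluate the H.264 6-tap half-pel luma filter; per output pixel, in
 * scalar form (annotation, not part of the patch):
 *   sum = (m2 + p3) - 5*(m1 + p2) + 20*(p0 + p1);
 *   out = clip_to_uint8((sum + 16) >> 5);
 * with m2..p3 the six neighbouring samples. The vec_mladd() chains
 * compute exactly this via v20ss=20, v5ss=5 and v16ss=16, and
 * vec_packsu() performs the unsigned-8-bit clamp. */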
vec_splat_s16(5); + const vector signed short v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4)); + const vector unsigned char dstperm = vec_lvsr(0, dst); + const vector unsigned char neg1 = (const vector unsigned char)vec_splat_s8(-1); + const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm); + + uint8_t *srcbis = src - (srcStride * 2); + + const vector unsigned char srcM2a = vec_ld(0, srcbis); + const vector unsigned char srcM2b = vec_ld(16, srcbis); + const vector unsigned char srcM2 = vec_perm(srcM2a, srcM2b, perm); +// srcbis += srcStride; + const vector unsigned char srcM1a = vec_ld(0, srcbis += srcStride); + const vector unsigned char srcM1b = vec_ld(16, srcbis); + const vector unsigned char srcM1 = vec_perm(srcM1a, srcM1b, perm); +// srcbis += srcStride; + const vector unsigned char srcP0a = vec_ld(0, srcbis += srcStride); + const vector unsigned char srcP0b = vec_ld(16, srcbis); + const vector unsigned char srcP0 = vec_perm(srcP0a, srcP0b, perm); +// srcbis += srcStride; + const vector unsigned char srcP1a = vec_ld(0, srcbis += srcStride); + const vector unsigned char srcP1b = vec_ld(16, srcbis); + const vector unsigned char srcP1 = vec_perm(srcP1a, srcP1b, perm); +// srcbis += srcStride; + const vector unsigned char srcP2a = vec_ld(0, srcbis += srcStride); + const vector unsigned char srcP2b = vec_ld(16, srcbis); + const vector unsigned char srcP2 = vec_perm(srcP2a, srcP2b, perm); +// srcbis += srcStride; + + vector signed short srcM2ssA = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcM2); + vector signed short srcM2ssB = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcM2); + vector signed short srcM1ssA = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcM1); + vector signed short srcM1ssB = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcM1); + vector signed short srcP0ssA = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP0); + vector signed short srcP0ssB = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP0); + vector signed short srcP1ssA = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP1); + vector signed short srcP1ssB = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP1); + vector signed short srcP2ssA = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP2); + vector signed short srcP2ssB = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP2); + + vector signed short pp1A, pp1B, pp2A, pp2B, pp3A, pp3B, + psumA, psumB, sumA, sumB, + srcP3ssA, srcP3ssB, + sum1A, sum1B, sum2A, sum2B, sum3A, sum3B; + + vector unsigned char sum, dst1, dst2, vdst, fsum, rsum, fdst1, fdst2, + srcP3a, srcP3b, srcP3; + + POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1); + + for (i = 0 ; i < 16 ; i++) { + srcP3a = vec_ld(0, srcbis += srcStride); + srcP3b = vec_ld(16, srcbis); + srcP3 = vec_perm(srcP3a, srcP3b, perm); + srcP3ssA = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP3); + srcP3ssB = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP3); +// srcbis += srcStride; + + sum1A = vec_adds(srcP0ssA, srcP1ssA); + sum1B = vec_adds(srcP0ssB, srcP1ssB); + sum2A = vec_adds(srcM1ssA, srcP2ssA); + sum2B = vec_adds(srcM1ssB, srcP2ssB); + sum3A = vec_adds(srcM2ssA, srcP3ssA); + sum3B = vec_adds(srcM2ssB, srcP3ssB); + + srcM2ssA = srcM1ssA; + srcM2ssB = srcM1ssB; + srcM1ssA = srcP0ssA; + srcM1ssB = srcP0ssB; + srcP0ssA = srcP1ssA; 
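/* Annotation: this block of copies slides the six-row filter window
 * down one line, so each loop iteration only has to load the new
 * bottom row (srcP3) instead of re-reading all six rows. */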
+ srcP0ssB = srcP1ssB; + srcP1ssA = srcP2ssA; + srcP1ssB = srcP2ssB; + srcP2ssA = srcP3ssA; + srcP2ssB = srcP3ssB; + + pp1A = vec_mladd(sum1A, v20ss, v16ss); + pp1B = vec_mladd(sum1B, v20ss, v16ss); + + pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero); + pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero); + + pp3A = vec_add(sum3A, pp1A); + pp3B = vec_add(sum3B, pp1B); + + psumA = vec_sub(pp3A, pp2A); + psumB = vec_sub(pp3B, pp2B); + + sumA = vec_sra(psumA, v5us); + sumB = vec_sra(psumB, v5us); + + sum = vec_packsu(sumA, sumB); + + dst1 = vec_ld(0, dst); + dst2 = vec_ld(16, dst); + vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst)); + + OP_U8_ALTIVEC(fsum, sum, vdst); + + rsum = vec_perm(fsum, fsum, dstperm); + fdst1 = vec_sel(dst1, rsum, dstmask); + fdst2 = vec_sel(rsum, dst2, dstmask); + + vec_st(fdst1, 0, dst); + vec_st(fdst2, 16, dst); + + dst += dstStride; + } + POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1); +} + +/* this code assume stride % 16 == 0 *and* tmp is properly aligned */ +static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp, uint8_t * src, int dstStride, int tmpStride, int srcStride) { + POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_hv_lowpass_num, 1); + register int i; + const vector signed int vzero = vec_splat_s32(0); + const vector unsigned char permM2 = vec_lvsl(-2, src); + const vector unsigned char permM1 = vec_lvsl(-1, src); + const vector unsigned char permP0 = vec_lvsl(+0, src); + const vector unsigned char permP1 = vec_lvsl(+1, src); + const vector unsigned char permP2 = vec_lvsl(+2, src); + const vector unsigned char permP3 = vec_lvsl(+3, src); + const vector signed short v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2)); + const vector unsigned int v10ui = vec_splat_u32(10); + const vector signed short v5ss = vec_splat_s16(5); + const vector signed short v1ss = vec_splat_s16(1); + const vector signed int v512si = vec_sl(vec_splat_s32(1),vec_splat_u32(9)); + const vector unsigned int v16ui = vec_sl(vec_splat_u32(1),vec_splat_u32(4)); + + register int align = ((((unsigned long)src) - 2) % 16); + + const vector unsigned char neg1 = (const vector unsigned char) + vec_splat_s8(-1); + + vector signed short srcP0A, srcP0B, srcP1A, srcP1B, + srcP2A, srcP2B, srcP3A, srcP3B, + srcM1A, srcM1B, srcM2A, srcM2B, + sum1A, sum1B, sum2A, sum2B, sum3A, sum3B, + pp1A, pp1B, pp2A, pp2B, psumA, psumB; + + const vector unsigned char dstperm = vec_lvsr(0, dst); + + const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm); + + const vector unsigned char mperm = (const vector unsigned char) + AVV(0x00, 0x08, 0x01, 0x09, 0x02, 0x0A, 0x03, 0x0B, + 0x04, 0x0C, 0x05, 0x0D, 0x06, 0x0E, 0x07, 0x0F); + int16_t *tmpbis = tmp; + + vector signed short tmpM1ssA, tmpM1ssB, tmpM2ssA, tmpM2ssB, + tmpP0ssA, tmpP0ssB, tmpP1ssA, tmpP1ssB, + tmpP2ssA, tmpP2ssB; + + vector signed int pp1Ae, pp1Ao, pp1Be, pp1Bo, pp2Ae, pp2Ao, pp2Be, pp2Bo, + pp3Ae, pp3Ao, pp3Be, pp3Bo, pp1cAe, pp1cAo, pp1cBe, pp1cBo, + pp32Ae, pp32Ao, pp32Be, pp32Bo, sumAe, sumAo, sumBe, sumBo, + ssumAe, ssumAo, ssumBe, ssumBo; + vector unsigned char fsum, sumv, sum, dst1, dst2, vdst, + rsum, fdst1, fdst2; + vector signed short ssume, ssumo; + + POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1); + src -= (2 * srcStride); + for (i = 0 ; i < 21 ; i ++) { + vector unsigned char srcM2, srcM1, srcP0, srcP1, srcP2, srcP3; + vector unsigned char srcR1 = vec_ld(-2, src); + vector unsigned char srcR2 = vec_ld(14, src); + + switch (align) { + default: 
{ + srcM2 = vec_perm(srcR1, srcR2, permM2); + srcM1 = vec_perm(srcR1, srcR2, permM1); + srcP0 = vec_perm(srcR1, srcR2, permP0); + srcP1 = vec_perm(srcR1, srcR2, permP1); + srcP2 = vec_perm(srcR1, srcR2, permP2); + srcP3 = vec_perm(srcR1, srcR2, permP3); + } break; + case 11: { + srcM2 = vec_perm(srcR1, srcR2, permM2); + srcM1 = vec_perm(srcR1, srcR2, permM1); + srcP0 = vec_perm(srcR1, srcR2, permP0); + srcP1 = vec_perm(srcR1, srcR2, permP1); + srcP2 = vec_perm(srcR1, srcR2, permP2); + srcP3 = srcR2; + } break; + case 12: { + vector unsigned char srcR3 = vec_ld(30, src); + srcM2 = vec_perm(srcR1, srcR2, permM2); + srcM1 = vec_perm(srcR1, srcR2, permM1); + srcP0 = vec_perm(srcR1, srcR2, permP0); + srcP1 = vec_perm(srcR1, srcR2, permP1); + srcP2 = srcR2; + srcP3 = vec_perm(srcR2, srcR3, permP3); + } break; + case 13: { + vector unsigned char srcR3 = vec_ld(30, src); + srcM2 = vec_perm(srcR1, srcR2, permM2); + srcM1 = vec_perm(srcR1, srcR2, permM1); + srcP0 = vec_perm(srcR1, srcR2, permP0); + srcP1 = srcR2; + srcP2 = vec_perm(srcR2, srcR3, permP2); + srcP3 = vec_perm(srcR2, srcR3, permP3); + } break; + case 14: { + vector unsigned char srcR3 = vec_ld(30, src); + srcM2 = vec_perm(srcR1, srcR2, permM2); + srcM1 = vec_perm(srcR1, srcR2, permM1); + srcP0 = srcR2; + srcP1 = vec_perm(srcR2, srcR3, permP1); + srcP2 = vec_perm(srcR2, srcR3, permP2); + srcP3 = vec_perm(srcR2, srcR3, permP3); + } break; + case 15: { + vector unsigned char srcR3 = vec_ld(30, src); + srcM2 = vec_perm(srcR1, srcR2, permM2); + srcM1 = srcR2; + srcP0 = vec_perm(srcR2, srcR3, permP0); + srcP1 = vec_perm(srcR2, srcR3, permP1); + srcP2 = vec_perm(srcR2, srcR3, permP2); + srcP3 = vec_perm(srcR2, srcR3, permP3); + } break; + } + + srcP0A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP0); + srcP0B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP0); + srcP1A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP1); + srcP1B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP1); + + srcP2A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP2); + srcP2B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP2); + srcP3A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP3); + srcP3B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP3); + + srcM1A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcM1); + srcM1B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcM1); + srcM2A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcM2); + srcM2B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcM2); + + sum1A = vec_adds(srcP0A, srcP1A); + sum1B = vec_adds(srcP0B, srcP1B); + sum2A = vec_adds(srcM1A, srcP2A); + sum2B = vec_adds(srcM1B, srcP2B); + sum3A = vec_adds(srcM2A, srcP3A); + sum3B = vec_adds(srcM2B, srcP3B); + + pp1A = vec_mladd(sum1A, v20ss, sum3A); + pp1B = vec_mladd(sum1B, v20ss, sum3B); + + pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero); + pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero); + + psumA = vec_sub(pp1A, pp2A); + psumB = vec_sub(pp1B, pp2B); + + vec_st(psumA, 0, tmp); + vec_st(psumB, 16, tmp); + + src += srcStride; + tmp += tmpStride; /* int16_t*, and stride is 16, so it's OK here */ + } + + tmpM2ssA = vec_ld(0, tmpbis); + tmpM2ssB = vec_ld(16, tmpbis); + tmpbis += tmpStride; + tmpM1ssA = vec_ld(0, tmpbis); + tmpM1ssB = vec_ld(16, tmpbis); + tmpbis += tmpStride; + tmpP0ssA = 
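/* Annotation: the loop above ran the horizontal 6-tap filter over
 * 16+5 = 21 rows and stored unclipped 16-bit intermediates in tmp[];
 * the loads here prime a six-row window over tmp, and the loop below
 * applies the same filter vertically at full 32-bit precision via
 * vec_mule()/vec_mulo(). Scalar form of this second pass:
 *   sum = (tm2 + tp3) - 5*(tm1 + tp2) + 20*(tp0 + tp1);
 *   out = clip_to_uint8((sum + 512) >> 10);
 * with v512si supplying the rounding add and v10ui the shift. */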
vec_ld(0, tmpbis); + tmpP0ssB = vec_ld(16, tmpbis); + tmpbis += tmpStride; + tmpP1ssA = vec_ld(0, tmpbis); + tmpP1ssB = vec_ld(16, tmpbis); + tmpbis += tmpStride; + tmpP2ssA = vec_ld(0, tmpbis); + tmpP2ssB = vec_ld(16, tmpbis); + tmpbis += tmpStride; + + for (i = 0 ; i < 16 ; i++) { + const vector signed short tmpP3ssA = vec_ld(0, tmpbis); + const vector signed short tmpP3ssB = vec_ld(16, tmpbis); + + const vector signed short sum1A = vec_adds(tmpP0ssA, tmpP1ssA); + const vector signed short sum1B = vec_adds(tmpP0ssB, tmpP1ssB); + const vector signed short sum2A = vec_adds(tmpM1ssA, tmpP2ssA); + const vector signed short sum2B = vec_adds(tmpM1ssB, tmpP2ssB); + const vector signed short sum3A = vec_adds(tmpM2ssA, tmpP3ssA); + const vector signed short sum3B = vec_adds(tmpM2ssB, tmpP3ssB); + + tmpbis += tmpStride; + + tmpM2ssA = tmpM1ssA; + tmpM2ssB = tmpM1ssB; + tmpM1ssA = tmpP0ssA; + tmpM1ssB = tmpP0ssB; + tmpP0ssA = tmpP1ssA; + tmpP0ssB = tmpP1ssB; + tmpP1ssA = tmpP2ssA; + tmpP1ssB = tmpP2ssB; + tmpP2ssA = tmpP3ssA; + tmpP2ssB = tmpP3ssB; + + pp1Ae = vec_mule(sum1A, v20ss); + pp1Ao = vec_mulo(sum1A, v20ss); + pp1Be = vec_mule(sum1B, v20ss); + pp1Bo = vec_mulo(sum1B, v20ss); + + pp2Ae = vec_mule(sum2A, v5ss); + pp2Ao = vec_mulo(sum2A, v5ss); + pp2Be = vec_mule(sum2B, v5ss); + pp2Bo = vec_mulo(sum2B, v5ss); + + pp3Ae = vec_sra((vector signed int)sum3A, v16ui); + pp3Ao = vec_mulo(sum3A, v1ss); + pp3Be = vec_sra((vector signed int)sum3B, v16ui); + pp3Bo = vec_mulo(sum3B, v1ss); + + pp1cAe = vec_add(pp1Ae, v512si); + pp1cAo = vec_add(pp1Ao, v512si); + pp1cBe = vec_add(pp1Be, v512si); + pp1cBo = vec_add(pp1Bo, v512si); + + pp32Ae = vec_sub(pp3Ae, pp2Ae); + pp32Ao = vec_sub(pp3Ao, pp2Ao); + pp32Be = vec_sub(pp3Be, pp2Be); + pp32Bo = vec_sub(pp3Bo, pp2Bo); + + sumAe = vec_add(pp1cAe, pp32Ae); + sumAo = vec_add(pp1cAo, pp32Ao); + sumBe = vec_add(pp1cBe, pp32Be); + sumBo = vec_add(pp1cBo, pp32Bo); + + ssumAe = vec_sra(sumAe, v10ui); + ssumAo = vec_sra(sumAo, v10ui); + ssumBe = vec_sra(sumBe, v10ui); + ssumBo = vec_sra(sumBo, v10ui); + + ssume = vec_packs(ssumAe, ssumBe); + ssumo = vec_packs(ssumAo, ssumBo); + + sumv = vec_packsu(ssume, ssumo); + sum = vec_perm(sumv, sumv, mperm); + + dst1 = vec_ld(0, dst); + dst2 = vec_ld(16, dst); + vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst)); + + OP_U8_ALTIVEC(fsum, sum, vdst); + + rsum = vec_perm(fsum, fsum, dstperm); + fdst1 = vec_sel(dst1, rsum, dstmask); + fdst2 = vec_sel(rsum, dst2, dstmask); + + vec_st(fdst1, 0, dst); + vec_st(fdst2, 16, dst); + + dst += dstStride; + } + POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1); +} diff --git a/src/libffmpeg/libavcodec/ppc/mathops.h b/src/libffmpeg/libavcodec/ppc/mathops.h new file mode 100644 index 000000000..6af23f246 --- /dev/null +++ b/src/libffmpeg/libavcodec/ppc/mathops.h @@ -0,0 +1,33 @@ +/* + * simple math operations + * Copyright (c) 2001, 2002 Fabrice Bellard. + * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#if defined(ARCH_POWERPC_405) +/* signed 16x16 -> 32 multiply add accumulate */ +# define MAC16(rt, ra, rb) \ + asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb)); + +/* signed 16x16 -> 32 multiply */ +# define MUL16(ra, rb) \ + ({ int __rt; + asm ("mullhw %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); + __rt; }) +#endif diff --git a/src/libffmpeg/libavcodec/ppc/snow_altivec.c b/src/libffmpeg/libavcodec/ppc/snow_altivec.c new file mode 100644 index 000000000..b15672ffe --- /dev/null +++ b/src/libffmpeg/libavcodec/ppc/snow_altivec.c @@ -0,0 +1,788 @@ +/* + * Altivec optimized snow DSP utils + * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * + */ + +#include "../dsputil.h" + +#include "gcc_fixes.h" +#include "dsputil_altivec.h" +#include "../snow.h" + +#undef NDEBUG +#include <assert.h> + + + +//FIXME remove this replication +#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? 
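/* Annotation for mathops.h above: the PowerPC 405 family has halfword
 * multiply instructions, so on ARCH_POWERPC_405 the maclhw/mullhw forms
 * implement the signed 16x16->32 operations in a single instruction
 * each, replacing generic fallbacks of the shape
 *   MUL16(ra, rb):     ((ra) * (rb))
 *   MAC16(rt, ra, rb): rt += MUL16(ra, rb)
 * used on other targets. */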
(slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num))) + +static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line) +{ + int offset; + DWTELEM * buffer; + +// av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line); + + assert(buf->data_stack_top >= 0); +// assert(!buf->line[line]); + if (buf->line[line]) + return buf->line[line]; + + offset = buf->line_width * line; + buffer = buf->data_stack[buf->data_stack_top]; + buf->data_stack_top--; + buf->line[line] = buffer; + +// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1); + + return buffer; +} + + +//altivec code + +void ff_snow_horizontal_compose97i_altivec(DWTELEM *b, int width) +{ + const int w2= (width+1)>>1; + DECLARE_ALIGNED_16(DWTELEM, temp[(width>>1)]); + const int w_l= (width>>1); + const int w_r= w2 - 1; + int i; + vector signed int t1, t2, x, y, tmp1, tmp2; + vector signed int *vbuf, *vtmp; + vector unsigned char align; + + + + { // Lift 0 + DWTELEM * const ref = b + w2 - 1; + DWTELEM b_0 = b[0]; + vbuf = (vector signed int *)b; + + tmp1 = vec_ld (0, ref); + align = vec_lvsl (0, ref); + tmp2 = vec_ld (15, ref); + t1= vec_perm(tmp1, tmp2, align); + + i = 0; + + for (i=0; i<w_l-15; i+=16) { +#if 0 + b[i+0] = b[i+0] - ((3 * (ref[i+0] + ref[i+1]) + 4) >> 3); + b[i+1] = b[i+1] - ((3 * (ref[i+1] + ref[i+2]) + 4) >> 3); + b[i+2] = b[i+2] - ((3 * (ref[i+2] + ref[i+3]) + 4) >> 3); + b[i+3] = b[i+3] - ((3 * (ref[i+3] + ref[i+4]) + 4) >> 3); +#else + + tmp1 = vec_ld (0, ref+4+i); + tmp2 = vec_ld (15, ref+4+i); + + t2 = vec_perm(tmp1, tmp2, align); + + y = vec_add(t1,vec_sld(t1,t2,4)); + y = vec_add(vec_add(y,y),y); + + tmp1 = vec_ld (0, ref+8+i); + + y = vec_add(y, vec_splat_s32(4)); + y = vec_sra(y, vec_splat_u32(3)); + + tmp2 = vec_ld (15, ref+8+i); + + *vbuf = vec_sub(*vbuf, y); + + t1=t2; + + vbuf++; + + t2 = vec_perm(tmp1, tmp2, align); + + y = vec_add(t1,vec_sld(t1,t2,4)); + y = vec_add(vec_add(y,y),y); + + tmp1 = vec_ld (0, ref+12+i); + + y = vec_add(y, vec_splat_s32(4)); + y = vec_sra(y, vec_splat_u32(3)); + + tmp2 = vec_ld (15, ref+12+i); + + *vbuf = vec_sub(*vbuf, y); + + t1=t2; + + vbuf++; + + t2 = vec_perm(tmp1, tmp2, align); + + y = vec_add(t1,vec_sld(t1,t2,4)); + y = vec_add(vec_add(y,y),y); + + tmp1 = vec_ld (0, ref+16+i); + + y = vec_add(y, vec_splat_s32(4)); + y = vec_sra(y, vec_splat_u32(3)); + + tmp2 = vec_ld (15, ref+16+i); + + *vbuf = vec_sub(*vbuf, y); + + t1=t2; + + t2 = vec_perm(tmp1, tmp2, align); + + y = vec_add(t1,vec_sld(t1,t2,4)); + y = vec_add(vec_add(y,y),y); + + vbuf++; + + y = vec_add(y, vec_splat_s32(4)); + y = vec_sra(y, vec_splat_u32(3)); + *vbuf = vec_sub(*vbuf, y); + + t1=t2; + + vbuf++; + +#endif + } + + snow_horizontal_compose_lift_lead_out(i, b, b, ref, width, w_l, 0, W_DM, W_DO, W_DS); + b[0] = b_0 - ((W_DM * 2 * ref[1]+W_DO)>>W_DS); + } + + { // Lift 1 + DWTELEM * const dst = b+w2; + + i = 0; + for(; (((long)&dst[i]) & 0xF) && i<w_r; i++){ + dst[i] = dst[i] - (b[i] + b[i + 1]); + } + + align = vec_lvsl(0, b+i); + tmp1 = vec_ld(0, b+i); + vbuf = (vector signed int*) (dst + i); + tmp2 = vec_ld(15, b+i); + + t1 = vec_perm(tmp1, tmp2, align); + + for (; i<w_r-3; i+=4) { + +#if 0 + dst[i] = dst[i] - (b[i] + b[i + 1]); + dst[i+1] = dst[i+1] - (b[i+1] + b[i + 2]); + dst[i+2] = dst[i+2] - (b[i+2] + b[i + 3]); + dst[i+3] = dst[i+3] - (b[i+3] + b[i + 4]); +#else + + tmp1 = vec_ld(0, b+4+i); + tmp2 = vec_ld(15, b+4+i); + + t2 = vec_perm(tmp1, tmp2, align); + + y = vec_add(t1, vec_sld(t1,t2,4)); + *vbuf = vec_sub 
(*vbuf, y); + + vbuf++; + + t1 = t2; + +#endif + + } + + snow_horizontal_compose_lift_lead_out(i, dst, dst, b, width, w_r, 1, W_CM, W_CO, W_CS); + } + + { // Lift 2 + DWTELEM * const ref = b+w2 - 1; + DWTELEM b_0 = b[0]; + vbuf= (vector signed int *) b; + + tmp1 = vec_ld (0, ref); + align = vec_lvsl (0, ref); + tmp2 = vec_ld (15, ref); + t1= vec_perm(tmp1, tmp2, align); + + i = 0; + for (; i<w_l-15; i+=16) { +#if 0 + b[i] = b[i] - (((8 -(ref[i] + ref[i+1])) - (b[i] <<2)) >> 4); + b[i+1] = b[i+1] - (((8 -(ref[i+1] + ref[i+2])) - (b[i+1]<<2)) >> 4); + b[i+2] = b[i+2] - (((8 -(ref[i+2] + ref[i+3])) - (b[i+2]<<2)) >> 4); + b[i+3] = b[i+3] - (((8 -(ref[i+3] + ref[i+4])) - (b[i+3]<<2)) >> 4); +#else + tmp1 = vec_ld (0, ref+4+i); + tmp2 = vec_ld (15, ref+4+i); + + t2 = vec_perm(tmp1, tmp2, align); + + y = vec_add(t1,vec_sld(t1,t2,4)); + y = vec_sub(vec_splat_s32(8),y); + + tmp1 = vec_ld (0, ref+8+i); + + x = vec_sl(*vbuf,vec_splat_u32(2)); + y = vec_sra(vec_sub(y,x),vec_splat_u32(4)); + + tmp2 = vec_ld (15, ref+8+i); + + *vbuf = vec_sub( *vbuf, y); + + t1 = t2; + + vbuf++; + + t2 = vec_perm(tmp1, tmp2, align); + + y = vec_add(t1,vec_sld(t1,t2,4)); + y = vec_sub(vec_splat_s32(8),y); + + tmp1 = vec_ld (0, ref+12+i); + + x = vec_sl(*vbuf,vec_splat_u32(2)); + y = vec_sra(vec_sub(y,x),vec_splat_u32(4)); + + tmp2 = vec_ld (15, ref+12+i); + + *vbuf = vec_sub( *vbuf, y); + + t1 = t2; + + vbuf++; + + t2 = vec_perm(tmp1, tmp2, align); + + y = vec_add(t1,vec_sld(t1,t2,4)); + y = vec_sub(vec_splat_s32(8),y); + + tmp1 = vec_ld (0, ref+16+i); + + x = vec_sl(*vbuf,vec_splat_u32(2)); + y = vec_sra(vec_sub(y,x),vec_splat_u32(4)); + + tmp2 = vec_ld (15, ref+16+i); + + *vbuf = vec_sub( *vbuf, y); + + t1 = t2; + + vbuf++; + + t2 = vec_perm(tmp1, tmp2, align); + + y = vec_add(t1,vec_sld(t1,t2,4)); + y = vec_sub(vec_splat_s32(8),y); + + t1 = t2; + + x = vec_sl(*vbuf,vec_splat_u32(2)); + y = vec_sra(vec_sub(y,x),vec_splat_u32(4)); + *vbuf = vec_sub( *vbuf, y); + + vbuf++; + +#endif + } + + snow_horizontal_compose_liftS_lead_out(i, b, b, ref, width, w_l); + b[0] = b_0 - (((-2 * ref[1] + W_BO) - 4 * b_0) >> W_BS); + } + + { // Lift 3 + DWTELEM * const src = b+w2; + + vbuf = (vector signed int *)b; + vtmp = (vector signed int *)temp; + + i = 0; + align = vec_lvsl(0, src); + + for (; i<w_r-3; i+=4) { +#if 0 + temp[i] = src[i] - ((-3*(b[i] + b[i+1]))>>1); + temp[i+1] = src[i+1] - ((-3*(b[i+1] + b[i+2]))>>1); + temp[i+2] = src[i+2] - ((-3*(b[i+2] + b[i+3]))>>1); + temp[i+3] = src[i+3] - ((-3*(b[i+3] + b[i+4]))>>1); +#else + tmp1 = vec_ld(0,src+i); + t1 = vec_add(vbuf[0],vec_sld(vbuf[0],vbuf[1],4)); + tmp2 = vec_ld(15,src+i); + t1 = vec_sub(vec_splat_s32(0),t1); //bad! 
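/* Annotation (not part of the patch; based on the scalar reference in the
 * #if 0 branch): the vec_sub(vec_splat_s32(0), t1) marked "bad!" above,
 * together with the vec_add/vec_sra lines that follow, computes
 *     t1 = (-3*(b[i] + b[i+1])) >> 1
 * without a vector multiply, which classic AltiVec lacks for 32-bit
 * elements: negate the pair sum, triple it with two adds (t1 + (t1 + t1)),
 * then arithmetic-shift right by one. Negating first is not just a quirk:
 * (-3*s)>>1 differs from -((3*s)>>1) whenever 3*s is odd, so this order is
 * needed to match the scalar rounding. */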
+ t1 = vec_add(t1,vec_add(t1,t1)); + t2 = vec_perm(tmp1 ,tmp2 ,align); + t1 = vec_sra(t1,vec_splat_u32(1)); + vbuf++; + *vtmp = vec_sub(t2,t1); + vtmp++; + +#endif + + } + + snow_horizontal_compose_lift_lead_out(i, temp, src, b, width, w_r, 1, -3, 0, 1); + } + + { + //Interleave + int a; + vector signed int *t = (vector signed int *)temp, + *v = (vector signed int *)b; + + snow_interleave_line_header(&i, width, b, temp); + + for (; (i & 0xE) != 0xE; i-=2){ + b[i+1] = temp[i>>1]; + b[i] = b[i>>1]; + } + for (i-=14; i>=0; i-=16){ + a=i/4; + + v[a+3]=vec_mergel(v[(a>>1)+1],t[(a>>1)+1]); + v[a+2]=vec_mergeh(v[(a>>1)+1],t[(a>>1)+1]); + v[a+1]=vec_mergel(v[a>>1],t[a>>1]); + v[a]=vec_mergeh(v[a>>1],t[a>>1]); + + } + + } +} + +void ff_snow_vertical_compose97i_altivec(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width) +{ + int i, w4 = width/4; + vector signed int *v0, *v1,*v2,*v3,*v4,*v5; + vector signed int t1, t2; + + v0=(vector signed int *)b0; + v1=(vector signed int *)b1; + v2=(vector signed int *)b2; + v3=(vector signed int *)b3; + v4=(vector signed int *)b4; + v5=(vector signed int *)b5; + + for (i=0; i< w4;i++) + { + + #if 0 + b4[i] -= (3*(b3[i] + b5[i])+4)>>3; + b3[i] -= ((b2[i] + b4[i])); + b2[i] += ((b1[i] + b3[i])+4*b2[i]+8)>>4; + b1[i] += (3*(b0[i] + b2[i]))>>1; + #else + t1 = vec_add(v3[i], v5[i]); + t2 = vec_add(t1, vec_add(t1,t1)); + t1 = vec_add(t2, vec_splat_s32(4)); + v4[i] = vec_sub(v4[i], vec_sra(t1,vec_splat_u32(3))); + + v3[i] = vec_sub(v3[i], vec_add(v2[i], v4[i])); + + t1 = vec_add(vec_splat_s32(8), vec_add(v1[i], v3[i])); + t2 = vec_sl(v2[i], vec_splat_u32(2)); + v2[i] = vec_add(v2[i], vec_sra(vec_add(t1,t2),vec_splat_u32(4))); + t1 = vec_add(v0[i], v2[i]); + t2 = vec_add(t1, vec_add(t1,t1)); + v1[i] = vec_add(v1[i], vec_sra(t2,vec_splat_u32(1))); + + #endif + } + + for(i*=4; i < width; i++) + { + b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS; + b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS; + b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS; + b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; + } +} + +#define LOAD_BLOCKS \ + tmp1 = vec_ld(0, &block[3][y*src_stride]);\ + align = vec_lvsl(0, &block[3][y*src_stride]);\ + tmp2 = vec_ld(15, &block[3][y*src_stride]);\ +\ + b3 = vec_perm(tmp1,tmp2,align);\ +\ + tmp1 = vec_ld(0, &block[2][y*src_stride]);\ + align = vec_lvsl(0, &block[2][y*src_stride]);\ + tmp2 = vec_ld(15, &block[2][y*src_stride]);\ +\ + b2 = vec_perm(tmp1,tmp2,align);\ +\ + tmp1 = vec_ld(0, &block[1][y*src_stride]);\ + align = vec_lvsl(0, &block[1][y*src_stride]);\ + tmp2 = vec_ld(15, &block[1][y*src_stride]);\ +\ + b1 = vec_perm(tmp1,tmp2,align);\ +\ + tmp1 = vec_ld(0, &block[0][y*src_stride]);\ + align = vec_lvsl(0, &block[0][y*src_stride]);\ + tmp2 = vec_ld(15, &block[0][y*src_stride]);\ +\ + b0 = vec_perm(tmp1,tmp2,align); + +#define LOAD_OBMCS \ + tmp1 = vec_ld(0, obmc1);\ + align = vec_lvsl(0, obmc1);\ + tmp2 = vec_ld(15, obmc1);\ +\ + ob1 = vec_perm(tmp1,tmp2,align);\ +\ + tmp1 = vec_ld(0, obmc2);\ + align = vec_lvsl(0, obmc2);\ + tmp2 = vec_ld(15, obmc2);\ +\ + ob2 = vec_perm(tmp1,tmp2,align);\ +\ + tmp1 = vec_ld(0, obmc3);\ + align = vec_lvsl(0, obmc3);\ + tmp2 = vec_ld(15, obmc3);\ +\ + ob3 = vec_perm(tmp1,tmp2,align);\ +\ + tmp1 = vec_ld(0, obmc4);\ + align = vec_lvsl(0, obmc4);\ + tmp2 = vec_ld(15, obmc4);\ +\ + ob4 = vec_perm(tmp1,tmp2,align); + +/* interleave logic + * h1 <- [ a,b,a,b, a,b,a,b, a,b,a,b, a,b,a,b ] + * h2 <- [ c,d,c,d, c,d,c,d, c,d,c,d, c,d,c,d ] + * h <- [ a,b,c,d, a,b,c,d, a,b,c,d, a,b,c,d ] + */ + 
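/* Annotation (not part of the patch): STEPS_0_1 below interleaves the four
 * OBMC weight vectors (ob1..ob4) with the four block rows (b3..b0) byte by
 * byte, so that each vec_msum sums one weight*pixel product per overlapping
 * block into a 32-bit lane. A scalar model of what one output element
 * receives, under my reading of the merges (hypothetical helper, names
 * invented for illustration): */
static inline int obmc_blend_scalar(const uint8_t *ob1, const uint8_t *ob2,
                                    const uint8_t *ob3, const uint8_t *ob4,
                                    const uint8_t *b0, const uint8_t *b1,
                                    const uint8_t *b2, const uint8_t *b3,
                                    int x)
{
    /* one OBMC window weight per overlapping block, applied to the same
     * pixel position x of each block's source row */
    return ob1[x]*b3[x] + ob2[x]*b2[x] + ob3[x]*b1[x] + ob4[x]*b0[x];
}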
+#define STEPS_0_1\ + h1 = (vector unsigned short)\ + vec_mergeh(ob1, ob2);\ +\ + h2 = (vector unsigned short)\ + vec_mergeh(ob3, ob4);\ +\ + ih = (vector unsigned char)\ + vec_mergeh(h1,h2);\ +\ + l1 = (vector unsigned short) vec_mergeh(b3, b2);\ +\ + ih1 = (vector unsigned char) vec_mergel(h1, h2);\ +\ + l2 = (vector unsigned short) vec_mergeh(b1, b0);\ +\ + il = (vector unsigned char) vec_mergeh(l1, l2);\ +\ + v[0] = (vector signed int) vec_msum(ih, il, vec_splat_u32(0));\ +\ + il1 = (vector unsigned char) vec_mergel(l1, l2);\ +\ + v[1] = (vector signed int) vec_msum(ih1, il1, vec_splat_u32(0)); + +#define FINAL_STEP_SCALAR\ + for(x=0; x<b_w; x++)\ + if(add){\ + vbuf[x] += dst[x + src_x];\ + vbuf[x] = (vbuf[x] + (1<<(FRAC_BITS-1))) >> FRAC_BITS;\ + if(vbuf[x]&(~255)) vbuf[x]= ~(vbuf[x]>>31);\ + dst8[x + y*src_stride] = vbuf[x];\ + }else{\ + dst[x + src_x] -= vbuf[x];\ + } + +static void inner_add_yblock_bw_8_obmc_16_altivec(uint8_t *obmc, + const int obmc_stride, + uint8_t * * block, int b_w, + int b_h, int src_x, int src_y, + int src_stride, slice_buffer * sb, + int add, uint8_t * dst8) +{ + int y, x; + DWTELEM * dst; + vector unsigned short h1, h2, l1, l2; + vector unsigned char ih, il, ih1, il1, tmp1, tmp2, align; + vector unsigned char b0,b1,b2,b3; + vector unsigned char ob1,ob2,ob3,ob4; + + DECLARE_ALIGNED_16(int, vbuf[16]); + vector signed int *v = (vector signed int *)vbuf, *d; + + for(y=0; y<b_h; y++){ + //FIXME ugly missue of obmc_stride + + uint8_t *obmc1= obmc + y*obmc_stride; + uint8_t *obmc2= obmc1+ (obmc_stride>>1); + uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); + uint8_t *obmc4= obmc3+ (obmc_stride>>1); + + dst = slice_buffer_get_line(sb, src_y + y); + d = (vector signed int *)(dst + src_x); + +//FIXME i could avoid some loads! 
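/* Annotation on FINAL_STEP_SCALAR above (assuming 32-bit int and an
 * arithmetic right shift, which the rest of this file already relies on):
 *     if(vbuf[x]&(~255)) vbuf[x]= ~(vbuf[x]>>31);
 * is a branch-light clamp to 0..255. Any value with bits set outside the
 * low eight is out of range; vbuf[x]>>31 is then -1 for negative values and
 * 0 for positive overflow, so ~(vbuf[x]>>31) is 0 or 0xFFFFFFFF, and the
 * truncating store into dst8[] leaves 0 or 255. */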
+ + // load blocks + LOAD_BLOCKS + + // load obmcs + LOAD_OBMCS + + // steps 0 1 + STEPS_0_1 + + FINAL_STEP_SCALAR + + } + +} + +#define STEPS_2_3\ + h1 = (vector unsigned short) vec_mergel(ob1, ob2);\ +\ + h2 = (vector unsigned short) vec_mergel(ob3, ob4);\ +\ + ih = (vector unsigned char) vec_mergeh(h1,h2);\ +\ + l1 = (vector unsigned short) vec_mergel(b3, b2);\ +\ + l2 = (vector unsigned short) vec_mergel(b1, b0);\ +\ + ih1 = (vector unsigned char) vec_mergel(h1,h2);\ +\ + il = (vector unsigned char) vec_mergeh(l1,l2);\ +\ + v[2] = (vector signed int) vec_msum(ih, il, vec_splat_u32(0));\ +\ + il1 = (vector unsigned char) vec_mergel(l1,l2);\ +\ + v[3] = (vector signed int) vec_msum(ih1, il1, vec_splat_u32(0)); + + +static void inner_add_yblock_bw_16_obmc_32_altivec(uint8_t *obmc, + const int obmc_stride, + uint8_t * * block, int b_w, + int b_h, int src_x, int src_y, + int src_stride, slice_buffer * sb, + int add, uint8_t * dst8) +{ + int y, x; + DWTELEM * dst; + vector unsigned short h1, h2, l1, l2; + vector unsigned char ih, il, ih1, il1, tmp1, tmp2, align; + vector unsigned char b0,b1,b2,b3; + vector unsigned char ob1,ob2,ob3,ob4; + DECLARE_ALIGNED_16(int, vbuf[b_w]); + vector signed int *v = (vector signed int *)vbuf, *d; + + for(y=0; y<b_h; y++){ + //FIXME ugly missue of obmc_stride + + uint8_t *obmc1= obmc + y*obmc_stride; + uint8_t *obmc2= obmc1+ (obmc_stride>>1); + uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); + uint8_t *obmc4= obmc3+ (obmc_stride>>1); + + dst = slice_buffer_get_line(sb, src_y + y); + d = (vector signed int *)(dst + src_x); + + // load blocks + LOAD_BLOCKS + + // load obmcs + LOAD_OBMCS + + // steps 0 1 2 3 + STEPS_0_1 + + STEPS_2_3 + + FINAL_STEP_SCALAR + + } +} + +#define FINAL_STEP_VEC \ +\ + if(add)\ + {\ + for(x=0; x<b_w/4; x++)\ + {\ + v[x] = vec_add(v[x], d[x]);\ + v[x] = vec_sra(vec_add(v[x],\ + vec_sl( vec_splat_s32(1),\ + vec_splat_u32(7))),\ + vec_splat_u32(8));\ +\ + mask = (vector bool int) vec_sl((vector signed int)\ + vec_cmpeq(v[x],v[x]),vec_splat_u32(8));\ + mask = (vector bool int) vec_and(v[x],vec_nor(mask,mask));\ +\ + mask = (vector bool int)\ + vec_cmpeq((vector signed int)mask,\ + (vector signed int)vec_splat_u32(0));\ +\ + vs = vec_sra(v[x],vec_splat_u32(8));\ + vs = vec_sra(v[x],vec_splat_u32(8));\ + vs = vec_sra(v[x],vec_splat_u32(15));\ +\ + vs = vec_nor(vs,vs);\ +\ + v[x]= vec_sel(v[x],vs,mask);\ + }\ +\ + for(x=0; x<b_w; x++)\ + dst8[x + y*src_stride] = vbuf[x];\ +\ + }\ + else\ + for(x=0; x<b_w/4; x++)\ + d[x] = vec_sub(d[x], v[x]); + +static void inner_add_yblock_a_bw_8_obmc_16_altivec(uint8_t *obmc, + const int obmc_stride, + uint8_t * * block, int b_w, + int b_h, int src_x, int src_y, + int src_stride, slice_buffer * sb, + int add, uint8_t * dst8) +{ + int y, x; + DWTELEM * dst; + vector bool int mask; + vector signed int vs; + vector unsigned short h1, h2, l1, l2; + vector unsigned char ih, il, ih1, il1, tmp1, tmp2, align; + vector unsigned char b0,b1,b2,b3; + vector unsigned char ob1,ob2,ob3,ob4; + + DECLARE_ALIGNED_16(int, vbuf[16]); + vector signed int *v = (vector signed int *)vbuf, *d; + + for(y=0; y<b_h; y++){ + //FIXME ugly missue of obmc_stride + + uint8_t *obmc1= obmc + y*obmc_stride; + uint8_t *obmc2= obmc1+ (obmc_stride>>1); + uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); + uint8_t *obmc4= obmc3+ (obmc_stride>>1); + + dst = slice_buffer_get_line(sb, src_y + y); + d = (vector signed int *)(dst + src_x); + +//FIXME i could avoid some loads! 
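/* Note on FINAL_STEP_VEC above: the first two assignments to vs (both
 * vec_sra(v[x], vec_splat_u32(8))) are overwritten by the third before vs
 * is ever read, so they are dead stores; only the shift by 15, apparently a
 * sign extraction valid for the 16-bit range the rounded values occupy
 * (vec_splat_u32 cannot encode a shift of 31), followed by vec_nor(vs,vs)
 * to complement it, contributes to the clamped value. */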
+ + // load blocks + LOAD_BLOCKS + + // load obmcs + LOAD_OBMCS + + // steps 0 1 + STEPS_0_1 + + FINAL_STEP_VEC + + } + +} + +static void inner_add_yblock_a_bw_16_obmc_32_altivec(uint8_t *obmc, + const int obmc_stride, + uint8_t * * block, int b_w, + int b_h, int src_x, int src_y, + int src_stride, slice_buffer * sb, + int add, uint8_t * dst8) +{ + int y, x; + DWTELEM * dst; + vector bool int mask; + vector signed int vs; + vector unsigned short h1, h2, l1, l2; + vector unsigned char ih, il, ih1, il1, tmp1, tmp2, align; + vector unsigned char b0,b1,b2,b3; + vector unsigned char ob1,ob2,ob3,ob4; + DECLARE_ALIGNED_16(int, vbuf[b_w]); + vector signed int *v = (vector signed int *)vbuf, *d; + + for(y=0; y<b_h; y++){ + //FIXME ugly missue of obmc_stride + + uint8_t *obmc1= obmc + y*obmc_stride; + uint8_t *obmc2= obmc1+ (obmc_stride>>1); + uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); + uint8_t *obmc4= obmc3+ (obmc_stride>>1); + + dst = slice_buffer_get_line(sb, src_y + y); + d = (vector signed int *)(dst + src_x); + + // load blocks + LOAD_BLOCKS + + // load obmcs + LOAD_OBMCS + + // steps 0 1 2 3 + STEPS_0_1 + + STEPS_2_3 + + FINAL_STEP_VEC + + } +} + + +void ff_snow_inner_add_yblock_altivec(uint8_t *obmc, const int obmc_stride, + uint8_t * * block, int b_w, int b_h, + int src_x, int src_y, int src_stride, + slice_buffer * sb, int add, + uint8_t * dst8) +{ + if (src_x&15) { + if (b_w == 16) + inner_add_yblock_bw_16_obmc_32_altivec(obmc, obmc_stride, block, + b_w, b_h, src_x, src_y, + src_stride, sb, add, dst8); + else if (b_w == 8) + inner_add_yblock_bw_8_obmc_16_altivec(obmc, obmc_stride, block, + b_w, b_h, src_x, src_y, + src_stride, sb, add, dst8); + else + ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x, + src_y, src_stride, sb, add, dst8); + } else { + if (b_w == 16) + inner_add_yblock_a_bw_16_obmc_32_altivec(obmc, obmc_stride, block, + b_w, b_h, src_x, src_y, + src_stride, sb, add, dst8); + else if (b_w == 8) + inner_add_yblock_a_bw_8_obmc_16_altivec(obmc, obmc_stride, block, + b_w, b_h, src_x, src_y, + src_stride, sb, add, dst8); + else + ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x, + src_y, src_stride, sb, add, dst8); + } +} + + +void snow_init_altivec(DSPContext* c, AVCodecContext *avctx) +{ + c->horizontal_compose97i = ff_snow_horizontal_compose97i_altivec; + c->vertical_compose97i = ff_snow_vertical_compose97i_altivec; + c->inner_add_yblock = ff_snow_inner_add_yblock_altivec; +} diff --git a/src/libffmpeg/libavcodec/ppc/types_altivec.h b/src/libffmpeg/libavcodec/ppc/types_altivec.h new file mode 100644 index 000000000..f29026e04 --- /dev/null +++ b/src/libffmpeg/libavcodec/ppc/types_altivec.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2006 Guillaume Poirier <gpoirier@mplayerhq.hu> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/***********************************************************************
+ * Vector types
+ **********************************************************************/
+#define vec_u8_t  vector unsigned char
+#define vec_s8_t  vector signed char
+#define vec_u16_t vector unsigned short
+#define vec_s16_t vector signed short
+#define vec_u32_t vector unsigned int
+#define vec_s32_t vector signed int
+
+/***********************************************************************
+ * Null vector
+ **********************************************************************/
+#define LOAD_ZERO const vec_u8_t zerov = vec_splat_u8( 0 )
+
+#define zero_u8v  (vec_u8_t)  zerov
+#define zero_s8v  (vec_s8_t)  zerov
+#define zero_u16v (vec_u16_t) zerov
+#define zero_s16v (vec_s16_t) zerov
+#define zero_u32v (vec_u32_t) zerov
+#define zero_s32v (vec_s32_t) zerov
diff --git a/src/libffmpeg/libavcodec/ppc/vc1dsp_altivec.c b/src/libffmpeg/libavcodec/ppc/vc1dsp_altivec.c
new file mode 100644
index 000000000..114c9d41f
--- /dev/null
+++ b/src/libffmpeg/libavcodec/ppc/vc1dsp_altivec.c
@@ -0,0 +1,338 @@
+/*
+ * VC-1 and WMV3 decoder - DSP functions AltiVec-optimized
+ * Copyright (c) 2006 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "../dsputil.h" + +#include "gcc_fixes.h" + +#include "dsputil_altivec.h" + +// main steps of 8x8 transform +#define STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_rnd) \ +do { \ + t0 = vec_sl(vec_add(s0, s4), vec_2); \ + t0 = vec_add(vec_sl(t0, vec_1), t0); \ + t0 = vec_add(t0, vec_rnd); \ + t1 = vec_sl(vec_sub(s0, s4), vec_2); \ + t1 = vec_add(vec_sl(t1, vec_1), t1); \ + t1 = vec_add(t1, vec_rnd); \ + t2 = vec_add(vec_sl(s6, vec_2), vec_sl(s6, vec_1)); \ + t2 = vec_add(t2, vec_sl(s2, vec_4)); \ + t3 = vec_add(vec_sl(s2, vec_2), vec_sl(s2, vec_1)); \ + t3 = vec_sub(t3, vec_sl(s6, vec_4)); \ + t4 = vec_add(t0, t2); \ + t5 = vec_add(t1, t3); \ + t6 = vec_sub(t1, t3); \ + t7 = vec_sub(t0, t2); \ +\ + t0 = vec_sl(vec_add(s1, s3), vec_4); \ + t0 = vec_add(t0, vec_sl(s5, vec_3)); \ + t0 = vec_add(t0, vec_sl(s7, vec_2)); \ + t0 = vec_add(t0, vec_sub(s5, s3)); \ +\ + t1 = vec_sl(vec_sub(s1, s5), vec_4); \ + t1 = vec_sub(t1, vec_sl(s7, vec_3)); \ + t1 = vec_sub(t1, vec_sl(s3, vec_2)); \ + t1 = vec_sub(t1, vec_add(s1, s7)); \ +\ + t2 = vec_sl(vec_sub(s7, s3), vec_4); \ + t2 = vec_add(t2, vec_sl(s1, vec_3)); \ + t2 = vec_add(t2, vec_sl(s5, vec_2)); \ + t2 = vec_add(t2, vec_sub(s1, s7)); \ +\ + t3 = vec_sl(vec_sub(s5, s7), vec_4); \ + t3 = vec_sub(t3, vec_sl(s3, vec_3)); \ + t3 = vec_add(t3, vec_sl(s1, vec_2)); \ + t3 = vec_sub(t3, vec_add(s3, s5)); \ +\ + s0 = vec_add(t4, t0); \ + s1 = vec_add(t5, t1); \ + s2 = vec_add(t6, t2); \ + s3 = vec_add(t7, t3); \ + s4 = vec_sub(t7, t3); \ + s5 = vec_sub(t6, t2); \ + s6 = vec_sub(t5, t1); \ + s7 = vec_sub(t4, t0); \ +}while(0) + +#define SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7) \ +do { \ + s0 = vec_sra(s0, vec_3); \ + s1 = vec_sra(s1, vec_3); \ + s2 = vec_sra(s2, vec_3); \ + s3 = vec_sra(s3, vec_3); \ + s4 = vec_sra(s4, vec_3); \ + s5 = vec_sra(s5, vec_3); \ + s6 = vec_sra(s6, vec_3); \ + s7 = vec_sra(s7, vec_3); \ +}while(0) + +#define SHIFT_VERT8(s0, s1, s2, s3, s4, s5, s6, s7) \ +do { \ + s0 = vec_sra(s0, vec_7); \ + s1 = vec_sra(s1, vec_7); \ + s2 = vec_sra(s2, vec_7); \ + s3 = vec_sra(s3, vec_7); \ + s4 = vec_sra(vec_add(s4, vec_1s), vec_7); \ + s5 = vec_sra(vec_add(s5, vec_1s), vec_7); \ + s6 = vec_sra(vec_add(s6, vec_1s), vec_7); \ + s7 = vec_sra(vec_add(s7, vec_1s), vec_7); \ +}while(0) + +/* main steps of 4x4 transform */ +#define STEP4(s0, s1, s2, s3, vec_rnd) \ +do { \ + t1 = vec_add(vec_sl(s0, vec_4), s0); \ + t1 = vec_add(t1, vec_rnd); \ + t2 = vec_add(vec_sl(s2, vec_4), s2); \ + t0 = vec_add(t1, t2); \ + t1 = vec_sub(t1, t2); \ + t3 = vec_sl(vec_sub(s3, s1), vec_1); \ + t3 = vec_add(t3, vec_sl(t3, vec_2)); \ + t2 = vec_add(t3, vec_sl(s1, vec_5)); \ + t3 = vec_add(t3, vec_sl(s3, vec_3)); \ + t3 = vec_add(t3, vec_sl(s3, vec_2)); \ + s0 = vec_add(t0, t2); \ + s1 = vec_sub(t1, t3); \ + s2 = vec_add(t1, t3); \ + s3 = vec_sub(t0, t2); \ +}while (0) + +#define SHIFT_HOR4(s0, s1, s2, s3) \ + s0 = vec_sra(s0, vec_3); \ + s1 = vec_sra(s1, vec_3); \ + s2 = vec_sra(s2, vec_3); \ + s3 = vec_sra(s3, vec_3); + +#define SHIFT_VERT4(s0, s1, s2, s3) \ + s0 = vec_sra(s0, vec_7); \ + s1 = vec_sra(s1, vec_7); \ + s2 = vec_sra(s2, vec_7); \ + s3 = vec_sra(s3, vec_7); + +/** Do inverse transform on 8x8 block +*/ +static void vc1_inv_trans_8x8_altivec(DCTELEM block[64]) +{ + vector signed short src0, src1, src2, src3, src4, src5, src6, 
src7; + vector signed int s0, s1, s2, s3, s4, s5, s6, s7; + vector signed int s8, s9, sA, sB, sC, sD, sE, sF; + vector signed int t0, t1, t2, t3, t4, t5, t6, t7; + const vector signed int vec_64 = vec_sl(vec_splat_s32(4), vec_splat_u32(4)); + const vector unsigned int vec_7 = vec_splat_u32(7); + const vector unsigned int vec_5 = vec_splat_u32(5); + const vector unsigned int vec_4 = vec_splat_u32(4); + const vector signed int vec_4s = vec_splat_s32(4); + const vector unsigned int vec_3 = vec_splat_u32(3); + const vector unsigned int vec_2 = vec_splat_u32(2); + const vector signed int vec_1s = vec_splat_s32(1); + const vector unsigned int vec_1 = vec_splat_u32(1); + + + src0 = vec_ld( 0, block); + src1 = vec_ld( 16, block); + src2 = vec_ld( 32, block); + src3 = vec_ld( 48, block); + src4 = vec_ld( 64, block); + src5 = vec_ld( 80, block); + src6 = vec_ld( 96, block); + src7 = vec_ld(112, block); + + TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7); + s0 = vec_unpackl(src0); + s1 = vec_unpackl(src1); + s2 = vec_unpackl(src2); + s3 = vec_unpackl(src3); + s4 = vec_unpackl(src4); + s5 = vec_unpackl(src5); + s6 = vec_unpackl(src6); + s7 = vec_unpackl(src7); + s8 = vec_unpackh(src0); + s9 = vec_unpackh(src1); + sA = vec_unpackh(src2); + sB = vec_unpackh(src3); + sC = vec_unpackh(src4); + sD = vec_unpackh(src5); + sE = vec_unpackh(src6); + sF = vec_unpackh(src7); + STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_4s); + SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7); + STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_4s); + SHIFT_HOR8(s8, s9, sA, sB, sC, sD, sE, sF); + src0 = vec_pack(s8, s0); + src1 = vec_pack(s9, s1); + src2 = vec_pack(sA, s2); + src3 = vec_pack(sB, s3); + src4 = vec_pack(sC, s4); + src5 = vec_pack(sD, s5); + src6 = vec_pack(sE, s6); + src7 = vec_pack(sF, s7); + TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7); + + s0 = vec_unpackl(src0); + s1 = vec_unpackl(src1); + s2 = vec_unpackl(src2); + s3 = vec_unpackl(src3); + s4 = vec_unpackl(src4); + s5 = vec_unpackl(src5); + s6 = vec_unpackl(src6); + s7 = vec_unpackl(src7); + s8 = vec_unpackh(src0); + s9 = vec_unpackh(src1); + sA = vec_unpackh(src2); + sB = vec_unpackh(src3); + sC = vec_unpackh(src4); + sD = vec_unpackh(src5); + sE = vec_unpackh(src6); + sF = vec_unpackh(src7); + STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_64); + SHIFT_VERT8(s0, s1, s2, s3, s4, s5, s6, s7); + STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_64); + SHIFT_VERT8(s8, s9, sA, sB, sC, sD, sE, sF); + src0 = vec_pack(s8, s0); + src1 = vec_pack(s9, s1); + src2 = vec_pack(sA, s2); + src3 = vec_pack(sB, s3); + src4 = vec_pack(sC, s4); + src5 = vec_pack(sD, s5); + src6 = vec_pack(sE, s6); + src7 = vec_pack(sF, s7); + + vec_st(src0, 0, block); + vec_st(src1, 16, block); + vec_st(src2, 32, block); + vec_st(src3, 48, block); + vec_st(src4, 64, block); + vec_st(src5, 80, block); + vec_st(src6, 96, block); + vec_st(src7,112, block); +} + +/** Do inverse transform on 8x4 part of block +*/ +static void vc1_inv_trans_8x4_altivec(DCTELEM block[64], int n) +{ + vector signed short src0, src1, src2, src3, src4, src5, src6, src7; + vector signed int s0, s1, s2, s3, s4, s5, s6, s7; + vector signed int s8, s9, sA, sB, sC, sD, sE, sF; + vector signed int t0, t1, t2, t3, t4, t5, t6, t7; + const vector signed int vec_64 = vec_sl(vec_splat_s32(4), vec_splat_u32(4)); + const vector unsigned int vec_7 = vec_splat_u32(7); + const vector unsigned int vec_5 = vec_splat_u32(5); + const vector unsigned int vec_4 = vec_splat_u32(4); + const vector signed int vec_4s = vec_splat_s32(4); + const 
vector unsigned int vec_3 = vec_splat_u32(3); + const vector unsigned int vec_2 = vec_splat_u32(2); + const vector unsigned int vec_1 = vec_splat_u32(1); + + src0 = vec_ld( 0, block); + src1 = vec_ld( 16, block); + src2 = vec_ld( 32, block); + src3 = vec_ld( 48, block); + src4 = vec_ld( 64, block); + src5 = vec_ld( 80, block); + src6 = vec_ld( 96, block); + src7 = vec_ld(112, block); + + TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7); + s0 = vec_unpackl(src0); + s1 = vec_unpackl(src1); + s2 = vec_unpackl(src2); + s3 = vec_unpackl(src3); + s4 = vec_unpackl(src4); + s5 = vec_unpackl(src5); + s6 = vec_unpackl(src6); + s7 = vec_unpackl(src7); + s8 = vec_unpackh(src0); + s9 = vec_unpackh(src1); + sA = vec_unpackh(src2); + sB = vec_unpackh(src3); + sC = vec_unpackh(src4); + sD = vec_unpackh(src5); + sE = vec_unpackh(src6); + sF = vec_unpackh(src7); + STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_4s); + SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7); + STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_4s); + SHIFT_HOR8(s8, s9, sA, sB, sC, sD, sE, sF); + src0 = vec_pack(s8, s0); + src1 = vec_pack(s9, s1); + src2 = vec_pack(sA, s2); + src3 = vec_pack(sB, s3); + src4 = vec_pack(sC, s4); + src5 = vec_pack(sD, s5); + src6 = vec_pack(sE, s6); + src7 = vec_pack(sF, s7); + TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7); + + if(!n){ // upper half of block + s0 = vec_unpackh(src0); + s1 = vec_unpackh(src1); + s2 = vec_unpackh(src2); + s3 = vec_unpackh(src3); + s8 = vec_unpackl(src0); + s9 = vec_unpackl(src1); + sA = vec_unpackl(src2); + sB = vec_unpackl(src3); + STEP4(s0, s1, s2, s3, vec_64); + SHIFT_VERT4(s0, s1, s2, s3); + STEP4(s8, s9, sA, sB, vec_64); + SHIFT_VERT4(s8, s9, sA, sB); + src0 = vec_pack(s0, s8); + src1 = vec_pack(s1, s9); + src2 = vec_pack(s2, sA); + src3 = vec_pack(s3, sB); + + vec_st(src0, 0, block); + vec_st(src1, 16, block); + vec_st(src2, 32, block); + vec_st(src3, 48, block); + } else { //lower half of block + s0 = vec_unpackh(src4); + s1 = vec_unpackh(src5); + s2 = vec_unpackh(src6); + s3 = vec_unpackh(src7); + s8 = vec_unpackl(src4); + s9 = vec_unpackl(src5); + sA = vec_unpackl(src6); + sB = vec_unpackl(src7); + STEP4(s0, s1, s2, s3, vec_64); + SHIFT_VERT4(s0, s1, s2, s3); + STEP4(s8, s9, sA, sB, vec_64); + SHIFT_VERT4(s8, s9, sA, sB); + src4 = vec_pack(s0, s8); + src5 = vec_pack(s1, s9); + src6 = vec_pack(s2, sA); + src7 = vec_pack(s3, sB); + + vec_st(src4, 64, block); + vec_st(src5, 80, block); + vec_st(src6, 96, block); + vec_st(src7,112, block); + } +} + + +void vc1dsp_init_altivec(DSPContext* dsp, AVCodecContext *avctx) { + dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_altivec; + dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_altivec; +} diff --git a/src/libffmpeg/libavcodec/smacker.c b/src/libffmpeg/libavcodec/smacker.c index 2f2185848..2e1784075 100644 --- a/src/libffmpeg/libavcodec/smacker.c +++ b/src/libffmpeg/libavcodec/smacker.c @@ -320,12 +320,12 @@ static int decode_header_trees(SmackVContext *smk) { return 0; } -static always_inline void last_reset(int *recode, int *last) { +static av_always_inline void last_reset(int *recode, int *last) { recode[last[0]] = recode[last[1]] = recode[last[2]] = 0; } /* get code and update history */ -static always_inline int smk_get_code(GetBitContext *gb, int *recode, int *last) { +static av_always_inline int smk_get_code(GetBitContext *gb, int *recode, int *last) { register int *table = recode; int v, b; diff --git a/src/libffmpeg/libavcodec/snow.c b/src/libffmpeg/libavcodec/snow.c index 346d56861..5e93d40a1 100644 --- 
a/src/libffmpeg/libavcodec/snow.c +++ b/src/libffmpeg/libavcodec/snow.c @@ -439,6 +439,7 @@ typedef struct SnowContext{ int always_reset; int version; int spatial_decomposition_type; + int last_spatial_decomposition_type; int temporal_decomposition_type; int spatial_decomposition_count; int temporal_decomposition_count; @@ -452,15 +453,19 @@ typedef struct SnowContext{ int chroma_v_shift; int spatial_scalability; int qlog; + int last_qlog; int lambda; int lambda2; int pass1_rc; int mv_scale; + int last_mv_scale; int qbias; + int last_qbias; #define QBIAS_SHIFT 3 int b_width; int b_height; int block_max_depth; + int last_block_max_depth; Plane plane[MAX_PLANES]; BlockNode *block; #define ME_CACHE_SIZE 1024 @@ -709,7 +714,7 @@ static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){ return v; } -static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ +static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ const int mirror_left= !highpass; const int mirror_right= (width&1) ^ highpass; const int w= (width>>1) - 1 + (highpass & width); @@ -732,7 +737,7 @@ static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst } #ifndef lift5 -static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ +static av_always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ const int mirror_left= !highpass; const int mirror_right= (width&1) ^ highpass; const int w= (width>>1) - 1 + (highpass & width); @@ -764,7 +769,7 @@ static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int ds #endif #ifndef liftS -static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ +static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ const int mirror_left= !highpass; const int mirror_right= (width&1) ^ highpass; const int w= (width>>1) - 1 + (highpass & width); @@ -1849,7 +1854,7 @@ static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, sli return; } -static void reset_contexts(SnowContext *s){ +static void reset_contexts(SnowContext *s){ //FIXME better initial contexts int plane_index, level, orientation; for(plane_index=0; plane_index<3; plane_index++){ @@ -2208,7 +2213,7 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){ } #endif -static always_inline int same_block(BlockNode *a, BlockNode *b){ +static av_always_inline int same_block(BlockNode *a, BlockNode *b){ if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){ return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2])); }else{ @@ -2287,12 +2292,10 @@ static void decode_q_branch(SnowContext *s, int level, int x, int y){ } if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){ - int type; + int type, mx, my; int l = left->color[0]; int cb= left->color[1]; int cr= 
left->color[2]; - int mx= mid_pred(left->mx, top->mx, tr->mx); - int my= mid_pred(left->my, top->my, tr->my); int ref = 0; int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx)); @@ -2557,7 +2560,7 @@ void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * } //FIXME name clenup (b_w, block_w, b_width stuff) -static always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ +static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ const int b_width = s->b_width << s->block_max_depth; const int b_height= s->b_height << s->block_max_depth; const int b_stride= b_width; @@ -2716,7 +2719,7 @@ assert(src_stride > 2*MB_SIZE + 5); #endif } -static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){ +static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){ Plane *p= &s->plane[plane_index]; const int mb_w= s->b_width << s->block_max_depth; const int mb_h= s->b_height << s->block_max_depth; @@ -2783,7 +2786,7 @@ static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * STOP_TIMER("predict_slice") } -static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){ +static av_always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){ Plane *p= &s->plane[plane_index]; const int mb_w= s->b_width << s->block_max_depth; const int mb_h= s->b_height << s->block_max_depth; @@ -2840,7 +2843,7 @@ static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_ STOP_TIMER("predict_slice") } -static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){ +static av_always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){ const int mb_h= s->b_height << s->block_max_depth; int mb_y; for(mb_y=0; mb_y<=mb_h; mb_y++) @@ -3098,7 +3101,7 @@ static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ return distortion + rate*penalty_factor; } -static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){ +static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){ const int b_stride= s->b_width << s->block_max_depth; BlockNode *block= &s->block[mb_x + mb_y * b_stride]; BlockNode backup= *block; @@ -3137,12 +3140,12 @@ static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3 } /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */ -static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){ +static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int 
p0, int p1, const uint8_t *obmc_edged, int *best_rd){ int p[2] = {p0, p1}; return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd); } -static always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){ +static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){ const int b_stride= s->b_width << s->block_max_depth; BlockNode *block= &s->block[mb_x + mb_y * b_stride]; BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]}; @@ -3607,8 +3610,14 @@ static void encode_header(SnowContext *s){ memset(kstate, MID_STATE, sizeof(kstate)); put_rac(&s->c, kstate, s->keyframe); - if(s->keyframe || s->always_reset) + if(s->keyframe || s->always_reset){ reset_contexts(s); + s->last_spatial_decomposition_type= + s->last_qlog= + s->last_qbias= + s->last_mv_scale= + s->last_block_max_depth= 0; + } if(s->keyframe){ put_symbol(&s->c, s->header_state, s->version, 0); put_rac(&s->c, s->header_state, s->always_reset); @@ -3631,11 +3640,17 @@ static void encode_header(SnowContext *s){ } } } - put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0); - put_symbol(&s->c, s->header_state, s->qlog, 1); - put_symbol(&s->c, s->header_state, s->mv_scale, 0); - put_symbol(&s->c, s->header_state, s->qbias, 1); - put_symbol(&s->c, s->header_state, s->block_max_depth, 0); + put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1); + put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1); + put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1); + put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1); + put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1); + + s->last_spatial_decomposition_type= s->spatial_decomposition_type; + s->last_qlog = s->qlog; + s->last_qbias = s->qbias; + s->last_mv_scale = s->mv_scale; + s->last_block_max_depth = s->block_max_depth; } static int decode_header(SnowContext *s){ @@ -3645,8 +3660,14 @@ static int decode_header(SnowContext *s){ memset(kstate, MID_STATE, sizeof(kstate)); s->keyframe= get_rac(&s->c, kstate); - if(s->keyframe || s->always_reset) + if(s->keyframe || s->always_reset){ reset_contexts(s); + s->spatial_decomposition_type= + s->qlog= + s->qbias= + s->mv_scale= + s->block_max_depth= 0; + } if(s->keyframe){ s->version= get_symbol(&s->c, s->header_state, 0); if(s->version>0){ @@ -3677,16 +3698,16 @@ static int decode_header(SnowContext *s){ } } - s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0); + s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1); if(s->spatial_decomposition_type > 2){ av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type); return -1; } - s->qlog= get_symbol(&s->c, s->header_state, 1); - s->mv_scale= get_symbol(&s->c, s->header_state, 0); - s->qbias= get_symbol(&s->c, s->header_state, 1); - s->block_max_depth= get_symbol(&s->c, s->header_state, 0); + s->qlog += get_symbol(&s->c, s->header_state, 1); + s->mv_scale += get_symbol(&s->c, s->header_state, 1); + s->qbias += get_symbol(&s->c, s->header_state, 1); + s->block_max_depth+= get_symbol(&s->c, s->header_state, 1); if(s->block_max_depth > 1 || s->block_max_depth < 0){ av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth); s->block_max_depth= 0; @@ -4177,7 +4198,6 @@ redo_frame: pict->pict_type= FF_I_TYPE; s->keyframe=1; 
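/* Annotation on the encode_header()/decode_header() hunks above:
 * spatial_decomposition_type, qlog, mv_scale, qbias and block_max_depth are
 * now range-coded as signed deltas against the previous header's values
 * instead of as absolute symbols, with both encoder and decoder zeroing
 * their reference state on keyframes (or with always_reset). This appears
 * to be why the explicit reset_contexts(s) call in the redo_frame path just
 * below becomes redundant and is removed: encode_header() already resets
 * the contexts whenever a keyframe is coded. */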
s->current_picture.key_frame=1; - reset_contexts(s); goto redo_frame; } diff --git a/src/libffmpeg/libavcodec/snow.h b/src/libffmpeg/libavcodec/snow.h index f7cee131a..6794d2c5a 100644 --- a/src/libffmpeg/libavcodec/snow.h +++ b/src/libffmpeg/libavcodec/snow.h @@ -137,7 +137,7 @@ static int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int /* C bits used by mmx/sse2/altivec */ -static always_inline void snow_interleave_line_header(int * i, int width, DWTELEM * low, DWTELEM * high){ +static av_always_inline void snow_interleave_line_header(int * i, int width, DWTELEM * low, DWTELEM * high){ (*i) = (width) - 2; if (width & 1){ @@ -146,14 +146,14 @@ static always_inline void snow_interleave_line_header(int * i, int width, DWTELE } } -static always_inline void snow_interleave_line_footer(int * i, DWTELEM * low, DWTELEM * high){ +static av_always_inline void snow_interleave_line_footer(int * i, DWTELEM * low, DWTELEM * high){ for (; (*i)>=0; (*i)-=2){ low[(*i)+1] = high[(*i)>>1]; low[*i] = low[(*i)>>1]; } } -static always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w, int lift_high, int mul, int add, int shift){ +static av_always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w, int lift_high, int mul, int add, int shift){ for(; i<w; i++){ dst[i] = src[i] - ((mul * (ref[i] + ref[i + 1]) + add) >> shift); } @@ -163,7 +163,7 @@ static always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELEM * } } -static always_inline void snow_horizontal_compose_liftS_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w){ +static av_always_inline void snow_horizontal_compose_liftS_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w){ for(; i<w; i++){ dst[i] = src[i] - (((-(ref[i] + ref[(i+1)])+W_BO) - 4 * src[i]) >> W_BS); } diff --git a/src/libffmpeg/libavcodec/utils.c b/src/libffmpeg/libavcodec/utils.c index c3661dda7..36dcc7746 100644 --- a/src/libffmpeg/libavcodec/utils.c +++ b/src/libffmpeg/libavcodec/utils.c @@ -421,7 +421,7 @@ static const char* context_to_name(void* ptr) { static const AVOption options[]={ {"b", "set video bitrate (in bits/s)", OFFSET(bit_rate), FF_OPT_TYPE_INT, AV_CODEC_DEFAULT_BITRATE, INT_MIN, INT_MAX, V|A|E}, -{"bt", "set video bitrate tolerance (in bits/s)", OFFSET(bit_rate_tolerance), FF_OPT_TYPE_INT, AV_CODEC_DEFAULT_BITRATE*20, INT_MIN, INT_MAX, V|E}, +{"bt", "set video bitrate tolerance (in bits/s)", OFFSET(bit_rate_tolerance), FF_OPT_TYPE_INT, AV_CODEC_DEFAULT_BITRATE*20, 1, INT_MAX, V|E}, {"flags", NULL, OFFSET(flags), FF_OPT_TYPE_FLAGS, DEFAULT, INT_MIN, INT_MAX, V|A|E|D, "flags"}, {"mv4", "use four motion vector by macroblock (mpeg4)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_4MV, INT_MIN, INT_MAX, V|E, "flags"}, {"obmc", "use overlapped block motion compensation (h263+)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_OBMC, INT_MIN, INT_MAX, V|E, "flags"}, @@ -464,7 +464,7 @@ static const AVOption options[]={ {"extradata_size", NULL, OFFSET(extradata_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX}, {"time_base", NULL, OFFSET(time_base), FF_OPT_TYPE_RATIONAL, DEFAULT, INT_MIN, INT_MAX}, {"g", "set the group of picture size", OFFSET(gop_size), FF_OPT_TYPE_INT, 12, INT_MIN, INT_MAX, V|E}, -{"rate_emu", NULL, OFFSET(rate_emu), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX}, +{"rate_emu", "frame rate emulation", OFFSET(rate_emu), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX}, 
{"ar", "set audio sampling rate (in Hz)", OFFSET(sample_rate), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX}, {"ac", "set number of audio channels", OFFSET(channels), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX}, {"cutoff", "set cutoff bandwidth", OFFSET(cutoff), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, A|E}, @@ -509,15 +509,15 @@ static const AVOption options[]={ {"edge", "edge padding bug (autodetected per fourcc/version)", 0, FF_OPT_TYPE_CONST, FF_BUG_EDGE, INT_MIN, INT_MAX, V|D, "bug"}, {"hpel_chroma", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_HPEL_CHROMA, INT_MIN, INT_MAX, V|D, "bug"}, {"dc_clip", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_DC_CLIP, INT_MIN, INT_MAX, V|D, "bug"}, -{"ms", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_MS, INT_MIN, INT_MAX, V|D, "bug"}, +{"ms", "workaround various bugs in microsofts broken decoders", 0, FF_OPT_TYPE_CONST, FF_BUG_MS, INT_MIN, INT_MAX, V|D, "bug"}, {"lelim", "single coefficient elimination threshold for luminance (negative values also consider dc coefficient)", OFFSET(luma_elim_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, {"celim", "single coefficient elimination threshold for chrominance (negative values also consider dc coefficient)", OFFSET(chroma_elim_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, {"strict", "how strictly to follow the standards", OFFSET(strict_std_compliance), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "strict"}, -{"very", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_VERY_STRICT, INT_MIN, INT_MAX, V|E, "strict"}, -{"strict", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_STRICT, INT_MIN, INT_MAX, V|E, "strict"}, +{"very", "strictly conform to a older more strict version of the spec or reference software", 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_VERY_STRICT, INT_MIN, INT_MAX, V|E, "strict"}, +{"strict", "strictly conform to all the things in the spec no matter what consequences", 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_STRICT, INT_MIN, INT_MAX, V|E, "strict"}, {"normal", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_NORMAL, INT_MIN, INT_MAX, V|E, "strict"}, -{"inofficial", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_INOFFICIAL, INT_MIN, INT_MAX, V|E, "strict"}, -{"experimental", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_EXPERIMENTAL, INT_MIN, INT_MAX, V|E, "strict"}, +{"inofficial", "allow inofficial extensions", 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_INOFFICIAL, INT_MIN, INT_MAX, V|E, "strict"}, +{"experimental", "allow non standarized experimental things", 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_EXPERIMENTAL, INT_MIN, INT_MAX, V|E, "strict"}, {"b_qoffset", "qp offset between p and b frames", OFFSET(b_quant_offset), FF_OPT_TYPE_FLOAT, 1.25, FLT_MIN, FLT_MAX, V|E}, {"er", "set error resilience strategy", OFFSET(error_resilience), FF_OPT_TYPE_INT, FF_ER_CAREFUL, INT_MIN, INT_MAX, V|D, "er"}, {"careful", NULL, 0, FF_OPT_TYPE_CONST, FF_ER_CAREFUL, INT_MIN, INT_MAX, V|D, "er"}, @@ -549,14 +549,14 @@ static const AVOption options[]={ {"mmx", NULL, 0, FF_OPT_TYPE_CONST, FF_DCT_MMX, INT_MIN, INT_MAX, V|E, "dct"}, {"mlib", NULL, 0, FF_OPT_TYPE_CONST, FF_DCT_MLIB, INT_MIN, INT_MAX, V|E, "dct"}, {"altivec", NULL, 0, FF_OPT_TYPE_CONST, FF_DCT_ALTIVEC, INT_MIN, INT_MAX, V|E, "dct"}, -{"faan", "floating point AAN", 0, FF_OPT_TYPE_CONST, FF_DCT_FAAN, INT_MIN, INT_MAX, V|E, "dct"}, +{"faan", "floating point AAN DCT", 0, FF_OPT_TYPE_CONST, FF_DCT_FAAN, INT_MIN, INT_MAX, V|E, "dct"}, {"lumi_mask", "compresses bright areas stronger than medium ones", OFFSET(lumi_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E}, {"tcplx_mask", "temporal complexity 
masking", OFFSET(temporal_cplx_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E}, {"scplx_mask", "spatial complexity masking", OFFSET(spatial_cplx_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E}, {"p_mask", "inter masking", OFFSET(p_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E}, {"dark_mask", "compresses dark areas stronger than medium ones", OFFSET(dark_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E}, {"unused", NULL, OFFSET(unused), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX}, -{"idct", "use interlaced DCT", OFFSET(idct_algo), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, V|E|D, "idct"}, +{"idct", "select IDCT implementation", OFFSET(idct_algo), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, V|E|D, "idct"}, {"auto", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_AUTO, INT_MIN, INT_MAX, V|E|D, "idct"}, {"int", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_INT, INT_MIN, INT_MAX, V|E|D, "idct"}, {"simple", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLE, INT_MIN, INT_MAX, V|E|D, "idct"}, @@ -582,7 +582,7 @@ static const AVOption options[]={ {"left", NULL, 0, FF_OPT_TYPE_CONST, FF_PRED_LEFT, INT_MIN, INT_MAX, V|E, "pred"}, {"plane", NULL, 0, FF_OPT_TYPE_CONST, FF_PRED_PLANE, INT_MIN, INT_MAX, V|E, "pred"}, {"median", NULL, 0, FF_OPT_TYPE_CONST, FF_PRED_MEDIAN, INT_MIN, INT_MAX, V|E, "pred"}, -{"aspect", NULL, OFFSET(sample_aspect_ratio), FF_OPT_TYPE_RATIONAL, DEFAULT, 0, 10, V|E}, +{"aspect", "sample aspect ratio", OFFSET(sample_aspect_ratio), FF_OPT_TYPE_RATIONAL, DEFAULT, 0, 10, V|E}, {"debug", "print specific debug info", OFFSET(debug), FF_OPT_TYPE_FLAGS, DEFAULT, 0, INT_MAX, V|A|S|E|D, "debug"}, {"pict", "picture info", 0, FF_OPT_TYPE_CONST, FF_DEBUG_PICT_INFO, INT_MIN, INT_MAX, V|D, "debug"}, {"rc", "rate control", 0, FF_OPT_TYPE_CONST, FF_DEBUG_RC, INT_MIN, INT_MAX, V|E, "debug"}, @@ -603,8 +603,8 @@ static const AVOption options[]={ {"pf", "forward predicted MVs of P-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_P_FOR, INT_MIN, INT_MAX, V|D, "debug_mv"}, {"bf", "forward predicted MVs of B-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_B_FOR, INT_MIN, INT_MAX, V|D, "debug_mv"}, {"bb", "backward predicted MVs of B-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_B_BACK, INT_MIN, INT_MAX, V|D, "debug_mv"}, -{"mb_qmin", "obsolete, use vqmin", OFFSET(mb_qmin), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, -{"mb_qmax", "obsolete, use vqmax", OFFSET(mb_qmax), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, +{"mb_qmin", "obsolete, use qmin", OFFSET(mb_qmin), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, +{"mb_qmax", "obsolete, use qmax", OFFSET(mb_qmax), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, {"cmp", "full pel me compare function", OFFSET(me_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"}, {"subcmp", "sub pel me compare function", OFFSET(me_sub_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"}, {"mbcmp", "macroblock compare function", OFFSET(mb_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"}, @@ -654,11 +654,11 @@ static const AVOption options[]={ {"lmin", "min lagrange factor (VBR)", OFFSET(lmin), FF_OPT_TYPE_INT, 2*FF_QP2LAMBDA, 0, INT_MAX, V|E}, {"lmax", "max lagrange factor (VBR)", OFFSET(lmax), FF_OPT_TYPE_INT, 31*FF_QP2LAMBDA, 0, INT_MAX, V|E}, {"nr", "noise reduction", OFFSET(noise_reduction), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, -{"rc_init_occupancy", NULL, OFFSET(rc_initial_buffer_occupancy), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, +{"rc_init_occupancy", "number of bits which should be loaded into 
the rc buffer before decoding starts", OFFSET(rc_initial_buffer_occupancy), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, {"inter_threshold", NULL, OFFSET(inter_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, {"flags2", NULL, OFFSET(flags2), FF_OPT_TYPE_FLAGS, CODEC_FLAG2_FASTPSKIP, INT_MIN, INT_MAX, V|A|E|D, "flags2"}, {"error", NULL, OFFSET(error_rate), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, -{"antialias", NULL, OFFSET(antialias_algo), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D, "aa"}, +{"antialias", "MP3 antialias algorithm", OFFSET(antialias_algo), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D, "aa"}, {"auto", NULL, 0, FF_OPT_TYPE_CONST, FF_AA_AUTO, INT_MIN, INT_MAX, V|D, "aa"}, {"fastint", NULL, 0, FF_OPT_TYPE_CONST, FF_AA_FASTINT, INT_MIN, INT_MAX, V|D, "aa"}, {"int", NULL, 0, FF_OPT_TYPE_CONST, FF_AA_INT, INT_MIN, INT_MAX, V|D, "aa"}, @@ -669,8 +669,8 @@ static const AVOption options[]={ {"mb_threshold", "macroblock threshold", OFFSET(mb_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, {"dc", "intra_dc_precision", OFFSET(intra_dc_precision), FF_OPT_TYPE_INT, 0, INT_MIN, INT_MAX, V|E}, {"nssew", "nsse weight", OFFSET(nsse_weight), FF_OPT_TYPE_INT, 8, INT_MIN, INT_MAX, V|E}, -{"skip_top", NULL, OFFSET(skip_top), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D}, -{"skip_bottom", NULL, OFFSET(skip_bottom), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D}, +{"skip_top", "number of macroblock rows at the top which are skipped", OFFSET(skip_top), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D}, +{"skip_bottom", "number of macroblock rows at the bottom which are skipped", OFFSET(skip_bottom), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D}, {"profile", NULL, OFFSET(profile), FF_OPT_TYPE_INT, FF_PROFILE_UNKNOWN, INT_MIN, INT_MAX, V|A|E, "profile"}, {"unknown", NULL, 0, FF_OPT_TYPE_CONST, FF_PROFILE_UNKNOWN, INT_MIN, INT_MAX, V|A|E, "profile"}, {"level", NULL, OFFSET(level), FF_OPT_TYPE_INT, FF_LEVEL_UNKNOWN, INT_MIN, INT_MAX, V|A|E, "level"}, @@ -687,42 +687,43 @@ static const AVOption options[]={ {"bidir_refine", "refine the two motion vectors used in bidirectional macroblocks", OFFSET(bidir_refine), FF_OPT_TYPE_INT, DEFAULT, 0, 4, V|E}, {"brd_scale", "downscales frames for dynamic B-frame decision", OFFSET(brd_scale), FF_OPT_TYPE_INT, DEFAULT, 0, 10, V|E}, {"crf", "enables constant quality mode, and selects the quality (x264)", OFFSET(crf), FF_OPT_TYPE_FLOAT, DEFAULT, 0, 51, V|E}, -{"cqp", NULL, OFFSET(cqp), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, V|E}, +{"cqp", "constant quantization parameter rate control method", OFFSET(cqp), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, V|E}, {"keyint_min", "minimum interval between IDR-frames (x264)", OFFSET(keyint_min), FF_OPT_TYPE_INT, 25, INT_MIN, INT_MAX, V|E}, {"refs", "reference frames to consider for motion compensation (Snow)", OFFSET(refs), FF_OPT_TYPE_INT, 1, INT_MIN, INT_MAX, V|E}, -{"chromaoffset", NULL, OFFSET(chromaoffset), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, -{"bframebias", NULL, OFFSET(bframebias), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, +{"chromaoffset", "chroma qp offset from luma", OFFSET(chromaoffset), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, +{"bframebias", "influences how often B-frames are used", OFFSET(bframebias), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, {"trellis", "rate-distortion optimal quantization", OFFSET(trellis), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|A|E}, -{"directpred", NULL, OFFSET(directpred), FF_OPT_TYPE_INT, 2, INT_MIN, 
INT_MAX, V|E}, +{"directpred", "direct mv prediction mode - 0 (none), 1 (spatial), 2 (temporal)", OFFSET(directpred), FF_OPT_TYPE_INT, 2, INT_MIN, INT_MAX, V|E}, {"bpyramid", "allows B-frames to be used as references for predicting", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_BPYRAMID, INT_MIN, INT_MAX, V|E, "flags2"}, -{"wpred", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_WPRED, INT_MIN, INT_MAX, V|E, "flags2"}, -{"mixed_refs", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_MIXED_REFS, INT_MIN, INT_MAX, V|E, "flags2"}, -{"8x8dct", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_8X8DCT, INT_MIN, INT_MAX, V|E, "flags2"}, -{"fastpskip", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_FASTPSKIP, INT_MIN, INT_MAX, V|E, "flags2"}, -{"aud", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_AUD, INT_MIN, INT_MAX, V|E, "flags2"}, -{"brdo", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_BRDO, INT_MIN, INT_MAX, V|E, "flags2"}, -{"complexityblur", NULL, OFFSET(complexityblur), FF_OPT_TYPE_FLOAT, 20.0, FLT_MIN, FLT_MAX, V|E}, -{"deblockalpha", NULL, OFFSET(deblockalpha), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, -{"deblockbeta", NULL, OFFSET(deblockbeta), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, -{"partitions", NULL, OFFSET(partitions), FF_OPT_TYPE_FLAGS, DEFAULT, INT_MIN, INT_MAX, V|E, "partitions"}, +{"wpred", "weighted biprediction for b-frames (H.264)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_WPRED, INT_MIN, INT_MAX, V|E, "flags2"}, +{"mixed_refs", "one reference per partition, as opposed to one reference per macroblock", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_MIXED_REFS, INT_MIN, INT_MAX, V|E, "flags2"}, +{"8x8dct", "high profile 8x8 transform (H.264)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_8X8DCT, INT_MIN, INT_MAX, V|E, "flags2"}, +{"fastpskip", "fast pskip (H.264)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_FASTPSKIP, INT_MIN, INT_MAX, V|E, "flags2"}, +{"aud", "access unit delimiters (H.264)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_AUD, INT_MIN, INT_MAX, V|E, "flags2"}, +{"brdo", "b-frame rate-distortion optimization", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_BRDO, INT_MIN, INT_MAX, V|E, "flags2"}, +{"skiprd", "RD optimal MB level residual skiping", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_SKIP_RD, INT_MIN, INT_MAX, V|E, "flags2"}, +{"complexityblur", "reduce fluctuations in qp (before curve compression)", OFFSET(complexityblur), FF_OPT_TYPE_FLOAT, 20.0, FLT_MIN, FLT_MAX, V|E}, +{"deblockalpha", "in-loop deblocking filter alphac0 parameter", OFFSET(deblockalpha), FF_OPT_TYPE_INT, DEFAULT, -6, 6, V|E}, +{"deblockbeta", "in-loop deblocking filter beta parameter", OFFSET(deblockbeta), FF_OPT_TYPE_INT, DEFAULT, -6, 6, V|E}, +{"partitions", "macroblock subpartition sizes to consider", OFFSET(partitions), FF_OPT_TYPE_FLAGS, DEFAULT, INT_MIN, INT_MAX, V|E, "partitions"}, {"parti4x4", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_I4X4, INT_MIN, INT_MAX, V|E, "partitions"}, {"parti8x8", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_I8X8, INT_MIN, INT_MAX, V|E, "partitions"}, {"partp4x4", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_P4X4, INT_MIN, INT_MAX, V|E, "partitions"}, {"partp8x8", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_P8X8, INT_MIN, INT_MAX, V|E, "partitions"}, {"partb8x8", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_B8X8, INT_MIN, INT_MAX, V|E, "partitions"}, -{"sc_factor", NULL, OFFSET(scenechange_factor), FF_OPT_TYPE_INT, 6, 0, INT_MAX, V|E}, +{"sc_factor", "multiplied by qscale for each frame and added to scene_change_score", OFFSET(scenechange_factor), FF_OPT_TYPE_INT, 6, 0, INT_MAX, V|E}, {"mv0_threshold", NULL, OFFSET(mv0_threshold), FF_OPT_TYPE_INT, 256, 0, INT_MAX, V|E}, {"ivlc", "intra vlc 
table", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_INTRA_VLC, INT_MIN, INT_MAX, V|E, "flags2"}, -{"b_sensitivity", NULL, OFFSET(b_sensitivity), FF_OPT_TYPE_INT, 40, 1, INT_MAX, V|E}, +{"b_sensitivity", "adjusts sensitivity of b_frame_strategy 1", OFFSET(b_sensitivity), FF_OPT_TYPE_INT, 40, 1, INT_MAX, V|E}, {"compression_level", NULL, OFFSET(compression_level), FF_OPT_TYPE_INT, FF_COMPRESSION_DEFAULT, INT_MIN, INT_MAX, V|A|E}, -{"use_lpc", NULL, OFFSET(use_lpc), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E}, -{"lpc_coeff_precision", NULL, OFFSET(lpc_coeff_precision), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, A|E}, +{"use_lpc", "sets whether to use LPC mode (FLAC)", OFFSET(use_lpc), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E}, +{"lpc_coeff_precision", "LPC coefficient precision (FLAC)", OFFSET(lpc_coeff_precision), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, A|E}, {"min_prediction_order", NULL, OFFSET(min_prediction_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E}, {"max_prediction_order", NULL, OFFSET(max_prediction_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E}, -{"prediction_order_method", NULL, OFFSET(prediction_order_method), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E}, +{"prediction_order_method", "search method for selecting prediction order", OFFSET(prediction_order_method), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E}, {"min_partition_order", NULL, OFFSET(min_partition_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E}, {"max_partition_order", NULL, OFFSET(max_partition_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E}, -{"timecode_frame_start", NULL, OFFSET(timecode_frame_start), FF_OPT_TYPE_INT, 0, 0, INT_MAX, V|E}, +{"timecode_frame_start", "GOP timecode frame start number, in non drop frame format", OFFSET(timecode_frame_start), FF_OPT_TYPE_INT, 0, 0, INT_MAX, V|E}, {"drop_frame_timecode", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_DROP_FRAME_TIMECODE, INT_MIN, INT_MAX, V|E, "flags2"}, {NULL}, }; diff --git a/src/libffmpeg/libavcodec/vc1.c b/src/libffmpeg/libavcodec/vc1.c index 7b385ca47..231f3ca26 100644 --- a/src/libffmpeg/libavcodec/vc1.c +++ b/src/libffmpeg/libavcodec/vc1.c @@ -2140,7 +2140,7 @@ static void vc1_interp_mc(VC1Context *v) dsp->avg_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy); } -static always_inline int scale_mv(int value, int bfrac, int inv, int qs) +static av_always_inline int scale_mv(int value, int bfrac, int inv, int qs) { int n = bfrac; @@ -3072,8 +3072,8 @@ static int vc1_decode_intra_block(VC1Context *v, DCTELEM block[64], int n, int c ac_val -= 16 * s->block_wrap[n]; q1 = s->current_picture.qscale_table[mb_pos]; - if(dc_pred_dir && c_avail) q2 = s->current_picture.qscale_table[mb_pos - 1]; - if(!dc_pred_dir && a_avail) q2 = s->current_picture.qscale_table[mb_pos - s->mb_stride]; + if(dc_pred_dir && c_avail && mb_pos) q2 = s->current_picture.qscale_table[mb_pos - 1]; + if(!dc_pred_dir && a_avail && mb_pos >= s->mb_stride) q2 = s->current_picture.qscale_table[mb_pos - s->mb_stride]; if(n && n<4) q2 = q1; if(coded) { diff --git a/src/libffmpeg/libavcodec/vc1dsp.c b/src/libffmpeg/libavcodec/vc1dsp.c index 9139ffb28..f19f266d1 100644 --- a/src/libffmpeg/libavcodec/vc1dsp.c +++ b/src/libffmpeg/libavcodec/vc1dsp.c @@ -326,7 +326,7 @@ static void vc1_inv_trans_4x4_c(DCTELEM block[64], int n) /** Filter used to interpolate fractional pel values */ -static always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int mode, int r) +static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int mode, int r) { 
switch(mode){ case 0: //no shift diff --git a/src/libffmpeg/libavcodec/vp3dsp.c b/src/libffmpeg/libavcodec/vp3dsp.c index a48515a5e..bb9fed091 100644 --- a/src/libffmpeg/libavcodec/vp3dsp.c +++ b/src/libffmpeg/libavcodec/vp3dsp.c @@ -39,7 +39,7 @@ #define M(a,b) (((a) * (b))>>16) -static always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type) +static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type) { int16_t *ip = input; uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; diff --git a/src/libffmpeg/libavcodec/vp5.c b/src/libffmpeg/libavcodec/vp5.c new file mode 100644 index 000000000..ac953c7aa --- /dev/null +++ b/src/libffmpeg/libavcodec/vp5.c @@ -0,0 +1,290 @@ +/** + * @file vp5.c + * VP5 compatible video decoder + * + * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdlib.h> +#include <string.h> + +#include "avcodec.h" +#include "dsputil.h" +#include "bitstream.h" +#include "mpegvideo.h" + +#include "vp56.h" +#include "vp56data.h" +#include "vp5data.h" + + +static int vp5_parse_header(vp56_context_t *s, uint8_t *buf, int buf_size, + int *golden_frame) +{ + vp56_range_coder_t *c = &s->c; + int rows, cols; + + vp56_init_range_decoder(&s->c, buf, buf_size); + s->frames[VP56_FRAME_CURRENT].key_frame = !vp56_rac_get(c); + vp56_rac_get(c); + vp56_init_dequant(s, vp56_rac_gets(c, 6)); + if (s->frames[VP56_FRAME_CURRENT].key_frame) + { + vp56_rac_gets(c, 8); + if(vp56_rac_gets(c, 5) > 5) + return 0; + vp56_rac_gets(c, 2); + if (vp56_rac_get(c)) { + av_log(s->avctx, AV_LOG_ERROR, "interlacing not supported\n"); + return 0; + } + rows = vp56_rac_gets(c, 8); /* number of stored macroblock rows */ + cols = vp56_rac_gets(c, 8); /* number of stored macroblock cols */ + vp56_rac_gets(c, 8); /* number of displayed macroblock rows */ + vp56_rac_gets(c, 8); /* number of displayed macroblock cols */ + vp56_rac_gets(c, 2); + if (16*cols != s->avctx->coded_width || + 16*rows != s->avctx->coded_height) { + avcodec_set_dimensions(s->avctx, 16*cols, 16*rows); + return 2; + } + } + return 1; +} + +/* Gives very similar results to the vp6 version except in a few cases */ +static int vp5_adjust(int v, int t) +{ + int s2, s1 = v >> 31; + v ^= s1; + v -= s1; + v *= v < 2*t; + v -= t; + s2 = v >> 31; + v ^= s2; + v -= s2; + v = t - v; + v += s1; + v ^= s1; + return v; +} + +static void vp5_parse_vector_adjustment(vp56_context_t *s, vp56_mv_t *vect) +{ + vp56_range_coder_t *c = &s->c; + int comp, di; + + for (comp=0; comp<2; comp++) { + int delta = 0; + if (vp56_rac_get_prob(c, s->vector_model_dct[comp])) { + int sign = vp56_rac_get_prob(c, s->vector_model_sig[comp]); + di = vp56_rac_get_prob(c, s->vector_model_pdi[comp][0]); + di |= vp56_rac_get_prob(c, s->vector_model_pdi[comp][1]) << 1; + delta 
= vp56_rac_get_tree(c, vp56_pva_tree, + s->vector_model_pdv[comp]); + delta = di | (delta << 2); + delta = (delta ^ -sign) + sign; + } + if (!comp) + vect->x = delta; + else + vect->y = delta; + } +} + +static void vp5_parse_vector_models(vp56_context_t *s) +{ + vp56_range_coder_t *c = &s->c; + int comp, node; + + for (comp=0; comp<2; comp++) { + if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][0])) + s->vector_model_dct[comp] = vp56_rac_gets_nn(c, 7); + if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][1])) + s->vector_model_sig[comp] = vp56_rac_gets_nn(c, 7); + if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][2])) + s->vector_model_pdi[comp][0] = vp56_rac_gets_nn(c, 7); + if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][3])) + s->vector_model_pdi[comp][1] = vp56_rac_gets_nn(c, 7); + } + + for (comp=0; comp<2; comp++) + for (node=0; node<7; node++) + if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][4 + node])) + s->vector_model_pdv[comp][node] = vp56_rac_gets_nn(c, 7); +} + +static void vp5_parse_coeff_models(vp56_context_t *s) +{ + vp56_range_coder_t *c = &s->c; + uint8_t def_prob[11]; + int node, cg, ctx; + int ct; /* code type */ + int pt; /* plane type (0 for Y, 1 for U or V) */ + + memset(def_prob, 0x80, sizeof(def_prob)); + + for (pt=0; pt<2; pt++) + for (node=0; node<11; node++) + if (vp56_rac_get_prob(c, vp5_dccv_pct[pt][node])) { + def_prob[node] = vp56_rac_gets_nn(c, 7); + s->coeff_model_dccv[pt][node] = def_prob[node]; + } else if (s->frames[VP56_FRAME_CURRENT].key_frame) { + s->coeff_model_dccv[pt][node] = def_prob[node]; + } + + for (ct=0; ct<3; ct++) + for (pt=0; pt<2; pt++) + for (cg=0; cg<6; cg++) + for (node=0; node<11; node++) + if (vp56_rac_get_prob(c, vp5_ract_pct[ct][pt][cg][node])) { + def_prob[node] = vp56_rac_gets_nn(c, 7); + s->coeff_model_ract[pt][ct][cg][node] = def_prob[node]; + } else if (s->frames[VP56_FRAME_CURRENT].key_frame) { + s->coeff_model_ract[pt][ct][cg][node] = def_prob[node]; + } + + /* coeff_model_dcct is a linear combination of coeff_model_dccv */ + for (pt=0; pt<2; pt++) + for (ctx=0; ctx<36; ctx++) + for (node=0; node<5; node++) + s->coeff_model_dcct[pt][ctx][node] = clip(((s->coeff_model_dccv[pt][node] * vp5_dccv_lc[node][ctx][0] + 128) >> 8) + vp5_dccv_lc[node][ctx][1], 1, 254); + + /* coeff_model_acct is a linear combination of coeff_model_ract */ + for (ct=0; ct<3; ct++) + for (pt=0; pt<2; pt++) + for (cg=0; cg<3; cg++) + for (ctx=0; ctx<6; ctx++) + for (node=0; node<5; node++) + s->coeff_model_acct[pt][ct][cg][ctx][node] = clip(((s->coeff_model_ract[pt][ct][cg][node] * vp5_ract_lc[ct][cg][node][ctx][0] + 128) >> 8) + vp5_ract_lc[ct][cg][node][ctx][1], 1, 254); +} + +static void vp5_parse_coeff(vp56_context_t *s) +{ + vp56_range_coder_t *c = &s->c; + uint8_t *permute = s->scantable.permutated; + uint8_t *model, *model2; + int coeff, sign, coeff_idx; + int b, i, cg, idx, ctx, ctx_last; + int pt = 0; /* plane type (0 for Y, 1 for U or V) */ + + for (b=0; b<6; b++) { + int ct = 1; /* code type */ + + if (b > 3) pt = 1; + + ctx = 6*s->coeff_ctx[vp56_b6to4[b]][0] + + s->above_blocks[s->above_block_idx[b]].not_null_dc; + model = s->coeff_model_dccv[pt]; + model2 = s->coeff_model_dcct[pt][ctx]; + + for (coeff_idx=0; coeff_idx<64; ) { + if (vp56_rac_get_prob(c, model2[0])) { + if (vp56_rac_get_prob(c, model2[2])) { + if (vp56_rac_get_prob(c, model2[3])) { + s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 4; + idx = vp56_rac_get_tree(c, vp56_pc_tree, model); + sign = vp56_rac_get(c); + coeff = vp56_coeff_bias[idx]; + for (i=vp56_coeff_bit_length[idx]; i>=0; i--) + coeff += 
vp56_rac_get_prob(c, vp56_coeff_parse_table[idx][i]) << i; + } else { + if (vp56_rac_get_prob(c, model2[4])) { + coeff = 3 + vp56_rac_get_prob(c, model[5]); + s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 3; + } else { + coeff = 2; + s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 2; + } + sign = vp56_rac_get(c); + } + ct = 2; + } else { + ct = 1; + s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 1; + sign = vp56_rac_get(c); + coeff = 1; + } + coeff = (coeff ^ -sign) + sign; + if (coeff_idx) + coeff *= s->dequant_ac; + s->block_coeff[b][permute[coeff_idx]] = coeff; + } else { + if (ct && !vp56_rac_get_prob(c, model2[1])) + break; + ct = 0; + s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 0; + } + + cg = vp5_coeff_groups[++coeff_idx]; + ctx = s->coeff_ctx[vp56_b6to4[b]][coeff_idx]; + model = s->coeff_model_ract[pt][ct][cg]; + model2 = cg > 2 ? model : s->coeff_model_acct[pt][ct][cg][ctx]; + } + + ctx_last = FFMIN(s->coeff_ctx_last[vp56_b6to4[b]], 24); + s->coeff_ctx_last[vp56_b6to4[b]] = coeff_idx; + if (coeff_idx < ctx_last) + for (i=coeff_idx; i<=ctx_last; i++) + s->coeff_ctx[vp56_b6to4[b]][i] = 5; + s->above_blocks[s->above_block_idx[b]].not_null_dc = s->coeff_ctx[vp56_b6to4[b]][0]; + } +} + +static void vp5_default_models_init(vp56_context_t *s) +{ + int i; + + for (i=0; i<2; i++) { + s->vector_model_sig[i] = 0x80; + s->vector_model_dct[i] = 0x80; + s->vector_model_pdi[i][0] = 0x55; + s->vector_model_pdi[i][1] = 0x80; + } + memcpy(s->mb_types_stats, vp56_def_mb_types_stats, sizeof(s->mb_types_stats)); + memset(s->vector_model_pdv, 0x80, sizeof(s->vector_model_pdv)); +} + +static int vp5_decode_init(AVCodecContext *avctx) +{ + vp56_context_t *s = avctx->priv_data; + + vp56_init(s, avctx, 1); + s->vp56_coord_div = vp5_coord_div; + s->parse_vector_adjustment = vp5_parse_vector_adjustment; + s->adjust = vp5_adjust; + s->parse_coeff = vp5_parse_coeff; + s->default_models_init = vp5_default_models_init; + s->parse_vector_models = vp5_parse_vector_models; + s->parse_coeff_models = vp5_parse_coeff_models; + s->parse_header = vp5_parse_header; + + return 0; +} + +AVCodec vp5_decoder = { + "vp5", + CODEC_TYPE_VIDEO, + CODEC_ID_VP5, + sizeof(vp56_context_t), + vp5_decode_init, + NULL, + vp56_free, + vp56_decode_frame, +}; diff --git a/src/libffmpeg/libavcodec/vp56.c b/src/libffmpeg/libavcodec/vp56.c new file mode 100644 index 000000000..eb78d02e4 --- /dev/null +++ b/src/libffmpeg/libavcodec/vp56.c @@ -0,0 +1,665 @@ +/** + * @file vp56.c + * VP5 and VP6 compatible video decoder (common features) + * + * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "avcodec.h" + +#include "vp56.h" +#include "vp56data.h" + + +void vp56_init_dequant(vp56_context_t *s, int quantizer) +{ + s->quantizer = quantizer; + s->dequant_dc = vp56_dc_dequant[quantizer] << 2; + s->dequant_ac = vp56_ac_dequant[quantizer] << 2; +} + +static int vp56_get_vectors_predictors(vp56_context_t *s, int row, int col, + vp56_frame_t ref_frame) +{ + int nb_pred = 0; + vp56_mv_t vect[2] = {{0,0}, {0,0}}; + int pos, offset; + vp56_mv_t mvp; + + for (pos=0; pos<12; pos++) { + mvp.x = col + vp56_candidate_predictor_pos[pos][0]; + mvp.y = row + vp56_candidate_predictor_pos[pos][1]; + if (mvp.x < 0 || mvp.x >= s->mb_width || + mvp.y < 0 || mvp.y >= s->mb_height) + continue; + offset = mvp.x + s->mb_width*mvp.y; + + if (vp56_reference_frame[s->macroblocks[offset].type] != ref_frame) + continue; + if ((s->macroblocks[offset].mv.x == vect[0].x && + s->macroblocks[offset].mv.y == vect[0].y) || + (s->macroblocks[offset].mv.x == 0 && + s->macroblocks[offset].mv.y == 0)) + continue; + + vect[nb_pred++] = s->macroblocks[offset].mv; + if (nb_pred > 1) { + nb_pred = -1; + break; + } + s->vector_candidate_pos = pos; + } + + s->vector_candidate[0] = vect[0]; + s->vector_candidate[1] = vect[1]; + + return nb_pred+1; +} + +static void vp56_parse_mb_type_models(vp56_context_t *s) +{ + vp56_range_coder_t *c = &s->c; + int i, ctx, type; + + for (ctx=0; ctx<3; ctx++) { + if (vp56_rac_get_prob(c, 174)) { + int idx = vp56_rac_gets(c, 4); + memcpy(s->mb_types_stats[ctx],vp56_pre_def_mb_type_stats[idx][ctx], + sizeof(s->mb_types_stats[ctx])); + } + if (vp56_rac_get_prob(c, 254)) { + for (type=0; type<10; type++) { + for(i=0; i<2; i++) { + if (vp56_rac_get_prob(c, 205)) { + int delta, sign = vp56_rac_get(c); + + delta = vp56_rac_get_tree(c, vp56_pmbtm_tree, + vp56_mb_type_model_model); + if (!delta) + delta = 4 * vp56_rac_gets(c, 7); + s->mb_types_stats[ctx][type][i] += (delta ^ -sign) + sign; + } + } + } + } + } + + /* compute MB type probability tables based on previous MB type */ + for (ctx=0; ctx<3; ctx++) { + int p[10]; + + for (type=0; type<10; type++) + p[type] = 100 * s->mb_types_stats[ctx][type][1]; + + for (type=0; type<10; type++) { + int p02, p34, p0234, p17, p56, p89, p5689, p156789; + + /* conservative MB type probability */ + s->mb_type_model[ctx][type][0] = 255 - (255 * s->mb_types_stats[ctx][type][0]) / (1 + s->mb_types_stats[ctx][type][0] + s->mb_types_stats[ctx][type][1]); + + p[type] = 0; /* same MB type => weight is null */ + + /* binary tree parsing probabilities */ + p02 = p[0] + p[2]; + p34 = p[3] + p[4]; + p0234 = p02 + p34; + p17 = p[1] + p[7]; + p56 = p[5] + p[6]; + p89 = p[8] + p[9]; + p5689 = p56 + p89; + p156789 = p17 + p5689; + + s->mb_type_model[ctx][type][1] = 1 + 255 * p0234/(1+p0234+p156789); + s->mb_type_model[ctx][type][2] = 1 + 255 * p02 / (1+p0234); + s->mb_type_model[ctx][type][3] = 1 + 255 * p17 / (1+p156789); + s->mb_type_model[ctx][type][4] = 1 + 255 * p[0] / (1+p02); + s->mb_type_model[ctx][type][5] = 1 + 255 * p[3] / (1+p34); + s->mb_type_model[ctx][type][6] = 1 + 255 * p[1] / (1+p17); + s->mb_type_model[ctx][type][7] = 1 + 255 * p56 / (1+p5689); + s->mb_type_model[ctx][type][8] = 1 + 255 * p[5] / (1+p56); + s->mb_type_model[ctx][type][9] = 1 + 255 * p[8] / (1+p89); + + /* restore initial value */ + p[type] = 100 * 
s->mb_types_stats[ctx][type][1]; + } + } +} + +static vp56_mb_t vp56_parse_mb_type(vp56_context_t *s, + vp56_mb_t prev_type, int ctx) +{ + uint8_t *mb_type_model = s->mb_type_model[ctx][prev_type]; + vp56_range_coder_t *c = &s->c; + + if (vp56_rac_get_prob(c, mb_type_model[0])) + return prev_type; + else + return vp56_rac_get_tree(c, vp56_pmbt_tree, mb_type_model); +} + +static void vp56_decode_4mv(vp56_context_t *s, int row, int col) +{ + vp56_mv_t mv = {0,0}; + int type[4]; + int b; + + /* parse each block type */ + for (b=0; b<4; b++) { + type[b] = vp56_rac_gets(&s->c, 2); + if (type[b]) + type[b]++; /* only returns 0, 2, 3 or 4 (all INTER_PF) */ + } + + /* get vectors */ + for (b=0; b<4; b++) { + switch (type[b]) { + case VP56_MB_INTER_NOVEC_PF: + s->mv[b] = (vp56_mv_t) {0,0}; + break; + case VP56_MB_INTER_DELTA_PF: + s->parse_vector_adjustment(s, &s->mv[b]); + break; + case VP56_MB_INTER_V1_PF: + s->mv[b] = s->vector_candidate[0]; + break; + case VP56_MB_INTER_V2_PF: + s->mv[b] = s->vector_candidate[1]; + break; + } + mv.x += s->mv[b].x; + mv.y += s->mv[b].y; + } + + /* this is the one selected for the whole MB for prediction */ + s->macroblocks[row * s->mb_width + col].mv = s->mv[3]; + + /* chroma vectors are average luma vectors */ + if (s->avctx->codec->id == CODEC_ID_VP5) { + s->mv[4].x = s->mv[5].x = RSHIFT(mv.x,2); + s->mv[4].y = s->mv[5].y = RSHIFT(mv.y,2); + } else { + s->mv[4] = s->mv[5] = (vp56_mv_t) {mv.x/4, mv.y/4}; + } +} + +static vp56_mb_t vp56_decode_mv(vp56_context_t *s, int row, int col) +{ + vp56_mv_t *mv, vect = {0,0}; + int ctx, b; + + ctx = vp56_get_vectors_predictors(s, row, col, VP56_FRAME_PREVIOUS); + s->mb_type = vp56_parse_mb_type(s, s->mb_type, ctx); + s->macroblocks[row * s->mb_width + col].type = s->mb_type; + + switch (s->mb_type) { + case VP56_MB_INTER_V1_PF: + mv = &s->vector_candidate[0]; + break; + + case VP56_MB_INTER_V2_PF: + mv = &s->vector_candidate[1]; + break; + + case VP56_MB_INTER_V1_GF: + vp56_get_vectors_predictors(s, row, col, VP56_FRAME_GOLDEN); + mv = &s->vector_candidate[0]; + break; + + case VP56_MB_INTER_V2_GF: + vp56_get_vectors_predictors(s, row, col, VP56_FRAME_GOLDEN); + mv = &s->vector_candidate[1]; + break; + + case VP56_MB_INTER_DELTA_PF: + s->parse_vector_adjustment(s, &vect); + mv = &vect; + break; + + case VP56_MB_INTER_DELTA_GF: + vp56_get_vectors_predictors(s, row, col, VP56_FRAME_GOLDEN); + s->parse_vector_adjustment(s, &vect); + mv = &vect; + break; + + case VP56_MB_INTER_4V: + vp56_decode_4mv(s, row, col); + return s->mb_type; + + default: + mv = &vect; + break; + } + + s->macroblocks[row*s->mb_width + col].mv = *mv; + + /* same vector for all blocks */ + for (b=0; b<6; b++) + s->mv[b] = *mv; + + return s->mb_type; +} + +static void vp56_add_predictors_dc(vp56_context_t *s, vp56_frame_t ref_frame) +{ + int idx = s->scantable.permutated[0]; + int i; + + for (i=0; i<6; i++) { + vp56_ref_dc_t *ab = &s->above_blocks[s->above_block_idx[i]]; + vp56_ref_dc_t *lb = &s->left_block[vp56_b6to4[i]]; + int count = 0; + int dc = 0; + + if (ref_frame == lb->ref_frame) { + dc += lb->dc_coeff; + count++; + } + if (ref_frame == ab->ref_frame) { + dc += ab->dc_coeff; + count++; + } + if (s->avctx->codec->id == CODEC_ID_VP5) { + if (count < 2 && ref_frame == ab[-1].ref_frame) { + dc += ab[-1].dc_coeff; + count++; + } + if (count < 2 && ref_frame == ab[1].ref_frame) { + dc += ab[1].dc_coeff; + count++; + } + } + if (count == 0) + dc = s->prev_dc[vp56_b6to3[i]][ref_frame]; + else if (count == 2) + dc /= 2; + + s->block_coeff[i][idx] += dc; + 
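/* annotation (an editorial gloss inferred from the surrounding stores, not part of the committed source): block_coeff[i][idx] now holds the reconstructed DC value; the assignments below cache it per plane and reference frame, and in the above/left neighbour records, so that later blocks can predict from it before the value is finally scaled by dequant_dc */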
s->prev_dc[vp56_b6to3[i]][ref_frame] = s->block_coeff[i][idx]; + ab->dc_coeff = s->block_coeff[i][idx]; + ab->ref_frame = ref_frame; + lb->dc_coeff = s->block_coeff[i][idx]; + lb->ref_frame = ref_frame; + s->block_coeff[i][idx] *= s->dequant_dc; + } +} + +static void vp56_edge_filter(vp56_context_t *s, uint8_t *yuv, + int pix_inc, int line_inc, int t) +{ + int pix2_inc = 2 * pix_inc; + int i, v; + + for (i=0; i<12; i++) { + v = (yuv[-pix2_inc] + 3*(yuv[0]-yuv[-pix_inc]) - yuv[pix_inc] + 4) >>3; + v = s->adjust(v, t); + yuv[-pix_inc] = clip_uint8(yuv[-pix_inc] + v); + yuv[0] = clip_uint8(yuv[0] - v); + yuv += line_inc; + } +} + +static void vp56_deblock_filter(vp56_context_t *s, uint8_t *yuv, + int stride, int dx, int dy) +{ + int t = vp56_filter_threshold[s->quantizer]; + if (dx) vp56_edge_filter(s, yuv + 10-dx , 1, stride, t); + if (dy) vp56_edge_filter(s, yuv + stride*(10-dy), stride, 1, t); +} + +static void vp56_mc(vp56_context_t *s, int b, uint8_t *src, + int stride, int x, int y) +{ + int plane = vp56_b6to3[b]; + uint8_t *dst= s->frames[VP56_FRAME_CURRENT].data[plane]+s->block_offset[b]; + uint8_t *src_block; + int src_offset; + int overlap_offset = 0; + int mask = s->vp56_coord_div[b] - 1; + int deblock_filtering = s->deblock_filtering; + int dx; + int dy; + + if (s->avctx->skip_loop_filter >= AVDISCARD_ALL || + (s->avctx->skip_loop_filter >= AVDISCARD_NONKEY + && !s->frames[VP56_FRAME_CURRENT].key_frame)) + deblock_filtering = 0; + + dx = s->mv[b].x / s->vp56_coord_div[b]; + dy = s->mv[b].y / s->vp56_coord_div[b]; + + if (b >= 4) { + x /= 2; + y /= 2; + } + x += dx - 2; + y += dy - 2; + + if (x<0 || x+12>=s->plane_width[plane] || + y<0 || y+12>=s->plane_height[plane]) { + ff_emulated_edge_mc(s->edge_emu_buffer, + src + s->block_offset[b] + (dy-2)*stride + (dx-2), + stride, 12, 12, x, y, + s->plane_width[plane], + s->plane_height[plane]); + src_block = s->edge_emu_buffer; + src_offset = 2 + 2*stride; + } else if (deblock_filtering) { + /* only need a 12x12 block, but there is no such dsp function, */ + /* so copy a 16x12 block */ + s->dsp.put_pixels_tab[0][0](s->edge_emu_buffer, + src + s->block_offset[b] + (dy-2)*stride + (dx-2), + stride, 12); + src_block = s->edge_emu_buffer; + src_offset = 2 + 2*stride; + } else { + src_block = src; + src_offset = s->block_offset[b] + dy*stride + dx; + } + + if (deblock_filtering) + vp56_deblock_filter(s, src_block, stride, dx&7, dy&7); + + if (s->mv[b].x & mask) + overlap_offset += (s->mv[b].x > 0) ? 1 : -1; + if (s->mv[b].y & mask) + overlap_offset += (s->mv[b].y > 0) ? 
stride : -stride; + + if (overlap_offset) { + if (s->filter) + s->filter(s, dst, src_block, src_offset, src_offset+overlap_offset, + stride, s->mv[b], mask, s->filter_selection, b<4); + else + s->dsp.put_no_rnd_pixels_l2[1](dst, src_block+src_offset, + src_block+src_offset+overlap_offset, + stride, 8); + } else { + s->dsp.put_pixels_tab[1][0](dst, src_block+src_offset, stride, 8); + } +} + +static void vp56_decode_mb(vp56_context_t *s, int row, int col) +{ + AVFrame *frame_current, *frame_ref; + vp56_mb_t mb_type; + vp56_frame_t ref_frame; + int b, plan, off; + + if (s->frames[VP56_FRAME_CURRENT].key_frame) + mb_type = VP56_MB_INTRA; + else + mb_type = vp56_decode_mv(s, row, col); + ref_frame = vp56_reference_frame[mb_type]; + + memset(s->block_coeff, 0, sizeof(s->block_coeff)); + + s->parse_coeff(s); + + vp56_add_predictors_dc(s, ref_frame); + + frame_current = &s->frames[VP56_FRAME_CURRENT]; + frame_ref = &s->frames[ref_frame]; + + switch (mb_type) { + case VP56_MB_INTRA: + for (b=0; b<6; b++) { + plan = vp56_b6to3[b]; + s->dsp.idct_put(frame_current->data[plan] + s->block_offset[b], + s->stride[plan], s->block_coeff[b]); + } + break; + + case VP56_MB_INTER_NOVEC_PF: + case VP56_MB_INTER_NOVEC_GF: + for (b=0; b<6; b++) { + plan = vp56_b6to3[b]; + off = s->block_offset[b]; + s->dsp.put_pixels_tab[1][0](frame_current->data[plan] + off, + frame_ref->data[plan] + off, + s->stride[plan], 8); + s->dsp.idct_add(frame_current->data[plan] + off, + s->stride[plan], s->block_coeff[b]); + } + break; + + case VP56_MB_INTER_DELTA_PF: + case VP56_MB_INTER_V1_PF: + case VP56_MB_INTER_V2_PF: + case VP56_MB_INTER_DELTA_GF: + case VP56_MB_INTER_4V: + case VP56_MB_INTER_V1_GF: + case VP56_MB_INTER_V2_GF: + for (b=0; b<6; b++) { + int x_off = b==1 || b==3 ? 8 : 0; + int y_off = b==2 || b==3 ? 
8 : 0; + plan = vp56_b6to3[b]; + vp56_mc(s, b, frame_ref->data[plan], s->stride[plan], + 16*col+x_off, 16*row+y_off); + s->dsp.idct_add(frame_current->data[plan] + s->block_offset[b], + s->stride[plan], s->block_coeff[b]); + } + break; + } +} + +static int vp56_size_changed(AVCodecContext *avctx, vp56_context_t *s) +{ + int stride = s->frames[VP56_FRAME_CURRENT].linesize[0]; + int i; + + s->plane_width[0] = s->avctx->coded_width; + s->plane_width[1] = s->plane_width[2] = s->avctx->coded_width/2; + s->plane_height[0] = s->avctx->coded_height; + s->plane_height[1] = s->plane_height[2] = s->avctx->coded_height/2; + + for (i=0; i<3; i++) + s->stride[i] = s->flip * s->frames[VP56_FRAME_CURRENT].linesize[i]; + + s->mb_width = (s->avctx->coded_width+15) / 16; + s->mb_height = (s->avctx->coded_height+15) / 16; + + if (s->mb_width > 1000 || s->mb_height > 1000) { + av_log(avctx, AV_LOG_ERROR, "picture too big\n"); + return -1; + } + + s->above_blocks = av_realloc(s->above_blocks, + (4*s->mb_width+6) * sizeof(*s->above_blocks)); + s->macroblocks = av_realloc(s->macroblocks, + s->mb_width*s->mb_height*sizeof(*s->macroblocks)); + av_free(s->edge_emu_buffer_alloc); + s->edge_emu_buffer_alloc = av_malloc(16*stride); + s->edge_emu_buffer = s->edge_emu_buffer_alloc; + if (s->flip < 0) + s->edge_emu_buffer += 15 * stride; + + return 0; +} + +int vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + vp56_context_t *s = avctx->priv_data; + AVFrame *const p = &s->frames[VP56_FRAME_CURRENT]; + AVFrame *picture = data; + int mb_row, mb_col, mb_row_flip, mb_offset = 0; + int block, y, uv, stride_y, stride_uv; + int golden_frame = 0; + int res; + + res = s->parse_header(s, buf, buf_size, &golden_frame); + if (!res) + return -1; + + p->reference = 1; + if (avctx->get_buffer(avctx, p) < 0) { + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return -1; + } + + if (res == 2) + if (vp56_size_changed(avctx, s)) { + avctx->release_buffer(avctx, p); + return -1; + } + + if (p->key_frame) { + p->pict_type = FF_I_TYPE; + s->default_models_init(s); + for (block=0; block<s->mb_height*s->mb_width; block++) + s->macroblocks[block].type = VP56_MB_INTRA; + } else { + p->pict_type = FF_P_TYPE; + vp56_parse_mb_type_models(s); + s->parse_vector_models(s); + s->mb_type = VP56_MB_INTER_NOVEC_PF; + } + + s->parse_coeff_models(s); + + memset(s->prev_dc, 0, sizeof(s->prev_dc)); + s->prev_dc[1][VP56_FRAME_CURRENT] = 128; + s->prev_dc[2][VP56_FRAME_CURRENT] = 128; + + for (block=0; block < 4*s->mb_width+6; block++) { + s->above_blocks[block].ref_frame = -1; + s->above_blocks[block].dc_coeff = 0; + s->above_blocks[block].not_null_dc = 0; + } + s->above_blocks[2*s->mb_width + 2].ref_frame = 0; + s->above_blocks[3*s->mb_width + 4].ref_frame = 0; + + stride_y = p->linesize[0]; + stride_uv = p->linesize[1]; + + if (s->flip < 0) + mb_offset = 7; + + /* main macroblocks loop */ + for (mb_row=0; mb_row<s->mb_height; mb_row++) { + if (s->flip < 0) + mb_row_flip = s->mb_height - mb_row - 1; + else + mb_row_flip = mb_row; + + for (block=0; block<4; block++) { + s->left_block[block].ref_frame = -1; + s->left_block[block].dc_coeff = 0; + s->left_block[block].not_null_dc = 0; + memset(s->coeff_ctx[block], 0, 64*sizeof(s->coeff_ctx[block][0])); + } + memset(s->coeff_ctx_last, 24, sizeof(s->coeff_ctx_last)); + + s->above_block_idx[0] = 1; + s->above_block_idx[1] = 2; + s->above_block_idx[2] = 1; + s->above_block_idx[3] = 2; + s->above_block_idx[4] = 2*s->mb_width + 2 + 1; + s->above_block_idx[5] = 
3*s->mb_width + 4 + 1; + + s->block_offset[s->frbi] = (mb_row_flip*16 + mb_offset) * stride_y; + s->block_offset[s->srbi] = s->block_offset[s->frbi] + 8*stride_y; + s->block_offset[1] = s->block_offset[0] + 8; + s->block_offset[3] = s->block_offset[2] + 8; + s->block_offset[4] = (mb_row_flip*8 + mb_offset) * stride_uv; + s->block_offset[5] = s->block_offset[4]; + + for (mb_col=0; mb_col<s->mb_width; mb_col++) { + vp56_decode_mb(s, mb_row, mb_col); + + for (y=0; y<4; y++) { + s->above_block_idx[y] += 2; + s->block_offset[y] += 16; + } + + for (uv=4; uv<6; uv++) { + s->above_block_idx[uv] += 1; + s->block_offset[uv] += 8; + } + } + } + + if (s->frames[VP56_FRAME_PREVIOUS].data[0] + && (s->frames[VP56_FRAME_PREVIOUS].data[0] + != s->frames[VP56_FRAME_GOLDEN].data[0])) { + avctx->release_buffer(avctx, &s->frames[VP56_FRAME_PREVIOUS]); + } + if (p->key_frame || golden_frame) { + if (s->frames[VP56_FRAME_GOLDEN].data[0]) + avctx->release_buffer(avctx, &s->frames[VP56_FRAME_GOLDEN]); + s->frames[VP56_FRAME_GOLDEN] = *p; + } + s->frames[VP56_FRAME_PREVIOUS] = *p; + + *picture = *p; + *data_size = sizeof(AVPicture); + + return buf_size; +} + +void vp56_init(vp56_context_t *s, AVCodecContext *avctx, int flip) +{ + int i; + + s->avctx = avctx; + avctx->pix_fmt = PIX_FMT_YUV420P; + + if (s->avctx->idct_algo == FF_IDCT_AUTO) + s->avctx->idct_algo = FF_IDCT_VP3; + dsputil_init(&s->dsp, s->avctx); + ff_init_scantable(s->dsp.idct_permutation, &s->scantable,ff_zigzag_direct); + + avcodec_set_dimensions(s->avctx, 0, 0); + + for (i=0; i<3; i++) + s->frames[i].data[0] = NULL; + s->edge_emu_buffer_alloc = NULL; + + s->above_blocks = NULL; + s->macroblocks = NULL; + s->quantizer = -1; + s->deblock_filtering = 1; + + s->filter = NULL; + + if (flip) { + s->flip = -1; + s->frbi = 2; + s->srbi = 0; + } else { + s->flip = 1; + s->frbi = 0; + s->srbi = 2; + } +} + +int vp56_free(AVCodecContext *avctx) +{ + vp56_context_t *s = avctx->priv_data; + + av_free(s->above_blocks); + av_free(s->macroblocks); + av_free(s->edge_emu_buffer_alloc); + if (s->frames[VP56_FRAME_GOLDEN].data[0] + && (s->frames[VP56_FRAME_PREVIOUS].data[0] + != s->frames[VP56_FRAME_GOLDEN].data[0])) + avctx->release_buffer(avctx, &s->frames[VP56_FRAME_GOLDEN]); + if (s->frames[VP56_FRAME_PREVIOUS].data[0]) + avctx->release_buffer(avctx, &s->frames[VP56_FRAME_PREVIOUS]); + return 0; +} diff --git a/src/libffmpeg/libavcodec/vp56.h b/src/libffmpeg/libavcodec/vp56.h new file mode 100644 index 000000000..f8b3a8e4b --- /dev/null +++ b/src/libffmpeg/libavcodec/vp56.h @@ -0,0 +1,249 @@ +/** + * @file vp56.h + * VP5 and VP6 compatible video decoder (common features) + * + * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef VP56_H +#define VP56_H + +#include "vp56data.h" +#include "dsputil.h" +#include "mpegvideo.h" + + +typedef struct vp56_context vp56_context_t; +typedef struct vp56_mv vp56_mv_t; + +typedef void (*vp56_parse_vector_adjustment_t)(vp56_context_t *s, + vp56_mv_t *vect); +typedef int (*vp56_adjust_t)(int v, int t); +typedef void (*vp56_filter_t)(vp56_context_t *s, uint8_t *dst, uint8_t *src, + int offset1, int offset2, int stride, + vp56_mv_t mv, int mask, int select, int luma); +typedef void (*vp56_parse_coeff_t)(vp56_context_t *s); +typedef void (*vp56_default_models_init_t)(vp56_context_t *s); +typedef void (*vp56_parse_vector_models_t)(vp56_context_t *s); +typedef void (*vp56_parse_coeff_models_t)(vp56_context_t *s); +typedef int (*vp56_parse_header_t)(vp56_context_t *s, uint8_t *buf, + int buf_size, int *golden_frame); + +typedef struct { + int high; + int bits; + const uint8_t *buffer; + unsigned long code_word; +} vp56_range_coder_t; + +typedef struct { + uint8_t not_null_dc; + vp56_frame_t ref_frame; + DCTELEM dc_coeff; +} vp56_ref_dc_t; + +struct vp56_mv { + int x; + int y; +}; + +typedef struct { + uint8_t type; + vp56_mv_t mv; +} vp56_macroblock_t; + +struct vp56_context { + AVCodecContext *avctx; + DSPContext dsp; + ScanTable scantable; + AVFrame frames[3]; + uint8_t *edge_emu_buffer_alloc; + uint8_t *edge_emu_buffer; + vp56_range_coder_t c; + int sub_version; + + /* frame info */ + int plane_width[3]; + int plane_height[3]; + int mb_width; /* number of horizontal MB */ + int mb_height; /* number of vertical MB */ + int block_offset[6]; + + int quantizer; + uint16_t dequant_dc; + uint16_t dequant_ac; + + /* DC predictors management */ + vp56_ref_dc_t *above_blocks; + vp56_ref_dc_t left_block[4]; + int above_block_idx[6]; + DCTELEM prev_dc[3][3]; /* [plan][ref_frame] */ + + /* blocks / macroblock */ + vp56_mb_t mb_type; + vp56_macroblock_t *macroblocks; + DECLARE_ALIGNED_16(DCTELEM, block_coeff[6][64]); + uint8_t coeff_reorder[64]; /* used in vp6 only */ + uint8_t coeff_index_to_pos[64]; /* used in vp6 only */ + + /* motion vectors */ + vp56_mv_t mv[6]; /* vectors for each block in MB */ + vp56_mv_t vector_candidate[2]; + int vector_candidate_pos; + + /* filtering hints */ + int deblock_filtering; + int filter_selection; + int filter_mode; + int max_vector_length; + int sample_variance_threshold; + + /* AC models */ + uint8_t vector_model_sig[2]; /* delta sign */ + uint8_t vector_model_dct[2]; /* delta coding types */ + uint8_t vector_model_pdi[2][2]; /* predefined delta init */ + uint8_t vector_model_pdv[2][7]; /* predefined delta values */ + uint8_t vector_model_fdv[2][8]; /* 8 bit delta value definition */ + uint8_t mb_type_model[3][10][10]; /* model for decoding MB type */ + uint8_t coeff_model_dccv[2][11]; /* DC coeff value */ + uint8_t coeff_model_ract[2][3][6][11]; /* Run/AC coding type and AC coeff value */ + uint8_t coeff_model_acct[2][3][3][6][5];/* vp5 only AC coding type for coding group < 3 */ + uint8_t coeff_model_dcct[2][36][5]; /* DC coeff coding type */ + uint8_t coeff_model_runv[2][14]; /* run value (vp6 only) */ + uint8_t mb_types_stats[3][10][2]; /* contextual, next MB type stats */ + uint8_t coeff_ctx[4][64]; /* used in vp5 only */ + uint8_t coeff_ctx_last[4]; /* used in vp5 only */ + + /* upside-down flipping hints */ + int flip; /* 
are we flipping ? */ + int frbi; /* first row block index in MB */ + int srbi; /* second row block index in MB */ + int stride[3]; /* stride for each plan */ + + const uint8_t *vp56_coord_div; + vp56_parse_vector_adjustment_t parse_vector_adjustment; + vp56_adjust_t adjust; + vp56_filter_t filter; + vp56_parse_coeff_t parse_coeff; + vp56_default_models_init_t default_models_init; + vp56_parse_vector_models_t parse_vector_models; + vp56_parse_coeff_models_t parse_coeff_models; + vp56_parse_header_t parse_header; +}; + + +void vp56_init(vp56_context_t *s, AVCodecContext *avctx, int flip); +int vp56_free(AVCodecContext *avctx); +void vp56_init_dequant(vp56_context_t *s, int quantizer); +int vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size, + uint8_t *buf, int buf_size); + + +/** + * vp56 specific range coder implementation + */ + +static inline void vp56_init_range_decoder(vp56_range_coder_t *c, + const uint8_t *buf, int buf_size) +{ + c->high = 255; + c->bits = 8; + c->buffer = buf; + c->code_word = *c->buffer++ << 8; + c->code_word |= *c->buffer++; +} + +static inline int vp56_rac_get_prob(vp56_range_coder_t *c, uint8_t prob) +{ + unsigned int low = 1 + (((c->high - 1) * prob) / 256); + unsigned int low_shift = low << 8; + int bit = c->code_word >= low_shift; + + if (bit) { + c->high -= low; + c->code_word -= low_shift; + } else { + c->high = low; + } + + /* normalize */ + while (c->high < 128) { + c->high <<= 1; + c->code_word <<= 1; + if (--c->bits == 0) { + c->bits = 8; + c->code_word |= *c->buffer++; + } + } + return bit; +} + +static inline int vp56_rac_get(vp56_range_coder_t *c) +{ + /* equiprobable */ + int low = (c->high + 1) >> 1; + unsigned int low_shift = low << 8; + int bit = c->code_word >= low_shift; + if (bit) { + c->high = (c->high - low) << 1; + c->code_word -= low_shift; + } else { + c->high = low << 1; + } + + /* normalize */ + c->code_word <<= 1; + if (--c->bits == 0) { + c->bits = 8; + c->code_word |= *c->buffer++; + } + return bit; +} + +static inline int vp56_rac_gets(vp56_range_coder_t *c, int bits) +{ + int value = 0; + + while (bits--) { + value = (value << 1) | vp56_rac_get(c); + } + + return value; +} + +static inline int vp56_rac_gets_nn(vp56_range_coder_t *c, int bits) +{ + int v = vp56_rac_gets(c, 7) << 1; + return v + !v; +} + +static inline int vp56_rac_get_tree(vp56_range_coder_t *c, + const vp56_tree_t *tree, + const uint8_t *probs) +{ + while (tree->val > 0) { + if (vp56_rac_get_prob(c, probs[tree->prob_idx])) + tree += tree->val; + else + tree++; + } + return -tree->val; +} + +#endif /* VP56_H */ diff --git a/src/libffmpeg/libavcodec/vp56data.c b/src/libffmpeg/libavcodec/vp56data.c new file mode 100644 index 000000000..e75c6d1ce --- /dev/null +++ b/src/libffmpeg/libavcodec/vp56data.c @@ -0,0 +1,66 @@ +/** + * @file vp56data.c + * VP5 and VP6 compatible video decoder (common data) + * + * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "vp56data.h" + +const uint8_t vp56_b6to3[] = { 0, 0, 0, 0, 1, 2 }; +const uint8_t vp56_b6to4[] = { 0, 0, 1, 1, 2, 3 }; + +const uint8_t vp56_coeff_parse_table[6][11] = { + { 159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 145, 165, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 140, 148, 173, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 135, 140, 155, 176, 0, 0, 0, 0, 0, 0, 0 }, + { 130, 134, 141, 157, 180, 0, 0, 0, 0, 0, 0 }, + { 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254 }, +}; + +const uint8_t vp56_def_mb_types_stats[3][10][2] = { + { { 69, 42 }, { 1, 2 }, { 1, 7 }, { 44, 42 }, { 6, 22 }, + { 1, 3 }, { 0, 2 }, { 1, 5 }, { 0, 1 }, { 0, 0 }, }, + { { 229, 8 }, { 1, 1 }, { 0, 8 }, { 0, 0 }, { 0, 0 }, + { 1, 2 }, { 0, 1 }, { 0, 0 }, { 1, 1 }, { 0, 0 }, }, + { { 122, 35 }, { 1, 1 }, { 1, 6 }, { 46, 34 }, { 0, 0 }, + { 1, 2 }, { 0, 1 }, { 0, 1 }, { 1, 1 }, { 0, 0 }, }, +}; + +const vp56_tree_t vp56_pva_tree[] = { + { 8, 0}, + { 4, 1}, + { 2, 2}, {-0}, {-1}, + { 2, 3}, {-2}, {-3}, + { 4, 4}, + { 2, 5}, {-4}, {-5}, + { 2, 6}, {-6}, {-7}, +}; + +const vp56_tree_t vp56_pc_tree[] = { + { 4, 6}, + { 2, 7}, {-0}, {-1}, + { 4, 8}, + { 2, 9}, {-2}, {-3}, + { 2,10}, {-4}, {-5}, +}; + +const uint8_t vp56_coeff_bias[] = { 5, 7, 11, 19, 35, 67 }; +const uint8_t vp56_coeff_bit_length[] = { 0, 1, 2, 3, 4, 10 }; diff --git a/src/libffmpeg/libavcodec/vp56data.h b/src/libffmpeg/libavcodec/vp56data.h new file mode 100644 index 000000000..dbf92dd68 --- /dev/null +++ b/src/libffmpeg/libavcodec/vp56data.h @@ -0,0 +1,248 @@ +/** + * @file vp56data.h + * VP5 and VP6 compatible video decoder (common data) + * + * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef VP56DATA_H +#define VP56DATA_H + +#include "common.h" + +typedef enum { + VP56_FRAME_CURRENT = 0, + VP56_FRAME_PREVIOUS = 1, + VP56_FRAME_GOLDEN = 2, +} vp56_frame_t; + +typedef enum { + VP56_MB_INTER_NOVEC_PF = 0, /**< Inter MB, no vector, from previous frame */ + VP56_MB_INTRA = 1, /**< Intra MB */ + VP56_MB_INTER_DELTA_PF = 2, /**< Inter MB, above/left vector + delta, from previous frame */ + VP56_MB_INTER_V1_PF = 3, /**< Inter MB, first vector, from previous frame */ + VP56_MB_INTER_V2_PF = 4, /**< Inter MB, second vector, from previous frame */ + VP56_MB_INTER_NOVEC_GF = 5, /**< Inter MB, no vector, from golden frame */ + VP56_MB_INTER_DELTA_GF = 6, /**< Inter MB, above/left vector + delta, from golden frame */ + VP56_MB_INTER_4V = 7, /**< Inter MB, 4 vectors, from previous frame */ + VP56_MB_INTER_V1_GF = 8, /**< Inter MB, first vector, from golden frame */ + VP56_MB_INTER_V2_GF = 9, /**< Inter MB, second vector, from golden frame */ +} vp56_mb_t; + +typedef struct { + int8_t val; + int8_t prob_idx; +} vp56_tree_t; + +extern const uint8_t vp56_b6to3[]; +extern const uint8_t vp56_b6to4[]; +extern const uint8_t vp56_coeff_parse_table[6][11]; +extern const uint8_t vp56_def_mb_types_stats[3][10][2]; +extern const vp56_tree_t vp56_pva_tree[]; +extern const vp56_tree_t vp56_pc_tree[]; +extern const uint8_t vp56_coeff_bias[]; +extern const uint8_t vp56_coeff_bit_length[]; + +static const vp56_frame_t vp56_reference_frame[] = { + VP56_FRAME_PREVIOUS, /* VP56_MB_INTER_NOVEC_PF */ + VP56_FRAME_CURRENT, /* VP56_MB_INTRA */ + VP56_FRAME_PREVIOUS, /* VP56_MB_INTER_DELTA_PF */ + VP56_FRAME_PREVIOUS, /* VP56_MB_INTER_V1_PF */ + VP56_FRAME_PREVIOUS, /* VP56_MB_INTER_V2_PF */ + VP56_FRAME_GOLDEN, /* VP56_MB_INTER_NOVEC_GF */ + VP56_FRAME_GOLDEN, /* VP56_MB_INTER_DELTA_GF */ + VP56_FRAME_PREVIOUS, /* VP56_MB_INTER_4V */ + VP56_FRAME_GOLDEN, /* VP56_MB_INTER_V1_GF */ + VP56_FRAME_GOLDEN, /* VP56_MB_INTER_V2_GF */ +}; + +static const uint8_t vp56_ac_dequant[64] = { + 94, 92, 90, 88, 86, 82, 78, 74, + 70, 66, 62, 58, 54, 53, 52, 51, + 50, 49, 48, 47, 46, 45, 44, 43, + 42, 40, 39, 37, 36, 35, 34, 33, + 32, 31, 30, 29, 28, 27, 26, 25, + 24, 23, 22, 21, 20, 19, 18, 17, + 16, 15, 14, 13, 12, 11, 10, 9, + 8, 7, 6, 5, 4, 3, 2, 1, +}; + +static const uint8_t vp56_dc_dequant[64] = { + 47, 47, 47, 47, 45, 43, 43, 43, + 43, 43, 42, 41, 41, 40, 40, 40, + 40, 35, 35, 35, 35, 33, 33, 33, + 33, 32, 32, 32, 27, 27, 26, 26, + 25, 25, 24, 24, 23, 23, 19, 19, + 19, 19, 18, 18, 17, 16, 16, 16, + 16, 16, 15, 11, 11, 11, 10, 10, + 9, 8, 7, 5, 3, 3, 2, 2, +}; + +static const uint8_t vp56_pre_def_mb_type_stats[16][3][10][2] = { + { { { 9, 15 }, { 32, 25 }, { 7, 19 }, { 9, 21 }, { 1, 12 }, + { 14, 12 }, { 3, 18 }, { 14, 23 }, { 3, 10 }, { 0, 4 }, }, + { { 41, 22 }, { 1, 0 }, { 1, 31 }, { 0, 0 }, { 0, 0 }, + { 0, 1 }, { 1, 7 }, { 0, 1 }, { 98, 25 }, { 4, 10 }, }, + { { 2, 3 }, { 2, 3 }, { 0, 2 }, { 0, 2 }, { 0, 0 }, + { 11, 4 }, { 1, 4 }, { 0, 2 }, { 3, 2 }, { 0, 4 }, }, }, + { { { 48, 39 }, { 1, 2 }, { 11, 27 }, { 29, 44 }, { 7, 27 }, + { 1, 4 }, { 0, 3 }, { 1, 6 }, { 1, 2 }, { 0, 0 }, }, + { { 123, 37 }, { 6, 4 }, { 1, 27 }, { 0, 0 }, { 0, 0 }, + { 5, 8 }, { 1, 7 }, { 0, 1 }, { 12, 10 }, { 0, 2 }, }, + { { 49, 46 }, { 3, 4 }, { 7, 31 }, { 42, 41 }, { 0, 0 }, + { 2, 6 }, { 1, 7 }, { 1, 4 }, { 2, 4 }, 
{ 0, 1 }, }, }, + { { { 21, 32 }, { 1, 2 }, { 4, 10 }, { 32, 43 }, { 6, 23 }, + { 2, 3 }, { 1, 19 }, { 1, 6 }, { 12, 21 }, { 0, 7 }, }, + { { 26, 14 }, { 14, 12 }, { 0, 24 }, { 0, 0 }, { 0, 0 }, + { 55, 17 }, { 1, 9 }, { 0, 36 }, { 5, 7 }, { 1, 3 }, }, + { { 26, 25 }, { 1, 1 }, { 2, 10 }, { 67, 39 }, { 0, 0 }, + { 1, 1 }, { 0, 14 }, { 0, 2 }, { 31, 26 }, { 1, 6 }, }, }, + { { { 69, 83 }, { 0, 0 }, { 0, 2 }, { 10, 29 }, { 3, 12 }, + { 0, 1 }, { 0, 3 }, { 0, 3 }, { 2, 2 }, { 0, 0 }, }, + { { 209, 5 }, { 0, 0 }, { 0, 27 }, { 0, 0 }, { 0, 0 }, + { 0, 1 }, { 0, 1 }, { 0, 1 }, { 0, 0 }, { 0, 0 }, }, + { { 103, 46 }, { 1, 2 }, { 2, 10 }, { 33, 42 }, { 0, 0 }, + { 1, 4 }, { 0, 3 }, { 0, 1 }, { 1, 3 }, { 0, 0 }, }, }, + { { { 11, 20 }, { 1, 4 }, { 18, 36 }, { 43, 48 }, { 13, 35 }, + { 0, 2 }, { 0, 5 }, { 3, 12 }, { 1, 2 }, { 0, 0 }, }, + { { 2, 5 }, { 4, 5 }, { 0, 121 }, { 0, 0 }, { 0, 0 }, + { 0, 3 }, { 2, 4 }, { 1, 4 }, { 2, 2 }, { 0, 1 }, }, + { { 14, 31 }, { 9, 13 }, { 14, 54 }, { 22, 29 }, { 0, 0 }, + { 2, 6 }, { 4, 18 }, { 6, 13 }, { 1, 5 }, { 0, 1 }, }, }, + { { { 70, 44 }, { 0, 1 }, { 2, 10 }, { 37, 46 }, { 8, 26 }, + { 0, 2 }, { 0, 2 }, { 0, 2 }, { 0, 1 }, { 0, 0 }, }, + { { 175, 5 }, { 0, 1 }, { 0, 48 }, { 0, 0 }, { 0, 0 }, + { 0, 2 }, { 0, 1 }, { 0, 2 }, { 0, 1 }, { 0, 0 }, }, + { { 85, 39 }, { 0, 0 }, { 1, 9 }, { 69, 40 }, { 0, 0 }, + { 0, 1 }, { 0, 3 }, { 0, 1 }, { 2, 3 }, { 0, 0 }, }, }, + { { { 8, 15 }, { 0, 1 }, { 8, 21 }, { 74, 53 }, { 22, 42 }, + { 0, 1 }, { 0, 2 }, { 0, 3 }, { 1, 2 }, { 0, 0 }, }, + { { 83, 5 }, { 2, 3 }, { 0, 102 }, { 0, 0 }, { 0, 0 }, + { 1, 3 }, { 0, 2 }, { 0, 1 }, { 0, 0 }, { 0, 0 }, }, + { { 31, 28 }, { 0, 0 }, { 3, 14 }, { 130, 34 }, { 0, 0 }, + { 0, 1 }, { 0, 3 }, { 0, 1 }, { 3, 3 }, { 0, 1 }, }, }, + { { { 141, 42 }, { 0, 0 }, { 1, 4 }, { 11, 24 }, { 1, 11 }, + { 0, 1 }, { 0, 1 }, { 0, 2 }, { 0, 0 }, { 0, 0 }, }, + { { 233, 6 }, { 0, 0 }, { 0, 8 }, { 0, 0 }, { 0, 0 }, + { 0, 1 }, { 0, 1 }, { 0, 0 }, { 0, 1 }, { 0, 0 }, }, + { { 171, 25 }, { 0, 0 }, { 1, 5 }, { 25, 21 }, { 0, 0 }, + { 0, 1 }, { 0, 1 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, }, }, + { { { 8, 19 }, { 4, 10 }, { 24, 45 }, { 21, 37 }, { 9, 29 }, + { 0, 3 }, { 1, 7 }, { 11, 25 }, { 0, 2 }, { 0, 1 }, }, + { { 34, 16 }, { 112, 21 }, { 1, 28 }, { 0, 0 }, { 0, 0 }, + { 6, 8 }, { 1, 7 }, { 0, 3 }, { 2, 5 }, { 0, 2 }, }, + { { 17, 21 }, { 68, 29 }, { 6, 15 }, { 13, 22 }, { 0, 0 }, + { 6, 12 }, { 3, 14 }, { 4, 10 }, { 1, 7 }, { 0, 3 }, }, }, + { { { 46, 42 }, { 0, 1 }, { 2, 10 }, { 54, 51 }, { 10, 30 }, + { 0, 2 }, { 0, 2 }, { 0, 1 }, { 0, 1 }, { 0, 0 }, }, + { { 159, 35 }, { 2, 2 }, { 0, 25 }, { 0, 0 }, { 0, 0 }, + { 3, 6 }, { 0, 5 }, { 0, 1 }, { 4, 4 }, { 0, 1 }, }, + { { 51, 39 }, { 0, 1 }, { 2, 12 }, { 91, 44 }, { 0, 0 }, + { 0, 2 }, { 0, 3 }, { 0, 1 }, { 2, 3 }, { 0, 1 }, }, }, + { { { 28, 32 }, { 0, 0 }, { 3, 10 }, { 75, 51 }, { 14, 33 }, + { 0, 1 }, { 0, 2 }, { 0, 1 }, { 1, 2 }, { 0, 0 }, }, + { { 75, 39 }, { 5, 7 }, { 2, 48 }, { 0, 0 }, { 0, 0 }, + { 3, 11 }, { 2, 16 }, { 1, 4 }, { 7, 10 }, { 0, 2 }, }, + { { 81, 25 }, { 0, 0 }, { 2, 9 }, { 106, 26 }, { 0, 0 }, + { 0, 1 }, { 0, 1 }, { 0, 1 }, { 1, 1 }, { 0, 0 }, }, }, + { { { 100, 46 }, { 0, 1 }, { 3, 9 }, { 21, 37 }, { 5, 20 }, + { 0, 1 }, { 0, 2 }, { 1, 2 }, { 0, 1 }, { 0, 0 }, }, + { { 212, 21 }, { 0, 1 }, { 0, 9 }, { 0, 0 }, { 0, 0 }, + { 1, 2 }, { 0, 2 }, { 0, 0 }, { 2, 2 }, { 0, 0 }, }, + { { 140, 37 }, { 0, 1 }, { 1, 8 }, { 24, 33 }, { 0, 0 }, + { 1, 2 }, { 0, 2 }, { 0, 1 }, { 1, 2 }, { 0, 0 }, }, }, + { { { 27, 29 }, { 0, 1 }, { 9, 25 }, { 53, 
51 }, { 12, 34 }, + { 0, 1 }, { 0, 3 }, { 1, 5 }, { 0, 2 }, { 0, 0 }, }, + { { 4, 2 }, { 0, 0 }, { 0, 172 }, { 0, 0 }, { 0, 0 }, + { 0, 1 }, { 0, 2 }, { 0, 0 }, { 2, 0 }, { 0, 0 }, }, + { { 14, 23 }, { 1, 3 }, { 11, 53 }, { 90, 31 }, { 0, 0 }, + { 0, 3 }, { 1, 5 }, { 2, 6 }, { 1, 2 }, { 0, 0 }, }, }, + { { { 80, 38 }, { 0, 0 }, { 1, 4 }, { 69, 33 }, { 5, 16 }, + { 0, 1 }, { 0, 1 }, { 0, 0 }, { 0, 1 }, { 0, 0 }, }, + { { 187, 22 }, { 1, 1 }, { 0, 17 }, { 0, 0 }, { 0, 0 }, + { 3, 6 }, { 0, 4 }, { 0, 1 }, { 4, 4 }, { 0, 1 }, }, + { { 123, 29 }, { 0, 0 }, { 1, 7 }, { 57, 30 }, { 0, 0 }, + { 0, 1 }, { 0, 1 }, { 0, 1 }, { 0, 1 }, { 0, 0 }, }, }, + { { { 16, 20 }, { 0, 0 }, { 2, 8 }, { 104, 49 }, { 15, 33 }, + { 0, 1 }, { 0, 1 }, { 0, 1 }, { 1, 1 }, { 0, 0 }, }, + { { 133, 6 }, { 1, 2 }, { 1, 70 }, { 0, 0 }, { 0, 0 }, + { 0, 2 }, { 0, 4 }, { 0, 3 }, { 1, 1 }, { 0, 0 }, }, + { { 13, 14 }, { 0, 0 }, { 4, 20 }, { 175, 20 }, { 0, 0 }, + { 0, 1 }, { 0, 1 }, { 0, 1 }, { 1, 1 }, { 0, 0 }, }, }, + { { { 194, 16 }, { 0, 0 }, { 1, 1 }, { 1, 9 }, { 1, 3 }, + { 0, 0 }, { 0, 1 }, { 0, 1 }, { 0, 0 }, { 0, 0 }, }, + { { 251, 1 }, { 0, 0 }, { 0, 2 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, }, + { { 202, 23 }, { 0, 0 }, { 1, 3 }, { 2, 9 }, { 0, 0 }, + { 0, 1 }, { 0, 1 }, { 0, 1 }, { 0, 0 }, { 0, 0 }, }, }, +}; + +static const uint8_t vp56_filter_threshold[] = { + 14, 14, 13, 13, 12, 12, 10, 10, + 10, 10, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 7, 7, 7, 7, + 7, 7, 6, 6, 6, 6, 6, 6, + 5, 5, 5, 5, 4, 4, 4, 4, + 4, 4, 4, 3, 3, 3, 3, 2, +}; + +static const uint8_t vp56_mb_type_model_model[] = { + 171, 83, 199, 140, 125, 104, +}; + +static const vp56_tree_t vp56_pmbtm_tree[] = { + { 4, 0}, + { 2, 1}, {-8}, {-4}, + { 8, 2}, + { 6, 3}, + { 4, 4}, + { 2, 5}, {-24}, {-20}, {-16}, {-12}, {-0}, +}; + +static const vp56_tree_t vp56_pmbt_tree[] = { + { 8, 1}, + { 4, 2}, + { 2, 4}, {-VP56_MB_INTER_NOVEC_PF}, {-VP56_MB_INTER_DELTA_PF}, + { 2, 5}, {-VP56_MB_INTER_V1_PF}, {-VP56_MB_INTER_V2_PF}, + { 4, 3}, + { 2, 6}, {-VP56_MB_INTRA}, {-VP56_MB_INTER_4V}, + { 4, 7}, + { 2, 8}, {-VP56_MB_INTER_NOVEC_GF}, {-VP56_MB_INTER_DELTA_GF}, + { 2, 9}, {-VP56_MB_INTER_V1_GF}, {-VP56_MB_INTER_V2_GF}, +}; + +/* relative pos of surrounding blocks, from closest to farthest */ +static const int8_t vp56_candidate_predictor_pos[12][2] = { + { 0, -1 }, + { -1, 0 }, + { -1, -1 }, + { 1, -1 }, + { 0, -2 }, + { -2, 0 }, + { -2, -1 }, + { -1, -2 }, + { 1, -2 }, + { 2, -1 }, + { -2, -2 }, + { 2, -2 }, +}; + +#endif /* VP56DATA */ diff --git a/src/libffmpeg/libavcodec/vp5data.h b/src/libffmpeg/libavcodec/vp5data.h new file mode 100644 index 000000000..effc17c2c --- /dev/null +++ b/src/libffmpeg/libavcodec/vp5data.h @@ -0,0 +1,173 @@ +/** + * @file vp5data.h + * VP5 compatible video decoder + * + * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef VP5DATA_H +#define VP5DATA_H + +static const uint8_t vp5_coeff_groups[] = { + -1, 0, 1, 1, 2, 1, 1, 2, + 2, 1, 1, 2, 2, 2, 1, 2, + 2, 2, 2, 2, 1, 1, 2, 2, + 3, 3, 4, 3, 4, 4, 4, 3, + 3, 3, 3, 3, 4, 3, 3, 3, + 4, 4, 4, 4, 4, 3, 3, 4, + 4, 4, 3, 4, 4, 4, 4, 4, + 4, 4, 5, 5, 5, 5, 5, 5, +}; + +static const uint8_t vp5_vmc_pct[2][11] = { + { 243, 220, 251, 253, 237, 232, 241, 245, 247, 251, 253 }, + { 235, 211, 246, 249, 234, 231, 248, 249, 252, 252, 254 }, +}; + +static const uint8_t vp5_dccv_pct[2][11] = { + { 146, 197, 181, 207, 232, 243, 238, 251, 244, 250, 249 }, + { 179, 219, 214, 240, 250, 254, 244, 254, 254, 254, 254 }, +}; + +static const uint8_t vp5_ract_pct[3][2][6][11] = { + { { { 227, 246, 230, 247, 244, 254, 254, 254, 254, 254, 254 }, + { 202, 254, 209, 231, 231, 249, 249, 253, 254, 254, 254 }, + { 206, 254, 225, 242, 241, 251, 253, 254, 254, 254, 254 }, + { 235, 254, 241, 253, 252, 254, 254, 254, 254, 254, 254 }, + { 234, 254, 248, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } }, + { { 240, 254, 248, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 238, 254, 240, 253, 254, 254, 254, 254, 254, 254, 254 }, + { 244, 254, 251, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } } }, + { { { 206, 203, 227, 239, 247, 254, 253, 254, 254, 254, 254 }, + { 207, 199, 220, 236, 243, 252, 252, 254, 254, 254, 254 }, + { 212, 219, 230, 243, 244, 253, 252, 254, 254, 254, 254 }, + { 236, 237, 247, 252, 253, 254, 254, 254, 254, 254, 254 }, + { 240, 240, 248, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } }, + { { 230, 233, 249, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 238, 238, 250, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 248, 251, 254, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } } }, + { { { 225, 239, 227, 231, 244, 253, 243, 254, 254, 253, 254 }, + { 232, 234, 224, 228, 242, 249, 242, 252, 251, 251, 254 }, + { 235, 249, 238, 240, 251, 254, 249, 254, 253, 253, 254 }, + { 249, 253, 251, 250, 254, 254, 254, 254, 254, 254, 254 }, + { 251, 250, 249, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } }, + { { 243, 244, 250, 250, 254, 254, 254, 254, 254, 254, 254 }, + { 249, 248, 250, 253, 254, 254, 254, 254, 254, 254, 254 }, + { 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } } }, +}; + +static const int16_t vp5_dccv_lc[5][36][2] = { + { {154, 61}, {141, 54}, { 90, 45}, { 54, 34}, { 54, 13}, {128, 109}, + {136, 54}, {148, 45}, { 92, 41}, { 54, 33}, { 51, 15}, { 87, 113}, + { 87, 44}, { 97, 40}, { 67, 36}, { 46, 29}, { 41, 15}, { 64, 80}, + { 59, 33}, { 61, 31}, { 51, 28}, { 44, 22}, { 33, 12}, { 49, 63}, + { 69, 12}, { 59, 16}, { 46, 14}, { 31, 13}, { 26, 6}, { 92, 26}, + 
{128, 108}, { 77, 119}, { 54, 84}, { 26, 71}, { 87, 19}, { 95, 155} }, + { {154, 4}, {182, 0}, {159, -8}, {128, -5}, {143, -5}, {187, 55}, + {182, 0}, {228, -3}, {187, -7}, {174, -9}, {189, -11}, {169, 79}, + {161, -9}, {192, -8}, {187, -9}, {169, -10}, {136, -9}, {184, 40}, + {164, -11}, {179, -10}, {174, -10}, {161, -10}, {115, -7}, {197, 20}, + {195, -11}, {195, -11}, {146, -10}, {110, -6}, { 95, -4}, {195, 39}, + {182, 55}, {172, 77}, {177, 37}, {169, 29}, {172, 52}, { 92, 162} }, + { {174, 80}, {164, 80}, { 95, 80}, { 46, 66}, { 56, 24}, { 36, 193}, + {164, 80}, {166, 77}, {105, 76}, { 49, 68}, { 46, 31}, { 49, 186}, + { 97, 78}, {110, 74}, { 72, 72}, { 44, 60}, { 33, 30}, { 69, 131}, + { 61, 61}, { 69, 63}, { 51, 57}, { 31, 48}, { 26, 27}, { 64, 89}, + { 67, 23}, { 51, 32}, { 36, 33}, { 26, 28}, { 20, 12}, { 44, 68}, + { 26, 197}, { 41, 189}, { 61, 129}, { 28, 103}, { 49, 52}, {-12, 245} }, + { {102, 141}, { 79, 166}, { 72, 162}, { 97, 125}, {179, 4}, {307, 0}, + { 72, 168}, { 69, 175}, { 84, 160}, {105, 127}, {148, 34}, {310, 0}, + { 84, 151}, { 82, 161}, { 87, 153}, { 87, 135}, {115, 51}, {317, 0}, + { 97, 125}, {102, 131}, {105, 125}, { 87, 122}, { 84, 64}, { 54, 184}, + {166, 18}, {146, 43}, {125, 51}, { 90, 64}, { 95, 7}, { 38, 154}, + {294, 0}, { 13, 225}, { 10, 225}, { 67, 168}, { 0, 167}, {161, 94} }, + { {172, 76}, {172, 75}, {136, 80}, { 64, 98}, { 74, 67}, {315, 0}, + {169, 76}, {207, 56}, {164, 66}, { 97, 80}, { 67, 72}, {328, 0}, + {136, 80}, {187, 53}, {154, 62}, { 72, 85}, { -2, 105}, {305, 0}, + { 74, 91}, {128, 64}, {113, 64}, { 61, 77}, { 41, 75}, {259, 0}, + { 46, 84}, { 51, 81}, { 28, 89}, { 31, 78}, { 23, 77}, {202, 0}, + {323, 0}, {323, 0}, {300, 0}, {236, 0}, {195, 0}, {328, 0} }, +}; + +static const int16_t vp5_ract_lc[3][3][5][6][2] = { + { { { {276, 0}, {238, 0}, {195, 0}, {156, 0}, {113, 0}, {274, 0} }, + { { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1} }, + { {192, 59}, {182, 50}, {141, 48}, {110, 40}, { 92, 19}, {125,128} }, + { {169, 87}, {169, 83}, {184, 62}, {220, 16}, {184, 0}, {264, 0} }, + { {212, 40}, {212, 36}, {169, 49}, {174, 27}, { 8,120}, {182, 71} } }, + { { {259, 10}, {197, 19}, {143, 22}, {123, 16}, {110, 8}, {133, 88} }, + { { 0, 1}, {256, 0}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1} }, + { {207, 46}, {187, 50}, { 97, 83}, { 23,100}, { 41, 56}, { 56,188} }, + { {166, 90}, {146,108}, {161, 88}, {136, 95}, {174, 0}, {266, 0} }, + { {264, 7}, {243, 18}, {184, 43}, {-14,154}, { 20,112}, { 20,199} } }, + { { {230, 26}, {197, 22}, {159, 20}, {146, 12}, {136, 4}, { 54,162} }, + { { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1} }, + { {192, 59}, {156, 72}, { 84,101}, { 49,101}, { 79, 47}, { 79,167} }, + { {138,115}, {136,116}, {166, 80}, {238, 0}, {195, 0}, {261, 0} }, + { {225, 33}, {205, 42}, {159, 61}, { 79, 96}, { 92, 66}, { 28,195} } }, + }, { + { { {200, 37}, {197, 18}, {159, 13}, {143, 7}, {102, 5}, {123,126} }, + { {197, 3}, {220, -9}, {210,-12}, {187, -6}, {151, -2}, {174, 80} }, + { {200, 53}, {187, 47}, {159, 40}, {118, 38}, {100, 18}, {141,111} }, + { {179, 78}, {166, 86}, {197, 50}, {207, 27}, {187, 0}, {115,139} }, + { {218, 34}, {220, 29}, {174, 46}, {128, 61}, { 54, 89}, {187, 65} } }, + { { {238, 14}, {197, 18}, {125, 26}, { 90, 25}, { 82, 13}, {161, 86} }, + { {189, 1}, {205, -2}, {156, -4}, {143, -4}, {146, -4}, {172, 72} }, + { {230, 31}, {192, 45}, {102, 76}, { 38, 85}, { 56, 41}, { 64,173} }, + { {166, 91}, {141,111}, {128,116}, {118,109}, {177, 0}, { 23,222} }, + { {253, 14}, {236, 21}, {174, 49}, { 33,118}, { 44, 93}, 
{ 23,187} } }, + { { {218, 28}, {179, 28}, {118, 35}, { 95, 30}, { 72, 24}, {128,108} }, + { {187, 1}, {174, -1}, {125, -1}, {110, -1}, {108, -1}, {202, 52} }, + { {197, 53}, {146, 75}, { 46,118}, { 33,103}, { 64, 50}, {118,126} }, + { {138,114}, {128,122}, {161, 86}, {243, -6}, {195, 0}, { 38,210} }, + { {215, 39}, {179, 58}, { 97,101}, { 95, 85}, { 87, 70}, { 69,152} } }, + }, { + { { {236, 24}, {205, 18}, {172, 12}, {154, 6}, {125, 1}, {169, 75} }, + { {187, 4}, {230, -2}, {228, -4}, {236, -4}, {241, -2}, {192, 66} }, + { {200, 46}, {187, 42}, {159, 34}, {136, 25}, {105, 10}, {179, 62} }, + { {207, 55}, {192, 63}, {192, 54}, {195, 36}, {177, 1}, {143, 98} }, + { {225, 27}, {207, 34}, {200, 30}, {131, 57}, { 97, 60}, {197, 45} } }, + { { {271, 8}, {218, 13}, {133, 19}, { 90, 19}, { 72, 7}, {182, 51} }, + { {179, 1}, {225, -1}, {154, -2}, {110, -1}, { 92, 0}, {195, 41} }, + { {241, 26}, {189, 40}, { 82, 64}, { 33, 60}, { 67, 17}, {120, 94} }, + { {192, 68}, {151, 94}, {146, 90}, {143, 72}, {161, 0}, {113,128} }, + { {256, 12}, {218, 29}, {166, 48}, { 44, 99}, { 31, 87}, {148, 78} } }, + { { {238, 20}, {184, 22}, {113, 27}, { 90, 22}, { 74, 9}, {192, 37} }, + { {184, 0}, {215, -1}, {141, -1}, { 97, 0}, { 49, 0}, {264, 13} }, + { {182, 51}, {138, 61}, { 95, 63}, { 54, 59}, { 64, 25}, {200, 45} }, + { {179, 75}, {156, 87}, {174, 65}, {177, 44}, {174, 0}, {164, 85} }, + { {195, 45}, {148, 65}, {105, 79}, { 95, 72}, { 87, 60}, {169, 63} } }, + } +}; + +static const uint8_t vp5_coord_div[] = { 2, 2, 2, 2, 4, 4 }; + +#endif /* VP5DATA_H */ diff --git a/src/libffmpeg/libavcodec/vp6.c b/src/libffmpeg/libavcodec/vp6.c new file mode 100644 index 000000000..381fcc8ee --- /dev/null +++ b/src/libffmpeg/libavcodec/vp6.c @@ -0,0 +1,537 @@ +/** + * @file vp6.c + * VP6 compatible video decoder + * + * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * + * The VP6F decoder accepts an optional 1-byte extradata. 
It is composed of: + * - upper 4bits: difference between encoded width and visible width + * - lower 4bits: difference between encoded height and visible height + */ + +#include <stdlib.h> + +#include "avcodec.h" +#include "dsputil.h" +#include "bitstream.h" +#include "mpegvideo.h" + +#include "vp56.h" +#include "vp56data.h" +#include "vp6data.h" + + +static int vp6_parse_header(vp56_context_t *s, uint8_t *buf, int buf_size, + int *golden_frame) +{ + vp56_range_coder_t *c = &s->c; + int parse_filter_info = 0; + int vrt_shift = 0; + int sub_version; + int rows, cols; + int res = 1; + + if (buf[0] & 1) + return 0; + + s->frames[VP56_FRAME_CURRENT].key_frame = !(buf[0] & 0x80); + vp56_init_dequant(s, (buf[0] >> 1) & 0x3F); + + if (s->frames[VP56_FRAME_CURRENT].key_frame) { + sub_version = buf[1] >> 3; + if (sub_version > 8) + return 0; + if ((buf[1] & 0x06) != 0x06) + return 0; + if (buf[1] & 1) { + av_log(s->avctx, AV_LOG_ERROR, "interlacing not supported\n"); + return 0; + } + + rows = buf[2]; /* number of stored macroblock rows */ + cols = buf[3]; /* number of stored macroblock cols */ + /* buf[4] is number of displayed macroblock rows */ + /* buf[5] is number of displayed macroblock cols */ + + if (16*cols != s->avctx->coded_width || + 16*rows != s->avctx->coded_height) { + avcodec_set_dimensions(s->avctx, 16*cols, 16*rows); + if (s->avctx->extradata_size == 1) { + s->avctx->width -= s->avctx->extradata[0] >> 4; + s->avctx->height -= s->avctx->extradata[0] & 0x0F; + } + res = 2; + } + + vp56_init_range_decoder(c, buf+6, buf_size-6); + vp56_rac_gets(c, 2); + + parse_filter_info = 1; + if (sub_version < 8) + vrt_shift = 5; + s->sub_version = sub_version; + } else { + if (!s->sub_version) + return 0; + + vp56_init_range_decoder(c, buf+1, buf_size-1); + + *golden_frame = vp56_rac_get(c); + s->deblock_filtering = vp56_rac_get(c); + if (s->deblock_filtering) + vp56_rac_get(c); + if (s->sub_version > 7) + parse_filter_info = vp56_rac_get(c); + } + + if (parse_filter_info) { + if (vp56_rac_get(c)) { + s->filter_mode = 2; + s->sample_variance_threshold = vp56_rac_gets(c, 5) << vrt_shift; + s->max_vector_length = 2 << vp56_rac_gets(c, 3); + } else if (vp56_rac_get(c)) { + s->filter_mode = 1; + } else { + s->filter_mode = 0; + } + if (s->sub_version > 7) + s->filter_selection = vp56_rac_gets(c, 4); + else + s->filter_selection = 16; + } + + vp56_rac_get(c); + return res; +} + +static void vp6_coeff_order_table_init(vp56_context_t *s) +{ + int i, pos, idx = 1; + + s->coeff_index_to_pos[0] = 0; + for (i=0; i<16; i++) + for (pos=1; pos<64; pos++) + if (s->coeff_reorder[pos] == i) + s->coeff_index_to_pos[idx++] = pos; +} + +static void vp6_default_models_init(vp56_context_t *s) +{ + s->vector_model_dct[0] = 0xA2; + s->vector_model_dct[1] = 0xA4; + s->vector_model_sig[0] = 0x80; + s->vector_model_sig[1] = 0x80; + + memcpy(s->mb_types_stats, vp56_def_mb_types_stats, sizeof(s->mb_types_stats)); + memcpy(s->vector_model_fdv, vp6_def_fdv_vector_model, sizeof(s->vector_model_fdv)); + memcpy(s->vector_model_pdv, vp6_def_pdv_vector_model, sizeof(s->vector_model_pdv)); + memcpy(s->coeff_model_runv, vp6_def_runv_coeff_model, sizeof(s->coeff_model_runv)); + memcpy(s->coeff_reorder, vp6_def_coeff_reorder, sizeof(s->coeff_reorder)); + + vp6_coeff_order_table_init(s); +} + +static void vp6_parse_vector_models(vp56_context_t *s) +{ + vp56_range_coder_t *c = &s->c; + int comp, node; + + for (comp=0; comp<2; comp++) { + if (vp56_rac_get_prob(c, vp6_sig_dct_pct[comp][0])) + s->vector_model_dct[comp] = vp56_rac_gets_nn(c, 
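/* An illustrative sketch (helper name hypothetical, not part of the patch)
 * of the extradata rule described in the file header above, mirroring the
 * adjustment vp6_parse_header() applies to avctx->width/height: */
static void vp6f_visible_size(uint8_t extradata, int coded_w, int coded_h,
                              int *vis_w, int *vis_h)
{
    *vis_w = coded_w - (extradata >> 4);   /* upper 4 bits: width delta  */
    *vis_h = coded_h - (extradata & 0x0F); /* lower 4 bits: height delta */
}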
7); + if (vp56_rac_get_prob(c, vp6_sig_dct_pct[comp][1])) + s->vector_model_sig[comp] = vp56_rac_gets_nn(c, 7); + } + + for (comp=0; comp<2; comp++) + for (node=0; node<7; node++) + if (vp56_rac_get_prob(c, vp6_pdv_pct[comp][node])) + s->vector_model_pdv[comp][node] = vp56_rac_gets_nn(c, 7); + + for (comp=0; comp<2; comp++) + for (node=0; node<8; node++) + if (vp56_rac_get_prob(c, vp6_fdv_pct[comp][node])) + s->vector_model_fdv[comp][node] = vp56_rac_gets_nn(c, 7); +} + +static void vp6_parse_coeff_models(vp56_context_t *s) +{ + vp56_range_coder_t *c = &s->c; + int def_prob[11]; + int node, cg, ctx, pos; + int ct; /* code type */ + int pt; /* plane type (0 for Y, 1 for U or V) */ + + memset(def_prob, 0x80, sizeof(def_prob)); + + for (pt=0; pt<2; pt++) + for (node=0; node<11; node++) + if (vp56_rac_get_prob(c, vp6_dccv_pct[pt][node])) { + def_prob[node] = vp56_rac_gets_nn(c, 7); + s->coeff_model_dccv[pt][node] = def_prob[node]; + } else if (s->frames[VP56_FRAME_CURRENT].key_frame) { + s->coeff_model_dccv[pt][node] = def_prob[node]; + } + + if (vp56_rac_get(c)) { + for (pos=1; pos<64; pos++) + if (vp56_rac_get_prob(c, vp6_coeff_reorder_pct[pos])) + s->coeff_reorder[pos] = vp56_rac_gets(c, 4); + vp6_coeff_order_table_init(s); + } + + for (cg=0; cg<2; cg++) + for (node=0; node<14; node++) + if (vp56_rac_get_prob(c, vp6_runv_pct[cg][node])) + s->coeff_model_runv[cg][node] = vp56_rac_gets_nn(c, 7); + + for (ct=0; ct<3; ct++) + for (pt=0; pt<2; pt++) + for (cg=0; cg<6; cg++) + for (node=0; node<11; node++) + if (vp56_rac_get_prob(c, vp6_ract_pct[ct][pt][cg][node])) { + def_prob[node] = vp56_rac_gets_nn(c, 7); + s->coeff_model_ract[pt][ct][cg][node] = def_prob[node]; + } else if (s->frames[VP56_FRAME_CURRENT].key_frame) { + s->coeff_model_ract[pt][ct][cg][node] = def_prob[node]; + } + + /* coeff_model_dcct is a linear combination of coeff_model_dccv */ + for (pt=0; pt<2; pt++) + for (ctx=0; ctx<3; ctx++) + for (node=0; node<5; node++) + s->coeff_model_dcct[pt][ctx][node] = clip(((s->coeff_model_dccv[pt][node] * vp6_dccv_lc[ctx][node][0] + 128) >> 8) + vp6_dccv_lc[ctx][node][1], 1, 255); +} + +static void vp6_parse_vector_adjustment(vp56_context_t *s, vp56_mv_t *vect) +{ + vp56_range_coder_t *c = &s->c; + int comp; + + *vect = (vp56_mv_t) {0,0}; + if (s->vector_candidate_pos < 2) + *vect = s->vector_candidate[0]; + + for (comp=0; comp<2; comp++) { + int i, delta = 0; + + if (vp56_rac_get_prob(c, s->vector_model_dct[comp])) { + static const uint8_t prob_order[] = {0, 1, 2, 7, 6, 5, 4}; + for (i=0; i<sizeof(prob_order); i++) { + int j = prob_order[i]; + delta |= vp56_rac_get_prob(c, s->vector_model_fdv[comp][j])<<j; + } + if (delta & 0xF0) + delta |= vp56_rac_get_prob(c, s->vector_model_fdv[comp][3])<<3; + else + delta |= 8; + } else { + delta = vp56_rac_get_tree(c, vp56_pva_tree, + s->vector_model_pdv[comp]); + } + + if (delta && vp56_rac_get_prob(c, s->vector_model_sig[comp])) + delta = -delta; + + if (!comp) + vect->x += delta; + else + vect->y += delta; + } +} + +static void vp6_parse_coeff(vp56_context_t *s) +{ + vp56_range_coder_t *c = &s->c; + uint8_t *permute = s->scantable.permutated; + uint8_t *model, *model2, *model3; + int coeff, sign, coeff_idx; + int b, i, cg, idx, ctx; + int pt = 0; /* plane type (0 for Y, 1 for U or V) */ + + for (b=0; b<6; b++) { + int ct = 1; /* code type */ + int run = 1; + + if (b > 3) pt = 1; + + ctx = s->left_block[vp56_b6to4[b]].not_null_dc + + s->above_blocks[s->above_block_idx[b]].not_null_dc; + model = s->coeff_model_dccv[pt]; + model2 = 
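/* The "linear combination" that fills coeff_model_dcct above is a
 * fixed-point affine map, prob' = clip(((prob * scale + 128) >> 8) + bias,
 * 1, 255), with the (scale, bias) pairs coming from vp6_dccv_lc.  A
 * stand-alone sketch (hypothetical helper, not from the patch): */
static int vp6_blend_prob(int prob, int scale, int bias)
{
    int v = ((prob * scale + 128) >> 8) + bias;
    return v < 1 ? 1 : v > 255 ? 255 : v;   /* same clamp as clip(v,1,255) */
}
/* e.g. ctx 0, node 0: ((128 * 122 + 128) >> 8) + 133 == 61 + 133 == 194 */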
s->coeff_model_dcct[pt][ctx]; + + for (coeff_idx=0; coeff_idx<64; ) { + if ((coeff_idx>1 && ct==0) || vp56_rac_get_prob(c, model2[0])) { + /* parse a coeff */ + if (coeff_idx == 0) { + s->left_block[vp56_b6to4[b]].not_null_dc = 1; + s->above_blocks[s->above_block_idx[b]].not_null_dc = 1; + } + + if (vp56_rac_get_prob(c, model2[2])) { + if (vp56_rac_get_prob(c, model2[3])) { + idx = vp56_rac_get_tree(c, vp56_pc_tree, model); + coeff = vp56_coeff_bias[idx]; + for (i=vp56_coeff_bit_length[idx]; i>=0; i--) + coeff += vp56_rac_get_prob(c, vp56_coeff_parse_table[idx][i]) << i; + } else { + if (vp56_rac_get_prob(c, model2[4])) + coeff = 3 + vp56_rac_get_prob(c, model[5]); + else + coeff = 2; + } + ct = 2; + } else { + ct = 1; + coeff = 1; + } + sign = vp56_rac_get(c); + coeff = (coeff ^ -sign) + sign; + if (coeff_idx) + coeff *= s->dequant_ac; + idx = s->coeff_index_to_pos[coeff_idx]; + s->block_coeff[b][permute[idx]] = coeff; + run = 1; + } else { + /* parse a run */ + ct = 0; + if (coeff_idx == 0) { + s->left_block[vp56_b6to4[b]].not_null_dc = 0; + s->above_blocks[s->above_block_idx[b]].not_null_dc = 0; + } else { + if (!vp56_rac_get_prob(c, model2[1])) + break; + + model3 = s->coeff_model_runv[coeff_idx >= 6]; + run = vp56_rac_get_tree(c, vp6_pcr_tree, model3); + if (!run) + for (run=9, i=0; i<6; i++) + run += vp56_rac_get_prob(c, model3[i+8]) << i; + } + } + + cg = vp6_coeff_groups[coeff_idx+=run]; + model = model2 = s->coeff_model_ract[pt][ct][cg]; + } + } +} + +static int vp6_adjust(int v, int t) +{ + int V = v, s = v >> 31; + V ^= s; + V -= s; + if (V-t-1 >= (unsigned)(t-1)) + return v; + V = 2*t - V; + V += s; + V ^= s; + return V; +} + +static int vp6_block_variance(uint8_t *src, int stride) +{ + int sum = 0, square_sum = 0; + int y, x; + + for (y=0; y<8; y+=2) { + for (x=0; x<8; x+=2) { + sum += src[x]; + square_sum += src[x]*src[x]; + } + src += 2*stride; + } + return (16*square_sum - sum*sum) >> 8; +} + +static void vp6_filter_hv2(vp56_context_t *s, uint8_t *dst, uint8_t *src, + int stride, int delta, int16_t weight) +{ + s->dsp.put_pixels_tab[1][0](dst, src, stride, 8); + s->dsp.biweight_h264_pixels_tab[3](dst, src+delta, stride, 2, + 8-weight, weight, 0); +} + +static void vp6_filter_hv4(uint8_t *dst, uint8_t *src, int stride, + int delta, const int16_t *weights) +{ + int x, y; + + for (y=0; y<8; y++) { + for (x=0; x<8; x++) { + dst[x] = clip_uint8(( src[x-delta ] * weights[0] + + src[x ] * weights[1] + + src[x+delta ] * weights[2] + + src[x+2*delta] * weights[3] + 64) >> 7); + } + src += stride; + dst += stride; + } +} + +static void vp6_filter_diag2(vp56_context_t *s, uint8_t *dst, uint8_t *src, + int stride, int h_weight, int v_weight) +{ + uint8_t *tmp = s->edge_emu_buffer+16; + int x, xmax; + + s->dsp.put_pixels_tab[1][0](tmp, src, stride, 8); + s->dsp.biweight_h264_pixels_tab[3](tmp, src+1, stride, 2, + 8-h_weight, h_weight, 0); + /* we need a 8x9 block to do vertical filter, so compute one more line */ + for (x=8*stride, xmax=x+8; x<xmax; x++) + tmp[x] = (src[x]*(8-h_weight) + src[x+1]*h_weight + 4) >> 3; + + s->dsp.put_pixels_tab[1][0](dst, tmp, stride, 8); + s->dsp.biweight_h264_pixels_tab[3](dst, tmp+stride, stride, 2, + 8-v_weight, v_weight, 0); +} + +static void vp6_filter_diag4(uint8_t *dst, uint8_t *src, int stride, + const int16_t *h_weights,const int16_t *v_weights) +{ + int x, y; + int tmp[8*11]; + int *t = tmp; + + src -= stride; + + for (y=0; y<11; y++) { + for (x=0; x<8; x++) { + t[x] = clip_uint8(( src[x-1] * h_weights[0] + + src[x ] * h_weights[1] + + src[x+1] 
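/* The sign step in vp6_parse_coeff() above, coeff = (coeff ^ -sign) + sign,
 * is branchless two's-complement negation: sign is 0 or 1, so -sign is 0 or
 * all-ones and the expression yields coeff or -coeff without a branch.
 * vp6_adjust() plays the same trick with s = v >> 31.  Stand-alone
 * (hypothetical name, not from the patch): */
static int apply_sign(int value, int sign_bit)  /* sign_bit must be 0 or 1 */
{
    return (value ^ -sign_bit) + sign_bit;      /* sign_bit ? -value : value */
}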
* h_weights[2] + + src[x+2] * h_weights[3] + 64) >> 7); + } + src += stride; + t += 8; + } + + t = tmp + 8; + for (y=0; y<8; y++) { + for (x=0; x<8; x++) { + dst[x] = clip_uint8(( t[x-8 ] * v_weights[0] + + t[x ] * v_weights[1] + + t[x+8 ] * v_weights[2] + + t[x+16] * v_weights[3] + 64) >> 7); + } + dst += stride; + t += 8; + } +} + +static void vp6_filter(vp56_context_t *s, uint8_t *dst, uint8_t *src, + int offset1, int offset2, int stride, + vp56_mv_t mv, int mask, int select, int luma) +{ + int filter4 = 0; + int x8 = mv.x & mask; + int y8 = mv.y & mask; + + if (luma) { + x8 *= 2; + y8 *= 2; + filter4 = s->filter_mode; + if (filter4 == 2) { + if (s->max_vector_length && + (FFABS(mv.x) > s->max_vector_length || + FFABS(mv.y) > s->max_vector_length)) { + filter4 = 0; + } else if (s->sample_variance_threshold + && (vp6_block_variance(src+offset1, stride) + < s->sample_variance_threshold)) { + filter4 = 0; + } + } + } + + if ((y8 && (offset2-offset1)*s->flip<0) || (!y8 && offset1 > offset2)) { + offset1 = offset2; + } + + if (filter4) { + if (!y8) { /* left or right combine */ + vp6_filter_hv4(dst, src+offset1, stride, 1, + vp6_block_copy_filter[select][x8]); + } else if (!x8) { /* above or below combine */ + vp6_filter_hv4(dst, src+offset1, stride, stride, + vp6_block_copy_filter[select][y8]); + } else if ((mv.x^mv.y) >> 31) { /* lower-left or upper-right combine */ + vp6_filter_diag4(dst, src+offset1-1, stride, + vp6_block_copy_filter[select][x8], + vp6_block_copy_filter[select][y8]); + } else { /* lower-right or upper-left combine */ + vp6_filter_diag4(dst, src+offset1, stride, + vp6_block_copy_filter[select][x8], + vp6_block_copy_filter[select][y8]); + } + } else { + if (!y8) { /* left or right combine */ + vp6_filter_hv2(s, dst, src+offset1, stride, 1, x8); + } else if (!x8) { /* above or below combine */ + vp6_filter_hv2(s, dst, src+offset1, stride, stride, y8); + } else if ((mv.x^mv.y) >> 31) { /* lower-left or upper-right combine */ + vp6_filter_diag2(s, dst, src+offset1-1, stride, x8, y8); + } else { /* lower-right or upper-left combine */ + vp6_filter_diag2(s, dst, src+offset1, stride, x8, y8); + } + } +} + +static int vp6_decode_init(AVCodecContext *avctx) +{ + vp56_context_t *s = avctx->priv_data; + + vp56_init(s, avctx, avctx->codec->id == CODEC_ID_VP6); + s->vp56_coord_div = vp6_coord_div; + s->parse_vector_adjustment = vp6_parse_vector_adjustment; + s->adjust = vp6_adjust; + s->filter = vp6_filter; + s->parse_coeff = vp6_parse_coeff; + s->default_models_init = vp6_default_models_init; + s->parse_vector_models = vp6_parse_vector_models; + s->parse_coeff_models = vp6_parse_coeff_models; + s->parse_header = vp6_parse_header; + + return 0; +} + +AVCodec vp6_decoder = { + "vp6", + CODEC_TYPE_VIDEO, + CODEC_ID_VP6, + sizeof(vp56_context_t), + vp6_decode_init, + NULL, + vp56_free, + vp56_decode_frame, +}; + +/* flash version, not flipped upside-down */ +AVCodec vp6f_decoder = { + "vp6f", + CODEC_TYPE_VIDEO, + CODEC_ID_VP6F, + sizeof(vp56_context_t), + vp6_decode_init, + NULL, + vp56_free, + vp56_decode_frame, +}; diff --git a/src/libffmpeg/libavcodec/vp6data.h b/src/libffmpeg/libavcodec/vp6data.h new file mode 100644 index 000000000..0545a9d66 --- /dev/null +++ b/src/libffmpeg/libavcodec/vp6data.h @@ -0,0 +1,300 @@ +/** + * @file vp6data.h + * VP6 compatible video decoder + * + * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org> + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef VP6DATA_H +#define VP6DATA_H + +#include "vp56data.h" + +static const uint8_t vp6_def_fdv_vector_model[2][8] = { + { 247, 210, 135, 68, 138, 220, 239, 246 }, + { 244, 184, 201, 44, 173, 221, 239, 253 }, +}; + +static const uint8_t vp6_def_pdv_vector_model[2][7] = { + { 225, 146, 172, 147, 214, 39, 156 }, + { 204, 170, 119, 235, 140, 230, 228 }, +}; + +static const uint8_t vp6_def_coeff_reorder[] = { + 0, 0, 1, 1, 1, 2, 2, 2, + 2, 2, 2, 3, 3, 4, 4, 4, + 5, 5, 5, 5, 6, 6, 7, 7, + 7, 7, 7, 8, 8, 9, 9, 9, + 9, 9, 9, 10, 10, 11, 11, 11, + 11, 11, 11, 12, 12, 12, 12, 12, + 12, 13, 13, 13, 13, 13, 14, 14, + 14, 14, 15, 15, 15, 15, 15, 15, +}; + +static const uint8_t vp6_def_runv_coeff_model[2][14] = { + { 198, 197, 196, 146, 198, 204, 169, 142, 130, 136, 149, 149, 191, 249 }, + { 135, 201, 181, 154, 98, 117, 132, 126, 146, 169, 184, 240, 246, 254 }, +}; + +static const uint8_t vp6_sig_dct_pct[2][2] = { + { 237, 246 }, + { 231, 243 }, +}; + +static const uint8_t vp6_pdv_pct[2][7] = { + { 253, 253, 254, 254, 254, 254, 254 }, + { 245, 253, 254, 254, 254, 254, 254 }, +}; + +static const uint8_t vp6_fdv_pct[2][8] = { + { 254, 254, 254, 254, 254, 250, 250, 252 }, + { 254, 254, 254, 254, 254, 251, 251, 254 }, +}; + +static const uint8_t vp6_dccv_pct[2][11] = { + { 146, 255, 181, 207, 232, 243, 238, 251, 244, 250, 249 }, + { 179, 255, 214, 240, 250, 255, 244, 255, 255, 255, 255 }, +}; + +static const uint8_t vp6_coeff_reorder_pct[] = { + 255, 132, 132, 159, 153, 151, 161, 170, + 164, 162, 136, 110, 103, 114, 129, 118, + 124, 125, 132, 136, 114, 110, 142, 135, + 134, 123, 143, 126, 153, 183, 166, 161, + 171, 180, 179, 164, 203, 218, 225, 217, + 215, 206, 203, 217, 229, 241, 248, 243, + 253, 255, 253, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, +}; + +static const uint8_t vp6_runv_pct[2][14] = { + { 219, 246, 238, 249, 232, 239, 249, 255, 248, 253, 239, 244, 241, 248 }, + { 198, 232, 251, 253, 219, 241, 253, 255, 248, 249, 244, 238, 251, 255 }, +}; + +static const uint8_t vp6_ract_pct[3][2][6][11] = { + { { { 227, 246, 230, 247, 244, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 209, 231, 231, 249, 249, 253, 255, 255, 255 }, + { 255, 255, 225, 242, 241, 251, 253, 255, 255, 255, 255 }, + { 255, 255, 241, 253, 252, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 248, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } }, + { { 240, 255, 248, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 240, 253, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } } }, + { { { 206, 203, 
227, 239, 247, 255, 253, 255, 255, 255, 255 }, + { 207, 199, 220, 236, 243, 252, 252, 255, 255, 255, 255 }, + { 212, 219, 230, 243, 244, 253, 252, 255, 255, 255, 255 }, + { 236, 237, 247, 252, 253, 255, 255, 255, 255, 255, 255 }, + { 240, 240, 248, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } }, + { { 230, 233, 249, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 238, 238, 250, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 248, 251, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } } }, + { { { 225, 239, 227, 231, 244, 253, 243, 255, 255, 253, 255 }, + { 232, 234, 224, 228, 242, 249, 242, 252, 251, 251, 255 }, + { 235, 249, 238, 240, 251, 255, 249, 255, 253, 253, 255 }, + { 249, 253, 251, 250, 255, 255, 255, 255, 255, 255, 255 }, + { 251, 250, 249, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } }, + { { 243, 244, 250, 250, 255, 255, 255, 255, 255, 255, 255 }, + { 249, 248, 250, 253, 255, 255, 255, 255, 255, 255, 255 }, + { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } } } +}; + +static const int vp6_dccv_lc[3][5][2] = { + { { 122, 133 }, { 0, 1 }, { 78, 171 }, { 139, 117 }, { 168, 79 } }, + { { 133, 51 }, { 0, 1 }, { 169, 71 }, { 214, 44 }, { 210, 38 } }, + { { 142, -16 }, { 0, 1 }, { 221, -30 }, { 246, -3 }, { 203, 17 } }, +}; + +static const uint8_t vp6_coeff_groups[] = { + 0, 0, 1, 1, 1, 2, 2, 2, + 2, 2, 2, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, +}; + +static const int16_t vp6_block_copy_filter[17][8][4] = { + { { 0, 128, 0, 0 }, /* 0 */ + { -3, 122, 9, 0 }, + { -4, 109, 24, -1 }, + { -5, 91, 45, -3 }, + { -4, 68, 68, -4 }, + { -3, 45, 91, -5 }, + { -1, 24, 109, -4 }, + { 0, 9, 122, -3 } }, + { { 0, 128, 0, 0 }, /* 1 */ + { -4, 124, 9, -1 }, + { -5, 110, 25, -2 }, + { -6, 91, 46, -3 }, + { -5, 69, 69, -5 }, + { -3, 46, 91, -6 }, + { -2, 25, 110, -5 }, + { -1, 9, 124, -4 } }, + { { 0, 128, 0, 0 }, /* 2 */ + { -4, 123, 10, -1 }, + { -6, 110, 26, -2 }, + { -7, 92, 47, -4 }, + { -6, 70, 70, -6 }, + { -4, 47, 92, -7 }, + { -2, 26, 110, -6 }, + { -1, 10, 123, -4 } }, + { { 0, 128, 0, 0 }, /* 3 */ + { -5, 124, 10, -1 }, + { -7, 110, 27, -2 }, + { -7, 91, 48, -4 }, + { -6, 70, 70, -6 }, + { -4, 48, 92, -8 }, + { -2, 27, 110, -7 }, + { -1, 10, 124, -5 } }, + { { 0, 128, 0, 0 }, /* 4 */ + { -6, 124, 11, -1 }, + { -8, 111, 28, -3 }, + { -8, 92, 49, -5 }, + { -7, 71, 71, -7 }, + { -5, 49, 92, -8 }, + { -3, 28, 111, -8 }, + { -1, 11, 124, -6 } }, + { { 0, 128, 0, 0 }, /* 5 */ + { -6, 123, 12, -1 }, + { -9, 111, 29, -3 }, + { -9, 93, 50, -6 }, + { -8, 72, 72, -8 }, + { -6, 50, 93, -9 }, + { -3, 29, 111, -9 }, + { -1, 12, 123, -6 } }, + { { 0, 128, 0, 0 }, /* 6 */ + { -7, 124, 12, -1 }, + { -10, 111, 30, -3 }, + { -10, 93, 51, -6 }, + { -9, 73, 73, -9 }, + { -6, 51, 93, -10 }, + { -3, 30, 111, -10 }, + { -1, 12, 124, -7 } }, + { { 0, 128, 0, 0 }, /* 7 */ + { -7, 123, 13, -1 }, + { -11, 112, 31, -4 }, + { -11, 94, 52, -7 }, + { -10, 74, 74, -10 }, + { -7, 52, 94, -11 }, + { -4, 31, 112, -11 }, + { 
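/* Note: every 4-tap row of vp6_block_copy_filter sums to 128, and
 * vp6_filter_hv4()/vp6_filter_diag4() above add 64 before shifting right by
 * 7 (divide by 128 with rounding), so each sub-pel position keeps unity DC
 * gain. */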
-1, 13, 123, -7 } }, + { { 0, 128, 0, 0 }, /* 8 */ + { -8, 124, 13, -1 }, + { -12, 112, 32, -4 }, + { -12, 94, 53, -7 }, + { -10, 74, 74, -10 }, + { -7, 53, 94, -12 }, + { -4, 32, 112, -12 }, + { -1, 13, 124, -8 } }, + { { 0, 128, 0, 0 }, /* 9 */ + { -9, 124, 14, -1 }, + { -13, 112, 33, -4 }, + { -13, 95, 54, -8 }, + { -11, 75, 75, -11 }, + { -8, 54, 95, -13 }, + { -4, 33, 112, -13 }, + { -1, 14, 124, -9 } }, + { { 0, 128, 0, 0 }, /* 10 */ + { -9, 123, 15, -1 }, + { -14, 113, 34, -5 }, + { -14, 95, 55, -8 }, + { -12, 76, 76, -12 }, + { -8, 55, 95, -14 }, + { -5, 34, 112, -13 }, + { -1, 15, 123, -9 } }, + { { 0, 128, 0, 0 }, /* 11 */ + { -10, 124, 15, -1 }, + { -14, 113, 34, -5 }, + { -15, 96, 56, -9 }, + { -13, 77, 77, -13 }, + { -9, 56, 96, -15 }, + { -5, 34, 113, -14 }, + { -1, 15, 124, -10 } }, + { { 0, 128, 0, 0 }, /* 12 */ + { -10, 123, 16, -1 }, + { -15, 113, 35, -5 }, + { -16, 98, 56, -10 }, + { -14, 78, 78, -14 }, + { -10, 56, 98, -16 }, + { -5, 35, 113, -15 }, + { -1, 16, 123, -10 } }, + { { 0, 128, 0, 0 }, /* 13 */ + { -11, 124, 17, -2 }, + { -16, 113, 36, -5 }, + { -17, 98, 57, -10 }, + { -14, 78, 78, -14 }, + { -10, 57, 98, -17 }, + { -5, 36, 113, -16 }, + { -2, 17, 124, -11 } }, + { { 0, 128, 0, 0 }, /* 14 */ + { -12, 125, 17, -2 }, + { -17, 114, 37, -6 }, + { -18, 99, 58, -11 }, + { -15, 79, 79, -15 }, + { -11, 58, 99, -18 }, + { -6, 37, 114, -17 }, + { -2, 17, 125, -12 } }, + { { 0, 128, 0, 0 }, /* 15 */ + { -12, 124, 18, -2 }, + { -18, 114, 38, -6 }, + { -19, 99, 59, -11 }, + { -16, 80, 80, -16 }, + { -11, 59, 99, -19 }, + { -6, 38, 114, -18 }, + { -2, 18, 124, -12 } }, + { { 0, 128, 0, 0 }, /* 16 */ + { -4, 118, 16, -2 }, + { -7, 106, 34, -5 }, + { -8, 90, 53, -7 }, + { -8, 72, 72, -8 }, + { -7, 53, 90, -8 }, + { -5, 34, 106, -7 }, + { -2, 16, 118, -4 } }, +}; + +static const vp56_tree_t vp6_pcr_tree[] = { + { 8, 0}, + { 4, 1}, + { 2, 2}, {-1}, {-2}, + { 2, 3}, {-3}, {-4}, + { 8, 4}, + { 4, 5}, + { 2, 6}, {-5}, {-6}, + { 2, 7}, {-7}, {-8}, + {-0}, +}; + +static const uint8_t vp6_coord_div[] = { 4, 4, 4, 4, 8, 8 }; + +#endif /* VP6DATA_H */ diff --git a/src/libffmpeg/libavcodec/wmadec.c b/src/libffmpeg/libavcodec/wmadec.c index 684aea2c8..bbf4970ce 100644 --- a/src/libffmpeg/libavcodec/wmadec.c +++ b/src/libffmpeg/libavcodec/wmadec.c @@ -115,6 +115,8 @@ typedef struct WMADecodeContext { float max_exponent[MAX_CHANNELS]; int16_t coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE]; DECLARE_ALIGNED_16(float, coefs[MAX_CHANNELS][BLOCK_MAX_SIZE]); + DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]); + DECLARE_ALIGNED_16(float, window[BLOCK_MAX_SIZE * 2]); MDCTContext mdct_ctx[BLOCK_NB_SIZES]; float *windows[BLOCK_NB_SIZES]; DECLARE_ALIGNED_16(FFTSample, mdct_tmp[BLOCK_MAX_SIZE]); /* temporary storage for imdct */ @@ -717,7 +719,6 @@ static int wma_decode_block(WMADecodeContext *s) { int n, v, a, ch, code, bsize; int coef_nb_bits, total_gain, parse_exponents; - DECLARE_ALIGNED_16(float, window[BLOCK_MAX_SIZE * 2]); int nb_coefs[MAX_CHANNELS]; float mdct_norm; @@ -1072,7 +1073,7 @@ static int wma_decode_block(WMADecodeContext *s) next_block_len = 1 << s->next_block_len_bits; /* right part */ - wptr = window + block_len; + wptr = s->window + block_len; if (block_len <= next_block_len) { for(i=0;i<block_len;i++) *wptr++ = s->windows[bsize][i]; @@ -1088,7 +1089,7 @@ static int wma_decode_block(WMADecodeContext *s) } /* left part */ - wptr = window + block_len; + wptr = s->window + block_len; if (block_len <= prev_block_len) { for(i=0;i<block_len;i++) *--wptr = s->windows[bsize][i]; @@ 
-1107,14 +1108,13 @@ static int wma_decode_block(WMADecodeContext *s) for(ch = 0; ch < s->nb_channels; ch++) { if (s->channel_coded[ch]) { - DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]); float *ptr; int n4, index, n; n = s->block_len; n4 = s->block_len / 2; s->mdct_ctx[bsize].fft.imdct_calc(&s->mdct_ctx[bsize], - output, s->coefs[ch], s->mdct_tmp); + s->output, s->coefs[ch], s->mdct_tmp); /* XXX: optimize all that by build the window and multipying/adding at the same time */ @@ -1122,13 +1122,13 @@ static int wma_decode_block(WMADecodeContext *s) /* multiply by the window and add in the frame */ index = (s->frame_len / 2) + s->block_pos - n4; ptr = &s->frame_out[ch][index]; - s->dsp.vector_fmul_add_add(ptr,window,output,ptr,0,2*n,1); + s->dsp.vector_fmul_add_add(ptr,s->window,s->output,ptr,0,2*n,1); /* specific fast case for ms-stereo : add to second channel if it is not coded */ if (s->ms_stereo && !s->channel_coded[1]) { ptr = &s->frame_out[1][index]; - s->dsp.vector_fmul_add_add(ptr,window,output,ptr,0,2*n,1); + s->dsp.vector_fmul_add_add(ptr,s->window,s->output,ptr,0,2*n,1); } } } diff --git a/src/libffmpeg/libavcodec/wmv2.c b/src/libffmpeg/libavcodec/wmv2.c index 5abc51775..f3d4f0f23 100644 --- a/src/libffmpeg/libavcodec/wmv2.c +++ b/src/libffmpeg/libavcodec/wmv2.c @@ -643,6 +643,12 @@ void ff_mspel_motion(MpegEncContext *s, v_edge_pos = s->v_edge_pos; src_x = clip(src_x, -16, s->width); src_y = clip(src_y, -16, s->height); + + if(src_x<=-16 || src_x >= s->width) + dxy &= ~3; + if(src_y<=-16 || src_y >= s->height) + dxy &= ~4; + linesize = s->linesize; uvlinesize = s->uvlinesize; ptr = ref_picture[0] + (src_y * linesize) + src_x; diff --git a/src/libffmpeg/libavutil/Makefile.am b/src/libffmpeg/libavutil/Makefile.am index 76340cf14..6e507cb67 100644 --- a/src/libffmpeg/libavutil/Makefile.am +++ b/src/libffmpeg/libavutil/Makefile.am @@ -1,6 +1,6 @@ include $(top_srcdir)/misc/Makefile.common -AM_CPPFLAGS = $(LIBFFMPEG_CPPFLAGS) +AM_CPPFLAGS = $(LIBFFMPEG_CPPFLAGS) -I$(top_srcdir)/src/libffmpeg AM_CFLAGS = -fno-strict-aliasing ASFLAGS = @@ -28,6 +28,7 @@ noinst_HEADERS = \ integer.h \ internal.h \ intfloat_readwrite.h \ + intreadwrite.h \ lls.h \ log.h \ mathematics.h \ diff --git a/src/libffmpeg/libavutil/bswap.h b/src/libffmpeg/libavutil/bswap.h index 4614c9045..03d613db2 100644 --- a/src/libffmpeg/libavutil/bswap.h +++ b/src/libffmpeg/libavutil/bswap.h @@ -37,7 +37,7 @@ #endif #if defined(ARCH_X86) -static always_inline uint16_t bswap_16(uint16_t x) +static av_always_inline uint16_t bswap_16(uint16_t x) { __asm("rorw $8, %0" : LEGACY_REGS (x) : @@ -45,7 +45,7 @@ static always_inline uint16_t bswap_16(uint16_t x) return x; } -static always_inline uint32_t bswap_32(uint32_t x) +static av_always_inline uint32_t bswap_32(uint32_t x) { #if __CPU__ != 386 __asm("bswap %0": @@ -82,12 +82,12 @@ static inline uint64_t bswap_64(uint64_t x) #elif defined(ARCH_SH4) -static always_inline uint16_t bswap_16(uint16_t x) { +static av_always_inline uint16_t bswap_16(uint16_t x) { __asm__("swap.b %0,%0":"=r"(x):"0"(x)); return x; } -static always_inline uint32_t bswap_32(uint32_t x) { +static av_always_inline uint32_t bswap_32(uint32_t x) { __asm__( "swap.b %0,%0\n" "swap.w %0,%0\n" @@ -110,12 +110,12 @@ static inline uint64_t bswap_64(uint64_t x) } #else -static always_inline uint16_t bswap_16(uint16_t x){ +static av_always_inline uint16_t bswap_16(uint16_t x){ return (x>>8) | (x<<8); } #ifdef ARCH_ARM -static always_inline uint32_t bswap_32(uint32_t x){ +static av_always_inline uint32_t 
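/* Two notes on the hunks above: in wmadec.c, the window[] and output[]
 * arrays (BLOCK_MAX_SIZE * 2 entries each) move from wma_decode_block()'s
 * stack into WMADecodeContext, presumably to trim per-call stack usage; the
 * 16-byte-aligned declarations are otherwise unchanged.  In bswap.h the
 * only change is the rename always_inline -> av_always_inline (the prefixed
 * macro is introduced in common.h further below); the byte-swapping logic
 * itself is untouched. */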
bswap_32(uint32_t x){ uint32_t t; __asm__ ( "eor %1, %0, %0, ror #16 \n\t" @@ -126,7 +126,7 @@ static always_inline uint32_t bswap_32(uint32_t x){ return x; } #else -static always_inline uint32_t bswap_32(uint32_t x){ +static av_always_inline uint32_t bswap_32(uint32_t x){ x= ((x<<8)&0xFF00FF00) | ((x>>8)&0x00FF00FF); return (x>>16) | (x<<16); } diff --git a/src/libffmpeg/libavutil/common.h b/src/libffmpeg/libavutil/common.h index d167404b6..0e093616c 100644 --- a/src/libffmpeg/libavutil/common.h +++ b/src/libffmpeg/libavutil/common.h @@ -26,9 +26,7 @@ #ifndef COMMON_H #define COMMON_H -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif +#include <inttypes.h> #ifdef HAVE_AV_CONFIG_H /* only include the following when compiling package */ @@ -47,34 +45,17 @@ # include <math.h> #endif /* HAVE_AV_CONFIG_H */ -/* Suppress restrict if it was not defined in config.h. */ -#ifndef restrict -# define restrict -#endif - -#ifndef always_inline -#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0) -# define always_inline __attribute__((always_inline)) inline -#else -# define always_inline inline -#endif -#endif - -#ifndef attribute_used +#ifndef av_always_inline #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0) -# define attribute_used __attribute__((used)) +# define av_always_inline __attribute__((always_inline)) inline #else -# define attribute_used +# define av_always_inline inline #endif #endif -#ifndef attribute_unused -#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0) -# define attribute_unused __attribute__((unused)) -#else -# define attribute_unused -#endif -#endif +#ifdef HAVE_AV_CONFIG_H +# include "internal.h" +#endif /* HAVE_AV_CONFIG_H */ #ifndef attribute_deprecated #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0) @@ -84,91 +65,9 @@ #endif #endif -# include <inttypes.h> - -#ifndef PRId64 -#define PRId64 "lld" -#endif - -#ifndef PRIu64 -#define PRIu64 "llu" -#endif - -#ifndef PRIx64 -#define PRIx64 "llx" -#endif - -#ifndef PRIX64 -#define PRIX64 "llX" -#endif - -#ifndef PRId32 -#define PRId32 "d" -#endif - -#ifndef PRIdFAST16 -#define PRIdFAST16 PRId32 -#endif - -#ifndef PRIdFAST32 -#define PRIdFAST32 PRId32 -#endif - -#ifndef INT16_MIN -#define INT16_MIN (-0x7fff-1) -#endif - -#ifndef INT16_MAX -#define INT16_MAX 0x7fff -#endif - -#ifndef INT32_MIN -#define INT32_MIN (-0x7fffffff-1) -#endif - -#ifndef INT32_MAX -#define INT32_MAX 0x7fffffff -#endif - -#ifndef UINT32_MAX -#define UINT32_MAX 0xffffffff -#endif - -#ifndef INT64_MIN -#define INT64_MIN (-0x7fffffffffffffffLL-1) -#endif - -#ifndef INT64_MAX -#define INT64_MAX int64_t_C(9223372036854775807) -#endif - -#ifndef UINT64_MAX -#define UINT64_MAX uint64_t_C(0xFFFFFFFFFFFFFFFF) -#endif - -#ifndef INT_BIT -# if INT_MAX != 2147483647 -# define INT_BIT 64 -# else -# define INT_BIT 32 -# endif -#endif - -#ifndef int64_t_C -#define int64_t_C(c) (c ## LL) -#define uint64_t_C(c) (c ## ULL) -#endif - -#if defined(__MINGW32__) && !defined(BUILD_AVUTIL) && defined(BUILD_SHARED_AV) -# define FF_IMPORT_ATTR __declspec(dllimport) -#else -# define FF_IMPORT_ATTR -#endif - - -#ifdef HAVE_AV_CONFIG_H -/* only include the following when compiling package */ -# include "internal.h" +#ifndef INT64_C +#define INT64_C(c) (c ## LL) +#define UINT64_C(c) (c ## ULL) #endif //rounded divison & shift @@ -184,7 +83,7 @@ #define FFSWAP(type,a,b) do{type SWAP_tmp= b; b= a; a= SWAP_tmp;}while(0) /* misc math functions */ -extern FF_IMPORT_ATTR 
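/* common.h above sheds its PRId64/INT64_MAX-style fallback macros (they
 * reappear in internal.h later in this diff) and drops FF_IMPORT_ATTR, the
 * MinGW __declspec(dllimport) shim, so ff_log2_tab and ff_sqrt_tab become
 * plain externs -- presumably fine for xine's bundled, statically linked
 * copy of libavutil. */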
const uint8_t ff_log2_tab[256]; +extern const uint8_t ff_log2_tab[256]; static inline int av_log2(unsigned int v) { @@ -375,7 +274,7 @@ static inline uint64_t read_time(void) ); return (d << 32) | (a & 0xffffffff); } -#elif defined(ARCH_X86) +#elif defined(ARCH_X86_32) static inline long long read_time(void) { long long l; @@ -465,4 +364,8 @@ void av_freep(void *ptr); # define ASMALIGN(ZEROBITS) ".align 1<<" #ZEROBITS "\n\t" #endif +/* xine: another config.h with codecs to use */ +#include "ffmpeg_config.h" + #endif /* COMMON_H */ + diff --git a/src/libffmpeg/libavutil/internal.h b/src/libffmpeg/libavutil/internal.h index 7d850141b..0c4b44170 100644 --- a/src/libffmpeg/libavutil/internal.h +++ b/src/libffmpeg/libavutil/internal.h @@ -26,6 +26,94 @@ #ifndef INTERNAL_H #define INTERNAL_H +#ifndef attribute_used +#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0) +# define attribute_used __attribute__((used)) +#else +# define attribute_used +#endif +#endif + +#ifndef attribute_unused +#if defined(__GNUC__) +# define attribute_unused __attribute__((unused)) +#else +# define attribute_unused +#endif +#endif + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +#ifndef PRId64 +#define PRId64 "lld" +#endif + +#ifndef PRIu64 +#define PRIu64 "llu" +#endif + +#ifndef PRIx64 +#define PRIx64 "llx" +#endif + +#ifndef PRIX64 +#define PRIX64 "llX" +#endif + +#ifndef PRId32 +#define PRId32 "d" +#endif + +#ifndef PRIdFAST16 +#define PRIdFAST16 PRId32 +#endif + +#ifndef PRIdFAST32 +#define PRIdFAST32 PRId32 +#endif + +#ifndef INT16_MIN +#define INT16_MIN (-0x7fff-1) +#endif + +#ifndef INT16_MAX +#define INT16_MAX 0x7fff +#endif + +#ifndef INT32_MIN +#define INT32_MIN (-0x7fffffff-1) +#endif + +#ifndef INT32_MAX +#define INT32_MAX 0x7fffffff +#endif + +#ifndef UINT32_MAX +#define UINT32_MAX 0xffffffff +#endif + +#ifndef INT64_MIN +#define INT64_MIN (-0x7fffffffffffffffLL-1) +#endif + +#ifndef INT64_MAX +#define INT64_MAX INT64_C(9223372036854775807) +#endif + +#ifndef UINT64_MAX +#define UINT64_MAX UINT64_C(0xFFFFFFFFFFFFFFFF) +#endif + +#ifndef INT_BIT +# if INT_MAX != 2147483647 +# define INT_BIT 64 +# else +# define INT_BIT 32 +# endif +#endif + #if ( defined(__PIC__) || defined(__pic__) ) && ! defined(PIC) # define PIC #endif @@ -34,6 +122,7 @@ # define ENODATA 61 #endif +#include "intreadwrite.h" #include "bswap.h" #include <stddef.h> @@ -136,7 +225,7 @@ extern const uint32_t ff_inverse[256]; # define FASTDIV(a,b) ((a)/(b)) #endif -extern FF_IMPORT_ATTR const uint8_t ff_sqrt_tab[128]; +extern const uint8_t ff_sqrt_tab[128]; static inline int ff_sqrt(int a) { @@ -216,7 +305,7 @@ if((y)<(x)){\ /* XXX: add ISOC specific test to avoid specific BSD testing. */ /* better than nothing implementation. 
*/ /* btw, rintf() is existing on fbsd too -- alex */ -static always_inline long int lrintf(float x) +static av_always_inline long int lrintf(float x) { #ifdef __MINGW32__ # ifdef ARCH_X86_32 diff --git a/src/libffmpeg/libavutil/intreadwrite.h b/src/libffmpeg/libavutil/intreadwrite.h new file mode 100644 index 000000000..c43f9d651 --- /dev/null +++ b/src/libffmpeg/libavutil/intreadwrite.h @@ -0,0 +1,42 @@ +#ifndef INTREADWRITE_H +#define INTREADWRITE_H + +#ifdef __GNUC__ + +struct unaligned_64 { uint64_t l; } __attribute__((packed)); +struct unaligned_32 { uint32_t l; } __attribute__((packed)); +struct unaligned_16 { uint16_t l; } __attribute__((packed)); + +#define LD16(a) (((const struct unaligned_16 *) (a))->l) +#define LD32(a) (((const struct unaligned_32 *) (a))->l) +#define LD64(a) (((const struct unaligned_64 *) (a))->l) + +#define ST16(a, b) (((struct unaligned_16 *) (a))->l) = (b) +#define ST32(a, b) (((struct unaligned_32 *) (a))->l) = (b) + +#else /* __GNUC__ */ + +#define LD16(a) (*((uint16_t*)(a))) +#define LD32(a) (*((uint32_t*)(a))) +#define LD64(a) (*((uint64_t*)(a))) + +#define ST16(a, b) *((uint16_t*)(a)) = (b) +#define ST32(a, b) *((uint32_t*)(a)) = (b) + +#endif /* !__GNUC__ */ + +/* endian macros */ +#if !defined(BE_16) || !defined(BE_32) || !defined(LE_16) || !defined(LE_32) +#define BE_16(x) ((((uint8_t*)(x))[0] << 8) | ((uint8_t*)(x))[1]) +#define BE_32(x) ((((uint8_t*)(x))[0] << 24) | \ + (((uint8_t*)(x))[1] << 16) | \ + (((uint8_t*)(x))[2] << 8) | \ + ((uint8_t*)(x))[3]) +#define LE_16(x) ((((uint8_t*)(x))[1] << 8) | ((uint8_t*)(x))[0]) +#define LE_32(x) ((((uint8_t*)(x))[3] << 24) | \ + (((uint8_t*)(x))[2] << 16) | \ + (((uint8_t*)(x))[1] << 8) | \ + ((uint8_t*)(x))[0]) +#endif + +#endif /* INTREADWRITE_H */ diff --git a/src/libffmpeg/libavutil/rational.c b/src/libffmpeg/libavutil/rational.c index 0e018c41b..0480aa882 100644 --- a/src/libffmpeg/libavutil/rational.c +++ b/src/libffmpeg/libavutil/rational.c @@ -38,8 +38,10 @@ int av_reduce(int *dst_nom, int *dst_den, int64_t nom, int64_t den, int64_t max) int sign= (nom<0) ^ (den<0); int64_t gcd= ff_gcd(FFABS(nom), FFABS(den)); - nom = FFABS(nom)/gcd; - den = FFABS(den)/gcd; + if(gcd){ + nom = FFABS(nom)/gcd; + den = FFABS(den)/gcd; + } if(nom<=max && den<=max){ a1= (AVRational){nom, den}; den=0; @@ -65,7 +67,7 @@ int av_reduce(int *dst_nom, int *dst_den, int64_t nom, int64_t den, int64_t max) nom= den; den= next_den; } - assert(ff_gcd(a1.num, a1.den) == 1); + assert(ff_gcd(a1.num, a1.den) <= 1U); *dst_nom = sign ? 
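/* Also above: intreadwrite.h funnels unaligned accesses through
 * __attribute__((packed)) structs so gcc emits sequences that are safe on
 * strict-alignment CPUs, where dereferencing a misaligned uint32_t pointer
 * directly would be undefined -- e.g. LD32(buf + 3) is legal on any
 * supported target.  And in av_reduce(), the new if(gcd) guard covers the
 * nom == den == 0 input: ff_gcd(0, 0) is 0, so the old unconditional
 * division divided by zero; the assert is relaxed from == 1 to <= 1U to
 * admit that case. */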
-a1.num : a1.num; *dst_den = a1.den; diff --git a/src/libffmpeg/video_decoder.c b/src/libffmpeg/video_decoder.c index ad2bc99b4..b019d52d3 100644 --- a/src/libffmpeg/video_decoder.c +++ b/src/libffmpeg/video_decoder.c @@ -17,7 +17,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA * - * $Id: video_decoder.c,v 1.64 2006/12/02 21:06:18 miguelfreitas Exp $ + * $Id: video_decoder.c,v 1.65 2007/01/13 21:19:52 miguelfreitas Exp $ * * xine video decoder plugin using ffmpeg * @@ -25,6 +25,7 @@ #ifdef HAVE_CONFIG_H #include "config.h" +#include "ffmpeg_config.h" #endif #include <stdlib.h> @@ -116,6 +117,8 @@ struct ff_video_decoder_s { int is_direct_rendering_disabled; AVPaletteControl palette_control; + + xine_list_t *dr1_frames; }; @@ -203,16 +206,25 @@ static int get_buffer(AVCodecContext *context, AVFrame *av_frame){ av_frame->type= FF_BUFFER_TYPE_USER; + xine_list_push_back(this->dr1_frames, av_frame); + return 0; } static void release_buffer(struct AVCodecContext *context, AVFrame *av_frame){ + ff_video_decoder_t *this = (ff_video_decoder_t *)context->opaque; if (av_frame->type == FF_BUFFER_TYPE_USER) { vo_frame_t *img = (vo_frame_t *)av_frame->opaque; + xine_list_iterator_t it; assert(av_frame->opaque); img->free(img); + + it = xine_list_find(this->dr1_frames, av_frame); + assert(it); + if( it != NULL ) + xine_list_remove(this->dr1_frames, it); } else { avcodec_default_release_buffer(context, av_frame); } @@ -249,6 +261,8 @@ static const ff_codec_t ff_video_lookup[] = { {BUF_VIDEO_DV, CODEC_ID_DVVIDEO, "DV (ffmpeg)"}, {BUF_VIDEO_HUFFYUV, CODEC_ID_HUFFYUV, "HuffYUV (ffmpeg)"}, {BUF_VIDEO_VP31, CODEC_ID_VP3, "On2 VP3.1 (ffmpeg)"}, + {BUF_VIDEO_VP5, CODEC_ID_VP5, "On2 VP5 (ffmpeg)"}, + {BUF_VIDEO_VP6, CODEC_ID_VP6, "On2 VP6 (ffmpeg)"}, {BUF_VIDEO_4XM, CODEC_ID_4XM, "4X Video (ffmpeg)"}, {BUF_VIDEO_CINEPAK, CODEC_ID_CINEPAK, "Cinepak (ffmpeg)"}, {BUF_VIDEO_MSVC, CODEC_ID_MSVIDEO1, "Microsoft Video 1 (ffmpeg)"}, @@ -376,7 +390,7 @@ static void init_video_codec (ff_video_decoder_t *this, unsigned int codec_type) /* enable direct rendering by default */ this->output_format = XINE_IMGFMT_YV12; #ifdef ENABLE_DIRECT_RENDERING - if( this->codec->capabilities & CODEC_CAP_DR1 ) { + if( this->codec->capabilities & CODEC_CAP_DR1 && this->codec->id != CODEC_ID_H264 ) { this->context->get_buffer = get_buffer; this->context->release_buffer = release_buffer; xprintf(this->stream->xine, XINE_VERBOSITY_LOG, @@ -801,7 +815,7 @@ static void ff_check_bufsize (ff_video_decoder_t *this, int size) { xprintf(this->stream->xine, XINE_VERBOSITY_LOG, _("ffmpeg_video_dec: increasing buffer to %d to avoid overflow.\n"), this->bufsize); - this->buf = realloc(this->buf, this->bufsize); + this->buf = realloc(this->buf, this->bufsize + FF_INPUT_BUFFER_PADDING_SIZE ); } } @@ -826,7 +840,7 @@ static void ff_handle_header_buffer (ff_video_decoder_t *this, buf_element_t *bu lprintf ("header buffer\n"); /* accumulate data */ - ff_check_bufsize(this, this->size + buf->size + FF_INPUT_BUFFER_PADDING_SIZE); + ff_check_bufsize(this, this->size + buf->size); xine_fast_memcpy (&this->buf[this->size], buf->content, buf->size); this->size += buf->size; @@ -1102,7 +1116,7 @@ static void ff_handle_buffer (ff_video_decoder_t *this, buf_element_t *buf) { lprintf("no memcpy needed to accumulate data\n"); } else { /* copy data into our internal buffer */ - ff_check_bufsize(this, this->size + buf->size + FF_INPUT_BUFFER_PADDING_SIZE); + 
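/* On the dr1_frames bookkeeping added above: get_buffer() records every
 * DR1 frame handed to libavcodec, release_buffer() removes it again, and
 * ff_dispose() (further below) force-releases whatever a leaky codec left
 * behind.  A minimal sketch of the same pattern, with hypothetical names
 * and a fixed array standing in for xine_list_t: */
#define MAX_OUTSTANDING 32
static void *outstanding[MAX_OUTSTANDING];

static void track_frame(void *frame)            /* cf. get_buffer() */
{
    int i;
    for (i = 0; i < MAX_OUTSTANDING; i++)
        if (!outstanding[i]) { outstanding[i] = frame; return; }
}

static void untrack_frame(void *frame)          /* cf. release_buffer() */
{
    int i;
    for (i = 0; i < MAX_OUTSTANDING; i++)
        if (outstanding[i] == frame) { outstanding[i] = NULL; return; }
}

static void release_leaked(void (*release)(void *)) /* cf. ff_dispose() */
{
    int i;
    for (i = 0; i < MAX_OUTSTANDING; i++)
        if (outstanding[i]) { release(outstanding[i]); outstanding[i] = NULL; }
}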
ff_check_bufsize(this, this->size + buf->size); chunk_buf = this->buf; /* ff_check_bufsize might realloc this->buf */ xine_fast_memcpy (&this->buf[this->size], buf->content, buf->size); @@ -1122,7 +1136,13 @@ static void ff_handle_buffer (ff_video_decoder_t *this, buf_element_t *buf) { int codec_type = buf->type & 0xFFFF0000; /* pad input data */ - chunk_buf[this->size] = 0; + /* note: bitstream, alt bitstream reader or something will cause + * severe mpeg4 artifacts if padding is less than 32 bits. + */ + chunk_buf[this->size+0] = 0; + chunk_buf[this->size+1] = 0; + chunk_buf[this->size+2] = 0; + chunk_buf[this->size+3] = 0; while (this->size > 0) { @@ -1150,7 +1170,7 @@ static void ff_handle_buffer (ff_video_decoder_t *this, buf_element_t *buf) { this->size -= len; if (this->size > 0) { - ff_check_bufsize(this, this->size + FF_INPUT_BUFFER_PADDING_SIZE); + ff_check_bufsize(this, this->size); memmove (this->buf, &chunk_buf[offset], this->size); chunk_buf = this->buf; } @@ -1256,7 +1276,7 @@ static void ff_handle_buffer (ff_video_decoder_t *this, buf_element_t *buf) { img->crop_bottom = this->crop_bottom; this->skipframes = img->draw(img, this->stream); - + if(free_img) img->free(img); } @@ -1360,12 +1380,23 @@ static void ff_dispose (video_decoder_t *this_gen) { ff_video_decoder_t *this = (ff_video_decoder_t *) this_gen; lprintf ("ff_dispose\n"); - + if (this->decoder_ok) { + xine_list_iterator_t it; + AVFrame *av_frame; + pthread_mutex_lock(&ffmpeg_lock); avcodec_close (this->context); pthread_mutex_unlock(&ffmpeg_lock); - + + /* frame garbage collector here - workaround for buggy ffmpeg codecs that + * don't release their DR1 frames */ + while( (it = xine_list_front(this->dr1_frames)) != NULL ) + { + av_frame = (AVFrame *)xine_list_get_value(this->dr1_frames, it); + release_buffer(this->context, av_frame); + } + this->stream->video_out->close(this->stream->video_out, this->stream); this->decoder_ok = 0; } @@ -1394,6 +1425,8 @@ static void ff_dispose (video_decoder_t *this_gen) { if(this->pp_mode) pp_free_mode(this->pp_mode); + + xine_list_delete(this->dr1_frames); free (this_gen); } @@ -1433,6 +1466,8 @@ static video_decoder_t *ff_video_open_plugin (video_decoder_class_t *class_gen, this->pp_context = NULL; this->pp_mode = NULL; + this->dr1_frames = xine_list_new(); + mpeg_parser_init(&this->mpeg_parser); return &this->video_decoder; @@ -1483,73 +1518,223 @@ void *init_video_plugin (xine_t *xine, void *data) { } static uint32_t supported_video_types[] = { - BUF_VIDEO_MSMPEG4_V1, + #ifdef CONFIG_MSMPEG4V1_DECODER + BUF_VIDEO_MSMPEG4_V1, + #endif + #ifdef CONFIG_MSMPEG4V2_DECODER BUF_VIDEO_MSMPEG4_V2, - BUF_VIDEO_MSMPEG4_V3, - BUF_VIDEO_WMV7, + #endif + #ifdef CONFIG_MSMPEG4V3_DECODER + BUF_VIDEO_MSMPEG4_V3, + #endif + #ifdef CONFIG_WMV1_DECODER + BUF_VIDEO_WMV7, + #endif + #ifdef CONFIG_WMV2_DECODER + BUF_VIDEO_WMV8, + #endif + #ifdef CONFIG_WMV3_DECODER + BUF_VIDEO_WMV9, + #endif + #ifdef CONFIG_MPEG4_DECODER BUF_VIDEO_MPEG4, - BUF_VIDEO_XVID, - BUF_VIDEO_DIVX5, + #endif + #ifdef CONFIG_MPEG4_DECODER + BUF_VIDEO_XVID, + #endif + #ifdef CONFIG_MPEG4_DECODER + BUF_VIDEO_DIVX5, + #endif + #ifdef CONFIG_MPEG4_DECODER BUF_VIDEO_3IVX, + #endif + #ifdef CONFIG_MJPEG_DECODER + BUF_VIDEO_JPEG, + #endif + #ifdef CONFIG_MJPEG_DECODER BUF_VIDEO_MJPEG, + #endif + #ifdef CONFIG_MJPEGB_DECODER BUF_VIDEO_MJPEG_B, + #endif + #ifdef CONFIG_H263I_DECODER + BUF_VIDEO_I263, + #endif + #ifdef CONFIG_H263_DECODER BUF_VIDEO_H263, + #endif + #ifdef CONFIG_RV10_DECODER BUF_VIDEO_RV10, + #endif + #ifdef 
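/* On the padding above: libavcodec's bitstream readers may fetch up to 32
 * bits past the last byte they actually consume, hence four zero bytes of
 * padding instead of one, with ff_check_bufsize() now reserving
 * FF_INPUT_BUFFER_PADDING_SIZE once at realloc time instead of every caller
 * adding it to the requested size. */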
CONFIG_RV20_DECODER BUF_VIDEO_RV20, + #endif + #ifdef CONFIG_INDEO3_DECODER BUF_VIDEO_IV31, + #endif + #ifdef CONFIG_INDEO3_DECODER BUF_VIDEO_IV32, + #endif + #ifdef CONFIG_SVQ1_DECODER BUF_VIDEO_SORENSON_V1, + #endif + #ifdef CONFIG_SVQ3_DECODER BUF_VIDEO_SORENSON_V3, - BUF_VIDEO_JPEG, - BUF_VIDEO_MPEG, + #endif + #ifdef CONFIG_DVVIDEO_DECODER BUF_VIDEO_DV, + #endif + #ifdef CONFIG_HUFFYUV_DECODER BUF_VIDEO_HUFFYUV, + #endif + #ifdef CONFIG_VP3_DECODER BUF_VIDEO_VP31, + #endif + #ifdef CONFIG_VP5_DECODER + BUF_VIDEO_VP5, + #endif + #ifdef CONFIG_VP6_DECODER + BUF_VIDEO_VP6, + #endif + #ifdef CONFIG_4XM_DECODER BUF_VIDEO_4XM, + #endif + #ifdef CONFIG_CINEPAK_DECODER BUF_VIDEO_CINEPAK, + #endif + #ifdef CONFIG_MSVIDEO1_DECODER BUF_VIDEO_MSVC, + #endif + #ifdef CONFIG_MSRLE_DECODER BUF_VIDEO_MSRLE, + #endif + #ifdef CONFIG_RPZA_DECODER BUF_VIDEO_RPZA, + #endif + #ifdef CONFIG_CYUV_DECODER BUF_VIDEO_CYUV, + #endif + #ifdef CONFIG_ROQ_DECODER BUF_VIDEO_ROQ, + #endif + #ifdef CONFIG_IDCIN_DECODER BUF_VIDEO_IDCIN, + #endif + #ifdef CONFIG_XAN_WC3_DECODER BUF_VIDEO_WC3, + #endif + #ifdef CONFIG_WS_VQA_DECODER BUF_VIDEO_VQA, + #endif + #ifdef CONFIG_INTERPLAY_VIDEO_DECODER BUF_VIDEO_INTERPLAY, + #endif + #ifdef CONFIG_FLIC_DECODER BUF_VIDEO_FLI, + #endif + #ifdef CONFIG_8BPS_DECODER BUF_VIDEO_8BPS, + #endif + #ifdef CONFIG_SMC_DECODER BUF_VIDEO_SMC, - BUF_VIDEO_VMD, + #endif + #ifdef CONFIG_TRUEMOTION1_DECODER BUF_VIDEO_DUCKTM1, + #endif + #ifdef CONFIG_TRUEMOTION2_DECODER BUF_VIDEO_DUCKTM2, + #endif + #ifdef CONFIG_VMDVIDEO_DECODER + BUF_VIDEO_VMD, + #endif + #ifdef CONFIG_ZLIB_DECODER BUF_VIDEO_ZLIB, + #endif + #ifdef CONFIG_MSZH_DECODER BUF_VIDEO_MSZH, + #endif + #ifdef CONFIG_ASV1_DECODER BUF_VIDEO_ASV1, + #endif + #ifdef CONFIG_ASV2_DECODER BUF_VIDEO_ASV2, + #endif + #ifdef CONFIG_VCR1_DECODER BUF_VIDEO_ATIVCR1, + #endif + #ifdef CONFIG_FLV1_DECODER BUF_VIDEO_FLV1, + #endif + #ifdef CONFIG_QTRLE_DECODER BUF_VIDEO_QTRLE, + #endif + #ifdef CONFIG_H264_DECODER BUF_VIDEO_H264, + #endif + #ifdef CONFIG_H261_DECODER BUF_VIDEO_H261, + #endif + #ifdef CONFIG_AASC_DECODER BUF_VIDEO_AASC, + #endif + #ifdef CONFIG_LOCO_DECODER BUF_VIDEO_LOCO, + #endif + #ifdef CONFIG_QDRAW_DECODER BUF_VIDEO_QDRW, + #endif + #ifdef CONFIG_QPEG_DECODER BUF_VIDEO_QPEG, + #endif + #ifdef CONFIG_TSCC_DECODER BUF_VIDEO_TSCC, + #endif + #ifdef CONFIG_ULTI_DECODER BUF_VIDEO_ULTI, + #endif + #ifdef CONFIG_WNV1_DECODER BUF_VIDEO_WNV1, + #endif + #ifdef CONFIG_VIXL_DECODER BUF_VIDEO_XL, + #endif + #ifdef CONFIG_INDEO2_DECODER BUF_VIDEO_RT21, + #endif + #ifdef CONFIG_FRAPS_DECODER BUF_VIDEO_FPS1, + #endif + #ifdef CONFIG_MPEG1VIDEO_DECODER + BUF_VIDEO_MPEG, + #endif + #ifdef CONFIG_CSCD_DECODER BUF_VIDEO_CSCD, + #endif + #ifdef CONFIG_AVS_DECODER + BUF_VIDEO_AVS, + #endif + #ifdef CONFIG_MMVIDEO_DECODER BUF_VIDEO_ALGMM, + #endif + #ifdef CONFIG_ZMBV_DECODER BUF_VIDEO_ZMBV, - BUF_VIDEO_AVS, + #endif + #ifdef CONFIG_SMACKVIDEO_DECODER BUF_VIDEO_SMACKER, + #endif + #ifdef CONFIG_NUV_DECODER BUF_VIDEO_NUV, + #endif + #ifdef CONFIG_KMVC_DECODER BUF_VIDEO_KMVC, + #endif + #ifdef CONFIG_FLASHSV_DECODER BUF_VIDEO_FLASHSV, + #endif + #ifdef CONFIG_CAVS_DECODER BUF_VIDEO_CAVS, + #endif + 0 }; diff --git a/src/libffmpeg/xine_decoder.c b/src/libffmpeg/xine_decoder.c index 02d19cc1a..2eeb9746b 100644 --- a/src/libffmpeg/xine_decoder.c +++ b/src/libffmpeg/xine_decoder.c @@ -17,7 +17,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA * - * $Id: 
xine_decoder.c,v 1.172 2006/12/04 22:25:13 miguelfreitas Exp $ + * $Id: xine_decoder.c,v 1.173 2007/01/13 21:19:52 miguelfreitas Exp $ * * xine decoder plugin using ffmpeg * @@ -25,6 +25,7 @@ #ifdef HAVE_CONFIG_H #include "config.h" +#include "ffmpeg_config.h" #endif #include "xine_internal.h" @@ -39,114 +40,273 @@ pthread_once_t once_control = PTHREAD_ONCE_INIT; pthread_mutex_t ffmpeg_lock; #ifndef HAVE_FFMPEG + +#define REGISTER_ENCODER(X,x) \ + if(ENABLE_##X##_ENCODER) register_avcodec(&x##_encoder) +#define REGISTER_DECODER(X,x) \ + if(ENABLE_##X##_DECODER) register_avcodec(&x##_decoder) +#define REGISTER_ENCDEC(X,x) REGISTER_ENCODER(X,x); REGISTER_DECODER(X,x) + +#define REGISTER_PARSER(X,x) \ + if(ENABLE_##X##_PARSER) av_register_codec_parser(&x##_parser) + +/* If you do not call this function, then you can select exactly which + formats you want to support */ + +/** + * simple call to register all the codecs. + */ void avcodec_register_all(void) { static int inited = 0; - + if (inited != 0) - return; + return; inited = 1; - /* decoders */ - register_avcodec(&h263_decoder); - register_avcodec(&mpeg4_decoder); - register_avcodec(&msmpeg4v1_decoder); - register_avcodec(&msmpeg4v2_decoder); - register_avcodec(&msmpeg4v3_decoder); - register_avcodec(&wmv1_decoder); - register_avcodec(&wmv2_decoder); - register_avcodec(&h263i_decoder); - register_avcodec(&rv10_decoder); - register_avcodec(&rv20_decoder); - register_avcodec(&svq1_decoder); - register_avcodec(&svq3_decoder); - register_avcodec(&wmav1_decoder); - register_avcodec(&wmav2_decoder); - register_avcodec(&indeo3_decoder); - register_avcodec(&mpeg1video_decoder); - register_avcodec(&dvvideo_decoder); - register_avcodec(&pcm_s16le_decoder); - register_avcodec(&mjpeg_decoder); - register_avcodec(&mjpegb_decoder); - register_avcodec(&mp2_decoder); - register_avcodec(&mp3_decoder); - register_avcodec(&mace3_decoder); - register_avcodec(&mace6_decoder); - register_avcodec(&huffyuv_decoder); - register_avcodec(&cyuv_decoder); - register_avcodec(&h264_decoder); - register_avcodec(&vp3_decoder); - register_avcodec(&fourxm_decoder); - register_avcodec(&ra_144_decoder); - register_avcodec(&ra_288_decoder); - register_avcodec(&adpcm_ms_decoder); - register_avcodec(&adpcm_ima_qt_decoder); - register_avcodec(&adpcm_ima_wav_decoder); - register_avcodec(&adpcm_ima_dk3_decoder); - register_avcodec(&adpcm_ima_dk4_decoder); - register_avcodec(&adpcm_ima_ws_decoder); - register_avcodec(&adpcm_ima_smjpeg_decoder); - register_avcodec(&adpcm_xa_decoder); - register_avcodec(&adpcm_4xm_decoder); - register_avcodec(&adpcm_ea_decoder); - register_avcodec(&pcm_alaw_decoder); - register_avcodec(&pcm_mulaw_decoder); - register_avcodec(&roq_dpcm_decoder); - register_avcodec(&interplay_dpcm_decoder); - register_avcodec(&cinepak_decoder); - register_avcodec(&msvideo1_decoder); - register_avcodec(&msrle_decoder); - register_avcodec(&rpza_decoder); - register_avcodec(&roq_decoder); - register_avcodec(&idcin_decoder); - register_avcodec(&xan_wc3_decoder); - register_avcodec(&vqa_decoder); - register_avcodec(&interplay_video_decoder); - register_avcodec(&flic_decoder); - register_avcodec(&smc_decoder); - register_avcodec(&eightbps_decoder); - register_avcodec(&vmdvideo_decoder); - register_avcodec(&vmdaudio_decoder); - register_avcodec(&truemotion1_decoder); - //register_avcodec(&mszh_decoder); - //register_avcodec(&zlib_decoder); - register_avcodec(&xan_dpcm_decoder); - register_avcodec(&asv1_decoder); - register_avcodec(&asv2_decoder); - 
register_avcodec(&vcr1_decoder); - register_avcodec(&flv_decoder); - register_avcodec(&qtrle_decoder); - register_avcodec(&flac_decoder); - register_avcodec(&aasc_decoder); - register_avcodec(&alac_decoder); - register_avcodec(&h261_decoder); - register_avcodec(&loco_decoder); - register_avcodec(&qdraw_decoder); - register_avcodec(&qpeg_decoder); - register_avcodec(&tscc_decoder); - register_avcodec(&ulti_decoder); - register_avcodec(&wnv1_decoder); - register_avcodec(&xl_decoder); - register_avcodec(&indeo2_decoder); - register_avcodec(&fraps_decoder); - register_avcodec(&shorten_decoder); - register_avcodec(&qdm2_decoder); - register_avcodec(&truemotion2_decoder); - register_avcodec(&wmv3_decoder); - register_avcodec(&cscd_decoder); - register_avcodec(&mmvideo_decoder); - register_avcodec(&zmbv_decoder); - register_avcodec(&avs_decoder); - register_avcodec(&smacker_decoder); - register_avcodec(&smackaud_decoder); - register_avcodec(&nuv_decoder); - register_avcodec(&kmvc_decoder); - register_avcodec(&flashsv_decoder); - //register_avcodec(&cavs_decoder); - register_avcodec(&cook_decoder); - register_avcodec(&truespeech_decoder); - register_avcodec(&tta_decoder); + /* video codecs */ + REGISTER_DECODER(AASC, aasc); + REGISTER_ENCDEC (ASV1, asv1); + REGISTER_ENCDEC (ASV2, asv2); + REGISTER_DECODER(AVS, avs); + REGISTER_DECODER(BMP, bmp); + REGISTER_DECODER(CAVS, cavs); + REGISTER_DECODER(CINEPAK, cinepak); + REGISTER_DECODER(CLJR, cljr); + REGISTER_DECODER(CSCD, cscd); + REGISTER_DECODER(CYUV, cyuv); + REGISTER_DECODER(DSICINVIDEO, dsicinvideo); + REGISTER_ENCDEC (DVVIDEO, dvvideo); + REGISTER_DECODER(EIGHTBPS, eightbps); + REGISTER_ENCDEC (FFV1, ffv1); + REGISTER_ENCDEC (FFVHUFF, ffvhuff); + REGISTER_DECODER(FLASHSV, flashsv); + REGISTER_DECODER(FLIC, flic); + REGISTER_ENCDEC (FLV, flv); + REGISTER_DECODER(FOURXM, fourxm); + REGISTER_DECODER(FRAPS, fraps); + REGISTER_ENCDEC (GIF, gif); + REGISTER_ENCDEC (H261, h261); + REGISTER_ENCDEC (H263, h263); + REGISTER_DECODER(H263I, h263i); + REGISTER_ENCODER(H263P, h263p); + REGISTER_DECODER(H264, h264); + REGISTER_ENCDEC (HUFFYUV, huffyuv); + REGISTER_DECODER(IDCIN, idcin); + REGISTER_DECODER(INDEO2, indeo2); + REGISTER_DECODER(INDEO3, indeo3); + REGISTER_DECODER(INTERPLAY_VIDEO, interplay_video); + REGISTER_ENCODER(JPEGLS, jpegls); + REGISTER_DECODER(KMVC, kmvc); + REGISTER_ENCODER(LJPEG, ljpeg); + REGISTER_DECODER(LOCO, loco); + REGISTER_DECODER(MDEC, mdec); + REGISTER_ENCDEC (MJPEG, mjpeg); + REGISTER_DECODER(MJPEGB, mjpegb); + REGISTER_DECODER(MMVIDEO, mmvideo); +#ifdef HAVE_XVMC + REGISTER_DECODER(MPEG_XVMC, mpeg_xvmc); +#endif + REGISTER_ENCDEC (MPEG1VIDEO, mpeg1video); + REGISTER_ENCDEC (MPEG2VIDEO, mpeg2video); + REGISTER_ENCDEC (MPEG4, mpeg4); + REGISTER_DECODER(MPEGVIDEO, mpegvideo); + REGISTER_ENCDEC (MSMPEG4V1, msmpeg4v1); + REGISTER_ENCDEC (MSMPEG4V2, msmpeg4v2); + REGISTER_ENCDEC (MSMPEG4V3, msmpeg4v3); + REGISTER_DECODER(MSRLE, msrle); + REGISTER_DECODER(MSVIDEO1, msvideo1); + REGISTER_DECODER(MSZH, mszh); + REGISTER_DECODER(NUV, nuv); + REGISTER_ENCODER(PAM, pam); + REGISTER_ENCODER(PBM, pbm); + REGISTER_ENCODER(PGM, pgm); + REGISTER_ENCODER(PGMYUV, pgmyuv); +#ifdef CONFIG_ZLIB + REGISTER_ENCDEC (PNG, png); +#endif + REGISTER_ENCODER(PPM, ppm); + REGISTER_DECODER(QDRAW, qdraw); + REGISTER_DECODER(QPEG, qpeg); + REGISTER_DECODER(QTRLE, qtrle); + REGISTER_ENCDEC (RAWVIDEO, rawvideo); + REGISTER_DECODER(ROQ, roq); + REGISTER_DECODER(RPZA, rpza); + REGISTER_ENCDEC (RV10, rv10); + REGISTER_ENCDEC (RV20, rv20); + 
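/* Each REGISTER_* line below expands by token pasting, e.g.
 *   REGISTER_DECODER(VP6, vp6);
 * becomes
 *   if(ENABLE_VP6_DECODER) register_avcodec(&vp6_decoder);
 * so a codec whose ENABLE_..._DECODER value (presumably supplied by
 * ffmpeg_config.h) is 0 still compiles -- the call is merely unreachable --
 * and disabling it saves no space unless the build also drops the object
 * files. */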
REGISTER_DECODER(SMACKER, smacker); + REGISTER_DECODER(SMC, smc); + REGISTER_ENCDEC (SNOW, snow); + REGISTER_DECODER(SP5X, sp5x); + REGISTER_ENCDEC (SVQ1, svq1); + REGISTER_DECODER(SVQ3, svq3); + REGISTER_DECODER(TARGA, targa); + REGISTER_DECODER(THEORA, theora); + REGISTER_DECODER(TIERTEXSEQVIDEO, tiertexseqvideo); + REGISTER_DECODER(TIFF, tiff); + REGISTER_DECODER(TRUEMOTION1, truemotion1); + REGISTER_DECODER(TRUEMOTION2, truemotion2); + REGISTER_DECODER(TSCC, tscc); + REGISTER_DECODER(ULTI, ulti); + REGISTER_DECODER(VC1, vc1); + REGISTER_DECODER(VCR1, vcr1); + REGISTER_DECODER(VMDVIDEO, vmdvideo); + REGISTER_DECODER(VMNC, vmnc); + REGISTER_DECODER(VP3, vp3); + REGISTER_DECODER(VP5, vp5); + REGISTER_DECODER(VP6, vp6); + REGISTER_DECODER(VP6F, vp6f); + REGISTER_DECODER(VQA, vqa); + REGISTER_ENCDEC (WMV1, wmv1); + REGISTER_ENCDEC (WMV2, wmv2); + REGISTER_DECODER(WMV3, wmv3); + REGISTER_DECODER(WNV1, wnv1); +#ifdef CONFIG_X264 + REGISTER_ENCODER(X264, x264); +#endif + REGISTER_DECODER(XAN_WC3, xan_wc3); + REGISTER_DECODER(XL, xl); +#ifdef CONFIG_XVID + REGISTER_ENCODER(XVID, xvid); +#endif + REGISTER_ENCDEC (ZLIB, zlib); +#ifdef CONFIG_ZLIB + REGISTER_ENCDEC (ZMBV, zmbv); +#endif + + /* audio codecs */ +#ifdef CONFIG_LIBFAAD + REGISTER_DECODER(AAC, aac); + REGISTER_DECODER(MPEG4AAC, mpeg4aac); +#endif +#ifdef CONFIG_LIBA52 + REGISTER_DECODER(AC3, ac3); +#endif + REGISTER_ENCODER(AC3, ac3); + REGISTER_DECODER(ALAC, alac); +#if defined(CONFIG_AMR_NB) || defined(CONFIG_AMR_NB_FIXED) + REGISTER_ENCDEC (AMR_NB, amr_nb); +#endif +#ifdef CONFIG_AMR_WB + REGISTER_ENCDEC (AMR_WB, amr_wb); +#endif + REGISTER_DECODER(COOK, cook); + REGISTER_DECODER(DSICINAUDIO, dsicinaudio); +#ifdef CONFIG_LIBDTS + REGISTER_DECODER(DTS, dts); +#endif +#ifdef CONFIG_LIBFAAC + REGISTER_ENCODER(FAAC, faac); +#endif + REGISTER_ENCDEC (FLAC, flac); + REGISTER_DECODER(IMC, imc); +#ifdef CONFIG_LIBGSM + REGISTER_ENCDEC (LIBGSM, libgsm); +#endif + REGISTER_DECODER(MACE3, mace3); + REGISTER_DECODER(MACE6, mace6); + REGISTER_ENCDEC (MP2, mp2); + REGISTER_DECODER(MP3, mp3); + REGISTER_DECODER(MP3ADU, mp3adu); +#ifdef CONFIG_LIBMP3LAME + REGISTER_ENCODER(MP3LAME, mp3lame); +#endif + REGISTER_DECODER(MP3ON4, mp3on4); + REGISTER_DECODER(MPC7, mpc7); +#ifdef CONFIG_LIBVORBIS + if (!ENABLE_VORBIS_ENCODER) REGISTER_ENCODER(OGGVORBIS, oggvorbis); + if (!ENABLE_VORBIS_DECODER) REGISTER_DECODER(OGGVORBIS, oggvorbis); +#endif + REGISTER_DECODER(QDM2, qdm2); + REGISTER_DECODER(RA_144, ra_144); + REGISTER_DECODER(RA_288, ra_288); + REGISTER_DECODER(SHORTEN, shorten); + REGISTER_DECODER(SMACKAUD, smackaud); + REGISTER_ENCDEC (SONIC, sonic); + REGISTER_ENCODER(SONIC_LS, sonic_ls); + REGISTER_DECODER(TRUESPEECH, truespeech); + REGISTER_DECODER(TTA, tta); + REGISTER_DECODER(VMDAUDIO, vmdaudio); + REGISTER_ENCDEC (VORBIS, vorbis); + REGISTER_DECODER(WAVPACK, wavpack); + REGISTER_DECODER(WMAV1, wmav1); + REGISTER_DECODER(WMAV2, wmav2); + REGISTER_DECODER(WS_SND1, ws_snd1); + + /* pcm codecs */ + REGISTER_ENCDEC (PCM_ALAW, pcm_alaw); + REGISTER_ENCDEC (PCM_MULAW, pcm_mulaw); + REGISTER_ENCDEC (PCM_S8, pcm_s8); + REGISTER_ENCDEC (PCM_S16BE, pcm_s16be); + REGISTER_ENCDEC (PCM_S16LE, pcm_s16le); + REGISTER_ENCDEC (PCM_S24BE, pcm_s24be); + REGISTER_ENCDEC (PCM_S24DAUD, pcm_s24daud); + REGISTER_ENCDEC (PCM_S24LE, pcm_s24le); + REGISTER_ENCDEC (PCM_S32BE, pcm_s32be); + REGISTER_ENCDEC (PCM_S32LE, pcm_s32le); + REGISTER_ENCDEC (PCM_U8, pcm_u8); + REGISTER_ENCDEC (PCM_U16BE, pcm_u16be); + REGISTER_ENCDEC (PCM_U16LE, pcm_u16le); + REGISTER_ENCDEC 
(PCM_U24BE, pcm_u24be); + REGISTER_ENCDEC (PCM_U24LE, pcm_u24le); + REGISTER_ENCDEC (PCM_U32BE, pcm_u32be); + REGISTER_ENCDEC (PCM_U32LE, pcm_u32le); + + /* dpcm codecs */ + REGISTER_DECODER(INTERPLAY_DPCM, interplay_dpcm); + REGISTER_DECODER(ROQ_DPCM, roq_dpcm); + REGISTER_DECODER(SOL_DPCM, sol_dpcm); + REGISTER_DECODER(XAN_DPCM, xan_dpcm); + + /* adpcm codecs */ + REGISTER_ENCDEC (ADPCM_4XM, adpcm_4xm); + REGISTER_ENCDEC (ADPCM_ADX, adpcm_adx); + REGISTER_ENCDEC (ADPCM_CT, adpcm_ct); + REGISTER_ENCDEC (ADPCM_EA, adpcm_ea); + REGISTER_ENCDEC (ADPCM_G726, adpcm_g726); + REGISTER_ENCDEC (ADPCM_IMA_DK3, adpcm_ima_dk3); + REGISTER_ENCDEC (ADPCM_IMA_DK4, adpcm_ima_dk4); + REGISTER_ENCDEC (ADPCM_IMA_QT, adpcm_ima_qt); + REGISTER_ENCDEC (ADPCM_IMA_SMJPEG, adpcm_ima_smjpeg); + REGISTER_ENCDEC (ADPCM_IMA_WAV, adpcm_ima_wav); + REGISTER_ENCDEC (ADPCM_IMA_WS, adpcm_ima_ws); + REGISTER_ENCDEC (ADPCM_MS, adpcm_ms); + REGISTER_ENCDEC (ADPCM_SBPRO_2, adpcm_sbpro_2); + REGISTER_ENCDEC (ADPCM_SBPRO_3, adpcm_sbpro_3); + REGISTER_ENCDEC (ADPCM_SBPRO_4, adpcm_sbpro_4); + REGISTER_ENCDEC (ADPCM_SWF, adpcm_swf); + REGISTER_ENCDEC (ADPCM_XA, adpcm_xa); + REGISTER_ENCDEC (ADPCM_YAMAHA, adpcm_yamaha); + + /* subtitles */ + REGISTER_ENCDEC (DVBSUB, dvbsub); + REGISTER_ENCDEC (DVDSUB, dvdsub); + + /* parsers */ + REGISTER_PARSER (AAC, aac); + REGISTER_PARSER (AC3, ac3); + REGISTER_PARSER (CAVSVIDEO, cavsvideo); + REGISTER_PARSER (DVBSUB, dvbsub); + REGISTER_PARSER (DVDSUB, dvdsub); + REGISTER_PARSER (H261, h261); + REGISTER_PARSER (H263, h263); + REGISTER_PARSER (H264, h264); + REGISTER_PARSER (MJPEG, mjpeg); + REGISTER_PARSER (MPEG4VIDEO, mpeg4video); + REGISTER_PARSER (MPEGAUDIO, mpegaudio); + REGISTER_PARSER (MPEGVIDEO, mpegvideo); + REGISTER_PARSER (PNM, pnm); + + /* + av_register_bitstream_filter(&dump_extradata_bsf); + av_register_bitstream_filter(&remove_extradata_bsf); + av_register_bitstream_filter(&noise_bsf); + av_register_bitstream_filter(&mp3_header_compress_bsf); + av_register_bitstream_filter(&mp3_header_decompress_bsf); + av_register_bitstream_filter(&mjpega_dump_header_bsf); + */ } + #endif void init_once_routine(void) {
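/* ffmpeg_config.h itself is not part of this diff; judging from its uses
 * above, it presumably pairs #ifdef-style CONFIG_..._DECODER guards (tested
 * by the supported_*_types tables) with always-defined ENABLE_..._DECODER
 * values (tested by the REGISTER_* macros).  A purely hypothetical
 * fragment: */
#define CONFIG_VP6_DECODER 1    /* enabled: #ifdef lists include the codec */
#define ENABLE_VP6_DECODER 1    /* and its if(ENABLE_...) call is live     */

#undef  CONFIG_SONIC_DECODER    /* disabled at configure time...          */
#define ENABLE_SONIC_DECODER 0  /* ...so registration compiles to if(0)   */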