summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CREDITS2
-rw-r--r--ChangeLog10
-rw-r--r--configure.ac110
-rw-r--r--src/libffmpeg/audio_decoder.c90
-rw-r--r--src/libffmpeg/diff_to_ffmpeg_cvs.txt416
-rw-r--r--src/libffmpeg/libavcodec/Makefile.am17
-rw-r--r--src/libffmpeg/libavcodec/armv4l/Makefile.am7
-rw-r--r--src/libffmpeg/libavcodec/armv4l/dsputil_arm_s.S696
-rw-r--r--src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt.c188
-rw-r--r--src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt_rnd.h1114
-rw-r--r--src/libffmpeg/libavcodec/armv4l/mathops.h49
-rw-r--r--src/libffmpeg/libavcodec/armv4l/mpegvideo_armv5te.c213
-rw-r--r--src/libffmpeg/libavcodec/armv4l/mpegvideo_iwmmxt.c119
-rw-r--r--src/libffmpeg/libavcodec/armv4l/simple_idct_armv5te.S718
-rw-r--r--src/libffmpeg/libavcodec/avcodec.h30
-rw-r--r--src/libffmpeg/libavcodec/bitstream.h6
-rw-r--r--src/libffmpeg/libavcodec/bytestream.h20
-rw-r--r--src/libffmpeg/libavcodec/cabac.h4
-rw-r--r--src/libffmpeg/libavcodec/cinepak.c36
-rw-r--r--src/libffmpeg/libavcodec/cook.c26
-rw-r--r--src/libffmpeg/libavcodec/cscd.c4
-rw-r--r--src/libffmpeg/libavcodec/dsputil.c28
-rw-r--r--src/libffmpeg/libavcodec/dsputil.h32
-rw-r--r--src/libffmpeg/libavcodec/dv.c14
-rw-r--r--src/libffmpeg/libavcodec/faandct.c2
-rw-r--r--src/libffmpeg/libavcodec/ffv1.c2
-rw-r--r--src/libffmpeg/libavcodec/h263.c18
-rw-r--r--src/libffmpeg/libavcodec/h264.c190
-rw-r--r--src/libffmpeg/libavcodec/h264data.h27
-rwxr-xr-xsrc/libffmpeg/libavcodec/h264idct.c2
-rw-r--r--src/libffmpeg/libavcodec/i386/Makefile.am6
-rw-r--r--src/libffmpeg/libavcodec/i386/cputest.c6
-rw-r--r--src/libffmpeg/libavcodec/i386/fdct_mmx.c8
-rw-r--r--src/libffmpeg/libavcodec/i386/mathops.h41
-rw-r--r--src/libffmpeg/libavcodec/jfdctfst.c2
-rw-r--r--src/libffmpeg/libavcodec/jfdctint.c2
-rw-r--r--src/libffmpeg/libavcodec/jpeg_ls.c9
-rw-r--r--src/libffmpeg/libavcodec/mathops.h2
-rw-r--r--src/libffmpeg/libavcodec/motion_est.c38
-rw-r--r--src/libffmpeg/libavcodec/motion_est_template.c56
-rw-r--r--src/libffmpeg/libavcodec/mpeg12.c2
-rw-r--r--src/libffmpeg/libavcodec/mpegaudiodec.c4
-rw-r--r--src/libffmpeg/libavcodec/mpegvideo.c111
-rw-r--r--src/libffmpeg/libavcodec/mpegvideo.h3
-rw-r--r--src/libffmpeg/libavcodec/parser.c3
-rw-r--r--src/libffmpeg/libavcodec/ppc/Makefile.am12
-rw-r--r--src/libffmpeg/libavcodec/ppc/float_altivec.c194
-rw-r--r--src/libffmpeg/libavcodec/ppc/h264_altivec.c565
-rw-r--r--src/libffmpeg/libavcodec/ppc/h264_template_altivec.c719
-rw-r--r--src/libffmpeg/libavcodec/ppc/mathops.h33
-rw-r--r--src/libffmpeg/libavcodec/ppc/snow_altivec.c788
-rw-r--r--src/libffmpeg/libavcodec/ppc/types_altivec.h41
-rw-r--r--src/libffmpeg/libavcodec/ppc/vc1dsp_altivec.c338
-rw-r--r--src/libffmpeg/libavcodec/smacker.c4
-rw-r--r--src/libffmpeg/libavcodec/snow.c76
-rw-r--r--src/libffmpeg/libavcodec/snow.h8
-rw-r--r--src/libffmpeg/libavcodec/utils.c73
-rw-r--r--src/libffmpeg/libavcodec/vc1.c6
-rw-r--r--src/libffmpeg/libavcodec/vc1dsp.c2
-rw-r--r--src/libffmpeg/libavcodec/vp3dsp.c2
-rw-r--r--src/libffmpeg/libavcodec/vp5.c290
-rw-r--r--src/libffmpeg/libavcodec/vp56.c665
-rw-r--r--src/libffmpeg/libavcodec/vp56.h249
-rw-r--r--src/libffmpeg/libavcodec/vp56data.c66
-rw-r--r--src/libffmpeg/libavcodec/vp56data.h248
-rw-r--r--src/libffmpeg/libavcodec/vp5data.h173
-rw-r--r--src/libffmpeg/libavcodec/vp6.c537
-rw-r--r--src/libffmpeg/libavcodec/vp6data.h300
-rw-r--r--src/libffmpeg/libavcodec/wmadec.c14
-rw-r--r--src/libffmpeg/libavcodec/wmv2.c6
-rw-r--r--src/libffmpeg/libavutil/Makefile.am3
-rw-r--r--src/libffmpeg/libavutil/bswap.h14
-rw-r--r--src/libffmpeg/libavutil/common.h129
-rw-r--r--src/libffmpeg/libavutil/internal.h93
-rw-r--r--src/libffmpeg/libavutil/intreadwrite.h42
-rw-r--r--src/libffmpeg/libavutil/rational.c8
-rw-r--r--src/libffmpeg/video_decoder.c223
-rw-r--r--src/libffmpeg/xine_decoder.c364
78 files changed, 9884 insertions, 884 deletions
diff --git a/CREDITS b/CREDITS
index a2c7ef6fb..fb2f15235 100644
--- a/CREDITS
+++ b/CREDITS
@@ -12,7 +12,7 @@ updates (the word 'maintainer' is intentionally avoided here).
project version mediator
-----------------------------------------------------------------------
-ffmpeg 51.11.0 Mike Melanson
+ffmpeg 51.28.0 Mike Melanson
goom 2k4-0
gsm610 1.0.10 Mike Melanson
liba52 0.7.4
diff --git a/ChangeLog b/ChangeLog
index ce7ad6fc9..e546ebde1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -42,6 +42,16 @@ xine-lib (1.1.4)
* Implement a True Audio files demuxer. [bug #1586381]
* Allow decoding of MusePack SV 7.x files (7.1 files at least play fine).
* Fix demuxing of uncompressed VobSub subtitles in Matroska files
+ * ffmpeg update to 51.28.0
+ * Workaround ffmpeg buggy codecs that don't release their DR1 frames.
+ * Fix several segfaults and freezing problem with H264 streams that use a lot
+ of reference frames (eg. 15)
+ * Initial support to enable/disable ffmpeg codecs. Codecs may be disabled in
+ groups by --disable-ffmpeg-uncommon-codecs/--disable-ffmpeg-popular-codecs
+ Think of "uncommon" codecs as what people would never want to play with their
+ PDAs (they will save memory by removing them).
+ Note: currently both uncommon/popular codecs are _built_ but disabled;
+ that is, the build system still needs some improvements to really save memory.
xine-lib (1.1.3)
* Security fixes:
diff --git a/configure.ac b/configure.ac
index 0442334a5..1027aa604 100644
--- a/configure.ac
+++ b/configure.ac
@@ -208,7 +208,6 @@ AC_CHECK_GENERATE_INTTYPES([include])
AM_CONDITIONAL(GENERATED_INTTYPES_H, test x"$ac_cv_header_inttypes_h" != x"yes")
AC_CHECK_TYPE(ssize_t, :, AC_DEFINE(ssize_t, __int64, [define ssize_t to __int64 if it's missing in default includes]))
-
dnl ---------------------------------------------
dnl threads and OS specific stuff
dnl ---------------------------------------------
@@ -393,13 +392,78 @@ use internal ffmpeg.
*********************************************************************])
else
AC_MSG_RESULT([using included ffmpeg])
- LIBFFMPEG_CPPFLAGS="-DSIMPLE_IDCT -DHAVE_AV_CONFIG_H -DRUNTIME_CPUDETECT -DCONFIG_RISKY -DCONFIG_DECODERS -DXINE_MPEG_ENCODER -DCONFIG_ZLIB -DCONFIG_GPL -DCONFIG_MP3_DECODER -DCONFIG_MP2_DECODER -DCONFIG_DVVIDEO_DECODER"
- AC_SUBST([LIBFFMPEG_CPPFLAGS])
-
+ LIBFFMPEG_CPPFLAGS="-DHAVE_AV_CONFIG_H -DRUNTIME_CPUDETECT -DXINE_MPEG_ENCODER -D_ISOC9X_SOURCE -DCONFIG_DECODERS"
AC_CHECK_TYPES(int_fast8_t, [], [LIBFFMPEG_CPPFLAGS="$LIBFFMPEG_CPPFLAGS -DEMULATE_FAST_INT"])
+ AC_SUBST([LIBFFMPEG_CPPFLAGS])
fi
AM_CONDITIONAL(HAVE_FFMPEG, test "x$with_external_ffmpeg" = "xyes")
+
+AC_ARG_ENABLE([ffmpeg_uncommon_codecs],
+ AS_HELP_STRING([--disable-ffmpeg-uncommon-codecs], [don't build uncommon ffmpeg codecs]))
+
+AC_ARG_ENABLE([ffmpeg_popular_codecs],
+ AS_HELP_STRING([--disable-ffmpeg-popular-codecs], [don't build popular ffmpeg codecs]))
+
+ffmpeg_config_h=src/libffmpeg/ffmpeg_config.h
+echo "/* Automatically generated */" > $ffmpeg_config_h
+
+dnl uncommon ffmpeg codecs
+ffmpeg_uncommon_codecs="AASC ASV1 ASV2 AVS CSCD CYUV DVVIDEO EIGHTBPS FLIC FLV FOURXM FRAPS HUFFYUV IDCIN INTERPLAY_VIDEO KMVC LOCO MMVIDEO NUV QDRAW QPEG ROQ RPZA SMACKER SMC SNOW TRUEMOTION1 TRUEMOTION2 TSCC ULTI VCR1 VMDVIDEO WNV1 XAN_WC3 XL ZMBV ALAC AMR_NB AMR_WB LIBGSM MACE3 MACE6 SHORTEN SMACKAUD TRUESPEECH TTA VMDAUDIO PCM_ALAW PCM_MULAW PCM_S8 PCM_S16BE PCM_S16LE PCM_S24BE PCM_S24DAUD PCM_S24LE PCM_S32BE PCM_S32LE PCM_U8 PCM_U16BE PCM_U16LE PCM_U24BE PCM_U24LE PCM_U32BE PCM_U32LE INTERPLAY_DPCM ROQ_DPCM SOL_DPCM VQA XAN_DPCM ADPCM_4XM ADPCM_CT ADPCM_EA ADPCM_IMA_DK3 ADPCM_IMA_DK4 ADPCM_IMA_QT ADPCM_IMA_SMJPEG ADPCM_IMA_WAV ADPCM_IMA_WS ADPCM_MS ADPCM_SBPRO_2 ADPCM_SBPRO_3 ADPCM_SBPRO_4 ADPCM_XA ADPCM_YAMAHA"
+for ucname in $ffmpeg_uncommon_codecs; do
+ config_name="CONFIG_${ucname}_DECODER"
+ enabled_name="ENABLE_${ucname}_DECODER"
+
+ if test x$enable_ffmpeg_uncommon_codecs != xno; then
+ echo "#define $config_name 1" >> $ffmpeg_config_h
+ echo "#define $enabled_name 1" >> $ffmpeg_config_h
+ else
+ echo "#define $enabled_name 0" >> $ffmpeg_config_h
+ fi
+done
+
+dnl popular ffmpeg codecs
+ffmpeg_popular_codecs="CINEPAK FLASHSV H261 H263 H263I H264 INDEO2 INDEO3 MJPEG MJPEGB MPEG1VIDEO MPEG2VIDEO MPEG4 MPEGVIDEO MSMPEG4V1 MSMPEG4V2 MSMPEG4V3 MSRLE MSVIDEO1 QTRLE RV10 RV20 SVQ1 SVQ3 VC1 VP3 VP5 VP6 VP6F WMV1 WMV2 WMV3 COOK DTS FLAC MP2 MP3 QDM2 RA_144 RA_288 WAVPACK WMAV1 WMAV2 ADPCM_SWF"
+
+for ucname in $ffmpeg_popular_codecs; do
+ config_name="CONFIG_${ucname}_DECODER"
+ enabled_name="ENABLE_${ucname}_DECODER"
+
+ if test x$enable_ffmpeg_popular_codecs != xno; then
+ echo "#define $config_name 1" >> $ffmpeg_config_h
+ echo "#define $enabled_name 1" >> $ffmpeg_config_h
+ else
+ echo "#define $enabled_name 0" >> $ffmpeg_config_h
+ fi
+done
+
+dnl disabled ffmpeg codecs
+ffmpeg_disabled_codecs="BMP CAVS CLJR DSICINVIDEO FFV1 FFVHUFF GIF MDEC MPEG_XVMC MSZH PNG RAWVIDEO SP5X TARGA TIERTEXSEQVIDEO TIFF VMNC ZLIB DSICINAUDIO IMC MP3ADU MP3ON4 MPC7 SONIC WS_SND1 ADPCM_ADX ADPCM_G726 DVBSUB DVDSUB THEORA AAC MPEG4AAC AC3 VORBIS"
+for ucname in $ffmpeg_disabled_codecs; do
+ config_name="CONFIG_${ucname}_DECODER"
+ enabled_name="ENABLE_${ucname}_DECODER"
+
+ echo "#define $enabled_name 0" >> $ffmpeg_config_h
+done
+
+dnl disabled ffmpeg encoders
+ffmpeg_extra_encoders="H263P JPEGLS LJPEG PAM PBM PGM PGMYUV PPM SONIC_LS"
+for ucname in $ffmpeg_uncommon_codecs $ffmpeg_popular_codecs $ffmpeg_disabled_codecs $ffmpeg_extra_encoders; do
+ config_name="CONFIG_${ucname}_ENCODER"
+ enabled_name="ENABLE_${ucname}_ENCODER"
+
+ echo "#define $enabled_name 0" >> $ffmpeg_config_h
+done
+
+dnl disabled parsers
+ffmpeg_parsers="AAC AC3 CAVSVIDEO DVBSUB DVDSUB H261 H263 H264 MJPEG MPEG4VIDEO MPEGAUDIO MPEGVIDEO PNM"
+for ucname in $ffmpeg_parsers; do
+ config_name="CONFIG_${ucname}_PARSER"
+ enabled_name="ENABLE_${ucname}_PARSER"
+
+ echo "#define $enabled_name 0" >> $ffmpeg_config_h
+done
+
LIBMPEG2_CFLAGS=""
AC_CHECK_DECL(lrintf,[
@@ -415,6 +479,8 @@ AC_CHECK_DECL(rintf,[
#include <math.h>
])
+AC_CHECK_FUNCS(memalign)
+
AC_ARG_ENABLE([altivec],
AS_HELP_STRING([--disable-altivec], [do not use assembly codes for Motorola 74xx CPUs]))
@@ -1994,6 +2060,25 @@ if test x"$have_ip_mreqn" = "xyes"; then
fi
dnl ---------------------------------------------
+dnl restrict keyword finding (from gstreamer)
+dnl ---------------------------------------------
+restrict=""
+for restrict_keyword in restrict __restrict__ __restrict; do
+ AC_MSG_CHECKING(for restrict keyword $restrict_keyword)
+ AC_TRY_COMPILE([],[ void foo(char * $restrict_keyword p); ],[
+ KEYWORD_FOUND=yes && AC_MSG_RESULT(yes) ],[
+ KEYWORD_FOUND=no && AC_MSG_RESULT(no) ])
+ if test x$KEYWORD_FOUND = xyes; then
+ restrict="$restrict_keyword"
+ break
+ fi
+done
+if test x$restrict = x; then
+ AC_MSG_ERROR(No restrict keyword found)
+fi
+AC_DEFINE_UNQUOTED(restrict, $restrict, [restrict keyword])
+
+dnl ---------------------------------------------
dnl ASM ALIGN is power of two ?
dnl ---------------------------------------------
asmalign_pot="unknown"
@@ -2082,7 +2167,7 @@ CC_ATTRIBUTE_SENTINEL
AC_OPTIMIZATIONS
-enable_ffmmx="no"
+arch_x86="no"
enable_armv4l="no"
case "$host_or_hostalias" in
@@ -2093,9 +2178,9 @@ case "$host_or_hostalias" in
dnl like the extended asm() or __attribute(__cdecl__), or other direct
dnl mmx/sse/3dnow assembler instructions.
dnl
- AC_DEFINE_UNQUOTED(ARCH_X86,,[Define this if you're running x86 architecture])
+ AC_DEFINE_UNQUOTED(ARCH_X86_32,,[Define this if you're running x86 architecture 32 bits])
AC_DEFINE(FPM_INTEL,1,[Define to select libmad fixed point arithmetic implementation])
- enable_ffmmx="yes"
+ arch_x86="yes"
enable_impure_text="yes"
case "$host_or_hostalias" in
@@ -2105,9 +2190,9 @@ case "$host_or_hostalias" in
esac
;;
x86_64-*)
- AC_DEFINE_UNQUOTED(ARCH_X86_64,,[Define this if you're running x86 architecture])
+ AC_DEFINE_UNQUOTED(ARCH_X86_64,,[Define this if you're running x86 architecture 64 bits])
AC_DEFINE(FPM_64BIT,1,[Define to select libmad fixed point arithmetic implementation])
- enable_ffmmx="yes"
+ arch_x86="yes"
;;
powerpc-*-darwin*)
dnl avoid ppc compilation crash
@@ -2186,7 +2271,12 @@ if test "x$has_vis" = "xyes"; then
fi
AM_CONDITIONAL(ENABLE_VIS, test x"$has_vis" = "xyes")
-AM_CONDITIONAL(HAVE_FFMMX, test x"$enable_ffmmx" = "xyes")
+if test "x$arch_x86" = "xyes"; then
+ AC_DEFINE_UNQUOTED(ARCH_X86,,[Define this if you're running x86 architecture])
+ AC_DEFINE_UNQUOTED(HAVE_MMX,,[Define this if you can compile MMX asm instructions])
+fi
+AM_CONDITIONAL(ARCH_X86, test x"$arch_x86" = "xyes")
+AM_CONDITIONAL(HAVE_MMX, test x"$arch_x86" = "xyes")
case $host_os in
darwin*)
diff --git a/src/libffmpeg/audio_decoder.c b/src/libffmpeg/audio_decoder.c
index 22f567e9c..8f0425775 100644
--- a/src/libffmpeg/audio_decoder.c
+++ b/src/libffmpeg/audio_decoder.c
@@ -17,7 +17,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
- * $Id: audio_decoder.c,v 1.31 2006/12/26 03:20:12 dgp85 Exp $
+ * $Id: audio_decoder.c,v 1.32 2007/01/13 21:19:52 miguelfreitas Exp $
*
* xine audio decoder plugin using ffmpeg
*
@@ -25,6 +25,7 @@
#ifdef HAVE_CONFIG_H
#include "config.h"
+#include "ffmpeg_config.h"
#endif
#include <stdlib.h>
@@ -107,8 +108,8 @@ static const ff_codec_t ff_audio_lookup[] = {
{BUF_AUDIO_TRUESPEECH, CODEC_ID_TRUESPEECH, "TrueSpeech (ffmpeg)"},
{BUF_AUDIO_TTA, CODEC_ID_TTA, "True Audio Lossless (ffmpeg)"},
{BUF_AUDIO_SMACKER, CODEC_ID_SMACKAUDIO, "Smacker (ffmpeg)"},
- {BUF_AUDIO_FLVADPCM, CODEC_ID_ADPCM_SWF, "Flash ADPCM (ffmpeg)"},
- {BUF_AUDIO_WAVPACK, CODEC_ID_WAVPACK, "WavPack (ffmpeg)"},
+ {BUF_AUDIO_FLVADPCM, CODEC_ID_ADPCM_SWF, "Flash ADPCM (ffmpeg)"},
+ {BUF_AUDIO_WAVPACK, CODEC_ID_WAVPACK, "WavPack (ffmpeg)"},
};
@@ -443,39 +444,106 @@ void *init_audio_plugin (xine_t *xine, void *data) {
}
static uint32_t supported_audio_types[] = {
+ #ifdef CONFIG_WMAV1_DECODER
BUF_AUDIO_WMAV1,
+ #endif
+ #ifdef CONFIG_WMAV2_DECODER
BUF_AUDIO_WMAV2,
+ #endif
+ #ifdef CONFIG_RA_144_DECODER
BUF_AUDIO_14_4,
+ #endif
+ #ifdef CONFIG_RA_288_DECODER
BUF_AUDIO_28_8,
- BUF_AUDIO_MULAW,
- BUF_AUDIO_ALAW,
+ #endif
+ #ifdef CONFIG_MP3_DECODER
+ BUF_AUDIO_MPEG,
+ #endif
+ #ifdef CONFIG_ADPCM_MS_DECODER
BUF_AUDIO_MSADPCM,
+ #endif
+ #ifdef CONFIG_ADPCM_IMA_QT_DECODER
BUF_AUDIO_QTIMAADPCM,
+ #endif
+ #ifdef CONFIG_ADPCM_IMA_WAV_DECODER
BUF_AUDIO_MSIMAADPCM,
+ #endif
+ #ifdef CONFIG_ADPCM_IMA_DK3_DECODER
BUF_AUDIO_DK3ADPCM,
+ #endif
+ #ifdef CONFIG_ADPCM_IMA_DK4_DECODER
BUF_AUDIO_DK4ADPCM,
+ #endif
+ #ifdef CONFIG_ADPCM_IMA_WS_DECODER
+ BUF_AUDIO_VQA_IMA,
+ #endif
+ #ifdef CONFIG_ADPCM_IMA_SMJPEG_DECODER
+ BUF_AUDIO_SMJPEG_IMA,
+ #endif
+ #ifdef CONFIG_ADPCM_XA_DECODER
BUF_AUDIO_XA_ADPCM,
+ #endif
+ #ifdef CONFIG_ADPCM_4XM_DECODER
+ BUF_AUDIO_4X_ADPCM,
+ #endif
+ #ifdef CONFIG_ADPCM_EA_DECODER
+ BUF_AUDIO_EA_ADPCM,
+ #endif
+ #ifdef CONFIG_PCM_MULAW_DECODER
+ BUF_AUDIO_MULAW,
+ #endif
+ #ifdef CONFIG_PCM_ALAW_DECODER
+ BUF_AUDIO_ALAW,
+ #endif
+ #ifdef CONFIG_ROQ_DPCM_DECODER
BUF_AUDIO_ROQ,
+ #endif
+ #ifdef CONFIG_INTERPLAY_DPCM_DECODER
BUF_AUDIO_INTERPLAY,
- BUF_AUDIO_VQA_IMA,
- BUF_AUDIO_4X_ADPCM,
+ #endif
+ #ifdef CONFIG_MACE3_DECODER
BUF_AUDIO_MAC3,
+ #endif
+ #ifdef CONFIG_MACE6_DECODER
BUF_AUDIO_MAC6,
+ #endif
+ #ifdef CONFIG_XAN_DPCM_DECODER
BUF_AUDIO_XAN_DPCM,
+ #endif
+ #ifdef CONFIG_VMDAUDIO_DECODER
BUF_AUDIO_VMD,
- BUF_AUDIO_EA_ADPCM,
- BUF_AUDIO_SMJPEG_IMA,
+ #endif
+ #ifdef CONFIG_FLAC_DECODER
BUF_AUDIO_FLAC,
- BUF_AUDIO_ALAC,
+ #endif
+ #ifdef CONFIG_SHORTEN_DECODER
BUF_AUDIO_SHORTEN,
- BUF_AUDIO_MPEG,
+ #endif
+ #ifdef CONFIG_ALAC_DECODER
+ BUF_AUDIO_ALAC,
+ #endif
+ #ifdef CONFIG_QDM2_DECODER
BUF_AUDIO_QDESIGN2,
+ #endif
+ #ifdef CONFIG_COOK_DECODER
BUF_AUDIO_COOK,
+ #endif
+ #ifdef CONFIG_TRUESPEECH_DECODER
BUF_AUDIO_TRUESPEECH,
+ #endif
+ #ifdef CONFIG_TTA_DECODER
BUF_AUDIO_TTA,
+ #endif
+ #ifdef CONFIG_SMACKAUDIO_DECODER
BUF_AUDIO_SMACKER,
+ #endif
+ #ifdef CONFIG_ADPCM_SWF_DECODER
BUF_AUDIO_FLVADPCM,
+ #endif
+ #ifdef CONFIG_WAVPACK_DECODER
BUF_AUDIO_WAVPACK,
+ #endif
+
0
};
diff --git a/src/libffmpeg/diff_to_ffmpeg_cvs.txt b/src/libffmpeg/diff_to_ffmpeg_cvs.txt
index 7e19e643c..b813b3ab2 100644
--- a/src/libffmpeg/diff_to_ffmpeg_cvs.txt
+++ b/src/libffmpeg/diff_to_ffmpeg_cvs.txt
@@ -1,74 +1,79 @@
-Index: libavcodec/avcodec.h
+Index: libavutil/internal.h
===================================================================
---- libavcodec/avcodec.h (revision 7221)
-+++ libavcodec/avcodec.h (working copy)
-@@ -47,6 +47,13 @@
- #define AV_TIME_BASE 1000000
- #define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE}
+--- libavutil/internal.h (revision 7433)
++++ libavutil/internal.h (working copy)
+@@ -181,11 +181,15 @@
+ #include <assert.h>
-+/* FIXME: We cannot use ffmpeg's XvMC capabilities, since that would require
-+ * linking the ffmpeg plugin against XvMC libraries, which is a bad thing,
-+ * since they are output dependend.
-+ * The correct fix would be to reimplement the XvMC functions libavcodec uses
-+ * and do the necessary talking with our XvMC output plugin there. */
-+#undef HAVE_XVMC
-+
- enum CodecID {
- CODEC_ID_NONE,
- CODEC_ID_MPEG1VIDEO,
-@@ -2686,6 +2693,13 @@
+ /* dprintf macros */
+-#ifdef DEBUG
+-# define dprintf(fmt,...) av_log(NULL, AV_LOG_DEBUG, fmt, __VA_ARGS__)
+-#else
+-# define dprintf(fmt,...)
+-#endif
++# ifdef DEBUG
++# ifdef __GNUC__
++# define dprintf(fmt,args...) av_log(NULL, AV_LOG_DEBUG, fmt, ##args)
++# else
++# define dprintf(fmt,...) av_log(NULL, AV_LOG_DEBUG, fmt, __VA_ARGS__)
++# endif
++# else
++# define dprintf(fmt,...)
++# endif
- extern unsigned int av_xiphlacing(unsigned char *s, unsigned int v);
+ #define av_abort() do { av_log(NULL, AV_LOG_ERROR, "Abort at %s:%d\n", __FILE__, __LINE__); abort(); } while (0)
-+/* unused static macro */
-+#if defined(__GNUC__) && !defined(DEBUG)
-+/* since we do not compile the encoder part of ffmpeg, some static
-+ * functions will be unused; this is ok, the compiler will take care */
-+# define static static __attribute__((__unused__))
-+#endif
-+
- #ifdef __cplusplus
- }
- #endif
-Index: libavcodec/dsputil.h
+Index: libavutil/integer.c
===================================================================
---- libavcodec/dsputil.h (revision 7221)
-+++ libavcodec/dsputil.h (working copy)
-@@ -33,6 +33,9 @@
- #include "common.h"
- #include "avcodec.h"
-
-+#if defined(ARCH_X86) || defined(ARCH_X86_64)
-+#define HAVE_MMX 1
-+#endif
+--- libavutil/integer.c (revision 7433)
++++ libavutil/integer.c (working copy)
+@@ -126,8 +126,8 @@
+ AVInteger quot_temp;
+ if(!quot) quot = &quot_temp;
- //#define DEBUG
- /* dct code */
-Index: libavcodec/motion_est.c
+- assert((int16_t)a[AV_INTEGER_SIZE-1] >= 0 && (int16_t)b[AV_INTEGER_SIZE-1] >= 0);
+- assert(av_log2(b)>=0);
++ assert((int16_t)a.v[AV_INTEGER_SIZE-1] >= 0 && (int16_t)b.v[AV_INTEGER_SIZE-1] >= 0);
++ assert(av_log2_i(b)>=0);
+
+ if(i > 0)
+ b= av_shr_i(b, -i);
+Index: libavutil/common.h
===================================================================
---- libavcodec/motion_est.c (revision 7221)
-+++ libavcodec/motion_est.c (working copy)
-@@ -23,6 +23,9 @@
- * new Motion Estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
- */
+--- libavutil/common.h (revision 7433)
++++ libavutil/common.h (working copy)
+@@ -345,4 +345,27 @@
+ char *av_strdup(const char *s);
+ void av_freep(void *ptr);
-+/* motion estimation only needed for encoders */
-+#ifdef CONFIG_ENCODERS
++/* xine: inline causes trouble for debug compiling */
++#ifdef DISABLE_INLINE
++# ifdef inline
++# undef inline
++# endif
++# ifdef always_inline
++# undef always_inline
++# endif
++# define inline
++# define always_inline
++#endif
+
- /**
- * @file motion_est.c
- * Motion estimation.
-@@ -2112,3 +2115,5 @@
- }
- }
- }
++/* xine: define ASMALIGN here since it's cleaner that generating it in the configure */
++#if HAVE_ASMALIGN_POT
++# define ASMALIGN(ZEROBITS) ".align " #ZEROBITS "\n\t"
++#else
++# define ASMALIGN(ZEROBITS) ".align 1<<" #ZEROBITS "\n\t"
++#endif
++
++/* xine: another config.h with codecs to use */
++#include "ffmpeg_config.h"
++
+ #endif /* COMMON_H */
+
-+#endif /* CONFIG_ENCODERS */
Index: libavcodec/mjpeg.c
===================================================================
-diff -u -r1.38 mjpeg.c
---- libavcodec/mjpeg.c 4 Dec 2006 22:25:19 -0000 1.38
-+++ libavcodec/mjpeg.c 30 Dec 2006 22:21:34 -0000
+--- libavcodec/mjpeg.c (revision 7433)
++++ libavcodec/mjpeg.c (working copy)
@@ -38,6 +38,13 @@
#include "mpegvideo.h"
#include "bytestream.h"
@@ -83,27 +88,61 @@ diff -u -r1.38 mjpeg.c
/* use two quantizer tables (one for luminance and one for chrominance) */
/* not yet working */
#undef TWOMATRIXES
-Index: libavcodec/mpeg12.c
+Index: libavcodec/i386/dsputil_mmx.c
===================================================================
---- libavcodec/mpeg12.c (revision 7221)
-+++ libavcodec/mpeg12.c (working copy)
-@@ -36,6 +36,13 @@
- //#include <assert.h>
-
-
-+/* if xine's MPEG encoder is enabled, enable the encoding features in
-+ * this particular module */
-+#if defined(XINE_MPEG_ENCODER) && !defined(CONFIG_ENCODERS)
-+#define CONFIG_ENCODERS
-+#endif
+--- libavcodec/i386/dsputil_mmx.c (revision 7433)
++++ libavcodec/i386/dsputil_mmx.c (working copy)
+@@ -2545,33 +2545,39 @@
+ "pmullw %%mm5, %%mm2 \n\t" // (s-dx)*dy
+ "pmullw %%mm4, %%mm1 \n\t" // dx*(s-dy)
+
+- "movd %4, %%mm5 \n\t"
+- "movd %3, %%mm4 \n\t"
++ "movd %3, %%mm5 \n\t"
++ "movd %2, %%mm4 \n\t"
+ "punpcklbw %%mm7, %%mm5 \n\t"
+ "punpcklbw %%mm7, %%mm4 \n\t"
+ "pmullw %%mm5, %%mm3 \n\t" // src[1,1] * dx*dy
+ "pmullw %%mm4, %%mm2 \n\t" // src[0,1] * (s-dx)*dy
+
+- "movd %2, %%mm5 \n\t"
+- "movd %1, %%mm4 \n\t"
++ "movd %1, %%mm5 \n\t"
++ "movd %0, %%mm4 \n\t"
+ "punpcklbw %%mm7, %%mm5 \n\t"
+ "punpcklbw %%mm7, %%mm4 \n\t"
+ "pmullw %%mm5, %%mm1 \n\t" // src[1,0] * dx*(s-dy)
+ "pmullw %%mm4, %%mm0 \n\t" // src[0,0] * (s-dx)*(s-dy)
+- "paddw %5, %%mm1 \n\t"
++ "paddw %4, %%mm1 \n\t"
+ "paddw %%mm3, %%mm2 \n\t"
+ "paddw %%mm1, %%mm0 \n\t"
+ "paddw %%mm2, %%mm0 \n\t"
+
+- "psrlw %6, %%mm0 \n\t"
++ "psrlw %5, %%mm0 \n\t"
+ "packuswb %%mm0, %%mm0 \n\t"
+- "movd %%mm0, %0 \n\t"
+
+- : "=m"(dst[x+y*stride])
++ :
+ : "m"(src[0]), "m"(src[1]),
+ "m"(src[stride]), "m"(src[stride+1]),
+ "m"(*r4), "m"(shift2)
+ );
++
++ asm volatile(
++ "movd %%mm0, %0 \n\t"
+
-+
- /* Start codes. */
- #define SEQ_END_CODE 0x000001b7
- #define SEQ_START_CODE 0x000001b3
++ : "=m"(dst[x+y*stride])
++ :
++ );
+ src += stride;
+ }
+ src += 4-h*stride;
Index: libavcodec/mpegvideo.c
===================================================================
---- libavcodec/mpegvideo.c (revision 7221)
+--- libavcodec/mpegvideo.c (revision 7433)
+++ libavcodec/mpegvideo.c (working copy)
@@ -40,6 +40,14 @@
//#undef NDEBUG
@@ -163,7 +202,7 @@ Index: libavcodec/mpegvideo.c
if(avctx->rc_buffer_size){
RateControlContext *rcc= &s->rc_context;
-@@ -4575,6 +4593,8 @@
+@@ -4574,6 +4592,8 @@
case CODEC_ID_MPEG1VIDEO:
case CODEC_ID_MPEG2VIDEO:
mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
@@ -172,7 +211,7 @@ Index: libavcodec/mpegvideo.c
case CODEC_ID_MPEG4:
mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
case CODEC_ID_MSMPEG4V2:
-@@ -4595,6 +4615,7 @@
+@@ -4594,6 +4614,7 @@
h263_encode_mb(s, s->block, motion_x, motion_y); break;
case CODEC_ID_MJPEG:
mjpeg_encode_mb(s, s->block); break;
@@ -180,7 +219,7 @@ Index: libavcodec/mpegvideo.c
default:
assert(0);
}
-@@ -4816,6 +4837,8 @@
+@@ -4815,6 +4836,8 @@
+sse(s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
}
@@ -189,7 +228,7 @@ Index: libavcodec/mpegvideo.c
static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
MpegEncContext *s= arg;
-@@ -4859,6 +4882,7 @@
+@@ -4860,6 +4883,7 @@
}
return 0;
}
@@ -197,7 +236,7 @@ Index: libavcodec/mpegvideo.c
static int mb_var_thread(AVCodecContext *c, void *arg){
MpegEncContext *s= arg;
-@@ -4883,6 +4907,8 @@
+@@ -4886,6 +4910,8 @@
}
static void write_slice_end(MpegEncContext *s){
@@ -206,7 +245,7 @@ Index: libavcodec/mpegvideo.c
if(s->codec_id==CODEC_ID_MPEG4){
if(s->partitioned_frame){
ff_mpeg4_merge_partitions(s);
-@@ -4892,6 +4918,7 @@
+@@ -4895,6 +4921,7 @@
}else if(s->out_format == FMT_MJPEG){
ff_mjpeg_stuffing(&s->pb);
}
@@ -214,7 +253,7 @@ Index: libavcodec/mpegvideo.c
align_put_bits(&s->pb);
flush_put_bits(&s->pb);
-@@ -4945,10 +4972,13 @@
+@@ -4950,10 +4977,13 @@
case CODEC_ID_FLV1:
s->gob_index = ff_h263_get_gob_height(s);
break;
@@ -228,7 +267,7 @@ Index: libavcodec/mpegvideo.c
}
s->resync_mb_x=0;
-@@ -5021,9 +5051,12 @@
+@@ -5026,9 +5056,12 @@
if(s->start_mb_y != mb_y || mb_x!=0){
write_slice_end(s);
@@ -241,7 +280,7 @@ Index: libavcodec/mpegvideo.c
}
assert((put_bits_count(&s->pb)&7) == 0);
-@@ -5047,19 +5080,25 @@
+@@ -5052,19 +5085,25 @@
}
switch(s->codec_id){
@@ -267,18 +306,18 @@ Index: libavcodec/mpegvideo.c
}
if(s->flags&CODEC_FLAG_PASS1){
-@@ -5172,7 +5211,10 @@
-
+@@ -5286,7 +5325,10 @@
+ backup_s.dquant = 0;
s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
s->mb_intra= 0;
+/* xine: do not need this for decode or MPEG-1 encoding modes */
+#if 0
- ff_mpeg4_set_direct_mv(s, mx, my);
+ ff_mpeg4_set_direct_mv(s, 0, 0);
+#endif /* #if 0 */
encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
- &dmin, &next_block, mx, my);
+ &dmin, &next_block, 0, 0);
}
-@@ -5354,7 +5396,10 @@
+@@ -5400,7 +5442,10 @@
s->mb_intra= 0;
motion_x=s->b_direct_mv_table[xy][0];
motion_y=s->b_direct_mv_table[xy][1];
@@ -287,9 +326,9 @@ Index: libavcodec/mpegvideo.c
ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
+#endif /* #if 0 */
break;
- case CANDIDATE_MB_TYPE_BIDIR:
- s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
-@@ -5462,8 +5507,11 @@
+ case CANDIDATE_MB_TYPE_DIRECT0:
+ s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
+@@ -5513,8 +5558,11 @@
}
//not beautiful here but we must write it before flushing so it has to be here
@@ -301,7 +340,7 @@ Index: libavcodec/mpegvideo.c
write_slice_end(s);
-@@ -5531,6 +5579,8 @@
+@@ -5582,6 +5630,8 @@
}
if(s->adaptive_quant){
@@ -310,7 +349,7 @@ Index: libavcodec/mpegvideo.c
switch(s->codec_id){
case CODEC_ID_MPEG4:
ff_clean_mpeg4_qscales(s);
-@@ -5541,6 +5591,7 @@
+@@ -5592,6 +5642,7 @@
ff_clean_h263_qscales(s);
break;
}
@@ -318,7 +357,7 @@ Index: libavcodec/mpegvideo.c
s->lambda= s->lambda_table[0];
//FIXME broken
-@@ -5562,10 +5613,13 @@
+@@ -5613,10 +5664,13 @@
s->me.mb_var_sum_temp =
s->me.mc_mb_var_sum_temp = 0;
@@ -332,7 +371,7 @@ Index: libavcodec/mpegvideo.c
s->me.scene_change_score=0;
-@@ -5596,6 +5650,8 @@
+@@ -5647,6 +5701,8 @@
ff_update_duplicate_context(s->thread_context[i], s);
}
@@ -341,7 +380,7 @@ Index: libavcodec/mpegvideo.c
ff_init_me(s);
/* Estimate motion for every MB */
-@@ -5610,6 +5666,8 @@
+@@ -5661,6 +5717,8 @@
s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
}else /* if(s->pict_type == I_TYPE) */{
@@ -350,7 +389,7 @@ Index: libavcodec/mpegvideo.c
/* I-Frame */
for(i=0; i<s->mb_stride*s->mb_height; i++)
s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
-@@ -5633,6 +5691,8 @@
+@@ -5684,6 +5742,8 @@
//printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
}
@@ -359,7 +398,7 @@ Index: libavcodec/mpegvideo.c
if(!s->umvplus){
if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
-@@ -5686,6 +5746,7 @@
+@@ -5737,6 +5797,7 @@
}
}
}
@@ -367,7 +406,7 @@ Index: libavcodec/mpegvideo.c
if (estimate_qp(s, 0) < 0)
return -1;
-@@ -5717,6 +5778,8 @@
+@@ -5768,6 +5829,8 @@
s->last_bits= put_bits_count(&s->pb);
switch(s->out_format) {
@@ -376,7 +415,7 @@ Index: libavcodec/mpegvideo.c
case FMT_MJPEG:
mjpeg_picture_header(s);
break;
-@@ -5745,11 +5808,15 @@
+@@ -5796,11 +5859,15 @@
else
h263_encode_picture_header(s, picture_number);
break;
@@ -392,11 +431,49 @@ Index: libavcodec/mpegvideo.c
default:
assert(0);
}
+Index: libavcodec/mpeg12.c
+===================================================================
+--- libavcodec/mpeg12.c (revision 7433)
++++ libavcodec/mpeg12.c (working copy)
+@@ -36,6 +36,13 @@
+ //#include <assert.h>
+
+
++/* if xine's MPEG encoder is enabled, enable the encoding features in
++ * this particular module */
++#if defined(XINE_MPEG_ENCODER) && !defined(CONFIG_ENCODERS)
++#define CONFIG_ENCODERS
++#endif
++
++
+ /* Start codes. */
+ #define SEQ_END_CODE 0x000001b7
+ #define SEQ_START_CODE 0x000001b3
+Index: libavcodec/motion_est.c
+===================================================================
+--- libavcodec/motion_est.c (revision 7433)
++++ libavcodec/motion_est.c (working copy)
+@@ -23,6 +23,9 @@
+ * new Motion Estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
+ */
+
++/* motion estimation only needed for encoders */
++#ifdef CONFIG_ENCODERS
++
+ /**
+ * @file motion_est.c
+ * Motion estimation.
+@@ -2142,3 +2145,5 @@
+ }
+ }
+ }
++
++#endif /* CONFIG_ENCODERS */
Index: libavcodec/snow.c
===================================================================
---- libavcodec/snow.c (revision 7221)
+--- libavcodec/snow.c (revision 7433)
+++ libavcodec/snow.c (working copy)
-@@ -1977,6 +1977,7 @@
+@@ -1982,6 +1982,7 @@
#define P_MV1 P[9]
#define FLAG_QPEL 1 //must be 1
@@ -404,15 +481,15 @@ Index: libavcodec/snow.c
static int encode_q_branch(SnowContext *s, int level, int x, int y){
uint8_t p_buffer[1024];
uint8_t i_buffer[1024];
-@@ -2205,6 +2206,7 @@
+@@ -2210,6 +2211,7 @@
return score;
}
}
+#endif
- static always_inline int same_block(BlockNode *a, BlockNode *b){
+ static av_always_inline int same_block(BlockNode *a, BlockNode *b){
if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
-@@ -2319,6 +2321,7 @@
+@@ -2322,6 +2324,7 @@
}
}
@@ -420,7 +497,7 @@ Index: libavcodec/snow.c
static void encode_blocks(SnowContext *s, int search){
int x, y;
int w= s->b_width;
-@@ -2340,6 +2343,7 @@
+@@ -2343,6 +2346,7 @@
}
}
}
@@ -428,7 +505,7 @@ Index: libavcodec/snow.c
static void decode_blocks(SnowContext *s){
int x, y;
-@@ -3910,6 +3914,7 @@
+@@ -3931,6 +3935,7 @@
}
}
@@ -436,7 +513,7 @@ Index: libavcodec/snow.c
static int encode_init(AVCodecContext *avctx)
{
SnowContext *s = avctx->priv_data;
-@@ -3997,6 +4002,7 @@
+@@ -4018,6 +4023,7 @@
return 0;
}
@@ -444,7 +521,7 @@ Index: libavcodec/snow.c
static int frame_start(SnowContext *s){
AVFrame tmp;
-@@ -4035,6 +4041,7 @@
+@@ -4056,6 +4062,7 @@
return 0;
}
@@ -452,7 +529,7 @@ Index: libavcodec/snow.c
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
SnowContext *s = avctx->priv_data;
RangeCoder * const c= &s->c;
-@@ -4288,6 +4295,7 @@
+@@ -4308,6 +4315,7 @@
return ff_rac_terminate(c);
}
@@ -460,7 +537,7 @@ Index: libavcodec/snow.c
static void common_end(SnowContext *s){
int plane_index, level, orientation, i;
-@@ -4319,6 +4327,7 @@
+@@ -4339,6 +4347,7 @@
}
}
@@ -468,7 +545,7 @@ Index: libavcodec/snow.c
static int encode_end(AVCodecContext *avctx)
{
SnowContext *s = avctx->priv_data;
-@@ -4328,6 +4337,7 @@
+@@ -4348,6 +4357,7 @@
return 0;
}
@@ -476,86 +553,9 @@ Index: libavcodec/snow.c
static int decode_init(AVCodecContext *avctx)
{
-Index: libavutil/common.h
-===================================================================
---- libavutil/common.h (revision 7221)
-+++ libavutil/common.h (working copy)
-@@ -375,7 +375,7 @@
- );
- return (d << 32) | (a & 0xffffffff);
- }
--#elif defined(ARCH_X86_32)
-+#elif defined(ARCH_X86)
- static inline long long read_time(void)
- {
- long long l;
-@@ -446,4 +446,23 @@
- char *av_strdup(const char *s);
- void av_freep(void *ptr);
-
-+/* xine: inline causes trouble for debug compiling */
-+#ifdef DISABLE_INLINE
-+# ifdef inline
-+# undef inline
-+# endif
-+# ifdef always_inline
-+# undef always_inline
-+# endif
-+# define inline
-+# define always_inline
-+#endif
-+
-+/* xine: define ASMALIGN here since it's cleaner that generating it in the configure */
-+#if HAVE_ASMALIGN_POT
-+# define ASMALIGN(ZEROBITS) ".align " #ZEROBITS "\n\t"
-+#else
-+# define ASMALIGN(ZEROBITS) ".align 1<<" #ZEROBITS "\n\t"
-+#endif
-+
- #endif /* COMMON_H */
-Index: libavutil/integer.c
-===================================================================
---- libavutil/integer.c (revision 7221)
-+++ libavutil/integer.c (working copy)
-@@ -126,8 +126,8 @@
- AVInteger quot_temp;
- if(!quot) quot = &quot_temp;
-
-- assert((int16_t)a[AV_INTEGER_SIZE-1] >= 0 && (int16_t)b[AV_INTEGER_SIZE-1] >= 0);
-- assert(av_log2(b)>=0);
-+ assert((int16_t)a.v[AV_INTEGER_SIZE-1] >= 0 && (int16_t)b.v[AV_INTEGER_SIZE-1] >= 0);
-+ assert(av_log2_i(b)>=0);
-
- if(i > 0)
- b= av_shr_i(b, -i);
-Index: libavutil/internal.h
-===================================================================
---- libavutil/internal.h (revision 7221)
-+++ libavutil/internal.h (working copy)
-@@ -93,11 +93,15 @@
- #include <assert.h>
-
- /* dprintf macros */
--#ifdef DEBUG
--# define dprintf(fmt,...) av_log(NULL, AV_LOG_DEBUG, fmt, __VA_ARGS__)
--#else
--# define dprintf(fmt,...)
--#endif
-+# ifdef DEBUG
-+# ifdef __GNUC__
-+# define dprintf(fmt,args...) av_log(NULL, AV_LOG_DEBUG, fmt, ##args)
-+# else
-+# define dprintf(fmt,...) av_log(NULL, AV_LOG_DEBUG, fmt, __VA_ARGS__)
-+# endif
-+# else
-+# define dprintf(fmt,...)
-+# endif
-
- #define av_abort() do { av_log(NULL, AV_LOG_ERROR, "Abort at %s:%d\n", __FILE__, __LINE__); abort(); } while (0)
-
Index: libavcodec/mlib/dsputil_mlib.c
===================================================================
---- libavcodec/mlib/dsputil_mlib.c (revision 7221)
+--- libavcodec/mlib/dsputil_mlib.c (revision 7433)
+++ libavcodec/mlib/dsputil_mlib.c (working copy)
@@ -22,6 +22,8 @@
#include "../dsputil.h"
@@ -566,3 +566,35 @@ Index: libavcodec/mlib/dsputil_mlib.c
#include <mlib_types.h>
#include <mlib_status.h>
#include <mlib_sys.h>
+Index: libavcodec/avcodec.h
+===================================================================
+--- libavcodec/avcodec.h (revision 7433)
++++ libavcodec/avcodec.h (working copy)
+@@ -47,6 +47,13 @@
+ #define AV_TIME_BASE 1000000
+ #define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE}
+
++/* FIXME: We cannot use ffmpeg's XvMC capabilities, since that would require
++ * linking the ffmpeg plugin against XvMC libraries, which is a bad thing,
++ * since they are output dependent.
++ * The correct fix would be to reimplement the XvMC functions libavcodec uses
++ * and do the necessary talking with our XvMC output plugin there. */
++#undef HAVE_XVMC
++
+ enum CodecID {
+ CODEC_ID_NONE,
+ CODEC_ID_MPEG1VIDEO,
+@@ -2688,6 +2695,13 @@
+
+ extern unsigned int av_xiphlacing(unsigned char *s, unsigned int v);
+
++/* unused static macro */
++#if defined(__GNUC__) && !defined(DEBUG)
++/* since we do not compile the encoder part of ffmpeg, some static
++ * functions will be unused; this is ok, the compiler will take care */
++# define static static __attribute__((__unused__))
++#endif
++
+ #ifdef __cplusplus
+ }
+ #endif
diff --git a/src/libffmpeg/libavcodec/Makefile.am b/src/libffmpeg/libavcodec/Makefile.am
index cf34b0d28..cae72eeff 100644
--- a/src/libffmpeg/libavcodec/Makefile.am
+++ b/src/libffmpeg/libavcodec/Makefile.am
@@ -4,14 +4,15 @@ SUBDIRS = armv4l i386 mlib alpha ppc sparc libpostproc
# some of ffmpeg's decoders are not used by xine yet
EXTRA_DIST = motion_est_template.c \
- adx.c cljr.c fdctref.c ffv1.c g726.c jpeg_ls.c mdec.c raw.c snow.c svq3.c wmv2.c
+ adx.c cljr.c fdctref.c ffv1.c g726.c jpeg_ls.c mdec.c raw.c svq3.c wmv2.c
# we need to compile everything in debug mode, including the encoders,
# otherwise we get unresolved symbols, because some unsatisfied function calls
# are not optimized away with debug optimization
-AM_CFLAGS = `test "$(CFLAGS)" = "$(DEBUG_CFLAGS)" && echo -DCONFIG_ENCODERS` -fno-strict-aliasing -DCONFIG_VC1_DECODER
+#AM_CFLAGS = `test "$(CFLAGS)" = "$(DEBUG_CFLAGS)" && echo -DCONFIG_ENCODERS` -fno-strict-aliasing
+AM_CFLAGS = `test "$(CFLAGS)" = "$(DEBUG_CFLAGS)"` -fno-strict-aliasing
AM_CPPFLAGS = $(ZLIB_CPPFLAGS) $(LIBFFMPEG_CPPFLAGS) \
- -I$(top_srcdir)/src/libffmpeg/libavutil
+ -I$(top_srcdir)/src/libffmpeg/libavutil -I$(top_srcdir)/src/libffmpeg
ASFLAGS =
noinst_LTLIBRARIES = libavcodec.la
@@ -94,6 +95,7 @@ libavcodec_la_SOURCES = \
simple_idct.c \
smacker.c \
smc.c \
+ snow.c \
svq1.c \
tscc.c \
truemotion1.c \
@@ -110,7 +112,12 @@ libavcodec_la_SOURCES = \
vorbis_data.c \
vp3.c \
vp3dsp.c \
+ vp5.c \
+ vp56.c \
+ vp56data.c \
+ vp6.c \
vqavideo.c \
+ wavpack.c \
wmadec.c \
wnv1.c \
xan.c \
@@ -175,4 +182,8 @@ noinst_HEADERS = \
vc1acdata.h \
vc1data.h \
vp3data.h \
+ vp56.h \
+ vp56data.h \
+ vp5data.h \
+ vp6data.h \
wmadata.h
diff --git a/src/libffmpeg/libavcodec/armv4l/Makefile.am b/src/libffmpeg/libavcodec/armv4l/Makefile.am
index 0f3d230f6..33e0882c9 100644
--- a/src/libffmpeg/libavcodec/armv4l/Makefile.am
+++ b/src/libffmpeg/libavcodec/armv4l/Makefile.am
@@ -6,7 +6,12 @@ ASFLAGS =
noinst_LTLIBRARIES = libavcodec_armv4l.la
-libavcodec_armv4l_src = dsputil_arm.c jrevdct_arm.S mpegvideo_arm.c simple_idct_arm.S
+libavcodec_armv4l_src = dsputil_arm.c jrevdct_arm.S mpegvideo_arm.c simple_idct_arm.S \
+ dsputil_arm_s.S dsputil_iwmmxt.c dsputil_iwmmxt_rnd.h \
+ mpegvideo_armv5te.c mpegvideo_iwmmxt.c simple_idct_armv5te.S
+
+noinst_HEADERS = mathops.h
+
libavcodec_armv4l_dummy = libavcodec_armv4l_dummy.c
EXTRA_DIST = $(libavcodec_armv4l_src) $(libavcodec_armv4l_dummy)
diff --git a/src/libffmpeg/libavcodec/armv4l/dsputil_arm_s.S b/src/libffmpeg/libavcodec/armv4l/dsputil_arm_s.S
new file mode 100644
index 000000000..2a3ee9c50
--- /dev/null
+++ b/src/libffmpeg/libavcodec/armv4l/dsputil_arm_s.S
@@ -0,0 +1,696 @@
+@
+@ ARMv4L optimized DSP utils
+@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
+@
+@ This file is part of FFmpeg.
+@
+@ FFmpeg is free software; you can redistribute it and/or
+@ modify it under the terms of the GNU Lesser General Public
+@ License as published by the Free Software Foundation; either
+@ version 2.1 of the License, or (at your option) any later version.
+@
+@ FFmpeg is distributed in the hope that it will be useful,
+@ but WITHOUT ANY WARRANTY; without even the implied warranty of
+@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+@ Lesser General Public License for more details.
+@
+@ You should have received a copy of the GNU Lesser General Public
+@ License along with FFmpeg; if not, write to the Free Software
+@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+@
+
+.macro ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
+ mov \Rd0, \Rn0, lsr #(\shift * 8)
+ mov \Rd1, \Rn1, lsr #(\shift * 8)
+ mov \Rd2, \Rn2, lsr #(\shift * 8)
+ mov \Rd3, \Rn3, lsr #(\shift * 8)
+ orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
+ orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
+ orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
+ orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
+.endm
+.macro ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2
+ mov \R0, \R0, lsr #(\shift * 8)
+ orr \R0, \R0, \R1, lsl #(32 - \shift * 8)
+ mov \R1, \R1, lsr #(\shift * 8)
+ orr \R1, \R1, \R2, lsl #(32 - \shift * 8)
+.endm
+.macro ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
+ mov \Rdst0, \Rsrc0, lsr #(\shift * 8)
+ mov \Rdst1, \Rsrc1, lsr #(\shift * 8)
+ orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
+ orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
+.endm
+
+.macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
+ @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
+ @ Rmask = 0xFEFEFEFE
+ @ Rn = destroy
+ eor \Rd0, \Rn0, \Rm0
+ eor \Rd1, \Rn1, \Rm1
+ orr \Rn0, \Rn0, \Rm0
+ orr \Rn1, \Rn1, \Rm1
+ and \Rd0, \Rd0, \Rmask
+ and \Rd1, \Rd1, \Rmask
+ sub \Rd0, \Rn0, \Rd0, lsr #1
+ sub \Rd1, \Rn1, \Rd1, lsr #1
+.endm
+
+.macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
+ @ Rd = (Rn & Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
+ @ Rmask = 0xFEFEFEFE
+ @ Rn = destroy
+ eor \Rd0, \Rn0, \Rm0
+ eor \Rd1, \Rn1, \Rm1
+ and \Rn0, \Rn0, \Rm0
+ and \Rn1, \Rn1, \Rm1
+ and \Rd0, \Rd0, \Rmask
+ and \Rd1, \Rd1, \Rmask
+ add \Rd0, \Rn0, \Rd0, lsr #1
+ add \Rd1, \Rn1, \Rd1, lsr #1
+.endm
+
+@ ----------------------------------------------------------------
+ .align 8
+ .global put_pixels16_arm
+put_pixels16_arm:
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixels = unaligned
+ pld [r1]
+ stmfd sp!, {r4-r11, lr} @ R14 is also called LR
+ adr r5, 5f
+ ands r4, r1, #3
+ bic r1, r1, #3
+ add r5, r5, r4, lsl #2
+ ldrne pc, [r5]
+1:
+ ldmia r1, {r4-r7}
+ add r1, r1, r2
+ stmia r0, {r4-r7}
+ pld [r1]
+ subs r3, r3, #1
+ add r0, r0, r2
+ bne 1b
+ ldmfd sp!, {r4-r11, pc}
+ .align 8
+2:
+ ldmia r1, {r4-r8}
+ add r1, r1, r2
+ ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
+ pld [r1]
+ subs r3, r3, #1
+ stmia r0, {r9-r12}
+ add r0, r0, r2
+ bne 2b
+ ldmfd sp!, {r4-r11, pc}
+ .align 8
+3:
+ ldmia r1, {r4-r8}
+ add r1, r1, r2
+ ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
+ pld [r1]
+ subs r3, r3, #1
+ stmia r0, {r9-r12}
+ add r0, r0, r2
+ bne 3b
+ ldmfd sp!, {r4-r11, pc}
+ .align 8
+4:
+ ldmia r1, {r4-r8}
+ add r1, r1, r2
+ ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
+ pld [r1]
+ subs r3, r3, #1
+ stmia r0, {r9-r12}
+ add r0, r0, r2
+ bne 4b
+ ldmfd sp!, {r4-r11,pc}
+ .align 8
+5:
+ .word 1b
+ .word 2b
+ .word 3b
+ .word 4b
+
+@ ----------------------------------------------------------------
+ .align 8
+ .global put_pixels8_arm
+put_pixels8_arm:
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixels = unaligned
+ pld [r1]
+ stmfd sp!, {r4-r5,lr} @ R14 is also called LR
+ adr r5, 5f
+ ands r4, r1, #3
+ bic r1, r1, #3
+ add r5, r5, r4, lsl #2
+ ldrne pc, [r5]
+1:
+ ldmia r1, {r4-r5}
+ add r1, r1, r2
+ subs r3, r3, #1
+ pld [r1]
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 1b
+ ldmfd sp!, {r4-r5,pc}
+ .align 8
+2:
+ ldmia r1, {r4-r5, r12}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12
+ pld [r1]
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 2b
+ ldmfd sp!, {r4-r5,pc}
+ .align 8
+3:
+ ldmia r1, {r4-r5, r12}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12
+ pld [r1]
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 3b
+ ldmfd sp!, {r4-r5,pc}
+ .align 8
+4:
+ ldmia r1, {r4-r5, r12}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12
+ pld [r1]
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 4b
+ ldmfd sp!, {r4-r5,pc}
+ .align 8
+5:
+ .word 1b
+ .word 2b
+ .word 3b
+ .word 4b
+
+@ ----------------------------------------------------------------
+ .align 8
+ .global put_pixels8_x2_arm
+put_pixels8_x2_arm:
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixels = unaligned
+ pld [r1]
+ stmfd sp!, {r4-r10,lr} @ R14 is also called LR
+ adr r5, 5f
+ ands r4, r1, #3
+ ldr r12, [r5]
+ add r5, r5, r4, lsl #2
+ bic r1, r1, #3
+ ldrne pc, [r5]
+1:
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
+ pld [r1]
+ RND_AVG32 r8, r9, r4, r5, r6, r7, r12
+ subs r3, r3, #1
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+ bne 1b
+ ldmfd sp!, {r4-r10,pc}
+ .align 8
+2:
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
+ ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
+ pld [r1]
+ RND_AVG32 r4, r5, r6, r7, r8, r9, r12
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 2b
+ ldmfd sp!, {r4-r10,pc}
+ .align 8
+3:
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
+ ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
+ pld [r1]
+ RND_AVG32 r4, r5, r6, r7, r8, r9, r12
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 3b
+ ldmfd sp!, {r4-r10,pc}
+ .align 8
+4:
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
+ pld [r1]
+ RND_AVG32 r8, r9, r6, r7, r5, r10, r12
+ subs r3, r3, #1
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+ bne 4b
+ ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
+ .align 8
+5:
+ .word 0xFEFEFEFE
+ .word 2b
+ .word 3b
+ .word 4b
+
+ .align 8
+ .global put_no_rnd_pixels8_x2_arm
+put_no_rnd_pixels8_x2_arm:
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixels = unaligned
+ pld [r1]
+ stmfd sp!, {r4-r10,lr} @ R14 is also called LR
+ adr r5, 5f
+ ands r4, r1, #3
+ ldr r12, [r5]
+ add r5, r5, r4, lsl #2
+ bic r1, r1, #3
+ ldrne pc, [r5]
+1:
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
+ pld [r1]
+ NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
+ subs r3, r3, #1
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+ bne 1b
+ ldmfd sp!, {r4-r10,pc}
+ .align 8
+2:
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
+ ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
+ pld [r1]
+ NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 2b
+ ldmfd sp!, {r4-r10,pc}
+ .align 8
+3:
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
+ ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
+ pld [r1]
+ NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 3b
+ ldmfd sp!, {r4-r10,pc}
+ .align 8
+4:
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
+ pld [r1]
+ NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
+ subs r3, r3, #1
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+ bne 4b
+ ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
+ .align 8
+5:
+ .word 0xFEFEFEFE
+ .word 2b
+ .word 3b
+ .word 4b
+
+
+@ ----------------------------------------------------------------
+ .align 8
+ .global put_pixels8_y2_arm
+put_pixels8_y2_arm:
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixels = unaligned
+ pld [r1]
+ stmfd sp!, {r4-r11,lr} @ R14 is also called LR
+ adr r5, 5f
+ ands r4, r1, #3
+ mov r3, r3, lsr #1
+ ldr r12, [r5]
+ add r5, r5, r4, lsl #2
+ bic r1, r1, #3
+ ldrne pc, [r5]
+1:
+ ldmia r1, {r4-r5}
+ add r1, r1, r2
+6: ldmia r1, {r6-r7}
+ add r1, r1, r2
+ pld [r1]
+ RND_AVG32 r8, r9, r4, r5, r6, r7, r12
+ ldmia r1, {r4-r5}
+ add r1, r1, r2
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+ pld [r1]
+ RND_AVG32 r8, r9, r6, r7, r4, r5, r12
+ subs r3, r3, #1
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+ .align 8
+2:
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
+6: ldmia r1, {r7-r9}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
+ RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
+ subs r3, r3, #1
+ RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+ .align 8
+3:
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
+6: ldmia r1, {r7-r9}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
+ RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
+ subs r3, r3, #1
+ RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+ .align 8
+4:
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
+6: ldmia r1, {r7-r9}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
+ RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
+ subs r3, r3, #1
+ RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+
+ .align 8
+5:
+ .word 0xFEFEFEFE
+ .word 2b
+ .word 3b
+ .word 4b
+
+ .align 8
+ .global put_no_rnd_pixels8_y2_arm
+put_no_rnd_pixels8_y2_arm:
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixels = unaligned
+ pld [r1]
+ stmfd sp!, {r4-r11,lr} @ R14 is also called LR
+ adr r5, 5f
+ ands r4, r1, #3
+ mov r3, r3, lsr #1
+ ldr r12, [r5]
+ add r5, r5, r4, lsl #2
+ bic r1, r1, #3
+ ldrne pc, [r5]
+1:
+ ldmia r1, {r4-r5}
+ add r1, r1, r2
+6: ldmia r1, {r6-r7}
+ add r1, r1, r2
+ pld [r1]
+ NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
+ ldmia r1, {r4-r5}
+ add r1, r1, r2
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+ pld [r1]
+ NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
+ subs r3, r3, #1
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+ .align 8
+2:
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
+6: ldmia r1, {r7-r9}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
+ NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
+ subs r3, r3, #1
+ NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+ .align 8
+3:
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
+6: ldmia r1, {r7-r9}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
+ NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
+ subs r3, r3, #1
+ NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+ .align 8
+4:
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
+6: ldmia r1, {r7-r9}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
+ NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
+ subs r3, r3, #1
+ NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+ .align 8
+5:
+ .word 0xFEFEFEFE
+ .word 2b
+ .word 3b
+ .word 4b
+
+@ ----------------------------------------------------------------
+.macro RND_XY2_IT align, rnd
+ @ l1= (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
+ @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
+.if \align == 0
+ ldmia r1, {r6-r8}
+.elseif \align == 3
+ ldmia r1, {r5-r7}
+.else
+ ldmia r1, {r8-r10}
+.endif
+ add r1, r1, r2
+ pld [r1]
+.if \align == 0
+ ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8
+.elseif \align == 1
+ ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r8, r9, r10
+ ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r8, r9, r10
+.elseif \align == 2
+ ADJ_ALIGN_DOUBLEWORD_D 2, r4, r5, r8, r9, r10
+ ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r8, r9, r10
+.elseif \align == 3
+ ADJ_ALIGN_DOUBLEWORD_D 3, r4, r5, r5, r6, r7
+.endif
+ ldr r14, [r12, #0] @ 0x03030303
+ tst r3, #1
+ and r8, r4, r14
+ and r9, r5, r14
+ and r10, r6, r14
+ and r11, r7, r14
+.if \rnd == 1
+ ldreq r14, [r12, #16] @ 0x02020202
+.else
+ ldreq r14, [r12, #28] @ 0x01010101
+.endif
+ add r8, r8, r10
+ add r9, r9, r11
+ addeq r8, r8, r14
+ addeq r9, r9, r14
+ ldr r14, [r12, #20] @ 0xFCFCFCFC >> 2
+ and r4, r14, r4, lsr #2
+ and r5, r14, r5, lsr #2
+ and r6, r14, r6, lsr #2
+ and r7, r14, r7, lsr #2
+ add r10, r4, r6
+ add r11, r5, r7
+.endm
+
+.macro RND_XY2_EXPAND align, rnd
+ RND_XY2_IT \align, \rnd
+6: stmfd sp!, {r8-r11}
+ RND_XY2_IT \align, \rnd
+ ldmfd sp!, {r4-r7}
+ add r4, r4, r8
+ add r5, r5, r9
+ add r6, r6, r10
+ add r7, r7, r11
+ ldr r14, [r12, #24] @ 0x0F0F0F0F
+ and r4, r14, r4, lsr #2
+ and r5, r14, r5, lsr #2
+ add r4, r4, r6
+ add r5, r5, r7
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+.endm
+
+ .align 8
+ .global put_pixels8_xy2_arm
+put_pixels8_xy2_arm:
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixels = unaligned
+ pld [r1]
+ stmfd sp!, {r4-r11,lr} @ R14 is also called LR
+ adrl r12, 5f
+ ands r4, r1, #3
+ add r5, r12, r4, lsl #2
+ bic r1, r1, #3
+ ldrne pc, [r5]
+1:
+ RND_XY2_EXPAND 0, 1
+
+ .align 8
+2:
+ RND_XY2_EXPAND 1, 1
+
+ .align 8
+3:
+ RND_XY2_EXPAND 2, 1
+
+ .align 8
+4:
+ RND_XY2_EXPAND 3, 1
+
+5:
+ .word 0x03030303
+ .word 2b
+ .word 3b
+ .word 4b
+ .word 0x02020202
+ .word 0xFCFCFCFC >> 2
+ .word 0x0F0F0F0F
+ .word 0x01010101
+
+ .align 8
+ .global put_no_rnd_pixels8_xy2_arm
+put_no_rnd_pixels8_xy2_arm:
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixels = unaligned
+ pld [r1]
+ stmfd sp!, {r4-r11,lr} @ R14 is also called LR
+ adrl r12, 5f
+ ands r4, r1, #3
+ add r5, r12, r4, lsl #2
+ bic r1, r1, #3
+ ldrne pc, [r5]
+1:
+ RND_XY2_EXPAND 0, 0
+
+ .align 8
+2:
+ RND_XY2_EXPAND 1, 0
+
+ .align 8
+3:
+ RND_XY2_EXPAND 2, 0
+
+ .align 8
+4:
+ RND_XY2_EXPAND 3, 0
+
+5:
+ .word 0x03030303
+ .word 2b
+ .word 3b
+ .word 4b
+ .word 0x02020202
+ .word 0xFCFCFCFC >> 2
+ .word 0x0F0F0F0F
+ .word 0x01010101
diff --git a/src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt.c b/src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt.c
new file mode 100644
index 000000000..d7401e760
--- /dev/null
+++ b/src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt.c
@@ -0,0 +1,188 @@
+/*
+ * iWMMXt optimized DSP utils
+ * Copyright (c) 2004 AGAWA Koji
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../dsputil.h"
+
+#define DEF(x, y) x ## _no_rnd_ ## y ##_iwmmxt
+#define SET_RND(regd) __asm__ __volatile__ ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12");
+#define WAVG2B "wavg2b"
+#include "dsputil_iwmmxt_rnd.h"
+#undef DEF
+#undef SET_RND
+#undef WAVG2B
+
+#define DEF(x, y) x ## _ ## y ##_iwmmxt
+#define SET_RND(regd) __asm__ __volatile__ ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12");
+#define WAVG2B "wavg2br"
+#include "dsputil_iwmmxt_rnd.h"
+#undef DEF
+#undef SET_RND
+#undef WAVG2BR
+
+// need scheduling
+#define OP(AVG) \
+ asm volatile ( \
+ /* alignment */ \
+ "and r12, %[pixels], #7 \n\t" \
+ "bic %[pixels], %[pixels], #7 \n\t" \
+ "tmcr wcgr1, r12 \n\t" \
+ \
+ "wldrd wr0, [%[pixels]] \n\t" \
+ "wldrd wr1, [%[pixels], #8] \n\t" \
+ "add %[pixels], %[pixels], %[line_size] \n\t" \
+ "walignr1 wr4, wr0, wr1 \n\t" \
+ \
+ "1: \n\t" \
+ \
+ "wldrd wr2, [%[pixels]] \n\t" \
+ "wldrd wr3, [%[pixels], #8] \n\t" \
+ "add %[pixels], %[pixels], %[line_size] \n\t" \
+ "pld [%[pixels]] \n\t" \
+ "walignr1 wr5, wr2, wr3 \n\t" \
+ AVG " wr6, wr4, wr5 \n\t" \
+ "wstrd wr6, [%[block]] \n\t" \
+ "add %[block], %[block], %[line_size] \n\t" \
+ \
+ "wldrd wr0, [%[pixels]] \n\t" \
+ "wldrd wr1, [%[pixels], #8] \n\t" \
+ "add %[pixels], %[pixels], %[line_size] \n\t" \
+ "walignr1 wr4, wr0, wr1 \n\t" \
+ "pld [%[pixels]] \n\t" \
+ AVG " wr6, wr4, wr5 \n\t" \
+ "wstrd wr6, [%[block]] \n\t" \
+ "add %[block], %[block], %[line_size] \n\t" \
+ \
+ "subs %[h], %[h], #2 \n\t" \
+ "bne 1b \n\t" \
+ : [block]"+r"(block), [pixels]"+r"(pixels), [h]"+r"(h) \
+ : [line_size]"r"(line_size) \
+ : "memory", "r12");
+void put_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ OP("wavg2br");
+}
+void put_no_rnd_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ OP("wavg2b");
+}
+#undef OP
+
+void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size)
+{
+ uint8_t *pixels2 = pixels + line_size;
+
+ __asm__ __volatile__ (
+ "mov r12, #4 \n\t"
+ "1: \n\t"
+ "pld [%[pixels], %[line_size2]] \n\t"
+ "pld [%[pixels2], %[line_size2]] \n\t"
+ "wldrd wr4, [%[pixels]] \n\t"
+ "wldrd wr5, [%[pixels2]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "wunpckelub wr6, wr4 \n\t"
+ "wldrd wr0, [%[block]] \n\t"
+ "wunpckehub wr7, wr4 \n\t"
+ "wldrd wr1, [%[block], #8] \n\t"
+ "wunpckelub wr8, wr5 \n\t"
+ "wldrd wr2, [%[block], #16] \n\t"
+ "wunpckehub wr9, wr5 \n\t"
+ "wldrd wr3, [%[block], #24] \n\t"
+ "add %[block], %[block], #32 \n\t"
+ "waddhss wr10, wr0, wr6 \n\t"
+ "waddhss wr11, wr1, wr7 \n\t"
+ "waddhss wr12, wr2, wr8 \n\t"
+ "waddhss wr13, wr3, wr9 \n\t"
+ "wpackhus wr14, wr10, wr11 \n\t"
+ "wpackhus wr15, wr12, wr13 \n\t"
+ "wstrd wr14, [%[pixels]] \n\t"
+ "add %[pixels], %[pixels], %[line_size2] \n\t"
+ "subs r12, r12, #1 \n\t"
+ "wstrd wr15, [%[pixels2]] \n\t"
+ "add %[pixels2], %[pixels2], %[line_size2] \n\t"
+ "bne 1b \n\t"
+ : [block]"+r"(block), [pixels]"+r"(pixels), [pixels2]"+r"(pixels2)
+ : [line_size2]"r"(line_size << 1)
+ : "cc", "memory", "r12");
+}
+
+static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ return;
+}
+
+int mm_flags; /* multimedia extension flags */
+
+int mm_support(void)
+{
+ return 0; /* TODO, implement proper detection */
+}
+
+void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
+{
+ mm_flags = mm_support();
+
+ if (avctx->dsp_mask) {
+ if (avctx->dsp_mask & FF_MM_FORCE)
+ mm_flags |= (avctx->dsp_mask & 0xffff);
+ else
+ mm_flags &= ~(avctx->dsp_mask & 0xffff);
+ }
+
+ if (!(mm_flags & MM_IWMMXT)) return;
+
+ c->add_pixels_clamped = add_pixels_clamped_iwmmxt;
+
+ c->put_pixels_tab[0][0] = put_pixels16_iwmmxt;
+ c->put_pixels_tab[0][1] = put_pixels16_x2_iwmmxt;
+ c->put_pixels_tab[0][2] = put_pixels16_y2_iwmmxt;
+ c->put_pixels_tab[0][3] = put_pixels16_xy2_iwmmxt;
+ c->put_no_rnd_pixels_tab[0][0] = put_pixels16_iwmmxt;
+ c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_iwmmxt;
+ c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_iwmmxt;
+ c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_iwmmxt;
+
+ c->put_pixels_tab[1][0] = put_pixels8_iwmmxt;
+ c->put_pixels_tab[1][1] = put_pixels8_x2_iwmmxt;
+ c->put_pixels_tab[1][2] = put_pixels8_y2_iwmmxt;
+ c->put_pixels_tab[1][3] = put_pixels8_xy2_iwmmxt;
+ c->put_no_rnd_pixels_tab[1][0] = put_pixels8_iwmmxt;
+ c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_iwmmxt;
+ c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_iwmmxt;
+ c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_iwmmxt;
+
+ c->avg_pixels_tab[0][0] = avg_pixels16_iwmmxt;
+ c->avg_pixels_tab[0][1] = avg_pixels16_x2_iwmmxt;
+ c->avg_pixels_tab[0][2] = avg_pixels16_y2_iwmmxt;
+ c->avg_pixels_tab[0][3] = avg_pixels16_xy2_iwmmxt;
+ c->avg_no_rnd_pixels_tab[0][0] = avg_pixels16_iwmmxt;
+ c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_iwmmxt;
+ c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_iwmmxt;
+ c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_iwmmxt;
+
+ c->avg_pixels_tab[1][0] = avg_pixels8_iwmmxt;
+ c->avg_pixels_tab[1][1] = avg_pixels8_x2_iwmmxt;
+ c->avg_pixels_tab[1][2] = avg_pixels8_y2_iwmmxt;
+ c->avg_pixels_tab[1][3] = avg_pixels8_xy2_iwmmxt;
+ c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_iwmmxt;
+ c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_iwmmxt;
+ c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_iwmmxt;
+ c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_iwmmxt;
+}
diff --git a/src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt_rnd.h b/src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt_rnd.h
new file mode 100644
index 000000000..51ba61c47
--- /dev/null
+++ b/src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt_rnd.h
@@ -0,0 +1,1114 @@
+/*
+ * iWMMXt optimized DSP utils
+ * copyright (c) 2004 AGAWA Koji
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ __asm__ __volatile__ (
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r4, %[pixels], %[line_size] \n\t"
+ "add r5, %[block], %[line_size] \n\t"
+ "mov %[line_size], %[line_size], lsl #1 \n\t"
+ "1: \n\t"
+ "wldrd wr0, [%[pixels]] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wldrd wr1, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "wldrd wr3, [r4] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "wldrd wr4, [r4, #8] \n\t"
+ "add r4, r4, %[line_size] \n\t"
+ "walignr1 wr8, wr0, wr1 \n\t"
+ "pld [r4] \n\t"
+ "pld [r4, #32] \n\t"
+ "walignr1 wr10, wr3, wr4 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "wstrd wr10, [r5] \n\t"
+ "add r5, r5, %[line_size] \n\t"
+ "bne 1b \n\t"
+ : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
+ :
+ : "memory", "r4", "r5", "r12");
+}
+
+void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ __asm__ __volatile__ (
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r4, %[pixels], %[line_size] \n\t"
+ "add r5, %[block], %[line_size] \n\t"
+ "mov %[line_size], %[line_size], lsl #1 \n\t"
+ "1: \n\t"
+ "wldrd wr0, [%[pixels]] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wldrd wr1, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "wldrd wr3, [r4] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "wldrd wr4, [r4, #8] \n\t"
+ "add r4, r4, %[line_size] \n\t"
+ "walignr1 wr8, wr0, wr1 \n\t"
+ "wldrd wr0, [%[block]] \n\t"
+ "wldrd wr2, [r5] \n\t"
+ "pld [r4] \n\t"
+ "pld [r4, #32] \n\t"
+ "walignr1 wr10, wr3, wr4 \n\t"
+ WAVG2B" wr8, wr8, wr0 \n\t"
+ WAVG2B" wr10, wr10, wr2 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "wstrd wr10, [r5] \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "add r5, r5, %[line_size] \n\t"
+ "pld [r5] \n\t"
+ "pld [r5, #32] \n\t"
+ "bne 1b \n\t"
+ : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
+ :
+ : "memory", "r4", "r5", "r12");
+}
+
+void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ __asm__ __volatile__ (
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r4, %[pixels], %[line_size] \n\t"
+ "add r5, %[block], %[line_size] \n\t"
+ "mov %[line_size], %[line_size], lsl #1 \n\t"
+ "1: \n\t"
+ "wldrd wr0, [%[pixels]] \n\t"
+ "wldrd wr1, [%[pixels], #8] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wldrd wr2, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "wldrd wr3, [r4] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr8, wr0, wr1 \n\t"
+ "wldrd wr4, [r4, #8] \n\t"
+ "walignr1 wr9, wr1, wr2 \n\t"
+ "wldrd wr5, [r4, #16] \n\t"
+ "add r4, r4, %[line_size] \n\t"
+ "pld [r4] \n\t"
+ "pld [r4, #32] \n\t"
+ "walignr1 wr10, wr3, wr4 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "walignr1 wr11, wr4, wr5 \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "wstrd wr10, [r5] \n\t"
+ "wstrd wr11, [r5, #8] \n\t"
+ "add r5, r5, %[line_size] \n\t"
+ "bne 1b \n\t"
+ : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
+ :
+ : "memory", "r4", "r5", "r12");
+}
+
+void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ __asm__ __volatile__ (
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r4, %[pixels], %[line_size]\n\t"
+ "add r5, %[block], %[line_size] \n\t"
+ "mov %[line_size], %[line_size], lsl #1 \n\t"
+ "1: \n\t"
+ "wldrd wr0, [%[pixels]] \n\t"
+ "wldrd wr1, [%[pixels], #8] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wldrd wr2, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "wldrd wr3, [r4] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr8, wr0, wr1 \n\t"
+ "wldrd wr4, [r4, #8] \n\t"
+ "walignr1 wr9, wr1, wr2 \n\t"
+ "wldrd wr5, [r4, #16] \n\t"
+ "add r4, r4, %[line_size] \n\t"
+ "wldrd wr0, [%[block]] \n\t"
+ "pld [r4] \n\t"
+ "wldrd wr1, [%[block], #8] \n\t"
+ "pld [r4, #32] \n\t"
+ "wldrd wr2, [r5] \n\t"
+ "walignr1 wr10, wr3, wr4 \n\t"
+ "wldrd wr3, [r5, #8] \n\t"
+ WAVG2B" wr8, wr8, wr0 \n\t"
+ WAVG2B" wr9, wr9, wr1 \n\t"
+ WAVG2B" wr10, wr10, wr2 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "walignr1 wr11, wr4, wr5 \n\t"
+ WAVG2B" wr11, wr11, wr3 \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "wstrd wr10, [r5] \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "wstrd wr11, [r5, #8] \n\t"
+ "add r5, r5, %[line_size] \n\t"
+ "pld [r5] \n\t"
+ "pld [r5, #32] \n\t"
+ "bne 1b \n\t"
+ : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
+ :
+ : "memory", "r4", "r5", "r12");
+}
+
+void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
+ __asm__ __volatile__(
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r12, r12, #1 \n\t"
+ "add r4, %[pixels], %[line_size]\n\t"
+ "tmcr wcgr2, r12 \n\t"
+ "add r5, %[block], %[line_size] \n\t"
+ "mov %[line_size], %[line_size], lsl #1 \n\t"
+
+ "1: \n\t"
+ "wldrd wr10, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "wldrd wr13, [r4] \n\t"
+ "pld [%[pixels]] \n\t"
+ "wldrd wr14, [r4, #8] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "add r4, r4, %[line_size] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "pld [r4] \n\t"
+ "pld [r4, #32] \n\t"
+ "walignr1 wr2, wr13, wr14 \n\t"
+ "wmoveq wr4, wr11 \n\t"
+ "wmoveq wr6, wr14 \n\t"
+ "walignr2ne wr4, wr10, wr11 \n\t"
+ "walignr2ne wr6, wr13, wr14 \n\t"
+ WAVG2B" wr0, wr0, wr4 \n\t"
+ WAVG2B" wr2, wr2, wr6 \n\t"
+ "wstrd wr0, [%[block]] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wstrd wr2, [r5] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "add r5, r5, %[line_size] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+ :
+ : "r4", "r5", "r12", "memory");
+}
+
+void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
+ __asm__ __volatile__(
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r12, r12, #1 \n\t"
+ "add r4, %[pixels], %[line_size]\n\t"
+ "tmcr wcgr2, r12 \n\t"
+ "add r5, %[block], %[line_size] \n\t"
+ "mov %[line_size], %[line_size], lsl #1 \n\t"
+
+ "1: \n\t"
+ "wldrd wr10, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "wldrd wr12, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "wldrd wr13, [r4] \n\t"
+ "pld [%[pixels]] \n\t"
+ "wldrd wr14, [r4, #8] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "wldrd wr15, [r4, #16] \n\t"
+ "add r4, r4, %[line_size] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "pld [r4] \n\t"
+ "pld [r4, #32] \n\t"
+ "walignr1 wr1, wr11, wr12 \n\t"
+ "walignr1 wr2, wr13, wr14 \n\t"
+ "walignr1 wr3, wr14, wr15 \n\t"
+ "wmoveq wr4, wr11 \n\t"
+ "wmoveq wr5, wr12 \n\t"
+ "wmoveq wr6, wr14 \n\t"
+ "wmoveq wr7, wr15 \n\t"
+ "walignr2ne wr4, wr10, wr11 \n\t"
+ "walignr2ne wr5, wr11, wr12 \n\t"
+ "walignr2ne wr6, wr13, wr14 \n\t"
+ "walignr2ne wr7, wr14, wr15 \n\t"
+ WAVG2B" wr0, wr0, wr4 \n\t"
+ WAVG2B" wr1, wr1, wr5 \n\t"
+ "wstrd wr0, [%[block]] \n\t"
+ WAVG2B" wr2, wr2, wr6 \n\t"
+ "wstrd wr1, [%[block], #8] \n\t"
+ WAVG2B" wr3, wr3, wr7 \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "wstrd wr2, [r5] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wstrd wr3, [r5, #8] \n\t"
+ "add r5, r5, %[line_size] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+ :
+ : "r4", "r5", "r12", "memory");
+}
+
+void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
+ __asm__ __volatile__(
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r12, r12, #1 \n\t"
+ "add r4, %[pixels], %[line_size]\n\t"
+ "tmcr wcgr2, r12 \n\t"
+ "add r5, %[block], %[line_size] \n\t"
+ "mov %[line_size], %[line_size], lsl #1 \n\t"
+ "pld [r5] \n\t"
+ "pld [r5, #32] \n\t"
+
+ "1: \n\t"
+ "wldrd wr10, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "wldrd wr13, [r4] \n\t"
+ "pld [%[pixels]] \n\t"
+ "wldrd wr14, [r4, #8] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "add r4, r4, %[line_size] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "pld [r4] \n\t"
+ "pld [r4, #32] \n\t"
+ "walignr1 wr2, wr13, wr14 \n\t"
+ "wmoveq wr4, wr11 \n\t"
+ "wmoveq wr6, wr14 \n\t"
+ "walignr2ne wr4, wr10, wr11 \n\t"
+ "wldrd wr10, [%[block]] \n\t"
+ "walignr2ne wr6, wr13, wr14 \n\t"
+ "wldrd wr12, [r5] \n\t"
+ WAVG2B" wr0, wr0, wr4 \n\t"
+ WAVG2B" wr2, wr2, wr6 \n\t"
+ WAVG2B" wr0, wr0, wr10 \n\t"
+ WAVG2B" wr2, wr2, wr12 \n\t"
+ "wstrd wr0, [%[block]] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wstrd wr2, [r5] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "add r5, r5, %[line_size] \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "pld [r5] \n\t"
+ "pld [r5, #32] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+ :
+ : "r4", "r5", "r12", "memory");
+}
+
+void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
+ __asm__ __volatile__(
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r12, r12, #1 \n\t"
+ "add r4, %[pixels], %[line_size]\n\t"
+ "tmcr wcgr2, r12 \n\t"
+ "add r5, %[block], %[line_size] \n\t"
+ "mov %[line_size], %[line_size], lsl #1 \n\t"
+ "pld [r5] \n\t"
+ "pld [r5, #32] \n\t"
+
+ "1: \n\t"
+ "wldrd wr10, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "wldrd wr12, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "wldrd wr13, [r4] \n\t"
+ "pld [%[pixels]] \n\t"
+ "wldrd wr14, [r4, #8] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "wldrd wr15, [r4, #16] \n\t"
+ "add r4, r4, %[line_size] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "pld [r4] \n\t"
+ "pld [r4, #32] \n\t"
+ "walignr1 wr1, wr11, wr12 \n\t"
+ "walignr1 wr2, wr13, wr14 \n\t"
+ "walignr1 wr3, wr14, wr15 \n\t"
+ "wmoveq wr4, wr11 \n\t"
+ "wmoveq wr5, wr12 \n\t"
+ "wmoveq wr6, wr14 \n\t"
+ "wmoveq wr7, wr15 \n\t"
+ "walignr2ne wr4, wr10, wr11 \n\t"
+ "walignr2ne wr5, wr11, wr12 \n\t"
+ "walignr2ne wr6, wr13, wr14 \n\t"
+ "walignr2ne wr7, wr14, wr15 \n\t"
+ "wldrd wr10, [%[block]] \n\t"
+ WAVG2B" wr0, wr0, wr4 \n\t"
+ "wldrd wr11, [%[block], #8] \n\t"
+ WAVG2B" wr1, wr1, wr5 \n\t"
+ "wldrd wr12, [r5] \n\t"
+ WAVG2B" wr2, wr2, wr6 \n\t"
+ "wldrd wr13, [r5, #8] \n\t"
+ WAVG2B" wr3, wr3, wr7 \n\t"
+ WAVG2B" wr0, wr0, wr10 \n\t"
+ WAVG2B" wr1, wr1, wr11 \n\t"
+ WAVG2B" wr2, wr2, wr12 \n\t"
+ WAVG2B" wr3, wr3, wr13 \n\t"
+ "wstrd wr0, [%[block]] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wstrd wr1, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "wstrd wr2, [r5] \n\t"
+ "pld [%[block]] \n\t"
+ "wstrd wr3, [r5, #8] \n\t"
+ "add r5, r5, %[line_size] \n\t"
+ "pld [%[block], #32] \n\t"
+ "pld [r5] \n\t"
+ "pld [r5, #32] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+ :
+ :"r4", "r5", "r12", "memory");
+}
+
+void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ __asm__ __volatile__(
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "and r12, %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "pld [%[block]] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+
+ "1: \n\t"
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr4, wr10, wr11 \n\t"
+ "wldrd wr10, [%[block]] \n\t"
+ WAVG2B" wr8, wr0, wr4 \n\t"
+ WAVG2B" wr8, wr8, wr10 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "pld [%[block]] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "wldrd wr10, [%[block]] \n\t"
+ WAVG2B" wr8, wr0, wr4 \n\t"
+ WAVG2B" wr8, wr8, wr10 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ "subs %[h], %[h], #2 \n\t"
+ "pld [%[block]] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+ :
+ : "cc", "memory", "r12");
+}
+
+void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ __asm__ __volatile__(
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "and r12, %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "wldrd wr12, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "walignr1 wr1, wr11, wr12 \n\t"
+
+ "1: \n\t"
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "wldrd wr12, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr4, wr10, wr11 \n\t"
+ "walignr1 wr5, wr11, wr12 \n\t"
+ WAVG2B" wr8, wr0, wr4 \n\t"
+ WAVG2B" wr9, wr1, wr5 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "wldrd wr12, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "walignr1 wr1, wr11, wr12 \n\t"
+ WAVG2B" wr8, wr0, wr4 \n\t"
+ WAVG2B" wr9, wr1, wr5 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ "subs %[h], %[h], #2 \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+ :
+ : "r4", "r5", "r12", "memory");
+}
+
+void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ __asm__ __volatile__(
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "and r12, %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "pld [%[block]] \n\t"
+ "wldrd wr12, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "walignr1 wr1, wr11, wr12 \n\t"
+
+ "1: \n\t"
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "wldrd wr12, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr4, wr10, wr11 \n\t"
+ "walignr1 wr5, wr11, wr12 \n\t"
+ "wldrd wr10, [%[block]] \n\t"
+ "wldrd wr11, [%[block], #8] \n\t"
+ WAVG2B" wr8, wr0, wr4 \n\t"
+ WAVG2B" wr9, wr1, wr5 \n\t"
+ WAVG2B" wr8, wr8, wr10 \n\t"
+ WAVG2B" wr9, wr9, wr11 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "pld [%[block]] \n\t"
+ "wldrd wr12, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "walignr1 wr1, wr11, wr12 \n\t"
+ "wldrd wr10, [%[block]] \n\t"
+ "wldrd wr11, [%[block], #8] \n\t"
+ WAVG2B" wr8, wr0, wr4 \n\t"
+ WAVG2B" wr9, wr1, wr5 \n\t"
+ WAVG2B" wr8, wr8, wr10 \n\t"
+ WAVG2B" wr9, wr9, wr11 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ "subs %[h], %[h], #2 \n\t"
+ "pld [%[block]] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+ :
+ : "r4", "r5", "r12", "memory");
+}
+
+void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
+ __asm__ __volatile__(
+ "pld [%[pixels]] \n\t"
+ "mov r12, #2 \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "tmcr wcgr0, r12 \n\t" /* for shift value */
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+
+ // [wr0 wr1 wr2 wr3] <= *
+ // [wr4 wr5 wr6 wr7]
+ "wldrd wr12, [%[pixels]] \n\t"
+ "add r12, r12, #1 \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "tmcr wcgr2, r12 \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "cmp r12, #8 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr2, wr12, wr13 \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "wunpckelub wr0, wr2 \n\t"
+ "wunpckehub wr1, wr2 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "waddhus wr0, wr0, wr8 \n\t"
+ "waddhus wr1, wr1, wr9 \n\t"
+
+ "1: \n\t"
+ // [wr0 wr1 wr2 wr3]
+ // [wr4 wr5 wr6 wr7] <= *
+ "wldrd wr12, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr6, wr12, wr13 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "wunpckelub wr4, wr6 \n\t"
+ "wunpckehub wr5, wr6 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "waddhus wr4, wr4, wr8 \n\t"
+ "waddhus wr5, wr5, wr9 \n\t"
+ "waddhus wr8, wr0, wr4 \n\t"
+ "waddhus wr9, wr1, wr5 \n\t"
+ "waddhus wr8, wr8, wr15 \n\t"
+ "waddhus wr9, wr9, wr15 \n\t"
+ "wsrlhg wr8, wr8, wcgr0 \n\t"
+ "wsrlhg wr9, wr9, wcgr0 \n\t"
+ "wpackhus wr8, wr8, wr9 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ // [wr0 wr1 wr2 wr3] <= *
+ // [wr4 wr5 wr6 wr7]
+ "wldrd wr12, [%[pixels]] \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr2, wr12, wr13 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "wunpckelub wr0, wr2 \n\t"
+ "wunpckehub wr1, wr2 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "waddhus wr0, wr0, wr8 \n\t"
+ "waddhus wr1, wr1, wr9 \n\t"
+ "waddhus wr8, wr0, wr4 \n\t"
+ "waddhus wr9, wr1, wr5 \n\t"
+ "waddhus wr8, wr8, wr15 \n\t"
+ "waddhus wr9, wr9, wr15 \n\t"
+ "wsrlhg wr8, wr8, wcgr0 \n\t"
+ "wsrlhg wr9, wr9, wcgr0 \n\t"
+ "wpackhus wr8, wr8, wr9 \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
+ : [line_size]"r"(line_size)
+ : "r12", "memory");
+}
+
+void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
+ __asm__ __volatile__(
+ "pld [%[pixels]] \n\t"
+ "mov r12, #2 \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "tmcr wcgr0, r12 \n\t" /* for shift value */
+ /* alignment */
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r12, r12, #1 \n\t"
+ "tmcr wcgr2, r12 \n\t"
+
+ // [wr0 wr1 wr2 wr3] <= *
+ // [wr4 wr5 wr6 wr7]
+ "wldrd wr12, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "wldrd wr14, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "walignr1 wr2, wr12, wr13 \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr3, wr13, wr14 \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "wmoveq wr11, wr14 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "walignr2ne wr11, wr13, wr14 \n\t"
+ "wunpckelub wr0, wr2 \n\t"
+ "wunpckehub wr1, wr2 \n\t"
+ "wunpckelub wr2, wr3 \n\t"
+ "wunpckehub wr3, wr3 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "wunpckelub wr10, wr11 \n\t"
+ "wunpckehub wr11, wr11 \n\t"
+ "waddhus wr0, wr0, wr8 \n\t"
+ "waddhus wr1, wr1, wr9 \n\t"
+ "waddhus wr2, wr2, wr10 \n\t"
+ "waddhus wr3, wr3, wr11 \n\t"
+
+ "1: \n\t"
+ // [wr0 wr1 wr2 wr3]
+ // [wr4 wr5 wr6 wr7] <= *
+ "wldrd wr12, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "wldrd wr14, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr6, wr12, wr13 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr7, wr13, wr14 \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "wmoveq wr11, wr14 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "walignr2ne wr11, wr13, wr14 \n\t"
+ "wunpckelub wr4, wr6 \n\t"
+ "wunpckehub wr5, wr6 \n\t"
+ "wunpckelub wr6, wr7 \n\t"
+ "wunpckehub wr7, wr7 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "wunpckelub wr10, wr11 \n\t"
+ "wunpckehub wr11, wr11 \n\t"
+ "waddhus wr4, wr4, wr8 \n\t"
+ "waddhus wr5, wr5, wr9 \n\t"
+ "waddhus wr6, wr6, wr10 \n\t"
+ "waddhus wr7, wr7, wr11 \n\t"
+ "waddhus wr8, wr0, wr4 \n\t"
+ "waddhus wr9, wr1, wr5 \n\t"
+ "waddhus wr10, wr2, wr6 \n\t"
+ "waddhus wr11, wr3, wr7 \n\t"
+ "waddhus wr8, wr8, wr15 \n\t"
+ "waddhus wr9, wr9, wr15 \n\t"
+ "waddhus wr10, wr10, wr15 \n\t"
+ "waddhus wr11, wr11, wr15 \n\t"
+ "wsrlhg wr8, wr8, wcgr0 \n\t"
+ "wsrlhg wr9, wr9, wcgr0 \n\t"
+ "wsrlhg wr10, wr10, wcgr0 \n\t"
+ "wsrlhg wr11, wr11, wcgr0 \n\t"
+ "wpackhus wr8, wr8, wr9 \n\t"
+ "wpackhus wr9, wr10, wr11 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ // [wr0 wr1 wr2 wr3] <= *
+ // [wr4 wr5 wr6 wr7]
+ "wldrd wr12, [%[pixels]] \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "wldrd wr14, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr2, wr12, wr13 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr3, wr13, wr14 \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "wmoveq wr11, wr14 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "walignr2ne wr11, wr13, wr14 \n\t"
+ "wunpckelub wr0, wr2 \n\t"
+ "wunpckehub wr1, wr2 \n\t"
+ "wunpckelub wr2, wr3 \n\t"
+ "wunpckehub wr3, wr3 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "wunpckelub wr10, wr11 \n\t"
+ "wunpckehub wr11, wr11 \n\t"
+ "waddhus wr0, wr0, wr8 \n\t"
+ "waddhus wr1, wr1, wr9 \n\t"
+ "waddhus wr2, wr2, wr10 \n\t"
+ "waddhus wr3, wr3, wr11 \n\t"
+ "waddhus wr8, wr0, wr4 \n\t"
+ "waddhus wr9, wr1, wr5 \n\t"
+ "waddhus wr10, wr2, wr6 \n\t"
+ "waddhus wr11, wr3, wr7 \n\t"
+ "waddhus wr8, wr8, wr15 \n\t"
+ "waddhus wr9, wr9, wr15 \n\t"
+ "waddhus wr10, wr10, wr15 \n\t"
+ "waddhus wr11, wr11, wr15 \n\t"
+ "wsrlhg wr8, wr8, wcgr0 \n\t"
+ "wsrlhg wr9, wr9, wcgr0 \n\t"
+ "wsrlhg wr10, wr10, wcgr0 \n\t"
+ "wsrlhg wr11, wr11, wcgr0 \n\t"
+ "wpackhus wr8, wr8, wr9 \n\t"
+ "wpackhus wr9, wr10, wr11 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ "subs %[h], %[h], #2 \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
+ : [line_size]"r"(line_size)
+ : "r12", "memory");
+}
+
+void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
+ __asm__ __volatile__(
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "pld [%[pixels]] \n\t"
+ "mov r12, #2 \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "tmcr wcgr0, r12 \n\t" /* for shift value */
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+
+ // [wr0 wr1 wr2 wr3] <= *
+ // [wr4 wr5 wr6 wr7]
+ "wldrd wr12, [%[pixels]] \n\t"
+ "add r12, r12, #1 \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "tmcr wcgr2, r12 \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "cmp r12, #8 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr2, wr12, wr13 \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "wunpckelub wr0, wr2 \n\t"
+ "wunpckehub wr1, wr2 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "waddhus wr0, wr0, wr8 \n\t"
+ "waddhus wr1, wr1, wr9 \n\t"
+
+ "1: \n\t"
+ // [wr0 wr1 wr2 wr3]
+ // [wr4 wr5 wr6 wr7] <= *
+ "wldrd wr12, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr6, wr12, wr13 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "wunpckelub wr4, wr6 \n\t"
+ "wunpckehub wr5, wr6 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "waddhus wr4, wr4, wr8 \n\t"
+ "waddhus wr5, wr5, wr9 \n\t"
+ "waddhus wr8, wr0, wr4 \n\t"
+ "waddhus wr9, wr1, wr5 \n\t"
+ "waddhus wr8, wr8, wr15 \n\t"
+ "waddhus wr9, wr9, wr15 \n\t"
+ "wldrd wr12, [%[block]] \n\t"
+ "wsrlhg wr8, wr8, wcgr0 \n\t"
+ "wsrlhg wr9, wr9, wcgr0 \n\t"
+ "wpackhus wr8, wr8, wr9 \n\t"
+ WAVG2B" wr8, wr8, wr12 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "wldrd wr12, [%[pixels]] \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+
+ // [wr0 wr1 wr2 wr3] <= *
+ // [wr4 wr5 wr6 wr7]
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr2, wr12, wr13 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "wunpckelub wr0, wr2 \n\t"
+ "wunpckehub wr1, wr2 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "waddhus wr0, wr0, wr8 \n\t"
+ "waddhus wr1, wr1, wr9 \n\t"
+ "waddhus wr8, wr0, wr4 \n\t"
+ "waddhus wr9, wr1, wr5 \n\t"
+ "waddhus wr8, wr8, wr15 \n\t"
+ "waddhus wr9, wr9, wr15 \n\t"
+ "wldrd wr12, [%[block]] \n\t"
+ "wsrlhg wr8, wr8, wcgr0 \n\t"
+ "wsrlhg wr9, wr9, wcgr0 \n\t"
+ "wpackhus wr8, wr8, wr9 \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ WAVG2B" wr8, wr8, wr12 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
+ : [line_size]"r"(line_size)
+ : "r12", "memory");
+}
+
+void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
+ __asm__ __volatile__(
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "pld [%[pixels]] \n\t"
+ "mov r12, #2 \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "tmcr wcgr0, r12 \n\t" /* for shift value */
+ /* alignment */
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r12, r12, #1 \n\t"
+ "tmcr wcgr2, r12 \n\t"
+
+ // [wr0 wr1 wr2 wr3] <= *
+ // [wr4 wr5 wr6 wr7]
+ "wldrd wr12, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "wldrd wr14, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "walignr1 wr2, wr12, wr13 \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr3, wr13, wr14 \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "wmoveq wr11, wr14 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "walignr2ne wr11, wr13, wr14 \n\t"
+ "wunpckelub wr0, wr2 \n\t"
+ "wunpckehub wr1, wr2 \n\t"
+ "wunpckelub wr2, wr3 \n\t"
+ "wunpckehub wr3, wr3 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "wunpckelub wr10, wr11 \n\t"
+ "wunpckehub wr11, wr11 \n\t"
+ "waddhus wr0, wr0, wr8 \n\t"
+ "waddhus wr1, wr1, wr9 \n\t"
+ "waddhus wr2, wr2, wr10 \n\t"
+ "waddhus wr3, wr3, wr11 \n\t"
+
+ "1: \n\t"
+ // [wr0 wr1 wr2 wr3]
+ // [wr4 wr5 wr6 wr7] <= *
+ "wldrd wr12, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "wldrd wr14, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr6, wr12, wr13 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr7, wr13, wr14 \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "wmoveq wr11, wr14 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "walignr2ne wr11, wr13, wr14 \n\t"
+ "wunpckelub wr4, wr6 \n\t"
+ "wunpckehub wr5, wr6 \n\t"
+ "wunpckelub wr6, wr7 \n\t"
+ "wunpckehub wr7, wr7 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "wunpckelub wr10, wr11 \n\t"
+ "wunpckehub wr11, wr11 \n\t"
+ "waddhus wr4, wr4, wr8 \n\t"
+ "waddhus wr5, wr5, wr9 \n\t"
+ "waddhus wr6, wr6, wr10 \n\t"
+ "waddhus wr7, wr7, wr11 \n\t"
+ "waddhus wr8, wr0, wr4 \n\t"
+ "waddhus wr9, wr1, wr5 \n\t"
+ "waddhus wr10, wr2, wr6 \n\t"
+ "waddhus wr11, wr3, wr7 \n\t"
+ "waddhus wr8, wr8, wr15 \n\t"
+ "waddhus wr9, wr9, wr15 \n\t"
+ "waddhus wr10, wr10, wr15 \n\t"
+ "waddhus wr11, wr11, wr15 \n\t"
+ "wsrlhg wr8, wr8, wcgr0 \n\t"
+ "wsrlhg wr9, wr9, wcgr0 \n\t"
+ "wldrd wr12, [%[block]] \n\t"
+ "wldrd wr13, [%[block], #8] \n\t"
+ "wsrlhg wr10, wr10, wcgr0 \n\t"
+ "wsrlhg wr11, wr11, wcgr0 \n\t"
+ "wpackhus wr8, wr8, wr9 \n\t"
+ "wpackhus wr9, wr10, wr11 \n\t"
+ WAVG2B" wr8, wr8, wr12 \n\t"
+ WAVG2B" wr9, wr9, wr13 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ // [wr0 wr1 wr2 wr3] <= *
+ // [wr4 wr5 wr6 wr7]
+ "wldrd wr12, [%[pixels]] \n\t"
+ "pld [%[block]] \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "pld [%[block], #32] \n\t"
+ "wldrd wr14, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr2, wr12, wr13 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr3, wr13, wr14 \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "wmoveq wr11, wr14 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "walignr2ne wr11, wr13, wr14 \n\t"
+ "wunpckelub wr0, wr2 \n\t"
+ "wunpckehub wr1, wr2 \n\t"
+ "wunpckelub wr2, wr3 \n\t"
+ "wunpckehub wr3, wr3 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "wunpckelub wr10, wr11 \n\t"
+ "wunpckehub wr11, wr11 \n\t"
+ "waddhus wr0, wr0, wr8 \n\t"
+ "waddhus wr1, wr1, wr9 \n\t"
+ "waddhus wr2, wr2, wr10 \n\t"
+ "waddhus wr3, wr3, wr11 \n\t"
+ "waddhus wr8, wr0, wr4 \n\t"
+ "waddhus wr9, wr1, wr5 \n\t"
+ "waddhus wr10, wr2, wr6 \n\t"
+ "waddhus wr11, wr3, wr7 \n\t"
+ "waddhus wr8, wr8, wr15 \n\t"
+ "waddhus wr9, wr9, wr15 \n\t"
+ "waddhus wr10, wr10, wr15 \n\t"
+ "waddhus wr11, wr11, wr15 \n\t"
+ "wsrlhg wr8, wr8, wcgr0 \n\t"
+ "wsrlhg wr9, wr9, wcgr0 \n\t"
+ "wldrd wr12, [%[block]] \n\t"
+ "wldrd wr13, [%[block], #8] \n\t"
+ "wsrlhg wr10, wr10, wcgr0 \n\t"
+ "wsrlhg wr11, wr11, wcgr0 \n\t"
+ "wpackhus wr8, wr8, wr9 \n\t"
+ "wpackhus wr9, wr10, wr11 \n\t"
+ WAVG2B" wr8, wr8, wr12 \n\t"
+ WAVG2B" wr9, wr9, wr13 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
+ : [line_size]"r"(line_size)
+ : "r12", "memory");
+}
diff --git a/src/libffmpeg/libavcodec/armv4l/mathops.h b/src/libffmpeg/libavcodec/armv4l/mathops.h
new file mode 100644
index 000000000..7ddd0ec6e
--- /dev/null
+++ b/src/libffmpeg/libavcodec/armv4l/mathops.h
@@ -0,0 +1,49 @@
+/*
+ * simple math operations
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifdef FRAC_BITS
+# define MULL(a, b) \
+ ({ int lo, hi;\
+ asm("smull %0, %1, %2, %3 \n\t"\
+ "mov %0, %0, lsr %4\n\t"\
+ "add %1, %0, %1, lsl %5\n\t"\
+ : "=&r"(lo), "=&r"(hi)\
+ : "r"(b), "r"(a), "i"(FRAC_BITS), "i"(32-FRAC_BITS));\
+ hi; })
+#endif
+
+#define MULH(a, b) \
+ ({ int lo, hi;\
+ asm ("smull %0, %1, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(b), "r"(a));\
+ hi; })
+
+#if defined(HAVE_ARMV5TE)
+
+/* signed 16x16 -> 32 multiply add accumulate */
+# define MAC16(rt, ra, rb) \
+ asm ("smlabb %0, %2, %3, %0" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
+/* signed 16x16 -> 32 multiply */
+# define MUL16(ra, rb) \
+ ({ int __rt; \
+ asm ("smulbb %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); \
+ __rt; })
+
+#endif
diff --git a/src/libffmpeg/libavcodec/armv4l/mpegvideo_armv5te.c b/src/libffmpeg/libavcodec/armv4l/mpegvideo_armv5te.c
new file mode 100644
index 000000000..a8d09b8ce
--- /dev/null
+++ b/src/libffmpeg/libavcodec/armv4l/mpegvideo_armv5te.c
@@ -0,0 +1,213 @@
+/*
+ * Optimization of some functions from mpegvideo.c for armv5te
+ * Copyright (c) 2007 Siarhei Siamashka <ssvb@users.sourceforge.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Some useful links for those who may be interested in optimizing code for ARM.
+ * ARM Architecture Reference Manual: http://www.arm.com/community/academy/resources.html
+ * Instructions timings and optimization guide for ARM9E: http://www.arm.com/pdfs/DDI0222B_9EJS_r1p2.pdf
+ */
+
+#include "../dsputil.h"
+#include "../mpegvideo.h"
+#include "../avcodec.h"
+
+
+#ifdef ENABLE_ARM_TESTS
+/**
+ * h263 dequantizer supplementary function, it is performance critical and needs to
+ * have optimized implementations for each architecture. Is also used as a reference
+ * implementation in regression tests
+ */
+static inline void dct_unquantize_h263_helper_c(DCTELEM *block, int qmul, int qadd, int count)
+{
+ int i, level;
+ for (i = 0; i < count; i++) {
+ level = block[i];
+ if (level) {
+ if (level < 0) {
+ level = level * qmul - qadd;
+ } else {
+ level = level * qmul + qadd;
+ }
+ block[i] = level;
+ }
+ }
+}
+#endif
+
+/* GCC 3.1 or higher is required to support symbolic names in assembly code */
+#if (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1))
+
+/**
+ * Special optimized version of dct_unquantize_h263_helper_c, it requires the block
+ * to be at least 8 bytes aligned, and may process more elements than requested.
+ * But it is guaranteed to never process more than 64 elements provided that
+ * xxcount argument is <= 64, so it is safe. This macro is optimized for a common
+ * distribution of values for nCoeffs (they are mostly multiple of 8 plus one or
+ * two extra elements). So this macro processes data as 8 elements per loop iteration
+ * and contains optional 2 elements processing in the end.
+ *
+ * Inner loop should take 6 cycles per element on arm926ej-s (Nokia 770)
+ */
+#define dct_unquantize_h263_special_helper_armv5te(xxblock, xxqmul, xxqadd, xxcount) \
+({ DCTELEM *xblock = xxblock; \
+ int xqmul = xxqmul, xqadd = xxqadd, xcount = xxcount, xtmp; \
+ int xdata1, xdata2; \
+__asm__ __volatile__( \
+ "subs %[count], #2 \n\t" \
+ "ble 2f \n\t" \
+ "ldrd r4, [%[block], #0] \n\t" \
+ "1: \n\t" \
+ "ldrd r6, [%[block], #8] \n\t" \
+\
+ "rsbs %[data1], %[zero], r4, asr #16 \n\t" \
+ "addgt %[data1], %[qadd], #0 \n\t" \
+ "rsblt %[data1], %[qadd], #0 \n\t" \
+ "smlatbne %[data1], r4, %[qmul], %[data1] \n\t" \
+\
+ "rsbs %[data2], %[zero], r5, asr #16 \n\t" \
+ "addgt %[data2], %[qadd], #0 \n\t" \
+ "rsblt %[data2], %[qadd], #0 \n\t" \
+ "smlatbne %[data2], r5, %[qmul], %[data2] \n\t" \
+\
+ "rsbs %[tmp], %[zero], r4, asl #16 \n\t" \
+ "addgt %[tmp], %[qadd], #0 \n\t" \
+ "rsblt %[tmp], %[qadd], #0 \n\t" \
+ "smlabbne r4, r4, %[qmul], %[tmp] \n\t" \
+\
+ "rsbs %[tmp], %[zero], r5, asl #16 \n\t" \
+ "addgt %[tmp], %[qadd], #0 \n\t" \
+ "rsblt %[tmp], %[qadd], #0 \n\t" \
+ "smlabbne r5, r5, %[qmul], %[tmp] \n\t" \
+\
+ "strh r4, [%[block]], #2 \n\t" \
+ "strh %[data1], [%[block]], #2 \n\t" \
+ "strh r5, [%[block]], #2 \n\t" \
+ "strh %[data2], [%[block]], #2 \n\t" \
+\
+ "rsbs %[data1], %[zero], r6, asr #16 \n\t" \
+ "addgt %[data1], %[qadd], #0 \n\t" \
+ "rsblt %[data1], %[qadd], #0 \n\t" \
+ "smlatbne %[data1], r6, %[qmul], %[data1] \n\t" \
+\
+ "rsbs %[data2], %[zero], r7, asr #16 \n\t" \
+ "addgt %[data2], %[qadd], #0 \n\t" \
+ "rsblt %[data2], %[qadd], #0 \n\t" \
+ "smlatbne %[data2], r7, %[qmul], %[data2] \n\t" \
+\
+ "rsbs %[tmp], %[zero], r6, asl #16 \n\t" \
+ "addgt %[tmp], %[qadd], #0 \n\t" \
+ "rsblt %[tmp], %[qadd], #0 \n\t" \
+ "smlabbne r6, r6, %[qmul], %[tmp] \n\t" \
+\
+ "rsbs %[tmp], %[zero], r7, asl #16 \n\t" \
+ "addgt %[tmp], %[qadd], #0 \n\t" \
+ "rsblt %[tmp], %[qadd], #0 \n\t" \
+ "smlabbne r7, r7, %[qmul], %[tmp] \n\t" \
+\
+ "strh r6, [%[block]], #2 \n\t" \
+ "strh %[data1], [%[block]], #2 \n\t" \
+ "strh r7, [%[block]], #2 \n\t" \
+ "strh %[data2], [%[block]], #2 \n\t" \
+\
+ "subs %[count], #8 \n\t" \
+ "ldrgtd r4, [%[block], #0] \n\t" /* load data early to avoid load/use pipeline stall */ \
+ "bgt 1b \n\t" \
+\
+ "adds %[count], #2 \n\t" \
+ "ble 3f \n\t" \
+ "2: \n\t" \
+ "ldrsh %[data1], [%[block], #0] \n\t" \
+ "ldrsh %[data2], [%[block], #2] \n\t" \
+ "mov %[tmp], %[qadd] \n\t" \
+ "cmp %[data1], #0 \n\t" \
+ "rsblt %[tmp], %[qadd], #0 \n\t" \
+ "smlabbne %[data1], %[data1], %[qmul], %[tmp] \n\t" \
+ "mov %[tmp], %[qadd] \n\t" \
+ "cmp %[data2], #0 \n\t" \
+ "rsblt %[tmp], %[qadd], #0 \n\t" \
+ "smlabbne %[data2], %[data2], %[qmul], %[tmp] \n\t" \
+ "strh %[data1], [%[block]], #2 \n\t" \
+ "strh %[data2], [%[block]], #2 \n\t" \
+ "3: \n\t" \
+ : [block] "+&r" (xblock), [count] "+&r" (xcount), [tmp] "=&r" (xtmp), \
+ [data1] "=&r" (xdata1), [data2] "=&r" (xdata2) \
+ : [qmul] "r" (xqmul), [qadd] "r" (xqadd), [zero] "r" (0) \
+ : "r4", "r5", "r6", "r7", "cc", "memory" \
+); \
+})
+
+static void dct_unquantize_h263_intra_armv5te(MpegEncContext *s,
+ DCTELEM *block, int n, int qscale)
+{
+ int i, level, qmul, qadd;
+ int nCoeffs;
+
+ assert(s->block_last_index[n]>=0);
+
+ qmul = qscale << 1;
+
+ if (!s->h263_aic) {
+ if (n < 4)
+ level = block[0] * s->y_dc_scale;
+ else
+ level = block[0] * s->c_dc_scale;
+ qadd = (qscale - 1) | 1;
+ }else{
+ qadd = 0;
+ level = block[0];
+ }
+ if(s->ac_pred)
+ nCoeffs=63;
+ else
+ nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
+
+ dct_unquantize_h263_special_helper_armv5te(block, qmul, qadd, nCoeffs + 1);
+ block[0] = level;
+}
+
+static void dct_unquantize_h263_inter_armv5te(MpegEncContext *s,
+ DCTELEM *block, int n, int qscale)
+{
+ int i, level, qmul, qadd;
+ int nCoeffs;
+
+ assert(s->block_last_index[n]>=0);
+
+ qadd = (qscale - 1) | 1;
+ qmul = qscale << 1;
+
+ nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
+
+ dct_unquantize_h263_special_helper_armv5te(block, qmul, qadd, nCoeffs + 1);
+}
+
+#define HAVE_DCT_UNQUANTIZE_H263_ARMV5TE_OPTIMIZED
+
+#endif
+
+void MPV_common_init_armv5te(MpegEncContext *s)
+{
+#ifdef HAVE_DCT_UNQUANTIZE_H263_ARMV5TE_OPTIMIZED
+ s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_armv5te;
+ s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_armv5te;
+#endif
+}
diff --git a/src/libffmpeg/libavcodec/armv4l/mpegvideo_iwmmxt.c b/src/libffmpeg/libavcodec/armv4l/mpegvideo_iwmmxt.c
new file mode 100644
index 000000000..1336ac5f8
--- /dev/null
+++ b/src/libffmpeg/libavcodec/armv4l/mpegvideo_iwmmxt.c
@@ -0,0 +1,119 @@
+/*
+ * copyright (c) 2004 AGAWA Koji
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../dsputil.h"
+#include "../mpegvideo.h"
+#include "../avcodec.h"
+
+static void dct_unquantize_h263_intra_iwmmxt(MpegEncContext *s,
+ DCTELEM *block, int n, int qscale)
+{
+ int level, qmul, qadd;
+ int nCoeffs;
+ DCTELEM *block_orig = block;
+
+ assert(s->block_last_index[n]>=0);
+
+ qmul = qscale << 1;
+
+ if (!s->h263_aic) {
+ if (n < 4)
+ level = block[0] * s->y_dc_scale;
+ else
+ level = block[0] * s->c_dc_scale;
+ qadd = (qscale - 1) | 1;
+ }else{
+ qadd = 0;
+ level = block[0];
+ }
+ if(s->ac_pred)
+ nCoeffs=63;
+ else
+ nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
+
+ __asm__ __volatile__ (
+/* "movd %1, %%mm6 \n\t" //qmul */
+/* "packssdw %%mm6, %%mm6 \n\t" */
+/* "packssdw %%mm6, %%mm6 \n\t" */
+ "tbcsth wr6, %[qmul] \n\t"
+/* "movd %2, %%mm5 \n\t" //qadd */
+/* "packssdw %%mm5, %%mm5 \n\t" */
+/* "packssdw %%mm5, %%mm5 \n\t" */
+ "tbcsth wr5, %[qadd] \n\t"
+ "wzero wr7 \n\t" /* "pxor %%mm7, %%mm7 \n\t" */
+ "wzero wr4 \n\t" /* "pxor %%mm4, %%mm4 \n\t" */
+ "wsubh wr7, wr5, wr7 \n\t" /* "psubw %%mm5, %%mm7 \n\t" */
+ "1: \n\t"
+ "wldrd wr2, [%[block]] \n\t" /* "movq (%0, %3), %%mm0 \n\t" */
+ "wldrd wr3, [%[block], #8] \n\t" /* "movq 8(%0, %3), %%mm1 \n\t" */
+ "wmulsl wr0, wr6, wr2 \n\t" /* "pmullw %%mm6, %%mm0 \n\t" */
+ "wmulsl wr1, wr6, wr3 \n\t" /* "pmullw %%mm6, %%mm1 \n\t" */
+/* "movq (%0, %3), %%mm2 \n\t" */
+/* "movq 8(%0, %3), %%mm3 \n\t" */
+ "wcmpgtsh wr2, wr4, wr2 \n\t" /* "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 */
+ "wcmpgtsh wr3, wr4, wr2 \n\t" /* "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 */
+ "wxor wr0, wr2, wr0 \n\t" /* "pxor %%mm2, %%mm0 \n\t" */
+ "wxor wr1, wr3, wr1 \n\t" /* "pxor %%mm3, %%mm1 \n\t" */
+ "waddh wr0, wr7, wr0 \n\t" /* "paddw %%mm7, %%mm0 \n\t" */
+ "waddh wr1, wr7, wr1 \n\t" /* "paddw %%mm7, %%mm1 \n\t" */
+ "wxor wr2, wr0, wr2 \n\t" /* "pxor %%mm0, %%mm2 \n\t" */
+ "wxor wr3, wr1, wr3 \n\t" /* "pxor %%mm1, %%mm3 \n\t" */
+ "wcmpeqh wr0, wr7, wr0 \n\t" /* "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0 */
+ "wcmpeqh wr1, wr7, wr1 \n\t" /* "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0 */
+ "wandn wr0, wr2, wr0 \n\t" /* "pandn %%mm2, %%mm0 \n\t" */
+ "wandn wr1, wr3, wr1 \n\t" /* "pandn %%mm3, %%mm1 \n\t" */
+ "wstrd wr0, [%[block]] \n\t" /* "movq %%mm0, (%0, %3) \n\t" */
+ "wstrd wr1, [%[block], #8] \n\t" /* "movq %%mm1, 8(%0, %3) \n\t" */
+ "add %[block], %[block], #16 \n\t" /* "addl $16, %3 \n\t" */
+ "subs %[i], %[i], #1 \n\t"
+ "bne 1b \n\t" /* "jng 1b \n\t" */
+ :[block]"+r"(block)
+ :[i]"r"((nCoeffs + 8) / 8), [qmul]"r"(qmul), [qadd]"r"(qadd)
+ :"memory");
+
+ block_orig[0] = level;
+}
+
+#if 0
+static void dct_unquantize_h263_inter_iwmmxt(MpegEncContext *s,
+ DCTELEM *block, int n, int qscale)
+{
+ int nCoeffs;
+
+ assert(s->block_last_index[n]>=0);
+
+ if(s->ac_pred)
+ nCoeffs=63;
+ else
+ nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
+
+ ippiQuantInvInter_Compact_H263_16s_I(block, nCoeffs+1, qscale);
+}
+#endif
+
+void MPV_common_init_iwmmxt(MpegEncContext *s)
+{
+ if (!(mm_flags & MM_IWMMXT)) return;
+
+ s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_iwmmxt;
+#if 0
+ s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_iwmmxt;
+#endif
+}
diff --git a/src/libffmpeg/libavcodec/armv4l/simple_idct_armv5te.S b/src/libffmpeg/libavcodec/armv4l/simple_idct_armv5te.S
new file mode 100644
index 000000000..28bee0643
--- /dev/null
+++ b/src/libffmpeg/libavcodec/armv4l/simple_idct_armv5te.S
@@ -0,0 +1,718 @@
+/*
+ * Simple IDCT
+ *
+ * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2006 Mans Rullgard <mru@inprovide.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define W1 22725 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W2 21407 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W3 19266 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W4 16383 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W5 12873 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W6 8867 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W7 4520 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define ROW_SHIFT 11
+#define COL_SHIFT 20
+
+#define W13 (W1 | (W3 << 16))
+#define W26 (W2 | (W6 << 16))
+#define W57 (W5 | (W7 << 16))
+
+ .text
+ .align
+w13: .long W13
+w26: .long W26
+w57: .long W57
+
+ .align
+ .func idct_row_armv5te
+idct_row_armv5te:
+ str lr, [sp, #-4]!
+
+ ldrd v1, [a1, #8]
+ ldrd a3, [a1] /* a3 = row[1:0], a4 = row[3:2] */
+ orrs v1, v1, v2
+ cmpeq v1, a4
+ cmpeq v1, a3, lsr #16
+ beq row_dc_only
+
+ mov v1, #(1<<(ROW_SHIFT-1))
+ mov ip, #16384
+ sub ip, ip, #1 /* ip = W4 */
+ smlabb v1, ip, a3, v1 /* v1 = W4*row[0]+(1<<(RS-1)) */
+ ldr ip, [pc, #(w26-.-8)] /* ip = W2 | (W6 << 16) */
+ smultb a2, ip, a4
+ smulbb lr, ip, a4
+ add v2, v1, a2
+ sub v3, v1, a2
+ sub v4, v1, lr
+ add v1, v1, lr
+
+ ldr ip, [pc, #(w13-.-8)] /* ip = W1 | (W3 << 16) */
+ ldr lr, [pc, #(w57-.-8)] /* lr = W5 | (W7 << 16) */
+ smulbt v5, ip, a3
+ smultt v6, lr, a4
+ smlatt v5, ip, a4, v5
+ smultt a2, ip, a3
+ smulbt v7, lr, a3
+ sub v6, v6, a2
+ smulbt a2, ip, a4
+ smultt fp, lr, a3
+ sub v7, v7, a2
+ smulbt a2, lr, a4
+ ldrd a3, [a1, #8] /* a3=row[5:4] a4=row[7:6] */
+ sub fp, fp, a2
+
+ orrs a2, a3, a4
+ beq 1f
+
+ smlabt v5, lr, a3, v5
+ smlabt v6, ip, a3, v6
+ smlatt v5, lr, a4, v5
+ smlabt v6, lr, a4, v6
+ smlatt v7, lr, a3, v7
+ smlatt fp, ip, a3, fp
+ smulbt a2, ip, a4
+ smlatt v7, ip, a4, v7
+ sub fp, fp, a2
+
+ ldr ip, [pc, #(w26-.-8)] /* ip = W2 | (W6 << 16) */
+ mov a2, #16384
+ sub a2, a2, #1 /* a2 = W4 */
+ smulbb a2, a2, a3 /* a2 = W4*row[4] */
+ smultb lr, ip, a4 /* lr = W6*row[6] */
+ add v1, v1, a2 /* v1 += W4*row[4] */
+ add v1, v1, lr /* v1 += W6*row[6] */
+ add v4, v4, a2 /* v4 += W4*row[4] */
+ sub v4, v4, lr /* v4 -= W6*row[6] */
+ smulbb lr, ip, a4 /* lr = W2*row[6] */
+ sub v2, v2, a2 /* v2 -= W4*row[4] */
+ sub v2, v2, lr /* v2 -= W2*row[6] */
+ sub v3, v3, a2 /* v3 -= W4*row[4] */
+ add v3, v3, lr /* v3 += W2*row[6] */
+
+1: add a2, v1, v5
+ mov a3, a2, lsr #11
+ bic a3, a3, #0x1f0000
+ sub a2, v2, v6
+ mov a2, a2, lsr #11
+ add a3, a3, a2, lsl #16
+ add a2, v3, v7
+ mov a4, a2, lsr #11
+ bic a4, a4, #0x1f0000
+ add a2, v4, fp
+ mov a2, a2, lsr #11
+ add a4, a4, a2, lsl #16
+ strd a3, [a1]
+
+ sub a2, v4, fp
+ mov a3, a2, lsr #11
+ bic a3, a3, #0x1f0000
+ sub a2, v3, v7
+ mov a2, a2, lsr #11
+ add a3, a3, a2, lsl #16
+ add a2, v2, v6
+ mov a4, a2, lsr #11
+ bic a4, a4, #0x1f0000
+ sub a2, v1, v5
+ mov a2, a2, lsr #11
+ add a4, a4, a2, lsl #16
+ strd a3, [a1, #8]
+
+ ldr pc, [sp], #4
+
+row_dc_only:
+ orr a3, a3, a3, lsl #16
+ bic a3, a3, #0xe000
+ mov a3, a3, lsl #3
+ mov a4, a3
+ strd a3, [a1]
+ strd a3, [a1, #8]
+
+ ldr pc, [sp], #4
+ .endfunc
+
+ .macro idct_col
+ ldr a4, [a1] /* a4 = col[1:0] */
+ mov ip, #16384
+ sub ip, ip, #1 /* ip = W4 */
+#if 0
+ mov v1, #(1<<(COL_SHIFT-1))
+ smlabt v2, ip, a4, v1 /* v2 = W4*col[1] + (1<<(COL_SHIFT-1)) */
+ smlabb v1, ip, a4, v1 /* v1 = W4*col[0] + (1<<(COL_SHIFT-1)) */
+ ldr a4, [a1, #(16*4)]
+#else
+ mov v1, #((1<<(COL_SHIFT-1))/W4) /* this matches the C version */
+ add v2, v1, a4, asr #16
+ rsb v2, v2, v2, lsl #14
+ mov a4, a4, lsl #16
+ add v1, v1, a4, asr #16
+ ldr a4, [a1, #(16*4)]
+ rsb v1, v1, v1, lsl #14
+#endif
+
+ smulbb lr, ip, a4
+ smulbt a3, ip, a4
+ sub v3, v1, lr
+ sub v5, v1, lr
+ add v7, v1, lr
+ add v1, v1, lr
+ sub v4, v2, a3
+ sub v6, v2, a3
+ add fp, v2, a3
+ ldr ip, [pc, #(w26-.-8)]
+ ldr a4, [a1, #(16*2)]
+ add v2, v2, a3
+
+ smulbb lr, ip, a4
+ smultb a3, ip, a4
+ add v1, v1, lr
+ sub v7, v7, lr
+ add v3, v3, a3
+ sub v5, v5, a3
+ smulbt lr, ip, a4
+ smultt a3, ip, a4
+ add v2, v2, lr
+ sub fp, fp, lr
+ add v4, v4, a3
+ ldr a4, [a1, #(16*6)]
+ sub v6, v6, a3
+
+ smultb lr, ip, a4
+ smulbb a3, ip, a4
+ add v1, v1, lr
+ sub v7, v7, lr
+ sub v3, v3, a3
+ add v5, v5, a3
+ smultt lr, ip, a4
+ smulbt a3, ip, a4
+ add v2, v2, lr
+ sub fp, fp, lr
+ sub v4, v4, a3
+ add v6, v6, a3
+
+ stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp}
+
+ ldr ip, [pc, #(w13-.-8)]
+ ldr a4, [a1, #(16*1)]
+ ldr lr, [pc, #(w57-.-8)]
+ smulbb v1, ip, a4
+ smultb v3, ip, a4
+ smulbb v5, lr, a4
+ smultb v7, lr, a4
+ smulbt v2, ip, a4
+ smultt v4, ip, a4
+ smulbt v6, lr, a4
+ smultt fp, lr, a4
+ rsb v4, v4, #0
+ ldr a4, [a1, #(16*3)]
+ rsb v3, v3, #0
+
+ smlatb v1, ip, a4, v1
+ smlatb v3, lr, a4, v3
+ smulbb a3, ip, a4
+ smulbb a2, lr, a4
+ sub v5, v5, a3
+ sub v7, v7, a2
+ smlatt v2, ip, a4, v2
+ smlatt v4, lr, a4, v4
+ smulbt a3, ip, a4
+ smulbt a2, lr, a4
+ sub v6, v6, a3
+ ldr a4, [a1, #(16*5)]
+ sub fp, fp, a2
+
+ smlabb v1, lr, a4, v1
+ smlabb v3, ip, a4, v3
+ smlatb v5, lr, a4, v5
+ smlatb v7, ip, a4, v7
+ smlabt v2, lr, a4, v2
+ smlabt v4, ip, a4, v4
+ smlatt v6, lr, a4, v6
+ ldr a3, [a1, #(16*7)]
+ smlatt fp, ip, a4, fp
+
+ smlatb v1, lr, a3, v1
+ smlabb v3, lr, a3, v3
+ smlatb v5, ip, a3, v5
+ smulbb a4, ip, a3
+ smlatt v2, lr, a3, v2
+ sub v7, v7, a4
+ smlabt v4, lr, a3, v4
+ smulbt a4, ip, a3
+ smlatt v6, ip, a3, v6
+ sub fp, fp, a4
+ .endm
+
+ .align
+ .func idct_col_armv5te
+idct_col_armv5te:
+ str lr, [sp, #-4]!
+
+ idct_col
+
+ ldmfd sp!, {a3, a4}
+ adds a2, a3, v1
+ mov a2, a2, lsr #20
+ orrmi a2, a2, #0xf000
+ add ip, a4, v2
+ mov ip, ip, asr #20
+ orr a2, a2, ip, lsl #16
+ str a2, [a1]
+ subs a3, a3, v1
+ mov a2, a3, lsr #20
+ orrmi a2, a2, #0xf000
+ sub a4, a4, v2
+ mov a4, a4, asr #20
+ orr a2, a2, a4, lsl #16
+ ldmfd sp!, {a3, a4}
+ str a2, [a1, #(16*7)]
+
+ subs a2, a3, v3
+ mov a2, a2, lsr #20
+ orrmi a2, a2, #0xf000
+ sub ip, a4, v4
+ mov ip, ip, asr #20
+ orr a2, a2, ip, lsl #16
+ str a2, [a1, #(16*1)]
+ adds a3, a3, v3
+ mov a2, a3, lsr #20
+ orrmi a2, a2, #0xf000
+ add a4, a4, v4
+ mov a4, a4, asr #20
+ orr a2, a2, a4, lsl #16
+ ldmfd sp!, {a3, a4}
+ str a2, [a1, #(16*6)]
+
+ adds a2, a3, v5
+ mov a2, a2, lsr #20
+ orrmi a2, a2, #0xf000
+ add ip, a4, v6
+ mov ip, ip, asr #20
+ orr a2, a2, ip, lsl #16
+ str a2, [a1, #(16*2)]
+ subs a3, a3, v5
+ mov a2, a3, lsr #20
+ orrmi a2, a2, #0xf000
+ sub a4, a4, v6
+ mov a4, a4, asr #20
+ orr a2, a2, a4, lsl #16
+ ldmfd sp!, {a3, a4}
+ str a2, [a1, #(16*5)]
+
+ adds a2, a3, v7
+ mov a2, a2, lsr #20
+ orrmi a2, a2, #0xf000
+ add ip, a4, fp
+ mov ip, ip, asr #20
+ orr a2, a2, ip, lsl #16
+ str a2, [a1, #(16*3)]
+ subs a3, a3, v7
+ mov a2, a3, lsr #20
+ orrmi a2, a2, #0xf000
+ sub a4, a4, fp
+ mov a4, a4, asr #20
+ orr a2, a2, a4, lsl #16
+ str a2, [a1, #(16*4)]
+
+ ldr pc, [sp], #4
+ .endfunc
+
+ .align
+ .func idct_col_put_armv5te
+idct_col_put_armv5te:
+ str lr, [sp, #-4]!
+
+ idct_col
+
+ ldmfd sp!, {a3, a4}
+ ldr lr, [sp, #32]
+ add a2, a3, v1
+ movs a2, a2, asr #20
+ movmi a2, #0
+ cmp a2, #255
+ movgt a2, #255
+ add ip, a4, v2
+ movs ip, ip, asr #20
+ movmi ip, #0
+ cmp ip, #255
+ movgt ip, #255
+ orr a2, a2, ip, lsl #8
+ sub a3, a3, v1
+ movs a3, a3, asr #20
+ movmi a3, #0
+ cmp a3, #255
+ movgt a3, #255
+ sub a4, a4, v2
+ movs a4, a4, asr #20
+ movmi a4, #0
+ cmp a4, #255
+ ldr v1, [sp, #28]
+ movgt a4, #255
+ strh a2, [v1]
+ add a2, v1, #2
+ str a2, [sp, #28]
+ orr a2, a3, a4, lsl #8
+ rsb v2, lr, lr, lsl #3
+ ldmfd sp!, {a3, a4}
+ strh a2, [v2, v1]!
+
+ sub a2, a3, v3
+ movs a2, a2, asr #20
+ movmi a2, #0
+ cmp a2, #255
+ movgt a2, #255
+ sub ip, a4, v4
+ movs ip, ip, asr #20
+ movmi ip, #0
+ cmp ip, #255
+ movgt ip, #255
+ orr a2, a2, ip, lsl #8
+ strh a2, [v1, lr]!
+ add a3, a3, v3
+ movs a2, a3, asr #20
+ movmi a2, #0
+ cmp a2, #255
+ movgt a2, #255
+ add a4, a4, v4
+ movs a4, a4, asr #20
+ movmi a4, #0
+ cmp a4, #255
+ movgt a4, #255
+ orr a2, a2, a4, lsl #8
+ ldmfd sp!, {a3, a4}
+ strh a2, [v2, -lr]!
+
+ add a2, a3, v5
+ movs a2, a2, asr #20
+ movmi a2, #0
+ cmp a2, #255
+ movgt a2, #255
+ add ip, a4, v6
+ movs ip, ip, asr #20
+ movmi ip, #0
+ cmp ip, #255
+ movgt ip, #255
+ orr a2, a2, ip, lsl #8
+ strh a2, [v1, lr]!
+ sub a3, a3, v5
+ movs a2, a3, asr #20
+ movmi a2, #0
+ cmp a2, #255
+ movgt a2, #255
+ sub a4, a4, v6
+ movs a4, a4, asr #20
+ movmi a4, #0
+ cmp a4, #255
+ movgt a4, #255
+ orr a2, a2, a4, lsl #8
+ ldmfd sp!, {a3, a4}
+ strh a2, [v2, -lr]!
+
+ add a2, a3, v7
+ movs a2, a2, asr #20
+ movmi a2, #0
+ cmp a2, #255
+ movgt a2, #255
+ add ip, a4, fp
+ movs ip, ip, asr #20
+ movmi ip, #0
+ cmp ip, #255
+ movgt ip, #255
+ orr a2, a2, ip, lsl #8
+ strh a2, [v1, lr]
+ sub a3, a3, v7
+ movs a2, a3, asr #20
+ movmi a2, #0
+ cmp a2, #255
+ movgt a2, #255
+ sub a4, a4, fp
+ movs a4, a4, asr #20
+ movmi a4, #0
+ cmp a4, #255
+ movgt a4, #255
+ orr a2, a2, a4, lsl #8
+ strh a2, [v2, -lr]
+
+ ldr pc, [sp], #4
+ .endfunc
+
+ .align
+ .func idct_col_add_armv5te
+idct_col_add_armv5te:
+ str lr, [sp, #-4]!
+
+ idct_col
+
+ ldr lr, [sp, #36]
+
+ ldmfd sp!, {a3, a4}
+ ldrh ip, [lr]
+ add a2, a3, v1
+ mov a2, a2, asr #20
+ sub a3, a3, v1
+ and v1, ip, #255
+ adds a2, a2, v1
+ movmi a2, #0
+ cmp a2, #255
+ movgt a2, #255
+ add v1, a4, v2
+ mov v1, v1, asr #20
+ adds v1, v1, ip, lsr #8
+ movmi v1, #0
+ cmp v1, #255
+ movgt v1, #255
+ orr a2, a2, v1, lsl #8
+ ldr v1, [sp, #32]
+ sub a4, a4, v2
+ rsb v2, v1, v1, lsl #3
+ ldrh ip, [v2, lr]!
+ strh a2, [lr]
+ mov a3, a3, asr #20
+ and a2, ip, #255
+ adds a3, a3, a2
+ movmi a3, #0
+ cmp a3, #255
+ movgt a3, #255
+ mov a4, a4, asr #20
+ adds a4, a4, ip, lsr #8
+ movmi a4, #0
+ cmp a4, #255
+ movgt a4, #255
+ add a2, lr, #2
+ str a2, [sp, #28]
+ orr a2, a3, a4, lsl #8
+ strh a2, [v2]
+
+ ldmfd sp!, {a3, a4}
+ ldrh ip, [lr, v1]!
+ sub a2, a3, v3
+ mov a2, a2, asr #20
+ add a3, a3, v3
+ and v3, ip, #255
+ adds a2, a2, v3
+ movmi a2, #0
+ cmp a2, #255
+ movgt a2, #255
+ sub v3, a4, v4
+ mov v3, v3, asr #20
+ adds v3, v3, ip, lsr #8
+ movmi v3, #0
+ cmp v3, #255
+ movgt v3, #255
+ orr a2, a2, v3, lsl #8
+ add a4, a4, v4
+ ldrh ip, [v2, -v1]!
+ strh a2, [lr]
+ mov a3, a3, asr #20
+ and a2, ip, #255
+ adds a3, a3, a2
+ movmi a3, #0
+ cmp a3, #255
+ movgt a3, #255
+ mov a4, a4, asr #20
+ adds a4, a4, ip, lsr #8
+ movmi a4, #0
+ cmp a4, #255
+ movgt a4, #255
+ orr a2, a3, a4, lsl #8
+ strh a2, [v2]
+
+ ldmfd sp!, {a3, a4}
+ ldrh ip, [lr, v1]!
+ add a2, a3, v5
+ mov a2, a2, asr #20
+ sub a3, a3, v5
+ and v3, ip, #255
+ adds a2, a2, v3
+ movmi a2, #0
+ cmp a2, #255
+ movgt a2, #255
+ add v3, a4, v6
+ mov v3, v3, asr #20
+ adds v3, v3, ip, lsr #8
+ movmi v3, #0
+ cmp v3, #255
+ movgt v3, #255
+ orr a2, a2, v3, lsl #8
+ sub a4, a4, v6
+ ldrh ip, [v2, -v1]!
+ strh a2, [lr]
+ mov a3, a3, asr #20
+ and a2, ip, #255
+ adds a3, a3, a2
+ movmi a3, #0
+ cmp a3, #255
+ movgt a3, #255
+ mov a4, a4, asr #20
+ adds a4, a4, ip, lsr #8
+ movmi a4, #0
+ cmp a4, #255
+ movgt a4, #255
+ orr a2, a3, a4, lsl #8
+ strh a2, [v2]
+
+ ldmfd sp!, {a3, a4}
+ ldrh ip, [lr, v1]!
+ add a2, a3, v7
+ mov a2, a2, asr #20
+ sub a3, a3, v7
+ and v3, ip, #255
+ adds a2, a2, v3
+ movmi a2, #0
+ cmp a2, #255
+ movgt a2, #255
+ add v3, a4, fp
+ mov v3, v3, asr #20
+ adds v3, v3, ip, lsr #8
+ movmi v3, #0
+ cmp v3, #255
+ movgt v3, #255
+ orr a2, a2, v3, lsl #8
+ sub a4, a4, fp
+ ldrh ip, [v2, -v1]!
+ strh a2, [lr]
+ mov a3, a3, asr #20
+ and a2, ip, #255
+ adds a3, a3, a2
+ movmi a3, #0
+ cmp a3, #255
+ movgt a3, #255
+ mov a4, a4, asr #20
+ adds a4, a4, ip, lsr #8
+ movmi a4, #0
+ cmp a4, #255
+ movgt a4, #255
+ orr a2, a3, a4, lsl #8
+ strh a2, [v2]
+
+ ldr pc, [sp], #4
+ .endfunc
+
+ .align
+ .global simple_idct_armv5te
+ .func simple_idct_armv5te
+simple_idct_armv5te:
+ stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, lr}
+
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+
+ sub a1, a1, #(16*7)
+
+ bl idct_col_armv5te
+ add a1, a1, #4
+ bl idct_col_armv5te
+ add a1, a1, #4
+ bl idct_col_armv5te
+ add a1, a1, #4
+ bl idct_col_armv5te
+
+ ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
+ .endfunc
+
+ .align
+ .global simple_idct_add_armv5te
+ .func simple_idct_add_armv5te
+simple_idct_add_armv5te:
+ stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}
+
+ mov a1, a3
+
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+
+ sub a1, a1, #(16*7)
+
+ bl idct_col_add_armv5te
+ add a1, a1, #4
+ bl idct_col_add_armv5te
+ add a1, a1, #4
+ bl idct_col_add_armv5te
+ add a1, a1, #4
+ bl idct_col_add_armv5te
+
+ add sp, sp, #8
+ ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
+ .endfunc
+
+ .align
+ .global simple_idct_put_armv5te
+ .func simple_idct_put_armv5te
+simple_idct_put_armv5te:
+ stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}
+
+ mov a1, a3
+
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+
+ sub a1, a1, #(16*7)
+
+ bl idct_col_put_armv5te
+ add a1, a1, #4
+ bl idct_col_put_armv5te
+ add a1, a1, #4
+ bl idct_col_put_armv5te
+ add a1, a1, #4
+ bl idct_col_put_armv5te
+
+ add sp, sp, #8
+ ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
+ .endfunc
diff --git a/src/libffmpeg/libavcodec/avcodec.h b/src/libffmpeg/libavcodec/avcodec.h
index d8090ed32..7d7678455 100644
--- a/src/libffmpeg/libavcodec/avcodec.h
+++ b/src/libffmpeg/libavcodec/avcodec.h
@@ -37,13 +37,13 @@ extern "C" {
#define AV_STRINGIFY(s) AV_TOSTRING(s)
#define AV_TOSTRING(s) #s
-#define LIBAVCODEC_VERSION_INT ((51<<16)+(25<<8)+0)
-#define LIBAVCODEC_VERSION 51.25.0
+#define LIBAVCODEC_VERSION_INT ((51<<16)+(28<<8)+0)
+#define LIBAVCODEC_VERSION 51.28.0
#define LIBAVCODEC_BUILD LIBAVCODEC_VERSION_INT
#define LIBAVCODEC_IDENT "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION)
-#define AV_NOPTS_VALUE int64_t_C(0x8000000000000000)
+#define AV_NOPTS_VALUE INT64_C(0x8000000000000000)
#define AV_TIME_BASE 1000000
#define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE}
@@ -156,6 +156,7 @@ enum CodecID {
CODEC_ID_TIERTEXSEQVIDEO,
CODEC_ID_TIFF,
CODEC_ID_GIF,
+ CODEC_ID_FFH264,
/* various pcm "codecs" */
CODEC_ID_PCM_S16LE= 0x10000,
@@ -243,6 +244,7 @@ enum CodecID {
CODEC_ID_WAVPACK,
CODEC_ID_DSICINAUDIO,
CODEC_ID_IMC,
+ CODEC_ID_MUSEPACK7,
/* subtitle codecs */
CODEC_ID_DVD_SUBTITLE= 0x17000,
@@ -372,7 +374,7 @@ typedef struct RcOverride{
#define CODEC_FLAG2_LOCAL_HEADER 0x00000008 ///< place global headers at every keyframe instead of in extradata
#define CODEC_FLAG2_BPYRAMID 0x00000010 ///< H.264 allow b-frames to be used as references
#define CODEC_FLAG2_WPRED 0x00000020 ///< H.264 weighted biprediction for b-frames
-#define CODEC_FLAG2_MIXED_REFS 0x00000040 ///< H.264 multiple references per partition
+#define CODEC_FLAG2_MIXED_REFS 0x00000040 ///< H.264 one reference per partition, as opposed to one reference per macroblock
#define CODEC_FLAG2_8X8DCT 0x00000080 ///< H.264 high profile 8x8 transform
#define CODEC_FLAG2_FASTPSKIP 0x00000100 ///< H.264 fast pskip
#define CODEC_FLAG2_AUD 0x00000200 ///< H.264 access unit delimiters
@@ -380,6 +382,7 @@ typedef struct RcOverride{
#define CODEC_FLAG2_INTRA_VLC 0x00000800 ///< use MPEG-2 intra VLC table
#define CODEC_FLAG2_MEMC_ONLY 0x00001000 ///< only do ME/MC (I frames -> ref, P frame -> ME+MC)
#define CODEC_FLAG2_DROP_FRAME_TIMECODE 0x00002000 ///< timecode is in drop frame format
+#define CODEC_FLAG2_SKIP_RD 0x00004000 ///< RD optimal MB level residual skiping
/* Unsupported options :
* Syntax Arithmetic coding (SAC)
@@ -2090,9 +2093,6 @@ typedef struct AVCodec {
int (*decode)(AVCodecContext *, void *outdata, int *outdata_size,
uint8_t *buf, int buf_size);
int capabilities;
-#if LIBAVCODEC_VERSION_INT < ((50<<16)+(0<<8)+0)
- void *dummy; // FIXME remove next time we break binary compatibility
-#endif
struct AVCodec *next;
void (*flush)(AVCodecContext *);
const AVRational *supported_framerates; ///array of supported framerates, or NULL if any, array is terminated by {0,0}
@@ -2310,6 +2310,7 @@ extern AVCodec libgsm_decoder;
extern AVCodec bmp_decoder;
extern AVCodec mmvideo_decoder;
extern AVCodec zmbv_decoder;
+extern AVCodec zmbv_encoder;
extern AVCodec avs_decoder;
extern AVCodec smacker_decoder;
extern AVCodec smackaud_decoder;
@@ -2324,6 +2325,7 @@ extern AVCodec dsicinaudio_decoder;
extern AVCodec tiertexseqvideo_decoder;
extern AVCodec tiff_decoder;
extern AVCodec imc_decoder;
+extern AVCodec mpc7_decoder;
/* pcm codecs */
#define PCM_CODEC(id, name) \
@@ -2691,20 +2693,6 @@ int img_crop(AVPicture *dst, const AVPicture *src,
int img_pad(AVPicture *dst, const AVPicture *src, int height, int width, int pix_fmt,
int padtop, int padbottom, int padleft, int padright, int *color);
-/* endian macros */
-#if !defined(BE_16) || !defined(BE_32) || !defined(LE_16) || !defined(LE_32)
-#define BE_16(x) ((((uint8_t*)(x))[0] << 8) | ((uint8_t*)(x))[1])
-#define BE_32(x) ((((uint8_t*)(x))[0] << 24) | \
- (((uint8_t*)(x))[1] << 16) | \
- (((uint8_t*)(x))[2] << 8) | \
- ((uint8_t*)(x))[3])
-#define LE_16(x) ((((uint8_t*)(x))[1] << 8) | ((uint8_t*)(x))[0])
-#define LE_32(x) ((((uint8_t*)(x))[3] << 24) | \
- (((uint8_t*)(x))[2] << 16) | \
- (((uint8_t*)(x))[1] << 8) | \
- ((uint8_t*)(x))[0])
-#endif
-
extern unsigned int av_xiphlacing(unsigned char *s, unsigned int v);
/* unused static macro */
diff --git a/src/libffmpeg/libavcodec/bitstream.h b/src/libffmpeg/libavcodec/bitstream.h
index af25b6dcf..29e0f441e 100644
--- a/src/libffmpeg/libavcodec/bitstream.h
+++ b/src/libffmpeg/libavcodec/bitstream.h
@@ -187,12 +187,12 @@ static inline uint##x##_t unaligned##x(const void *v) { \
}
# elif defined(__DECC)
# define unaligned(x) \
-static inline uint##x##_t unaligned##x##(const void *v) { \
+static inline uint##x##_t unaligned##x(const void *v) { \
return *(const __unaligned uint##x##_t *) v; \
}
# else
# define unaligned(x) \
-static inline uint##x##_t unaligned##x##(const void *v) { \
+static inline uint##x##_t unaligned##x(const void *v) { \
return *(const uint##x##_t *) v; \
}
# endif
@@ -877,7 +877,7 @@ void free_vlc(VLC *vlc);
* read the longest vlc code
* = (max_vlc_length + bits - 1) / bits
*/
-static always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2],
+static av_always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2],
int bits, int max_depth)
{
int code;
diff --git a/src/libffmpeg/libavcodec/bytestream.h b/src/libffmpeg/libavcodec/bytestream.h
index 25c457fe4..a742fa1c1 100644
--- a/src/libffmpeg/libavcodec/bytestream.h
+++ b/src/libffmpeg/libavcodec/bytestream.h
@@ -22,32 +22,32 @@
#ifndef FFMPEG_BYTESTREAM_H
#define FFMPEG_BYTESTREAM_H
-static always_inline unsigned int bytestream_get_le32(uint8_t **b)
+static av_always_inline unsigned int bytestream_get_le32(uint8_t **b)
{
(*b) += 4;
return LE_32(*b - 4);
}
-static always_inline unsigned int bytestream_get_le16(uint8_t **b)
+static av_always_inline unsigned int bytestream_get_le16(uint8_t **b)
{
(*b) += 2;
return LE_16(*b - 2);
}
-static always_inline unsigned int bytestream_get_byte(uint8_t **b)
+static av_always_inline unsigned int bytestream_get_byte(uint8_t **b)
{
(*b)++;
return (*b)[-1];
}
-static always_inline unsigned int bytestream_get_buffer(uint8_t **b, uint8_t *dst, unsigned int size)
+static av_always_inline unsigned int bytestream_get_buffer(uint8_t **b, uint8_t *dst, unsigned int size)
{
memcpy(dst, *b, size);
(*b) += size;
return size;
}
-static always_inline void bytestream_put_be32(uint8_t **b, const unsigned int value)
+static av_always_inline void bytestream_put_be32(uint8_t **b, const unsigned int value)
{
*(*b)++ = value >> 24;
*(*b)++ = value >> 16;
@@ -55,13 +55,13 @@ static always_inline void bytestream_put_be32(uint8_t **b, const unsigned int va
*(*b)++ = value;
};
-static always_inline void bytestream_put_be16(uint8_t **b, const unsigned int value)
+static av_always_inline void bytestream_put_be16(uint8_t **b, const unsigned int value)
{
*(*b)++ = value >> 8;
*(*b)++ = value;
}
-static always_inline void bytestream_put_le32(uint8_t **b, const unsigned int value)
+static av_always_inline void bytestream_put_le32(uint8_t **b, const unsigned int value)
{
*(*b)++ = value;
*(*b)++ = value >> 8;
@@ -69,18 +69,18 @@ static always_inline void bytestream_put_le32(uint8_t **b, const unsigned int va
*(*b)++ = value >> 24;
}
-static always_inline void bytestream_put_le16(uint8_t **b, const unsigned int value)
+static av_always_inline void bytestream_put_le16(uint8_t **b, const unsigned int value)
{
*(*b)++ = value;
*(*b)++ = value >> 8;
}
-static always_inline void bytestream_put_byte(uint8_t **b, const unsigned int value)
+static av_always_inline void bytestream_put_byte(uint8_t **b, const unsigned int value)
{
*(*b)++ = value;
}
-static always_inline void bytestream_put_buffer(uint8_t **b, const uint8_t *src, unsigned int size)
+static av_always_inline void bytestream_put_buffer(uint8_t **b, const uint8_t *src, unsigned int size)
{
memcpy(*b, src, size);
(*b) += size;
diff --git a/src/libffmpeg/libavcodec/cabac.h b/src/libffmpeg/libavcodec/cabac.h
index 43fe78e3b..f47406a9e 100644
--- a/src/libffmpeg/libavcodec/cabac.h
+++ b/src/libffmpeg/libavcodec/cabac.h
@@ -363,7 +363,7 @@ static inline void renorm_cabac_decoder_once(CABACContext *c){
refill(c);
}
-static int always_inline get_cabac_inline(CABACContext *c, uint8_t * const state){
+static int av_always_inline get_cabac_inline(CABACContext *c, uint8_t * const state){
//FIXME gcc generates duplicate load/stores for c->low and c->range
#define LOW "0"
#define RANGE "4"
@@ -631,7 +631,7 @@ static int get_cabac_bypass(CABACContext *c){
}
-static always_inline int get_cabac_bypass_sign(CABACContext *c, int val){
+static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val){
#if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
asm volatile(
"movl "RANGE "(%1), %%ebx \n\t"
diff --git a/src/libffmpeg/libavcodec/cinepak.c b/src/libffmpeg/libavcodec/cinepak.c
index e137377e5..fd95b739e 100644
--- a/src/libffmpeg/libavcodec/cinepak.c
+++ b/src/libffmpeg/libavcodec/cinepak.c
@@ -26,6 +26,8 @@
* by Ewald Snel <ewald@rambo.its.tudelft.nl>
* For more information on the Cinepak algorithm, visit:
* http://www.csse.monash.edu.au/~timf/
+ * For more information on the quirky data inside Sega FILM/CPK files, visit:
+ * http://wiki.multimedia.cx/index.php?title=Sega_FILM
*/
#include <stdio.h>
@@ -67,6 +69,8 @@ typedef struct CinepakContext {
int palette_video;
cvid_strip_t strips[MAX_STRIPS];
+ int sega_film_skip_bytes;
+
} CinepakContext;
static void cinepak_decode_codebook (cvid_codebook_t *codebook,
@@ -319,8 +323,6 @@ static int cinepak_decode (CinepakContext *s)
int i, result, strip_size, frame_flags, num_strips;
int y0 = 0;
int encoded_buf_size;
- /* if true, Cinepak data is from a Sega FILM/CPK file */
- int sega_film_data = 0;
if (s->size < 10)
return -1;
@@ -328,12 +330,29 @@ static int cinepak_decode (CinepakContext *s)
frame_flags = s->data[0];
num_strips = BE_16 (&s->data[8]);
encoded_buf_size = ((s->data[1] << 16) | BE_16 (&s->data[2]));
- if (encoded_buf_size != s->size)
- sega_film_data = 1;
- if (sega_film_data)
- s->data += 12;
- else
- s->data += 10;
+
+ /* if this is the first frame, check for deviant Sega FILM data */
+ if (s->sega_film_skip_bytes == -1) {
+ if (encoded_buf_size != s->size) {
+ /* If the encoded frame size differs from the frame size as indicated
+ * by the container file, this data likely comes from a Sega FILM/CPK file.
+ * If the frame header is followed by the bytes FE 00 00 06 00 00 then
+ * this is probably one of the two known files that have 6 extra bytes
+ * after the frame header. Else, assume 2 extra bytes. */
+ if ((s->data[10] == 0xFE) &&
+ (s->data[11] == 0x00) &&
+ (s->data[12] == 0x00) &&
+ (s->data[13] == 0x06) &&
+ (s->data[14] == 0x00) &&
+ (s->data[15] == 0x00))
+ s->sega_film_skip_bytes = 6;
+ else
+ s->sega_film_skip_bytes = 2;
+ } else
+ s->sega_film_skip_bytes = 0;
+ }
+
+ s->data += 10 + s->sega_film_skip_bytes;
if (num_strips > MAX_STRIPS)
num_strips = MAX_STRIPS;
@@ -377,6 +396,7 @@ static int cinepak_decode_init(AVCodecContext *avctx)
s->avctx = avctx;
s->width = (avctx->width + 3) & ~3;
s->height = (avctx->height + 3) & ~3;
+ s->sega_film_skip_bytes = -1; /* uninitialized state */
// check for paletted data
if ((avctx->palctrl == NULL) || (avctx->bits_per_sample == 40)) {
diff --git a/src/libffmpeg/libavcodec/cook.c b/src/libffmpeg/libavcodec/cook.c
index 47d9ce2c3..943addb89 100644
--- a/src/libffmpeg/libavcodec/cook.c
+++ b/src/libffmpeg/libavcodec/cook.c
@@ -312,7 +312,7 @@ static int cook_decode_close(AVCodecContext *avctx)
{
int i;
COOKContext *q = avctx->priv_data;
- av_log(NULL,AV_LOG_DEBUG, "Deallocating memory.\n");
+ av_log(avctx,AV_LOG_DEBUG, "Deallocating memory.\n");
/* Free allocated memory buffers. */
av_free(q->mlt_window);
@@ -1160,12 +1160,12 @@ static int cook_decode_init(AVCodecContext *avctx)
/* Take care of the codec specific extradata. */
if (avctx->extradata_size <= 0) {
- av_log(NULL,AV_LOG_ERROR,"Necessary extradata missing!\n");
+ av_log(avctx,AV_LOG_ERROR,"Necessary extradata missing!\n");
return -1;
} else {
/* 8 for mono, 16 for stereo, ? for multichannel
Swap to right endianness so we don't need to care later on. */
- av_log(NULL,AV_LOG_DEBUG,"codecdata_length=%d\n",avctx->extradata_size);
+ av_log(avctx,AV_LOG_DEBUG,"codecdata_length=%d\n",avctx->extradata_size);
if (avctx->extradata_size >= 8){
e->cookversion = be2me_32(e->cookversion);
e->samples_per_frame = be2me_16(e->samples_per_frame);
@@ -1201,24 +1201,24 @@ static int cook_decode_init(AVCodecContext *avctx)
switch (e->cookversion) {
case MONO_COOK1:
if (q->nb_channels != 1) {
- av_log(NULL,AV_LOG_ERROR,"Container channels != 1, report sample!\n");
+ av_log(avctx,AV_LOG_ERROR,"Container channels != 1, report sample!\n");
return -1;
}
- av_log(NULL,AV_LOG_DEBUG,"MONO_COOK1\n");
+ av_log(avctx,AV_LOG_DEBUG,"MONO_COOK1\n");
break;
case MONO_COOK2:
if (q->nb_channels != 1) {
q->joint_stereo = 0;
q->bits_per_subpacket = q->bits_per_subpacket/2;
}
- av_log(NULL,AV_LOG_DEBUG,"MONO_COOK2\n");
+ av_log(avctx,AV_LOG_DEBUG,"MONO_COOK2\n");
break;
case JOINT_STEREO:
if (q->nb_channels != 2) {
- av_log(NULL,AV_LOG_ERROR,"Container channels != 2, report sample!\n");
+ av_log(avctx,AV_LOG_ERROR,"Container channels != 2, report sample!\n");
return -1;
}
- av_log(NULL,AV_LOG_DEBUG,"JOINT_STEREO\n");
+ av_log(avctx,AV_LOG_DEBUG,"JOINT_STEREO\n");
if (avctx->extradata_size >= 16){
q->total_subbands = q->subbands + e->js_subband_start;
q->js_subband_start = e->js_subband_start;
@@ -1233,11 +1233,11 @@ static int cook_decode_init(AVCodecContext *avctx)
}
break;
case MC_COOK:
- av_log(NULL,AV_LOG_ERROR,"MC_COOK not supported!\n");
+ av_log(avctx,AV_LOG_ERROR,"MC_COOK not supported!\n");
return -1;
break;
default:
- av_log(NULL,AV_LOG_ERROR,"Unknown Cook version, report sample!\n");
+ av_log(avctx,AV_LOG_ERROR,"Unknown Cook version, report sample!\n");
return -1;
break;
}
@@ -1280,16 +1280,16 @@ static int cook_decode_init(AVCodecContext *avctx)
/* Try to catch some obviously faulty streams, othervise it might be exploitable */
if (q->total_subbands > 53) {
- av_log(NULL,AV_LOG_ERROR,"total_subbands > 53, report sample!\n");
+ av_log(avctx,AV_LOG_ERROR,"total_subbands > 53, report sample!\n");
return -1;
}
if (q->subbands > 50) {
- av_log(NULL,AV_LOG_ERROR,"subbands > 50, report sample!\n");
+ av_log(avctx,AV_LOG_ERROR,"subbands > 50, report sample!\n");
return -1;
}
if ((q->samples_per_channel == 256) || (q->samples_per_channel == 512) || (q->samples_per_channel == 1024)) {
} else {
- av_log(NULL,AV_LOG_ERROR,"unknown amount of samples_per_channel = %d, report sample!\n",q->samples_per_channel);
+ av_log(avctx,AV_LOG_ERROR,"unknown amount of samples_per_channel = %d, report sample!\n",q->samples_per_channel);
return -1;
}
diff --git a/src/libffmpeg/libavcodec/cscd.c b/src/libffmpeg/libavcodec/cscd.c
index e4257f4c0..d8733d6dd 100644
--- a/src/libffmpeg/libavcodec/cscd.c
+++ b/src/libffmpeg/libavcodec/cscd.c
@@ -220,12 +220,12 @@ static int decode_init(AVCodecContext *avctx) {
}
avctx->has_b_frames = 0;
switch (avctx->bits_per_sample) {
- case 16: avctx->pix_fmt = PIX_FMT_RGB565; break;
+ case 16: avctx->pix_fmt = PIX_FMT_RGB555; break;
case 24: avctx->pix_fmt = PIX_FMT_BGR24; break;
case 32: avctx->pix_fmt = PIX_FMT_RGBA32; break;
default:
av_log(avctx, AV_LOG_ERROR,
- "CamStudio codec error: unvalid depth %i bpp\n",
+ "CamStudio codec error: invalid depth %i bpp\n",
avctx->bits_per_sample);
return 1;
}
diff --git a/src/libffmpeg/libavcodec/dsputil.c b/src/libffmpeg/libavcodec/dsputil.c
index 51eddbc60..916d8658c 100644
--- a/src/libffmpeg/libavcodec/dsputil.c
+++ b/src/libffmpeg/libavcodec/dsputil.c
@@ -2549,6 +2549,11 @@ void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) {
}
#endif /* CONFIG_VC1_DECODER||CONFIG_WMV3_DECODER */
+#if defined(CONFIG_H264_ENCODER)
+/* H264 specific */
+void ff_h264dsp_init(DSPContext* c, AVCodecContext *avctx);
+#endif /* CONFIG_H264_ENCODER */
+
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
int i;
@@ -3801,11 +3806,31 @@ void dsputil_static_init(void)
for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
}
+int ff_check_alignment(void){
+ static int did_fail=0;
+ DECLARE_ALIGNED_16(int, aligned);
+
+ if((int)&aligned & 15){
+ if(!did_fail){
+#if defined(HAVE_MMX) || defined(HAVE_ALTIVEC)
+ av_log(NULL, AV_LOG_ERROR,
+ "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
+ "and may be very slow or crash. This is not a bug in libavcodec,\n"
+ "but in the compiler. Do not report crashes to FFmpeg developers.\n");
+#endif
+ did_fail=1;
+ }
+ return -1;
+ }
+ return 0;
+}
void dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
int i;
+ ff_check_alignment();
+
#ifdef CONFIG_ENCODERS
if(avctx->dct_algo==FF_DCT_FASTINT) {
c->fdct = fdct_ifast;
@@ -4006,6 +4031,9 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
#if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
ff_vc1dsp_init(c,avctx);
#endif
+#if defined(CONFIG_H264_ENCODER)
+ ff_h264dsp_init(c,avctx);
+#endif
c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
diff --git a/src/libffmpeg/libavcodec/dsputil.h b/src/libffmpeg/libavcodec/dsputil.h
index de3c1d564..78109f7b9 100644
--- a/src/libffmpeg/libavcodec/dsputil.h
+++ b/src/libffmpeg/libavcodec/dsputil.h
@@ -33,9 +33,6 @@
#include "common.h"
#include "avcodec.h"
-#if defined(ARCH_X86) || defined(ARCH_X86_64)
-#define HAVE_MMX 1
-#endif
//#define DEBUG
/* dct code */
@@ -381,10 +378,12 @@ typedef struct DSPContext {
#define BASIS_SHIFT 16
#define RECON_SHIFT 6
+ /* h264 functions */
void (*h264_idct_add)(uint8_t *dst, DCTELEM *block, int stride);
void (*h264_idct8_add)(uint8_t *dst, DCTELEM *block, int stride);
void (*h264_idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
void (*h264_idct8_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
+ void (*h264_dct)(DCTELEM block[4][4]);
/* snow wavelet */
void (*vertical_compose97i)(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width);
@@ -411,6 +410,8 @@ typedef struct DSPContext {
void dsputil_static_init(void);
void dsputil_init(DSPContext* p, AVCodecContext *avctx);
+int ff_check_alignment(void);
+
/**
* permute block according to permuatation.
* @param last last non zero element in scantable order
@@ -483,6 +484,7 @@ int mm_support(void);
#define MM_SSE2 0x0010 /* PIV SSE2 functions */
#define MM_3DNOWEXT 0x0020 /* AMD 3DNowExt */
#define MM_SSE3 0x0040 /* Prescott SSE3 functions */
+#define MM_SSSE3 0x0080 /* Conroe SSSE3 functions */
extern int mm_flags;
@@ -593,30 +595,6 @@ void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx);
#endif
-#ifdef __GNUC__
-
-struct unaligned_64 { uint64_t l; } __attribute__((packed));
-struct unaligned_32 { uint32_t l; } __attribute__((packed));
-struct unaligned_16 { uint16_t l; } __attribute__((packed));
-
-#define LD16(a) (((const struct unaligned_16 *) (a))->l)
-#define LD32(a) (((const struct unaligned_32 *) (a))->l)
-#define LD64(a) (((const struct unaligned_64 *) (a))->l)
-
-#define ST16(a, b) (((struct unaligned_16 *) (a))->l) = (b)
-#define ST32(a, b) (((struct unaligned_32 *) (a))->l) = (b)
-
-#else /* __GNUC__ */
-
-#define LD16(a) (*((uint16_t*)(a)))
-#define LD32(a) (*((uint32_t*)(a)))
-#define LD64(a) (*((uint64_t*)(a)))
-
-#define ST16(a, b) *((uint16_t*)(a)) = (b)
-#define ST32(a, b) *((uint32_t*)(a)) = (b)
-
-#endif /* !__GNUC__ */
-
/* PSNR */
void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3],
int orig_linesize[3], int coded_linesize,
diff --git a/src/libffmpeg/libavcodec/dv.c b/src/libffmpeg/libavcodec/dv.c
index 76095a481..803d3502d 100644
--- a/src/libffmpeg/libavcodec/dv.c
+++ b/src/libffmpeg/libavcodec/dv.c
@@ -560,7 +560,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
#ifdef DV_CODEC_TINY_TARGET
/* Converts run and level (where level != 0) pair into vlc, returning bit size */
-static always_inline int dv_rl2vlc(int run, int level, int sign, uint32_t* vlc)
+static av_always_inline int dv_rl2vlc(int run, int level, int sign, uint32_t* vlc)
{
int size;
if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) {
@@ -585,7 +585,7 @@ static always_inline int dv_rl2vlc(int run, int level, int sign, uint32_t* vlc)
return size;
}
-static always_inline int dv_rl2vlc_size(int run, int level)
+static av_always_inline int dv_rl2vlc_size(int run, int level)
{
int size;
@@ -601,13 +601,13 @@ static always_inline int dv_rl2vlc_size(int run, int level)
return size;
}
#else
-static always_inline int dv_rl2vlc(int run, int l, int sign, uint32_t* vlc)
+static av_always_inline int dv_rl2vlc(int run, int l, int sign, uint32_t* vlc)
{
*vlc = dv_vlc_map[run][l].vlc | sign;
return dv_vlc_map[run][l].size;
}
-static always_inline int dv_rl2vlc_size(int run, int l)
+static av_always_inline int dv_rl2vlc_size(int run, int l)
{
return dv_vlc_map[run][l].size;
}
@@ -627,7 +627,7 @@ typedef struct EncBlockInfo {
uint32_t partial_bit_buffer; /* we can't use uint16_t here */
} EncBlockInfo;
-static always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext* pb_pool,
+static av_always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext* pb_pool,
PutBitContext* pb_end)
{
int prev;
@@ -670,7 +670,7 @@ static always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext
return pb;
}
-static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi,
+static av_always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi,
const uint8_t* zigzag_scan, const int *weight, int bias)
{
int i, area;
@@ -742,7 +742,7 @@ static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi,
//FIXME replace this by dsputil
#define SC(x, y) ((s[x] - s[y]) ^ ((s[x] - s[y]) >> 7))
-static always_inline int dv_guess_dct_mode(DCTELEM *blk) {
+static av_always_inline int dv_guess_dct_mode(DCTELEM *blk) {
DCTELEM *s;
int score88 = 0;
int score248 = 0;
diff --git a/src/libffmpeg/libavcodec/faandct.c b/src/libffmpeg/libavcodec/faandct.c
index e3c0d84a2..6f73ee5e9 100644
--- a/src/libffmpeg/libavcodec/faandct.c
+++ b/src/libffmpeg/libavcodec/faandct.c
@@ -70,7 +70,7 @@ B6*B0, B6*B1, B6*B2, B6*B3, B6*B4, B6*B5, B6*B6, B6*B7,
B7*B0, B7*B1, B7*B2, B7*B3, B7*B4, B7*B5, B7*B6, B7*B7,
};
-static always_inline void row_fdct(FLOAT temp[64], DCTELEM * data)
+static av_always_inline void row_fdct(FLOAT temp[64], DCTELEM * data)
{
FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
FLOAT tmp10, tmp11, tmp12, tmp13;
diff --git a/src/libffmpeg/libavcodec/ffv1.c b/src/libffmpeg/libavcodec/ffv1.c
index 62623e591..1ca18a4e8 100644
--- a/src/libffmpeg/libavcodec/ffv1.c
+++ b/src/libffmpeg/libavcodec/ffv1.c
@@ -186,7 +186,7 @@ typedef struct FFV1Context{
DSPContext dsp;
}FFV1Context;
-static always_inline int fold(int diff, int bits){
+static av_always_inline int fold(int diff, int bits){
if(bits==8)
diff= (int8_t)diff;
else{
diff --git a/src/libffmpeg/libavcodec/h263.c b/src/libffmpeg/libavcodec/h263.c
index ba51c245a..af5fa50e6 100644
--- a/src/libffmpeg/libavcodec/h263.c
+++ b/src/libffmpeg/libavcodec/h263.c
@@ -487,12 +487,28 @@ static inline void restore_ac_coeffs(MpegEncContext * s, DCTELEM block[6][64], i
}
/**
+ * init s->current_picture.qscale_table from s->lambda_table
+ */
+static void ff_init_qscale_tab(MpegEncContext *s){
+ int8_t * const qscale_table= s->current_picture.qscale_table;
+ int i;
+
+ for(i=0; i<s->mb_num; i++){
+ unsigned int lam= s->lambda_table[ s->mb_index2xy[i] ];
+ int qp= (lam*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
+ qscale_table[ s->mb_index2xy[i] ]= clip(qp, s->avctx->qmin, s->avctx->qmax);
+ }
+}
+
+/**
* modify qscale so that encoding is acually possible in h263 (limit difference to -2..2)
*/
void ff_clean_h263_qscales(MpegEncContext *s){
int i;
int8_t * const qscale_table= s->current_picture.qscale_table;
+ ff_init_qscale_tab(s);
+
for(i=1; i<s->mb_num; i++){
if(qscale_table[ s->mb_index2xy[i] ] - qscale_table[ s->mb_index2xy[i-1] ] >2)
qscale_table[ s->mb_index2xy[i] ]= qscale_table[ s->mb_index2xy[i-1] ]+2;
@@ -507,7 +523,6 @@ void ff_clean_h263_qscales(MpegEncContext *s){
int mb_xy= s->mb_index2xy[i];
if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_INTER4V)){
- s->mb_type[mb_xy]&= ~CANDIDATE_MB_TYPE_INTER4V;
s->mb_type[mb_xy]|= CANDIDATE_MB_TYPE_INTER;
}
}
@@ -546,7 +561,6 @@ void ff_clean_mpeg4_qscales(MpegEncContext *s){
for(i=1; i<s->mb_num; i++){
int mb_xy= s->mb_index2xy[i];
if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_DIRECT)){
- s->mb_type[mb_xy]&= ~CANDIDATE_MB_TYPE_DIRECT;
s->mb_type[mb_xy]|= CANDIDATE_MB_TYPE_BIDIR;
}
}
diff --git a/src/libffmpeg/libavcodec/h264.c b/src/libffmpeg/libavcodec/h264.c
index ad23ae120..d7c48bd4a 100644
--- a/src/libffmpeg/libavcodec/h264.c
+++ b/src/libffmpeg/libavcodec/h264.c
@@ -165,20 +165,6 @@ typedef struct H264Context{
MpegEncContext s;
int nal_ref_idc;
int nal_unit_type;
-#define NAL_SLICE 1
-#define NAL_DPA 2
-#define NAL_DPB 3
-#define NAL_DPC 4
-#define NAL_IDR_SLICE 5
-#define NAL_SEI 6
-#define NAL_SPS 7
-#define NAL_PPS 8
-#define NAL_AUD 9
-#define NAL_END_SEQUENCE 10
-#define NAL_END_STREAM 11
-#define NAL_FILLER_DATA 12
-#define NAL_SPS_EXT 13
-#define NAL_AUXILIARY_SLICE 19
uint8_t *rbsp_buffer;
unsigned int rbsp_buffer_size;
@@ -414,7 +400,7 @@ static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, in
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
-static always_inline uint32_t pack16to32(int a, int b){
+static av_always_inline uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
return (b&0xFFFF) + (a<<16);
#else
@@ -422,13 +408,22 @@ static always_inline uint32_t pack16to32(int a, int b){
#endif
}
+const uint8_t ff_rem6[52]={
+0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
+};
+
+const uint8_t ff_div6[52]={
+0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
+};
+
+
/**
* fill a rectangle.
* @param h height of the rectangle, should be a constant
* @param w width of the rectangle, should be a constant
* @param size the size of val (1 or 4), should be a constant
*/
-static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
+static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
uint8_t *p= (uint8_t*)vp;
assert(size==1 || size==4);
assert(w<=4);
@@ -1808,81 +1803,6 @@ static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *c
return dst;
}
-#if 0
-/**
- * @param src the data which should be escaped
- * @param dst the target buffer, dst+1 == src is allowed as a special case
- * @param length the length of the src data
- * @param dst_length the length of the dst array
- * @returns length of escaped data in bytes or -1 if an error occured
- */
-static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
- int i, escape_count, si, di;
- uint8_t *temp;
-
- assert(length>=0);
- assert(dst_length>0);
-
- dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
-
- if(length==0) return 1;
-
- escape_count= 0;
- for(i=0; i<length; i+=2){
- if(src[i]) continue;
- if(i>0 && src[i-1]==0)
- i--;
- if(i+2<length && src[i+1]==0 && src[i+2]<=3){
- escape_count++;
- i+=2;
- }
- }
-
- if(escape_count==0){
- if(dst+1 != src)
- memcpy(dst+1, src, length);
- return length + 1;
- }
-
- if(length + escape_count + 1> dst_length)
- return -1;
-
- //this should be damn rare (hopefully)
-
- h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
- temp= h->rbsp_buffer;
-//printf("encoding esc\n");
-
- si= 0;
- di= 0;
- while(si < length){
- if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
- temp[di++]= 0; si++;
- temp[di++]= 0; si++;
- temp[di++]= 3;
- temp[di++]= src[si++];
- }
- else
- temp[di++]= src[si++];
- }
- memcpy(dst+1, temp, length+escape_count);
-
- assert(di == length+escape_count);
-
- return di + 1;
-}
-
-/**
- * write 1,10,100,1000,... for alignment, yes its exactly inverse to mpeg4
- */
-static void encode_rbsp_trailing(PutBitContext *pb){
- int length;
- put_bits(pb, 1, 1);
- length= (-put_bits_count(pb))&7;
- if(length) put_bits(pb, length, 0);
-}
-#endif
-
/**
* identifies the exact end of the bitstream
* @return the length of the trailing, or 0 if damaged
@@ -2035,42 +1955,6 @@ static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
}
-
-#if 0
-static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
- int i;
- //FIXME try int temp instead of block
-
- for(i=0; i<4; i++){
- const int d0= src1[0 + i*stride] - src2[0 + i*stride];
- const int d1= src1[1 + i*stride] - src2[1 + i*stride];
- const int d2= src1[2 + i*stride] - src2[2 + i*stride];
- const int d3= src1[3 + i*stride] - src2[3 + i*stride];
- const int z0= d0 + d3;
- const int z3= d0 - d3;
- const int z1= d1 + d2;
- const int z2= d1 - d2;
-
- block[0 + 4*i]= z0 + z1;
- block[1 + 4*i]= 2*z3 + z2;
- block[2 + 4*i]= z0 - z1;
- block[3 + 4*i]= z3 - 2*z2;
- }
-
- for(i=0; i<4; i++){
- const int z0= block[0*4 + i] + block[3*4 + i];
- const int z3= block[0*4 + i] - block[3*4 + i];
- const int z1= block[1*4 + i] + block[2*4 + i];
- const int z2= block[1*4 + i] - block[2*4 + i];
-
- block[0*4 + i]= z0 + z1;
- block[1*4 + i]= 2*z3 + z2;
- block[2*4 + i]= z0 - z1;
- block[3*4 + i]= z3 - 2*z2;
- }
-}
-#endif
-
//FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close
//FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
@@ -2357,7 +2241,7 @@ static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int strid
src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
}
-static void pred16x16_vertical_c(uint8_t *src, int stride){
+void ff_pred16x16_vertical_c(uint8_t *src, int stride){
int i;
const uint32_t a= ((uint32_t*)(src-stride))[0];
const uint32_t b= ((uint32_t*)(src-stride))[1];
@@ -2372,7 +2256,7 @@ static void pred16x16_vertical_c(uint8_t *src, int stride){
}
}
-static void pred16x16_horizontal_c(uint8_t *src, int stride){
+void ff_pred16x16_horizontal_c(uint8_t *src, int stride){
int i;
for(i=0; i<16; i++){
@@ -2383,7 +2267,7 @@ static void pred16x16_horizontal_c(uint8_t *src, int stride){
}
}
-static void pred16x16_dc_c(uint8_t *src, int stride){
+void ff_pred16x16_dc_c(uint8_t *src, int stride){
int i, dc=0;
for(i=0;i<16; i++){
@@ -2437,7 +2321,7 @@ static void pred16x16_top_dc_c(uint8_t *src, int stride){
}
}
-static void pred16x16_128_dc_c(uint8_t *src, int stride){
+void ff_pred16x16_128_dc_c(uint8_t *src, int stride){
int i;
for(i=0; i<16; i++){
@@ -2488,11 +2372,11 @@ static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int
}
}
-static void pred16x16_plane_c(uint8_t *src, int stride){
+void ff_pred16x16_plane_c(uint8_t *src, int stride){
pred16x16_plane_compat_c(src, stride, 0);
}
-static void pred8x8_vertical_c(uint8_t *src, int stride){
+void ff_pred8x8_vertical_c(uint8_t *src, int stride){
int i;
const uint32_t a= ((uint32_t*)(src-stride))[0];
const uint32_t b= ((uint32_t*)(src-stride))[1];
@@ -2503,7 +2387,7 @@ static void pred8x8_vertical_c(uint8_t *src, int stride){
}
}
-static void pred8x8_horizontal_c(uint8_t *src, int stride){
+void ff_pred8x8_horizontal_c(uint8_t *src, int stride){
int i;
for(i=0; i<8; i++){
@@ -2512,7 +2396,7 @@ static void pred8x8_horizontal_c(uint8_t *src, int stride){
}
}
-static void pred8x8_128_dc_c(uint8_t *src, int stride){
+void ff_pred8x8_128_dc_c(uint8_t *src, int stride){
int i;
for(i=0; i<8; i++){
@@ -2566,7 +2450,7 @@ static void pred8x8_top_dc_c(uint8_t *src, int stride){
}
-static void pred8x8_dc_c(uint8_t *src, int stride){
+void ff_pred8x8_dc_c(uint8_t *src, int stride){
int i;
int dc0, dc1, dc2, dc3;
@@ -2591,7 +2475,7 @@ static void pred8x8_dc_c(uint8_t *src, int stride){
}
}
-static void pred8x8_plane_c(uint8_t *src, int stride){
+void ff_pred8x8_plane_c(uint8_t *src, int stride){
int j, k;
int a;
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
@@ -3220,21 +3104,21 @@ static void init_pred_ptrs(H264Context *h){
h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
- h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c;
- h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c;
- h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c;
- h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c;
+ h->pred8x8[DC_PRED8x8 ]= ff_pred8x8_dc_c;
+ h->pred8x8[VERT_PRED8x8 ]= ff_pred8x8_vertical_c;
+ h->pred8x8[HOR_PRED8x8 ]= ff_pred8x8_horizontal_c;
+ h->pred8x8[PLANE_PRED8x8 ]= ff_pred8x8_plane_c;
h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
- h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
+ h->pred8x8[DC_128_PRED8x8 ]= ff_pred8x8_128_dc_c;
- h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c;
- h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c;
- h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c;
- h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c;
+ h->pred16x16[DC_PRED8x8 ]= ff_pred16x16_dc_c;
+ h->pred16x16[VERT_PRED8x8 ]= ff_pred16x16_vertical_c;
+ h->pred16x16[HOR_PRED8x8 ]= ff_pred16x16_horizontal_c;
+ h->pred16x16[PLANE_PRED8x8 ]= ff_pred16x16_plane_c;
h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
- h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
+ h->pred16x16[DC_128_PRED8x8 ]= ff_pred16x16_128_dc_c;
}
static void free_tables(H264Context *h){
@@ -3269,8 +3153,8 @@ static void init_dequant8_coeff_table(H264Context *h){
}
for(q=0; q<52; q++){
- int shift = div6[q];
- int idx = rem6[q];
+ int shift = ff_div6[q];
+ int idx = ff_rem6[q];
for(x=0; x<64; x++)
h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
@@ -3294,8 +3178,8 @@ static void init_dequant4_coeff_table(H264Context *h){
continue;
for(q=0; q<52; q++){
- int shift = div6[q] + 2;
- int idx = rem6[q];
+ int shift = ff_div6[q] + 2;
+ int idx = ff_rem6[q];
for(x=0; x<16; x++)
h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
@@ -4972,6 +4856,10 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
if(total_coeff==0)
return 0;
+ if(total_coeff<0) {
+ av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff<0)\n", s->mb_x, s->mb_y);
+ return -1;
+ }
trailing_ones= coeff_token&3;
tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
diff --git a/src/libffmpeg/libavcodec/h264data.h b/src/libffmpeg/libavcodec/h264data.h
index 2dea3580f..74e720421 100644
--- a/src/libffmpeg/libavcodec/h264data.h
+++ b/src/libffmpeg/libavcodec/h264data.h
@@ -53,6 +53,24 @@
#define EXTENDED_SAR 255
+/* NAL unit types */
+enum {
+NAL_SLICE=1,
+NAL_DPA,
+NAL_DPB,
+NAL_DPC,
+NAL_IDR_SLICE,
+NAL_SEI,
+NAL_SPS,
+NAL_PPS,
+NAL_AUD,
+NAL_END_SEQUENCE,
+NAL_END_STREAM,
+NAL_FILLER_DATA,
+NAL_SPS_EXT,
+NAL_AUXILIARY_SLICE=19
+};
+
static const AVRational pixel_aspect[14]={
{0, 1},
{1, 1},
@@ -488,15 +506,6 @@ static const PMbInfo b_sub_mb_type_info[13]={
{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 4, },
};
-
-static const uint8_t rem6[52]={
-0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
-};
-
-static const uint8_t div6[52]={
-0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
-};
-
static const uint8_t default_scaling4[2][16]={
{ 6,13,20,28,
13,20,28,32,
diff --git a/src/libffmpeg/libavcodec/h264idct.c b/src/libffmpeg/libavcodec/h264idct.c
index 3506418ad..a6a56d33a 100755
--- a/src/libffmpeg/libavcodec/h264idct.c
+++ b/src/libffmpeg/libavcodec/h264idct.c
@@ -28,7 +28,7 @@
#include "dsputil.h"
-static always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int stride, int block_stride, int shift, int add){
+static av_always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int stride, int block_stride, int shift, int add){
int i;
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
diff --git a/src/libffmpeg/libavcodec/i386/Makefile.am b/src/libffmpeg/libavcodec/i386/Makefile.am
index 15ab4db89..ee170efd5 100644
--- a/src/libffmpeg/libavcodec/i386/Makefile.am
+++ b/src/libffmpeg/libavcodec/i386/Makefile.am
@@ -6,7 +6,7 @@ AM_CFLAGS = -fomit-frame-pointer -fno-strict-aliasing
# CFLAGS is here to filter out -funroll-loops because it causes bad
# behavior of libavcodec
CFLAGS := `echo @CFLAGS@ | sed -e 's/-funroll-loops//g'`
-AM_CPPFLAGS = $(LIBFFMPEG_CPPFLAGS) -I$(top_srcdir)/src/libffmpeg/libavutil
+AM_CPPFLAGS = $(LIBFFMPEG_CPPFLAGS) -I$(top_srcdir)/src/libffmpeg/libavutil -I$(top_srcdir)/src/libffmpeg
# Avoid "can't find register" failures with -O1 and higher
dsputil_mmx.o dsputil_mmx.lo: CFLAGS=$(shell echo @CFLAGS@ | sed -e 's/-funroll-loops//g; s/$$/ -Os/')
@@ -42,10 +42,10 @@ EXTRA_DIST = \
h264dsp_mmx.c \
mpegvideo_mmx_template.c
-if HAVE_FFMMX
+if HAVE_MMX
mmx_modules = $(libavcodec_mmx_src)
endif
libavcodec_mmx_la_SOURCES = $(mmx_modules) $(libavcodec_mmx_dummy)
-noinst_HEADERS = dsputil_mmx_avg.h dsputil_mmx_rnd.h mmx.h
+noinst_HEADERS = dsputil_mmx_avg.h dsputil_mmx_rnd.h mathops.h mmx.h
diff --git a/src/libffmpeg/libavcodec/i386/cputest.c b/src/libffmpeg/libavcodec/i386/cputest.c
index 262786b71..0705ab3e5 100644
--- a/src/libffmpeg/libavcodec/i386/cputest.c
+++ b/src/libffmpeg/libavcodec/i386/cputest.c
@@ -87,6 +87,8 @@ int mm_support(void)
rval |= MM_SSE2;
if (ecx & 1)
rval |= MM_SSE3;
+ if (ecx & 0x00000200 )
+ rval |= MM_SSSE3;
}
cpuid(0x80000000, max_ext_level, ebx, ecx, edx);
@@ -104,11 +106,13 @@ int mm_support(void)
}
#if 0
- av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s\n",
+ av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s%s%s\n",
(rval&MM_MMX) ? "MMX ":"",
(rval&MM_MMXEXT) ? "MMX2 ":"",
(rval&MM_SSE) ? "SSE ":"",
(rval&MM_SSE2) ? "SSE2 ":"",
+ (rval&MM_SSE3) ? "SSE3 ":"",
+ (rval&MM_SSSE3) ? "SSSE3 ":"",
(rval&MM_3DNOW) ? "3DNow ":"",
(rval&MM_3DNOWEXT) ? "3DNowExt ":"");
#endif
diff --git a/src/libffmpeg/libavcodec/i386/fdct_mmx.c b/src/libffmpeg/libavcodec/i386/fdct_mmx.c
index 2ffbfecf6..7e2682a4a 100644
--- a/src/libffmpeg/libavcodec/i386/fdct_mmx.c
+++ b/src/libffmpeg/libavcodec/i386/fdct_mmx.c
@@ -284,7 +284,7 @@ TABLE_SSE2
}};
-static always_inline void fdct_col(const int16_t *in, int16_t *out, int offset)
+static av_always_inline void fdct_col(const int16_t *in, int16_t *out, int offset)
{
movq_m2r(*(in + offset + 1 * 8), mm0);
movq_m2r(*(in + offset + 6 * 8), mm1);
@@ -364,7 +364,7 @@ static always_inline void fdct_col(const int16_t *in, int16_t *out, int offset)
}
-static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
+static av_always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
{
asm volatile(
#define FDCT_ROW_SSE2_H1(i,t) \
@@ -426,7 +426,7 @@ static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
);
}
-static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table)
+static av_always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table)
{
pshufw_m2r(*(in + 4), mm5, 0x1B);
movq_m2r(*(in + 0), mm0);
@@ -469,7 +469,7 @@ static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const i
movq_r2m(mm7, *(out + 4));
}
-static always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const int16_t *table)
+static av_always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const int16_t *table)
{
//FIXME reorder (i dont have a old mmx only cpu here to benchmark ...)
movd_m2r(*(in + 6), mm1);
diff --git a/src/libffmpeg/libavcodec/i386/mathops.h b/src/libffmpeg/libavcodec/i386/mathops.h
new file mode 100644
index 000000000..3553a4025
--- /dev/null
+++ b/src/libffmpeg/libavcodec/i386/mathops.h
@@ -0,0 +1,41 @@
+/*
+ * simple math operations
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifdef FRAC_BITS
+# define MULL(ra, rb) \
+ ({ int rt, dummy; asm (\
+ "imull %3 \n\t"\
+ "shrdl %4, %%edx, %%eax \n\t"\
+ : "=a"(rt), "=d"(dummy)\
+ : "a" (ra), "rm" (rb), "i"(FRAC_BITS));\
+ rt; })
+#endif
+
+#define MULH(ra, rb) \
+ ({ int rt, dummy;\
+ asm ("imull %3\n\t" : "=d"(rt), "=a"(dummy): "a" (ra), "rm" (rb));\
+ rt; })
+
+#define MUL64(ra, rb) \
+ ({ int64_t rt;\
+ asm ("imull %2\n\t" : "=A"(rt) : "a" (ra), "g" (rb));\
+ rt; })
+
diff --git a/src/libffmpeg/libavcodec/jfdctfst.c b/src/libffmpeg/libavcodec/jfdctfst.c
index 38424563d..a9dcfab82 100644
--- a/src/libffmpeg/libavcodec/jfdctfst.c
+++ b/src/libffmpeg/libavcodec/jfdctfst.c
@@ -145,7 +145,7 @@
#define MULTIPLY(var,const) ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
-static always_inline void row_fdct(DCTELEM * data){
+static av_always_inline void row_fdct(DCTELEM * data){
int_fast16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int_fast16_t tmp10, tmp11, tmp12, tmp13;
int_fast16_t z1, z2, z3, z4, z5, z11, z13;
diff --git a/src/libffmpeg/libavcodec/jfdctint.c b/src/libffmpeg/libavcodec/jfdctint.c
index 58f3a1446..250312467 100644
--- a/src/libffmpeg/libavcodec/jfdctint.c
+++ b/src/libffmpeg/libavcodec/jfdctint.c
@@ -181,7 +181,7 @@
#endif
-static always_inline void row_fdct(DCTELEM * data){
+static av_always_inline void row_fdct(DCTELEM * data){
int_fast32_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int_fast32_t tmp10, tmp11, tmp12, tmp13;
int_fast32_t z1, z2, z3, z4, z5;
diff --git a/src/libffmpeg/libavcodec/jpeg_ls.c b/src/libffmpeg/libavcodec/jpeg_ls.c
index 1b4df2b1a..4629176ad 100644
--- a/src/libffmpeg/libavcodec/jpeg_ls.c
+++ b/src/libffmpeg/libavcodec/jpeg_ls.c
@@ -804,11 +804,16 @@ static int encode_picture_ls(AVCodecContext *avctx, unsigned char *buf, int buf_
av_free(zero);
av_free(state);
+ // the specification says that after doing 0xff escaping unused bits in the
+ // last byte must be set to 0, so just append 7 "optional" zero-bits to
+ // avoid special-casing.
+ put_bits(&pb2, 7, 0);
+ size = put_bits_count(&pb2);
flush_put_bits(&pb2);
/* do escape coding */
- size = put_bits_count(&pb2) >> 3;
init_get_bits(&gb, buf2, size);
- while(get_bits_count(&gb) < size * 8){
+ size -= 7;
+ while(get_bits_count(&gb) < size){
int v;
v = get_bits(&gb, 8);
put_bits(&pb, 8, v);
diff --git a/src/libffmpeg/libavcodec/mathops.h b/src/libffmpeg/libavcodec/mathops.h
index 9ae34d71b..c6ec70597 100644
--- a/src/libffmpeg/libavcodec/mathops.h
+++ b/src/libffmpeg/libavcodec/mathops.h
@@ -46,7 +46,7 @@
//gcc 3.4 creates an incredibly bloated mess out of this
//# define MULH(a,b) (((int64_t)(a) * (int64_t)(b))>>32)
-static always_inline int MULH(int a, int b){
+static av_always_inline int MULH(int a, int b){
return ((int64_t)(a) * (int64_t)(b))>>32;
}
#endif
diff --git a/src/libffmpeg/libavcodec/motion_est.c b/src/libffmpeg/libavcodec/motion_est.c
index 0e1504147..a11787bac 100644
--- a/src/libffmpeg/libavcodec/motion_est.c
+++ b/src/libffmpeg/libavcodec/motion_est.c
@@ -106,7 +106,7 @@ static int get_flags(MotionEstContext *c, int direct, int chroma){
+ (chroma ? FLAG_CHROMA : 0);
}
-static always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
+static av_always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
const int size, const int h, int ref_index, int src_index,
me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
MotionEstContext * const c= &s->me;
@@ -122,6 +122,7 @@ static always_inline int cmp(MpegEncContext *s, const int x, const int y, const
int d;
//FIXME check chroma 4mv, (no crashes ...)
if(flags&FLAG_DIRECT){
+ assert(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1));
if(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1)){
const int time_pp= s->pp_time;
const int time_pb= s->pb_time;
@@ -233,8 +234,14 @@ static void zero_hpel(uint8_t *a, const uint8_t *b, int stride, int h){
void ff_init_me(MpegEncContext *s){
MotionEstContext * const c= &s->me;
+ int cache_size= FFMIN(ME_MAP_SIZE>>ME_MAP_SHIFT, 1<<ME_MAP_SHIFT);
+ int dia_size= FFMAX(FFABS(s->avctx->dia_size)&255, FFABS(s->avctx->pre_dia_size)&255);
c->avctx= s->avctx;
+ if(cache_size < 2*dia_size && !c->stride){
+ av_log(s->avctx, AV_LOG_INFO, "ME_MAP size may be a little small for the selected diamond size\n");
+ }
+
ff_set_cmp(&s->dsp, s->dsp.me_pre_cmp, c->avctx->me_pre_cmp);
ff_set_cmp(&s->dsp, s->dsp.me_cmp, c->avctx->me_cmp);
ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, c->avctx->me_sub_cmp);
@@ -692,6 +699,7 @@ static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4)
static inline void get_limits(MpegEncContext *s, int x, int y)
{
MotionEstContext * const c= &s->me;
+ int range= c->avctx->me_range >> (1 + !!(c->flags&FLAG_QPEL));
/*
if(c->avctx->me_range) c->range= c->avctx->me_range >> 1;
else c->range= 16;
@@ -713,6 +721,12 @@ static inline void get_limits(MpegEncContext *s, int x, int y)
c->xmax = - x + s->mb_width *16 - 16;
c->ymax = - y + s->mb_height*16 - 16;
}
+ if(range){
+ c->xmin = FFMAX(c->xmin,-range);
+ c->xmax = FFMIN(c->xmax, range);
+ c->ymin = FFMAX(c->ymin,-range);
+ c->ymax = FFMIN(c->ymax, range);
+ }
}
static inline void init_mv4_ref(MotionEstContext *c){
@@ -1148,7 +1162,9 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
{
MotionEstContext * const c= &s->me;
uint8_t *pix, *ppix;
- int sum, varc, vard, mx, my, dmin;
+ int sum, mx, my, dmin;
+ int varc; ///< the variance of the block (sum of squared (p[y][x]-average))
+ int vard; ///< sum of squared differences with the estimated motion vector
int P[10][2];
const int shift= 1+s->quarter_sample;
int mb_type=0;
@@ -1810,8 +1826,8 @@ static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y)
get_limits(s, 16*mb_x, 16*mb_y); //restore c->?min/max, maybe not needed
- s->b_direct_mv_table[mot_xy][0]= mx;
- s->b_direct_mv_table[mot_xy][1]= my;
+ mv_table[mot_xy][0]= mx;
+ mv_table[mot_xy][1]= my;
c->flags &= ~FLAG_DIRECT;
c->sub_flags &= ~FLAG_DIRECT;
@@ -1831,6 +1847,18 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
get_limits(s, 16*mb_x, 16*mb_y);
c->skip=0;
+
+ if(s->codec_id == CODEC_ID_MPEG4 && s->next_picture.mbskip_table[xy]){
+ int score= direct_search(s, mb_x, mb_y); //FIXME just check 0,0
+
+ score= ((unsigned)(score*score + 128*256))>>16;
+ c->mc_mb_var_sum_temp += score;
+ s->current_picture.mc_mb_var[mb_y*s->mb_stride + mb_x] = score; //FIXME use SSE
+ s->mb_type[mb_y*s->mb_stride + mb_x]= CANDIDATE_MB_TYPE_DIRECT0;
+
+ return;
+ }
+
if(c->avctx->me_threshold){
int vard= check_input_motion(s, mb_x, mb_y, 0);
@@ -1953,6 +1981,8 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
}
//FIXME something smarter
if(dmin>256*256*16) type&= ~CANDIDATE_MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB
+ if(s->codec_id == CODEC_ID_MPEG4 && type&CANDIDATE_MB_TYPE_DIRECT && s->flags&CODEC_FLAG_MV0 && *(uint32_t*)s->b_direct_mv_table[xy])
+ type |= CANDIDATE_MB_TYPE_DIRECT0;
#if 0
if(s->out_format == FMT_MPEG1)
type |= CANDIDATE_MB_TYPE_INTRA;
diff --git a/src/libffmpeg/libavcodec/motion_est_template.c b/src/libffmpeg/libavcodec/motion_est_template.c
index d8feaff5a..897c08e3d 100644
--- a/src/libffmpeg/libavcodec/motion_est_template.c
+++ b/src/libffmpeg/libavcodec/motion_est_template.c
@@ -555,7 +555,7 @@ if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x,
const int qpel= flags&FLAG_QPEL;\
const int shift= 1+qpel;\
-static always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
+static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
int src_index, int ref_index, int const penalty_factor,
int size, int h, int flags)
{
@@ -667,31 +667,28 @@ static int hex_search(MpegEncContext * s, int *best, int dmin,
LOAD_COMMON
LOAD_COMMON2
int map_generation= c->map_generation;
- int x,y,i,d;
- static const int hex[6][2]={{-2, 0}, { 2,0}, {-1,-2}, {1,-2}, {-1,2},{1,2}};
+ int x,y,d;
+ const int dec= dia_size & (dia_size-1);
cmpf= s->dsp.me_cmp[size];
chroma_cmpf= s->dsp.me_cmp[size+1];
- for(;dia_size; dia_size--){
+ for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
do{
x= best[0];
y= best[1];
- for(i=0; i<6; i++){
- CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
+
+ CHECK_CLIPPED_MV(x -dia_size , y);
+ CHECK_CLIPPED_MV(x+ dia_size , y);
+ CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size);
+ CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size);
+ if(dia_size>1){
+ CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size);
+ CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size);
}
}while(best[0] != x || best[1] != y);
}
- do{
- x= best[0];
- y= best[1];
- CHECK_CLIPPED_MV(x+1, y);
- CHECK_CLIPPED_MV(x, y+1);
- CHECK_CLIPPED_MV(x-1, y);
- CHECK_CLIPPED_MV(x, y-1);
- }while(best[0] != x || best[1] != y);
-
return dmin;
}
@@ -704,14 +701,16 @@ static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
LOAD_COMMON
LOAD_COMMON2
int map_generation= c->map_generation;
- int x,y,i,d, dia_size;
+ int x,y,i,d;
+ int dia_size= c->dia_size&0xFF;
+ const int dec= dia_size & (dia_size-1);
static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
{ 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};
cmpf= s->dsp.me_cmp[size];
chroma_cmpf= s->dsp.me_cmp[size+1];
- for(dia_size= c->dia_size&0xFF; dia_size; dia_size--){
+ for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
do{
x= best[0];
y= best[1];
@@ -775,7 +774,7 @@ static int umh_search(MpegEncContext * s, int *best, int dmin,
}
}
- return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 1);
+ return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
}
#define SAB_CHECK_MV(ax,ay)\
@@ -824,20 +823,27 @@ static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
cmpf= s->dsp.me_cmp[size];
chroma_cmpf= s->dsp.me_cmp[size+1];
- for(j=i=0; i<ME_MAP_SIZE; i++){
+ /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can
+ become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map
+ */
+ for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
uint32_t key= map[i];
key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
- assert(j<MAX_SAB_SIZE); //max j = number of predictors
-
minima[j].height= score_map[i];
minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
+
+ // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space
+ if( minima[j].x > xmax || minima[j].x < xmin
+ || minima[j].y > ymax || minima[j].y < ymin)
+ continue;
+
minima[j].checked=0;
if(minima[j].x || minima[j].y)
minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
@@ -965,7 +971,7 @@ if(256*256*256*64 % (stats[0]+1)==0){
return dmin;
}
-static always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
+static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
int src_index, int ref_index, int const penalty_factor,
int size, int h, int flags){
MotionEstContext * const c= &s->me;
@@ -985,7 +991,7 @@ static always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
return var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
}
-static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
+static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
int ref_mv_scale, int flags, int size, int h)
{
@@ -1018,6 +1024,10 @@ static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx
map[0]= map_generation;
score_map[0]= dmin;
+ //FIXME precalc first term below?
+ if((s->pict_type == B_TYPE && !(c->flags & FLAG_DIRECT)) || s->flags&CODEC_FLAG_MV0)
+ dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
+
/* first line */
if (s->first_slice_line) {
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
diff --git a/src/libffmpeg/libavcodec/mpeg12.c b/src/libffmpeg/libavcodec/mpeg12.c
index e3a4c2da5..8af7bdfa7 100644
--- a/src/libffmpeg/libavcodec/mpeg12.c
+++ b/src/libffmpeg/libavcodec/mpeg12.c
@@ -515,7 +515,7 @@ static inline void put_mb_modes(MpegEncContext *s, int n, int bits,
}
}
-static always_inline void mpeg1_encode_mb_internal(MpegEncContext *s,
+static av_always_inline void mpeg1_encode_mb_internal(MpegEncContext *s,
DCTELEM block[6][64],
int motion_x, int motion_y,
int mb_block_count)
diff --git a/src/libffmpeg/libavcodec/mpegaudiodec.c b/src/libffmpeg/libavcodec/mpegaudiodec.c
index 54bcee3b0..367400581 100644
--- a/src/libffmpeg/libavcodec/mpegaudiodec.c
+++ b/src/libffmpeg/libavcodec/mpegaudiodec.c
@@ -327,7 +327,7 @@ static int decode_init(AVCodecContext * avctx)
for(i=0;i<15;i++) {
int n, norm;
n = i + 2;
- norm = ((int64_t_C(1) << n) * FRAC_ONE) / ((1 << n) - 1);
+ norm = ((INT64_C(1) << n) * FRAC_ONE) / ((1 << n) - 1);
scale_factor_mult[i][0] = MULL(FIXR(1.0 * 2.0), norm);
scale_factor_mult[i][1] = MULL(FIXR(0.7937005259 * 2.0), norm);
scale_factor_mult[i][2] = MULL(FIXR(0.6299605249 * 2.0), norm);
@@ -1749,7 +1749,7 @@ static int huffman_decode(MPADecodeContext *s, GranuleDef *g,
/* skip extension bits */
bits_left = end_pos - get_bits_count(&s->gb);
//av_log(NULL, AV_LOG_ERROR, "left:%d buf:%p\n", bits_left, s->in_gb.buffer);
- if (bits_left < 0 || bits_left > 16) {
+ if (bits_left < 0 || bits_left > 500) {
av_log(NULL, AV_LOG_ERROR, "bits_left=%d\n", bits_left);
s_index=0;
}else if(bits_left > 0 && s->error_resilience >= FF_ER_AGGRESSIVE){
diff --git a/src/libffmpeg/libavcodec/mpegvideo.c b/src/libffmpeg/libavcodec/mpegvideo.c
index a9d877fff..a33485549 100644
--- a/src/libffmpeg/libavcodec/mpegvideo.c
+++ b/src/libffmpeg/libavcodec/mpegvideo.c
@@ -140,7 +140,7 @@ static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[
/* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
/* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
- qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) /
+ qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
(qscale * quant_matrix[j]));
}
} else if (dsp->fdct == fdct_ifast
@@ -155,7 +155,7 @@ static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[
/* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
/* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
- qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) /
+ qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
(aanscales[i] * qscale * quant_matrix[j]));
}
} else {
@@ -166,7 +166,7 @@ static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[
so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
so 32768 >= (1<<19) / (qscale * quant_matrix[i]) >= 67
*/
- qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
+ qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
// qmat [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
@@ -2964,7 +2964,7 @@ static inline int hpel_motion_lowres(MpegEncContext *s,
}
/* apply one mpeg motion vector to the three components */
-static always_inline void mpeg_motion(MpegEncContext *s,
+static av_always_inline void mpeg_motion(MpegEncContext *s,
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
int field_based, int bottom_field, int field_select,
uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
@@ -3081,7 +3081,7 @@ if(s->quarter_sample)
}
/* apply one mpeg motion vector to the three components */
-static always_inline void mpeg_motion_lowres(MpegEncContext *s,
+static av_always_inline void mpeg_motion_lowres(MpegEncContext *s,
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
int field_based, int bottom_field, int field_select,
uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
@@ -3913,7 +3913,7 @@ void ff_clean_intra_table_entries(MpegEncContext *s)
s->mv : motion vector
s->interlaced_dct : true if interlaced dct used (mpeg2)
*/
-static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
+static av_always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
{
int mb_x, mb_y;
const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
@@ -4336,7 +4336,7 @@ static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
}
}
-static always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count)
+static av_always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count)
{
int16_t weight[8][64];
DCTELEM orig[8][64];
@@ -4348,7 +4348,7 @@ static always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, in
uint8_t *ptr_y, *ptr_cb, *ptr_cr;
int wrap_y, wrap_c;
- for(i=0; i<mb_block_count; i++) skip_dct[i]=0;
+ for(i=0; i<mb_block_count; i++) skip_dct[i]=s->skipdct;
if(s->adaptive_quant){
const int last_qp= s->qscale;
@@ -4358,17 +4358,16 @@ static always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, in
update_qscale(s);
if(!(s->flags&CODEC_FLAG_QP_RD)){
+ s->qscale= s->current_picture_ptr->qscale_table[mb_xy];
s->dquant= s->qscale - last_qp;
if(s->out_format==FMT_H263){
- s->dquant= clip(s->dquant, -2, 2); //FIXME RD
+ s->dquant= clip(s->dquant, -2, 2);
if(s->codec_id==CODEC_ID_MPEG4){
if(!s->mb_intra){
if(s->pict_type == B_TYPE){
- if(s->dquant&1)
- s->dquant= (s->dquant/2)*2;
- if(s->mv_dir&MV_DIRECT)
+ if(s->dquant&1 || s->mv_dir&MV_DIRECT)
s->dquant= 0;
}
if(s->mv_type==MV_TYPE_8X8)
@@ -4621,7 +4620,7 @@ static always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, in
}
}
-static always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
+static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
{
if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y, 8, 6);
else encode_mb_internal(s, motion_x, motion_y, 16, 8);
@@ -4861,6 +4860,8 @@ static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
static int estimate_motion_thread(AVCodecContext *c, void *arg){
MpegEncContext *s= arg;
+ ff_check_alignment();
+
s->me.dia_size= s->avctx->dia_size;
s->first_slice_line=1;
for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
@@ -4888,6 +4889,8 @@ static int mb_var_thread(AVCodecContext *c, void *arg){
MpegEncContext *s= arg;
int mb_x, mb_y;
+ ff_check_alignment();
+
for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
for(mb_x=0; mb_x < s->mb_width; mb_x++) {
int xx = mb_x * 16;
@@ -4938,6 +4941,8 @@ static int encode_thread(AVCodecContext *c, void *arg){
PutBitContext pb[2], pb2[2], tex_pb[2];
//printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
+ ff_check_alignment();
+
for(i=0; i<2; i++){
init_put_bits(&pb [i], bit_buf [i], MAX_MB_BYTES);
init_put_bits(&pb2 [i], bit_buf2 [i], MAX_MB_BYTES);
@@ -5205,19 +5210,6 @@ static int encode_thread(AVCodecContext *c, void *arg){
encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
&dmin, &next_block, 0, 0);
}
- if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
- int mx= s->b_direct_mv_table[xy][0];
- int my= s->b_direct_mv_table[xy][1];
-
- s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
- s->mb_intra= 0;
-/* xine: do not need this for decode or MPEG-1 encoding modes */
-#if 0
- ff_mpeg4_set_direct_mv(s, mx, my);
-#endif /* #if 0 */
- encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
- &dmin, &next_block, mx, my);
- }
if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
s->mv_dir = MV_DIR_FORWARD;
s->mv_type = MV_TYPE_FIELD;
@@ -5272,8 +5264,8 @@ static int encode_thread(AVCodecContext *c, void *arg){
}
}
- if(s->flags & CODEC_FLAG_QP_RD){
- if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
+ if((s->flags & CODEC_FLAG_QP_RD) && dmin < INT_MAX){
+ if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
const int last_qp= backup_s.qscale;
int qpi, qp, dc[6];
DCTELEM ac[6][16];
@@ -5316,10 +5308,64 @@ static int encode_thread(AVCodecContext *c, void *arg){
}
}
}
- qp= best_s.qscale;
- s->current_picture.qscale_table[xy]= qp;
}
}
+ if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
+ int mx= s->b_direct_mv_table[xy][0];
+ int my= s->b_direct_mv_table[xy][1];
+
+ backup_s.dquant = 0;
+ s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
+ s->mb_intra= 0;
+ ff_mpeg4_set_direct_mv(s, mx, my);
+ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
+ &dmin, &next_block, mx, my);
+ }
+ if(mb_type&CANDIDATE_MB_TYPE_DIRECT0){
+ backup_s.dquant = 0;
+ s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
+ s->mb_intra= 0;
+/* xine: do not need this for decode or MPEG-1 encoding modes */
+#if 0
+ ff_mpeg4_set_direct_mv(s, 0, 0);
+#endif /* #if 0 */
+ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
+ &dmin, &next_block, 0, 0);
+ }
+ if(!best_s.mb_intra && s->flags2&CODEC_FLAG2_SKIP_RD){
+ int coded=0;
+ for(i=0; i<6; i++)
+ coded |= s->block_last_index[i];
+ if(coded){
+ int mx,my;
+ memcpy(s->mv, best_s.mv, sizeof(s->mv));
+ if(best_s.mv_dir & MV_DIRECT){
+ mx=my=0; //FIXME find the one we actually used
+ ff_mpeg4_set_direct_mv(s, mx, my);
+ }else if(best_s.mv_dir&MV_DIR_BACKWARD){
+ mx= s->mv[1][0][0];
+ my= s->mv[1][0][1];
+ }else{
+ mx= s->mv[0][0][0];
+ my= s->mv[0][0][1];
+ }
+
+ s->mv_dir= best_s.mv_dir;
+ s->mv_type = best_s.mv_type;
+ s->mb_intra= 0;
+/* s->mv[0][0][0] = best_s.mv[0][0][0];
+ s->mv[0][0][1] = best_s.mv[0][0][1];
+ s->mv[1][0][0] = best_s.mv[1][0][0];
+ s->mv[1][0][1] = best_s.mv[1][0][1];*/
+ backup_s.dquant= 0;
+ s->skipdct=1;
+ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
+ &dmin, &next_block, mx, my);
+ s->skipdct=0;
+ }
+ }
+
+ s->current_picture.qscale_table[xy]= best_s.qscale;
copy_context_after_encode(s, &best_s, -1);
@@ -5401,6 +5447,11 @@ static int encode_thread(AVCodecContext *c, void *arg){
ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
#endif /* #if 0 */
break;
+ case CANDIDATE_MB_TYPE_DIRECT0:
+ s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
+ s->mb_intra= 0;
+ ff_mpeg4_set_direct_mv(s, 0, 0);
+ break;
case CANDIDATE_MB_TYPE_BIDIR:
s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
s->mb_intra= 0;
diff --git a/src/libffmpeg/libavcodec/mpegvideo.h b/src/libffmpeg/libavcodec/mpegvideo.h
index 011678a42..ed02759ae 100644
--- a/src/libffmpeg/libavcodec/mpegvideo.h
+++ b/src/libffmpeg/libavcodec/mpegvideo.h
@@ -324,6 +324,7 @@ typedef struct MpegEncContext {
int dropable;
int frame_rate_index;
int last_lambda_for[5]; ///< last lambda for a specific pict type
+ int skipdct; ///< skip dct and code zero residual
/* motion compensation */
int unrestricted_mv; ///< mv can point outside of the coded picture
@@ -402,6 +403,8 @@ typedef struct MpegEncContext {
#define CANDIDATE_MB_TYPE_BACKWARD_I 0x400
#define CANDIDATE_MB_TYPE_BIDIR_I 0x800
+#define CANDIDATE_MB_TYPE_DIRECT0 0x1000
+
int block_index[6]; ///< index to current MB in block based arrays with edges
int block_wrap[6];
uint8_t *dest[3];
diff --git a/src/libffmpeg/libavcodec/parser.c b/src/libffmpeg/libavcodec/parser.c
index 72a3e55a3..740ad855c 100644
--- a/src/libffmpeg/libavcodec/parser.c
+++ b/src/libffmpeg/libavcodec/parser.c
@@ -91,7 +91,8 @@ AVCodecParserContext *av_parser_init(int codec_id)
* in_data += len;
* in_len -= len;
*
- * decode_frame(data, size);
+ * if(size)
+ * decode_frame(data, size);
* }
* @endcode
*/
diff --git a/src/libffmpeg/libavcodec/ppc/Makefile.am b/src/libffmpeg/libavcodec/ppc/Makefile.am
index 00e796f6d..d52cc481e 100644
--- a/src/libffmpeg/libavcodec/ppc/Makefile.am
+++ b/src/libffmpeg/libavcodec/ppc/Makefile.am
@@ -12,14 +12,17 @@ noinst_LTLIBRARIES = libavcodec_ppc.la
libavcodec_ppc_src = dsputil_altivec.c \
dsputil_ppc.c \
- dsputil_h264_altivec.c \
- dsputil_h264_template_altivec.c \
+ h264_altivec.c \
+ h264_template_altivec.c \
fdct_altivec.c \
fft_altivec.c \
+ float_altivec.c \
idct_altivec.c \
gmc_altivec.c \
mpegvideo_altivec.c \
- mpegvideo_ppc.c
+ mpegvideo_ppc.c \
+ snow_altivec.c \
+ vc1dsp_altivec.c
libavcodec_ppc_dummy = libavcodec_ppc_dummy.c
EXTRA_DIST = $(libavcodec_ppc_src) $(libavcodec_ppc_dummy)
@@ -28,7 +31,6 @@ EXTRA_DIST = $(libavcodec_ppc_src) $(libavcodec_ppc_dummy)
#ppc_modules = $(libavcodec_ppc_src)
#endif
-
libavcodec_ppc_la_SOURCES = $(ppc_modules) $(libavcodec_ppc_dummy)
-noinst_HEADERS = dsputil_altivec.h dsputil_ppc.h gcc_fixes.h
+noinst_HEADERS = dsputil_altivec.h dsputil_ppc.h gcc_fixes.h mathops.h types_altivec.h
diff --git a/src/libffmpeg/libavcodec/ppc/float_altivec.c b/src/libffmpeg/libavcodec/ppc/float_altivec.c
new file mode 100644
index 000000000..c6e43dec2
--- /dev/null
+++ b/src/libffmpeg/libavcodec/ppc/float_altivec.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../dsputil.h"
+
+#include "gcc_fixes.h"
+
+#include "dsputil_altivec.h"
+
+static void vector_fmul_altivec(float *dst, const float *src, int len)
+{
+ int i;
+ vector float d0, d1, s, zero = (vector float)vec_splat_u32(0);
+ for(i=0; i<len-7; i+=8) {
+ d0 = vec_ld(0, dst+i);
+ s = vec_ld(0, src+i);
+ d1 = vec_ld(16, dst+i);
+ d0 = vec_madd(d0, s, zero);
+ d1 = vec_madd(d1, vec_ld(16,src+i), zero);
+ vec_st(d0, 0, dst+i);
+ vec_st(d1, 16, dst+i);
+ }
+}
+
+static void vector_fmul_reverse_altivec(float *dst, const float *src0,
+ const float *src1, int len)
+{
+ int i;
+ vector float d, s0, s1, h0, l0,
+ s2, s3, zero = (vector float)vec_splat_u32(0);
+ src1 += len-4;
+ for(i=0; i<len-7; i+=8) {
+ s1 = vec_ld(0, src1-i); // [a,b,c,d]
+ s0 = vec_ld(0, src0+i);
+ l0 = vec_mergel(s1, s1); // [c,c,d,d]
+ s3 = vec_ld(-16, src1-i);
+ h0 = vec_mergeh(s1, s1); // [a,a,b,b]
+ s2 = vec_ld(16, src0+i);
+ s1 = vec_mergeh(vec_mergel(l0,h0), // [d,b,d,b]
+ vec_mergeh(l0,h0)); // [c,a,c,a]
+ // [d,c,b,a]
+ l0 = vec_mergel(s3, s3);
+ d = vec_madd(s0, s1, zero);
+ h0 = vec_mergeh(s3, s3);
+ vec_st(d, 0, dst+i);
+ s3 = vec_mergeh(vec_mergel(l0,h0),
+ vec_mergeh(l0,h0));
+ d = vec_madd(s2, s3, zero);
+ vec_st(d, 16, dst+i);
+ }
+}
+
+static void vector_fmul_add_add_altivec(float *dst, const float *src0,
+ const float *src1, const float *src2,
+ int src3, int len, int step)
+{
+ int i;
+ vector float d, s0, s1, s2, t0, t1, edges;
+ vector unsigned char align = vec_lvsr(0,dst),
+ mask = vec_lvsl(0, dst);
+
+ t0 = vec_ld(0, dst);
+#if 0 //FIXME: there is still something wrong
+ if (step == 2) {
+ int y;
+ vector float d0, d1, s3, t2;
+ vector unsigned int sel =
+ vec_mergeh(vec_splat_u32(-1), vec_splat_u32(0));
+ t1 = vec_ld(16, dst);
+ for (i=0,y=0; i<len-3; i+=4,y+=8) {
+
+ s0 = vec_ld(0,src0+i);
+ s1 = vec_ld(0,src1+i);
+ s2 = vec_ld(0,src2+i);
+
+// t0 = vec_ld(0, dst+y); //[x x x|a]
+// t1 = vec_ld(16, dst+y); //[b c d|e]
+ t2 = vec_ld(31, dst+y); //[f g h|x]
+
+ d = vec_madd(s0,s1,s2); // [A B C D]
+
+ // [A A B B]
+
+ // [C C D D]
+
+ d0 = vec_perm(t0, t1, mask); // [a b c d]
+
+ d0 = vec_sel(vec_mergeh(d, d), d0, sel); // [A b B d]
+
+ edges = vec_perm(t1, t0, mask);
+
+ t0 = vec_perm(edges, d0, align); // [x x x|A]
+
+ t1 = vec_perm(d0, edges, align); // [b B d|e]
+
+ vec_stl(t0, 0, dst+y);
+
+ d1 = vec_perm(t1, t2, mask); // [e f g h]
+
+ d1 = vec_sel(vec_mergel(d, d), d1, sel); // [C f D h]
+
+ edges = vec_perm(t2, t1, mask);
+
+ t1 = vec_perm(edges, d1, align); // [b B d|C]
+
+ t2 = vec_perm(d1, edges, align); // [f D h|x]
+
+ vec_stl(t1, 16, dst+y);
+
+ t0 = t1;
+
+ vec_stl(t2, 31, dst+y);
+
+ t1 = t2;
+ }
+ } else
+ #endif
+ if (step == 1 && src3 == 0)
+ for (i=0; i<len-3; i+=4) {
+ t1 = vec_ld(15, dst+i);
+ s0 = vec_ld(0, src0+i);
+ s1 = vec_ld(0, src1+i);
+ s2 = vec_ld(0, src2+i);
+ edges = vec_perm(t1 ,t0, mask);
+ d = vec_madd(s0,s1,s2);
+ t1 = vec_perm(d, edges, align);
+ t0 = vec_perm(edges, d, align);
+ vec_st(t1, 15, dst+i);
+ vec_st(t0, 0, dst+i);
+ t0 = t1;
+ }
+ else
+ ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step);
+}
+
+void float_to_int16_altivec(int16_t *dst, const float *src, int len)
+{
+ int i;
+ vector float s0, s1;
+ vector signed int t0, t1;
+ vector signed short d0, d1, d;
+ vector unsigned char align;
+ if(((long)dst)&15) //FIXME
+ for(i=0; i<len-7; i+=8) {
+ s0 = vec_ld(0, src+i);
+ s1 = vec_ld(16, src+i);
+ t0 = vec_cts(s0, 0);
+ d0 = vec_ld(0, dst+i);
+ t1 = vec_cts(s1, 0);
+ d1 = vec_ld(15, dst+i);
+ d = vec_packs(t0,t1);
+ d1 = vec_perm(d1, d0, vec_lvsl(0,dst+i));
+ align = vec_lvsr(0, dst+i);
+ d0 = vec_perm(d1, d, align);
+ d1 = vec_perm(d, d1, align);
+ vec_st(d0, 0, dst+i);
+ vec_st(d1,15, dst+i);
+ }
+ else
+ for(i=0; i<len-7; i+=8) {
+ s0 = vec_ld(0, src+i);
+ s1 = vec_ld(16, src+i);
+ t0 = vec_cts(s0, 0);
+ t1 = vec_cts(s1, 0);
+ d = vec_packs(t0,t1);
+ vec_st(d, 0, dst+i);
+ }
+}
+
+void float_init_altivec(DSPContext* c, AVCodecContext *avctx)
+{
+ c->vector_fmul = vector_fmul_altivec;
+ c->vector_fmul_reverse = vector_fmul_reverse_altivec;
+ c->vector_fmul_add_add = vector_fmul_add_add_altivec;
+ if(!(avctx->flags & CODEC_FLAG_BITEXACT))
+ c->float_to_int16 = float_to_int16_altivec;
+}
diff --git a/src/libffmpeg/libavcodec/ppc/h264_altivec.c b/src/libffmpeg/libavcodec/ppc/h264_altivec.c
new file mode 100644
index 000000000..bac620e82
--- /dev/null
+++ b/src/libffmpeg/libavcodec/ppc/h264_altivec.c
@@ -0,0 +1,565 @@
+/*
+ * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../dsputil.h"
+
+#include "gcc_fixes.h"
+
+#include "dsputil_altivec.h"
+#include "types_altivec.h"
+
+#define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
+#define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)
+
+#define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC
+#define PREFIX_h264_chroma_mc8_altivec put_h264_chroma_mc8_altivec
+#define PREFIX_h264_chroma_mc8_num altivec_put_h264_chroma_mc8_num
+#define PREFIX_h264_qpel16_h_lowpass_altivec put_h264_qpel16_h_lowpass_altivec
+#define PREFIX_h264_qpel16_h_lowpass_num altivec_put_h264_qpel16_h_lowpass_num
+#define PREFIX_h264_qpel16_v_lowpass_altivec put_h264_qpel16_v_lowpass_altivec
+#define PREFIX_h264_qpel16_v_lowpass_num altivec_put_h264_qpel16_v_lowpass_num
+#define PREFIX_h264_qpel16_hv_lowpass_altivec put_h264_qpel16_hv_lowpass_altivec
+#define PREFIX_h264_qpel16_hv_lowpass_num altivec_put_h264_qpel16_hv_lowpass_num
+#include "h264_template_altivec.c"
+#undef OP_U8_ALTIVEC
+#undef PREFIX_h264_chroma_mc8_altivec
+#undef PREFIX_h264_chroma_mc8_num
+#undef PREFIX_h264_qpel16_h_lowpass_altivec
+#undef PREFIX_h264_qpel16_h_lowpass_num
+#undef PREFIX_h264_qpel16_v_lowpass_altivec
+#undef PREFIX_h264_qpel16_v_lowpass_num
+#undef PREFIX_h264_qpel16_hv_lowpass_altivec
+#undef PREFIX_h264_qpel16_hv_lowpass_num
+
+#define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC
+#define PREFIX_h264_chroma_mc8_altivec avg_h264_chroma_mc8_altivec
+#define PREFIX_h264_chroma_mc8_num altivec_avg_h264_chroma_mc8_num
+#define PREFIX_h264_qpel16_h_lowpass_altivec avg_h264_qpel16_h_lowpass_altivec
+#define PREFIX_h264_qpel16_h_lowpass_num altivec_avg_h264_qpel16_h_lowpass_num
+#define PREFIX_h264_qpel16_v_lowpass_altivec avg_h264_qpel16_v_lowpass_altivec
+#define PREFIX_h264_qpel16_v_lowpass_num altivec_avg_h264_qpel16_v_lowpass_num
+#define PREFIX_h264_qpel16_hv_lowpass_altivec avg_h264_qpel16_hv_lowpass_altivec
+#define PREFIX_h264_qpel16_hv_lowpass_num altivec_avg_h264_qpel16_hv_lowpass_num
+#include "h264_template_altivec.c"
+#undef OP_U8_ALTIVEC
+#undef PREFIX_h264_chroma_mc8_altivec
+#undef PREFIX_h264_chroma_mc8_num
+#undef PREFIX_h264_qpel16_h_lowpass_altivec
+#undef PREFIX_h264_qpel16_h_lowpass_num
+#undef PREFIX_h264_qpel16_v_lowpass_altivec
+#undef PREFIX_h264_qpel16_v_lowpass_num
+#undef PREFIX_h264_qpel16_hv_lowpass_altivec
+#undef PREFIX_h264_qpel16_hv_lowpass_num
+
+#define H264_MC(OPNAME, SIZE, CODETYPE) \
+static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){ \
+ DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
+ put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
+ put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
+ DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
+ put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
+ DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
+ put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
+ DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
+ put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
+ DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
+ put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
+ OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
+ DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
+ DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
+ put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
+ put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
+ DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
+ DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
+ put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
+ put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
+ DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
+ DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
+ put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
+ DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
+ DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
+ put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
+}\
+
+/* this code assume that stride % 16 == 0 */
+void put_no_rnd_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
+ signed int ABCD[4] __attribute__((aligned(16))) =
+ {((8 - x) * (8 - y)),
+ ((x) * (8 - y)),
+ ((8 - x) * (y)),
+ ((x) * (y))};
+ register int i;
+ vector unsigned char fperm;
+ const vector signed int vABCD = vec_ld(0, ABCD);
+ const vector signed short vA = vec_splat((vector signed short)vABCD, 1);
+ const vector signed short vB = vec_splat((vector signed short)vABCD, 3);
+ const vector signed short vC = vec_splat((vector signed short)vABCD, 5);
+ const vector signed short vD = vec_splat((vector signed short)vABCD, 7);
+ const vector signed int vzero = vec_splat_s32(0);
+ const vector signed short v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));
+ const vector unsigned short v6us = vec_splat_u16(6);
+ register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
+ register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
+
+ vector unsigned char vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1;
+ vector unsigned char vsrc0uc, vsrc1uc;
+ vector signed short vsrc0ssH, vsrc1ssH;
+ vector unsigned char vsrcCuc, vsrc2uc, vsrc3uc;
+ vector signed short vsrc2ssH, vsrc3ssH, psum;
+ vector unsigned char vdst, ppsum, fsum;
+
+ if (((unsigned long)dst) % 16 == 0) {
+ fperm = (vector unsigned char)AVV(0x10, 0x11, 0x12, 0x13,
+ 0x14, 0x15, 0x16, 0x17,
+ 0x08, 0x09, 0x0A, 0x0B,
+ 0x0C, 0x0D, 0x0E, 0x0F);
+ } else {
+ fperm = (vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03,
+ 0x04, 0x05, 0x06, 0x07,
+ 0x18, 0x19, 0x1A, 0x1B,
+ 0x1C, 0x1D, 0x1E, 0x1F);
+ }
+
+ vsrcAuc = vec_ld(0, src);
+
+ if (loadSecond)
+ vsrcBuc = vec_ld(16, src);
+ vsrcperm0 = vec_lvsl(0, src);
+ vsrcperm1 = vec_lvsl(1, src);
+
+ vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
+ if (reallyBadAlign)
+ vsrc1uc = vsrcBuc;
+ else
+ vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);
+
+ vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+ (vector unsigned char)vsrc0uc);
+ vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+ (vector unsigned char)vsrc1uc);
+
+ if (!loadSecond) {// -> !reallyBadAlign
+ for (i = 0 ; i < h ; i++) {
+
+
+ vsrcCuc = vec_ld(stride + 0, src);
+
+ vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
+ vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
+
+ vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+ (vector unsigned char)vsrc2uc);
+ vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+ (vector unsigned char)vsrc3uc);
+
+ psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
+ psum = vec_mladd(vB, vsrc1ssH, psum);
+ psum = vec_mladd(vC, vsrc2ssH, psum);
+ psum = vec_mladd(vD, vsrc3ssH, psum);
+ psum = vec_add(v28ss, psum);
+ psum = vec_sra(psum, v6us);
+
+ vdst = vec_ld(0, dst);
+ ppsum = (vector unsigned char)vec_packsu(psum, psum);
+ fsum = vec_perm(vdst, ppsum, fperm);
+
+ vec_st(fsum, 0, dst);
+
+ vsrc0ssH = vsrc2ssH;
+ vsrc1ssH = vsrc3ssH;
+
+ dst += stride;
+ src += stride;
+ }
+ } else {
+ vector unsigned char vsrcDuc;
+ for (i = 0 ; i < h ; i++) {
+ vsrcCuc = vec_ld(stride + 0, src);
+ vsrcDuc = vec_ld(stride + 16, src);
+
+ vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
+ if (reallyBadAlign)
+ vsrc3uc = vsrcDuc;
+ else
+ vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
+
+ vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+ (vector unsigned char)vsrc2uc);
+ vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+ (vector unsigned char)vsrc3uc);
+
+ psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
+ psum = vec_mladd(vB, vsrc1ssH, psum);
+ psum = vec_mladd(vC, vsrc2ssH, psum);
+ psum = vec_mladd(vD, vsrc3ssH, psum);
+ psum = vec_add(v28ss, psum);
+ psum = vec_sr(psum, v6us);
+
+ vdst = vec_ld(0, dst);
+ ppsum = (vector unsigned char)vec_pack(psum, psum);
+ fsum = vec_perm(vdst, ppsum, fperm);
+
+ vec_st(fsum, 0, dst);
+
+ vsrc0ssH = vsrc2ssH;
+ vsrc1ssH = vsrc3ssH;
+
+ dst += stride;
+ src += stride;
+ }
+ }
+}
+
+static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
+ const uint8_t * src2, int dst_stride,
+ int src_stride1, int h)
+{
+ int i;
+ vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align;
+
+ mask_ = vec_lvsl(0, src2);
+
+ for (i = 0; i < h; i++) {
+
+ tmp1 = vec_ld(i * src_stride1, src1);
+ mask = vec_lvsl(i * src_stride1, src1);
+ tmp2 = vec_ld(i * src_stride1 + 15, src1);
+
+ a = vec_perm(tmp1, tmp2, mask);
+
+ tmp1 = vec_ld(i * 16, src2);
+ tmp2 = vec_ld(i * 16 + 15, src2);
+
+ b = vec_perm(tmp1, tmp2, mask_);
+
+ tmp1 = vec_ld(0, dst);
+ mask = vec_lvsl(0, dst);
+ tmp2 = vec_ld(15, dst);
+
+ d = vec_avg(a, b);
+
+ edges = vec_perm(tmp2, tmp1, mask);
+
+ align = vec_lvsr(0, dst);
+
+ tmp2 = vec_perm(d, edges, align);
+ tmp1 = vec_perm(edges, d, align);
+
+ vec_st(tmp2, 15, dst);
+ vec_st(tmp1, 0 , dst);
+
+ dst += dst_stride;
+ }
+}
+
+static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
+ const uint8_t * src2, int dst_stride,
+ int src_stride1, int h)
+{
+ int i;
+ vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align;
+
+ mask_ = vec_lvsl(0, src2);
+
+ for (i = 0; i < h; i++) {
+
+ tmp1 = vec_ld(i * src_stride1, src1);
+ mask = vec_lvsl(i * src_stride1, src1);
+ tmp2 = vec_ld(i * src_stride1 + 15, src1);
+
+ a = vec_perm(tmp1, tmp2, mask);
+
+ tmp1 = vec_ld(i * 16, src2);
+ tmp2 = vec_ld(i * 16 + 15, src2);
+
+ b = vec_perm(tmp1, tmp2, mask_);
+
+ tmp1 = vec_ld(0, dst);
+ mask = vec_lvsl(0, dst);
+ tmp2 = vec_ld(15, dst);
+
+ d = vec_avg(vec_perm(tmp1, tmp2, mask), vec_avg(a, b));
+
+ edges = vec_perm(tmp2, tmp1, mask);
+
+ align = vec_lvsr(0, dst);
+
+ tmp2 = vec_perm(d, edges, align);
+ tmp1 = vec_perm(edges, d, align);
+
+ vec_st(tmp2, 15, dst);
+ vec_st(tmp1, 0 , dst);
+
+ dst += dst_stride;
+ }
+}
+
+/* Implemented but could be faster
+#define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h)
+#define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
+ */
+
+ H264_MC(put_, 16, altivec)
+ H264_MC(avg_, 16, altivec)
+
+
+/****************************************************************************
+ * IDCT transform:
+ ****************************************************************************/
+
+#define IDCT8_1D_ALTIVEC(s0, s1, s2, s3, s4, s5, s6, s7, d0, d1, d2, d3, d4, d5, d6, d7) {\
+ /* a0 = SRC(0) + SRC(4); */ \
+ vec_s16_t a0v = vec_add(s0, s4); \
+ /* a2 = SRC(0) - SRC(4); */ \
+ vec_s16_t a2v = vec_sub(s0, s4); \
+ /* a4 = (SRC(2)>>1) - SRC(6); */ \
+ vec_s16_t a4v = vec_sub(vec_sra(s2, onev), s6); \
+ /* a6 = (SRC(6)>>1) + SRC(2); */ \
+ vec_s16_t a6v = vec_add(vec_sra(s6, onev), s2); \
+ /* b0 = a0 + a6; */ \
+ vec_s16_t b0v = vec_add(a0v, a6v); \
+ /* b2 = a2 + a4; */ \
+ vec_s16_t b2v = vec_add(a2v, a4v); \
+ /* b4 = a2 - a4; */ \
+ vec_s16_t b4v = vec_sub(a2v, a4v); \
+ /* b6 = a0 - a6; */ \
+ vec_s16_t b6v = vec_sub(a0v, a6v); \
+ /* a1 = SRC(5) - SRC(3) - SRC(7) - (SRC(7)>>1); */ \
+ /* a1 = (SRC(5)-SRC(3)) - (SRC(7) + (SRC(7)>>1)); */ \
+ vec_s16_t a1v = vec_sub( vec_sub(s5, s3), vec_add(s7, vec_sra(s7, onev)) ); \
+ /* a3 = SRC(7) + SRC(1) - SRC(3) - (SRC(3)>>1); */ \
+ /* a3 = (SRC(7)+SRC(1)) - (SRC(3) + (SRC(3)>>1)); */ \
+ vec_s16_t a3v = vec_sub( vec_add(s7, s1), vec_add(s3, vec_sra(s3, onev)) );\
+ /* a5 = SRC(7) - SRC(1) + SRC(5) + (SRC(5)>>1); */ \
+ /* a5 = (SRC(7)-SRC(1)) + SRC(5) + (SRC(5)>>1); */ \
+ vec_s16_t a5v = vec_add( vec_sub(s7, s1), vec_add(s5, vec_sra(s5, onev)) );\
+ /* a7 = SRC(5)+SRC(3) + SRC(1) + (SRC(1)>>1); */ \
+ vec_s16_t a7v = vec_add( vec_add(s5, s3), vec_add(s1, vec_sra(s1, onev)) );\
+ /* b1 = (a7>>2) + a1; */ \
+ vec_s16_t b1v = vec_add( vec_sra(a7v, twov), a1v); \
+ /* b3 = a3 + (a5>>2); */ \
+ vec_s16_t b3v = vec_add(a3v, vec_sra(a5v, twov)); \
+ /* b5 = (a3>>2) - a5; */ \
+ vec_s16_t b5v = vec_sub( vec_sra(a3v, twov), a5v); \
+ /* b7 = a7 - (a1>>2); */ \
+ vec_s16_t b7v = vec_sub( a7v, vec_sra(a1v, twov)); \
+ /* DST(0, b0 + b7); */ \
+ d0 = vec_add(b0v, b7v); \
+ /* DST(1, b2 + b5); */ \
+ d1 = vec_add(b2v, b5v); \
+ /* DST(2, b4 + b3); */ \
+ d2 = vec_add(b4v, b3v); \
+ /* DST(3, b6 + b1); */ \
+ d3 = vec_add(b6v, b1v); \
+ /* DST(4, b6 - b1); */ \
+ d4 = vec_sub(b6v, b1v); \
+ /* DST(5, b4 - b3); */ \
+ d5 = vec_sub(b4v, b3v); \
+ /* DST(6, b2 - b5); */ \
+ d6 = vec_sub(b2v, b5v); \
+ /* DST(7, b0 - b7); */ \
+ d7 = vec_sub(b0v, b7v); \
+}
+
+#define ALTIVEC_STORE_SUM_CLIP(dest, idctv, perm_ldv, perm_stv, sel) { \
+ /* unaligned load */ \
+ vec_u8_t hv = vec_ld( 0, dest ); \
+ vec_u8_t lv = vec_ld( 7, dest ); \
+ vec_u8_t dstv = vec_perm( hv, lv, (vec_u8_t)perm_ldv ); \
+ vec_s16_t idct_sh6 = vec_sra(idctv, sixv); \
+ vec_u16_t dst16 = (vec_u16_t)vec_mergeh(zero_u8v, dstv); \
+ vec_s16_t idstsum = vec_adds(idct_sh6, (vec_s16_t)dst16); \
+ vec_u8_t idstsum8 = vec_packsu(zero_s16v, idstsum); \
+ vec_u8_t edgehv; \
+ /* unaligned store */ \
+ vec_u8_t bodyv = vec_perm( idstsum8, idstsum8, perm_stv );\
+ vec_u8_t edgelv = vec_perm( sel, zero_u8v, perm_stv ); \
+ lv = vec_sel( lv, bodyv, edgelv ); \
+ vec_st( lv, 7, dest ); \
+ hv = vec_ld( 0, dest ); \
+ edgehv = vec_perm( zero_u8v, sel, perm_stv ); \
+ hv = vec_sel( hv, bodyv, edgehv ); \
+ vec_st( hv, 0, dest ); \
+ }
+
+void ff_h264_idct8_add_altivec( uint8_t *dst, DCTELEM *dct, int stride ) {
+ vec_s16_t s0, s1, s2, s3, s4, s5, s6, s7;
+ vec_s16_t d0, d1, d2, d3, d4, d5, d6, d7;
+ vec_s16_t idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7;
+
+ vec_u8_t perm_ldv = vec_lvsl(0, dst);
+ vec_u8_t perm_stv = vec_lvsr(8, dst);
+
+ const vec_u16_t onev = vec_splat_u16(1);
+ const vec_u16_t twov = vec_splat_u16(2);
+ const vec_u16_t sixv = vec_splat_u16(6);
+
+ const vec_u8_t sel = (vec_u8_t) AVV(0,0,0,0,0,0,0,0,
+ -1,-1,-1,-1,-1,-1,-1,-1);
+ LOAD_ZERO;
+
+ dct[0] += 32; // rounding for the >>6 at the end
+
+ s0 = vec_ld(0x00, (int16_t*)dct);
+ s1 = vec_ld(0x10, (int16_t*)dct);
+ s2 = vec_ld(0x20, (int16_t*)dct);
+ s3 = vec_ld(0x30, (int16_t*)dct);
+ s4 = vec_ld(0x40, (int16_t*)dct);
+ s5 = vec_ld(0x50, (int16_t*)dct);
+ s6 = vec_ld(0x60, (int16_t*)dct);
+ s7 = vec_ld(0x70, (int16_t*)dct);
+
+ IDCT8_1D_ALTIVEC(s0, s1, s2, s3, s4, s5, s6, s7,
+ d0, d1, d2, d3, d4, d5, d6, d7);
+
+ TRANSPOSE8( d0, d1, d2, d3, d4, d5, d6, d7 );
+
+ IDCT8_1D_ALTIVEC(d0, d1, d2, d3, d4, d5, d6, d7,
+ idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7);
+
+ ALTIVEC_STORE_SUM_CLIP(&dst[0*stride], idct0, perm_ldv, perm_stv, sel);
+ ALTIVEC_STORE_SUM_CLIP(&dst[1*stride], idct1, perm_ldv, perm_stv, sel);
+ ALTIVEC_STORE_SUM_CLIP(&dst[2*stride], idct2, perm_ldv, perm_stv, sel);
+ ALTIVEC_STORE_SUM_CLIP(&dst[3*stride], idct3, perm_ldv, perm_stv, sel);
+ ALTIVEC_STORE_SUM_CLIP(&dst[4*stride], idct4, perm_ldv, perm_stv, sel);
+ ALTIVEC_STORE_SUM_CLIP(&dst[5*stride], idct5, perm_ldv, perm_stv, sel);
+ ALTIVEC_STORE_SUM_CLIP(&dst[6*stride], idct6, perm_ldv, perm_stv, sel);
+ ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel);
+}
+
+void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
+
+#ifdef HAVE_ALTIVEC
+ if (has_altivec()) {
+ c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
+ c->put_no_rnd_h264_chroma_pixels_tab[0] = put_no_rnd_h264_chroma_mc8_altivec;
+ c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;
+ c->h264_idct8_add = ff_h264_idct8_add_altivec;
+
+#define dspfunc(PFX, IDX, NUM) \
+ c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \
+ c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_altivec; \
+ c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_altivec; \
+ c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_altivec; \
+ c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_altivec; \
+ c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_altivec; \
+ c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_altivec; \
+ c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_altivec; \
+ c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_altivec; \
+ c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_altivec; \
+ c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_altivec; \
+ c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_altivec; \
+ c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_altivec; \
+ c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \
+ c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \
+ c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec
+
+ dspfunc(put_h264_qpel, 0, 16);
+ dspfunc(avg_h264_qpel, 0, 16);
+#undef dspfunc
+
+ } else
+#endif /* HAVE_ALTIVEC */
+ {
+ // Non-AltiVec PPC optimisations
+
+ // ... pending ...
+ }
+}
diff --git a/src/libffmpeg/libavcodec/ppc/h264_template_altivec.c b/src/libffmpeg/libavcodec/ppc/h264_template_altivec.c
new file mode 100644
index 000000000..e8ad67f2f
--- /dev/null
+++ b/src/libffmpeg/libavcodec/ppc/h264_template_altivec.c
@@ -0,0 +1,719 @@
+/*
+ * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* this code assumes that stride % 16 == 0 */
+void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
+ POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1);
+ signed int ABCD[4] __attribute__((aligned(16))) =
+ {((8 - x) * (8 - y)),
+ ((x) * (8 - y)),
+ ((8 - x) * (y)),
+ ((x) * (y))};
+ register int i;
+ vector unsigned char fperm;
+ const vector signed int vABCD = vec_ld(0, ABCD);
+ const vector signed short vA = vec_splat((vector signed short)vABCD, 1);
+ const vector signed short vB = vec_splat((vector signed short)vABCD, 3);
+ const vector signed short vC = vec_splat((vector signed short)vABCD, 5);
+ const vector signed short vD = vec_splat((vector signed short)vABCD, 7);
+ const vector signed int vzero = vec_splat_s32(0);
+ const vector signed short v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5));
+ const vector unsigned short v6us = vec_splat_u16(6);
+ register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
+ register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
+
+ vector unsigned char vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1;
+ vector unsigned char vsrc0uc, vsrc1uc;
+ vector signed short vsrc0ssH, vsrc1ssH;
+ vector unsigned char vsrcCuc, vsrc2uc, vsrc3uc;
+ vector signed short vsrc2ssH, vsrc3ssH, psum;
+ vector unsigned char vdst, ppsum, vfdst, fsum;
+
+ POWERPC_PERF_START_COUNT(PREFIX_h264_chroma_mc8_num, 1);
+
+ if (((unsigned long)dst) % 16 == 0) {
+ fperm = (vector unsigned char)AVV(0x10, 0x11, 0x12, 0x13,
+ 0x14, 0x15, 0x16, 0x17,
+ 0x08, 0x09, 0x0A, 0x0B,
+ 0x0C, 0x0D, 0x0E, 0x0F);
+ } else {
+ fperm = (vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03,
+ 0x04, 0x05, 0x06, 0x07,
+ 0x18, 0x19, 0x1A, 0x1B,
+ 0x1C, 0x1D, 0x1E, 0x1F);
+ }
+
+ vsrcAuc = vec_ld(0, src);
+
+ if (loadSecond)
+ vsrcBuc = vec_ld(16, src);
+ vsrcperm0 = vec_lvsl(0, src);
+ vsrcperm1 = vec_lvsl(1, src);
+
+ vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
+ if (reallyBadAlign)
+ vsrc1uc = vsrcBuc;
+ else
+ vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);
+
+ vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+ (vector unsigned char)vsrc0uc);
+ vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+ (vector unsigned char)vsrc1uc);
+
+ if (!loadSecond) {// -> !reallyBadAlign
+ for (i = 0 ; i < h ; i++) {
+
+
+ vsrcCuc = vec_ld(stride + 0, src);
+
+ vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
+ vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
+
+ vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+ (vector unsigned char)vsrc2uc);
+ vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+ (vector unsigned char)vsrc3uc);
+
+ psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
+ psum = vec_mladd(vB, vsrc1ssH, psum);
+ psum = vec_mladd(vC, vsrc2ssH, psum);
+ psum = vec_mladd(vD, vsrc3ssH, psum);
+ psum = vec_add(v32ss, psum);
+ psum = vec_sra(psum, v6us);
+
+ vdst = vec_ld(0, dst);
+ ppsum = (vector unsigned char)vec_packsu(psum, psum);
+ vfdst = vec_perm(vdst, ppsum, fperm);
+
+ OP_U8_ALTIVEC(fsum, vfdst, vdst);
+
+ vec_st(fsum, 0, dst);
+
+ vsrc0ssH = vsrc2ssH;
+ vsrc1ssH = vsrc3ssH;
+
+ dst += stride;
+ src += stride;
+ }
+ } else {
+ vector unsigned char vsrcDuc;
+ for (i = 0 ; i < h ; i++) {
+ vsrcCuc = vec_ld(stride + 0, src);
+ vsrcDuc = vec_ld(stride + 16, src);
+
+ vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
+ if (reallyBadAlign)
+ vsrc3uc = vsrcDuc;
+ else
+ vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
+
+ vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+ (vector unsigned char)vsrc2uc);
+ vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+ (vector unsigned char)vsrc3uc);
+
+ psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
+ psum = vec_mladd(vB, vsrc1ssH, psum);
+ psum = vec_mladd(vC, vsrc2ssH, psum);
+ psum = vec_mladd(vD, vsrc3ssH, psum);
+ psum = vec_add(v32ss, psum);
+ psum = vec_sr(psum, v6us);
+
+ vdst = vec_ld(0, dst);
+ ppsum = (vector unsigned char)vec_pack(psum, psum);
+ vfdst = vec_perm(vdst, ppsum, fperm);
+
+ OP_U8_ALTIVEC(fsum, vfdst, vdst);
+
+ vec_st(fsum, 0, dst);
+
+ vsrc0ssH = vsrc2ssH;
+ vsrc1ssH = vsrc3ssH;
+
+ dst += stride;
+ src += stride;
+ }
+ }
+ POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1);
+}
+
+/* this code assumes stride % 16 == 0 */
+static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
+ POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_h_lowpass_num, 1);
+ register int i;
+
+ const vector signed int vzero = vec_splat_s32(0);
+ const vector unsigned char permM2 = vec_lvsl(-2, src);
+ const vector unsigned char permM1 = vec_lvsl(-1, src);
+ const vector unsigned char permP0 = vec_lvsl(+0, src);
+ const vector unsigned char permP1 = vec_lvsl(+1, src);
+ const vector unsigned char permP2 = vec_lvsl(+2, src);
+ const vector unsigned char permP3 = vec_lvsl(+3, src);
+ const vector signed short v5ss = vec_splat_s16(5);
+ const vector unsigned short v5us = vec_splat_u16(5);
+ const vector signed short v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
+ const vector signed short v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));
+ const vector unsigned char dstperm = vec_lvsr(0, dst);
+ const vector unsigned char neg1 =
+ (const vector unsigned char) vec_splat_s8(-1);
+
+ const vector unsigned char dstmask =
+ vec_perm((const vector unsigned char)vzero,
+ neg1, dstperm);
+
+ vector unsigned char srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
+
+ register int align = ((((unsigned long)src) - 2) % 16);
+
+ vector signed short srcP0A, srcP0B, srcP1A, srcP1B,
+ srcP2A, srcP2B, srcP3A, srcP3B,
+ srcM1A, srcM1B, srcM2A, srcM2B,
+ sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,
+ pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,
+ psumA, psumB, sumA, sumB;
+
+ vector unsigned char sum, dst1, dst2, vdst, fsum,
+ rsum, fdst1, fdst2;
+
+ POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
+
+ for (i = 0 ; i < 16 ; i ++) {
+ vector unsigned char srcR1 = vec_ld(-2, src);
+ vector unsigned char srcR2 = vec_ld(14, src);
+
+ switch (align) {
+ default: {
+ srcM2 = vec_perm(srcR1, srcR2, permM2);
+ srcM1 = vec_perm(srcR1, srcR2, permM1);
+ srcP0 = vec_perm(srcR1, srcR2, permP0);
+ srcP1 = vec_perm(srcR1, srcR2, permP1);
+ srcP2 = vec_perm(srcR1, srcR2, permP2);
+ srcP3 = vec_perm(srcR1, srcR2, permP3);
+ } break;
+ case 11: {
+ srcM2 = vec_perm(srcR1, srcR2, permM2);
+ srcM1 = vec_perm(srcR1, srcR2, permM1);
+ srcP0 = vec_perm(srcR1, srcR2, permP0);
+ srcP1 = vec_perm(srcR1, srcR2, permP1);
+ srcP2 = vec_perm(srcR1, srcR2, permP2);
+ srcP3 = srcR2;
+ } break;
+ case 12: {
+ vector unsigned char srcR3 = vec_ld(30, src);
+ srcM2 = vec_perm(srcR1, srcR2, permM2);
+ srcM1 = vec_perm(srcR1, srcR2, permM1);
+ srcP0 = vec_perm(srcR1, srcR2, permP0);
+ srcP1 = vec_perm(srcR1, srcR2, permP1);
+ srcP2 = srcR2;
+ srcP3 = vec_perm(srcR2, srcR3, permP3);
+ } break;
+ case 13: {
+ vector unsigned char srcR3 = vec_ld(30, src);
+ srcM2 = vec_perm(srcR1, srcR2, permM2);
+ srcM1 = vec_perm(srcR1, srcR2, permM1);
+ srcP0 = vec_perm(srcR1, srcR2, permP0);
+ srcP1 = srcR2;
+ srcP2 = vec_perm(srcR2, srcR3, permP2);
+ srcP3 = vec_perm(srcR2, srcR3, permP3);
+ } break;
+ case 14: {
+ vector unsigned char srcR3 = vec_ld(30, src);
+ srcM2 = vec_perm(srcR1, srcR2, permM2);
+ srcM1 = vec_perm(srcR1, srcR2, permM1);
+ srcP0 = srcR2;
+ srcP1 = vec_perm(srcR2, srcR3, permP1);
+ srcP2 = vec_perm(srcR2, srcR3, permP2);
+ srcP3 = vec_perm(srcR2, srcR3, permP3);
+ } break;
+ case 15: {
+ vector unsigned char srcR3 = vec_ld(30, src);
+ srcM2 = vec_perm(srcR1, srcR2, permM2);
+ srcM1 = srcR2;
+ srcP0 = vec_perm(srcR2, srcR3, permP0);
+ srcP1 = vec_perm(srcR2, srcR3, permP1);
+ srcP2 = vec_perm(srcR2, srcR3, permP2);
+ srcP3 = vec_perm(srcR2, srcR3, permP3);
+ } break;
+ }
+
+ srcP0A = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcP0);
+ srcP0B = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcP0);
+ srcP1A = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcP1);
+ srcP1B = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcP1);
+
+ srcP2A = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcP2);
+ srcP2B = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcP2);
+ srcP3A = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcP3);
+ srcP3B = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcP3);
+
+ srcM1A = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcM1);
+ srcM1B = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcM1);
+ srcM2A = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcM2);
+ srcM2B = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcM2);
+
+ sum1A = vec_adds(srcP0A, srcP1A);
+ sum1B = vec_adds(srcP0B, srcP1B);
+ sum2A = vec_adds(srcM1A, srcP2A);
+ sum2B = vec_adds(srcM1B, srcP2B);
+ sum3A = vec_adds(srcM2A, srcP3A);
+ sum3B = vec_adds(srcM2B, srcP3B);
+
+ pp1A = vec_mladd(sum1A, v20ss, v16ss);
+ pp1B = vec_mladd(sum1B, v20ss, v16ss);
+
+ pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero);
+ pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero);
+
+ pp3A = vec_add(sum3A, pp1A);
+ pp3B = vec_add(sum3B, pp1B);
+
+ psumA = vec_sub(pp3A, pp2A);
+ psumB = vec_sub(pp3B, pp2B);
+
+ sumA = vec_sra(psumA, v5us);
+ sumB = vec_sra(psumB, v5us);
+
+ sum = vec_packsu(sumA, sumB);
+
+ dst1 = vec_ld(0, dst);
+ dst2 = vec_ld(16, dst);
+ vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst));
+
+ OP_U8_ALTIVEC(fsum, sum, vdst);
+
+ rsum = vec_perm(fsum, fsum, dstperm);
+ fdst1 = vec_sel(dst1, rsum, dstmask);
+ fdst2 = vec_sel(rsum, dst2, dstmask);
+
+ vec_st(fdst1, 0, dst);
+ vec_st(fdst2, 16, dst);
+
+ src += srcStride;
+ dst += dstStride;
+ }
+POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
+}
+
+/* this code assumes stride % 16 == 0 */
+static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
+ POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_v_lowpass_num, 1);
+
+ register int i;
+
+ const vector signed int vzero = vec_splat_s32(0);
+ const vector unsigned char perm = vec_lvsl(0, src);
+ const vector signed short v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
+ const vector unsigned short v5us = vec_splat_u16(5);
+ const vector signed short v5ss = vec_splat_s16(5);
+ const vector signed short v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));
+ const vector unsigned char dstperm = vec_lvsr(0, dst);
+ const vector unsigned char neg1 = (const vector unsigned char)vec_splat_s8(-1);
+ const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm);
+
+ uint8_t *srcbis = src - (srcStride * 2);
+
+ const vector unsigned char srcM2a = vec_ld(0, srcbis);
+ const vector unsigned char srcM2b = vec_ld(16, srcbis);
+ const vector unsigned char srcM2 = vec_perm(srcM2a, srcM2b, perm);
+// srcbis += srcStride;
+ const vector unsigned char srcM1a = vec_ld(0, srcbis += srcStride);
+ const vector unsigned char srcM1b = vec_ld(16, srcbis);
+ const vector unsigned char srcM1 = vec_perm(srcM1a, srcM1b, perm);
+// srcbis += srcStride;
+ const vector unsigned char srcP0a = vec_ld(0, srcbis += srcStride);
+ const vector unsigned char srcP0b = vec_ld(16, srcbis);
+ const vector unsigned char srcP0 = vec_perm(srcP0a, srcP0b, perm);
+// srcbis += srcStride;
+ const vector unsigned char srcP1a = vec_ld(0, srcbis += srcStride);
+ const vector unsigned char srcP1b = vec_ld(16, srcbis);
+ const vector unsigned char srcP1 = vec_perm(srcP1a, srcP1b, perm);
+// srcbis += srcStride;
+ const vector unsigned char srcP2a = vec_ld(0, srcbis += srcStride);
+ const vector unsigned char srcP2b = vec_ld(16, srcbis);
+ const vector unsigned char srcP2 = vec_perm(srcP2a, srcP2b, perm);
+// srcbis += srcStride;
+
+ vector signed short srcM2ssA = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcM2);
+ vector signed short srcM2ssB = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcM2);
+ vector signed short srcM1ssA = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcM1);
+ vector signed short srcM1ssB = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcM1);
+ vector signed short srcP0ssA = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcP0);
+ vector signed short srcP0ssB = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcP0);
+ vector signed short srcP1ssA = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcP1);
+ vector signed short srcP1ssB = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcP1);
+ vector signed short srcP2ssA = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcP2);
+ vector signed short srcP2ssB = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcP2);
+
+ vector signed short pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,
+ psumA, psumB, sumA, sumB,
+ srcP3ssA, srcP3ssB,
+ sum1A, sum1B, sum2A, sum2B, sum3A, sum3B;
+
+ vector unsigned char sum, dst1, dst2, vdst, fsum, rsum, fdst1, fdst2,
+ srcP3a, srcP3b, srcP3;
+
+ POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);
+
+ for (i = 0 ; i < 16 ; i++) {
+ srcP3a = vec_ld(0, srcbis += srcStride);
+ srcP3b = vec_ld(16, srcbis);
+ srcP3 = vec_perm(srcP3a, srcP3b, perm);
+ srcP3ssA = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcP3);
+ srcP3ssB = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcP3);
+// srcbis += srcStride;
+
+ sum1A = vec_adds(srcP0ssA, srcP1ssA);
+ sum1B = vec_adds(srcP0ssB, srcP1ssB);
+ sum2A = vec_adds(srcM1ssA, srcP2ssA);
+ sum2B = vec_adds(srcM1ssB, srcP2ssB);
+ sum3A = vec_adds(srcM2ssA, srcP3ssA);
+ sum3B = vec_adds(srcM2ssB, srcP3ssB);
+
+ srcM2ssA = srcM1ssA;
+ srcM2ssB = srcM1ssB;
+ srcM1ssA = srcP0ssA;
+ srcM1ssB = srcP0ssB;
+ srcP0ssA = srcP1ssA;
+ srcP0ssB = srcP1ssB;
+ srcP1ssA = srcP2ssA;
+ srcP1ssB = srcP2ssB;
+ srcP2ssA = srcP3ssA;
+ srcP2ssB = srcP3ssB;
+
+ pp1A = vec_mladd(sum1A, v20ss, v16ss);
+ pp1B = vec_mladd(sum1B, v20ss, v16ss);
+
+ pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero);
+ pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero);
+
+ pp3A = vec_add(sum3A, pp1A);
+ pp3B = vec_add(sum3B, pp1B);
+
+ psumA = vec_sub(pp3A, pp2A);
+ psumB = vec_sub(pp3B, pp2B);
+
+ sumA = vec_sra(psumA, v5us);
+ sumB = vec_sra(psumB, v5us);
+
+ sum = vec_packsu(sumA, sumB);
+
+ dst1 = vec_ld(0, dst);
+ dst2 = vec_ld(16, dst);
+ vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst));
+
+ OP_U8_ALTIVEC(fsum, sum, vdst);
+
+ rsum = vec_perm(fsum, fsum, dstperm);
+ fdst1 = vec_sel(dst1, rsum, dstmask);
+ fdst2 = vec_sel(rsum, dst2, dstmask);
+
+ vec_st(fdst1, 0, dst);
+ vec_st(fdst2, 16, dst);
+
+ dst += dstStride;
+ }
+ POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);
+}
+
+/* this code assumes stride % 16 == 0 *and* tmp is properly aligned */
+static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp, uint8_t * src, int dstStride, int tmpStride, int srcStride) {
+ POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_hv_lowpass_num, 1);
+ register int i;
+ const vector signed int vzero = vec_splat_s32(0);
+ const vector unsigned char permM2 = vec_lvsl(-2, src);
+ const vector unsigned char permM1 = vec_lvsl(-1, src);
+ const vector unsigned char permP0 = vec_lvsl(+0, src);
+ const vector unsigned char permP1 = vec_lvsl(+1, src);
+ const vector unsigned char permP2 = vec_lvsl(+2, src);
+ const vector unsigned char permP3 = vec_lvsl(+3, src);
+ const vector signed short v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
+ const vector unsigned int v10ui = vec_splat_u32(10);
+ const vector signed short v5ss = vec_splat_s16(5);
+ const vector signed short v1ss = vec_splat_s16(1);
+ const vector signed int v512si = vec_sl(vec_splat_s32(1),vec_splat_u32(9));
+ const vector unsigned int v16ui = vec_sl(vec_splat_u32(1),vec_splat_u32(4));
+
+ register int align = ((((unsigned long)src) - 2) % 16);
+
+ const vector unsigned char neg1 = (const vector unsigned char)
+ vec_splat_s8(-1);
+
+ vector signed short srcP0A, srcP0B, srcP1A, srcP1B,
+ srcP2A, srcP2B, srcP3A, srcP3B,
+ srcM1A, srcM1B, srcM2A, srcM2B,
+ sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,
+ pp1A, pp1B, pp2A, pp2B, psumA, psumB;
+
+ const vector unsigned char dstperm = vec_lvsr(0, dst);
+
+ const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm);
+
+ const vector unsigned char mperm = (const vector unsigned char)
+ AVV(0x00, 0x08, 0x01, 0x09, 0x02, 0x0A, 0x03, 0x0B,
+ 0x04, 0x0C, 0x05, 0x0D, 0x06, 0x0E, 0x07, 0x0F);
+ int16_t *tmpbis = tmp;
+
+ vector signed short tmpM1ssA, tmpM1ssB, tmpM2ssA, tmpM2ssB,
+ tmpP0ssA, tmpP0ssB, tmpP1ssA, tmpP1ssB,
+ tmpP2ssA, tmpP2ssB;
+
+ vector signed int pp1Ae, pp1Ao, pp1Be, pp1Bo, pp2Ae, pp2Ao, pp2Be, pp2Bo,
+ pp3Ae, pp3Ao, pp3Be, pp3Bo, pp1cAe, pp1cAo, pp1cBe, pp1cBo,
+ pp32Ae, pp32Ao, pp32Be, pp32Bo, sumAe, sumAo, sumBe, sumBo,
+ ssumAe, ssumAo, ssumBe, ssumBo;
+ vector unsigned char fsum, sumv, sum, dst1, dst2, vdst,
+ rsum, fdst1, fdst2;
+ vector signed short ssume, ssumo;
+
+ POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1);
+ src -= (2 * srcStride);
+ for (i = 0 ; i < 21 ; i ++) {
+ vector unsigned char srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
+ vector unsigned char srcR1 = vec_ld(-2, src);
+ vector unsigned char srcR2 = vec_ld(14, src);
+
+ switch (align) {
+ default: {
+ srcM2 = vec_perm(srcR1, srcR2, permM2);
+ srcM1 = vec_perm(srcR1, srcR2, permM1);
+ srcP0 = vec_perm(srcR1, srcR2, permP0);
+ srcP1 = vec_perm(srcR1, srcR2, permP1);
+ srcP2 = vec_perm(srcR1, srcR2, permP2);
+ srcP3 = vec_perm(srcR1, srcR2, permP3);
+ } break;
+ case 11: {
+ srcM2 = vec_perm(srcR1, srcR2, permM2);
+ srcM1 = vec_perm(srcR1, srcR2, permM1);
+ srcP0 = vec_perm(srcR1, srcR2, permP0);
+ srcP1 = vec_perm(srcR1, srcR2, permP1);
+ srcP2 = vec_perm(srcR1, srcR2, permP2);
+ srcP3 = srcR2;
+ } break;
+ case 12: {
+ vector unsigned char srcR3 = vec_ld(30, src);
+ srcM2 = vec_perm(srcR1, srcR2, permM2);
+ srcM1 = vec_perm(srcR1, srcR2, permM1);
+ srcP0 = vec_perm(srcR1, srcR2, permP0);
+ srcP1 = vec_perm(srcR1, srcR2, permP1);
+ srcP2 = srcR2;
+ srcP3 = vec_perm(srcR2, srcR3, permP3);
+ } break;
+ case 13: {
+ vector unsigned char srcR3 = vec_ld(30, src);
+ srcM2 = vec_perm(srcR1, srcR2, permM2);
+ srcM1 = vec_perm(srcR1, srcR2, permM1);
+ srcP0 = vec_perm(srcR1, srcR2, permP0);
+ srcP1 = srcR2;
+ srcP2 = vec_perm(srcR2, srcR3, permP2);
+ srcP3 = vec_perm(srcR2, srcR3, permP3);
+ } break;
+ case 14: {
+ vector unsigned char srcR3 = vec_ld(30, src);
+ srcM2 = vec_perm(srcR1, srcR2, permM2);
+ srcM1 = vec_perm(srcR1, srcR2, permM1);
+ srcP0 = srcR2;
+ srcP1 = vec_perm(srcR2, srcR3, permP1);
+ srcP2 = vec_perm(srcR2, srcR3, permP2);
+ srcP3 = vec_perm(srcR2, srcR3, permP3);
+ } break;
+ case 15: {
+ vector unsigned char srcR3 = vec_ld(30, src);
+ srcM2 = vec_perm(srcR1, srcR2, permM2);
+ srcM1 = srcR2;
+ srcP0 = vec_perm(srcR2, srcR3, permP0);
+ srcP1 = vec_perm(srcR2, srcR3, permP1);
+ srcP2 = vec_perm(srcR2, srcR3, permP2);
+ srcP3 = vec_perm(srcR2, srcR3, permP3);
+ } break;
+ }
+
+ srcP0A = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcP0);
+ srcP0B = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcP0);
+ srcP1A = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcP1);
+ srcP1B = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcP1);
+
+ srcP2A = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcP2);
+ srcP2B = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcP2);
+ srcP3A = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcP3);
+ srcP3B = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcP3);
+
+ srcM1A = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcM1);
+ srcM1B = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcM1);
+ srcM2A = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcM2);
+ srcM2B = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcM2);
+
+ sum1A = vec_adds(srcP0A, srcP1A);
+ sum1B = vec_adds(srcP0B, srcP1B);
+ sum2A = vec_adds(srcM1A, srcP2A);
+ sum2B = vec_adds(srcM1B, srcP2B);
+ sum3A = vec_adds(srcM2A, srcP3A);
+ sum3B = vec_adds(srcM2B, srcP3B);
+
+ pp1A = vec_mladd(sum1A, v20ss, sum3A);
+ pp1B = vec_mladd(sum1B, v20ss, sum3B);
+
+ pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero);
+ pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero);
+
+ psumA = vec_sub(pp1A, pp2A);
+ psumB = vec_sub(pp1B, pp2B);
+
+ vec_st(psumA, 0, tmp);
+ vec_st(psumB, 16, tmp);
+
+ src += srcStride;
+ tmp += tmpStride; /* int16_t*, and stride is 16, so it's OK here */
+ }
+
+ tmpM2ssA = vec_ld(0, tmpbis);
+ tmpM2ssB = vec_ld(16, tmpbis);
+ tmpbis += tmpStride;
+ tmpM1ssA = vec_ld(0, tmpbis);
+ tmpM1ssB = vec_ld(16, tmpbis);
+ tmpbis += tmpStride;
+ tmpP0ssA = vec_ld(0, tmpbis);
+ tmpP0ssB = vec_ld(16, tmpbis);
+ tmpbis += tmpStride;
+ tmpP1ssA = vec_ld(0, tmpbis);
+ tmpP1ssB = vec_ld(16, tmpbis);
+ tmpbis += tmpStride;
+ tmpP2ssA = vec_ld(0, tmpbis);
+ tmpP2ssB = vec_ld(16, tmpbis);
+ tmpbis += tmpStride;
+
+ for (i = 0 ; i < 16 ; i++) {
+ const vector signed short tmpP3ssA = vec_ld(0, tmpbis);
+ const vector signed short tmpP3ssB = vec_ld(16, tmpbis);
+
+ const vector signed short sum1A = vec_adds(tmpP0ssA, tmpP1ssA);
+ const vector signed short sum1B = vec_adds(tmpP0ssB, tmpP1ssB);
+ const vector signed short sum2A = vec_adds(tmpM1ssA, tmpP2ssA);
+ const vector signed short sum2B = vec_adds(tmpM1ssB, tmpP2ssB);
+ const vector signed short sum3A = vec_adds(tmpM2ssA, tmpP3ssA);
+ const vector signed short sum3B = vec_adds(tmpM2ssB, tmpP3ssB);
+
+ tmpbis += tmpStride;
+
+ tmpM2ssA = tmpM1ssA;
+ tmpM2ssB = tmpM1ssB;
+ tmpM1ssA = tmpP0ssA;
+ tmpM1ssB = tmpP0ssB;
+ tmpP0ssA = tmpP1ssA;
+ tmpP0ssB = tmpP1ssB;
+ tmpP1ssA = tmpP2ssA;
+ tmpP1ssB = tmpP2ssB;
+ tmpP2ssA = tmpP3ssA;
+ tmpP2ssB = tmpP3ssB;
+
+ pp1Ae = vec_mule(sum1A, v20ss);
+ pp1Ao = vec_mulo(sum1A, v20ss);
+ pp1Be = vec_mule(sum1B, v20ss);
+ pp1Bo = vec_mulo(sum1B, v20ss);
+
+ pp2Ae = vec_mule(sum2A, v5ss);
+ pp2Ao = vec_mulo(sum2A, v5ss);
+ pp2Be = vec_mule(sum2B, v5ss);
+ pp2Bo = vec_mulo(sum2B, v5ss);
+
+ pp3Ae = vec_sra((vector signed int)sum3A, v16ui);
+ pp3Ao = vec_mulo(sum3A, v1ss);
+ pp3Be = vec_sra((vector signed int)sum3B, v16ui);
+ pp3Bo = vec_mulo(sum3B, v1ss);
+
+ pp1cAe = vec_add(pp1Ae, v512si);
+ pp1cAo = vec_add(pp1Ao, v512si);
+ pp1cBe = vec_add(pp1Be, v512si);
+ pp1cBo = vec_add(pp1Bo, v512si);
+
+ pp32Ae = vec_sub(pp3Ae, pp2Ae);
+ pp32Ao = vec_sub(pp3Ao, pp2Ao);
+ pp32Be = vec_sub(pp3Be, pp2Be);
+ pp32Bo = vec_sub(pp3Bo, pp2Bo);
+
+ sumAe = vec_add(pp1cAe, pp32Ae);
+ sumAo = vec_add(pp1cAo, pp32Ao);
+ sumBe = vec_add(pp1cBe, pp32Be);
+ sumBo = vec_add(pp1cBo, pp32Bo);
+
+ ssumAe = vec_sra(sumAe, v10ui);
+ ssumAo = vec_sra(sumAo, v10ui);
+ ssumBe = vec_sra(sumBe, v10ui);
+ ssumBo = vec_sra(sumBo, v10ui);
+
+ ssume = vec_packs(ssumAe, ssumBe);
+ ssumo = vec_packs(ssumAo, ssumBo);
+
+ sumv = vec_packsu(ssume, ssumo);
+ sum = vec_perm(sumv, sumv, mperm);
+
+ dst1 = vec_ld(0, dst);
+ dst2 = vec_ld(16, dst);
+ vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst));
+
+ OP_U8_ALTIVEC(fsum, sum, vdst);
+
+ rsum = vec_perm(fsum, fsum, dstperm);
+ fdst1 = vec_sel(dst1, rsum, dstmask);
+ fdst2 = vec_sel(rsum, dst2, dstmask);
+
+ vec_st(fdst1, 0, dst);
+ vec_st(fdst2, 16, dst);
+
+ dst += dstStride;
+ }
+ POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1);
+}
diff --git a/src/libffmpeg/libavcodec/ppc/mathops.h b/src/libffmpeg/libavcodec/ppc/mathops.h
new file mode 100644
index 000000000..6af23f246
--- /dev/null
+++ b/src/libffmpeg/libavcodec/ppc/mathops.h
@@ -0,0 +1,33 @@
+/*
+ * simple math operations
+ * Copyright (c) 2001, 2002 Fabrice Bellard.
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#if defined(ARCH_POWERPC_405)
+/* signed 16x16 -> 32 multiply add accumulate */
+# define MAC16(rt, ra, rb) \
+ asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
+
+/* signed 16x16 -> 32 multiply */
+# define MUL16(ra, rb) \
+ ({ int __rt;
+ asm ("mullhw %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb));
+ __rt; })
+#endif
diff --git a/src/libffmpeg/libavcodec/ppc/snow_altivec.c b/src/libffmpeg/libavcodec/ppc/snow_altivec.c
new file mode 100644
index 000000000..b15672ffe
--- /dev/null
+++ b/src/libffmpeg/libavcodec/ppc/snow_altivec.c
@@ -0,0 +1,788 @@
+/*
+ * Altivec optimized snow DSP utils
+ * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ *
+ */
+
+#include "../dsputil.h"
+
+#include "gcc_fixes.h"
+#include "dsputil_altivec.h"
+#include "../snow.h"
+
+#undef NDEBUG
+#include <assert.h>
+
+
+
+//FIXME remove this replication
+#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
+
+static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
+{
+ int offset;
+ DWTELEM * buffer;
+
+// av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
+
+ assert(buf->data_stack_top >= 0);
+// assert(!buf->line[line]);
+ if (buf->line[line])
+ return buf->line[line];
+
+ offset = buf->line_width * line;
+ buffer = buf->data_stack[buf->data_stack_top];
+ buf->data_stack_top--;
+ buf->line[line] = buffer;
+
+// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
+
+ return buffer;
+}
+
+
+//altivec code
+
+void ff_snow_horizontal_compose97i_altivec(DWTELEM *b, int width)
+{
+ const int w2= (width+1)>>1;
+ DECLARE_ALIGNED_16(DWTELEM, temp[(width>>1)]);
+ const int w_l= (width>>1);
+ const int w_r= w2 - 1;
+ int i;
+ vector signed int t1, t2, x, y, tmp1, tmp2;
+ vector signed int *vbuf, *vtmp;
+ vector unsigned char align;
+
+
+
+ { // Lift 0
+ DWTELEM * const ref = b + w2 - 1;
+ DWTELEM b_0 = b[0];
+ vbuf = (vector signed int *)b;
+
+ tmp1 = vec_ld (0, ref);
+ align = vec_lvsl (0, ref);
+ tmp2 = vec_ld (15, ref);
+ t1= vec_perm(tmp1, tmp2, align);
+
+ i = 0;
+
+ for (i=0; i<w_l-15; i+=16) {
+#if 0
+ b[i+0] = b[i+0] - ((3 * (ref[i+0] + ref[i+1]) + 4) >> 3);
+ b[i+1] = b[i+1] - ((3 * (ref[i+1] + ref[i+2]) + 4) >> 3);
+ b[i+2] = b[i+2] - ((3 * (ref[i+2] + ref[i+3]) + 4) >> 3);
+ b[i+3] = b[i+3] - ((3 * (ref[i+3] + ref[i+4]) + 4) >> 3);
+#else
+
+ tmp1 = vec_ld (0, ref+4+i);
+ tmp2 = vec_ld (15, ref+4+i);
+
+ t2 = vec_perm(tmp1, tmp2, align);
+
+ y = vec_add(t1,vec_sld(t1,t2,4));
+ y = vec_add(vec_add(y,y),y);
+
+ tmp1 = vec_ld (0, ref+8+i);
+
+ y = vec_add(y, vec_splat_s32(4));
+ y = vec_sra(y, vec_splat_u32(3));
+
+ tmp2 = vec_ld (15, ref+8+i);
+
+ *vbuf = vec_sub(*vbuf, y);
+
+ t1=t2;
+
+ vbuf++;
+
+ t2 = vec_perm(tmp1, tmp2, align);
+
+ y = vec_add(t1,vec_sld(t1,t2,4));
+ y = vec_add(vec_add(y,y),y);
+
+ tmp1 = vec_ld (0, ref+12+i);
+
+ y = vec_add(y, vec_splat_s32(4));
+ y = vec_sra(y, vec_splat_u32(3));
+
+ tmp2 = vec_ld (15, ref+12+i);
+
+ *vbuf = vec_sub(*vbuf, y);
+
+ t1=t2;
+
+ vbuf++;
+
+ t2 = vec_perm(tmp1, tmp2, align);
+
+ y = vec_add(t1,vec_sld(t1,t2,4));
+ y = vec_add(vec_add(y,y),y);
+
+ tmp1 = vec_ld (0, ref+16+i);
+
+ y = vec_add(y, vec_splat_s32(4));
+ y = vec_sra(y, vec_splat_u32(3));
+
+ tmp2 = vec_ld (15, ref+16+i);
+
+ *vbuf = vec_sub(*vbuf, y);
+
+ t1=t2;
+
+ t2 = vec_perm(tmp1, tmp2, align);
+
+ y = vec_add(t1,vec_sld(t1,t2,4));
+ y = vec_add(vec_add(y,y),y);
+
+ vbuf++;
+
+ y = vec_add(y, vec_splat_s32(4));
+ y = vec_sra(y, vec_splat_u32(3));
+ *vbuf = vec_sub(*vbuf, y);
+
+ t1=t2;
+
+ vbuf++;
+
+#endif
+ }
+
+ snow_horizontal_compose_lift_lead_out(i, b, b, ref, width, w_l, 0, W_DM, W_DO, W_DS);
+ b[0] = b_0 - ((W_DM * 2 * ref[1]+W_DO)>>W_DS);
+ }
+
+ { // Lift 1
+ DWTELEM * const dst = b+w2;
+
+ i = 0;
+ for(; (((long)&dst[i]) & 0xF) && i<w_r; i++){
+ dst[i] = dst[i] - (b[i] + b[i + 1]);
+ }
+
+ align = vec_lvsl(0, b+i);
+ tmp1 = vec_ld(0, b+i);
+ vbuf = (vector signed int*) (dst + i);
+ tmp2 = vec_ld(15, b+i);
+
+ t1 = vec_perm(tmp1, tmp2, align);
+
+ for (; i<w_r-3; i+=4) {
+
+#if 0
+ dst[i] = dst[i] - (b[i] + b[i + 1]);
+ dst[i+1] = dst[i+1] - (b[i+1] + b[i + 2]);
+ dst[i+2] = dst[i+2] - (b[i+2] + b[i + 3]);
+ dst[i+3] = dst[i+3] - (b[i+3] + b[i + 4]);
+#else
+
+ tmp1 = vec_ld(0, b+4+i);
+ tmp2 = vec_ld(15, b+4+i);
+
+ t2 = vec_perm(tmp1, tmp2, align);
+
+ y = vec_add(t1, vec_sld(t1,t2,4));
+ *vbuf = vec_sub (*vbuf, y);
+
+ vbuf++;
+
+ t1 = t2;
+
+#endif
+
+ }
+
+ snow_horizontal_compose_lift_lead_out(i, dst, dst, b, width, w_r, 1, W_CM, W_CO, W_CS);
+ }
+
+ { // Lift 2
+ DWTELEM * const ref = b+w2 - 1;
+ DWTELEM b_0 = b[0];
+ vbuf= (vector signed int *) b;
+
+ tmp1 = vec_ld (0, ref);
+ align = vec_lvsl (0, ref);
+ tmp2 = vec_ld (15, ref);
+ t1= vec_perm(tmp1, tmp2, align);
+
+ i = 0;
+ for (; i<w_l-15; i+=16) {
+#if 0
+ b[i] = b[i] - (((8 -(ref[i] + ref[i+1])) - (b[i] <<2)) >> 4);
+ b[i+1] = b[i+1] - (((8 -(ref[i+1] + ref[i+2])) - (b[i+1]<<2)) >> 4);
+ b[i+2] = b[i+2] - (((8 -(ref[i+2] + ref[i+3])) - (b[i+2]<<2)) >> 4);
+ b[i+3] = b[i+3] - (((8 -(ref[i+3] + ref[i+4])) - (b[i+3]<<2)) >> 4);
+#else
+ tmp1 = vec_ld (0, ref+4+i);
+ tmp2 = vec_ld (15, ref+4+i);
+
+ t2 = vec_perm(tmp1, tmp2, align);
+
+ y = vec_add(t1,vec_sld(t1,t2,4));
+ y = vec_sub(vec_splat_s32(8),y);
+
+ tmp1 = vec_ld (0, ref+8+i);
+
+ x = vec_sl(*vbuf,vec_splat_u32(2));
+ y = vec_sra(vec_sub(y,x),vec_splat_u32(4));
+
+ tmp2 = vec_ld (15, ref+8+i);
+
+ *vbuf = vec_sub( *vbuf, y);
+
+ t1 = t2;
+
+ vbuf++;
+
+ t2 = vec_perm(tmp1, tmp2, align);
+
+ y = vec_add(t1,vec_sld(t1,t2,4));
+ y = vec_sub(vec_splat_s32(8),y);
+
+ tmp1 = vec_ld (0, ref+12+i);
+
+ x = vec_sl(*vbuf,vec_splat_u32(2));
+ y = vec_sra(vec_sub(y,x),vec_splat_u32(4));
+
+ tmp2 = vec_ld (15, ref+12+i);
+
+ *vbuf = vec_sub( *vbuf, y);
+
+ t1 = t2;
+
+ vbuf++;
+
+ t2 = vec_perm(tmp1, tmp2, align);
+
+ y = vec_add(t1,vec_sld(t1,t2,4));
+ y = vec_sub(vec_splat_s32(8),y);
+
+ tmp1 = vec_ld (0, ref+16+i);
+
+ x = vec_sl(*vbuf,vec_splat_u32(2));
+ y = vec_sra(vec_sub(y,x),vec_splat_u32(4));
+
+ tmp2 = vec_ld (15, ref+16+i);
+
+ *vbuf = vec_sub( *vbuf, y);
+
+ t1 = t2;
+
+ vbuf++;
+
+ t2 = vec_perm(tmp1, tmp2, align);
+
+ y = vec_add(t1,vec_sld(t1,t2,4));
+ y = vec_sub(vec_splat_s32(8),y);
+
+ t1 = t2;
+
+ x = vec_sl(*vbuf,vec_splat_u32(2));
+ y = vec_sra(vec_sub(y,x),vec_splat_u32(4));
+ *vbuf = vec_sub( *vbuf, y);
+
+ vbuf++;
+
+#endif
+ }
+
+ snow_horizontal_compose_liftS_lead_out(i, b, b, ref, width, w_l);
+ b[0] = b_0 - (((-2 * ref[1] + W_BO) - 4 * b_0) >> W_BS);
+ }
+
+ { // Lift 3
+ DWTELEM * const src = b+w2;
+
+ vbuf = (vector signed int *)b;
+ vtmp = (vector signed int *)temp;
+
+ i = 0;
+ align = vec_lvsl(0, src);
+
+ for (; i<w_r-3; i+=4) {
+#if 0
+ temp[i] = src[i] - ((-3*(b[i] + b[i+1]))>>1);
+ temp[i+1] = src[i+1] - ((-3*(b[i+1] + b[i+2]))>>1);
+ temp[i+2] = src[i+2] - ((-3*(b[i+2] + b[i+3]))>>1);
+ temp[i+3] = src[i+3] - ((-3*(b[i+3] + b[i+4]))>>1);
+#else
+ tmp1 = vec_ld(0,src+i);
+ t1 = vec_add(vbuf[0],vec_sld(vbuf[0],vbuf[1],4));
+ tmp2 = vec_ld(15,src+i);
+ t1 = vec_sub(vec_splat_s32(0),t1); //bad!
+ t1 = vec_add(t1,vec_add(t1,t1));
+ t2 = vec_perm(tmp1 ,tmp2 ,align);
+ t1 = vec_sra(t1,vec_splat_u32(1));
+ vbuf++;
+ *vtmp = vec_sub(t2,t1);
+ vtmp++;
+
+#endif
+
+ }
+
+ snow_horizontal_compose_lift_lead_out(i, temp, src, b, width, w_r, 1, -3, 0, 1);
+ }
+
+ {
+ //Interleave
+ int a;
+ vector signed int *t = (vector signed int *)temp,
+ *v = (vector signed int *)b;
+
+ snow_interleave_line_header(&i, width, b, temp);
+
+ for (; (i & 0xE) != 0xE; i-=2){
+ b[i+1] = temp[i>>1];
+ b[i] = b[i>>1];
+ }
+ for (i-=14; i>=0; i-=16){
+ a=i/4;
+
+ v[a+3]=vec_mergel(v[(a>>1)+1],t[(a>>1)+1]);
+ v[a+2]=vec_mergeh(v[(a>>1)+1],t[(a>>1)+1]);
+ v[a+1]=vec_mergel(v[a>>1],t[a>>1]);
+ v[a]=vec_mergeh(v[a>>1],t[a>>1]);
+
+ }
+
+ }
+}
+
+void ff_snow_vertical_compose97i_altivec(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width)
+{
+ int i, w4 = width/4;
+ vector signed int *v0, *v1,*v2,*v3,*v4,*v5;
+ vector signed int t1, t2;
+
+ v0=(vector signed int *)b0;
+ v1=(vector signed int *)b1;
+ v2=(vector signed int *)b2;
+ v3=(vector signed int *)b3;
+ v4=(vector signed int *)b4;
+ v5=(vector signed int *)b5;
+
+ for (i=0; i< w4;i++)
+ {
+
+ #if 0
+ b4[i] -= (3*(b3[i] + b5[i])+4)>>3;
+ b3[i] -= ((b2[i] + b4[i]));
+ b2[i] += ((b1[i] + b3[i])+4*b2[i]+8)>>4;
+ b1[i] += (3*(b0[i] + b2[i]))>>1;
+ #else
+ t1 = vec_add(v3[i], v5[i]);
+ t2 = vec_add(t1, vec_add(t1,t1));
+ t1 = vec_add(t2, vec_splat_s32(4));
+ v4[i] = vec_sub(v4[i], vec_sra(t1,vec_splat_u32(3)));
+
+ v3[i] = vec_sub(v3[i], vec_add(v2[i], v4[i]));
+
+ t1 = vec_add(vec_splat_s32(8), vec_add(v1[i], v3[i]));
+ t2 = vec_sl(v2[i], vec_splat_u32(2));
+ v2[i] = vec_add(v2[i], vec_sra(vec_add(t1,t2),vec_splat_u32(4)));
+ t1 = vec_add(v0[i], v2[i]);
+ t2 = vec_add(t1, vec_add(t1,t1));
+ v1[i] = vec_add(v1[i], vec_sra(t2,vec_splat_u32(1)));
+
+ #endif
+ }
+
+ for(i*=4; i < width; i++)
+ {
+ b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
+ b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
+ b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
+ b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
+ }
+}
+
+#define LOAD_BLOCKS \
+ tmp1 = vec_ld(0, &block[3][y*src_stride]);\
+ align = vec_lvsl(0, &block[3][y*src_stride]);\
+ tmp2 = vec_ld(15, &block[3][y*src_stride]);\
+\
+ b3 = vec_perm(tmp1,tmp2,align);\
+\
+ tmp1 = vec_ld(0, &block[2][y*src_stride]);\
+ align = vec_lvsl(0, &block[2][y*src_stride]);\
+ tmp2 = vec_ld(15, &block[2][y*src_stride]);\
+\
+ b2 = vec_perm(tmp1,tmp2,align);\
+\
+ tmp1 = vec_ld(0, &block[1][y*src_stride]);\
+ align = vec_lvsl(0, &block[1][y*src_stride]);\
+ tmp2 = vec_ld(15, &block[1][y*src_stride]);\
+\
+ b1 = vec_perm(tmp1,tmp2,align);\
+\
+ tmp1 = vec_ld(0, &block[0][y*src_stride]);\
+ align = vec_lvsl(0, &block[0][y*src_stride]);\
+ tmp2 = vec_ld(15, &block[0][y*src_stride]);\
+\
+ b0 = vec_perm(tmp1,tmp2,align);
+
+#define LOAD_OBMCS \
+ tmp1 = vec_ld(0, obmc1);\
+ align = vec_lvsl(0, obmc1);\
+ tmp2 = vec_ld(15, obmc1);\
+\
+ ob1 = vec_perm(tmp1,tmp2,align);\
+\
+ tmp1 = vec_ld(0, obmc2);\
+ align = vec_lvsl(0, obmc2);\
+ tmp2 = vec_ld(15, obmc2);\
+\
+ ob2 = vec_perm(tmp1,tmp2,align);\
+\
+ tmp1 = vec_ld(0, obmc3);\
+ align = vec_lvsl(0, obmc3);\
+ tmp2 = vec_ld(15, obmc3);\
+\
+ ob3 = vec_perm(tmp1,tmp2,align);\
+\
+ tmp1 = vec_ld(0, obmc4);\
+ align = vec_lvsl(0, obmc4);\
+ tmp2 = vec_ld(15, obmc4);\
+\
+ ob4 = vec_perm(tmp1,tmp2,align);
+
+/* interleave logic
+ * h1 <- [ a,b,a,b, a,b,a,b, a,b,a,b, a,b,a,b ]
+ * h2 <- [ c,d,c,d, c,d,c,d, c,d,c,d, c,d,c,d ]
+ * h <- [ a,b,c,d, a,b,c,d, a,b,c,d, a,b,c,d ]
+ */
+
+#define STEPS_0_1\
+ h1 = (vector unsigned short)\
+ vec_mergeh(ob1, ob2);\
+\
+ h2 = (vector unsigned short)\
+ vec_mergeh(ob3, ob4);\
+\
+ ih = (vector unsigned char)\
+ vec_mergeh(h1,h2);\
+\
+ l1 = (vector unsigned short) vec_mergeh(b3, b2);\
+\
+ ih1 = (vector unsigned char) vec_mergel(h1, h2);\
+\
+ l2 = (vector unsigned short) vec_mergeh(b1, b0);\
+\
+ il = (vector unsigned char) vec_mergeh(l1, l2);\
+\
+ v[0] = (vector signed int) vec_msum(ih, il, vec_splat_u32(0));\
+\
+ il1 = (vector unsigned char) vec_mergel(l1, l2);\
+\
+ v[1] = (vector signed int) vec_msum(ih1, il1, vec_splat_u32(0));
+
+#define FINAL_STEP_SCALAR\
+ for(x=0; x<b_w; x++)\
+ if(add){\
+ vbuf[x] += dst[x + src_x];\
+ vbuf[x] = (vbuf[x] + (1<<(FRAC_BITS-1))) >> FRAC_BITS;\
+ if(vbuf[x]&(~255)) vbuf[x]= ~(vbuf[x]>>31);\
+ dst8[x + y*src_stride] = vbuf[x];\
+ }else{\
+ dst[x + src_x] -= vbuf[x];\
+ }
+
+static void inner_add_yblock_bw_8_obmc_16_altivec(uint8_t *obmc,
+ const int obmc_stride,
+ uint8_t * * block, int b_w,
+ int b_h, int src_x, int src_y,
+ int src_stride, slice_buffer * sb,
+ int add, uint8_t * dst8)
+{
+ int y, x;
+ DWTELEM * dst;
+ vector unsigned short h1, h2, l1, l2;
+ vector unsigned char ih, il, ih1, il1, tmp1, tmp2, align;
+ vector unsigned char b0,b1,b2,b3;
+ vector unsigned char ob1,ob2,ob3,ob4;
+
+ DECLARE_ALIGNED_16(int, vbuf[16]);
+ vector signed int *v = (vector signed int *)vbuf, *d;
+
+ for(y=0; y<b_h; y++){
+ //FIXME ugly misuse of obmc_stride
+
+ uint8_t *obmc1= obmc + y*obmc_stride;
+ uint8_t *obmc2= obmc1+ (obmc_stride>>1);
+ uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
+ uint8_t *obmc4= obmc3+ (obmc_stride>>1);
+
+ dst = slice_buffer_get_line(sb, src_y + y);
+ d = (vector signed int *)(dst + src_x);
+
+//FIXME i could avoid some loads!
+
+ // load blocks
+ LOAD_BLOCKS
+
+ // load obmcs
+ LOAD_OBMCS
+
+ // steps 0 1
+ STEPS_0_1
+
+ FINAL_STEP_SCALAR
+
+ }
+
+}
+
+#define STEPS_2_3\
+ h1 = (vector unsigned short) vec_mergel(ob1, ob2);\
+\
+ h2 = (vector unsigned short) vec_mergel(ob3, ob4);\
+\
+ ih = (vector unsigned char) vec_mergeh(h1,h2);\
+\
+ l1 = (vector unsigned short) vec_mergel(b3, b2);\
+\
+ l2 = (vector unsigned short) vec_mergel(b1, b0);\
+\
+ ih1 = (vector unsigned char) vec_mergel(h1,h2);\
+\
+ il = (vector unsigned char) vec_mergeh(l1,l2);\
+\
+ v[2] = (vector signed int) vec_msum(ih, il, vec_splat_u32(0));\
+\
+ il1 = (vector unsigned char) vec_mergel(l1,l2);\
+\
+ v[3] = (vector signed int) vec_msum(ih1, il1, vec_splat_u32(0));
+
+
+static void inner_add_yblock_bw_16_obmc_32_altivec(uint8_t *obmc,
+ const int obmc_stride,
+ uint8_t * * block, int b_w,
+ int b_h, int src_x, int src_y,
+ int src_stride, slice_buffer * sb,
+ int add, uint8_t * dst8)
+{
+ int y, x;
+ DWTELEM * dst;
+ vector unsigned short h1, h2, l1, l2;
+ vector unsigned char ih, il, ih1, il1, tmp1, tmp2, align;
+ vector unsigned char b0,b1,b2,b3;
+ vector unsigned char ob1,ob2,ob3,ob4;
+ DECLARE_ALIGNED_16(int, vbuf[b_w]);
+ vector signed int *v = (vector signed int *)vbuf, *d;
+
+ for(y=0; y<b_h; y++){
+ //FIXME ugly misuse of obmc_stride
+
+ uint8_t *obmc1= obmc + y*obmc_stride;
+ uint8_t *obmc2= obmc1+ (obmc_stride>>1);
+ uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
+ uint8_t *obmc4= obmc3+ (obmc_stride>>1);
+
+ dst = slice_buffer_get_line(sb, src_y + y);
+ d = (vector signed int *)(dst + src_x);
+
+ // load blocks
+ LOAD_BLOCKS
+
+ // load obmcs
+ LOAD_OBMCS
+
+ // steps 0 1 2 3
+ STEPS_0_1
+
+ STEPS_2_3
+
+ FINAL_STEP_SCALAR
+
+ }
+}
+
+#define FINAL_STEP_VEC \
+\
+ if(add)\
+ {\
+ for(x=0; x<b_w/4; x++)\
+ {\
+ v[x] = vec_add(v[x], d[x]);\
+ v[x] = vec_sra(vec_add(v[x],\
+ vec_sl( vec_splat_s32(1),\
+ vec_splat_u32(7))),\
+ vec_splat_u32(8));\
+\
+ mask = (vector bool int) vec_sl((vector signed int)\
+ vec_cmpeq(v[x],v[x]),vec_splat_u32(8));\
+ mask = (vector bool int) vec_and(v[x],vec_nor(mask,mask));\
+\
+ mask = (vector bool int)\
+ vec_cmpeq((vector signed int)mask,\
+ (vector signed int)vec_splat_u32(0));\
+\
+ vs = vec_sra(v[x],vec_splat_u32(8));\
+ vs = vec_sra(v[x],vec_splat_u32(8));\
+ vs = vec_sra(v[x],vec_splat_u32(15));\
+\
+ vs = vec_nor(vs,vs);\
+\
+ v[x]= vec_sel(v[x],vs,mask);\
+ }\
+\
+ for(x=0; x<b_w; x++)\
+ dst8[x + y*src_stride] = vbuf[x];\
+\
+ }\
+ else\
+ for(x=0; x<b_w/4; x++)\
+ d[x] = vec_sub(d[x], v[x]);
+
+static void inner_add_yblock_a_bw_8_obmc_16_altivec(uint8_t *obmc,
+ const int obmc_stride,
+ uint8_t * * block, int b_w,
+ int b_h, int src_x, int src_y,
+ int src_stride, slice_buffer * sb,
+ int add, uint8_t * dst8)
+{
+ int y, x;
+ DWTELEM * dst;
+ vector bool int mask;
+ vector signed int vs;
+ vector unsigned short h1, h2, l1, l2;
+ vector unsigned char ih, il, ih1, il1, tmp1, tmp2, align;
+ vector unsigned char b0,b1,b2,b3;
+ vector unsigned char ob1,ob2,ob3,ob4;
+
+ DECLARE_ALIGNED_16(int, vbuf[16]);
+ vector signed int *v = (vector signed int *)vbuf, *d;
+
+ for(y=0; y<b_h; y++){
+ //FIXME ugly misuse of obmc_stride
+
+ uint8_t *obmc1= obmc + y*obmc_stride;
+ uint8_t *obmc2= obmc1+ (obmc_stride>>1);
+ uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
+ uint8_t *obmc4= obmc3+ (obmc_stride>>1);
+
+ dst = slice_buffer_get_line(sb, src_y + y);
+ d = (vector signed int *)(dst + src_x);
+
+//FIXME i could avoid some loads!
+
+ // load blocks
+ LOAD_BLOCKS
+
+ // load obmcs
+ LOAD_OBMCS
+
+ // steps 0 1
+ STEPS_0_1
+
+ FINAL_STEP_VEC
+
+ }
+
+}
+
+static void inner_add_yblock_a_bw_16_obmc_32_altivec(uint8_t *obmc,
+ const int obmc_stride,
+ uint8_t * * block, int b_w,
+ int b_h, int src_x, int src_y,
+ int src_stride, slice_buffer * sb,
+ int add, uint8_t * dst8)
+{
+ int y, x;
+ DWTELEM * dst;
+ vector bool int mask;
+ vector signed int vs;
+ vector unsigned short h1, h2, l1, l2;
+ vector unsigned char ih, il, ih1, il1, tmp1, tmp2, align;
+ vector unsigned char b0,b1,b2,b3;
+ vector unsigned char ob1,ob2,ob3,ob4;
+ DECLARE_ALIGNED_16(int, vbuf[b_w]);
+ vector signed int *v = (vector signed int *)vbuf, *d;
+
+ for(y=0; y<b_h; y++){
+ //FIXME ugly misuse of obmc_stride
+
+ uint8_t *obmc1= obmc + y*obmc_stride;
+ uint8_t *obmc2= obmc1+ (obmc_stride>>1);
+ uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
+ uint8_t *obmc4= obmc3+ (obmc_stride>>1);
+
+ dst = slice_buffer_get_line(sb, src_y + y);
+ d = (vector signed int *)(dst + src_x);
+
+ // load blocks
+ LOAD_BLOCKS
+
+ // load obmcs
+ LOAD_OBMCS
+
+ // steps 0 1 2 3
+ STEPS_0_1
+
+ STEPS_2_3
+
+ FINAL_STEP_VEC
+
+ }
+}
+
+
+void ff_snow_inner_add_yblock_altivec(uint8_t *obmc, const int obmc_stride,
+ uint8_t * * block, int b_w, int b_h,
+ int src_x, int src_y, int src_stride,
+ slice_buffer * sb, int add,
+ uint8_t * dst8)
+{
+ if (src_x&15) {
+ if (b_w == 16)
+ inner_add_yblock_bw_16_obmc_32_altivec(obmc, obmc_stride, block,
+ b_w, b_h, src_x, src_y,
+ src_stride, sb, add, dst8);
+ else if (b_w == 8)
+ inner_add_yblock_bw_8_obmc_16_altivec(obmc, obmc_stride, block,
+ b_w, b_h, src_x, src_y,
+ src_stride, sb, add, dst8);
+ else
+ ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,
+ src_y, src_stride, sb, add, dst8);
+ } else {
+ if (b_w == 16)
+ inner_add_yblock_a_bw_16_obmc_32_altivec(obmc, obmc_stride, block,
+ b_w, b_h, src_x, src_y,
+ src_stride, sb, add, dst8);
+ else if (b_w == 8)
+ inner_add_yblock_a_bw_8_obmc_16_altivec(obmc, obmc_stride, block,
+ b_w, b_h, src_x, src_y,
+ src_stride, sb, add, dst8);
+ else
+ ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,
+ src_y, src_stride, sb, add, dst8);
+ }
+}
+
+
+void snow_init_altivec(DSPContext* c, AVCodecContext *avctx)
+{
+ c->horizontal_compose97i = ff_snow_horizontal_compose97i_altivec;
+ c->vertical_compose97i = ff_snow_vertical_compose97i_altivec;
+ c->inner_add_yblock = ff_snow_inner_add_yblock_altivec;
+}
diff --git a/src/libffmpeg/libavcodec/ppc/types_altivec.h b/src/libffmpeg/libavcodec/ppc/types_altivec.h
new file mode 100644
index 000000000..f29026e04
--- /dev/null
+++ b/src/libffmpeg/libavcodec/ppc/types_altivec.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2006 Guillaume Poirier <gpoirier@mplayerhq.hu>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/***********************************************************************
+ * Vector types
+ **********************************************************************/
+#define vec_u8_t vector unsigned char
+#define vec_s8_t vector signed char
+#define vec_u16_t vector unsigned short
+#define vec_s16_t vector signed short
+#define vec_u32_t vector unsigned int
+#define vec_s32_t vector signed int
+
+/***********************************************************************
+ * Null vector
+ **********************************************************************/
+#define LOAD_ZERO const vec_u8_t zerov = vec_splat_u8( 0 )
+
+#define zero_u8v (vec_u8_t) zerov
+#define zero_s8v (vec_s8_t) zerov
+#define zero_u16v (vec_u16_t) zerov
+#define zero_s16v (vec_s16_t) zerov
+#define zero_u32v (vec_u32_t) zerov
+#define zero_s32v (vec_s32_t) zerov
diff --git a/src/libffmpeg/libavcodec/ppc/vc1dsp_altivec.c b/src/libffmpeg/libavcodec/ppc/vc1dsp_altivec.c
new file mode 100644
index 000000000..114c9d41f
--- /dev/null
+++ b/src/libffmpeg/libavcodec/ppc/vc1dsp_altivec.c
@@ -0,0 +1,338 @@
+/*
+ * VC-1 and WMV3 decoder - DSP functions AltiVec-optimized
+ * Copyright (c) 2006 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include "../dsputil.h"
+
+#include "gcc_fixes.h"
+
+#include "dsputil_altivec.h"
+
+// main steps of 8x8 transform
+#define STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_rnd) \
+do { \
+ t0 = vec_sl(vec_add(s0, s4), vec_2); \
+ t0 = vec_add(vec_sl(t0, vec_1), t0); \
+ t0 = vec_add(t0, vec_rnd); \
+ t1 = vec_sl(vec_sub(s0, s4), vec_2); \
+ t1 = vec_add(vec_sl(t1, vec_1), t1); \
+ t1 = vec_add(t1, vec_rnd); \
+ t2 = vec_add(vec_sl(s6, vec_2), vec_sl(s6, vec_1)); \
+ t2 = vec_add(t2, vec_sl(s2, vec_4)); \
+ t3 = vec_add(vec_sl(s2, vec_2), vec_sl(s2, vec_1)); \
+ t3 = vec_sub(t3, vec_sl(s6, vec_4)); \
+ t4 = vec_add(t0, t2); \
+ t5 = vec_add(t1, t3); \
+ t6 = vec_sub(t1, t3); \
+ t7 = vec_sub(t0, t2); \
+\
+ t0 = vec_sl(vec_add(s1, s3), vec_4); \
+ t0 = vec_add(t0, vec_sl(s5, vec_3)); \
+ t0 = vec_add(t0, vec_sl(s7, vec_2)); \
+ t0 = vec_add(t0, vec_sub(s5, s3)); \
+\
+ t1 = vec_sl(vec_sub(s1, s5), vec_4); \
+ t1 = vec_sub(t1, vec_sl(s7, vec_3)); \
+ t1 = vec_sub(t1, vec_sl(s3, vec_2)); \
+ t1 = vec_sub(t1, vec_add(s1, s7)); \
+\
+ t2 = vec_sl(vec_sub(s7, s3), vec_4); \
+ t2 = vec_add(t2, vec_sl(s1, vec_3)); \
+ t2 = vec_add(t2, vec_sl(s5, vec_2)); \
+ t2 = vec_add(t2, vec_sub(s1, s7)); \
+\
+ t3 = vec_sl(vec_sub(s5, s7), vec_4); \
+ t3 = vec_sub(t3, vec_sl(s3, vec_3)); \
+ t3 = vec_add(t3, vec_sl(s1, vec_2)); \
+ t3 = vec_sub(t3, vec_add(s3, s5)); \
+\
+ s0 = vec_add(t4, t0); \
+ s1 = vec_add(t5, t1); \
+ s2 = vec_add(t6, t2); \
+ s3 = vec_add(t7, t3); \
+ s4 = vec_sub(t7, t3); \
+ s5 = vec_sub(t6, t2); \
+ s6 = vec_sub(t5, t1); \
+ s7 = vec_sub(t4, t0); \
+}while(0)
+
+#define SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7) \
+do { \
+ s0 = vec_sra(s0, vec_3); \
+ s1 = vec_sra(s1, vec_3); \
+ s2 = vec_sra(s2, vec_3); \
+ s3 = vec_sra(s3, vec_3); \
+ s4 = vec_sra(s4, vec_3); \
+ s5 = vec_sra(s5, vec_3); \
+ s6 = vec_sra(s6, vec_3); \
+ s7 = vec_sra(s7, vec_3); \
+}while(0)
+
+#define SHIFT_VERT8(s0, s1, s2, s3, s4, s5, s6, s7) \
+do { \
+ s0 = vec_sra(s0, vec_7); \
+ s1 = vec_sra(s1, vec_7); \
+ s2 = vec_sra(s2, vec_7); \
+ s3 = vec_sra(s3, vec_7); \
+ s4 = vec_sra(vec_add(s4, vec_1s), vec_7); \
+ s5 = vec_sra(vec_add(s5, vec_1s), vec_7); \
+ s6 = vec_sra(vec_add(s6, vec_1s), vec_7); \
+ s7 = vec_sra(vec_add(s7, vec_1s), vec_7); \
+}while(0)
+
+/* main steps of 4x4 transform */
+#define STEP4(s0, s1, s2, s3, vec_rnd) \
+do { \
+ t1 = vec_add(vec_sl(s0, vec_4), s0); \
+ t1 = vec_add(t1, vec_rnd); \
+ t2 = vec_add(vec_sl(s2, vec_4), s2); \
+ t0 = vec_add(t1, t2); \
+ t1 = vec_sub(t1, t2); \
+ t3 = vec_sl(vec_sub(s3, s1), vec_1); \
+ t3 = vec_add(t3, vec_sl(t3, vec_2)); \
+ t2 = vec_add(t3, vec_sl(s1, vec_5)); \
+ t3 = vec_add(t3, vec_sl(s3, vec_3)); \
+ t3 = vec_add(t3, vec_sl(s3, vec_2)); \
+ s0 = vec_add(t0, t2); \
+ s1 = vec_sub(t1, t3); \
+ s2 = vec_add(t1, t3); \
+ s3 = vec_sub(t0, t2); \
+}while (0)
+
+#define SHIFT_HOR4(s0, s1, s2, s3) \
+ s0 = vec_sra(s0, vec_3); \
+ s1 = vec_sra(s1, vec_3); \
+ s2 = vec_sra(s2, vec_3); \
+ s3 = vec_sra(s3, vec_3);
+
+#define SHIFT_VERT4(s0, s1, s2, s3) \
+ s0 = vec_sra(s0, vec_7); \
+ s1 = vec_sra(s1, vec_7); \
+ s2 = vec_sra(s2, vec_7); \
+ s3 = vec_sra(s3, vec_7);
+
+/** Do inverse transform on 8x8 block
+*/
+static void vc1_inv_trans_8x8_altivec(DCTELEM block[64])
+{
+ vector signed short src0, src1, src2, src3, src4, src5, src6, src7;
+ vector signed int s0, s1, s2, s3, s4, s5, s6, s7;
+ vector signed int s8, s9, sA, sB, sC, sD, sE, sF;
+ vector signed int t0, t1, t2, t3, t4, t5, t6, t7;
+ const vector signed int vec_64 = vec_sl(vec_splat_s32(4), vec_splat_u32(4));
+ const vector unsigned int vec_7 = vec_splat_u32(7);
+ const vector unsigned int vec_5 = vec_splat_u32(5);
+ const vector unsigned int vec_4 = vec_splat_u32(4);
+ const vector signed int vec_4s = vec_splat_s32(4);
+ const vector unsigned int vec_3 = vec_splat_u32(3);
+ const vector unsigned int vec_2 = vec_splat_u32(2);
+ const vector signed int vec_1s = vec_splat_s32(1);
+ const vector unsigned int vec_1 = vec_splat_u32(1);
+
+
+ src0 = vec_ld( 0, block);
+ src1 = vec_ld( 16, block);
+ src2 = vec_ld( 32, block);
+ src3 = vec_ld( 48, block);
+ src4 = vec_ld( 64, block);
+ src5 = vec_ld( 80, block);
+ src6 = vec_ld( 96, block);
+ src7 = vec_ld(112, block);
+
+ TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
+ s0 = vec_unpackl(src0);
+ s1 = vec_unpackl(src1);
+ s2 = vec_unpackl(src2);
+ s3 = vec_unpackl(src3);
+ s4 = vec_unpackl(src4);
+ s5 = vec_unpackl(src5);
+ s6 = vec_unpackl(src6);
+ s7 = vec_unpackl(src7);
+ s8 = vec_unpackh(src0);
+ s9 = vec_unpackh(src1);
+ sA = vec_unpackh(src2);
+ sB = vec_unpackh(src3);
+ sC = vec_unpackh(src4);
+ sD = vec_unpackh(src5);
+ sE = vec_unpackh(src6);
+ sF = vec_unpackh(src7);
+ STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_4s);
+ SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7);
+ STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_4s);
+ SHIFT_HOR8(s8, s9, sA, sB, sC, sD, sE, sF);
+ src0 = vec_pack(s8, s0);
+ src1 = vec_pack(s9, s1);
+ src2 = vec_pack(sA, s2);
+ src3 = vec_pack(sB, s3);
+ src4 = vec_pack(sC, s4);
+ src5 = vec_pack(sD, s5);
+ src6 = vec_pack(sE, s6);
+ src7 = vec_pack(sF, s7);
+ TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
+
+ s0 = vec_unpackl(src0);
+ s1 = vec_unpackl(src1);
+ s2 = vec_unpackl(src2);
+ s3 = vec_unpackl(src3);
+ s4 = vec_unpackl(src4);
+ s5 = vec_unpackl(src5);
+ s6 = vec_unpackl(src6);
+ s7 = vec_unpackl(src7);
+ s8 = vec_unpackh(src0);
+ s9 = vec_unpackh(src1);
+ sA = vec_unpackh(src2);
+ sB = vec_unpackh(src3);
+ sC = vec_unpackh(src4);
+ sD = vec_unpackh(src5);
+ sE = vec_unpackh(src6);
+ sF = vec_unpackh(src7);
+ STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_64);
+ SHIFT_VERT8(s0, s1, s2, s3, s4, s5, s6, s7);
+ STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_64);
+ SHIFT_VERT8(s8, s9, sA, sB, sC, sD, sE, sF);
+ src0 = vec_pack(s8, s0);
+ src1 = vec_pack(s9, s1);
+ src2 = vec_pack(sA, s2);
+ src3 = vec_pack(sB, s3);
+ src4 = vec_pack(sC, s4);
+ src5 = vec_pack(sD, s5);
+ src6 = vec_pack(sE, s6);
+ src7 = vec_pack(sF, s7);
+
+ vec_st(src0, 0, block);
+ vec_st(src1, 16, block);
+ vec_st(src2, 32, block);
+ vec_st(src3, 48, block);
+ vec_st(src4, 64, block);
+ vec_st(src5, 80, block);
+ vec_st(src6, 96, block);
+ vec_st(src7,112, block);
+}
+
+/** Do inverse transform on 8x4 part of block
+*/
+static void vc1_inv_trans_8x4_altivec(DCTELEM block[64], int n)
+{
+ vector signed short src0, src1, src2, src3, src4, src5, src6, src7;
+ vector signed int s0, s1, s2, s3, s4, s5, s6, s7;
+ vector signed int s8, s9, sA, sB, sC, sD, sE, sF;
+ vector signed int t0, t1, t2, t3, t4, t5, t6, t7;
+ const vector signed int vec_64 = vec_sl(vec_splat_s32(4), vec_splat_u32(4));
+ const vector unsigned int vec_7 = vec_splat_u32(7);
+ const vector unsigned int vec_5 = vec_splat_u32(5);
+ const vector unsigned int vec_4 = vec_splat_u32(4);
+ const vector signed int vec_4s = vec_splat_s32(4);
+ const vector unsigned int vec_3 = vec_splat_u32(3);
+ const vector unsigned int vec_2 = vec_splat_u32(2);
+ const vector unsigned int vec_1 = vec_splat_u32(1);
+
+ src0 = vec_ld( 0, block);
+ src1 = vec_ld( 16, block);
+ src2 = vec_ld( 32, block);
+ src3 = vec_ld( 48, block);
+ src4 = vec_ld( 64, block);
+ src5 = vec_ld( 80, block);
+ src6 = vec_ld( 96, block);
+ src7 = vec_ld(112, block);
+
+ TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
+ s0 = vec_unpackl(src0);
+ s1 = vec_unpackl(src1);
+ s2 = vec_unpackl(src2);
+ s3 = vec_unpackl(src3);
+ s4 = vec_unpackl(src4);
+ s5 = vec_unpackl(src5);
+ s6 = vec_unpackl(src6);
+ s7 = vec_unpackl(src7);
+ s8 = vec_unpackh(src0);
+ s9 = vec_unpackh(src1);
+ sA = vec_unpackh(src2);
+ sB = vec_unpackh(src3);
+ sC = vec_unpackh(src4);
+ sD = vec_unpackh(src5);
+ sE = vec_unpackh(src6);
+ sF = vec_unpackh(src7);
+ STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_4s);
+ SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7);
+ STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_4s);
+ SHIFT_HOR8(s8, s9, sA, sB, sC, sD, sE, sF);
+ src0 = vec_pack(s8, s0);
+ src1 = vec_pack(s9, s1);
+ src2 = vec_pack(sA, s2);
+ src3 = vec_pack(sB, s3);
+ src4 = vec_pack(sC, s4);
+ src5 = vec_pack(sD, s5);
+ src6 = vec_pack(sE, s6);
+ src7 = vec_pack(sF, s7);
+ TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
+
+ if(!n){ // upper half of block
+ s0 = vec_unpackh(src0);
+ s1 = vec_unpackh(src1);
+ s2 = vec_unpackh(src2);
+ s3 = vec_unpackh(src3);
+ s8 = vec_unpackl(src0);
+ s9 = vec_unpackl(src1);
+ sA = vec_unpackl(src2);
+ sB = vec_unpackl(src3);
+ STEP4(s0, s1, s2, s3, vec_64);
+ SHIFT_VERT4(s0, s1, s2, s3);
+ STEP4(s8, s9, sA, sB, vec_64);
+ SHIFT_VERT4(s8, s9, sA, sB);
+ src0 = vec_pack(s0, s8);
+ src1 = vec_pack(s1, s9);
+ src2 = vec_pack(s2, sA);
+ src3 = vec_pack(s3, sB);
+
+ vec_st(src0, 0, block);
+ vec_st(src1, 16, block);
+ vec_st(src2, 32, block);
+ vec_st(src3, 48, block);
+ } else { //lower half of block
+ s0 = vec_unpackh(src4);
+ s1 = vec_unpackh(src5);
+ s2 = vec_unpackh(src6);
+ s3 = vec_unpackh(src7);
+ s8 = vec_unpackl(src4);
+ s9 = vec_unpackl(src5);
+ sA = vec_unpackl(src6);
+ sB = vec_unpackl(src7);
+ STEP4(s0, s1, s2, s3, vec_64);
+ SHIFT_VERT4(s0, s1, s2, s3);
+ STEP4(s8, s9, sA, sB, vec_64);
+ SHIFT_VERT4(s8, s9, sA, sB);
+ src4 = vec_pack(s0, s8);
+ src5 = vec_pack(s1, s9);
+ src6 = vec_pack(s2, sA);
+ src7 = vec_pack(s3, sB);
+
+ vec_st(src4, 64, block);
+ vec_st(src5, 80, block);
+ vec_st(src6, 96, block);
+ vec_st(src7,112, block);
+ }
+}
+
+
+void vc1dsp_init_altivec(DSPContext* dsp, AVCodecContext *avctx) {
+ dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_altivec;
+ dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_altivec;
+}
diff --git a/src/libffmpeg/libavcodec/smacker.c b/src/libffmpeg/libavcodec/smacker.c
index 2f2185848..2e1784075 100644
--- a/src/libffmpeg/libavcodec/smacker.c
+++ b/src/libffmpeg/libavcodec/smacker.c
@@ -320,12 +320,12 @@ static int decode_header_trees(SmackVContext *smk) {
return 0;
}
-static always_inline void last_reset(int *recode, int *last) {
+static av_always_inline void last_reset(int *recode, int *last) {
recode[last[0]] = recode[last[1]] = recode[last[2]] = 0;
}
/* get code and update history */
-static always_inline int smk_get_code(GetBitContext *gb, int *recode, int *last) {
+static av_always_inline int smk_get_code(GetBitContext *gb, int *recode, int *last) {
register int *table = recode;
int v, b;
diff --git a/src/libffmpeg/libavcodec/snow.c b/src/libffmpeg/libavcodec/snow.c
index 346d56861..5e93d40a1 100644
--- a/src/libffmpeg/libavcodec/snow.c
+++ b/src/libffmpeg/libavcodec/snow.c
@@ -439,6 +439,7 @@ typedef struct SnowContext{
int always_reset;
int version;
int spatial_decomposition_type;
+ int last_spatial_decomposition_type;
int temporal_decomposition_type;
int spatial_decomposition_count;
int temporal_decomposition_count;
@@ -452,15 +453,19 @@ typedef struct SnowContext{
int chroma_v_shift;
int spatial_scalability;
int qlog;
+ int last_qlog;
int lambda;
int lambda2;
int pass1_rc;
int mv_scale;
+ int last_mv_scale;
int qbias;
+ int last_qbias;
#define QBIAS_SHIFT 3
int b_width;
int b_height;
int block_max_depth;
+ int last_block_max_depth;
Plane plane[MAX_PLANES];
BlockNode *block;
#define ME_CACHE_SIZE 1024
@@ -709,7 +714,7 @@ static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
return v;
}
-static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
+static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
const int mirror_left= !highpass;
const int mirror_right= (width&1) ^ highpass;
const int w= (width>>1) - 1 + (highpass & width);
@@ -732,7 +737,7 @@ static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst
}
#ifndef lift5
-static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
+static av_always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
const int mirror_left= !highpass;
const int mirror_right= (width&1) ^ highpass;
const int w= (width>>1) - 1 + (highpass & width);
@@ -764,7 +769,7 @@ static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int ds
#endif
#ifndef liftS
-static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
+static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
const int mirror_left= !highpass;
const int mirror_right= (width&1) ^ highpass;
const int w= (width>>1) - 1 + (highpass & width);
@@ -1849,7 +1854,7 @@ static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, sli
return;
}
-static void reset_contexts(SnowContext *s){
+static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
int plane_index, level, orientation;
for(plane_index=0; plane_index<3; plane_index++){
@@ -2208,7 +2213,7 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){
}
#endif
-static always_inline int same_block(BlockNode *a, BlockNode *b){
+static av_always_inline int same_block(BlockNode *a, BlockNode *b){
if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
}else{
@@ -2287,12 +2292,10 @@ static void decode_q_branch(SnowContext *s, int level, int x, int y){
}
if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
- int type;
+ int type, mx, my;
int l = left->color[0];
int cb= left->color[1];
int cr= left->color[2];
- int mx= mid_pred(left->mx, top->mx, tr->mx);
- int my= mid_pred(left->my, top->my, tr->my);
int ref = 0;
int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
@@ -2557,7 +2560,7 @@ void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * *
}
//FIXME name clenup (b_w, block_w, b_width stuff)
-static always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
+static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
const int b_width = s->b_width << s->block_max_depth;
const int b_height= s->b_height << s->block_max_depth;
const int b_stride= b_width;
@@ -2716,7 +2719,7 @@ assert(src_stride > 2*MB_SIZE + 5);
#endif
}
-static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
+static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
Plane *p= &s->plane[plane_index];
const int mb_w= s->b_width << s->block_max_depth;
const int mb_h= s->b_height << s->block_max_depth;
@@ -2783,7 +2786,7 @@ static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer *
STOP_TIMER("predict_slice")
}
-static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
+static av_always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
Plane *p= &s->plane[plane_index];
const int mb_w= s->b_width << s->block_max_depth;
const int mb_h= s->b_height << s->block_max_depth;
@@ -2840,7 +2843,7 @@ static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_
STOP_TIMER("predict_slice")
}
-static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
+static av_always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
const int mb_h= s->b_height << s->block_max_depth;
int mb_y;
for(mb_y=0; mb_y<=mb_h; mb_y++)
@@ -3098,7 +3101,7 @@ static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
return distortion + rate*penalty_factor;
}
-static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
+static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
const int b_stride= s->b_width << s->block_max_depth;
BlockNode *block= &s->block[mb_x + mb_y * b_stride];
BlockNode backup= *block;
@@ -3137,12 +3140,12 @@ static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3
}
/* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
-static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
+static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
int p[2] = {p0, p1};
return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
}
-static always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
+static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
const int b_stride= s->b_width << s->block_max_depth;
BlockNode *block= &s->block[mb_x + mb_y * b_stride];
BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
@@ -3607,8 +3610,14 @@ static void encode_header(SnowContext *s){
memset(kstate, MID_STATE, sizeof(kstate));
put_rac(&s->c, kstate, s->keyframe);
- if(s->keyframe || s->always_reset)
+ if(s->keyframe || s->always_reset){
reset_contexts(s);
+ s->last_spatial_decomposition_type=
+ s->last_qlog=
+ s->last_qbias=
+ s->last_mv_scale=
+ s->last_block_max_depth= 0;
+ }
if(s->keyframe){
put_symbol(&s->c, s->header_state, s->version, 0);
put_rac(&s->c, s->header_state, s->always_reset);
@@ -3631,11 +3640,17 @@ static void encode_header(SnowContext *s){
}
}
}
- put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
- put_symbol(&s->c, s->header_state, s->qlog, 1);
- put_symbol(&s->c, s->header_state, s->mv_scale, 0);
- put_symbol(&s->c, s->header_state, s->qbias, 1);
- put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
+ put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
+ put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
+ put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
+ put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
+ put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
+
+ s->last_spatial_decomposition_type= s->spatial_decomposition_type;
+ s->last_qlog = s->qlog;
+ s->last_qbias = s->qbias;
+ s->last_mv_scale = s->mv_scale;
+ s->last_block_max_depth = s->block_max_depth;
}
static int decode_header(SnowContext *s){
@@ -3645,8 +3660,14 @@ static int decode_header(SnowContext *s){
memset(kstate, MID_STATE, sizeof(kstate));
s->keyframe= get_rac(&s->c, kstate);
- if(s->keyframe || s->always_reset)
+ if(s->keyframe || s->always_reset){
reset_contexts(s);
+ s->spatial_decomposition_type=
+ s->qlog=
+ s->qbias=
+ s->mv_scale=
+ s->block_max_depth= 0;
+ }
if(s->keyframe){
s->version= get_symbol(&s->c, s->header_state, 0);
if(s->version>0){
@@ -3677,16 +3698,16 @@ static int decode_header(SnowContext *s){
}
}
- s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
+ s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
if(s->spatial_decomposition_type > 2){
av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
return -1;
}
- s->qlog= get_symbol(&s->c, s->header_state, 1);
- s->mv_scale= get_symbol(&s->c, s->header_state, 0);
- s->qbias= get_symbol(&s->c, s->header_state, 1);
- s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
+ s->qlog += get_symbol(&s->c, s->header_state, 1);
+ s->mv_scale += get_symbol(&s->c, s->header_state, 1);
+ s->qbias += get_symbol(&s->c, s->header_state, 1);
+ s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
if(s->block_max_depth > 1 || s->block_max_depth < 0){
av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
s->block_max_depth= 0;
@@ -4177,7 +4198,6 @@ redo_frame:
pict->pict_type= FF_I_TYPE;
s->keyframe=1;
s->current_picture.key_frame=1;
- reset_contexts(s);
goto redo_frame;
}
diff --git a/src/libffmpeg/libavcodec/snow.h b/src/libffmpeg/libavcodec/snow.h
index f7cee131a..6794d2c5a 100644
--- a/src/libffmpeg/libavcodec/snow.h
+++ b/src/libffmpeg/libavcodec/snow.h
@@ -137,7 +137,7 @@ static int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int
/* C bits used by mmx/sse2/altivec */
-static always_inline void snow_interleave_line_header(int * i, int width, DWTELEM * low, DWTELEM * high){
+static av_always_inline void snow_interleave_line_header(int * i, int width, DWTELEM * low, DWTELEM * high){
(*i) = (width) - 2;
if (width & 1){
@@ -146,14 +146,14 @@ static always_inline void snow_interleave_line_header(int * i, int width, DWTELE
}
}
-static always_inline void snow_interleave_line_footer(int * i, DWTELEM * low, DWTELEM * high){
+static av_always_inline void snow_interleave_line_footer(int * i, DWTELEM * low, DWTELEM * high){
for (; (*i)>=0; (*i)-=2){
low[(*i)+1] = high[(*i)>>1];
low[*i] = low[(*i)>>1];
}
}
-static always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w, int lift_high, int mul, int add, int shift){
+static av_always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w, int lift_high, int mul, int add, int shift){
for(; i<w; i++){
dst[i] = src[i] - ((mul * (ref[i] + ref[i + 1]) + add) >> shift);
}
@@ -163,7 +163,7 @@ static always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELEM *
}
}
-static always_inline void snow_horizontal_compose_liftS_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w){
+static av_always_inline void snow_horizontal_compose_liftS_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w){
for(; i<w; i++){
dst[i] = src[i] - (((-(ref[i] + ref[(i+1)])+W_BO) - 4 * src[i]) >> W_BS);
}
diff --git a/src/libffmpeg/libavcodec/utils.c b/src/libffmpeg/libavcodec/utils.c
index c3661dda7..36dcc7746 100644
--- a/src/libffmpeg/libavcodec/utils.c
+++ b/src/libffmpeg/libavcodec/utils.c
@@ -421,7 +421,7 @@ static const char* context_to_name(void* ptr) {
static const AVOption options[]={
{"b", "set video bitrate (in bits/s)", OFFSET(bit_rate), FF_OPT_TYPE_INT, AV_CODEC_DEFAULT_BITRATE, INT_MIN, INT_MAX, V|A|E},
-{"bt", "set video bitrate tolerance (in bits/s)", OFFSET(bit_rate_tolerance), FF_OPT_TYPE_INT, AV_CODEC_DEFAULT_BITRATE*20, INT_MIN, INT_MAX, V|E},
+{"bt", "set video bitrate tolerance (in bits/s)", OFFSET(bit_rate_tolerance), FF_OPT_TYPE_INT, AV_CODEC_DEFAULT_BITRATE*20, 1, INT_MAX, V|E},
{"flags", NULL, OFFSET(flags), FF_OPT_TYPE_FLAGS, DEFAULT, INT_MIN, INT_MAX, V|A|E|D, "flags"},
{"mv4", "use four motion vector by macroblock (mpeg4)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_4MV, INT_MIN, INT_MAX, V|E, "flags"},
{"obmc", "use overlapped block motion compensation (h263+)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_OBMC, INT_MIN, INT_MAX, V|E, "flags"},
@@ -464,7 +464,7 @@ static const AVOption options[]={
{"extradata_size", NULL, OFFSET(extradata_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
{"time_base", NULL, OFFSET(time_base), FF_OPT_TYPE_RATIONAL, DEFAULT, INT_MIN, INT_MAX},
{"g", "set the group of picture size", OFFSET(gop_size), FF_OPT_TYPE_INT, 12, INT_MIN, INT_MAX, V|E},
-{"rate_emu", NULL, OFFSET(rate_emu), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"rate_emu", "frame rate emulation", OFFSET(rate_emu), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
{"ar", "set audio sampling rate (in Hz)", OFFSET(sample_rate), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
{"ac", "set number of audio channels", OFFSET(channels), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
{"cutoff", "set cutoff bandwidth", OFFSET(cutoff), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, A|E},
@@ -509,15 +509,15 @@ static const AVOption options[]={
{"edge", "edge padding bug (autodetected per fourcc/version)", 0, FF_OPT_TYPE_CONST, FF_BUG_EDGE, INT_MIN, INT_MAX, V|D, "bug"},
{"hpel_chroma", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_HPEL_CHROMA, INT_MIN, INT_MAX, V|D, "bug"},
{"dc_clip", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_DC_CLIP, INT_MIN, INT_MAX, V|D, "bug"},
-{"ms", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_MS, INT_MIN, INT_MAX, V|D, "bug"},
+{"ms", "workaround various bugs in microsofts broken decoders", 0, FF_OPT_TYPE_CONST, FF_BUG_MS, INT_MIN, INT_MAX, V|D, "bug"},
{"lelim", "single coefficient elimination threshold for luminance (negative values also consider dc coefficient)", OFFSET(luma_elim_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
{"celim", "single coefficient elimination threshold for chrominance (negative values also consider dc coefficient)", OFFSET(chroma_elim_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
{"strict", "how strictly to follow the standards", OFFSET(strict_std_compliance), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "strict"},
-{"very", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_VERY_STRICT, INT_MIN, INT_MAX, V|E, "strict"},
-{"strict", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_STRICT, INT_MIN, INT_MAX, V|E, "strict"},
+{"very", "strictly conform to a older more strict version of the spec or reference software", 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_VERY_STRICT, INT_MIN, INT_MAX, V|E, "strict"},
+{"strict", "strictly conform to all the things in the spec no matter what consequences", 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_STRICT, INT_MIN, INT_MAX, V|E, "strict"},
{"normal", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_NORMAL, INT_MIN, INT_MAX, V|E, "strict"},
-{"inofficial", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_INOFFICIAL, INT_MIN, INT_MAX, V|E, "strict"},
-{"experimental", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_EXPERIMENTAL, INT_MIN, INT_MAX, V|E, "strict"},
+{"inofficial", "allow inofficial extensions", 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_INOFFICIAL, INT_MIN, INT_MAX, V|E, "strict"},
+{"experimental", "allow non standarized experimental things", 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_EXPERIMENTAL, INT_MIN, INT_MAX, V|E, "strict"},
{"b_qoffset", "qp offset between p and b frames", OFFSET(b_quant_offset), FF_OPT_TYPE_FLOAT, 1.25, FLT_MIN, FLT_MAX, V|E},
{"er", "set error resilience strategy", OFFSET(error_resilience), FF_OPT_TYPE_INT, FF_ER_CAREFUL, INT_MIN, INT_MAX, V|D, "er"},
{"careful", NULL, 0, FF_OPT_TYPE_CONST, FF_ER_CAREFUL, INT_MIN, INT_MAX, V|D, "er"},
@@ -549,14 +549,14 @@ static const AVOption options[]={
{"mmx", NULL, 0, FF_OPT_TYPE_CONST, FF_DCT_MMX, INT_MIN, INT_MAX, V|E, "dct"},
{"mlib", NULL, 0, FF_OPT_TYPE_CONST, FF_DCT_MLIB, INT_MIN, INT_MAX, V|E, "dct"},
{"altivec", NULL, 0, FF_OPT_TYPE_CONST, FF_DCT_ALTIVEC, INT_MIN, INT_MAX, V|E, "dct"},
-{"faan", "floating point AAN", 0, FF_OPT_TYPE_CONST, FF_DCT_FAAN, INT_MIN, INT_MAX, V|E, "dct"},
+{"faan", "floating point AAN DCT", 0, FF_OPT_TYPE_CONST, FF_DCT_FAAN, INT_MIN, INT_MAX, V|E, "dct"},
{"lumi_mask", "compresses bright areas stronger than medium ones", OFFSET(lumi_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E},
{"tcplx_mask", "temporal complexity masking", OFFSET(temporal_cplx_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E},
{"scplx_mask", "spatial complexity masking", OFFSET(spatial_cplx_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E},
{"p_mask", "inter masking", OFFSET(p_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E},
{"dark_mask", "compresses dark areas stronger than medium ones", OFFSET(dark_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E},
{"unused", NULL, OFFSET(unused), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"idct", "use interlaced DCT", OFFSET(idct_algo), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, V|E|D, "idct"},
+{"idct", "select IDCT implementation", OFFSET(idct_algo), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, V|E|D, "idct"},
{"auto", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_AUTO, INT_MIN, INT_MAX, V|E|D, "idct"},
{"int", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_INT, INT_MIN, INT_MAX, V|E|D, "idct"},
{"simple", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLE, INT_MIN, INT_MAX, V|E|D, "idct"},
@@ -582,7 +582,7 @@ static const AVOption options[]={
{"left", NULL, 0, FF_OPT_TYPE_CONST, FF_PRED_LEFT, INT_MIN, INT_MAX, V|E, "pred"},
{"plane", NULL, 0, FF_OPT_TYPE_CONST, FF_PRED_PLANE, INT_MIN, INT_MAX, V|E, "pred"},
{"median", NULL, 0, FF_OPT_TYPE_CONST, FF_PRED_MEDIAN, INT_MIN, INT_MAX, V|E, "pred"},
-{"aspect", NULL, OFFSET(sample_aspect_ratio), FF_OPT_TYPE_RATIONAL, DEFAULT, 0, 10, V|E},
+{"aspect", "sample aspect ratio", OFFSET(sample_aspect_ratio), FF_OPT_TYPE_RATIONAL, DEFAULT, 0, 10, V|E},
{"debug", "print specific debug info", OFFSET(debug), FF_OPT_TYPE_FLAGS, DEFAULT, 0, INT_MAX, V|A|S|E|D, "debug"},
{"pict", "picture info", 0, FF_OPT_TYPE_CONST, FF_DEBUG_PICT_INFO, INT_MIN, INT_MAX, V|D, "debug"},
{"rc", "rate control", 0, FF_OPT_TYPE_CONST, FF_DEBUG_RC, INT_MIN, INT_MAX, V|E, "debug"},
@@ -603,8 +603,8 @@ static const AVOption options[]={
{"pf", "forward predicted MVs of P-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_P_FOR, INT_MIN, INT_MAX, V|D, "debug_mv"},
{"bf", "forward predicted MVs of B-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_B_FOR, INT_MIN, INT_MAX, V|D, "debug_mv"},
{"bb", "backward predicted MVs of B-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_B_BACK, INT_MIN, INT_MAX, V|D, "debug_mv"},
-{"mb_qmin", "obsolete, use vqmin", OFFSET(mb_qmin), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"mb_qmax", "obsolete, use vqmax", OFFSET(mb_qmax), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"mb_qmin", "obsolete, use qmin", OFFSET(mb_qmin), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"mb_qmax", "obsolete, use qmax", OFFSET(mb_qmax), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
{"cmp", "full pel me compare function", OFFSET(me_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"},
{"subcmp", "sub pel me compare function", OFFSET(me_sub_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"},
{"mbcmp", "macroblock compare function", OFFSET(mb_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"},
@@ -654,11 +654,11 @@ static const AVOption options[]={
{"lmin", "min lagrange factor (VBR)", OFFSET(lmin), FF_OPT_TYPE_INT, 2*FF_QP2LAMBDA, 0, INT_MAX, V|E},
{"lmax", "max lagrange factor (VBR)", OFFSET(lmax), FF_OPT_TYPE_INT, 31*FF_QP2LAMBDA, 0, INT_MAX, V|E},
{"nr", "noise reduction", OFFSET(noise_reduction), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"rc_init_occupancy", NULL, OFFSET(rc_initial_buffer_occupancy), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"rc_init_occupancy", "number of bits which should be loaded into the rc buffer before decoding starts", OFFSET(rc_initial_buffer_occupancy), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
{"inter_threshold", NULL, OFFSET(inter_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
{"flags2", NULL, OFFSET(flags2), FF_OPT_TYPE_FLAGS, CODEC_FLAG2_FASTPSKIP, INT_MIN, INT_MAX, V|A|E|D, "flags2"},
{"error", NULL, OFFSET(error_rate), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"antialias", NULL, OFFSET(antialias_algo), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D, "aa"},
+{"antialias", "MP3 antialias algorithm", OFFSET(antialias_algo), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D, "aa"},
{"auto", NULL, 0, FF_OPT_TYPE_CONST, FF_AA_AUTO, INT_MIN, INT_MAX, V|D, "aa"},
{"fastint", NULL, 0, FF_OPT_TYPE_CONST, FF_AA_FASTINT, INT_MIN, INT_MAX, V|D, "aa"},
{"int", NULL, 0, FF_OPT_TYPE_CONST, FF_AA_INT, INT_MIN, INT_MAX, V|D, "aa"},
@@ -669,8 +669,8 @@ static const AVOption options[]={
{"mb_threshold", "macroblock threshold", OFFSET(mb_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
{"dc", "intra_dc_precision", OFFSET(intra_dc_precision), FF_OPT_TYPE_INT, 0, INT_MIN, INT_MAX, V|E},
{"nssew", "nsse weight", OFFSET(nsse_weight), FF_OPT_TYPE_INT, 8, INT_MIN, INT_MAX, V|E},
-{"skip_top", NULL, OFFSET(skip_top), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D},
-{"skip_bottom", NULL, OFFSET(skip_bottom), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D},
+{"skip_top", "number of macroblock rows at the top which are skipped", OFFSET(skip_top), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D},
+{"skip_bottom", "number of macroblock rows at the bottom which are skipped", OFFSET(skip_bottom), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D},
{"profile", NULL, OFFSET(profile), FF_OPT_TYPE_INT, FF_PROFILE_UNKNOWN, INT_MIN, INT_MAX, V|A|E, "profile"},
{"unknown", NULL, 0, FF_OPT_TYPE_CONST, FF_PROFILE_UNKNOWN, INT_MIN, INT_MAX, V|A|E, "profile"},
{"level", NULL, OFFSET(level), FF_OPT_TYPE_INT, FF_LEVEL_UNKNOWN, INT_MIN, INT_MAX, V|A|E, "level"},
@@ -687,42 +687,43 @@ static const AVOption options[]={
{"bidir_refine", "refine the two motion vectors used in bidirectional macroblocks", OFFSET(bidir_refine), FF_OPT_TYPE_INT, DEFAULT, 0, 4, V|E},
{"brd_scale", "downscales frames for dynamic B-frame decision", OFFSET(brd_scale), FF_OPT_TYPE_INT, DEFAULT, 0, 10, V|E},
{"crf", "enables constant quality mode, and selects the quality (x264)", OFFSET(crf), FF_OPT_TYPE_FLOAT, DEFAULT, 0, 51, V|E},
-{"cqp", NULL, OFFSET(cqp), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, V|E},
+{"cqp", "constant quantization parameter rate control method", OFFSET(cqp), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, V|E},
{"keyint_min", "minimum interval between IDR-frames (x264)", OFFSET(keyint_min), FF_OPT_TYPE_INT, 25, INT_MIN, INT_MAX, V|E},
{"refs", "reference frames to consider for motion compensation (Snow)", OFFSET(refs), FF_OPT_TYPE_INT, 1, INT_MIN, INT_MAX, V|E},
-{"chromaoffset", NULL, OFFSET(chromaoffset), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"bframebias", NULL, OFFSET(bframebias), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"chromaoffset", "chroma qp offset from luma", OFFSET(chromaoffset), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"bframebias", "influences how often B-frames are used", OFFSET(bframebias), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
{"trellis", "rate-distortion optimal quantization", OFFSET(trellis), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|A|E},
-{"directpred", NULL, OFFSET(directpred), FF_OPT_TYPE_INT, 2, INT_MIN, INT_MAX, V|E},
+{"directpred", "direct mv prediction mode - 0 (none), 1 (spatial), 2 (temporal)", OFFSET(directpred), FF_OPT_TYPE_INT, 2, INT_MIN, INT_MAX, V|E},
{"bpyramid", "allows B-frames to be used as references for predicting", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_BPYRAMID, INT_MIN, INT_MAX, V|E, "flags2"},
-{"wpred", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_WPRED, INT_MIN, INT_MAX, V|E, "flags2"},
-{"mixed_refs", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_MIXED_REFS, INT_MIN, INT_MAX, V|E, "flags2"},
-{"8x8dct", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_8X8DCT, INT_MIN, INT_MAX, V|E, "flags2"},
-{"fastpskip", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_FASTPSKIP, INT_MIN, INT_MAX, V|E, "flags2"},
-{"aud", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_AUD, INT_MIN, INT_MAX, V|E, "flags2"},
-{"brdo", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_BRDO, INT_MIN, INT_MAX, V|E, "flags2"},
-{"complexityblur", NULL, OFFSET(complexityblur), FF_OPT_TYPE_FLOAT, 20.0, FLT_MIN, FLT_MAX, V|E},
-{"deblockalpha", NULL, OFFSET(deblockalpha), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"deblockbeta", NULL, OFFSET(deblockbeta), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"partitions", NULL, OFFSET(partitions), FF_OPT_TYPE_FLAGS, DEFAULT, INT_MIN, INT_MAX, V|E, "partitions"},
+{"wpred", "weighted biprediction for b-frames (H.264)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_WPRED, INT_MIN, INT_MAX, V|E, "flags2"},
+{"mixed_refs", "one reference per partition, as opposed to one reference per macroblock", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_MIXED_REFS, INT_MIN, INT_MAX, V|E, "flags2"},
+{"8x8dct", "high profile 8x8 transform (H.264)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_8X8DCT, INT_MIN, INT_MAX, V|E, "flags2"},
+{"fastpskip", "fast pskip (H.264)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_FASTPSKIP, INT_MIN, INT_MAX, V|E, "flags2"},
+{"aud", "access unit delimiters (H.264)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_AUD, INT_MIN, INT_MAX, V|E, "flags2"},
+{"brdo", "b-frame rate-distortion optimization", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_BRDO, INT_MIN, INT_MAX, V|E, "flags2"},
+{"skiprd", "RD optimal MB level residual skiping", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_SKIP_RD, INT_MIN, INT_MAX, V|E, "flags2"},
+{"complexityblur", "reduce fluctuations in qp (before curve compression)", OFFSET(complexityblur), FF_OPT_TYPE_FLOAT, 20.0, FLT_MIN, FLT_MAX, V|E},
+{"deblockalpha", "in-loop deblocking filter alphac0 parameter", OFFSET(deblockalpha), FF_OPT_TYPE_INT, DEFAULT, -6, 6, V|E},
+{"deblockbeta", "in-loop deblocking filter beta parameter", OFFSET(deblockbeta), FF_OPT_TYPE_INT, DEFAULT, -6, 6, V|E},
+{"partitions", "macroblock subpartition sizes to consider", OFFSET(partitions), FF_OPT_TYPE_FLAGS, DEFAULT, INT_MIN, INT_MAX, V|E, "partitions"},
{"parti4x4", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_I4X4, INT_MIN, INT_MAX, V|E, "partitions"},
{"parti8x8", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_I8X8, INT_MIN, INT_MAX, V|E, "partitions"},
{"partp4x4", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_P4X4, INT_MIN, INT_MAX, V|E, "partitions"},
{"partp8x8", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_P8X8, INT_MIN, INT_MAX, V|E, "partitions"},
{"partb8x8", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_B8X8, INT_MIN, INT_MAX, V|E, "partitions"},
-{"sc_factor", NULL, OFFSET(scenechange_factor), FF_OPT_TYPE_INT, 6, 0, INT_MAX, V|E},
+{"sc_factor", "multiplied by qscale for each frame and added to scene_change_score", OFFSET(scenechange_factor), FF_OPT_TYPE_INT, 6, 0, INT_MAX, V|E},
{"mv0_threshold", NULL, OFFSET(mv0_threshold), FF_OPT_TYPE_INT, 256, 0, INT_MAX, V|E},
{"ivlc", "intra vlc table", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_INTRA_VLC, INT_MIN, INT_MAX, V|E, "flags2"},
-{"b_sensitivity", NULL, OFFSET(b_sensitivity), FF_OPT_TYPE_INT, 40, 1, INT_MAX, V|E},
+{"b_sensitivity", "adjusts sensitivity of b_frame_strategy 1", OFFSET(b_sensitivity), FF_OPT_TYPE_INT, 40, 1, INT_MAX, V|E},
{"compression_level", NULL, OFFSET(compression_level), FF_OPT_TYPE_INT, FF_COMPRESSION_DEFAULT, INT_MIN, INT_MAX, V|A|E},
-{"use_lpc", NULL, OFFSET(use_lpc), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
-{"lpc_coeff_precision", NULL, OFFSET(lpc_coeff_precision), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, A|E},
+{"use_lpc", "sets whether to use LPC mode (FLAC)", OFFSET(use_lpc), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
+{"lpc_coeff_precision", "LPC coefficient precision (FLAC)", OFFSET(lpc_coeff_precision), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, A|E},
{"min_prediction_order", NULL, OFFSET(min_prediction_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
{"max_prediction_order", NULL, OFFSET(max_prediction_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
-{"prediction_order_method", NULL, OFFSET(prediction_order_method), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
+{"prediction_order_method", "search method for selecting prediction order", OFFSET(prediction_order_method), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
{"min_partition_order", NULL, OFFSET(min_partition_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
{"max_partition_order", NULL, OFFSET(max_partition_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
-{"timecode_frame_start", NULL, OFFSET(timecode_frame_start), FF_OPT_TYPE_INT, 0, 0, INT_MAX, V|E},
+{"timecode_frame_start", "GOP timecode frame start number, in non drop frame format", OFFSET(timecode_frame_start), FF_OPT_TYPE_INT, 0, 0, INT_MAX, V|E},
{"drop_frame_timecode", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_DROP_FRAME_TIMECODE, INT_MIN, INT_MAX, V|E, "flags2"},
{NULL},
};
diff --git a/src/libffmpeg/libavcodec/vc1.c b/src/libffmpeg/libavcodec/vc1.c
index 7b385ca47..231f3ca26 100644
--- a/src/libffmpeg/libavcodec/vc1.c
+++ b/src/libffmpeg/libavcodec/vc1.c
@@ -2140,7 +2140,7 @@ static void vc1_interp_mc(VC1Context *v)
dsp->avg_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
}
-static always_inline int scale_mv(int value, int bfrac, int inv, int qs)
+static av_always_inline int scale_mv(int value, int bfrac, int inv, int qs)
{
int n = bfrac;
@@ -3072,8 +3072,8 @@ static int vc1_decode_intra_block(VC1Context *v, DCTELEM block[64], int n, int c
ac_val -= 16 * s->block_wrap[n];
q1 = s->current_picture.qscale_table[mb_pos];
- if(dc_pred_dir && c_avail) q2 = s->current_picture.qscale_table[mb_pos - 1];
- if(!dc_pred_dir && a_avail) q2 = s->current_picture.qscale_table[mb_pos - s->mb_stride];
+ if(dc_pred_dir && c_avail && mb_pos) q2 = s->current_picture.qscale_table[mb_pos - 1];
+ if(!dc_pred_dir && a_avail && mb_pos >= s->mb_stride) q2 = s->current_picture.qscale_table[mb_pos - s->mb_stride];
if(n && n<4) q2 = q1;
if(coded) {
diff --git a/src/libffmpeg/libavcodec/vc1dsp.c b/src/libffmpeg/libavcodec/vc1dsp.c
index 9139ffb28..f19f266d1 100644
--- a/src/libffmpeg/libavcodec/vc1dsp.c
+++ b/src/libffmpeg/libavcodec/vc1dsp.c
@@ -326,7 +326,7 @@ static void vc1_inv_trans_4x4_c(DCTELEM block[64], int n)
/** Filter used to interpolate fractional pel values
*/
-static always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int mode, int r)
+static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int mode, int r)
{
switch(mode){
case 0: //no shift
diff --git a/src/libffmpeg/libavcodec/vp3dsp.c b/src/libffmpeg/libavcodec/vp3dsp.c
index a48515a5e..bb9fed091 100644
--- a/src/libffmpeg/libavcodec/vp3dsp.c
+++ b/src/libffmpeg/libavcodec/vp3dsp.c
@@ -39,7 +39,7 @@
#define M(a,b) (((a) * (b))>>16)
-static always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type)
+static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type)
{
int16_t *ip = input;
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
diff --git a/src/libffmpeg/libavcodec/vp5.c b/src/libffmpeg/libavcodec/vp5.c
new file mode 100644
index 000000000..ac953c7aa
--- /dev/null
+++ b/src/libffmpeg/libavcodec/vp5.c
@@ -0,0 +1,290 @@
+/**
+ * @file vp5.c
+ * VP5 compatible video decoder
+ *
+ * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "avcodec.h"
+#include "dsputil.h"
+#include "bitstream.h"
+#include "mpegvideo.h"
+
+#include "vp56.h"
+#include "vp56data.h"
+#include "vp5data.h"
+
+
+static int vp5_parse_header(vp56_context_t *s, uint8_t *buf, int buf_size,
+ int *golden_frame)
+{
+ vp56_range_coder_t *c = &s->c;
+ int rows, cols;
+
+ vp56_init_range_decoder(&s->c, buf, buf_size);
+ s->frames[VP56_FRAME_CURRENT].key_frame = !vp56_rac_get(c);
+ vp56_rac_get(c);
+ vp56_init_dequant(s, vp56_rac_gets(c, 6));
+ if (s->frames[VP56_FRAME_CURRENT].key_frame)
+ {
+ vp56_rac_gets(c, 8);
+ if(vp56_rac_gets(c, 5) > 5)
+ return 0;
+ vp56_rac_gets(c, 2);
+ if (vp56_rac_get(c)) {
+ av_log(s->avctx, AV_LOG_ERROR, "interlacing not supported\n");
+ return 0;
+ }
+ rows = vp56_rac_gets(c, 8); /* number of stored macroblock rows */
+ cols = vp56_rac_gets(c, 8); /* number of stored macroblock cols */
+ vp56_rac_gets(c, 8); /* number of displayed macroblock rows */
+ vp56_rac_gets(c, 8); /* number of displayed macroblock cols */
+ vp56_rac_gets(c, 2);
+ if (16*cols != s->avctx->coded_width ||
+ 16*rows != s->avctx->coded_height) {
+ avcodec_set_dimensions(s->avctx, 16*cols, 16*rows);
+ return 2;
+ }
+ }
+ return 1;
+}
+
+/* Gives very similar result than the vp6 version except in a few cases */
+static int vp5_adjust(int v, int t)
+{
+ int s2, s1 = v >> 31;
+ v ^= s1;
+ v -= s1;
+ v *= v < 2*t;
+ v -= t;
+ s2 = v >> 31;
+ v ^= s2;
+ v -= s2;
+ v = t - v;
+ v += s1;
+ v ^= s1;
+ return v;
+}
+
+static void vp5_parse_vector_adjustment(vp56_context_t *s, vp56_mv_t *vect)
+{
+ vp56_range_coder_t *c = &s->c;
+ int comp, di;
+
+ for (comp=0; comp<2; comp++) {
+ int delta = 0;
+ if (vp56_rac_get_prob(c, s->vector_model_dct[comp])) {
+ int sign = vp56_rac_get_prob(c, s->vector_model_sig[comp]);
+ di = vp56_rac_get_prob(c, s->vector_model_pdi[comp][0]);
+ di |= vp56_rac_get_prob(c, s->vector_model_pdi[comp][1]) << 1;
+ delta = vp56_rac_get_tree(c, vp56_pva_tree,
+ s->vector_model_pdv[comp]);
+ delta = di | (delta << 2);
+ delta = (delta ^ -sign) + sign;
+ }
+ if (!comp)
+ vect->x = delta;
+ else
+ vect->y = delta;
+ }
+}
+
+static void vp5_parse_vector_models(vp56_context_t *s)
+{
+ vp56_range_coder_t *c = &s->c;
+ int comp, node;
+
+ for (comp=0; comp<2; comp++) {
+ if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][0]))
+ s->vector_model_dct[comp] = vp56_rac_gets_nn(c, 7);
+ if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][1]))
+ s->vector_model_sig[comp] = vp56_rac_gets_nn(c, 7);
+ if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][2]))
+ s->vector_model_pdi[comp][0] = vp56_rac_gets_nn(c, 7);
+ if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][3]))
+ s->vector_model_pdi[comp][1] = vp56_rac_gets_nn(c, 7);
+ }
+
+ for (comp=0; comp<2; comp++)
+ for (node=0; node<7; node++)
+ if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][4 + node]))
+ s->vector_model_pdv[comp][node] = vp56_rac_gets_nn(c, 7);
+}
+
+static void vp5_parse_coeff_models(vp56_context_t *s)
+{
+ vp56_range_coder_t *c = &s->c;
+ uint8_t def_prob[11];
+ int node, cg, ctx;
+ int ct; /* code type */
+ int pt; /* plane type (0 for Y, 1 for U or V) */
+
+ memset(def_prob, 0x80, sizeof(def_prob));
+
+ for (pt=0; pt<2; pt++)
+ for (node=0; node<11; node++)
+ if (vp56_rac_get_prob(c, vp5_dccv_pct[pt][node])) {
+ def_prob[node] = vp56_rac_gets_nn(c, 7);
+ s->coeff_model_dccv[pt][node] = def_prob[node];
+ } else if (s->frames[VP56_FRAME_CURRENT].key_frame) {
+ s->coeff_model_dccv[pt][node] = def_prob[node];
+ }
+
+ for (ct=0; ct<3; ct++)
+ for (pt=0; pt<2; pt++)
+ for (cg=0; cg<6; cg++)
+ for (node=0; node<11; node++)
+ if (vp56_rac_get_prob(c, vp5_ract_pct[ct][pt][cg][node])) {
+ def_prob[node] = vp56_rac_gets_nn(c, 7);
+ s->coeff_model_ract[pt][ct][cg][node] = def_prob[node];
+ } else if (s->frames[VP56_FRAME_CURRENT].key_frame) {
+ s->coeff_model_ract[pt][ct][cg][node] = def_prob[node];
+ }
+
+ /* coeff_model_dcct is a linear combination of coeff_model_dccv */
+ for (pt=0; pt<2; pt++)
+ for (ctx=0; ctx<36; ctx++)
+ for (node=0; node<5; node++)
+ s->coeff_model_dcct[pt][ctx][node] = clip(((s->coeff_model_dccv[pt][node] * vp5_dccv_lc[node][ctx][0] + 128) >> 8) + vp5_dccv_lc[node][ctx][1], 1, 254);
+
+ /* coeff_model_acct is a linear combination of coeff_model_ract */
+ for (ct=0; ct<3; ct++)
+ for (pt=0; pt<2; pt++)
+ for (cg=0; cg<3; cg++)
+ for (ctx=0; ctx<6; ctx++)
+ for (node=0; node<5; node++)
+ s->coeff_model_acct[pt][ct][cg][ctx][node] = clip(((s->coeff_model_ract[pt][ct][cg][node] * vp5_ract_lc[ct][cg][node][ctx][0] + 128) >> 8) + vp5_ract_lc[ct][cg][node][ctx][1], 1, 254);
+}
+
+static void vp5_parse_coeff(vp56_context_t *s)
+{
+ vp56_range_coder_t *c = &s->c;
+ uint8_t *permute = s->scantable.permutated;
+ uint8_t *model, *model2;
+ int coeff, sign, coeff_idx;
+ int b, i, cg, idx, ctx, ctx_last;
+ int pt = 0; /* plane type (0 for Y, 1 for U or V) */
+
+ for (b=0; b<6; b++) {
+ int ct = 1; /* code type */
+
+ if (b > 3) pt = 1;
+
+ ctx = 6*s->coeff_ctx[vp56_b6to4[b]][0]
+ + s->above_blocks[s->above_block_idx[b]].not_null_dc;
+ model = s->coeff_model_dccv[pt];
+ model2 = s->coeff_model_dcct[pt][ctx];
+
+ for (coeff_idx=0; coeff_idx<64; ) {
+ if (vp56_rac_get_prob(c, model2[0])) {
+ if (vp56_rac_get_prob(c, model2[2])) {
+ if (vp56_rac_get_prob(c, model2[3])) {
+ s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 4;
+ idx = vp56_rac_get_tree(c, vp56_pc_tree, model);
+ sign = vp56_rac_get(c);
+ coeff = vp56_coeff_bias[idx];
+ for (i=vp56_coeff_bit_length[idx]; i>=0; i--)
+ coeff += vp56_rac_get_prob(c, vp56_coeff_parse_table[idx][i]) << i;
+ } else {
+ if (vp56_rac_get_prob(c, model2[4])) {
+ coeff = 3 + vp56_rac_get_prob(c, model[5]);
+ s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 3;
+ } else {
+ coeff = 2;
+ s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 2;
+ }
+ sign = vp56_rac_get(c);
+ }
+ ct = 2;
+ } else {
+ ct = 1;
+ s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 1;
+ sign = vp56_rac_get(c);
+ coeff = 1;
+ }
+ coeff = (coeff ^ -sign) + sign;
+ if (coeff_idx)
+ coeff *= s->dequant_ac;
+ s->block_coeff[b][permute[coeff_idx]] = coeff;
+ } else {
+ if (ct && !vp56_rac_get_prob(c, model2[1]))
+ break;
+ ct = 0;
+ s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 0;
+ }
+
+ cg = vp5_coeff_groups[++coeff_idx];
+ ctx = s->coeff_ctx[vp56_b6to4[b]][coeff_idx];
+ model = s->coeff_model_ract[pt][ct][cg];
+ model2 = cg > 2 ? model : s->coeff_model_acct[pt][ct][cg][ctx];
+ }
+
+ ctx_last = FFMIN(s->coeff_ctx_last[vp56_b6to4[b]], 24);
+ s->coeff_ctx_last[vp56_b6to4[b]] = coeff_idx;
+ if (coeff_idx < ctx_last)
+ for (i=coeff_idx; i<=ctx_last; i++)
+ s->coeff_ctx[vp56_b6to4[b]][i] = 5;
+ s->above_blocks[s->above_block_idx[b]].not_null_dc = s->coeff_ctx[vp56_b6to4[b]][0];
+ }
+}
+
+static void vp5_default_models_init(vp56_context_t *s)
+{
+ int i;
+
+ for (i=0; i<2; i++) {
+ s->vector_model_sig[i] = 0x80;
+ s->vector_model_dct[i] = 0x80;
+ s->vector_model_pdi[i][0] = 0x55;
+ s->vector_model_pdi[i][1] = 0x80;
+ }
+ memcpy(s->mb_types_stats, vp56_def_mb_types_stats, sizeof(s->mb_types_stats));
+ memset(s->vector_model_pdv, 0x80, sizeof(s->vector_model_pdv));
+}
+
+static int vp5_decode_init(AVCodecContext *avctx)
+{
+ vp56_context_t *s = avctx->priv_data;
+
+ vp56_init(s, avctx, 1);
+ s->vp56_coord_div = vp5_coord_div;
+ s->parse_vector_adjustment = vp5_parse_vector_adjustment;
+ s->adjust = vp5_adjust;
+ s->parse_coeff = vp5_parse_coeff;
+ s->default_models_init = vp5_default_models_init;
+ s->parse_vector_models = vp5_parse_vector_models;
+ s->parse_coeff_models = vp5_parse_coeff_models;
+ s->parse_header = vp5_parse_header;
+
+ return 0;
+}
+
+AVCodec vp5_decoder = {
+ "vp5",
+ CODEC_TYPE_VIDEO,
+ CODEC_ID_VP5,
+ sizeof(vp56_context_t),
+ vp5_decode_init,
+ NULL,
+ vp56_free,
+ vp56_decode_frame,
+};
diff --git a/src/libffmpeg/libavcodec/vp56.c b/src/libffmpeg/libavcodec/vp56.c
new file mode 100644
index 000000000..eb78d02e4
--- /dev/null
+++ b/src/libffmpeg/libavcodec/vp56.c
@@ -0,0 +1,665 @@
+/**
+ * @file vp56.c
+ * VP5 and VP6 compatible video decoder (common features)
+ *
+ * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+
+#include "vp56.h"
+#include "vp56data.h"
+
+
+void vp56_init_dequant(vp56_context_t *s, int quantizer) /* cache the DC/AC dequantization factors for the given quantizer index */
+{
+ s->quantizer = quantizer;
+ s->dequant_dc = vp56_dc_dequant[quantizer] << 2; /* tables scaled by 4 */
+ s->dequant_ac = vp56_ac_dequant[quantizer] << 2;
+}
+
+static int vp56_get_vectors_predictors(vp56_context_t *s, int row, int col, /* fill vector_candidate[]; returns a context in 0..2 (1: none found, 2: one, 0: two) */
+ vp56_frame_t ref_frame)
+{
+ int nb_pred = 0;
+ vp56_mv_t vect[2] = {{0,0}, {0,0}};
+ int pos, offset;
+ vp56_mv_t mvp;
+
+ for (pos=0; pos<12; pos++) { /* scan the 12 candidate MB positions around (row,col) */
+ mvp.x = col + vp56_candidate_predictor_pos[pos][0];
+ mvp.y = row + vp56_candidate_predictor_pos[pos][1];
+ if (mvp.x < 0 || mvp.x >= s->mb_width ||
+ mvp.y < 0 || mvp.y >= s->mb_height)
+ continue; /* candidate outside the picture */
+ offset = mvp.x + s->mb_width*mvp.y;
+
+ if (vp56_reference_frame[s->macroblocks[offset].type] != ref_frame)
+ continue; /* candidate must reference the same frame */
+ if ((s->macroblocks[offset].mv.x == vect[0].x &&
+ s->macroblocks[offset].mv.y == vect[0].y) ||
+ (s->macroblocks[offset].mv.x == 0 &&
+ s->macroblocks[offset].mv.y == 0))
+ continue; /* skip duplicates of the first predictor and null vectors */
+
+ vect[nb_pred++] = s->macroblocks[offset].mv;
+ if (nb_pred > 1) {
+ nb_pred = -1; /* two predictors found: makes the return value below 0 */
+ break;
+ }
+ s->vector_candidate_pos = pos;
+ }
+
+ s->vector_candidate[0] = vect[0];
+ s->vector_candidate[1] = vect[1];
+
+ return nb_pred+1;
+}
+
+static void vp56_parse_mb_type_models(vp56_context_t *s) /* decode MB-type stats updates from the bitstream, then rebuild the mb_type_model tables */
+{
+ vp56_range_coder_t *c = &s->c;
+ int i, ctx, type;
+
+ for (ctx=0; ctx<3; ctx++) {
+ if (vp56_rac_get_prob(c, 174)) { /* load one of the predefined stats tables */
+ int idx = vp56_rac_gets(c, 4);
+ memcpy(s->mb_types_stats[ctx],vp56_pre_def_mb_type_stats[idx][ctx],
+ sizeof(s->mb_types_stats[ctx]));
+ }
+ if (vp56_rac_get_prob(c, 254)) { /* per-entry signed delta updates */
+ for (type=0; type<10; type++) {
+ for(i=0; i<2; i++) {
+ if (vp56_rac_get_prob(c, 205)) {
+ int delta, sign = vp56_rac_get(c);
+
+ delta = vp56_rac_get_tree(c, vp56_pmbtm_tree,
+ vp56_mb_type_model_model);
+ if (!delta)
+ delta = 4 * vp56_rac_gets(c, 7); /* escape: explicit 7-bit magnitude */
+ s->mb_types_stats[ctx][type][i] += (delta ^ -sign) + sign; /* apply sign: negate delta when sign==1 */
+ }
+ }
+ }
+ }
+ }
+
+ /* compute MB type probability tables based on previous MB type */
+ for (ctx=0; ctx<3; ctx++) {
+ int p[10];
+
+ for (type=0; type<10; type++)
+ p[type] = 100 * s->mb_types_stats[ctx][type][1];
+
+ for (type=0; type<10; type++) {
+ int p02, p34, p0234, p17, p56, p89, p5689, p156789;
+
+ /* conservative MB type probability */
+ s->mb_type_model[ctx][type][0] = 255 - (255 * s->mb_types_stats[ctx][type][0]) / (1 + s->mb_types_stats[ctx][type][0] + s->mb_types_stats[ctx][type][1]);
+
+ p[type] = 0; /* same MB type => weight is null */
+
+ /* binary tree parsing probabilities */
+ p02 = p[0] + p[2];
+ p34 = p[3] + p[4];
+ p0234 = p02 + p34;
+ p17 = p[1] + p[7];
+ p56 = p[5] + p[6];
+ p89 = p[8] + p[9];
+ p5689 = p56 + p89;
+ p156789 = p17 + p5689;
+
+ s->mb_type_model[ctx][type][1] = 1 + 255 * p0234/(1+p0234+p156789);
+ s->mb_type_model[ctx][type][2] = 1 + 255 * p02 / (1+p0234);
+ s->mb_type_model[ctx][type][3] = 1 + 255 * p17 / (1+p156789);
+ s->mb_type_model[ctx][type][4] = 1 + 255 * p[0] / (1+p02);
+ s->mb_type_model[ctx][type][5] = 1 + 255 * p[3] / (1+p34);
+ s->mb_type_model[ctx][type][6] = 1 + 255 * p[1] / (1+p17);
+ s->mb_type_model[ctx][type][7] = 1 + 255 * p56 / (1+p5689);
+ s->mb_type_model[ctx][type][8] = 1 + 255 * p[5] / (1+p56);
+ s->mb_type_model[ctx][type][9] = 1 + 255 * p[8] / (1+p89);
+
+ /* restore initial value */
+ p[type] = 100 * s->mb_types_stats[ctx][type][1];
+ }
+ }
+}
+
+static vp56_mb_t vp56_parse_mb_type(vp56_context_t *s, /* decode one MB type: either repeat prev_type, or read a new one from the tree */
+ vp56_mb_t prev_type, int ctx)
+{
+ uint8_t *mb_type_model = s->mb_type_model[ctx][prev_type];
+ vp56_range_coder_t *c = &s->c;
+
+ if (vp56_rac_get_prob(c, mb_type_model[0]))
+ return prev_type; /* model[0] is the "same type again" probability */
+ else
+ return vp56_rac_get_tree(c, vp56_pmbt_tree, mb_type_model);
+}
+
+static void vp56_decode_4mv(vp56_context_t *s, int row, int col) /* decode the four per-block luma vectors of a 4V macroblock, plus derived chroma vectors */
+{
+ vp56_mv_t mv = {0,0};
+ int type[4];
+ int b;
+
+ /* parse each block type */
+ for (b=0; b<4; b++) {
+ type[b] = vp56_rac_gets(&s->c, 2);
+ if (type[b])
+ type[b]++; /* only returns 0, 2, 3 or 4 (all INTER_PF) */
+ }
+
+ /* get vectors */
+ for (b=0; b<4; b++) {
+ switch (type[b]) {
+ case VP56_MB_INTER_NOVEC_PF:
+ s->mv[b] = (vp56_mv_t) {0,0};
+ break;
+ case VP56_MB_INTER_DELTA_PF:
+ s->parse_vector_adjustment(s, &s->mv[b]); /* codec-specific MV delta */
+ break;
+ case VP56_MB_INTER_V1_PF:
+ s->mv[b] = s->vector_candidate[0];
+ break;
+ case VP56_MB_INTER_V2_PF:
+ s->mv[b] = s->vector_candidate[1];
+ break;
+ }
+ mv.x += s->mv[b].x; /* accumulate for the chroma average below */
+ mv.y += s->mv[b].y;
+ }
+
+ /* this is the one selected for the whole MB for prediction */
+ s->macroblocks[row * s->mb_width + col].mv = s->mv[3];
+
+ /* chroma vectors are average luma vectors */
+ if (s->avctx->codec->id == CODEC_ID_VP5) {
+ s->mv[4].x = s->mv[5].x = RSHIFT(mv.x,2); /* VP5: rounded shift */
+ s->mv[4].y = s->mv[5].y = RSHIFT(mv.y,2);
+ } else {
+ s->mv[4] = s->mv[5] = (vp56_mv_t) {mv.x/4, mv.y/4}; /* VP6: truncating division */
+ }
+}
+
+static vp56_mb_t vp56_decode_mv(vp56_context_t *s, int row, int col) /* decode the MB type and its motion vector(s); returns the MB type */
+{
+ vp56_mv_t *mv, vect = {0,0};
+ int ctx, b;
+
+ ctx = vp56_get_vectors_predictors(s, row, col, VP56_FRAME_PREVIOUS);
+ s->mb_type = vp56_parse_mb_type(s, s->mb_type, ctx);
+ s->macroblocks[row * s->mb_width + col].type = s->mb_type;
+
+ switch (s->mb_type) {
+ case VP56_MB_INTER_V1_PF:
+ mv = &s->vector_candidate[0];
+ break;
+
+ case VP56_MB_INTER_V2_PF:
+ mv = &s->vector_candidate[1];
+ break;
+
+ case VP56_MB_INTER_V1_GF:
+ vp56_get_vectors_predictors(s, row, col, VP56_FRAME_GOLDEN); /* recompute candidates against the golden frame */
+ mv = &s->vector_candidate[0];
+ break;
+
+ case VP56_MB_INTER_V2_GF:
+ vp56_get_vectors_predictors(s, row, col, VP56_FRAME_GOLDEN);
+ mv = &s->vector_candidate[1];
+ break;
+
+ case VP56_MB_INTER_DELTA_PF:
+ s->parse_vector_adjustment(s, &vect);
+ mv = &vect;
+ break;
+
+ case VP56_MB_INTER_DELTA_GF:
+ vp56_get_vectors_predictors(s, row, col, VP56_FRAME_GOLDEN);
+ s->parse_vector_adjustment(s, &vect);
+ mv = &vect;
+ break;
+
+ case VP56_MB_INTER_4V:
+ vp56_decode_4mv(s, row, col); /* 4V fills s->mv[] itself */
+ return s->mb_type;
+
+ default:
+ mv = &vect; /* INTRA / NOVEC types use the null vector */
+ break;
+ }
+
+ s->macroblocks[row*s->mb_width + col].mv = *mv;
+
+ /* same vector for all blocks */
+ for (b=0; b<6; b++)
+ s->mv[b] = *mv;
+
+ return s->mb_type;
+}
+
<br></br>
+static void vp56_add_predictors_dc(vp56_context_t *s, vp56_frame_t ref_frame) /* predict, store and dequantize the DC coefficient of each of the 6 blocks */
+{
+ int idx = s->scantable.permutated[0]; /* position of the DC coeff after scan permutation */
+ int i;
+
+ for (i=0; i<6; i++) {
+ vp56_ref_dc_t *ab = &s->above_blocks[s->above_block_idx[i]];
+ vp56_ref_dc_t *lb = &s->left_block[vp56_b6to4[i]];
+ int count = 0;
+ int dc = 0;
+
+ if (ref_frame == lb->ref_frame) { /* left neighbour usable only with same reference frame */
+ dc += lb->dc_coeff;
+ count++;
+ }
+ if (ref_frame == ab->ref_frame) {
+ dc += ab->dc_coeff;
+ count++;
+ }
+ if (s->avctx->codec->id == CODEC_ID_VP5) { /* VP5 also considers the blocks adjacent to the above one */
+ if (count < 2 && ref_frame == ab[-1].ref_frame) {
+ dc += ab[-1].dc_coeff;
+ count++;
+ }
+ if (count < 2 && ref_frame == ab[1].ref_frame) {
+ dc += ab[1].dc_coeff;
+ count++;
+ }
+ }
+ if (count == 0)
+ dc = s->prev_dc[vp56_b6to3[i]][ref_frame]; /* fall back to the last DC seen in this plane */
+ else if (count == 2)
+ dc /= 2; /* average of the two predictors */
+
+ s->block_coeff[i][idx] += dc;
+ s->prev_dc[vp56_b6to3[i]][ref_frame] = s->block_coeff[i][idx];
+ ab->dc_coeff = s->block_coeff[i][idx]; /* record for neighbours below/right */
+ ab->ref_frame = ref_frame;
+ lb->dc_coeff = s->block_coeff[i][idx];
+ lb->ref_frame = ref_frame;
+ s->block_coeff[i][idx] *= s->dequant_dc;
+ }
+}
+
+static void vp56_edge_filter(vp56_context_t *s, uint8_t *yuv, /* filter a 12-pixel edge; t is the strength threshold */
+ int pix_inc, int line_inc, int t)
+{
+ int pix2_inc = 2 * pix_inc;
+ int i, v;
+
+ for (i=0; i<12; i++) {
+ v = (yuv[-pix2_inc] + 3*(yuv[0]-yuv[-pix_inc]) - yuv[pix_inc] + 4) >>3; /* correction from the 4 pixels across the edge */
+ v = s->adjust(v, t); /* codec-specific clamping of the correction */
+ yuv[-pix_inc] = clip_uint8(yuv[-pix_inc] + v);
+ yuv[0] = clip_uint8(yuv[0] - v);
+ yuv += line_inc;
+ }
+}
+
+static void vp56_deblock_filter(vp56_context_t *s, uint8_t *yuv, /* deblock the internal edges of a 12x12 prediction block */
+ int stride, int dx, int dy)
+{
+ int t = vp56_filter_threshold[s->quantizer]; /* strength follows the current quantizer */
+ if (dx) vp56_edge_filter(s, yuv + 10-dx , 1, stride, t); /* vertical edge */
+ if (dy) vp56_edge_filter(s, yuv + stride*(10-dy), stride, 1, t); /* horizontal edge */
+}
+
+static void vp56_mc(vp56_context_t *s, int b, uint8_t *src, /* motion-compensate 8x8 block b from src into the current frame */
+ int stride, int x, int y)
+{
+ int plane = vp56_b6to3[b];
+ uint8_t *dst= s->frames[VP56_FRAME_CURRENT].data[plane]+s->block_offset[b];
+ uint8_t *src_block;
+ int src_offset;
+ int overlap_offset = 0;
+ int mask = s->vp56_coord_div[b] - 1; /* sub-pel fraction mask */
+ int deblock_filtering = s->deblock_filtering;
+ int dx;
+ int dy;
+
+ if (s->avctx->skip_loop_filter >= AVDISCARD_ALL ||
+ (s->avctx->skip_loop_filter >= AVDISCARD_NONKEY
+ && !s->frames[VP56_FRAME_CURRENT].key_frame))
+ deblock_filtering = 0; /* honour the user's skip_loop_filter setting */
+
+ dx = s->mv[b].x / s->vp56_coord_div[b]; /* integer part of the vector */
+ dy = s->mv[b].y / s->vp56_coord_div[b];
+
+ if (b >= 4) { /* chroma blocks: half-resolution coordinates */
+ x /= 2;
+ y /= 2;
+ }
+ x += dx - 2; /* -2: the filters need a 2-pixel border around the 8x8 block */
+ y += dy - 2;
+
+ if (x<0 || x+12>=s->plane_width[plane] ||
+ y<0 || y+12>=s->plane_height[plane]) {
+ ff_emulated_edge_mc(s->edge_emu_buffer, /* source overlaps the picture border: use edge emulation */
+ src + s->block_offset[b] + (dy-2)*stride + (dx-2),
+ stride, 12, 12, x, y,
+ s->plane_width[plane],
+ s->plane_height[plane]);
+ src_block = s->edge_emu_buffer;
+ src_offset = 2 + 2*stride;
+ } else if (deblock_filtering) {
+ /* only need a 12x12 block, but there is no such dsp function, */
+ /* so copy a 16x12 block */
+ s->dsp.put_pixels_tab[0][0](s->edge_emu_buffer,
+ src + s->block_offset[b] + (dy-2)*stride + (dx-2),
+ stride, 12);
+ src_block = s->edge_emu_buffer; /* filter on a copy, not on the reference frame */
+ src_offset = 2 + 2*stride;
+ } else {
+ src_block = src; /* fast path: read the reference frame directly */
+ src_offset = s->block_offset[b] + dy*stride + dx;
+ }
+
+ if (deblock_filtering)
+ vp56_deblock_filter(s, src_block, stride, dx&7, dy&7);
+
+ if (s->mv[b].x & mask) /* horizontal sub-pel component present */
+ overlap_offset += (s->mv[b].x > 0) ? 1 : -1;
+ if (s->mv[b].y & mask) /* vertical sub-pel component present */
+ overlap_offset += (s->mv[b].y > 0) ? stride : -stride;
+
+ if (overlap_offset) {
+ if (s->filter) /* codec-specific interpolation filter, when installed */
+ s->filter(s, dst, src_block, src_offset, src_offset+overlap_offset,
+ stride, s->mv[b], mask, s->filter_selection, b<4);
+ else
+ s->dsp.put_no_rnd_pixels_l2[1](dst, src_block+src_offset, /* combine the two candidate source positions */
+ src_block+src_offset+overlap_offset,
+ stride, 8);
+ } else {
+ s->dsp.put_pixels_tab[1][0](dst, src_block+src_offset, stride, 8); /* full-pel: plain copy */
+ }
+}
+
+static void vp56_decode_mb(vp56_context_t *s, int row, int col) /* decode and reconstruct one complete macroblock (4 luma + 2 chroma blocks) */
+{
+ AVFrame *frame_current, *frame_ref;
+ vp56_mb_t mb_type;
+ vp56_frame_t ref_frame;
+ int b, plan, off;
+
+ if (s->frames[VP56_FRAME_CURRENT].key_frame)
+ mb_type = VP56_MB_INTRA; /* key frames contain only intra MBs */
+ else
+ mb_type = vp56_decode_mv(s, row, col);
+ ref_frame = vp56_reference_frame[mb_type];
+
+ memset(s->block_coeff, 0, sizeof(s->block_coeff));
+
+ s->parse_coeff(s); /* codec-specific coefficient parsing into block_coeff */
+
+ vp56_add_predictors_dc(s, ref_frame);
+
+ frame_current = &s->frames[VP56_FRAME_CURRENT];
+ frame_ref = &s->frames[ref_frame];
+
+ switch (mb_type) {
+ case VP56_MB_INTRA:
+ for (b=0; b<6; b++) {
+ plan = vp56_b6to3[b];
+ s->dsp.idct_put(frame_current->data[plan] + s->block_offset[b],
+ s->stride[plan], s->block_coeff[b]);
+ }
+ break;
+
+ case VP56_MB_INTER_NOVEC_PF:
+ case VP56_MB_INTER_NOVEC_GF:
+ for (b=0; b<6; b++) { /* copy the co-located block, then add the residual */
+ plan = vp56_b6to3[b];
+ off = s->block_offset[b];
+ s->dsp.put_pixels_tab[1][0](frame_current->data[plan] + off,
+ frame_ref->data[plan] + off,
+ s->stride[plan], 8);
+ s->dsp.idct_add(frame_current->data[plan] + off,
+ s->stride[plan], s->block_coeff[b]);
+ }
+ break;
+
+ case VP56_MB_INTER_DELTA_PF:
+ case VP56_MB_INTER_V1_PF:
+ case VP56_MB_INTER_V2_PF:
+ case VP56_MB_INTER_DELTA_GF:
+ case VP56_MB_INTER_4V:
+ case VP56_MB_INTER_V1_GF:
+ case VP56_MB_INTER_V2_GF:
+ for (b=0; b<6; b++) { /* motion-compensate each block, then add the residual */
+ int x_off = b==1 || b==3 ? 8 : 0;
+ int y_off = b==2 || b==3 ? 8 : 0;
+ plan = vp56_b6to3[b];
+ vp56_mc(s, b, frame_ref->data[plan], s->stride[plan],
+ 16*col+x_off, 16*row+y_off);
+ s->dsp.idct_add(frame_current->data[plan] + s->block_offset[b],
+ s->stride[plan], s->block_coeff[b]);
+ }
+ break;
+ }
+}
+
+static int vp56_size_changed(AVCodecContext *avctx, vp56_context_t *s) /* recompute per-plane geometry and reallocate context buffers after a resolution change */
+{
+ int stride = s->frames[VP56_FRAME_CURRENT].linesize[0];
+ int i;
+
+ s->plane_width[0] = s->avctx->coded_width;
+ s->plane_width[1] = s->plane_width[2] = s->avctx->coded_width/2; /* 4:2:0 chroma */
+ s->plane_height[0] = s->avctx->coded_height;
+ s->plane_height[1] = s->plane_height[2] = s->avctx->coded_height/2;
+
+ for (i=0; i<3; i++)
+ s->stride[i] = s->flip * s->frames[VP56_FRAME_CURRENT].linesize[i]; /* negative stride when decoding flipped */
+
+ s->mb_width = (s->avctx->coded_width+15) / 16;
+ s->mb_height = (s->avctx->coded_height+15) / 16;
+
+ if (s->mb_width > 1000 || s->mb_height > 1000) { /* sanity limit on picture size */
+ av_log(avctx, AV_LOG_ERROR, "picture too big\n");
+ return -1;
+ }
+
+ s->above_blocks = av_realloc(s->above_blocks,
+ (4*s->mb_width+6) * sizeof(*s->above_blocks));
+ s->macroblocks = av_realloc(s->macroblocks,
+ s->mb_width*s->mb_height*sizeof(*s->macroblocks)); /* NOTE(review): allocation results are not checked — confirm OOM policy */
+ av_free(s->edge_emu_buffer_alloc);
+ s->edge_emu_buffer_alloc = av_malloc(16*stride);
+ s->edge_emu_buffer = s->edge_emu_buffer_alloc;
+ if (s->flip < 0)
+ s->edge_emu_buffer += 15 * stride; /* point at the last row so negative strides work */
+
+ return 0;
+}
+
+int vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size, /* common AVCodec.decode entry point for VP5/VP6; returns bytes consumed or -1 */
+ uint8_t *buf, int buf_size)
+{
+ vp56_context_t *s = avctx->priv_data;
+ AVFrame *const p = &s->frames[VP56_FRAME_CURRENT];
+ AVFrame *picture = data;
+ int mb_row, mb_col, mb_row_flip, mb_offset = 0;
+ int block, y, uv, stride_y, stride_uv;
+ int golden_frame = 0;
+ int res;
+
+ res = s->parse_header(s, buf, buf_size, &golden_frame);
+ if (!res)
+ return -1; /* header parsing failed */
+
+ p->reference = 1;
+ if (avctx->get_buffer(avctx, p) < 0) {
+ av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+ return -1;
+ }
+
+ if (res == 2) /* parse_header returns 2 when the picture size changed */
+ if (vp56_size_changed(avctx, s)) {
+ avctx->release_buffer(avctx, p);
+ return -1;
+ }
+
+ if (p->key_frame) {
+ p->pict_type = FF_I_TYPE;
+ s->default_models_init(s); /* key frame: reset all probability models */
+ for (block=0; block<s->mb_height*s->mb_width; block++)
+ s->macroblocks[block].type = VP56_MB_INTRA;
+ } else {
+ p->pict_type = FF_P_TYPE;
+ vp56_parse_mb_type_models(s);
+ s->parse_vector_models(s);
+ s->mb_type = VP56_MB_INTER_NOVEC_PF;
+ }
+
+ s->parse_coeff_models(s);
+
+ memset(s->prev_dc, 0, sizeof(s->prev_dc));
+ s->prev_dc[1][VP56_FRAME_CURRENT] = 128; /* chroma planes (1,2) start from DC 128 */
+ s->prev_dc[2][VP56_FRAME_CURRENT] = 128;
+
+ for (block=0; block < 4*s->mb_width+6; block++) { /* reset the above-row DC prediction contexts */
+ s->above_blocks[block].ref_frame = -1;
+ s->above_blocks[block].dc_coeff = 0;
+ s->above_blocks[block].not_null_dc = 0;
+ }
+ s->above_blocks[2*s->mb_width + 2].ref_frame = 0; /* sentinels at the start of the chroma rows */
+ s->above_blocks[3*s->mb_width + 4].ref_frame = 0;
+
+ stride_y = p->linesize[0];
+ stride_uv = p->linesize[1];
+
+ if (s->flip < 0)
+ mb_offset = 7;
+
+ /* main macroblocks loop */
+ for (mb_row=0; mb_row<s->mb_height; mb_row++) {
+ if (s->flip < 0)
+ mb_row_flip = s->mb_height - mb_row - 1; /* flipped pictures are stored bottom-up */
+ else
+ mb_row_flip = mb_row;
+
+ for (block=0; block<4; block++) { /* reset the left-column contexts for this row */
+ s->left_block[block].ref_frame = -1;
+ s->left_block[block].dc_coeff = 0;
+ s->left_block[block].not_null_dc = 0;
+ memset(s->coeff_ctx[block], 0, 64*sizeof(s->coeff_ctx[block][0]));
+ }
+ memset(s->coeff_ctx_last, 24, sizeof(s->coeff_ctx_last));
+
+ s->above_block_idx[0] = 1;
+ s->above_block_idx[1] = 2;
+ s->above_block_idx[2] = 1;
+ s->above_block_idx[3] = 2;
+ s->above_block_idx[4] = 2*s->mb_width + 2 + 1; /* chroma contexts live past the luma ones */
+ s->above_block_idx[5] = 3*s->mb_width + 4 + 1;
+
+ s->block_offset[s->frbi] = (mb_row_flip*16 + mb_offset) * stride_y; /* frbi/srbi swap the two luma rows when flipped */
+ s->block_offset[s->srbi] = s->block_offset[s->frbi] + 8*stride_y;
+ s->block_offset[1] = s->block_offset[0] + 8;
+ s->block_offset[3] = s->block_offset[2] + 8;
+ s->block_offset[4] = (mb_row_flip*8 + mb_offset) * stride_uv;
+ s->block_offset[5] = s->block_offset[4];
+
+ for (mb_col=0; mb_col<s->mb_width; mb_col++) {
+ vp56_decode_mb(s, mb_row, mb_col);
+
+ for (y=0; y<4; y++) { /* advance luma offsets/contexts to the next MB */
+ s->above_block_idx[y] += 2;
+ s->block_offset[y] += 16;
+ }
+
+ for (uv=4; uv<6; uv++) { /* advance chroma offsets/contexts */
+ s->above_block_idx[uv] += 1;
+ s->block_offset[uv] += 8;
+ }
+ }
+ }
+
+ if (s->frames[VP56_FRAME_PREVIOUS].data[0]
+ && (s->frames[VP56_FRAME_PREVIOUS].data[0]
+ != s->frames[VP56_FRAME_GOLDEN].data[0])) {
+ avctx->release_buffer(avctx, &s->frames[VP56_FRAME_PREVIOUS]); /* drop the old previous frame unless it is also the golden one */
+ }
+ if (p->key_frame || golden_frame) {
+ if (s->frames[VP56_FRAME_GOLDEN].data[0])
+ avctx->release_buffer(avctx, &s->frames[VP56_FRAME_GOLDEN]);
+ s->frames[VP56_FRAME_GOLDEN] = *p; /* current frame becomes the new golden frame */
+ }
+ s->frames[VP56_FRAME_PREVIOUS] = *p;
+
+ *picture = *p;
+ *data_size = sizeof(AVPicture);
+
+ return buf_size;
+}
+
+void vp56_init(vp56_context_t *s, AVCodecContext *avctx, int flip) /* common context setup shared by the VP5 and VP6 decoder init functions */
+{
+ int i;
+
+ s->avctx = avctx;
+ avctx->pix_fmt = PIX_FMT_YUV420P;
+
+ if (s->avctx->idct_algo == FF_IDCT_AUTO)
+ s->avctx->idct_algo = FF_IDCT_VP3; /* default to the VP3 IDCT */
+ dsputil_init(&s->dsp, s->avctx);
+ ff_init_scantable(s->dsp.idct_permutation, &s->scantable,ff_zigzag_direct);
+
+ avcodec_set_dimensions(s->avctx, 0, 0); /* real size comes from the first frame header */
+
+ for (i=0; i<3; i++)
+ s->frames[i].data[0] = NULL;
+ s->edge_emu_buffer_alloc = NULL;
+
+ s->above_blocks = NULL;
+ s->macroblocks = NULL;
+ s->quantizer = -1; /* -1 = no quantizer selected yet */
+ s->deblock_filtering = 1;
+
+ s->filter = NULL; /* optional interpolation filter; may be installed by the codec */
+
+ if (flip) { /* select frame orientation and the order of the two 8x8 luma rows in a MB */
+ s->flip = -1;
+ s->frbi = 2;
+ s->srbi = 0;
+ } else {
+ s->flip = 1;
+ s->frbi = 0;
+ s->srbi = 2;
+ }
+}
+
+int vp56_free(AVCodecContext *avctx) /* AVCodec.close: release context buffers and any retained reference frames */
+{
+ vp56_context_t *s = avctx->priv_data;
+
+ av_free(s->above_blocks);
+ av_free(s->macroblocks);
+ av_free(s->edge_emu_buffer_alloc);
+ if (s->frames[VP56_FRAME_GOLDEN].data[0]
+ && (s->frames[VP56_FRAME_PREVIOUS].data[0]
+ != s->frames[VP56_FRAME_GOLDEN].data[0]))
+ avctx->release_buffer(avctx, &s->frames[VP56_FRAME_GOLDEN]); /* golden released only when distinct from previous */
+ if (s->frames[VP56_FRAME_PREVIOUS].data[0])
+ avctx->release_buffer(avctx, &s->frames[VP56_FRAME_PREVIOUS]);
+ return 0;
+}
diff --git a/src/libffmpeg/libavcodec/vp56.h b/src/libffmpeg/libavcodec/vp56.h
new file mode 100644
index 000000000..f8b3a8e4b
--- /dev/null
+++ b/src/libffmpeg/libavcodec/vp56.h
@@ -0,0 +1,249 @@
+/**
+ * @file vp56.h
+ * VP5 and VP6 compatible video decoder (common features)
+ *
+ * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef VP56_H
+#define VP56_H
+
+#include "vp56data.h"
+#include "dsputil.h"
+#include "mpegvideo.h"
+
+
+typedef struct vp56_context vp56_context_t;
+typedef struct vp56_mv vp56_mv_t;
+
+typedef void (*vp56_parse_vector_adjustment_t)(vp56_context_t *s,
+ vp56_mv_t *vect); /* codec-specific MV delta parser */
+typedef int (*vp56_adjust_t)(int v, int t); /* codec-specific deblock correction clamp */
+typedef void (*vp56_filter_t)(vp56_context_t *s, uint8_t *dst, uint8_t *src,
+ int offset1, int offset2, int stride,
+ vp56_mv_t mv, int mask, int select, int luma); /* optional sub-pel filter (NULL when unused) */
+typedef void (*vp56_parse_coeff_t)(vp56_context_t *s);
+typedef void (*vp56_default_models_init_t)(vp56_context_t *s);
+typedef void (*vp56_parse_vector_models_t)(vp56_context_t *s);
+typedef void (*vp56_parse_coeff_models_t)(vp56_context_t *s);
+typedef int (*vp56_parse_header_t)(vp56_context_t *s, uint8_t *buf,
+ int buf_size, int *golden_frame); /* returns 0 on error, 2 on size change */
+
+typedef struct {
+ int high; /* current range of the arithmetic coder */
+ int bits; /* bits left in the current byte */
+ const uint8_t *buffer;
+ unsigned long code_word;
+} vp56_range_coder_t;
+
+typedef struct {
+ uint8_t not_null_dc;
+ vp56_frame_t ref_frame;
+ DCTELEM dc_coeff;
+} vp56_ref_dc_t; /* cached DC prediction state for one neighbouring block */
+
+struct vp56_mv {
+ int x;
+ int y;
+};
+
+typedef struct {
+ uint8_t type; /* vp56_mb_t of this MB */
+ vp56_mv_t mv;
+} vp56_macroblock_t;
+
+struct vp56_context { /* decoder state shared by the VP5 and VP6 decoders */
+ AVCodecContext *avctx;
+ DSPContext dsp;
+ ScanTable scantable;
+ AVFrame frames[3]; /* indexed by vp56_frame_t */
+ uint8_t *edge_emu_buffer_alloc;
+ uint8_t *edge_emu_buffer; /* into edge_emu_buffer_alloc; last row when flipped */
+ vp56_range_coder_t c;
+ int sub_version;
+
+ /* frame info */
+ int plane_width[3];
+ int plane_height[3];
+ int mb_width; /* number of horizontal MB */
+ int mb_height; /* number of vertical MB */
+ int block_offset[6]; /* byte offset of each block in its plane */
+
+ int quantizer; /* -1 until a frame selects one */
+ uint16_t dequant_dc;
+ uint16_t dequant_ac;
+
+ /* DC predictors management */
+ vp56_ref_dc_t *above_blocks; /* 4*mb_width+6 entries (luma + chroma rows) */
+ vp56_ref_dc_t left_block[4];
+ int above_block_idx[6];
+ DCTELEM prev_dc[3][3]; /* [plan][ref_frame] */
+
+ /* blocks / macroblock */
+ vp56_mb_t mb_type;
+ vp56_macroblock_t *macroblocks;
+ DECLARE_ALIGNED_16(DCTELEM, block_coeff[6][64]);
+ uint8_t coeff_reorder[64]; /* used in vp6 only */
+ uint8_t coeff_index_to_pos[64]; /* used in vp6 only */
+
+ /* motion vectors */
+ vp56_mv_t mv[6]; /* vectors for each block in MB */
+ vp56_mv_t vector_candidate[2];
+ int vector_candidate_pos;
+
+ /* filtering hints */
+ int deblock_filtering;
+ int filter_selection;
+ int filter_mode;
+ int max_vector_length;
+ int sample_variance_threshold;
+
+ /* AC models */
+ uint8_t vector_model_sig[2]; /* delta sign */
+ uint8_t vector_model_dct[2]; /* delta coding types */
+ uint8_t vector_model_pdi[2][2]; /* predefined delta init */
+ uint8_t vector_model_pdv[2][7]; /* predefined delta values */
+ uint8_t vector_model_fdv[2][8]; /* 8 bit delta value definition */
+ uint8_t mb_type_model[3][10][10]; /* model for decoding MB type */
+ uint8_t coeff_model_dccv[2][11]; /* DC coeff value */
+ uint8_t coeff_model_ract[2][3][6][11]; /* Run/AC coding type and AC coeff value */
+ uint8_t coeff_model_acct[2][3][3][6][5];/* vp5 only AC coding type for coding group < 3 */
+ uint8_t coeff_model_dcct[2][36][5]; /* DC coeff coding type */
+ uint8_t coeff_model_runv[2][14]; /* run value (vp6 only) */
+ uint8_t mb_types_stats[3][10][2]; /* contextual, next MB type stats */
+ uint8_t coeff_ctx[4][64]; /* used in vp5 only */
+ uint8_t coeff_ctx_last[4]; /* used in vp5 only */
+
+ /* upside-down flipping hints */
+ int flip; /* are we flipping ? */
+ int frbi; /* first row block index in MB */
+ int srbi; /* second row block index in MB */
+ int stride[3]; /* stride for each plan */
+
+ const uint8_t *vp56_coord_div; /* per-block MV fraction divisors */
+ vp56_parse_vector_adjustment_t parse_vector_adjustment;
+ vp56_adjust_t adjust;
+ vp56_filter_t filter; /* optional; NULL selects the dsp fallback in vp56_mc */
+ vp56_parse_coeff_t parse_coeff;
+ vp56_default_models_init_t default_models_init;
+ vp56_parse_vector_models_t parse_vector_models;
+ vp56_parse_coeff_models_t parse_coeff_models;
+ vp56_parse_header_t parse_header;
+};
+
+
+void vp56_init(vp56_context_t *s, AVCodecContext *avctx, int flip); /* shared entry points implemented in vp56.c */
+int vp56_free(AVCodecContext *avctx);
+void vp56_init_dequant(vp56_context_t *s, int quantizer);
+int vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
+ uint8_t *buf, int buf_size);
+
+
+/**
+ * vp56 specific range coder implementation
+ */
+
+static inline void vp56_init_range_decoder(vp56_range_coder_t *c, /* prime the range coder with the first two bytes of buf */
+ const uint8_t *buf, int buf_size) /* NOTE(review): buf_size is unused — no bounds checking here */
+{
+ c->high = 255;
+ c->bits = 8;
+ c->buffer = buf;
+ c->code_word = *c->buffer++ << 8;
+ c->code_word |= *c->buffer++;
+}
+
+static inline int vp56_rac_get_prob(vp56_range_coder_t *c, uint8_t prob) /* decode one bit; prob/256 is roughly the probability of returning 0 */
+{
+ unsigned int low = 1 + (((c->high - 1) * prob) / 256); /* size of the "0" sub-range */
+ unsigned int low_shift = low << 8;
+ int bit = c->code_word >= low_shift;
+
+ if (bit) {
+ c->high -= low;
+ c->code_word -= low_shift;
+ } else {
+ c->high = low;
+ }
+
+ /* normalize */
+ while (c->high < 128) {
+ c->high <<= 1;
+ c->code_word <<= 1;
+ if (--c->bits == 0) {
+ c->bits = 8;
+ c->code_word |= *c->buffer++; /* refill one byte; no end-of-buffer check */
+ }
+ }
+ return bit;
+}
+
+static inline int vp56_rac_get(vp56_range_coder_t *c) /* decode one bit with 50/50 probability */
+{
+ /* equiprobable */
+ int low = (c->high + 1) >> 1;
+ unsigned int low_shift = low << 8;
+ int bit = c->code_word >= low_shift;
+ if (bit) {
+ c->high = (c->high - low) << 1;
+ c->code_word -= low_shift;
+ } else {
+ c->high = low << 1;
+ }
+
+ /* normalize */
+ c->code_word <<= 1; /* halving the range always needs exactly one shift */
+ if (--c->bits == 0) {
+ c->bits = 8;
+ c->code_word |= *c->buffer++; /* refill one byte; no end-of-buffer check */
+ }
+ return bit;
+}
+
+static inline int vp56_rac_gets(vp56_range_coder_t *c, int bits) /* read 'bits' equiprobable bits, MSB first */
+{
+ int value = 0;
+
+ while (bits--) {
+ value = (value << 1) | vp56_rac_get(c);
+ }
+
+ return value;
+}
+
+static inline int vp56_rac_gets_nn(vp56_range_coder_t *c, int bits) /* NOTE(review): 'bits' is unused; the width is hard-coded to 7 below — confirm against upstream */
+{
+ int v = vp56_rac_gets(c, 7) << 1; /* v is always even */
+ return v + !v; /* map 0 to 1 so the result is never null */
+}
+
+static inline int vp56_rac_get_tree(vp56_range_coder_t *c, /* walk a binary tree driven by the coder; returns the decoded leaf value */
+ const vp56_tree_t *tree,
+ const uint8_t *probs)
+{
+ while (tree->val > 0) { /* interior node: val is the offset of the "1" branch */
+ if (vp56_rac_get_prob(c, probs[tree->prob_idx]))
+ tree += tree->val;
+ else
+ tree++;
+ }
+ return -tree->val; /* leaves store the negated value */
+}
+
+#endif /* VP56_H */
diff --git a/src/libffmpeg/libavcodec/vp56data.c b/src/libffmpeg/libavcodec/vp56data.c
new file mode 100644
index 000000000..e75c6d1ce
--- /dev/null
+++ b/src/libffmpeg/libavcodec/vp56data.c
@@ -0,0 +1,66 @@
+/**
+ * @file vp56data.c
+ * VP5 and VP6 compatible video decoder (common data)
+ *
+ * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vp56data.h"
+
+const uint8_t vp56_b6to3[] = { 0, 0, 0, 0, 1, 2 }; /* block index (0..5) -> plane: 4 luma blocks, then Cb, Cr */
+const uint8_t vp56_b6to4[] = { 0, 0, 1, 1, 2, 3 }; /* block index -> left-context slot: luma pairs share, chroma separate */
+
+const uint8_t vp56_coeff_parse_table[6][11] = { /* presumably per-magnitude coefficient probabilities used by the codec coeff parsers — confirm */
+ { 159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 145, 165, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 140, 148, 173, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 135, 140, 155, 176, 0, 0, 0, 0, 0, 0, 0 },
+ { 130, 134, 141, 157, 180, 0, 0, 0, 0, 0, 0 },
+ { 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254 },
+};
+
+const uint8_t vp56_def_mb_types_stats[3][10][2] = { /* default MB-type stats, installed by the codecs' default_models_init */
+ { { 69, 42 }, { 1, 2 }, { 1, 7 }, { 44, 42 }, { 6, 22 },
+ { 1, 3 }, { 0, 2 }, { 1, 5 }, { 0, 1 }, { 0, 0 }, },
+ { { 229, 8 }, { 1, 1 }, { 0, 8 }, { 0, 0 }, { 0, 0 },
+ { 1, 2 }, { 0, 1 }, { 0, 0 }, { 1, 1 }, { 0, 0 }, },
+ { { 122, 35 }, { 1, 1 }, { 1, 6 }, { 46, 34 }, { 0, 0 },
+ { 1, 2 }, { 0, 1 }, { 0, 1 }, { 1, 1 }, { 0, 0 }, },
+};
+
+const vp56_tree_t vp56_pva_tree[] = { /* {branch offset, prob index}; negative single entries are leaves (see vp56_rac_get_tree) */
+ { 8, 0},
+ { 4, 1},
+ { 2, 2}, {-0}, {-1},
+ { 2, 3}, {-2}, {-3},
+ { 4, 4},
+ { 2, 5}, {-4}, {-5},
+ { 2, 6}, {-6}, {-7},
+};
+
+const vp56_tree_t vp56_pc_tree[] = { /* same encoding as vp56_pva_tree */
+ { 4, 6},
+ { 2, 7}, {-0}, {-1},
+ { 4, 8},
+ { 2, 9}, {-2}, {-3},
+ { 2,10}, {-4}, {-5},
+};
+
+const uint8_t vp56_coeff_bias[] = { 5, 7, 11, 19, 35, 67 }; /* base value per coefficient category — pairs with vp56_coeff_bit_length */
+const uint8_t vp56_coeff_bit_length[] = { 0, 1, 2, 3, 4, 10 }; /* extra bits read per category */
diff --git a/src/libffmpeg/libavcodec/vp56data.h b/src/libffmpeg/libavcodec/vp56data.h
new file mode 100644
index 000000000..dbf92dd68
--- /dev/null
+++ b/src/libffmpeg/libavcodec/vp56data.h
@@ -0,0 +1,248 @@
+/**
+ * @file vp56data.h
+ * VP5 and VP6 compatible video decoder (common data)
+ *
+ * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef VP56DATA_H
+#define VP56DATA_H
+
+#include "common.h"
+
+typedef enum {
+ VP56_FRAME_CURRENT = 0,
+ VP56_FRAME_PREVIOUS = 1,
+ VP56_FRAME_GOLDEN = 2,
+} vp56_frame_t;
+
+typedef enum {
+ VP56_MB_INTER_NOVEC_PF = 0, /**< Inter MB, no vector, from previous frame */
+ VP56_MB_INTRA = 1, /**< Intra MB */
+ VP56_MB_INTER_DELTA_PF = 2, /**< Inter MB, above/left vector + delta, from previous frame */
+ VP56_MB_INTER_V1_PF = 3, /**< Inter MB, first vector, from previous frame */
+ VP56_MB_INTER_V2_PF = 4, /**< Inter MB, second vector, from previous frame */
+ VP56_MB_INTER_NOVEC_GF = 5, /**< Inter MB, no vector, from golden frame */
+ VP56_MB_INTER_DELTA_GF = 6, /**< Inter MB, above/left vector + delta, from golden frame */
+ VP56_MB_INTER_4V = 7, /**< Inter MB, 4 vectors, from previous frame */
+ VP56_MB_INTER_V1_GF = 8, /**< Inter MB, first vector, from golden frame */
+ VP56_MB_INTER_V2_GF = 9, /**< Inter MB, second vector, from golden frame */
+} vp56_mb_t;
+
+typedef struct {
+ int8_t val;
+ int8_t prob_idx;
+} vp56_tree_t;
+
+extern const uint8_t vp56_b6to3[];
+extern const uint8_t vp56_b6to4[];
+extern const uint8_t vp56_coeff_parse_table[6][11];
+extern const uint8_t vp56_def_mb_types_stats[3][10][2];
+extern const vp56_tree_t vp56_pva_tree[];
+extern const vp56_tree_t vp56_pc_tree[];
+extern const uint8_t vp56_coeff_bias[];
+extern const uint8_t vp56_coeff_bit_length[];
+
+static const vp56_frame_t vp56_reference_frame[] = {
+ VP56_FRAME_PREVIOUS, /* VP56_MB_INTER_NOVEC_PF */
+ VP56_FRAME_CURRENT, /* VP56_MB_INTRA */
+ VP56_FRAME_PREVIOUS, /* VP56_MB_INTER_DELTA_PF */
+ VP56_FRAME_PREVIOUS, /* VP56_MB_INTER_V1_PF */
+ VP56_FRAME_PREVIOUS, /* VP56_MB_INTER_V2_PF */
+ VP56_FRAME_GOLDEN, /* VP56_MB_INTER_NOVEC_GF */
+ VP56_FRAME_GOLDEN, /* VP56_MB_INTER_DELTA_GF */
+ VP56_FRAME_PREVIOUS, /* VP56_MB_INTER_4V */
+ VP56_FRAME_GOLDEN, /* VP56_MB_INTER_V1_GF */
+ VP56_FRAME_GOLDEN, /* VP56_MB_INTER_V2_GF */
+};
+
+static const uint8_t vp56_ac_dequant[64] = {
+ 94, 92, 90, 88, 86, 82, 78, 74,
+ 70, 66, 62, 58, 54, 53, 52, 51,
+ 50, 49, 48, 47, 46, 45, 44, 43,
+ 42, 40, 39, 37, 36, 35, 34, 33,
+ 32, 31, 30, 29, 28, 27, 26, 25,
+ 24, 23, 22, 21, 20, 19, 18, 17,
+ 16, 15, 14, 13, 12, 11, 10, 9,
+ 8, 7, 6, 5, 4, 3, 2, 1,
+};
+
+static const uint8_t vp56_dc_dequant[64] = {
+ 47, 47, 47, 47, 45, 43, 43, 43,
+ 43, 43, 42, 41, 41, 40, 40, 40,
+ 40, 35, 35, 35, 35, 33, 33, 33,
+ 33, 32, 32, 32, 27, 27, 26, 26,
+ 25, 25, 24, 24, 23, 23, 19, 19,
+ 19, 19, 18, 18, 17, 16, 16, 16,
+ 16, 16, 15, 11, 11, 11, 10, 10,
+ 9, 8, 7, 5, 3, 3, 2, 2,
+};
+
+static const uint8_t vp56_pre_def_mb_type_stats[16][3][10][2] = {
+ { { { 9, 15 }, { 32, 25 }, { 7, 19 }, { 9, 21 }, { 1, 12 },
+ { 14, 12 }, { 3, 18 }, { 14, 23 }, { 3, 10 }, { 0, 4 }, },
+ { { 41, 22 }, { 1, 0 }, { 1, 31 }, { 0, 0 }, { 0, 0 },
+ { 0, 1 }, { 1, 7 }, { 0, 1 }, { 98, 25 }, { 4, 10 }, },
+ { { 2, 3 }, { 2, 3 }, { 0, 2 }, { 0, 2 }, { 0, 0 },
+ { 11, 4 }, { 1, 4 }, { 0, 2 }, { 3, 2 }, { 0, 4 }, }, },
+ { { { 48, 39 }, { 1, 2 }, { 11, 27 }, { 29, 44 }, { 7, 27 },
+ { 1, 4 }, { 0, 3 }, { 1, 6 }, { 1, 2 }, { 0, 0 }, },
+ { { 123, 37 }, { 6, 4 }, { 1, 27 }, { 0, 0 }, { 0, 0 },
+ { 5, 8 }, { 1, 7 }, { 0, 1 }, { 12, 10 }, { 0, 2 }, },
+ { { 49, 46 }, { 3, 4 }, { 7, 31 }, { 42, 41 }, { 0, 0 },
+ { 2, 6 }, { 1, 7 }, { 1, 4 }, { 2, 4 }, { 0, 1 }, }, },
+ { { { 21, 32 }, { 1, 2 }, { 4, 10 }, { 32, 43 }, { 6, 23 },
+ { 2, 3 }, { 1, 19 }, { 1, 6 }, { 12, 21 }, { 0, 7 }, },
+ { { 26, 14 }, { 14, 12 }, { 0, 24 }, { 0, 0 }, { 0, 0 },
+ { 55, 17 }, { 1, 9 }, { 0, 36 }, { 5, 7 }, { 1, 3 }, },
+ { { 26, 25 }, { 1, 1 }, { 2, 10 }, { 67, 39 }, { 0, 0 },
+ { 1, 1 }, { 0, 14 }, { 0, 2 }, { 31, 26 }, { 1, 6 }, }, },
+ { { { 69, 83 }, { 0, 0 }, { 0, 2 }, { 10, 29 }, { 3, 12 },
+ { 0, 1 }, { 0, 3 }, { 0, 3 }, { 2, 2 }, { 0, 0 }, },
+ { { 209, 5 }, { 0, 0 }, { 0, 27 }, { 0, 0 }, { 0, 0 },
+ { 0, 1 }, { 0, 1 }, { 0, 1 }, { 0, 0 }, { 0, 0 }, },
+ { { 103, 46 }, { 1, 2 }, { 2, 10 }, { 33, 42 }, { 0, 0 },
+ { 1, 4 }, { 0, 3 }, { 0, 1 }, { 1, 3 }, { 0, 0 }, }, },
+ { { { 11, 20 }, { 1, 4 }, { 18, 36 }, { 43, 48 }, { 13, 35 },
+ { 0, 2 }, { 0, 5 }, { 3, 12 }, { 1, 2 }, { 0, 0 }, },
+ { { 2, 5 }, { 4, 5 }, { 0, 121 }, { 0, 0 }, { 0, 0 },
+ { 0, 3 }, { 2, 4 }, { 1, 4 }, { 2, 2 }, { 0, 1 }, },
+ { { 14, 31 }, { 9, 13 }, { 14, 54 }, { 22, 29 }, { 0, 0 },
+ { 2, 6 }, { 4, 18 }, { 6, 13 }, { 1, 5 }, { 0, 1 }, }, },
+ { { { 70, 44 }, { 0, 1 }, { 2, 10 }, { 37, 46 }, { 8, 26 },
+ { 0, 2 }, { 0, 2 }, { 0, 2 }, { 0, 1 }, { 0, 0 }, },
+ { { 175, 5 }, { 0, 1 }, { 0, 48 }, { 0, 0 }, { 0, 0 },
+ { 0, 2 }, { 0, 1 }, { 0, 2 }, { 0, 1 }, { 0, 0 }, },
+ { { 85, 39 }, { 0, 0 }, { 1, 9 }, { 69, 40 }, { 0, 0 },
+ { 0, 1 }, { 0, 3 }, { 0, 1 }, { 2, 3 }, { 0, 0 }, }, },
+ { { { 8, 15 }, { 0, 1 }, { 8, 21 }, { 74, 53 }, { 22, 42 },
+ { 0, 1 }, { 0, 2 }, { 0, 3 }, { 1, 2 }, { 0, 0 }, },
+ { { 83, 5 }, { 2, 3 }, { 0, 102 }, { 0, 0 }, { 0, 0 },
+ { 1, 3 }, { 0, 2 }, { 0, 1 }, { 0, 0 }, { 0, 0 }, },
+ { { 31, 28 }, { 0, 0 }, { 3, 14 }, { 130, 34 }, { 0, 0 },
+ { 0, 1 }, { 0, 3 }, { 0, 1 }, { 3, 3 }, { 0, 1 }, }, },
+ { { { 141, 42 }, { 0, 0 }, { 1, 4 }, { 11, 24 }, { 1, 11 },
+ { 0, 1 }, { 0, 1 }, { 0, 2 }, { 0, 0 }, { 0, 0 }, },
+ { { 233, 6 }, { 0, 0 }, { 0, 8 }, { 0, 0 }, { 0, 0 },
+ { 0, 1 }, { 0, 1 }, { 0, 0 }, { 0, 1 }, { 0, 0 }, },
+ { { 171, 25 }, { 0, 0 }, { 1, 5 }, { 25, 21 }, { 0, 0 },
+ { 0, 1 }, { 0, 1 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, }, },
+ { { { 8, 19 }, { 4, 10 }, { 24, 45 }, { 21, 37 }, { 9, 29 },
+ { 0, 3 }, { 1, 7 }, { 11, 25 }, { 0, 2 }, { 0, 1 }, },
+ { { 34, 16 }, { 112, 21 }, { 1, 28 }, { 0, 0 }, { 0, 0 },
+ { 6, 8 }, { 1, 7 }, { 0, 3 }, { 2, 5 }, { 0, 2 }, },
+ { { 17, 21 }, { 68, 29 }, { 6, 15 }, { 13, 22 }, { 0, 0 },
+ { 6, 12 }, { 3, 14 }, { 4, 10 }, { 1, 7 }, { 0, 3 }, }, },
+ { { { 46, 42 }, { 0, 1 }, { 2, 10 }, { 54, 51 }, { 10, 30 },
+ { 0, 2 }, { 0, 2 }, { 0, 1 }, { 0, 1 }, { 0, 0 }, },
+ { { 159, 35 }, { 2, 2 }, { 0, 25 }, { 0, 0 }, { 0, 0 },
+ { 3, 6 }, { 0, 5 }, { 0, 1 }, { 4, 4 }, { 0, 1 }, },
+ { { 51, 39 }, { 0, 1 }, { 2, 12 }, { 91, 44 }, { 0, 0 },
+ { 0, 2 }, { 0, 3 }, { 0, 1 }, { 2, 3 }, { 0, 1 }, }, },
+ { { { 28, 32 }, { 0, 0 }, { 3, 10 }, { 75, 51 }, { 14, 33 },
+ { 0, 1 }, { 0, 2 }, { 0, 1 }, { 1, 2 }, { 0, 0 }, },
+ { { 75, 39 }, { 5, 7 }, { 2, 48 }, { 0, 0 }, { 0, 0 },
+ { 3, 11 }, { 2, 16 }, { 1, 4 }, { 7, 10 }, { 0, 2 }, },
+ { { 81, 25 }, { 0, 0 }, { 2, 9 }, { 106, 26 }, { 0, 0 },
+ { 0, 1 }, { 0, 1 }, { 0, 1 }, { 1, 1 }, { 0, 0 }, }, },
+ { { { 100, 46 }, { 0, 1 }, { 3, 9 }, { 21, 37 }, { 5, 20 },
+ { 0, 1 }, { 0, 2 }, { 1, 2 }, { 0, 1 }, { 0, 0 }, },
+ { { 212, 21 }, { 0, 1 }, { 0, 9 }, { 0, 0 }, { 0, 0 },
+ { 1, 2 }, { 0, 2 }, { 0, 0 }, { 2, 2 }, { 0, 0 }, },
+ { { 140, 37 }, { 0, 1 }, { 1, 8 }, { 24, 33 }, { 0, 0 },
+ { 1, 2 }, { 0, 2 }, { 0, 1 }, { 1, 2 }, { 0, 0 }, }, },
+ { { { 27, 29 }, { 0, 1 }, { 9, 25 }, { 53, 51 }, { 12, 34 },
+ { 0, 1 }, { 0, 3 }, { 1, 5 }, { 0, 2 }, { 0, 0 }, },
+ { { 4, 2 }, { 0, 0 }, { 0, 172 }, { 0, 0 }, { 0, 0 },
+ { 0, 1 }, { 0, 2 }, { 0, 0 }, { 2, 0 }, { 0, 0 }, },
+ { { 14, 23 }, { 1, 3 }, { 11, 53 }, { 90, 31 }, { 0, 0 },
+ { 0, 3 }, { 1, 5 }, { 2, 6 }, { 1, 2 }, { 0, 0 }, }, },
+ { { { 80, 38 }, { 0, 0 }, { 1, 4 }, { 69, 33 }, { 5, 16 },
+ { 0, 1 }, { 0, 1 }, { 0, 0 }, { 0, 1 }, { 0, 0 }, },
+ { { 187, 22 }, { 1, 1 }, { 0, 17 }, { 0, 0 }, { 0, 0 },
+ { 3, 6 }, { 0, 4 }, { 0, 1 }, { 4, 4 }, { 0, 1 }, },
+ { { 123, 29 }, { 0, 0 }, { 1, 7 }, { 57, 30 }, { 0, 0 },
+ { 0, 1 }, { 0, 1 }, { 0, 1 }, { 0, 1 }, { 0, 0 }, }, },
+ { { { 16, 20 }, { 0, 0 }, { 2, 8 }, { 104, 49 }, { 15, 33 },
+ { 0, 1 }, { 0, 1 }, { 0, 1 }, { 1, 1 }, { 0, 0 }, },
+ { { 133, 6 }, { 1, 2 }, { 1, 70 }, { 0, 0 }, { 0, 0 },
+ { 0, 2 }, { 0, 4 }, { 0, 3 }, { 1, 1 }, { 0, 0 }, },
+ { { 13, 14 }, { 0, 0 }, { 4, 20 }, { 175, 20 }, { 0, 0 },
+ { 0, 1 }, { 0, 1 }, { 0, 1 }, { 1, 1 }, { 0, 0 }, }, },
+ { { { 194, 16 }, { 0, 0 }, { 1, 1 }, { 1, 9 }, { 1, 3 },
+ { 0, 0 }, { 0, 1 }, { 0, 1 }, { 0, 0 }, { 0, 0 }, },
+ { { 251, 1 }, { 0, 0 }, { 0, 2 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, },
+ { { 202, 23 }, { 0, 0 }, { 1, 3 }, { 2, 9 }, { 0, 0 },
+ { 0, 1 }, { 0, 1 }, { 0, 1 }, { 0, 0 }, { 0, 0 }, }, },
+};
+
+static const uint8_t vp56_filter_threshold[] = {
+ 14, 14, 13, 13, 12, 12, 10, 10,
+ 10, 10, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 7, 7, 7, 7,
+ 7, 7, 6, 6, 6, 6, 6, 6,
+ 5, 5, 5, 5, 4, 4, 4, 4,
+ 4, 4, 4, 3, 3, 3, 3, 2,
+};
+
+static const uint8_t vp56_mb_type_model_model[] = {
+ 171, 83, 199, 140, 125, 104,
+};
+
+static const vp56_tree_t vp56_pmbtm_tree[] = {
+ { 4, 0},
+ { 2, 1}, {-8}, {-4},
+ { 8, 2},
+ { 6, 3},
+ { 4, 4},
+ { 2, 5}, {-24}, {-20}, {-16}, {-12}, {-0},
+};
+
+static const vp56_tree_t vp56_pmbt_tree[] = {
+ { 8, 1},
+ { 4, 2},
+ { 2, 4}, {-VP56_MB_INTER_NOVEC_PF}, {-VP56_MB_INTER_DELTA_PF},
+ { 2, 5}, {-VP56_MB_INTER_V1_PF}, {-VP56_MB_INTER_V2_PF},
+ { 4, 3},
+ { 2, 6}, {-VP56_MB_INTRA}, {-VP56_MB_INTER_4V},
+ { 4, 7},
+ { 2, 8}, {-VP56_MB_INTER_NOVEC_GF}, {-VP56_MB_INTER_DELTA_GF},
+ { 2, 9}, {-VP56_MB_INTER_V1_GF}, {-VP56_MB_INTER_V2_GF},
+};
+
+/* relative pos of surrounding blocks, from closest to farthest */
+static const int8_t vp56_candidate_predictor_pos[12][2] = {
+ { 0, -1 },
+ { -1, 0 },
+ { -1, -1 },
+ { 1, -1 },
+ { 0, -2 },
+ { -2, 0 },
+ { -2, -1 },
+ { -1, -2 },
+ { 1, -2 },
+ { 2, -1 },
+ { -2, -2 },
+ { 2, -2 },
+};
+
+#endif /* VP56DATA */
diff --git a/src/libffmpeg/libavcodec/vp5data.h b/src/libffmpeg/libavcodec/vp5data.h
new file mode 100644
index 000000000..effc17c2c
--- /dev/null
+++ b/src/libffmpeg/libavcodec/vp5data.h
@@ -0,0 +1,173 @@
+/**
+ * @file vp5data.h
+ * VP5 compatible video decoder
+ *
+ * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef VP5DATA_H
+#define VP5DATA_H
+
+static const uint8_t vp5_coeff_groups[] = {
+ -1, 0, 1, 1, 2, 1, 1, 2,
+ 2, 1, 1, 2, 2, 2, 1, 2,
+ 2, 2, 2, 2, 1, 1, 2, 2,
+ 3, 3, 4, 3, 4, 4, 4, 3,
+ 3, 3, 3, 3, 4, 3, 3, 3,
+ 4, 4, 4, 4, 4, 3, 3, 4,
+ 4, 4, 3, 4, 4, 4, 4, 4,
+ 4, 4, 5, 5, 5, 5, 5, 5,
+};
+
+static const uint8_t vp5_vmc_pct[2][11] = {
+ { 243, 220, 251, 253, 237, 232, 241, 245, 247, 251, 253 },
+ { 235, 211, 246, 249, 234, 231, 248, 249, 252, 252, 254 },
+};
+
+static const uint8_t vp5_dccv_pct[2][11] = {
+ { 146, 197, 181, 207, 232, 243, 238, 251, 244, 250, 249 },
+ { 179, 219, 214, 240, 250, 254, 244, 254, 254, 254, 254 },
+};
+
+static const uint8_t vp5_ract_pct[3][2][6][11] = {
+ { { { 227, 246, 230, 247, 244, 254, 254, 254, 254, 254, 254 },
+ { 202, 254, 209, 231, 231, 249, 249, 253, 254, 254, 254 },
+ { 206, 254, 225, 242, 241, 251, 253, 254, 254, 254, 254 },
+ { 235, 254, 241, 253, 252, 254, 254, 254, 254, 254, 254 },
+ { 234, 254, 248, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } },
+ { { 240, 254, 248, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 238, 254, 240, 253, 254, 254, 254, 254, 254, 254, 254 },
+ { 244, 254, 251, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } } },
+ { { { 206, 203, 227, 239, 247, 254, 253, 254, 254, 254, 254 },
+ { 207, 199, 220, 236, 243, 252, 252, 254, 254, 254, 254 },
+ { 212, 219, 230, 243, 244, 253, 252, 254, 254, 254, 254 },
+ { 236, 237, 247, 252, 253, 254, 254, 254, 254, 254, 254 },
+ { 240, 240, 248, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } },
+ { { 230, 233, 249, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 238, 238, 250, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 248, 251, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } } },
+ { { { 225, 239, 227, 231, 244, 253, 243, 254, 254, 253, 254 },
+ { 232, 234, 224, 228, 242, 249, 242, 252, 251, 251, 254 },
+ { 235, 249, 238, 240, 251, 254, 249, 254, 253, 253, 254 },
+ { 249, 253, 251, 250, 254, 254, 254, 254, 254, 254, 254 },
+ { 251, 250, 249, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } },
+ { { 243, 244, 250, 250, 254, 254, 254, 254, 254, 254, 254 },
+ { 249, 248, 250, 253, 254, 254, 254, 254, 254, 254, 254 },
+ { 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } } },
+};
+
+static const int16_t vp5_dccv_lc[5][36][2] = {
+ { {154, 61}, {141, 54}, { 90, 45}, { 54, 34}, { 54, 13}, {128, 109},
+ {136, 54}, {148, 45}, { 92, 41}, { 54, 33}, { 51, 15}, { 87, 113},
+ { 87, 44}, { 97, 40}, { 67, 36}, { 46, 29}, { 41, 15}, { 64, 80},
+ { 59, 33}, { 61, 31}, { 51, 28}, { 44, 22}, { 33, 12}, { 49, 63},
+ { 69, 12}, { 59, 16}, { 46, 14}, { 31, 13}, { 26, 6}, { 92, 26},
+ {128, 108}, { 77, 119}, { 54, 84}, { 26, 71}, { 87, 19}, { 95, 155} },
+ { {154, 4}, {182, 0}, {159, -8}, {128, -5}, {143, -5}, {187, 55},
+ {182, 0}, {228, -3}, {187, -7}, {174, -9}, {189, -11}, {169, 79},
+ {161, -9}, {192, -8}, {187, -9}, {169, -10}, {136, -9}, {184, 40},
+ {164, -11}, {179, -10}, {174, -10}, {161, -10}, {115, -7}, {197, 20},
+ {195, -11}, {195, -11}, {146, -10}, {110, -6}, { 95, -4}, {195, 39},
+ {182, 55}, {172, 77}, {177, 37}, {169, 29}, {172, 52}, { 92, 162} },
+ { {174, 80}, {164, 80}, { 95, 80}, { 46, 66}, { 56, 24}, { 36, 193},
+ {164, 80}, {166, 77}, {105, 76}, { 49, 68}, { 46, 31}, { 49, 186},
+ { 97, 78}, {110, 74}, { 72, 72}, { 44, 60}, { 33, 30}, { 69, 131},
+ { 61, 61}, { 69, 63}, { 51, 57}, { 31, 48}, { 26, 27}, { 64, 89},
+ { 67, 23}, { 51, 32}, { 36, 33}, { 26, 28}, { 20, 12}, { 44, 68},
+ { 26, 197}, { 41, 189}, { 61, 129}, { 28, 103}, { 49, 52}, {-12, 245} },
+ { {102, 141}, { 79, 166}, { 72, 162}, { 97, 125}, {179, 4}, {307, 0},
+ { 72, 168}, { 69, 175}, { 84, 160}, {105, 127}, {148, 34}, {310, 0},
+ { 84, 151}, { 82, 161}, { 87, 153}, { 87, 135}, {115, 51}, {317, 0},
+ { 97, 125}, {102, 131}, {105, 125}, { 87, 122}, { 84, 64}, { 54, 184},
+ {166, 18}, {146, 43}, {125, 51}, { 90, 64}, { 95, 7}, { 38, 154},
+ {294, 0}, { 13, 225}, { 10, 225}, { 67, 168}, { 0, 167}, {161, 94} },
+ { {172, 76}, {172, 75}, {136, 80}, { 64, 98}, { 74, 67}, {315, 0},
+ {169, 76}, {207, 56}, {164, 66}, { 97, 80}, { 67, 72}, {328, 0},
+ {136, 80}, {187, 53}, {154, 62}, { 72, 85}, { -2, 105}, {305, 0},
+ { 74, 91}, {128, 64}, {113, 64}, { 61, 77}, { 41, 75}, {259, 0},
+ { 46, 84}, { 51, 81}, { 28, 89}, { 31, 78}, { 23, 77}, {202, 0},
+ {323, 0}, {323, 0}, {300, 0}, {236, 0}, {195, 0}, {328, 0} },
+};
+
+static const int16_t vp5_ract_lc[3][3][5][6][2] = {
+ { { { {276, 0}, {238, 0}, {195, 0}, {156, 0}, {113, 0}, {274, 0} },
+ { { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1} },
+ { {192, 59}, {182, 50}, {141, 48}, {110, 40}, { 92, 19}, {125,128} },
+ { {169, 87}, {169, 83}, {184, 62}, {220, 16}, {184, 0}, {264, 0} },
+ { {212, 40}, {212, 36}, {169, 49}, {174, 27}, { 8,120}, {182, 71} } },
+ { { {259, 10}, {197, 19}, {143, 22}, {123, 16}, {110, 8}, {133, 88} },
+ { { 0, 1}, {256, 0}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1} },
+ { {207, 46}, {187, 50}, { 97, 83}, { 23,100}, { 41, 56}, { 56,188} },
+ { {166, 90}, {146,108}, {161, 88}, {136, 95}, {174, 0}, {266, 0} },
+ { {264, 7}, {243, 18}, {184, 43}, {-14,154}, { 20,112}, { 20,199} } },
+ { { {230, 26}, {197, 22}, {159, 20}, {146, 12}, {136, 4}, { 54,162} },
+ { { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1} },
+ { {192, 59}, {156, 72}, { 84,101}, { 49,101}, { 79, 47}, { 79,167} },
+ { {138,115}, {136,116}, {166, 80}, {238, 0}, {195, 0}, {261, 0} },
+ { {225, 33}, {205, 42}, {159, 61}, { 79, 96}, { 92, 66}, { 28,195} } },
+ }, {
+ { { {200, 37}, {197, 18}, {159, 13}, {143, 7}, {102, 5}, {123,126} },
+ { {197, 3}, {220, -9}, {210,-12}, {187, -6}, {151, -2}, {174, 80} },
+ { {200, 53}, {187, 47}, {159, 40}, {118, 38}, {100, 18}, {141,111} },
+ { {179, 78}, {166, 86}, {197, 50}, {207, 27}, {187, 0}, {115,139} },
+ { {218, 34}, {220, 29}, {174, 46}, {128, 61}, { 54, 89}, {187, 65} } },
+ { { {238, 14}, {197, 18}, {125, 26}, { 90, 25}, { 82, 13}, {161, 86} },
+ { {189, 1}, {205, -2}, {156, -4}, {143, -4}, {146, -4}, {172, 72} },
+ { {230, 31}, {192, 45}, {102, 76}, { 38, 85}, { 56, 41}, { 64,173} },
+ { {166, 91}, {141,111}, {128,116}, {118,109}, {177, 0}, { 23,222} },
+ { {253, 14}, {236, 21}, {174, 49}, { 33,118}, { 44, 93}, { 23,187} } },
+ { { {218, 28}, {179, 28}, {118, 35}, { 95, 30}, { 72, 24}, {128,108} },
+ { {187, 1}, {174, -1}, {125, -1}, {110, -1}, {108, -1}, {202, 52} },
+ { {197, 53}, {146, 75}, { 46,118}, { 33,103}, { 64, 50}, {118,126} },
+ { {138,114}, {128,122}, {161, 86}, {243, -6}, {195, 0}, { 38,210} },
+ { {215, 39}, {179, 58}, { 97,101}, { 95, 85}, { 87, 70}, { 69,152} } },
+ }, {
+ { { {236, 24}, {205, 18}, {172, 12}, {154, 6}, {125, 1}, {169, 75} },
+ { {187, 4}, {230, -2}, {228, -4}, {236, -4}, {241, -2}, {192, 66} },
+ { {200, 46}, {187, 42}, {159, 34}, {136, 25}, {105, 10}, {179, 62} },
+ { {207, 55}, {192, 63}, {192, 54}, {195, 36}, {177, 1}, {143, 98} },
+ { {225, 27}, {207, 34}, {200, 30}, {131, 57}, { 97, 60}, {197, 45} } },
+ { { {271, 8}, {218, 13}, {133, 19}, { 90, 19}, { 72, 7}, {182, 51} },
+ { {179, 1}, {225, -1}, {154, -2}, {110, -1}, { 92, 0}, {195, 41} },
+ { {241, 26}, {189, 40}, { 82, 64}, { 33, 60}, { 67, 17}, {120, 94} },
+ { {192, 68}, {151, 94}, {146, 90}, {143, 72}, {161, 0}, {113,128} },
+ { {256, 12}, {218, 29}, {166, 48}, { 44, 99}, { 31, 87}, {148, 78} } },
+ { { {238, 20}, {184, 22}, {113, 27}, { 90, 22}, { 74, 9}, {192, 37} },
+ { {184, 0}, {215, -1}, {141, -1}, { 97, 0}, { 49, 0}, {264, 13} },
+ { {182, 51}, {138, 61}, { 95, 63}, { 54, 59}, { 64, 25}, {200, 45} },
+ { {179, 75}, {156, 87}, {174, 65}, {177, 44}, {174, 0}, {164, 85} },
+ { {195, 45}, {148, 65}, {105, 79}, { 95, 72}, { 87, 60}, {169, 63} } },
+ }
+};
+
+static const uint8_t vp5_coord_div[] = { 2, 2, 2, 2, 4, 4 };
+
+#endif /* VP5DATA_H */
diff --git a/src/libffmpeg/libavcodec/vp6.c b/src/libffmpeg/libavcodec/vp6.c
new file mode 100644
index 000000000..381fcc8ee
--- /dev/null
+++ b/src/libffmpeg/libavcodec/vp6.c
@@ -0,0 +1,537 @@
+/**
+ * @file vp6.c
+ * VP6 compatible video decoder
+ *
+ * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ *
+ * The VP6F decoder accept an optional 1 byte extradata. It is composed of:
+ * - upper 4bits: difference between encoded width and visible width
+ * - lower 4bits: difference between encoded height and visible height
+ */
+
+#include <stdlib.h>
+
+#include "avcodec.h"
+#include "dsputil.h"
+#include "bitstream.h"
+#include "mpegvideo.h"
+
+#include "vp56.h"
+#include "vp56data.h"
+#include "vp6data.h"
+
+
+static int vp6_parse_header(vp56_context_t *s, uint8_t *buf, int buf_size,
+ int *golden_frame)
+{
+ vp56_range_coder_t *c = &s->c;
+ int parse_filter_info = 0;
+ int vrt_shift = 0;
+ int sub_version;
+ int rows, cols;
+ int res = 1;
+
+ if (buf[0] & 1)
+ return 0;
+
+ s->frames[VP56_FRAME_CURRENT].key_frame = !(buf[0] & 0x80);
+ vp56_init_dequant(s, (buf[0] >> 1) & 0x3F);
+
+ if (s->frames[VP56_FRAME_CURRENT].key_frame) {
+ sub_version = buf[1] >> 3;
+ if (sub_version > 8)
+ return 0;
+ if ((buf[1] & 0x06) != 0x06)
+ return 0;
+ if (buf[1] & 1) {
+ av_log(s->avctx, AV_LOG_ERROR, "interlacing not supported\n");
+ return 0;
+ }
+
+ rows = buf[2]; /* number of stored macroblock rows */
+ cols = buf[3]; /* number of stored macroblock cols */
+ /* buf[4] is number of displayed macroblock rows */
+ /* buf[5] is number of displayed macroblock cols */
+
+ if (16*cols != s->avctx->coded_width ||
+ 16*rows != s->avctx->coded_height) {
+ avcodec_set_dimensions(s->avctx, 16*cols, 16*rows);
+ if (s->avctx->extradata_size == 1) {
+ s->avctx->width -= s->avctx->extradata[0] >> 4;
+ s->avctx->height -= s->avctx->extradata[0] & 0x0F;
+ }
+ res = 2;
+ }
+
+ vp56_init_range_decoder(c, buf+6, buf_size-6);
+ vp56_rac_gets(c, 2);
+
+ parse_filter_info = 1;
+ if (sub_version < 8)
+ vrt_shift = 5;
+ s->sub_version = sub_version;
+ } else {
+ if (!s->sub_version)
+ return 0;
+
+ vp56_init_range_decoder(c, buf+1, buf_size-1);
+
+ *golden_frame = vp56_rac_get(c);
+ s->deblock_filtering = vp56_rac_get(c);
+ if (s->deblock_filtering)
+ vp56_rac_get(c);
+ if (s->sub_version > 7)
+ parse_filter_info = vp56_rac_get(c);
+ }
+
+ if (parse_filter_info) {
+ if (vp56_rac_get(c)) {
+ s->filter_mode = 2;
+ s->sample_variance_threshold = vp56_rac_gets(c, 5) << vrt_shift;
+ s->max_vector_length = 2 << vp56_rac_gets(c, 3);
+ } else if (vp56_rac_get(c)) {
+ s->filter_mode = 1;
+ } else {
+ s->filter_mode = 0;
+ }
+ if (s->sub_version > 7)
+ s->filter_selection = vp56_rac_gets(c, 4);
+ else
+ s->filter_selection = 16;
+ }
+
+ vp56_rac_get(c);
+ return res;
+}
+
+static void vp6_coeff_order_table_init(vp56_context_t *s)
+{
+ int i, pos, idx = 1;
+
+ s->coeff_index_to_pos[0] = 0;
+ for (i=0; i<16; i++)
+ for (pos=1; pos<64; pos++)
+ if (s->coeff_reorder[pos] == i)
+ s->coeff_index_to_pos[idx++] = pos;
+}
+
+static void vp6_default_models_init(vp56_context_t *s)
+{
+ s->vector_model_dct[0] = 0xA2;
+ s->vector_model_dct[1] = 0xA4;
+ s->vector_model_sig[0] = 0x80;
+ s->vector_model_sig[1] = 0x80;
+
+ memcpy(s->mb_types_stats, vp56_def_mb_types_stats, sizeof(s->mb_types_stats));
+ memcpy(s->vector_model_fdv, vp6_def_fdv_vector_model, sizeof(s->vector_model_fdv));
+ memcpy(s->vector_model_pdv, vp6_def_pdv_vector_model, sizeof(s->vector_model_pdv));
+ memcpy(s->coeff_model_runv, vp6_def_runv_coeff_model, sizeof(s->coeff_model_runv));
+ memcpy(s->coeff_reorder, vp6_def_coeff_reorder, sizeof(s->coeff_reorder));
+
+ vp6_coeff_order_table_init(s);
+}
+
+static void vp6_parse_vector_models(vp56_context_t *s)
+{
+ vp56_range_coder_t *c = &s->c;
+ int comp, node;
+
+ for (comp=0; comp<2; comp++) {
+ if (vp56_rac_get_prob(c, vp6_sig_dct_pct[comp][0]))
+ s->vector_model_dct[comp] = vp56_rac_gets_nn(c, 7);
+ if (vp56_rac_get_prob(c, vp6_sig_dct_pct[comp][1]))
+ s->vector_model_sig[comp] = vp56_rac_gets_nn(c, 7);
+ }
+
+ for (comp=0; comp<2; comp++)
+ for (node=0; node<7; node++)
+ if (vp56_rac_get_prob(c, vp6_pdv_pct[comp][node]))
+ s->vector_model_pdv[comp][node] = vp56_rac_gets_nn(c, 7);
+
+ for (comp=0; comp<2; comp++)
+ for (node=0; node<8; node++)
+ if (vp56_rac_get_prob(c, vp6_fdv_pct[comp][node]))
+ s->vector_model_fdv[comp][node] = vp56_rac_gets_nn(c, 7);
+}
+
+static void vp6_parse_coeff_models(vp56_context_t *s)
+{
+ vp56_range_coder_t *c = &s->c;
+ int def_prob[11];
+ int node, cg, ctx, pos;
+ int ct; /* code type */
+ int pt; /* plane type (0 for Y, 1 for U or V) */
+
+ memset(def_prob, 0x80, sizeof(def_prob));
+
+ for (pt=0; pt<2; pt++)
+ for (node=0; node<11; node++)
+ if (vp56_rac_get_prob(c, vp6_dccv_pct[pt][node])) {
+ def_prob[node] = vp56_rac_gets_nn(c, 7);
+ s->coeff_model_dccv[pt][node] = def_prob[node];
+ } else if (s->frames[VP56_FRAME_CURRENT].key_frame) {
+ s->coeff_model_dccv[pt][node] = def_prob[node];
+ }
+
+ if (vp56_rac_get(c)) {
+ for (pos=1; pos<64; pos++)
+ if (vp56_rac_get_prob(c, vp6_coeff_reorder_pct[pos]))
+ s->coeff_reorder[pos] = vp56_rac_gets(c, 4);
+ vp6_coeff_order_table_init(s);
+ }
+
+ for (cg=0; cg<2; cg++)
+ for (node=0; node<14; node++)
+ if (vp56_rac_get_prob(c, vp6_runv_pct[cg][node]))
+ s->coeff_model_runv[cg][node] = vp56_rac_gets_nn(c, 7);
+
+ for (ct=0; ct<3; ct++)
+ for (pt=0; pt<2; pt++)
+ for (cg=0; cg<6; cg++)
+ for (node=0; node<11; node++)
+ if (vp56_rac_get_prob(c, vp6_ract_pct[ct][pt][cg][node])) {
+ def_prob[node] = vp56_rac_gets_nn(c, 7);
+ s->coeff_model_ract[pt][ct][cg][node] = def_prob[node];
+ } else if (s->frames[VP56_FRAME_CURRENT].key_frame) {
+ s->coeff_model_ract[pt][ct][cg][node] = def_prob[node];
+ }
+
+ /* coeff_model_dcct is a linear combination of coeff_model_dccv */
+ for (pt=0; pt<2; pt++)
+ for (ctx=0; ctx<3; ctx++)
+ for (node=0; node<5; node++)
+ s->coeff_model_dcct[pt][ctx][node] = clip(((s->coeff_model_dccv[pt][node] * vp6_dccv_lc[ctx][node][0] + 128) >> 8) + vp6_dccv_lc[ctx][node][1], 1, 255);
+}
+
+static void vp6_parse_vector_adjustment(vp56_context_t *s, vp56_mv_t *vect)
+{
+ vp56_range_coder_t *c = &s->c;
+ int comp;
+
+ *vect = (vp56_mv_t) {0,0};
+ if (s->vector_candidate_pos < 2)
+ *vect = s->vector_candidate[0];
+
+ for (comp=0; comp<2; comp++) {
+ int i, delta = 0;
+
+ if (vp56_rac_get_prob(c, s->vector_model_dct[comp])) {
+ static const uint8_t prob_order[] = {0, 1, 2, 7, 6, 5, 4};
+ for (i=0; i<sizeof(prob_order); i++) {
+ int j = prob_order[i];
+ delta |= vp56_rac_get_prob(c, s->vector_model_fdv[comp][j])<<j;
+ }
+ if (delta & 0xF0)
+ delta |= vp56_rac_get_prob(c, s->vector_model_fdv[comp][3])<<3;
+ else
+ delta |= 8;
+ } else {
+ delta = vp56_rac_get_tree(c, vp56_pva_tree,
+ s->vector_model_pdv[comp]);
+ }
+
+ if (delta && vp56_rac_get_prob(c, s->vector_model_sig[comp]))
+ delta = -delta;
+
+ if (!comp)
+ vect->x += delta;
+ else
+ vect->y += delta;
+ }
+}
+
+static void vp6_parse_coeff(vp56_context_t *s)
+{
+ vp56_range_coder_t *c = &s->c;
+ uint8_t *permute = s->scantable.permutated;
+ uint8_t *model, *model2, *model3;
+ int coeff, sign, coeff_idx;
+ int b, i, cg, idx, ctx;
+ int pt = 0; /* plane type (0 for Y, 1 for U or V) */
+
+ for (b=0; b<6; b++) {
+ int ct = 1; /* code type */
+ int run = 1;
+
+ if (b > 3) pt = 1;
+
+ ctx = s->left_block[vp56_b6to4[b]].not_null_dc
+ + s->above_blocks[s->above_block_idx[b]].not_null_dc;
+ model = s->coeff_model_dccv[pt];
+ model2 = s->coeff_model_dcct[pt][ctx];
+
+ for (coeff_idx=0; coeff_idx<64; ) {
+ if ((coeff_idx>1 && ct==0) || vp56_rac_get_prob(c, model2[0])) {
+ /* parse a coeff */
+ if (coeff_idx == 0) {
+ s->left_block[vp56_b6to4[b]].not_null_dc = 1;
+ s->above_blocks[s->above_block_idx[b]].not_null_dc = 1;
+ }
+
+ if (vp56_rac_get_prob(c, model2[2])) {
+ if (vp56_rac_get_prob(c, model2[3])) {
+ idx = vp56_rac_get_tree(c, vp56_pc_tree, model);
+ coeff = vp56_coeff_bias[idx];
+ for (i=vp56_coeff_bit_length[idx]; i>=0; i--)
+ coeff += vp56_rac_get_prob(c, vp56_coeff_parse_table[idx][i]) << i;
+ } else {
+ if (vp56_rac_get_prob(c, model2[4]))
+ coeff = 3 + vp56_rac_get_prob(c, model[5]);
+ else
+ coeff = 2;
+ }
+ ct = 2;
+ } else {
+ ct = 1;
+ coeff = 1;
+ }
+ sign = vp56_rac_get(c);
+ coeff = (coeff ^ -sign) + sign;
+ if (coeff_idx)
+ coeff *= s->dequant_ac;
+ idx = s->coeff_index_to_pos[coeff_idx];
+ s->block_coeff[b][permute[idx]] = coeff;
+ run = 1;
+ } else {
+ /* parse a run */
+ ct = 0;
+ if (coeff_idx == 0) {
+ s->left_block[vp56_b6to4[b]].not_null_dc = 0;
+ s->above_blocks[s->above_block_idx[b]].not_null_dc = 0;
+ } else {
+ if (!vp56_rac_get_prob(c, model2[1]))
+ break;
+
+ model3 = s->coeff_model_runv[coeff_idx >= 6];
+ run = vp56_rac_get_tree(c, vp6_pcr_tree, model3);
+ if (!run)
+ for (run=9, i=0; i<6; i++)
+ run += vp56_rac_get_prob(c, model3[i+8]) << i;
+ }
+ }
+
+ cg = vp6_coeff_groups[coeff_idx+=run];
+ model = model2 = s->coeff_model_ract[pt][ct][cg];
+ }
+ }
+}
+
+static int vp6_adjust(int v, int t)
+{
+ int V = v, s = v >> 31;
+ V ^= s;
+ V -= s;
+ if (V-t-1 >= (unsigned)(t-1))
+ return v;
+ V = 2*t - V;
+ V += s;
+ V ^= s;
+ return V;
+}
+
+static int vp6_block_variance(uint8_t *src, int stride)
+{
+ int sum = 0, square_sum = 0;
+ int y, x;
+
+ for (y=0; y<8; y+=2) {
+ for (x=0; x<8; x+=2) {
+ sum += src[x];
+ square_sum += src[x]*src[x];
+ }
+ src += 2*stride;
+ }
+ return (16*square_sum - sum*sum) >> 8;
+}
+
+static void vp6_filter_hv2(vp56_context_t *s, uint8_t *dst, uint8_t *src,
+ int stride, int delta, int16_t weight)
+{
+ s->dsp.put_pixels_tab[1][0](dst, src, stride, 8);
+ s->dsp.biweight_h264_pixels_tab[3](dst, src+delta, stride, 2,
+ 8-weight, weight, 0);
+}
+
+static void vp6_filter_hv4(uint8_t *dst, uint8_t *src, int stride,
+ int delta, const int16_t *weights)
+{
+ int x, y;
+
+ for (y=0; y<8; y++) {
+ for (x=0; x<8; x++) {
+ dst[x] = clip_uint8(( src[x-delta ] * weights[0]
+ + src[x ] * weights[1]
+ + src[x+delta ] * weights[2]
+ + src[x+2*delta] * weights[3] + 64) >> 7);
+ }
+ src += stride;
+ dst += stride;
+ }
+}
+
+static void vp6_filter_diag2(vp56_context_t *s, uint8_t *dst, uint8_t *src,
+ int stride, int h_weight, int v_weight)
+{
+ uint8_t *tmp = s->edge_emu_buffer+16;
+ int x, xmax;
+
+ s->dsp.put_pixels_tab[1][0](tmp, src, stride, 8);
+ s->dsp.biweight_h264_pixels_tab[3](tmp, src+1, stride, 2,
+ 8-h_weight, h_weight, 0);
+ /* we need a 8x9 block to do vertical filter, so compute one more line */
+ for (x=8*stride, xmax=x+8; x<xmax; x++)
+ tmp[x] = (src[x]*(8-h_weight) + src[x+1]*h_weight + 4) >> 3;
+
+ s->dsp.put_pixels_tab[1][0](dst, tmp, stride, 8);
+ s->dsp.biweight_h264_pixels_tab[3](dst, tmp+stride, stride, 2,
+ 8-v_weight, v_weight, 0);
+}
+
+static void vp6_filter_diag4(uint8_t *dst, uint8_t *src, int stride,
+ const int16_t *h_weights,const int16_t *v_weights)
+{
+ int x, y;
+ int tmp[8*11];
+ int *t = tmp;
+
+ src -= stride;
+
+ for (y=0; y<11; y++) {
+ for (x=0; x<8; x++) {
+ t[x] = clip_uint8(( src[x-1] * h_weights[0]
+ + src[x ] * h_weights[1]
+ + src[x+1] * h_weights[2]
+ + src[x+2] * h_weights[3] + 64) >> 7);
+ }
+ src += stride;
+ t += 8;
+ }
+
+ t = tmp + 8;
+ for (y=0; y<8; y++) {
+ for (x=0; x<8; x++) {
+ dst[x] = clip_uint8(( t[x-8 ] * v_weights[0]
+ + t[x ] * v_weights[1]
+ + t[x+8 ] * v_weights[2]
+ + t[x+16] * v_weights[3] + 64) >> 7);
+ }
+ dst += stride;
+ t += 8;
+ }
+}
+
+static void vp6_filter(vp56_context_t *s, uint8_t *dst, uint8_t *src,
+ int offset1, int offset2, int stride,
+ vp56_mv_t mv, int mask, int select, int luma)
+{
+ int filter4 = 0;
+ int x8 = mv.x & mask;
+ int y8 = mv.y & mask;
+
+ if (luma) {
+ x8 *= 2;
+ y8 *= 2;
+ filter4 = s->filter_mode;
+ if (filter4 == 2) {
+ if (s->max_vector_length &&
+ (FFABS(mv.x) > s->max_vector_length ||
+ FFABS(mv.y) > s->max_vector_length)) {
+ filter4 = 0;
+ } else if (s->sample_variance_threshold
+ && (vp6_block_variance(src+offset1, stride)
+ < s->sample_variance_threshold)) {
+ filter4 = 0;
+ }
+ }
+ }
+
+ if ((y8 && (offset2-offset1)*s->flip<0) || (!y8 && offset1 > offset2)) {
+ offset1 = offset2;
+ }
+
+ if (filter4) {
+ if (!y8) { /* left or right combine */
+ vp6_filter_hv4(dst, src+offset1, stride, 1,
+ vp6_block_copy_filter[select][x8]);
+ } else if (!x8) { /* above or below combine */
+ vp6_filter_hv4(dst, src+offset1, stride, stride,
+ vp6_block_copy_filter[select][y8]);
+ } else if ((mv.x^mv.y) >> 31) { /* lower-left or upper-right combine */
+ vp6_filter_diag4(dst, src+offset1-1, stride,
+ vp6_block_copy_filter[select][x8],
+ vp6_block_copy_filter[select][y8]);
+ } else { /* lower-right or upper-left combine */
+ vp6_filter_diag4(dst, src+offset1, stride,
+ vp6_block_copy_filter[select][x8],
+ vp6_block_copy_filter[select][y8]);
+ }
+ } else {
+ if (!y8) { /* left or right combine */
+ vp6_filter_hv2(s, dst, src+offset1, stride, 1, x8);
+ } else if (!x8) { /* above or below combine */
+ vp6_filter_hv2(s, dst, src+offset1, stride, stride, y8);
+ } else if ((mv.x^mv.y) >> 31) { /* lower-left or upper-right combine */
+ vp6_filter_diag2(s, dst, src+offset1-1, stride, x8, y8);
+ } else { /* lower-right or upper-left combine */
+ vp6_filter_diag2(s, dst, src+offset1, stride, x8, y8);
+ }
+ }
+}
+
+static int vp6_decode_init(AVCodecContext *avctx)
+{
+ vp56_context_t *s = avctx->priv_data;
+
+ vp56_init(s, avctx, avctx->codec->id == CODEC_ID_VP6);
+ s->vp56_coord_div = vp6_coord_div;
+ s->parse_vector_adjustment = vp6_parse_vector_adjustment;
+ s->adjust = vp6_adjust;
+ s->filter = vp6_filter;
+ s->parse_coeff = vp6_parse_coeff;
+ s->default_models_init = vp6_default_models_init;
+ s->parse_vector_models = vp6_parse_vector_models;
+ s->parse_coeff_models = vp6_parse_coeff_models;
+ s->parse_header = vp6_parse_header;
+
+ return 0;
+}
+
+AVCodec vp6_decoder = {
+ "vp6",
+ CODEC_TYPE_VIDEO,
+ CODEC_ID_VP6,
+ sizeof(vp56_context_t),
+ vp6_decode_init,
+ NULL,
+ vp56_free,
+ vp56_decode_frame,
+};
+
+/* flash version, not flipped upside-down */
+AVCodec vp6f_decoder = {
+ "vp6f",
+ CODEC_TYPE_VIDEO,
+ CODEC_ID_VP6F,
+ sizeof(vp56_context_t),
+ vp6_decode_init,
+ NULL,
+ vp56_free,
+ vp56_decode_frame,
+};
diff --git a/src/libffmpeg/libavcodec/vp6data.h b/src/libffmpeg/libavcodec/vp6data.h
new file mode 100644
index 000000000..0545a9d66
--- /dev/null
+++ b/src/libffmpeg/libavcodec/vp6data.h
@@ -0,0 +1,300 @@
+/**
+ * @file vp6data.h
+ * VP6 compatible video decoder
+ *
+ * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef VP6DATA_H
+#define VP6DATA_H
+
+#include "vp56data.h"
+
+static const uint8_t vp6_def_fdv_vector_model[2][8] = {
+ { 247, 210, 135, 68, 138, 220, 239, 246 },
+ { 244, 184, 201, 44, 173, 221, 239, 253 },
+};
+
+static const uint8_t vp6_def_pdv_vector_model[2][7] = {
+ { 225, 146, 172, 147, 214, 39, 156 },
+ { 204, 170, 119, 235, 140, 230, 228 },
+};
+
+static const uint8_t vp6_def_coeff_reorder[] = {
+ 0, 0, 1, 1, 1, 2, 2, 2,
+ 2, 2, 2, 3, 3, 4, 4, 4,
+ 5, 5, 5, 5, 6, 6, 7, 7,
+ 7, 7, 7, 8, 8, 9, 9, 9,
+ 9, 9, 9, 10, 10, 11, 11, 11,
+ 11, 11, 11, 12, 12, 12, 12, 12,
+ 12, 13, 13, 13, 13, 13, 14, 14,
+ 14, 14, 15, 15, 15, 15, 15, 15,
+};
+
+static const uint8_t vp6_def_runv_coeff_model[2][14] = {
+ { 198, 197, 196, 146, 198, 204, 169, 142, 130, 136, 149, 149, 191, 249 },
+ { 135, 201, 181, 154, 98, 117, 132, 126, 146, 169, 184, 240, 246, 254 },
+};
+
+static const uint8_t vp6_sig_dct_pct[2][2] = {
+ { 237, 246 },
+ { 231, 243 },
+};
+
+static const uint8_t vp6_pdv_pct[2][7] = {
+ { 253, 253, 254, 254, 254, 254, 254 },
+ { 245, 253, 254, 254, 254, 254, 254 },
+};
+
+static const uint8_t vp6_fdv_pct[2][8] = {
+ { 254, 254, 254, 254, 254, 250, 250, 252 },
+ { 254, 254, 254, 254, 254, 251, 251, 254 },
+};
+
+static const uint8_t vp6_dccv_pct[2][11] = {
+ { 146, 255, 181, 207, 232, 243, 238, 251, 244, 250, 249 },
+ { 179, 255, 214, 240, 250, 255, 244, 255, 255, 255, 255 },
+};
+
+static const uint8_t vp6_coeff_reorder_pct[] = {
+ 255, 132, 132, 159, 153, 151, 161, 170,
+ 164, 162, 136, 110, 103, 114, 129, 118,
+ 124, 125, 132, 136, 114, 110, 142, 135,
+ 134, 123, 143, 126, 153, 183, 166, 161,
+ 171, 180, 179, 164, 203, 218, 225, 217,
+ 215, 206, 203, 217, 229, 241, 248, 243,
+ 253, 255, 253, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255,
+};
+
+static const uint8_t vp6_runv_pct[2][14] = {
+ { 219, 246, 238, 249, 232, 239, 249, 255, 248, 253, 239, 244, 241, 248 },
+ { 198, 232, 251, 253, 219, 241, 253, 255, 248, 249, 244, 238, 251, 255 },
+};
+
+static const uint8_t vp6_ract_pct[3][2][6][11] = {
+ { { { 227, 246, 230, 247, 244, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 209, 231, 231, 249, 249, 253, 255, 255, 255 },
+ { 255, 255, 225, 242, 241, 251, 253, 255, 255, 255, 255 },
+ { 255, 255, 241, 253, 252, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 248, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } },
+ { { 240, 255, 248, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 240, 253, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } } },
+ { { { 206, 203, 227, 239, 247, 255, 253, 255, 255, 255, 255 },
+ { 207, 199, 220, 236, 243, 252, 252, 255, 255, 255, 255 },
+ { 212, 219, 230, 243, 244, 253, 252, 255, 255, 255, 255 },
+ { 236, 237, 247, 252, 253, 255, 255, 255, 255, 255, 255 },
+ { 240, 240, 248, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } },
+ { { 230, 233, 249, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 238, 238, 250, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 248, 251, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } } },
+ { { { 225, 239, 227, 231, 244, 253, 243, 255, 255, 253, 255 },
+ { 232, 234, 224, 228, 242, 249, 242, 252, 251, 251, 255 },
+ { 235, 249, 238, 240, 251, 255, 249, 255, 253, 253, 255 },
+ { 249, 253, 251, 250, 255, 255, 255, 255, 255, 255, 255 },
+ { 251, 250, 249, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } },
+ { { 243, 244, 250, 250, 255, 255, 255, 255, 255, 255, 255 },
+ { 249, 248, 250, 253, 255, 255, 255, 255, 255, 255, 255 },
+ { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } } }
+};
+
+static const int vp6_dccv_lc[3][5][2] = {
+ { { 122, 133 }, { 0, 1 }, { 78, 171 }, { 139, 117 }, { 168, 79 } },
+ { { 133, 51 }, { 0, 1 }, { 169, 71 }, { 214, 44 }, { 210, 38 } },
+ { { 142, -16 }, { 0, 1 }, { 221, -30 }, { 246, -3 }, { 203, 17 } },
+};
+
+static const uint8_t vp6_coeff_groups[] = {
+ 0, 0, 1, 1, 1, 2, 2, 2,
+ 2, 2, 2, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+};
+
+static const int16_t vp6_block_copy_filter[17][8][4] = {
+ { { 0, 128, 0, 0 }, /* 0 */
+ { -3, 122, 9, 0 },
+ { -4, 109, 24, -1 },
+ { -5, 91, 45, -3 },
+ { -4, 68, 68, -4 },
+ { -3, 45, 91, -5 },
+ { -1, 24, 109, -4 },
+ { 0, 9, 122, -3 } },
+ { { 0, 128, 0, 0 }, /* 1 */
+ { -4, 124, 9, -1 },
+ { -5, 110, 25, -2 },
+ { -6, 91, 46, -3 },
+ { -5, 69, 69, -5 },
+ { -3, 46, 91, -6 },
+ { -2, 25, 110, -5 },
+ { -1, 9, 124, -4 } },
+ { { 0, 128, 0, 0 }, /* 2 */
+ { -4, 123, 10, -1 },
+ { -6, 110, 26, -2 },
+ { -7, 92, 47, -4 },
+ { -6, 70, 70, -6 },
+ { -4, 47, 92, -7 },
+ { -2, 26, 110, -6 },
+ { -1, 10, 123, -4 } },
+ { { 0, 128, 0, 0 }, /* 3 */
+ { -5, 124, 10, -1 },
+ { -7, 110, 27, -2 },
+ { -7, 91, 48, -4 },
+ { -6, 70, 70, -6 },
+ { -4, 48, 92, -8 },
+ { -2, 27, 110, -7 },
+ { -1, 10, 124, -5 } },
+ { { 0, 128, 0, 0 }, /* 4 */
+ { -6, 124, 11, -1 },
+ { -8, 111, 28, -3 },
+ { -8, 92, 49, -5 },
+ { -7, 71, 71, -7 },
+ { -5, 49, 92, -8 },
+ { -3, 28, 111, -8 },
+ { -1, 11, 124, -6 } },
+ { { 0, 128, 0, 0 }, /* 5 */
+ { -6, 123, 12, -1 },
+ { -9, 111, 29, -3 },
+ { -9, 93, 50, -6 },
+ { -8, 72, 72, -8 },
+ { -6, 50, 93, -9 },
+ { -3, 29, 111, -9 },
+ { -1, 12, 123, -6 } },
+ { { 0, 128, 0, 0 }, /* 6 */
+ { -7, 124, 12, -1 },
+ { -10, 111, 30, -3 },
+ { -10, 93, 51, -6 },
+ { -9, 73, 73, -9 },
+ { -6, 51, 93, -10 },
+ { -3, 30, 111, -10 },
+ { -1, 12, 124, -7 } },
+ { { 0, 128, 0, 0 }, /* 7 */
+ { -7, 123, 13, -1 },
+ { -11, 112, 31, -4 },
+ { -11, 94, 52, -7 },
+ { -10, 74, 74, -10 },
+ { -7, 52, 94, -11 },
+ { -4, 31, 112, -11 },
+ { -1, 13, 123, -7 } },
+ { { 0, 128, 0, 0 }, /* 8 */
+ { -8, 124, 13, -1 },
+ { -12, 112, 32, -4 },
+ { -12, 94, 53, -7 },
+ { -10, 74, 74, -10 },
+ { -7, 53, 94, -12 },
+ { -4, 32, 112, -12 },
+ { -1, 13, 124, -8 } },
+ { { 0, 128, 0, 0 }, /* 9 */
+ { -9, 124, 14, -1 },
+ { -13, 112, 33, -4 },
+ { -13, 95, 54, -8 },
+ { -11, 75, 75, -11 },
+ { -8, 54, 95, -13 },
+ { -4, 33, 112, -13 },
+ { -1, 14, 124, -9 } },
+ { { 0, 128, 0, 0 }, /* 10 */
+ { -9, 123, 15, -1 },
+ { -14, 113, 34, -5 },
+ { -14, 95, 55, -8 },
+ { -12, 76, 76, -12 },
+ { -8, 55, 95, -14 },
+ { -5, 34, 112, -13 },
+ { -1, 15, 123, -9 } },
+ { { 0, 128, 0, 0 }, /* 11 */
+ { -10, 124, 15, -1 },
+ { -14, 113, 34, -5 },
+ { -15, 96, 56, -9 },
+ { -13, 77, 77, -13 },
+ { -9, 56, 96, -15 },
+ { -5, 34, 113, -14 },
+ { -1, 15, 124, -10 } },
+ { { 0, 128, 0, 0 }, /* 12 */
+ { -10, 123, 16, -1 },
+ { -15, 113, 35, -5 },
+ { -16, 98, 56, -10 },
+ { -14, 78, 78, -14 },
+ { -10, 56, 98, -16 },
+ { -5, 35, 113, -15 },
+ { -1, 16, 123, -10 } },
+ { { 0, 128, 0, 0 }, /* 13 */
+ { -11, 124, 17, -2 },
+ { -16, 113, 36, -5 },
+ { -17, 98, 57, -10 },
+ { -14, 78, 78, -14 },
+ { -10, 57, 98, -17 },
+ { -5, 36, 113, -16 },
+ { -2, 17, 124, -11 } },
+ { { 0, 128, 0, 0 }, /* 14 */
+ { -12, 125, 17, -2 },
+ { -17, 114, 37, -6 },
+ { -18, 99, 58, -11 },
+ { -15, 79, 79, -15 },
+ { -11, 58, 99, -18 },
+ { -6, 37, 114, -17 },
+ { -2, 17, 125, -12 } },
+ { { 0, 128, 0, 0 }, /* 15 */
+ { -12, 124, 18, -2 },
+ { -18, 114, 38, -6 },
+ { -19, 99, 59, -11 },
+ { -16, 80, 80, -16 },
+ { -11, 59, 99, -19 },
+ { -6, 38, 114, -18 },
+ { -2, 18, 124, -12 } },
+ { { 0, 128, 0, 0 }, /* 16 */
+ { -4, 118, 16, -2 },
+ { -7, 106, 34, -5 },
+ { -8, 90, 53, -7 },
+ { -8, 72, 72, -8 },
+ { -7, 53, 90, -8 },
+ { -5, 34, 106, -7 },
+ { -2, 16, 118, -4 } },
+};
+
+static const vp56_tree_t vp6_pcr_tree[] = {
+ { 8, 0},
+ { 4, 1},
+ { 2, 2}, {-1}, {-2},
+ { 2, 3}, {-3}, {-4},
+ { 8, 4},
+ { 4, 5},
+ { 2, 6}, {-5}, {-6},
+ { 2, 7}, {-7}, {-8},
+ {-0},
+};
+
+static const uint8_t vp6_coord_div[] = { 4, 4, 4, 4, 8, 8 };
+
+#endif /* VP6DATA_H */
diff --git a/src/libffmpeg/libavcodec/wmadec.c b/src/libffmpeg/libavcodec/wmadec.c
index 684aea2c8..bbf4970ce 100644
--- a/src/libffmpeg/libavcodec/wmadec.c
+++ b/src/libffmpeg/libavcodec/wmadec.c
@@ -115,6 +115,8 @@ typedef struct WMADecodeContext {
float max_exponent[MAX_CHANNELS];
int16_t coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE];
DECLARE_ALIGNED_16(float, coefs[MAX_CHANNELS][BLOCK_MAX_SIZE]);
+ DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]);
+ DECLARE_ALIGNED_16(float, window[BLOCK_MAX_SIZE * 2]);
MDCTContext mdct_ctx[BLOCK_NB_SIZES];
float *windows[BLOCK_NB_SIZES];
DECLARE_ALIGNED_16(FFTSample, mdct_tmp[BLOCK_MAX_SIZE]); /* temporary storage for imdct */
@@ -717,7 +719,6 @@ static int wma_decode_block(WMADecodeContext *s)
{
int n, v, a, ch, code, bsize;
int coef_nb_bits, total_gain, parse_exponents;
- DECLARE_ALIGNED_16(float, window[BLOCK_MAX_SIZE * 2]);
int nb_coefs[MAX_CHANNELS];
float mdct_norm;
@@ -1072,7 +1073,7 @@ static int wma_decode_block(WMADecodeContext *s)
next_block_len = 1 << s->next_block_len_bits;
/* right part */
- wptr = window + block_len;
+ wptr = s->window + block_len;
if (block_len <= next_block_len) {
for(i=0;i<block_len;i++)
*wptr++ = s->windows[bsize][i];
@@ -1088,7 +1089,7 @@ static int wma_decode_block(WMADecodeContext *s)
}
/* left part */
- wptr = window + block_len;
+ wptr = s->window + block_len;
if (block_len <= prev_block_len) {
for(i=0;i<block_len;i++)
*--wptr = s->windows[bsize][i];
@@ -1107,14 +1108,13 @@ static int wma_decode_block(WMADecodeContext *s)
for(ch = 0; ch < s->nb_channels; ch++) {
if (s->channel_coded[ch]) {
- DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]);
float *ptr;
int n4, index, n;
n = s->block_len;
n4 = s->block_len / 2;
s->mdct_ctx[bsize].fft.imdct_calc(&s->mdct_ctx[bsize],
- output, s->coefs[ch], s->mdct_tmp);
+ s->output, s->coefs[ch], s->mdct_tmp);
/* XXX: optimize all that by build the window and
multipying/adding at the same time */
@@ -1122,13 +1122,13 @@ static int wma_decode_block(WMADecodeContext *s)
/* multiply by the window and add in the frame */
index = (s->frame_len / 2) + s->block_pos - n4;
ptr = &s->frame_out[ch][index];
- s->dsp.vector_fmul_add_add(ptr,window,output,ptr,0,2*n,1);
+ s->dsp.vector_fmul_add_add(ptr,s->window,s->output,ptr,0,2*n,1);
/* specific fast case for ms-stereo : add to second
channel if it is not coded */
if (s->ms_stereo && !s->channel_coded[1]) {
ptr = &s->frame_out[1][index];
- s->dsp.vector_fmul_add_add(ptr,window,output,ptr,0,2*n,1);
+ s->dsp.vector_fmul_add_add(ptr,s->window,s->output,ptr,0,2*n,1);
}
}
}
diff --git a/src/libffmpeg/libavcodec/wmv2.c b/src/libffmpeg/libavcodec/wmv2.c
index 5abc51775..f3d4f0f23 100644
--- a/src/libffmpeg/libavcodec/wmv2.c
+++ b/src/libffmpeg/libavcodec/wmv2.c
@@ -643,6 +643,12 @@ void ff_mspel_motion(MpegEncContext *s,
v_edge_pos = s->v_edge_pos;
src_x = clip(src_x, -16, s->width);
src_y = clip(src_y, -16, s->height);
+
+ if(src_x<=-16 || src_x >= s->width)
+ dxy &= ~3;
+ if(src_y<=-16 || src_y >= s->height)
+ dxy &= ~4;
+
linesize = s->linesize;
uvlinesize = s->uvlinesize;
ptr = ref_picture[0] + (src_y * linesize) + src_x;
diff --git a/src/libffmpeg/libavutil/Makefile.am b/src/libffmpeg/libavutil/Makefile.am
index 76340cf14..6e507cb67 100644
--- a/src/libffmpeg/libavutil/Makefile.am
+++ b/src/libffmpeg/libavutil/Makefile.am
@@ -1,6 +1,6 @@
include $(top_srcdir)/misc/Makefile.common
-AM_CPPFLAGS = $(LIBFFMPEG_CPPFLAGS)
+AM_CPPFLAGS = $(LIBFFMPEG_CPPFLAGS) -I$(top_srcdir)/src/libffmpeg
AM_CFLAGS = -fno-strict-aliasing
ASFLAGS =
@@ -28,6 +28,7 @@ noinst_HEADERS = \
integer.h \
internal.h \
intfloat_readwrite.h \
+ intreadwrite.h \
lls.h \
log.h \
mathematics.h \
diff --git a/src/libffmpeg/libavutil/bswap.h b/src/libffmpeg/libavutil/bswap.h
index 4614c9045..03d613db2 100644
--- a/src/libffmpeg/libavutil/bswap.h
+++ b/src/libffmpeg/libavutil/bswap.h
@@ -37,7 +37,7 @@
#endif
#if defined(ARCH_X86)
-static always_inline uint16_t bswap_16(uint16_t x)
+static av_always_inline uint16_t bswap_16(uint16_t x)
{
__asm("rorw $8, %0" :
LEGACY_REGS (x) :
@@ -45,7 +45,7 @@ static always_inline uint16_t bswap_16(uint16_t x)
return x;
}
-static always_inline uint32_t bswap_32(uint32_t x)
+static av_always_inline uint32_t bswap_32(uint32_t x)
{
#if __CPU__ != 386
__asm("bswap %0":
@@ -82,12 +82,12 @@ static inline uint64_t bswap_64(uint64_t x)
#elif defined(ARCH_SH4)
-static always_inline uint16_t bswap_16(uint16_t x) {
+static av_always_inline uint16_t bswap_16(uint16_t x) {
__asm__("swap.b %0,%0":"=r"(x):"0"(x));
return x;
}
-static always_inline uint32_t bswap_32(uint32_t x) {
+static av_always_inline uint32_t bswap_32(uint32_t x) {
__asm__(
"swap.b %0,%0\n"
"swap.w %0,%0\n"
@@ -110,12 +110,12 @@ static inline uint64_t bswap_64(uint64_t x)
}
#else
-static always_inline uint16_t bswap_16(uint16_t x){
+static av_always_inline uint16_t bswap_16(uint16_t x){
return (x>>8) | (x<<8);
}
#ifdef ARCH_ARM
-static always_inline uint32_t bswap_32(uint32_t x){
+static av_always_inline uint32_t bswap_32(uint32_t x){
uint32_t t;
__asm__ (
"eor %1, %0, %0, ror #16 \n\t"
@@ -126,7 +126,7 @@ static always_inline uint32_t bswap_32(uint32_t x){
return x;
}
#else
-static always_inline uint32_t bswap_32(uint32_t x){
+static av_always_inline uint32_t bswap_32(uint32_t x){
x= ((x<<8)&0xFF00FF00) | ((x>>8)&0x00FF00FF);
return (x>>16) | (x<<16);
}
diff --git a/src/libffmpeg/libavutil/common.h b/src/libffmpeg/libavutil/common.h
index d167404b6..0e093616c 100644
--- a/src/libffmpeg/libavutil/common.h
+++ b/src/libffmpeg/libavutil/common.h
@@ -26,9 +26,7 @@
#ifndef COMMON_H
#define COMMON_H
-#ifndef M_PI
-#define M_PI 3.14159265358979323846
-#endif
+#include <inttypes.h>
#ifdef HAVE_AV_CONFIG_H
/* only include the following when compiling package */
@@ -47,34 +45,17 @@
# include <math.h>
#endif /* HAVE_AV_CONFIG_H */
-/* Suppress restrict if it was not defined in config.h. */
-#ifndef restrict
-# define restrict
-#endif
-
-#ifndef always_inline
-#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
-# define always_inline __attribute__((always_inline)) inline
-#else
-# define always_inline inline
-#endif
-#endif
-
-#ifndef attribute_used
+#ifndef av_always_inline
#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
-# define attribute_used __attribute__((used))
+# define av_always_inline __attribute__((always_inline)) inline
#else
-# define attribute_used
+# define av_always_inline inline
#endif
#endif
-#ifndef attribute_unused
-#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
-# define attribute_unused __attribute__((unused))
-#else
-# define attribute_unused
-#endif
-#endif
+#ifdef HAVE_AV_CONFIG_H
+# include "internal.h"
+#endif /* HAVE_AV_CONFIG_H */
#ifndef attribute_deprecated
#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
@@ -84,91 +65,9 @@
#endif
#endif
-# include <inttypes.h>
-
-#ifndef PRId64
-#define PRId64 "lld"
-#endif
-
-#ifndef PRIu64
-#define PRIu64 "llu"
-#endif
-
-#ifndef PRIx64
-#define PRIx64 "llx"
-#endif
-
-#ifndef PRIX64
-#define PRIX64 "llX"
-#endif
-
-#ifndef PRId32
-#define PRId32 "d"
-#endif
-
-#ifndef PRIdFAST16
-#define PRIdFAST16 PRId32
-#endif
-
-#ifndef PRIdFAST32
-#define PRIdFAST32 PRId32
-#endif
-
-#ifndef INT16_MIN
-#define INT16_MIN (-0x7fff-1)
-#endif
-
-#ifndef INT16_MAX
-#define INT16_MAX 0x7fff
-#endif
-
-#ifndef INT32_MIN
-#define INT32_MIN (-0x7fffffff-1)
-#endif
-
-#ifndef INT32_MAX
-#define INT32_MAX 0x7fffffff
-#endif
-
-#ifndef UINT32_MAX
-#define UINT32_MAX 0xffffffff
-#endif
-
-#ifndef INT64_MIN
-#define INT64_MIN (-0x7fffffffffffffffLL-1)
-#endif
-
-#ifndef INT64_MAX
-#define INT64_MAX int64_t_C(9223372036854775807)
-#endif
-
-#ifndef UINT64_MAX
-#define UINT64_MAX uint64_t_C(0xFFFFFFFFFFFFFFFF)
-#endif
-
-#ifndef INT_BIT
-# if INT_MAX != 2147483647
-# define INT_BIT 64
-# else
-# define INT_BIT 32
-# endif
-#endif
-
-#ifndef int64_t_C
-#define int64_t_C(c) (c ## LL)
-#define uint64_t_C(c) (c ## ULL)
-#endif
-
-#if defined(__MINGW32__) && !defined(BUILD_AVUTIL) && defined(BUILD_SHARED_AV)
-# define FF_IMPORT_ATTR __declspec(dllimport)
-#else
-# define FF_IMPORT_ATTR
-#endif
-
-
-#ifdef HAVE_AV_CONFIG_H
-/* only include the following when compiling package */
-# include "internal.h"
+#ifndef INT64_C
+#define INT64_C(c) (c ## LL)
+#define UINT64_C(c) (c ## ULL)
#endif
//rounded divison & shift
@@ -184,7 +83,7 @@
#define FFSWAP(type,a,b) do{type SWAP_tmp= b; b= a; a= SWAP_tmp;}while(0)
/* misc math functions */
-extern FF_IMPORT_ATTR const uint8_t ff_log2_tab[256];
+extern const uint8_t ff_log2_tab[256];
static inline int av_log2(unsigned int v)
{
@@ -375,7 +274,7 @@ static inline uint64_t read_time(void)
);
return (d << 32) | (a & 0xffffffff);
}
-#elif defined(ARCH_X86)
+#elif defined(ARCH_X86_32)
static inline long long read_time(void)
{
long long l;
@@ -465,4 +364,8 @@ void av_freep(void *ptr);
# define ASMALIGN(ZEROBITS) ".align 1<<" #ZEROBITS "\n\t"
#endif
+/* xine: another config.h with codecs to use */
+#include "ffmpeg_config.h"
+
#endif /* COMMON_H */
+
diff --git a/src/libffmpeg/libavutil/internal.h b/src/libffmpeg/libavutil/internal.h
index 7d850141b..0c4b44170 100644
--- a/src/libffmpeg/libavutil/internal.h
+++ b/src/libffmpeg/libavutil/internal.h
@@ -26,6 +26,94 @@
#ifndef INTERNAL_H
#define INTERNAL_H
+#ifndef attribute_used
+#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
+# define attribute_used __attribute__((used))
+#else
+# define attribute_used
+#endif
+#endif
+
+#ifndef attribute_unused
+#if defined(__GNUC__)
+# define attribute_unused __attribute__((unused))
+#else
+# define attribute_unused
+#endif
+#endif
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+#ifndef PRId64
+#define PRId64 "lld"
+#endif
+
+#ifndef PRIu64
+#define PRIu64 "llu"
+#endif
+
+#ifndef PRIx64
+#define PRIx64 "llx"
+#endif
+
+#ifndef PRIX64
+#define PRIX64 "llX"
+#endif
+
+#ifndef PRId32
+#define PRId32 "d"
+#endif
+
+#ifndef PRIdFAST16
+#define PRIdFAST16 PRId32
+#endif
+
+#ifndef PRIdFAST32
+#define PRIdFAST32 PRId32
+#endif
+
+#ifndef INT16_MIN
+#define INT16_MIN (-0x7fff-1)
+#endif
+
+#ifndef INT16_MAX
+#define INT16_MAX 0x7fff
+#endif
+
+#ifndef INT32_MIN
+#define INT32_MIN (-0x7fffffff-1)
+#endif
+
+#ifndef INT32_MAX
+#define INT32_MAX 0x7fffffff
+#endif
+
+#ifndef UINT32_MAX
+#define UINT32_MAX 0xffffffff
+#endif
+
+#ifndef INT64_MIN
+#define INT64_MIN (-0x7fffffffffffffffLL-1)
+#endif
+
+#ifndef INT64_MAX
+#define INT64_MAX INT64_C(9223372036854775807)
+#endif
+
+#ifndef UINT64_MAX
+#define UINT64_MAX UINT64_C(0xFFFFFFFFFFFFFFFF)
+#endif
+
+#ifndef INT_BIT
+# if INT_MAX != 2147483647
+# define INT_BIT 64
+# else
+# define INT_BIT 32
+# endif
+#endif
+
#if ( defined(__PIC__) || defined(__pic__) ) && ! defined(PIC)
# define PIC
#endif
@@ -34,6 +122,7 @@
# define ENODATA 61
#endif
+#include "intreadwrite.h"
#include "bswap.h"
#include <stddef.h>
@@ -136,7 +225,7 @@ extern const uint32_t ff_inverse[256];
# define FASTDIV(a,b) ((a)/(b))
#endif
-extern FF_IMPORT_ATTR const uint8_t ff_sqrt_tab[128];
+extern const uint8_t ff_sqrt_tab[128];
static inline int ff_sqrt(int a)
{
@@ -216,7 +305,7 @@ if((y)<(x)){\
/* XXX: add ISOC specific test to avoid specific BSD testing. */
/* better than nothing implementation. */
/* btw, rintf() is existing on fbsd too -- alex */
-static always_inline long int lrintf(float x)
+static av_always_inline long int lrintf(float x)
{
#ifdef __MINGW32__
# ifdef ARCH_X86_32
diff --git a/src/libffmpeg/libavutil/intreadwrite.h b/src/libffmpeg/libavutil/intreadwrite.h
new file mode 100644
index 000000000..c43f9d651
--- /dev/null
+++ b/src/libffmpeg/libavutil/intreadwrite.h
@@ -0,0 +1,42 @@
+#ifndef INTREADWRITE_H
+#define INTREADWRITE_H
+
+#ifdef __GNUC__
+
+struct unaligned_64 { uint64_t l; } __attribute__((packed));
+struct unaligned_32 { uint32_t l; } __attribute__((packed));
+struct unaligned_16 { uint16_t l; } __attribute__((packed));
+
+#define LD16(a) (((const struct unaligned_16 *) (a))->l)
+#define LD32(a) (((const struct unaligned_32 *) (a))->l)
+#define LD64(a) (((const struct unaligned_64 *) (a))->l)
+
+#define ST16(a, b) (((struct unaligned_16 *) (a))->l) = (b)
+#define ST32(a, b) (((struct unaligned_32 *) (a))->l) = (b)
+
+#else /* __GNUC__ */
+
+#define LD16(a) (*((uint16_t*)(a)))
+#define LD32(a) (*((uint32_t*)(a)))
+#define LD64(a) (*((uint64_t*)(a)))
+
+#define ST16(a, b) *((uint16_t*)(a)) = (b)
+#define ST32(a, b) *((uint32_t*)(a)) = (b)
+
+#endif /* !__GNUC__ */
+
+/* endian macros */
+#if !defined(BE_16) || !defined(BE_32) || !defined(LE_16) || !defined(LE_32)
+#define BE_16(x) ((((uint8_t*)(x))[0] << 8) | ((uint8_t*)(x))[1])
+#define BE_32(x) ((((uint8_t*)(x))[0] << 24) | \
+ (((uint8_t*)(x))[1] << 16) | \
+ (((uint8_t*)(x))[2] << 8) | \
+ ((uint8_t*)(x))[3])
+#define LE_16(x) ((((uint8_t*)(x))[1] << 8) | ((uint8_t*)(x))[0])
+#define LE_32(x) ((((uint8_t*)(x))[3] << 24) | \
+ (((uint8_t*)(x))[2] << 16) | \
+ (((uint8_t*)(x))[1] << 8) | \
+ ((uint8_t*)(x))[0])
+#endif
+
+#endif /* INTREADWRITE_H */
diff --git a/src/libffmpeg/libavutil/rational.c b/src/libffmpeg/libavutil/rational.c
index 0e018c41b..0480aa882 100644
--- a/src/libffmpeg/libavutil/rational.c
+++ b/src/libffmpeg/libavutil/rational.c
@@ -38,8 +38,10 @@ int av_reduce(int *dst_nom, int *dst_den, int64_t nom, int64_t den, int64_t max)
int sign= (nom<0) ^ (den<0);
int64_t gcd= ff_gcd(FFABS(nom), FFABS(den));
- nom = FFABS(nom)/gcd;
- den = FFABS(den)/gcd;
+ if(gcd){
+ nom = FFABS(nom)/gcd;
+ den = FFABS(den)/gcd;
+ }
if(nom<=max && den<=max){
a1= (AVRational){nom, den};
den=0;
@@ -65,7 +67,7 @@ int av_reduce(int *dst_nom, int *dst_den, int64_t nom, int64_t den, int64_t max)
nom= den;
den= next_den;
}
- assert(ff_gcd(a1.num, a1.den) == 1);
+ assert(ff_gcd(a1.num, a1.den) <= 1U);
*dst_nom = sign ? -a1.num : a1.num;
*dst_den = a1.den;
diff --git a/src/libffmpeg/video_decoder.c b/src/libffmpeg/video_decoder.c
index ad2bc99b4..b019d52d3 100644
--- a/src/libffmpeg/video_decoder.c
+++ b/src/libffmpeg/video_decoder.c
@@ -17,7 +17,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
- * $Id: video_decoder.c,v 1.64 2006/12/02 21:06:18 miguelfreitas Exp $
+ * $Id: video_decoder.c,v 1.65 2007/01/13 21:19:52 miguelfreitas Exp $
*
* xine video decoder plugin using ffmpeg
*
@@ -25,6 +25,7 @@
#ifdef HAVE_CONFIG_H
#include "config.h"
+#include "ffmpeg_config.h"
#endif
#include <stdlib.h>
@@ -116,6 +117,8 @@ struct ff_video_decoder_s {
int is_direct_rendering_disabled;
AVPaletteControl palette_control;
+
+ xine_list_t *dr1_frames;
};
@@ -203,16 +206,25 @@ static int get_buffer(AVCodecContext *context, AVFrame *av_frame){
av_frame->type= FF_BUFFER_TYPE_USER;
+ xine_list_push_back(this->dr1_frames, av_frame);
+
return 0;
}
static void release_buffer(struct AVCodecContext *context, AVFrame *av_frame){
+ ff_video_decoder_t *this = (ff_video_decoder_t *)context->opaque;
if (av_frame->type == FF_BUFFER_TYPE_USER) {
vo_frame_t *img = (vo_frame_t *)av_frame->opaque;
+ xine_list_iterator_t it;
assert(av_frame->opaque);
img->free(img);
+
+ it = xine_list_find(this->dr1_frames, av_frame);
+ assert(it);
+ if( it != NULL )
+ xine_list_remove(this->dr1_frames, it);
} else {
avcodec_default_release_buffer(context, av_frame);
}
@@ -249,6 +261,8 @@ static const ff_codec_t ff_video_lookup[] = {
{BUF_VIDEO_DV, CODEC_ID_DVVIDEO, "DV (ffmpeg)"},
{BUF_VIDEO_HUFFYUV, CODEC_ID_HUFFYUV, "HuffYUV (ffmpeg)"},
{BUF_VIDEO_VP31, CODEC_ID_VP3, "On2 VP3.1 (ffmpeg)"},
+ {BUF_VIDEO_VP5, CODEC_ID_VP5, "On2 VP5 (ffmpeg)"},
+ {BUF_VIDEO_VP6, CODEC_ID_VP6, "On2 VP6 (ffmpeg)"},
{BUF_VIDEO_4XM, CODEC_ID_4XM, "4X Video (ffmpeg)"},
{BUF_VIDEO_CINEPAK, CODEC_ID_CINEPAK, "Cinepak (ffmpeg)"},
{BUF_VIDEO_MSVC, CODEC_ID_MSVIDEO1, "Microsoft Video 1 (ffmpeg)"},
@@ -376,7 +390,7 @@ static void init_video_codec (ff_video_decoder_t *this, unsigned int codec_type)
/* enable direct rendering by default */
this->output_format = XINE_IMGFMT_YV12;
#ifdef ENABLE_DIRECT_RENDERING
- if( this->codec->capabilities & CODEC_CAP_DR1 ) {
+ if( this->codec->capabilities & CODEC_CAP_DR1 && this->codec->id != CODEC_ID_H264 ) {
this->context->get_buffer = get_buffer;
this->context->release_buffer = release_buffer;
xprintf(this->stream->xine, XINE_VERBOSITY_LOG,
@@ -801,7 +815,7 @@ static void ff_check_bufsize (ff_video_decoder_t *this, int size) {
xprintf(this->stream->xine, XINE_VERBOSITY_LOG,
_("ffmpeg_video_dec: increasing buffer to %d to avoid overflow.\n"),
this->bufsize);
- this->buf = realloc(this->buf, this->bufsize);
+ this->buf = realloc(this->buf, this->bufsize + FF_INPUT_BUFFER_PADDING_SIZE );
}
}
@@ -826,7 +840,7 @@ static void ff_handle_header_buffer (ff_video_decoder_t *this, buf_element_t *bu
lprintf ("header buffer\n");
/* accumulate data */
- ff_check_bufsize(this, this->size + buf->size + FF_INPUT_BUFFER_PADDING_SIZE);
+ ff_check_bufsize(this, this->size + buf->size);
xine_fast_memcpy (&this->buf[this->size], buf->content, buf->size);
this->size += buf->size;
@@ -1102,7 +1116,7 @@ static void ff_handle_buffer (ff_video_decoder_t *this, buf_element_t *buf) {
lprintf("no memcpy needed to accumulate data\n");
} else {
/* copy data into our internal buffer */
- ff_check_bufsize(this, this->size + buf->size + FF_INPUT_BUFFER_PADDING_SIZE);
+ ff_check_bufsize(this, this->size + buf->size);
chunk_buf = this->buf; /* ff_check_bufsize might realloc this->buf */
xine_fast_memcpy (&this->buf[this->size], buf->content, buf->size);
@@ -1122,7 +1136,13 @@ static void ff_handle_buffer (ff_video_decoder_t *this, buf_element_t *buf) {
int codec_type = buf->type & 0xFFFF0000;
/* pad input data */
- chunk_buf[this->size] = 0;
+ /* note: bitstream, alt bitstream reader or something will cause
+ * severe mpeg4 artifacts if padding is less than 32 bits.
+ */
+ chunk_buf[this->size+0] = 0;
+ chunk_buf[this->size+1] = 0;
+ chunk_buf[this->size+2] = 0;
+ chunk_buf[this->size+3] = 0;
while (this->size > 0) {
@@ -1150,7 +1170,7 @@ static void ff_handle_buffer (ff_video_decoder_t *this, buf_element_t *buf) {
this->size -= len;
if (this->size > 0) {
- ff_check_bufsize(this, this->size + FF_INPUT_BUFFER_PADDING_SIZE);
+ ff_check_bufsize(this, this->size);
memmove (this->buf, &chunk_buf[offset], this->size);
chunk_buf = this->buf;
}
@@ -1256,7 +1276,7 @@ static void ff_handle_buffer (ff_video_decoder_t *this, buf_element_t *buf) {
img->crop_bottom = this->crop_bottom;
this->skipframes = img->draw(img, this->stream);
-
+
if(free_img)
img->free(img);
}
@@ -1360,12 +1380,23 @@ static void ff_dispose (video_decoder_t *this_gen) {
ff_video_decoder_t *this = (ff_video_decoder_t *) this_gen;
lprintf ("ff_dispose\n");
-
+
if (this->decoder_ok) {
+ xine_list_iterator_t it;
+ AVFrame *av_frame;
+
pthread_mutex_lock(&ffmpeg_lock);
avcodec_close (this->context);
pthread_mutex_unlock(&ffmpeg_lock);
-
+
+ /* frame garbage collector here - workaround for buggy ffmpeg codecs that
+ * don't release their DR1 frames */
+ while( (it = xine_list_front(this->dr1_frames)) != NULL )
+ {
+ av_frame = (AVFrame *)xine_list_get_value(this->dr1_frames, it);
+ release_buffer(this->context, av_frame);
+ }
+
this->stream->video_out->close(this->stream->video_out, this->stream);
this->decoder_ok = 0;
}
@@ -1394,6 +1425,8 @@ static void ff_dispose (video_decoder_t *this_gen) {
if(this->pp_mode)
pp_free_mode(this->pp_mode);
+
+ xine_list_delete(this->dr1_frames);
free (this_gen);
}
@@ -1433,6 +1466,8 @@ static video_decoder_t *ff_video_open_plugin (video_decoder_class_t *class_gen,
this->pp_context = NULL;
this->pp_mode = NULL;
+ this->dr1_frames = xine_list_new();
+
mpeg_parser_init(&this->mpeg_parser);
return &this->video_decoder;
@@ -1483,73 +1518,223 @@ void *init_video_plugin (xine_t *xine, void *data) {
}
static uint32_t supported_video_types[] = {
- BUF_VIDEO_MSMPEG4_V1,
+ #ifdef CONFIG_MSMPEG4V1_DECODER
+ BUF_VIDEO_MSMPEG4_V1,
+ #endif
+ #ifdef CONFIG_MSMPEG4V2_DECODER
BUF_VIDEO_MSMPEG4_V2,
- BUF_VIDEO_MSMPEG4_V3,
- BUF_VIDEO_WMV7,
+ #endif
+ #ifdef CONFIG_MSMPEG4V3_DECODER
+ BUF_VIDEO_MSMPEG4_V3,
+ #endif
+ #ifdef CONFIG_WMV1_DECODER
+ BUF_VIDEO_WMV7,
+ #endif
+ #ifdef CONFIG_WMV2_DECODER
+ BUF_VIDEO_WMV8,
+ #endif
+ #ifdef CONFIG_WMV3_DECODER
+ BUF_VIDEO_WMV9,
+ #endif
+ #ifdef CONFIG_MPEG4_DECODER
BUF_VIDEO_MPEG4,
- BUF_VIDEO_XVID,
- BUF_VIDEO_DIVX5,
+ #endif
+ #ifdef CONFIG_MPEG4_DECODER
+ BUF_VIDEO_XVID,
+ #endif
+ #ifdef CONFIG_MPEG4_DECODER
+ BUF_VIDEO_DIVX5,
+ #endif
+ #ifdef CONFIG_MPEG4_DECODER
BUF_VIDEO_3IVX,
+ #endif
+ #ifdef CONFIG_MJPEG_DECODER
+ BUF_VIDEO_JPEG,
+ #endif
+ #ifdef CONFIG_MJPEG_DECODER
BUF_VIDEO_MJPEG,
+ #endif
+ #ifdef CONFIG_MJPEGB_DECODER
BUF_VIDEO_MJPEG_B,
+ #endif
+ #ifdef CONFIG_H263I_DECODER
+ BUF_VIDEO_I263,
+ #endif
+ #ifdef CONFIG_H263_DECODER
BUF_VIDEO_H263,
+ #endif
+ #ifdef CONFIG_RV10_DECODER
BUF_VIDEO_RV10,
+ #endif
+ #ifdef CONFIG_RV20_DECODER
BUF_VIDEO_RV20,
+ #endif
+ #ifdef CONFIG_INDEO3_DECODER
BUF_VIDEO_IV31,
+ #endif
+ #ifdef CONFIG_INDEO3_DECODER
BUF_VIDEO_IV32,
+ #endif
+ #ifdef CONFIG_SVQ1_DECODER
BUF_VIDEO_SORENSON_V1,
+ #endif
+ #ifdef CONFIG_SVQ3_DECODER
BUF_VIDEO_SORENSON_V3,
- BUF_VIDEO_JPEG,
- BUF_VIDEO_MPEG,
+ #endif
+ #ifdef CONFIG_DVVIDEO_DECODER
BUF_VIDEO_DV,
+ #endif
+ #ifdef CONFIG_HUFFYUV_DECODER
BUF_VIDEO_HUFFYUV,
+ #endif
+ #ifdef CONFIG_VP3_DECODER
BUF_VIDEO_VP31,
+ #endif
+ #ifdef CONFIG_VP5_DECODER
+ BUF_VIDEO_VP5,
+ #endif
+ #ifdef CONFIG_VP6_DECODER
+ BUF_VIDEO_VP6,
+ #endif
+ #ifdef CONFIG_4XM_DECODER
BUF_VIDEO_4XM,
+ #endif
+ #ifdef CONFIG_CINEPAK_DECODER
BUF_VIDEO_CINEPAK,
+ #endif
+ #ifdef CONFIG_MSVIDEO1_DECODER
BUF_VIDEO_MSVC,
+ #endif
+ #ifdef CONFIG_MSRLE_DECODER
BUF_VIDEO_MSRLE,
+ #endif
+ #ifdef CONFIG_RPZA_DECODER
BUF_VIDEO_RPZA,
+ #endif
+ #ifdef CONFIG_CYUV_DECODER
BUF_VIDEO_CYUV,
+ #endif
+ #ifdef CONFIG_ROQ_DECODER
BUF_VIDEO_ROQ,
+ #endif
+ #ifdef CONFIG_IDCIN_DECODER
BUF_VIDEO_IDCIN,
+ #endif
+ #ifdef CONFIG_XAN_WC3_DECODER
BUF_VIDEO_WC3,
+ #endif
+ #ifdef CONFIG_WS_VQA_DECODER
BUF_VIDEO_VQA,
+ #endif
+ #ifdef CONFIG_INTERPLAY_VIDEO_DECODER
BUF_VIDEO_INTERPLAY,
+ #endif
+ #ifdef CONFIG_FLIC_DECODER
BUF_VIDEO_FLI,
+ #endif
+ #ifdef CONFIG_8BPS_DECODER
BUF_VIDEO_8BPS,
+ #endif
+ #ifdef CONFIG_SMC_DECODER
BUF_VIDEO_SMC,
- BUF_VIDEO_VMD,
+ #endif
+ #ifdef CONFIG_TRUEMOTION1_DECODER
BUF_VIDEO_DUCKTM1,
+ #endif
+ #ifdef CONFIG_TRUEMOTION2_DECODER
BUF_VIDEO_DUCKTM2,
+ #endif
+ #ifdef CONFIG_VMDVIDEO_DECODER
+ BUF_VIDEO_VMD,
+ #endif
+ #ifdef CONFIG_ZLIB_DECODER
BUF_VIDEO_ZLIB,
+ #endif
+ #ifdef CONFIG_MSZH_DECODER
BUF_VIDEO_MSZH,
+ #endif
+ #ifdef CONFIG_ASV1_DECODER
BUF_VIDEO_ASV1,
+ #endif
+ #ifdef CONFIG_ASV2_DECODER
BUF_VIDEO_ASV2,
+ #endif
+ #ifdef CONFIG_VCR1_DECODER
BUF_VIDEO_ATIVCR1,
+ #endif
+ #ifdef CONFIG_FLV1_DECODER
BUF_VIDEO_FLV1,
+ #endif
+ #ifdef CONFIG_QTRLE_DECODER
BUF_VIDEO_QTRLE,
+ #endif
+ #ifdef CONFIG_H264_DECODER
BUF_VIDEO_H264,
+ #endif
+ #ifdef CONFIG_H261_DECODER
BUF_VIDEO_H261,
+ #endif
+ #ifdef CONFIG_AASC_DECODER
BUF_VIDEO_AASC,
+ #endif
+ #ifdef CONFIG_LOCO_DECODER
BUF_VIDEO_LOCO,
+ #endif
+ #ifdef CONFIG_QDRAW_DECODER
BUF_VIDEO_QDRW,
+ #endif
+ #ifdef CONFIG_QPEG_DECODER
BUF_VIDEO_QPEG,
+ #endif
+ #ifdef CONFIG_TSCC_DECODER
BUF_VIDEO_TSCC,
+ #endif
+ #ifdef CONFIG_ULTI_DECODER
BUF_VIDEO_ULTI,
+ #endif
+ #ifdef CONFIG_WNV1_DECODER
BUF_VIDEO_WNV1,
+ #endif
+ #ifdef CONFIG_VIXL_DECODER
BUF_VIDEO_XL,
+ #endif
+ #ifdef CONFIG_INDEO2_DECODER
BUF_VIDEO_RT21,
+ #endif
+ #ifdef CONFIG_FRAPS_DECODER
BUF_VIDEO_FPS1,
+ #endif
+ #ifdef CONFIG_MPEG1VIDEO_DECODER
+ BUF_VIDEO_MPEG,
+ #endif
+ #ifdef CONFIG_CSCD_DECODER
BUF_VIDEO_CSCD,
+ #endif
+ #ifdef CONFIG_AVS_DECODER
+ BUF_VIDEO_AVS,
+ #endif
+ #ifdef CONFIG_MMVIDEO_DECODER
BUF_VIDEO_ALGMM,
+ #endif
+ #ifdef CONFIG_ZMBV_DECODER
BUF_VIDEO_ZMBV,
- BUF_VIDEO_AVS,
+ #endif
+ #ifdef CONFIG_SMACKVIDEO_DECODER
BUF_VIDEO_SMACKER,
+ #endif
+ #ifdef CONFIG_NUV_DECODER
BUF_VIDEO_NUV,
+ #endif
+ #ifdef CONFIG_KMVC_DECODER
BUF_VIDEO_KMVC,
+ #endif
+ #ifdef CONFIG_FLASHSV_DECODER
BUF_VIDEO_FLASHSV,
+ #endif
+ #ifdef CONFIG_CAVS_DECODER
BUF_VIDEO_CAVS,
+ #endif
+
0
};
diff --git a/src/libffmpeg/xine_decoder.c b/src/libffmpeg/xine_decoder.c
index 02d19cc1a..2eeb9746b 100644
--- a/src/libffmpeg/xine_decoder.c
+++ b/src/libffmpeg/xine_decoder.c
@@ -17,7 +17,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
- * $Id: xine_decoder.c,v 1.172 2006/12/04 22:25:13 miguelfreitas Exp $
+ * $Id: xine_decoder.c,v 1.173 2007/01/13 21:19:52 miguelfreitas Exp $
*
* xine decoder plugin using ffmpeg
*
@@ -25,6 +25,7 @@
#ifdef HAVE_CONFIG_H
#include "config.h"
+#include "ffmpeg_config.h"
#endif
#include "xine_internal.h"
@@ -39,114 +40,273 @@ pthread_once_t once_control = PTHREAD_ONCE_INIT;
pthread_mutex_t ffmpeg_lock;
#ifndef HAVE_FFMPEG
+
+#define REGISTER_ENCODER(X,x) \
+ if(ENABLE_##X##_ENCODER) register_avcodec(&x##_encoder)
+#define REGISTER_DECODER(X,x) \
+ if(ENABLE_##X##_DECODER) register_avcodec(&x##_decoder)
+#define REGISTER_ENCDEC(X,x) REGISTER_ENCODER(X,x); REGISTER_DECODER(X,x)
+
+#define REGISTER_PARSER(X,x) \
+ if(ENABLE_##X##_PARSER) av_register_codec_parser(&x##_parser)
+
+/* If you do not call this function, then you can select exactly which
+ formats you want to support */
+
+/**
+ * simple call to register all the codecs.
+ */
void avcodec_register_all(void)
{
static int inited = 0;
-
+
if (inited != 0)
- return;
+ return;
inited = 1;
- /* decoders */
- register_avcodec(&h263_decoder);
- register_avcodec(&mpeg4_decoder);
- register_avcodec(&msmpeg4v1_decoder);
- register_avcodec(&msmpeg4v2_decoder);
- register_avcodec(&msmpeg4v3_decoder);
- register_avcodec(&wmv1_decoder);
- register_avcodec(&wmv2_decoder);
- register_avcodec(&h263i_decoder);
- register_avcodec(&rv10_decoder);
- register_avcodec(&rv20_decoder);
- register_avcodec(&svq1_decoder);
- register_avcodec(&svq3_decoder);
- register_avcodec(&wmav1_decoder);
- register_avcodec(&wmav2_decoder);
- register_avcodec(&indeo3_decoder);
- register_avcodec(&mpeg1video_decoder);
- register_avcodec(&dvvideo_decoder);
- register_avcodec(&pcm_s16le_decoder);
- register_avcodec(&mjpeg_decoder);
- register_avcodec(&mjpegb_decoder);
- register_avcodec(&mp2_decoder);
- register_avcodec(&mp3_decoder);
- register_avcodec(&mace3_decoder);
- register_avcodec(&mace6_decoder);
- register_avcodec(&huffyuv_decoder);
- register_avcodec(&cyuv_decoder);
- register_avcodec(&h264_decoder);
- register_avcodec(&vp3_decoder);
- register_avcodec(&fourxm_decoder);
- register_avcodec(&ra_144_decoder);
- register_avcodec(&ra_288_decoder);
- register_avcodec(&adpcm_ms_decoder);
- register_avcodec(&adpcm_ima_qt_decoder);
- register_avcodec(&adpcm_ima_wav_decoder);
- register_avcodec(&adpcm_ima_dk3_decoder);
- register_avcodec(&adpcm_ima_dk4_decoder);
- register_avcodec(&adpcm_ima_ws_decoder);
- register_avcodec(&adpcm_ima_smjpeg_decoder);
- register_avcodec(&adpcm_xa_decoder);
- register_avcodec(&adpcm_4xm_decoder);
- register_avcodec(&adpcm_ea_decoder);
- register_avcodec(&pcm_alaw_decoder);
- register_avcodec(&pcm_mulaw_decoder);
- register_avcodec(&roq_dpcm_decoder);
- register_avcodec(&interplay_dpcm_decoder);
- register_avcodec(&cinepak_decoder);
- register_avcodec(&msvideo1_decoder);
- register_avcodec(&msrle_decoder);
- register_avcodec(&rpza_decoder);
- register_avcodec(&roq_decoder);
- register_avcodec(&idcin_decoder);
- register_avcodec(&xan_wc3_decoder);
- register_avcodec(&vqa_decoder);
- register_avcodec(&interplay_video_decoder);
- register_avcodec(&flic_decoder);
- register_avcodec(&smc_decoder);
- register_avcodec(&eightbps_decoder);
- register_avcodec(&vmdvideo_decoder);
- register_avcodec(&vmdaudio_decoder);
- register_avcodec(&truemotion1_decoder);
- //register_avcodec(&mszh_decoder);
- //register_avcodec(&zlib_decoder);
- register_avcodec(&xan_dpcm_decoder);
- register_avcodec(&asv1_decoder);
- register_avcodec(&asv2_decoder);
- register_avcodec(&vcr1_decoder);
- register_avcodec(&flv_decoder);
- register_avcodec(&qtrle_decoder);
- register_avcodec(&flac_decoder);
- register_avcodec(&aasc_decoder);
- register_avcodec(&alac_decoder);
- register_avcodec(&h261_decoder);
- register_avcodec(&loco_decoder);
- register_avcodec(&qdraw_decoder);
- register_avcodec(&qpeg_decoder);
- register_avcodec(&tscc_decoder);
- register_avcodec(&ulti_decoder);
- register_avcodec(&wnv1_decoder);
- register_avcodec(&xl_decoder);
- register_avcodec(&indeo2_decoder);
- register_avcodec(&fraps_decoder);
- register_avcodec(&shorten_decoder);
- register_avcodec(&qdm2_decoder);
- register_avcodec(&truemotion2_decoder);
- register_avcodec(&wmv3_decoder);
- register_avcodec(&cscd_decoder);
- register_avcodec(&mmvideo_decoder);
- register_avcodec(&zmbv_decoder);
- register_avcodec(&avs_decoder);
- register_avcodec(&smacker_decoder);
- register_avcodec(&smackaud_decoder);
- register_avcodec(&nuv_decoder);
- register_avcodec(&kmvc_decoder);
- register_avcodec(&flashsv_decoder);
- //register_avcodec(&cavs_decoder);
- register_avcodec(&cook_decoder);
- register_avcodec(&truespeech_decoder);
- register_avcodec(&tta_decoder);
+ /* video codecs */
+ REGISTER_DECODER(AASC, aasc);
+ REGISTER_ENCDEC (ASV1, asv1);
+ REGISTER_ENCDEC (ASV2, asv2);
+ REGISTER_DECODER(AVS, avs);
+ REGISTER_DECODER(BMP, bmp);
+ REGISTER_DECODER(CAVS, cavs);
+ REGISTER_DECODER(CINEPAK, cinepak);
+ REGISTER_DECODER(CLJR, cljr);
+ REGISTER_DECODER(CSCD, cscd);
+ REGISTER_DECODER(CYUV, cyuv);
+ REGISTER_DECODER(DSICINVIDEO, dsicinvideo);
+ REGISTER_ENCDEC (DVVIDEO, dvvideo);
+ REGISTER_DECODER(EIGHTBPS, eightbps);
+ REGISTER_ENCDEC (FFV1, ffv1);
+ REGISTER_ENCDEC (FFVHUFF, ffvhuff);
+ REGISTER_DECODER(FLASHSV, flashsv);
+ REGISTER_DECODER(FLIC, flic);
+ REGISTER_ENCDEC (FLV, flv);
+ REGISTER_DECODER(FOURXM, fourxm);
+ REGISTER_DECODER(FRAPS, fraps);
+ REGISTER_ENCDEC (GIF, gif);
+ REGISTER_ENCDEC (H261, h261);
+ REGISTER_ENCDEC (H263, h263);
+ REGISTER_DECODER(H263I, h263i);
+ REGISTER_ENCODER(H263P, h263p);
+ REGISTER_DECODER(H264, h264);
+ REGISTER_ENCDEC (HUFFYUV, huffyuv);
+ REGISTER_DECODER(IDCIN, idcin);
+ REGISTER_DECODER(INDEO2, indeo2);
+ REGISTER_DECODER(INDEO3, indeo3);
+ REGISTER_DECODER(INTERPLAY_VIDEO, interplay_video);
+ REGISTER_ENCODER(JPEGLS, jpegls);
+ REGISTER_DECODER(KMVC, kmvc);
+ REGISTER_ENCODER(LJPEG, ljpeg);
+ REGISTER_DECODER(LOCO, loco);
+ REGISTER_DECODER(MDEC, mdec);
+ REGISTER_ENCDEC (MJPEG, mjpeg);
+ REGISTER_DECODER(MJPEGB, mjpegb);
+ REGISTER_DECODER(MMVIDEO, mmvideo);
+#ifdef HAVE_XVMC
+ REGISTER_DECODER(MPEG_XVMC, mpeg_xvmc);
+#endif
+ REGISTER_ENCDEC (MPEG1VIDEO, mpeg1video);
+ REGISTER_ENCDEC (MPEG2VIDEO, mpeg2video);
+ REGISTER_ENCDEC (MPEG4, mpeg4);
+ REGISTER_DECODER(MPEGVIDEO, mpegvideo);
+ REGISTER_ENCDEC (MSMPEG4V1, msmpeg4v1);
+ REGISTER_ENCDEC (MSMPEG4V2, msmpeg4v2);
+ REGISTER_ENCDEC (MSMPEG4V3, msmpeg4v3);
+ REGISTER_DECODER(MSRLE, msrle);
+ REGISTER_DECODER(MSVIDEO1, msvideo1);
+ REGISTER_DECODER(MSZH, mszh);
+ REGISTER_DECODER(NUV, nuv);
+ REGISTER_ENCODER(PAM, pam);
+ REGISTER_ENCODER(PBM, pbm);
+ REGISTER_ENCODER(PGM, pgm);
+ REGISTER_ENCODER(PGMYUV, pgmyuv);
+#ifdef CONFIG_ZLIB
+ REGISTER_ENCDEC (PNG, png);
+#endif
+ REGISTER_ENCODER(PPM, ppm);
+ REGISTER_DECODER(QDRAW, qdraw);
+ REGISTER_DECODER(QPEG, qpeg);
+ REGISTER_DECODER(QTRLE, qtrle);
+ REGISTER_ENCDEC (RAWVIDEO, rawvideo);
+ REGISTER_DECODER(ROQ, roq);
+ REGISTER_DECODER(RPZA, rpza);
+ REGISTER_ENCDEC (RV10, rv10);
+ REGISTER_ENCDEC (RV20, rv20);
+ REGISTER_DECODER(SMACKER, smacker);
+ REGISTER_DECODER(SMC, smc);
+ REGISTER_ENCDEC (SNOW, snow);
+ REGISTER_DECODER(SP5X, sp5x);
+ REGISTER_ENCDEC (SVQ1, svq1);
+ REGISTER_DECODER(SVQ3, svq3);
+ REGISTER_DECODER(TARGA, targa);
+ REGISTER_DECODER(THEORA, theora);
+ REGISTER_DECODER(TIERTEXSEQVIDEO, tiertexseqvideo);
+ REGISTER_DECODER(TIFF, tiff);
+ REGISTER_DECODER(TRUEMOTION1, truemotion1);
+ REGISTER_DECODER(TRUEMOTION2, truemotion2);
+ REGISTER_DECODER(TSCC, tscc);
+ REGISTER_DECODER(ULTI, ulti);
+ REGISTER_DECODER(VC1, vc1);
+ REGISTER_DECODER(VCR1, vcr1);
+ REGISTER_DECODER(VMDVIDEO, vmdvideo);
+ REGISTER_DECODER(VMNC, vmnc);
+ REGISTER_DECODER(VP3, vp3);
+ REGISTER_DECODER(VP5, vp5);
+ REGISTER_DECODER(VP6, vp6);
+ REGISTER_DECODER(VP6F, vp6f);
+ REGISTER_DECODER(VQA, vqa);
+ REGISTER_ENCDEC (WMV1, wmv1);
+ REGISTER_ENCDEC (WMV2, wmv2);
+ REGISTER_DECODER(WMV3, wmv3);
+ REGISTER_DECODER(WNV1, wnv1);
+#ifdef CONFIG_X264
+ REGISTER_ENCODER(X264, x264);
+#endif
+ REGISTER_DECODER(XAN_WC3, xan_wc3);
+ REGISTER_DECODER(XL, xl);
+#ifdef CONFIG_XVID
+ REGISTER_ENCODER(XVID, xvid);
+#endif
+ REGISTER_ENCDEC (ZLIB, zlib);
+#ifdef CONFIG_ZLIB
+ REGISTER_ENCDEC (ZMBV, zmbv);
+#endif
+
+ /* audio codecs */
+#ifdef CONFIG_LIBFAAD
+ REGISTER_DECODER(AAC, aac);
+ REGISTER_DECODER(MPEG4AAC, mpeg4aac);
+#endif
+#ifdef CONFIG_LIBA52
+ REGISTER_DECODER(AC3, ac3);
+#endif
+ REGISTER_ENCODER(AC3, ac3);
+ REGISTER_DECODER(ALAC, alac);
+#if defined(CONFIG_AMR_NB) || defined(CONFIG_AMR_NB_FIXED)
+ REGISTER_ENCDEC (AMR_NB, amr_nb);
+#endif
+#ifdef CONFIG_AMR_WB
+ REGISTER_ENCDEC (AMR_WB, amr_wb);
+#endif
+ REGISTER_DECODER(COOK, cook);
+ REGISTER_DECODER(DSICINAUDIO, dsicinaudio);
+#ifdef CONFIG_LIBDTS
+ REGISTER_DECODER(DTS, dts);
+#endif
+#ifdef CONFIG_LIBFAAC
+ REGISTER_ENCODER(FAAC, faac);
+#endif
+ REGISTER_ENCDEC (FLAC, flac);
+ REGISTER_DECODER(IMC, imc);
+#ifdef CONFIG_LIBGSM
+ REGISTER_ENCDEC (LIBGSM, libgsm);
+#endif
+ REGISTER_DECODER(MACE3, mace3);
+ REGISTER_DECODER(MACE6, mace6);
+ REGISTER_ENCDEC (MP2, mp2);
+ REGISTER_DECODER(MP3, mp3);
+ REGISTER_DECODER(MP3ADU, mp3adu);
+#ifdef CONFIG_LIBMP3LAME
+ REGISTER_ENCODER(MP3LAME, mp3lame);
+#endif
+ REGISTER_DECODER(MP3ON4, mp3on4);
+ REGISTER_DECODER(MPC7, mpc7);
+#ifdef CONFIG_LIBVORBIS
+ if (!ENABLE_VORBIS_ENCODER) REGISTER_ENCODER(OGGVORBIS, oggvorbis);
+ if (!ENABLE_VORBIS_DECODER) REGISTER_DECODER(OGGVORBIS, oggvorbis);
+#endif
+ REGISTER_DECODER(QDM2, qdm2);
+ REGISTER_DECODER(RA_144, ra_144);
+ REGISTER_DECODER(RA_288, ra_288);
+ REGISTER_DECODER(SHORTEN, shorten);
+ REGISTER_DECODER(SMACKAUD, smackaud);
+ REGISTER_ENCDEC (SONIC, sonic);
+ REGISTER_ENCODER(SONIC_LS, sonic_ls);
+ REGISTER_DECODER(TRUESPEECH, truespeech);
+ REGISTER_DECODER(TTA, tta);
+ REGISTER_DECODER(VMDAUDIO, vmdaudio);
+ REGISTER_ENCDEC (VORBIS, vorbis);
+ REGISTER_DECODER(WAVPACK, wavpack);
+ REGISTER_DECODER(WMAV1, wmav1);
+ REGISTER_DECODER(WMAV2, wmav2);
+ REGISTER_DECODER(WS_SND1, ws_snd1);
+
+ /* pcm codecs */
+ REGISTER_ENCDEC (PCM_ALAW, pcm_alaw);
+ REGISTER_ENCDEC (PCM_MULAW, pcm_mulaw);
+ REGISTER_ENCDEC (PCM_S8, pcm_s8);
+ REGISTER_ENCDEC (PCM_S16BE, pcm_s16be);
+ REGISTER_ENCDEC (PCM_S16LE, pcm_s16le);
+ REGISTER_ENCDEC (PCM_S24BE, pcm_s24be);
+ REGISTER_ENCDEC (PCM_S24DAUD, pcm_s24daud);
+ REGISTER_ENCDEC (PCM_S24LE, pcm_s24le);
+ REGISTER_ENCDEC (PCM_S32BE, pcm_s32be);
+ REGISTER_ENCDEC (PCM_S32LE, pcm_s32le);
+ REGISTER_ENCDEC (PCM_U8, pcm_u8);
+ REGISTER_ENCDEC (PCM_U16BE, pcm_u16be);
+ REGISTER_ENCDEC (PCM_U16LE, pcm_u16le);
+ REGISTER_ENCDEC (PCM_U24BE, pcm_u24be);
+ REGISTER_ENCDEC (PCM_U24LE, pcm_u24le);
+ REGISTER_ENCDEC (PCM_U32BE, pcm_u32be);
+ REGISTER_ENCDEC (PCM_U32LE, pcm_u32le);
+
+ /* dpcm codecs */
+ REGISTER_DECODER(INTERPLAY_DPCM, interplay_dpcm);
+ REGISTER_DECODER(ROQ_DPCM, roq_dpcm);
+ REGISTER_DECODER(SOL_DPCM, sol_dpcm);
+ REGISTER_DECODER(XAN_DPCM, xan_dpcm);
+
+ /* adpcm codecs */
+ REGISTER_ENCDEC (ADPCM_4XM, adpcm_4xm);
+ REGISTER_ENCDEC (ADPCM_ADX, adpcm_adx);
+ REGISTER_ENCDEC (ADPCM_CT, adpcm_ct);
+ REGISTER_ENCDEC (ADPCM_EA, adpcm_ea);
+ REGISTER_ENCDEC (ADPCM_G726, adpcm_g726);
+ REGISTER_ENCDEC (ADPCM_IMA_DK3, adpcm_ima_dk3);
+ REGISTER_ENCDEC (ADPCM_IMA_DK4, adpcm_ima_dk4);
+ REGISTER_ENCDEC (ADPCM_IMA_QT, adpcm_ima_qt);
+ REGISTER_ENCDEC (ADPCM_IMA_SMJPEG, adpcm_ima_smjpeg);
+ REGISTER_ENCDEC (ADPCM_IMA_WAV, adpcm_ima_wav);
+ REGISTER_ENCDEC (ADPCM_IMA_WS, adpcm_ima_ws);
+ REGISTER_ENCDEC (ADPCM_MS, adpcm_ms);
+ REGISTER_ENCDEC (ADPCM_SBPRO_2, adpcm_sbpro_2);
+ REGISTER_ENCDEC (ADPCM_SBPRO_3, adpcm_sbpro_3);
+ REGISTER_ENCDEC (ADPCM_SBPRO_4, adpcm_sbpro_4);
+ REGISTER_ENCDEC (ADPCM_SWF, adpcm_swf);
+ REGISTER_ENCDEC (ADPCM_XA, adpcm_xa);
+ REGISTER_ENCDEC (ADPCM_YAMAHA, adpcm_yamaha);
+
+ /* subtitles */
+ REGISTER_ENCDEC (DVBSUB, dvbsub);
+ REGISTER_ENCDEC (DVDSUB, dvdsub);
+
+ /* parsers */
+ REGISTER_PARSER (AAC, aac);
+ REGISTER_PARSER (AC3, ac3);
+ REGISTER_PARSER (CAVSVIDEO, cavsvideo);
+ REGISTER_PARSER (DVBSUB, dvbsub);
+ REGISTER_PARSER (DVDSUB, dvdsub);
+ REGISTER_PARSER (H261, h261);
+ REGISTER_PARSER (H263, h263);
+ REGISTER_PARSER (H264, h264);
+ REGISTER_PARSER (MJPEG, mjpeg);
+ REGISTER_PARSER (MPEG4VIDEO, mpeg4video);
+ REGISTER_PARSER (MPEGAUDIO, mpegaudio);
+ REGISTER_PARSER (MPEGVIDEO, mpegvideo);
+ REGISTER_PARSER (PNM, pnm);
+
+ /*
+ av_register_bitstream_filter(&dump_extradata_bsf);
+ av_register_bitstream_filter(&remove_extradata_bsf);
+ av_register_bitstream_filter(&noise_bsf);
+ av_register_bitstream_filter(&mp3_header_compress_bsf);
+ av_register_bitstream_filter(&mp3_header_decompress_bsf);
+ av_register_bitstream_filter(&mjpega_dump_header_bsf);
+ */
}
+
#endif
void init_once_routine(void) {