author    Miguel Freitas <miguelfreitas@users.sourceforge.net>  2007-01-13 21:19:52 +0000
committer Miguel Freitas <miguelfreitas@users.sourceforge.net>  2007-01-13 21:19:52 +0000
commit    6e8ff6e5c232de4b8235626af31ab85345120a93 (patch)
tree      25930156aa9f4f2014bf6fe3d65c183262626b8d /src
parent    2f5905081ee2040537f043fe4afabbb66d26354e (diff)
* ffmpeg update to 51.28.0
* Workaround ffmpeg buggy codecs that don't release their DR1 frames.
* Fix several segfaults and a freezing problem with H.264 streams that use a lot of reference frames (e.g. 15).
* Initial support for enabling/disabling ffmpeg codecs. Codecs may be disabled in groups via --disable-ffmpeg-uncommon-codecs/--disable-ffmpeg-popular-codecs. Think of "uncommon" codecs as those people would never want to play on their PDAs (they save memory by removing them). Note: currently both uncommon and popular codecs are _built_ but disabled; that is, the build system still needs some improvements to really save memory. Warning: non-autoconf guru playing with the build system, likely breakage.

CVS patchset: 8499
CVS date: 2007/01/13 21:19:52
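The codec selection mechanism works by generating a second configuration header, ffmpeg_config.h (included next to config.h in the hunks below), which carries one CONFIG_<NAME>_DECODER define per enabled codec; decoder tables are then compiled conditionally on those defines. A minimal sketch of the idea — only the macro names appear in the patch, the group assignments shown here are assumptions:

    /* ffmpeg_config.h (illustrative sketch -- the real header is generated
     * by the build system; which codec sits in which group is assumed) */
    #define CONFIG_WMAV1_DECODER 1            /* "popular": stays enabled */
    #define CONFIG_WMAV2_DECODER 1
    /* --disable-ffmpeg-uncommon-codecs would simply omit defines such as: */
    /* #define CONFIG_ROQ_DPCM_DECODER 1 */

    /* consumers then guard each table entry on the define, as in the
     * audio_decoder.c hunk below: */
    #ifdef CONFIG_WMAV1_DECODER
        BUF_AUDIO_WMAV1,
    #endif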
Diffstat (limited to 'src')
-rw-r--r--  src/libffmpeg/audio_decoder.c | 90
-rw-r--r--  src/libffmpeg/diff_to_ffmpeg_cvs.txt | 416
-rw-r--r--  src/libffmpeg/libavcodec/Makefile.am | 17
-rw-r--r--  src/libffmpeg/libavcodec/armv4l/Makefile.am | 7
-rw-r--r--  src/libffmpeg/libavcodec/armv4l/dsputil_arm_s.S | 696
-rw-r--r--  src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt.c | 188
-rw-r--r--  src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt_rnd.h | 1114
-rw-r--r--  src/libffmpeg/libavcodec/armv4l/mathops.h | 49
-rw-r--r--  src/libffmpeg/libavcodec/armv4l/mpegvideo_armv5te.c | 213
-rw-r--r--  src/libffmpeg/libavcodec/armv4l/mpegvideo_iwmmxt.c | 119
-rw-r--r--  src/libffmpeg/libavcodec/armv4l/simple_idct_armv5te.S | 718
-rw-r--r--  src/libffmpeg/libavcodec/avcodec.h | 30
-rw-r--r--  src/libffmpeg/libavcodec/bitstream.h | 6
-rw-r--r--  src/libffmpeg/libavcodec/bytestream.h | 20
-rw-r--r--  src/libffmpeg/libavcodec/cabac.h | 4
-rw-r--r--  src/libffmpeg/libavcodec/cinepak.c | 36
-rw-r--r--  src/libffmpeg/libavcodec/cook.c | 26
-rw-r--r--  src/libffmpeg/libavcodec/cscd.c | 4
-rw-r--r--  src/libffmpeg/libavcodec/dsputil.c | 28
-rw-r--r--  src/libffmpeg/libavcodec/dsputil.h | 32
-rw-r--r--  src/libffmpeg/libavcodec/dv.c | 14
-rw-r--r--  src/libffmpeg/libavcodec/faandct.c | 2
-rw-r--r--  src/libffmpeg/libavcodec/ffv1.c | 2
-rw-r--r--  src/libffmpeg/libavcodec/h263.c | 18
-rw-r--r--  src/libffmpeg/libavcodec/h264.c | 190
-rw-r--r--  src/libffmpeg/libavcodec/h264data.h | 27
-rwxr-xr-x  src/libffmpeg/libavcodec/h264idct.c | 2
-rw-r--r--  src/libffmpeg/libavcodec/i386/Makefile.am | 6
-rw-r--r--  src/libffmpeg/libavcodec/i386/cputest.c | 6
-rw-r--r--  src/libffmpeg/libavcodec/i386/fdct_mmx.c | 8
-rw-r--r--  src/libffmpeg/libavcodec/i386/mathops.h | 41
-rw-r--r--  src/libffmpeg/libavcodec/jfdctfst.c | 2
-rw-r--r--  src/libffmpeg/libavcodec/jfdctint.c | 2
-rw-r--r--  src/libffmpeg/libavcodec/jpeg_ls.c | 9
-rw-r--r--  src/libffmpeg/libavcodec/mathops.h | 2
-rw-r--r--  src/libffmpeg/libavcodec/motion_est.c | 38
-rw-r--r--  src/libffmpeg/libavcodec/motion_est_template.c | 56
-rw-r--r--  src/libffmpeg/libavcodec/mpeg12.c | 2
-rw-r--r--  src/libffmpeg/libavcodec/mpegaudiodec.c | 4
-rw-r--r--  src/libffmpeg/libavcodec/mpegvideo.c | 111
-rw-r--r--  src/libffmpeg/libavcodec/mpegvideo.h | 3
-rw-r--r--  src/libffmpeg/libavcodec/parser.c | 3
-rw-r--r--  src/libffmpeg/libavcodec/ppc/Makefile.am | 12
-rw-r--r--  src/libffmpeg/libavcodec/ppc/float_altivec.c | 194
-rw-r--r--  src/libffmpeg/libavcodec/ppc/h264_altivec.c | 565
-rw-r--r--  src/libffmpeg/libavcodec/ppc/h264_template_altivec.c | 719
-rw-r--r--  src/libffmpeg/libavcodec/ppc/mathops.h | 33
-rw-r--r--  src/libffmpeg/libavcodec/ppc/snow_altivec.c | 788
-rw-r--r--  src/libffmpeg/libavcodec/ppc/types_altivec.h | 41
-rw-r--r--  src/libffmpeg/libavcodec/ppc/vc1dsp_altivec.c | 338
-rw-r--r--  src/libffmpeg/libavcodec/smacker.c | 4
-rw-r--r--  src/libffmpeg/libavcodec/snow.c | 76
-rw-r--r--  src/libffmpeg/libavcodec/snow.h | 8
-rw-r--r--  src/libffmpeg/libavcodec/utils.c | 73
-rw-r--r--  src/libffmpeg/libavcodec/vc1.c | 6
-rw-r--r--  src/libffmpeg/libavcodec/vc1dsp.c | 2
-rw-r--r--  src/libffmpeg/libavcodec/vp3dsp.c | 2
-rw-r--r--  src/libffmpeg/libavcodec/vp5.c | 290
-rw-r--r--  src/libffmpeg/libavcodec/vp56.c | 665
-rw-r--r--  src/libffmpeg/libavcodec/vp56.h | 249
-rw-r--r--  src/libffmpeg/libavcodec/vp56data.c | 66
-rw-r--r--  src/libffmpeg/libavcodec/vp56data.h | 248
-rw-r--r--  src/libffmpeg/libavcodec/vp5data.h | 173
-rw-r--r--  src/libffmpeg/libavcodec/vp6.c | 537
-rw-r--r--  src/libffmpeg/libavcodec/vp6data.h | 300
-rw-r--r--  src/libffmpeg/libavcodec/wmadec.c | 14
-rw-r--r--  src/libffmpeg/libavcodec/wmv2.c | 6
-rw-r--r--  src/libffmpeg/libavutil/Makefile.am | 3
-rw-r--r--  src/libffmpeg/libavutil/bswap.h | 14
-rw-r--r--  src/libffmpeg/libavutil/common.h | 129
-rw-r--r--  src/libffmpeg/libavutil/internal.h | 93
-rw-r--r--  src/libffmpeg/libavutil/intreadwrite.h | 42
-rw-r--r--  src/libffmpeg/libavutil/rational.c | 8
-rw-r--r--  src/libffmpeg/video_decoder.c | 223
-rw-r--r--  src/libffmpeg/xine_decoder.c | 364
75 files changed, 9773 insertions, 873 deletions
diff --git a/src/libffmpeg/audio_decoder.c b/src/libffmpeg/audio_decoder.c
index 22f567e9c..8f0425775 100644
--- a/src/libffmpeg/audio_decoder.c
+++ b/src/libffmpeg/audio_decoder.c
@@ -17,7 +17,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
- * $Id: audio_decoder.c,v 1.31 2006/12/26 03:20:12 dgp85 Exp $
+ * $Id: audio_decoder.c,v 1.32 2007/01/13 21:19:52 miguelfreitas Exp $
*
* xine audio decoder plugin using ffmpeg
*
@@ -25,6 +25,7 @@
#ifdef HAVE_CONFIG_H
#include "config.h"
+#include "ffmpeg_config.h"
#endif
#include <stdlib.h>
@@ -107,8 +108,8 @@ static const ff_codec_t ff_audio_lookup[] = {
{BUF_AUDIO_TRUESPEECH, CODEC_ID_TRUESPEECH, "TrueSpeech (ffmpeg)"},
{BUF_AUDIO_TTA, CODEC_ID_TTA, "True Audio Lossless (ffmpeg)"},
{BUF_AUDIO_SMACKER, CODEC_ID_SMACKAUDIO, "Smacker (ffmpeg)"},
- {BUF_AUDIO_FLVADPCM, CODEC_ID_ADPCM_SWF, "Flash ADPCM (ffmpeg)"},
- {BUF_AUDIO_WAVPACK, CODEC_ID_WAVPACK, "WavPack (ffmpeg)"},
+ {BUF_AUDIO_FLVADPCM, CODEC_ID_ADPCM_SWF, "Flash ADPCM (ffmpeg)"},
+ {BUF_AUDIO_WAVPACK, CODEC_ID_WAVPACK, "WavPack (ffmpeg)"},
};
@@ -443,39 +444,106 @@ void *init_audio_plugin (xine_t *xine, void *data) {
}
static uint32_t supported_audio_types[] = {
+ #ifdef CONFIG_WMAV1_DECODER
BUF_AUDIO_WMAV1,
+ #endif
+ #ifdef CONFIG_WMAV2_DECODER
BUF_AUDIO_WMAV2,
+ #endif
+ #ifdef CONFIG_RA_144_DECODER
BUF_AUDIO_14_4,
+ #endif
+ #ifdef CONFIG_RA_288_DECODER
BUF_AUDIO_28_8,
- BUF_AUDIO_MULAW,
- BUF_AUDIO_ALAW,
+ #endif
+ #ifdef CONFIG_MP3_DECODER
+ BUF_AUDIO_MPEG,
+ #endif
+ #ifdef CONFIG_ADPCM_MS_DECODER
BUF_AUDIO_MSADPCM,
+ #endif
+ #ifdef CONFIG_ADPCM_IMA_QT_DECODER
BUF_AUDIO_QTIMAADPCM,
+ #endif
+ #ifdef CONFIG_ADPCM_IMA_WAV_DECODER
BUF_AUDIO_MSIMAADPCM,
+ #endif
+ #ifdef CONFIG_ADPCM_IMA_DK3_DECODER
BUF_AUDIO_DK3ADPCM,
+ #endif
+ #ifdef CONFIG_ADPCM_IMA_DK4_DECODER
BUF_AUDIO_DK4ADPCM,
+ #endif
+ #ifdef CONFIG_ADPCM_IMA_WS_DECODER
+ BUF_AUDIO_VQA_IMA,
+ #endif
+ #ifdef CONFIG_ADPCM_IMA_SMJPEG_DECODER
+ BUF_AUDIO_SMJPEG_IMA,
+ #endif
+ #ifdef CONFIG_ADPCM_XA_DECODER
BUF_AUDIO_XA_ADPCM,
+ #endif
+ #ifdef CONFIG_ADPCM_4XM_DECODER
+ BUF_AUDIO_4X_ADPCM,
+ #endif
+ #ifdef CONFIG_ADPCM_EA_DECODER
+ BUF_AUDIO_EA_ADPCM,
+ #endif
+ #ifdef CONFIG_PCM_MULAW_DECODER
+ BUF_AUDIO_MULAW,
+ #endif
+ #ifdef CONFIG_PCM_ALAW_DECODER
+ BUF_AUDIO_ALAW,
+ #endif
+ #ifdef CONFIG_ROQ_DPCM_DECODER
BUF_AUDIO_ROQ,
+ #endif
+ #ifdef CONFIG_INTERPLAY_DPCM_DECODER
BUF_AUDIO_INTERPLAY,
- BUF_AUDIO_VQA_IMA,
- BUF_AUDIO_4X_ADPCM,
+ #endif
+ #ifdef CONFIG_MACE3_DECODER
BUF_AUDIO_MAC3,
+ #endif
+ #ifdef CONFIG_MACE6_DECODER
BUF_AUDIO_MAC6,
+ #endif
+ #ifdef CONFIG_XAN_DPCM_DECODER
BUF_AUDIO_XAN_DPCM,
+ #endif
+ #ifdef CONFIG_VMDAUDIO_DECODER
BUF_AUDIO_VMD,
- BUF_AUDIO_EA_ADPCM,
- BUF_AUDIO_SMJPEG_IMA,
+ #endif
+ #ifdef CONFIG_FLAC_DECODER
BUF_AUDIO_FLAC,
- BUF_AUDIO_ALAC,
+ #endif
+ #ifdef CONFIG_SHORTEN_DECODER
BUF_AUDIO_SHORTEN,
- BUF_AUDIO_MPEG,
+ #endif
+ #ifdef CONFIG_ALAC_DECODER
+ BUF_AUDIO_ALAC,
+ #endif
+ #ifdef CONFIG_QDM2_DECODER
BUF_AUDIO_QDESIGN2,
+ #endif
+ #ifdef CONFIG_COOK_DECODER
BUF_AUDIO_COOK,
+ #endif
+ #ifdef CONFIG_TRUESPEECH_DECODER
BUF_AUDIO_TRUESPEECH,
+ #endif
+ #ifdef CONFIG_TTA_DECODER
BUF_AUDIO_TTA,
+ #endif
+ #ifdef CONFIG_SMACKAUDIO_DECODER
BUF_AUDIO_SMACKER,
+ #endif
+ #ifdef CONFIG_ADPCM_SWF_DECODER
BUF_AUDIO_FLVADPCM,
+ #endif
+ #ifdef CONFIG_WAVPACK_DECODER
BUF_AUDIO_WAVPACK,
+ #endif
+
0
};
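Since supported_audio_types carries no explicit length and is instead terminated by the trailing 0 entry, any consumer has to walk it up to that sentinel. A minimal sketch of such a scan (the helper name is hypothetical, not from the patch):

    #include <stdint.h>

    /* Illustrative only: check a 0-terminated list of xine buffer types. */
    static int audio_type_supported(const uint32_t *types, uint32_t buf_type)
    {
      int i;
      for (i = 0; types[i] != 0; i++)   /* the final 0 entry is the sentinel */
        if (types[i] == buf_type)
          return 1;
      return 0;
    }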
diff --git a/src/libffmpeg/diff_to_ffmpeg_cvs.txt b/src/libffmpeg/diff_to_ffmpeg_cvs.txt
index 7e19e643c..b813b3ab2 100644
--- a/src/libffmpeg/diff_to_ffmpeg_cvs.txt
+++ b/src/libffmpeg/diff_to_ffmpeg_cvs.txt
@@ -1,74 +1,79 @@
-Index: libavcodec/avcodec.h
+Index: libavutil/internal.h
===================================================================
---- libavcodec/avcodec.h (revision 7221)
-+++ libavcodec/avcodec.h (working copy)
-@@ -47,6 +47,13 @@
- #define AV_TIME_BASE 1000000
- #define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE}
+--- libavutil/internal.h (revision 7433)
++++ libavutil/internal.h (working copy)
+@@ -181,11 +181,15 @@
+ #include <assert.h>
-+/* FIXME: We cannot use ffmpeg's XvMC capabilities, since that would require
-+ * linking the ffmpeg plugin against XvMC libraries, which is a bad thing,
-+ * since they are output dependend.
-+ * The correct fix would be to reimplement the XvMC functions libavcodec uses
-+ * and do the necessary talking with our XvMC output plugin there. */
-+#undef HAVE_XVMC
-+
- enum CodecID {
- CODEC_ID_NONE,
- CODEC_ID_MPEG1VIDEO,
-@@ -2686,6 +2693,13 @@
+ /* dprintf macros */
+-#ifdef DEBUG
+-# define dprintf(fmt,...) av_log(NULL, AV_LOG_DEBUG, fmt, __VA_ARGS__)
+-#else
+-# define dprintf(fmt,...)
+-#endif
++# ifdef DEBUG
++# ifdef __GNUC__
++# define dprintf(fmt,args...) av_log(NULL, AV_LOG_DEBUG, fmt, ##args)
++# else
++# define dprintf(fmt,...) av_log(NULL, AV_LOG_DEBUG, fmt, __VA_ARGS__)
++# endif
++# else
++# define dprintf(fmt,...)
++# endif
- extern unsigned int av_xiphlacing(unsigned char *s, unsigned int v);
+ #define av_abort() do { av_log(NULL, AV_LOG_ERROR, "Abort at %s:%d\n", __FILE__, __LINE__); abort(); } while (0)
-+/* unused static macro */
-+#if defined(__GNUC__) && !defined(DEBUG)
-+/* since we do not compile the encoder part of ffmpeg, some static
-+ * functions will be unused; this is ok, the compiler will take care */
-+# define static static __attribute__((__unused__))
-+#endif
-+
- #ifdef __cplusplus
- }
- #endif
-Index: libavcodec/dsputil.h
+Index: libavutil/integer.c
===================================================================
---- libavcodec/dsputil.h (revision 7221)
-+++ libavcodec/dsputil.h (working copy)
-@@ -33,6 +33,9 @@
- #include "common.h"
- #include "avcodec.h"
-
-+#if defined(ARCH_X86) || defined(ARCH_X86_64)
-+#define HAVE_MMX 1
-+#endif
+--- libavutil/integer.c (revision 7433)
++++ libavutil/integer.c (working copy)
+@@ -126,8 +126,8 @@
+ AVInteger quot_temp;
+ if(!quot) quot = &quot_temp;
- //#define DEBUG
- /* dct code */
-Index: libavcodec/motion_est.c
+- assert((int16_t)a[AV_INTEGER_SIZE-1] >= 0 && (int16_t)b[AV_INTEGER_SIZE-1] >= 0);
+- assert(av_log2(b)>=0);
++ assert((int16_t)a.v[AV_INTEGER_SIZE-1] >= 0 && (int16_t)b.v[AV_INTEGER_SIZE-1] >= 0);
++ assert(av_log2_i(b)>=0);
+
+ if(i > 0)
+ b= av_shr_i(b, -i);
+Index: libavutil/common.h
===================================================================
---- libavcodec/motion_est.c (revision 7221)
-+++ libavcodec/motion_est.c (working copy)
-@@ -23,6 +23,9 @@
- * new Motion Estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
- */
+--- libavutil/common.h (revision 7433)
++++ libavutil/common.h (working copy)
+@@ -345,4 +345,27 @@
+ char *av_strdup(const char *s);
+ void av_freep(void *ptr);
-+/* motion estimation only needed for encoders */
-+#ifdef CONFIG_ENCODERS
++/* xine: inline causes trouble for debug compiling */
++#ifdef DISABLE_INLINE
++# ifdef inline
++# undef inline
++# endif
++# ifdef always_inline
++# undef always_inline
++# endif
++# define inline
++# define always_inline
++#endif
+
- /**
- * @file motion_est.c
- * Motion estimation.
-@@ -2112,3 +2115,5 @@
- }
- }
- }
++/* xine: define ASMALIGN here since it's cleaner that generating it in the configure */
++#if HAVE_ASMALIGN_POT
++# define ASMALIGN(ZEROBITS) ".align " #ZEROBITS "\n\t"
++#else
++# define ASMALIGN(ZEROBITS) ".align 1<<" #ZEROBITS "\n\t"
++#endif
++
++/* xine: another config.h with codecs to use */
++#include "ffmpeg_config.h"
++
+ #endif /* COMMON_H */
+
-+#endif /* CONFIG_ENCODERS */
Index: libavcodec/mjpeg.c
===================================================================
-diff -u -r1.38 mjpeg.c
---- libavcodec/mjpeg.c 4 Dec 2006 22:25:19 -0000 1.38
-+++ libavcodec/mjpeg.c 30 Dec 2006 22:21:34 -0000
+--- libavcodec/mjpeg.c (revision 7433)
++++ libavcodec/mjpeg.c (working copy)
@@ -38,6 +38,13 @@
#include "mpegvideo.h"
#include "bytestream.h"
@@ -83,27 +88,61 @@ diff -u -r1.38 mjpeg.c
/* use two quantizer tables (one for luminance and one for chrominance) */
/* not yet working */
#undef TWOMATRIXES
-Index: libavcodec/mpeg12.c
+Index: libavcodec/i386/dsputil_mmx.c
===================================================================
---- libavcodec/mpeg12.c (revision 7221)
-+++ libavcodec/mpeg12.c (working copy)
-@@ -36,6 +36,13 @@
- //#include <assert.h>
-
-
-+/* if xine's MPEG encoder is enabled, enable the encoding features in
-+ * this particular module */
-+#if defined(XINE_MPEG_ENCODER) && !defined(CONFIG_ENCODERS)
-+#define CONFIG_ENCODERS
-+#endif
+--- libavcodec/i386/dsputil_mmx.c (revision 7433)
++++ libavcodec/i386/dsputil_mmx.c (working copy)
+@@ -2545,33 +2545,39 @@
+ "pmullw %%mm5, %%mm2 \n\t" // (s-dx)*dy
+ "pmullw %%mm4, %%mm1 \n\t" // dx*(s-dy)
+
+- "movd %4, %%mm5 \n\t"
+- "movd %3, %%mm4 \n\t"
++ "movd %3, %%mm5 \n\t"
++ "movd %2, %%mm4 \n\t"
+ "punpcklbw %%mm7, %%mm5 \n\t"
+ "punpcklbw %%mm7, %%mm4 \n\t"
+ "pmullw %%mm5, %%mm3 \n\t" // src[1,1] * dx*dy
+ "pmullw %%mm4, %%mm2 \n\t" // src[0,1] * (s-dx)*dy
+
+- "movd %2, %%mm5 \n\t"
+- "movd %1, %%mm4 \n\t"
++ "movd %1, %%mm5 \n\t"
++ "movd %0, %%mm4 \n\t"
+ "punpcklbw %%mm7, %%mm5 \n\t"
+ "punpcklbw %%mm7, %%mm4 \n\t"
+ "pmullw %%mm5, %%mm1 \n\t" // src[1,0] * dx*(s-dy)
+ "pmullw %%mm4, %%mm0 \n\t" // src[0,0] * (s-dx)*(s-dy)
+- "paddw %5, %%mm1 \n\t"
++ "paddw %4, %%mm1 \n\t"
+ "paddw %%mm3, %%mm2 \n\t"
+ "paddw %%mm1, %%mm0 \n\t"
+ "paddw %%mm2, %%mm0 \n\t"
+
+- "psrlw %6, %%mm0 \n\t"
++ "psrlw %5, %%mm0 \n\t"
+ "packuswb %%mm0, %%mm0 \n\t"
+- "movd %%mm0, %0 \n\t"
+
+- : "=m"(dst[x+y*stride])
++ :
+ : "m"(src[0]), "m"(src[1]),
+ "m"(src[stride]), "m"(src[stride+1]),
+ "m"(*r4), "m"(shift2)
+ );
++
++ asm volatile(
++ "movd %%mm0, %0 \n\t"
+
-+
- /* Start codes. */
- #define SEQ_END_CODE 0x000001b7
- #define SEQ_START_CODE 0x000001b3
++ : "=m"(dst[x+y*stride])
++ :
++ );
+ src += stride;
+ }
+ src += 4-h*stride;
Index: libavcodec/mpegvideo.c
===================================================================
---- libavcodec/mpegvideo.c (revision 7221)
+--- libavcodec/mpegvideo.c (revision 7433)
+++ libavcodec/mpegvideo.c (working copy)
@@ -40,6 +40,14 @@
//#undef NDEBUG
@@ -163,7 +202,7 @@ Index: libavcodec/mpegvideo.c
if(avctx->rc_buffer_size){
RateControlContext *rcc= &s->rc_context;
-@@ -4575,6 +4593,8 @@
+@@ -4574,6 +4592,8 @@
case CODEC_ID_MPEG1VIDEO:
case CODEC_ID_MPEG2VIDEO:
mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
@@ -172,7 +211,7 @@ Index: libavcodec/mpegvideo.c
case CODEC_ID_MPEG4:
mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
case CODEC_ID_MSMPEG4V2:
-@@ -4595,6 +4615,7 @@
+@@ -4594,6 +4614,7 @@
h263_encode_mb(s, s->block, motion_x, motion_y); break;
case CODEC_ID_MJPEG:
mjpeg_encode_mb(s, s->block); break;
@@ -180,7 +219,7 @@ Index: libavcodec/mpegvideo.c
default:
assert(0);
}
-@@ -4816,6 +4837,8 @@
+@@ -4815,6 +4836,8 @@
+sse(s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
}
@@ -189,7 +228,7 @@ Index: libavcodec/mpegvideo.c
static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
MpegEncContext *s= arg;
-@@ -4859,6 +4882,7 @@
+@@ -4860,6 +4883,7 @@
}
return 0;
}
@@ -197,7 +236,7 @@ Index: libavcodec/mpegvideo.c
static int mb_var_thread(AVCodecContext *c, void *arg){
MpegEncContext *s= arg;
-@@ -4883,6 +4907,8 @@
+@@ -4886,6 +4910,8 @@
}
static void write_slice_end(MpegEncContext *s){
@@ -206,7 +245,7 @@ Index: libavcodec/mpegvideo.c
if(s->codec_id==CODEC_ID_MPEG4){
if(s->partitioned_frame){
ff_mpeg4_merge_partitions(s);
-@@ -4892,6 +4918,7 @@
+@@ -4895,6 +4921,7 @@
}else if(s->out_format == FMT_MJPEG){
ff_mjpeg_stuffing(&s->pb);
}
@@ -214,7 +253,7 @@ Index: libavcodec/mpegvideo.c
align_put_bits(&s->pb);
flush_put_bits(&s->pb);
-@@ -4945,10 +4972,13 @@
+@@ -4950,10 +4977,13 @@
case CODEC_ID_FLV1:
s->gob_index = ff_h263_get_gob_height(s);
break;
@@ -228,7 +267,7 @@ Index: libavcodec/mpegvideo.c
}
s->resync_mb_x=0;
-@@ -5021,9 +5051,12 @@
+@@ -5026,9 +5056,12 @@
if(s->start_mb_y != mb_y || mb_x!=0){
write_slice_end(s);
@@ -241,7 +280,7 @@ Index: libavcodec/mpegvideo.c
}
assert((put_bits_count(&s->pb)&7) == 0);
-@@ -5047,19 +5080,25 @@
+@@ -5052,19 +5085,25 @@
}
switch(s->codec_id){
@@ -267,18 +306,18 @@ Index: libavcodec/mpegvideo.c
}
if(s->flags&CODEC_FLAG_PASS1){
-@@ -5172,7 +5211,10 @@
-
+@@ -5286,7 +5325,10 @@
+ backup_s.dquant = 0;
s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
s->mb_intra= 0;
+/* xine: do not need this for decode or MPEG-1 encoding modes */
+#if 0
- ff_mpeg4_set_direct_mv(s, mx, my);
+ ff_mpeg4_set_direct_mv(s, 0, 0);
+#endif /* #if 0 */
encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
- &dmin, &next_block, mx, my);
+ &dmin, &next_block, 0, 0);
}
-@@ -5354,7 +5396,10 @@
+@@ -5400,7 +5442,10 @@
s->mb_intra= 0;
motion_x=s->b_direct_mv_table[xy][0];
motion_y=s->b_direct_mv_table[xy][1];
@@ -287,9 +326,9 @@ Index: libavcodec/mpegvideo.c
ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
+#endif /* #if 0 */
break;
- case CANDIDATE_MB_TYPE_BIDIR:
- s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
-@@ -5462,8 +5507,11 @@
+ case CANDIDATE_MB_TYPE_DIRECT0:
+ s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
+@@ -5513,8 +5558,11 @@
}
//not beautiful here but we must write it before flushing so it has to be here
@@ -301,7 +340,7 @@ Index: libavcodec/mpegvideo.c
write_slice_end(s);
-@@ -5531,6 +5579,8 @@
+@@ -5582,6 +5630,8 @@
}
if(s->adaptive_quant){
@@ -310,7 +349,7 @@ Index: libavcodec/mpegvideo.c
switch(s->codec_id){
case CODEC_ID_MPEG4:
ff_clean_mpeg4_qscales(s);
-@@ -5541,6 +5591,7 @@
+@@ -5592,6 +5642,7 @@
ff_clean_h263_qscales(s);
break;
}
@@ -318,7 +357,7 @@ Index: libavcodec/mpegvideo.c
s->lambda= s->lambda_table[0];
//FIXME broken
-@@ -5562,10 +5613,13 @@
+@@ -5613,10 +5664,13 @@
s->me.mb_var_sum_temp =
s->me.mc_mb_var_sum_temp = 0;
@@ -332,7 +371,7 @@ Index: libavcodec/mpegvideo.c
s->me.scene_change_score=0;
-@@ -5596,6 +5650,8 @@
+@@ -5647,6 +5701,8 @@
ff_update_duplicate_context(s->thread_context[i], s);
}
@@ -341,7 +380,7 @@ Index: libavcodec/mpegvideo.c
ff_init_me(s);
/* Estimate motion for every MB */
-@@ -5610,6 +5666,8 @@
+@@ -5661,6 +5717,8 @@
s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
}else /* if(s->pict_type == I_TYPE) */{
@@ -350,7 +389,7 @@ Index: libavcodec/mpegvideo.c
/* I-Frame */
for(i=0; i<s->mb_stride*s->mb_height; i++)
s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
-@@ -5633,6 +5691,8 @@
+@@ -5684,6 +5742,8 @@
//printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
}
@@ -359,7 +398,7 @@ Index: libavcodec/mpegvideo.c
if(!s->umvplus){
if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
-@@ -5686,6 +5746,7 @@
+@@ -5737,6 +5797,7 @@
}
}
}
@@ -367,7 +406,7 @@ Index: libavcodec/mpegvideo.c
if (estimate_qp(s, 0) < 0)
return -1;
-@@ -5717,6 +5778,8 @@
+@@ -5768,6 +5829,8 @@
s->last_bits= put_bits_count(&s->pb);
switch(s->out_format) {
@@ -376,7 +415,7 @@ Index: libavcodec/mpegvideo.c
case FMT_MJPEG:
mjpeg_picture_header(s);
break;
-@@ -5745,11 +5808,15 @@
+@@ -5796,11 +5859,15 @@
else
h263_encode_picture_header(s, picture_number);
break;
@@ -392,11 +431,49 @@ Index: libavcodec/mpegvideo.c
default:
assert(0);
}
+Index: libavcodec/mpeg12.c
+===================================================================
+--- libavcodec/mpeg12.c (revision 7433)
++++ libavcodec/mpeg12.c (working copy)
+@@ -36,6 +36,13 @@
+ //#include <assert.h>
+
+
++/* if xine's MPEG encoder is enabled, enable the encoding features in
++ * this particular module */
++#if defined(XINE_MPEG_ENCODER) && !defined(CONFIG_ENCODERS)
++#define CONFIG_ENCODERS
++#endif
++
++
+ /* Start codes. */
+ #define SEQ_END_CODE 0x000001b7
+ #define SEQ_START_CODE 0x000001b3
+Index: libavcodec/motion_est.c
+===================================================================
+--- libavcodec/motion_est.c (revision 7433)
++++ libavcodec/motion_est.c (working copy)
+@@ -23,6 +23,9 @@
+ * new Motion Estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
+ */
+
++/* motion estimation only needed for encoders */
++#ifdef CONFIG_ENCODERS
++
+ /**
+ * @file motion_est.c
+ * Motion estimation.
+@@ -2142,3 +2145,5 @@
+ }
+ }
+ }
++
++#endif /* CONFIG_ENCODERS */
Index: libavcodec/snow.c
===================================================================
---- libavcodec/snow.c (revision 7221)
+--- libavcodec/snow.c (revision 7433)
+++ libavcodec/snow.c (working copy)
-@@ -1977,6 +1977,7 @@
+@@ -1982,6 +1982,7 @@
#define P_MV1 P[9]
#define FLAG_QPEL 1 //must be 1
@@ -404,15 +481,15 @@ Index: libavcodec/snow.c
static int encode_q_branch(SnowContext *s, int level, int x, int y){
uint8_t p_buffer[1024];
uint8_t i_buffer[1024];
-@@ -2205,6 +2206,7 @@
+@@ -2210,6 +2211,7 @@
return score;
}
}
+#endif
- static always_inline int same_block(BlockNode *a, BlockNode *b){
+ static av_always_inline int same_block(BlockNode *a, BlockNode *b){
if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
-@@ -2319,6 +2321,7 @@
+@@ -2322,6 +2324,7 @@
}
}
@@ -420,7 +497,7 @@ Index: libavcodec/snow.c
static void encode_blocks(SnowContext *s, int search){
int x, y;
int w= s->b_width;
-@@ -2340,6 +2343,7 @@
+@@ -2343,6 +2346,7 @@
}
}
}
@@ -428,7 +505,7 @@ Index: libavcodec/snow.c
static void decode_blocks(SnowContext *s){
int x, y;
-@@ -3910,6 +3914,7 @@
+@@ -3931,6 +3935,7 @@
}
}
@@ -436,7 +513,7 @@ Index: libavcodec/snow.c
static int encode_init(AVCodecContext *avctx)
{
SnowContext *s = avctx->priv_data;
-@@ -3997,6 +4002,7 @@
+@@ -4018,6 +4023,7 @@
return 0;
}
@@ -444,7 +521,7 @@ Index: libavcodec/snow.c
static int frame_start(SnowContext *s){
AVFrame tmp;
-@@ -4035,6 +4041,7 @@
+@@ -4056,6 +4062,7 @@
return 0;
}
@@ -452,7 +529,7 @@ Index: libavcodec/snow.c
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
SnowContext *s = avctx->priv_data;
RangeCoder * const c= &s->c;
-@@ -4288,6 +4295,7 @@
+@@ -4308,6 +4315,7 @@
return ff_rac_terminate(c);
}
@@ -460,7 +537,7 @@ Index: libavcodec/snow.c
static void common_end(SnowContext *s){
int plane_index, level, orientation, i;
-@@ -4319,6 +4327,7 @@
+@@ -4339,6 +4347,7 @@
}
}
@@ -468,7 +545,7 @@ Index: libavcodec/snow.c
static int encode_end(AVCodecContext *avctx)
{
SnowContext *s = avctx->priv_data;
-@@ -4328,6 +4337,7 @@
+@@ -4348,6 +4357,7 @@
return 0;
}
@@ -476,86 +553,9 @@ Index: libavcodec/snow.c
static int decode_init(AVCodecContext *avctx)
{
-Index: libavutil/common.h
-===================================================================
---- libavutil/common.h (revision 7221)
-+++ libavutil/common.h (working copy)
-@@ -375,7 +375,7 @@
- );
- return (d << 32) | (a & 0xffffffff);
- }
--#elif defined(ARCH_X86_32)
-+#elif defined(ARCH_X86)
- static inline long long read_time(void)
- {
- long long l;
-@@ -446,4 +446,23 @@
- char *av_strdup(const char *s);
- void av_freep(void *ptr);
-
-+/* xine: inline causes trouble for debug compiling */
-+#ifdef DISABLE_INLINE
-+# ifdef inline
-+# undef inline
-+# endif
-+# ifdef always_inline
-+# undef always_inline
-+# endif
-+# define inline
-+# define always_inline
-+#endif
-+
-+/* xine: define ASMALIGN here since it's cleaner that generating it in the configure */
-+#if HAVE_ASMALIGN_POT
-+# define ASMALIGN(ZEROBITS) ".align " #ZEROBITS "\n\t"
-+#else
-+# define ASMALIGN(ZEROBITS) ".align 1<<" #ZEROBITS "\n\t"
-+#endif
-+
- #endif /* COMMON_H */
-Index: libavutil/integer.c
-===================================================================
---- libavutil/integer.c (revision 7221)
-+++ libavutil/integer.c (working copy)
-@@ -126,8 +126,8 @@
- AVInteger quot_temp;
- if(!quot) quot = &quot_temp;
-
-- assert((int16_t)a[AV_INTEGER_SIZE-1] >= 0 && (int16_t)b[AV_INTEGER_SIZE-1] >= 0);
-- assert(av_log2(b)>=0);
-+ assert((int16_t)a.v[AV_INTEGER_SIZE-1] >= 0 && (int16_t)b.v[AV_INTEGER_SIZE-1] >= 0);
-+ assert(av_log2_i(b)>=0);
-
- if(i > 0)
- b= av_shr_i(b, -i);
-Index: libavutil/internal.h
-===================================================================
---- libavutil/internal.h (revision 7221)
-+++ libavutil/internal.h (working copy)
-@@ -93,11 +93,15 @@
- #include <assert.h>
-
- /* dprintf macros */
--#ifdef DEBUG
--# define dprintf(fmt,...) av_log(NULL, AV_LOG_DEBUG, fmt, __VA_ARGS__)
--#else
--# define dprintf(fmt,...)
--#endif
-+# ifdef DEBUG
-+# ifdef __GNUC__
-+# define dprintf(fmt,args...) av_log(NULL, AV_LOG_DEBUG, fmt, ##args)
-+# else
-+# define dprintf(fmt,...) av_log(NULL, AV_LOG_DEBUG, fmt, __VA_ARGS__)
-+# endif
-+# else
-+# define dprintf(fmt,...)
-+# endif
-
- #define av_abort() do { av_log(NULL, AV_LOG_ERROR, "Abort at %s:%d\n", __FILE__, __LINE__); abort(); } while (0)
-
Index: libavcodec/mlib/dsputil_mlib.c
===================================================================
---- libavcodec/mlib/dsputil_mlib.c (revision 7221)
+--- libavcodec/mlib/dsputil_mlib.c (revision 7433)
+++ libavcodec/mlib/dsputil_mlib.c (working copy)
@@ -22,6 +22,8 @@
#include "../dsputil.h"
@@ -566,3 +566,35 @@ Index: libavcodec/mlib/dsputil_mlib.c
#include <mlib_types.h>
#include <mlib_status.h>
#include <mlib_sys.h>
+Index: libavcodec/avcodec.h
+===================================================================
+--- libavcodec/avcodec.h (revision 7433)
++++ libavcodec/avcodec.h (working copy)
+@@ -47,6 +47,13 @@
+ #define AV_TIME_BASE 1000000
+ #define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE}
+
++/* FIXME: We cannot use ffmpeg's XvMC capabilities, since that would require
++ * linking the ffmpeg plugin against XvMC libraries, which is a bad thing,
++ * since they are output dependend.
++ * The correct fix would be to reimplement the XvMC functions libavcodec uses
++ * and do the necessary talking with our XvMC output plugin there. */
++#undef HAVE_XVMC
++
+ enum CodecID {
+ CODEC_ID_NONE,
+ CODEC_ID_MPEG1VIDEO,
+@@ -2688,6 +2695,13 @@
+
+ extern unsigned int av_xiphlacing(unsigned char *s, unsigned int v);
+
++/* unused static macro */
++#if defined(__GNUC__) && !defined(DEBUG)
++/* since we do not compile the encoder part of ffmpeg, some static
++ * functions will be unused; this is ok, the compiler will take care */
++# define static static __attribute__((__unused__))
++#endif
++
+ #ifdef __cplusplus
+ }
+ #endif
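One change above deserves a note: the dprintf macro in libavutil/internal.h is rewritten because the plain C99 form with __VA_ARGS__ leaves a trailing comma — and thus a syntax error — when the macro is invoked with only a format string. The GNU named-varargs form with ##args deletes that comma when the argument list is empty. A small self-contained sketch of the difference (dbg is a hypothetical stand-in for dprintf/av_log):

    #include <stdio.h>

    #ifdef __GNUC__
    /* GNU extension: "##" swallows the preceding comma when args is empty. */
    # define dbg(fmt, args...) fprintf(stderr, fmt, ##args)
    #else
    /* Plain C99 form: fine with at least one variadic argument, but
     * dbg("no args\n") expands to fprintf(stderr, "no args\n", ) -- an error. */
    # define dbg(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__)
    #endif

    int main(void)
    {
      dbg("value=%d\n", 42);
      dbg("just a string\n");   /* compiles only with the GNU form */
      return 0;
    }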
diff --git a/src/libffmpeg/libavcodec/Makefile.am b/src/libffmpeg/libavcodec/Makefile.am
index cf34b0d28..cae72eeff 100644
--- a/src/libffmpeg/libavcodec/Makefile.am
+++ b/src/libffmpeg/libavcodec/Makefile.am
@@ -4,14 +4,15 @@ SUBDIRS = armv4l i386 mlib alpha ppc sparc libpostproc
# some of ffmpeg's decoders are not used by xine yet
EXTRA_DIST = motion_est_template.c \
- adx.c cljr.c fdctref.c ffv1.c g726.c jpeg_ls.c mdec.c raw.c snow.c svq3.c wmv2.c
+ adx.c cljr.c fdctref.c ffv1.c g726.c jpeg_ls.c mdec.c raw.c svq3.c wmv2.c
# we need to compile everything in debug mode, including the encoders,
# otherwise we get unresolved symbols, because some unsatisfied function calls
# are not optimized away with debug optimization
-AM_CFLAGS = `test "$(CFLAGS)" = "$(DEBUG_CFLAGS)" && echo -DCONFIG_ENCODERS` -fno-strict-aliasing -DCONFIG_VC1_DECODER
+#AM_CFLAGS = `test "$(CFLAGS)" = "$(DEBUG_CFLAGS)" && echo -DCONFIG_ENCODERS` -fno-strict-aliasing
+AM_CFLAGS = `test "$(CFLAGS)" = "$(DEBUG_CFLAGS)"` -fno-strict-aliasing
AM_CPPFLAGS = $(ZLIB_CPPFLAGS) $(LIBFFMPEG_CPPFLAGS) \
- -I$(top_srcdir)/src/libffmpeg/libavutil
+ -I$(top_srcdir)/src/libffmpeg/libavutil -I$(top_srcdir)/src/libffmpeg
ASFLAGS =
noinst_LTLIBRARIES = libavcodec.la
@@ -94,6 +95,7 @@ libavcodec_la_SOURCES = \
simple_idct.c \
smacker.c \
smc.c \
+ snow.c \
svq1.c \
tscc.c \
truemotion1.c \
@@ -110,7 +112,12 @@ libavcodec_la_SOURCES = \
vorbis_data.c \
vp3.c \
vp3dsp.c \
+ vp5.c \
+ vp56.c \
+ vp56data.c \
+ vp6.c \
vqavideo.c \
+ wavpack.c \
wmadec.c \
wnv1.c \
xan.c \
@@ -175,4 +182,8 @@ noinst_HEADERS = \
vc1acdata.h \
vc1data.h \
vp3data.h \
+ vp56.h \
+ vp56data.h \
+ vp5data.h \
+ vp6data.h \
wmadata.h
diff --git a/src/libffmpeg/libavcodec/armv4l/Makefile.am b/src/libffmpeg/libavcodec/armv4l/Makefile.am
index 0f3d230f6..33e0882c9 100644
--- a/src/libffmpeg/libavcodec/armv4l/Makefile.am
+++ b/src/libffmpeg/libavcodec/armv4l/Makefile.am
@@ -6,7 +6,12 @@ ASFLAGS =
noinst_LTLIBRARIES = libavcodec_armv4l.la
-libavcodec_armv4l_src = dsputil_arm.c jrevdct_arm.S mpegvideo_arm.c simple_idct_arm.S
+libavcodec_armv4l_src = dsputil_arm.c jrevdct_arm.S mpegvideo_arm.c simple_idct_arm.S \
+ dsputil_arm_s.S dsputil_iwmmxt.c dsputil_iwmmxt_rnd.h \
+ mpegvideo_armv5te.c mpegvideo_iwmmxt.c simple_idct_armv5te.S
+
+noinst_HEADERS = mathops.h
+
libavcodec_armv4l_dummy = libavcodec_armv4l_dummy.c
EXTRA_DIST = $(libavcodec_armv4l_src) $(libavcodec_armv4l_dummy)
diff --git a/src/libffmpeg/libavcodec/armv4l/dsputil_arm_s.S b/src/libffmpeg/libavcodec/armv4l/dsputil_arm_s.S
new file mode 100644
index 000000000..2a3ee9c50
--- /dev/null
+++ b/src/libffmpeg/libavcodec/armv4l/dsputil_arm_s.S
@@ -0,0 +1,696 @@
+@
+@ ARMv4L optimized DSP utils
+@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
+@
+@ This file is part of FFmpeg.
+@
+@ FFmpeg is free software; you can redistribute it and/or
+@ modify it under the terms of the GNU Lesser General Public
+@ License as published by the Free Software Foundation; either
+@ version 2.1 of the License, or (at your option) any later version.
+@
+@ FFmpeg is distributed in the hope that it will be useful,
+@ but WITHOUT ANY WARRANTY; without even the implied warranty of
+@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+@ Lesser General Public License for more details.
+@
+@ You should have received a copy of the GNU Lesser General Public
+@ License along with FFmpeg; if not, write to the Free Software
+@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+@
+
+.macro ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
+ mov \Rd0, \Rn0, lsr #(\shift * 8)
+ mov \Rd1, \Rn1, lsr #(\shift * 8)
+ mov \Rd2, \Rn2, lsr #(\shift * 8)
+ mov \Rd3, \Rn3, lsr #(\shift * 8)
+ orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
+ orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
+ orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
+ orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
+.endm
+.macro ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2
+ mov \R0, \R0, lsr #(\shift * 8)
+ orr \R0, \R0, \R1, lsl #(32 - \shift * 8)
+ mov \R1, \R1, lsr #(\shift * 8)
+ orr \R1, \R1, \R2, lsl #(32 - \shift * 8)
+.endm
+.macro ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
+ mov \Rdst0, \Rsrc0, lsr #(\shift * 8)
+ mov \Rdst1, \Rsrc1, lsr #(\shift * 8)
+ orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
+ orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
+.endm
+
+.macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
+ @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
+ @ Rmask = 0xFEFEFEFE
+ @ Rn = destroy
+ eor \Rd0, \Rn0, \Rm0
+ eor \Rd1, \Rn1, \Rm1
+ orr \Rn0, \Rn0, \Rm0
+ orr \Rn1, \Rn1, \Rm1
+ and \Rd0, \Rd0, \Rmask
+ and \Rd1, \Rd1, \Rmask
+ sub \Rd0, \Rn0, \Rd0, lsr #1
+ sub \Rd1, \Rn1, \Rd1, lsr #1
+.endm
+
+.macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
+ @ Rd = (Rn & Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
+ @ Rmask = 0xFEFEFEFE
+ @ Rn = destroy
+ eor \Rd0, \Rn0, \Rm0
+ eor \Rd1, \Rn1, \Rm1
+ and \Rn0, \Rn0, \Rm0
+ and \Rn1, \Rn1, \Rm1
+ and \Rd0, \Rd0, \Rmask
+ and \Rd1, \Rd1, \Rmask
+ add \Rd0, \Rn0, \Rd0, lsr #1
+ add \Rd1, \Rn1, \Rd1, lsr #1
+.endm
+
+@ ----------------------------------------------------------------
+ .align 8
+ .global put_pixels16_arm
+put_pixels16_arm:
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixles = unaligned
+ pld [r1]
+ stmfd sp!, {r4-r11, lr} @ R14 is also called LR
+ adr r5, 5f
+ ands r4, r1, #3
+ bic r1, r1, #3
+ add r5, r5, r4, lsl #2
+ ldrne pc, [r5]
+1:
+ ldmia r1, {r4-r7}
+ add r1, r1, r2
+ stmia r0, {r4-r7}
+ pld [r1]
+ subs r3, r3, #1
+ add r0, r0, r2
+ bne 1b
+ ldmfd sp!, {r4-r11, pc}
+ .align 8
+2:
+ ldmia r1, {r4-r8}
+ add r1, r1, r2
+ ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
+ pld [r1]
+ subs r3, r3, #1
+ stmia r0, {r9-r12}
+ add r0, r0, r2
+ bne 2b
+ ldmfd sp!, {r4-r11, pc}
+ .align 8
+3:
+ ldmia r1, {r4-r8}
+ add r1, r1, r2
+ ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
+ pld [r1]
+ subs r3, r3, #1
+ stmia r0, {r9-r12}
+ add r0, r0, r2
+ bne 3b
+ ldmfd sp!, {r4-r11, pc}
+ .align 8
+4:
+ ldmia r1, {r4-r8}
+ add r1, r1, r2
+ ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
+ pld [r1]
+ subs r3, r3, #1
+ stmia r0, {r9-r12}
+ add r0, r0, r2
+ bne 4b
+ ldmfd sp!, {r4-r11,pc}
+ .align 8
+5:
+ .word 1b
+ .word 2b
+ .word 3b
+ .word 4b
+
+@ ----------------------------------------------------------------
+ .align 8
+ .global put_pixels8_arm
+put_pixels8_arm:
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixles = unaligned
+ pld [r1]
+ stmfd sp!, {r4-r5,lr} @ R14 is also called LR
+ adr r5, 5f
+ ands r4, r1, #3
+ bic r1, r1, #3
+ add r5, r5, r4, lsl #2
+ ldrne pc, [r5]
+1:
+ ldmia r1, {r4-r5}
+ add r1, r1, r2
+ subs r3, r3, #1
+ pld [r1]
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 1b
+ ldmfd sp!, {r4-r5,pc}
+ .align 8
+2:
+ ldmia r1, {r4-r5, r12}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12
+ pld [r1]
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 2b
+ ldmfd sp!, {r4-r5,pc}
+ .align 8
+3:
+ ldmia r1, {r4-r5, r12}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12
+ pld [r1]
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 3b
+ ldmfd sp!, {r4-r5,pc}
+ .align 8
+4:
+ ldmia r1, {r4-r5, r12}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12
+ pld [r1]
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 4b
+ ldmfd sp!, {r4-r5,pc}
+ .align 8
+5:
+ .word 1b
+ .word 2b
+ .word 3b
+ .word 4b
+
+@ ----------------------------------------------------------------
+ .align 8
+ .global put_pixels8_x2_arm
+put_pixels8_x2_arm:
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixles = unaligned
+ pld [r1]
+ stmfd sp!, {r4-r10,lr} @ R14 is also called LR
+ adr r5, 5f
+ ands r4, r1, #3
+ ldr r12, [r5]
+ add r5, r5, r4, lsl #2
+ bic r1, r1, #3
+ ldrne pc, [r5]
+1:
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
+ pld [r1]
+ RND_AVG32 r8, r9, r4, r5, r6, r7, r12
+ subs r3, r3, #1
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+ bne 1b
+ ldmfd sp!, {r4-r10,pc}
+ .align 8
+2:
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
+ ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
+ pld [r1]
+ RND_AVG32 r4, r5, r6, r7, r8, r9, r12
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 2b
+ ldmfd sp!, {r4-r10,pc}
+ .align 8
+3:
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
+ ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
+ pld [r1]
+ RND_AVG32 r4, r5, r6, r7, r8, r9, r12
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 3b
+ ldmfd sp!, {r4-r10,pc}
+ .align 8
+4:
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
+ pld [r1]
+ RND_AVG32 r8, r9, r6, r7, r5, r10, r12
+ subs r3, r3, #1
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+ bne 4b
+ ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
+ .align 8
+5:
+ .word 0xFEFEFEFE
+ .word 2b
+ .word 3b
+ .word 4b
+
+ .align 8
+ .global put_no_rnd_pixels8_x2_arm
+put_no_rnd_pixels8_x2_arm:
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixles = unaligned
+ pld [r1]
+ stmfd sp!, {r4-r10,lr} @ R14 is also called LR
+ adr r5, 5f
+ ands r4, r1, #3
+ ldr r12, [r5]
+ add r5, r5, r4, lsl #2
+ bic r1, r1, #3
+ ldrne pc, [r5]
+1:
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
+ pld [r1]
+ NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
+ subs r3, r3, #1
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+ bne 1b
+ ldmfd sp!, {r4-r10,pc}
+ .align 8
+2:
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
+ ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
+ pld [r1]
+ NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 2b
+ ldmfd sp!, {r4-r10,pc}
+ .align 8
+3:
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
+ ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
+ pld [r1]
+ NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 3b
+ ldmfd sp!, {r4-r10,pc}
+ .align 8
+4:
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
+ pld [r1]
+ NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
+ subs r3, r3, #1
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+ bne 4b
+ ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
+ .align 8
+5:
+ .word 0xFEFEFEFE
+ .word 2b
+ .word 3b
+ .word 4b
+
+
+@ ----------------------------------------------------------------
+ .align 8
+ .global put_pixels8_y2_arm
+put_pixels8_y2_arm:
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixles = unaligned
+ pld [r1]
+ stmfd sp!, {r4-r11,lr} @ R14 is also called LR
+ adr r5, 5f
+ ands r4, r1, #3
+ mov r3, r3, lsr #1
+ ldr r12, [r5]
+ add r5, r5, r4, lsl #2
+ bic r1, r1, #3
+ ldrne pc, [r5]
+1:
+ ldmia r1, {r4-r5}
+ add r1, r1, r2
+6: ldmia r1, {r6-r7}
+ add r1, r1, r2
+ pld [r1]
+ RND_AVG32 r8, r9, r4, r5, r6, r7, r12
+ ldmia r1, {r4-r5}
+ add r1, r1, r2
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+ pld [r1]
+ RND_AVG32 r8, r9, r6, r7, r4, r5, r12
+ subs r3, r3, #1
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+ .align 8
+2:
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
+6: ldmia r1, {r7-r9}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
+ RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
+ subs r3, r3, #1
+ RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+ .align 8
+3:
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
+6: ldmia r1, {r7-r9}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
+ RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
+ subs r3, r3, #1
+ RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+ .align 8
+4:
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
+6: ldmia r1, {r7-r9}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
+ RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
+ subs r3, r3, #1
+ RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+
+ .align 8
+5:
+ .word 0xFEFEFEFE
+ .word 2b
+ .word 3b
+ .word 4b
+
+ .align 8
+ .global put_no_rnd_pixels8_y2_arm
+put_no_rnd_pixels8_y2_arm:
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixles = unaligned
+ pld [r1]
+ stmfd sp!, {r4-r11,lr} @ R14 is also called LR
+ adr r5, 5f
+ ands r4, r1, #3
+ mov r3, r3, lsr #1
+ ldr r12, [r5]
+ add r5, r5, r4, lsl #2
+ bic r1, r1, #3
+ ldrne pc, [r5]
+1:
+ ldmia r1, {r4-r5}
+ add r1, r1, r2
+6: ldmia r1, {r6-r7}
+ add r1, r1, r2
+ pld [r1]
+ NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
+ ldmia r1, {r4-r5}
+ add r1, r1, r2
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+ pld [r1]
+ NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
+ subs r3, r3, #1
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+ .align 8
+2:
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
+6: ldmia r1, {r7-r9}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
+ NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
+ subs r3, r3, #1
+ NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+ .align 8
+3:
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
+6: ldmia r1, {r7-r9}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
+ NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
+ subs r3, r3, #1
+ NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+ .align 8
+4:
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
+6: ldmia r1, {r7-r9}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
+ NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
+ subs r3, r3, #1
+ NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+ .align 8
+5:
+ .word 0xFEFEFEFE
+ .word 2b
+ .word 3b
+ .word 4b
+
+@ ----------------------------------------------------------------
+.macro RND_XY2_IT align, rnd
+ @ l1= (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
+ @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
+.if \align == 0
+ ldmia r1, {r6-r8}
+.elseif \align == 3
+ ldmia r1, {r5-r7}
+.else
+ ldmia r1, {r8-r10}
+.endif
+ add r1, r1, r2
+ pld [r1]
+.if \align == 0
+ ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8
+.elseif \align == 1
+ ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r8, r9, r10
+ ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r8, r9, r10
+.elseif \align == 2
+ ADJ_ALIGN_DOUBLEWORD_D 2, r4, r5, r8, r9, r10
+ ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r8, r9, r10
+.elseif \align == 3
+ ADJ_ALIGN_DOUBLEWORD_D 3, r4, r5, r5, r6, r7
+.endif
+ ldr r14, [r12, #0] @ 0x03030303
+ tst r3, #1
+ and r8, r4, r14
+ and r9, r5, r14
+ and r10, r6, r14
+ and r11, r7, r14
+.if \rnd == 1
+ ldreq r14, [r12, #16] @ 0x02020202
+.else
+ ldreq r14, [r12, #28] @ 0x01010101
+.endif
+ add r8, r8, r10
+ add r9, r9, r11
+ addeq r8, r8, r14
+ addeq r9, r9, r14
+ ldr r14, [r12, #20] @ 0xFCFCFCFC >> 2
+ and r4, r14, r4, lsr #2
+ and r5, r14, r5, lsr #2
+ and r6, r14, r6, lsr #2
+ and r7, r14, r7, lsr #2
+ add r10, r4, r6
+ add r11, r5, r7
+.endm
+
+.macro RND_XY2_EXPAND align, rnd
+ RND_XY2_IT \align, \rnd
+6: stmfd sp!, {r8-r11}
+ RND_XY2_IT \align, \rnd
+ ldmfd sp!, {r4-r7}
+ add r4, r4, r8
+ add r5, r5, r9
+ add r6, r6, r10
+ add r7, r7, r11
+ ldr r14, [r12, #24] @ 0x0F0F0F0F
+ and r4, r14, r4, lsr #2
+ and r5, r14, r5, lsr #2
+ add r4, r4, r6
+ add r5, r5, r7
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+.endm
+
+ .align 8
+ .global put_pixels8_xy2_arm
+put_pixels8_xy2_arm:
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixles = unaligned
+ pld [r1]
+ stmfd sp!, {r4-r11,lr} @ R14 is also called LR
+ adrl r12, 5f
+ ands r4, r1, #3
+ add r5, r12, r4, lsl #2
+ bic r1, r1, #3
+ ldrne pc, [r5]
+1:
+ RND_XY2_EXPAND 0, 1
+
+ .align 8
+2:
+ RND_XY2_EXPAND 1, 1
+
+ .align 8
+3:
+ RND_XY2_EXPAND 2, 1
+
+ .align 8
+4:
+ RND_XY2_EXPAND 3, 1
+
+5:
+ .word 0x03030303
+ .word 2b
+ .word 3b
+ .word 4b
+ .word 0x02020202
+ .word 0xFCFCFCFC >> 2
+ .word 0x0F0F0F0F
+ .word 0x01010101
+
+ .align 8
+ .global put_no_rnd_pixels8_xy2_arm
+put_no_rnd_pixels8_xy2_arm:
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixles = unaligned
+ pld [r1]
+ stmfd sp!, {r4-r11,lr} @ R14 is also called LR
+ adrl r12, 5f
+ ands r4, r1, #3
+ add r5, r12, r4, lsl #2
+ bic r1, r1, #3
+ ldrne pc, [r5]
+1:
+ RND_XY2_EXPAND 0, 0
+
+ .align 8
+2:
+ RND_XY2_EXPAND 1, 0
+
+ .align 8
+3:
+ RND_XY2_EXPAND 2, 0
+
+ .align 8
+4:
+ RND_XY2_EXPAND 3, 0
+
+5:
+ .word 0x03030303
+ .word 2b
+ .word 3b
+ .word 4b
+ .word 0x02020202
+ .word 0xFCFCFCFC >> 2
+ .word 0x0F0F0F0F
+ .word 0x01010101
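The RND_AVG32 and NO_RND_AVG32 macros in the file above are SWAR (SIMD-within-a-register) byte averages: masking (a^b) with 0xFEFEFEFE clears each lane's low bit before the shift, so no bit leaks from one byte into the next, yielding (a+b+1)>>1 respectively (a+b)>>1 for all four bytes at once. The same arithmetic in plain C, as a reference sketch:

    #include <stdint.h>

    /* Rounding per-byte average: each lane becomes (a + b + 1) >> 1.
     * Uses a|b = (a^b) + (a&b), so (a|b) - ((a^b)>>1) == ceil((a+b)/2). */
    static uint32_t rnd_avg32(uint32_t a, uint32_t b)
    {
      return (a | b) - (((a ^ b) & 0xFEFEFEFEu) >> 1);
    }

    /* Truncating per-byte average: each lane becomes (a + b) >> 1. */
    static uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
    {
      return (a & b) + (((a ^ b) & 0xFEFEFEFEu) >> 1);
    }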
diff --git a/src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt.c b/src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt.c
new file mode 100644
index 000000000..d7401e760
--- /dev/null
+++ b/src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt.c
@@ -0,0 +1,188 @@
+/*
+ * iWMMXt optimized DSP utils
+ * Copyright (c) 2004 AGAWA Koji
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../dsputil.h"
+
+#define DEF(x, y) x ## _no_rnd_ ## y ##_iwmmxt
+#define SET_RND(regd) __asm__ __volatile__ ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12");
+#define WAVG2B "wavg2b"
+#include "dsputil_iwmmxt_rnd.h"
+#undef DEF
+#undef SET_RND
+#undef WAVG2B
+
+#define DEF(x, y) x ## _ ## y ##_iwmmxt
+#define SET_RND(regd) __asm__ __volatile__ ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12");
+#define WAVG2B "wavg2br"
+#include "dsputil_iwmmxt_rnd.h"
+#undef DEF
+#undef SET_RND
+#undef WAVG2BR
+
+// need scheduling
+#define OP(AVG) \
+ asm volatile ( \
+ /* alignment */ \
+ "and r12, %[pixels], #7 \n\t" \
+ "bic %[pixels], %[pixels], #7 \n\t" \
+ "tmcr wcgr1, r12 \n\t" \
+ \
+ "wldrd wr0, [%[pixels]] \n\t" \
+ "wldrd wr1, [%[pixels], #8] \n\t" \
+ "add %[pixels], %[pixels], %[line_size] \n\t" \
+ "walignr1 wr4, wr0, wr1 \n\t" \
+ \
+ "1: \n\t" \
+ \
+ "wldrd wr2, [%[pixels]] \n\t" \
+ "wldrd wr3, [%[pixels], #8] \n\t" \
+ "add %[pixels], %[pixels], %[line_size] \n\t" \
+ "pld [%[pixels]] \n\t" \
+ "walignr1 wr5, wr2, wr3 \n\t" \
+ AVG " wr6, wr4, wr5 \n\t" \
+ "wstrd wr6, [%[block]] \n\t" \
+ "add %[block], %[block], %[line_size] \n\t" \
+ \
+ "wldrd wr0, [%[pixels]] \n\t" \
+ "wldrd wr1, [%[pixels], #8] \n\t" \
+ "add %[pixels], %[pixels], %[line_size] \n\t" \
+ "walignr1 wr4, wr0, wr1 \n\t" \
+ "pld [%[pixels]] \n\t" \
+ AVG " wr6, wr4, wr5 \n\t" \
+ "wstrd wr6, [%[block]] \n\t" \
+ "add %[block], %[block], %[line_size] \n\t" \
+ \
+ "subs %[h], %[h], #2 \n\t" \
+ "bne 1b \n\t" \
+ : [block]"+r"(block), [pixels]"+r"(pixels), [h]"+r"(h) \
+ : [line_size]"r"(line_size) \
+ : "memory", "r12");
+void put_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ OP("wavg2br");
+}
+void put_no_rnd_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ OP("wavg2b");
+}
+#undef OP
+
+void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size)
+{
+ uint8_t *pixels2 = pixels + line_size;
+
+ __asm__ __volatile__ (
+ "mov r12, #4 \n\t"
+ "1: \n\t"
+ "pld [%[pixels], %[line_size2]] \n\t"
+ "pld [%[pixels2], %[line_size2]] \n\t"
+ "wldrd wr4, [%[pixels]] \n\t"
+ "wldrd wr5, [%[pixels2]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "wunpckelub wr6, wr4 \n\t"
+ "wldrd wr0, [%[block]] \n\t"
+ "wunpckehub wr7, wr4 \n\t"
+ "wldrd wr1, [%[block], #8] \n\t"
+ "wunpckelub wr8, wr5 \n\t"
+ "wldrd wr2, [%[block], #16] \n\t"
+ "wunpckehub wr9, wr5 \n\t"
+ "wldrd wr3, [%[block], #24] \n\t"
+ "add %[block], %[block], #32 \n\t"
+ "waddhss wr10, wr0, wr6 \n\t"
+ "waddhss wr11, wr1, wr7 \n\t"
+ "waddhss wr12, wr2, wr8 \n\t"
+ "waddhss wr13, wr3, wr9 \n\t"
+ "wpackhus wr14, wr10, wr11 \n\t"
+ "wpackhus wr15, wr12, wr13 \n\t"
+ "wstrd wr14, [%[pixels]] \n\t"
+ "add %[pixels], %[pixels], %[line_size2] \n\t"
+ "subs r12, r12, #1 \n\t"
+ "wstrd wr15, [%[pixels2]] \n\t"
+ "add %[pixels2], %[pixels2], %[line_size2] \n\t"
+ "bne 1b \n\t"
+ : [block]"+r"(block), [pixels]"+r"(pixels), [pixels2]"+r"(pixels2)
+ : [line_size2]"r"(line_size << 1)
+ : "cc", "memory", "r12");
+}
+
+static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ return;
+}
+
+int mm_flags; /* multimedia extension flags */
+
+int mm_support(void)
+{
+ return 0; /* TODO, implement proper detection */
+}
+
+void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
+{
+ mm_flags = mm_support();
+
+ if (avctx->dsp_mask) {
+ if (avctx->dsp_mask & FF_MM_FORCE)
+ mm_flags |= (avctx->dsp_mask & 0xffff);
+ else
+ mm_flags &= ~(avctx->dsp_mask & 0xffff);
+ }
+
+ if (!(mm_flags & MM_IWMMXT)) return;
+
+ c->add_pixels_clamped = add_pixels_clamped_iwmmxt;
+
+ c->put_pixels_tab[0][0] = put_pixels16_iwmmxt;
+ c->put_pixels_tab[0][1] = put_pixels16_x2_iwmmxt;
+ c->put_pixels_tab[0][2] = put_pixels16_y2_iwmmxt;
+ c->put_pixels_tab[0][3] = put_pixels16_xy2_iwmmxt;
+ c->put_no_rnd_pixels_tab[0][0] = put_pixels16_iwmmxt;
+ c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_iwmmxt;
+ c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_iwmmxt;
+ c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_iwmmxt;
+
+ c->put_pixels_tab[1][0] = put_pixels8_iwmmxt;
+ c->put_pixels_tab[1][1] = put_pixels8_x2_iwmmxt;
+ c->put_pixels_tab[1][2] = put_pixels8_y2_iwmmxt;
+ c->put_pixels_tab[1][3] = put_pixels8_xy2_iwmmxt;
+ c->put_no_rnd_pixels_tab[1][0] = put_pixels8_iwmmxt;
+ c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_iwmmxt;
+ c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_iwmmxt;
+ c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_iwmmxt;
+
+ c->avg_pixels_tab[0][0] = avg_pixels16_iwmmxt;
+ c->avg_pixels_tab[0][1] = avg_pixels16_x2_iwmmxt;
+ c->avg_pixels_tab[0][2] = avg_pixels16_y2_iwmmxt;
+ c->avg_pixels_tab[0][3] = avg_pixels16_xy2_iwmmxt;
+ c->avg_no_rnd_pixels_tab[0][0] = avg_pixels16_iwmmxt;
+ c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_iwmmxt;
+ c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_iwmmxt;
+ c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_iwmmxt;
+
+ c->avg_pixels_tab[1][0] = avg_pixels8_iwmmxt;
+ c->avg_pixels_tab[1][1] = avg_pixels8_x2_iwmmxt;
+ c->avg_pixels_tab[1][2] = avg_pixels8_y2_iwmmxt;
+ c->avg_pixels_tab[1][3] = avg_pixels8_xy2_iwmmxt;
+ c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_iwmmxt;
+ c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_iwmmxt;
+ c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_iwmmxt;
+ c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_iwmmxt;
+}
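dsputil_iwmmxt.c above pulls in dsputil_iwmmxt_rnd.h twice, redefining DEF, SET_RND and WAVG2B between the two passes, so the same source expands once into the rounding and once into the no_rnd functions — C's macro-based substitute for templates. A toy sketch of the idiom, with hypothetical file and function names:

    /* ops_template.h -- meant to be included repeatedly; each inclusion
     * must be preceded by fresh definitions of NAME and OP. */
    int NAME(int a, int b) { return OP(a, b); }

    /* user.c */
    #define NAME op_add
    #define OP(a, b) ((a) + (b))
    #include "ops_template.h"      /* expands into: int op_add(int, int) */
    #undef NAME
    #undef OP

    #define NAME op_max
    #define OP(a, b) ((a) > (b) ? (a) : (b))
    #include "ops_template.h"      /* expands into: int op_max(int, int) */
    #undef NAME
    #undef OP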
diff --git a/src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt_rnd.h b/src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt_rnd.h
new file mode 100644
index 000000000..51ba61c47
--- /dev/null
+++ b/src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt_rnd.h
@@ -0,0 +1,1114 @@
+/*
+ * iWMMXt optimized DSP utils
+ * copyright (c) 2004 AGAWA Koji
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ __asm__ __volatile__ (
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r4, %[pixels], %[line_size] \n\t"
+ "add r5, %[block], %[line_size] \n\t"
+ "mov %[line_size], %[line_size], lsl #1 \n\t"
+ "1: \n\t"
+ "wldrd wr0, [%[pixels]] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wldrd wr1, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "wldrd wr3, [r4] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "wldrd wr4, [r4, #8] \n\t"
+ "add r4, r4, %[line_size] \n\t"
+ "walignr1 wr8, wr0, wr1 \n\t"
+ "pld [r4] \n\t"
+ "pld [r4, #32] \n\t"
+ "walignr1 wr10, wr3, wr4 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "wstrd wr10, [r5] \n\t"
+ "add r5, r5, %[line_size] \n\t"
+ "bne 1b \n\t"
+ : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
+ :
+ : "memory", "r4", "r5", "r12");
+}
+
+void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ __asm__ __volatile__ (
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r4, %[pixels], %[line_size] \n\t"
+ "add r5, %[block], %[line_size] \n\t"
+ "mov %[line_size], %[line_size], lsl #1 \n\t"
+ "1: \n\t"
+ "wldrd wr0, [%[pixels]] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wldrd wr1, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "wldrd wr3, [r4] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "wldrd wr4, [r4, #8] \n\t"
+ "add r4, r4, %[line_size] \n\t"
+ "walignr1 wr8, wr0, wr1 \n\t"
+ "wldrd wr0, [%[block]] \n\t"
+ "wldrd wr2, [r5] \n\t"
+ "pld [r4] \n\t"
+ "pld [r4, #32] \n\t"
+ "walignr1 wr10, wr3, wr4 \n\t"
+ WAVG2B" wr8, wr8, wr0 \n\t"
+ WAVG2B" wr10, wr10, wr2 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "wstrd wr10, [r5] \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "add r5, r5, %[line_size] \n\t"
+ "pld [r5] \n\t"
+ "pld [r5, #32] \n\t"
+ "bne 1b \n\t"
+ : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
+ :
+ : "memory", "r4", "r5", "r12");
+}
+
+void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ __asm__ __volatile__ (
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r4, %[pixels], %[line_size] \n\t"
+ "add r5, %[block], %[line_size] \n\t"
+ "mov %[line_size], %[line_size], lsl #1 \n\t"
+ "1: \n\t"
+ "wldrd wr0, [%[pixels]] \n\t"
+ "wldrd wr1, [%[pixels], #8] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wldrd wr2, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "wldrd wr3, [r4] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr8, wr0, wr1 \n\t"
+ "wldrd wr4, [r4, #8] \n\t"
+ "walignr1 wr9, wr1, wr2 \n\t"
+ "wldrd wr5, [r4, #16] \n\t"
+ "add r4, r4, %[line_size] \n\t"
+ "pld [r4] \n\t"
+ "pld [r4, #32] \n\t"
+ "walignr1 wr10, wr3, wr4 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "walignr1 wr11, wr4, wr5 \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "wstrd wr10, [r5] \n\t"
+ "wstrd wr11, [r5, #8] \n\t"
+ "add r5, r5, %[line_size] \n\t"
+ "bne 1b \n\t"
+ : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
+ :
+ : "memory", "r4", "r5", "r12");
+}
+
+void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ __asm__ __volatile__ (
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r4, %[pixels], %[line_size]\n\t"
+ "add r5, %[block], %[line_size] \n\t"
+ "mov %[line_size], %[line_size], lsl #1 \n\t"
+ "1: \n\t"
+ "wldrd wr0, [%[pixels]] \n\t"
+ "wldrd wr1, [%[pixels], #8] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wldrd wr2, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "wldrd wr3, [r4] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr8, wr0, wr1 \n\t"
+ "wldrd wr4, [r4, #8] \n\t"
+ "walignr1 wr9, wr1, wr2 \n\t"
+ "wldrd wr5, [r4, #16] \n\t"
+ "add r4, r4, %[line_size] \n\t"
+ "wldrd wr0, [%[block]] \n\t"
+ "pld [r4] \n\t"
+ "wldrd wr1, [%[block], #8] \n\t"
+ "pld [r4, #32] \n\t"
+ "wldrd wr2, [r5] \n\t"
+ "walignr1 wr10, wr3, wr4 \n\t"
+ "wldrd wr3, [r5, #8] \n\t"
+ WAVG2B" wr8, wr8, wr0 \n\t"
+ WAVG2B" wr9, wr9, wr1 \n\t"
+ WAVG2B" wr10, wr10, wr2 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "walignr1 wr11, wr4, wr5 \n\t"
+ WAVG2B" wr11, wr11, wr3 \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "wstrd wr10, [r5] \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "wstrd wr11, [r5, #8] \n\t"
+ "add r5, r5, %[line_size] \n\t"
+ "pld [r5] \n\t"
+ "pld [r5, #32] \n\t"
+ "bne 1b \n\t"
+ : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
+ :
+ : "memory", "r4", "r5", "r12");
+}
+
+void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ // [wr0 wr2] aligned pixels of the two rows handled per iteration
+ // [wr4 wr6] the same pixels shifted right by one byte
+ SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
+ __asm__ __volatile__(
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r12, r12, #1 \n\t"
+ "add r4, %[pixels], %[line_size]\n\t"
+ "tmcr wcgr2, r12 \n\t"
+ "add r5, %[block], %[line_size] \n\t"
+ "mov %[line_size], %[line_size], lsl #1 \n\t"
+
+ "1: \n\t"
+ "wldrd wr10, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "wldrd wr13, [r4] \n\t"
+ "pld [%[pixels]] \n\t"
+ "wldrd wr14, [r4, #8] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "add r4, r4, %[line_size] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "pld [r4] \n\t"
+ "pld [r4, #32] \n\t"
+ "walignr1 wr2, wr13, wr14 \n\t"
+ "wmoveq wr4, wr11 \n\t"
+ "wmoveq wr6, wr14 \n\t"
+ "walignr2ne wr4, wr10, wr11 \n\t"
+ "walignr2ne wr6, wr13, wr14 \n\t"
+ WAVG2B" wr0, wr0, wr4 \n\t"
+ WAVG2B" wr2, wr2, wr6 \n\t"
+ "wstrd wr0, [%[block]] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wstrd wr2, [r5] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "add r5, r5, %[line_size] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+ :
+ : "r4", "r5", "r12", "memory");
+}
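+
+/*
+ * For reference: the *_x2 routines do horizontal half-pel interpolation,
+ * averaging each pixel with its right-hand neighbour. A C sketch of the
+ * routine above (the "+ 1" applies to the rounding variant of WAVG2B):
+ *
+ *   for (i = 0; i < h; i++, block += line_size, pixels += line_size)
+ *       for (j = 0; j < 8; j++)
+ *           block[j] = (pixels[j] + pixels[j + 1] + 1) >> 1;
+ */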
+
+void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ // [wr0 wr1] / [wr2 wr3] aligned pixels of the two rows handled per iteration
+ // [wr4 wr5] / [wr6 wr7] the same pixels shifted right by one byte
+ SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
+ __asm__ __volatile__(
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r12, r12, #1 \n\t"
+ "add r4, %[pixels], %[line_size]\n\t"
+ "tmcr wcgr2, r12 \n\t"
+ "add r5, %[block], %[line_size] \n\t"
+ "mov %[line_size], %[line_size], lsl #1 \n\t"
+
+ "1: \n\t"
+ "wldrd wr10, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "wldrd wr12, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "wldrd wr13, [r4] \n\t"
+ "pld [%[pixels]] \n\t"
+ "wldrd wr14, [r4, #8] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "wldrd wr15, [r4, #16] \n\t"
+ "add r4, r4, %[line_size] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "pld [r4] \n\t"
+ "pld [r4, #32] \n\t"
+ "walignr1 wr1, wr11, wr12 \n\t"
+ "walignr1 wr2, wr13, wr14 \n\t"
+ "walignr1 wr3, wr14, wr15 \n\t"
+ "wmoveq wr4, wr11 \n\t"
+ "wmoveq wr5, wr12 \n\t"
+ "wmoveq wr6, wr14 \n\t"
+ "wmoveq wr7, wr15 \n\t"
+ "walignr2ne wr4, wr10, wr11 \n\t"
+ "walignr2ne wr5, wr11, wr12 \n\t"
+ "walignr2ne wr6, wr13, wr14 \n\t"
+ "walignr2ne wr7, wr14, wr15 \n\t"
+ WAVG2B" wr0, wr0, wr4 \n\t"
+ WAVG2B" wr1, wr1, wr5 \n\t"
+ "wstrd wr0, [%[block]] \n\t"
+ WAVG2B" wr2, wr2, wr6 \n\t"
+ "wstrd wr1, [%[block], #8] \n\t"
+ WAVG2B" wr3, wr3, wr7 \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "wstrd wr2, [r5] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wstrd wr3, [r5, #8] \n\t"
+ "add r5, r5, %[line_size] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+ :
+ : "r4", "r5", "r12", "memory");
+}
+
+void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ // [wr0 wr2] aligned pixels of the two rows handled per iteration
+ // [wr4 wr6] the same pixels shifted right by one byte
+ SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
+ __asm__ __volatile__(
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r12, r12, #1 \n\t"
+ "add r4, %[pixels], %[line_size]\n\t"
+ "tmcr wcgr2, r12 \n\t"
+ "add r5, %[block], %[line_size] \n\t"
+ "mov %[line_size], %[line_size], lsl #1 \n\t"
+ "pld [r5] \n\t"
+ "pld [r5, #32] \n\t"
+
+ "1: \n\t"
+ "wldrd wr10, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "wldrd wr13, [r4] \n\t"
+ "pld [%[pixels]] \n\t"
+ "wldrd wr14, [r4, #8] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "add r4, r4, %[line_size] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "pld [r4] \n\t"
+ "pld [r4, #32] \n\t"
+ "walignr1 wr2, wr13, wr14 \n\t"
+ "wmoveq wr4, wr11 \n\t"
+ "wmoveq wr6, wr14 \n\t"
+ "walignr2ne wr4, wr10, wr11 \n\t"
+ "wldrd wr10, [%[block]] \n\t"
+ "walignr2ne wr6, wr13, wr14 \n\t"
+ "wldrd wr12, [r5] \n\t"
+ WAVG2B" wr0, wr0, wr4 \n\t"
+ WAVG2B" wr2, wr2, wr6 \n\t"
+ WAVG2B" wr0, wr0, wr10 \n\t"
+ WAVG2B" wr2, wr2, wr12 \n\t"
+ "wstrd wr0, [%[block]] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wstrd wr2, [r5] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "add r5, r5, %[line_size] \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "pld [r5] \n\t"
+ "pld [r5, #32] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+ :
+ : "r4", "r5", "r12", "memory");
+}
+
+void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ // [wr0 wr1] / [wr2 wr3] aligned pixels of the two rows handled per iteration
+ // [wr4 wr5] / [wr6 wr7] the same pixels shifted right by one byte
+ SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
+ __asm__ __volatile__(
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r12, r12, #1 \n\t"
+ "add r4, %[pixels], %[line_size]\n\t"
+ "tmcr wcgr2, r12 \n\t"
+ "add r5, %[block], %[line_size] \n\t"
+ "mov %[line_size], %[line_size], lsl #1 \n\t"
+ "pld [r5] \n\t"
+ "pld [r5, #32] \n\t"
+
+ "1: \n\t"
+ "wldrd wr10, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "wldrd wr12, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "wldrd wr13, [r4] \n\t"
+ "pld [%[pixels]] \n\t"
+ "wldrd wr14, [r4, #8] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "wldrd wr15, [r4, #16] \n\t"
+ "add r4, r4, %[line_size] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "pld [r4] \n\t"
+ "pld [r4, #32] \n\t"
+ "walignr1 wr1, wr11, wr12 \n\t"
+ "walignr1 wr2, wr13, wr14 \n\t"
+ "walignr1 wr3, wr14, wr15 \n\t"
+ "wmoveq wr4, wr11 \n\t"
+ "wmoveq wr5, wr12 \n\t"
+ "wmoveq wr6, wr14 \n\t"
+ "wmoveq wr7, wr15 \n\t"
+ "walignr2ne wr4, wr10, wr11 \n\t"
+ "walignr2ne wr5, wr11, wr12 \n\t"
+ "walignr2ne wr6, wr13, wr14 \n\t"
+ "walignr2ne wr7, wr14, wr15 \n\t"
+ "wldrd wr10, [%[block]] \n\t"
+ WAVG2B" wr0, wr0, wr4 \n\t"
+ "wldrd wr11, [%[block], #8] \n\t"
+ WAVG2B" wr1, wr1, wr5 \n\t"
+ "wldrd wr12, [r5] \n\t"
+ WAVG2B" wr2, wr2, wr6 \n\t"
+ "wldrd wr13, [r5, #8] \n\t"
+ WAVG2B" wr3, wr3, wr7 \n\t"
+ WAVG2B" wr0, wr0, wr10 \n\t"
+ WAVG2B" wr1, wr1, wr11 \n\t"
+ WAVG2B" wr2, wr2, wr12 \n\t"
+ WAVG2B" wr3, wr3, wr13 \n\t"
+ "wstrd wr0, [%[block]] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wstrd wr1, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "wstrd wr2, [r5] \n\t"
+ "pld [%[block]] \n\t"
+ "wstrd wr3, [r5, #8] \n\t"
+ "add r5, r5, %[line_size] \n\t"
+ "pld [%[block], #32] \n\t"
+ "pld [r5] \n\t"
+ "pld [r5, #32] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+ :
+ :"r4", "r5", "r12", "memory");
+}
+
+void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ __asm__ __volatile__(
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "and r12, %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "pld [%[block]] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+
+ "1: \n\t"
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr4, wr10, wr11 \n\t"
+ "wldrd wr10, [%[block]] \n\t"
+ WAVG2B" wr8, wr0, wr4 \n\t"
+ WAVG2B" wr8, wr8, wr10 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "pld [%[block]] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "wldrd wr10, [%[block]] \n\t"
+ WAVG2B" wr8, wr0, wr4 \n\t"
+ WAVG2B" wr8, wr8, wr10 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ "subs %[h], %[h], #2 \n\t"
+ "pld [%[block]] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+ :
+ : "cc", "memory", "r12");
+}
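+
+/*
+ * For reference: the *_y2 routines do vertical half-pel interpolation,
+ * averaging each pixel with the one directly below; the avg_ variants then
+ * average that result with the existing destination. Per pixel, roughly:
+ *
+ *   t = (pixels[j] + pixels[j + line_size] + 1) >> 1;
+ *   block[j] = (block[j] + t + 1) >> 1;
+ */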
+
+void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ __asm__ __volatile__(
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "and r12, %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "wldrd wr12, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "walignr1 wr1, wr11, wr12 \n\t"
+
+ "1: \n\t"
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "wldrd wr12, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr4, wr10, wr11 \n\t"
+ "walignr1 wr5, wr11, wr12 \n\t"
+ WAVG2B" wr8, wr0, wr4 \n\t"
+ WAVG2B" wr9, wr1, wr5 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "wldrd wr12, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "walignr1 wr1, wr11, wr12 \n\t"
+ WAVG2B" wr8, wr0, wr4 \n\t"
+ WAVG2B" wr9, wr1, wr5 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ "subs %[h], %[h], #2 \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+ :
+ : "r4", "r5", "r12", "memory");
+}
+
+void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ __asm__ __volatile__(
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "and r12, %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "pld [%[block]] \n\t"
+ "wldrd wr12, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "walignr1 wr1, wr11, wr12 \n\t"
+
+ "1: \n\t"
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "wldrd wr12, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr4, wr10, wr11 \n\t"
+ "walignr1 wr5, wr11, wr12 \n\t"
+ "wldrd wr10, [%[block]] \n\t"
+ "wldrd wr11, [%[block], #8] \n\t"
+ WAVG2B" wr8, wr0, wr4 \n\t"
+ WAVG2B" wr9, wr1, wr5 \n\t"
+ WAVG2B" wr8, wr8, wr10 \n\t"
+ WAVG2B" wr9, wr9, wr11 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "pld [%[block]] \n\t"
+ "wldrd wr12, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "walignr1 wr1, wr11, wr12 \n\t"
+ "wldrd wr10, [%[block]] \n\t"
+ "wldrd wr11, [%[block], #8] \n\t"
+ WAVG2B" wr8, wr0, wr4 \n\t"
+ WAVG2B" wr9, wr1, wr5 \n\t"
+ WAVG2B" wr8, wr8, wr10 \n\t"
+ WAVG2B" wr9, wr9, wr11 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ "subs %[h], %[h], #2 \n\t"
+ "pld [%[block]] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+ :
+ : "r4", "r5", "r12", "memory");
+}
+
+void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
+ __asm__ __volatile__(
+ "pld [%[pixels]] \n\t"
+ "mov r12, #2 \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "tmcr wcgr0, r12 \n\t" /* for shift value */
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+
+ // [wr0 wr1 wr2 wr3] <= *
+ // [wr4 wr5 wr6 wr7]
+ "wldrd wr12, [%[pixels]] \n\t"
+ "add r12, r12, #1 \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "tmcr wcgr2, r12 \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "cmp r12, #8 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr2, wr12, wr13 \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "wunpckelub wr0, wr2 \n\t"
+ "wunpckehub wr1, wr2 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "waddhus wr0, wr0, wr8 \n\t"
+ "waddhus wr1, wr1, wr9 \n\t"
+
+ "1: \n\t"
+ // [wr0 wr1 wr2 wr3]
+ // [wr4 wr5 wr6 wr7] <= *
+ "wldrd wr12, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr6, wr12, wr13 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "wunpckelub wr4, wr6 \n\t"
+ "wunpckehub wr5, wr6 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "waddhus wr4, wr4, wr8 \n\t"
+ "waddhus wr5, wr5, wr9 \n\t"
+ "waddhus wr8, wr0, wr4 \n\t"
+ "waddhus wr9, wr1, wr5 \n\t"
+ "waddhus wr8, wr8, wr15 \n\t"
+ "waddhus wr9, wr9, wr15 \n\t"
+ "wsrlhg wr8, wr8, wcgr0 \n\t"
+ "wsrlhg wr9, wr9, wcgr0 \n\t"
+ "wpackhus wr8, wr8, wr9 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ // [wr0 wr1 wr2 wr3] <= *
+ // [wr4 wr5 wr6 wr7]
+ "wldrd wr12, [%[pixels]] \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr2, wr12, wr13 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "wunpckelub wr0, wr2 \n\t"
+ "wunpckehub wr1, wr2 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "waddhus wr0, wr0, wr8 \n\t"
+ "waddhus wr1, wr1, wr9 \n\t"
+ "waddhus wr8, wr0, wr4 \n\t"
+ "waddhus wr9, wr1, wr5 \n\t"
+ "waddhus wr8, wr8, wr15 \n\t"
+ "waddhus wr9, wr9, wr15 \n\t"
+ "wsrlhg wr8, wr8, wcgr0 \n\t"
+ "wsrlhg wr9, wr9, wcgr0 \n\t"
+ "wpackhus wr8, wr8, wr9 \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
+ : [line_size]"r"(line_size)
+ : "r12", "memory");
+}
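+
+/*
+ * For reference: the *_xy2 routines compute the full 2x2 half-pel average.
+ * Per pixel the routine above is roughly:
+ *
+ *   block[j] = (pixels[j] + pixels[j + 1] +
+ *               pixels[j + line_size] + pixels[j + line_size + 1] + 2) >> 2;
+ *
+ * The "+ 2" is the rounding constant SET_RND loads into wr15 (1 in the
+ * no_rnd version) and the ">> 2" is the wsrlhg shift by wcgr0 == 2.
+ */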
+
+void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
+ __asm__ __volatile__(
+ "pld [%[pixels]] \n\t"
+ "mov r12, #2 \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "tmcr wcgr0, r12 \n\t" /* for shift value */
+ /* alignment */
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r12, r12, #1 \n\t"
+ "tmcr wcgr2, r12 \n\t"
+
+ // [wr0 wr1 wr2 wr3] <= *
+ // [wr4 wr5 wr6 wr7]
+ "wldrd wr12, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "wldrd wr14, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "walignr1 wr2, wr12, wr13 \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr3, wr13, wr14 \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "wmoveq wr11, wr14 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "walignr2ne wr11, wr13, wr14 \n\t"
+ "wunpckelub wr0, wr2 \n\t"
+ "wunpckehub wr1, wr2 \n\t"
+ "wunpckelub wr2, wr3 \n\t"
+ "wunpckehub wr3, wr3 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "wunpckelub wr10, wr11 \n\t"
+ "wunpckehub wr11, wr11 \n\t"
+ "waddhus wr0, wr0, wr8 \n\t"
+ "waddhus wr1, wr1, wr9 \n\t"
+ "waddhus wr2, wr2, wr10 \n\t"
+ "waddhus wr3, wr3, wr11 \n\t"
+
+ "1: \n\t"
+ // [wr0 wr1 wr2 wr3]
+ // [wr4 wr5 wr6 wr7] <= *
+ "wldrd wr12, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "wldrd wr14, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr6, wr12, wr13 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr7, wr13, wr14 \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "wmoveq wr11, wr14 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "walignr2ne wr11, wr13, wr14 \n\t"
+ "wunpckelub wr4, wr6 \n\t"
+ "wunpckehub wr5, wr6 \n\t"
+ "wunpckelub wr6, wr7 \n\t"
+ "wunpckehub wr7, wr7 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "wunpckelub wr10, wr11 \n\t"
+ "wunpckehub wr11, wr11 \n\t"
+ "waddhus wr4, wr4, wr8 \n\t"
+ "waddhus wr5, wr5, wr9 \n\t"
+ "waddhus wr6, wr6, wr10 \n\t"
+ "waddhus wr7, wr7, wr11 \n\t"
+ "waddhus wr8, wr0, wr4 \n\t"
+ "waddhus wr9, wr1, wr5 \n\t"
+ "waddhus wr10, wr2, wr6 \n\t"
+ "waddhus wr11, wr3, wr7 \n\t"
+ "waddhus wr8, wr8, wr15 \n\t"
+ "waddhus wr9, wr9, wr15 \n\t"
+ "waddhus wr10, wr10, wr15 \n\t"
+ "waddhus wr11, wr11, wr15 \n\t"
+ "wsrlhg wr8, wr8, wcgr0 \n\t"
+ "wsrlhg wr9, wr9, wcgr0 \n\t"
+ "wsrlhg wr10, wr10, wcgr0 \n\t"
+ "wsrlhg wr11, wr11, wcgr0 \n\t"
+ "wpackhus wr8, wr8, wr9 \n\t"
+ "wpackhus wr9, wr10, wr11 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ // [wr0 wr1 wr2 wr3] <= *
+ // [wr4 wr5 wr6 wr7]
+ "wldrd wr12, [%[pixels]] \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "wldrd wr14, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr2, wr12, wr13 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr3, wr13, wr14 \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "wmoveq wr11, wr14 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "walignr2ne wr11, wr13, wr14 \n\t"
+ "wunpckelub wr0, wr2 \n\t"
+ "wunpckehub wr1, wr2 \n\t"
+ "wunpckelub wr2, wr3 \n\t"
+ "wunpckehub wr3, wr3 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "wunpckelub wr10, wr11 \n\t"
+ "wunpckehub wr11, wr11 \n\t"
+ "waddhus wr0, wr0, wr8 \n\t"
+ "waddhus wr1, wr1, wr9 \n\t"
+ "waddhus wr2, wr2, wr10 \n\t"
+ "waddhus wr3, wr3, wr11 \n\t"
+ "waddhus wr8, wr0, wr4 \n\t"
+ "waddhus wr9, wr1, wr5 \n\t"
+ "waddhus wr10, wr2, wr6 \n\t"
+ "waddhus wr11, wr3, wr7 \n\t"
+ "waddhus wr8, wr8, wr15 \n\t"
+ "waddhus wr9, wr9, wr15 \n\t"
+ "waddhus wr10, wr10, wr15 \n\t"
+ "waddhus wr11, wr11, wr15 \n\t"
+ "wsrlhg wr8, wr8, wcgr0 \n\t"
+ "wsrlhg wr9, wr9, wcgr0 \n\t"
+ "wsrlhg wr10, wr10, wcgr0 \n\t"
+ "wsrlhg wr11, wr11, wcgr0 \n\t"
+ "wpackhus wr8, wr8, wr9 \n\t"
+ "wpackhus wr9, wr10, wr11 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ "subs %[h], %[h], #2 \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
+ : [line_size]"r"(line_size)
+ : "r12", "memory");
+}
+
+void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
+ __asm__ __volatile__(
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "pld [%[pixels]] \n\t"
+ "mov r12, #2 \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "tmcr wcgr0, r12 \n\t" /* for shift value */
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+
+ // [wr0 wr1 wr2 wr3] <= *
+ // [wr4 wr5 wr6 wr7]
+ "wldrd wr12, [%[pixels]] \n\t"
+ "add r12, r12, #1 \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "tmcr wcgr2, r12 \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "cmp r12, #8 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr2, wr12, wr13 \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "wunpckelub wr0, wr2 \n\t"
+ "wunpckehub wr1, wr2 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "waddhus wr0, wr0, wr8 \n\t"
+ "waddhus wr1, wr1, wr9 \n\t"
+
+ "1: \n\t"
+ // [wr0 wr1 wr2 wr3]
+ // [wr4 wr5 wr6 wr7] <= *
+ "wldrd wr12, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr6, wr12, wr13 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "wunpckelub wr4, wr6 \n\t"
+ "wunpckehub wr5, wr6 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "waddhus wr4, wr4, wr8 \n\t"
+ "waddhus wr5, wr5, wr9 \n\t"
+ "waddhus wr8, wr0, wr4 \n\t"
+ "waddhus wr9, wr1, wr5 \n\t"
+ "waddhus wr8, wr8, wr15 \n\t"
+ "waddhus wr9, wr9, wr15 \n\t"
+ "wldrd wr12, [%[block]] \n\t"
+ "wsrlhg wr8, wr8, wcgr0 \n\t"
+ "wsrlhg wr9, wr9, wcgr0 \n\t"
+ "wpackhus wr8, wr8, wr9 \n\t"
+ WAVG2B" wr8, wr8, wr12 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "wldrd wr12, [%[pixels]] \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+
+ // [wr0 wr1 wr2 wr3] <= *
+ // [wr4 wr5 wr6 wr7]
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr2, wr12, wr13 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "wunpckelub wr0, wr2 \n\t"
+ "wunpckehub wr1, wr2 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "waddhus wr0, wr0, wr8 \n\t"
+ "waddhus wr1, wr1, wr9 \n\t"
+ "waddhus wr8, wr0, wr4 \n\t"
+ "waddhus wr9, wr1, wr5 \n\t"
+ "waddhus wr8, wr8, wr15 \n\t"
+ "waddhus wr9, wr9, wr15 \n\t"
+ "wldrd wr12, [%[block]] \n\t"
+ "wsrlhg wr8, wr8, wcgr0 \n\t"
+ "wsrlhg wr9, wr9, wcgr0 \n\t"
+ "wpackhus wr8, wr8, wr9 \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ WAVG2B" wr8, wr8, wr12 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
+ : [line_size]"r"(line_size)
+ : "r12", "memory");
+}
+
+void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
+ __asm__ __volatile__(
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "pld [%[pixels]] \n\t"
+ "mov r12, #2 \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "tmcr wcgr0, r12 \n\t" /* for shift value */
+ /* alignment */
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r12, r12, #1 \n\t"
+ "tmcr wcgr2, r12 \n\t"
+
+ // [wr0 wr1 wr2 wr3] <= *
+ // [wr4 wr5 wr6 wr7]
+ "wldrd wr12, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "wldrd wr14, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "walignr1 wr2, wr12, wr13 \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr3, wr13, wr14 \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "wmoveq wr11, wr14 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "walignr2ne wr11, wr13, wr14 \n\t"
+ "wunpckelub wr0, wr2 \n\t"
+ "wunpckehub wr1, wr2 \n\t"
+ "wunpckelub wr2, wr3 \n\t"
+ "wunpckehub wr3, wr3 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "wunpckelub wr10, wr11 \n\t"
+ "wunpckehub wr11, wr11 \n\t"
+ "waddhus wr0, wr0, wr8 \n\t"
+ "waddhus wr1, wr1, wr9 \n\t"
+ "waddhus wr2, wr2, wr10 \n\t"
+ "waddhus wr3, wr3, wr11 \n\t"
+
+ "1: \n\t"
+ // [wr0 wr1 wr2 wr3]
+ // [wr4 wr5 wr6 wr7] <= *
+ "wldrd wr12, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "wldrd wr14, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr6, wr12, wr13 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr7, wr13, wr14 \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "wmoveq wr11, wr14 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "walignr2ne wr11, wr13, wr14 \n\t"
+ "wunpckelub wr4, wr6 \n\t"
+ "wunpckehub wr5, wr6 \n\t"
+ "wunpckelub wr6, wr7 \n\t"
+ "wunpckehub wr7, wr7 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "wunpckelub wr10, wr11 \n\t"
+ "wunpckehub wr11, wr11 \n\t"
+ "waddhus wr4, wr4, wr8 \n\t"
+ "waddhus wr5, wr5, wr9 \n\t"
+ "waddhus wr6, wr6, wr10 \n\t"
+ "waddhus wr7, wr7, wr11 \n\t"
+ "waddhus wr8, wr0, wr4 \n\t"
+ "waddhus wr9, wr1, wr5 \n\t"
+ "waddhus wr10, wr2, wr6 \n\t"
+ "waddhus wr11, wr3, wr7 \n\t"
+ "waddhus wr8, wr8, wr15 \n\t"
+ "waddhus wr9, wr9, wr15 \n\t"
+ "waddhus wr10, wr10, wr15 \n\t"
+ "waddhus wr11, wr11, wr15 \n\t"
+ "wsrlhg wr8, wr8, wcgr0 \n\t"
+ "wsrlhg wr9, wr9, wcgr0 \n\t"
+ "wldrd wr12, [%[block]] \n\t"
+ "wldrd wr13, [%[block], #8] \n\t"
+ "wsrlhg wr10, wr10, wcgr0 \n\t"
+ "wsrlhg wr11, wr11, wcgr0 \n\t"
+ "wpackhus wr8, wr8, wr9 \n\t"
+ "wpackhus wr9, wr10, wr11 \n\t"
+ WAVG2B" wr8, wr8, wr12 \n\t"
+ WAVG2B" wr9, wr9, wr13 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ // [wr0 wr1 wr2 wr3] <= *
+ // [wr4 wr5 wr6 wr7]
+ "wldrd wr12, [%[pixels]] \n\t"
+ "pld [%[block]] \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "pld [%[block], #32] \n\t"
+ "wldrd wr14, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr2, wr12, wr13 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr3, wr13, wr14 \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "wmoveq wr11, wr14 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "walignr2ne wr11, wr13, wr14 \n\t"
+ "wunpckelub wr0, wr2 \n\t"
+ "wunpckehub wr1, wr2 \n\t"
+ "wunpckelub wr2, wr3 \n\t"
+ "wunpckehub wr3, wr3 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "wunpckelub wr10, wr11 \n\t"
+ "wunpckehub wr11, wr11 \n\t"
+ "waddhus wr0, wr0, wr8 \n\t"
+ "waddhus wr1, wr1, wr9 \n\t"
+ "waddhus wr2, wr2, wr10 \n\t"
+ "waddhus wr3, wr3, wr11 \n\t"
+ "waddhus wr8, wr0, wr4 \n\t"
+ "waddhus wr9, wr1, wr5 \n\t"
+ "waddhus wr10, wr2, wr6 \n\t"
+ "waddhus wr11, wr3, wr7 \n\t"
+ "waddhus wr8, wr8, wr15 \n\t"
+ "waddhus wr9, wr9, wr15 \n\t"
+ "waddhus wr10, wr10, wr15 \n\t"
+ "waddhus wr11, wr11, wr15 \n\t"
+ "wsrlhg wr8, wr8, wcgr0 \n\t"
+ "wsrlhg wr9, wr9, wcgr0 \n\t"
+ "wldrd wr12, [%[block]] \n\t"
+ "wldrd wr13, [%[block], #8] \n\t"
+ "wsrlhg wr10, wr10, wcgr0 \n\t"
+ "wsrlhg wr11, wr11, wcgr0 \n\t"
+ "wpackhus wr8, wr8, wr9 \n\t"
+ "wpackhus wr9, wr10, wr11 \n\t"
+ WAVG2B" wr8, wr8, wr12 \n\t"
+ WAVG2B" wr9, wr9, wr13 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
+ : [line_size]"r"(line_size)
+ : "r12", "memory");
+}
diff --git a/src/libffmpeg/libavcodec/armv4l/mathops.h b/src/libffmpeg/libavcodec/armv4l/mathops.h
new file mode 100644
index 000000000..7ddd0ec6e
--- /dev/null
+++ b/src/libffmpeg/libavcodec/armv4l/mathops.h
@@ -0,0 +1,49 @@
+/*
+ * simple math operations
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
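+/* MULL is a fixed-point multiply returning the middle bits of the 64-bit
+ * product, i.e. (int)(((int64_t)a * b) >> FRAC_BITS): smull forms the full
+ * product and the lsr/lsl pair extracts bits FRAC_BITS..FRAC_BITS+31. */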
+#ifdef FRAC_BITS
+# define MULL(a, b) \
+ ({ int lo, hi;\
+ asm("smull %0, %1, %2, %3 \n\t"\
+ "mov %0, %0, lsr %4\n\t"\
+ "add %1, %0, %1, lsl %5\n\t"\
+ : "=&r"(lo), "=&r"(hi)\
+ : "r"(b), "r"(a), "i"(FRAC_BITS), "i"(32-FRAC_BITS));\
+ hi; })
+#endif
+
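+/* MULH returns the high 32 bits of the signed 64-bit product,
+ * i.e. (int)(((int64_t)a * b) >> 32). */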
+#define MULH(a, b) \
+ ({ int lo, hi;\
+ asm ("smull %0, %1, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(b), "r"(a));\
+ hi; })
+
+#if defined(HAVE_ARMV5TE)
+
+/* signed 16x16 -> 32 multiply add accumulate */
+# define MAC16(rt, ra, rb) \
+ asm ("smlabb %0, %2, %3, %0" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
+/* signed 16x16 -> 32 multiply */
+# define MUL16(ra, rb) \
+ ({ int __rt; \
+ asm ("smulbb %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); \
+ __rt; })
+
+#endif
diff --git a/src/libffmpeg/libavcodec/armv4l/mpegvideo_armv5te.c b/src/libffmpeg/libavcodec/armv4l/mpegvideo_armv5te.c
new file mode 100644
index 000000000..a8d09b8ce
--- /dev/null
+++ b/src/libffmpeg/libavcodec/armv4l/mpegvideo_armv5te.c
@@ -0,0 +1,213 @@
+/*
+ * Optimization of some functions from mpegvideo.c for armv5te
+ * Copyright (c) 2007 Siarhei Siamashka <ssvb@users.sourceforge.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Some useful links for those who may be interested in optimizing code for ARM.
+ * ARM Architecture Reference Manual: http://www.arm.com/community/academy/resources.html
+ * Instruction timings and optimization guide for ARM9E: http://www.arm.com/pdfs/DDI0222B_9EJS_r1p2.pdf
+ */
+
+#include "../dsputil.h"
+#include "../mpegvideo.h"
+#include "../avcodec.h"
+
+
+#ifdef ENABLE_ARM_TESTS
+/**
+ * h263 dequantizer supplementary function; it is performance-critical and
+ * needs an optimized implementation for each architecture. It is also used
+ * as a reference implementation in regression tests.
+ */
+static inline void dct_unquantize_h263_helper_c(DCTELEM *block, int qmul, int qadd, int count)
+{
+ int i, level;
+ for (i = 0; i < count; i++) {
+ level = block[i];
+ if (level) {
+ if (level < 0) {
+ level = level * qmul - qadd;
+ } else {
+ level = level * qmul + qadd;
+ }
+ block[i] = level;
+ }
+ }
+}
+#endif
+
+/* GCC 3.1 or higher is required to support symbolic names in assembly code */
+#if (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1))
+
+/**
+ * Special optimized version of dct_unquantize_h263_helper_c. It requires the
+ * block to be at least 8-byte aligned and may process more elements than
+ * requested, but it is guaranteed never to process more than 64 elements
+ * provided that the xxcount argument is <= 64, so it is safe. The macro is
+ * tuned for the common distribution of nCoeffs values (mostly a multiple of 8
+ * plus one or two extra elements): it processes 8 elements per loop iteration
+ * and optionally handles 2 trailing elements at the end.
+ *
+ * The inner loop should take 6 cycles per element on an arm926ej-s (Nokia 770).
+ */
+#define dct_unquantize_h263_special_helper_armv5te(xxblock, xxqmul, xxqadd, xxcount) \
+({ DCTELEM *xblock = xxblock; \
+ int xqmul = xxqmul, xqadd = xxqadd, xcount = xxcount, xtmp; \
+ int xdata1, xdata2; \
+__asm__ __volatile__( \
+ "subs %[count], #2 \n\t" \
+ "ble 2f \n\t" \
+ "ldrd r4, [%[block], #0] \n\t" \
+ "1: \n\t" \
+ "ldrd r6, [%[block], #8] \n\t" \
+\
+ "rsbs %[data1], %[zero], r4, asr #16 \n\t" \
+ "addgt %[data1], %[qadd], #0 \n\t" \
+ "rsblt %[data1], %[qadd], #0 \n\t" \
+ "smlatbne %[data1], r4, %[qmul], %[data1] \n\t" \
+\
+ "rsbs %[data2], %[zero], r5, asr #16 \n\t" \
+ "addgt %[data2], %[qadd], #0 \n\t" \
+ "rsblt %[data2], %[qadd], #0 \n\t" \
+ "smlatbne %[data2], r5, %[qmul], %[data2] \n\t" \
+\
+ "rsbs %[tmp], %[zero], r4, asl #16 \n\t" \
+ "addgt %[tmp], %[qadd], #0 \n\t" \
+ "rsblt %[tmp], %[qadd], #0 \n\t" \
+ "smlabbne r4, r4, %[qmul], %[tmp] \n\t" \
+\
+ "rsbs %[tmp], %[zero], r5, asl #16 \n\t" \
+ "addgt %[tmp], %[qadd], #0 \n\t" \
+ "rsblt %[tmp], %[qadd], #0 \n\t" \
+ "smlabbne r5, r5, %[qmul], %[tmp] \n\t" \
+\
+ "strh r4, [%[block]], #2 \n\t" \
+ "strh %[data1], [%[block]], #2 \n\t" \
+ "strh r5, [%[block]], #2 \n\t" \
+ "strh %[data2], [%[block]], #2 \n\t" \
+\
+ "rsbs %[data1], %[zero], r6, asr #16 \n\t" \
+ "addgt %[data1], %[qadd], #0 \n\t" \
+ "rsblt %[data1], %[qadd], #0 \n\t" \
+ "smlatbne %[data1], r6, %[qmul], %[data1] \n\t" \
+\
+ "rsbs %[data2], %[zero], r7, asr #16 \n\t" \
+ "addgt %[data2], %[qadd], #0 \n\t" \
+ "rsblt %[data2], %[qadd], #0 \n\t" \
+ "smlatbne %[data2], r7, %[qmul], %[data2] \n\t" \
+\
+ "rsbs %[tmp], %[zero], r6, asl #16 \n\t" \
+ "addgt %[tmp], %[qadd], #0 \n\t" \
+ "rsblt %[tmp], %[qadd], #0 \n\t" \
+ "smlabbne r6, r6, %[qmul], %[tmp] \n\t" \
+\
+ "rsbs %[tmp], %[zero], r7, asl #16 \n\t" \
+ "addgt %[tmp], %[qadd], #0 \n\t" \
+ "rsblt %[tmp], %[qadd], #0 \n\t" \
+ "smlabbne r7, r7, %[qmul], %[tmp] \n\t" \
+\
+ "strh r6, [%[block]], #2 \n\t" \
+ "strh %[data1], [%[block]], #2 \n\t" \
+ "strh r7, [%[block]], #2 \n\t" \
+ "strh %[data2], [%[block]], #2 \n\t" \
+\
+ "subs %[count], #8 \n\t" \
+ "ldrgtd r4, [%[block], #0] \n\t" /* load data early to avoid load/use pipeline stall */ \
+ "bgt 1b \n\t" \
+\
+ "adds %[count], #2 \n\t" \
+ "ble 3f \n\t" \
+ "2: \n\t" \
+ "ldrsh %[data1], [%[block], #0] \n\t" \
+ "ldrsh %[data2], [%[block], #2] \n\t" \
+ "mov %[tmp], %[qadd] \n\t" \
+ "cmp %[data1], #0 \n\t" \
+ "rsblt %[tmp], %[qadd], #0 \n\t" \
+ "smlabbne %[data1], %[data1], %[qmul], %[tmp] \n\t" \
+ "mov %[tmp], %[qadd] \n\t" \
+ "cmp %[data2], #0 \n\t" \
+ "rsblt %[tmp], %[qadd], #0 \n\t" \
+ "smlabbne %[data2], %[data2], %[qmul], %[tmp] \n\t" \
+ "strh %[data1], [%[block]], #2 \n\t" \
+ "strh %[data2], [%[block]], #2 \n\t" \
+ "3: \n\t" \
+ : [block] "+&r" (xblock), [count] "+&r" (xcount), [tmp] "=&r" (xtmp), \
+ [data1] "=&r" (xdata1), [data2] "=&r" (xdata2) \
+ : [qmul] "r" (xqmul), [qadd] "r" (xqadd), [zero] "r" (0) \
+ : "r4", "r5", "r6", "r7", "cc", "memory" \
+); \
+})
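+
+/*
+ * For reference, the macro above computes the same result as this plain C
+ * loop (modulo the alignment and overshoot caveats documented above):
+ *
+ *   for (i = 0; i < count; i++)
+ *       if (block[i])
+ *           block[i] = block[i] * qmul + (block[i] < 0 ? -qadd : qadd);
+ */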
+
+static void dct_unquantize_h263_intra_armv5te(MpegEncContext *s,
+ DCTELEM *block, int n, int qscale)
+{
+ int level, qmul, qadd;
+ int nCoeffs;
+
+ assert(s->block_last_index[n]>=0);
+
+ qmul = qscale << 1;
+
+ if (!s->h263_aic) {
+ if (n < 4)
+ level = block[0] * s->y_dc_scale;
+ else
+ level = block[0] * s->c_dc_scale;
+ qadd = (qscale - 1) | 1;
+ }else{
+ qadd = 0;
+ level = block[0];
+ }
+ if(s->ac_pred)
+ nCoeffs=63;
+ else
+ nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
+
+ dct_unquantize_h263_special_helper_armv5te(block, qmul, qadd, nCoeffs + 1);
+ block[0] = level;
+}
+
+static void dct_unquantize_h263_inter_armv5te(MpegEncContext *s,
+ DCTELEM *block, int n, int qscale)
+{
+ int qmul, qadd;
+ int nCoeffs;
+
+ assert(s->block_last_index[n]>=0);
+
+ qadd = (qscale - 1) | 1;
+ qmul = qscale << 1;
+
+ nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
+
+ dct_unquantize_h263_special_helper_armv5te(block, qmul, qadd, nCoeffs + 1);
+}
+
+#define HAVE_DCT_UNQUANTIZE_H263_ARMV5TE_OPTIMIZED
+
+#endif
+
+void MPV_common_init_armv5te(MpegEncContext *s)
+{
+#ifdef HAVE_DCT_UNQUANTIZE_H263_ARMV5TE_OPTIMIZED
+ s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_armv5te;
+ s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_armv5te;
+#endif
+}
diff --git a/src/libffmpeg/libavcodec/armv4l/mpegvideo_iwmmxt.c b/src/libffmpeg/libavcodec/armv4l/mpegvideo_iwmmxt.c
new file mode 100644
index 000000000..1336ac5f8
--- /dev/null
+++ b/src/libffmpeg/libavcodec/armv4l/mpegvideo_iwmmxt.c
@@ -0,0 +1,119 @@
+/*
+ * copyright (c) 2004 AGAWA Koji
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../dsputil.h"
+#include "../mpegvideo.h"
+#include "../avcodec.h"
+
+static void dct_unquantize_h263_intra_iwmmxt(MpegEncContext *s,
+ DCTELEM *block, int n, int qscale)
+{
+ int i, level, qmul, qadd;
+ int nCoeffs;
+ DCTELEM *block_orig = block;
+
+ assert(s->block_last_index[n]>=0);
+
+ qmul = qscale << 1;
+
+ if (!s->h263_aic) {
+ if (n < 4)
+ level = block[0] * s->y_dc_scale;
+ else
+ level = block[0] * s->c_dc_scale;
+ qadd = (qscale - 1) | 1;
+ }else{
+ qadd = 0;
+ level = block[0];
+ }
+ if(s->ac_pred)
+ nCoeffs=63;
+ else
+ nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
+
+ i = (nCoeffs + 8) / 8; /* chunks of 8 coefficients; decremented by the asm below */
+
+ __asm__ __volatile__ (
+/* "movd %1, %%mm6 \n\t" //qmul */
+/* "packssdw %%mm6, %%mm6 \n\t" */
+/* "packssdw %%mm6, %%mm6 \n\t" */
+ "tbcsth wr6, %[qmul] \n\t"
+/* "movd %2, %%mm5 \n\t" //qadd */
+/* "packssdw %%mm5, %%mm5 \n\t" */
+/* "packssdw %%mm5, %%mm5 \n\t" */
+ "tbcsth wr5, %[qadd] \n\t"
+ "wzero wr7 \n\t" /* "pxor %%mm7, %%mm7 \n\t" */
+ "wzero wr4 \n\t" /* "pxor %%mm4, %%mm4 \n\t" */
+ "wsubh wr7, wr5, wr7 \n\t" /* "psubw %%mm5, %%mm7 \n\t" */
+ "1: \n\t"
+ "wldrd wr2, [%[block]] \n\t" /* "movq (%0, %3), %%mm0 \n\t" */
+ "wldrd wr3, [%[block], #8] \n\t" /* "movq 8(%0, %3), %%mm1 \n\t" */
+ "wmulsl wr0, wr6, wr2 \n\t" /* "pmullw %%mm6, %%mm0 \n\t" */
+ "wmulsl wr1, wr6, wr3 \n\t" /* "pmullw %%mm6, %%mm1 \n\t" */
+/* "movq (%0, %3), %%mm2 \n\t" */
+/* "movq 8(%0, %3), %%mm3 \n\t" */
+ "wcmpgtsh wr2, wr4, wr2 \n\t" /* "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 */
+ "wcmpgtsh wr3, wr4, wr2 \n\t" /* "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 */
+ "wxor wr0, wr2, wr0 \n\t" /* "pxor %%mm2, %%mm0 \n\t" */
+ "wxor wr1, wr3, wr1 \n\t" /* "pxor %%mm3, %%mm1 \n\t" */
+ "waddh wr0, wr7, wr0 \n\t" /* "paddw %%mm7, %%mm0 \n\t" */
+ "waddh wr1, wr7, wr1 \n\t" /* "paddw %%mm7, %%mm1 \n\t" */
+ "wxor wr2, wr0, wr2 \n\t" /* "pxor %%mm0, %%mm2 \n\t" */
+ "wxor wr3, wr1, wr3 \n\t" /* "pxor %%mm1, %%mm3 \n\t" */
+ "wcmpeqh wr0, wr7, wr0 \n\t" /* "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0 */
+ "wcmpeqh wr1, wr7, wr1 \n\t" /* "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0 */
+ "wandn wr0, wr2, wr0 \n\t" /* "pandn %%mm2, %%mm0 \n\t" */
+ "wandn wr1, wr3, wr1 \n\t" /* "pandn %%mm3, %%mm1 \n\t" */
+ "wstrd wr0, [%[block]] \n\t" /* "movq %%mm0, (%0, %3) \n\t" */
+ "wstrd wr1, [%[block], #8] \n\t" /* "movq %%mm1, 8(%0, %3) \n\t" */
+ "add %[block], %[block], #16 \n\t" /* "addl $16, %3 \n\t" */
+ "subs %[i], %[i], #1 \n\t"
+ "bne 1b \n\t" /* "jng 1b \n\t" */
+ :[block]"+r"(block), [i]"+r"(i)
+ :[qmul]"r"(qmul), [qadd]"r"(qadd)
+ :"memory", "cc");
+
+ block_orig[0] = level;
+}
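+
+/*
+ * The loop above is the usual branchless SIMD dequantizer, a direct port of
+ * the MMX code quoted in the comments: the sign of qadd is folded into
+ * block[i]*qmul via the compare-mask XOR/ADD trick, and the final
+ * wcmpeqh/wandn pair forces elements that were originally zero back to zero.
+ */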
+
+#if 0
+static void dct_unquantize_h263_inter_iwmmxt(MpegEncContext *s,
+ DCTELEM *block, int n, int qscale)
+{
+ int nCoeffs;
+
+ assert(s->block_last_index[n]>=0);
+
+ if(s->ac_pred)
+ nCoeffs=63;
+ else
+ nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
+
+ ippiQuantInvInter_Compact_H263_16s_I(block, nCoeffs+1, qscale);
+}
+#endif
+
+void MPV_common_init_iwmmxt(MpegEncContext *s)
+{
+ if (!(mm_flags & MM_IWMMXT)) return;
+
+ s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_iwmmxt;
+#if 0
+ s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_iwmmxt;
+#endif
+}
diff --git a/src/libffmpeg/libavcodec/armv4l/simple_idct_armv5te.S b/src/libffmpeg/libavcodec/armv4l/simple_idct_armv5te.S
new file mode 100644
index 000000000..28bee0643
--- /dev/null
+++ b/src/libffmpeg/libavcodec/armv4l/simple_idct_armv5te.S
@@ -0,0 +1,718 @@
+/*
+ * Simple IDCT
+ *
+ * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2006 Mans Rullgard <mru@inprovide.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define W1 22725 /* cos(1*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W2 21407 /* cos(2*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W3 19266 /* cos(3*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W4 16383 /* cos(4*M_PI/16)*sqrt(2)*(1<<14) + 0.5 (nominally 16384) */
+#define W5 12873 /* cos(5*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W6 8867 /* cos(6*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W7 4520 /* cos(7*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define ROW_SHIFT 11
+#define COL_SHIFT 20
+
+#define W13 (W1 | (W3 << 16))
+#define W26 (W2 | (W6 << 16))
+#define W57 (W5 | (W7 << 16))
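+
+/* W13/W26/W57 pack two 16-bit coefficients into one 32-bit word so that the
+ * ARMv5TE smulxy/smlaxy instructions can select either half directly. */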
+
+ .text
+ .align
+w13: .long W13
+w26: .long W26
+w57: .long W57
+
+ .align
+ .func idct_row_armv5te
+idct_row_armv5te:
+ str lr, [sp, #-4]!
+
+ ldrd v1, [a1, #8]
+ ldrd a3, [a1] /* a3 = row[1:0], a4 = row[3:2] */
+ orrs v1, v1, v2
+ cmpeq v1, a4
+ cmpeq v1, a3, lsr #16
+ beq row_dc_only
+
+ mov v1, #(1<<(ROW_SHIFT-1))
+ mov ip, #16384
+ sub ip, ip, #1 /* ip = W4 */
+ smlabb v1, ip, a3, v1 /* v1 = W4*row[0]+(1<<(ROW_SHIFT-1)) */
+ ldr ip, [pc, #(w26-.-8)] /* ip = W2 | (W6 << 16) */
+ smultb a2, ip, a4
+ smulbb lr, ip, a4
+ add v2, v1, a2
+ sub v3, v1, a2
+ sub v4, v1, lr
+ add v1, v1, lr
+
+ ldr ip, [pc, #(w13-.-8)] /* ip = W1 | (W3 << 16) */
+ ldr lr, [pc, #(w57-.-8)] /* lr = W5 | (W7 << 16) */
+ smulbt v5, ip, a3
+ smultt v6, lr, a4
+ smlatt v5, ip, a4, v5
+ smultt a2, ip, a3
+ smulbt v7, lr, a3
+ sub v6, v6, a2
+ smulbt a2, ip, a4
+ smultt fp, lr, a3
+ sub v7, v7, a2
+ smulbt a2, lr, a4
+ ldrd a3, [a1, #8] /* a3=row[5:4] a4=row[7:6] */
+ sub fp, fp, a2
+
+ orrs a2, a3, a4
+ beq 1f
+
+ smlabt v5, lr, a3, v5
+ smlabt v6, ip, a3, v6
+ smlatt v5, lr, a4, v5
+ smlabt v6, lr, a4, v6
+ smlatt v7, lr, a3, v7
+ smlatt fp, ip, a3, fp
+ smulbt a2, ip, a4
+ smlatt v7, ip, a4, v7
+ sub fp, fp, a2
+
+ ldr ip, [pc, #(w26-.-8)] /* ip = W2 | (W6 << 16) */
+ mov a2, #16384
+ sub a2, a2, #1 /* a2 = W4 */
+ smulbb a2, a2, a3 /* a2 = W4*row[4] */
+ smultb lr, ip, a4 /* lr = W6*row[6] */
+ add v1, v1, a2 /* v1 += W4*row[4] */
+ add v1, v1, lr /* v1 += W6*row[6] */
+ add v4, v4, a2 /* v4 += W4*row[4] */
+ sub v4, v4, lr /* v4 -= W6*row[6] */
+ smulbb lr, ip, a4 /* lr = W2*row[6] */
+ sub v2, v2, a2 /* v2 -= W4*row[4] */
+ sub v2, v2, lr /* v2 -= W2*row[6] */
+ sub v3, v3, a2 /* v3 -= W4*row[4] */
+ add v3, v3, lr /* v3 += W2*row[6] */
+
+1: add a2, v1, v5
+ mov a3, a2, lsr #11
+ bic a3, a3, #0x1f0000
+ sub a2, v2, v6
+ mov a2, a2, lsr #11
+ add a3, a3, a2, lsl #16
+ add a2, v3, v7
+ mov a4, a2, lsr #11
+ bic a4, a4, #0x1f0000
+ add a2, v4, fp
+ mov a2, a2, lsr #11
+ add a4, a4, a2, lsl #16
+ strd a3, [a1]
+
+ sub a2, v4, fp
+ mov a3, a2, lsr #11
+ bic a3, a3, #0x1f0000
+ sub a2, v3, v7
+ mov a2, a2, lsr #11
+ add a3, a3, a2, lsl #16
+ add a2, v2, v6
+ mov a4, a2, lsr #11
+ bic a4, a4, #0x1f0000
+ sub a2, v1, v5
+ mov a2, a2, lsr #11
+ add a4, a4, a2, lsl #16
+ strd a3, [a1, #8]
+
+ ldr pc, [sp], #4
+
+row_dc_only:
+ orr a3, a3, a3, lsl #16
+ bic a3, a3, #0xe000
+ mov a3, a3, lsl #3
+ mov a4, a3
+ strd a3, [a1]
+ strd a3, [a1, #8]
+
+ ldr pc, [sp], #4
+ .endfunc
+
+ .macro idct_col
+ ldr a4, [a1] /* a4 = col[1:0] */
+ mov ip, #16384
+ sub ip, ip, #1 /* ip = W4 */
+#if 0
+ mov v1, #(1<<(COL_SHIFT-1))
+ smlabt v2, ip, a4, v1 /* v2 = W4*col[1] + (1<<(COL_SHIFT-1)) */
+ smlabb v1, ip, a4, v1 /* v1 = W4*col[0] + (1<<(COL_SHIFT-1)) */
+ ldr a4, [a1, #(16*4)]
+#else
+ mov v1, #((1<<(COL_SHIFT-1))/W4) /* this matches the C version */
+ add v2, v1, a4, asr #16
+ rsb v2, v2, v2, lsl #14
+ mov a4, a4, lsl #16
+ add v1, v1, a4, asr #16
+ ldr a4, [a1, #(16*4)]
+ rsb v1, v1, v1, lsl #14
+#endif
+
+ smulbb lr, ip, a4
+ smulbt a3, ip, a4
+ sub v3, v1, lr
+ sub v5, v1, lr
+ add v7, v1, lr
+ add v1, v1, lr
+ sub v4, v2, a3
+ sub v6, v2, a3
+ add fp, v2, a3
+ ldr ip, [pc, #(w26-.-8)]
+ ldr a4, [a1, #(16*2)]
+ add v2, v2, a3
+
+ smulbb lr, ip, a4
+ smultb a3, ip, a4
+ add v1, v1, lr
+ sub v7, v7, lr
+ add v3, v3, a3
+ sub v5, v5, a3
+ smulbt lr, ip, a4
+ smultt a3, ip, a4
+ add v2, v2, lr
+ sub fp, fp, lr
+ add v4, v4, a3
+ ldr a4, [a1, #(16*6)]
+ sub v6, v6, a3
+
+ smultb lr, ip, a4
+ smulbb a3, ip, a4
+ add v1, v1, lr
+ sub v7, v7, lr
+ sub v3, v3, a3
+ add v5, v5, a3
+ smultt lr, ip, a4
+ smulbt a3, ip, a4
+ add v2, v2, lr
+ sub fp, fp, lr
+ sub v4, v4, a3
+ add v6, v6, a3
+
+ stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp}
+
+ ldr ip, [pc, #(w13-.-8)]
+ ldr a4, [a1, #(16*1)]
+ ldr lr, [pc, #(w57-.-8)]
+ smulbb v1, ip, a4
+ smultb v3, ip, a4
+ smulbb v5, lr, a4
+ smultb v7, lr, a4
+ smulbt v2, ip, a4
+ smultt v4, ip, a4
+ smulbt v6, lr, a4
+ smultt fp, lr, a4
+ rsb v4, v4, #0
+ ldr a4, [a1, #(16*3)]
+ rsb v3, v3, #0
+
+ smlatb v1, ip, a4, v1
+ smlatb v3, lr, a4, v3
+ smulbb a3, ip, a4
+ smulbb a2, lr, a4
+ sub v5, v5, a3
+ sub v7, v7, a2
+ smlatt v2, ip, a4, v2
+ smlatt v4, lr, a4, v4
+ smulbt a3, ip, a4
+ smulbt a2, lr, a4
+ sub v6, v6, a3
+ ldr a4, [a1, #(16*5)]
+ sub fp, fp, a2
+
+ smlabb v1, lr, a4, v1
+ smlabb v3, ip, a4, v3
+ smlatb v5, lr, a4, v5
+ smlatb v7, ip, a4, v7
+ smlabt v2, lr, a4, v2
+ smlabt v4, ip, a4, v4
+ smlatt v6, lr, a4, v6
+ ldr a3, [a1, #(16*7)]
+ smlatt fp, ip, a4, fp
+
+ smlatb v1, lr, a3, v1
+ smlabb v3, lr, a3, v3
+ smlatb v5, ip, a3, v5
+ smulbb a4, ip, a3
+ smlatt v2, lr, a3, v2
+ sub v7, v7, a4
+ smlabt v4, lr, a3, v4
+ smulbt a4, ip, a3
+ smlatt v6, ip, a3, v6
+ sub fp, fp, a4
+ .endm
+
+ .align
+ .func idct_col_armv5te
+idct_col_armv5te:
+ str lr, [sp, #-4]!
+
+ idct_col
+
+ ldmfd sp!, {a3, a4}
+ adds a2, a3, v1
+ mov a2, a2, lsr #20
+ orrmi a2, a2, #0xf000
+ add ip, a4, v2
+ mov ip, ip, asr #20
+ orr a2, a2, ip, lsl #16
+ str a2, [a1]
+ subs a3, a3, v1
+ mov a2, a3, lsr #20
+ orrmi a2, a2, #0xf000
+ sub a4, a4, v2
+ mov a4, a4, asr #20
+ orr a2, a2, a4, lsl #16
+ ldmfd sp!, {a3, a4}
+ str a2, [a1, #(16*7)]
+
+ subs a2, a3, v3
+ mov a2, a2, lsr #20
+ orrmi a2, a2, #0xf000
+ sub ip, a4, v4
+ mov ip, ip, asr #20
+ orr a2, a2, ip, lsl #16
+ str a2, [a1, #(16*1)]
+ adds a3, a3, v3
+ mov a2, a3, lsr #20
+ orrmi a2, a2, #0xf000
+ add a4, a4, v4
+ mov a4, a4, asr #20
+ orr a2, a2, a4, lsl #16
+ ldmfd sp!, {a3, a4}
+ str a2, [a1, #(16*6)]
+
+ adds a2, a3, v5
+ mov a2, a2, lsr #20
+ orrmi a2, a2, #0xf000
+ add ip, a4, v6
+ mov ip, ip, asr #20
+ orr a2, a2, ip, lsl #16
+ str a2, [a1, #(16*2)]
+ subs a3, a3, v5
+ mov a2, a3, lsr #20
+ orrmi a2, a2, #0xf000
+ sub a4, a4, v6
+ mov a4, a4, asr #20
+ orr a2, a2, a4, lsl #16
+ ldmfd sp!, {a3, a4}
+ str a2, [a1, #(16*5)]
+
+ adds a2, a3, v7
+ mov a2, a2, lsr #20
+ orrmi a2, a2, #0xf000
+ add ip, a4, fp
+ mov ip, ip, asr #20
+ orr a2, a2, ip, lsl #16
+ str a2, [a1, #(16*3)]
+ subs a3, a3, v7
+ mov a2, a3, lsr #20
+ orrmi a2, a2, #0xf000
+ sub a4, a4, fp
+ mov a4, a4, asr #20
+ orr a2, a2, a4, lsl #16
+ str a2, [a1, #(16*4)]
+
+ ldr pc, [sp], #4
+ .endfunc
+
+ .align
+ .func idct_col_put_armv5te
+idct_col_put_armv5te:
+ str lr, [sp, #-4]!
+
+ idct_col
+
+ ldmfd sp!, {a3, a4}
+ ldr lr, [sp, #32]
+ add a2, a3, v1
+ movs a2, a2, asr #20
+ movmi a2, #0
+ cmp a2, #255
+ movgt a2, #255
+ add ip, a4, v2
+ movs ip, ip, asr #20
+ movmi ip, #0
+ cmp ip, #255
+ movgt ip, #255
+ orr a2, a2, ip, lsl #8
+ sub a3, a3, v1
+ movs a3, a3, asr #20
+ movmi a3, #0
+ cmp a3, #255
+ movgt a3, #255
+ sub a4, a4, v2
+ movs a4, a4, asr #20
+ movmi a4, #0
+ cmp a4, #255
+ ldr v1, [sp, #28]
+ movgt a4, #255
+ strh a2, [v1]
+ add a2, v1, #2
+ str a2, [sp, #28]
+ orr a2, a3, a4, lsl #8
+ rsb v2, lr, lr, lsl #3
+ ldmfd sp!, {a3, a4}
+ strh a2, [v2, v1]!
+
+ sub a2, a3, v3
+ movs a2, a2, asr #20
+ movmi a2, #0
+ cmp a2, #255
+ movgt a2, #255
+ sub ip, a4, v4
+ movs ip, ip, asr #20
+ movmi ip, #0
+ cmp ip, #255
+ movgt ip, #255
+ orr a2, a2, ip, lsl #8
+ strh a2, [v1, lr]!
+ add a3, a3, v3
+ movs a2, a3, asr #20
+ movmi a2, #0
+ cmp a2, #255
+ movgt a2, #255
+ add a4, a4, v4
+ movs a4, a4, asr #20
+ movmi a4, #0
+ cmp a4, #255
+ movgt a4, #255
+ orr a2, a2, a4, lsl #8
+ ldmfd sp!, {a3, a4}
+ strh a2, [v2, -lr]!
+
+ add a2, a3, v5
+ movs a2, a2, asr #20
+ movmi a2, #0
+ cmp a2, #255
+ movgt a2, #255
+ add ip, a4, v6
+ movs ip, ip, asr #20
+ movmi ip, #0
+ cmp ip, #255
+ movgt ip, #255
+ orr a2, a2, ip, lsl #8
+ strh a2, [v1, lr]!
+ sub a3, a3, v5
+ movs a2, a3, asr #20
+ movmi a2, #0
+ cmp a2, #255
+ movgt a2, #255
+ sub a4, a4, v6
+ movs a4, a4, asr #20
+ movmi a4, #0
+ cmp a4, #255
+ movgt a4, #255
+ orr a2, a2, a4, lsl #8
+ ldmfd sp!, {a3, a4}
+ strh a2, [v2, -lr]!
+
+ add a2, a3, v7
+ movs a2, a2, asr #20
+ movmi a2, #0
+ cmp a2, #255
+ movgt a2, #255
+ add ip, a4, fp
+ movs ip, ip, asr #20
+ movmi ip, #0
+ cmp ip, #255
+ movgt ip, #255
+ orr a2, a2, ip, lsl #8
+ strh a2, [v1, lr]
+ sub a3, a3, v7
+ movs a2, a3, asr #20
+ movmi a2, #0
+ cmp a2, #255
+ movgt a2, #255
+ sub a4, a4, fp
+ movs a4, a4, asr #20
+ movmi a4, #0
+ cmp a4, #255
+ movgt a4, #255
+ orr a2, a2, a4, lsl #8
+ strh a2, [v2, -lr]
+
+ ldr pc, [sp], #4
+ .endfunc
+
+ .align
+ .func idct_col_add_armv5te
+idct_col_add_armv5te:
+ str lr, [sp, #-4]!
+
+ idct_col
+
+ ldr lr, [sp, #36]
+
+ ldmfd sp!, {a3, a4}
+ ldrh ip, [lr]
+ add a2, a3, v1
+ mov a2, a2, asr #20
+ sub a3, a3, v1
+ and v1, ip, #255
+ adds a2, a2, v1
+ movmi a2, #0
+ cmp a2, #255
+ movgt a2, #255
+ add v1, a4, v2
+ mov v1, v1, asr #20
+ adds v1, v1, ip, lsr #8
+ movmi v1, #0
+ cmp v1, #255
+ movgt v1, #255
+ orr a2, a2, v1, lsl #8
+ ldr v1, [sp, #32]
+ sub a4, a4, v2
+ rsb v2, v1, v1, lsl #3
+ ldrh ip, [v2, lr]!
+ strh a2, [lr]
+ mov a3, a3, asr #20
+ and a2, ip, #255
+ adds a3, a3, a2
+ movmi a3, #0
+ cmp a3, #255
+ movgt a3, #255
+ mov a4, a4, asr #20
+ adds a4, a4, ip, lsr #8
+ movmi a4, #0
+ cmp a4, #255
+ movgt a4, #255
+ add a2, lr, #2
+ str a2, [sp, #28]
+ orr a2, a3, a4, lsl #8
+ strh a2, [v2]
+
+ ldmfd sp!, {a3, a4}
+ ldrh ip, [lr, v1]!
+ sub a2, a3, v3
+ mov a2, a2, asr #20
+ add a3, a3, v3
+ and v3, ip, #255
+ adds a2, a2, v3
+ movmi a2, #0
+ cmp a2, #255
+ movgt a2, #255
+ sub v3, a4, v4
+ mov v3, v3, asr #20
+ adds v3, v3, ip, lsr #8
+ movmi v3, #0
+ cmp v3, #255
+ movgt v3, #255
+ orr a2, a2, v3, lsl #8
+ add a4, a4, v4
+ ldrh ip, [v2, -v1]!
+ strh a2, [lr]
+ mov a3, a3, asr #20
+ and a2, ip, #255
+ adds a3, a3, a2
+ movmi a3, #0
+ cmp a3, #255
+ movgt a3, #255
+ mov a4, a4, asr #20
+ adds a4, a4, ip, lsr #8
+ movmi a4, #0
+ cmp a4, #255
+ movgt a4, #255
+ orr a2, a3, a4, lsl #8
+ strh a2, [v2]
+
+ ldmfd sp!, {a3, a4}
+ ldrh ip, [lr, v1]!
+ add a2, a3, v5
+ mov a2, a2, asr #20
+ sub a3, a3, v5
+ and v3, ip, #255
+ adds a2, a2, v3
+ movmi a2, #0
+ cmp a2, #255
+ movgt a2, #255
+ add v3, a4, v6
+ mov v3, v3, asr #20
+ adds v3, v3, ip, lsr #8
+ movmi v3, #0
+ cmp v3, #255
+ movgt v3, #255
+ orr a2, a2, v3, lsl #8
+ sub a4, a4, v6
+ ldrh ip, [v2, -v1]!
+ strh a2, [lr]
+ mov a3, a3, asr #20
+ and a2, ip, #255
+ adds a3, a3, a2
+ movmi a3, #0
+ cmp a3, #255
+ movgt a3, #255
+ mov a4, a4, asr #20
+ adds a4, a4, ip, lsr #8
+ movmi a4, #0
+ cmp a4, #255
+ movgt a4, #255
+ orr a2, a3, a4, lsl #8
+ strh a2, [v2]
+
+ ldmfd sp!, {a3, a4}
+ ldrh ip, [lr, v1]!
+ add a2, a3, v7
+ mov a2, a2, asr #20
+ sub a3, a3, v7
+ and v3, ip, #255
+ adds a2, a2, v3
+ movmi a2, #0
+ cmp a2, #255
+ movgt a2, #255
+ add v3, a4, fp
+ mov v3, v3, asr #20
+ adds v3, v3, ip, lsr #8
+ movmi v3, #0
+ cmp v3, #255
+ movgt v3, #255
+ orr a2, a2, v3, lsl #8
+ sub a4, a4, fp
+ ldrh ip, [v2, -v1]!
+ strh a2, [lr]
+ mov a3, a3, asr #20
+ and a2, ip, #255
+ adds a3, a3, a2
+ movmi a3, #0
+ cmp a3, #255
+ movgt a3, #255
+ mov a4, a4, asr #20
+ adds a4, a4, ip, lsr #8
+ movmi a4, #0
+ cmp a4, #255
+ movgt a4, #255
+ orr a2, a3, a4, lsl #8
+ strh a2, [v2]
+
+ ldr pc, [sp], #4
+ .endfunc
+
+ .align
+ .global simple_idct_armv5te
+ .func simple_idct_armv5te
+simple_idct_armv5te:
+ stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, lr}
+
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+
+ sub a1, a1, #(16*7)
+
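+ /* each idct_col_armv5te call handles two columns (packed as the 16-bit
+    halves of one 32-bit word), so four calls cover all eight columns */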
+ bl idct_col_armv5te
+ add a1, a1, #4
+ bl idct_col_armv5te
+ add a1, a1, #4
+ bl idct_col_armv5te
+ add a1, a1, #4
+ bl idct_col_armv5te
+
+ ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
+ .endfunc
+
+ .align
+ .global simple_idct_add_armv5te
+ .func simple_idct_add_armv5te
+simple_idct_add_armv5te:
+ stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}
+
+ mov a1, a3
+
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+
+ sub a1, a1, #(16*7)
+
+ bl idct_col_add_armv5te
+ add a1, a1, #4
+ bl idct_col_add_armv5te
+ add a1, a1, #4
+ bl idct_col_add_armv5te
+ add a1, a1, #4
+ bl idct_col_add_armv5te
+
+ add sp, sp, #8
+ ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
+ .endfunc
+
+ .align
+ .global simple_idct_put_armv5te
+ .func simple_idct_put_armv5te
+simple_idct_put_armv5te:
+ stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}
+
+ mov a1, a3
+
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+ add a1, a1, #16
+ bl idct_row_armv5te
+
+ sub a1, a1, #(16*7)
+
+ bl idct_col_put_armv5te
+ add a1, a1, #4
+ bl idct_col_put_armv5te
+ add a1, a1, #4
+ bl idct_col_put_armv5te
+ add a1, a1, #4
+ bl idct_col_put_armv5te
+
+ add sp, sp, #8
+ ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
+ .endfunc
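
All three entry points above share one driver pattern: eight row transforms, stepping 16 bytes (one row of eight int16 coefficients) per call, then four column transforms, stepping 4 bytes so that each call covers two packed 16-bit columns. A minimal C outline of that control flow, with hypothetical idct_row()/idct_col2() standing in for the asm helpers:

    typedef short DCTELEM;

    extern void idct_row(DCTELEM *row);   /* hypothetical stand-in for idct_row_armv5te  */
    extern void idct_col2(DCTELEM *cols); /* hypothetical stand-in; two columns per call */

    static void simple_idct_outline(DCTELEM block[64])
    {
        int i;
        for (i = 0; i < 8; i++)
            idct_row(block + 8 * i);      /* "add a1, a1, #16" between calls */
        for (i = 0; i < 4; i++)
            idct_col2(block + 2 * i);     /* "add a1, a1, #4" between calls  */
    }
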
diff --git a/src/libffmpeg/libavcodec/avcodec.h b/src/libffmpeg/libavcodec/avcodec.h
index d8090ed32..7d7678455 100644
--- a/src/libffmpeg/libavcodec/avcodec.h
+++ b/src/libffmpeg/libavcodec/avcodec.h
@@ -37,13 +37,13 @@ extern "C" {
#define AV_STRINGIFY(s) AV_TOSTRING(s)
#define AV_TOSTRING(s) #s
-#define LIBAVCODEC_VERSION_INT ((51<<16)+(25<<8)+0)
-#define LIBAVCODEC_VERSION 51.25.0
+#define LIBAVCODEC_VERSION_INT ((51<<16)+(28<<8)+0)
+#define LIBAVCODEC_VERSION 51.28.0
#define LIBAVCODEC_BUILD LIBAVCODEC_VERSION_INT
#define LIBAVCODEC_IDENT "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION)
-#define AV_NOPTS_VALUE int64_t_C(0x8000000000000000)
+#define AV_NOPTS_VALUE INT64_C(0x8000000000000000)
#define AV_TIME_BASE 1000000
#define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE}
@@ -156,6 +156,7 @@ enum CodecID {
CODEC_ID_TIERTEXSEQVIDEO,
CODEC_ID_TIFF,
CODEC_ID_GIF,
+ CODEC_ID_FFH264,
/* various pcm "codecs" */
CODEC_ID_PCM_S16LE= 0x10000,
@@ -243,6 +244,7 @@ enum CodecID {
CODEC_ID_WAVPACK,
CODEC_ID_DSICINAUDIO,
CODEC_ID_IMC,
+ CODEC_ID_MUSEPACK7,
/* subtitle codecs */
CODEC_ID_DVD_SUBTITLE= 0x17000,
@@ -372,7 +374,7 @@ typedef struct RcOverride{
#define CODEC_FLAG2_LOCAL_HEADER 0x00000008 ///< place global headers at every keyframe instead of in extradata
#define CODEC_FLAG2_BPYRAMID 0x00000010 ///< H.264 allow b-frames to be used as references
#define CODEC_FLAG2_WPRED 0x00000020 ///< H.264 weighted biprediction for b-frames
-#define CODEC_FLAG2_MIXED_REFS 0x00000040 ///< H.264 multiple references per partition
+#define CODEC_FLAG2_MIXED_REFS 0x00000040 ///< H.264 one reference per partition, as opposed to one reference per macroblock
#define CODEC_FLAG2_8X8DCT 0x00000080 ///< H.264 high profile 8x8 transform
#define CODEC_FLAG2_FASTPSKIP 0x00000100 ///< H.264 fast pskip
#define CODEC_FLAG2_AUD 0x00000200 ///< H.264 access unit delimiters
@@ -380,6 +382,7 @@ typedef struct RcOverride{
#define CODEC_FLAG2_INTRA_VLC 0x00000800 ///< use MPEG-2 intra VLC table
#define CODEC_FLAG2_MEMC_ONLY 0x00001000 ///< only do ME/MC (I frames -> ref, P frame -> ME+MC)
#define CODEC_FLAG2_DROP_FRAME_TIMECODE 0x00002000 ///< timecode is in drop frame format
+#define CODEC_FLAG2_SKIP_RD       0x00004000 ///< RD optimal MB level residual skipping
/* Unsupported options :
* Syntax Arithmetic coding (SAC)
@@ -2090,9 +2093,6 @@ typedef struct AVCodec {
int (*decode)(AVCodecContext *, void *outdata, int *outdata_size,
uint8_t *buf, int buf_size);
int capabilities;
-#if LIBAVCODEC_VERSION_INT < ((50<<16)+(0<<8)+0)
- void *dummy; // FIXME remove next time we break binary compatibility
-#endif
struct AVCodec *next;
void (*flush)(AVCodecContext *);
const AVRational *supported_framerates; ///< array of supported framerates, or NULL if any, array is terminated by {0,0}
@@ -2310,6 +2310,7 @@ extern AVCodec libgsm_decoder;
extern AVCodec bmp_decoder;
extern AVCodec mmvideo_decoder;
extern AVCodec zmbv_decoder;
+extern AVCodec zmbv_encoder;
extern AVCodec avs_decoder;
extern AVCodec smacker_decoder;
extern AVCodec smackaud_decoder;
@@ -2324,6 +2325,7 @@ extern AVCodec dsicinaudio_decoder;
extern AVCodec tiertexseqvideo_decoder;
extern AVCodec tiff_decoder;
extern AVCodec imc_decoder;
+extern AVCodec mpc7_decoder;
/* pcm codecs */
#define PCM_CODEC(id, name) \
@@ -2691,20 +2693,6 @@ int img_crop(AVPicture *dst, const AVPicture *src,
int img_pad(AVPicture *dst, const AVPicture *src, int height, int width, int pix_fmt,
int padtop, int padbottom, int padleft, int padright, int *color);
-/* endian macros */
-#if !defined(BE_16) || !defined(BE_32) || !defined(LE_16) || !defined(LE_32)
-#define BE_16(x) ((((uint8_t*)(x))[0] << 8) | ((uint8_t*)(x))[1])
-#define BE_32(x) ((((uint8_t*)(x))[0] << 24) | \
- (((uint8_t*)(x))[1] << 16) | \
- (((uint8_t*)(x))[2] << 8) | \
- ((uint8_t*)(x))[3])
-#define LE_16(x) ((((uint8_t*)(x))[1] << 8) | ((uint8_t*)(x))[0])
-#define LE_32(x) ((((uint8_t*)(x))[3] << 24) | \
- (((uint8_t*)(x))[2] << 16) | \
- (((uint8_t*)(x))[1] << 8) | \
- ((uint8_t*)(x))[0])
-#endif
-
extern unsigned int av_xiphlacing(unsigned char *s, unsigned int v);
/* unused static macro */
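
LIBAVCODEC_VERSION_INT packs major, minor and micro as (major<<16)+(minor<<8)+micro, which is what makes compile-time gates possible; a hypothetical consumer of the version bump above could check:

    #if LIBAVCODEC_VERSION_INT < ((51<<16)+(28<<8)+0)
    #  error "this code expects libavcodec 51.28.0 or newer"
    #endif
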
diff --git a/src/libffmpeg/libavcodec/bitstream.h b/src/libffmpeg/libavcodec/bitstream.h
index af25b6dcf..29e0f441e 100644
--- a/src/libffmpeg/libavcodec/bitstream.h
+++ b/src/libffmpeg/libavcodec/bitstream.h
@@ -187,12 +187,12 @@ static inline uint##x##_t unaligned##x(const void *v) { \
}
# elif defined(__DECC)
# define unaligned(x) \
-static inline uint##x##_t unaligned##x##(const void *v) { \
+static inline uint##x##_t unaligned##x(const void *v) { \
return *(const __unaligned uint##x##_t *) v; \
}
# else
# define unaligned(x) \
-static inline uint##x##_t unaligned##x##(const void *v) { \
+static inline uint##x##_t unaligned##x(const void *v) { \
return *(const uint##x##_t *) v; \
}
# endif
@@ -877,7 +877,7 @@ void free_vlc(VLC *vlc);
* read the longest vlc code
* = (max_vlc_length + bits - 1) / bits
*/
-static always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2],
+static av_always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2],
int bits, int max_depth)
{
int code;
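
The unaligned() fix above removes a stray token paste: `unaligned##x##(` asks the preprocessor to glue the expanded name onto "(", which is not a valid preprocessing token, so conforming compilers reject it; only the name itself needs "##". (The always_inline -> av_always_inline renames through the rest of this patch are upstream's namespacing of the same portability macros.) A minimal illustration of the paste rule:

    /* Correct: paste only inside the identifier, never against "(". */
    #define DECLARE_READER(x) static inline unsigned read##x(void) { return x; }
    DECLARE_READER(32)   /* expands to: static inline unsigned read32(void) { return 32; } */
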
diff --git a/src/libffmpeg/libavcodec/bytestream.h b/src/libffmpeg/libavcodec/bytestream.h
index 25c457fe4..a742fa1c1 100644
--- a/src/libffmpeg/libavcodec/bytestream.h
+++ b/src/libffmpeg/libavcodec/bytestream.h
@@ -22,32 +22,32 @@
#ifndef FFMPEG_BYTESTREAM_H
#define FFMPEG_BYTESTREAM_H
-static always_inline unsigned int bytestream_get_le32(uint8_t **b)
+static av_always_inline unsigned int bytestream_get_le32(uint8_t **b)
{
(*b) += 4;
return LE_32(*b - 4);
}
-static always_inline unsigned int bytestream_get_le16(uint8_t **b)
+static av_always_inline unsigned int bytestream_get_le16(uint8_t **b)
{
(*b) += 2;
return LE_16(*b - 2);
}
-static always_inline unsigned int bytestream_get_byte(uint8_t **b)
+static av_always_inline unsigned int bytestream_get_byte(uint8_t **b)
{
(*b)++;
return (*b)[-1];
}
-static always_inline unsigned int bytestream_get_buffer(uint8_t **b, uint8_t *dst, unsigned int size)
+static av_always_inline unsigned int bytestream_get_buffer(uint8_t **b, uint8_t *dst, unsigned int size)
{
memcpy(dst, *b, size);
(*b) += size;
return size;
}
-static always_inline void bytestream_put_be32(uint8_t **b, const unsigned int value)
+static av_always_inline void bytestream_put_be32(uint8_t **b, const unsigned int value)
{
*(*b)++ = value >> 24;
*(*b)++ = value >> 16;
@@ -55,13 +55,13 @@ static always_inline void bytestream_put_be32(uint8_t **b, const unsigned int va
*(*b)++ = value;
};
-static always_inline void bytestream_put_be16(uint8_t **b, const unsigned int value)
+static av_always_inline void bytestream_put_be16(uint8_t **b, const unsigned int value)
{
*(*b)++ = value >> 8;
*(*b)++ = value;
}
-static always_inline void bytestream_put_le32(uint8_t **b, const unsigned int value)
+static av_always_inline void bytestream_put_le32(uint8_t **b, const unsigned int value)
{
*(*b)++ = value;
*(*b)++ = value >> 8;
@@ -69,18 +69,18 @@ static always_inline void bytestream_put_le32(uint8_t **b, const unsigned int va
*(*b)++ = value >> 24;
}
-static always_inline void bytestream_put_le16(uint8_t **b, const unsigned int value)
+static av_always_inline void bytestream_put_le16(uint8_t **b, const unsigned int value)
{
*(*b)++ = value;
*(*b)++ = value >> 8;
}
-static always_inline void bytestream_put_byte(uint8_t **b, const unsigned int value)
+static av_always_inline void bytestream_put_byte(uint8_t **b, const unsigned int value)
{
*(*b)++ = value;
}
-static always_inline void bytestream_put_buffer(uint8_t **b, const uint8_t *src, unsigned int size)
+static av_always_inline void bytestream_put_buffer(uint8_t **b, const uint8_t *src, unsigned int size)
{
memcpy(*b, src, size);
(*b) += size;
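
Every helper above advances the pointer it is handed, so gets and puts chain naturally; a small hypothetical round-trip (the function name is illustrative):

    #include <stdint.h>

    static void bytestream_roundtrip(void)
    {
        uint8_t buf[6], *p = buf;
        bytestream_put_le32(&p, 0xdeadbeefu);    /* p advances by 4 */
        bytestream_put_le16(&p, 0x1234);         /* p advances by 2 */
        p = buf;
        unsigned v32 = bytestream_get_le32(&p);  /* 0xdeadbeef */
        unsigned v16 = bytestream_get_le16(&p);  /* 0x1234     */
        (void)v32; (void)v16;
    }
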
diff --git a/src/libffmpeg/libavcodec/cabac.h b/src/libffmpeg/libavcodec/cabac.h
index 43fe78e3b..f47406a9e 100644
--- a/src/libffmpeg/libavcodec/cabac.h
+++ b/src/libffmpeg/libavcodec/cabac.h
@@ -363,7 +363,7 @@ static inline void renorm_cabac_decoder_once(CABACContext *c){
refill(c);
}
-static int always_inline get_cabac_inline(CABACContext *c, uint8_t * const state){
+static int av_always_inline get_cabac_inline(CABACContext *c, uint8_t * const state){
//FIXME gcc generates duplicate load/stores for c->low and c->range
#define LOW "0"
#define RANGE "4"
@@ -631,7 +631,7 @@ static int get_cabac_bypass(CABACContext *c){
}
-static always_inline int get_cabac_bypass_sign(CABACContext *c, int val){
+static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val){
#if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
asm volatile(
"movl "RANGE "(%1), %%ebx \n\t"
diff --git a/src/libffmpeg/libavcodec/cinepak.c b/src/libffmpeg/libavcodec/cinepak.c
index e137377e5..fd95b739e 100644
--- a/src/libffmpeg/libavcodec/cinepak.c
+++ b/src/libffmpeg/libavcodec/cinepak.c
@@ -26,6 +26,8 @@
* by Ewald Snel <ewald@rambo.its.tudelft.nl>
* For more information on the Cinepak algorithm, visit:
* http://www.csse.monash.edu.au/~timf/
+ * For more information on the quirky data inside Sega FILM/CPK files, visit:
+ * http://wiki.multimedia.cx/index.php?title=Sega_FILM
*/
#include <stdio.h>
@@ -67,6 +69,8 @@ typedef struct CinepakContext {
int palette_video;
cvid_strip_t strips[MAX_STRIPS];
+ int sega_film_skip_bytes;
+
} CinepakContext;
static void cinepak_decode_codebook (cvid_codebook_t *codebook,
@@ -319,8 +323,6 @@ static int cinepak_decode (CinepakContext *s)
int i, result, strip_size, frame_flags, num_strips;
int y0 = 0;
int encoded_buf_size;
- /* if true, Cinepak data is from a Sega FILM/CPK file */
- int sega_film_data = 0;
if (s->size < 10)
return -1;
@@ -328,12 +330,29 @@ static int cinepak_decode (CinepakContext *s)
frame_flags = s->data[0];
num_strips = BE_16 (&s->data[8]);
encoded_buf_size = ((s->data[1] << 16) | BE_16 (&s->data[2]));
- if (encoded_buf_size != s->size)
- sega_film_data = 1;
- if (sega_film_data)
- s->data += 12;
- else
- s->data += 10;
+
+ /* if this is the first frame, check for deviant Sega FILM data */
+ if (s->sega_film_skip_bytes == -1) {
+ if (encoded_buf_size != s->size) {
+ /* If the encoded frame size differs from the frame size as indicated
+ * by the container file, this data likely comes from a Sega FILM/CPK file.
+ * If the frame header is followed by the bytes FE 00 00 06 00 00 then
+ * this is probably one of the two known files that have 6 extra bytes
+ * after the frame header. Else, assume 2 extra bytes. */
+ if ((s->data[10] == 0xFE) &&
+ (s->data[11] == 0x00) &&
+ (s->data[12] == 0x00) &&
+ (s->data[13] == 0x06) &&
+ (s->data[14] == 0x00) &&
+ (s->data[15] == 0x00))
+ s->sega_film_skip_bytes = 6;
+ else
+ s->sega_film_skip_bytes = 2;
+ } else
+ s->sega_film_skip_bytes = 0;
+ }
+
+ s->data += 10 + s->sega_film_skip_bytes;
if (num_strips > MAX_STRIPS)
num_strips = MAX_STRIPS;
@@ -377,6 +396,7 @@ static int cinepak_decode_init(AVCodecContext *avctx)
s->avctx = avctx;
s->width = (avctx->width + 3) & ~3;
s->height = (avctx->height + 3) & ~3;
+ s->sega_film_skip_bytes = -1; /* uninitialized state */
// check for paletted data
if ((avctx->palctrl == NULL) || (avctx->bits_per_sample == 40)) {
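
The heuristic above runs once, on the first frame (sega_film_skip_bytes starts out as -1), and its result is reused for the rest of the stream. Restated as a stand-alone sketch (guess_sega_skip() is a hypothetical helper, not patch code):

    #include <stdint.h>

    static int guess_sega_skip(const uint8_t *data, int frame_size,
                               int encoded_buf_size)
    {
        if (encoded_buf_size == frame_size)
            return 0;                            /* ordinary Cinepak stream    */
        if (data[10] == 0xFE && data[11] == 0x00 && data[12] == 0x00 &&
            data[13] == 0x06 && data[14] == 0x00 && data[15] == 0x00)
            return 6;                            /* rare 6-byte FILM variant   */
        return 2;                                /* common 2-byte FILM variant */
    }
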
diff --git a/src/libffmpeg/libavcodec/cook.c b/src/libffmpeg/libavcodec/cook.c
index 47d9ce2c3..943addb89 100644
--- a/src/libffmpeg/libavcodec/cook.c
+++ b/src/libffmpeg/libavcodec/cook.c
@@ -312,7 +312,7 @@ static int cook_decode_close(AVCodecContext *avctx)
{
int i;
COOKContext *q = avctx->priv_data;
- av_log(NULL,AV_LOG_DEBUG, "Deallocating memory.\n");
+ av_log(avctx,AV_LOG_DEBUG, "Deallocating memory.\n");
/* Free allocated memory buffers. */
av_free(q->mlt_window);
@@ -1160,12 +1160,12 @@ static int cook_decode_init(AVCodecContext *avctx)
/* Take care of the codec specific extradata. */
if (avctx->extradata_size <= 0) {
- av_log(NULL,AV_LOG_ERROR,"Necessary extradata missing!\n");
+ av_log(avctx,AV_LOG_ERROR,"Necessary extradata missing!\n");
return -1;
} else {
/* 8 for mono, 16 for stereo, ? for multichannel
Swap to right endianness so we don't need to care later on. */
- av_log(NULL,AV_LOG_DEBUG,"codecdata_length=%d\n",avctx->extradata_size);
+ av_log(avctx,AV_LOG_DEBUG,"codecdata_length=%d\n",avctx->extradata_size);
if (avctx->extradata_size >= 8){
e->cookversion = be2me_32(e->cookversion);
e->samples_per_frame = be2me_16(e->samples_per_frame);
@@ -1201,24 +1201,24 @@ static int cook_decode_init(AVCodecContext *avctx)
switch (e->cookversion) {
case MONO_COOK1:
if (q->nb_channels != 1) {
- av_log(NULL,AV_LOG_ERROR,"Container channels != 1, report sample!\n");
+ av_log(avctx,AV_LOG_ERROR,"Container channels != 1, report sample!\n");
return -1;
}
- av_log(NULL,AV_LOG_DEBUG,"MONO_COOK1\n");
+ av_log(avctx,AV_LOG_DEBUG,"MONO_COOK1\n");
break;
case MONO_COOK2:
if (q->nb_channels != 1) {
q->joint_stereo = 0;
q->bits_per_subpacket = q->bits_per_subpacket/2;
}
- av_log(NULL,AV_LOG_DEBUG,"MONO_COOK2\n");
+ av_log(avctx,AV_LOG_DEBUG,"MONO_COOK2\n");
break;
case JOINT_STEREO:
if (q->nb_channels != 2) {
- av_log(NULL,AV_LOG_ERROR,"Container channels != 2, report sample!\n");
+ av_log(avctx,AV_LOG_ERROR,"Container channels != 2, report sample!\n");
return -1;
}
- av_log(NULL,AV_LOG_DEBUG,"JOINT_STEREO\n");
+ av_log(avctx,AV_LOG_DEBUG,"JOINT_STEREO\n");
if (avctx->extradata_size >= 16){
q->total_subbands = q->subbands + e->js_subband_start;
q->js_subband_start = e->js_subband_start;
@@ -1233,11 +1233,11 @@ static int cook_decode_init(AVCodecContext *avctx)
}
break;
case MC_COOK:
- av_log(NULL,AV_LOG_ERROR,"MC_COOK not supported!\n");
+ av_log(avctx,AV_LOG_ERROR,"MC_COOK not supported!\n");
return -1;
break;
default:
- av_log(NULL,AV_LOG_ERROR,"Unknown Cook version, report sample!\n");
+ av_log(avctx,AV_LOG_ERROR,"Unknown Cook version, report sample!\n");
return -1;
break;
}
@@ -1280,16 +1280,16 @@ static int cook_decode_init(AVCodecContext *avctx)
/* Try to catch some obviously faulty streams, otherwise it might be exploitable */
if (q->total_subbands > 53) {
- av_log(NULL,AV_LOG_ERROR,"total_subbands > 53, report sample!\n");
+ av_log(avctx,AV_LOG_ERROR,"total_subbands > 53, report sample!\n");
return -1;
}
if (q->subbands > 50) {
- av_log(NULL,AV_LOG_ERROR,"subbands > 50, report sample!\n");
+ av_log(avctx,AV_LOG_ERROR,"subbands > 50, report sample!\n");
return -1;
}
if ((q->samples_per_channel == 256) || (q->samples_per_channel == 512) || (q->samples_per_channel == 1024)) {
} else {
- av_log(NULL,AV_LOG_ERROR,"unknown amount of samples_per_channel = %d, report sample!\n",q->samples_per_channel);
+ av_log(avctx,AV_LOG_ERROR,"unknown amount of samples_per_channel = %d, report sample!\n",q->samples_per_channel);
return -1;
}
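
The NULL -> avctx changes through this file matter for diagnostics: av_log()'s default callback prefixes each message with the logging object, so errors become attributable to a specific decoder instance, along the lines of:

    av_log(avctx, AV_LOG_ERROR, "subbands > 50, report sample!\n");
    /* printed roughly as: [cook @ 0x8612340] subbands > 50, report sample! */
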
diff --git a/src/libffmpeg/libavcodec/cscd.c b/src/libffmpeg/libavcodec/cscd.c
index e4257f4c0..d8733d6dd 100644
--- a/src/libffmpeg/libavcodec/cscd.c
+++ b/src/libffmpeg/libavcodec/cscd.c
@@ -220,12 +220,12 @@ static int decode_init(AVCodecContext *avctx) {
}
avctx->has_b_frames = 0;
switch (avctx->bits_per_sample) {
- case 16: avctx->pix_fmt = PIX_FMT_RGB565; break;
+ case 16: avctx->pix_fmt = PIX_FMT_RGB555; break;
case 24: avctx->pix_fmt = PIX_FMT_BGR24; break;
case 32: avctx->pix_fmt = PIX_FMT_RGBA32; break;
default:
av_log(avctx, AV_LOG_ERROR,
- "CamStudio codec error: unvalid depth %i bpp\n",
+ "CamStudio codec error: invalid depth %i bpp\n",
avctx->bits_per_sample);
return 1;
}
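
The 16 bpp fix suggests CamStudio's 16-bit mode actually stores 5 bits per channel with the top bit unused; decoding it as 565 would misread the green and blue fields:

    /* 16 bpp layouts, per the fix above (x = unused bit):
     *   PIX_FMT_RGB555:  x RRRRR GGGGG BBBBB   <- what the data is
     *   PIX_FMT_RGB565:  RRRRR GGGGGG BBBBB    <- what was assumed before
     */
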
diff --git a/src/libffmpeg/libavcodec/dsputil.c b/src/libffmpeg/libavcodec/dsputil.c
index 51eddbc60..916d8658c 100644
--- a/src/libffmpeg/libavcodec/dsputil.c
+++ b/src/libffmpeg/libavcodec/dsputil.c
@@ -2549,6 +2549,11 @@ void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) {
}
#endif /* CONFIG_VC1_DECODER||CONFIG_WMV3_DECODER */
+#if defined(CONFIG_H264_ENCODER)
+/* H264 specific */
+void ff_h264dsp_init(DSPContext* c, AVCodecContext *avctx);
+#endif /* CONFIG_H264_ENCODER */
+
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
int i;
@@ -3801,11 +3806,31 @@ void dsputil_static_init(void)
for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
}
+int ff_check_alignment(void){
+ static int did_fail=0;
+ DECLARE_ALIGNED_16(int, aligned);
+
+ if((int)&aligned & 15){
+ if(!did_fail){
+#if defined(HAVE_MMX) || defined(HAVE_ALTIVEC)
+ av_log(NULL, AV_LOG_ERROR,
+ "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
+ "and may be very slow or crash. This is not a bug in libavcodec,\n"
+ "but in the compiler. Do not report crashes to FFmpeg developers.\n");
+#endif
+ did_fail=1;
+ }
+ return -1;
+ }
+ return 0;
+}
void dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
int i;
+ ff_check_alignment();
+
#ifdef CONFIG_ENCODERS
if(avctx->dct_algo==FF_DCT_FASTINT) {
c->fdct = fdct_ifast;
@@ -4006,6 +4031,9 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
#if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
ff_vc1dsp_init(c,avctx);
#endif
+#if defined(CONFIG_H264_ENCODER)
+ ff_h264dsp_init(c,avctx);
+#endif
c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
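
ff_check_alignment() works by taking the address of a local variable that was requested to be 16-byte aligned; if any of the low four address bits are set, the compiler failed to keep the stack aligned and MMX/AltiVec code relying on it may crash or crawl. Under GCC the macro it leans on is roughly (an assumption, for illustration):

    #define DECLARE_ALIGNED_16(t, v) t v __attribute__((aligned(16)))

    /* ((uintptr_t)&v & 15) != 0  =>  the alignment request was not honoured */
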
diff --git a/src/libffmpeg/libavcodec/dsputil.h b/src/libffmpeg/libavcodec/dsputil.h
index de3c1d564..78109f7b9 100644
--- a/src/libffmpeg/libavcodec/dsputil.h
+++ b/src/libffmpeg/libavcodec/dsputil.h
@@ -33,9 +33,6 @@
#include "common.h"
#include "avcodec.h"
-#if defined(ARCH_X86) || defined(ARCH_X86_64)
-#define HAVE_MMX 1
-#endif
//#define DEBUG
/* dct code */
@@ -381,10 +378,12 @@ typedef struct DSPContext {
#define BASIS_SHIFT 16
#define RECON_SHIFT 6
+ /* h264 functions */
void (*h264_idct_add)(uint8_t *dst, DCTELEM *block, int stride);
void (*h264_idct8_add)(uint8_t *dst, DCTELEM *block, int stride);
void (*h264_idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
void (*h264_idct8_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
+ void (*h264_dct)(DCTELEM block[4][4]);
/* snow wavelet */
void (*vertical_compose97i)(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width);
@@ -411,6 +410,8 @@ typedef struct DSPContext {
void dsputil_static_init(void);
void dsputil_init(DSPContext* p, AVCodecContext *avctx);
+int ff_check_alignment(void);
+
/**
* permute block according to permutation.
* @param last last non zero element in scantable order
@@ -483,6 +484,7 @@ int mm_support(void);
#define MM_SSE2 0x0010 /* PIV SSE2 functions */
#define MM_3DNOWEXT 0x0020 /* AMD 3DNowExt */
#define MM_SSE3 0x0040 /* Prescott SSE3 functions */
+#define MM_SSSE3 0x0080 /* Conroe SSSE3 functions */
extern int mm_flags;
@@ -593,30 +595,6 @@ void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx);
#endif
-#ifdef __GNUC__
-
-struct unaligned_64 { uint64_t l; } __attribute__((packed));
-struct unaligned_32 { uint32_t l; } __attribute__((packed));
-struct unaligned_16 { uint16_t l; } __attribute__((packed));
-
-#define LD16(a) (((const struct unaligned_16 *) (a))->l)
-#define LD32(a) (((const struct unaligned_32 *) (a))->l)
-#define LD64(a) (((const struct unaligned_64 *) (a))->l)
-
-#define ST16(a, b) (((struct unaligned_16 *) (a))->l) = (b)
-#define ST32(a, b) (((struct unaligned_32 *) (a))->l) = (b)
-
-#else /* __GNUC__ */
-
-#define LD16(a) (*((uint16_t*)(a)))
-#define LD32(a) (*((uint32_t*)(a)))
-#define LD64(a) (*((uint64_t*)(a)))
-
-#define ST16(a, b) *((uint16_t*)(a)) = (b)
-#define ST32(a, b) *((uint32_t*)(a)) = (b)
-
-#endif /* !__GNUC__ */
-
/* PSNR */
void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3],
int orig_linesize[3], int coded_linesize,
diff --git a/src/libffmpeg/libavcodec/dv.c b/src/libffmpeg/libavcodec/dv.c
index 76095a481..803d3502d 100644
--- a/src/libffmpeg/libavcodec/dv.c
+++ b/src/libffmpeg/libavcodec/dv.c
@@ -560,7 +560,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
#ifdef DV_CODEC_TINY_TARGET
/* Converts run and level (where level != 0) pair into vlc, returning bit size */
-static always_inline int dv_rl2vlc(int run, int level, int sign, uint32_t* vlc)
+static av_always_inline int dv_rl2vlc(int run, int level, int sign, uint32_t* vlc)
{
int size;
if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) {
@@ -585,7 +585,7 @@ static always_inline int dv_rl2vlc(int run, int level, int sign, uint32_t* vlc)
return size;
}
-static always_inline int dv_rl2vlc_size(int run, int level)
+static av_always_inline int dv_rl2vlc_size(int run, int level)
{
int size;
@@ -601,13 +601,13 @@ static always_inline int dv_rl2vlc_size(int run, int level)
return size;
}
#else
-static always_inline int dv_rl2vlc(int run, int l, int sign, uint32_t* vlc)
+static av_always_inline int dv_rl2vlc(int run, int l, int sign, uint32_t* vlc)
{
*vlc = dv_vlc_map[run][l].vlc | sign;
return dv_vlc_map[run][l].size;
}
-static always_inline int dv_rl2vlc_size(int run, int l)
+static av_always_inline int dv_rl2vlc_size(int run, int l)
{
return dv_vlc_map[run][l].size;
}
@@ -627,7 +627,7 @@ typedef struct EncBlockInfo {
uint32_t partial_bit_buffer; /* we can't use uint16_t here */
} EncBlockInfo;
-static always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext* pb_pool,
+static av_always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext* pb_pool,
PutBitContext* pb_end)
{
int prev;
@@ -670,7 +670,7 @@ static always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext
return pb;
}
-static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi,
+static av_always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi,
const uint8_t* zigzag_scan, const int *weight, int bias)
{
int i, area;
@@ -742,7 +742,7 @@ static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi,
//FIXME replace this by dsputil
#define SC(x, y) ((s[x] - s[y]) ^ ((s[x] - s[y]) >> 7))
-static always_inline int dv_guess_dct_mode(DCTELEM *blk) {
+static av_always_inline int dv_guess_dct_mode(DCTELEM *blk) {
DCTELEM *s;
int score88 = 0;
int score248 = 0;
diff --git a/src/libffmpeg/libavcodec/faandct.c b/src/libffmpeg/libavcodec/faandct.c
index e3c0d84a2..6f73ee5e9 100644
--- a/src/libffmpeg/libavcodec/faandct.c
+++ b/src/libffmpeg/libavcodec/faandct.c
@@ -70,7 +70,7 @@ B6*B0, B6*B1, B6*B2, B6*B3, B6*B4, B6*B5, B6*B6, B6*B7,
B7*B0, B7*B1, B7*B2, B7*B3, B7*B4, B7*B5, B7*B6, B7*B7,
};
-static always_inline void row_fdct(FLOAT temp[64], DCTELEM * data)
+static av_always_inline void row_fdct(FLOAT temp[64], DCTELEM * data)
{
FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
FLOAT tmp10, tmp11, tmp12, tmp13;
diff --git a/src/libffmpeg/libavcodec/ffv1.c b/src/libffmpeg/libavcodec/ffv1.c
index 62623e591..1ca18a4e8 100644
--- a/src/libffmpeg/libavcodec/ffv1.c
+++ b/src/libffmpeg/libavcodec/ffv1.c
@@ -186,7 +186,7 @@ typedef struct FFV1Context{
DSPContext dsp;
}FFV1Context;
-static always_inline int fold(int diff, int bits){
+static av_always_inline int fold(int diff, int bits){
if(bits==8)
diff= (int8_t)diff;
else{
diff --git a/src/libffmpeg/libavcodec/h263.c b/src/libffmpeg/libavcodec/h263.c
index ba51c245a..af5fa50e6 100644
--- a/src/libffmpeg/libavcodec/h263.c
+++ b/src/libffmpeg/libavcodec/h263.c
@@ -487,12 +487,28 @@ static inline void restore_ac_coeffs(MpegEncContext * s, DCTELEM block[6][64], i
}
/**
+ * init s->current_picture.qscale_table from s->lambda_table
+ */
+static void ff_init_qscale_tab(MpegEncContext *s){
+ int8_t * const qscale_table= s->current_picture.qscale_table;
+ int i;
+
+ for(i=0; i<s->mb_num; i++){
+ unsigned int lam= s->lambda_table[ s->mb_index2xy[i] ];
+ int qp= (lam*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
+ qscale_table[ s->mb_index2xy[i] ]= clip(qp, s->avctx->qmin, s->avctx->qmax);
+ }
+}
+
+/**
* modify qscale so that encoding is actually possible in h263 (limit difference to -2..2)
*/
void ff_clean_h263_qscales(MpegEncContext *s){
int i;
int8_t * const qscale_table= s->current_picture.qscale_table;
+ ff_init_qscale_tab(s);
+
for(i=1; i<s->mb_num; i++){
if(qscale_table[ s->mb_index2xy[i] ] - qscale_table[ s->mb_index2xy[i-1] ] >2)
qscale_table[ s->mb_index2xy[i] ]= qscale_table[ s->mb_index2xy[i-1] ]+2;
@@ -507,7 +523,6 @@ void ff_clean_h263_qscales(MpegEncContext *s){
int mb_xy= s->mb_index2xy[i];
if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_INTER4V)){
- s->mb_type[mb_xy]&= ~CANDIDATE_MB_TYPE_INTER4V;
s->mb_type[mb_xy]|= CANDIDATE_MB_TYPE_INTER;
}
}
@@ -546,7 +561,6 @@ void ff_clean_mpeg4_qscales(MpegEncContext *s){
for(i=1; i<s->mb_num; i++){
int mb_xy= s->mb_index2xy[i];
if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_DIRECT)){
- s->mb_type[mb_xy]&= ~CANDIDATE_MB_TYPE_DIRECT;
s->mb_type[mb_xy]|= CANDIDATE_MB_TYPE_BIDIR;
}
}
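
The constants in ff_init_qscale_tab() are a fixed-point division. With FF_LAMBDA_SHIFT == 7 (so FF_LAMBDA_SCALE == 128) the expression works out to a rounded division by about 117.9, i.e. effectively qp = lambda / FF_QP2LAMBDA (118). A reading of the arithmetic, not part of the patch:

    /* (lam*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7)
     *   == (lam*139 + 8192) / 16384
     *   ~=  lam / 117.87
     *   ~=  lam / FF_QP2LAMBDA            (FF_QP2LAMBDA == 118) */
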
diff --git a/src/libffmpeg/libavcodec/h264.c b/src/libffmpeg/libavcodec/h264.c
index ad23ae120..d7c48bd4a 100644
--- a/src/libffmpeg/libavcodec/h264.c
+++ b/src/libffmpeg/libavcodec/h264.c
@@ -165,20 +165,6 @@ typedef struct H264Context{
MpegEncContext s;
int nal_ref_idc;
int nal_unit_type;
-#define NAL_SLICE 1
-#define NAL_DPA 2
-#define NAL_DPB 3
-#define NAL_DPC 4
-#define NAL_IDR_SLICE 5
-#define NAL_SEI 6
-#define NAL_SPS 7
-#define NAL_PPS 8
-#define NAL_AUD 9
-#define NAL_END_SEQUENCE 10
-#define NAL_END_STREAM 11
-#define NAL_FILLER_DATA 12
-#define NAL_SPS_EXT 13
-#define NAL_AUXILIARY_SLICE 19
uint8_t *rbsp_buffer;
unsigned int rbsp_buffer_size;
@@ -414,7 +400,7 @@ static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, in
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
-static always_inline uint32_t pack16to32(int a, int b){
+static av_always_inline uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
return (b&0xFFFF) + (a<<16);
#else
@@ -422,13 +408,22 @@ static always_inline uint32_t pack16to32(int a, int b){
#endif
}
+const uint8_t ff_rem6[52]={
+0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
+};
+
+const uint8_t ff_div6[52]={
+0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
+};
+
+
/**
* fill a rectangle.
* @param h height of the rectangle, should be a constant
* @param w width of the rectangle, should be a constant
* @param size the size of val (1 or 4), should be a constant
*/
-static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
+static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
uint8_t *p= (uint8_t*)vp;
assert(size==1 || size==4);
assert(w<=4);
@@ -1808,81 +1803,6 @@ static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *c
return dst;
}
-#if 0
-/**
- * @param src the data which should be escaped
- * @param dst the target buffer, dst+1 == src is allowed as a special case
- * @param length the length of the src data
- * @param dst_length the length of the dst array
- * @returns length of escaped data in bytes or -1 if an error occured
- */
-static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
- int i, escape_count, si, di;
- uint8_t *temp;
-
- assert(length>=0);
- assert(dst_length>0);
-
- dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
-
- if(length==0) return 1;
-
- escape_count= 0;
- for(i=0; i<length; i+=2){
- if(src[i]) continue;
- if(i>0 && src[i-1]==0)
- i--;
- if(i+2<length && src[i+1]==0 && src[i+2]<=3){
- escape_count++;
- i+=2;
- }
- }
-
- if(escape_count==0){
- if(dst+1 != src)
- memcpy(dst+1, src, length);
- return length + 1;
- }
-
- if(length + escape_count + 1> dst_length)
- return -1;
-
- //this should be damn rare (hopefully)
-
- h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
- temp= h->rbsp_buffer;
-//printf("encoding esc\n");
-
- si= 0;
- di= 0;
- while(si < length){
- if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
- temp[di++]= 0; si++;
- temp[di++]= 0; si++;
- temp[di++]= 3;
- temp[di++]= src[si++];
- }
- else
- temp[di++]= src[si++];
- }
- memcpy(dst+1, temp, length+escape_count);
-
- assert(di == length+escape_count);
-
- return di + 1;
-}
-
-/**
- * write 1,10,100,1000,... for alignment, yes its exactly inverse to mpeg4
- */
-static void encode_rbsp_trailing(PutBitContext *pb){
- int length;
- put_bits(pb, 1, 1);
- length= (-put_bits_count(pb))&7;
- if(length) put_bits(pb, length, 0);
-}
-#endif
-
/**
* identifies the exact end of the bitstream
* @return the length of the trailing, or 0 if damaged
@@ -2035,42 +1955,6 @@ static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
}
-
-#if 0
-static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
- int i;
- //FIXME try int temp instead of block
-
- for(i=0; i<4; i++){
- const int d0= src1[0 + i*stride] - src2[0 + i*stride];
- const int d1= src1[1 + i*stride] - src2[1 + i*stride];
- const int d2= src1[2 + i*stride] - src2[2 + i*stride];
- const int d3= src1[3 + i*stride] - src2[3 + i*stride];
- const int z0= d0 + d3;
- const int z3= d0 - d3;
- const int z1= d1 + d2;
- const int z2= d1 - d2;
-
- block[0 + 4*i]= z0 + z1;
- block[1 + 4*i]= 2*z3 + z2;
- block[2 + 4*i]= z0 - z1;
- block[3 + 4*i]= z3 - 2*z2;
- }
-
- for(i=0; i<4; i++){
- const int z0= block[0*4 + i] + block[3*4 + i];
- const int z3= block[0*4 + i] - block[3*4 + i];
- const int z1= block[1*4 + i] + block[2*4 + i];
- const int z2= block[1*4 + i] - block[2*4 + i];
-
- block[0*4 + i]= z0 + z1;
- block[1*4 + i]= 2*z3 + z2;
- block[2*4 + i]= z0 - z1;
- block[3*4 + i]= z3 - 2*z2;
- }
-}
-#endif
-
//FIXME need to check that this doesn't overflow signed 32 bit for low qp, I am not sure, it's very close
//FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
@@ -2357,7 +2241,7 @@ static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int strid
src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
}
-static void pred16x16_vertical_c(uint8_t *src, int stride){
+void ff_pred16x16_vertical_c(uint8_t *src, int stride){
int i;
const uint32_t a= ((uint32_t*)(src-stride))[0];
const uint32_t b= ((uint32_t*)(src-stride))[1];
@@ -2372,7 +2256,7 @@ static void pred16x16_vertical_c(uint8_t *src, int stride){
}
}
-static void pred16x16_horizontal_c(uint8_t *src, int stride){
+void ff_pred16x16_horizontal_c(uint8_t *src, int stride){
int i;
for(i=0; i<16; i++){
@@ -2383,7 +2267,7 @@ static void pred16x16_horizontal_c(uint8_t *src, int stride){
}
}
-static void pred16x16_dc_c(uint8_t *src, int stride){
+void ff_pred16x16_dc_c(uint8_t *src, int stride){
int i, dc=0;
for(i=0;i<16; i++){
@@ -2437,7 +2321,7 @@ static void pred16x16_top_dc_c(uint8_t *src, int stride){
}
}
-static void pred16x16_128_dc_c(uint8_t *src, int stride){
+void ff_pred16x16_128_dc_c(uint8_t *src, int stride){
int i;
for(i=0; i<16; i++){
@@ -2488,11 +2372,11 @@ static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int
}
}
-static void pred16x16_plane_c(uint8_t *src, int stride){
+void ff_pred16x16_plane_c(uint8_t *src, int stride){
pred16x16_plane_compat_c(src, stride, 0);
}
-static void pred8x8_vertical_c(uint8_t *src, int stride){
+void ff_pred8x8_vertical_c(uint8_t *src, int stride){
int i;
const uint32_t a= ((uint32_t*)(src-stride))[0];
const uint32_t b= ((uint32_t*)(src-stride))[1];
@@ -2503,7 +2387,7 @@ static void pred8x8_vertical_c(uint8_t *src, int stride){
}
}
-static void pred8x8_horizontal_c(uint8_t *src, int stride){
+void ff_pred8x8_horizontal_c(uint8_t *src, int stride){
int i;
for(i=0; i<8; i++){
@@ -2512,7 +2396,7 @@ static void pred8x8_horizontal_c(uint8_t *src, int stride){
}
}
-static void pred8x8_128_dc_c(uint8_t *src, int stride){
+void ff_pred8x8_128_dc_c(uint8_t *src, int stride){
int i;
for(i=0; i<8; i++){
@@ -2566,7 +2450,7 @@ static void pred8x8_top_dc_c(uint8_t *src, int stride){
}
-static void pred8x8_dc_c(uint8_t *src, int stride){
+void ff_pred8x8_dc_c(uint8_t *src, int stride){
int i;
int dc0, dc1, dc2, dc3;
@@ -2591,7 +2475,7 @@ static void pred8x8_dc_c(uint8_t *src, int stride){
}
}
-static void pred8x8_plane_c(uint8_t *src, int stride){
+void ff_pred8x8_plane_c(uint8_t *src, int stride){
int j, k;
int a;
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
@@ -3220,21 +3104,21 @@ static void init_pred_ptrs(H264Context *h){
h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
- h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c;
- h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c;
- h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c;
- h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c;
+ h->pred8x8[DC_PRED8x8 ]= ff_pred8x8_dc_c;
+ h->pred8x8[VERT_PRED8x8 ]= ff_pred8x8_vertical_c;
+ h->pred8x8[HOR_PRED8x8 ]= ff_pred8x8_horizontal_c;
+ h->pred8x8[PLANE_PRED8x8 ]= ff_pred8x8_plane_c;
h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
- h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
+ h->pred8x8[DC_128_PRED8x8 ]= ff_pred8x8_128_dc_c;
- h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c;
- h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c;
- h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c;
- h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c;
+ h->pred16x16[DC_PRED8x8 ]= ff_pred16x16_dc_c;
+ h->pred16x16[VERT_PRED8x8 ]= ff_pred16x16_vertical_c;
+ h->pred16x16[HOR_PRED8x8 ]= ff_pred16x16_horizontal_c;
+ h->pred16x16[PLANE_PRED8x8 ]= ff_pred16x16_plane_c;
h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
- h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
+ h->pred16x16[DC_128_PRED8x8 ]= ff_pred16x16_128_dc_c;
}
static void free_tables(H264Context *h){
@@ -3269,8 +3153,8 @@ static void init_dequant8_coeff_table(H264Context *h){
}
for(q=0; q<52; q++){
- int shift = div6[q];
- int idx = rem6[q];
+ int shift = ff_div6[q];
+ int idx = ff_rem6[q];
for(x=0; x<64; x++)
h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
@@ -3294,8 +3178,8 @@ static void init_dequant4_coeff_table(H264Context *h){
continue;
for(q=0; q<52; q++){
- int shift = div6[q] + 2;
- int idx = rem6[q];
+ int shift = ff_div6[q] + 2;
+ int idx = ff_rem6[q];
for(x=0; x<16; x++)
h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
@@ -4972,6 +4856,10 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
if(total_coeff==0)
return 0;
+ if(total_coeff<0) {
+ av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff<0)\n", s->mb_x, s->mb_y);
+ return -1;
+ }
trailing_ones= coeff_token&3;
tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
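
The newly exported ff_rem6[]/ff_div6[] tables (replacing the static copies removed from h264data.h below) avoid runtime division: H.264's quantiser step doubles every 6 QP levels, so the dequant setup looks a base factor up by q%6 and shifts it left by q/6. A quick illustrative check of what the tables encode:

    #include <assert.h>
    #include <stdint.h>

    extern const uint8_t ff_rem6[52], ff_div6[52];

    static void check_qp_tables(void)
    {
        int q;
        for (q = 0; q < 52; q++) {
            assert(ff_div6[q] == q / 6);   /* shift applied to the base factor */
            assert(ff_rem6[q] == q % 6);   /* index into the base-factor table */
        }
    }
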
diff --git a/src/libffmpeg/libavcodec/h264data.h b/src/libffmpeg/libavcodec/h264data.h
index 2dea3580f..74e720421 100644
--- a/src/libffmpeg/libavcodec/h264data.h
+++ b/src/libffmpeg/libavcodec/h264data.h
@@ -53,6 +53,24 @@
#define EXTENDED_SAR 255
+/* NAL unit types */
+enum {
+NAL_SLICE=1,
+NAL_DPA,
+NAL_DPB,
+NAL_DPC,
+NAL_IDR_SLICE,
+NAL_SEI,
+NAL_SPS,
+NAL_PPS,
+NAL_AUD,
+NAL_END_SEQUENCE,
+NAL_END_STREAM,
+NAL_FILLER_DATA,
+NAL_SPS_EXT,
+NAL_AUXILIARY_SLICE=19
+};
+
static const AVRational pixel_aspect[14]={
{0, 1},
{1, 1},
@@ -488,15 +506,6 @@ static const PMbInfo b_sub_mb_type_info[13]={
{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 4, },
};
-
-static const uint8_t rem6[52]={
-0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
-};
-
-static const uint8_t div6[52]={
-0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
-};
-
static const uint8_t default_scaling4[2][16]={
{ 6,13,20,28,
13,20,28,32,
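
The NAL type constants moved into this enum map directly onto a NAL unit's first byte: the low five bits carry the type and the two bits above them the ref idc. A sketch (helper names are illustrative):

    static int nal_unit_type(unsigned first_byte)
    {
        return first_byte & 0x1F;         /* e.g. 7 -> NAL_SPS, 8 -> NAL_PPS */
    }

    static int nal_ref_idc(unsigned first_byte)
    {
        return (first_byte >> 5) & 0x3;   /* 0 means the NAL unit is discardable */
    }
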
diff --git a/src/libffmpeg/libavcodec/h264idct.c b/src/libffmpeg/libavcodec/h264idct.c
index 3506418ad..a6a56d33a 100755
--- a/src/libffmpeg/libavcodec/h264idct.c
+++ b/src/libffmpeg/libavcodec/h264idct.c
@@ -28,7 +28,7 @@
#include "dsputil.h"
-static always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int stride, int block_stride, int shift, int add){
+static av_always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int stride, int block_stride, int shift, int add){
int i;
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
diff --git a/src/libffmpeg/libavcodec/i386/Makefile.am b/src/libffmpeg/libavcodec/i386/Makefile.am
index 15ab4db89..ee170efd5 100644
--- a/src/libffmpeg/libavcodec/i386/Makefile.am
+++ b/src/libffmpeg/libavcodec/i386/Makefile.am
@@ -6,7 +6,7 @@ AM_CFLAGS = -fomit-frame-pointer -fno-strict-aliasing
# CFLAGS is here to filter out -funroll-loops because it causes bad
# behavior of libavcodec
CFLAGS := `echo @CFLAGS@ | sed -e 's/-funroll-loops//g'`
-AM_CPPFLAGS = $(LIBFFMPEG_CPPFLAGS) -I$(top_srcdir)/src/libffmpeg/libavutil
+AM_CPPFLAGS = $(LIBFFMPEG_CPPFLAGS) -I$(top_srcdir)/src/libffmpeg/libavutil -I$(top_srcdir)/src/libffmpeg
# Avoid "can't find register" failures with -O1 and higher
dsputil_mmx.o dsputil_mmx.lo: CFLAGS=$(shell echo @CFLAGS@ | sed -e 's/-funroll-loops//g; s/$$/ -Os/')
@@ -42,10 +42,10 @@ EXTRA_DIST = \
h264dsp_mmx.c \
mpegvideo_mmx_template.c
-if HAVE_FFMMX
+if HAVE_MMX
mmx_modules = $(libavcodec_mmx_src)
endif
libavcodec_mmx_la_SOURCES = $(mmx_modules) $(libavcodec_mmx_dummy)
-noinst_HEADERS = dsputil_mmx_avg.h dsputil_mmx_rnd.h mmx.h
+noinst_HEADERS = dsputil_mmx_avg.h dsputil_mmx_rnd.h mathops.h mmx.h
diff --git a/src/libffmpeg/libavcodec/i386/cputest.c b/src/libffmpeg/libavcodec/i386/cputest.c
index 262786b71..0705ab3e5 100644
--- a/src/libffmpeg/libavcodec/i386/cputest.c
+++ b/src/libffmpeg/libavcodec/i386/cputest.c
@@ -87,6 +87,8 @@ int mm_support(void)
rval |= MM_SSE2;
if (ecx & 1)
rval |= MM_SSE3;
+ if (ecx & 0x00000200 )
+ rval |= MM_SSSE3;
}
cpuid(0x80000000, max_ext_level, ebx, ecx, edx);
@@ -104,11 +106,13 @@ int mm_support(void)
}
#if 0
- av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s\n",
+ av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s%s%s\n",
(rval&MM_MMX) ? "MMX ":"",
(rval&MM_MMXEXT) ? "MMX2 ":"",
(rval&MM_SSE) ? "SSE ":"",
(rval&MM_SSE2) ? "SSE2 ":"",
+ (rval&MM_SSE3) ? "SSE3 ":"",
+ (rval&MM_SSSE3) ? "SSSE3 ":"",
(rval&MM_3DNOW) ? "3DNow ":"",
(rval&MM_3DNOWEXT) ? "3DNowExt ":"");
#endif
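
The new SSSE3 probe tests bit 9 of ECX from CPUID leaf 1 (mask 0x00000200), one notch up from the SSE3 bit 0 checked just above it; as a plain predicate:

    static int has_ssse3(unsigned ecx_from_cpuid_leaf1)
    {
        return (ecx_from_cpuid_leaf1 >> 9) & 1;   /* CPUID.1:ECX, SSSE3 flag */
    }
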
diff --git a/src/libffmpeg/libavcodec/i386/fdct_mmx.c b/src/libffmpeg/libavcodec/i386/fdct_mmx.c
index 2ffbfecf6..7e2682a4a 100644
--- a/src/libffmpeg/libavcodec/i386/fdct_mmx.c
+++ b/src/libffmpeg/libavcodec/i386/fdct_mmx.c
@@ -284,7 +284,7 @@ TABLE_SSE2
}};
-static always_inline void fdct_col(const int16_t *in, int16_t *out, int offset)
+static av_always_inline void fdct_col(const int16_t *in, int16_t *out, int offset)
{
movq_m2r(*(in + offset + 1 * 8), mm0);
movq_m2r(*(in + offset + 6 * 8), mm1);
@@ -364,7 +364,7 @@ static always_inline void fdct_col(const int16_t *in, int16_t *out, int offset)
}
-static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
+static av_always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
{
asm volatile(
#define FDCT_ROW_SSE2_H1(i,t) \
@@ -426,7 +426,7 @@ static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
);
}
-static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table)
+static av_always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table)
{
pshufw_m2r(*(in + 4), mm5, 0x1B);
movq_m2r(*(in + 0), mm0);
@@ -469,7 +469,7 @@ static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const i
movq_r2m(mm7, *(out + 4));
}
-static always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const int16_t *table)
+static av_always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const int16_t *table)
{
//FIXME reorder (I don't have an old MMX-only CPU here to benchmark ...)
movd_m2r(*(in + 6), mm1);
diff --git a/src/libffmpeg/libavcodec/i386/mathops.h b/src/libffmpeg/libavcodec/i386/mathops.h
new file mode 100644
index 000000000..3553a4025
--- /dev/null
+++ b/src/libffmpeg/libavcodec/i386/mathops.h
@@ -0,0 +1,41 @@
+/*
+ * simple math operations
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifdef FRAC_BITS
+# define MULL(ra, rb) \
+ ({ int rt, dummy; asm (\
+ "imull %3 \n\t"\
+ "shrdl %4, %%edx, %%eax \n\t"\
+ : "=a"(rt), "=d"(dummy)\
+ : "a" (ra), "rm" (rb), "i"(FRAC_BITS));\
+ rt; })
+#endif
+
+#define MULH(ra, rb) \
+ ({ int rt, dummy;\
+ asm ("imull %3\n\t" : "=d"(rt), "=a"(dummy): "a" (ra), "rm" (rb));\
+ rt; })
+
+#define MUL64(ra, rb) \
+ ({ int64_t rt;\
+ asm ("imull %2\n\t" : "=A"(rt) : "a" (ra), "g" (rb));\
+ rt; })
+
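
For reference, the portable C these inline-asm macros stand in for: the asm versions let 32-bit x86 take the high half of a product without building the full 64-bit result in C. A sketch of the equivalents:

    #include <stdint.h>

    static inline int mull_c(int a, int b, int frac_bits)
    {
        return (int)(((int64_t)a * b) >> frac_bits);   /* MULL */
    }

    static inline int mulh_c(int a, int b)
    {
        return (int)(((int64_t)a * b) >> 32);          /* MULH */
    }

    static inline int64_t mul64_c(int a, int b)
    {
        return (int64_t)a * b;                         /* MUL64 */
    }
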
diff --git a/src/libffmpeg/libavcodec/jfdctfst.c b/src/libffmpeg/libavcodec/jfdctfst.c
index 38424563d..a9dcfab82 100644
--- a/src/libffmpeg/libavcodec/jfdctfst.c
+++ b/src/libffmpeg/libavcodec/jfdctfst.c
@@ -145,7 +145,7 @@
#define MULTIPLY(var,const) ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
-static always_inline void row_fdct(DCTELEM * data){
+static av_always_inline void row_fdct(DCTELEM * data){
int_fast16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int_fast16_t tmp10, tmp11, tmp12, tmp13;
int_fast16_t z1, z2, z3, z4, z5, z11, z13;
diff --git a/src/libffmpeg/libavcodec/jfdctint.c b/src/libffmpeg/libavcodec/jfdctint.c
index 58f3a1446..250312467 100644
--- a/src/libffmpeg/libavcodec/jfdctint.c
+++ b/src/libffmpeg/libavcodec/jfdctint.c
@@ -181,7 +181,7 @@
#endif
-static always_inline void row_fdct(DCTELEM * data){
+static av_always_inline void row_fdct(DCTELEM * data){
int_fast32_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int_fast32_t tmp10, tmp11, tmp12, tmp13;
int_fast32_t z1, z2, z3, z4, z5;
diff --git a/src/libffmpeg/libavcodec/jpeg_ls.c b/src/libffmpeg/libavcodec/jpeg_ls.c
index 1b4df2b1a..4629176ad 100644
--- a/src/libffmpeg/libavcodec/jpeg_ls.c
+++ b/src/libffmpeg/libavcodec/jpeg_ls.c
@@ -804,11 +804,16 @@ static int encode_picture_ls(AVCodecContext *avctx, unsigned char *buf, int buf_
av_free(zero);
av_free(state);
+    // the specification says that, after the 0xff escaping, unused bits in the
+    // last byte must be set to 0, so just append 7 "optional" zero bits to
+    // avoid special-casing.
+ put_bits(&pb2, 7, 0);
+ size = put_bits_count(&pb2);
flush_put_bits(&pb2);
/* do escape coding */
- size = put_bits_count(&pb2) >> 3;
init_get_bits(&gb, buf2, size);
- while(get_bits_count(&gb) < size * 8){
+ size -= 7;
+ while(get_bits_count(&gb) < size){
int v;
v = get_bits(&gb, 8);
put_bits(&pb, 8, v);
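
After this change `size` counts bits, not bytes: seven guaranteed-zero bits are appended so the flush pads with nothing undefined, and those same seven bits are subtracted back off before the escape loop. The before/after bookkeeping, spelled out:

    /* old:  size  = put_bits_count(&pb2) >> 3;   loop while count < size*8
     * new:  size  = put_bits_count(&pb2);        (includes the 7 pad bits)
     *       size -= 7;                           loop while count < size   */
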
diff --git a/src/libffmpeg/libavcodec/mathops.h b/src/libffmpeg/libavcodec/mathops.h
index 9ae34d71b..c6ec70597 100644
--- a/src/libffmpeg/libavcodec/mathops.h
+++ b/src/libffmpeg/libavcodec/mathops.h
@@ -46,7 +46,7 @@
//gcc 3.4 creates an incredibly bloated mess out of this
//# define MULH(a,b) (((int64_t)(a) * (int64_t)(b))>>32)
-static always_inline int MULH(int a, int b){
+static av_always_inline int MULH(int a, int b){
return ((int64_t)(a) * (int64_t)(b))>>32;
}
#endif
diff --git a/src/libffmpeg/libavcodec/motion_est.c b/src/libffmpeg/libavcodec/motion_est.c
index 0e1504147..a11787bac 100644
--- a/src/libffmpeg/libavcodec/motion_est.c
+++ b/src/libffmpeg/libavcodec/motion_est.c
@@ -106,7 +106,7 @@ static int get_flags(MotionEstContext *c, int direct, int chroma){
+ (chroma ? FLAG_CHROMA : 0);
}
-static always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
+static av_always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
const int size, const int h, int ref_index, int src_index,
me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
MotionEstContext * const c= &s->me;
@@ -122,6 +122,7 @@ static always_inline int cmp(MpegEncContext *s, const int x, const int y, const
int d;
//FIXME check chroma 4mv, (no crashes ...)
if(flags&FLAG_DIRECT){
+ assert(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1));
if(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1)){
const int time_pp= s->pp_time;
const int time_pb= s->pb_time;
@@ -233,8 +234,14 @@ static void zero_hpel(uint8_t *a, const uint8_t *b, int stride, int h){
void ff_init_me(MpegEncContext *s){
MotionEstContext * const c= &s->me;
+ int cache_size= FFMIN(ME_MAP_SIZE>>ME_MAP_SHIFT, 1<<ME_MAP_SHIFT);
+ int dia_size= FFMAX(FFABS(s->avctx->dia_size)&255, FFABS(s->avctx->pre_dia_size)&255);
c->avctx= s->avctx;
+ if(cache_size < 2*dia_size && !c->stride){
+ av_log(s->avctx, AV_LOG_INFO, "ME_MAP size may be a little small for the selected diamond size\n");
+ }
+
ff_set_cmp(&s->dsp, s->dsp.me_pre_cmp, c->avctx->me_pre_cmp);
ff_set_cmp(&s->dsp, s->dsp.me_cmp, c->avctx->me_cmp);
ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, c->avctx->me_sub_cmp);
@@ -692,6 +699,7 @@ static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4)
static inline void get_limits(MpegEncContext *s, int x, int y)
{
MotionEstContext * const c= &s->me;
+ int range= c->avctx->me_range >> (1 + !!(c->flags&FLAG_QPEL));
/*
if(c->avctx->me_range) c->range= c->avctx->me_range >> 1;
else c->range= 16;
@@ -713,6 +721,12 @@ static inline void get_limits(MpegEncContext *s, int x, int y)
c->xmax = - x + s->mb_width *16 - 16;
c->ymax = - y + s->mb_height*16 - 16;
}
+ if(range){
+ c->xmin = FFMAX(c->xmin,-range);
+ c->xmax = FFMIN(c->xmax, range);
+ c->ymin = FFMAX(c->ymin,-range);
+ c->ymax = FFMIN(c->ymax, range);
+ }
}
static inline void init_mv4_ref(MotionEstContext *c){
@@ -1148,7 +1162,9 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
{
MotionEstContext * const c= &s->me;
uint8_t *pix, *ppix;
- int sum, varc, vard, mx, my, dmin;
+ int sum, mx, my, dmin;
+ int varc; ///< the variance of the block (sum of squared (p[y][x]-average))
+ int vard; ///< sum of squared differences with the estimated motion vector
int P[10][2];
const int shift= 1+s->quarter_sample;
int mb_type=0;
@@ -1810,8 +1826,8 @@ static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y)
get_limits(s, 16*mb_x, 16*mb_y); //restore c->?min/max, maybe not needed
- s->b_direct_mv_table[mot_xy][0]= mx;
- s->b_direct_mv_table[mot_xy][1]= my;
+ mv_table[mot_xy][0]= mx;
+ mv_table[mot_xy][1]= my;
c->flags &= ~FLAG_DIRECT;
c->sub_flags &= ~FLAG_DIRECT;
@@ -1831,6 +1847,18 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
get_limits(s, 16*mb_x, 16*mb_y);
c->skip=0;
+
+ if(s->codec_id == CODEC_ID_MPEG4 && s->next_picture.mbskip_table[xy]){
+ int score= direct_search(s, mb_x, mb_y); //FIXME just check 0,0
+
+ score= ((unsigned)(score*score + 128*256))>>16;
+ c->mc_mb_var_sum_temp += score;
+ s->current_picture.mc_mb_var[mb_y*s->mb_stride + mb_x] = score; //FIXME use SSE
+ s->mb_type[mb_y*s->mb_stride + mb_x]= CANDIDATE_MB_TYPE_DIRECT0;
+
+ return;
+ }
+
if(c->avctx->me_threshold){
int vard= check_input_motion(s, mb_x, mb_y, 0);
@@ -1953,6 +1981,8 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
}
//FIXME something smarter
if(dmin>256*256*16) type&= ~CANDIDATE_MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB
+ if(s->codec_id == CODEC_ID_MPEG4 && type&CANDIDATE_MB_TYPE_DIRECT && s->flags&CODEC_FLAG_MV0 && *(uint32_t*)s->b_direct_mv_table[xy])
+ type |= CANDIDATE_MB_TYPE_DIRECT0;
#if 0
if(s->out_format == FMT_MPEG1)
type |= CANDIDATE_MB_TYPE_INTRA;
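
One note on the new clamp in get_limits(): avctx->me_range is given in sub-pel units, so it is shifted down before being compared with the full-pel window bounds. A reading of the shift, not new API:

    /* range = c->avctx->me_range >> (1 + !!(c->flags & FLAG_QPEL));
     *   half-pel:    me_range / 2 full pixels
     *   quarter-pel: me_range / 4 full pixels */
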
diff --git a/src/libffmpeg/libavcodec/motion_est_template.c b/src/libffmpeg/libavcodec/motion_est_template.c
index d8feaff5a..897c08e3d 100644
--- a/src/libffmpeg/libavcodec/motion_est_template.c
+++ b/src/libffmpeg/libavcodec/motion_est_template.c
@@ -555,7 +555,7 @@ if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x,
const int qpel= flags&FLAG_QPEL;\
const int shift= 1+qpel;\
-static always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
+static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
int src_index, int ref_index, int const penalty_factor,
int size, int h, int flags)
{
@@ -667,31 +667,28 @@ static int hex_search(MpegEncContext * s, int *best, int dmin,
LOAD_COMMON
LOAD_COMMON2
int map_generation= c->map_generation;
- int x,y,i,d;
- static const int hex[6][2]={{-2, 0}, { 2,0}, {-1,-2}, {1,-2}, {-1,2},{1,2}};
+ int x,y,d;
+ const int dec= dia_size & (dia_size-1);
cmpf= s->dsp.me_cmp[size];
chroma_cmpf= s->dsp.me_cmp[size+1];
- for(;dia_size; dia_size--){
+ for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
do{
x= best[0];
y= best[1];
- for(i=0; i<6; i++){
- CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
+
+ CHECK_CLIPPED_MV(x -dia_size , y);
+ CHECK_CLIPPED_MV(x+ dia_size , y);
+ CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size);
+ CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size);
+ if(dia_size>1){
+ CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size);
+ CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size);
}
}while(best[0] != x || best[1] != y);
}
- do{
- x= best[0];
- y= best[1];
- CHECK_CLIPPED_MV(x+1, y);
- CHECK_CLIPPED_MV(x, y+1);
- CHECK_CLIPPED_MV(x-1, y);
- CHECK_CLIPPED_MV(x, y-1);
- }while(best[0] != x || best[1] != y);
-
return dmin;
}
@@ -704,14 +701,16 @@ static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
LOAD_COMMON
LOAD_COMMON2
int map_generation= c->map_generation;
- int x,y,i,d, dia_size;
+ int x,y,i,d;
+ int dia_size= c->dia_size&0xFF;
+ const int dec= dia_size & (dia_size-1);
static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
{ 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};
cmpf= s->dsp.me_cmp[size];
chroma_cmpf= s->dsp.me_cmp[size+1];
- for(dia_size= c->dia_size&0xFF; dia_size; dia_size--){
+ for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
do{
x= best[0];
y= best[1];
@@ -775,7 +774,7 @@ static int umh_search(MpegEncContext * s, int *best, int dmin,
}
}
- return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 1);
+ return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
}
#define SAB_CHECK_MV(ax,ay)\
@@ -824,20 +823,27 @@ static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
cmpf= s->dsp.me_cmp[size];
chroma_cmpf= s->dsp.me_cmp[size+1];
- for(j=i=0; i<ME_MAP_SIZE; i++){
+    /* Note: j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE, as j can
+       grow larger when MVs overflow their ME_MAP_MV_BITS bit space in the map.
+    */
+ for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
uint32_t key= map[i];
key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
- assert(j<MAX_SAB_SIZE); //max j = number of predictors
-
minima[j].height= score_map[i];
minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
+
+        // all entries in the map should be in range, unless an MV overflowed its ME_MAP_MV_BITS bit space
+ if( minima[j].x > xmax || minima[j].x < xmin
+ || minima[j].y > ymax || minima[j].y < ymin)
+ continue;
+
minima[j].checked=0;
if(minima[j].x || minima[j].y)
minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
@@ -965,7 +971,7 @@ if(256*256*256*64 % (stats[0]+1)==0){
return dmin;
}
-static always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
+static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
int src_index, int ref_index, int const penalty_factor,
int size, int h, int flags){
MotionEstContext * const c= &s->me;
@@ -985,7 +991,7 @@ static always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
return var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
}
-static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
+static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
int ref_mv_scale, int flags, int size, int h)
{
@@ -1018,6 +1024,10 @@ static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx
map[0]= map_generation;
score_map[0]= dmin;
+ //FIXME precalc first term below?
+ if((s->pict_type == B_TYPE && !(c->flags & FLAG_DIRECT)) || s->flags&CODEC_FLAG_MV0)
+ dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
+
/* first line */
if (s->first_slice_line) {
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
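
The reworked shrink schedule in hex_search() and l2s_dia_search() above keys off `dec = dia_size & (dia_size-1)`, which is zero exactly when the starting size is a power of two; in that case the diamond halves each round instead of shrinking by one. As a sketch (hypothetical helper):

    static int next_dia_size(int dia_size, int initial_dec)
    {
        /* initial_dec = initial_size & (initial_size - 1); 0 iff power of two */
        return initial_dec ? dia_size - 1 : dia_size >> 1;
    }
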
diff --git a/src/libffmpeg/libavcodec/mpeg12.c b/src/libffmpeg/libavcodec/mpeg12.c
index e3a4c2da5..8af7bdfa7 100644
--- a/src/libffmpeg/libavcodec/mpeg12.c
+++ b/src/libffmpeg/libavcodec/mpeg12.c
@@ -515,7 +515,7 @@ static inline void put_mb_modes(MpegEncContext *s, int n, int bits,
}
}
-static always_inline void mpeg1_encode_mb_internal(MpegEncContext *s,
+static av_always_inline void mpeg1_encode_mb_internal(MpegEncContext *s,
DCTELEM block[6][64],
int motion_x, int motion_y,
int mb_block_count)
diff --git a/src/libffmpeg/libavcodec/mpegaudiodec.c b/src/libffmpeg/libavcodec/mpegaudiodec.c
index 54bcee3b0..367400581 100644
--- a/src/libffmpeg/libavcodec/mpegaudiodec.c
+++ b/src/libffmpeg/libavcodec/mpegaudiodec.c
@@ -327,7 +327,7 @@ static int decode_init(AVCodecContext * avctx)
for(i=0;i<15;i++) {
int n, norm;
n = i + 2;
- norm = ((int64_t_C(1) << n) * FRAC_ONE) / ((1 << n) - 1);
+ norm = ((INT64_C(1) << n) * FRAC_ONE) / ((1 << n) - 1);
scale_factor_mult[i][0] = MULL(FIXR(1.0 * 2.0), norm);
scale_factor_mult[i][1] = MULL(FIXR(0.7937005259 * 2.0), norm);
scale_factor_mult[i][2] = MULL(FIXR(0.6299605249 * 2.0), norm);
@@ -1749,7 +1749,7 @@ static int huffman_decode(MPADecodeContext *s, GranuleDef *g,
/* skip extension bits */
bits_left = end_pos - get_bits_count(&s->gb);
//av_log(NULL, AV_LOG_ERROR, "left:%d buf:%p\n", bits_left, s->in_gb.buffer);
- if (bits_left < 0 || bits_left > 16) {
+ if (bits_left < 0 || bits_left > 500) {
av_log(NULL, AV_LOG_ERROR, "bits_left=%d\n", bits_left);
s_index=0;
}else if(bits_left > 0 && s->error_resilience >= FF_ER_AGGRESSIVE){
diff --git a/src/libffmpeg/libavcodec/mpegvideo.c b/src/libffmpeg/libavcodec/mpegvideo.c
index a9d877fff..a33485549 100644
--- a/src/libffmpeg/libavcodec/mpegvideo.c
+++ b/src/libffmpeg/libavcodec/mpegvideo.c
@@ -140,7 +140,7 @@ static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[
/* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
/* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
- qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) /
+ qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
(qscale * quant_matrix[j]));
}
} else if (dsp->fdct == fdct_ifast
@@ -155,7 +155,7 @@ static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[
/* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
/* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
- qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) /
+ qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
(aanscales[i] * qscale * quant_matrix[j]));
}
} else {
@@ -166,7 +166,7 @@ static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[
so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
so 32768 >= (1<<19) / (qscale * quant_matrix[i]) >= 67
*/
- qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
+ qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
// qmat [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
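The int64_t_C()/uint64_t_C() replacements in this file and in mpegaudiodec.c above switch from ffmpeg's private constant macros to the ISO C99 ones from <stdint.h>. The width matters: the shift has to happen at 64-bit width, since QMAT_SHIFT + 14 can reach or exceed the width of a plain int.

    #include <stdint.h>
    /* a plain (1 << 36) would overflow int; the C99 macros widen first */
    uint64_t q = UINT64_C(1) << 36;
    int64_t  n = INT64_C(1)  << 36;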
@@ -2964,7 +2964,7 @@ static inline int hpel_motion_lowres(MpegEncContext *s,
}
/* apply one mpeg motion vector to the three components */
-static always_inline void mpeg_motion(MpegEncContext *s,
+static av_always_inline void mpeg_motion(MpegEncContext *s,
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
int field_based, int bottom_field, int field_select,
uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
@@ -3081,7 +3081,7 @@ if(s->quarter_sample)
}
/* apply one mpeg motion vector to the three components */
-static always_inline void mpeg_motion_lowres(MpegEncContext *s,
+static av_always_inline void mpeg_motion_lowres(MpegEncContext *s,
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
int field_based, int bottom_field, int field_select,
uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
@@ -3913,7 +3913,7 @@ void ff_clean_intra_table_entries(MpegEncContext *s)
s->mv : motion vector
s->interlaced_dct : true if interlaced dct used (mpeg2)
*/
-static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
+static av_always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
{
int mb_x, mb_y;
const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
@@ -4336,7 +4336,7 @@ static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
}
}
-static always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count)
+static av_always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count)
{
int16_t weight[8][64];
DCTELEM orig[8][64];
@@ -4348,7 +4348,7 @@ static always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, in
uint8_t *ptr_y, *ptr_cb, *ptr_cr;
int wrap_y, wrap_c;
- for(i=0; i<mb_block_count; i++) skip_dct[i]=0;
+ for(i=0; i<mb_block_count; i++) skip_dct[i]=s->skipdct;
if(s->adaptive_quant){
const int last_qp= s->qscale;
@@ -4358,17 +4358,16 @@ static always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, in
update_qscale(s);
if(!(s->flags&CODEC_FLAG_QP_RD)){
+ s->qscale= s->current_picture_ptr->qscale_table[mb_xy];
s->dquant= s->qscale - last_qp;
if(s->out_format==FMT_H263){
- s->dquant= clip(s->dquant, -2, 2); //FIXME RD
+ s->dquant= clip(s->dquant, -2, 2);
if(s->codec_id==CODEC_ID_MPEG4){
if(!s->mb_intra){
if(s->pict_type == B_TYPE){
- if(s->dquant&1)
- s->dquant= (s->dquant/2)*2;
- if(s->mv_dir&MV_DIRECT)
+ if(s->dquant&1 || s->mv_dir&MV_DIRECT)
s->dquant= 0;
}
if(s->mv_type==MV_TYPE_8X8)
@@ -4621,7 +4620,7 @@ static always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, in
}
}
-static always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
+static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
{
if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y, 8, 6);
else encode_mb_internal(s, motion_x, motion_y, 16, 8);
@@ -4861,6 +4860,8 @@ static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
static int estimate_motion_thread(AVCodecContext *c, void *arg){
MpegEncContext *s= arg;
+ ff_check_alignment();
+
s->me.dia_size= s->avctx->dia_size;
s->first_slice_line=1;
for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
@@ -4888,6 +4889,8 @@ static int mb_var_thread(AVCodecContext *c, void *arg){
MpegEncContext *s= arg;
int mb_x, mb_y;
+ ff_check_alignment();
+
for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
for(mb_x=0; mb_x < s->mb_width; mb_x++) {
int xx = mb_x * 16;
@@ -4938,6 +4941,8 @@ static int encode_thread(AVCodecContext *c, void *arg){
PutBitContext pb[2], pb2[2], tex_pb[2];
//printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
+ ff_check_alignment();
+
for(i=0; i<2; i++){
init_put_bits(&pb [i], bit_buf [i], MAX_MB_BYTES);
init_put_bits(&pb2 [i], bit_buf2 [i], MAX_MB_BYTES);
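ff_check_alignment() is called at the top of each worker because threads run on fresh stacks; if the compiler fails to keep 16-byte stack alignment, aligned SSE/AltiVec accesses in the hot loops will crash. A hedged sketch of the kind of test it performs (the real function lives in dsputil.c and also rate-limits and explains the failure):

    #include <stdio.h>
    static void check_stack_alignment(void)
    {
        long probe __attribute__((aligned(16)));
        if ((long)&probe & 15)
            fprintf(stderr, "stack not 16-byte aligned, SIMD code may crash\n");
    }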
@@ -5205,19 +5210,6 @@ static int encode_thread(AVCodecContext *c, void *arg){
encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
&dmin, &next_block, 0, 0);
}
- if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
- int mx= s->b_direct_mv_table[xy][0];
- int my= s->b_direct_mv_table[xy][1];
-
- s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
- s->mb_intra= 0;
-/* xine: do not need this for decode or MPEG-1 encoding modes */
-#if 0
- ff_mpeg4_set_direct_mv(s, mx, my);
-#endif /* #if 0 */
- encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
- &dmin, &next_block, mx, my);
- }
if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
s->mv_dir = MV_DIR_FORWARD;
s->mv_type = MV_TYPE_FIELD;
@@ -5272,8 +5264,8 @@ static int encode_thread(AVCodecContext *c, void *arg){
}
}
- if(s->flags & CODEC_FLAG_QP_RD){
- if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
+ if((s->flags & CODEC_FLAG_QP_RD) && dmin < INT_MAX){
+ if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
const int last_qp= backup_s.qscale;
int qpi, qp, dc[6];
DCTELEM ac[6][16];
@@ -5316,10 +5308,64 @@ static int encode_thread(AVCodecContext *c, void *arg){
}
}
}
- qp= best_s.qscale;
- s->current_picture.qscale_table[xy]= qp;
}
}
+ if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
+ int mx= s->b_direct_mv_table[xy][0];
+ int my= s->b_direct_mv_table[xy][1];
+
+ backup_s.dquant = 0;
+ s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
+ s->mb_intra= 0;
+ ff_mpeg4_set_direct_mv(s, mx, my);
+ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
+ &dmin, &next_block, mx, my);
+ }
+ if(mb_type&CANDIDATE_MB_TYPE_DIRECT0){
+ backup_s.dquant = 0;
+ s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
+ s->mb_intra= 0;
+/* xine: do not need this for decode or MPEG-1 encoding modes */
+#if 0
+ ff_mpeg4_set_direct_mv(s, 0, 0);
+#endif /* #if 0 */
+ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
+ &dmin, &next_block, 0, 0);
+ }
+ if(!best_s.mb_intra && s->flags2&CODEC_FLAG2_SKIP_RD){
+ int coded=0;
+ for(i=0; i<6; i++)
+ coded |= s->block_last_index[i];
+ if(coded){
+ int mx,my;
+ memcpy(s->mv, best_s.mv, sizeof(s->mv));
+ if(best_s.mv_dir & MV_DIRECT){
+ mx=my=0; //FIXME find the one we actually used
+ ff_mpeg4_set_direct_mv(s, mx, my);
+ }else if(best_s.mv_dir&MV_DIR_BACKWARD){
+ mx= s->mv[1][0][0];
+ my= s->mv[1][0][1];
+ }else{
+ mx= s->mv[0][0][0];
+ my= s->mv[0][0][1];
+ }
+
+ s->mv_dir= best_s.mv_dir;
+ s->mv_type = best_s.mv_type;
+ s->mb_intra= 0;
+/* s->mv[0][0][0] = best_s.mv[0][0][0];
+ s->mv[0][0][1] = best_s.mv[0][0][1];
+ s->mv[1][0][0] = best_s.mv[1][0][0];
+ s->mv[1][0][1] = best_s.mv[1][0][1];*/
+ backup_s.dquant= 0;
+ s->skipdct=1;
+ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
+ &dmin, &next_block, mx, my);
+ s->skipdct=0;
+ }
+ }
+
+ s->current_picture.qscale_table[xy]= best_s.qscale;
copy_context_after_encode(s, &best_s, -1);
@@ -5401,6 +5447,11 @@ static int encode_thread(AVCodecContext *c, void *arg){
ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
#endif /* #if 0 */
break;
+ case CANDIDATE_MB_TYPE_DIRECT0:
+ s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
+ s->mb_intra= 0;
+ ff_mpeg4_set_direct_mv(s, 0, 0);
+ break;
case CANDIDATE_MB_TYPE_BIDIR:
s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
s->mb_intra= 0;
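Summary of the encoder changes in this hunk: the direct-mode trial moved below the QP-RD loop and now clears backup_s.dquant (a direct MB cannot carry a quantiser change); a new CANDIDATE_MB_TYPE_DIRECT0 tries direct prediction with an explicit (0,0) vector; and under CODEC_FLAG2_SKIP_RD the best inter macroblock is re-encoded once with s->skipdct set, i.e. with the whole residual forced to zero, and kept if that wins the rate-distortion comparison. Note that the relocated DIRECT branch now calls ff_mpeg4_set_direct_mv() unconditionally, while the DIRECT0 branch retains the xine '#if 0' guard. A toy sketch of the skip-RD decision (Trial, rd_cost, prefer_skip are illustrative names, not the encoder's actual bookkeeping):

    typedef struct { int bits; long ssd; } Trial;   /* rate and distortion */

    static long rd_cost(const Trial *t, int lambda)
    {
        return t->ssd + (long)lambda * t->bits;     /* Lagrangian cost */
    }

    static int prefer_skip(const Trial *coded, const Trial *skipped, int lambda)
    {
        /* keep the zero-residual version when its total cost is lower */
        return rd_cost(skipped, lambda) < rd_cost(coded, lambda);
    }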
diff --git a/src/libffmpeg/libavcodec/mpegvideo.h b/src/libffmpeg/libavcodec/mpegvideo.h
index 011678a42..ed02759ae 100644
--- a/src/libffmpeg/libavcodec/mpegvideo.h
+++ b/src/libffmpeg/libavcodec/mpegvideo.h
@@ -324,6 +324,7 @@ typedef struct MpegEncContext {
int dropable;
int frame_rate_index;
int last_lambda_for[5]; ///< last lambda for a specific pict type
+ int skipdct; ///< skip dct and code zero residual
/* motion compensation */
int unrestricted_mv; ///< mv can point outside of the coded picture
@@ -402,6 +403,8 @@ typedef struct MpegEncContext {
#define CANDIDATE_MB_TYPE_BACKWARD_I 0x400
#define CANDIDATE_MB_TYPE_BIDIR_I 0x800
+#define CANDIDATE_MB_TYPE_DIRECT0 0x1000
+
int block_index[6]; ///< index to current MB in block based arrays with edges
int block_wrap[6];
uint8_t *dest[3];
diff --git a/src/libffmpeg/libavcodec/parser.c b/src/libffmpeg/libavcodec/parser.c
index 72a3e55a3..740ad855c 100644
--- a/src/libffmpeg/libavcodec/parser.c
+++ b/src/libffmpeg/libavcodec/parser.c
@@ -91,7 +91,8 @@ AVCodecParserContext *av_parser_init(int codec_id)
* in_data += len;
* in_len -= len;
*
- * decode_frame(data, size);
+ * if(size)
+ * decode_frame(data, size);
* }
* @endcode
*/
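Context for the doc fix: av_parser_parse() consumes input incrementally and returns an output size of 0 until it has assembled a complete frame, so the decoder should only be called when size is non-zero. A hedged sketch of the whole loop the @code block abbreviates (decode_frame is the documentation's own placeholder; parser, avctx, pts and dts are assumed to be set up elsewhere):

    while (in_len > 0) {
        uint8_t *data;
        int size;
        int len = av_parser_parse(parser, avctx, &data, &size,
                                  in_data, in_len, pts, dts);
        in_data += len;
        in_len  -= len;
        if (size)                  /* skip while no full frame is ready */
            decode_frame(data, size);
    }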
diff --git a/src/libffmpeg/libavcodec/ppc/Makefile.am b/src/libffmpeg/libavcodec/ppc/Makefile.am
index 00e796f6d..d52cc481e 100644
--- a/src/libffmpeg/libavcodec/ppc/Makefile.am
+++ b/src/libffmpeg/libavcodec/ppc/Makefile.am
@@ -12,14 +12,17 @@ noinst_LTLIBRARIES = libavcodec_ppc.la
libavcodec_ppc_src = dsputil_altivec.c \
dsputil_ppc.c \
- dsputil_h264_altivec.c \
- dsputil_h264_template_altivec.c \
+ h264_altivec.c \
+ h264_template_altivec.c \
fdct_altivec.c \
fft_altivec.c \
+ float_altivec.c \
idct_altivec.c \
gmc_altivec.c \
mpegvideo_altivec.c \
- mpegvideo_ppc.c
+ mpegvideo_ppc.c \
+ snow_altivec.c \
+ vc1dsp_altivec.c
libavcodec_ppc_dummy = libavcodec_ppc_dummy.c
EXTRA_DIST = $(libavcodec_ppc_src) $(libavcodec_ppc_dummy)
@@ -28,7 +31,6 @@ EXTRA_DIST = $(libavcodec_ppc_src) $(libavcodec_ppc_dummy)
#ppc_modules = $(libavcodec_ppc_src)
#endif
-
libavcodec_ppc_la_SOURCES = $(ppc_modules) $(libavcodec_ppc_dummy)
-noinst_HEADERS = dsputil_altivec.h dsputil_ppc.h gcc_fixes.h
+noinst_HEADERS = dsputil_altivec.h dsputil_ppc.h gcc_fixes.h mathops.h types_altivec.h
diff --git a/src/libffmpeg/libavcodec/ppc/float_altivec.c b/src/libffmpeg/libavcodec/ppc/float_altivec.c
new file mode 100644
index 000000000..c6e43dec2
--- /dev/null
+++ b/src/libffmpeg/libavcodec/ppc/float_altivec.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../dsputil.h"
+
+#include "gcc_fixes.h"
+
+#include "dsputil_altivec.h"
+
+static void vector_fmul_altivec(float *dst, const float *src, int len)
+{
+ int i;
+ vector float d0, d1, s, zero = (vector float)vec_splat_u32(0);
+ for(i=0; i<len-7; i+=8) {
+ d0 = vec_ld(0, dst+i);
+ s = vec_ld(0, src+i);
+ d1 = vec_ld(16, dst+i);
+ d0 = vec_madd(d0, s, zero);
+ d1 = vec_madd(d1, vec_ld(16,src+i), zero);
+ vec_st(d0, 0, dst+i);
+ vec_st(d1, 16, dst+i);
+ }
+}
+
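vec_madd(d, s, zero) is a fused multiply-add with a zero addend, so each pass multiplies eight dst elements by src in place; len is assumed to be a multiple of 8 (no scalar tail is handled here). Scalar equivalent:

    static void vector_fmul_ref(float *dst, const float *src, int len)
    {
        int i;
        for (i = 0; i < len; i++)
            dst[i] *= src[i];   /* the AltiVec loop does 8 of these per pass */
    }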
+static void vector_fmul_reverse_altivec(float *dst, const float *src0,
+ const float *src1, int len)
+{
+ int i;
+ vector float d, s0, s1, h0, l0,
+ s2, s3, zero = (vector float)vec_splat_u32(0);
+ src1 += len-4;
+ for(i=0; i<len-7; i+=8) {
+ s1 = vec_ld(0, src1-i); // [a,b,c,d]
+ s0 = vec_ld(0, src0+i);
+ l0 = vec_mergel(s1, s1); // [c,c,d,d]
+ s3 = vec_ld(-16, src1-i);
+ h0 = vec_mergeh(s1, s1); // [a,a,b,b]
+ s2 = vec_ld(16, src0+i);
+ s1 = vec_mergeh(vec_mergel(l0,h0), // [d,b,d,b]
+ vec_mergeh(l0,h0)); // [c,a,c,a]
+ // [d,c,b,a]
+ l0 = vec_mergel(s3, s3);
+ d = vec_madd(s0, s1, zero);
+ h0 = vec_mergeh(s3, s3);
+ vec_st(d, 0, dst+i);
+ s3 = vec_mergeh(vec_mergel(l0,h0),
+ vec_mergeh(l0,h0));
+ d = vec_madd(s2, s3, zero);
+ vec_st(d, 16, dst+i);
+ }
+}
+
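The mergeh/mergel pairs above reverse each four-float block of src1 ([a,b,c,d] becomes [d,c,b,a], as the inline comments trace), so src0 is walked forward while src1 is walked backward. Scalar equivalent:

    static void vector_fmul_reverse_ref(float *dst, const float *src0,
                                        const float *src1, int len)
    {
        int i;
        src1 += len - 1;
        for (i = 0; i < len; i++)
            dst[i] = src0[i] * src1[-i];   /* src1 read back to front */
    }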
+static void vector_fmul_add_add_altivec(float *dst, const float *src0,
+ const float *src1, const float *src2,
+ int src3, int len, int step)
+{
+ int i;
+ vector float d, s0, s1, s2, t0, t1, edges;
+ vector unsigned char align = vec_lvsr(0,dst),
+ mask = vec_lvsl(0, dst);
+
+ t0 = vec_ld(0, dst);
+#if 0 //FIXME: there is still something wrong
+ if (step == 2) {
+ int y;
+ vector float d0, d1, s3, t2;
+ vector unsigned int sel =
+ vec_mergeh(vec_splat_u32(-1), vec_splat_u32(0));
+ t1 = vec_ld(16, dst);
+ for (i=0,y=0; i<len-3; i+=4,y+=8) {
+
+ s0 = vec_ld(0,src0+i);
+ s1 = vec_ld(0,src1+i);
+ s2 = vec_ld(0,src2+i);
+
+// t0 = vec_ld(0, dst+y); //[x x x|a]
+// t1 = vec_ld(16, dst+y); //[b c d|e]
+ t2 = vec_ld(31, dst+y); //[f g h|x]
+
+ d = vec_madd(s0,s1,s2); // [A B C D]
+
+ // [A A B B]
+
+ // [C C D D]
+
+ d0 = vec_perm(t0, t1, mask); // [a b c d]
+
+ d0 = vec_sel(vec_mergeh(d, d), d0, sel); // [A b B d]
+
+ edges = vec_perm(t1, t0, mask);
+
+ t0 = vec_perm(edges, d0, align); // [x x x|A]
+
+ t1 = vec_perm(d0, edges, align); // [b B d|e]
+
+ vec_stl(t0, 0, dst+y);
+
+ d1 = vec_perm(t1, t2, mask); // [e f g h]
+
+ d1 = vec_sel(vec_mergel(d, d), d1, sel); // [C f D h]
+
+ edges = vec_perm(t2, t1, mask);
+
+ t1 = vec_perm(edges, d1, align); // [b B d|C]
+
+ t2 = vec_perm(d1, edges, align); // [f D h|x]
+
+ vec_stl(t1, 16, dst+y);
+
+ t0 = t1;
+
+ vec_stl(t2, 31, dst+y);
+
+ t1 = t2;
+ }
+ } else
+ #endif
+ if (step == 1 && src3 == 0)
+ for (i=0; i<len-3; i+=4) {
+ t1 = vec_ld(15, dst+i);
+ s0 = vec_ld(0, src0+i);
+ s1 = vec_ld(0, src1+i);
+ s2 = vec_ld(0, src2+i);
+ edges = vec_perm(t1 ,t0, mask);
+ d = vec_madd(s0,s1,s2);
+ t1 = vec_perm(d, edges, align);
+ t0 = vec_perm(edges, d, align);
+ vec_st(t1, 15, dst+i);
+ vec_st(t0, 0, dst+i);
+ t0 = t1;
+ }
+ else
+ ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step);
+}
+
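Only the step == 1, src3 == 0 case is vectorized; the interleaved step == 2 path stays disabled under '#if 0' until its edge-store logic is fixed, and every other case falls back to ff_vector_fmul_add_add_c(). In the vectorized case the arithmetic per element is a single fused multiply-add:

    /* scalar reference for the step == 1, src3 == 0 case */
    static void fmul_add_add_ref(float *dst, const float *src0,
                                 const float *src1, const float *src2, int len)
    {
        int i;
        for (i = 0; i < len; i++)
            dst[i] = src0[i] * src1[i] + src2[i];   /* == vec_madd(s0, s1, s2) */
    }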
+void float_to_int16_altivec(int16_t *dst, const float *src, int len)
+{
+ int i;
+ vector float s0, s1;
+ vector signed int t0, t1;
+ vector signed short d0, d1, d;
+ vector unsigned char align;
+ if(((long)dst)&15) //FIXME
+ for(i=0; i<len-7; i+=8) {
+ s0 = vec_ld(0, src+i);
+ s1 = vec_ld(16, src+i);
+ t0 = vec_cts(s0, 0);
+ d0 = vec_ld(0, dst+i);
+ t1 = vec_cts(s1, 0);
+ d1 = vec_ld(15, dst+i);
+ d = vec_packs(t0,t1);
+ d1 = vec_perm(d1, d0, vec_lvsl(0,dst+i));
+ align = vec_lvsr(0, dst+i);
+ d0 = vec_perm(d1, d, align);
+ d1 = vec_perm(d, d1, align);
+ vec_st(d0, 0, dst+i);
+ vec_st(d1,15, dst+i);
+ }
+ else
+ for(i=0; i<len-7; i+=8) {
+ s0 = vec_ld(0, src+i);
+ s1 = vec_ld(16, src+i);
+ t0 = vec_cts(s0, 0);
+ t1 = vec_cts(s1, 0);
+ d = vec_packs(t0,t1);
+ vec_st(d, 0, dst+i);
+ }
+}
+
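vec_cts(s, 0) converts four floats to int32, truncating toward zero and saturating, and vec_packs narrows to int16 with saturation; the first branch handles a misaligned dst via lvsr permutes, the second stores aligned vectors directly. Scalar reference for one sample (note the truncation, unlike lrintf()):

    #include <stdint.h>
    static int16_t float_to_int16_ref(float v)
    {
        if (v >=  32767.0f) return  32767;   /* vec_packs saturates */
        if (v <= -32768.0f) return -32768;
        return (int16_t)v;                   /* vec_cts truncates   */
    }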
+void float_init_altivec(DSPContext* c, AVCodecContext *avctx)
+{
+ c->vector_fmul = vector_fmul_altivec;
+ c->vector_fmul_reverse = vector_fmul_reverse_altivec;
+ c->vector_fmul_add_add = vector_fmul_add_add_altivec;
+ if(!(avctx->flags & CODEC_FLAG_BITEXACT))
+ c->float_to_int16 = float_to_int16_altivec;
+}
diff --git a/src/libffmpeg/libavcodec/ppc/h264_altivec.c b/src/libffmpeg/libavcodec/ppc/h264_altivec.c
new file mode 100644
index 000000000..bac620e82
--- /dev/null
+++ b/src/libffmpeg/libavcodec/ppc/h264_altivec.c
@@ -0,0 +1,565 @@
+/*
+ * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../dsputil.h"
+
+#include "gcc_fixes.h"
+
+#include "dsputil_altivec.h"
+#include "types_altivec.h"
+
+#define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
+#define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)
+
+#define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC
+#define PREFIX_h264_chroma_mc8_altivec put_h264_chroma_mc8_altivec
+#define PREFIX_h264_chroma_mc8_num altivec_put_h264_chroma_mc8_num
+#define PREFIX_h264_qpel16_h_lowpass_altivec put_h264_qpel16_h_lowpass_altivec
+#define PREFIX_h264_qpel16_h_lowpass_num altivec_put_h264_qpel16_h_lowpass_num
+#define PREFIX_h264_qpel16_v_lowpass_altivec put_h264_qpel16_v_lowpass_altivec
+#define PREFIX_h264_qpel16_v_lowpass_num altivec_put_h264_qpel16_v_lowpass_num
+#define PREFIX_h264_qpel16_hv_lowpass_altivec put_h264_qpel16_hv_lowpass_altivec
+#define PREFIX_h264_qpel16_hv_lowpass_num altivec_put_h264_qpel16_hv_lowpass_num
+#include "h264_template_altivec.c"
+#undef OP_U8_ALTIVEC
+#undef PREFIX_h264_chroma_mc8_altivec
+#undef PREFIX_h264_chroma_mc8_num
+#undef PREFIX_h264_qpel16_h_lowpass_altivec
+#undef PREFIX_h264_qpel16_h_lowpass_num
+#undef PREFIX_h264_qpel16_v_lowpass_altivec
+#undef PREFIX_h264_qpel16_v_lowpass_num
+#undef PREFIX_h264_qpel16_hv_lowpass_altivec
+#undef PREFIX_h264_qpel16_hv_lowpass_num
+
+#define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC
+#define PREFIX_h264_chroma_mc8_altivec avg_h264_chroma_mc8_altivec
+#define PREFIX_h264_chroma_mc8_num altivec_avg_h264_chroma_mc8_num
+#define PREFIX_h264_qpel16_h_lowpass_altivec avg_h264_qpel16_h_lowpass_altivec
+#define PREFIX_h264_qpel16_h_lowpass_num altivec_avg_h264_qpel16_h_lowpass_num
+#define PREFIX_h264_qpel16_v_lowpass_altivec avg_h264_qpel16_v_lowpass_altivec
+#define PREFIX_h264_qpel16_v_lowpass_num altivec_avg_h264_qpel16_v_lowpass_num
+#define PREFIX_h264_qpel16_hv_lowpass_altivec avg_h264_qpel16_hv_lowpass_altivec
+#define PREFIX_h264_qpel16_hv_lowpass_num altivec_avg_h264_qpel16_hv_lowpass_num
+#include "h264_template_altivec.c"
+#undef OP_U8_ALTIVEC
+#undef PREFIX_h264_chroma_mc8_altivec
+#undef PREFIX_h264_chroma_mc8_num
+#undef PREFIX_h264_qpel16_h_lowpass_altivec
+#undef PREFIX_h264_qpel16_h_lowpass_num
+#undef PREFIX_h264_qpel16_v_lowpass_altivec
+#undef PREFIX_h264_qpel16_v_lowpass_num
+#undef PREFIX_h264_qpel16_hv_lowpass_altivec
+#undef PREFIX_h264_qpel16_hv_lowpass_num
+
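The block above compiles h264_template_altivec.c twice: first with OP_U8_ALTIVEC writing the result straight out (the put_* family), then with it averaging against the destination (the avg_* family), while the PREFIX_* macros rename every generated function. The pattern in miniature (pixels_tmpl.c and FUNC are hypothetical; the real template uses the individual PREFIX_* names):

    #define OP(d, s, dst)  d = s                   /* "put": overwrite      */
    #define FUNC(name)     put_ ## name
    #include "pixels_tmpl.c"
    #undef  OP
    #undef  FUNC

    #define OP(d, s, dst)  d = avg(dst, s)         /* "avg": blend with dst */
    #define FUNC(name)     avg_ ## name
    #include "pixels_tmpl.c"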
+#define H264_MC(OPNAME, SIZE, CODETYPE) \
+static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){ \
+ DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
+ put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
+ put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
+ DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
+ put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
+ DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
+ put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
+ DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
+ put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
+ DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
+ put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
+ OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
+ DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
+ DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
+ put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
+ put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
+ DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
+ DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
+ put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
+ put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
+ DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
+ DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
+ put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
+ DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
+ DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
+ put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
+}\
+
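H264_MC expands to the sixteen quarter-pel motion-compensation entry points; the mcXY suffix encodes the fractional position, X horizontal and Y vertical in quarter samples (0..3). mc00 is a plain copy, mc20/mc02 are the pure half-pel lowpass outputs, mc22 is the 2-D hv filter, and the remaining positions average two neighbouring planes; e.g. mc10 averages the source with the horizontal half-pel plane. Scalar restatement of that mc10 body, for n pixels of one row:

    #include <stdint.h>
    /* quarter-pel (1,0): rounded mean of full-pel and horizontal half-pel */
    static void mc10_ref(uint8_t *dst, const uint8_t *src,
                         const uint8_t *half, int n)
    {
        int i;
        for (i = 0; i < n; i++)
            dst[i] = (uint8_t)((src[i] + half[i] + 1) >> 1);  /* == vec_avg */
    }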
+/* this code assumes that stride % 16 == 0 */
+void put_no_rnd_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
+ signed int ABCD[4] __attribute__((aligned(16))) =
+ {((8 - x) * (8 - y)),
+ ((x) * (8 - y)),
+ ((8 - x) * (y)),
+ ((x) * (y))};
+ register int i;
+ vector unsigned char fperm;
+ const vector signed int vABCD = vec_ld(0, ABCD);
+ const vector signed short vA = vec_splat((vector signed short)vABCD, 1);
+ const vector signed short vB = vec_splat((vector signed short)vABCD, 3);
+ const vector signed short vC = vec_splat((vector signed short)vABCD, 5);
+ const vector signed short vD = vec_splat((vector signed short)vABCD, 7);
+ const vector signed int vzero = vec_splat_s32(0);
+ const vector signed short v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));
+ const vector unsigned short v6us = vec_splat_u16(6);
+ register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
+ register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
+
+ vector unsigned char vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1;
+ vector unsigned char vsrc0uc, vsrc1uc;
+ vector signed short vsrc0ssH, vsrc1ssH;
+ vector unsigned char vsrcCuc, vsrc2uc, vsrc3uc;
+ vector signed short vsrc2ssH, vsrc3ssH, psum;
+ vector unsigned char vdst, ppsum, fsum;
+
+ if (((unsigned long)dst) % 16 == 0) {
+ fperm = (vector unsigned char)AVV(0x10, 0x11, 0x12, 0x13,
+ 0x14, 0x15, 0x16, 0x17,
+ 0x08, 0x09, 0x0A, 0x0B,
+ 0x0C, 0x0D, 0x0E, 0x0F);
+ } else {
+ fperm = (vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03,
+ 0x04, 0x05, 0x06, 0x07,
+ 0x18, 0x19, 0x1A, 0x1B,
+ 0x1C, 0x1D, 0x1E, 0x1F);
+ }
+
+ vsrcAuc = vec_ld(0, src);
+
+ if (loadSecond)
+ vsrcBuc = vec_ld(16, src);
+ vsrcperm0 = vec_lvsl(0, src);
+ vsrcperm1 = vec_lvsl(1, src);
+
+ vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
+ if (reallyBadAlign)
+ vsrc1uc = vsrcBuc;
+ else
+ vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);
+
+ vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+ (vector unsigned char)vsrc0uc);
+ vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+ (vector unsigned char)vsrc1uc);
+
+ if (!loadSecond) { // !loadSecond implies !reallyBadAlign
+ for (i = 0 ; i < h ; i++) {
+
+
+ vsrcCuc = vec_ld(stride + 0, src);
+
+ vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
+ vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
+
+ vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+ (vector unsigned char)vsrc2uc);
+ vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+ (vector unsigned char)vsrc3uc);
+
+ psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
+ psum = vec_mladd(vB, vsrc1ssH, psum);
+ psum = vec_mladd(vC, vsrc2ssH, psum);
+ psum = vec_mladd(vD, vsrc3ssH, psum);
+ psum = vec_add(v28ss, psum);
+ psum = vec_sra(psum, v6us);
+
+ vdst = vec_ld(0, dst);
+ ppsum = (vector unsigned char)vec_packsu(psum, psum);
+ fsum = vec_perm(vdst, ppsum, fperm);
+
+ vec_st(fsum, 0, dst);
+
+ vsrc0ssH = vsrc2ssH;
+ vsrc1ssH = vsrc3ssH;
+
+ dst += stride;
+ src += stride;
+ }
+ } else {
+ vector unsigned char vsrcDuc;
+ for (i = 0 ; i < h ; i++) {
+ vsrcCuc = vec_ld(stride + 0, src);
+ vsrcDuc = vec_ld(stride + 16, src);
+
+ vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
+ if (reallyBadAlign)
+ vsrc3uc = vsrcDuc;
+ else
+ vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
+
+ vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+ (vector unsigned char)vsrc2uc);
+ vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+ (vector unsigned char)vsrc3uc);
+
+ psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
+ psum = vec_mladd(vB, vsrc1ssH, psum);
+ psum = vec_mladd(vC, vsrc2ssH, psum);
+ psum = vec_mladd(vD, vsrc3ssH, psum);
+ psum = vec_add(v28ss, psum);
+ psum = vec_sr(psum, v6us);
+
+ vdst = vec_ld(0, dst);
+ ppsum = (vector unsigned char)vec_pack(psum, psum);
+ fsum = vec_perm(vdst, ppsum, fperm);
+
+ vec_st(fsum, 0, dst);
+
+ vsrc0ssH = vsrc2ssH;
+ vsrc1ssH = vsrc3ssH;
+
+ dst += stride;
+ src += stride;
+ }
+ }
+}
+
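H.264 chroma MC bilinearly blends the four neighbouring full-pel samples with eighth-pel weights; this no_rnd variant rounds with 28 rather than the usual 32 (v28ss = 32 - 4 in the code above), and the alignment gymnastics only deal with src/dst quadword boundaries. What each lane computes, in scalar form (A..D are the samples at (0,0), (1,0), (0,1), (1,1); x, y in 0..7):

    #include <stdint.h>
    static uint8_t chroma_no_rnd(int A, int B, int C, int D, int x, int y)
    {
        int v = (8 - x) * (8 - y) * A + x * (8 - y) * B
              + (8 - x) *  y      * C + x *  y      * D;
        return (uint8_t)((v + 28) >> 6);   /* 28 = 32 - 4: the no-rnd bias */
    }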
+static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
+ const uint8_t * src2, int dst_stride,
+ int src_stride1, int h)
+{
+ int i;
+ vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align;
+
+ mask_ = vec_lvsl(0, src2);
+
+ for (i = 0; i < h; i++) {
+
+ tmp1 = vec_ld(i * src_stride1, src1);
+ mask = vec_lvsl(i * src_stride1, src1);
+ tmp2 = vec_ld(i * src_stride1 + 15, src1);
+
+ a = vec_perm(tmp1, tmp2, mask);
+
+ tmp1 = vec_ld(i * 16, src2);
+ tmp2 = vec_ld(i * 16 + 15, src2);
+
+ b = vec_perm(tmp1, tmp2, mask_);
+
+ tmp1 = vec_ld(0, dst);
+ mask = vec_lvsl(0, dst);
+ tmp2 = vec_ld(15, dst);
+
+ d = vec_avg(a, b);
+
+ edges = vec_perm(tmp2, tmp1, mask);
+
+ align = vec_lvsr(0, dst);
+
+ tmp2 = vec_perm(d, edges, align);
+ tmp1 = vec_perm(edges, d, align);
+
+ vec_st(tmp2, 15, dst);
+ vec_st(tmp1, 0 , dst);
+
+ dst += dst_stride;
+ }
+}
+
+static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
+ const uint8_t * src2, int dst_stride,
+ int src_stride1, int h)
+{
+ int i;
+ vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align;
+
+ mask_ = vec_lvsl(0, src2);
+
+ for (i = 0; i < h; i++) {
+
+ tmp1 = vec_ld(i * src_stride1, src1);
+ mask = vec_lvsl(i * src_stride1, src1);
+ tmp2 = vec_ld(i * src_stride1 + 15, src1);
+
+ a = vec_perm(tmp1, tmp2, mask);
+
+ tmp1 = vec_ld(i * 16, src2);
+ tmp2 = vec_ld(i * 16 + 15, src2);
+
+ b = vec_perm(tmp1, tmp2, mask_);
+
+ tmp1 = vec_ld(0, dst);
+ mask = vec_lvsl(0, dst);
+ tmp2 = vec_ld(15, dst);
+
+ d = vec_avg(vec_perm(tmp1, tmp2, mask), vec_avg(a, b));
+
+ edges = vec_perm(tmp2, tmp1, mask);
+
+ align = vec_lvsr(0, dst);
+
+ tmp2 = vec_perm(d, edges, align);
+ tmp1 = vec_perm(edges, d, align);
+
+ vec_st(tmp2, 15, dst);
+ vec_st(tmp1, 0 , dst);
+
+ dst += dst_stride;
+ }
+}
+
+/* Implemented but could be faster
+#define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h)
+#define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
+ */
+
+ H264_MC(put_, 16, altivec)
+ H264_MC(avg_, 16, altivec)
+
+
+/****************************************************************************
+ * IDCT transform:
+ ****************************************************************************/
+
+#define IDCT8_1D_ALTIVEC(s0, s1, s2, s3, s4, s5, s6, s7, d0, d1, d2, d3, d4, d5, d6, d7) {\
+ /* a0 = SRC(0) + SRC(4); */ \
+ vec_s16_t a0v = vec_add(s0, s4); \
+ /* a2 = SRC(0) - SRC(4); */ \
+ vec_s16_t a2v = vec_sub(s0, s4); \
+ /* a4 = (SRC(2)>>1) - SRC(6); */ \
+ vec_s16_t a4v = vec_sub(vec_sra(s2, onev), s6); \
+ /* a6 = (SRC(6)>>1) + SRC(2); */ \
+ vec_s16_t a6v = vec_add(vec_sra(s6, onev), s2); \
+ /* b0 = a0 + a6; */ \
+ vec_s16_t b0v = vec_add(a0v, a6v); \
+ /* b2 = a2 + a4; */ \
+ vec_s16_t b2v = vec_add(a2v, a4v); \
+ /* b4 = a2 - a4; */ \
+ vec_s16_t b4v = vec_sub(a2v, a4v); \
+ /* b6 = a0 - a6; */ \
+ vec_s16_t b6v = vec_sub(a0v, a6v); \
+ /* a1 = SRC(5) - SRC(3) - SRC(7) - (SRC(7)>>1); */ \
+ /* a1 = (SRC(5)-SRC(3)) - (SRC(7) + (SRC(7)>>1)); */ \
+ vec_s16_t a1v = vec_sub( vec_sub(s5, s3), vec_add(s7, vec_sra(s7, onev)) ); \
+ /* a3 = SRC(7) + SRC(1) - SRC(3) - (SRC(3)>>1); */ \
+ /* a3 = (SRC(7)+SRC(1)) - (SRC(3) + (SRC(3)>>1)); */ \
+ vec_s16_t a3v = vec_sub( vec_add(s7, s1), vec_add(s3, vec_sra(s3, onev)) );\
+ /* a5 = SRC(7) - SRC(1) + SRC(5) + (SRC(5)>>1); */ \
+ /* a5 = (SRC(7)-SRC(1)) + SRC(5) + (SRC(5)>>1); */ \
+ vec_s16_t a5v = vec_add( vec_sub(s7, s1), vec_add(s5, vec_sra(s5, onev)) );\
+ /* a7 = SRC(5)+SRC(3) + SRC(1) + (SRC(1)>>1); */ \
+ vec_s16_t a7v = vec_add( vec_add(s5, s3), vec_add(s1, vec_sra(s1, onev)) );\
+ /* b1 = (a7>>2) + a1; */ \
+ vec_s16_t b1v = vec_add( vec_sra(a7v, twov), a1v); \
+ /* b3 = a3 + (a5>>2); */ \
+ vec_s16_t b3v = vec_add(a3v, vec_sra(a5v, twov)); \
+ /* b5 = (a3>>2) - a5; */ \
+ vec_s16_t b5v = vec_sub( vec_sra(a3v, twov), a5v); \
+ /* b7 = a7 - (a1>>2); */ \
+ vec_s16_t b7v = vec_sub( a7v, vec_sra(a1v, twov)); \
+ /* DST(0, b0 + b7); */ \
+ d0 = vec_add(b0v, b7v); \
+ /* DST(1, b2 + b5); */ \
+ d1 = vec_add(b2v, b5v); \
+ /* DST(2, b4 + b3); */ \
+ d2 = vec_add(b4v, b3v); \
+ /* DST(3, b6 + b1); */ \
+ d3 = vec_add(b6v, b1v); \
+ /* DST(4, b6 - b1); */ \
+ d4 = vec_sub(b6v, b1v); \
+ /* DST(5, b4 - b3); */ \
+ d5 = vec_sub(b4v, b3v); \
+ /* DST(6, b2 - b5); */ \
+ d6 = vec_sub(b2v, b5v); \
+ /* DST(7, b0 - b7); */ \
+ d7 = vec_sub(b0v, b7v); \
+}
+
+#define ALTIVEC_STORE_SUM_CLIP(dest, idctv, perm_ldv, perm_stv, sel) { \
+ /* unaligned load */ \
+ vec_u8_t hv = vec_ld( 0, dest ); \
+ vec_u8_t lv = vec_ld( 7, dest ); \
+ vec_u8_t dstv = vec_perm( hv, lv, (vec_u8_t)perm_ldv ); \
+ vec_s16_t idct_sh6 = vec_sra(idctv, sixv); \
+ vec_u16_t dst16 = (vec_u16_t)vec_mergeh(zero_u8v, dstv); \
+ vec_s16_t idstsum = vec_adds(idct_sh6, (vec_s16_t)dst16); \
+ vec_u8_t idstsum8 = vec_packsu(zero_s16v, idstsum); \
+ vec_u8_t edgehv; \
+ /* unaligned store */ \
+ vec_u8_t bodyv = vec_perm( idstsum8, idstsum8, perm_stv );\
+ vec_u8_t edgelv = vec_perm( sel, zero_u8v, perm_stv ); \
+ lv = vec_sel( lv, bodyv, edgelv ); \
+ vec_st( lv, 7, dest ); \
+ hv = vec_ld( 0, dest ); \
+ edgehv = vec_perm( zero_u8v, sel, perm_stv ); \
+ hv = vec_sel( hv, bodyv, edgehv ); \
+ vec_st( hv, 0, dest ); \
+ }
+
+void ff_h264_idct8_add_altivec( uint8_t *dst, DCTELEM *dct, int stride ) {
+ vec_s16_t s0, s1, s2, s3, s4, s5, s6, s7;
+ vec_s16_t d0, d1, d2, d3, d4, d5, d6, d7;
+ vec_s16_t idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7;
+
+ vec_u8_t perm_ldv = vec_lvsl(0, dst);
+ vec_u8_t perm_stv = vec_lvsr(8, dst);
+
+ const vec_u16_t onev = vec_splat_u16(1);
+ const vec_u16_t twov = vec_splat_u16(2);
+ const vec_u16_t sixv = vec_splat_u16(6);
+
+ const vec_u8_t sel = (vec_u8_t) AVV(0,0,0,0,0,0,0,0,
+ -1,-1,-1,-1,-1,-1,-1,-1);
+ LOAD_ZERO;
+
+ dct[0] += 32; // rounding for the >>6 at the end
+
+ s0 = vec_ld(0x00, (int16_t*)dct);
+ s1 = vec_ld(0x10, (int16_t*)dct);
+ s2 = vec_ld(0x20, (int16_t*)dct);
+ s3 = vec_ld(0x30, (int16_t*)dct);
+ s4 = vec_ld(0x40, (int16_t*)dct);
+ s5 = vec_ld(0x50, (int16_t*)dct);
+ s6 = vec_ld(0x60, (int16_t*)dct);
+ s7 = vec_ld(0x70, (int16_t*)dct);
+
+ IDCT8_1D_ALTIVEC(s0, s1, s2, s3, s4, s5, s6, s7,
+ d0, d1, d2, d3, d4, d5, d6, d7);
+
+ TRANSPOSE8( d0, d1, d2, d3, d4, d5, d6, d7 );
+
+ IDCT8_1D_ALTIVEC(d0, d1, d2, d3, d4, d5, d6, d7,
+ idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7);
+
+ ALTIVEC_STORE_SUM_CLIP(&dst[0*stride], idct0, perm_ldv, perm_stv, sel);
+ ALTIVEC_STORE_SUM_CLIP(&dst[1*stride], idct1, perm_ldv, perm_stv, sel);
+ ALTIVEC_STORE_SUM_CLIP(&dst[2*stride], idct2, perm_ldv, perm_stv, sel);
+ ALTIVEC_STORE_SUM_CLIP(&dst[3*stride], idct3, perm_ldv, perm_stv, sel);
+ ALTIVEC_STORE_SUM_CLIP(&dst[4*stride], idct4, perm_ldv, perm_stv, sel);
+ ALTIVEC_STORE_SUM_CLIP(&dst[5*stride], idct5, perm_ldv, perm_stv, sel);
+ ALTIVEC_STORE_SUM_CLIP(&dst[6*stride], idct6, perm_ldv, perm_stv, sel);
+ ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel);
+}
+
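The dct[0] += 32 folds the rounding of the final >>6 into the DC coefficient before the first 1-D pass; after the row pass, TRANSPOSE8, and the column pass, ALTIVEC_STORE_SUM_CLIP shifts each result right by 6, adds it to the existing pixels with saturating adds, and packs unsigned. The per-pixel scalar tail:

    #include <stdint.h>
    /* final add/clip per pixel; the >>6 rounding was folded into dct[0] */
    static uint8_t add_clip_ref(uint8_t dst, int idct)
    {
        int v = dst + (idct >> 6);
        return (uint8_t)(v < 0 ? 0 : v > 255 ? 255 : v);
    }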
+void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
+
+#ifdef HAVE_ALTIVEC
+ if (has_altivec()) {
+ c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
+ c->put_no_rnd_h264_chroma_pixels_tab[0] = put_no_rnd_h264_chroma_mc8_altivec;
+ c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;
+ c->h264_idct8_add = ff_h264_idct8_add_altivec;
+
+#define dspfunc(PFX, IDX, NUM) \
+ c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \
+ c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_altivec; \
+ c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_altivec; \
+ c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_altivec; \
+ c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_altivec; \
+ c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_altivec; \
+ c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_altivec; \
+ c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_altivec; \
+ c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_altivec; \
+ c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_altivec; \
+ c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_altivec; \
+ c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_altivec; \
+ c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_altivec; \
+ c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \
+ c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \
+ c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec
+
+ dspfunc(put_h264_qpel, 0, 16);
+ dspfunc(avg_h264_qpel, 0, 16);
+#undef dspfunc
+
+ } else
+#endif /* HAVE_ALTIVEC */
+ {
+ // Non-AltiVec PPC optimisations
+
+ // ... pending ...
+ }
+}
diff --git a/src/libffmpeg/libavcodec/ppc/h264_template_altivec.c b/src/libffmpeg/libavcodec/ppc/h264_template_altivec.c
new file mode 100644
index 000000000..e8ad67f2f
--- /dev/null
+++ b/src/libffmpeg/libavcodec/ppc/h264_template_altivec.c
@@ -0,0 +1,719 @@
+/*
+ * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* this code assumes that stride % 16 == 0 */
+void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
+ POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1);
+ signed int ABCD[4] __attribute__((aligned(16))) =
+ {((8 - x) * (8 - y)),
+ ((x) * (8 - y)),
+ ((8 - x) * (y)),
+ ((x) * (y))};
+ register int i;
+ vector unsigned char fperm;
+ const vector signed int vABCD = vec_ld(0, ABCD);
+ const vector signed short vA = vec_splat((vector signed short)vABCD, 1);
+ const vector signed short vB = vec_splat((vector signed short)vABCD, 3);
+ const vector signed short vC = vec_splat((vector signed short)vABCD, 5);
+ const vector signed short vD = vec_splat((vector signed short)vABCD, 7);
+ const vector signed int vzero = vec_splat_s32(0);
+ const vector signed short v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5));
+ const vector unsigned short v6us = vec_splat_u16(6);
+ register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
+ register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
+
+ vector unsigned char vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1;
+ vector unsigned char vsrc0uc, vsrc1uc;
+ vector signed short vsrc0ssH, vsrc1ssH;
+ vector unsigned char vsrcCuc, vsrc2uc, vsrc3uc;
+ vector signed short vsrc2ssH, vsrc3ssH, psum;
+ vector unsigned char vdst, ppsum, vfdst, fsum;
+
+ POWERPC_PERF_START_COUNT(PREFIX_h264_chroma_mc8_num, 1);
+
+ if (((unsigned long)dst) % 16 == 0) {
+ fperm = (vector unsigned char)AVV(0x10, 0x11, 0x12, 0x13,
+ 0x14, 0x15, 0x16, 0x17,
+ 0x08, 0x09, 0x0A, 0x0B,
+ 0x0C, 0x0D, 0x0E, 0x0F);
+ } else {
+ fperm = (vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03,
+ 0x04, 0x05, 0x06, 0x07,
+ 0x18, 0x19, 0x1A, 0x1B,
+ 0x1C, 0x1D, 0x1E, 0x1F);
+ }
+
+ vsrcAuc = vec_ld(0, src);
+
+ if (loadSecond)
+ vsrcBuc = vec_ld(16, src);
+ vsrcperm0 = vec_lvsl(0, src);
+ vsrcperm1 = vec_lvsl(1, src);
+
+ vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
+ if (reallyBadAlign)
+ vsrc1uc = vsrcBuc;
+ else
+ vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);
+
+ vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+ (vector unsigned char)vsrc0uc);
+ vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+ (vector unsigned char)vsrc1uc);
+
+ if (!loadSecond) { // !loadSecond implies !reallyBadAlign
+ for (i = 0 ; i < h ; i++) {
+
+
+ vsrcCuc = vec_ld(stride + 0, src);
+
+ vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
+ vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
+
+ vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+ (vector unsigned char)vsrc2uc);
+ vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+ (vector unsigned char)vsrc3uc);
+
+ psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
+ psum = vec_mladd(vB, vsrc1ssH, psum);
+ psum = vec_mladd(vC, vsrc2ssH, psum);
+ psum = vec_mladd(vD, vsrc3ssH, psum);
+ psum = vec_add(v32ss, psum);
+ psum = vec_sra(psum, v6us);
+
+ vdst = vec_ld(0, dst);
+ ppsum = (vector unsigned char)vec_packsu(psum, psum);
+ vfdst = vec_perm(vdst, ppsum, fperm);
+
+ OP_U8_ALTIVEC(fsum, vfdst, vdst);
+
+ vec_st(fsum, 0, dst);
+
+ vsrc0ssH = vsrc2ssH;
+ vsrc1ssH = vsrc3ssH;
+
+ dst += stride;
+ src += stride;
+ }
+ } else {
+ vector unsigned char vsrcDuc;
+ for (i = 0 ; i < h ; i++) {
+ vsrcCuc = vec_ld(stride + 0, src);
+ vsrcDuc = vec_ld(stride + 16, src);
+
+ vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
+ if (reallyBadAlign)
+ vsrc3uc = vsrcDuc;
+ else
+ vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
+
+ vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+ (vector unsigned char)vsrc2uc);
+ vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+ (vector unsigned char)vsrc3uc);
+
+ psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
+ psum = vec_mladd(vB, vsrc1ssH, psum);
+ psum = vec_mladd(vC, vsrc2ssH, psum);
+ psum = vec_mladd(vD, vsrc3ssH, psum);
+ psum = vec_add(v32ss, psum);
+ psum = vec_sr(psum, v6us);
+
+ vdst = vec_ld(0, dst);
+ ppsum = (vector unsigned char)vec_pack(psum, psum);
+ vfdst = vec_perm(vdst, ppsum, fperm);
+
+ OP_U8_ALTIVEC(fsum, vfdst, vdst);
+
+ vec_st(fsum, 0, dst);
+
+ vsrc0ssH = vsrc2ssH;
+ vsrc1ssH = vsrc3ssH;
+
+ dst += stride;
+ src += stride;
+ }
+ }
+ POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1);
+}
+
+/* this code assumes stride % 16 == 0 */
+static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
+ POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_h_lowpass_num, 1);
+ register int i;
+
+ const vector signed int vzero = vec_splat_s32(0);
+ const vector unsigned char permM2 = vec_lvsl(-2, src);
+ const vector unsigned char permM1 = vec_lvsl(-1, src);
+ const vector unsigned char permP0 = vec_lvsl(+0, src);
+ const vector unsigned char permP1 = vec_lvsl(+1, src);
+ const vector unsigned char permP2 = vec_lvsl(+2, src);
+ const vector unsigned char permP3 = vec_lvsl(+3, src);
+ const vector signed short v5ss = vec_splat_s16(5);
+ const vector unsigned short v5us = vec_splat_u16(5);
+ const vector signed short v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
+ const vector signed short v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));
+ const vector unsigned char dstperm = vec_lvsr(0, dst);
+ const vector unsigned char neg1 =
+ (const vector unsigned char) vec_splat_s8(-1);
+
+ const vector unsigned char dstmask =
+ vec_perm((const vector unsigned char)vzero,
+ neg1, dstperm);
+
+ vector unsigned char srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
+
+ register int align = ((((unsigned long)src) - 2) % 16);
+
+ vector signed short srcP0A, srcP0B, srcP1A, srcP1B,
+ srcP2A, srcP2B, srcP3A, srcP3B,
+ srcM1A, srcM1B, srcM2A, srcM2B,
+ sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,
+ pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,
+ psumA, psumB, sumA, sumB;
+
+ vector unsigned char sum, dst1, dst2, vdst, fsum,
+ rsum, fdst1, fdst2;
+
+ POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
+
+ for (i = 0 ; i < 16 ; i ++) {
+ vector unsigned char srcR1 = vec_ld(-2, src);
+ vector unsigned char srcR2 = vec_ld(14, src);
+
+ switch (align) {
+ default: {
+ srcM2 = vec_perm(srcR1, srcR2, permM2);
+ srcM1 = vec_perm(srcR1, srcR2, permM1);
+ srcP0 = vec_perm(srcR1, srcR2, permP0);
+ srcP1 = vec_perm(srcR1, srcR2, permP1);
+ srcP2 = vec_perm(srcR1, srcR2, permP2);
+ srcP3 = vec_perm(srcR1, srcR2, permP3);
+ } break;
+ case 11: {
+ srcM2 = vec_perm(srcR1, srcR2, permM2);
+ srcM1 = vec_perm(srcR1, srcR2, permM1);
+ srcP0 = vec_perm(srcR1, srcR2, permP0);
+ srcP1 = vec_perm(srcR1, srcR2, permP1);
+ srcP2 = vec_perm(srcR1, srcR2, permP2);
+ srcP3 = srcR2;
+ } break;
+ case 12: {
+ vector unsigned char srcR3 = vec_ld(30, src);
+ srcM2 = vec_perm(srcR1, srcR2, permM2);
+ srcM1 = vec_perm(srcR1, srcR2, permM1);
+ srcP0 = vec_perm(srcR1, srcR2, permP0);
+ srcP1 = vec_perm(srcR1, srcR2, permP1);
+ srcP2 = srcR2;
+ srcP3 = vec_perm(srcR2, srcR3, permP3);
+ } break;
+ case 13: {
+ vector unsigned char srcR3 = vec_ld(30, src);
+ srcM2 = vec_perm(srcR1, srcR2, permM2);
+ srcM1 = vec_perm(srcR1, srcR2, permM1);
+ srcP0 = vec_perm(srcR1, srcR2, permP0);
+ srcP1 = srcR2;
+ srcP2 = vec_perm(srcR2, srcR3, permP2);
+ srcP3 = vec_perm(srcR2, srcR3, permP3);
+ } break;
+ case 14: {
+ vector unsigned char srcR3 = vec_ld(30, src);
+ srcM2 = vec_perm(srcR1, srcR2, permM2);
+ srcM1 = vec_perm(srcR1, srcR2, permM1);
+ srcP0 = srcR2;
+ srcP1 = vec_perm(srcR2, srcR3, permP1);
+ srcP2 = vec_perm(srcR2, srcR3, permP2);
+ srcP3 = vec_perm(srcR2, srcR3, permP3);
+ } break;
+ case 15: {
+ vector unsigned char srcR3 = vec_ld(30, src);
+ srcM2 = vec_perm(srcR1, srcR2, permM2);
+ srcM1 = srcR2;
+ srcP0 = vec_perm(srcR2, srcR3, permP0);
+ srcP1 = vec_perm(srcR2, srcR3, permP1);
+ srcP2 = vec_perm(srcR2, srcR3, permP2);
+ srcP3 = vec_perm(srcR2, srcR3, permP3);
+ } break;
+ }
+
+ srcP0A = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcP0);
+ srcP0B = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcP0);
+ srcP1A = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcP1);
+ srcP1B = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcP1);
+
+ srcP2A = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcP2);
+ srcP2B = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcP2);
+ srcP3A = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcP3);
+ srcP3B = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcP3);
+
+ srcM1A = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcM1);
+ srcM1B = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcM1);
+ srcM2A = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcM2);
+ srcM2B = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcM2);
+
+ sum1A = vec_adds(srcP0A, srcP1A);
+ sum1B = vec_adds(srcP0B, srcP1B);
+ sum2A = vec_adds(srcM1A, srcP2A);
+ sum2B = vec_adds(srcM1B, srcP2B);
+ sum3A = vec_adds(srcM2A, srcP3A);
+ sum3B = vec_adds(srcM2B, srcP3B);
+
+ pp1A = vec_mladd(sum1A, v20ss, v16ss);
+ pp1B = vec_mladd(sum1B, v20ss, v16ss);
+
+ pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero);
+ pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero);
+
+ pp3A = vec_add(sum3A, pp1A);
+ pp3B = vec_add(sum3B, pp1B);
+
+ psumA = vec_sub(pp3A, pp2A);
+ psumB = vec_sub(pp3B, pp2B);
+
+ sumA = vec_sra(psumA, v5us);
+ sumB = vec_sra(psumB, v5us);
+
+ sum = vec_packsu(sumA, sumB);
+
+ dst1 = vec_ld(0, dst);
+ dst2 = vec_ld(16, dst);
+ vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst));
+
+ OP_U8_ALTIVEC(fsum, sum, vdst);
+
+ rsum = vec_perm(fsum, fsum, dstperm);
+ fdst1 = vec_sel(dst1, rsum, dstmask);
+ fdst2 = vec_sel(rsum, dst2, dstmask);
+
+ vec_st(fdst1, 0, dst);
+ vec_st(fdst2, 16, dst);
+
+ src += srcStride;
+ dst += dstStride;
+ }
+POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
+}
+
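The arithmetic above is the standard H.264 6-tap half-pel filter (1, -5, 20, 20, -5, 1): the taps are paired (sum1 = P0+P1, sum2 = M1+P2, sum3 = M2+P3), weighted, biased by 16 and shifted right by 5, then saturated to 8 bits; the switch on align only avoids a third vec_ld when the 21 needed source bytes already sit in two quadwords. Scalar reference for one sample, with src pointing at the left full-pel neighbour:

    #include <stdint.h>
    static uint8_t h264_halfpel_ref(const uint8_t *src)
    {
        int v = 20 * (src[0]  + src[1])
              -  5 * (src[-1] + src[2])
              +      (src[-2] + src[3]);
        v = (v + 16) >> 5;
        return (uint8_t)(v < 0 ? 0 : v > 255 ? 255 : v);
    }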
+/* this code assumes stride % 16 == 0 */
+static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
+ POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_v_lowpass_num, 1);
+
+ register int i;
+
+ const vector signed int vzero = vec_splat_s32(0);
+ const vector unsigned char perm = vec_lvsl(0, src);
+ const vector signed short v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
+ const vector unsigned short v5us = vec_splat_u16(5);
+ const vector signed short v5ss = vec_splat_s16(5);
+ const vector signed short v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));
+ const vector unsigned char dstperm = vec_lvsr(0, dst);
+ const vector unsigned char neg1 = (const vector unsigned char)vec_splat_s8(-1);
+ const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm);
+
+ uint8_t *srcbis = src - (srcStride * 2);
+
+ const vector unsigned char srcM2a = vec_ld(0, srcbis);
+ const vector unsigned char srcM2b = vec_ld(16, srcbis);
+ const vector unsigned char srcM2 = vec_perm(srcM2a, srcM2b, perm);
+// srcbis += srcStride;
+ const vector unsigned char srcM1a = vec_ld(0, srcbis += srcStride);
+ const vector unsigned char srcM1b = vec_ld(16, srcbis);
+ const vector unsigned char srcM1 = vec_perm(srcM1a, srcM1b, perm);
+// srcbis += srcStride;
+ const vector unsigned char srcP0a = vec_ld(0, srcbis += srcStride);
+ const vector unsigned char srcP0b = vec_ld(16, srcbis);
+ const vector unsigned char srcP0 = vec_perm(srcP0a, srcP0b, perm);
+// srcbis += srcStride;
+ const vector unsigned char srcP1a = vec_ld(0, srcbis += srcStride);
+ const vector unsigned char srcP1b = vec_ld(16, srcbis);
+ const vector unsigned char srcP1 = vec_perm(srcP1a, srcP1b, perm);
+// srcbis += srcStride;
+ const vector unsigned char srcP2a = vec_ld(0, srcbis += srcStride);
+ const vector unsigned char srcP2b = vec_ld(16, srcbis);
+ const vector unsigned char srcP2 = vec_perm(srcP2a, srcP2b, perm);
+// srcbis += srcStride;
+
+ vector signed short srcM2ssA = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcM2);
+ vector signed short srcM2ssB = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcM2);
+ vector signed short srcM1ssA = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcM1);
+ vector signed short srcM1ssB = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcM1);
+ vector signed short srcP0ssA = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcP0);
+ vector signed short srcP0ssB = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcP0);
+ vector signed short srcP1ssA = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcP1);
+ vector signed short srcP1ssB = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcP1);
+ vector signed short srcP2ssA = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcP2);
+ vector signed short srcP2ssB = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcP2);
+
+ vector signed short pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,
+ psumA, psumB, sumA, sumB,
+ srcP3ssA, srcP3ssB,
+ sum1A, sum1B, sum2A, sum2B, sum3A, sum3B;
+
+ vector unsigned char sum, dst1, dst2, vdst, fsum, rsum, fdst1, fdst2,
+ srcP3a, srcP3b, srcP3;
+
+ POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);
+
+ for (i = 0 ; i < 16 ; i++) {
+ srcP3a = vec_ld(0, srcbis += srcStride);
+ srcP3b = vec_ld(16, srcbis);
+ srcP3 = vec_perm(srcP3a, srcP3b, perm);
+ srcP3ssA = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcP3);
+ srcP3ssB = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcP3);
+// srcbis += srcStride;
+
+ sum1A = vec_adds(srcP0ssA, srcP1ssA);
+ sum1B = vec_adds(srcP0ssB, srcP1ssB);
+ sum2A = vec_adds(srcM1ssA, srcP2ssA);
+ sum2B = vec_adds(srcM1ssB, srcP2ssB);
+ sum3A = vec_adds(srcM2ssA, srcP3ssA);
+ sum3B = vec_adds(srcM2ssB, srcP3ssB);
+
+ srcM2ssA = srcM1ssA;
+ srcM2ssB = srcM1ssB;
+ srcM1ssA = srcP0ssA;
+ srcM1ssB = srcP0ssB;
+ srcP0ssA = srcP1ssA;
+ srcP0ssB = srcP1ssB;
+ srcP1ssA = srcP2ssA;
+ srcP1ssB = srcP2ssB;
+ srcP2ssA = srcP3ssA;
+ srcP2ssB = srcP3ssB;
+
+ pp1A = vec_mladd(sum1A, v20ss, v16ss);
+ pp1B = vec_mladd(sum1B, v20ss, v16ss);
+
+ pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero);
+ pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero);
+
+ pp3A = vec_add(sum3A, pp1A);
+ pp3B = vec_add(sum3B, pp1B);
+
+ psumA = vec_sub(pp3A, pp2A);
+ psumB = vec_sub(pp3B, pp2B);
+
+ sumA = vec_sra(psumA, v5us);
+ sumB = vec_sra(psumB, v5us);
+
+ sum = vec_packsu(sumA, sumB);
+
+ dst1 = vec_ld(0, dst);
+ dst2 = vec_ld(16, dst);
+ vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst));
+
+ OP_U8_ALTIVEC(fsum, sum, vdst);
+
+ rsum = vec_perm(fsum, fsum, dstperm);
+ fdst1 = vec_sel(dst1, rsum, dstmask);
+ fdst2 = vec_sel(rsum, dst2, dstmask);
+
+ vec_st(fdst1, 0, dst);
+ vec_st(fdst2, 16, dst);
+
+ dst += dstStride;
+ }
+ POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);
+}
+
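The vertical variant applies the same 6-tap filter down each column; instead of reloading six rows per output line it keeps them in registers and rotates the window (srcM2 <- srcM1 <- ... <- srcP3), loading only the new bottom row. The same computation in scalar form, reloading the rows the AltiVec code keeps resident:

    #include <stdint.h>
    /* vertical 6-tap on one column of h output samples */
    static void v_lowpass_col_ref(uint8_t *dst, const uint8_t *src,
                                  int stride, int h)
    {
        int y;
        for (y = 0; y < h; y++) {
            const uint8_t *p = src + y * stride;
            int v = 20 * (p[0]           + p[stride])
                  -  5 * (p[-stride]     + p[2 * stride])
                  +      (p[-2 * stride] + p[3 * stride]);
            v = (v + 16) >> 5;
            dst[y * stride] = (uint8_t)(v < 0 ? 0 : v > 255 ? 255 : v);
        }
    }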
+/* this code assumes stride % 16 == 0 *and* tmp is properly aligned */
+static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp, uint8_t * src, int dstStride, int tmpStride, int srcStride) {
+ POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_hv_lowpass_num, 1);
+ register int i;
+ const vector signed int vzero = vec_splat_s32(0);
+ const vector unsigned char permM2 = vec_lvsl(-2, src);
+ const vector unsigned char permM1 = vec_lvsl(-1, src);
+ const vector unsigned char permP0 = vec_lvsl(+0, src);
+ const vector unsigned char permP1 = vec_lvsl(+1, src);
+ const vector unsigned char permP2 = vec_lvsl(+2, src);
+ const vector unsigned char permP3 = vec_lvsl(+3, src);
+ const vector signed short v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
+ const vector unsigned int v10ui = vec_splat_u32(10);
+ const vector signed short v5ss = vec_splat_s16(5);
+ const vector signed short v1ss = vec_splat_s16(1);
+ const vector signed int v512si = vec_sl(vec_splat_s32(1),vec_splat_u32(9));
+ const vector unsigned int v16ui = vec_sl(vec_splat_u32(1),vec_splat_u32(4));
+
+ register int align = ((((unsigned long)src) - 2) % 16);
+
+ const vector unsigned char neg1 = (const vector unsigned char)
+ vec_splat_s8(-1);
+
+ vector signed short srcP0A, srcP0B, srcP1A, srcP1B,
+ srcP2A, srcP2B, srcP3A, srcP3B,
+ srcM1A, srcM1B, srcM2A, srcM2B,
+ sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,
+ pp1A, pp1B, pp2A, pp2B, psumA, psumB;
+
+ const vector unsigned char dstperm = vec_lvsr(0, dst);
+
+ const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm);
+
+ const vector unsigned char mperm = (const vector unsigned char)
+ AVV(0x00, 0x08, 0x01, 0x09, 0x02, 0x0A, 0x03, 0x0B,
+ 0x04, 0x0C, 0x05, 0x0D, 0x06, 0x0E, 0x07, 0x0F);
+ int16_t *tmpbis = tmp;
+
+ vector signed short tmpM1ssA, tmpM1ssB, tmpM2ssA, tmpM2ssB,
+ tmpP0ssA, tmpP0ssB, tmpP1ssA, tmpP1ssB,
+ tmpP2ssA, tmpP2ssB;
+
+ vector signed int pp1Ae, pp1Ao, pp1Be, pp1Bo, pp2Ae, pp2Ao, pp2Be, pp2Bo,
+ pp3Ae, pp3Ao, pp3Be, pp3Bo, pp1cAe, pp1cAo, pp1cBe, pp1cBo,
+ pp32Ae, pp32Ao, pp32Be, pp32Bo, sumAe, sumAo, sumBe, sumBo,
+ ssumAe, ssumAo, ssumBe, ssumBo;
+ vector unsigned char fsum, sumv, sum, dst1, dst2, vdst,
+ rsum, fdst1, fdst2;
+ vector signed short ssume, ssumo;
+
+ POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1);
+ src -= (2 * srcStride);
+ for (i = 0 ; i < 21 ; i ++) {
+ vector unsigned char srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
+ vector unsigned char srcR1 = vec_ld(-2, src);
+ vector unsigned char srcR2 = vec_ld(14, src);
+
+ switch (align) {
+ default: {
+ srcM2 = vec_perm(srcR1, srcR2, permM2);
+ srcM1 = vec_perm(srcR1, srcR2, permM1);
+ srcP0 = vec_perm(srcR1, srcR2, permP0);
+ srcP1 = vec_perm(srcR1, srcR2, permP1);
+ srcP2 = vec_perm(srcR1, srcR2, permP2);
+ srcP3 = vec_perm(srcR1, srcR2, permP3);
+ } break;
+ case 11: {
+ srcM2 = vec_perm(srcR1, srcR2, permM2);
+ srcM1 = vec_perm(srcR1, srcR2, permM1);
+ srcP0 = vec_perm(srcR1, srcR2, permP0);
+ srcP1 = vec_perm(srcR1, srcR2, permP1);
+ srcP2 = vec_perm(srcR1, srcR2, permP2);
+ srcP3 = srcR2;
+ } break;
+ case 12: {
+ vector unsigned char srcR3 = vec_ld(30, src);
+ srcM2 = vec_perm(srcR1, srcR2, permM2);
+ srcM1 = vec_perm(srcR1, srcR2, permM1);
+ srcP0 = vec_perm(srcR1, srcR2, permP0);
+ srcP1 = vec_perm(srcR1, srcR2, permP1);
+ srcP2 = srcR2;
+ srcP3 = vec_perm(srcR2, srcR3, permP3);
+ } break;
+ case 13: {
+ vector unsigned char srcR3 = vec_ld(30, src);
+ srcM2 = vec_perm(srcR1, srcR2, permM2);
+ srcM1 = vec_perm(srcR1, srcR2, permM1);
+ srcP0 = vec_perm(srcR1, srcR2, permP0);
+ srcP1 = srcR2;
+ srcP2 = vec_perm(srcR2, srcR3, permP2);
+ srcP3 = vec_perm(srcR2, srcR3, permP3);
+ } break;
+ case 14: {
+ vector unsigned char srcR3 = vec_ld(30, src);
+ srcM2 = vec_perm(srcR1, srcR2, permM2);
+ srcM1 = vec_perm(srcR1, srcR2, permM1);
+ srcP0 = srcR2;
+ srcP1 = vec_perm(srcR2, srcR3, permP1);
+ srcP2 = vec_perm(srcR2, srcR3, permP2);
+ srcP3 = vec_perm(srcR2, srcR3, permP3);
+ } break;
+ case 15: {
+ vector unsigned char srcR3 = vec_ld(30, src);
+ srcM2 = vec_perm(srcR1, srcR2, permM2);
+ srcM1 = srcR2;
+ srcP0 = vec_perm(srcR2, srcR3, permP0);
+ srcP1 = vec_perm(srcR2, srcR3, permP1);
+ srcP2 = vec_perm(srcR2, srcR3, permP2);
+ srcP3 = vec_perm(srcR2, srcR3, permP3);
+ } break;
+ }
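+    /* alignments 11-15 are special-cased: some taps then start at or past
+       srcR2, so they are taken as srcR2 directly or permuted from a third
+       load (srcR3) instead of from the srcR1/srcR2 pair */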
+
+ srcP0A = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcP0);
+ srcP0B = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcP0);
+ srcP1A = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcP1);
+ srcP1B = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcP1);
+
+ srcP2A = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcP2);
+ srcP2B = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcP2);
+ srcP3A = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcP3);
+ srcP3B = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcP3);
+
+ srcM1A = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcM1);
+ srcM1B = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcM1);
+ srcM2A = (vector signed short)
+ vec_mergeh((vector unsigned char)vzero, srcM2);
+ srcM2B = (vector signed short)
+ vec_mergel((vector unsigned char)vzero, srcM2);
+
+ sum1A = vec_adds(srcP0A, srcP1A);
+ sum1B = vec_adds(srcP0B, srcP1B);
+ sum2A = vec_adds(srcM1A, srcP2A);
+ sum2B = vec_adds(srcM1B, srcP2B);
+ sum3A = vec_adds(srcM2A, srcP3A);
+ sum3B = vec_adds(srcM2B, srcP3B);
+
+ pp1A = vec_mladd(sum1A, v20ss, sum3A);
+ pp1B = vec_mladd(sum1B, v20ss, sum3B);
+
+ pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero);
+ pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero);
+
+ psumA = vec_sub(pp1A, pp2A);
+ psumB = vec_sub(pp1B, pp2B);
+
+ vec_st(psumA, 0, tmp);
+ vec_st(psumB, 16, tmp);
+
+ src += srcStride;
+ tmp += tmpStride; /* int16_t*, and stride is 16, so it's OK here */
+ }
+
+ tmpM2ssA = vec_ld(0, tmpbis);
+ tmpM2ssB = vec_ld(16, tmpbis);
+ tmpbis += tmpStride;
+ tmpM1ssA = vec_ld(0, tmpbis);
+ tmpM1ssB = vec_ld(16, tmpbis);
+ tmpbis += tmpStride;
+ tmpP0ssA = vec_ld(0, tmpbis);
+ tmpP0ssB = vec_ld(16, tmpbis);
+ tmpbis += tmpStride;
+ tmpP1ssA = vec_ld(0, tmpbis);
+ tmpP1ssB = vec_ld(16, tmpbis);
+ tmpbis += tmpStride;
+ tmpP2ssA = vec_ld(0, tmpbis);
+ tmpP2ssB = vec_ld(16, tmpbis);
+ tmpbis += tmpStride;
+
+ for (i = 0 ; i < 16 ; i++) {
+ const vector signed short tmpP3ssA = vec_ld(0, tmpbis);
+ const vector signed short tmpP3ssB = vec_ld(16, tmpbis);
+
+ const vector signed short sum1A = vec_adds(tmpP0ssA, tmpP1ssA);
+ const vector signed short sum1B = vec_adds(tmpP0ssB, tmpP1ssB);
+ const vector signed short sum2A = vec_adds(tmpM1ssA, tmpP2ssA);
+ const vector signed short sum2B = vec_adds(tmpM1ssB, tmpP2ssB);
+ const vector signed short sum3A = vec_adds(tmpM2ssA, tmpP3ssA);
+ const vector signed short sum3B = vec_adds(tmpM2ssB, tmpP3ssB);
+
+ tmpbis += tmpStride;
+
+ tmpM2ssA = tmpM1ssA;
+ tmpM2ssB = tmpM1ssB;
+ tmpM1ssA = tmpP0ssA;
+ tmpM1ssB = tmpP0ssB;
+ tmpP0ssA = tmpP1ssA;
+ tmpP0ssB = tmpP1ssB;
+ tmpP1ssA = tmpP2ssA;
+ tmpP1ssB = tmpP2ssB;
+ tmpP2ssA = tmpP3ssA;
+ tmpP2ssB = tmpP3ssB;
+
+ pp1Ae = vec_mule(sum1A, v20ss);
+ pp1Ao = vec_mulo(sum1A, v20ss);
+ pp1Be = vec_mule(sum1B, v20ss);
+ pp1Bo = vec_mulo(sum1B, v20ss);
+
+ pp2Ae = vec_mule(sum2A, v5ss);
+ pp2Ao = vec_mulo(sum2A, v5ss);
+ pp2Be = vec_mule(sum2B, v5ss);
+ pp2Bo = vec_mulo(sum2B, v5ss);
+
+ pp3Ae = vec_sra((vector signed int)sum3A, v16ui);
+ pp3Ao = vec_mulo(sum3A, v1ss);
+ pp3Be = vec_sra((vector signed int)sum3B, v16ui);
+ pp3Bo = vec_mulo(sum3B, v1ss);
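+    /* even/odd split: vec_mule/vec_mulo give 32-bit products of the even
+       and odd 16-bit lanes; for the x1 term (sum3) the even lanes come
+       from an arithmetic >>16 of the vector reread as s32, which
+       sign-extends them, and the odd lanes from vec_mulo by 1 */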
+
+ pp1cAe = vec_add(pp1Ae, v512si);
+ pp1cAo = vec_add(pp1Ao, v512si);
+ pp1cBe = vec_add(pp1Be, v512si);
+ pp1cBo = vec_add(pp1Bo, v512si);
+
+ pp32Ae = vec_sub(pp3Ae, pp2Ae);
+ pp32Ao = vec_sub(pp3Ao, pp2Ao);
+ pp32Be = vec_sub(pp3Be, pp2Be);
+ pp32Bo = vec_sub(pp3Bo, pp2Bo);
+
+ sumAe = vec_add(pp1cAe, pp32Ae);
+ sumAo = vec_add(pp1cAo, pp32Ao);
+ sumBe = vec_add(pp1cBe, pp32Be);
+ sumBo = vec_add(pp1cBo, pp32Bo);
+
+ ssumAe = vec_sra(sumAe, v10ui);
+ ssumAo = vec_sra(sumAo, v10ui);
+ ssumBe = vec_sra(sumBe, v10ui);
+ ssumBo = vec_sra(sumBo, v10ui);
+
+ ssume = vec_packs(ssumAe, ssumBe);
+ ssumo = vec_packs(ssumAo, ssumBo);
+
+ sumv = vec_packsu(ssume, ssumo);
+ sum = vec_perm(sumv, sumv, mperm);
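+    /* the packs leave the eight even results in the low half and the
+       eight odd results in the high half; mperm interleaves them back
+       into pixel order */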
+
+ dst1 = vec_ld(0, dst);
+ dst2 = vec_ld(16, dst);
+ vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst));
+
+ OP_U8_ALTIVEC(fsum, sum, vdst);
+
+ rsum = vec_perm(fsum, fsum, dstperm);
+ fdst1 = vec_sel(dst1, rsum, dstmask);
+ fdst2 = vec_sel(rsum, dst2, dstmask);
+
+ vec_st(fdst1, 0, dst);
+ vec_st(fdst2, 16, dst);
+
+ dst += dstStride;
+ }
+ POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1);
+}
diff --git a/src/libffmpeg/libavcodec/ppc/mathops.h b/src/libffmpeg/libavcodec/ppc/mathops.h
new file mode 100644
index 000000000..6af23f246
--- /dev/null
+++ b/src/libffmpeg/libavcodec/ppc/mathops.h
@@ -0,0 +1,33 @@
+/*
+ * simple math operations
+ * Copyright (c) 2001, 2002 Fabrice Bellard.
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#if defined(ARCH_POWERPC_405)
+/* signed 16x16 -> 32 multiply add accumulate */
+# define MAC16(rt, ra, rb) \
+ asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
+
+/* signed 16x16 -> 32 multiply */
+# define MUL16(ra, rb) \
+    ({ int __rt; \
+       asm ("mullhw %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); \
+ __rt; })
+#endif
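+/* maclhw/mullhw are PowerPC 405 halfword multiply(-accumulate)
+   instructions; other PPC cores keep the generic C versions */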
diff --git a/src/libffmpeg/libavcodec/ppc/snow_altivec.c b/src/libffmpeg/libavcodec/ppc/snow_altivec.c
new file mode 100644
index 000000000..b15672ffe
--- /dev/null
+++ b/src/libffmpeg/libavcodec/ppc/snow_altivec.c
@@ -0,0 +1,788 @@
+/*
+ * Altivec optimized snow DSP utils
+ * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ *
+ */
+
+#include "../dsputil.h"
+
+#include "gcc_fixes.h"
+#include "dsputil_altivec.h"
+#include "../snow.h"
+
+#undef NDEBUG
+#include <assert.h>
+
+
+
+//FIXME remove this replication
+#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
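+/* returns the cached line if present, otherwise pulls a buffer off the
+   data stack and registers it (slice_buffer_load_line below) */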
+
+static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
+{
+ int offset;
+ DWTELEM * buffer;
+
+// av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
+
+ assert(buf->data_stack_top >= 0);
+// assert(!buf->line[line]);
+ if (buf->line[line])
+ return buf->line[line];
+
+ offset = buf->line_width * line;
+ buffer = buf->data_stack[buf->data_stack_top];
+ buf->data_stack_top--;
+ buf->line[line] = buffer;
+
+// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
+
+ return buffer;
+}
+
+
+//altivec code
+
+void ff_snow_horizontal_compose97i_altivec(DWTELEM *b, int width)
+{
+ const int w2= (width+1)>>1;
+ DECLARE_ALIGNED_16(DWTELEM, temp[(width>>1)]);
+ const int w_l= (width>>1);
+ const int w_r= w2 - 1;
+ int i;
+ vector signed int t1, t2, x, y, tmp1, tmp2;
+ vector signed int *vbuf, *vtmp;
+ vector unsigned char align;
+
+
+
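+    /* inverse 9/7 lifting in four passes (Lift 0..3) plus a final
+       interleave of the low/high bands; each vector loop keeps its scalar
+       reference in the #if 0 branch and the *_lead_out helpers finish the
+       unaligned tail */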
+ { // Lift 0
+ DWTELEM * const ref = b + w2 - 1;
+ DWTELEM b_0 = b[0];
+ vbuf = (vector signed int *)b;
+
+ tmp1 = vec_ld (0, ref);
+ align = vec_lvsl (0, ref);
+ tmp2 = vec_ld (15, ref);
+ t1= vec_perm(tmp1, tmp2, align);
+
+ i = 0;
+
+ for (i=0; i<w_l-15; i+=16) {
+#if 0
+ b[i+0] = b[i+0] - ((3 * (ref[i+0] + ref[i+1]) + 4) >> 3);
+ b[i+1] = b[i+1] - ((3 * (ref[i+1] + ref[i+2]) + 4) >> 3);
+ b[i+2] = b[i+2] - ((3 * (ref[i+2] + ref[i+3]) + 4) >> 3);
+ b[i+3] = b[i+3] - ((3 * (ref[i+3] + ref[i+4]) + 4) >> 3);
+#else
+
+ tmp1 = vec_ld (0, ref+4+i);
+ tmp2 = vec_ld (15, ref+4+i);
+
+ t2 = vec_perm(tmp1, tmp2, align);
+
+ y = vec_add(t1,vec_sld(t1,t2,4));
+ y = vec_add(vec_add(y,y),y);
+
+ tmp1 = vec_ld (0, ref+8+i);
+
+ y = vec_add(y, vec_splat_s32(4));
+ y = vec_sra(y, vec_splat_u32(3));
+
+ tmp2 = vec_ld (15, ref+8+i);
+
+ *vbuf = vec_sub(*vbuf, y);
+
+ t1=t2;
+
+ vbuf++;
+
+ t2 = vec_perm(tmp1, tmp2, align);
+
+ y = vec_add(t1,vec_sld(t1,t2,4));
+ y = vec_add(vec_add(y,y),y);
+
+ tmp1 = vec_ld (0, ref+12+i);
+
+ y = vec_add(y, vec_splat_s32(4));
+ y = vec_sra(y, vec_splat_u32(3));
+
+ tmp2 = vec_ld (15, ref+12+i);
+
+ *vbuf = vec_sub(*vbuf, y);
+
+ t1=t2;
+
+ vbuf++;
+
+ t2 = vec_perm(tmp1, tmp2, align);
+
+ y = vec_add(t1,vec_sld(t1,t2,4));
+ y = vec_add(vec_add(y,y),y);
+
+ tmp1 = vec_ld (0, ref+16+i);
+
+ y = vec_add(y, vec_splat_s32(4));
+ y = vec_sra(y, vec_splat_u32(3));
+
+ tmp2 = vec_ld (15, ref+16+i);
+
+ *vbuf = vec_sub(*vbuf, y);
+
+ t1=t2;
+
+ t2 = vec_perm(tmp1, tmp2, align);
+
+ y = vec_add(t1,vec_sld(t1,t2,4));
+ y = vec_add(vec_add(y,y),y);
+
+ vbuf++;
+
+ y = vec_add(y, vec_splat_s32(4));
+ y = vec_sra(y, vec_splat_u32(3));
+ *vbuf = vec_sub(*vbuf, y);
+
+ t1=t2;
+
+ vbuf++;
+
+#endif
+ }
+
+ snow_horizontal_compose_lift_lead_out(i, b, b, ref, width, w_l, 0, W_DM, W_DO, W_DS);
+ b[0] = b_0 - ((W_DM * 2 * ref[1]+W_DO)>>W_DS);
+ }
+
+ { // Lift 1
+ DWTELEM * const dst = b+w2;
+
+ i = 0;
+ for(; (((long)&dst[i]) & 0xF) && i<w_r; i++){
+ dst[i] = dst[i] - (b[i] + b[i + 1]);
+ }
+
+ align = vec_lvsl(0, b+i);
+ tmp1 = vec_ld(0, b+i);
+ vbuf = (vector signed int*) (dst + i);
+ tmp2 = vec_ld(15, b+i);
+
+ t1 = vec_perm(tmp1, tmp2, align);
+
+ for (; i<w_r-3; i+=4) {
+
+#if 0
+ dst[i] = dst[i] - (b[i] + b[i + 1]);
+ dst[i+1] = dst[i+1] - (b[i+1] + b[i + 2]);
+ dst[i+2] = dst[i+2] - (b[i+2] + b[i + 3]);
+ dst[i+3] = dst[i+3] - (b[i+3] + b[i + 4]);
+#else
+
+ tmp1 = vec_ld(0, b+4+i);
+ tmp2 = vec_ld(15, b+4+i);
+
+ t2 = vec_perm(tmp1, tmp2, align);
+
+ y = vec_add(t1, vec_sld(t1,t2,4));
+ *vbuf = vec_sub (*vbuf, y);
+
+ vbuf++;
+
+ t1 = t2;
+
+#endif
+
+ }
+
+ snow_horizontal_compose_lift_lead_out(i, dst, dst, b, width, w_r, 1, W_CM, W_CO, W_CS);
+ }
+
+ { // Lift 2
+ DWTELEM * const ref = b+w2 - 1;
+ DWTELEM b_0 = b[0];
+ vbuf= (vector signed int *) b;
+
+ tmp1 = vec_ld (0, ref);
+ align = vec_lvsl (0, ref);
+ tmp2 = vec_ld (15, ref);
+ t1= vec_perm(tmp1, tmp2, align);
+
+ i = 0;
+ for (; i<w_l-15; i+=16) {
+#if 0
+ b[i] = b[i] - (((8 -(ref[i] + ref[i+1])) - (b[i] <<2)) >> 4);
+ b[i+1] = b[i+1] - (((8 -(ref[i+1] + ref[i+2])) - (b[i+1]<<2)) >> 4);
+ b[i+2] = b[i+2] - (((8 -(ref[i+2] + ref[i+3])) - (b[i+2]<<2)) >> 4);
+ b[i+3] = b[i+3] - (((8 -(ref[i+3] + ref[i+4])) - (b[i+3]<<2)) >> 4);
+#else
+ tmp1 = vec_ld (0, ref+4+i);
+ tmp2 = vec_ld (15, ref+4+i);
+
+ t2 = vec_perm(tmp1, tmp2, align);
+
+ y = vec_add(t1,vec_sld(t1,t2,4));
+ y = vec_sub(vec_splat_s32(8),y);
+
+ tmp1 = vec_ld (0, ref+8+i);
+
+ x = vec_sl(*vbuf,vec_splat_u32(2));
+ y = vec_sra(vec_sub(y,x),vec_splat_u32(4));
+
+ tmp2 = vec_ld (15, ref+8+i);
+
+ *vbuf = vec_sub( *vbuf, y);
+
+ t1 = t2;
+
+ vbuf++;
+
+ t2 = vec_perm(tmp1, tmp2, align);
+
+ y = vec_add(t1,vec_sld(t1,t2,4));
+ y = vec_sub(vec_splat_s32(8),y);
+
+ tmp1 = vec_ld (0, ref+12+i);
+
+ x = vec_sl(*vbuf,vec_splat_u32(2));
+ y = vec_sra(vec_sub(y,x),vec_splat_u32(4));
+
+ tmp2 = vec_ld (15, ref+12+i);
+
+ *vbuf = vec_sub( *vbuf, y);
+
+ t1 = t2;
+
+ vbuf++;
+
+ t2 = vec_perm(tmp1, tmp2, align);
+
+ y = vec_add(t1,vec_sld(t1,t2,4));
+ y = vec_sub(vec_splat_s32(8),y);
+
+ tmp1 = vec_ld (0, ref+16+i);
+
+ x = vec_sl(*vbuf,vec_splat_u32(2));
+ y = vec_sra(vec_sub(y,x),vec_splat_u32(4));
+
+ tmp2 = vec_ld (15, ref+16+i);
+
+ *vbuf = vec_sub( *vbuf, y);
+
+ t1 = t2;
+
+ vbuf++;
+
+ t2 = vec_perm(tmp1, tmp2, align);
+
+ y = vec_add(t1,vec_sld(t1,t2,4));
+ y = vec_sub(vec_splat_s32(8),y);
+
+ t1 = t2;
+
+ x = vec_sl(*vbuf,vec_splat_u32(2));
+ y = vec_sra(vec_sub(y,x),vec_splat_u32(4));
+ *vbuf = vec_sub( *vbuf, y);
+
+ vbuf++;
+
+#endif
+ }
+
+ snow_horizontal_compose_liftS_lead_out(i, b, b, ref, width, w_l);
+ b[0] = b_0 - (((-2 * ref[1] + W_BO) - 4 * b_0) >> W_BS);
+ }
+
+ { // Lift 3
+ DWTELEM * const src = b+w2;
+
+ vbuf = (vector signed int *)b;
+ vtmp = (vector signed int *)temp;
+
+ i = 0;
+ align = vec_lvsl(0, src);
+
+ for (; i<w_r-3; i+=4) {
+#if 0
+ temp[i] = src[i] - ((-3*(b[i] + b[i+1]))>>1);
+ temp[i+1] = src[i+1] - ((-3*(b[i+1] + b[i+2]))>>1);
+ temp[i+2] = src[i+2] - ((-3*(b[i+2] + b[i+3]))>>1);
+ temp[i+3] = src[i+3] - ((-3*(b[i+3] + b[i+4]))>>1);
+#else
+ tmp1 = vec_ld(0,src+i);
+ t1 = vec_add(vbuf[0],vec_sld(vbuf[0],vbuf[1],4));
+ tmp2 = vec_ld(15,src+i);
+            t1 = vec_sub(vec_splat_s32(0),t1); //ugly: negate by subtracting from zero (no vector negate insn)
+ t1 = vec_add(t1,vec_add(t1,t1));
+ t2 = vec_perm(tmp1 ,tmp2 ,align);
+ t1 = vec_sra(t1,vec_splat_u32(1));
+ vbuf++;
+ *vtmp = vec_sub(t2,t1);
+ vtmp++;
+
+#endif
+
+ }
+
+ snow_horizontal_compose_lift_lead_out(i, temp, src, b, width, w_r, 1, -3, 0, 1);
+ }
+
+ {
+ //Interleave
+ int a;
+ vector signed int *t = (vector signed int *)temp,
+ *v = (vector signed int *)b;
+
+ snow_interleave_line_header(&i, width, b, temp);
+
+ for (; (i & 0xE) != 0xE; i-=2){
+ b[i+1] = temp[i>>1];
+ b[i] = b[i>>1];
+ }
+ for (i-=14; i>=0; i-=16){
+ a=i/4;
+
+ v[a+3]=vec_mergel(v[(a>>1)+1],t[(a>>1)+1]);
+ v[a+2]=vec_mergeh(v[(a>>1)+1],t[(a>>1)+1]);
+ v[a+1]=vec_mergel(v[a>>1],t[a>>1]);
+ v[a]=vec_mergeh(v[a>>1],t[a>>1]);
+
+ }
+
+ }
+}
+
+void ff_snow_vertical_compose97i_altivec(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width)
+{
+ int i, w4 = width/4;
+ vector signed int *v0, *v1,*v2,*v3,*v4,*v5;
+ vector signed int t1, t2;
+
+ v0=(vector signed int *)b0;
+ v1=(vector signed int *)b1;
+ v2=(vector signed int *)b2;
+ v3=(vector signed int *)b3;
+ v4=(vector signed int *)b4;
+ v5=(vector signed int *)b5;
+
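+    /* all four lifting steps fused into one pass over the rows, four
+       coefficients per vector; the scalar loop below (i*=4) finishes any
+       width that is not a multiple of 4 */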
+ for (i=0; i< w4;i++)
+ {
+
+ #if 0
+ b4[i] -= (3*(b3[i] + b5[i])+4)>>3;
+ b3[i] -= ((b2[i] + b4[i]));
+ b2[i] += ((b1[i] + b3[i])+4*b2[i]+8)>>4;
+ b1[i] += (3*(b0[i] + b2[i]))>>1;
+ #else
+ t1 = vec_add(v3[i], v5[i]);
+ t2 = vec_add(t1, vec_add(t1,t1));
+ t1 = vec_add(t2, vec_splat_s32(4));
+ v4[i] = vec_sub(v4[i], vec_sra(t1,vec_splat_u32(3)));
+
+ v3[i] = vec_sub(v3[i], vec_add(v2[i], v4[i]));
+
+ t1 = vec_add(vec_splat_s32(8), vec_add(v1[i], v3[i]));
+ t2 = vec_sl(v2[i], vec_splat_u32(2));
+ v2[i] = vec_add(v2[i], vec_sra(vec_add(t1,t2),vec_splat_u32(4)));
+ t1 = vec_add(v0[i], v2[i]);
+ t2 = vec_add(t1, vec_add(t1,t1));
+ v1[i] = vec_add(v1[i], vec_sra(t2,vec_splat_u32(1)));
+
+ #endif
+ }
+
+ for(i*=4; i < width; i++)
+ {
+ b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
+ b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
+ b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
+ b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
+ }
+}
+
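+/* unaligned-load idiom used by the macros below: two vec_ld cover the
+   16 bytes, vec_lvsl builds the shuffle and vec_perm extracts the
+   misaligned data */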
+#define LOAD_BLOCKS \
+ tmp1 = vec_ld(0, &block[3][y*src_stride]);\
+ align = vec_lvsl(0, &block[3][y*src_stride]);\
+ tmp2 = vec_ld(15, &block[3][y*src_stride]);\
+\
+ b3 = vec_perm(tmp1,tmp2,align);\
+\
+ tmp1 = vec_ld(0, &block[2][y*src_stride]);\
+ align = vec_lvsl(0, &block[2][y*src_stride]);\
+ tmp2 = vec_ld(15, &block[2][y*src_stride]);\
+\
+ b2 = vec_perm(tmp1,tmp2,align);\
+\
+ tmp1 = vec_ld(0, &block[1][y*src_stride]);\
+ align = vec_lvsl(0, &block[1][y*src_stride]);\
+ tmp2 = vec_ld(15, &block[1][y*src_stride]);\
+\
+ b1 = vec_perm(tmp1,tmp2,align);\
+\
+ tmp1 = vec_ld(0, &block[0][y*src_stride]);\
+ align = vec_lvsl(0, &block[0][y*src_stride]);\
+ tmp2 = vec_ld(15, &block[0][y*src_stride]);\
+\
+ b0 = vec_perm(tmp1,tmp2,align);
+
+#define LOAD_OBMCS \
+ tmp1 = vec_ld(0, obmc1);\
+ align = vec_lvsl(0, obmc1);\
+ tmp2 = vec_ld(15, obmc1);\
+\
+ ob1 = vec_perm(tmp1,tmp2,align);\
+\
+ tmp1 = vec_ld(0, obmc2);\
+ align = vec_lvsl(0, obmc2);\
+ tmp2 = vec_ld(15, obmc2);\
+\
+ ob2 = vec_perm(tmp1,tmp2,align);\
+\
+ tmp1 = vec_ld(0, obmc3);\
+ align = vec_lvsl(0, obmc3);\
+ tmp2 = vec_ld(15, obmc3);\
+\
+ ob3 = vec_perm(tmp1,tmp2,align);\
+\
+ tmp1 = vec_ld(0, obmc4);\
+ align = vec_lvsl(0, obmc4);\
+ tmp2 = vec_ld(15, obmc4);\
+\
+ ob4 = vec_perm(tmp1,tmp2,align);
+
+/* interleave logic
+ * h1 <- [ a,b,a,b, a,b,a,b, a,b,a,b, a,b,a,b ]
+ * h2 <- [ c,d,c,d, c,d,c,d, c,d,c,d, c,d,c,d ]
+ * h <- [ a,b,c,d, a,b,c,d, a,b,c,d, a,b,c,d ]
+ */
+
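+/* the merges arrange the obmc weights as [o1,o2,o3,o4,...] and the
+   pixels as [b3,b2,b1,b0,...], so one vec_msum yields per 32-bit lane
+   obmc1*b3 + obmc2*b2 + obmc3*b1 + obmc4*b0 -- the full 4-term OBMC sum
+   for one output pixel */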
+#define STEPS_0_1\
+ h1 = (vector unsigned short)\
+ vec_mergeh(ob1, ob2);\
+\
+ h2 = (vector unsigned short)\
+ vec_mergeh(ob3, ob4);\
+\
+ ih = (vector unsigned char)\
+ vec_mergeh(h1,h2);\
+\
+ l1 = (vector unsigned short) vec_mergeh(b3, b2);\
+\
+ ih1 = (vector unsigned char) vec_mergel(h1, h2);\
+\
+ l2 = (vector unsigned short) vec_mergeh(b1, b0);\
+\
+ il = (vector unsigned char) vec_mergeh(l1, l2);\
+\
+ v[0] = (vector signed int) vec_msum(ih, il, vec_splat_u32(0));\
+\
+ il1 = (vector unsigned char) vec_mergel(l1, l2);\
+\
+ v[1] = (vector signed int) vec_msum(ih1, il1, vec_splat_u32(0));
+
+#define FINAL_STEP_SCALAR\
+ for(x=0; x<b_w; x++)\
+ if(add){\
+ vbuf[x] += dst[x + src_x];\
+ vbuf[x] = (vbuf[x] + (1<<(FRAC_BITS-1))) >> FRAC_BITS;\
+ if(vbuf[x]&(~255)) vbuf[x]= ~(vbuf[x]>>31);\
+ dst8[x + y*src_stride] = vbuf[x];\
+ }else{\
+ dst[x + src_x] -= vbuf[x];\
+ }
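+/* the clamp relies on ~(v>>31): the arithmetic shift smears the sign
+   bit, so negatives become 0 and positive overflows 0xffffffff, whose
+   low byte stores as 255 */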
+
+static void inner_add_yblock_bw_8_obmc_16_altivec(uint8_t *obmc,
+ const int obmc_stride,
+ uint8_t * * block, int b_w,
+ int b_h, int src_x, int src_y,
+ int src_stride, slice_buffer * sb,
+ int add, uint8_t * dst8)
+{
+ int y, x;
+ DWTELEM * dst;
+ vector unsigned short h1, h2, l1, l2;
+ vector unsigned char ih, il, ih1, il1, tmp1, tmp2, align;
+ vector unsigned char b0,b1,b2,b3;
+ vector unsigned char ob1,ob2,ob3,ob4;
+
+ DECLARE_ALIGNED_16(int, vbuf[16]);
+ vector signed int *v = (vector signed int *)vbuf, *d;
+
+ for(y=0; y<b_h; y++){
+        //FIXME ugly misuse of obmc_stride
+
+ uint8_t *obmc1= obmc + y*obmc_stride;
+ uint8_t *obmc2= obmc1+ (obmc_stride>>1);
+ uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
+ uint8_t *obmc4= obmc3+ (obmc_stride>>1);
+
+ dst = slice_buffer_get_line(sb, src_y + y);
+ d = (vector signed int *)(dst + src_x);
+
+//FIXME I could avoid some loads!
+
+ // load blocks
+ LOAD_BLOCKS
+
+ // load obmcs
+ LOAD_OBMCS
+
+ // steps 0 1
+ STEPS_0_1
+
+ FINAL_STEP_SCALAR
+
+ }
+
+}
+
+#define STEPS_2_3\
+ h1 = (vector unsigned short) vec_mergel(ob1, ob2);\
+\
+ h2 = (vector unsigned short) vec_mergel(ob3, ob4);\
+\
+ ih = (vector unsigned char) vec_mergeh(h1,h2);\
+\
+ l1 = (vector unsigned short) vec_mergel(b3, b2);\
+\
+ l2 = (vector unsigned short) vec_mergel(b1, b0);\
+\
+ ih1 = (vector unsigned char) vec_mergel(h1,h2);\
+\
+ il = (vector unsigned char) vec_mergeh(l1,l2);\
+\
+ v[2] = (vector signed int) vec_msum(ih, il, vec_splat_u32(0));\
+\
+ il1 = (vector unsigned char) vec_mergel(l1,l2);\
+\
+ v[3] = (vector signed int) vec_msum(ih1, il1, vec_splat_u32(0));
+
+
+static void inner_add_yblock_bw_16_obmc_32_altivec(uint8_t *obmc,
+ const int obmc_stride,
+ uint8_t * * block, int b_w,
+ int b_h, int src_x, int src_y,
+ int src_stride, slice_buffer * sb,
+ int add, uint8_t * dst8)
+{
+ int y, x;
+ DWTELEM * dst;
+ vector unsigned short h1, h2, l1, l2;
+ vector unsigned char ih, il, ih1, il1, tmp1, tmp2, align;
+ vector unsigned char b0,b1,b2,b3;
+ vector unsigned char ob1,ob2,ob3,ob4;
+ DECLARE_ALIGNED_16(int, vbuf[b_w]);
+ vector signed int *v = (vector signed int *)vbuf, *d;
+
+ for(y=0; y<b_h; y++){
+        //FIXME ugly misuse of obmc_stride
+
+ uint8_t *obmc1= obmc + y*obmc_stride;
+ uint8_t *obmc2= obmc1+ (obmc_stride>>1);
+ uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
+ uint8_t *obmc4= obmc3+ (obmc_stride>>1);
+
+ dst = slice_buffer_get_line(sb, src_y + y);
+ d = (vector signed int *)(dst + src_x);
+
+ // load blocks
+ LOAD_BLOCKS
+
+ // load obmcs
+ LOAD_OBMCS
+
+ // steps 0 1 2 3
+ STEPS_0_1
+
+ STEPS_2_3
+
+ FINAL_STEP_SCALAR
+
+ }
+}
+
+#define FINAL_STEP_VEC \
+\
+ if(add)\
+ {\
+ for(x=0; x<b_w/4; x++)\
+ {\
+ v[x] = vec_add(v[x], d[x]);\
+ v[x] = vec_sra(vec_add(v[x],\
+ vec_sl( vec_splat_s32(1),\
+ vec_splat_u32(7))),\
+ vec_splat_u32(8));\
+\
+ mask = (vector bool int) vec_sl((vector signed int)\
+ vec_cmpeq(v[x],v[x]),vec_splat_u32(8));\
+ mask = (vector bool int) vec_and(v[x],vec_nor(mask,mask));\
+\
+ mask = (vector bool int)\
+ vec_cmpeq((vector signed int)mask,\
+ (vector signed int)vec_splat_u32(0));\
+\
+                vs = vec_sra(v[x],vec_splat_u32(8));\
+                vs = vec_sra(vs,vec_splat_u32(8));\
+                vs = vec_sra(vs,vec_splat_u32(15));\
+\
+ vs = vec_nor(vs,vs);\
+\
+ v[x]= vec_sel(v[x],vs,mask);\
+ }\
+\
+ for(x=0; x<b_w; x++)\
+ dst8[x + y*src_stride] = vbuf[x];\
+\
+ }\
+ else\
+ for(x=0; x<b_w/4; x++)\
+ d[x] = vec_sub(d[x], v[x]);
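+/* vector version of the scalar clamp: the chained shifts compose to the
+   arithmetic >>31 (vec_splat_u32 cannot encode 31 in one go) and vec_sel
+   substitutes the saturated value according to the range mask */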
+
+static void inner_add_yblock_a_bw_8_obmc_16_altivec(uint8_t *obmc,
+ const int obmc_stride,
+ uint8_t * * block, int b_w,
+ int b_h, int src_x, int src_y,
+ int src_stride, slice_buffer * sb,
+ int add, uint8_t * dst8)
+{
+ int y, x;
+ DWTELEM * dst;
+ vector bool int mask;
+ vector signed int vs;
+ vector unsigned short h1, h2, l1, l2;
+ vector unsigned char ih, il, ih1, il1, tmp1, tmp2, align;
+ vector unsigned char b0,b1,b2,b3;
+ vector unsigned char ob1,ob2,ob3,ob4;
+
+ DECLARE_ALIGNED_16(int, vbuf[16]);
+ vector signed int *v = (vector signed int *)vbuf, *d;
+
+ for(y=0; y<b_h; y++){
+        //FIXME ugly misuse of obmc_stride
+
+ uint8_t *obmc1= obmc + y*obmc_stride;
+ uint8_t *obmc2= obmc1+ (obmc_stride>>1);
+ uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
+ uint8_t *obmc4= obmc3+ (obmc_stride>>1);
+
+ dst = slice_buffer_get_line(sb, src_y + y);
+ d = (vector signed int *)(dst + src_x);
+
+//FIXME I could avoid some loads!
+
+ // load blocks
+ LOAD_BLOCKS
+
+ // load obmcs
+ LOAD_OBMCS
+
+ // steps 0 1
+ STEPS_0_1
+
+ FINAL_STEP_VEC
+
+ }
+
+}
+
+static void inner_add_yblock_a_bw_16_obmc_32_altivec(uint8_t *obmc,
+ const int obmc_stride,
+ uint8_t * * block, int b_w,
+ int b_h, int src_x, int src_y,
+ int src_stride, slice_buffer * sb,
+ int add, uint8_t * dst8)
+{
+ int y, x;
+ DWTELEM * dst;
+ vector bool int mask;
+ vector signed int vs;
+ vector unsigned short h1, h2, l1, l2;
+ vector unsigned char ih, il, ih1, il1, tmp1, tmp2, align;
+ vector unsigned char b0,b1,b2,b3;
+ vector unsigned char ob1,ob2,ob3,ob4;
+ DECLARE_ALIGNED_16(int, vbuf[b_w]);
+ vector signed int *v = (vector signed int *)vbuf, *d;
+
+ for(y=0; y<b_h; y++){
+        //FIXME ugly misuse of obmc_stride
+
+ uint8_t *obmc1= obmc + y*obmc_stride;
+ uint8_t *obmc2= obmc1+ (obmc_stride>>1);
+ uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
+ uint8_t *obmc4= obmc3+ (obmc_stride>>1);
+
+ dst = slice_buffer_get_line(sb, src_y + y);
+ d = (vector signed int *)(dst + src_x);
+
+ // load blocks
+ LOAD_BLOCKS
+
+ // load obmcs
+ LOAD_OBMCS
+
+ // steps 0 1 2 3
+ STEPS_0_1
+
+ STEPS_2_3
+
+ FINAL_STEP_VEC
+
+ }
+}
+
+
+void ff_snow_inner_add_yblock_altivec(uint8_t *obmc, const int obmc_stride,
+ uint8_t * * block, int b_w, int b_h,
+ int src_x, int src_y, int src_stride,
+ slice_buffer * sb, int add,
+ uint8_t * dst8)
+{
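+    /* dispatch on block width; src_x&15 separates the unaligned path
+       (scalar final step) from the aligned one (vector final step), and
+       widths other than 8 or 16 fall back to the C version */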
+ if (src_x&15) {
+ if (b_w == 16)
+ inner_add_yblock_bw_16_obmc_32_altivec(obmc, obmc_stride, block,
+ b_w, b_h, src_x, src_y,
+ src_stride, sb, add, dst8);
+ else if (b_w == 8)
+ inner_add_yblock_bw_8_obmc_16_altivec(obmc, obmc_stride, block,
+ b_w, b_h, src_x, src_y,
+ src_stride, sb, add, dst8);
+ else
+ ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,
+ src_y, src_stride, sb, add, dst8);
+ } else {
+ if (b_w == 16)
+ inner_add_yblock_a_bw_16_obmc_32_altivec(obmc, obmc_stride, block,
+ b_w, b_h, src_x, src_y,
+ src_stride, sb, add, dst8);
+ else if (b_w == 8)
+ inner_add_yblock_a_bw_8_obmc_16_altivec(obmc, obmc_stride, block,
+ b_w, b_h, src_x, src_y,
+ src_stride, sb, add, dst8);
+ else
+ ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,
+ src_y, src_stride, sb, add, dst8);
+ }
+}
+
+
+void snow_init_altivec(DSPContext* c, AVCodecContext *avctx)
+{
+ c->horizontal_compose97i = ff_snow_horizontal_compose97i_altivec;
+ c->vertical_compose97i = ff_snow_vertical_compose97i_altivec;
+ c->inner_add_yblock = ff_snow_inner_add_yblock_altivec;
+}
diff --git a/src/libffmpeg/libavcodec/ppc/types_altivec.h b/src/libffmpeg/libavcodec/ppc/types_altivec.h
new file mode 100644
index 000000000..f29026e04
--- /dev/null
+++ b/src/libffmpeg/libavcodec/ppc/types_altivec.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2006 Guillaume Poirier <gpoirier@mplayerhq.hu>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/***********************************************************************
+ * Vector types
+ **********************************************************************/
+#define vec_u8_t vector unsigned char
+#define vec_s8_t vector signed char
+#define vec_u16_t vector unsigned short
+#define vec_s16_t vector signed short
+#define vec_u32_t vector unsigned int
+#define vec_s32_t vector signed int
+
+/***********************************************************************
+ * Null vector
+ **********************************************************************/
+#define LOAD_ZERO const vec_u8_t zerov = vec_splat_u8( 0 )
+
+#define zero_u8v (vec_u8_t) zerov
+#define zero_s8v (vec_s8_t) zerov
+#define zero_u16v (vec_u16_t) zerov
+#define zero_s16v (vec_s16_t) zerov
+#define zero_u32v (vec_u32_t) zerov
+#define zero_s32v (vec_s32_t) zerov
diff --git a/src/libffmpeg/libavcodec/ppc/vc1dsp_altivec.c b/src/libffmpeg/libavcodec/ppc/vc1dsp_altivec.c
new file mode 100644
index 000000000..114c9d41f
--- /dev/null
+++ b/src/libffmpeg/libavcodec/ppc/vc1dsp_altivec.c
@@ -0,0 +1,338 @@
+/*
+ * VC-1 and WMV3 decoder - DSP functions AltiVec-optimized
+ * Copyright (c) 2006 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include "../dsputil.h"
+
+#include "gcc_fixes.h"
+
+#include "dsputil_altivec.h"
+
+// main steps of 8x8 transform
+#define STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_rnd) \
+do { \
+ t0 = vec_sl(vec_add(s0, s4), vec_2); \
+ t0 = vec_add(vec_sl(t0, vec_1), t0); \
+ t0 = vec_add(t0, vec_rnd); \
+ t1 = vec_sl(vec_sub(s0, s4), vec_2); \
+ t1 = vec_add(vec_sl(t1, vec_1), t1); \
+ t1 = vec_add(t1, vec_rnd); \
+ t2 = vec_add(vec_sl(s6, vec_2), vec_sl(s6, vec_1)); \
+ t2 = vec_add(t2, vec_sl(s2, vec_4)); \
+ t3 = vec_add(vec_sl(s2, vec_2), vec_sl(s2, vec_1)); \
+ t3 = vec_sub(t3, vec_sl(s6, vec_4)); \
+ t4 = vec_add(t0, t2); \
+ t5 = vec_add(t1, t3); \
+ t6 = vec_sub(t1, t3); \
+ t7 = vec_sub(t0, t2); \
+\
+ t0 = vec_sl(vec_add(s1, s3), vec_4); \
+ t0 = vec_add(t0, vec_sl(s5, vec_3)); \
+ t0 = vec_add(t0, vec_sl(s7, vec_2)); \
+ t0 = vec_add(t0, vec_sub(s5, s3)); \
+\
+ t1 = vec_sl(vec_sub(s1, s5), vec_4); \
+ t1 = vec_sub(t1, vec_sl(s7, vec_3)); \
+ t1 = vec_sub(t1, vec_sl(s3, vec_2)); \
+ t1 = vec_sub(t1, vec_add(s1, s7)); \
+\
+ t2 = vec_sl(vec_sub(s7, s3), vec_4); \
+ t2 = vec_add(t2, vec_sl(s1, vec_3)); \
+ t2 = vec_add(t2, vec_sl(s5, vec_2)); \
+ t2 = vec_add(t2, vec_sub(s1, s7)); \
+\
+ t3 = vec_sl(vec_sub(s5, s7), vec_4); \
+ t3 = vec_sub(t3, vec_sl(s3, vec_3)); \
+ t3 = vec_add(t3, vec_sl(s1, vec_2)); \
+ t3 = vec_sub(t3, vec_add(s3, s5)); \
+\
+ s0 = vec_add(t4, t0); \
+ s1 = vec_add(t5, t1); \
+ s2 = vec_add(t6, t2); \
+ s3 = vec_add(t7, t3); \
+ s4 = vec_sub(t7, t3); \
+ s5 = vec_sub(t6, t2); \
+ s6 = vec_sub(t5, t1); \
+ s7 = vec_sub(t4, t0); \
+}while(0)
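+/* the shift/add chains above implement the VC-1 transform multiplies
+   without vector multiplies: 12, 16, 6 for the even half and
+   16, 15, 9, 4 for the odd half */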
+
+#define SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7) \
+do { \
+ s0 = vec_sra(s0, vec_3); \
+ s1 = vec_sra(s1, vec_3); \
+ s2 = vec_sra(s2, vec_3); \
+ s3 = vec_sra(s3, vec_3); \
+ s4 = vec_sra(s4, vec_3); \
+ s5 = vec_sra(s5, vec_3); \
+ s6 = vec_sra(s6, vec_3); \
+ s7 = vec_sra(s7, vec_3); \
+}while(0)
+
+#define SHIFT_VERT8(s0, s1, s2, s3, s4, s5, s6, s7) \
+do { \
+ s0 = vec_sra(s0, vec_7); \
+ s1 = vec_sra(s1, vec_7); \
+ s2 = vec_sra(s2, vec_7); \
+ s3 = vec_sra(s3, vec_7); \
+ s4 = vec_sra(vec_add(s4, vec_1s), vec_7); \
+ s5 = vec_sra(vec_add(s5, vec_1s), vec_7); \
+ s6 = vec_sra(vec_add(s6, vec_1s), vec_7); \
+ s7 = vec_sra(vec_add(s7, vec_1s), vec_7); \
+}while(0)
+
+/* main steps of 4x4 transform */
+#define STEP4(s0, s1, s2, s3, vec_rnd) \
+do { \
+ t1 = vec_add(vec_sl(s0, vec_4), s0); \
+ t1 = vec_add(t1, vec_rnd); \
+ t2 = vec_add(vec_sl(s2, vec_4), s2); \
+ t0 = vec_add(t1, t2); \
+ t1 = vec_sub(t1, t2); \
+ t3 = vec_sl(vec_sub(s3, s1), vec_1); \
+ t3 = vec_add(t3, vec_sl(t3, vec_2)); \
+ t2 = vec_add(t3, vec_sl(s1, vec_5)); \
+ t3 = vec_add(t3, vec_sl(s3, vec_3)); \
+ t3 = vec_add(t3, vec_sl(s3, vec_2)); \
+ s0 = vec_add(t0, t2); \
+ s1 = vec_sub(t1, t3); \
+ s2 = vec_add(t1, t3); \
+ s3 = vec_sub(t0, t2); \
+}while (0)
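+/* same idea for the 4-point transform: the chains expand to the VC-1
+   constants 17, 22 and 10 */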
+
+#define SHIFT_HOR4(s0, s1, s2, s3) \
+ s0 = vec_sra(s0, vec_3); \
+ s1 = vec_sra(s1, vec_3); \
+ s2 = vec_sra(s2, vec_3); \
+ s3 = vec_sra(s3, vec_3);
+
+#define SHIFT_VERT4(s0, s1, s2, s3) \
+ s0 = vec_sra(s0, vec_7); \
+ s1 = vec_sra(s1, vec_7); \
+ s2 = vec_sra(s2, vec_7); \
+ s3 = vec_sra(s3, vec_7);
+
+/** Do inverse transform on 8x8 block
+*/
+static void vc1_inv_trans_8x8_altivec(DCTELEM block[64])
+{
+ vector signed short src0, src1, src2, src3, src4, src5, src6, src7;
+ vector signed int s0, s1, s2, s3, s4, s5, s6, s7;
+ vector signed int s8, s9, sA, sB, sC, sD, sE, sF;
+ vector signed int t0, t1, t2, t3, t4, t5, t6, t7;
+ const vector signed int vec_64 = vec_sl(vec_splat_s32(4), vec_splat_u32(4));
+ const vector unsigned int vec_7 = vec_splat_u32(7);
+ const vector unsigned int vec_5 = vec_splat_u32(5);
+ const vector unsigned int vec_4 = vec_splat_u32(4);
+ const vector signed int vec_4s = vec_splat_s32(4);
+ const vector unsigned int vec_3 = vec_splat_u32(3);
+ const vector unsigned int vec_2 = vec_splat_u32(2);
+ const vector signed int vec_1s = vec_splat_s32(1);
+ const vector unsigned int vec_1 = vec_splat_u32(1);
+
+
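+    /* rows first: transpose, run STEP8 on the unpacked 32-bit halves with
+       rounding 4 and >>3, repack; then the column pass with rounding 64,
+       >>7 and the +1 bias on s4..s7 in SHIFT_VERT8 */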
+ src0 = vec_ld( 0, block);
+ src1 = vec_ld( 16, block);
+ src2 = vec_ld( 32, block);
+ src3 = vec_ld( 48, block);
+ src4 = vec_ld( 64, block);
+ src5 = vec_ld( 80, block);
+ src6 = vec_ld( 96, block);
+ src7 = vec_ld(112, block);
+
+ TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
+ s0 = vec_unpackl(src0);
+ s1 = vec_unpackl(src1);
+ s2 = vec_unpackl(src2);
+ s3 = vec_unpackl(src3);
+ s4 = vec_unpackl(src4);
+ s5 = vec_unpackl(src5);
+ s6 = vec_unpackl(src6);
+ s7 = vec_unpackl(src7);
+ s8 = vec_unpackh(src0);
+ s9 = vec_unpackh(src1);
+ sA = vec_unpackh(src2);
+ sB = vec_unpackh(src3);
+ sC = vec_unpackh(src4);
+ sD = vec_unpackh(src5);
+ sE = vec_unpackh(src6);
+ sF = vec_unpackh(src7);
+ STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_4s);
+ SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7);
+ STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_4s);
+ SHIFT_HOR8(s8, s9, sA, sB, sC, sD, sE, sF);
+ src0 = vec_pack(s8, s0);
+ src1 = vec_pack(s9, s1);
+ src2 = vec_pack(sA, s2);
+ src3 = vec_pack(sB, s3);
+ src4 = vec_pack(sC, s4);
+ src5 = vec_pack(sD, s5);
+ src6 = vec_pack(sE, s6);
+ src7 = vec_pack(sF, s7);
+ TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
+
+ s0 = vec_unpackl(src0);
+ s1 = vec_unpackl(src1);
+ s2 = vec_unpackl(src2);
+ s3 = vec_unpackl(src3);
+ s4 = vec_unpackl(src4);
+ s5 = vec_unpackl(src5);
+ s6 = vec_unpackl(src6);
+ s7 = vec_unpackl(src7);
+ s8 = vec_unpackh(src0);
+ s9 = vec_unpackh(src1);
+ sA = vec_unpackh(src2);
+ sB = vec_unpackh(src3);
+ sC = vec_unpackh(src4);
+ sD = vec_unpackh(src5);
+ sE = vec_unpackh(src6);
+ sF = vec_unpackh(src7);
+ STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_64);
+ SHIFT_VERT8(s0, s1, s2, s3, s4, s5, s6, s7);
+ STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_64);
+ SHIFT_VERT8(s8, s9, sA, sB, sC, sD, sE, sF);
+ src0 = vec_pack(s8, s0);
+ src1 = vec_pack(s9, s1);
+ src2 = vec_pack(sA, s2);
+ src3 = vec_pack(sB, s3);
+ src4 = vec_pack(sC, s4);
+ src5 = vec_pack(sD, s5);
+ src6 = vec_pack(sE, s6);
+ src7 = vec_pack(sF, s7);
+
+ vec_st(src0, 0, block);
+ vec_st(src1, 16, block);
+ vec_st(src2, 32, block);
+ vec_st(src3, 48, block);
+ vec_st(src4, 64, block);
+ vec_st(src5, 80, block);
+ vec_st(src6, 96, block);
+ vec_st(src7,112, block);
+}
+
+/** Do inverse transform on 8x4 part of block
+*/
+static void vc1_inv_trans_8x4_altivec(DCTELEM block[64], int n)
+{
+ vector signed short src0, src1, src2, src3, src4, src5, src6, src7;
+ vector signed int s0, s1, s2, s3, s4, s5, s6, s7;
+ vector signed int s8, s9, sA, sB, sC, sD, sE, sF;
+ vector signed int t0, t1, t2, t3, t4, t5, t6, t7;
+ const vector signed int vec_64 = vec_sl(vec_splat_s32(4), vec_splat_u32(4));
+ const vector unsigned int vec_7 = vec_splat_u32(7);
+ const vector unsigned int vec_5 = vec_splat_u32(5);
+ const vector unsigned int vec_4 = vec_splat_u32(4);
+ const vector signed int vec_4s = vec_splat_s32(4);
+ const vector unsigned int vec_3 = vec_splat_u32(3);
+ const vector unsigned int vec_2 = vec_splat_u32(2);
+ const vector unsigned int vec_1 = vec_splat_u32(1);
+
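+    /* same row pass as in the 8x8 transform; the column pass then runs
+       the 4-point STEP4 on the upper (n==0) or lower half of the block */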
+ src0 = vec_ld( 0, block);
+ src1 = vec_ld( 16, block);
+ src2 = vec_ld( 32, block);
+ src3 = vec_ld( 48, block);
+ src4 = vec_ld( 64, block);
+ src5 = vec_ld( 80, block);
+ src6 = vec_ld( 96, block);
+ src7 = vec_ld(112, block);
+
+ TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
+ s0 = vec_unpackl(src0);
+ s1 = vec_unpackl(src1);
+ s2 = vec_unpackl(src2);
+ s3 = vec_unpackl(src3);
+ s4 = vec_unpackl(src4);
+ s5 = vec_unpackl(src5);
+ s6 = vec_unpackl(src6);
+ s7 = vec_unpackl(src7);
+ s8 = vec_unpackh(src0);
+ s9 = vec_unpackh(src1);
+ sA = vec_unpackh(src2);
+ sB = vec_unpackh(src3);
+ sC = vec_unpackh(src4);
+ sD = vec_unpackh(src5);
+ sE = vec_unpackh(src6);
+ sF = vec_unpackh(src7);
+ STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_4s);
+ SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7);
+ STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_4s);
+ SHIFT_HOR8(s8, s9, sA, sB, sC, sD, sE, sF);
+ src0 = vec_pack(s8, s0);
+ src1 = vec_pack(s9, s1);
+ src2 = vec_pack(sA, s2);
+ src3 = vec_pack(sB, s3);
+ src4 = vec_pack(sC, s4);
+ src5 = vec_pack(sD, s5);
+ src6 = vec_pack(sE, s6);
+ src7 = vec_pack(sF, s7);
+ TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
+
+ if(!n){ // upper half of block
+ s0 = vec_unpackh(src0);
+ s1 = vec_unpackh(src1);
+ s2 = vec_unpackh(src2);
+ s3 = vec_unpackh(src3);
+ s8 = vec_unpackl(src0);
+ s9 = vec_unpackl(src1);
+ sA = vec_unpackl(src2);
+ sB = vec_unpackl(src3);
+ STEP4(s0, s1, s2, s3, vec_64);
+ SHIFT_VERT4(s0, s1, s2, s3);
+ STEP4(s8, s9, sA, sB, vec_64);
+ SHIFT_VERT4(s8, s9, sA, sB);
+ src0 = vec_pack(s0, s8);
+ src1 = vec_pack(s1, s9);
+ src2 = vec_pack(s2, sA);
+ src3 = vec_pack(s3, sB);
+
+ vec_st(src0, 0, block);
+ vec_st(src1, 16, block);
+ vec_st(src2, 32, block);
+ vec_st(src3, 48, block);
+ } else { //lower half of block
+ s0 = vec_unpackh(src4);
+ s1 = vec_unpackh(src5);
+ s2 = vec_unpackh(src6);
+ s3 = vec_unpackh(src7);
+ s8 = vec_unpackl(src4);
+ s9 = vec_unpackl(src5);
+ sA = vec_unpackl(src6);
+ sB = vec_unpackl(src7);
+ STEP4(s0, s1, s2, s3, vec_64);
+ SHIFT_VERT4(s0, s1, s2, s3);
+ STEP4(s8, s9, sA, sB, vec_64);
+ SHIFT_VERT4(s8, s9, sA, sB);
+ src4 = vec_pack(s0, s8);
+ src5 = vec_pack(s1, s9);
+ src6 = vec_pack(s2, sA);
+ src7 = vec_pack(s3, sB);
+
+ vec_st(src4, 64, block);
+ vec_st(src5, 80, block);
+ vec_st(src6, 96, block);
+ vec_st(src7,112, block);
+ }
+}
+
+
+void vc1dsp_init_altivec(DSPContext* dsp, AVCodecContext *avctx) {
+ dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_altivec;
+ dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_altivec;
+}
diff --git a/src/libffmpeg/libavcodec/smacker.c b/src/libffmpeg/libavcodec/smacker.c
index 2f2185848..2e1784075 100644
--- a/src/libffmpeg/libavcodec/smacker.c
+++ b/src/libffmpeg/libavcodec/smacker.c
@@ -320,12 +320,12 @@ static int decode_header_trees(SmackVContext *smk) {
return 0;
}
-static always_inline void last_reset(int *recode, int *last) {
+static av_always_inline void last_reset(int *recode, int *last) {
recode[last[0]] = recode[last[1]] = recode[last[2]] = 0;
}
/* get code and update history */
-static always_inline int smk_get_code(GetBitContext *gb, int *recode, int *last) {
+static av_always_inline int smk_get_code(GetBitContext *gb, int *recode, int *last) {
register int *table = recode;
int v, b;
diff --git a/src/libffmpeg/libavcodec/snow.c b/src/libffmpeg/libavcodec/snow.c
index 346d56861..5e93d40a1 100644
--- a/src/libffmpeg/libavcodec/snow.c
+++ b/src/libffmpeg/libavcodec/snow.c
@@ -439,6 +439,7 @@ typedef struct SnowContext{
int always_reset;
int version;
int spatial_decomposition_type;
+ int last_spatial_decomposition_type;
int temporal_decomposition_type;
int spatial_decomposition_count;
int temporal_decomposition_count;
@@ -452,15 +453,19 @@ typedef struct SnowContext{
int chroma_v_shift;
int spatial_scalability;
int qlog;
+ int last_qlog;
int lambda;
int lambda2;
int pass1_rc;
int mv_scale;
+ int last_mv_scale;
int qbias;
+ int last_qbias;
#define QBIAS_SHIFT 3
int b_width;
int b_height;
int block_max_depth;
+ int last_block_max_depth;
Plane plane[MAX_PLANES];
BlockNode *block;
#define ME_CACHE_SIZE 1024
@@ -709,7 +714,7 @@ static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
return v;
}
-static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
+static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
const int mirror_left= !highpass;
const int mirror_right= (width&1) ^ highpass;
const int w= (width>>1) - 1 + (highpass & width);
@@ -732,7 +737,7 @@ static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst
}
#ifndef lift5
-static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
+static av_always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
const int mirror_left= !highpass;
const int mirror_right= (width&1) ^ highpass;
const int w= (width>>1) - 1 + (highpass & width);
@@ -764,7 +769,7 @@ static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int ds
#endif
#ifndef liftS
-static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
+static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
const int mirror_left= !highpass;
const int mirror_right= (width&1) ^ highpass;
const int w= (width>>1) - 1 + (highpass & width);
@@ -1849,7 +1854,7 @@ static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, sli
return;
}
-static void reset_contexts(SnowContext *s){
+static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
int plane_index, level, orientation;
for(plane_index=0; plane_index<3; plane_index++){
@@ -2208,7 +2213,7 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){
}
#endif
-static always_inline int same_block(BlockNode *a, BlockNode *b){
+static av_always_inline int same_block(BlockNode *a, BlockNode *b){
if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
}else{
@@ -2287,12 +2292,10 @@ static void decode_q_branch(SnowContext *s, int level, int x, int y){
}
if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
- int type;
+ int type, mx, my;
int l = left->color[0];
int cb= left->color[1];
int cr= left->color[2];
- int mx= mid_pred(left->mx, top->mx, tr->mx);
- int my= mid_pred(left->my, top->my, tr->my);
int ref = 0;
int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
@@ -2557,7 +2560,7 @@ void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * *
}
//FIXME name clenup (b_w, block_w, b_width stuff)
-static always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
+static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
const int b_width = s->b_width << s->block_max_depth;
const int b_height= s->b_height << s->block_max_depth;
const int b_stride= b_width;
@@ -2716,7 +2719,7 @@ assert(src_stride > 2*MB_SIZE + 5);
#endif
}
-static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
+static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
Plane *p= &s->plane[plane_index];
const int mb_w= s->b_width << s->block_max_depth;
const int mb_h= s->b_height << s->block_max_depth;
@@ -2783,7 +2786,7 @@ static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer *
STOP_TIMER("predict_slice")
}
-static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
+static av_always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
Plane *p= &s->plane[plane_index];
const int mb_w= s->b_width << s->block_max_depth;
const int mb_h= s->b_height << s->block_max_depth;
@@ -2840,7 +2843,7 @@ static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_
STOP_TIMER("predict_slice")
}
-static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
+static av_always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
const int mb_h= s->b_height << s->block_max_depth;
int mb_y;
for(mb_y=0; mb_y<=mb_h; mb_y++)
@@ -3098,7 +3101,7 @@ static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
return distortion + rate*penalty_factor;
}
-static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
+static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
const int b_stride= s->b_width << s->block_max_depth;
BlockNode *block= &s->block[mb_x + mb_y * b_stride];
BlockNode backup= *block;
@@ -3137,12 +3140,12 @@ static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3
}
/* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
-static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
+static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
int p[2] = {p0, p1};
return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
}
-static always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
+static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
const int b_stride= s->b_width << s->block_max_depth;
BlockNode *block= &s->block[mb_x + mb_y * b_stride];
BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
@@ -3607,8 +3610,14 @@ static void encode_header(SnowContext *s){
memset(kstate, MID_STATE, sizeof(kstate));
put_rac(&s->c, kstate, s->keyframe);
- if(s->keyframe || s->always_reset)
+ if(s->keyframe || s->always_reset){
reset_contexts(s);
+ s->last_spatial_decomposition_type=
+ s->last_qlog=
+ s->last_qbias=
+ s->last_mv_scale=
+ s->last_block_max_depth= 0;
+ }
if(s->keyframe){
put_symbol(&s->c, s->header_state, s->version, 0);
put_rac(&s->c, s->header_state, s->always_reset);
@@ -3631,11 +3640,17 @@ static void encode_header(SnowContext *s){
}
}
}
- put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
- put_symbol(&s->c, s->header_state, s->qlog, 1);
- put_symbol(&s->c, s->header_state, s->mv_scale, 0);
- put_symbol(&s->c, s->header_state, s->qbias, 1);
- put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
+ put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
+ put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
+ put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
+ put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
+ put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
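+    /* the five header fields above are now coded as signed deltas against
+       the previous frame (the trailing 1 selects signed put_symbol), so
+       unchanged fields cost almost nothing */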
+
+ s->last_spatial_decomposition_type= s->spatial_decomposition_type;
+ s->last_qlog = s->qlog;
+ s->last_qbias = s->qbias;
+ s->last_mv_scale = s->mv_scale;
+ s->last_block_max_depth = s->block_max_depth;
}
static int decode_header(SnowContext *s){
@@ -3645,8 +3660,14 @@ static int decode_header(SnowContext *s){
memset(kstate, MID_STATE, sizeof(kstate));
s->keyframe= get_rac(&s->c, kstate);
- if(s->keyframe || s->always_reset)
+ if(s->keyframe || s->always_reset){
reset_contexts(s);
+ s->spatial_decomposition_type=
+ s->qlog=
+ s->qbias=
+ s->mv_scale=
+ s->block_max_depth= 0;
+ }
if(s->keyframe){
s->version= get_symbol(&s->c, s->header_state, 0);
if(s->version>0){
@@ -3677,16 +3698,16 @@ static int decode_header(SnowContext *s){
}
}
- s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
+ s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
if(s->spatial_decomposition_type > 2){
av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
return -1;
}
- s->qlog= get_symbol(&s->c, s->header_state, 1);
- s->mv_scale= get_symbol(&s->c, s->header_state, 0);
- s->qbias= get_symbol(&s->c, s->header_state, 1);
- s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
+ s->qlog += get_symbol(&s->c, s->header_state, 1);
+ s->mv_scale += get_symbol(&s->c, s->header_state, 1);
+ s->qbias += get_symbol(&s->c, s->header_state, 1);
+ s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
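+    /* decoder side of the delta coding: the values accumulate across
+       frames and are zeroed on keyframes, where the deltas are therefore
+       absolute */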
if(s->block_max_depth > 1 || s->block_max_depth < 0){
av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
s->block_max_depth= 0;
@@ -4177,7 +4198,6 @@ redo_frame:
pict->pict_type= FF_I_TYPE;
s->keyframe=1;
s->current_picture.key_frame=1;
- reset_contexts(s);
goto redo_frame;
}
diff --git a/src/libffmpeg/libavcodec/snow.h b/src/libffmpeg/libavcodec/snow.h
index f7cee131a..6794d2c5a 100644
--- a/src/libffmpeg/libavcodec/snow.h
+++ b/src/libffmpeg/libavcodec/snow.h
@@ -137,7 +137,7 @@ static int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int
/* C bits used by mmx/sse2/altivec */
-static always_inline void snow_interleave_line_header(int * i, int width, DWTELEM * low, DWTELEM * high){
+static av_always_inline void snow_interleave_line_header(int * i, int width, DWTELEM * low, DWTELEM * high){
(*i) = (width) - 2;
if (width & 1){
@@ -146,14 +146,14 @@ static always_inline void snow_interleave_line_header(int * i, int width, DWTELE
}
}
-static always_inline void snow_interleave_line_footer(int * i, DWTELEM * low, DWTELEM * high){
+static av_always_inline void snow_interleave_line_footer(int * i, DWTELEM * low, DWTELEM * high){
for (; (*i)>=0; (*i)-=2){
low[(*i)+1] = high[(*i)>>1];
low[*i] = low[(*i)>>1];
}
}
-static always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w, int lift_high, int mul, int add, int shift){
+static av_always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w, int lift_high, int mul, int add, int shift){
for(; i<w; i++){
dst[i] = src[i] - ((mul * (ref[i] + ref[i + 1]) + add) >> shift);
}
@@ -163,7 +163,7 @@ static always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELEM *
}
}
-static always_inline void snow_horizontal_compose_liftS_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w){
+static av_always_inline void snow_horizontal_compose_liftS_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w){
for(; i<w; i++){
dst[i] = src[i] - (((-(ref[i] + ref[(i+1)])+W_BO) - 4 * src[i]) >> W_BS);
}
diff --git a/src/libffmpeg/libavcodec/utils.c b/src/libffmpeg/libavcodec/utils.c
index c3661dda7..36dcc7746 100644
--- a/src/libffmpeg/libavcodec/utils.c
+++ b/src/libffmpeg/libavcodec/utils.c
@@ -421,7 +421,7 @@ static const char* context_to_name(void* ptr) {
static const AVOption options[]={
{"b", "set video bitrate (in bits/s)", OFFSET(bit_rate), FF_OPT_TYPE_INT, AV_CODEC_DEFAULT_BITRATE, INT_MIN, INT_MAX, V|A|E},
-{"bt", "set video bitrate tolerance (in bits/s)", OFFSET(bit_rate_tolerance), FF_OPT_TYPE_INT, AV_CODEC_DEFAULT_BITRATE*20, INT_MIN, INT_MAX, V|E},
+{"bt", "set video bitrate tolerance (in bits/s)", OFFSET(bit_rate_tolerance), FF_OPT_TYPE_INT, AV_CODEC_DEFAULT_BITRATE*20, 1, INT_MAX, V|E},
{"flags", NULL, OFFSET(flags), FF_OPT_TYPE_FLAGS, DEFAULT, INT_MIN, INT_MAX, V|A|E|D, "flags"},
{"mv4", "use four motion vector by macroblock (mpeg4)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_4MV, INT_MIN, INT_MAX, V|E, "flags"},
{"obmc", "use overlapped block motion compensation (h263+)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_OBMC, INT_MIN, INT_MAX, V|E, "flags"},
@@ -464,7 +464,7 @@ static const AVOption options[]={
{"extradata_size", NULL, OFFSET(extradata_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
{"time_base", NULL, OFFSET(time_base), FF_OPT_TYPE_RATIONAL, DEFAULT, INT_MIN, INT_MAX},
{"g", "set the group of picture size", OFFSET(gop_size), FF_OPT_TYPE_INT, 12, INT_MIN, INT_MAX, V|E},
-{"rate_emu", NULL, OFFSET(rate_emu), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"rate_emu", "frame rate emulation", OFFSET(rate_emu), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
{"ar", "set audio sampling rate (in Hz)", OFFSET(sample_rate), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
{"ac", "set number of audio channels", OFFSET(channels), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
{"cutoff", "set cutoff bandwidth", OFFSET(cutoff), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, A|E},
@@ -509,15 +509,15 @@ static const AVOption options[]={
{"edge", "edge padding bug (autodetected per fourcc/version)", 0, FF_OPT_TYPE_CONST, FF_BUG_EDGE, INT_MIN, INT_MAX, V|D, "bug"},
{"hpel_chroma", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_HPEL_CHROMA, INT_MIN, INT_MAX, V|D, "bug"},
{"dc_clip", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_DC_CLIP, INT_MIN, INT_MAX, V|D, "bug"},
-{"ms", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_MS, INT_MIN, INT_MAX, V|D, "bug"},
+{"ms", "workaround various bugs in microsofts broken decoders", 0, FF_OPT_TYPE_CONST, FF_BUG_MS, INT_MIN, INT_MAX, V|D, "bug"},
{"lelim", "single coefficient elimination threshold for luminance (negative values also consider dc coefficient)", OFFSET(luma_elim_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
{"celim", "single coefficient elimination threshold for chrominance (negative values also consider dc coefficient)", OFFSET(chroma_elim_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
{"strict", "how strictly to follow the standards", OFFSET(strict_std_compliance), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "strict"},
-{"very", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_VERY_STRICT, INT_MIN, INT_MAX, V|E, "strict"},
-{"strict", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_STRICT, INT_MIN, INT_MAX, V|E, "strict"},
+{"very", "strictly conform to a older more strict version of the spec or reference software", 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_VERY_STRICT, INT_MIN, INT_MAX, V|E, "strict"},
+{"strict", "strictly conform to all the things in the spec no matter what consequences", 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_STRICT, INT_MIN, INT_MAX, V|E, "strict"},
{"normal", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_NORMAL, INT_MIN, INT_MAX, V|E, "strict"},
-{"inofficial", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_INOFFICIAL, INT_MIN, INT_MAX, V|E, "strict"},
-{"experimental", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_EXPERIMENTAL, INT_MIN, INT_MAX, V|E, "strict"},
+{"inofficial", "allow inofficial extensions", 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_INOFFICIAL, INT_MIN, INT_MAX, V|E, "strict"},
+{"experimental", "allow non standarized experimental things", 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_EXPERIMENTAL, INT_MIN, INT_MAX, V|E, "strict"},
{"b_qoffset", "qp offset between p and b frames", OFFSET(b_quant_offset), FF_OPT_TYPE_FLOAT, 1.25, FLT_MIN, FLT_MAX, V|E},
{"er", "set error resilience strategy", OFFSET(error_resilience), FF_OPT_TYPE_INT, FF_ER_CAREFUL, INT_MIN, INT_MAX, V|D, "er"},
{"careful", NULL, 0, FF_OPT_TYPE_CONST, FF_ER_CAREFUL, INT_MIN, INT_MAX, V|D, "er"},
@@ -549,14 +549,14 @@ static const AVOption options[]={
{"mmx", NULL, 0, FF_OPT_TYPE_CONST, FF_DCT_MMX, INT_MIN, INT_MAX, V|E, "dct"},
{"mlib", NULL, 0, FF_OPT_TYPE_CONST, FF_DCT_MLIB, INT_MIN, INT_MAX, V|E, "dct"},
{"altivec", NULL, 0, FF_OPT_TYPE_CONST, FF_DCT_ALTIVEC, INT_MIN, INT_MAX, V|E, "dct"},
-{"faan", "floating point AAN", 0, FF_OPT_TYPE_CONST, FF_DCT_FAAN, INT_MIN, INT_MAX, V|E, "dct"},
+{"faan", "floating point AAN DCT", 0, FF_OPT_TYPE_CONST, FF_DCT_FAAN, INT_MIN, INT_MAX, V|E, "dct"},
{"lumi_mask", "compresses bright areas stronger than medium ones", OFFSET(lumi_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E},
{"tcplx_mask", "temporal complexity masking", OFFSET(temporal_cplx_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E},
{"scplx_mask", "spatial complexity masking", OFFSET(spatial_cplx_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E},
{"p_mask", "inter masking", OFFSET(p_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E},
{"dark_mask", "compresses dark areas stronger than medium ones", OFFSET(dark_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E},
{"unused", NULL, OFFSET(unused), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"idct", "use interlaced DCT", OFFSET(idct_algo), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, V|E|D, "idct"},
+{"idct", "select IDCT implementation", OFFSET(idct_algo), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, V|E|D, "idct"},
{"auto", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_AUTO, INT_MIN, INT_MAX, V|E|D, "idct"},
{"int", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_INT, INT_MIN, INT_MAX, V|E|D, "idct"},
{"simple", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLE, INT_MIN, INT_MAX, V|E|D, "idct"},
@@ -582,7 +582,7 @@ static const AVOption options[]={
{"left", NULL, 0, FF_OPT_TYPE_CONST, FF_PRED_LEFT, INT_MIN, INT_MAX, V|E, "pred"},
{"plane", NULL, 0, FF_OPT_TYPE_CONST, FF_PRED_PLANE, INT_MIN, INT_MAX, V|E, "pred"},
{"median", NULL, 0, FF_OPT_TYPE_CONST, FF_PRED_MEDIAN, INT_MIN, INT_MAX, V|E, "pred"},
-{"aspect", NULL, OFFSET(sample_aspect_ratio), FF_OPT_TYPE_RATIONAL, DEFAULT, 0, 10, V|E},
+{"aspect", "sample aspect ratio", OFFSET(sample_aspect_ratio), FF_OPT_TYPE_RATIONAL, DEFAULT, 0, 10, V|E},
{"debug", "print specific debug info", OFFSET(debug), FF_OPT_TYPE_FLAGS, DEFAULT, 0, INT_MAX, V|A|S|E|D, "debug"},
{"pict", "picture info", 0, FF_OPT_TYPE_CONST, FF_DEBUG_PICT_INFO, INT_MIN, INT_MAX, V|D, "debug"},
{"rc", "rate control", 0, FF_OPT_TYPE_CONST, FF_DEBUG_RC, INT_MIN, INT_MAX, V|E, "debug"},
@@ -603,8 +603,8 @@ static const AVOption options[]={
{"pf", "forward predicted MVs of P-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_P_FOR, INT_MIN, INT_MAX, V|D, "debug_mv"},
{"bf", "forward predicted MVs of B-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_B_FOR, INT_MIN, INT_MAX, V|D, "debug_mv"},
{"bb", "backward predicted MVs of B-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_B_BACK, INT_MIN, INT_MAX, V|D, "debug_mv"},
-{"mb_qmin", "obsolete, use vqmin", OFFSET(mb_qmin), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"mb_qmax", "obsolete, use vqmax", OFFSET(mb_qmax), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"mb_qmin", "obsolete, use qmin", OFFSET(mb_qmin), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"mb_qmax", "obsolete, use qmax", OFFSET(mb_qmax), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
{"cmp", "full pel me compare function", OFFSET(me_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"},
{"subcmp", "sub pel me compare function", OFFSET(me_sub_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"},
{"mbcmp", "macroblock compare function", OFFSET(mb_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"},
@@ -654,11 +654,11 @@ static const AVOption options[]={
{"lmin", "min lagrange factor (VBR)", OFFSET(lmin), FF_OPT_TYPE_INT, 2*FF_QP2LAMBDA, 0, INT_MAX, V|E},
{"lmax", "max lagrange factor (VBR)", OFFSET(lmax), FF_OPT_TYPE_INT, 31*FF_QP2LAMBDA, 0, INT_MAX, V|E},
{"nr", "noise reduction", OFFSET(noise_reduction), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"rc_init_occupancy", NULL, OFFSET(rc_initial_buffer_occupancy), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"rc_init_occupancy", "number of bits which should be loaded into the rc buffer before decoding starts", OFFSET(rc_initial_buffer_occupancy), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
{"inter_threshold", NULL, OFFSET(inter_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
{"flags2", NULL, OFFSET(flags2), FF_OPT_TYPE_FLAGS, CODEC_FLAG2_FASTPSKIP, INT_MIN, INT_MAX, V|A|E|D, "flags2"},
{"error", NULL, OFFSET(error_rate), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"antialias", NULL, OFFSET(antialias_algo), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D, "aa"},
+{"antialias", "MP3 antialias algorithm", OFFSET(antialias_algo), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D, "aa"},
{"auto", NULL, 0, FF_OPT_TYPE_CONST, FF_AA_AUTO, INT_MIN, INT_MAX, V|D, "aa"},
{"fastint", NULL, 0, FF_OPT_TYPE_CONST, FF_AA_FASTINT, INT_MIN, INT_MAX, V|D, "aa"},
{"int", NULL, 0, FF_OPT_TYPE_CONST, FF_AA_INT, INT_MIN, INT_MAX, V|D, "aa"},
@@ -669,8 +669,8 @@ static const AVOption options[]={
{"mb_threshold", "macroblock threshold", OFFSET(mb_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
{"dc", "intra_dc_precision", OFFSET(intra_dc_precision), FF_OPT_TYPE_INT, 0, INT_MIN, INT_MAX, V|E},
{"nssew", "nsse weight", OFFSET(nsse_weight), FF_OPT_TYPE_INT, 8, INT_MIN, INT_MAX, V|E},
-{"skip_top", NULL, OFFSET(skip_top), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D},
-{"skip_bottom", NULL, OFFSET(skip_bottom), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D},
+{"skip_top", "number of macroblock rows at the top which are skipped", OFFSET(skip_top), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D},
+{"skip_bottom", "number of macroblock rows at the bottom which are skipped", OFFSET(skip_bottom), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D},
{"profile", NULL, OFFSET(profile), FF_OPT_TYPE_INT, FF_PROFILE_UNKNOWN, INT_MIN, INT_MAX, V|A|E, "profile"},
{"unknown", NULL, 0, FF_OPT_TYPE_CONST, FF_PROFILE_UNKNOWN, INT_MIN, INT_MAX, V|A|E, "profile"},
{"level", NULL, OFFSET(level), FF_OPT_TYPE_INT, FF_LEVEL_UNKNOWN, INT_MIN, INT_MAX, V|A|E, "level"},
@@ -687,42 +687,43 @@ static const AVOption options[]={
{"bidir_refine", "refine the two motion vectors used in bidirectional macroblocks", OFFSET(bidir_refine), FF_OPT_TYPE_INT, DEFAULT, 0, 4, V|E},
{"brd_scale", "downscales frames for dynamic B-frame decision", OFFSET(brd_scale), FF_OPT_TYPE_INT, DEFAULT, 0, 10, V|E},
{"crf", "enables constant quality mode, and selects the quality (x264)", OFFSET(crf), FF_OPT_TYPE_FLOAT, DEFAULT, 0, 51, V|E},
-{"cqp", NULL, OFFSET(cqp), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, V|E},
+{"cqp", "constant quantization parameter rate control method", OFFSET(cqp), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, V|E},
{"keyint_min", "minimum interval between IDR-frames (x264)", OFFSET(keyint_min), FF_OPT_TYPE_INT, 25, INT_MIN, INT_MAX, V|E},
{"refs", "reference frames to consider for motion compensation (Snow)", OFFSET(refs), FF_OPT_TYPE_INT, 1, INT_MIN, INT_MAX, V|E},
-{"chromaoffset", NULL, OFFSET(chromaoffset), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"bframebias", NULL, OFFSET(bframebias), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"chromaoffset", "chroma qp offset from luma", OFFSET(chromaoffset), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"bframebias", "influences how often B-frames are used", OFFSET(bframebias), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
{"trellis", "rate-distortion optimal quantization", OFFSET(trellis), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|A|E},
-{"directpred", NULL, OFFSET(directpred), FF_OPT_TYPE_INT, 2, INT_MIN, INT_MAX, V|E},
+{"directpred", "direct mv prediction mode - 0 (none), 1 (spatial), 2 (temporal)", OFFSET(directpred), FF_OPT_TYPE_INT, 2, INT_MIN, INT_MAX, V|E},
{"bpyramid", "allows B-frames to be used as references for predicting", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_BPYRAMID, INT_MIN, INT_MAX, V|E, "flags2"},
-{"wpred", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_WPRED, INT_MIN, INT_MAX, V|E, "flags2"},
-{"mixed_refs", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_MIXED_REFS, INT_MIN, INT_MAX, V|E, "flags2"},
-{"8x8dct", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_8X8DCT, INT_MIN, INT_MAX, V|E, "flags2"},
-{"fastpskip", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_FASTPSKIP, INT_MIN, INT_MAX, V|E, "flags2"},
-{"aud", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_AUD, INT_MIN, INT_MAX, V|E, "flags2"},
-{"brdo", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_BRDO, INT_MIN, INT_MAX, V|E, "flags2"},
-{"complexityblur", NULL, OFFSET(complexityblur), FF_OPT_TYPE_FLOAT, 20.0, FLT_MIN, FLT_MAX, V|E},
-{"deblockalpha", NULL, OFFSET(deblockalpha), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"deblockbeta", NULL, OFFSET(deblockbeta), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"partitions", NULL, OFFSET(partitions), FF_OPT_TYPE_FLAGS, DEFAULT, INT_MIN, INT_MAX, V|E, "partitions"},
+{"wpred", "weighted biprediction for b-frames (H.264)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_WPRED, INT_MIN, INT_MAX, V|E, "flags2"},
+{"mixed_refs", "one reference per partition, as opposed to one reference per macroblock", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_MIXED_REFS, INT_MIN, INT_MAX, V|E, "flags2"},
+{"8x8dct", "high profile 8x8 transform (H.264)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_8X8DCT, INT_MIN, INT_MAX, V|E, "flags2"},
+{"fastpskip", "fast pskip (H.264)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_FASTPSKIP, INT_MIN, INT_MAX, V|E, "flags2"},
+{"aud", "access unit delimiters (H.264)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_AUD, INT_MIN, INT_MAX, V|E, "flags2"},
+{"brdo", "b-frame rate-distortion optimization", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_BRDO, INT_MIN, INT_MAX, V|E, "flags2"},
+{"skiprd", "RD optimal MB level residual skiping", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_SKIP_RD, INT_MIN, INT_MAX, V|E, "flags2"},
+{"complexityblur", "reduce fluctuations in qp (before curve compression)", OFFSET(complexityblur), FF_OPT_TYPE_FLOAT, 20.0, FLT_MIN, FLT_MAX, V|E},
+{"deblockalpha", "in-loop deblocking filter alphac0 parameter", OFFSET(deblockalpha), FF_OPT_TYPE_INT, DEFAULT, -6, 6, V|E},
+{"deblockbeta", "in-loop deblocking filter beta parameter", OFFSET(deblockbeta), FF_OPT_TYPE_INT, DEFAULT, -6, 6, V|E},
+{"partitions", "macroblock subpartition sizes to consider", OFFSET(partitions), FF_OPT_TYPE_FLAGS, DEFAULT, INT_MIN, INT_MAX, V|E, "partitions"},
{"parti4x4", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_I4X4, INT_MIN, INT_MAX, V|E, "partitions"},
{"parti8x8", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_I8X8, INT_MIN, INT_MAX, V|E, "partitions"},
{"partp4x4", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_P4X4, INT_MIN, INT_MAX, V|E, "partitions"},
{"partp8x8", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_P8X8, INT_MIN, INT_MAX, V|E, "partitions"},
{"partb8x8", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_B8X8, INT_MIN, INT_MAX, V|E, "partitions"},
-{"sc_factor", NULL, OFFSET(scenechange_factor), FF_OPT_TYPE_INT, 6, 0, INT_MAX, V|E},
+{"sc_factor", "multiplied by qscale for each frame and added to scene_change_score", OFFSET(scenechange_factor), FF_OPT_TYPE_INT, 6, 0, INT_MAX, V|E},
{"mv0_threshold", NULL, OFFSET(mv0_threshold), FF_OPT_TYPE_INT, 256, 0, INT_MAX, V|E},
{"ivlc", "intra vlc table", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_INTRA_VLC, INT_MIN, INT_MAX, V|E, "flags2"},
-{"b_sensitivity", NULL, OFFSET(b_sensitivity), FF_OPT_TYPE_INT, 40, 1, INT_MAX, V|E},
+{"b_sensitivity", "adjusts sensitivity of b_frame_strategy 1", OFFSET(b_sensitivity), FF_OPT_TYPE_INT, 40, 1, INT_MAX, V|E},
{"compression_level", NULL, OFFSET(compression_level), FF_OPT_TYPE_INT, FF_COMPRESSION_DEFAULT, INT_MIN, INT_MAX, V|A|E},
-{"use_lpc", NULL, OFFSET(use_lpc), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
-{"lpc_coeff_precision", NULL, OFFSET(lpc_coeff_precision), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, A|E},
+{"use_lpc", "sets whether to use LPC mode (FLAC)", OFFSET(use_lpc), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
+{"lpc_coeff_precision", "LPC coefficient precision (FLAC)", OFFSET(lpc_coeff_precision), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, A|E},
{"min_prediction_order", NULL, OFFSET(min_prediction_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
{"max_prediction_order", NULL, OFFSET(max_prediction_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
-{"prediction_order_method", NULL, OFFSET(prediction_order_method), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
+{"prediction_order_method", "search method for selecting prediction order", OFFSET(prediction_order_method), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
{"min_partition_order", NULL, OFFSET(min_partition_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
{"max_partition_order", NULL, OFFSET(max_partition_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
-{"timecode_frame_start", NULL, OFFSET(timecode_frame_start), FF_OPT_TYPE_INT, 0, 0, INT_MAX, V|E},
+{"timecode_frame_start", "GOP timecode frame start number, in non drop frame format", OFFSET(timecode_frame_start), FF_OPT_TYPE_INT, 0, 0, INT_MAX, V|E},
{"drop_frame_timecode", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_DROP_FRAME_TIMECODE, INT_MIN, INT_MAX, V|E, "flags2"},
{NULL},
};
diff --git a/src/libffmpeg/libavcodec/vc1.c b/src/libffmpeg/libavcodec/vc1.c
index 7b385ca47..231f3ca26 100644
--- a/src/libffmpeg/libavcodec/vc1.c
+++ b/src/libffmpeg/libavcodec/vc1.c
@@ -2140,7 +2140,7 @@ static void vc1_interp_mc(VC1Context *v)
dsp->avg_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
}
-static always_inline int scale_mv(int value, int bfrac, int inv, int qs)
+static av_always_inline int scale_mv(int value, int bfrac, int inv, int qs)
{
int n = bfrac;
@@ -3072,8 +3072,8 @@ static int vc1_decode_intra_block(VC1Context *v, DCTELEM block[64], int n, int c
ac_val -= 16 * s->block_wrap[n];
q1 = s->current_picture.qscale_table[mb_pos];
- if(dc_pred_dir && c_avail) q2 = s->current_picture.qscale_table[mb_pos - 1];
- if(!dc_pred_dir && a_avail) q2 = s->current_picture.qscale_table[mb_pos - s->mb_stride];
+ if(dc_pred_dir && c_avail && mb_pos) q2 = s->current_picture.qscale_table[mb_pos - 1];
+ if(!dc_pred_dir && a_avail && mb_pos >= s->mb_stride) q2 = s->current_picture.qscale_table[mb_pos - s->mb_stride];
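+    /* the added mb_pos checks guard against reading before the start of
+       qscale_table on the first macroblock row/column */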
if(n && n<4) q2 = q1;
if(coded) {
diff --git a/src/libffmpeg/libavcodec/vc1dsp.c b/src/libffmpeg/libavcodec/vc1dsp.c
index 9139ffb28..f19f266d1 100644
--- a/src/libffmpeg/libavcodec/vc1dsp.c
+++ b/src/libffmpeg/libavcodec/vc1dsp.c
@@ -326,7 +326,7 @@ static void vc1_inv_trans_4x4_c(DCTELEM block[64], int n)
/** Filter used to interpolate fractional pel values
*/
-static always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int mode, int r)
+static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int mode, int r)
{
switch(mode){
case 0: //no shift
diff --git a/src/libffmpeg/libavcodec/vp3dsp.c b/src/libffmpeg/libavcodec/vp3dsp.c
index a48515a5e..bb9fed091 100644
--- a/src/libffmpeg/libavcodec/vp3dsp.c
+++ b/src/libffmpeg/libavcodec/vp3dsp.c
@@ -39,7 +39,7 @@
#define M(a,b) (((a) * (b))>>16)
-static always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type)
+static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type)
{
int16_t *ip = input;
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
diff --git a/src/libffmpeg/libavcodec/vp5.c b/src/libffmpeg/libavcodec/vp5.c
new file mode 100644
index 000000000..ac953c7aa
--- /dev/null
+++ b/src/libffmpeg/libavcodec/vp5.c
@@ -0,0 +1,290 @@
+/**
+ * @file vp5.c
+ * VP5 compatible video decoder
+ *
+ * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "avcodec.h"
+#include "dsputil.h"
+#include "bitstream.h"
+#include "mpegvideo.h"
+
+#include "vp56.h"
+#include "vp56data.h"
+#include "vp5data.h"
+
+
+static int vp5_parse_header(vp56_context_t *s, uint8_t *buf, int buf_size,
+ int *golden_frame)
+{
+ vp56_range_coder_t *c = &s->c;
+ int rows, cols;
+
+ vp56_init_range_decoder(&s->c, buf, buf_size);
+ s->frames[VP56_FRAME_CURRENT].key_frame = !vp56_rac_get(c);
+ vp56_rac_get(c);
+ vp56_init_dequant(s, vp56_rac_gets(c, 6));
+ if (s->frames[VP56_FRAME_CURRENT].key_frame)
+ {
+ vp56_rac_gets(c, 8);
+ if(vp56_rac_gets(c, 5) > 5)
+ return 0;
+ vp56_rac_gets(c, 2);
+ if (vp56_rac_get(c)) {
+ av_log(s->avctx, AV_LOG_ERROR, "interlacing not supported\n");
+ return 0;
+ }
+ rows = vp56_rac_gets(c, 8); /* number of stored macroblock rows */
+ cols = vp56_rac_gets(c, 8); /* number of stored macroblock cols */
+ vp56_rac_gets(c, 8); /* number of displayed macroblock rows */
+ vp56_rac_gets(c, 8); /* number of displayed macroblock cols */
+ vp56_rac_gets(c, 2);
+ if (16*cols != s->avctx->coded_width ||
+ 16*rows != s->avctx->coded_height) {
+ avcodec_set_dimensions(s->avctx, 16*cols, 16*rows);
+ return 2;
+ }
+ }
+ return 1;
+}
+
+/* Gives very similar results to the vp6 version, except in a few cases */
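+/* A readable scalar equivalent (an illustrative sketch; the branchless
+ * version below is what actually runs):
+ *
+ *     m = (abs(v) < 2*t) ? t - abs(abs(v) - t) : 0;
+ *     return v < 0 ? -m : m;
+ *
+ * i.e. a tent function of |v|, peaking at t and vanishing at 0 and 2*t,
+ * with the sign of v restored. */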
+static int vp5_adjust(int v, int t)
+{
+ int s2, s1 = v >> 31;
+ v ^= s1;
+ v -= s1;
+ v *= v < 2*t;
+ v -= t;
+ s2 = v >> 31;
+ v ^= s2;
+ v -= s2;
+ v = t - v;
+ v += s1;
+ v ^= s1;
+ return v;
+}
+
+static void vp5_parse_vector_adjustment(vp56_context_t *s, vp56_mv_t *vect)
+{
+ vp56_range_coder_t *c = &s->c;
+ int comp, di;
+
+ for (comp=0; comp<2; comp++) {
+ int delta = 0;
+ if (vp56_rac_get_prob(c, s->vector_model_dct[comp])) {
+ int sign = vp56_rac_get_prob(c, s->vector_model_sig[comp]);
+ di = vp56_rac_get_prob(c, s->vector_model_pdi[comp][0]);
+ di |= vp56_rac_get_prob(c, s->vector_model_pdi[comp][1]) << 1;
+ delta = vp56_rac_get_tree(c, vp56_pva_tree,
+ s->vector_model_pdv[comp]);
+ delta = di | (delta << 2);
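+            /* branchless conditional negation: delta = sign ? -delta : delta */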
+ delta = (delta ^ -sign) + sign;
+ }
+ if (!comp)
+ vect->x = delta;
+ else
+ vect->y = delta;
+ }
+}
+
+static void vp5_parse_vector_models(vp56_context_t *s)
+{
+ vp56_range_coder_t *c = &s->c;
+ int comp, node;
+
+ for (comp=0; comp<2; comp++) {
+ if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][0]))
+ s->vector_model_dct[comp] = vp56_rac_gets_nn(c, 7);
+ if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][1]))
+ s->vector_model_sig[comp] = vp56_rac_gets_nn(c, 7);
+ if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][2]))
+ s->vector_model_pdi[comp][0] = vp56_rac_gets_nn(c, 7);
+ if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][3]))
+ s->vector_model_pdi[comp][1] = vp56_rac_gets_nn(c, 7);
+ }
+
+ for (comp=0; comp<2; comp++)
+ for (node=0; node<7; node++)
+ if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][4 + node]))
+ s->vector_model_pdv[comp][node] = vp56_rac_gets_nn(c, 7);
+}
+
+static void vp5_parse_coeff_models(vp56_context_t *s)
+{
+ vp56_range_coder_t *c = &s->c;
+ uint8_t def_prob[11];
+ int node, cg, ctx;
+ int ct; /* code type */
+ int pt; /* plane type (0 for Y, 1 for U or V) */
+
+ memset(def_prob, 0x80, sizeof(def_prob));
+
+ for (pt=0; pt<2; pt++)
+ for (node=0; node<11; node++)
+ if (vp56_rac_get_prob(c, vp5_dccv_pct[pt][node])) {
+ def_prob[node] = vp56_rac_gets_nn(c, 7);
+ s->coeff_model_dccv[pt][node] = def_prob[node];
+ } else if (s->frames[VP56_FRAME_CURRENT].key_frame) {
+ s->coeff_model_dccv[pt][node] = def_prob[node];
+ }
+
+ for (ct=0; ct<3; ct++)
+ for (pt=0; pt<2; pt++)
+ for (cg=0; cg<6; cg++)
+ for (node=0; node<11; node++)
+ if (vp56_rac_get_prob(c, vp5_ract_pct[ct][pt][cg][node])) {
+ def_prob[node] = vp56_rac_gets_nn(c, 7);
+ s->coeff_model_ract[pt][ct][cg][node] = def_prob[node];
+ } else if (s->frames[VP56_FRAME_CURRENT].key_frame) {
+ s->coeff_model_ract[pt][ct][cg][node] = def_prob[node];
+ }
+
+ /* coeff_model_dcct is a linear combination of coeff_model_dccv */
+ for (pt=0; pt<2; pt++)
+ for (ctx=0; ctx<36; ctx++)
+ for (node=0; node<5; node++)
+ s->coeff_model_dcct[pt][ctx][node] = clip(((s->coeff_model_dccv[pt][node] * vp5_dccv_lc[node][ctx][0] + 128) >> 8) + vp5_dccv_lc[node][ctx][1], 1, 254);
+
+ /* coeff_model_acct is a linear combination of coeff_model_ract */
+ for (ct=0; ct<3; ct++)
+ for (pt=0; pt<2; pt++)
+ for (cg=0; cg<3; cg++)
+ for (ctx=0; ctx<6; ctx++)
+ for (node=0; node<5; node++)
+ s->coeff_model_acct[pt][ct][cg][ctx][node] = clip(((s->coeff_model_ract[pt][ct][cg][node] * vp5_ract_lc[ct][cg][node][ctx][0] + 128) >> 8) + vp5_ract_lc[ct][cg][node][ctx][1], 1, 254);
+}
+
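+/* Parse the DCT coefficients of the 6 blocks of the current macroblock.
+ * coeff_ctx records, per coefficient position, a rough magnitude class
+ * (0..5) of what was last decoded there; it is used as context when
+ * selecting the probability models for subsequent blocks. */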
+static void vp5_parse_coeff(vp56_context_t *s)
+{
+ vp56_range_coder_t *c = &s->c;
+ uint8_t *permute = s->scantable.permutated;
+ uint8_t *model, *model2;
+ int coeff, sign, coeff_idx;
+ int b, i, cg, idx, ctx, ctx_last;
+ int pt = 0; /* plane type (0 for Y, 1 for U or V) */
+
+ for (b=0; b<6; b++) {
+ int ct = 1; /* code type */
+
+ if (b > 3) pt = 1;
+
+ ctx = 6*s->coeff_ctx[vp56_b6to4[b]][0]
+ + s->above_blocks[s->above_block_idx[b]].not_null_dc;
+ model = s->coeff_model_dccv[pt];
+ model2 = s->coeff_model_dcct[pt][ctx];
+
+ for (coeff_idx=0; coeff_idx<64; ) {
+ if (vp56_rac_get_prob(c, model2[0])) {
+ if (vp56_rac_get_prob(c, model2[2])) {
+ if (vp56_rac_get_prob(c, model2[3])) {
+ s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 4;
+ idx = vp56_rac_get_tree(c, vp56_pc_tree, model);
+ sign = vp56_rac_get(c);
+ coeff = vp56_coeff_bias[idx];
+ for (i=vp56_coeff_bit_length[idx]; i>=0; i--)
+ coeff += vp56_rac_get_prob(c, vp56_coeff_parse_table[idx][i]) << i;
+ } else {
+ if (vp56_rac_get_prob(c, model2[4])) {
+ coeff = 3 + vp56_rac_get_prob(c, model[5]);
+ s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 3;
+ } else {
+ coeff = 2;
+ s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 2;
+ }
+ sign = vp56_rac_get(c);
+ }
+ ct = 2;
+ } else {
+ ct = 1;
+ s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 1;
+ sign = vp56_rac_get(c);
+ coeff = 1;
+ }
+ coeff = (coeff ^ -sign) + sign;
+ if (coeff_idx)
+ coeff *= s->dequant_ac;
+ s->block_coeff[b][permute[coeff_idx]] = coeff;
+ } else {
+ if (ct && !vp56_rac_get_prob(c, model2[1]))
+ break;
+ ct = 0;
+ s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 0;
+ }
+
+ cg = vp5_coeff_groups[++coeff_idx];
+ ctx = s->coeff_ctx[vp56_b6to4[b]][coeff_idx];
+ model = s->coeff_model_ract[pt][ct][cg];
+ model2 = cg > 2 ? model : s->coeff_model_acct[pt][ct][cg][ctx];
+ }
+
+ ctx_last = FFMIN(s->coeff_ctx_last[vp56_b6to4[b]], 24);
+ s->coeff_ctx_last[vp56_b6to4[b]] = coeff_idx;
+ if (coeff_idx < ctx_last)
+ for (i=coeff_idx; i<=ctx_last; i++)
+ s->coeff_ctx[vp56_b6to4[b]][i] = 5;
+ s->above_blocks[s->above_block_idx[b]].not_null_dc = s->coeff_ctx[vp56_b6to4[b]][0];
+ }
+}
+
+static void vp5_default_models_init(vp56_context_t *s)
+{
+ int i;
+
+ for (i=0; i<2; i++) {
+ s->vector_model_sig[i] = 0x80;
+ s->vector_model_dct[i] = 0x80;
+ s->vector_model_pdi[i][0] = 0x55;
+ s->vector_model_pdi[i][1] = 0x80;
+ }
+ memcpy(s->mb_types_stats, vp56_def_mb_types_stats, sizeof(s->mb_types_stats));
+ memset(s->vector_model_pdv, 0x80, sizeof(s->vector_model_pdv));
+}
+
+static int vp5_decode_init(AVCodecContext *avctx)
+{
+ vp56_context_t *s = avctx->priv_data;
+
+ vp56_init(s, avctx, 1);
+ s->vp56_coord_div = vp5_coord_div;
+ s->parse_vector_adjustment = vp5_parse_vector_adjustment;
+ s->adjust = vp5_adjust;
+ s->parse_coeff = vp5_parse_coeff;
+ s->default_models_init = vp5_default_models_init;
+ s->parse_vector_models = vp5_parse_vector_models;
+ s->parse_coeff_models = vp5_parse_coeff_models;
+ s->parse_header = vp5_parse_header;
+
+ return 0;
+}
+
+AVCodec vp5_decoder = {
+ "vp5",
+ CODEC_TYPE_VIDEO,
+ CODEC_ID_VP5,
+ sizeof(vp56_context_t),
+ vp5_decode_init,
+ NULL,
+ vp56_free,
+ vp56_decode_frame,
+};
diff --git a/src/libffmpeg/libavcodec/vp56.c b/src/libffmpeg/libavcodec/vp56.c
new file mode 100644
index 000000000..eb78d02e4
--- /dev/null
+++ b/src/libffmpeg/libavcodec/vp56.c
@@ -0,0 +1,665 @@
+/**
+ * @file vp56.c
+ * VP5 and VP6 compatible video decoder (common features)
+ *
+ * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+
+#include "vp56.h"
+#include "vp56data.h"
+
+
+void vp56_init_dequant(vp56_context_t *s, int quantizer)
+{
+ s->quantizer = quantizer;
+ s->dequant_dc = vp56_dc_dequant[quantizer] << 2;
+ s->dequant_ac = vp56_ac_dequant[quantizer] << 2;
+}
+
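+/* Collect up to two distinct, non-null motion vectors from the twelve
+ * neighbouring positions that referenced ref_frame.  Returns a context
+ * value: 1 when no candidate was found, 2 when exactly one was found,
+ * 0 when two (or more) were found. */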
+static int vp56_get_vectors_predictors(vp56_context_t *s, int row, int col,
+ vp56_frame_t ref_frame)
+{
+ int nb_pred = 0;
+ vp56_mv_t vect[2] = {{0,0}, {0,0}};
+ int pos, offset;
+ vp56_mv_t mvp;
+
+ for (pos=0; pos<12; pos++) {
+ mvp.x = col + vp56_candidate_predictor_pos[pos][0];
+ mvp.y = row + vp56_candidate_predictor_pos[pos][1];
+ if (mvp.x < 0 || mvp.x >= s->mb_width ||
+ mvp.y < 0 || mvp.y >= s->mb_height)
+ continue;
+ offset = mvp.x + s->mb_width*mvp.y;
+
+ if (vp56_reference_frame[s->macroblocks[offset].type] != ref_frame)
+ continue;
+ if ((s->macroblocks[offset].mv.x == vect[0].x &&
+ s->macroblocks[offset].mv.y == vect[0].y) ||
+ (s->macroblocks[offset].mv.x == 0 &&
+ s->macroblocks[offset].mv.y == 0))
+ continue;
+
+ vect[nb_pred++] = s->macroblocks[offset].mv;
+ if (nb_pred > 1) {
+ nb_pred = -1;
+ break;
+ }
+ s->vector_candidate_pos = pos;
+ }
+
+ s->vector_candidate[0] = vect[0];
+ s->vector_candidate[1] = vect[1];
+
+ return nb_pred+1;
+}
+
+static void vp56_parse_mb_type_models(vp56_context_t *s)
+{
+ vp56_range_coder_t *c = &s->c;
+ int i, ctx, type;
+
+ for (ctx=0; ctx<3; ctx++) {
+ if (vp56_rac_get_prob(c, 174)) {
+ int idx = vp56_rac_gets(c, 4);
+ memcpy(s->mb_types_stats[ctx],vp56_pre_def_mb_type_stats[idx][ctx],
+ sizeof(s->mb_types_stats[ctx]));
+ }
+ if (vp56_rac_get_prob(c, 254)) {
+ for (type=0; type<10; type++) {
+ for(i=0; i<2; i++) {
+ if (vp56_rac_get_prob(c, 205)) {
+ int delta, sign = vp56_rac_get(c);
+
+ delta = vp56_rac_get_tree(c, vp56_pmbtm_tree,
+ vp56_mb_type_model_model);
+ if (!delta)
+ delta = 4 * vp56_rac_gets(c, 7);
+ s->mb_types_stats[ctx][type][i] += (delta ^ -sign) + sign;
+ }
+ }
+ }
+ }
+ }
+
+ /* compute MB type probability tables based on previous MB type */
+ for (ctx=0; ctx<3; ctx++) {
+ int p[10];
+
+ for (type=0; type<10; type++)
+ p[type] = 100 * s->mb_types_stats[ctx][type][1];
+
+ for (type=0; type<10; type++) {
+ int p02, p34, p0234, p17, p56, p89, p5689, p156789;
+
+ /* conservative MB type probability */
+ s->mb_type_model[ctx][type][0] = 255 - (255 * s->mb_types_stats[ctx][type][0]) / (1 + s->mb_types_stats[ctx][type][0] + s->mb_types_stats[ctx][type][1]);
+
+ p[type] = 0; /* same MB type => weight is null */
+
+ /* binary tree parsing probabilities */
+ p02 = p[0] + p[2];
+ p34 = p[3] + p[4];
+ p0234 = p02 + p34;
+ p17 = p[1] + p[7];
+ p56 = p[5] + p[6];
+ p89 = p[8] + p[9];
+ p5689 = p56 + p89;
+ p156789 = p17 + p5689;
+
+ s->mb_type_model[ctx][type][1] = 1 + 255 * p0234/(1+p0234+p156789);
+ s->mb_type_model[ctx][type][2] = 1 + 255 * p02 / (1+p0234);
+ s->mb_type_model[ctx][type][3] = 1 + 255 * p17 / (1+p156789);
+ s->mb_type_model[ctx][type][4] = 1 + 255 * p[0] / (1+p02);
+ s->mb_type_model[ctx][type][5] = 1 + 255 * p[3] / (1+p34);
+ s->mb_type_model[ctx][type][6] = 1 + 255 * p[1] / (1+p17);
+ s->mb_type_model[ctx][type][7] = 1 + 255 * p56 / (1+p5689);
+ s->mb_type_model[ctx][type][8] = 1 + 255 * p[5] / (1+p56);
+ s->mb_type_model[ctx][type][9] = 1 + 255 * p[8] / (1+p89);
+
+ /* restore initial value */
+ p[type] = 100 * s->mb_types_stats[ctx][type][1];
+ }
+ }
+}
+
+static vp56_mb_t vp56_parse_mb_type(vp56_context_t *s,
+ vp56_mb_t prev_type, int ctx)
+{
+ uint8_t *mb_type_model = s->mb_type_model[ctx][prev_type];
+ vp56_range_coder_t *c = &s->c;
+
+ if (vp56_rac_get_prob(c, mb_type_model[0]))
+ return prev_type;
+ else
+ return vp56_rac_get_tree(c, vp56_pmbt_tree, mb_type_model);
+}
+
+static void vp56_decode_4mv(vp56_context_t *s, int row, int col)
+{
+ vp56_mv_t mv = {0,0};
+ int type[4];
+ int b;
+
+ /* parse each block type */
+ for (b=0; b<4; b++) {
+ type[b] = vp56_rac_gets(&s->c, 2);
+ if (type[b])
+ type[b]++; /* only returns 0, 2, 3 or 4 (all INTER_PF) */
+ }
+
+ /* get vectors */
+ for (b=0; b<4; b++) {
+ switch (type[b]) {
+ case VP56_MB_INTER_NOVEC_PF:
+ s->mv[b] = (vp56_mv_t) {0,0};
+ break;
+ case VP56_MB_INTER_DELTA_PF:
+ s->parse_vector_adjustment(s, &s->mv[b]);
+ break;
+ case VP56_MB_INTER_V1_PF:
+ s->mv[b] = s->vector_candidate[0];
+ break;
+ case VP56_MB_INTER_V2_PF:
+ s->mv[b] = s->vector_candidate[1];
+ break;
+ }
+ mv.x += s->mv[b].x;
+ mv.y += s->mv[b].y;
+ }
+
+ /* this is the one selected for the whole MB for prediction */
+ s->macroblocks[row * s->mb_width + col].mv = s->mv[3];
+
+ /* chroma vectors are average luma vectors */
+ if (s->avctx->codec->id == CODEC_ID_VP5) {
+ s->mv[4].x = s->mv[5].x = RSHIFT(mv.x,2);
+ s->mv[4].y = s->mv[5].y = RSHIFT(mv.y,2);
+ } else {
+ s->mv[4] = s->mv[5] = (vp56_mv_t) {mv.x/4, mv.y/4};
+ }
+}
+
+static vp56_mb_t vp56_decode_mv(vp56_context_t *s, int row, int col)
+{
+ vp56_mv_t *mv, vect = {0,0};
+ int ctx, b;
+
+ ctx = vp56_get_vectors_predictors(s, row, col, VP56_FRAME_PREVIOUS);
+ s->mb_type = vp56_parse_mb_type(s, s->mb_type, ctx);
+ s->macroblocks[row * s->mb_width + col].type = s->mb_type;
+
+ switch (s->mb_type) {
+ case VP56_MB_INTER_V1_PF:
+ mv = &s->vector_candidate[0];
+ break;
+
+ case VP56_MB_INTER_V2_PF:
+ mv = &s->vector_candidate[1];
+ break;
+
+ case VP56_MB_INTER_V1_GF:
+ vp56_get_vectors_predictors(s, row, col, VP56_FRAME_GOLDEN);
+ mv = &s->vector_candidate[0];
+ break;
+
+ case VP56_MB_INTER_V2_GF:
+ vp56_get_vectors_predictors(s, row, col, VP56_FRAME_GOLDEN);
+ mv = &s->vector_candidate[1];
+ break;
+
+ case VP56_MB_INTER_DELTA_PF:
+ s->parse_vector_adjustment(s, &vect);
+ mv = &vect;
+ break;
+
+ case VP56_MB_INTER_DELTA_GF:
+ vp56_get_vectors_predictors(s, row, col, VP56_FRAME_GOLDEN);
+ s->parse_vector_adjustment(s, &vect);
+ mv = &vect;
+ break;
+
+ case VP56_MB_INTER_4V:
+ vp56_decode_4mv(s, row, col);
+ return s->mb_type;
+
+ default:
+ mv = &vect;
+ break;
+ }
+
+ s->macroblocks[row*s->mb_width + col].mv = *mv;
+
+ /* same vector for all blocks */
+ for (b=0; b<6; b++)
+ s->mv[b] = *mv;
+
+ return s->mb_type;
+}
+
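+/* Predict and reconstruct the DC coefficient of each of the 6 blocks from
+ * the left and above neighbours that used the same reference frame (VP5
+ * also considers the above-left and above-right blocks); when no neighbour
+ * matches, fall back to the previous DC seen for that plane/frame pair. */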
+static void vp56_add_predictors_dc(vp56_context_t *s, vp56_frame_t ref_frame)
+{
+ int idx = s->scantable.permutated[0];
+ int i;
+
+ for (i=0; i<6; i++) {
+ vp56_ref_dc_t *ab = &s->above_blocks[s->above_block_idx[i]];
+ vp56_ref_dc_t *lb = &s->left_block[vp56_b6to4[i]];
+ int count = 0;
+ int dc = 0;
+
+ if (ref_frame == lb->ref_frame) {
+ dc += lb->dc_coeff;
+ count++;
+ }
+ if (ref_frame == ab->ref_frame) {
+ dc += ab->dc_coeff;
+ count++;
+ }
+ if (s->avctx->codec->id == CODEC_ID_VP5) {
+ if (count < 2 && ref_frame == ab[-1].ref_frame) {
+ dc += ab[-1].dc_coeff;
+ count++;
+ }
+ if (count < 2 && ref_frame == ab[1].ref_frame) {
+ dc += ab[1].dc_coeff;
+ count++;
+ }
+ }
+ if (count == 0)
+ dc = s->prev_dc[vp56_b6to3[i]][ref_frame];
+ else if (count == 2)
+ dc /= 2;
+
+ s->block_coeff[i][idx] += dc;
+ s->prev_dc[vp56_b6to3[i]][ref_frame] = s->block_coeff[i][idx];
+ ab->dc_coeff = s->block_coeff[i][idx];
+ ab->ref_frame = ref_frame;
+ lb->dc_coeff = s->block_coeff[i][idx];
+ lb->ref_frame = ref_frame;
+ s->block_coeff[i][idx] *= s->dequant_dc;
+ }
+}
+
+static void vp56_edge_filter(vp56_context_t *s, uint8_t *yuv,
+ int pix_inc, int line_inc, int t)
+{
+ int pix2_inc = 2 * pix_inc;
+ int i, v;
+
+ for (i=0; i<12; i++) {
+ v = (yuv[-pix2_inc] + 3*(yuv[0]-yuv[-pix_inc]) - yuv[pix_inc] + 4) >>3;
+ v = s->adjust(v, t);
+ yuv[-pix_inc] = clip_uint8(yuv[-pix_inc] + v);
+ yuv[0] = clip_uint8(yuv[0] - v);
+ yuv += line_inc;
+ }
+}
+
+static void vp56_deblock_filter(vp56_context_t *s, uint8_t *yuv,
+ int stride, int dx, int dy)
+{
+ int t = vp56_filter_threshold[s->quantizer];
+ if (dx) vp56_edge_filter(s, yuv + 10-dx , 1, stride, t);
+ if (dy) vp56_edge_filter(s, yuv + stride*(10-dy), stride, 1, t);
+}
+
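+/* Motion compensation of one 8x8 block.  A 12x12 source area is used so the
+ * optional deblocking filter has context around the block; when the vector
+ * has a fractional part (mv & mask), the prediction averages the samples at
+ * src_offset and src_offset+overlap_offset, unless the codec installed its
+ * own filter. */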
+static void vp56_mc(vp56_context_t *s, int b, uint8_t *src,
+ int stride, int x, int y)
+{
+ int plane = vp56_b6to3[b];
+ uint8_t *dst= s->frames[VP56_FRAME_CURRENT].data[plane]+s->block_offset[b];
+ uint8_t *src_block;
+ int src_offset;
+ int overlap_offset = 0;
+ int mask = s->vp56_coord_div[b] - 1;
+ int deblock_filtering = s->deblock_filtering;
+ int dx;
+ int dy;
+
+ if (s->avctx->skip_loop_filter >= AVDISCARD_ALL ||
+ (s->avctx->skip_loop_filter >= AVDISCARD_NONKEY
+ && !s->frames[VP56_FRAME_CURRENT].key_frame))
+ deblock_filtering = 0;
+
+ dx = s->mv[b].x / s->vp56_coord_div[b];
+ dy = s->mv[b].y / s->vp56_coord_div[b];
+
+ if (b >= 4) {
+ x /= 2;
+ y /= 2;
+ }
+ x += dx - 2;
+ y += dy - 2;
+
+ if (x<0 || x+12>=s->plane_width[plane] ||
+ y<0 || y+12>=s->plane_height[plane]) {
+ ff_emulated_edge_mc(s->edge_emu_buffer,
+ src + s->block_offset[b] + (dy-2)*stride + (dx-2),
+ stride, 12, 12, x, y,
+ s->plane_width[plane],
+ s->plane_height[plane]);
+ src_block = s->edge_emu_buffer;
+ src_offset = 2 + 2*stride;
+ } else if (deblock_filtering) {
+ /* only need a 12x12 block, but there is no such dsp function, */
+ /* so copy a 16x12 block */
+ s->dsp.put_pixels_tab[0][0](s->edge_emu_buffer,
+ src + s->block_offset[b] + (dy-2)*stride + (dx-2),
+ stride, 12);
+ src_block = s->edge_emu_buffer;
+ src_offset = 2 + 2*stride;
+ } else {
+ src_block = src;
+ src_offset = s->block_offset[b] + dy*stride + dx;
+ }
+
+ if (deblock_filtering)
+ vp56_deblock_filter(s, src_block, stride, dx&7, dy&7);
+
+ if (s->mv[b].x & mask)
+ overlap_offset += (s->mv[b].x > 0) ? 1 : -1;
+ if (s->mv[b].y & mask)
+ overlap_offset += (s->mv[b].y > 0) ? stride : -stride;
+
+ if (overlap_offset) {
+ if (s->filter)
+ s->filter(s, dst, src_block, src_offset, src_offset+overlap_offset,
+ stride, s->mv[b], mask, s->filter_selection, b<4);
+ else
+ s->dsp.put_no_rnd_pixels_l2[1](dst, src_block+src_offset,
+ src_block+src_offset+overlap_offset,
+ stride, 8);
+ } else {
+ s->dsp.put_pixels_tab[1][0](dst, src_block+src_offset, stride, 8);
+ }
+}
+
+static void vp56_decode_mb(vp56_context_t *s, int row, int col)
+{
+ AVFrame *frame_current, *frame_ref;
+ vp56_mb_t mb_type;
+ vp56_frame_t ref_frame;
+ int b, plan, off;
+
+ if (s->frames[VP56_FRAME_CURRENT].key_frame)
+ mb_type = VP56_MB_INTRA;
+ else
+ mb_type = vp56_decode_mv(s, row, col);
+ ref_frame = vp56_reference_frame[mb_type];
+
+ memset(s->block_coeff, 0, sizeof(s->block_coeff));
+
+ s->parse_coeff(s);
+
+ vp56_add_predictors_dc(s, ref_frame);
+
+ frame_current = &s->frames[VP56_FRAME_CURRENT];
+ frame_ref = &s->frames[ref_frame];
+
+ switch (mb_type) {
+ case VP56_MB_INTRA:
+ for (b=0; b<6; b++) {
+ plan = vp56_b6to3[b];
+ s->dsp.idct_put(frame_current->data[plan] + s->block_offset[b],
+ s->stride[plan], s->block_coeff[b]);
+ }
+ break;
+
+ case VP56_MB_INTER_NOVEC_PF:
+ case VP56_MB_INTER_NOVEC_GF:
+ for (b=0; b<6; b++) {
+ plan = vp56_b6to3[b];
+ off = s->block_offset[b];
+ s->dsp.put_pixels_tab[1][0](frame_current->data[plan] + off,
+ frame_ref->data[plan] + off,
+ s->stride[plan], 8);
+ s->dsp.idct_add(frame_current->data[plan] + off,
+ s->stride[plan], s->block_coeff[b]);
+ }
+ break;
+
+ case VP56_MB_INTER_DELTA_PF:
+ case VP56_MB_INTER_V1_PF:
+ case VP56_MB_INTER_V2_PF:
+ case VP56_MB_INTER_DELTA_GF:
+ case VP56_MB_INTER_4V:
+ case VP56_MB_INTER_V1_GF:
+ case VP56_MB_INTER_V2_GF:
+ for (b=0; b<6; b++) {
+ int x_off = b==1 || b==3 ? 8 : 0;
+ int y_off = b==2 || b==3 ? 8 : 0;
+ plan = vp56_b6to3[b];
+ vp56_mc(s, b, frame_ref->data[plan], s->stride[plan],
+ 16*col+x_off, 16*row+y_off);
+ s->dsp.idct_add(frame_current->data[plan] + s->block_offset[b],
+ s->stride[plan], s->block_coeff[b]);
+ }
+ break;
+ }
+}
+
+static int vp56_size_changed(AVCodecContext *avctx, vp56_context_t *s)
+{
+ int stride = s->frames[VP56_FRAME_CURRENT].linesize[0];
+ int i;
+
+ s->plane_width[0] = s->avctx->coded_width;
+ s->plane_width[1] = s->plane_width[2] = s->avctx->coded_width/2;
+ s->plane_height[0] = s->avctx->coded_height;
+ s->plane_height[1] = s->plane_height[2] = s->avctx->coded_height/2;
+
+ for (i=0; i<3; i++)
+ s->stride[i] = s->flip * s->frames[VP56_FRAME_CURRENT].linesize[i];
+
+ s->mb_width = (s->avctx->coded_width+15) / 16;
+ s->mb_height = (s->avctx->coded_height+15) / 16;
+
+ if (s->mb_width > 1000 || s->mb_height > 1000) {
+ av_log(avctx, AV_LOG_ERROR, "picture too big\n");
+ return -1;
+ }
+
+ s->above_blocks = av_realloc(s->above_blocks,
+ (4*s->mb_width+6) * sizeof(*s->above_blocks));
+ s->macroblocks = av_realloc(s->macroblocks,
+ s->mb_width*s->mb_height*sizeof(*s->macroblocks));
+ av_free(s->edge_emu_buffer_alloc);
+ s->edge_emu_buffer_alloc = av_malloc(16*stride);
+ s->edge_emu_buffer = s->edge_emu_buffer_alloc;
+ if (s->flip < 0)
+ s->edge_emu_buffer += 15 * stride;
+
+ return 0;
+}
+
+int vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
+ uint8_t *buf, int buf_size)
+{
+ vp56_context_t *s = avctx->priv_data;
+ AVFrame *const p = &s->frames[VP56_FRAME_CURRENT];
+ AVFrame *picture = data;
+ int mb_row, mb_col, mb_row_flip, mb_offset = 0;
+ int block, y, uv, stride_y, stride_uv;
+ int golden_frame = 0;
+ int res;
+
+ res = s->parse_header(s, buf, buf_size, &golden_frame);
+ if (!res)
+ return -1;
+
+ p->reference = 1;
+ if (avctx->get_buffer(avctx, p) < 0) {
+ av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+ return -1;
+ }
+
+ if (res == 2)
+ if (vp56_size_changed(avctx, s)) {
+ avctx->release_buffer(avctx, p);
+ return -1;
+ }
+
+ if (p->key_frame) {
+ p->pict_type = FF_I_TYPE;
+ s->default_models_init(s);
+ for (block=0; block<s->mb_height*s->mb_width; block++)
+ s->macroblocks[block].type = VP56_MB_INTRA;
+ } else {
+ p->pict_type = FF_P_TYPE;
+ vp56_parse_mb_type_models(s);
+ s->parse_vector_models(s);
+ s->mb_type = VP56_MB_INTER_NOVEC_PF;
+ }
+
+ s->parse_coeff_models(s);
+
+ memset(s->prev_dc, 0, sizeof(s->prev_dc));
+ s->prev_dc[1][VP56_FRAME_CURRENT] = 128;
+ s->prev_dc[2][VP56_FRAME_CURRENT] = 128;
+
+ for (block=0; block < 4*s->mb_width+6; block++) {
+ s->above_blocks[block].ref_frame = -1;
+ s->above_blocks[block].dc_coeff = 0;
+ s->above_blocks[block].not_null_dc = 0;
+ }
+ s->above_blocks[2*s->mb_width + 2].ref_frame = 0;
+ s->above_blocks[3*s->mb_width + 4].ref_frame = 0;
+
+ stride_y = p->linesize[0];
+ stride_uv = p->linesize[1];
+
+ if (s->flip < 0)
+ mb_offset = 7;
+
+ /* main macroblocks loop */
+ for (mb_row=0; mb_row<s->mb_height; mb_row++) {
+ if (s->flip < 0)
+ mb_row_flip = s->mb_height - mb_row - 1;
+ else
+ mb_row_flip = mb_row;
+
+ for (block=0; block<4; block++) {
+ s->left_block[block].ref_frame = -1;
+ s->left_block[block].dc_coeff = 0;
+ s->left_block[block].not_null_dc = 0;
+ memset(s->coeff_ctx[block], 0, 64*sizeof(s->coeff_ctx[block][0]));
+ }
+ memset(s->coeff_ctx_last, 24, sizeof(s->coeff_ctx_last));
+
+ s->above_block_idx[0] = 1;
+ s->above_block_idx[1] = 2;
+ s->above_block_idx[2] = 1;
+ s->above_block_idx[3] = 2;
+ s->above_block_idx[4] = 2*s->mb_width + 2 + 1;
+ s->above_block_idx[5] = 3*s->mb_width + 4 + 1;
+
+ s->block_offset[s->frbi] = (mb_row_flip*16 + mb_offset) * stride_y;
+ s->block_offset[s->srbi] = s->block_offset[s->frbi] + 8*stride_y;
+ s->block_offset[1] = s->block_offset[0] + 8;
+ s->block_offset[3] = s->block_offset[2] + 8;
+ s->block_offset[4] = (mb_row_flip*8 + mb_offset) * stride_uv;
+ s->block_offset[5] = s->block_offset[4];
+
+ for (mb_col=0; mb_col<s->mb_width; mb_col++) {
+ vp56_decode_mb(s, mb_row, mb_col);
+
+ for (y=0; y<4; y++) {
+ s->above_block_idx[y] += 2;
+ s->block_offset[y] += 16;
+ }
+
+ for (uv=4; uv<6; uv++) {
+ s->above_block_idx[uv] += 1;
+ s->block_offset[uv] += 8;
+ }
+ }
+ }
+
+ if (s->frames[VP56_FRAME_PREVIOUS].data[0]
+ && (s->frames[VP56_FRAME_PREVIOUS].data[0]
+ != s->frames[VP56_FRAME_GOLDEN].data[0])) {
+ avctx->release_buffer(avctx, &s->frames[VP56_FRAME_PREVIOUS]);
+ }
+ if (p->key_frame || golden_frame) {
+ if (s->frames[VP56_FRAME_GOLDEN].data[0])
+ avctx->release_buffer(avctx, &s->frames[VP56_FRAME_GOLDEN]);
+ s->frames[VP56_FRAME_GOLDEN] = *p;
+ }
+ s->frames[VP56_FRAME_PREVIOUS] = *p;
+
+ *picture = *p;
+ *data_size = sizeof(AVPicture);
+
+ return buf_size;
+}
+
+void vp56_init(vp56_context_t *s, AVCodecContext *avctx, int flip)
+{
+ int i;
+
+ s->avctx = avctx;
+ avctx->pix_fmt = PIX_FMT_YUV420P;
+
+ if (s->avctx->idct_algo == FF_IDCT_AUTO)
+ s->avctx->idct_algo = FF_IDCT_VP3;
+ dsputil_init(&s->dsp, s->avctx);
+ ff_init_scantable(s->dsp.idct_permutation, &s->scantable,ff_zigzag_direct);
+
+ avcodec_set_dimensions(s->avctx, 0, 0);
+
+ for (i=0; i<3; i++)
+ s->frames[i].data[0] = NULL;
+ s->edge_emu_buffer_alloc = NULL;
+
+ s->above_blocks = NULL;
+ s->macroblocks = NULL;
+ s->quantizer = -1;
+ s->deblock_filtering = 1;
+
+ s->filter = NULL;
+
+ if (flip) {
+ s->flip = -1;
+ s->frbi = 2;
+ s->srbi = 0;
+ } else {
+ s->flip = 1;
+ s->frbi = 0;
+ s->srbi = 2;
+ }
+}
+
+int vp56_free(AVCodecContext *avctx)
+{
+ vp56_context_t *s = avctx->priv_data;
+
+ av_free(s->above_blocks);
+ av_free(s->macroblocks);
+ av_free(s->edge_emu_buffer_alloc);
+ if (s->frames[VP56_FRAME_GOLDEN].data[0]
+ && (s->frames[VP56_FRAME_PREVIOUS].data[0]
+ != s->frames[VP56_FRAME_GOLDEN].data[0]))
+ avctx->release_buffer(avctx, &s->frames[VP56_FRAME_GOLDEN]);
+ if (s->frames[VP56_FRAME_PREVIOUS].data[0])
+ avctx->release_buffer(avctx, &s->frames[VP56_FRAME_PREVIOUS]);
+ return 0;
+}
diff --git a/src/libffmpeg/libavcodec/vp56.h b/src/libffmpeg/libavcodec/vp56.h
new file mode 100644
index 000000000..f8b3a8e4b
--- /dev/null
+++ b/src/libffmpeg/libavcodec/vp56.h
@@ -0,0 +1,249 @@
+/**
+ * @file vp56.h
+ * VP5 and VP6 compatible video decoder (common features)
+ *
+ * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef VP56_H
+#define VP56_H
+
+#include "vp56data.h"
+#include "dsputil.h"
+#include "mpegvideo.h"
+
+
+typedef struct vp56_context vp56_context_t;
+typedef struct vp56_mv vp56_mv_t;
+
+typedef void (*vp56_parse_vector_adjustment_t)(vp56_context_t *s,
+ vp56_mv_t *vect);
+typedef int (*vp56_adjust_t)(int v, int t);
+typedef void (*vp56_filter_t)(vp56_context_t *s, uint8_t *dst, uint8_t *src,
+ int offset1, int offset2, int stride,
+ vp56_mv_t mv, int mask, int select, int luma);
+typedef void (*vp56_parse_coeff_t)(vp56_context_t *s);
+typedef void (*vp56_default_models_init_t)(vp56_context_t *s);
+typedef void (*vp56_parse_vector_models_t)(vp56_context_t *s);
+typedef void (*vp56_parse_coeff_models_t)(vp56_context_t *s);
+typedef int (*vp56_parse_header_t)(vp56_context_t *s, uint8_t *buf,
+ int buf_size, int *golden_frame);
+
+typedef struct {
+ int high;
+ int bits;
+ const uint8_t *buffer;
+ unsigned long code_word;
+} vp56_range_coder_t;
+
+typedef struct {
+ uint8_t not_null_dc;
+ vp56_frame_t ref_frame;
+ DCTELEM dc_coeff;
+} vp56_ref_dc_t;
+
+struct vp56_mv {
+ int x;
+ int y;
+};
+
+typedef struct {
+ uint8_t type;
+ vp56_mv_t mv;
+} vp56_macroblock_t;
+
+struct vp56_context {
+ AVCodecContext *avctx;
+ DSPContext dsp;
+ ScanTable scantable;
+ AVFrame frames[3];
+ uint8_t *edge_emu_buffer_alloc;
+ uint8_t *edge_emu_buffer;
+ vp56_range_coder_t c;
+ int sub_version;
+
+ /* frame info */
+ int plane_width[3];
+ int plane_height[3];
+ int mb_width; /* number of horizontal MB */
+ int mb_height; /* number of vertical MB */
+ int block_offset[6];
+
+ int quantizer;
+ uint16_t dequant_dc;
+ uint16_t dequant_ac;
+
+ /* DC predictors management */
+ vp56_ref_dc_t *above_blocks;
+ vp56_ref_dc_t left_block[4];
+ int above_block_idx[6];
+    DCTELEM prev_dc[3][3]; /* [plane][ref_frame] */
+
+ /* blocks / macroblock */
+ vp56_mb_t mb_type;
+ vp56_macroblock_t *macroblocks;
+ DECLARE_ALIGNED_16(DCTELEM, block_coeff[6][64]);
+ uint8_t coeff_reorder[64]; /* used in vp6 only */
+ uint8_t coeff_index_to_pos[64]; /* used in vp6 only */
+
+ /* motion vectors */
+ vp56_mv_t mv[6]; /* vectors for each block in MB */
+ vp56_mv_t vector_candidate[2];
+ int vector_candidate_pos;
+
+ /* filtering hints */
+ int deblock_filtering;
+ int filter_selection;
+ int filter_mode;
+ int max_vector_length;
+ int sample_variance_threshold;
+
+ /* AC models */
+ uint8_t vector_model_sig[2]; /* delta sign */
+ uint8_t vector_model_dct[2]; /* delta coding types */
+ uint8_t vector_model_pdi[2][2]; /* predefined delta init */
+ uint8_t vector_model_pdv[2][7]; /* predefined delta values */
+ uint8_t vector_model_fdv[2][8]; /* 8 bit delta value definition */
+ uint8_t mb_type_model[3][10][10]; /* model for decoding MB type */
+ uint8_t coeff_model_dccv[2][11]; /* DC coeff value */
+ uint8_t coeff_model_ract[2][3][6][11]; /* Run/AC coding type and AC coeff value */
+ uint8_t coeff_model_acct[2][3][3][6][5];/* vp5 only AC coding type for coding group < 3 */
+ uint8_t coeff_model_dcct[2][36][5]; /* DC coeff coding type */
+ uint8_t coeff_model_runv[2][14]; /* run value (vp6 only) */
+ uint8_t mb_types_stats[3][10][2]; /* contextual, next MB type stats */
+ uint8_t coeff_ctx[4][64]; /* used in vp5 only */
+ uint8_t coeff_ctx_last[4]; /* used in vp5 only */
+
+ /* upside-down flipping hints */
+    int flip; /* are we flipping? */
+ int frbi; /* first row block index in MB */
+ int srbi; /* second row block index in MB */
+    int stride[3]; /* stride for each plane */
+
+ const uint8_t *vp56_coord_div;
+ vp56_parse_vector_adjustment_t parse_vector_adjustment;
+ vp56_adjust_t adjust;
+ vp56_filter_t filter;
+ vp56_parse_coeff_t parse_coeff;
+ vp56_default_models_init_t default_models_init;
+ vp56_parse_vector_models_t parse_vector_models;
+ vp56_parse_coeff_models_t parse_coeff_models;
+ vp56_parse_header_t parse_header;
+};
+
+
+void vp56_init(vp56_context_t *s, AVCodecContext *avctx, int flip);
+int vp56_free(AVCodecContext *avctx);
+void vp56_init_dequant(vp56_context_t *s, int quantizer);
+int vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
+ uint8_t *buf, int buf_size);
+
+
+/**
+ * vp56 specific range coder implementation
+ */
+
+static inline void vp56_init_range_decoder(vp56_range_coder_t *c,
+ const uint8_t *buf, int buf_size)
+{
+ c->high = 255;
+ c->bits = 8;
+ c->buffer = buf;
+ c->code_word = *c->buffer++ << 8;
+ c->code_word |= *c->buffer++;
+}
+
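+/* Decode one bit from the range coder.  low = 1 + (high-1)*prob/256 is the
+ * scaled boundary between the two symbols, so prob/256 is (roughly) the
+ * probability of decoding a 0; the interval is then renormalized until
+ * high >= 128, pulling in one byte whenever 8 bits have been consumed. */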
+static inline int vp56_rac_get_prob(vp56_range_coder_t *c, uint8_t prob)
+{
+ unsigned int low = 1 + (((c->high - 1) * prob) / 256);
+ unsigned int low_shift = low << 8;
+ int bit = c->code_word >= low_shift;
+
+ if (bit) {
+ c->high -= low;
+ c->code_word -= low_shift;
+ } else {
+ c->high = low;
+ }
+
+ /* normalize */
+ while (c->high < 128) {
+ c->high <<= 1;
+ c->code_word <<= 1;
+ if (--c->bits == 0) {
+ c->bits = 8;
+ c->code_word |= *c->buffer++;
+ }
+ }
+ return bit;
+}
+
+static inline int vp56_rac_get(vp56_range_coder_t *c)
+{
+ /* equiprobable */
+ int low = (c->high + 1) >> 1;
+ unsigned int low_shift = low << 8;
+ int bit = c->code_word >= low_shift;
+ if (bit) {
+ c->high = (c->high - low) << 1;
+ c->code_word -= low_shift;
+ } else {
+ c->high = low << 1;
+ }
+
+ /* normalize */
+ c->code_word <<= 1;
+ if (--c->bits == 0) {
+ c->bits = 8;
+ c->code_word |= *c->buffer++;
+ }
+ return bit;
+}
+
+static inline int vp56_rac_gets(vp56_range_coder_t *c, int bits)
+{
+ int value = 0;
+
+ while (bits--) {
+ value = (value << 1) | vp56_rac_get(c);
+ }
+
+ return value;
+}
+
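+/* "gets non-null": read a 7 bit value v and return 2*v, or 1 when v is 0,
+ * so the result is never null (note the bits argument is ignored; callers
+ * in this codebase always pass 7). */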
+static inline int vp56_rac_gets_nn(vp56_range_coder_t *c, int bits)
+{
+ int v = vp56_rac_gets(c, 7) << 1;
+ return v + !v;
+}
+
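+/* Walk a binary tree stored as a flat array: an interior node holds a
+ * positive offset to the subtree taken on bit 1 (bit 0 steps to the next
+ * entry), a leaf holds the decoded value negated. */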
+static inline int vp56_rac_get_tree(vp56_range_coder_t *c,
+ const vp56_tree_t *tree,
+ const uint8_t *probs)
+{
+ while (tree->val > 0) {
+ if (vp56_rac_get_prob(c, probs[tree->prob_idx]))
+ tree += tree->val;
+ else
+ tree++;
+ }
+ return -tree->val;
+}
+
+#endif /* VP56_H */
diff --git a/src/libffmpeg/libavcodec/vp56data.c b/src/libffmpeg/libavcodec/vp56data.c
new file mode 100644
index 000000000..e75c6d1ce
--- /dev/null
+++ b/src/libffmpeg/libavcodec/vp56data.c
@@ -0,0 +1,66 @@
+/**
+ * @file vp56data.c
+ * VP5 and VP6 compatible video decoder (common data)
+ *
+ * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vp56data.h"
+
+const uint8_t vp56_b6to3[] = { 0, 0, 0, 0, 1, 2 };
+const uint8_t vp56_b6to4[] = { 0, 0, 1, 1, 2, 3 };
+
+const uint8_t vp56_coeff_parse_table[6][11] = {
+ { 159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 145, 165, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 140, 148, 173, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 135, 140, 155, 176, 0, 0, 0, 0, 0, 0, 0 },
+ { 130, 134, 141, 157, 180, 0, 0, 0, 0, 0, 0 },
+ { 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254 },
+};
+
+const uint8_t vp56_def_mb_types_stats[3][10][2] = {
+ { { 69, 42 }, { 1, 2 }, { 1, 7 }, { 44, 42 }, { 6, 22 },
+ { 1, 3 }, { 0, 2 }, { 1, 5 }, { 0, 1 }, { 0, 0 }, },
+ { { 229, 8 }, { 1, 1 }, { 0, 8 }, { 0, 0 }, { 0, 0 },
+ { 1, 2 }, { 0, 1 }, { 0, 0 }, { 1, 1 }, { 0, 0 }, },
+ { { 122, 35 }, { 1, 1 }, { 1, 6 }, { 46, 34 }, { 0, 0 },
+ { 1, 2 }, { 0, 1 }, { 0, 1 }, { 1, 1 }, { 0, 0 }, },
+};
+
+const vp56_tree_t vp56_pva_tree[] = {
+ { 8, 0},
+ { 4, 1},
+ { 2, 2}, {-0}, {-1},
+ { 2, 3}, {-2}, {-3},
+ { 4, 4},
+ { 2, 5}, {-4}, {-5},
+ { 2, 6}, {-6}, {-7},
+};
+
+const vp56_tree_t vp56_pc_tree[] = {
+ { 4, 6},
+ { 2, 7}, {-0}, {-1},
+ { 4, 8},
+ { 2, 9}, {-2}, {-3},
+ { 2,10}, {-4}, {-5},
+};
+
+const uint8_t vp56_coeff_bias[] = { 5, 7, 11, 19, 35, 67 };
+const uint8_t vp56_coeff_bit_length[] = { 0, 1, 2, 3, 4, 10 };
diff --git a/src/libffmpeg/libavcodec/vp56data.h b/src/libffmpeg/libavcodec/vp56data.h
new file mode 100644
index 000000000..dbf92dd68
--- /dev/null
+++ b/src/libffmpeg/libavcodec/vp56data.h
@@ -0,0 +1,248 @@
+/**
+ * @file vp56data.h
+ * VP5 and VP6 compatible video decoder (common data)
+ *
+ * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef VP56DATA_H
+#define VP56DATA_H
+
+#include "common.h"
+
+typedef enum {
+ VP56_FRAME_CURRENT = 0,
+ VP56_FRAME_PREVIOUS = 1,
+ VP56_FRAME_GOLDEN = 2,
+} vp56_frame_t;
+
+typedef enum {
+ VP56_MB_INTER_NOVEC_PF = 0, /**< Inter MB, no vector, from previous frame */
+ VP56_MB_INTRA = 1, /**< Intra MB */
+ VP56_MB_INTER_DELTA_PF = 2, /**< Inter MB, above/left vector + delta, from previous frame */
+ VP56_MB_INTER_V1_PF = 3, /**< Inter MB, first vector, from previous frame */
+ VP56_MB_INTER_V2_PF = 4, /**< Inter MB, second vector, from previous frame */
+ VP56_MB_INTER_NOVEC_GF = 5, /**< Inter MB, no vector, from golden frame */
+ VP56_MB_INTER_DELTA_GF = 6, /**< Inter MB, above/left vector + delta, from golden frame */
+ VP56_MB_INTER_4V = 7, /**< Inter MB, 4 vectors, from previous frame */
+ VP56_MB_INTER_V1_GF = 8, /**< Inter MB, first vector, from golden frame */
+ VP56_MB_INTER_V2_GF = 9, /**< Inter MB, second vector, from golden frame */
+} vp56_mb_t;
+
+typedef struct {
+ int8_t val;
+ int8_t prob_idx;
+} vp56_tree_t;
+
+extern const uint8_t vp56_b6to3[];
+extern const uint8_t vp56_b6to4[];
+extern const uint8_t vp56_coeff_parse_table[6][11];
+extern const uint8_t vp56_def_mb_types_stats[3][10][2];
+extern const vp56_tree_t vp56_pva_tree[];
+extern const vp56_tree_t vp56_pc_tree[];
+extern const uint8_t vp56_coeff_bias[];
+extern const uint8_t vp56_coeff_bit_length[];
+
+static const vp56_frame_t vp56_reference_frame[] = {
+ VP56_FRAME_PREVIOUS, /* VP56_MB_INTER_NOVEC_PF */
+ VP56_FRAME_CURRENT, /* VP56_MB_INTRA */
+ VP56_FRAME_PREVIOUS, /* VP56_MB_INTER_DELTA_PF */
+ VP56_FRAME_PREVIOUS, /* VP56_MB_INTER_V1_PF */
+ VP56_FRAME_PREVIOUS, /* VP56_MB_INTER_V2_PF */
+ VP56_FRAME_GOLDEN, /* VP56_MB_INTER_NOVEC_GF */
+ VP56_FRAME_GOLDEN, /* VP56_MB_INTER_DELTA_GF */
+ VP56_FRAME_PREVIOUS, /* VP56_MB_INTER_4V */
+ VP56_FRAME_GOLDEN, /* VP56_MB_INTER_V1_GF */
+ VP56_FRAME_GOLDEN, /* VP56_MB_INTER_V2_GF */
+};
+
+static const uint8_t vp56_ac_dequant[64] = {
+ 94, 92, 90, 88, 86, 82, 78, 74,
+ 70, 66, 62, 58, 54, 53, 52, 51,
+ 50, 49, 48, 47, 46, 45, 44, 43,
+ 42, 40, 39, 37, 36, 35, 34, 33,
+ 32, 31, 30, 29, 28, 27, 26, 25,
+ 24, 23, 22, 21, 20, 19, 18, 17,
+ 16, 15, 14, 13, 12, 11, 10, 9,
+ 8, 7, 6, 5, 4, 3, 2, 1,
+};
+
+static const uint8_t vp56_dc_dequant[64] = {
+ 47, 47, 47, 47, 45, 43, 43, 43,
+ 43, 43, 42, 41, 41, 40, 40, 40,
+ 40, 35, 35, 35, 35, 33, 33, 33,
+ 33, 32, 32, 32, 27, 27, 26, 26,
+ 25, 25, 24, 24, 23, 23, 19, 19,
+ 19, 19, 18, 18, 17, 16, 16, 16,
+ 16, 16, 15, 11, 11, 11, 10, 10,
+ 9, 8, 7, 5, 3, 3, 2, 2,
+};
+
+static const uint8_t vp56_pre_def_mb_type_stats[16][3][10][2] = {
+ { { { 9, 15 }, { 32, 25 }, { 7, 19 }, { 9, 21 }, { 1, 12 },
+ { 14, 12 }, { 3, 18 }, { 14, 23 }, { 3, 10 }, { 0, 4 }, },
+ { { 41, 22 }, { 1, 0 }, { 1, 31 }, { 0, 0 }, { 0, 0 },
+ { 0, 1 }, { 1, 7 }, { 0, 1 }, { 98, 25 }, { 4, 10 }, },
+ { { 2, 3 }, { 2, 3 }, { 0, 2 }, { 0, 2 }, { 0, 0 },
+ { 11, 4 }, { 1, 4 }, { 0, 2 }, { 3, 2 }, { 0, 4 }, }, },
+ { { { 48, 39 }, { 1, 2 }, { 11, 27 }, { 29, 44 }, { 7, 27 },
+ { 1, 4 }, { 0, 3 }, { 1, 6 }, { 1, 2 }, { 0, 0 }, },
+ { { 123, 37 }, { 6, 4 }, { 1, 27 }, { 0, 0 }, { 0, 0 },
+ { 5, 8 }, { 1, 7 }, { 0, 1 }, { 12, 10 }, { 0, 2 }, },
+ { { 49, 46 }, { 3, 4 }, { 7, 31 }, { 42, 41 }, { 0, 0 },
+ { 2, 6 }, { 1, 7 }, { 1, 4 }, { 2, 4 }, { 0, 1 }, }, },
+ { { { 21, 32 }, { 1, 2 }, { 4, 10 }, { 32, 43 }, { 6, 23 },
+ { 2, 3 }, { 1, 19 }, { 1, 6 }, { 12, 21 }, { 0, 7 }, },
+ { { 26, 14 }, { 14, 12 }, { 0, 24 }, { 0, 0 }, { 0, 0 },
+ { 55, 17 }, { 1, 9 }, { 0, 36 }, { 5, 7 }, { 1, 3 }, },
+ { { 26, 25 }, { 1, 1 }, { 2, 10 }, { 67, 39 }, { 0, 0 },
+ { 1, 1 }, { 0, 14 }, { 0, 2 }, { 31, 26 }, { 1, 6 }, }, },
+ { { { 69, 83 }, { 0, 0 }, { 0, 2 }, { 10, 29 }, { 3, 12 },
+ { 0, 1 }, { 0, 3 }, { 0, 3 }, { 2, 2 }, { 0, 0 }, },
+ { { 209, 5 }, { 0, 0 }, { 0, 27 }, { 0, 0 }, { 0, 0 },
+ { 0, 1 }, { 0, 1 }, { 0, 1 }, { 0, 0 }, { 0, 0 }, },
+ { { 103, 46 }, { 1, 2 }, { 2, 10 }, { 33, 42 }, { 0, 0 },
+ { 1, 4 }, { 0, 3 }, { 0, 1 }, { 1, 3 }, { 0, 0 }, }, },
+ { { { 11, 20 }, { 1, 4 }, { 18, 36 }, { 43, 48 }, { 13, 35 },
+ { 0, 2 }, { 0, 5 }, { 3, 12 }, { 1, 2 }, { 0, 0 }, },
+ { { 2, 5 }, { 4, 5 }, { 0, 121 }, { 0, 0 }, { 0, 0 },
+ { 0, 3 }, { 2, 4 }, { 1, 4 }, { 2, 2 }, { 0, 1 }, },
+ { { 14, 31 }, { 9, 13 }, { 14, 54 }, { 22, 29 }, { 0, 0 },
+ { 2, 6 }, { 4, 18 }, { 6, 13 }, { 1, 5 }, { 0, 1 }, }, },
+ { { { 70, 44 }, { 0, 1 }, { 2, 10 }, { 37, 46 }, { 8, 26 },
+ { 0, 2 }, { 0, 2 }, { 0, 2 }, { 0, 1 }, { 0, 0 }, },
+ { { 175, 5 }, { 0, 1 }, { 0, 48 }, { 0, 0 }, { 0, 0 },
+ { 0, 2 }, { 0, 1 }, { 0, 2 }, { 0, 1 }, { 0, 0 }, },
+ { { 85, 39 }, { 0, 0 }, { 1, 9 }, { 69, 40 }, { 0, 0 },
+ { 0, 1 }, { 0, 3 }, { 0, 1 }, { 2, 3 }, { 0, 0 }, }, },
+ { { { 8, 15 }, { 0, 1 }, { 8, 21 }, { 74, 53 }, { 22, 42 },
+ { 0, 1 }, { 0, 2 }, { 0, 3 }, { 1, 2 }, { 0, 0 }, },
+ { { 83, 5 }, { 2, 3 }, { 0, 102 }, { 0, 0 }, { 0, 0 },
+ { 1, 3 }, { 0, 2 }, { 0, 1 }, { 0, 0 }, { 0, 0 }, },
+ { { 31, 28 }, { 0, 0 }, { 3, 14 }, { 130, 34 }, { 0, 0 },
+ { 0, 1 }, { 0, 3 }, { 0, 1 }, { 3, 3 }, { 0, 1 }, }, },
+ { { { 141, 42 }, { 0, 0 }, { 1, 4 }, { 11, 24 }, { 1, 11 },
+ { 0, 1 }, { 0, 1 }, { 0, 2 }, { 0, 0 }, { 0, 0 }, },
+ { { 233, 6 }, { 0, 0 }, { 0, 8 }, { 0, 0 }, { 0, 0 },
+ { 0, 1 }, { 0, 1 }, { 0, 0 }, { 0, 1 }, { 0, 0 }, },
+ { { 171, 25 }, { 0, 0 }, { 1, 5 }, { 25, 21 }, { 0, 0 },
+ { 0, 1 }, { 0, 1 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, }, },
+ { { { 8, 19 }, { 4, 10 }, { 24, 45 }, { 21, 37 }, { 9, 29 },
+ { 0, 3 }, { 1, 7 }, { 11, 25 }, { 0, 2 }, { 0, 1 }, },
+ { { 34, 16 }, { 112, 21 }, { 1, 28 }, { 0, 0 }, { 0, 0 },
+ { 6, 8 }, { 1, 7 }, { 0, 3 }, { 2, 5 }, { 0, 2 }, },
+ { { 17, 21 }, { 68, 29 }, { 6, 15 }, { 13, 22 }, { 0, 0 },
+ { 6, 12 }, { 3, 14 }, { 4, 10 }, { 1, 7 }, { 0, 3 }, }, },
+ { { { 46, 42 }, { 0, 1 }, { 2, 10 }, { 54, 51 }, { 10, 30 },
+ { 0, 2 }, { 0, 2 }, { 0, 1 }, { 0, 1 }, { 0, 0 }, },
+ { { 159, 35 }, { 2, 2 }, { 0, 25 }, { 0, 0 }, { 0, 0 },
+ { 3, 6 }, { 0, 5 }, { 0, 1 }, { 4, 4 }, { 0, 1 }, },
+ { { 51, 39 }, { 0, 1 }, { 2, 12 }, { 91, 44 }, { 0, 0 },
+ { 0, 2 }, { 0, 3 }, { 0, 1 }, { 2, 3 }, { 0, 1 }, }, },
+ { { { 28, 32 }, { 0, 0 }, { 3, 10 }, { 75, 51 }, { 14, 33 },
+ { 0, 1 }, { 0, 2 }, { 0, 1 }, { 1, 2 }, { 0, 0 }, },
+ { { 75, 39 }, { 5, 7 }, { 2, 48 }, { 0, 0 }, { 0, 0 },
+ { 3, 11 }, { 2, 16 }, { 1, 4 }, { 7, 10 }, { 0, 2 }, },
+ { { 81, 25 }, { 0, 0 }, { 2, 9 }, { 106, 26 }, { 0, 0 },
+ { 0, 1 }, { 0, 1 }, { 0, 1 }, { 1, 1 }, { 0, 0 }, }, },
+ { { { 100, 46 }, { 0, 1 }, { 3, 9 }, { 21, 37 }, { 5, 20 },
+ { 0, 1 }, { 0, 2 }, { 1, 2 }, { 0, 1 }, { 0, 0 }, },
+ { { 212, 21 }, { 0, 1 }, { 0, 9 }, { 0, 0 }, { 0, 0 },
+ { 1, 2 }, { 0, 2 }, { 0, 0 }, { 2, 2 }, { 0, 0 }, },
+ { { 140, 37 }, { 0, 1 }, { 1, 8 }, { 24, 33 }, { 0, 0 },
+ { 1, 2 }, { 0, 2 }, { 0, 1 }, { 1, 2 }, { 0, 0 }, }, },
+ { { { 27, 29 }, { 0, 1 }, { 9, 25 }, { 53, 51 }, { 12, 34 },
+ { 0, 1 }, { 0, 3 }, { 1, 5 }, { 0, 2 }, { 0, 0 }, },
+ { { 4, 2 }, { 0, 0 }, { 0, 172 }, { 0, 0 }, { 0, 0 },
+ { 0, 1 }, { 0, 2 }, { 0, 0 }, { 2, 0 }, { 0, 0 }, },
+ { { 14, 23 }, { 1, 3 }, { 11, 53 }, { 90, 31 }, { 0, 0 },
+ { 0, 3 }, { 1, 5 }, { 2, 6 }, { 1, 2 }, { 0, 0 }, }, },
+ { { { 80, 38 }, { 0, 0 }, { 1, 4 }, { 69, 33 }, { 5, 16 },
+ { 0, 1 }, { 0, 1 }, { 0, 0 }, { 0, 1 }, { 0, 0 }, },
+ { { 187, 22 }, { 1, 1 }, { 0, 17 }, { 0, 0 }, { 0, 0 },
+ { 3, 6 }, { 0, 4 }, { 0, 1 }, { 4, 4 }, { 0, 1 }, },
+ { { 123, 29 }, { 0, 0 }, { 1, 7 }, { 57, 30 }, { 0, 0 },
+ { 0, 1 }, { 0, 1 }, { 0, 1 }, { 0, 1 }, { 0, 0 }, }, },
+ { { { 16, 20 }, { 0, 0 }, { 2, 8 }, { 104, 49 }, { 15, 33 },
+ { 0, 1 }, { 0, 1 }, { 0, 1 }, { 1, 1 }, { 0, 0 }, },
+ { { 133, 6 }, { 1, 2 }, { 1, 70 }, { 0, 0 }, { 0, 0 },
+ { 0, 2 }, { 0, 4 }, { 0, 3 }, { 1, 1 }, { 0, 0 }, },
+ { { 13, 14 }, { 0, 0 }, { 4, 20 }, { 175, 20 }, { 0, 0 },
+ { 0, 1 }, { 0, 1 }, { 0, 1 }, { 1, 1 }, { 0, 0 }, }, },
+ { { { 194, 16 }, { 0, 0 }, { 1, 1 }, { 1, 9 }, { 1, 3 },
+ { 0, 0 }, { 0, 1 }, { 0, 1 }, { 0, 0 }, { 0, 0 }, },
+ { { 251, 1 }, { 0, 0 }, { 0, 2 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, },
+ { { 202, 23 }, { 0, 0 }, { 1, 3 }, { 2, 9 }, { 0, 0 },
+ { 0, 1 }, { 0, 1 }, { 0, 1 }, { 0, 0 }, { 0, 0 }, }, },
+};
+
+static const uint8_t vp56_filter_threshold[] = {
+ 14, 14, 13, 13, 12, 12, 10, 10,
+ 10, 10, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 7, 7, 7, 7,
+ 7, 7, 6, 6, 6, 6, 6, 6,
+ 5, 5, 5, 5, 4, 4, 4, 4,
+ 4, 4, 4, 3, 3, 3, 3, 2,
+};
+
+static const uint8_t vp56_mb_type_model_model[] = {
+ 171, 83, 199, 140, 125, 104,
+};
+
+static const vp56_tree_t vp56_pmbtm_tree[] = {
+ { 4, 0},
+ { 2, 1}, {-8}, {-4},
+ { 8, 2},
+ { 6, 3},
+ { 4, 4},
+ { 2, 5}, {-24}, {-20}, {-16}, {-12}, {-0},
+};
+
+static const vp56_tree_t vp56_pmbt_tree[] = {
+ { 8, 1},
+ { 4, 2},
+ { 2, 4}, {-VP56_MB_INTER_NOVEC_PF}, {-VP56_MB_INTER_DELTA_PF},
+ { 2, 5}, {-VP56_MB_INTER_V1_PF}, {-VP56_MB_INTER_V2_PF},
+ { 4, 3},
+ { 2, 6}, {-VP56_MB_INTRA}, {-VP56_MB_INTER_4V},
+ { 4, 7},
+ { 2, 8}, {-VP56_MB_INTER_NOVEC_GF}, {-VP56_MB_INTER_DELTA_GF},
+ { 2, 9}, {-VP56_MB_INTER_V1_GF}, {-VP56_MB_INTER_V2_GF},
+};
+
+/* relative pos of surrounding blocks, from closest to farthest */
+static const int8_t vp56_candidate_predictor_pos[12][2] = {
+ { 0, -1 },
+ { -1, 0 },
+ { -1, -1 },
+ { 1, -1 },
+ { 0, -2 },
+ { -2, 0 },
+ { -2, -1 },
+ { -1, -2 },
+ { 1, -2 },
+ { 2, -1 },
+ { -2, -2 },
+ { 2, -2 },
+};
+
+#endif /* VP56DATA_H */
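
The vp56_tree_t tables above are binary trees flattened into arrays: a positive
val is the forward offset of the right child (the left child is simply the next
entry), prob_idx selects the branch probability, and leaves store the decoded
symbol negated. A minimal sketch of the traversal, matching how
vp56_rac_get_tree() is called from vp6.c below (the boolean-decoder helper
vp56_rac_get_prob() is assumed from vp56.h, which this code includes):

    static int walk_tree(vp56_range_coder_t *c, const vp56_tree_t *tree,
                         const uint8_t *probs)
    {
        while (tree->val > 0) {                  /* val <= 0 marks a leaf */
            if (vp56_rac_get_prob(c, probs[tree->prob_idx]))
                tree += tree->val;               /* bit set: jump to right child */
            else
                tree++;                          /* bit clear: left child is next */
        }
        return -tree->val;                       /* undo the leaf negation */
    }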
diff --git a/src/libffmpeg/libavcodec/vp5data.h b/src/libffmpeg/libavcodec/vp5data.h
new file mode 100644
index 000000000..effc17c2c
--- /dev/null
+++ b/src/libffmpeg/libavcodec/vp5data.h
@@ -0,0 +1,173 @@
+/**
+ * @file vp5data.h
+ * VP5 compatible video decoder
+ *
+ * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef VP5DATA_H
+#define VP5DATA_H
+
+static const uint8_t vp5_coeff_groups[] = {
+ -1, 0, 1, 1, 2, 1, 1, 2,
+ 2, 1, 1, 2, 2, 2, 1, 2,
+ 2, 2, 2, 2, 1, 1, 2, 2,
+ 3, 3, 4, 3, 4, 4, 4, 3,
+ 3, 3, 3, 3, 4, 3, 3, 3,
+ 4, 4, 4, 4, 4, 3, 3, 4,
+ 4, 4, 3, 4, 4, 4, 4, 4,
+ 4, 4, 5, 5, 5, 5, 5, 5,
+};
+
+static const uint8_t vp5_vmc_pct[2][11] = {
+ { 243, 220, 251, 253, 237, 232, 241, 245, 247, 251, 253 },
+ { 235, 211, 246, 249, 234, 231, 248, 249, 252, 252, 254 },
+};
+
+static const uint8_t vp5_dccv_pct[2][11] = {
+ { 146, 197, 181, 207, 232, 243, 238, 251, 244, 250, 249 },
+ { 179, 219, 214, 240, 250, 254, 244, 254, 254, 254, 254 },
+};
+
+static const uint8_t vp5_ract_pct[3][2][6][11] = {
+ { { { 227, 246, 230, 247, 244, 254, 254, 254, 254, 254, 254 },
+ { 202, 254, 209, 231, 231, 249, 249, 253, 254, 254, 254 },
+ { 206, 254, 225, 242, 241, 251, 253, 254, 254, 254, 254 },
+ { 235, 254, 241, 253, 252, 254, 254, 254, 254, 254, 254 },
+ { 234, 254, 248, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } },
+ { { 240, 254, 248, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 238, 254, 240, 253, 254, 254, 254, 254, 254, 254, 254 },
+ { 244, 254, 251, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } } },
+ { { { 206, 203, 227, 239, 247, 254, 253, 254, 254, 254, 254 },
+ { 207, 199, 220, 236, 243, 252, 252, 254, 254, 254, 254 },
+ { 212, 219, 230, 243, 244, 253, 252, 254, 254, 254, 254 },
+ { 236, 237, 247, 252, 253, 254, 254, 254, 254, 254, 254 },
+ { 240, 240, 248, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } },
+ { { 230, 233, 249, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 238, 238, 250, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 248, 251, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } } },
+ { { { 225, 239, 227, 231, 244, 253, 243, 254, 254, 253, 254 },
+ { 232, 234, 224, 228, 242, 249, 242, 252, 251, 251, 254 },
+ { 235, 249, 238, 240, 251, 254, 249, 254, 253, 253, 254 },
+ { 249, 253, 251, 250, 254, 254, 254, 254, 254, 254, 254 },
+ { 251, 250, 249, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } },
+ { { 243, 244, 250, 250, 254, 254, 254, 254, 254, 254, 254 },
+ { 249, 248, 250, 253, 254, 254, 254, 254, 254, 254, 254 },
+ { 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } } },
+};
+
+static const int16_t vp5_dccv_lc[5][36][2] = {
+ { {154, 61}, {141, 54}, { 90, 45}, { 54, 34}, { 54, 13}, {128, 109},
+ {136, 54}, {148, 45}, { 92, 41}, { 54, 33}, { 51, 15}, { 87, 113},
+ { 87, 44}, { 97, 40}, { 67, 36}, { 46, 29}, { 41, 15}, { 64, 80},
+ { 59, 33}, { 61, 31}, { 51, 28}, { 44, 22}, { 33, 12}, { 49, 63},
+ { 69, 12}, { 59, 16}, { 46, 14}, { 31, 13}, { 26, 6}, { 92, 26},
+ {128, 108}, { 77, 119}, { 54, 84}, { 26, 71}, { 87, 19}, { 95, 155} },
+ { {154, 4}, {182, 0}, {159, -8}, {128, -5}, {143, -5}, {187, 55},
+ {182, 0}, {228, -3}, {187, -7}, {174, -9}, {189, -11}, {169, 79},
+ {161, -9}, {192, -8}, {187, -9}, {169, -10}, {136, -9}, {184, 40},
+ {164, -11}, {179, -10}, {174, -10}, {161, -10}, {115, -7}, {197, 20},
+ {195, -11}, {195, -11}, {146, -10}, {110, -6}, { 95, -4}, {195, 39},
+ {182, 55}, {172, 77}, {177, 37}, {169, 29}, {172, 52}, { 92, 162} },
+ { {174, 80}, {164, 80}, { 95, 80}, { 46, 66}, { 56, 24}, { 36, 193},
+ {164, 80}, {166, 77}, {105, 76}, { 49, 68}, { 46, 31}, { 49, 186},
+ { 97, 78}, {110, 74}, { 72, 72}, { 44, 60}, { 33, 30}, { 69, 131},
+ { 61, 61}, { 69, 63}, { 51, 57}, { 31, 48}, { 26, 27}, { 64, 89},
+ { 67, 23}, { 51, 32}, { 36, 33}, { 26, 28}, { 20, 12}, { 44, 68},
+ { 26, 197}, { 41, 189}, { 61, 129}, { 28, 103}, { 49, 52}, {-12, 245} },
+ { {102, 141}, { 79, 166}, { 72, 162}, { 97, 125}, {179, 4}, {307, 0},
+ { 72, 168}, { 69, 175}, { 84, 160}, {105, 127}, {148, 34}, {310, 0},
+ { 84, 151}, { 82, 161}, { 87, 153}, { 87, 135}, {115, 51}, {317, 0},
+ { 97, 125}, {102, 131}, {105, 125}, { 87, 122}, { 84, 64}, { 54, 184},
+ {166, 18}, {146, 43}, {125, 51}, { 90, 64}, { 95, 7}, { 38, 154},
+ {294, 0}, { 13, 225}, { 10, 225}, { 67, 168}, { 0, 167}, {161, 94} },
+ { {172, 76}, {172, 75}, {136, 80}, { 64, 98}, { 74, 67}, {315, 0},
+ {169, 76}, {207, 56}, {164, 66}, { 97, 80}, { 67, 72}, {328, 0},
+ {136, 80}, {187, 53}, {154, 62}, { 72, 85}, { -2, 105}, {305, 0},
+ { 74, 91}, {128, 64}, {113, 64}, { 61, 77}, { 41, 75}, {259, 0},
+ { 46, 84}, { 51, 81}, { 28, 89}, { 31, 78}, { 23, 77}, {202, 0},
+ {323, 0}, {323, 0}, {300, 0}, {236, 0}, {195, 0}, {328, 0} },
+};
+
+static const int16_t vp5_ract_lc[3][3][5][6][2] = {
+ { { { {276, 0}, {238, 0}, {195, 0}, {156, 0}, {113, 0}, {274, 0} },
+ { { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1} },
+ { {192, 59}, {182, 50}, {141, 48}, {110, 40}, { 92, 19}, {125,128} },
+ { {169, 87}, {169, 83}, {184, 62}, {220, 16}, {184, 0}, {264, 0} },
+ { {212, 40}, {212, 36}, {169, 49}, {174, 27}, { 8,120}, {182, 71} } },
+ { { {259, 10}, {197, 19}, {143, 22}, {123, 16}, {110, 8}, {133, 88} },
+ { { 0, 1}, {256, 0}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1} },
+ { {207, 46}, {187, 50}, { 97, 83}, { 23,100}, { 41, 56}, { 56,188} },
+ { {166, 90}, {146,108}, {161, 88}, {136, 95}, {174, 0}, {266, 0} },
+ { {264, 7}, {243, 18}, {184, 43}, {-14,154}, { 20,112}, { 20,199} } },
+ { { {230, 26}, {197, 22}, {159, 20}, {146, 12}, {136, 4}, { 54,162} },
+ { { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1} },
+ { {192, 59}, {156, 72}, { 84,101}, { 49,101}, { 79, 47}, { 79,167} },
+ { {138,115}, {136,116}, {166, 80}, {238, 0}, {195, 0}, {261, 0} },
+ { {225, 33}, {205, 42}, {159, 61}, { 79, 96}, { 92, 66}, { 28,195} } },
+ }, {
+ { { {200, 37}, {197, 18}, {159, 13}, {143, 7}, {102, 5}, {123,126} },
+ { {197, 3}, {220, -9}, {210,-12}, {187, -6}, {151, -2}, {174, 80} },
+ { {200, 53}, {187, 47}, {159, 40}, {118, 38}, {100, 18}, {141,111} },
+ { {179, 78}, {166, 86}, {197, 50}, {207, 27}, {187, 0}, {115,139} },
+ { {218, 34}, {220, 29}, {174, 46}, {128, 61}, { 54, 89}, {187, 65} } },
+ { { {238, 14}, {197, 18}, {125, 26}, { 90, 25}, { 82, 13}, {161, 86} },
+ { {189, 1}, {205, -2}, {156, -4}, {143, -4}, {146, -4}, {172, 72} },
+ { {230, 31}, {192, 45}, {102, 76}, { 38, 85}, { 56, 41}, { 64,173} },
+ { {166, 91}, {141,111}, {128,116}, {118,109}, {177, 0}, { 23,222} },
+ { {253, 14}, {236, 21}, {174, 49}, { 33,118}, { 44, 93}, { 23,187} } },
+ { { {218, 28}, {179, 28}, {118, 35}, { 95, 30}, { 72, 24}, {128,108} },
+ { {187, 1}, {174, -1}, {125, -1}, {110, -1}, {108, -1}, {202, 52} },
+ { {197, 53}, {146, 75}, { 46,118}, { 33,103}, { 64, 50}, {118,126} },
+ { {138,114}, {128,122}, {161, 86}, {243, -6}, {195, 0}, { 38,210} },
+ { {215, 39}, {179, 58}, { 97,101}, { 95, 85}, { 87, 70}, { 69,152} } },
+ }, {
+ { { {236, 24}, {205, 18}, {172, 12}, {154, 6}, {125, 1}, {169, 75} },
+ { {187, 4}, {230, -2}, {228, -4}, {236, -4}, {241, -2}, {192, 66} },
+ { {200, 46}, {187, 42}, {159, 34}, {136, 25}, {105, 10}, {179, 62} },
+ { {207, 55}, {192, 63}, {192, 54}, {195, 36}, {177, 1}, {143, 98} },
+ { {225, 27}, {207, 34}, {200, 30}, {131, 57}, { 97, 60}, {197, 45} } },
+ { { {271, 8}, {218, 13}, {133, 19}, { 90, 19}, { 72, 7}, {182, 51} },
+ { {179, 1}, {225, -1}, {154, -2}, {110, -1}, { 92, 0}, {195, 41} },
+ { {241, 26}, {189, 40}, { 82, 64}, { 33, 60}, { 67, 17}, {120, 94} },
+ { {192, 68}, {151, 94}, {146, 90}, {143, 72}, {161, 0}, {113,128} },
+ { {256, 12}, {218, 29}, {166, 48}, { 44, 99}, { 31, 87}, {148, 78} } },
+ { { {238, 20}, {184, 22}, {113, 27}, { 90, 22}, { 74, 9}, {192, 37} },
+ { {184, 0}, {215, -1}, {141, -1}, { 97, 0}, { 49, 0}, {264, 13} },
+ { {182, 51}, {138, 61}, { 95, 63}, { 54, 59}, { 64, 25}, {200, 45} },
+ { {179, 75}, {156, 87}, {174, 65}, {177, 44}, {174, 0}, {164, 85} },
+ { {195, 45}, {148, 65}, {105, 79}, { 95, 72}, { 87, 60}, {169, 63} } },
+ }
+};
+
+static const uint8_t vp5_coord_div[] = { 2, 2, 2, 2, 4, 4 };
+
+#endif /* VP5DATA_H */
diff --git a/src/libffmpeg/libavcodec/vp6.c b/src/libffmpeg/libavcodec/vp6.c
new file mode 100644
index 000000000..381fcc8ee
--- /dev/null
+++ b/src/libffmpeg/libavcodec/vp6.c
@@ -0,0 +1,537 @@
+/**
+ * @file vp6.c
+ * VP6 compatible video decoder
+ *
+ * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ *
+ * The VP6F decoder accepts an optional 1-byte extradata. It is composed of:
+ * - upper 4 bits: difference between encoded width and visible width
+ * - lower 4 bits: difference between encoded height and visible height
+ */
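+
+/* Illustration (hypothetical names; vp6_parse_header() below applies this
+ * to avctx->extradata[0] when resizing):
+ *     crop_right  = extradata[0] >> 4;   visible_width  = coded_width  - crop_right;
+ *     crop_bottom = extradata[0] & 0x0F; visible_height = coded_height - crop_bottom;
+ */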
+
+#include <stdlib.h>
+
+#include "avcodec.h"
+#include "dsputil.h"
+#include "bitstream.h"
+#include "mpegvideo.h"
+
+#include "vp56.h"
+#include "vp56data.h"
+#include "vp6data.h"
+
+
+static int vp6_parse_header(vp56_context_t *s, uint8_t *buf, int buf_size,
+ int *golden_frame)
+{
+ vp56_range_coder_t *c = &s->c;
+ int parse_filter_info = 0;
+ int vrt_shift = 0;
+ int sub_version;
+ int rows, cols;
+ int res = 1;
+
+ if (buf[0] & 1)
+ return 0;
+
+ s->frames[VP56_FRAME_CURRENT].key_frame = !(buf[0] & 0x80);
+ vp56_init_dequant(s, (buf[0] >> 1) & 0x3F);
+
+ if (s->frames[VP56_FRAME_CURRENT].key_frame) {
+ sub_version = buf[1] >> 3;
+ if (sub_version > 8)
+ return 0;
+ if ((buf[1] & 0x06) != 0x06)
+ return 0;
+ if (buf[1] & 1) {
+ av_log(s->avctx, AV_LOG_ERROR, "interlacing not supported\n");
+ return 0;
+ }
+
+ rows = buf[2]; /* number of stored macroblock rows */
+ cols = buf[3]; /* number of stored macroblock cols */
+ /* buf[4] is number of displayed macroblock rows */
+ /* buf[5] is number of displayed macroblock cols */
+
+ if (16*cols != s->avctx->coded_width ||
+ 16*rows != s->avctx->coded_height) {
+ avcodec_set_dimensions(s->avctx, 16*cols, 16*rows);
+ if (s->avctx->extradata_size == 1) {
+ s->avctx->width -= s->avctx->extradata[0] >> 4;
+ s->avctx->height -= s->avctx->extradata[0] & 0x0F;
+ }
+ res = 2;
+ }
+
+ vp56_init_range_decoder(c, buf+6, buf_size-6);
+ vp56_rac_gets(c, 2);
+
+ parse_filter_info = 1;
+ if (sub_version < 8)
+ vrt_shift = 5;
+ s->sub_version = sub_version;
+ } else {
+ if (!s->sub_version)
+ return 0;
+
+ vp56_init_range_decoder(c, buf+1, buf_size-1);
+
+ *golden_frame = vp56_rac_get(c);
+ s->deblock_filtering = vp56_rac_get(c);
+ if (s->deblock_filtering)
+ vp56_rac_get(c);
+ if (s->sub_version > 7)
+ parse_filter_info = vp56_rac_get(c);
+ }
+
+ if (parse_filter_info) {
+ if (vp56_rac_get(c)) {
+ s->filter_mode = 2;
+ s->sample_variance_threshold = vp56_rac_gets(c, 5) << vrt_shift;
+ s->max_vector_length = 2 << vp56_rac_gets(c, 3);
+ } else if (vp56_rac_get(c)) {
+ s->filter_mode = 1;
+ } else {
+ s->filter_mode = 0;
+ }
+ if (s->sub_version > 7)
+ s->filter_selection = vp56_rac_gets(c, 4);
+ else
+ s->filter_selection = 16;
+ }
+
+ vp56_rac_get(c);
+ return res;
+}
+
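+/* invert coeff_reorder: append positions 1..63 in increasing reorder rank,
+ * yielding the coefficient scan order used by vp6_parse_coeff() below */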
+static void vp6_coeff_order_table_init(vp56_context_t *s)
+{
+ int i, pos, idx = 1;
+
+ s->coeff_index_to_pos[0] = 0;
+ for (i=0; i<16; i++)
+ for (pos=1; pos<64; pos++)
+ if (s->coeff_reorder[pos] == i)
+ s->coeff_index_to_pos[idx++] = pos;
+}
+
+static void vp6_default_models_init(vp56_context_t *s)
+{
+ s->vector_model_dct[0] = 0xA2;
+ s->vector_model_dct[1] = 0xA4;
+ s->vector_model_sig[0] = 0x80;
+ s->vector_model_sig[1] = 0x80;
+
+ memcpy(s->mb_types_stats, vp56_def_mb_types_stats, sizeof(s->mb_types_stats));
+ memcpy(s->vector_model_fdv, vp6_def_fdv_vector_model, sizeof(s->vector_model_fdv));
+ memcpy(s->vector_model_pdv, vp6_def_pdv_vector_model, sizeof(s->vector_model_pdv));
+ memcpy(s->coeff_model_runv, vp6_def_runv_coeff_model, sizeof(s->coeff_model_runv));
+ memcpy(s->coeff_reorder, vp6_def_coeff_reorder, sizeof(s->coeff_reorder));
+
+ vp6_coeff_order_table_init(s);
+}
+
+static void vp6_parse_vector_models(vp56_context_t *s)
+{
+ vp56_range_coder_t *c = &s->c;
+ int comp, node;
+
+ for (comp=0; comp<2; comp++) {
+ if (vp56_rac_get_prob(c, vp6_sig_dct_pct[comp][0]))
+ s->vector_model_dct[comp] = vp56_rac_gets_nn(c, 7);
+ if (vp56_rac_get_prob(c, vp6_sig_dct_pct[comp][1]))
+ s->vector_model_sig[comp] = vp56_rac_gets_nn(c, 7);
+ }
+
+ for (comp=0; comp<2; comp++)
+ for (node=0; node<7; node++)
+ if (vp56_rac_get_prob(c, vp6_pdv_pct[comp][node]))
+ s->vector_model_pdv[comp][node] = vp56_rac_gets_nn(c, 7);
+
+ for (comp=0; comp<2; comp++)
+ for (node=0; node<8; node++)
+ if (vp56_rac_get_prob(c, vp6_fdv_pct[comp][node]))
+ s->vector_model_fdv[comp][node] = vp56_rac_gets_nn(c, 7);
+}
+
+static void vp6_parse_coeff_models(vp56_context_t *s)
+{
+ vp56_range_coder_t *c = &s->c;
+ int def_prob[11];
+ int node, cg, ctx, pos;
+ int ct; /* code type */
+ int pt; /* plane type (0 for Y, 1 for U or V) */
+
+ memset(def_prob, 0x80, sizeof(def_prob));
+
+ for (pt=0; pt<2; pt++)
+ for (node=0; node<11; node++)
+ if (vp56_rac_get_prob(c, vp6_dccv_pct[pt][node])) {
+ def_prob[node] = vp56_rac_gets_nn(c, 7);
+ s->coeff_model_dccv[pt][node] = def_prob[node];
+ } else if (s->frames[VP56_FRAME_CURRENT].key_frame) {
+ s->coeff_model_dccv[pt][node] = def_prob[node];
+ }
+
+ if (vp56_rac_get(c)) {
+ for (pos=1; pos<64; pos++)
+ if (vp56_rac_get_prob(c, vp6_coeff_reorder_pct[pos]))
+ s->coeff_reorder[pos] = vp56_rac_gets(c, 4);
+ vp6_coeff_order_table_init(s);
+ }
+
+ for (cg=0; cg<2; cg++)
+ for (node=0; node<14; node++)
+ if (vp56_rac_get_prob(c, vp6_runv_pct[cg][node]))
+ s->coeff_model_runv[cg][node] = vp56_rac_gets_nn(c, 7);
+
+ for (ct=0; ct<3; ct++)
+ for (pt=0; pt<2; pt++)
+ for (cg=0; cg<6; cg++)
+ for (node=0; node<11; node++)
+ if (vp56_rac_get_prob(c, vp6_ract_pct[ct][pt][cg][node])) {
+ def_prob[node] = vp56_rac_gets_nn(c, 7);
+ s->coeff_model_ract[pt][ct][cg][node] = def_prob[node];
+ } else if (s->frames[VP56_FRAME_CURRENT].key_frame) {
+ s->coeff_model_ract[pt][ct][cg][node] = def_prob[node];
+ }
+
+ /* coeff_model_dcct is a linear combination of coeff_model_dccv */
+ for (pt=0; pt<2; pt++)
+ for (ctx=0; ctx<3; ctx++)
+ for (node=0; node<5; node++)
+ s->coeff_model_dcct[pt][ctx][node] = clip(((s->coeff_model_dccv[pt][node] * vp6_dccv_lc[ctx][node][0] + 128) >> 8) + vp6_dccv_lc[ctx][node][1], 1, 255);
+}
+
+static void vp6_parse_vector_adjustment(vp56_context_t *s, vp56_mv_t *vect)
+{
+ vp56_range_coder_t *c = &s->c;
+ int comp;
+
+ *vect = (vp56_mv_t) {0,0};
+ if (s->vector_candidate_pos < 2)
+ *vect = s->vector_candidate[0];
+
+ for (comp=0; comp<2; comp++) {
+ int i, delta = 0;
+
+ if (vp56_rac_get_prob(c, s->vector_model_dct[comp])) {
+ static const uint8_t prob_order[] = {0, 1, 2, 7, 6, 5, 4};
+ for (i=0; i<sizeof(prob_order); i++) {
+ int j = prob_order[i];
+ delta |= vp56_rac_get_prob(c, s->vector_model_fdv[comp][j])<<j;
+ }
+ if (delta & 0xF0)
+ delta |= vp56_rac_get_prob(c, s->vector_model_fdv[comp][3])<<3;
+ else
+ delta |= 8;
+ } else {
+ delta = vp56_rac_get_tree(c, vp56_pva_tree,
+ s->vector_model_pdv[comp]);
+ }
+
+ if (delta && vp56_rac_get_prob(c, s->vector_model_sig[comp]))
+ delta = -delta;
+
+ if (!comp)
+ vect->x += delta;
+ else
+ vect->y += delta;
+ }
+}
+
+static void vp6_parse_coeff(vp56_context_t *s)
+{
+ vp56_range_coder_t *c = &s->c;
+ uint8_t *permute = s->scantable.permutated;
+ uint8_t *model, *model2, *model3;
+ int coeff, sign, coeff_idx;
+ int b, i, cg, idx, ctx;
+ int pt = 0; /* plane type (0 for Y, 1 for U or V) */
+
+ for (b=0; b<6; b++) {
+ int ct = 1; /* code type */
+ int run = 1;
+
+ if (b > 3) pt = 1;
+
+ ctx = s->left_block[vp56_b6to4[b]].not_null_dc
+ + s->above_blocks[s->above_block_idx[b]].not_null_dc;
+ model = s->coeff_model_dccv[pt];
+ model2 = s->coeff_model_dcct[pt][ctx];
+
+ for (coeff_idx=0; coeff_idx<64; ) {
+ if ((coeff_idx>1 && ct==0) || vp56_rac_get_prob(c, model2[0])) {
+ /* parse a coeff */
+ if (coeff_idx == 0) {
+ s->left_block[vp56_b6to4[b]].not_null_dc = 1;
+ s->above_blocks[s->above_block_idx[b]].not_null_dc = 1;
+ }
+
+ if (vp56_rac_get_prob(c, model2[2])) {
+ if (vp56_rac_get_prob(c, model2[3])) {
+ idx = vp56_rac_get_tree(c, vp56_pc_tree, model);
+ coeff = vp56_coeff_bias[idx];
+ for (i=vp56_coeff_bit_length[idx]; i>=0; i--)
+ coeff += vp56_rac_get_prob(c, vp56_coeff_parse_table[idx][i]) << i;
+ } else {
+ if (vp56_rac_get_prob(c, model2[4]))
+ coeff = 3 + vp56_rac_get_prob(c, model[5]);
+ else
+ coeff = 2;
+ }
+ ct = 2;
+ } else {
+ ct = 1;
+ coeff = 1;
+ }
+ sign = vp56_rac_get(c);
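+ /* branch-free conditional negate: (x ^ -1) + 1 == -x in two's complement */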
+ coeff = (coeff ^ -sign) + sign;
+ if (coeff_idx)
+ coeff *= s->dequant_ac;
+ idx = s->coeff_index_to_pos[coeff_idx];
+ s->block_coeff[b][permute[idx]] = coeff;
+ run = 1;
+ } else {
+ /* parse a run */
+ ct = 0;
+ if (coeff_idx == 0) {
+ s->left_block[vp56_b6to4[b]].not_null_dc = 0;
+ s->above_blocks[s->above_block_idx[b]].not_null_dc = 0;
+ } else {
+ if (!vp56_rac_get_prob(c, model2[1]))
+ break;
+
+ model3 = s->coeff_model_runv[coeff_idx >= 6];
+ run = vp56_rac_get_tree(c, vp6_pcr_tree, model3);
+ if (!run)
+ for (run=9, i=0; i<6; i++)
+ run += vp56_rac_get_prob(c, model3[i+8]) << i;
+ }
+ }
+
+ cg = vp6_coeff_groups[coeff_idx+=run];
+ model = model2 = s->coeff_model_ract[pt][ct][cg];
+ }
+ }
+}
+
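+/* Reflect a motion-vector component back toward the threshold t: only values
+ * with t < |v| < 2*t are folded to 2*t - |v|, keeping the sign of v.
+ * A readable (branchier) equivalent, for illustration only:
+ *     int a = abs(v);
+ *     if (a > t && a < 2*t)
+ *         a = 2*t - a;
+ *     return v < 0 ? -a : a;
+ */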
+static int vp6_adjust(int v, int t)
+{
+ int V = v, s = v >> 31;
+ V ^= s;
+ V -= s;
+ if (V-t-1 >= (unsigned)(t-1))
+ return v;
+ V = 2*t - V;
+ V += s;
+ V ^= s;
+ return V;
+}
+
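+/* sample variance of the 16 pixels on a 2x2-subsampled grid:
+ * (16*sum(x^2) - (sum x)^2) / 256 == E[x^2] - E[x]^2 */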
+static int vp6_block_variance(uint8_t *src, int stride)
+{
+ int sum = 0, square_sum = 0;
+ int y, x;
+
+ for (y=0; y<8; y+=2) {
+ for (x=0; x<8; x+=2) {
+ sum += src[x];
+ square_sum += src[x]*src[x];
+ }
+ src += 2*stride;
+ }
+ return (16*square_sum - sum*sum) >> 8;
+}
+
+static void vp6_filter_hv2(vp56_context_t *s, uint8_t *dst, uint8_t *src,
+ int stride, int delta, int16_t weight)
+{
+ s->dsp.put_pixels_tab[1][0](dst, src, stride, 8);
+ s->dsp.biweight_h264_pixels_tab[3](dst, src+delta, stride, 2,
+ 8-weight, weight, 0);
+}
+
+static void vp6_filter_hv4(uint8_t *dst, uint8_t *src, int stride,
+ int delta, const int16_t *weights)
+{
+ int x, y;
+
+ for (y=0; y<8; y++) {
+ for (x=0; x<8; x++) {
+ dst[x] = clip_uint8(( src[x-delta ] * weights[0]
+ + src[x ] * weights[1]
+ + src[x+delta ] * weights[2]
+ + src[x+2*delta] * weights[3] + 64) >> 7);
+ }
+ src += stride;
+ dst += stride;
+ }
+}
+
+static void vp6_filter_diag2(vp56_context_t *s, uint8_t *dst, uint8_t *src,
+ int stride, int h_weight, int v_weight)
+{
+ uint8_t *tmp = s->edge_emu_buffer+16;
+ int x, xmax;
+
+ s->dsp.put_pixels_tab[1][0](tmp, src, stride, 8);
+ s->dsp.biweight_h264_pixels_tab[3](tmp, src+1, stride, 2,
+ 8-h_weight, h_weight, 0);
+ /* we need an 8x9 block for the vertical filter, so compute one more line */
+ for (x=8*stride, xmax=x+8; x<xmax; x++)
+ tmp[x] = (src[x]*(8-h_weight) + src[x+1]*h_weight + 4) >> 3;
+
+ s->dsp.put_pixels_tab[1][0](dst, tmp, stride, 8);
+ s->dsp.biweight_h264_pixels_tab[3](dst, tmp+stride, stride, 2,
+ 8-v_weight, v_weight, 0);
+}
+
+static void vp6_filter_diag4(uint8_t *dst, uint8_t *src, int stride,
+ const int16_t *h_weights,const int16_t *v_weights)
+{
+ int x, y;
+ int tmp[8*11];
+ int *t = tmp;
+
+ src -= stride;
+
+ for (y=0; y<11; y++) {
+ for (x=0; x<8; x++) {
+ t[x] = clip_uint8(( src[x-1] * h_weights[0]
+ + src[x ] * h_weights[1]
+ + src[x+1] * h_weights[2]
+ + src[x+2] * h_weights[3] + 64) >> 7);
+ }
+ src += stride;
+ t += 8;
+ }
+
+ t = tmp + 8;
+ for (y=0; y<8; y++) {
+ for (x=0; x<8; x++) {
+ dst[x] = clip_uint8(( t[x-8 ] * v_weights[0]
+ + t[x ] * v_weights[1]
+ + t[x+8 ] * v_weights[2]
+ + t[x+16] * v_weights[3] + 64) >> 7);
+ }
+ dst += stride;
+ t += 8;
+ }
+}
+
+static void vp6_filter(vp56_context_t *s, uint8_t *dst, uint8_t *src,
+ int offset1, int offset2, int stride,
+ vp56_mv_t mv, int mask, int select, int luma)
+{
+ int filter4 = 0;
+ int x8 = mv.x & mask;
+ int y8 = mv.y & mask;
+
+ if (luma) {
+ x8 *= 2;
+ y8 *= 2;
+ filter4 = s->filter_mode;
+ if (filter4 == 2) {
+ if (s->max_vector_length &&
+ (FFABS(mv.x) > s->max_vector_length ||
+ FFABS(mv.y) > s->max_vector_length)) {
+ filter4 = 0;
+ } else if (s->sample_variance_threshold
+ && (vp6_block_variance(src+offset1, stride)
+ < s->sample_variance_threshold)) {
+ filter4 = 0;
+ }
+ }
+ }
+
+ if ((y8 && (offset2-offset1)*s->flip<0) || (!y8 && offset1 > offset2)) {
+ offset1 = offset2;
+ }
+
+ if (filter4) {
+ if (!y8) { /* left or right combine */
+ vp6_filter_hv4(dst, src+offset1, stride, 1,
+ vp6_block_copy_filter[select][x8]);
+ } else if (!x8) { /* above or below combine */
+ vp6_filter_hv4(dst, src+offset1, stride, stride,
+ vp6_block_copy_filter[select][y8]);
+ } else if ((mv.x^mv.y) >> 31) { /* lower-left or upper-right combine */
+ vp6_filter_diag4(dst, src+offset1-1, stride,
+ vp6_block_copy_filter[select][x8],
+ vp6_block_copy_filter[select][y8]);
+ } else { /* lower-right or upper-left combine */
+ vp6_filter_diag4(dst, src+offset1, stride,
+ vp6_block_copy_filter[select][x8],
+ vp6_block_copy_filter[select][y8]);
+ }
+ } else {
+ if (!y8) { /* left or right combine */
+ vp6_filter_hv2(s, dst, src+offset1, stride, 1, x8);
+ } else if (!x8) { /* above or below combine */
+ vp6_filter_hv2(s, dst, src+offset1, stride, stride, y8);
+ } else if ((mv.x^mv.y) >> 31) { /* lower-left or upper-right combine */
+ vp6_filter_diag2(s, dst, src+offset1-1, stride, x8, y8);
+ } else { /* lower-right or upper-left combine */
+ vp6_filter_diag2(s, dst, src+offset1, stride, x8, y8);
+ }
+ }
+}
+
+static int vp6_decode_init(AVCodecContext *avctx)
+{
+ vp56_context_t *s = avctx->priv_data;
+
+ vp56_init(s, avctx, avctx->codec->id == CODEC_ID_VP6);
+ s->vp56_coord_div = vp6_coord_div;
+ s->parse_vector_adjustment = vp6_parse_vector_adjustment;
+ s->adjust = vp6_adjust;
+ s->filter = vp6_filter;
+ s->parse_coeff = vp6_parse_coeff;
+ s->default_models_init = vp6_default_models_init;
+ s->parse_vector_models = vp6_parse_vector_models;
+ s->parse_coeff_models = vp6_parse_coeff_models;
+ s->parse_header = vp6_parse_header;
+
+ return 0;
+}
+
+AVCodec vp6_decoder = {
+ "vp6",
+ CODEC_TYPE_VIDEO,
+ CODEC_ID_VP6,
+ sizeof(vp56_context_t),
+ vp6_decode_init,
+ NULL,
+ vp56_free,
+ vp56_decode_frame,
+};
+
+/* flash version, not flipped upside-down */
+AVCodec vp6f_decoder = {
+ "vp6f",
+ CODEC_TYPE_VIDEO,
+ CODEC_ID_VP6F,
+ sizeof(vp56_context_t),
+ vp6_decode_init,
+ NULL,
+ vp56_free,
+ vp56_decode_frame,
+};
diff --git a/src/libffmpeg/libavcodec/vp6data.h b/src/libffmpeg/libavcodec/vp6data.h
new file mode 100644
index 000000000..0545a9d66
--- /dev/null
+++ b/src/libffmpeg/libavcodec/vp6data.h
@@ -0,0 +1,300 @@
+/**
+ * @file vp6data.h
+ * VP6 compatible video decoder
+ *
+ * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef VP6DATA_H
+#define VP6DATA_H
+
+#include "vp56data.h"
+
+static const uint8_t vp6_def_fdv_vector_model[2][8] = {
+ { 247, 210, 135, 68, 138, 220, 239, 246 },
+ { 244, 184, 201, 44, 173, 221, 239, 253 },
+};
+
+static const uint8_t vp6_def_pdv_vector_model[2][7] = {
+ { 225, 146, 172, 147, 214, 39, 156 },
+ { 204, 170, 119, 235, 140, 230, 228 },
+};
+
+static const uint8_t vp6_def_coeff_reorder[] = {
+ 0, 0, 1, 1, 1, 2, 2, 2,
+ 2, 2, 2, 3, 3, 4, 4, 4,
+ 5, 5, 5, 5, 6, 6, 7, 7,
+ 7, 7, 7, 8, 8, 9, 9, 9,
+ 9, 9, 9, 10, 10, 11, 11, 11,
+ 11, 11, 11, 12, 12, 12, 12, 12,
+ 12, 13, 13, 13, 13, 13, 14, 14,
+ 14, 14, 15, 15, 15, 15, 15, 15,
+};
+
+static const uint8_t vp6_def_runv_coeff_model[2][14] = {
+ { 198, 197, 196, 146, 198, 204, 169, 142, 130, 136, 149, 149, 191, 249 },
+ { 135, 201, 181, 154, 98, 117, 132, 126, 146, 169, 184, 240, 246, 254 },
+};
+
+static const uint8_t vp6_sig_dct_pct[2][2] = {
+ { 237, 246 },
+ { 231, 243 },
+};
+
+static const uint8_t vp6_pdv_pct[2][7] = {
+ { 253, 253, 254, 254, 254, 254, 254 },
+ { 245, 253, 254, 254, 254, 254, 254 },
+};
+
+static const uint8_t vp6_fdv_pct[2][8] = {
+ { 254, 254, 254, 254, 254, 250, 250, 252 },
+ { 254, 254, 254, 254, 254, 251, 251, 254 },
+};
+
+static const uint8_t vp6_dccv_pct[2][11] = {
+ { 146, 255, 181, 207, 232, 243, 238, 251, 244, 250, 249 },
+ { 179, 255, 214, 240, 250, 255, 244, 255, 255, 255, 255 },
+};
+
+static const uint8_t vp6_coeff_reorder_pct[] = {
+ 255, 132, 132, 159, 153, 151, 161, 170,
+ 164, 162, 136, 110, 103, 114, 129, 118,
+ 124, 125, 132, 136, 114, 110, 142, 135,
+ 134, 123, 143, 126, 153, 183, 166, 161,
+ 171, 180, 179, 164, 203, 218, 225, 217,
+ 215, 206, 203, 217, 229, 241, 248, 243,
+ 253, 255, 253, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255,
+};
+
+static const uint8_t vp6_runv_pct[2][14] = {
+ { 219, 246, 238, 249, 232, 239, 249, 255, 248, 253, 239, 244, 241, 248 },
+ { 198, 232, 251, 253, 219, 241, 253, 255, 248, 249, 244, 238, 251, 255 },
+};
+
+static const uint8_t vp6_ract_pct[3][2][6][11] = {
+ { { { 227, 246, 230, 247, 244, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 209, 231, 231, 249, 249, 253, 255, 255, 255 },
+ { 255, 255, 225, 242, 241, 251, 253, 255, 255, 255, 255 },
+ { 255, 255, 241, 253, 252, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 248, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } },
+ { { 240, 255, 248, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 240, 253, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } } },
+ { { { 206, 203, 227, 239, 247, 255, 253, 255, 255, 255, 255 },
+ { 207, 199, 220, 236, 243, 252, 252, 255, 255, 255, 255 },
+ { 212, 219, 230, 243, 244, 253, 252, 255, 255, 255, 255 },
+ { 236, 237, 247, 252, 253, 255, 255, 255, 255, 255, 255 },
+ { 240, 240, 248, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } },
+ { { 230, 233, 249, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 238, 238, 250, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 248, 251, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } } },
+ { { { 225, 239, 227, 231, 244, 253, 243, 255, 255, 253, 255 },
+ { 232, 234, 224, 228, 242, 249, 242, 252, 251, 251, 255 },
+ { 235, 249, 238, 240, 251, 255, 249, 255, 253, 253, 255 },
+ { 249, 253, 251, 250, 255, 255, 255, 255, 255, 255, 255 },
+ { 251, 250, 249, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } },
+ { { 243, 244, 250, 250, 255, 255, 255, 255, 255, 255, 255 },
+ { 249, 248, 250, 253, 255, 255, 255, 255, 255, 255, 255 },
+ { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } } }
+};
+
+static const int vp6_dccv_lc[3][5][2] = {
+ { { 122, 133 }, { 0, 1 }, { 78, 171 }, { 139, 117 }, { 168, 79 } },
+ { { 133, 51 }, { 0, 1 }, { 169, 71 }, { 214, 44 }, { 210, 38 } },
+ { { 142, -16 }, { 0, 1 }, { 221, -30 }, { 246, -3 }, { 203, 17 } },
+};
+
+static const uint8_t vp6_coeff_groups[] = {
+ 0, 0, 1, 1, 1, 2, 2, 2,
+ 2, 2, 2, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+};
+
+static const int16_t vp6_block_copy_filter[17][8][4] = {
+ { { 0, 128, 0, 0 }, /* 0 */
+ { -3, 122, 9, 0 },
+ { -4, 109, 24, -1 },
+ { -5, 91, 45, -3 },
+ { -4, 68, 68, -4 },
+ { -3, 45, 91, -5 },
+ { -1, 24, 109, -4 },
+ { 0, 9, 122, -3 } },
+ { { 0, 128, 0, 0 }, /* 1 */
+ { -4, 124, 9, -1 },
+ { -5, 110, 25, -2 },
+ { -6, 91, 46, -3 },
+ { -5, 69, 69, -5 },
+ { -3, 46, 91, -6 },
+ { -2, 25, 110, -5 },
+ { -1, 9, 124, -4 } },
+ { { 0, 128, 0, 0 }, /* 2 */
+ { -4, 123, 10, -1 },
+ { -6, 110, 26, -2 },
+ { -7, 92, 47, -4 },
+ { -6, 70, 70, -6 },
+ { -4, 47, 92, -7 },
+ { -2, 26, 110, -6 },
+ { -1, 10, 123, -4 } },
+ { { 0, 128, 0, 0 }, /* 3 */
+ { -5, 124, 10, -1 },
+ { -7, 110, 27, -2 },
+ { -7, 91, 48, -4 },
+ { -6, 70, 70, -6 },
+ { -4, 48, 92, -8 },
+ { -2, 27, 110, -7 },
+ { -1, 10, 124, -5 } },
+ { { 0, 128, 0, 0 }, /* 4 */
+ { -6, 124, 11, -1 },
+ { -8, 111, 28, -3 },
+ { -8, 92, 49, -5 },
+ { -7, 71, 71, -7 },
+ { -5, 49, 92, -8 },
+ { -3, 28, 111, -8 },
+ { -1, 11, 124, -6 } },
+ { { 0, 128, 0, 0 }, /* 5 */
+ { -6, 123, 12, -1 },
+ { -9, 111, 29, -3 },
+ { -9, 93, 50, -6 },
+ { -8, 72, 72, -8 },
+ { -6, 50, 93, -9 },
+ { -3, 29, 111, -9 },
+ { -1, 12, 123, -6 } },
+ { { 0, 128, 0, 0 }, /* 6 */
+ { -7, 124, 12, -1 },
+ { -10, 111, 30, -3 },
+ { -10, 93, 51, -6 },
+ { -9, 73, 73, -9 },
+ { -6, 51, 93, -10 },
+ { -3, 30, 111, -10 },
+ { -1, 12, 124, -7 } },
+ { { 0, 128, 0, 0 }, /* 7 */
+ { -7, 123, 13, -1 },
+ { -11, 112, 31, -4 },
+ { -11, 94, 52, -7 },
+ { -10, 74, 74, -10 },
+ { -7, 52, 94, -11 },
+ { -4, 31, 112, -11 },
+ { -1, 13, 123, -7 } },
+ { { 0, 128, 0, 0 }, /* 8 */
+ { -8, 124, 13, -1 },
+ { -12, 112, 32, -4 },
+ { -12, 94, 53, -7 },
+ { -10, 74, 74, -10 },
+ { -7, 53, 94, -12 },
+ { -4, 32, 112, -12 },
+ { -1, 13, 124, -8 } },
+ { { 0, 128, 0, 0 }, /* 9 */
+ { -9, 124, 14, -1 },
+ { -13, 112, 33, -4 },
+ { -13, 95, 54, -8 },
+ { -11, 75, 75, -11 },
+ { -8, 54, 95, -13 },
+ { -4, 33, 112, -13 },
+ { -1, 14, 124, -9 } },
+ { { 0, 128, 0, 0 }, /* 10 */
+ { -9, 123, 15, -1 },
+ { -14, 113, 34, -5 },
+ { -14, 95, 55, -8 },
+ { -12, 76, 76, -12 },
+ { -8, 55, 95, -14 },
+ { -5, 34, 112, -13 },
+ { -1, 15, 123, -9 } },
+ { { 0, 128, 0, 0 }, /* 11 */
+ { -10, 124, 15, -1 },
+ { -14, 113, 34, -5 },
+ { -15, 96, 56, -9 },
+ { -13, 77, 77, -13 },
+ { -9, 56, 96, -15 },
+ { -5, 34, 113, -14 },
+ { -1, 15, 124, -10 } },
+ { { 0, 128, 0, 0 }, /* 12 */
+ { -10, 123, 16, -1 },
+ { -15, 113, 35, -5 },
+ { -16, 98, 56, -10 },
+ { -14, 78, 78, -14 },
+ { -10, 56, 98, -16 },
+ { -5, 35, 113, -15 },
+ { -1, 16, 123, -10 } },
+ { { 0, 128, 0, 0 }, /* 13 */
+ { -11, 124, 17, -2 },
+ { -16, 113, 36, -5 },
+ { -17, 98, 57, -10 },
+ { -14, 78, 78, -14 },
+ { -10, 57, 98, -17 },
+ { -5, 36, 113, -16 },
+ { -2, 17, 124, -11 } },
+ { { 0, 128, 0, 0 }, /* 14 */
+ { -12, 125, 17, -2 },
+ { -17, 114, 37, -6 },
+ { -18, 99, 58, -11 },
+ { -15, 79, 79, -15 },
+ { -11, 58, 99, -18 },
+ { -6, 37, 114, -17 },
+ { -2, 17, 125, -12 } },
+ { { 0, 128, 0, 0 }, /* 15 */
+ { -12, 124, 18, -2 },
+ { -18, 114, 38, -6 },
+ { -19, 99, 59, -11 },
+ { -16, 80, 80, -16 },
+ { -11, 59, 99, -19 },
+ { -6, 38, 114, -18 },
+ { -2, 18, 124, -12 } },
+ { { 0, 128, 0, 0 }, /* 16 */
+ { -4, 118, 16, -2 },
+ { -7, 106, 34, -5 },
+ { -8, 90, 53, -7 },
+ { -8, 72, 72, -8 },
+ { -7, 53, 90, -8 },
+ { -5, 34, 106, -7 },
+ { -2, 16, 118, -4 } },
+};
+
+static const vp56_tree_t vp6_pcr_tree[] = {
+ { 8, 0},
+ { 4, 1},
+ { 2, 2}, {-1}, {-2},
+ { 2, 3}, {-3}, {-4},
+ { 8, 4},
+ { 4, 5},
+ { 2, 6}, {-5}, {-6},
+ { 2, 7}, {-7}, {-8},
+ {-0},
+};
+
+static const uint8_t vp6_coord_div[] = { 4, 4, 4, 4, 8, 8 };
+
+#endif /* VP6DATA_H */
diff --git a/src/libffmpeg/libavcodec/wmadec.c b/src/libffmpeg/libavcodec/wmadec.c
index 684aea2c8..bbf4970ce 100644
--- a/src/libffmpeg/libavcodec/wmadec.c
+++ b/src/libffmpeg/libavcodec/wmadec.c
@@ -115,6 +115,8 @@ typedef struct WMADecodeContext {
float max_exponent[MAX_CHANNELS];
int16_t coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE];
DECLARE_ALIGNED_16(float, coefs[MAX_CHANNELS][BLOCK_MAX_SIZE]);
+ DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]);
+ DECLARE_ALIGNED_16(float, window[BLOCK_MAX_SIZE * 2]);
MDCTContext mdct_ctx[BLOCK_NB_SIZES];
float *windows[BLOCK_NB_SIZES];
DECLARE_ALIGNED_16(FFTSample, mdct_tmp[BLOCK_MAX_SIZE]); /* temporary storage for imdct */
@@ -717,7 +719,6 @@ static int wma_decode_block(WMADecodeContext *s)
{
int n, v, a, ch, code, bsize;
int coef_nb_bits, total_gain, parse_exponents;
- DECLARE_ALIGNED_16(float, window[BLOCK_MAX_SIZE * 2]);
int nb_coefs[MAX_CHANNELS];
float mdct_norm;
@@ -1072,7 +1073,7 @@ static int wma_decode_block(WMADecodeContext *s)
next_block_len = 1 << s->next_block_len_bits;
/* right part */
- wptr = window + block_len;
+ wptr = s->window + block_len;
if (block_len <= next_block_len) {
for(i=0;i<block_len;i++)
*wptr++ = s->windows[bsize][i];
@@ -1088,7 +1089,7 @@ static int wma_decode_block(WMADecodeContext *s)
}
/* left part */
- wptr = window + block_len;
+ wptr = s->window + block_len;
if (block_len <= prev_block_len) {
for(i=0;i<block_len;i++)
*--wptr = s->windows[bsize][i];
@@ -1107,14 +1108,13 @@ static int wma_decode_block(WMADecodeContext *s)
for(ch = 0; ch < s->nb_channels; ch++) {
if (s->channel_coded[ch]) {
- DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]);
float *ptr;
int n4, index, n;
n = s->block_len;
n4 = s->block_len / 2;
s->mdct_ctx[bsize].fft.imdct_calc(&s->mdct_ctx[bsize],
- output, s->coefs[ch], s->mdct_tmp);
+ s->output, s->coefs[ch], s->mdct_tmp);
/* XXX: optimize all that by building the window and
multiplying/adding at the same time */
@@ -1122,13 +1122,13 @@ static int wma_decode_block(WMADecodeContext *s)
/* multiply by the window and add in the frame */
index = (s->frame_len / 2) + s->block_pos - n4;
ptr = &s->frame_out[ch][index];
- s->dsp.vector_fmul_add_add(ptr,window,output,ptr,0,2*n,1);
+ s->dsp.vector_fmul_add_add(ptr,s->window,s->output,ptr,0,2*n,1);
/* specific fast case for ms-stereo : add to second
channel if it is not coded */
if (s->ms_stereo && !s->channel_coded[1]) {
ptr = &s->frame_out[1][index];
- s->dsp.vector_fmul_add_add(ptr,window,output,ptr,0,2*n,1);
+ s->dsp.vector_fmul_add_add(ptr,s->window,s->output,ptr,0,2*n,1);
}
}
}
diff --git a/src/libffmpeg/libavcodec/wmv2.c b/src/libffmpeg/libavcodec/wmv2.c
index 5abc51775..f3d4f0f23 100644
--- a/src/libffmpeg/libavcodec/wmv2.c
+++ b/src/libffmpeg/libavcodec/wmv2.c
@@ -643,6 +643,12 @@ void ff_mspel_motion(MpegEncContext *s,
v_edge_pos = s->v_edge_pos;
src_x = clip(src_x, -16, s->width);
src_y = clip(src_y, -16, s->height);
+
+ if(src_x<=-16 || src_x >= s->width)
+ dxy &= ~3;
+ if(src_y<=-16 || src_y >= s->height)
+ dxy &= ~4;
+
linesize = s->linesize;
uvlinesize = s->uvlinesize;
ptr = ref_picture[0] + (src_y * linesize) + src_x;
diff --git a/src/libffmpeg/libavutil/Makefile.am b/src/libffmpeg/libavutil/Makefile.am
index 76340cf14..6e507cb67 100644
--- a/src/libffmpeg/libavutil/Makefile.am
+++ b/src/libffmpeg/libavutil/Makefile.am
@@ -1,6 +1,6 @@
include $(top_srcdir)/misc/Makefile.common
-AM_CPPFLAGS = $(LIBFFMPEG_CPPFLAGS)
+AM_CPPFLAGS = $(LIBFFMPEG_CPPFLAGS) -I$(top_srcdir)/src/libffmpeg
AM_CFLAGS = -fno-strict-aliasing
ASFLAGS =
@@ -28,6 +28,7 @@ noinst_HEADERS = \
integer.h \
internal.h \
intfloat_readwrite.h \
+ intreadwrite.h \
lls.h \
log.h \
mathematics.h \
diff --git a/src/libffmpeg/libavutil/bswap.h b/src/libffmpeg/libavutil/bswap.h
index 4614c9045..03d613db2 100644
--- a/src/libffmpeg/libavutil/bswap.h
+++ b/src/libffmpeg/libavutil/bswap.h
@@ -37,7 +37,7 @@
#endif
#if defined(ARCH_X86)
-static always_inline uint16_t bswap_16(uint16_t x)
+static av_always_inline uint16_t bswap_16(uint16_t x)
{
__asm("rorw $8, %0" :
LEGACY_REGS (x) :
@@ -45,7 +45,7 @@ static always_inline uint16_t bswap_16(uint16_t x)
return x;
}
-static always_inline uint32_t bswap_32(uint32_t x)
+static av_always_inline uint32_t bswap_32(uint32_t x)
{
#if __CPU__ != 386
__asm("bswap %0":
@@ -82,12 +82,12 @@ static inline uint64_t bswap_64(uint64_t x)
#elif defined(ARCH_SH4)
-static always_inline uint16_t bswap_16(uint16_t x) {
+static av_always_inline uint16_t bswap_16(uint16_t x) {
__asm__("swap.b %0,%0":"=r"(x):"0"(x));
return x;
}
-static always_inline uint32_t bswap_32(uint32_t x) {
+static av_always_inline uint32_t bswap_32(uint32_t x) {
__asm__(
"swap.b %0,%0\n"
"swap.w %0,%0\n"
@@ -110,12 +110,12 @@ static inline uint64_t bswap_64(uint64_t x)
}
#else
-static always_inline uint16_t bswap_16(uint16_t x){
+static av_always_inline uint16_t bswap_16(uint16_t x){
return (x>>8) | (x<<8);
}
#ifdef ARCH_ARM
-static always_inline uint32_t bswap_32(uint32_t x){
+static av_always_inline uint32_t bswap_32(uint32_t x){
uint32_t t;
__asm__ (
"eor %1, %0, %0, ror #16 \n\t"
@@ -126,7 +126,7 @@ static always_inline uint32_t bswap_32(uint32_t x){
return x;
}
#else
-static always_inline uint32_t bswap_32(uint32_t x){
+static av_always_inline uint32_t bswap_32(uint32_t x){
x= ((x<<8)&0xFF00FF00) | ((x>>8)&0x00FF00FF);
return (x>>16) | (x<<16);
}
diff --git a/src/libffmpeg/libavutil/common.h b/src/libffmpeg/libavutil/common.h
index d167404b6..0e093616c 100644
--- a/src/libffmpeg/libavutil/common.h
+++ b/src/libffmpeg/libavutil/common.h
@@ -26,9 +26,7 @@
#ifndef COMMON_H
#define COMMON_H
-#ifndef M_PI
-#define M_PI 3.14159265358979323846
-#endif
+#include <inttypes.h>
#ifdef HAVE_AV_CONFIG_H
/* only include the following when compiling package */
@@ -47,34 +45,17 @@
# include <math.h>
#endif /* HAVE_AV_CONFIG_H */
-/* Suppress restrict if it was not defined in config.h. */
-#ifndef restrict
-# define restrict
-#endif
-
-#ifndef always_inline
-#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
-# define always_inline __attribute__((always_inline)) inline
-#else
-# define always_inline inline
-#endif
-#endif
-
-#ifndef attribute_used
+#ifndef av_always_inline
#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
-# define attribute_used __attribute__((used))
+# define av_always_inline __attribute__((always_inline)) inline
#else
-# define attribute_used
+# define av_always_inline inline
#endif
#endif
-#ifndef attribute_unused
-#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
-# define attribute_unused __attribute__((unused))
-#else
-# define attribute_unused
-#endif
-#endif
+#ifdef HAVE_AV_CONFIG_H
+# include "internal.h"
+#endif /* HAVE_AV_CONFIG_H */
#ifndef attribute_deprecated
#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
@@ -84,91 +65,9 @@
#endif
#endif
-# include <inttypes.h>
-
-#ifndef PRId64
-#define PRId64 "lld"
-#endif
-
-#ifndef PRIu64
-#define PRIu64 "llu"
-#endif
-
-#ifndef PRIx64
-#define PRIx64 "llx"
-#endif
-
-#ifndef PRIX64
-#define PRIX64 "llX"
-#endif
-
-#ifndef PRId32
-#define PRId32 "d"
-#endif
-
-#ifndef PRIdFAST16
-#define PRIdFAST16 PRId32
-#endif
-
-#ifndef PRIdFAST32
-#define PRIdFAST32 PRId32
-#endif
-
-#ifndef INT16_MIN
-#define INT16_MIN (-0x7fff-1)
-#endif
-
-#ifndef INT16_MAX
-#define INT16_MAX 0x7fff
-#endif
-
-#ifndef INT32_MIN
-#define INT32_MIN (-0x7fffffff-1)
-#endif
-
-#ifndef INT32_MAX
-#define INT32_MAX 0x7fffffff
-#endif
-
-#ifndef UINT32_MAX
-#define UINT32_MAX 0xffffffff
-#endif
-
-#ifndef INT64_MIN
-#define INT64_MIN (-0x7fffffffffffffffLL-1)
-#endif
-
-#ifndef INT64_MAX
-#define INT64_MAX int64_t_C(9223372036854775807)
-#endif
-
-#ifndef UINT64_MAX
-#define UINT64_MAX uint64_t_C(0xFFFFFFFFFFFFFFFF)
-#endif
-
-#ifndef INT_BIT
-# if INT_MAX != 2147483647
-# define INT_BIT 64
-# else
-# define INT_BIT 32
-# endif
-#endif
-
-#ifndef int64_t_C
-#define int64_t_C(c) (c ## LL)
-#define uint64_t_C(c) (c ## ULL)
-#endif
-
-#if defined(__MINGW32__) && !defined(BUILD_AVUTIL) && defined(BUILD_SHARED_AV)
-# define FF_IMPORT_ATTR __declspec(dllimport)
-#else
-# define FF_IMPORT_ATTR
-#endif
-
-
-#ifdef HAVE_AV_CONFIG_H
-/* only include the following when compiling package */
-# include "internal.h"
+#ifndef INT64_C
+#define INT64_C(c) (c ## LL)
+#define UINT64_C(c) (c ## ULL)
#endif
//rounded divison & shift
@@ -184,7 +83,7 @@
#define FFSWAP(type,a,b) do{type SWAP_tmp= b; b= a; a= SWAP_tmp;}while(0)
/* misc math functions */
-extern FF_IMPORT_ATTR const uint8_t ff_log2_tab[256];
+extern const uint8_t ff_log2_tab[256];
static inline int av_log2(unsigned int v)
{
@@ -375,7 +274,7 @@ static inline uint64_t read_time(void)
);
return (d << 32) | (a & 0xffffffff);
}
-#elif defined(ARCH_X86)
+#elif defined(ARCH_X86_32)
static inline long long read_time(void)
{
long long l;
@@ -465,4 +364,8 @@ void av_freep(void *ptr);
# define ASMALIGN(ZEROBITS) ".align 1<<" #ZEROBITS "\n\t"
#endif
+/* xine: additional config header selecting which codecs to build */
+#include "ffmpeg_config.h"
+
#endif /* COMMON_H */
+
diff --git a/src/libffmpeg/libavutil/internal.h b/src/libffmpeg/libavutil/internal.h
index 7d850141b..0c4b44170 100644
--- a/src/libffmpeg/libavutil/internal.h
+++ b/src/libffmpeg/libavutil/internal.h
@@ -26,6 +26,94 @@
#ifndef INTERNAL_H
#define INTERNAL_H
+#ifndef attribute_used
+#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
+# define attribute_used __attribute__((used))
+#else
+# define attribute_used
+#endif
+#endif
+
+#ifndef attribute_unused
+#if defined(__GNUC__)
+# define attribute_unused __attribute__((unused))
+#else
+# define attribute_unused
+#endif
+#endif
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+#ifndef PRId64
+#define PRId64 "lld"
+#endif
+
+#ifndef PRIu64
+#define PRIu64 "llu"
+#endif
+
+#ifndef PRIx64
+#define PRIx64 "llx"
+#endif
+
+#ifndef PRIX64
+#define PRIX64 "llX"
+#endif
+
+#ifndef PRId32
+#define PRId32 "d"
+#endif
+
+#ifndef PRIdFAST16
+#define PRIdFAST16 PRId32
+#endif
+
+#ifndef PRIdFAST32
+#define PRIdFAST32 PRId32
+#endif
+
+#ifndef INT16_MIN
+#define INT16_MIN (-0x7fff-1)
+#endif
+
+#ifndef INT16_MAX
+#define INT16_MAX 0x7fff
+#endif
+
+#ifndef INT32_MIN
+#define INT32_MIN (-0x7fffffff-1)
+#endif
+
+#ifndef INT32_MAX
+#define INT32_MAX 0x7fffffff
+#endif
+
+#ifndef UINT32_MAX
+#define UINT32_MAX 0xffffffff
+#endif
+
+#ifndef INT64_MIN
+#define INT64_MIN (-0x7fffffffffffffffLL-1)
+#endif
+
+#ifndef INT64_MAX
+#define INT64_MAX INT64_C(9223372036854775807)
+#endif
+
+#ifndef UINT64_MAX
+#define UINT64_MAX UINT64_C(0xFFFFFFFFFFFFFFFF)
+#endif
+
+#ifndef INT_BIT
+# if INT_MAX != 2147483647
+# define INT_BIT 64
+# else
+# define INT_BIT 32
+# endif
+#endif
+
#if ( defined(__PIC__) || defined(__pic__) ) && ! defined(PIC)
# define PIC
#endif
@@ -34,6 +122,7 @@
# define ENODATA 61
#endif
+#include "intreadwrite.h"
#include "bswap.h"
#include <stddef.h>
@@ -136,7 +225,7 @@ extern const uint32_t ff_inverse[256];
# define FASTDIV(a,b) ((a)/(b))
#endif
-extern FF_IMPORT_ATTR const uint8_t ff_sqrt_tab[128];
+extern const uint8_t ff_sqrt_tab[128];
static inline int ff_sqrt(int a)
{
@@ -216,7 +305,7 @@ if((y)<(x)){\
/* XXX: add ISOC specific test to avoid specific BSD testing. */
/* better-than-nothing implementation. */
/* btw, rintf() exists on fbsd too -- alex */
-static always_inline long int lrintf(float x)
+static av_always_inline long int lrintf(float x)
{
#ifdef __MINGW32__
# ifdef ARCH_X86_32
diff --git a/src/libffmpeg/libavutil/intreadwrite.h b/src/libffmpeg/libavutil/intreadwrite.h
new file mode 100644
index 000000000..c43f9d651
--- /dev/null
+++ b/src/libffmpeg/libavutil/intreadwrite.h
@@ -0,0 +1,42 @@
+#ifndef INTREADWRITE_H
+#define INTREADWRITE_H
+
+#ifdef __GNUC__
+
+struct unaligned_64 { uint64_t l; } __attribute__((packed));
+struct unaligned_32 { uint32_t l; } __attribute__((packed));
+struct unaligned_16 { uint16_t l; } __attribute__((packed));
+
+#define LD16(a) (((const struct unaligned_16 *) (a))->l)
+#define LD32(a) (((const struct unaligned_32 *) (a))->l)
+#define LD64(a) (((const struct unaligned_64 *) (a))->l)
+
+#define ST16(a, b) (((struct unaligned_16 *) (a))->l) = (b)
+#define ST32(a, b) (((struct unaligned_32 *) (a))->l) = (b)
+
+#else /* __GNUC__ */
+
+#define LD16(a) (*((uint16_t*)(a)))
+#define LD32(a) (*((uint32_t*)(a)))
+#define LD64(a) (*((uint64_t*)(a)))
+
+#define ST16(a, b) *((uint16_t*)(a)) = (b)
+#define ST32(a, b) *((uint32_t*)(a)) = (b)
+
+#endif /* !__GNUC__ */
+
+/* endian macros */
+#if !defined(BE_16) || !defined(BE_32) || !defined(LE_16) || !defined(LE_32)
+#define BE_16(x) ((((uint8_t*)(x))[0] << 8) | ((uint8_t*)(x))[1])
+#define BE_32(x) ((((uint8_t*)(x))[0] << 24) | \
+ (((uint8_t*)(x))[1] << 16) | \
+ (((uint8_t*)(x))[2] << 8) | \
+ ((uint8_t*)(x))[3])
+#define LE_16(x) ((((uint8_t*)(x))[1] << 8) | ((uint8_t*)(x))[0])
+#define LE_32(x) ((((uint8_t*)(x))[3] << 24) | \
+ (((uint8_t*)(x))[2] << 16) | \
+ (((uint8_t*)(x))[1] << 8) | \
+ ((uint8_t*)(x))[0])
+#endif
+
+#endif /* INTREADWRITE_H */
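
A quick usage sketch of the macros above (hypothetical buffer, not part of the
patch): BE_*/LE_* fix the byte order, while LD*/ST* do host-order unaligned
access.

    uint8_t buf[4] = { 0x12, 0x34, 0x56, 0x78 };
    uint32_t be  = BE_32(buf);   /* 0x12345678 on any host */
    uint32_t le  = LE_32(buf);   /* 0x78563412 on any host */
    uint32_t raw = LD32(buf);    /* host-endian unaligned 32-bit load */
    ST16(buf, 0xABCD);           /* host-endian unaligned 16-bit store */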
diff --git a/src/libffmpeg/libavutil/rational.c b/src/libffmpeg/libavutil/rational.c
index 0e018c41b..0480aa882 100644
--- a/src/libffmpeg/libavutil/rational.c
+++ b/src/libffmpeg/libavutil/rational.c
@@ -38,8 +38,10 @@ int av_reduce(int *dst_nom, int *dst_den, int64_t nom, int64_t den, int64_t max)
int sign= (nom<0) ^ (den<0);
int64_t gcd= ff_gcd(FFABS(nom), FFABS(den));
- nom = FFABS(nom)/gcd;
- den = FFABS(den)/gcd;
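+ /* ff_gcd(0,0) is 0; skip the reduction to avoid dividing by zero */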
+ if(gcd){
+ nom = FFABS(nom)/gcd;
+ den = FFABS(den)/gcd;
+ }
if(nom<=max && den<=max){
a1= (AVRational){nom, den};
den=0;
@@ -65,7 +67,7 @@ int av_reduce(int *dst_nom, int *dst_den, int64_t nom, int64_t den, int64_t max)
nom= den;
den= next_den;
}
- assert(ff_gcd(a1.num, a1.den) == 1);
+ assert(ff_gcd(a1.num, a1.den) <= 1U);
*dst_nom = sign ? -a1.num : a1.num;
*dst_den = a1.den;
diff --git a/src/libffmpeg/video_decoder.c b/src/libffmpeg/video_decoder.c
index ad2bc99b4..b019d52d3 100644
--- a/src/libffmpeg/video_decoder.c
+++ b/src/libffmpeg/video_decoder.c
@@ -17,7 +17,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
- * $Id: video_decoder.c,v 1.64 2006/12/02 21:06:18 miguelfreitas Exp $
+ * $Id: video_decoder.c,v 1.65 2007/01/13 21:19:52 miguelfreitas Exp $
*
* xine video decoder plugin using ffmpeg
*
@@ -25,6 +25,7 @@
#ifdef HAVE_CONFIG_H
#include "config.h"
+#include "ffmpeg_config.h"
#endif
#include <stdlib.h>
@@ -116,6 +117,8 @@ struct ff_video_decoder_s {
int is_direct_rendering_disabled;
AVPaletteControl palette_control;
+
+ xine_list_t *dr1_frames;
};
@@ -203,16 +206,25 @@ static int get_buffer(AVCodecContext *context, AVFrame *av_frame){
av_frame->type= FF_BUFFER_TYPE_USER;
+ xine_list_push_back(this->dr1_frames, av_frame);
+
return 0;
}
static void release_buffer(struct AVCodecContext *context, AVFrame *av_frame){
+ ff_video_decoder_t *this = (ff_video_decoder_t *)context->opaque;
if (av_frame->type == FF_BUFFER_TYPE_USER) {
vo_frame_t *img = (vo_frame_t *)av_frame->opaque;
+ xine_list_iterator_t it;
assert(av_frame->opaque);
img->free(img);
+
+ it = xine_list_find(this->dr1_frames, av_frame);
+ assert(it);
+ if( it != NULL )
+ xine_list_remove(this->dr1_frames, it);
} else {
avcodec_default_release_buffer(context, av_frame);
}
@@ -249,6 +261,8 @@ static const ff_codec_t ff_video_lookup[] = {
{BUF_VIDEO_DV, CODEC_ID_DVVIDEO, "DV (ffmpeg)"},
{BUF_VIDEO_HUFFYUV, CODEC_ID_HUFFYUV, "HuffYUV (ffmpeg)"},
{BUF_VIDEO_VP31, CODEC_ID_VP3, "On2 VP3.1 (ffmpeg)"},
+ {BUF_VIDEO_VP5, CODEC_ID_VP5, "On2 VP5 (ffmpeg)"},
+ {BUF_VIDEO_VP6, CODEC_ID_VP6, "On2 VP6 (ffmpeg)"},
{BUF_VIDEO_4XM, CODEC_ID_4XM, "4X Video (ffmpeg)"},
{BUF_VIDEO_CINEPAK, CODEC_ID_CINEPAK, "Cinepak (ffmpeg)"},
{BUF_VIDEO_MSVC, CODEC_ID_MSVIDEO1, "Microsoft Video 1 (ffmpeg)"},
@@ -376,7 +390,7 @@ static void init_video_codec (ff_video_decoder_t *this, unsigned int codec_type)
/* enable direct rendering by default */
this->output_format = XINE_IMGFMT_YV12;
#ifdef ENABLE_DIRECT_RENDERING
- if( this->codec->capabilities & CODEC_CAP_DR1 ) {
+ if( this->codec->capabilities & CODEC_CAP_DR1 && this->codec->id != CODEC_ID_H264 ) {
this->context->get_buffer = get_buffer;
this->context->release_buffer = release_buffer;
xprintf(this->stream->xine, XINE_VERBOSITY_LOG,
@@ -801,7 +815,7 @@ static void ff_check_bufsize (ff_video_decoder_t *this, int size) {
xprintf(this->stream->xine, XINE_VERBOSITY_LOG,
_("ffmpeg_video_dec: increasing buffer to %d to avoid overflow.\n"),
this->bufsize);
- this->buf = realloc(this->buf, this->bufsize);
+ this->buf = realloc(this->buf, this->bufsize + FF_INPUT_BUFFER_PADDING_SIZE );
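+ /* the padding is now added centrally here; callers pass the payload size only */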
}
}
@@ -826,7 +840,7 @@ static void ff_handle_header_buffer (ff_video_decoder_t *this, buf_element_t *bu
lprintf ("header buffer\n");
/* accumulate data */
- ff_check_bufsize(this, this->size + buf->size + FF_INPUT_BUFFER_PADDING_SIZE);
+ ff_check_bufsize(this, this->size + buf->size);
xine_fast_memcpy (&this->buf[this->size], buf->content, buf->size);
this->size += buf->size;
@@ -1102,7 +1116,7 @@ static void ff_handle_buffer (ff_video_decoder_t *this, buf_element_t *buf) {
lprintf("no memcpy needed to accumulate data\n");
} else {
/* copy data into our internal buffer */
- ff_check_bufsize(this, this->size + buf->size + FF_INPUT_BUFFER_PADDING_SIZE);
+ ff_check_bufsize(this, this->size + buf->size);
chunk_buf = this->buf; /* ff_check_bufsize might realloc this->buf */
xine_fast_memcpy (&this->buf[this->size], buf->content, buf->size);
@@ -1122,7 +1136,13 @@ static void ff_handle_buffer (ff_video_decoder_t *this, buf_element_t *buf) {
int codec_type = buf->type & 0xFFFF0000;
/* pad input data */
- chunk_buf[this->size] = 0;
+ /* note: ffmpeg's bitstream and alt bitstream readers will produce
+ * severe mpeg4 artifacts if the padding is less than 32 bits.
+ */
+ chunk_buf[this->size+0] = 0;
+ chunk_buf[this->size+1] = 0;
+ chunk_buf[this->size+2] = 0;
+ chunk_buf[this->size+3] = 0;
while (this->size > 0) {
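
The four explicit zero bytes put the comment above into practice: ffmpeg's bitstream readers fetch input 32 bits at a time, so a read at the very end of the data can touch up to three bytes past this->size, and unless those bytes are zeroed, mpeg4 decoding degrades visibly. An equivalent formulation as a memset(), assuming the buffer was grown with the padding tail discussed above:

    #include <stdint.h>
    #include <string.h>

    /* zero 4 bytes past the payload so a 32-bit bitstream fetch at the very
     * end of the data reads zeros instead of stale heap contents */
    static void pad_input(uint8_t *buf, size_t size) {
        memset(buf + size, 0, 4);
    }

    int main(void) {
        uint8_t buf[16 + 4] = { 0xAA };   /* payload + padding tail */
        pad_input(buf, 16);
        return 0;
    }
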
@@ -1150,7 +1170,7 @@ static void ff_handle_buffer (ff_video_decoder_t *this, buf_element_t *buf) {
this->size -= len;
if (this->size > 0) {
- ff_check_bufsize(this, this->size + FF_INPUT_BUFFER_PADDING_SIZE);
+ ff_check_bufsize(this, this->size);
memmove (this->buf, &chunk_buf[offset], this->size);
chunk_buf = this->buf;
}
@@ -1256,7 +1276,7 @@ static void ff_handle_buffer (ff_video_decoder_t *this, buf_element_t *buf) {
img->crop_bottom = this->crop_bottom;
this->skipframes = img->draw(img, this->stream);
-
+
if(free_img)
img->free(img);
}
@@ -1360,12 +1380,23 @@ static void ff_dispose (video_decoder_t *this_gen) {
ff_video_decoder_t *this = (ff_video_decoder_t *) this_gen;
lprintf ("ff_dispose\n");
-
+
if (this->decoder_ok) {
+ xine_list_iterator_t it;
+ AVFrame *av_frame;
+
pthread_mutex_lock(&ffmpeg_lock);
avcodec_close (this->context);
pthread_mutex_unlock(&ffmpeg_lock);
-
+
+ /* frame garbage collector here - workaround for buggy ffmpeg codecs that
+ * don't release their DR1 frames */
+ while( (it = xine_list_front(this->dr1_frames)) != NULL )
+ {
+ av_frame = (AVFrame *)xine_list_get_value(this->dr1_frames, it);
+ release_buffer(this->context, av_frame);
+ }
+
this->stream->video_out->close(this->stream->video_out, this->stream);
this->decoder_ok = 0;
}
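
The collector loop terminates because release_buffer() (above) unlinks each frame from dr1_frames: every iteration shortens the list by one, so repeatedly taking the front entry drains even the frames a buggy codec never returned. A self-contained sketch of that invariant, with a toy list and release function in place of xine_list_t and the plugin's release_buffer():

    #include <stdio.h>

    #define MAX_FRAMES 32
    typedef struct { int frame[MAX_FRAMES]; int n; } frame_list_t;

    static int *list_front(frame_list_t *l) { return l->n ? &l->frame[0] : NULL; }

    /* stand-in for release_buffer(): frees the frame and unlinks it */
    static void release_frame(frame_list_t *l, int *f) {
        printf("releasing leaked frame %d\n", *f);
        l->frame[0] = l->frame[--l->n];   /* remove from the list */
    }

    int main(void) {
        frame_list_t dr1_frames = { {7, 8, 9}, 3 };  /* three leaked frames */
        int *f;

        /* the dispose-time garbage collector: loop until the list drains */
        while ((f = list_front(&dr1_frames)) != NULL)
            release_frame(&dr1_frames, f);

        return 0;
    }
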
@@ -1394,6 +1425,8 @@ static void ff_dispose (video_decoder_t *this_gen) {
if(this->pp_mode)
pp_free_mode(this->pp_mode);
+
+ xine_list_delete(this->dr1_frames);
free (this_gen);
}
@@ -1433,6 +1466,8 @@ static video_decoder_t *ff_video_open_plugin (video_decoder_class_t *class_gen,
this->pp_context = NULL;
this->pp_mode = NULL;
+ this->dr1_frames = xine_list_new();
+
mpeg_parser_init(&this->mpeg_parser);
return &this->video_decoder;
@@ -1483,73 +1518,223 @@ void *init_video_plugin (xine_t *xine, void *data) {
}
static uint32_t supported_video_types[] = {
- BUF_VIDEO_MSMPEG4_V1,
+ #ifdef CONFIG_MSMPEG4V1_DECODER
+ BUF_VIDEO_MSMPEG4_V1,
+ #endif
+ #ifdef CONFIG_MSMPEG4V2_DECODER
BUF_VIDEO_MSMPEG4_V2,
- BUF_VIDEO_MSMPEG4_V3,
- BUF_VIDEO_WMV7,
+ #endif
+ #ifdef CONFIG_MSMPEG4V3_DECODER
+ BUF_VIDEO_MSMPEG4_V3,
+ #endif
+ #ifdef CONFIG_WMV1_DECODER
+ BUF_VIDEO_WMV7,
+ #endif
+ #ifdef CONFIG_WMV2_DECODER
+ BUF_VIDEO_WMV8,
+ #endif
+ #ifdef CONFIG_WMV3_DECODER
+ BUF_VIDEO_WMV9,
+ #endif
+ #ifdef CONFIG_MPEG4_DECODER
BUF_VIDEO_MPEG4,
- BUF_VIDEO_XVID,
- BUF_VIDEO_DIVX5,
+ #endif
+ #ifdef CONFIG_MPEG4_DECODER
+ BUF_VIDEO_XVID,
+ #endif
+ #ifdef CONFIG_MPEG4_DECODER
+ BUF_VIDEO_DIVX5,
+ #endif
+ #ifdef CONFIG_MPEG4_DECODER
BUF_VIDEO_3IVX,
+ #endif
+ #ifdef CONFIG_MJPEG_DECODER
+ BUF_VIDEO_JPEG,
+ #endif
+ #ifdef CONFIG_MJPEG_DECODER
BUF_VIDEO_MJPEG,
+ #endif
+ #ifdef CONFIG_MJPEGB_DECODER
BUF_VIDEO_MJPEG_B,
+ #endif
+ #ifdef CONFIG_H263I_DECODER
+ BUF_VIDEO_I263,
+ #endif
+ #ifdef CONFIG_H263_DECODER
BUF_VIDEO_H263,
+ #endif
+ #ifdef CONFIG_RV10_DECODER
BUF_VIDEO_RV10,
+ #endif
+ #ifdef CONFIG_RV20_DECODER
BUF_VIDEO_RV20,
+ #endif
+ #ifdef CONFIG_INDEO3_DECODER
BUF_VIDEO_IV31,
+ #endif
+ #ifdef CONFIG_INDEO3_DECODER
BUF_VIDEO_IV32,
+ #endif
+ #ifdef CONFIG_SVQ1_DECODER
BUF_VIDEO_SORENSON_V1,
+ #endif
+ #ifdef CONFIG_SVQ3_DECODER
BUF_VIDEO_SORENSON_V3,
- BUF_VIDEO_JPEG,
- BUF_VIDEO_MPEG,
+ #endif
+ #ifdef CONFIG_DVVIDEO_DECODER
BUF_VIDEO_DV,
+ #endif
+ #ifdef CONFIG_HUFFYUV_DECODER
BUF_VIDEO_HUFFYUV,
+ #endif
+ #ifdef CONFIG_VP3_DECODER
BUF_VIDEO_VP31,
+ #endif
+ #ifdef CONFIG_VP5_DECODER
+ BUF_VIDEO_VP5,
+ #endif
+ #ifdef CONFIG_VP6_DECODER
+ BUF_VIDEO_VP6,
+ #endif
+ #ifdef CONFIG_4XM_DECODER
BUF_VIDEO_4XM,
+ #endif
+ #ifdef CONFIG_CINEPAK_DECODER
BUF_VIDEO_CINEPAK,
+ #endif
+ #ifdef CONFIG_MSVIDEO1_DECODER
BUF_VIDEO_MSVC,
+ #endif
+ #ifdef CONFIG_MSRLE_DECODER
BUF_VIDEO_MSRLE,
+ #endif
+ #ifdef CONFIG_RPZA_DECODER
BUF_VIDEO_RPZA,
+ #endif
+ #ifdef CONFIG_CYUV_DECODER
BUF_VIDEO_CYUV,
+ #endif
+ #ifdef CONFIG_ROQ_DECODER
BUF_VIDEO_ROQ,
+ #endif
+ #ifdef CONFIG_IDCIN_DECODER
BUF_VIDEO_IDCIN,
+ #endif
+ #ifdef CONFIG_XAN_WC3_DECODER
BUF_VIDEO_WC3,
+ #endif
+ #ifdef CONFIG_WS_VQA_DECODER
BUF_VIDEO_VQA,
+ #endif
+ #ifdef CONFIG_INTERPLAY_VIDEO_DECODER
BUF_VIDEO_INTERPLAY,
+ #endif
+ #ifdef CONFIG_FLIC_DECODER
BUF_VIDEO_FLI,
+ #endif
+ #ifdef CONFIG_8BPS_DECODER
BUF_VIDEO_8BPS,
+ #endif
+ #ifdef CONFIG_SMC_DECODER
BUF_VIDEO_SMC,
- BUF_VIDEO_VMD,
+ #endif
+ #ifdef CONFIG_TRUEMOTION1_DECODER
BUF_VIDEO_DUCKTM1,
+ #endif
+ #ifdef CONFIG_TRUEMOTION2_DECODER
BUF_VIDEO_DUCKTM2,
+ #endif
+ #ifdef CONFIG_VMDVIDEO_DECODER
+ BUF_VIDEO_VMD,
+ #endif
+ #ifdef CONFIG_ZLIB_DECODER
BUF_VIDEO_ZLIB,
+ #endif
+ #ifdef CONFIG_MSZH_DECODER
BUF_VIDEO_MSZH,
+ #endif
+ #ifdef CONFIG_ASV1_DECODER
BUF_VIDEO_ASV1,
+ #endif
+ #ifdef CONFIG_ASV2_DECODER
BUF_VIDEO_ASV2,
+ #endif
+ #ifdef CONFIG_VCR1_DECODER
BUF_VIDEO_ATIVCR1,
+ #endif
+ #ifdef CONFIG_FLV1_DECODER
BUF_VIDEO_FLV1,
+ #endif
+ #ifdef CONFIG_QTRLE_DECODER
BUF_VIDEO_QTRLE,
+ #endif
+ #ifdef CONFIG_H264_DECODER
BUF_VIDEO_H264,
+ #endif
+ #ifdef CONFIG_H261_DECODER
BUF_VIDEO_H261,
+ #endif
+ #ifdef CONFIG_AASC_DECODER
BUF_VIDEO_AASC,
+ #endif
+ #ifdef CONFIG_LOCO_DECODER
BUF_VIDEO_LOCO,
+ #endif
+ #ifdef CONFIG_QDRAW_DECODER
BUF_VIDEO_QDRW,
+ #endif
+ #ifdef CONFIG_QPEG_DECODER
BUF_VIDEO_QPEG,
+ #endif
+ #ifdef CONFIG_TSCC_DECODER
BUF_VIDEO_TSCC,
+ #endif
+ #ifdef CONFIG_ULTI_DECODER
BUF_VIDEO_ULTI,
+ #endif
+ #ifdef CONFIG_WNV1_DECODER
BUF_VIDEO_WNV1,
+ #endif
+ #ifdef CONFIG_VIXL_DECODER
BUF_VIDEO_XL,
+ #endif
+ #ifdef CONFIG_INDEO2_DECODER
BUF_VIDEO_RT21,
+ #endif
+ #ifdef CONFIG_FRAPS_DECODER
BUF_VIDEO_FPS1,
+ #endif
+ #ifdef CONFIG_MPEG1VIDEO_DECODER
+ BUF_VIDEO_MPEG,
+ #endif
+ #ifdef CONFIG_CSCD_DECODER
BUF_VIDEO_CSCD,
+ #endif
+ #ifdef CONFIG_AVS_DECODER
+ BUF_VIDEO_AVS,
+ #endif
+ #ifdef CONFIG_MMVIDEO_DECODER
BUF_VIDEO_ALGMM,
+ #endif
+ #ifdef CONFIG_ZMBV_DECODER
BUF_VIDEO_ZMBV,
- BUF_VIDEO_AVS,
+ #endif
+ #ifdef CONFIG_SMACKVIDEO_DECODER
BUF_VIDEO_SMACKER,
+ #endif
+ #ifdef CONFIG_NUV_DECODER
BUF_VIDEO_NUV,
+ #endif
+ #ifdef CONFIG_KMVC_DECODER
BUF_VIDEO_KMVC,
+ #endif
+ #ifdef CONFIG_FLASHSV_DECODER
BUF_VIDEO_FLASHSV,
+ #endif
+ #ifdef CONFIG_CAVS_DECODER
BUF_VIDEO_CAVS,
+ #endif
+
0
};
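
Each entry in the table is now compiled in only when ffmpeg_config.h defines the matching CONFIG_*_DECODER symbol, while the trailing 0 keeps the array self-terminating no matter how many entries drop out. A reduced sketch of the scheme; the config macros and buffer-type values here are hypothetical:

    #include <stdio.h>

    /* hypothetical excerpt of ffmpeg_config.h */
    #define CONFIG_H264_DECODER 1
    /* #define CONFIG_VP6_DECODER 1 */   /* disabled at build time */

    #define BUF_VIDEO_H264 0x02300000
    #define BUF_VIDEO_VP6  0x02400000

    static unsigned int supported_types[] = {
    #ifdef CONFIG_H264_DECODER
        BUF_VIDEO_H264,
    #endif
    #ifdef CONFIG_VP6_DECODER
        BUF_VIDEO_VP6,
    #endif
        0   /* terminator: scanners stop here, however many entries remain */
    };

    int main(void) {
        for (int i = 0; supported_types[i]; i++)
            printf("supported: 0x%08x\n", supported_types[i]);
        return 0;
    }
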
diff --git a/src/libffmpeg/xine_decoder.c b/src/libffmpeg/xine_decoder.c
index 02d19cc1a..2eeb9746b 100644
--- a/src/libffmpeg/xine_decoder.c
+++ b/src/libffmpeg/xine_decoder.c
@@ -17,7 +17,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
- * $Id: xine_decoder.c,v 1.172 2006/12/04 22:25:13 miguelfreitas Exp $
+ * $Id: xine_decoder.c,v 1.173 2007/01/13 21:19:52 miguelfreitas Exp $
*
* xine decoder plugin using ffmpeg
*
@@ -25,6 +25,7 @@
#ifdef HAVE_CONFIG_H
#include "config.h"
+#include "ffmpeg_config.h"
#endif
#include "xine_internal.h"
@@ -39,114 +40,273 @@ pthread_once_t once_control = PTHREAD_ONCE_INIT;
pthread_mutex_t ffmpeg_lock;
#ifndef HAVE_FFMPEG
+
+#define REGISTER_ENCODER(X,x) \
+ if(ENABLE_##X##_ENCODER) register_avcodec(&x##_encoder)
+#define REGISTER_DECODER(X,x) \
+ if(ENABLE_##X##_DECODER) register_avcodec(&x##_decoder)
+#define REGISTER_ENCDEC(X,x) REGISTER_ENCODER(X,x); REGISTER_DECODER(X,x)
+
+#define REGISTER_PARSER(X,x) \
+ if(ENABLE_##X##_PARSER) av_register_codec_parser(&x##_parser)
+
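
These macros take the opposite approach to the #ifdef table in video_decoder.c: ffmpeg's build defines every ENABLE_*_DECODER/ENCODER/PARSER symbol as a constant 0 or 1, so a disabled codec becomes if(0) register_avcodec(...) and the compiler drops the call as dead code while still syntax-checking it. A reduced, self-contained sketch of the trick, with made-up ENABLE_* values and a stub registry:

    #include <stdio.h>

    /* normally generated by the build system: always defined, 0 or 1 */
    #define ENABLE_FOO_DECODER 1
    #define ENABLE_BAR_DECODER 0

    typedef struct { const char *name; } AVCodec;

    static AVCodec foo_decoder = { "foo" };
    static AVCodec bar_decoder = { "bar" };

    static void register_avcodec(AVCodec *c) { printf("registered %s\n", c->name); }

    #define REGISTER_DECODER(X, x) \
        if (ENABLE_##X##_DECODER) register_avcodec(&x##_decoder)

    int main(void) {
        REGISTER_DECODER(FOO, foo);   /* if(1): the call survives */
        REGISTER_DECODER(BAR, bar);   /* if(0): dead code, but still compiled */
        return 0;
    }
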
+/* If you do not call this function, you can instead register only the
+ codecs you want to support */
+
+/**
+ * Simple call to register all the codecs.
+ */
void avcodec_register_all(void)
{
static int inited = 0;
-
+
if (inited != 0)
- return;
+ return;
inited = 1;
- /* decoders */
- register_avcodec(&h263_decoder);
- register_avcodec(&mpeg4_decoder);
- register_avcodec(&msmpeg4v1_decoder);
- register_avcodec(&msmpeg4v2_decoder);
- register_avcodec(&msmpeg4v3_decoder);
- register_avcodec(&wmv1_decoder);
- register_avcodec(&wmv2_decoder);
- register_avcodec(&h263i_decoder);
- register_avcodec(&rv10_decoder);
- register_avcodec(&rv20_decoder);
- register_avcodec(&svq1_decoder);
- register_avcodec(&svq3_decoder);
- register_avcodec(&wmav1_decoder);
- register_avcodec(&wmav2_decoder);
- register_avcodec(&indeo3_decoder);
- register_avcodec(&mpeg1video_decoder);
- register_avcodec(&dvvideo_decoder);
- register_avcodec(&pcm_s16le_decoder);
- register_avcodec(&mjpeg_decoder);
- register_avcodec(&mjpegb_decoder);
- register_avcodec(&mp2_decoder);
- register_avcodec(&mp3_decoder);
- register_avcodec(&mace3_decoder);
- register_avcodec(&mace6_decoder);
- register_avcodec(&huffyuv_decoder);
- register_avcodec(&cyuv_decoder);
- register_avcodec(&h264_decoder);
- register_avcodec(&vp3_decoder);
- register_avcodec(&fourxm_decoder);
- register_avcodec(&ra_144_decoder);
- register_avcodec(&ra_288_decoder);
- register_avcodec(&adpcm_ms_decoder);
- register_avcodec(&adpcm_ima_qt_decoder);
- register_avcodec(&adpcm_ima_wav_decoder);
- register_avcodec(&adpcm_ima_dk3_decoder);
- register_avcodec(&adpcm_ima_dk4_decoder);
- register_avcodec(&adpcm_ima_ws_decoder);
- register_avcodec(&adpcm_ima_smjpeg_decoder);
- register_avcodec(&adpcm_xa_decoder);
- register_avcodec(&adpcm_4xm_decoder);
- register_avcodec(&adpcm_ea_decoder);
- register_avcodec(&pcm_alaw_decoder);
- register_avcodec(&pcm_mulaw_decoder);
- register_avcodec(&roq_dpcm_decoder);
- register_avcodec(&interplay_dpcm_decoder);
- register_avcodec(&cinepak_decoder);
- register_avcodec(&msvideo1_decoder);
- register_avcodec(&msrle_decoder);
- register_avcodec(&rpza_decoder);
- register_avcodec(&roq_decoder);
- register_avcodec(&idcin_decoder);
- register_avcodec(&xan_wc3_decoder);
- register_avcodec(&vqa_decoder);
- register_avcodec(&interplay_video_decoder);
- register_avcodec(&flic_decoder);
- register_avcodec(&smc_decoder);
- register_avcodec(&eightbps_decoder);
- register_avcodec(&vmdvideo_decoder);
- register_avcodec(&vmdaudio_decoder);
- register_avcodec(&truemotion1_decoder);
- //register_avcodec(&mszh_decoder);
- //register_avcodec(&zlib_decoder);
- register_avcodec(&xan_dpcm_decoder);
- register_avcodec(&asv1_decoder);
- register_avcodec(&asv2_decoder);
- register_avcodec(&vcr1_decoder);
- register_avcodec(&flv_decoder);
- register_avcodec(&qtrle_decoder);
- register_avcodec(&flac_decoder);
- register_avcodec(&aasc_decoder);
- register_avcodec(&alac_decoder);
- register_avcodec(&h261_decoder);
- register_avcodec(&loco_decoder);
- register_avcodec(&qdraw_decoder);
- register_avcodec(&qpeg_decoder);
- register_avcodec(&tscc_decoder);
- register_avcodec(&ulti_decoder);
- register_avcodec(&wnv1_decoder);
- register_avcodec(&xl_decoder);
- register_avcodec(&indeo2_decoder);
- register_avcodec(&fraps_decoder);
- register_avcodec(&shorten_decoder);
- register_avcodec(&qdm2_decoder);
- register_avcodec(&truemotion2_decoder);
- register_avcodec(&wmv3_decoder);
- register_avcodec(&cscd_decoder);
- register_avcodec(&mmvideo_decoder);
- register_avcodec(&zmbv_decoder);
- register_avcodec(&avs_decoder);
- register_avcodec(&smacker_decoder);
- register_avcodec(&smackaud_decoder);
- register_avcodec(&nuv_decoder);
- register_avcodec(&kmvc_decoder);
- register_avcodec(&flashsv_decoder);
- //register_avcodec(&cavs_decoder);
- register_avcodec(&cook_decoder);
- register_avcodec(&truespeech_decoder);
- register_avcodec(&tta_decoder);
+ /* video codecs */
+ REGISTER_DECODER(AASC, aasc);
+ REGISTER_ENCDEC (ASV1, asv1);
+ REGISTER_ENCDEC (ASV2, asv2);
+ REGISTER_DECODER(AVS, avs);
+ REGISTER_DECODER(BMP, bmp);
+ REGISTER_DECODER(CAVS, cavs);
+ REGISTER_DECODER(CINEPAK, cinepak);
+ REGISTER_DECODER(CLJR, cljr);
+ REGISTER_DECODER(CSCD, cscd);
+ REGISTER_DECODER(CYUV, cyuv);
+ REGISTER_DECODER(DSICINVIDEO, dsicinvideo);
+ REGISTER_ENCDEC (DVVIDEO, dvvideo);
+ REGISTER_DECODER(EIGHTBPS, eightbps);
+ REGISTER_ENCDEC (FFV1, ffv1);
+ REGISTER_ENCDEC (FFVHUFF, ffvhuff);
+ REGISTER_DECODER(FLASHSV, flashsv);
+ REGISTER_DECODER(FLIC, flic);
+ REGISTER_ENCDEC (FLV, flv);
+ REGISTER_DECODER(FOURXM, fourxm);
+ REGISTER_DECODER(FRAPS, fraps);
+ REGISTER_ENCDEC (GIF, gif);
+ REGISTER_ENCDEC (H261, h261);
+ REGISTER_ENCDEC (H263, h263);
+ REGISTER_DECODER(H263I, h263i);
+ REGISTER_ENCODER(H263P, h263p);
+ REGISTER_DECODER(H264, h264);
+ REGISTER_ENCDEC (HUFFYUV, huffyuv);
+ REGISTER_DECODER(IDCIN, idcin);
+ REGISTER_DECODER(INDEO2, indeo2);
+ REGISTER_DECODER(INDEO3, indeo3);
+ REGISTER_DECODER(INTERPLAY_VIDEO, interplay_video);
+ REGISTER_ENCODER(JPEGLS, jpegls);
+ REGISTER_DECODER(KMVC, kmvc);
+ REGISTER_ENCODER(LJPEG, ljpeg);
+ REGISTER_DECODER(LOCO, loco);
+ REGISTER_DECODER(MDEC, mdec);
+ REGISTER_ENCDEC (MJPEG, mjpeg);
+ REGISTER_DECODER(MJPEGB, mjpegb);
+ REGISTER_DECODER(MMVIDEO, mmvideo);
+#ifdef HAVE_XVMC
+ REGISTER_DECODER(MPEG_XVMC, mpeg_xvmc);
+#endif
+ REGISTER_ENCDEC (MPEG1VIDEO, mpeg1video);
+ REGISTER_ENCDEC (MPEG2VIDEO, mpeg2video);
+ REGISTER_ENCDEC (MPEG4, mpeg4);
+ REGISTER_DECODER(MPEGVIDEO, mpegvideo);
+ REGISTER_ENCDEC (MSMPEG4V1, msmpeg4v1);
+ REGISTER_ENCDEC (MSMPEG4V2, msmpeg4v2);
+ REGISTER_ENCDEC (MSMPEG4V3, msmpeg4v3);
+ REGISTER_DECODER(MSRLE, msrle);
+ REGISTER_DECODER(MSVIDEO1, msvideo1);
+ REGISTER_DECODER(MSZH, mszh);
+ REGISTER_DECODER(NUV, nuv);
+ REGISTER_ENCODER(PAM, pam);
+ REGISTER_ENCODER(PBM, pbm);
+ REGISTER_ENCODER(PGM, pgm);
+ REGISTER_ENCODER(PGMYUV, pgmyuv);
+#ifdef CONFIG_ZLIB
+ REGISTER_ENCDEC (PNG, png);
+#endif
+ REGISTER_ENCODER(PPM, ppm);
+ REGISTER_DECODER(QDRAW, qdraw);
+ REGISTER_DECODER(QPEG, qpeg);
+ REGISTER_DECODER(QTRLE, qtrle);
+ REGISTER_ENCDEC (RAWVIDEO, rawvideo);
+ REGISTER_DECODER(ROQ, roq);
+ REGISTER_DECODER(RPZA, rpza);
+ REGISTER_ENCDEC (RV10, rv10);
+ REGISTER_ENCDEC (RV20, rv20);
+ REGISTER_DECODER(SMACKER, smacker);
+ REGISTER_DECODER(SMC, smc);
+ REGISTER_ENCDEC (SNOW, snow);
+ REGISTER_DECODER(SP5X, sp5x);
+ REGISTER_ENCDEC (SVQ1, svq1);
+ REGISTER_DECODER(SVQ3, svq3);
+ REGISTER_DECODER(TARGA, targa);
+ REGISTER_DECODER(THEORA, theora);
+ REGISTER_DECODER(TIERTEXSEQVIDEO, tiertexseqvideo);
+ REGISTER_DECODER(TIFF, tiff);
+ REGISTER_DECODER(TRUEMOTION1, truemotion1);
+ REGISTER_DECODER(TRUEMOTION2, truemotion2);
+ REGISTER_DECODER(TSCC, tscc);
+ REGISTER_DECODER(ULTI, ulti);
+ REGISTER_DECODER(VC1, vc1);
+ REGISTER_DECODER(VCR1, vcr1);
+ REGISTER_DECODER(VMDVIDEO, vmdvideo);
+ REGISTER_DECODER(VMNC, vmnc);
+ REGISTER_DECODER(VP3, vp3);
+ REGISTER_DECODER(VP5, vp5);
+ REGISTER_DECODER(VP6, vp6);
+ REGISTER_DECODER(VP6F, vp6f);
+ REGISTER_DECODER(VQA, vqa);
+ REGISTER_ENCDEC (WMV1, wmv1);
+ REGISTER_ENCDEC (WMV2, wmv2);
+ REGISTER_DECODER(WMV3, wmv3);
+ REGISTER_DECODER(WNV1, wnv1);
+#ifdef CONFIG_X264
+ REGISTER_ENCODER(X264, x264);
+#endif
+ REGISTER_DECODER(XAN_WC3, xan_wc3);
+ REGISTER_DECODER(XL, xl);
+#ifdef CONFIG_XVID
+ REGISTER_ENCODER(XVID, xvid);
+#endif
+ REGISTER_ENCDEC (ZLIB, zlib);
+#ifdef CONFIG_ZLIB
+ REGISTER_ENCDEC (ZMBV, zmbv);
+#endif
+
+ /* audio codecs */
+#ifdef CONFIG_LIBFAAD
+ REGISTER_DECODER(AAC, aac);
+ REGISTER_DECODER(MPEG4AAC, mpeg4aac);
+#endif
+#ifdef CONFIG_LIBA52
+ REGISTER_DECODER(AC3, ac3);
+#endif
+ REGISTER_ENCODER(AC3, ac3);
+ REGISTER_DECODER(ALAC, alac);
+#if defined(CONFIG_AMR_NB) || defined(CONFIG_AMR_NB_FIXED)
+ REGISTER_ENCDEC (AMR_NB, amr_nb);
+#endif
+#ifdef CONFIG_AMR_WB
+ REGISTER_ENCDEC (AMR_WB, amr_wb);
+#endif
+ REGISTER_DECODER(COOK, cook);
+ REGISTER_DECODER(DSICINAUDIO, dsicinaudio);
+#ifdef CONFIG_LIBDTS
+ REGISTER_DECODER(DTS, dts);
+#endif
+#ifdef CONFIG_LIBFAAC
+ REGISTER_ENCODER(FAAC, faac);
+#endif
+ REGISTER_ENCDEC (FLAC, flac);
+ REGISTER_DECODER(IMC, imc);
+#ifdef CONFIG_LIBGSM
+ REGISTER_ENCDEC (LIBGSM, libgsm);
+#endif
+ REGISTER_DECODER(MACE3, mace3);
+ REGISTER_DECODER(MACE6, mace6);
+ REGISTER_ENCDEC (MP2, mp2);
+ REGISTER_DECODER(MP3, mp3);
+ REGISTER_DECODER(MP3ADU, mp3adu);
+#ifdef CONFIG_LIBMP3LAME
+ REGISTER_ENCODER(MP3LAME, mp3lame);
+#endif
+ REGISTER_DECODER(MP3ON4, mp3on4);
+ REGISTER_DECODER(MPC7, mpc7);
+#ifdef CONFIG_LIBVORBIS
+ if (!ENABLE_VORBIS_ENCODER) REGISTER_ENCODER(OGGVORBIS, oggvorbis);
+ if (!ENABLE_VORBIS_DECODER) REGISTER_DECODER(OGGVORBIS, oggvorbis);
+#endif
+ REGISTER_DECODER(QDM2, qdm2);
+ REGISTER_DECODER(RA_144, ra_144);
+ REGISTER_DECODER(RA_288, ra_288);
+ REGISTER_DECODER(SHORTEN, shorten);
+ REGISTER_DECODER(SMACKAUD, smackaud);
+ REGISTER_ENCDEC (SONIC, sonic);
+ REGISTER_ENCODER(SONIC_LS, sonic_ls);
+ REGISTER_DECODER(TRUESPEECH, truespeech);
+ REGISTER_DECODER(TTA, tta);
+ REGISTER_DECODER(VMDAUDIO, vmdaudio);
+ REGISTER_ENCDEC (VORBIS, vorbis);
+ REGISTER_DECODER(WAVPACK, wavpack);
+ REGISTER_DECODER(WMAV1, wmav1);
+ REGISTER_DECODER(WMAV2, wmav2);
+ REGISTER_DECODER(WS_SND1, ws_snd1);
+
+ /* pcm codecs */
+ REGISTER_ENCDEC (PCM_ALAW, pcm_alaw);
+ REGISTER_ENCDEC (PCM_MULAW, pcm_mulaw);
+ REGISTER_ENCDEC (PCM_S8, pcm_s8);
+ REGISTER_ENCDEC (PCM_S16BE, pcm_s16be);
+ REGISTER_ENCDEC (PCM_S16LE, pcm_s16le);
+ REGISTER_ENCDEC (PCM_S24BE, pcm_s24be);
+ REGISTER_ENCDEC (PCM_S24DAUD, pcm_s24daud);
+ REGISTER_ENCDEC (PCM_S24LE, pcm_s24le);
+ REGISTER_ENCDEC (PCM_S32BE, pcm_s32be);
+ REGISTER_ENCDEC (PCM_S32LE, pcm_s32le);
+ REGISTER_ENCDEC (PCM_U8, pcm_u8);
+ REGISTER_ENCDEC (PCM_U16BE, pcm_u16be);
+ REGISTER_ENCDEC (PCM_U16LE, pcm_u16le);
+ REGISTER_ENCDEC (PCM_U24BE, pcm_u24be);
+ REGISTER_ENCDEC (PCM_U24LE, pcm_u24le);
+ REGISTER_ENCDEC (PCM_U32BE, pcm_u32be);
+ REGISTER_ENCDEC (PCM_U32LE, pcm_u32le);
+
+ /* dpcm codecs */
+ REGISTER_DECODER(INTERPLAY_DPCM, interplay_dpcm);
+ REGISTER_DECODER(ROQ_DPCM, roq_dpcm);
+ REGISTER_DECODER(SOL_DPCM, sol_dpcm);
+ REGISTER_DECODER(XAN_DPCM, xan_dpcm);
+
+ /* adpcm codecs */
+ REGISTER_ENCDEC (ADPCM_4XM, adpcm_4xm);
+ REGISTER_ENCDEC (ADPCM_ADX, adpcm_adx);
+ REGISTER_ENCDEC (ADPCM_CT, adpcm_ct);
+ REGISTER_ENCDEC (ADPCM_EA, adpcm_ea);
+ REGISTER_ENCDEC (ADPCM_G726, adpcm_g726);
+ REGISTER_ENCDEC (ADPCM_IMA_DK3, adpcm_ima_dk3);
+ REGISTER_ENCDEC (ADPCM_IMA_DK4, adpcm_ima_dk4);
+ REGISTER_ENCDEC (ADPCM_IMA_QT, adpcm_ima_qt);
+ REGISTER_ENCDEC (ADPCM_IMA_SMJPEG, adpcm_ima_smjpeg);
+ REGISTER_ENCDEC (ADPCM_IMA_WAV, adpcm_ima_wav);
+ REGISTER_ENCDEC (ADPCM_IMA_WS, adpcm_ima_ws);
+ REGISTER_ENCDEC (ADPCM_MS, adpcm_ms);
+ REGISTER_ENCDEC (ADPCM_SBPRO_2, adpcm_sbpro_2);
+ REGISTER_ENCDEC (ADPCM_SBPRO_3, adpcm_sbpro_3);
+ REGISTER_ENCDEC (ADPCM_SBPRO_4, adpcm_sbpro_4);
+ REGISTER_ENCDEC (ADPCM_SWF, adpcm_swf);
+ REGISTER_ENCDEC (ADPCM_XA, adpcm_xa);
+ REGISTER_ENCDEC (ADPCM_YAMAHA, adpcm_yamaha);
+
+ /* subtitles */
+ REGISTER_ENCDEC (DVBSUB, dvbsub);
+ REGISTER_ENCDEC (DVDSUB, dvdsub);
+
+ /* parsers */
+ REGISTER_PARSER (AAC, aac);
+ REGISTER_PARSER (AC3, ac3);
+ REGISTER_PARSER (CAVSVIDEO, cavsvideo);
+ REGISTER_PARSER (DVBSUB, dvbsub);
+ REGISTER_PARSER (DVDSUB, dvdsub);
+ REGISTER_PARSER (H261, h261);
+ REGISTER_PARSER (H263, h263);
+ REGISTER_PARSER (H264, h264);
+ REGISTER_PARSER (MJPEG, mjpeg);
+ REGISTER_PARSER (MPEG4VIDEO, mpeg4video);
+ REGISTER_PARSER (MPEGAUDIO, mpegaudio);
+ REGISTER_PARSER (MPEGVIDEO, mpegvideo);
+ REGISTER_PARSER (PNM, pnm);
+
+ /*
+ av_register_bitstream_filter(&dump_extradata_bsf);
+ av_register_bitstream_filter(&remove_extradata_bsf);
+ av_register_bitstream_filter(&noise_bsf);
+ av_register_bitstream_filter(&mp3_header_compress_bsf);
+ av_register_bitstream_filter(&mp3_header_decompress_bsf);
+ av_register_bitstream_filter(&mjpega_dump_header_bsf);
+ */
}
+
#endif
void init_once_routine(void) {