author     Miguel Freitas <miguelfreitas@users.sourceforge.net>    2007-01-13 21:19:52 +0000
committer  Miguel Freitas <miguelfreitas@users.sourceforge.net>    2007-01-13 21:19:52 +0000
commit     6e8ff6e5c232de4b8235626af31ab85345120a93 (patch)
tree       25930156aa9f4f2014bf6fe3d65c183262626b8d /src
parent     2f5905081ee2040537f043fe4afabbb66d26354e (diff)
download   xine-lib-6e8ff6e5c232de4b8235626af31ab85345120a93.tar.gz
           xine-lib-6e8ff6e5c232de4b8235626af31ab85345120a93.tar.bz2
* ffmpeg update to 51.28.0
* Work around buggy ffmpeg codecs that don't release their DR1 frames
  (see the illustrative sketch below).
* Fix several segfaults and a freezing problem with H.264 streams that use
  a lot of reference frames (e.g. 15).
* Initial support for enabling/disabling ffmpeg codecs. Codecs may be
  disabled in groups with --disable-ffmpeg-uncommon-codecs and
  --disable-ffmpeg-popular-codecs. Think of "uncommon" codecs as the ones
  people would never want to play on their PDAs (removing them saves
  memory); a sketch of the gating mechanism follows this message.
  Note: currently both uncommon and popular codecs are _built_ but
  disabled; that is, the build system still needs some improvements to
  really save memory.
  Warning: a non-autoconf guru is playing with the build system, so
  breakage is likely.
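An aside on the DR1 workaround above: the underlying idea is that xine, not the codec, must ultimately account for every direct-rendering frame it loans out through the old get_buffer()/release_buffer() callback API. The following is a purely illustrative, compilable sketch of that bookkeeping — the actual hunk is not part of this /src diffstat excerpt, and every identifier here (dr1_tracker_t, dr1_remember, and so on) is a hypothetical stand-in, not xine's real code:

#include <stdlib.h>
#include <stdio.h>

typedef struct AVFrame AVFrame;        /* opaque stand-in for the libavcodec type */

#define MAX_DR1_FRAMES 32

typedef struct {
  AVFrame *dr1_frames[MAX_DR1_FRAMES]; /* frames currently loaned to libavcodec */
  int      dr1_frames_used;
} dr1_tracker_t;

/* hypothetical hook for xine's get_buffer() callback */
static void dr1_remember(dr1_tracker_t *t, AVFrame *frame) {
  if (t->dr1_frames_used < MAX_DR1_FRAMES)
    t->dr1_frames[t->dr1_frames_used++] = frame;
}

/* hypothetical hook for the release_buffer() callback (well-behaved codecs) */
static void dr1_forget(dr1_tracker_t *t, AVFrame *frame) {
  int i;
  for (i = 0; i < t->dr1_frames_used; i++)
    if (t->dr1_frames[i] == frame)
      t->dr1_frames[i] = NULL;
}

/* on stream close: release whatever a buggy codec never gave back */
static void dr1_release_leaked(dr1_tracker_t *t, void (*release)(AVFrame *)) {
  int i;
  for (i = 0; i < t->dr1_frames_used; i++)
    if (t->dr1_frames[i])
      release(t->dr1_frames[i]);
  t->dr1_frames_used = 0;
}

static void release_stub(AVFrame *f) { free(f); puts("released a leaked DR1 frame"); }

int main(void) {
  dr1_tracker_t t = { {0}, 0 };
  AVFrame *a = malloc(16), *b = malloc(16);  /* pretend frame allocations */
  dr1_remember(&t, a);
  dr1_remember(&t, b);
  dr1_forget(&t, a);                    /* the codec released this one properly */
  free(a);
  dr1_release_leaked(&t, release_stub); /* ...but leaked the other */
  return 0;
}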
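The codec-group gating works by including a second, generated header. Below is a minimal sketch of the pattern, matching the CONFIG_*_DECODER #ifdef lists visible in the audio_decoder.c hunk further down; the exact defines emitted into ffmpeg_config.h and the BUF_AUDIO_* values are assumptions made only so the sketch compiles:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical excerpt of the generated src/libffmpeg/ffmpeg_config.h:
 * --disable-ffmpeg-uncommon-codecs would simply omit the defines for
 * the codecs in that group. */
#define CONFIG_WMAV1_DECODER 1
#define CONFIG_WMAV2_DECODER 1
/* CONFIG_ROQ_DPCM_DECODER intentionally left undefined ("uncommon") */

/* Stand-ins for xine's buffer-type constants, just so this compiles. */
#define BUF_AUDIO_WMAV1 0x0301
#define BUF_AUDIO_WMAV2 0x0302
#define BUF_AUDIO_ROQ   0x0303

/* The decoder table is then assembled conditionally, exactly as the
 * audio_decoder.c hunk below does for the full codec list. */
static uint32_t supported_audio_types[] = {
#ifdef CONFIG_WMAV1_DECODER
  BUF_AUDIO_WMAV1,
#endif
#ifdef CONFIG_WMAV2_DECODER
  BUF_AUDIO_WMAV2,
#endif
#ifdef CONFIG_ROQ_DPCM_DECODER
  BUF_AUDIO_ROQ,       /* compiled out when its group is disabled */
#endif
  0 /* terminator */
};

int main(void) {
  int n = 0;
  while (supported_audio_types[n]) n++;
  printf("%d audio decoders enabled\n", n);
  return 0;
}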
CVS patchset: 8499
CVS date: 2007/01/13 21:19:52
Diffstat (limited to 'src')
75 files changed, 9773 insertions, 873 deletions
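One fix carried in diff_to_ffmpeg_cvs.txt below deserves a standalone explanation: with a C99 variadic macro, dprintf("msg\n") expands to av_log(..., fmt, ) — a trailing comma, hence a syntax error — whereas GCC's named-varargs form with ##args deletes that comma. A condensed, compilable sketch of the same pattern, where fprintf stands in for av_log(NULL, AV_LOG_DEBUG, ...) only to keep it self-contained:

#include <stdio.h>

#define DEBUG  /* enable the logging branch for this demo */

/* Condensed from the libavutil/internal.h hunk below: prefer GCC's
 * named-varargs form, because ##args swallows the trailing comma when
 * the macro is invoked with no variadic arguments at all. */
#ifdef DEBUG
# ifdef __GNUC__
#  define dprintf(fmt, args...) fprintf(stderr, fmt, ##args)
# else
#  define dprintf(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__)
# endif
#else
# define dprintf(fmt, ...)
#endif

int main(void) {
  dprintf("plain message, no varargs\n"); /* breaks with the C99-only form */
  dprintf("with an argument: %d\n", 42);
  return 0;
}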
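Similarly, the avcodec.h hunk below plays a preprocessor trick worth unpacking: since the encoder half of ffmpeg is not compiled, many static helpers are defined but never called. Rather than annotate each one, the patch redefines the static keyword itself (self-referential macros are not re-expanded, so this does not recurse). A self-contained, GCC-only sketch of the idea:

#include <stdio.h>

/* Trick from the avcodec.h hunk: every subsequent `static` picks up
 * __attribute__((__unused__)), silencing -Wunused-function for helpers
 * that the disabled encoder paths would have called.  Note the system
 * header is included first, before the keyword is redefined. */
#if defined(__GNUC__) && !defined(DEBUG)
# define static static __attribute__((__unused__))
#endif

static int never_called(int x) { return x + 1; }  /* no warning emitted */

int main(void) {
  printf("built without unused-function warnings\n");
  return 0;
}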
diff --git a/src/libffmpeg/audio_decoder.c b/src/libffmpeg/audio_decoder.c index 22f567e9c..8f0425775 100644 --- a/src/libffmpeg/audio_decoder.c +++ b/src/libffmpeg/audio_decoder.c @@ -17,7 +17,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA * - * $Id: audio_decoder.c,v 1.31 2006/12/26 03:20:12 dgp85 Exp $ + * $Id: audio_decoder.c,v 1.32 2007/01/13 21:19:52 miguelfreitas Exp $ * * xine audio decoder plugin using ffmpeg * @@ -25,6 +25,7 @@ #ifdef HAVE_CONFIG_H #include "config.h" +#include "ffmpeg_config.h" #endif #include <stdlib.h> @@ -107,8 +108,8 @@ static const ff_codec_t ff_audio_lookup[] = { {BUF_AUDIO_TRUESPEECH, CODEC_ID_TRUESPEECH, "TrueSpeech (ffmpeg)"}, {BUF_AUDIO_TTA, CODEC_ID_TTA, "True Audio Lossless (ffmpeg)"}, {BUF_AUDIO_SMACKER, CODEC_ID_SMACKAUDIO, "Smacker (ffmpeg)"}, - {BUF_AUDIO_FLVADPCM, CODEC_ID_ADPCM_SWF, "Flash ADPCM (ffmpeg)"}, - {BUF_AUDIO_WAVPACK, CODEC_ID_WAVPACK, "WavPack (ffmpeg)"}, + {BUF_AUDIO_FLVADPCM, CODEC_ID_ADPCM_SWF, "Flash ADPCM (ffmpeg)"}, + {BUF_AUDIO_WAVPACK, CODEC_ID_WAVPACK, "WavPack (ffmpeg)"}, }; @@ -443,39 +444,106 @@ void *init_audio_plugin (xine_t *xine, void *data) { } static uint32_t supported_audio_types[] = { + #ifdef CONFIG_WMAV1_DECODER BUF_AUDIO_WMAV1, + #endif + #ifdef CONFIG_WMAV2_DECODER BUF_AUDIO_WMAV2, + #endif + #ifdef CONFIG_RA_144_DECODER BUF_AUDIO_14_4, + #endif + #ifdef CONFIG_RA_288_DECODER BUF_AUDIO_28_8, - BUF_AUDIO_MULAW, - BUF_AUDIO_ALAW, + #endif + #ifdef CONFIG_MP3_DECODER + BUF_AUDIO_MPEG, + #endif + #ifdef CONFIG_ADPCM_MS_DECODER BUF_AUDIO_MSADPCM, + #endif + #ifdef CONFIG_ADPCM_IMA_QT_DECODER BUF_AUDIO_QTIMAADPCM, + #endif + #ifdef CONFIG_ADPCM_IMA_WAV_DECODER BUF_AUDIO_MSIMAADPCM, + #endif + #ifdef CONFIG_ADPCM_IMA_DK3_DECODER BUF_AUDIO_DK3ADPCM, + #endif + #ifdef CONFIG_ADPCM_IMA_DK4_DECODER BUF_AUDIO_DK4ADPCM, + #endif + #ifdef CONFIG_ADPCM_IMA_WS_DECODER + BUF_AUDIO_VQA_IMA, + #endif + #ifdef CONFIG_ADPCM_IMA_SMJPEG_DECODER + BUF_AUDIO_SMJPEG_IMA, + #endif + #ifdef CONFIG_ADPCM_XA_DECODER BUF_AUDIO_XA_ADPCM, + #endif + #ifdef CONFIG_ADPCM_4XM_DECODER + BUF_AUDIO_4X_ADPCM, + #endif + #ifdef CONFIG_ADPCM_EA_DECODER + BUF_AUDIO_EA_ADPCM, + #endif + #ifdef CONFIG_PCM_MULAW_DECODER + BUF_AUDIO_MULAW, + #endif + #ifdef CONFIG_PCM_ALAW_DECODER + BUF_AUDIO_ALAW, + #endif + #ifdef CONFIG_ROQ_DPCM_DECODER BUF_AUDIO_ROQ, + #endif + #ifdef CONFIG_INTERPLAY_DPCM_DECODER BUF_AUDIO_INTERPLAY, - BUF_AUDIO_VQA_IMA, - BUF_AUDIO_4X_ADPCM, + #endif + #ifdef CONFIG_MACE3_DECODER BUF_AUDIO_MAC3, + #endif + #ifdef CONFIG_MACE6_DECODER BUF_AUDIO_MAC6, + #endif + #ifdef CONFIG_XAN_DPCM_DECODER BUF_AUDIO_XAN_DPCM, + #endif + #ifdef CONFIG_VMDAUDIO_DECODER BUF_AUDIO_VMD, - BUF_AUDIO_EA_ADPCM, - BUF_AUDIO_SMJPEG_IMA, + #endif + #ifdef CONFIG_FLAC_DECODER BUF_AUDIO_FLAC, - BUF_AUDIO_ALAC, + #endif + #ifdef CONFIG_SHORTEN_DECODER BUF_AUDIO_SHORTEN, - BUF_AUDIO_MPEG, + #endif + #ifdef CONFIG_ALAC_DECODER + BUF_AUDIO_ALAC, + #endif + #ifdef CONFIG_QDM2_DECODER BUF_AUDIO_QDESIGN2, + #endif + #ifdef CONFIG_COOK_DECODER BUF_AUDIO_COOK, + #endif + #ifdef CONFIG_TRUESPEECH_DECODER BUF_AUDIO_TRUESPEECH, + #endif + #ifdef CONFIG_TTA_DECODER BUF_AUDIO_TTA, + #endif + #ifdef CONFIG_SMACKAUDIO_DECODER BUF_AUDIO_SMACKER, + #endif + #ifdef CONFIG_ADPCM_SWF_DECODER BUF_AUDIO_FLVADPCM, + #endif + #ifdef CONFIG_WAVPACK_DECODER BUF_AUDIO_WAVPACK, + #endif + 0 }; diff --git a/src/libffmpeg/diff_to_ffmpeg_cvs.txt b/src/libffmpeg/diff_to_ffmpeg_cvs.txt index 
7e19e643c..b813b3ab2 100644 --- a/src/libffmpeg/diff_to_ffmpeg_cvs.txt +++ b/src/libffmpeg/diff_to_ffmpeg_cvs.txt @@ -1,74 +1,79 @@ -Index: libavcodec/avcodec.h +Index: libavutil/internal.h =================================================================== ---- libavcodec/avcodec.h (revision 7221) -+++ libavcodec/avcodec.h (working copy) -@@ -47,6 +47,13 @@ - #define AV_TIME_BASE 1000000 - #define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE} +--- libavutil/internal.h (revision 7433) ++++ libavutil/internal.h (working copy) +@@ -181,11 +181,15 @@ + #include <assert.h> -+/* FIXME: We cannot use ffmpeg's XvMC capabilities, since that would require -+ * linking the ffmpeg plugin against XvMC libraries, which is a bad thing, -+ * since they are output dependend. -+ * The correct fix would be to reimplement the XvMC functions libavcodec uses -+ * and do the necessary talking with our XvMC output plugin there. */ -+#undef HAVE_XVMC -+ - enum CodecID { - CODEC_ID_NONE, - CODEC_ID_MPEG1VIDEO, -@@ -2686,6 +2693,13 @@ + /* dprintf macros */ +-#ifdef DEBUG +-# define dprintf(fmt,...) av_log(NULL, AV_LOG_DEBUG, fmt, __VA_ARGS__) +-#else +-# define dprintf(fmt,...) +-#endif ++# ifdef DEBUG ++# ifdef __GNUC__ ++# define dprintf(fmt,args...) av_log(NULL, AV_LOG_DEBUG, fmt, ##args) ++# else ++# define dprintf(fmt,...) av_log(NULL, AV_LOG_DEBUG, fmt, __VA_ARGS__) ++# endif ++# else ++# define dprintf(fmt,...) ++# endif - extern unsigned int av_xiphlacing(unsigned char *s, unsigned int v); + #define av_abort() do { av_log(NULL, AV_LOG_ERROR, "Abort at %s:%d\n", __FILE__, __LINE__); abort(); } while (0) -+/* unused static macro */ -+#if defined(__GNUC__) && !defined(DEBUG) -+/* since we do not compile the encoder part of ffmpeg, some static -+ * functions will be unused; this is ok, the compiler will take care */ -+# define static static __attribute__((__unused__)) -+#endif -+ - #ifdef __cplusplus - } - #endif -Index: libavcodec/dsputil.h +Index: libavutil/integer.c =================================================================== ---- libavcodec/dsputil.h (revision 7221) -+++ libavcodec/dsputil.h (working copy) -@@ -33,6 +33,9 @@ - #include "common.h" - #include "avcodec.h" - -+#if defined(ARCH_X86) || defined(ARCH_X86_64) -+#define HAVE_MMX 1 -+#endif +--- libavutil/integer.c (revision 7433) ++++ libavutil/integer.c (working copy) +@@ -126,8 +126,8 @@ + AVInteger quot_temp; + if(!quot) quot = "_temp; - //#define DEBUG - /* dct code */ -Index: libavcodec/motion_est.c +- assert((int16_t)a[AV_INTEGER_SIZE-1] >= 0 && (int16_t)b[AV_INTEGER_SIZE-1] >= 0); +- assert(av_log2(b)>=0); ++ assert((int16_t)a.v[AV_INTEGER_SIZE-1] >= 0 && (int16_t)b.v[AV_INTEGER_SIZE-1] >= 0); ++ assert(av_log2_i(b)>=0); + + if(i > 0) + b= av_shr_i(b, -i); +Index: libavutil/common.h =================================================================== ---- libavcodec/motion_est.c (revision 7221) -+++ libavcodec/motion_est.c (working copy) -@@ -23,6 +23,9 @@ - * new Motion Estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at> - */ +--- libavutil/common.h (revision 7433) ++++ libavutil/common.h (working copy) +@@ -345,4 +345,27 @@ + char *av_strdup(const char *s); + void av_freep(void *ptr); -+/* motion estimation only needed for encoders */ -+#ifdef CONFIG_ENCODERS ++/* xine: inline causes trouble for debug compiling */ ++#ifdef DISABLE_INLINE ++# ifdef inline ++# undef inline ++# endif ++# ifdef always_inline ++# undef always_inline ++# endif ++# define inline ++# define always_inline ++#endif + - /** - * @file motion_est.c - * 
Motion estimation. -@@ -2112,3 +2115,5 @@ - } - } - } ++/* xine: define ASMALIGN here since it's cleaner that generating it in the configure */ ++#if HAVE_ASMALIGN_POT ++# define ASMALIGN(ZEROBITS) ".align " #ZEROBITS "\n\t" ++#else ++# define ASMALIGN(ZEROBITS) ".align 1<<" #ZEROBITS "\n\t" ++#endif ++ ++/* xine: another config.h with codecs to use */ ++#include "ffmpeg_config.h" ++ + #endif /* COMMON_H */ + -+#endif /* CONFIG_ENCODERS */ Index: libavcodec/mjpeg.c =================================================================== -diff -u -r1.38 mjpeg.c ---- libavcodec/mjpeg.c 4 Dec 2006 22:25:19 -0000 1.38 -+++ libavcodec/mjpeg.c 30 Dec 2006 22:21:34 -0000 +--- libavcodec/mjpeg.c (revision 7433) ++++ libavcodec/mjpeg.c (working copy) @@ -38,6 +38,13 @@ #include "mpegvideo.h" #include "bytestream.h" @@ -83,27 +88,61 @@ diff -u -r1.38 mjpeg.c /* use two quantizer tables (one for luminance and one for chrominance) */ /* not yet working */ #undef TWOMATRIXES -Index: libavcodec/mpeg12.c +Index: libavcodec/i386/dsputil_mmx.c =================================================================== ---- libavcodec/mpeg12.c (revision 7221) -+++ libavcodec/mpeg12.c (working copy) -@@ -36,6 +36,13 @@ - //#include <assert.h> - - -+/* if xine's MPEG encoder is enabled, enable the encoding features in -+ * this particular module */ -+#if defined(XINE_MPEG_ENCODER) && !defined(CONFIG_ENCODERS) -+#define CONFIG_ENCODERS -+#endif +--- libavcodec/i386/dsputil_mmx.c (revision 7433) ++++ libavcodec/i386/dsputil_mmx.c (working copy) +@@ -2545,33 +2545,39 @@ + "pmullw %%mm5, %%mm2 \n\t" // (s-dx)*dy + "pmullw %%mm4, %%mm1 \n\t" // dx*(s-dy) + +- "movd %4, %%mm5 \n\t" +- "movd %3, %%mm4 \n\t" ++ "movd %3, %%mm5 \n\t" ++ "movd %2, %%mm4 \n\t" + "punpcklbw %%mm7, %%mm5 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + "pmullw %%mm5, %%mm3 \n\t" // src[1,1] * dx*dy + "pmullw %%mm4, %%mm2 \n\t" // src[0,1] * (s-dx)*dy + +- "movd %2, %%mm5 \n\t" +- "movd %1, %%mm4 \n\t" ++ "movd %1, %%mm5 \n\t" ++ "movd %0, %%mm4 \n\t" + "punpcklbw %%mm7, %%mm5 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + "pmullw %%mm5, %%mm1 \n\t" // src[1,0] * dx*(s-dy) + "pmullw %%mm4, %%mm0 \n\t" // src[0,0] * (s-dx)*(s-dy) +- "paddw %5, %%mm1 \n\t" ++ "paddw %4, %%mm1 \n\t" + "paddw %%mm3, %%mm2 \n\t" + "paddw %%mm1, %%mm0 \n\t" + "paddw %%mm2, %%mm0 \n\t" + +- "psrlw %6, %%mm0 \n\t" ++ "psrlw %5, %%mm0 \n\t" + "packuswb %%mm0, %%mm0 \n\t" +- "movd %%mm0, %0 \n\t" + +- : "=m"(dst[x+y*stride]) ++ : + : "m"(src[0]), "m"(src[1]), + "m"(src[stride]), "m"(src[stride+1]), + "m"(*r4), "m"(shift2) + ); ++ ++ asm volatile( ++ "movd %%mm0, %0 \n\t" + -+ - /* Start codes. 
*/ - #define SEQ_END_CODE 0x000001b7 - #define SEQ_START_CODE 0x000001b3 ++ : "=m"(dst[x+y*stride]) ++ : ++ ); + src += stride; + } + src += 4-h*stride; Index: libavcodec/mpegvideo.c =================================================================== ---- libavcodec/mpegvideo.c (revision 7221) +--- libavcodec/mpegvideo.c (revision 7433) +++ libavcodec/mpegvideo.c (working copy) @@ -40,6 +40,14 @@ //#undef NDEBUG @@ -163,7 +202,7 @@ Index: libavcodec/mpegvideo.c if(avctx->rc_buffer_size){ RateControlContext *rcc= &s->rc_context; -@@ -4575,6 +4593,8 @@ +@@ -4574,6 +4592,8 @@ case CODEC_ID_MPEG1VIDEO: case CODEC_ID_MPEG2VIDEO: mpeg1_encode_mb(s, s->block, motion_x, motion_y); break; @@ -172,7 +211,7 @@ Index: libavcodec/mpegvideo.c case CODEC_ID_MPEG4: mpeg4_encode_mb(s, s->block, motion_x, motion_y); break; case CODEC_ID_MSMPEG4V2: -@@ -4595,6 +4615,7 @@ +@@ -4594,6 +4614,7 @@ h263_encode_mb(s, s->block, motion_x, motion_y); break; case CODEC_ID_MJPEG: mjpeg_encode_mb(s, s->block); break; @@ -180,7 +219,7 @@ Index: libavcodec/mpegvideo.c default: assert(0); } -@@ -4816,6 +4837,8 @@ +@@ -4815,6 +4836,8 @@ +sse(s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize); } @@ -189,7 +228,7 @@ Index: libavcodec/mpegvideo.c static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){ MpegEncContext *s= arg; -@@ -4859,6 +4882,7 @@ +@@ -4860,6 +4883,7 @@ } return 0; } @@ -197,7 +236,7 @@ Index: libavcodec/mpegvideo.c static int mb_var_thread(AVCodecContext *c, void *arg){ MpegEncContext *s= arg; -@@ -4883,6 +4907,8 @@ +@@ -4886,6 +4910,8 @@ } static void write_slice_end(MpegEncContext *s){ @@ -206,7 +245,7 @@ Index: libavcodec/mpegvideo.c if(s->codec_id==CODEC_ID_MPEG4){ if(s->partitioned_frame){ ff_mpeg4_merge_partitions(s); -@@ -4892,6 +4918,7 @@ +@@ -4895,6 +4921,7 @@ }else if(s->out_format == FMT_MJPEG){ ff_mjpeg_stuffing(&s->pb); } @@ -214,7 +253,7 @@ Index: libavcodec/mpegvideo.c align_put_bits(&s->pb); flush_put_bits(&s->pb); -@@ -4945,10 +4972,13 @@ +@@ -4950,10 +4977,13 @@ case CODEC_ID_FLV1: s->gob_index = ff_h263_get_gob_height(s); break; @@ -228,7 +267,7 @@ Index: libavcodec/mpegvideo.c } s->resync_mb_x=0; -@@ -5021,9 +5051,12 @@ +@@ -5026,9 +5056,12 @@ if(s->start_mb_y != mb_y || mb_x!=0){ write_slice_end(s); @@ -241,7 +280,7 @@ Index: libavcodec/mpegvideo.c } assert((put_bits_count(&s->pb)&7) == 0); -@@ -5047,19 +5080,25 @@ +@@ -5052,19 +5085,25 @@ } switch(s->codec_id){ @@ -267,18 +306,18 @@ Index: libavcodec/mpegvideo.c } if(s->flags&CODEC_FLAG_PASS1){ -@@ -5172,7 +5211,10 @@ - +@@ -5286,7 +5325,10 @@ + backup_s.dquant = 0; s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT; s->mb_intra= 0; +/* xine: do not need this for decode or MPEG-1 encoding modes */ +#if 0 - ff_mpeg4_set_direct_mv(s, mx, my); + ff_mpeg4_set_direct_mv(s, 0, 0); +#endif /* #if 0 */ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb, - &dmin, &next_block, mx, my); + &dmin, &next_block, 0, 0); } -@@ -5354,7 +5396,10 @@ +@@ -5400,7 +5442,10 @@ s->mb_intra= 0; motion_x=s->b_direct_mv_table[xy][0]; motion_y=s->b_direct_mv_table[xy][1]; @@ -287,9 +326,9 @@ Index: libavcodec/mpegvideo.c ff_mpeg4_set_direct_mv(s, motion_x, motion_y); +#endif /* #if 0 */ break; - case CANDIDATE_MB_TYPE_BIDIR: - s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD; -@@ -5462,8 +5507,11 @@ + case CANDIDATE_MB_TYPE_DIRECT0: + s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT; +@@ -5513,8 +5558,11 @@ } //not beautiful here but we must write it before flushing 
so it has to be here @@ -301,7 +340,7 @@ Index: libavcodec/mpegvideo.c write_slice_end(s); -@@ -5531,6 +5579,8 @@ +@@ -5582,6 +5630,8 @@ } if(s->adaptive_quant){ @@ -310,7 +349,7 @@ Index: libavcodec/mpegvideo.c switch(s->codec_id){ case CODEC_ID_MPEG4: ff_clean_mpeg4_qscales(s); -@@ -5541,6 +5591,7 @@ +@@ -5592,6 +5642,7 @@ ff_clean_h263_qscales(s); break; } @@ -318,7 +357,7 @@ Index: libavcodec/mpegvideo.c s->lambda= s->lambda_table[0]; //FIXME broken -@@ -5562,10 +5613,13 @@ +@@ -5613,10 +5664,13 @@ s->me.mb_var_sum_temp = s->me.mc_mb_var_sum_temp = 0; @@ -332,7 +371,7 @@ Index: libavcodec/mpegvideo.c s->me.scene_change_score=0; -@@ -5596,6 +5650,8 @@ +@@ -5647,6 +5701,8 @@ ff_update_duplicate_context(s->thread_context[i], s); } @@ -341,7 +380,7 @@ Index: libavcodec/mpegvideo.c ff_init_me(s); /* Estimate motion for every MB */ -@@ -5610,6 +5666,8 @@ +@@ -5661,6 +5717,8 @@ s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count); }else /* if(s->pict_type == I_TYPE) */{ @@ -350,7 +389,7 @@ Index: libavcodec/mpegvideo.c /* I-Frame */ for(i=0; i<s->mb_stride*s->mb_height; i++) s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA; -@@ -5633,6 +5691,8 @@ +@@ -5684,6 +5742,8 @@ //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum); } @@ -359,7 +398,7 @@ Index: libavcodec/mpegvideo.c if(!s->umvplus){ if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) { s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER); -@@ -5686,6 +5746,7 @@ +@@ -5737,6 +5797,7 @@ } } } @@ -367,7 +406,7 @@ Index: libavcodec/mpegvideo.c if (estimate_qp(s, 0) < 0) return -1; -@@ -5717,6 +5778,8 @@ +@@ -5768,6 +5829,8 @@ s->last_bits= put_bits_count(&s->pb); switch(s->out_format) { @@ -376,7 +415,7 @@ Index: libavcodec/mpegvideo.c case FMT_MJPEG: mjpeg_picture_header(s); break; -@@ -5745,11 +5808,15 @@ +@@ -5796,11 +5859,15 @@ else h263_encode_picture_header(s, picture_number); break; @@ -392,11 +431,49 @@ Index: libavcodec/mpegvideo.c default: assert(0); } +Index: libavcodec/mpeg12.c +=================================================================== +--- libavcodec/mpeg12.c (revision 7433) ++++ libavcodec/mpeg12.c (working copy) +@@ -36,6 +36,13 @@ + //#include <assert.h> + + ++/* if xine's MPEG encoder is enabled, enable the encoding features in ++ * this particular module */ ++#if defined(XINE_MPEG_ENCODER) && !defined(CONFIG_ENCODERS) ++#define CONFIG_ENCODERS ++#endif ++ ++ + /* Start codes. */ + #define SEQ_END_CODE 0x000001b7 + #define SEQ_START_CODE 0x000001b3 +Index: libavcodec/motion_est.c +=================================================================== +--- libavcodec/motion_est.c (revision 7433) ++++ libavcodec/motion_est.c (working copy) +@@ -23,6 +23,9 @@ + * new Motion Estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at> + */ + ++/* motion estimation only needed for encoders */ ++#ifdef CONFIG_ENCODERS ++ + /** + * @file motion_est.c + * Motion estimation. 
+@@ -2142,3 +2145,5 @@ + } + } + } ++ ++#endif /* CONFIG_ENCODERS */ Index: libavcodec/snow.c =================================================================== ---- libavcodec/snow.c (revision 7221) +--- libavcodec/snow.c (revision 7433) +++ libavcodec/snow.c (working copy) -@@ -1977,6 +1977,7 @@ +@@ -1982,6 +1982,7 @@ #define P_MV1 P[9] #define FLAG_QPEL 1 //must be 1 @@ -404,15 +481,15 @@ Index: libavcodec/snow.c static int encode_q_branch(SnowContext *s, int level, int x, int y){ uint8_t p_buffer[1024]; uint8_t i_buffer[1024]; -@@ -2205,6 +2206,7 @@ +@@ -2210,6 +2211,7 @@ return score; } } +#endif - static always_inline int same_block(BlockNode *a, BlockNode *b){ + static av_always_inline int same_block(BlockNode *a, BlockNode *b){ if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){ -@@ -2319,6 +2321,7 @@ +@@ -2322,6 +2324,7 @@ } } @@ -420,7 +497,7 @@ Index: libavcodec/snow.c static void encode_blocks(SnowContext *s, int search){ int x, y; int w= s->b_width; -@@ -2340,6 +2343,7 @@ +@@ -2343,6 +2346,7 @@ } } } @@ -428,7 +505,7 @@ Index: libavcodec/snow.c static void decode_blocks(SnowContext *s){ int x, y; -@@ -3910,6 +3914,7 @@ +@@ -3931,6 +3935,7 @@ } } @@ -436,7 +513,7 @@ Index: libavcodec/snow.c static int encode_init(AVCodecContext *avctx) { SnowContext *s = avctx->priv_data; -@@ -3997,6 +4002,7 @@ +@@ -4018,6 +4023,7 @@ return 0; } @@ -444,7 +521,7 @@ Index: libavcodec/snow.c static int frame_start(SnowContext *s){ AVFrame tmp; -@@ -4035,6 +4041,7 @@ +@@ -4056,6 +4062,7 @@ return 0; } @@ -452,7 +529,7 @@ Index: libavcodec/snow.c static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ SnowContext *s = avctx->priv_data; RangeCoder * const c= &s->c; -@@ -4288,6 +4295,7 @@ +@@ -4308,6 +4315,7 @@ return ff_rac_terminate(c); } @@ -460,7 +537,7 @@ Index: libavcodec/snow.c static void common_end(SnowContext *s){ int plane_index, level, orientation, i; -@@ -4319,6 +4327,7 @@ +@@ -4339,6 +4347,7 @@ } } @@ -468,7 +545,7 @@ Index: libavcodec/snow.c static int encode_end(AVCodecContext *avctx) { SnowContext *s = avctx->priv_data; -@@ -4328,6 +4337,7 @@ +@@ -4348,6 +4357,7 @@ return 0; } @@ -476,86 +553,9 @@ Index: libavcodec/snow.c static int decode_init(AVCodecContext *avctx) { -Index: libavutil/common.h -=================================================================== ---- libavutil/common.h (revision 7221) -+++ libavutil/common.h (working copy) -@@ -375,7 +375,7 @@ - ); - return (d << 32) | (a & 0xffffffff); - } --#elif defined(ARCH_X86_32) -+#elif defined(ARCH_X86) - static inline long long read_time(void) - { - long long l; -@@ -446,4 +446,23 @@ - char *av_strdup(const char *s); - void av_freep(void *ptr); - -+/* xine: inline causes trouble for debug compiling */ -+#ifdef DISABLE_INLINE -+# ifdef inline -+# undef inline -+# endif -+# ifdef always_inline -+# undef always_inline -+# endif -+# define inline -+# define always_inline -+#endif -+ -+/* xine: define ASMALIGN here since it's cleaner that generating it in the configure */ -+#if HAVE_ASMALIGN_POT -+# define ASMALIGN(ZEROBITS) ".align " #ZEROBITS "\n\t" -+#else -+# define ASMALIGN(ZEROBITS) ".align 1<<" #ZEROBITS "\n\t" -+#endif -+ - #endif /* COMMON_H */ -Index: libavutil/integer.c -=================================================================== ---- libavutil/integer.c (revision 7221) -+++ libavutil/integer.c (working copy) -@@ -126,8 +126,8 @@ - AVInteger quot_temp; - if(!quot) quot = "_temp; - -- assert((int16_t)a[AV_INTEGER_SIZE-1] >= 0 && (int16_t)b[AV_INTEGER_SIZE-1] >= 0); -- 
assert(av_log2(b)>=0); -+ assert((int16_t)a.v[AV_INTEGER_SIZE-1] >= 0 && (int16_t)b.v[AV_INTEGER_SIZE-1] >= 0); -+ assert(av_log2_i(b)>=0); - - if(i > 0) - b= av_shr_i(b, -i); -Index: libavutil/internal.h -=================================================================== ---- libavutil/internal.h (revision 7221) -+++ libavutil/internal.h (working copy) -@@ -93,11 +93,15 @@ - #include <assert.h> - - /* dprintf macros */ --#ifdef DEBUG --# define dprintf(fmt,...) av_log(NULL, AV_LOG_DEBUG, fmt, __VA_ARGS__) --#else --# define dprintf(fmt,...) --#endif -+# ifdef DEBUG -+# ifdef __GNUC__ -+# define dprintf(fmt,args...) av_log(NULL, AV_LOG_DEBUG, fmt, ##args) -+# else -+# define dprintf(fmt,...) av_log(NULL, AV_LOG_DEBUG, fmt, __VA_ARGS__) -+# endif -+# else -+# define dprintf(fmt,...) -+# endif - - #define av_abort() do { av_log(NULL, AV_LOG_ERROR, "Abort at %s:%d\n", __FILE__, __LINE__); abort(); } while (0) - Index: libavcodec/mlib/dsputil_mlib.c =================================================================== ---- libavcodec/mlib/dsputil_mlib.c (revision 7221) +--- libavcodec/mlib/dsputil_mlib.c (revision 7433) +++ libavcodec/mlib/dsputil_mlib.c (working copy) @@ -22,6 +22,8 @@ #include "../dsputil.h" @@ -566,3 +566,35 @@ Index: libavcodec/mlib/dsputil_mlib.c #include <mlib_types.h> #include <mlib_status.h> #include <mlib_sys.h> +Index: libavcodec/avcodec.h +=================================================================== +--- libavcodec/avcodec.h (revision 7433) ++++ libavcodec/avcodec.h (working copy) +@@ -47,6 +47,13 @@ + #define AV_TIME_BASE 1000000 + #define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE} + ++/* FIXME: We cannot use ffmpeg's XvMC capabilities, since that would require ++ * linking the ffmpeg plugin against XvMC libraries, which is a bad thing, ++ * since they are output dependend. ++ * The correct fix would be to reimplement the XvMC functions libavcodec uses ++ * and do the necessary talking with our XvMC output plugin there. 
*/ ++#undef HAVE_XVMC ++ + enum CodecID { + CODEC_ID_NONE, + CODEC_ID_MPEG1VIDEO, +@@ -2688,6 +2695,13 @@ + + extern unsigned int av_xiphlacing(unsigned char *s, unsigned int v); + ++/* unused static macro */ ++#if defined(__GNUC__) && !defined(DEBUG) ++/* since we do not compile the encoder part of ffmpeg, some static ++ * functions will be unused; this is ok, the compiler will take care */ ++# define static static __attribute__((__unused__)) ++#endif ++ + #ifdef __cplusplus + } + #endif diff --git a/src/libffmpeg/libavcodec/Makefile.am b/src/libffmpeg/libavcodec/Makefile.am index cf34b0d28..cae72eeff 100644 --- a/src/libffmpeg/libavcodec/Makefile.am +++ b/src/libffmpeg/libavcodec/Makefile.am @@ -4,14 +4,15 @@ SUBDIRS = armv4l i386 mlib alpha ppc sparc libpostproc # some of ffmpeg's decoders are not used by xine yet EXTRA_DIST = motion_est_template.c \ - adx.c cljr.c fdctref.c ffv1.c g726.c jpeg_ls.c mdec.c raw.c snow.c svq3.c wmv2.c + adx.c cljr.c fdctref.c ffv1.c g726.c jpeg_ls.c mdec.c raw.c svq3.c wmv2.c # we need to compile everything in debug mode, including the encoders, # otherwise we get unresolved symbols, because some unsatisfied function calls # are not optimized away with debug optimization -AM_CFLAGS = `test "$(CFLAGS)" = "$(DEBUG_CFLAGS)" && echo -DCONFIG_ENCODERS` -fno-strict-aliasing -DCONFIG_VC1_DECODER +#AM_CFLAGS = `test "$(CFLAGS)" = "$(DEBUG_CFLAGS)" && echo -DCONFIG_ENCODERS` -fno-strict-aliasing +AM_CFLAGS = `test "$(CFLAGS)" = "$(DEBUG_CFLAGS)"` -fno-strict-aliasing AM_CPPFLAGS = $(ZLIB_CPPFLAGS) $(LIBFFMPEG_CPPFLAGS) \ - -I$(top_srcdir)/src/libffmpeg/libavutil + -I$(top_srcdir)/src/libffmpeg/libavutil -I$(top_srcdir)/src/libffmpeg ASFLAGS = noinst_LTLIBRARIES = libavcodec.la @@ -94,6 +95,7 @@ libavcodec_la_SOURCES = \ simple_idct.c \ smacker.c \ smc.c \ + snow.c \ svq1.c \ tscc.c \ truemotion1.c \ @@ -110,7 +112,12 @@ libavcodec_la_SOURCES = \ vorbis_data.c \ vp3.c \ vp3dsp.c \ + vp5.c \ + vp56.c \ + vp56data.c \ + vp6.c \ vqavideo.c \ + wavpack.c \ wmadec.c \ wnv1.c \ xan.c \ @@ -175,4 +182,8 @@ noinst_HEADERS = \ vc1acdata.h \ vc1data.h \ vp3data.h \ + vp56.h \ + vp56data.h \ + vp5data.h \ + vp6data.h \ wmadata.h diff --git a/src/libffmpeg/libavcodec/armv4l/Makefile.am b/src/libffmpeg/libavcodec/armv4l/Makefile.am index 0f3d230f6..33e0882c9 100644 --- a/src/libffmpeg/libavcodec/armv4l/Makefile.am +++ b/src/libffmpeg/libavcodec/armv4l/Makefile.am @@ -6,7 +6,12 @@ ASFLAGS = noinst_LTLIBRARIES = libavcodec_armv4l.la -libavcodec_armv4l_src = dsputil_arm.c jrevdct_arm.S mpegvideo_arm.c simple_idct_arm.S +libavcodec_armv4l_src = dsputil_arm.c jrevdct_arm.S mpegvideo_arm.c simple_idct_arm.S \ + dsputil_arm_s.S dsputil_iwmmxt.c dsputil_iwmmxt_rnd.h \ + mpegvideo_armv5te.c mpegvideo_iwmmxt.c simple_idct_armv5te.S + +noinst_HEADERS = mathops.h + libavcodec_armv4l_dummy = libavcodec_armv4l_dummy.c EXTRA_DIST = $(libavcodec_armv4l_src) $(libavcodec_armv4l_dummy) diff --git a/src/libffmpeg/libavcodec/armv4l/dsputil_arm_s.S b/src/libffmpeg/libavcodec/armv4l/dsputil_arm_s.S new file mode 100644 index 000000000..2a3ee9c50 --- /dev/null +++ b/src/libffmpeg/libavcodec/armv4l/dsputil_arm_s.S @@ -0,0 +1,696 @@ +@ +@ ARMv4L optimized DSP utils +@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp> +@ +@ This file is part of FFmpeg. 
+@ +@ FFmpeg is free software; you can redistribute it and/or +@ modify it under the terms of the GNU Lesser General Public +@ License as published by the Free Software Foundation; either +@ version 2.1 of the License, or (at your option) any later version. +@ +@ FFmpeg is distributed in the hope that it will be useful, +@ but WITHOUT ANY WARRANTY; without even the implied warranty of +@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +@ Lesser General Public License for more details. +@ +@ You should have received a copy of the GNU Lesser General Public +@ License along with FFmpeg; if not, write to the Free Software +@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +@ + +.macro ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4 + mov \Rd0, \Rn0, lsr #(\shift * 8) + mov \Rd1, \Rn1, lsr #(\shift * 8) + mov \Rd2, \Rn2, lsr #(\shift * 8) + mov \Rd3, \Rn3, lsr #(\shift * 8) + orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8) + orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8) + orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8) + orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8) +.endm +.macro ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2 + mov \R0, \R0, lsr #(\shift * 8) + orr \R0, \R0, \R1, lsl #(32 - \shift * 8) + mov \R1, \R1, lsr #(\shift * 8) + orr \R1, \R1, \R2, lsl #(32 - \shift * 8) +.endm +.macro ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2 + mov \Rdst0, \Rsrc0, lsr #(\shift * 8) + mov \Rdst1, \Rsrc1, lsr #(\shift * 8) + orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8)) + orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8)) +.endm + +.macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask + @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1) + @ Rmask = 0xFEFEFEFE + @ Rn = destroy + eor \Rd0, \Rn0, \Rm0 + eor \Rd1, \Rn1, \Rm1 + orr \Rn0, \Rn0, \Rm0 + orr \Rn1, \Rn1, \Rm1 + and \Rd0, \Rd0, \Rmask + and \Rd1, \Rd1, \Rmask + sub \Rd0, \Rn0, \Rd0, lsr #1 + sub \Rd1, \Rn1, \Rd1, lsr #1 +.endm + +.macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask + @ Rd = (Rn & Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1) + @ Rmask = 0xFEFEFEFE + @ Rn = destroy + eor \Rd0, \Rn0, \Rm0 + eor \Rd1, \Rn1, \Rm1 + and \Rn0, \Rn0, \Rm0 + and \Rn1, \Rn1, \Rm1 + and \Rd0, \Rd0, \Rmask + and \Rd1, \Rd1, \Rmask + add \Rd0, \Rn0, \Rd0, lsr #1 + add \Rd1, \Rn1, \Rd1, lsr #1 +.endm + +@ ---------------------------------------------------------------- + .align 8 + .global put_pixels16_arm +put_pixels16_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned + pld [r1] + stmfd sp!, {r4-r11, lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 + bic r1, r1, #3 + add r5, r5, r4, lsl #2 + ldrne pc, [r5] +1: + ldmia r1, {r4-r7} + add r1, r1, r2 + stmia r0, {r4-r7} + pld [r1] + subs r3, r3, #1 + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r11, pc} + .align 8 +2: + ldmia r1, {r4-r8} + add r1, r1, r2 + ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8 + pld [r1] + subs r3, r3, #1 + stmia r0, {r9-r12} + add r0, r0, r2 + bne 2b + ldmfd sp!, {r4-r11, pc} + .align 8 +3: + ldmia r1, {r4-r8} + add r1, r1, r2 + ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8 + pld [r1] + subs r3, r3, #1 + stmia r0, {r9-r12} + add r0, r0, r2 + bne 3b + ldmfd sp!, {r4-r11, pc} + .align 8 +4: + ldmia r1, {r4-r8} + add r1, r1, r2 + ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8 + pld [r1] + subs r3, r3, #1 + stmia r0, {r9-r12} + add r0, r0, r2 + bne 4b + ldmfd sp!, {r4-r11,pc} + .align 8 
+5: + .word 1b + .word 2b + .word 3b + .word 4b + +@ ---------------------------------------------------------------- + .align 8 + .global put_pixels8_arm +put_pixels8_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned + pld [r1] + stmfd sp!, {r4-r5,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 + bic r1, r1, #3 + add r5, r5, r4, lsl #2 + ldrne pc, [r5] +1: + ldmia r1, {r4-r5} + add r1, r1, r2 + subs r3, r3, #1 + pld [r1] + stmia r0, {r4-r5} + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r5,pc} + .align 8 +2: + ldmia r1, {r4-r5, r12} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12 + pld [r1] + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 + bne 2b + ldmfd sp!, {r4-r5,pc} + .align 8 +3: + ldmia r1, {r4-r5, r12} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12 + pld [r1] + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 + bne 3b + ldmfd sp!, {r4-r5,pc} + .align 8 +4: + ldmia r1, {r4-r5, r12} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12 + pld [r1] + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 + bne 4b + ldmfd sp!, {r4-r5,pc} + .align 8 +5: + .word 1b + .word 2b + .word 3b + .word 4b + +@ ---------------------------------------------------------------- + .align 8 + .global put_pixels8_x2_arm +put_pixels8_x2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned + pld [r1] + stmfd sp!, {r4-r10,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 + ldr r12, [r5] + add r5, r5, r4, lsl #2 + bic r1, r1, #3 + ldrne pc, [r5] +1: + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 + pld [r1] + RND_AVG32 r8, r9, r4, r5, r6, r7, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r10,pc} + .align 8 +2: + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 + ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10 + pld [r1] + RND_AVG32 r4, r5, r6, r7, r8, r9, r12 + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 + bne 2b + ldmfd sp!, {r4-r10,pc} + .align 8 +3: + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10 + ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10 + pld [r1] + RND_AVG32 r4, r5, r6, r7, r8, r9, r12 + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 + bne 3b + ldmfd sp!, {r4-r10,pc} + .align 8 +4: + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10 + pld [r1] + RND_AVG32 r8, r9, r6, r7, r5, r10, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} + add r0, r0, r2 + bne 4b + ldmfd sp!, {r4-r10,pc} @@ update PC with LR content. 
+ .align 8 +5: + .word 0xFEFEFEFE + .word 2b + .word 3b + .word 4b + + .align 8 + .global put_no_rnd_pixels8_x2_arm +put_no_rnd_pixels8_x2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned + pld [r1] + stmfd sp!, {r4-r10,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 + ldr r12, [r5] + add r5, r5, r4, lsl #2 + bic r1, r1, #3 + ldrne pc, [r5] +1: + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 + pld [r1] + NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r10,pc} + .align 8 +2: + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 + ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10 + pld [r1] + NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 + bne 2b + ldmfd sp!, {r4-r10,pc} + .align 8 +3: + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10 + ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10 + pld [r1] + NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 + bne 3b + ldmfd sp!, {r4-r10,pc} + .align 8 +4: + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10 + pld [r1] + NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} + add r0, r0, r2 + bne 4b + ldmfd sp!, {r4-r10,pc} @@ update PC with LR content. + .align 8 +5: + .word 0xFEFEFEFE + .word 2b + .word 3b + .word 4b + + +@ ---------------------------------------------------------------- + .align 8 + .global put_pixels8_y2_arm +put_pixels8_y2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned + pld [r1] + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 + mov r3, r3, lsr #1 + ldr r12, [r5] + add r5, r5, r4, lsl #2 + bic r1, r1, #3 + ldrne pc, [r5] +1: + ldmia r1, {r4-r5} + add r1, r1, r2 +6: ldmia r1, {r6-r7} + add r1, r1, r2 + pld [r1] + RND_AVG32 r8, r9, r4, r5, r6, r7, r12 + ldmia r1, {r4-r5} + add r1, r1, r2 + stmia r0, {r8-r9} + add r0, r0, r2 + pld [r1] + RND_AVG32 r8, r9, r6, r7, r4, r5, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} + .align 8 +2: + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 +6: ldmia r1, {r7-r9} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9 + RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 + subs r3, r3, #1 + RND_AVG32 r10, r11, r7, r8, r4, r5, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} + .align 8 +3: + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 +6: ldmia r1, {r7-r9} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9 + RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 + subs r3, r3, #1 + RND_AVG32 r10, r11, r7, r8, r4, r5, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} + .align 8 +4: + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 +6: ldmia r1, {r7-r9} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9 + 
RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 + subs r3, r3, #1 + RND_AVG32 r10, r11, r7, r8, r4, r5, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} + + .align 8 +5: + .word 0xFEFEFEFE + .word 2b + .word 3b + .word 4b + + .align 8 + .global put_no_rnd_pixels8_y2_arm +put_no_rnd_pixels8_y2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned + pld [r1] + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 + mov r3, r3, lsr #1 + ldr r12, [r5] + add r5, r5, r4, lsl #2 + bic r1, r1, #3 + ldrne pc, [r5] +1: + ldmia r1, {r4-r5} + add r1, r1, r2 +6: ldmia r1, {r6-r7} + add r1, r1, r2 + pld [r1] + NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 + ldmia r1, {r4-r5} + add r1, r1, r2 + stmia r0, {r8-r9} + add r0, r0, r2 + pld [r1] + NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} + .align 8 +2: + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 +6: ldmia r1, {r7-r9} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9 + NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 + subs r3, r3, #1 + NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} + .align 8 +3: + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 +6: ldmia r1, {r7-r9} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9 + NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 + subs r3, r3, #1 + NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} + .align 8 +4: + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 +6: ldmia r1, {r7-r9} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9 + NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 + subs r3, r3, #1 + NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} + .align 8 +5: + .word 0xFEFEFEFE + .word 2b + .word 3b + .word 4b + +@ ---------------------------------------------------------------- +.macro RND_XY2_IT align, rnd + @ l1= (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202) + @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2) +.if \align == 0 + ldmia r1, {r6-r8} +.elseif \align == 3 + ldmia r1, {r5-r7} +.else + ldmia r1, {r8-r10} +.endif + add r1, r1, r2 + pld [r1] +.if \align == 0 + ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8 +.elseif \align == 1 + ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r8, r9, r10 + ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r8, r9, r10 +.elseif \align == 2 + ADJ_ALIGN_DOUBLEWORD_D 2, r4, r5, r8, r9, r10 + ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r8, r9, r10 +.elseif \align == 3 + ADJ_ALIGN_DOUBLEWORD_D 3, r4, r5, r5, r6, r7 +.endif + ldr r14, [r12, #0] @ 0x03030303 + tst r3, #1 + and r8, r4, r14 + and r9, r5, r14 + and r10, r6, r14 + and r11, r7, r14 +.if \rnd == 1 + ldreq r14, [r12, #16] @ 0x02020202 
+.else + ldreq r14, [r12, #28] @ 0x01010101 +.endif + add r8, r8, r10 + add r9, r9, r11 + addeq r8, r8, r14 + addeq r9, r9, r14 + ldr r14, [r12, #20] @ 0xFCFCFCFC >> 2 + and r4, r14, r4, lsr #2 + and r5, r14, r5, lsr #2 + and r6, r14, r6, lsr #2 + and r7, r14, r7, lsr #2 + add r10, r4, r6 + add r11, r5, r7 +.endm + +.macro RND_XY2_EXPAND align, rnd + RND_XY2_IT \align, \rnd +6: stmfd sp!, {r8-r11} + RND_XY2_IT \align, \rnd + ldmfd sp!, {r4-r7} + add r4, r4, r8 + add r5, r5, r9 + add r6, r6, r10 + add r7, r7, r11 + ldr r14, [r12, #24] @ 0x0F0F0F0F + and r4, r14, r4, lsr #2 + and r5, r14, r5, lsr #2 + add r4, r4, r6 + add r5, r5, r7 + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} +.endm + + .align 8 + .global put_pixels8_xy2_arm +put_pixels8_xy2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned + pld [r1] + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + adrl r12, 5f + ands r4, r1, #3 + add r5, r12, r4, lsl #2 + bic r1, r1, #3 + ldrne pc, [r5] +1: + RND_XY2_EXPAND 0, 1 + + .align 8 +2: + RND_XY2_EXPAND 1, 1 + + .align 8 +3: + RND_XY2_EXPAND 2, 1 + + .align 8 +4: + RND_XY2_EXPAND 3, 1 + +5: + .word 0x03030303 + .word 2b + .word 3b + .word 4b + .word 0x02020202 + .word 0xFCFCFCFC >> 2 + .word 0x0F0F0F0F + .word 0x01010101 + + .align 8 + .global put_no_rnd_pixels8_xy2_arm +put_no_rnd_pixels8_xy2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned + pld [r1] + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + adrl r12, 5f + ands r4, r1, #3 + add r5, r12, r4, lsl #2 + bic r1, r1, #3 + ldrne pc, [r5] +1: + RND_XY2_EXPAND 0, 0 + + .align 8 +2: + RND_XY2_EXPAND 1, 0 + + .align 8 +3: + RND_XY2_EXPAND 2, 0 + + .align 8 +4: + RND_XY2_EXPAND 3, 0 + +5: + .word 0x03030303 + .word 2b + .word 3b + .word 4b + .word 0x02020202 + .word 0xFCFCFCFC >> 2 + .word 0x0F0F0F0F + .word 0x01010101 diff --git a/src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt.c b/src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt.c new file mode 100644 index 000000000..d7401e760 --- /dev/null +++ b/src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt.c @@ -0,0 +1,188 @@ +/* + * iWMMXt optimized DSP utils + * Copyright (c) 2004 AGAWA Koji + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "../dsputil.h" + +#define DEF(x, y) x ## _no_rnd_ ## y ##_iwmmxt +#define SET_RND(regd) __asm__ __volatile__ ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12"); +#define WAVG2B "wavg2b" +#include "dsputil_iwmmxt_rnd.h" +#undef DEF +#undef SET_RND +#undef WAVG2B + +#define DEF(x, y) x ## _ ## y ##_iwmmxt +#define SET_RND(regd) __asm__ __volatile__ ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12"); +#define WAVG2B "wavg2br" +#include "dsputil_iwmmxt_rnd.h" +#undef DEF +#undef SET_RND +#undef WAVG2BR + +// need scheduling +#define OP(AVG) \ + asm volatile ( \ + /* alignment */ \ + "and r12, %[pixels], #7 \n\t" \ + "bic %[pixels], %[pixels], #7 \n\t" \ + "tmcr wcgr1, r12 \n\t" \ + \ + "wldrd wr0, [%[pixels]] \n\t" \ + "wldrd wr1, [%[pixels], #8] \n\t" \ + "add %[pixels], %[pixels], %[line_size] \n\t" \ + "walignr1 wr4, wr0, wr1 \n\t" \ + \ + "1: \n\t" \ + \ + "wldrd wr2, [%[pixels]] \n\t" \ + "wldrd wr3, [%[pixels], #8] \n\t" \ + "add %[pixels], %[pixels], %[line_size] \n\t" \ + "pld [%[pixels]] \n\t" \ + "walignr1 wr5, wr2, wr3 \n\t" \ + AVG " wr6, wr4, wr5 \n\t" \ + "wstrd wr6, [%[block]] \n\t" \ + "add %[block], %[block], %[line_size] \n\t" \ + \ + "wldrd wr0, [%[pixels]] \n\t" \ + "wldrd wr1, [%[pixels], #8] \n\t" \ + "add %[pixels], %[pixels], %[line_size] \n\t" \ + "walignr1 wr4, wr0, wr1 \n\t" \ + "pld [%[pixels]] \n\t" \ + AVG " wr6, wr4, wr5 \n\t" \ + "wstrd wr6, [%[block]] \n\t" \ + "add %[block], %[block], %[line_size] \n\t" \ + \ + "subs %[h], %[h], #2 \n\t" \ + "bne 1b \n\t" \ + : [block]"+r"(block), [pixels]"+r"(pixels), [h]"+r"(h) \ + : [line_size]"r"(line_size) \ + : "memory", "r12"); +void put_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + OP("wavg2br"); +} +void put_no_rnd_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + OP("wavg2b"); +} +#undef OP + +void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size) +{ + uint8_t *pixels2 = pixels + line_size; + + __asm__ __volatile__ ( + "mov r12, #4 \n\t" + "1: \n\t" + "pld [%[pixels], %[line_size2]] \n\t" + "pld [%[pixels2], %[line_size2]] \n\t" + "wldrd wr4, [%[pixels]] \n\t" + "wldrd wr5, [%[pixels2]] \n\t" + "pld [%[block], #32] \n\t" + "wunpckelub wr6, wr4 \n\t" + "wldrd wr0, [%[block]] \n\t" + "wunpckehub wr7, wr4 \n\t" + "wldrd wr1, [%[block], #8] \n\t" + "wunpckelub wr8, wr5 \n\t" + "wldrd wr2, [%[block], #16] \n\t" + "wunpckehub wr9, wr5 \n\t" + "wldrd wr3, [%[block], #24] \n\t" + "add %[block], %[block], #32 \n\t" + "waddhss wr10, wr0, wr6 \n\t" + "waddhss wr11, wr1, wr7 \n\t" + "waddhss wr12, wr2, wr8 \n\t" + "waddhss wr13, wr3, wr9 \n\t" + "wpackhus wr14, wr10, wr11 \n\t" + "wpackhus wr15, wr12, wr13 \n\t" + "wstrd wr14, [%[pixels]] \n\t" + "add %[pixels], %[pixels], %[line_size2] \n\t" + "subs r12, r12, #1 \n\t" + "wstrd wr15, [%[pixels2]] \n\t" + "add %[pixels2], %[pixels2], %[line_size2] \n\t" + "bne 1b \n\t" + : [block]"+r"(block), [pixels]"+r"(pixels), [pixels2]"+r"(pixels2) + : [line_size2]"r"(line_size << 1) + : "cc", "memory", "r12"); +} + +static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + return; +} + +int mm_flags; /* multimedia extension flags */ + +int mm_support(void) +{ + return 0; /* TODO, implement proper detection */ +} + +void 
dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx) +{ + mm_flags = mm_support(); + + if (avctx->dsp_mask) { + if (avctx->dsp_mask & FF_MM_FORCE) + mm_flags |= (avctx->dsp_mask & 0xffff); + else + mm_flags &= ~(avctx->dsp_mask & 0xffff); + } + + if (!(mm_flags & MM_IWMMXT)) return; + + c->add_pixels_clamped = add_pixels_clamped_iwmmxt; + + c->put_pixels_tab[0][0] = put_pixels16_iwmmxt; + c->put_pixels_tab[0][1] = put_pixels16_x2_iwmmxt; + c->put_pixels_tab[0][2] = put_pixels16_y2_iwmmxt; + c->put_pixels_tab[0][3] = put_pixels16_xy2_iwmmxt; + c->put_no_rnd_pixels_tab[0][0] = put_pixels16_iwmmxt; + c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_iwmmxt; + c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_iwmmxt; + c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_iwmmxt; + + c->put_pixels_tab[1][0] = put_pixels8_iwmmxt; + c->put_pixels_tab[1][1] = put_pixels8_x2_iwmmxt; + c->put_pixels_tab[1][2] = put_pixels8_y2_iwmmxt; + c->put_pixels_tab[1][3] = put_pixels8_xy2_iwmmxt; + c->put_no_rnd_pixels_tab[1][0] = put_pixels8_iwmmxt; + c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_iwmmxt; + c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_iwmmxt; + c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_iwmmxt; + + c->avg_pixels_tab[0][0] = avg_pixels16_iwmmxt; + c->avg_pixels_tab[0][1] = avg_pixels16_x2_iwmmxt; + c->avg_pixels_tab[0][2] = avg_pixels16_y2_iwmmxt; + c->avg_pixels_tab[0][3] = avg_pixels16_xy2_iwmmxt; + c->avg_no_rnd_pixels_tab[0][0] = avg_pixels16_iwmmxt; + c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_iwmmxt; + c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_iwmmxt; + c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_iwmmxt; + + c->avg_pixels_tab[1][0] = avg_pixels8_iwmmxt; + c->avg_pixels_tab[1][1] = avg_pixels8_x2_iwmmxt; + c->avg_pixels_tab[1][2] = avg_pixels8_y2_iwmmxt; + c->avg_pixels_tab[1][3] = avg_pixels8_xy2_iwmmxt; + c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_iwmmxt; + c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_iwmmxt; + c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_iwmmxt; + c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_iwmmxt; +} diff --git a/src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt_rnd.h b/src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt_rnd.h new file mode 100644 index 000000000..51ba61c47 --- /dev/null +++ b/src/libffmpeg/libavcodec/armv4l/dsputil_iwmmxt_rnd.h @@ -0,0 +1,1114 @@ +/* + * iWMMXt optimized DSP utils + * copyright (c) 2004 AGAWA Koji + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + __asm__ __volatile__ ( + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r4, %[pixels], %[line_size] \n\t" + "add r5, %[block], %[line_size] \n\t" + "mov %[line_size], %[line_size], lsl #1 \n\t" + "1: \n\t" + "wldrd wr0, [%[pixels]] \n\t" + "subs %[h], %[h], #2 \n\t" + "wldrd wr1, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "wldrd wr3, [r4] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "wldrd wr4, [r4, #8] \n\t" + "add r4, r4, %[line_size] \n\t" + "walignr1 wr8, wr0, wr1 \n\t" + "pld [r4] \n\t" + "pld [r4, #32] \n\t" + "walignr1 wr10, wr3, wr4 \n\t" + "wstrd wr8, [%[block]] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "wstrd wr10, [r5] \n\t" + "add r5, r5, %[line_size] \n\t" + "bne 1b \n\t" + : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) + : + : "memory", "r4", "r5", "r12"); +} + +void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + __asm__ __volatile__ ( + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r4, %[pixels], %[line_size] \n\t" + "add r5, %[block], %[line_size] \n\t" + "mov %[line_size], %[line_size], lsl #1 \n\t" + "1: \n\t" + "wldrd wr0, [%[pixels]] \n\t" + "subs %[h], %[h], #2 \n\t" + "wldrd wr1, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "wldrd wr3, [r4] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "wldrd wr4, [r4, #8] \n\t" + "add r4, r4, %[line_size] \n\t" + "walignr1 wr8, wr0, wr1 \n\t" + "wldrd wr0, [%[block]] \n\t" + "wldrd wr2, [r5] \n\t" + "pld [r4] \n\t" + "pld [r4, #32] \n\t" + "walignr1 wr10, wr3, wr4 \n\t" + WAVG2B" wr8, wr8, wr0 \n\t" + WAVG2B" wr10, wr10, wr2 \n\t" + "wstrd wr8, [%[block]] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "wstrd wr10, [r5] \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "add r5, r5, %[line_size] \n\t" + "pld [r5] \n\t" + "pld [r5, #32] \n\t" + "bne 1b \n\t" + : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) + : + : "memory", "r4", "r5", "r12"); +} + +void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + __asm__ __volatile__ ( + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r4, %[pixels], %[line_size] \n\t" + "add r5, %[block], %[line_size] \n\t" + "mov %[line_size], %[line_size], lsl #1 \n\t" + "1: \n\t" + "wldrd wr0, [%[pixels]] \n\t" + "wldrd wr1, [%[pixels], #8] \n\t" + "subs %[h], %[h], #2 \n\t" + "wldrd wr2, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "wldrd wr3, [r4] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr8, wr0, wr1 \n\t" + "wldrd wr4, [r4, #8] \n\t" + "walignr1 wr9, wr1, wr2 \n\t" + "wldrd wr5, [r4, #16] \n\t" + "add r4, r4, %[line_size] \n\t" + "pld [r4] \n\t" + "pld [r4, #32] \n\t" + "walignr1 wr10, wr3, wr4 \n\t" + "wstrd wr8, [%[block]] \n\t" + "walignr1 wr11, wr4, wr5 \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], 
%[line_size] \n\t" + "wstrd wr10, [r5] \n\t" + "wstrd wr11, [r5, #8] \n\t" + "add r5, r5, %[line_size] \n\t" + "bne 1b \n\t" + : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) + : + : "memory", "r4", "r5", "r12"); +} + +void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + __asm__ __volatile__ ( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r4, %[pixels], %[line_size]\n\t" + "add r5, %[block], %[line_size] \n\t" + "mov %[line_size], %[line_size], lsl #1 \n\t" + "1: \n\t" + "wldrd wr0, [%[pixels]] \n\t" + "wldrd wr1, [%[pixels], #8] \n\t" + "subs %[h], %[h], #2 \n\t" + "wldrd wr2, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "wldrd wr3, [r4] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr8, wr0, wr1 \n\t" + "wldrd wr4, [r4, #8] \n\t" + "walignr1 wr9, wr1, wr2 \n\t" + "wldrd wr5, [r4, #16] \n\t" + "add r4, r4, %[line_size] \n\t" + "wldrd wr0, [%[block]] \n\t" + "pld [r4] \n\t" + "wldrd wr1, [%[block], #8] \n\t" + "pld [r4, #32] \n\t" + "wldrd wr2, [r5] \n\t" + "walignr1 wr10, wr3, wr4 \n\t" + "wldrd wr3, [r5, #8] \n\t" + WAVG2B" wr8, wr8, wr0 \n\t" + WAVG2B" wr9, wr9, wr1 \n\t" + WAVG2B" wr10, wr10, wr2 \n\t" + "wstrd wr8, [%[block]] \n\t" + "walignr1 wr11, wr4, wr5 \n\t" + WAVG2B" wr11, wr11, wr3 \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "wstrd wr10, [r5] \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "wstrd wr11, [r5, #8] \n\t" + "add r5, r5, %[line_size] \n\t" + "pld [r5] \n\t" + "pld [r5, #32] \n\t" + "bne 1b \n\t" + : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) + : + : "memory", "r4", "r5", "r12"); +} + +void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r12, r12, #1 \n\t" + "add r4, %[pixels], %[line_size]\n\t" + "tmcr wcgr2, r12 \n\t" + "add r5, %[block], %[line_size] \n\t" + "mov %[line_size], %[line_size], lsl #1 \n\t" + + "1: \n\t" + "wldrd wr10, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "wldrd wr13, [r4] \n\t" + "pld [%[pixels]] \n\t" + "wldrd wr14, [r4, #8] \n\t" + "pld [%[pixels], #32] \n\t" + "add r4, r4, %[line_size] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "pld [r4] \n\t" + "pld [r4, #32] \n\t" + "walignr1 wr2, wr13, wr14 \n\t" + "wmoveq wr4, wr11 \n\t" + "wmoveq wr6, wr14 \n\t" + "walignr2ne wr4, wr10, wr11 \n\t" + "walignr2ne wr6, wr13, wr14 \n\t" + WAVG2B" wr0, wr0, wr4 \n\t" + WAVG2B" wr2, wr2, wr6 \n\t" + "wstrd wr0, [%[block]] \n\t" + "subs %[h], %[h], #2 \n\t" + "wstrd wr2, [r5] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "add r5, r5, %[line_size] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) + : + : "r4", "r5", "r12", "memory"); +} + +void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ 
+ int stride = line_size; + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r12, r12, #1 \n\t" + "add r4, %[pixels], %[line_size]\n\t" + "tmcr wcgr2, r12 \n\t" + "add r5, %[block], %[line_size] \n\t" + "mov %[line_size], %[line_size], lsl #1 \n\t" + + "1: \n\t" + "wldrd wr10, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "wldrd wr12, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "wldrd wr13, [r4] \n\t" + "pld [%[pixels]] \n\t" + "wldrd wr14, [r4, #8] \n\t" + "pld [%[pixels], #32] \n\t" + "wldrd wr15, [r4, #16] \n\t" + "add r4, r4, %[line_size] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "pld [r4] \n\t" + "pld [r4, #32] \n\t" + "walignr1 wr1, wr11, wr12 \n\t" + "walignr1 wr2, wr13, wr14 \n\t" + "walignr1 wr3, wr14, wr15 \n\t" + "wmoveq wr4, wr11 \n\t" + "wmoveq wr5, wr12 \n\t" + "wmoveq wr6, wr14 \n\t" + "wmoveq wr7, wr15 \n\t" + "walignr2ne wr4, wr10, wr11 \n\t" + "walignr2ne wr5, wr11, wr12 \n\t" + "walignr2ne wr6, wr13, wr14 \n\t" + "walignr2ne wr7, wr14, wr15 \n\t" + WAVG2B" wr0, wr0, wr4 \n\t" + WAVG2B" wr1, wr1, wr5 \n\t" + "wstrd wr0, [%[block]] \n\t" + WAVG2B" wr2, wr2, wr6 \n\t" + "wstrd wr1, [%[block], #8] \n\t" + WAVG2B" wr3, wr3, wr7 \n\t" + "add %[block], %[block], %[line_size] \n\t" + "wstrd wr2, [r5] \n\t" + "subs %[h], %[h], #2 \n\t" + "wstrd wr3, [r5, #8] \n\t" + "add r5, r5, %[line_size] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) + : + : "r4", "r5", "r12", "memory"); +} + +void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r12, r12, #1 \n\t" + "add r4, %[pixels], %[line_size]\n\t" + "tmcr wcgr2, r12 \n\t" + "add r5, %[block], %[line_size] \n\t" + "mov %[line_size], %[line_size], lsl #1 \n\t" + "pld [r5] \n\t" + "pld [r5, #32] \n\t" + + "1: \n\t" + "wldrd wr10, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "wldrd wr13, [r4] \n\t" + "pld [%[pixels]] \n\t" + "wldrd wr14, [r4, #8] \n\t" + "pld [%[pixels], #32] \n\t" + "add r4, r4, %[line_size] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "pld [r4] \n\t" + "pld [r4, #32] \n\t" + "walignr1 wr2, wr13, wr14 \n\t" + "wmoveq wr4, wr11 \n\t" + "wmoveq wr6, wr14 \n\t" + "walignr2ne wr4, wr10, wr11 \n\t" + "wldrd wr10, [%[block]] \n\t" + "walignr2ne wr6, wr13, wr14 \n\t" + "wldrd wr12, [r5] \n\t" + WAVG2B" wr0, wr0, wr4 \n\t" + WAVG2B" wr2, wr2, wr6 \n\t" + WAVG2B" wr0, wr0, wr10 \n\t" + WAVG2B" wr2, wr2, wr12 \n\t" + "wstrd wr0, [%[block]] \n\t" + "subs %[h], %[h], #2 \n\t" + "wstrd wr2, [r5] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "add r5, r5, %[line_size] \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "pld [r5] \n\t" + "pld [r5, #32] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), 
[line_size]"+r"(stride) + : + : "r4", "r5", "r12", "memory"); +} + +void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r12, r12, #1 \n\t" + "add r4, %[pixels], %[line_size]\n\t" + "tmcr wcgr2, r12 \n\t" + "add r5, %[block], %[line_size] \n\t" + "mov %[line_size], %[line_size], lsl #1 \n\t" + "pld [r5] \n\t" + "pld [r5, #32] \n\t" + + "1: \n\t" + "wldrd wr10, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "wldrd wr12, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "wldrd wr13, [r4] \n\t" + "pld [%[pixels]] \n\t" + "wldrd wr14, [r4, #8] \n\t" + "pld [%[pixels], #32] \n\t" + "wldrd wr15, [r4, #16] \n\t" + "add r4, r4, %[line_size] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "pld [r4] \n\t" + "pld [r4, #32] \n\t" + "walignr1 wr1, wr11, wr12 \n\t" + "walignr1 wr2, wr13, wr14 \n\t" + "walignr1 wr3, wr14, wr15 \n\t" + "wmoveq wr4, wr11 \n\t" + "wmoveq wr5, wr12 \n\t" + "wmoveq wr6, wr14 \n\t" + "wmoveq wr7, wr15 \n\t" + "walignr2ne wr4, wr10, wr11 \n\t" + "walignr2ne wr5, wr11, wr12 \n\t" + "walignr2ne wr6, wr13, wr14 \n\t" + "walignr2ne wr7, wr14, wr15 \n\t" + "wldrd wr10, [%[block]] \n\t" + WAVG2B" wr0, wr0, wr4 \n\t" + "wldrd wr11, [%[block], #8] \n\t" + WAVG2B" wr1, wr1, wr5 \n\t" + "wldrd wr12, [r5] \n\t" + WAVG2B" wr2, wr2, wr6 \n\t" + "wldrd wr13, [r5, #8] \n\t" + WAVG2B" wr3, wr3, wr7 \n\t" + WAVG2B" wr0, wr0, wr10 \n\t" + WAVG2B" wr1, wr1, wr11 \n\t" + WAVG2B" wr2, wr2, wr12 \n\t" + WAVG2B" wr3, wr3, wr13 \n\t" + "wstrd wr0, [%[block]] \n\t" + "subs %[h], %[h], #2 \n\t" + "wstrd wr1, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "wstrd wr2, [r5] \n\t" + "pld [%[block]] \n\t" + "wstrd wr3, [r5, #8] \n\t" + "add r5, r5, %[line_size] \n\t" + "pld [%[block], #32] \n\t" + "pld [r5] \n\t" + "pld [r5, #32] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) + : + :"r4", "r5", "r12", "memory"); +} + +void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "and r12, %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "pld [%[block]] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + + "1: \n\t" + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr4, wr10, wr11 \n\t" + "wldrd wr10, [%[block]] \n\t" + WAVG2B" wr8, wr0, wr4 \n\t" + WAVG2B" wr8, wr8, wr10 \n\t" + "wstrd wr8, [%[block]] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "pld [%[block]] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld 
[%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "wldrd wr10, [%[block]] \n\t" + WAVG2B" wr8, wr0, wr4 \n\t" + WAVG2B" wr8, wr8, wr10 \n\t" + "wstrd wr8, [%[block]] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + "subs %[h], %[h], #2 \n\t" + "pld [%[block]] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) + : + : "cc", "memory", "r12"); +} + +void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "and r12, %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "wldrd wr12, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "walignr1 wr1, wr11, wr12 \n\t" + + "1: \n\t" + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "wldrd wr12, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr4, wr10, wr11 \n\t" + "walignr1 wr5, wr11, wr12 \n\t" + WAVG2B" wr8, wr0, wr4 \n\t" + WAVG2B" wr9, wr1, wr5 \n\t" + "wstrd wr8, [%[block]] \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "wldrd wr12, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "walignr1 wr1, wr11, wr12 \n\t" + WAVG2B" wr8, wr0, wr4 \n\t" + WAVG2B" wr9, wr1, wr5 \n\t" + "wstrd wr8, [%[block]] \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + "subs %[h], %[h], #2 \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) + : + : "r4", "r5", "r12", "memory"); +} + +void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "and r12, %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "pld [%[block]] \n\t" + "wldrd wr12, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "walignr1 wr1, wr11, wr12 \n\t" + + "1: \n\t" + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "wldrd wr12, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr4, wr10, wr11 \n\t" + "walignr1 wr5, wr11, wr12 \n\t" + "wldrd wr10, [%[block]] \n\t" + "wldrd wr11, [%[block], #8] \n\t" + WAVG2B" wr8, wr0, wr4 \n\t" + WAVG2B" wr9, wr1, wr5 \n\t" + WAVG2B" wr8, wr8, wr10 \n\t" + WAVG2B" wr9, wr9, wr11 \n\t" + "wstrd wr8, [%[block]] \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "pld 
[%[block]] \n\t" + "wldrd wr12, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "walignr1 wr1, wr11, wr12 \n\t" + "wldrd wr10, [%[block]] \n\t" + "wldrd wr11, [%[block], #8] \n\t" + WAVG2B" wr8, wr0, wr4 \n\t" + WAVG2B" wr9, wr1, wr5 \n\t" + WAVG2B" wr8, wr8, wr10 \n\t" + WAVG2B" wr9, wr9, wr11 \n\t" + "wstrd wr8, [%[block]] \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + "subs %[h], %[h], #2 \n\t" + "pld [%[block]] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) + : + : "r4", "r5", "r12", "memory"); +} + +void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "mov r12, #2 \n\t" + "pld [%[pixels], #32] \n\t" + "tmcr wcgr0, r12 \n\t" /* for shift value */ + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + + // [wr0 wr1 wr2 wr3] <= * + // [wr4 wr5 wr6 wr7] + "wldrd wr12, [%[pixels]] \n\t" + "add r12, r12, #1 \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "tmcr wcgr2, r12 \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "cmp r12, #8 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr2, wr12, wr13 \n\t" + "wmoveq wr10, wr13 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "wunpckelub wr0, wr2 \n\t" + "wunpckehub wr1, wr2 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "waddhus wr0, wr0, wr8 \n\t" + "waddhus wr1, wr1, wr9 \n\t" + + "1: \n\t" + // [wr0 wr1 wr2 wr3] + // [wr4 wr5 wr6 wr7] <= * + "wldrd wr12, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr6, wr12, wr13 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "wmoveq wr10, wr13 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "wunpckelub wr4, wr6 \n\t" + "wunpckehub wr5, wr6 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "waddhus wr4, wr4, wr8 \n\t" + "waddhus wr5, wr5, wr9 \n\t" + "waddhus wr8, wr0, wr4 \n\t" + "waddhus wr9, wr1, wr5 \n\t" + "waddhus wr8, wr8, wr15 \n\t" + "waddhus wr9, wr9, wr15 \n\t" + "wsrlhg wr8, wr8, wcgr0 \n\t" + "wsrlhg wr9, wr9, wcgr0 \n\t" + "wpackhus wr8, wr8, wr9 \n\t" + "wstrd wr8, [%[block]] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + // [wr0 wr1 wr2 wr3] <= * + // [wr4 wr5 wr6 wr7] + "wldrd wr12, [%[pixels]] \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr2, wr12, wr13 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "wmoveq wr10, wr13 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "wunpckelub wr0, wr2 \n\t" + "wunpckehub wr1, wr2 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "waddhus wr0, wr0, wr8 \n\t" + "waddhus wr1, wr1, wr9 \n\t" + "waddhus wr8, wr0, wr4 \n\t" + "waddhus wr9, wr1, wr5 \n\t" + "waddhus wr8, wr8, wr15 \n\t" + "waddhus wr9, wr9, wr15 \n\t" + "wsrlhg wr8, wr8, wcgr0 \n\t" + "wsrlhg wr9, wr9, wcgr0 \n\t" + "wpackhus wr8, wr8, wr9 \n\t" + "subs %[h], %[h], #2 \n\t" + "wstrd wr8, [%[block]] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block) + : [line_size]"r"(line_size) + : 
"r12", "memory"); +} + +void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "mov r12, #2 \n\t" + "pld [%[pixels], #32] \n\t" + "tmcr wcgr0, r12 \n\t" /* for shift value */ + /* alignment */ + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r12, r12, #1 \n\t" + "tmcr wcgr2, r12 \n\t" + + // [wr0 wr1 wr2 wr3] <= * + // [wr4 wr5 wr6 wr7] + "wldrd wr12, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "wldrd wr14, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "walignr1 wr2, wr12, wr13 \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr3, wr13, wr14 \n\t" + "wmoveq wr10, wr13 \n\t" + "wmoveq wr11, wr14 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "walignr2ne wr11, wr13, wr14 \n\t" + "wunpckelub wr0, wr2 \n\t" + "wunpckehub wr1, wr2 \n\t" + "wunpckelub wr2, wr3 \n\t" + "wunpckehub wr3, wr3 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "wunpckelub wr10, wr11 \n\t" + "wunpckehub wr11, wr11 \n\t" + "waddhus wr0, wr0, wr8 \n\t" + "waddhus wr1, wr1, wr9 \n\t" + "waddhus wr2, wr2, wr10 \n\t" + "waddhus wr3, wr3, wr11 \n\t" + + "1: \n\t" + // [wr0 wr1 wr2 wr3] + // [wr4 wr5 wr6 wr7] <= * + "wldrd wr12, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "wldrd wr14, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr6, wr12, wr13 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr7, wr13, wr14 \n\t" + "wmoveq wr10, wr13 \n\t" + "wmoveq wr11, wr14 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "walignr2ne wr11, wr13, wr14 \n\t" + "wunpckelub wr4, wr6 \n\t" + "wunpckehub wr5, wr6 \n\t" + "wunpckelub wr6, wr7 \n\t" + "wunpckehub wr7, wr7 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "wunpckelub wr10, wr11 \n\t" + "wunpckehub wr11, wr11 \n\t" + "waddhus wr4, wr4, wr8 \n\t" + "waddhus wr5, wr5, wr9 \n\t" + "waddhus wr6, wr6, wr10 \n\t" + "waddhus wr7, wr7, wr11 \n\t" + "waddhus wr8, wr0, wr4 \n\t" + "waddhus wr9, wr1, wr5 \n\t" + "waddhus wr10, wr2, wr6 \n\t" + "waddhus wr11, wr3, wr7 \n\t" + "waddhus wr8, wr8, wr15 \n\t" + "waddhus wr9, wr9, wr15 \n\t" + "waddhus wr10, wr10, wr15 \n\t" + "waddhus wr11, wr11, wr15 \n\t" + "wsrlhg wr8, wr8, wcgr0 \n\t" + "wsrlhg wr9, wr9, wcgr0 \n\t" + "wsrlhg wr10, wr10, wcgr0 \n\t" + "wsrlhg wr11, wr11, wcgr0 \n\t" + "wpackhus wr8, wr8, wr9 \n\t" + "wpackhus wr9, wr10, wr11 \n\t" + "wstrd wr8, [%[block]] \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + // [wr0 wr1 wr2 wr3] <= * + // [wr4 wr5 wr6 wr7] + "wldrd wr12, [%[pixels]] \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "wldrd wr14, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr2, wr12, wr13 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr3, wr13, wr14 \n\t" + "wmoveq wr10, wr13 \n\t" + "wmoveq wr11, wr14 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "walignr2ne wr11, wr13, wr14 \n\t" + "wunpckelub wr0, wr2 \n\t" + "wunpckehub wr1, wr2 \n\t" + "wunpckelub wr2, wr3 \n\t" + "wunpckehub wr3, wr3 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "wunpckelub wr10, wr11 \n\t" + "wunpckehub wr11, wr11 \n\t" + "waddhus wr0, 
wr0, wr8 \n\t" + "waddhus wr1, wr1, wr9 \n\t" + "waddhus wr2, wr2, wr10 \n\t" + "waddhus wr3, wr3, wr11 \n\t" + "waddhus wr8, wr0, wr4 \n\t" + "waddhus wr9, wr1, wr5 \n\t" + "waddhus wr10, wr2, wr6 \n\t" + "waddhus wr11, wr3, wr7 \n\t" + "waddhus wr8, wr8, wr15 \n\t" + "waddhus wr9, wr9, wr15 \n\t" + "waddhus wr10, wr10, wr15 \n\t" + "waddhus wr11, wr11, wr15 \n\t" + "wsrlhg wr8, wr8, wcgr0 \n\t" + "wsrlhg wr9, wr9, wcgr0 \n\t" + "wsrlhg wr10, wr10, wcgr0 \n\t" + "wsrlhg wr11, wr11, wcgr0 \n\t" + "wpackhus wr8, wr8, wr9 \n\t" + "wpackhus wr9, wr10, wr11 \n\t" + "wstrd wr8, [%[block]] \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + "subs %[h], %[h], #2 \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block) + : [line_size]"r"(line_size) + : "r12", "memory"); +} + +void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version + __asm__ __volatile__( + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "pld [%[pixels]] \n\t" + "mov r12, #2 \n\t" + "pld [%[pixels], #32] \n\t" + "tmcr wcgr0, r12 \n\t" /* for shift value */ + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + + // [wr0 wr1 wr2 wr3] <= * + // [wr4 wr5 wr6 wr7] + "wldrd wr12, [%[pixels]] \n\t" + "add r12, r12, #1 \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "tmcr wcgr2, r12 \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "cmp r12, #8 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr2, wr12, wr13 \n\t" + "wmoveq wr10, wr13 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "wunpckelub wr0, wr2 \n\t" + "wunpckehub wr1, wr2 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "waddhus wr0, wr0, wr8 \n\t" + "waddhus wr1, wr1, wr9 \n\t" + + "1: \n\t" + // [wr0 wr1 wr2 wr3] + // [wr4 wr5 wr6 wr7] <= * + "wldrd wr12, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr6, wr12, wr13 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "wmoveq wr10, wr13 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "wunpckelub wr4, wr6 \n\t" + "wunpckehub wr5, wr6 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "waddhus wr4, wr4, wr8 \n\t" + "waddhus wr5, wr5, wr9 \n\t" + "waddhus wr8, wr0, wr4 \n\t" + "waddhus wr9, wr1, wr5 \n\t" + "waddhus wr8, wr8, wr15 \n\t" + "waddhus wr9, wr9, wr15 \n\t" + "wldrd wr12, [%[block]] \n\t" + "wsrlhg wr8, wr8, wcgr0 \n\t" + "wsrlhg wr9, wr9, wcgr0 \n\t" + "wpackhus wr8, wr8, wr9 \n\t" + WAVG2B" wr8, wr8, wr12 \n\t" + "wstrd wr8, [%[block]] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "wldrd wr12, [%[pixels]] \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + + // [wr0 wr1 wr2 wr3] <= * + // [wr4 wr5 wr6 wr7] + "wldrd wr13, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr2, wr12, wr13 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "wmoveq wr10, wr13 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "wunpckelub wr0, wr2 \n\t" + "wunpckehub wr1, wr2 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "waddhus wr0, wr0, wr8 \n\t" + "waddhus wr1, wr1, wr9 \n\t" + "waddhus wr8, wr0, wr4 \n\t" + "waddhus wr9, wr1, wr5 \n\t" + "waddhus wr8, wr8, wr15 \n\t" + "waddhus wr9, wr9, wr15 \n\t" + "wldrd wr12, [%[block]] 
\n\t" + "wsrlhg wr8, wr8, wcgr0 \n\t" + "wsrlhg wr9, wr9, wcgr0 \n\t" + "wpackhus wr8, wr8, wr9 \n\t" + "subs %[h], %[h], #2 \n\t" + WAVG2B" wr8, wr8, wr12 \n\t" + "wstrd wr8, [%[block]] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block) + : [line_size]"r"(line_size) + : "r12", "memory"); +} + +void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version + __asm__ __volatile__( + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "pld [%[pixels]] \n\t" + "mov r12, #2 \n\t" + "pld [%[pixels], #32] \n\t" + "tmcr wcgr0, r12 \n\t" /* for shift value */ + /* alignment */ + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r12, r12, #1 \n\t" + "tmcr wcgr2, r12 \n\t" + + // [wr0 wr1 wr2 wr3] <= * + // [wr4 wr5 wr6 wr7] + "wldrd wr12, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "wldrd wr14, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "walignr1 wr2, wr12, wr13 \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr3, wr13, wr14 \n\t" + "wmoveq wr10, wr13 \n\t" + "wmoveq wr11, wr14 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "walignr2ne wr11, wr13, wr14 \n\t" + "wunpckelub wr0, wr2 \n\t" + "wunpckehub wr1, wr2 \n\t" + "wunpckelub wr2, wr3 \n\t" + "wunpckehub wr3, wr3 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "wunpckelub wr10, wr11 \n\t" + "wunpckehub wr11, wr11 \n\t" + "waddhus wr0, wr0, wr8 \n\t" + "waddhus wr1, wr1, wr9 \n\t" + "waddhus wr2, wr2, wr10 \n\t" + "waddhus wr3, wr3, wr11 \n\t" + + "1: \n\t" + // [wr0 wr1 wr2 wr3] + // [wr4 wr5 wr6 wr7] <= * + "wldrd wr12, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "wldrd wr14, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr6, wr12, wr13 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr7, wr13, wr14 \n\t" + "wmoveq wr10, wr13 \n\t" + "wmoveq wr11, wr14 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "walignr2ne wr11, wr13, wr14 \n\t" + "wunpckelub wr4, wr6 \n\t" + "wunpckehub wr5, wr6 \n\t" + "wunpckelub wr6, wr7 \n\t" + "wunpckehub wr7, wr7 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "wunpckelub wr10, wr11 \n\t" + "wunpckehub wr11, wr11 \n\t" + "waddhus wr4, wr4, wr8 \n\t" + "waddhus wr5, wr5, wr9 \n\t" + "waddhus wr6, wr6, wr10 \n\t" + "waddhus wr7, wr7, wr11 \n\t" + "waddhus wr8, wr0, wr4 \n\t" + "waddhus wr9, wr1, wr5 \n\t" + "waddhus wr10, wr2, wr6 \n\t" + "waddhus wr11, wr3, wr7 \n\t" + "waddhus wr8, wr8, wr15 \n\t" + "waddhus wr9, wr9, wr15 \n\t" + "waddhus wr10, wr10, wr15 \n\t" + "waddhus wr11, wr11, wr15 \n\t" + "wsrlhg wr8, wr8, wcgr0 \n\t" + "wsrlhg wr9, wr9, wcgr0 \n\t" + "wldrd wr12, [%[block]] \n\t" + "wldrd wr13, [%[block], #8] \n\t" + "wsrlhg wr10, wr10, wcgr0 \n\t" + "wsrlhg wr11, wr11, wcgr0 \n\t" + "wpackhus wr8, wr8, wr9 \n\t" + "wpackhus wr9, wr10, wr11 \n\t" + WAVG2B" wr8, wr8, wr12 \n\t" + WAVG2B" wr9, wr9, wr13 \n\t" + "wstrd wr8, [%[block]] \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + // [wr0 wr1 wr2 wr3] <= * + // [wr4 wr5 wr6 wr7] + "wldrd wr12, [%[pixels]] \n\t" + "pld [%[block]] \n\t" + "wldrd wr13, 
[%[pixels], #8] \n\t" + "pld [%[block], #32] \n\t" + "wldrd wr14, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr2, wr12, wr13 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr3, wr13, wr14 \n\t" + "wmoveq wr10, wr13 \n\t" + "wmoveq wr11, wr14 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "walignr2ne wr11, wr13, wr14 \n\t" + "wunpckelub wr0, wr2 \n\t" + "wunpckehub wr1, wr2 \n\t" + "wunpckelub wr2, wr3 \n\t" + "wunpckehub wr3, wr3 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "wunpckelub wr10, wr11 \n\t" + "wunpckehub wr11, wr11 \n\t" + "waddhus wr0, wr0, wr8 \n\t" + "waddhus wr1, wr1, wr9 \n\t" + "waddhus wr2, wr2, wr10 \n\t" + "waddhus wr3, wr3, wr11 \n\t" + "waddhus wr8, wr0, wr4 \n\t" + "waddhus wr9, wr1, wr5 \n\t" + "waddhus wr10, wr2, wr6 \n\t" + "waddhus wr11, wr3, wr7 \n\t" + "waddhus wr8, wr8, wr15 \n\t" + "waddhus wr9, wr9, wr15 \n\t" + "waddhus wr10, wr10, wr15 \n\t" + "waddhus wr11, wr11, wr15 \n\t" + "wsrlhg wr8, wr8, wcgr0 \n\t" + "wsrlhg wr9, wr9, wcgr0 \n\t" + "wldrd wr12, [%[block]] \n\t" + "wldrd wr13, [%[block], #8] \n\t" + "wsrlhg wr10, wr10, wcgr0 \n\t" + "wsrlhg wr11, wr11, wcgr0 \n\t" + "wpackhus wr8, wr8, wr9 \n\t" + "wpackhus wr9, wr10, wr11 \n\t" + WAVG2B" wr8, wr8, wr12 \n\t" + WAVG2B" wr9, wr9, wr13 \n\t" + "wstrd wr8, [%[block]] \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "subs %[h], %[h], #2 \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block) + : [line_size]"r"(line_size) + : "r12", "memory"); +} diff --git a/src/libffmpeg/libavcodec/armv4l/mathops.h b/src/libffmpeg/libavcodec/armv4l/mathops.h new file mode 100644 index 000000000..7ddd0ec6e --- /dev/null +++ b/src/libffmpeg/libavcodec/armv4l/mathops.h @@ -0,0 +1,49 @@ +/* + * simple math operations + * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifdef FRAC_BITS
+#   define MULL(a, b) \
+        ({  int lo, hi;\
+         asm("smull %0, %1, %2, %3 \n\t"\
+             "mov   %0, %0, lsr %4\n\t"\
+             "add   %1, %0, %1, lsl %5\n\t"\
+             : "=&r"(lo), "=&r"(hi)\
+             : "r"(b), "r"(a), "i"(FRAC_BITS), "i"(32-FRAC_BITS));\
+         hi; })
+#endif
+
+#define MULH(a, b) \
+    ({ int lo, hi;\
+     asm ("smull %0, %1, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(b), "r"(a));\
+     hi; })
+
+#if defined(HAVE_ARMV5TE)
+
+/* signed 16x16 -> 32 multiply add accumulate */
+#   define MAC16(rt, ra, rb) \
+        asm ("smlabb %0, %2, %3, %0" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
+/* signed 16x16 -> 32 multiply */
+#   define MUL16(ra, rb) \
+        ({ int __rt; \
+         asm ("smulbb %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); \
+         __rt; })
+
+#endif
diff --git a/src/libffmpeg/libavcodec/armv4l/mpegvideo_armv5te.c b/src/libffmpeg/libavcodec/armv4l/mpegvideo_armv5te.c
new file mode 100644
index 000000000..a8d09b8ce
--- /dev/null
+++ b/src/libffmpeg/libavcodec/armv4l/mpegvideo_armv5te.c
@@ -0,0 +1,213 @@
+/*
+ * Optimization of some functions from mpegvideo.c for armv5te
+ * Copyright (c) 2007 Siarhei Siamashka <ssvb@users.sourceforge.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Some useful links for those who may be interested in optimizing code for ARM.
+ * ARM Architecture Reference Manual: http://www.arm.com/community/academy/resources.html
+ * Instructions timings and optimization guide for ARM9E: http://www.arm.com/pdfs/DDI0222B_9EJS_r1p2.pdf
+ */
+
+#include "../dsputil.h"
+#include "../mpegvideo.h"
+#include "../avcodec.h"
+
+
+#ifdef ENABLE_ARM_TESTS
+/**
+ * h263 dequantizer supplementary function; it is performance-critical and
+ * needs an optimized implementation for each architecture. It is also used
+ * as a reference implementation in regression tests.
+ */
+static inline void dct_unquantize_h263_helper_c(DCTELEM *block, int qmul, int qadd, int count)
+{
+    int i, level;
+    for (i = 0; i < count; i++) {
+        level = block[i];
+        if (level) {
+            if (level < 0) {
+                level = level * qmul - qadd;
+            } else {
+                level = level * qmul + qadd;
+            }
+            block[i] = level;
+        }
+    }
+}
+#endif
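[Editor's note — illustrative only, not part of the patch: the reference helper
above implements the H.263 inverse-quantization rule level*qmul + sign(level)*qadd
for nonzero coefficients. A minimal self-contained sketch of that rule, assuming
DCTELEM is a 16-bit integer as in this libavcodec tree:

    #include <assert.h>
    #include <stdint.h>

    static void ref_unquantize(int16_t *block, int qscale, int count)
    {
        int i, qmul = qscale << 1, qadd = (qscale - 1) | 1;  /* qadd forced odd */
        for (i = 0; i < count; i++)
            if (block[i])  /* zero coefficients stay zero */
                block[i] = block[i] * qmul + (block[i] < 0 ? -qadd : qadd);
    }

    int main(void)
    {
        int16_t b[4] = { 3, -3, 0, 1 };
        ref_unquantize(b, 5, 4);  /* qscale 5 gives qmul = 10, qadd = 5 */
        assert(b[0] == 35 && b[1] == -35 && b[2] == 0 && b[3] == 15);
        return 0;
    }
]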
+/* GCC 3.1 or higher is required to support symbolic names in assembly code */
+#if (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1))
+
+/**
+ * Special optimized version of dct_unquantize_h263_helper_c; it requires the
+ * block to be at least 8-byte aligned and may process more elements than
+ * requested. But it is guaranteed never to process more than 64 elements
+ * provided that the xxcount argument is <= 64, so it is safe. The macro is
+ * optimized for the common distribution of nCoeffs values (mostly a multiple
+ * of 8 plus one or two extra elements), so it processes data eight elements
+ * per loop iteration, with optional processing of two elements at the end.
+ *
+ * The inner loop should take 6 cycles per element on arm926ej-s (Nokia 770).
+ */
+#define dct_unquantize_h263_special_helper_armv5te(xxblock, xxqmul, xxqadd, xxcount) \
+({ DCTELEM *xblock = xxblock; \
+   int xqmul = xxqmul, xqadd = xxqadd, xcount = xxcount, xtmp; \
+   int xdata1, xdata2; \
+__asm__ __volatile__( \
+        "subs %[count], #2                       \n\t" \
+        "ble 2f                                  \n\t" \
+        "ldrd r4, [%[block], #0]                 \n\t" \
+    "1:                                          \n\t" \
+        "ldrd r6, [%[block], #8]                 \n\t" \
+\
+        "rsbs %[data1], %[zero], r4, asr #16     \n\t" \
+        "addgt %[data1], %[qadd], #0             \n\t" \
+        "rsblt %[data1], %[qadd], #0             \n\t" \
+        "smlatbne %[data1], r4, %[qmul], %[data1] \n\t" \
+\
+        "rsbs %[data2], %[zero], r5, asr #16     \n\t" \
+        "addgt %[data2], %[qadd], #0             \n\t" \
+        "rsblt %[data2], %[qadd], #0             \n\t" \
+        "smlatbne %[data2], r5, %[qmul], %[data2] \n\t" \
+\
+        "rsbs %[tmp], %[zero], r4, asl #16       \n\t" \
+        "addgt %[tmp], %[qadd], #0               \n\t" \
+        "rsblt %[tmp], %[qadd], #0               \n\t" \
+        "smlabbne r4, r4, %[qmul], %[tmp]        \n\t" \
+\
+        "rsbs %[tmp], %[zero], r5, asl #16       \n\t" \
+        "addgt %[tmp], %[qadd], #0               \n\t" \
+        "rsblt %[tmp], %[qadd], #0               \n\t" \
+        "smlabbne r5, r5, %[qmul], %[tmp]        \n\t" \
+\
+        "strh r4, [%[block]], #2                 \n\t" \
+        "strh %[data1], [%[block]], #2           \n\t" \
+        "strh r5, [%[block]], #2                 \n\t" \
+        "strh %[data2], [%[block]], #2           \n\t" \
+\
+        "rsbs %[data1], %[zero], r6, asr #16     \n\t" \
+        "addgt %[data1], %[qadd], #0             \n\t" \
+        "rsblt %[data1], %[qadd], #0             \n\t" \
+        "smlatbne %[data1], r6, %[qmul], %[data1] \n\t" \
+\
+        "rsbs %[data2], %[zero], r7, asr #16     \n\t" \
+        "addgt %[data2], %[qadd], #0             \n\t" \
+        "rsblt %[data2], %[qadd], #0             \n\t" \
+        "smlatbne %[data2], r7, %[qmul], %[data2] \n\t" \
+\
+        "rsbs %[tmp], %[zero], r6, asl #16       \n\t" \
+        "addgt %[tmp], %[qadd], #0               \n\t" \
+        "rsblt %[tmp], %[qadd], #0               \n\t" \
+        "smlabbne r6, r6, %[qmul], %[tmp]        \n\t" \
+\
+        "rsbs %[tmp], %[zero], r7, asl #16       \n\t" \
+        "addgt %[tmp], %[qadd], #0               \n\t" \
+        "rsblt %[tmp], %[qadd], #0               \n\t" \
+        "smlabbne r7, r7, %[qmul], %[tmp]        \n\t" \
+\
+        "strh r6, [%[block]], #2                 \n\t" \
+        "strh %[data1], [%[block]], #2           \n\t" \
+        "strh r7, [%[block]], #2                 \n\t" \
+        "strh %[data2], [%[block]], #2           \n\t" \
+\
+        "subs %[count], #8                       \n\t" \
+        "ldrgtd r4, [%[block], #0]               \n\t" /* load data early to avoid load/use pipeline stall */ \
+        "bgt 1b                                  \n\t" \
+\
+        "adds %[count], #2                       \n\t" \
+        "ble 3f                                  \n\t" \
+    "2:                                          \n\t" \
+        "ldrsh %[data1], [%[block], #0]          \n\t" \
+        "ldrsh %[data2], [%[block], #2]          \n\t" \
+        "mov %[tmp], %[qadd]                     \n\t" \
+        "cmp %[data1], #0                        \n\t" \
+        "rsblt %[tmp], %[qadd], #0               \n\t" \
+        "smlabbne %[data1], %[data1], %[qmul], %[tmp] \n\t" \
+        "mov %[tmp], %[qadd]                     \n\t" \
+        "cmp %[data2], #0                        \n\t" \
+        "rsblt %[tmp], %[qadd], #0               \n\t" \
+        "smlabbne %[data2], %[data2], %[qmul], %[tmp] \n\t" \
+        "strh %[data1], [%[block]], #2           \n\t" \
+        "strh %[data2], [%[block]], #2           \n\t" \
+    "3:                                          \n\t" \
+        : [block] "+&r" (xblock), [count] "+&r" (xcount), [tmp] "=&r" (xtmp), \
+          [data1] "=&r" (xdata1), [data2] "=&r" (xdata2) \
+        : [qmul] "r" (xqmul), [qadd] "r" (xqadd), [zero] "r" (0) \
+        : "r4", "r5", "r6", "r7", "cc", "memory" \
+); \
+})
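[Editor's note: the macro above computes the same per-coefficient update as
dct_unquantize_h263_helper_c — block[i]*qmul plus qadd, or minus qadd when the
coefficient is negative — but consumes coefficients in 32-bit pairs. Each rsbs
sets the flags from one 16-bit half, addgt/rsblt then select +qadd or -qadd,
and the conditional smlatbne/smlabbne multiply-accumulate is skipped entirely
when that half is zero. Eight coefficients are handled per loop pass, with the
"2:" tail covering a trailing pair.]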
+
+static void dct_unquantize_h263_intra_armv5te(MpegEncContext *s,
+                                  DCTELEM *block, int n, int qscale)
+{
+    int i, level, qmul, qadd;
+    int nCoeffs;
+
+    assert(s->block_last_index[n]>=0);
+
+    qmul = qscale << 1;
+
+    if (!s->h263_aic) {
+        if (n < 4)
+            level = block[0] * s->y_dc_scale;
+        else
+            level = block[0] * s->c_dc_scale;
+        qadd = (qscale - 1) | 1;
+    }else{
+        qadd = 0;
+        level = block[0];
+    }
+    if(s->ac_pred)
+        nCoeffs=63;
+    else
+        nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
+
+    dct_unquantize_h263_special_helper_armv5te(block, qmul, qadd, nCoeffs + 1);
+    block[0] = level;
+}
+
+static void dct_unquantize_h263_inter_armv5te(MpegEncContext *s,
+                                  DCTELEM *block, int n, int qscale)
+{
+    int i, level, qmul, qadd;
+    int nCoeffs;
+
+    assert(s->block_last_index[n]>=0);
+
+    qadd = (qscale - 1) | 1;
+    qmul = qscale << 1;
+
+    nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
+
+    dct_unquantize_h263_special_helper_armv5te(block, qmul, qadd, nCoeffs + 1);
+}
+
+#define HAVE_DCT_UNQUANTIZE_H263_ARMV5TE_OPTIMIZED
+
+#endif
+
+void MPV_common_init_armv5te(MpegEncContext *s)
+{
+#ifdef HAVE_DCT_UNQUANTIZE_H263_ARMV5TE_OPTIMIZED
+    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_armv5te;
+    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_armv5te;
+#endif
+}
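[Editor's note: MPV_common_init_armv5te above follows the pattern used by every
per-architecture init hook in this patch — compile-time (or run-time, as in the
IWMMXT file that follows) capability detection, then overriding function
pointers in MpegEncContext. A minimal sketch of that dispatch idea, with
hypothetical names:

    #include <stdint.h>

    typedef struct {
        void (*dct_unquantize)(int16_t *block, int qmul, int qadd, int count);
    } DspHooks;

    static void dct_unquantize_c(int16_t *block, int qmul, int qadd, int count)
    {
        /* portable reference implementation, always available */
        int i;
        for (i = 0; i < count; i++)
            if (block[i])
                block[i] = block[i] * qmul + (block[i] < 0 ? -qadd : qadd);
    }

    #ifdef HAVE_ARMV5TE
    /* provided by an arch-specific file when building for armv5te */
    void dct_unquantize_armv5te(int16_t *block, int qmul, int qadd, int count);
    #endif

    void dsp_hooks_init(DspHooks *h)
    {
        h->dct_unquantize = dct_unquantize_c;         /* safe default */
    #ifdef HAVE_ARMV5TE
        h->dct_unquantize = dct_unquantize_armv5te;   /* faster path if built in */
    #endif
    }
]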
diff --git a/src/libffmpeg/libavcodec/armv4l/mpegvideo_iwmmxt.c b/src/libffmpeg/libavcodec/armv4l/mpegvideo_iwmmxt.c
new file mode 100644
index 000000000..1336ac5f8
--- /dev/null
+++ b/src/libffmpeg/libavcodec/armv4l/mpegvideo_iwmmxt.c
@@ -0,0 +1,119 @@
+/*
+ * copyright (c) 2004 AGAWA Koji
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../dsputil.h"
+#include "../mpegvideo.h"
+#include "../avcodec.h"
+
+static void dct_unquantize_h263_intra_iwmmxt(MpegEncContext *s,
+                                             DCTELEM *block, int n, int qscale)
+{
+    int level, qmul, qadd;
+    int nCoeffs;
+    DCTELEM *block_orig = block;
+
+    assert(s->block_last_index[n]>=0);
+
+    qmul = qscale << 1;
+
+    if (!s->h263_aic) {
+        if (n < 4)
+            level = block[0] * s->y_dc_scale;
+        else
+            level = block[0] * s->c_dc_scale;
+        qadd = (qscale - 1) | 1;
+    }else{
+        qadd = 0;
+        level = block[0];
+    }
+    if(s->ac_pred)
+        nCoeffs=63;
+    else
+        nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
+
+    __asm__ __volatile__ (
+/*      "movd %1, %%mm6                  \n\t" //qmul */
+/*      "packssdw %%mm6, %%mm6           \n\t" */
+/*      "packssdw %%mm6, %%mm6           \n\t" */
+        "tbcsth wr6, %[qmul]             \n\t"
+/*      "movd %2, %%mm5                  \n\t" //qadd */
+/*      "packssdw %%mm5, %%mm5           \n\t" */
+/*      "packssdw %%mm5, %%mm5           \n\t" */
+        "tbcsth wr5, %[qadd]             \n\t"
+        "wzero wr7                       \n\t" /* "pxor %%mm7, %%mm7     \n\t" */
+        "wzero wr4                       \n\t" /* "pxor %%mm4, %%mm4     \n\t" */
+        "wsubh wr7, wr5, wr7             \n\t" /* "psubw %%mm5, %%mm7    \n\t" */
+        "1:                              \n\t"
+        "wldrd wr2, [%[block]]           \n\t" /* "movq (%0, %3), %%mm0  \n\t" */
+        "wldrd wr3, [%[block], #8]       \n\t" /* "movq 8(%0, %3), %%mm1 \n\t" */
+        "wmulsl wr0, wr6, wr2            \n\t" /* "pmullw %%mm6, %%mm0   \n\t" */
+        "wmulsl wr1, wr6, wr3            \n\t" /* "pmullw %%mm6, %%mm1   \n\t" */
+/*      "movq (%0, %3), %%mm2            \n\t" */
+/*      "movq 8(%0, %3), %%mm3           \n\t" */
+        "wcmpgtsh wr2, wr4, wr2          \n\t" /* "pcmpgtw %%mm4, %%mm2  \n\t" // block[i] < 0 ? -1 : 0 */
+        "wcmpgtsh wr3, wr4, wr3          \n\t" /* "pcmpgtw %%mm4, %%mm3  \n\t" // block[i] < 0 ? -1 : 0 */
+        "wxor wr0, wr2, wr0              \n\t" /* "pxor %%mm2, %%mm0     \n\t" */
+        "wxor wr1, wr3, wr1              \n\t" /* "pxor %%mm3, %%mm1     \n\t" */
+        "waddh wr0, wr7, wr0             \n\t" /* "paddw %%mm7, %%mm0    \n\t" */
+        "waddh wr1, wr7, wr1             \n\t" /* "paddw %%mm7, %%mm1    \n\t" */
+        "wxor wr2, wr0, wr2              \n\t" /* "pxor %%mm0, %%mm2     \n\t" */
+        "wxor wr3, wr1, wr3              \n\t" /* "pxor %%mm1, %%mm3     \n\t" */
+        "wcmpeqh wr0, wr7, wr0           \n\t" /* "pcmpeqw %%mm7, %%mm0  \n\t" // block[i] == 0 ? -1 : 0 */
+        "wcmpeqh wr1, wr7, wr1           \n\t" /* "pcmpeqw %%mm7, %%mm1  \n\t" // block[i] == 0 ?
-1 : 0 */ + "wandn wr0, wr2, wr0 \n\t" /* "pandn %%mm2, %%mm0 \n\t" */ + "wandn wr1, wr3, wr1 \n\t" /* "pandn %%mm3, %%mm1 \n\t" */ + "wstrd wr0, [%[block]] \n\t" /* "movq %%mm0, (%0, %3) \n\t" */ + "wstrd wr1, [%[block], #8] \n\t" /* "movq %%mm1, 8(%0, %3) \n\t" */ + "add %[block], %[block], #16 \n\t" /* "addl $16, %3 \n\t" */ + "subs %[i], %[i], #1 \n\t" + "bne 1b \n\t" /* "jng 1b \n\t" */ + :[block]"+r"(block) + :[i]"r"((nCoeffs + 8) / 8), [qmul]"r"(qmul), [qadd]"r"(qadd) + :"memory"); + + block_orig[0] = level; +} + +#if 0 +static void dct_unquantize_h263_inter_iwmmxt(MpegEncContext *s, + DCTELEM *block, int n, int qscale) +{ + int nCoeffs; + + assert(s->block_last_index[n]>=0); + + if(s->ac_pred) + nCoeffs=63; + else + nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; + + ippiQuantInvInter_Compact_H263_16s_I(block, nCoeffs+1, qscale); +} +#endif + +void MPV_common_init_iwmmxt(MpegEncContext *s) +{ + if (!(mm_flags & MM_IWMMXT)) return; + + s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_iwmmxt; +#if 0 + s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_iwmmxt; +#endif +} diff --git a/src/libffmpeg/libavcodec/armv4l/simple_idct_armv5te.S b/src/libffmpeg/libavcodec/armv4l/simple_idct_armv5te.S new file mode 100644 index 000000000..28bee0643 --- /dev/null +++ b/src/libffmpeg/libavcodec/armv4l/simple_idct_armv5te.S @@ -0,0 +1,718 @@ +/* + * Simple IDCT + * + * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> + * Copyright (c) 2006 Mans Rullgard <mru@inprovide.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define W1 22725 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ +#define W2 21407 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ +#define W3 19266 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ +#define W4 16383 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ +#define W5 12873 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ +#define W6 8867 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ +#define W7 4520 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ +#define ROW_SHIFT 11 +#define COL_SHIFT 20 + +#define W13 (W1 | (W3 << 16)) +#define W26 (W2 | (W6 << 16)) +#define W57 (W5 | (W7 << 16)) + + .text + .align +w13: .long W13 +w26: .long W26 +w57: .long W57 + + .align + .func idct_row_armv5te +idct_row_armv5te: + str lr, [sp, #-4]! 
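 @ (Editorial note: the four loads below fetch the whole 8x16-bit coefficient
 @ row; if coefficients 1..7 are all zero, the row IDCT degenerates to
 @ replicating row[0] << 3 across the row, handled at row_dc_only.)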
+ + ldrd v1, [a1, #8] + ldrd a3, [a1] /* a3 = row[1:0], a4 = row[3:2] */ + orrs v1, v1, v2 + cmpeq v1, a4 + cmpeq v1, a3, lsr #16 + beq row_dc_only + + mov v1, #(1<<(ROW_SHIFT-1)) + mov ip, #16384 + sub ip, ip, #1 /* ip = W4 */ + smlabb v1, ip, a3, v1 /* v1 = W4*row[0]+(1<<(RS-1)) */ + ldr ip, [pc, #(w26-.-8)] /* ip = W2 | (W6 << 16) */ + smultb a2, ip, a4 + smulbb lr, ip, a4 + add v2, v1, a2 + sub v3, v1, a2 + sub v4, v1, lr + add v1, v1, lr + + ldr ip, [pc, #(w13-.-8)] /* ip = W1 | (W3 << 16) */ + ldr lr, [pc, #(w57-.-8)] /* lr = W5 | (W7 << 16) */ + smulbt v5, ip, a3 + smultt v6, lr, a4 + smlatt v5, ip, a4, v5 + smultt a2, ip, a3 + smulbt v7, lr, a3 + sub v6, v6, a2 + smulbt a2, ip, a4 + smultt fp, lr, a3 + sub v7, v7, a2 + smulbt a2, lr, a4 + ldrd a3, [a1, #8] /* a3=row[5:4] a4=row[7:6] */ + sub fp, fp, a2 + + orrs a2, a3, a4 + beq 1f + + smlabt v5, lr, a3, v5 + smlabt v6, ip, a3, v6 + smlatt v5, lr, a4, v5 + smlabt v6, lr, a4, v6 + smlatt v7, lr, a3, v7 + smlatt fp, ip, a3, fp + smulbt a2, ip, a4 + smlatt v7, ip, a4, v7 + sub fp, fp, a2 + + ldr ip, [pc, #(w26-.-8)] /* ip = W2 | (W6 << 16) */ + mov a2, #16384 + sub a2, a2, #1 /* a2 = W4 */ + smulbb a2, a2, a3 /* a2 = W4*row[4] */ + smultb lr, ip, a4 /* lr = W6*row[6] */ + add v1, v1, a2 /* v1 += W4*row[4] */ + add v1, v1, lr /* v1 += W6*row[6] */ + add v4, v4, a2 /* v4 += W4*row[4] */ + sub v4, v4, lr /* v4 -= W6*row[6] */ + smulbb lr, ip, a4 /* lr = W2*row[6] */ + sub v2, v2, a2 /* v2 -= W4*row[4] */ + sub v2, v2, lr /* v2 -= W2*row[6] */ + sub v3, v3, a2 /* v3 -= W4*row[4] */ + add v3, v3, lr /* v3 += W2*row[6] */ + +1: add a2, v1, v5 + mov a3, a2, lsr #11 + bic a3, a3, #0x1f0000 + sub a2, v2, v6 + mov a2, a2, lsr #11 + add a3, a3, a2, lsl #16 + add a2, v3, v7 + mov a4, a2, lsr #11 + bic a4, a4, #0x1f0000 + add a2, v4, fp + mov a2, a2, lsr #11 + add a4, a4, a2, lsl #16 + strd a3, [a1] + + sub a2, v4, fp + mov a3, a2, lsr #11 + bic a3, a3, #0x1f0000 + sub a2, v3, v7 + mov a2, a2, lsr #11 + add a3, a3, a2, lsl #16 + add a2, v2, v6 + mov a4, a2, lsr #11 + bic a4, a4, #0x1f0000 + sub a2, v1, v5 + mov a2, a2, lsr #11 + add a4, a4, a2, lsl #16 + strd a3, [a1, #8] + + ldr pc, [sp], #4 + +row_dc_only: + orr a3, a3, a3, lsl #16 + bic a3, a3, #0xe000 + mov a3, a3, lsl #3 + mov a4, a3 + strd a3, [a1] + strd a3, [a1, #8] + + ldr pc, [sp], #4 + .endfunc + + .macro idct_col + ldr a4, [a1] /* a4 = col[1:0] */ + mov ip, #16384 + sub ip, ip, #1 /* ip = W4 */ +#if 0 + mov v1, #(1<<(COL_SHIFT-1)) + smlabt v2, ip, a4, v1 /* v2 = W4*col[1] + (1<<(COL_SHIFT-1)) */ + smlabb v1, ip, a4, v1 /* v1 = W4*col[0] + (1<<(COL_SHIFT-1)) */ + ldr a4, [a1, #(16*4)] +#else + mov v1, #((1<<(COL_SHIFT-1))/W4) /* this matches the C version */ + add v2, v1, a4, asr #16 + rsb v2, v2, v2, lsl #14 + mov a4, a4, lsl #16 + add v1, v1, a4, asr #16 + ldr a4, [a1, #(16*4)] + rsb v1, v1, v1, lsl #14 +#endif + + smulbb lr, ip, a4 + smulbt a3, ip, a4 + sub v3, v1, lr + sub v5, v1, lr + add v7, v1, lr + add v1, v1, lr + sub v4, v2, a3 + sub v6, v2, a3 + add fp, v2, a3 + ldr ip, [pc, #(w26-.-8)] + ldr a4, [a1, #(16*2)] + add v2, v2, a3 + + smulbb lr, ip, a4 + smultb a3, ip, a4 + add v1, v1, lr + sub v7, v7, lr + add v3, v3, a3 + sub v5, v5, a3 + smulbt lr, ip, a4 + smultt a3, ip, a4 + add v2, v2, lr + sub fp, fp, lr + add v4, v4, a3 + ldr a4, [a1, #(16*6)] + sub v6, v6, a3 + + smultb lr, ip, a4 + smulbb a3, ip, a4 + add v1, v1, lr + sub v7, v7, lr + sub v3, v3, a3 + add v5, v5, a3 + smultt lr, ip, a4 + smulbt a3, ip, a4 + add v2, v2, lr + sub fp, fp, lr + sub v4, v4, a3 + add v6, v6, 
a3 + + stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp} + + ldr ip, [pc, #(w13-.-8)] + ldr a4, [a1, #(16*1)] + ldr lr, [pc, #(w57-.-8)] + smulbb v1, ip, a4 + smultb v3, ip, a4 + smulbb v5, lr, a4 + smultb v7, lr, a4 + smulbt v2, ip, a4 + smultt v4, ip, a4 + smulbt v6, lr, a4 + smultt fp, lr, a4 + rsb v4, v4, #0 + ldr a4, [a1, #(16*3)] + rsb v3, v3, #0 + + smlatb v1, ip, a4, v1 + smlatb v3, lr, a4, v3 + smulbb a3, ip, a4 + smulbb a2, lr, a4 + sub v5, v5, a3 + sub v7, v7, a2 + smlatt v2, ip, a4, v2 + smlatt v4, lr, a4, v4 + smulbt a3, ip, a4 + smulbt a2, lr, a4 + sub v6, v6, a3 + ldr a4, [a1, #(16*5)] + sub fp, fp, a2 + + smlabb v1, lr, a4, v1 + smlabb v3, ip, a4, v3 + smlatb v5, lr, a4, v5 + smlatb v7, ip, a4, v7 + smlabt v2, lr, a4, v2 + smlabt v4, ip, a4, v4 + smlatt v6, lr, a4, v6 + ldr a3, [a1, #(16*7)] + smlatt fp, ip, a4, fp + + smlatb v1, lr, a3, v1 + smlabb v3, lr, a3, v3 + smlatb v5, ip, a3, v5 + smulbb a4, ip, a3 + smlatt v2, lr, a3, v2 + sub v7, v7, a4 + smlabt v4, lr, a3, v4 + smulbt a4, ip, a3 + smlatt v6, ip, a3, v6 + sub fp, fp, a4 + .endm + + .align + .func idct_col_armv5te +idct_col_armv5te: + str lr, [sp, #-4]! + + idct_col + + ldmfd sp!, {a3, a4} + adds a2, a3, v1 + mov a2, a2, lsr #20 + orrmi a2, a2, #0xf000 + add ip, a4, v2 + mov ip, ip, asr #20 + orr a2, a2, ip, lsl #16 + str a2, [a1] + subs a3, a3, v1 + mov a2, a3, lsr #20 + orrmi a2, a2, #0xf000 + sub a4, a4, v2 + mov a4, a4, asr #20 + orr a2, a2, a4, lsl #16 + ldmfd sp!, {a3, a4} + str a2, [a1, #(16*7)] + + subs a2, a3, v3 + mov a2, a2, lsr #20 + orrmi a2, a2, #0xf000 + sub ip, a4, v4 + mov ip, ip, asr #20 + orr a2, a2, ip, lsl #16 + str a2, [a1, #(16*1)] + adds a3, a3, v3 + mov a2, a3, lsr #20 + orrmi a2, a2, #0xf000 + add a4, a4, v4 + mov a4, a4, asr #20 + orr a2, a2, a4, lsl #16 + ldmfd sp!, {a3, a4} + str a2, [a1, #(16*6)] + + adds a2, a3, v5 + mov a2, a2, lsr #20 + orrmi a2, a2, #0xf000 + add ip, a4, v6 + mov ip, ip, asr #20 + orr a2, a2, ip, lsl #16 + str a2, [a1, #(16*2)] + subs a3, a3, v5 + mov a2, a3, lsr #20 + orrmi a2, a2, #0xf000 + sub a4, a4, v6 + mov a4, a4, asr #20 + orr a2, a2, a4, lsl #16 + ldmfd sp!, {a3, a4} + str a2, [a1, #(16*5)] + + adds a2, a3, v7 + mov a2, a2, lsr #20 + orrmi a2, a2, #0xf000 + add ip, a4, fp + mov ip, ip, asr #20 + orr a2, a2, ip, lsl #16 + str a2, [a1, #(16*3)] + subs a3, a3, v7 + mov a2, a3, lsr #20 + orrmi a2, a2, #0xf000 + sub a4, a4, fp + mov a4, a4, asr #20 + orr a2, a2, a4, lsl #16 + str a2, [a1, #(16*4)] + + ldr pc, [sp], #4 + .endfunc + + .align + .func idct_col_put_armv5te +idct_col_put_armv5te: + str lr, [sp, #-4]! + + idct_col + + ldmfd sp!, {a3, a4} + ldr lr, [sp, #32] + add a2, a3, v1 + movs a2, a2, asr #20 + movmi a2, #0 + cmp a2, #255 + movgt a2, #255 + add ip, a4, v2 + movs ip, ip, asr #20 + movmi ip, #0 + cmp ip, #255 + movgt ip, #255 + orr a2, a2, ip, lsl #8 + sub a3, a3, v1 + movs a3, a3, asr #20 + movmi a3, #0 + cmp a3, #255 + movgt a3, #255 + sub a4, a4, v2 + movs a4, a4, asr #20 + movmi a4, #0 + cmp a4, #255 + ldr v1, [sp, #28] + movgt a4, #255 + strh a2, [v1] + add a2, v1, #2 + str a2, [sp, #28] + orr a2, a3, a4, lsl #8 + rsb v2, lr, lr, lsl #3 + ldmfd sp!, {a3, a4} + strh a2, [v2, v1]! + + sub a2, a3, v3 + movs a2, a2, asr #20 + movmi a2, #0 + cmp a2, #255 + movgt a2, #255 + sub ip, a4, v4 + movs ip, ip, asr #20 + movmi ip, #0 + cmp ip, #255 + movgt ip, #255 + orr a2, a2, ip, lsl #8 + strh a2, [v1, lr]! 
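 @ (Editorial note: each movs/movmi #0 ... cmp #255/movgt #255 group above and
 @ below clamps one column result, shifted down by COL_SHIFT, into the 0..255
 @ pixel range before two results are packed into each 16-bit store.)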
+ add a3, a3, v3 + movs a2, a3, asr #20 + movmi a2, #0 + cmp a2, #255 + movgt a2, #255 + add a4, a4, v4 + movs a4, a4, asr #20 + movmi a4, #0 + cmp a4, #255 + movgt a4, #255 + orr a2, a2, a4, lsl #8 + ldmfd sp!, {a3, a4} + strh a2, [v2, -lr]! + + add a2, a3, v5 + movs a2, a2, asr #20 + movmi a2, #0 + cmp a2, #255 + movgt a2, #255 + add ip, a4, v6 + movs ip, ip, asr #20 + movmi ip, #0 + cmp ip, #255 + movgt ip, #255 + orr a2, a2, ip, lsl #8 + strh a2, [v1, lr]! + sub a3, a3, v5 + movs a2, a3, asr #20 + movmi a2, #0 + cmp a2, #255 + movgt a2, #255 + sub a4, a4, v6 + movs a4, a4, asr #20 + movmi a4, #0 + cmp a4, #255 + movgt a4, #255 + orr a2, a2, a4, lsl #8 + ldmfd sp!, {a3, a4} + strh a2, [v2, -lr]! + + add a2, a3, v7 + movs a2, a2, asr #20 + movmi a2, #0 + cmp a2, #255 + movgt a2, #255 + add ip, a4, fp + movs ip, ip, asr #20 + movmi ip, #0 + cmp ip, #255 + movgt ip, #255 + orr a2, a2, ip, lsl #8 + strh a2, [v1, lr] + sub a3, a3, v7 + movs a2, a3, asr #20 + movmi a2, #0 + cmp a2, #255 + movgt a2, #255 + sub a4, a4, fp + movs a4, a4, asr #20 + movmi a4, #0 + cmp a4, #255 + movgt a4, #255 + orr a2, a2, a4, lsl #8 + strh a2, [v2, -lr] + + ldr pc, [sp], #4 + .endfunc + + .align + .func idct_col_add_armv5te +idct_col_add_armv5te: + str lr, [sp, #-4]! + + idct_col + + ldr lr, [sp, #36] + + ldmfd sp!, {a3, a4} + ldrh ip, [lr] + add a2, a3, v1 + mov a2, a2, asr #20 + sub a3, a3, v1 + and v1, ip, #255 + adds a2, a2, v1 + movmi a2, #0 + cmp a2, #255 + movgt a2, #255 + add v1, a4, v2 + mov v1, v1, asr #20 + adds v1, v1, ip, lsr #8 + movmi v1, #0 + cmp v1, #255 + movgt v1, #255 + orr a2, a2, v1, lsl #8 + ldr v1, [sp, #32] + sub a4, a4, v2 + rsb v2, v1, v1, lsl #3 + ldrh ip, [v2, lr]! + strh a2, [lr] + mov a3, a3, asr #20 + and a2, ip, #255 + adds a3, a3, a2 + movmi a3, #0 + cmp a3, #255 + movgt a3, #255 + mov a4, a4, asr #20 + adds a4, a4, ip, lsr #8 + movmi a4, #0 + cmp a4, #255 + movgt a4, #255 + add a2, lr, #2 + str a2, [sp, #28] + orr a2, a3, a4, lsl #8 + strh a2, [v2] + + ldmfd sp!, {a3, a4} + ldrh ip, [lr, v1]! + sub a2, a3, v3 + mov a2, a2, asr #20 + add a3, a3, v3 + and v3, ip, #255 + adds a2, a2, v3 + movmi a2, #0 + cmp a2, #255 + movgt a2, #255 + sub v3, a4, v4 + mov v3, v3, asr #20 + adds v3, v3, ip, lsr #8 + movmi v3, #0 + cmp v3, #255 + movgt v3, #255 + orr a2, a2, v3, lsl #8 + add a4, a4, v4 + ldrh ip, [v2, -v1]! + strh a2, [lr] + mov a3, a3, asr #20 + and a2, ip, #255 + adds a3, a3, a2 + movmi a3, #0 + cmp a3, #255 + movgt a3, #255 + mov a4, a4, asr #20 + adds a4, a4, ip, lsr #8 + movmi a4, #0 + cmp a4, #255 + movgt a4, #255 + orr a2, a3, a4, lsl #8 + strh a2, [v2] + + ldmfd sp!, {a3, a4} + ldrh ip, [lr, v1]! + add a2, a3, v5 + mov a2, a2, asr #20 + sub a3, a3, v5 + and v3, ip, #255 + adds a2, a2, v3 + movmi a2, #0 + cmp a2, #255 + movgt a2, #255 + add v3, a4, v6 + mov v3, v3, asr #20 + adds v3, v3, ip, lsr #8 + movmi v3, #0 + cmp v3, #255 + movgt v3, #255 + orr a2, a2, v3, lsl #8 + sub a4, a4, v6 + ldrh ip, [v2, -v1]! + strh a2, [lr] + mov a3, a3, asr #20 + and a2, ip, #255 + adds a3, a3, a2 + movmi a3, #0 + cmp a3, #255 + movgt a3, #255 + mov a4, a4, asr #20 + adds a4, a4, ip, lsr #8 + movmi a4, #0 + cmp a4, #255 + movgt a4, #255 + orr a2, a3, a4, lsl #8 + strh a2, [v2] + + ldmfd sp!, {a3, a4} + ldrh ip, [lr, v1]! 
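 @ (Editorial note: the add variant differs from put only in that each pair of
 @ destination pixels is first loaded with ldrh, the two column results are
 @ added to them, and the clamped sums are stored back.)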
+ add a2, a3, v7 + mov a2, a2, asr #20 + sub a3, a3, v7 + and v3, ip, #255 + adds a2, a2, v3 + movmi a2, #0 + cmp a2, #255 + movgt a2, #255 + add v3, a4, fp + mov v3, v3, asr #20 + adds v3, v3, ip, lsr #8 + movmi v3, #0 + cmp v3, #255 + movgt v3, #255 + orr a2, a2, v3, lsl #8 + sub a4, a4, fp + ldrh ip, [v2, -v1]! + strh a2, [lr] + mov a3, a3, asr #20 + and a2, ip, #255 + adds a3, a3, a2 + movmi a3, #0 + cmp a3, #255 + movgt a3, #255 + mov a4, a4, asr #20 + adds a4, a4, ip, lsr #8 + movmi a4, #0 + cmp a4, #255 + movgt a4, #255 + orr a2, a3, a4, lsl #8 + strh a2, [v2] + + ldr pc, [sp], #4 + .endfunc + + .align + .global simple_idct_armv5te + .func simple_idct_armv5te +simple_idct_armv5te: + stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, lr} + + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + + sub a1, a1, #(16*7) + + bl idct_col_armv5te + add a1, a1, #4 + bl idct_col_armv5te + add a1, a1, #4 + bl idct_col_armv5te + add a1, a1, #4 + bl idct_col_armv5te + + ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc} + .endfunc + + .align + .global simple_idct_add_armv5te + .func simple_idct_add_armv5te +simple_idct_add_armv5te: + stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr} + + mov a1, a3 + + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + + sub a1, a1, #(16*7) + + bl idct_col_add_armv5te + add a1, a1, #4 + bl idct_col_add_armv5te + add a1, a1, #4 + bl idct_col_add_armv5te + add a1, a1, #4 + bl idct_col_add_armv5te + + add sp, sp, #8 + ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc} + .endfunc + + .align + .global simple_idct_put_armv5te + .func simple_idct_put_armv5te +simple_idct_put_armv5te: + stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr} + + mov a1, a3 + + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + add a1, a1, #16 + bl idct_row_armv5te + + sub a1, a1, #(16*7) + + bl idct_col_put_armv5te + add a1, a1, #4 + bl idct_col_put_armv5te + add a1, a1, #4 + bl idct_col_put_armv5te + add a1, a1, #4 + bl idct_col_put_armv5te + + add sp, sp, #8 + ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc} + .endfunc diff --git a/src/libffmpeg/libavcodec/avcodec.h b/src/libffmpeg/libavcodec/avcodec.h index d8090ed32..7d7678455 100644 --- a/src/libffmpeg/libavcodec/avcodec.h +++ b/src/libffmpeg/libavcodec/avcodec.h @@ -37,13 +37,13 @@ extern "C" { #define AV_STRINGIFY(s) AV_TOSTRING(s) #define AV_TOSTRING(s) #s -#define LIBAVCODEC_VERSION_INT ((51<<16)+(25<<8)+0) -#define LIBAVCODEC_VERSION 51.25.0 +#define LIBAVCODEC_VERSION_INT ((51<<16)+(28<<8)+0) +#define LIBAVCODEC_VERSION 51.28.0 #define LIBAVCODEC_BUILD LIBAVCODEC_VERSION_INT #define LIBAVCODEC_IDENT "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION) -#define AV_NOPTS_VALUE int64_t_C(0x8000000000000000) +#define AV_NOPTS_VALUE INT64_C(0x8000000000000000) #define AV_TIME_BASE 1000000 #define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE} @@ -156,6 
+156,7 @@ enum CodecID { CODEC_ID_TIERTEXSEQVIDEO, CODEC_ID_TIFF, CODEC_ID_GIF, + CODEC_ID_FFH264, /* various pcm "codecs" */ CODEC_ID_PCM_S16LE= 0x10000, @@ -243,6 +244,7 @@ enum CodecID { CODEC_ID_WAVPACK, CODEC_ID_DSICINAUDIO, CODEC_ID_IMC, + CODEC_ID_MUSEPACK7, /* subtitle codecs */ CODEC_ID_DVD_SUBTITLE= 0x17000, @@ -372,7 +374,7 @@ typedef struct RcOverride{ #define CODEC_FLAG2_LOCAL_HEADER 0x00000008 ///< place global headers at every keyframe instead of in extradata #define CODEC_FLAG2_BPYRAMID 0x00000010 ///< H.264 allow b-frames to be used as references #define CODEC_FLAG2_WPRED 0x00000020 ///< H.264 weighted biprediction for b-frames -#define CODEC_FLAG2_MIXED_REFS 0x00000040 ///< H.264 multiple references per partition +#define CODEC_FLAG2_MIXED_REFS 0x00000040 ///< H.264 one reference per partition, as opposed to one reference per macroblock #define CODEC_FLAG2_8X8DCT 0x00000080 ///< H.264 high profile 8x8 transform #define CODEC_FLAG2_FASTPSKIP 0x00000100 ///< H.264 fast pskip #define CODEC_FLAG2_AUD 0x00000200 ///< H.264 access unit delimiters @@ -380,6 +382,7 @@ typedef struct RcOverride{ #define CODEC_FLAG2_INTRA_VLC 0x00000800 ///< use MPEG-2 intra VLC table #define CODEC_FLAG2_MEMC_ONLY 0x00001000 ///< only do ME/MC (I frames -> ref, P frame -> ME+MC) #define CODEC_FLAG2_DROP_FRAME_TIMECODE 0x00002000 ///< timecode is in drop frame format +#define CODEC_FLAG2_SKIP_RD 0x00004000 ///< RD optimal MB level residual skiping /* Unsupported options : * Syntax Arithmetic coding (SAC) @@ -2090,9 +2093,6 @@ typedef struct AVCodec { int (*decode)(AVCodecContext *, void *outdata, int *outdata_size, uint8_t *buf, int buf_size); int capabilities; -#if LIBAVCODEC_VERSION_INT < ((50<<16)+(0<<8)+0) - void *dummy; // FIXME remove next time we break binary compatibility -#endif struct AVCodec *next; void (*flush)(AVCodecContext *); const AVRational *supported_framerates; ///array of supported framerates, or NULL if any, array is terminated by {0,0} @@ -2310,6 +2310,7 @@ extern AVCodec libgsm_decoder; extern AVCodec bmp_decoder; extern AVCodec mmvideo_decoder; extern AVCodec zmbv_decoder; +extern AVCodec zmbv_encoder; extern AVCodec avs_decoder; extern AVCodec smacker_decoder; extern AVCodec smackaud_decoder; @@ -2324,6 +2325,7 @@ extern AVCodec dsicinaudio_decoder; extern AVCodec tiertexseqvideo_decoder; extern AVCodec tiff_decoder; extern AVCodec imc_decoder; +extern AVCodec mpc7_decoder; /* pcm codecs */ #define PCM_CODEC(id, name) \ @@ -2691,20 +2693,6 @@ int img_crop(AVPicture *dst, const AVPicture *src, int img_pad(AVPicture *dst, const AVPicture *src, int height, int width, int pix_fmt, int padtop, int padbottom, int padleft, int padright, int *color); -/* endian macros */ -#if !defined(BE_16) || !defined(BE_32) || !defined(LE_16) || !defined(LE_32) -#define BE_16(x) ((((uint8_t*)(x))[0] << 8) | ((uint8_t*)(x))[1]) -#define BE_32(x) ((((uint8_t*)(x))[0] << 24) | \ - (((uint8_t*)(x))[1] << 16) | \ - (((uint8_t*)(x))[2] << 8) | \ - ((uint8_t*)(x))[3]) -#define LE_16(x) ((((uint8_t*)(x))[1] << 8) | ((uint8_t*)(x))[0]) -#define LE_32(x) ((((uint8_t*)(x))[3] << 24) | \ - (((uint8_t*)(x))[2] << 16) | \ - (((uint8_t*)(x))[1] << 8) | \ - ((uint8_t*)(x))[0]) -#endif - extern unsigned int av_xiphlacing(unsigned char *s, unsigned int v); /* unused static macro */ diff --git a/src/libffmpeg/libavcodec/bitstream.h b/src/libffmpeg/libavcodec/bitstream.h index af25b6dcf..29e0f441e 100644 --- a/src/libffmpeg/libavcodec/bitstream.h +++ b/src/libffmpeg/libavcodec/bitstream.h @@ -187,12 +187,12 @@ 
static inline uint##x##_t unaligned##x(const void *v) { \ } # elif defined(__DECC) # define unaligned(x) \ -static inline uint##x##_t unaligned##x##(const void *v) { \ +static inline uint##x##_t unaligned##x(const void *v) { \ return *(const __unaligned uint##x##_t *) v; \ } # else # define unaligned(x) \ -static inline uint##x##_t unaligned##x##(const void *v) { \ +static inline uint##x##_t unaligned##x(const void *v) { \ return *(const uint##x##_t *) v; \ } # endif @@ -877,7 +877,7 @@ void free_vlc(VLC *vlc); * read the longest vlc code * = (max_vlc_length + bits - 1) / bits */ -static always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2], +static av_always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2], int bits, int max_depth) { int code; diff --git a/src/libffmpeg/libavcodec/bytestream.h b/src/libffmpeg/libavcodec/bytestream.h index 25c457fe4..a742fa1c1 100644 --- a/src/libffmpeg/libavcodec/bytestream.h +++ b/src/libffmpeg/libavcodec/bytestream.h @@ -22,32 +22,32 @@ #ifndef FFMPEG_BYTESTREAM_H #define FFMPEG_BYTESTREAM_H -static always_inline unsigned int bytestream_get_le32(uint8_t **b) +static av_always_inline unsigned int bytestream_get_le32(uint8_t **b) { (*b) += 4; return LE_32(*b - 4); } -static always_inline unsigned int bytestream_get_le16(uint8_t **b) +static av_always_inline unsigned int bytestream_get_le16(uint8_t **b) { (*b) += 2; return LE_16(*b - 2); } -static always_inline unsigned int bytestream_get_byte(uint8_t **b) +static av_always_inline unsigned int bytestream_get_byte(uint8_t **b) { (*b)++; return (*b)[-1]; } -static always_inline unsigned int bytestream_get_buffer(uint8_t **b, uint8_t *dst, unsigned int size) +static av_always_inline unsigned int bytestream_get_buffer(uint8_t **b, uint8_t *dst, unsigned int size) { memcpy(dst, *b, size); (*b) += size; return size; } -static always_inline void bytestream_put_be32(uint8_t **b, const unsigned int value) +static av_always_inline void bytestream_put_be32(uint8_t **b, const unsigned int value) { *(*b)++ = value >> 24; *(*b)++ = value >> 16; @@ -55,13 +55,13 @@ static always_inline void bytestream_put_be32(uint8_t **b, const unsigned int va *(*b)++ = value; }; -static always_inline void bytestream_put_be16(uint8_t **b, const unsigned int value) +static av_always_inline void bytestream_put_be16(uint8_t **b, const unsigned int value) { *(*b)++ = value >> 8; *(*b)++ = value; } -static always_inline void bytestream_put_le32(uint8_t **b, const unsigned int value) +static av_always_inline void bytestream_put_le32(uint8_t **b, const unsigned int value) { *(*b)++ = value; *(*b)++ = value >> 8; @@ -69,18 +69,18 @@ static always_inline void bytestream_put_le32(uint8_t **b, const unsigned int va *(*b)++ = value >> 24; } -static always_inline void bytestream_put_le16(uint8_t **b, const unsigned int value) +static av_always_inline void bytestream_put_le16(uint8_t **b, const unsigned int value) { *(*b)++ = value; *(*b)++ = value >> 8; } -static always_inline void bytestream_put_byte(uint8_t **b, const unsigned int value) +static av_always_inline void bytestream_put_byte(uint8_t **b, const unsigned int value) { *(*b)++ = value; } -static always_inline void bytestream_put_buffer(uint8_t **b, const uint8_t *src, unsigned int size) +static av_always_inline void bytestream_put_buffer(uint8_t **b, const uint8_t *src, unsigned int size) { memcpy(*b, src, size); (*b) += size; diff --git a/src/libffmpeg/libavcodec/cabac.h b/src/libffmpeg/libavcodec/cabac.h index 43fe78e3b..f47406a9e 100644 --- 
a/src/libffmpeg/libavcodec/cabac.h +++ b/src/libffmpeg/libavcodec/cabac.h @@ -363,7 +363,7 @@ static inline void renorm_cabac_decoder_once(CABACContext *c){ refill(c); } -static int always_inline get_cabac_inline(CABACContext *c, uint8_t * const state){ +static int av_always_inline get_cabac_inline(CABACContext *c, uint8_t * const state){ //FIXME gcc generates duplicate load/stores for c->low and c->range #define LOW "0" #define RANGE "4" @@ -631,7 +631,7 @@ static int get_cabac_bypass(CABACContext *c){ } -static always_inline int get_cabac_bypass_sign(CABACContext *c, int val){ +static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val){ #if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) asm volatile( "movl "RANGE "(%1), %%ebx \n\t" diff --git a/src/libffmpeg/libavcodec/cinepak.c b/src/libffmpeg/libavcodec/cinepak.c index e137377e5..fd95b739e 100644 --- a/src/libffmpeg/libavcodec/cinepak.c +++ b/src/libffmpeg/libavcodec/cinepak.c @@ -26,6 +26,8 @@ * by Ewald Snel <ewald@rambo.its.tudelft.nl> * For more information on the Cinepak algorithm, visit: * http://www.csse.monash.edu.au/~timf/ + * For more information on the quirky data inside Sega FILM/CPK files, visit: + * http://wiki.multimedia.cx/index.php?title=Sega_FILM */ #include <stdio.h> @@ -67,6 +69,8 @@ typedef struct CinepakContext { int palette_video; cvid_strip_t strips[MAX_STRIPS]; + int sega_film_skip_bytes; + } CinepakContext; static void cinepak_decode_codebook (cvid_codebook_t *codebook, @@ -319,8 +323,6 @@ static int cinepak_decode (CinepakContext *s) int i, result, strip_size, frame_flags, num_strips; int y0 = 0; int encoded_buf_size; - /* if true, Cinepak data is from a Sega FILM/CPK file */ - int sega_film_data = 0; if (s->size < 10) return -1; @@ -328,12 +330,29 @@ static int cinepak_decode (CinepakContext *s) frame_flags = s->data[0]; num_strips = BE_16 (&s->data[8]); encoded_buf_size = ((s->data[1] << 16) | BE_16 (&s->data[2])); - if (encoded_buf_size != s->size) - sega_film_data = 1; - if (sega_film_data) - s->data += 12; - else - s->data += 10; + + /* if this is the first frame, check for deviant Sega FILM data */ + if (s->sega_film_skip_bytes == -1) { + if (encoded_buf_size != s->size) { + /* If the encoded frame size differs from the frame size as indicated + * by the container file, this data likely comes from a Sega FILM/CPK file. + * If the frame header is followed by the bytes FE 00 00 06 00 00 then + * this is probably one of the two known files that have 6 extra bytes + * after the frame header. Else, assume 2 extra bytes. 
*/ + if ((s->data[10] == 0xFE) && + (s->data[11] == 0x00) && + (s->data[12] == 0x00) && + (s->data[13] == 0x06) && + (s->data[14] == 0x00) && + (s->data[15] == 0x00)) + s->sega_film_skip_bytes = 6; + else + s->sega_film_skip_bytes = 2; + } else + s->sega_film_skip_bytes = 0; + } + + s->data += 10 + s->sega_film_skip_bytes; if (num_strips > MAX_STRIPS) num_strips = MAX_STRIPS; @@ -377,6 +396,7 @@ static int cinepak_decode_init(AVCodecContext *avctx) s->avctx = avctx; s->width = (avctx->width + 3) & ~3; s->height = (avctx->height + 3) & ~3; + s->sega_film_skip_bytes = -1; /* uninitialized state */ // check for paletted data if ((avctx->palctrl == NULL) || (avctx->bits_per_sample == 40)) { diff --git a/src/libffmpeg/libavcodec/cook.c b/src/libffmpeg/libavcodec/cook.c index 47d9ce2c3..943addb89 100644 --- a/src/libffmpeg/libavcodec/cook.c +++ b/src/libffmpeg/libavcodec/cook.c @@ -312,7 +312,7 @@ static int cook_decode_close(AVCodecContext *avctx) { int i; COOKContext *q = avctx->priv_data; - av_log(NULL,AV_LOG_DEBUG, "Deallocating memory.\n"); + av_log(avctx,AV_LOG_DEBUG, "Deallocating memory.\n"); /* Free allocated memory buffers. */ av_free(q->mlt_window); @@ -1160,12 +1160,12 @@ static int cook_decode_init(AVCodecContext *avctx) /* Take care of the codec specific extradata. */ if (avctx->extradata_size <= 0) { - av_log(NULL,AV_LOG_ERROR,"Necessary extradata missing!\n"); + av_log(avctx,AV_LOG_ERROR,"Necessary extradata missing!\n"); return -1; } else { /* 8 for mono, 16 for stereo, ? for multichannel Swap to right endianness so we don't need to care later on. */ - av_log(NULL,AV_LOG_DEBUG,"codecdata_length=%d\n",avctx->extradata_size); + av_log(avctx,AV_LOG_DEBUG,"codecdata_length=%d\n",avctx->extradata_size); if (avctx->extradata_size >= 8){ e->cookversion = be2me_32(e->cookversion); e->samples_per_frame = be2me_16(e->samples_per_frame); @@ -1201,24 +1201,24 @@ static int cook_decode_init(AVCodecContext *avctx) switch (e->cookversion) { case MONO_COOK1: if (q->nb_channels != 1) { - av_log(NULL,AV_LOG_ERROR,"Container channels != 1, report sample!\n"); + av_log(avctx,AV_LOG_ERROR,"Container channels != 1, report sample!\n"); return -1; } - av_log(NULL,AV_LOG_DEBUG,"MONO_COOK1\n"); + av_log(avctx,AV_LOG_DEBUG,"MONO_COOK1\n"); break; case MONO_COOK2: if (q->nb_channels != 1) { q->joint_stereo = 0; q->bits_per_subpacket = q->bits_per_subpacket/2; } - av_log(NULL,AV_LOG_DEBUG,"MONO_COOK2\n"); + av_log(avctx,AV_LOG_DEBUG,"MONO_COOK2\n"); break; case JOINT_STEREO: if (q->nb_channels != 2) { - av_log(NULL,AV_LOG_ERROR,"Container channels != 2, report sample!\n"); + av_log(avctx,AV_LOG_ERROR,"Container channels != 2, report sample!\n"); return -1; } - av_log(NULL,AV_LOG_DEBUG,"JOINT_STEREO\n"); + av_log(avctx,AV_LOG_DEBUG,"JOINT_STEREO\n"); if (avctx->extradata_size >= 16){ q->total_subbands = q->subbands + e->js_subband_start; q->js_subband_start = e->js_subband_start; @@ -1233,11 +1233,11 @@ static int cook_decode_init(AVCodecContext *avctx) } break; case MC_COOK: - av_log(NULL,AV_LOG_ERROR,"MC_COOK not supported!\n"); + av_log(avctx,AV_LOG_ERROR,"MC_COOK not supported!\n"); return -1; break; default: - av_log(NULL,AV_LOG_ERROR,"Unknown Cook version, report sample!\n"); + av_log(avctx,AV_LOG_ERROR,"Unknown Cook version, report sample!\n"); return -1; break; } @@ -1280,16 +1280,16 @@ static int cook_decode_init(AVCodecContext *avctx) /* Try to catch some obviously faulty streams, othervise it might be exploitable */ if (q->total_subbands > 53) { - av_log(NULL,AV_LOG_ERROR,"total_subbands > 53, 
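
Condensed, the first-frame heuristic the cinepak.c hunk above introduces: the 10-byte Cinepak frame header carries a 24-bit encoded size; when it disagrees with the buffer size reported by the container, the data comes from a Sega FILM/CPK file and carries 2 extra bytes after the header, or 6 in the two known files flagged by the byte pattern FE 00 00 06 00 00. A sketch with a hypothetical helper name:

#include <stdint.h>

/* returns the number of extra bytes to skip after the 10-byte header */
static int detect_sega_film_skip(const uint8_t *d, int size)
{
    int encoded = (d[1] << 16) | (d[2] << 8) | d[3];  /* 24-bit size field */

    if (encoded == size)
        return 0;                       /* sizes agree: plain Cinepak */
    if (size >= 16 &&
        d[10] == 0xFE && d[11] == 0x00 && d[12] == 0x00 &&
        d[13] == 0x06 && d[14] == 0x00 && d[15] == 0x00)
        return 6;                       /* the two known 6-byte files */
    return 2;                           /* usual Sega FILM/CPK padding */
}
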
report sample!\n"); + av_log(avctx,AV_LOG_ERROR,"total_subbands > 53, report sample!\n"); return -1; } if (q->subbands > 50) { - av_log(NULL,AV_LOG_ERROR,"subbands > 50, report sample!\n"); + av_log(avctx,AV_LOG_ERROR,"subbands > 50, report sample!\n"); return -1; } if ((q->samples_per_channel == 256) || (q->samples_per_channel == 512) || (q->samples_per_channel == 1024)) { } else { - av_log(NULL,AV_LOG_ERROR,"unknown amount of samples_per_channel = %d, report sample!\n",q->samples_per_channel); + av_log(avctx,AV_LOG_ERROR,"unknown amount of samples_per_channel = %d, report sample!\n",q->samples_per_channel); return -1; } diff --git a/src/libffmpeg/libavcodec/cscd.c b/src/libffmpeg/libavcodec/cscd.c index e4257f4c0..d8733d6dd 100644 --- a/src/libffmpeg/libavcodec/cscd.c +++ b/src/libffmpeg/libavcodec/cscd.c @@ -220,12 +220,12 @@ static int decode_init(AVCodecContext *avctx) { } avctx->has_b_frames = 0; switch (avctx->bits_per_sample) { - case 16: avctx->pix_fmt = PIX_FMT_RGB565; break; + case 16: avctx->pix_fmt = PIX_FMT_RGB555; break; case 24: avctx->pix_fmt = PIX_FMT_BGR24; break; case 32: avctx->pix_fmt = PIX_FMT_RGBA32; break; default: av_log(avctx, AV_LOG_ERROR, - "CamStudio codec error: unvalid depth %i bpp\n", + "CamStudio codec error: invalid depth %i bpp\n", avctx->bits_per_sample); return 1; } diff --git a/src/libffmpeg/libavcodec/dsputil.c b/src/libffmpeg/libavcodec/dsputil.c index 51eddbc60..916d8658c 100644 --- a/src/libffmpeg/libavcodec/dsputil.c +++ b/src/libffmpeg/libavcodec/dsputil.c @@ -2549,6 +2549,11 @@ void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) { } #endif /* CONFIG_VC1_DECODER||CONFIG_WMV3_DECODER */ +#if defined(CONFIG_H264_ENCODER) +/* H264 specific */ +void ff_h264dsp_init(DSPContext* c, AVCodecContext *avctx); +#endif /* CONFIG_H264_ENCODER */ + static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; int i; @@ -3801,11 +3806,31 @@ void dsputil_static_init(void) for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; } +int ff_check_alignment(void){ + static int did_fail=0; + DECLARE_ALIGNED_16(int, aligned); + + if((int)&aligned & 15){ + if(!did_fail){ +#if defined(HAVE_MMX) || defined(HAVE_ALTIVEC) + av_log(NULL, AV_LOG_ERROR, + "Compiler did not align stack variables. Libavcodec has been miscompiled\n" + "and may be very slow or crash. This is not a bug in libavcodec,\n" + "but in the compiler. 
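
The cscd.c change above matters because 16-bpp CamStudio frames store 5 bits per channel (xRGB1555); decoding them as RGB565 would shift red into the padding bit and read a 6-bit green, corrupting every pixel. The two layouts side by side (helper names are illustrative):

#include <stdint.h>

static void unpack_rgb555(uint16_t px, int *r, int *g, int *b)
{
    *r = (px >> 10) & 0x1f;   /* xRRRRRGG GGGBBBBB */
    *g = (px >>  5) & 0x1f;
    *b =  px        & 0x1f;
}

static void unpack_rgb565(uint16_t px, int *r, int *g, int *b)
{
    *r = (px >> 11) & 0x1f;   /* RRRRRGGG GGGBBBBB: 6-bit green, no pad bit */
    *g = (px >>  5) & 0x3f;
    *b =  px        & 0x1f;
}
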
Do not report crashes to FFmpeg developers.\n"); +#endif + did_fail=1; + } + return -1; + } + return 0; +} void dsputil_init(DSPContext* c, AVCodecContext *avctx) { int i; + ff_check_alignment(); + #ifdef CONFIG_ENCODERS if(avctx->dct_algo==FF_DCT_FASTINT) { c->fdct = fdct_ifast; @@ -4006,6 +4031,9 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) #if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER) ff_vc1dsp_init(c,avctx); #endif +#if defined(CONFIG_H264_ENCODER) + ff_h264dsp_init(c,avctx); +#endif c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c; c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c; diff --git a/src/libffmpeg/libavcodec/dsputil.h b/src/libffmpeg/libavcodec/dsputil.h index de3c1d564..78109f7b9 100644 --- a/src/libffmpeg/libavcodec/dsputil.h +++ b/src/libffmpeg/libavcodec/dsputil.h @@ -33,9 +33,6 @@ #include "common.h" #include "avcodec.h" -#if defined(ARCH_X86) || defined(ARCH_X86_64) -#define HAVE_MMX 1 -#endif //#define DEBUG /* dct code */ @@ -381,10 +378,12 @@ typedef struct DSPContext { #define BASIS_SHIFT 16 #define RECON_SHIFT 6 + /* h264 functions */ void (*h264_idct_add)(uint8_t *dst, DCTELEM *block, int stride); void (*h264_idct8_add)(uint8_t *dst, DCTELEM *block, int stride); void (*h264_idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride); void (*h264_idct8_dc_add)(uint8_t *dst, DCTELEM *block, int stride); + void (*h264_dct)(DCTELEM block[4][4]); /* snow wavelet */ void (*vertical_compose97i)(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); @@ -411,6 +410,8 @@ typedef struct DSPContext { void dsputil_static_init(void); void dsputil_init(DSPContext* p, AVCodecContext *avctx); +int ff_check_alignment(void); + /** * permute block according to permuatation. * @param last last non zero element in scantable order @@ -483,6 +484,7 @@ int mm_support(void); #define MM_SSE2 0x0010 /* PIV SSE2 functions */ #define MM_3DNOWEXT 0x0020 /* AMD 3DNowExt */ #define MM_SSE3 0x0040 /* Prescott SSE3 functions */ +#define MM_SSSE3 0x0080 /* Conroe SSSE3 functions */ extern int mm_flags; @@ -593,30 +595,6 @@ void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx); #endif -#ifdef __GNUC__ - -struct unaligned_64 { uint64_t l; } __attribute__((packed)); -struct unaligned_32 { uint32_t l; } __attribute__((packed)); -struct unaligned_16 { uint16_t l; } __attribute__((packed)); - -#define LD16(a) (((const struct unaligned_16 *) (a))->l) -#define LD32(a) (((const struct unaligned_32 *) (a))->l) -#define LD64(a) (((const struct unaligned_64 *) (a))->l) - -#define ST16(a, b) (((struct unaligned_16 *) (a))->l) = (b) -#define ST32(a, b) (((struct unaligned_32 *) (a))->l) = (b) - -#else /* __GNUC__ */ - -#define LD16(a) (*((uint16_t*)(a))) -#define LD32(a) (*((uint32_t*)(a))) -#define LD64(a) (*((uint64_t*)(a))) - -#define ST16(a, b) *((uint16_t*)(a)) = (b) -#define ST32(a, b) *((uint32_t*)(a)) = (b) - -#endif /* !__GNUC__ */ - /* PSNR */ void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3], int orig_linesize[3], int coded_linesize, diff --git a/src/libffmpeg/libavcodec/dv.c b/src/libffmpeg/libavcodec/dv.c index 76095a481..803d3502d 100644 --- a/src/libffmpeg/libavcodec/dv.c +++ b/src/libffmpeg/libavcodec/dv.c @@ -560,7 +560,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s, #ifdef DV_CODEC_TINY_TARGET /* Converts run and level (where level != 0) pair into vlc, returning bit size */ -static always_inline int dv_rl2vlc(int run, int level, int sign, uint32_t* vlc) +static av_always_inline int dv_rl2vlc(int run, 
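
ff_check_alignment(), added above, boils down to taking the address of a stack variable that was declared 16-byte aligned and testing its low four bits; if the compiler failed to honour the request, SIMD code relying on aligned loads would crash or crawl. A reduced version, assuming DECLARE_ALIGNED_16 expands to gcc's aligned attribute (uintptr_t is used here for the address test):

#include <stdint.h>

static int check_alignment(void)
{
    __attribute__((aligned(16))) int probe;   /* request a 16-byte aligned slot */

    if ((uintptr_t)&probe & 15)
        return -1;   /* stack not aligned: miscompiled for MMX/AltiVec use */
    return 0;
}
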
int level, int sign, uint32_t* vlc) { int size; if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) { @@ -585,7 +585,7 @@ static always_inline int dv_rl2vlc(int run, int level, int sign, uint32_t* vlc) return size; } -static always_inline int dv_rl2vlc_size(int run, int level) +static av_always_inline int dv_rl2vlc_size(int run, int level) { int size; @@ -601,13 +601,13 @@ static always_inline int dv_rl2vlc_size(int run, int level) return size; } #else -static always_inline int dv_rl2vlc(int run, int l, int sign, uint32_t* vlc) +static av_always_inline int dv_rl2vlc(int run, int l, int sign, uint32_t* vlc) { *vlc = dv_vlc_map[run][l].vlc | sign; return dv_vlc_map[run][l].size; } -static always_inline int dv_rl2vlc_size(int run, int l) +static av_always_inline int dv_rl2vlc_size(int run, int l) { return dv_vlc_map[run][l].size; } @@ -627,7 +627,7 @@ typedef struct EncBlockInfo { uint32_t partial_bit_buffer; /* we can't use uint16_t here */ } EncBlockInfo; -static always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext* pb_pool, +static av_always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext* pb_pool, PutBitContext* pb_end) { int prev; @@ -670,7 +670,7 @@ static always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext return pb; } -static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi, +static av_always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi, const uint8_t* zigzag_scan, const int *weight, int bias) { int i, area; @@ -742,7 +742,7 @@ static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi, //FIXME replace this by dsputil #define SC(x, y) ((s[x] - s[y]) ^ ((s[x] - s[y]) >> 7)) -static always_inline int dv_guess_dct_mode(DCTELEM *blk) { +static av_always_inline int dv_guess_dct_mode(DCTELEM *blk) { DCTELEM *s; int score88 = 0; int score248 = 0; diff --git a/src/libffmpeg/libavcodec/faandct.c b/src/libffmpeg/libavcodec/faandct.c index e3c0d84a2..6f73ee5e9 100644 --- a/src/libffmpeg/libavcodec/faandct.c +++ b/src/libffmpeg/libavcodec/faandct.c @@ -70,7 +70,7 @@ B6*B0, B6*B1, B6*B2, B6*B3, B6*B4, B6*B5, B6*B6, B6*B7, B7*B0, B7*B1, B7*B2, B7*B3, B7*B4, B7*B5, B7*B6, B7*B7, }; -static always_inline void row_fdct(FLOAT temp[64], DCTELEM * data) +static av_always_inline void row_fdct(FLOAT temp[64], DCTELEM * data) { FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; FLOAT tmp10, tmp11, tmp12, tmp13; diff --git a/src/libffmpeg/libavcodec/ffv1.c b/src/libffmpeg/libavcodec/ffv1.c index 62623e591..1ca18a4e8 100644 --- a/src/libffmpeg/libavcodec/ffv1.c +++ b/src/libffmpeg/libavcodec/ffv1.c @@ -186,7 +186,7 @@ typedef struct FFV1Context{ DSPContext dsp; }FFV1Context; -static always_inline int fold(int diff, int bits){ +static av_always_inline int fold(int diff, int bits){ if(bits==8) diff= (int8_t)diff; else{ diff --git a/src/libffmpeg/libavcodec/h263.c b/src/libffmpeg/libavcodec/h263.c index ba51c245a..af5fa50e6 100644 --- a/src/libffmpeg/libavcodec/h263.c +++ b/src/libffmpeg/libavcodec/h263.c @@ -487,12 +487,28 @@ static inline void restore_ac_coeffs(MpegEncContext * s, DCTELEM block[6][64], i } /** + * init s->current_picture.qscale_table from s->lambda_table + */ +static void ff_init_qscale_tab(MpegEncContext *s){ + int8_t * const qscale_table= s->current_picture.qscale_table; + int i; + + for(i=0; i<s->mb_num; i++){ + unsigned int lam= s->lambda_table[ s->mb_index2xy[i] ]; + int qp= (lam*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7); + qscale_table[ 
s->mb_index2xy[i] ]= clip(qp, s->avctx->qmin, s->avctx->qmax); + } +} + +/** * modify qscale so that encoding is acually possible in h263 (limit difference to -2..2) */ void ff_clean_h263_qscales(MpegEncContext *s){ int i; int8_t * const qscale_table= s->current_picture.qscale_table; + ff_init_qscale_tab(s); + for(i=1; i<s->mb_num; i++){ if(qscale_table[ s->mb_index2xy[i] ] - qscale_table[ s->mb_index2xy[i-1] ] >2) qscale_table[ s->mb_index2xy[i] ]= qscale_table[ s->mb_index2xy[i-1] ]+2; @@ -507,7 +523,6 @@ void ff_clean_h263_qscales(MpegEncContext *s){ int mb_xy= s->mb_index2xy[i]; if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_INTER4V)){ - s->mb_type[mb_xy]&= ~CANDIDATE_MB_TYPE_INTER4V; s->mb_type[mb_xy]|= CANDIDATE_MB_TYPE_INTER; } } @@ -546,7 +561,6 @@ void ff_clean_mpeg4_qscales(MpegEncContext *s){ for(i=1; i<s->mb_num; i++){ int mb_xy= s->mb_index2xy[i]; if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_DIRECT)){ - s->mb_type[mb_xy]&= ~CANDIDATE_MB_TYPE_DIRECT; s->mb_type[mb_xy]|= CANDIDATE_MB_TYPE_BIDIR; } } diff --git a/src/libffmpeg/libavcodec/h264.c b/src/libffmpeg/libavcodec/h264.c index ad23ae120..d7c48bd4a 100644 --- a/src/libffmpeg/libavcodec/h264.c +++ b/src/libffmpeg/libavcodec/h264.c @@ -165,20 +165,6 @@ typedef struct H264Context{ MpegEncContext s; int nal_ref_idc; int nal_unit_type; -#define NAL_SLICE 1 -#define NAL_DPA 2 -#define NAL_DPB 3 -#define NAL_DPC 4 -#define NAL_IDR_SLICE 5 -#define NAL_SEI 6 -#define NAL_SPS 7 -#define NAL_PPS 8 -#define NAL_AUD 9 -#define NAL_END_SEQUENCE 10 -#define NAL_END_STREAM 11 -#define NAL_FILLER_DATA 12 -#define NAL_SPS_EXT 13 -#define NAL_AUXILIARY_SLICE 19 uint8_t *rbsp_buffer; unsigned int rbsp_buffer_size; @@ -414,7 +400,7 @@ static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, in static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); -static always_inline uint32_t pack16to32(int a, int b){ +static av_always_inline uint32_t pack16to32(int a, int b){ #ifdef WORDS_BIGENDIAN return (b&0xFFFF) + (a<<16); #else @@ -422,13 +408,22 @@ static always_inline uint32_t pack16to32(int a, int b){ #endif } +const uint8_t ff_rem6[52]={ +0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, +}; + +const uint8_t ff_div6[52]={ +0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, +}; + + /** * fill a rectangle. 
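
The ff_rem6/ff_div6 tables moved into h264.c above simply memoize q%6 and q/6 for the 52 H.264 QP values: the dequantiser scale doubles every six QP steps, so a coefficient is rescaled as base[q%6] << (q/6), and the two lookups replace a division and a modulo per use. A sketch (the base[] values follow the 4x4 scaling list visible in the dequant init code above; treat them as illustrative):

#include <assert.h>
#include <stdint.h>

static const int base[6] = { 10, 11, 13, 14, 16, 18 };  /* per-QP%6 scale */

static int dequant_level(int level, int qp)
{
    return (level * base[qp % 6]) << (qp / 6);  /* tables memoize % and / */
}

/* the tables agree with plain arithmetic for all 52 QPs */
static void check_tables(const uint8_t *rem6, const uint8_t *div6)
{
    for (int q = 0; q < 52; q++) {
        assert(rem6[q] == q % 6);
        assert(div6[q] == q / 6);
    }
}
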
* @param h height of the rectangle, should be a constant * @param w width of the rectangle, should be a constant * @param size the size of val (1 or 4), should be a constant */ -static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ +static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ uint8_t *p= (uint8_t*)vp; assert(size==1 || size==4); assert(w<=4); @@ -1808,81 +1803,6 @@ static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *c return dst; } -#if 0 -/** - * @param src the data which should be escaped - * @param dst the target buffer, dst+1 == src is allowed as a special case - * @param length the length of the src data - * @param dst_length the length of the dst array - * @returns length of escaped data in bytes or -1 if an error occured - */ -static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){ - int i, escape_count, si, di; - uint8_t *temp; - - assert(length>=0); - assert(dst_length>0); - - dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type; - - if(length==0) return 1; - - escape_count= 0; - for(i=0; i<length; i+=2){ - if(src[i]) continue; - if(i>0 && src[i-1]==0) - i--; - if(i+2<length && src[i+1]==0 && src[i+2]<=3){ - escape_count++; - i+=2; - } - } - - if(escape_count==0){ - if(dst+1 != src) - memcpy(dst+1, src, length); - return length + 1; - } - - if(length + escape_count + 1> dst_length) - return -1; - - //this should be damn rare (hopefully) - - h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count); - temp= h->rbsp_buffer; -//printf("encoding esc\n"); - - si= 0; - di= 0; - while(si < length){ - if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){ - temp[di++]= 0; si++; - temp[di++]= 0; si++; - temp[di++]= 3; - temp[di++]= src[si++]; - } - else - temp[di++]= src[si++]; - } - memcpy(dst+1, temp, length+escape_count); - - assert(di == length+escape_count); - - return di + 1; -} - -/** - * write 1,10,100,1000,... 
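
The #if 0 block deleted above was an unused RBSP escaper: H.264 forbids the byte pattern 00 00 0x (x <= 3) inside a NAL payload, so an emulation-prevention byte 03 is inserted after any pair of zero bytes that would otherwise be followed by such a value. The core rule in isolation, a sketch (escape_rbsp is a hypothetical name and dst is assumed large enough):

#include <stdint.h>

static int escape_rbsp(uint8_t *dst, const uint8_t *src, int len)
{
    int si = 0, di = 0, zeros = 0;

    while (si < len) {
        if (zeros >= 2 && src[si] <= 3) {
            dst[di++] = 3;           /* emulation-prevention byte */
            zeros = 0;
        }
        zeros = (src[si] == 0) ? zeros + 1 : 0;
        dst[di++] = src[si++];
    }
    return di;                       /* escaped length */
}
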
for alignment, yes its exactly inverse to mpeg4 - */ -static void encode_rbsp_trailing(PutBitContext *pb){ - int length; - put_bits(pb, 1, 1); - length= (-put_bits_count(pb))&7; - if(length) put_bits(pb, length, 0); -} -#endif - /** * identifies the exact end of the bitstream * @return the length of the trailing, or 0 if damaged @@ -2035,42 +1955,6 @@ static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){ return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)]; } - -#if 0 -static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){ - int i; - //FIXME try int temp instead of block - - for(i=0; i<4; i++){ - const int d0= src1[0 + i*stride] - src2[0 + i*stride]; - const int d1= src1[1 + i*stride] - src2[1 + i*stride]; - const int d2= src1[2 + i*stride] - src2[2 + i*stride]; - const int d3= src1[3 + i*stride] - src2[3 + i*stride]; - const int z0= d0 + d3; - const int z3= d0 - d3; - const int z1= d1 + d2; - const int z2= d1 - d2; - - block[0 + 4*i]= z0 + z1; - block[1 + 4*i]= 2*z3 + z2; - block[2 + 4*i]= z0 - z1; - block[3 + 4*i]= z3 - 2*z2; - } - - for(i=0; i<4; i++){ - const int z0= block[0*4 + i] + block[3*4 + i]; - const int z3= block[0*4 + i] - block[3*4 + i]; - const int z1= block[1*4 + i] + block[2*4 + i]; - const int z2= block[1*4 + i] - block[2*4 + i]; - - block[0*4 + i]= z0 + z1; - block[1*4 + i]= 2*z3 + z2; - block[2*4 + i]= z0 - z1; - block[3*4 + i]= z3 - 2*z2; - } -} -#endif - //FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away) static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){ @@ -2357,7 +2241,7 @@ static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int strid src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2; } -static void pred16x16_vertical_c(uint8_t *src, int stride){ +void ff_pred16x16_vertical_c(uint8_t *src, int stride){ int i; const uint32_t a= ((uint32_t*)(src-stride))[0]; const uint32_t b= ((uint32_t*)(src-stride))[1]; @@ -2372,7 +2256,7 @@ static void pred16x16_vertical_c(uint8_t *src, int stride){ } } -static void pred16x16_horizontal_c(uint8_t *src, int stride){ +void ff_pred16x16_horizontal_c(uint8_t *src, int stride){ int i; for(i=0; i<16; i++){ @@ -2383,7 +2267,7 @@ static void pred16x16_horizontal_c(uint8_t *src, int stride){ } } -static void pred16x16_dc_c(uint8_t *src, int stride){ +void ff_pred16x16_dc_c(uint8_t *src, int stride){ int i, dc=0; for(i=0;i<16; i++){ @@ -2437,7 +2321,7 @@ static void pred16x16_top_dc_c(uint8_t *src, int stride){ } } -static void pred16x16_128_dc_c(uint8_t *src, int stride){ +void ff_pred16x16_128_dc_c(uint8_t *src, int stride){ int i; for(i=0; i<16; i++){ @@ -2488,11 +2372,11 @@ static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int } } -static void pred16x16_plane_c(uint8_t *src, int stride){ +void ff_pred16x16_plane_c(uint8_t *src, int stride){ pred16x16_plane_compat_c(src, stride, 0); } -static void pred8x8_vertical_c(uint8_t *src, int stride){ +void ff_pred8x8_vertical_c(uint8_t *src, int stride){ int i; const uint32_t a= ((uint32_t*)(src-stride))[0]; const uint32_t b= ((uint32_t*)(src-stride))[1]; @@ -2503,7 +2387,7 @@ static void pred8x8_vertical_c(uint8_t *src, int stride){ } } -static void pred8x8_horizontal_c(uint8_t *src, int stride){ +void ff_pred8x8_horizontal_c(uint8_t *src, int stride){ int i; for(i=0; i<8; i++){ @@ -2512,7 +2396,7 @@ static void 
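
Several of the 16x16 and 8x8 intra predictors above lose their static qualifier and gain an ff_ prefix so the new H.264 encoder DSP code can reach them. For orientation, the simplest of them, DC prediction, fills the block with the rounded mean of the 16 neighbours above and the 16 to the left; roughly what ff_pred16x16_dc_c computes when both neighbours are available:

#include <stdint.h>

static void pred16x16_dc(uint8_t *src, int stride)
{
    int dc = 0;

    for (int i = 0; i < 16; i++)                 /* left column + top row */
        dc += src[-1 + i * stride] + src[i - stride];

    dc = (dc + 16) >> 5;                         /* round and divide by 32 */

    for (int y = 0; y < 16; y++)
        for (int x = 0; x < 16; x++)
            src[x + y * stride] = (uint8_t)dc;
}
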
pred8x8_horizontal_c(uint8_t *src, int stride){ } } -static void pred8x8_128_dc_c(uint8_t *src, int stride){ +void ff_pred8x8_128_dc_c(uint8_t *src, int stride){ int i; for(i=0; i<8; i++){ @@ -2566,7 +2450,7 @@ static void pred8x8_top_dc_c(uint8_t *src, int stride){ } -static void pred8x8_dc_c(uint8_t *src, int stride){ +void ff_pred8x8_dc_c(uint8_t *src, int stride){ int i; int dc0, dc1, dc2, dc3; @@ -2591,7 +2475,7 @@ static void pred8x8_dc_c(uint8_t *src, int stride){ } } -static void pred8x8_plane_c(uint8_t *src, int stride){ +void ff_pred8x8_plane_c(uint8_t *src, int stride){ int j, k; int a; uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; @@ -3220,21 +3104,21 @@ static void init_pred_ptrs(H264Context *h){ h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c; h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c; - h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c; - h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c; - h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c; - h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c; + h->pred8x8[DC_PRED8x8 ]= ff_pred8x8_dc_c; + h->pred8x8[VERT_PRED8x8 ]= ff_pred8x8_vertical_c; + h->pred8x8[HOR_PRED8x8 ]= ff_pred8x8_horizontal_c; + h->pred8x8[PLANE_PRED8x8 ]= ff_pred8x8_plane_c; h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c; h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c; - h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c; + h->pred8x8[DC_128_PRED8x8 ]= ff_pred8x8_128_dc_c; - h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c; - h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c; - h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c; - h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c; + h->pred16x16[DC_PRED8x8 ]= ff_pred16x16_dc_c; + h->pred16x16[VERT_PRED8x8 ]= ff_pred16x16_vertical_c; + h->pred16x16[HOR_PRED8x8 ]= ff_pred16x16_horizontal_c; + h->pred16x16[PLANE_PRED8x8 ]= ff_pred16x16_plane_c; h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c; h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c; - h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c; + h->pred16x16[DC_128_PRED8x8 ]= ff_pred16x16_128_dc_c; } static void free_tables(H264Context *h){ @@ -3269,8 +3153,8 @@ static void init_dequant8_coeff_table(H264Context *h){ } for(q=0; q<52; q++){ - int shift = div6[q]; - int idx = rem6[q]; + int shift = ff_div6[q]; + int idx = ff_rem6[q]; for(x=0; x<64; x++) h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] = ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * @@ -3294,8 +3178,8 @@ static void init_dequant4_coeff_table(H264Context *h){ continue; for(q=0; q<52; q++){ - int shift = div6[q] + 2; - int idx = rem6[q]; + int shift = ff_div6[q] + 2; + int idx = ff_rem6[q]; for(x=0; x<16; x++) h->dequant4_coeff[i][q][transpose ? 
(x>>2)|((x<<2)&0xF) : x] = ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] * @@ -4972,6 +4856,10 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in if(total_coeff==0) return 0; + if(total_coeff<0) { + av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff<0)\n", s->mb_x, s->mb_y); + return -1; + } trailing_ones= coeff_token&3; tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff); diff --git a/src/libffmpeg/libavcodec/h264data.h b/src/libffmpeg/libavcodec/h264data.h index 2dea3580f..74e720421 100644 --- a/src/libffmpeg/libavcodec/h264data.h +++ b/src/libffmpeg/libavcodec/h264data.h @@ -53,6 +53,24 @@ #define EXTENDED_SAR 255 +/* NAL unit types */ +enum { +NAL_SLICE=1, +NAL_DPA, +NAL_DPB, +NAL_DPC, +NAL_IDR_SLICE, +NAL_SEI, +NAL_SPS, +NAL_PPS, +NAL_AUD, +NAL_END_SEQUENCE, +NAL_END_STREAM, +NAL_FILLER_DATA, +NAL_SPS_EXT, +NAL_AUXILIARY_SLICE=19 +}; + static const AVRational pixel_aspect[14]={ {0, 1}, {1, 1}, @@ -488,15 +506,6 @@ static const PMbInfo b_sub_mb_type_info[13]={ {MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 4, }, }; - -static const uint8_t rem6[52]={ -0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, -}; - -static const uint8_t div6[52]={ -0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, -}; - static const uint8_t default_scaling4[2][16]={ { 6,13,20,28, 13,20,28,32, diff --git a/src/libffmpeg/libavcodec/h264idct.c b/src/libffmpeg/libavcodec/h264idct.c index 3506418ad..a6a56d33a 100755 --- a/src/libffmpeg/libavcodec/h264idct.c +++ b/src/libffmpeg/libavcodec/h264idct.c @@ -28,7 +28,7 @@ #include "dsputil.h" -static always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int stride, int block_stride, int shift, int add){ +static av_always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int stride, int block_stride, int shift, int add){ int i; uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; diff --git a/src/libffmpeg/libavcodec/i386/Makefile.am b/src/libffmpeg/libavcodec/i386/Makefile.am index 15ab4db89..ee170efd5 100644 --- a/src/libffmpeg/libavcodec/i386/Makefile.am +++ b/src/libffmpeg/libavcodec/i386/Makefile.am @@ -6,7 +6,7 @@ AM_CFLAGS = -fomit-frame-pointer -fno-strict-aliasing # CFLAGS is here to filter out -funroll-loops because it causes bad # behavior of libavcodec CFLAGS := `echo @CFLAGS@ | sed -e 's/-funroll-loops//g'` -AM_CPPFLAGS = $(LIBFFMPEG_CPPFLAGS) -I$(top_srcdir)/src/libffmpeg/libavutil +AM_CPPFLAGS = $(LIBFFMPEG_CPPFLAGS) -I$(top_srcdir)/src/libffmpeg/libavutil -I$(top_srcdir)/src/libffmpeg # Avoid "can't find register" failures with -O1 and higher dsputil_mmx.o dsputil_mmx.lo: CFLAGS=$(shell echo @CFLAGS@ | sed -e 's/-funroll-loops//g; s/$$/ -Os/') @@ -42,10 +42,10 @@ EXTRA_DIST = \ h264dsp_mmx.c \ mpegvideo_mmx_template.c -if HAVE_FFMMX +if HAVE_MMX mmx_modules = $(libavcodec_mmx_src) endif libavcodec_mmx_la_SOURCES = $(mmx_modules) $(libavcodec_mmx_dummy) -noinst_HEADERS = dsputil_mmx_avg.h dsputil_mmx_rnd.h mmx.h +noinst_HEADERS = dsputil_mmx_avg.h dsputil_mmx_rnd.h mathops.h mmx.h diff --git a/src/libffmpeg/libavcodec/i386/cputest.c b/src/libffmpeg/libavcodec/i386/cputest.c index 262786b71..0705ab3e5 100644 --- a/src/libffmpeg/libavcodec/i386/cputest.c +++ b/src/libffmpeg/libavcodec/i386/cputest.c @@ -87,6 +87,8 @@ int mm_support(void) rval |= MM_SSE2; 
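
The cputest.c hunk above keys the new MM_SSSE3 flag off ECX bit 9 of CPUID leaf 1, which is where SSSE3 support is reported. The same probe, standalone, assuming GCC's <cpuid.h> helper:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
    unsigned eax, ebx, ecx, edx;

    if (__get_cpuid(1, &eax, &ebx, &ecx, &edx))   /* leaf 1: feature bits */
        printf("SSSE3: %s\n", (ecx & (1 << 9)) ? "yes" : "no");
    return 0;
}
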
if (ecx & 1) rval |= MM_SSE3; + if (ecx & 0x00000200 ) + rval |= MM_SSSE3; } cpuid(0x80000000, max_ext_level, ebx, ecx, edx); @@ -104,11 +106,13 @@ int mm_support(void) } #if 0 - av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s\n", + av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s%s%s\n", (rval&MM_MMX) ? "MMX ":"", (rval&MM_MMXEXT) ? "MMX2 ":"", (rval&MM_SSE) ? "SSE ":"", (rval&MM_SSE2) ? "SSE2 ":"", + (rval&MM_SSE3) ? "SSE3 ":"", + (rval&MM_SSSE3) ? "SSSE3 ":"", (rval&MM_3DNOW) ? "3DNow ":"", (rval&MM_3DNOWEXT) ? "3DNowExt ":""); #endif diff --git a/src/libffmpeg/libavcodec/i386/fdct_mmx.c b/src/libffmpeg/libavcodec/i386/fdct_mmx.c index 2ffbfecf6..7e2682a4a 100644 --- a/src/libffmpeg/libavcodec/i386/fdct_mmx.c +++ b/src/libffmpeg/libavcodec/i386/fdct_mmx.c @@ -284,7 +284,7 @@ TABLE_SSE2 }}; -static always_inline void fdct_col(const int16_t *in, int16_t *out, int offset) +static av_always_inline void fdct_col(const int16_t *in, int16_t *out, int offset) { movq_m2r(*(in + offset + 1 * 8), mm0); movq_m2r(*(in + offset + 6 * 8), mm1); @@ -364,7 +364,7 @@ static always_inline void fdct_col(const int16_t *in, int16_t *out, int offset) } -static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out) +static av_always_inline void fdct_row_sse2(const int16_t *in, int16_t *out) { asm volatile( #define FDCT_ROW_SSE2_H1(i,t) \ @@ -426,7 +426,7 @@ static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out) ); } -static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table) +static av_always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table) { pshufw_m2r(*(in + 4), mm5, 0x1B); movq_m2r(*(in + 0), mm0); @@ -469,7 +469,7 @@ static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const i movq_r2m(mm7, *(out + 4)); } -static always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const int16_t *table) +static av_always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const int16_t *table) { //FIXME reorder (i dont have a old mmx only cpu here to benchmark ...) movd_m2r(*(in + 6), mm1); diff --git a/src/libffmpeg/libavcodec/i386/mathops.h b/src/libffmpeg/libavcodec/i386/mathops.h new file mode 100644 index 000000000..3553a4025 --- /dev/null +++ b/src/libffmpeg/libavcodec/i386/mathops.h @@ -0,0 +1,41 @@ +/* + * simple math operations + * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifdef FRAC_BITS +# define MULL(ra, rb) \ + ({ int rt, dummy; asm (\ + "imull %3 \n\t"\ + "shrdl %4, %%edx, %%eax \n\t"\ + : "=a"(rt), "=d"(dummy)\ + : "a" (ra), "rm" (rb), "i"(FRAC_BITS));\ + rt; }) +#endif + +#define MULH(ra, rb) \ + ({ int rt, dummy;\ + asm ("imull %3\n\t" : "=d"(rt), "=a"(dummy): "a" (ra), "rm" (rb));\ + rt; }) + +#define MUL64(ra, rb) \ + ({ int64_t rt;\ + asm ("imull %2\n\t" : "=A"(rt) : "a" (ra), "g" (rb));\ + rt; }) + diff --git a/src/libffmpeg/libavcodec/jfdctfst.c b/src/libffmpeg/libavcodec/jfdctfst.c index 38424563d..a9dcfab82 100644 --- a/src/libffmpeg/libavcodec/jfdctfst.c +++ b/src/libffmpeg/libavcodec/jfdctfst.c @@ -145,7 +145,7 @@ #define MULTIPLY(var,const) ((DCTELEM) DESCALE((var) * (const), CONST_BITS)) -static always_inline void row_fdct(DCTELEM * data){ +static av_always_inline void row_fdct(DCTELEM * data){ int_fast16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int_fast16_t tmp10, tmp11, tmp12, tmp13; int_fast16_t z1, z2, z3, z4, z5, z11, z13; diff --git a/src/libffmpeg/libavcodec/jfdctint.c b/src/libffmpeg/libavcodec/jfdctint.c index 58f3a1446..250312467 100644 --- a/src/libffmpeg/libavcodec/jfdctint.c +++ b/src/libffmpeg/libavcodec/jfdctint.c @@ -181,7 +181,7 @@ #endif -static always_inline void row_fdct(DCTELEM * data){ +static av_always_inline void row_fdct(DCTELEM * data){ int_fast32_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int_fast32_t tmp10, tmp11, tmp12, tmp13; int_fast32_t z1, z2, z3, z4, z5; diff --git a/src/libffmpeg/libavcodec/jpeg_ls.c b/src/libffmpeg/libavcodec/jpeg_ls.c index 1b4df2b1a..4629176ad 100644 --- a/src/libffmpeg/libavcodec/jpeg_ls.c +++ b/src/libffmpeg/libavcodec/jpeg_ls.c @@ -804,11 +804,16 @@ static int encode_picture_ls(AVCodecContext *avctx, unsigned char *buf, int buf_ av_free(zero); av_free(state); + // the specification says that after doing 0xff escaping unused bits in the + // last byte must be set to 0, so just append 7 "optional" zero-bits to + // avoid special-casing. + put_bits(&pb2, 7, 0); + size = put_bits_count(&pb2); flush_put_bits(&pb2); /* do escape coding */ - size = put_bits_count(&pb2) >> 3; init_get_bits(&gb, buf2, size); - while(get_bits_count(&gb) < size * 8){ + size -= 7; + while(get_bits_count(&gb) < size){ int v; v = get_bits(&gb, 8); put_bits(&pb, 8, v); diff --git a/src/libffmpeg/libavcodec/mathops.h b/src/libffmpeg/libavcodec/mathops.h index 9ae34d71b..c6ec70597 100644 --- a/src/libffmpeg/libavcodec/mathops.h +++ b/src/libffmpeg/libavcodec/mathops.h @@ -46,7 +46,7 @@ //gcc 3.4 creates an incredibly bloated mess out of this //# define MULH(a,b) (((int64_t)(a) * (int64_t)(b))>>32) -static always_inline int MULH(int a, int b){ +static av_always_inline int MULH(int a, int b){ return ((int64_t)(a) * (int64_t)(b))>>32; } #endif diff --git a/src/libffmpeg/libavcodec/motion_est.c b/src/libffmpeg/libavcodec/motion_est.c index 0e1504147..a11787bac 100644 --- a/src/libffmpeg/libavcodec/motion_est.c +++ b/src/libffmpeg/libavcodec/motion_est.c @@ -106,7 +106,7 @@ static int get_flags(MotionEstContext *c, int direct, int chroma){ + (chroma ? 
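
Both mathops.h variants above implement MULH, the high 32 bits of a signed 32x32-bit multiply: the new i386 version gets it from a single imull (the high half lands in EDX), while the generic version shown above widens to 64 bits. A portable reference with a worked case:

#include <stdint.h>

static inline int mulh(int a, int b)
{
    return (int)(((int64_t)a * b) >> 32);   /* high half of the product */
}

/* example: mulh(x, 1 << 30) == x >> 2, i.e. floor(x / 4), for any int x */
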
FLAG_CHROMA : 0); } -static always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby, +static av_always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby, const int size, const int h, int ref_index, int src_index, me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){ MotionEstContext * const c= &s->me; @@ -122,6 +122,7 @@ static always_inline int cmp(MpegEncContext *s, const int x, const int y, const int d; //FIXME check chroma 4mv, (no crashes ...) if(flags&FLAG_DIRECT){ + assert(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1)); if(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1)){ const int time_pp= s->pp_time; const int time_pb= s->pb_time; @@ -233,8 +234,14 @@ static void zero_hpel(uint8_t *a, const uint8_t *b, int stride, int h){ void ff_init_me(MpegEncContext *s){ MotionEstContext * const c= &s->me; + int cache_size= FFMIN(ME_MAP_SIZE>>ME_MAP_SHIFT, 1<<ME_MAP_SHIFT); + int dia_size= FFMAX(FFABS(s->avctx->dia_size)&255, FFABS(s->avctx->pre_dia_size)&255); c->avctx= s->avctx; + if(cache_size < 2*dia_size && !c->stride){ + av_log(s->avctx, AV_LOG_INFO, "ME_MAP size may be a little small for the selected diamond size\n"); + } + ff_set_cmp(&s->dsp, s->dsp.me_pre_cmp, c->avctx->me_pre_cmp); ff_set_cmp(&s->dsp, s->dsp.me_cmp, c->avctx->me_cmp); ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, c->avctx->me_sub_cmp); @@ -692,6 +699,7 @@ static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4) static inline void get_limits(MpegEncContext *s, int x, int y) { MotionEstContext * const c= &s->me; + int range= c->avctx->me_range >> (1 + !!(c->flags&FLAG_QPEL)); /* if(c->avctx->me_range) c->range= c->avctx->me_range >> 1; else c->range= 16; @@ -713,6 +721,12 @@ static inline void get_limits(MpegEncContext *s, int x, int y) c->xmax = - x + s->mb_width *16 - 16; c->ymax = - y + s->mb_height*16 - 16; } + if(range){ + c->xmin = FFMAX(c->xmin,-range); + c->xmax = FFMIN(c->xmax, range); + c->ymin = FFMAX(c->ymin,-range); + c->ymax = FFMIN(c->ymax, range); + } } static inline void init_mv4_ref(MotionEstContext *c){ @@ -1148,7 +1162,9 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, { MotionEstContext * const c= &s->me; uint8_t *pix, *ppix; - int sum, varc, vard, mx, my, dmin; + int sum, mx, my, dmin; + int varc; ///< the variance of the block (sum of squared (p[y][x]-average)) + int vard; ///< sum of squared differences with the estimated motion vector int P[10][2]; const int shift= 1+s->quarter_sample; int mb_type=0; @@ -1810,8 +1826,8 @@ static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y) get_limits(s, 16*mb_x, 16*mb_y); //restore c->?min/max, maybe not needed - s->b_direct_mv_table[mot_xy][0]= mx; - s->b_direct_mv_table[mot_xy][1]= my; + mv_table[mot_xy][0]= mx; + mv_table[mot_xy][1]= my; c->flags &= ~FLAG_DIRECT; c->sub_flags &= ~FLAG_DIRECT; @@ -1831,6 +1847,18 @@ void ff_estimate_b_frame_motion(MpegEncContext * s, get_limits(s, 16*mb_x, 16*mb_y); c->skip=0; + + if(s->codec_id == CODEC_ID_MPEG4 && s->next_picture.mbskip_table[xy]){ + int score= direct_search(s, mb_x, mb_y); //FIXME just check 0,0 + + score= ((unsigned)(score*score + 128*256))>>16; + c->mc_mb_var_sum_temp += score; + s->current_picture.mc_mb_var[mb_y*s->mb_stride + mb_x] = score; //FIXME use SSE + s->mb_type[mb_y*s->mb_stride + mb_x]= CANDIDATE_MB_TYPE_DIRECT0; + + return; + } + if(c->avctx->me_threshold){ int vard= 
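
get_limits() above now also intersects the search window with the user-supplied me_range: the range is halved once because vectors are stored in half-pel units, and once more under quarter-pel. The clamping step on its own, with paraphrased field names:

static void clamp_search_window(int *xmin, int *xmax, int *ymin, int *ymax,
                                int me_range, int qpel)
{
    int range = me_range >> (1 + !!qpel);   /* scale to full-pel units */

    if (range) {                            /* 0 means "unlimited" */
        if (*xmin < -range) *xmin = -range;
        if (*xmax >  range) *xmax =  range;
        if (*ymin < -range) *ymin = -range;
        if (*ymax >  range) *ymax =  range;
    }
}
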
check_input_motion(s, mb_x, mb_y, 0); @@ -1953,6 +1981,8 @@ void ff_estimate_b_frame_motion(MpegEncContext * s, } //FIXME something smarter if(dmin>256*256*16) type&= ~CANDIDATE_MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB + if(s->codec_id == CODEC_ID_MPEG4 && type&CANDIDATE_MB_TYPE_DIRECT && s->flags&CODEC_FLAG_MV0 && *(uint32_t*)s->b_direct_mv_table[xy]) + type |= CANDIDATE_MB_TYPE_DIRECT0; #if 0 if(s->out_format == FMT_MPEG1) type |= CANDIDATE_MB_TYPE_INTRA; diff --git a/src/libffmpeg/libavcodec/motion_est_template.c b/src/libffmpeg/libavcodec/motion_est_template.c index d8feaff5a..897c08e3d 100644 --- a/src/libffmpeg/libavcodec/motion_est_template.c +++ b/src/libffmpeg/libavcodec/motion_est_template.c @@ -555,7 +555,7 @@ if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, const int qpel= flags&FLAG_QPEL;\ const int shift= 1+qpel;\ -static always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin, +static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin, int src_index, int ref_index, int const penalty_factor, int size, int h, int flags) { @@ -667,31 +667,28 @@ static int hex_search(MpegEncContext * s, int *best, int dmin, LOAD_COMMON LOAD_COMMON2 int map_generation= c->map_generation; - int x,y,i,d; - static const int hex[6][2]={{-2, 0}, { 2,0}, {-1,-2}, {1,-2}, {-1,2},{1,2}}; + int x,y,d; + const int dec= dia_size & (dia_size-1); cmpf= s->dsp.me_cmp[size]; chroma_cmpf= s->dsp.me_cmp[size+1]; - for(;dia_size; dia_size--){ + for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){ do{ x= best[0]; y= best[1]; - for(i=0; i<6; i++){ - CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size); + + CHECK_CLIPPED_MV(x -dia_size , y); + CHECK_CLIPPED_MV(x+ dia_size , y); + CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size); + CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size); + if(dia_size>1){ + CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size); + CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size); } }while(best[0] != x || best[1] != y); } - do{ - x= best[0]; - y= best[1]; - CHECK_CLIPPED_MV(x+1, y); - CHECK_CLIPPED_MV(x, y+1); - CHECK_CLIPPED_MV(x-1, y); - CHECK_CLIPPED_MV(x, y-1); - }while(best[0] != x || best[1] != y); - return dmin; } @@ -704,14 +701,16 @@ static int l2s_dia_search(MpegEncContext * s, int *best, int dmin, LOAD_COMMON LOAD_COMMON2 int map_generation= c->map_generation; - int x,y,i,d, dia_size; + int x,y,i,d; + int dia_size= c->dia_size&0xFF; + const int dec= dia_size & (dia_size-1); static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1}, { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}}; cmpf= s->dsp.me_cmp[size]; chroma_cmpf= s->dsp.me_cmp[size+1]; - for(dia_size= c->dia_size&0xFF; dia_size; dia_size--){ + for(; dia_size; dia_size= dec ? 
dia_size-1 : dia_size>>1){ do{ x= best[0]; y= best[1]; @@ -775,7 +774,7 @@ static int umh_search(MpegEncContext * s, int *best, int dmin, } } - return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 1); + return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2); } #define SAB_CHECK_MV(ax,ay)\ @@ -824,20 +823,27 @@ static int sab_diamond_search(MpegEncContext * s, int *best, int dmin, cmpf= s->dsp.me_cmp[size]; chroma_cmpf= s->dsp.me_cmp[size+1]; - for(j=i=0; i<ME_MAP_SIZE; i++){ + /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can + become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map + */ + for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){ uint32_t key= map[i]; key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1)); if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue; - assert(j<MAX_SAB_SIZE); //max j = number of predictors - minima[j].height= score_map[i]; minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS; minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1); minima[j].x-= (1<<(ME_MAP_MV_BITS-1)); minima[j].y-= (1<<(ME_MAP_MV_BITS-1)); + + // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space + if( minima[j].x > xmax || minima[j].x < xmin + || minima[j].y > ymax || minima[j].y < ymin) + continue; + minima[j].checked=0; if(minima[j].x || minima[j].y) minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor; @@ -965,7 +971,7 @@ if(256*256*256*64 % (stats[0]+1)==0){ return dmin; } -static always_inline int diamond_search(MpegEncContext * s, int *best, int dmin, +static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin, int src_index, int ref_index, int const penalty_factor, int size, int h, int flags){ MotionEstContext * const c= &s->me; @@ -985,7 +991,7 @@ static always_inline int diamond_search(MpegEncContext * s, int *best, int dmin, return var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags); } -static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr, +static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr, int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2], int ref_mv_scale, int flags, int size, int h) { @@ -1018,6 +1024,10 @@ static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx map[0]= map_generation; score_map[0]= dmin; + //FIXME precalc first term below? 
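
The rewritten hex_search()/l2s_dia_search() loops above replace the fixed dia_size-- with dia_size = dec ? dia_size-1 : dia_size>>1, where dec = dia_size & (dia_size-1) is computed once from the starting size: a power-of-two size now halves each round, while any other size still steps down linearly. A few lines to print the resulting schedules:

#include <stdio.h>

int main(void)
{
    for (int start = 1; start <= 8; start++) {
        int dec = start & (start - 1);   /* 0 iff start is a power of two */

        printf("%d:", start);
        for (int d = start; d; d = dec ? d - 1 : d >> 1)
            printf(" %d", d);            /* e.g. 8: 8 4 2 1   and  5: 5 4 3 2 1 */
        printf("\n");
    }
    return 0;
}
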
+ if((s->pict_type == B_TYPE && !(c->flags & FLAG_DIRECT)) || s->flags&CODEC_FLAG_MV0) + dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor; + /* first line */ if (s->first_slice_line) { CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) diff --git a/src/libffmpeg/libavcodec/mpeg12.c b/src/libffmpeg/libavcodec/mpeg12.c index e3a4c2da5..8af7bdfa7 100644 --- a/src/libffmpeg/libavcodec/mpeg12.c +++ b/src/libffmpeg/libavcodec/mpeg12.c @@ -515,7 +515,7 @@ static inline void put_mb_modes(MpegEncContext *s, int n, int bits, } } -static always_inline void mpeg1_encode_mb_internal(MpegEncContext *s, +static av_always_inline void mpeg1_encode_mb_internal(MpegEncContext *s, DCTELEM block[6][64], int motion_x, int motion_y, int mb_block_count) diff --git a/src/libffmpeg/libavcodec/mpegaudiodec.c b/src/libffmpeg/libavcodec/mpegaudiodec.c index 54bcee3b0..367400581 100644 --- a/src/libffmpeg/libavcodec/mpegaudiodec.c +++ b/src/libffmpeg/libavcodec/mpegaudiodec.c @@ -327,7 +327,7 @@ static int decode_init(AVCodecContext * avctx) for(i=0;i<15;i++) { int n, norm; n = i + 2; - norm = ((int64_t_C(1) << n) * FRAC_ONE) / ((1 << n) - 1); + norm = ((INT64_C(1) << n) * FRAC_ONE) / ((1 << n) - 1); scale_factor_mult[i][0] = MULL(FIXR(1.0 * 2.0), norm); scale_factor_mult[i][1] = MULL(FIXR(0.7937005259 * 2.0), norm); scale_factor_mult[i][2] = MULL(FIXR(0.6299605249 * 2.0), norm); @@ -1749,7 +1749,7 @@ static int huffman_decode(MPADecodeContext *s, GranuleDef *g, /* skip extension bits */ bits_left = end_pos - get_bits_count(&s->gb); //av_log(NULL, AV_LOG_ERROR, "left:%d buf:%p\n", bits_left, s->in_gb.buffer); - if (bits_left < 0 || bits_left > 16) { + if (bits_left < 0 || bits_left > 500) { av_log(NULL, AV_LOG_ERROR, "bits_left=%d\n", bits_left); s_index=0; }else if(bits_left > 0 && s->error_resilience >= FF_ER_AGGRESSIVE){ diff --git a/src/libffmpeg/libavcodec/mpegvideo.c b/src/libffmpeg/libavcodec/mpegvideo.c index a9d877fff..a33485549 100644 --- a/src/libffmpeg/libavcodec/mpegvideo.c +++ b/src/libffmpeg/libavcodec/mpegvideo.c @@ -140,7 +140,7 @@ static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[ /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */ - qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / + qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j])); } } else if (dsp->fdct == fdct_ifast @@ -155,7 +155,7 @@ static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[ /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */ - qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) / + qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / (aanscales[i] * qscale * quant_matrix[j])); } } else { @@ -166,7 +166,7 @@ static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[ so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905 so 32768 >= (1<<19) / (qscale * quant_matrix[i]) >= 67 */ - qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j])); + qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j])); // qmat [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]); qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]); @@ -2964,7 +2964,7 @@ static inline int 
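
The mpegaudiodec.c change above is more than renaming int64_t_C to the standard INT64_C: the 1 must be a 64-bit constant because with n up to 16 the product (1 << n) * FRAC_ONE reaches 2^39, which overflows a 32-bit int before the division happens. A sketch, assuming FRAC_ONE is 1 << 23 as elsewhere in mpegaudiodec:

#include <stdint.h>
#include <stdio.h>

#define FRAC_BITS 23                 /* assumption: mpegaudiodec's value */
#define FRAC_ONE  (1 << FRAC_BITS)

int main(void)
{
    for (int i = 0; i < 15; i++) {
        int n = i + 2;               /* up to 16: 2^16 * 2^23 = 2^39 */
        int64_t norm = ((INT64_C(1) << n) * FRAC_ONE) / ((1 << n) - 1);

        printf("n=%2d norm=%lld\n", n, (long long)norm);
    }
    return 0;
}
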
hpel_motion_lowres(MpegEncContext *s, } /* apply one mpeg motion vector to the three components */ -static always_inline void mpeg_motion(MpegEncContext *s, +static av_always_inline void mpeg_motion(MpegEncContext *s, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, int field_based, int bottom_field, int field_select, uint8_t **ref_picture, op_pixels_func (*pix_op)[4], @@ -3081,7 +3081,7 @@ if(s->quarter_sample) } /* apply one mpeg motion vector to the three components */ -static always_inline void mpeg_motion_lowres(MpegEncContext *s, +static av_always_inline void mpeg_motion_lowres(MpegEncContext *s, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, int field_based, int bottom_field, int field_select, uint8_t **ref_picture, h264_chroma_mc_func *pix_op, @@ -3913,7 +3913,7 @@ void ff_clean_intra_table_entries(MpegEncContext *s) s->mv : motion vector s->interlaced_dct : true if interlaced dct used (mpeg2) */ -static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag) +static av_always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag) { int mb_x, mb_y; const int mb_xy = s->mb_y * s->mb_stride + s->mb_x; @@ -4336,7 +4336,7 @@ static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){ } } -static always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count) +static av_always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count) { int16_t weight[8][64]; DCTELEM orig[8][64]; @@ -4348,7 +4348,7 @@ static always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, in uint8_t *ptr_y, *ptr_cb, *ptr_cr; int wrap_y, wrap_c; - for(i=0; i<mb_block_count; i++) skip_dct[i]=0; + for(i=0; i<mb_block_count; i++) skip_dct[i]=s->skipdct; if(s->adaptive_quant){ const int last_qp= s->qscale; @@ -4358,17 +4358,16 @@ static always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, in update_qscale(s); if(!(s->flags&CODEC_FLAG_QP_RD)){ + s->qscale= s->current_picture_ptr->qscale_table[mb_xy]; s->dquant= s->qscale - last_qp; if(s->out_format==FMT_H263){ - s->dquant= clip(s->dquant, -2, 2); //FIXME RD + s->dquant= clip(s->dquant, -2, 2); if(s->codec_id==CODEC_ID_MPEG4){ if(!s->mb_intra){ if(s->pict_type == B_TYPE){ - if(s->dquant&1) - s->dquant= (s->dquant/2)*2; - if(s->mv_dir&MV_DIRECT) + if(s->dquant&1 || s->mv_dir&MV_DIRECT) s->dquant= 0; } if(s->mv_type==MV_TYPE_8X8) @@ -4621,7 +4620,7 @@ static always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, in } } -static always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y) +static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y) { if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y, 8, 6); else encode_mb_internal(s, motion_x, motion_y, 16, 8); @@ -4861,6 +4860,8 @@ static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){ static int estimate_motion_thread(AVCodecContext *c, void *arg){ MpegEncContext *s= arg; + ff_check_alignment(); + s->me.dia_size= s->avctx->dia_size; s->first_slice_line=1; for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) { @@ -4888,6 +4889,8 @@ static int mb_var_thread(AVCodecContext *c, void *arg){ MpegEncContext *s= arg; int mb_x, mb_y; + ff_check_alignment(); + for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) { for(mb_x=0; mb_x < s->mb_width; mb_x++) { int xx = 
mb_x * 16; @@ -4938,6 +4941,8 @@ static int encode_thread(AVCodecContext *c, void *arg){ PutBitContext pb[2], pb2[2], tex_pb[2]; //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y); + ff_check_alignment(); + for(i=0; i<2; i++){ init_put_bits(&pb [i], bit_buf [i], MAX_MB_BYTES); init_put_bits(&pb2 [i], bit_buf2 [i], MAX_MB_BYTES); @@ -5205,19 +5210,6 @@ static int encode_thread(AVCodecContext *c, void *arg){ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb, &dmin, &next_block, 0, 0); } - if(mb_type&CANDIDATE_MB_TYPE_DIRECT){ - int mx= s->b_direct_mv_table[xy][0]; - int my= s->b_direct_mv_table[xy][1]; - - s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT; - s->mb_intra= 0; -/* xine: do not need this for decode or MPEG-1 encoding modes */ -#if 0 - ff_mpeg4_set_direct_mv(s, mx, my); -#endif /* #if 0 */ - encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb, - &dmin, &next_block, mx, my); - } if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){ s->mv_dir = MV_DIR_FORWARD; s->mv_type = MV_TYPE_FIELD; @@ -5272,8 +5264,8 @@ static int encode_thread(AVCodecContext *c, void *arg){ } } - if(s->flags & CODEC_FLAG_QP_RD){ - if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){ + if((s->flags & CODEC_FLAG_QP_RD) && dmin < INT_MAX){ + if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD const int last_qp= backup_s.qscale; int qpi, qp, dc[6]; DCTELEM ac[6][16]; @@ -5316,10 +5308,64 @@ static int encode_thread(AVCodecContext *c, void *arg){ } } } - qp= best_s.qscale; - s->current_picture.qscale_table[xy]= qp; } } + if(mb_type&CANDIDATE_MB_TYPE_DIRECT){ + int mx= s->b_direct_mv_table[xy][0]; + int my= s->b_direct_mv_table[xy][1]; + + backup_s.dquant = 0; + s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT; + s->mb_intra= 0; + ff_mpeg4_set_direct_mv(s, mx, my); + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb, + &dmin, &next_block, mx, my); + } + if(mb_type&CANDIDATE_MB_TYPE_DIRECT0){ + backup_s.dquant = 0; + s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT; + s->mb_intra= 0; +/* xine: do not need this for decode or MPEG-1 encoding modes */ +#if 0 + ff_mpeg4_set_direct_mv(s, 0, 0); +#endif /* #if 0 */ + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb, + &dmin, &next_block, 0, 0); + } + if(!best_s.mb_intra && s->flags2&CODEC_FLAG2_SKIP_RD){ + int coded=0; + for(i=0; i<6; i++) + coded |= s->block_last_index[i]; + if(coded){ + int mx,my; + memcpy(s->mv, best_s.mv, sizeof(s->mv)); + if(best_s.mv_dir & MV_DIRECT){ + mx=my=0; //FIXME find the one we actually used + ff_mpeg4_set_direct_mv(s, mx, my); + }else if(best_s.mv_dir&MV_DIR_BACKWARD){ + mx= s->mv[1][0][0]; + my= s->mv[1][0][1]; + }else{ + mx= s->mv[0][0][0]; + my= s->mv[0][0][1]; + } + + s->mv_dir= best_s.mv_dir; + s->mv_type = best_s.mv_type; + s->mb_intra= 0; +/* s->mv[0][0][0] = best_s.mv[0][0][0]; + s->mv[0][0][1] = best_s.mv[0][0][1]; + s->mv[1][0][0] = best_s.mv[1][0][0]; + s->mv[1][0][1] = best_s.mv[1][0][1];*/ + backup_s.dquant= 0; + s->skipdct=1; + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb, + &dmin, &next_block, mx, my); + s->skipdct=0; + } + } + + s->current_picture.qscale_table[xy]= best_s.qscale; copy_context_after_encode(s, &best_s, -1); @@ -5401,6 +5447,11 @@ static int encode_thread(AVCodecContext *c, void *arg){ ff_mpeg4_set_direct_mv(s, motion_x, motion_y); #endif /* #if 0 */ break; + case CANDIDATE_MB_TYPE_DIRECT0: + s->mv_dir = 
MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT; + s->mb_intra= 0; + ff_mpeg4_set_direct_mv(s, 0, 0); + break; case CANDIDATE_MB_TYPE_BIDIR: s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD; s->mb_intra= 0; diff --git a/src/libffmpeg/libavcodec/mpegvideo.h b/src/libffmpeg/libavcodec/mpegvideo.h index 011678a42..ed02759ae 100644 --- a/src/libffmpeg/libavcodec/mpegvideo.h +++ b/src/libffmpeg/libavcodec/mpegvideo.h @@ -324,6 +324,7 @@ typedef struct MpegEncContext { int dropable; int frame_rate_index; int last_lambda_for[5]; ///< last lambda for a specific pict type + int skipdct; ///< skip dct and code zero residual /* motion compensation */ int unrestricted_mv; ///< mv can point outside of the coded picture @@ -402,6 +403,8 @@ typedef struct MpegEncContext { #define CANDIDATE_MB_TYPE_BACKWARD_I 0x400 #define CANDIDATE_MB_TYPE_BIDIR_I 0x800 +#define CANDIDATE_MB_TYPE_DIRECT0 0x1000 + int block_index[6]; ///< index to current MB in block based arrays with edges int block_wrap[6]; uint8_t *dest[3]; diff --git a/src/libffmpeg/libavcodec/parser.c b/src/libffmpeg/libavcodec/parser.c index 72a3e55a3..740ad855c 100644 --- a/src/libffmpeg/libavcodec/parser.c +++ b/src/libffmpeg/libavcodec/parser.c @@ -91,7 +91,8 @@ AVCodecParserContext *av_parser_init(int codec_id) * in_data += len; * in_len -= len; * - * decode_frame(data, size); + * if(size) + * decode_frame(data, size); * } * @endcode */ diff --git a/src/libffmpeg/libavcodec/ppc/Makefile.am b/src/libffmpeg/libavcodec/ppc/Makefile.am index 00e796f6d..d52cc481e 100644 --- a/src/libffmpeg/libavcodec/ppc/Makefile.am +++ b/src/libffmpeg/libavcodec/ppc/Makefile.am @@ -12,14 +12,17 @@ noinst_LTLIBRARIES = libavcodec_ppc.la libavcodec_ppc_src = dsputil_altivec.c \ dsputil_ppc.c \ - dsputil_h264_altivec.c \ - dsputil_h264_template_altivec.c \ + h264_altivec.c \ + h264_template_altivec.c \ fdct_altivec.c \ fft_altivec.c \ + float_altivec.c \ idct_altivec.c \ gmc_altivec.c \ mpegvideo_altivec.c \ - mpegvideo_ppc.c + mpegvideo_ppc.c \ + snow_altivec.c \ + vc1dsp_altivec.c libavcodec_ppc_dummy = libavcodec_ppc_dummy.c EXTRA_DIST = $(libavcodec_ppc_src) $(libavcodec_ppc_dummy) @@ -28,7 +31,6 @@ EXTRA_DIST = $(libavcodec_ppc_src) $(libavcodec_ppc_dummy) #ppc_modules = $(libavcodec_ppc_src) #endif - libavcodec_ppc_la_SOURCES = $(ppc_modules) $(libavcodec_ppc_dummy) -noinst_HEADERS = dsputil_altivec.h dsputil_ppc.h gcc_fixes.h +noinst_HEADERS = dsputil_altivec.h dsputil_ppc.h gcc_fixes.h mathops.h types_altivec.h diff --git a/src/libffmpeg/libavcodec/ppc/float_altivec.c b/src/libffmpeg/libavcodec/ppc/float_altivec.c new file mode 100644 index 000000000..c6e43dec2 --- /dev/null +++ b/src/libffmpeg/libavcodec/ppc/float_altivec.c @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
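
The parser.c hunk above corrects the usage example in the av_parser_parse() documentation: a parser can consume input while a frame is still incomplete, returning size == 0, and the old example decoded unconditionally. The fixed loop, essentially as the comment now reads (myparser, avctx, decode_frame and the pts/dts variables are the documentation's own placeholders):

while (in_len) {
    uint8_t *data;
    int size;
    int len = av_parser_parse(myparser, avctx, &data, &size,
                              in_data, in_len, pts, dts);
    in_data += len;               /* step past the consumed input */
    in_len  -= len;

    if (size)                     /* only decode once a frame is complete */
        decode_frame(data, size);
}
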
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "../dsputil.h" + +#include "gcc_fixes.h" + +#include "dsputil_altivec.h" + +static void vector_fmul_altivec(float *dst, const float *src, int len) +{ + int i; + vector float d0, d1, s, zero = (vector float)vec_splat_u32(0); + for(i=0; i<len-7; i+=8) { + d0 = vec_ld(0, dst+i); + s = vec_ld(0, src+i); + d1 = vec_ld(16, dst+i); + d0 = vec_madd(d0, s, zero); + d1 = vec_madd(d1, vec_ld(16,src+i), zero); + vec_st(d0, 0, dst+i); + vec_st(d1, 16, dst+i); + } +} + +static void vector_fmul_reverse_altivec(float *dst, const float *src0, + const float *src1, int len) +{ + int i; + vector float d, s0, s1, h0, l0, + s2, s3, zero = (vector float)vec_splat_u32(0); + src1 += len-4; + for(i=0; i<len-7; i+=8) { + s1 = vec_ld(0, src1-i); // [a,b,c,d] + s0 = vec_ld(0, src0+i); + l0 = vec_mergel(s1, s1); // [c,c,d,d] + s3 = vec_ld(-16, src1-i); + h0 = vec_mergeh(s1, s1); // [a,a,b,b] + s2 = vec_ld(16, src0+i); + s1 = vec_mergeh(vec_mergel(l0,h0), // [d,b,d,b] + vec_mergeh(l0,h0)); // [c,a,c,a] + // [d,c,b,a] + l0 = vec_mergel(s3, s3); + d = vec_madd(s0, s1, zero); + h0 = vec_mergeh(s3, s3); + vec_st(d, 0, dst+i); + s3 = vec_mergeh(vec_mergel(l0,h0), + vec_mergeh(l0,h0)); + d = vec_madd(s2, s3, zero); + vec_st(d, 16, dst+i); + } +} + +static void vector_fmul_add_add_altivec(float *dst, const float *src0, + const float *src1, const float *src2, + int src3, int len, int step) +{ + int i; + vector float d, s0, s1, s2, t0, t1, edges; + vector unsigned char align = vec_lvsr(0,dst), + mask = vec_lvsl(0, dst); + + t0 = vec_ld(0, dst); +#if 0 //FIXME: there is still something wrong + if (step == 2) { + int y; + vector float d0, d1, s3, t2; + vector unsigned int sel = + vec_mergeh(vec_splat_u32(-1), vec_splat_u32(0)); + t1 = vec_ld(16, dst); + for (i=0,y=0; i<len-3; i+=4,y+=8) { + + s0 = vec_ld(0,src0+i); + s1 = vec_ld(0,src1+i); + s2 = vec_ld(0,src2+i); + +// t0 = vec_ld(0, dst+y); //[x x x|a] +// t1 = vec_ld(16, dst+y); //[b c d|e] + t2 = vec_ld(31, dst+y); //[f g h|x] + + d = vec_madd(s0,s1,s2); // [A B C D] + + // [A A B B] + + // [C C D D] + + d0 = vec_perm(t0, t1, mask); // [a b c d] + + d0 = vec_sel(vec_mergeh(d, d), d0, sel); // [A b B d] + + edges = vec_perm(t1, t0, mask); + + t0 = vec_perm(edges, d0, align); // [x x x|A] + + t1 = vec_perm(d0, edges, align); // [b B d|e] + + vec_stl(t0, 0, dst+y); + + d1 = vec_perm(t1, t2, mask); // [e f g h] + + d1 = vec_sel(vec_mergel(d, d), d1, sel); // [C f D h] + + edges = vec_perm(t2, t1, mask); + + t1 = vec_perm(edges, d1, align); // [b B d|C] + + t2 = vec_perm(d1, edges, align); // [f D h|x] + + vec_stl(t1, 16, dst+y); + + t0 = t1; + + vec_stl(t2, 31, dst+y); + + t1 = t2; + } + } else + #endif + if (step == 1 && src3 == 0) + for (i=0; i<len-3; i+=4) { + t1 = vec_ld(15, dst+i); + s0 = vec_ld(0, src0+i); + s1 = vec_ld(0, src1+i); + s2 = vec_ld(0, src2+i); + edges = vec_perm(t1 ,t0, mask); + d = vec_madd(s0,s1,s2); + t1 = vec_perm(d, edges, align); + t0 = vec_perm(edges, d, align); + vec_st(t1, 15, dst+i); + vec_st(t0, 0, dst+i); + t0 = t1; + } + else + ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step); +} + +void float_to_int16_altivec(int16_t *dst, const float *src, int len) +{ + int i; + vector float s0, s1; + vector signed int t0, t1; + vector signed short d0, d1, d; + vector unsigned char align; + if(((long)dst)&15) 
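/* Scalar contracts implemented by this file, per the C fallbacks in
 * dsputil.c (annotation, not part of the patch):
 *   vector_fmul:         dst[i] *= src[i]
 *   vector_fmul_reverse: dst[i]  = src0[i] * src1[len-1-i]
 *   vector_fmul_add_add: dst[i*step] = src0[i]*src1[i] + src2[i] + src3
 * float_to_int16 below is roughly, assuming vec_cts() truncates and
 * vec_packs() saturates to the int16 range:
 *   int v = (int)src[i];
 *   dst[i] = v > 32767 ? 32767 : (v < -32768 ? -32768 : v);
 * The two loops differ only in how an unaligned dst is stored. */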
//FIXME + for(i=0; i<len-7; i+=8) { + s0 = vec_ld(0, src+i); + s1 = vec_ld(16, src+i); + t0 = vec_cts(s0, 0); + d0 = vec_ld(0, dst+i); + t1 = vec_cts(s1, 0); + d1 = vec_ld(15, dst+i); + d = vec_packs(t0,t1); + d1 = vec_perm(d1, d0, vec_lvsl(0,dst+i)); + align = vec_lvsr(0, dst+i); + d0 = vec_perm(d1, d, align); + d1 = vec_perm(d, d1, align); + vec_st(d0, 0, dst+i); + vec_st(d1,15, dst+i); + } + else + for(i=0; i<len-7; i+=8) { + s0 = vec_ld(0, src+i); + s1 = vec_ld(16, src+i); + t0 = vec_cts(s0, 0); + t1 = vec_cts(s1, 0); + d = vec_packs(t0,t1); + vec_st(d, 0, dst+i); + } +} + +void float_init_altivec(DSPContext* c, AVCodecContext *avctx) +{ + c->vector_fmul = vector_fmul_altivec; + c->vector_fmul_reverse = vector_fmul_reverse_altivec; + c->vector_fmul_add_add = vector_fmul_add_add_altivec; + if(!(avctx->flags & CODEC_FLAG_BITEXACT)) + c->float_to_int16 = float_to_int16_altivec; +} diff --git a/src/libffmpeg/libavcodec/ppc/h264_altivec.c b/src/libffmpeg/libavcodec/ppc/h264_altivec.c new file mode 100644 index 000000000..bac620e82 --- /dev/null +++ b/src/libffmpeg/libavcodec/ppc/h264_altivec.c @@ -0,0 +1,565 @@ +/* + * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "../dsputil.h" + +#include "gcc_fixes.h" + +#include "dsputil_altivec.h" +#include "types_altivec.h" + +#define PUT_OP_U8_ALTIVEC(d, s, dst) d = s +#define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s) + +#define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC +#define PREFIX_h264_chroma_mc8_altivec put_h264_chroma_mc8_altivec +#define PREFIX_h264_chroma_mc8_num altivec_put_h264_chroma_mc8_num +#define PREFIX_h264_qpel16_h_lowpass_altivec put_h264_qpel16_h_lowpass_altivec +#define PREFIX_h264_qpel16_h_lowpass_num altivec_put_h264_qpel16_h_lowpass_num +#define PREFIX_h264_qpel16_v_lowpass_altivec put_h264_qpel16_v_lowpass_altivec +#define PREFIX_h264_qpel16_v_lowpass_num altivec_put_h264_qpel16_v_lowpass_num +#define PREFIX_h264_qpel16_hv_lowpass_altivec put_h264_qpel16_hv_lowpass_altivec +#define PREFIX_h264_qpel16_hv_lowpass_num altivec_put_h264_qpel16_hv_lowpass_num +#include "h264_template_altivec.c" +#undef OP_U8_ALTIVEC +#undef PREFIX_h264_chroma_mc8_altivec +#undef PREFIX_h264_chroma_mc8_num +#undef PREFIX_h264_qpel16_h_lowpass_altivec +#undef PREFIX_h264_qpel16_h_lowpass_num +#undef PREFIX_h264_qpel16_v_lowpass_altivec +#undef PREFIX_h264_qpel16_v_lowpass_num +#undef PREFIX_h264_qpel16_hv_lowpass_altivec +#undef PREFIX_h264_qpel16_hv_lowpass_num + +#define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC +#define PREFIX_h264_chroma_mc8_altivec avg_h264_chroma_mc8_altivec +#define PREFIX_h264_chroma_mc8_num altivec_avg_h264_chroma_mc8_num +#define PREFIX_h264_qpel16_h_lowpass_altivec avg_h264_qpel16_h_lowpass_altivec +#define PREFIX_h264_qpel16_h_lowpass_num 
altivec_avg_h264_qpel16_h_lowpass_num +#define PREFIX_h264_qpel16_v_lowpass_altivec avg_h264_qpel16_v_lowpass_altivec +#define PREFIX_h264_qpel16_v_lowpass_num altivec_avg_h264_qpel16_v_lowpass_num +#define PREFIX_h264_qpel16_hv_lowpass_altivec avg_h264_qpel16_hv_lowpass_altivec +#define PREFIX_h264_qpel16_hv_lowpass_num altivec_avg_h264_qpel16_hv_lowpass_num +#include "h264_template_altivec.c" +#undef OP_U8_ALTIVEC +#undef PREFIX_h264_chroma_mc8_altivec +#undef PREFIX_h264_chroma_mc8_num +#undef PREFIX_h264_qpel16_h_lowpass_altivec +#undef PREFIX_h264_qpel16_h_lowpass_num +#undef PREFIX_h264_qpel16_v_lowpass_altivec +#undef PREFIX_h264_qpel16_v_lowpass_num +#undef PREFIX_h264_qpel16_hv_lowpass_altivec +#undef PREFIX_h264_qpel16_hv_lowpass_num + +#define H264_MC(OPNAME, SIZE, CODETYPE) \ +static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uint8_t *src, int stride){\ + OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){ \ + DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\ + put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\ + put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\ + DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\ + put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\ + DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\ + put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## 
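/* Map of the 16 quarter-pel cases this H264_MC macro expands to
 * (annotation): mc00 is a plain copy; mc20/mc02 are the 6-tap
 * horizontal/vertical half-pel filters; mc10/mc30 (resp. mc01/mc03)
 * average the half-pel plane with the nearest full-pel samples;
 * mc22 is the combined hv filter; the remaining diagonal cases average
 * two of the halfH/halfV/halfHV intermediate planes, matching the
 * scalar H264_MC macro elsewhere in libavcodec. */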
CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\ + DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\ + put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\ + DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\ + put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\ + OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\ + DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\ + DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\ + put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ + put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\ + DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\ + DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\ + put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\ + put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\ + DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\ + DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\ + put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ + DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\ + DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\ + DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\ + put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\ +}\ + +/* this code assume that stride % 16 == 0 */ +void put_no_rnd_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) { + signed int ABCD[4] 
__attribute__((aligned(16))) = + {((8 - x) * (8 - y)), + ((x) * (8 - y)), + ((8 - x) * (y)), + ((x) * (y))}; + register int i; + vector unsigned char fperm; + const vector signed int vABCD = vec_ld(0, ABCD); + const vector signed short vA = vec_splat((vector signed short)vABCD, 1); + const vector signed short vB = vec_splat((vector signed short)vABCD, 3); + const vector signed short vC = vec_splat((vector signed short)vABCD, 5); + const vector signed short vD = vec_splat((vector signed short)vABCD, 7); + const vector signed int vzero = vec_splat_s32(0); + const vector signed short v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4)); + const vector unsigned short v6us = vec_splat_u16(6); + register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1; + register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0; + + vector unsigned char vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1; + vector unsigned char vsrc0uc, vsrc1uc; + vector signed short vsrc0ssH, vsrc1ssH; + vector unsigned char vsrcCuc, vsrc2uc, vsrc3uc; + vector signed short vsrc2ssH, vsrc3ssH, psum; + vector unsigned char vdst, ppsum, fsum; + + if (((unsigned long)dst) % 16 == 0) { + fperm = (vector unsigned char)AVV(0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17, + 0x08, 0x09, 0x0A, 0x0B, + 0x0C, 0x0D, 0x0E, 0x0F); + } else { + fperm = (vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, + 0x18, 0x19, 0x1A, 0x1B, + 0x1C, 0x1D, 0x1E, 0x1F); + } + + vsrcAuc = vec_ld(0, src); + + if (loadSecond) + vsrcBuc = vec_ld(16, src); + vsrcperm0 = vec_lvsl(0, src); + vsrcperm1 = vec_lvsl(1, src); + + vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0); + if (reallyBadAlign) + vsrc1uc = vsrcBuc; + else + vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1); + + vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc0uc); + vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc1uc); + + if (!loadSecond) {// -> !reallyBadAlign + for (i = 0 ; i < h ; i++) { + + + vsrcCuc = vec_ld(stride + 0, src); + + vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); + vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1); + + vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc2uc); + vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc3uc); + + psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0)); + psum = vec_mladd(vB, vsrc1ssH, psum); + psum = vec_mladd(vC, vsrc2ssH, psum); + psum = vec_mladd(vD, vsrc3ssH, psum); + psum = vec_add(v28ss, psum); + psum = vec_sra(psum, v6us); + + vdst = vec_ld(0, dst); + ppsum = (vector unsigned char)vec_packsu(psum, psum); + fsum = vec_perm(vdst, ppsum, fperm); + + vec_st(fsum, 0, dst); + + vsrc0ssH = vsrc2ssH; + vsrc1ssH = vsrc3ssH; + + dst += stride; + src += stride; + } + } else { + vector unsigned char vsrcDuc; + for (i = 0 ; i < h ; i++) { + vsrcCuc = vec_ld(stride + 0, src); + vsrcDuc = vec_ld(stride + 16, src); + + vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); + if (reallyBadAlign) + vsrc3uc = vsrcDuc; + else + vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1); + + vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc2uc); + vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc3uc); + + psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0)); + psum = vec_mladd(vB, vsrc1ssH, psum); + psum = vec_mladd(vC, 
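/* Per pixel, the vec_mladd() chain evaluates the H.264 eighth-pel
 * bilinear chroma filter (x, y in 0..7):
 *   dst = (A*s[0] + B*s[1] + C*s[stride] + D*s[stride+1] + bias) >> 6
 * with A=(8-x)*(8-y), B=x*(8-y), C=(8-x)*y, D=x*y -- the ABCD[] table
 * above. In this no-rounding variant the bias is 28 (v28ss = 32 - 4)
 * rather than the usual 32. (Annotation, not part of the patch.) */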
vsrc2ssH, psum); + psum = vec_mladd(vD, vsrc3ssH, psum); + psum = vec_add(v28ss, psum); + psum = vec_sr(psum, v6us); + + vdst = vec_ld(0, dst); + ppsum = (vector unsigned char)vec_pack(psum, psum); + fsum = vec_perm(vdst, ppsum, fperm); + + vec_st(fsum, 0, dst); + + vsrc0ssH = vsrc2ssH; + vsrc1ssH = vsrc3ssH; + + dst += stride; + src += stride; + } + } +} + +static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1, + const uint8_t * src2, int dst_stride, + int src_stride1, int h) +{ + int i; + vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align; + + mask_ = vec_lvsl(0, src2); + + for (i = 0; i < h; i++) { + + tmp1 = vec_ld(i * src_stride1, src1); + mask = vec_lvsl(i * src_stride1, src1); + tmp2 = vec_ld(i * src_stride1 + 15, src1); + + a = vec_perm(tmp1, tmp2, mask); + + tmp1 = vec_ld(i * 16, src2); + tmp2 = vec_ld(i * 16 + 15, src2); + + b = vec_perm(tmp1, tmp2, mask_); + + tmp1 = vec_ld(0, dst); + mask = vec_lvsl(0, dst); + tmp2 = vec_ld(15, dst); + + d = vec_avg(a, b); + + edges = vec_perm(tmp2, tmp1, mask); + + align = vec_lvsr(0, dst); + + tmp2 = vec_perm(d, edges, align); + tmp1 = vec_perm(edges, d, align); + + vec_st(tmp2, 15, dst); + vec_st(tmp1, 0 , dst); + + dst += dst_stride; + } +} + +static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1, + const uint8_t * src2, int dst_stride, + int src_stride1, int h) +{ + int i; + vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align; + + mask_ = vec_lvsl(0, src2); + + for (i = 0; i < h; i++) { + + tmp1 = vec_ld(i * src_stride1, src1); + mask = vec_lvsl(i * src_stride1, src1); + tmp2 = vec_ld(i * src_stride1 + 15, src1); + + a = vec_perm(tmp1, tmp2, mask); + + tmp1 = vec_ld(i * 16, src2); + tmp2 = vec_ld(i * 16 + 15, src2); + + b = vec_perm(tmp1, tmp2, mask_); + + tmp1 = vec_ld(0, dst); + mask = vec_lvsl(0, dst); + tmp2 = vec_ld(15, dst); + + d = vec_avg(vec_perm(tmp1, tmp2, mask), vec_avg(a, b)); + + edges = vec_perm(tmp2, tmp1, mask); + + align = vec_lvsr(0, dst); + + tmp2 = vec_perm(d, edges, align); + tmp1 = vec_perm(edges, d, align); + + vec_st(tmp2, 15, dst); + vec_st(tmp1, 0 , dst); + + dst += dst_stride; + } +} + +/* Implemented but could be faster +#define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h) +#define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h) + */ + + H264_MC(put_, 16, altivec) + H264_MC(avg_, 16, altivec) + + +/**************************************************************************** + * IDCT transform: + ****************************************************************************/ + +#define IDCT8_1D_ALTIVEC(s0, s1, s2, s3, s4, s5, s6, s7, d0, d1, d2, d3, d4, d5, d6, d7) {\ + /* a0 = SRC(0) + SRC(4); */ \ + vec_s16_t a0v = vec_add(s0, s4); \ + /* a2 = SRC(0) - SRC(4); */ \ + vec_s16_t a2v = vec_sub(s0, s4); \ + /* a4 = (SRC(2)>>1) - SRC(6); */ \ + vec_s16_t a4v = vec_sub(vec_sra(s2, onev), s6); \ + /* a6 = (SRC(6)>>1) + SRC(2); */ \ + vec_s16_t a6v = vec_add(vec_sra(s6, onev), s2); \ + /* b0 = a0 + a6; */ \ + vec_s16_t b0v = vec_add(a0v, a6v); \ + /* b2 = a2 + a4; */ \ + vec_s16_t b2v = vec_add(a2v, a4v); \ + /* b4 = a2 - a4; */ \ + vec_s16_t b4v = vec_sub(a2v, a4v); \ + /* b6 = a0 - a6; */ \ + vec_s16_t b6v = vec_sub(a0v, a6v); \ + /* a1 = SRC(5) - SRC(3) - SRC(7) - (SRC(7)>>1); */ \ + /* a1 = (SRC(5)-SRC(3)) - (SRC(7) + (SRC(7)>>1)); */ \ + vec_s16_t a1v = vec_sub( vec_sub(s5, s3), vec_add(s7, vec_sra(s7, onev)) ); \ + /* a3 = SRC(7) + SRC(1) - SRC(3) - (SRC(3)>>1); */ \ 
+ /* a3 = (SRC(7)+SRC(1)) - (SRC(3) + (SRC(3)>>1)); */ \ + vec_s16_t a3v = vec_sub( vec_add(s7, s1), vec_add(s3, vec_sra(s3, onev)) );\ + /* a5 = SRC(7) - SRC(1) + SRC(5) + (SRC(5)>>1); */ \ + /* a5 = (SRC(7)-SRC(1)) + SRC(5) + (SRC(5)>>1); */ \ + vec_s16_t a5v = vec_add( vec_sub(s7, s1), vec_add(s5, vec_sra(s5, onev)) );\ + /* a7 = SRC(5)+SRC(3) + SRC(1) + (SRC(1)>>1); */ \ + vec_s16_t a7v = vec_add( vec_add(s5, s3), vec_add(s1, vec_sra(s1, onev)) );\ + /* b1 = (a7>>2) + a1; */ \ + vec_s16_t b1v = vec_add( vec_sra(a7v, twov), a1v); \ + /* b3 = a3 + (a5>>2); */ \ + vec_s16_t b3v = vec_add(a3v, vec_sra(a5v, twov)); \ + /* b5 = (a3>>2) - a5; */ \ + vec_s16_t b5v = vec_sub( vec_sra(a3v, twov), a5v); \ + /* b7 = a7 - (a1>>2); */ \ + vec_s16_t b7v = vec_sub( a7v, vec_sra(a1v, twov)); \ + /* DST(0, b0 + b7); */ \ + d0 = vec_add(b0v, b7v); \ + /* DST(1, b2 + b5); */ \ + d1 = vec_add(b2v, b5v); \ + /* DST(2, b4 + b3); */ \ + d2 = vec_add(b4v, b3v); \ + /* DST(3, b6 + b1); */ \ + d3 = vec_add(b6v, b1v); \ + /* DST(4, b6 - b1); */ \ + d4 = vec_sub(b6v, b1v); \ + /* DST(5, b4 - b3); */ \ + d5 = vec_sub(b4v, b3v); \ + /* DST(6, b2 - b5); */ \ + d6 = vec_sub(b2v, b5v); \ + /* DST(7, b0 - b7); */ \ + d7 = vec_sub(b0v, b7v); \ +} + +#define ALTIVEC_STORE_SUM_CLIP(dest, idctv, perm_ldv, perm_stv, sel) { \ + /* unaligned load */ \ + vec_u8_t hv = vec_ld( 0, dest ); \ + vec_u8_t lv = vec_ld( 7, dest ); \ + vec_u8_t dstv = vec_perm( hv, lv, (vec_u8_t)perm_ldv ); \ + vec_s16_t idct_sh6 = vec_sra(idctv, sixv); \ + vec_u16_t dst16 = (vec_u16_t)vec_mergeh(zero_u8v, dstv); \ + vec_s16_t idstsum = vec_adds(idct_sh6, (vec_s16_t)dst16); \ + vec_u8_t idstsum8 = vec_packsu(zero_s16v, idstsum); \ + vec_u8_t edgehv; \ + /* unaligned store */ \ + vec_u8_t bodyv = vec_perm( idstsum8, idstsum8, perm_stv );\ + vec_u8_t edgelv = vec_perm( sel, zero_u8v, perm_stv ); \ + lv = vec_sel( lv, bodyv, edgelv ); \ + vec_st( lv, 7, dest ); \ + hv = vec_ld( 0, dest ); \ + edgehv = vec_perm( zero_u8v, sel, perm_stv ); \ + hv = vec_sel( hv, bodyv, edgehv ); \ + vec_st( hv, 0, dest ); \ + } + +void ff_h264_idct8_add_altivec( uint8_t *dst, DCTELEM *dct, int stride ) { + vec_s16_t s0, s1, s2, s3, s4, s5, s6, s7; + vec_s16_t d0, d1, d2, d3, d4, d5, d6, d7; + vec_s16_t idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7; + + vec_u8_t perm_ldv = vec_lvsl(0, dst); + vec_u8_t perm_stv = vec_lvsr(8, dst); + + const vec_u16_t onev = vec_splat_u16(1); + const vec_u16_t twov = vec_splat_u16(2); + const vec_u16_t sixv = vec_splat_u16(6); + + const vec_u8_t sel = (vec_u8_t) AVV(0,0,0,0,0,0,0,0, + -1,-1,-1,-1,-1,-1,-1,-1); + LOAD_ZERO; + + dct[0] += 32; // rounding for the >>6 at the end + + s0 = vec_ld(0x00, (int16_t*)dct); + s1 = vec_ld(0x10, (int16_t*)dct); + s2 = vec_ld(0x20, (int16_t*)dct); + s3 = vec_ld(0x30, (int16_t*)dct); + s4 = vec_ld(0x40, (int16_t*)dct); + s5 = vec_ld(0x50, (int16_t*)dct); + s6 = vec_ld(0x60, (int16_t*)dct); + s7 = vec_ld(0x70, (int16_t*)dct); + + IDCT8_1D_ALTIVEC(s0, s1, s2, s3, s4, s5, s6, s7, + d0, d1, d2, d3, d4, d5, d6, d7); + + TRANSPOSE8( d0, d1, d2, d3, d4, d5, d6, d7 ); + + IDCT8_1D_ALTIVEC(d0, d1, d2, d3, d4, d5, d6, d7, + idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7); + + ALTIVEC_STORE_SUM_CLIP(&dst[0*stride], idct0, perm_ldv, perm_stv, sel); + ALTIVEC_STORE_SUM_CLIP(&dst[1*stride], idct1, perm_ldv, perm_stv, sel); + ALTIVEC_STORE_SUM_CLIP(&dst[2*stride], idct2, perm_ldv, perm_stv, sel); + ALTIVEC_STORE_SUM_CLIP(&dst[3*stride], idct3, perm_ldv, perm_stv, sel); + ALTIVEC_STORE_SUM_CLIP(&dst[4*stride], 
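/* Scalar shape of one IDCT8_1D pass, restating the comments inside
 * IDCT8_1D_ALTIVEC above (annotation, not part of the patch):
 *   a0 = s0 + s4;        a2 = s0 - s4;
 *   a4 = (s2>>1) - s6;   a6 = (s6>>1) + s2;
 *   b0 = a0+a6;  b2 = a2+a4;  b4 = a2-a4;  b6 = a0-a6;
 *   a1 = (s5-s3) - (s7 + (s7>>1));   a3 = (s7+s1) - (s3 + (s3>>1));
 *   a5 = (s7-s1) + (s5 + (s5>>1));   a7 = (s5+s3) + (s1 + (s1>>1));
 *   b1 = (a7>>2)+a1;  b3 = a3+(a5>>2);  b5 = (a3>>2)-a5;  b7 = a7-(a1>>2);
 *   d0..d7 = b0+b7, b2+b5, b4+b3, b6+b1, b6-b1, b4-b3, b2-b5, b0-b7
 * The pass runs on rows, the result is transposed, the pass runs again,
 * and the +32 added to dct[0] provides rounding for the final >>6. */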
idct4, perm_ldv, perm_stv, sel); + ALTIVEC_STORE_SUM_CLIP(&dst[5*stride], idct5, perm_ldv, perm_stv, sel); + ALTIVEC_STORE_SUM_CLIP(&dst[6*stride], idct6, perm_ldv, perm_stv, sel); + ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel); +} + +void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) { + +#ifdef HAVE_ALTIVEC + if (has_altivec()) { + c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec; + c->put_no_rnd_h264_chroma_pixels_tab[0] = put_no_rnd_h264_chroma_mc8_altivec; + c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec; + c->h264_idct8_add = ff_h264_idct8_add_altivec; + +#define dspfunc(PFX, IDX, NUM) \ + c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \ + c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_altivec; \ + c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_altivec; \ + c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_altivec; \ + c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_altivec; \ + c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_altivec; \ + c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_altivec; \ + c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_altivec; \ + c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_altivec; \ + c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_altivec; \ + c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_altivec; \ + c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_altivec; \ + c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_altivec; \ + c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \ + c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \ + c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec + + dspfunc(put_h264_qpel, 0, 16); + dspfunc(avg_h264_qpel, 0, 16); +#undef dspfunc + + } else +#endif /* HAVE_ALTIVEC */ + { + // Non-AltiVec PPC optimisations + + // ... pending ... + } +} diff --git a/src/libffmpeg/libavcodec/ppc/h264_template_altivec.c b/src/libffmpeg/libavcodec/ppc/h264_template_altivec.c new file mode 100644 index 000000000..e8ad67f2f --- /dev/null +++ b/src/libffmpeg/libavcodec/ppc/h264_template_altivec.c @@ -0,0 +1,719 @@ +/* + * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
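/* Annotation: h264_template_altivec.c, which begins above, is included
 * twice by h264_altivec.c -- once with OP_U8_ALTIVEC defined to
 * PUT_OP_U8_ALTIVEC (a plain store) and once to AVG_OP_U8_ALTIVEC
 * (d = vec_avg(dst, s)) -- so each function body yields both a put_*
 * and an avg_* variant under the PREFIX_* names. */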
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* this code assume that stride % 16 == 0 */ +void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) { + POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1); + signed int ABCD[4] __attribute__((aligned(16))) = + {((8 - x) * (8 - y)), + ((x) * (8 - y)), + ((8 - x) * (y)), + ((x) * (y))}; + register int i; + vector unsigned char fperm; + const vector signed int vABCD = vec_ld(0, ABCD); + const vector signed short vA = vec_splat((vector signed short)vABCD, 1); + const vector signed short vB = vec_splat((vector signed short)vABCD, 3); + const vector signed short vC = vec_splat((vector signed short)vABCD, 5); + const vector signed short vD = vec_splat((vector signed short)vABCD, 7); + const vector signed int vzero = vec_splat_s32(0); + const vector signed short v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5)); + const vector unsigned short v6us = vec_splat_u16(6); + register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1; + register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0; + + vector unsigned char vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1; + vector unsigned char vsrc0uc, vsrc1uc; + vector signed short vsrc0ssH, vsrc1ssH; + vector unsigned char vsrcCuc, vsrc2uc, vsrc3uc; + vector signed short vsrc2ssH, vsrc3ssH, psum; + vector unsigned char vdst, ppsum, vfdst, fsum; + + POWERPC_PERF_START_COUNT(PREFIX_h264_chroma_mc8_num, 1); + + if (((unsigned long)dst) % 16 == 0) { + fperm = (vector unsigned char)AVV(0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17, + 0x08, 0x09, 0x0A, 0x0B, + 0x0C, 0x0D, 0x0E, 0x0F); + } else { + fperm = (vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, + 0x18, 0x19, 0x1A, 0x1B, + 0x1C, 0x1D, 0x1E, 0x1F); + } + + vsrcAuc = vec_ld(0, src); + + if (loadSecond) + vsrcBuc = vec_ld(16, src); + vsrcperm0 = vec_lvsl(0, src); + vsrcperm1 = vec_lvsl(1, src); + + vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0); + if (reallyBadAlign) + vsrc1uc = vsrcBuc; + else + vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1); + + vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc0uc); + vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc1uc); + + if (!loadSecond) {// -> !reallyBadAlign + for (i = 0 ; i < h ; i++) { + + + vsrcCuc = vec_ld(stride + 0, src); + + vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); + vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1); + + vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc2uc); + vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc3uc); + + psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0)); + psum = vec_mladd(vB, vsrc1ssH, psum); + psum = vec_mladd(vC, vsrc2ssH, psum); + psum = vec_mladd(vD, vsrc3ssH, psum); + psum = vec_add(v32ss, psum); + psum = vec_sra(psum, v6us); + + vdst = vec_ld(0, dst); + ppsum = (vector unsigned char)vec_packsu(psum, psum); + vfdst = vec_perm(vdst, ppsum, fperm); + + OP_U8_ALTIVEC(fsum, vfdst, vdst); + + vec_st(fsum, 0, dst); + + vsrc0ssH = vsrc2ssH; + vsrc1ssH = vsrc3ssH; + + dst += stride; + src += stride; + } + } else { + vector unsigned char vsrcDuc; + for (i = 0 ; i < h ; i++) { + vsrcCuc = vec_ld(stride + 0, src); + 
vsrcDuc = vec_ld(stride + 16, src); + + vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); + if (reallyBadAlign) + vsrc3uc = vsrcDuc; + else + vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1); + + vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc2uc); + vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc3uc); + + psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0)); + psum = vec_mladd(vB, vsrc1ssH, psum); + psum = vec_mladd(vC, vsrc2ssH, psum); + psum = vec_mladd(vD, vsrc3ssH, psum); + psum = vec_add(v32ss, psum); + psum = vec_sr(psum, v6us); + + vdst = vec_ld(0, dst); + ppsum = (vector unsigned char)vec_pack(psum, psum); + vfdst = vec_perm(vdst, ppsum, fperm); + + OP_U8_ALTIVEC(fsum, vfdst, vdst); + + vec_st(fsum, 0, dst); + + vsrc0ssH = vsrc2ssH; + vsrc1ssH = vsrc3ssH; + + dst += stride; + src += stride; + } + } + POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1); +} + +/* this code assume stride % 16 == 0 */ +static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) { + POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_h_lowpass_num, 1); + register int i; + + const vector signed int vzero = vec_splat_s32(0); + const vector unsigned char permM2 = vec_lvsl(-2, src); + const vector unsigned char permM1 = vec_lvsl(-1, src); + const vector unsigned char permP0 = vec_lvsl(+0, src); + const vector unsigned char permP1 = vec_lvsl(+1, src); + const vector unsigned char permP2 = vec_lvsl(+2, src); + const vector unsigned char permP3 = vec_lvsl(+3, src); + const vector signed short v5ss = vec_splat_s16(5); + const vector unsigned short v5us = vec_splat_u16(5); + const vector signed short v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2)); + const vector signed short v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4)); + const vector unsigned char dstperm = vec_lvsr(0, dst); + const vector unsigned char neg1 = + (const vector unsigned char) vec_splat_s8(-1); + + const vector unsigned char dstmask = + vec_perm((const vector unsigned char)vzero, + neg1, dstperm); + + vector unsigned char srcM2, srcM1, srcP0, srcP1, srcP2, srcP3; + + register int align = ((((unsigned long)src) - 2) % 16); + + vector signed short srcP0A, srcP0B, srcP1A, srcP1B, + srcP2A, srcP2B, srcP3A, srcP3B, + srcM1A, srcM1B, srcM2A, srcM2B, + sum1A, sum1B, sum2A, sum2B, sum3A, sum3B, + pp1A, pp1B, pp2A, pp2B, pp3A, pp3B, + psumA, psumB, sumA, sumB; + + vector unsigned char sum, dst1, dst2, vdst, fsum, + rsum, fdst1, fdst2; + + POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1); + + for (i = 0 ; i < 16 ; i ++) { + vector unsigned char srcR1 = vec_ld(-2, src); + vector unsigned char srcR2 = vec_ld(14, src); + + switch (align) { + default: { + srcM2 = vec_perm(srcR1, srcR2, permM2); + srcM1 = vec_perm(srcR1, srcR2, permM1); + srcP0 = vec_perm(srcR1, srcR2, permP0); + srcP1 = vec_perm(srcR1, srcR2, permP1); + srcP2 = vec_perm(srcR1, srcR2, permP2); + srcP3 = vec_perm(srcR1, srcR2, permP3); + } break; + case 11: { + srcM2 = vec_perm(srcR1, srcR2, permM2); + srcM1 = vec_perm(srcR1, srcR2, permM1); + srcP0 = vec_perm(srcR1, srcR2, permP0); + srcP1 = vec_perm(srcR1, srcR2, permP1); + srcP2 = vec_perm(srcR1, srcR2, permP2); + srcP3 = srcR2; + } break; + case 12: { + vector unsigned char srcR3 = vec_ld(30, src); + srcM2 = vec_perm(srcR1, srcR2, permM2); + srcM1 = vec_perm(srcR1, srcR2, permM1); + srcP0 = vec_perm(srcR1, srcR2, permP0); + srcP1 = vec_perm(srcR1, srcR2, permP1); + srcP2 = srcR2; + 
srcP3 = vec_perm(srcR2, srcR3, permP3); + } break; + case 13: { + vector unsigned char srcR3 = vec_ld(30, src); + srcM2 = vec_perm(srcR1, srcR2, permM2); + srcM1 = vec_perm(srcR1, srcR2, permM1); + srcP0 = vec_perm(srcR1, srcR2, permP0); + srcP1 = srcR2; + srcP2 = vec_perm(srcR2, srcR3, permP2); + srcP3 = vec_perm(srcR2, srcR3, permP3); + } break; + case 14: { + vector unsigned char srcR3 = vec_ld(30, src); + srcM2 = vec_perm(srcR1, srcR2, permM2); + srcM1 = vec_perm(srcR1, srcR2, permM1); + srcP0 = srcR2; + srcP1 = vec_perm(srcR2, srcR3, permP1); + srcP2 = vec_perm(srcR2, srcR3, permP2); + srcP3 = vec_perm(srcR2, srcR3, permP3); + } break; + case 15: { + vector unsigned char srcR3 = vec_ld(30, src); + srcM2 = vec_perm(srcR1, srcR2, permM2); + srcM1 = srcR2; + srcP0 = vec_perm(srcR2, srcR3, permP0); + srcP1 = vec_perm(srcR2, srcR3, permP1); + srcP2 = vec_perm(srcR2, srcR3, permP2); + srcP3 = vec_perm(srcR2, srcR3, permP3); + } break; + } + + srcP0A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP0); + srcP0B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP0); + srcP1A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP1); + srcP1B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP1); + + srcP2A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP2); + srcP2B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP2); + srcP3A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP3); + srcP3B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP3); + + srcM1A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcM1); + srcM1B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcM1); + srcM2A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcM2); + srcM2B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcM2); + + sum1A = vec_adds(srcP0A, srcP1A); + sum1B = vec_adds(srcP0B, srcP1B); + sum2A = vec_adds(srcM1A, srcP2A); + sum2B = vec_adds(srcM1B, srcP2B); + sum3A = vec_adds(srcM2A, srcP3A); + sum3B = vec_adds(srcM2B, srcP3B); + + pp1A = vec_mladd(sum1A, v20ss, v16ss); + pp1B = vec_mladd(sum1B, v20ss, v16ss); + + pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero); + pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero); + + pp3A = vec_add(sum3A, pp1A); + pp3B = vec_add(sum3B, pp1B); + + psumA = vec_sub(pp3A, pp2A); + psumB = vec_sub(pp3B, pp2B); + + sumA = vec_sra(psumA, v5us); + sumB = vec_sra(psumB, v5us); + + sum = vec_packsu(sumA, sumB); + + dst1 = vec_ld(0, dst); + dst2 = vec_ld(16, dst); + vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst)); + + OP_U8_ALTIVEC(fsum, sum, vdst); + + rsum = vec_perm(fsum, fsum, dstperm); + fdst1 = vec_sel(dst1, rsum, dstmask); + fdst2 = vec_sel(rsum, dst2, dstmask); + + vec_st(fdst1, 0, dst); + vec_st(fdst2, 16, dst); + + src += srcStride; + dst += dstStride; + } +POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1); +} + +/* this code assume stride % 16 == 0 */ +static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) { + POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_v_lowpass_num, 1); + + register int i; + + const vector signed int vzero = vec_splat_s32(0); + const vector unsigned char perm = vec_lvsl(0, src); + const vector signed short v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2)); + const vector unsigned short v5us = vec_splat_u16(5); + const vector signed short v5ss = 
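/* Both the horizontal pass above and the vertical pass starting here
 * evaluate the H.264 6-tap half-pel luma filter; per output pixel, in
 * scalar form (annotation, not part of the patch):
 *   sum = (m2 + p3) - 5*(m1 + p2) + 20*(p0 + p1);
 *   out = clip_to_uint8((sum + 16) >> 5);
 * with m2..p3 the six neighbouring samples. The vec_mladd() chains
 * compute exactly this via v20ss=20, v5ss=5 and v16ss=16, and
 * vec_packsu() performs the unsigned-8-bit clamp. */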
vec_splat_s16(5); + const vector signed short v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4)); + const vector unsigned char dstperm = vec_lvsr(0, dst); + const vector unsigned char neg1 = (const vector unsigned char)vec_splat_s8(-1); + const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm); + + uint8_t *srcbis = src - (srcStride * 2); + + const vector unsigned char srcM2a = vec_ld(0, srcbis); + const vector unsigned char srcM2b = vec_ld(16, srcbis); + const vector unsigned char srcM2 = vec_perm(srcM2a, srcM2b, perm); +// srcbis += srcStride; + const vector unsigned char srcM1a = vec_ld(0, srcbis += srcStride); + const vector unsigned char srcM1b = vec_ld(16, srcbis); + const vector unsigned char srcM1 = vec_perm(srcM1a, srcM1b, perm); +// srcbis += srcStride; + const vector unsigned char srcP0a = vec_ld(0, srcbis += srcStride); + const vector unsigned char srcP0b = vec_ld(16, srcbis); + const vector unsigned char srcP0 = vec_perm(srcP0a, srcP0b, perm); +// srcbis += srcStride; + const vector unsigned char srcP1a = vec_ld(0, srcbis += srcStride); + const vector unsigned char srcP1b = vec_ld(16, srcbis); + const vector unsigned char srcP1 = vec_perm(srcP1a, srcP1b, perm); +// srcbis += srcStride; + const vector unsigned char srcP2a = vec_ld(0, srcbis += srcStride); + const vector unsigned char srcP2b = vec_ld(16, srcbis); + const vector unsigned char srcP2 = vec_perm(srcP2a, srcP2b, perm); +// srcbis += srcStride; + + vector signed short srcM2ssA = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcM2); + vector signed short srcM2ssB = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcM2); + vector signed short srcM1ssA = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcM1); + vector signed short srcM1ssB = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcM1); + vector signed short srcP0ssA = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP0); + vector signed short srcP0ssB = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP0); + vector signed short srcP1ssA = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP1); + vector signed short srcP1ssB = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP1); + vector signed short srcP2ssA = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP2); + vector signed short srcP2ssB = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP2); + + vector signed short pp1A, pp1B, pp2A, pp2B, pp3A, pp3B, + psumA, psumB, sumA, sumB, + srcP3ssA, srcP3ssB, + sum1A, sum1B, sum2A, sum2B, sum3A, sum3B; + + vector unsigned char sum, dst1, dst2, vdst, fsum, rsum, fdst1, fdst2, + srcP3a, srcP3b, srcP3; + + POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1); + + for (i = 0 ; i < 16 ; i++) { + srcP3a = vec_ld(0, srcbis += srcStride); + srcP3b = vec_ld(16, srcbis); + srcP3 = vec_perm(srcP3a, srcP3b, perm); + srcP3ssA = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP3); + srcP3ssB = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP3); +// srcbis += srcStride; + + sum1A = vec_adds(srcP0ssA, srcP1ssA); + sum1B = vec_adds(srcP0ssB, srcP1ssB); + sum2A = vec_adds(srcM1ssA, srcP2ssA); + sum2B = vec_adds(srcM1ssB, srcP2ssB); + sum3A = vec_adds(srcM2ssA, srcP3ssA); + sum3B = vec_adds(srcM2ssB, srcP3ssB); + + srcM2ssA = srcM1ssA; + srcM2ssB = srcM1ssB; + srcM1ssA = srcP0ssA; + srcM1ssB = srcP0ssB; + srcP0ssA = srcP1ssA; 
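/* Annotation: this block of copies slides the six-row filter window
 * down one line, so each loop iteration only has to load the new
 * bottom row (srcP3) instead of re-reading all six rows. */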
+ srcP0ssB = srcP1ssB; + srcP1ssA = srcP2ssA; + srcP1ssB = srcP2ssB; + srcP2ssA = srcP3ssA; + srcP2ssB = srcP3ssB; + + pp1A = vec_mladd(sum1A, v20ss, v16ss); + pp1B = vec_mladd(sum1B, v20ss, v16ss); + + pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero); + pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero); + + pp3A = vec_add(sum3A, pp1A); + pp3B = vec_add(sum3B, pp1B); + + psumA = vec_sub(pp3A, pp2A); + psumB = vec_sub(pp3B, pp2B); + + sumA = vec_sra(psumA, v5us); + sumB = vec_sra(psumB, v5us); + + sum = vec_packsu(sumA, sumB); + + dst1 = vec_ld(0, dst); + dst2 = vec_ld(16, dst); + vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst)); + + OP_U8_ALTIVEC(fsum, sum, vdst); + + rsum = vec_perm(fsum, fsum, dstperm); + fdst1 = vec_sel(dst1, rsum, dstmask); + fdst2 = vec_sel(rsum, dst2, dstmask); + + vec_st(fdst1, 0, dst); + vec_st(fdst2, 16, dst); + + dst += dstStride; + } + POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1); +} + +/* this code assume stride % 16 == 0 *and* tmp is properly aligned */ +static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp, uint8_t * src, int dstStride, int tmpStride, int srcStride) { + POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_hv_lowpass_num, 1); + register int i; + const vector signed int vzero = vec_splat_s32(0); + const vector unsigned char permM2 = vec_lvsl(-2, src); + const vector unsigned char permM1 = vec_lvsl(-1, src); + const vector unsigned char permP0 = vec_lvsl(+0, src); + const vector unsigned char permP1 = vec_lvsl(+1, src); + const vector unsigned char permP2 = vec_lvsl(+2, src); + const vector unsigned char permP3 = vec_lvsl(+3, src); + const vector signed short v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2)); + const vector unsigned int v10ui = vec_splat_u32(10); + const vector signed short v5ss = vec_splat_s16(5); + const vector signed short v1ss = vec_splat_s16(1); + const vector signed int v512si = vec_sl(vec_splat_s32(1),vec_splat_u32(9)); + const vector unsigned int v16ui = vec_sl(vec_splat_u32(1),vec_splat_u32(4)); + + register int align = ((((unsigned long)src) - 2) % 16); + + const vector unsigned char neg1 = (const vector unsigned char) + vec_splat_s8(-1); + + vector signed short srcP0A, srcP0B, srcP1A, srcP1B, + srcP2A, srcP2B, srcP3A, srcP3B, + srcM1A, srcM1B, srcM2A, srcM2B, + sum1A, sum1B, sum2A, sum2B, sum3A, sum3B, + pp1A, pp1B, pp2A, pp2B, psumA, psumB; + + const vector unsigned char dstperm = vec_lvsr(0, dst); + + const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm); + + const vector unsigned char mperm = (const vector unsigned char) + AVV(0x00, 0x08, 0x01, 0x09, 0x02, 0x0A, 0x03, 0x0B, + 0x04, 0x0C, 0x05, 0x0D, 0x06, 0x0E, 0x07, 0x0F); + int16_t *tmpbis = tmp; + + vector signed short tmpM1ssA, tmpM1ssB, tmpM2ssA, tmpM2ssB, + tmpP0ssA, tmpP0ssB, tmpP1ssA, tmpP1ssB, + tmpP2ssA, tmpP2ssB; + + vector signed int pp1Ae, pp1Ao, pp1Be, pp1Bo, pp2Ae, pp2Ao, pp2Be, pp2Bo, + pp3Ae, pp3Ao, pp3Be, pp3Bo, pp1cAe, pp1cAo, pp1cBe, pp1cBo, + pp32Ae, pp32Ao, pp32Be, pp32Bo, sumAe, sumAo, sumBe, sumBo, + ssumAe, ssumAo, ssumBe, ssumBo; + vector unsigned char fsum, sumv, sum, dst1, dst2, vdst, + rsum, fdst1, fdst2; + vector signed short ssume, ssumo; + + POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1); + src -= (2 * srcStride); + for (i = 0 ; i < 21 ; i ++) { + vector unsigned char srcM2, srcM1, srcP0, srcP1, srcP2, srcP3; + vector unsigned char srcR1 = vec_ld(-2, src); + vector unsigned char srcR2 = vec_ld(14, src); + + switch (align) { + default: 
{ + srcM2 = vec_perm(srcR1, srcR2, permM2); + srcM1 = vec_perm(srcR1, srcR2, permM1); + srcP0 = vec_perm(srcR1, srcR2, permP0); + srcP1 = vec_perm(srcR1, srcR2, permP1); + srcP2 = vec_perm(srcR1, srcR2, permP2); + srcP3 = vec_perm(srcR1, srcR2, permP3); + } break; + case 11: { + srcM2 = vec_perm(srcR1, srcR2, permM2); + srcM1 = vec_perm(srcR1, srcR2, permM1); + srcP0 = vec_perm(srcR1, srcR2, permP0); + srcP1 = vec_perm(srcR1, srcR2, permP1); + srcP2 = vec_perm(srcR1, srcR2, permP2); + srcP3 = srcR2; + } break; + case 12: { + vector unsigned char srcR3 = vec_ld(30, src); + srcM2 = vec_perm(srcR1, srcR2, permM2); + srcM1 = vec_perm(srcR1, srcR2, permM1); + srcP0 = vec_perm(srcR1, srcR2, permP0); + srcP1 = vec_perm(srcR1, srcR2, permP1); + srcP2 = srcR2; + srcP3 = vec_perm(srcR2, srcR3, permP3); + } break; + case 13: { + vector unsigned char srcR3 = vec_ld(30, src); + srcM2 = vec_perm(srcR1, srcR2, permM2); + srcM1 = vec_perm(srcR1, srcR2, permM1); + srcP0 = vec_perm(srcR1, srcR2, permP0); + srcP1 = srcR2; + srcP2 = vec_perm(srcR2, srcR3, permP2); + srcP3 = vec_perm(srcR2, srcR3, permP3); + } break; + case 14: { + vector unsigned char srcR3 = vec_ld(30, src); + srcM2 = vec_perm(srcR1, srcR2, permM2); + srcM1 = vec_perm(srcR1, srcR2, permM1); + srcP0 = srcR2; + srcP1 = vec_perm(srcR2, srcR3, permP1); + srcP2 = vec_perm(srcR2, srcR3, permP2); + srcP3 = vec_perm(srcR2, srcR3, permP3); + } break; + case 15: { + vector unsigned char srcR3 = vec_ld(30, src); + srcM2 = vec_perm(srcR1, srcR2, permM2); + srcM1 = srcR2; + srcP0 = vec_perm(srcR2, srcR3, permP0); + srcP1 = vec_perm(srcR2, srcR3, permP1); + srcP2 = vec_perm(srcR2, srcR3, permP2); + srcP3 = vec_perm(srcR2, srcR3, permP3); + } break; + } + + srcP0A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP0); + srcP0B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP0); + srcP1A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP1); + srcP1B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP1); + + srcP2A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP2); + srcP2B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP2); + srcP3A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP3); + srcP3B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP3); + + srcM1A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcM1); + srcM1B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcM1); + srcM2A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcM2); + srcM2B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcM2); + + sum1A = vec_adds(srcP0A, srcP1A); + sum1B = vec_adds(srcP0B, srcP1B); + sum2A = vec_adds(srcM1A, srcP2A); + sum2B = vec_adds(srcM1B, srcP2B); + sum3A = vec_adds(srcM2A, srcP3A); + sum3B = vec_adds(srcM2B, srcP3B); + + pp1A = vec_mladd(sum1A, v20ss, sum3A); + pp1B = vec_mladd(sum1B, v20ss, sum3B); + + pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero); + pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero); + + psumA = vec_sub(pp1A, pp2A); + psumB = vec_sub(pp1B, pp2B); + + vec_st(psumA, 0, tmp); + vec_st(psumB, 16, tmp); + + src += srcStride; + tmp += tmpStride; /* int16_t*, and stride is 16, so it's OK here */ + } + + tmpM2ssA = vec_ld(0, tmpbis); + tmpM2ssB = vec_ld(16, tmpbis); + tmpbis += tmpStride; + tmpM1ssA = vec_ld(0, tmpbis); + tmpM1ssB = vec_ld(16, tmpbis); + tmpbis += tmpStride; + tmpP0ssA = 
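/* Annotation: the loop above ran the horizontal 6-tap filter over
 * 16+5 = 21 rows and stored unclipped 16-bit intermediates in tmp[];
 * the loads here prime a six-row window over tmp, and the loop below
 * applies the same filter vertically at full 32-bit precision via
 * vec_mule()/vec_mulo(). Scalar form of this second pass:
 *   sum = (tm2 + tp3) - 5*(tm1 + tp2) + 20*(tp0 + tp1);
 *   out = clip_to_uint8((sum + 512) >> 10);
 * with v512si supplying the rounding add and v10ui the shift. */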
vec_ld(0, tmpbis); + tmpP0ssB = vec_ld(16, tmpbis); + tmpbis += tmpStride; + tmpP1ssA = vec_ld(0, tmpbis); + tmpP1ssB = vec_ld(16, tmpbis); + tmpbis += tmpStride; + tmpP2ssA = vec_ld(0, tmpbis); + tmpP2ssB = vec_ld(16, tmpbis); + tmpbis += tmpStride; + + for (i = 0 ; i < 16 ; i++) { + const vector signed short tmpP3ssA = vec_ld(0, tmpbis); + const vector signed short tmpP3ssB = vec_ld(16, tmpbis); + + const vector signed short sum1A = vec_adds(tmpP0ssA, tmpP1ssA); + const vector signed short sum1B = vec_adds(tmpP0ssB, tmpP1ssB); + const vector signed short sum2A = vec_adds(tmpM1ssA, tmpP2ssA); + const vector signed short sum2B = vec_adds(tmpM1ssB, tmpP2ssB); + const vector signed short sum3A = vec_adds(tmpM2ssA, tmpP3ssA); + const vector signed short sum3B = vec_adds(tmpM2ssB, tmpP3ssB); + + tmpbis += tmpStride; + + tmpM2ssA = tmpM1ssA; + tmpM2ssB = tmpM1ssB; + tmpM1ssA = tmpP0ssA; + tmpM1ssB = tmpP0ssB; + tmpP0ssA = tmpP1ssA; + tmpP0ssB = tmpP1ssB; + tmpP1ssA = tmpP2ssA; + tmpP1ssB = tmpP2ssB; + tmpP2ssA = tmpP3ssA; + tmpP2ssB = tmpP3ssB; + + pp1Ae = vec_mule(sum1A, v20ss); + pp1Ao = vec_mulo(sum1A, v20ss); + pp1Be = vec_mule(sum1B, v20ss); + pp1Bo = vec_mulo(sum1B, v20ss); + + pp2Ae = vec_mule(sum2A, v5ss); + pp2Ao = vec_mulo(sum2A, v5ss); + pp2Be = vec_mule(sum2B, v5ss); + pp2Bo = vec_mulo(sum2B, v5ss); + + pp3Ae = vec_sra((vector signed int)sum3A, v16ui); + pp3Ao = vec_mulo(sum3A, v1ss); + pp3Be = vec_sra((vector signed int)sum3B, v16ui); + pp3Bo = vec_mulo(sum3B, v1ss); + + pp1cAe = vec_add(pp1Ae, v512si); + pp1cAo = vec_add(pp1Ao, v512si); + pp1cBe = vec_add(pp1Be, v512si); + pp1cBo = vec_add(pp1Bo, v512si); + + pp32Ae = vec_sub(pp3Ae, pp2Ae); + pp32Ao = vec_sub(pp3Ao, pp2Ao); + pp32Be = vec_sub(pp3Be, pp2Be); + pp32Bo = vec_sub(pp3Bo, pp2Bo); + + sumAe = vec_add(pp1cAe, pp32Ae); + sumAo = vec_add(pp1cAo, pp32Ao); + sumBe = vec_add(pp1cBe, pp32Be); + sumBo = vec_add(pp1cBo, pp32Bo); + + ssumAe = vec_sra(sumAe, v10ui); + ssumAo = vec_sra(sumAo, v10ui); + ssumBe = vec_sra(sumBe, v10ui); + ssumBo = vec_sra(sumBo, v10ui); + + ssume = vec_packs(ssumAe, ssumBe); + ssumo = vec_packs(ssumAo, ssumBo); + + sumv = vec_packsu(ssume, ssumo); + sum = vec_perm(sumv, sumv, mperm); + + dst1 = vec_ld(0, dst); + dst2 = vec_ld(16, dst); + vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst)); + + OP_U8_ALTIVEC(fsum, sum, vdst); + + rsum = vec_perm(fsum, fsum, dstperm); + fdst1 = vec_sel(dst1, rsum, dstmask); + fdst2 = vec_sel(rsum, dst2, dstmask); + + vec_st(fdst1, 0, dst); + vec_st(fdst2, 16, dst); + + dst += dstStride; + } + POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1); +} diff --git a/src/libffmpeg/libavcodec/ppc/mathops.h b/src/libffmpeg/libavcodec/ppc/mathops.h new file mode 100644 index 000000000..6af23f246 --- /dev/null +++ b/src/libffmpeg/libavcodec/ppc/mathops.h @@ -0,0 +1,33 @@ +/* + * simple math operations + * Copyright (c) 2001, 2002 Fabrice Bellard. + * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#if defined(ARCH_POWERPC_405) +/* signed 16x16 -> 32 multiply add accumulate */ +# define MAC16(rt, ra, rb) \ + asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb)); + +/* signed 16x16 -> 32 multiply */ +# define MUL16(ra, rb) \ + ({ int __rt; + asm ("mullhw %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); + __rt; }) +#endif diff --git a/src/libffmpeg/libavcodec/ppc/snow_altivec.c b/src/libffmpeg/libavcodec/ppc/snow_altivec.c new file mode 100644 index 000000000..b15672ffe --- /dev/null +++ b/src/libffmpeg/libavcodec/ppc/snow_altivec.c @@ -0,0 +1,788 @@ +/* + * Altivec optimized snow DSP utils + * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * + */ + +#include "../dsputil.h" + +#include "gcc_fixes.h" +#include "dsputil_altivec.h" +#include "../snow.h" + +#undef NDEBUG +#include <assert.h> + + + +//FIXME remove this replication +#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? 
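/* Annotation for mathops.h above: the PowerPC 405 family has halfword
 * multiply instructions, so on ARCH_POWERPC_405 the maclhw/mullhw forms
 * implement the signed 16x16->32 operations in a single instruction
 * each, replacing generic fallbacks of the shape
 *   MUL16(ra, rb):     ((ra) * (rb))
 *   MAC16(rt, ra, rb): rt += MUL16(ra, rb)
 * used on other targets. */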
(slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num))) + +static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line) +{ + int offset; + DWTELEM * buffer; + +// av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line); + + assert(buf->data_stack_top >= 0); +// assert(!buf->line[line]); + if (buf->line[line]) + return buf->line[line]; + + offset = buf->line_width * line; + buffer = buf->data_stack[buf->data_stack_top]; + buf->data_stack_top--; + buf->line[line] = buffer; + +// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1); + + return buffer; +} + + +//altivec code + +void ff_snow_horizontal_compose97i_altivec(DWTELEM *b, int width) +{ + const int w2= (width+1)>>1; + DECLARE_ALIGNED_16(DWTELEM, temp[(width>>1)]); + const int w_l= (width>>1); + const int w_r= w2 - 1; + int i; + vector signed int t1, t2, x, y, tmp1, tmp2; + vector signed int *vbuf, *vtmp; + vector unsigned char align; + + + + { // Lift 0 + DWTELEM * const ref = b + w2 - 1; + DWTELEM b_0 = b[0]; + vbuf = (vector signed int *)b; + + tmp1 = vec_ld (0, ref); + align = vec_lvsl (0, ref); + tmp2 = vec_ld (15, ref); + t1= vec_perm(tmp1, tmp2, align); + + i = 0; + + for (i=0; i<w_l-15; i+=16) { +#if 0 + b[i+0] = b[i+0] - ((3 * (ref[i+0] + ref[i+1]) + 4) >> 3); + b[i+1] = b[i+1] - ((3 * (ref[i+1] + ref[i+2]) + 4) >> 3); + b[i+2] = b[i+2] - ((3 * (ref[i+2] + ref[i+3]) + 4) >> 3); + b[i+3] = b[i+3] - ((3 * (ref[i+3] + ref[i+4]) + 4) >> 3); +#else + + tmp1 = vec_ld (0, ref+4+i); + tmp2 = vec_ld (15, ref+4+i); + + t2 = vec_perm(tmp1, tmp2, align); + + y = vec_add(t1,vec_sld(t1,t2,4)); + y = vec_add(vec_add(y,y),y); + + tmp1 = vec_ld (0, ref+8+i); + + y = vec_add(y, vec_splat_s32(4)); + y = vec_sra(y, vec_splat_u32(3)); + + tmp2 = vec_ld (15, ref+8+i); + + *vbuf = vec_sub(*vbuf, y); + + t1=t2; + + vbuf++; + + t2 = vec_perm(tmp1, tmp2, align); + + y = vec_add(t1,vec_sld(t1,t2,4)); + y = vec_add(vec_add(y,y),y); + + tmp1 = vec_ld (0, ref+12+i); + + y = vec_add(y, vec_splat_s32(4)); + y = vec_sra(y, vec_splat_u32(3)); + + tmp2 = vec_ld (15, ref+12+i); + + *vbuf = vec_sub(*vbuf, y); + + t1=t2; + + vbuf++; + + t2 = vec_perm(tmp1, tmp2, align); + + y = vec_add(t1,vec_sld(t1,t2,4)); + y = vec_add(vec_add(y,y),y); + + tmp1 = vec_ld (0, ref+16+i); + + y = vec_add(y, vec_splat_s32(4)); + y = vec_sra(y, vec_splat_u32(3)); + + tmp2 = vec_ld (15, ref+16+i); + + *vbuf = vec_sub(*vbuf, y); + + t1=t2; + + t2 = vec_perm(tmp1, tmp2, align); + + y = vec_add(t1,vec_sld(t1,t2,4)); + y = vec_add(vec_add(y,y),y); + + vbuf++; + + y = vec_add(y, vec_splat_s32(4)); + y = vec_sra(y, vec_splat_u32(3)); + *vbuf = vec_sub(*vbuf, y); + + t1=t2; + + vbuf++; + +#endif + } + + snow_horizontal_compose_lift_lead_out(i, b, b, ref, width, w_l, 0, W_DM, W_DO, W_DS); + b[0] = b_0 - ((W_DM * 2 * ref[1]+W_DO)>>W_DS); + } + + { // Lift 1 + DWTELEM * const dst = b+w2; + + i = 0; + for(; (((long)&dst[i]) & 0xF) && i<w_r; i++){ + dst[i] = dst[i] - (b[i] + b[i + 1]); + } + + align = vec_lvsl(0, b+i); + tmp1 = vec_ld(0, b+i); + vbuf = (vector signed int*) (dst + i); + tmp2 = vec_ld(15, b+i); + + t1 = vec_perm(tmp1, tmp2, align); + + for (; i<w_r-3; i+=4) { + +#if 0 + dst[i] = dst[i] - (b[i] + b[i + 1]); + dst[i+1] = dst[i+1] - (b[i+1] + b[i + 2]); + dst[i+2] = dst[i+2] - (b[i+2] + b[i + 3]); + dst[i+3] = dst[i+3] - (b[i+3] + b[i + 4]); +#else + + tmp1 = vec_ld(0, b+4+i); + tmp2 = vec_ld(15, b+4+i); + + t2 = vec_perm(tmp1, tmp2, align); + + y = vec_add(t1, vec_sld(t1,t2,4)); + *vbuf = vec_sub 
(*vbuf, y); + + vbuf++; + + t1 = t2; + +#endif + + } + + snow_horizontal_compose_lift_lead_out(i, dst, dst, b, width, w_r, 1, W_CM, W_CO, W_CS); + } + + { // Lift 2 + DWTELEM * const ref = b+w2 - 1; + DWTELEM b_0 = b[0]; + vbuf= (vector signed int *) b; + + tmp1 = vec_ld (0, ref); + align = vec_lvsl (0, ref); + tmp2 = vec_ld (15, ref); + t1= vec_perm(tmp1, tmp2, align); + + i = 0; + for (; i<w_l-15; i+=16) { +#if 0 + b[i] = b[i] - (((8 -(ref[i] + ref[i+1])) - (b[i] <<2)) >> 4); + b[i+1] = b[i+1] - (((8 -(ref[i+1] + ref[i+2])) - (b[i+1]<<2)) >> 4); + b[i+2] = b[i+2] - (((8 -(ref[i+2] + ref[i+3])) - (b[i+2]<<2)) >> 4); + b[i+3] = b[i+3] - (((8 -(ref[i+3] + ref[i+4])) - (b[i+3]<<2)) >> 4); +#else + tmp1 = vec_ld (0, ref+4+i); + tmp2 = vec_ld (15, ref+4+i); + + t2 = vec_perm(tmp1, tmp2, align); + + y = vec_add(t1,vec_sld(t1,t2,4)); + y = vec_sub(vec_splat_s32(8),y); + + tmp1 = vec_ld (0, ref+8+i); + + x = vec_sl(*vbuf,vec_splat_u32(2)); + y = vec_sra(vec_sub(y,x),vec_splat_u32(4)); + + tmp2 = vec_ld (15, ref+8+i); + + *vbuf = vec_sub( *vbuf, y); + + t1 = t2; + + vbuf++; + + t2 = vec_perm(tmp1, tmp2, align); + + y = vec_add(t1,vec_sld(t1,t2,4)); + y = vec_sub(vec_splat_s32(8),y); + + tmp1 = vec_ld (0, ref+12+i); + + x = vec_sl(*vbuf,vec_splat_u32(2)); + y = vec_sra(vec_sub(y,x),vec_splat_u32(4)); + + tmp2 = vec_ld (15, ref+12+i); + + *vbuf = vec_sub( *vbuf, y); + + t1 = t2; + + vbuf++; + + t2 = vec_perm(tmp1, tmp2, align); + + y = vec_add(t1,vec_sld(t1,t2,4)); + y = vec_sub(vec_splat_s32(8),y); + + tmp1 = vec_ld (0, ref+16+i); + + x = vec_sl(*vbuf,vec_splat_u32(2)); + y = vec_sra(vec_sub(y,x),vec_splat_u32(4)); + + tmp2 = vec_ld (15, ref+16+i); + + *vbuf = vec_sub( *vbuf, y); + + t1 = t2; + + vbuf++; + + t2 = vec_perm(tmp1, tmp2, align); + + y = vec_add(t1,vec_sld(t1,t2,4)); + y = vec_sub(vec_splat_s32(8),y); + + t1 = t2; + + x = vec_sl(*vbuf,vec_splat_u32(2)); + y = vec_sra(vec_sub(y,x),vec_splat_u32(4)); + *vbuf = vec_sub( *vbuf, y); + + vbuf++; + +#endif + } + + snow_horizontal_compose_liftS_lead_out(i, b, b, ref, width, w_l); + b[0] = b_0 - (((-2 * ref[1] + W_BO) - 4 * b_0) >> W_BS); + } + + { // Lift 3 + DWTELEM * const src = b+w2; + + vbuf = (vector signed int *)b; + vtmp = (vector signed int *)temp; + + i = 0; + align = vec_lvsl(0, src); + + for (; i<w_r-3; i+=4) { +#if 0 + temp[i] = src[i] - ((-3*(b[i] + b[i+1]))>>1); + temp[i+1] = src[i+1] - ((-3*(b[i+1] + b[i+2]))>>1); + temp[i+2] = src[i+2] - ((-3*(b[i+2] + b[i+3]))>>1); + temp[i+3] = src[i+3] - ((-3*(b[i+3] + b[i+4]))>>1); +#else + tmp1 = vec_ld(0,src+i); + t1 = vec_add(vbuf[0],vec_sld(vbuf[0],vbuf[1],4)); + tmp2 = vec_ld(15,src+i); + t1 = vec_sub(vec_splat_s32(0),t1); //bad! 
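/* Annotation (not part of the patch; based on the scalar reference in the
 * #if 0 branch): the vec_sub(vec_splat_s32(0), t1) marked "bad!" above,
 * together with the vec_add/vec_sra lines that follow, computes
 *     t1 = (-3*(b[i] + b[i+1])) >> 1
 * without a vector multiply, which classic AltiVec lacks for 32-bit
 * elements: negate the pair sum, triple it with two adds (t1 + (t1 + t1)),
 * then arithmetic-shift right by one. Negating first is not just a quirk:
 * (-3*s)>>1 differs from -((3*s)>>1) whenever 3*s is odd, so this order is
 * needed to match the scalar rounding. */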
+ t1 = vec_add(t1,vec_add(t1,t1)); + t2 = vec_perm(tmp1 ,tmp2 ,align); + t1 = vec_sra(t1,vec_splat_u32(1)); + vbuf++; + *vtmp = vec_sub(t2,t1); + vtmp++; + +#endif + + } + + snow_horizontal_compose_lift_lead_out(i, temp, src, b, width, w_r, 1, -3, 0, 1); + } + + { + //Interleave + int a; + vector signed int *t = (vector signed int *)temp, + *v = (vector signed int *)b; + + snow_interleave_line_header(&i, width, b, temp); + + for (; (i & 0xE) != 0xE; i-=2){ + b[i+1] = temp[i>>1]; + b[i] = b[i>>1]; + } + for (i-=14; i>=0; i-=16){ + a=i/4; + + v[a+3]=vec_mergel(v[(a>>1)+1],t[(a>>1)+1]); + v[a+2]=vec_mergeh(v[(a>>1)+1],t[(a>>1)+1]); + v[a+1]=vec_mergel(v[a>>1],t[a>>1]); + v[a]=vec_mergeh(v[a>>1],t[a>>1]); + + } + + } +} + +void ff_snow_vertical_compose97i_altivec(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width) +{ + int i, w4 = width/4; + vector signed int *v0, *v1,*v2,*v3,*v4,*v5; + vector signed int t1, t2; + + v0=(vector signed int *)b0; + v1=(vector signed int *)b1; + v2=(vector signed int *)b2; + v3=(vector signed int *)b3; + v4=(vector signed int *)b4; + v5=(vector signed int *)b5; + + for (i=0; i< w4;i++) + { + + #if 0 + b4[i] -= (3*(b3[i] + b5[i])+4)>>3; + b3[i] -= ((b2[i] + b4[i])); + b2[i] += ((b1[i] + b3[i])+4*b2[i]+8)>>4; + b1[i] += (3*(b0[i] + b2[i]))>>1; + #else + t1 = vec_add(v3[i], v5[i]); + t2 = vec_add(t1, vec_add(t1,t1)); + t1 = vec_add(t2, vec_splat_s32(4)); + v4[i] = vec_sub(v4[i], vec_sra(t1,vec_splat_u32(3))); + + v3[i] = vec_sub(v3[i], vec_add(v2[i], v4[i])); + + t1 = vec_add(vec_splat_s32(8), vec_add(v1[i], v3[i])); + t2 = vec_sl(v2[i], vec_splat_u32(2)); + v2[i] = vec_add(v2[i], vec_sra(vec_add(t1,t2),vec_splat_u32(4))); + t1 = vec_add(v0[i], v2[i]); + t2 = vec_add(t1, vec_add(t1,t1)); + v1[i] = vec_add(v1[i], vec_sra(t2,vec_splat_u32(1))); + + #endif + } + + for(i*=4; i < width; i++) + { + b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS; + b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS; + b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS; + b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; + } +} + +#define LOAD_BLOCKS \ + tmp1 = vec_ld(0, &block[3][y*src_stride]);\ + align = vec_lvsl(0, &block[3][y*src_stride]);\ + tmp2 = vec_ld(15, &block[3][y*src_stride]);\ +\ + b3 = vec_perm(tmp1,tmp2,align);\ +\ + tmp1 = vec_ld(0, &block[2][y*src_stride]);\ + align = vec_lvsl(0, &block[2][y*src_stride]);\ + tmp2 = vec_ld(15, &block[2][y*src_stride]);\ +\ + b2 = vec_perm(tmp1,tmp2,align);\ +\ + tmp1 = vec_ld(0, &block[1][y*src_stride]);\ + align = vec_lvsl(0, &block[1][y*src_stride]);\ + tmp2 = vec_ld(15, &block[1][y*src_stride]);\ +\ + b1 = vec_perm(tmp1,tmp2,align);\ +\ + tmp1 = vec_ld(0, &block[0][y*src_stride]);\ + align = vec_lvsl(0, &block[0][y*src_stride]);\ + tmp2 = vec_ld(15, &block[0][y*src_stride]);\ +\ + b0 = vec_perm(tmp1,tmp2,align); + +#define LOAD_OBMCS \ + tmp1 = vec_ld(0, obmc1);\ + align = vec_lvsl(0, obmc1);\ + tmp2 = vec_ld(15, obmc1);\ +\ + ob1 = vec_perm(tmp1,tmp2,align);\ +\ + tmp1 = vec_ld(0, obmc2);\ + align = vec_lvsl(0, obmc2);\ + tmp2 = vec_ld(15, obmc2);\ +\ + ob2 = vec_perm(tmp1,tmp2,align);\ +\ + tmp1 = vec_ld(0, obmc3);\ + align = vec_lvsl(0, obmc3);\ + tmp2 = vec_ld(15, obmc3);\ +\ + ob3 = vec_perm(tmp1,tmp2,align);\ +\ + tmp1 = vec_ld(0, obmc4);\ + align = vec_lvsl(0, obmc4);\ + tmp2 = vec_ld(15, obmc4);\ +\ + ob4 = vec_perm(tmp1,tmp2,align); + +/* interleave logic + * h1 <- [ a,b,a,b, a,b,a,b, a,b,a,b, a,b,a,b ] + * h2 <- [ c,d,c,d, c,d,c,d, c,d,c,d, c,d,c,d ] + * h <- [ a,b,c,d, a,b,c,d, a,b,c,d, a,b,c,d ] + */ + 
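/* Annotation (not part of the patch): STEPS_0_1 below interleaves the four
 * OBMC weight vectors (ob1..ob4) with the four block rows (b3..b0) byte by
 * byte, so that each vec_msum sums one weight*pixel product per overlapping
 * block into a 32-bit lane. A scalar model of what one output element
 * receives, under my reading of the merges (hypothetical helper, names
 * invented for illustration): */
static inline int obmc_blend_scalar(const uint8_t *ob1, const uint8_t *ob2,
                                    const uint8_t *ob3, const uint8_t *ob4,
                                    const uint8_t *b0, const uint8_t *b1,
                                    const uint8_t *b2, const uint8_t *b3,
                                    int x)
{
    /* one OBMC window weight per overlapping block, applied to the same
     * pixel position x of each block's source row */
    return ob1[x]*b3[x] + ob2[x]*b2[x] + ob3[x]*b1[x] + ob4[x]*b0[x];
}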
+#define STEPS_0_1\ + h1 = (vector unsigned short)\ + vec_mergeh(ob1, ob2);\ +\ + h2 = (vector unsigned short)\ + vec_mergeh(ob3, ob4);\ +\ + ih = (vector unsigned char)\ + vec_mergeh(h1,h2);\ +\ + l1 = (vector unsigned short) vec_mergeh(b3, b2);\ +\ + ih1 = (vector unsigned char) vec_mergel(h1, h2);\ +\ + l2 = (vector unsigned short) vec_mergeh(b1, b0);\ +\ + il = (vector unsigned char) vec_mergeh(l1, l2);\ +\ + v[0] = (vector signed int) vec_msum(ih, il, vec_splat_u32(0));\ +\ + il1 = (vector unsigned char) vec_mergel(l1, l2);\ +\ + v[1] = (vector signed int) vec_msum(ih1, il1, vec_splat_u32(0)); + +#define FINAL_STEP_SCALAR\ + for(x=0; x<b_w; x++)\ + if(add){\ + vbuf[x] += dst[x + src_x];\ + vbuf[x] = (vbuf[x] + (1<<(FRAC_BITS-1))) >> FRAC_BITS;\ + if(vbuf[x]&(~255)) vbuf[x]= ~(vbuf[x]>>31);\ + dst8[x + y*src_stride] = vbuf[x];\ + }else{\ + dst[x + src_x] -= vbuf[x];\ + } + +static void inner_add_yblock_bw_8_obmc_16_altivec(uint8_t *obmc, + const int obmc_stride, + uint8_t * * block, int b_w, + int b_h, int src_x, int src_y, + int src_stride, slice_buffer * sb, + int add, uint8_t * dst8) +{ + int y, x; + DWTELEM * dst; + vector unsigned short h1, h2, l1, l2; + vector unsigned char ih, il, ih1, il1, tmp1, tmp2, align; + vector unsigned char b0,b1,b2,b3; + vector unsigned char ob1,ob2,ob3,ob4; + + DECLARE_ALIGNED_16(int, vbuf[16]); + vector signed int *v = (vector signed int *)vbuf, *d; + + for(y=0; y<b_h; y++){ + //FIXME ugly missue of obmc_stride + + uint8_t *obmc1= obmc + y*obmc_stride; + uint8_t *obmc2= obmc1+ (obmc_stride>>1); + uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); + uint8_t *obmc4= obmc3+ (obmc_stride>>1); + + dst = slice_buffer_get_line(sb, src_y + y); + d = (vector signed int *)(dst + src_x); + +//FIXME i could avoid some loads! 
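/* Annotation on FINAL_STEP_SCALAR above (assuming 32-bit int and an
 * arithmetic right shift, which the rest of this file already relies on):
 *     if(vbuf[x]&(~255)) vbuf[x]= ~(vbuf[x]>>31);
 * is a branch-light clamp to 0..255. Any value with bits set outside the
 * low eight is out of range; vbuf[x]>>31 is then -1 for negative values and
 * 0 for positive overflow, so ~(vbuf[x]>>31) is 0 or 0xFFFFFFFF, and the
 * truncating store into dst8[] leaves 0 or 255. */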
+ + // load blocks + LOAD_BLOCKS + + // load obmcs + LOAD_OBMCS + + // steps 0 1 + STEPS_0_1 + + FINAL_STEP_SCALAR + + } + +} + +#define STEPS_2_3\ + h1 = (vector unsigned short) vec_mergel(ob1, ob2);\ +\ + h2 = (vector unsigned short) vec_mergel(ob3, ob4);\ +\ + ih = (vector unsigned char) vec_mergeh(h1,h2);\ +\ + l1 = (vector unsigned short) vec_mergel(b3, b2);\ +\ + l2 = (vector unsigned short) vec_mergel(b1, b0);\ +\ + ih1 = (vector unsigned char) vec_mergel(h1,h2);\ +\ + il = (vector unsigned char) vec_mergeh(l1,l2);\ +\ + v[2] = (vector signed int) vec_msum(ih, il, vec_splat_u32(0));\ +\ + il1 = (vector unsigned char) vec_mergel(l1,l2);\ +\ + v[3] = (vector signed int) vec_msum(ih1, il1, vec_splat_u32(0)); + + +static void inner_add_yblock_bw_16_obmc_32_altivec(uint8_t *obmc, + const int obmc_stride, + uint8_t * * block, int b_w, + int b_h, int src_x, int src_y, + int src_stride, slice_buffer * sb, + int add, uint8_t * dst8) +{ + int y, x; + DWTELEM * dst; + vector unsigned short h1, h2, l1, l2; + vector unsigned char ih, il, ih1, il1, tmp1, tmp2, align; + vector unsigned char b0,b1,b2,b3; + vector unsigned char ob1,ob2,ob3,ob4; + DECLARE_ALIGNED_16(int, vbuf[b_w]); + vector signed int *v = (vector signed int *)vbuf, *d; + + for(y=0; y<b_h; y++){ + //FIXME ugly missue of obmc_stride + + uint8_t *obmc1= obmc + y*obmc_stride; + uint8_t *obmc2= obmc1+ (obmc_stride>>1); + uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); + uint8_t *obmc4= obmc3+ (obmc_stride>>1); + + dst = slice_buffer_get_line(sb, src_y + y); + d = (vector signed int *)(dst + src_x); + + // load blocks + LOAD_BLOCKS + + // load obmcs + LOAD_OBMCS + + // steps 0 1 2 3 + STEPS_0_1 + + STEPS_2_3 + + FINAL_STEP_SCALAR + + } +} + +#define FINAL_STEP_VEC \ +\ + if(add)\ + {\ + for(x=0; x<b_w/4; x++)\ + {\ + v[x] = vec_add(v[x], d[x]);\ + v[x] = vec_sra(vec_add(v[x],\ + vec_sl( vec_splat_s32(1),\ + vec_splat_u32(7))),\ + vec_splat_u32(8));\ +\ + mask = (vector bool int) vec_sl((vector signed int)\ + vec_cmpeq(v[x],v[x]),vec_splat_u32(8));\ + mask = (vector bool int) vec_and(v[x],vec_nor(mask,mask));\ +\ + mask = (vector bool int)\ + vec_cmpeq((vector signed int)mask,\ + (vector signed int)vec_splat_u32(0));\ +\ + vs = vec_sra(v[x],vec_splat_u32(8));\ + vs = vec_sra(v[x],vec_splat_u32(8));\ + vs = vec_sra(v[x],vec_splat_u32(15));\ +\ + vs = vec_nor(vs,vs);\ +\ + v[x]= vec_sel(v[x],vs,mask);\ + }\ +\ + for(x=0; x<b_w; x++)\ + dst8[x + y*src_stride] = vbuf[x];\ +\ + }\ + else\ + for(x=0; x<b_w/4; x++)\ + d[x] = vec_sub(d[x], v[x]); + +static void inner_add_yblock_a_bw_8_obmc_16_altivec(uint8_t *obmc, + const int obmc_stride, + uint8_t * * block, int b_w, + int b_h, int src_x, int src_y, + int src_stride, slice_buffer * sb, + int add, uint8_t * dst8) +{ + int y, x; + DWTELEM * dst; + vector bool int mask; + vector signed int vs; + vector unsigned short h1, h2, l1, l2; + vector unsigned char ih, il, ih1, il1, tmp1, tmp2, align; + vector unsigned char b0,b1,b2,b3; + vector unsigned char ob1,ob2,ob3,ob4; + + DECLARE_ALIGNED_16(int, vbuf[16]); + vector signed int *v = (vector signed int *)vbuf, *d; + + for(y=0; y<b_h; y++){ + //FIXME ugly missue of obmc_stride + + uint8_t *obmc1= obmc + y*obmc_stride; + uint8_t *obmc2= obmc1+ (obmc_stride>>1); + uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); + uint8_t *obmc4= obmc3+ (obmc_stride>>1); + + dst = slice_buffer_get_line(sb, src_y + y); + d = (vector signed int *)(dst + src_x); + +//FIXME i could avoid some loads! 
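/* Note on FINAL_STEP_VEC above: the first two assignments to vs (both
 * vec_sra(v[x], vec_splat_u32(8))) are overwritten by the third before vs
 * is ever read, so they are dead stores; only the shift by 15, apparently a
 * sign extraction valid for the 16-bit range the rounded values occupy
 * (vec_splat_u32 cannot encode a shift of 31), followed by vec_nor(vs,vs)
 * to complement it, contributes to the clamped value. */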
+ + // load blocks + LOAD_BLOCKS + + // load obmcs + LOAD_OBMCS + + // steps 0 1 + STEPS_0_1 + + FINAL_STEP_VEC + + } + +} + +static void inner_add_yblock_a_bw_16_obmc_32_altivec(uint8_t *obmc, + const int obmc_stride, + uint8_t * * block, int b_w, + int b_h, int src_x, int src_y, + int src_stride, slice_buffer * sb, + int add, uint8_t * dst8) +{ + int y, x; + DWTELEM * dst; + vector bool int mask; + vector signed int vs; + vector unsigned short h1, h2, l1, l2; + vector unsigned char ih, il, ih1, il1, tmp1, tmp2, align; + vector unsigned char b0,b1,b2,b3; + vector unsigned char ob1,ob2,ob3,ob4; + DECLARE_ALIGNED_16(int, vbuf[b_w]); + vector signed int *v = (vector signed int *)vbuf, *d; + + for(y=0; y<b_h; y++){ + //FIXME ugly missue of obmc_stride + + uint8_t *obmc1= obmc + y*obmc_stride; + uint8_t *obmc2= obmc1+ (obmc_stride>>1); + uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); + uint8_t *obmc4= obmc3+ (obmc_stride>>1); + + dst = slice_buffer_get_line(sb, src_y + y); + d = (vector signed int *)(dst + src_x); + + // load blocks + LOAD_BLOCKS + + // load obmcs + LOAD_OBMCS + + // steps 0 1 2 3 + STEPS_0_1 + + STEPS_2_3 + + FINAL_STEP_VEC + + } +} + + +void ff_snow_inner_add_yblock_altivec(uint8_t *obmc, const int obmc_stride, + uint8_t * * block, int b_w, int b_h, + int src_x, int src_y, int src_stride, + slice_buffer * sb, int add, + uint8_t * dst8) +{ + if (src_x&15) { + if (b_w == 16) + inner_add_yblock_bw_16_obmc_32_altivec(obmc, obmc_stride, block, + b_w, b_h, src_x, src_y, + src_stride, sb, add, dst8); + else if (b_w == 8) + inner_add_yblock_bw_8_obmc_16_altivec(obmc, obmc_stride, block, + b_w, b_h, src_x, src_y, + src_stride, sb, add, dst8); + else + ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x, + src_y, src_stride, sb, add, dst8); + } else { + if (b_w == 16) + inner_add_yblock_a_bw_16_obmc_32_altivec(obmc, obmc_stride, block, + b_w, b_h, src_x, src_y, + src_stride, sb, add, dst8); + else if (b_w == 8) + inner_add_yblock_a_bw_8_obmc_16_altivec(obmc, obmc_stride, block, + b_w, b_h, src_x, src_y, + src_stride, sb, add, dst8); + else + ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x, + src_y, src_stride, sb, add, dst8); + } +} + + +void snow_init_altivec(DSPContext* c, AVCodecContext *avctx) +{ + c->horizontal_compose97i = ff_snow_horizontal_compose97i_altivec; + c->vertical_compose97i = ff_snow_vertical_compose97i_altivec; + c->inner_add_yblock = ff_snow_inner_add_yblock_altivec; +} diff --git a/src/libffmpeg/libavcodec/ppc/types_altivec.h b/src/libffmpeg/libavcodec/ppc/types_altivec.h new file mode 100644 index 000000000..f29026e04 --- /dev/null +++ b/src/libffmpeg/libavcodec/ppc/types_altivec.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2006 Guillaume Poirier <gpoirier@mplayerhq.hu> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/***********************************************************************
+ * Vector types
+ **********************************************************************/
+#define vec_u8_t  vector unsigned char
+#define vec_s8_t  vector signed char
+#define vec_u16_t vector unsigned short
+#define vec_s16_t vector signed short
+#define vec_u32_t vector unsigned int
+#define vec_s32_t vector signed int
+
+/***********************************************************************
+ * Null vector
+ **********************************************************************/
+#define LOAD_ZERO const vec_u8_t zerov = vec_splat_u8( 0 )
+
+#define zero_u8v  (vec_u8_t)  zerov
+#define zero_s8v  (vec_s8_t)  zerov
+#define zero_u16v (vec_u16_t) zerov
+#define zero_s16v (vec_s16_t) zerov
+#define zero_u32v (vec_u32_t) zerov
+#define zero_s32v (vec_s32_t) zerov
diff --git a/src/libffmpeg/libavcodec/ppc/vc1dsp_altivec.c b/src/libffmpeg/libavcodec/ppc/vc1dsp_altivec.c
new file mode 100644
index 000000000..114c9d41f
--- /dev/null
+++ b/src/libffmpeg/libavcodec/ppc/vc1dsp_altivec.c
@@ -0,0 +1,338 @@
+/*
+ * VC-1 and WMV3 decoder - DSP functions AltiVec-optimized
+ * Copyright (c) 2006 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "../dsputil.h" + +#include "gcc_fixes.h" + +#include "dsputil_altivec.h" + +// main steps of 8x8 transform +#define STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_rnd) \ +do { \ + t0 = vec_sl(vec_add(s0, s4), vec_2); \ + t0 = vec_add(vec_sl(t0, vec_1), t0); \ + t0 = vec_add(t0, vec_rnd); \ + t1 = vec_sl(vec_sub(s0, s4), vec_2); \ + t1 = vec_add(vec_sl(t1, vec_1), t1); \ + t1 = vec_add(t1, vec_rnd); \ + t2 = vec_add(vec_sl(s6, vec_2), vec_sl(s6, vec_1)); \ + t2 = vec_add(t2, vec_sl(s2, vec_4)); \ + t3 = vec_add(vec_sl(s2, vec_2), vec_sl(s2, vec_1)); \ + t3 = vec_sub(t3, vec_sl(s6, vec_4)); \ + t4 = vec_add(t0, t2); \ + t5 = vec_add(t1, t3); \ + t6 = vec_sub(t1, t3); \ + t7 = vec_sub(t0, t2); \ +\ + t0 = vec_sl(vec_add(s1, s3), vec_4); \ + t0 = vec_add(t0, vec_sl(s5, vec_3)); \ + t0 = vec_add(t0, vec_sl(s7, vec_2)); \ + t0 = vec_add(t0, vec_sub(s5, s3)); \ +\ + t1 = vec_sl(vec_sub(s1, s5), vec_4); \ + t1 = vec_sub(t1, vec_sl(s7, vec_3)); \ + t1 = vec_sub(t1, vec_sl(s3, vec_2)); \ + t1 = vec_sub(t1, vec_add(s1, s7)); \ +\ + t2 = vec_sl(vec_sub(s7, s3), vec_4); \ + t2 = vec_add(t2, vec_sl(s1, vec_3)); \ + t2 = vec_add(t2, vec_sl(s5, vec_2)); \ + t2 = vec_add(t2, vec_sub(s1, s7)); \ +\ + t3 = vec_sl(vec_sub(s5, s7), vec_4); \ + t3 = vec_sub(t3, vec_sl(s3, vec_3)); \ + t3 = vec_add(t3, vec_sl(s1, vec_2)); \ + t3 = vec_sub(t3, vec_add(s3, s5)); \ +\ + s0 = vec_add(t4, t0); \ + s1 = vec_add(t5, t1); \ + s2 = vec_add(t6, t2); \ + s3 = vec_add(t7, t3); \ + s4 = vec_sub(t7, t3); \ + s5 = vec_sub(t6, t2); \ + s6 = vec_sub(t5, t1); \ + s7 = vec_sub(t4, t0); \ +}while(0) + +#define SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7) \ +do { \ + s0 = vec_sra(s0, vec_3); \ + s1 = vec_sra(s1, vec_3); \ + s2 = vec_sra(s2, vec_3); \ + s3 = vec_sra(s3, vec_3); \ + s4 = vec_sra(s4, vec_3); \ + s5 = vec_sra(s5, vec_3); \ + s6 = vec_sra(s6, vec_3); \ + s7 = vec_sra(s7, vec_3); \ +}while(0) + +#define SHIFT_VERT8(s0, s1, s2, s3, s4, s5, s6, s7) \ +do { \ + s0 = vec_sra(s0, vec_7); \ + s1 = vec_sra(s1, vec_7); \ + s2 = vec_sra(s2, vec_7); \ + s3 = vec_sra(s3, vec_7); \ + s4 = vec_sra(vec_add(s4, vec_1s), vec_7); \ + s5 = vec_sra(vec_add(s5, vec_1s), vec_7); \ + s6 = vec_sra(vec_add(s6, vec_1s), vec_7); \ + s7 = vec_sra(vec_add(s7, vec_1s), vec_7); \ +}while(0) + +/* main steps of 4x4 transform */ +#define STEP4(s0, s1, s2, s3, vec_rnd) \ +do { \ + t1 = vec_add(vec_sl(s0, vec_4), s0); \ + t1 = vec_add(t1, vec_rnd); \ + t2 = vec_add(vec_sl(s2, vec_4), s2); \ + t0 = vec_add(t1, t2); \ + t1 = vec_sub(t1, t2); \ + t3 = vec_sl(vec_sub(s3, s1), vec_1); \ + t3 = vec_add(t3, vec_sl(t3, vec_2)); \ + t2 = vec_add(t3, vec_sl(s1, vec_5)); \ + t3 = vec_add(t3, vec_sl(s3, vec_3)); \ + t3 = vec_add(t3, vec_sl(s3, vec_2)); \ + s0 = vec_add(t0, t2); \ + s1 = vec_sub(t1, t3); \ + s2 = vec_add(t1, t3); \ + s3 = vec_sub(t0, t2); \ +}while (0) + +#define SHIFT_HOR4(s0, s1, s2, s3) \ + s0 = vec_sra(s0, vec_3); \ + s1 = vec_sra(s1, vec_3); \ + s2 = vec_sra(s2, vec_3); \ + s3 = vec_sra(s3, vec_3); + +#define SHIFT_VERT4(s0, s1, s2, s3) \ + s0 = vec_sra(s0, vec_7); \ + s1 = vec_sra(s1, vec_7); \ + s2 = vec_sra(s2, vec_7); \ + s3 = vec_sra(s3, vec_7); + +/** Do inverse transform on 8x8 block +*/ +static void vc1_inv_trans_8x8_altivec(DCTELEM block[64]) +{ + vector signed short src0, src1, src2, src3, src4, src5, src6, 
src7; + vector signed int s0, s1, s2, s3, s4, s5, s6, s7; + vector signed int s8, s9, sA, sB, sC, sD, sE, sF; + vector signed int t0, t1, t2, t3, t4, t5, t6, t7; + const vector signed int vec_64 = vec_sl(vec_splat_s32(4), vec_splat_u32(4)); + const vector unsigned int vec_7 = vec_splat_u32(7); + const vector unsigned int vec_5 = vec_splat_u32(5); + const vector unsigned int vec_4 = vec_splat_u32(4); + const vector signed int vec_4s = vec_splat_s32(4); + const vector unsigned int vec_3 = vec_splat_u32(3); + const vector unsigned int vec_2 = vec_splat_u32(2); + const vector signed int vec_1s = vec_splat_s32(1); + const vector unsigned int vec_1 = vec_splat_u32(1); + + + src0 = vec_ld( 0, block); + src1 = vec_ld( 16, block); + src2 = vec_ld( 32, block); + src3 = vec_ld( 48, block); + src4 = vec_ld( 64, block); + src5 = vec_ld( 80, block); + src6 = vec_ld( 96, block); + src7 = vec_ld(112, block); + + TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7); + s0 = vec_unpackl(src0); + s1 = vec_unpackl(src1); + s2 = vec_unpackl(src2); + s3 = vec_unpackl(src3); + s4 = vec_unpackl(src4); + s5 = vec_unpackl(src5); + s6 = vec_unpackl(src6); + s7 = vec_unpackl(src7); + s8 = vec_unpackh(src0); + s9 = vec_unpackh(src1); + sA = vec_unpackh(src2); + sB = vec_unpackh(src3); + sC = vec_unpackh(src4); + sD = vec_unpackh(src5); + sE = vec_unpackh(src6); + sF = vec_unpackh(src7); + STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_4s); + SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7); + STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_4s); + SHIFT_HOR8(s8, s9, sA, sB, sC, sD, sE, sF); + src0 = vec_pack(s8, s0); + src1 = vec_pack(s9, s1); + src2 = vec_pack(sA, s2); + src3 = vec_pack(sB, s3); + src4 = vec_pack(sC, s4); + src5 = vec_pack(sD, s5); + src6 = vec_pack(sE, s6); + src7 = vec_pack(sF, s7); + TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7); + + s0 = vec_unpackl(src0); + s1 = vec_unpackl(src1); + s2 = vec_unpackl(src2); + s3 = vec_unpackl(src3); + s4 = vec_unpackl(src4); + s5 = vec_unpackl(src5); + s6 = vec_unpackl(src6); + s7 = vec_unpackl(src7); + s8 = vec_unpackh(src0); + s9 = vec_unpackh(src1); + sA = vec_unpackh(src2); + sB = vec_unpackh(src3); + sC = vec_unpackh(src4); + sD = vec_unpackh(src5); + sE = vec_unpackh(src6); + sF = vec_unpackh(src7); + STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_64); + SHIFT_VERT8(s0, s1, s2, s3, s4, s5, s6, s7); + STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_64); + SHIFT_VERT8(s8, s9, sA, sB, sC, sD, sE, sF); + src0 = vec_pack(s8, s0); + src1 = vec_pack(s9, s1); + src2 = vec_pack(sA, s2); + src3 = vec_pack(sB, s3); + src4 = vec_pack(sC, s4); + src5 = vec_pack(sD, s5); + src6 = vec_pack(sE, s6); + src7 = vec_pack(sF, s7); + + vec_st(src0, 0, block); + vec_st(src1, 16, block); + vec_st(src2, 32, block); + vec_st(src3, 48, block); + vec_st(src4, 64, block); + vec_st(src5, 80, block); + vec_st(src6, 96, block); + vec_st(src7,112, block); +} + +/** Do inverse transform on 8x4 part of block +*/ +static void vc1_inv_trans_8x4_altivec(DCTELEM block[64], int n) +{ + vector signed short src0, src1, src2, src3, src4, src5, src6, src7; + vector signed int s0, s1, s2, s3, s4, s5, s6, s7; + vector signed int s8, s9, sA, sB, sC, sD, sE, sF; + vector signed int t0, t1, t2, t3, t4, t5, t6, t7; + const vector signed int vec_64 = vec_sl(vec_splat_s32(4), vec_splat_u32(4)); + const vector unsigned int vec_7 = vec_splat_u32(7); + const vector unsigned int vec_5 = vec_splat_u32(5); + const vector unsigned int vec_4 = vec_splat_u32(4); + const vector signed int vec_4s = vec_splat_s32(4); + const 
vector unsigned int vec_3 = vec_splat_u32(3); + const vector unsigned int vec_2 = vec_splat_u32(2); + const vector unsigned int vec_1 = vec_splat_u32(1); + + src0 = vec_ld( 0, block); + src1 = vec_ld( 16, block); + src2 = vec_ld( 32, block); + src3 = vec_ld( 48, block); + src4 = vec_ld( 64, block); + src5 = vec_ld( 80, block); + src6 = vec_ld( 96, block); + src7 = vec_ld(112, block); + + TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7); + s0 = vec_unpackl(src0); + s1 = vec_unpackl(src1); + s2 = vec_unpackl(src2); + s3 = vec_unpackl(src3); + s4 = vec_unpackl(src4); + s5 = vec_unpackl(src5); + s6 = vec_unpackl(src6); + s7 = vec_unpackl(src7); + s8 = vec_unpackh(src0); + s9 = vec_unpackh(src1); + sA = vec_unpackh(src2); + sB = vec_unpackh(src3); + sC = vec_unpackh(src4); + sD = vec_unpackh(src5); + sE = vec_unpackh(src6); + sF = vec_unpackh(src7); + STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_4s); + SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7); + STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_4s); + SHIFT_HOR8(s8, s9, sA, sB, sC, sD, sE, sF); + src0 = vec_pack(s8, s0); + src1 = vec_pack(s9, s1); + src2 = vec_pack(sA, s2); + src3 = vec_pack(sB, s3); + src4 = vec_pack(sC, s4); + src5 = vec_pack(sD, s5); + src6 = vec_pack(sE, s6); + src7 = vec_pack(sF, s7); + TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7); + + if(!n){ // upper half of block + s0 = vec_unpackh(src0); + s1 = vec_unpackh(src1); + s2 = vec_unpackh(src2); + s3 = vec_unpackh(src3); + s8 = vec_unpackl(src0); + s9 = vec_unpackl(src1); + sA = vec_unpackl(src2); + sB = vec_unpackl(src3); + STEP4(s0, s1, s2, s3, vec_64); + SHIFT_VERT4(s0, s1, s2, s3); + STEP4(s8, s9, sA, sB, vec_64); + SHIFT_VERT4(s8, s9, sA, sB); + src0 = vec_pack(s0, s8); + src1 = vec_pack(s1, s9); + src2 = vec_pack(s2, sA); + src3 = vec_pack(s3, sB); + + vec_st(src0, 0, block); + vec_st(src1, 16, block); + vec_st(src2, 32, block); + vec_st(src3, 48, block); + } else { //lower half of block + s0 = vec_unpackh(src4); + s1 = vec_unpackh(src5); + s2 = vec_unpackh(src6); + s3 = vec_unpackh(src7); + s8 = vec_unpackl(src4); + s9 = vec_unpackl(src5); + sA = vec_unpackl(src6); + sB = vec_unpackl(src7); + STEP4(s0, s1, s2, s3, vec_64); + SHIFT_VERT4(s0, s1, s2, s3); + STEP4(s8, s9, sA, sB, vec_64); + SHIFT_VERT4(s8, s9, sA, sB); + src4 = vec_pack(s0, s8); + src5 = vec_pack(s1, s9); + src6 = vec_pack(s2, sA); + src7 = vec_pack(s3, sB); + + vec_st(src4, 64, block); + vec_st(src5, 80, block); + vec_st(src6, 96, block); + vec_st(src7,112, block); + } +} + + +void vc1dsp_init_altivec(DSPContext* dsp, AVCodecContext *avctx) { + dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_altivec; + dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_altivec; +} diff --git a/src/libffmpeg/libavcodec/smacker.c b/src/libffmpeg/libavcodec/smacker.c index 2f2185848..2e1784075 100644 --- a/src/libffmpeg/libavcodec/smacker.c +++ b/src/libffmpeg/libavcodec/smacker.c @@ -320,12 +320,12 @@ static int decode_header_trees(SmackVContext *smk) { return 0; } -static always_inline void last_reset(int *recode, int *last) { +static av_always_inline void last_reset(int *recode, int *last) { recode[last[0]] = recode[last[1]] = recode[last[2]] = 0; } /* get code and update history */ -static always_inline int smk_get_code(GetBitContext *gb, int *recode, int *last) { +static av_always_inline int smk_get_code(GetBitContext *gb, int *recode, int *last) { register int *table = recode; int v, b; diff --git a/src/libffmpeg/libavcodec/snow.c b/src/libffmpeg/libavcodec/snow.c index 346d56861..5e93d40a1 100644 --- 
a/src/libffmpeg/libavcodec/snow.c +++ b/src/libffmpeg/libavcodec/snow.c @@ -439,6 +439,7 @@ typedef struct SnowContext{ int always_reset; int version; int spatial_decomposition_type; + int last_spatial_decomposition_type; int temporal_decomposition_type; int spatial_decomposition_count; int temporal_decomposition_count; @@ -452,15 +453,19 @@ typedef struct SnowContext{ int chroma_v_shift; int spatial_scalability; int qlog; + int last_qlog; int lambda; int lambda2; int pass1_rc; int mv_scale; + int last_mv_scale; int qbias; + int last_qbias; #define QBIAS_SHIFT 3 int b_width; int b_height; int block_max_depth; + int last_block_max_depth; Plane plane[MAX_PLANES]; BlockNode *block; #define ME_CACHE_SIZE 1024 @@ -709,7 +714,7 @@ static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){ return v; } -static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ +static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ const int mirror_left= !highpass; const int mirror_right= (width&1) ^ highpass; const int w= (width>>1) - 1 + (highpass & width); @@ -732,7 +737,7 @@ static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst } #ifndef lift5 -static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ +static av_always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ const int mirror_left= !highpass; const int mirror_right= (width&1) ^ highpass; const int w= (width>>1) - 1 + (highpass & width); @@ -764,7 +769,7 @@ static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int ds #endif #ifndef liftS -static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ +static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ const int mirror_left= !highpass; const int mirror_right= (width&1) ^ highpass; const int w= (width>>1) - 1 + (highpass & width); @@ -1849,7 +1854,7 @@ static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, sli return; } -static void reset_contexts(SnowContext *s){ +static void reset_contexts(SnowContext *s){ //FIXME better initial contexts int plane_index, level, orientation; for(plane_index=0; plane_index<3; plane_index++){ @@ -2208,7 +2213,7 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){ } #endif -static always_inline int same_block(BlockNode *a, BlockNode *b){ +static av_always_inline int same_block(BlockNode *a, BlockNode *b){ if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){ return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2])); }else{ @@ -2287,12 +2292,10 @@ static void decode_q_branch(SnowContext *s, int level, int x, int y){ } if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){ - int type; + int type, mx, my; int l = left->color[0]; int cb= left->color[1]; int cr= 
left->color[2]; - int mx= mid_pred(left->mx, top->mx, tr->mx); - int my= mid_pred(left->my, top->my, tr->my); int ref = 0; int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx)); @@ -2557,7 +2560,7 @@ void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * } //FIXME name clenup (b_w, block_w, b_width stuff) -static always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ +static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ const int b_width = s->b_width << s->block_max_depth; const int b_height= s->b_height << s->block_max_depth; const int b_stride= b_width; @@ -2716,7 +2719,7 @@ assert(src_stride > 2*MB_SIZE + 5); #endif } -static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){ +static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){ Plane *p= &s->plane[plane_index]; const int mb_w= s->b_width << s->block_max_depth; const int mb_h= s->b_height << s->block_max_depth; @@ -2783,7 +2786,7 @@ static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * STOP_TIMER("predict_slice") } -static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){ +static av_always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){ Plane *p= &s->plane[plane_index]; const int mb_w= s->b_width << s->block_max_depth; const int mb_h= s->b_height << s->block_max_depth; @@ -2840,7 +2843,7 @@ static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_ STOP_TIMER("predict_slice") } -static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){ +static av_always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){ const int mb_h= s->b_height << s->block_max_depth; int mb_y; for(mb_y=0; mb_y<=mb_h; mb_y++) @@ -3098,7 +3101,7 @@ static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ return distortion + rate*penalty_factor; } -static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){ +static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){ const int b_stride= s->b_width << s->block_max_depth; BlockNode *block= &s->block[mb_x + mb_y * b_stride]; BlockNode backup= *block; @@ -3137,12 +3140,12 @@ static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3 } /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */ -static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){ +static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int 
p0, int p1, const uint8_t *obmc_edged, int *best_rd){ int p[2] = {p0, p1}; return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd); } -static always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){ +static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){ const int b_stride= s->b_width << s->block_max_depth; BlockNode *block= &s->block[mb_x + mb_y * b_stride]; BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]}; @@ -3607,8 +3610,14 @@ static void encode_header(SnowContext *s){ memset(kstate, MID_STATE, sizeof(kstate)); put_rac(&s->c, kstate, s->keyframe); - if(s->keyframe || s->always_reset) + if(s->keyframe || s->always_reset){ reset_contexts(s); + s->last_spatial_decomposition_type= + s->last_qlog= + s->last_qbias= + s->last_mv_scale= + s->last_block_max_depth= 0; + } if(s->keyframe){ put_symbol(&s->c, s->header_state, s->version, 0); put_rac(&s->c, s->header_state, s->always_reset); @@ -3631,11 +3640,17 @@ static void encode_header(SnowContext *s){ } } } - put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0); - put_symbol(&s->c, s->header_state, s->qlog, 1); - put_symbol(&s->c, s->header_state, s->mv_scale, 0); - put_symbol(&s->c, s->header_state, s->qbias, 1); - put_symbol(&s->c, s->header_state, s->block_max_depth, 0); + put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1); + put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1); + put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1); + put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1); + put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1); + + s->last_spatial_decomposition_type= s->spatial_decomposition_type; + s->last_qlog = s->qlog; + s->last_qbias = s->qbias; + s->last_mv_scale = s->mv_scale; + s->last_block_max_depth = s->block_max_depth; } static int decode_header(SnowContext *s){ @@ -3645,8 +3660,14 @@ static int decode_header(SnowContext *s){ memset(kstate, MID_STATE, sizeof(kstate)); s->keyframe= get_rac(&s->c, kstate); - if(s->keyframe || s->always_reset) + if(s->keyframe || s->always_reset){ reset_contexts(s); + s->spatial_decomposition_type= + s->qlog= + s->qbias= + s->mv_scale= + s->block_max_depth= 0; + } if(s->keyframe){ s->version= get_symbol(&s->c, s->header_state, 0); if(s->version>0){ @@ -3677,16 +3698,16 @@ static int decode_header(SnowContext *s){ } } - s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0); + s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1); if(s->spatial_decomposition_type > 2){ av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type); return -1; } - s->qlog= get_symbol(&s->c, s->header_state, 1); - s->mv_scale= get_symbol(&s->c, s->header_state, 0); - s->qbias= get_symbol(&s->c, s->header_state, 1); - s->block_max_depth= get_symbol(&s->c, s->header_state, 0); + s->qlog += get_symbol(&s->c, s->header_state, 1); + s->mv_scale += get_symbol(&s->c, s->header_state, 1); + s->qbias += get_symbol(&s->c, s->header_state, 1); + s->block_max_depth+= get_symbol(&s->c, s->header_state, 1); if(s->block_max_depth > 1 || s->block_max_depth < 0){ av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth); s->block_max_depth= 0; @@ -4177,7 +4198,6 @@ redo_frame: pict->pict_type= FF_I_TYPE; s->keyframe=1; 
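/* Annotation on the encode_header()/decode_header() hunks above:
 * spatial_decomposition_type, qlog, mv_scale, qbias and block_max_depth are
 * now range-coded as signed deltas against the previous header's values
 * instead of as absolute symbols, with both encoder and decoder zeroing
 * their reference state on keyframes (or with always_reset). This appears
 * to be why the explicit reset_contexts(s) call in the redo_frame path just
 * below becomes redundant and is removed: encode_header() already resets
 * the contexts whenever a keyframe is coded. */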
s->current_picture.key_frame=1; - reset_contexts(s); goto redo_frame; } diff --git a/src/libffmpeg/libavcodec/snow.h b/src/libffmpeg/libavcodec/snow.h index f7cee131a..6794d2c5a 100644 --- a/src/libffmpeg/libavcodec/snow.h +++ b/src/libffmpeg/libavcodec/snow.h @@ -137,7 +137,7 @@ static int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int /* C bits used by mmx/sse2/altivec */ -static always_inline void snow_interleave_line_header(int * i, int width, DWTELEM * low, DWTELEM * high){ +static av_always_inline void snow_interleave_line_header(int * i, int width, DWTELEM * low, DWTELEM * high){ (*i) = (width) - 2; if (width & 1){ @@ -146,14 +146,14 @@ static always_inline void snow_interleave_line_header(int * i, int width, DWTELE } } -static always_inline void snow_interleave_line_footer(int * i, DWTELEM * low, DWTELEM * high){ +static av_always_inline void snow_interleave_line_footer(int * i, DWTELEM * low, DWTELEM * high){ for (; (*i)>=0; (*i)-=2){ low[(*i)+1] = high[(*i)>>1]; low[*i] = low[(*i)>>1]; } } -static always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w, int lift_high, int mul, int add, int shift){ +static av_always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w, int lift_high, int mul, int add, int shift){ for(; i<w; i++){ dst[i] = src[i] - ((mul * (ref[i] + ref[i + 1]) + add) >> shift); } @@ -163,7 +163,7 @@ static always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELEM * } } -static always_inline void snow_horizontal_compose_liftS_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w){ +static av_always_inline void snow_horizontal_compose_liftS_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w){ for(; i<w; i++){ dst[i] = src[i] - (((-(ref[i] + ref[(i+1)])+W_BO) - 4 * src[i]) >> W_BS); } diff --git a/src/libffmpeg/libavcodec/utils.c b/src/libffmpeg/libavcodec/utils.c index c3661dda7..36dcc7746 100644 --- a/src/libffmpeg/libavcodec/utils.c +++ b/src/libffmpeg/libavcodec/utils.c @@ -421,7 +421,7 @@ static const char* context_to_name(void* ptr) { static const AVOption options[]={ {"b", "set video bitrate (in bits/s)", OFFSET(bit_rate), FF_OPT_TYPE_INT, AV_CODEC_DEFAULT_BITRATE, INT_MIN, INT_MAX, V|A|E}, -{"bt", "set video bitrate tolerance (in bits/s)", OFFSET(bit_rate_tolerance), FF_OPT_TYPE_INT, AV_CODEC_DEFAULT_BITRATE*20, INT_MIN, INT_MAX, V|E}, +{"bt", "set video bitrate tolerance (in bits/s)", OFFSET(bit_rate_tolerance), FF_OPT_TYPE_INT, AV_CODEC_DEFAULT_BITRATE*20, 1, INT_MAX, V|E}, {"flags", NULL, OFFSET(flags), FF_OPT_TYPE_FLAGS, DEFAULT, INT_MIN, INT_MAX, V|A|E|D, "flags"}, {"mv4", "use four motion vector by macroblock (mpeg4)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_4MV, INT_MIN, INT_MAX, V|E, "flags"}, {"obmc", "use overlapped block motion compensation (h263+)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_OBMC, INT_MIN, INT_MAX, V|E, "flags"}, @@ -464,7 +464,7 @@ static const AVOption options[]={ {"extradata_size", NULL, OFFSET(extradata_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX}, {"time_base", NULL, OFFSET(time_base), FF_OPT_TYPE_RATIONAL, DEFAULT, INT_MIN, INT_MAX}, {"g", "set the group of picture size", OFFSET(gop_size), FF_OPT_TYPE_INT, 12, INT_MIN, INT_MAX, V|E}, -{"rate_emu", NULL, OFFSET(rate_emu), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX}, +{"rate_emu", "frame rate emulation", OFFSET(rate_emu), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX}, 
{"ar", "set audio sampling rate (in Hz)", OFFSET(sample_rate), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX}, {"ac", "set number of audio channels", OFFSET(channels), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX}, {"cutoff", "set cutoff bandwidth", OFFSET(cutoff), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, A|E}, @@ -509,15 +509,15 @@ static const AVOption options[]={ {"edge", "edge padding bug (autodetected per fourcc/version)", 0, FF_OPT_TYPE_CONST, FF_BUG_EDGE, INT_MIN, INT_MAX, V|D, "bug"}, {"hpel_chroma", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_HPEL_CHROMA, INT_MIN, INT_MAX, V|D, "bug"}, {"dc_clip", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_DC_CLIP, INT_MIN, INT_MAX, V|D, "bug"}, -{"ms", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_MS, INT_MIN, INT_MAX, V|D, "bug"}, +{"ms", "workaround various bugs in microsofts broken decoders", 0, FF_OPT_TYPE_CONST, FF_BUG_MS, INT_MIN, INT_MAX, V|D, "bug"}, {"lelim", "single coefficient elimination threshold for luminance (negative values also consider dc coefficient)", OFFSET(luma_elim_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, {"celim", "single coefficient elimination threshold for chrominance (negative values also consider dc coefficient)", OFFSET(chroma_elim_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, {"strict", "how strictly to follow the standards", OFFSET(strict_std_compliance), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "strict"}, -{"very", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_VERY_STRICT, INT_MIN, INT_MAX, V|E, "strict"}, -{"strict", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_STRICT, INT_MIN, INT_MAX, V|E, "strict"}, +{"very", "strictly conform to a older more strict version of the spec or reference software", 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_VERY_STRICT, INT_MIN, INT_MAX, V|E, "strict"}, +{"strict", "strictly conform to all the things in the spec no matter what consequences", 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_STRICT, INT_MIN, INT_MAX, V|E, "strict"}, {"normal", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_NORMAL, INT_MIN, INT_MAX, V|E, "strict"}, -{"inofficial", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_INOFFICIAL, INT_MIN, INT_MAX, V|E, "strict"}, -{"experimental", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_EXPERIMENTAL, INT_MIN, INT_MAX, V|E, "strict"}, +{"inofficial", "allow inofficial extensions", 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_INOFFICIAL, INT_MIN, INT_MAX, V|E, "strict"}, +{"experimental", "allow non standarized experimental things", 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_EXPERIMENTAL, INT_MIN, INT_MAX, V|E, "strict"}, {"b_qoffset", "qp offset between p and b frames", OFFSET(b_quant_offset), FF_OPT_TYPE_FLOAT, 1.25, FLT_MIN, FLT_MAX, V|E}, {"er", "set error resilience strategy", OFFSET(error_resilience), FF_OPT_TYPE_INT, FF_ER_CAREFUL, INT_MIN, INT_MAX, V|D, "er"}, {"careful", NULL, 0, FF_OPT_TYPE_CONST, FF_ER_CAREFUL, INT_MIN, INT_MAX, V|D, "er"}, @@ -549,14 +549,14 @@ static const AVOption options[]={ {"mmx", NULL, 0, FF_OPT_TYPE_CONST, FF_DCT_MMX, INT_MIN, INT_MAX, V|E, "dct"}, {"mlib", NULL, 0, FF_OPT_TYPE_CONST, FF_DCT_MLIB, INT_MIN, INT_MAX, V|E, "dct"}, {"altivec", NULL, 0, FF_OPT_TYPE_CONST, FF_DCT_ALTIVEC, INT_MIN, INT_MAX, V|E, "dct"}, -{"faan", "floating point AAN", 0, FF_OPT_TYPE_CONST, FF_DCT_FAAN, INT_MIN, INT_MAX, V|E, "dct"}, +{"faan", "floating point AAN DCT", 0, FF_OPT_TYPE_CONST, FF_DCT_FAAN, INT_MIN, INT_MAX, V|E, "dct"}, {"lumi_mask", "compresses bright areas stronger than medium ones", OFFSET(lumi_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E}, {"tcplx_mask", "temporal complexity 
masking", OFFSET(temporal_cplx_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E}, {"scplx_mask", "spatial complexity masking", OFFSET(spatial_cplx_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E}, {"p_mask", "inter masking", OFFSET(p_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E}, {"dark_mask", "compresses dark areas stronger than medium ones", OFFSET(dark_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E}, {"unused", NULL, OFFSET(unused), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX}, -{"idct", "use interlaced DCT", OFFSET(idct_algo), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, V|E|D, "idct"}, +{"idct", "select IDCT implementation", OFFSET(idct_algo), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, V|E|D, "idct"}, {"auto", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_AUTO, INT_MIN, INT_MAX, V|E|D, "idct"}, {"int", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_INT, INT_MIN, INT_MAX, V|E|D, "idct"}, {"simple", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLE, INT_MIN, INT_MAX, V|E|D, "idct"}, @@ -582,7 +582,7 @@ static const AVOption options[]={ {"left", NULL, 0, FF_OPT_TYPE_CONST, FF_PRED_LEFT, INT_MIN, INT_MAX, V|E, "pred"}, {"plane", NULL, 0, FF_OPT_TYPE_CONST, FF_PRED_PLANE, INT_MIN, INT_MAX, V|E, "pred"}, {"median", NULL, 0, FF_OPT_TYPE_CONST, FF_PRED_MEDIAN, INT_MIN, INT_MAX, V|E, "pred"}, -{"aspect", NULL, OFFSET(sample_aspect_ratio), FF_OPT_TYPE_RATIONAL, DEFAULT, 0, 10, V|E}, +{"aspect", "sample aspect ratio", OFFSET(sample_aspect_ratio), FF_OPT_TYPE_RATIONAL, DEFAULT, 0, 10, V|E}, {"debug", "print specific debug info", OFFSET(debug), FF_OPT_TYPE_FLAGS, DEFAULT, 0, INT_MAX, V|A|S|E|D, "debug"}, {"pict", "picture info", 0, FF_OPT_TYPE_CONST, FF_DEBUG_PICT_INFO, INT_MIN, INT_MAX, V|D, "debug"}, {"rc", "rate control", 0, FF_OPT_TYPE_CONST, FF_DEBUG_RC, INT_MIN, INT_MAX, V|E, "debug"}, @@ -603,8 +603,8 @@ static const AVOption options[]={ {"pf", "forward predicted MVs of P-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_P_FOR, INT_MIN, INT_MAX, V|D, "debug_mv"}, {"bf", "forward predicted MVs of B-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_B_FOR, INT_MIN, INT_MAX, V|D, "debug_mv"}, {"bb", "backward predicted MVs of B-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_B_BACK, INT_MIN, INT_MAX, V|D, "debug_mv"}, -{"mb_qmin", "obsolete, use vqmin", OFFSET(mb_qmin), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, -{"mb_qmax", "obsolete, use vqmax", OFFSET(mb_qmax), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, +{"mb_qmin", "obsolete, use qmin", OFFSET(mb_qmin), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, +{"mb_qmax", "obsolete, use qmax", OFFSET(mb_qmax), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, {"cmp", "full pel me compare function", OFFSET(me_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"}, {"subcmp", "sub pel me compare function", OFFSET(me_sub_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"}, {"mbcmp", "macroblock compare function", OFFSET(mb_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"}, @@ -654,11 +654,11 @@ static const AVOption options[]={ {"lmin", "min lagrange factor (VBR)", OFFSET(lmin), FF_OPT_TYPE_INT, 2*FF_QP2LAMBDA, 0, INT_MAX, V|E}, {"lmax", "max lagrange factor (VBR)", OFFSET(lmax), FF_OPT_TYPE_INT, 31*FF_QP2LAMBDA, 0, INT_MAX, V|E}, {"nr", "noise reduction", OFFSET(noise_reduction), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, -{"rc_init_occupancy", NULL, OFFSET(rc_initial_buffer_occupancy), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, +{"rc_init_occupancy", "number of bits which should be loaded into 
the rc buffer before decoding starts", OFFSET(rc_initial_buffer_occupancy), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, {"inter_threshold", NULL, OFFSET(inter_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, {"flags2", NULL, OFFSET(flags2), FF_OPT_TYPE_FLAGS, CODEC_FLAG2_FASTPSKIP, INT_MIN, INT_MAX, V|A|E|D, "flags2"}, {"error", NULL, OFFSET(error_rate), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, -{"antialias", NULL, OFFSET(antialias_algo), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D, "aa"}, +{"antialias", "MP3 antialias algorithm", OFFSET(antialias_algo), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D, "aa"}, {"auto", NULL, 0, FF_OPT_TYPE_CONST, FF_AA_AUTO, INT_MIN, INT_MAX, V|D, "aa"}, {"fastint", NULL, 0, FF_OPT_TYPE_CONST, FF_AA_FASTINT, INT_MIN, INT_MAX, V|D, "aa"}, {"int", NULL, 0, FF_OPT_TYPE_CONST, FF_AA_INT, INT_MIN, INT_MAX, V|D, "aa"}, @@ -669,8 +669,8 @@ static const AVOption options[]={ {"mb_threshold", "macroblock threshold", OFFSET(mb_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, {"dc", "intra_dc_precision", OFFSET(intra_dc_precision), FF_OPT_TYPE_INT, 0, INT_MIN, INT_MAX, V|E}, {"nssew", "nsse weight", OFFSET(nsse_weight), FF_OPT_TYPE_INT, 8, INT_MIN, INT_MAX, V|E}, -{"skip_top", NULL, OFFSET(skip_top), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D}, -{"skip_bottom", NULL, OFFSET(skip_bottom), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D}, +{"skip_top", "number of macroblock rows at the top which are skipped", OFFSET(skip_top), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D}, +{"skip_bottom", "number of macroblock rows at the bottom which are skipped", OFFSET(skip_bottom), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D}, {"profile", NULL, OFFSET(profile), FF_OPT_TYPE_INT, FF_PROFILE_UNKNOWN, INT_MIN, INT_MAX, V|A|E, "profile"}, {"unknown", NULL, 0, FF_OPT_TYPE_CONST, FF_PROFILE_UNKNOWN, INT_MIN, INT_MAX, V|A|E, "profile"}, {"level", NULL, OFFSET(level), FF_OPT_TYPE_INT, FF_LEVEL_UNKNOWN, INT_MIN, INT_MAX, V|A|E, "level"}, @@ -687,42 +687,43 @@ static const AVOption options[]={ {"bidir_refine", "refine the two motion vectors used in bidirectional macroblocks", OFFSET(bidir_refine), FF_OPT_TYPE_INT, DEFAULT, 0, 4, V|E}, {"brd_scale", "downscales frames for dynamic B-frame decision", OFFSET(brd_scale), FF_OPT_TYPE_INT, DEFAULT, 0, 10, V|E}, {"crf", "enables constant quality mode, and selects the quality (x264)", OFFSET(crf), FF_OPT_TYPE_FLOAT, DEFAULT, 0, 51, V|E}, -{"cqp", NULL, OFFSET(cqp), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, V|E}, +{"cqp", "constant quantization parameter rate control method", OFFSET(cqp), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, V|E}, {"keyint_min", "minimum interval between IDR-frames (x264)", OFFSET(keyint_min), FF_OPT_TYPE_INT, 25, INT_MIN, INT_MAX, V|E}, {"refs", "reference frames to consider for motion compensation (Snow)", OFFSET(refs), FF_OPT_TYPE_INT, 1, INT_MIN, INT_MAX, V|E}, -{"chromaoffset", NULL, OFFSET(chromaoffset), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, -{"bframebias", NULL, OFFSET(bframebias), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, +{"chromaoffset", "chroma qp offset from luma", OFFSET(chromaoffset), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, +{"bframebias", "influences how often B-frames are used", OFFSET(bframebias), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, {"trellis", "rate-distortion optimal quantization", OFFSET(trellis), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|A|E}, -{"directpred", NULL, OFFSET(directpred), FF_OPT_TYPE_INT, 2, INT_MIN, 
INT_MAX, V|E}, +{"directpred", "direct mv prediction mode - 0 (none), 1 (spatial), 2 (temporal)", OFFSET(directpred), FF_OPT_TYPE_INT, 2, INT_MIN, INT_MAX, V|E}, {"bpyramid", "allows B-frames to be used as references for predicting", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_BPYRAMID, INT_MIN, INT_MAX, V|E, "flags2"}, -{"wpred", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_WPRED, INT_MIN, INT_MAX, V|E, "flags2"}, -{"mixed_refs", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_MIXED_REFS, INT_MIN, INT_MAX, V|E, "flags2"}, -{"8x8dct", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_8X8DCT, INT_MIN, INT_MAX, V|E, "flags2"}, -{"fastpskip", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_FASTPSKIP, INT_MIN, INT_MAX, V|E, "flags2"}, -{"aud", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_AUD, INT_MIN, INT_MAX, V|E, "flags2"}, -{"brdo", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_BRDO, INT_MIN, INT_MAX, V|E, "flags2"}, -{"complexityblur", NULL, OFFSET(complexityblur), FF_OPT_TYPE_FLOAT, 20.0, FLT_MIN, FLT_MAX, V|E}, -{"deblockalpha", NULL, OFFSET(deblockalpha), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, -{"deblockbeta", NULL, OFFSET(deblockbeta), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, -{"partitions", NULL, OFFSET(partitions), FF_OPT_TYPE_FLAGS, DEFAULT, INT_MIN, INT_MAX, V|E, "partitions"}, +{"wpred", "weighted biprediction for b-frames (H.264)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_WPRED, INT_MIN, INT_MAX, V|E, "flags2"}, +{"mixed_refs", "one reference per partition, as opposed to one reference per macroblock", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_MIXED_REFS, INT_MIN, INT_MAX, V|E, "flags2"}, +{"8x8dct", "high profile 8x8 transform (H.264)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_8X8DCT, INT_MIN, INT_MAX, V|E, "flags2"}, +{"fastpskip", "fast pskip (H.264)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_FASTPSKIP, INT_MIN, INT_MAX, V|E, "flags2"}, +{"aud", "access unit delimiters (H.264)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_AUD, INT_MIN, INT_MAX, V|E, "flags2"}, +{"brdo", "b-frame rate-distortion optimization", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_BRDO, INT_MIN, INT_MAX, V|E, "flags2"}, +{"skiprd", "RD optimal MB level residual skiping", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_SKIP_RD, INT_MIN, INT_MAX, V|E, "flags2"}, +{"complexityblur", "reduce fluctuations in qp (before curve compression)", OFFSET(complexityblur), FF_OPT_TYPE_FLOAT, 20.0, FLT_MIN, FLT_MAX, V|E}, +{"deblockalpha", "in-loop deblocking filter alphac0 parameter", OFFSET(deblockalpha), FF_OPT_TYPE_INT, DEFAULT, -6, 6, V|E}, +{"deblockbeta", "in-loop deblocking filter beta parameter", OFFSET(deblockbeta), FF_OPT_TYPE_INT, DEFAULT, -6, 6, V|E}, +{"partitions", "macroblock subpartition sizes to consider", OFFSET(partitions), FF_OPT_TYPE_FLAGS, DEFAULT, INT_MIN, INT_MAX, V|E, "partitions"}, {"parti4x4", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_I4X4, INT_MIN, INT_MAX, V|E, "partitions"}, {"parti8x8", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_I8X8, INT_MIN, INT_MAX, V|E, "partitions"}, {"partp4x4", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_P4X4, INT_MIN, INT_MAX, V|E, "partitions"}, {"partp8x8", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_P8X8, INT_MIN, INT_MAX, V|E, "partitions"}, {"partb8x8", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_B8X8, INT_MIN, INT_MAX, V|E, "partitions"}, -{"sc_factor", NULL, OFFSET(scenechange_factor), FF_OPT_TYPE_INT, 6, 0, INT_MAX, V|E}, +{"sc_factor", "multiplied by qscale for each frame and added to scene_change_score", OFFSET(scenechange_factor), FF_OPT_TYPE_INT, 6, 0, INT_MAX, V|E}, {"mv0_threshold", NULL, OFFSET(mv0_threshold), FF_OPT_TYPE_INT, 256, 0, INT_MAX, V|E}, {"ivlc", "intra vlc 
table", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_INTRA_VLC, INT_MIN, INT_MAX, V|E, "flags2"}, -{"b_sensitivity", NULL, OFFSET(b_sensitivity), FF_OPT_TYPE_INT, 40, 1, INT_MAX, V|E}, +{"b_sensitivity", "adjusts sensitivity of b_frame_strategy 1", OFFSET(b_sensitivity), FF_OPT_TYPE_INT, 40, 1, INT_MAX, V|E}, {"compression_level", NULL, OFFSET(compression_level), FF_OPT_TYPE_INT, FF_COMPRESSION_DEFAULT, INT_MIN, INT_MAX, V|A|E}, -{"use_lpc", NULL, OFFSET(use_lpc), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E}, -{"lpc_coeff_precision", NULL, OFFSET(lpc_coeff_precision), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, A|E}, +{"use_lpc", "sets whether to use LPC mode (FLAC)", OFFSET(use_lpc), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E}, +{"lpc_coeff_precision", "LPC coefficient precision (FLAC)", OFFSET(lpc_coeff_precision), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, A|E}, {"min_prediction_order", NULL, OFFSET(min_prediction_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E}, {"max_prediction_order", NULL, OFFSET(max_prediction_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E}, -{"prediction_order_method", NULL, OFFSET(prediction_order_method), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E}, +{"prediction_order_method", "search method for selecting prediction order", OFFSET(prediction_order_method), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E}, {"min_partition_order", NULL, OFFSET(min_partition_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E}, {"max_partition_order", NULL, OFFSET(max_partition_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E}, -{"timecode_frame_start", NULL, OFFSET(timecode_frame_start), FF_OPT_TYPE_INT, 0, 0, INT_MAX, V|E}, +{"timecode_frame_start", "GOP timecode frame start number, in non drop frame format", OFFSET(timecode_frame_start), FF_OPT_TYPE_INT, 0, 0, INT_MAX, V|E}, {"drop_frame_timecode", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_DROP_FRAME_TIMECODE, INT_MIN, INT_MAX, V|E, "flags2"}, {NULL}, }; diff --git a/src/libffmpeg/libavcodec/vc1.c b/src/libffmpeg/libavcodec/vc1.c index 7b385ca47..231f3ca26 100644 --- a/src/libffmpeg/libavcodec/vc1.c +++ b/src/libffmpeg/libavcodec/vc1.c @@ -2140,7 +2140,7 @@ static void vc1_interp_mc(VC1Context *v) dsp->avg_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy); } -static always_inline int scale_mv(int value, int bfrac, int inv, int qs) +static av_always_inline int scale_mv(int value, int bfrac, int inv, int qs) { int n = bfrac; @@ -3072,8 +3072,8 @@ static int vc1_decode_intra_block(VC1Context *v, DCTELEM block[64], int n, int c ac_val -= 16 * s->block_wrap[n]; q1 = s->current_picture.qscale_table[mb_pos]; - if(dc_pred_dir && c_avail) q2 = s->current_picture.qscale_table[mb_pos - 1]; - if(!dc_pred_dir && a_avail) q2 = s->current_picture.qscale_table[mb_pos - s->mb_stride]; + if(dc_pred_dir && c_avail && mb_pos) q2 = s->current_picture.qscale_table[mb_pos - 1]; + if(!dc_pred_dir && a_avail && mb_pos >= s->mb_stride) q2 = s->current_picture.qscale_table[mb_pos - s->mb_stride]; if(n && n<4) q2 = q1; if(coded) { diff --git a/src/libffmpeg/libavcodec/vc1dsp.c b/src/libffmpeg/libavcodec/vc1dsp.c index 9139ffb28..f19f266d1 100644 --- a/src/libffmpeg/libavcodec/vc1dsp.c +++ b/src/libffmpeg/libavcodec/vc1dsp.c @@ -326,7 +326,7 @@ static void vc1_inv_trans_4x4_c(DCTELEM block[64], int n) /** Filter used to interpolate fractional pel values */ -static always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int mode, int r) +static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int mode, int r) { 
switch(mode){ case 0: //no shift diff --git a/src/libffmpeg/libavcodec/vp3dsp.c b/src/libffmpeg/libavcodec/vp3dsp.c index a48515a5e..bb9fed091 100644 --- a/src/libffmpeg/libavcodec/vp3dsp.c +++ b/src/libffmpeg/libavcodec/vp3dsp.c @@ -39,7 +39,7 @@ #define M(a,b) (((a) * (b))>>16) -static always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type) +static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type) { int16_t *ip = input; uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; diff --git a/src/libffmpeg/libavcodec/vp5.c b/src/libffmpeg/libavcodec/vp5.c new file mode 100644 index 000000000..ac953c7aa --- /dev/null +++ b/src/libffmpeg/libavcodec/vp5.c @@ -0,0 +1,290 @@ +/** + * @file vp5.c + * VP5 compatible video decoder + * + * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdlib.h> +#include <string.h> + +#include "avcodec.h" +#include "dsputil.h" +#include "bitstream.h" +#include "mpegvideo.h" + +#include "vp56.h" +#include "vp56data.h" +#include "vp5data.h" + + +static int vp5_parse_header(vp56_context_t *s, uint8_t *buf, int buf_size, + int *golden_frame) +{ + vp56_range_coder_t *c = &s->c; + int rows, cols; + + vp56_init_range_decoder(&s->c, buf, buf_size); + s->frames[VP56_FRAME_CURRENT].key_frame = !vp56_rac_get(c); + vp56_rac_get(c); + vp56_init_dequant(s, vp56_rac_gets(c, 6)); + if (s->frames[VP56_FRAME_CURRENT].key_frame) + { + vp56_rac_gets(c, 8); + if(vp56_rac_gets(c, 5) > 5) + return 0; + vp56_rac_gets(c, 2); + if (vp56_rac_get(c)) { + av_log(s->avctx, AV_LOG_ERROR, "interlacing not supported\n"); + return 0; + } + rows = vp56_rac_gets(c, 8); /* number of stored macroblock rows */ + cols = vp56_rac_gets(c, 8); /* number of stored macroblock cols */ + vp56_rac_gets(c, 8); /* number of displayed macroblock rows */ + vp56_rac_gets(c, 8); /* number of displayed macroblock cols */ + vp56_rac_gets(c, 2); + if (16*cols != s->avctx->coded_width || + 16*rows != s->avctx->coded_height) { + avcodec_set_dimensions(s->avctx, 16*cols, 16*rows); + return 2; + } + } + return 1; +} + +/* Gives very similar results to the vp6 version except in a few cases */ +static int vp5_adjust(int v, int t) +{ + int s2, s1 = v >> 31; + v ^= s1; + v -= s1; + v *= v < 2*t; + v -= t; + s2 = v >> 31; + v ^= s2; + v -= s2; + v = t - v; + v += s1; + v ^= s1; + return v; +} + +static void vp5_parse_vector_adjustment(vp56_context_t *s, vp56_mv_t *vect) +{ + vp56_range_coder_t *c = &s->c; + int comp, di; + + for (comp=0; comp<2; comp++) { + int delta = 0; + if (vp56_rac_get_prob(c, s->vector_model_dct[comp])) { + int sign = vp56_rac_get_prob(c, s->vector_model_sig[comp]); + di = vp56_rac_get_prob(c, s->vector_model_pdi[comp][0]); + di |= vp56_rac_get_prob(c, s->vector_model_pdi[comp][1]) << 1; + delta 
= vp56_rac_get_tree(c, vp56_pva_tree, + s->vector_model_pdv[comp]); + delta = di | (delta << 2); + delta = (delta ^ -sign) + sign; + } + if (!comp) + vect->x = delta; + else + vect->y = delta; + } +} + +static void vp5_parse_vector_models(vp56_context_t *s) +{ + vp56_range_coder_t *c = &s->c; + int comp, node; + + for (comp=0; comp<2; comp++) { + if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][0])) + s->vector_model_dct[comp] = vp56_rac_gets_nn(c, 7); + if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][1])) + s->vector_model_sig[comp] = vp56_rac_gets_nn(c, 7); + if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][2])) + s->vector_model_pdi[comp][0] = vp56_rac_gets_nn(c, 7); + if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][3])) + s->vector_model_pdi[comp][1] = vp56_rac_gets_nn(c, 7); + } + + for (comp=0; comp<2; comp++) + for (node=0; node<7; node++) + if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][4 + node])) + s->vector_model_pdv[comp][node] = vp56_rac_gets_nn(c, 7); +} + +static void vp5_parse_coeff_models(vp56_context_t *s) +{ + vp56_range_coder_t *c = &s->c; + uint8_t def_prob[11]; + int node, cg, ctx; + int ct; /* code type */ + int pt; /* plane type (0 for Y, 1 for U or V) */ + + memset(def_prob, 0x80, sizeof(def_prob)); + + for (pt=0; pt<2; pt++) + for (node=0; node<11; node++) + if (vp56_rac_get_prob(c, vp5_dccv_pct[pt][node])) { + def_prob[node] = vp56_rac_gets_nn(c, 7); + s->coeff_model_dccv[pt][node] = def_prob[node]; + } else if (s->frames[VP56_FRAME_CURRENT].key_frame) { + s->coeff_model_dccv[pt][node] = def_prob[node]; + } + + for (ct=0; ct<3; ct++) + for (pt=0; pt<2; pt++) + for (cg=0; cg<6; cg++) + for (node=0; node<11; node++) + if (vp56_rac_get_prob(c, vp5_ract_pct[ct][pt][cg][node])) { + def_prob[node] = vp56_rac_gets_nn(c, 7); + s->coeff_model_ract[pt][ct][cg][node] = def_prob[node]; + } else if (s->frames[VP56_FRAME_CURRENT].key_frame) { + s->coeff_model_ract[pt][ct][cg][node] = def_prob[node]; + } + + /* coeff_model_dcct is a linear combination of coeff_model_dccv */ + for (pt=0; pt<2; pt++) + for (ctx=0; ctx<36; ctx++) + for (node=0; node<5; node++) + s->coeff_model_dcct[pt][ctx][node] = clip(((s->coeff_model_dccv[pt][node] * vp5_dccv_lc[node][ctx][0] + 128) >> 8) + vp5_dccv_lc[node][ctx][1], 1, 254); + + /* coeff_model_acct is a linear combination of coeff_model_ract */ + for (ct=0; ct<3; ct++) + for (pt=0; pt<2; pt++) + for (cg=0; cg<3; cg++) + for (ctx=0; ctx<6; ctx++) + for (node=0; node<5; node++) + s->coeff_model_acct[pt][ct][cg][ctx][node] = clip(((s->coeff_model_ract[pt][ct][cg][node] * vp5_ract_lc[ct][cg][node][ctx][0] + 128) >> 8) + vp5_ract_lc[ct][cg][node][ctx][1], 1, 254); +} + +static void vp5_parse_coeff(vp56_context_t *s) +{ + vp56_range_coder_t *c = &s->c; + uint8_t *permute = s->scantable.permutated; + uint8_t *model, *model2; + int coeff, sign, coeff_idx; + int b, i, cg, idx, ctx, ctx_last; + int pt = 0; /* plane type (0 for Y, 1 for U or V) */ + + for (b=0; b<6; b++) { + int ct = 1; /* code type */ + + if (b > 3) pt = 1; + + ctx = 6*s->coeff_ctx[vp56_b6to4[b]][0] + + s->above_blocks[s->above_block_idx[b]].not_null_dc; + model = s->coeff_model_dccv[pt]; + model2 = s->coeff_model_dcct[pt][ctx]; + + for (coeff_idx=0; coeff_idx<64; ) { + if (vp56_rac_get_prob(c, model2[0])) { + if (vp56_rac_get_prob(c, model2[2])) { + if (vp56_rac_get_prob(c, model2[3])) { + s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 4; + idx = vp56_rac_get_tree(c, vp56_pc_tree, model); + sign = vp56_rac_get(c); + coeff = vp56_coeff_bias[idx]; + for (i=vp56_coeff_bit_length[idx]; i>=0; i--) + coeff += 
vp56_rac_get_prob(c, vp56_coeff_parse_table[idx][i]) << i; + } else { + if (vp56_rac_get_prob(c, model2[4])) { + coeff = 3 + vp56_rac_get_prob(c, model[5]); + s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 3; + } else { + coeff = 2; + s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 2; + } + sign = vp56_rac_get(c); + } + ct = 2; + } else { + ct = 1; + s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 1; + sign = vp56_rac_get(c); + coeff = 1; + } + coeff = (coeff ^ -sign) + sign; + if (coeff_idx) + coeff *= s->dequant_ac; + s->block_coeff[b][permute[coeff_idx]] = coeff; + } else { + if (ct && !vp56_rac_get_prob(c, model2[1])) + break; + ct = 0; + s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 0; + } + + cg = vp5_coeff_groups[++coeff_idx]; + ctx = s->coeff_ctx[vp56_b6to4[b]][coeff_idx]; + model = s->coeff_model_ract[pt][ct][cg]; + model2 = cg > 2 ? model : s->coeff_model_acct[pt][ct][cg][ctx]; + } + + ctx_last = FFMIN(s->coeff_ctx_last[vp56_b6to4[b]], 24); + s->coeff_ctx_last[vp56_b6to4[b]] = coeff_idx; + if (coeff_idx < ctx_last) + for (i=coeff_idx; i<=ctx_last; i++) + s->coeff_ctx[vp56_b6to4[b]][i] = 5; + s->above_blocks[s->above_block_idx[b]].not_null_dc = s->coeff_ctx[vp56_b6to4[b]][0]; + } +} + +static void vp5_default_models_init(vp56_context_t *s) +{ + int i; + + for (i=0; i<2; i++) { + s->vector_model_sig[i] = 0x80; + s->vector_model_dct[i] = 0x80; + s->vector_model_pdi[i][0] = 0x55; + s->vector_model_pdi[i][1] = 0x80; + } + memcpy(s->mb_types_stats, vp56_def_mb_types_stats, sizeof(s->mb_types_stats)); + memset(s->vector_model_pdv, 0x80, sizeof(s->vector_model_pdv)); +} + +static int vp5_decode_init(AVCodecContext *avctx) +{ + vp56_context_t *s = avctx->priv_data; + + vp56_init(s, avctx, 1); + s->vp56_coord_div = vp5_coord_div; + s->parse_vector_adjustment = vp5_parse_vector_adjustment; + s->adjust = vp5_adjust; + s->parse_coeff = vp5_parse_coeff; + s->default_models_init = vp5_default_models_init; + s->parse_vector_models = vp5_parse_vector_models; + s->parse_coeff_models = vp5_parse_coeff_models; + s->parse_header = vp5_parse_header; + + return 0; +} + +AVCodec vp5_decoder = { + "vp5", + CODEC_TYPE_VIDEO, + CODEC_ID_VP5, + sizeof(vp56_context_t), + vp5_decode_init, + NULL, + vp56_free, + vp56_decode_frame, +}; diff --git a/src/libffmpeg/libavcodec/vp56.c b/src/libffmpeg/libavcodec/vp56.c new file mode 100644 index 000000000..eb78d02e4 --- /dev/null +++ b/src/libffmpeg/libavcodec/vp56.c @@ -0,0 +1,665 @@ +/** + * @file vp56.c + * VP5 and VP6 compatible video decoder (common features) + * + * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "avcodec.h" + +#include "vp56.h" +#include "vp56data.h" + + +void vp56_init_dequant(vp56_context_t *s, int quantizer) +{ + s->quantizer = quantizer; + s->dequant_dc = vp56_dc_dequant[quantizer] << 2; + s->dequant_ac = vp56_ac_dequant[quantizer] << 2; +} + +static int vp56_get_vectors_predictors(vp56_context_t *s, int row, int col, + vp56_frame_t ref_frame) +{ + int nb_pred = 0; + vp56_mv_t vect[2] = {{0,0}, {0,0}}; + int pos, offset; + vp56_mv_t mvp; + + for (pos=0; pos<12; pos++) { + mvp.x = col + vp56_candidate_predictor_pos[pos][0]; + mvp.y = row + vp56_candidate_predictor_pos[pos][1]; + if (mvp.x < 0 || mvp.x >= s->mb_width || + mvp.y < 0 || mvp.y >= s->mb_height) + continue; + offset = mvp.x + s->mb_width*mvp.y; + + if (vp56_reference_frame[s->macroblocks[offset].type] != ref_frame) + continue; + if ((s->macroblocks[offset].mv.x == vect[0].x && + s->macroblocks[offset].mv.y == vect[0].y) || + (s->macroblocks[offset].mv.x == 0 && + s->macroblocks[offset].mv.y == 0)) + continue; + + vect[nb_pred++] = s->macroblocks[offset].mv; + if (nb_pred > 1) { + nb_pred = -1; + break; + } + s->vector_candidate_pos = pos; + } + + s->vector_candidate[0] = vect[0]; + s->vector_candidate[1] = vect[1]; + + return nb_pred+1; +} + +static void vp56_parse_mb_type_models(vp56_context_t *s) +{ + vp56_range_coder_t *c = &s->c; + int i, ctx, type; + + for (ctx=0; ctx<3; ctx++) { + if (vp56_rac_get_prob(c, 174)) { + int idx = vp56_rac_gets(c, 4); + memcpy(s->mb_types_stats[ctx],vp56_pre_def_mb_type_stats[idx][ctx], + sizeof(s->mb_types_stats[ctx])); + } + if (vp56_rac_get_prob(c, 254)) { + for (type=0; type<10; type++) { + for(i=0; i<2; i++) { + if (vp56_rac_get_prob(c, 205)) { + int delta, sign = vp56_rac_get(c); + + delta = vp56_rac_get_tree(c, vp56_pmbtm_tree, + vp56_mb_type_model_model); + if (!delta) + delta = 4 * vp56_rac_gets(c, 7); + s->mb_types_stats[ctx][type][i] += (delta ^ -sign) + sign; + } + } + } + } + } + + /* compute MB type probability tables based on previous MB type */ + for (ctx=0; ctx<3; ctx++) { + int p[10]; + + for (type=0; type<10; type++) + p[type] = 100 * s->mb_types_stats[ctx][type][1]; + + for (type=0; type<10; type++) { + int p02, p34, p0234, p17, p56, p89, p5689, p156789; + + /* conservative MB type probability */ + s->mb_type_model[ctx][type][0] = 255 - (255 * s->mb_types_stats[ctx][type][0]) / (1 + s->mb_types_stats[ctx][type][0] + s->mb_types_stats[ctx][type][1]); + + p[type] = 0; /* same MB type => weight is null */ + + /* binary tree parsing probabilities */ + p02 = p[0] + p[2]; + p34 = p[3] + p[4]; + p0234 = p02 + p34; + p17 = p[1] + p[7]; + p56 = p[5] + p[6]; + p89 = p[8] + p[9]; + p5689 = p56 + p89; + p156789 = p17 + p5689; + + s->mb_type_model[ctx][type][1] = 1 + 255 * p0234/(1+p0234+p156789); + s->mb_type_model[ctx][type][2] = 1 + 255 * p02 / (1+p0234); + s->mb_type_model[ctx][type][3] = 1 + 255 * p17 / (1+p156789); + s->mb_type_model[ctx][type][4] = 1 + 255 * p[0] / (1+p02); + s->mb_type_model[ctx][type][5] = 1 + 255 * p[3] / (1+p34); + s->mb_type_model[ctx][type][6] = 1 + 255 * p[1] / (1+p17); + s->mb_type_model[ctx][type][7] = 1 + 255 * p56 / (1+p5689); + s->mb_type_model[ctx][type][8] = 1 + 255 * p[5] / (1+p56); + s->mb_type_model[ctx][type][9] = 1 + 255 * p[8] / (1+p89); + + /* restore initial value */ + p[type] = 100 * 
s->mb_types_stats[ctx][type][1]; + } + } +} + +static vp56_mb_t vp56_parse_mb_type(vp56_context_t *s, + vp56_mb_t prev_type, int ctx) +{ + uint8_t *mb_type_model = s->mb_type_model[ctx][prev_type]; + vp56_range_coder_t *c = &s->c; + + if (vp56_rac_get_prob(c, mb_type_model[0])) + return prev_type; + else + return vp56_rac_get_tree(c, vp56_pmbt_tree, mb_type_model); +} + +static void vp56_decode_4mv(vp56_context_t *s, int row, int col) +{ + vp56_mv_t mv = {0,0}; + int type[4]; + int b; + + /* parse each block type */ + for (b=0; b<4; b++) { + type[b] = vp56_rac_gets(&s->c, 2); + if (type[b]) + type[b]++; /* only returns 0, 2, 3 or 4 (all INTER_PF) */ + } + + /* get vectors */ + for (b=0; b<4; b++) { + switch (type[b]) { + case VP56_MB_INTER_NOVEC_PF: + s->mv[b] = (vp56_mv_t) {0,0}; + break; + case VP56_MB_INTER_DELTA_PF: + s->parse_vector_adjustment(s, &s->mv[b]); + break; + case VP56_MB_INTER_V1_PF: + s->mv[b] = s->vector_candidate[0]; + break; + case VP56_MB_INTER_V2_PF: + s->mv[b] = s->vector_candidate[1]; + break; + } + mv.x += s->mv[b].x; + mv.y += s->mv[b].y; + } + + /* this is the one selected for the whole MB for prediction */ + s->macroblocks[row * s->mb_width + col].mv = s->mv[3]; + + /* chroma vectors are average luma vectors */ + if (s->avctx->codec->id == CODEC_ID_VP5) { + s->mv[4].x = s->mv[5].x = RSHIFT(mv.x,2); + s->mv[4].y = s->mv[5].y = RSHIFT(mv.y,2); + } else { + s->mv[4] = s->mv[5] = (vp56_mv_t) {mv.x/4, mv.y/4}; + } +} + +static vp56_mb_t vp56_decode_mv(vp56_context_t *s, int row, int col) +{ + vp56_mv_t *mv, vect = {0,0}; + int ctx, b; + + ctx = vp56_get_vectors_predictors(s, row, col, VP56_FRAME_PREVIOUS); + s->mb_type = vp56_parse_mb_type(s, s->mb_type, ctx); + s->macroblocks[row * s->mb_width + col].type = s->mb_type; + + switch (s->mb_type) { + case VP56_MB_INTER_V1_PF: + mv = &s->vector_candidate[0]; + break; + + case VP56_MB_INTER_V2_PF: + mv = &s->vector_candidate[1]; + break; + + case VP56_MB_INTER_V1_GF: + vp56_get_vectors_predictors(s, row, col, VP56_FRAME_GOLDEN); + mv = &s->vector_candidate[0]; + break; + + case VP56_MB_INTER_V2_GF: + vp56_get_vectors_predictors(s, row, col, VP56_FRAME_GOLDEN); + mv = &s->vector_candidate[1]; + break; + + case VP56_MB_INTER_DELTA_PF: + s->parse_vector_adjustment(s, &vect); + mv = &vect; + break; + + case VP56_MB_INTER_DELTA_GF: + vp56_get_vectors_predictors(s, row, col, VP56_FRAME_GOLDEN); + s->parse_vector_adjustment(s, &vect); + mv = &vect; + break; + + case VP56_MB_INTER_4V: + vp56_decode_4mv(s, row, col); + return s->mb_type; + + default: + mv = &vect; + break; + } + + s->macroblocks[row*s->mb_width + col].mv = *mv; + + /* same vector for all blocks */ + for (b=0; b<6; b++) + s->mv[b] = *mv; + + return s->mb_type; +} + +static void vp56_add_predictors_dc(vp56_context_t *s, vp56_frame_t ref_frame) +{ + int idx = s->scantable.permutated[0]; + int i; + + for (i=0; i<6; i++) { + vp56_ref_dc_t *ab = &s->above_blocks[s->above_block_idx[i]]; + vp56_ref_dc_t *lb = &s->left_block[vp56_b6to4[i]]; + int count = 0; + int dc = 0; + + if (ref_frame == lb->ref_frame) { + dc += lb->dc_coeff; + count++; + } + if (ref_frame == ab->ref_frame) { + dc += ab->dc_coeff; + count++; + } + if (s->avctx->codec->id == CODEC_ID_VP5) { + if (count < 2 && ref_frame == ab[-1].ref_frame) { + dc += ab[-1].dc_coeff; + count++; + } + if (count < 2 && ref_frame == ab[1].ref_frame) { + dc += ab[1].dc_coeff; + count++; + } + } + if (count == 0) + dc = s->prev_dc[vp56_b6to3[i]][ref_frame]; + else if (count == 2) + dc /= 2; + + s->block_coeff[i][idx] += dc; + 
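/* annotation (an editorial gloss inferred from the surrounding stores, not part of the committed source): block_coeff[i][idx] now holds the reconstructed DC value; the assignments below cache it per plane and reference frame, and in the above/left neighbour records, so that later blocks can predict from it before the value is finally scaled by dequant_dc */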
s->prev_dc[vp56_b6to3[i]][ref_frame] = s->block_coeff[i][idx]; + ab->dc_coeff = s->block_coeff[i][idx]; + ab->ref_frame = ref_frame; + lb->dc_coeff = s->block_coeff[i][idx]; + lb->ref_frame = ref_frame; + s->block_coeff[i][idx] *= s->dequant_dc; + } +} + +static void vp56_edge_filter(vp56_context_t *s, uint8_t *yuv, + int pix_inc, int line_inc, int t) +{ + int pix2_inc = 2 * pix_inc; + int i, v; + + for (i=0; i<12; i++) { + v = (yuv[-pix2_inc] + 3*(yuv[0]-yuv[-pix_inc]) - yuv[pix_inc] + 4) >>3; + v = s->adjust(v, t); + yuv[-pix_inc] = clip_uint8(yuv[-pix_inc] + v); + yuv[0] = clip_uint8(yuv[0] - v); + yuv += line_inc; + } +} + +static void vp56_deblock_filter(vp56_context_t *s, uint8_t *yuv, + int stride, int dx, int dy) +{ + int t = vp56_filter_threshold[s->quantizer]; + if (dx) vp56_edge_filter(s, yuv + 10-dx , 1, stride, t); + if (dy) vp56_edge_filter(s, yuv + stride*(10-dy), stride, 1, t); +} + +static void vp56_mc(vp56_context_t *s, int b, uint8_t *src, + int stride, int x, int y) +{ + int plane = vp56_b6to3[b]; + uint8_t *dst= s->frames[VP56_FRAME_CURRENT].data[plane]+s->block_offset[b]; + uint8_t *src_block; + int src_offset; + int overlap_offset = 0; + int mask = s->vp56_coord_div[b] - 1; + int deblock_filtering = s->deblock_filtering; + int dx; + int dy; + + if (s->avctx->skip_loop_filter >= AVDISCARD_ALL || + (s->avctx->skip_loop_filter >= AVDISCARD_NONKEY + && !s->frames[VP56_FRAME_CURRENT].key_frame)) + deblock_filtering = 0; + + dx = s->mv[b].x / s->vp56_coord_div[b]; + dy = s->mv[b].y / s->vp56_coord_div[b]; + + if (b >= 4) { + x /= 2; + y /= 2; + } + x += dx - 2; + y += dy - 2; + + if (x<0 || x+12>=s->plane_width[plane] || + y<0 || y+12>=s->plane_height[plane]) { + ff_emulated_edge_mc(s->edge_emu_buffer, + src + s->block_offset[b] + (dy-2)*stride + (dx-2), + stride, 12, 12, x, y, + s->plane_width[plane], + s->plane_height[plane]); + src_block = s->edge_emu_buffer; + src_offset = 2 + 2*stride; + } else if (deblock_filtering) { + /* only need a 12x12 block, but there is no such dsp function, */ + /* so copy a 16x12 block */ + s->dsp.put_pixels_tab[0][0](s->edge_emu_buffer, + src + s->block_offset[b] + (dy-2)*stride + (dx-2), + stride, 12); + src_block = s->edge_emu_buffer; + src_offset = 2 + 2*stride; + } else { + src_block = src; + src_offset = s->block_offset[b] + dy*stride + dx; + } + + if (deblock_filtering) + vp56_deblock_filter(s, src_block, stride, dx&7, dy&7); + + if (s->mv[b].x & mask) + overlap_offset += (s->mv[b].x > 0) ? 1 : -1; + if (s->mv[b].y & mask) + overlap_offset += (s->mv[b].y > 0) ? 
stride : -stride; + + if (overlap_offset) { + if (s->filter) + s->filter(s, dst, src_block, src_offset, src_offset+overlap_offset, + stride, s->mv[b], mask, s->filter_selection, b<4); + else + s->dsp.put_no_rnd_pixels_l2[1](dst, src_block+src_offset, + src_block+src_offset+overlap_offset, + stride, 8); + } else { + s->dsp.put_pixels_tab[1][0](dst, src_block+src_offset, stride, 8); + } +} + +static void vp56_decode_mb(vp56_context_t *s, int row, int col) +{ + AVFrame *frame_current, *frame_ref; + vp56_mb_t mb_type; + vp56_frame_t ref_frame; + int b, plan, off; + + if (s->frames[VP56_FRAME_CURRENT].key_frame) + mb_type = VP56_MB_INTRA; + else + mb_type = vp56_decode_mv(s, row, col); + ref_frame = vp56_reference_frame[mb_type]; + + memset(s->block_coeff, 0, sizeof(s->block_coeff)); + + s->parse_coeff(s); + + vp56_add_predictors_dc(s, ref_frame); + + frame_current = &s->frames[VP56_FRAME_CURRENT]; + frame_ref = &s->frames[ref_frame]; + + switch (mb_type) { + case VP56_MB_INTRA: + for (b=0; b<6; b++) { + plan = vp56_b6to3[b]; + s->dsp.idct_put(frame_current->data[plan] + s->block_offset[b], + s->stride[plan], s->block_coeff[b]); + } + break; + + case VP56_MB_INTER_NOVEC_PF: + case VP56_MB_INTER_NOVEC_GF: + for (b=0; b<6; b++) { + plan = vp56_b6to3[b]; + off = s->block_offset[b]; + s->dsp.put_pixels_tab[1][0](frame_current->data[plan] + off, + frame_ref->data[plan] + off, + s->stride[plan], 8); + s->dsp.idct_add(frame_current->data[plan] + off, + s->stride[plan], s->block_coeff[b]); + } + break; + + case VP56_MB_INTER_DELTA_PF: + case VP56_MB_INTER_V1_PF: + case VP56_MB_INTER_V2_PF: + case VP56_MB_INTER_DELTA_GF: + case VP56_MB_INTER_4V: + case VP56_MB_INTER_V1_GF: + case VP56_MB_INTER_V2_GF: + for (b=0; b<6; b++) { + int x_off = b==1 || b==3 ? 8 : 0; + int y_off = b==2 || b==3 ? 
8 : 0; + plan = vp56_b6to3[b]; + vp56_mc(s, b, frame_ref->data[plan], s->stride[plan], + 16*col+x_off, 16*row+y_off); + s->dsp.idct_add(frame_current->data[plan] + s->block_offset[b], + s->stride[plan], s->block_coeff[b]); + } + break; + } +} + +static int vp56_size_changed(AVCodecContext *avctx, vp56_context_t *s) +{ + int stride = s->frames[VP56_FRAME_CURRENT].linesize[0]; + int i; + + s->plane_width[0] = s->avctx->coded_width; + s->plane_width[1] = s->plane_width[2] = s->avctx->coded_width/2; + s->plane_height[0] = s->avctx->coded_height; + s->plane_height[1] = s->plane_height[2] = s->avctx->coded_height/2; + + for (i=0; i<3; i++) + s->stride[i] = s->flip * s->frames[VP56_FRAME_CURRENT].linesize[i]; + + s->mb_width = (s->avctx->coded_width+15) / 16; + s->mb_height = (s->avctx->coded_height+15) / 16; + + if (s->mb_width > 1000 || s->mb_height > 1000) { + av_log(avctx, AV_LOG_ERROR, "picture too big\n"); + return -1; + } + + s->above_blocks = av_realloc(s->above_blocks, + (4*s->mb_width+6) * sizeof(*s->above_blocks)); + s->macroblocks = av_realloc(s->macroblocks, + s->mb_width*s->mb_height*sizeof(*s->macroblocks)); + av_free(s->edge_emu_buffer_alloc); + s->edge_emu_buffer_alloc = av_malloc(16*stride); + s->edge_emu_buffer = s->edge_emu_buffer_alloc; + if (s->flip < 0) + s->edge_emu_buffer += 15 * stride; + + return 0; +} + +int vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + vp56_context_t *s = avctx->priv_data; + AVFrame *const p = &s->frames[VP56_FRAME_CURRENT]; + AVFrame *picture = data; + int mb_row, mb_col, mb_row_flip, mb_offset = 0; + int block, y, uv, stride_y, stride_uv; + int golden_frame = 0; + int res; + + res = s->parse_header(s, buf, buf_size, &golden_frame); + if (!res) + return -1; + + p->reference = 1; + if (avctx->get_buffer(avctx, p) < 0) { + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return -1; + } + + if (res == 2) + if (vp56_size_changed(avctx, s)) { + avctx->release_buffer(avctx, p); + return -1; + } + + if (p->key_frame) { + p->pict_type = FF_I_TYPE; + s->default_models_init(s); + for (block=0; block<s->mb_height*s->mb_width; block++) + s->macroblocks[block].type = VP56_MB_INTRA; + } else { + p->pict_type = FF_P_TYPE; + vp56_parse_mb_type_models(s); + s->parse_vector_models(s); + s->mb_type = VP56_MB_INTER_NOVEC_PF; + } + + s->parse_coeff_models(s); + + memset(s->prev_dc, 0, sizeof(s->prev_dc)); + s->prev_dc[1][VP56_FRAME_CURRENT] = 128; + s->prev_dc[2][VP56_FRAME_CURRENT] = 128; + + for (block=0; block < 4*s->mb_width+6; block++) { + s->above_blocks[block].ref_frame = -1; + s->above_blocks[block].dc_coeff = 0; + s->above_blocks[block].not_null_dc = 0; + } + s->above_blocks[2*s->mb_width + 2].ref_frame = 0; + s->above_blocks[3*s->mb_width + 4].ref_frame = 0; + + stride_y = p->linesize[0]; + stride_uv = p->linesize[1]; + + if (s->flip < 0) + mb_offset = 7; + + /* main macroblocks loop */ + for (mb_row=0; mb_row<s->mb_height; mb_row++) { + if (s->flip < 0) + mb_row_flip = s->mb_height - mb_row - 1; + else + mb_row_flip = mb_row; + + for (block=0; block<4; block++) { + s->left_block[block].ref_frame = -1; + s->left_block[block].dc_coeff = 0; + s->left_block[block].not_null_dc = 0; + memset(s->coeff_ctx[block], 0, 64*sizeof(s->coeff_ctx[block][0])); + } + memset(s->coeff_ctx_last, 24, sizeof(s->coeff_ctx_last)); + + s->above_block_idx[0] = 1; + s->above_block_idx[1] = 2; + s->above_block_idx[2] = 1; + s->above_block_idx[3] = 2; + s->above_block_idx[4] = 2*s->mb_width + 2 + 1; + s->above_block_idx[5] = 
3*s->mb_width + 4 + 1; + + s->block_offset[s->frbi] = (mb_row_flip*16 + mb_offset) * stride_y; + s->block_offset[s->srbi] = s->block_offset[s->frbi] + 8*stride_y; + s->block_offset[1] = s->block_offset[0] + 8; + s->block_offset[3] = s->block_offset[2] + 8; + s->block_offset[4] = (mb_row_flip*8 + mb_offset) * stride_uv; + s->block_offset[5] = s->block_offset[4]; + + for (mb_col=0; mb_col<s->mb_width; mb_col++) { + vp56_decode_mb(s, mb_row, mb_col); + + for (y=0; y<4; y++) { + s->above_block_idx[y] += 2; + s->block_offset[y] += 16; + } + + for (uv=4; uv<6; uv++) { + s->above_block_idx[uv] += 1; + s->block_offset[uv] += 8; + } + } + } + + if (s->frames[VP56_FRAME_PREVIOUS].data[0] + && (s->frames[VP56_FRAME_PREVIOUS].data[0] + != s->frames[VP56_FRAME_GOLDEN].data[0])) { + avctx->release_buffer(avctx, &s->frames[VP56_FRAME_PREVIOUS]); + } + if (p->key_frame || golden_frame) { + if (s->frames[VP56_FRAME_GOLDEN].data[0]) + avctx->release_buffer(avctx, &s->frames[VP56_FRAME_GOLDEN]); + s->frames[VP56_FRAME_GOLDEN] = *p; + } + s->frames[VP56_FRAME_PREVIOUS] = *p; + + *picture = *p; + *data_size = sizeof(AVPicture); + + return buf_size; +} + +void vp56_init(vp56_context_t *s, AVCodecContext *avctx, int flip) +{ + int i; + + s->avctx = avctx; + avctx->pix_fmt = PIX_FMT_YUV420P; + + if (s->avctx->idct_algo == FF_IDCT_AUTO) + s->avctx->idct_algo = FF_IDCT_VP3; + dsputil_init(&s->dsp, s->avctx); + ff_init_scantable(s->dsp.idct_permutation, &s->scantable,ff_zigzag_direct); + + avcodec_set_dimensions(s->avctx, 0, 0); + + for (i=0; i<3; i++) + s->frames[i].data[0] = NULL; + s->edge_emu_buffer_alloc = NULL; + + s->above_blocks = NULL; + s->macroblocks = NULL; + s->quantizer = -1; + s->deblock_filtering = 1; + + s->filter = NULL; + + if (flip) { + s->flip = -1; + s->frbi = 2; + s->srbi = 0; + } else { + s->flip = 1; + s->frbi = 0; + s->srbi = 2; + } +} + +int vp56_free(AVCodecContext *avctx) +{ + vp56_context_t *s = avctx->priv_data; + + av_free(s->above_blocks); + av_free(s->macroblocks); + av_free(s->edge_emu_buffer_alloc); + if (s->frames[VP56_FRAME_GOLDEN].data[0] + && (s->frames[VP56_FRAME_PREVIOUS].data[0] + != s->frames[VP56_FRAME_GOLDEN].data[0])) + avctx->release_buffer(avctx, &s->frames[VP56_FRAME_GOLDEN]); + if (s->frames[VP56_FRAME_PREVIOUS].data[0]) + avctx->release_buffer(avctx, &s->frames[VP56_FRAME_PREVIOUS]); + return 0; +} diff --git a/src/libffmpeg/libavcodec/vp56.h b/src/libffmpeg/libavcodec/vp56.h new file mode 100644 index 000000000..f8b3a8e4b --- /dev/null +++ b/src/libffmpeg/libavcodec/vp56.h @@ -0,0 +1,249 @@ +/** + * @file vp56.h + * VP5 and VP6 compatible video decoder (common features) + * + * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef VP56_H +#define VP56_H + +#include "vp56data.h" +#include "dsputil.h" +#include "mpegvideo.h" + + +typedef struct vp56_context vp56_context_t; +typedef struct vp56_mv vp56_mv_t; + +typedef void (*vp56_parse_vector_adjustment_t)(vp56_context_t *s, + vp56_mv_t *vect); +typedef int (*vp56_adjust_t)(int v, int t); +typedef void (*vp56_filter_t)(vp56_context_t *s, uint8_t *dst, uint8_t *src, + int offset1, int offset2, int stride, + vp56_mv_t mv, int mask, int select, int luma); +typedef void (*vp56_parse_coeff_t)(vp56_context_t *s); +typedef void (*vp56_default_models_init_t)(vp56_context_t *s); +typedef void (*vp56_parse_vector_models_t)(vp56_context_t *s); +typedef void (*vp56_parse_coeff_models_t)(vp56_context_t *s); +typedef int (*vp56_parse_header_t)(vp56_context_t *s, uint8_t *buf, + int buf_size, int *golden_frame); + +typedef struct { + int high; + int bits; + const uint8_t *buffer; + unsigned long code_word; +} vp56_range_coder_t; + +typedef struct { + uint8_t not_null_dc; + vp56_frame_t ref_frame; + DCTELEM dc_coeff; +} vp56_ref_dc_t; + +struct vp56_mv { + int x; + int y; +}; + +typedef struct { + uint8_t type; + vp56_mv_t mv; +} vp56_macroblock_t; + +struct vp56_context { + AVCodecContext *avctx; + DSPContext dsp; + ScanTable scantable; + AVFrame frames[3]; + uint8_t *edge_emu_buffer_alloc; + uint8_t *edge_emu_buffer; + vp56_range_coder_t c; + int sub_version; + + /* frame info */ + int plane_width[3]; + int plane_height[3]; + int mb_width; /* number of horizontal MB */ + int mb_height; /* number of vertical MB */ + int block_offset[6]; + + int quantizer; + uint16_t dequant_dc; + uint16_t dequant_ac; + + /* DC predictors management */ + vp56_ref_dc_t *above_blocks; + vp56_ref_dc_t left_block[4]; + int above_block_idx[6]; + DCTELEM prev_dc[3][3]; /* [plan][ref_frame] */ + + /* blocks / macroblock */ + vp56_mb_t mb_type; + vp56_macroblock_t *macroblocks; + DECLARE_ALIGNED_16(DCTELEM, block_coeff[6][64]); + uint8_t coeff_reorder[64]; /* used in vp6 only */ + uint8_t coeff_index_to_pos[64]; /* used in vp6 only */ + + /* motion vectors */ + vp56_mv_t mv[6]; /* vectors for each block in MB */ + vp56_mv_t vector_candidate[2]; + int vector_candidate_pos; + + /* filtering hints */ + int deblock_filtering; + int filter_selection; + int filter_mode; + int max_vector_length; + int sample_variance_threshold; + + /* AC models */ + uint8_t vector_model_sig[2]; /* delta sign */ + uint8_t vector_model_dct[2]; /* delta coding types */ + uint8_t vector_model_pdi[2][2]; /* predefined delta init */ + uint8_t vector_model_pdv[2][7]; /* predefined delta values */ + uint8_t vector_model_fdv[2][8]; /* 8 bit delta value definition */ + uint8_t mb_type_model[3][10][10]; /* model for decoding MB type */ + uint8_t coeff_model_dccv[2][11]; /* DC coeff value */ + uint8_t coeff_model_ract[2][3][6][11]; /* Run/AC coding type and AC coeff value */ + uint8_t coeff_model_acct[2][3][3][6][5];/* vp5 only AC coding type for coding group < 3 */ + uint8_t coeff_model_dcct[2][36][5]; /* DC coeff coding type */ + uint8_t coeff_model_runv[2][14]; /* run value (vp6 only) */ + uint8_t mb_types_stats[3][10][2]; /* contextual, next MB type stats */ + uint8_t coeff_ctx[4][64]; /* used in vp5 only */ + uint8_t coeff_ctx_last[4]; /* used in vp5 only */ + + /* upside-down flipping hints */ + int flip; /* 
are we flipping ? */ + int frbi; /* first row block index in MB */ + int srbi; /* second row block index in MB */ + int stride[3]; /* stride for each plan */ + + const uint8_t *vp56_coord_div; + vp56_parse_vector_adjustment_t parse_vector_adjustment; + vp56_adjust_t adjust; + vp56_filter_t filter; + vp56_parse_coeff_t parse_coeff; + vp56_default_models_init_t default_models_init; + vp56_parse_vector_models_t parse_vector_models; + vp56_parse_coeff_models_t parse_coeff_models; + vp56_parse_header_t parse_header; +}; + + +void vp56_init(vp56_context_t *s, AVCodecContext *avctx, int flip); +int vp56_free(AVCodecContext *avctx); +void vp56_init_dequant(vp56_context_t *s, int quantizer); +int vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size, + uint8_t *buf, int buf_size); + + +/** + * vp56 specific range coder implementation + */ + +static inline void vp56_init_range_decoder(vp56_range_coder_t *c, + const uint8_t *buf, int buf_size) +{ + c->high = 255; + c->bits = 8; + c->buffer = buf; + c->code_word = *c->buffer++ << 8; + c->code_word |= *c->buffer++; +} + +static inline int vp56_rac_get_prob(vp56_range_coder_t *c, uint8_t prob) +{ + unsigned int low = 1 + (((c->high - 1) * prob) / 256); + unsigned int low_shift = low << 8; + int bit = c->code_word >= low_shift; + + if (bit) { + c->high -= low; + c->code_word -= low_shift; + } else { + c->high = low; + } + + /* normalize */ + while (c->high < 128) { + c->high <<= 1; + c->code_word <<= 1; + if (--c->bits == 0) { + c->bits = 8; + c->code_word |= *c->buffer++; + } + } + return bit; +} + +static inline int vp56_rac_get(vp56_range_coder_t *c) +{ + /* equiprobable */ + int low = (c->high + 1) >> 1; + unsigned int low_shift = low << 8; + int bit = c->code_word >= low_shift; + if (bit) { + c->high = (c->high - low) << 1; + c->code_word -= low_shift; + } else { + c->high = low << 1; + } + + /* normalize */ + c->code_word <<= 1; + if (--c->bits == 0) { + c->bits = 8; + c->code_word |= *c->buffer++; + } + return bit; +} + +static inline int vp56_rac_gets(vp56_range_coder_t *c, int bits) +{ + int value = 0; + + while (bits--) { + value = (value << 1) | vp56_rac_get(c); + } + + return value; +} + +static inline int vp56_rac_gets_nn(vp56_range_coder_t *c, int bits) +{ + int v = vp56_rac_gets(c, 7) << 1; + return v + !v; +} + +static inline int vp56_rac_get_tree(vp56_range_coder_t *c, + const vp56_tree_t *tree, + const uint8_t *probs) +{ + while (tree->val > 0) { + if (vp56_rac_get_prob(c, probs[tree->prob_idx])) + tree += tree->val; + else + tree++; + } + return -tree->val; +} + +#endif /* VP56_H */ diff --git a/src/libffmpeg/libavcodec/vp56data.c b/src/libffmpeg/libavcodec/vp56data.c new file mode 100644 index 000000000..e75c6d1ce --- /dev/null +++ b/src/libffmpeg/libavcodec/vp56data.c @@ -0,0 +1,66 @@ +/** + * @file vp56data.c + * VP5 and VP6 compatible video decoder (common data) + * + * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "vp56data.h" + +const uint8_t vp56_b6to3[] = { 0, 0, 0, 0, 1, 2 }; +const uint8_t vp56_b6to4[] = { 0, 0, 1, 1, 2, 3 }; + +const uint8_t vp56_coeff_parse_table[6][11] = { + { 159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 145, 165, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 140, 148, 173, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 135, 140, 155, 176, 0, 0, 0, 0, 0, 0, 0 }, + { 130, 134, 141, 157, 180, 0, 0, 0, 0, 0, 0 }, + { 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254 }, +}; + +const uint8_t vp56_def_mb_types_stats[3][10][2] = { + { { 69, 42 }, { 1, 2 }, { 1, 7 }, { 44, 42 }, { 6, 22 }, + { 1, 3 }, { 0, 2 }, { 1, 5 }, { 0, 1 }, { 0, 0 }, }, + { { 229, 8 }, { 1, 1 }, { 0, 8 }, { 0, 0 }, { 0, 0 }, + { 1, 2 }, { 0, 1 }, { 0, 0 }, { 1, 1 }, { 0, 0 }, }, + { { 122, 35 }, { 1, 1 }, { 1, 6 }, { 46, 34 }, { 0, 0 }, + { 1, 2 }, { 0, 1 }, { 0, 1 }, { 1, 1 }, { 0, 0 }, }, +}; + +const vp56_tree_t vp56_pva_tree[] = { + { 8, 0}, + { 4, 1}, + { 2, 2}, {-0}, {-1}, + { 2, 3}, {-2}, {-3}, + { 4, 4}, + { 2, 5}, {-4}, {-5}, + { 2, 6}, {-6}, {-7}, +}; + +const vp56_tree_t vp56_pc_tree[] = { + { 4, 6}, + { 2, 7}, {-0}, {-1}, + { 4, 8}, + { 2, 9}, {-2}, {-3}, + { 2,10}, {-4}, {-5}, +}; + +const uint8_t vp56_coeff_bias[] = { 5, 7, 11, 19, 35, 67 }; +const uint8_t vp56_coeff_bit_length[] = { 0, 1, 2, 3, 4, 10 }; diff --git a/src/libffmpeg/libavcodec/vp56data.h b/src/libffmpeg/libavcodec/vp56data.h new file mode 100644 index 000000000..dbf92dd68 --- /dev/null +++ b/src/libffmpeg/libavcodec/vp56data.h @@ -0,0 +1,248 @@ +/** + * @file vp56data.h + * VP5 and VP6 compatible video decoder (common data) + * + * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef VP56DATA_H +#define VP56DATA_H + +#include "common.h" + +typedef enum { + VP56_FRAME_CURRENT = 0, + VP56_FRAME_PREVIOUS = 1, + VP56_FRAME_GOLDEN = 2, +} vp56_frame_t; + +typedef enum { + VP56_MB_INTER_NOVEC_PF = 0, /**< Inter MB, no vector, from previous frame */ + VP56_MB_INTRA = 1, /**< Intra MB */ + VP56_MB_INTER_DELTA_PF = 2, /**< Inter MB, above/left vector + delta, from previous frame */ + VP56_MB_INTER_V1_PF = 3, /**< Inter MB, first vector, from previous frame */ + VP56_MB_INTER_V2_PF = 4, /**< Inter MB, second vector, from previous frame */ + VP56_MB_INTER_NOVEC_GF = 5, /**< Inter MB, no vector, from golden frame */ + VP56_MB_INTER_DELTA_GF = 6, /**< Inter MB, above/left vector + delta, from golden frame */ + VP56_MB_INTER_4V = 7, /**< Inter MB, 4 vectors, from previous frame */ + VP56_MB_INTER_V1_GF = 8, /**< Inter MB, first vector, from golden frame */ + VP56_MB_INTER_V2_GF = 9, /**< Inter MB, second vector, from golden frame */ +} vp56_mb_t; + +typedef struct { + int8_t val; + int8_t prob_idx; +} vp56_tree_t; + +extern const uint8_t vp56_b6to3[]; +extern const uint8_t vp56_b6to4[]; +extern const uint8_t vp56_coeff_parse_table[6][11]; +extern const uint8_t vp56_def_mb_types_stats[3][10][2]; +extern const vp56_tree_t vp56_pva_tree[]; +extern const vp56_tree_t vp56_pc_tree[]; +extern const uint8_t vp56_coeff_bias[]; +extern const uint8_t vp56_coeff_bit_length[]; + +static const vp56_frame_t vp56_reference_frame[] = { + VP56_FRAME_PREVIOUS, /* VP56_MB_INTER_NOVEC_PF */ + VP56_FRAME_CURRENT, /* VP56_MB_INTRA */ + VP56_FRAME_PREVIOUS, /* VP56_MB_INTER_DELTA_PF */ + VP56_FRAME_PREVIOUS, /* VP56_MB_INTER_V1_PF */ + VP56_FRAME_PREVIOUS, /* VP56_MB_INTER_V2_PF */ + VP56_FRAME_GOLDEN, /* VP56_MB_INTER_NOVEC_GF */ + VP56_FRAME_GOLDEN, /* VP56_MB_INTER_DELTA_GF */ + VP56_FRAME_PREVIOUS, /* VP56_MB_INTER_4V */ + VP56_FRAME_GOLDEN, /* VP56_MB_INTER_V1_GF */ + VP56_FRAME_GOLDEN, /* VP56_MB_INTER_V2_GF */ +}; + +static const uint8_t vp56_ac_dequant[64] = { + 94, 92, 90, 88, 86, 82, 78, 74, + 70, 66, 62, 58, 54, 53, 52, 51, + 50, 49, 48, 47, 46, 45, 44, 43, + 42, 40, 39, 37, 36, 35, 34, 33, + 32, 31, 30, 29, 28, 27, 26, 25, + 24, 23, 22, 21, 20, 19, 18, 17, + 16, 15, 14, 13, 12, 11, 10, 9, + 8, 7, 6, 5, 4, 3, 2, 1, +}; + +static const uint8_t vp56_dc_dequant[64] = { + 47, 47, 47, 47, 45, 43, 43, 43, + 43, 43, 42, 41, 41, 40, 40, 40, + 40, 35, 35, 35, 35, 33, 33, 33, + 33, 32, 32, 32, 27, 27, 26, 26, + 25, 25, 24, 24, 23, 23, 19, 19, + 19, 19, 18, 18, 17, 16, 16, 16, + 16, 16, 15, 11, 11, 11, 10, 10, + 9, 8, 7, 5, 3, 3, 2, 2, +}; + +static const uint8_t vp56_pre_def_mb_type_stats[16][3][10][2] = { + { { { 9, 15 }, { 32, 25 }, { 7, 19 }, { 9, 21 }, { 1, 12 }, + { 14, 12 }, { 3, 18 }, { 14, 23 }, { 3, 10 }, { 0, 4 }, }, + { { 41, 22 }, { 1, 0 }, { 1, 31 }, { 0, 0 }, { 0, 0 }, + { 0, 1 }, { 1, 7 }, { 0, 1 }, { 98, 25 }, { 4, 10 }, }, + { { 2, 3 }, { 2, 3 }, { 0, 2 }, { 0, 2 }, { 0, 0 }, + { 11, 4 }, { 1, 4 }, { 0, 2 }, { 3, 2 }, { 0, 4 }, }, }, + { { { 48, 39 }, { 1, 2 }, { 11, 27 }, { 29, 44 }, { 7, 27 }, + { 1, 4 }, { 0, 3 }, { 1, 6 }, { 1, 2 }, { 0, 0 }, }, + { { 123, 37 }, { 6, 4 }, { 1, 27 }, { 0, 0 }, { 0, 0 }, + { 5, 8 }, { 1, 7 }, { 0, 1 }, { 12, 10 }, { 0, 2 }, }, + { { 49, 46 }, { 3, 4 }, { 7, 31 }, { 42, 41 }, { 0, 0 }, + { 2, 6 }, { 1, 7 }, { 1, 4 }, { 2, 4 }, 
{ 0, 1 }, }, }, + { { { 21, 32 }, { 1, 2 }, { 4, 10 }, { 32, 43 }, { 6, 23 }, + { 2, 3 }, { 1, 19 }, { 1, 6 }, { 12, 21 }, { 0, 7 }, }, + { { 26, 14 }, { 14, 12 }, { 0, 24 }, { 0, 0 }, { 0, 0 }, + { 55, 17 }, { 1, 9 }, { 0, 36 }, { 5, 7 }, { 1, 3 }, }, + { { 26, 25 }, { 1, 1 }, { 2, 10 }, { 67, 39 }, { 0, 0 }, + { 1, 1 }, { 0, 14 }, { 0, 2 }, { 31, 26 }, { 1, 6 }, }, }, + { { { 69, 83 }, { 0, 0 }, { 0, 2 }, { 10, 29 }, { 3, 12 }, + { 0, 1 }, { 0, 3 }, { 0, 3 }, { 2, 2 }, { 0, 0 }, }, + { { 209, 5 }, { 0, 0 }, { 0, 27 }, { 0, 0 }, { 0, 0 }, + { 0, 1 }, { 0, 1 }, { 0, 1 }, { 0, 0 }, { 0, 0 }, }, + { { 103, 46 }, { 1, 2 }, { 2, 10 }, { 33, 42 }, { 0, 0 }, + { 1, 4 }, { 0, 3 }, { 0, 1 }, { 1, 3 }, { 0, 0 }, }, }, + { { { 11, 20 }, { 1, 4 }, { 18, 36 }, { 43, 48 }, { 13, 35 }, + { 0, 2 }, { 0, 5 }, { 3, 12 }, { 1, 2 }, { 0, 0 }, }, + { { 2, 5 }, { 4, 5 }, { 0, 121 }, { 0, 0 }, { 0, 0 }, + { 0, 3 }, { 2, 4 }, { 1, 4 }, { 2, 2 }, { 0, 1 }, }, + { { 14, 31 }, { 9, 13 }, { 14, 54 }, { 22, 29 }, { 0, 0 }, + { 2, 6 }, { 4, 18 }, { 6, 13 }, { 1, 5 }, { 0, 1 }, }, }, + { { { 70, 44 }, { 0, 1 }, { 2, 10 }, { 37, 46 }, { 8, 26 }, + { 0, 2 }, { 0, 2 }, { 0, 2 }, { 0, 1 }, { 0, 0 }, }, + { { 175, 5 }, { 0, 1 }, { 0, 48 }, { 0, 0 }, { 0, 0 }, + { 0, 2 }, { 0, 1 }, { 0, 2 }, { 0, 1 }, { 0, 0 }, }, + { { 85, 39 }, { 0, 0 }, { 1, 9 }, { 69, 40 }, { 0, 0 }, + { 0, 1 }, { 0, 3 }, { 0, 1 }, { 2, 3 }, { 0, 0 }, }, }, + { { { 8, 15 }, { 0, 1 }, { 8, 21 }, { 74, 53 }, { 22, 42 }, + { 0, 1 }, { 0, 2 }, { 0, 3 }, { 1, 2 }, { 0, 0 }, }, + { { 83, 5 }, { 2, 3 }, { 0, 102 }, { 0, 0 }, { 0, 0 }, + { 1, 3 }, { 0, 2 }, { 0, 1 }, { 0, 0 }, { 0, 0 }, }, + { { 31, 28 }, { 0, 0 }, { 3, 14 }, { 130, 34 }, { 0, 0 }, + { 0, 1 }, { 0, 3 }, { 0, 1 }, { 3, 3 }, { 0, 1 }, }, }, + { { { 141, 42 }, { 0, 0 }, { 1, 4 }, { 11, 24 }, { 1, 11 }, + { 0, 1 }, { 0, 1 }, { 0, 2 }, { 0, 0 }, { 0, 0 }, }, + { { 233, 6 }, { 0, 0 }, { 0, 8 }, { 0, 0 }, { 0, 0 }, + { 0, 1 }, { 0, 1 }, { 0, 0 }, { 0, 1 }, { 0, 0 }, }, + { { 171, 25 }, { 0, 0 }, { 1, 5 }, { 25, 21 }, { 0, 0 }, + { 0, 1 }, { 0, 1 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, }, }, + { { { 8, 19 }, { 4, 10 }, { 24, 45 }, { 21, 37 }, { 9, 29 }, + { 0, 3 }, { 1, 7 }, { 11, 25 }, { 0, 2 }, { 0, 1 }, }, + { { 34, 16 }, { 112, 21 }, { 1, 28 }, { 0, 0 }, { 0, 0 }, + { 6, 8 }, { 1, 7 }, { 0, 3 }, { 2, 5 }, { 0, 2 }, }, + { { 17, 21 }, { 68, 29 }, { 6, 15 }, { 13, 22 }, { 0, 0 }, + { 6, 12 }, { 3, 14 }, { 4, 10 }, { 1, 7 }, { 0, 3 }, }, }, + { { { 46, 42 }, { 0, 1 }, { 2, 10 }, { 54, 51 }, { 10, 30 }, + { 0, 2 }, { 0, 2 }, { 0, 1 }, { 0, 1 }, { 0, 0 }, }, + { { 159, 35 }, { 2, 2 }, { 0, 25 }, { 0, 0 }, { 0, 0 }, + { 3, 6 }, { 0, 5 }, { 0, 1 }, { 4, 4 }, { 0, 1 }, }, + { { 51, 39 }, { 0, 1 }, { 2, 12 }, { 91, 44 }, { 0, 0 }, + { 0, 2 }, { 0, 3 }, { 0, 1 }, { 2, 3 }, { 0, 1 }, }, }, + { { { 28, 32 }, { 0, 0 }, { 3, 10 }, { 75, 51 }, { 14, 33 }, + { 0, 1 }, { 0, 2 }, { 0, 1 }, { 1, 2 }, { 0, 0 }, }, + { { 75, 39 }, { 5, 7 }, { 2, 48 }, { 0, 0 }, { 0, 0 }, + { 3, 11 }, { 2, 16 }, { 1, 4 }, { 7, 10 }, { 0, 2 }, }, + { { 81, 25 }, { 0, 0 }, { 2, 9 }, { 106, 26 }, { 0, 0 }, + { 0, 1 }, { 0, 1 }, { 0, 1 }, { 1, 1 }, { 0, 0 }, }, }, + { { { 100, 46 }, { 0, 1 }, { 3, 9 }, { 21, 37 }, { 5, 20 }, + { 0, 1 }, { 0, 2 }, { 1, 2 }, { 0, 1 }, { 0, 0 }, }, + { { 212, 21 }, { 0, 1 }, { 0, 9 }, { 0, 0 }, { 0, 0 }, + { 1, 2 }, { 0, 2 }, { 0, 0 }, { 2, 2 }, { 0, 0 }, }, + { { 140, 37 }, { 0, 1 }, { 1, 8 }, { 24, 33 }, { 0, 0 }, + { 1, 2 }, { 0, 2 }, { 0, 1 }, { 1, 2 }, { 0, 0 }, }, }, + { { { 27, 29 }, { 0, 1 }, { 9, 25 }, { 53, 
51 }, { 12, 34 }, + { 0, 1 }, { 0, 3 }, { 1, 5 }, { 0, 2 }, { 0, 0 }, }, + { { 4, 2 }, { 0, 0 }, { 0, 172 }, { 0, 0 }, { 0, 0 }, + { 0, 1 }, { 0, 2 }, { 0, 0 }, { 2, 0 }, { 0, 0 }, }, + { { 14, 23 }, { 1, 3 }, { 11, 53 }, { 90, 31 }, { 0, 0 }, + { 0, 3 }, { 1, 5 }, { 2, 6 }, { 1, 2 }, { 0, 0 }, }, }, + { { { 80, 38 }, { 0, 0 }, { 1, 4 }, { 69, 33 }, { 5, 16 }, + { 0, 1 }, { 0, 1 }, { 0, 0 }, { 0, 1 }, { 0, 0 }, }, + { { 187, 22 }, { 1, 1 }, { 0, 17 }, { 0, 0 }, { 0, 0 }, + { 3, 6 }, { 0, 4 }, { 0, 1 }, { 4, 4 }, { 0, 1 }, }, + { { 123, 29 }, { 0, 0 }, { 1, 7 }, { 57, 30 }, { 0, 0 }, + { 0, 1 }, { 0, 1 }, { 0, 1 }, { 0, 1 }, { 0, 0 }, }, }, + { { { 16, 20 }, { 0, 0 }, { 2, 8 }, { 104, 49 }, { 15, 33 }, + { 0, 1 }, { 0, 1 }, { 0, 1 }, { 1, 1 }, { 0, 0 }, }, + { { 133, 6 }, { 1, 2 }, { 1, 70 }, { 0, 0 }, { 0, 0 }, + { 0, 2 }, { 0, 4 }, { 0, 3 }, { 1, 1 }, { 0, 0 }, }, + { { 13, 14 }, { 0, 0 }, { 4, 20 }, { 175, 20 }, { 0, 0 }, + { 0, 1 }, { 0, 1 }, { 0, 1 }, { 1, 1 }, { 0, 0 }, }, }, + { { { 194, 16 }, { 0, 0 }, { 1, 1 }, { 1, 9 }, { 1, 3 }, + { 0, 0 }, { 0, 1 }, { 0, 1 }, { 0, 0 }, { 0, 0 }, }, + { { 251, 1 }, { 0, 0 }, { 0, 2 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, }, + { { 202, 23 }, { 0, 0 }, { 1, 3 }, { 2, 9 }, { 0, 0 }, + { 0, 1 }, { 0, 1 }, { 0, 1 }, { 0, 0 }, { 0, 0 }, }, }, +}; + +static const uint8_t vp56_filter_threshold[] = { + 14, 14, 13, 13, 12, 12, 10, 10, + 10, 10, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 7, 7, 7, 7, + 7, 7, 6, 6, 6, 6, 6, 6, + 5, 5, 5, 5, 4, 4, 4, 4, + 4, 4, 4, 3, 3, 3, 3, 2, +}; + +static const uint8_t vp56_mb_type_model_model[] = { + 171, 83, 199, 140, 125, 104, +}; + +static const vp56_tree_t vp56_pmbtm_tree[] = { + { 4, 0}, + { 2, 1}, {-8}, {-4}, + { 8, 2}, + { 6, 3}, + { 4, 4}, + { 2, 5}, {-24}, {-20}, {-16}, {-12}, {-0}, +}; + +static const vp56_tree_t vp56_pmbt_tree[] = { + { 8, 1}, + { 4, 2}, + { 2, 4}, {-VP56_MB_INTER_NOVEC_PF}, {-VP56_MB_INTER_DELTA_PF}, + { 2, 5}, {-VP56_MB_INTER_V1_PF}, {-VP56_MB_INTER_V2_PF}, + { 4, 3}, + { 2, 6}, {-VP56_MB_INTRA}, {-VP56_MB_INTER_4V}, + { 4, 7}, + { 2, 8}, {-VP56_MB_INTER_NOVEC_GF}, {-VP56_MB_INTER_DELTA_GF}, + { 2, 9}, {-VP56_MB_INTER_V1_GF}, {-VP56_MB_INTER_V2_GF}, +}; + +/* relative pos of surrounding blocks, from closest to farthest */ +static const int8_t vp56_candidate_predictor_pos[12][2] = { + { 0, -1 }, + { -1, 0 }, + { -1, -1 }, + { 1, -1 }, + { 0, -2 }, + { -2, 0 }, + { -2, -1 }, + { -1, -2 }, + { 1, -2 }, + { 2, -1 }, + { -2, -2 }, + { 2, -2 }, +}; + +#endif /* VP56DATA */ diff --git a/src/libffmpeg/libavcodec/vp5data.h b/src/libffmpeg/libavcodec/vp5data.h new file mode 100644 index 000000000..effc17c2c --- /dev/null +++ b/src/libffmpeg/libavcodec/vp5data.h @@ -0,0 +1,173 @@ +/** + * @file vp5data.h + * VP5 compatible video decoder + * + * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef VP5DATA_H +#define VP5DATA_H + +static const uint8_t vp5_coeff_groups[] = { + -1, 0, 1, 1, 2, 1, 1, 2, + 2, 1, 1, 2, 2, 2, 1, 2, + 2, 2, 2, 2, 1, 1, 2, 2, + 3, 3, 4, 3, 4, 4, 4, 3, + 3, 3, 3, 3, 4, 3, 3, 3, + 4, 4, 4, 4, 4, 3, 3, 4, + 4, 4, 3, 4, 4, 4, 4, 4, + 4, 4, 5, 5, 5, 5, 5, 5, +}; + +static const uint8_t vp5_vmc_pct[2][11] = { + { 243, 220, 251, 253, 237, 232, 241, 245, 247, 251, 253 }, + { 235, 211, 246, 249, 234, 231, 248, 249, 252, 252, 254 }, +}; + +static const uint8_t vp5_dccv_pct[2][11] = { + { 146, 197, 181, 207, 232, 243, 238, 251, 244, 250, 249 }, + { 179, 219, 214, 240, 250, 254, 244, 254, 254, 254, 254 }, +}; + +static const uint8_t vp5_ract_pct[3][2][6][11] = { + { { { 227, 246, 230, 247, 244, 254, 254, 254, 254, 254, 254 }, + { 202, 254, 209, 231, 231, 249, 249, 253, 254, 254, 254 }, + { 206, 254, 225, 242, 241, 251, 253, 254, 254, 254, 254 }, + { 235, 254, 241, 253, 252, 254, 254, 254, 254, 254, 254 }, + { 234, 254, 248, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } }, + { { 240, 254, 248, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 238, 254, 240, 253, 254, 254, 254, 254, 254, 254, 254 }, + { 244, 254, 251, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } } }, + { { { 206, 203, 227, 239, 247, 254, 253, 254, 254, 254, 254 }, + { 207, 199, 220, 236, 243, 252, 252, 254, 254, 254, 254 }, + { 212, 219, 230, 243, 244, 253, 252, 254, 254, 254, 254 }, + { 236, 237, 247, 252, 253, 254, 254, 254, 254, 254, 254 }, + { 240, 240, 248, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } }, + { { 230, 233, 249, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 238, 238, 250, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 248, 251, 254, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } } }, + { { { 225, 239, 227, 231, 244, 253, 243, 254, 254, 253, 254 }, + { 232, 234, 224, 228, 242, 249, 242, 252, 251, 251, 254 }, + { 235, 249, 238, 240, 251, 254, 249, 254, 253, 253, 254 }, + { 249, 253, 251, 250, 254, 254, 254, 254, 254, 254, 254 }, + { 251, 250, 249, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } }, + { { 243, 244, 250, 250, 254, 254, 254, 254, 254, 254, 254 }, + { 249, 248, 250, 253, 254, 254, 254, 254, 254, 254, 254 }, + { 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 }, + { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } } }, +}; + +static const int16_t vp5_dccv_lc[5][36][2] = { + { {154, 61}, {141, 54}, { 90, 45}, { 54, 34}, { 54, 13}, {128, 109}, + {136, 54}, {148, 45}, { 92, 41}, { 54, 33}, { 51, 15}, { 87, 113}, + { 87, 44}, { 97, 40}, { 67, 36}, { 46, 29}, { 41, 15}, { 64, 80}, + { 59, 33}, { 61, 31}, { 51, 28}, { 44, 22}, { 33, 12}, { 49, 63}, + { 69, 12}, { 59, 16}, { 46, 14}, { 31, 13}, { 26, 6}, { 92, 26}, + 
{128, 108}, { 77, 119}, { 54, 84}, { 26, 71}, { 87, 19}, { 95, 155} }, + { {154, 4}, {182, 0}, {159, -8}, {128, -5}, {143, -5}, {187, 55}, + {182, 0}, {228, -3}, {187, -7}, {174, -9}, {189, -11}, {169, 79}, + {161, -9}, {192, -8}, {187, -9}, {169, -10}, {136, -9}, {184, 40}, + {164, -11}, {179, -10}, {174, -10}, {161, -10}, {115, -7}, {197, 20}, + {195, -11}, {195, -11}, {146, -10}, {110, -6}, { 95, -4}, {195, 39}, + {182, 55}, {172, 77}, {177, 37}, {169, 29}, {172, 52}, { 92, 162} }, + { {174, 80}, {164, 80}, { 95, 80}, { 46, 66}, { 56, 24}, { 36, 193}, + {164, 80}, {166, 77}, {105, 76}, { 49, 68}, { 46, 31}, { 49, 186}, + { 97, 78}, {110, 74}, { 72, 72}, { 44, 60}, { 33, 30}, { 69, 131}, + { 61, 61}, { 69, 63}, { 51, 57}, { 31, 48}, { 26, 27}, { 64, 89}, + { 67, 23}, { 51, 32}, { 36, 33}, { 26, 28}, { 20, 12}, { 44, 68}, + { 26, 197}, { 41, 189}, { 61, 129}, { 28, 103}, { 49, 52}, {-12, 245} }, + { {102, 141}, { 79, 166}, { 72, 162}, { 97, 125}, {179, 4}, {307, 0}, + { 72, 168}, { 69, 175}, { 84, 160}, {105, 127}, {148, 34}, {310, 0}, + { 84, 151}, { 82, 161}, { 87, 153}, { 87, 135}, {115, 51}, {317, 0}, + { 97, 125}, {102, 131}, {105, 125}, { 87, 122}, { 84, 64}, { 54, 184}, + {166, 18}, {146, 43}, {125, 51}, { 90, 64}, { 95, 7}, { 38, 154}, + {294, 0}, { 13, 225}, { 10, 225}, { 67, 168}, { 0, 167}, {161, 94} }, + { {172, 76}, {172, 75}, {136, 80}, { 64, 98}, { 74, 67}, {315, 0}, + {169, 76}, {207, 56}, {164, 66}, { 97, 80}, { 67, 72}, {328, 0}, + {136, 80}, {187, 53}, {154, 62}, { 72, 85}, { -2, 105}, {305, 0}, + { 74, 91}, {128, 64}, {113, 64}, { 61, 77}, { 41, 75}, {259, 0}, + { 46, 84}, { 51, 81}, { 28, 89}, { 31, 78}, { 23, 77}, {202, 0}, + {323, 0}, {323, 0}, {300, 0}, {236, 0}, {195, 0}, {328, 0} }, +}; + +static const int16_t vp5_ract_lc[3][3][5][6][2] = { + { { { {276, 0}, {238, 0}, {195, 0}, {156, 0}, {113, 0}, {274, 0} }, + { { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1} }, + { {192, 59}, {182, 50}, {141, 48}, {110, 40}, { 92, 19}, {125,128} }, + { {169, 87}, {169, 83}, {184, 62}, {220, 16}, {184, 0}, {264, 0} }, + { {212, 40}, {212, 36}, {169, 49}, {174, 27}, { 8,120}, {182, 71} } }, + { { {259, 10}, {197, 19}, {143, 22}, {123, 16}, {110, 8}, {133, 88} }, + { { 0, 1}, {256, 0}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1} }, + { {207, 46}, {187, 50}, { 97, 83}, { 23,100}, { 41, 56}, { 56,188} }, + { {166, 90}, {146,108}, {161, 88}, {136, 95}, {174, 0}, {266, 0} }, + { {264, 7}, {243, 18}, {184, 43}, {-14,154}, { 20,112}, { 20,199} } }, + { { {230, 26}, {197, 22}, {159, 20}, {146, 12}, {136, 4}, { 54,162} }, + { { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1} }, + { {192, 59}, {156, 72}, { 84,101}, { 49,101}, { 79, 47}, { 79,167} }, + { {138,115}, {136,116}, {166, 80}, {238, 0}, {195, 0}, {261, 0} }, + { {225, 33}, {205, 42}, {159, 61}, { 79, 96}, { 92, 66}, { 28,195} } }, + }, { + { { {200, 37}, {197, 18}, {159, 13}, {143, 7}, {102, 5}, {123,126} }, + { {197, 3}, {220, -9}, {210,-12}, {187, -6}, {151, -2}, {174, 80} }, + { {200, 53}, {187, 47}, {159, 40}, {118, 38}, {100, 18}, {141,111} }, + { {179, 78}, {166, 86}, {197, 50}, {207, 27}, {187, 0}, {115,139} }, + { {218, 34}, {220, 29}, {174, 46}, {128, 61}, { 54, 89}, {187, 65} } }, + { { {238, 14}, {197, 18}, {125, 26}, { 90, 25}, { 82, 13}, {161, 86} }, + { {189, 1}, {205, -2}, {156, -4}, {143, -4}, {146, -4}, {172, 72} }, + { {230, 31}, {192, 45}, {102, 76}, { 38, 85}, { 56, 41}, { 64,173} }, + { {166, 91}, {141,111}, {128,116}, {118,109}, {177, 0}, { 23,222} }, + { {253, 14}, {236, 21}, {174, 49}, { 33,118}, { 44, 93}, 
{ 23,187} } }, + { { {218, 28}, {179, 28}, {118, 35}, { 95, 30}, { 72, 24}, {128,108} }, + { {187, 1}, {174, -1}, {125, -1}, {110, -1}, {108, -1}, {202, 52} }, + { {197, 53}, {146, 75}, { 46,118}, { 33,103}, { 64, 50}, {118,126} }, + { {138,114}, {128,122}, {161, 86}, {243, -6}, {195, 0}, { 38,210} }, + { {215, 39}, {179, 58}, { 97,101}, { 95, 85}, { 87, 70}, { 69,152} } }, + }, { + { { {236, 24}, {205, 18}, {172, 12}, {154, 6}, {125, 1}, {169, 75} }, + { {187, 4}, {230, -2}, {228, -4}, {236, -4}, {241, -2}, {192, 66} }, + { {200, 46}, {187, 42}, {159, 34}, {136, 25}, {105, 10}, {179, 62} }, + { {207, 55}, {192, 63}, {192, 54}, {195, 36}, {177, 1}, {143, 98} }, + { {225, 27}, {207, 34}, {200, 30}, {131, 57}, { 97, 60}, {197, 45} } }, + { { {271, 8}, {218, 13}, {133, 19}, { 90, 19}, { 72, 7}, {182, 51} }, + { {179, 1}, {225, -1}, {154, -2}, {110, -1}, { 92, 0}, {195, 41} }, + { {241, 26}, {189, 40}, { 82, 64}, { 33, 60}, { 67, 17}, {120, 94} }, + { {192, 68}, {151, 94}, {146, 90}, {143, 72}, {161, 0}, {113,128} }, + { {256, 12}, {218, 29}, {166, 48}, { 44, 99}, { 31, 87}, {148, 78} } }, + { { {238, 20}, {184, 22}, {113, 27}, { 90, 22}, { 74, 9}, {192, 37} }, + { {184, 0}, {215, -1}, {141, -1}, { 97, 0}, { 49, 0}, {264, 13} }, + { {182, 51}, {138, 61}, { 95, 63}, { 54, 59}, { 64, 25}, {200, 45} }, + { {179, 75}, {156, 87}, {174, 65}, {177, 44}, {174, 0}, {164, 85} }, + { {195, 45}, {148, 65}, {105, 79}, { 95, 72}, { 87, 60}, {169, 63} } }, + } +}; + +static const uint8_t vp5_coord_div[] = { 2, 2, 2, 2, 4, 4 }; + +#endif /* VP5DATA_H */ diff --git a/src/libffmpeg/libavcodec/vp6.c b/src/libffmpeg/libavcodec/vp6.c new file mode 100644 index 000000000..381fcc8ee --- /dev/null +++ b/src/libffmpeg/libavcodec/vp6.c @@ -0,0 +1,537 @@ +/** + * @file vp6.c + * VP6 compatible video decoder + * + * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * + * The VP6F decoder accepts an optional 1-byte extradata. 
It is composed of: + * - upper 4bits: difference between encoded width and visible width + * - lower 4bits: difference between encoded height and visible height + */ + +#include <stdlib.h> + +#include "avcodec.h" +#include "dsputil.h" +#include "bitstream.h" +#include "mpegvideo.h" + +#include "vp56.h" +#include "vp56data.h" +#include "vp6data.h" + + +static int vp6_parse_header(vp56_context_t *s, uint8_t *buf, int buf_size, + int *golden_frame) +{ + vp56_range_coder_t *c = &s->c; + int parse_filter_info = 0; + int vrt_shift = 0; + int sub_version; + int rows, cols; + int res = 1; + + if (buf[0] & 1) + return 0; + + s->frames[VP56_FRAME_CURRENT].key_frame = !(buf[0] & 0x80); + vp56_init_dequant(s, (buf[0] >> 1) & 0x3F); + + if (s->frames[VP56_FRAME_CURRENT].key_frame) { + sub_version = buf[1] >> 3; + if (sub_version > 8) + return 0; + if ((buf[1] & 0x06) != 0x06) + return 0; + if (buf[1] & 1) { + av_log(s->avctx, AV_LOG_ERROR, "interlacing not supported\n"); + return 0; + } + + rows = buf[2]; /* number of stored macroblock rows */ + cols = buf[3]; /* number of stored macroblock cols */ + /* buf[4] is number of displayed macroblock rows */ + /* buf[5] is number of displayed macroblock cols */ + + if (16*cols != s->avctx->coded_width || + 16*rows != s->avctx->coded_height) { + avcodec_set_dimensions(s->avctx, 16*cols, 16*rows); + if (s->avctx->extradata_size == 1) { + s->avctx->width -= s->avctx->extradata[0] >> 4; + s->avctx->height -= s->avctx->extradata[0] & 0x0F; + } + res = 2; + } + + vp56_init_range_decoder(c, buf+6, buf_size-6); + vp56_rac_gets(c, 2); + + parse_filter_info = 1; + if (sub_version < 8) + vrt_shift = 5; + s->sub_version = sub_version; + } else { + if (!s->sub_version) + return 0; + + vp56_init_range_decoder(c, buf+1, buf_size-1); + + *golden_frame = vp56_rac_get(c); + s->deblock_filtering = vp56_rac_get(c); + if (s->deblock_filtering) + vp56_rac_get(c); + if (s->sub_version > 7) + parse_filter_info = vp56_rac_get(c); + } + + if (parse_filter_info) { + if (vp56_rac_get(c)) { + s->filter_mode = 2; + s->sample_variance_threshold = vp56_rac_gets(c, 5) << vrt_shift; + s->max_vector_length = 2 << vp56_rac_gets(c, 3); + } else if (vp56_rac_get(c)) { + s->filter_mode = 1; + } else { + s->filter_mode = 0; + } + if (s->sub_version > 7) + s->filter_selection = vp56_rac_gets(c, 4); + else + s->filter_selection = 16; + } + + vp56_rac_get(c); + return res; +} + +static void vp6_coeff_order_table_init(vp56_context_t *s) +{ + int i, pos, idx = 1; + + s->coeff_index_to_pos[0] = 0; + for (i=0; i<16; i++) + for (pos=1; pos<64; pos++) + if (s->coeff_reorder[pos] == i) + s->coeff_index_to_pos[idx++] = pos; +} + +static void vp6_default_models_init(vp56_context_t *s) +{ + s->vector_model_dct[0] = 0xA2; + s->vector_model_dct[1] = 0xA4; + s->vector_model_sig[0] = 0x80; + s->vector_model_sig[1] = 0x80; + + memcpy(s->mb_types_stats, vp56_def_mb_types_stats, sizeof(s->mb_types_stats)); + memcpy(s->vector_model_fdv, vp6_def_fdv_vector_model, sizeof(s->vector_model_fdv)); + memcpy(s->vector_model_pdv, vp6_def_pdv_vector_model, sizeof(s->vector_model_pdv)); + memcpy(s->coeff_model_runv, vp6_def_runv_coeff_model, sizeof(s->coeff_model_runv)); + memcpy(s->coeff_reorder, vp6_def_coeff_reorder, sizeof(s->coeff_reorder)); + + vp6_coeff_order_table_init(s); +} + +static void vp6_parse_vector_models(vp56_context_t *s) +{ + vp56_range_coder_t *c = &s->c; + int comp, node; + + for (comp=0; comp<2; comp++) { + if (vp56_rac_get_prob(c, vp6_sig_dct_pct[comp][0])) + s->vector_model_dct[comp] = vp56_rac_gets_nn(c, 
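/* An illustrative sketch (helper name hypothetical, not part of the patch)
 * of the extradata rule described in the file header above, mirroring the
 * adjustment vp6_parse_header() applies to avctx->width/height: */
static void vp6f_visible_size(uint8_t extradata, int coded_w, int coded_h,
                              int *vis_w, int *vis_h)
{
    *vis_w = coded_w - (extradata >> 4);   /* upper 4 bits: width delta  */
    *vis_h = coded_h - (extradata & 0x0F); /* lower 4 bits: height delta */
}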
7); + if (vp56_rac_get_prob(c, vp6_sig_dct_pct[comp][1])) + s->vector_model_sig[comp] = vp56_rac_gets_nn(c, 7); + } + + for (comp=0; comp<2; comp++) + for (node=0; node<7; node++) + if (vp56_rac_get_prob(c, vp6_pdv_pct[comp][node])) + s->vector_model_pdv[comp][node] = vp56_rac_gets_nn(c, 7); + + for (comp=0; comp<2; comp++) + for (node=0; node<8; node++) + if (vp56_rac_get_prob(c, vp6_fdv_pct[comp][node])) + s->vector_model_fdv[comp][node] = vp56_rac_gets_nn(c, 7); +} + +static void vp6_parse_coeff_models(vp56_context_t *s) +{ + vp56_range_coder_t *c = &s->c; + int def_prob[11]; + int node, cg, ctx, pos; + int ct; /* code type */ + int pt; /* plane type (0 for Y, 1 for U or V) */ + + memset(def_prob, 0x80, sizeof(def_prob)); + + for (pt=0; pt<2; pt++) + for (node=0; node<11; node++) + if (vp56_rac_get_prob(c, vp6_dccv_pct[pt][node])) { + def_prob[node] = vp56_rac_gets_nn(c, 7); + s->coeff_model_dccv[pt][node] = def_prob[node]; + } else if (s->frames[VP56_FRAME_CURRENT].key_frame) { + s->coeff_model_dccv[pt][node] = def_prob[node]; + } + + if (vp56_rac_get(c)) { + for (pos=1; pos<64; pos++) + if (vp56_rac_get_prob(c, vp6_coeff_reorder_pct[pos])) + s->coeff_reorder[pos] = vp56_rac_gets(c, 4); + vp6_coeff_order_table_init(s); + } + + for (cg=0; cg<2; cg++) + for (node=0; node<14; node++) + if (vp56_rac_get_prob(c, vp6_runv_pct[cg][node])) + s->coeff_model_runv[cg][node] = vp56_rac_gets_nn(c, 7); + + for (ct=0; ct<3; ct++) + for (pt=0; pt<2; pt++) + for (cg=0; cg<6; cg++) + for (node=0; node<11; node++) + if (vp56_rac_get_prob(c, vp6_ract_pct[ct][pt][cg][node])) { + def_prob[node] = vp56_rac_gets_nn(c, 7); + s->coeff_model_ract[pt][ct][cg][node] = def_prob[node]; + } else if (s->frames[VP56_FRAME_CURRENT].key_frame) { + s->coeff_model_ract[pt][ct][cg][node] = def_prob[node]; + } + + /* coeff_model_dcct is a linear combination of coeff_model_dccv */ + for (pt=0; pt<2; pt++) + for (ctx=0; ctx<3; ctx++) + for (node=0; node<5; node++) + s->coeff_model_dcct[pt][ctx][node] = clip(((s->coeff_model_dccv[pt][node] * vp6_dccv_lc[ctx][node][0] + 128) >> 8) + vp6_dccv_lc[ctx][node][1], 1, 255); +} + +static void vp6_parse_vector_adjustment(vp56_context_t *s, vp56_mv_t *vect) +{ + vp56_range_coder_t *c = &s->c; + int comp; + + *vect = (vp56_mv_t) {0,0}; + if (s->vector_candidate_pos < 2) + *vect = s->vector_candidate[0]; + + for (comp=0; comp<2; comp++) { + int i, delta = 0; + + if (vp56_rac_get_prob(c, s->vector_model_dct[comp])) { + static const uint8_t prob_order[] = {0, 1, 2, 7, 6, 5, 4}; + for (i=0; i<sizeof(prob_order); i++) { + int j = prob_order[i]; + delta |= vp56_rac_get_prob(c, s->vector_model_fdv[comp][j])<<j; + } + if (delta & 0xF0) + delta |= vp56_rac_get_prob(c, s->vector_model_fdv[comp][3])<<3; + else + delta |= 8; + } else { + delta = vp56_rac_get_tree(c, vp56_pva_tree, + s->vector_model_pdv[comp]); + } + + if (delta && vp56_rac_get_prob(c, s->vector_model_sig[comp])) + delta = -delta; + + if (!comp) + vect->x += delta; + else + vect->y += delta; + } +} + +static void vp6_parse_coeff(vp56_context_t *s) +{ + vp56_range_coder_t *c = &s->c; + uint8_t *permute = s->scantable.permutated; + uint8_t *model, *model2, *model3; + int coeff, sign, coeff_idx; + int b, i, cg, idx, ctx; + int pt = 0; /* plane type (0 for Y, 1 for U or V) */ + + for (b=0; b<6; b++) { + int ct = 1; /* code type */ + int run = 1; + + if (b > 3) pt = 1; + + ctx = s->left_block[vp56_b6to4[b]].not_null_dc + + s->above_blocks[s->above_block_idx[b]].not_null_dc; + model = s->coeff_model_dccv[pt]; + model2 = 
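/* The "linear combination" that fills coeff_model_dcct above is a
 * fixed-point affine map, prob' = clip(((prob * scale + 128) >> 8) + bias,
 * 1, 255), with the (scale, bias) pairs coming from vp6_dccv_lc.  A
 * stand-alone sketch (hypothetical helper, not from the patch): */
static int vp6_blend_prob(int prob, int scale, int bias)
{
    int v = ((prob * scale + 128) >> 8) + bias;
    return v < 1 ? 1 : v > 255 ? 255 : v;   /* same clamp as clip(v,1,255) */
}
/* e.g. ctx 0, node 0: ((128 * 122 + 128) >> 8) + 133 == 61 + 133 == 194 */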
s->coeff_model_dcct[pt][ctx]; + + for (coeff_idx=0; coeff_idx<64; ) { + if ((coeff_idx>1 && ct==0) || vp56_rac_get_prob(c, model2[0])) { + /* parse a coeff */ + if (coeff_idx == 0) { + s->left_block[vp56_b6to4[b]].not_null_dc = 1; + s->above_blocks[s->above_block_idx[b]].not_null_dc = 1; + } + + if (vp56_rac_get_prob(c, model2[2])) { + if (vp56_rac_get_prob(c, model2[3])) { + idx = vp56_rac_get_tree(c, vp56_pc_tree, model); + coeff = vp56_coeff_bias[idx]; + for (i=vp56_coeff_bit_length[idx]; i>=0; i--) + coeff += vp56_rac_get_prob(c, vp56_coeff_parse_table[idx][i]) << i; + } else { + if (vp56_rac_get_prob(c, model2[4])) + coeff = 3 + vp56_rac_get_prob(c, model[5]); + else + coeff = 2; + } + ct = 2; + } else { + ct = 1; + coeff = 1; + } + sign = vp56_rac_get(c); + coeff = (coeff ^ -sign) + sign; + if (coeff_idx) + coeff *= s->dequant_ac; + idx = s->coeff_index_to_pos[coeff_idx]; + s->block_coeff[b][permute[idx]] = coeff; + run = 1; + } else { + /* parse a run */ + ct = 0; + if (coeff_idx == 0) { + s->left_block[vp56_b6to4[b]].not_null_dc = 0; + s->above_blocks[s->above_block_idx[b]].not_null_dc = 0; + } else { + if (!vp56_rac_get_prob(c, model2[1])) + break; + + model3 = s->coeff_model_runv[coeff_idx >= 6]; + run = vp56_rac_get_tree(c, vp6_pcr_tree, model3); + if (!run) + for (run=9, i=0; i<6; i++) + run += vp56_rac_get_prob(c, model3[i+8]) << i; + } + } + + cg = vp6_coeff_groups[coeff_idx+=run]; + model = model2 = s->coeff_model_ract[pt][ct][cg]; + } + } +} + +static int vp6_adjust(int v, int t) +{ + int V = v, s = v >> 31; + V ^= s; + V -= s; + if (V-t-1 >= (unsigned)(t-1)) + return v; + V = 2*t - V; + V += s; + V ^= s; + return V; +} + +static int vp6_block_variance(uint8_t *src, int stride) +{ + int sum = 0, square_sum = 0; + int y, x; + + for (y=0; y<8; y+=2) { + for (x=0; x<8; x+=2) { + sum += src[x]; + square_sum += src[x]*src[x]; + } + src += 2*stride; + } + return (16*square_sum - sum*sum) >> 8; +} + +static void vp6_filter_hv2(vp56_context_t *s, uint8_t *dst, uint8_t *src, + int stride, int delta, int16_t weight) +{ + s->dsp.put_pixels_tab[1][0](dst, src, stride, 8); + s->dsp.biweight_h264_pixels_tab[3](dst, src+delta, stride, 2, + 8-weight, weight, 0); +} + +static void vp6_filter_hv4(uint8_t *dst, uint8_t *src, int stride, + int delta, const int16_t *weights) +{ + int x, y; + + for (y=0; y<8; y++) { + for (x=0; x<8; x++) { + dst[x] = clip_uint8(( src[x-delta ] * weights[0] + + src[x ] * weights[1] + + src[x+delta ] * weights[2] + + src[x+2*delta] * weights[3] + 64) >> 7); + } + src += stride; + dst += stride; + } +} + +static void vp6_filter_diag2(vp56_context_t *s, uint8_t *dst, uint8_t *src, + int stride, int h_weight, int v_weight) +{ + uint8_t *tmp = s->edge_emu_buffer+16; + int x, xmax; + + s->dsp.put_pixels_tab[1][0](tmp, src, stride, 8); + s->dsp.biweight_h264_pixels_tab[3](tmp, src+1, stride, 2, + 8-h_weight, h_weight, 0); + /* we need a 8x9 block to do vertical filter, so compute one more line */ + for (x=8*stride, xmax=x+8; x<xmax; x++) + tmp[x] = (src[x]*(8-h_weight) + src[x+1]*h_weight + 4) >> 3; + + s->dsp.put_pixels_tab[1][0](dst, tmp, stride, 8); + s->dsp.biweight_h264_pixels_tab[3](dst, tmp+stride, stride, 2, + 8-v_weight, v_weight, 0); +} + +static void vp6_filter_diag4(uint8_t *dst, uint8_t *src, int stride, + const int16_t *h_weights,const int16_t *v_weights) +{ + int x, y; + int tmp[8*11]; + int *t = tmp; + + src -= stride; + + for (y=0; y<11; y++) { + for (x=0; x<8; x++) { + t[x] = clip_uint8(( src[x-1] * h_weights[0] + + src[x ] * h_weights[1] + + src[x+1] 
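/* The sign step in vp6_parse_coeff() above, coeff = (coeff ^ -sign) + sign,
 * is branchless two's-complement negation: sign is 0 or 1, so -sign is 0 or
 * all-ones and the expression yields coeff or -coeff without a branch.
 * vp6_adjust() plays the same trick with s = v >> 31.  Stand-alone
 * (hypothetical name, not from the patch): */
static int apply_sign(int value, int sign_bit)  /* sign_bit must be 0 or 1 */
{
    return (value ^ -sign_bit) + sign_bit;      /* sign_bit ? -value : value */
}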
* h_weights[2] + + src[x+2] * h_weights[3] + 64) >> 7); + } + src += stride; + t += 8; + } + + t = tmp + 8; + for (y=0; y<8; y++) { + for (x=0; x<8; x++) { + dst[x] = clip_uint8(( t[x-8 ] * v_weights[0] + + t[x ] * v_weights[1] + + t[x+8 ] * v_weights[2] + + t[x+16] * v_weights[3] + 64) >> 7); + } + dst += stride; + t += 8; + } +} + +static void vp6_filter(vp56_context_t *s, uint8_t *dst, uint8_t *src, + int offset1, int offset2, int stride, + vp56_mv_t mv, int mask, int select, int luma) +{ + int filter4 = 0; + int x8 = mv.x & mask; + int y8 = mv.y & mask; + + if (luma) { + x8 *= 2; + y8 *= 2; + filter4 = s->filter_mode; + if (filter4 == 2) { + if (s->max_vector_length && + (FFABS(mv.x) > s->max_vector_length || + FFABS(mv.y) > s->max_vector_length)) { + filter4 = 0; + } else if (s->sample_variance_threshold + && (vp6_block_variance(src+offset1, stride) + < s->sample_variance_threshold)) { + filter4 = 0; + } + } + } + + if ((y8 && (offset2-offset1)*s->flip<0) || (!y8 && offset1 > offset2)) { + offset1 = offset2; + } + + if (filter4) { + if (!y8) { /* left or right combine */ + vp6_filter_hv4(dst, src+offset1, stride, 1, + vp6_block_copy_filter[select][x8]); + } else if (!x8) { /* above or below combine */ + vp6_filter_hv4(dst, src+offset1, stride, stride, + vp6_block_copy_filter[select][y8]); + } else if ((mv.x^mv.y) >> 31) { /* lower-left or upper-right combine */ + vp6_filter_diag4(dst, src+offset1-1, stride, + vp6_block_copy_filter[select][x8], + vp6_block_copy_filter[select][y8]); + } else { /* lower-right or upper-left combine */ + vp6_filter_diag4(dst, src+offset1, stride, + vp6_block_copy_filter[select][x8], + vp6_block_copy_filter[select][y8]); + } + } else { + if (!y8) { /* left or right combine */ + vp6_filter_hv2(s, dst, src+offset1, stride, 1, x8); + } else if (!x8) { /* above or below combine */ + vp6_filter_hv2(s, dst, src+offset1, stride, stride, y8); + } else if ((mv.x^mv.y) >> 31) { /* lower-left or upper-right combine */ + vp6_filter_diag2(s, dst, src+offset1-1, stride, x8, y8); + } else { /* lower-right or upper-left combine */ + vp6_filter_diag2(s, dst, src+offset1, stride, x8, y8); + } + } +} + +static int vp6_decode_init(AVCodecContext *avctx) +{ + vp56_context_t *s = avctx->priv_data; + + vp56_init(s, avctx, avctx->codec->id == CODEC_ID_VP6); + s->vp56_coord_div = vp6_coord_div; + s->parse_vector_adjustment = vp6_parse_vector_adjustment; + s->adjust = vp6_adjust; + s->filter = vp6_filter; + s->parse_coeff = vp6_parse_coeff; + s->default_models_init = vp6_default_models_init; + s->parse_vector_models = vp6_parse_vector_models; + s->parse_coeff_models = vp6_parse_coeff_models; + s->parse_header = vp6_parse_header; + + return 0; +} + +AVCodec vp6_decoder = { + "vp6", + CODEC_TYPE_VIDEO, + CODEC_ID_VP6, + sizeof(vp56_context_t), + vp6_decode_init, + NULL, + vp56_free, + vp56_decode_frame, +}; + +/* flash version, not flipped upside-down */ +AVCodec vp6f_decoder = { + "vp6f", + CODEC_TYPE_VIDEO, + CODEC_ID_VP6F, + sizeof(vp56_context_t), + vp6_decode_init, + NULL, + vp56_free, + vp56_decode_frame, +}; diff --git a/src/libffmpeg/libavcodec/vp6data.h b/src/libffmpeg/libavcodec/vp6data.h new file mode 100644 index 000000000..0545a9d66 --- /dev/null +++ b/src/libffmpeg/libavcodec/vp6data.h @@ -0,0 +1,300 @@ +/** + * @file vp6data.h + * VP6 compatible video decoder + * + * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org> + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef VP6DATA_H +#define VP6DATA_H + +#include "vp56data.h" + +static const uint8_t vp6_def_fdv_vector_model[2][8] = { + { 247, 210, 135, 68, 138, 220, 239, 246 }, + { 244, 184, 201, 44, 173, 221, 239, 253 }, +}; + +static const uint8_t vp6_def_pdv_vector_model[2][7] = { + { 225, 146, 172, 147, 214, 39, 156 }, + { 204, 170, 119, 235, 140, 230, 228 }, +}; + +static const uint8_t vp6_def_coeff_reorder[] = { + 0, 0, 1, 1, 1, 2, 2, 2, + 2, 2, 2, 3, 3, 4, 4, 4, + 5, 5, 5, 5, 6, 6, 7, 7, + 7, 7, 7, 8, 8, 9, 9, 9, + 9, 9, 9, 10, 10, 11, 11, 11, + 11, 11, 11, 12, 12, 12, 12, 12, + 12, 13, 13, 13, 13, 13, 14, 14, + 14, 14, 15, 15, 15, 15, 15, 15, +}; + +static const uint8_t vp6_def_runv_coeff_model[2][14] = { + { 198, 197, 196, 146, 198, 204, 169, 142, 130, 136, 149, 149, 191, 249 }, + { 135, 201, 181, 154, 98, 117, 132, 126, 146, 169, 184, 240, 246, 254 }, +}; + +static const uint8_t vp6_sig_dct_pct[2][2] = { + { 237, 246 }, + { 231, 243 }, +}; + +static const uint8_t vp6_pdv_pct[2][7] = { + { 253, 253, 254, 254, 254, 254, 254 }, + { 245, 253, 254, 254, 254, 254, 254 }, +}; + +static const uint8_t vp6_fdv_pct[2][8] = { + { 254, 254, 254, 254, 254, 250, 250, 252 }, + { 254, 254, 254, 254, 254, 251, 251, 254 }, +}; + +static const uint8_t vp6_dccv_pct[2][11] = { + { 146, 255, 181, 207, 232, 243, 238, 251, 244, 250, 249 }, + { 179, 255, 214, 240, 250, 255, 244, 255, 255, 255, 255 }, +}; + +static const uint8_t vp6_coeff_reorder_pct[] = { + 255, 132, 132, 159, 153, 151, 161, 170, + 164, 162, 136, 110, 103, 114, 129, 118, + 124, 125, 132, 136, 114, 110, 142, 135, + 134, 123, 143, 126, 153, 183, 166, 161, + 171, 180, 179, 164, 203, 218, 225, 217, + 215, 206, 203, 217, 229, 241, 248, 243, + 253, 255, 253, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, +}; + +static const uint8_t vp6_runv_pct[2][14] = { + { 219, 246, 238, 249, 232, 239, 249, 255, 248, 253, 239, 244, 241, 248 }, + { 198, 232, 251, 253, 219, 241, 253, 255, 248, 249, 244, 238, 251, 255 }, +}; + +static const uint8_t vp6_ract_pct[3][2][6][11] = { + { { { 227, 246, 230, 247, 244, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 209, 231, 231, 249, 249, 253, 255, 255, 255 }, + { 255, 255, 225, 242, 241, 251, 253, 255, 255, 255, 255 }, + { 255, 255, 241, 253, 252, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 248, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } }, + { { 240, 255, 248, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 240, 253, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } } }, + { { { 206, 203, 
227, 239, 247, 255, 253, 255, 255, 255, 255 }, + { 207, 199, 220, 236, 243, 252, 252, 255, 255, 255, 255 }, + { 212, 219, 230, 243, 244, 253, 252, 255, 255, 255, 255 }, + { 236, 237, 247, 252, 253, 255, 255, 255, 255, 255, 255 }, + { 240, 240, 248, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } }, + { { 230, 233, 249, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 238, 238, 250, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 248, 251, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } } }, + { { { 225, 239, 227, 231, 244, 253, 243, 255, 255, 253, 255 }, + { 232, 234, 224, 228, 242, 249, 242, 252, 251, 251, 255 }, + { 235, 249, 238, 240, 251, 255, 249, 255, 253, 253, 255 }, + { 249, 253, 251, 250, 255, 255, 255, 255, 255, 255, 255 }, + { 251, 250, 249, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } }, + { { 243, 244, 250, 250, 255, 255, 255, 255, 255, 255, 255 }, + { 249, 248, 250, 253, 255, 255, 255, 255, 255, 255, 255 }, + { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } } } +}; + +static const int vp6_dccv_lc[3][5][2] = { + { { 122, 133 }, { 0, 1 }, { 78, 171 }, { 139, 117 }, { 168, 79 } }, + { { 133, 51 }, { 0, 1 }, { 169, 71 }, { 214, 44 }, { 210, 38 } }, + { { 142, -16 }, { 0, 1 }, { 221, -30 }, { 246, -3 }, { 203, 17 } }, +}; + +static const uint8_t vp6_coeff_groups[] = { + 0, 0, 1, 1, 1, 2, 2, 2, + 2, 2, 2, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, +}; + +static const int16_t vp6_block_copy_filter[17][8][4] = { + { { 0, 128, 0, 0 }, /* 0 */ + { -3, 122, 9, 0 }, + { -4, 109, 24, -1 }, + { -5, 91, 45, -3 }, + { -4, 68, 68, -4 }, + { -3, 45, 91, -5 }, + { -1, 24, 109, -4 }, + { 0, 9, 122, -3 } }, + { { 0, 128, 0, 0 }, /* 1 */ + { -4, 124, 9, -1 }, + { -5, 110, 25, -2 }, + { -6, 91, 46, -3 }, + { -5, 69, 69, -5 }, + { -3, 46, 91, -6 }, + { -2, 25, 110, -5 }, + { -1, 9, 124, -4 } }, + { { 0, 128, 0, 0 }, /* 2 */ + { -4, 123, 10, -1 }, + { -6, 110, 26, -2 }, + { -7, 92, 47, -4 }, + { -6, 70, 70, -6 }, + { -4, 47, 92, -7 }, + { -2, 26, 110, -6 }, + { -1, 10, 123, -4 } }, + { { 0, 128, 0, 0 }, /* 3 */ + { -5, 124, 10, -1 }, + { -7, 110, 27, -2 }, + { -7, 91, 48, -4 }, + { -6, 70, 70, -6 }, + { -4, 48, 92, -8 }, + { -2, 27, 110, -7 }, + { -1, 10, 124, -5 } }, + { { 0, 128, 0, 0 }, /* 4 */ + { -6, 124, 11, -1 }, + { -8, 111, 28, -3 }, + { -8, 92, 49, -5 }, + { -7, 71, 71, -7 }, + { -5, 49, 92, -8 }, + { -3, 28, 111, -8 }, + { -1, 11, 124, -6 } }, + { { 0, 128, 0, 0 }, /* 5 */ + { -6, 123, 12, -1 }, + { -9, 111, 29, -3 }, + { -9, 93, 50, -6 }, + { -8, 72, 72, -8 }, + { -6, 50, 93, -9 }, + { -3, 29, 111, -9 }, + { -1, 12, 123, -6 } }, + { { 0, 128, 0, 0 }, /* 6 */ + { -7, 124, 12, -1 }, + { -10, 111, 30, -3 }, + { -10, 93, 51, -6 }, + { -9, 73, 73, -9 }, + { -6, 51, 93, -10 }, + { -3, 30, 111, -10 }, + { -1, 12, 124, -7 } }, + { { 0, 128, 0, 0 }, /* 7 */ + { -7, 123, 13, -1 }, + { -11, 112, 31, -4 }, + { -11, 94, 52, -7 }, + { -10, 74, 74, -10 }, + { -7, 52, 94, -11 }, + { -4, 31, 112, -11 }, + { 
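/* Note: every 4-tap row of vp6_block_copy_filter sums to 128, and
 * vp6_filter_hv4()/vp6_filter_diag4() above add 64 before shifting right by
 * 7 (divide by 128 with rounding), so each sub-pel position keeps unity DC
 * gain. */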
-1, 13, 123, -7 } }, + { { 0, 128, 0, 0 }, /* 8 */ + { -8, 124, 13, -1 }, + { -12, 112, 32, -4 }, + { -12, 94, 53, -7 }, + { -10, 74, 74, -10 }, + { -7, 53, 94, -12 }, + { -4, 32, 112, -12 }, + { -1, 13, 124, -8 } }, + { { 0, 128, 0, 0 }, /* 9 */ + { -9, 124, 14, -1 }, + { -13, 112, 33, -4 }, + { -13, 95, 54, -8 }, + { -11, 75, 75, -11 }, + { -8, 54, 95, -13 }, + { -4, 33, 112, -13 }, + { -1, 14, 124, -9 } }, + { { 0, 128, 0, 0 }, /* 10 */ + { -9, 123, 15, -1 }, + { -14, 113, 34, -5 }, + { -14, 95, 55, -8 }, + { -12, 76, 76, -12 }, + { -8, 55, 95, -14 }, + { -5, 34, 112, -13 }, + { -1, 15, 123, -9 } }, + { { 0, 128, 0, 0 }, /* 11 */ + { -10, 124, 15, -1 }, + { -14, 113, 34, -5 }, + { -15, 96, 56, -9 }, + { -13, 77, 77, -13 }, + { -9, 56, 96, -15 }, + { -5, 34, 113, -14 }, + { -1, 15, 124, -10 } }, + { { 0, 128, 0, 0 }, /* 12 */ + { -10, 123, 16, -1 }, + { -15, 113, 35, -5 }, + { -16, 98, 56, -10 }, + { -14, 78, 78, -14 }, + { -10, 56, 98, -16 }, + { -5, 35, 113, -15 }, + { -1, 16, 123, -10 } }, + { { 0, 128, 0, 0 }, /* 13 */ + { -11, 124, 17, -2 }, + { -16, 113, 36, -5 }, + { -17, 98, 57, -10 }, + { -14, 78, 78, -14 }, + { -10, 57, 98, -17 }, + { -5, 36, 113, -16 }, + { -2, 17, 124, -11 } }, + { { 0, 128, 0, 0 }, /* 14 */ + { -12, 125, 17, -2 }, + { -17, 114, 37, -6 }, + { -18, 99, 58, -11 }, + { -15, 79, 79, -15 }, + { -11, 58, 99, -18 }, + { -6, 37, 114, -17 }, + { -2, 17, 125, -12 } }, + { { 0, 128, 0, 0 }, /* 15 */ + { -12, 124, 18, -2 }, + { -18, 114, 38, -6 }, + { -19, 99, 59, -11 }, + { -16, 80, 80, -16 }, + { -11, 59, 99, -19 }, + { -6, 38, 114, -18 }, + { -2, 18, 124, -12 } }, + { { 0, 128, 0, 0 }, /* 16 */ + { -4, 118, 16, -2 }, + { -7, 106, 34, -5 }, + { -8, 90, 53, -7 }, + { -8, 72, 72, -8 }, + { -7, 53, 90, -8 }, + { -5, 34, 106, -7 }, + { -2, 16, 118, -4 } }, +}; + +static const vp56_tree_t vp6_pcr_tree[] = { + { 8, 0}, + { 4, 1}, + { 2, 2}, {-1}, {-2}, + { 2, 3}, {-3}, {-4}, + { 8, 4}, + { 4, 5}, + { 2, 6}, {-5}, {-6}, + { 2, 7}, {-7}, {-8}, + {-0}, +}; + +static const uint8_t vp6_coord_div[] = { 4, 4, 4, 4, 8, 8 }; + +#endif /* VP6DATA_H */ diff --git a/src/libffmpeg/libavcodec/wmadec.c b/src/libffmpeg/libavcodec/wmadec.c index 684aea2c8..bbf4970ce 100644 --- a/src/libffmpeg/libavcodec/wmadec.c +++ b/src/libffmpeg/libavcodec/wmadec.c @@ -115,6 +115,8 @@ typedef struct WMADecodeContext { float max_exponent[MAX_CHANNELS]; int16_t coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE]; DECLARE_ALIGNED_16(float, coefs[MAX_CHANNELS][BLOCK_MAX_SIZE]); + DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]); + DECLARE_ALIGNED_16(float, window[BLOCK_MAX_SIZE * 2]); MDCTContext mdct_ctx[BLOCK_NB_SIZES]; float *windows[BLOCK_NB_SIZES]; DECLARE_ALIGNED_16(FFTSample, mdct_tmp[BLOCK_MAX_SIZE]); /* temporary storage for imdct */ @@ -717,7 +719,6 @@ static int wma_decode_block(WMADecodeContext *s) { int n, v, a, ch, code, bsize; int coef_nb_bits, total_gain, parse_exponents; - DECLARE_ALIGNED_16(float, window[BLOCK_MAX_SIZE * 2]); int nb_coefs[MAX_CHANNELS]; float mdct_norm; @@ -1072,7 +1073,7 @@ static int wma_decode_block(WMADecodeContext *s) next_block_len = 1 << s->next_block_len_bits; /* right part */ - wptr = window + block_len; + wptr = s->window + block_len; if (block_len <= next_block_len) { for(i=0;i<block_len;i++) *wptr++ = s->windows[bsize][i]; @@ -1088,7 +1089,7 @@ static int wma_decode_block(WMADecodeContext *s) } /* left part */ - wptr = window + block_len; + wptr = s->window + block_len; if (block_len <= prev_block_len) { for(i=0;i<block_len;i++) *--wptr = s->windows[bsize][i]; @@ 
-1107,14 +1108,13 @@ static int wma_decode_block(WMADecodeContext *s) for(ch = 0; ch < s->nb_channels; ch++) { if (s->channel_coded[ch]) { - DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]); float *ptr; int n4, index, n; n = s->block_len; n4 = s->block_len / 2; s->mdct_ctx[bsize].fft.imdct_calc(&s->mdct_ctx[bsize], - output, s->coefs[ch], s->mdct_tmp); + s->output, s->coefs[ch], s->mdct_tmp); /* XXX: optimize all that by build the window and multipying/adding at the same time */ @@ -1122,13 +1122,13 @@ static int wma_decode_block(WMADecodeContext *s) /* multiply by the window and add in the frame */ index = (s->frame_len / 2) + s->block_pos - n4; ptr = &s->frame_out[ch][index]; - s->dsp.vector_fmul_add_add(ptr,window,output,ptr,0,2*n,1); + s->dsp.vector_fmul_add_add(ptr,s->window,s->output,ptr,0,2*n,1); /* specific fast case for ms-stereo : add to second channel if it is not coded */ if (s->ms_stereo && !s->channel_coded[1]) { ptr = &s->frame_out[1][index]; - s->dsp.vector_fmul_add_add(ptr,window,output,ptr,0,2*n,1); + s->dsp.vector_fmul_add_add(ptr,s->window,s->output,ptr,0,2*n,1); } } } diff --git a/src/libffmpeg/libavcodec/wmv2.c b/src/libffmpeg/libavcodec/wmv2.c index 5abc51775..f3d4f0f23 100644 --- a/src/libffmpeg/libavcodec/wmv2.c +++ b/src/libffmpeg/libavcodec/wmv2.c @@ -643,6 +643,12 @@ void ff_mspel_motion(MpegEncContext *s, v_edge_pos = s->v_edge_pos; src_x = clip(src_x, -16, s->width); src_y = clip(src_y, -16, s->height); + + if(src_x<=-16 || src_x >= s->width) + dxy &= ~3; + if(src_y<=-16 || src_y >= s->height) + dxy &= ~4; + linesize = s->linesize; uvlinesize = s->uvlinesize; ptr = ref_picture[0] + (src_y * linesize) + src_x; diff --git a/src/libffmpeg/libavutil/Makefile.am b/src/libffmpeg/libavutil/Makefile.am index 76340cf14..6e507cb67 100644 --- a/src/libffmpeg/libavutil/Makefile.am +++ b/src/libffmpeg/libavutil/Makefile.am @@ -1,6 +1,6 @@ include $(top_srcdir)/misc/Makefile.common -AM_CPPFLAGS = $(LIBFFMPEG_CPPFLAGS) +AM_CPPFLAGS = $(LIBFFMPEG_CPPFLAGS) -I$(top_srcdir)/src/libffmpeg AM_CFLAGS = -fno-strict-aliasing ASFLAGS = @@ -28,6 +28,7 @@ noinst_HEADERS = \ integer.h \ internal.h \ intfloat_readwrite.h \ + intreadwrite.h \ lls.h \ log.h \ mathematics.h \ diff --git a/src/libffmpeg/libavutil/bswap.h b/src/libffmpeg/libavutil/bswap.h index 4614c9045..03d613db2 100644 --- a/src/libffmpeg/libavutil/bswap.h +++ b/src/libffmpeg/libavutil/bswap.h @@ -37,7 +37,7 @@ #endif #if defined(ARCH_X86) -static always_inline uint16_t bswap_16(uint16_t x) +static av_always_inline uint16_t bswap_16(uint16_t x) { __asm("rorw $8, %0" : LEGACY_REGS (x) : @@ -45,7 +45,7 @@ static always_inline uint16_t bswap_16(uint16_t x) return x; } -static always_inline uint32_t bswap_32(uint32_t x) +static av_always_inline uint32_t bswap_32(uint32_t x) { #if __CPU__ != 386 __asm("bswap %0": @@ -82,12 +82,12 @@ static inline uint64_t bswap_64(uint64_t x) #elif defined(ARCH_SH4) -static always_inline uint16_t bswap_16(uint16_t x) { +static av_always_inline uint16_t bswap_16(uint16_t x) { __asm__("swap.b %0,%0":"=r"(x):"0"(x)); return x; } -static always_inline uint32_t bswap_32(uint32_t x) { +static av_always_inline uint32_t bswap_32(uint32_t x) { __asm__( "swap.b %0,%0\n" "swap.w %0,%0\n" @@ -110,12 +110,12 @@ static inline uint64_t bswap_64(uint64_t x) } #else -static always_inline uint16_t bswap_16(uint16_t x){ +static av_always_inline uint16_t bswap_16(uint16_t x){ return (x>>8) | (x<<8); } #ifdef ARCH_ARM -static always_inline uint32_t bswap_32(uint32_t x){ +static av_always_inline uint32_t 
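/* Two notes on the hunks above: in wmadec.c, the window[] and output[]
 * arrays (BLOCK_MAX_SIZE * 2 entries each) move from wma_decode_block()'s
 * stack into WMADecodeContext, presumably to trim per-call stack usage; the
 * 16-byte-aligned declarations are otherwise unchanged.  In bswap.h the
 * only change is the rename always_inline -> av_always_inline (the prefixed
 * macro is introduced in common.h further below); the byte-swapping logic
 * itself is untouched. */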
bswap_32(uint32_t x){ uint32_t t; __asm__ ( "eor %1, %0, %0, ror #16 \n\t" @@ -126,7 +126,7 @@ static always_inline uint32_t bswap_32(uint32_t x){ return x; } #else -static always_inline uint32_t bswap_32(uint32_t x){ +static av_always_inline uint32_t bswap_32(uint32_t x){ x= ((x<<8)&0xFF00FF00) | ((x>>8)&0x00FF00FF); return (x>>16) | (x<<16); } diff --git a/src/libffmpeg/libavutil/common.h b/src/libffmpeg/libavutil/common.h index d167404b6..0e093616c 100644 --- a/src/libffmpeg/libavutil/common.h +++ b/src/libffmpeg/libavutil/common.h @@ -26,9 +26,7 @@ #ifndef COMMON_H #define COMMON_H -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif +#include <inttypes.h> #ifdef HAVE_AV_CONFIG_H /* only include the following when compiling package */ @@ -47,34 +45,17 @@ # include <math.h> #endif /* HAVE_AV_CONFIG_H */ -/* Suppress restrict if it was not defined in config.h. */ -#ifndef restrict -# define restrict -#endif - -#ifndef always_inline -#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0) -# define always_inline __attribute__((always_inline)) inline -#else -# define always_inline inline -#endif -#endif - -#ifndef attribute_used +#ifndef av_always_inline #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0) -# define attribute_used __attribute__((used)) +# define av_always_inline __attribute__((always_inline)) inline #else -# define attribute_used +# define av_always_inline inline #endif #endif -#ifndef attribute_unused -#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0) -# define attribute_unused __attribute__((unused)) -#else -# define attribute_unused -#endif -#endif +#ifdef HAVE_AV_CONFIG_H +# include "internal.h" +#endif /* HAVE_AV_CONFIG_H */ #ifndef attribute_deprecated #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0) @@ -84,91 +65,9 @@ #endif #endif -# include <inttypes.h> - -#ifndef PRId64 -#define PRId64 "lld" -#endif - -#ifndef PRIu64 -#define PRIu64 "llu" -#endif - -#ifndef PRIx64 -#define PRIx64 "llx" -#endif - -#ifndef PRIX64 -#define PRIX64 "llX" -#endif - -#ifndef PRId32 -#define PRId32 "d" -#endif - -#ifndef PRIdFAST16 -#define PRIdFAST16 PRId32 -#endif - -#ifndef PRIdFAST32 -#define PRIdFAST32 PRId32 -#endif - -#ifndef INT16_MIN -#define INT16_MIN (-0x7fff-1) -#endif - -#ifndef INT16_MAX -#define INT16_MAX 0x7fff -#endif - -#ifndef INT32_MIN -#define INT32_MIN (-0x7fffffff-1) -#endif - -#ifndef INT32_MAX -#define INT32_MAX 0x7fffffff -#endif - -#ifndef UINT32_MAX -#define UINT32_MAX 0xffffffff -#endif - -#ifndef INT64_MIN -#define INT64_MIN (-0x7fffffffffffffffLL-1) -#endif - -#ifndef INT64_MAX -#define INT64_MAX int64_t_C(9223372036854775807) -#endif - -#ifndef UINT64_MAX -#define UINT64_MAX uint64_t_C(0xFFFFFFFFFFFFFFFF) -#endif - -#ifndef INT_BIT -# if INT_MAX != 2147483647 -# define INT_BIT 64 -# else -# define INT_BIT 32 -# endif -#endif - -#ifndef int64_t_C -#define int64_t_C(c) (c ## LL) -#define uint64_t_C(c) (c ## ULL) -#endif - -#if defined(__MINGW32__) && !defined(BUILD_AVUTIL) && defined(BUILD_SHARED_AV) -# define FF_IMPORT_ATTR __declspec(dllimport) -#else -# define FF_IMPORT_ATTR -#endif - - -#ifdef HAVE_AV_CONFIG_H -/* only include the following when compiling package */ -# include "internal.h" +#ifndef INT64_C +#define INT64_C(c) (c ## LL) +#define UINT64_C(c) (c ## ULL) #endif //rounded divison & shift @@ -184,7 +83,7 @@ #define FFSWAP(type,a,b) do{type SWAP_tmp= b; b= a; a= SWAP_tmp;}while(0) /* misc math functions */ -extern FF_IMPORT_ATTR 
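/* common.h above sheds its PRId64/INT64_MAX-style fallback macros (they
 * reappear in internal.h later in this diff) and drops FF_IMPORT_ATTR, the
 * MinGW __declspec(dllimport) shim, so ff_log2_tab and ff_sqrt_tab become
 * plain externs -- presumably fine for xine's bundled, statically linked
 * copy of libavutil. */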
const uint8_t ff_log2_tab[256]; +extern const uint8_t ff_log2_tab[256]; static inline int av_log2(unsigned int v) { @@ -375,7 +274,7 @@ static inline uint64_t read_time(void) ); return (d << 32) | (a & 0xffffffff); } -#elif defined(ARCH_X86) +#elif defined(ARCH_X86_32) static inline long long read_time(void) { long long l; @@ -465,4 +364,8 @@ void av_freep(void *ptr); # define ASMALIGN(ZEROBITS) ".align 1<<" #ZEROBITS "\n\t" #endif +/* xine: another config.h with codecs to use */ +#include "ffmpeg_config.h" + #endif /* COMMON_H */ + diff --git a/src/libffmpeg/libavutil/internal.h b/src/libffmpeg/libavutil/internal.h index 7d850141b..0c4b44170 100644 --- a/src/libffmpeg/libavutil/internal.h +++ b/src/libffmpeg/libavutil/internal.h @@ -26,6 +26,94 @@ #ifndef INTERNAL_H #define INTERNAL_H +#ifndef attribute_used +#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0) +# define attribute_used __attribute__((used)) +#else +# define attribute_used +#endif +#endif + +#ifndef attribute_unused +#if defined(__GNUC__) +# define attribute_unused __attribute__((unused)) +#else +# define attribute_unused +#endif +#endif + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +#ifndef PRId64 +#define PRId64 "lld" +#endif + +#ifndef PRIu64 +#define PRIu64 "llu" +#endif + +#ifndef PRIx64 +#define PRIx64 "llx" +#endif + +#ifndef PRIX64 +#define PRIX64 "llX" +#endif + +#ifndef PRId32 +#define PRId32 "d" +#endif + +#ifndef PRIdFAST16 +#define PRIdFAST16 PRId32 +#endif + +#ifndef PRIdFAST32 +#define PRIdFAST32 PRId32 +#endif + +#ifndef INT16_MIN +#define INT16_MIN (-0x7fff-1) +#endif + +#ifndef INT16_MAX +#define INT16_MAX 0x7fff +#endif + +#ifndef INT32_MIN +#define INT32_MIN (-0x7fffffff-1) +#endif + +#ifndef INT32_MAX +#define INT32_MAX 0x7fffffff +#endif + +#ifndef UINT32_MAX +#define UINT32_MAX 0xffffffff +#endif + +#ifndef INT64_MIN +#define INT64_MIN (-0x7fffffffffffffffLL-1) +#endif + +#ifndef INT64_MAX +#define INT64_MAX INT64_C(9223372036854775807) +#endif + +#ifndef UINT64_MAX +#define UINT64_MAX UINT64_C(0xFFFFFFFFFFFFFFFF) +#endif + +#ifndef INT_BIT +# if INT_MAX != 2147483647 +# define INT_BIT 64 +# else +# define INT_BIT 32 +# endif +#endif + #if ( defined(__PIC__) || defined(__pic__) ) && ! defined(PIC) # define PIC #endif @@ -34,6 +122,7 @@ # define ENODATA 61 #endif +#include "intreadwrite.h" #include "bswap.h" #include <stddef.h> @@ -136,7 +225,7 @@ extern const uint32_t ff_inverse[256]; # define FASTDIV(a,b) ((a)/(b)) #endif -extern FF_IMPORT_ATTR const uint8_t ff_sqrt_tab[128]; +extern const uint8_t ff_sqrt_tab[128]; static inline int ff_sqrt(int a) { @@ -216,7 +305,7 @@ if((y)<(x)){\ /* XXX: add ISOC specific test to avoid specific BSD testing. */ /* better than nothing implementation. 
*/ /* btw, rintf() is existing on fbsd too -- alex */ -static always_inline long int lrintf(float x) +static av_always_inline long int lrintf(float x) { #ifdef __MINGW32__ # ifdef ARCH_X86_32 diff --git a/src/libffmpeg/libavutil/intreadwrite.h b/src/libffmpeg/libavutil/intreadwrite.h new file mode 100644 index 000000000..c43f9d651 --- /dev/null +++ b/src/libffmpeg/libavutil/intreadwrite.h @@ -0,0 +1,42 @@ +#ifndef INTREADWRITE_H +#define INTREADWRITE_H + +#ifdef __GNUC__ + +struct unaligned_64 { uint64_t l; } __attribute__((packed)); +struct unaligned_32 { uint32_t l; } __attribute__((packed)); +struct unaligned_16 { uint16_t l; } __attribute__((packed)); + +#define LD16(a) (((const struct unaligned_16 *) (a))->l) +#define LD32(a) (((const struct unaligned_32 *) (a))->l) +#define LD64(a) (((const struct unaligned_64 *) (a))->l) + +#define ST16(a, b) (((struct unaligned_16 *) (a))->l) = (b) +#define ST32(a, b) (((struct unaligned_32 *) (a))->l) = (b) + +#else /* __GNUC__ */ + +#define LD16(a) (*((uint16_t*)(a))) +#define LD32(a) (*((uint32_t*)(a))) +#define LD64(a) (*((uint64_t*)(a))) + +#define ST16(a, b) *((uint16_t*)(a)) = (b) +#define ST32(a, b) *((uint32_t*)(a)) = (b) + +#endif /* !__GNUC__ */ + +/* endian macros */ +#if !defined(BE_16) || !defined(BE_32) || !defined(LE_16) || !defined(LE_32) +#define BE_16(x) ((((uint8_t*)(x))[0] << 8) | ((uint8_t*)(x))[1]) +#define BE_32(x) ((((uint8_t*)(x))[0] << 24) | \ + (((uint8_t*)(x))[1] << 16) | \ + (((uint8_t*)(x))[2] << 8) | \ + ((uint8_t*)(x))[3]) +#define LE_16(x) ((((uint8_t*)(x))[1] << 8) | ((uint8_t*)(x))[0]) +#define LE_32(x) ((((uint8_t*)(x))[3] << 24) | \ + (((uint8_t*)(x))[2] << 16) | \ + (((uint8_t*)(x))[1] << 8) | \ + ((uint8_t*)(x))[0]) +#endif + +#endif /* INTREADWRITE_H */ diff --git a/src/libffmpeg/libavutil/rational.c b/src/libffmpeg/libavutil/rational.c index 0e018c41b..0480aa882 100644 --- a/src/libffmpeg/libavutil/rational.c +++ b/src/libffmpeg/libavutil/rational.c @@ -38,8 +38,10 @@ int av_reduce(int *dst_nom, int *dst_den, int64_t nom, int64_t den, int64_t max) int sign= (nom<0) ^ (den<0); int64_t gcd= ff_gcd(FFABS(nom), FFABS(den)); - nom = FFABS(nom)/gcd; - den = FFABS(den)/gcd; + if(gcd){ + nom = FFABS(nom)/gcd; + den = FFABS(den)/gcd; + } if(nom<=max && den<=max){ a1= (AVRational){nom, den}; den=0; @@ -65,7 +67,7 @@ int av_reduce(int *dst_nom, int *dst_den, int64_t nom, int64_t den, int64_t max) nom= den; den= next_den; } - assert(ff_gcd(a1.num, a1.den) == 1); + assert(ff_gcd(a1.num, a1.den) <= 1U); *dst_nom = sign ? 
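/* Also above: intreadwrite.h funnels unaligned accesses through
 * __attribute__((packed)) structs so gcc emits sequences that are safe on
 * strict-alignment CPUs, where dereferencing a misaligned uint32_t pointer
 * directly would be undefined -- e.g. LD32(buf + 3) is legal on any
 * supported target.  And in av_reduce(), the new if(gcd) guard covers the
 * nom == den == 0 input: ff_gcd(0, 0) is 0, so the old unconditional
 * division divided by zero; the assert is relaxed from == 1 to <= 1U to
 * admit that case. */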
-a1.num : a1.num; *dst_den = a1.den; diff --git a/src/libffmpeg/video_decoder.c b/src/libffmpeg/video_decoder.c index ad2bc99b4..b019d52d3 100644 --- a/src/libffmpeg/video_decoder.c +++ b/src/libffmpeg/video_decoder.c @@ -17,7 +17,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA * - * $Id: video_decoder.c,v 1.64 2006/12/02 21:06:18 miguelfreitas Exp $ + * $Id: video_decoder.c,v 1.65 2007/01/13 21:19:52 miguelfreitas Exp $ * * xine video decoder plugin using ffmpeg * @@ -25,6 +25,7 @@ #ifdef HAVE_CONFIG_H #include "config.h" +#include "ffmpeg_config.h" #endif #include <stdlib.h> @@ -116,6 +117,8 @@ struct ff_video_decoder_s { int is_direct_rendering_disabled; AVPaletteControl palette_control; + + xine_list_t *dr1_frames; }; @@ -203,16 +206,25 @@ static int get_buffer(AVCodecContext *context, AVFrame *av_frame){ av_frame->type= FF_BUFFER_TYPE_USER; + xine_list_push_back(this->dr1_frames, av_frame); + return 0; } static void release_buffer(struct AVCodecContext *context, AVFrame *av_frame){ + ff_video_decoder_t *this = (ff_video_decoder_t *)context->opaque; if (av_frame->type == FF_BUFFER_TYPE_USER) { vo_frame_t *img = (vo_frame_t *)av_frame->opaque; + xine_list_iterator_t it; assert(av_frame->opaque); img->free(img); + + it = xine_list_find(this->dr1_frames, av_frame); + assert(it); + if( it != NULL ) + xine_list_remove(this->dr1_frames, it); } else { avcodec_default_release_buffer(context, av_frame); } @@ -249,6 +261,8 @@ static const ff_codec_t ff_video_lookup[] = { {BUF_VIDEO_DV, CODEC_ID_DVVIDEO, "DV (ffmpeg)"}, {BUF_VIDEO_HUFFYUV, CODEC_ID_HUFFYUV, "HuffYUV (ffmpeg)"}, {BUF_VIDEO_VP31, CODEC_ID_VP3, "On2 VP3.1 (ffmpeg)"}, + {BUF_VIDEO_VP5, CODEC_ID_VP5, "On2 VP5 (ffmpeg)"}, + {BUF_VIDEO_VP6, CODEC_ID_VP6, "On2 VP6 (ffmpeg)"}, {BUF_VIDEO_4XM, CODEC_ID_4XM, "4X Video (ffmpeg)"}, {BUF_VIDEO_CINEPAK, CODEC_ID_CINEPAK, "Cinepak (ffmpeg)"}, {BUF_VIDEO_MSVC, CODEC_ID_MSVIDEO1, "Microsoft Video 1 (ffmpeg)"}, @@ -376,7 +390,7 @@ static void init_video_codec (ff_video_decoder_t *this, unsigned int codec_type) /* enable direct rendering by default */ this->output_format = XINE_IMGFMT_YV12; #ifdef ENABLE_DIRECT_RENDERING - if( this->codec->capabilities & CODEC_CAP_DR1 ) { + if( this->codec->capabilities & CODEC_CAP_DR1 && this->codec->id != CODEC_ID_H264 ) { this->context->get_buffer = get_buffer; this->context->release_buffer = release_buffer; xprintf(this->stream->xine, XINE_VERBOSITY_LOG, @@ -801,7 +815,7 @@ static void ff_check_bufsize (ff_video_decoder_t *this, int size) { xprintf(this->stream->xine, XINE_VERBOSITY_LOG, _("ffmpeg_video_dec: increasing buffer to %d to avoid overflow.\n"), this->bufsize); - this->buf = realloc(this->buf, this->bufsize); + this->buf = realloc(this->buf, this->bufsize + FF_INPUT_BUFFER_PADDING_SIZE ); } } @@ -826,7 +840,7 @@ static void ff_handle_header_buffer (ff_video_decoder_t *this, buf_element_t *bu lprintf ("header buffer\n"); /* accumulate data */ - ff_check_bufsize(this, this->size + buf->size + FF_INPUT_BUFFER_PADDING_SIZE); + ff_check_bufsize(this, this->size + buf->size); xine_fast_memcpy (&this->buf[this->size], buf->content, buf->size); this->size += buf->size; @@ -1102,7 +1116,7 @@ static void ff_handle_buffer (ff_video_decoder_t *this, buf_element_t *buf) { lprintf("no memcpy needed to accumulate data\n"); } else { /* copy data into our internal buffer */ - ff_check_bufsize(this, this->size + buf->size + FF_INPUT_BUFFER_PADDING_SIZE); + 
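/* On the dr1_frames bookkeeping added above: get_buffer() records every
 * DR1 frame handed to libavcodec, release_buffer() removes it again, and
 * ff_dispose() (further below) force-releases whatever a leaky codec left
 * behind.  A minimal sketch of the same pattern, with hypothetical names
 * and a fixed array standing in for xine_list_t: */
#define MAX_OUTSTANDING 32
static void *outstanding[MAX_OUTSTANDING];

static void track_frame(void *frame)            /* cf. get_buffer() */
{
    int i;
    for (i = 0; i < MAX_OUTSTANDING; i++)
        if (!outstanding[i]) { outstanding[i] = frame; return; }
}

static void untrack_frame(void *frame)          /* cf. release_buffer() */
{
    int i;
    for (i = 0; i < MAX_OUTSTANDING; i++)
        if (outstanding[i] == frame) { outstanding[i] = NULL; return; }
}

static void release_leaked(void (*release)(void *)) /* cf. ff_dispose() */
{
    int i;
    for (i = 0; i < MAX_OUTSTANDING; i++)
        if (outstanding[i]) { release(outstanding[i]); outstanding[i] = NULL; }
}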
ff_check_bufsize(this, this->size + buf->size); chunk_buf = this->buf; /* ff_check_bufsize might realloc this->buf */ xine_fast_memcpy (&this->buf[this->size], buf->content, buf->size); @@ -1122,7 +1136,13 @@ static void ff_handle_buffer (ff_video_decoder_t *this, buf_element_t *buf) { int codec_type = buf->type & 0xFFFF0000; /* pad input data */ - chunk_buf[this->size] = 0; + /* note: bitstream, alt bitstream reader or something will cause + * severe mpeg4 artifacts if padding is less than 32 bits. + */ + chunk_buf[this->size+0] = 0; + chunk_buf[this->size+1] = 0; + chunk_buf[this->size+2] = 0; + chunk_buf[this->size+3] = 0; while (this->size > 0) { @@ -1150,7 +1170,7 @@ static void ff_handle_buffer (ff_video_decoder_t *this, buf_element_t *buf) { this->size -= len; if (this->size > 0) { - ff_check_bufsize(this, this->size + FF_INPUT_BUFFER_PADDING_SIZE); + ff_check_bufsize(this, this->size); memmove (this->buf, &chunk_buf[offset], this->size); chunk_buf = this->buf; } @@ -1256,7 +1276,7 @@ static void ff_handle_buffer (ff_video_decoder_t *this, buf_element_t *buf) { img->crop_bottom = this->crop_bottom; this->skipframes = img->draw(img, this->stream); - + if(free_img) img->free(img); } @@ -1360,12 +1380,23 @@ static void ff_dispose (video_decoder_t *this_gen) { ff_video_decoder_t *this = (ff_video_decoder_t *) this_gen; lprintf ("ff_dispose\n"); - + if (this->decoder_ok) { + xine_list_iterator_t it; + AVFrame *av_frame; + pthread_mutex_lock(&ffmpeg_lock); avcodec_close (this->context); pthread_mutex_unlock(&ffmpeg_lock); - + + /* frame garbage collector here - workaround for buggy ffmpeg codecs that + * don't release their DR1 frames */ + while( (it = xine_list_front(this->dr1_frames)) != NULL ) + { + av_frame = (AVFrame *)xine_list_get_value(this->dr1_frames, it); + release_buffer(this->context, av_frame); + } + this->stream->video_out->close(this->stream->video_out, this->stream); this->decoder_ok = 0; } @@ -1394,6 +1425,8 @@ static void ff_dispose (video_decoder_t *this_gen) { if(this->pp_mode) pp_free_mode(this->pp_mode); + + xine_list_delete(this->dr1_frames); free (this_gen); } @@ -1433,6 +1466,8 @@ static video_decoder_t *ff_video_open_plugin (video_decoder_class_t *class_gen, this->pp_context = NULL; this->pp_mode = NULL; + this->dr1_frames = xine_list_new(); + mpeg_parser_init(&this->mpeg_parser); return &this->video_decoder; @@ -1483,73 +1518,223 @@ void *init_video_plugin (xine_t *xine, void *data) { } static uint32_t supported_video_types[] = { - BUF_VIDEO_MSMPEG4_V1, + #ifdef CONFIG_MSMPEG4V1_DECODER + BUF_VIDEO_MSMPEG4_V1, + #endif + #ifdef CONFIG_MSMPEG4V2_DECODER BUF_VIDEO_MSMPEG4_V2, - BUF_VIDEO_MSMPEG4_V3, - BUF_VIDEO_WMV7, + #endif + #ifdef CONFIG_MSMPEG4V3_DECODER + BUF_VIDEO_MSMPEG4_V3, + #endif + #ifdef CONFIG_WMV1_DECODER + BUF_VIDEO_WMV7, + #endif + #ifdef CONFIG_WMV2_DECODER + BUF_VIDEO_WMV8, + #endif + #ifdef CONFIG_WMV3_DECODER + BUF_VIDEO_WMV9, + #endif + #ifdef CONFIG_MPEG4_DECODER BUF_VIDEO_MPEG4, - BUF_VIDEO_XVID, - BUF_VIDEO_DIVX5, + #endif + #ifdef CONFIG_MPEG4_DECODER + BUF_VIDEO_XVID, + #endif + #ifdef CONFIG_MPEG4_DECODER + BUF_VIDEO_DIVX5, + #endif + #ifdef CONFIG_MPEG4_DECODER BUF_VIDEO_3IVX, + #endif + #ifdef CONFIG_MJPEG_DECODER + BUF_VIDEO_JPEG, + #endif + #ifdef CONFIG_MJPEG_DECODER BUF_VIDEO_MJPEG, + #endif + #ifdef CONFIG_MJPEGB_DECODER BUF_VIDEO_MJPEG_B, + #endif + #ifdef CONFIG_H263I_DECODER + BUF_VIDEO_I263, + #endif + #ifdef CONFIG_H263_DECODER BUF_VIDEO_H263, + #endif + #ifdef CONFIG_RV10_DECODER BUF_VIDEO_RV10, + #endif + #ifdef 
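/* On the padding above: libavcodec's bitstream readers may fetch up to 32
 * bits past the last byte they actually consume, hence four zero bytes of
 * padding instead of one, with ff_check_bufsize() now reserving
 * FF_INPUT_BUFFER_PADDING_SIZE once at realloc time instead of every caller
 * adding it to the requested size. */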
CONFIG_RV20_DECODER BUF_VIDEO_RV20, + #endif + #ifdef CONFIG_INDEO3_DECODER BUF_VIDEO_IV31, + #endif + #ifdef CONFIG_INDEO3_DECODER BUF_VIDEO_IV32, + #endif + #ifdef CONFIG_SVQ1_DECODER BUF_VIDEO_SORENSON_V1, + #endif + #ifdef CONFIG_SVQ3_DECODER BUF_VIDEO_SORENSON_V3, - BUF_VIDEO_JPEG, - BUF_VIDEO_MPEG, + #endif + #ifdef CONFIG_DVVIDEO_DECODER BUF_VIDEO_DV, + #endif + #ifdef CONFIG_HUFFYUV_DECODER BUF_VIDEO_HUFFYUV, + #endif + #ifdef CONFIG_VP3_DECODER BUF_VIDEO_VP31, + #endif + #ifdef CONFIG_VP5_DECODER + BUF_VIDEO_VP5, + #endif + #ifdef CONFIG_VP6_DECODER + BUF_VIDEO_VP6, + #endif + #ifdef CONFIG_4XM_DECODER BUF_VIDEO_4XM, + #endif + #ifdef CONFIG_CINEPAK_DECODER BUF_VIDEO_CINEPAK, + #endif + #ifdef CONFIG_MSVIDEO1_DECODER BUF_VIDEO_MSVC, + #endif + #ifdef CONFIG_MSRLE_DECODER BUF_VIDEO_MSRLE, + #endif + #ifdef CONFIG_RPZA_DECODER BUF_VIDEO_RPZA, + #endif + #ifdef CONFIG_CYUV_DECODER BUF_VIDEO_CYUV, + #endif + #ifdef CONFIG_ROQ_DECODER BUF_VIDEO_ROQ, + #endif + #ifdef CONFIG_IDCIN_DECODER BUF_VIDEO_IDCIN, + #endif + #ifdef CONFIG_XAN_WC3_DECODER BUF_VIDEO_WC3, + #endif + #ifdef CONFIG_WS_VQA_DECODER BUF_VIDEO_VQA, + #endif + #ifdef CONFIG_INTERPLAY_VIDEO_DECODER BUF_VIDEO_INTERPLAY, + #endif + #ifdef CONFIG_FLIC_DECODER BUF_VIDEO_FLI, + #endif + #ifdef CONFIG_8BPS_DECODER BUF_VIDEO_8BPS, + #endif + #ifdef CONFIG_SMC_DECODER BUF_VIDEO_SMC, - BUF_VIDEO_VMD, + #endif + #ifdef CONFIG_TRUEMOTION1_DECODER BUF_VIDEO_DUCKTM1, + #endif + #ifdef CONFIG_TRUEMOTION2_DECODER BUF_VIDEO_DUCKTM2, + #endif + #ifdef CONFIG_VMDVIDEO_DECODER + BUF_VIDEO_VMD, + #endif + #ifdef CONFIG_ZLIB_DECODER BUF_VIDEO_ZLIB, + #endif + #ifdef CONFIG_MSZH_DECODER BUF_VIDEO_MSZH, + #endif + #ifdef CONFIG_ASV1_DECODER BUF_VIDEO_ASV1, + #endif + #ifdef CONFIG_ASV2_DECODER BUF_VIDEO_ASV2, + #endif + #ifdef CONFIG_VCR1_DECODER BUF_VIDEO_ATIVCR1, + #endif + #ifdef CONFIG_FLV1_DECODER BUF_VIDEO_FLV1, + #endif + #ifdef CONFIG_QTRLE_DECODER BUF_VIDEO_QTRLE, + #endif + #ifdef CONFIG_H264_DECODER BUF_VIDEO_H264, + #endif + #ifdef CONFIG_H261_DECODER BUF_VIDEO_H261, + #endif + #ifdef CONFIG_AASC_DECODER BUF_VIDEO_AASC, + #endif + #ifdef CONFIG_LOCO_DECODER BUF_VIDEO_LOCO, + #endif + #ifdef CONFIG_QDRAW_DECODER BUF_VIDEO_QDRW, + #endif + #ifdef CONFIG_QPEG_DECODER BUF_VIDEO_QPEG, + #endif + #ifdef CONFIG_TSCC_DECODER BUF_VIDEO_TSCC, + #endif + #ifdef CONFIG_ULTI_DECODER BUF_VIDEO_ULTI, + #endif + #ifdef CONFIG_WNV1_DECODER BUF_VIDEO_WNV1, + #endif + #ifdef CONFIG_VIXL_DECODER BUF_VIDEO_XL, + #endif + #ifdef CONFIG_INDEO2_DECODER BUF_VIDEO_RT21, + #endif + #ifdef CONFIG_FRAPS_DECODER BUF_VIDEO_FPS1, + #endif + #ifdef CONFIG_MPEG1VIDEO_DECODER + BUF_VIDEO_MPEG, + #endif + #ifdef CONFIG_CSCD_DECODER BUF_VIDEO_CSCD, + #endif + #ifdef CONFIG_AVS_DECODER + BUF_VIDEO_AVS, + #endif + #ifdef CONFIG_MMVIDEO_DECODER BUF_VIDEO_ALGMM, + #endif + #ifdef CONFIG_ZMBV_DECODER BUF_VIDEO_ZMBV, - BUF_VIDEO_AVS, + #endif + #ifdef CONFIG_SMACKVIDEO_DECODER BUF_VIDEO_SMACKER, + #endif + #ifdef CONFIG_NUV_DECODER BUF_VIDEO_NUV, + #endif + #ifdef CONFIG_KMVC_DECODER BUF_VIDEO_KMVC, + #endif + #ifdef CONFIG_FLASHSV_DECODER BUF_VIDEO_FLASHSV, + #endif + #ifdef CONFIG_CAVS_DECODER BUF_VIDEO_CAVS, + #endif + 0 }; diff --git a/src/libffmpeg/xine_decoder.c b/src/libffmpeg/xine_decoder.c index 02d19cc1a..2eeb9746b 100644 --- a/src/libffmpeg/xine_decoder.c +++ b/src/libffmpeg/xine_decoder.c @@ -17,7 +17,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA * - * $Id: 
xine_decoder.c,v 1.172 2006/12/04 22:25:13 miguelfreitas Exp $ + * $Id: xine_decoder.c,v 1.173 2007/01/13 21:19:52 miguelfreitas Exp $ * * xine decoder plugin using ffmpeg * @@ -25,6 +25,7 @@ #ifdef HAVE_CONFIG_H #include "config.h" +#include "ffmpeg_config.h" #endif #include "xine_internal.h" @@ -39,114 +40,273 @@ pthread_once_t once_control = PTHREAD_ONCE_INIT; pthread_mutex_t ffmpeg_lock; #ifndef HAVE_FFMPEG + +#define REGISTER_ENCODER(X,x) \ + if(ENABLE_##X##_ENCODER) register_avcodec(&x##_encoder) +#define REGISTER_DECODER(X,x) \ + if(ENABLE_##X##_DECODER) register_avcodec(&x##_decoder) +#define REGISTER_ENCDEC(X,x) REGISTER_ENCODER(X,x); REGISTER_DECODER(X,x) + +#define REGISTER_PARSER(X,x) \ + if(ENABLE_##X##_PARSER) av_register_codec_parser(&x##_parser) + +/* If you do not call this function, then you can select exactly which + formats you want to support */ + +/** + * simple call to register all the codecs. + */ void avcodec_register_all(void) { static int inited = 0; - + if (inited != 0) - return; + return; inited = 1; - /* decoders */ - register_avcodec(&h263_decoder); - register_avcodec(&mpeg4_decoder); - register_avcodec(&msmpeg4v1_decoder); - register_avcodec(&msmpeg4v2_decoder); - register_avcodec(&msmpeg4v3_decoder); - register_avcodec(&wmv1_decoder); - register_avcodec(&wmv2_decoder); - register_avcodec(&h263i_decoder); - register_avcodec(&rv10_decoder); - register_avcodec(&rv20_decoder); - register_avcodec(&svq1_decoder); - register_avcodec(&svq3_decoder); - register_avcodec(&wmav1_decoder); - register_avcodec(&wmav2_decoder); - register_avcodec(&indeo3_decoder); - register_avcodec(&mpeg1video_decoder); - register_avcodec(&dvvideo_decoder); - register_avcodec(&pcm_s16le_decoder); - register_avcodec(&mjpeg_decoder); - register_avcodec(&mjpegb_decoder); - register_avcodec(&mp2_decoder); - register_avcodec(&mp3_decoder); - register_avcodec(&mace3_decoder); - register_avcodec(&mace6_decoder); - register_avcodec(&huffyuv_decoder); - register_avcodec(&cyuv_decoder); - register_avcodec(&h264_decoder); - register_avcodec(&vp3_decoder); - register_avcodec(&fourxm_decoder); - register_avcodec(&ra_144_decoder); - register_avcodec(&ra_288_decoder); - register_avcodec(&adpcm_ms_decoder); - register_avcodec(&adpcm_ima_qt_decoder); - register_avcodec(&adpcm_ima_wav_decoder); - register_avcodec(&adpcm_ima_dk3_decoder); - register_avcodec(&adpcm_ima_dk4_decoder); - register_avcodec(&adpcm_ima_ws_decoder); - register_avcodec(&adpcm_ima_smjpeg_decoder); - register_avcodec(&adpcm_xa_decoder); - register_avcodec(&adpcm_4xm_decoder); - register_avcodec(&adpcm_ea_decoder); - register_avcodec(&pcm_alaw_decoder); - register_avcodec(&pcm_mulaw_decoder); - register_avcodec(&roq_dpcm_decoder); - register_avcodec(&interplay_dpcm_decoder); - register_avcodec(&cinepak_decoder); - register_avcodec(&msvideo1_decoder); - register_avcodec(&msrle_decoder); - register_avcodec(&rpza_decoder); - register_avcodec(&roq_decoder); - register_avcodec(&idcin_decoder); - register_avcodec(&xan_wc3_decoder); - register_avcodec(&vqa_decoder); - register_avcodec(&interplay_video_decoder); - register_avcodec(&flic_decoder); - register_avcodec(&smc_decoder); - register_avcodec(&eightbps_decoder); - register_avcodec(&vmdvideo_decoder); - register_avcodec(&vmdaudio_decoder); - register_avcodec(&truemotion1_decoder); - //register_avcodec(&mszh_decoder); - //register_avcodec(&zlib_decoder); - register_avcodec(&xan_dpcm_decoder); - register_avcodec(&asv1_decoder); - register_avcodec(&asv2_decoder); - 
register_avcodec(&vcr1_decoder); - register_avcodec(&flv_decoder); - register_avcodec(&qtrle_decoder); - register_avcodec(&flac_decoder); - register_avcodec(&aasc_decoder); - register_avcodec(&alac_decoder); - register_avcodec(&h261_decoder); - register_avcodec(&loco_decoder); - register_avcodec(&qdraw_decoder); - register_avcodec(&qpeg_decoder); - register_avcodec(&tscc_decoder); - register_avcodec(&ulti_decoder); - register_avcodec(&wnv1_decoder); - register_avcodec(&xl_decoder); - register_avcodec(&indeo2_decoder); - register_avcodec(&fraps_decoder); - register_avcodec(&shorten_decoder); - register_avcodec(&qdm2_decoder); - register_avcodec(&truemotion2_decoder); - register_avcodec(&wmv3_decoder); - register_avcodec(&cscd_decoder); - register_avcodec(&mmvideo_decoder); - register_avcodec(&zmbv_decoder); - register_avcodec(&avs_decoder); - register_avcodec(&smacker_decoder); - register_avcodec(&smackaud_decoder); - register_avcodec(&nuv_decoder); - register_avcodec(&kmvc_decoder); - register_avcodec(&flashsv_decoder); - //register_avcodec(&cavs_decoder); - register_avcodec(&cook_decoder); - register_avcodec(&truespeech_decoder); - register_avcodec(&tta_decoder); + /* video codecs */ + REGISTER_DECODER(AASC, aasc); + REGISTER_ENCDEC (ASV1, asv1); + REGISTER_ENCDEC (ASV2, asv2); + REGISTER_DECODER(AVS, avs); + REGISTER_DECODER(BMP, bmp); + REGISTER_DECODER(CAVS, cavs); + REGISTER_DECODER(CINEPAK, cinepak); + REGISTER_DECODER(CLJR, cljr); + REGISTER_DECODER(CSCD, cscd); + REGISTER_DECODER(CYUV, cyuv); + REGISTER_DECODER(DSICINVIDEO, dsicinvideo); + REGISTER_ENCDEC (DVVIDEO, dvvideo); + REGISTER_DECODER(EIGHTBPS, eightbps); + REGISTER_ENCDEC (FFV1, ffv1); + REGISTER_ENCDEC (FFVHUFF, ffvhuff); + REGISTER_DECODER(FLASHSV, flashsv); + REGISTER_DECODER(FLIC, flic); + REGISTER_ENCDEC (FLV, flv); + REGISTER_DECODER(FOURXM, fourxm); + REGISTER_DECODER(FRAPS, fraps); + REGISTER_ENCDEC (GIF, gif); + REGISTER_ENCDEC (H261, h261); + REGISTER_ENCDEC (H263, h263); + REGISTER_DECODER(H263I, h263i); + REGISTER_ENCODER(H263P, h263p); + REGISTER_DECODER(H264, h264); + REGISTER_ENCDEC (HUFFYUV, huffyuv); + REGISTER_DECODER(IDCIN, idcin); + REGISTER_DECODER(INDEO2, indeo2); + REGISTER_DECODER(INDEO3, indeo3); + REGISTER_DECODER(INTERPLAY_VIDEO, interplay_video); + REGISTER_ENCODER(JPEGLS, jpegls); + REGISTER_DECODER(KMVC, kmvc); + REGISTER_ENCODER(LJPEG, ljpeg); + REGISTER_DECODER(LOCO, loco); + REGISTER_DECODER(MDEC, mdec); + REGISTER_ENCDEC (MJPEG, mjpeg); + REGISTER_DECODER(MJPEGB, mjpegb); + REGISTER_DECODER(MMVIDEO, mmvideo); +#ifdef HAVE_XVMC + REGISTER_DECODER(MPEG_XVMC, mpeg_xvmc); +#endif + REGISTER_ENCDEC (MPEG1VIDEO, mpeg1video); + REGISTER_ENCDEC (MPEG2VIDEO, mpeg2video); + REGISTER_ENCDEC (MPEG4, mpeg4); + REGISTER_DECODER(MPEGVIDEO, mpegvideo); + REGISTER_ENCDEC (MSMPEG4V1, msmpeg4v1); + REGISTER_ENCDEC (MSMPEG4V2, msmpeg4v2); + REGISTER_ENCDEC (MSMPEG4V3, msmpeg4v3); + REGISTER_DECODER(MSRLE, msrle); + REGISTER_DECODER(MSVIDEO1, msvideo1); + REGISTER_DECODER(MSZH, mszh); + REGISTER_DECODER(NUV, nuv); + REGISTER_ENCODER(PAM, pam); + REGISTER_ENCODER(PBM, pbm); + REGISTER_ENCODER(PGM, pgm); + REGISTER_ENCODER(PGMYUV, pgmyuv); +#ifdef CONFIG_ZLIB + REGISTER_ENCDEC (PNG, png); +#endif + REGISTER_ENCODER(PPM, ppm); + REGISTER_DECODER(QDRAW, qdraw); + REGISTER_DECODER(QPEG, qpeg); + REGISTER_DECODER(QTRLE, qtrle); + REGISTER_ENCDEC (RAWVIDEO, rawvideo); + REGISTER_DECODER(ROQ, roq); + REGISTER_DECODER(RPZA, rpza); + REGISTER_ENCDEC (RV10, rv10); + REGISTER_ENCDEC (RV20, rv20); + 
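/* Each REGISTER_* line below expands by token pasting, e.g.
 *   REGISTER_DECODER(VP6, vp6);
 * becomes
 *   if(ENABLE_VP6_DECODER) register_avcodec(&vp6_decoder);
 * so a codec whose ENABLE_..._DECODER value (presumably supplied by
 * ffmpeg_config.h) is 0 still compiles -- the call is merely unreachable --
 * and disabling it saves no space unless the build also drops the object
 * files. */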
REGISTER_DECODER(SMACKER, smacker); + REGISTER_DECODER(SMC, smc); + REGISTER_ENCDEC (SNOW, snow); + REGISTER_DECODER(SP5X, sp5x); + REGISTER_ENCDEC (SVQ1, svq1); + REGISTER_DECODER(SVQ3, svq3); + REGISTER_DECODER(TARGA, targa); + REGISTER_DECODER(THEORA, theora); + REGISTER_DECODER(TIERTEXSEQVIDEO, tiertexseqvideo); + REGISTER_DECODER(TIFF, tiff); + REGISTER_DECODER(TRUEMOTION1, truemotion1); + REGISTER_DECODER(TRUEMOTION2, truemotion2); + REGISTER_DECODER(TSCC, tscc); + REGISTER_DECODER(ULTI, ulti); + REGISTER_DECODER(VC1, vc1); + REGISTER_DECODER(VCR1, vcr1); + REGISTER_DECODER(VMDVIDEO, vmdvideo); + REGISTER_DECODER(VMNC, vmnc); + REGISTER_DECODER(VP3, vp3); + REGISTER_DECODER(VP5, vp5); + REGISTER_DECODER(VP6, vp6); + REGISTER_DECODER(VP6F, vp6f); + REGISTER_DECODER(VQA, vqa); + REGISTER_ENCDEC (WMV1, wmv1); + REGISTER_ENCDEC (WMV2, wmv2); + REGISTER_DECODER(WMV3, wmv3); + REGISTER_DECODER(WNV1, wnv1); +#ifdef CONFIG_X264 + REGISTER_ENCODER(X264, x264); +#endif + REGISTER_DECODER(XAN_WC3, xan_wc3); + REGISTER_DECODER(XL, xl); +#ifdef CONFIG_XVID + REGISTER_ENCODER(XVID, xvid); +#endif + REGISTER_ENCDEC (ZLIB, zlib); +#ifdef CONFIG_ZLIB + REGISTER_ENCDEC (ZMBV, zmbv); +#endif + + /* audio codecs */ +#ifdef CONFIG_LIBFAAD + REGISTER_DECODER(AAC, aac); + REGISTER_DECODER(MPEG4AAC, mpeg4aac); +#endif +#ifdef CONFIG_LIBA52 + REGISTER_DECODER(AC3, ac3); +#endif + REGISTER_ENCODER(AC3, ac3); + REGISTER_DECODER(ALAC, alac); +#if defined(CONFIG_AMR_NB) || defined(CONFIG_AMR_NB_FIXED) + REGISTER_ENCDEC (AMR_NB, amr_nb); +#endif +#ifdef CONFIG_AMR_WB + REGISTER_ENCDEC (AMR_WB, amr_wb); +#endif + REGISTER_DECODER(COOK, cook); + REGISTER_DECODER(DSICINAUDIO, dsicinaudio); +#ifdef CONFIG_LIBDTS + REGISTER_DECODER(DTS, dts); +#endif +#ifdef CONFIG_LIBFAAC + REGISTER_ENCODER(FAAC, faac); +#endif + REGISTER_ENCDEC (FLAC, flac); + REGISTER_DECODER(IMC, imc); +#ifdef CONFIG_LIBGSM + REGISTER_ENCDEC (LIBGSM, libgsm); +#endif + REGISTER_DECODER(MACE3, mace3); + REGISTER_DECODER(MACE6, mace6); + REGISTER_ENCDEC (MP2, mp2); + REGISTER_DECODER(MP3, mp3); + REGISTER_DECODER(MP3ADU, mp3adu); +#ifdef CONFIG_LIBMP3LAME + REGISTER_ENCODER(MP3LAME, mp3lame); +#endif + REGISTER_DECODER(MP3ON4, mp3on4); + REGISTER_DECODER(MPC7, mpc7); +#ifdef CONFIG_LIBVORBIS + if (!ENABLE_VORBIS_ENCODER) REGISTER_ENCODER(OGGVORBIS, oggvorbis); + if (!ENABLE_VORBIS_DECODER) REGISTER_DECODER(OGGVORBIS, oggvorbis); +#endif + REGISTER_DECODER(QDM2, qdm2); + REGISTER_DECODER(RA_144, ra_144); + REGISTER_DECODER(RA_288, ra_288); + REGISTER_DECODER(SHORTEN, shorten); + REGISTER_DECODER(SMACKAUD, smackaud); + REGISTER_ENCDEC (SONIC, sonic); + REGISTER_ENCODER(SONIC_LS, sonic_ls); + REGISTER_DECODER(TRUESPEECH, truespeech); + REGISTER_DECODER(TTA, tta); + REGISTER_DECODER(VMDAUDIO, vmdaudio); + REGISTER_ENCDEC (VORBIS, vorbis); + REGISTER_DECODER(WAVPACK, wavpack); + REGISTER_DECODER(WMAV1, wmav1); + REGISTER_DECODER(WMAV2, wmav2); + REGISTER_DECODER(WS_SND1, ws_snd1); + + /* pcm codecs */ + REGISTER_ENCDEC (PCM_ALAW, pcm_alaw); + REGISTER_ENCDEC (PCM_MULAW, pcm_mulaw); + REGISTER_ENCDEC (PCM_S8, pcm_s8); + REGISTER_ENCDEC (PCM_S16BE, pcm_s16be); + REGISTER_ENCDEC (PCM_S16LE, pcm_s16le); + REGISTER_ENCDEC (PCM_S24BE, pcm_s24be); + REGISTER_ENCDEC (PCM_S24DAUD, pcm_s24daud); + REGISTER_ENCDEC (PCM_S24LE, pcm_s24le); + REGISTER_ENCDEC (PCM_S32BE, pcm_s32be); + REGISTER_ENCDEC (PCM_S32LE, pcm_s32le); + REGISTER_ENCDEC (PCM_U8, pcm_u8); + REGISTER_ENCDEC (PCM_U16BE, pcm_u16be); + REGISTER_ENCDEC (PCM_U16LE, pcm_u16le); + REGISTER_ENCDEC 
(PCM_U24BE, pcm_u24be); + REGISTER_ENCDEC (PCM_U24LE, pcm_u24le); + REGISTER_ENCDEC (PCM_U32BE, pcm_u32be); + REGISTER_ENCDEC (PCM_U32LE, pcm_u32le); + + /* dpcm codecs */ + REGISTER_DECODER(INTERPLAY_DPCM, interplay_dpcm); + REGISTER_DECODER(ROQ_DPCM, roq_dpcm); + REGISTER_DECODER(SOL_DPCM, sol_dpcm); + REGISTER_DECODER(XAN_DPCM, xan_dpcm); + + /* adpcm codecs */ + REGISTER_ENCDEC (ADPCM_4XM, adpcm_4xm); + REGISTER_ENCDEC (ADPCM_ADX, adpcm_adx); + REGISTER_ENCDEC (ADPCM_CT, adpcm_ct); + REGISTER_ENCDEC (ADPCM_EA, adpcm_ea); + REGISTER_ENCDEC (ADPCM_G726, adpcm_g726); + REGISTER_ENCDEC (ADPCM_IMA_DK3, adpcm_ima_dk3); + REGISTER_ENCDEC (ADPCM_IMA_DK4, adpcm_ima_dk4); + REGISTER_ENCDEC (ADPCM_IMA_QT, adpcm_ima_qt); + REGISTER_ENCDEC (ADPCM_IMA_SMJPEG, adpcm_ima_smjpeg); + REGISTER_ENCDEC (ADPCM_IMA_WAV, adpcm_ima_wav); + REGISTER_ENCDEC (ADPCM_IMA_WS, adpcm_ima_ws); + REGISTER_ENCDEC (ADPCM_MS, adpcm_ms); + REGISTER_ENCDEC (ADPCM_SBPRO_2, adpcm_sbpro_2); + REGISTER_ENCDEC (ADPCM_SBPRO_3, adpcm_sbpro_3); + REGISTER_ENCDEC (ADPCM_SBPRO_4, adpcm_sbpro_4); + REGISTER_ENCDEC (ADPCM_SWF, adpcm_swf); + REGISTER_ENCDEC (ADPCM_XA, adpcm_xa); + REGISTER_ENCDEC (ADPCM_YAMAHA, adpcm_yamaha); + + /* subtitles */ + REGISTER_ENCDEC (DVBSUB, dvbsub); + REGISTER_ENCDEC (DVDSUB, dvdsub); + + /* parsers */ + REGISTER_PARSER (AAC, aac); + REGISTER_PARSER (AC3, ac3); + REGISTER_PARSER (CAVSVIDEO, cavsvideo); + REGISTER_PARSER (DVBSUB, dvbsub); + REGISTER_PARSER (DVDSUB, dvdsub); + REGISTER_PARSER (H261, h261); + REGISTER_PARSER (H263, h263); + REGISTER_PARSER (H264, h264); + REGISTER_PARSER (MJPEG, mjpeg); + REGISTER_PARSER (MPEG4VIDEO, mpeg4video); + REGISTER_PARSER (MPEGAUDIO, mpegaudio); + REGISTER_PARSER (MPEGVIDEO, mpegvideo); + REGISTER_PARSER (PNM, pnm); + + /* + av_register_bitstream_filter(&dump_extradata_bsf); + av_register_bitstream_filter(&remove_extradata_bsf); + av_register_bitstream_filter(&noise_bsf); + av_register_bitstream_filter(&mp3_header_compress_bsf); + av_register_bitstream_filter(&mp3_header_decompress_bsf); + av_register_bitstream_filter(&mjpega_dump_header_bsf); + */ } + #endif void init_once_routine(void) {
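/* ffmpeg_config.h itself is not part of this diff; judging from its uses
 * above, it presumably pairs #ifdef-style CONFIG_..._DECODER guards (tested
 * by the supported_*_types tables) with always-defined ENABLE_..._DECODER
 * values (tested by the REGISTER_* macros).  A purely hypothetical
 * fragment: */
#define CONFIG_VP6_DECODER 1    /* enabled: #ifdef lists include the codec */
#define ENABLE_VP6_DECODER 1    /* and its if(ENABLE_...) call is live     */

#undef  CONFIG_SONIC_DECODER    /* disabled at configure time...          */
#define ENABLE_SONIC_DECODER 0  /* ...so registration compiles to if(0)   */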