diff options
Diffstat (limited to 'contrib/ffmpeg/libavcodec/i386/mpegvideo_mmx.c')
-rw-r--r-- | contrib/ffmpeg/libavcodec/i386/mpegvideo_mmx.c | 41 |
1 files changed, 30 insertions, 11 deletions
diff --git a/contrib/ffmpeg/libavcodec/i386/mpegvideo_mmx.c b/contrib/ffmpeg/libavcodec/i386/mpegvideo_mmx.c index 1b7b1c19f..90b553aa2 100644 --- a/contrib/ffmpeg/libavcodec/i386/mpegvideo_mmx.c +++ b/contrib/ffmpeg/libavcodec/i386/mpegvideo_mmx.c @@ -2,6 +2,9 @@ * The simplest mpeg encoder (well, it was the simplest!) * Copyright (c) 2000,2001 Fabrice Bellard. * + * Optimized for ia32 CPUs by Nick Kurshev <nickols_k@mail.ru> + * h263, mpeg1, mpeg2 dequantizer & draw_edges by Michael Niedermayer <michaelni@gmx.at> + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -17,21 +20,16 @@ * You should have received a copy of the GNU Lesser General Public * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - * - * Optimized for ia32 cpus by Nick Kurshev <nickols_k@mail.ru> - * h263, mpeg1, mpeg2 dequantizer & draw_edges by Michael Niedermayer <michaelni@gmx.at> */ -#include "../dsputil.h" -#include "../mpegvideo.h" -#include "../avcodec.h" +#include "dsputil.h" +#include "dsputil_mmx.h" +#include "mpegvideo.h" +#include "avcodec.h" #include "x86_cpu.h" extern uint16_t inv_zigzag_direct16[64]; -static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xffffffffffffffffULL; -static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL; - static void dct_unquantize_h263_intra_mmx(MpegEncContext *s, DCTELEM *block, int n, int qscale) @@ -179,7 +177,7 @@ asm volatile( if (level < -2048 || level > 2047) fprintf(stderr, "unquant error %d %d\n", i, level); #endif - We can suppose that result of two multiplications can't be greate of 0xFFFF + We can suppose that result of two multiplications can't be greater than 0xFFFF i.e. is 16-bit, so we use here only PMULLW instruction and can avoid a complex multiplication. ===================================================== @@ -397,7 +395,7 @@ asm volatile( : "%"REG_a, "memory" ); block[0]= block0; - //Note, we dont do mismatch control for intra as errors cannot accumulate + //Note, we do not do mismatch control for intra as errors cannot accumulate } static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s, @@ -673,6 +671,12 @@ static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){ ); } +#ifdef HAVE_SSSE3 +#define HAVE_SSSE3_BAK +#endif +#undef HAVE_SSSE3 + +#undef HAVE_SSE2 #undef HAVE_MMX2 #define RENAME(a) a ## _MMX #define RENAMEl(a) a ## _mmx @@ -685,12 +689,22 @@ static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){ #define RENAMEl(a) a ## _mmx2 #include "mpegvideo_mmx_template.c" +#define HAVE_SSE2 #undef RENAME #undef RENAMEl #define RENAME(a) a ## _SSE2 #define RENAMEl(a) a ## _sse2 #include "mpegvideo_mmx_template.c" +#ifdef HAVE_SSSE3_BAK +#define HAVE_SSSE3 +#undef RENAME +#undef RENAMEl +#define RENAME(a) a ## _SSSE3 +#define RENAMEl(a) a ## _sse2 +#include "mpegvideo_mmx_template.c" +#endif + void MPV_common_init_mmx(MpegEncContext *s) { if (mm_flags & MM_MMX) { @@ -713,6 +727,11 @@ void MPV_common_init_mmx(MpegEncContext *s) } if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){ +#ifdef HAVE_SSSE3 + if(mm_flags & MM_SSSE3){ + s->dct_quantize= dct_quantize_SSSE3; + } else +#endif if(mm_flags & MM_SSE2){ s->dct_quantize= dct_quantize_SSE2; } else if(mm_flags & MM_MMXEXT){ |