From c5b6afab8b74e5cc938b8467d3808a877ded7d03 Mon Sep 17 00:00:00 2001 From: Mike Melanson Date: Mon, 27 Oct 2003 15:24:38 +0000 Subject: super mega ffmpeg tree sync CVS patchset: 5615 CVS date: 2003/10/27 15:24:38 --- src/libffmpeg/libavcodec/i386/dsputil_mmx.c | 58 ++++++++++++++++++++++ src/libffmpeg/libavcodec/i386/motion_est_mmx.c | 2 +- .../libavcodec/i386/mpegvideo_mmx_template.c | 4 +- src/libffmpeg/libavcodec/i386/simple_idct_mmx.c | 7 ++- 4 files changed, 67 insertions(+), 4 deletions(-) (limited to 'src/libffmpeg/libavcodec/i386') diff --git a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c index d58ebcc4e..c523be74a 100644 --- a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c +++ b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c @@ -171,6 +171,7 @@ static const uint64_t ff_pw_15 __attribute__ ((aligned(8))) = 0x000F000F000F000F /***********************************/ /* standard MMX */ +#ifdef CONFIG_ENCODERS static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size) { asm volatile( @@ -227,6 +228,7 @@ static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint : "%eax" ); } +#endif //CONFIG_ENCODERS void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) { @@ -401,6 +403,7 @@ static void clear_blocks_mmx(DCTELEM *blocks) ); } +#ifdef CONFIG_ENCODERS static int pix_sum16_mmx(uint8_t * pix, int line_size){ const int h=16; int sum; @@ -438,6 +441,7 @@ static int pix_sum16_mmx(uint8_t * pix, int line_size){ return sum; } +#endif //CONFIG_ENCODERS static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ int i=0; @@ -461,6 +465,7 @@ static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ dst[i+0] += src[i+0]; } +#ifdef CONFIG_ENCODERS static int pix_norm1_mmx(uint8_t *pix, int line_size) { int tmp; asm volatile ( @@ -583,6 +588,43 @@ static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ for(; iidct_put= ff_simple_idct_put_mmx; c->idct_add= ff_simple_idct_add_mmx; + c->idct = ff_simple_idct_mmx; c->idct_permutation_type= FF_SIMPLE_IDCT_PERM; }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ if(mm_flags & MM_MMXEXT){ c->idct_put= ff_libmpeg2mmx2_idct_put; c->idct_add= ff_libmpeg2mmx2_idct_add; + c->idct = ff_mmxext_idct; }else{ c->idct_put= ff_libmpeg2mmx_idct_put; c->idct_add= ff_libmpeg2mmx_idct_add; + c->idct = ff_mmx_idct; } c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; } +#ifdef CONFIG_ENCODERS c->get_pixels = get_pixels_mmx; c->diff_pixels = diff_pixels_mmx; +#endif //CONFIG_ENCODERS c->put_pixels_clamped = put_pixels_clamped_mmx; c->add_pixels_clamped = add_pixels_clamped_mmx; c->clear_blocks = clear_blocks_mmx; +#ifdef CONFIG_ENCODERS c->pix_sum = pix_sum16_mmx; +#endif //CONFIG_ENCODERS c->put_pixels_tab[0][0] = put_pixels16_mmx; c->put_pixels_tab[0][1] = put_pixels16_x2_mmx; @@ -1627,6 +1677,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx; c->add_bytes= add_bytes_mmx; +#ifdef CONFIG_ENCODERS c->diff_bytes= diff_bytes_mmx; c->hadamard8_diff[0]= hadamard8_diff16_mmx; @@ -1634,6 +1685,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->pix_norm1 = pix_norm1_mmx; c->sse[0] = sse16_mmx; +#endif //CONFIG_ENCODERS if (mm_flags & MM_MMXEXT) { c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; @@ -1650,8 +1702,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; +#ifdef CONFIG_ENCODERS c->hadamard8_diff[0]= hadamard8_diff16_mmx2; c->hadamard8_diff[1]= hadamard8_diff_mmx2; +#endif //CONFIG_ENCODERS if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; @@ -1696,6 +1750,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) SET_QPEL_FUNC(qpel_pixels_tab[1][14], qpel8_mc23_mmx2) SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_mmx2) #endif + + c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2; } else if (mm_flags & MM_3DNOW) { c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; @@ -1755,7 +1811,9 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) } } +#ifdef CONFIG_ENCODERS dsputil_init_pix_mmx(c, avctx); +#endif //CONFIG_ENCODERS #if 0 // for speed testing get_pixels = just_return; diff --git a/src/libffmpeg/libavcodec/i386/motion_est_mmx.c b/src/libffmpeg/libavcodec/i386/motion_est_mmx.c index 1bc5dfeb2..aa22f0649 100644 --- a/src/libffmpeg/libavcodec/i386/motion_est_mmx.c +++ b/src/libffmpeg/libavcodec/i386/motion_est_mmx.c @@ -26,7 +26,7 @@ static const __attribute__ ((aligned(8))) uint64_t round_tab[3]={ 0x0002000200020002, }; -static const __attribute__ ((aligned(8))) uint64_t bone= 0x0101010101010101LL; +static const __attribute__ ((aligned(8), unused)) uint64_t bone= 0x0101010101010101LL; static inline void sad8_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) { diff --git a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c index fa1ab579d..706211eec 100644 --- a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c +++ b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c @@ -38,7 +38,9 @@ static int RENAME(dct_quantize)(MpegEncContext *s, { int level=0, last_non_zero_p1, q; //=0 is cuz gcc says uninitalized ... const uint16_t *qmat, *bias; - static __align8 int16_t temp_block[64]; + __align8 int16_t temp_block[64]; + + assert((7&(int)(&temp_block[0])) == 0); //did gcc align it correctly? //s->fdct (block); ff_fdct_mmx (block); //cant be anything else ... diff --git a/src/libffmpeg/libavcodec/i386/simple_idct_mmx.c b/src/libffmpeg/libavcodec/i386/simple_idct_mmx.c index 836403ca5..626c1f565 100644 --- a/src/libffmpeg/libavcodec/i386/simple_idct_mmx.c +++ b/src/libffmpeg/libavcodec/i386/simple_idct_mmx.c @@ -47,8 +47,8 @@ static const uint64_t __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000ULL; static const uint64_t __attribute__((aligned(8))) d40000= 0x0000000000040000ULL; -static int16_t __attribute__((aligned(8))) temp[64]; -static int16_t __attribute__((aligned(8))) coeffs[]= { + +static const int16_t __attribute__((aligned(8))) coeffs[]= { 1<<(ROW_SHIFT-1), 0, 1<<(ROW_SHIFT-1), 0, // 1<<(COL_SHIFT-1), 0, 1<<(COL_SHIFT-1), 0, // 0, 1<<(COL_SHIFT-1-16), 0, 1<<(COL_SHIFT-1-16), @@ -206,6 +206,9 @@ row[7] = input[13]; static inline void idct(int16_t *block) { + int64_t __attribute__((aligned(8))) align_tmp[16]; + int16_t * const temp= (int16_t*)align_tmp; + asm volatile( #if 0 //Alternative, simpler variant -- cgit v1.2.3