diff options
-rw-r--r-- | configure.in | 2 | ||||
-rw-r--r-- | src/libmpeg2/idct.c | 6 | ||||
-rw-r--r-- | src/libmpeg2/idct_altivec.c | 35 | ||||
-rw-r--r-- | src/libmpeg2/idct_mlib.c | 4 | ||||
-rw-r--r-- | src/libmpeg2/idct_mmx.c | 23 | ||||
-rw-r--r-- | src/libmpeg2/slice.c | 3 |
6 files changed, 69 insertions, 4 deletions
diff --git a/configure.in b/configure.in index f92f6d59e..cd84bbfd7 100644 --- a/configure.in +++ b/configure.in @@ -148,7 +148,7 @@ AC_C_BIGENDIAN dnl AC_C_BIGENDIAN triggers an AC_TRY_RUN warning; we can't cross compile dnl xine (oh, well) AC_C_CONST -AC_C_INLINE +AC_C_ALWAYS_INLINE AC_TYPE_OFF_T AC_TYPE_SIZE_T AM_TYPE_PTRDIFF_T diff --git a/src/libmpeg2/idct.c b/src/libmpeg2/idct.c index 1c577aa5f..abc87a0a1 100644 --- a/src/libmpeg2/idct.c +++ b/src/libmpeg2/idct.c @@ -223,6 +223,9 @@ static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest, int stride) dest[6] = CLIP (block[6]); dest[7] = CLIP (block[7]); + block[0] = 0; block[1] = 0; block[2] = 0; block[3] = 0; + block[4] = 0; block[5] = 0; block[6] = 0; block[7] = 0; + dest += stride; block += 8; } while (--i); @@ -249,6 +252,9 @@ static void mpeg2_idct_add_c (int16_t * block, uint8_t * dest, int stride) dest[6] = CLIP (block[6] + dest[6]); dest[7] = CLIP (block[7] + dest[7]); + block[0] = 0; block[1] = 0; block[2] = 0; block[3] = 0; + block[4] = 0; block[5] = 0; block[6] = 0; block[7] = 0; + dest += stride; block += 8; } while (--i); diff --git a/src/libmpeg2/idct_altivec.c b/src/libmpeg2/idct_altivec.c index 1d20f56b4..b982238e7 100644 --- a/src/libmpeg2/idct_altivec.c +++ b/src/libmpeg2/idct_altivec.c @@ -243,6 +243,22 @@ void mpeg2_idct_copy_altivec (int16_t * block, uint8_t * dest, int stride) "# lwz %r0, 132(%r1) \n" "# mtlr %r0 \n" "# la %r1, 128(%r1) \n" + " vxor %v1, %v1, %v1 \n" + " addi %r9, %r3, 16 \n" + " stvx %v1, 0, %r3 \n" + " stvx %v1, 0, %r9 \n" + " addi %r11, %r3, 32 \n" + " stvx %v1, 0, %r11 \n" + " addi %r9, %r3, 48 \n" + " stvx %v1, 0, %r9 \n" + " addi %r11, %r3, -64 \n" + " stvx %v1, 0, %r11 \n" + " addi %r9, %r3, -48 \n" + " stvx %v1, 0, %r9 \n" + " addi %r11, %r3, -32 \n" + " stvx %v1, 0, %r11 \n" + " addi %r3, %r3, -16 \n" + " stvx %v1, 0, %r3 \n" ); } @@ -463,6 +479,21 @@ void mpeg2_idct_add_altivec (int16_t * block, uint8_t * dest, int stride) "# lwz %r0, 196(%r1) \n" "# mtlr %r0 \n" "# la %r1, 192(%r1) \n" + " addi %r9, %r3, 16 \n" + " stvx %v1, 0, %r3 \n" + " stvx %v1, 0, %r9 \n" + " addi %r11, %r3, 32 \n" + " stvx %v1, 0, %r11 \n" + " addi %r9, %r3, 48 \n" + " stvx %v1, 0, %r9 \n" + " addi %r11, %r3, -64 \n" + " stvx %v1, 0, %r11 \n" + " addi %r9, %r3, -48 \n" + " stvx %v1, 0, %r9 \n" + " addi %r11, %r3, -32 \n" + " stvx %v1, 0, %r11 \n" + " addi %r3, %r3, -16 \n" + " stvx %v1, 0, %r3 \n" ); } @@ -625,6 +656,7 @@ void mpeg2_idct_copy_altivec (vector_s16_t * block, unsigned char * dest, COPY (dest, vx5) dest += stride; COPY (dest, vx6) dest += stride; COPY (dest, vx7) + memset (block, 0, 64 * sizeof (signed short)); } void mpeg2_idct_add_altivec (vector_s16_t * block, unsigned char * dest, @@ -661,6 +693,9 @@ void mpeg2_idct_add_altivec (vector_s16_t * block, unsigned char * dest, ADD (dest, vx5, perm1) dest += stride; ADD (dest, vx6, perm0) dest += stride; ADD (dest, vx7, perm1) + memset (block, 0, 64 * sizeof (signed short)); } #endif /* __ALTIVEC__ */ + + diff --git a/src/libmpeg2/idct_mlib.c b/src/libmpeg2/idct_mlib.c index 18097f51c..8fc4e41a2 100644 --- a/src/libmpeg2/idct_mlib.c +++ b/src/libmpeg2/idct_mlib.c @@ -28,6 +28,7 @@ #include <mlib_status.h> #include <mlib_sys.h> #include <mlib_video.h> +#include <string.h> #include <inttypes.h> #include "mpeg2_internal.h" @@ -36,18 +37,21 @@ void mpeg2_idct_add_mlib (int16_t * block, uint8_t * dest, int stride) { mlib_VideoIDCT_IEEE_S16_S16 (block, block); mlib_VideoAddBlock_U8_S16 (dest, block, stride); + memset (block, 0, 64 * sizeof (uint16_t); } void mpeg2_idct_copy_mlib_non_ieee (int16_t * block, uint8_t * dest, int stride) { mlib_VideoIDCT8x8_U8_S16 (dest, block, stride); + memset (block, 0, 64 * sizeof (uint16_t); } void mpeg2_idct_add_mlib_non_ieee (int16_t * block, uint8_t * dest, int stride) { mlib_VideoIDCT8x8_S16_S16 (block, block); mlib_VideoAddBlock_U8_S16 (dest, block, stride); + memset (block, 0, 64 * sizeof (uint16_t); } #endif diff --git a/src/libmpeg2/idct_mmx.c b/src/libmpeg2/idct_mmx.c index d8822f6d9..bee54e83b 100644 --- a/src/libmpeg2/idct_mmx.c +++ b/src/libmpeg2/idct_mmx.c @@ -654,6 +654,25 @@ static void block_add (int16_t * block, uint8_t * dest, int stride) movq_r2m (mm3, *(dest+stride)); } +static inline void block_zero (int16_t * block) { + pxor_r2r (mm0, mm0); + movq_r2m (mm0, *(block+0*4)); + movq_r2m (mm0, *(block+1*4)); + movq_r2m (mm0, *(block+2*4)); + movq_r2m (mm0, *(block+3*4)); + movq_r2m (mm0, *(block+4*4)); + movq_r2m (mm0, *(block+5*4)); + movq_r2m (mm0, *(block+6*4)); + movq_r2m (mm0, *(block+7*4)); + movq_r2m (mm0, *(block+8*4)); + movq_r2m (mm0, *(block+9*4)); + movq_r2m (mm0, *(block+10*4)); + movq_r2m (mm0, *(block+11*4)); + movq_r2m (mm0, *(block+12*4)); + movq_r2m (mm0, *(block+13*4)); + movq_r2m (mm0, *(block+14*4)); + movq_r2m (mm0, *(block+15*4)); +} declare_idct (mmxext_idct, mmxext_table, mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid) @@ -662,12 +681,14 @@ void mpeg2_idct_copy_mmxext (int16_t * block, uint8_t * dest, int stride) { mmxext_idct (block); block_copy (block, dest, stride); + block_zero (block); } void mpeg2_idct_add_mmxext (int16_t * block, uint8_t * dest, int stride) { mmxext_idct (block); block_add (block, dest, stride); + block_zero (block); } @@ -678,12 +699,14 @@ void mpeg2_idct_copy_mmx (int16_t * block, uint8_t * dest, int stride) { mmx_idct (block); block_copy (block, dest, stride); + block_zero (block); } void mpeg2_idct_add_mmx (int16_t * block, uint8_t * dest, int stride) { mmx_idct (block); block_add (block, dest, stride); + block_zero (block); } void mpeg2_idct_mmx_init (void) diff --git a/src/libmpeg2/slice.c b/src/libmpeg2/slice.c index 07f92ea10..8913b208b 100644 --- a/src/libmpeg2/slice.c +++ b/src/libmpeg2/slice.c @@ -23,7 +23,6 @@ #include "config.h" -#include <string.h> #include <inttypes.h> #include "video_out.h" @@ -966,7 +965,6 @@ static inline void slice_intra_DCT (picture_t * picture, int cc, picture->dc_dct_pred[cc] += get_chroma_dc_dct_diff (picture); picture->DCTblock[0] = picture->dc_dct_pred[cc] << (3 - picture->intra_dc_precision); - memset (picture->DCTblock + 1, 0, 63 * sizeof (int16_t)); if (picture->mpeg1) { if (picture->picture_coding_type != D_TYPE) @@ -984,7 +982,6 @@ static inline void slice_intra_DCT (picture_t * picture, int cc, static inline void slice_non_intra_DCT (picture_t * picture, uint8_t * dest, int stride) { - memset (picture->DCTblock, 0, 64 * sizeof (int16_t)); if (picture->mpeg1) get_mpeg1_non_intra_block (picture); else |