diff options
Diffstat (limited to 'src/libffmpeg/libavcodec/ppc')
-rw-r--r-- | src/libffmpeg/libavcodec/ppc/dsputil_altivec.c | 612 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/ppc/dsputil_altivec.h | 77 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/ppc/dsputil_ppc.c | 130 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/ppc/dsputil_ppc.h | 21 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/ppc/fdct_altivec.c | 15 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/ppc/fft_altivec.c | 95 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/ppc/gcc_fixes.h | 16 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/ppc/gmc_altivec.c | 38 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/ppc/idct_altivec.c | 24 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c | 58 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c | 14 |
11 files changed, 346 insertions, 754 deletions
diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c index 81a32c9e3..6f48893a4 100644 --- a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c +++ b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c @@ -3,18 +3,20 @@ * Copyright (c) 2002 Dieter Shirley * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> * - * This library is free software; you can redistribute it and/or + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * - * This library is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -616,61 +618,28 @@ void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1, } void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) { -#ifdef ALTIVEC_USE_REFERENCE_C_CODE - int i; - for(i=0; i+7<w; i++){ - dst[i+0] += src[i+0]; - dst[i+1] += src[i+1]; - dst[i+2] += src[i+2]; - dst[i+3] += src[i+3]; - dst[i+4] += src[i+4]; - dst[i+5] += src[i+5]; - dst[i+6] += src[i+6]; - dst[i+7] += src[i+7]; - } - for(; i<w; i++) - dst[i+0] += src[i+0]; -#else /* ALTIVEC_USE_REFERENCE_C_CODE */ register int i; register vector unsigned char vdst, vsrc; /* dst and src are 16 bytes-aligned (guaranteed) */ - for(i = 0 ; (i + 15) < w ; i++) + for(i = 0 ; (i + 15) < w ; i+=16) { - vdst = vec_ld(i << 4, (unsigned char*)dst); - vsrc = vec_ld(i << 4, (unsigned char*)src); + vdst = vec_ld(i, (unsigned char*)dst); + vsrc = vec_ld(i, (unsigned char*)src); vdst = vec_add(vsrc, vdst); - vec_st(vdst, i << 4, (unsigned char*)dst); + vec_st(vdst, i, (unsigned char*)dst); } /* if w is not a multiple of 16 */ for (; (i < w) ; i++) { dst[i] = src[i]; } -#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } /* next one assumes that ((line_size % 16) == 0) */ void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) { POWERPC_PERF_DECLARE(altivec_put_pixels16_num, 1); -#ifdef ALTIVEC_USE_REFERENCE_C_CODE - int i; - -POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1); - - for(i=0; i<h; i++) { - *((uint32_t*)(block)) = LD32(pixels); - *((uint32_t*)(block+4)) = LD32(pixels+4); - *((uint32_t*)(block+8)) = LD32(pixels+8); - *((uint32_t*)(block+12)) = LD32(pixels+12); - pixels+=line_size; - block +=line_size; - } - -POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1); - -#else /* ALTIVEC_USE_REFERENCE_C_CODE */ register vector unsigned char pixelsv1, pixelsv2; register vector unsigned char pixelsv1B, pixelsv2B; register vector unsigned char pixelsv1C, pixelsv2C; @@ -700,13 +669,13 @@ POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1); #else for(i=0; i<h; i+=4) { pixelsv1 = vec_ld(0, (unsigned char*)pixels); - pixelsv2 = vec_ld(16, (unsigned char*)pixels); + pixelsv2 = vec_ld(15, (unsigned char*)pixels); pixelsv1B = vec_ld(line_size, (unsigned char*)pixels); - pixelsv2B = vec_ld(16 + line_size, (unsigned char*)pixels); + pixelsv2B = vec_ld(15 + line_size, (unsigned char*)pixels); pixelsv1C = vec_ld(line_size_2, (unsigned char*)pixels); - pixelsv2C = vec_ld(16 + line_size_2, (unsigned char*)pixels); + pixelsv2C = vec_ld(15 + line_size_2, (unsigned char*)pixels); pixelsv1D = vec_ld(line_size_3, (unsigned char*)pixels); - pixelsv2D = vec_ld(16 + line_size_3, (unsigned char*)pixels); + pixelsv2D = vec_ld(15 + line_size_3, (unsigned char*)pixels); vec_st(vec_perm(pixelsv1, pixelsv2, perm), 0, (unsigned char*)block); vec_st(vec_perm(pixelsv1B, pixelsv2B, perm), @@ -720,8 +689,6 @@ POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1); } #endif POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1); - -#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } /* next one assumes that ((line_size % 16) == 0) */ @@ -729,23 +696,6 @@ POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1); void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) { POWERPC_PERF_DECLARE(altivec_avg_pixels16_num, 1); -#ifdef ALTIVEC_USE_REFERENCE_C_CODE - int i; - -POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1); - - for(i=0; i<h; i++) { - op_avg(*((uint32_t*)(block)),LD32(pixels)); - op_avg(*((uint32_t*)(block+4)),LD32(pixels+4)); - op_avg(*((uint32_t*)(block+8)),LD32(pixels+8)); - op_avg(*((uint32_t*)(block+12)),LD32(pixels+12)); - pixels+=line_size; - block +=line_size; - } - -POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1); - -#else /* ALTIVEC_USE_REFERENCE_C_CODE */ register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; register vector unsigned char perm = vec_lvsl(0, pixels); int i; @@ -764,37 +714,12 @@ POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1); } POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1); - -#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } /* next one assumes that ((line_size % 8) == 0) */ void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) { POWERPC_PERF_DECLARE(altivec_avg_pixels8_num, 1); -#ifdef ALTIVEC_USE_REFERENCE_C_CODE - int i; -POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1); - for (i = 0; i < h; i++) { - *((uint32_t *) (block)) = - (((*((uint32_t *) (block))) | - ((((const struct unaligned_32 *) (pixels))->l))) - - ((((*((uint32_t *) (block))) ^ - ((((const struct unaligned_32 *) (pixels))-> - l))) & 0xFEFEFEFEUL) >> 1)); - *((uint32_t *) (block + 4)) = - (((*((uint32_t *) (block + 4))) | - ((((const struct unaligned_32 *) (pixels + 4))->l))) - - ((((*((uint32_t *) (block + 4))) ^ - ((((const struct unaligned_32 *) (pixels + - 4))-> - l))) & 0xFEFEFEFEUL) >> 1)); - pixels += line_size; - block += line_size; - } -POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1); - -#else /* ALTIVEC_USE_REFERENCE_C_CODE */ register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; int i; @@ -830,52 +755,12 @@ POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1); } POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1); - -#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } /* next one assumes that ((line_size % 8) == 0) */ void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) { POWERPC_PERF_DECLARE(altivec_put_pixels8_xy2_num, 1); -#ifdef ALTIVEC_USE_REFERENCE_C_CODE - int j; -POWERPC_PERF_START_COUNT(altivec_put_pixels8_xy2_num, 1); - for (j = 0; j < 2; j++) { - int i; - const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); - const uint32_t b = - (((const struct unaligned_32 *) (pixels + 1))->l); - uint32_t l0 = - (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; - uint32_t h0 = - ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); - uint32_t l1, h1; - pixels += line_size; - for (i = 0; i < h; i += 2) { - uint32_t a = (((const struct unaligned_32 *) (pixels))->l); - uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l); - l1 = (a & 0x03030303UL) + (b & 0x03030303UL); - h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); - *((uint32_t *) block) = - h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); - pixels += line_size; - block += line_size; - a = (((const struct unaligned_32 *) (pixels))->l); - b = (((const struct unaligned_32 *) (pixels + 1))->l); - l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; - h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); - *((uint32_t *) block) = - h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); - pixels += line_size; - block += line_size; - } pixels += 4 - line_size * (h + 1); - block += 4 - line_size * h; - } - -POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); - -#else /* ALTIVEC_USE_REFERENCE_C_CODE */ register int i; register vector unsigned char pixelsv1, pixelsv2, @@ -946,51 +831,12 @@ POWERPC_PERF_START_COUNT(altivec_put_pixels8_xy2_num, 1); } POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); -#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } /* next one assumes that ((line_size % 8) == 0) */ void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) { POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1); -#ifdef ALTIVEC_USE_REFERENCE_C_CODE - int j; -POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); - for (j = 0; j < 2; j++) { - int i; - const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); - const uint32_t b = - (((const struct unaligned_32 *) (pixels + 1))->l); - uint32_t l0 = - (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL; - uint32_t h0 = - ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); - uint32_t l1, h1; - pixels += line_size; - for (i = 0; i < h; i += 2) { - uint32_t a = (((const struct unaligned_32 *) (pixels))->l); - uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l); - l1 = (a & 0x03030303UL) + (b & 0x03030303UL); - h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); - *((uint32_t *) block) = - h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); - pixels += line_size; - block += line_size; - a = (((const struct unaligned_32 *) (pixels))->l); - b = (((const struct unaligned_32 *) (pixels + 1))->l); - l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL; - h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); - *((uint32_t *) block) = - h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); - pixels += line_size; - block += line_size; - } pixels += 4 - line_size * (h + 1); - block += 4 - line_size * h; - } - -POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); - -#else /* ALTIVEC_USE_REFERENCE_C_CODE */ register int i; register vector unsigned char pixelsv1, pixelsv2, @@ -1062,51 +908,12 @@ POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); } POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); -#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } /* next one assumes that ((line_size % 16) == 0) */ void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) { POWERPC_PERF_DECLARE(altivec_put_pixels16_xy2_num, 1); -#ifdef ALTIVEC_USE_REFERENCE_C_CODE - int j; -POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1); - for (j = 0; j < 4; j++) { - int i; - const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); - const uint32_t b = - (((const struct unaligned_32 *) (pixels + 1))->l); - uint32_t l0 = - (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; - uint32_t h0 = - ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); - uint32_t l1, h1; - pixels += line_size; - for (i = 0; i < h; i += 2) { - uint32_t a = (((const struct unaligned_32 *) (pixels))->l); - uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l); - l1 = (a & 0x03030303UL) + (b & 0x03030303UL); - h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); - *((uint32_t *) block) = - h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); - pixels += line_size; - block += line_size; - a = (((const struct unaligned_32 *) (pixels))->l); - b = (((const struct unaligned_32 *) (pixels + 1))->l); - l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; - h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); - *((uint32_t *) block) = - h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); - pixels += line_size; - block += line_size; - } pixels += 4 - line_size * (h + 1); - block += 4 - line_size * h; - } - -POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); - -#else /* ALTIVEC_USE_REFERENCE_C_CODE */ register int i; register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4; @@ -1183,51 +990,12 @@ POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1); } POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); -#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } /* next one assumes that ((line_size % 16) == 0) */ void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) { POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1); -#ifdef ALTIVEC_USE_REFERENCE_C_CODE - int j; -POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); - for (j = 0; j < 4; j++) { - int i; - const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); - const uint32_t b = - (((const struct unaligned_32 *) (pixels + 1))->l); - uint32_t l0 = - (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL; - uint32_t h0 = - ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); - uint32_t l1, h1; - pixels += line_size; - for (i = 0; i < h; i += 2) { - uint32_t a = (((const struct unaligned_32 *) (pixels))->l); - uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l); - l1 = (a & 0x03030303UL) + (b & 0x03030303UL); - h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); - *((uint32_t *) block) = - h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); - pixels += line_size; - block += line_size; - a = (((const struct unaligned_32 *) (pixels))->l); - b = (((const struct unaligned_32 *) (pixels + 1))->l); - l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL; - h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); - *((uint32_t *) block) = - h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); - pixels += line_size; - block += line_size; - } pixels += 4 - line_size * (h + 1); - block += 4 - line_size * h; - } - -POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); - -#else /* ALTIVEC_USE_REFERENCE_C_CODE */ register int i; register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4; @@ -1305,34 +1073,32 @@ POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); } POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); -#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1); - int sum; - register const_vector unsigned char vzero = (const_vector unsigned char)vec_splat_u8(0); - register vector signed short temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; + int sum; + register const_vector unsigned char vzero = + (const_vector unsigned char)vec_splat_u8(0); + register vector signed short temp0, temp1, temp2, temp3, temp4, + temp5, temp6, temp7; POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1); { - register const_vector signed short vprod1 = (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1); - register const_vector signed short vprod2 = (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1); - register const_vector signed short vprod3 = (const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1); + register const_vector signed short vprod1 =(const_vector signed short) + AVV( 1,-1, 1,-1, 1,-1, 1,-1); + register const_vector signed short vprod2 =(const_vector signed short) + AVV( 1, 1,-1,-1, 1, 1,-1,-1); + register const_vector signed short vprod3 =(const_vector signed short) + AVV( 1, 1, 1, 1,-1,-1,-1,-1); register const_vector unsigned char perm1 = (const_vector unsigned char) - AVV(0x02, 0x03, 0x00, 0x01, - 0x06, 0x07, 0x04, 0x05, - 0x0A, 0x0B, 0x08, 0x09, - 0x0E, 0x0F, 0x0C, 0x0D); + AVV(0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05, + 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D); register const_vector unsigned char perm2 = (const_vector unsigned char) - AVV(0x04, 0x05, 0x06, 0x07, - 0x00, 0x01, 0x02, 0x03, - 0x0C, 0x0D, 0x0E, 0x0F, - 0x08, 0x09, 0x0A, 0x0B); + AVV(0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, + 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B); register const_vector unsigned char perm3 = (const_vector unsigned char) - AVV(0x08, 0x09, 0x0A, 0x0B, - 0x0C, 0x0D, 0x0E, 0x0F, - 0x00, 0x01, 0x02, 0x03, - 0x04, 0x05, 0x06, 0x07); + AVV(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); #define ONEITERBUTTERFLY(i, res) \ { \ @@ -1443,45 +1209,46 @@ POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff8x8_num, 1); */ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h) { - int sum; - register vector signed short - temp0 REG_v(v0), - temp1 REG_v(v1), - temp2 REG_v(v2), - temp3 REG_v(v3), - temp4 REG_v(v4), - temp5 REG_v(v5), - temp6 REG_v(v6), - temp7 REG_v(v7); - register vector signed short - temp0S REG_v(v8), - temp1S REG_v(v9), - temp2S REG_v(v10), - temp3S REG_v(v11), - temp4S REG_v(v12), - temp5S REG_v(v13), - temp6S REG_v(v14), - temp7S REG_v(v15); - register const_vector unsigned char vzero REG_v(v31)= (const_vector unsigned char)vec_splat_u8(0); + int sum; + register vector signed short + temp0 REG_v(v0), + temp1 REG_v(v1), + temp2 REG_v(v2), + temp3 REG_v(v3), + temp4 REG_v(v4), + temp5 REG_v(v5), + temp6 REG_v(v6), + temp7 REG_v(v7); + register vector signed short + temp0S REG_v(v8), + temp1S REG_v(v9), + temp2S REG_v(v10), + temp3S REG_v(v11), + temp4S REG_v(v12), + temp5S REG_v(v13), + temp6S REG_v(v14), + temp7S REG_v(v15); + register const_vector unsigned char vzero REG_v(v31)= + (const_vector unsigned char)vec_splat_u8(0); { - register const_vector signed short vprod1 REG_v(v16)= (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1); - register const_vector signed short vprod2 REG_v(v17)= (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1); - register const_vector signed short vprod3 REG_v(v18)= (const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1); - register const_vector unsigned char perm1 REG_v(v19)= (const_vector unsigned char) - AVV(0x02, 0x03, 0x00, 0x01, - 0x06, 0x07, 0x04, 0x05, - 0x0A, 0x0B, 0x08, 0x09, - 0x0E, 0x0F, 0x0C, 0x0D); - register const_vector unsigned char perm2 REG_v(v20)= (const_vector unsigned char) - AVV(0x04, 0x05, 0x06, 0x07, - 0x00, 0x01, 0x02, 0x03, - 0x0C, 0x0D, 0x0E, 0x0F, - 0x08, 0x09, 0x0A, 0x0B); - register const_vector unsigned char perm3 REG_v(v21)= (const_vector unsigned char) - AVV(0x08, 0x09, 0x0A, 0x0B, - 0x0C, 0x0D, 0x0E, 0x0F, - 0x00, 0x01, 0x02, 0x03, - 0x04, 0x05, 0x06, 0x07); + register const_vector signed short vprod1 REG_v(v16)= + (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1); + register const_vector signed short vprod2 REG_v(v17)= + (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1); + register const_vector signed short vprod3 REG_v(v18)= + (const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1); + register const_vector unsigned char perm1 REG_v(v19)= + (const_vector unsigned char) + AVV(0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05, + 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D); + register const_vector unsigned char perm2 REG_v(v20)= + (const_vector unsigned char) + AVV(0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, + 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B); + register const_vector unsigned char perm3 REG_v(v21)= + (const_vector unsigned char) + AVV(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); #define ONEITERBUTTERFLY(i, res1, res2) \ { \ @@ -1642,27 +1409,27 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ POWERPC_PERF_DECLARE(altivec_hadamard8_diff16_num, 1); - int score; + int score; POWERPC_PERF_START_COUNT(altivec_hadamard8_diff16_num, 1); - score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8); - if (h==16) { - dst += 8*stride; - src += 8*stride; - score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8); - } + score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8); + if (h==16) { + dst += 8*stride; + src += 8*stride; + score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8); + } POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1); - return score; + return score; } int has_altivec(void) { #ifdef __AMIGAOS4__ - ULONG result = 0; - extern struct ExecIFace *IExec; + ULONG result = 0; + extern struct ExecIFace *IExec; - IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE); - if (result == VECTORTYPE_ALTIVEC) return 1; - return 0; + IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE); + if (result == VECTORTYPE_ALTIVEC) return 1; + return 0; #else /* __AMIGAOS4__ */ #ifdef CONFIG_DARWIN @@ -1698,112 +1465,127 @@ int has_altivec(void) #endif /* __AMIGAOS4__ */ } +static void vorbis_inverse_coupling_altivec(float *mag, float *ang, + int blocksize) +{ + int i; + vector float m, a; + vector bool int t0, t1; + const vector unsigned int v_31 = //XXX + vec_add(vec_add(vec_splat_u32(15),vec_splat_u32(15)),vec_splat_u32(1)); + for(i=0; i<blocksize; i+=4) { + m = vec_ld(0, mag+i); + a = vec_ld(0, ang+i); + t0 = vec_cmple(m, (vector float)vec_splat_u32(0)); + t1 = vec_cmple(a, (vector float)vec_splat_u32(0)); + a = vec_xor(a, (vector float) vec_sl((vector unsigned int)t0, v_31)); + t0 = (vector bool int)vec_and(a, t1); + t1 = (vector bool int)vec_andc(a, t1); + a = vec_sub(m, (vector float)t1); + m = vec_add(m, (vector float)t0); + vec_stl(a, 0, ang+i); + vec_stl(m, 0, mag+i); + } +} + /* next one assumes that ((line_size % 8) == 0) */ void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) { POWERPC_PERF_DECLARE(altivec_avg_pixels8_xy2_num, 1); -#ifdef ALTIVEC_USE_REFERENCE_C_CODE - - int j; -POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1); - for (j = 0; j < 2; j++) { - int i; - const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); - const uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l); - uint32_t l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; - uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); - uint32_t l1, h1; - pixels += line_size; - for (i = 0; i < h; i += 2) { - uint32_t a = (((const struct unaligned_32 *) (pixels))->l); - uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l); - l1 = (a & 0x03030303UL) + (b & 0x03030303UL); - h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); - *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); - pixels += line_size; - block += line_size; - a = (((const struct unaligned_32 *) (pixels))->l); - b = (((const struct unaligned_32 *) (pixels + 1))->l); - l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; - h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); - *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); - pixels += line_size; - block += line_size; - } pixels += 4 - line_size * (h + 1); - block += 4 - line_size * h; - } -POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1); -#else /* ALTIVEC_USE_REFERENCE_C_CODE */ - register int i; - register vector unsigned char - pixelsv1, pixelsv2, - pixelsavg; - register vector unsigned char - blockv, temp1, temp2, blocktemp; - register vector unsigned short - pixelssum1, pixelssum2, temp3; - register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0); - register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2); - - temp1 = vec_ld(0, pixels); - temp2 = vec_ld(16, pixels); - pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); - if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) - { - pixelsv2 = temp2; - } - else - { - pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels)); - } - pixelsv1 = vec_mergeh(vczero, pixelsv1); - pixelsv2 = vec_mergeh(vczero, pixelsv2); - pixelssum1 = vec_add((vector unsigned short)pixelsv1, - (vector unsigned short)pixelsv2); - pixelssum1 = vec_add(pixelssum1, vctwo); + register int i; + register vector unsigned char pixelsv1, pixelsv2, pixelsavg; + register vector unsigned char blockv, temp1, temp2, blocktemp; + register vector unsigned short pixelssum1, pixelssum2, temp3; + + register const_vector unsigned char vczero = (const_vector unsigned char) + vec_splat_u8(0); + register const_vector unsigned short vctwo = (const_vector unsigned short) + vec_splat_u16(2); + + temp1 = vec_ld(0, pixels); + temp2 = vec_ld(16, pixels); + pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); + if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) { + pixelsv2 = temp2; + } else { + pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels)); + } + pixelsv1 = vec_mergeh(vczero, pixelsv1); + pixelsv2 = vec_mergeh(vczero, pixelsv2); + pixelssum1 = vec_add((vector unsigned short)pixelsv1, + (vector unsigned short)pixelsv2); + pixelssum1 = vec_add(pixelssum1, vctwo); POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1); - for (i = 0; i < h ; i++) { - int rightside = ((unsigned long)block & 0x0000000F); - blockv = vec_ld(0, block); + for (i = 0; i < h ; i++) { + int rightside = ((unsigned long)block & 0x0000000F); + blockv = vec_ld(0, block); + + temp1 = vec_ld(line_size, pixels); + temp2 = vec_ld(line_size + 16, pixels); + pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels)); + if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) + { + pixelsv2 = temp2; + } else { + pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels)); + } + + pixelsv1 = vec_mergeh(vczero, pixelsv1); + pixelsv2 = vec_mergeh(vczero, pixelsv2); + pixelssum2 = vec_add((vector unsigned short)pixelsv1, + (vector unsigned short)pixelsv2); + temp3 = vec_add(pixelssum1, pixelssum2); + temp3 = vec_sra(temp3, vctwo); + pixelssum1 = vec_add(pixelssum2, vctwo); + pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero); + + if (rightside) { + blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1)); + } else { + blocktemp = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3)); + } + + blockv = vec_avg(blocktemp, blockv); + vec_st(blockv, 0, block); - temp1 = vec_ld(line_size, pixels); - temp2 = vec_ld(line_size + 16, pixels); - pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels)); - if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) - { - pixelsv2 = temp2; - } - else - { - pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels)); - } - - pixelsv1 = vec_mergeh(vczero, pixelsv1); - pixelsv2 = vec_mergeh(vczero, pixelsv2); - pixelssum2 = vec_add((vector unsigned short)pixelsv1, - (vector unsigned short)pixelsv2); - temp3 = vec_add(pixelssum1, pixelssum2); - temp3 = vec_sra(temp3, vctwo); - pixelssum1 = vec_add(pixelssum2, vctwo); - pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero); - - if (rightside) - { - blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1)); - } - else - { - blocktemp = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3)); - } - - blockv = vec_avg(blocktemp, blockv); - vec_st(blockv, 0, block); - - block += line_size; - pixels += line_size; - } + block += line_size; + pixels += line_size; + } POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1); -#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ +} + +void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx) +{ + c->pix_abs[0][1] = sad16_x2_altivec; + c->pix_abs[0][2] = sad16_y2_altivec; + c->pix_abs[0][3] = sad16_xy2_altivec; + c->pix_abs[0][0] = sad16_altivec; + c->pix_abs[1][0] = sad8_altivec; + c->sad[0]= sad16_altivec; + c->sad[1]= sad8_altivec; + c->pix_norm1 = pix_norm1_altivec; + c->sse[1]= sse8_altivec; + c->sse[0]= sse16_altivec; + c->pix_sum = pix_sum_altivec; + c->diff_pixels = diff_pixels_altivec; + c->get_pixels = get_pixels_altivec; + c->add_bytes= add_bytes_altivec; + c->put_pixels_tab[0][0] = put_pixels16_altivec; + /* the two functions do the same thing, so use the same code */ + c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec; + c->avg_pixels_tab[0][0] = avg_pixels16_altivec; + c->avg_pixels_tab[1][0] = avg_pixels8_altivec; + c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec; + c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec; + c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec; + c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec; + c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec; + + c->hadamard8_diff[0] = hadamard8_diff16_altivec; + c->hadamard8_diff[1] = hadamard8_diff8x8_altivec; +#ifdef CONFIG_VORBIS_DECODER + c->vorbis_inverse_coupling = vorbis_inverse_coupling_altivec; +#endif } diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.h b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.h index ac54817d0..560d778bb 100644 --- a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.h +++ b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.h @@ -3,18 +3,20 @@ * Copyright (c) 2002 Dieter Shirley * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> * - * This library is free software; you can redistribute it and/or + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * - * This library is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -25,34 +27,11 @@ #ifdef HAVE_ALTIVEC -extern int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); -extern int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); -extern int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); -extern int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); -extern int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); -extern int pix_norm1_altivec(uint8_t *pix, int line_size); -extern int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); -extern int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); -extern int pix_sum_altivec(uint8_t * pix, int line_size); -extern void diff_pixels_altivec(DCTELEM* block, const uint8_t* s1, const uint8_t* s2, int stride); -extern void get_pixels_altivec(DCTELEM* block, const uint8_t * pixels, int line_size); +extern int has_altivec(void); -extern void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w); -extern void put_pixels_clamped_altivec(const DCTELEM *block, uint8_t *restrict pixels, int line_size); -extern void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h); -extern void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h); -extern void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h); -extern void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h); -extern void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h); -extern void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h); -extern void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h); -extern int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h); -extern int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h); -extern void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h); +void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h); -extern void gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder); - -extern int has_altivec(void); +void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h); // used to build registers permutation vectors (vcprm) // the 's' are for words in the _s_econd vector @@ -88,10 +67,40 @@ extern int has_altivec(void); #define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d} #endif -#else /* HAVE_ALTIVEC */ -#ifdef ALTIVEC_USE_REFERENCE_C_CODE -#error "I can't use ALTIVEC_USE_REFERENCE_C_CODE if I don't use HAVE_ALTIVEC" -#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ +// Transpose 8x8 matrix of 16-bit elements (in-place) +#define TRANSPOSE8(a,b,c,d,e,f,g,h) \ +do { \ + vector signed short A1, B1, C1, D1, E1, F1, G1, H1; \ + vector signed short A2, B2, C2, D2, E2, F2, G2, H2; \ + \ + A1 = vec_mergeh (a, e); \ + B1 = vec_mergel (a, e); \ + C1 = vec_mergeh (b, f); \ + D1 = vec_mergel (b, f); \ + E1 = vec_mergeh (c, g); \ + F1 = vec_mergel (c, g); \ + G1 = vec_mergeh (d, h); \ + H1 = vec_mergel (d, h); \ + \ + A2 = vec_mergeh (A1, E1); \ + B2 = vec_mergel (A1, E1); \ + C2 = vec_mergeh (B1, F1); \ + D2 = vec_mergel (B1, F1); \ + E2 = vec_mergeh (C1, G1); \ + F2 = vec_mergel (C1, G1); \ + G2 = vec_mergeh (D1, H1); \ + H2 = vec_mergel (D1, H1); \ + \ + a = vec_mergeh (A2, E2); \ + b = vec_mergel (A2, E2); \ + c = vec_mergeh (B2, F2); \ + d = vec_mergel (B2, F2); \ + e = vec_mergeh (C2, G2); \ + f = vec_mergel (C2, G2); \ + g = vec_mergeh (D2, H2); \ + h = vec_mergel (D2, H2); \ +} while (0) + #endif /* HAVE_ALTIVEC */ #endif /* _DSPUTIL_ALTIVEC_ */ diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c b/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c index b63c8dd84..9169eaef0 100644 --- a/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c +++ b/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c @@ -3,18 +3,20 @@ * Copyright (c) 2002 Dieter Shirley * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> * - * This library is free software; you can redistribute it and/or + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * - * This library is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -24,22 +26,21 @@ #ifdef HAVE_ALTIVEC #include "dsputil_altivec.h" -#endif extern void fdct_altivec(int16_t *block); +extern void gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, + int x16, int y16, int rounder); extern void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); extern void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); -extern void ff_snow_horizontal_compose97i_altivec(DWTELEM *b, int width); -extern void ff_snow_vertical_compose97i_altivec(DWTELEM *b0, DWTELEM *b1, - DWTELEM *b2, DWTELEM *b3, - DWTELEM *b4, DWTELEM *b5, - int width); -extern void ff_snow_inner_add_yblock_altivec(uint8_t *obmc, const int obmc_stride, - uint8_t * * block, int b_w, int b_h, - int src_x, int src_y, int src_stride, - slice_buffer * sb, int add, - uint8_t * dst8); +void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx); + +void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx); +void vc1dsp_init_altivec(DSPContext* c, AVCodecContext *avctx); +void snow_init_altivec(DSPContext* c, AVCodecContext *avctx); +void float_init_altivec(DSPContext* c, AVCodecContext *avctx); + +#endif int mm_flags = 0; @@ -100,7 +101,7 @@ void powerpc_display_perf_report(void) { if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0) av_log(NULL, AV_LOG_INFO, - " Function \"%s\" (pmc%d):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n", + " Function \"%s\" (pmc%d):\n\tmin: %"PRIu64"\n\tmax: %"PRIu64"\n\tavg: %1.2lf (%"PRIu64")\n", perfname[i], j+1, perfdata[j][i][powerpc_data_min], @@ -174,7 +175,7 @@ POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1); /* same as above, when dcbzl clear a whole 128B cache line i.e. the PPC970 aka G5 */ -#ifndef NO_DCBZL +#ifdef HAVE_DCBZL void clear_blocks_dcbz128_ppc(DCTELEM *blocks) { POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1); @@ -204,7 +205,7 @@ void clear_blocks_dcbz128_ppc(DCTELEM *blocks) } #endif -#ifndef NO_DCBZL +#ifdef HAVE_DCBZL /* check dcbz report how many bytes are set to 0 by dcbz */ /* update 24/06/2003 : replace dcbz by dcbzl to get the intended effect (Apple "fixed" dcbz) @@ -248,69 +249,43 @@ long check_dcbzl_effect(void) } #endif - -void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx); +static void prefetch_ppc(void *mem, int stride, int h) +{ + register const uint8_t *p = mem; + do { + asm volatile ("dcbt 0,%0" : : "r" (p)); + p+= stride; + } while(--h); +} void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) { // Common optimizations whether Altivec is available or not - - switch (check_dcbzl_effect()) { - case 32: - c->clear_blocks = clear_blocks_dcbz32_ppc; - break; - case 128: - c->clear_blocks = clear_blocks_dcbz128_ppc; - break; - default: - break; - } + c->prefetch = prefetch_ppc; + switch (check_dcbzl_effect()) { + case 32: + c->clear_blocks = clear_blocks_dcbz32_ppc; + break; + case 128: + c->clear_blocks = clear_blocks_dcbz128_ppc; + break; + default: + break; + } #ifdef HAVE_ALTIVEC - dsputil_h264_init_ppc(c, avctx); + if(ENABLE_H264_DECODER) dsputil_h264_init_ppc(c, avctx); if (has_altivec()) { mm_flags |= MM_ALTIVEC; - // Altivec specific optimisations - c->pix_abs[0][1] = sad16_x2_altivec; - c->pix_abs[0][2] = sad16_y2_altivec; - c->pix_abs[0][3] = sad16_xy2_altivec; - c->pix_abs[0][0] = sad16_altivec; - c->pix_abs[1][0] = sad8_altivec; - c->sad[0]= sad16_altivec; - c->sad[1]= sad8_altivec; - c->pix_norm1 = pix_norm1_altivec; - c->sse[1]= sse8_altivec; - c->sse[0]= sse16_altivec; - c->pix_sum = pix_sum_altivec; - c->diff_pixels = diff_pixels_altivec; - c->get_pixels = get_pixels_altivec; -// next one disabled as it's untested. -#if 0 - c->add_bytes= add_bytes_altivec; -#endif /* 0 */ - c->put_pixels_tab[0][0] = put_pixels16_altivec; - /* the two functions do the same thing, so use the same code */ - c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec; - c->avg_pixels_tab[0][0] = avg_pixels16_altivec; - c->avg_pixels_tab[1][0] = avg_pixels8_altivec; - c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec; - c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec; - c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec; - c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec; - c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec; - + dsputil_init_altivec(c, avctx); + if(ENABLE_SNOW_DECODER) snow_init_altivec(c, avctx); + if(ENABLE_VC1_DECODER || ENABLE_WMV3_DECODER) + vc1dsp_init_altivec(c, avctx); + float_init_altivec(c, avctx); c->gmc1 = gmc1_altivec; - c->hadamard8_diff[0] = hadamard8_diff16_altivec; - c->hadamard8_diff[1] = hadamard8_diff8x8_altivec; - - - c->horizontal_compose97i = ff_snow_horizontal_compose97i_altivec; - c->vertical_compose97i = ff_snow_vertical_compose97i_altivec; - c->inner_add_yblock = ff_snow_inner_add_yblock_altivec; - #ifdef CONFIG_ENCODERS if (avctx->dct_algo == FF_DCT_AUTO || avctx->dct_algo == FF_DCT_ALTIVEC) @@ -319,20 +294,16 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) } #endif //CONFIG_ENCODERS - if (avctx->lowres==0) - { + if (avctx->lowres==0) + { if ((avctx->idct_algo == FF_IDCT_AUTO) || (avctx->idct_algo == FF_IDCT_ALTIVEC)) { c->idct_put = idct_put_altivec; c->idct_add = idct_add_altivec; -#ifndef ALTIVEC_USE_REFERENCE_C_CODE c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; -#else /* ALTIVEC_USE_REFERENCE_C_CODE */ - c->idct_permutation_type = FF_NO_IDCT_PERM; -#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } - } + } #ifdef POWERPC_PERFORMANCE_REPORT { @@ -349,11 +320,6 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) } } #endif /* POWERPC_PERFORMANCE_REPORT */ - } else -#endif /* HAVE_ALTIVEC */ - { - // Non-AltiVec PPC optimisations - - // ... pending ... } +#endif /* HAVE_ALTIVEC */ } diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_ppc.h b/src/libffmpeg/libavcodec/ppc/dsputil_ppc.h index 966ffa71a..ab2b05780 100644 --- a/src/libffmpeg/libavcodec/ppc/dsputil_ppc.h +++ b/src/libffmpeg/libavcodec/ppc/dsputil_ppc.h @@ -1,35 +1,26 @@ /* * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> * - * This library is free software; you can redistribute it and/or + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * - * This library is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef _DSPUTIL_PPC_ #define _DSPUTIL_PPC_ -#ifdef CONFIG_DARWIN -/* The Apple assembler shipped w/ gcc-3.3 knows about DCBZL, previous assemblers don't - We assume here that the Darwin GCC is from Apple.... */ -#if (__GNUC__ * 100 + __GNUC_MINOR__ < 303) -#define NO_DCBZL -#endif -#else /* CONFIG_DARWIN */ -/* I don't think any non-Apple assembler knows about DCBZL */ -#define NO_DCBZL -#endif /* CONFIG_DARWIN */ - #ifdef POWERPC_PERFORMANCE_REPORT void powerpc_display_perf_report(void); /* the 604* have 2, the G3* have 4, the G4s have 6, diff --git a/src/libffmpeg/libavcodec/ppc/fdct_altivec.c b/src/libffmpeg/libavcodec/ppc/fdct_altivec.c index f5778c24e..2418c32bb 100644 --- a/src/libffmpeg/libavcodec/ppc/fdct_altivec.c +++ b/src/libffmpeg/libavcodec/ppc/fdct_altivec.c @@ -2,18 +2,20 @@ * AltiVec optimized library for the FFMPEG Multimedia System * Copyright (C) 2003 James Klicman <james@klicman.org> * - * This library is free software; you can redistribute it and/or + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * This library is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -196,12 +198,6 @@ static vector float fdctconsts[3] = { void fdct_altivec(int16_t *block) { POWERPC_PERF_DECLARE(altivec_fdct, 1); -#ifdef ALTIVEC_USE_REFERENCE_C_CODE -POWERPC_PERF_START_COUNT(altivec_fdct, 1); - void ff_jpeg_fdct_islow(int16_t *block); - ff_jpeg_fdct_islow(block); -POWERPC_PERF_STOP_COUNT(altivec_fdct, 1); -#else /* ALTIVEC_USE_REFERENCE_C_CODE */ vector signed short *bp; vector float *cp; vector float b00, b10, b20, b30, b40, b50, b60, b70; @@ -492,7 +488,6 @@ POWERPC_PERF_STOP_COUNT(altivec_fdct, 1); /* }}} */ POWERPC_PERF_STOP_COUNT(altivec_fdct, 1); -#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } /* vim:set foldmethod=marker foldlevel=0: */ diff --git a/src/libffmpeg/libavcodec/ppc/fft_altivec.c b/src/libffmpeg/libavcodec/ppc/fft_altivec.c index f4ea78359..384a774ff 100644 --- a/src/libffmpeg/libavcodec/ppc/fft_altivec.c +++ b/src/libffmpeg/libavcodec/ppc/fft_altivec.c @@ -4,18 +4,20 @@ * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org> * Based on code Copyright (c) 2002 Fabrice Bellard. * - * This library is free software; you can redistribute it and/or + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * - * This library is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include "../dsputil.h" @@ -63,88 +65,7 @@ void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z) { POWERPC_PERF_DECLARE(altivec_fft_num, s->nbits >= 6); -#ifdef ALTIVEC_USE_REFERENCE_C_CODE - int ln = s->nbits; - int j, np, np2; - int nblocks, nloops; - register FFTComplex *p, *q; - FFTComplex *exptab = s->exptab; - int l; - FFTSample tmp_re, tmp_im; - -POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6); - - np = 1 << ln; - - /* pass 0 */ - - p=&z[0]; - j=(np >> 1); - do { - BF(p[0].re, p[0].im, p[1].re, p[1].im, - p[0].re, p[0].im, p[1].re, p[1].im); - p+=2; - } while (--j != 0); - - /* pass 1 */ - - - p=&z[0]; - j=np >> 2; - if (s->inverse) { - do { - BF(p[0].re, p[0].im, p[2].re, p[2].im, - p[0].re, p[0].im, p[2].re, p[2].im); - BF(p[1].re, p[1].im, p[3].re, p[3].im, - p[1].re, p[1].im, -p[3].im, p[3].re); - p+=4; - } while (--j != 0); - } else { - do { - BF(p[0].re, p[0].im, p[2].re, p[2].im, - p[0].re, p[0].im, p[2].re, p[2].im); - BF(p[1].re, p[1].im, p[3].re, p[3].im, - p[1].re, p[1].im, p[3].im, -p[3].re); - p+=4; - } while (--j != 0); - } - /* pass 2 .. ln-1 */ - - nblocks = np >> 3; - nloops = 1 << 2; - np2 = np >> 1; - do { - p = z; - q = z + nloops; - for (j = 0; j < nblocks; ++j) { - BF(p->re, p->im, q->re, q->im, - p->re, p->im, q->re, q->im); - - p++; - q++; - for(l = nblocks; l < np2; l += nblocks) { - CMUL(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q->re, q->im); - BF(p->re, p->im, q->re, q->im, - p->re, p->im, tmp_re, tmp_im); - p++; - q++; - } - - p += nloops; - q += nloops; - } - nblocks = nblocks >> 1; - nloops = nloops << 1; - } while (nblocks != 0); - -POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6); - -#else /* ALTIVEC_USE_REFERENCE_C_CODE */ -#ifdef CONFIG_DARWIN - register const vector float vczero = (const vector float)(0.); -#else - register const vector float vczero = (const vector float){0.,0.,0.,0.}; -#endif + register const vector float vczero = (const vector float)vec_splat_u32(0.); int ln = s->nbits; int j, np, np2; @@ -242,6 +163,4 @@ POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6); } while (nblocks != 0); POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6); - -#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } diff --git a/src/libffmpeg/libavcodec/ppc/gcc_fixes.h b/src/libffmpeg/libavcodec/ppc/gcc_fixes.h index 943905bc5..5a4a55188 100644 --- a/src/libffmpeg/libavcodec/ppc/gcc_fixes.h +++ b/src/libffmpeg/libavcodec/ppc/gcc_fixes.h @@ -2,6 +2,22 @@ * gcc fixes for altivec. * Used to workaround broken gcc (FSF gcc-3 pre gcc-3.3) * and to stay somewhat compatible with Darwin. + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef _GCC_FIXES_ diff --git a/src/libffmpeg/libavcodec/ppc/gmc_altivec.c b/src/libffmpeg/libavcodec/ppc/gmc_altivec.c index 04978d825..42c936bb3 100644 --- a/src/libffmpeg/libavcodec/ppc/gmc_altivec.c +++ b/src/libffmpeg/libavcodec/ppc/gmc_altivec.c @@ -3,18 +3,20 @@ * AltiVec-enabled * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org> * - * This library is free software; you can redistribute it and/or + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * - * This library is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -32,32 +34,6 @@ void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder) { POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND); -#ifdef ALTIVEC_USE_REFERENCE_C_CODE - const int A=(16-x16)*(16-y16); - const int B=( x16)*(16-y16); - const int C=(16-x16)*( y16); - const int D=( x16)*( y16); - int i; - -POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND); - - for(i=0; i<h; i++) - { - dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8; - dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8; - dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8; - dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8; - dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8; - dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8; - dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8; - dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8; - dst+= stride; - src+= stride; - } - -POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND); - -#else /* ALTIVEC_USE_REFERENCE_C_CODE */ const unsigned short __attribute__ ((aligned(16))) rounder_a[8] = {rounder, rounder, rounder, rounder, rounder, rounder, rounder, rounder}; @@ -167,6 +143,4 @@ POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND); } POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND); - -#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } diff --git a/src/libffmpeg/libavcodec/ppc/idct_altivec.c b/src/libffmpeg/libavcodec/ppc/idct_altivec.c index 93d63cfd3..cee46fc25 100644 --- a/src/libffmpeg/libavcodec/ppc/idct_altivec.c +++ b/src/libffmpeg/libavcodec/ppc/idct_altivec.c @@ -1,18 +1,20 @@ /* * Copyright (c) 2001 Michel Lespinasse * - * This library is free software; you can redistribute it and/or + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * - * This library is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * */ @@ -167,12 +169,6 @@ static const_vector_s16_t constants[5] = { void idct_put_altivec(uint8_t* dest, int stride, vector_s16_t* block) { POWERPC_PERF_DECLARE(altivec_idct_put_num, 1); -#ifdef ALTIVEC_USE_REFERENCE_C_CODE -POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1); - void simple_idct_put(uint8_t *dest, int line_size, int16_t *block); - simple_idct_put(dest, stride, (int16_t*)block); -POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1); -#else /* ALTIVEC_USE_REFERENCE_C_CODE */ vector_u8_t tmp; #ifdef POWERPC_PERFORMANCE_REPORT @@ -195,18 +191,11 @@ POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1); COPY (dest, vx7) POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1); -#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } void idct_add_altivec(uint8_t* dest, int stride, vector_s16_t* block) { POWERPC_PERF_DECLARE(altivec_idct_add_num, 1); -#ifdef ALTIVEC_USE_REFERENCE_C_CODE -POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1); - void simple_idct_add(uint8_t *dest, int line_size, int16_t *block); - simple_idct_add(dest, stride, (int16_t*)block); -POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1); -#else /* ALTIVEC_USE_REFERENCE_C_CODE */ vector_u8_t tmp; vector_s16_t tmp2, tmp3; vector_u8_t perm0; @@ -244,6 +233,5 @@ POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1); ADD (dest, vx7, perm1) POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1); -#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } diff --git a/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c b/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c index 7a771a8ec..3822cb20e 100644 --- a/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c +++ b/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c @@ -4,18 +4,20 @@ * dct_unquantize_h263_altivec: * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org> * - * This library is free software; you can redistribute it and/or + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * - * This library is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -50,39 +52,6 @@ do { \ d = vec_mergel(_trans_acl, _trans_bdl); \ } while (0) -#define TRANSPOSE8(a,b,c,d,e,f,g,h) \ -do { \ - __typeof__(a) _A1, _B1, _C1, _D1, _E1, _F1, _G1, _H1; \ - __typeof__(a) _A2, _B2, _C2, _D2, _E2, _F2, _G2, _H2; \ - \ - _A1 = vec_mergeh (a, e); \ - _B1 = vec_mergel (a, e); \ - _C1 = vec_mergeh (b, f); \ - _D1 = vec_mergel (b, f); \ - _E1 = vec_mergeh (c, g); \ - _F1 = vec_mergel (c, g); \ - _G1 = vec_mergeh (d, h); \ - _H1 = vec_mergel (d, h); \ - \ - _A2 = vec_mergeh (_A1, _E1); \ - _B2 = vec_mergel (_A1, _E1); \ - _C2 = vec_mergeh (_B1, _F1); \ - _D2 = vec_mergel (_B1, _F1); \ - _E2 = vec_mergeh (_C1, _G1); \ - _F2 = vec_mergel (_C1, _G1); \ - _G2 = vec_mergeh (_D1, _H1); \ - _H2 = vec_mergel (_D1, _H1); \ - \ - a = vec_mergeh (_A2, _E2); \ - b = vec_mergel (_A2, _E2); \ - c = vec_mergeh (_B2, _F2); \ - d = vec_mergel (_B2, _F2); \ - e = vec_mergeh (_C2, _G2); \ - f = vec_mergel (_C2, _G2); \ - g = vec_mergeh (_D2, _H2); \ - h = vec_mergel (_D2, _H2); \ -} while (0) - // Loads a four-byte value (int or float) from the target address // into every element in the target vector. Only works if the @@ -552,19 +521,6 @@ POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1); nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; } -#ifdef ALTIVEC_USE_REFERENCE_C_CODE - for(;i<=nCoeffs;i++) { - level = block[i]; - if (level) { - if (level < 0) { - level = level * qmul - qadd; - } else { - level = level * qmul + qadd; - } - block[i] = level; - } - } -#else /* ALTIVEC_USE_REFERENCE_C_CODE */ { register const_vector signed short vczero = (const_vector signed short)vec_splat_s16(0); short __attribute__ ((aligned(16))) qmul8[] = @@ -643,7 +599,5 @@ POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1); block[0] = backup_0; } } -#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ - POWERPC_PERF_STOP_COUNT(altivec_dct_unquantize_h263_num, nCoeffs == 63); } diff --git a/src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c b/src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c index b391b4294..c5e822f77 100644 --- a/src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c +++ b/src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c @@ -1,18 +1,20 @@ /* * Copyright (c) 2002 Dieter Shirley * - * This library is free software; you can redistribute it and/or + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * - * This library is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -46,11 +48,7 @@ void MPV_common_init_ppc(MpegEncContext *s) { s->dsp.idct_put = idct_put_altivec; s->dsp.idct_add = idct_add_altivec; -#ifndef ALTIVEC_USE_REFERENCE_C_CODE s->dsp.idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; -#else /* ALTIVEC_USE_REFERENCE_C_CODE */ - s->dsp.idct_permutation_type = FF_NO_IDCT_PERM; -#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } } |