summaryrefslogtreecommitdiff
path: root/src/libffmpeg/libavcodec/ppc
diff options
context:
space:
mode:
authorMike Melanson <mike@multimedia.cx>2003-10-27 15:24:38 +0000
committerMike Melanson <mike@multimedia.cx>2003-10-27 15:24:38 +0000
commitc5b6afab8b74e5cc938b8467d3808a877ded7d03 (patch)
tree4a9738571b6330c8895c6ad3faec4d68f72fbb16 /src/libffmpeg/libavcodec/ppc
parentd2a72f348508fd0a78a80f4da795dcf3155f02bc (diff)
downloadxine-lib-c5b6afab8b74e5cc938b8467d3808a877ded7d03.tar.gz
xine-lib-c5b6afab8b74e5cc938b8467d3808a877ded7d03.tar.bz2
super mega ffmpeg tree sync
CVS patchset: 5615 CVS date: 2003/10/27 15:24:38
Diffstat (limited to 'src/libffmpeg/libavcodec/ppc')
-rw-r--r--src/libffmpeg/libavcodec/ppc/dsputil_altivec.c115
-rw-r--r--src/libffmpeg/libavcodec/ppc/dsputil_ppc.c158
-rw-r--r--src/libffmpeg/libavcodec/ppc/dsputil_ppc.h143
-rw-r--r--src/libffmpeg/libavcodec/ppc/fft_altivec.c10
-rw-r--r--src/libffmpeg/libavcodec/ppc/gcc_fixes.h27
-rw-r--r--src/libffmpeg/libavcodec/ppc/gmc_altivec.c11
-rw-r--r--src/libffmpeg/libavcodec/ppc/idct_altivec.c20
-rw-r--r--src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c6
-rw-r--r--src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c2
9 files changed, 288 insertions, 204 deletions
diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c
index 32e881b70..635480784 100644
--- a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c
+++ b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c
@@ -655,11 +655,11 @@ void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) {
/* next one assumes that ((line_size % 16) == 0) */
void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
-POWERPC_TBL_DECLARE(altivec_put_pixels16_num, 1);
+POWERPC_PERF_DECLARE(altivec_put_pixels16_num, 1);
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
int i;
-POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1);
+POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1);
for(i=0; i<h; i++) {
*((uint32_t*)(block )) = (((const struct unaligned_32 *) (pixels))->l);
@@ -670,15 +670,27 @@ POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1);
block +=line_size;
}
-POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1);
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
register vector unsigned char pixelsv1, pixelsv2;
+ register vector unsigned char pixelsv1B, pixelsv2B;
+ register vector unsigned char pixelsv1C, pixelsv2C;
+ register vector unsigned char pixelsv1D, pixelsv2D;
+
register vector unsigned char perm = vec_lvsl(0, pixels);
int i;
-
-POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1);
-
+ register int line_size_2 = line_size << 1;
+ register int line_size_3 = line_size + line_size_2;
+ register int line_size_4 = line_size << 2;
+
+POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1);
+// hand-unrolling the loop by 4 gains about 15%
+// minimum execution time goes from 74 to 60 cycles
+// it's faster than -funroll-loops, but using
+// -funroll-loops w/ this is bad - 74 cycles again.
+// all this is on a 7450, tuning for the 7450
+#if 0
for(i=0; i<h; i++) {
pixelsv1 = vec_ld(0, (unsigned char*)pixels);
pixelsv2 = vec_ld(16, (unsigned char*)pixels);
@@ -687,8 +699,29 @@ POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1);
pixels+=line_size;
block +=line_size;
}
-
-POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1);
+#else
+ for(i=0; i<h; i+=4) {
+ pixelsv1 = vec_ld(0, (unsigned char*)pixels);
+ pixelsv2 = vec_ld(16, (unsigned char*)pixels);
+ pixelsv1B = vec_ld(line_size, (unsigned char*)pixels);
+ pixelsv2B = vec_ld(16 + line_size, (unsigned char*)pixels);
+ pixelsv1C = vec_ld(line_size_2, (unsigned char*)pixels);
+ pixelsv2C = vec_ld(16 + line_size_2, (unsigned char*)pixels);
+ pixelsv1D = vec_ld(line_size_3, (unsigned char*)pixels);
+ pixelsv2D = vec_ld(16 + line_size_3, (unsigned char*)pixels);
+ vec_st(vec_perm(pixelsv1, pixelsv2, perm),
+ 0, (unsigned char*)block);
+ vec_st(vec_perm(pixelsv1B, pixelsv2B, perm),
+ line_size, (unsigned char*)block);
+ vec_st(vec_perm(pixelsv1C, pixelsv2C, perm),
+ line_size_2, (unsigned char*)block);
+ vec_st(vec_perm(pixelsv1D, pixelsv2D, perm),
+ line_size_3, (unsigned char*)block);
+ pixels+=line_size_4;
+ block +=line_size_4;
+ }
+#endif
+POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1);
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
}
@@ -697,11 +730,11 @@ POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1);
#define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
-POWERPC_TBL_DECLARE(altivec_avg_pixels16_num, 1);
+POWERPC_PERF_DECLARE(altivec_avg_pixels16_num, 1);
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
int i;
-POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1);
+POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1);
for(i=0; i<h; i++) {
op_avg(*((uint32_t*)(block)),(((const struct unaligned_32 *)(pixels))->l));
@@ -712,14 +745,14 @@ POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1);
block +=line_size;
}
-POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1);
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
register vector unsigned char perm = vec_lvsl(0, pixels);
int i;
-POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1);
+POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1);
for(i=0; i<h; i++) {
pixelsv1 = vec_ld(0, (unsigned char*)pixels);
@@ -732,7 +765,7 @@ POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1);
block +=line_size;
}
-POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1);
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
}
@@ -740,10 +773,10 @@ POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1);
/* next one assumes that ((line_size % 8) == 0) */
void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
{
-POWERPC_TBL_DECLARE(altivec_avg_pixels8_num, 1);
+POWERPC_PERF_DECLARE(altivec_avg_pixels8_num, 1);
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
int i;
-POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1);
+POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1);
for (i = 0; i < h; i++) {
*((uint32_t *) (block)) =
(((*((uint32_t *) (block))) |
@@ -761,13 +794,13 @@ POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1);
pixels += line_size;
block += line_size;
}
-POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1);
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
int i;
-POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1);
+POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1);
for (i = 0; i < h; i++) {
/*
@@ -798,7 +831,7 @@ POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1);
block += line_size;
}
-POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1);
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
}
@@ -806,10 +839,10 @@ POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1);
/* next one assumes that ((line_size % 8) == 0) */
void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
-POWERPC_TBL_DECLARE(altivec_put_pixels8_xy2_num, 1);
+POWERPC_PERF_DECLARE(altivec_put_pixels8_xy2_num, 1);
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
int j;
-POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1);
+POWERPC_PERF_START_COUNT(altivec_put_pixels8_xy2_num, 1);
for (j = 0; j < 2; j++) {
int i;
const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
@@ -842,7 +875,7 @@ POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1);
block += 4 - line_size * h;
}
-POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
register int i;
@@ -873,7 +906,7 @@ POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
(vector unsigned short)pixelsv2);
pixelssum1 = vec_add(pixelssum1, vctwo);
-POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1);
+POWERPC_PERF_START_COUNT(altivec_put_pixels8_xy2_num, 1);
for (i = 0; i < h ; i++) {
int rightside = ((unsigned long)block & 0x0000000F);
blockv = vec_ld(0, block);
@@ -914,17 +947,17 @@ POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1);
pixels += line_size;
}
-POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
}
/* next one assumes that ((line_size % 8) == 0) */
void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
-POWERPC_TBL_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1);
+POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1);
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
int j;
-POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
+POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
for (j = 0; j < 2; j++) {
int i;
const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
@@ -957,7 +990,7 @@ POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
block += 4 - line_size * h;
}
-POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
register int i;
@@ -989,7 +1022,7 @@ POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
(vector unsigned short)pixelsv2);
pixelssum1 = vec_add(pixelssum1, vcone);
-POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
+POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
for (i = 0; i < h ; i++) {
int rightside = ((unsigned long)block & 0x0000000F);
blockv = vec_ld(0, block);
@@ -1030,17 +1063,17 @@ POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
pixels += line_size;
}
-POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
}
/* next one assumes that ((line_size % 16) == 0) */
void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
{
-POWERPC_TBL_DECLARE(altivec_put_pixels16_xy2_num, 1);
+POWERPC_PERF_DECLARE(altivec_put_pixels16_xy2_num, 1);
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
int j;
-POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1);
+POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1);
for (j = 0; j < 4; j++) {
int i;
const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
@@ -1073,7 +1106,7 @@ POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1);
block += 4 - line_size * h;
}
-POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
register int i;
@@ -1086,7 +1119,9 @@ POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
pixelssum3, pixelssum4, temp4;
register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
-
+
+POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1);
+
temp1 = vec_ld(0, pixels);
temp2 = vec_ld(16, pixels);
pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
@@ -1109,7 +1144,6 @@ POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
(vector unsigned short)pixelsv2);
pixelssum1 = vec_add(pixelssum1, vctwo);
-POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1);
for (i = 0; i < h ; i++) {
blockv = vec_ld(0, block);
@@ -1150,17 +1184,17 @@ POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1);
pixels += line_size;
}
-POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
}
/* next one assumes that ((line_size % 16) == 0) */
void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
{
-POWERPC_TBL_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1);
+POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1);
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
int j;
-POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
+POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
for (j = 0; j < 4; j++) {
int i;
const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
@@ -1193,7 +1227,7 @@ POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
block += 4 - line_size * h;
}
-POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
register int i;
@@ -1207,7 +1241,9 @@ POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);
register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
-
+
+POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
+
temp1 = vec_ld(0, pixels);
temp2 = vec_ld(16, pixels);
pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
@@ -1230,7 +1266,6 @@ POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
(vector unsigned short)pixelsv2);
pixelssum1 = vec_add(pixelssum1, vcone);
-POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
for (i = 0; i < h ; i++) {
blockv = vec_ld(0, block);
@@ -1271,7 +1306,7 @@ POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
pixels += line_size;
}
-POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
}
diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c b/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c
index 374309e37..7af2aa002 100644
--- a/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c
+++ b/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c
@@ -33,7 +33,7 @@ int mm_flags = 0;
int mm_support(void)
{
int result = 0;
-#if HAVE_ALTIVEC
+#ifdef HAVE_ALTIVEC
if (has_altivec()) {
result |= MM_ALTIVEC;
}
@@ -41,8 +41,8 @@ int mm_support(void)
return result;
}
-#ifdef POWERPC_TBL_PERFORMANCE_REPORT
-unsigned long long perfdata[powerpc_perf_total][powerpc_data_total];
+#ifdef POWERPC_PERFORMANCE_REPORT
+unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];
/* list below must match enum in dsputil_ppc.h */
static unsigned char* perfname[] = {
"fft_calc_altivec",
@@ -57,46 +57,35 @@ static unsigned char* perfname[] = {
"put_no_rnd_pixels8_xy2_altivec",
"put_pixels16_xy2_altivec",
"put_no_rnd_pixels16_xy2_altivec",
- "clear_blocks_dcbz32_ppc"
+ "clear_blocks_dcbz32_ppc",
+ "clear_blocks_dcbz128_ppc"
};
-#ifdef POWERPC_PERF_USE_PMC
-unsigned long long perfdata_miss[powerpc_perf_total][powerpc_data_total];
-#endif
#include <stdio.h>
#endif
-#ifdef POWERPC_TBL_PERFORMANCE_REPORT
+#ifdef POWERPC_PERFORMANCE_REPORT
void powerpc_display_perf_report(void)
{
- int i;
-#ifndef POWERPC_PERF_USE_PMC
- fprintf(stderr, "PowerPC performance report\n Values are from the Time Base register, and represent 4 bus cycles.\n");
-#else /* POWERPC_PERF_USE_PMC */
+ int i, j;
fprintf(stderr, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n");
-#endif /* POWERPC_PERF_USE_PMC */
for(i = 0 ; i < powerpc_perf_total ; i++)
{
- if (perfdata[i][powerpc_data_num] != (unsigned long long)0)
- fprintf(stderr, " Function \"%s\" (pmc1):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
- perfname[i],
- perfdata[i][powerpc_data_min],
- perfdata[i][powerpc_data_max],
- (double)perfdata[i][powerpc_data_sum] /
- (double)perfdata[i][powerpc_data_num],
- perfdata[i][powerpc_data_num]);
-#ifdef POWERPC_PERF_USE_PMC
- if (perfdata_miss[i][powerpc_data_num] != (unsigned long long)0)
- fprintf(stderr, " Function \"%s\" (pmc2):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
- perfname[i],
- perfdata_miss[i][powerpc_data_min],
- perfdata_miss[i][powerpc_data_max],
- (double)perfdata_miss[i][powerpc_data_sum] /
- (double)perfdata_miss[i][powerpc_data_num],
- perfdata_miss[i][powerpc_data_num]);
-#endif
+ for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
+ {
+ if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0)
+ fprintf(stderr,
+ " Function \"%s\" (pmc%d):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
+ perfname[i],
+ j+1,
+ perfdata[j][i][powerpc_data_min],
+ perfdata[j][i][powerpc_data_max],
+ (double)perfdata[j][i][powerpc_data_sum] /
+ (double)perfdata[j][i][powerpc_data_num],
+ perfdata[j][i][powerpc_data_num]);
+ }
}
}
-#endif /* POWERPC_TBL_PERFORMANCE_REPORT */
+#endif /* POWERPC_PERFORMANCE_REPORT */
/* ***** WARNING ***** WARNING ***** WARNING ***** */
/*
@@ -110,13 +99,25 @@ void powerpc_display_perf_report(void)
It simply clear to zero a single cache line,
so you need to know the cache line size to use it !
It's absurd, but it's fast...
+
+ update 24/06/2003 : Apple released the G5 yesterday,
+ with a PPC970. Cache line size : 128 bytes. Oops.
+ The semantics of dcbz were changed: it always clears
+ 32 bytes, so the function below will work, but will
+ be slow. So I fixed check_dcbz_effect to use dcbzl,
+ which is defined to clear a cache line (as dcbz did before).
+ So we can still distinguish, and use dcbz (32 bytes)
+ or dcbzl (one cache line) as required.
+
+ see <http://developer.apple.com/technotes/tn/tn2087.html>
+ and <http://developer.apple.com/technotes/tn/tn2086.html>
*/
void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
{
-POWERPC_TBL_DECLARE(powerpc_clear_blocks_dcbz32, 1);
+POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1);
register int misal = ((unsigned long)blocks & 0x00000010);
register int i = 0;
-POWERPC_TBL_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
+POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
#if 1
if (misal) {
((unsigned long*)blocks)[0] = 0L;
@@ -126,7 +127,7 @@ POWERPC_TBL_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
i += 16;
}
for ( ; i < sizeof(DCTELEM)*6*64 ; i += 32) {
- asm volatile("dcbz %0,%1" : : "r" (blocks), "r" (i) : "memory");
+ asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
}
if (misal) {
((unsigned long*)blocks)[188] = 0L;
@@ -138,11 +139,48 @@ POWERPC_TBL_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
#else
memset(blocks, 0, sizeof(DCTELEM)*6*64);
#endif
-POWERPC_TBL_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1);
+POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1);
+}
+
+/* same as above, when dcbzl clears a whole 128B cache line
+ i.e. the PPC970 aka G5 */
+#ifndef NO_DCBZL
+void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
+{
+POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1);
+ register int misal = ((unsigned long)blocks & 0x0000007f);
+ register int i = 0;
+POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
+#if 1
+ if (misal) {
+ // we could probably also optimize this case,
+ // but there's not much point as the machines
+ // aren't available yet (2003-06-26)
+ memset(blocks, 0, sizeof(DCTELEM)*6*64);
+ }
+ else
+ for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
+ asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
+ }
+#else
+ memset(blocks, 0, sizeof(DCTELEM)*6*64);
+#endif
+POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1);
+}
+#else
+void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
+{
+ memset(blocks, 0, sizeof(DCTELEM)*6*64);
}
+#endif
+#ifndef NO_DCBZL
/* check dcbz report how many bytes are set to 0 by dcbz */
-long check_dcbz_effect(void)
+/* update 24/06/2003 : replace dcbz by dcbzl to get
+ the intended effect (Apple "fixed" dcbz)
+ unfortunately this cannot be used unless the assembler
+ knows about dcbzl ... */
+long check_dcbzl_effect(void)
{
register char *fakedata = (char*)av_malloc(1024);
register char *fakedata_middle;
@@ -159,7 +197,9 @@ long check_dcbz_effect(void)
memset(fakedata, 0xFF, 1024);
- asm volatile("dcbz %0, %1" : : "r" (fakedata_middle), "r" (zero));
+ /* The "b" constraint below seems to mean "address base register"
+ in gcc-3.3 / RS/6000 speak. It seems to avoid using r0, so.... */
+ asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero));
for (i = 0; i < 1024 ; i ++)
{
@@ -171,20 +211,29 @@ long check_dcbz_effect(void)
return count;
}
+#else
+long check_dcbzl_effect(void)
+{
+ return 0;
+}
+#endif
void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
{
- // Common optimisations whether Altivec or not
+ // Common optimizations whether Altivec is available or not
- switch (check_dcbz_effect()) {
+ switch (check_dcbzl_effect()) {
case 32:
c->clear_blocks = clear_blocks_dcbz32_ppc;
break;
+ case 128:
+ c->clear_blocks = clear_blocks_dcbz128_ppc;
+ break;
default:
break;
}
-#if HAVE_ALTIVEC
+#ifdef HAVE_ALTIVEC
if (has_altivec()) {
mm_flags |= MM_ALTIVEC;
@@ -207,6 +256,8 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
c->add_bytes= add_bytes_altivec;
#endif /* 0 */
c->put_pixels_tab[0][0] = put_pixels16_altivec;
+ /* the two functions do the same thing, so use the same code */
+ c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
c->avg_pixels_tab[0][0] = avg_pixels16_altivec;
// next one disabled as it's untested.
#if 0
@@ -231,24 +282,21 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
}
-#ifdef POWERPC_TBL_PERFORMANCE_REPORT
+#ifdef POWERPC_PERFORMANCE_REPORT
{
- int i;
+ int i, j;
for (i = 0 ; i < powerpc_perf_total ; i++)
{
- perfdata[i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFF;
- perfdata[i][powerpc_data_max] = 0x0000000000000000;
- perfdata[i][powerpc_data_sum] = 0x0000000000000000;
- perfdata[i][powerpc_data_num] = 0x0000000000000000;
-#ifdef POWERPC_PERF_USE_PMC
- perfdata_miss[i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFF;
- perfdata_miss[i][powerpc_data_max] = 0x0000000000000000;
- perfdata_miss[i][powerpc_data_sum] = 0x0000000000000000;
- perfdata_miss[i][powerpc_data_num] = 0x0000000000000000;
-#endif /* POWERPC_PERF_USE_PMC */
- }
+ for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
+ {
+ perfdata[j][i][powerpc_data_min] = (unsigned long long)0xFFFFFFFFFFFFFFFF;
+ perfdata[j][i][powerpc_data_max] = (unsigned long long)0x0000000000000000;
+ perfdata[j][i][powerpc_data_sum] = (unsigned long long)0x0000000000000000;
+ perfdata[j][i][powerpc_data_num] = (unsigned long long)0x0000000000000000;
+ }
+ }
}
-#endif /* POWERPC_TBL_PERFORMANCE_REPORT */
+#endif /* POWERPC_PERFORMANCE_REPORT */
} else
#endif /* HAVE_ALTIVEC */
{
diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_ppc.h b/src/libffmpeg/libavcodec/ppc/dsputil_ppc.h
index ef1481a99..4cb299dd9 100644
--- a/src/libffmpeg/libavcodec/ppc/dsputil_ppc.h
+++ b/src/libffmpeg/libavcodec/ppc/dsputil_ppc.h
@@ -19,8 +19,21 @@
#ifndef _DSPUTIL_PPC_
#define _DSPUTIL_PPC_
-#ifdef POWERPC_TBL_PERFORMANCE_REPORT
+#ifdef CONFIG_DARWIN
+/* The Apple assembler shipped w/ gcc-3.3 knows about DCBZL, previous assemblers don't
+ We assume here that the Darwin GCC is from Apple.... */
+#if (__GNUC__ * 100 + __GNUC_MINOR__ < 303)
+#define NO_DCBZL
+#endif
+#else /* CONFIG_DARWIN */
+/* I don't think any non-Apple assembler knows about DCBZL */
+#define NO_DCBZL
+#endif /* CONFIG_DARWIN */
+
+#ifdef POWERPC_PERFORMANCE_REPORT
void powerpc_display_perf_report(void);
+/* the 604* have 2, the G3* have 4, the G4s have 6 */
+#define POWERPC_NUM_PMC_ENABLED 4
/* if you add to the enum below, also add to the perfname array
in dsputil_ppc.c */
enum powerpc_perf_index {
@@ -37,6 +50,7 @@ enum powerpc_perf_index {
altivec_put_pixels16_xy2_num,
altivec_put_no_rnd_pixels16_xy2_num,
powerpc_clear_blocks_dcbz32,
+ powerpc_clear_blocks_dcbz128,
powerpc_perf_total
};
enum powerpc_data_index {
@@ -46,78 +60,65 @@ enum powerpc_data_index {
powerpc_data_num,
powerpc_data_total
};
-extern unsigned long long perfdata[powerpc_perf_total][powerpc_data_total];
-#ifdef POWERPC_PERF_USE_PMC
-extern unsigned long long perfdata_miss[powerpc_perf_total][powerpc_data_total];
-#endif
-
-#ifndef POWERPC_PERF_USE_PMC
-#define POWERPC_GET_CYCLES(a) asm volatile("mftb %0" : "=r" (a))
-#define POWERPC_TBL_DECLARE(a, cond) register unsigned long tbl_start, tbl_stop
-#define POWERPC_TBL_START_COUNT(a, cond) do { POWERPC_GET_CYCLES(tbl_start); } while (0)
-#define POWERPC_TBL_STOP_COUNT(a, cond) do { \
- POWERPC_GET_CYCLES(tbl_stop); \
- if (tbl_stop > tbl_start) \
- { \
- unsigned long diff = tbl_stop - tbl_start; \
- if (cond) \
- { \
- if (diff < perfdata[a][powerpc_data_min]) \
- perfdata[a][powerpc_data_min] = diff; \
- if (diff > perfdata[a][powerpc_data_max]) \
- perfdata[a][powerpc_data_max] = diff; \
- perfdata[a][powerpc_data_sum] += diff; \
- perfdata[a][powerpc_data_num] ++; \
- } \
- } \
-} while (0)
+extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];
-#else /* POWERPC_PERF_USE_PMC */
-#define POWERPC_GET_CYCLES(a) asm volatile("mfspr %0, 937" : "=r" (a))
-#define POWERPC_GET_MISS(a) asm volatile("mfspr %0, 938" : "=r" (a))
-#define POWERPC_TBL_DECLARE(a, cond) register unsigned long cycles_start, cycles_stop, miss_start, miss_stop
-#define POWERPC_TBL_START_COUNT(a, cond) do { POWERPC_GET_MISS(miss_start); POWERPC_GET_CYCLES(cycles_start); } while (0)
-#define POWERPC_TBL_STOP_COUNT(a, cond) do { \
- POWERPC_GET_CYCLES(cycles_stop); \
- POWERPC_GET_MISS(miss_stop); \
- if (cycles_stop >= cycles_start) \
- { \
- unsigned long diff = \
- cycles_stop - cycles_start; \
- if (cond) \
- { \
- if (diff < perfdata[a][powerpc_data_min]) \
- perfdata[a][powerpc_data_min] = diff; \
- if (diff > perfdata[a][powerpc_data_max]) \
- perfdata[a][powerpc_data_max] = diff; \
- perfdata[a][powerpc_data_sum] += diff; \
- perfdata[a][powerpc_data_num] ++; \
- } \
- } \
- if (miss_stop >= miss_start) \
- { \
- unsigned long diff = \
- miss_stop - miss_start; \
- if (cond) \
- { \
- if (diff < perfdata_miss[a][powerpc_data_min]) \
- perfdata_miss[a][powerpc_data_min] = diff; \
- if (diff > perfdata_miss[a][powerpc_data_max]) \
- perfdata_miss[a][powerpc_data_max] = diff; \
- perfdata_miss[a][powerpc_data_sum] += diff; \
- perfdata_miss[a][powerpc_data_num] ++; \
- } \
- } \
+#define POWERPC_GET_PMC1(a) asm volatile("mfspr %0, 937" : "=r" (a))
+#define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 938" : "=r" (a))
+#if (POWERPC_NUM_PMC_ENABLED > 2)
+#define POWERPC_GET_PMC3(a) asm volatile("mfspr %0, 941" : "=r" (a))
+#define POWERPC_GET_PMC4(a) asm volatile("mfspr %0, 942" : "=r" (a))
+#else
+#define POWERPC_GET_PMC3(a) do {} while (0)
+#define POWERPC_GET_PMC4(a) do {} while (0)
+#endif
+#if (POWERPC_NUM_PMC_ENABLED > 4)
+#define POWERPC_GET_PMC5(a) asm volatile("mfspr %0, 929" : "=r" (a))
+#define POWERPC_GET_PMC6(a) asm volatile("mfspr %0, 930" : "=r" (a))
+#else
+#define POWERPC_GET_PMC5(a) do {} while (0)
+#define POWERPC_GET_PMC6(a) do {} while (0)
+#endif
+#define POWERPC_PERF_DECLARE(a, cond) unsigned long pmc_start[POWERPC_NUM_PMC_ENABLED], pmc_stop[POWERPC_NUM_PMC_ENABLED], pmc_loop_index;
+#define POWERPC_PERF_START_COUNT(a, cond) do { \
+ POWERPC_GET_PMC6(pmc_start[5]); \
+ POWERPC_GET_PMC5(pmc_start[4]); \
+ POWERPC_GET_PMC4(pmc_start[3]); \
+ POWERPC_GET_PMC3(pmc_start[2]); \
+ POWERPC_GET_PMC2(pmc_start[1]); \
+ POWERPC_GET_PMC1(pmc_start[0]); \
+ } while (0)
+#define POWERPC_PERF_STOP_COUNT(a, cond) do { \
+ POWERPC_GET_PMC1(pmc_stop[0]); \
+ POWERPC_GET_PMC2(pmc_stop[1]); \
+ POWERPC_GET_PMC3(pmc_stop[2]); \
+ POWERPC_GET_PMC4(pmc_stop[3]); \
+ POWERPC_GET_PMC5(pmc_stop[4]); \
+ POWERPC_GET_PMC6(pmc_stop[5]); \
+ if (cond) \
+ { \
+ for(pmc_loop_index = 0; \
+ pmc_loop_index < POWERPC_NUM_PMC_ENABLED; \
+ pmc_loop_index++) \
+ { \
+ if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index]) \
+ { \
+ unsigned long diff = \
+ pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index]; \
+ if (diff < perfdata[pmc_loop_index][a][powerpc_data_min]) \
+ perfdata[pmc_loop_index][a][powerpc_data_min] = diff; \
+ if (diff > perfdata[pmc_loop_index][a][powerpc_data_max]) \
+ perfdata[pmc_loop_index][a][powerpc_data_max] = diff; \
+ perfdata[pmc_loop_index][a][powerpc_data_sum] += diff; \
+ perfdata[pmc_loop_index][a][powerpc_data_num] ++; \
+ } \
+ } \
+ } \
} while (0)
-
-#endif /* POWERPC_PERF_USE_PMC */
-
-
-#else /* POWERPC_TBL_PERFORMANCE_REPORT */
+#else /* POWERPC_PERFORMANCE_REPORT */
// those are needed to avoid empty statements.
-#define POWERPC_TBL_DECLARE(a, cond) int altivec_placeholder __attribute__ ((unused))
-#define POWERPC_TBL_START_COUNT(a, cond) do {} while (0)
-#define POWERPC_TBL_STOP_COUNT(a, cond) do {} while (0)
-#endif /* POWERPC_TBL_PERFORMANCE_REPORT */
+#define POWERPC_PERF_DECLARE(a, cond) int altivec_placeholder __attribute__ ((unused))
+#define POWERPC_PERF_START_COUNT(a, cond) do {} while (0)
+#define POWERPC_PERF_STOP_COUNT(a, cond) do {} while (0)
+#endif /* POWERPC_PERFORMANCE_REPORT */
#endif /* _DSPUTIL_PPC_ */
diff --git a/src/libffmpeg/libavcodec/ppc/fft_altivec.c b/src/libffmpeg/libavcodec/ppc/fft_altivec.c
index 75c95bb87..e39c9dbb7 100644
--- a/src/libffmpeg/libavcodec/ppc/fft_altivec.c
+++ b/src/libffmpeg/libavcodec/ppc/fft_altivec.c
@@ -62,7 +62,7 @@
*/
void fft_calc_altivec(FFTContext *s, FFTComplex *z)
{
-POWERPC_TBL_DECLARE(altivec_fft_num, s->nbits >= 6);
+POWERPC_PERF_DECLARE(altivec_fft_num, s->nbits >= 6);
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
int ln = s->nbits;
int j, np, np2;
@@ -72,7 +72,7 @@ POWERPC_TBL_DECLARE(altivec_fft_num, s->nbits >= 6);
int l;
FFTSample tmp_re, tmp_im;
-POWERPC_TBL_START_COUNT(altivec_fft_num, s->nbits >= 6);
+POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6);
np = 1 << ln;
@@ -137,7 +137,7 @@ POWERPC_TBL_START_COUNT(altivec_fft_num, s->nbits >= 6);
nloops = nloops << 1;
} while (nblocks != 0);
-POWERPC_TBL_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
+POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
#ifdef CONFIG_DARWIN
@@ -153,7 +153,7 @@ POWERPC_TBL_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
FFTComplex *cptr, *cptr1;
int k;
-POWERPC_TBL_START_COUNT(altivec_fft_num, s->nbits >= 6);
+POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6);
np = 1 << ln;
@@ -241,7 +241,7 @@ POWERPC_TBL_START_COUNT(altivec_fft_num, s->nbits >= 6);
nloops = nloops << 1;
} while (nblocks != 0);
-POWERPC_TBL_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
+POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
}
diff --git a/src/libffmpeg/libavcodec/ppc/gcc_fixes.h b/src/libffmpeg/libavcodec/ppc/gcc_fixes.h
index 855a5b4f7..a8e92cb2f 100644
--- a/src/libffmpeg/libavcodec/ppc/gcc_fixes.h
+++ b/src/libffmpeg/libavcodec/ppc/gcc_fixes.h
@@ -15,7 +15,6 @@
#define AVV(x...) (x)
#else
#define AVV(x...) {x}
-
#if (__GNUC__ * 100 + __GNUC_MINOR__ < 303)
/* This code was provided to me by Bartosch Pixa
@@ -26,7 +25,7 @@
* http://gcc.gnu.org/ml/gcc/2003-04/msg00967.html
*/
-static inline vector signed char my_vmrglb (vector signed char const A,
+static inline vector signed char ff_vmrglb (vector signed char const A,
vector signed char const B)
{
static const vector unsigned char lowbyte = {
@@ -36,7 +35,7 @@ static inline vector signed char my_vmrglb (vector signed char const A,
return vec_perm (A, B, lowbyte);
}
-static inline vector signed short my_vmrglh (vector signed short const A,
+static inline vector signed short ff_vmrglh (vector signed short const A,
vector signed short const B)
{
static const vector unsigned char lowhalf = {
@@ -46,7 +45,7 @@ static inline vector signed short my_vmrglh (vector signed short const A,
return vec_perm (A, B, lowhalf);
}
-static inline vector signed int my_vmrglw (vector signed int const A,
+static inline vector signed int ff_vmrglw (vector signed int const A,
vector signed int const B)
{
static const vector unsigned char lowword = {
@@ -55,27 +54,27 @@ static inline vector signed int my_vmrglw (vector signed int const A,
};
return vec_perm (A, B, lowword);
}
-/*#define my_vmrglb my_vmrglb
-#define my_vmrglh my_vmrglh
-#define my_vmrglw my_vmrglw
+/*#define ff_vmrglb ff_vmrglb
+#define ff_vmrglh ff_vmrglh
+#define ff_vmrglw ff_vmrglw
*/
#undef vec_mergel
#define vec_mergel(a1, a2) \
__ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
- ((vector signed char) my_vmrglb ((vector signed char) (a1), (vector signed char) (a2))), \
+ ((vector signed char) ff_vmrglb ((vector signed char) (a1), (vector signed char) (a2))), \
__ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
- ((vector unsigned char) my_vmrglb ((vector signed char) (a1), (vector signed char) (a2))), \
+ ((vector unsigned char) ff_vmrglb ((vector signed char) (a1), (vector signed char) (a2))), \
__ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
- ((vector signed short) my_vmrglh ((vector signed short) (a1), (vector signed short) (a2))), \
+ ((vector signed short) ff_vmrglh ((vector signed short) (a1), (vector signed short) (a2))), \
__ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
- ((vector unsigned short) my_vmrglh ((vector signed short) (a1), (vector signed short) (a2))), \
+ ((vector unsigned short) ff_vmrglh ((vector signed short) (a1), (vector signed short) (a2))), \
__ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
- ((vector float) my_vmrglw ((vector signed int) (a1), (vector signed int) (a2))), \
+ ((vector float) ff_vmrglw ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
- ((vector signed int) my_vmrglw ((vector signed int) (a1), (vector signed int) (a2))), \
+ ((vector signed int) ff_vmrglw ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
- ((vector unsigned int) my_vmrglw ((vector signed int) (a1), (vector signed int) (a2))), \
+ ((vector unsigned int) ff_vmrglw ((vector signed int) (a1), (vector signed int) (a2))), \
__altivec_link_error_invalid_argument ())))))))
#endif
diff --git a/src/libffmpeg/libavcodec/ppc/gmc_altivec.c b/src/libffmpeg/libavcodec/ppc/gmc_altivec.c
index 18d52bbc5..671ee110a 100644
--- a/src/libffmpeg/libavcodec/ppc/gmc_altivec.c
+++ b/src/libffmpeg/libavcodec/ppc/gmc_altivec.c
@@ -28,9 +28,10 @@
altivec-enhanced gmc1. ATM this code assumes stride is a multiple of 8,
to preserve proper dst alignment.
*/
+#define GMC1_PERF_COND (h==8)
void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder)
{
-POWERPC_TBL_DECLARE(altivec_gmc1_num, h == 8);
+POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
const int A=(16-x16)*(16-y16);
const int B=( x16)*(16-y16);
@@ -38,7 +39,7 @@ POWERPC_TBL_DECLARE(altivec_gmc1_num, h == 8);
const int D=( x16)*( y16);
int i;
-POWERPC_TBL_START_COUNT(altivec_gmc1_num, h == 8);
+POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
for(i=0; i<h; i++)
{
@@ -54,7 +55,7 @@ POWERPC_TBL_START_COUNT(altivec_gmc1_num, h == 8);
src+= stride;
}
-POWERPC_TBL_STOP_COUNT(altivec_gmc1_num, h == 8);
+POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
const unsigned short __attribute__ ((aligned(16))) rounder_a[8] =
@@ -77,7 +78,7 @@ POWERPC_TBL_STOP_COUNT(altivec_gmc1_num, h == 8);
unsigned long src_really_odd = (unsigned long)src & 0x0000000F;
-POWERPC_TBL_START_COUNT(altivec_gmc1_num, h == 8);
+POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
tempA = vec_ld(0, (unsigned short*)ABCD);
Av = vec_splat(tempA, 0);
@@ -165,7 +166,7 @@ POWERPC_TBL_START_COUNT(altivec_gmc1_num, h == 8);
src += stride;
}
-POWERPC_TBL_STOP_COUNT(altivec_gmc1_num, h == 8);
+POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
}
diff --git a/src/libffmpeg/libavcodec/ppc/idct_altivec.c b/src/libffmpeg/libavcodec/ppc/idct_altivec.c
index f8a8aa678..d821ecd22 100644
--- a/src/libffmpeg/libavcodec/ppc/idct_altivec.c
+++ b/src/libffmpeg/libavcodec/ppc/idct_altivec.c
@@ -165,16 +165,16 @@ static const vector_s16_t constants[5] = {
void idct_put_altivec(uint8_t* dest, int stride, vector_s16_t* block)
{
-POWERPC_TBL_DECLARE(altivec_idct_put_num, 1);
+POWERPC_PERF_DECLARE(altivec_idct_put_num, 1);
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
-POWERPC_TBL_START_COUNT(altivec_idct_put_num, 1);
+POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1);
void simple_idct_put(uint8_t *dest, int line_size, int16_t *block);
simple_idct_put(dest, stride, (int16_t*)block);
-POWERPC_TBL_STOP_COUNT(altivec_idct_put_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1);
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
vector_u8_t tmp;
-POWERPC_TBL_START_COUNT(altivec_idct_put_num, 1);
+POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1);
IDCT
@@ -192,18 +192,18 @@ POWERPC_TBL_START_COUNT(altivec_idct_put_num, 1);
COPY (dest, vx6) dest += stride;
COPY (dest, vx7)
-POWERPC_TBL_STOP_COUNT(altivec_idct_put_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1);
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
}
void idct_add_altivec(uint8_t* dest, int stride, vector_s16_t* block)
{
-POWERPC_TBL_DECLARE(altivec_idct_add_num, 1);
+POWERPC_PERF_DECLARE(altivec_idct_add_num, 1);
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
-POWERPC_TBL_START_COUNT(altivec_idct_add_num, 1);
+POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1);
void simple_idct_add(uint8_t *dest, int line_size, int16_t *block);
simple_idct_add(dest, stride, (int16_t*)block);
-POWERPC_TBL_STOP_COUNT(altivec_idct_add_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1);
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
vector_u8_t tmp;
vector_s16_t tmp2, tmp3;
@@ -211,7 +211,7 @@ POWERPC_TBL_STOP_COUNT(altivec_idct_add_num, 1);
vector_u8_t perm1;
vector_u8_t p0, p1, p;
-POWERPC_TBL_START_COUNT(altivec_idct_add_num, 1);
+POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1);
IDCT
@@ -239,7 +239,7 @@ POWERPC_TBL_START_COUNT(altivec_idct_add_num, 1);
ADD (dest, vx6, perm0) dest += stride;
ADD (dest, vx7, perm1)
-POWERPC_TBL_STOP_COUNT(altivec_idct_add_num, 1);
+POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1);
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
}
diff --git a/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c b/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c
index bbf9c4433..ae3170d91 100644
--- a/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c
+++ b/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c
@@ -522,13 +522,13 @@ int dct_quantize_altivec(MpegEncContext* s,
void dct_unquantize_h263_altivec(MpegEncContext *s,
DCTELEM *block, int n, int qscale)
{
-POWERPC_TBL_DECLARE(altivec_dct_unquantize_h263_num, 1);
+POWERPC_PERF_DECLARE(altivec_dct_unquantize_h263_num, 1);
int i, level, qmul, qadd;
int nCoeffs;
assert(s->block_last_index[n]>=0);
-POWERPC_TBL_START_COUNT(altivec_dct_unquantize_h263_num, 1);
+POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1);
qadd = (qscale - 1) | 1;
qmul = qscale << 1;
@@ -641,5 +641,5 @@ POWERPC_TBL_START_COUNT(altivec_dct_unquantize_h263_num, 1);
}
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
-POWERPC_TBL_STOP_COUNT(altivec_dct_unquantize_h263_num, nCoeffs == 63);
+POWERPC_PERF_STOP_COUNT(altivec_dct_unquantize_h263_num, nCoeffs == 63);
}
diff --git a/src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c b/src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c
index c290dde16..18e86dce9 100644
--- a/src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c
+++ b/src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c
@@ -36,7 +36,7 @@ extern void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);
void MPV_common_init_ppc(MpegEncContext *s)
{
-#if HAVE_ALTIVEC
+#ifdef HAVE_ALTIVEC
if (has_altivec())
{
if ((s->avctx->idct_algo == FF_IDCT_AUTO) ||