summaryrefslogtreecommitdiff
path: root/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c
diff options
context:
space:
mode:
authorMiguel Freitas <miguelfreitas@users.sourceforge.net>2003-01-31 18:29:43 +0000
committerMiguel Freitas <miguelfreitas@users.sourceforge.net>2003-01-31 18:29:43 +0000
commit5350f2b7701f01bc4f234d3971fb8a623a8cd72a (patch)
tree5f6cd350778863ad8d2612bce4ac2f6270919115 /src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c
parent8b0e8647a0d0c279b6a355362452dff4bd6f5c05 (diff)
downloadxine-lib-5350f2b7701f01bc4f234d3971fb8a623a8cd72a.tar.gz
xine-lib-5350f2b7701f01bc4f234d3971fb8a623a8cd72a.tar.bz2
update ffmpeg
CVS patchset: 4068 CVS date: 2003/01/31 18:29:43
Diffstat (limited to 'src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c')
-rw-r--r--src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c169
1 files changed, 151 insertions, 18 deletions
diff --git a/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c b/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c
index bcbc1e6ba..dd898e158 100644
--- a/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c
+++ b/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c
@@ -20,10 +20,7 @@
#include <stdio.h>
#include "../dsputil.h"
#include "../mpegvideo.h"
-
-
-// Used when initializing constant vectors
-#define FOUR_INSTANCES(x) x,x,x,x
+#include "dsputil_altivec.h"
// Swaps two variables (used for altivec registers)
#define SWAP(a,b) \
@@ -93,6 +90,13 @@ do { \
vec = vec_splat(vec, 0); \
}
+
+#ifdef CONFIG_DARWIN
+#define FOUROF(a) (a)
+#else
+// slower, for dumb non-apple GCC
+#define FOUROF(a) {a,a,a,a}
+#endif
int dct_quantize_altivec(MpegEncContext* s,
DCTELEM* data, int n,
int qscale, int* overflow)
@@ -100,7 +104,7 @@ int dct_quantize_altivec(MpegEncContext* s,
int lastNonZero;
vector float row0, row1, row2, row3, row4, row5, row6, row7;
vector float alt0, alt1, alt2, alt3, alt4, alt5, alt6, alt7;
- const vector float zero = {FOUR_INSTANCES(0.0f)};
+ const vector float zero = (const vector float)FOUROF(0.);
// Load the data into the row/alt vectors
{
@@ -144,18 +148,18 @@ int dct_quantize_altivec(MpegEncContext* s,
// in the vector local variables, as floats, which we'll use during the
// quantize step...
{
- const vector float vec_0_298631336 = {FOUR_INSTANCES(0.298631336f)};
- const vector float vec_0_390180644 = {FOUR_INSTANCES(-0.390180644f)};
- const vector float vec_0_541196100 = {FOUR_INSTANCES(0.541196100f)};
- const vector float vec_0_765366865 = {FOUR_INSTANCES(0.765366865f)};
- const vector float vec_0_899976223 = {FOUR_INSTANCES(-0.899976223f)};
- const vector float vec_1_175875602 = {FOUR_INSTANCES(1.175875602f)};
- const vector float vec_1_501321110 = {FOUR_INSTANCES(1.501321110f)};
- const vector float vec_1_847759065 = {FOUR_INSTANCES(-1.847759065f)};
- const vector float vec_1_961570560 = {FOUR_INSTANCES(-1.961570560f)};
- const vector float vec_2_053119869 = {FOUR_INSTANCES(2.053119869f)};
- const vector float vec_2_562915447 = {FOUR_INSTANCES(-2.562915447f)};
- const vector float vec_3_072711026 = {FOUR_INSTANCES(3.072711026f)};
+ const vector float vec_0_298631336 = (vector float)FOUROF(0.298631336f);
+ const vector float vec_0_390180644 = (vector float)FOUROF(-0.390180644f);
+ const vector float vec_0_541196100 = (vector float)FOUROF(0.541196100f);
+ const vector float vec_0_765366865 = (vector float)FOUROF(0.765366865f);
+ const vector float vec_0_899976223 = (vector float)FOUROF(-0.899976223f);
+ const vector float vec_1_175875602 = (vector float)FOUROF(1.175875602f);
+ const vector float vec_1_501321110 = (vector float)FOUROF(1.501321110f);
+ const vector float vec_1_847759065 = (vector float)FOUROF(-1.847759065f);
+ const vector float vec_1_961570560 = (vector float)FOUROF(-1.961570560f);
+ const vector float vec_2_053119869 = (vector float)FOUROF(2.053119869f);
+ const vector float vec_2_562915447 = (vector float)FOUROF(-2.562915447f);
+ const vector float vec_3_072711026 = (vector float)FOUROF(3.072711026f);
int whichPass, whichHalf;
@@ -309,7 +313,7 @@ int dct_quantize_altivec(MpegEncContext* s,
// rounding when we convert to int, instead of flooring.)
{
vector signed int biasInt;
- const vector float negOneFloat = (vector float)(FOUR_INSTANCES(-1.0f));
+ const vector float negOneFloat = (vector float)FOUROF(-1.0f);
LOAD4(biasInt, biasAddr);
bias = vec_ctf(biasInt, QUANT_BIAS_SHIFT);
negBias = vec_madd(bias, negOneFloat, zero);
@@ -506,4 +510,133 @@ int dct_quantize_altivec(MpegEncContext* s,
return lastNonZero;
}
+#undef FOUROF
+
+/*
+ AltiVec version of dct_unquantize_h263
+ this code assumes `block' is 16 bytes-aligned
+*/
+void dct_unquantize_h263_altivec(MpegEncContext *s,
+ DCTELEM *block, int n, int qscale)
+{
+POWERPC_TBL_DECLARE(altivec_dct_unquantize_h263_num, 1);
+ int i, level, qmul, qadd;
+ int nCoeffs;
+
+ assert(s->block_last_index[n]>=0);
+
+POWERPC_TBL_START_COUNT(altivec_dct_unquantize_h263_num, 1);
+
+ qadd = (qscale - 1) | 1;
+ qmul = qscale << 1;
+
+ if (s->mb_intra) {
+ if (!s->h263_aic) {
+ if (n < 4)
+ block[0] = block[0] * s->y_dc_scale;
+ else
+ block[0] = block[0] * s->c_dc_scale;
+ }else
+ qadd = 0;
+ i = 1;
+ nCoeffs= 63; //does not allways use zigzag table
+ } else {
+ i = 0;
+ nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
+ }
+#ifdef ALTIVEC_USE_REFERENCE_C_CODE
+ for(;i<=nCoeffs;i++) {
+ level = block[i];
+ if (level) {
+ if (level < 0) {
+ level = level * qmul - qadd;
+ } else {
+ level = level * qmul + qadd;
+ }
+ block[i] = level;
+ }
+ }
+#else /* ALTIVEC_USE_REFERENCE_C_CODE */
+ {
+ register const vector short vczero = (const vector short)vec_splat_s16(0);
+ short __attribute__ ((aligned(16))) qmul8[] =
+ {
+ qmul, qmul, qmul, qmul,
+ qmul, qmul, qmul, qmul
+ };
+ short __attribute__ ((aligned(16))) qadd8[] =
+ {
+ qadd, qadd, qadd, qadd,
+ qadd, qadd, qadd, qadd
+ };
+ short __attribute__ ((aligned(16))) nqadd8[] =
+ {
+ -qadd, -qadd, -qadd, -qadd,
+ -qadd, -qadd, -qadd, -qadd
+ };
+ register vector short blockv, qmulv, qaddv, nqaddv, temp1;
+ register vector bool short blockv_null, blockv_neg;
+ register short backup_0 = block[0];
+ register int j = 0;
+
+ qmulv = vec_ld(0, qmul8);
+ qaddv = vec_ld(0, qadd8);
+ nqaddv = vec_ld(0, nqadd8);
+
+#if 0 // block *is* 16 bytes-aligned, it seems.
+ // first make sure block[j] is 16 bytes-aligned
+ for(j = 0; (j <= nCoeffs) && ((((unsigned long)block) + (j << 1)) & 0x0000000F) ; j++) {
+ level = block[j];
+ if (level) {
+ if (level < 0) {
+ level = level * qmul - qadd;
+ } else {
+ level = level * qmul + qadd;
+ }
+ block[j] = level;
+ }
+ }
+#endif
+
+ // vectorize all the 16 bytes-aligned blocks
+ // of 8 elements
+ for(; (j + 7) <= nCoeffs ; j+=8)
+ {
+ blockv = vec_ld(j << 1, block);
+ blockv_neg = vec_cmplt(blockv, vczero);
+ blockv_null = vec_cmpeq(blockv, vczero);
+ // choose between +qadd or -qadd as the third operand
+ temp1 = vec_sel(qaddv, nqaddv, blockv_neg);
+ // multiply & add (block{i,i+7} * qmul [+-] qadd)
+ temp1 = vec_mladd(blockv, qmulv, temp1);
+ // put 0 where block[{i,i+7} used to have 0
+ blockv = vec_sel(temp1, blockv, blockv_null);
+ vec_st(blockv, j << 1, block);
+ }
+
+ // if nCoeffs isn't a multiple of 8, finish the job
+ // using good old scalar units.
+ // (we could do it using a truncated vector,
+ // but I'm not sure it's worth the hassle)
+ for(; j <= nCoeffs ; j++) {
+ level = block[j];
+ if (level) {
+ if (level < 0) {
+ level = level * qmul - qadd;
+ } else {
+ level = level * qmul + qadd;
+ }
+ block[j] = level;
+ }
+ }
+
+ if (i == 1)
+ { // cheat. this avoid special-casing the first iteration
+ block[0] = backup_0;
+ }
+ }
+#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
+
+POWERPC_TBL_STOP_COUNT(altivec_dct_unquantize_h263_num, nCoeffs == 63);
+}