diff options
author | Miguel Freitas <miguelfreitas@users.sourceforge.net> | 2003-01-31 18:29:43 +0000 |
---|---|---|
committer | Miguel Freitas <miguelfreitas@users.sourceforge.net> | 2003-01-31 18:29:43 +0000 |
commit | 5350f2b7701f01bc4f234d3971fb8a623a8cd72a (patch) | |
tree | 5f6cd350778863ad8d2612bce4ac2f6270919115 /src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c | |
parent | 8b0e8647a0d0c279b6a355362452dff4bd6f5c05 (diff) | |
download | xine-lib-5350f2b7701f01bc4f234d3971fb8a623a8cd72a.tar.gz xine-lib-5350f2b7701f01bc4f234d3971fb8a623a8cd72a.tar.bz2 |
update ffmpeg
CVS patchset: 4068
CVS date: 2003/01/31 18:29:43
Diffstat (limited to 'src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c')
-rw-r--r-- | src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c | 169 |
1 files changed, 151 insertions, 18 deletions
diff --git a/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c b/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c index bcbc1e6ba..dd898e158 100644 --- a/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c +++ b/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c @@ -20,10 +20,7 @@ #include <stdio.h> #include "../dsputil.h" #include "../mpegvideo.h" - - -// Used when initializing constant vectors -#define FOUR_INSTANCES(x) x,x,x,x +#include "dsputil_altivec.h" // Swaps two variables (used for altivec registers) #define SWAP(a,b) \ @@ -93,6 +90,13 @@ do { \ vec = vec_splat(vec, 0); \ } + +#ifdef CONFIG_DARWIN +#define FOUROF(a) (a) +#else +// slower, for dumb non-apple GCC +#define FOUROF(a) {a,a,a,a} +#endif int dct_quantize_altivec(MpegEncContext* s, DCTELEM* data, int n, int qscale, int* overflow) @@ -100,7 +104,7 @@ int dct_quantize_altivec(MpegEncContext* s, int lastNonZero; vector float row0, row1, row2, row3, row4, row5, row6, row7; vector float alt0, alt1, alt2, alt3, alt4, alt5, alt6, alt7; - const vector float zero = {FOUR_INSTANCES(0.0f)}; + const vector float zero = (const vector float)FOUROF(0.); // Load the data into the row/alt vectors { @@ -144,18 +148,18 @@ int dct_quantize_altivec(MpegEncContext* s, // in the vector local variables, as floats, which we'll use during the // quantize step... 
{ - const vector float vec_0_298631336 = {FOUR_INSTANCES(0.298631336f)}; - const vector float vec_0_390180644 = {FOUR_INSTANCES(-0.390180644f)}; - const vector float vec_0_541196100 = {FOUR_INSTANCES(0.541196100f)}; - const vector float vec_0_765366865 = {FOUR_INSTANCES(0.765366865f)}; - const vector float vec_0_899976223 = {FOUR_INSTANCES(-0.899976223f)}; - const vector float vec_1_175875602 = {FOUR_INSTANCES(1.175875602f)}; - const vector float vec_1_501321110 = {FOUR_INSTANCES(1.501321110f)}; - const vector float vec_1_847759065 = {FOUR_INSTANCES(-1.847759065f)}; - const vector float vec_1_961570560 = {FOUR_INSTANCES(-1.961570560f)}; - const vector float vec_2_053119869 = {FOUR_INSTANCES(2.053119869f)}; - const vector float vec_2_562915447 = {FOUR_INSTANCES(-2.562915447f)}; - const vector float vec_3_072711026 = {FOUR_INSTANCES(3.072711026f)}; + const vector float vec_0_298631336 = (vector float)FOUROF(0.298631336f); + const vector float vec_0_390180644 = (vector float)FOUROF(-0.390180644f); + const vector float vec_0_541196100 = (vector float)FOUROF(0.541196100f); + const vector float vec_0_765366865 = (vector float)FOUROF(0.765366865f); + const vector float vec_0_899976223 = (vector float)FOUROF(-0.899976223f); + const vector float vec_1_175875602 = (vector float)FOUROF(1.175875602f); + const vector float vec_1_501321110 = (vector float)FOUROF(1.501321110f); + const vector float vec_1_847759065 = (vector float)FOUROF(-1.847759065f); + const vector float vec_1_961570560 = (vector float)FOUROF(-1.961570560f); + const vector float vec_2_053119869 = (vector float)FOUROF(2.053119869f); + const vector float vec_2_562915447 = (vector float)FOUROF(-2.562915447f); + const vector float vec_3_072711026 = (vector float)FOUROF(3.072711026f); int whichPass, whichHalf; @@ -309,7 +313,7 @@ int dct_quantize_altivec(MpegEncContext* s, // rounding when we convert to int, instead of flooring.) 
{ vector signed int biasInt; - const vector float negOneFloat = (vector float)(FOUR_INSTANCES(-1.0f)); + const vector float negOneFloat = (vector float)FOUROF(-1.0f); LOAD4(biasInt, biasAddr); bias = vec_ctf(biasInt, QUANT_BIAS_SHIFT); negBias = vec_madd(bias, negOneFloat, zero); @@ -506,4 +510,133 @@ int dct_quantize_altivec(MpegEncContext* s, return lastNonZero; } +#undef FOUROF + +/* + AltiVec version of dct_unquantize_h263 + this code assumes `block' is 16-byte aligned +*/ +void dct_unquantize_h263_altivec(MpegEncContext *s, + DCTELEM *block, int n, int qscale) +{ +POWERPC_TBL_DECLARE(altivec_dct_unquantize_h263_num, 1); + int i, level, qmul, qadd; + int nCoeffs; + + assert(s->block_last_index[n]>=0); + +POWERPC_TBL_START_COUNT(altivec_dct_unquantize_h263_num, 1); + + qadd = (qscale - 1) | 1; + qmul = qscale << 1; + + if (s->mb_intra) { + if (!s->h263_aic) { + if (n < 4) + block[0] = block[0] * s->y_dc_scale; + else + block[0] = block[0] * s->c_dc_scale; + }else + qadd = 0; + i = 1; + nCoeffs= 63; //does not always use zigzag table + } else { + i = 0; + nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; + } +#ifdef ALTIVEC_USE_REFERENCE_C_CODE + for(;i<=nCoeffs;i++) { + level = block[i]; + if (level) { + if (level < 0) { + level = level * qmul - qadd; + } else { + level = level * qmul + qadd; + } + block[i] = level; + } + } +#else /* ALTIVEC_USE_REFERENCE_C_CODE */ + { + register const vector short vczero = (const vector short)vec_splat_s16(0); + short __attribute__ ((aligned(16))) qmul8[] = + { + qmul, qmul, qmul, qmul, + qmul, qmul, qmul, qmul + }; + short __attribute__ ((aligned(16))) qadd8[] = + { + qadd, qadd, qadd, qadd, + qadd, qadd, qadd, qadd + }; + short __attribute__ ((aligned(16))) nqadd8[] = + { + -qadd, -qadd, -qadd, -qadd, + -qadd, -qadd, -qadd, -qadd + }; + register vector short blockv, qmulv, qaddv, nqaddv, temp1; + register vector bool short blockv_null, blockv_neg; + register short backup_0 = block[0]; + register int j = 0; + 
+ qmulv = vec_ld(0, qmul8); + qaddv = vec_ld(0, qadd8); + nqaddv = vec_ld(0, nqadd8); + +#if 0 // block *is* 16 bytes-aligned, it seems. + // first make sure block[j] is 16 bytes-aligned + for(j = 0; (j <= nCoeffs) && ((((unsigned long)block) + (j << 1)) & 0x0000000F) ; j++) { + level = block[j]; + if (level) { + if (level < 0) { + level = level * qmul - qadd; + } else { + level = level * qmul + qadd; + } + block[j] = level; + } + } +#endif + + // vectorize all the 16 bytes-aligned blocks + // of 8 elements + for(; (j + 7) <= nCoeffs ; j+=8) + { + blockv = vec_ld(j << 1, block); + blockv_neg = vec_cmplt(blockv, vczero); + blockv_null = vec_cmpeq(blockv, vczero); + // choose between +qadd or -qadd as the third operand + temp1 = vec_sel(qaddv, nqaddv, blockv_neg); + // multiply & add (block[j..j+7] * qmul [+-] qadd) + temp1 = vec_mladd(blockv, qmulv, temp1); + // put 0 where block[j..j+7] used to have 0 + blockv = vec_sel(temp1, blockv, blockv_null); + vec_st(blockv, j << 1, block); + } + + // if the coefficient count (nCoeffs + 1) isn't a multiple of 8, finish the job + // using good old scalar units. + // (we could do it using a truncated vector, + // but I'm not sure it's worth the hassle) + for(; j <= nCoeffs ; j++) { + level = block[j]; + if (level) { + if (level < 0) { + level = level * qmul - qadd; + } else { + level = level * qmul + qadd; + } + block[j] = level; + } + } + + if (i == 1) + { // cheat. this avoids special-casing the first iteration + block[0] = backup_0; + } + } +#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ + +POWERPC_TBL_STOP_COUNT(altivec_dct_unquantize_h263_num, nCoeffs == 63); +} |