diff options
Diffstat (limited to 'src/libffmpeg/libavcodec/libpostproc/postprocess.c')
-rw-r--r-- | src/libffmpeg/libavcodec/libpostproc/postprocess.c | 82 |
1 files changed, 60 insertions, 22 deletions
diff --git a/src/libffmpeg/libavcodec/libpostproc/postprocess.c b/src/libffmpeg/libavcodec/libpostproc/postprocess.c index 9ac18eaf2..a03ff133d 100644 --- a/src/libffmpeg/libavcodec/libpostproc/postprocess.c +++ b/src/libffmpeg/libavcodec/libpostproc/postprocess.c @@ -1,6 +1,8 @@ /* Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at) + AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org> + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or @@ -22,16 +24,16 @@ */ /* - C MMX MMX2 3DNow -isVertDC Ec Ec -isVertMinMaxOk Ec Ec -doVertLowPass E e e -doVertDefFilter Ec Ec e e + C MMX MMX2 3DNow AltiVec +isVertDC Ec Ec Ec +isVertMinMaxOk Ec Ec Ec +doVertLowPass E e e Ec +doVertDefFilter Ec Ec e e Ec isHorizDC Ec Ec isHorizMinMaxOk a E doHorizLowPass E e e doHorizDefFilter Ec Ec e e -deRing E e e* +deRing E e e* Ecp Vertical RKAlgo1 E a a Horizontal RKAlgo1 a a Vertical X1# a E E @@ -48,6 +50,7 @@ E = Exact implementation e = allmost exact implementation (slightly different rounding,...) a = alternative / approximate impl c = checked against the other implementations (-vo md5) +p = partially optimized, still some work to do */ /* @@ -123,7 +126,7 @@ static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x808080808080 static uint8_t clip_table[3*256]; static uint8_t * const clip_tab= clip_table + 256; -static int verbose= 0; +static const int verbose= 0; static const int attribute_used deringThreshold= 20; @@ -158,13 +161,6 @@ static char *replaceTable[]= NULL //End Marker }; -#ifdef ARCH_X86 -static inline void unusedVariableWarningFixer() -{ - if(w05 + w20 + b00 + b01 + b02 + b08 + b80 == 0) b00=0; -} -#endif - #ifdef ARCH_X86 static inline void prefetchnta(void *p) @@ -201,7 +197,7 @@ static inline void prefetcht2(void *p) /** * Check if the given 8x8 Block is mostly "flat" */ -static inline int isHorizDC(uint8_t src[], int stride, PPContext *c) +static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c) { int numEq= 0; int y; @@ -247,7 +243,7 @@ static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){ return numEq > c->ppMode.flatnessThreshold; } -static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP) +static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP) { int i; #if 1 @@ -311,6 +307,17 @@ static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP) #endif } +static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){ + if( isHorizDC_C(src, stride, c) ){ + if( isHorizMinMaxOk_C(src, stride, c->QP) ) + return 1; + else + return 0; + }else{ + return 2; + } +} + static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){ if( isVertDC_C(src, stride, c) ){ if( isVertMinMaxOk_C(src, stride, c->QP) ) @@ -322,14 +329,14 @@ static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){ } } -static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP) +static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c) { int y; for(y=0; y<BLOCK_SIZE; y++) { - const int middleEnergy= 5*(dst[4] - dst[5]) + 2*(dst[2] - dst[5]); + const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]); - if(ABS(middleEnergy) < 8*QP) + if(ABS(middleEnergy) < 8*c->QP) { const int q=(dst[3] - dst[4])/2; const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]); @@ -363,14 +370,14 @@ static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP) * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) */ -static inline void doHorizLowPass(uint8_t dst[], int stride, int QP) +static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c) { int y; for(y=0; y<BLOCK_SIZE; y++) { - const int first= ABS(dst[-1] - dst[0]) < QP ? dst[-1] : dst[0]; - const int last= ABS(dst[8] - dst[7]) < QP ? dst[8] : dst[7]; + const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0]; + const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7]; int sums[9]; sums[0] = first + dst[0]; @@ -469,6 +476,17 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP) #define COMPILE_C #endif +#ifdef ARCH_POWERPC +#ifdef HAVE_ALTIVEC +#define COMPILE_ALTIVEC +#ifndef CONFIG_DARWIN +#warning "################################################################################" +#warning "WARNING: No gcc available as of today (2004-05-25) seems to be able to compile properly some of the code under non-Darwin PPC OSes. Some functions result in wrong results, while others simply won't compile (gcc explodes after allocating 1GiB+)." +#warning "################################################################################" +#endif //CONFIG_DARWIN +#endif //HAVE_ALTIVEC +#endif //ARCH_POWERPC + #ifdef ARCH_X86 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT) @@ -487,6 +505,7 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP) #undef HAVE_MMX #undef HAVE_MMX2 #undef HAVE_3DNOW +#undef HAVE_ALTIVEC #undef ARCH_X86 #ifdef COMPILE_C @@ -498,6 +517,16 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP) #include "postprocess_template.c" #endif +#ifdef ARCH_POWERPC +#ifdef COMPILE_ALTIVEC +#undef RENAME +#define HAVE_ALTIVEC +#define RENAME(a) a ## _altivec +#include "postprocess_altivec_template.c" +#include "postprocess_template.c" +#endif +#endif //ARCH_POWERPC + //MMX versions #ifdef COMPILE_MMX #undef RENAME @@ -555,6 +584,13 @@ static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int else postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); #else +#ifdef ARCH_POWERPC +#ifdef HAVE_ALTIVEC + else if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC) + postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); + else +#endif +#endif postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); #endif #else //RUNTIME_CPUDETECT @@ -564,6 +600,8 @@ static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); #elif defined (HAVE_MMX) postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); +#elif defined (HAVE_ALTIVEC) + postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); #else postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); #endif |