diff options
Diffstat (limited to 'src/libffmpeg/libavcodec/libpostproc/postprocess.c')
-rw-r--r-- | src/libffmpeg/libavcodec/libpostproc/postprocess.c | 266 |
1 files changed, 196 insertions, 70 deletions
diff --git a/src/libffmpeg/libavcodec/libpostproc/postprocess.c b/src/libffmpeg/libavcodec/libpostproc/postprocess.c index a03ff133d..e7ca0191d 100644 --- a/src/libffmpeg/libavcodec/libpostproc/postprocess.c +++ b/src/libffmpeg/libavcodec/libpostproc/postprocess.c @@ -29,10 +29,11 @@ isVertDC Ec Ec Ec isVertMinMaxOk Ec Ec Ec doVertLowPass E e e Ec doVertDefFilter Ec Ec e e Ec -isHorizDC Ec Ec -isHorizMinMaxOk a E -doHorizLowPass E e e -doHorizDefFilter Ec Ec e e +isHorizDC Ec Ec Ec +isHorizMinMaxOk a E Ec +doHorizLowPass E e e Ec +doHorizDefFilter Ec Ec e e Ec +do_a_deblock Ec E Ec E deRing E e e* Ecp Vertical RKAlgo1 E a a Horizontal RKAlgo1 a a @@ -42,7 +43,7 @@ LinIpolDeinterlace e E E* CubicIpolDeinterlace a e e* LinBlendDeinterlace e E E* MedianDeinterlace# E Ec Ec -TempDeNoiser# E e e +TempDeNoiser# E e e Ec * i dont have a 3dnow CPU -> its untested, but noone said it doesnt work so it seems to work # more or less selfinvented filters so the exactness isnt too meaningfull @@ -91,6 +92,10 @@ try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks #include "mangle.h" //FIXME should be supressed +#ifdef HAVE_ALTIVEC_H +#include <altivec.h> +#endif + #ifndef HAVE_MEMALIGN #define memalign(a,b) malloc(b) #endif @@ -108,12 +113,15 @@ try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0) # define attribute_used __attribute__((used)) +# define always_inline __attribute__((always_inline)) inline #else # define attribute_used +# define always_inline inline #endif -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL; +static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL; static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL; static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL; static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL; @@ -122,7 +130,6 @@ static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x080808080808 static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL; #endif - static uint8_t clip_table[3*256]; static uint8_t * const clip_tab= clip_table + 256; @@ -139,6 +146,8 @@ static struct PPFilter filters[]= {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/ {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER}, {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER}, + {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK}, + {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK}, {"dr", "dering", 1, 5, 6, DERING}, {"al", "autolevels", 0, 1, 2, LEVEL_FIX}, {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER}, @@ -154,15 +163,16 @@ static struct PPFilter filters[]= static char *replaceTable[]= { - "default", "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400", - "de", "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400", - "fast", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400", - "fa", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400", + "default", "hdeblock:a,vdeblock:a,dering:a", + "de", "hdeblock:a,vdeblock:a,dering:a", + "fast", "x1hdeblock:a,x1vdeblock:a,dering:a", + "fa", "x1hdeblock:a,x1vdeblock:a,dering:a", + "ac", "ha:a:128:7,va:a,dering:a", NULL //End Marker }; -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static inline void prefetchnta(void *p) { asm volatile( "prefetchnta (%0)\n\t" @@ -372,32 +382,32 @@ static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c) */ static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c) { - int y; for(y=0; y<BLOCK_SIZE; y++) { const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0]; const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7]; - int sums[9]; - sums[0] = first + dst[0]; - sums[1] = dst[0] + dst[1]; - sums[2] = dst[1] + dst[2]; - sums[3] = dst[2] + dst[3]; - sums[4] = dst[3] + dst[4]; - sums[5] = dst[4] + dst[5]; - sums[6] = dst[5] + dst[6]; - sums[7] = dst[6] + dst[7]; - sums[8] = dst[7] + last; - - dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4; - dst[1]= ((dst[1]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4; - dst[2]= ((dst[2]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4; - dst[3]= ((dst[3]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4; - dst[4]= ((dst[4]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4; - dst[5]= ((dst[5]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4; - dst[6]= (((last + dst[6])<<2) + ((dst[7] + sums[5])<<1) + sums[3] + 8)>>4; - dst[7]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4; + int sums[10]; + sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4; + sums[1] = sums[0] - first + dst[3]; + sums[2] = sums[1] - first + dst[4]; + sums[3] = sums[2] - first + dst[5]; + sums[4] = sums[3] - first + dst[6]; + sums[5] = sums[4] - dst[0] + dst[7]; + sums[6] = sums[5] - dst[1] + last; + sums[7] = sums[6] - dst[2] + last; + sums[8] = sums[7] - dst[3] + last; + sums[9] = sums[8] - dst[4] + last; + + dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4; + dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4; + dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4; + dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4; + dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4; + dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4; + dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4; + dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4; dst+= stride; } @@ -469,6 +479,111 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP) } } +/** + * accurate deblock filter + */ +static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){ + int y; + const int QP= c->QP; + const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; + const int dcThreshold= dcOffset*2 + 1; +//START_TIMER + src+= step*4; // src points to begin of the 8x8 Block + for(y=0; y<8; y++){ + int numEq= 0; + + if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++; + if(numEq > c->ppMode.flatnessThreshold){ + int min, max, x; + + if(src[0] > src[step]){ + max= src[0]; + min= src[step]; + }else{ + max= src[step]; + min= src[0]; + } + for(x=2; x<8; x+=2){ + if(src[x*step] > src[(x+1)*step]){ + if(src[x *step] > max) max= src[ x *step]; + if(src[(x+1)*step] < min) min= src[(x+1)*step]; + }else{ + if(src[(x+1)*step] > max) max= src[(x+1)*step]; + if(src[ x *step] < min) min= src[ x *step]; + } + } + if(max-min < 2*QP){ + const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0]; + const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step]; + + int sums[10]; + sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4; + sums[1] = sums[0] - first + src[3*step]; + sums[2] = sums[1] - first + src[4*step]; + sums[3] = sums[2] - first + src[5*step]; + sums[4] = sums[3] - first + src[6*step]; + sums[5] = sums[4] - src[0*step] + src[7*step]; + sums[6] = sums[5] - src[1*step] + last; + sums[7] = sums[6] - src[2*step] + last; + sums[8] = sums[7] - src[3*step] + last; + sums[9] = sums[8] - src[4*step] + last; + + src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4; + src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4; + src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4; + src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4; + src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4; + src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4; + src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4; + src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4; + } + }else{ + const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]); + + if(ABS(middleEnergy) < 8*QP) + { + const int q=(src[3*step] - src[4*step])/2; + const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]); + const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]); + + int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); + d= MAX(d, 0); + + d= (5*d + 32) >> 6; + d*= SIGN(-middleEnergy); + + if(q>0) + { + d= d<0 ? 0 : d; + d= d>q ? q : d; + } + else + { + d= d>0 ? 0 : d; + d= d<q ? q : d; + } + + src[3*step]-= d; + src[4*step]+= d; + } + } + + src += stride; + } +/*if(step==16){ + STOP_TIMER("step16") +}else{ + STOP_TIMER("stepX") +}*/ +} //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one //Plain C versions @@ -479,15 +594,10 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP) #ifdef ARCH_POWERPC #ifdef HAVE_ALTIVEC #define COMPILE_ALTIVEC -#ifndef CONFIG_DARWIN -#warning "################################################################################" -#warning "WARNING: No gcc available as of today (2004-05-25) seems to be able to compile properly some of the code under non-Darwin PPC OSes. Some functions result in wrong results, while others simply won't compile (gcc explodes after allocating 1GiB+)." -#warning "################################################################################" -#endif //CONFIG_DARWIN #endif //HAVE_ALTIVEC #endif //ARCH_POWERPC -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT) #define COMPILE_MMX @@ -506,13 +616,11 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP) #undef HAVE_MMX2 #undef HAVE_3DNOW #undef HAVE_ALTIVEC -#undef ARCH_X86 #ifdef COMPILE_C #undef HAVE_MMX #undef HAVE_MMX2 #undef HAVE_3DNOW -#undef ARCH_X86 #define RENAME(a) a ## _C #include "postprocess_template.c" #endif @@ -533,7 +641,6 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP) #define HAVE_MMX #undef HAVE_MMX2 #undef HAVE_3DNOW -#define ARCH_X86 #define RENAME(a) a ## _MMX #include "postprocess_template.c" #endif @@ -544,7 +651,6 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP) #define HAVE_MMX #define HAVE_MMX2 #undef HAVE_3DNOW -#define ARCH_X86 #define RENAME(a) a ## _MMX2 #include "postprocess_template.c" #endif @@ -555,7 +661,6 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP) #define HAVE_MMX #undef HAVE_MMX2 #define HAVE_3DNOW -#define ARCH_X86 #define RENAME(a) a ## _3DNow #include "postprocess_template.c" #endif @@ -573,7 +678,7 @@ static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int // difference wouldnt be messureable here but its much better because // someone might exchange the cpu whithout restarting mplayer ;) #ifdef RUNTIME_CPUDETECT -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) // ordered per speed fasterst first if(c->cpuCaps & PP_CPU_CAPS_MMX2) postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); @@ -586,7 +691,7 @@ static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int #else #ifdef ARCH_POWERPC #ifdef HAVE_ALTIVEC - else if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC) + if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC) postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); else #endif @@ -614,24 +719,21 @@ static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int /* -pp Command line Help */ char *pp_help= -"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n" -"long form example:\n" -"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n" -"short form example:\n" -"vb:a/hb:a/lb de,-vb\n" -"more examples:\n" -"tn:64:128:256\n" +"Available postprocessing filters:\n" "Filters Options\n" "short long name short long option Description\n" "* * a autoq CPU power dependent enabler\n" " c chrom chrominance filtering enabled\n" " y nochrom chrominance filtering disabled\n" +" n noluma luma filtering disabled\n" "hb hdeblock (2 threshold) horizontal deblocking filter\n" " 1. difference factor: default=32, higher -> more deblocking\n" " 2. flatness threshold: default=39, lower -> more deblocking\n" " the h & v deblocking filters share these\n" " so you can't set different thresholds for h / v\n" "vb vdeblock (2 threshold) vertical deblocking filter\n" +"ha hadeblock (2 threshold) horizontal deblocking filter\n" +"va vadeblock (2 threshold) vertical deblocking filter\n" "h1 x1hdeblock experimental h deblock filter 1\n" "v1 x1vdeblock experimental v deblock filter 1\n" "dr dering deringing filter\n" @@ -642,11 +744,20 @@ char *pp_help= "ci cubicipoldeint cubic interpolating deinterlacer\n" "md mediandeint median deinterlacer\n" "fd ffmpegdeint ffmpeg deinterlacer\n" -"de default hb:a,vb:a,dr:a,al\n" -"fa fast h1:a,v1:a,dr:a,al\n" +"l5 lowpass5 FIR lowpass deinterlacer\n" +"de default hb:a,vb:a,dr:a\n" +"fa fast h1:a,v1:a,dr:a\n" "tn tmpnoise (3 threshold) temporal noise reducer\n" " 1. <= 2. <= 3. larger -> stronger filtering\n" "fq forceQuant <quantizer> force quantizer\n" +"Usage:\n" +"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n" +"long form example:\n" +"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n" +"short form example:\n" +"vb:a/hb:a/lb de,-vb\n" +"more examples:\n" +"tn:64:128:256\n" ; pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality) @@ -680,6 +791,7 @@ pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality) char *filterName; int q= 1000000; //PP_QUALITY_MAX; int chrom=-1; + int luma=-1; char *option; char *options[OPTIONS_ARRAY_SIZE]; int i; @@ -707,6 +819,7 @@ pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality) if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality; else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0; else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1; + else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0; else { options[numOfUnknownOptions] = option; @@ -753,7 +866,7 @@ pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality) filterNameOk=1; if(!enable) break; // user wants to disable it - if(q >= filters[i].minLumQuality) + if(q >= filters[i].minLumQuality && luma) ppMode->lumMode|= filters[i].mask; if(chrom==1 || (chrom==-1 && filters[i].chromDefault)) if(q >= filters[i].minChromQuality) @@ -793,7 +906,8 @@ pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality) } } } - else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK) + else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK + || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK) { int o; @@ -940,18 +1054,20 @@ void pp_postprocess(uint8_t * src[3], int srcStride[3], int mbHeight= (height+15)>>4; PPMode *mode = (PPMode*)vm; PPContext *c = (PPContext*)vc; - int minStride= MAX(srcStride[0], dstStride[0]); + int minStride= MAX(ABS(srcStride[0]), ABS(dstStride[0])); + int absQPStride = ABS(QPStride); - if(c->stride < minStride || c->qpStride < QPStride) + // c->stride and c->QPStride are always positive + if(c->stride < minStride || c->qpStride < absQPStride) reallocBuffers(c, width, height, MAX(minStride, c->stride), - MAX(c->qpStride, QPStride)); + MAX(c->qpStride, absQPStride)); if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) { int i; QP_store= c->forcedQPTable; - QPStride= 0; + absQPStride = QPStride = 0; if(mode->lumMode & FORCE_QUANT) for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant; else @@ -961,7 +1077,7 @@ void pp_postprocess(uint8_t * src[3], int srcStride[3], if(pict_type & PP_PICT_TYPE_QP2){ int i; - const int count= mbHeight * QPStride; + const int count= mbHeight * absQPStride; for(i=0; i<(count>>2); i++){ ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F; } @@ -969,6 +1085,7 @@ void pp_postprocess(uint8_t * src[3], int srcStride[3], c->stdQPTable[i] = QP_store[i]>>1; } QP_store= c->stdQPTable; + QPStride= absQPStride; } if(0){ @@ -984,13 +1101,22 @@ for(y=0; y<mbHeight; y++){ if((pict_type&7)!=3) { - int i; - const int count= mbHeight * QPStride; - for(i=0; i<(count>>2); i++){ - ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F; - } - for(i<<=2; i<count; i++){ - c->nonBQPTable[i] = QP_store[i] & 0x3F; + if (QPStride >= 0) { + int i; + const int count= mbHeight * QPStride; + for(i=0; i<(count>>2); i++){ + ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F; + } + for(i<<=2; i<count; i++){ + c->nonBQPTable[i] = QP_store[i] & 0x3F; + } + } else { + int i,j; + for(i=0; i<mbHeight; i++) { + for(j=0; j<absQPStride; j++) { + c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F; + } + } } } @@ -1014,8 +1140,8 @@ for(y=0; y<mbHeight; y++){ } else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]) { - memcpy(dst[1], src[1], srcStride[1]*height); - memcpy(dst[2], src[2], srcStride[2]*height); + linecpy(dst[1], src[1], height, srcStride[1]); + linecpy(dst[2], src[2], height, srcStride[2]); } else { |