From 97c50cb77949856573d7f5f5a3c28cb73e61e011 Mon Sep 17 00:00:00 2001 From: Mike Melanson Date: Tue, 19 Apr 2005 05:16:45 +0000 Subject: sync to FFmpeg build 4752 CVS patchset: 7463 CVS date: 2005/04/19 05:16:45 --- src/libffmpeg/libavcodec/libpostproc/mangle.h | 9 + src/libffmpeg/libavcodec/libpostproc/postprocess.c | 266 +++- .../libpostproc/postprocess_altivec_template.c | 721 ++++++++-- .../libavcodec/libpostproc/postprocess_internal.h | 10 + .../libavcodec/libpostproc/postprocess_template.c | 1477 ++++++++++++++------ 5 files changed, 1853 insertions(+), 630 deletions(-) (limited to 'src/libffmpeg/libavcodec/libpostproc') diff --git a/src/libffmpeg/libavcodec/libpostproc/mangle.h b/src/libffmpeg/libavcodec/libpostproc/mangle.h index f3894cc33..aa09cd6bf 100644 --- a/src/libffmpeg/libavcodec/libpostproc/mangle.h +++ b/src/libffmpeg/libavcodec/libpostproc/mangle.h @@ -8,12 +8,21 @@ #define __MANGLE_H /* Feel free to add more to the list, eg. a.out IMO */ +/* Use rip-relative addressing if compiling PIC code on x86-64. */ #if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__OS2__) || \ (defined(__OpenBSD__) && !defined(__ELF__)) +#if defined(ARCH_X86_64) && defined(PIC) +#define MANGLE(a) "_" #a"(%%rip)" +#else #define MANGLE(a) "_" #a +#endif +#else +#if defined(ARCH_X86_64) && defined(PIC) +#define MANGLE(a) #a"(%%rip)" #else #define MANGLE(a) #a #endif +#endif #endif /* !__MANGLE_H */ diff --git a/src/libffmpeg/libavcodec/libpostproc/postprocess.c b/src/libffmpeg/libavcodec/libpostproc/postprocess.c index a03ff133d..e7ca0191d 100644 --- a/src/libffmpeg/libavcodec/libpostproc/postprocess.c +++ b/src/libffmpeg/libavcodec/libpostproc/postprocess.c @@ -29,10 +29,11 @@ isVertDC Ec Ec Ec isVertMinMaxOk Ec Ec Ec doVertLowPass E e e Ec doVertDefFilter Ec Ec e e Ec -isHorizDC Ec Ec -isHorizMinMaxOk a E -doHorizLowPass E e e -doHorizDefFilter Ec Ec e e +isHorizDC Ec Ec Ec +isHorizMinMaxOk a E Ec +doHorizLowPass E e e Ec +doHorizDefFilter Ec Ec e e Ec +do_a_deblock Ec E Ec E deRing E e e* Ecp Vertical RKAlgo1 E a a Horizontal RKAlgo1 a a @@ -42,7 +43,7 @@ LinIpolDeinterlace e E E* CubicIpolDeinterlace a e e* LinBlendDeinterlace e E E* MedianDeinterlace# E Ec Ec -TempDeNoiser# E e e +TempDeNoiser# E e e Ec * i dont have a 3dnow CPU -> its untested, but noone said it doesnt work so it seems to work # more or less selfinvented filters so the exactness isnt too meaningfull @@ -91,6 +92,10 @@ try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks #include "mangle.h" //FIXME should be supressed +#ifdef HAVE_ALTIVEC_H +#include +#endif + #ifndef HAVE_MEMALIGN #define memalign(a,b) malloc(b) #endif @@ -108,12 +113,15 @@ try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0) # define attribute_used __attribute__((used)) +# define always_inline __attribute__((always_inline)) inline #else # define attribute_used +# define always_inline inline #endif -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL; +static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL; static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL; static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL; static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL; @@ -122,7 +130,6 @@ static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x080808080808 static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL; #endif - static uint8_t clip_table[3*256]; static uint8_t * const clip_tab= clip_table + 256; @@ -139,6 +146,8 @@ static struct PPFilter filters[]= {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/ {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER}, {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER}, + {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK}, + {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK}, {"dr", "dering", 1, 5, 6, DERING}, {"al", "autolevels", 0, 1, 2, LEVEL_FIX}, {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER}, @@ -154,15 +163,16 @@ static struct PPFilter filters[]= static char *replaceTable[]= { - "default", "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400", - "de", "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400", - "fast", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400", - "fa", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400", + "default", "hdeblock:a,vdeblock:a,dering:a", + "de", "hdeblock:a,vdeblock:a,dering:a", + "fast", "x1hdeblock:a,x1vdeblock:a,dering:a", + "fa", "x1hdeblock:a,x1vdeblock:a,dering:a", + "ac", "ha:a:128:7,va:a,dering:a", NULL //End Marker }; -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static inline void prefetchnta(void *p) { asm volatile( "prefetchnta (%0)\n\t" @@ -372,32 +382,32 @@ static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c) */ static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c) { - int y; for(y=0; yQP ? dst[-1] : dst[0]; const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7]; - int sums[9]; - sums[0] = first + dst[0]; - sums[1] = dst[0] + dst[1]; - sums[2] = dst[1] + dst[2]; - sums[3] = dst[2] + dst[3]; - sums[4] = dst[3] + dst[4]; - sums[5] = dst[4] + dst[5]; - sums[6] = dst[5] + dst[6]; - sums[7] = dst[6] + dst[7]; - sums[8] = dst[7] + last; - - dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4; - dst[1]= ((dst[1]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4; - dst[2]= ((dst[2]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4; - dst[3]= ((dst[3]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4; - dst[4]= ((dst[4]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4; - dst[5]= ((dst[5]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4; - dst[6]= (((last + dst[6])<<2) + ((dst[7] + sums[5])<<1) + sums[3] + 8)>>4; - dst[7]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4; + int sums[10]; + sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4; + sums[1] = sums[0] - first + dst[3]; + sums[2] = sums[1] - first + dst[4]; + sums[3] = sums[2] - first + dst[5]; + sums[4] = sums[3] - first + dst[6]; + sums[5] = sums[4] - dst[0] + dst[7]; + sums[6] = sums[5] - dst[1] + last; + sums[7] = sums[6] - dst[2] + last; + sums[8] = sums[7] - dst[3] + last; + sums[9] = sums[8] - dst[4] + last; + + dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4; + dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4; + dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4; + dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4; + dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4; + dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4; + dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4; + dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4; dst+= stride; } @@ -469,6 +479,111 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP) } } +/** + * accurate deblock filter + */ +static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){ + int y; + const int QP= c->QP; + const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; + const int dcThreshold= dcOffset*2 + 1; +//START_TIMER + src+= step*4; // src points to begin of the 8x8 Block + for(y=0; y<8; y++){ + int numEq= 0; + + if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++; + if(numEq > c->ppMode.flatnessThreshold){ + int min, max, x; + + if(src[0] > src[step]){ + max= src[0]; + min= src[step]; + }else{ + max= src[step]; + min= src[0]; + } + for(x=2; x<8; x+=2){ + if(src[x*step] > src[(x+1)*step]){ + if(src[x *step] > max) max= src[ x *step]; + if(src[(x+1)*step] < min) min= src[(x+1)*step]; + }else{ + if(src[(x+1)*step] > max) max= src[(x+1)*step]; + if(src[ x *step] < min) min= src[ x *step]; + } + } + if(max-min < 2*QP){ + const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0]; + const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step]; + + int sums[10]; + sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4; + sums[1] = sums[0] - first + src[3*step]; + sums[2] = sums[1] - first + src[4*step]; + sums[3] = sums[2] - first + src[5*step]; + sums[4] = sums[3] - first + src[6*step]; + sums[5] = sums[4] - src[0*step] + src[7*step]; + sums[6] = sums[5] - src[1*step] + last; + sums[7] = sums[6] - src[2*step] + last; + sums[8] = sums[7] - src[3*step] + last; + sums[9] = sums[8] - src[4*step] + last; + + src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4; + src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4; + src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4; + src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4; + src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4; + src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4; + src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4; + src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4; + } + }else{ + const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]); + + if(ABS(middleEnergy) < 8*QP) + { + const int q=(src[3*step] - src[4*step])/2; + const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]); + const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]); + + int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); + d= MAX(d, 0); + + d= (5*d + 32) >> 6; + d*= SIGN(-middleEnergy); + + if(q>0) + { + d= d<0 ? 0 : d; + d= d>q ? q : d; + } + else + { + d= d>0 ? 0 : d; + d= dcpuCaps & PP_CPU_CAPS_MMX2) postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); @@ -586,7 +691,7 @@ static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int #else #ifdef ARCH_POWERPC #ifdef HAVE_ALTIVEC - else if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC) + if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC) postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); else #endif @@ -614,24 +719,21 @@ static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int /* -pp Command line Help */ char *pp_help= -"[: