summary refs log tree commit diff
path: root/src/libffmpeg/libavcodec/libpostproc
diff options
context:
space:
mode:
author Mike Melanson <mike@multimedia.cx> 2004-02-01 05:31:16 +0000
committer Mike Melanson <mike@multimedia.cx> 2004-02-01 05:31:16 +0000
commit 61d793ef13ac2ef8f9c2b41b71430f21fac80337 (patch)
tree 69a1b20b01993b4d61fe1c8c2be33dd2e362a40c /src/libffmpeg/libavcodec/libpostproc
parent f707774ac5d48c02c6a36327304d88629b0e38f9 (diff)
download xine-lib-61d793ef13ac2ef8f9c2b41b71430f21fac80337.tar.gz
xine-lib-61d793ef13ac2ef8f9c2b41b71430f21fac80337.tar.bz2
sync to ffmpeg build 4699
CVS patchset: 6090 CVS date: 2004/02/01 05:31:16
Diffstat (limited to 'src/libffmpeg/libavcodec/libpostproc')
-rw-r--r-- src/libffmpeg/libavcodec/libpostproc/postprocess.c 67
-rw-r--r-- src/libffmpeg/libavcodec/libpostproc/postprocess_internal.h 4
-rw-r--r-- src/libffmpeg/libavcodec/libpostproc/postprocess_template.c 56
3 files changed, 83 insertions, 44 deletions
diff --git a/src/libffmpeg/libavcodec/libpostproc/postprocess.c b/src/libffmpeg/libavcodec/libpostproc/postprocess.c
index 11267b666..093d94aea 100644
--- a/src/libffmpeg/libavcodec/libpostproc/postprocess.c
+++ b/src/libffmpeg/libavcodec/libpostproc/postprocess.c
@@ -70,9 +70,8 @@ try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
#include "config.h"
#include <inttypes.h>
#include <stdio.h>
-
-#include "xineutils.h"
-
+#include <stdlib.h>
+#include <string.h>
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
@@ -81,9 +80,9 @@ try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
//#undef HAVE_MMX
//#undef ARCH_X86
//#define DEBUG_BRIGHTNESS
-
-#define memcpy(a,b,c) xine_fast_memcpy(a,b,c)
-
+#ifdef USE_FASTMEMCPY
+#include "../fastmemcpy.h"
+#endif
#include "postprocess.h"
#include "postprocess_internal.h"
@@ -105,13 +104,13 @@ try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
#ifdef ARCH_X86
-static const uint64_t __attribute__((aligned(8))) w05= 0x0005000500050005ULL;
-static const uint64_t __attribute__((aligned(8))) w20= 0x0020002000200020ULL;
-static const uint64_t __attribute__((aligned(8))) b00= 0x0000000000000000ULL;
-static const uint64_t __attribute__((aligned(8))) b01= 0x0101010101010101ULL;
-static const uint64_t __attribute__((aligned(8))) b02= 0x0202020202020202ULL;
-static const uint64_t __attribute__((aligned(8))) b08= 0x0808080808080808ULL;
-static const uint64_t __attribute__((aligned(8))) b80= 0x8080808080808080ULL;
+static uint64_t __attribute__((aligned(8))) w05= 0x0005000500050005LL;
+static uint64_t __attribute__((aligned(8))) w20= 0x0020002000200020LL;
+static uint64_t __attribute__((aligned(8))) b00= 0x0000000000000000LL;
+static uint64_t __attribute__((aligned(8))) b01= 0x0101010101010101LL;
+static uint64_t __attribute__((aligned(8))) b02= 0x0202020202020202LL;
+static uint64_t __attribute__((aligned(8))) b08= 0x0808080808080808LL;
+static uint64_t __attribute__((aligned(8))) b80= 0x8080808080808080LL;
#endif
@@ -153,6 +152,44 @@ static char *replaceTable[]=
NULL //End Marker
};
+#ifdef ARCH_X86
+static inline void unusedVariableWarningFixer()
+{
+ if(w05 + w20 + b00 + b01 + b02 + b08 + b80 == 0) b00=0;
+}
+#endif
+
+
+#ifdef ARCH_X86
+static inline void prefetchnta(void *p)
+{
+ asm volatile( "prefetchnta (%0)\n\t"
+ : : "r" (p)
+ );
+}
+
+static inline void prefetcht0(void *p)
+{
+ asm volatile( "prefetcht0 (%0)\n\t"
+ : : "r" (p)
+ );
+}
+
+static inline void prefetcht1(void *p)
+{
+ asm volatile( "prefetcht1 (%0)\n\t"
+ : : "r" (p)
+ );
+}
+
+static inline void prefetcht2(void *p)
+{
+ asm volatile( "prefetcht2 (%0)\n\t"
+ : : "r" (p)
+ );
+}
+#endif
+
// The horizontal Functions exist only in C cuz the MMX code is faster with vertical filters and transposing
/**
@@ -906,10 +943,10 @@ for(y=0; y<mbHeight; y++){
int i;
const int count= mbHeight * QPStride;
for(i=0; i<(count>>2); i++){
- ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x1F1F1F1F;
+ ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
}
for(i<<=2; i<count; i++){
- c->nonBQPTable[i] = QP_store[i] & 0x1F;
+ c->nonBQPTable[i] = QP_store[i] & 0x3F;
}
}
diff --git a/src/libffmpeg/libavcodec/libpostproc/postprocess_internal.h b/src/libffmpeg/libavcodec/libpostproc/postprocess_internal.h
index 13b3e3831..db50fa3b5 100644
--- a/src/libffmpeg/libavcodec/libpostproc/postprocess_internal.h
+++ b/src/libffmpeg/libavcodec/libpostproc/postprocess_internal.h
@@ -134,8 +134,8 @@ typedef struct PPContext{
uint64_t __attribute__((aligned(8))) pQPb;
uint64_t __attribute__((aligned(8))) pQPb2;
- uint64_t __attribute__((aligned(8))) mmxDcOffset[32];
- uint64_t __attribute__((aligned(8))) mmxDcThreshold[32];
+ uint64_t __attribute__((aligned(8))) mmxDcOffset[64];
+ uint64_t __attribute__((aligned(8))) mmxDcThreshold[64];
QP_STORE_T *stdQPTable; ///< used to fix MPEG2 style qscale
QP_STORE_T *nonBQPTable;
diff --git a/src/libffmpeg/libavcodec/libpostproc/postprocess_template.c b/src/libffmpeg/libavcodec/libpostproc/postprocess_template.c
index 636212959..7ebc08bd4 100644
--- a/src/libffmpeg/libavcodec/libpostproc/postprocess_template.c
+++ b/src/libffmpeg/libavcodec/libpostproc/postprocess_template.c
@@ -1805,10 +1805,9 @@ DEINT_L5(%%mm1, %%mm0, (%%edx, %1, 2), (%0, %1, 8) , (%%edx, %1, 4))
* will be called for every 8x8 block and can read & write from line 4-15
* lines 0-3 have been passed through the deblock / dering filters allready, but can be read too
* lines 4-12 will be read into the deblocking filter and should be deinterlaced
- * will shift the image up by 1 line (FIXME if this is a problem)
* this filter will read lines 4-13 and write 4-11
*/
-static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride)
+static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
src+= 4*stride;
@@ -1818,43 +1817,43 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride)
// 0 1 2 3 4 5 6 7 8 9
// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1
- "movq (%0), %%mm0 \n\t" // L0
- "movq (%%eax, %1), %%mm1 \n\t" // L2
+ "movq (%2), %%mm0 \n\t" // L0
+ "movq (%%eax), %%mm1 \n\t" // L2
PAVGB(%%mm1, %%mm0) // L0+L2
- "movq (%%eax), %%mm2 \n\t" // L1
+ "movq (%0), %%mm2 \n\t" // L1
PAVGB(%%mm2, %%mm0)
"movq %%mm0, (%0) \n\t"
- "movq (%%eax, %1, 2), %%mm0 \n\t" // L3
+ "movq (%%eax, %1), %%mm0 \n\t" // L3
PAVGB(%%mm0, %%mm2) // L1+L3
PAVGB(%%mm1, %%mm2) // 2L2 + L1 + L3
"movq %%mm2, (%%eax) \n\t"
- "movq (%0, %1, 4), %%mm2 \n\t" // L4
+ "movq (%%eax, %1, 2), %%mm2 \n\t" // L4
PAVGB(%%mm2, %%mm1) // L2+L4
PAVGB(%%mm0, %%mm1) // 2L3 + L2 + L4
"movq %%mm1, (%%eax, %1) \n\t"
- "movq (%%edx), %%mm1 \n\t" // L5
+ "movq (%0, %1, 4), %%mm1 \n\t" // L5
PAVGB(%%mm1, %%mm0) // L3+L5
PAVGB(%%mm2, %%mm0) // 2L4 + L3 + L5
"movq %%mm0, (%%eax, %1, 2) \n\t"
- "movq (%%edx, %1), %%mm0 \n\t" // L6
+ "movq (%%edx), %%mm0 \n\t" // L6
PAVGB(%%mm0, %%mm2) // L4+L6
PAVGB(%%mm1, %%mm2) // 2L5 + L4 + L6
"movq %%mm2, (%0, %1, 4) \n\t"
- "movq (%%edx, %1, 2), %%mm2 \n\t" // L7
+ "movq (%%edx, %1), %%mm2 \n\t" // L7
PAVGB(%%mm2, %%mm1) // L5+L7
PAVGB(%%mm0, %%mm1) // 2L6 + L5 + L7
"movq %%mm1, (%%edx) \n\t"
- "movq (%0, %1, 8), %%mm1 \n\t" // L8
+ "movq (%%edx, %1, 2), %%mm1 \n\t" // L8
PAVGB(%%mm1, %%mm0) // L6+L8
PAVGB(%%mm2, %%mm0) // 2L7 + L6 + L8
"movq %%mm0, (%%edx, %1) \n\t"
- "movq (%%edx, %1, 4), %%mm0 \n\t" // L9
+ "movq (%0, %1, 8), %%mm0 \n\t" // L9
PAVGB(%%mm0, %%mm2) // L7+L9
PAVGB(%%mm1, %%mm2) // 2L8 + L7 + L9
"movq %%mm2, (%%edx, %1, 2) \n\t"
+ "movq %%mm1, (%2) \n\t"
-
- : : "r" (src), "r" (stride)
+ : : "r" (src), "r" (stride), "r" (tmp)
: "%eax", "%edx"
);
#else
@@ -1862,41 +1861,43 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride)
src+= 4*stride;
for(x=0; x<2; x++){
- a= *(uint32_t*)&src[stride*0];
- b= *(uint32_t*)&src[stride*1];
- c= *(uint32_t*)&src[stride*2];
+ a= *(uint32_t*)&tmp[stride*0];
+ b= *(uint32_t*)&src[stride*0];
+ c= *(uint32_t*)&src[stride*1];
a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
*(uint32_t*)&src[stride*0]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
- a= *(uint32_t*)&src[stride*3];
+ a= *(uint32_t*)&src[stride*2];
b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
*(uint32_t*)&src[stride*1]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
- b= *(uint32_t*)&src[stride*4];
+ b= *(uint32_t*)&src[stride*3];
c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
*(uint32_t*)&src[stride*2]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
- c= *(uint32_t*)&src[stride*5];
+ c= *(uint32_t*)&src[stride*4];
a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
*(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
- a= *(uint32_t*)&src[stride*6];
+ a= *(uint32_t*)&src[stride*5];
b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
*(uint32_t*)&src[stride*4]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
- b= *(uint32_t*)&src[stride*7];
+ b= *(uint32_t*)&src[stride*6];
c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
*(uint32_t*)&src[stride*5]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
- c= *(uint32_t*)&src[stride*8];
+ c= *(uint32_t*)&src[stride*7];
a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
*(uint32_t*)&src[stride*6]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
- a= *(uint32_t*)&src[stride*9];
+ a= *(uint32_t*)&src[stride*8];
b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
*(uint32_t*)&src[stride*7]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
+ *(uint32_t*)&tmp[stride*0]= c;
src += 4;
+ tmp += 4;
}
#endif
}
@@ -2788,9 +2789,10 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int
uint64_t * const yHistogram= c.yHistogram;
uint8_t * const tempSrc= c.tempSrc;
uint8_t * const tempDst= c.tempDst;
+ const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4;
#ifdef HAVE_MMX
- for(i=0; i<32; i++){
+ for(i=0; i<57; i++){
int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1;
int threshold= offset*2 + 1;
c.mmxDcOffset[i]= 0x7F - offset;
@@ -2932,7 +2934,7 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int
if(mode & LINEAR_IPOL_DEINT_FILTER)
RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
else if(mode & LINEAR_BLEND_DEINT_FILTER)
- RENAME(deInterlaceBlendLinear)(dstBlock, dstStride);
+ RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
else if(mode & MEDIAN_DEINT_FILTER)
RENAME(deInterlaceMedian)(dstBlock, dstStride);
else if(mode & CUBIC_IPOL_DEINT_FILTER)
@@ -3076,7 +3078,7 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int
if(mode & LINEAR_IPOL_DEINT_FILTER)
RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
else if(mode & LINEAR_BLEND_DEINT_FILTER)
- RENAME(deInterlaceBlendLinear)(dstBlock, dstStride);
+ RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
else if(mode & MEDIAN_DEINT_FILTER)
RENAME(deInterlaceMedian)(dstBlock, dstStride);
else if(mode & CUBIC_IPOL_DEINT_FILTER)