diff options
Diffstat (limited to 'src/libffmpeg/libavcodec/libpostproc/postprocess.c')
-rw-r--r-- | src/libffmpeg/libavcodec/libpostproc/postprocess.c | 67 |
1 files changed, 52 insertions, 15 deletions
diff --git a/src/libffmpeg/libavcodec/libpostproc/postprocess.c b/src/libffmpeg/libavcodec/libpostproc/postprocess.c index 11267b666..093d94aea 100644 --- a/src/libffmpeg/libavcodec/libpostproc/postprocess.c +++ b/src/libffmpeg/libavcodec/libpostproc/postprocess.c @@ -70,9 +70,8 @@ try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks #include "config.h" #include <inttypes.h> #include <stdio.h> - -#include "xineutils.h" - +#include <stdlib.h> +#include <string.h> #ifdef HAVE_MALLOC_H #include <malloc.h> #endif @@ -81,9 +80,9 @@ try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks //#undef HAVE_MMX //#undef ARCH_X86 //#define DEBUG_BRIGHTNESS - -#define memcpy(a,b,c) xine_fast_memcpy(a,b,c) - +#ifdef USE_FASTMEMCPY +#include "../fastmemcpy.h" +#endif #include "postprocess.h" #include "postprocess_internal.h" @@ -105,13 +104,13 @@ try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks //#define NUM_BLOCKS_AT_ONCE 16 //not used yet #ifdef ARCH_X86 -static const uint64_t __attribute__((aligned(8))) w05= 0x0005000500050005ULL; -static const uint64_t __attribute__((aligned(8))) w20= 0x0020002000200020ULL; -static const uint64_t __attribute__((aligned(8))) b00= 0x0000000000000000ULL; -static const uint64_t __attribute__((aligned(8))) b01= 0x0101010101010101ULL; -static const uint64_t __attribute__((aligned(8))) b02= 0x0202020202020202ULL; -static const uint64_t __attribute__((aligned(8))) b08= 0x0808080808080808ULL; -static const uint64_t __attribute__((aligned(8))) b80= 0x8080808080808080ULL; +static uint64_t __attribute__((aligned(8))) w05= 0x0005000500050005LL; +static uint64_t __attribute__((aligned(8))) w20= 0x0020002000200020LL; +static uint64_t __attribute__((aligned(8))) b00= 0x0000000000000000LL; +static uint64_t __attribute__((aligned(8))) b01= 0x0101010101010101LL; +static uint64_t __attribute__((aligned(8))) b02= 0x0202020202020202LL; +static uint64_t __attribute__((aligned(8))) b08= 0x0808080808080808LL; +static uint64_t __attribute__((aligned(8))) b80= 0x8080808080808080LL; #endif @@ -153,6 +152,44 @@ static char *replaceTable[]= NULL //End Marker }; +#ifdef ARCH_X86 +static inline void unusedVariableWarningFixer() +{ + if(w05 + w20 + b00 + b01 + b02 + b08 + b80 == 0) b00=0; +} +#endif + + +#ifdef ARCH_X86 +static inline void prefetchnta(void *p) +{ + asm volatile( "prefetchnta (%0)\n\t" + : : "r" (p) + ); +} + +static inline void prefetcht0(void *p) +{ + asm volatile( "prefetcht0 (%0)\n\t" + : : "r" (p) + ); +} + +static inline void prefetcht1(void *p) +{ + asm volatile( "prefetcht1 (%0)\n\t" + : : "r" (p) + ); +} + +static inline void prefetcht2(void *p) +{ + asm volatile( "prefetcht2 (%0)\n\t" + : : "r" (p) + ); +} +#endif + // The horizontal Functions exist only in C cuz the MMX code is faster with vertical filters and transposing /** @@ -906,10 +943,10 @@ for(y=0; y<mbHeight; y++){ int i; const int count= mbHeight * QPStride; for(i=0; i<(count>>2); i++){ - ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x1F1F1F1F; + ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F; } for(i<<=2; i<count; i++){ - c->nonBQPTable[i] = QP_store[i] & 0x1F; + c->nonBQPTable[i] = QP_store[i] & 0x3F; } } |