summaryrefslogtreecommitdiff
path: root/src/post/deinterlace/plugins/greedy2frame_template.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/post/deinterlace/plugins/greedy2frame_template.c')
-rw-r--r--src/post/deinterlace/plugins/greedy2frame_template.c166
1 files changed, 72 insertions, 94 deletions
diff --git a/src/post/deinterlace/plugins/greedy2frame_template.c b/src/post/deinterlace/plugins/greedy2frame_template.c
index 7fe52519f..e88124886 100644
--- a/src/post/deinterlace/plugins/greedy2frame_template.c
+++ b/src/post/deinterlace/plugins/greedy2frame_template.c
@@ -85,25 +85,18 @@
*/
-/* debugging feature */
-/* output the value of mm4 at this point which is pink where we will weave */
-/* and green were we are going to bob */
-/* uncomment next line to see this */
-/* #define CHECK_BOBWEAVE */
-
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
#if !defined(MASKS_DEFINED)
#define MASKS_DEFINED
- static const int64_t __attribute__((__used__)) YMask = 0x00ff00ff00ff00ffll;
- static const int64_t __attribute__((__used__)) Mask = 0x7f7f7f7f7f7f7f7fll;
- static const int64_t __attribute__((__used__)) DwordOne = 0x0000000100000001ll;
- static const int64_t __attribute__((__used__)) DwordTwo = 0x0000000200000002ll;
- static int64_t qwGreedyTwoFrameThreshold;
+static const mmx_t Mask = { uq: 0x7f7f7f7f7f7f7f7fll };
+#define TP GREEDYTWOFRAMETHRESHOLD, GREEDYTWOFRAMETHRESHOLD2
+static const mmx_t GreedyTwoFrameThreshold = { ub: {TP, TP, TP, TP} };
+#undef TP
+#endif
#endif
-#include <mangle.h>
-
-#if defined(IS_SSE)
-static void DeinterlaceGreedy2Frame_SSE(uint8_t *output, int outstride,
+#if defined(IS_MMXEXT)
+static void DeinterlaceGreedy2Frame_MMXEXT(uint8_t *output, int outstride,
deinterlace_frame_data_t *data,
int bottom_field, int second_field, int width, int height )
#elif defined(IS_3DNOW)
@@ -132,13 +125,6 @@ static void DeinterlaceGreedy2Frame_MMX(uint8_t *output, int outstride,
uint32_t LineLength = stride;
uint32_t PitchRest = Pitch - (LineLength >> 3)*8;
- qwGreedyTwoFrameThreshold = GreedyTwoFrameThreshold;
- qwGreedyTwoFrameThreshold += (GreedyTwoFrameThreshold2 << 8);
- qwGreedyTwoFrameThreshold += (qwGreedyTwoFrameThreshold << 48) +
- (qwGreedyTwoFrameThreshold << 32) +
- (qwGreedyTwoFrameThreshold << 16);
-
-
if( second_field ) {
M1 = data->f0;
T1 = data->f0;
@@ -185,14 +171,15 @@ static void DeinterlaceGreedy2Frame_MMX(uint8_t *output, int outstride,
asm volatile(
/* Figure out what to do with the scanline above the one we just copied.
* See above for a description of the algorithm.
- */
- ".align 8 \n\t"
- "movq %4, %%mm6 \n\t"
-
+ * weave if (weave(M) AND (weave(T) OR weave(B)))
+ */
"movq %0, %%mm1 \n\t" // T1
"movq %1, %%mm0 \n\t" // M1
"movq %2, %%mm3 \n\t" // B1
"movq %3, %%mm2 \n\t" // M0
+
+ "movq %4, %%mm6 \n\t" // Mask
+
: /* no output */
: "m" (*T1), "m" (*M1),
"m" (*B1), "m" (*M0), "m" (Mask) );
@@ -205,7 +192,7 @@ static void DeinterlaceGreedy2Frame_MMX(uint8_t *output, int outstride,
*/
"movq %%mm3, %%mm7 \n\t" /* mm7 = B1 */
-#if defined(IS_SSE)
+#if defined(IS_MMXEXT)
"pavgb %%mm1, %%mm7 \n\t"
#elif defined(IS_3DNOW)
"pavgusb %%mm1, %%mm7 \n\t"
@@ -224,93 +211,84 @@ static void DeinterlaceGreedy2Frame_MMX(uint8_t *output, int outstride,
* which should make weave look better when there is small amounts of
* movement
*/
-#if defined(IS_SSE)
- "movq %%mm0, %%mm4 \n\t"
- "movq %%mm2, %%mm5 \n\t"
- "psubusb %%mm2, %%mm4 \n\t"
- "psubusb %%mm0, %%mm5 \n\t"
- "por %%mm5, %%mm4 \n\t"
- "psrlw $1, %%mm4 \n\t"
- "pavgb %%mm2, %%mm0 \n\t"
- "pand %%mm6, %%mm4 \n\t"
+#if defined(IS_MMXEXT)
+ "movq %%mm0, %%mm4 \n\t"
+ "movq %%mm2, %%mm5 \n\t"
+ "psubusb %%mm2, %%mm4 \n\t"
+ "psubusb %%mm0, %%mm5 \n\t"
+ "por %%mm5, %%mm4 \n\t"
+ "pavgb %%mm2, %%mm0 \n\t"
#elif defined(IS_3DNOW)
- "movq %%mm0, %%mm4 \n\t"
- "movq %%mm2, %%mm5 \n\t"
- "psubusb %%mm2, %%mm4 \n\t"
- "psubusb %%mm0, %%mm5 \n\t"
- "por %%mm5, %%mm4 \n\t"
- "psrlw $1, %%mm4 \n\t"
- "pavgusb %%mm2, %%mm0 \n\t"
- "pand %%mm6, %%mm4 \n\t"
+ "movq %%mm0, %%mm4 \n\t"
+ "movq %%mm2, %%mm5 \n\t"
+ "psubusb %%mm2, %%mm4 \n\t"
+ "psubusb %%mm0, %%mm5 \n\t"
+ "por %%mm5, %%mm4 \n\t"
+ "pavgusb %%mm2, %%mm0 \n\t"
#else
- "movq %%mm0, %%mm4 \n\t"
- "psubusb %%mm2, %%mm4 \n\t"
- "psubusb %%mm0, %%mm2 \n\t"
- "por %%mm2, %%mm4 \n\t"
- "psrlw $1, %%mm4 \n\t"
- "pand %%mm6, %%mm4 \n\t"
+ "movq %%mm0, %%mm4 \n\t"
+ "psubusb %%mm2, %%mm4 \n\t"
+ "psubusb %%mm0, %%mm2 \n\t"
+ "por %%mm2, %%mm4 \n\t"
#endif
- /* if |M1-M0| > Threshold we want dword worth of twos */
- "pcmpgtb %3, %%mm4 \n\t"
- "pand %4, %%mm4 \n\t" /* get rid of sign bit */
- "pcmpgtd %5, %%mm4 \n\t" /* do we want to bob */
- "pandn %6, %%mm4 \n\t"
-
"movq %1, %%mm2 \n\t" /* mm2 = T0 */
- /* calculate |T1-T0| put result in mm5 */
- "movq %%mm2, %%mm5 \n\t"
- "psubusb %%mm1, %%mm5 \n\t"
- "psubusb %%mm2, %%mm1 \n\t"
- "por %%mm1, %%mm5 \n\t"
- "psrlw $1, %%mm5 \n\t"
- "pand %%mm6, %%mm5 \n\t"
-
- /* if |T1-T0| > Threshold we want dword worth of ones */
- "pcmpgtb %3, %%mm5 \n\t"
- "pand %%mm6, %%mm5 \n\t" /* get rid of sign bit */
+ /* if |M1-M0| > Threshold we want 0 else dword minus one */
+ "psrlw $1, %%mm4 \n\t"
+ "pand %%mm6, %%mm4 \n\t"
+ "pxor %%mm5, %%mm5 \n\t" // zero
+ "pcmpgtb %3, %%mm4 \n\t"
+ "pcmpeqd %%mm5, %%mm4 \n\t" /* do we want to bob */
- "pcmpgtd %5, %%mm5 \n\t"
- "pandn %5, %%mm5 \n\t"
- "paddd %%mm5, %%mm4 \n\t"
+ /* calculate |T1-T0| put result in mm5 */
+ "movq %%mm2, %%mm5 \n\t"
+ "psubusb %%mm1, %%mm5 \n\t"
+ "psubusb %%mm2, %%mm1 \n\t"
+ "por %%mm1, %%mm5 \n\t"
- "movq %2, %%mm2 \n\t" /* B0 */
+ "movq %2, %%mm2 \n\t" /* mm2 = B0 */
- /* calculate |B1-B0| put result in mm5 */
- "movq %%mm2, %%mm5 \n\t"
- "psubusb %%mm3, %%mm5 \n\t"
- "psubusb %%mm2, %%mm3 \n\t"
- "por %%mm3, %%mm5 \n\t"
+ /* if |T1-T0| > Threshold we want 0 else dword minus one */
"psrlw $1, %%mm5 \n\t"
- "pand %%mm6, %%mm5 \n\t"
+ "pand %%mm6, %%mm5 \n\t"
+ "pxor %%mm1, %%mm1 \n\t" // zero
+ "pcmpgtb %3, %%mm5 \n\t"
+ "pcmpeqd %%mm1, %%mm5 \n\t"
+
+ /* calculate |B1-B0| put result in mm1 */
+ "movq %%mm2, %%mm1 \n\t"
+ "psubusb %%mm3, %%mm1 \n\t"
+ "psubusb %%mm2, %%mm3 \n\t"
+ "por %%mm3, %%mm1 \n\t"
- /* if |B1-B0| > Threshold we want dword worth of ones */
- "pcmpgtb %3, %%mm5 \n\t"
- "pand %%mm6, %%mm5 \n\t" /* get rid of any sign bit */
- "pcmpgtd %5, %%mm5 \n\t"
- "pandn %5, %%mm5 \n\t"
- "paddd %%mm5, %%mm4 \n\t"
+ /* if |B1-B0| > Threshold we want 0 else dword minus one */
+ "psrlw $1, %%mm1 \n\t"
+ "pand %%mm6, %%mm1 \n\t"
+ "pxor %%mm3, %%mm3 \n\t" // zero
+ "pcmpgtb %3, %%mm1 \n\t"
+ "pcmpeqd %%mm3, %%mm1 \n\t"
- "pcmpgtd %6, %%mm4 \n\t"
+ "por %%mm1, %%mm5 \n\t"
+ "pand %%mm5, %%mm4 \n\t"
/* debugging feature
* output the value of mm4 at this point which is pink where we will weave
- * and green were we are going to bob */
+ * and green where we are going to bob
+ */
#ifdef CHECK_BOBWEAVE
-#ifdef IS_SSE
+#ifdef IS_MMXEXT
"movntq %%mm4, %0 \n\t"
#else
"movq %%mm4, %0 \n\t"
#endif
#else
- "movq %%mm4, %%mm5 \n\t"
- /* mm4 now is 1 where we want to weave and 0 where we want to bob */
- "pand %%mm0, %%mm4 \n\t"
- "pandn %%mm7, %%mm5 \n\t"
- "por %%mm5, %%mm4 \n\t"
-#ifdef IS_SSE
+ /* mm4 now is 1 where we want to weave and 0 where we want to bob */
+ "pand %%mm4, %%mm0 \n\t"
+ "pandn %%mm7, %%mm4 \n\t"
+ "por %%mm0, %%mm4 \n\t"
+#ifdef IS_MMXEXT
"movntq %%mm4, %0 \n\t"
#else
"movq %%mm4, %0 \n\t"
@@ -318,7 +296,7 @@ static void DeinterlaceGreedy2Frame_MMX(uint8_t *output, int outstride,
#endif
: "=m" (*Dest2)
- : "m" (*T0), "m" (*B0), "m" (qwGreedyTwoFrameThreshold), "m" (Mask), "m" (DwordOne), "m" (DwordTwo) );
+ : "m" (*T0), "m" (*B0), "m" (GreedyTwoFrameThreshold) );
/* Advance to the next set of pixels. */
T1 += 8;
@@ -341,7 +319,7 @@ static void DeinterlaceGreedy2Frame_MMX(uint8_t *output, int outstride,
B0 += PitchRest;
}
-#ifdef IS_SSE
+#ifdef IS_MMXEXT
asm("sfence\n\t");
#endif