From ce3c57412188a0edc4d6bd819960bf72bf097e5d Mon Sep 17 00:00:00 2001
From: Miguel Freitas <miguelfreitas@users.sourceforge.net>
Date: Fri, 27 May 2005 15:22:52 +0000
Subject: material for xine 1.1: new quality deinterlacer GreedyH from dscaler.
 (actually ported dscaler->kdetv->tvtime->xine) obs: i'm not yet considering
 this feature for 1.0.2 since every time i add new asm code the build gets
 broken for some specific gcc version or something...

CVS patchset: 7563
CVS date: 2005/05/27 15:22:52
---
 src/post/deinterlace/plugins/Makefile.am       |  27 ++-
 src/post/deinterlace/plugins/greedyh.asm       | 309 +++++++++++++++++++++++++
 src/post/deinterlace/plugins/greedyhmacros.h   |  74 ++++++
 src/post/deinterlace/plugins/kdetv_greedyh.c   | 129 +++++++++++
 src/post/deinterlace/plugins/x86-64_macros.inc |  81 +++++++
 src/post/deinterlace/xine_plugin.c             |   4 +-
 6 files changed, 618 insertions(+), 6 deletions(-)
 create mode 100644 src/post/deinterlace/plugins/greedyh.asm
 create mode 100644 src/post/deinterlace/plugins/greedyhmacros.h
 create mode 100644 src/post/deinterlace/plugins/kdetv_greedyh.c
 create mode 100644 src/post/deinterlace/plugins/x86-64_macros.inc

diff --git a/src/post/deinterlace/plugins/Makefile.am b/src/post/deinterlace/plugins/Makefile.am
index f08fa250a..817e206cb 100644
--- a/src/post/deinterlace/plugins/Makefile.am
+++ b/src/post/deinterlace/plugins/Makefile.am
@@ -1,8 +1,25 @@
 include $(top_srcdir)/misc/Makefile.common
 
-AM_CPPFLAGS = -I$(top_srcdir)/src/post/deinterlace
+# plugins/Makefile.am distributes the plugins that come with tvtime.
+# Copyright (C) 2002, 2003, 2004  Billy Biggs
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at
+# your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+EXTRA_DIST = greedy2frame_template.c greedyh.asm x86-64_macros.inc
 
-EXTRA_DIST = greedy2frame_template.c
+AM_CPPFLAGS = -I$(top_srcdir)/src/post/deinterlace
 
 libdir = $(XINE_PLUGINDIR)/post
 
@@ -16,8 +33,10 @@ libdeinterlaceplugins_la_SOURCES = \
 	vfir.c \
 	weave.c \
 	greedy2frame.c \
-	scalerbob.c
+	scalerbob.c \
+	kdetv_greedyh.c
+#	kdetv_tomsmocomp.c
 libdeinterlaceplugins_la_LIBADD = $(XINE_LIB)
 libdeinterlaceplugins_la_LDFLAGS = -avoid-version -module @XINE_PLUGIN_MIN_SYMS@
 
-noinst_HEADERS = plugins.h
+noinst_HEADERS = plugins.h greedyhmacros.h
diff --git a/src/post/deinterlace/plugins/greedyh.asm b/src/post/deinterlace/plugins/greedyh.asm
new file mode 100644
index 000000000..d2b3f3566
--- /dev/null
+++ b/src/post/deinterlace/plugins/greedyh.asm
@@ -0,0 +1,309 @@
+// -*- c++ -*-
+/////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2001 Tom Barry.  All rights reserved.
+/////////////////////////////////////////////////////////////////////////////
+//
+//	This file is subject to the terms of the GNU General Public License as
+//	published by the Free Software Foundation.  A copy of this license is
+//	included with this software distribution in the file COPYING.  If you
+//	do not have a copy, you may obtain a copy by writing to the Free
+//	Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+//
+//	This software is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//	GNU General Public License for more details
+//
+/////////////////////////////////////////////////////////////////////////////
+
+#include "x86-64_macros.inc"
+
+// GreedyH frame deinterlacer body; compiled once per SSE_TYPE under the name FUNCT_NAME.
+static void FUNCT_NAME(uint8_t *output, int outstride, deinterlace_frame_data_t *data,
+                  int bottom_field, int second_field, int width, int height )
+{
+    int64_t i;
+    int stride = (width*2);
+    int InfoIsOdd = bottom_field;
+
+    // in tight loop some vars are accessed faster in local storage
+    int64_t YMask        = 0x00ff00ff00ff00ffull; // to keep only luma
+    int64_t UVMask       = 0xff00ff00ff00ff00ull; // to keep only chroma
+    int64_t ShiftMask    = 0xfefffefffefffeffull; // to avoid shifting chroma to luma
+    int64_t QW256        = 0x0100010001000100ull; // 4 256's
+
+    int64_t MaxComb;
+    int64_t MotionThreshold;
+    int64_t MotionSense;
+
+    int Line;
+    long LoopCtr;
+    long oldbx;
+    unsigned int Pitch = stride*2;
+    int FieldHeight = height / 2;
+
+    unsigned char* L1;					// ptr to Line1, of 3
+    unsigned char* L2;					// ptr to Line2, the weave line
+    unsigned char* L3;					// ptr to Line3
+
+    unsigned char* L2P;					// ptr to prev Line2
+    unsigned char* Dest = output;
+
+    int64_t QW256B;
+    int64_t LastAvg=0;			//interp value from left qword
+
+    // Set up our two parms that are actually evaluated for each pixel
+    i=GreedyMaxComb;
+    MaxComb = i << 56 | i << 48 | i << 40 | i << 32 | i << 24 | i << 16 | i << 8 | i;
+
+    i = GreedyMotionThreshold;		// scale to range of 0-257
+    MotionThreshold = i << 48 | i << 32 | i << 16 | i | UVMask;
+
+    i = GreedyMotionSense;		// scale to range of 0-257
+    MotionSense = i << 48 | i << 32 | i << 16 | i;
+
+    // Each 16-bit lane must hold 0xffff-256 so paddusw then psubusw clamps a lane to 256 (PMINSW stand-in).
+    i = 0xffff - 256;
+    QW256B = (int64_t)((uint64_t)i << 48 | (uint64_t)i << 32 | (uint64_t)i << 16 | (uint64_t)i);
+
+    // copy first even line no matter what, and the first odd line if we're
+    // processing an EVEN field. (note diff from other deint rtns.)
+    if( second_field ) {
+        L1 = data->f0;
+        L2 = data->f0;
+        L2P = data->f1;
+    } else {
+        L1 = data->f1;
+        L2 = data->f0;
+        L2P = data->f1;
+    }
+
+    if( InfoIsOdd ) {
+        L1 += 0;
+        L2 += stride;
+        L3 = L1 + Pitch;
+        L2P += stride;
+
+        // copy first even line
+        xine_fast_memcpy(Dest, L1, stride);
+        Dest += outstride;
+    } else {
+        // copy first even line
+        xine_fast_memcpy(Dest, L2, stride);
+        Dest += outstride;
+
+        L1 += stride;
+        L2 += Pitch;
+        L3 = L1 + Pitch;
+        L2P += Pitch;
+
+        // then first odd line
+        xine_fast_memcpy(Dest, L1, stride);
+        Dest += outstride;
+    }
+
+    for (Line = 0; Line < (FieldHeight - 1); ++Line) {
+        LoopCtr = stride / 8 - 1; // there are LineLength / 8 qwords per line but do 1 less, adj at end of loop
+
+        // For ease of reading, the comments below assume that we're operating on an odd
+        // field (i.e., that InfoIsOdd is true).  Assume the obvious for even lines..
+        __asm__ __volatile__
+            (
+             // save ebx (-fPIC)
+	     MOVX" %%"XBX", %[oldbx]\n\t"
+
+             MOVX"  %[L1],          %%"XAX"\n\t"
+             LEAX"  8(%%"XAX"),     %%"XBX"\n\t"    // next qword needed by DJR
+             MOVX"  %[L3],          %%"XCX"\n\t"
+             SUBX"  %%"XAX",        %%"XCX"\n\t"    // carry L3 addr as an offset
+             MOVX"  %[L2P],         %%"XDX"\n\t"
+             MOVX"  %[L2],          %%"XSI"\n\t"
+             MOVX"  %[Dest],        %%"XDI"\n\t"    // DL1 if Odd or DL2 if Even
+
+             ".align 8\n\t"
+             "1:\n\t"
+
+             "movq  (%%"XSI"),      %%mm0\n\t"      // L2 - the newest weave pixel value
+             "movq  (%%"XAX"),      %%mm1\n\t"      // L1 - the top pixel
+             "movq  (%%"XDX"),      %%mm2\n\t"      // L2P - the prev weave pixel
+             "movq  (%%"XAX", %%"XCX"), %%mm3\n\t"  // L3, next odd row
+             "movq  %%mm1,          %%mm6\n\t"      // L1 - get simple single pixel interp
+             //	pavgb   mm6, mm3                    // use macro below
+             V_PAVGB ("%%mm6", "%%mm3", "%%mm4", "%[ShiftMask]")
+
+             // DJR - Diagonal Jaggie Reduction
+             // In the event that we are going to use an average (Bob) pixel we do not want a jagged
+             // stair step effect.  To combat this we avg in the 2 horizontally adjacent pixels into the
+             // interpolated Bob mix. This will do horizontal smoothing for only the Bob'd pixels.
+
+             "movq  %[LastAvg],     %%mm4\n\t"      // the bob value from prev qword in row
+             "movq  %%mm6,          %[LastAvg]\n\t" // save for next pass
+             "psrlq $48,            %%mm4\n\t"      // right justify 1 pixel
+             "movq  %%mm6,          %%mm7\n\t"      // copy of simple bob pixel
+             "psllq $16,            %%mm7\n\t"      // left justify 3 pixels
+             "por   %%mm7,          %%mm4\n\t"      // and combine
+
+             "movq  (%%"XBX"),      %%mm5\n\t"      // next horiz qword from L1
+             //			pavgb   mm5, qword ptr[ebx+ecx] // next horiz qword from L3, use macro below
+             V_PAVGB ("%%mm5", "(%%"XBX",%%"XCX")", "%%mm7", "%[ShiftMask]")
+             "psllq $48,            %%mm5\n\t"      // left just 1 pixel
+             "movq  %%mm6,          %%mm7\n\t"      // another copy of simple bob pixel
+             "psrlq $16,            %%mm7\n\t"      // right just 3 pixels
+             "por   %%mm7,          %%mm5\n\t"      // combine
+             //			pavgb	mm4, mm5			// avg of forward and prev by 1 pixel, use macro
+             V_PAVGB ("%%mm4", "%%mm5", "%%mm5", "%[ShiftMask]")   // mm5 gets modified if MMX
+             //			pavgb	mm6, mm4			// avg of center and surround interp vals, use macro
+             V_PAVGB ("%%mm6", "%%mm4", "%%mm7", "%[ShiftMask]")
+
+             // Don't do any more averaging than needed for mmx. It hurts performance and causes rounding errors.
+#ifndef IS_MMX
+             //          pavgb	mm4, mm6			// 1/4 center, 3/4 adjacent
+             V_PAVGB ("%%mm4", "%%mm6", "%%mm7", "%[ShiftMask]")
+             //    		pavgb	mm6, mm4			// 3/8 center, 5/8 adjacent
+             V_PAVGB ("%%mm6", "%%mm4", "%%mm7", "%[ShiftMask]")
+#endif
+
+             // get abs value of possible L2 comb
+             "movq    %%mm6,        %%mm4\n\t"      // work copy of interp val
+             "movq    %%mm2,        %%mm7\n\t"      // L2
+             "psubusb %%mm4,        %%mm7\n\t"      // L2 - avg
+             "movq    %%mm4,        %%mm5\n\t"      // avg
+             "psubusb %%mm2,        %%mm5\n\t"      // avg - L2
+             "por     %%mm7,        %%mm5\n\t"      // abs(avg-L2)
+
+             // get abs value of possible L2P comb
+             "movq    %%mm0,        %%mm7\n\t"      // L2P
+             "psubusb %%mm4,        %%mm7\n\t"      // L2P - avg
+             "psubusb %%mm0,        %%mm4\n\t"      // avg - L2P
+             "por     %%mm7,        %%mm4\n\t"      // abs(avg-L2P)
+
+             // use L2 or L2P depending upon which makes smaller comb
+             "psubusb %%mm5,        %%mm4\n\t"      // see if it goes to zero
+             "psubusb %%mm5,        %%mm5\n\t"      // 0
+             "pcmpeqb %%mm5,        %%mm4\n\t"      // if (mm4=0) then FF else 0
+             "pcmpeqb %%mm4,        %%mm5\n\t"      // opposite of mm4
+
+             // if Comb(L2P) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5 = ff
+             "pand    %%mm2,        %%mm5\n\t"      // use L2 if mm5 == ff, else 0
+             "pand    %%mm0,        %%mm4\n\t"      // use L2P if mm4 = ff, else 0
+             "por     %%mm5,        %%mm4\n\t"      // may the best win
+
+             // Inventory: at this point we have the following values:
+             // mm0 = L2P (or L2)
+             // mm1 = L1
+             // mm2 = L2 (or L2P)
+             // mm3 = L3
+             // mm4 = the best of L2,L2P weave pixel, base upon comb
+             // mm6 = the avg interpolated value, if we need to use it
+
+             // Let's measure movement, as how much the weave pixel has changed
+             "movq    %%mm2,        %%mm7\n\t"
+             "psubusb %%mm0,        %%mm2\n\t"
+             "psubusb %%mm7,        %%mm0\n\t"
+             "por     %%mm2,        %%mm0\n\t"      // abs value of change, used later
+
+             // Now lets clip our chosen value to be not outside of the range
+             // of the high/low range L1-L3 by more than MaxComb.
+             // This allows some comb but limits the damages and also allows more
+             // detail than a boring oversmoothed clip.
+             "movq    %%mm1,        %%mm2\n\t"      // copy L1
+             //	pmaxub mm2, mm3                     // use macro
+             V_PMAXUB ("%%mm2", "%%mm3")            // now = Max(L1,L3)
+             "movq    %%mm1,        %%mm5\n\t"      // copy L1
+             // pminub	mm5, mm3                    // now = Min(L1,L3), use macro
+             V_PMINUB ("%%mm5", "%%mm3", "%%mm7")
+             // allow the value to be above the high or below the low by amt of MaxComb
+             "psubusb %[MaxComb],   %%mm5\n\t"      // lower min by diff
+             "paddusb %[MaxComb],   %%mm2\n\t"      // increase max by diff
+             // pmaxub	mm4, mm5                    // now = Max(best,Min(L1,L3) use macro
+             V_PMAXUB ("%%mm4", "%%mm5")
+             // pminub	mm4, mm2                    // now = Min( Max(best, Min(L1,L3), L2 )=L2 clipped
+             V_PMINUB ("%%mm4", "%%mm2", "%%mm7")
+
+             // Blend weave pixel with bob pixel, depending on motion val in mm0
+             "psubusb %[MotionThreshold], %%mm0\n\t"// test Threshold, clear chroma change >>>??
+             "pmullw  %[MotionSense], %%mm0\n\t"    // mul by user factor, keep low 16 bits
+             "movq    %[QW256], %%mm7\n\t"
+#ifdef IS_SSE
+             "pminsw  %%mm7,        %%mm0\n\t"      // max = 256
+#else
+             "paddusw %[QW256B],    %%mm0\n\t"      // add, may sat at fff..
+             "psubusw %[QW256B],    %%mm0\n\t"      // now = Min(motion,256)
+#endif
+             "psubusw %%mm0,        %%mm7\n\t"      // so the 2 sum to 256, weighted avg
+             "movq    %%mm4,        %%mm2\n\t"      // save weave chroma info before trashing
+             "pand    %[YMask],     %%mm4\n\t"      // keep only luma from calc'd value
+             "pmullw  %%mm7,        %%mm4\n\t"      // use more weave for less motion
+             "pand    %[YMask],     %%mm6\n\t"      // keep only luma from calc'd value
+             "pmullw  %%mm0,        %%mm6\n\t"      // use more bob for large motion
+             "paddusw %%mm6,        %%mm4\n\t"      // combine
+             "psrlw   $8,           %%mm4\n\t"      // div by 256 to get weighted avg
+
+             // chroma comes from weave pixel
+             "pand    %[UVMask],    %%mm2\n\t"      // keep chroma
+             "por     %%mm4,        %%mm2\n\t"      // and combine
+
+             V_MOVNTQ ("(%%"XDI")", "%%mm2")        // move in our clipped best, use macro
+
+             // bump ptrs and loop
+             LEAX"    8(%%"XAX"),   %%"XAX"\n\t"
+             LEAX"    8(%%"XBX"),   %%"XBX"\n\t"
+             LEAX"    8(%%"XDX"),   %%"XDX"\n\t"
+             LEAX"    8(%%"XDI"),   %%"XDI"\n\t"
+             LEAX"    8(%%"XSI"),   %%"XSI"\n\t"
+             DECX"    %[LoopCtr]\n\t"
+             "jg      1b\n\t"                       // loop if not to last line
+                                                    // note P-III default assumes backward branches taken
+             "jl      1f\n\t"                       // done
+             MOVX"    %%"XAX",      %%"XBX"\n\t"  // sharpness lookahead 1 byte only, be wrong on 1
+             "jmp     1b\n\t"
+
+             "1:\n\t"
+	     MOVX" %[oldbx], %%"XBX"\n\t"
+
+             : /* no outputs: LastAvg, LoopCtr and oldbx are written through "m" inputs; the "memory" clobber covers that */
+
+             : [LastAvg]         "m"(LastAvg),
+               [L1]              "m"(L1),
+               [L3]              "m"(L3),
+               [L2P]             "m"(L2P),
+               [L2]              "m"(L2),
+               [Dest]            "m"(Dest),
+               [ShiftMask]       "m"(ShiftMask),
+               [MaxComb]         "m"(MaxComb),
+               [MotionThreshold] "m"(MotionThreshold),
+               [MotionSense]     "m"(MotionSense),
+               [QW256B]          "m"(QW256B),
+               [YMask]           "m"(YMask),
+               [UVMask]          "m"(UVMask),
+               [LoopCtr]         "m"(LoopCtr),
+               [QW256]           "m"(QW256),
+	       [oldbx]           "m"(oldbx)
+
+             : XAX, XCX, XDX, XSI, XDI,
+#ifdef ARCH_X86
+               "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
+#endif
+               "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
+               "memory", "cc"
+            );
+
+        Dest += outstride;
+        xine_fast_memcpy(Dest, L3, stride);
+        Dest += outstride;
+
+        L1  += Pitch;
+        L2  += Pitch;
+        L3  += Pitch;
+        L2P += Pitch;
+    }
+
+    if (InfoIsOdd) {
+        xine_fast_memcpy(Dest, L2, stride);
+    }
+
+    // clear out the MMX registers ready for doing floating point again (needed on x86-64 as well)
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
+    __asm__ __volatile__ ("emms\n\t");
+#endif
+}
diff --git a/src/post/deinterlace/plugins/greedyhmacros.h b/src/post/deinterlace/plugins/greedyhmacros.h
new file mode 100644
index 000000000..5f65959c3
--- /dev/null
+++ b/src/post/deinterlace/plugins/greedyhmacros.h
@@ -0,0 +1,74 @@
+/////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2001 Tom Barry.  All rights reserved.
+/////////////////////////////////////////////////////////////////////////////
+//
+//	This file is subject to the terms of the GNU General Public License as
+//	published by the Free Software Foundation.  A copy of this license is
+//	included with this software distribution in the file COPYING.  If you
+//	do not have a copy, you may obtain a copy by writing to the Free
+//	Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+//
+//	This software is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//	GNU General Public License for more details
+//
+/////////////////////////////////////////////////////////////////////////////
+
+// Define a few macros for CPU dependent instructions.
+// Each V_xxx macro goes through V_xxx2 and V_xxx3 so that SSE_TYPE is macro-expanded
+// before it is pasted onto the V_xxx_ prefix.          // TRB 7/01
+
+// BEFORE USING THESE YOU MUST SET:
+
+// #define SSE_TYPE SSE            (or MMX or 3DNOW)
+
+// V_PAVGB(mmr1, mmr2, mmrw, smask): byte average of mmr2 into mmr1 (pavgb, pavgusb, or an MMX emulation)
+//      mmrw is a work register and smask a mask operand, both used only by the MMX form; mmr2 may = mmrw if you can trash it
+
+#define V_PAVGB_MMX(mmr1, mmr2, mmrw, smask) \
+	"movq    "mmr2",  "mmrw"\n\t"            \
+	"pand    "smask", "mmrw"\n\t"            \
+	"psrlw   $1,      "mmrw"\n\t"            \
+	"pand    "smask", "mmr1"\n\t"            \
+	"psrlw   $1,      "mmr1"\n\t"            \
+	"paddusb "mmrw",  "mmr1"\n\t"
+#define V_PAVGB_SSE(mmr1, mmr2, mmrw, smask)      "pavgb   "mmr2", "mmr1"\n\t"
+#define V_PAVGB_3DNOW(mmr1, mmr2, mmrw, smask)    "pavgusb "mmr2", "mmr1"\n\t"
+#define V_PAVGB(mmr1, mmr2, mmrw, smask)          V_PAVGB2(mmr1, mmr2, mmrw, smask, SSE_TYPE) 
+#define V_PAVGB2(mmr1, mmr2, mmrw, smask, ssetyp) V_PAVGB3(mmr1, mmr2, mmrw, smask, ssetyp) 
+#define V_PAVGB3(mmr1, mmr2, mmrw, smask, ssetyp) V_PAVGB_##ssetyp(mmr1, mmr2, mmrw, smask) 
+
+// V_PMAXUB(mmr1, mmr2): mmr1 = max(mmr1, mmr2) per unsigned byte; the MMX form is (mmr1 -sat mmr2) +sat mmr2
+#define V_PMAXUB_MMX(mmr1, mmr2) \
+    "psubusb "mmr2", "mmr1"\n\t" \
+    "paddusb "mmr2", "mmr1"\n\t"
+#define V_PMAXUB_SSE(mmr1, mmr2)      "pmaxub "mmr2", "mmr1"\n\t"
+#define V_PMAXUB_3DNOW(mmr1, mmr2)    V_PMAXUB_MMX(mmr1, mmr2)  // use MMX version
+#define V_PMAXUB(mmr1, mmr2)          V_PMAXUB2(mmr1, mmr2, SSE_TYPE) 
+#define V_PMAXUB2(mmr1, mmr2, ssetyp) V_PMAXUB3(mmr1, mmr2, ssetyp) 
+#define V_PMAXUB3(mmr1, mmr2, ssetyp) V_PMAXUB_##ssetyp(mmr1, mmr2) 
+
+// V_PMINUB(mmr1, mmr2, mmrw): mmr1 = min(mmr1, mmr2) per unsigned byte (pminub, or an MMX emulation)
+//      mmrw is overwritten first by the MMX form, so mmr2 may NOT = mmrw
+#define V_PMINUB_MMX(mmr1, mmr2, mmrw) \
+    "pcmpeqb "mmrw", "mmrw"\n\t"       \
+    "psubusb "mmr2", "mmrw"\n\t"       \
+    "paddusb "mmrw", "mmr1"\n\t"       \
+    "psubusb "mmrw", "mmr1"\n\t"
+#define V_PMINUB_SSE(mmr1, mmr2, mmrw)      "pminub "mmr2", "mmr1"\n\t"
+#define V_PMINUB_3DNOW(mmr1, mmr2, mmrw)    V_PMINUB_MMX(mmr1, mmr2, mmrw)  // use MMX version
+#define V_PMINUB(mmr1, mmr2, mmrw)          V_PMINUB2(mmr1, mmr2, mmrw, SSE_TYPE) 
+#define V_PMINUB2(mmr1, mmr2, mmrw, ssetyp) V_PMINUB3(mmr1, mmr2, mmrw, ssetyp) 
+#define V_PMINUB3(mmr1, mmr2, mmrw, ssetyp) V_PMINUB_##ssetyp(mmr1, mmr2, mmrw) 
+
+// V_MOVNTQ(mmr1, mmr2): store mmr2 to mmr1; movntq for SSE, plain movq for MMX and 3DNOW
+//      note the argument order: destination (mmr1) first, source (mmr2) second
+#define V_MOVNTQ_MMX(mmr1, mmr2)      "movq   "mmr2", "mmr1"\n\t"
+#define V_MOVNTQ_3DNOW(mmr1, mmr2)    "movq   "mmr2", "mmr1"\n\t"
+#define V_MOVNTQ_SSE(mmr1, mmr2)      "movntq "mmr2", "mmr1"\n\t"
+#define V_MOVNTQ(mmr1, mmr2)          V_MOVNTQ2(mmr1, mmr2, SSE_TYPE) 
+#define V_MOVNTQ2(mmr1, mmr2, ssetyp) V_MOVNTQ3(mmr1, mmr2, ssetyp) 
+#define V_MOVNTQ3(mmr1, mmr2, ssetyp) V_MOVNTQ_##ssetyp(mmr1, mmr2)
+
+// end of macros
diff --git a/src/post/deinterlace/plugins/kdetv_greedyh.c b/src/post/deinterlace/plugins/kdetv_greedyh.c
new file mode 100644
index 000000000..04fbbb82e
--- /dev/null
+++ b/src/post/deinterlace/plugins/kdetv_greedyh.c
@@ -0,0 +1,129 @@
+/**
+ * Copyright (C) 2004 Billy Biggs <vektor@dumbterm.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if HAVE_INTTYPES_H
+#include <inttypes.h>
+#else
+#include <stdint.h>
+#endif
+
+#include "attributes.h"
+#include "xineutils.h"
+#include "deinterlace.h"
+#include "speedtools.h"
+#include "speedy.h"
+
+#include "greedyhmacros.h"
+/* Default values for the three GreedyH tunables. */
+#define MAXCOMB_DEFAULT          5
+#define MOTIONTHRESHOLD_DEFAULT 25
+#define MOTIONSENSE_DEFAULT     30
+/* File-local tunables; the filter body in greedyh.asm reads them at the start of every call. */
+static unsigned int GreedyMaxComb = MAXCOMB_DEFAULT;
+static unsigned int GreedyMotionThreshold = MOTIONTHRESHOLD_DEFAULT;
+static unsigned int GreedyMotionSense = MOTIONSENSE_DEFAULT;
+
+/* greedyh.asm is included three times: SSE_TYPE selects the V_* macro variant, FUNCT_NAME the function name. */
+#define IS_SSE
+#define SSE_TYPE SSE
+#define FUNCT_NAME greedyh_filter_sse
+#include "greedyh.asm"
+#undef SSE_TYPE
+#undef IS_SSE
+#undef FUNCT_NAME
+
+#define IS_3DNOW
+#define FUNCT_NAME greedyh_filter_3dnow
+#define SSE_TYPE 3DNOW
+#include "greedyh.asm"
+#undef SSE_TYPE
+#undef IS_3DNOW
+#undef FUNCT_NAME
+
+#define IS_MMX
+#define SSE_TYPE MMX
+#define FUNCT_NAME greedyh_filter_mmx
+#include "greedyh.asm"
+#undef SSE_TYPE
+#undef IS_MMX
+#undef FUNCT_NAME
+
+
+/* Run the GreedyH build that matches the CPU: MMXEXT first, then 3DNow!, else MMX. */
+static void deinterlace_frame_di_greedyh( uint8_t *output, int outstride,
+                                          deinterlace_frame_data_t *data,
+                                          int bottom_field, int second_field,
+                                          int width, int height )
+{
+    /* Plain MMX is the fallback when neither extension is reported. */
+    void (*filter)( uint8_t *, int, deinterlace_frame_data_t *,
+                    int, int, int, int ) = greedyh_filter_mmx;
+
+    if( xine_mm_accel() & MM_ACCEL_X86_MMXEXT ) {
+        filter = greedyh_filter_sse;
+    } else if( xine_mm_accel() & MM_ACCEL_X86_3DNOW ) {
+        filter = greedyh_filter_3dnow;
+    }
+
+    /* All arguments are forwarded unchanged to the selected variant. */
+    filter( output, outstride, data, bottom_field, second_field, width, height );
+}
+
+
+static deinterlace_method_t greedymethod =
+{
+    "Greedy - High Motion (DScaler)",
+    "GreedyH",
+    /*
+     * NOTE(review): an alternative name pair, "Motion Adaptive: Advanced Detection" /
+     * "AdaptiveAdvanced", was left disabled here; the description below still says "Advanced detection".
+     */
+    4,                 /* presumably the number of fields required ("four-field buffer" below) - confirm against deinterlace_method_t */
+    MM_ACCEL_X86_MMX,  /* acceleration flag; the frame function below falls back to its plain MMX build */
+    0,                 /* the four zeros are positional fields of deinterlace_method_t, */
+    0,                 /* which is declared outside this file - */
+    0,                 /* TODO confirm their meaning against that */
+    0,                 /* declaration before changing any of them */
+    deinterlace_frame_di_greedyh,
+    1,                 /* positional field; meaning not visible from this file - TODO confirm */
+    { "Uses heuristics to detect motion in the input",
+      "frames and reconstruct image detail where",
+      "possible.  Use this for high quality output",
+      "even on monitors set to an arbitrary refresh",
+      "rate.",
+      "",
+      "Advanced detection uses linear interpolation",
+      "where motion is detected, using a four-field",
+      "buffer.  This is the Greedy: High Motion",
+      "deinterlacer from DScaler." }
+};
+/* Returns the address of the file-static GreedyH method descriptor (greedymethod). */
+deinterlace_method_t *dscaler_greedyh_get_method( void )
+{
+    return &greedymethod;
+}
+
diff --git a/src/post/deinterlace/plugins/x86-64_macros.inc b/src/post/deinterlace/plugins/x86-64_macros.inc
new file mode 100644
index 000000000..3dfd9b63a
--- /dev/null
+++ b/src/post/deinterlace/plugins/x86-64_macros.inc
@@ -0,0 +1,81 @@
+/***************************************************************************
+                           KdetvDScalerFilter_x86-64.inc
+                           -----------------------------
+    begin                : Thu Sep 26 2004
+    copyright            : (C) 2004 by Dirk Ziegelmeier
+    email                : dziegel@gmx.de
+ ***************************************************************************/
+
+/*
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * This file is copied from TVTIME's sources.
+ * Original author: Achim Schneider <batchall@mordor.ch>
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef XAX
+/* XAX doubles as the include guard: everything below is skipped once it is defined. */
+#if defined (ARCH_X86)
+/* 32-bit build: e?? register names and 'l'-suffixed mnemonics, all as string literals. */
+#define XAX   "eax"
+#define XBX   "ebx"
+#define XCX   "ecx"
+#define XDX   "edx"
+#define XSI   "esi"
+#define XDI   "edi"
+#define XSP   "esp"
+#define MOVX  "movl"
+#define LEAX  "leal"
+#define DECX  "decl"
+#define PUSHX "pushl"
+#define POPX  "popl"
+#define CMPX  "cmpl"
+#define ADDX  "addl"
+#define SHLX  "shll"
+#define SHRX  "shrl"
+#define SUBX  "subl"
+
+#elif defined (ARCH_X86_64)
+/* 64-bit build: r?? register names and 'q'-suffixed mnemonics, all as string literals. */
+#define XAX   "rax"
+#define XBX   "rbx"
+#define XCX   "rcx"
+#define XDX   "rdx"
+#define XSI   "rsi"
+#define XDI   "rdi"
+#define XSP   "rsp"
+#define MOVX  "movq"
+#define LEAX  "leaq"
+#define DECX  "decq"
+#define PUSHX "pushq"
+#define POPX  "popq"
+#define CMPX  "cmpq"
+#define ADDX  "addq"
+#define SHLX  "shlq"
+#define SHRX  "shrq"
+#define SUBX  "subq"
+
+#else
+#error Undefined architecture. Define either ARCH_X86 or ARCH_X86_64.
+#endif
+/* closes the XAX include guard */
+#endif
diff --git a/src/post/deinterlace/xine_plugin.c b/src/post/deinterlace/xine_plugin.c
index a7999585a..4fea21798 100644
--- a/src/post/deinterlace/xine_plugin.c
+++ b/src/post/deinterlace/xine_plugin.c
@@ -17,7 +17,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
  *
- * $Id: xine_plugin.c,v 1.39 2005/05/16 01:39:02 miguelfreitas Exp $
+ * $Id: xine_plugin.c,v 1.40 2005/05/27 15:22:52 miguelfreitas Exp $
  *
  * advanced video deinterlacer plugin
  * Jun/2003 by Miguel Freitas
@@ -310,7 +310,7 @@ static void *deinterlace_init_plugin(xine_t *xine, void *data)
   register_deinterlace_method( double_get_method() );
   register_deinterlace_method( vfir_get_method() );
   register_deinterlace_method( scalerbob_get_method() );
-
+  register_deinterlace_method( dscaler_greedyh_get_method() );
 
   filter_deinterlace_methods( config_flags, 5 /*fieldsavailable*/ );
   if( !get_num_deinterlace_methods() ) {
-- 
cgit v1.2.3