summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/post/deinterlace/plugins/greedy2frame_template.c659
1 files changed, 331 insertions, 328 deletions
diff --git a/src/post/deinterlace/plugins/greedy2frame_template.c b/src/post/deinterlace/plugins/greedy2frame_template.c
index d3d3d7813..2380ee466 100644
--- a/src/post/deinterlace/plugins/greedy2frame_template.c
+++ b/src/post/deinterlace/plugins/greedy2frame_template.c
@@ -1,331 +1,334 @@
-/////////////////////////////////////////////////////////////////////////////
-// $Id: greedy2frame_template.c,v 1.3 2004/01/02 20:53:43 miguelfreitas Exp $
-/////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2000 John Adcock, Tom Barry, Steve Grimm All rights reserved.
-// port copyright (c) 2003 Miguel Freitas
-/////////////////////////////////////////////////////////////////////////////
-//
-// This file is subject to the terms of the GNU General Public License as
-// published by the Free Software Foundation. A copy of this license is
-// included with this software distribution in the file COPYING. If you
-// do not have a copy, you may obtain a copy by writing to the Free
-// Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
-//
-// This software is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details
-/////////////////////////////////////////////////////////////////////////////
-// CVS Log
-//
+/////////////////////////////////////////////////////////////////////////////
+// $Id: greedy2frame_template.c,v 1.4 2004/01/05 01:47:26 tmmm Exp $
+/////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2000 John Adcock, Tom Barry, Steve Grimm All rights reserved.
+// port copyright (c) 2003 Miguel Freitas
+/////////////////////////////////////////////////////////////////////////////
+//
+// This file is subject to the terms of the GNU General Public License as
+// published by the Free Software Foundation. A copy of this license is
+// included with this software distribution in the file COPYING. If you
+// do not have a copy, you may obtain a copy by writing to the Free
+// Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+//
+// This software is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details
+/////////////////////////////////////////////////////////////////////////////
+// CVS Log
+//
// $Log: greedy2frame_template.c,v $
+// Revision 1.4 2004/01/05 01:47:26 tmmm
+// DOS/Win CRs are forbidden, verboten, interdit
+//
// Revision 1.3 2004/01/02 20:53:43 miguelfreitas
// better MANGLE from ffmpeg
-//
-// Revision 1.2 2004/01/02 20:47:03 miguelfreitas
-// my small contribution to the cygwin port ;-)
-//
-// Revision 1.1 2003/06/22 17:30:03 miguelfreitas
-// use our own port of greedy2frame (tvtime port is currently broken)
-//
-// Revision 1.8 2001/11/23 17:18:54 adcockj
-// Fixed silly and/or confusion
-//
-// Revision 1.7 2001/11/22 22:27:00 adcockj
-// Bug Fixes
-//
-// Revision 1.6 2001/11/21 15:21:40 adcockj
-// Renamed DEINTERLACE_INFO to TDeinterlaceInfo in line with standards
-// Changed TDeinterlaceInfo structure to have history of pictures.
-//
-// Revision 1.5 2001/07/31 06:48:33 adcockj
-// Fixed index bug spotted by Peter Gubanov
-//
-// Revision 1.4 2001/07/13 16:13:33 adcockj
-// Added CVS tags and removed tabs
-//
-/////////////////////////////////////////////////////////////////////////////
-
-// This is the implementation of the Greedy 2-frame deinterlace algorithm described in
-// DI_Greedy2Frame.c. It's in a separate file so we can compile variants for different
-// CPU types; most of the code is the same in the different variants.
-
-
-///////////////////////////////////////////////////////////////////////////////
-// Field 1 | Field 2 | Field 3 | Field 4 |
-// T0 | | T1 | |
-// | M0 | | M1 |
-// B0 | | B1 | |
-//
-
-
-// debugging feature
-// output the value of mm4 at this point which is pink where we will weave
-// and green were we are going to bob
-// uncomment next line to see this
-//#define CHECK_BOBWEAVE
-
-#if !defined(MASKS_DEFINED)
-#define MASKS_DEFINED
- static const int64_t YMask = 0x00ff00ff00ff00ffll;
- static const int64_t Mask = 0x7f7f7f7f7f7f7f7fll;
- static const int64_t DwordOne = 0x0000000100000001ll;
- static const int64_t DwordTwo = 0x0000000200000002ll;
- static int64_t qwGreedyTwoFrameThreshold;
-#endif
-
-#if !defined(MANGLE)
-# if defined(__MINGW32__) || defined(__CYGWIN__) || \
- defined(__OS2__) || (defined (__OpenBSD__) && !defined(__ELF__))
-# define MANGLE(a) "_" #a
-# else
-# define MANGLE(a) #a
-# endif
-#endif
-
-#if defined(IS_SSE)
-static void DeinterlaceGreedy2Frame_SSE(uint8_t *output, int outstride,
- deinterlace_frame_data_t *data,
- int bottom_field, int width, int height )
-#elif defined(IS_3DNOW)
-static void DeinterlaceGreedy2Frame_3DNOW(uint8_t *output, int outstride,
- deinterlace_frame_data_t *data,
- int bottom_field, int width, int height )
-#else
-static void DeinterlaceGreedy2Frame_MMX(uint8_t *output, int outstride,
- deinterlace_frame_data_t *data,
- int bottom_field, int width, int height )
-#endif
-{
-#ifdef ARCH_X86
- int Line;
- int stride = width * 2;
- register uint8_t* M1;
- register uint8_t* M0;
- register uint8_t* T0;
- register uint8_t* T1;
- register uint8_t* B1;
- register uint8_t* B0;
- uint8_t* Dest = output;
- register uint8_t* Dest2;
- register int count;
- uint32_t Pitch = stride*2;
- uint32_t LineLength = stride;
- uint32_t PitchRest = Pitch - (LineLength >> 3)*8;
-
- qwGreedyTwoFrameThreshold = GreedyTwoFrameThreshold;
- qwGreedyTwoFrameThreshold += (GreedyTwoFrameThreshold2 << 8);
- qwGreedyTwoFrameThreshold += (qwGreedyTwoFrameThreshold << 48) +
- (qwGreedyTwoFrameThreshold << 32) +
- (qwGreedyTwoFrameThreshold << 16);
-
-
- if( bottom_field ) {
- M1 = data->f0 + stride;
- T1 = data->f0;
- B1 = T1 + Pitch;
- M0 = data->f1 + stride;
- T0 = data->f1;
- B0 = T0 + Pitch;
- } else {
- M1 = data->f0 + Pitch;
- T1 = data->f1 + stride;
- B1 = T1 + Pitch;
- M0 = data->f1 + Pitch;
- T0 = data->f2 + stride;
- B0 = T0 + Pitch;
-
- xine_fast_memcpy(Dest, M1, LineLength);
- Dest += outstride;
- }
-
- for (Line = 0; Line < (height / 2) - 1; ++Line)
- {
- // Always use the most recent data verbatim. By definition it's correct (it'd
- // be shown on an interlaced display) and our job is to fill in the spaces
- // between the new lines.
- xine_fast_memcpy(Dest, T1, stride);
- Dest += outstride;
- Dest2 = Dest;
-
- count = LineLength >> 3;
- do {
- asm volatile(
- // Figure out what to do with the scanline above the one we just copied.
- // See above for a description of the algorithm.
-
- ".align 8 \n\t"
- "movq "MANGLE(Mask)", %%mm6 \n\t"
-
- "movq %0, %%mm1 \n\t" // T1
- "movq %1, %%mm0 \n\t" // M1
- "movq %2, %%mm3 \n\t" // B1
- "movq %3, %%mm2 \n\t" // M0
- : /* no output */
- : "m" (*T1), "m" (*M1),
- "m" (*B1), "m" (*M0) );
-
-
- asm volatile(
- // Figure out what to do with the scanline above the one we just copied.
- // See above for a description of the algorithm.
-
- // Average T1 and B1 so we can do interpolated bobbing if we bob onto T1.
- "movq %%mm3, %%mm7 \n\t" // mm7 = B1
-
-#if defined(IS_SSE)
- "pavgb %%mm1, %%mm7 \n\t"
-#elif defined(IS_3DNOW)
- "pavgusb %%mm1, %%mm7 \n\t"
-#else
-
- "movq %%mm1, %%mm5 \n\t" // mm5 = T1
- "psrlw $1, %%mm7 \n\t" // mm7 = B1 / 2
- "pand %%mm6, %%mm7 \n\t" // mask off lower bits
- "psrlw $1, %%mm5 \n\t" // mm5 = T1 / 2
- "pand %%mm6, %%mm5 \n\t" // mask off lower bits
- "paddw %%mm5, %%mm7 \n\t" // mm7 = (T1 + B1) / 2
-#endif
-
- // calculate |M1-M0| put result in mm4 need to keep mm0 intact
- // if we have a good processor then make mm0 the average of M1 and M0
- // which should make weave look better when there is small amounts of
- // movement
-#if defined(IS_SSE)
- "movq %%mm0, %%mm4 \n\t"
- "movq %%mm2, %%mm5 \n\t"
- "psubusb %%mm2, %%mm4 \n\t"
- "psubusb %%mm0, %%mm5 \n\t"
- "por %%mm5, %%mm4 \n\t"
- "psrlw $1, %%mm4 \n\t"
- "pavgb %%mm2, %%mm0 \n\t"
- "pand %%mm6, %%mm4 \n\t"
-#elif defined(IS_3DNOW)
- "movq %%mm0, %%mm4 \n\t"
- "movq %%mm2, %%mm5 \n\t"
- "psubusb %%mm2, %%mm4 \n\t"
- "psubusb %%mm0, %%mm5 \n\t"
- "por %%mm5, %%mm4 \n\t"
- "psrlw $1, %%mm4 \n\t"
- "pavgusb %%mm2, %%mm0 \n\t"
- "pand %%mm6, %%mm4 \n\t"
-#else
- "movq %%mm0, %%mm4 \n\t"
- "psubusb %%mm2, %%mm4 \n\t"
- "psubusb %%mm0, %%mm2 \n\t"
- "por %%mm2, %%mm4 \n\t"
- "psrlw $1, %%mm4 \n\t"
- "pand %%mm6, %%mm4 \n\t"
-#endif
-
- // if |M1-M0| > Threshold we want dword worth of twos
- "pcmpgtb "MANGLE(qwGreedyTwoFrameThreshold)", %%mm4 \n\t"
- "pand "MANGLE(Mask)", %%mm4 \n\t" // get rid of any sign bit
- "pcmpgtd "MANGLE(DwordOne)", %%mm4 \n\t" // do we want to bob
- "pandn "MANGLE(DwordTwo)", %%mm4 \n\t"
-
- "movq %1, %%mm2 \n\t" // mm2 = T0
-
- // calculate |T1-T0| put result in mm5
- "movq %%mm2, %%mm5 \n\t"
- "psubusb %%mm1, %%mm5 \n\t"
- "psubusb %%mm2, %%mm1 \n\t"
- "por %%mm1, %%mm5 \n\t"
- "psrlw $1, %%mm5 \n\t"
- "pand %%mm6, %%mm5 \n\t"
-
- // if |T1-T0| > Threshold we want dword worth of ones
- "pcmpgtb "MANGLE(qwGreedyTwoFrameThreshold)", %%mm5 \n\t"
- "pand %%mm6, %%mm5 \n\t" // get rid of any sign bit
-
- "pcmpgtd "MANGLE(DwordOne)", %%mm5 \n\t"
- "pandn "MANGLE(DwordOne)", %%mm5 \n\t"
- "paddd %%mm5, %%mm4 \n\t"
-
- "movq %2, %%mm2 \n\t" // B0
-
- // calculate |B1-B0| put result in mm5
- "movq %%mm2, %%mm5 \n\t"
- "psubusb %%mm3, %%mm5 \n\t"
- "psubusb %%mm2, %%mm3 \n\t"
- "por %%mm3, %%mm5 \n\t"
- "psrlw $1, %%mm5 \n\t"
- "pand %%mm6, %%mm5 \n\t"
-
- // if |B1-B0| > Threshold we want dword worth of ones
- "pcmpgtb "MANGLE(qwGreedyTwoFrameThreshold)", %%mm5 \n\t"
- "pand %%mm6, %%mm5 \n\t" // get rid of any sign bit
- "pcmpgtd "MANGLE(DwordOne)", %%mm5 \n\t"
- "pandn "MANGLE(DwordOne)", %%mm5 \n\t"
- "paddd %%mm5, %%mm4 \n\t"
-
- "pcmpgtd "MANGLE(DwordTwo)", %%mm4 \n\t"
-
-// debugging feature
-// output the value of mm4 at this point which is pink where we will weave
-// and green were we are going to bob
-#ifdef CHECK_BOBWEAVE
-#ifdef IS_SSE
- "movntq %%mm4, %0 \n\t"
-#else
- "movq %%mm4, %0 \n\t"
-#endif
-#else
-
- "movq %%mm4, %%mm5 \n\t"
- // mm4 now is 1 where we want to weave and 0 where we want to bob
- "pand %%mm0, %%mm4 \n\t"
- "pandn %%mm7, %%mm5 \n\t"
- "por %%mm5, %%mm4 \n\t"
-#ifdef IS_SSE
- "movntq %%mm4, %0 \n\t"
-#else
- "movq %%mm4, %0 \n\t"
-#endif
-#endif
-
- : "=m" (*Dest2)
- : "m" (*T0), "m" (*B0) );
-
- // Advance to the next set of pixels.
- T1 += 8;
- M1 += 8;
- B1 += 8;
- M0 += 8;
- T0 += 8;
- B0 += 8;
- Dest2 += 8;
-
- } while( --count );
-
- Dest += outstride;
-
- M1 += PitchRest;
- T1 += PitchRest;
- B1 += PitchRest;
- M0 += PitchRest;
- T0 += PitchRest;
- B0 += PitchRest;
- }
-
-#ifdef IS_SSE
- asm("sfence\n\t");
-#endif
-
- if( bottom_field )
- {
- xine_fast_memcpy(Dest, T1, stride);
- Dest += outstride;
- xine_fast_memcpy(Dest, M1, stride);
- }
- else
- {
- xine_fast_memcpy(Dest, T1, stride);
- }
-
- // clear out the MMX registers ready for doing floating point
- // again
- asm("emms\n\t");
-#endif
-}
-
+//
+// Revision 1.2 2004/01/02 20:47:03 miguelfreitas
+// my small contribution to the cygwin port ;-)
+//
+// Revision 1.1 2003/06/22 17:30:03 miguelfreitas
+// use our own port of greedy2frame (tvtime port is currently broken)
+//
+// Revision 1.8 2001/11/23 17:18:54 adcockj
+// Fixed silly and/or confusion
+//
+// Revision 1.7 2001/11/22 22:27:00 adcockj
+// Bug Fixes
+//
+// Revision 1.6 2001/11/21 15:21:40 adcockj
+// Renamed DEINTERLACE_INFO to TDeinterlaceInfo in line with standards
+// Changed TDeinterlaceInfo structure to have history of pictures.
+//
+// Revision 1.5 2001/07/31 06:48:33 adcockj
+// Fixed index bug spotted by Peter Gubanov
+//
+// Revision 1.4 2001/07/13 16:13:33 adcockj
+// Added CVS tags and removed tabs
+//
+/////////////////////////////////////////////////////////////////////////////
+
+// This is the implementation of the Greedy 2-frame deinterlace algorithm described in
+// DI_Greedy2Frame.c. It's in a separate file so we can compile variants for different
+// CPU types; most of the code is the same in the different variants.
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Field 1 | Field 2 | Field 3 | Field 4 |
+// T0 | | T1 | |
+// | M0 | | M1 |
+// B0 | | B1 | |
+//
+
+
+// debugging feature
+// output the value of mm4 at this point which is pink where we will weave
+// and green were we are going to bob
+// uncomment next line to see this
+//#define CHECK_BOBWEAVE
+
+#if !defined(MASKS_DEFINED)
+#define MASKS_DEFINED
+ static const int64_t YMask = 0x00ff00ff00ff00ffll;
+ static const int64_t Mask = 0x7f7f7f7f7f7f7f7fll;
+ static const int64_t DwordOne = 0x0000000100000001ll;
+ static const int64_t DwordTwo = 0x0000000200000002ll;
+ static int64_t qwGreedyTwoFrameThreshold;
+#endif
+
+#if !defined(MANGLE)
+# if defined(__MINGW32__) || defined(__CYGWIN__) || \
+ defined(__OS2__) || (defined (__OpenBSD__) && !defined(__ELF__))
+# define MANGLE(a) "_" #a
+# else
+# define MANGLE(a) #a
+# endif
+#endif
+
+#if defined(IS_SSE)
+static void DeinterlaceGreedy2Frame_SSE(uint8_t *output, int outstride,
+ deinterlace_frame_data_t *data,
+ int bottom_field, int width, int height )
+#elif defined(IS_3DNOW)
+static void DeinterlaceGreedy2Frame_3DNOW(uint8_t *output, int outstride,
+ deinterlace_frame_data_t *data,
+ int bottom_field, int width, int height )
+#else
+static void DeinterlaceGreedy2Frame_MMX(uint8_t *output, int outstride,
+ deinterlace_frame_data_t *data,
+ int bottom_field, int width, int height )
+#endif
+{
+#ifdef ARCH_X86
+ int Line;
+ int stride = width * 2;
+ register uint8_t* M1;
+ register uint8_t* M0;
+ register uint8_t* T0;
+ register uint8_t* T1;
+ register uint8_t* B1;
+ register uint8_t* B0;
+ uint8_t* Dest = output;
+ register uint8_t* Dest2;
+ register int count;
+ uint32_t Pitch = stride*2;
+ uint32_t LineLength = stride;
+ uint32_t PitchRest = Pitch - (LineLength >> 3)*8;
+
+ qwGreedyTwoFrameThreshold = GreedyTwoFrameThreshold;
+ qwGreedyTwoFrameThreshold += (GreedyTwoFrameThreshold2 << 8);
+ qwGreedyTwoFrameThreshold += (qwGreedyTwoFrameThreshold << 48) +
+ (qwGreedyTwoFrameThreshold << 32) +
+ (qwGreedyTwoFrameThreshold << 16);
+
+
+ if( bottom_field ) {
+ M1 = data->f0 + stride;
+ T1 = data->f0;
+ B1 = T1 + Pitch;
+ M0 = data->f1 + stride;
+ T0 = data->f1;
+ B0 = T0 + Pitch;
+ } else {
+ M1 = data->f0 + Pitch;
+ T1 = data->f1 + stride;
+ B1 = T1 + Pitch;
+ M0 = data->f1 + Pitch;
+ T0 = data->f2 + stride;
+ B0 = T0 + Pitch;
+
+ xine_fast_memcpy(Dest, M1, LineLength);
+ Dest += outstride;
+ }
+
+ for (Line = 0; Line < (height / 2) - 1; ++Line)
+ {
+ // Always use the most recent data verbatim. By definition it's correct (it'd
+ // be shown on an interlaced display) and our job is to fill in the spaces
+ // between the new lines.
+ xine_fast_memcpy(Dest, T1, stride);
+ Dest += outstride;
+ Dest2 = Dest;
+
+ count = LineLength >> 3;
+ do {
+ asm volatile(
+ // Figure out what to do with the scanline above the one we just copied.
+ // See above for a description of the algorithm.
+
+ ".align 8 \n\t"
+ "movq "MANGLE(Mask)", %%mm6 \n\t"
+
+ "movq %0, %%mm1 \n\t" // T1
+ "movq %1, %%mm0 \n\t" // M1
+ "movq %2, %%mm3 \n\t" // B1
+ "movq %3, %%mm2 \n\t" // M0
+ : /* no output */
+ : "m" (*T1), "m" (*M1),
+ "m" (*B1), "m" (*M0) );
+
+
+ asm volatile(
+ // Figure out what to do with the scanline above the one we just copied.
+ // See above for a description of the algorithm.
+
+ // Average T1 and B1 so we can do interpolated bobbing if we bob onto T1.
+ "movq %%mm3, %%mm7 \n\t" // mm7 = B1
+
+#if defined(IS_SSE)
+ "pavgb %%mm1, %%mm7 \n\t"
+#elif defined(IS_3DNOW)
+ "pavgusb %%mm1, %%mm7 \n\t"
+#else
+
+ "movq %%mm1, %%mm5 \n\t" // mm5 = T1
+ "psrlw $1, %%mm7 \n\t" // mm7 = B1 / 2
+ "pand %%mm6, %%mm7 \n\t" // mask off lower bits
+ "psrlw $1, %%mm5 \n\t" // mm5 = T1 / 2
+ "pand %%mm6, %%mm5 \n\t" // mask off lower bits
+ "paddw %%mm5, %%mm7 \n\t" // mm7 = (T1 + B1) / 2
+#endif
+
+ // calculate |M1-M0| put result in mm4 need to keep mm0 intact
+ // if we have a good processor then make mm0 the average of M1 and M0
+ // which should make weave look better when there is small amounts of
+ // movement
+#if defined(IS_SSE)
+ "movq %%mm0, %%mm4 \n\t"
+ "movq %%mm2, %%mm5 \n\t"
+ "psubusb %%mm2, %%mm4 \n\t"
+ "psubusb %%mm0, %%mm5 \n\t"
+ "por %%mm5, %%mm4 \n\t"
+ "psrlw $1, %%mm4 \n\t"
+ "pavgb %%mm2, %%mm0 \n\t"
+ "pand %%mm6, %%mm4 \n\t"
+#elif defined(IS_3DNOW)
+ "movq %%mm0, %%mm4 \n\t"
+ "movq %%mm2, %%mm5 \n\t"
+ "psubusb %%mm2, %%mm4 \n\t"
+ "psubusb %%mm0, %%mm5 \n\t"
+ "por %%mm5, %%mm4 \n\t"
+ "psrlw $1, %%mm4 \n\t"
+ "pavgusb %%mm2, %%mm0 \n\t"
+ "pand %%mm6, %%mm4 \n\t"
+#else
+ "movq %%mm0, %%mm4 \n\t"
+ "psubusb %%mm2, %%mm4 \n\t"
+ "psubusb %%mm0, %%mm2 \n\t"
+ "por %%mm2, %%mm4 \n\t"
+ "psrlw $1, %%mm4 \n\t"
+ "pand %%mm6, %%mm4 \n\t"
+#endif
+
+ // if |M1-M0| > Threshold we want dword worth of twos
+ "pcmpgtb "MANGLE(qwGreedyTwoFrameThreshold)", %%mm4 \n\t"
+ "pand "MANGLE(Mask)", %%mm4 \n\t" // get rid of any sign bit
+ "pcmpgtd "MANGLE(DwordOne)", %%mm4 \n\t" // do we want to bob
+ "pandn "MANGLE(DwordTwo)", %%mm4 \n\t"
+
+ "movq %1, %%mm2 \n\t" // mm2 = T0
+
+ // calculate |T1-T0| put result in mm5
+ "movq %%mm2, %%mm5 \n\t"
+ "psubusb %%mm1, %%mm5 \n\t"
+ "psubusb %%mm2, %%mm1 \n\t"
+ "por %%mm1, %%mm5 \n\t"
+ "psrlw $1, %%mm5 \n\t"
+ "pand %%mm6, %%mm5 \n\t"
+
+ // if |T1-T0| > Threshold we want dword worth of ones
+ "pcmpgtb "MANGLE(qwGreedyTwoFrameThreshold)", %%mm5 \n\t"
+ "pand %%mm6, %%mm5 \n\t" // get rid of any sign bit
+
+ "pcmpgtd "MANGLE(DwordOne)", %%mm5 \n\t"
+ "pandn "MANGLE(DwordOne)", %%mm5 \n\t"
+ "paddd %%mm5, %%mm4 \n\t"
+
+ "movq %2, %%mm2 \n\t" // B0
+
+ // calculate |B1-B0| put result in mm5
+ "movq %%mm2, %%mm5 \n\t"
+ "psubusb %%mm3, %%mm5 \n\t"
+ "psubusb %%mm2, %%mm3 \n\t"
+ "por %%mm3, %%mm5 \n\t"
+ "psrlw $1, %%mm5 \n\t"
+ "pand %%mm6, %%mm5 \n\t"
+
+ // if |B1-B0| > Threshold we want dword worth of ones
+ "pcmpgtb "MANGLE(qwGreedyTwoFrameThreshold)", %%mm5 \n\t"
+ "pand %%mm6, %%mm5 \n\t" // get rid of any sign bit
+ "pcmpgtd "MANGLE(DwordOne)", %%mm5 \n\t"
+ "pandn "MANGLE(DwordOne)", %%mm5 \n\t"
+ "paddd %%mm5, %%mm4 \n\t"
+
+ "pcmpgtd "MANGLE(DwordTwo)", %%mm4 \n\t"
+
+// debugging feature
+// output the value of mm4 at this point which is pink where we will weave
+// and green were we are going to bob
+#ifdef CHECK_BOBWEAVE
+#ifdef IS_SSE
+ "movntq %%mm4, %0 \n\t"
+#else
+ "movq %%mm4, %0 \n\t"
+#endif
+#else
+
+ "movq %%mm4, %%mm5 \n\t"
+ // mm4 now is 1 where we want to weave and 0 where we want to bob
+ "pand %%mm0, %%mm4 \n\t"
+ "pandn %%mm7, %%mm5 \n\t"
+ "por %%mm5, %%mm4 \n\t"
+#ifdef IS_SSE
+ "movntq %%mm4, %0 \n\t"
+#else
+ "movq %%mm4, %0 \n\t"
+#endif
+#endif
+
+ : "=m" (*Dest2)
+ : "m" (*T0), "m" (*B0) );
+
+ // Advance to the next set of pixels.
+ T1 += 8;
+ M1 += 8;
+ B1 += 8;
+ M0 += 8;
+ T0 += 8;
+ B0 += 8;
+ Dest2 += 8;
+
+ } while( --count );
+
+ Dest += outstride;
+
+ M1 += PitchRest;
+ T1 += PitchRest;
+ B1 += PitchRest;
+ M0 += PitchRest;
+ T0 += PitchRest;
+ B0 += PitchRest;
+ }
+
+#ifdef IS_SSE
+ asm("sfence\n\t");
+#endif
+
+ if( bottom_field )
+ {
+ xine_fast_memcpy(Dest, T1, stride);
+ Dest += outstride;
+ xine_fast_memcpy(Dest, M1, stride);
+ }
+ else
+ {
+ xine_fast_memcpy(Dest, T1, stride);
+ }
+
+ // clear out the MMX registers ready for doing floating point
+ // again
+ asm("emms\n\t");
+#endif
+}
+