From f340de13a2004777ccd161fa19b5e57402ee4f1f Mon Sep 17 00:00:00 2001 From: Mike Melanson Date: Mon, 5 Jan 2004 01:47:26 +0000 Subject: DOS/Win CRs are forbidden, verboten, interdit CVS patchset: 6001 CVS date: 2004/01/05 01:47:26 --- .../deinterlace/plugins/greedy2frame_template.c | 659 +++++++++++---------- 1 file changed, 331 insertions(+), 328 deletions(-) (limited to 'src') diff --git a/src/post/deinterlace/plugins/greedy2frame_template.c b/src/post/deinterlace/plugins/greedy2frame_template.c index d3d3d7813..2380ee466 100644 --- a/src/post/deinterlace/plugins/greedy2frame_template.c +++ b/src/post/deinterlace/plugins/greedy2frame_template.c @@ -1,331 +1,334 @@ -///////////////////////////////////////////////////////////////////////////// -// $Id: greedy2frame_template.c,v 1.3 2004/01/02 20:53:43 miguelfreitas Exp $ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2000 John Adcock, Tom Barry, Steve Grimm All rights reserved. -// port copyright (c) 2003 Miguel Freitas -///////////////////////////////////////////////////////////////////////////// -// -// This file is subject to the terms of the GNU General Public License as -// published by the Free Software Foundation. A copy of this license is -// included with this software distribution in the file COPYING. If you -// do not have a copy, you may obtain a copy by writing to the Free -// Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. -// -// This software is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details -///////////////////////////////////////////////////////////////////////////// -// CVS Log -// +///////////////////////////////////////////////////////////////////////////// +// $Id: greedy2frame_template.c,v 1.4 2004/01/05 01:47:26 tmmm Exp $ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2000 John Adcock, Tom Barry, Steve Grimm All rights reserved. +// port copyright (c) 2003 Miguel Freitas +///////////////////////////////////////////////////////////////////////////// +// +// This file is subject to the terms of the GNU General Public License as +// published by the Free Software Foundation. A copy of this license is +// included with this software distribution in the file COPYING. If you +// do not have a copy, you may obtain a copy by writing to the Free +// Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. +// +// This software is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details +///////////////////////////////////////////////////////////////////////////// +// CVS Log +// // $Log: greedy2frame_template.c,v $ +// Revision 1.4 2004/01/05 01:47:26 tmmm +// DOS/Win CRs are forbidden, verboten, interdit +// // Revision 1.3 2004/01/02 20:53:43 miguelfreitas // better MANGLE from ffmpeg -// -// Revision 1.2 2004/01/02 20:47:03 miguelfreitas -// my small contribution to the cygwin port ;-) -// -// Revision 1.1 2003/06/22 17:30:03 miguelfreitas -// use our own port of greedy2frame (tvtime port is currently broken) -// -// Revision 1.8 2001/11/23 17:18:54 adcockj -// Fixed silly and/or confusion -// -// Revision 1.7 2001/11/22 22:27:00 adcockj -// Bug Fixes -// -// Revision 1.6 2001/11/21 15:21:40 adcockj -// Renamed DEINTERLACE_INFO to TDeinterlaceInfo in line with standards -// Changed TDeinterlaceInfo structure to have history of pictures. -// -// Revision 1.5 2001/07/31 06:48:33 adcockj -// Fixed index bug spotted by Peter Gubanov -// -// Revision 1.4 2001/07/13 16:13:33 adcockj -// Added CVS tags and removed tabs -// -///////////////////////////////////////////////////////////////////////////// - -// This is the implementation of the Greedy 2-frame deinterlace algorithm described in -// DI_Greedy2Frame.c. It's in a separate file so we can compile variants for different -// CPU types; most of the code is the same in the different variants. - - -/////////////////////////////////////////////////////////////////////////////// -// Field 1 | Field 2 | Field 3 | Field 4 | -// T0 | | T1 | | -// | M0 | | M1 | -// B0 | | B1 | | -// - - -// debugging feature -// output the value of mm4 at this point which is pink where we will weave -// and green were we are going to bob -// uncomment next line to see this -//#define CHECK_BOBWEAVE - -#if !defined(MASKS_DEFINED) -#define MASKS_DEFINED - static const int64_t YMask = 0x00ff00ff00ff00ffll; - static const int64_t Mask = 0x7f7f7f7f7f7f7f7fll; - static const int64_t DwordOne = 0x0000000100000001ll; - static const int64_t DwordTwo = 0x0000000200000002ll; - static int64_t qwGreedyTwoFrameThreshold; -#endif - -#if !defined(MANGLE) -# if defined(__MINGW32__) || defined(__CYGWIN__) || \ - defined(__OS2__) || (defined (__OpenBSD__) && !defined(__ELF__)) -# define MANGLE(a) "_" #a -# else -# define MANGLE(a) #a -# endif -#endif - -#if defined(IS_SSE) -static void DeinterlaceGreedy2Frame_SSE(uint8_t *output, int outstride, - deinterlace_frame_data_t *data, - int bottom_field, int width, int height ) -#elif defined(IS_3DNOW) -static void DeinterlaceGreedy2Frame_3DNOW(uint8_t *output, int outstride, - deinterlace_frame_data_t *data, - int bottom_field, int width, int height ) -#else -static void DeinterlaceGreedy2Frame_MMX(uint8_t *output, int outstride, - deinterlace_frame_data_t *data, - int bottom_field, int width, int height ) -#endif -{ -#ifdef ARCH_X86 - int Line; - int stride = width * 2; - register uint8_t* M1; - register uint8_t* M0; - register uint8_t* T0; - register uint8_t* T1; - register uint8_t* B1; - register uint8_t* B0; - uint8_t* Dest = output; - register uint8_t* Dest2; - register int count; - uint32_t Pitch = stride*2; - uint32_t LineLength = stride; - uint32_t PitchRest = Pitch - (LineLength >> 3)*8; - - qwGreedyTwoFrameThreshold = GreedyTwoFrameThreshold; - qwGreedyTwoFrameThreshold += (GreedyTwoFrameThreshold2 << 8); - qwGreedyTwoFrameThreshold += (qwGreedyTwoFrameThreshold << 48) + - (qwGreedyTwoFrameThreshold << 32) + - (qwGreedyTwoFrameThreshold << 16); - - - if( bottom_field ) { - M1 = data->f0 + stride; - T1 = data->f0; - B1 = T1 + Pitch; - M0 = data->f1 + stride; - T0 = data->f1; - B0 = T0 + Pitch; - } else { - M1 = data->f0 + Pitch; - T1 = data->f1 + stride; - B1 = T1 + Pitch; - M0 = data->f1 + Pitch; - T0 = data->f2 + stride; - B0 = T0 + Pitch; - - xine_fast_memcpy(Dest, M1, LineLength); - Dest += outstride; - } - - for (Line = 0; Line < (height / 2) - 1; ++Line) - { - // Always use the most recent data verbatim. By definition it's correct (it'd - // be shown on an interlaced display) and our job is to fill in the spaces - // between the new lines. - xine_fast_memcpy(Dest, T1, stride); - Dest += outstride; - Dest2 = Dest; - - count = LineLength >> 3; - do { - asm volatile( - // Figure out what to do with the scanline above the one we just copied. - // See above for a description of the algorithm. - - ".align 8 \n\t" - "movq "MANGLE(Mask)", %%mm6 \n\t" - - "movq %0, %%mm1 \n\t" // T1 - "movq %1, %%mm0 \n\t" // M1 - "movq %2, %%mm3 \n\t" // B1 - "movq %3, %%mm2 \n\t" // M0 - : /* no output */ - : "m" (*T1), "m" (*M1), - "m" (*B1), "m" (*M0) ); - - - asm volatile( - // Figure out what to do with the scanline above the one we just copied. - // See above for a description of the algorithm. - - // Average T1 and B1 so we can do interpolated bobbing if we bob onto T1. - "movq %%mm3, %%mm7 \n\t" // mm7 = B1 - -#if defined(IS_SSE) - "pavgb %%mm1, %%mm7 \n\t" -#elif defined(IS_3DNOW) - "pavgusb %%mm1, %%mm7 \n\t" -#else - - "movq %%mm1, %%mm5 \n\t" // mm5 = T1 - "psrlw $1, %%mm7 \n\t" // mm7 = B1 / 2 - "pand %%mm6, %%mm7 \n\t" // mask off lower bits - "psrlw $1, %%mm5 \n\t" // mm5 = T1 / 2 - "pand %%mm6, %%mm5 \n\t" // mask off lower bits - "paddw %%mm5, %%mm7 \n\t" // mm7 = (T1 + B1) / 2 -#endif - - // calculate |M1-M0| put result in mm4 need to keep mm0 intact - // if we have a good processor then make mm0 the average of M1 and M0 - // which should make weave look better when there is small amounts of - // movement -#if defined(IS_SSE) - "movq %%mm0, %%mm4 \n\t" - "movq %%mm2, %%mm5 \n\t" - "psubusb %%mm2, %%mm4 \n\t" - "psubusb %%mm0, %%mm5 \n\t" - "por %%mm5, %%mm4 \n\t" - "psrlw $1, %%mm4 \n\t" - "pavgb %%mm2, %%mm0 \n\t" - "pand %%mm6, %%mm4 \n\t" -#elif defined(IS_3DNOW) - "movq %%mm0, %%mm4 \n\t" - "movq %%mm2, %%mm5 \n\t" - "psubusb %%mm2, %%mm4 \n\t" - "psubusb %%mm0, %%mm5 \n\t" - "por %%mm5, %%mm4 \n\t" - "psrlw $1, %%mm4 \n\t" - "pavgusb %%mm2, %%mm0 \n\t" - "pand %%mm6, %%mm4 \n\t" -#else - "movq %%mm0, %%mm4 \n\t" - "psubusb %%mm2, %%mm4 \n\t" - "psubusb %%mm0, %%mm2 \n\t" - "por %%mm2, %%mm4 \n\t" - "psrlw $1, %%mm4 \n\t" - "pand %%mm6, %%mm4 \n\t" -#endif - - // if |M1-M0| > Threshold we want dword worth of twos - "pcmpgtb "MANGLE(qwGreedyTwoFrameThreshold)", %%mm4 \n\t" - "pand "MANGLE(Mask)", %%mm4 \n\t" // get rid of any sign bit - "pcmpgtd "MANGLE(DwordOne)", %%mm4 \n\t" // do we want to bob - "pandn "MANGLE(DwordTwo)", %%mm4 \n\t" - - "movq %1, %%mm2 \n\t" // mm2 = T0 - - // calculate |T1-T0| put result in mm5 - "movq %%mm2, %%mm5 \n\t" - "psubusb %%mm1, %%mm5 \n\t" - "psubusb %%mm2, %%mm1 \n\t" - "por %%mm1, %%mm5 \n\t" - "psrlw $1, %%mm5 \n\t" - "pand %%mm6, %%mm5 \n\t" - - // if |T1-T0| > Threshold we want dword worth of ones - "pcmpgtb "MANGLE(qwGreedyTwoFrameThreshold)", %%mm5 \n\t" - "pand %%mm6, %%mm5 \n\t" // get rid of any sign bit - - "pcmpgtd "MANGLE(DwordOne)", %%mm5 \n\t" - "pandn "MANGLE(DwordOne)", %%mm5 \n\t" - "paddd %%mm5, %%mm4 \n\t" - - "movq %2, %%mm2 \n\t" // B0 - - // calculate |B1-B0| put result in mm5 - "movq %%mm2, %%mm5 \n\t" - "psubusb %%mm3, %%mm5 \n\t" - "psubusb %%mm2, %%mm3 \n\t" - "por %%mm3, %%mm5 \n\t" - "psrlw $1, %%mm5 \n\t" - "pand %%mm6, %%mm5 \n\t" - - // if |B1-B0| > Threshold we want dword worth of ones - "pcmpgtb "MANGLE(qwGreedyTwoFrameThreshold)", %%mm5 \n\t" - "pand %%mm6, %%mm5 \n\t" // get rid of any sign bit - "pcmpgtd "MANGLE(DwordOne)", %%mm5 \n\t" - "pandn "MANGLE(DwordOne)", %%mm5 \n\t" - "paddd %%mm5, %%mm4 \n\t" - - "pcmpgtd "MANGLE(DwordTwo)", %%mm4 \n\t" - -// debugging feature -// output the value of mm4 at this point which is pink where we will weave -// and green were we are going to bob -#ifdef CHECK_BOBWEAVE -#ifdef IS_SSE - "movntq %%mm4, %0 \n\t" -#else - "movq %%mm4, %0 \n\t" -#endif -#else - - "movq %%mm4, %%mm5 \n\t" - // mm4 now is 1 where we want to weave and 0 where we want to bob - "pand %%mm0, %%mm4 \n\t" - "pandn %%mm7, %%mm5 \n\t" - "por %%mm5, %%mm4 \n\t" -#ifdef IS_SSE - "movntq %%mm4, %0 \n\t" -#else - "movq %%mm4, %0 \n\t" -#endif -#endif - - : "=m" (*Dest2) - : "m" (*T0), "m" (*B0) ); - - // Advance to the next set of pixels. - T1 += 8; - M1 += 8; - B1 += 8; - M0 += 8; - T0 += 8; - B0 += 8; - Dest2 += 8; - - } while( --count ); - - Dest += outstride; - - M1 += PitchRest; - T1 += PitchRest; - B1 += PitchRest; - M0 += PitchRest; - T0 += PitchRest; - B0 += PitchRest; - } - -#ifdef IS_SSE - asm("sfence\n\t"); -#endif - - if( bottom_field ) - { - xine_fast_memcpy(Dest, T1, stride); - Dest += outstride; - xine_fast_memcpy(Dest, M1, stride); - } - else - { - xine_fast_memcpy(Dest, T1, stride); - } - - // clear out the MMX registers ready for doing floating point - // again - asm("emms\n\t"); -#endif -} - +// +// Revision 1.2 2004/01/02 20:47:03 miguelfreitas +// my small contribution to the cygwin port ;-) +// +// Revision 1.1 2003/06/22 17:30:03 miguelfreitas +// use our own port of greedy2frame (tvtime port is currently broken) +// +// Revision 1.8 2001/11/23 17:18:54 adcockj +// Fixed silly and/or confusion +// +// Revision 1.7 2001/11/22 22:27:00 adcockj +// Bug Fixes +// +// Revision 1.6 2001/11/21 15:21:40 adcockj +// Renamed DEINTERLACE_INFO to TDeinterlaceInfo in line with standards +// Changed TDeinterlaceInfo structure to have history of pictures. +// +// Revision 1.5 2001/07/31 06:48:33 adcockj +// Fixed index bug spotted by Peter Gubanov +// +// Revision 1.4 2001/07/13 16:13:33 adcockj +// Added CVS tags and removed tabs +// +///////////////////////////////////////////////////////////////////////////// + +// This is the implementation of the Greedy 2-frame deinterlace algorithm described in +// DI_Greedy2Frame.c. It's in a separate file so we can compile variants for different +// CPU types; most of the code is the same in the different variants. + + +/////////////////////////////////////////////////////////////////////////////// +// Field 1 | Field 2 | Field 3 | Field 4 | +// T0 | | T1 | | +// | M0 | | M1 | +// B0 | | B1 | | +// + + +// debugging feature +// output the value of mm4 at this point which is pink where we will weave +// and green were we are going to bob +// uncomment next line to see this +//#define CHECK_BOBWEAVE + +#if !defined(MASKS_DEFINED) +#define MASKS_DEFINED + static const int64_t YMask = 0x00ff00ff00ff00ffll; + static const int64_t Mask = 0x7f7f7f7f7f7f7f7fll; + static const int64_t DwordOne = 0x0000000100000001ll; + static const int64_t DwordTwo = 0x0000000200000002ll; + static int64_t qwGreedyTwoFrameThreshold; +#endif + +#if !defined(MANGLE) +# if defined(__MINGW32__) || defined(__CYGWIN__) || \ + defined(__OS2__) || (defined (__OpenBSD__) && !defined(__ELF__)) +# define MANGLE(a) "_" #a +# else +# define MANGLE(a) #a +# endif +#endif + +#if defined(IS_SSE) +static void DeinterlaceGreedy2Frame_SSE(uint8_t *output, int outstride, + deinterlace_frame_data_t *data, + int bottom_field, int width, int height ) +#elif defined(IS_3DNOW) +static void DeinterlaceGreedy2Frame_3DNOW(uint8_t *output, int outstride, + deinterlace_frame_data_t *data, + int bottom_field, int width, int height ) +#else +static void DeinterlaceGreedy2Frame_MMX(uint8_t *output, int outstride, + deinterlace_frame_data_t *data, + int bottom_field, int width, int height ) +#endif +{ +#ifdef ARCH_X86 + int Line; + int stride = width * 2; + register uint8_t* M1; + register uint8_t* M0; + register uint8_t* T0; + register uint8_t* T1; + register uint8_t* B1; + register uint8_t* B0; + uint8_t* Dest = output; + register uint8_t* Dest2; + register int count; + uint32_t Pitch = stride*2; + uint32_t LineLength = stride; + uint32_t PitchRest = Pitch - (LineLength >> 3)*8; + + qwGreedyTwoFrameThreshold = GreedyTwoFrameThreshold; + qwGreedyTwoFrameThreshold += (GreedyTwoFrameThreshold2 << 8); + qwGreedyTwoFrameThreshold += (qwGreedyTwoFrameThreshold << 48) + + (qwGreedyTwoFrameThreshold << 32) + + (qwGreedyTwoFrameThreshold << 16); + + + if( bottom_field ) { + M1 = data->f0 + stride; + T1 = data->f0; + B1 = T1 + Pitch; + M0 = data->f1 + stride; + T0 = data->f1; + B0 = T0 + Pitch; + } else { + M1 = data->f0 + Pitch; + T1 = data->f1 + stride; + B1 = T1 + Pitch; + M0 = data->f1 + Pitch; + T0 = data->f2 + stride; + B0 = T0 + Pitch; + + xine_fast_memcpy(Dest, M1, LineLength); + Dest += outstride; + } + + for (Line = 0; Line < (height / 2) - 1; ++Line) + { + // Always use the most recent data verbatim. By definition it's correct (it'd + // be shown on an interlaced display) and our job is to fill in the spaces + // between the new lines. + xine_fast_memcpy(Dest, T1, stride); + Dest += outstride; + Dest2 = Dest; + + count = LineLength >> 3; + do { + asm volatile( + // Figure out what to do with the scanline above the one we just copied. + // See above for a description of the algorithm. + + ".align 8 \n\t" + "movq "MANGLE(Mask)", %%mm6 \n\t" + + "movq %0, %%mm1 \n\t" // T1 + "movq %1, %%mm0 \n\t" // M1 + "movq %2, %%mm3 \n\t" // B1 + "movq %3, %%mm2 \n\t" // M0 + : /* no output */ + : "m" (*T1), "m" (*M1), + "m" (*B1), "m" (*M0) ); + + + asm volatile( + // Figure out what to do with the scanline above the one we just copied. + // See above for a description of the algorithm. + + // Average T1 and B1 so we can do interpolated bobbing if we bob onto T1. + "movq %%mm3, %%mm7 \n\t" // mm7 = B1 + +#if defined(IS_SSE) + "pavgb %%mm1, %%mm7 \n\t" +#elif defined(IS_3DNOW) + "pavgusb %%mm1, %%mm7 \n\t" +#else + + "movq %%mm1, %%mm5 \n\t" // mm5 = T1 + "psrlw $1, %%mm7 \n\t" // mm7 = B1 / 2 + "pand %%mm6, %%mm7 \n\t" // mask off lower bits + "psrlw $1, %%mm5 \n\t" // mm5 = T1 / 2 + "pand %%mm6, %%mm5 \n\t" // mask off lower bits + "paddw %%mm5, %%mm7 \n\t" // mm7 = (T1 + B1) / 2 +#endif + + // calculate |M1-M0| put result in mm4 need to keep mm0 intact + // if we have a good processor then make mm0 the average of M1 and M0 + // which should make weave look better when there is small amounts of + // movement +#if defined(IS_SSE) + "movq %%mm0, %%mm4 \n\t" + "movq %%mm2, %%mm5 \n\t" + "psubusb %%mm2, %%mm4 \n\t" + "psubusb %%mm0, %%mm5 \n\t" + "por %%mm5, %%mm4 \n\t" + "psrlw $1, %%mm4 \n\t" + "pavgb %%mm2, %%mm0 \n\t" + "pand %%mm6, %%mm4 \n\t" +#elif defined(IS_3DNOW) + "movq %%mm0, %%mm4 \n\t" + "movq %%mm2, %%mm5 \n\t" + "psubusb %%mm2, %%mm4 \n\t" + "psubusb %%mm0, %%mm5 \n\t" + "por %%mm5, %%mm4 \n\t" + "psrlw $1, %%mm4 \n\t" + "pavgusb %%mm2, %%mm0 \n\t" + "pand %%mm6, %%mm4 \n\t" +#else + "movq %%mm0, %%mm4 \n\t" + "psubusb %%mm2, %%mm4 \n\t" + "psubusb %%mm0, %%mm2 \n\t" + "por %%mm2, %%mm4 \n\t" + "psrlw $1, %%mm4 \n\t" + "pand %%mm6, %%mm4 \n\t" +#endif + + // if |M1-M0| > Threshold we want dword worth of twos + "pcmpgtb "MANGLE(qwGreedyTwoFrameThreshold)", %%mm4 \n\t" + "pand "MANGLE(Mask)", %%mm4 \n\t" // get rid of any sign bit + "pcmpgtd "MANGLE(DwordOne)", %%mm4 \n\t" // do we want to bob + "pandn "MANGLE(DwordTwo)", %%mm4 \n\t" + + "movq %1, %%mm2 \n\t" // mm2 = T0 + + // calculate |T1-T0| put result in mm5 + "movq %%mm2, %%mm5 \n\t" + "psubusb %%mm1, %%mm5 \n\t" + "psubusb %%mm2, %%mm1 \n\t" + "por %%mm1, %%mm5 \n\t" + "psrlw $1, %%mm5 \n\t" + "pand %%mm6, %%mm5 \n\t" + + // if |T1-T0| > Threshold we want dword worth of ones + "pcmpgtb "MANGLE(qwGreedyTwoFrameThreshold)", %%mm5 \n\t" + "pand %%mm6, %%mm5 \n\t" // get rid of any sign bit + + "pcmpgtd "MANGLE(DwordOne)", %%mm5 \n\t" + "pandn "MANGLE(DwordOne)", %%mm5 \n\t" + "paddd %%mm5, %%mm4 \n\t" + + "movq %2, %%mm2 \n\t" // B0 + + // calculate |B1-B0| put result in mm5 + "movq %%mm2, %%mm5 \n\t" + "psubusb %%mm3, %%mm5 \n\t" + "psubusb %%mm2, %%mm3 \n\t" + "por %%mm3, %%mm5 \n\t" + "psrlw $1, %%mm5 \n\t" + "pand %%mm6, %%mm5 \n\t" + + // if |B1-B0| > Threshold we want dword worth of ones + "pcmpgtb "MANGLE(qwGreedyTwoFrameThreshold)", %%mm5 \n\t" + "pand %%mm6, %%mm5 \n\t" // get rid of any sign bit + "pcmpgtd "MANGLE(DwordOne)", %%mm5 \n\t" + "pandn "MANGLE(DwordOne)", %%mm5 \n\t" + "paddd %%mm5, %%mm4 \n\t" + + "pcmpgtd "MANGLE(DwordTwo)", %%mm4 \n\t" + +// debugging feature +// output the value of mm4 at this point which is pink where we will weave +// and green were we are going to bob +#ifdef CHECK_BOBWEAVE +#ifdef IS_SSE + "movntq %%mm4, %0 \n\t" +#else + "movq %%mm4, %0 \n\t" +#endif +#else + + "movq %%mm4, %%mm5 \n\t" + // mm4 now is 1 where we want to weave and 0 where we want to bob + "pand %%mm0, %%mm4 \n\t" + "pandn %%mm7, %%mm5 \n\t" + "por %%mm5, %%mm4 \n\t" +#ifdef IS_SSE + "movntq %%mm4, %0 \n\t" +#else + "movq %%mm4, %0 \n\t" +#endif +#endif + + : "=m" (*Dest2) + : "m" (*T0), "m" (*B0) ); + + // Advance to the next set of pixels. + T1 += 8; + M1 += 8; + B1 += 8; + M0 += 8; + T0 += 8; + B0 += 8; + Dest2 += 8; + + } while( --count ); + + Dest += outstride; + + M1 += PitchRest; + T1 += PitchRest; + B1 += PitchRest; + M0 += PitchRest; + T0 += PitchRest; + B0 += PitchRest; + } + +#ifdef IS_SSE + asm("sfence\n\t"); +#endif + + if( bottom_field ) + { + xine_fast_memcpy(Dest, T1, stride); + Dest += outstride; + xine_fast_memcpy(Dest, M1, stride); + } + else + { + xine_fast_memcpy(Dest, T1, stride); + } + + // clear out the MMX registers ready for doing floating point + // again + asm("emms\n\t"); +#endif +} + -- cgit v1.2.3