/**
 * Copyright (c) 2000 Tom Barry  All rights reserved.
 * mmx.h port copyright (c) 2002 Billy Biggs.
 *
 * This code is ported from DScaler: http://deinterlace.sf.net/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

#ifdef HAVE_CONFIG_H
# include "config.h"
#endif

#include <stdio.h>

#if HAVE_INTTYPES_H
#include <inttypes.h>
#else
#include <stdint.h>
#endif

#include "attributes.h"
#include "xineutils.h"
#include "deinterlace.h"
#include "speedtools.h"
#include "speedy.h"
#include "plugins.h"

// This is a simple lightweight DeInterlace method that uses little CPU time
// but gives very good results for low or intermediate motion.
// It defers frames by one field, but that does not seem to produce noticeable
// lip sync problems.
//
// The method used is to take either the older or newer weave pixel depending
// upon which gives the smaller comb factor, and then clip to avoid large damage
// when wrong.
//
// I'd intended this to be part of a larger more elaborate method added to
// Blended Clip but this gives too good results for the CPU to ignore here.
static void copy_scanline( uint8_t *output, deinterlace_scanline_data_t *data, int width ) { blit_packed422_scanline( output, data->m1, width ); } static int GreedyMaxComb = 15; static void deinterlace_greedy_packed422_scanline_mmxext( uint8_t *output, deinterlace_scanline_data_t *data, int width ) { #ifdef ARCH_X86 mmx_t MaxComb; uint8_t *m0 = data->m0; uint8_t *t1 = data->t1; uint8_t *b1 = data->b1; uint8_t *m2 = data->m2; // How badly do we let it weave? 0-255 MaxComb.ub[ 0 ] = GreedyMaxComb; MaxComb.ub[ 1 ] = GreedyMaxComb; MaxComb.ub[ 2 ] = GreedyMaxComb; MaxComb.ub[ 3 ] = GreedyMaxComb; MaxComb.ub[ 4 ] = GreedyMaxComb; MaxComb.ub[ 5 ] = GreedyMaxComb; MaxComb.ub[ 6 ] = GreedyMaxComb; MaxComb.ub[ 7 ] = GreedyMaxComb; // L2 == m0 // L1 == t1 // L3 == b1 // LP2 == m2 width /= 4; while( width-- ) { movq_m2r( *t1, mm1 ); // L1 movq_m2r( *m0, mm2 ); // L2 movq_m2r( *b1, mm3 ); // L3 movq_m2r( *m2, mm0 ); // LP2 // average L1 and L3 leave result in mm4 movq_r2r( mm1, mm4 ); // L1 pavgb_r2r( mm3, mm4 ); // (L1 + L3)/2 // get abs value of possible L2 comb movq_r2r( mm2, mm7 ); // L2 psubusb_r2r( mm4, mm7 ); // L2 - avg movq_r2r( mm4, mm5 ); // avg psubusb_r2r( mm2, mm5 ); // avg - L2 por_r2r( mm7, mm5 ); // abs(avg-L2) movq_r2r( mm4, mm6 ); // copy of avg for later // get abs value of possible LP2 comb movq_r2r( mm0, mm7 ); // LP2 psubusb_r2r( mm4, mm7 ); // LP2 - avg psubusb_r2r( mm0, mm4 ); // avg - LP2 por_r2r( mm7, mm4 ); // abs(avg-LP2) // use L2 or LP2 depending upon which makes smaller comb psubusb_r2r( mm5, mm4 ); // see if it goes to zero psubusb_r2r( mm5, mm5 ); // 0 pcmpeqb_r2r( mm5, mm4 ); // if (mm4=0) then FF else 0 pcmpeqb_r2r( mm4, mm5 ); // opposite of mm4 // if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5 = 55 pand_r2r( mm2, mm5 ); // use L2 if mm5 == ff, else 0 pand_r2r( mm0, mm4 ); // use LP2 if mm4 = ff, else 0 por_r2r( mm5, mm4 ); // may the best win // Now lets clip our chosen value to be not outside of the range // of the high/low 
range L1-L3 by more than abs(L1-L3) // This allows some comb but limits the damages and also allows more // detail than a boring oversmoothed clip. movq_r2r( mm1, mm2 ); // copy L1 psubusb_r2r( mm3, mm2 ); // - L3, with saturation paddusb_r2r( mm3, mm2 ); // now = Max(L1,L3) pcmpeqb_r2r( mm7, mm7 ); // all ffffffff psubusb_r2r( mm1, mm7 ); // - L1 paddusb_r2r( mm7, mm3 ); // add, may sat at fff.. psubusb_r2r( mm7, mm3 ); // now = Min(L1,L3) // allow the value to be above the high or below the low by amt of MaxComb paddusb_m2r( MaxComb, mm2 ); // increase max by diff psubusb_m2r( MaxComb, mm3 ); // lower min by diff psubusb_r2r( mm3, mm4 ); // best - Min paddusb_r2r( mm3, mm4 ); // now = Max(best,Min(L1,L3) pcmpeqb_r2r( mm7, mm7 ); // all ffffffff psubusb_r2r( mm4, mm7 ); // - Max(best,Min(best,L3) paddusb_r2r( mm7, mm2 ); // add may sat at FFF.. psubusb_r2r( mm7, mm2 ); // now = Min( Max(best, Min(L1,L3), L2 )=L2 clipped movntq_r2m( mm2, *output ); // move in our clipped best // Advance to the next set of pixels. output += 8; m0 += 8; t1 += 8; b1 += 8; m2 += 8; } sfence(); emms(); #endif } static deinterlace_setting_t settings[] = { { "Greedy Max Comb", SETTING_SLIDER, &GreedyMaxComb, 15, 0, 255, 1, 0 } }; static deinterlace_method_t greedymethod = { DEINTERLACE_PLUGIN_API_VERSION, "DScaler: Greedy - Low motion", "Greedy", 3, MM_ACCEL_X86_MMXEXT, 0, 1, settings, 1, copy_scanline, deinterlace_greedy_packed422_scanline_mmxext, 0 }; #ifdef BUILD_TVTIME_PLUGINS void deinterlace_plugin_init( void ) #else void greedy_plugin_init( void ) #endif { register_deinterlace_method( &greedymethod ); }