diff options
Diffstat (limited to 'src/post/deinterlace/plugins/greedy.c')
-rw-r--r-- | src/post/deinterlace/plugins/greedy.c | 195 |
1 files changed, 195 insertions, 0 deletions
diff --git a/src/post/deinterlace/plugins/greedy.c b/src/post/deinterlace/plugins/greedy.c new file mode 100644 index 000000000..fb7eb454c --- /dev/null +++ b/src/post/deinterlace/plugins/greedy.c @@ -0,0 +1,195 @@ +/** + * Copyright (c) 2000 Tom Barry All rights reserved. + * mmx.h port copyright (c) 2002 Billy Biggs <vektor@dumbterm.net>. + * + * This code is ported from DScaler: http://deinterlace.sf.net/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include <stdio.h> +#include <stdint.h> + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "attributes.h" +#include "xineutils.h" +#include "deinterlace.h" +#include "speedtools.h" +#include "speedy.h" + +// This is a simple lightweight DeInterlace method that uses little CPU time +// but gives very good results for low or intermediate motion. +// It defers frames by one field, but that does not seem to produce noticeable +// lip sync problems. +// +// The method used is to take either the older or newer weave pixel depending +// upon which gives the smaller comb factor, and then clip to avoid large damage +// when wrong. +// +// I'd intended this to be part of a larger more elaborate method added to +// Blended Clip but this gives too good results for the CPU to ignore here. 
+ +static void copy_scanline( uint8_t *output, + deinterlace_scanline_data_t *data, + int width ) +{ + blit_packed422_scanline( output, data->m1, width ); +} + +static int GreedyMaxComb = 15; + +static void deinterlace_greedy_packed422_scanline_mmxext( uint8_t *output, + deinterlace_scanline_data_t *data, + int width ) +{ +#ifdef ARCH_X86 + mmx_t MaxComb; + uint8_t *m0 = data->m0; + uint8_t *t1 = data->t1; + uint8_t *b1 = data->b1; + uint8_t *m2 = data->m2; + + // How badly do we let it weave? 0-255 + MaxComb.ub[ 0 ] = GreedyMaxComb; + MaxComb.ub[ 1 ] = GreedyMaxComb; + MaxComb.ub[ 2 ] = GreedyMaxComb; + MaxComb.ub[ 3 ] = GreedyMaxComb; + MaxComb.ub[ 4 ] = GreedyMaxComb; + MaxComb.ub[ 5 ] = GreedyMaxComb; + MaxComb.ub[ 6 ] = GreedyMaxComb; + MaxComb.ub[ 7 ] = GreedyMaxComb; + + // L2 == m0 + // L1 == t1 + // L3 == b1 + // LP2 == m2 + + width /= 4; + while( width-- ) { + movq_m2r( *t1, mm1 ); // L1 + movq_m2r( *m0, mm2 ); // L2 + movq_m2r( *b1, mm3 ); // L3 + movq_m2r( *m2, mm0 ); // LP2 + + // average L1 and L3 leave result in mm4 + movq_r2r( mm1, mm4 ); // L1 + pavgb_r2r( mm3, mm4 ); // (L1 + L3)/2 + + + // get abs value of possible L2 comb + movq_r2r( mm2, mm7 ); // L2 + psubusb_r2r( mm4, mm7 ); // L2 - avg + movq_r2r( mm4, mm5 ); // avg + psubusb_r2r( mm2, mm5 ); // avg - L2 + por_r2r( mm7, mm5 ); // abs(avg-L2) + movq_r2r( mm4, mm6 ); // copy of avg for later + + + // get abs value of possible LP2 comb + movq_r2r( mm0, mm7 ); // LP2 + psubusb_r2r( mm4, mm7 ); // LP2 - avg + psubusb_r2r( mm0, mm4 ); // avg - LP2 + por_r2r( mm7, mm4 ); // abs(avg-LP2) + + // use L2 or LP2 depending upon which makes smaller comb + psubusb_r2r( mm5, mm4 ); // see if it goes to zero + psubusb_r2r( mm5, mm5 ); // 0 + pcmpeqb_r2r( mm5, mm4 ); // if (mm4=0) then FF else 0 + pcmpeqb_r2r( mm4, mm5 ); // opposite of mm4 + + // if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5 = 55 + pand_r2r( mm2, mm5 ); // use L2 if mm5 == ff, else 0 + pand_r2r( mm0, mm4 ); // use LP2 if mm4 = 
ff, else 0 + por_r2r( mm5, mm4 ); // may the best win + + // Now lets clip our chosen value to be not outside of the range + // of the high/low range L1-L3 by more than abs(L1-L3) + // This allows some comb but limits the damages and also allows more + // detail than a boring oversmoothed clip. + + movq_r2r( mm1, mm2 ); // copy L1 + psubusb_r2r( mm3, mm2 ); // - L3, with saturation + paddusb_r2r( mm3, mm2 ); // now = Max(L1,L3) + + pcmpeqb_r2r( mm7, mm7 ); // all ffffffff + psubusb_r2r( mm1, mm7 ); // - L1 + paddusb_r2r( mm7, mm3 ); // add, may sat at fff.. + psubusb_r2r( mm7, mm3 ); // now = Min(L1,L3) + + // allow the value to be above the high or below the low by amt of MaxComb + paddusb_m2r( MaxComb, mm2 ); // increase max by diff + psubusb_m2r( MaxComb, mm3 ); // lower min by diff + + psubusb_r2r( mm3, mm4 ); // best - Min + paddusb_r2r( mm3, mm4 ); // now = Max(best,Min(L1,L3) + + pcmpeqb_r2r( mm7, mm7 ); // all ffffffff + psubusb_r2r( mm4, mm7 ); // - Max(best,Min(best,L3) + paddusb_r2r( mm7, mm2 ); // add may sat at FFF.. + psubusb_r2r( mm7, mm2 ); // now = Min( Max(best, Min(L1,L3), L2 )=L2 clipped + + movntq_r2m( mm2, *output ); // move in our clipped best + + // Advance to the next set of pixels. + output += 8; + m0 += 8; + t1 += 8; + b1 += 8; + m2 += 8; + } + sfence(); + emms(); +#endif +} + +static deinterlace_setting_t settings[] = +{ + { + "Greedy Max Comb", + SETTING_SLIDER, + &GreedyMaxComb, + 15, 0, 255, 1, + 0 + } +}; + +static deinterlace_method_t greedymethod = +{ + DEINTERLACE_PLUGIN_API_VERSION, + "DScaler: Greedy - Low motion", + "Greedy", + 3, + MM_ACCEL_X86_MMXEXT, + 0, + 1, + settings, + 1, + copy_scanline, + deinterlace_greedy_packed422_scanline_mmxext, + 0 +}; + +#ifdef BUILD_TVTIME_PLUGINS +void deinterlace_plugin_init( void ) +#else +void greedy_plugin_init( void ) +#endif +{ + register_deinterlace_method( &greedymethod ); +} + |