/** * Linear blend deinterlacing plugin. The idea for this algorithm came * from the linear blend deinterlacer which originated in the mplayer * sources. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ #ifdef HAVE_CONFIG_H # include "config.h" #endif #include #if HAVE_INTTYPES_H #include #else #include #endif #include #include #include "xine_mmx.h" #include "speedtools.h" #include "speedy.h" #include "deinterlace.h" #include "plugins.h" static const char linearblendmethod_help[] = "Avoids flicker by blurring consecutive frames of input. Use this if " "you want to run your monitor at an arbitrary refresh rate and not use " "much CPU, and are willing to sacrifice detail.\n" "\n" "Temporal mode evenly blurs content for least flicker, but with visible " "trails on fast motion. From the linear blend deinterlacer in mplayer."; static void deinterlace_scanline_linear_blend( uint8_t *output, deinterlace_scanline_data_t *data, int width ) { uint8_t *t0 = data->t0; uint8_t *b0 = data->b0; uint8_t *m1 = data->m1; #if defined(ARCH_X86) || defined(ARCH_X86_64) int i; // Get width in bytes. width *= 2; i = width / 8; width -= i * 8; pxor_r2r( mm7, mm7 ); while( i-- ) { movd_m2r( *t0, mm0 ); movd_m2r( *b0, mm1 ); movd_m2r( *m1, mm2 ); movd_m2r( *(t0+4), mm3 ); movd_m2r( *(b0+4), mm4 ); movd_m2r( *(m1+4), mm5 ); punpcklbw_r2r( mm7, mm0 ); punpcklbw_r2r( mm7, mm1 ); punpcklbw_r2r( mm7, mm2 ); punpcklbw_r2r( mm7, mm3 ); punpcklbw_r2r( mm7, mm4 ); punpcklbw_r2r( mm7, mm5 ); psllw_i2r( 1, mm2 ); psllw_i2r( 1, mm5 ); paddw_r2r( mm0, mm2 ); paddw_r2r( mm3, mm5 ); paddw_r2r( mm1, mm2 ); paddw_r2r( mm4, mm5 ); psrlw_i2r( 2, mm2 ); psrlw_i2r( 2, mm5 ); packuswb_r2r( mm2, mm2 ); packuswb_r2r( mm5, mm5 ); movd_r2m( mm2, *output ); movd_r2m( mm5, *(output+4) ); output += 8; t0 += 8; b0 += 8; m1 += 8; } while( width-- ) { *output++ = (*t0++ + *b0++ + (*m1++ << 1)) >> 2; } emms(); #else width *= 2; while( width-- ) { *output++ = (*t0++ + *b0++ + (*m1++ << 1)) >> 2; } #endif } static void deinterlace_scanline_linear_blend2( uint8_t *output, deinterlace_scanline_data_t *data, int width ) { uint8_t *m0 = data->m0; uint8_t *t1 = data->t1; uint8_t *b1 = data->b1; #if defined(ARCH_X86) || defined(ARCH_X86_64) int i; // Get width in bytes. width *= 2; i = width / 8; width -= i * 8; pxor_r2r( mm7, mm7 ); while( i-- ) { movd_m2r( *t1, mm0 ); movd_m2r( *b1, mm1 ); movd_m2r( *m0, mm2 ); movd_m2r( *(t1+4), mm3 ); movd_m2r( *(b1+4), mm4 ); movd_m2r( *(m0+4), mm5 ); punpcklbw_r2r( mm7, mm0 ); punpcklbw_r2r( mm7, mm1 ); punpcklbw_r2r( mm7, mm2 ); punpcklbw_r2r( mm7, mm3 ); punpcklbw_r2r( mm7, mm4 ); punpcklbw_r2r( mm7, mm5 ); psllw_i2r( 1, mm2 ); psllw_i2r( 1, mm5 ); paddw_r2r( mm0, mm2 ); paddw_r2r( mm3, mm5 ); paddw_r2r( mm1, mm2 ); paddw_r2r( mm4, mm5 ); psrlw_i2r( 2, mm2 ); psrlw_i2r( 2, mm5 ); packuswb_r2r( mm2, mm2 ); packuswb_r2r( mm5, mm5 ); movd_r2m( mm2, *output ); movd_r2m( mm5, *(output+4) ); output += 8; t1 += 8; b1 += 8; m0 += 8; } while( width-- ) { *output++ = (*t1++ + *b1++ + (*m0++ << 1)) >> 2; } emms(); #else width *= 2; while( width-- ) { *output++ = (*t1++ + *b1++ + (*m0++ << 1)) >> 2; } #endif } #if defined(ARCH_X86) || defined(ARCH_X86_64) /* MMXEXT version is about 15% faster with Athlon XP [MF] */ static void deinterlace_scanline_linear_blend_mmxext( uint8_t *output, deinterlace_scanline_data_t *data, int width ) { uint8_t *t0 = data->t0; uint8_t *b0 = data->b0; uint8_t *m1 = data->m1; int i; static mmx_t high_mask = {ub:{0xff,0xff,0xff,0xff,0,0,0,0}}; READ_PREFETCH_2048( t0 ); READ_PREFETCH_2048( b0 ); READ_PREFETCH_2048( m1 ); // Get width in bytes. width *= 2; i = width / 8; width -= i * 8; movd_m2r( high_mask, mm6 ); pxor_r2r( mm7, mm7 ); while( i-- ) { movd_m2r( *t0, mm0 ); movd_m2r( *b0, mm1 ); movd_m2r( *m1, mm2 ); movd_m2r( *(t0+4), mm3 ); movd_m2r( *(b0+4), mm4 ); movd_m2r( *(m1+4), mm5 ); punpcklbw_r2r( mm7, mm0 ); punpcklbw_r2r( mm7, mm1 ); punpcklbw_r2r( mm7, mm2 ); punpcklbw_r2r( mm7, mm3 ); punpcklbw_r2r( mm7, mm4 ); punpcklbw_r2r( mm7, mm5 ); psllw_i2r( 1, mm2 ); psllw_i2r( 1, mm5 ); paddw_r2r( mm0, mm2 ); paddw_r2r( mm3, mm5 ); paddw_r2r( mm1, mm2 ); paddw_r2r( mm4, mm5 ); psrlw_i2r( 2, mm2 ); psrlw_i2r( 2, mm5 ); packuswb_r2r( mm2, mm2 ); packuswb_r2r( mm5, mm5 ); psllq_i2r( 32, mm5 ); pand_r2r( mm6, mm2 ); por_r2r( mm2, mm5 ); movntq_r2m( mm5, *output ); output += 8; t0 += 8; b0 += 8; m1 += 8; } while( width-- ) { *output++ = (*t0++ + *b0++ + (*m1++ << 1)) >> 2; } sfence(); emms(); } static void deinterlace_scanline_linear_blend2_mmxext( uint8_t *output, deinterlace_scanline_data_t *data, int width ) { uint8_t *m0 = data->m0; uint8_t *t1 = data->t1; uint8_t *b1 = data->b1; int i; READ_PREFETCH_2048( t1 ); READ_PREFETCH_2048( b1 ); READ_PREFETCH_2048( m0 ); // Get width in bytes. width *= 2; i = width / 8; width -= i * 8; pxor_r2r( mm7, mm7 ); while( i-- ) { movd_m2r( *t1, mm0 ); movd_m2r( *b1, mm1 ); movd_m2r( *m0, mm2 ); movd_m2r( *(t1+4), mm3 ); movd_m2r( *(b1+4), mm4 ); movd_m2r( *(m0+4), mm5 ); punpcklbw_r2r( mm7, mm0 ); punpcklbw_r2r( mm7, mm1 ); punpcklbw_r2r( mm7, mm2 ); punpcklbw_r2r( mm7, mm3 ); punpcklbw_r2r( mm7, mm4 ); punpcklbw_r2r( mm7, mm5 ); psllw_i2r( 1, mm2 ); psllw_i2r( 1, mm5 ); paddw_r2r( mm0, mm2 ); paddw_r2r( mm3, mm5 ); paddw_r2r( mm1, mm2 ); paddw_r2r( mm4, mm5 ); psrlw_i2r( 2, mm2 ); psrlw_i2r( 2, mm5 ); packuswb_r2r( mm2, mm2 ); packuswb_r2r( mm5, mm5 ); psllq_i2r( 32, mm5 ); pand_r2r( mm6, mm2 ); por_r2r( mm2, mm5 ); movntq_r2m( mm5, *output ); output += 8; t1 += 8; b1 += 8; m0 += 8; } while( width-- ) { *output++ = (*t1++ + *b1++ + (*m0++ << 1)) >> 2; } sfence(); emms(); } static deinterlace_method_t linearblendmethod_mmxext = { "Linear Blend (mplayer)", "LinearBlend", 2, MM_ACCEL_X86_MMXEXT, 0, 1, deinterlace_scanline_linear_blend_mmxext, deinterlace_scanline_linear_blend2_mmxext, 0, 0, linearblendmethod_help }; #endif static deinterlace_method_t linearblendmethod = { "Linear Blend (mplayer)", "LinearBlend", /* "Blur: Temporal", "BlurTemporal", */ 2, #if defined(ARCH_X86) || defined(ARCH_X86_64) MM_ACCEL_X86_MMX, #else 0, #endif 0, 1, deinterlace_scanline_linear_blend, deinterlace_scanline_linear_blend2, 0, 0, linearblendmethod_help }; deinterlace_method_t *linearblend_get_method( void ) { #if defined(ARCH_X86) || defined(ARCH_X86_64) if( xine_mm_accel() & MM_ACCEL_X86_MMXEXT ) return &linearblendmethod_mmxext; else #endif return &linearblendmethod; }