From 0e18f82b79ac54f306aa1ee31170bd3099b5c17a Mon Sep 17 00:00:00 2001 From: Miguel Freitas Date: Mon, 8 Dec 2003 17:06:53 +0000 Subject: sfence is not a valid MMX instruction. provide a new MMXEXT version. about 15% faster here. CVS patchset: 5875 CVS date: 2003/12/08 17:06:53 --- src/post/deinterlace/plugins/linearblend.c | 155 ++++++++++++++++++++++++++++- 1 file changed, 152 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/post/deinterlace/plugins/linearblend.c b/src/post/deinterlace/plugins/linearblend.c index ef7ab52bc..977efe2b3 100644 --- a/src/post/deinterlace/plugins/linearblend.c +++ b/src/post/deinterlace/plugins/linearblend.c @@ -86,6 +86,7 @@ static void deinterlace_scanline_linear_blend( uint8_t *output, movd_r2m( mm2, *output ); movd_r2m( mm5, *(output+4) ); + output += 8; t0 += 8; b0 += 8; @@ -94,7 +95,6 @@ static void deinterlace_scanline_linear_blend( uint8_t *output, while( width-- ) { *output++ = (*t0++ + *b0++ + (*m1++ << 1)) >> 2; } - sfence(); emms(); #else width *= 2; @@ -151,6 +151,7 @@ static void deinterlace_scanline_linear_blend2( uint8_t *output, movd_r2m( mm2, *output ); movd_r2m( mm5, *(output+4) ); + output += 8; t1 += 8; b1 += 8; @@ -159,7 +160,6 @@ static void deinterlace_scanline_linear_blend2( uint8_t *output, while( width-- ) { *output++ = (*t1++ + *b1++ + (*m0++ << 1)) >> 2; } - sfence(); emms(); #else width *= 2; @@ -169,6 +169,150 @@ static void deinterlace_scanline_linear_blend2( uint8_t *output, #endif } +#ifdef ARCH_X86 + +/* MMXEXT version is about 15% faster with Athlon XP [MF] */ + +static void deinterlace_scanline_linear_blend_mmxext( uint8_t *output, + deinterlace_scanline_data_t *data, + int width ) +{ + uint8_t *t0 = data->t0; + uint8_t *b0 = data->b0; + uint8_t *m1 = data->m1; + int i; + static mmx_t high_mask = {ub:{0xff,0xff,0xff,0xff,0,0,0,0}}; + + // Get width in bytes. + width *= 2; + i = width / 8; + width -= i * 8; + + movd_m2r( high_mask, mm6 ); + pxor_r2r( mm7, mm7 ); + while( i-- ) { + movd_m2r( *t0, mm0 ); + movd_m2r( *b0, mm1 ); + movd_m2r( *m1, mm2 ); + + movd_m2r( *(t0+4), mm3 ); + movd_m2r( *(b0+4), mm4 ); + movd_m2r( *(m1+4), mm5 ); + + punpcklbw_r2r( mm7, mm0 ); + punpcklbw_r2r( mm7, mm1 ); + punpcklbw_r2r( mm7, mm2 ); + + punpcklbw_r2r( mm7, mm3 ); + punpcklbw_r2r( mm7, mm4 ); + punpcklbw_r2r( mm7, mm5 ); + + psllw_i2r( 1, mm2 ); + psllw_i2r( 1, mm5 ); + paddw_r2r( mm0, mm2 ); + paddw_r2r( mm3, mm5 ); + paddw_r2r( mm1, mm2 ); + paddw_r2r( mm4, mm5 ); + psrlw_i2r( 2, mm2 ); + psrlw_i2r( 2, mm5 ); + packuswb_r2r( mm2, mm2 ); + packuswb_r2r( mm5, mm5 ); + + psllq_i2r( 32, mm5 ); + pand_r2r( mm6, mm2 ); + por_r2r( mm2, mm5 ); + movntq_r2m( mm5, *output ); + + output += 8; + t0 += 8; + b0 += 8; + m1 += 8; + } + while( width-- ) { + *output++ = (*t0++ + *b0++ + (*m1++ << 1)) >> 2; + } + sfence(); + emms(); +} + +static void deinterlace_scanline_linear_blend2_mmxext( uint8_t *output, + deinterlace_scanline_data_t *data, + int width ) +{ + uint8_t *m0 = data->m0; + uint8_t *t1 = data->t1; + uint8_t *b1 = data->b1; + + int i; + + // Get width in bytes. + width *= 2; + i = width / 8; + width -= i * 8; + + pxor_r2r( mm7, mm7 ); + while( i-- ) { + movd_m2r( *t1, mm0 ); + movd_m2r( *b1, mm1 ); + movd_m2r( *m0, mm2 ); + + movd_m2r( *(t1+4), mm3 ); + movd_m2r( *(b1+4), mm4 ); + movd_m2r( *(m0+4), mm5 ); + + punpcklbw_r2r( mm7, mm0 ); + punpcklbw_r2r( mm7, mm1 ); + punpcklbw_r2r( mm7, mm2 ); + + punpcklbw_r2r( mm7, mm3 ); + punpcklbw_r2r( mm7, mm4 ); + punpcklbw_r2r( mm7, mm5 ); + + psllw_i2r( 1, mm2 ); + psllw_i2r( 1, mm5 ); + paddw_r2r( mm0, mm2 ); + paddw_r2r( mm3, mm5 ); + paddw_r2r( mm1, mm2 ); + paddw_r2r( mm4, mm5 ); + psrlw_i2r( 2, mm2 ); + psrlw_i2r( 2, mm5 ); + packuswb_r2r( mm2, mm2 ); + packuswb_r2r( mm5, mm5 ); + + psllq_i2r( 32, mm5 ); + pand_r2r( mm6, mm2 ); + por_r2r( mm2, mm5 ); + movntq_r2m( mm5, *output ); + + output += 8; + t1 += 8; + b1 += 8; + m0 += 8; + } + while( width-- ) { + *output++ = (*t1++ + *b1++ + (*m0++ << 1)) >> 2; + } + sfence(); + emms(); +} + +static deinterlace_method_t linearblendmethod_mmxext = +{ + DEINTERLACE_PLUGIN_API_VERSION, + "mplayer: Linear Blend", + "LinearBlend", + 2, + MM_ACCEL_X86_MMXEXT, + 0, + 0, + 0, + 1, + deinterlace_scanline_linear_blend_mmxext, + deinterlace_scanline_linear_blend2_mmxext, + 0 +}; + +#endif static deinterlace_method_t linearblendmethod = { @@ -196,5 +340,10 @@ void deinterlace_plugin_init( void ) void linearblend_plugin_init( void ) #endif { - register_deinterlace_method( &linearblendmethod ); +#ifdef ARCH_X86 + if( xine_mm_accel() & MM_ACCEL_X86_MMXEXT ) + register_deinterlace_method( &linearblendmethod_mmxext ); + else +#endif + register_deinterlace_method( &linearblendmethod ); } -- cgit v1.2.3