From 88ff161b2a4e44854f934adf82b0d1d872e20bac Mon Sep 17 00:00:00 2001 From: Miguel Freitas Date: Sun, 22 Jun 2003 15:42:07 +0000 Subject: - disable chroma filter by default - sync to tvtime cvs (speedy.c, speedy.h) CVS patchset: 5087 CVS date: 2003/06/22 15:42:07 --- src/post/deinterlace/speedy.c | 491 +++++++++++++++++++++++++++---------- src/post/deinterlace/speedy.h | 216 ++++++++++++++-- src/post/deinterlace/xine_plugin.c | 4 +- 3 files changed, 565 insertions(+), 146 deletions(-) diff --git a/src/post/deinterlace/speedy.c b/src/post/deinterlace/speedy.c index 9ad929679..a49bf776d 100644 --- a/src/post/deinterlace/speedy.c +++ b/src/post/deinterlace/speedy.c @@ -17,16 +17,40 @@ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +/** + * Includes 420to422, 422to444 scaling filters from the MPEG2 reference + * implementation. The v12 source code indicates that they were written + * by Cheung Auyeung . The file they were in was: + * + * store.c, picture output routines + * Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. + * + * Disclaimer of Warranty + * + * These software programs are available to the user without any license fee or + * royalty on an "as is" basis. The MPEG Software Simulation Group disclaims + * any and all warranties, whether express, implied, or statuary, including any + * implied warranties or merchantability or of fitness for a particular + * purpose. In no event shall the copyright-holder be liable for any + * incidental, punitive, or consequential damages of any kind whatsoever + * arising from the use of these programs. + * + * This disclaimer of warranty extends to the user of these programs and user's + * customers, employees, agents, transferees, successors, and assigns. + * + * The MPEG Software Simulation Group does not represent or warrant that the + * programs furnished hereunder are free of infringement of any third-party + * patents. + * + * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware, + * are subject to royalty fees to patent holders. Many of these patents are + * general enough such that they are unavoidable regardless of implementation + * design. + * + */ + #include #include -#include -#include -#include -#if defined (__SVR4) && defined (__sun) -# include -#else -# include -#endif #ifdef HAVE_CONFIG_H # include "config.h" @@ -70,10 +94,6 @@ void (*filter_luma_14641_packed422_inplace_scanline)( uint8_t *data, int width ) unsigned int (*diff_factor_packed422_scanline)( uint8_t *cur, uint8_t *old, int width ); unsigned int (*comb_factor_packed422_scanline)( uint8_t *top, uint8_t *mid, uint8_t *bot, int width ); -void (*vfilter_chroma_121_packed422_scanline)( uint8_t *output, int width, - uint8_t *m, uint8_t *t, uint8_t *b); -void (*vfilter_chroma_332_packed422_scanline)( uint8_t *output, int width, - uint8_t *m, uint8_t *t, uint8_t *b); void (*kill_chroma_packed422_inplace_scanline)( uint8_t *data, int width ); void (*mirror_packed422_inplace_scanline)( uint8_t *data, int width ); void (*halfmirror_packed422_inplace_scanline)( uint8_t *data, int width ); @@ -86,6 +106,44 @@ void (*quarter_blit_vertical_packed422_scanline)( uint8_t *output, uint8_t *one, uint8_t *three, int width ); void (*subpix_blit_vertical_packed422_scanline)( uint8_t *output, uint8_t *top, uint8_t *bot, int subpixpos, int width ); +void (*composite_bars_packed4444_scanline)( uint8_t *output, + uint8_t *background, int width, + int a, int luma, int cb, int cr, + int percentage ); +void (*packed444_to_nonpremultiplied_packed4444_scanline)( uint8_t *output, + uint8_t *input, + int width, int alpha ); +void (*aspect_adjust_packed4444_scanline)( uint8_t *output, + uint8_t *input, + int width, + double pixel_aspect ); +void (*packed444_to_packed422_scanline)( uint8_t *output, + uint8_t *input, + int width ); +void (*packed422_to_packed444_scanline)( uint8_t *output, + uint8_t *input, + int width ); +void (*packed422_to_packed444_rec601_scanline)( uint8_t *dest, + uint8_t *src, + int width ); +void (*packed444_to_rgb24_rec601_scanline)( uint8_t *output, + uint8_t *input, + int width ); +void (*rgb24_to_packed444_rec601_scanline)( uint8_t *output, + uint8_t *input, + int width ); +void (*rgba32_to_packed4444_rec601_scanline)( uint8_t *output, + uint8_t *input, + int width ); +void (*chroma_422_to_444_mpeg2_plane)( uint8_t *dst, uint8_t *src, + int width, int height ); +void (*chroma_420_to_422_mpeg2_plane)( uint8_t *dst, uint8_t *src, + int width, int height, int progressive ); +void (*invert_colour_packed422_inplace_scanline)( uint8_t *data, int width ); +void (*vfilter_chroma_121_packed422_scanline)( uint8_t *output, int width, + uint8_t *m, uint8_t *t, uint8_t *b ); +void (*vfilter_chroma_332_packed422_scanline)( uint8_t *output, int width, + uint8_t *m, uint8_t *t, uint8_t *b ); /** @@ -114,10 +172,10 @@ static inline __attribute__ ((always_inline,const)) uint8_t clip255( int x ) static unsigned long CombJaggieThreshold = 73; +#ifdef ARCH_X86 static unsigned int comb_factor_packed422_scanline_mmx( uint8_t *top, uint8_t *mid, - uint8_t *bot, int width ) + uint8_t *bot, int width ) { -#ifdef ARCH_X86 const mmx_t qwYMask = { 0x00ff00ff00ff00ffULL }; const mmx_t qwOnes = { 0x0001000100010001ULL }; mmx_t qwThreshold; @@ -193,10 +251,8 @@ static unsigned int comb_factor_packed422_scanline_mmx( uint8_t *top, uint8_t *m emms(); return temp1; -#else - return 0; -#endif } +#endif static unsigned long BitShift = 6; @@ -220,6 +276,7 @@ static unsigned int diff_factor_packed422_scanline_c( uint8_t *cur, uint8_t *old return ret; } +/* static unsigned int diff_factor_packed422_scanline_test_c( uint8_t *cur, uint8_t *old, int width ) { unsigned int ret = 0; @@ -239,11 +296,11 @@ static unsigned int diff_factor_packed422_scanline_test_c( uint8_t *cur, uint8_t return ret; } +*/ - +#ifdef ARCH_X86 static unsigned int diff_factor_packed422_scanline_mmx( uint8_t *cur, uint8_t *old, int width ) { -#ifdef ARCH_X86 const mmx_t qwYMask = { 0x00ff00ff00ff00ffULL }; unsigned int temp1, temp2; @@ -277,17 +334,15 @@ static unsigned int diff_factor_packed422_scanline_mmx( uint8_t *cur, uint8_t *o emms(); return temp1; -#else - return 0; -#endif } +#endif #define ABS(a) (((a) < 0)?-(a):(a)) +#ifdef ARCH_X86 static void diff_packed422_block8x8_mmx( pulldown_metrics_t *m, uint8_t *old, - uint8_t *new, int os, int ns ) + uint8_t *new, int os, int ns ) { -#ifdef ARCH_X86 const mmx_t ymask = { 0x00ff00ff00ff00ffULL }; short out[ 24 ]; /* Output buffer for the partial metrics from the mmx code. */ uint8_t *outdata = (uint8_t *) out; @@ -426,11 +481,11 @@ static void diff_packed422_block8x8_mmx( pulldown_metrics_t *m, uint8_t *old, } emms(); -#endif } +#endif static void diff_packed422_block8x8_c( pulldown_metrics_t *m, uint8_t *old, - uint8_t *new, int os, int ns ) + uint8_t *new, int os, int ns ) { int x, y, e=0, o=0, s=0, p=0, t=0; uint8_t *oldp, *newp; @@ -491,7 +546,7 @@ static void packed422_to_packed444_scanline_c( uint8_t *output, uint8_t *input, * * [-1 3 -6 12 -24 80 80 -24 12 -6 3 -1] */ -void packed422_to_packed444_rec601_scanline( uint8_t *dest, uint8_t *src, int width ) +static void packed422_to_packed444_rec601_scanline_c( uint8_t *dest, uint8_t *src, int width ) { int i; @@ -525,10 +580,10 @@ void packed422_to_packed444_rec601_scanline( uint8_t *dest, uint8_t *src, int wi } } +#ifdef ARCH_X86 static void vfilter_chroma_121_packed422_scanline_mmx( uint8_t *output, int width, - uint8_t *m, uint8_t *t, uint8_t *b) + uint8_t *m, uint8_t *t, uint8_t *b ) { -#ifdef ARCH_X86 int i; const mmx_t ymask = { 0x00ff00ff00ff00ffULL }; const mmx_t cmask = { 0xff00ff00ff00ff00ULL }; @@ -561,7 +616,7 @@ static void vfilter_chroma_121_packed422_scanline_mmx( uint8_t *output, int widt paddw_r2r( mm1, mm2 ); psllw_i2r( 6, mm2 ); - pand_r2r( mm6, mm2 ); + pand_r2r( mm6, mm2 ); por_r2r ( mm3, mm2 ); @@ -576,33 +631,31 @@ static void vfilter_chroma_121_packed422_scanline_mmx( uint8_t *output, int widt *output = (*t + *b + (*m << 1)) >> 2; output+=2; t+=2; b+=2; m+=2; } - + emms(); -#endif } - +#endif static void vfilter_chroma_121_packed422_scanline_c( uint8_t *output, int width, - uint8_t *m, uint8_t *t, uint8_t *b) + uint8_t *m, uint8_t *t, uint8_t *b ) { output++; t++; b++; m++; while( width-- ) { *output = (*t + *b + (*m << 1)) >> 2; - output+=2; t+=2; b+=2; m+=2; + output +=2; t+=2; b+=2; m+=2; } } - +#ifdef ARCH_X86 static void vfilter_chroma_332_packed422_scanline_mmx( uint8_t *output, int width, - uint8_t *m, uint8_t *t, uint8_t *b) + uint8_t *m, uint8_t *t, uint8_t *b ) { -#ifdef ARCH_X86 int i; const mmx_t ymask = { 0x00ff00ff00ff00ffULL }; const mmx_t cmask = { 0xff00ff00ff00ff00ULL }; // Get width in bytes. - width *= 2; + width *= 2; i = width / 8; width -= i * 8; @@ -650,26 +703,26 @@ static void vfilter_chroma_332_packed422_scanline_mmx( uint8_t *output, int widt output++; t++; b++; m++; while( width-- ) { *output = (3 * *t + 3 * *m + 2 * *b) >> 3; - output+=2; t+=2; b+=2; m+=2; + output +=2; t+=2; b+=2; m+=2; } emms(); -#endif } +#endif static void vfilter_chroma_332_packed422_scanline_c( uint8_t *output, int width, - uint8_t *m, uint8_t *t, uint8_t *b) + uint8_t *m, uint8_t *t, uint8_t *b ) { output++; t++; b++; m++; while( width-- ) { *output = (3 * *t + 3 * *m + 2 * *b) >> 3; - output+=2; t+=2; b+=2; m+=2; + output +=2; t+=2; b+=2; m+=2; } } +#ifdef ARCH_X86 static void kill_chroma_packed422_inplace_scanline_mmx( uint8_t *data, int width ) { -#ifdef ARCH_X86 const mmx_t ymask = { 0x00ff00ff00ff00ffULL }; const mmx_t nullchroma = { 0x8000800080008000ULL }; @@ -688,8 +741,8 @@ static void kill_chroma_packed422_inplace_scanline_mmx( uint8_t *data, int width data[ 1 ] = 128; data += 2; } -#endif } +#endif static void kill_chroma_packed422_inplace_scanline_c( uint8_t *data, int width ) { @@ -699,11 +752,43 @@ static void kill_chroma_packed422_inplace_scanline_c( uint8_t *data, int width ) } } +#ifdef ARCH_X86 +static void invert_colour_packed422_inplace_scanline_mmx( uint8_t *data, int width ) +{ + const mmx_t allones = { 0xffffffffffffffffULL }; + + movq_m2r( allones, mm1 ); + for(; width > 4; width -= 4 ) { + movq_r2r( mm1, mm2 ); + movq_m2r( *data, mm0 ); + psubb_r2r( mm0, mm2 ); + movq_r2m( mm2, *data ); + data += 8; + } + emms(); + + width *= 2; + while( width-- ) { + *data = 255 - *data; + data++; + } +} +#endif + +static void invert_colour_packed422_inplace_scanline_c( uint8_t *data, int width ) +{ + width *= 2; + while( width-- ) { + *data = 255 - *data; + data++; + } +} + /* // this duplicates alternate lines in alternate frames to highlight or mute // the effects of chroma crawl. it is not a solution or proper filter. it's // only for testing. -static void testing_packed422_inplace_scanline_c( uint8_t *data, int width, int scanline ) +void testing_packed422_inplace_scanline_c( uint8_t *data, int width, int scanline ) { volatile static int topbottom = 0; static uint8_t scanbuffer[2048]; @@ -785,7 +870,7 @@ static void filter_luma_14641_packed422_inplace_scanline_c( uint8_t *data, int w } static void interpolate_packed422_scanline_c( uint8_t *output, uint8_t *top, - uint8_t *bot, int width ) + uint8_t *bot, int width ) { int i; @@ -794,10 +879,10 @@ static void interpolate_packed422_scanline_c( uint8_t *output, uint8_t *top, } } +#ifdef ARCH_X86 static void interpolate_packed422_scanline_mmx( uint8_t *output, uint8_t *top, - uint8_t *bot, int width ) + uint8_t *bot, int width ) { -#ifdef ARCH_X86 const mmx_t shiftmask = { 0xfefffefffefffeffULL }; /* To avoid shifting chroma to luma. */ int i; @@ -861,13 +946,13 @@ static void interpolate_packed422_scanline_mmx( uint8_t *output, uint8_t *top, } emms(); -#endif } +#endif +#ifdef ARCH_X86 static void interpolate_packed422_scanline_mmxext( uint8_t *output, uint8_t *top, - uint8_t *bot, int width ) + uint8_t *bot, int width ) { -#ifdef ARCH_X86 int i; for( i = width/16; i; --i ) { @@ -911,8 +996,8 @@ static void interpolate_packed422_scanline_mmxext( uint8_t *output, uint8_t *top sfence(); emms(); -#endif } +#endif static void blit_colour_packed422_scanline_c( uint8_t *output, int width, int y, int cb, int cr ) { @@ -924,9 +1009,9 @@ static void blit_colour_packed422_scanline_c( uint8_t *output, int width, int y, } } +#ifdef ARCH_X86 static void blit_colour_packed422_scanline_mmx( uint8_t *output, int width, int y, int cb, int cr ) { -#ifdef ARCH_X86 uint32_t colour = cr << 24 | y << 16 | cb << 8 | y; int i; @@ -961,12 +1046,12 @@ static void blit_colour_packed422_scanline_mmx( uint8_t *output, int width, int } emms(); -#endif } +#endif +#ifdef ARCH_X86 static void blit_colour_packed422_scanline_mmxext( uint8_t *output, int width, int y, int cb, int cr ) { -#ifdef ARCH_X86 uint32_t colour = cr << 24 | y << 16 | cb << 8 | y; int i; @@ -1002,11 +1087,11 @@ static void blit_colour_packed422_scanline_mmxext( uint8_t *output, int width, i sfence(); emms(); -#endif } +#endif static void blit_colour_packed4444_scanline_c( uint8_t *output, int width, - int alpha, int luma, int cb, int cr ) + int alpha, int luma, int cb, int cr ) { int j; @@ -1018,11 +1103,11 @@ static void blit_colour_packed4444_scanline_c( uint8_t *output, int width, } } +#ifdef ARCH_X86 static void blit_colour_packed4444_scanline_mmx( uint8_t *output, int width, - int alpha, int luma, - int cb, int cr ) + int alpha, int luma, + int cb, int cr ) { -#ifdef ARCH_X86 uint32_t colour = (cr << 24) | (cb << 16) | (luma << 8) | alpha; int i; @@ -1052,14 +1137,14 @@ static void blit_colour_packed4444_scanline_mmx( uint8_t *output, int width, } emms(); -#endif } +#endif -void blit_colour_packed4444_scanline_mmxext( uint8_t *output, int width, - int alpha, int luma, - int cb, int cr ) -{ #ifdef ARCH_X86 +static void blit_colour_packed4444_scanline_mmxext( uint8_t *output, int width, + int alpha, int luma, + int cb, int cr ) +{ uint32_t colour = (cr << 24) | (cb << 16) | (luma << 8) | alpha; int i; @@ -1090,8 +1175,8 @@ void blit_colour_packed4444_scanline_mmxext( uint8_t *output, int width, sfence(); emms(); -#endif } +#endif #define speedy_memcpy_c xine_fast_memcpy @@ -1104,18 +1189,22 @@ static void blit_packed422_scanline_c( uint8_t *dest, const uint8_t *src, int wi speedy_memcpy_c( dest, src, width*2 ); } +#ifdef ARCH_X86 static void blit_packed422_scanline_mmx( uint8_t *dest, const uint8_t *src, int width ) { speedy_memcpy_mmx( dest, src, width*2 ); } +#endif +#ifdef ARCH_X86 static void blit_packed422_scanline_mmxext( uint8_t *dest, const uint8_t *src, int width ) { speedy_memcpy_mmxext( dest, src, width*2 ); } +#endif static void composite_packed4444_alpha_to_packed422_scanline_c( uint8_t *output, uint8_t *input, - uint8_t *foreground, int width, int alpha ) + uint8_t *foreground, int width, int alpha ) { int i; @@ -1172,12 +1261,12 @@ static void composite_packed4444_alpha_to_packed422_scanline_c( uint8_t *output, } } +#ifdef ARCH_X86 static void composite_packed4444_alpha_to_packed422_scanline_mmxext( uint8_t *output, - uint8_t *input, - uint8_t *foreground, - int width, int alpha ) + uint8_t *input, + uint8_t *foreground, + int width, int alpha ) { -#ifdef ARCH_X86 const mmx_t alpha2 = { 0x0000FFFF00000000ULL }; const mmx_t alpha1 = { 0xFFFF0000FFFFFFFFULL }; const mmx_t round = { 0x0080008000800080ULL }; @@ -1271,11 +1360,11 @@ static void composite_packed4444_alpha_to_packed422_scanline_mmxext( uint8_t *ou } sfence(); emms(); -#endif } +#endif static void composite_packed4444_to_packed422_scanline_c( uint8_t *output, uint8_t *input, - uint8_t *foreground, int width ) + uint8_t *foreground, int width ) { int i; for( i = 0; i < width; i++ ) { @@ -1314,10 +1403,10 @@ static void composite_packed4444_to_packed422_scanline_c( uint8_t *output, uint8 } +#ifdef ARCH_X86 static void composite_packed4444_to_packed422_scanline_mmxext( uint8_t *output, uint8_t *input, - uint8_t *foreground, int width ) + uint8_t *foreground, int width ) { -#ifdef ARCH_X86 const mmx_t alpha2 = { 0x0000FFFF00000000ULL }; const mmx_t alpha1 = { 0xFFFF0000FFFFFFFFULL }; const mmx_t round = { 0x0080008000800080ULL }; @@ -1406,8 +1495,8 @@ static void composite_packed4444_to_packed422_scanline_mmxext( uint8_t *output, } sfence(); emms(); -#endif } +#endif /** * um... just need some scrap paper... @@ -1418,11 +1507,11 @@ static void composite_packed4444_to_packed422_scanline_mmxext( uint8_t *output, * Da = (1 - a)*b + a */ static void composite_alphamask_to_packed4444_scanline_c( uint8_t *output, - uint8_t *input, - uint8_t *mask, - int width, - int textluma, int textcb, - int textcr ) + uint8_t *input, + uint8_t *mask, + int width, + int textluma, int textcb, + int textcr ) { uint32_t opaque = (textcr << 24) | (textcb << 16) | (textluma << 8) | 0xff; int i; @@ -1448,14 +1537,14 @@ static void composite_alphamask_to_packed4444_scanline_c( uint8_t *output, } } +#ifdef ARCH_X86 static void composite_alphamask_to_packed4444_scanline_mmxext( uint8_t *output, - uint8_t *input, - uint8_t *mask, - int width, - int textluma, int textcb, - int textcr ) + uint8_t *input, + uint8_t *mask, + int width, + int textluma, int textcb, + int textcr ) { -#ifdef ARCH_X86 uint32_t opaque = (textcr << 24) | (textcb << 16) | (textluma << 8) | 0xff; const mmx_t round = { 0x0080008000800080ULL }; const mmx_t fullalpha = { 0x00000000000000ffULL }; @@ -1544,14 +1633,14 @@ static void composite_alphamask_to_packed4444_scanline_mmxext( uint8_t *output, } sfence(); emms(); -#endif } +#endif static void composite_alphamask_alpha_to_packed4444_scanline_c( uint8_t *output, - uint8_t *input, - uint8_t *mask, int width, - int textluma, int textcb, - int textcr, int alpha ) + uint8_t *input, + uint8_t *mask, int width, + int textluma, int textcb, + int textcr, int alpha ) { uint32_t opaque = (textcr << 24) | (textcb << 16) | (textluma << 8) | 0xff; int i; @@ -1596,9 +1685,9 @@ static void premultiply_packed4444_scanline_c( uint8_t *output, uint8_t *input, } } +#ifdef ARCH_X86 static void premultiply_packed4444_scanline_mmxext( uint8_t *output, uint8_t *input, int width ) { -#ifdef ARCH_X86 const mmx_t round = { 0x0080008000800080ULL }; const mmx_t alpha = { 0x00000000000000ffULL }; const mmx_t noalp = { 0xffffffffffff0000ULL }; @@ -1632,11 +1721,11 @@ static void premultiply_packed4444_scanline_mmxext( uint8_t *output, uint8_t *in } sfence(); emms(); -#endif } +#endif static void blend_packed422_scanline_c( uint8_t *output, uint8_t *src1, - uint8_t *src2, int width, int pos ) + uint8_t *src2, int width, int pos ) { if( pos == 0 ) { blit_packed422_scanline( output, src1, width ); @@ -1652,10 +1741,10 @@ static void blend_packed422_scanline_c( uint8_t *output, uint8_t *src1, } } +#ifdef ARCH_X86 static void blend_packed422_scanline_mmxext( uint8_t *output, uint8_t *src1, - uint8_t *src2, int width, int pos ) + uint8_t *src2, int width, int pos ) { -#ifdef ARCH_X86 if( pos <= 0 ) { blit_packed422_scanline( output, src1, width ); } else if( pos >= 256 ) { @@ -1694,13 +1783,13 @@ static void blend_packed422_scanline_mmxext( uint8_t *output, uint8_t *src1, sfence(); emms(); } -#endif } +#endif +#ifdef ARCH_X86 static void quarter_blit_vertical_packed422_scanline_mmxext( uint8_t *output, uint8_t *one, - uint8_t *three, int width ) + uint8_t *three, int width ) { -#ifdef ARCH_X86 int i; for( i = width/16; i; --i ) { @@ -1751,12 +1840,12 @@ static void quarter_blit_vertical_packed422_scanline_mmxext( uint8_t *output, ui sfence(); emms(); -#endif } +#endif static void quarter_blit_vertical_packed422_scanline_c( uint8_t *output, uint8_t *one, - uint8_t *three, int width ) + uint8_t *three, int width ) { width *= 2; while( width-- ) { @@ -1767,7 +1856,7 @@ static void quarter_blit_vertical_packed422_scanline_c( uint8_t *output, uint8_t } static void subpix_blit_vertical_packed422_scanline_c( uint8_t *output, uint8_t *top, - uint8_t *bot, int subpixpos, int width ) + uint8_t *bot, int subpixpos, int width ) { if( subpixpos == 32768 ) { interpolate_packed422_scanline( output, top, bot, width ); @@ -1786,7 +1875,7 @@ static void subpix_blit_vertical_packed422_scanline_c( uint8_t *output, uint8_t } static void a8_subpix_blit_scanline_c( uint8_t *output, uint8_t *input, - int lasta, int startpos, int width ) + int lasta, int startpos, int width ) { int pos = 0xffff - (startpos & 0xffff); int prev = lasta; @@ -1934,7 +2023,7 @@ static void init_YCbCr_to_RGB_tables(void) conv_YR_inited = 1; } -void rgb24_to_packed444_rec601_scanline( uint8_t *output, uint8_t *input, int width ) +static void rgb24_to_packed444_rec601_scanline_c( uint8_t *output, uint8_t *input, int width ) { if( !conv_RY_inited ) init_RGB_to_YCbCr_tables(); @@ -1951,7 +2040,7 @@ void rgb24_to_packed444_rec601_scanline( uint8_t *output, uint8_t *input, int wi } } -void rgba32_to_packed4444_rec601_scanline( uint8_t *output, uint8_t *input, int width ) +static void rgba32_to_packed4444_rec601_scanline_c( uint8_t *output, uint8_t *input, int width ) { if( !conv_RY_inited ) init_RGB_to_YCbCr_tables(); @@ -1970,7 +2059,7 @@ void rgba32_to_packed4444_rec601_scanline( uint8_t *output, uint8_t *input, int } } -void packed444_to_rgb24_rec601_scanline( uint8_t *output, uint8_t *input, int width ) +static void packed444_to_rgb24_rec601_scanline_c( uint8_t *output, uint8_t *input, int width ) { if( !conv_YR_inited ) init_YCbCr_to_RGB_tables(); @@ -2006,7 +2095,8 @@ void packed444_to_rgb24_rec601_scanline( uint8_t *output, uint8_t *input, int wi * B-Y' = -0.299*R' - 0.587*G' + 0.886*B' * R-Y' = 0.701*R' - 0.587*G' - 0.114*B' */ -void packed444_to_rgb24_rec601_reference_scanline( uint8_t *output, uint8_t *input, int width ) +/* +static void packed444_to_rgb24_rec601_reference_scanline( uint8_t *output, uint8_t *input, int width ) { while( width-- ) { double yp = (((double) input[ 0 ]) - 16.0) / 255.0; @@ -2030,10 +2120,11 @@ void packed444_to_rgb24_rec601_reference_scanline( uint8_t *output, uint8_t *inp input += 3; } } +*/ -void packed444_to_nonpremultiplied_packed4444_scanline( uint8_t *output, - uint8_t *input, - int width, int alpha ) +static void packed444_to_nonpremultiplied_packed4444_scanline_c( uint8_t *output, + uint8_t *input, + int width, int alpha ) { int i; @@ -2046,13 +2137,12 @@ void packed444_to_nonpremultiplied_packed4444_scanline( uint8_t *output, output += 4; input += 3; } - } -void aspect_adjust_packed4444_scanline( uint8_t *output, - uint8_t *input, - int width, - double pixel_aspect ) +static void aspect_adjust_packed4444_scanline_c( uint8_t *output, + uint8_t *input, + int width, + double pixel_aspect ) { double i; int prev_i = 0; @@ -2098,10 +2188,10 @@ void aspect_adjust_packed4444_scanline( uint8_t *output, /** * Sub-pixel data bar renderer. There are 128 bars. */ -void composite_bars_packed4444_scanline( uint8_t *output, - uint8_t *background, int width, - int a, int luma, int cb, int cr, - int percentage ) +static void composite_bars_packed4444_scanline_c( uint8_t *output, + uint8_t *background, int width, + int a, int luma, int cb, int cr, + int percentage ) { /** * This is the size of both the bar and the spacing in between in subpixel @@ -2141,6 +2231,139 @@ void composite_bars_packed4444_scanline( uint8_t *output, } } +/* horizontal 1:2 interpolation filter */ +static void chroma_422_to_444_mpeg2_plane_c( uint8_t *dst, uint8_t *src, int width, int height ) +{ + int i, i2, w, j, im2, im1, ip1, ip2, ip3; + + w = width / 2; + + for( j = 0; j < height; j++ ) { + for( i = 0; i < w; i++ ) { + i2 = i << 1; + im2 = (i<2) ? 0 : i-2; + im1 = (i<1) ? 0 : i-1; + ip1 = (i> 8 ); + } + src += w; + dst += width; + } +} + +/* vertical 1:2 interpolation filter */ +static void chroma_420_to_422_mpeg2_plane_c( uint8_t *dst, uint8_t *src, + int width, int height, int progressive ) +{ + int w, h, i, j, j2; + int jm6, jm5, jm4, jm3, jm2, jm1, jp1, jp2, jp3, jp4, jp5, jp6, jp7; + + w = width / 2; + h = height / 2; + + if( progressive ) { + /* intra frame */ + for( i = 0; i < w; i++ ) { + for( j = 0; j < h; j++ ) { + j2 = j << 1; + jm3 = (j<3) ? 0 : j-3; + jm2 = (j<2) ? 0 : j-2; + jm1 = (j<1) ? 0 : j-1; + jp1 = (j> 8 ); + + dst[w*(j2+1)] = clip255( ( 3*src[w*jp3] + - 16*src[w*jp2] + + 67*src[w*jp1] + + 227*src[w*j] + - 32*src[w*jm1] + + 7*src[w*jm2] + 128 ) >> 8 ); + } + src++; + dst++; + } + } else { + /* intra field */ + for( i = 0; i < w; i++ ) { + for( j = 0; j < h; j += 2 ) { + j2 = j << 1; + + /* top field */ + jm6 = (j<6) ? 0 : j-6; + jm4 = (j<4) ? 0 : j-4; + jm2 = (j<2) ? 0 : j-2; + jp2 = (j> 8 ); + + /* Polyphase FIR filter coefficients (*256): 11 -38 192 113 -30 8 */ + /* New polyphase FIR filter coefficients (*256):7 -35 194 110 -24 4 */ + dst[w*(j2+2)] = clip255( ( 7*src[w*jm4] + -35*src[w*jm2] + +194*src[w*j] + +110*src[w*jp2] + -24*src[w*jp4] + +4*src[w*jp6] + 128 ) >> 8 ); + + /* bottom field */ + jm5 = (j<5) ? 1 : j-5; + jm3 = (j<3) ? 1 : j-3; + jm1 = (j<1) ? 1 : j-1; + jp1 = (j> 8 ); + + dst[w*(j2+3)] = clip255( ( 1*src[w*jp7] + -7*src[w*jp5] + +30*src[w*jp3] + +248*src[w*jp1] + -21*src[w*jm1] + +5*src[w*jm3] + 128 ) >> 8 ); + } + src++; + dst++; + } + } +} static uint32_t speedy_accel; @@ -2162,8 +2385,6 @@ void setup_speedy_calls( uint32_t accel, int verbose ) filter_luma_14641_packed422_inplace_scanline = filter_luma_14641_packed422_inplace_scanline_c; comb_factor_packed422_scanline = 0; diff_factor_packed422_scanline = diff_factor_packed422_scanline_c; - vfilter_chroma_121_packed422_scanline = vfilter_chroma_121_packed422_scanline_c; - vfilter_chroma_332_packed422_scanline = vfilter_chroma_332_packed422_scanline_c; kill_chroma_packed422_inplace_scanline = kill_chroma_packed422_inplace_scanline_c; mirror_packed422_inplace_scanline = mirror_packed422_inplace_scanline_c; halfmirror_packed422_inplace_scanline = halfmirror_packed422_inplace_scanline_c; @@ -2172,6 +2393,20 @@ void setup_speedy_calls( uint32_t accel, int verbose ) a8_subpix_blit_scanline = a8_subpix_blit_scanline_c; quarter_blit_vertical_packed422_scanline = quarter_blit_vertical_packed422_scanline_c; subpix_blit_vertical_packed422_scanline = subpix_blit_vertical_packed422_scanline_c; + composite_bars_packed4444_scanline = composite_bars_packed4444_scanline_c; + packed444_to_nonpremultiplied_packed4444_scanline = packed444_to_nonpremultiplied_packed4444_scanline_c; + aspect_adjust_packed4444_scanline = aspect_adjust_packed4444_scanline_c; + packed444_to_packed422_scanline = packed444_to_packed422_scanline_c; + packed422_to_packed444_scanline = packed422_to_packed444_scanline_c; + packed422_to_packed444_rec601_scanline = packed422_to_packed444_rec601_scanline_c; + packed444_to_rgb24_rec601_scanline = packed444_to_rgb24_rec601_scanline_c; + rgb24_to_packed444_rec601_scanline = rgb24_to_packed444_rec601_scanline_c; + rgba32_to_packed4444_rec601_scanline = rgba32_to_packed4444_rec601_scanline_c; + chroma_422_to_444_mpeg2_plane = chroma_422_to_444_mpeg2_plane_c; + chroma_420_to_422_mpeg2_plane = chroma_420_to_422_mpeg2_plane_c; + invert_colour_packed422_inplace_scanline = invert_colour_packed422_inplace_scanline_c; + vfilter_chroma_121_packed422_scanline = vfilter_chroma_121_packed422_scanline_c; + vfilter_chroma_332_packed422_scanline = vfilter_chroma_332_packed422_scanline_c; #ifdef ARCH_X86 if( speedy_accel & MM_ACCEL_X86_MMXEXT ) { @@ -2186,14 +2421,15 @@ void setup_speedy_calls( uint32_t accel, int verbose ) composite_packed4444_alpha_to_packed422_scanline = composite_packed4444_alpha_to_packed422_scanline_mmxext; composite_alphamask_to_packed4444_scanline = composite_alphamask_to_packed4444_scanline_mmxext; premultiply_packed4444_scanline = premultiply_packed4444_scanline_mmxext; - vfilter_chroma_121_packed422_scanline = vfilter_chroma_121_packed422_scanline_mmx; - vfilter_chroma_332_packed422_scanline = vfilter_chroma_332_packed422_scanline_mmx; kill_chroma_packed422_inplace_scanline = kill_chroma_packed422_inplace_scanline_mmx; blend_packed422_scanline = blend_packed422_scanline_mmxext; diff_factor_packed422_scanline = diff_factor_packed422_scanline_mmx; comb_factor_packed422_scanline = comb_factor_packed422_scanline_mmx; diff_packed422_block8x8 = diff_packed422_block8x8_mmx; quarter_blit_vertical_packed422_scanline = quarter_blit_vertical_packed422_scanline_mmxext; + invert_colour_packed422_inplace_scanline = invert_colour_packed422_inplace_scanline_mmx; + vfilter_chroma_121_packed422_scanline = vfilter_chroma_121_packed422_scanline_mmx; + vfilter_chroma_332_packed422_scanline = vfilter_chroma_332_packed422_scanline_mmx; speedy_memcpy = speedy_memcpy_mmxext; } else if( speedy_accel & MM_ACCEL_X86_MMX ) { if( verbose ) { @@ -2205,10 +2441,11 @@ void setup_speedy_calls( uint32_t accel, int verbose ) blit_packed422_scanline = blit_packed422_scanline_mmx; diff_factor_packed422_scanline = diff_factor_packed422_scanline_mmx; comb_factor_packed422_scanline = comb_factor_packed422_scanline_mmx; - vfilter_chroma_121_packed422_scanline = vfilter_chroma_121_packed422_scanline_mmx; - vfilter_chroma_332_packed422_scanline = vfilter_chroma_332_packed422_scanline_mmx; kill_chroma_packed422_inplace_scanline = kill_chroma_packed422_inplace_scanline_mmx; diff_packed422_block8x8 = diff_packed422_block8x8_mmx; + invert_colour_packed422_inplace_scanline = invert_colour_packed422_inplace_scanline_mmx; + vfilter_chroma_121_packed422_scanline = vfilter_chroma_121_packed422_scanline_mmx; + vfilter_chroma_332_packed422_scanline = vfilter_chroma_332_packed422_scanline_mmx; speedy_memcpy = speedy_memcpy_mmx; } else { if( verbose ) { diff --git a/src/post/deinterlace/speedy.h b/src/post/deinterlace/speedy.h index 78fa42468..3b835f5a6 100644 --- a/src/post/deinterlace/speedy.h +++ b/src/post/deinterlace/speedy.h @@ -36,15 +36,15 @@ extern "C" { * * The optimizations are done with the help of the mmx.h system, from * libmpeg2 by Michel Lespinasse and Aaron Holtzman. + * + * The library is a collection of function pointers which must be first + * initialized by setup_speedy_calls() to point at the fastest available + * implementation of each function. */ /** - * This filter actually does not meet the spec so calling it rec601 - * is a bit of a lie. I got the filter from Poynton's site. + * Struct for pulldown detection metrics. */ -void packed422_to_packed444_rec601_scanline( uint8_t *dest, uint8_t *src, int width ); - -/* Struct for pulldown detection metrics. */ typedef struct pulldown_metrics_s { /* difference: total, even lines, odd lines */ int d, e, o; @@ -52,11 +52,6 @@ typedef struct pulldown_metrics_s { int t, s, p; } pulldown_metrics_t; -/** - * Here are the function pointers which will be initialized to point at the - * fastest available version of the above after a call to setup_speedy_calls(). - */ - /** * Interpolates a packed 4:2:2 scanline using linear interpolation. */ @@ -78,55 +73,242 @@ extern void (*blit_colour_packed4444_scanline)( uint8_t *output, int cb, int cr ); /** - * Scanline blitter for packed 4:2:2 scanlines. This implementation uses - * the fast memcpy code from xine which got it from mplayer. + * Blit from and to packed 4:2:2 scanline. */ extern void (*blit_packed422_scanline)( uint8_t *dest, const uint8_t *src, int width ); -/* Alpha provided is from 0-256 not 0-255. */ +/** + * Composites a packed 4:4:4:4 scanline onto a packed 4:2:2 scanline. + * Chroma is downsampled by dropping samples (nearest neighbour). + */ extern void (*composite_packed4444_to_packed422_scanline)( uint8_t *output, uint8_t *input, uint8_t *foreground, int width ); + +/** + * Composites a packed 4:4:4:4 scanline onto a packed 4:2:2 scanline. + * Chroma is downsampled by dropping samples (nearest neighbour). The + * alpha value provided is in the range 0-256 and is first applied to + * the input (for fadeouts). + */ extern void (*composite_packed4444_alpha_to_packed422_scanline)( uint8_t *output, uint8_t *input, uint8_t *foreground, int width, int alpha ); + +/** + * Takes an alphamask and the given colour (in Y'CbCr) and composites it + * onto a packed 4:4:4:4 scanline. + */ extern void (*composite_alphamask_to_packed4444_scanline)( uint8_t *output, uint8_t *input, uint8_t *mask, int width, int textluma, int textcb, int textcr ); + +/** + * Takes an alphamask and the given colour (in Y'CbCr) and composites it + * onto a packed 4:4:4:4 scanline. The alpha value provided is in the + * range 0-256 and is first applied to the input (for fadeouts). + */ extern void (*composite_alphamask_alpha_to_packed4444_scanline)( uint8_t *output, uint8_t *input, uint8_t *mask, int width, int textluma, int textcb, int textcr, int alpha ); + +/** + * Sub-pixel data bar renderer. There are 128 bars. + */ +extern void (*composite_bars_packed4444_scanline)( uint8_t *output, + uint8_t *background, int width, + int a, int luma, int cb, int cr, + int percentage ); + + +/** + * Premultiplies the colour by the alpha channel in a packed 4:4:4:4 + * scanline. + */ extern void (*premultiply_packed4444_scanline)( uint8_t *output, uint8_t *input, int width ); + +/** + * Blend between two packed 4:2:2 scanline. Pos is the fade value in + * the range 0-256. A value of 0 gives 100% src1, and a value of 256 + * gives 100% src2. Anything in between gives the appropriate faded + * version. + */ extern void (*blend_packed422_scanline)( uint8_t *output, uint8_t *src1, uint8_t *src2, int width, int pos ); -extern void (*filter_luma_121_packed422_inplace_scanline)( uint8_t *data, int width ); -extern void (*filter_luma_14641_packed422_inplace_scanline)( uint8_t *data, int width ); + +/** + * Calculates the 'difference factor' for two scanlines. This is a + * metric where higher values indicate that the two scanlines are more + * different. + */ extern unsigned int (*diff_factor_packed422_scanline)( uint8_t *cur, uint8_t *old, int width ); + +/** + * Calculates the 'comb factor' for a set of three scanlines. This is a + * metric where higher values indicate a more likely chance that the two + * fields are at separate points in time. + */ extern unsigned int (*comb_factor_packed422_scanline)( uint8_t *top, uint8_t *mid, uint8_t *bot, int width ); + +/** + * Vertical [1 2 1] chroma filter. + */ extern void (*vfilter_chroma_121_packed422_scanline)( uint8_t *output, int width, - uint8_t *m, uint8_t *t, uint8_t *b); + uint8_t *m, uint8_t *t, uint8_t *b ); + +/** + * Vertical [3 3 2] chroma filter. + */ extern void (*vfilter_chroma_332_packed422_scanline)( uint8_t *output, int width, - uint8_t *m, uint8_t *t, uint8_t *b); + uint8_t *m, uint8_t *t, uint8_t *b ); + +/** + * In-place [1 2 1] filter. + */ +extern void (*filter_luma_121_packed422_inplace_scanline)( uint8_t *data, int width ); + +/** + * In-place [1 4 6 4 1] filter. + */ +extern void (*filter_luma_14641_packed422_inplace_scanline)( uint8_t *data, int width ); + +/** + * Sets the chroma of the scanline to neutral (128) in-place. + */ extern void (*kill_chroma_packed422_inplace_scanline)( uint8_t *data, int width ); + +/** + * Mirrors the scanline in-place. + */ extern void (*mirror_packed422_inplace_scanline)( uint8_t *data, int width ); + +/** + * Mirrors the first half of the scanline onto the second half in-place. + */ extern void (*halfmirror_packed422_inplace_scanline)( uint8_t *data, int width ); + +/** + * Inverts the colours on a scanline in-place. + */ +extern void (*invert_colour_packed422_inplace_scanline)( uint8_t *data, int width ); + +/** + * Fast memcpy function, used by all of the blit functions. Won't blit + * anything if dest == src. + */ extern void *(*speedy_memcpy)( void *output, const void *input, size_t size ); + +/** + * Calculates the block difference metrics for dalias' pulldown + * detection algorithm. + */ extern void (*diff_packed422_block8x8)( pulldown_metrics_t *m, uint8_t *old, uint8_t *new, int os, int ns ); + +/** + * Takes an alpha mask and subpixelly blits it using linear + * interpolation. + */ extern void (*a8_subpix_blit_scanline)( uint8_t *output, uint8_t *input, int lasta, int startpos, int width ); + +/** + * 1/4 vertical subpixel blit for packed 4:2:2 scanlines using linear + * interpolation. + */ extern void (*quarter_blit_vertical_packed422_scanline)( uint8_t *output, uint8_t *one, uint8_t *three, int width ); + +/** + * Vertical subpixel blit for packed 4:2:2 scanlines using linear + * interpolation. + */ extern void (*subpix_blit_vertical_packed422_scanline)( uint8_t *output, uint8_t *top, uint8_t *bot, int subpixpos, int width ); +/** + * Simple function to convert a 4:4:4 scanline to a 4:4:4:4 scanline by + * adding an alpha channel. Result is non-premultiplied. + */ +extern void (*packed444_to_nonpremultiplied_packed4444_scanline)( uint8_t *output, + uint8_t *input, + int width, int alpha ); + +/** + * I think this function needs to be rethought and renamed, but here + * it is for now. This function horizontally resamples a scanline + * using linear interpolation to compensate for a change in pixel + * aspect ratio. + */ +extern void (*aspect_adjust_packed4444_scanline)( uint8_t *output, + uint8_t *input, + int width, + double pixel_aspect ); + +/** + * Convert a packed 4:4:4 surface to a packed 4:2:2 surface using + * nearest neighbour chroma downsampling. + */ +extern void (*packed444_to_packed422_scanline)( uint8_t *output, + uint8_t *input, + int width ); + +/** + * Converts packed 4:2:2 to packed 4:4:4 scanlines using nearest + * neighbour chroma upsampling. + */ +extern void (*packed422_to_packed444_scanline)( uint8_t *output, + uint8_t *input, + int width ); + +/** + * This filter actually does not meet the spec so calling it rec601 + * is a bit of a lie. I got the filter from Poynton's site. This + * converts a scanline from packed 4:2:2 to packed 4:4:4. But this + * function should point at some high quality to-the-spec resampler. + */ +extern void (*packed422_to_packed444_rec601_scanline)( uint8_t *dest, + uint8_t *src, + int width ); + +/** + * Conversions between Y'CbCr and R'G'B'. We use Rec.601 numbers + * since our source is broadcast video, but I think there is an + * argument to be made for switching to Rec.709. + */ +extern void (*packed444_to_rgb24_rec601_scanline)( uint8_t *output, + uint8_t *input, + int width ); +extern void (*rgb24_to_packed444_rec601_scanline)( uint8_t *output, + uint8_t *input, + int width ); +extern void (*rgba32_to_packed4444_rec601_scanline)( uint8_t *output, + uint8_t *input, + int width ); + +/** + * Chroma upsampler for a chroma plane (8 bit per pixel) from + * 4:2:2 to 4:4:4. I believe that implements the filter described + * in the MPEG2 spec, but I have not confirmed. + */ +extern void (*chroma_422_to_444_mpeg2_plane)( uint8_t *dst, uint8_t *src, + int width, int height ); + +/** + * Chroma upsampler for a chroma plane (8 bit per pixel) from + * 4:2:0 to 4:2:2. I believe that implements the filter described + * in the MPEG2 spec, but I have not confirmed. + */ +extern void (*chroma_420_to_422_mpeg2_plane)( uint8_t *dst, uint8_t *src, + int width, int height, int progressive ); + /** * Sets up the function pointers to point at the fastest function * available. Requires accelleration settings (see mm_accel.h). diff --git a/src/post/deinterlace/xine_plugin.c b/src/post/deinterlace/xine_plugin.c index a9440ae5b..3db6a3160 100644 --- a/src/post/deinterlace/xine_plugin.c +++ b/src/post/deinterlace/xine_plugin.c @@ -17,7 +17,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA * - * $Id: xine_plugin.c,v 1.7 2003/06/21 11:28:04 mroi Exp $ + * $Id: xine_plugin.c,v 1.8 2003/06/22 15:42:07 miguelfreitas Exp $ * * advanced video deinterlacer plugin * Jun/2003 by Miguel Freitas @@ -298,7 +298,7 @@ static post_plugin_t *deinterlace_open_plugin(post_class_t *class_gen, int input this->framerate_mode = 0; this->judder_correction = 1; this->use_progressive_frame_flag = 1; - this->chroma_filter = 1; + this->chroma_filter = 0; this->framecounter = 0; memset( &this->recent_frame, 0, sizeof(this->recent_frame) ); -- cgit v1.2.3