diff options
author | Miguel Freitas <miguelfreitas@users.sourceforge.net> | 2003-06-22 15:42:07 +0000 |
---|---|---|
committer | Miguel Freitas <miguelfreitas@users.sourceforge.net> | 2003-06-22 15:42:07 +0000 |
commit | 88ff161b2a4e44854f934adf82b0d1d872e20bac (patch) | |
tree | d4067ae6045e5727649cc437c1813b285c75a3c7 /src/post/deinterlace/speedy.c | |
parent | 8531aac55030077b509f4db3cb5c85f04b08f7a5 (diff) | |
download | xine-lib-88ff161b2a4e44854f934adf82b0d1d872e20bac.tar.gz xine-lib-88ff161b2a4e44854f934adf82b0d1d872e20bac.tar.bz2 |
- disable chroma filter by default
- sync to tvtime cvs (speedy.c, speedy.h)
CVS patchset: 5087
CVS date: 2003/06/22 15:42:07
Diffstat (limited to 'src/post/deinterlace/speedy.c')
-rw-r--r-- | src/post/deinterlace/speedy.c | 491 |
1 files changed, 364 insertions, 127 deletions
diff --git a/src/post/deinterlace/speedy.c b/src/post/deinterlace/speedy.c index 9ad929679..a49bf776d 100644 --- a/src/post/deinterlace/speedy.c +++ b/src/post/deinterlace/speedy.c @@ -17,16 +17,40 @@ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +/** + * Includes 420to422, 422to444 scaling filters from the MPEG2 reference + * implementation. The v12 source code indicates that they were written + * by Cheung Auyeung <auyeung@mot.com>. The file they were in was: + * + * store.c, picture output routines + * Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. + * + * Disclaimer of Warranty + * + * These software programs are available to the user without any license fee or + * royalty on an "as is" basis. The MPEG Software Simulation Group disclaims + * any and all warranties, whether express, implied, or statuary, including any + * implied warranties or merchantability or of fitness for a particular + * purpose. In no event shall the copyright-holder be liable for any + * incidental, punitive, or consequential damages of any kind whatsoever + * arising from the use of these programs. + * + * This disclaimer of warranty extends to the user of these programs and user's + * customers, employees, agents, transferees, successors, and assigns. + * + * The MPEG Software Simulation Group does not represent or warrant that the + * programs furnished hereunder are free of infringement of any third-party + * patents. + * + * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware, + * are subject to royalty fees to patent holders. Many of these patents are + * general enough such that they are unavoidable regardless of implementation + * design. + * + */ + #include <stdio.h> #include <string.h> -#include <sys/time.h> -#include <ctype.h> -#include <unistd.h> -#if defined (__SVR4) && defined (__sun) -# include <sys/int_types.h> -#else -# include <stdint.h> -#endif #ifdef HAVE_CONFIG_H # include "config.h" @@ -70,10 +94,6 @@ void (*filter_luma_14641_packed422_inplace_scanline)( uint8_t *data, int width ) unsigned int (*diff_factor_packed422_scanline)( uint8_t *cur, uint8_t *old, int width ); unsigned int (*comb_factor_packed422_scanline)( uint8_t *top, uint8_t *mid, uint8_t *bot, int width ); -void (*vfilter_chroma_121_packed422_scanline)( uint8_t *output, int width, - uint8_t *m, uint8_t *t, uint8_t *b); -void (*vfilter_chroma_332_packed422_scanline)( uint8_t *output, int width, - uint8_t *m, uint8_t *t, uint8_t *b); void (*kill_chroma_packed422_inplace_scanline)( uint8_t *data, int width ); void (*mirror_packed422_inplace_scanline)( uint8_t *data, int width ); void (*halfmirror_packed422_inplace_scanline)( uint8_t *data, int width ); @@ -86,6 +106,44 @@ void (*quarter_blit_vertical_packed422_scanline)( uint8_t *output, uint8_t *one, uint8_t *three, int width ); void (*subpix_blit_vertical_packed422_scanline)( uint8_t *output, uint8_t *top, uint8_t *bot, int subpixpos, int width ); +void (*composite_bars_packed4444_scanline)( uint8_t *output, + uint8_t *background, int width, + int a, int luma, int cb, int cr, + int percentage ); +void (*packed444_to_nonpremultiplied_packed4444_scanline)( uint8_t *output, + uint8_t *input, + int width, int alpha ); +void (*aspect_adjust_packed4444_scanline)( uint8_t *output, + uint8_t *input, + int width, + double pixel_aspect ); +void (*packed444_to_packed422_scanline)( uint8_t *output, + uint8_t *input, + int width ); +void (*packed422_to_packed444_scanline)( uint8_t *output, + uint8_t *input, + int width ); +void (*packed422_to_packed444_rec601_scanline)( uint8_t *dest, + uint8_t *src, + int width ); +void (*packed444_to_rgb24_rec601_scanline)( uint8_t *output, + uint8_t *input, + int width ); +void (*rgb24_to_packed444_rec601_scanline)( uint8_t *output, + uint8_t *input, + int width ); +void (*rgba32_to_packed4444_rec601_scanline)( uint8_t *output, + uint8_t *input, + int width ); +void (*chroma_422_to_444_mpeg2_plane)( uint8_t *dst, uint8_t *src, + int width, int height ); +void (*chroma_420_to_422_mpeg2_plane)( uint8_t *dst, uint8_t *src, + int width, int height, int progressive ); +void (*invert_colour_packed422_inplace_scanline)( uint8_t *data, int width ); +void (*vfilter_chroma_121_packed422_scanline)( uint8_t *output, int width, + uint8_t *m, uint8_t *t, uint8_t *b ); +void (*vfilter_chroma_332_packed422_scanline)( uint8_t *output, int width, + uint8_t *m, uint8_t *t, uint8_t *b ); /** @@ -114,10 +172,10 @@ static inline __attribute__ ((always_inline,const)) uint8_t clip255( int x ) static unsigned long CombJaggieThreshold = 73; +#ifdef ARCH_X86 static unsigned int comb_factor_packed422_scanline_mmx( uint8_t *top, uint8_t *mid, - uint8_t *bot, int width ) + uint8_t *bot, int width ) { -#ifdef ARCH_X86 const mmx_t qwYMask = { 0x00ff00ff00ff00ffULL }; const mmx_t qwOnes = { 0x0001000100010001ULL }; mmx_t qwThreshold; @@ -193,10 +251,8 @@ static unsigned int comb_factor_packed422_scanline_mmx( uint8_t *top, uint8_t *m emms(); return temp1; -#else - return 0; -#endif } +#endif static unsigned long BitShift = 6; @@ -220,6 +276,7 @@ static unsigned int diff_factor_packed422_scanline_c( uint8_t *cur, uint8_t *old return ret; } +/* static unsigned int diff_factor_packed422_scanline_test_c( uint8_t *cur, uint8_t *old, int width ) { unsigned int ret = 0; @@ -239,11 +296,11 @@ static unsigned int diff_factor_packed422_scanline_test_c( uint8_t *cur, uint8_t return ret; } +*/ - +#ifdef ARCH_X86 static unsigned int diff_factor_packed422_scanline_mmx( uint8_t *cur, uint8_t *old, int width ) { -#ifdef ARCH_X86 const mmx_t qwYMask = { 0x00ff00ff00ff00ffULL }; unsigned int temp1, temp2; @@ -277,17 +334,15 @@ static unsigned int diff_factor_packed422_scanline_mmx( uint8_t *cur, uint8_t *o emms(); return temp1; -#else - return 0; -#endif } +#endif #define ABS(a) (((a) < 0)?-(a):(a)) +#ifdef ARCH_X86 static void diff_packed422_block8x8_mmx( pulldown_metrics_t *m, uint8_t *old, - uint8_t *new, int os, int ns ) + uint8_t *new, int os, int ns ) { -#ifdef ARCH_X86 const mmx_t ymask = { 0x00ff00ff00ff00ffULL }; short out[ 24 ]; /* Output buffer for the partial metrics from the mmx code. */ uint8_t *outdata = (uint8_t *) out; @@ -426,11 +481,11 @@ static void diff_packed422_block8x8_mmx( pulldown_metrics_t *m, uint8_t *old, } emms(); -#endif } +#endif static void diff_packed422_block8x8_c( pulldown_metrics_t *m, uint8_t *old, - uint8_t *new, int os, int ns ) + uint8_t *new, int os, int ns ) { int x, y, e=0, o=0, s=0, p=0, t=0; uint8_t *oldp, *newp; @@ -491,7 +546,7 @@ static void packed422_to_packed444_scanline_c( uint8_t *output, uint8_t *input, * * [-1 3 -6 12 -24 80 80 -24 12 -6 3 -1] */ -void packed422_to_packed444_rec601_scanline( uint8_t *dest, uint8_t *src, int width ) +static void packed422_to_packed444_rec601_scanline_c( uint8_t *dest, uint8_t *src, int width ) { int i; @@ -525,10 +580,10 @@ void packed422_to_packed444_rec601_scanline( uint8_t *dest, uint8_t *src, int wi } } +#ifdef ARCH_X86 static void vfilter_chroma_121_packed422_scanline_mmx( uint8_t *output, int width, - uint8_t *m, uint8_t *t, uint8_t *b) + uint8_t *m, uint8_t *t, uint8_t *b ) { -#ifdef ARCH_X86 int i; const mmx_t ymask = { 0x00ff00ff00ff00ffULL }; const mmx_t cmask = { 0xff00ff00ff00ff00ULL }; @@ -561,7 +616,7 @@ static void vfilter_chroma_121_packed422_scanline_mmx( uint8_t *output, int widt paddw_r2r( mm1, mm2 ); psllw_i2r( 6, mm2 ); - pand_r2r( mm6, mm2 ); + pand_r2r( mm6, mm2 ); por_r2r ( mm3, mm2 ); @@ -576,33 +631,31 @@ static void vfilter_chroma_121_packed422_scanline_mmx( uint8_t *output, int widt *output = (*t + *b + (*m << 1)) >> 2; output+=2; t+=2; b+=2; m+=2; } - + emms(); -#endif } - +#endif static void vfilter_chroma_121_packed422_scanline_c( uint8_t *output, int width, - uint8_t *m, uint8_t *t, uint8_t *b) + uint8_t *m, uint8_t *t, uint8_t *b ) { output++; t++; b++; m++; while( width-- ) { *output = (*t + *b + (*m << 1)) >> 2; - output+=2; t+=2; b+=2; m+=2; + output +=2; t+=2; b+=2; m+=2; } } - +#ifdef ARCH_X86 static void vfilter_chroma_332_packed422_scanline_mmx( uint8_t *output, int width, - uint8_t *m, uint8_t *t, uint8_t *b) + uint8_t *m, uint8_t *t, uint8_t *b ) { -#ifdef ARCH_X86 int i; const mmx_t ymask = { 0x00ff00ff00ff00ffULL }; const mmx_t cmask = { 0xff00ff00ff00ff00ULL }; // Get width in bytes. - width *= 2; + width *= 2; i = width / 8; width -= i * 8; @@ -650,26 +703,26 @@ static void vfilter_chroma_332_packed422_scanline_mmx( uint8_t *output, int widt output++; t++; b++; m++; while( width-- ) { *output = (3 * *t + 3 * *m + 2 * *b) >> 3; - output+=2; t+=2; b+=2; m+=2; + output +=2; t+=2; b+=2; m+=2; } emms(); -#endif } +#endif static void vfilter_chroma_332_packed422_scanline_c( uint8_t *output, int width, - uint8_t *m, uint8_t *t, uint8_t *b) + uint8_t *m, uint8_t *t, uint8_t *b ) { output++; t++; b++; m++; while( width-- ) { *output = (3 * *t + 3 * *m + 2 * *b) >> 3; - output+=2; t+=2; b+=2; m+=2; + output +=2; t+=2; b+=2; m+=2; } } +#ifdef ARCH_X86 static void kill_chroma_packed422_inplace_scanline_mmx( uint8_t *data, int width ) { -#ifdef ARCH_X86 const mmx_t ymask = { 0x00ff00ff00ff00ffULL }; const mmx_t nullchroma = { 0x8000800080008000ULL }; @@ -688,8 +741,8 @@ static void kill_chroma_packed422_inplace_scanline_mmx( uint8_t *data, int width data[ 1 ] = 128; data += 2; } -#endif } +#endif static void kill_chroma_packed422_inplace_scanline_c( uint8_t *data, int width ) { @@ -699,11 +752,43 @@ static void kill_chroma_packed422_inplace_scanline_c( uint8_t *data, int width ) } } +#ifdef ARCH_X86 +static void invert_colour_packed422_inplace_scanline_mmx( uint8_t *data, int width ) +{ + const mmx_t allones = { 0xffffffffffffffffULL }; + + movq_m2r( allones, mm1 ); + for(; width > 4; width -= 4 ) { + movq_r2r( mm1, mm2 ); + movq_m2r( *data, mm0 ); + psubb_r2r( mm0, mm2 ); + movq_r2m( mm2, *data ); + data += 8; + } + emms(); + + width *= 2; + while( width-- ) { + *data = 255 - *data; + data++; + } +} +#endif + +static void invert_colour_packed422_inplace_scanline_c( uint8_t *data, int width ) +{ + width *= 2; + while( width-- ) { + *data = 255 - *data; + data++; + } +} + /* // this duplicates alternate lines in alternate frames to highlight or mute // the effects of chroma crawl. it is not a solution or proper filter. it's // only for testing. -static void testing_packed422_inplace_scanline_c( uint8_t *data, int width, int scanline ) +void testing_packed422_inplace_scanline_c( uint8_t *data, int width, int scanline ) { volatile static int topbottom = 0; static uint8_t scanbuffer[2048]; @@ -785,7 +870,7 @@ static void filter_luma_14641_packed422_inplace_scanline_c( uint8_t *data, int w } static void interpolate_packed422_scanline_c( uint8_t *output, uint8_t *top, - uint8_t *bot, int width ) + uint8_t *bot, int width ) { int i; @@ -794,10 +879,10 @@ static void interpolate_packed422_scanline_c( uint8_t *output, uint8_t *top, } } +#ifdef ARCH_X86 static void interpolate_packed422_scanline_mmx( uint8_t *output, uint8_t *top, - uint8_t *bot, int width ) + uint8_t *bot, int width ) { -#ifdef ARCH_X86 const mmx_t shiftmask = { 0xfefffefffefffeffULL }; /* To avoid shifting chroma to luma. */ int i; @@ -861,13 +946,13 @@ static void interpolate_packed422_scanline_mmx( uint8_t *output, uint8_t *top, } emms(); -#endif } +#endif +#ifdef ARCH_X86 static void interpolate_packed422_scanline_mmxext( uint8_t *output, uint8_t *top, - uint8_t *bot, int width ) + uint8_t *bot, int width ) { -#ifdef ARCH_X86 int i; for( i = width/16; i; --i ) { @@ -911,8 +996,8 @@ static void interpolate_packed422_scanline_mmxext( uint8_t *output, uint8_t *top sfence(); emms(); -#endif } +#endif static void blit_colour_packed422_scanline_c( uint8_t *output, int width, int y, int cb, int cr ) { @@ -924,9 +1009,9 @@ static void blit_colour_packed422_scanline_c( uint8_t *output, int width, int y, } } +#ifdef ARCH_X86 static void blit_colour_packed422_scanline_mmx( uint8_t *output, int width, int y, int cb, int cr ) { -#ifdef ARCH_X86 uint32_t colour = cr << 24 | y << 16 | cb << 8 | y; int i; @@ -961,12 +1046,12 @@ static void blit_colour_packed422_scanline_mmx( uint8_t *output, int width, int } emms(); -#endif } +#endif +#ifdef ARCH_X86 static void blit_colour_packed422_scanline_mmxext( uint8_t *output, int width, int y, int cb, int cr ) { -#ifdef ARCH_X86 uint32_t colour = cr << 24 | y << 16 | cb << 8 | y; int i; @@ -1002,11 +1087,11 @@ static void blit_colour_packed422_scanline_mmxext( uint8_t *output, int width, i sfence(); emms(); -#endif } +#endif static void blit_colour_packed4444_scanline_c( uint8_t *output, int width, - int alpha, int luma, int cb, int cr ) + int alpha, int luma, int cb, int cr ) { int j; @@ -1018,11 +1103,11 @@ static void blit_colour_packed4444_scanline_c( uint8_t *output, int width, } } +#ifdef ARCH_X86 static void blit_colour_packed4444_scanline_mmx( uint8_t *output, int width, - int alpha, int luma, - int cb, int cr ) + int alpha, int luma, + int cb, int cr ) { -#ifdef ARCH_X86 uint32_t colour = (cr << 24) | (cb << 16) | (luma << 8) | alpha; int i; @@ -1052,14 +1137,14 @@ static void blit_colour_packed4444_scanline_mmx( uint8_t *output, int width, } emms(); -#endif } +#endif -void blit_colour_packed4444_scanline_mmxext( uint8_t *output, int width, - int alpha, int luma, - int cb, int cr ) -{ #ifdef ARCH_X86 +static void blit_colour_packed4444_scanline_mmxext( uint8_t *output, int width, + int alpha, int luma, + int cb, int cr ) +{ uint32_t colour = (cr << 24) | (cb << 16) | (luma << 8) | alpha; int i; @@ -1090,8 +1175,8 @@ void blit_colour_packed4444_scanline_mmxext( uint8_t *output, int width, sfence(); emms(); -#endif } +#endif #define speedy_memcpy_c xine_fast_memcpy @@ -1104,18 +1189,22 @@ static void blit_packed422_scanline_c( uint8_t *dest, const uint8_t *src, int wi speedy_memcpy_c( dest, src, width*2 ); } +#ifdef ARCH_X86 static void blit_packed422_scanline_mmx( uint8_t *dest, const uint8_t *src, int width ) { speedy_memcpy_mmx( dest, src, width*2 ); } +#endif +#ifdef ARCH_X86 static void blit_packed422_scanline_mmxext( uint8_t *dest, const uint8_t *src, int width ) { speedy_memcpy_mmxext( dest, src, width*2 ); } +#endif static void composite_packed4444_alpha_to_packed422_scanline_c( uint8_t *output, uint8_t *input, - uint8_t *foreground, int width, int alpha ) + uint8_t *foreground, int width, int alpha ) { int i; @@ -1172,12 +1261,12 @@ static void composite_packed4444_alpha_to_packed422_scanline_c( uint8_t *output, } } +#ifdef ARCH_X86 static void composite_packed4444_alpha_to_packed422_scanline_mmxext( uint8_t *output, - uint8_t *input, - uint8_t *foreground, - int width, int alpha ) + uint8_t *input, + uint8_t *foreground, + int width, int alpha ) { -#ifdef ARCH_X86 const mmx_t alpha2 = { 0x0000FFFF00000000ULL }; const mmx_t alpha1 = { 0xFFFF0000FFFFFFFFULL }; const mmx_t round = { 0x0080008000800080ULL }; @@ -1271,11 +1360,11 @@ static void composite_packed4444_alpha_to_packed422_scanline_mmxext( uint8_t *ou } sfence(); emms(); -#endif } +#endif static void composite_packed4444_to_packed422_scanline_c( uint8_t *output, uint8_t *input, - uint8_t *foreground, int width ) + uint8_t *foreground, int width ) { int i; for( i = 0; i < width; i++ ) { @@ -1314,10 +1403,10 @@ static void composite_packed4444_to_packed422_scanline_c( uint8_t *output, uint8 } +#ifdef ARCH_X86 static void composite_packed4444_to_packed422_scanline_mmxext( uint8_t *output, uint8_t *input, - uint8_t *foreground, int width ) + uint8_t *foreground, int width ) { -#ifdef ARCH_X86 const mmx_t alpha2 = { 0x0000FFFF00000000ULL }; const mmx_t alpha1 = { 0xFFFF0000FFFFFFFFULL }; const mmx_t round = { 0x0080008000800080ULL }; @@ -1406,8 +1495,8 @@ static void composite_packed4444_to_packed422_scanline_mmxext( uint8_t *output, } sfence(); emms(); -#endif } +#endif /** * um... just need some scrap paper... @@ -1418,11 +1507,11 @@ static void composite_packed4444_to_packed422_scanline_mmxext( uint8_t *output, * Da = (1 - a)*b + a */ static void composite_alphamask_to_packed4444_scanline_c( uint8_t *output, - uint8_t *input, - uint8_t *mask, - int width, - int textluma, int textcb, - int textcr ) + uint8_t *input, + uint8_t *mask, + int width, + int textluma, int textcb, + int textcr ) { uint32_t opaque = (textcr << 24) | (textcb << 16) | (textluma << 8) | 0xff; int i; @@ -1448,14 +1537,14 @@ static void composite_alphamask_to_packed4444_scanline_c( uint8_t *output, } } +#ifdef ARCH_X86 static void composite_alphamask_to_packed4444_scanline_mmxext( uint8_t *output, - uint8_t *input, - uint8_t *mask, - int width, - int textluma, int textcb, - int textcr ) + uint8_t *input, + uint8_t *mask, + int width, + int textluma, int textcb, + int textcr ) { -#ifdef ARCH_X86 uint32_t opaque = (textcr << 24) | (textcb << 16) | (textluma << 8) | 0xff; const mmx_t round = { 0x0080008000800080ULL }; const mmx_t fullalpha = { 0x00000000000000ffULL }; @@ -1544,14 +1633,14 @@ static void composite_alphamask_to_packed4444_scanline_mmxext( uint8_t *output, } sfence(); emms(); -#endif } +#endif static void composite_alphamask_alpha_to_packed4444_scanline_c( uint8_t *output, - uint8_t *input, - uint8_t *mask, int width, - int textluma, int textcb, - int textcr, int alpha ) + uint8_t *input, + uint8_t *mask, int width, + int textluma, int textcb, + int textcr, int alpha ) { uint32_t opaque = (textcr << 24) | (textcb << 16) | (textluma << 8) | 0xff; int i; @@ -1596,9 +1685,9 @@ static void premultiply_packed4444_scanline_c( uint8_t *output, uint8_t *input, } } +#ifdef ARCH_X86 static void premultiply_packed4444_scanline_mmxext( uint8_t *output, uint8_t *input, int width ) { -#ifdef ARCH_X86 const mmx_t round = { 0x0080008000800080ULL }; const mmx_t alpha = { 0x00000000000000ffULL }; const mmx_t noalp = { 0xffffffffffff0000ULL }; @@ -1632,11 +1721,11 @@ static void premultiply_packed4444_scanline_mmxext( uint8_t *output, uint8_t *in } sfence(); emms(); -#endif } +#endif static void blend_packed422_scanline_c( uint8_t *output, uint8_t *src1, - uint8_t *src2, int width, int pos ) + uint8_t *src2, int width, int pos ) { if( pos == 0 ) { blit_packed422_scanline( output, src1, width ); @@ -1652,10 +1741,10 @@ static void blend_packed422_scanline_c( uint8_t *output, uint8_t *src1, } } +#ifdef ARCH_X86 static void blend_packed422_scanline_mmxext( uint8_t *output, uint8_t *src1, - uint8_t *src2, int width, int pos ) + uint8_t *src2, int width, int pos ) { -#ifdef ARCH_X86 if( pos <= 0 ) { blit_packed422_scanline( output, src1, width ); } else if( pos >= 256 ) { @@ -1694,13 +1783,13 @@ static void blend_packed422_scanline_mmxext( uint8_t *output, uint8_t *src1, sfence(); emms(); } -#endif } +#endif +#ifdef ARCH_X86 static void quarter_blit_vertical_packed422_scanline_mmxext( uint8_t *output, uint8_t *one, - uint8_t *three, int width ) + uint8_t *three, int width ) { -#ifdef ARCH_X86 int i; for( i = width/16; i; --i ) { @@ -1751,12 +1840,12 @@ static void quarter_blit_vertical_packed422_scanline_mmxext( uint8_t *output, ui sfence(); emms(); -#endif } +#endif static void quarter_blit_vertical_packed422_scanline_c( uint8_t *output, uint8_t *one, - uint8_t *three, int width ) + uint8_t *three, int width ) { width *= 2; while( width-- ) { @@ -1767,7 +1856,7 @@ static void quarter_blit_vertical_packed422_scanline_c( uint8_t *output, uint8_t } static void subpix_blit_vertical_packed422_scanline_c( uint8_t *output, uint8_t *top, - uint8_t *bot, int subpixpos, int width ) + uint8_t *bot, int subpixpos, int width ) { if( subpixpos == 32768 ) { interpolate_packed422_scanline( output, top, bot, width ); @@ -1786,7 +1875,7 @@ static void subpix_blit_vertical_packed422_scanline_c( uint8_t *output, uint8_t } static void a8_subpix_blit_scanline_c( uint8_t *output, uint8_t *input, - int lasta, int startpos, int width ) + int lasta, int startpos, int width ) { int pos = 0xffff - (startpos & 0xffff); int prev = lasta; @@ -1934,7 +2023,7 @@ static void init_YCbCr_to_RGB_tables(void) conv_YR_inited = 1; } -void rgb24_to_packed444_rec601_scanline( uint8_t *output, uint8_t *input, int width ) +static void rgb24_to_packed444_rec601_scanline_c( uint8_t *output, uint8_t *input, int width ) { if( !conv_RY_inited ) init_RGB_to_YCbCr_tables(); @@ -1951,7 +2040,7 @@ void rgb24_to_packed444_rec601_scanline( uint8_t *output, uint8_t *input, int wi } } -void rgba32_to_packed4444_rec601_scanline( uint8_t *output, uint8_t *input, int width ) +static void rgba32_to_packed4444_rec601_scanline_c( uint8_t *output, uint8_t *input, int width ) { if( !conv_RY_inited ) init_RGB_to_YCbCr_tables(); @@ -1970,7 +2059,7 @@ void rgba32_to_packed4444_rec601_scanline( uint8_t *output, uint8_t *input, int } } -void packed444_to_rgb24_rec601_scanline( uint8_t *output, uint8_t *input, int width ) +static void packed444_to_rgb24_rec601_scanline_c( uint8_t *output, uint8_t *input, int width ) { if( !conv_YR_inited ) init_YCbCr_to_RGB_tables(); @@ -2006,7 +2095,8 @@ void packed444_to_rgb24_rec601_scanline( uint8_t *output, uint8_t *input, int wi * B-Y' = -0.299*R' - 0.587*G' + 0.886*B' * R-Y' = 0.701*R' - 0.587*G' - 0.114*B' */ -void packed444_to_rgb24_rec601_reference_scanline( uint8_t *output, uint8_t *input, int width ) +/* +static void packed444_to_rgb24_rec601_reference_scanline( uint8_t *output, uint8_t *input, int width ) { while( width-- ) { double yp = (((double) input[ 0 ]) - 16.0) / 255.0; @@ -2030,10 +2120,11 @@ void packed444_to_rgb24_rec601_reference_scanline( uint8_t *output, uint8_t *inp input += 3; } } +*/ -void packed444_to_nonpremultiplied_packed4444_scanline( uint8_t *output, - uint8_t *input, - int width, int alpha ) +static void packed444_to_nonpremultiplied_packed4444_scanline_c( uint8_t *output, + uint8_t *input, + int width, int alpha ) { int i; @@ -2046,13 +2137,12 @@ void packed444_to_nonpremultiplied_packed4444_scanline( uint8_t *output, output += 4; input += 3; } - } -void aspect_adjust_packed4444_scanline( uint8_t *output, - uint8_t *input, - int width, - double pixel_aspect ) +static void aspect_adjust_packed4444_scanline_c( uint8_t *output, + uint8_t *input, + int width, + double pixel_aspect ) { double i; int prev_i = 0; @@ -2098,10 +2188,10 @@ void aspect_adjust_packed4444_scanline( uint8_t *output, /** * Sub-pixel data bar renderer. There are 128 bars. */ -void composite_bars_packed4444_scanline( uint8_t *output, - uint8_t *background, int width, - int a, int luma, int cb, int cr, - int percentage ) +static void composite_bars_packed4444_scanline_c( uint8_t *output, + uint8_t *background, int width, + int a, int luma, int cb, int cr, + int percentage ) { /** * This is the size of both the bar and the spacing in between in subpixel @@ -2141,6 +2231,139 @@ void composite_bars_packed4444_scanline( uint8_t *output, } } +/* horizontal 1:2 interpolation filter */ +static void chroma_422_to_444_mpeg2_plane_c( uint8_t *dst, uint8_t *src, int width, int height ) +{ + int i, i2, w, j, im2, im1, ip1, ip2, ip3; + + w = width / 2; + + for( j = 0; j < height; j++ ) { + for( i = 0; i < w; i++ ) { + i2 = i << 1; + im2 = (i<2) ? 0 : i-2; + im1 = (i<1) ? 0 : i-1; + ip1 = (i<w-1) ? i+1 : w-1; + ip2 = (i<w-2) ? i+2 : w-1; + ip3 = (i<w-3) ? i+3 : w-1; + + /* FIR filter coefficients (*256): 21 0 -52 0 159 256 159 0 -52 0 21 */ + /* even samples (0 0 256 0 0) */ + dst[ i2 ] = src[i]; + + /* odd samples (21 -52 159 159 -52 21) */ + dst[ i2 + 1 ] = clip255( ( 21*(src[im2]+src[ip3]) + - 52*(src[im1]+src[ip2]) + + 159*(src[i]+src[ip1]) + 128 ) >> 8 ); + } + src += w; + dst += width; + } +} + +/* vertical 1:2 interpolation filter */ +static void chroma_420_to_422_mpeg2_plane_c( uint8_t *dst, uint8_t *src, + int width, int height, int progressive ) +{ + int w, h, i, j, j2; + int jm6, jm5, jm4, jm3, jm2, jm1, jp1, jp2, jp3, jp4, jp5, jp6, jp7; + + w = width / 2; + h = height / 2; + + if( progressive ) { + /* intra frame */ + for( i = 0; i < w; i++ ) { + for( j = 0; j < h; j++ ) { + j2 = j << 1; + jm3 = (j<3) ? 0 : j-3; + jm2 = (j<2) ? 0 : j-2; + jm1 = (j<1) ? 0 : j-1; + jp1 = (j<h-1) ? j+1 : h-1; + jp2 = (j<h-2) ? j+2 : h-1; + jp3 = (j<h-3) ? j+3 : h-1; + + /* FIR filter coefficients (*256): 5 -21 70 228 -37 11 */ + /* New FIR filter coefficients (*256): 3 -16 67 227 -32 7 */ + dst[w*j2] = clip255( ( 3*src[w*jm3] + - 16*src[w*jm2] + + 67*src[w*jm1] + + 227*src[w*j] + - 32*src[w*jp1] + + 7*src[w*jp2] + 128 ) >> 8 ); + + dst[w*(j2+1)] = clip255( ( 3*src[w*jp3] + - 16*src[w*jp2] + + 67*src[w*jp1] + + 227*src[w*j] + - 32*src[w*jm1] + + 7*src[w*jm2] + 128 ) >> 8 ); + } + src++; + dst++; + } + } else { + /* intra field */ + for( i = 0; i < w; i++ ) { + for( j = 0; j < h; j += 2 ) { + j2 = j << 1; + + /* top field */ + jm6 = (j<6) ? 0 : j-6; + jm4 = (j<4) ? 0 : j-4; + jm2 = (j<2) ? 0 : j-2; + jp2 = (j<h-2) ? j+2 : h-2; + jp4 = (j<h-4) ? j+4 : h-2; + jp6 = (j<h-6) ? j+6 : h-2; + + /* Polyphase FIR filter coefficients (*256): 2 -10 35 242 -18 5 */ + /* New polyphase FIR filter coefficients (*256): 1 -7 30 248 -21 5 */ + dst[w*j2] = clip255( ( 1*src[w*jm6] + - 7*src[w*jm4] + + 30*src[w*jm2] + + 248*src[w*j] + - 21*src[w*jp2] + + 5*src[w*jp4] + 128 ) >> 8 ); + + /* Polyphase FIR filter coefficients (*256): 11 -38 192 113 -30 8 */ + /* New polyphase FIR filter coefficients (*256):7 -35 194 110 -24 4 */ + dst[w*(j2+2)] = clip255( ( 7*src[w*jm4] + -35*src[w*jm2] + +194*src[w*j] + +110*src[w*jp2] + -24*src[w*jp4] + +4*src[w*jp6] + 128 ) >> 8 ); + + /* bottom field */ + jm5 = (j<5) ? 1 : j-5; + jm3 = (j<3) ? 1 : j-3; + jm1 = (j<1) ? 1 : j-1; + jp1 = (j<h-1) ? j+1 : h-1; + jp3 = (j<h-3) ? j+3 : h-1; + jp5 = (j<h-5) ? j+5 : h-1; + jp7 = (j<h-7) ? j+7 : h-1; + + /* Polyphase FIR filter coefficients (*256): 11 -38 192 113 -30 8 */ + /* New polyphase FIR filter coefficients (*256):7 -35 194 110 -24 4 */ + dst[w*(j2+1)] = clip255( ( 7*src[w*jp5] + -35*src[w*jp3] + +194*src[w*jp1] + +110*src[w*jm1] + -24*src[w*jm3] + +4*src[w*jm5] + 128 ) >> 8 ); + + dst[w*(j2+3)] = clip255( ( 1*src[w*jp7] + -7*src[w*jp5] + +30*src[w*jp3] + +248*src[w*jp1] + -21*src[w*jm1] + +5*src[w*jm3] + 128 ) >> 8 ); + } + src++; + dst++; + } + } +} static uint32_t speedy_accel; @@ -2162,8 +2385,6 @@ void setup_speedy_calls( uint32_t accel, int verbose ) filter_luma_14641_packed422_inplace_scanline = filter_luma_14641_packed422_inplace_scanline_c; comb_factor_packed422_scanline = 0; diff_factor_packed422_scanline = diff_factor_packed422_scanline_c; - vfilter_chroma_121_packed422_scanline = vfilter_chroma_121_packed422_scanline_c; - vfilter_chroma_332_packed422_scanline = vfilter_chroma_332_packed422_scanline_c; kill_chroma_packed422_inplace_scanline = kill_chroma_packed422_inplace_scanline_c; mirror_packed422_inplace_scanline = mirror_packed422_inplace_scanline_c; halfmirror_packed422_inplace_scanline = halfmirror_packed422_inplace_scanline_c; @@ -2172,6 +2393,20 @@ void setup_speedy_calls( uint32_t accel, int verbose ) a8_subpix_blit_scanline = a8_subpix_blit_scanline_c; quarter_blit_vertical_packed422_scanline = quarter_blit_vertical_packed422_scanline_c; subpix_blit_vertical_packed422_scanline = subpix_blit_vertical_packed422_scanline_c; + composite_bars_packed4444_scanline = composite_bars_packed4444_scanline_c; + packed444_to_nonpremultiplied_packed4444_scanline = packed444_to_nonpremultiplied_packed4444_scanline_c; + aspect_adjust_packed4444_scanline = aspect_adjust_packed4444_scanline_c; + packed444_to_packed422_scanline = packed444_to_packed422_scanline_c; + packed422_to_packed444_scanline = packed422_to_packed444_scanline_c; + packed422_to_packed444_rec601_scanline = packed422_to_packed444_rec601_scanline_c; + packed444_to_rgb24_rec601_scanline = packed444_to_rgb24_rec601_scanline_c; + rgb24_to_packed444_rec601_scanline = rgb24_to_packed444_rec601_scanline_c; + rgba32_to_packed4444_rec601_scanline = rgba32_to_packed4444_rec601_scanline_c; + chroma_422_to_444_mpeg2_plane = chroma_422_to_444_mpeg2_plane_c; + chroma_420_to_422_mpeg2_plane = chroma_420_to_422_mpeg2_plane_c; + invert_colour_packed422_inplace_scanline = invert_colour_packed422_inplace_scanline_c; + vfilter_chroma_121_packed422_scanline = vfilter_chroma_121_packed422_scanline_c; + vfilter_chroma_332_packed422_scanline = vfilter_chroma_332_packed422_scanline_c; #ifdef ARCH_X86 if( speedy_accel & MM_ACCEL_X86_MMXEXT ) { @@ -2186,14 +2421,15 @@ void setup_speedy_calls( uint32_t accel, int verbose ) composite_packed4444_alpha_to_packed422_scanline = composite_packed4444_alpha_to_packed422_scanline_mmxext; composite_alphamask_to_packed4444_scanline = composite_alphamask_to_packed4444_scanline_mmxext; premultiply_packed4444_scanline = premultiply_packed4444_scanline_mmxext; - vfilter_chroma_121_packed422_scanline = vfilter_chroma_121_packed422_scanline_mmx; - vfilter_chroma_332_packed422_scanline = vfilter_chroma_332_packed422_scanline_mmx; kill_chroma_packed422_inplace_scanline = kill_chroma_packed422_inplace_scanline_mmx; blend_packed422_scanline = blend_packed422_scanline_mmxext; diff_factor_packed422_scanline = diff_factor_packed422_scanline_mmx; comb_factor_packed422_scanline = comb_factor_packed422_scanline_mmx; diff_packed422_block8x8 = diff_packed422_block8x8_mmx; quarter_blit_vertical_packed422_scanline = quarter_blit_vertical_packed422_scanline_mmxext; + invert_colour_packed422_inplace_scanline = invert_colour_packed422_inplace_scanline_mmx; + vfilter_chroma_121_packed422_scanline = vfilter_chroma_121_packed422_scanline_mmx; + vfilter_chroma_332_packed422_scanline = vfilter_chroma_332_packed422_scanline_mmx; speedy_memcpy = speedy_memcpy_mmxext; } else if( speedy_accel & MM_ACCEL_X86_MMX ) { if( verbose ) { @@ -2205,10 +2441,11 @@ void setup_speedy_calls( uint32_t accel, int verbose ) blit_packed422_scanline = blit_packed422_scanline_mmx; diff_factor_packed422_scanline = diff_factor_packed422_scanline_mmx; comb_factor_packed422_scanline = comb_factor_packed422_scanline_mmx; - vfilter_chroma_121_packed422_scanline = vfilter_chroma_121_packed422_scanline_mmx; - vfilter_chroma_332_packed422_scanline = vfilter_chroma_332_packed422_scanline_mmx; kill_chroma_packed422_inplace_scanline = kill_chroma_packed422_inplace_scanline_mmx; diff_packed422_block8x8 = diff_packed422_block8x8_mmx; + invert_colour_packed422_inplace_scanline = invert_colour_packed422_inplace_scanline_mmx; + vfilter_chroma_121_packed422_scanline = vfilter_chroma_121_packed422_scanline_mmx; + vfilter_chroma_332_packed422_scanline = vfilter_chroma_332_packed422_scanline_mmx; speedy_memcpy = speedy_memcpy_mmx; } else { if( verbose ) { |