summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/post/deinterlace/deinterlace.c37
-rw-r--r--src/post/deinterlace/speedy.c579
-rw-r--r--src/post/deinterlace/speedy.h10
-rw-r--r--src/post/deinterlace/xine_plugin.c4
4 files changed, 426 insertions, 204 deletions
diff --git a/src/post/deinterlace/deinterlace.c b/src/post/deinterlace/deinterlace.c
index f797f28cf..60f5b50ab 100644
--- a/src/post/deinterlace/deinterlace.c
+++ b/src/post/deinterlace/deinterlace.c
@@ -30,24 +30,19 @@ struct methodlist_item_s
};
static methodlist_item_t *methodlist = 0;
-static initialized = 0;
void register_deinterlace_method( deinterlace_method_t *method )
{
- methodlist_item_t **dest;
-
- if( initialized )
- return;
+ methodlist_item_t **dest = &methodlist;
+ methodlist_item_t *cur = methodlist;
- if( !methodlist ) {
- dest = &methodlist;
- } else {
- methodlist_item_t *cur = methodlist;
- while( cur->next ) cur = cur->next;
+ while( cur ) {
+ if( cur->method == method ) return;
dest = &(cur->next);
+ cur = cur->next;
}
- *dest = (methodlist_item_t *) malloc( sizeof( methodlist_item_t ) );
+ *dest = malloc( sizeof( methodlist_item_t ) );
if( *dest ) {
(*dest)->method = method;
(*dest)->next = 0;
@@ -80,14 +75,27 @@ deinterlace_method_t *get_deinterlace_method( int i )
return cur->method;
}
+/**
+ * Loads the shared object 'filename' with dlopen() and, when it exports a
+ * "deinterlace_plugin_init" symbol, calls that entry point.
+ *
+ * Nothing is returned; failures are reported on stderr.  A library that
+ * loads but lacks the entry point is closed again so its handle is not
+ * leaked.  After a successful init the library is deliberately left open.
+ */
+void register_deinterlace_plugin( const char *filename )
+{
+    deinterlace_plugin_init_t plugin_init;
+    void *handle = dlopen( filename, RTLD_NOW );
+
+    if( !handle ) {
+        fprintf( stderr, "deinterlace: Can't load plugin '%s': %s\n",
+                 filename, dlerror() );
+        return;
+    }
+
+    plugin_init = (deinterlace_plugin_init_t) dlsym( handle, "deinterlace_plugin_init" );
+    if( !plugin_init ) {
+        /* dlerror() may return NULL here, so never hand it straight to %s. */
+        const char *reason = dlerror();
+
+        fprintf( stderr, "deinterlace: Plugin '%s' has no deinterlace_plugin_init: %s\n",
+                 filename, reason ? reason : "symbol not found" );
+        dlclose( handle );
+        return;
+    }
+
+    /*
+     * NOTE(review): the handle is kept open on purpose; presumably whatever
+     * plugin_init() registers lives inside the loaded object -- confirm.
+     */
+    plugin_init();
+}
+
void filter_deinterlace_methods( int accel, int fields_available )
{
methodlist_item_t *prev = 0;
methodlist_item_t *cur = methodlist;
- if( initialized )
- return;
-
while( cur ) {
methodlist_item_t *next = cur->next;
int drop = 0;
@@ -120,6 +128,5 @@ void filter_deinterlace_methods( int accel, int fields_available )
}
cur = next;
}
- initialized = 1;
}
diff --git a/src/post/deinterlace/speedy.c b/src/post/deinterlace/speedy.c
index 2a1e7f673..3a94cec55 100644
--- a/src/post/deinterlace/speedy.c
+++ b/src/post/deinterlace/speedy.c
@@ -17,18 +17,6 @@
* Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
-/*
- * Uses code from:
- *
- * linux/arch/i386/kernel/setup.c
- *
- * Copyright (C) 1995 Linus Torvalds
- *
- * Found in linux 2.4.20.
- *
- * Also helped from code in 'cpuinfo.c' found in mplayer.
- */
-
#include <stdio.h>
#include <string.h>
#include <sys/time.h>
@@ -96,10 +84,6 @@ void (*subpix_blit_vertical_packed422_scanline)( uint8_t *output, uint8_t *top,
uint8_t *bot, int subpixpos, int width );
-#define SPEEDY_START()
-
-#define SPEEDY_END()
-
/**
* result = (1 - alpha)B + alpha*F
* = B - alpha*B + alpha*F
@@ -124,18 +108,16 @@ static inline __attribute__ ((always_inline,const)) uint8_t clip255( int x )
}
}
-#ifdef ARCH_X86
+static unsigned long CombJaggieThreshold = 73;
static unsigned int comb_factor_packed422_scanline_mmx( uint8_t *top, uint8_t *mid,
uint8_t *bot, int width )
{
+#ifdef ARCH_X86
const mmx_t qwYMask = { 0x00ff00ff00ff00ffULL };
const mmx_t qwOnes = { 0x0001000100010001ULL };
mmx_t qwThreshold;
unsigned int temp1, temp2;
- unsigned long CombJaggieThreshold = 73;
-
- SPEEDY_START();
width /= 4;
@@ -206,12 +188,11 @@ static unsigned int comb_factor_packed422_scanline_mmx( uint8_t *top, uint8_t *m
emms();
- SPEEDY_END();
-
return temp1;
-}
-
+#else
+ return 0;
#endif
+}
static unsigned long BitShift = 6;
@@ -219,8 +200,6 @@ static unsigned int diff_factor_packed422_scanline_c( uint8_t *cur, uint8_t *old
{
unsigned int ret = 0;
- SPEEDY_START();
-
width /= 4;
while( width-- ) {
@@ -233,7 +212,6 @@ static unsigned int diff_factor_packed422_scanline_c( uint8_t *cur, uint8_t *old
cur += 8;
old += 8;
}
- SPEEDY_END();
return ret;
}
@@ -242,8 +220,6 @@ static unsigned int diff_factor_packed422_scanline_test_c( uint8_t *cur, uint8_t
{
unsigned int ret = 0;
- SPEEDY_START();
-
width /= 16;
while( width-- ) {
@@ -256,20 +232,17 @@ static unsigned int diff_factor_packed422_scanline_test_c( uint8_t *cur, uint8_t
cur += (8*4);
old += (8*4);
}
- SPEEDY_END();
return ret;
}
-#ifdef ARCH_X86
static unsigned int diff_factor_packed422_scanline_mmx( uint8_t *cur, uint8_t *old, int width )
{
+#ifdef ARCH_X86
const mmx_t qwYMask = { 0x00ff00ff00ff00ffULL };
unsigned int temp1, temp2;
- SPEEDY_START();
-
width /= 4;
movq_m2r( qwYMask, mm1 );
@@ -299,9 +272,10 @@ static unsigned int diff_factor_packed422_scanline_mmx( uint8_t *cur, uint8_t *o
emms();
- SPEEDY_END();
-
return temp1;
+#else
+ return 0;
+#endif
}
#define ABS(a) (((a) < 0)?-(a):(a))
@@ -309,14 +283,13 @@ static unsigned int diff_factor_packed422_scanline_mmx( uint8_t *cur, uint8_t *o
static void diff_packed422_block8x8_mmx( pulldown_metrics_t *m, uint8_t *old,
uint8_t *new, int os, int ns )
{
+#ifdef ARCH_X86
const mmx_t ymask = { 0x00ff00ff00ff00ffULL };
short out[ 24 ]; /* Output buffer for the partial metrics from the mmx code. */
uint8_t *outdata = (uint8_t *) out;
uint8_t *oldp, *newp;
int i;
- SPEEDY_START();
-
pxor_r2r( mm4, mm4 ); // 4 even difference sums.
pxor_r2r( mm5, mm5 ); // 4 odd difference sums.
pxor_r2r( mm7, mm7 ); // zeros
@@ -449,11 +422,8 @@ static void diff_packed422_block8x8_mmx( pulldown_metrics_t *m, uint8_t *old,
}
emms();
-
- SPEEDY_END();
-}
-
#endif
+}
static void diff_packed422_block8x8_c( pulldown_metrics_t *m, uint8_t *old,
uint8_t *new, int os, int ns )
@@ -461,7 +431,6 @@ static void diff_packed422_block8x8_c( pulldown_metrics_t *m, uint8_t *old,
int x, y, e=0, o=0, s=0, p=0, t=0;
uint8_t *oldp, *newp;
- SPEEDY_START();
m->s = m->p = m->t = 0;
for (x = 8; x; x--) {
oldp = old; old += 2;
@@ -483,12 +452,10 @@ static void diff_packed422_block8x8_c( pulldown_metrics_t *m, uint8_t *old,
m->e = e;
m->o = o;
m->d = e+o;
- SPEEDY_END();
}
static void packed444_to_packed422_scanline_c( uint8_t *output, uint8_t *input, int width )
{
- SPEEDY_START();
width /= 2;
while( width-- ) {
output[ 0 ] = input[ 0 ];
@@ -498,12 +465,10 @@ static void packed444_to_packed422_scanline_c( uint8_t *output, uint8_t *input,
output += 4;
input += 6;
}
- SPEEDY_END();
}
static void packed422_to_packed444_scanline_c( uint8_t *output, uint8_t *input, int width )
{
- SPEEDY_START();
width /= 2;
while( width-- ) {
output[ 0 ] = input[ 0 ];
@@ -515,7 +480,6 @@ static void packed422_to_packed444_scanline_c( uint8_t *output, uint8_t *input,
output += 6;
input += 4;
}
- SPEEDY_END();
}
/**
@@ -527,7 +491,6 @@ void packed422_to_packed444_rec601_scanline( uint8_t *dest, uint8_t *src, int wi
{
int i;
- SPEEDY_START();
/* Process two input pixels at a time. Input is [Y'][Cb][Y'][Cr]. */
for( i = 0; i < width / 2; i++ ) {
dest[ (i*6) + 0 ] = src[ (i*4) + 0 ];
@@ -556,18 +519,14 @@ void packed422_to_packed444_rec601_scanline( uint8_t *dest, uint8_t *src, int wi
dest[ (i*6) + 5 ] = src[ (i*4) + 3 ];
}
}
- SPEEDY_END();
}
-#ifdef ARCH_X86
-
static void kill_chroma_packed422_inplace_scanline_mmx( uint8_t *data, int width )
{
+#ifdef ARCH_X86
const mmx_t ymask = { 0x00ff00ff00ff00ffULL };
const mmx_t nullchroma = { 0x8000800080008000ULL };
- SPEEDY_START();
-
movq_m2r( ymask, mm7 );
movq_m2r( nullchroma, mm6 );
for(; width > 4; width -= 4 ) {
@@ -583,19 +542,15 @@ static void kill_chroma_packed422_inplace_scanline_mmx( uint8_t *data, int width
data[ 1 ] = 128;
data += 2;
}
- SPEEDY_END();
-}
-
#endif
+}
static void kill_chroma_packed422_inplace_scanline_c( uint8_t *data, int width )
{
- SPEEDY_START();
while( width-- ) {
data[ 1 ] = 128;
data += 2;
}
- SPEEDY_END();
}
/*
@@ -607,7 +562,6 @@ static void testing_packed422_inplace_scanline_c( uint8_t *data, int width, int
volatile static int topbottom = 0;
static uint8_t scanbuffer[2048];
- SPEEDY_START();
if( scanline <= 1 ) {
topbottom = scanline;
memcpy(scanbuffer, data, width*2);
@@ -620,7 +574,6 @@ static void testing_packed422_inplace_scanline_c( uint8_t *data, int width, int
} else {
memcpy(scanbuffer, data, width*2);
}
- SPEEDY_END();
}
*/
@@ -629,7 +582,6 @@ static void mirror_packed422_inplace_scanline_c( uint8_t *data, int width )
int x, tmp1, tmp2;
int width2 = width*2;
- SPEEDY_START();
for( x = 0; x < width; x += 2 ) {
tmp1 = data[ x ];
tmp2 = data[ x+1 ];
@@ -638,19 +590,16 @@ static void mirror_packed422_inplace_scanline_c( uint8_t *data, int width )
data[ width2 - x ] = tmp1;
data[ width2 - x + 1 ] = tmp2;
}
- SPEEDY_END();
}
static void halfmirror_packed422_inplace_scanline_c( uint8_t *data, int width )
{
int x;
- SPEEDY_START();
for( x = 0; x < width; x += 2 ) {
data[ width + x ] = data[ width - x ];
data[ width + x + 1 ] = data[ width - x + 1 ];
}
- SPEEDY_END();
}
static void filter_luma_121_packed422_inplace_scanline_c( uint8_t *data, int width )
@@ -658,7 +607,6 @@ static void filter_luma_121_packed422_inplace_scanline_c( uint8_t *data, int wid
int r1 = 0;
int r2 = 0;
- SPEEDY_START();
data += 2;
width -= 1;
while( width-- ) {
@@ -668,7 +616,6 @@ static void filter_luma_121_packed422_inplace_scanline_c( uint8_t *data, int wid
*(data - 2) = s2 >> 2;
data += 2;
}
- SPEEDY_END();
}
static void filter_luma_14641_packed422_inplace_scanline_c( uint8_t *data, int width )
@@ -678,7 +625,6 @@ static void filter_luma_14641_packed422_inplace_scanline_c( uint8_t *data, int w
int r3 = 0;
int r4 = 0;
- SPEEDY_START();
width -= 4;
data += 4;
while( width-- ) {
@@ -690,7 +636,6 @@ static void filter_luma_14641_packed422_inplace_scanline_c( uint8_t *data, int w
*(data - 4) = s4 >> 4;
data += 2;
}
- SPEEDY_END();
}
static void interpolate_packed422_scanline_c( uint8_t *output, uint8_t *top,
@@ -698,25 +643,18 @@ static void interpolate_packed422_scanline_c( uint8_t *output, uint8_t *top,
{
int i;
- SPEEDY_START();
-
for( i = width*2; i; --i ) {
*output++ = ((*top++) + (*bot++)) >> 1;
}
-
- SPEEDY_END();
}
-#ifdef ARCH_X86
-
static void interpolate_packed422_scanline_mmx( uint8_t *output, uint8_t *top,
uint8_t *bot, int width )
{
+#ifdef ARCH_X86
const mmx_t shiftmask = { 0xfefffefffefffeffULL }; /* To avoid shifting chroma to luma. */
int i;
- SPEEDY_START();
-
for( i = width/16; i; --i ) {
movq_m2r( *bot, mm0 );
movq_m2r( *top, mm1 );
@@ -777,17 +715,15 @@ static void interpolate_packed422_scanline_mmx( uint8_t *output, uint8_t *top,
}
emms();
-
- SPEEDY_END();
+#endif
}
static void interpolate_packed422_scanline_mmxext( uint8_t *output, uint8_t *top,
uint8_t *bot, int width )
{
+#ifdef ARCH_X86
int i;
- SPEEDY_START();
-
for( i = width/16; i; --i ) {
movq_m2r( *bot, mm0 );
movq_m2r( *top, mm1 );
@@ -829,35 +765,25 @@ static void interpolate_packed422_scanline_mmxext( uint8_t *output, uint8_t *top
sfence();
emms();
-
- SPEEDY_END();
-}
-
#endif
+}
static void blit_colour_packed422_scanline_c( uint8_t *output, int width, int y, int cb, int cr )
{
uint32_t colour = cr << 24 | y << 16 | cb << 8 | y;
uint32_t *o = (uint32_t *) output;
- SPEEDY_START();
-
for( width /= 2; width; --width ) {
*o++ = colour;
}
-
- SPEEDY_END();
}
-#ifdef ARCH_X86
-
static void blit_colour_packed422_scanline_mmx( uint8_t *output, int width, int y, int cb, int cr )
{
+#ifdef ARCH_X86
uint32_t colour = cr << 24 | y << 16 | cb << 8 | y;
int i;
- SPEEDY_START();
-
movd_m2r( colour, mm1 );
movd_m2r( colour, mm2 );
psllq_i2r( 32, mm1 );
@@ -889,17 +815,15 @@ static void blit_colour_packed422_scanline_mmx( uint8_t *output, int width, int
}
emms();
-
- SPEEDY_END();
+#endif
}
static void blit_colour_packed422_scanline_mmxext( uint8_t *output, int width, int y, int cb, int cr )
{
+#ifdef ARCH_X86
uint32_t colour = cr << 24 | y << 16 | cb << 8 | y;
int i;
- SPEEDY_START();
-
movd_m2r( colour, mm1 );
movd_m2r( colour, mm2 );
psllq_i2r( 32, mm1 );
@@ -932,40 +856,30 @@ static void blit_colour_packed422_scanline_mmxext( uint8_t *output, int width, i
sfence();
emms();
-
- SPEEDY_END();
-}
-
#endif
+}
static void blit_colour_packed4444_scanline_c( uint8_t *output, int width,
int alpha, int luma, int cb, int cr )
{
int j;
- SPEEDY_START();
-
for( j = 0; j < width; j++ ) {
*output++ = alpha;
*output++ = luma;
*output++ = cb;
*output++ = cr;
}
-
- SPEEDY_END();
}
-#ifdef ARCH_X86
-
static void blit_colour_packed4444_scanline_mmx( uint8_t *output, int width,
int alpha, int luma,
int cb, int cr )
{
+#ifdef ARCH_X86
uint32_t colour = (cr << 24) | (cb << 16) | (luma << 8) | alpha;
int i;
- SPEEDY_START();
-
movd_m2r( colour, mm1 );
movd_m2r( colour, mm2 );
psllq_i2r( 32, mm1 );
@@ -992,19 +906,17 @@ static void blit_colour_packed4444_scanline_mmx( uint8_t *output, int width,
}
emms();
-
- SPEEDY_END();
+#endif
}
void blit_colour_packed4444_scanline_mmxext( uint8_t *output, int width,
int alpha, int luma,
int cb, int cr )
{
+#ifdef ARCH_X86
uint32_t colour = (cr << 24) | (cb << 16) | (luma << 8) | alpha;
int i;
- SPEEDY_START();
-
movd_m2r( colour, mm1 );
movd_m2r( colour, mm2 );
psllq_i2r( 32, mm1 );
@@ -1032,25 +944,28 @@ void blit_colour_packed4444_scanline_mmxext( uint8_t *output, int width,
sfence();
emms();
-
- SPEEDY_END();
+#endif
}
-#endif
+
+#define speedy_memcpy_c xine_fast_memcpy
+#define speedy_memcpy_mmx xine_fast_memcpy
+#define speedy_memcpy_mmxext xine_fast_memcpy
+
static void blit_packed422_scanline_c( uint8_t *dest, const uint8_t *src, int width )
{
- xine_fast_memcpy( dest, src, width*2 );
+ speedy_memcpy_c( dest, src, width*2 );
}
static void blit_packed422_scanline_mmx( uint8_t *dest, const uint8_t *src, int width )
{
- xine_fast_memcpy( dest, src, width*2 );
+ speedy_memcpy_mmx( dest, src, width*2 );
}
static void blit_packed422_scanline_mmxext( uint8_t *dest, const uint8_t *src, int width )
{
- xine_fast_memcpy( dest, src, width*2 );
+ speedy_memcpy_mmxext( dest, src, width*2 );
}
static void composite_packed4444_alpha_to_packed422_scanline_c( uint8_t *output, uint8_t *input,
@@ -1058,7 +973,6 @@ static void composite_packed4444_alpha_to_packed422_scanline_c( uint8_t *output,
{
int i;
- SPEEDY_START();
for( i = 0; i < width; i++ ) {
int af = foreground[ 0 ];
@@ -1110,16 +1024,14 @@ static void composite_packed4444_alpha_to_packed422_scanline_c( uint8_t *output,
output += 2;
input += 2;
}
- SPEEDY_END();
}
-#ifdef ARCH_X86
-
static void composite_packed4444_alpha_to_packed422_scanline_mmxext( uint8_t *output,
uint8_t *input,
uint8_t *foreground,
int width, int alpha )
{
+#ifdef ARCH_X86
const mmx_t alpha2 = { 0x0000FFFF00000000ULL };
const mmx_t alpha1 = { 0xFFFF0000FFFFFFFFULL };
const mmx_t round = { 0x0080008000800080ULL };
@@ -1135,7 +1047,6 @@ static void composite_packed4444_alpha_to_packed422_scanline_mmxext( uint8_t *ou
return;
}
- SPEEDY_START();
READ_PREFETCH_2048( input );
READ_PREFETCH_2048( foreground );
@@ -1214,17 +1125,13 @@ static void composite_packed4444_alpha_to_packed422_scanline_mmxext( uint8_t *ou
}
sfence();
emms();
-
- SPEEDY_END();
-}
-
#endif
+}
static void composite_packed4444_to_packed422_scanline_c( uint8_t *output, uint8_t *input,
uint8_t *foreground, int width )
{
int i;
- SPEEDY_START();
for( i = 0; i < width; i++ ) {
int a = foreground[ 0 ];
@@ -1258,20 +1165,18 @@ static void composite_packed4444_to_packed422_scanline_c( uint8_t *output, uint8
output += 2;
input += 2;
}
- SPEEDY_END();
}
-#ifdef ARCH_X86
static void composite_packed4444_to_packed422_scanline_mmxext( uint8_t *output, uint8_t *input,
uint8_t *foreground, int width )
{
+#ifdef ARCH_X86
const mmx_t alpha2 = { 0x0000FFFF00000000ULL };
const mmx_t alpha1 = { 0xFFFF0000FFFFFFFFULL };
const mmx_t round = { 0x0080008000800080ULL };
int i;
- SPEEDY_START();
READ_PREFETCH_2048( input );
READ_PREFETCH_2048( foreground );
@@ -1355,11 +1260,8 @@ static void composite_packed4444_to_packed422_scanline_mmxext( uint8_t *output,
}
sfence();
emms();
-
- SPEEDY_END();
-}
-
#endif
+}
/**
* um... just need some scrap paper...
@@ -1379,8 +1281,6 @@ static void composite_alphamask_to_packed4444_scanline_c( uint8_t *output,
uint32_t opaque = (textcr << 24) | (textcb << 16) | (textluma << 8) | 0xff;
int i;
- SPEEDY_START();
-
for( i = 0; i < width; i++ ) {
int a = *mask;
@@ -1400,11 +1300,8 @@ static void composite_alphamask_to_packed4444_scanline_c( uint8_t *output,
output += 4;
input += 4;
}
- SPEEDY_END();
}
-#ifdef ARCH_X86
-
static void composite_alphamask_to_packed4444_scanline_mmxext( uint8_t *output,
uint8_t *input,
uint8_t *mask,
@@ -1412,13 +1309,12 @@ static void composite_alphamask_to_packed4444_scanline_mmxext( uint8_t *output,
int textluma, int textcb,
int textcr )
{
+#ifdef ARCH_X86
uint32_t opaque = (textcr << 24) | (textcb << 16) | (textluma << 8) | 0xff;
const mmx_t round = { 0x0080008000800080ULL };
const mmx_t fullalpha = { 0x00000000000000ffULL };
mmx_t colour;
- SPEEDY_START();
-
colour.w[ 0 ] = 0x00;
colour.w[ 1 ] = textluma;
colour.w[ 2 ] = textcb;
@@ -1502,10 +1398,8 @@ static void composite_alphamask_to_packed4444_scanline_mmxext( uint8_t *output,
}
sfence();
emms();
- SPEEDY_END();
-}
-
#endif
+}
static void composite_alphamask_alpha_to_packed4444_scanline_c( uint8_t *output,
uint8_t *input,
@@ -1516,8 +1410,6 @@ static void composite_alphamask_alpha_to_packed4444_scanline_c( uint8_t *output,
uint32_t opaque = (textcr << 24) | (textcb << 16) | (textluma << 8) | 0xff;
int i;
- SPEEDY_START();
-
for( i = 0; i < width; i++ ) {
int af = *mask;
@@ -1541,14 +1433,10 @@ static void composite_alphamask_alpha_to_packed4444_scanline_c( uint8_t *output,
output += 4;
input += 4;
}
-
- SPEEDY_END();
}
static void premultiply_packed4444_scanline_c( uint8_t *output, uint8_t *input, int width )
{
- SPEEDY_START();
-
while( width-- ) {
unsigned int cur_a = input[ 0 ];
@@ -1560,20 +1448,15 @@ static void premultiply_packed4444_scanline_c( uint8_t *output, uint8_t *input,
output += 4;
input += 4;
}
-
- SPEEDY_END();
}
-#ifdef ARCH_X86
-
static void premultiply_packed4444_scanline_mmxext( uint8_t *output, uint8_t *input, int width )
{
+#ifdef ARCH_X86
const mmx_t round = { 0x0080008000800080ULL };
const mmx_t alpha = { 0x00000000000000ffULL };
const mmx_t noalp = { 0xffffffffffff0000ULL };
- SPEEDY_START();
-
pxor_r2r( mm7, mm7 );
while( width-- ) {
movd_m2r( *input, mm0 );
@@ -1603,11 +1486,8 @@ static void premultiply_packed4444_scanline_mmxext( uint8_t *output, uint8_t *in
}
sfence();
emms();
-
- SPEEDY_END();
-}
-
#endif
+}
static void blend_packed422_scanline_c( uint8_t *output, uint8_t *src1,
uint8_t *src2, int width, int pos )
@@ -1626,11 +1506,10 @@ static void blend_packed422_scanline_c( uint8_t *output, uint8_t *src1,
}
}
-#ifdef ARCH_X86
-
static void blend_packed422_scanline_mmxext( uint8_t *output, uint8_t *src1,
uint8_t *src2, int width, int pos )
{
+#ifdef ARCH_X86
if( pos <= 0 ) {
blit_packed422_scanline( output, src1, width );
} else if( pos >= 256 ) {
@@ -1641,8 +1520,6 @@ static void blend_packed422_scanline_mmxext( uint8_t *output, uint8_t *src1,
const mmx_t all256 = { 0x0100010001000100ULL };
const mmx_t round = { 0x0080008000800080ULL };
- SPEEDY_START();
-
movd_m2r( pos, mm0 );
pshufw_r2r( mm0, mm0, 0 );
movq_m2r( all256, mm1 );
@@ -1670,17 +1547,16 @@ static void blend_packed422_scanline_mmxext( uint8_t *output, uint8_t *src1,
}
sfence();
emms();
-
- SPEEDY_END();
}
+#endif
}
static void quarter_blit_vertical_packed422_scanline_mmxext( uint8_t *output, uint8_t *one,
uint8_t *three, int width )
{
+#ifdef ARCH_X86
int i;
- SPEEDY_START();
for( i = width/16; i; --i ) {
movq_m2r( *one, mm0 );
movq_m2r( *three, mm1 );
@@ -1729,23 +1605,19 @@ static void quarter_blit_vertical_packed422_scanline_mmxext( uint8_t *output, ui
sfence();
emms();
-
- SPEEDY_END();
+#endif
}
-#endif
static void quarter_blit_vertical_packed422_scanline_c( uint8_t *output, uint8_t *one,
uint8_t *three, int width )
{
- SPEEDY_START();
width *= 2;
while( width-- ) {
*output++ = (*one + *three + *three + *three + 2) / 4;
one++;
three++;
}
- SPEEDY_END();
}
static void subpix_blit_vertical_packed422_scanline_c( uint8_t *output, uint8_t *top,
@@ -1760,13 +1632,10 @@ static void subpix_blit_vertical_packed422_scanline_c( uint8_t *output, uint8_t
} else {
int x;
- SPEEDY_START();
-
width *= 2;
for( x = 0; x < width; x++ ) {
output[ x ] = ( ( top[ x ] * subpixpos ) + ( bot[ x ] * ( 0xffff - subpixpos ) ) ) >> 16;
}
- SPEEDY_END();
}
}
@@ -1783,12 +1652,355 @@ static void a8_subpix_blit_scanline_c( uint8_t *output, uint8_t *input,
}
}
+/**
+ * These are from lavtools in mjpegtools:
+ *
+ * colorspace.c: Routines to perform colorspace conversions.
+ *
+ * Copyright (C) 2001 Matthew J. Marjanovic <maddog@mir.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+/* Number of fractional bits in the fixed-point entries of the tables below. */
+#define FP_BITS 18
+
+/* Precomputed tables, each indexed by one 8-bit component value. */
+
+/*
+ * R'G'B' -> Y'CbCr: Q_Z[v] is the fixed-point contribution of input
+ * component Z with value v to output component Q.  Filled by
+ * init_RGB_to_YCbCr_tables().
+ */
+static int Y_R[256];
+static int Y_G[256];
+static int Y_B[256];
+static int Cb_R[256];
+static int Cb_G[256];
+static int Cb_B[256];
+static int Cr_R[256];
+static int Cr_G[256];
+static int Cr_B[256];
+/* Set to 1 by init_RGB_to_YCbCr_tables() once the tables above are filled. */
+static int conv_RY_inited = 0;
+
+/*
+ * Y'CbCr -> R'G'B': RGB_Y[] is the luma term shared by all three outputs,
+ * Q_Z[v] the contribution of chroma component Z with value v to output Q.
+ * Filled by init_YCbCr_to_RGB_tables().
+ */
+static int RGB_Y[256];
+static int R_Cr[256];
+static int G_Cb[256];
+static int G_Cr[256];
+static int B_Cb[256];
+/* Set to 1 by init_YCbCr_to_RGB_tables() once the tables above are filled. */
+static int conv_YR_inited = 0;
+
+/* Rounds n to the nearest integer; halfway cases move away from zero. */
+static int myround(double n)
+{
+    const double bias = (n >= 0) ? 0.5 : -0.5;
+
+    return (int)(n + bias);
+}
+
+/**
+ * Fills the nine R'G'B' -> Y'CbCr lookup tables and sets conv_RY_inited.
+ *
+ * Each entry is
+ *   coefficient * i * (Q-excursion) / (Z-excursion) * fixed-point-factor
+ * with excursions 219 (luma) or 224 (chroma) over 255.  The B table of each
+ * output additionally carries half the fixed-point factor (for rounding
+ * when the sum is shifted down) and the output offset (16 for Y', 128 for
+ * Cb and Cr) times the fixed-point factor.
+ */
+static void init_RGB_to_YCbCr_tables(void)
+{
+    const double one = (double)(1<<FP_BITS);
+    const double half = (double)(1<<(FP_BITS-1));
+    const double luma_offset = 16.0 * one;
+    const double chroma_offset = 128.0 * one;
+    int i = 0;
+
+    while (i < 256) {
+        const double v = (double)i;
+
+        Y_R[i] = myround(0.299 * v * 219.0 / 255.0 * one);
+        Y_G[i] = myround(0.587 * v * 219.0 / 255.0 * one);
+        Y_B[i] = myround((0.114 * v * 219.0 / 255.0 * one)
+                         + half + luma_offset);
+
+        Cb_R[i] = myround(-0.168736 * v * 224.0 / 255.0 * one);
+        Cb_G[i] = myround(-0.331264 * v * 224.0 / 255.0 * one);
+        Cb_B[i] = myround((0.500 * v * 224.0 / 255.0 * one)
+                          + half + chroma_offset);
+
+        Cr_R[i] = myround(0.500 * v * 224.0 / 255.0 * one);
+        Cr_G[i] = myround(-0.418688 * v * 224.0 / 255.0 * one);
+        Cr_B[i] = myround((-0.081312 * v * 224.0 / 255.0 * one)
+                          + half + chroma_offset);
+        i++;
+    }
+    conv_RY_inited = 1;
+}
+
+/**
+ * Fills the Y'CbCr -> R'G'B' lookup tables (RGB_Y, R_Cr, G_Cr, G_Cb, B_Cb)
+ * and sets conv_YR_inited.
+ *
+ * Each entry is
+ *   coefficient * (i - offset) * (Q-excursion) / (Z-excursion)
+ *               * fixed-point-factor
+ * RGB_Y additionally carries half the fixed-point factor so that the sum
+ * rounds when it is shifted down by FP_BITS.
+ *
+ * Inputs outside the nominal ranges are clamped first: Y' to [16, 235],
+ * Cb/Cr to [16, 240].  Every table therefore holds, for an out-of-range
+ * index, exactly the value of the nearest in-range index.
+ */
+static void init_YCbCr_to_RGB_tables(void)
+{
+    const double one = (double)(1<<FP_BITS);
+    const double half = (double)(1<<(FP_BITS-1));
+    int i;
+
+    for (i = 0; i < 256; i++) {
+        /* Clamp the index itself so all tables share one formula. */
+        const int y = (i < 16) ? 16 : ((i > 235) ? 235 : i);
+        const int c = (i < 16) ? 16 : ((i > 240) ? 240 : i);
+
+        RGB_Y[i] = myround((1.0 * (double)(y - 16) * 255.0 / 219.0 * one)
+                           + half);
+
+        R_Cr[i] = myround(1.402 * (double)(c - 128) * 255.0 / 224.0 * one);
+        G_Cr[i] = myround(-0.714136 * (double)(c - 128) * 255.0 / 224.0 * one);
+        G_Cb[i] = myround(-0.344136 * (double)(c - 128) * 255.0 / 224.0 * one);
+        B_Cb[i] = myround(1.772 * (double)(c - 128) * 255.0 / 224.0 * one);
+    }
+    conv_YR_inited = 1;
+}
+
+/**
+ * Converts 'width' pixels of three bytes each, read as R, G, B, into three
+ * output bytes written as Y', Cb, Cr.  Uses the fixed-point lookup tables,
+ * building them on first use.
+ */
+void rgb24_to_packed444_rec601_scanline( uint8_t *output, uint8_t *input, int width )
+{
+    if( !conv_RY_inited ) init_RGB_to_YCbCr_tables();
+
+    for( ; width--; output += 3, input += 3 ) {
+        const int red = input[ 0 ];
+        const int green = input[ 1 ];
+        const int blue = input[ 2 ];
+
+        /* The rounding term and offset are already folded into the B tables. */
+        output[ 0 ] = (Y_R[ red ] + Y_G[ green ] + Y_B[ blue ]) >> FP_BITS;
+        output[ 1 ] = (Cb_R[ red ] + Cb_G[ green ] + Cb_B[ blue ]) >> FP_BITS;
+        output[ 2 ] = (Cr_R[ red ] + Cr_G[ green ] + Cr_B[ blue ]) >> FP_BITS;
+    }
+}
+
+/**
+ * Converts 'width' pixels of four bytes each, read as R, G, B, A, into four
+ * output bytes written as A, Y', Cb, Cr.  The alpha byte is copied
+ * unchanged; the colour goes through the fixed-point lookup tables, which
+ * are built on first use.
+ */
+void rgba32_to_packed4444_rec601_scanline( uint8_t *output, uint8_t *input, int width )
+{
+    if( !conv_RY_inited ) init_RGB_to_YCbCr_tables();
+
+    for( ; width--; output += 4, input += 4 ) {
+        const int red = input[ 0 ];
+        const int green = input[ 1 ];
+        const int blue = input[ 2 ];
+        const int opacity = input[ 3 ];
+
+        output[ 0 ] = opacity;
+        output[ 1 ] = (Y_R[ red ] + Y_G[ green ] + Y_B[ blue ]) >> FP_BITS;
+        output[ 2 ] = (Cb_R[ red ] + Cb_G[ green ] + Cb_B[ blue ]) >> FP_BITS;
+        output[ 3 ] = (Cr_R[ red ] + Cr_G[ green ] + Cr_B[ blue ]) >> FP_BITS;
+    }
+}
+
+/**
+ * Converts 'width' pixels of three bytes each, read as Y', Cb, Cr, into
+ * three output bytes written as R, G, B.  Uses the fixed-point lookup
+ * tables (built on first use) and passes each result through clip255().
+ */
+void packed444_to_rgb24_rec601_scanline( uint8_t *output, uint8_t *input, int width )
+{
+    if( !conv_YR_inited ) init_YCbCr_to_RGB_tables();
+
+    for( ; width--; output += 3, input += 3 ) {
+        const int cb = input[ 1 ];
+        const int cr = input[ 2 ];
+        /* Luma term shared by all three outputs; includes the rounding half. */
+        const int y = RGB_Y[ input[ 0 ] ];
+
+        output[ 0 ] = clip255( (y + R_Cr[ cr ]) >> FP_BITS );
+        output[ 1 ] = clip255( (y + G_Cb[ cb ] + G_Cr[cr]) >> FP_BITS );
+        output[ 2 ] = clip255( (y + B_Cb[ cb ]) >> FP_BITS );
+    }
+}
+
+/**
+ * Floating-point Y'CbCr -> R'G'B' conversion of 'width' three-byte pixels,
+ * using the inverse matrix listed below.  Results are clamped to
+ * [0.0, 1.0], scaled by 255 and rounded by adding 0.5 before truncation.
+ *
+ * 601 numbers:
+ *
+ * Y' = 0.299*R' + 0.587*G' + 0.114*B' (in 0.0 to 1.0)
+ * Cb = -0.169*R' - 0.331*G' + 0.500*B' (in -0.5 to +0.5)
+ * Cr = 0.500*R' - 0.419*G' - 0.081*B' (in -0.5 to +0.5)
+ *
+ * Inverse:
+ *     Y       Cb       Cr
+ * R 1.0000 -0.0009  1.4017
+ * G 1.0000 -0.3437 -0.7142
+ * B 1.0000  1.7722  0.0010
+ *
+ * S170M numbers:
+ * Y' = 0.299*R' + 0.587*G' + 0.114*B' (in 0.0 to 1.0)
+ * B-Y' = -0.299*R' - 0.587*G' + 0.886*B'
+ * R-Y' = 0.701*R' - 0.587*G' - 0.114*B'
+ */
+void packed444_to_rgb24_rec601_reference_scanline( uint8_t *output, uint8_t *input, int width )
+{
+    for( ; width--; output += 3, input += 3 ) {
+        /* Remove the offsets (16 for luma, 128 for chroma), then scale by 1/255. */
+        const double yp = (((double) input[ 0 ]) - 16.0) / 255.0;
+        const double cb = (((double) input[ 1 ]) - 128.0) / 255.0;
+        const double cr = (((double) input[ 2 ]) - 128.0) / 255.0;
+
+        double r = yp - (0.0009*cb) + (1.4017*cr);
+        double g = yp - (0.3437*cb) - (0.7142*cr);
+        double b = yp + (1.7722*cb) + (0.0010*cr);
+
+        r = ( r > 1.0 ) ? 1.0 : ( ( r < 0.0 ) ? 0.0 : r );
+        g = ( g > 1.0 ) ? 1.0 : ( ( g < 0.0 ) ? 0.0 : g );
+        b = ( b > 1.0 ) ? 1.0 : ( ( b < 0.0 ) ? 0.0 : b );
+
+        output[ 0 ] = (int) ((r * 255.0) + 0.5);
+        output[ 1 ] = (int) ((g * 255.0) + 0.5);
+        output[ 2 ] = (int) ((b * 255.0) + 0.5);
+    }
+}
+
+/**
+ * Expands 'width' three-byte pixels into four-byte pixels by writing the
+ * low eight bits of 'alpha' in front of each one.  The three input bytes
+ * are copied through unchanged.  Nothing is written when width <= 0.
+ */
+void packed444_to_nonpremultiplied_packed4444_scanline( uint8_t *output,
+                                                        uint8_t *input,
+                                                        int width, int alpha )
+{
+    const uint8_t a = alpha & 0xff;
+
+    while( width-- > 0 ) {
+        *output++ = a;
+        *output++ = *input++;
+        *output++ = *input++;
+        *output++ = *input++;
+    }
+}
+
+/**
+ * Horizontally resamples a scanline of four-byte pixels.
+ *
+ * The input is walked from position 0 up to (not including) 'width' in
+ * steps of 1.0 / pixel_aspect, and one output pixel is written per step.
+ * While the previously sampled input index is still 0 the current input
+ * pixel is copied as is; afterwards the output is the per-channel integer
+ * average of the input pixels from the previous sample index through the
+ * current one, both inclusive.
+ *
+ * The caller must size 'output' for about width * pixel_aspect pixels.
+ * Nothing is written when pixel_aspect does not give a finite positive
+ * step (zero, negative, NaN or infinite), since such a step would never
+ * reach 'width' or would index before the start of 'input'.
+ */
+void aspect_adjust_packed4444_scanline( uint8_t *output,
+                                        uint8_t *input,
+                                        int width,
+                                        double pixel_aspect )
+{
+    double step;
+    double i;
+    int prev_i = 0;
+
+    /* Written as negated comparisons so that NaN is rejected as well. */
+    if( !( pixel_aspect > 0.0 ) ) return;
+    step = 1.0 / pixel_aspect;
+    if( !( step > 0.0 ) ) return;
+
+    for( i = 0.0; i < width; i += step ) {
+        uint8_t *curin = input + ((int) i)*4;
+
+        if( !prev_i ) {
+            output[ 0 ] = curin[ 0 ];
+            output[ 1 ] = curin[ 1 ];
+            output[ 2 ] = curin[ 2 ];
+            output[ 3 ] = curin[ 3 ];
+        } else {
+            int avg_a = 0;
+            int avg_y = 0;
+            int avg_cb = 0;
+            int avg_cr = 0;
+            int pos = prev_i * 4;
+            int c = 0;
+            int j;
+
+            /* c is at least 1 here because prev_i never exceeds (int) i. */
+            for( j = prev_i; j <= (int) i; j++ ) {
+                avg_a += input[ pos++ ];
+                avg_y += input[ pos++ ];
+                avg_cb += input[ pos++ ];
+                avg_cr += input[ pos++ ];
+                c++;
+            }
+            output[ 0 ] = avg_a / c;
+            output[ 1 ] = avg_y / c;
+            output[ 2 ] = avg_cb / c;
+            output[ 3 ] = avg_cr / c;
+        }
+        output += 4;
+        prev_i = (int) i;
+    }
+}
+
+/**
+ * Sub-pixel data bar renderer. There are 128 bars.
+ *
+ * Draws 'percentage' bars over 'background' into 'output'.  Positions are
+ * kept in 1/256ths of a pixel: every bar is 'barsize' units wide and is
+ * followed by a gap of the same size.  A pixel that a bar covers only
+ * partly is composited with 'a' scaled down by the covered fraction.
+ */
+void composite_bars_packed4444_scanline( uint8_t *output,
+                                         uint8_t *background, int width,
+                                         int a, int luma, int cb, int cr,
+                                         int percentage )
+{
+    /**
+     * This is the size of both the bar and the spacing in between in subpixel
+     * units out of 256. Yes, as it so happens, that puts it equal to 'width'.
+     */
+    int barsize = ( width * 256 ) / 256;
+    int bar;
+
+    /* We only need to composite the bar on the pixels that matter. */
+    for( bar = 0; bar < percentage; bar++ ) {
+        const int barstart = bar * barsize * 2;
+        const int barend = barstart + barsize;
+        const int lastpix = barend / 256;
+        int pix;
+
+        for( pix = barstart / 256; pix <= lastpix; pix++ ) {
+            uint8_t *dst = output + (pix*4);
+            uint8_t *src = background + (pix*4);
+            int lo = pix * 256;
+            int hi = lo + 256;
+            int covered;
+            int alpha;
+
+            /* Intersect this pixel's span with the bar's span. */
+            if( barstart > lo ) lo = barstart;
+            if( barend < hi ) hi = barend;
+            covered = hi - lo;
+            alpha = ( covered < 256 ) ? ( covered * a ) / 256 : a;
+
+            dst[ 0 ] = src[ 0 ] + multiply_alpha( alpha - src[ 0 ], alpha );
+            dst[ 1 ] = src[ 1 ] + multiply_alpha( luma - src[ 1 ], alpha );
+            dst[ 2 ] = src[ 2 ] + multiply_alpha( cb - src[ 2 ], alpha );
+            dst[ 3 ] = src[ 3 ] + multiply_alpha( cr - src[ 3 ], alpha );
+        }
+    }
+}
+
static uint32_t speedy_accel;
-void setup_speedy_calls( int verbose )
+void setup_speedy_calls( uint32_t accel, int verbose )
{
- speedy_accel = xine_mm_accel();
+ speedy_accel = accel;
interpolate_packed422_scanline = interpolate_packed422_scanline_c;
blit_colour_packed422_scanline = blit_colour_packed422_scanline_c;
@@ -1807,7 +2019,7 @@ void setup_speedy_calls( int verbose )
kill_chroma_packed422_inplace_scanline = kill_chroma_packed422_inplace_scanline_c;
mirror_packed422_inplace_scanline = mirror_packed422_inplace_scanline_c;
halfmirror_packed422_inplace_scanline = halfmirror_packed422_inplace_scanline_c;
- speedy_memcpy = xine_fast_memcpy;
+ speedy_memcpy = speedy_memcpy_c;
diff_packed422_block8x8 = diff_packed422_block8x8_c;
a8_subpix_blit_scanline = a8_subpix_blit_scanline_c;
quarter_blit_vertical_packed422_scanline = quarter_blit_vertical_packed422_scanline_c;
@@ -1832,6 +2044,7 @@ void setup_speedy_calls( int verbose )
comb_factor_packed422_scanline = comb_factor_packed422_scanline_mmx;
diff_packed422_block8x8 = diff_packed422_block8x8_mmx;
quarter_blit_vertical_packed422_scanline = quarter_blit_vertical_packed422_scanline_mmxext;
+ speedy_memcpy = speedy_memcpy_mmxext;
} else if( speedy_accel & MM_ACCEL_X86_MMX ) {
if( verbose ) {
fprintf( stderr, "speedycode: Using MMX optimized functions.\n" );
@@ -1844,16 +2057,16 @@ void setup_speedy_calls( int verbose )
comb_factor_packed422_scanline = comb_factor_packed422_scanline_mmx;
kill_chroma_packed422_inplace_scanline = kill_chroma_packed422_inplace_scanline_mmx;
diff_packed422_block8x8 = diff_packed422_block8x8_mmx;
- } else
-#endif
- {
+ speedy_memcpy = speedy_memcpy_mmx;
+ } else {
if( verbose ) {
fprintf( stderr, "speedycode: No MMX or MMXEXT support detected, using C fallbacks.\n" );
}
}
+#endif
}
-int speedy_get_accel( void )
+uint32_t speedy_get_accel( void )
{
return speedy_accel;
}
diff --git a/src/post/deinterlace/speedy.h b/src/post/deinterlace/speedy.h
index 341f85494..734050b40 100644
--- a/src/post/deinterlace/speedy.h
+++ b/src/post/deinterlace/speedy.h
@@ -124,14 +124,16 @@ extern void (*subpix_blit_vertical_packed422_scanline)( uint8_t *output, uint8_t
uint8_t *bot, int subpixpos, int width );
/**
- * Sets up the function pointers to point at the fastest function available.
+ * Sets up the function pointers to point at the fastest function
+ * available. Requires acceleration settings (see mm_accel.h).
*/
-void setup_speedy_calls( int verbose );
+void setup_speedy_calls( uint32_t accel, int verbose );
/**
- * Returns a bitfield of what accellerations are available. See mm_accel.h.
+ * Returns a bitfield of what accelerations were used when speedy was
+ * initialized. See mm_accel.h.
*/
-int speedy_get_accel( void );
+uint32_t speedy_get_accel( void );
#ifdef __cplusplus
};
diff --git a/src/post/deinterlace/xine_plugin.c b/src/post/deinterlace/xine_plugin.c
index fa65df4f0..76bb1d46e 100644
--- a/src/post/deinterlace/xine_plugin.c
+++ b/src/post/deinterlace/xine_plugin.c
@@ -17,7 +17,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
- * $Id: xine_plugin.c,v 1.3 2003/06/15 15:25:35 miguelfreitas Exp $
+ * $Id: xine_plugin.c,v 1.4 2003/06/16 18:28:11 miguelfreitas Exp $
*
* advanced video deinterlacer plugin
* Jun/2003 by Miguel Freitas
@@ -231,7 +231,7 @@ static void *deinterlace_init_plugin(xine_t *xine, void *data)
class->dispose = deinterlace_class_dispose;
- setup_speedy_calls(0);
+ setup_speedy_calls(xine_mm_accel(),0);
linear_plugin_init();
linearblend_plugin_init();