diff options
author | Miguel Freitas <miguelfreitas@users.sourceforge.net> | 2003-06-13 01:48:09 +0000 |
---|---|---|
committer | Miguel Freitas <miguelfreitas@users.sourceforge.net> | 2003-06-13 01:48:09 +0000 |
commit | d349cabadd8f4b3530a9ae7049faa3f11387eab8 (patch) | |
tree | 8d6e84bc870bc7473ea55e18bf3b58062251c59d | |
parent | a6b05cacbf52dd3b3b7aa4974d81e2221e203097 (diff) | |
download | xine-lib-d349cabadd8f4b3530a9ae7049faa3f11387eab8.tar.gz xine-lib-d349cabadd8f4b3530a9ae7049faa3f11387eab8.tar.bz2 |
new deinterlacer plugin based on tvtime
(please report if compilation breaks on non-x86 platforms)
see xine-devel for more information
CVS patchset: 5032
CVS date: 2003/06/13 01:48:09
21 files changed, 5288 insertions, 416 deletions
diff --git a/src/post/deinterlace/Makefile.am b/src/post/deinterlace/Makefile.am new file mode 100644 index 000000000..b1c35cb99 --- /dev/null +++ b/src/post/deinterlace/Makefile.am @@ -0,0 +1,18 @@ +include $(top_srcdir)/misc/Makefile.common + +SUBDIRS = plugins + +EXTRA_DIST = + +libdir = $(XINE_PLUGINDIR)/post + +lib_LTLIBRARIES = xineplug_post_tvtime.la + +xineplug_post_tvtime_la_SOURCES = xine_plugin.c \ + deinterlace.c pulldown.c speedy.c tvtime.c +xineplug_post_tvtime_la_LIBADD = $(XINE_LIB) \ + $(top_builddir)/src/post/deinterlace/plugins/libdeinterlaceplugins.la + +xineplug_post_tvtime_la_LDFLAGS = -avoid-version -module @XINE_PLUGIN_MIN_SYMS@ + +noinst_HEADERS = deinterlace.h pulldown.h speedtools.h speedy.h tvtime.h diff --git a/src/post/deinterlace/deinterlace.c b/src/post/deinterlace/deinterlace.c new file mode 100644 index 000000000..f797f28cf --- /dev/null +++ b/src/post/deinterlace/deinterlace.c @@ -0,0 +1,125 @@ +/** + * Copyright (c) 2002 Billy Biggs <vektor@dumbterm.net>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#include <dlfcn.h> +#include <stdio.h> +#include <stdlib.h> +#include "deinterlace.h" + +typedef struct methodlist_item_s methodlist_item_t; + +struct methodlist_item_s +{ + deinterlace_method_t *method; + methodlist_item_t *next; +}; + +static methodlist_item_t *methodlist = 0; +static int initialized = 0; /* set by filter_deinterlace_methods(); once set, registration and filtering become no-ops */ + +void register_deinterlace_method( deinterlace_method_t *method ) +{ + methodlist_item_t **dest; + + if( initialized ) + return; + + if( !methodlist ) { + dest = &methodlist; + } else { + methodlist_item_t *cur = methodlist; + while( cur->next ) cur = cur->next; + dest = &(cur->next); + } + + *dest = (methodlist_item_t *) malloc( sizeof( methodlist_item_t ) ); + if( *dest ) { + (*dest)->method = method; + (*dest)->next = 0; + } else { + fprintf( stderr, "deinterlace: Can't allocate memory.\n" ); + } +} + +int get_num_deinterlace_methods( void ) +{ + methodlist_item_t *cur = methodlist; + int count = 0; + while( cur ) { + count++; + cur = cur->next; + } + return count; +} + +deinterlace_method_t *get_deinterlace_method( int i ) +{ + methodlist_item_t *cur = methodlist; + + if( !cur ) return 0; + while( i-- ) { + if( !cur->next ) return 0; + cur = cur->next; + } + + return cur->method; +} + +void filter_deinterlace_methods( int accel, int fields_available ) +{ + methodlist_item_t *prev = 0; + methodlist_item_t *cur = methodlist; + + if( initialized ) + return; + + while( cur ) { + methodlist_item_t *next = cur->next; + int drop = 0; + + if( (cur->method->accelrequired & accel) != cur->method->accelrequired ) { + /* This method is no good, drop it from the list. */ + fprintf( stderr, "deinterlace: %s disabled: required " + "CPU accelleration features unavailable.\n", + cur->method->short_name ); + drop = 1; + } + if( cur->method->fields_required > fields_available ) { + /* This method is no good, drop it from the list. 
*/ + fprintf( stderr, "deinterlace: %s disabled: requires " + "%d field buffers, only %d available.\n", + cur->method->short_name, cur->method->fields_required, + fields_available ); + drop = 1; + } + + if( drop ) { + if( prev ) { + prev->next = next; + } else { + methodlist = next; + } + free( cur ); + } else { + prev = cur; + } + cur = next; + } + initialized = 1; +} + diff --git a/src/post/deinterlace/deinterlace.h b/src/post/deinterlace/deinterlace.h new file mode 100644 index 000000000..141500c21 --- /dev/null +++ b/src/post/deinterlace/deinterlace.h @@ -0,0 +1,198 @@ +/** + * Copyright (C) 2002 Billy Biggs <vektor@dumbterm.net>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef DEINTERLACE_H_INCLUDED +#define DEINTERLACE_H_INCLUDED + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define DEINTERLACE_PLUGIN_API_VERSION 0x00000004 + +/** + * Our deinterlacer plugin API is modeled after DScaler's. This module + * represents the API that all deinterlacer plugins must export, and + * also provides a registration mechanism for the application to be able + * to iterate through available plugins and select an appropriate one. 
+ */ + +typedef struct deinterlace_setting_s deinterlace_setting_t; +typedef struct deinterlace_method_s deinterlace_method_t; +typedef struct deinterlace_scanline_data_s deinterlace_scanline_data_t; +typedef struct deinterlace_frame_data_s deinterlace_frame_data_t; + +/** + * Callback for setting change notification. + */ +typedef void (*setting_onchange_t)(deinterlace_setting_t *); + +/** + * Interface for plugin initialization. + */ +typedef void (*deinterlace_plugin_init_t)( void ); + +/** + * There are two scanline functions that every deinterlacer plugin + * must implement to do its work: one for a 'copy' and one for + * an 'interpolate' for the currently active field. This so so that + * while plugins may be delaying fields, the external API assumes that + * the plugin is completely realtime. + * + * Each deinterlacing routine can require data from up to four fields. + * The most recent field captured is field 0, and increasing numbers go + * backwards in time. + */ +struct deinterlace_scanline_data_s +{ + uint8_t *tt0, *t0, *m0, *b0, *bb0; + uint8_t *tt1, *t1, *m1, *b1, *bb1; + uint8_t *tt2, *t2, *m2, *b2, *bb2; + uint8_t *tt3, *t3, *m3, *b3, *bb3; + int bottom_field; +}; + +/** + * | t-3 t-2 t-1 t + * | Field 3 | Field 2 | Field 1 | Field 0 | + * | TT3 | | TT1 | | + * | | T2 | | T0 | + * | M3 | | M1 | | + * | | B2 | | B0 | + * | BB3 | | BB1 | | + * + * While all pointers are passed in, each plugin is only guarenteed for + * the ones it indicates it requires (in the fields_required parameter) + * to be available. + * + * Pointers are always to scanlines in the standard packed 4:2:2 format. + */ +typedef void (*deinterlace_interp_scanline_t)( uint8_t *output, + deinterlace_scanline_data_t *data, + int width ); +/** + * For the copy scanline, the API is basically the same, except that + * we're given a scanline to 'copy'. 
+ * + * | t-3 t-2 t-1 t + * | Field 3 | Field 2 | Field 1 | Field 0 | + * | | TT2 | | TT0 | + * | T3 | | T1 | | + * | | M2 | | M0 | + * | B3 | | B1 | | + * | | BB2 | | BB0 | + */ +typedef void (*deinterlace_copy_scanline_t)( uint8_t *output, + deinterlace_scanline_data_t *data, + int width ); + +/** + * The frame function is for deinterlacing plugins that can only act + * on whole frames, rather than on a scanline at a time. + */ +struct deinterlace_frame_data_s +{ + uint8_t *f0; + uint8_t *f1; + uint8_t *f2; + uint8_t *f3; +}; + +typedef void (*deinterlace_frame_t)( uint8_t *output, int outstride, + deinterlace_frame_data_t *data, + int bottom_field, int width, int height ); + + +/** + * Plugin settings can be any of the following. + */ +typedef enum +{ + SETTING_ONOFF, + SETTING_YESNO, + SETTING_ITEMFROMLIST, + SETTING_SLIDER +} setting_type_t; + +/** + * Each setting provides a pointer to the value, the min, max, default + * and step increment, and if it's not 0, a function to be called + * when the parameter is updated. + */ +struct deinterlace_setting_s +{ + const char *name; + setting_type_t type; + int *value; + int defvalue; + int minvalue; + int maxvalue; + int stepvalue; + setting_onchange_t onchange; +}; + +/** + * This structure defines the deinterlacer plugin. + */ +struct deinterlace_method_s +{ + int version; + const char *name; + const char *short_name; + int fields_required; + int accelrequired; + int doscalerbob; + int numsettings; + deinterlace_setting_t *settings; + int scanlinemode; + deinterlace_interp_scanline_t interpolate_scanline; + deinterlace_copy_scanline_t copy_scanline; + deinterlace_frame_t deinterlace_frame; +}; + +/** + * Registers a new deinterlace method. + */ +void register_deinterlace_method( deinterlace_method_t *method ); + +/** + * Returns how many deinterlacing methods are available. + */ +int get_num_deinterlace_methods( void ); + +/** + * Returns the specified method in the list. 
+ */ +deinterlace_method_t *get_deinterlace_method( int i ); + +/** + * Loads a deinterlace plugin from the given file. + */ +void register_deinterlace_plugin( const char *filename ); + +/** + * Builds the usable method list. + */ +void filter_deinterlace_methods( int accel, int fieldsavailable ); + +#ifdef __cplusplus +}; +#endif +#endif /* DEINTERLACE_H_INCLUDED */ diff --git a/src/post/deinterlace/plugins/Makefile.am b/src/post/deinterlace/plugins/Makefile.am new file mode 100644 index 000000000..65e430134 --- /dev/null +++ b/src/post/deinterlace/plugins/Makefile.am @@ -0,0 +1,22 @@ +include $(top_srcdir)/misc/Makefile.common + +AM_CPPFLAGS = -I../ + +EXTRA_DIST = + +libdir = $(XINE_PLUGINDIR)/post + +noinst_LTLIBRARIES = libdeinterlaceplugins.la + +libdeinterlaceplugins_la_SOURCES = \ + double.c \ + greedy.c \ + linear.c \ + linearblend.c \ + vfir.c \ + weave.c \ + greedy2frame.c +libdeinterlaceplugins_la_LIBADD = $(XINE_LIB) +libdeinterlaceplugins_la_LDFLAGS = -avoid-version -module @XINE_PLUGIN_MIN_SYMS@ + +noinst_HEADERS = plugins.h diff --git a/src/post/deinterlace/plugins/double.c b/src/post/deinterlace/plugins/double.c new file mode 100644 index 000000000..f39e1ddfc --- /dev/null +++ b/src/post/deinterlace/plugins/double.c @@ -0,0 +1,65 @@ +/** + * Line doubler deinterlacing plugin. + * + * Copyright (C) 2002 Billy Biggs <vektor@dumbterm.net>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include <stdio.h> +#include <stdint.h> +#include "speedy.h" +#include "deinterlace.h" + +static void deinterlace_scanline_double( uint8_t *output, + deinterlace_scanline_data_t *data, + int width ) +{ + blit_packed422_scanline( output, data->t0, width ); +} + +static void copy_scanline( uint8_t *output, + deinterlace_scanline_data_t *data, + int width ) +{ + blit_packed422_scanline( output, data->m0, width ); +} + + +static deinterlace_method_t doublemethod = +{ + DEINTERLACE_PLUGIN_API_VERSION, + "Line Doubler", + "LineDoubler", + 1, + 0, + 0, + 0, + 0, + 1, + deinterlace_scanline_double, + copy_scanline, + 0 +}; + +#ifdef BUILD_TVTIME_PLUGINS +void deinterlace_plugin_init( void ) +#else +void double_plugin_init( void ) +#endif +{ + register_deinterlace_method( &doublemethod ); +} + diff --git a/src/post/deinterlace/plugins/greedy.c b/src/post/deinterlace/plugins/greedy.c new file mode 100644 index 000000000..fb7eb454c --- /dev/null +++ b/src/post/deinterlace/plugins/greedy.c @@ -0,0 +1,195 @@ +/** + * Copyright (c) 2000 Tom Barry All rights reserved. + * mmx.h port copyright (c) 2002 Billy Biggs <vektor@dumbterm.net>. + * + * This code is ported from DScaler: http://deinterlace.sf.net/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include <stdio.h> +#include <stdint.h> + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "attributes.h" +#include "xineutils.h" +#include "deinterlace.h" +#include "speedtools.h" +#include "speedy.h" + +// This is a simple lightweight DeInterlace method that uses little CPU time +// but gives very good results for low or intermedite motion. +// It defers frames by one field, but that does not seem to produce noticeable +// lip sync problems. +// +// The method used is to take either the older or newer weave pixel depending +// upon which give the smaller comb factor, and then clip to avoid large damage +// when wrong. +// +// I'd intended this to be part of a larger more elaborate method added to +// Blended Clip but this give too good results for the CPU to ignore here. + +static void copy_scanline( uint8_t *output, + deinterlace_scanline_data_t *data, + int width ) +{ + blit_packed422_scanline( output, data->m1, width ); +} + +static int GreedyMaxComb = 15; + +static void deinterlace_greedy_packed422_scanline_mmxext( uint8_t *output, + deinterlace_scanline_data_t *data, + int width ) +{ +#ifdef ARCH_X86 + mmx_t MaxComb; + uint8_t *m0 = data->m0; + uint8_t *t1 = data->t1; + uint8_t *b1 = data->b1; + uint8_t *m2 = data->m2; + + // How badly do we let it weave? 
0-255 + MaxComb.ub[ 0 ] = GreedyMaxComb; + MaxComb.ub[ 1 ] = GreedyMaxComb; + MaxComb.ub[ 2 ] = GreedyMaxComb; + MaxComb.ub[ 3 ] = GreedyMaxComb; + MaxComb.ub[ 4 ] = GreedyMaxComb; + MaxComb.ub[ 5 ] = GreedyMaxComb; + MaxComb.ub[ 6 ] = GreedyMaxComb; + MaxComb.ub[ 7 ] = GreedyMaxComb; + + // L2 == m0 + // L1 == t1 + // L3 == b1 + // LP2 == m2 + + width /= 4; + while( width-- ) { + movq_m2r( *t1, mm1 ); // L1 + movq_m2r( *m0, mm2 ); // L2 + movq_m2r( *b1, mm3 ); // L3 + movq_m2r( *m2, mm0 ); // LP2 + + // average L1 and L3 leave result in mm4 + movq_r2r( mm1, mm4 ); // L1 + pavgb_r2r( mm3, mm4 ); // (L1 + L3)/2 + + + // get abs value of possible L2 comb + movq_r2r( mm2, mm7 ); // L2 + psubusb_r2r( mm4, mm7 ); // L2 - avg + movq_r2r( mm4, mm5 ); // avg + psubusb_r2r( mm2, mm5 ); // avg - L2 + por_r2r( mm7, mm5 ); // abs(avg-L2) + movq_r2r( mm4, mm6 ); // copy of avg for later + + + // get abs value of possible LP2 comb + movq_r2r( mm0, mm7 ); // LP2 + psubusb_r2r( mm4, mm7 ); // LP2 - avg + psubusb_r2r( mm0, mm4 ); // avg - LP2 + por_r2r( mm7, mm4 ); // abs(avg-LP2) + + // use L2 or LP2 depending upon which makes smaller comb + psubusb_r2r( mm5, mm4 ); // see if it goes to zero + psubusb_r2r( mm5, mm5 ); // 0 + pcmpeqb_r2r( mm5, mm4 ); // if (mm4=0) then FF else 0 + pcmpeqb_r2r( mm4, mm5 ); // opposite of mm4 + + // if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5 = 55 + pand_r2r( mm2, mm5 ); // use L2 if mm5 == ff, else 0 + pand_r2r( mm0, mm4 ); // use LP2 if mm4 = ff, else 0 + por_r2r( mm5, mm4 ); // may the best win + + // Now lets clip our chosen value to be not outside of the range + // of the high/low range L1-L3 by more than abs(L1-L3) + // This allows some comb but limits the damages and also allows more + // detail than a boring oversmoothed clip. 
+ + movq_r2r( mm1, mm2 ); // copy L1 + psubusb_r2r( mm3, mm2 ); // - L3, with saturation + paddusb_r2r( mm3, mm2 ); // now = Max(L1,L3) + + pcmpeqb_r2r( mm7, mm7 ); // all ffffffff + psubusb_r2r( mm1, mm7 ); // - L1 + paddusb_r2r( mm7, mm3 ); // add, may sat at fff.. + psubusb_r2r( mm7, mm3 ); // now = Min(L1,L3) + + // allow the value to be above the high or below the low by amt of MaxComb + paddusb_m2r( MaxComb, mm2 ); // increase max by diff + psubusb_m2r( MaxComb, mm3 ); // lower min by diff + + psubusb_r2r( mm3, mm4 ); // best - Min + paddusb_r2r( mm3, mm4 ); // now = Max(best,Min(L1,L3) + + pcmpeqb_r2r( mm7, mm7 ); // all ffffffff + psubusb_r2r( mm4, mm7 ); // - Max(best,Min(best,L3) + paddusb_r2r( mm7, mm2 ); // add may sat at FFF.. + psubusb_r2r( mm7, mm2 ); // now = Min( Max(best, Min(L1,L3), L2 )=L2 clipped + + movntq_r2m( mm2, *output ); // move in our clipped best + + // Advance to the next set of pixels. + output += 8; + m0 += 8; + t1 += 8; + b1 += 8; + m2 += 8; + } + sfence(); + emms(); +#endif +} + +static deinterlace_setting_t settings[] = +{ + { + "Greedy Max Comb", + SETTING_SLIDER, + &GreedyMaxComb, + 15, 0, 255, 1, + 0 + } +}; + +static deinterlace_method_t greedymethod = +{ + DEINTERLACE_PLUGIN_API_VERSION, + "DScaler: Greedy - Low motion", + "Greedy", + 3, + MM_ACCEL_X86_MMXEXT, + 0, + 1, + settings, + 1, + copy_scanline, + deinterlace_greedy_packed422_scanline_mmxext, + 0 +}; + +#ifdef BUILD_TVTIME_PLUGINS +void deinterlace_plugin_init( void ) +#else +void greedy_plugin_init( void ) +#endif +{ + register_deinterlace_method( &greedymethod ); +} + diff --git a/src/post/deinterlace/plugins/greedy2frame.c b/src/post/deinterlace/plugins/greedy2frame.c new file mode 100644 index 000000000..00c9b34cb --- /dev/null +++ b/src/post/deinterlace/plugins/greedy2frame.c @@ -0,0 +1,198 @@ +/** + * Copyright (c) 2000 John Adcock, Tom Barry, Steve Grimm All rights reserved. + * mmx.h port copyright (c) 2002 Billy Biggs <vektor@dumbterm.net>. 
+ * + * This code is ported from DScaler: http://deinterlace.sf.net/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include <stdio.h> +#include <stdint.h> + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "attributes.h" +#include "xineutils.h" +#include "deinterlace.h" +#include "speedtools.h" +#include "speedy.h" + +static int GreedyTwoFrameThreshold = 4; +static int GreedyTwoFrameThreshold2 = 8; + +static void deinterlace_greedytwoframe_packed422_scanline_mmxext( uint8_t *output, + deinterlace_scanline_data_t *data, + int width ) +{ +#ifdef ARCH_X86 + const mmx_t Mask = { 0x7f7f7f7f7f7f7f7fULL }; + const mmx_t DwordOne = { 0x0000000100000001ULL }; + const mmx_t DwordTwo = { 0x0000000200000002ULL }; + mmx_t qwGreedyTwoFrameThreshold; // NOTE(review): bytes 3, 5 and 7 are never written below - confirm intended values + uint8_t *m0 = data->m0; + uint8_t *t1 = data->t1; + uint8_t *b1 = data->b1; + uint8_t *m2 = data->m2; + uint8_t *t3 = data->t3; // field 3 line above; the code below calls it T0 + uint8_t *b3 = data->b3; // field 3 line below; the code below calls it B0 + + qwGreedyTwoFrameThreshold.b[ 0 ] = GreedyTwoFrameThreshold; + qwGreedyTwoFrameThreshold.b[ 1 ] = GreedyTwoFrameThreshold2; + qwGreedyTwoFrameThreshold.b[ 2 ] = GreedyTwoFrameThreshold; + qwGreedyTwoFrameThreshold.b[ 4 ] = GreedyTwoFrameThreshold; + qwGreedyTwoFrameThreshold.b[ 6 ] = GreedyTwoFrameThreshold; + + width /= 4; + while( width-- ) { + movq_m2r( *m0, mm0 ); + movq_m2r( *t1, mm1 ); + 
movq_m2r( *b1, mm3 ); + movq_m2r( *m2, mm2 ); + + // Average T1 and B1 so we can do interpolated bobbing if we bob onto T1. + movq_r2r( mm3, mm7 ); // mm7 = B1 + pavgb_r2r( mm1, mm7 ); + + // calculate |M1-M0| put result in mm4 need to keep mm0 intact + // if we have a good processor then make mm0 the average of M1 and M0 + // which should make weave look better when there is small amounts of + // movement + movq_r2r( mm0, mm4 ); + movq_r2r( mm2, mm5 ); + psubusb_r2r( mm2, mm4 ); + psubusb_r2r( mm0, mm5 ); + por_r2r( mm5, mm4 ); + psrlw_i2r( 1, mm4 ); + pavgb_r2r( mm2, mm0 ); + pand_r2r( mm6, mm4 ); // NOTE(review): mm6 is never loaded in this function; presumably meant to hold Mask - confirm + + // if |M1-M0| > Threshold we want dword worth of twos + pcmpgtb_m2r( qwGreedyTwoFrameThreshold, mm4 ); + pand_m2r( Mask, mm4 ); // get rid of any sign bit + pcmpgtd_m2r( DwordOne, mm4 ); // do we want to bob + pandn_m2r( DwordTwo, mm4 ); + + movq_m2r( *t3, mm2 ); // mm2 = T0 + + // calculate |T1-T0| put result in mm5 + movq_r2r( mm2, mm5 ); + psubusb_r2r( mm1, mm5 ); + psubusb_r2r( mm2, mm1 ); + por_r2r( mm1, mm5 ); + psrlw_i2r( 1, mm5 ); + pand_r2r( mm6, mm5 ); + + // if |T1-T0| > Threshold we want dword worth of ones + pcmpgtb_m2r( qwGreedyTwoFrameThreshold, mm5 ); + pand_r2r( mm6, mm5 ); // get rid of any sign bit + pcmpgtd_m2r( DwordOne, mm5 ); + pandn_m2r( DwordOne, mm5 ); + paddd_r2r( mm5, mm4 ); + + movq_m2r( *b3, mm2 ); // B0 + + // calculate |B1-B0| put result in mm5 + movq_r2r( mm2, mm5 ); + psubusb_r2r( mm3, mm5 ); + psubusb_r2r( mm2, mm3 ); + por_r2r( mm3, mm5 ); + psrlw_i2r( 1, mm5 ); + pand_r2r( mm6, mm5 ); + + // if |B1-B0| > Threshold we want dword worth of ones + pcmpgtb_m2r( qwGreedyTwoFrameThreshold, mm5 ); + pand_r2r( mm6, mm5 ); // get rid of any sign bit + pcmpgtd_m2r( DwordOne, mm5 ); + pandn_m2r( DwordOne, mm5 ); + paddd_r2r( mm5, mm4 ); + + pcmpgtd_m2r( DwordTwo, mm4 ); + + movq_r2r( mm4, mm5 ); + // mm4 now is 1 where we want to weave and 0 where we want to bob + pand_r2r( mm0, mm4 ); + pandn_r2r( mm7, mm5 ); + por_r2r( mm5, mm4 ); + + 
movq_r2m( mm4, *output ); + + // Advance to the next set of pixels. + output += 8; + m0 += 8; + t1 += 8; + b1 += 8; + m2 += 8; + t3 += 8; + b3 += 8; + } + sfence(); + emms(); +#endif +} + +static void copy_scanline( uint8_t *output, + deinterlace_scanline_data_t *data, + int width ) +{ + blit_packed422_scanline( output, data->m1, width ); +} + + +static deinterlace_setting_t settings[] = +{ + { + "Greedy 2 Frame Luma Threshold", + SETTING_SLIDER, + &GreedyTwoFrameThreshold, + 4, 0, 128, 1, + 0 + }, + { + "Greedy 2 Frame Chroma Threshold", + SETTING_SLIDER, + &GreedyTwoFrameThreshold2, + 8, 0, 128, 1, + 0 + } +}; + +static deinterlace_method_t greedymethod = +{ + DEINTERLACE_PLUGIN_API_VERSION, + "Greedy - 2-frame (DScaler)", + "Greedy2Frame", + 4, + MM_ACCEL_X86_MMXEXT, + 0, + 2, + settings, + 1, + copy_scanline, + deinterlace_greedytwoframe_packed422_scanline_mmxext, + 0 +}; + +#ifdef BUILD_TVTIME_PLUGINS +void deinterlace_plugin_init( void ) +#else +void greedy2frame_plugin_init( void ) +#endif +{ + register_deinterlace_method( &greedymethod ); +} + diff --git a/src/post/deinterlace/plugins/linear.c b/src/post/deinterlace/plugins/linear.c new file mode 100644 index 000000000..1e84fb722 --- /dev/null +++ b/src/post/deinterlace/plugins/linear.c @@ -0,0 +1,63 @@ +/** + * Copyright (C) 2002 Billy Biggs <vektor@dumbterm.net>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include <stdio.h> +#include <stdint.h> +#include "speedy.h" +#include "deinterlace.h" + +static void deinterlace_scanline_linear( uint8_t *output, + deinterlace_scanline_data_t *data, + int width ) +{ + interpolate_packed422_scanline( output, data->t0, data->b0, width ); +} + +static void copy_scanline( uint8_t *output, + deinterlace_scanline_data_t *data, + int width ) +{ + blit_packed422_scanline( output, data->m0, width ); +} + + +static deinterlace_method_t linearmethod = +{ + DEINTERLACE_PLUGIN_API_VERSION, + "Linear Interpolation", + "Linear", + 1, + 0, + 0, + 0, + 0, + 1, + deinterlace_scanline_linear, + copy_scanline, + 0 +}; + +#ifdef BUILD_TVTIME_PLUGINS +void deinterlace_plugin_init( void ) +#else +void linear_plugin_init( void ) +#endif +{ + register_deinterlace_method( &linearmethod ); +} + diff --git a/src/post/deinterlace/plugins/linearblend.c b/src/post/deinterlace/plugins/linearblend.c new file mode 100644 index 000000000..1da84c24b --- /dev/null +++ b/src/post/deinterlace/plugins/linearblend.c @@ -0,0 +1,180 @@ +/** + * Linear blend deinterlacing plugin. The algorithm for this filter is based + * on the mythtv sources, which took it from the mplayer sources. + * + * The file is postprocess_template.c in mplayer, and is + * + * Copyright (C) 2001-2002 Michael Niedermayer (michaelni@gmx.at) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <stdio.h> +#include <stdint.h> + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "attributes.h" +#include "xineutils.h" +#include "speedtools.h" +#include "speedy.h" +#include "deinterlace.h" + +static void deinterlace_scanline_linear_blend( uint8_t *output, + deinterlace_scanline_data_t *data, + int width ) +{ +#ifdef ARCH_X86 + uint8_t *t0 = data->t0; + uint8_t *b0 = data->b0; + uint8_t *m1 = data->m1; + int i; + + // Get width in bytes. + width *= 2; + i = width / 8; + width -= i * 8; + + pxor_r2r( mm7, mm7 ); + while( i-- ) { + movd_m2r( *t0, mm0 ); + movd_m2r( *b0, mm1 ); + movd_m2r( *m1, mm2 ); + + movd_m2r( *(t0+4), mm3 ); + movd_m2r( *(b0+4), mm4 ); + movd_m2r( *(m1+4), mm5 ); + + punpcklbw_r2r( mm7, mm0 ); + punpcklbw_r2r( mm7, mm1 ); + punpcklbw_r2r( mm7, mm2 ); + + punpcklbw_r2r( mm7, mm3 ); + punpcklbw_r2r( mm7, mm4 ); + punpcklbw_r2r( mm7, mm5 ); + + psllw_i2r( 1, mm2 ); + psllw_i2r( 1, mm5 ); + paddw_r2r( mm0, mm2 ); + paddw_r2r( mm3, mm5 ); + paddw_r2r( mm1, mm2 ); + paddw_r2r( mm4, mm5 ); + psrlw_i2r( 2, mm2 ); + psrlw_i2r( 2, mm5 ); + packuswb_r2r( mm2, mm2 ); + packuswb_r2r( mm5, mm5 ); + + movd_r2m( mm2, *output ); + movd_r2m( mm5, *(output+4) ); + output += 8; + t0 += 8; + b0 += 8; + m1 += 8; + } + while( width-- ) { + *output++ = (*t0++ + *b0++ + (2 * *m1++))>>2; + } + sfence(); + emms(); +#endif +} + +static void deinterlace_scanline_linear_blend2( uint8_t *output, + deinterlace_scanline_data_t *data, + int width ) +{ +#ifdef ARCH_X86 + uint8_t *m0 = 
data->m0; + uint8_t *t1 = data->t1; + uint8_t *b1 = data->b1; + int i; + + // Get width in bytes. + width *= 2; + i = width / 8; + width -= i * 8; + + pxor_r2r( mm7, mm7 ); + while( i-- ) { + movd_m2r( *t1, mm0 ); + movd_m2r( *b1, mm1 ); + movd_m2r( *m0, mm2 ); + + movd_m2r( *(t1+4), mm3 ); + movd_m2r( *(b1+4), mm4 ); + movd_m2r( *(m0+4), mm5 ); + + punpcklbw_r2r( mm7, mm0 ); + punpcklbw_r2r( mm7, mm1 ); + punpcklbw_r2r( mm7, mm2 ); + + punpcklbw_r2r( mm7, mm3 ); + punpcklbw_r2r( mm7, mm4 ); + punpcklbw_r2r( mm7, mm5 ); + + psllw_i2r( 1, mm2 ); + psllw_i2r( 1, mm5 ); + paddw_r2r( mm0, mm2 ); + paddw_r2r( mm3, mm5 ); + paddw_r2r( mm1, mm2 ); + paddw_r2r( mm4, mm5 ); + psrlw_i2r( 2, mm2 ); + psrlw_i2r( 2, mm5 ); + packuswb_r2r( mm2, mm2 ); + packuswb_r2r( mm5, mm5 ); + + movd_r2m( mm2, *output ); + movd_r2m( mm5, *(output+4) ); + output += 8; + t1 += 8; + b1 += 8; + m0 += 8; + } + while( width-- ) { + *output++ = (*t1++ + *b1++ + (2 * *m0++))>>2; + } + sfence(); + emms(); +#endif +} + + +static deinterlace_method_t linearblendmethod = +{ + DEINTERLACE_PLUGIN_API_VERSION, + "mplayer: Linear Blend", + "LinearBlend", + 2, + MM_ACCEL_X86_MMX, + 0, + 0, + 0, + 1, + deinterlace_scanline_linear_blend, + deinterlace_scanline_linear_blend2, + 0 +}; + +#ifdef BUILD_TVTIME_PLUGINS +void deinterlace_plugin_init( void ) +#else +void linearblend_plugin_init( void ) +#endif +{ + register_deinterlace_method( &linearblendmethod ); +} + diff --git a/src/post/deinterlace/plugins/plugins.h b/src/post/deinterlace/plugins/plugins.h new file mode 100644 index 000000000..b39623bc1 --- /dev/null +++ b/src/post/deinterlace/plugins/plugins.h @@ -0,0 +1,52 @@ +/** + * Copyright (C) 2002 Billy Biggs <vektor@dumbterm.net>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef TVTIME_PLUGINS_H_INCLUDED +#define TVTIME_PLUGINS_H_INCLUDED + +/** + * tvtime has a plugin system for deinterlacer plugins. + * However, at this point it's a bit silly to bother using + * them as 'dynamic' plugins. So, for the standard plugins, + * we allow them to be built into the executable, and their + * initializer methods go here. + */ + +void greedy_plugin_init( void ); +void greedy2frame_plugin_init( void ); +void twoframe_plugin_init( void ); +void linear_plugin_init( void ); +void weave_plugin_init( void ); +void videobob_plugin_init( void ); +void double_plugin_init( void ); +void linearblend_plugin_init( void ); +void scalerbob_plugin_init( void ); +void simplemo_plugin_init( void ); +void gamedither_plugin_init( void ); +void vfir_plugin_init( void ); + +void dscaler_greedy2frame_plugin_init( void ); +void dscaler_twoframe_plugin_init( void ); +void dscaler_greedyh_plugin_init( void ); +void dscaler_greedy_plugin_init( void ); +void dscaler_videobob_plugin_init( void ); +void dscaler_videoweave_plugin_init( void ); +void dscaler_oldgame_plugin_init( void ); +void dscaler_tomsmocomp_plugin_init( void ); + +#endif /* TVTIME_PLUGINS_H_INCLUDED */ diff --git a/src/post/deinterlace/plugins/vfir.c b/src/post/deinterlace/plugins/vfir.c new file mode 100644 index 000000000..f60bbecd8 --- /dev/null +++ b/src/post/deinterlace/plugins/vfir.c @@ -0,0 +1,161 @@ +/** + * This file contains code from ffmpeg, see http://ffmpeg.org/ + * + * Originated in imgconvert.c: Misc image convertion 
routines + * Copyright (c) 2001, 2002, 2003 Fabrice Bellard. + * + * tvtime port Copyright (C) 2003 Billy Biggs <vektor@dumbterm.net>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include <stdio.h> +#include <stdint.h> + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "attributes.h" +#include "xineutils.h" +#include "speedy.h" +#include "deinterlace.h" + +/** + * The MPEG2 spec uses a slightly harsher filter, they specify + * [-1 8 2 8 -1]. ffmpeg uses a similar filter but with more of + * a tendancy to blur than to use the local information. The + * filter taps here are: [-1 4 2 4 -1]. + */ + +static void deinterlace_line( uint8_t *dst, uint8_t *lum_m4, + uint8_t *lum_m3, uint8_t *lum_m2, + uint8_t *lum_m1, uint8_t *lum, int size ) +{ + /** + * C implementation. 
+ int sum; + + for(;size > 0;size--) { + sum = -lum_m4[0]; + sum += lum_m3[0] << 2; + sum += lum_m2[0] << 1; + sum += lum_m1[0] << 2; + sum += -lum[0]; + dst[0] = (sum + 4) >> 3; // This needs to be clipped at 0 and 255: cm[(sum + 4) >> 3]; + lum_m4++; + lum_m3++; + lum_m2++; + lum_m1++; + lum++; + dst++; + } + */ + + mmx_t rounder; + + rounder.uw[0]=4; + rounder.uw[1]=4; + rounder.uw[2]=4; + rounder.uw[3]=4; + pxor_r2r(mm7,mm7); + movq_m2r(rounder,mm6); + + for (;size > 3; size-=4) { + movd_m2r(lum_m4[0],mm0); + movd_m2r(lum_m3[0],mm1); + movd_m2r(lum_m2[0],mm2); + movd_m2r(lum_m1[0],mm3); + movd_m2r(lum[0],mm4); + punpcklbw_r2r(mm7,mm0); + punpcklbw_r2r(mm7,mm1); + punpcklbw_r2r(mm7,mm2); + punpcklbw_r2r(mm7,mm3); + punpcklbw_r2r(mm7,mm4); + paddw_r2r(mm3,mm1); + psllw_i2r(1,mm2); + paddw_r2r(mm4,mm0); + psllw_i2r(2,mm1);// 2 + paddw_r2r(mm6,mm2); + paddw_r2r(mm2,mm1); + psubusw_r2r(mm0,mm1); + psrlw_i2r(3,mm1); // 3 + packuswb_r2r(mm7,mm1); + movd_r2m(mm1,dst[0]); + lum_m4+=4; + lum_m3+=4; + lum_m2+=4; + lum_m1+=4; + lum+=4; + dst+=4; + } + emms(); +} + + +/** + * The commented-out method below that uses the bottom_field member is more + * like the filter as specified in the MPEG2 spec, but it doesn't seem to + * have the desired effect. 
+ */ + +static void deinterlace_scanline_vfir( uint8_t *output, + deinterlace_scanline_data_t *data, + int width ) +{ + deinterlace_line( output, data->tt1, data->t0, data->m1, data->b0, data->bb1, width*2 ); + // blit_packed422_scanline( output, data->m1, width ); +} + +static void copy_scanline( uint8_t *output, + deinterlace_scanline_data_t *data, + int width ) +{ + blit_packed422_scanline( output, data->m0, width ); + /* + if( data->bottom_field ) { + deinterlace_line( output, data->tt2, data->t1, data->m2, data->b1, data->bb2, width*2 ); + } else { + deinterlace_line( output, data->tt0, data->t1, data->m0, data->b1, data->bb0, width*2 ); + } + */ +} + + +static deinterlace_method_t vfirmethod = +{ + DEINTERLACE_PLUGIN_API_VERSION, + "ffmpeg: Vertical Blend", + "Vertical", + 1, + MM_ACCEL_X86_MMXEXT, + 0, + 0, + 0, + 1, + deinterlace_scanline_vfir, + copy_scanline, + 0 +}; + +#ifdef BUILD_TVTIME_PLUGINS +void deinterlace_plugin_init( void ) +#else +void vfir_plugin_init( void ) +#endif +{ + register_deinterlace_method( &vfirmethod ); +} + diff --git a/src/post/deinterlace/plugins/weave.c b/src/post/deinterlace/plugins/weave.c new file mode 100644 index 000000000..3a9546aa8 --- /dev/null +++ b/src/post/deinterlace/plugins/weave.c @@ -0,0 +1,65 @@ +/** + * Pure weave deinterlacing plugin. + * + * Copyright (C) 2002 Billy Biggs <vektor@dumbterm.net>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include <stdio.h> +#include <stdint.h> +#include "speedy.h" +#include "deinterlace.h" + +static void deinterlace_scanline_weave( uint8_t *output, + deinterlace_scanline_data_t *data, + int width ) +{ + blit_packed422_scanline( output, data->m1, width ); +} + +static void copy_scanline( uint8_t *output, + deinterlace_scanline_data_t *data, + int width ) +{ + blit_packed422_scanline( output, data->m0, width ); +} + + +static deinterlace_method_t weavemethod = +{ + DEINTERLACE_PLUGIN_API_VERSION, + "Weave Last Field", + "Weave", + 2, + 0, + 0, + 0, + 0, + 1, + deinterlace_scanline_weave, + copy_scanline, + 0 +}; + +#ifdef BUILD_TVTIME_PLUGINS +void deinterlace_plugin_init( void ) +#else +void weave_plugin_init( void ) +#endif +{ + register_deinterlace_method( &weavemethod ); +} + diff --git a/src/post/deinterlace/pulldown.c b/src/post/deinterlace/pulldown.c new file mode 100644 index 000000000..b99c4fdd4 --- /dev/null +++ b/src/post/deinterlace/pulldown.c @@ -0,0 +1,561 @@ +/** + * Copyright (C) 2001, 2002, 2003 Billy Biggs <vektor@dumbterm.net>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include <stdio.h> +#include <limits.h> +#include <string.h> +#include "pulldown.h" + +/** + * scratch paper: + * + * A A A B B C C C D D + * [T B T][B T][B T B][T B] + * [1 1][2 2][3 3][4 4][5 5] + * [C C] [M M][C C][C C] + * D A A A B B C C C D + * + * Top 1 : Drop + * Bot 1 : Show + * Top 2 : Drop + * Bot 2 : Drop + * Top 3 : Merge + * Bot 3 : Drop + * Top 4 : Show + * Bot 4 : Drop + * Top 5 : Drop + * Bot 5 : Show + */ + +/* Offset 1 2 3 4 5 */ +/* Field Pattern [T B T][B T][B T B] [T B] */ +/* Action Copy Save Merge Copy Copy */ +/* Bot Top */ +int tff_top_pattern[] = { 0, 1, 0, 0, 0 }; +int tff_bot_pattern[] = { 0, 0, 0, 1, 0 }; + +/* Offset 1 2 3 4 5 */ +/* Field Pattern [B T B][T B][T B T] [B T] */ +/* Action Copy Save Merge Copy Copy */ +/* Top Bot */ +int bff_top_pattern[] = { 0, 0, 0, 1, 0 }; +int bff_bot_pattern[] = { 0, 1, 0, 0, 0 }; + +/* Timestamp mangling */ +/* From the DVD : 0 + 3003+ 6006 + 9009+ 12012 = 15015 */ +/* In 24fps time: 0 + + 3754 + 7508+ 11262 = 15016 */ + +/** + * Flag Pattern Treat as + * on DVD last offset + * ============================ + * T B T bff 3 + * B T bff 4 + * B T B tff 3 + * T B tff 4 + */ + +int determine_pulldown_offset( int top_repeat, int bot_repeat, int tff, + int last_offset ) +{ + int predicted_offset; + int pd_patterns = 0; + int offset = -1; + int exact = -1; + int i; + + predicted_offset = last_offset << 1; + if( predicted_offset > PULLDOWN_SEQ_DD ) predicted_offset = PULLDOWN_SEQ_AA; + + /** + * Detect our pattern. 
#define HISTORY_SIZE 5

/* Ring buffers holding the last HISTORY_SIZE top/bottom field metrics. */
static int tophistory[ HISTORY_SIZE ];
static int bothistory[ HISTORY_SIZE ];

/* Per-slot flags stored alongside the metric history. */
static int tophistory_diff[ HISTORY_SIZE ];
static int bothistory_diff[ HISTORY_SIZE ];

/* Next slot of the ring buffers to be written. */
static int histpos = 0;

/**
 * Seeds the history buffers with an ideal 5-slot pattern.
 *
 * Exactly one top slot and one bottom slot are marked as the "repeat"
 * position: the metric there is 0 and its diff flag is 1.  Every other
 * slot gets INT_MAX and a diff flag of 0.  For tff the top repeat is
 * slot 1 and the bottom repeat is slot 3; otherwise the two are swapped.
 * The write position is reset to 0.
 *
 * The previous version wrote its fifth row to index 3 instead of 4.  For
 * tff that overwrote the bottom repeat marker at slot 3, and slot 4 of the
 * bottom buffers was never initialised in either case.
 *
 * @param tff non-zero to seed the top-field-first pattern.
 */
static void fill_history( int tff )
{
    const int top_repeat_pos = tff ? 1 : 3;
    const int bot_repeat_pos = tff ? 3 : 1;
    int i;

    for( i = 0; i < HISTORY_SIZE; i++ ) {
        const int top_is_repeat = ( i == top_repeat_pos );
        const int bot_is_repeat = ( i == bot_repeat_pos );

        tophistory[ i ] = top_is_repeat ? 0 : INT_MAX;
        bothistory[ i ] = bot_is_repeat ? 0 : INT_MAX;

        tophistory_diff[ i ] = top_is_repeat;
        bothistory_diff[ i ] = bot_is_repeat;
    }

    histpos = 0;
}
( minpos + 4 ) : ( minpos + 2 ); + } + best = best % HISTORY_SIZE; + *realbest = 1 << ( ( histpos + (2*HISTORY_SIZE) - best ) % HISTORY_SIZE ); + + best = (minbotpos + 2) % 5; + ret = 1 << ( ( histpos + (2*HISTORY_SIZE) - best ) % HISTORY_SIZE ); + best = (mintoppos + 4) % 5; + ret |= 1 << ( ( histpos + (2*HISTORY_SIZE) - best ) % HISTORY_SIZE ); + + histpos = (histpos + 1) % HISTORY_SIZE; + return ret; +} + +static int reference = 0; + +int determine_pulldown_offset_history_new( int top_repeat, int bot_repeat, int tff, int predicted ) +{ + int avgbot = 0; + int avgtop = 0; + int i, j; + int ret; + int mintopval = -1; + int mintoppos = -1; + int min2topval = -1; + int min2toppos = -1; + int minbotval = -1; + int minbotpos = -1; + int min2botval = -1; + int min2botpos = -1; + int predicted_pos = 0; + + tophistory[ histpos ] = top_repeat; + bothistory[ histpos ] = bot_repeat; + + for( j = 0; j < HISTORY_SIZE; j++ ) { + avgtop += tophistory[ j ]; + avgbot += bothistory[ j ]; + } + avgtop /= 5; + avgbot /= 5; + + for( i = 0; i < 5; i++ ) { if( (1<<i) == predicted ) { predicted_pos = i; break; } } + + /* + fprintf( stderr, "top: %8d bot: %8d\ttop-avg: %8d bot-avg: %8d (%d)\n", top_repeat, bot_repeat, top_repeat - avgtop, bot_repeat - avgbot, (5 + predicted_pos - reference) % 5 ); + */ + + for( j = 0; j < HISTORY_SIZE; j++ ) { + int cur = tophistory[ j ]; + if( cur < mintopval || mintopval < 0 ) { + min2topval = mintopval; + min2toppos = mintoppos; + mintopval = cur; + mintoppos = j; + } else if( cur < min2topval || min2topval < 0 ) { + min2topval = cur; + min2toppos = j; + } + } + + for( j = 0; j < HISTORY_SIZE; j++ ) { + int cur = bothistory[ j ]; + if( cur < minbotval || minbotval < 0 ) { + min2botval = minbotval; + min2botpos = minbotpos; + minbotval = cur; + minbotpos = j; + } else if( cur < min2botval || min2botval < 0 ) { + min2botval = cur; + min2botpos = j; + } + } + + tophistory_diff[ histpos ] = ((mintoppos == histpos) || (min2toppos == histpos)); + 
bothistory_diff[ histpos ] = ((minbotpos == histpos) || (min2botpos == histpos)); + + ret = 0; + for( i = 0; i < 5; i++ ) { + int valid = 1; + for( j = 0; j < 5; j++ ) { + // if( tff_top_pattern[ j ] && !tophistory_diff[ (i + j) % 5 ] && tophistory[ (i + j) % 5 ] != mintopval ) { + if( tff_top_pattern[ j ] && (tophistory[ (i + j) % 5 ] > avgtop || !tophistory_diff[ (i + j) % 5 ]) ) { + valid = 0; + break; + } + // if( tff_bot_pattern[ j ] && !bothistory_diff[ (i + j) % 5 ] && bothistory[ (i + j) % 5 ] != minbotval ) { + if( tff_bot_pattern[ j ] && (bothistory[ (i + j) % 5 ] > avgbot || !bothistory_diff[ (i + j) % 5 ]) ) { + valid = 0; + break; + } + } + if( valid ) ret |= (1<<(((5-i)+histpos)%5)); + } + + /* + fprintf( stderr, "ret: %d %d %d %d %d\n", + PULLDOWN_OFFSET_1 & ret, + PULLDOWN_OFFSET_2 & ret, + PULLDOWN_OFFSET_3 & ret, + PULLDOWN_OFFSET_4 & ret, + PULLDOWN_OFFSET_5 & ret ); + */ + + histpos = (histpos + 1) % HISTORY_SIZE; + reference = (reference + 1) % 5; + + if( !ret ) { + /* No pulldown sequence is valid, return an error. */ + return 0; + } else if( !(predicted & ret) ) { + /** + * We have a valid sequence, but it doesn't match our prediction. + * Return the first 'valid' sequence in the list. + */ + for( i = 0; i < 5; i++ ) { if( ret & (1<<i) ) return (1<<i); } + } + + /** + * The predicted phase is still valid. 
+ */ + return predicted; +} + +int determine_pulldown_offset_short_history_new( int top_repeat, int bot_repeat, int tff, int predicted ) +{ + int avgbot = 0; + int avgtop = 0; + int i, j; + int ret; + int mintopval = -1; + int mintoppos = -1; + int min2topval = -1; + int min2toppos = -1; + int minbotval = -1; + int minbotpos = -1; + int min2botval = -1; + int min2botpos = -1; + int predicted_pos = 0; + + tophistory[ histpos ] = top_repeat; + bothistory[ histpos ] = bot_repeat; + + for( j = 0; j < 3; j++ ) { + avgtop += tophistory[ (histpos + 5 - j) % 5 ]; + avgbot += bothistory[ (histpos + 5 - j) % 5 ]; + } + avgtop /= 3; + avgbot /= 3; + + for( i = 0; i < 5; i++ ) { if( (1<<i) == predicted ) { predicted_pos = i; break; } } + + /* + fprintf( stderr, "top: %8d bot: %8d\ttop-avg: %8d bot-avg: %8d (%d)\n", + top_repeat, bot_repeat, top_repeat - avgtop, bot_repeat - avgbot, + (5 + predicted_pos - reference) % 5 ); + */ + + for( j = 0; j < 3; j++ ) { + int cur = tophistory[ (histpos + 5 - j) % 5 ]; + if( cur < mintopval || mintopval < 0 ) { + min2topval = mintopval; + min2toppos = mintoppos; + mintopval = cur; + mintoppos = j; + } else if( cur < min2topval || min2topval < 0 ) { + min2topval = cur; + min2toppos = j; + } + } + + for( j = 0; j < 3; j++ ) { + int cur = bothistory[ (histpos + 5 - j) % 5 ]; + if( cur < minbotval || minbotval < 0 ) { + min2botval = minbotval; + min2botpos = minbotpos; + minbotval = cur; + minbotpos = j; + } else if( cur < min2botval || min2botval < 0 ) { + min2botval = cur; + min2botpos = j; + } + } + + tophistory_diff[ histpos ] = ((mintoppos == histpos) || (min2toppos == histpos)); + bothistory_diff[ histpos ] = ((minbotpos == histpos) || (min2botpos == histpos)); + + ret = 0; + for( i = 0; i < 5; i++ ) { + int valid = 1; + for( j = 0; j < 3; j++ ) { + // if( tff_top_pattern[ j ] && !tophistory_diff[ (i + j) % 5 ] && tophistory[ (i + j) % 5 ] != mintopval ) { + // if( tff_top_pattern[ j ] && (tophistory[ (i + j) % 5 ] > avgtop || 
!tophistory_diff[ (i + j) % 5 ]) ) { + if( tff_top_pattern[ (i + 5 - j) % 5 ] && tophistory[ (histpos + 5 - j) % 5 ] > avgtop ) { + // if( tff_top_pattern[ (i + 5 - j) % 5 ] && !tophistory_diff[ (histpos + 5 - j) % 5 ] && tophistory[ (histpos + 5 - j) % 5 ] != mintopval ) { + valid = 0; + break; + } + // if( tff_bot_pattern[ j ] && !bothistory_diff[ (i + j) % 5 ] && bothistory[ (i + j) % 5 ] != minbotval ) { + // if( tff_bot_pattern[ j ] && (bothistory[ (i + j) % 5 ] > avgbot || !bothistory_diff[ (i + j) % 5 ]) ) { + if( tff_bot_pattern[ (i + 5 - j) % 5 ] && bothistory[ (histpos + 5 - j) % 5 ] > avgbot ) { + // if( tff_bot_pattern[ (i + 5 - j) % 5 ] && !bothistory_diff[ (histpos + 5 - j) % 5 ] && bothistory[ (histpos + 5 - j) % 5 ] != minbotval ) { + valid = 0; + break; + } + } + if( valid ) ret |= (1<<i); + } + + /* + fprintf( stderr, "ret: %d %d %d %d %d\n", + PULLDOWN_OFFSET_1 & ret, + PULLDOWN_OFFSET_2 & ret, + PULLDOWN_OFFSET_3 & ret, + PULLDOWN_OFFSET_4 & ret, + PULLDOWN_OFFSET_5 & ret ); + */ + + histpos = (histpos + 1) % HISTORY_SIZE; + reference = (reference + 1) % 5; + + if( !ret ) { + /* No pulldown sequence is valid, return an error. */ + return 0; + } else if( !(predicted & ret) ) { + /** + * We have a valid sequence, but it doesn't match our prediction. + * Return the first 'valid' sequence in the list. + */ + for( i = 0; i < 5; i++ ) { if( ret & (1<<i) ) return (1<<i); } + } + + /** + * The predicted phase is still valid. 
+ */ + return predicted; +} + +int determine_pulldown_offset_dalias( pulldown_metrics_t *old_peak, + pulldown_metrics_t *old_relative, + pulldown_metrics_t *old_mean, + pulldown_metrics_t *new_peak, + pulldown_metrics_t *new_relative, + pulldown_metrics_t *new_mean ) +{ + int laced = 0; + + if (old_peak->d > 360) { + if (3*old_relative->e < old_relative->o) laced=1; + if ((2*old_relative->d < old_relative->s) && (old_relative->s > 600)) + laced=1; + } + if (new_peak->d > 360) { + if ((2*new_relative->t < new_relative->p) && (new_relative->p > 600)) + laced=1; + } + if( !laced ) return PULLDOWN_ACTION_NEXT_PREV; + + if (new_relative->t < 2*new_relative->p) { + if ((3*old_relative->e < old_relative->o) || (2*new_relative->t < new_relative->p)) { + return PULLDOWN_ACTION_PREV_NEXT; + } + } + return PULLDOWN_ACTION_PREV_NEXT; +} + +#define MAXUP(a,b) ((a) = ((a)>(b)) ? (a) : (b)) + +void diff_factor_packed422_frame( pulldown_metrics_t *peak, pulldown_metrics_t *rel, pulldown_metrics_t *mean, + uint8_t *old, uint8_t *new, int w, int h, int os, int ns ) +{ + int x, y; + pulldown_metrics_t l; + memset(peak, 0, sizeof(pulldown_metrics_t)); + memset(rel, 0, sizeof(pulldown_metrics_t)); + memset(mean, 0, sizeof(pulldown_metrics_t)); + for (y = 0; y < h-7; y += 8) { + for (x = 8; x < w-8-7; x += 8) { + diff_packed422_block8x8(&l, old+x+y*os, new+x+y*ns, os, ns); + mean->d += l.d; + mean->e += l.e; + mean->o += l.o; + mean->s += l.s; + mean->p += l.p; + mean->t += l.t; + MAXUP(peak->d, l.d); + MAXUP(peak->e, l.e); + MAXUP(peak->o, l.o); + MAXUP(peak->s, l.s); + MAXUP(peak->p, l.p); + MAXUP(peak->t, l.t); + MAXUP(rel->e, l.e-l.o); + MAXUP(rel->o, l.o-l.e); + MAXUP(rel->s, l.s-l.t); + MAXUP(rel->p, l.p-l.t); + MAXUP(rel->t, l.t-l.p); + MAXUP(rel->d, l.t-l.s); /* hack */ + } + } + x = (w/8-2)*(h/8); + mean->d /= x; + mean->e /= x; + mean->o /= x; + mean->s /= x; + mean->p /= x; + mean->t /= x; +} + +int pulldown_source( int action, int bottom_field ) +{ + if( action == 
PULLDOWN_SEQ_AB || action == PULLDOWN_SEQ_BC ) { + return bottom_field; + } else { + return !bottom_field; + } +} + +int pulldown_drop( int action, int bottom_field ) +{ + int ret = 1; + + if( action == PULLDOWN_SEQ_AA && bottom_field ) + ret = 0; + if( action == PULLDOWN_SEQ_BC && !bottom_field ) + ret = 0; + if( action == PULLDOWN_SEQ_CC && !bottom_field ) + ret = 0; + if( action == PULLDOWN_SEQ_DD && bottom_field ) + ret = 0; + + return ret; +} diff --git a/src/post/deinterlace/pulldown.h b/src/post/deinterlace/pulldown.h new file mode 100644 index 000000000..197ec84be --- /dev/null +++ b/src/post/deinterlace/pulldown.h @@ -0,0 +1,60 @@ +/** + * Copyright (c) 2001, 2002, 2003 Billy Biggs <vektor@dumbterm.net>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#ifndef PULLDOWN_H_INCLUDED +#define PULLDOWN_H_INCLUDED + +#include <stdint.h> +#include "speedy.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define PULLDOWN_SEQ_AA (1<<0) /* next - prev */ +#define PULLDOWN_SEQ_AB (1<<1) /* prev - next */ +#define PULLDOWN_SEQ_BC (1<<2) /* prev - next */ +#define PULLDOWN_SEQ_CC (1<<3) /* next - prev */ +#define PULLDOWN_SEQ_DD (1<<4) /* next - prev */ + +#define PULLDOWN_ACTION_NEXT_PREV (1<<0) /* next - prev */ +#define PULLDOWN_ACTION_PREV_NEXT (1<<1) /* prev - next */ + +/** + * Returns 1 if the source is the previous field, 0 if it is + * the next field, for the given action. + */ +int pulldown_source( int action, int bottom_field ); + +int determine_pulldown_offset( int top_repeat, int bot_repeat, int tff, int last_offset ); +int determine_pulldown_offset_history( int top_repeat, int bot_repeat, int tff, int *realbest ); +int determine_pulldown_offset_history_new( int top_repeat, int bot_repeat, int tff, int predicted ); +int determine_pulldown_offset_short_history_new( int top_repeat, int bot_repeat, int tff, int predicted ); +int determine_pulldown_offset_dalias( pulldown_metrics_t *old_peak, pulldown_metrics_t *old_relative, + pulldown_metrics_t *old_mean, pulldown_metrics_t *new_peak, + pulldown_metrics_t *new_relative, pulldown_metrics_t *new_mean ); + +void diff_factor_packed422_frame( pulldown_metrics_t *peak, pulldown_metrics_t *rel, pulldown_metrics_t *mean, + uint8_t *old, uint8_t *new, int w, int h, int os, int ns ); + +int pulldown_drop( int action, int bottom_field ); + +#ifdef __cplusplus +}; +#endif +#endif /* PULLDOWN_H_INCLUDED */ diff --git a/src/post/deinterlace/speedtools.h b/src/post/deinterlace/speedtools.h new file mode 100644 index 000000000..059d8a5f3 --- /dev/null +++ b/src/post/deinterlace/speedtools.h @@ -0,0 +1,46 @@ +/** + * Copyright (c) 2002 Billy Biggs <vektor@dumbterm.net>. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef SPEEDTOOLS_H_INCLUDED +#define SPEEDTOOLS_H_INCLUDED + +#define PREFETCH_2048(x) \ + { int *pfetcha = (int *) x; \ + prefetchnta( pfetcha ); \ + prefetchnta( pfetcha + 64 ); \ + prefetchnta( pfetcha + 128 ); \ + prefetchnta( pfetcha + 192 ); \ + pfetcha += 256; \ + prefetchnta( pfetcha ); \ + prefetchnta( pfetcha + 64 ); \ + prefetchnta( pfetcha + 128 ); \ + prefetchnta( pfetcha + 192 ); } + +#define READ_PREFETCH_2048(x) \ + { int *pfetcha = (int *) x; int pfetchtmp; \ + pfetchtmp = pfetcha[ 0 ] + pfetcha[ 16 ] + pfetcha[ 32 ] + pfetcha[ 48 ] + \ + pfetcha[ 64 ] + pfetcha[ 80 ] + pfetcha[ 96 ] + pfetcha[ 112 ] + \ + pfetcha[ 128 ] + pfetcha[ 144 ] + pfetcha[ 160 ] + pfetcha[ 176 ] + \ + pfetcha[ 192 ] + pfetcha[ 208 ] + pfetcha[ 224 ] + pfetcha[ 240 ]; \ + pfetcha += 256; \ + pfetchtmp = pfetcha[ 0 ] + pfetcha[ 16 ] + pfetcha[ 32 ] + pfetcha[ 48 ] + \ + pfetcha[ 64 ] + pfetcha[ 80 ] + pfetcha[ 96 ] + pfetcha[ 112 ] + \ + pfetcha[ 128 ] + pfetcha[ 144 ] + pfetcha[ 160 ] + pfetcha[ 176 ] + \ + pfetcha[ 192 ] + pfetcha[ 208 ] + pfetcha[ 224 ] + pfetcha[ 240 ]; } + +#endif /* SPEEDTOOLS_H_INCLUDED */ diff --git a/src/post/deinterlace/speedy.c b/src/post/deinterlace/speedy.c new file mode 100644 index 000000000..b06e4bc88 --- /dev/null +++ 
b/src/post/deinterlace/speedy.c @@ -0,0 +1,1856 @@ +/** + * Copyright (c) 2002, 2003 Billy Biggs <vektor@dumbterm.net>. + * Copyright (C) 2001 Matthew J. Marjanovic <maddog@mir.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +/* + * Uses code from: + * + * linux/arch/i386/kernel/setup.c + * + * Copyright (C) 1995 Linus Torvalds + * + * Found in linux 2.4.20. + * + * Also helped from code in 'cpuinfo.c' found in mplayer. + */ + +#include <stdio.h> +#include <string.h> +#include <sys/time.h> +#include <stdint.h> +#include <unistd.h> +#include <ctype.h> + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "attributes.h" +#include "xineutils.h" +#include "speedtools.h" +#include "speedy.h" + +/* Function pointer definitions. 
*/ +void (*interpolate_packed422_scanline)( uint8_t *output, uint8_t *top, + uint8_t *bot, int width ); +void (*blit_colour_packed422_scanline)( uint8_t *output, + int width, int y, int cb, int cr ); +void (*blit_colour_packed4444_scanline)( uint8_t *output, + int width, int alpha, int luma, + int cb, int cr ); +void (*blit_packed422_scanline)( uint8_t *dest, const uint8_t *src, int width ); +void (*composite_packed4444_to_packed422_scanline)( uint8_t *output, uint8_t *input, + uint8_t *foreground, int width ); +void (*composite_packed4444_alpha_to_packed422_scanline)( uint8_t *output, + uint8_t *input, + uint8_t *foreground, + int width, int alpha ); +void (*composite_alphamask_to_packed4444_scanline)( uint8_t *output, + uint8_t *input, + uint8_t *mask, int width, + int textluma, int textcb, + int textcr ); +void (*composite_alphamask_alpha_to_packed4444_scanline)( uint8_t *output, + uint8_t *input, + uint8_t *mask, int width, + int textluma, int textcb, + int textcr, int alpha ); +void (*premultiply_packed4444_scanline)( uint8_t *output, uint8_t *input, int width ); +void (*blend_packed422_scanline)( uint8_t *output, uint8_t *src1, + uint8_t *src2, int width, int pos ); +void (*filter_luma_121_packed422_inplace_scanline)( uint8_t *data, int width ); +void (*filter_luma_14641_packed422_inplace_scanline)( uint8_t *data, int width ); +unsigned int (*diff_factor_packed422_scanline)( uint8_t *cur, uint8_t *old, int width ); +unsigned int (*comb_factor_packed422_scanline)( uint8_t *top, uint8_t *mid, + uint8_t *bot, int width ); +void (*kill_chroma_packed422_inplace_scanline)( uint8_t *data, int width ); +void (*mirror_packed422_inplace_scanline)( uint8_t *data, int width ); +void (*halfmirror_packed422_inplace_scanline)( uint8_t *data, int width ); +void *(*speedy_memcpy)( void *output, const void *input, size_t size ); +void (*diff_packed422_block8x8)( pulldown_metrics_t *m, uint8_t *old, + uint8_t *new, int os, int ns ); +void (*a8_subpix_blit_scanline)( uint8_t 
/* Hooks placed at the entry and exit of the scanline routines; both expand to nothing. */
#define SPEEDY_START()

#define SPEEDY_END()

/**
 * result = (1 - alpha)B + alpha*F
 *        =  B - alpha*B + alpha*F
 *        =  B + alpha*(F - B)
 */

/**
 * Multiplies r by a and scales the product back down with the
 * add-0x80 / fold-high-byte / shift-by-8 sequence, so that
 * multiply_alpha( 255, 255 ) is 255 and multiply_alpha( 0, r ) is 0.
 */
static inline __attribute__ ((always_inline,const)) int multiply_alpha( int a, int r )
{
    const int scaled = (r * a) + 0x80;

    return (scaled + (scaled >> 8)) >> 8;
}

/**
 * Clamps x into the range 0..255.
 */
static inline __attribute__ ((always_inline,const)) uint8_t clip255( int x )
{
    if( x < 0 ) {
        return 0;
    }
    if( x > 255 ) {
        return 255;
    }
    return x;
}

#ifdef ARCH_X86

/**
 * MMX comb detector.  Each iteration loads 8 bytes from each of the three
 * scanlines, keeps the even bytes (mask 0x00ff per word), halves them and
 * counts the words for which
 *     (top - mid) * (bot - mid) - ((top - bot)^2 >> 7)
 * compares greater than CombJaggieThreshold.  width/4 iterations are run
 * and the four 16-bit lane counters are summed for the return value.
 */
static unsigned int comb_factor_packed422_scanline_mmx( uint8_t *top, uint8_t *mid,
                                                        uint8_t *bot, int width )
{
    const mmx_t qwYMask = { 0x00ff00ff00ff00ffULL };
    const mmx_t qwOnes = { 0x0001000100010001ULL };
    mmx_t qwThreshold;
    unsigned int temp1, temp2;
    unsigned long CombJaggieThreshold = 73;

    SPEEDY_START();

    width /= 4;

    qwThreshold.uw[ 0 ] = CombJaggieThreshold;
    qwThreshold.uw[ 1 ] = CombJaggieThreshold;
    qwThreshold.uw[ 2 ] = CombJaggieThreshold;
    qwThreshold.uw[ 3 ] = CombJaggieThreshold;

    movq_m2r( qwThreshold, mm0 );
    movq_m2r( qwYMask, mm1 );
    movq_m2r( qwOnes, mm2 );
    pxor_r2r( mm7, mm7 );         /* mm7 = 0. */

    while( width-- ) {
        /* Load and keep just the luma. */
        movq_m2r( *top, mm3 );
        movq_m2r( *mid, mm4 );
        movq_m2r( *bot, mm5 );

        pand_r2r( mm1, mm3 );
        pand_r2r( mm1, mm4 );
        pand_r2r( mm1, mm5 );

        /* Work out mm6 = (top - mid) * (bot - mid) - ( (top - bot)^2 >> 7 ) */
        psrlw_i2r( 1, mm3 );
        psrlw_i2r( 1, mm4 );
        psrlw_i2r( 1, mm5 );

        /* mm6 = (top - mid) */
        movq_r2r( mm3, mm6 );
        psubw_r2r( mm4, mm6 );

        /* mm3 = (top - bot) */
        psubw_r2r( mm5, mm3 );

        /* mm5 = (bot - mid) */
        psubw_r2r( mm4, mm5 );

        /* mm6 = (top - mid) * (bot - mid) */
        pmullw_r2r( mm5, mm6 );

        /* mm3 = (top - bot)^2 >> 7 */
        pmullw_r2r( mm3, mm3 );   /* mm3 = (top - bot)^2 */
        psrlw_i2r( 7, mm3 );      /* mm3 = ((top - bot)^2 >> 7) */

        /* mm6 is what we want. */
        psubw_r2r( mm3, mm6 );

        /* FF's if greater than qwThreshold */
        pcmpgtw_r2r( mm0, mm6 );

        /* Add to count if we are greater than threshold */
        pand_r2r( mm2, mm6 );
        paddw_r2r( mm6, mm7 );

        top += 8;
        mid += 8;
        bot += 8;
    }

    /* Fold the four 16-bit lane counters of mm7 into one total. */
    movd_r2m( mm7, temp1 );
    psrlq_i2r( 32, mm7 );
    movd_r2m( mm7, temp2 );
    temp1 += temp2;
    temp2 = temp1;
    temp1 >>= 16;
    temp1 += temp2 & 0xffff;

    emms();

    SPEEDY_END();

    return temp1;
}

#endif

/* Right shift applied to every squared difference in the diff_factor routines. */
static unsigned long BitShift = 6;

/**
 * Portable difference metric between two scanlines.
 *
 * The lines are walked in groups of 8 bytes, width/4 groups in total.  In
 * each group the bytes at offsets 0, 2, 4 and 6 are averaged with rounding
 * for both lines, the two averages are subtracted, and the squared
 * difference shifted right by BitShift is added to the result.
 */
static unsigned int diff_factor_packed422_scanline_c( uint8_t *cur, uint8_t *old, int width )
{
    unsigned int total = 0;
    int groups;

    SPEEDY_START();

    groups = width / 4;

    while( groups-- ) {
        const unsigned int cur_avg = (cur[ 0 ] + cur[ 2 ] + cur[ 4 ] + cur[ 6 ] + 2)>>2;
        const unsigned int old_avg = (old[ 0 ] + old[ 2 ] + old[ 4 ] + old[ 6 ] + 2)>>2;
        /* Unsigned subtraction may wrap; the unsigned square below still
           equals the square of the absolute difference. */
        unsigned int delta = cur_avg - old_avg;

        delta *= delta;
        total += delta >> BitShift;

        cur += 8;
        old += 8;
    }

    SPEEDY_END();

    return total;
}
] + old[ 6 ])>>2; + tmp1 = (tmp1 - tmp2); + tmp1 *= tmp1; + tmp1 >>= BitShift; + ret += tmp1; + cur += (8*4); + old += (8*4); + } + SPEEDY_END(); + + return ret; +} + +#ifdef ARCH_X86 + +static unsigned int diff_factor_packed422_scanline_mmx( uint8_t *cur, uint8_t *old, int width ) +{ + const mmx_t qwYMask = { 0x00ff00ff00ff00ffULL }; + unsigned int temp1, temp2; + + SPEEDY_START(); + + width /= 4; + + movq_m2r( qwYMask, mm1 ); + movd_m2r( BitShift, mm7 ); + pxor_r2r( mm0, mm0 ); + + while( width-- ) { + movq_m2r( *cur, mm4 ); + movq_m2r( *old, mm5 ); + + pand_r2r( mm1, mm4 ); + pand_r2r( mm1, mm5 ); + + psubw_r2r( mm5, mm4 ); /* mm4 = Y1 - Y2 */ + pmaddwd_r2r( mm4, mm4 ); /* mm4 = (Y1 - Y2)^2 */ + psrld_r2r( mm7, mm4 ); /* divide mm4 by 2^BitShift */ + paddd_r2r( mm4, mm0 ); /* keep total in mm0 */ + + cur += 8; + old += 8; + } + + movd_r2m( mm0, temp1 ); + psrlq_i2r( 32, mm0 ); + movd_r2m( mm0, temp2 ); + temp1 += temp2; + + emms(); + + SPEEDY_END(); + + return temp1; +} + +#define ABS(a) (((a) < 0)?-(a):(a)) + +static void diff_packed422_block8x8_mmx( pulldown_metrics_t *m, uint8_t *old, + uint8_t *new, int os, int ns ) +{ + const mmx_t ymask = { 0x00ff00ff00ff00ffULL }; + short out[ 24 ]; /* Output buffer for the partial metrics from the mmx code. */ + uint8_t *outdata = (uint8_t *) out; + uint8_t *oldp, *newp; + int i; + + SPEEDY_START(); + + pxor_r2r( mm4, mm4 ); // 4 even difference sums. + pxor_r2r( mm5, mm5 ); // 4 odd difference sums. + pxor_r2r( mm7, mm7 ); // zeros + + oldp = old; newp = new; + for( i = 4; i; --i ) { + // Even difference. 
+ movq_m2r( oldp[0], mm0 ); + movq_m2r( oldp[8], mm2 ); + pand_m2r( ymask, mm0 ); + pand_m2r( ymask, mm2 ); + oldp += os; + + movq_m2r( newp[0], mm1 ); + movq_m2r( newp[8], mm3 ); + pand_m2r( ymask, mm1 ); + pand_m2r( ymask, mm3 ); + newp += ns; + + movq_r2r( mm0, mm6 ); + psubusb_r2r( mm1, mm0 ); + psubusb_r2r( mm6, mm1 ); + movq_r2r( mm2, mm6 ); + psubusb_r2r( mm3, mm2 ); + psubusb_r2r( mm6, mm3 ); + + paddw_r2r( mm0, mm4 ); + paddw_r2r( mm1, mm4 ); + paddw_r2r( mm2, mm4 ); + paddw_r2r( mm3, mm4 ); + + // Odd difference. + movq_m2r( oldp[0], mm0 ); + movq_m2r( oldp[8], mm2 ); + pand_m2r( ymask, mm0 ); + pand_m2r( ymask, mm2 ); + oldp += os; + + movq_m2r( newp[0], mm1 ); + movq_m2r( newp[8], mm3 ); + pand_m2r( ymask, mm1 ); + pand_m2r( ymask, mm3 ); + newp += ns; + + movq_r2r( mm0, mm6 ); + psubusb_r2r( mm1, mm0 ); + psubusb_r2r( mm6, mm1 ); + movq_r2r( mm2, mm6 ); + psubusb_r2r( mm3, mm2 ); + psubusb_r2r( mm6, mm3 ); + + paddw_r2r( mm0, mm5 ); + paddw_r2r( mm1, mm5 ); + paddw_r2r( mm2, mm5 ); + paddw_r2r( mm3, mm5 ); + } + movq_r2m( mm4, outdata[0] ); + movq_r2m( mm5, outdata[8] ); + + m->e = out[0] + out[1] + out[2] + out[3]; + m->o = out[4] + out[5] + out[6] + out[7]; + m->d = m->e + m->o; + + pxor_r2r( mm4, mm4 ); // Past spacial noise. + pxor_r2r( mm5, mm5 ); // Temporal noise. + pxor_r2r( mm6, mm6 ); // Current spacial noise. 
+ + // First loop to measure first four columns + oldp = old; newp = new; + for( i = 4; i; --i ) { + movq_m2r( oldp[0], mm0 ); + movq_m2r( oldp[os], mm1 ); + pand_m2r( ymask, mm0 ); + pand_m2r( ymask, mm1 ); + oldp += (os*2); + + movq_m2r( newp[0], mm2 ); + movq_m2r( newp[ns], mm3 ); + pand_m2r( ymask, mm2 ); + pand_m2r( ymask, mm3 ); + newp += (ns*2); + + paddw_r2r( mm1, mm4 ); + paddw_r2r( mm1, mm5 ); + paddw_r2r( mm3, mm6 ); + psubw_r2r( mm0, mm4 ); + psubw_r2r( mm2, mm5 ); + psubw_r2r( mm2, mm6 ); + } + movq_r2m( mm4, outdata[0] ); + movq_r2m( mm5, outdata[16] ); + movq_r2m( mm6, outdata[32] ); + + pxor_r2r( mm4, mm4 ); + pxor_r2r( mm5, mm5 ); + pxor_r2r( mm6, mm6 ); + + // Second loop for the last four columns + oldp = old; newp = new; + for( i = 4; i; --i ) { + movq_m2r( oldp[8], mm0 ); + movq_m2r( oldp[os+8], mm1 ); + pand_m2r( ymask, mm0 ); + pand_m2r( ymask, mm1 ); + oldp += (os*2); + + movq_m2r( newp[8], mm2 ); + movq_m2r( newp[ns+8], mm3 ); + pand_m2r( ymask, mm2 ); + pand_m2r( ymask, mm3 ); + newp += (ns*2); + + paddw_r2r( mm1, mm4 ); + paddw_r2r( mm1, mm5 ); + paddw_r2r( mm3, mm6 ); + psubw_r2r( mm0, mm4 ); + psubw_r2r( mm2, mm5 ); + psubw_r2r( mm2, mm6 ); + } + movq_r2m( mm4, outdata[8] ); + movq_r2m( mm5, outdata[24] ); + movq_r2m( mm6, outdata[40] ); + + m->p = m->t = m->s = 0; + for (i=0; i<8; i++) { + // FIXME: move abs() into the mmx code! 
+ m->p += ABS(out[i]); + m->t += ABS(out[8+i]); + m->s += ABS(out[16+i]); + } + + emms(); + + SPEEDY_END(); +} + +#endif + +static void diff_packed422_block8x8_c( pulldown_metrics_t *m, uint8_t *old, + uint8_t *new, int os, int ns ) +{ + int x, y, e=0, o=0, s=0, p=0, t=0; + uint8_t *oldp, *newp; + + SPEEDY_START(); + m->s = m->p = m->t = 0; + for (x = 8; x; x--) { + oldp = old; old += 2; + newp = new; new += 2; + s = p = t = 0; + for (y = 4; y; y--) { + e += ABS(newp[0] - oldp[0]); + o += ABS(newp[ns] - oldp[os]); + s += newp[ns]-newp[0]; + p += oldp[os]-oldp[0]; + t += oldp[os]-newp[0]; + oldp += os<<1; + newp += ns<<1; + } + m->s += ABS(s); + m->p += ABS(p); + m->t += ABS(t); + } + m->e = e; + m->o = o; + m->d = e+o; + SPEEDY_END(); +} + +static void packed444_to_packed422_scanline_c( uint8_t *output, uint8_t *input, int width ) +{ + SPEEDY_START(); + width /= 2; + while( width-- ) { + output[ 0 ] = input[ 0 ]; + output[ 1 ] = input[ 1 ]; + output[ 2 ] = input[ 3 ]; + output[ 3 ] = input[ 2 ]; + output += 4; + input += 6; + } + SPEEDY_END(); +} + +static void packed422_to_packed444_scanline_c( uint8_t *output, uint8_t *input, int width ) +{ + SPEEDY_START(); + width /= 2; + while( width-- ) { + output[ 0 ] = input[ 0 ]; + output[ 1 ] = input[ 1 ]; + output[ 2 ] = input[ 3 ]; + output[ 3 ] = input[ 2 ]; + output[ 4 ] = input[ 1 ]; + output[ 5 ] = input[ 3 ]; + output += 6; + input += 4; + } + SPEEDY_END(); +} + +/** + * For the middle pixels, the filter kernel is: + * + * [-1 3 -6 12 -24 80 80 -24 12 -6 3 -1] + */ +void packed422_to_packed444_rec601_scanline( uint8_t *dest, uint8_t *src, int width ) +{ + int i; + + SPEEDY_START(); + /* Process two input pixels at a time. Input is [Y'][Cb][Y'][Cr]. 
*/ + for( i = 0; i < width / 2; i++ ) { + dest[ (i*6) + 0 ] = src[ (i*4) + 0 ]; + dest[ (i*6) + 1 ] = src[ (i*4) + 1 ]; + dest[ (i*6) + 2 ] = src[ (i*4) + 3 ]; + + dest[ (i*6) + 3 ] = src[ (i*4) + 2 ]; + if( i > (5*2) && i < ((width/2) - (6*2)) ) { + dest[ (i*6) + 4 ] = clip255( (( (80*(src[ (i*4) + 1 ] + src[ (i*4) + 5 ])) + - (24*(src[ (i*4) - 3 ] + src[ (i*4) + 9 ])) + + (12*(src[ (i*4) - 7 ] + src[ (i*4) + 13])) + - ( 6*(src[ (i*4) - 11] + src[ (i*4) + 17])) + + ( 3*(src[ (i*4) - 15] + src[ (i*4) + 21])) + - ( (src[ (i*4) - 19] + src[ (i*4) + 25]))) + 64) >> 7 ); + dest[ (i*6) + 5 ] = clip255( (( (80*(src[ (i*4) + 3 ] + src[ (i*4) + 7 ])) + - (24*(src[ (i*4) - 1 ] + src[ (i*4) + 11])) + + (12*(src[ (i*4) - 5 ] + src[ (i*4) + 15])) + - ( 6*(src[ (i*4) - 9 ] + src[ (i*4) + 19])) + + ( 3*(src[ (i*4) - 13] + src[ (i*4) + 23])) + - ( (src[ (i*4) - 17] + src[ (i*4) + 27]))) + 64) >> 7 ); + } else if( i < ((width/2) - 1) ) { + dest[ (i*6) + 4 ] = (src[ (i*4) + 1 ] + src[ (i*4) + 5 ] + 1) >> 1; + dest[ (i*6) + 5 ] = (src[ (i*4) + 3 ] + src[ (i*4) + 7 ] + 1) >> 1; + } else { + dest[ (i*6) + 4 ] = src[ (i*4) + 1 ]; + dest[ (i*6) + 5 ] = src[ (i*4) + 3 ]; + } + } + SPEEDY_END(); +} + +#ifdef ARCH_X86 + +static void kill_chroma_packed422_inplace_scanline_mmx( uint8_t *data, int width ) +{ + const mmx_t ymask = { 0x00ff00ff00ff00ffULL }; + const mmx_t nullchroma = { 0x8000800080008000ULL }; + + SPEEDY_START(); + + movq_m2r( ymask, mm7 ); + movq_m2r( nullchroma, mm6 ); + for(; width > 4; width -= 4 ) { + movq_m2r( *data, mm0 ); + pand_r2r( mm7, mm0 ); + paddb_r2r( mm6, mm0 ); + movq_r2m( mm0, *data ); + data += 8; + } + emms(); + + while( width-- ) { + data[ 1 ] = 128; + data += 2; + } + SPEEDY_END(); +} + +#endif + +static void kill_chroma_packed422_inplace_scanline_c( uint8_t *data, int width ) +{ + SPEEDY_START(); + while( width-- ) { + data[ 1 ] = 128; + data += 2; + } + SPEEDY_END(); +} + +/* +// this duplicates alternate lines in alternate frames to highlight or mute 
+// the effects of chroma crawl. it is not a solution or proper filter. it's +// only for testing. +static void testing_packed422_inplace_scanline_c( uint8_t *data, int width, int scanline ) +{ + volatile static int topbottom = 0; + static uint8_t scanbuffer[2048]; + + SPEEDY_START(); + if( scanline <= 1 ) { + topbottom = scanline; + memcpy(scanbuffer, data, width*2); + } + if ( scanline < 10 ) { + printf("scanline: %d %d\n", scanline, topbottom); + } + if ( ((scanline-topbottom)/2)%2 && scanline > 1 ) { + memcpy(data, scanbuffer, width*2); + } else { + memcpy(scanbuffer, data, width*2); + } + SPEEDY_END(); +} +*/ + +static void mirror_packed422_inplace_scanline_c( uint8_t *data, int width ) +{ + int x, tmp1, tmp2; + int width2 = width*2; + + SPEEDY_START(); + for( x = 0; x < width; x += 2 ) { + tmp1 = data[ x ]; + tmp2 = data[ x+1 ]; + data[ x ] = data[ width2 - x ]; + data[ x+1 ] = data[ width2 - x + 1 ]; + data[ width2 - x ] = tmp1; + data[ width2 - x + 1 ] = tmp2; + } + SPEEDY_END(); +} + +static void halfmirror_packed422_inplace_scanline_c( uint8_t *data, int width ) +{ + int x; + + SPEEDY_START(); + for( x = 0; x < width; x += 2 ) { + data[ width + x ] = data[ width - x ]; + data[ width + x + 1 ] = data[ width - x + 1 ]; + } + SPEEDY_END(); +} + +static void filter_luma_121_packed422_inplace_scanline_c( uint8_t *data, int width ) +{ + int r1 = 0; + int r2 = 0; + + SPEEDY_START(); + data += 2; + width -= 1; + while( width-- ) { + int s1, s2; + s1 = *data + r1; r1 = *data; + s2 = s1 + r2; r2 = s1; + *(data - 2) = s2 >> 2; + data += 2; + } + SPEEDY_END(); +} + +static void filter_luma_14641_packed422_inplace_scanline_c( uint8_t *data, int width ) +{ + int r1 = 0; + int r2 = 0; + int r3 = 0; + int r4 = 0; + + SPEEDY_START(); + width -= 4; + data += 4; + while( width-- ) { + int s1, s2, s3, s4; + s1 = *data + r1; r1 = *data; + s2 = s1 + r2; r2 = s1; + s3 = s2 + r3; r3 = s2; + s4 = s3 + r4; r4 = s3; + *(data - 4) = s4 >> 4; + data += 2; + } + SPEEDY_END(); +} + 
+static void interpolate_packed422_scanline_c( uint8_t *output, uint8_t *top, + uint8_t *bot, int width ) +{ + int i; + + SPEEDY_START(); + + for( i = width*2; i; --i ) { + *output++ = ((*top++) + (*bot++)) >> 1; + } + + SPEEDY_END(); +} + +#ifdef ARCH_X86 + +static void interpolate_packed422_scanline_mmx( uint8_t *output, uint8_t *top, + uint8_t *bot, int width ) +{ + const mmx_t shiftmask = { 0xfefffefffefffeffULL }; /* To avoid shifting chroma to luma. */ + int i; + + SPEEDY_START(); + + for( i = width/16; i; --i ) { + movq_m2r( *bot, mm0 ); + movq_m2r( *top, mm1 ); + movq_m2r( *(bot + 8), mm2 ); + movq_m2r( *(top + 8), mm3 ); + movq_m2r( *(bot + 16), mm4 ); + movq_m2r( *(top + 16), mm5 ); + movq_m2r( *(bot + 24), mm6 ); + movq_m2r( *(top + 24), mm7 ); + pand_m2r( shiftmask, mm0 ); + pand_m2r( shiftmask, mm1 ); + pand_m2r( shiftmask, mm2 ); + pand_m2r( shiftmask, mm3 ); + pand_m2r( shiftmask, mm4 ); + pand_m2r( shiftmask, mm5 ); + pand_m2r( shiftmask, mm6 ); + pand_m2r( shiftmask, mm7 ); + psrlw_i2r( 1, mm0 ); + psrlw_i2r( 1, mm1 ); + psrlw_i2r( 1, mm2 ); + psrlw_i2r( 1, mm3 ); + psrlw_i2r( 1, mm4 ); + psrlw_i2r( 1, mm5 ); + psrlw_i2r( 1, mm6 ); + psrlw_i2r( 1, mm7 ); + paddb_r2r( mm1, mm0 ); + paddb_r2r( mm3, mm2 ); + paddb_r2r( mm5, mm4 ); + paddb_r2r( mm7, mm6 ); + movq_r2m( mm0, *output ); + movq_r2m( mm2, *(output + 8) ); + movq_r2m( mm4, *(output + 16) ); + movq_r2m( mm6, *(output + 24) ); + output += 32; + top += 32; + bot += 32; + } + width = (width & 0xf); + + for( i = width/4; i; --i ) { + movq_m2r( *bot, mm0 ); + movq_m2r( *top, mm1 ); + pand_m2r( shiftmask, mm0 ); + pand_m2r( shiftmask, mm1 ); + psrlw_i2r( 1, mm0 ); + psrlw_i2r( 1, mm1 ); + paddb_r2r( mm1, mm0 ); + movq_r2m( mm0, *output ); + output += 8; + top += 8; + bot += 8; + } + width = width & 0x7; + + /* Handle last few pixels. 
*/ + for( i = width * 2; i; --i ) { + *output++ = ((*top++) + (*bot++)) >> 1; + } + + emms(); + + SPEEDY_END(); +} + +static void interpolate_packed422_scanline_mmxext( uint8_t *output, uint8_t *top, + uint8_t *bot, int width ) +{ + int i; + + SPEEDY_START(); + + for( i = width/16; i; --i ) { + movq_m2r( *bot, mm0 ); + movq_m2r( *top, mm1 ); + movq_m2r( *(bot + 8), mm2 ); + movq_m2r( *(top + 8), mm3 ); + movq_m2r( *(bot + 16), mm4 ); + movq_m2r( *(top + 16), mm5 ); + movq_m2r( *(bot + 24), mm6 ); + movq_m2r( *(top + 24), mm7 ); + pavgb_r2r( mm1, mm0 ); + pavgb_r2r( mm3, mm2 ); + pavgb_r2r( mm5, mm4 ); + pavgb_r2r( mm7, mm6 ); + movntq_r2m( mm0, *output ); + movntq_r2m( mm2, *(output + 8) ); + movntq_r2m( mm4, *(output + 16) ); + movntq_r2m( mm6, *(output + 24) ); + output += 32; + top += 32; + bot += 32; + } + width = (width & 0xf); + + for( i = width/4; i; --i ) { + movq_m2r( *bot, mm0 ); + movq_m2r( *top, mm1 ); + pavgb_r2r( mm1, mm0 ); + movntq_r2m( mm0, *output ); + output += 8; + top += 8; + bot += 8; + } + width = width & 0x7; + + /* Handle last few pixels. 
*/ + for( i = width * 2; i; --i ) { + *output++ = ((*top++) + (*bot++)) >> 1; + } + + sfence(); + emms(); + + SPEEDY_END(); +} + +#endif + +static void blit_colour_packed422_scanline_c( uint8_t *output, int width, int y, int cb, int cr ) +{ + uint32_t colour = cr << 24 | y << 16 | cb << 8 | y; + uint32_t *o = (uint32_t *) output; + + SPEEDY_START(); + + for( width /= 2; width; --width ) { + *o++ = colour; + } + + SPEEDY_END(); +} + +#ifdef ARCH_X86 + +static void blit_colour_packed422_scanline_mmx( uint8_t *output, int width, int y, int cb, int cr ) +{ + uint32_t colour = cr << 24 | y << 16 | cb << 8 | y; + int i; + + SPEEDY_START(); + + movd_m2r( colour, mm1 ); + movd_m2r( colour, mm2 ); + psllq_i2r( 32, mm1 ); + por_r2r( mm1, mm2 ); + + for( i = width / 16; i; --i ) { + movq_r2m( mm2, *output ); + movq_r2m( mm2, *(output + 8) ); + movq_r2m( mm2, *(output + 16) ); + movq_r2m( mm2, *(output + 24) ); + output += 32; + } + width = (width & 0xf); + + for( i = width / 4; i; --i ) { + movq_r2m( mm2, *output ); + output += 8; + } + width = (width & 0x7); + + for( i = width / 2; i; --i ) { + *((uint32_t *) output) = colour; + output += 4; + } + + if( width & 1 ) { + *output = y; + *(output + 1) = cb; + } + + emms(); + + SPEEDY_END(); +} + +static void blit_colour_packed422_scanline_mmxext( uint8_t *output, int width, int y, int cb, int cr ) +{ + uint32_t colour = cr << 24 | y << 16 | cb << 8 | y; + int i; + + SPEEDY_START(); + + movd_m2r( colour, mm1 ); + movd_m2r( colour, mm2 ); + psllq_i2r( 32, mm1 ); + por_r2r( mm1, mm2 ); + + for( i = width / 16; i; --i ) { + movntq_r2m( mm2, *output ); + movntq_r2m( mm2, *(output + 8) ); + movntq_r2m( mm2, *(output + 16) ); + movntq_r2m( mm2, *(output + 24) ); + output += 32; + } + width = (width & 0xf); + + for( i = width / 4; i; --i ) { + movntq_r2m( mm2, *output ); + output += 8; + } + width = (width & 0x7); + + for( i = width / 2; i; --i ) { + *((uint32_t *) output) = colour; + output += 4; + } + + if( width & 1 ) { + *output = 
y; + *(output + 1) = cb; + } + + sfence(); + emms(); + + SPEEDY_END(); +} + +#endif + +static void blit_colour_packed4444_scanline_c( uint8_t *output, int width, + int alpha, int luma, int cb, int cr ) +{ + int j; + + SPEEDY_START(); + + for( j = 0; j < width; j++ ) { + *output++ = alpha; + *output++ = luma; + *output++ = cb; + *output++ = cr; + } + + SPEEDY_END(); +} + +#ifdef ARCH_X86 + +static void blit_colour_packed4444_scanline_mmx( uint8_t *output, int width, + int alpha, int luma, + int cb, int cr ) +{ + uint32_t colour = (cr << 24) | (cb << 16) | (luma << 8) | alpha; + int i; + + SPEEDY_START(); + + movd_m2r( colour, mm1 ); + movd_m2r( colour, mm2 ); + psllq_i2r( 32, mm1 ); + por_r2r( mm1, mm2 ); + + for( i = width / 8; i; --i ) { + movq_r2m( mm2, *output ); + movq_r2m( mm2, *(output + 8) ); + movq_r2m( mm2, *(output + 16) ); + movq_r2m( mm2, *(output + 24) ); + output += 32; + } + width = (width & 0x7); + + for( i = width / 2; i; --i ) { + movq_r2m( mm2, *output ); + output += 8; + } + width = (width & 0x1); + + if( width ) { + *((uint32_t *) output) = colour; + output += 4; + } + + emms(); + + SPEEDY_END(); +} + +void blit_colour_packed4444_scanline_mmxext( uint8_t *output, int width, + int alpha, int luma, + int cb, int cr ) +{ + uint32_t colour = (cr << 24) | (cb << 16) | (luma << 8) | alpha; + int i; + + SPEEDY_START(); + + movd_m2r( colour, mm1 ); + movd_m2r( colour, mm2 ); + psllq_i2r( 32, mm1 ); + por_r2r( mm1, mm2 ); + + for( i = width / 8; i; --i ) { + movntq_r2m( mm2, *output ); + movntq_r2m( mm2, *(output + 8) ); + movntq_r2m( mm2, *(output + 16) ); + movntq_r2m( mm2, *(output + 24) ); + output += 32; + } + width = (width & 0x7); + + for( i = width / 2; i; --i ) { + movntq_r2m( mm2, *output ); + output += 8; + } + width = (width & 0x1); + + if( width ) { + *((uint32_t *) output) = colour; + output += 4; + } + + sfence(); + emms(); + + SPEEDY_END(); +} + +#endif + +static void blit_packed422_scanline_c( uint8_t *dest, const uint8_t *src, int 
width ) +{ + xine_fast_memcpy( dest, src, width*2 ); +} + +static void blit_packed422_scanline_mmx( uint8_t *dest, const uint8_t *src, int width ) +{ + xine_fast_memcpy( dest, src, width*2 ); +} + +static void blit_packed422_scanline_mmxext( uint8_t *dest, const uint8_t *src, int width ) +{ + xine_fast_memcpy( dest, src, width*2 ); +} + +static void composite_packed4444_alpha_to_packed422_scanline_c( uint8_t *output, uint8_t *input, + uint8_t *foreground, int width, int alpha ) +{ + int i; + + SPEEDY_START(); + for( i = 0; i < width; i++ ) { + int af = foreground[ 0 ]; + + if( af ) { + int a = ((af * alpha) + 0x80) >> 8; + + + if( a == 0xff ) { + output[ 0 ] = foreground[ 1 ]; + + if( ( i & 1 ) == 0 ) { + output[ 1 ] = foreground[ 2 ]; + output[ 3 ] = foreground[ 3 ]; + } + } else if( a ) { + /** + * (1 - alpha)*B + alpha*F + * (1 - af*a)*B + af*a*F + * B - af*a*B + af*a*F + * B + a*(af*F - af*B) + */ + + output[ 0 ] = input[ 0 ] + + ((alpha*( foreground[ 1 ] + - multiply_alpha( foreground[ 0 ], input[ 0 ] ) ) + 0x80) >> 8); + + if( ( i & 1 ) == 0 ) { + + /** + * At first I thought I was doing this incorrectly, but + * the following math has convinced me otherwise. 
+ * + * C_r = (1 - alpha)*B + alpha*F + * C_r = B - af*a*B + af*a*F + * + * C_r = 128 + ((1 - af*a)*(B - 128) + a*af*(F - 128)) + * C_r = 128 + (B - af*a*B - 128 + af*a*128 + a*af*F - a*af*128) + * C_r = B - af*a*B + a*af*F + */ + + output[ 1 ] = input[ 1 ] + ((alpha*( foreground[ 2 ] + - multiply_alpha( foreground[ 0 ], input[ 1 ] ) ) + 0x80) >> 8); + output[ 3 ] = input[ 3 ] + ((alpha*( foreground[ 3 ] + - multiply_alpha( foreground[ 0 ], input[ 3 ] ) ) + 0x80) >> 8); + } + } + } + foreground += 4; + output += 2; + input += 2; + } + SPEEDY_END(); +} + +#ifdef ARCH_X86 + +static void composite_packed4444_alpha_to_packed422_scanline_mmxext( uint8_t *output, + uint8_t *input, + uint8_t *foreground, + int width, int alpha ) +{ + const mmx_t alpha2 = { 0x0000FFFF00000000ULL }; + const mmx_t alpha1 = { 0xFFFF0000FFFFFFFFULL }; + const mmx_t round = { 0x0080008000800080ULL }; + int i; + + if( !alpha ) { + blit_packed422_scanline( output, input, width ); + return; + } + + if( alpha == 256 ) { + composite_packed4444_to_packed422_scanline( output, input, foreground, width ); + return; + } + + SPEEDY_START(); + READ_PREFETCH_2048( input ); + READ_PREFETCH_2048( foreground ); + + movq_m2r( alpha, mm2 ); + pshufw_r2r( mm2, mm2, 0 ); + pxor_r2r( mm7, mm7 ); + + for( i = width/2; i; i-- ) { + int fg1 = *((uint32_t *) foreground); + int fg2 = *(((uint32_t *) foreground)+1); + + if( fg1 || fg2 ) { + /* mm1 = [ cr ][ y ][ cb ][ y ] */ + movd_m2r( *input, mm1 ); + punpcklbw_r2r( mm7, mm1 ); + + movq_m2r( *foreground, mm3 ); + movq_r2r( mm3, mm4 ); + punpcklbw_r2r( mm7, mm3 ); + punpckhbw_r2r( mm7, mm4 ); + /* mm3 and mm4 will be the appropriate colours, mm5 and mm6 for alpha. 
*/ + + /* [ 3 cr ][ 2 cb ][ 1 y ][ 0 a ] -> [ 0 a ][ 0 a ][ 0 a ][ 0 a ] */ + pshufw_r2r( mm3, mm5, 0 ); + pshufw_r2r( mm4, mm6, 0 ); + /* [ 3 cr ][ 2 cb ][ 1 y ][ 0 a ] -> [ 3 cr ][ 0 a ][ 2 cb ][ 1 y ] == 11001000 == 201 */ + pshufw_r2r( mm3, mm3, 201 ); + /* [ 3 cr ][ 2 cb ][ 1 y ][ 0 a ] -> [ 0 a ][ 1 y ][ 0 a ][ 0 a ] == 00010000 == 16 */ + pshufw_r2r( mm4, mm4, 16 ); + + pand_m2r( alpha1, mm3 ); + pand_m2r( alpha2, mm4 ); + pand_m2r( alpha1, mm5 ); + pand_m2r( alpha2, mm6 ); + por_r2r( mm4, mm3 ); + por_r2r( mm6, mm5 ); + + /* now, mm5 is af and mm1 is B. Need to multiply them. */ + pmullw_r2r( mm1, mm5 ); + + /* Multiply by appalpha. */ + pmullw_r2r( mm2, mm3 ); + paddw_m2r( round, mm3 ); + psrlw_i2r( 8, mm3 ); + /* Result is now B + F. */ + paddw_r2r( mm3, mm1 ); + + /* Round up appropriately. */ + paddw_m2r( round, mm5 ); + + /* mm6 contains our i>>8; */ + movq_r2r( mm5, mm6 ); + psrlw_i2r( 8, mm6 ); + + /* Add mm6 back into mm5. Now our result is in the high bytes. */ + paddw_r2r( mm6, mm5 ); + + /* Shift down. */ + psrlw_i2r( 8, mm5 ); + + /* Multiply by appalpha. 
*/ + pmullw_r2r( mm2, mm5 ); + paddw_m2r( round, mm5 ); + psrlw_i2r( 8, mm5 ); + + psubusw_r2r( mm5, mm1 ); + + /* mm1 = [ B + F - af*B ] */ + packuswb_r2r( mm1, mm1 ); + movd_r2m( mm1, *output ); + } + + foreground += 8; + output += 4; + input += 4; + } + sfence(); + emms(); + + SPEEDY_END(); +} + +#endif + +static void composite_packed4444_to_packed422_scanline_c( uint8_t *output, uint8_t *input, + uint8_t *foreground, int width ) +{ + int i; + SPEEDY_START(); + for( i = 0; i < width; i++ ) { + int a = foreground[ 0 ]; + + if( a == 0xff ) { + output[ 0 ] = foreground[ 1 ]; + + if( ( i & 1 ) == 0 ) { + output[ 1 ] = foreground[ 2 ]; + output[ 3 ] = foreground[ 3 ]; + } + } else if( a ) { + /** + * (1 - alpha)*B + alpha*F + * B + af*F - af*B + */ + + output[ 0 ] = input[ 0 ] + foreground[ 1 ] - multiply_alpha( foreground[ 0 ], input[ 0 ] ); + + if( ( i & 1 ) == 0 ) { + + /** + * C_r = (1 - af)*B + af*F + * C_r = B - af*B + af*F + */ + + output[ 1 ] = input[ 1 ] + foreground[ 2 ] - multiply_alpha( foreground[ 0 ], input[ 1 ] ); + output[ 3 ] = input[ 3 ] + foreground[ 3 ] - multiply_alpha( foreground[ 0 ], input[ 3 ] ); + } + } + foreground += 4; + output += 2; + input += 2; + } + SPEEDY_END(); +} + +#ifdef ARCH_X86 + +static void composite_packed4444_to_packed422_scanline_mmxext( uint8_t *output, uint8_t *input, + uint8_t *foreground, int width ) +{ + const mmx_t alpha2 = { 0x0000FFFF00000000ULL }; + const mmx_t alpha1 = { 0xFFFF0000FFFFFFFFULL }; + const mmx_t round = { 0x0080008000800080ULL }; + int i; + + SPEEDY_START(); + READ_PREFETCH_2048( input ); + READ_PREFETCH_2048( foreground ); + + pxor_r2r( mm7, mm7 ); + for( i = width/2; i; i-- ) { + int fg1 = *((uint32_t *) foreground); + int fg2 = *(((uint32_t *) foreground)+1); + + if( (fg1 & 0xff) == 0xff && (fg2 & 0xff) == 0xff ) { + movq_m2r( *foreground, mm3 ); + movq_r2r( mm3, mm4 ); + punpcklbw_r2r( mm7, mm3 ); + punpckhbw_r2r( mm7, mm4 ); + /* mm3 and mm4 will be the appropriate colours, mm5 and mm6 for 
alpha. */ + /* [ 3 cr ][ 2 cb ][ 1 y ][ 0 a ] -> [ 3 cr ][ 0 a ][ 2 cb ][ 1 y ] == 11001000 == 201 */ + pshufw_r2r( mm3, mm3, 201 ); + /* [ 3 cr ][ 2 cb ][ 1 y ][ 0 a ] -> [ 0 a ][ 1 y ][ 0 a ][ 0 a ] == 00010000 == 16 */ + pshufw_r2r( mm4, mm4, 16 ); + pand_m2r( alpha1, mm3 ); + pand_m2r( alpha2, mm4 ); + por_r2r( mm4, mm3 ); + /* mm1 = [ B + F - af*B ] */ + packuswb_r2r( mm3, mm3 ); + movd_r2m( mm3, *output ); + } else if( fg1 || fg2 ) { + + /* mm1 = [ cr ][ y ][ cb ][ y ] */ + movd_m2r( *input, mm1 ); + punpcklbw_r2r( mm7, mm1 ); + + movq_m2r( *foreground, mm3 ); + movq_r2r( mm3, mm4 ); + punpcklbw_r2r( mm7, mm3 ); + punpckhbw_r2r( mm7, mm4 ); + /* mm3 and mm4 will be the appropriate colours, mm5 and mm6 for alpha. */ + + /* [ 3 cr ][ 2 cb ][ 1 y ][ 0 a ] -> [ 0 a ][ 0 a ][ 0 a ][ 0 a ] */ + pshufw_r2r( mm3, mm5, 0 ); + pshufw_r2r( mm4, mm6, 0 ); + /* [ 3 cr ][ 2 cb ][ 1 y ][ 0 a ] -> [ 3 cr ][ 0 a ][ 2 cb ][ 1 y ] == 11001000 == 201 */ + pshufw_r2r( mm3, mm3, 201 ); + /* [ 3 cr ][ 2 cb ][ 1 y ][ 0 a ] -> [ 0 a ][ 1 y ][ 0 a ][ 0 a ] == 00010000 == 16 */ + pshufw_r2r( mm4, mm4, 16 ); + + pand_m2r( alpha1, mm3 ); + pand_m2r( alpha2, mm4 ); + pand_m2r( alpha1, mm5 ); + pand_m2r( alpha2, mm6 ); + por_r2r( mm4, mm3 ); + por_r2r( mm6, mm5 ); + + /* now, mm5 is af and mm1 is B. Need to multiply them. */ + pmullw_r2r( mm1, mm5 ); + + /* Result is now B + F. */ + paddw_r2r( mm3, mm1 ); + + /* Round up appropriately. */ + paddw_m2r( round, mm5 ); + + /* mm6 contains our i>>8; */ + movq_r2r( mm5, mm6 ); + psrlw_i2r( 8, mm6 ); + + /* Add mm6 back into mm5. Now our result is in the high bytes. */ + paddw_r2r( mm6, mm5 ); + + /* Shift down. */ + psrlw_i2r( 8, mm5 ); + + psubusw_r2r( mm5, mm1 ); + + /* mm1 = [ B + F - af*B ] */ + packuswb_r2r( mm1, mm1 ); + movd_r2m( mm1, *output ); + } + + foreground += 8; + output += 4; + input += 4; + } + sfence(); + emms(); + + SPEEDY_END(); +} + +#endif + +/** + * um... just need some scrap paper... 
+ * D = (1 - alpha)*B + alpha*F + * D = (1 - a)*B + a*textluma + * = B - a*B + a*textluma + * = B + a*(textluma - B) + * Da = (1 - a)*b + a + */ +static void composite_alphamask_to_packed4444_scanline_c( uint8_t *output, + uint8_t *input, + uint8_t *mask, + int width, + int textluma, int textcb, + int textcr ) +{ + uint32_t opaque = (textcr << 24) | (textcb << 16) | (textluma << 8) | 0xff; + int i; + + SPEEDY_START(); + + for( i = 0; i < width; i++ ) { + int a = *mask; + + if( a == 0xff ) { + *((uint32_t *) output) = opaque; + } else if( (input[ 0 ] == 0x00) ) { + *((uint32_t *) output) = (multiply_alpha( a, textcr ) << 24) + | (multiply_alpha( a, textcb ) << 16) + | (multiply_alpha( a, textluma ) << 8) | a; + } else if( a ) { + *((uint32_t *) output) = ((input[ 3 ] + multiply_alpha( a, textcr - input[ 3 ] )) << 24) + | ((input[ 2 ] + multiply_alpha( a, textcb - input[ 2 ] )) << 16) + | ((input[ 1 ] + multiply_alpha( a, textluma - input[ 1 ] )) << 8) + | (input[ 0 ] + multiply_alpha( a, 0xff - input[ 0 ] )); + } + mask++; + output += 4; + input += 4; + } + SPEEDY_END(); +} + +#ifdef ARCH_X86 + +static void composite_alphamask_to_packed4444_scanline_mmxext( uint8_t *output, + uint8_t *input, + uint8_t *mask, + int width, + int textluma, int textcb, + int textcr ) +{ + uint32_t opaque = (textcr << 24) | (textcb << 16) | (textluma << 8) | 0xff; + const mmx_t round = { 0x0080008000800080ULL }; + const mmx_t fullalpha = { 0x00000000000000ffULL }; + mmx_t colour; + + SPEEDY_START(); + + colour.w[ 0 ] = 0x00; + colour.w[ 1 ] = textluma; + colour.w[ 2 ] = textcb; + colour.w[ 3 ] = textcr; + + movq_m2r( colour, mm1 ); + movq_r2r( mm1, mm0 ); + + /* mm0 = [ cr ][ cb ][ y ][ 0xff ] */ + paddw_m2r( fullalpha, mm0 ); + + /* mm7 = 0 */ + pxor_r2r( mm7, mm7 ); + + /* mm6 = round */ + movq_m2r( round, mm6 ); + + while( width-- ) { + int a = *mask; + + if( a == 0xff ) { + *((uint32_t *) output) = opaque; + } else if( (input[ 0 ] == 0x00) ) { + /* We just need to multiply our colour 
by the alpha value. */ + + /* mm2 = [ a ][ a ][ a ][ a ] */ + movd_m2r( a, mm2 ); + movq_r2r( mm2, mm3 ); + pshufw_r2r( mm2, mm2, 0 ); + + /* mm5 = [ cr ][ cb ][ y ][ 0 ] */ + movq_r2r( mm1, mm5 ); + + /* Multiply by alpha. */ + pmullw_r2r( mm2, mm5 ); + paddw_m2r( round, mm5 ); + movq_r2r( mm5, mm6 ); + psrlw_i2r( 8, mm6 ); + paddw_r2r( mm6, mm5 ); + psrlw_i2r( 8, mm5 ); + + /* Set alpha to a. */ + por_r2r( mm3, mm5 ); + + /* Pack and write our result. */ + packuswb_r2r( mm5, mm5 ); + movd_r2m( mm5, *output ); + } else if( a ) { + /* mm2 = [ a ][ a ][ a ][ a ] */ + movd_m2r( a, mm2 ); + pshufw_r2r( mm2, mm2, 0 ); + + /* mm3 = [ cr ][ cb ][ y ][ 0xff ] */ + movq_r2r( mm0, mm3 ); + + /* mm4 = [ i_cr ][ i_cb ][ i_y ][ i_a ] */ + movd_m2r( *input, mm4 ); + punpcklbw_r2r( mm7, mm4 ); + + /* Subtract input and colour. */ + psubw_r2r( mm4, mm3 ); /* mm3 = mm3 - mm4 */ + + /* Multiply alpha. */ + pmullw_r2r( mm2, mm3 ); + paddw_r2r( mm6, mm3 ); + movq_r2r( mm3, mm2 ); + psrlw_i2r( 8, mm3 ); + paddw_r2r( mm2, mm3 ); + psrlw_i2r( 8, mm3 ); + + /* Add back in the input. */ + paddb_r2r( mm3, mm4 ); + + /* Write result. 
*/ + packuswb_r2r( mm4, mm4 ); + movd_r2m( mm4, *output ); + } + mask++; + output += 4; + input += 4; + } + sfence(); + emms(); + SPEEDY_END(); +} + +#endif + +static void composite_alphamask_alpha_to_packed4444_scanline_c( uint8_t *output, + uint8_t *input, + uint8_t *mask, int width, + int textluma, int textcb, + int textcr, int alpha ) +{ + uint32_t opaque = (textcr << 24) | (textcb << 16) | (textluma << 8) | 0xff; + int i; + + SPEEDY_START(); + + for( i = 0; i < width; i++ ) { + int af = *mask; + + if( af ) { + int a = ((af * alpha) + 0x80) >> 8; + + if( a == 0xff ) { + *((uint32_t *) output) = opaque; + } else if( input[ 0 ] == 0x00 ) { + *((uint32_t *) output) = (multiply_alpha( a, textcr ) << 24) + | (multiply_alpha( a, textcb ) << 16) + | (multiply_alpha( a, textluma ) << 8) | a; + } else if( a ) { + *((uint32_t *) output) = ((input[ 3 ] + multiply_alpha( a, textcr - input[ 3 ] )) << 24) + | ((input[ 2 ] + multiply_alpha( a, textcb - input[ 2 ] )) << 16) + | ((input[ 1 ] + multiply_alpha( a, textluma - input[ 1 ] )) << 8) + | (a + multiply_alpha( 0xff - a, input[ 0 ] )); + } + } + mask++; + output += 4; + input += 4; + } + + SPEEDY_END(); +} + +static void premultiply_packed4444_scanline_c( uint8_t *output, uint8_t *input, int width ) +{ + SPEEDY_START(); + + while( width-- ) { + unsigned int cur_a = input[ 0 ]; + + *((uint32_t *) output) = (multiply_alpha( cur_a, input[ 3 ] ) << 24) + | (multiply_alpha( cur_a, input[ 2 ] ) << 16) + | (multiply_alpha( cur_a, input[ 1 ] ) << 8) + | cur_a; + + output += 4; + input += 4; + } + + SPEEDY_END(); +} + +#ifdef ARCH_X86 + +static void premultiply_packed4444_scanline_mmxext( uint8_t *output, uint8_t *input, int width ) +{ + const mmx_t round = { 0x0080008000800080ULL }; + const mmx_t alpha = { 0x00000000000000ffULL }; + const mmx_t noalp = { 0xffffffffffff0000ULL }; + + SPEEDY_START(); + + pxor_r2r( mm7, mm7 ); + while( width-- ) { + movd_m2r( *input, mm0 ); + punpcklbw_r2r( mm7, mm0 ); + + movq_r2r( mm0, mm2 ); + 
pshufw_r2r( mm2, mm2, 0 ); + movq_r2r( mm2, mm4 ); + pand_m2r( alpha, mm4 ); + + pmullw_r2r( mm2, mm0 ); + paddw_m2r( round, mm0 ); + + movq_r2r( mm0, mm3 ); + psrlw_i2r( 8, mm3 ); + paddw_r2r( mm3, mm0 ); + psrlw_i2r( 8, mm0 ); + + pand_m2r( noalp, mm0 ); + paddw_r2r( mm4, mm0 ); + + packuswb_r2r( mm0, mm0 ); + movd_r2m( mm0, *output ); + + output += 4; + input += 4; + } + sfence(); + emms(); + + SPEEDY_END(); +} + +#endif + +static void blend_packed422_scanline_c( uint8_t *output, uint8_t *src1, + uint8_t *src2, int width, int pos ) +{ + if( pos == 0 ) { + blit_packed422_scanline( output, src1, width ); + } else if( pos == 256 ) { + blit_packed422_scanline( output, src2, width ); + } else if( pos == 128 ) { + interpolate_packed422_scanline( output, src1, src2, width ); + } else { + width *= 2; + while( width-- ) { + *output++ = ( (*src1++ * ( 256 - pos )) + (*src2++ * pos) + 0x80 ) >> 8; + } + } +} + +#ifdef ARCH_X86 + +static void blend_packed422_scanline_mmxext( uint8_t *output, uint8_t *src1, + uint8_t *src2, int width, int pos ) +{ + if( pos <= 0 ) { + blit_packed422_scanline( output, src1, width ); + } else if( pos >= 256 ) { + blit_packed422_scanline( output, src2, width ); + } else if( pos == 128 ) { + interpolate_packed422_scanline( output, src1, src2, width ); + } else { + const mmx_t all256 = { 0x0100010001000100ULL }; + const mmx_t round = { 0x0080008000800080ULL }; + + SPEEDY_START(); + + movd_m2r( pos, mm0 ); + pshufw_r2r( mm0, mm0, 0 ); + movq_m2r( all256, mm1 ); + psubw_r2r( mm0, mm1 ); + pxor_r2r( mm7, mm7 ); + + for( width /= 2; width; width-- ) { + movd_m2r( *src1, mm3 ); + movd_m2r( *src2, mm4 ); + punpcklbw_r2r( mm7, mm3 ); + punpcklbw_r2r( mm7, mm4 ); + + pmullw_r2r( mm1, mm3 ); + pmullw_r2r( mm0, mm4 ); + paddw_r2r( mm4, mm3 ); + paddw_m2r( round, mm3 ); + psrlw_i2r( 8, mm3 ); + + packuswb_r2r( mm3, mm3 ); + movd_r2m( mm3, *output ); + + output += 4; + src1 += 4; + src2 += 4; + } + sfence(); + emms(); + + SPEEDY_END(); + } +} + +static void 
quarter_blit_vertical_packed422_scanline_mmxext( uint8_t *output, uint8_t *one, + uint8_t *three, int width ) +{ + int i; + + SPEEDY_START(); + for( i = width/16; i; --i ) { + movq_m2r( *one, mm0 ); + movq_m2r( *three, mm1 ); + movq_m2r( *(one + 8), mm2 ); + movq_m2r( *(three + 8), mm3 ); + movq_m2r( *(one + 16), mm4 ); + movq_m2r( *(three + 16), mm5 ); + movq_m2r( *(one + 24), mm6 ); + movq_m2r( *(three + 24), mm7 ); + pavgb_r2r( mm1, mm0 ); + pavgb_r2r( mm1, mm0 ); + pavgb_r2r( mm3, mm2 ); + pavgb_r2r( mm3, mm2 ); + pavgb_r2r( mm5, mm4 ); + pavgb_r2r( mm5, mm4 ); + pavgb_r2r( mm7, mm6 ); + pavgb_r2r( mm7, mm6 ); + movntq_r2m( mm0, *output ); + movntq_r2m( mm2, *(output + 8) ); + movntq_r2m( mm4, *(output + 16) ); + movntq_r2m( mm6, *(output + 24) ); + output += 32; + one += 32; + three += 32; + } + width = (width & 0xf); + + for( i = width/4; i; --i ) { + movq_m2r( *one, mm0 ); + movq_m2r( *three, mm1 ); + pavgb_r2r( mm1, mm0 ); + pavgb_r2r( mm1, mm0 ); + movntq_r2m( mm0, *output ); + output += 8; + one += 8; + three += 8; + } + width = width & 0x7; + + /* Handle last few pixels. 
*/ + for( i = width * 2; i; --i ) { + *output++ = (*one + *three + *three + *three + 2) / 4; + one++; + three++; + } + + sfence(); + emms(); + + SPEEDY_END(); +} + +#endif + +static void quarter_blit_vertical_packed422_scanline_c( uint8_t *output, uint8_t *one, + uint8_t *three, int width ) +{ + SPEEDY_START(); + width *= 2; + while( width-- ) { + *output++ = (*one + *three + *three + *three + 2) / 4; + one++; + three++; + } + SPEEDY_END(); +} + +static void subpix_blit_vertical_packed422_scanline_c( uint8_t *output, uint8_t *top, + uint8_t *bot, int subpixpos, int width ) +{ + if( subpixpos == 32768 ) { + interpolate_packed422_scanline( output, top, bot, width ); + } else if( subpixpos == 16384 ) { + quarter_blit_vertical_packed422_scanline( output, top, bot, width ); + } else if( subpixpos == 49152 ) { + quarter_blit_vertical_packed422_scanline( output, bot, top, width ); + } else { + int x; + + SPEEDY_START(); + + width *= 2; + for( x = 0; x < width; x++ ) { + output[ x ] = ( ( top[ x ] * subpixpos ) + ( bot[ x ] * ( 0xffff - subpixpos ) ) ) >> 16; + } + SPEEDY_END(); + } +} + +static void a8_subpix_blit_scanline_c( uint8_t *output, uint8_t *input, + int lasta, int startpos, int width ) +{ + int pos = 0xffff - (startpos & 0xffff); + int prev = lasta; + int x; + + for( x = 0; x < width; x++ ) { + output[ x ] = ( ( prev * pos ) + ( input[ x ] * ( 0xffff - pos ) ) ) >> 16; + prev = input[ x ]; + } +} + + +static uint32_t speedy_accel; + +void setup_speedy_calls( int verbose ) +{ + speedy_accel = xine_mm_accel(); + + interpolate_packed422_scanline = interpolate_packed422_scanline_c; + blit_colour_packed422_scanline = blit_colour_packed422_scanline_c; + blit_colour_packed4444_scanline = blit_colour_packed4444_scanline_c; + blit_packed422_scanline = blit_packed422_scanline_c; + composite_packed4444_to_packed422_scanline = composite_packed4444_to_packed422_scanline_c; + composite_packed4444_alpha_to_packed422_scanline = 
composite_packed4444_alpha_to_packed422_scanline_c; + composite_alphamask_to_packed4444_scanline = composite_alphamask_to_packed4444_scanline_c; + composite_alphamask_alpha_to_packed4444_scanline = composite_alphamask_alpha_to_packed4444_scanline_c; + premultiply_packed4444_scanline = premultiply_packed4444_scanline_c; + blend_packed422_scanline = blend_packed422_scanline_c; + filter_luma_121_packed422_inplace_scanline = filter_luma_121_packed422_inplace_scanline_c; + filter_luma_14641_packed422_inplace_scanline = filter_luma_14641_packed422_inplace_scanline_c; + comb_factor_packed422_scanline = 0; + diff_factor_packed422_scanline = diff_factor_packed422_scanline_c; + kill_chroma_packed422_inplace_scanline = kill_chroma_packed422_inplace_scanline_c; + mirror_packed422_inplace_scanline = mirror_packed422_inplace_scanline_c; + halfmirror_packed422_inplace_scanline = halfmirror_packed422_inplace_scanline_c; + speedy_memcpy = xine_fast_memcpy; + diff_packed422_block8x8 = diff_packed422_block8x8_c; + a8_subpix_blit_scanline = a8_subpix_blit_scanline_c; + quarter_blit_vertical_packed422_scanline = quarter_blit_vertical_packed422_scanline_c; + subpix_blit_vertical_packed422_scanline = subpix_blit_vertical_packed422_scanline_c; + +#ifdef ARCH_X86 + if( speedy_accel & MM_ACCEL_X86_MMXEXT ) { + if( verbose ) { + fprintf( stderr, "speedycode: Using MMXEXT optimized functions.\n" ); + } + interpolate_packed422_scanline = interpolate_packed422_scanline_mmxext; + blit_colour_packed422_scanline = blit_colour_packed422_scanline_mmxext; + blit_colour_packed4444_scanline = blit_colour_packed4444_scanline_mmxext; + blit_packed422_scanline = blit_packed422_scanline_mmxext; + composite_packed4444_to_packed422_scanline = composite_packed4444_to_packed422_scanline_mmxext; + composite_packed4444_alpha_to_packed422_scanline = composite_packed4444_alpha_to_packed422_scanline_mmxext; + composite_alphamask_to_packed4444_scanline = composite_alphamask_to_packed4444_scanline_mmxext; + 
premultiply_packed4444_scanline = premultiply_packed4444_scanline_mmxext; + kill_chroma_packed422_inplace_scanline = kill_chroma_packed422_inplace_scanline_mmx; + blend_packed422_scanline = blend_packed422_scanline_mmxext; + diff_factor_packed422_scanline = diff_factor_packed422_scanline_mmx; + comb_factor_packed422_scanline = comb_factor_packed422_scanline_mmx; + diff_packed422_block8x8 = diff_packed422_block8x8_mmx; + quarter_blit_vertical_packed422_scanline = quarter_blit_vertical_packed422_scanline_mmxext; + } else if( speedy_accel & MM_ACCEL_X86_MMX ) { + if( verbose ) { + fprintf( stderr, "speedycode: Using MMX optimized functions.\n" ); + } + interpolate_packed422_scanline = interpolate_packed422_scanline_mmx; + blit_colour_packed422_scanline = blit_colour_packed422_scanline_mmx; + blit_colour_packed4444_scanline = blit_colour_packed4444_scanline_mmx; + blit_packed422_scanline = blit_packed422_scanline_mmx; + diff_factor_packed422_scanline = diff_factor_packed422_scanline_mmx; + comb_factor_packed422_scanline = comb_factor_packed422_scanline_mmx; + kill_chroma_packed422_inplace_scanline = kill_chroma_packed422_inplace_scanline_mmx; + diff_packed422_block8x8 = diff_packed422_block8x8_mmx; + } else +#endif + { + if( verbose ) { + fprintf( stderr, "speedycode: No MMX or MMXEXT support detected, using C fallbacks.\n" ); + } + } +} + +int speedy_get_accel( void ) +{ + return speedy_accel; +} + diff --git a/src/post/deinterlace/speedy.h b/src/post/deinterlace/speedy.h new file mode 100644 index 000000000..6c40abc8c --- /dev/null +++ b/src/post/deinterlace/speedy.h @@ -0,0 +1,135 @@ +/** + * Copyright (c) 2002, 2003 Billy Biggs <vektor@dumbterm.net>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
#ifndef SPEEDY_H_INCLUDED
#define SPEEDY_H_INCLUDED

#include <stddef.h> /* size_t, used by speedy_memcpy */
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Speedy is a collection of optimized functions plus their C fallbacks.
 * This includes a simple system to select which functions to use
 * at runtime.
 *
 * The optimizations are done with the help of the mmx.h system, from
 * libmpeg2 by Michel Lespinasse and Aaron Holtzman.
 */

/**
 * This filter actually does not meet the spec so calling it rec601
 * is a bit of a lie.  I got the filter from Poynton's site.
 */
void packed422_to_packed444_rec601_scanline( uint8_t *dest, uint8_t *src, int width );

/* Struct for pulldown detection metrics. */
typedef struct pulldown_metrics_s {
    /* difference: total, even lines, odd lines */
    int d, e, o;
    /* noise: temporal, spatial (current), spatial (past) */
    int t, s, p;
} pulldown_metrics_t;

/**
 * Here are the function pointers which will be initialized to point at the
 * fastest available version of the above after a call to setup_speedy_calls().
 */

/**
 * Interpolates a packed 4:2:2 scanline using linear interpolation.
 */
extern void (*interpolate_packed422_scanline)( uint8_t *output, uint8_t *top,
                                               uint8_t *bot, int width );

/**
 * Blits a colour to a packed 4:2:2 scanline.
 */
extern void (*blit_colour_packed422_scanline)( uint8_t *output,
                                               int width, int y, int cb, int cr );

/**
 * Blits a colour to a packed 4:4:4:4 scanline.  I use luma/cb/cr instead of
 * RGB but this will of course work for either.
 */
extern void (*blit_colour_packed4444_scanline)( uint8_t *output,
                                                int width, int alpha, int luma,
                                                int cb, int cr );

/**
 * Scanline blitter for packed 4:2:2 scanlines.  This implementation uses
 * the fast memcpy code from xine which got it from mplayer.
 */
extern void (*blit_packed422_scanline)( uint8_t *dest, const uint8_t *src, int width );

/* Alpha provided is from 0-256 not 0-255. */
extern void (*composite_packed4444_to_packed422_scanline)( uint8_t *output,
                                                           uint8_t *input,
                                                           uint8_t *foreground,
                                                           int width );
extern void (*composite_packed4444_alpha_to_packed422_scanline)( uint8_t *output,
                                                                 uint8_t *input,
                                                                 uint8_t *foreground,
                                                                 int width, int alpha );
extern void (*composite_alphamask_to_packed4444_scanline)( uint8_t *output,
                                                           uint8_t *input,
                                                           uint8_t *mask, int width,
                                                           int textluma, int textcb,
                                                           int textcr );
extern void (*composite_alphamask_alpha_to_packed4444_scanline)( uint8_t *output,
                                                                 uint8_t *input,
                                                                 uint8_t *mask, int width,
                                                                 int textluma, int textcb,
                                                                 int textcr, int alpha );
extern void (*premultiply_packed4444_scanline)( uint8_t *output, uint8_t *input, int width );

/**
 * Blends two packed 4:2:2 scanlines.  pos is the weight of src2 on a
 * 0-256 scale: 0 copies src1, 256 copies src2, 128 interpolates, and any
 * other value yields (src1 * (256 - pos) + src2 * pos + 0x80) >> 8.
 */
extern void (*blend_packed422_scanline)( uint8_t *output, uint8_t *src1,
                                         uint8_t *src2, int width, int pos );
extern void (*filter_luma_121_packed422_inplace_scanline)( uint8_t *data, int width );
extern void (*filter_luma_14641_packed422_inplace_scanline)( uint8_t *data, int width );
extern unsigned int (*diff_factor_packed422_scanline)( uint8_t *cur, uint8_t *old, int width );

/**
 * NOTE: setup_speedy_calls() leaves this pointer NULL unless an MMX or
 * MMXEXT implementation is selected, so check it before calling.
 */
extern unsigned int (*comb_factor_packed422_scanline)( uint8_t *top, uint8_t *mid,
                                                       uint8_t *bot, int width );
extern void (*kill_chroma_packed422_inplace_scanline)( uint8_t *data, int width );
extern void (*mirror_packed422_inplace_scanline)( uint8_t *data, int width );
extern void (*halfmirror_packed422_inplace_scanline)( uint8_t *data, int width );
extern void *(*speedy_memcpy)( void *output, const void *input, size_t size );

/* The last data parameter is spelled new_ because "new" is a C++ keyword. */
extern void (*diff_packed422_block8x8)( pulldown_metrics_t *m, uint8_t *old,
                                        uint8_t *new_, int os, int ns );

/**
 * Per-sample blend of each input value with the value before it:
 * output[x] = (prev * pos + input[x] * (0xffff - pos)) >> 16, where
 * pos = 0xffff - (startpos & 0xffff) and prev starts out as lasta.
 */
extern void (*a8_subpix_blit_scanline)( uint8_t *output, uint8_t *input,
                                        int lasta, int startpos, int width );

/**
 * Writes (one + 3 * three + 2) / 4 for every byte of a packed 4:2:2
 * scanline (the MMXEXT version approximates this with two averages).
 */
extern void (*quarter_blit_vertical_packed422_scanline)( uint8_t *output, uint8_t *one,
                                                         uint8_t *three, int width );

/**
 * Vertical blend of two scanlines where subpixpos is the 16-bit weight
 * given to top; 16384, 32768 and 49152 are dispatched to the quarter and
 * half blitters.
 */
extern void (*subpix_blit_vertical_packed422_scanline)( uint8_t *output, uint8_t *top,
                                                        uint8_t *bot, int subpixpos, int width );

/**
 * Sets up the function pointers to point at the fastest function available.
 */
void setup_speedy_calls( int verbose );

/**
 * Returns a bitfield of what accelerations are available.  See mm_accel.h.
 */
int speedy_get_accel( void );

#ifdef __cplusplus
}
#endif
#endif /* SPEEDY_H_INCLUDED */
+ */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdint.h> +#include <math.h> +#include "speedy.h" +#include "deinterlace.h" + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "pulldown.h" +#include "tvtime.h" + + +/* use tvtime_t */ +#define pulldown_alg this->pulldown_alg +#define curmethod this->curmethod + +#define last_topdiff this->last_topdiff +#define last_botdiff this->last_botdiff + +#define pdoffset this->pdoffset +#define pderror this->pderror +#define pdlastbusted this->pdlastbusted +#define filmmode this->filmmode + + + +/** + * This is how many frames to wait until deciding if the pulldown phase + * has changed or if we've really found a pulldown sequence. This is + * currently set to about 1 second, that is, we won't go into film mode + * until we've seen a pulldown sequence successfully for 1 second. + */ +#define PULLDOWN_ERROR_WAIT 60 + +/** + * This is how many predictions have to be incorrect before we fall back to + * video mode. Right now, if we mess up, we jump to video mode immediately. + */ +#define PULLDOWN_ERROR_THRESHOLD 2 + + +/** + * Explanation of the loop: + * + * We want to build frames so that they look like this: + * Top field: Bot field: + * Copy Double + * Interp Copy + * Copy Interp + * Interp Copy + * Copy -- + * -- -- + * -- -- + * Copy Interp + * Interp Copy + * Copy Interp + * Double Copy + * + * So, say a frame is n high. + * For the bottom field, the first scanline is blank (special case). + * For the top field, the final scanline is blank (special case). + * For the rest of the scanlines, we alternate between Copy then Interpolate. + * + * To do the loop, I go 'Interp then Copy', and handle the first copy + * outside the loop for both top and bottom. + * The top field therefore handles n-2 scanlines in the loop. + * The bot field handles n-2 scanlines in the loop. 
+ * + * What we pass to the deinterlacing routines: + * + * Each deinterlacing routine can require data from up to four fields. + * The current field being output is Field 4: + * + * | Field 3 | Field 2 | Field 1 | Field 0 | + * | | T2 | | T0 | + * | M3 | | M1 | | + * | | B2 | | B0 | + * | NX3 | | NX1 | | + * + * So, since we currently get frames not individual fields from V4L, there + * are two possibilities for where these come from: + * + * CASE 1: Deinterlacing the top field: + * | Field 4 | Field 3 | Field 2 | Field 1 | Field 0 | + * | T4 | | T2 | | T0 | + * | | M3 | | M1 | | + * | B4 | | B2 | | B0 | + * [-- secondlast --] [-- lastframe --] [-- curframe --] + * + * CASE 2: Deinterlacing the bottom field: + * | Field 4 | Field 3 | Field 2 | Field 1 | Field 0 | + * | T4 | | T2 | | T0 | + * | | M3 | | M1 | | + * | B4 | | B2 | | B0 | + * ndlast --] [-- lastframe --] [-- curframe --] + * + * So, in case 1, we need the previous 2 frames as well as the current + * frame, and in case 2, we only need the previous frame, since the + * current frame contains both Field 3 and Field 4. 
+ */ +static void pulldown_merge_fields( uint8_t *output, + uint8_t *topfield, + uint8_t *botfield, + int width, + int frame_height, + int fieldstride, + int outstride ) +{ + int i; + + for( i = 0; i < frame_height; i++ ) { + uint8_t *curoutput = output + (i * outstride); + + if( i & 1 ) { + blit_packed422_scanline( curoutput, botfield + ((i / 2) * fieldstride), width ); + } else { + blit_packed422_scanline( curoutput, topfield + ((i / 2) * fieldstride), width ); + } + } +} + + +static void calculate_pulldown_score_vektor( tvtime_t *this, uint8_t *curframe, + uint8_t *lastframe, + int instride, + int frame_height, + int width ) +{ + int i; + + last_topdiff = 0; + last_botdiff = 0; + + for( i = 0; i < frame_height; i++ ) { + + if( i > 40 && (i & 3) == 0 && i < frame_height - 40 ) { + last_topdiff += diff_factor_packed422_scanline( curframe + (i*instride), + lastframe + (i*instride), width ); + last_botdiff += diff_factor_packed422_scanline( curframe + (i*instride) + instride, + lastframe + (i*instride) + instride, + width ); + } + } +} + + +int tvtime_build_deinterlaced_frame( tvtime_t *this, uint8_t *output, + uint8_t *curframe, + uint8_t *lastframe, + uint8_t *secondlastframe, + int bottom_field, + int width, + int frame_height, + int instride, + int outstride ) +{ + int i; + + if( pulldown_alg != PULLDOWN_VEKTOR ) { + /* If we leave vektor pulldown mode, lose our state. */ + filmmode = 0; + } + + if( pulldown_alg == PULLDOWN_VEKTOR ) { + /* Make pulldown phase decisions every top field. 
*/ + if( !bottom_field ) { + int predicted; + + predicted = pdoffset << 1; + if( predicted > PULLDOWN_SEQ_DD ) predicted = PULLDOWN_SEQ_AA; + + /** + * Old algorithm: + pdoffset = determine_pulldown_offset_history( last_topdiff, last_botdiff, 1, &realbest ); + if( pdoffset & predicted ) { pdoffset = predicted; } else { pdoffset = realbest; } + */ + + calculate_pulldown_score_vektor( this, curframe, lastframe, instride, frame_height, width ); + + pdoffset = determine_pulldown_offset_short_history_new( last_topdiff, last_botdiff, 1, predicted ); + //pdoffset = determine_pulldown_offset_history_new( last_topdiff, last_botdiff, 1, predicted ); + + /* 3:2 pulldown state machine. */ + if( !pdoffset ) { + /* No pulldown offset applies, drop out of pulldown immediately. */ + pdlastbusted = 0; + pderror = PULLDOWN_ERROR_WAIT; + } else if( pdoffset != predicted ) { + if( pdlastbusted ) { + pdlastbusted--; + pdoffset = predicted; + } else { + pderror = PULLDOWN_ERROR_WAIT; + } + } else { + if( pderror ) { + pderror--; + } + + if( !pderror ) { + pdlastbusted = PULLDOWN_ERROR_THRESHOLD; + } + } + + + if( !pderror ) { + // We're in pulldown, reverse it. 
+ if( !filmmode ) { + fprintf( stderr, "Film mode enabled.\n" ); + filmmode = 1; + } + + if( pulldown_drop( pdoffset, 0 ) ) + return 0; + + if( pulldown_source( pdoffset, 0 ) ) { + pulldown_merge_fields( output, curframe, curframe + instride, + width, frame_height, instride*2, outstride ); + } else { + pulldown_merge_fields( output, curframe, lastframe + instride, + width, frame_height, instride*2, outstride ); + } + + return 1; + } else { + if( filmmode ) { + fprintf( stderr, "Film mode disabled.\n" ); + filmmode = 0; + } + } + } else if( !pderror ) { + if( pulldown_drop( pdoffset, 1 ) ) + return 0; + + if( pulldown_source( pdoffset, 1 ) ) { + pulldown_merge_fields( output, curframe, lastframe + instride, + width, frame_height, instride*2, outstride ); + } else { + pulldown_merge_fields( output, curframe, curframe + instride, + width, frame_height, instride*2, outstride ); + } + + return 1; + } + } + + if( !curmethod->scanlinemode ) { + deinterlace_frame_data_t data; + + data.f0 = curframe; + data.f1 = lastframe; + data.f2 = secondlastframe; + + curmethod->deinterlace_frame( output, outstride, &data, bottom_field, width, frame_height ); + + } else { + int loop_size; + int scanline = 0; + + if( bottom_field ) { + /* Advance frame pointers to the next input line. */ + curframe += instride; + lastframe += instride; + secondlastframe += instride; + + /* Double the top scanline a scanline. */ + blit_packed422_scanline( output, curframe, width ); + + output += outstride; + scanline++; + } + + /* Copy a scanline. */ + blit_packed422_scanline( output, curframe, width ); + + output += outstride; + scanline++; + + /* Something is wrong here. -Billy */ + loop_size = ((frame_height - 2) / 2); + for( i = loop_size; i; --i ) { + deinterlace_scanline_data_t data; + + data.bottom_field = bottom_field; + + data.t0 = curframe; + data.b0 = curframe + (instride*2); + + if( bottom_field ) { + data.tt1 = (i < loop_size) ? 
(curframe - instride) : (curframe + instride); + data.m1 = curframe + instride; + data.bb1 = (i > 1) ? (curframe + (instride*3)) : (curframe + instride); + } else { + data.tt1 = (i < loop_size) ? (lastframe - instride) : (lastframe + instride); + data.m1 = lastframe + instride; + data.bb1 = (i > 1) ? (lastframe + (instride*3)) : (lastframe + instride); + } + + data.t2 = lastframe; + data.b2 = lastframe + (instride*2); + + if( bottom_field ) { + data.tt3 = (i < loop_size) ? (lastframe - instride) : (lastframe + instride); + data.m3 = lastframe + instride; + data.bb3 = (i > 1) ? (lastframe + (instride*3)) : (lastframe + instride); + } else { + data.tt3 = (i < loop_size) ? (secondlastframe - instride) : (secondlastframe + instride); + data.m3 = secondlastframe + instride; + data.bb3 = (i > 1) ? (secondlastframe + (instride*3)) : (secondlastframe + instride); + } + + curmethod->interpolate_scanline( output, &data, width ); + + output += outstride; + scanline++; + + data.tt0 = curframe; + data.m0 = curframe + (instride*2); + data.bb0 = (i > 1) ? (curframe + (instride*4)) : (curframe + (instride*2)); + + if( bottom_field ) { + data.t1 = curframe + instride; + data.b1 = (i > 1) ? (curframe + (instride*3)) : (curframe + instride); + } else { + data.t1 = lastframe + instride; + data.b1 = (i > 1) ? (lastframe + (instride*3)) : (lastframe + instride); + } + + data.tt2 = lastframe; + data.m2 = lastframe + (instride*2); + data.bb2 = (i > 1) ? (lastframe + (instride*4)) : (lastframe + (instride*2)); + + if( bottom_field ) { + data.t2 = lastframe + instride; + data.b2 = (i > 1) ? (lastframe + (instride*3)) : (lastframe + instride); + } else { + data.t2 = secondlastframe + instride; + data.b2 = (i > 1) ? (secondlastframe + (instride*3)) : (secondlastframe + instride); + } + + /* Copy a scanline. 
*/ + curmethod->copy_scanline( output, &data, width ); + curframe += instride * 2; + lastframe += instride * 2; + secondlastframe += instride * 2; + + output += outstride; + scanline++; + } + + if( !bottom_field ) { + /* Double the bottom scanline. */ + blit_packed422_scanline( output, curframe, width ); + + output += outstride; + scanline++; + } + } + + return 1; +} + + +int tvtime_build_copied_field( tvtime_t *this, uint8_t *output, + uint8_t *curframe, + int bottom_field, + int width, + int frame_height, + int instride, + int outstride ) +{ + int scanline = 0; + int i; + + if( bottom_field ) { + /* Advance frame pointers to the next input line. */ + curframe += instride; + } + + /* Copy a scanline. */ + // blit_packed422_scanline( output, curframe, width ); + quarter_blit_vertical_packed422_scanline( output, curframe + (instride*2), curframe, width ); + + curframe += instride * 2; + output += outstride; + scanline += 2; + + for( i = ((frame_height - 2) / 2); i; --i ) { + /* Copy/interpolate a scanline. 
*/ + if( bottom_field ) { + // interpolate_packed422_scanline( output, curframe, curframe - (instride*2), width ); + quarter_blit_vertical_packed422_scanline( output, curframe - (instride*2), curframe, width ); + } else { + // blit_packed422_scanline( output, curframe, width ); + if( i > 1 ) { + quarter_blit_vertical_packed422_scanline( output, curframe + (instride*2), curframe, width ); + } else { + blit_packed422_scanline( output, curframe, width ); + } + } + curframe += instride * 2; + + output += outstride; + scanline += 2; + } + + return 1; +} + +tvtime_t *tvtime_new_context(void) +{ + tvtime_t *this; + + this = malloc(sizeof(tvtime_t)); + + pulldown_alg = PULLDOWN_NONE; + + curmethod = NULL; + + tvtime_reset_context(this); + + return this; +} + +void tvtime_reset_context( tvtime_t *this ) +{ + last_topdiff = 0; + last_botdiff = 0; + + pdoffset = PULLDOWN_SEQ_AA; + pderror = PULLDOWN_ERROR_WAIT; + pdlastbusted = 0; + filmmode = 0; +} diff --git a/src/post/deinterlace/tvtime.h b/src/post/deinterlace/tvtime.h new file mode 100644 index 000000000..3b435e639 --- /dev/null +++ b/src/post/deinterlace/tvtime.h @@ -0,0 +1,91 @@ +/** + * Copyright (c) 2001, 2002, 2003 Billy Biggs <vektor@dumbterm.net>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#ifndef TVTIME_H_INCLUDED +#define TVTIME_H_INCLUDED + +#include "deinterlace.h" + +/** + * Which pulldown algorithm we're using. + */ +enum { + PULLDOWN_NONE = 0, + PULLDOWN_VEKTOR = 1, /* vektor's adaptive pulldown detection. */ + PULLDOWN_DALIAS = 2, /* Using dalias's pulldown detection */ + PULLDOWN_MAX = 4, +}; + +enum +{ + FRAMERATE_FULL = 0, + FRAMERATE_HALF_TFF = 1, + FRAMERATE_HALF_BFF = 2, + FRAMERATE_MAX = 3 +}; + + +typedef struct { + /** + * Which pulldown algorithm we're using. + */ + unsigned int pulldown_alg; + + /** + * Current deinterlacing method. + */ + deinterlace_method_t *curmethod; + + + /* internal data */ + int last_topdiff; + int last_botdiff; + + int pdoffset; + int pderror; + int pdlastbusted; + int filmmode; + + +} tvtime_t; + + +int tvtime_build_deinterlaced_frame( tvtime_t *this, uint8_t *output, + uint8_t *curframe, + uint8_t *lastframe, + uint8_t *secondlastframe, + int bottom_field, + int width, + int frame_height, + int instride, + int outstride ); + + +int tvtime_build_copied_field( tvtime_t *this, uint8_t *output, + uint8_t *curframe, + int bottom_field, + int width, + int frame_height, + int instride, + int outstride ); +tvtime_t *tvtime_new_context(void); + +void tvtime_reset_context( tvtime_t *this ); + + +#endif diff --git a/src/post/deinterlace/xine_plugin.c b/src/post/deinterlace/xine_plugin.c new file mode 100644 index 000000000..62967c490 --- /dev/null +++ b/src/post/deinterlace/xine_plugin.c @@ -0,0 +1,739 @@ +/* + * Copyright (C) 2000-2003 the xine project + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * $Id: xine_plugin.c,v 1.1 2003/06/13 01:48:10 miguelfreitas Exp $ + * + * advanced video deinterlacer plugin + * Jun/2003 by Miguel Freitas + * + * heavily based on tvtime.sf.net by Billy Biggs + */ + +#include "xine_internal.h" +#include "post.h" +#include "xineutils.h" +#include <pthread.h> + +#include "tvtime.h" +#include "speedy.h" +#include "deinterlace.h" +#include "plugins/plugins.h" + +/* plugin class initialization function */ +static void *deinterlace_init_plugin(xine_t *xine, void *); + + +/* plugin catalog information */ +post_info_t deinterlace_special_info = { XINE_POST_TYPE_VIDEO_FILTER }; + +plugin_info_t xine_plugin_info[] = { + /* type, API, "name", version, special_info, init_function */ + { PLUGIN_POST, 3, "tvtime", XINE_VERSION_CODE, &deinterlace_special_info, &deinterlace_init_plugin }, + { PLUGIN_NONE, 0, "", 0, NULL, NULL } +}; + + +typedef struct post_plugin_deinterlace_s post_plugin_deinterlace_t; + +#define MAX_NUM_METHODS 30 +static char *enum_methods[MAX_NUM_METHODS]; +static char *enum_pulldown[] = { "none", "vektor", NULL }; +static char *enum_framerate[] = { "full", "half (top)", "half (bottom)", NULL }; + +/* + * this is the struct used by "parameters api" + */ +typedef struct deinterlace_parameters_s { + + int method; + int enabled; + int pulldown; + int framerate_mode; + int judder_correction; + int use_progressive_frame_flag; + +} deinterlace_parameters_t; + +/* + * description of params struct + */ +START_PARAM_DESCR( deinterlace_parameters_t ) +PARAM_ITEM( 
POST_PARAM_TYPE_INT, method, enum_methods, 0, 0, 0, + "deinterlace method" ) +PARAM_ITEM( POST_PARAM_TYPE_BOOL, enabled, NULL, 0, 1, 0, + "enable/disable" ) +PARAM_ITEM( POST_PARAM_TYPE_INT, pulldown, enum_pulldown, 0, 0, 0, + "pulldown algorithm" ) +PARAM_ITEM( POST_PARAM_TYPE_INT, framerate_mode, enum_framerate, 0, 0, 0, + "framerate output mode" ) +PARAM_ITEM( POST_PARAM_TYPE_BOOL, judder_correction, NULL, 0, 1, 0, + "make frames evenly spaced for film mode (24 fps)" ) +PARAM_ITEM( POST_PARAM_TYPE_BOOL, use_progressive_frame_flag, NULL, 0, 1, 0, + "disable deinterlacing when progressive_frame flag is set" ) +END_PARAM_DESCR( param_descr ) + + +#define NUM_RECENT_FRAMES 2 +#define FPS_24_DURATION 3754 +#define FRAMES_TO_SYNC 20 + +/* plugin structure */ +struct post_plugin_deinterlace_s { + post_plugin_t post; + + /* private data */ + xine_video_port_t *vo_port; + xine_stream_t *stream; + + int cur_method; + int enabled; + int pulldown; + int framerate_mode; + int judder_correction; + int use_progressive_frame_flag; + tvtime_t *tvtime; + + int framecounter; + uint8_t rff_pattern; + + vo_frame_t *recent_frame[NUM_RECENT_FRAMES]; + + pthread_mutex_t lock; +}; + + +static void _flush_frames(post_plugin_deinterlace_t *this) +{ + int i; + + for( i = 0; i < NUM_RECENT_FRAMES; i++ ) { + if( this->recent_frame[i] ) { + this->recent_frame[i]->free(this->recent_frame[i]); + this->recent_frame[i] = NULL; + } + } + tvtime_reset_context(this->tvtime); +} + +static int set_parameters (xine_post_t *this_gen, void *param_gen) { + post_plugin_deinterlace_t *this = (post_plugin_deinterlace_t *)this_gen; + deinterlace_parameters_t *param = (deinterlace_parameters_t *)param_gen; + + pthread_mutex_lock (&this->lock); + + if( this->enabled != param->enabled ) + _flush_frames(this); + + this->cur_method = param->method; + + this->enabled = param->enabled; + + this->pulldown = param->pulldown; + this->framerate_mode = param->framerate_mode; + this->judder_correction = 
param->judder_correction; + this->use_progressive_frame_flag = param->use_progressive_frame_flag; + + this->tvtime->pulldown_alg = this->pulldown; + this->tvtime->curmethod = get_deinterlace_method( this->cur_method-1 ); + + pthread_mutex_unlock (&this->lock); + + return 1; +} + +int get_parameters (xine_post_t *this_gen, void *param_gen) { + post_plugin_deinterlace_t *this = (post_plugin_deinterlace_t *)this_gen; + deinterlace_parameters_t *param = (deinterlace_parameters_t *)param_gen; + + param->method = this->cur_method; + param->enabled = this->enabled; + param->pulldown = this->pulldown; + param->framerate_mode = this->framerate_mode; + param->judder_correction = this->judder_correction; + param->use_progressive_frame_flag = this->use_progressive_frame_flag; + + return 1; +} + +xine_post_api_descr_t * get_param_descr (void) { + return ¶m_descr; +} + +static xine_post_api_t post_api = { + set_parameters, + get_parameters, + get_param_descr, +}; + +typedef struct post_deinterlace_out_s post_deinterlace_out_t; +struct post_deinterlace_out_s { + xine_post_out_t xine_out; + + post_plugin_deinterlace_t *plugin; +}; + +/* plugin class functions */ +static post_plugin_t *deinterlace_open_plugin(post_class_t *class_gen, int inputs, + xine_audio_port_t **audio_target, + xine_video_port_t **video_target); +static char *deinterlace_get_identifier(post_class_t *class_gen); +static char *deinterlace_get_description(post_class_t *class_gen); +static void deinterlace_class_dispose(post_class_t *class_gen); + +/* plugin instance functions */ +static void deinterlace_dispose(post_plugin_t *this_gen); + +/* rewire function */ +static int deinterlace_rewire(xine_post_out_t *output, void *data); + +/* replaced video_port functions */ +static int deinterlace_get_property(xine_video_port_t *port_gen, int property); +static int deinterlace_set_property(xine_video_port_t *port_gen, int property, int value); +static void deinterlace_flush(xine_video_port_t *port_gen); +static void 
deinterlace_open(xine_video_port_t *port_gen, xine_stream_t *stream); +static vo_frame_t *deinterlace_get_frame(xine_video_port_t *port_gen, uint32_t width, + uint32_t height, int ratio_code, + int format, int flags); +static void deinterlace_close(xine_video_port_t *port_gen, xine_stream_t *stream); + +/* replaced vo_frame functions */ +static int deinterlace_draw(vo_frame_t *frame, xine_stream_t *stream); + + +static void *deinterlace_init_plugin(xine_t *xine, void *data) +{ + post_class_t *class = (post_class_t *)malloc(sizeof(post_class_t)); + uint32_t config_flags = xine_mm_accel(); + int i; + + if (!class) + return NULL; + + class->open_plugin = deinterlace_open_plugin; + class->get_identifier = deinterlace_get_identifier; + class->get_description = deinterlace_get_description; + class->dispose = deinterlace_class_dispose; + + + setup_speedy_calls(0); + + greedy_plugin_init(); + + linearblend_plugin_init(); + + linear_plugin_init(); + weave_plugin_init(); + double_plugin_init(); + vfir_plugin_init(); + + /* scalerbob_plugin_init(); -- not supported properly, use onefieldxv */ + + greedy2frame_plugin_init(); + /* + dscaler_greedyh_plugin_init(); + dscaler_twoframe_plugin_init(); + + dscaler_videobob_plugin_init(); + dscaler_videoweave_plugin_init(); + dscaler_tomsmocomp_plugin_init(); + */ + filter_deinterlace_methods( config_flags, 5 /*fieldsavailable*/ ); + if( !get_num_deinterlace_methods() ) { + fprintf( stderr, "tvtime: No deinterlacing methods " + "available, exiting.\n" ); + return NULL; + } + + enum_methods[0] = "by driver"; + for(i = 0; i < get_num_deinterlace_methods(); i++ ) { + enum_methods[i+1] = (char *)get_deinterlace_method(i)->short_name; + } + enum_methods[i+1] = NULL; + + return class; +} + + +static post_plugin_t *deinterlace_open_plugin(post_class_t *class_gen, int inputs, + xine_audio_port_t **audio_target, + xine_video_port_t **video_target) +{ + post_plugin_deinterlace_t *this = (post_plugin_deinterlace_t 
*)malloc(sizeof(post_plugin_deinterlace_t)); + xine_post_in_t *input = (xine_post_in_t *)malloc(sizeof(xine_post_in_t)); + xine_post_in_t *input_api = (xine_post_in_t *)malloc(sizeof(xine_post_in_t)); + post_deinterlace_out_t *output = (post_deinterlace_out_t *)malloc(sizeof(post_deinterlace_out_t)); + post_video_port_t *port; + + if (!this || !input || !output || !video_target || !video_target[0]) { + free(this); + free(input); + free(output); + return NULL; + } + + this->stream = NULL; + this->cur_method = 7; + this->pulldown = 1; + this->enabled = 1; + this->framerate_mode = 0; + this->judder_correction = 1; + this->use_progressive_frame_flag = 0; + this->framecounter = 0; + memset( &this->recent_frame, 0, sizeof(this->recent_frame) ); + + this->tvtime = tvtime_new_context(); + + this->tvtime->pulldown_alg = this->pulldown; + this->tvtime->curmethod = get_deinterlace_method( this->cur_method-1 ); + + pthread_mutex_init (&this->lock, NULL); + + port = post_intercept_video_port(&this->post, video_target[0]); + /* replace with our own get_frame function */ + port->port.open = deinterlace_open; + port->port.get_frame = deinterlace_get_frame; + port->port.close = deinterlace_close; + port->port.get_property = deinterlace_get_property; + port->port.set_property = deinterlace_set_property; + port->port.flush = deinterlace_flush; + + input->name = "video"; + input->type = XINE_POST_DATA_VIDEO; + input->data = (xine_video_port_t *)&port->port; + + input_api->name = "parameters"; + input_api->type = XINE_POST_DATA_PARAMETERS; + input_api->data = &post_api; + + output->xine_out.name = "deinterlaced video"; + output->xine_out.type = XINE_POST_DATA_VIDEO; + output->xine_out.data = (xine_video_port_t **)&port->original_port; + output->xine_out.rewire = deinterlace_rewire; + output->plugin = this; + + this->post.xine_post.audio_input = (xine_audio_port_t **)malloc(sizeof(xine_audio_port_t *)); + this->post.xine_post.audio_input[0] = NULL; + this->post.xine_post.video_input = 
(xine_video_port_t **)malloc(sizeof(xine_video_port_t *) * 2); + this->post.xine_post.video_input[0] = &port->port; + this->post.xine_post.video_input[1] = NULL; + + this->post.input = xine_list_new(); + this->post.output = xine_list_new(); + + xine_list_append_content(this->post.input, input); + xine_list_append_content(this->post.input, input_api); + xine_list_append_content(this->post.output, output); + + this->post.dispose = deinterlace_dispose; + + return &this->post; +} + +static char *deinterlace_get_identifier(post_class_t *class_gen) +{ + return "tvtime"; +} + +static char *deinterlace_get_description(post_class_t *class_gen) +{ + return "advanced deinterlacer plugin with pulldown detection"; +} + +static void deinterlace_class_dispose(post_class_t *class_gen) +{ + free(class_gen); +} + + +static void deinterlace_dispose(post_plugin_t *this_gen) +{ + post_plugin_deinterlace_t *this = (post_plugin_deinterlace_t *)this_gen; + post_deinterlace_out_t *output = (post_deinterlace_out_t *)xine_list_first_content(this->post.output); + xine_video_port_t *port = *(xine_video_port_t **)output->xine_out.data; + + _flush_frames(this); + + if (this->stream) + port->close(port, this->stream); + + free(this->post.xine_post.audio_input); + free(this->post.xine_post.video_input); + free(xine_list_first_content(this->post.input)); + free(xine_list_first_content(this->post.output)); + xine_list_free(this->post.input); + xine_list_free(this->post.output); + free(this); +} + + +static int deinterlace_rewire(xine_post_out_t *output_gen, void *data) +{ + post_deinterlace_out_t *output = (post_deinterlace_out_t *)output_gen; + xine_video_port_t *old_port = *(xine_video_port_t **)output_gen->data; + xine_video_port_t *new_port = (xine_video_port_t *)data; + + if (!data) + return 0; + + if (output->plugin->stream) { + /* register our stream at the new output port */ + old_port->close(old_port, output->plugin->stream); + new_port->open(new_port, output->plugin->stream); + } + /* 
reconnect ourselves */ + *(xine_video_port_t **)output_gen->data = new_port; + + return 1; +} + +static int deinterlace_get_property(xine_video_port_t *port_gen, int property) { + post_video_port_t *port = (post_video_port_t *)port_gen; + post_plugin_deinterlace_t *this = (post_plugin_deinterlace_t *)port->post; + if( property == XINE_PARAM_VO_DEINTERLACE && this->cur_method ) + return this->enabled; + else + return port->original_port->get_property(port->original_port, property); +} + +static int deinterlace_set_property(xine_video_port_t *port_gen, int property, int value) { + post_video_port_t *port = (post_video_port_t *)port_gen; + post_plugin_deinterlace_t *this = (post_plugin_deinterlace_t *)port->post; + if( property == XINE_PARAM_VO_DEINTERLACE && this->cur_method ) { + pthread_mutex_lock (&this->lock); + + if( this->enabled != value ) { + int i; + + for( i = 0; i < NUM_RECENT_FRAMES; i++ ) { + if( this->recent_frame[i] ) { + this->recent_frame[i]->free(this->recent_frame[i]); + this->recent_frame[i] = NULL; + } + } + } + + this->enabled = value; + + pthread_mutex_unlock (&this->lock); + + return this->enabled; + } else + return port->original_port->set_property(port->original_port, property, value); +} + +static void deinterlace_flush(xine_video_port_t *port_gen) { + post_video_port_t *port = (post_video_port_t *)port_gen; + post_plugin_deinterlace_t *this = (post_plugin_deinterlace_t *)port->post; + + _flush_frames(this); + + port->original_port->flush(port->original_port); +} + +static void deinterlace_open(xine_video_port_t *port_gen, xine_stream_t *stream) +{ + post_video_port_t *port = (post_video_port_t *)port_gen; + post_plugin_deinterlace_t *this = (post_plugin_deinterlace_t *)port->post; + this->stream = stream; + port->original_port->open(port->original_port, stream); +} + +static vo_frame_t *deinterlace_get_frame(xine_video_port_t *port_gen, uint32_t width, + uint32_t height, int ratio_code, + int format, int flags) +{ + post_video_port_t *port 
= (post_video_port_t *)port_gen; + post_plugin_deinterlace_t *this = (post_plugin_deinterlace_t *)port->post; + vo_frame_t *frame; + + frame = port->original_port->get_frame(port->original_port, + width, height, ratio_code, format, flags); + + pthread_mutex_lock (&this->lock); + + /* do not intercept if not enabled or not interlaced */ + if( this->enabled && this->cur_method && + (flags & VO_INTERLACED_FLAG) ) { + post_intercept_video_frame(frame, port); + /* replace with our own draw function */ + frame->draw = deinterlace_draw; + /* decoders should not copy the frames, since they won't be displayed */ + frame->copy = NULL; + } + + pthread_mutex_unlock (&this->lock); + + return frame; +} + +static void deinterlace_close(xine_video_port_t *port_gen, xine_stream_t *stream) +{ + post_video_port_t *port = (post_video_port_t *)port_gen; + post_plugin_deinterlace_t *this = (post_plugin_deinterlace_t *)port->post; + int i; + + this->stream = NULL; + + for( i = 0; i < NUM_RECENT_FRAMES; i++ ) { + if( this->recent_frame[i] ) { + this->recent_frame[i]->free(this->recent_frame[i]); + this->recent_frame[i] = NULL; + } + } + + port->original_port->close(port->original_port, stream); +} + + +static int deinterlace_draw(vo_frame_t *frame, xine_stream_t *stream) +{ + post_video_port_t *port = (post_video_port_t *)frame->port; + post_plugin_deinterlace_t *this = (post_plugin_deinterlace_t *)port->post; + vo_frame_t *deinterlaced_frame; + vo_frame_t *yuy2_frame; + int i, skip, progressive = 0; + + post_restore_video_frame(frame, port); + + /* this should be used to detect any special rff pattern */ + this->rff_pattern = this->rff_pattern << 1; + this->rff_pattern |= !!frame->repeat_first_field; + + if( ((this->rff_pattern & 0xff) == 0xaa || + (this->rff_pattern & 0xff) == 0x55) ) { + /* special case for ntsc 3:2 pulldown */ + progressive = 1; + } + + if( !frame->bad_frame ) { + + + /* convert to YUY2 if needed */ + if( frame->format == XINE_IMGFMT_YV12 ) { + + yuy2_frame = 
port->original_port->get_frame(port->original_port, + frame->width, frame->height, frame->ratio, XINE_IMGFMT_YUY2, VO_BOTH_FIELDS); + + yuy2_frame->pts = frame->pts; + yuy2_frame->duration = frame->duration; + extra_info_merge(yuy2_frame->extra_info, frame->extra_info); + + /* the logic for deciding upsampling to use comes from: + * http://www.hometheaterhifi.com/volume_8_2/dvd-benchmark-special-report-chroma-bug-4-2001.html + */ + yv12_to_yuy2(frame->base[0], frame->pitches[0], + frame->base[1], frame->pitches[1], + frame->base[2], frame->pitches[2], + yuy2_frame->base[0], yuy2_frame->pitches[0], + frame->width, frame->height, + frame->progressive_frame || progressive ); + + } else { + yuy2_frame = frame; + yuy2_frame->lock(yuy2_frame); + } + + + pthread_mutex_lock (&this->lock); + /* check if frame format changed */ + for(i = 0; i < NUM_RECENT_FRAMES; i++ ) { + if( this->recent_frame[i] && + (this->recent_frame[i]->width != frame->width || + this->recent_frame[i]->height != frame->height) ) { + this->recent_frame[i]->free(this->recent_frame[i]); + this->recent_frame[i] = NULL; + } + } + + + /* using frame->progressive_frame may help displaying still menus. + * however, it is known that some rare material set it wrong. 
+ */ + if( progressive || + (this->use_progressive_frame_flag && frame->progressive_frame) ) { + + pthread_mutex_unlock (&this->lock); + skip = yuy2_frame->draw(yuy2_frame, stream); + pthread_mutex_lock (&this->lock); + frame->vpts = yuy2_frame->vpts; + + } else { + int force24fps; + int fields[2]; + + if( this->framerate_mode == FRAMERATE_FULL ) { + fields[0] = 0; + fields[1] = 1; + } else if ( this->framerate_mode == FRAMERATE_HALF_TFF ) { + fields[0] = 0; + } else if ( this->framerate_mode == FRAMERATE_HALF_BFF ) { + fields[0] = 1; + } + + + force24fps = this->judder_correction && + ( (this->pulldown == PULLDOWN_DALIAS) || + (this->pulldown == PULLDOWN_VEKTOR && this->tvtime->filmmode) ); + + skip = 0; + + /* Build the output from the first field. */ + pthread_mutex_unlock (&this->lock); + deinterlaced_frame = port->original_port->get_frame(port->original_port, + frame->width, frame->height, frame->ratio, XINE_IMGFMT_YUY2, VO_BOTH_FIELDS); + pthread_mutex_lock (&this->lock); + + extra_info_merge(deinterlaced_frame->extra_info, frame->extra_info); + + if( this->tvtime->curmethod->doscalerbob ) { + deinterlaced_frame->bad_frame = !tvtime_build_copied_field(this->tvtime, + deinterlaced_frame->base[0], + yuy2_frame->base[0], fields[0], + frame->width, frame->height, + yuy2_frame->pitches[0], deinterlaced_frame->pitches[0] ); + } else { + deinterlaced_frame->bad_frame = !tvtime_build_deinterlaced_frame(this->tvtime, + deinterlaced_frame->base[0], + yuy2_frame->base[0], + (this->recent_frame[0])?this->recent_frame[0]->base[0]:yuy2_frame->base[0], + (this->recent_frame[1])?this->recent_frame[1]->base[0]:yuy2_frame->base[0], + fields[0], frame->width, frame->height, + yuy2_frame->pitches[0], deinterlaced_frame->pitches[0]); + } + + if( this->tvtime->curmethod->doscalerbob ) { + deinterlaced_frame->height /= 2; + } else { + + } + + pthread_mutex_unlock (&this->lock); + if( force24fps ) { + if( !deinterlaced_frame->bad_frame ) { + this->framecounter++; + if( frame->pts && 
this->framecounter > FRAMES_TO_SYNC ) { + deinterlaced_frame->pts = frame->pts; + this->framecounter = 0; + } else + deinterlaced_frame->pts = 0; + deinterlaced_frame->duration = FPS_24_DURATION; + skip = deinterlaced_frame->draw(deinterlaced_frame, stream); + } else { + skip = 0; + } + } else { + deinterlaced_frame->pts = frame->pts; + deinterlaced_frame->duration = (this->framerate_mode == FRAMERATE_FULL)? + frame->duration/2:frame->duration; + skip = deinterlaced_frame->draw(deinterlaced_frame, stream); + } + + frame->vpts = deinterlaced_frame->vpts; + deinterlaced_frame->free(deinterlaced_frame); + pthread_mutex_lock (&this->lock); + + force24fps = this->judder_correction && + ( (this->pulldown == PULLDOWN_DALIAS) || + (this->pulldown == PULLDOWN_VEKTOR && this->tvtime->filmmode) ); + + if( this->framerate_mode == FRAMERATE_FULL ) { + + /* Build the output from the second field. */ + pthread_mutex_unlock (&this->lock); + deinterlaced_frame = port->original_port->get_frame(port->original_port, + frame->width, frame->height, frame->ratio, XINE_IMGFMT_YUY2, VO_BOTH_FIELDS); + pthread_mutex_lock (&this->lock); + + extra_info_merge(deinterlaced_frame->extra_info, frame->extra_info); + + if( skip > 0 && !this->pulldown ) { + deinterlaced_frame->bad_frame = 1; + } else { + if( this->tvtime->curmethod->doscalerbob ) { + deinterlaced_frame->bad_frame = !tvtime_build_copied_field(this->tvtime, + deinterlaced_frame->base[0], + yuy2_frame->base[0], fields[1], + frame->width, frame->height, + yuy2_frame->pitches[0], deinterlaced_frame->pitches[0] ); + } else { + deinterlaced_frame->bad_frame = !tvtime_build_deinterlaced_frame(this->tvtime, + deinterlaced_frame->base[0], + yuy2_frame->base[0], + (this->recent_frame[0])?this->recent_frame[0]->base[0]:yuy2_frame->base[0], + (this->recent_frame[1])?this->recent_frame[1]->base[0]:yuy2_frame->base[0], + fields[1], frame->width, frame->height, + yuy2_frame->pitches[0], deinterlaced_frame->pitches[0]); + } + } + + if( 
this->tvtime->curmethod->doscalerbob ) { + deinterlaced_frame->height /= 2; + } else { + + } + + pthread_mutex_unlock (&this->lock); + if( force24fps ) { + if( !deinterlaced_frame->bad_frame ) { + this->framecounter++; + if( frame->pts && this->framecounter > FRAMES_TO_SYNC ) { + deinterlaced_frame->pts = frame->pts; + this->framecounter = 0; + } else + deinterlaced_frame->pts = 0; + deinterlaced_frame->duration = FPS_24_DURATION; + skip = deinterlaced_frame->draw(deinterlaced_frame, stream); + } else { + skip = 0; + } + } else { + deinterlaced_frame->pts = 0; + deinterlaced_frame->duration = frame->duration/2; + skip = deinterlaced_frame->draw(deinterlaced_frame, stream); + } + + frame->vpts = deinterlaced_frame->vpts; + deinterlaced_frame->free(deinterlaced_frame); + pthread_mutex_lock (&this->lock); + } + } + + /* don't drop frames when pulldown mode is enabled. otherwise + * pulldown detection fails (yo-yo effect has also been seen) + */ + if( this->pulldown ) + skip = 0; + + /* keep track of recent frames */ + i = NUM_RECENT_FRAMES-1; + if( this->recent_frame[i] ) + this->recent_frame[i]->free(this->recent_frame[i]); + for( ; i ; i-- ) + this->recent_frame[i] = this->recent_frame[i-1]; + this->recent_frame[0] = yuy2_frame; + + pthread_mutex_unlock (&this->lock); + + } else { + skip = frame->draw(frame, stream); + } + + + return skip; +} diff --git a/src/post/planar/deinterlace.c b/src/post/planar/deinterlace.c deleted file mode 100644 index 4abb6887a..000000000 --- a/src/post/planar/deinterlace.c +++ /dev/null @@ -1,416 +0,0 @@ -/* - * Copyright (C) 2000-2002 the xine project - * - * This file is part of xine, a free video player. - * - * xine is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * xine is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * $Id: deinterlace.c,v 1.2 2003/05/28 12:21:53 miguelfreitas Exp $ - */ - -/* - * simple video deinterlacer plugin - */ - -#include "xine_internal.h" -#include "post.h" -#include "xineutils.h" -#include <pthread.h> - - -/* plugin class initialization function */ -static void *deinterlace_init_plugin(xine_t *xine, void *); - - -/* plugin catalog information */ -post_info_t deinterlace_special_info = { XINE_POST_TYPE_VIDEO_FILTER }; - -plugin_info_t xine_plugin_info[] = { - /* type, API, "name", version, special_info, init_function */ - { PLUGIN_POST, 2, "deinterlace", XINE_VERSION_CODE, &deinterlace_special_info, &deinterlace_init_plugin }, - { PLUGIN_NONE, 0, "", 0, NULL, NULL } -}; - - -typedef struct post_plugin_deinterlace_s post_plugin_deinterlace_t; - -void deinterlace_bob_mmx(post_plugin_deinterlace_t *this, vo_frame_t *frame); -void deinterlace_weave_mmx(post_plugin_deinterlace_t *this, vo_frame_t *frame); -void deinterlace_greedy_mmx(post_plugin_deinterlace_t *this, vo_frame_t *frame); -void deinterlace_onefield(post_plugin_deinterlace_t *this, vo_frame_t *frame); -void deinterlace_linearblend_mmx(post_plugin_deinterlace_t *this, vo_frame_t *frame); -void deinterlace_linearblend(post_plugin_deinterlace_t *this, vo_frame_t *frame); - -static struct { - char *name; - void (*function)(post_plugin_deinterlace_t *this, vo_frame_t *frame); - uint32_t cpu_require; -} deinterlace_method[] = -{ - { "by driver", NULL, 0 }, - { "bob MMX", NULL /*deinterlace_bob_mmx*/, MM_MMX }, - { "weave MMX", NULL 
/*deinterlace_weave_mmx*/, MM_MMX }, - { "greedy MMX", NULL /*deinterlace_greedy_mmx*/, MM_MMX }, - { "non supported", NULL, 9999999 }, - { "linearblend MMX", NULL /*deinterlace_linearblend_mmx*/, MM_MMX }, - { "linearblend", NULL /*deinterlace_linearblend*/, 0 }, - { NULL, NULL, 0 } -}; - -static char *enum_methods[sizeof(deinterlace_method)/sizeof(deinterlace_method[0])+1]; - -/* - * this is the struct used by "parameters api" - */ -typedef struct deinterlace_parameters_s { - - int method; - int enabled; - -} deinterlace_parameters_t; - -/* - * description of params struct - */ -START_PARAM_DESCR( deinterlace_parameters_t ) -PARAM_ITEM( POST_PARAM_TYPE_INT, method, enum_methods, 0, 0, 0, - "deinterlace method" ) -PARAM_ITEM( POST_PARAM_TYPE_BOOL, enabled, NULL, 0, 1, 0, - "enable/disable" ) -END_PARAM_DESCR( param_descr ) - -/* plugin structure */ -struct post_plugin_deinterlace_s { - post_plugin_t post; - - /* private data */ - xine_video_port_t *vo_port; - xine_stream_t *stream; - - int cur_method; - void (*cur_function)(post_plugin_deinterlace_t *this, vo_frame_t *frame); - int enabled; - - pthread_mutex_t lock; -}; - - -static int set_parameters (xine_post_t *this_gen, void *param_gen) { - post_plugin_deinterlace_t *this = (post_plugin_deinterlace_t *)this_gen; - deinterlace_parameters_t *param = (deinterlace_parameters_t *)param_gen; - char *name; - int i; - - pthread_mutex_lock (&this->lock); - - this->cur_method = param->method; - name = enum_methods[this->cur_method]; - - for(i = 0; deinterlace_method[i].name; i++ ) { - if( !strcmp(name, deinterlace_method[i].name) ) - this->cur_function = deinterlace_method[i].function; - } - - this->enabled = param->enabled; - pthread_mutex_unlock (&this->lock); - - return 1; -} - -int get_parameters (xine_post_t *this_gen, void *param_gen) { - post_plugin_deinterlace_t *this = (post_plugin_deinterlace_t *)this_gen; - deinterlace_parameters_t *param = (deinterlace_parameters_t *)param_gen; - - param->method = 
this->cur_method; - param->enabled = this->enabled; - - return 1; -} - -xine_post_api_descr_t * get_param_descr (void) { - return &param_descr; -} - -static xine_post_api_t post_api = { - set_parameters, - get_parameters, - get_param_descr, -}; - -typedef struct post_deinterlace_out_s post_deinterlace_out_t; -struct post_deinterlace_out_s { - xine_post_out_t xine_out; - - post_plugin_deinterlace_t *plugin; -}; - -/* plugin class functions */ -static post_plugin_t *deinterlace_open_plugin(post_class_t *class_gen, int inputs, - xine_audio_port_t **audio_target, - xine_video_port_t **video_target); -static char *deinterlace_get_identifier(post_class_t *class_gen); -static char *deinterlace_get_description(post_class_t *class_gen); -static void deinterlace_class_dispose(post_class_t *class_gen); - -/* plugin instance functions */ -static void deinterlace_dispose(post_plugin_t *this_gen); - -/* rewire function */ -static int deinterlace_rewire(xine_post_out_t *output, void *data); - -/* replaced video_port functions */ -static void deinterlace_open(xine_video_port_t *port_gen, xine_stream_t *stream); -static vo_frame_t *deinterlace_get_frame(xine_video_port_t *port_gen, uint32_t width, - uint32_t height, int ratio_code, - int format, int flags); -static void deinterlace_close(xine_video_port_t *port_gen, xine_stream_t *stream); - -/* replaced vo_frame functions */ -static int deinterlace_draw(vo_frame_t *frame, xine_stream_t *stream); - - -static void *deinterlace_init_plugin(xine_t *xine, void *data) -{ - post_class_t *class = (post_class_t *)malloc(sizeof(post_class_t)); - uint32_t config_flags = xine_mm_accel(); - int i, j; - - if (!class) - return NULL; - - class->open_plugin = deinterlace_open_plugin; - class->get_identifier = deinterlace_get_identifier; - class->get_description = deinterlace_get_description; - class->dispose = deinterlace_class_dispose; - - for(i = 0, j = 0; deinterlace_method[i].name; i++ ) { - if( (config_flags & deinterlace_method[i].cpu_require)
== - deinterlace_method[i].cpu_require ) - enum_methods[j++] = deinterlace_method[i].name; - } - enum_methods[j] = NULL; - - return class; -} - - -static post_plugin_t *deinterlace_open_plugin(post_class_t *class_gen, int inputs, - xine_audio_port_t **audio_target, - xine_video_port_t **video_target) -{ - post_plugin_deinterlace_t *this = (post_plugin_deinterlace_t *)malloc(sizeof(post_plugin_deinterlace_t)); - xine_post_in_t *input = (xine_post_in_t *)malloc(sizeof(xine_post_in_t)); - xine_post_in_t *input_api = (xine_post_in_t *)malloc(sizeof(xine_post_in_t)); - post_deinterlace_out_t *output = (post_deinterlace_out_t *)malloc(sizeof(post_deinterlace_out_t)); - post_video_port_t *port; - - if (!this || !input || !output || !video_target || !video_target[0]) { - free(this); - free(input); - free(output); - return NULL; - } - - this->stream = NULL; - this->cur_function = NULL; - this->cur_method = 0; - this->enabled = 0; - pthread_mutex_init (&this->lock, NULL); - - port = post_intercept_video_port(&this->post, video_target[0]); - /* replace with our own get_frame function */ - port->port.open = deinterlace_open; - port->port.get_frame = deinterlace_get_frame; - port->port.close = deinterlace_close; - - input->name = "video"; - input->type = XINE_POST_DATA_VIDEO; - input->data = (xine_video_port_t *)&port->port; - - input_api->name = "parameters"; - input_api->type = XINE_POST_DATA_PARAMETERS; - input_api->data = &post_api; - - output->xine_out.name = "deinterlaced video"; - output->xine_out.type = XINE_POST_DATA_VIDEO; - output->xine_out.data = (xine_video_port_t **)&port->original_port; - output->xine_out.rewire = deinterlace_rewire; - output->plugin = this; - - this->post.xine_post.audio_input = (xine_audio_port_t **)malloc(sizeof(xine_audio_port_t *)); - this->post.xine_post.audio_input[0] = NULL; - this->post.xine_post.video_input = (xine_video_port_t **)malloc(sizeof(xine_video_port_t *) * 2); - this->post.xine_post.video_input[0] = &port->port; - 
this->post.xine_post.video_input[1] = NULL; - - this->post.input = xine_list_new(); - this->post.output = xine_list_new(); - - xine_list_append_content(this->post.input, input); - xine_list_append_content(this->post.input, input_api); - xine_list_append_content(this->post.output, output); - - this->post.dispose = deinterlace_dispose; - - return &this->post; -} - -static char *deinterlace_get_identifier(post_class_t *class_gen) -{ - return "deinterlace"; -} - -static char *deinterlace_get_description(post_class_t *class_gen) -{ - return "frame deinterlacer"; -} - -static void deinterlace_class_dispose(post_class_t *class_gen) -{ - free(class_gen); -} - - -static void deinterlace_dispose(post_plugin_t *this_gen) -{ - post_plugin_deinterlace_t *this = (post_plugin_deinterlace_t *)this_gen; - post_deinterlace_out_t *output = (post_deinterlace_out_t *)xine_list_first_content(this->post.output); - xine_video_port_t *port = *(xine_video_port_t **)output->xine_out.data; - - if (this->stream) - port->close(port, this->stream); - - free(this->post.xine_post.audio_input); - free(this->post.xine_post.video_input); - free(xine_list_first_content(this->post.input)); - free(xine_list_first_content(this->post.output)); - xine_list_free(this->post.input); - xine_list_free(this->post.output); - free(this); -} - - -static int deinterlace_rewire(xine_post_out_t *output_gen, void *data) -{ - post_deinterlace_out_t *output = (post_deinterlace_out_t *)output_gen; - xine_video_port_t *old_port = *(xine_video_port_t **)output_gen->data; - xine_video_port_t *new_port = (xine_video_port_t *)data; - - if (!data) - return 0; - if (output->plugin->stream) { - /* register our stream at the new output port */ - old_port->close(old_port, output->plugin->stream); - new_port->open(new_port, output->plugin->stream); - } - /* reconnect ourselves */ - *(xine_video_port_t **)output_gen->data = new_port; - return 1; -} - - -static void deinterlace_open(xine_video_port_t *port_gen, xine_stream_t *stream) 
-{ - post_video_port_t *port = (post_video_port_t *)port_gen; - post_plugin_deinterlace_t *this = (post_plugin_deinterlace_t *)port->post; - this->stream = stream; - port->original_port->open(port->original_port, stream); -} - -static vo_frame_t *deinterlace_get_frame(xine_video_port_t *port_gen, uint32_t width, - uint32_t height, int ratio_code, - int format, int flags) -{ - post_video_port_t *port = (post_video_port_t *)port_gen; - post_plugin_deinterlace_t *this = (post_plugin_deinterlace_t *)port->post; - vo_frame_t *frame; - - frame = port->original_port->get_frame(port->original_port, - width, height, ratio_code, format, flags); - - pthread_mutex_lock (&this->lock); - - /* do not intercept if not enabled */ - if( this->enabled && this->cur_method ) { - post_intercept_video_frame(frame, port); - /* replace with our own draw function */ - frame->draw = deinterlace_draw; - /* decoders should not copy the frames, since they won't be displayed */ - frame->copy = NULL; - } - - pthread_mutex_unlock (&this->lock); - - return frame; -} - -static void deinterlace_close(xine_video_port_t *port_gen, xine_stream_t *stream) -{ - post_video_port_t *port = (post_video_port_t *)port_gen; - post_plugin_deinterlace_t *this = (post_plugin_deinterlace_t *)port->post; - this->stream = NULL; - port->original_port->close(port->original_port, stream); -} - - -static int deinterlace_draw(vo_frame_t *frame, xine_stream_t *stream) -{ - post_video_port_t *port = (post_video_port_t *)frame->port; - post_plugin_deinterlace_t *this = (post_plugin_deinterlace_t *)port->post; - vo_frame_t *deinterlaced_frame; - int size, i, skip; - - /* deinterlacer not implemented yet: this is the "inverter" function */ - - deinterlaced_frame = port->original_port->get_frame(port->original_port, - frame->width, frame->height, frame->ratio, frame->format, VO_BOTH_FIELDS); - deinterlaced_frame->pts = frame->pts; - deinterlaced_frame->duration = frame->duration; - deinterlaced_frame->bad_frame = 
frame->bad_frame; - extra_info_merge(deinterlaced_frame->extra_info, frame->extra_info); - - switch (deinterlaced_frame->format) { - case XINE_IMGFMT_YUY2: - size = deinterlaced_frame->pitches[0] * deinterlaced_frame->height; - for (i = 0; i < size; i++) - deinterlaced_frame->base[0][i] = 0xff - frame->base[0][i]; - break; - case XINE_IMGFMT_YV12: - /* Y */ - size = deinterlaced_frame->pitches[0] * deinterlaced_frame->height; - for (i = 0; i < size; i++) - deinterlaced_frame->base[0][i] = 0xff - frame->base[0][i]; - /* U */ - size = deinterlaced_frame->pitches[1] * ((deinterlaced_frame->height + 1) / 2); - for (i = 0; i < size; i++) - deinterlaced_frame->base[1][i] = 0xff - frame->base[1][i]; - /* V */ - size = deinterlaced_frame->pitches[2] * ((deinterlaced_frame->height + 1) / 2); - for (i = 0; i < size; i++) - deinterlaced_frame->base[2][i] = 0xff - frame->base[2][i]; - break; - default: - printf("deinterlace: cannot handle image format %d\n", frame->format); - deinterlaced_frame->free(deinterlaced_frame); - post_restore_video_frame(frame, port); - return frame->draw(frame, stream); - } - skip = deinterlaced_frame->draw(deinterlaced_frame, stream); - deinterlaced_frame->free(deinterlaced_frame); - frame->vpts = deinterlaced_frame->vpts; - post_restore_video_frame(frame, port); - - return skip; -} |