diff options
| -rw-r--r-- | src/video_out/yuv2rgb.c | 68 | ||||
| -rw-r--r-- | src/video_out/yuv2rgb.h | 10 | ||||
| -rw-r--r-- | src/video_out/yuv2rgb_mmx.c | 80 | 
3 files changed, 87 insertions, 71 deletions
| diff --git a/src/video_out/yuv2rgb.c b/src/video_out/yuv2rgb.c index 94664721f..63674e675 100644 --- a/src/video_out/yuv2rgb.c +++ b/src/video_out/yuv2rgb.c @@ -22,7 +22,7 @@   * along with this program; if not, write to the Free Software   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA   * - * $Id: yuv2rgb.c,v 1.39 2003/02/02 13:38:24 esnel Exp $ + * $Id: yuv2rgb.c,v 1.40 2003/02/02 17:27:45 esnel Exp $   */  #include "config.h" @@ -2215,7 +2215,8 @@ static int div_round (int dividend, int divisor)      return -((-dividend + (divisor>>1)) / divisor);  } -static void yuv2rgb_setup_tables (yuv2rgb_factory_t *this, int mode, int swapped)  +static void yuv2rgb_set_csc_levels (yuv2rgb_factory_t *this, +				    int brightness, int contrast, int saturation)  {    int i;    uint8_t table_Y[1024]; @@ -2231,10 +2232,13 @@ static void yuv2rgb_setup_tables (yuv2rgb_factory_t *this, int mode, int swapped    int cgu = -Inverse_Table_6_9[this->matrix_coefficients][2];    int cgv = -Inverse_Table_6_9[this->matrix_coefficients][3]; +  int mode = this->mode; +  int swapped = this->swapped; +    for (i = 0; i < 1024; i++) {      int j; -    j = (76309 * (i - 384 - 16) + 32768) >> 16; +    j = (76309 * (i - 384 - 16 + brightness) + 32768) >> 16;      j = (j < 0) ? 0 : ((j > 255) ? 255 : j);      table_Y[i] = j;    } @@ -2242,8 +2246,10 @@ static void yuv2rgb_setup_tables (yuv2rgb_factory_t *this, int mode, int swapped    switch (mode) {    case MODE_32_RGB:    case MODE_32_BGR: -    table_32 = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint32_t)); -    this->table_base = table_32; +    if (this->table_base == NULL) { +      this->table_base = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint32_t)); +    } +    table_32 = this->table_base;      entry_size = sizeof (uint32_t);      table_r = table_32 + 197; @@ -2272,8 +2278,10 @@ static void yuv2rgb_setup_tables (yuv2rgb_factory_t *this, int mode, int swapped    case MODE_24_RGB:    case MODE_24_BGR: -    table_8 = malloc ((256 + 2*232) * sizeof (uint8_t)); -    this->table_base = table_8; +    if (this->table_base == NULL) { +      this->table_base = malloc ((256 + 2*232) * sizeof (uint8_t)); +    } +    table_8 = this->table_base;      entry_size = sizeof (uint8_t);      table_r = table_g = table_b = table_8 + 232; @@ -2286,8 +2294,10 @@ static void yuv2rgb_setup_tables (yuv2rgb_factory_t *this, int mode, int swapped    case MODE_16_BGR:    case MODE_15_RGB:    case MODE_16_RGB: -    table_16 = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint16_t)); -    this->table_base = table_16; +    if (this->table_base == NULL) { +      this->table_base = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint16_t)); +    } +    table_16 = this->table_base;      entry_size = sizeof (uint16_t);      table_r = table_16 + 197; @@ -2327,8 +2337,10 @@ static void yuv2rgb_setup_tables (yuv2rgb_factory_t *this, int mode, int swapped    case MODE_8_RGB:    case MODE_8_BGR: -    table_8 = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint8_t)); -    this->table_base = table_8; +    if (this->table_base == NULL) { +      this->table_base = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint8_t)); +    } +    table_8 = this->table_base;      entry_size = sizeof (uint8_t);      table_r = table_8 + 197; @@ -2352,8 +2364,10 @@ static void yuv2rgb_setup_tables (yuv2rgb_factory_t *this, int mode, int swapped      return;    case MODE_PALETTE: -    table_16 = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint16_t)); -    this->table_base = table_16; +    if (this->table_base == NULL) { +      this->table_base = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint16_t)); +    } +    table_16 = this->table_base;      entry_size = sizeof (uint16_t);      table_r = table_16 + 197; @@ -2390,8 +2404,10 @@ static void yuv2rgb_setup_tables (yuv2rgb_factory_t *this, int mode, int swapped      this->table_bU[i] = (((uint8_t *)table_b) +  			 entry_size * div_round (cbu * (i-128), 76309));    } -  this->gamma = 0; -  this->entry_size = entry_size; + +#ifdef ARCH_X86 +  mmx_yuv2rgb_set_csc_levels (this, brightness, contrast, saturation); +#endif    }  static uint32_t yuv2rgb_single_pixel_32 (yuv2rgb_t *this, uint8_t y, uint8_t u, uint8_t v) @@ -3110,6 +3126,7 @@ yuv2rgb_t *yuv2rgb_create_converter (yuv2rgb_factory_t *factory) {    this->table_gU                 = factory->table_gU;    this->table_gV                 = factory->table_gV;    this->table_bU                 = factory->table_bU; +  this->table_mmx                = factory->table_mmx;    this->yuv2rgb_fun              = factory->yuv2rgb_fun;    this->yuy22rgb_fun             = factory->yuy22rgb_fun; @@ -3125,25 +3142,10 @@ yuv2rgb_t *yuv2rgb_create_converter (yuv2rgb_factory_t *factory) {   * factory functions    */ -void yuv2rgb_set_csc_levels (yuv2rgb_factory_t *this, -			     int brightness, int contrast, int saturation) { - -  int i, gamma = brightness; -   -  for (i = 0; i < 256; i++) { -    (uint8_t *)this->table_rV[i] += this->entry_size*(gamma - this->gamma); -    (uint8_t *)this->table_gU[i] += this->entry_size*(gamma - this->gamma); -    (uint8_t *)this->table_bU[i] += this->entry_size*(gamma - this->gamma); -  } -#ifdef ARCH_X86 -  mmx_yuv2rgb_set_csc_levels (this, brightness, contrast, saturation); -#endif   -  this->gamma = gamma; -} -  static void yuv2rgb_factory_dispose (yuv2rgb_factory_t *this) {    free (this->table_base); +  free (this->table_mmx_base);    free (this);  } @@ -3166,9 +3168,11 @@ yuv2rgb_factory_t* yuv2rgb_factory_init (int mode, int swapped,    this->dispose             = yuv2rgb_factory_dispose;    this->matrix_coefficients = 6;    this->table_base          = NULL; +  this->table_mmx           = NULL; +  this->table_mmx_base      = NULL; -  yuv2rgb_setup_tables (this, mode, swapped); +  yuv2rgb_set_csc_levels (this, 0, 128, 128);    /*     * auto-probe for the best yuv2rgb function diff --git a/src/video_out/yuv2rgb.h b/src/video_out/yuv2rgb.h index a85d37458..8beea99e9 100644 --- a/src/video_out/yuv2rgb.h +++ b/src/video_out/yuv2rgb.h @@ -99,10 +99,11 @@ struct yuv2rgb_s {    void            **table_gU;    int              *table_gV;    void            **table_bU; +  void             *table_mmx;    uint8_t          *cmap; -  scale_line_func_t scale_line; -   +  scale_line_func_t scale_line;   +  } ;  /* @@ -131,9 +132,6 @@ struct yuv2rgb_factory_s {    int      swapped;    uint8_t *cmap; -  int      gamma; -  int      entry_size; -    uint32_t matrix_coefficients;    void    *table_base; @@ -141,6 +139,8 @@ struct yuv2rgb_factory_s {    void    *table_gU[256];    int      table_gV[256];    void    *table_bU[256]; +  void    *table_mmx_base; +  void    *table_mmx;    /* preselected functions for mode/swap/hardware */    yuv2rgb_fun_t               yuv2rgb_fun; diff --git a/src/video_out/yuv2rgb_mmx.c b/src/video_out/yuv2rgb_mmx.c index c631db885..5c4908bde 100644 --- a/src/video_out/yuv2rgb_mmx.c +++ b/src/video_out/yuv2rgb_mmx.c @@ -47,13 +47,17 @@ do {				\  	movq_r2m (src, dest);	\  } while (0) -static mmx_t mmx_subYw = {0x1010101010101010}; -static mmx_t mmx_addYw = {0x0000000000000000}; -static mmx_t mmx_U_green = {0xf37df37df37df37d}; -static mmx_t mmx_U_blue = {0x4093409340934093}; -static mmx_t mmx_V_red = {0x3312331233123312}; -static mmx_t mmx_V_green = {0xe5fce5fce5fce5fc}; -static mmx_t mmx_Y_coeff = {0x253f253f253f253f}; +typedef struct mmx_csc_s mmx_csc_t; + +struct mmx_csc_s { +  mmx_t subYw; +  mmx_t addYw; +  mmx_t U_green; +  mmx_t U_blue; +  mmx_t V_red; +  mmx_t V_green; +  mmx_t Y_coeff; +};  extern const int32_t Inverse_Table_6_9[8][4]; @@ -62,6 +66,12 @@ void mmx_yuv2rgb_set_csc_levels(yuv2rgb_factory_t *this,  {    int a,s,i;    int crv, cbu, cgu, cgv, cty; +  mmx_csc_t *csc; + +  /* 'table_mmx' is 64bit aligned for better performance */ +  if (this->table_mmx == NULL) { +    this->table_mmx = xine_xmalloc_aligned (8, sizeof(mmx_csc_t), &this->table_mmx_base); +  }    if( brightness <= 16 ) {      a = 0; @@ -70,10 +80,12 @@ void mmx_yuv2rgb_set_csc_levels(yuv2rgb_factory_t *this,      a = brightness - 16;      s = 0;    } -   + +  csc = (mmx_csc_t *) this->table_mmx; +    for( i = 0; i < 8; i++ ) { -    *((unsigned char *)&mmx_subYw + i) = s; -    *((unsigned char *)&mmx_addYw + i) = a; +    csc->subYw.ub[i] = s; +    csc->addYw.ub[i] = a;    }    crv = Inverse_Table_6_9[this->matrix_coefficients][0]; @@ -88,15 +100,15 @@ void mmx_yuv2rgb_set_csc_levels(yuv2rgb_factory_t *this,    cty = (76309 * contrast + 512) / 1024;    for (i=0; i < 4; i++) { -    *((int16_t *)&mmx_U_green + i) = -cgu; -    *((int16_t *)&mmx_U_blue  + i) =  cbu; -    *((int16_t *)&mmx_V_red   + i) =  crv; -    *((int16_t *)&mmx_V_green + i) = -cgv; -    *((int16_t *)&mmx_Y_coeff + i) =  cty; +    csc->U_green.w[i] = -cgu; +    csc->U_blue.w[i]  =  cbu; +    csc->V_red.w[i]   =  crv; +    csc->V_green.w[i] = -cgv; +    csc->Y_coeff.w[i] =  cty;    }  } -static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv) +static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv, mmx_csc_t *csc)  {      static mmx_t mmx_80w = {0x0080008000800080};      static mmx_t mmx_00ffw = {0x00ff00ff00ff00ff}; @@ -104,8 +116,8 @@ static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv)      movq_m2r (*py, mm6);		// mm6 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0      pxor_r2r (mm4, mm4);		// mm4 = 0 -    psubusb_m2r (mmx_subYw, mm6);	// Y -= 16 -    paddusb_m2r (mmx_addYw, mm6); +    psubusb_m2r (csc->subYw, mm6);	// Y -= 16 +    paddusb_m2r (csc->addYw, mm6);      movd_m2r (*pu, mm0);		// mm0 = 00 00 00 00 u3 u2 u1 u0      movq_r2r (mm6, mm7);		// mm7 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 @@ -116,7 +128,7 @@ static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv)      movd_m2r (*pv, mm1);		// mm1 = 00 00 00 00 v3 v2 v1 v0      psllw_i2r (3, mm6);			// promote precision -    pmulhw_m2r (mmx_Y_coeff, mm6);	// mm6 = luma_rgb even +    pmulhw_m2r (csc->Y_coeff, mm6);	// mm6 = luma_rgb even      psllw_i2r (3, mm7);			// promote precision      punpcklbw_r2r (mm4, mm0);		// mm0 = u3 u2 u1 u0 @@ -124,7 +136,7 @@ static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv)      psubsw_m2r (mmx_80w, mm0);		// u -= 128      punpcklbw_r2r (mm4, mm1);		// mm1 = v3 v2 v1 v0 -    pmulhw_m2r (mmx_Y_coeff, mm7);	// mm7 = luma_rgb odd +    pmulhw_m2r (csc->Y_coeff, mm7);	// mm7 = luma_rgb odd      psllw_i2r (3, mm0);			// promote precision      psubsw_m2r (mmx_80w, mm1);		// v -= 128 @@ -134,7 +146,7 @@ static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv)      movq_r2r (mm1, mm4);		// mm4 = v3 v2 v1 v0 -    pmulhw_m2r (mmx_U_blue, mm0);	// mm0 = chroma_b +    pmulhw_m2r (csc->U_blue, mm0);		// mm0 = chroma_b      // slot @@ -143,7 +155,7 @@ static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv)      // slot -    pmulhw_m2r (mmx_V_red, mm1);	// mm1 = chroma_r +    pmulhw_m2r (csc->V_red, mm1);	// mm1 = chroma_r      movq_r2r (mm0, mm3);		// mm3 = chroma_b      paddsw_r2r (mm6, mm0);		// mm0 = B6 B4 B2 B0 @@ -152,7 +164,7 @@ static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv)      packuswb_r2r (mm0, mm0);		// saturate to 0-255 -    pmulhw_m2r (mmx_U_green, mm2);	// mm2 = u * u_green +    pmulhw_m2r (csc->U_green, mm2);	// mm2 = u * u_green      packuswb_r2r (mm3, mm3);		// saturate to 0-255 @@ -161,7 +173,7 @@ static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv)      punpcklbw_r2r (mm3, mm0);		// mm0 = B7 B6 B5 B4 B3 B2 B1 B0 -    pmulhw_m2r (mmx_V_green, mm4);	// mm4 = v * v_green +    pmulhw_m2r (csc->V_green, mm4);	// mm4 = v * v_green      // slot @@ -420,7 +432,7 @@ static inline void yuv420_rgb16 (yuv2rgb_t *this,  	i = width; img = image;  	do { -	  mmx_yuv2rgb (py, pu, pv);  +	  mmx_yuv2rgb (py, pu, pv, this->table_mmx);  	  mmx_unpack_16rgb (img, cpu);   	  py += 8;  	  pu += 4; @@ -461,7 +473,7 @@ static inline void yuv420_rgb16 (yuv2rgb_t *this,  	do {  	  /* printf ("i : %d\n",i); */ -	  mmx_yuv2rgb (y_buf, u_buf, v_buf);  +	  mmx_yuv2rgb (y_buf, u_buf, v_buf, this->table_mmx);  	  mmx_unpack_16rgb (img, cpu);   	  y_buf += 8;  	  u_buf += 4; @@ -531,7 +543,7 @@ static inline void yuv420_rgb15 (yuv2rgb_t *this,  	i = width; img = image;  	do { -	  mmx_yuv2rgb (py, pu, pv);  +	  mmx_yuv2rgb (py, pu, pv, this->table_mmx);  	  mmx_unpack_15rgb (img, cpu);   	  py += 8;  	  pu += 4; @@ -572,7 +584,7 @@ static inline void yuv420_rgb15 (yuv2rgb_t *this,  	do {  	  /* printf ("i : %d\n",i); */ -	  mmx_yuv2rgb (y_buf, u_buf, v_buf);  +	  mmx_yuv2rgb (y_buf, u_buf, v_buf, this->table_mmx);  	  mmx_unpack_15rgb (img, cpu);   	  y_buf += 8;  	  u_buf += 4; @@ -640,7 +652,7 @@ static inline void yuv420_rgb24 (yuv2rgb_t *this,        do {  	i = width; img = image;  	do { -	  mmx_yuv2rgb (py, pu, pv); +	  mmx_yuv2rgb (py, pu, pv, this->table_mmx);  	  mmx_unpack_24rgb (img, cpu);  	  py += 8;  	  pu += 4; @@ -682,7 +694,7 @@ static inline void yuv420_rgb24 (yuv2rgb_t *this,  	do {  	  /* printf ("i : %d\n",i); */ -	  mmx_yuv2rgb (y_buf, u_buf, v_buf);  +	  mmx_yuv2rgb (y_buf, u_buf, v_buf, this->table_mmx);  	  mmx_unpack_24rgb (img, cpu);   	  y_buf += 8;  	  u_buf += 4; @@ -751,7 +763,7 @@ static inline void yuv420_argb32 (yuv2rgb_t *this,        do {  	i = width; img = image;  	do { -	  mmx_yuv2rgb (py, pu, pv); +	  mmx_yuv2rgb (py, pu, pv, this->table_mmx);  	  mmx_unpack_32rgb (img, cpu);  	  py += 8;  	  pu += 4; @@ -793,7 +805,7 @@ static inline void yuv420_argb32 (yuv2rgb_t *this,  	do {  	  /* printf ("i : %d\n",i); */ -	  mmx_yuv2rgb (y_buf, u_buf, v_buf);  +	  mmx_yuv2rgb (y_buf, u_buf, v_buf, this->table_mmx);  	  mmx_unpack_32rgb (img, cpu);   	  y_buf += 8;  	  u_buf += 4; @@ -861,7 +873,7 @@ static inline void yuv420_abgr32 (yuv2rgb_t *this,        do {  	i = width; img = image;  	do { -	  mmx_yuv2rgb (py, pu, pv); +	  mmx_yuv2rgb (py, pu, pv, this->table_mmx);  	  mmx_unpack_32bgr (img, cpu);  	  py += 8;  	  pu += 4; @@ -903,7 +915,7 @@ static inline void yuv420_abgr32 (yuv2rgb_t *this,  	do {  	  /* printf ("i : %d\n",i); */ -	  mmx_yuv2rgb (y_buf, u_buf, v_buf);  +	  mmx_yuv2rgb (y_buf, u_buf, v_buf, this->table_mmx);  	  mmx_unpack_32bgr (img, cpu);   	  y_buf += 8;  	  u_buf += 4; | 
