Add a faster RGB->YUV conversion

CVS patchset: 664 CVS date: 2001/09/19 11:16:08
author: Juergen Keil <jkeil@users.sourceforge.net> 2001-09-19 11:16:08 +0000
committer: Juergen Keil <jkeil@users.sourceforge.net> 2001-09-19 11:16:08 +0000
commit: 91538feb86939b0cffab173b59a3f7fd5fa5212f (patch)
tree: 342cf84b9c0f295094feef77379f789a02d6b687
parent: ab4359a489433aa212b981b9961be3a73eeaf082 (diff)
download: xine-lib-91538feb86939b0cffab173b59a3f7fd5fa5212f.tar.gz
xine-lib-91538feb86939b0cffab173b59a3f7fd5fa5212f.tar.bz2
1 files changed, 127 insertions, 13 deletions
diff --git a/src/libw32dll/w32codec.c b/src/libw32dll/w32codec.c
index 25119f7d4..ca52da936 100644
--- a/src/libw32dll/w32codec.c
+++ b/src/libw32dll/w32codec.c
@@ -17,7 +17,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
  *
- * $Id: w32codec.c,v 1.25 2001/09/18 17:41:48 jkeil Exp $
+ * $Id: w32codec.c,v 1.26 2001/09/19 11:16:08 jkeil Exp $
  *
  * routines for using w32 codecs
  *
@@ -37,6 +37,7 @@
 #include "video_out.h"
 #include "audio_out.h"
 #include "buffer.h"
+#include "monitor.h"
 #include "xine_internal.h"
 
 extern char*   win32_codec_name; 
@@ -56,6 +57,9 @@ typedef struct w32v_decoder_s {
   unsigned char     buf[128*1024];
   void             *img_buffer;
   int               size;
+
+  /* profiler */
+  int		   prof_rgb2yuv;
 } w32v_decoder_t;
 
 typedef struct w32a_decoder_s {
@@ -74,6 +78,96 @@ typedef struct w32a_decoder_s {
 } w32a_decoder_t;
 
 
+/*
+ * RGB->YUY2 conversion, we need is for xine video-codec ->
+ * video-output interface
+ *
+ * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
+ * normalized to the range 0..MAXSAMPLE rather than -0.5 .. 0.5.
+ * The conversion equations to be implemented are therefore
+ *      Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+ *      Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B  + CENTERSAMPLE
+ *      Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B  + CENTERSAMPLE
+ * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
+ *
+ * To avoid floating-point arithmetic, we represent the fractional
+ * constants as integers scaled up by 2^16 (about 4 digits precision);
+ * we have to divide the products by 2^16, with appropriate rounding,
+ * to get the correct answer.
+ *
+ * FIXME: For the XShm video-out driver, this conversion is a huge
+ * waste of time (converting from RGB->YUY2 here and converting back
+ * from YUY2->RGB in the XShm driver).
+ */
+#define	MAXSAMPLE	255
+#define	CENTERSAMPLE	128
+
+#define	SCALEBITS	16
+#define	FIX(x)	 	( (int32_t) ( (x) * (1<<SCALEBITS) + 0.5 ) )
+#define	ONE_HALF	( (int32_t) (1<< (SCALEBITS-1)) )
+#define	CBCR_OFFSET	(CENTERSAMPLE << SCALEBITS)
+
+#define R_Y_OFF         0                       /* offset to R => Y section */
+#define G_Y_OFF         (1*(MAXSAMPLE+1))       /* offset to G => Y section */
+#define B_Y_OFF         (2*(MAXSAMPLE+1))       /* etc. */
+#define R_CB_OFF        (3*(MAXSAMPLE+1))
+#define G_CB_OFF        (4*(MAXSAMPLE+1))
+#define B_CB_OFF        (5*(MAXSAMPLE+1))
+#define R_CR_OFF        B_CB_OFF                /* B=>Cb, R=>Cr are the same */
+#define G_CR_OFF        (6*(MAXSAMPLE+1))
+#define B_CR_OFF        (7*(MAXSAMPLE+1))
+#define TABLE_SIZE      (8*(MAXSAMPLE+1))
+
+
+/*
+ * HAS_SLOW_MULT:
+ * 0: use integer multiplication in inner loop of rgb2yuv conversion
+ * 1: use precomputed tables (avoids slow integer multiplication)
+ *
+ * (On a P-II/Athlon, the version using the precomputed tables is
+ * slightly faster)
+ */
+#define	HAS_SLOW_MULT	1
+
+#if	HAS_SLOW_MULT
+static int32_t *rgb_ycc_tab;
+#endif
+
+static void w32v_init_rgb_ycc(void)
+{
+#if	HAS_SLOW_MULT
+  /*
+   * System has slow integer multiplication, so we precompute
+   * the YCbCr constants times R,G,B for all possible values.
+   */
+  int i;
+    
+  if (rgb_ycc_tab) return;
+
+  rgb_ycc_tab = malloc(TABLE_SIZE * sizeof(int32_t));
+
+  for (i = 0; i <= MAXSAMPLE; i++) {
+    rgb_ycc_tab[i+R_Y_OFF] = FIX(0.29900) * i;
+    rgb_ycc_tab[i+G_Y_OFF] = FIX(0.58700) * i;
+    rgb_ycc_tab[i+B_Y_OFF] = FIX(0.11400) * i     + ONE_HALF;
+    rgb_ycc_tab[i+R_CB_OFF] = (-FIX(0.16874)) * i;
+    rgb_ycc_tab[i+G_CB_OFF] = (-FIX(0.33126)) * i;
+    /*
+     * We use a rounding fudge-factor of 0.5-epsilon for Cb and Cr.
+     * This ensures that the maximum output will round to MAXJSAMPLE
+     * not MAXJSAMPLE+1, and thus that we don't have to range-limit.
+     */
+    rgb_ycc_tab[i+B_CB_OFF] = FIX(0.50000) * i    + CBCR_OFFSET + ONE_HALF-1;
+    /*
+     * B=>Cb and R=>Cr tables are the same
+    rgb_ycc_tab[i+R_CR_OFF] = FIX(0.50000) * i    + CBCR_OFFSET + ONE_HALF-1;
+     */
+    rgb_ycc_tab[i+G_CR_OFF] = (-FIX(0.41869)) * i;
+    rgb_ycc_tab[i+B_CR_OFF] = (-FIX(0.08131)) * i;
+  }
+#endif
+}
+
 static char* get_vids_codec_name(w32v_decoder_t *this,
 				 int buf_type) {
 
@@ -184,6 +278,8 @@ static void w32v_init_codec (w32v_decoder_t *this, int buf_type) {
 
   HRESULT  ret;
 
+  w32v_init_rgb_ycc();
+
   printf ("init codec...\n");
 
   memset(&this->o_bih, 0, sizeof(BITMAPINFOHEADER));
@@ -228,7 +324,8 @@ static void w32v_init_codec (w32v_decoder_t *this, int buf_type) {
     this->o_bih.biBitCount = 16;
   }
 
-  this->o_bih.biSizeImage = this->o_bih.biWidth*this->o_bih.biHeight*(this->o_bih.biBitCount+7)/8;
+  this->o_bih.biSizeImage = this->o_bih.biWidth * abs(this->o_bih.biHeight)
+      * this->o_bih.biBitCount/8;
 
   ret = ICDecompressQuery(this->hic, &this->bih, &this->o_bih);
   
@@ -247,10 +344,7 @@ static void w32v_init_codec (w32v_decoder_t *this, int buf_type) {
 
   this->size = 0;
 
-  if (this->flipped)
-    this->img_buffer = malloc (-this->o_bih.biSizeImage);
-  else
-    this->img_buffer = malloc (this->o_bih.biSizeImage);
+  this->img_buffer = malloc (this->o_bih.biSizeImage);
 
   this->video_out->open (this->video_out);
 
@@ -314,6 +408,11 @@ static void w32v_decode_data (video_decoder_t *this_gen, buf_element_t *buf) {
       } else {
 	/* now, convert rgb to yuv (this is to slow) */
 	int row, col;
+#if	HAS_SLOW_MULT
+	int32_t *ctab = rgb_ycc_tab;
+#endif
+
+	profiler_start_count (this->prof_rgb2yuv);
 
 	for (row=0; row<this->bih.biHeight; row++) {
 
@@ -327,25 +426,38 @@ static void w32v_decode_data (video_decoder_t *this_gen, buf_element_t *buf) {
 	    uint8_t   r,g,b;
 	    uint8_t   y,u,v;
 	    
-	    b = (*pixel & 0x003C) << 3;
+	    b = (*pixel & 0x001F) << 3;
 	    g = (*pixel & 0x03E0) >> 5 << 3;
-	    r = (*pixel & 0xF800) >> 10 << 3;
-	    
-	    y = (uint8_t) (0.299 * (double) r + 0.587 * (double) g + 0.114 * (double) b);
+	    r = (*pixel & 0x7C00) >> 10 << 3;
 	    
+#if	HAS_SLOW_MULT
+	    y = (ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) >> SCALEBITS;
 	    if (!(col & 0x0001)) {
 	      /* even pixel, do u */
-	      u = (uint8_t) (- 0.1684 * (double) r - 0.3316 * (double) g + 0.5000 * (double) b + 128.0);
+	      u = (ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF]) >> SCALEBITS;
 	      *out = ( (uint16_t) u << 8) | (uint16_t) y;
 	    } else {
 	      /* odd pixel, do v */
-	      v = (uint8_t) (0.5000 * (double) r - 0.4187 * (double) g - 0.0813 * (double) b + 128.0);
+	      v = (ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF]) >> SCALEBITS;
 	      *out = ( (uint16_t) v << 8) | (uint16_t) y;
 	    }
-
+#else
+	    y = (FIX(0.299) * r + FIX(0.587) * g + FIX(0.114) * b + ONE_HALF) >> SCALEBITS;
+	    if (!(col & 0x0001)) {
+	      /* even pixel, do u */
+	      u = (- FIX(0.16874) * r - FIX(0.33126) * g + FIX(0.5) * b + CBCR_OFFSET + ONE_HALF-1) >> SCALEBITS;
+	      *out = ( (uint16_t) u << 8) | (uint16_t) y;
+	    } else {
+	      /* odd pixel, do v */
+	      v = (FIX(0.5) * r - FIX(0.41869) * g - FIX(0.08131) * b + CBCR_OFFSET + ONE_HALF-1) >> SCALEBITS;
+	      *out = ( (uint16_t) v << 8) | (uint16_t) y;
+	    }
+#endif
 	    //printf("r %02x g %02x b %02x y %02x u %02x v %02x\n",r,g,b,y,u,v);
 	  }
 	}
+
+	profiler_stop_count (this->prof_rgb2yuv);
       }
 
       img->PTS = buf->PTS;
@@ -635,6 +747,8 @@ video_decoder_t *init_video_decoder_plugin (int iface_version, config_values_t *
   this->video_decoder.get_identifier      = w32v_get_id;
   this->video_decoder.priority            = 1;
 
+  this->prof_rgb2yuv = profiler_allocate_slot ("w32codec rgb2yuv convert");
+
   return (video_decoder_t *) this;
 }
author	Juergen Keil <jkeil@users.sourceforge.net>	2001-09-19 11:16:08 +0000
committer	Juergen Keil <jkeil@users.sourceforge.net>	2001-09-19 11:16:08 +0000
commit	91538feb86939b0cffab173b59a3f7fd5fa5212f (patch)
tree	342cf84b9c0f295094feef77379f789a02d6b687
parent	ab4359a489433aa212b981b9961be3a73eeaf082 (diff)
download	xine-lib-91538feb86939b0cffab173b59a3f7fd5fa5212f.tar.gz xine-lib-91538feb86939b0cffab173b59a3f7fd5fa5212f.tar.bz2