summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJuergen Keil <jkeil@users.sourceforge.net>2001-09-19 11:16:08 +0000
committerJuergen Keil <jkeil@users.sourceforge.net>2001-09-19 11:16:08 +0000
commit91538feb86939b0cffab173b59a3f7fd5fa5212f (patch)
tree342cf84b9c0f295094feef77379f789a02d6b687
parentab4359a489433aa212b981b9961be3a73eeaf082 (diff)
downloadxine-lib-91538feb86939b0cffab173b59a3f7fd5fa5212f.tar.gz
xine-lib-91538feb86939b0cffab173b59a3f7fd5fa5212f.tar.bz2
Add a faster RGB->YUV conversion
CVS patchset: 664 CVS date: 2001/09/19 11:16:08
-rw-r--r--src/libw32dll/w32codec.c140
1 files changed, 127 insertions, 13 deletions
diff --git a/src/libw32dll/w32codec.c b/src/libw32dll/w32codec.c
index 25119f7d4..ca52da936 100644
--- a/src/libw32dll/w32codec.c
+++ b/src/libw32dll/w32codec.c
@@ -17,7 +17,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
- * $Id: w32codec.c,v 1.25 2001/09/18 17:41:48 jkeil Exp $
+ * $Id: w32codec.c,v 1.26 2001/09/19 11:16:08 jkeil Exp $
*
* routines for using w32 codecs
*
@@ -37,6 +37,7 @@
#include "video_out.h"
#include "audio_out.h"
#include "buffer.h"
+#include "monitor.h"
#include "xine_internal.h"
extern char* win32_codec_name;
@@ -56,6 +57,9 @@ typedef struct w32v_decoder_s {
unsigned char buf[128*1024];
void *img_buffer;
int size;
+
+ /* profiler */
+ int prof_rgb2yuv;
} w32v_decoder_t;
typedef struct w32a_decoder_s {
@@ -74,6 +78,96 @@ typedef struct w32a_decoder_s {
} w32a_decoder_t;
+/*
+ * RGB->YUY2 conversion, we need is for xine video-codec ->
+ * video-output interface
+ *
+ * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
+ * normalized to the range 0..MAXSAMPLE rather than -0.5 .. 0.5.
+ * The conversion equations to be implemented are therefore
+ * Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
+ * Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERSAMPLE
+ * Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERSAMPLE
+ * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
+ *
+ * To avoid floating-point arithmetic, we represent the fractional
+ * constants as integers scaled up by 2^16 (about 4 digits precision);
+ * we have to divide the products by 2^16, with appropriate rounding,
+ * to get the correct answer.
+ *
+ * FIXME: For the XShm video-out driver, this conversion is a huge
+ * waste of time (converting from RGB->YUY2 here and converting back
+ * from YUY2->RGB in the XShm driver).
+ */
+#define MAXSAMPLE 255
+#define CENTERSAMPLE 128
+
+#define SCALEBITS 16
+#define FIX(x) ( (int32_t) ( (x) * (1<<SCALEBITS) + 0.5 ) )
+#define ONE_HALF ( (int32_t) (1<< (SCALEBITS-1)) )
+#define CBCR_OFFSET (CENTERSAMPLE << SCALEBITS)
+
+#define R_Y_OFF 0 /* offset to R => Y section */
+#define G_Y_OFF (1*(MAXSAMPLE+1)) /* offset to G => Y section */
+#define B_Y_OFF (2*(MAXSAMPLE+1)) /* etc. */
+#define R_CB_OFF (3*(MAXSAMPLE+1))
+#define G_CB_OFF (4*(MAXSAMPLE+1))
+#define B_CB_OFF (5*(MAXSAMPLE+1))
+#define R_CR_OFF B_CB_OFF /* B=>Cb, R=>Cr are the same */
+#define G_CR_OFF (6*(MAXSAMPLE+1))
+#define B_CR_OFF (7*(MAXSAMPLE+1))
+#define TABLE_SIZE (8*(MAXSAMPLE+1))
+
+
+/*
+ * HAS_SLOW_MULT:
+ * 0: use integer multiplication in inner loop of rgb2yuv conversion
+ * 1: use precomputed tables (avoids slow integer multiplication)
+ *
+ * (On a P-II/Athlon, the version using the precomputed tables is
+ * slightly faster)
+ */
+#define HAS_SLOW_MULT 1
+
+#if HAS_SLOW_MULT
+static int32_t *rgb_ycc_tab;
+#endif
+
+static void w32v_init_rgb_ycc(void)
+{
+#if HAS_SLOW_MULT
+ /*
+ * System has slow integer multiplication, so we precompute
+ * the YCbCr constants times R,G,B for all possible values.
+ */
+ int i;
+
+ if (rgb_ycc_tab) return;
+
+ rgb_ycc_tab = malloc(TABLE_SIZE * sizeof(int32_t));
+
+ for (i = 0; i <= MAXSAMPLE; i++) {
+ rgb_ycc_tab[i+R_Y_OFF] = FIX(0.29900) * i;
+ rgb_ycc_tab[i+G_Y_OFF] = FIX(0.58700) * i;
+ rgb_ycc_tab[i+B_Y_OFF] = FIX(0.11400) * i + ONE_HALF;
+ rgb_ycc_tab[i+R_CB_OFF] = (-FIX(0.16874)) * i;
+ rgb_ycc_tab[i+G_CB_OFF] = (-FIX(0.33126)) * i;
+ /*
+ * We use a rounding fudge-factor of 0.5-epsilon for Cb and Cr.
+ * This ensures that the maximum output will round to MAXJSAMPLE
+ * not MAXJSAMPLE+1, and thus that we don't have to range-limit.
+ */
+ rgb_ycc_tab[i+B_CB_OFF] = FIX(0.50000) * i + CBCR_OFFSET + ONE_HALF-1;
+ /*
+ * B=>Cb and R=>Cr tables are the same
+ rgb_ycc_tab[i+R_CR_OFF] = FIX(0.50000) * i + CBCR_OFFSET + ONE_HALF-1;
+ */
+ rgb_ycc_tab[i+G_CR_OFF] = (-FIX(0.41869)) * i;
+ rgb_ycc_tab[i+B_CR_OFF] = (-FIX(0.08131)) * i;
+ }
+#endif
+}
+
static char* get_vids_codec_name(w32v_decoder_t *this,
int buf_type) {
@@ -184,6 +278,8 @@ static void w32v_init_codec (w32v_decoder_t *this, int buf_type) {
HRESULT ret;
+ w32v_init_rgb_ycc();
+
printf ("init codec...\n");
memset(&this->o_bih, 0, sizeof(BITMAPINFOHEADER));
@@ -228,7 +324,8 @@ static void w32v_init_codec (w32v_decoder_t *this, int buf_type) {
this->o_bih.biBitCount = 16;
}
- this->o_bih.biSizeImage = this->o_bih.biWidth*this->o_bih.biHeight*(this->o_bih.biBitCount+7)/8;
+ this->o_bih.biSizeImage = this->o_bih.biWidth * abs(this->o_bih.biHeight)
+ * this->o_bih.biBitCount/8;
ret = ICDecompressQuery(this->hic, &this->bih, &this->o_bih);
@@ -247,10 +344,7 @@ static void w32v_init_codec (w32v_decoder_t *this, int buf_type) {
this->size = 0;
- if (this->flipped)
- this->img_buffer = malloc (-this->o_bih.biSizeImage);
- else
- this->img_buffer = malloc (this->o_bih.biSizeImage);
+ this->img_buffer = malloc (this->o_bih.biSizeImage);
this->video_out->open (this->video_out);
@@ -314,6 +408,11 @@ static void w32v_decode_data (video_decoder_t *this_gen, buf_element_t *buf) {
} else {
/* now, convert rgb to yuv (this is to slow) */
int row, col;
+#if HAS_SLOW_MULT
+ int32_t *ctab = rgb_ycc_tab;
+#endif
+
+ profiler_start_count (this->prof_rgb2yuv);
for (row=0; row<this->bih.biHeight; row++) {
@@ -327,25 +426,38 @@ static void w32v_decode_data (video_decoder_t *this_gen, buf_element_t *buf) {
uint8_t r,g,b;
uint8_t y,u,v;
- b = (*pixel & 0x003C) << 3;
+ b = (*pixel & 0x001F) << 3;
g = (*pixel & 0x03E0) >> 5 << 3;
- r = (*pixel & 0xF800) >> 10 << 3;
-
- y = (uint8_t) (0.299 * (double) r + 0.587 * (double) g + 0.114 * (double) b);
+ r = (*pixel & 0x7C00) >> 10 << 3;
+#if HAS_SLOW_MULT
+ y = (ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) >> SCALEBITS;
if (!(col & 0x0001)) {
/* even pixel, do u */
- u = (uint8_t) (- 0.1684 * (double) r - 0.3316 * (double) g + 0.5000 * (double) b + 128.0);
+ u = (ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF]) >> SCALEBITS;
*out = ( (uint16_t) u << 8) | (uint16_t) y;
} else {
/* odd pixel, do v */
- v = (uint8_t) (0.5000 * (double) r - 0.4187 * (double) g - 0.0813 * (double) b + 128.0);
+ v = (ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF]) >> SCALEBITS;
*out = ( (uint16_t) v << 8) | (uint16_t) y;
}
-
+#else
+ y = (FIX(0.299) * r + FIX(0.587) * g + FIX(0.114) * b + ONE_HALF) >> SCALEBITS;
+ if (!(col & 0x0001)) {
+ /* even pixel, do u */
+ u = (- FIX(0.16874) * r - FIX(0.33126) * g + FIX(0.5) * b + CBCR_OFFSET + ONE_HALF-1) >> SCALEBITS;
+ *out = ( (uint16_t) u << 8) | (uint16_t) y;
+ } else {
+ /* odd pixel, do v */
+ v = (FIX(0.5) * r - FIX(0.41869) * g - FIX(0.08131) * b + CBCR_OFFSET + ONE_HALF-1) >> SCALEBITS;
+ *out = ( (uint16_t) v << 8) | (uint16_t) y;
+ }
+#endif
//printf("r %02x g %02x b %02x y %02x u %02x v %02x\n",r,g,b,y,u,v);
}
}
+
+ profiler_stop_count (this->prof_rgb2yuv);
}
img->PTS = buf->PTS;
@@ -635,6 +747,8 @@ video_decoder_t *init_video_decoder_plugin (int iface_version, config_values_t *
this->video_decoder.get_identifier = w32v_get_id;
this->video_decoder.priority = 1;
+ this->prof_rgb2yuv = profiler_allocate_slot ("w32codec rgb2yuv convert");
+
return (video_decoder_t *) this;
}