diff options
author | Juergen Keil <jkeil@users.sourceforge.net> | 2001-09-19 11:16:08 +0000 |
---|---|---|
committer | Juergen Keil <jkeil@users.sourceforge.net> | 2001-09-19 11:16:08 +0000 |
commit | 91538feb86939b0cffab173b59a3f7fd5fa5212f (patch) | |
tree | 342cf84b9c0f295094feef77379f789a02d6b687 | |
parent | ab4359a489433aa212b981b9961be3a73eeaf082 (diff) | |
download | xine-lib-91538feb86939b0cffab173b59a3f7fd5fa5212f.tar.gz xine-lib-91538feb86939b0cffab173b59a3f7fd5fa5212f.tar.bz2 |
Add a faster RGB->YUV conversion
CVS patchset: 664
CVS date: 2001/09/19 11:16:08
-rw-r--r-- | src/libw32dll/w32codec.c | 140 |
1 files changed, 127 insertions, 13 deletions
diff --git a/src/libw32dll/w32codec.c b/src/libw32dll/w32codec.c index 25119f7d4..ca52da936 100644 --- a/src/libw32dll/w32codec.c +++ b/src/libw32dll/w32codec.c @@ -17,7 +17,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA * - * $Id: w32codec.c,v 1.25 2001/09/18 17:41:48 jkeil Exp $ + * $Id: w32codec.c,v 1.26 2001/09/19 11:16:08 jkeil Exp $ * * routines for using w32 codecs * @@ -37,6 +37,7 @@ #include "video_out.h" #include "audio_out.h" #include "buffer.h" +#include "monitor.h" #include "xine_internal.h" extern char* win32_codec_name; @@ -56,6 +57,9 @@ typedef struct w32v_decoder_s { unsigned char buf[128*1024]; void *img_buffer; int size; + + /* profiler */ + int prof_rgb2yuv; } w32v_decoder_t; typedef struct w32a_decoder_s { @@ -74,6 +78,96 @@ typedef struct w32a_decoder_s { } w32a_decoder_t; +/* + * RGB->YUY2 conversion, we need is for xine video-codec -> + * video-output interface + * + * YCbCr is defined per CCIR 601-1, except that Cb and Cr are + * normalized to the range 0..MAXSAMPLE rather than -0.5 .. 0.5. + * The conversion equations to be implemented are therefore + * Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + * Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERSAMPLE + * Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERSAMPLE + * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.) + * + * To avoid floating-point arithmetic, we represent the fractional + * constants as integers scaled up by 2^16 (about 4 digits precision); + * we have to divide the products by 2^16, with appropriate rounding, + * to get the correct answer. + * + * FIXME: For the XShm video-out driver, this conversion is a huge + * waste of time (converting from RGB->YUY2 here and converting back + * from YUY2->RGB in the XShm driver). + */ +#define MAXSAMPLE 255 +#define CENTERSAMPLE 128 + +#define SCALEBITS 16 +#define FIX(x) ( (int32_t) ( (x) * (1<<SCALEBITS) + 0.5 ) ) +#define ONE_HALF ( (int32_t) (1<< (SCALEBITS-1)) ) +#define CBCR_OFFSET (CENTERSAMPLE << SCALEBITS) + +#define R_Y_OFF 0 /* offset to R => Y section */ +#define G_Y_OFF (1*(MAXSAMPLE+1)) /* offset to G => Y section */ +#define B_Y_OFF (2*(MAXSAMPLE+1)) /* etc. */ +#define R_CB_OFF (3*(MAXSAMPLE+1)) +#define G_CB_OFF (4*(MAXSAMPLE+1)) +#define B_CB_OFF (5*(MAXSAMPLE+1)) +#define R_CR_OFF B_CB_OFF /* B=>Cb, R=>Cr are the same */ +#define G_CR_OFF (6*(MAXSAMPLE+1)) +#define B_CR_OFF (7*(MAXSAMPLE+1)) +#define TABLE_SIZE (8*(MAXSAMPLE+1)) + + +/* + * HAS_SLOW_MULT: + * 0: use integer multiplication in inner loop of rgb2yuv conversion + * 1: use precomputed tables (avoids slow integer multiplication) + * + * (On a P-II/Athlon, the version using the precomputed tables is + * slightly faster) + */ +#define HAS_SLOW_MULT 1 + +#if HAS_SLOW_MULT +static int32_t *rgb_ycc_tab; +#endif + +static void w32v_init_rgb_ycc(void) +{ +#if HAS_SLOW_MULT + /* + * System has slow integer multiplication, so we precompute + * the YCbCr constants times R,G,B for all possible values. + */ + int i; + + if (rgb_ycc_tab) return; + + rgb_ycc_tab = malloc(TABLE_SIZE * sizeof(int32_t)); + + for (i = 0; i <= MAXSAMPLE; i++) { + rgb_ycc_tab[i+R_Y_OFF] = FIX(0.29900) * i; + rgb_ycc_tab[i+G_Y_OFF] = FIX(0.58700) * i; + rgb_ycc_tab[i+B_Y_OFF] = FIX(0.11400) * i + ONE_HALF; + rgb_ycc_tab[i+R_CB_OFF] = (-FIX(0.16874)) * i; + rgb_ycc_tab[i+G_CB_OFF] = (-FIX(0.33126)) * i; + /* + * We use a rounding fudge-factor of 0.5-epsilon for Cb and Cr. + * This ensures that the maximum output will round to MAXJSAMPLE + * not MAXJSAMPLE+1, and thus that we don't have to range-limit. + */ + rgb_ycc_tab[i+B_CB_OFF] = FIX(0.50000) * i + CBCR_OFFSET + ONE_HALF-1; + /* + * B=>Cb and R=>Cr tables are the same + rgb_ycc_tab[i+R_CR_OFF] = FIX(0.50000) * i + CBCR_OFFSET + ONE_HALF-1; + */ + rgb_ycc_tab[i+G_CR_OFF] = (-FIX(0.41869)) * i; + rgb_ycc_tab[i+B_CR_OFF] = (-FIX(0.08131)) * i; + } +#endif +} + static char* get_vids_codec_name(w32v_decoder_t *this, int buf_type) { @@ -184,6 +278,8 @@ static void w32v_init_codec (w32v_decoder_t *this, int buf_type) { HRESULT ret; + w32v_init_rgb_ycc(); + printf ("init codec...\n"); memset(&this->o_bih, 0, sizeof(BITMAPINFOHEADER)); @@ -228,7 +324,8 @@ static void w32v_init_codec (w32v_decoder_t *this, int buf_type) { this->o_bih.biBitCount = 16; } - this->o_bih.biSizeImage = this->o_bih.biWidth*this->o_bih.biHeight*(this->o_bih.biBitCount+7)/8; + this->o_bih.biSizeImage = this->o_bih.biWidth * abs(this->o_bih.biHeight) + * this->o_bih.biBitCount/8; ret = ICDecompressQuery(this->hic, &this->bih, &this->o_bih); @@ -247,10 +344,7 @@ static void w32v_init_codec (w32v_decoder_t *this, int buf_type) { this->size = 0; - if (this->flipped) - this->img_buffer = malloc (-this->o_bih.biSizeImage); - else - this->img_buffer = malloc (this->o_bih.biSizeImage); + this->img_buffer = malloc (this->o_bih.biSizeImage); this->video_out->open (this->video_out); @@ -314,6 +408,11 @@ static void w32v_decode_data (video_decoder_t *this_gen, buf_element_t *buf) { } else { /* now, convert rgb to yuv (this is to slow) */ int row, col; +#if HAS_SLOW_MULT + int32_t *ctab = rgb_ycc_tab; +#endif + + profiler_start_count (this->prof_rgb2yuv); for (row=0; row<this->bih.biHeight; row++) { @@ -327,25 +426,38 @@ static void w32v_decode_data (video_decoder_t *this_gen, buf_element_t *buf) { uint8_t r,g,b; uint8_t y,u,v; - b = (*pixel & 0x003C) << 3; + b = (*pixel & 0x001F) << 3; g = (*pixel & 0x03E0) >> 5 << 3; - r = (*pixel & 0xF800) >> 10 << 3; - - y = (uint8_t) (0.299 * (double) r + 0.587 * (double) g + 0.114 * (double) b); + r = (*pixel & 0x7C00) >> 10 << 3; +#if HAS_SLOW_MULT + y = (ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) >> SCALEBITS; if (!(col & 0x0001)) { /* even pixel, do u */ - u = (uint8_t) (- 0.1684 * (double) r - 0.3316 * (double) g + 0.5000 * (double) b + 128.0); + u = (ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF]) >> SCALEBITS; *out = ( (uint16_t) u << 8) | (uint16_t) y; } else { /* odd pixel, do v */ - v = (uint8_t) (0.5000 * (double) r - 0.4187 * (double) g - 0.0813 * (double) b + 128.0); + v = (ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF]) >> SCALEBITS; *out = ( (uint16_t) v << 8) | (uint16_t) y; } - +#else + y = (FIX(0.299) * r + FIX(0.587) * g + FIX(0.114) * b + ONE_HALF) >> SCALEBITS; + if (!(col & 0x0001)) { + /* even pixel, do u */ + u = (- FIX(0.16874) * r - FIX(0.33126) * g + FIX(0.5) * b + CBCR_OFFSET + ONE_HALF-1) >> SCALEBITS; + *out = ( (uint16_t) u << 8) | (uint16_t) y; + } else { + /* odd pixel, do v */ + v = (FIX(0.5) * r - FIX(0.41869) * g - FIX(0.08131) * b + CBCR_OFFSET + ONE_HALF-1) >> SCALEBITS; + *out = ( (uint16_t) v << 8) | (uint16_t) y; + } +#endif //printf("r %02x g %02x b %02x y %02x u %02x v %02x\n",r,g,b,y,u,v); } } + + profiler_stop_count (this->prof_rgb2yuv); } img->PTS = buf->PTS; @@ -635,6 +747,8 @@ video_decoder_t *init_video_decoder_plugin (int iface_version, config_values_t * this->video_decoder.get_identifier = w32v_get_id; this->video_decoder.priority = 1; + this->prof_rgb2yuv = profiler_allocate_slot ("w32codec rgb2yuv convert"); + return (video_decoder_t *) this; } |