diff options
author | Torsten Jager <t.jager@gmx.de> | 2012-04-17 14:27:36 +0300 |
---|---|---|
committer | Torsten Jager <t.jager@gmx.de> | 2012-04-17 14:27:36 +0300 |
commit | 6f50ec4dfb4bc53ca1f9a6caaf703281edea86b8 (patch) | |
tree | af2f25b48a733426597d8efa7638727837fe64a4 | |
parent | 5a6586d1c506525cbcc17f557944094b3bb3bd28 (diff) | |
download | xine-lib-6f50ec4dfb4bc53ca1f9a6caaf703281edea86b8.tar.gz xine-lib-6f50ec4dfb4bc53ca1f9a6caaf703281edea86b8.tar.bz2 |
Improved mmx_yuv2rgb()
yuv2rgb_mmx.c scales YUV and rounds them down to 8 bits
individually before the addition. That causes red and
blue to be off by up to 2, green even off by 3.
This little patch does the stuff using 10 bits per
component, plus correct rounding.
There seems to be no noticeable impact on performance,
but color gradients come out much smoother now.
-rw-r--r-- | src/video_out/yuv2rgb_mmx.c | 17 |
1 file changed, 13 insertions, 4 deletions
diff --git a/src/video_out/yuv2rgb_mmx.c b/src/video_out/yuv2rgb_mmx.c index 9cbb675bb..c0c362bfd 100644 --- a/src/video_out/yuv2rgb_mmx.c +++ b/src/video_out/yuv2rgb_mmx.c @@ -119,6 +119,7 @@ static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv, mmx_cs { static mmx_t mmx_80w = {0x0080008000800080ULL}; static mmx_t mmx_00ffw = {0x00ff00ff00ff00ffULL}; + static mmx_t mmx_0002w = {0x0002000200020002ULL}; movq_m2r (*py, mm6); // mm6 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 pxor_r2r (mm4, mm4); // mm4 = 0 @@ -133,10 +134,11 @@ static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv, mmx_cs psrlw_i2r (8, mm7); // mm7 = Y7 Y5 Y3 Y1 movd_m2r (*pv, mm1); // mm1 = 00 00 00 00 v3 v2 v1 v0 - psllw_i2r (3, mm6); // promote precision + psllw_i2r (5, mm6); // promote precision pmulhw_m2r (csc->Y_coeff, mm6); // mm6 = luma_rgb even - psllw_i2r (3, mm7); // promote precision + paddsw_m2r (mmx_0002w, mm6); // +0.5 for later rounding + psllw_i2r (5, mm7); // promote precision punpcklbw_r2r (mm4, mm0); // mm0 = u3 u2 u1 u0 @@ -144,12 +146,13 @@ static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv, mmx_cs punpcklbw_r2r (mm4, mm1); // mm1 = v3 v2 v1 v0 pmulhw_m2r (csc->Y_coeff, mm7); // mm7 = luma_rgb odd - psllw_i2r (3, mm0); // promote precision + paddsw_m2r (mmx_0002w, mm7); // +0.5 for later rounding + psllw_i2r (5, mm0); // promote precision psubsw_m2r (mmx_80w, mm1); // v -= 128 movq_r2r (mm0, mm2); // mm2 = u3 u2 u1 u0 - psllw_i2r (3, mm1); // promote precision + psllw_i2r (5, mm1); // promote precision movq_r2r (mm1, mm4); // mm4 = v3 v2 v1 v0 @@ -168,12 +171,14 @@ static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv, mmx_cs paddsw_r2r (mm6, mm0); // mm0 = B6 B4 B2 B0 paddsw_r2r (mm7, mm3); // mm3 = B7 B5 B3 B1 + psraw_i2r (2, mm0); // div round packuswb_r2r (mm0, mm0); // saturate to 0-255 pmulhw_m2r (csc->U_green, mm2); // mm2 = u * u_green + psraw_i2r (2, mm3); // div round packuswb_r2r (mm3, mm3); // saturate to 
0-255 @@ -197,16 +202,20 @@ static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv, mmx_cs paddsw_r2r (mm6, mm2); // mm2 = G6 G4 G2 G0 + psraw_i2r (2, mm2); // div round packuswb_r2r (mm2, mm2); // saturate to 0-255 paddsw_r2r (mm6, mm1); // mm1 = R6 R4 R2 R0 + psraw_i2r (2, mm1); // div round packuswb_r2r (mm1, mm1); // saturate to 0-255 paddsw_r2r (mm7, mm4); // mm4 = R7 R5 R3 R1 + psraw_i2r (2, mm4); // div round packuswb_r2r (mm4, mm4); // saturate to 0-255 paddsw_r2r (mm7, mm5); // mm5 = G7 G5 G3 G1 + psraw_i2r (2, mm5); // div round packuswb_r2r (mm5, mm5); // saturate to 0-255 |