summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/xine-utils/color.c103
1 files changed, 46 insertions, 57 deletions
diff --git a/src/xine-utils/color.c b/src/xine-utils/color.c
index b99e663b1..d9d1d74cb 100644
--- a/src/xine-utils/color.c
+++ b/src/xine-utils/color.c
@@ -26,6 +26,8 @@
* to an RGB value in a palette table, or each pixel is encoded with
* red, green, and blue values. In the latter case, typically either
* 15, 16, 24, or 32 bits are used to represent a single pixel.
+ * The facilities in this file are designed to ease the pain of converting
+ * RGB -> YUV.
*
* If you want to use these facilities in your decoder, include the
* xineutils.h header file. Then declare a yuv_planes_t structure. This
@@ -59,7 +61,7 @@
* instructions), these macros will automatically map to those special
* instructions.
*
- * $Id: color.c,v 1.6 2002/08/28 03:32:48 tmmm Exp $
+ * $Id: color.c,v 1.7 2002/09/12 01:25:28 tmmm Exp $
*/
#include "xine_internal.h"
@@ -143,9 +145,7 @@ void init_yuv_planes(yuv_planes_t *yuv_planes, int width, int height) {
yuv_planes->row_width = width;
yuv_planes->row_count = height;
- /* add 6 extra bytes to the plane size to account for residual filtering
- * on the C planes */
- plane_size = yuv_planes->row_width * yuv_planes->row_count + 6;
+ plane_size = yuv_planes->row_width * yuv_planes->row_count;
yuv_planes->y = xine_xmalloc(plane_size);
yuv_planes->u = xine_xmalloc(plane_size);
@@ -287,64 +287,47 @@ void yuv444_to_yuy2_c(yuv_planes_t *yuv_planes, unsigned char *yuy2_map,
* There is a special case when the filter hits the end of the line since
* it is always necessary to rely on phantom samples beyond the end of the
* line in order to compute the final 1-3 C samples of a line. This function
- * uses zeros in those phantom positions in order to compute the final
- * samples. However, the function might read up to 6 samples from the next
- * line which might not exist if the filter is already operation on the
- * last line of the plane. This is why the planes are allocated to be 6
- * bytes larger than width * height.
+ * rewinds the C sample stream by a few bytes and reuses a few samples in
+ * order to compute the final samples. This is not strictly correct; a
+ * better approach would be to mirror the final samples before computing
+ * the filter. But this reuse method is fast and apparently accurate
+ * enough.
*
*/
void yuv444_to_yuy2_mmx(yuv_planes_t *yuv_planes, unsigned char *yuy2_map,
int pitch) {
#ifdef ARCH_X86
int h, i, j, k;
+ int width_div_8 = yuv_planes->row_width / 8;
+ int width_mod_8 = yuv_planes->row_width % 8;
unsigned char *source_plane;
unsigned char *dest_plane;
- unsigned char vector[8];
unsigned char filter[] = {
0x01, 0x00,
0x03, 0x00,
0x03, 0x00,
0x01, 0x00
};
- unsigned char advance2_andmask[] = {
- 0xFF, 0xFF,
- 0x00, 0x00,
- 0x00, 0x00,
- 0x00, 0x00
- };
- unsigned char advance4_andmask[] = {
- 0xFF, 0xFF,
- 0xFF, 0xFF,
- 0x00, 0x00,
- 0x00, 0x00
- };
- unsigned char advance6_andmask[] = {
- 0xFF, 0xFF,
- 0xFF, 0xFF,
- 0xFF, 0xFF,
- 0x00, 0x00
- };
int block_loops = yuv_planes->row_width / 6;
int filter_loops;
- int advance_count;
+ int residual_filter_loops;
int row_inc = (pitch - 2 * yuv_planes->row_width);
+ residual_filter_loops = (yuv_planes->row_width % 6) / 2;
+ if (!residual_filter_loops)
+ residual_filter_loops = 3;
+
/* set up some MMX registers:
- * mm0 = 0, mm7 = color filter,
- * mm4..6 = advance 2,4,6 and masks */
+ * mm0 = 0, mm7 = color filter */
pxor_r2r(mm0, mm0);
movq_m2r(*filter, mm7);
- movq_m2r(*advance2_andmask, mm4);
- movq_m2r(*advance4_andmask, mm5);
- movq_m2r(*advance6_andmask, mm6);
/* copy the Y samples */
source_plane = yuv_planes->y;
dest_plane = yuy2_map;
for (i = 0; i < yuv_planes->row_count; i++) {
- /* iterate through blocks of 8 samples, disregarding extra 2 samples */
- for (j = 0; j < yuv_planes->row_width / 8; j++) {
+ /* iterate through blocks of 8 Y samples */
+ for (j = 0; j < width_div_8; j++) {
movq_m2r(*source_plane, mm1); /* load 8 Y samples */
source_plane += 8;
@@ -360,6 +343,14 @@ void yuv444_to_yuy2_mmx(yuv_planes_t *yuv_planes, unsigned char *yuy2_map,
dest_plane += 8;
}
+ /* iterate through residual samples in row if row is not divisible by 8 */
+ for (j = 0; j < width_mod_8; j++) {
+
+ *dest_plane = *source_plane;
+ dest_plane += 2;
+ source_plane++;
+ }
+
dest_plane += row_inc;
}
@@ -382,28 +373,22 @@ void yuv444_to_yuy2_mmx(yuv_planes_t *yuv_planes, unsigned char *yuy2_map,
/* iterate through blocks of 6 samples */
for (j = 0; j <= block_loops; j++) {
- /* special case for end-of-line residual */
- if (j != block_loops) {
- movq_m2r(*source_plane, mm1); /* load 8 C samples */
- source_plane += 6;
- } else {
- advance_count = yuv_planes->row_width % 6;
- if (!advance_count)
- advance_count = 6;
- filter_loops = advance_count / 2;
+ if (j == block_loops) {
+ /* special case for end-of-line residual */
+ filter_loops = residual_filter_loops;
+ source_plane -= (8 - residual_filter_loops * 2);
movq_m2r(*source_plane, mm1); /* load 8 C samples */
- source_plane += advance_count;
+ source_plane += 8;
+ if (residual_filter_loops == 1)
+ psrlq_i2r(32, mm1); /* toss out 4 samples before starting */
+ else if (residual_filter_loops == 2)
+ psrlq_i2r(16, mm1); /* toss out 2 samples before starting */
- /* zero out the rest of the samples */
-/*
- if (advance_count == 2)
- pand_r2r(mm4, mm1);
- else if (advance_count == 4)
- pand_r2r(mm5, mm1);
- else if (advance_count == 6)
- pand_r2r(mm6, mm1);
-*/
+ } else {
+ /* normal case */
+ movq_m2r(*source_plane, mm1); /* load 8 C samples */
+ source_plane += 6;
}
for (k = 0; k < filter_loops; k++) {
@@ -416,8 +401,12 @@ void yuv444_to_yuy2_mmx(yuv_planes_t *yuv_planes, unsigned char *yuy2_map,
paddd_r2r(mm3, mm2); /* mm2 += mm3 */
psrlq_i2r(3, mm2); /* divide by 8 */
- movq_r2m(mm2, *vector);
- dest_plane[0] = vector[0];
+ /* move the lower 32 bits of mm2 into eax */
+ movd_r2r(mm2, eax);
+ /* move al (the final filtered sample) to its spot in memory */
+ __asm__ __volatile__ ("mov %%" "al" ", %0"
+ : "=X" (*dest_plane)
+ : /* nothing */ );
dest_plane += 4;
psrlq_i2r(16, mm1); /* toss out 2 C samples and loop again */