diff options
author | Miguel Freitas <miguelfreitas@users.sourceforge.net> | 2001-09-19 02:40:58 +0000 |
---|---|---|
committer | Miguel Freitas <miguelfreitas@users.sourceforge.net> | 2001-09-19 02:40:58 +0000 |
commit | ab4359a489433aa212b981b9961be3a73eeaf082 (patch) | |
tree | 829e849b91e9204043edd5a848078d3c5cac399f /src/video_out/deinterlace.c | |
parent | 4562bf4231063f333ce38498b031a740b1ff811a (diff) | |
download | xine-lib-ab4359a489433aa212b981b9961be3a73eeaf082.tar.gz xine-lib-ab4359a489433aa212b981b9961be3a73eeaf082.tar.bz2 |
deinterlace update, not finished yet (weave looks broken).
changes to Xv driver to keep a list of the recent frames.
(this is needed to support a lot of DScaler algorithms)
the driver may not be the ideal place to have the recent frames
list as we could give "not so recent" frames to deinterlace code
if some are dropped at video_out.c. Though this is better than
waste our time deinterlacing frames that will be dropped anyway...
CVS patchset: 663
CVS date: 2001/09/19 02:40:58
Diffstat (limited to 'src/video_out/deinterlace.c')
-rw-r--r-- | src/video_out/deinterlace.c | 254 |
1 files changed, 229 insertions, 25 deletions
diff --git a/src/video_out/deinterlace.c b/src/video_out/deinterlace.c index 13217b111..67f932612 100644 --- a/src/video_out/deinterlace.c +++ b/src/video_out/deinterlace.c @@ -30,6 +30,7 @@ #include "cpu_accel.h" #include "deinterlace.h" + /* DeinterlaceFieldBob algorithm Based on Virtual Dub plugin by Gunnar Thalin @@ -37,30 +38,30 @@ Linux version for Xine player by Miguel Freitas Todo: use a MMX optimized memcpy */ -static void deinterlace_bob_yuv_mmx( uint8_t *pdst, uint8_t *psrc, +static void deinterlace_bob_yuv_mmx( uint8_t *pdst, uint8_t *psrc[], int width, int height ) { #ifdef ARCH_X86 int Line; - long long* YVal1; - long long* YVal2; - long long* YVal3; - long long* Dest; - uint8_t* pEvenLines = psrc; - uint8_t* pOddLines = psrc+width; + uint64_t *YVal1; + uint64_t *YVal2; + uint64_t *YVal3; + uint64_t *Dest; + uint8_t* pEvenLines = psrc[0]; + uint8_t* pOddLines = psrc[0]+width; int LineLength = width; - int Pitch = width * 2; + int SourcePitch = width * 2; int IsOdd = 1; long EdgeDetect = 625; long JaggieThreshold = 73; int n; - unsigned long long qwEdgeDetect; - unsigned long long qwThreshold; - const unsigned long long Mask = 0xfefefefefefefefe; - const unsigned long long YMask = 0x00ff00ff00ff00ff; + uint64_t qwEdgeDetect; + uint64_t qwThreshold; + const uint64_t Mask = 0xfefefefefefefefe; + const uint64_t YMask = 0x00ff00ff00ff00ff; qwEdgeDetect = EdgeDetect; qwEdgeDetect += (qwEdgeDetect << 48) + (qwEdgeDetect << 32) + (qwEdgeDetect << 16); @@ -79,17 +80,17 @@ static void deinterlace_bob_yuv_mmx( uint8_t *pdst, uint8_t *psrc, { if (IsOdd) { - YVal1 = (long long *)(pOddLines + Line * Pitch); - YVal2 = (long long *)(pEvenLines + (Line + 1) * Pitch); - YVal3 = (long long *)(pOddLines + (Line + 1) * Pitch); - Dest = (long long *)(pdst + (Line * 2 + 2) * LineLength); + YVal1 = (uint64_t *)(pOddLines + Line * SourcePitch); + YVal2 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch); + YVal3 = (uint64_t *)(pOddLines + (Line + 1) * SourcePitch); + Dest = (uint64_t *)(pdst + (Line * 2 + 2) * LineLength); } else { - YVal1 = (long long *)(pEvenLines + Line * Pitch); - YVal2 = (long long *)(pOddLines + Line * Pitch); - YVal3 = (long long *)(pEvenLines + (Line + 1) * Pitch); - Dest = (long long *)(pdst + (Line * 2 + 1) * LineLength); + YVal1 = (uint64_t *)(pEvenLines + Line * SourcePitch); + YVal2 = (uint64_t *)(pOddLines + Line * SourcePitch); + YVal3 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch); + Dest = (uint64_t *)(pdst + (Line * 2 + 1) * LineLength); } // For ease of reading, the comments below assume that we're operating on an odd @@ -167,7 +168,7 @@ static void deinterlace_bob_yuv_mmx( uint8_t *pdst, uint8_t *psrc, if (! IsOdd) { memcpy(pdst + (height * 2 - 1) * LineLength, - pOddLines + (height - 1) * Pitch, + pOddLines + (height - 1) * SourcePitch, LineLength); } @@ -177,6 +178,193 @@ static void deinterlace_bob_yuv_mmx( uint8_t *pdst, uint8_t *psrc, #endif } +/* Deinterlace the latest field, with a tendency to weave rather than bob. + Good for high detail on low-movement scenes. + NOT FINISHED! WEIRD OUTPUT!!! +*/ +static int deinterlace_weave_yuv_mmx( uint8_t *pdst, uint8_t *psrc[], + int width, int height ) +{ +#ifdef ARCH_X86 + + int Line; + uint64_t *YVal1; + uint64_t *YVal2; + uint64_t *YVal3; + uint64_t *YVal4; + uint64_t *Dest; + uint8_t* pEvenLines = psrc[0]; + uint8_t* pOddLines = psrc[0]+width; + uint8_t* pPrevLines; + + int LineLength = width; + int SourcePitch = width * 2; + int IsOdd = 1; + + long TemporalTolerance = 300; + long SpatialTolerance = 600; + long SimilarityThreshold = 25; + + const uint64_t YMask = 0x00ff00ff00ff00ff; + + int n; + + uint64_t qwSpatialTolerance; + uint64_t qwTemporalTolerance; + uint64_t qwThreshold; + const uint64_t Mask = 0xfefefefefefefefe; + + + // Make sure we have all the data we need. + if ( psrc[0] == NULL || psrc[1] == NULL ) + return 0; + + if (IsOdd) + pPrevLines = psrc[1] + width; + else + pPrevLines = psrc[1]; + + // Since the code uses MMX to process 4 pixels at a time, we need our constants + // to be represented 4 times per quadword. + qwSpatialTolerance = SpatialTolerance; + qwSpatialTolerance += (qwSpatialTolerance << 48) + (qwSpatialTolerance << 32) + (qwSpatialTolerance << 16); + qwTemporalTolerance = TemporalTolerance; + qwTemporalTolerance += (qwTemporalTolerance << 48) + (qwTemporalTolerance << 32) + (qwTemporalTolerance << 16); + qwThreshold = SimilarityThreshold; + qwThreshold += (qwThreshold << 48) + (qwThreshold << 32) + (qwThreshold << 16); + + // copy first even line no matter what, and the first odd line if we're + // processing an even field. + memcpy(pdst, pEvenLines, LineLength); + if (!IsOdd) + memcpy(pdst + LineLength, pOddLines, LineLength); + + height = height / 2; + for (Line = 0; Line < height - 1; ++Line) + { + if (IsOdd) + { + YVal1 = (uint64_t *)(pEvenLines + Line * SourcePitch); + YVal2 = (uint64_t *)(pOddLines + Line * SourcePitch); + YVal3 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch); + YVal4 = (uint64_t *)(pPrevLines + Line * SourcePitch); + Dest = (uint64_t *)(pdst + (Line * 2 + 1) * LineLength); + } + else + { + YVal1 = (uint64_t *)(pOddLines + Line * SourcePitch); + YVal2 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch); + YVal3 = (uint64_t *)(pOddLines + (Line + 1) * SourcePitch); + YVal4 = (uint64_t *)(pPrevLines + (Line + 1) * SourcePitch); + Dest = (uint64_t *)(pdst + (Line * 2 + 2) * LineLength); + } + + // For ease of reading, the comments below assume that we're operating on an odd + // field (i.e., that bIsOdd is true). The exact same processing is done when we + // operate on an even field, but the roles of the odd and even fields are reversed. + // It's just too cumbersome to explain the algorithm in terms of "the next odd + // line if we're doing an odd field, or the next even line if we're doing an + // even field" etc. So wherever you see "odd" or "even" below, keep in mind that + // half the time this function is called, those words' meanings will invert. + + // Copy the even scanline below this one to the overlay buffer, since we'll be + // adapting the current scanline to the even lines surrounding it. The scanline + // above has already been copied by the previous pass through the loop. + memcpy((char *)Dest + LineLength, YVal3, LineLength); + + n = LineLength >> 3; + while( n-- ) + { + movq_m2r ( *YVal1++, mm0 ); // mm0 = E1 + movq_m2r ( *YVal2++, mm1 ); // mm1 = O + movq_m2r ( *YVal3++, mm2 ); // mm2 = E2 + + movq_r2r ( mm0, mm3 ); // mm3 = intensity(E1) + movq_r2r ( mm1, mm4 ); // mm4 = intensity(O) + movq_r2r ( mm2, mm6 ); // mm6 = intensity(E2) + + pand_m2r ( *&YMask, mm3 ); + pand_m2r ( *&YMask, mm4 ); + pand_m2r ( *&YMask, mm6 ); + + // Average E1 and E2 for interpolated bobbing. + // leave result in mm0 + pand_m2r ( *&Mask, mm0 ); // mm0 = E1 with lower chroma bit stripped off + pand_m2r ( *&Mask, mm2 ); // mm2 = E2 with lower chroma bit stripped off + psrlw_i2r ( 01, mm0 ); // mm0 = E1 / 2 + psrlw_i2r ( 01, mm2 ); // mm2 = E2 / 2 + paddb_r2r ( mm2, mm0 ); + + // The meat of the work is done here. We want to see whether this pixel is + // close in luminosity to ANY of: its top neighbor, its bottom neighbor, + // or its predecessor. To do this without branching, we use MMX's + // saturation feature, which gives us Z(x) = x if x>=0, or 0 if x<0. + // + // The formula we're computing here is + // Z(ST - (E1 - O) ^ 2) + Z(ST - (E2 - O) ^ 2) + Z(TT - (Oold - O) ^ 2) + // where ST is spatial tolerance and TT is temporal tolerance. The idea + // is that if a pixel is similar to none of its neighbors, the resulting + // value will be pretty low, probably zero. A high value therefore indicates + // that the pixel had a similar neighbor. The pixel in the same position + // in the field before last (Oold) is considered a neighbor since we want + // to be able to display 1-pixel-high horizontal lines. + + movq_m2r ( *&qwSpatialTolerance, mm7 ); + movq_r2r ( mm3, mm5 ); // mm5 = E1 + psubsw_r2r ( mm4, mm5 ); // mm5 = E1 - O + psraw_i2r ( 1, mm5 ); + pmullw_r2r ( mm5, mm5 ); // mm5 = (E1 - O) ^ 2 + psubusw_r2r ( mm5, mm7 ); // mm7 = ST - (E1 - O) ^ 2, or 0 if that's negative + + movq_m2r ( *&qwSpatialTolerance, mm3 ); + movq_r2r ( mm6, mm5 ); // mm5 = E2 + psubsw_r2r ( mm4, mm5 ); // mm5 = E2 - O + psraw_i2r ( 1, mm5 ); + pmullw_r2r ( mm5, mm5 ); // mm5 = (E2 - O) ^ 2 + psubusw_r2r ( mm5, mm3 ); // mm0 = ST - (E2 - O) ^ 2, or 0 if that's negative + paddusw_r2r ( mm3, mm7 ); // mm7 = (ST - (E1 - O) ^ 2) + (ST - (E2 - O) ^ 2) + + movq_m2r ( *&qwTemporalTolerance, mm3 ); + movq_m2r ( *YVal4++, mm5 ); // mm5 = Oold + pand_m2r ( *&YMask, mm5 ); + psubsw_r2r ( mm4, mm5 ); // mm5 = Oold - O + psraw_i2r ( 1, mm5 ); // XXX + pmullw_r2r ( mm5, mm5 ); // mm5 = (Oold - O) ^ 2 + psubusw_r2r ( mm5, mm3 ); // mm0 = TT - (Oold - O) ^ 2, or 0 if that's negative + paddusw_r2r ( mm3, mm7 ); // mm7 = our magic number + + // Now compare the similarity totals against our threshold. The pcmpgtw + // instruction will populate the target register with a bunch of mask bits, + // filling words where the comparison is true with 1s and ones where it's + // false with 0s. A few ANDs and NOTs and an OR later, we have bobbed + // values for pixels under the similarity threshold and weaved ones for + // pixels over the threshold. + + pcmpgtw_m2r( *&qwThreshold, mm7 ); // mm7 = 0xffff where we're greater than the threshold, 0 elsewhere + movq_r2r ( mm7, mm6 ); // mm6 = 0xffff where we're greater than the threshold, 0 elsewhere + pand_r2r ( mm1, mm7 ); // mm7 = weaved data where we're greater than the threshold, 0 elsewhere + pandn_r2r ( mm0, mm6 ); // mm6 = bobbed data where we're not greater than the threshold, 0 elsewhere + por_r2r ( mm6, mm7 ); // mm7 = bobbed and weaved data + + movq_r2m ( mm7, *Dest++ ); + } + } + + // Copy last odd line if we're processing an odd field. + if (IsOdd) + { + memcpy(pdst + (height * 2 - 1) * LineLength, + pOddLines + (height - 1) * SourcePitch, + LineLength); + } + + // clear out the MMX registers ready for doing floating point + // again + emms(); + + return 1; +#endif +} static int check_for_mmx(void) { @@ -200,18 +388,34 @@ static void abort_mmx_missing(void) exit(1); } -void deinterlace_yuv( uint8_t *pdst, uint8_t *psrc, +/* generic YUV deinterlacer + pdst -> pointer to destination bitmap + psrc -> array of pointers to source bitmaps ([0] = most recent) + width,height -> dimension for bitmaps + method -> DEINTERLACE_xxx +*/ + +void deinterlace_yuv( uint8_t *pdst, uint8_t *psrc[], int width, int height, int method ) { switch( method ) { case DEINTERLACE_NONE: - memcpy(pdst,psrc,width*height); + memcpy(pdst,psrc[0],width*height); break; case DEINTERLACE_BOB: if( check_for_mmx() ) - deinterlace_bob_yuv_mmx(pdst,psrc,width,height); + deinterlace_bob_yuv_mmx(pdst,psrc,width,height); + else /* FIXME: provide an alternative? */ + abort_mmx_missing(); + break; + case DEINTERLACE_WEAVE: + if( check_for_mmx() ) + { + if( !deinterlace_weave_yuv_mmx(pdst,psrc,width,height) ) + memcpy(pdst,psrc[0],width*height); + } else /* FIXME: provide an alternative? */ - abort_mmx_missing(); + abort_mmx_missing(); break; } } |