diff options
Diffstat (limited to 'src/video_out/deinterlace.c')
-rw-r--r-- | src/video_out/deinterlace.c | 254 |
1 files changed, 229 insertions, 25 deletions
diff --git a/src/video_out/deinterlace.c b/src/video_out/deinterlace.c index 13217b111..67f932612 100644 --- a/src/video_out/deinterlace.c +++ b/src/video_out/deinterlace.c @@ -30,6 +30,7 @@ #include "cpu_accel.h" #include "deinterlace.h" + /* DeinterlaceFieldBob algorithm Based on Virtual Dub plugin by Gunnar Thalin @@ -37,30 +38,30 @@ Linux version for Xine player by Miguel Freitas Todo: use a MMX optimized memcpy */ -static void deinterlace_bob_yuv_mmx( uint8_t *pdst, uint8_t *psrc, +static void deinterlace_bob_yuv_mmx( uint8_t *pdst, uint8_t *psrc[], int width, int height ) { #ifdef ARCH_X86 int Line; - long long* YVal1; - long long* YVal2; - long long* YVal3; - long long* Dest; - uint8_t* pEvenLines = psrc; - uint8_t* pOddLines = psrc+width; + uint64_t *YVal1; + uint64_t *YVal2; + uint64_t *YVal3; + uint64_t *Dest; + uint8_t* pEvenLines = psrc[0]; + uint8_t* pOddLines = psrc[0]+width; int LineLength = width; - int Pitch = width * 2; + int SourcePitch = width * 2; int IsOdd = 1; long EdgeDetect = 625; long JaggieThreshold = 73; int n; - unsigned long long qwEdgeDetect; - unsigned long long qwThreshold; - const unsigned long long Mask = 0xfefefefefefefefe; - const unsigned long long YMask = 0x00ff00ff00ff00ff; + uint64_t qwEdgeDetect; + uint64_t qwThreshold; + const uint64_t Mask = 0xfefefefefefefefe; + const uint64_t YMask = 0x00ff00ff00ff00ff; qwEdgeDetect = EdgeDetect; qwEdgeDetect += (qwEdgeDetect << 48) + (qwEdgeDetect << 32) + (qwEdgeDetect << 16); @@ -79,17 +80,17 @@ static void deinterlace_bob_yuv_mmx( uint8_t *pdst, uint8_t *psrc, { if (IsOdd) { - YVal1 = (long long *)(pOddLines + Line * Pitch); - YVal2 = (long long *)(pEvenLines + (Line + 1) * Pitch); - YVal3 = (long long *)(pOddLines + (Line + 1) * Pitch); - Dest = (long long *)(pdst + (Line * 2 + 2) * LineLength); + YVal1 = (uint64_t *)(pOddLines + Line * SourcePitch); + YVal2 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch); + YVal3 = (uint64_t *)(pOddLines + (Line + 1) * SourcePitch); + Dest = (uint64_t *)(pdst + (Line * 2 + 2) * LineLength); } else { - YVal1 = (long long *)(pEvenLines + Line * Pitch); - YVal2 = (long long *)(pOddLines + Line * Pitch); - YVal3 = (long long *)(pEvenLines + (Line + 1) * Pitch); - Dest = (long long *)(pdst + (Line * 2 + 1) * LineLength); + YVal1 = (uint64_t *)(pEvenLines + Line * SourcePitch); + YVal2 = (uint64_t *)(pOddLines + Line * SourcePitch); + YVal3 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch); + Dest = (uint64_t *)(pdst + (Line * 2 + 1) * LineLength); } // For ease of reading, the comments below assume that we're operating on an odd @@ -167,7 +168,7 @@ static void deinterlace_bob_yuv_mmx( uint8_t *pdst, uint8_t *psrc, if (! IsOdd) { memcpy(pdst + (height * 2 - 1) * LineLength, - pOddLines + (height - 1) * Pitch, + pOddLines + (height - 1) * SourcePitch, LineLength); } @@ -177,6 +178,193 @@ static void deinterlace_bob_yuv_mmx( uint8_t *pdst, uint8_t *psrc, #endif } +/* Deinterlace the latest field, with a tendency to weave rather than bob. + Good for high detail on low-movement scenes. + NOT FINISHED! WEIRD OUTPUT!!! +*/ +static int deinterlace_weave_yuv_mmx( uint8_t *pdst, uint8_t *psrc[], + int width, int height ) +{ +#ifdef ARCH_X86 + + int Line; + uint64_t *YVal1; + uint64_t *YVal2; + uint64_t *YVal3; + uint64_t *YVal4; + uint64_t *Dest; + uint8_t* pEvenLines = psrc[0]; + uint8_t* pOddLines = psrc[0]+width; + uint8_t* pPrevLines; + + int LineLength = width; + int SourcePitch = width * 2; + int IsOdd = 1; + + long TemporalTolerance = 300; + long SpatialTolerance = 600; + long SimilarityThreshold = 25; + + const uint64_t YMask = 0x00ff00ff00ff00ff; + + int n; + + uint64_t qwSpatialTolerance; + uint64_t qwTemporalTolerance; + uint64_t qwThreshold; + const uint64_t Mask = 0xfefefefefefefefe; + + + // Make sure we have all the data we need. + if ( psrc[0] == NULL || psrc[1] == NULL ) + return 0; + + if (IsOdd) + pPrevLines = psrc[1] + width; + else + pPrevLines = psrc[1]; + + // Since the code uses MMX to process 4 pixels at a time, we need our constants + // to be represented 4 times per quadword. + qwSpatialTolerance = SpatialTolerance; + qwSpatialTolerance += (qwSpatialTolerance << 48) + (qwSpatialTolerance << 32) + (qwSpatialTolerance << 16); + qwTemporalTolerance = TemporalTolerance; + qwTemporalTolerance += (qwTemporalTolerance << 48) + (qwTemporalTolerance << 32) + (qwTemporalTolerance << 16); + qwThreshold = SimilarityThreshold; + qwThreshold += (qwThreshold << 48) + (qwThreshold << 32) + (qwThreshold << 16); + + // copy first even line no matter what, and the first odd line if we're + // processing an even field. + memcpy(pdst, pEvenLines, LineLength); + if (!IsOdd) + memcpy(pdst + LineLength, pOddLines, LineLength); + + height = height / 2; + for (Line = 0; Line < height - 1; ++Line) + { + if (IsOdd) + { + YVal1 = (uint64_t *)(pEvenLines + Line * SourcePitch); + YVal2 = (uint64_t *)(pOddLines + Line * SourcePitch); + YVal3 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch); + YVal4 = (uint64_t *)(pPrevLines + Line * SourcePitch); + Dest = (uint64_t *)(pdst + (Line * 2 + 1) * LineLength); + } + else + { + YVal1 = (uint64_t *)(pOddLines + Line * SourcePitch); + YVal2 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch); + YVal3 = (uint64_t *)(pOddLines + (Line + 1) * SourcePitch); + YVal4 = (uint64_t *)(pPrevLines + (Line + 1) * SourcePitch); + Dest = (uint64_t *)(pdst + (Line * 2 + 2) * LineLength); + } + + // For ease of reading, the comments below assume that we're operating on an odd + // field (i.e., that bIsOdd is true). The exact same processing is done when we + // operate on an even field, but the roles of the odd and even fields are reversed. + // It's just too cumbersome to explain the algorithm in terms of "the next odd + // line if we're doing an odd field, or the next even line if we're doing an + // even field" etc. So wherever you see "odd" or "even" below, keep in mind that + // half the time this function is called, those words' meanings will invert. + + // Copy the even scanline below this one to the overlay buffer, since we'll be + // adapting the current scanline to the even lines surrounding it. The scanline + // above has already been copied by the previous pass through the loop. + memcpy((char *)Dest + LineLength, YVal3, LineLength); + + n = LineLength >> 3; + while( n-- ) + { + movq_m2r ( *YVal1++, mm0 ); // mm0 = E1 + movq_m2r ( *YVal2++, mm1 ); // mm1 = O + movq_m2r ( *YVal3++, mm2 ); // mm2 = E2 + + movq_r2r ( mm0, mm3 ); // mm3 = intensity(E1) + movq_r2r ( mm1, mm4 ); // mm4 = intensity(O) + movq_r2r ( mm2, mm6 ); // mm6 = intensity(E2) + + pand_m2r ( *&YMask, mm3 ); + pand_m2r ( *&YMask, mm4 ); + pand_m2r ( *&YMask, mm6 ); + + // Average E1 and E2 for interpolated bobbing. + // leave result in mm0 + pand_m2r ( *&Mask, mm0 ); // mm0 = E1 with lower chroma bit stripped off + pand_m2r ( *&Mask, mm2 ); // mm2 = E2 with lower chroma bit stripped off + psrlw_i2r ( 01, mm0 ); // mm0 = E1 / 2 + psrlw_i2r ( 01, mm2 ); // mm2 = E2 / 2 + paddb_r2r ( mm2, mm0 ); + + // The meat of the work is done here. We want to see whether this pixel is + // close in luminosity to ANY of: its top neighbor, its bottom neighbor, + // or its predecessor. To do this without branching, we use MMX's + // saturation feature, which gives us Z(x) = x if x>=0, or 0 if x<0. + // + // The formula we're computing here is + // Z(ST - (E1 - O) ^ 2) + Z(ST - (E2 - O) ^ 2) + Z(TT - (Oold - O) ^ 2) + // where ST is spatial tolerance and TT is temporal tolerance. The idea + // is that if a pixel is similar to none of its neighbors, the resulting + // value will be pretty low, probably zero. A high value therefore indicates + // that the pixel had a similar neighbor. The pixel in the same position + // in the field before last (Oold) is considered a neighbor since we want + // to be able to display 1-pixel-high horizontal lines. + + movq_m2r ( *&qwSpatialTolerance, mm7 ); + movq_r2r ( mm3, mm5 ); // mm5 = E1 + psubsw_r2r ( mm4, mm5 ); // mm5 = E1 - O + psraw_i2r ( 1, mm5 ); + pmullw_r2r ( mm5, mm5 ); // mm5 = (E1 - O) ^ 2 + psubusw_r2r ( mm5, mm7 ); // mm7 = ST - (E1 - O) ^ 2, or 0 if that's negative + + movq_m2r ( *&qwSpatialTolerance, mm3 ); + movq_r2r ( mm6, mm5 ); // mm5 = E2 + psubsw_r2r ( mm4, mm5 ); // mm5 = E2 - O + psraw_i2r ( 1, mm5 ); + pmullw_r2r ( mm5, mm5 ); // mm5 = (E2 - O) ^ 2 + psubusw_r2r ( mm5, mm3 ); // mm0 = ST - (E2 - O) ^ 2, or 0 if that's negative + paddusw_r2r ( mm3, mm7 ); // mm7 = (ST - (E1 - O) ^ 2) + (ST - (E2 - O) ^ 2) + + movq_m2r ( *&qwTemporalTolerance, mm3 ); + movq_m2r ( *YVal4++, mm5 ); // mm5 = Oold + pand_m2r ( *&YMask, mm5 ); + psubsw_r2r ( mm4, mm5 ); // mm5 = Oold - O + psraw_i2r ( 1, mm5 ); // XXX + pmullw_r2r ( mm5, mm5 ); // mm5 = (Oold - O) ^ 2 + psubusw_r2r ( mm5, mm3 ); // mm0 = TT - (Oold - O) ^ 2, or 0 if that's negative + paddusw_r2r ( mm3, mm7 ); // mm7 = our magic number + + // Now compare the similarity totals against our threshold. The pcmpgtw + // instruction will populate the target register with a bunch of mask bits, + // filling words where the comparison is true with 1s and ones where it's + // false with 0s. A few ANDs and NOTs and an OR later, we have bobbed + // values for pixels under the similarity threshold and weaved ones for + // pixels over the threshold. + + pcmpgtw_m2r( *&qwThreshold, mm7 ); // mm7 = 0xffff where we're greater than the threshold, 0 elsewhere + movq_r2r ( mm7, mm6 ); // mm6 = 0xffff where we're greater than the threshold, 0 elsewhere + pand_r2r ( mm1, mm7 ); // mm7 = weaved data where we're greater than the threshold, 0 elsewhere + pandn_r2r ( mm0, mm6 ); // mm6 = bobbed data where we're not greater than the threshold, 0 elsewhere + por_r2r ( mm6, mm7 ); // mm7 = bobbed and weaved data + + movq_r2m ( mm7, *Dest++ ); + } + } + + // Copy last odd line if we're processing an odd field. + if (IsOdd) + { + memcpy(pdst + (height * 2 - 1) * LineLength, + pOddLines + (height - 1) * SourcePitch, + LineLength); + } + + // clear out the MMX registers ready for doing floating point + // again + emms(); + + return 1; +#endif +} static int check_for_mmx(void) { @@ -200,18 +388,34 @@ static void abort_mmx_missing(void) exit(1); } -void deinterlace_yuv( uint8_t *pdst, uint8_t *psrc, +/* generic YUV deinterlacer + pdst -> pointer to destination bitmap + psrc -> array of pointers to source bitmaps ([0] = most recent) + width,height -> dimension for bitmaps + method -> DEINTERLACE_xxx +*/ + +void deinterlace_yuv( uint8_t *pdst, uint8_t *psrc[], int width, int height, int method ) { switch( method ) { case DEINTERLACE_NONE: - memcpy(pdst,psrc,width*height); + memcpy(pdst,psrc[0],width*height); break; case DEINTERLACE_BOB: if( check_for_mmx() ) - deinterlace_bob_yuv_mmx(pdst,psrc,width,height); + deinterlace_bob_yuv_mmx(pdst,psrc,width,height); + else /* FIXME: provide an alternative? */ + abort_mmx_missing(); + break; + case DEINTERLACE_WEAVE: + if( check_for_mmx() ) + { + if( !deinterlace_weave_yuv_mmx(pdst,psrc,width,height) ) + memcpy(pdst,psrc[0],width*height); + } else /* FIXME: provide an alternative? */ - abort_mmx_missing(); + abort_mmx_missing(); break; } } |