diff options
author | Miguel Freitas <miguelfreitas@users.sourceforge.net> | 2001-09-19 12:02:03 +0000 |
---|---|---|
committer | Miguel Freitas <miguelfreitas@users.sourceforge.net> | 2001-09-19 12:02:03 +0000 |
commit | 311eeae70070cfd1d7b17c30ef361f3d327774dc (patch) | |
tree | 51328ffd5f7d133b39a4fa6b299236d65ea1a819 | |
parent | 91538feb86939b0cffab173b59a3f7fd5fa5212f (diff) | |
download | xine-lib-311eeae70070cfd1d7b17c30ef361f3d327774dc.tar.gz xine-lib-311eeae70070cfd1d7b17c30ef361f3d327774dc.tar.bz2 |
New greedy deinterlace method
(looks good! albeit more cpu intensive than bob)
CVS patchset: 665
CVS date: 2001/09/19 12:02:03
-rw-r--r-- | src/video_out/deinterlace.c | 180 | ||||
-rw-r--r-- | src/video_out/deinterlace.h | 1 |
2 files changed, 181 insertions, 0 deletions
diff --git a/src/video_out/deinterlace.c b/src/video_out/deinterlace.c index 67f932612..ac428488d 100644 --- a/src/video_out/deinterlace.c +++ b/src/video_out/deinterlace.c @@ -366,6 +366,177 @@ static int deinterlace_weave_yuv_mmx( uint8_t *pdst, uint8_t *psrc[], #endif } + +// This is a simple lightweight DeInterlace method that uses little CPU time +// but gives very good results for low or intermedite motion. (MORE CPU THAN BOB) +// It defers frames by one field, but that does not seem to produce noticeable +// lip sync problems. +// +// The method used is to take either the older or newer weave pixel depending +// upon which give the smaller comb factor, and then clip to avoid large damage +// when wrong. +// +// I'd intended this to be part of a larger more elaborate method added to +// Blended Clip but this give too good results for the CPU to ignore here. +static int deinterlace_greedy_yuv_mmx( uint8_t *pdst, uint8_t *psrc[], + int width, int height ) +{ +#ifdef ARCH_X86 + int Line; + int LoopCtr; + uint64_t *L1; // ptr to Line1, of 3 + uint64_t *L2; // ptr to Line2, the weave line + uint64_t *L3; // ptr to Line3 + uint64_t *LP2; // ptr to prev Line2 + uint64_t *Dest; + uint8_t* pEvenLines = psrc[0]; + uint8_t* pOddLines = psrc[0]+width; + uint8_t* pPrevLines; + + const uint64_t ShiftMask = 0xfefefefefefefefe; + + int LineLength = width; + int SourcePitch = width * 2; + int IsOdd = 1; + long GreedyMaxComb = 15; + uint64_t MaxComb; + uint64_t i; + + if ( psrc[0] == NULL || psrc[1] == NULL ) + return 0; + + if (IsOdd) + pPrevLines = psrc[1] + width; + else + pPrevLines = psrc[1]; + + + i = GreedyMaxComb; // How badly do we let it weave? 0-255 + MaxComb = i << 56 | i << 48 | i << 40 | i << 32 | i << 24 | i << 16 | i << 8 | i; + + + // copy first even line no matter what, and the first odd line if we're + // processing an EVEN field. (note diff from other deint rtns.) + memcpy(pdst, pEvenLines, LineLength); //DL0 + if (!IsOdd) + memcpy(pdst + LineLength, pOddLines, LineLength); //DL1 + + height = height / 2; + for (Line = 0; Line < height - 1; ++Line) + { + LoopCtr = LineLength / 8; // there are LineLength / 8 qwords per line + + if (IsOdd) + { + L1 = (uint64_t *)(pEvenLines + Line * SourcePitch); + L2 = (uint64_t *)(pOddLines + Line * SourcePitch); + L3 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch); + LP2 = (uint64_t *)(pPrevLines + Line * SourcePitch); // prev Odd lines + Dest = (uint64_t *)(pdst + (Line * 2 + 1) * LineLength); + } + else + { + L1 = (uint64_t *)(pOddLines + Line * SourcePitch); + L2 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch); + L3 = (uint64_t *)(pOddLines + (Line + 1) * SourcePitch); + LP2 = (uint64_t *)(pPrevLines + (Line + 1) * SourcePitch); //prev even lines + Dest = (uint64_t *)(pdst + (Line * 2 + 2) * LineLength); + } + + memcpy((char *)Dest + LineLength, L3, LineLength); + +// For ease of reading, the comments below assume that we're operating on an odd +// field (i.e., that info->IsOdd is true). Assume the obvious for even lines.. + + while( LoopCtr-- ) + { + movq_m2r ( *L1++, mm1 ); + movq_m2r ( *L2++, mm2 ); + movq_m2r ( *L3++, mm3 ); + movq_m2r ( *LP2++, mm0 ); + + // average L1 and L3 leave result in mm4 + movq_r2r ( mm1, mm4 ); // L1 + + pand_m2r ( *&ShiftMask, mm4 ); + psrlw_i2r ( 01, mm4 ); + movq_r2r ( mm3, mm5 ); // L3 + pand_m2r ( *&ShiftMask, mm5 ); + psrlw_i2r ( 01, mm5 ); + paddb_r2r ( mm5, mm4 ); // the average, for computing comb + + // get abs value of possible L2 comb + movq_r2r ( mm2, mm7 ); // L2 + psubusb_r2r ( mm4, mm7 ); // L2 - avg + movq_r2r ( mm4, mm5 ); // avg + psubusb_r2r ( mm2, mm5 ); // avg - L2 + por_r2r ( mm7, mm5 ); // abs(avg-L2) + movq_r2r ( mm4, mm6 ); // copy of avg for later + + // get abs value of possible LP2 comb + movq_r2r ( mm0, mm7 ); // LP2 + psubusb_r2r ( mm4, mm7 ); // LP2 - avg + psubusb_r2r ( mm0, mm4 ); // avg - LP2 + por_r2r ( mm7, mm4 ); // abs(avg-LP2) + + // use L2 or LP2 depending upon which makes smaller comb + psubusb_r2r ( mm5, mm4 ); // see if it goes to zero + psubusb_r2r ( mm5, mm5 ); // 0 + pcmpeqb_r2r ( mm5, mm4 ); // if (mm4=0) then FF else 0 + pcmpeqb_r2r ( mm4, mm5 ); // opposite of mm4 + + // if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5 = 55 + pand_r2r ( mm2, mm5 ); // use L2 if mm5 == ff, else 0 + pand_r2r ( mm0, mm4 ); // use LP2 if mm4 = ff, else 0 + por_r2r ( mm5, mm4 ); // may the best win + + // Now lets clip our chosen value to be not outside of the range + // of the high/low range L1-L3 by more than abs(L1-L3) + // This allows some comb but limits the damages and also allows more + // detail than a boring oversmoothed clip. + + movq_r2r ( mm1, mm2 ); // copy L1 + psubusb_r2r ( mm3, mm2 ); // - L3, with saturation + paddusb_r2r ( mm3, mm2 ); // now = Max(L1,L3) + + pcmpeqb_r2r ( mm7, mm7 ); // all ffffffff + psubusb_r2r ( mm1, mm7 ); // - L1 + paddusb_r2r ( mm7, mm3 ); // add, may sat at fff.. + psubusb_r2r ( mm7, mm3 ); // now = Min(L1,L3) + + // allow the value to be above the high or below the low by amt of MaxComb + paddusb_m2r ( *&MaxComb, mm2 ); // increase max by diff + psubusb_m2r ( *&MaxComb, mm3 ); // lower min by diff + + psubusb_r2r ( mm3, mm4 ); // best - Min + paddusb_r2r ( mm3, mm4 ); // now = Max(best,Min(L1,L3) + + pcmpeqb_r2r ( mm7, mm7 ); // all ffffffff + psubusb_r2r ( mm4, mm7 ); // - Max(best,Min(best,L3) + paddusb_r2r ( mm7, mm2 ); // add may sat at FFF.. + psubusb_r2r ( mm7, mm2 ); // now = Min( Max(best, Min(L1,L3), L2 )=L2 clipped + + movq_r2m ( mm2, *Dest++ ); // move in our clipped best + + } + } + + // Copy last odd line if we're processing an Odd field. + if (IsOdd) + { + memcpy(pdst + (height * 2 - 1) * LineLength, + pOddLines + (height - 1) * SourcePitch, + LineLength); + } + + // clear out the MMX registers ready for doing floating point again + emms(); + + return 1; +#endif +} + + static int check_for_mmx(void) { #ifdef ARCH_X86 @@ -417,5 +588,14 @@ void deinterlace_yuv( uint8_t *pdst, uint8_t *psrc[], else /* FIXME: provide an alternative? */ abort_mmx_missing(); break; + case DEINTERLACE_GREEDY: + if( check_for_mmx() ) + { + if( !deinterlace_greedy_yuv_mmx(pdst,psrc,width,height) ) + memcpy(pdst,psrc[0],width*height); + } + else /* FIXME: provide an alternative? */ + abort_mmx_missing(); + break; } } diff --git a/src/video_out/deinterlace.h b/src/video_out/deinterlace.h index a4a3d22ae..a1a759648 100644 --- a/src/video_out/deinterlace.h +++ b/src/video_out/deinterlace.h @@ -35,5 +35,6 @@ void deinterlace_yuv( uint8_t *pdst, uint8_t *psrc[], #define DEINTERLACE_NONE 0 #define DEINTERLACE_BOB 1 #define DEINTERLACE_WEAVE 2 +#define DEINTERLACE_GREEDY 3 #endif |