New greedy deinterlace method

(looks good! albeit more cpu intensive than bob) CVS patchset: 665 CVS date: 2001/09/19 12:02:03
author: Miguel Freitas <miguelfreitas@users.sourceforge.net> 2001-09-19 12:02:03 +0000
committer: Miguel Freitas <miguelfreitas@users.sourceforge.net> 2001-09-19 12:02:03 +0000
commit: 311eeae70070cfd1d7b17c30ef361f3d327774dc (patch)
tree: 51328ffd5f7d133b39a4fa6b299236d65ea1a819
parent: 91538feb86939b0cffab173b59a3f7fd5fa5212f (diff)
download: xine-lib-311eeae70070cfd1d7b17c30ef361f3d327774dc.tar.gz
xine-lib-311eeae70070cfd1d7b17c30ef361f3d327774dc.tar.bz2
2 files changed, 181 insertions, 0 deletions
diff --git a/src/video_out/deinterlace.c b/src/video_out/deinterlace.c
index 67f932612..ac428488d 100644
--- a/src/video_out/deinterlace.c
+++ b/src/video_out/deinterlace.c
@@ -366,6 +366,177 @@ static int deinterlace_weave_yuv_mmx( uint8_t *pdst, uint8_t *psrc[],
 #endif
 }
 
+
+// This is a simple lightweight DeInterlace method that uses little CPU time
+// but gives very good results for low or intermedite motion. (MORE CPU THAN BOB)
+// It defers frames by one field, but that does not seem to produce noticeable
+// lip sync problems.
+//
+// The method used is to take either the older or newer weave pixel depending
+// upon which give the smaller comb factor, and then clip to avoid large damage
+// when wrong.
+//
+// I'd intended this to be part of a larger more elaborate method added to
+// Blended Clip but this give too good results for the CPU to ignore here.
+static int deinterlace_greedy_yuv_mmx( uint8_t *pdst, uint8_t *psrc[],
+    int width, int height )
+{
+#ifdef ARCH_X86
+  int Line;
+  int	LoopCtr;
+  uint64_t *L1;					// ptr to Line1, of 3
+  uint64_t *L2;					// ptr to Line2, the weave line
+  uint64_t *L3;					// ptr to Line3
+  uint64_t *LP2;					// ptr to prev Line2
+  uint64_t *Dest;
+  uint8_t* pEvenLines = psrc[0];
+  uint8_t* pOddLines = psrc[0]+width;
+  uint8_t* pPrevLines;
+
+  const uint64_t ShiftMask = 0xfefefefefefefefe;
+
+  int LineLength = width;
+  int SourcePitch = width * 2;
+  int IsOdd = 1;
+  long GreedyMaxComb = 15;
+  uint64_t MaxComb;
+  uint64_t i;
+
+  if ( psrc[0] == NULL || psrc[1] == NULL )
+    return 0;
+
+  if (IsOdd)
+    pPrevLines = psrc[1] + width;
+  else
+    pPrevLines = psrc[1];
+
+
+	i = GreedyMaxComb;			// How badly do we let it weave? 0-255
+	MaxComb = i << 56 | i << 48 | i << 40 | i << 32 | i << 24 | i << 16 | i << 8 | i;
+
+
+	// copy first even line no matter what, and the first odd line if we're
+	// processing an EVEN field. (note diff from other deint rtns.)
+  memcpy(pdst, pEvenLines, LineLength); //DL0
+  if (!IsOdd)
+    memcpy(pdst + LineLength, pOddLines, LineLength); //DL1
+
+  height = height / 2;
+  for (Line = 0; Line < height - 1; ++Line)
+  {
+    LoopCtr = LineLength / 8;				// there are LineLength / 8 qwords per line
+
+    if (IsOdd)
+    {
+      L1 = (uint64_t *)(pEvenLines + Line * SourcePitch);
+      L2 = (uint64_t *)(pOddLines + Line * SourcePitch);
+      L3 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch);
+      LP2 = (uint64_t *)(pPrevLines + Line * SourcePitch); // prev Odd lines
+      Dest = (uint64_t *)(pdst + (Line * 2 + 1) * LineLength);
+    }
+    else
+    {
+      L1 = (uint64_t *)(pOddLines + Line * SourcePitch);
+      L2 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch);
+      L3 = (uint64_t *)(pOddLines + (Line + 1) * SourcePitch);
+      LP2 = (uint64_t *)(pPrevLines + (Line + 1) * SourcePitch); //prev even lines
+      Dest = (uint64_t *)(pdst + (Line * 2 + 2) * LineLength);
+    }
+
+    memcpy((char *)Dest + LineLength, L3, LineLength);
+
+// For ease of reading, the comments below assume that we're operating on an odd
+// field (i.e., that info->IsOdd is true).  Assume the obvious for even lines..
+
+    while( LoopCtr-- )
+		{
+      movq_m2r ( *L1++, mm1 );
+      movq_m2r ( *L2++, mm2 );
+      movq_m2r ( *L3++, mm3 );
+      movq_m2r ( *LP2++, mm0 );
+
+      // average L1 and L3 leave result in mm4
+      movq_r2r ( mm1, mm4 );	// L1
+
+      pand_m2r ( *&ShiftMask, mm4 );
+      psrlw_i2r ( 01, mm4 );
+      movq_r2r ( mm3, mm5 );  // L3
+      pand_m2r ( *&ShiftMask, mm5 );
+      psrlw_i2r ( 01, mm5 );
+      paddb_r2r ( mm5, mm4 );  // the average, for computing comb
+
+      // get abs value of possible L2 comb
+			movq_r2r	( mm2, mm7 );				// L2
+			psubusb_r2r ( mm4, mm7 );				// L2 - avg
+			movq_r2r ( mm4, mm5 );				// avg
+			psubusb_r2r ( mm2, mm5 );				// avg - L2
+			por_r2r ( mm7, mm5 );				// abs(avg-L2)
+			movq_r2r ( mm4, mm6 );     // copy of avg for later
+
+      // get abs value of possible LP2 comb
+			movq_r2r ( mm0, mm7 );				// LP2
+			psubusb_r2r ( mm4, mm7 );				// LP2 - avg
+			psubusb_r2r ( mm0, mm4 );				// avg - LP2
+			por_r2r ( mm7, mm4 );				// abs(avg-LP2)
+
+      // use L2 or LP2 depending upon which makes smaller comb
+			psubusb_r2r ( mm5, mm4 );				// see if it goes to zero
+			psubusb_r2r ( mm5, mm5 );				// 0
+			pcmpeqb_r2r ( mm5, mm4 );				// if (mm4=0) then FF else 0
+			pcmpeqb_r2r ( mm4, mm5 );				// opposite of mm4
+
+      // if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5 = 55
+			pand_r2r ( mm2, mm5 );				// use L2 if mm5 == ff, else 0
+			pand_r2r ( mm0, mm4 );				// use LP2 if mm4 = ff, else 0
+			por_r2r ( mm5, mm4 );				// may the best win
+
+      // Now lets clip our chosen value to be not outside of the range
+      // of the high/low range L1-L3 by more than abs(L1-L3)
+      // This allows some comb but limits the damages and also allows more
+      // detail than a boring oversmoothed clip.
+
+			movq_r2r ( mm1, mm2 );				// copy L1
+			psubusb_r2r ( mm3, mm2 );				// - L3, with saturation
+			paddusb_r2r ( mm3, mm2 );                // now = Max(L1,L3)
+
+			pcmpeqb_r2r ( mm7, mm7 );				// all ffffffff
+			psubusb_r2r ( mm1, mm7 );				// - L1
+			paddusb_r2r ( mm7, mm3 );				// add, may sat at fff..
+			psubusb_r2r ( mm7, mm3 );				// now = Min(L1,L3)
+
+      // allow the value to be above the high or below the low by amt of MaxComb
+			paddusb_m2r ( *&MaxComb, mm2 );			// increase max by diff
+			psubusb_m2r ( *&MaxComb, mm3 );			// lower min by diff
+
+			psubusb_r2r ( mm3, mm4 );				// best - Min
+			paddusb_r2r ( mm3, mm4 );				// now = Max(best,Min(L1,L3)
+
+			pcmpeqb_r2r ( mm7, mm7 );				// all ffffffff
+			psubusb_r2r ( mm4, mm7 );				// - Max(best,Min(best,L3)
+			paddusb_r2r ( mm7, mm2 );				// add may sat at FFF..
+			psubusb_r2r ( mm7, mm2 );				// now = Min( Max(best, Min(L1,L3), L2 )=L2 clipped
+
+		  movq_r2m ( mm2, *Dest++ );        // move in our clipped best
+
+		}
+	}
+
+	// Copy last odd line if we're processing an Odd field.
+  if (IsOdd)
+  {
+    memcpy(pdst + (height * 2 - 1) * LineLength,
+                      pOddLines + (height - 1) * SourcePitch,
+                      LineLength);
+  }
+
+  // clear out the MMX registers ready for doing floating point again
+  emms();
+
+  return 1;
+#endif
+}
+
+
 static int check_for_mmx(void)
 {
 #ifdef ARCH_X86
@@ -417,5 +588,14 @@ void deinterlace_yuv( uint8_t *pdst, uint8_t *psrc[],
       else /* FIXME: provide an alternative? */
         abort_mmx_missing();
       break;
+    case DEINTERLACE_GREEDY:
+      if( check_for_mmx() )
+      {
+        if( !deinterlace_greedy_yuv_mmx(pdst,psrc,width,height) )
+          memcpy(pdst,psrc[0],width*height);
+      }
+      else /* FIXME: provide an alternative? */
+        abort_mmx_missing();
+      break;
   }
 }
diff --git a/src/video_out/deinterlace.h b/src/video_out/deinterlace.h
index a4a3d22ae..a1a759648 100644
--- a/src/video_out/deinterlace.h
+++ b/src/video_out/deinterlace.h
@@ -35,5 +35,6 @@ void deinterlace_yuv( uint8_t *pdst, uint8_t *psrc[],
 #define DEINTERLACE_NONE     0
 #define DEINTERLACE_BOB      1
 #define DEINTERLACE_WEAVE    2
+#define DEINTERLACE_GREEDY   3
 
 #endif
author	Miguel Freitas <miguelfreitas@users.sourceforge.net>	2001-09-19 12:02:03 +0000
committer	Miguel Freitas <miguelfreitas@users.sourceforge.net>	2001-09-19 12:02:03 +0000
commit	311eeae70070cfd1d7b17c30ef361f3d327774dc (patch)
tree	51328ffd5f7d133b39a4fa6b299236d65ea1a819
parent	91538feb86939b0cffab173b59a3f7fd5fa5212f (diff)
download	xine-lib-311eeae70070cfd1d7b17c30ef361f3d327774dc.tar.gz xine-lib-311eeae70070cfd1d7b17c30ef361f3d327774dc.tar.bz2