1 files changed, 59 insertions, 49 deletions
diff --git a/src/libffmpeg/libavcodec/dsputil.h b/src/libffmpeg/libavcodec/dsputil.h
index 6126c78b5..d8346d509 100644
--- a/src/libffmpeg/libavcodec/dsputil.h
+++ b/src/libffmpeg/libavcodec/dsputil.h
@@ -1,6 +1,7 @@
 /*
  * DSP utils
  * Copyright (c) 2000, 2001, 2002 Fabrice Bellard.
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -29,28 +30,31 @@
 
 #include "common.h"
 #include "avcodec.h"
-#include "xineutils.h"
 
 #if defined(ARCH_X86)
-#define HAVE_MMX 1 
+#define HAVE_MMX 1
 #endif
 
-#undef DEBUG
-
+//#define DEBUG
 /* dct code */
 typedef short DCTELEM;
 
 void fdct_ifast (DCTELEM *data);
+void fdct_ifast248 (DCTELEM *data);
 void ff_jpeg_fdct_islow (DCTELEM *data);
+void ff_fdct248_islow (DCTELEM *data);
 
 void j_rev_dct (DCTELEM *data);
 
 void ff_fdct_mmx(DCTELEM *block);
+void ff_fdct_mmx2(DCTELEM *block);
+void ff_fdct_sse2(DCTELEM *block);
 
 /* encoding scans */
 extern const uint8_t ff_alternate_horizontal_scan[64];
 extern const uint8_t ff_alternate_vertical_scan[64];
 extern const uint8_t ff_zigzag_direct[64];
+extern const uint8_t ff_zigzag248_direct[64];
 
 /* pixel operations */
 #define MAX_NEG_CROP 384
@@ -80,6 +84,7 @@ void clear_blocks_c(DCTELEM *blocks);
 
 /* add and put pixel (decoding) */
 // blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16
+//h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller than 4
 typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h);
 typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h);
 typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
@@ -110,10 +115,9 @@ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
 }
 
 /* motion estimation */
-
-typedef int (*op_pixels_abs_func)(uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size)/* __attribute__ ((const))*/;
-
-typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size)/* __attribute__ ((const))*/;
+// h is limited to {width/2, width, 2*width} but never larger than 16 and never smaller than 2
+// although currently h<4 is not used, as functions with width <8 are neither used nor implemented
+typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/;
 
 
 /**
@@ -137,25 +141,28 @@ typedef struct DSPContext {
     void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
     int (*pix_sum)(uint8_t * pix, int line_size);
     int (*pix_norm1)(uint8_t * pix, int line_size);
-    me_cmp_func sad[2]; /* identical to pix_absAxA except additional void * */
-    me_cmp_func sse[2];
-    me_cmp_func hadamard8_diff[2];
-    me_cmp_func dct_sad[2];
-    me_cmp_func quant_psnr[2];
-    me_cmp_func bit[2];
-    me_cmp_func rd[2];
-    int (*hadamard8_abs )(uint8_t *src, int stride, int mean);
-
-    me_cmp_func me_pre_cmp[11];
-    me_cmp_func me_cmp[11];
-    me_cmp_func me_sub_cmp[11];
-    me_cmp_func mb_cmp[11];
-
-    /* maybe create an array for 16/8/4/2 functions */
+// 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4
+    
+    me_cmp_func sad[5]; /* identical to pix_absAxA except additional void * */
+    me_cmp_func sse[5];
+    me_cmp_func hadamard8_diff[5];
+    me_cmp_func dct_sad[5];
+    me_cmp_func quant_psnr[5];
+    me_cmp_func bit[5];
+    me_cmp_func rd[5];
+    me_cmp_func vsad[5];
+    me_cmp_func vsse[5];
+
+    me_cmp_func me_pre_cmp[5];
+    me_cmp_func me_cmp[5];
+    me_cmp_func me_sub_cmp[5];
+    me_cmp_func mb_cmp[5];
+    me_cmp_func ildct_cmp[5]; //only width 16 used
+
     /**
      * Halfpel motion compensation with rounding (a+b+1)>>1.
      * this is an array[4][4] of motion compensation funcions for 4 
-     * horizontal blocksizes (2,4,8,16) and the 4 halfpel positions<br>
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
      * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
      * @param block destination where the result is stored
      * @param pixels source
@@ -167,7 +174,7 @@ typedef struct DSPContext {
     /**
      * Halfpel motion compensation with rounding (a+b+1)>>1.
      * This is an array[4][4] of motion compensation functions for 4 
-     * horizontal blocksizes (2,4,8,16) and the 4 halfpel positions<br>
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
      * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
      * @param block destination into which the result is averaged (a+b+1)>>1
      * @param pixels source
@@ -227,14 +234,7 @@ typedef struct DSPContext {
     qpel_mc_func put_h264_qpel_pixels_tab[3][16];
     qpel_mc_func avg_h264_qpel_pixels_tab[3][16];
     
-    op_pixels_abs_func pix_abs16x16;
-    op_pixels_abs_func pix_abs16x16_x2;
-    op_pixels_abs_func pix_abs16x16_y2;
-    op_pixels_abs_func pix_abs16x16_xy2;
-    op_pixels_abs_func pix_abs8x8;
-    op_pixels_abs_func pix_abs8x8_x2;
-    op_pixels_abs_func pix_abs8x8_y2;
-    op_pixels_abs_func pix_abs8x8_xy2;
+    me_cmp_func pix_abs[2][4];
     
     /* huffyuv specific */
     void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
@@ -246,8 +246,12 @@ typedef struct DSPContext {
     void (*sub_hfyu_median_prediction)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top);
     void (*bswap_buf)(uint32_t *dst, uint32_t *src, int w);
     
+    void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
+    void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
+
     /* (I)DCT */
     void (*fdct)(DCTELEM *block/* align 16*/);
+    void (*fdct248)(DCTELEM *block/* align 16*/);
     
     /* IDCT really*/
     void (*idct)(DCTELEM *block/* align 16*/);
@@ -295,6 +299,8 @@ void dsputil_init(DSPContext* p, AVCodecContext *avctx);
  */
 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last);
 
+void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);
+
 #define	BYTE_VEC32(c)	((c)*0x01010101UL)
 
 static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
@@ -322,25 +328,21 @@ int mm_support(void);
 
 #undef emms_c
 
-#if 0
 #define MM_MMX    0x0001 /* standard MMX */
 #define MM_3DNOW  0x0004 /* AMD 3DNOW */
 #define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */
 #define MM_SSE    0x0008 /* SSE functions */
 #define MM_SSE2   0x0010 /* PIV SSE2 functions */
-#endif
 
 extern int mm_flags;
 
 void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
 void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
 
-#if 0 
 static inline void emms(void)
 {
     __asm __volatile ("emms;":::"memory");
 }
-#endif
 
 
 #define emms_c() \
@@ -382,7 +384,9 @@ void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);
 extern int mm_flags;
 
 #if defined(HAVE_ALTIVEC) && !defined(CONFIG_DARWIN)
+#define pixel altivec_pixel
 #include <altivec.h>
+#undef pixel
 #endif
 
 #define __align8 __attribute__ ((aligned (16)))
@@ -483,12 +487,24 @@ void ff_mdct_calc(MDCTContext *s, FFTSample *out,
                const FFTSample *input, FFTSample *tmp);
 void ff_mdct_end(MDCTContext *s);
 
-#define WARPER88_1616(name8, name16)\
-static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride){\
-    return name8(s, dst           , src           , stride)\
-          +name8(s, dst+8         , src+8         , stride)\
-          +name8(s, dst  +8*stride, src  +8*stride, stride)\
-          +name8(s, dst+8+8*stride, src+8+8*stride, stride);\
+#define WARPER8_16(name8, name16)/* defines static name16() as the sum of two name8() calls; name8 is presumably the 8-pixel-wide compare -- confirm at the expansion site */\
+static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
+    return name8(s, dst           , src           , stride, h)/* part starting at byte offset 0, same stride and h */\
+          +name8(s, dst+8         , src+8         , stride, h);/* part starting 8 bytes further right, same stride and h */\
+}
+
+#define WARPER8_16_SQ(name8, name16)/* defines static name16() from name8() calls that are always given a height of 8 (the h argument is not forwarded) */\
+static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
+    int score=0;/* running sum of the name8() results */\
+    score +=name8(s, dst           , src           , stride, 8);/* upper rows, byte offset 0 */\
+    score +=name8(s, dst+8         , src+8         , stride, 8);/* upper rows, byte offset +8 */\
+    if(h==16){/* only h==16 adds a second band; any other h scores just the two calls above */\
+        dst += 8*stride;/* move both pointers down by 8 lines */\
+        src += 8*stride;\
+        score +=name8(s, dst           , src           , stride, 8);/* lower rows, byte offset 0 */\
+        score +=name8(s, dst+8         , src+8         , stride, 8);/* lower rows, byte offset +8 */\
+    }\
+    return score;/* sum of the 2 (h!=16) or 4 (h==16) partial scores */\
 }
 
 #ifndef HAVE_LRINTF
@@ -506,10 +522,4 @@ static inline long int lrintf(float x)
 }
 #endif
 
-#if defined(CONFIG_OS2) || defined(CONFIG_SUNOS)
-static inline float floorf(float f) { 
-    return floor(f); 
-}
-#endif
-
 #endif