Diffstat (limited to 'src/libffmpeg/libavcodec/i386/motion_est_mmx.c')
-rw-r--r--  src/libffmpeg/libavcodec/i386/motion_est_mmx.c  123
1 file changed, 54 insertions, 69 deletions
diff --git a/src/libffmpeg/libavcodec/i386/motion_est_mmx.c b/src/libffmpeg/libavcodec/i386/motion_est_mmx.c
index 950100e63..f32afae0b 100644
--- a/src/libffmpeg/libavcodec/i386/motion_est_mmx.c
+++ b/src/libffmpeg/libavcodec/i386/motion_est_mmx.c
@@ -1,6 +1,7 @@
/*
* MMX optimized motion estimation
* Copyright (c) 2001 Fabrice Bellard.
+ * Copyright (c) 2002-2004 Michael Niedermayer
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -26,11 +27,11 @@ static const __attribute__ ((aligned(8))) uint64_t round_tab[3]={
0x0002000200020002ULL,
};
-static const __attribute__ ((aligned(8), unused)) uint64_t bone= 0x0101010101010101LL;
+static __attribute__ ((aligned(8), unused)) uint64_t bone= 0x0101010101010101LL;
-static inline void sad8_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
+static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
- int len= -(stride<<h);
+ int len= -(stride*h);
asm volatile(
".balign 16 \n\t"
"1: \n\t"
@@ -64,9 +65,9 @@ static inline void sad8_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
);
}
-static inline void sad8_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
+static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
- int len= -(stride<<h);
+ int len= -(stride*h);
asm volatile(
".balign 16 \n\t"
"1: \n\t"
@@ -88,7 +89,7 @@ static inline void sad8_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
static inline void sad8_2_mmx2(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h)
{
- int len= -(stride<<h);
+ int len= -(stride*h);
asm volatile(
".balign 16 \n\t"
"1: \n\t"
@@ -114,7 +115,7 @@ static inline void sad8_2_mmx2(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, in
static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{ //FIXME reuse src
- int len= -(stride<<h);
+ int len= -(stride*h);
asm volatile(
".balign 16 \n\t"
"movq "MANGLE(bone)", %%mm5 \n\t"
@@ -151,7 +152,7 @@ static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h)
{
- int len= -(stride<<h);
+ int len= -(stride*h);
asm volatile(
".balign 16 \n\t"
"1: \n\t"
@@ -189,7 +190,7 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int
static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
- int len= -(stride<<h);
+ int len= -(stride*h);
asm volatile(
".balign 16 \n\t"
"1: \n\t"
@@ -265,85 +266,69 @@ static inline int sum_mmx2(void)
#define PIX_SAD(suf)\
-static int pix_abs8x8_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
+static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
+ assert(h==8);\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t":);\
\
- sad8_ ## suf(blk1, blk2, stride, 3);\
+ sad8_1_ ## suf(blk1, blk2, stride, 8);\
\
return sum_ ## suf();\
}\
-static int sad8x8_ ## suf(void *s, uint8_t *blk2, uint8_t *blk1, int stride)\
-{\
- asm volatile("pxor %%mm7, %%mm7 \n\t"\
- "pxor %%mm6, %%mm6 \n\t":);\
-\
- sad8_ ## suf(blk1, blk2, stride, 3);\
-\
- return sum_ ## suf();\
-}\
-\
-static int pix_abs8x8_x2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
+static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
+ assert(h==8);\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
:: "m"(round_tab[1]) \
);\
\
- sad8_2_ ## suf(blk1, blk1+1, blk2, stride, 3);\
+ sad8_2_ ## suf(blk1, blk1+1, blk2, stride, 8);\
\
return sum_ ## suf();\
}\
\
-static int pix_abs8x8_y2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
+static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
+ assert(h==8);\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
:: "m"(round_tab[1]) \
);\
\
- sad8_2_ ## suf(blk1, blk1+stride, blk2, stride, 3);\
+ sad8_2_ ## suf(blk1, blk1+stride, blk2, stride, 8);\
\
return sum_ ## suf();\
}\
\
-static int pix_abs8x8_xy2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
+static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
+ assert(h==8);\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
:: "m"(round_tab[2]) \
);\
\
- sad8_4_ ## suf(blk1, blk2, stride, 3);\
+ sad8_4_ ## suf(blk1, blk2, stride, 8);\
\
return sum_ ## suf();\
}\
\
-static int pix_abs16x16_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
-{\
- asm volatile("pxor %%mm7, %%mm7 \n\t"\
- "pxor %%mm6, %%mm6 \n\t":);\
-\
- sad8_ ## suf(blk1 , blk2 , stride, 4);\
- sad8_ ## suf(blk1+8, blk2+8, stride, 4);\
-\
- return sum_ ## suf();\
-}\
-static int sad16x16_ ## suf(void *s, uint8_t *blk2, uint8_t *blk1, int stride)\
+static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t":);\
\
- sad8_ ## suf(blk1 , blk2 , stride, 4);\
- sad8_ ## suf(blk1+8, blk2+8, stride, 4);\
+ sad8_1_ ## suf(blk1 , blk2 , stride, h);\
+ sad8_1_ ## suf(blk1+8, blk2+8, stride, h);\
\
return sum_ ## suf();\
}\
-static int pix_abs16x16_x2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
+static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
@@ -351,12 +336,12 @@ static int pix_abs16x16_x2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
:: "m"(round_tab[1]) \
);\
\
- sad8_2_ ## suf(blk1 , blk1+1, blk2 , stride, 4);\
- sad8_2_ ## suf(blk1+8, blk1+9, blk2+8, stride, 4);\
+ sad8_2_ ## suf(blk1 , blk1+1, blk2 , stride, h);\
+ sad8_2_ ## suf(blk1+8, blk1+9, blk2+8, stride, h);\
\
return sum_ ## suf();\
}\
-static int pix_abs16x16_y2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
+static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
@@ -364,12 +349,12 @@ static int pix_abs16x16_y2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
:: "m"(round_tab[1]) \
);\
\
- sad8_2_ ## suf(blk1 , blk1+stride, blk2 , stride, 4);\
- sad8_2_ ## suf(blk1+8, blk1+stride+8,blk2+8, stride, 4);\
+ sad8_2_ ## suf(blk1 , blk1+stride, blk2 , stride, h);\
+ sad8_2_ ## suf(blk1+8, blk1+stride+8,blk2+8, stride, h);\
\
return sum_ ## suf();\
}\
-static int pix_abs16x16_xy2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
+static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
@@ -377,8 +362,8 @@ static int pix_abs16x16_xy2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
:: "m"(round_tab[2]) \
);\
\
- sad8_4_ ## suf(blk1 , blk2 , stride, 4);\
- sad8_4_ ## suf(blk1+8, blk2+8, stride, 4);\
+ sad8_4_ ## suf(blk1 , blk2 , stride, h);\
+ sad8_4_ ## suf(blk1+8, blk2+8, stride, h);\
\
return sum_ ## suf();\
}\
@@ -389,32 +374,32 @@ PIX_SAD(mmx2)
void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx)
{
if (mm_flags & MM_MMX) {
- c->pix_abs16x16 = pix_abs16x16_mmx;
- c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx;
- c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx;
- c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
- c->pix_abs8x8 = pix_abs8x8_mmx;
- c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx;
- c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
- c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx;
+ c->pix_abs[0][0] = sad16_mmx;
+ c->pix_abs[0][1] = sad16_x2_mmx;
+ c->pix_abs[0][2] = sad16_y2_mmx;
+ c->pix_abs[0][3] = sad16_xy2_mmx;
+ c->pix_abs[1][0] = sad8_mmx;
+ c->pix_abs[1][1] = sad8_x2_mmx;
+ c->pix_abs[1][2] = sad8_y2_mmx;
+ c->pix_abs[1][3] = sad8_xy2_mmx;
- c->sad[0]= sad16x16_mmx;
- c->sad[1]= sad8x8_mmx;
+ c->sad[0]= sad16_mmx;
+ c->sad[1]= sad8_mmx;
}
if (mm_flags & MM_MMXEXT) {
- c->pix_abs16x16 = pix_abs16x16_mmx2;
- c->pix_abs8x8 = pix_abs8x8_mmx2;
+ c->pix_abs[0][0] = sad16_mmx2;
+ c->pix_abs[1][0] = sad8_mmx2;
- c->sad[0]= sad16x16_mmx2;
- c->sad[1]= sad8x8_mmx2;
+ c->sad[0]= sad16_mmx2;
+ c->sad[1]= sad8_mmx2;
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
- c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx2;
- c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx2;
- c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx2;
- c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx2;
- c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx2;
- c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2;
+ c->pix_abs[0][1] = sad16_x2_mmx2;
+ c->pix_abs[0][2] = sad16_y2_mmx2;
+ c->pix_abs[0][3] = sad16_xy2_mmx2;
+ c->pix_abs[1][1] = sad8_x2_mmx2;
+ c->pix_abs[1][2] = sad8_y2_mmx2;
+ c->pix_abs[1][3] = sad8_xy2_mmx2;
}
}
}
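
Two related changes run through this patch. First, the last argument of the sad8_* helpers now carries the block height in rows (8 or 16) instead of its log2, so the loop length becomes -(stride*h) rather than -(stride<<h); the computed value is unchanged (stride<<3 == stride*8), but the 16-wide wrappers can now forward their own h straight into the 8-wide helpers, which is what lets the duplicated pix_abs16x16_*/sad16x16_* pairs collapse into single sad16_* functions. Second, the eight individually named pix_abs function pointers in DSPContext are replaced by a pix_abs[size][type] table whose entries take an explicit h. The sketch below is plain C, not MMX, and every name in it (me_cmp_fn, dsp_sketch, sad8_rows_sketch, sad_dispatch) is a hypothetical stand-in chosen for illustration, not a libavcodec identifier:

/* Illustrative only -- a scalar model of the calling convention this
 * patch installs, under the assumption stride >= 8. */
#include <stdint.h>
#include <stdlib.h>

typedef int (*me_cmp_fn)(void *ctx, uint8_t *blk2, uint8_t *blk1,
                         int stride, int h);

struct dsp_sketch {
    /* pix_abs[0][*]: 16-wide blocks, pix_abs[1][*]: 8-wide blocks;
     * second index 0..3: full-pel, x half-pel, y half-pel, xy half-pel,
     * mirroring the c->pix_abs[size][type] assignments above. */
    me_cmp_fn pix_abs[2][4];
};

/* Scalar stand-in for sad8_1_mmx: with h as a real row count, the
 * helpers set len = -(stride*h) and index from pointers anchored one
 * block past the end, counting len up to 0 -- one register serves as
 * both row offset and loop counter. */
static int sad8_rows_sketch(void *ctx, uint8_t *blk2, uint8_t *blk1,
                            int stride, int h)
{
    int len = -(stride * h);
    uint8_t *end1 = blk1 - len;   /* blk1 + stride*h */
    uint8_t *end2 = blk2 - len;
    int sum = 0;
    while (len) {
        for (int x = 0; x < 8; x++)
            sum += abs(end1[len + x] - end2[len + x]);
        len += stride;            /* advance one row toward 0 */
    }
    (void)ctx;
    return sum;
}

/* After this patch a caller indexes the table by block size and
 * half-pel type and passes the height explicitly. */
static int sad_dispatch(struct dsp_sketch *c, int size_idx, int dxy,
                        uint8_t *ref, uint8_t *src, int stride)
{
    return c->pix_abs[size_idx][dxy](NULL, ref, src, stride,
                                     size_idx ? 8 : 16);
}

The table layout also explains the assert(h==8) additions: the 8x8 MMX kernels hard-code their inner width, so the only height the new generic signature may legally request from them is 8, while the 16-wide entries accept h freely and split the work across two 8-wide columns.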