summary | refs | log | tree | commit | diff
path: root/src/libffmpeg/libavcodec/i386
diff options
context:
space:
mode:
author: Miguel Freitas <miguelfreitas@users.sourceforge.net>, 2002-11-11 13:45:34 +0000
committer: Miguel Freitas <miguelfreitas@users.sourceforge.net>, 2002-11-11 13:45:34 +0000
commit: 810ddddbc0d5f6587b9154115fa60d546d728310 (patch)
tree: 67256930a6cd947fb3f09b070b19dedd4fbbcd10 /src/libffmpeg/libavcodec/i386
parent: e38a8cca2c6be40954a44f167f7cc9fac0813ede (diff)
download: xine-lib-810ddddbc0d5f6587b9154115fa60d546d728310.tar.gz
xine-lib-810ddddbc0d5f6587b9154115fa60d546d728310.tar.bz2
sync ffmpeg
- add wma decoder - fix mmx macro - remove changes from fdct_mmx.c (it should work fine now with the mmx macro fix) CVS patchset: 3234 CVS date: 2002/11/11 13:45:34
Diffstat (limited to 'src/libffmpeg/libavcodec/i386')
-rw-r--r-- src/libffmpeg/libavcodec/i386/Makefile.am | 1
-rw-r--r-- src/libffmpeg/libavcodec/i386/cputest.c | 21
-rw-r--r-- src/libffmpeg/libavcodec/i386/dsputil_mmx.c | 241
-rw-r--r-- src/libffmpeg/libavcodec/i386/fdct_mmx.c | 24
-rw-r--r-- src/libffmpeg/libavcodec/i386/mpegvideo_mmx.c | 11
-rw-r--r-- src/libffmpeg/libavcodec/i386/simple_idct_mmx.c | 4
6 files changed, 162 insertions, 140 deletions
diff --git a/src/libffmpeg/libavcodec/i386/Makefile.am b/src/libffmpeg/libavcodec/i386/Makefile.am
index 53f1f6528..0ef6bb0eb 100644
--- a/src/libffmpeg/libavcodec/i386/Makefile.am
+++ b/src/libffmpeg/libavcodec/i386/Makefile.am
@@ -13,6 +13,7 @@ LIBTOOL = $(SHELL) $(top_builddir)/libtool-nofpic
noinst_LTLIBRARIES = libavcodec_mmx.la
libavcodec_mmx_src = \
+ cputest.c \
dsputil_mmx.c \
fdct_mmx.c \
idct_mmx.c \
diff --git a/src/libffmpeg/libavcodec/i386/cputest.c b/src/libffmpeg/libavcodec/i386/cputest.c
new file mode 100644
index 000000000..b885548ee
--- /dev/null
+++ b/src/libffmpeg/libavcodec/i386/cputest.c
@@ -0,0 +1,21 @@
+/* dummy file to use xine mm_support function */
+
+#include "xineutils.h"
+#include "../dsputil.h"
+
+
+/* Return the multimedia-extension capability flags by delegating to xine_mm_accel(). */
+int mm_support(void)
+{
+ return xine_mm_accel();
+}
+
+#ifdef __TEST__ /* standalone self-test: print the flags reported by mm_support() */
+int main ( void )
+{
+ int mm_flags;
+ mm_flags = mm_support();
+ printf("mm_support = 0x%08x\n",(unsigned)mm_flags); /* hex digits, to match the 0x prefix */
+ return 0;
+}
+#endif
diff --git a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c
index 708d0b091..94b88b70e 100644
--- a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c
+++ b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c
@@ -22,7 +22,7 @@
#include "../dsputil.h"
int mm_flags; /* multimedia extension flags */
-
+/* FIXME use them in static form */
int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
@@ -242,7 +242,7 @@ static void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, in
);
}
-static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size)
+void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size)
{
const DCTELEM *p;
UINT8 *pix;
@@ -297,7 +297,7 @@ static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line
:"memory");
}
-static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size)
+void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size)
{
const DCTELEM *p;
UINT8 *pix;
@@ -457,7 +457,7 @@ static int pix_sum16_mmx(UINT8 * pix, int line_size){
static void just_return() { return; }
#endif
-void dsputil_init_mmx(void)
+void dsputil_init_mmx(DSPContext* c, unsigned mask)
{
mm_flags = mm_support();
#if 0
@@ -476,112 +476,112 @@ void dsputil_init_mmx(void)
#endif
if (mm_flags & MM_MMX) {
- get_pixels = get_pixels_mmx;
- diff_pixels = diff_pixels_mmx;
- put_pixels_clamped = put_pixels_clamped_mmx;
- add_pixels_clamped = add_pixels_clamped_mmx;
- clear_blocks= clear_blocks_mmx;
- pix_sum= pix_sum16_mmx;
-
- pix_abs16x16 = pix_abs16x16_mmx;
- pix_abs16x16_x2 = pix_abs16x16_x2_mmx;
- pix_abs16x16_y2 = pix_abs16x16_y2_mmx;
- pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
- pix_abs8x8 = pix_abs8x8_mmx;
- pix_abs8x8_x2 = pix_abs8x8_x2_mmx;
- pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
- pix_abs8x8_xy2= pix_abs8x8_xy2_mmx;
-
- put_pixels_tab[0][0] = put_pixels16_mmx;
- put_pixels_tab[0][1] = put_pixels16_x2_mmx;
- put_pixels_tab[0][2] = put_pixels16_y2_mmx;
- put_pixels_tab[0][3] = put_pixels16_xy2_mmx;
-
- put_no_rnd_pixels_tab[0][0] = put_pixels16_mmx;
- put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx;
- put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx;
- put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_mmx;
-
- avg_pixels_tab[0][0] = avg_pixels16_mmx;
- avg_pixels_tab[0][1] = avg_pixels16_x2_mmx;
- avg_pixels_tab[0][2] = avg_pixels16_y2_mmx;
- avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx;
-
- avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_mmx;
- avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_mmx;
- avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_mmx;
- avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_mmx;
-
- put_pixels_tab[1][0] = put_pixels8_mmx;
- put_pixels_tab[1][1] = put_pixels8_x2_mmx;
- put_pixels_tab[1][2] = put_pixels8_y2_mmx;
- put_pixels_tab[1][3] = put_pixels8_xy2_mmx;
-
- put_no_rnd_pixels_tab[1][0] = put_pixels8_mmx;
- put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx;
- put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx;
- put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_mmx;
-
- avg_pixels_tab[1][0] = avg_pixels8_mmx;
- avg_pixels_tab[1][1] = avg_pixels8_x2_mmx;
- avg_pixels_tab[1][2] = avg_pixels8_y2_mmx;
- avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx;
-
- avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_mmx;
- avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx;
- avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx;
- avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx;
+ c->get_pixels = get_pixels_mmx;
+ c->diff_pixels = diff_pixels_mmx;
+ c->put_pixels_clamped = put_pixels_clamped_mmx;
+ c->add_pixels_clamped = add_pixels_clamped_mmx;
+ c->clear_blocks = clear_blocks_mmx;
+ c->pix_sum = pix_sum16_mmx;
+
+ c->pix_abs16x16 = pix_abs16x16_mmx;
+ c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx;
+ c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx;
+ c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
+ c->pix_abs8x8 = pix_abs8x8_mmx;
+ c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx;
+ c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
+ c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx;
+
+ c->put_pixels_tab[0][0] = put_pixels16_mmx;
+ c->put_pixels_tab[0][1] = put_pixels16_x2_mmx;
+ c->put_pixels_tab[0][2] = put_pixels16_y2_mmx;
+ c->put_pixels_tab[0][3] = put_pixels16_xy2_mmx;
+
+ c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmx;
+ c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx;
+ c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx;
+ c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_mmx;
+
+ c->avg_pixels_tab[0][0] = avg_pixels16_mmx;
+ c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx;
+ c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx;
+ c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx;
+
+ c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_mmx;
+ c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_mmx;
+ c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_mmx;
+ c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_mmx;
+
+ c->put_pixels_tab[1][0] = put_pixels8_mmx;
+ c->put_pixels_tab[1][1] = put_pixels8_x2_mmx;
+ c->put_pixels_tab[1][2] = put_pixels8_y2_mmx;
+ c->put_pixels_tab[1][3] = put_pixels8_xy2_mmx;
+
+ c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mmx;
+ c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx;
+ c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx;
+ c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_mmx;
+
+ c->avg_pixels_tab[1][0] = avg_pixels8_mmx;
+ c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx;
+ c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx;
+ c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx;
+
+ c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_mmx;
+ c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx;
+ c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx;
+ c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx;
if (mm_flags & MM_MMXEXT) {
- pix_abs16x16 = pix_abs16x16_mmx2;
- pix_abs16x16_x2 = pix_abs16x16_x2_mmx2;
- pix_abs16x16_y2 = pix_abs16x16_y2_mmx2;
- pix_abs16x16_xy2= pix_abs16x16_xy2_mmx2;
-
- pix_abs8x8 = pix_abs8x8_mmx2;
- pix_abs8x8_x2 = pix_abs8x8_x2_mmx2;
- pix_abs8x8_y2 = pix_abs8x8_y2_mmx2;
- pix_abs8x8_xy2= pix_abs8x8_xy2_mmx2;
-
- put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
- put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
- put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
- put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
-
- avg_pixels_tab[0][0] = avg_pixels16_mmx2;
- avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2;
- avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2;
- avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
-
- put_pixels_tab[1][1] = put_pixels8_x2_mmx2;
- put_pixels_tab[1][2] = put_pixels8_y2_mmx2;
- put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
- put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
-
- avg_pixels_tab[1][0] = avg_pixels8_mmx2;
- avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
- avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
- avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
+ c->pix_abs16x16 = pix_abs16x16_mmx2;
+ c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx2;
+ c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx2;
+ c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx2;
+
+ c->pix_abs8x8 = pix_abs8x8_mmx2;
+ c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx2;
+ c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx2;
+ c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2;
+
+ c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
+ c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
+ c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
+ c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
+
+ c->avg_pixels_tab[0][0] = avg_pixels16_mmx2;
+ c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2;
+ c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2;
+ c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
+
+ c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2;
+ c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2;
+ c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
+ c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
+
+ c->avg_pixels_tab[1][0] = avg_pixels8_mmx2;
+ c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
+ c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
+ c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
} else if (mm_flags & MM_3DNOW) {
- put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
- put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
- put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
- put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow;
-
- avg_pixels_tab[0][0] = avg_pixels16_3dnow;
- avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;
- avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow;
- avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
-
- put_pixels_tab[1][1] = put_pixels8_x2_3dnow;
- put_pixels_tab[1][2] = put_pixels8_y2_3dnow;
- put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow;
- put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow;
-
- avg_pixels_tab[1][0] = avg_pixels8_3dnow;
- avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow;
- avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow;
- avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
+ c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
+ c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
+ c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
+ c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow;
+
+ c->avg_pixels_tab[0][0] = avg_pixels16_3dnow;
+ c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;
+ c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow;
+ c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
+
+ c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow;
+ c->put_pixels_tab[1][2] = put_pixels8_y2_3dnow;
+ c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow;
+ c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow;
+
+ c->avg_pixels_tab[1][0] = avg_pixels8_3dnow;
+ c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow;
+ c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow;
+ c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
}
}
@@ -624,25 +624,24 @@ void dsputil_init_mmx(void)
/* remove any non bit exact operation (testing purpose). NOTE that
this function should be kept as small as possible because it is
always difficult to test automatically non bit exact cases. */
-void dsputil_set_bit_exact_mmx(void)
+void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask)
{
if (mm_flags & MM_MMX) {
-
/* MMX2 & 3DNOW */
- put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx;
- put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx;
- avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx;
- put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx;
- put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx;
- avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx;
+ c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx;
+ c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx;
+ c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx;
+ c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx;
+ c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx;
+ c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx;
if (mm_flags & MM_MMXEXT) {
- pix_abs16x16_x2 = pix_abs16x16_x2_mmx;
- pix_abs16x16_y2 = pix_abs16x16_y2_mmx;
- pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
- pix_abs8x8_x2 = pix_abs8x8_x2_mmx;
- pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
- pix_abs8x8_xy2= pix_abs8x8_xy2_mmx;
+ c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx;
+ c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx;
+ c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
+ c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx;
+ c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
+ c->pix_abs8x8_xy2= pix_abs8x8_xy2_mmx;
}
}
}
diff --git a/src/libffmpeg/libavcodec/i386/fdct_mmx.c b/src/libffmpeg/libavcodec/i386/fdct_mmx.c
index 19f656afd..8d3552d2a 100644
--- a/src/libffmpeg/libavcodec/i386/fdct_mmx.c
+++ b/src/libffmpeg/libavcodec/i386/fdct_mmx.c
@@ -32,23 +32,23 @@
#define RND_FRW_COL (1 << (SHIFT_FRW_COL-1))
//concatenated table, for forward DCT transformation
-const int16_t fdct_tg_all_16[] ATTR_ALIGN(8) = {
+static const int16_t fdct_tg_all_16[] ATTR_ALIGN(8) = {
13036, 13036, 13036, 13036, // tg * (2<<16) + 0.5
27146, 27146, 27146, 27146, // tg * (2<<16) + 0.5
-21746, -21746, -21746, -21746, // tg * (2<<16) + 0.5
};
-const int16_t cos_4_16[4] = {
+static const int16_t cos_4_16[4] ATTR_ALIGN(8) = {
-19195, -19195, -19195, -19195, //cos * (2<<16) + 0.5
};
-const int16_t ocos_4_16[4] = {
+static const int16_t ocos_4_16[4] ATTR_ALIGN(8) = {
23170, 23170, 23170, 23170, //cos * (2<<15) + 0.5
};
-static const mmx_t fdct_one_corr = {0x0001000100010001LL};
-static volatile mmx_t fdct_r_row = { d:{RND_FRW_ROW, RND_FRW_ROW} };
+static const long long fdct_one_corr ATTR_ALIGN(8) = 0x0001000100010001LL;
+static const long fdct_r_row[2] ATTR_ALIGN(8) = {RND_FRW_ROW, RND_FRW_ROW };
-const int16_t tab_frw_01234567[] ATTR_ALIGN(8) = { // forward_dct coeff table
+static const int16_t tab_frw_01234567[] ATTR_ALIGN(8) = { // forward_dct coeff table
//row0
16384, 16384, 21407, -8867, // w09 w01 w08 w00
16384, 16384, 8867, -21407, // w13 w05 w12 w04
@@ -242,18 +242,18 @@ static inline void fdct_row(const int16_t *in, int16_t *out, const int16_t *tabl
punpckhdq_r2r(mm6, mm6);
movq_m2r(*(table + 20), mm7);
pmaddwd_r2r(mm5, mm1);
- paddd_m2r(fdct_r_row, mm3);
+ paddd_m2r(*fdct_r_row, mm3);
pmaddwd_r2r(mm6, mm7);
pmaddwd_m2r(*(table + 12), mm2);
paddd_r2r(mm4, mm3);
pmaddwd_m2r(*(table + 24), mm5);
pmaddwd_m2r(*(table + 28), mm6);
paddd_r2r(mm7, mm1);
- paddd_m2r(fdct_r_row, mm0);
+ paddd_m2r(*fdct_r_row, mm0);
psrad_i2r(SHIFT_FRW_ROW, mm3);
- paddd_m2r(fdct_r_row, mm1);
+ paddd_m2r(*fdct_r_row, mm1);
paddd_r2r(mm2, mm0);
- paddd_m2r(fdct_r_row, mm5);
+ paddd_m2r(*fdct_r_row, mm5);
psrad_i2r(SHIFT_FRW_ROW, mm1);
paddd_r2r(mm6, mm5);
psrad_i2r(SHIFT_FRW_ROW, mm0);
@@ -269,8 +269,8 @@ static inline void fdct_row(const int16_t *in, int16_t *out, const int16_t *tabl
void ff_fdct_mmx(int16_t *block)
{
- /* XXX: not thread safe */
- static int16_t block_tmp[64] ATTR_ALIGN(8);
+ int64_t align_tmp[16] ATTR_ALIGN(8);
+ int16_t * const block_tmp= (int16_t*)align_tmp;
int16_t *block1, *out;
const int16_t *table;
int i;
diff --git a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx.c b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx.c
index 8645fa370..cb7af3e62 100644
--- a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx.c
+++ b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx.c
@@ -55,7 +55,8 @@ static void dct_unquantize_h263_mmx(MpegEncContext *s,
}
nCoeffs=63;
} else {
- nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
+ nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
+ level = 0;/* keep gcc quiet */
}
//printf("%d %d ", qmul, qadd);
asm volatile(
@@ -507,22 +508,22 @@ void ff_mmxext_idct(DCTELEM *block);
static void ff_libmpeg2mmx_idct_put(UINT8 *dest, int line_size, DCTELEM *block)
{
ff_mmx_idct (block);
- put_pixels_clamped(block, dest, line_size);
+ put_pixels_clamped_mmx(block, dest, line_size);
}
static void ff_libmpeg2mmx_idct_add(UINT8 *dest, int line_size, DCTELEM *block)
{
ff_mmx_idct (block);
- add_pixels_clamped(block, dest, line_size);
+ add_pixels_clamped_mmx(block, dest, line_size);
}
static void ff_libmpeg2mmx2_idct_put(UINT8 *dest, int line_size, DCTELEM *block)
{
ff_mmxext_idct (block);
- put_pixels_clamped(block, dest, line_size);
+ put_pixels_clamped_mmx(block, dest, line_size);
}
static void ff_libmpeg2mmx2_idct_add(UINT8 *dest, int line_size, DCTELEM *block)
{
ff_mmxext_idct (block);
- add_pixels_clamped(block, dest, line_size);
+ add_pixels_clamped_mmx(block, dest, line_size);
}
void MPV_common_init_mmx(MpegEncContext *s)
diff --git a/src/libffmpeg/libavcodec/i386/simple_idct_mmx.c b/src/libffmpeg/libavcodec/i386/simple_idct_mmx.c
index 0c859862e..9dfd5f149 100644
--- a/src/libffmpeg/libavcodec/i386/simple_idct_mmx.c
+++ b/src/libffmpeg/libavcodec/i386/simple_idct_mmx.c
@@ -1301,10 +1301,10 @@ void ff_simple_idct_mmx(int16_t *block)
void ff_simple_idct_put_mmx(UINT8 *dest, int line_size, DCTELEM *block)
{
idct(block);
- put_pixels_clamped(block, dest, line_size);
+ put_pixels_clamped_mmx(block, dest, line_size);
}
void ff_simple_idct_add_mmx(UINT8 *dest, int line_size, DCTELEM *block)
{
idct(block);
- add_pixels_clamped(block, dest, line_size);
+ add_pixels_clamped_mmx(block, dest, line_size);
}