summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGuenter Bartsch <guenter@users.sourceforge.net>2001-08-07 23:59:50 +0000
committerGuenter Bartsch <guenter@users.sourceforge.net>2001-08-07 23:59:50 +0000
commit4c0836e8f1504db9bbb329a1351050a8ff2cf469 (patch)
tree5a15df1f39b66be829da3641336d0cac1b846e34 /src
parent116de8566118cae73b42132985c174c372ed6c41 (diff)
downloadxine-lib-4c0836e8f1504db9bbb329a1351050a8ff2cf469.tar.gz
xine-lib-4c0836e8f1504db9bbb329a1351050a8ff2cf469.tar.bz2
latest ffmpeg updates - mmx works! :-)
CVS patchset: 400 CVS date: 2001/08/07 23:59:50
Diffstat (limited to 'src')
-rw-r--r--src/libffmpeg/config.h3
-rw-r--r--src/libffmpeg/libavcodec/Makefile.am4
-rw-r--r--src/libffmpeg/libavcodec/dsputil.c75
-rw-r--r--src/libffmpeg/libavcodec/dsputil.h12
-rw-r--r--src/libffmpeg/libavcodec/dsputil_mmx.c38
-rw-r--r--src/libffmpeg/libavcodec/fdctref.c36
-rw-r--r--src/libffmpeg/libavcodec/idct_mmx.c592
-rw-r--r--src/libffmpeg/libavcodec/jrevdct.c16
-rw-r--r--src/libffmpeg/libavcodec/mjpeg.c16
-rw-r--r--src/libffmpeg/libavcodec/mpeg12.c30
-rw-r--r--src/libffmpeg/libavcodec/mpeg12data.h16
-rw-r--r--src/libffmpeg/libavcodec/mpeg4data.h23
-rw-r--r--src/libffmpeg/libavcodec/mpegvideo.c9
-rw-r--r--src/libffmpeg/libavcodec/mpegvideo.h9
-rw-r--r--src/video_out/yuv2rgb.c25
-rw-r--r--src/xine-engine/cpu_accel.h2
16 files changed, 795 insertions, 111 deletions
diff --git a/src/libffmpeg/config.h b/src/libffmpeg/config.h
index e560cbebb..674150a19 100644
--- a/src/libffmpeg/config.h
+++ b/src/libffmpeg/config.h
@@ -4,3 +4,6 @@
#include "../../config.h"
+#ifdef ARCH_X86
+#define HAVE_MMX
+#endif
diff --git a/src/libffmpeg/libavcodec/Makefile.am b/src/libffmpeg/libavcodec/Makefile.am
index 046974ac8..c9a19f498 100644
--- a/src/libffmpeg/libavcodec/Makefile.am
+++ b/src/libffmpeg/libavcodec/Makefile.am
@@ -9,8 +9,8 @@ LIBTOOL = $(SHELL) $(top_builddir)/libtool-nofpic
noinst_LTLIBRARIES = libavcodec.la
if HAVE_FFMMX
-#mmx_modules = mpegvideo_mmx.c sad_mmx.s dsputil_mmx.c
-mmx_modules = mpegvideo_mmx.c sad_mmx.s
+mmx_modules = mpegvideo_mmx.c sad_mmx.s dsputil_mmx.c idct_mmx.c
+#mmx_modules = mpegvideo_mmx.c sad_mmx.s
endif
libavcodec_la_SOURCES = dsputil.c fdctref.c jfdctfst.c mpeg12.c \
diff --git a/src/libffmpeg/libavcodec/dsputil.c b/src/libffmpeg/libavcodec/dsputil.c
index a41f1bef6..b4c06b820 100644
--- a/src/libffmpeg/libavcodec/dsputil.c
+++ b/src/libffmpeg/libavcodec/dsputil.c
@@ -34,6 +34,42 @@ op_pixels_abs_func pix_abs16x16_xy2;
static UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
UINT32 squareTbl[512];
+extern INT16 default_intra_matrix[64]; /* INT16 to match the definition in mpeg12data.h and block_permute() */
+extern INT16 default_non_intra_matrix[64]; /* INT16 to match the definition in mpeg12data.h and block_permute() */
+
+UINT8 zigzag_direct[64] = {
+ 0, 1, 8, 16, 9, 2, 3, 10,
+ 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63
+};
+
+UINT8 ff_alternate_horizontal_scan[64] = {
+ 0, 1, 2, 3, 8, 9, 16, 17,
+ 10, 11, 4, 5, 6, 7, 15, 14,
+ 13, 12, 19, 18, 24, 25, 32, 33,
+ 26, 27, 20, 21, 22, 23, 28, 29,
+ 30, 31, 34, 35, 40, 41, 48, 49,
+ 42, 43, 36, 37, 38, 39, 44, 45,
+ 46, 47, 50, 51, 56, 57, 58, 59,
+ 52, 53, 54, 55, 60, 61, 62, 63,
+};
+
+UINT8 ff_alternate_vertical_scan[64] = {
+ 0, 8, 16, 24, 1, 9, 2, 10,
+ 17, 25, 32, 40, 48, 56, 57, 49,
+ 41, 33, 26, 18, 3, 11, 4, 12,
+ 19, 27, 34, 42, 50, 58, 35, 43,
+ 51, 59, 20, 28, 5, 13, 6, 14,
+ 21, 29, 36, 44, 52, 60, 37, 45,
+ 53, 61, 22, 30, 7, 15, 23, 31,
+ 38, 46, 54, 62, 39, 47, 55, 63,
+};
+
void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size)
{
DCTELEM *p;
@@ -350,10 +386,34 @@ int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
return s;
}
-void dsputil_init(void)
+/* permute the block so that it corresponds to the MMX idct
+ order */
+void block_permute(INT16 *block)
{
+ int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
int i;
+ for(i=0;i<8;i++) {
+ tmp1 = block[1];
+ tmp2 = block[2];
+ tmp3 = block[3];
+ tmp4 = block[4];
+ tmp5 = block[5];
+ tmp6 = block[6];
+ block[1] = tmp2;
+ block[2] = tmp4;
+ block[3] = tmp6;
+ block[4] = tmp1;
+ block[5] = tmp3;
+ block[6] = tmp5;
+ block += 8;
+ }
+}
+
+void dsputil_init(void)
+{
+ int i, j;
+
for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
for(i=0;i<MAX_NEG_CROP;i++) {
cropTbl[i] = 0;
@@ -375,7 +435,20 @@ void dsputil_init(void)
pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
av_fdct = jpeg_fdct_ifast;
+ /* permute for IDCT */
+ for(i=0;i<64;i++) {
+ j = zigzag_direct[i];
+ zigzag_direct[i] = block_permute_op(j);
+ j = ff_alternate_horizontal_scan[i];
+ ff_alternate_horizontal_scan[i] = block_permute_op(j);
+ j = ff_alternate_vertical_scan[i];
+ ff_alternate_vertical_scan[i] = block_permute_op(j);
+ }
+ block_permute(default_intra_matrix);
+ block_permute(default_non_intra_matrix);
+
#ifdef HAVE_MMX
+ printf ("ffmpeg: init mmx\n");
dsputil_init_mmx();
#endif
}
diff --git a/src/libffmpeg/libavcodec/dsputil.h b/src/libffmpeg/libavcodec/dsputil.h
index ffbc395ba..ebb4d8446 100644
--- a/src/libffmpeg/libavcodec/dsputil.h
+++ b/src/libffmpeg/libavcodec/dsputil.h
@@ -15,6 +15,11 @@ void fdct_mmx(DCTELEM *block);
void (*av_fdct)(DCTELEM *block);
+/* encoding scans */
+extern UINT8 ff_alternate_horizontal_scan[64];
+extern UINT8 ff_alternate_vertical_scan[64];
+extern UINT8 zigzag_direct[64];
+
/* pixel operations */
#define MAX_NEG_CROP 384
@@ -62,6 +67,13 @@ int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx, int h);
int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx, int h);
int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx, int h);
+static inline int block_permute_op(int j)
+{
+ return (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2);
+}
+
+void block_permute(INT16 *block);
+
#ifdef HAVE_MMX
#define MM_MMX 0x0001 /* standard MMX */
diff --git a/src/libffmpeg/libavcodec/dsputil_mmx.c b/src/libffmpeg/libavcodec/dsputil_mmx.c
index ddb91f54c..a4d40eb72 100644
--- a/src/libffmpeg/libavcodec/dsputil_mmx.c
+++ b/src/libffmpeg/libavcodec/dsputil_mmx.c
@@ -30,15 +30,9 @@ int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h);
int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h);
int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h);
-#ifdef USE_MMX_IDCT
-/* external functions, defined in libmpeg2 */
-void mmx_idct(DCTELEM *block);
-void mmxext_idct(DCTELEM *block);
-/* this should be in dsputil.h? -- A'rpi */
-extern UINT8 ff_alternate_horizontal_scan[64];
-extern UINT8 ff_alternate_vertical_scan[64];
-extern UINT8 zigzag_direct[64];
-#endif
+/* external functions, from idct_mmx.c */
+void ff_mmx_idct(DCTELEM *block);
+void ff_mmxext_idct(DCTELEM *block);
/* pixel operations */
static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001;
@@ -996,8 +990,7 @@ void dsputil_init_mmx(void)
pix_abs16x16_x2 = pix_abs16x16_x2_mmx;
pix_abs16x16_y2 = pix_abs16x16_y2_mmx;
pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
- //av_fdct = fdct_mmx;
- av_fdct = 0;
+ av_fdct = fdct_mmx;
put_pixels_tab[0] = put_pixels_mmx;
put_pixels_tab[1] = put_pixels_x2_mmx;
@@ -1052,22 +1045,11 @@ void dsputil_init_mmx(void)
sub_pixels_tab[2] = sub_pixels_y2_3dnow;
}
-#ifdef USE_MMX_IDCT
- /* use MMX / MMXEXT iDCT code from libmpeg2 */
- //printf("LIBAVCODEC: Using MMX%s iDCT code\n",(mm_flags & MM_MMXEXT)?"EXT":"");
- ff_idct = (mm_flags & MM_MMXEXT) ? mmxext_idct : mmx_idct;
- /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
- { int i,j;
- for (i = 0; i < 64; i++) {
- j = zigzag_direct[i];
- zigzag_direct[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2);
- j = ff_alternate_horizontal_scan[i];
- ff_alternate_horizontal_scan[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2);
- j = ff_alternate_vertical_scan[i];
- ff_alternate_vertical_scan[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2);
- }
- }
-#endif
-
+ /* idct */
+ if (mm_flags & MM_MMXEXT) {
+ ff_idct = ff_mmxext_idct;
+ } else {
+ ff_idct = ff_mmx_idct;
+ }
}
}
diff --git a/src/libffmpeg/libavcodec/fdctref.c b/src/libffmpeg/libavcodec/fdctref.c
index b90a2e52e..245492496 100644
--- a/src/libffmpeg/libavcodec/fdctref.c
+++ b/src/libffmpeg/libavcodec/fdctref.c
@@ -116,3 +116,39 @@ short *block;
*/
}
}
+
+/* perform IDCT matrix multiply for 8x8 coefficient block */
+
+void idct(block)
+short *block;
+{
+ int i, j, k, v;
+ double partial_product;
+ double tmp[64];
+
+ for (i=0; i<8; i++)
+ for (j=0; j<8; j++)
+ {
+ partial_product = 0.0;
+
+ for (k=0; k<8; k++)
+ partial_product+= c[k][j]*block[8*i+k];
+
+ tmp[8*i+j] = partial_product;
+ }
+
+ /* Transpose operation is integrated into address mapping by switching
+ loop order of i and j */
+
+ for (j=0; j<8; j++)
+ for (i=0; i<8; i++)
+ {
+ partial_product = 0.0;
+
+ for (k=0; k<8; k++)
+ partial_product+= c[k][i]*tmp[8*k+j];
+
+ v = (int) floor(partial_product+0.5);
+ block[8*i+j] = v;
+ }
+}
diff --git a/src/libffmpeg/libavcodec/idct_mmx.c b/src/libffmpeg/libavcodec/idct_mmx.c
new file mode 100644
index 000000000..d004481b1
--- /dev/null
+++ b/src/libffmpeg/libavcodec/idct_mmx.c
@@ -0,0 +1,592 @@
+/*
+ * Note: For libavcodec, this code can also be used under the LGPL license
+ */
+/*
+ * idct_mmx.c
+ * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <inttypes.h>
+
+#include "config.h"
+
+#include "cpu_accel.h"
+
+#define ATTR_ALIGN(align) __attribute__ ((__aligned__ (align)))
+
+#define ROW_SHIFT 11
+#define COL_SHIFT 6
+
+#define round(bias) ((int)(((bias)+0.5) * (1<<ROW_SHIFT)))
+#define rounder(bias) {round (bias), round (bias)}
+
+#if 0
+/* C row IDCT - it's just here to document the MMXEXT and MMX versions */
+static inline void idct_row (int16_t * row, int offset,
+ int16_t * table, int32_t * rounder)
+{
+ int C1, C2, C3, C4, C5, C6, C7;
+ int a0, a1, a2, a3, b0, b1, b2, b3;
+
+ row += offset;
+
+ C1 = table[1];
+ C2 = table[2];
+ C3 = table[3];
+ C4 = table[4];
+ C5 = table[5];
+ C6 = table[6];
+ C7 = table[7];
+
+ a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + *rounder;
+ a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + *rounder;
+ a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + *rounder;
+ a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + *rounder;
+
+ b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
+ b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
+ b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
+ b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
+
+ row[0] = (a0 + b0) >> ROW_SHIFT;
+ row[1] = (a1 + b1) >> ROW_SHIFT;
+ row[2] = (a2 + b2) >> ROW_SHIFT;
+ row[3] = (a3 + b3) >> ROW_SHIFT;
+ row[4] = (a3 - b3) >> ROW_SHIFT;
+ row[5] = (a2 - b2) >> ROW_SHIFT;
+ row[6] = (a1 - b1) >> ROW_SHIFT;
+ row[7] = (a0 - b0) >> ROW_SHIFT;
+}
+#endif
+
+
+/* MMXEXT row IDCT */
+
+#define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \
+ c4, c6, c4, c6, \
+ c1, c3, -c1, -c5, \
+ c5, c7, c3, -c7, \
+ c4, -c6, c4, -c6, \
+ -c4, c2, c4, -c2, \
+ c5, -c1, c3, -c1, \
+ c7, c3, c7, -c5 }
+
+static inline void mmxext_row_head (int16_t * row, int offset, int16_t * table)
+{
+ movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
+
+ movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
+ movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
+
+ movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4
+ movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
+
+ movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4
+ pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
+
+ pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4
+}
+
+static inline void mmxext_row (int16_t * table, int32_t * rounder)
+{
+ movq_m2r (*(table+8), mm1); // mm1 = -C5 -C1 C3 C1
+ pmaddwd_r2r (mm2, mm4); // mm4 = C4*x0+C6*x2 C4*x4+C6*x6
+
+ pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x4-C6*x6 C4*x0-C6*x2
+ pshufw_r2r (mm6, mm6, 0x4e); // mm6 = x3 x1 x7 x5
+
+ movq_m2r (*(table+12), mm7); // mm7 = -C7 C3 C7 C5
+ pmaddwd_r2r (mm5, mm1); // mm1 = -C1*x5-C5*x7 C1*x1+C3*x3
+
+ paddd_m2r (*rounder, mm3); // mm3 += rounder
+ pmaddwd_r2r (mm6, mm7); // mm7 = C3*x1-C7*x3 C5*x5+C7*x7
+
+ pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x0-C2*x2 -C4*x4+C2*x6
+ paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder
+
+ pmaddwd_m2r (*(table+24), mm5); // mm5 = C3*x5-C1*x7 C5*x1-C1*x3
+ movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder
+
+ pmaddwd_m2r (*(table+28), mm6); // mm6 = C7*x1-C5*x3 C7*x5+C3*x7
+ paddd_r2r (mm7, mm1); // mm1 = b1 b0
+
+ paddd_m2r (*rounder, mm0); // mm0 += rounder
+ psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder
+
+ psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7
+ paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder
+
+ paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder
+ psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0
+
+ paddd_r2r (mm6, mm5); // mm5 = b3 b2
+ movq_r2r (mm0, mm4); // mm4 = a3 a2 + rounder
+
+ paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder
+ psubd_r2r (mm5, mm4); // mm4 = a3-b3 a2-b2 + rounder
+}
+
+static inline void mmxext_row_tail (int16_t * row, int store)
+{
+ psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
+
+ psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5
+
+ packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
+
+ packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5
+
+ movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
+ pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4
+
+ /* slot */
+
+ movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4
+}
+
+static inline void mmxext_row_mid (int16_t * row, int store,
+ int offset, int16_t * table)
+{
+ movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
+ psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
+
+ movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
+ psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5
+
+ packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
+ movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
+
+ packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5
+ movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
+
+ movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
+ pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4
+
+ movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4
+ movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4
+
+ pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
+
+ movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4
+ pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4
+}
+
+
+/* MMX row IDCT */
+
+#define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \
+ c4, c6, -c4, -c2, \
+ c1, c3, c3, -c7, \
+ c5, c7, -c1, -c5, \
+ c4, -c6, c4, -c2, \
+ -c4, c2, c4, -c6, \
+ c5, -c1, c7, -c5, \
+ c7, c3, c3, -c1 }
+
+static inline void mmx_row_head (int16_t * row, int offset, int16_t * table)
+{
+ movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
+
+ movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
+ movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
+
+ movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4
+ movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
+
+ punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0
+
+ movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4
+ pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
+
+ movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1
+ punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4
+}
+
+static inline void mmx_row (int16_t * table, int32_t * rounder)
+{
+ pmaddwd_r2r (mm2, mm4); // mm4 = -C4*x4-C2*x6 C4*x4+C6*x6
+ punpckldq_r2r (mm5, mm5); // mm5 = x3 x1 x3 x1
+
+ pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x0-C2*x2 C4*x0-C6*x2
+ punpckhdq_r2r (mm6, mm6); // mm6 = x7 x5 x7 x5
+
+ movq_m2r (*(table+12), mm7); // mm7 = -C5 -C1 C7 C5
+ pmaddwd_r2r (mm5, mm1); // mm1 = C3*x1-C7*x3 C1*x1+C3*x3
+
+ paddd_m2r (*rounder, mm3); // mm3 += rounder
+ pmaddwd_r2r (mm6, mm7); // mm7 = -C1*x5-C5*x7 C5*x5+C7*x7
+
+ pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x4-C6*x6 -C4*x4+C2*x6
+ paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder
+
+ pmaddwd_m2r (*(table+24), mm5); // mm5 = C7*x1-C5*x3 C5*x1-C1*x3
+ movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder
+
+ pmaddwd_m2r (*(table+28), mm6); // mm6 = C3*x5-C1*x7 C7*x5+C3*x7
+ paddd_r2r (mm7, mm1); // mm1 = b1 b0
+
+ paddd_m2r (*rounder, mm0); // mm0 += rounder
+ psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder
+
+ psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7
+ paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder
+
+ paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder
+ psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0
+
+ paddd_r2r (mm6, mm5); // mm5 = b3 b2
+ movq_r2r (mm0, mm7); // mm7 = a3 a2 + rounder
+
+ paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder
+ psubd_r2r (mm5, mm7); // mm7 = a3-b3 a2-b2 + rounder
+}
+
+static inline void mmx_row_tail (int16_t * row, int store)
+{
+ psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
+
+ psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5
+
+ packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
+
+ packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5
+
+ movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
+ movq_r2r (mm7, mm4); // mm4 = y6 y7 y4 y5
+
+ pslld_i2r (16, mm7); // mm7 = y7 0 y5 0
+
+ psrld_i2r (16, mm4); // mm4 = 0 y6 0 y4
+
+ por_r2r (mm4, mm7); // mm7 = y7 y6 y5 y4
+
+ /* slot */
+
+ movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4
+}
+
+static inline void mmx_row_mid (int16_t * row, int store,
+ int offset, int16_t * table)
+{
+ movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
+ psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
+
+ movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
+ psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5
+
+ packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
+ movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
+
+ packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5
+ movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
+
+ movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
+ movq_r2r (mm7, mm1); // mm1 = y6 y7 y4 y5
+
+ punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0
+ psrld_i2r (16, mm7); // mm7 = 0 y6 0 y4
+
+ movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4
+ pslld_i2r (16, mm1); // mm1 = y7 0 y5 0
+
+ movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4
+ por_r2r (mm1, mm7); // mm7 = y7 y6 y5 y4
+
+ movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1
+ punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4
+
+ movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4
+ pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
+}
+
+
+#if 0
+// C column IDCT - it's just here to document the MMXEXT and MMX versions
+static inline void idct_col (int16_t * col, int offset)
+{
+/* multiplication - as implemented on mmx */
+#define F(c,x) (((c) * (x)) >> 16)
+
+/* saturation - it helps us handle torture test cases */
+#define S(x) (((x)>32767) ? 32767 : ((x)<-32768) ? -32768 : (x))
+
+ int16_t x0, x1, x2, x3, x4, x5, x6, x7;
+ int16_t y0, y1, y2, y3, y4, y5, y6, y7;
+ int16_t a0, a1, a2, a3, b0, b1, b2, b3;
+ int16_t u04, v04, u26, v26, u17, v17, u35, v35, u12, v12;
+
+ col += offset;
+
+ x0 = col[0*8];
+ x1 = col[1*8];
+ x2 = col[2*8];
+ x3 = col[3*8];
+ x4 = col[4*8];
+ x5 = col[5*8];
+ x6 = col[6*8];
+ x7 = col[7*8];
+
+ u04 = S (x0 + x4);
+ v04 = S (x0 - x4);
+ u26 = S (F (T2, x6) + x2);
+ v26 = S (F (T2, x2) - x6);
+
+ a0 = S (u04 + u26);
+ a1 = S (v04 + v26);
+ a2 = S (v04 - v26);
+ a3 = S (u04 - u26);
+
+ u17 = S (F (T1, x7) + x1);
+ v17 = S (F (T1, x1) - x7);
+ u35 = S (F (T3, x5) + x3);
+ v35 = S (F (T3, x3) - x5);
+
+ b0 = S (u17 + u35);
+ b3 = S (v17 - v35);
+ u12 = S (u17 - u35);
+ v12 = S (v17 + v35);
+ u12 = S (2 * F (C4, u12));
+ v12 = S (2 * F (C4, v12));
+ b1 = S (u12 + v12);
+ b2 = S (u12 - v12);
+
+ y0 = S (a0 + b0) >> COL_SHIFT;
+ y1 = S (a1 + b1) >> COL_SHIFT;
+ y2 = S (a2 + b2) >> COL_SHIFT;
+ y3 = S (a3 + b3) >> COL_SHIFT;
+
+ y4 = S (a3 - b3) >> COL_SHIFT;
+ y5 = S (a2 - b2) >> COL_SHIFT;
+ y6 = S (a1 - b1) >> COL_SHIFT;
+ y7 = S (a0 - b0) >> COL_SHIFT;
+
+ col[0*8] = y0;
+ col[1*8] = y1;
+ col[2*8] = y2;
+ col[3*8] = y3;
+ col[4*8] = y4;
+ col[5*8] = y5;
+ col[6*8] = y6;
+ col[7*8] = y7;
+}
+#endif
+
+
+// MMX column IDCT
+static inline void idct_col (int16_t * col, int offset)
+{
+#define T1 13036
+#define T2 27146
+#define T3 43790
+#define C4 23170
+
+ static short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1};
+ static short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2};
+ static short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3};
+ static short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4};
+
+ /* column code adapted from peter gubanov */
+ /* http://www.elecard.com/peter/idct.shtml */
+
+ movq_m2r (*_T1, mm0); // mm0 = T1
+
+ movq_m2r (*(col+offset+1*8), mm1); // mm1 = x1
+ movq_r2r (mm0, mm2); // mm2 = T1
+
+ movq_m2r (*(col+offset+7*8), mm4); // mm4 = x7
+ pmulhw_r2r (mm1, mm0); // mm0 = T1*x1
+
+ movq_m2r (*_T3, mm5); // mm5 = T3
+ pmulhw_r2r (mm4, mm2); // mm2 = T1*x7
+
+ movq_m2r (*(col+offset+5*8), mm6); // mm6 = x5
+ movq_r2r (mm5, mm7); // mm7 = T3-1
+
+ movq_m2r (*(col+offset+3*8), mm3); // mm3 = x3
+ psubsw_r2r (mm4, mm0); // mm0 = v17
+
+ movq_m2r (*_T2, mm4); // mm4 = T2
+ pmulhw_r2r (mm3, mm5); // mm5 = (T3-1)*x3
+
+ paddsw_r2r (mm2, mm1); // mm1 = u17
+ pmulhw_r2r (mm6, mm7); // mm7 = (T3-1)*x5
+
+ /* slot */
+
+ movq_r2r (mm4, mm2); // mm2 = T2
+ paddsw_r2r (mm3, mm5); // mm5 = T3*x3
+
+ pmulhw_m2r (*(col+offset+2*8), mm4);// mm4 = T2*x2
+ paddsw_r2r (mm6, mm7); // mm7 = T3*x5
+
+ psubsw_r2r (mm6, mm5); // mm5 = v35
+ paddsw_r2r (mm3, mm7); // mm7 = u35
+
+ movq_m2r (*(col+offset+6*8), mm3); // mm3 = x6
+ movq_r2r (mm0, mm6); // mm6 = v17
+
+ pmulhw_r2r (mm3, mm2); // mm2 = T2*x6
+ psubsw_r2r (mm5, mm0); // mm0 = b3
+
+ psubsw_r2r (mm3, mm4); // mm4 = v26
+ paddsw_r2r (mm6, mm5); // mm5 = v12
+
+ movq_r2m (mm0, *(col+offset+3*8)); // save b3 in scratch0
+ movq_r2r (mm1, mm6); // mm6 = u17
+
+ paddsw_m2r (*(col+offset+2*8), mm2);// mm2 = u26
+ paddsw_r2r (mm7, mm6); // mm6 = b0
+
+ psubsw_r2r (mm7, mm1); // mm1 = u12
+ movq_r2r (mm1, mm7); // mm7 = u12
+
+ movq_m2r (*(col+offset+0*8), mm3); // mm3 = x0
+ paddsw_r2r (mm5, mm1); // mm1 = u12+v12
+
+ movq_m2r (*_C4, mm0); // mm0 = C4/2
+ psubsw_r2r (mm5, mm7); // mm7 = u12-v12
+
+ movq_r2m (mm6, *(col+offset+5*8)); // save b0 in scratch1
+ pmulhw_r2r (mm0, mm1); // mm1 = b1/2
+
+ movq_r2r (mm4, mm6); // mm6 = v26
+ pmulhw_r2r (mm0, mm7); // mm7 = b2/2
+
+ movq_m2r (*(col+offset+4*8), mm5); // mm5 = x4
+ movq_r2r (mm3, mm0); // mm0 = x0
+
+ psubsw_r2r (mm5, mm3); // mm3 = v04
+ paddsw_r2r (mm5, mm0); // mm0 = u04
+
+ paddsw_r2r (mm3, mm4); // mm4 = a1
+ movq_r2r (mm0, mm5); // mm5 = u04
+
+ psubsw_r2r (mm6, mm3); // mm3 = a2
+ paddsw_r2r (mm2, mm5); // mm5 = a0
+
+ paddsw_r2r (mm1, mm1); // mm1 = b1
+ psubsw_r2r (mm2, mm0); // mm0 = a3
+
+ paddsw_r2r (mm7, mm7); // mm7 = b2
+ movq_r2r (mm3, mm2); // mm2 = a2
+
+ movq_r2r (mm4, mm6); // mm6 = a1
+ paddsw_r2r (mm7, mm3); // mm3 = a2+b2
+
+ psraw_i2r (COL_SHIFT, mm3); // mm3 = y2
+ paddsw_r2r (mm1, mm4); // mm4 = a1+b1
+
+ psraw_i2r (COL_SHIFT, mm4); // mm4 = y1
+ psubsw_r2r (mm1, mm6); // mm6 = a1-b1
+
+ movq_m2r (*(col+offset+5*8), mm1); // mm1 = b0
+ psubsw_r2r (mm7, mm2); // mm2 = a2-b2
+
+ psraw_i2r (COL_SHIFT, mm6); // mm6 = y6
+ movq_r2r (mm5, mm7); // mm7 = a0
+
+ movq_r2m (mm4, *(col+offset+1*8)); // save y1
+ psraw_i2r (COL_SHIFT, mm2); // mm2 = y5
+
+ movq_r2m (mm3, *(col+offset+2*8)); // save y2
+ paddsw_r2r (mm1, mm5); // mm5 = a0+b0
+
+ movq_m2r (*(col+offset+3*8), mm4); // mm4 = b3
+ psubsw_r2r (mm1, mm7); // mm7 = a0-b0
+
+ psraw_i2r (COL_SHIFT, mm5); // mm5 = y0
+ movq_r2r (mm0, mm3); // mm3 = a3
+
+ movq_r2m (mm2, *(col+offset+5*8)); // save y5
+ psubsw_r2r (mm4, mm3); // mm3 = a3-b3
+
+ psraw_i2r (COL_SHIFT, mm7); // mm7 = y7
+ paddsw_r2r (mm0, mm4); // mm4 = a3+b3
+
+ movq_r2m (mm5, *(col+offset+0*8)); // save y0
+ psraw_i2r (COL_SHIFT, mm3); // mm3 = y4
+
+ movq_r2m (mm6, *(col+offset+6*8)); // save y6
+ psraw_i2r (COL_SHIFT, mm4); // mm4 = y3
+
+ movq_r2m (mm7, *(col+offset+7*8)); // save y7
+
+ movq_r2m (mm3, *(col+offset+4*8)); // save y4
+
+ movq_r2m (mm4, *(col+offset+3*8)); // save y3
+}
+
+
+static int32_t rounder0[] ATTR_ALIGN(8) =
+ rounder ((1 << (COL_SHIFT - 1)) - 0.5);
+static int32_t rounder4[] ATTR_ALIGN(8) = rounder (0);
+static int32_t rounder1[] ATTR_ALIGN(8) =
+ rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */
+static int32_t rounder7[] ATTR_ALIGN(8) =
+ rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */
+static int32_t rounder2[] ATTR_ALIGN(8) =
+ rounder (0.60355339059); /* C2 * (C6+C2)/2 */
+static int32_t rounder6[] ATTR_ALIGN(8) =
+ rounder (-0.25); /* C2 * (C6-C2)/2 */
+static int32_t rounder3[] ATTR_ALIGN(8) =
+ rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */
+static int32_t rounder5[] ATTR_ALIGN(8) =
+ rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */
+
+
+#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \
+void idct (int16_t * block) \
+{ \
+ static int16_t table04[] ATTR_ALIGN(16) = \
+ table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \
+ static int16_t table17[] ATTR_ALIGN(16) = \
+ table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \
+ static int16_t table26[] ATTR_ALIGN(16) = \
+ table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \
+ static int16_t table35[] ATTR_ALIGN(16) = \
+ table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \
+ \
+ idct_row_head (block, 0*8, table04); \
+ idct_row (table04, rounder0); \
+ idct_row_mid (block, 0*8, 4*8, table04); \
+ idct_row (table04, rounder4); \
+ idct_row_mid (block, 4*8, 1*8, table17); \
+ idct_row (table17, rounder1); \
+ idct_row_mid (block, 1*8, 7*8, table17); \
+ idct_row (table17, rounder7); \
+ idct_row_mid (block, 7*8, 2*8, table26); \
+ idct_row (table26, rounder2); \
+ idct_row_mid (block, 2*8, 6*8, table26); \
+ idct_row (table26, rounder6); \
+ idct_row_mid (block, 6*8, 3*8, table35); \
+ idct_row (table35, rounder3); \
+ idct_row_mid (block, 3*8, 5*8, table35); \
+ idct_row (table35, rounder5); \
+ idct_row_tail (block, 5*8); \
+ \
+ idct_col (block, 0); \
+ idct_col (block, 4); \
+}
+
+
+declare_idct (ff_mmxext_idct, mmxext_table,
+ mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid)
+
+declare_idct (ff_mmx_idct, mmx_table,
+ mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid)
diff --git a/src/libffmpeg/libavcodec/jrevdct.c b/src/libffmpeg/libavcodec/jrevdct.c
index 2ef40f38e..246f1b190 100644
--- a/src/libffmpeg/libavcodec/jrevdct.c
+++ b/src/libffmpeg/libavcodec/jrevdct.c
@@ -197,16 +197,18 @@ void j_rev_dct(DCTBLOCK data)
register int *idataptr = (int*)dataptr;
+ /* WARNING: we do the same permutation as MMX idct to simplify the
+ video core */
d0 = dataptr[0];
- d1 = dataptr[1];
- d2 = dataptr[2];
- d3 = dataptr[3];
- d4 = dataptr[4];
- d5 = dataptr[5];
- d6 = dataptr[6];
+ d2 = dataptr[1];
+ d4 = dataptr[2];
+ d6 = dataptr[3];
+ d1 = dataptr[4];
+ d3 = dataptr[5];
+ d5 = dataptr[6];
d7 = dataptr[7];
- if ((d1 == 0) && (idataptr[1] | idataptr[2] | idataptr[3]) == 0) {
+ if ((d1 | d2 | d3 | d4 | d5 | d6 | d7) == 0) {
/* AC terms all zero */
if (d0) {
/* Compute a 32 bit value to assign. */
diff --git a/src/libffmpeg/libavcodec/mjpeg.c b/src/libffmpeg/libavcodec/mjpeg.c
index e119df66f..df7415b81 100644
--- a/src/libffmpeg/libavcodec/mjpeg.c
+++ b/src/libffmpeg/libavcodec/mjpeg.c
@@ -220,7 +220,7 @@ static int put_huffman_table(MpegEncContext *s, int table_class, int table_id,
static void jpeg_table_header(MpegEncContext *s)
{
PutBitContext *p = &s->pb;
- int i, size;
+ int i, j, size;
UINT8 *ptr;
/* quant matrixes */
@@ -229,13 +229,15 @@ static void jpeg_table_header(MpegEncContext *s)
put_bits(p, 4, 0); /* 8 bit precision */
put_bits(p, 4, 0); /* table 0 */
for(i=0;i<64;i++) {
- put_bits(p, 8, s->intra_matrix[i]);
+ j = zigzag_direct[i];
+ put_bits(p, 8, s->intra_matrix[j]);
}
#if 0
put_bits(p, 4, 0); /* 8 bit precision */
put_bits(p, 4, 1); /* table 1 */
for(i=0;i<64;i++) {
- put_bits(p, 8, s->chroma_intra_matrix[i]);
+ j = zigzag_direct[i];
+ put_bits(p, 8, s->chroma_intra_matrix[j]);
}
#endif
@@ -489,7 +491,7 @@ static int mjpeg_decode_init(AVCodecContext *avctx)
static int mjpeg_decode_dqt(MJpegDecodeContext *s,
UINT8 *buf, int buf_size)
{
- int len, index, i;
+ int len, index, i, j;
init_get_bits(&s->gb, buf, buf_size);
len = get_bits(&s->gb, 16);
@@ -504,8 +506,10 @@ static int mjpeg_decode_dqt(MJpegDecodeContext *s,
return -1;
dprintf("index=%d\n", index);
/* read quant table */
- for(i=0;i<64;i++)
- s->quant_matrixes[index][i] = get_bits(&s->gb, 8);
+ for(i=0;i<64;i++) {
+ j = zigzag_direct[i];
+ s->quant_matrixes[index][j] = get_bits(&s->gb, 8);
+ }
len -= 65;
}
return 0;
diff --git a/src/libffmpeg/libavcodec/mpeg12.c b/src/libffmpeg/libavcodec/mpeg12.c
index 182c341e4..381fafadd 100644
--- a/src/libffmpeg/libavcodec/mpeg12.c
+++ b/src/libffmpeg/libavcodec/mpeg12.c
@@ -1172,32 +1172,36 @@ static void mpeg_decode_sequence_extension(MpegEncContext *s)
static void mpeg_decode_quant_matrix_extension(MpegEncContext *s)
{
- int i, v;
+ int i, v, j;
if (get_bits1(&s->gb)) {
for(i=0;i<64;i++) {
v = get_bits(&s->gb, 8);
- s->intra_matrix[i] = v;
- s->chroma_intra_matrix[i] = v;
+ j = block_permute_op(i);
+ s->intra_matrix[j] = v;
+ s->chroma_intra_matrix[j] = v;
}
}
if (get_bits1(&s->gb)) {
for(i=0;i<64;i++) {
v = get_bits(&s->gb, 8);
- s->non_intra_matrix[i] = v;
- s->chroma_non_intra_matrix[i] = v;
+ j = block_permute_op(i);
+ s->non_intra_matrix[j] = v;
+ s->chroma_non_intra_matrix[j] = v;
}
}
if (get_bits1(&s->gb)) {
for(i=0;i<64;i++) {
v = get_bits(&s->gb, 8);
- s->chroma_intra_matrix[i] = v;
+ j = block_permute_op(i);
+ s->chroma_intra_matrix[j] = v;
}
}
if (get_bits1(&s->gb)) {
for(i=0;i<64;i++) {
v = get_bits(&s->gb, 8);
- s->chroma_non_intra_matrix[i] = v;
+ j = block_permute_op(i);
+ s->chroma_non_intra_matrix[j] = v;
}
}
}
@@ -1345,7 +1349,7 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
{
Mpeg1Context *s1 = avctx->priv_data;
MpegEncContext *s = &s1->mpeg_enc_ctx;
- int width, height, i, v;
+ int width, height, i, v, j;
init_get_bits(&s->gb, buf, buf_size);
@@ -1389,8 +1393,9 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
if (get_bits1(&s->gb)) {
for(i=0;i<64;i++) {
v = get_bits(&s->gb, 8);
- s->intra_matrix[i] = v;
- s->chroma_intra_matrix[i] = v;
+ j = block_permute_op(i);
+ s->intra_matrix[j] = v;
+ s->chroma_intra_matrix[j] = v;
}
} else {
for(i=0;i<64;i++) {
@@ -1402,8 +1407,9 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
if (get_bits1(&s->gb)) {
for(i=0;i<64;i++) {
v = get_bits(&s->gb, 8);
- s->non_intra_matrix[i] = v;
- s->chroma_non_intra_matrix[i] = v;
+ j = block_permute_op(i);
+ s->non_intra_matrix[j] = v;
+ s->chroma_non_intra_matrix[j] = v;
}
} else {
for(i=0;i<64;i++) {
diff --git a/src/libffmpeg/libavcodec/mpeg12data.h b/src/libffmpeg/libavcodec/mpeg12data.h
index f397c4a17..4f6a95b79 100644
--- a/src/libffmpeg/libavcodec/mpeg12data.h
+++ b/src/libffmpeg/libavcodec/mpeg12data.h
@@ -2,7 +2,7 @@
* MPEG1/2 tables
*/
-const UINT8 default_intra_matrix[64] = {
+INT16 default_intra_matrix[64] = {
8, 16, 19, 22, 26, 27, 29, 34,
16, 16, 22, 24, 27, 29, 34, 37,
19, 22, 26, 27, 29, 34, 34, 38,
@@ -13,7 +13,7 @@ const UINT8 default_intra_matrix[64] = {
27, 29, 35, 38, 46, 56, 69, 83
};
-const UINT8 default_non_intra_matrix[64] = {
+INT16 default_non_intra_matrix[64] = {
16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16,
@@ -331,18 +331,6 @@ static const UINT8 mbMotionVectorTable[17][2] = {
{ 0xc, 10 },
};
-//const
-UINT8 zigzag_direct[64] = {
- 0, 1, 8, 16, 9, 2, 3, 10,
- 17, 24, 32, 25, 18, 11, 4, 5,
- 12, 19, 26, 33, 40, 48, 41, 34,
- 27, 20, 13, 6, 7, 14, 21, 28,
- 35, 42, 49, 56, 57, 50, 43, 36,
- 29, 22, 15, 23, 30, 37, 44, 51,
- 58, 59, 52, 45, 38, 31, 39, 46,
- 53, 60, 61, 54, 47, 55, 62, 63
-};
-
static const int frame_rate_tab[9] = {
0,
(int)(23.976 * FRAME_RATE_BASE),
diff --git a/src/libffmpeg/libavcodec/mpeg4data.h b/src/libffmpeg/libavcodec/mpeg4data.h
index 54b93d97e..3821a591c 100644
--- a/src/libffmpeg/libavcodec/mpeg4data.h
+++ b/src/libffmpeg/libavcodec/mpeg4data.h
@@ -81,26 +81,3 @@ static RLTable rl_intra = {
intra_run,
intra_level,
};
-
-/* alternate scan orders used when doing AC prediction */
-UINT8 ff_alternate_horizontal_scan[64] = {
- 0, 1, 2, 3, 8, 9, 16, 17,
- 10, 11, 4, 5, 6, 7, 15, 14,
- 13, 12, 19, 18, 24, 25, 32, 33,
- 26, 27, 20, 21, 22, 23, 28, 29,
- 30, 31, 34, 35, 40, 41, 48, 49,
- 42, 43, 36, 37, 38, 39, 44, 45,
- 46, 47, 50, 51, 56, 57, 58, 59,
- 52, 53, 54, 55, 60, 61, 62, 63,
-};
-
-UINT8 ff_alternate_vertical_scan[64] = {
- 0, 8, 16, 24, 1, 9, 2, 10,
- 17, 25, 32, 40, 48, 56, 57, 49,
- 41, 33, 26, 18, 3, 11, 4, 12,
- 19, 27, 34, 42, 50, 58, 35, 43,
- 51, 59, 20, 28, 5, 13, 6, 14,
- 21, 29, 36, 44, 52, 60, 37, 45,
- 53, 61, 22, 30, 7, 15, 23, 31,
- 38, 46, 54, 62, 39, 47, 55, 63,
-};
diff --git a/src/libffmpeg/libavcodec/mpegvideo.c b/src/libffmpeg/libavcodec/mpegvideo.c
index 8a5745a8f..a2b1cb61e 100644
--- a/src/libffmpeg/libavcodec/mpegvideo.c
+++ b/src/libffmpeg/libavcodec/mpegvideo.c
@@ -28,11 +28,6 @@
#include "fastmemcpy.h"
#endif
-/* FIXME */
-#ifdef ARCH_X86
-#define HAVE_MMX
-#endif
-
static void encode_picture(MpegEncContext *s, int picture_number);
static void rate_control_init(MpegEncContext *s);
static int rate_estimate_qscale(MpegEncContext *s);
@@ -1073,6 +1068,10 @@ static int dct_quantize_mmx(MpegEncContext *s,
const int *qmat;
av_fdct (block);
+
+ /* This permutation is needed to compensate for the IDCT
+ permutation; it will be moved into the DCT code. */
+ block_permute(block);
if (s->mb_intra) {
if (n < 4)
diff --git a/src/libffmpeg/libavcodec/mpegvideo.h b/src/libffmpeg/libavcodec/mpegvideo.h
index 9f9307393..a225dedbe 100644
--- a/src/libffmpeg/libavcodec/mpegvideo.h
+++ b/src/libffmpeg/libavcodec/mpegvideo.h
@@ -179,9 +179,6 @@ typedef struct MpegEncContext {
DCTELEM *block, int n, int qscale);
} MpegEncContext;
-//const
-extern UINT8 zigzag_direct[64];
-
int MPV_common_init(MpegEncContext *s);
void MPV_common_end(MpegEncContext *s);
void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
@@ -198,8 +195,8 @@ int estimate_motion(MpegEncContext *s,
int *mx_ptr, int *my_ptr);
/* mpeg12.c */
-extern const UINT8 default_intra_matrix[64];
-extern const UINT8 default_non_intra_matrix[64];
+extern INT16 default_intra_matrix[64];
+extern INT16 default_non_intra_matrix[64];
void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number);
void mpeg1_encode_mb(MpegEncContext *s,
@@ -257,8 +254,6 @@ int intel_h263_decode_picture_header(MpegEncContext *s);
int h263_decode_mb(MpegEncContext *s,
DCTELEM block[6][64]);
int h263_get_picture_format(int width, int height);
-extern UINT8 ff_alternate_horizontal_scan[64];
-extern UINT8 ff_alternate_vertical_scan[64];
/* rv10.c */
void rv10_encode_picture_header(MpegEncContext *s, int picture_number);
diff --git a/src/video_out/yuv2rgb.c b/src/video_out/yuv2rgb.c
index c4ad9d43d..d92b62839 100644
--- a/src/video_out/yuv2rgb.c
+++ b/src/video_out/yuv2rgb.c
@@ -22,7 +22,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
- * $Id: yuv2rgb.c,v 1.10 2001/07/30 19:37:18 guenter Exp $
+ * $Id: yuv2rgb.c,v 1.11 2001/08/07 23:59:50 guenter Exp $
*/
#include "config.h"
@@ -95,9 +95,24 @@ int yuv2rgb_setup (yuv2rgb_t *this,
this->v_buffer = this->v_chunk = NULL;
}
- if ((source_width == dest_width) && (source_height == dest_height))
+ if ((source_width == dest_width) && (source_height == dest_height)) {
this->do_scale = 0;
- else {
+
+ /*
+ * space for two y-lines (for yuv2rgb_mlib)
+ * u,v subsampled 2:1
+ */
+ this->y_buffer = my_malloc_aligned (16, 2*dest_width, &this->y_chunk);
+ if (!this->y_buffer)
+ return 0;
+ this->u_buffer = my_malloc_aligned (16, (dest_width+1)/2, &this->u_chunk);
+ if (!this->u_buffer)
+ return 0;
+ this->v_buffer = my_malloc_aligned (16, (dest_width+1)/2, &this->v_chunk);
+ if (!this->v_buffer)
+ return 0;
+
+ } else {
this->do_scale = 1;
this->step_dx = source_width * 32768 / dest_width;
@@ -1227,8 +1242,8 @@ yuv2rgb_t *yuv2rgb_init (int mode) {
this->matrix_coefficients = 6;
this->y_chunk = this->y_buffer = NULL;
- this->y_chunk = this->u_buffer = NULL;
- this->y_chunk = this->v_buffer = NULL;
+ this->u_chunk = this->u_buffer = NULL;
+ this->v_chunk = this->v_buffer = NULL;
yuv2rgb_setup_tables(this, mode);
diff --git a/src/xine-engine/cpu_accel.h b/src/xine-engine/cpu_accel.h
index ca2713ff4..498b219fb 100644
--- a/src/xine-engine/cpu_accel.h
+++ b/src/xine-engine/cpu_accel.h
@@ -51,7 +51,7 @@ extern "C" {
#define MM_SSE2 0x00000000
uint32_t mm_accel (void) ;
-uint32_t mm_support (void) ;
+/* uint32_t mm_support (void) ; */
#ifdef ARCH_X86