summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/libffmpeg/libavcodec/Makefile.am3
-rw-r--r--src/libffmpeg/libavcodec/alpha/asm.h141
-rw-r--r--src/libffmpeg/libavcodec/alpha/dsputil_alpha.c223
-rw-r--r--src/libffmpeg/libavcodec/alpha/mpegvideo_alpha.c88
-rw-r--r--src/libffmpeg/libavcodec/alpha/pixops.h135
-rw-r--r--src/libffmpeg/libavcodec/armv4l/dsputil_arm.c20
-rw-r--r--src/libffmpeg/libavcodec/avcodec.h293
-rw-r--r--src/libffmpeg/libavcodec/common.c59
-rw-r--r--src/libffmpeg/libavcodec/common.h119
-rw-r--r--src/libffmpeg/libavcodec/dsputil.c468
-rw-r--r--src/libffmpeg/libavcodec/dsputil.h44
-rw-r--r--src/libffmpeg/libavcodec/h263.c1883
-rw-r--r--src/libffmpeg/libavcodec/h263data.h101
-rw-r--r--src/libffmpeg/libavcodec/h263dec.c297
-rw-r--r--src/libffmpeg/libavcodec/i386/dsputil_mmx.c1104
-rw-r--r--src/libffmpeg/libavcodec/i386/dsputil_mmx_avg.h558
-rw-r--r--src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h305
-rw-r--r--src/libffmpeg/libavcodec/i386/fdct_mmx.c4
-rw-r--r--src/libffmpeg/libavcodec/i386/idct_mmx.c8
-rw-r--r--src/libffmpeg/libavcodec/i386/motion_est_mmx.c27
-rw-r--r--src/libffmpeg/libavcodec/i386/mpegvideo_mmx.c339
-rw-r--r--src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c292
-rw-r--r--src/libffmpeg/libavcodec/i386/simple_idct_mmx.c2096
-rw-r--r--src/libffmpeg/libavcodec/imgconvert.c30
-rw-r--r--src/libffmpeg/libavcodec/imgresample.c38
-rw-r--r--src/libffmpeg/libavcodec/jfdctfst.c9
-rw-r--r--src/libffmpeg/libavcodec/jrevdct.c3
-rw-r--r--src/libffmpeg/libavcodec/mjpeg.c489
-rw-r--r--src/libffmpeg/libavcodec/mlib/dsputil_mlib.c22
-rw-r--r--src/libffmpeg/libavcodec/motion_est.c1459
-rw-r--r--src/libffmpeg/libavcodec/mpeg12.c109
-rw-r--r--src/libffmpeg/libavcodec/mpeg4data.h33
-rw-r--r--src/libffmpeg/libavcodec/mpegvideo.c2119
-rw-r--r--src/libffmpeg/libavcodec/mpegvideo.h305
-rw-r--r--src/libffmpeg/libavcodec/msmpeg4.c496
-rw-r--r--src/libffmpeg/libavcodec/msmpeg4data.h51
-rw-r--r--src/libffmpeg/libavcodec/ratecontrol.c402
-rw-r--r--src/libffmpeg/libavcodec/rv10.c61
-rw-r--r--src/libffmpeg/libavcodec/simple_idct.c639
-rw-r--r--src/libffmpeg/libavcodec/simple_idct.h37
-rw-r--r--src/libffmpeg/libavcodec/utils.c151
41 files changed, 9784 insertions, 5276 deletions
diff --git a/src/libffmpeg/libavcodec/Makefile.am b/src/libffmpeg/libavcodec/Makefile.am
index f05501807..0fcae49fb 100644
--- a/src/libffmpeg/libavcodec/Makefile.am
+++ b/src/libffmpeg/libavcodec/Makefile.am
@@ -16,7 +16,8 @@ noinst_LTLIBRARIES = libavcodec.la
libavcodec_la_SOURCES = common.c utils.c mpegvideo.c h263.c jrevdct.c jfdctfst.c \
mjpeg.c dsputil.c \
motion_est.c imgconvert.c msmpeg4.c \
- mpeg12.c h263dec.c rv10.c simple_idct.c
+ mpeg12.c h263dec.c rv10.c simple_idct.c \
+ ratecontrol.c
#imgresample.c
libavcodec_la_LDFLAGS = \
diff --git a/src/libffmpeg/libavcodec/alpha/asm.h b/src/libffmpeg/libavcodec/alpha/asm.h
new file mode 100644
index 000000000..0f4685f11
--- /dev/null
+++ b/src/libffmpeg/libavcodec/alpha/asm.h
@@ -0,0 +1,141 @@
+/*
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef LIBAVCODEC_ALPHA_ASM_H
+#define LIBAVCODEC_ALPHA_ASM_H
+
+#include <stdint.h>
+
+#define AMASK_BWX (1 << 0)
+#define AMASK_FIX (1 << 1)
+#define AMASK_MVI (1 << 8)
+
+static inline uint64_t BYTE_VEC(uint64_t x) /* replicate a byte value (x <= 0xff) into all 8 bytes of a quadword */
+{
+ x |= x << 8;
+ x |= x << 16;
+ x |= x << 32;
+ return x;
+}
+static inline uint64_t WORD_VEC(uint64_t x) /* replicate a 16-bit value (x <= 0xffff) into all 4 words of a quadword */
+{
+ x |= x << 16;
+ x |= x << 32;
+ return x;
+}
+
+static inline int32_t ldl(const void* p)
+{
+ return *(const int32_t*) p;
+}
+static inline uint64_t ldq(const void* p)
+{
+ return *(const uint64_t*) p;
+}
+/* FIXME ccc doesn't seem to get it? Use inline asm? */
+static inline uint64_t ldq_u(const void* p) /* load the aligned quadword that contains address p */
+{
+ return *(const uint64_t*) ((uintptr_t) p & ~7ul); /* clear the low 3 address bits before loading */
+}
+static inline void stl(uint32_t l, void* p)
+{
+ *(uint32_t*) p = l;
+}
+static inline void stq(uint64_t l, void* p)
+{
+ *(uint64_t*) p = l;
+}
+
+#ifdef __GNUC__
+#define OPCODE1(name) \
+static inline uint64_t name(uint64_t l) \
+{ \
+ uint64_t r; \
+ asm (#name " %1, %0" : "=r" (r) : "r" (l)); \
+ return r; \
+}
+
+#define OPCODE2(name) \
+static inline uint64_t name(uint64_t l1, uint64_t l2) \
+{ \
+ uint64_t r; \
+ asm (#name " %1, %2, %0" : "=r" (r) : "r" (l1), "rI" (l2)); \
+ return r; \
+}
+
+/* We don't want gcc to move this around or combine it with another
+ rpcc, so mark it volatile. */
+static inline uint64_t rpcc(void)
+{
+ uint64_t r;
+ asm volatile ("rpcc %0" : "=r" (r));
+ return r;
+}
+
+static inline uint64_t uldq(const void* v) /* quadword load from a possibly unaligned address */
+{
+ struct foo {
+ unsigned long l;
+ } __attribute__((packed)); /* packed: the member carries no alignment requirement */
+
+ return ((const struct foo*) v)->l; /* assumes unsigned long is 64 bits wide, as on Alpha */
+}
+
+#elif defined(__DECC) /* Compaq "ccc" compiler */
+
+#include <c_asm.h>
+#define OPCODE1(name) \
+static inline uint64_t name(uint64_t l) \
+{ \
+ return asm (#name " %a0, %v0", l); \
+}
+
+#define OPCODE2(name) \
+static inline uint64_t name(uint64_t l1, uint64_t l2) \
+{ \
+ return asm (#name " %a0, %a1, %v0", l1, l2); \
+}
+
+static inline uint64_t rpcc(void)
+{
+ return asm ("rpcc %v0");
+}
+
+static inline uint64_t uldq(const void* v)
+{
+ return *(const __unaligned uint64_t *) v;
+}
+
+#endif
+
+OPCODE1(amask);
+OPCODE1(unpkbw);
+OPCODE1(pkwb);
+OPCODE2(extql);
+OPCODE2(extqh);
+OPCODE2(zap);
+OPCODE2(cmpbge);
+OPCODE2(minsw4);
+OPCODE2(minuw4);
+OPCODE2(minub8);
+OPCODE2(maxsw4);
+OPCODE2(maxuw4);
+OPCODE2(perr);
+
+#endif /* LIBAVCODEC_ALPHA_ASM_H */
diff --git a/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c b/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c
new file mode 100644
index 000000000..3a54904f4
--- /dev/null
+++ b/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c
@@ -0,0 +1,223 @@
+/*
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "asm.h"
+#include "../dsputil.h"
+
+void simple_idct_axp(DCTELEM *block);
+
+static void put_pixels_clamped_axp(const DCTELEM *block, UINT8 *pixels,
+ int line_size)
+{
+ int i = 8; /* 8 rows, 8 coefficients each */
+ do {
+ UINT64 shorts;
+
+ shorts = ldq(block); /* first 4 coefficients of the row */
+ shorts = maxsw4(shorts, 0); /* clamp negative words to 0 */
+ shorts = minsw4(shorts, WORD_VEC(0x00ff)); /* clamp to 255 */
+ stl(pkwb(shorts), pixels); /* pack the 4 words into 4 bytes and store them */
+
+ shorts = ldq(block + 4); /* next 4 */
+ shorts = maxsw4(shorts, 0);
+ shorts = minsw4(shorts, WORD_VEC(0x00ff));
+ stl(pkwb(shorts), pixels + 4);
+
+ pixels += line_size;
+ block += 8;
+ } while (--i);
+}
+
+static void add_pixels_clamped_axp(const DCTELEM *block, UINT8 *pixels,
+ int line_size)
+{
+ int i = 8;
+ do {
+ UINT64 shorts;
+
+ shorts = ldq(block);
+ shorts &= ~WORD_VEC(0x8000); /* clear highest bit to avoid overflow */
+ shorts += unpkbw(ldl(pixels));
+ shorts &= ~WORD_VEC(0x8000); /* hibit would be set for e. g. -2 + 3 */
+ shorts = minuw4(shorts, WORD_VEC(0x4000)); /* set neg. to 0x4000 */
+ shorts &= ~WORD_VEC(0x4000); /* ...and zap them */
+ shorts = minsw4(shorts, WORD_VEC(0x00ff)); /* clamp to 255 */
+ stl(pkwb(shorts), pixels);
+
+ /* next 4 */
+ shorts = ldq(block + 4);
+ shorts &= ~WORD_VEC(0x8000);
+ shorts += unpkbw(ldl(pixels + 4));
+ shorts &= ~WORD_VEC(0x8000);
+ shorts = minuw4(shorts, WORD_VEC(0x4000));
+ shorts &= ~WORD_VEC(0x4000);
+ shorts = minsw4(shorts, WORD_VEC(0x00ff));
+ stl(pkwb(shorts), pixels + 4);
+
+ pixels += line_size;
+ block += 8;
+ } while (--i);
+}
+
+/* Average 8 unsigned bytes in parallel: (b1 + b2) >> 1
+ Since the intermediate result could be greater than 255, we do the
+ shift first. The result is too low by one if the bytes were both
+ odd, so we need to add (l1 & l2) & BYTE_VEC(0x01). */
+static inline UINT64 avg2_no_rnd(UINT64 l1, UINT64 l2)
+{
+ UINT64 correction = (l1 & l2) & BYTE_VEC(0x01);
+ l1 = (l1 & ~BYTE_VEC(0x01)) >> 1; /* clear bit 0 first so nothing shifts into the neighbouring byte */
+ l2 = (l2 & ~BYTE_VEC(0x01)) >> 1;
+ return l1 + l2 + correction;
+}
+
+/* Average 8 bytes with rounding: (b1 + b2 + 1) >> 1
+ The '1' only has an effect when one byte is even and the other odd,
+ i. e. on top of avg2_no_rnd's (l1 & l2) term we add (l1 ^ l2) & BYTE_VEC(0x01).
+ The two terms are never both set, so their sum is (l1 | l2) & BYTE_VEC(0x01). */
+static inline UINT64 avg2(UINT64 l1, UINT64 l2)
+{
+ UINT64 correction = (l1 | l2) & BYTE_VEC(0x01);
+ l1 = (l1 & ~BYTE_VEC(0x01)) >> 1; /* clear bit 0 first so nothing shifts into the neighbouring byte */
+ l2 = (l2 & ~BYTE_VEC(0x01)) >> 1;
+ return l1 + l2 + correction;
+}
+
+static inline UINT64 avg4(UINT64 l1, UINT64 l2, UINT64 l3, UINT64 l4) /* per byte: (b1 + b2 + b3 + b4 + 2) >> 2 */
+{
+ UINT64 r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2) /* r1: sum of b >> 2 over the four inputs */
+ + ((l2 & ~BYTE_VEC(0x03)) >> 2)
+ + ((l3 & ~BYTE_VEC(0x03)) >> 2)
+ + ((l4 & ~BYTE_VEC(0x03)) >> 2);
+ UINT64 r2 = (( (l1 & BYTE_VEC(0x03)) /* r2: (sum of the low 2 bits + rounding bias) >> 2 */
+ + (l2 & BYTE_VEC(0x03))
+ + (l3 & BYTE_VEC(0x03))
+ + (l4 & BYTE_VEC(0x03))
+ + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03); /* the mask drops bits shifted in from the next byte */
+ return r1 + r2;
+}
+
+static inline UINT64 avg4_no_rnd(UINT64 l1, UINT64 l2, UINT64 l3, UINT64 l4) /* per byte: (b1 + b2 + b3 + b4 + 1) >> 2 */
+{
+ UINT64 r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2) /* r1: sum of b >> 2 over the four inputs */
+ + ((l2 & ~BYTE_VEC(0x03)) >> 2)
+ + ((l3 & ~BYTE_VEC(0x03)) >> 2)
+ + ((l4 & ~BYTE_VEC(0x03)) >> 2);
+ UINT64 r2 = (( (l1 & BYTE_VEC(0x03)) /* r2: (sum of the low 2 bits + bias) >> 2 */
+ + (l2 & BYTE_VEC(0x03))
+ + (l3 & BYTE_VEC(0x03))
+ + (l4 & BYTE_VEC(0x03))
+ + BYTE_VEC(0x01)) >> 2) & BYTE_VEC(0x03); /* bias is 1 here instead of avg4's 2; the mask drops bits from the next byte */
+ return r1 + r2;
+}
+
+#define PIXOPNAME(suffix) put ## suffix
+#define BTYPE UINT8
+#define AVG2 avg2
+#define AVG4 avg4
+#define STORE(l, b) stq(l, b)
+#include "pixops.h"
+#undef PIXOPNAME
+#undef BTYPE
+#undef AVG2
+#undef AVG4
+#undef STORE
+
+#define PIXOPNAME(suffix) put_no_rnd ## suffix
+#define BTYPE UINT8
+#define AVG2 avg2_no_rnd
+#define AVG4 avg4_no_rnd
+#define STORE(l, b) stq(l, b)
+#include "pixops.h"
+#undef PIXOPNAME
+#undef BTYPE
+#undef AVG2
+#undef AVG4
+#undef STORE
+
+/* The following functions are untested. */
+#if 0
+
+#define PIXOPNAME(suffix) avg ## suffix
+#define BTYPE UINT8
+#define AVG2 avg2
+#define AVG4 avg4
+#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
+#include "pixops.h"
+#undef PIXOPNAME
+#undef BTYPE
+#undef AVG2
+#undef AVG4
+#undef STORE
+
+#define PIXOPNAME(suffix) avg_no_rnd ## suffix
+#define BTYPE UINT8
+#define AVG2 avg2_no_rnd
+#define AVG4 avg4_no_rnd
+#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
+#include "pixops.h"
+#undef PIXOPNAME
+#undef BTYPE
+#undef AVG2
+#undef AVG4
+#undef STORE
+
+#define PIXOPNAME(suffix) sub ## suffix
+#define BTYPE DCTELEM
+#define AVG2 avg2
+#define AVG4 avg4
+#define STORE(l, block) do { \
+ UINT64 xxx = l; \
+ (block)[0] -= (xxx >> 0) & 0xff; \
+ (block)[1] -= (xxx >> 8) & 0xff; \
+ (block)[2] -= (xxx >> 16) & 0xff; \
+ (block)[3] -= (xxx >> 24) & 0xff; \
+ (block)[4] -= (xxx >> 32) & 0xff; \
+ (block)[5] -= (xxx >> 40) & 0xff; \
+ (block)[6] -= (xxx >> 48) & 0xff; \
+ (block)[7] -= (xxx >> 56) & 0xff; \
+} while (0)
+#include "pixops.h"
+#undef PIXOPNAME
+#undef BTYPE
+#undef AVG2
+#undef AVG4
+#undef STORE
+
+#endif
+
+void dsputil_init_alpha(void) /* point the put_pixels tables and the clamped-pixel hooks at the Alpha versions */
+{
+ put_pixels_tab[0] = put_pixels_axp;
+ put_pixels_tab[1] = put_pixels_x2_axp;
+ put_pixels_tab[2] = put_pixels_y2_axp;
+ put_pixels_tab[3] = put_pixels_xy2_axp;
+
+ put_no_rnd_pixels_tab[0] = put_pixels_axp; /* the plain copy does no averaging, so the rounding variant is reused */
+ put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_axp;
+ put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_axp;
+ put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_axp;
+
+ /* amask clears all bits that correspond to present features. */
+ if (amask(AMASK_MVI) == 0) {
+ fprintf(stderr, "MVI extension detected\n");
+ put_pixels_clamped = put_pixels_clamped_axp; /* installed only when the MVI check above succeeds */
+ add_pixels_clamped = add_pixels_clamped_axp;
+ }
+}
diff --git a/src/libffmpeg/libavcodec/alpha/mpegvideo_alpha.c b/src/libffmpeg/libavcodec/alpha/mpegvideo_alpha.c
new file mode 100644
index 000000000..d0af5e1d3
--- /dev/null
+++ b/src/libffmpeg/libavcodec/alpha/mpegvideo_alpha.c
@@ -0,0 +1,88 @@
+/*
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "asm.h"
+#include "../dsputil.h"
+#include "../mpegvideo.h"
+
+extern UINT8 zigzag_end[64];
+
+static void dct_unquantize_h263_axp(MpegEncContext *s,
+ DCTELEM *block, int n, int qscale)
+{
+ int i, level;
+ UINT64 qmul, qadd;
+ if (s->mb_intra) {
+ if (n < 4)
+ block[0] = block[0] * s->y_dc_scale;
+ else
+ block[0] = block[0] * s->c_dc_scale;
+ /* Handle coefficients 1..3 with scalar code so the vector loop can start at block[4]. */
+ qmul = s->qscale << 1;
+ qadd = (s->qscale - 1) | 1;
+ for (i = 1; i < 4; ++i) {
+ level = block[i];
+ if (level) {
+ if (level < 0) {
+ level = level * qmul - qadd;
+ } else {
+ level = level * qmul + qadd;
+ }
+ block[i] = level;
+ }
+ }
+ block += 4;
+ i = 60 / 4; /* the remaining 60 coefficients, 4 per iteration */
+ } else {
+ i = zigzag_end[s->block_last_index[n]] / 4; /* NOTE(review): the do/while below needs i >= 1 - confirm zigzag_end[] never yields less than 4 */
+ }
+ qmul = s->qscale << 1; /* NOTE(review): uses s->qscale while qadd below uses the qscale argument - confirm they always agree */
+ qadd = WORD_VEC((qscale - 1) | 1);
+ do {
+ UINT64 levels, negmask, zeromask, corr;
+ levels = ldq(block);
+ if (levels == 0)
+ continue; /* in a do/while this jumps to the condition, so block still advances and i still counts down */
+ zeromask = cmpbge(0, levels); /* presumably one mask bit per zero byte of levels - confirm against the cmpbge definition */
+ zeromask &= zeromask >> 1; /* bit 2k then stays set only if both bytes of word k were flagged */
+ /* Negate all negative words. */
+ negmask = maxsw4(levels, WORD_VEC(0xffff)); /* negative -> ffff (-1) */
+ negmask = minsw4(negmask, 0); /* positive -> 0000 (0) */
+ corr = negmask & WORD_VEC(0x0001); /* twos-complement correction */
+ levels ^= negmask;
+ levels += corr;
+
+ levels = levels * qmul; /* one 64-bit multiply scales all 4 (now non-negative) words; relies on no product exceeding 16 bits */
+ levels += zap(qadd, zeromask); /* presumably zap clears the qadd bytes selected by zeromask, so zero words stay zero */
+
+ /* Re-negate negative words. */
+ levels -= corr;
+ levels ^= negmask;
+
+ stq(levels, block);
+ } while (block += 4, --i);
+}
+
+void MPV_common_init_axp(MpegEncContext *s) /* install the Alpha dequantizer when it is applicable */
+{
+ if (amask(AMASK_MVI) == 0) { /* amask returns 0 for the MVI bit when the feature is present */
+ if (s->out_format == FMT_H263) /* only the H.263-style dequantizer has an Alpha version here */
+ s->dct_unquantize = dct_unquantize_h263_axp;
+ }
+}
diff --git a/src/libffmpeg/libavcodec/alpha/pixops.h b/src/libffmpeg/libavcodec/alpha/pixops.h
new file mode 100644
index 000000000..118d7ae23
--- /dev/null
+++ b/src/libffmpeg/libavcodec/alpha/pixops.h
@@ -0,0 +1,135 @@
+/*
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* This file is intended to be #included with proper definitions of
+ * PIXOPNAME, BTYPE, AVG2, AVG4 and STORE. */
+
+static void PIXOPNAME(_pixels_axp)(BTYPE *block, const UINT8 *pixels,
+ int line_size, int h)
+{
+ if ((size_t) pixels & 0x7) { /* alignment is tested once, before the loop; assumes line_size keeps it unchanged from row to row */
+ do {
+ STORE(uldq(pixels), block); /* one 8-pixel row per iteration, unaligned load */
+ pixels += line_size;
+ block += line_size;
+ } while (--h); /* do/while: h must be >= 1 on entry */
+ } else {
+ do {
+ STORE(ldq(pixels), block); /* same, with an aligned load */
+ pixels += line_size;
+ block += line_size;
+ } while (--h);
+ }
+}
+
+static void PIXOPNAME(_pixels_x2_axp)(BTYPE *block, const UINT8 *pixels,
+ int line_size, int h)
+{
+ if ((size_t) pixels & 0x7) { /* unaligned source: same loop as below but with uldq */
+ do {
+ UINT64 pix1, pix2;
+
+ pix1 = uldq(pixels); /* pixels 0..7 */
+ pix2 = pix1 >> 8 | ((UINT64) pixels[8] << 56); /* pixels 1..8, assuming a little-endian load */
+ STORE(AVG2(pix1, pix2), block); /* average each pixel with its right-hand neighbour */
+ pixels += line_size;
+ block += line_size;
+ } while (--h); /* do/while: h must be >= 1 on entry */
+ } else {
+ do {
+ UINT64 pix1, pix2;
+
+ pix1 = ldq(pixels);
+ pix2 = pix1 >> 8 | ((UINT64) pixels[8] << 56);
+ STORE(AVG2(pix1, pix2), block);
+ pixels += line_size;
+ block += line_size;
+ } while (--h);
+ }
+}
+
+static void PIXOPNAME(_pixels_y2_axp)(BTYPE *block, const UINT8 *pixels,
+ int line_size, int h)
+{
+ if ((size_t) pixels & 0x7) { /* unaligned source: same loop as below but with uldq */
+ UINT64 pix = uldq(pixels); /* first row is loaded before the loop, so h + 1 source rows are read in total */
+ do {
+ UINT64 next_pix;
+
+ pixels += line_size;
+ next_pix = uldq(pixels);
+ STORE(AVG2(pix, next_pix), block); /* average each pixel with the one below it */
+ block += line_size;
+ pix = next_pix; /* reuse the lower row as the upper row of the next pass */
+ } while (--h); /* do/while: h must be >= 1 on entry */
+ } else {
+ UINT64 pix = ldq(pixels);
+ do {
+ UINT64 next_pix;
+
+ pixels += line_size;
+ next_pix = ldq(pixels);
+ STORE(AVG2(pix, next_pix), block);
+ block += line_size;
+ pix = next_pix;
+ } while (--h);
+ }
+}
+
+/* This could be further sped up by recycling AVG4 intermediate
+ results from the previous loop pass. */
+static void PIXOPNAME(_pixels_xy2_axp)(BTYPE *block, const UINT8 *pixels,
+ int line_size, int h)
+{
+ if ((size_t) pixels & 0x7) { /* unaligned source: same loop as below but with uldq */
+ UINT64 pix1 = uldq(pixels); /* first row is loaded before the loop, so h + 1 source rows are read in total */
+ UINT64 pix2 = pix1 >> 8 | ((UINT64) pixels[8] << 56); /* pixels 1..8, assuming a little-endian load */
+
+ do {
+ UINT64 next_pix1, next_pix2;
+
+ pixels += line_size;
+ next_pix1 = uldq(pixels);
+ next_pix2 = next_pix1 >> 8 | ((UINT64) pixels[8] << 56);
+
+ STORE(AVG4(pix1, pix2, next_pix1, next_pix2), block); /* average each 2x2 neighbourhood */
+
+ block += line_size;
+ pix1 = next_pix1; /* reuse the lower row as the upper row of the next pass */
+ pix2 = next_pix2;
+ } while (--h); /* do/while: h must be >= 1 on entry */
+ } else {
+ UINT64 pix1 = ldq(pixels);
+ UINT64 pix2 = pix1 >> 8 | ((UINT64) pixels[8] << 56);
+
+ do {
+ UINT64 next_pix1, next_pix2;
+
+ pixels += line_size;
+ next_pix1 = ldq(pixels);
+ next_pix2 = next_pix1 >> 8 | ((UINT64) pixels[8] << 56);
+
+ STORE(AVG4(pix1, pix2, next_pix1, next_pix2), block);
+
+ block += line_size;
+ pix1 = next_pix1;
+ pix2 = next_pix2;
+ } while (--h);
+ }
+}
diff --git a/src/libffmpeg/libavcodec/armv4l/dsputil_arm.c b/src/libffmpeg/libavcodec/armv4l/dsputil_arm.c
index 1cf7b4fba..cd362ca48 100644
--- a/src/libffmpeg/libavcodec/armv4l/dsputil_arm.c
+++ b/src/libffmpeg/libavcodec/armv4l/dsputil_arm.c
@@ -2,19 +2,19 @@
* ARMv4L optimized DSP utils
* Copyright (c) 2001 Lionel Ulmer.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
*
- * This program is distributed in the hope that it will be useful,
+ * This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "../dsputil.h"
diff --git a/src/libffmpeg/libavcodec/avcodec.h b/src/libffmpeg/libavcodec/avcodec.h
index 05b27d8c2..68b67154d 100644
--- a/src/libffmpeg/libavcodec/avcodec.h
+++ b/src/libffmpeg/libavcodec/avcodec.h
@@ -3,6 +3,11 @@
#include "common.h"
+#define LIBAVCODEC_VERSION_INT 0x000406
+#define LIBAVCODEC_VERSION "0.4.6"
+#define LIBAVCODEC_BUILD 4614
+#define LIBAVCODEC_BUILD_STR "4614"
+
enum CodecID {
CODEC_ID_NONE,
CODEC_ID_MPEG1VIDEO,
@@ -17,18 +22,31 @@ enum CodecID {
CODEC_ID_MSMPEG4V1,
CODEC_ID_MSMPEG4V2,
CODEC_ID_MSMPEG4V3,
+ CODEC_ID_WMV1,
+ CODEC_ID_WMV2,
CODEC_ID_H263P,
CODEC_ID_H263I,
+ /* various pcm "codecs" */
+ CODEC_ID_PCM_S16LE,
+ CODEC_ID_PCM_S16BE,
+ CODEC_ID_PCM_U16LE,
+ CODEC_ID_PCM_U16BE,
+ CODEC_ID_PCM_S8,
+ CODEC_ID_PCM_U8,
+ CODEC_ID_PCM_MULAW,
+ CODEC_ID_PCM_ALAW,
};
#define CODEC_ID_MSMPEG4 CODEC_ID_MSMPEG4V3
enum CodecType {
+ CODEC_TYPE_UNKNOWN = -1,
CODEC_TYPE_VIDEO,
CODEC_TYPE_AUDIO,
};
enum PixelFormat {
+ PIX_FMT_ANY = -1,
PIX_FMT_YUV420P,
PIX_FMT_YUV422,
PIX_FMT_RGB24,
@@ -45,14 +63,24 @@ enum SampleFormat {
/* in bytes */
#define AVCODEC_MAX_AUDIO_FRAME_SIZE 18432
-/* motion estimation type */
+/* motion estimation type, EPZS by default */
+enum Motion_Est_ID {
+ ME_ZERO = 1, /* NOTE(review): numbering starts at 1 here; the #define ME_ZERO this replaces was 0 - confirm old callers are not affected */
+ ME_FULL,
+ ME_LOG,
+ ME_PHODS,
+ ME_EPZS,
+ ME_X1
+};
+
+/* only for ME compatibility with old apps */
extern int motion_estimation_method;
-#define ME_ZERO 0
-#define ME_FULL 1
-#define ME_LOG 2
-#define ME_PHODS 3
-#define ME_EPZS 4
-#define ME_X1 5
+
+/* ME algos sorted by quality */
+static const int Motion_Est_QTab[] = { ME_ZERO, ME_PHODS, ME_LOG,
+ ME_X1, ME_EPZS, ME_FULL };
+
+#define FF_MAX_B_FRAMES 4
/* encoding support */
/* note not everything is supported yet */
@@ -60,10 +88,17 @@ extern int motion_estimation_method;
#define CODEC_FLAG_HQ 0x0001 /* high quality (non real time) encoding */
#define CODEC_FLAG_QSCALE 0x0002 /* use fixed qscale */
#define CODEC_FLAG_4MV 0x0004 /* 4 MV per MB allowed */
-#define CODEC_FLAG_B 0x0008 /* use B frames */
#define CODEC_FLAG_QPEL 0x0010 /* use qpel MC */
#define CODEC_FLAG_GMC 0x0020 /* use GMC */
#define CODEC_FLAG_TYPE 0x0040 /* fixed I/P frame type, from avctx->key_frame */
+#define CODEC_FLAG_PART 0x0080 /* use data partitioning */
+/* parent program guarantees that the input for b-frame containing streams is not written to
+ for at least s->max_b_frames+1 frames; if this is not set then the input will be copied */
+#define CODEC_FLAG_INPUT_PRESERVED 0x0100
+#define CODEC_FLAG_PASS1 0x0200 /* use internal 2pass ratecontrol in first pass mode */
+#define CODEC_FLAG_PASS2 0x0400 /* use internal 2pass ratecontrol in second pass mode */
+#define CODEC_FLAG_EXTERN_HUFF 0x1000 /* use external huffman table (for mjpeg) */
+#define CODEC_FLAG_GRAY 0x2000 /* only decode/encode grayscale */
/* codec capabilities */
@@ -78,6 +113,15 @@ typedef struct AVCodecContext {
int flags;
int sub_id; /* some codecs needs additionnal format info. It is
stored there */
+
+ int me_method; /* ME algorithm used for video coding */
+
+ /* extra data from parent application to codec, e.g. huffman table
+ for mjpeg */
+ /* the parent should allocate and free this buffer */
+ void *extradata;
+ int extradata_size;
+
/* video only */
int frame_rate; /* frames per sec multiplied by FRAME_RATE_BASE */
int width, height;
@@ -88,8 +132,10 @@ typedef struct AVCodecContext {
#define FF_ASPECT_16_9_625 4
#define FF_ASPECT_16_9_525 5
int gop_size; /* 0 = intra only */
- int pix_fmt; /* pixel format, see PIX_FMT_xxx */
-
+ enum PixelFormat pix_fmt; /* pixel format, see PIX_FMT_xxx */
+ int repeat_pict; /* when decoding, this signal how much the picture */
+ /* must be delayed. */
+ /* extra_delay = (repeat_pict / 2) * (1/fps) */
/* if non NULL, 'draw_horiz_band' is called by the libavcodec
decoder to draw an horizontal band. It improve cache usage. Not
all codecs can do that. You must check the codec capabilities
@@ -104,23 +150,48 @@ typedef struct AVCodecContext {
int sample_fmt; /* sample format, currenly unused */
/* the following data should not be initialized */
- int frame_size; /* in samples, initialized when calling 'init' */
- int frame_number; /* audio or video frame number */
- int key_frame; /* true if the previous compressed frame was
- a key frame (intra, or seekable) */
+ int frame_size; /* in samples, initialized when calling 'init' */
+ int frame_number; /* audio or video frame number */
+ int real_pict_num; /* returns the real picture number of
+ previous encoded frame */
+ int key_frame; /* true if the previous compressed frame was
+ a key frame (intra, or seekable) */
+ int pict_type; /* picture type of the previous
+ encoded frame */
+/* FIXME: these should have FF_ */
+#define I_TYPE 1 // Intra
+#define P_TYPE 2 // Predicted
+#define B_TYPE 3 // Bi-dir predicted
+#define S_TYPE 4 // S(GMC)-VOP MPEG4
+
+ int delay; /* number of frames the decoded output
+ will be delayed relative to the encoded input */
+ uint8_t *mbskip_table; /* =1 if MB didnt change, is only valid for I/P frames
+ stride= mb_width = (width+15)>>4 */
+
+ /* encoding parameters */
int quality; /* quality of the previous encoded frame
- (between 1 (good) and 31 (bad)) */
+ (between 1 (good) and 31 (bad))
+ this is also used to set the quality in vbr mode
+ and the per frame quality in CODEC_FLAG_TYPE (second pass mode) */
float qcompress; /* amount of qscale change between easy & hard scenes (0.0-1.0)*/
float qblur; /* amount of qscale smoothing over time (0.0-1.0) */
int qmin; /* min qscale */
int qmax; /* max qscale */
int max_qdiff; /* max qscale difference between frames */
+ int max_b_frames; /* maximum b frames, the output will be delayed by max_b_frames+1 relative to the input */
+ float b_quant_factor;/* qscale factor between ips and b frames */
+ int rc_strategy;
+ int b_frame_strategy;
+
+ int hurry_up; /* when set to 1 during decoding, b frames will be skiped
+ when set to 2 idct/dequant will be skipped too */
struct AVCodec *codec;
void *priv_data;
/* The following data is for RTP friendly coding */
- /* By now only H.263/H.263+ coder honours this */
+ /* By now only H.263/H.263+/MPEG4 coder honours this */
int rtp_mode; /* 1 for activate RTP friendly-mode */
/* highers numbers represent more error-prone */
/* enviroments, by now just "1" exist */
@@ -145,7 +216,7 @@ typedef struct AVCodecContext {
float psnr_y;
float psnr_cb;
float psnr_cr;
-
+
/* statistics, used for 2-pass encoding */
int mv_bits;
int header_bits;
@@ -156,13 +227,57 @@ typedef struct AVCodecContext {
int skip_count;
int misc_bits; // cbp, mb_type
int frame_bits;
-
+
/* the following fields are ignored */
void *opaque; /* can be used to carry app specific stuff */
char codec_name[32];
- int codec_type; /* see CODEC_TYPE_xxx */
- int codec_id; /* see CODEC_ID_xxx */
+ enum CodecType codec_type; /* see CODEC_TYPE_xxx */
+ enum CodecID codec_id; /* see CODEC_ID_xxx */
unsigned int codec_tag; /* codec tag, only used if unknown codec */
+
+ int workaround_bugs; /* workaround bugs in encoders which cannot be detected automatically */
+ int luma_elim_threshold;
+ int chroma_elim_threshold;
+ int strict_std_compliance; /* strictly follow the std (MPEG4, ...) */
+ float b_quant_offset;/* qscale offset between ips and b frames, not implemented yet */
+ int error_resilience;
+
+#ifndef MBC
+#define MBC 128
+#define MBR 96
+#endif
+ int *quant_store; /* field for communicating with external postprocessing */
+ unsigned qstride;
+ //FIXME this should be reordered after kabis API is finished ...
+ /*
+ Note: The fields below are reserved for future use.
+ They are required for ABI compatibility !!!
+ If you need to add a field, borrow the space from these reserved fields
+ (void * can be safely replaced with struct * ;)
+ P L E A S E ! ! !
+ IMPORTANT: Never change the order of already declared fields!!!
+ */
+ unsigned long long int
+ ull_res0,ull_res1,ull_res2,ull_res3,ull_res4,ull_res5,
+ ull_res6,ull_res7,ull_res8,ull_res9,ull_res10,ull_res11,ull_res12;
+ float
+ flt_res0,flt_res1,flt_res2,flt_res3,flt_res4,flt_res5,
+ flt_res6,flt_res7,flt_res8,flt_res9,flt_res10,flt_res11;
+ void
+ *ptr_res0,*ptr_res1,*ptr_res2,*ptr_res3,*ptr_res4,*ptr_res5,
+ *ptr_res6,*ptr_res7,*ptr_res8,*ptr_res9,*ptr_res10,*ptr_res11;
+ unsigned long int
+ ul_res0,ul_res1,ul_res2,ul_res3,ul_res4,ul_res5,
+ ul_res6,ul_res7,ul_res8,ul_res9,ul_res10,ul_res11,ul_res12;
+ unsigned int
+ ui_res0,ui_res1,ui_res2,ui_res3,ui_res4,ui_res5,
+ ui_res6;
+ unsigned short int
+ us_res0,us_res1,us_res2,us_res3,us_res4,us_res5,
+ us_res6,us_res7,us_res8,us_res9,us_res10,us_res11,us_res12;
+ unsigned char
+ uc_res0,uc_res1,uc_res2,uc_res3,uc_res4,uc_res5,
+ uc_res6,uc_res7,uc_res8,uc_res9,uc_res10,uc_res11,uc_res12;
} AVCodecContext;
typedef struct AVCodec {
@@ -177,6 +292,23 @@ typedef struct AVCodec {
UINT8 *buf, int buf_size);
int capabilities;
struct AVCodec *next;
+ /*
+ Note: The fields below are reserved for future use.
+ They are required for ABI compatibility !!!
+ If you need to add a field, borrow the space from these reserved fields
+ (void * can be safely replaced with struct * ;)
+ P L E A S E ! ! !
+ IMPORTANT: Never change the order of already declared fields!!!
+ */
+ unsigned long long int
+ ull_res0,ull_res1,ull_res2,ull_res3,ull_res4,ull_res5,
+ ull_res6,ull_res7,ull_res8,ull_res9,ull_res10,ull_res11,ull_res12;
+ float
+ flt_res0,flt_res1,flt_res2,flt_res3,flt_res4,flt_res5,
+ flt_res6,flt_res7,flt_res8,flt_res9,flt_res10,flt_res11,flt_res12;
+ void
+ *ptr_res0,*ptr_res1,*ptr_res2,*ptr_res3,*ptr_res4,*ptr_res5,
+ *ptr_res6,*ptr_res7,*ptr_res8,*ptr_res9,*ptr_res10,*ptr_res11,*ptr_res12;
} AVCodec;
/* three components are given, that's all */
@@ -185,15 +317,47 @@ typedef struct AVPicture {
int linesize[3];
} AVPicture;
+extern AVCodec ac3_encoder;
+extern AVCodec mp2_encoder;
+extern AVCodec mp3lame_encoder;
+extern AVCodec mpeg1video_encoder;
+extern AVCodec h263_encoder;
+extern AVCodec h263p_encoder;
+extern AVCodec rv10_encoder;
+extern AVCodec mjpeg_encoder;
+extern AVCodec mpeg4_encoder;
+extern AVCodec msmpeg4v1_encoder;
+extern AVCodec msmpeg4v2_encoder;
+extern AVCodec msmpeg4v3_encoder;
+
extern AVCodec h263_decoder;
extern AVCodec mpeg4_decoder;
extern AVCodec msmpeg4v1_decoder;
extern AVCodec msmpeg4v2_decoder;
extern AVCodec msmpeg4v3_decoder;
+extern AVCodec wmv1_decoder;
extern AVCodec mpeg_decoder;
extern AVCodec h263i_decoder;
extern AVCodec rv10_decoder;
extern AVCodec mjpeg_decoder;
+extern AVCodec mp2_decoder;
+extern AVCodec mp3_decoder;
+
+/* pcm codecs */
+#define PCM_CODEC(id, name) \
+extern AVCodec name ## _decoder; \
+extern AVCodec name ## _encoder;
+
+PCM_CODEC(CODEC_ID_PCM_S16LE, pcm_s16le);
+PCM_CODEC(CODEC_ID_PCM_S16BE, pcm_s16be);
+PCM_CODEC(CODEC_ID_PCM_U16LE, pcm_u16le);
+PCM_CODEC(CODEC_ID_PCM_U16BE, pcm_u16be);
+PCM_CODEC(CODEC_ID_PCM_S8, pcm_s8);
+PCM_CODEC(CODEC_ID_PCM_U8, pcm_u8);
+PCM_CODEC(CODEC_ID_PCM_ALAW, pcm_alaw);
+PCM_CODEC(CODEC_ID_PCM_MULAW, pcm_mulaw);
+
+#undef PCM_CODEC
/* dummy raw video codec */
extern AVCodec rawvideo_codec;
@@ -242,8 +406,14 @@ int avpicture_deinterlace(AVPicture *dst, AVPicture *src,
extern AVCodec *first_avcodec;
+/* returns LIBAVCODEC_VERSION_INT constant */
+unsigned avcodec_version(void);
+/* returns LIBAVCODEC_BUILD constant */
+unsigned avcodec_build(void);
void avcodec_init(void);
+void avcodec_set_bit_exact(void);
+
void register_avcodec(AVCodec *format);
AVCodec *avcodec_find_encoder(enum CodecID id);
AVCodec *avcodec_find_encoder_by_name(const char *name);
@@ -267,12 +437,87 @@ int avcodec_close(AVCodecContext *avctx);
void avcodec_register_all(void);
+void avcodec_flush_buffers(AVCodecContext *avctx);
+
#ifdef FF_POSTPROCESS
-#ifndef MBC
-#define MBC 48
-#define MBR 36
-#endif
extern int quant_store[MBR+1][MBC+1]; // [Review]
#endif
+
+/**
+ * Interface for 0.5.0 version
+ *
+ * do not even think about its usage for the moment
+ */
+
+typedef struct { // result record of the command interface; presumably filled by AVC_ENCODE - TODO confirm
+ // compressed size used from given memory buffer
+ int size;
+ /// I/P/B frame type
+ int frame_type;
+} avc_enc_result_t;
+
+/**
+ * Commands
+ * order can't be changed - once it was defined
+ */
+typedef enum { // command codes for avcodec(); each group starts at its own base value and counts up from there
+ // general commands
+ AVC_OPEN_BY_NAME = 0xACA000,
+ AVC_OPEN_BY_CODEC_ID,
+ AVC_OPEN_BY_FOURCC,
+ AVC_CLOSE,
+
+ AVC_FLUSH,
+ // pin - struct { uint8_t* src, uint_t src_size }
+ // pout - struct { AVPicture* img, consumed_bytes, ... }
+ AVC_DECODE,
+ // pin - struct { AVPicture* img, uint8_t* dest, uint_t dest_size }
+ // pout - uint_t used_from_dest_size
+ AVC_ENCODE,
+
+ // query/get video commands
+ AVC_GET_VERSION = 0xACB000,
+ AVC_GET_WIDTH,
+ AVC_GET_HEIGHT,
+ AVC_GET_DELAY,
+ AVC_GET_QUANT_TABLE,
+ // ...
+
+ // query/get audio commands
+ AVC_GET_FRAME_SIZE = 0xABC000, // NOTE(review): breaks the 0xACx000 pattern of the other groups - possibly a typo for 0xACC000; confirm
+
+ // maybe define some simple structure which
+ // might be passed to the user - but they can't
+ // contain any codec specific parts and these
+ // calls are usually necessary only a few times
+
+ // set video commands
+ AVC_SET_WIDTH = 0xACD000,
+ AVC_SET_HEIGHT,
+
+ // set video encoding commands
+ AVC_SET_FRAME_RATE = 0xACD800,
+ AVC_SET_QUALITY,
+ AVC_SET_HURRY_UP,
+
+ // set audio commands
+ AVC_SET_SAMPLE_RATE = 0xACE000,
+ AVC_SET_CHANNELS,
+
+} avc_cmd_t;
+
+/**
+ * \param handle private structure allocated by libavcodec;
+ * for initialization pass NULL - the handle will be returned in pout;
+ * the user is supposed to know nothing about its structure
+ * \param cmd type of operation to be performed
+ * \param pin input parameter
+ * \param pout output parameter
+ *
+ * \returns command status - eventually for query command it might return
+ * integer resulting value
+ */
+int avcodec(void* handle, avc_cmd_t cmd, void* pin, void* pout);
+
#endif /* AVCODEC_H */
diff --git a/src/libffmpeg/libavcodec/common.c b/src/libffmpeg/libavcodec/common.c
index f7fe2e1d1..571de1afc 100644
--- a/src/libffmpeg/libavcodec/common.c
+++ b/src/libffmpeg/libavcodec/common.c
@@ -1,25 +1,24 @@
/*
* Common bit i/o utils
- * Copyright (c) 2000, 2001 Gerard Lantau.
+ * Copyright (c) 2000, 2001 Fabrice Bellard.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
*
- * This program is distributed in the hope that it will be useful,
+ * This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at>
*/
#include "common.h"
-#include <math.h>
void init_put_bits(PutBitContext *s,
UINT8 *buffer, int buffer_size,
@@ -108,6 +107,15 @@ void jflush_put_bits(PutBitContext *s)
}
#endif
+void put_string(PutBitContext * pbc, char *s) /* append the bytes of s as 8-bit fields, followed by a terminating zero byte */
+{
+ while(*s){ /* stop at the NUL; it is written separately below */
+ put_bits(pbc, 8, (unsigned char)*s); /* plain char may be signed: a negative value would not fit in 8 bits and would trip the range assert in put_bits() */
+ s++;
+ }
+ put_bits(pbc, 8, 0); /* write the NUL terminator as well */
+}
+
/* bit input functions */
void init_get_bits(GetBitContext *s,
@@ -166,6 +174,9 @@ unsigned int get_bits_long(GetBitContext *s, int n)
(buf_ptr[-2] << 8) |
(buf_ptr[-1]);
#endif
+ val |= bit_buf >> (32 + bit_cnt);
+ bit_buf <<= - bit_cnt;
+ bit_cnt += 32;
} else {
buf_ptr -= 4;
bit_buf = 0;
@@ -177,11 +188,13 @@ unsigned int get_bits_long(GetBitContext *s, int n)
bit_buf |= *buf_ptr++ << 8;
if (buf_ptr < s->buf_end)
bit_buf |= *buf_ptr++;
+
+ val |= bit_buf >> (32 + bit_cnt);
+ bit_buf <<= - bit_cnt;
+ bit_cnt += 8*(buf_ptr - s->buf_ptr);
+ if(bit_cnt<0) bit_cnt=0;
}
s->buf_ptr = buf_ptr;
- val |= bit_buf >> (32 + bit_cnt);
- bit_buf <<= - bit_cnt;
- bit_cnt += 32;
}
s->bit_buf = bit_buf;
s->bit_cnt = bit_cnt;
@@ -349,7 +362,7 @@ static int build_table(VLC *vlc, int table_nb_bits,
#endif
if (table_bits[j] != 0) {
fprintf(stderr, "incorrect codes\n");
- abort();
+ exit(1);
}
table_bits[j] = n;
table_codes[j] = i;
@@ -435,10 +448,8 @@ int init_vlc(VLC *vlc, int nb_bits, int nb_codes,
bits, bits_wrap, bits_size,
codes, codes_wrap, codes_size,
0, 0) < 0) {
- if (vlc->table_bits)
- free(vlc->table_bits);
- if (vlc->table_codes)
- free(vlc->table_codes);
+ av_free(vlc->table_bits);
+ av_free(vlc->table_codes);
return -1;
}
return 0;
@@ -447,7 +458,11 @@ int init_vlc(VLC *vlc, int nb_bits, int nb_codes,
void free_vlc(VLC *vlc)
{
- free(vlc->table_bits);
- free(vlc->table_codes);
+ av_free(vlc->table_bits);
+ av_free(vlc->table_codes);
}
+int ff_gcd(int a, int b){ /* greatest common divisor by Euclid's algorithm (recursive form) */
+ if(b) return ff_gcd(b, a%b); /* gcd(a,b) == gcd(b, a mod b) while b is non-zero */
+ else return a; /* b == 0 ends the recursion and a is the result, so ff_gcd(0,0) is 0 */
+}
diff --git a/src/libffmpeg/libavcodec/common.h b/src/libffmpeg/libavcodec/common.h
index 9c7b086d8..24bd367d6 100644
--- a/src/libffmpeg/libavcodec/common.h
+++ b/src/libffmpeg/libavcodec/common.h
@@ -1,8 +1,6 @@
#ifndef COMMON_H
#define COMMON_H
-#undef DEBUG
-
#define FFMPEG_VERSION_INT 0x000406
#define FFMPEG_VERSION "0.4.6"
@@ -19,18 +17,19 @@
#ifdef HAVE_AV_CONFIG_H
/* only include the following when compiling package */
-#include "../config.h"
+#include "config.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
+#include <math.h>
#ifndef ENODATA
#define ENODATA 61
#endif
-#endif
+#endif /* HAVE_AV_CONFIG_H */
#ifdef CONFIG_WIN32
@@ -51,6 +50,8 @@ typedef UINT16 uint16_t;
typedef INT16 int16_t;
typedef UINT32 uint32_t;
typedef INT32 int32_t;
+typedef UINT64 uint64_t;
+typedef INT64 int64_t;
#ifndef __MINGW32__
#define INT64_C(c) (c ## i64)
@@ -58,14 +59,6 @@ typedef INT32 int32_t;
#define inline __inline
-/*
- Disable warning messages:
- warning C4244: '=' : conversion from 'double' to 'float', possible loss of data
- warning C4305: 'argument' : truncation from 'const double' to 'float'
-*/
-#pragma warning( disable : 4244 )
-#pragma warning( disable : 4305 )
-
#else
#define INT64_C(c) (c ## LL)
#define UINT64_C(c) (c ## ULL)
@@ -78,22 +71,9 @@ typedef INT32 int32_t;
#define DEBUG
#endif
-// code from bits/byteswap.h (C) 1997, 1998 Free Software Foundation, Inc.
-#define bswap_32(x) \
- ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | \
- (((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24))
-#define be2me_32(x) bswap_32(x)
-
#define snprintf _snprintf
-#ifndef __MINGW32__
-/* no config.h with VC */
-#define CONFIG_ENCODERS 1
-#define CONFIG_DECODERS 1
-#define CONFIG_AC3 1
-#endif
-
-#else
+#else /* CONFIG_WIN32 */
/* unix */
@@ -112,8 +92,6 @@ typedef signed char INT8;
typedef signed int INT32;
typedef signed long long INT64;
-#include "xine-engine/bswap.h"
-
#ifdef HAVE_AV_CONFIG_H
#ifdef __FreeBSD__
@@ -133,10 +111,19 @@ typedef signed long long INT64;
#endif /* !CONFIG_WIN32 */
+#include "bswap.h"
-/* debug stuff */
#ifdef HAVE_AV_CONFIG_H
+#if defined(__MINGW32__) || defined(__CYGWIN__) || \
+ defined(__OS2__) || defined (__OpenBSD__)
+#define MANGLE(a) "_" #a
+#else
+#define MANGLE(a) #a
+#endif
+
+/* debug stuff */
+
#ifndef DEBUG
#define NDEBUG
#endif
@@ -150,11 +137,7 @@ inline void dprintf(const char* fmt,...) {}
#else
#ifdef DEBUG
-#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 95) || !defined(__GNUC__)
-#define dprintf(...) printf(__VA_ARGS__)
-#else
#define dprintf(fmt,args...) printf(fmt, ## args)
-#endif
#else
#define dprintf(fmt,args...)
#endif
@@ -163,6 +146,14 @@ inline void dprintf(const char* fmt,...) {}
#endif /* HAVE_AV_CONFIG_H */
+#define av_abort() do { fprintf(stderr, "Abort at %s:%d\n", __FILE__, __LINE__); abort(); } while (0) /* print source location to stderr, then abort() */
+/* NOTE: the helper macros below evaluate their arguments more than once - do not pass expressions with side effects */
+/* division rounded to nearest (ties away from zero when integer division truncates); assume b>0 */
+#define ROUNDED_DIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b))
+#define ABS(a) ((a) >= 0 ? (a) : (-(a)))
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+#define MIN(a,b) ((a) > (b) ? (b) : (a))
+
/* bit output */
struct PutBitContext;
@@ -189,6 +180,7 @@ void init_put_bits(PutBitContext *s,
INT64 get_bit_count(PutBitContext *s); /* XXX: change function name */
void align_put_bits(PutBitContext *s);
void flush_put_bits(PutBitContext *s);
+void put_string(PutBitContext * pbc, char *s);
/* jpeg specific put_bits */
void jflush_put_bits(PutBitContext *s);
@@ -250,7 +242,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
#endif
// printf("put_bits=%d %x\n", n, value);
assert(n == 32 || value < (1U << n));
-
+
bit_buf = s->bit_buf;
bit_left = s->bit_left;
@@ -430,7 +422,6 @@ static inline void jput_bits(PutBitContext *s, int n, int value)
}
#endif
-
static inline uint8_t* pbBufPtr(PutBitContext *s)
{
#ifdef ALT_BITSTREAM_WRITER
@@ -483,7 +474,6 @@ static inline unsigned int get_bits(GetBitContext *s, int n){
}
printf(" ");
#endif
-
return result;
#endif //!ALIGNED_BITSTREAM
#else //ALT_BITSTREAM_READER
@@ -509,10 +499,10 @@ static inline unsigned int get_bits1(GetBitContext *s){
result>>= 8 - 1;
index++;
s->index= index;
+
#ifdef DUMP_STREAM
printf("%d ", result);
#endif
-
return result;
#else
if(s->bit_cnt>0){
@@ -888,7 +878,62 @@ static inline int mid_pred(int a, int b, int c)
return a + b + c - vmin - vmax;
}
+static inline int clip(int a, int amin, int amax) /* clamp a into the range [amin, amax] */
+{
+ if (a < amin)
+ return amin; /* below the range: saturate at the lower bound (checked first) */
+ else if (a > amax)
+ return amax; /* above the range: saturate at the upper bound */
+ else
+ return a; /* already inside the range */
+}
+
/* memory */
+void *av_malloc(int size);
void *av_mallocz(int size);
+void av_free(void *ptr);
+void __av_freep(void **ptr);
+#define av_freep(p) __av_freep((void **)(p))
+
+/* math */
+int ff_gcd(int a, int b);
+
+static inline int ff_sqrt(int a) /* integer square root: largest ret with ret*ret <= a; returns 0 for a <= 0 */
+{
+ int ret=0;
+ int s;
+ unsigned ret_sq=0; /* always equals ret*ret; unsigned because the trial squares below can exceed INT_MAX */
+
+ for(s=15; s>=0; s--){ /* decide the result bits from most to least significant */
+ unsigned b= ret_sq + (1U<<(s*2)) + ((unsigned)ret<<s)*2; /* (ret + 2^s)^2; in signed int this overflowed once a >= 2^30 */
+ if(a>=0 && b<=(unsigned)a){ /* keep bit s only if the trial square does not exceed a; negative a never matches */
+ ret_sq=b;
+ ret+= 1<<s;
+ }
+ }
+ return ret;
+}
+#define RUNTIME_CPUDETECT /* defined unconditionally here, so the cmov variant guarded below is never selected in this header */
+
+#if __CPU__ >= 686 && !defined(RUNTIME_CPUDETECT) /* inline-asm variant using cmov instructions */
+#define COPY3_IF_LT(x,y,a,b,c,d)\
+asm volatile (\
+ "cmpl %0, %3 \n\t"\
+ "cmovl %3, %0 \n\t"\
+ "cmovl %4, %1 \n\t"\
+ "cmovl %5, %2 \n\t"\
+ : "+r" (x), "+r" (a), "+r" (c)\
+ : "r" (y), "r" (b), "r" (d)\
+);
+#else /* portable variant: if y < x then x=y, a=b, c=d; expands to a bare if-block, so take care before a following else */
+#define COPY3_IF_LT(x,y,a,b,c,d)\
+if((y)<(x)){\
+ (x)=(y);\
+ (a)=(b);\
+ (c)=(d);\
+}
+#endif
+
+#define CLAMP_TO_8BIT(d) (((d) > 0xff) ? 0xff : ((d) < 0) ? 0 : (d)) /* saturate d to 0..255; d is parenthesized so compound expressions expand correctly, but it is still evaluated more than once */
#endif
diff --git a/src/libffmpeg/libavcodec/dsputil.c b/src/libffmpeg/libavcodec/dsputil.c
index dcfad05a5..945b7cc9d 100644
--- a/src/libffmpeg/libavcodec/dsputil.c
+++ b/src/libffmpeg/libavcodec/dsputil.c
@@ -1,32 +1,33 @@
/*
* DSP utils
- * Copyright (c) 2000, 2001 Gerard Lantau.
+ * Copyright (c) 2000, 2001 Fabrice Bellard.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
*
- * This program is distributed in the hope that it will be useful,
+ * This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
- * gmc & q-pel support by Michael Niedermayer <michaelni@gmx.at>
+ * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
*/
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
#include "avcodec.h"
#include "dsputil.h"
#include "simple_idct.h"
void (*ff_idct)(DCTELEM *block);
+void (*ff_idct_put)(UINT8 *dest, int line_size, DCTELEM *block);
+void (*ff_idct_add)(UINT8 *dest, int line_size, DCTELEM *block);
+void (*av_fdct)(DCTELEM *block);
void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
+void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
@@ -45,8 +46,10 @@ op_pixels_abs_func pix_abs8x8_xy2;
UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
UINT32 squareTbl[512];
-extern UINT16 default_intra_matrix[64];
-extern UINT16 default_non_intra_matrix[64];
+extern INT16 default_intra_matrix[64];
+extern INT16 default_non_intra_matrix[64];
+extern INT16 ff_mpeg4_default_intra_matrix[64];
+extern INT16 ff_mpeg4_default_non_intra_matrix[64];
UINT8 zigzag_direct[64] = {
0, 1, 8, 16, 9, 2, 3, 10,
@@ -87,6 +90,8 @@ UINT8 ff_alternate_vertical_scan[64] = {
38, 46, 54, 62, 39, 47, 55, 63,
};
+#ifdef SIMPLE_IDCT
+
/* Input permutation for the simple_idct_mmx */
static UINT8 simple_mmx_permutation[64]={
0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
@@ -98,6 +103,7 @@ static UINT8 simple_mmx_permutation[64]={
0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
+#endif
/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
UINT32 inverse[256]={
@@ -141,7 +147,7 @@ UINT8 zigzag_end[64];
UINT8 permutation[64];
//UINT8 invPermutation[64];
-static void build_zigzag_end()
+static void build_zigzag_end(void)
{
int lastIndex;
int lastIndexAfterPerm=0;
@@ -176,6 +182,28 @@ void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size)
}
}
+void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride){ /* block = s1 - s2 for an 8x8 pixel area */
+ DCTELEM *p;
+ int i;
+
+ /* store the signed difference s1 - s2 of every pixel, one 8-pixel row per iteration */
+ p = block;
+ for(i=0;i<8;i++) {
+ p[0] = s1[0] - s2[0];
+ p[1] = s1[1] - s2[1];
+ p[2] = s1[2] - s2[2];
+ p[3] = s1[3] - s2[3];
+ p[4] = s1[4] - s2[4];
+ p[5] = s1[5] - s2[5];
+ p[6] = s1[6] - s2[6];
+ p[7] = s1[7] - s2[7];
+ s1 += stride; /* both sources advance by the same stride */
+ s2 += stride;
+ p += 8; /* the destination block is stored densely, 8 values per row */
+ }
+}
+
+
void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
{
const DCTELEM *p;
@@ -224,6 +252,358 @@ void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
}
}
+#ifdef __GNUC__
+/* LD32/LD64 read a 32/64-bit word from address a; the GCC version reads through a packed one-member struct, presumably so that unaligned addresses are handled safely - TODO confirm */
+struct unaligned_64 { uint64_t l; } __attribute__((packed));
+struct unaligned_32 { uint32_t l; } __attribute__((packed));
+
+#define LD32(a) (((const struct unaligned_32 *) (a))->l)
+#define LD64(a) (((const struct unaligned_64 *) (a))->l)
+
+#else /* __GNUC__ */
+/* other compilers: plain pointer-cast load; NOTE(review): assumes the target tolerates unaligned 32/64-bit accesses - confirm */
+#define LD32(a) (*((uint32_t*)(a)))
+#define LD64(a) (*((uint64_t*)(a)))
+
+#endif /* !__GNUC__ */
+
+#if 0
+
+#define PIXOP2(OPNAME, OP) \
+void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+ int i;\
+ for(i=0; i<h; i++){\
+ OP(*((uint64_t*)block), LD64(pixels));\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
+}\
+\
+void OPNAME ## _no_rnd_pixels_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+ int i;\
+ for(i=0; i<h; i++){\
+ const uint64_t a= LD64(pixels );\
+ const uint64_t b= LD64(pixels+1);\
+ OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
+}\
+\
+void OPNAME ## _pixels_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+ int i;\
+ for(i=0; i<h; i++){\
+ const uint64_t a= LD64(pixels );\
+ const uint64_t b= LD64(pixels+1);\
+ OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
+}\
+\
+void OPNAME ## _no_rnd_pixels_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+ int i;\
+ for(i=0; i<h; i++){\
+ const uint64_t a= LD64(pixels );\
+ const uint64_t b= LD64(pixels+line_size);\
+ OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
+}\
+\
+void OPNAME ## _pixels_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+ int i;\
+ for(i=0; i<h; i++){\
+ const uint64_t a= LD64(pixels );\
+ const uint64_t b= LD64(pixels+line_size);\
+ OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
+}\
+\
+void OPNAME ## _pixels_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+ int i;\
+ const uint64_t a= LD64(pixels );\
+ const uint64_t b= LD64(pixels+1);\
+ uint64_t l0= (a&0x0303030303030303ULL)\
+ + (b&0x0303030303030303ULL)\
+ + 0x0202020202020202ULL;\
+ uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+ + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+ uint64_t l1,h1;\
+\
+ pixels+=line_size;\
+ for(i=0; i<h; i+=2){\
+ uint64_t a= LD64(pixels );\
+ uint64_t b= LD64(pixels+1);\
+ l1= (a&0x0303030303030303ULL)\
+ + (b&0x0303030303030303ULL);\
+ h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+ + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+ OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+ pixels+=line_size;\
+ block +=line_size;\
+ a= LD64(pixels );\
+ b= LD64(pixels+1);\
+ l0= (a&0x0303030303030303ULL)\
+ + (b&0x0303030303030303ULL)\
+ + 0x0202020202020202ULL;\
+ h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+ + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+ OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
+}\
+\
+void OPNAME ## _no_rnd_pixels_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+ int i;\
+ const uint64_t a= LD64(pixels );\
+ const uint64_t b= LD64(pixels+1);\
+ uint64_t l0= (a&0x0303030303030303ULL)\
+ + (b&0x0303030303030303ULL)\
+ + 0x0101010101010101ULL;\
+ uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+ + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+ uint64_t l1,h1;\
+\
+ pixels+=line_size;\
+ for(i=0; i<h; i+=2){\
+ uint64_t a= LD64(pixels );\
+ uint64_t b= LD64(pixels+1);\
+ l1= (a&0x0303030303030303ULL)\
+ + (b&0x0303030303030303ULL);\
+ h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+ + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+ OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+ pixels+=line_size;\
+ block +=line_size;\
+ a= LD64(pixels );\
+ b= LD64(pixels+1);\
+ l0= (a&0x0303030303030303ULL)\
+ + (b&0x0303030303030303ULL)\
+ + 0x0101010101010101ULL;\
+ h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+ + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+ OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
+}\
+\
+void (*OPNAME ## _pixels_tab[4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
+ OPNAME ## _pixels,\
+ OPNAME ## _pixels_x2,\
+ OPNAME ## _pixels_y2,\
+ OPNAME ## _pixels_xy2,\
+};\
+\
+void (*OPNAME ## _no_rnd_pixels_tab[4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
+ OPNAME ## _pixels,\
+ OPNAME ## _no_rnd_pixels_x2,\
+ OPNAME ## _no_rnd_pixels_y2,\
+ OPNAME ## _no_rnd_pixels_xy2,\
+};
+
+#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
+#else // 64 bit variant
+
+#define PIXOP2(OPNAME, OP) \
+void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+ int i;\
+ for(i=0; i<h; i++){\
+ OP(*((uint32_t*)(block )), LD32(pixels ));\
+ OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
+}\
+\
+void OPNAME ## _no_rnd_pixels_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+ int i;\
+ for(i=0; i<h; i++){\
+ int j;\
+ for(j=0; j<2; j++){\
+ const uint32_t a= LD32(pixels );\
+ const uint32_t b= LD32(pixels+1);\
+ OP(*((uint32_t*)block), (a&b) + (((a^b)&0xFEFEFEFEUL)>>1));\
+ pixels+=4;\
+ block +=4;\
+ }\
+ pixels+=line_size-8;\
+ block +=line_size-8;\
+ }\
+}\
+\
+void OPNAME ## _pixels_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+ int i;\
+ for(i=0; i<h; i++){\
+ int j;\
+ for(j=0; j<2; j++){\
+ const uint32_t a= LD32(pixels );\
+ const uint32_t b= LD32(pixels+1);\
+ OP(*((uint32_t*)block), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\
+ pixels+=4;\
+ block +=4;\
+ }\
+ pixels+=line_size-8;\
+ block +=line_size-8;\
+ }\
+}\
+\
+void OPNAME ## _no_rnd_pixels_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+ int i;\
+ for(i=0; i<h; i++){\
+ int j;\
+ for(j=0; j<2; j++){\
+ const uint32_t a= LD32(pixels );\
+ const uint32_t b= LD32(pixels+line_size);\
+ OP(*((uint32_t*)block), (a&b) + (((a^b)&0xFEFEFEFEUL)>>1));\
+ pixels+=4;\
+ block +=4;\
+ }\
+ pixels+=line_size-8;\
+ block +=line_size-8;\
+ }\
+}\
+\
+void OPNAME ## _pixels_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+ int i;\
+ for(i=0; i<h; i++){\
+ int j;\
+ for(j=0; j<2; j++){\
+ const uint32_t a= LD32(pixels );\
+ const uint32_t b= LD32(pixels+line_size);\
+ OP(*((uint32_t*)block), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\
+ pixels+=4;\
+ block +=4;\
+ }\
+ pixels+=line_size-8;\
+ block +=line_size-8;\
+ }\
+}\
+\
+void OPNAME ## _pixels_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+ int j;\
+ for(j=0; j<2; j++){\
+ int i;\
+ const uint32_t a= LD32(pixels );\
+ const uint32_t b= LD32(pixels+1);\
+ uint32_t l0= (a&0x03030303UL)\
+ + (b&0x03030303UL)\
+ + 0x02020202UL;\
+ uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
+ + ((b&0xFCFCFCFCUL)>>2);\
+ uint32_t l1,h1;\
+\
+ pixels+=line_size;\
+ for(i=0; i<h; i+=2){\
+ uint32_t a= LD32(pixels );\
+ uint32_t b= LD32(pixels+1);\
+ l1= (a&0x03030303UL)\
+ + (b&0x03030303UL);\
+ h1= ((a&0xFCFCFCFCUL)>>2)\
+ + ((b&0xFCFCFCFCUL)>>2);\
+ OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+ pixels+=line_size;\
+ block +=line_size;\
+ a= LD32(pixels );\
+ b= LD32(pixels+1);\
+ l0= (a&0x03030303UL)\
+ + (b&0x03030303UL)\
+ + 0x02020202UL;\
+ h0= ((a&0xFCFCFCFCUL)>>2)\
+ + ((b&0xFCFCFCFCUL)>>2);\
+ OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
+ pixels+=4-line_size*(h+1);\
+ block +=4-line_size*h;\
+ }\
+}\
+\
+void OPNAME ## _no_rnd_pixels_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+ int j;\
+ for(j=0; j<2; j++){\
+ int i;\
+ const uint32_t a= LD32(pixels );\
+ const uint32_t b= LD32(pixels+1);\
+ uint32_t l0= (a&0x03030303UL)\
+ + (b&0x03030303UL)\
+ + 0x01010101UL;\
+ uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
+ + ((b&0xFCFCFCFCUL)>>2);\
+ uint32_t l1,h1;\
+\
+ pixels+=line_size;\
+ for(i=0; i<h; i+=2){\
+ uint32_t a= LD32(pixels );\
+ uint32_t b= LD32(pixels+1);\
+ l1= (a&0x03030303UL)\
+ + (b&0x03030303UL);\
+ h1= ((a&0xFCFCFCFCUL)>>2)\
+ + ((b&0xFCFCFCFCUL)>>2);\
+ OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+ pixels+=line_size;\
+ block +=line_size;\
+ a= LD32(pixels );\
+ b= LD32(pixels+1);\
+ l0= (a&0x03030303UL)\
+ + (b&0x03030303UL)\
+ + 0x01010101UL;\
+ h0= ((a&0xFCFCFCFCUL)>>2)\
+ + ((b&0xFCFCFCFCUL)>>2);\
+ OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
+ pixels+=4-line_size*(h+1);\
+ block +=4-line_size*h;\
+ }\
+}\
+\
+void (*OPNAME ## _pixels_tab[4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
+ OPNAME ## _pixels,\
+ OPNAME ## _pixels_x2,\
+ OPNAME ## _pixels_y2,\
+ OPNAME ## _pixels_xy2,\
+};\
+\
+void (*OPNAME ## _no_rnd_pixels_tab[4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
+ OPNAME ## _pixels,\
+ OPNAME ## _no_rnd_pixels_x2,\
+ OPNAME ## _no_rnd_pixels_y2,\
+ OPNAME ## _no_rnd_pixels_xy2,\
+};
+#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
+#endif
+
+#define op_put(a, b) a = b
+
+PIXOP2(avg, op_avg)
+PIXOP2(put, op_put)
+#undef op_avg
+#undef op_put
+
+#if 0
+/* FIXME this stuff could be removed as it's not really used anymore */
#define PIXOP(BTYPE, OPNAME, OP, INCR) \
\
static void OPNAME ## _pixels(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
@@ -323,18 +703,13 @@ void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels, int line_siz
OPNAME ## _pixels_xy2, \
};
-
/* rounding primitives */
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
-#define op_put(a, b) a = b
#define op_avg(a, b) a = avg2(a, b)
#define op_sub(a, b) a -= b
-PIXOP(UINT8, put, op_put, line_size)
-PIXOP(UINT8, avg, op_avg, line_size)
-
PIXOP(DCTELEM, sub, op_sub, 8)
/* not rounding primitives */
@@ -343,13 +718,12 @@ PIXOP(DCTELEM, sub, op_sub, 8)
#define avg2(a,b) ((a+b)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+1)>>2)
-PIXOP(UINT8, put_no_rnd, op_put, line_size)
-PIXOP(UINT8, avg_no_rnd, op_avg, line_size)
-
/* motion estimation */
#undef avg2
#undef avg4
+#endif
+
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
@@ -872,6 +1246,20 @@ void clear_blocks_c(DCTELEM *blocks)
memset(blocks, 0, sizeof(DCTELEM)*6*64);
}
+/* XXX: those functions should be suppressed ASAP when all IDCTs are
+ converted */
+void gen_idct_put(UINT8 *dest, int line_size, DCTELEM *block) /* generic "IDCT then store" built from the separate ff_idct and put_pixels_clamped pointers */
+{
+ ff_idct (block); /* transform through the installed ff_idct pointer; it must be non-NULL (dsputil_init() installs this wrapper only in that case) */
+ put_pixels_clamped(block, dest, line_size); /* write the transformed block to dest */
+}
+
+void gen_idct_add(UINT8 *dest, int line_size, DCTELEM *block) /* generic "IDCT then add" built from the separate ff_idct and add_pixels_clamped pointers */
+{
+ ff_idct (block); /* transform through the installed ff_idct pointer; it must be non-NULL (dsputil_init() installs this wrapper only in that case) */
+ add_pixels_clamped(block, dest, line_size); /* add the transformed block onto the pixels already in dest */
+}
+
void dsputil_init(void)
{
int i, j;
@@ -888,11 +1276,12 @@ void dsputil_init(void)
}
#ifdef SIMPLE_IDCT
- ff_idct = simple_idct;
+ ff_idct = NULL;
#else
ff_idct = j_rev_dct;
#endif
get_pixels = get_pixels_c;
+ diff_pixels = diff_pixels_c;
put_pixels_clamped = put_pixels_clamped_c;
add_pixels_clamped = add_pixels_clamped_c;
gmc1= gmc1_c;
@@ -906,7 +1295,7 @@ void dsputil_init(void)
pix_abs8x8_x2 = pix_abs8x8_x2_c;
pix_abs8x8_y2 = pix_abs8x8_y2_c;
pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
- av_fdct = jpeg_fdct_ifast;
+ av_fdct = fdct_ifast;
use_permuted_idct = 1;
@@ -925,9 +1314,16 @@ void dsputil_init(void)
use_permuted_idct = 0;
#endif
-#ifdef SIMPLE_IDCT
- if(ff_idct == simple_idct) use_permuted_idct=0;
-#endif
+//#ifdef SIMPLE_IDCT
+ if (ff_idct == NULL) {
+ ff_idct_put = simple_idct_put;
+ ff_idct_add = simple_idct_add;
+ use_permuted_idct=0;
+ } else {
+ ff_idct_put = gen_idct_put;
+ ff_idct_add = gen_idct_add;
+ }
+//#endif
if(use_permuted_idct)
#ifdef SIMPLE_IDCT
@@ -953,11 +1349,21 @@ void dsputil_init(void)
}
block_permute(default_intra_matrix);
block_permute(default_non_intra_matrix);
+ block_permute(ff_mpeg4_default_intra_matrix);
+ block_permute(ff_mpeg4_default_non_intra_matrix);
}
build_zigzag_end();
}
+/* remove any non bit exact operation (testing purpose) */
+void avcodec_set_bit_exact(void)
+{
+#ifdef HAVE_MMX /* only the MMX code is adjusted here; without HAVE_MMX this function does nothing */
+ dsputil_set_bit_exact_mmx();
+#endif
+}
+
void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3],
int orig_linesize[3], int coded_linesize,
AVCodecContext *avctx)
diff --git a/src/libffmpeg/libavcodec/dsputil.h b/src/libffmpeg/libavcodec/dsputil.h
index dc63f06f1..b7b7e999c 100644
--- a/src/libffmpeg/libavcodec/dsputil.h
+++ b/src/libffmpeg/libavcodec/dsputil.h
@@ -1,21 +1,39 @@
+/*
+ * DSP utils
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
#ifndef DSPUTIL_H
#define DSPUTIL_H
#include "common.h"
#include "avcodec.h"
+#include "xineutils.h"
#undef DEBUG
-//#define DEBUG
/* dct code */
typedef short DCTELEM;
-void jpeg_fdct_ifast (DCTELEM *data);
+void fdct_ifast (DCTELEM *data);
void j_rev_dct (DCTELEM *data);
void fdct_mmx(DCTELEM *block);
-void (*av_fdct)(DCTELEM *block);
+extern void (*av_fdct)(DCTELEM *block);
/* encoding scans */
extern UINT8 ff_alternate_horizontal_scan[64];
@@ -37,7 +55,10 @@ void dsputil_init(void);
/* pixel ops : interface with DCT */
extern void (*ff_idct)(DCTELEM *block);
+extern void (*ff_idct_put)(UINT8 *dest, int line_size, DCTELEM *block);
+extern void (*ff_idct_add)(UINT8 *dest, int line_size, DCTELEM *block);
extern void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
+extern void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
extern void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
extern void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
@@ -45,6 +66,7 @@ extern void (*clear_blocks)(DCTELEM *blocks);
void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size);
+void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
void clear_blocks_c(DCTELEM *blocks);
@@ -60,13 +82,6 @@ extern op_pixels_func avg_no_rnd_pixels_tab[4];
extern qpel_mc_func qpel_mc_rnd_tab[16];
extern qpel_mc_func qpel_mc_no_rnd_tab[16];
-
-/* sub pixel (encoding) */
-extern void (*sub_pixels_tab[4])(DCTELEM *block, const UINT8 *pixels, int line_size, int h);
-
-#define sub_pixels_2(block, pixels, line_size, dxy) \
- sub_pixels_tab[dxy](block, pixels, line_size, 8)
-
/* motion estimation */
typedef int (*op_pixels_abs_func)(UINT8 *blk1, UINT8 *blk2, int line_size);
@@ -91,9 +106,9 @@ static inline int block_permute_op(int j)
}
void block_permute(INT16 *block);
-
+
#if defined(ARCH_X86)
-#define HAVE_MMX
+#define HAVE_MMX 1
#endif
#if defined(HAVE_MMX)
@@ -108,10 +123,10 @@ void block_permute(INT16 *block);
extern int mm_flags;
-/* int mm_support(void); */
+/*int mm_support(void);*/
#define mm_support() xine_mm_accel()
-#if 0
+#if 0
static inline void emms(void)
{
__asm __volatile ("emms;":::"memory");
@@ -127,6 +142,7 @@ static inline void emms(void)
#define __align8 __attribute__ ((aligned (8)))
void dsputil_init_mmx(void);
+void dsputil_set_bit_exact_mmx(void);
#elif defined(ARCH_ARMV4L)
diff --git a/src/libffmpeg/libavcodec/h263.c b/src/libffmpeg/libavcodec/h263.c
index a8d04d58a..decddd344 100644
--- a/src/libffmpeg/libavcodec/h263.c
+++ b/src/libffmpeg/libavcodec/h263.c
@@ -1,25 +1,27 @@
/*
* H263/MPEG4 backend for ffmpeg encoder and decoder
- * Copyright (c) 2000,2001 Gerard Lantau.
+ * Copyright (c) 2000,2001 Fabrice Bellard.
* H263+ support.
* Copyright (c) 2001 Juan J. Sierralta P.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
*
- * This program is distributed in the hope that it will be useful,
+ * This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
- * ac prediction encoding by Michael Niedermayer <michaelni@gmx.at>
+ * ac prediction encoding & b-frame support by Michael Niedermayer <michaelni@gmx.at>
*/
+
+//#define DEBUG
#include "common.h"
#include "dsputil.h"
#include "avcodec.h"
@@ -28,24 +30,26 @@
#include "mpeg4data.h"
//rounded divison & shift
-#define RDIV(a,b) ((a) > 0 ? ((a)+((b)>>1))/(b) : ((a)-((b)>>1))/(b))
#define RSHIFT(a,b) ((a) > 0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b))
-#define ABS(a) (((a)>=0)?(a):(-(a)))
-#define MAX(a,b) ((a) > (b) ? (a) : (b))
-#define MIN(a,b) ((a) < (b) ? (a) : (b))
+
+#define PRINT_MB_TYPE(a) ;
+//#define PRINT_MB_TYPE(a) printf(a);
static void h263_encode_block(MpegEncContext * s, DCTELEM * block,
int n);
-static void h263_encode_motion(MpegEncContext * s, int val);
+static void h263_encode_motion(MpegEncContext * s, int val, int fcode);
static void h263p_encode_umotion(MpegEncContext * s, int val);
static void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block,
- int n, int dc, UINT8 *scan_table);
+ int n, int dc, UINT8 *scan_table,
+ PutBitContext *dc_pb, PutBitContext *ac_pb);
static int h263_decode_motion(MpegEncContext * s, int pred, int fcode);
static int h263p_decode_umotion(MpegEncContext * s, int pred);
static int h263_decode_block(MpegEncContext * s, DCTELEM * block,
int n, int coded);
-static int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
+static inline int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr);
+static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
int n, int coded);
+static int h263_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr);
static inline int mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *dir_ptr);
static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n,
int dir);
@@ -65,15 +69,15 @@ int h263_get_picture_format(int width, int height)
int format;
if (width == 128 && height == 96)
- format = 1;
+ format = 1;
else if (width == 176 && height == 144)
- format = 2;
+ format = 2;
else if (width == 352 && height == 288)
- format = 3;
+ format = 3;
else if (width == 704 && height == 576)
- format = 4;
+ format = 4;
else if (width == 1408 && height == 1152)
- format = 5;
+ format = 5;
else
format = 7;
return format;
@@ -128,7 +132,7 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
put_bits(&s->pb, 1, s->umvplus); /* Unrestricted Motion Vector */
put_bits(&s->pb,1,0); /* SAC: off */
put_bits(&s->pb,1,0); /* Advanced Prediction Mode: off */
- put_bits(&s->pb,1,0); /* Advanced Intra Coding: off */
+ put_bits(&s->pb,1,s->h263_aic); /* Advanced Intra Coding */
put_bits(&s->pb,1,0); /* Deblocking Filter: off */
put_bits(&s->pb,1,0); /* Slice Structured: off */
put_bits(&s->pb,1,0); /* Reference Picture Selection: off */
@@ -142,7 +146,11 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
put_bits(&s->pb,1,0); /* Reference Picture Resampling: off */
put_bits(&s->pb,1,0); /* Reduced-Resolution Update: off */
- put_bits(&s->pb,1,0); /* Rounding Type */
+ if (s->pict_type == I_TYPE)
+ s->no_rounding = 0;
+ else
+ s->no_rounding ^= 1;
+ put_bits(&s->pb,1,s->no_rounding); /* Rounding Type */
put_bits(&s->pb,2,0); /* Reserved */
put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */
@@ -152,6 +160,9 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
if (format == 7) {
/* Custom Picture Format (CPFMT) */
+ if (s->aspect_ratio_info)
+ put_bits(&s->pb,4,s->aspect_ratio_info);
+ else
put_bits(&s->pb,4,2); /* Aspect ratio: CIF 12:11 (4:3) picture */
put_bits(&s->pb,9,(s->width >> 2) - 1);
put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */
@@ -252,78 +263,230 @@ void mpeg4_encode_mb(MpegEncContext * s,
DCTELEM block[6][64],
int motion_x, int motion_y)
{
- int cbpc, cbpy, i, cbp, pred_x, pred_y;
+ int cbpc, cbpy, i, pred_x, pred_y;
int bits;
+ PutBitContext * const pb2 = s->data_partitioning ? &s->pb2 : &s->pb;
+ PutBitContext * const tex_pb = s->data_partitioning && s->pict_type!=B_TYPE ? &s->tex_pb : &s->pb;
+ PutBitContext * const dc_pb = s->data_partitioning && s->pict_type!=I_TYPE ? &s->pb2 : &s->pb;
+ const int interleaved_stats= (s->flags&CODEC_FLAG_PASS1) && !s->data_partitioning ? 1 : 0;
// printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
if (!s->mb_intra) {
/* compute cbp */
- cbp = 0;
+ int cbp = 0;
for (i = 0; i < 6; i++) {
- if (s->block_last_index[i] >= 0)
- cbp |= 1 << (5 - i);
- }
- if ((cbp | motion_x | motion_y) == 0 && s->mv_type==MV_TYPE_16X16) {
- /* skip macroblock */
- put_bits(&s->pb, 1, 1);
- s->misc_bits++;
- s->last_bits++;
- s->skip_count++;
- return;
+ if (s->block_last_index[i] >= 0)
+ cbp |= 1 << (5 - i);
}
- put_bits(&s->pb, 1, 0); /* mb coded */
- if(s->mv_type==MV_TYPE_16X16){
- cbpc = cbp & 3;
- put_bits(&s->pb,
- inter_MCBPC_bits[cbpc],
- inter_MCBPC_code[cbpc]);
- cbpy = cbp >> 2;
- cbpy ^= 0xf;
- put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
-
- bits= get_bit_count(&s->pb);
- s->misc_bits+= bits - s->last_bits;
- s->last_bits=bits;
- /* motion vectors: 16x16 mode */
- h263_pred_motion(s, 0, &pred_x, &pred_y);
-
- h263_encode_motion(s, motion_x - pred_x);
- h263_encode_motion(s, motion_y - pred_y);
- }else{
- cbpc = (cbp & 3)+16;
- put_bits(&s->pb,
- inter_MCBPC_bits[cbpc],
- inter_MCBPC_code[cbpc]);
- cbpy = cbp >> 2;
- cbpy ^= 0xf;
- put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+ if(s->pict_type==B_TYPE){
+ static const int mb_type_table[8]= {-1, 2, 3, 1,-1,-1,-1, 0}; /* convert from mv_dir to type */
+ int mb_type= mb_type_table[s->mv_dir];
+
+ if(s->mb_x==0){
+ s->last_mv[0][0][0]=
+ s->last_mv[0][0][1]=
+ s->last_mv[1][0][0]=
+ s->last_mv[1][0][1]= 0;
+ }
- bits= get_bit_count(&s->pb);
- s->misc_bits+= bits - s->last_bits;
- s->last_bits=bits;
+ /* nothing to do if this MB was skipped in the next P Frame */
+ if(s->mbskip_table[s->mb_y * s->mb_width + s->mb_x]){
+ s->skip_count++;
+ s->mv[0][0][0]=
+ s->mv[0][0][1]=
+ s->mv[1][0][0]=
+ s->mv[1][0][1]= 0;
+ s->mv_dir= MV_DIR_FORWARD; //doesn't matter
+ return;
+ }
- for(i=0; i<4; i++){
- /* motion vectors: 8x8 mode*/
- h263_pred_motion(s, i, &pred_x, &pred_y);
+ if ((cbp | motion_x | motion_y | mb_type) ==0) {
+ /* direct MB with MV={0,0} */
+ put_bits(&s->pb, 1, 1); /* mb not coded modb1=1 */
- h263_encode_motion(s, s->motion_val[ s->block_index[i] ][0] - pred_x);
- h263_encode_motion(s, s->motion_val[ s->block_index[i] ][1] - pred_y);
+ if(interleaved_stats){
+ s->misc_bits++;
+ s->last_bits++;
+ }
+ s->skip_count++;
+ return;
}
- }
- bits= get_bit_count(&s->pb);
- s->mv_bits+= bits - s->last_bits;
- s->last_bits=bits;
+ put_bits(&s->pb, 1, 0); /* mb coded modb1=0 */
+ put_bits(&s->pb, 1, cbp ? 0 : 1); /* modb2 */ //FIXME merge
+ put_bits(&s->pb, mb_type+1, 1); // this table is so simple that we don't need it :)
+ if(cbp) put_bits(&s->pb, 6, cbp);
+
+ if(cbp && mb_type)
+ put_bits(&s->pb, 1, 0); /* no q-scale change */
- /* encode each block */
- for (i = 0; i < 6; i++) {
- mpeg4_encode_block(s, block[i], i, 0, zigzag_direct);
+ if(interleaved_stats){
+ bits= get_bit_count(&s->pb);
+ s->misc_bits+= bits - s->last_bits;
+ s->last_bits=bits;
+ }
+
+ switch(mb_type)
+ {
+ case 0: /* direct */
+ h263_encode_motion(s, motion_x, 1);
+ h263_encode_motion(s, motion_y, 1);
+ break;
+ case 1: /* bidir */
+ h263_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code);
+ h263_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code);
+ h263_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code);
+ h263_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code);
+ s->last_mv[0][0][0]= s->mv[0][0][0];
+ s->last_mv[0][0][1]= s->mv[0][0][1];
+ s->last_mv[1][0][0]= s->mv[1][0][0];
+ s->last_mv[1][0][1]= s->mv[1][0][1];
+ break;
+ case 2: /* backward */
+ h263_encode_motion(s, motion_x - s->last_mv[1][0][0], s->b_code);
+ h263_encode_motion(s, motion_y - s->last_mv[1][0][1], s->b_code);
+ s->last_mv[1][0][0]= motion_x;
+ s->last_mv[1][0][1]= motion_y;
+ break;
+ case 3: /* forward */
+ h263_encode_motion(s, motion_x - s->last_mv[0][0][0], s->f_code);
+ h263_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code);
+ s->last_mv[0][0][0]= motion_x;
+ s->last_mv[0][0][1]= motion_y;
+ break;
+ default:
+ printf("unknown mb type\n");
+ return;
+ }
+
+ if(interleaved_stats){
+ bits= get_bit_count(&s->pb);
+ s->mv_bits+= bits - s->last_bits;
+ s->last_bits=bits;
+ }
+
+ /* encode each block */
+ for (i = 0; i < 6; i++) {
+ mpeg4_encode_block(s, block[i], i, 0, zigzag_direct, NULL, &s->pb);
+ }
+
+ if(interleaved_stats){
+ bits= get_bit_count(&s->pb);
+ s->p_tex_bits+= bits - s->last_bits;
+ s->last_bits=bits;
+ }
+ }else{ /* s->pict_type!=B_TYPE */
+ if ((cbp | motion_x | motion_y) == 0 && s->mv_type==MV_TYPE_16X16) {
+ /* check if the B frames can skip it too, as we must skip it if we skip here
+ why didn't they just compress the skip-mb bits instead of reusing them ?! */
+ if(s->max_b_frames>0){
+ int i;
+ int x,y, offset;
+ uint8_t *p_pic;
+
+ x= s->mb_x*16;
+ y= s->mb_y*16;
+ if(x+16 > s->width) x= s->width-16;
+ if(y+16 > s->height) y= s->height-16;
+
+ offset= x + y*s->linesize;
+ p_pic= s->new_picture[0] + offset;
+
+ s->mb_skiped=1;
+ for(i=0; i<s->max_b_frames; i++){
+ uint8_t *b_pic;
+ int diff;
+
+ if(s->coded_order[i+1].pict_type!=B_TYPE) break;
+
+ b_pic= s->coded_order[i+1].picture[0] + offset;
+ diff= pix_abs16x16(p_pic, b_pic, s->linesize);
+ if(diff>s->qscale*70){ //FIXME check that 70 is optimal
+ s->mb_skiped=0;
+ break;
+ }
+ }
+ }else
+ s->mb_skiped=1;
+
+ if(s->mb_skiped==1){
+ /* skip macroblock */
+ put_bits(&s->pb, 1, 1);
+
+ if(interleaved_stats){
+ s->misc_bits++;
+ s->last_bits++;
+ }
+ s->skip_count++;
+ return;
+ }
+ }
+
+ put_bits(&s->pb, 1, 0); /* mb coded */
+ if(s->mv_type==MV_TYPE_16X16){
+ cbpc = cbp & 3;
+ put_bits(&s->pb,
+ inter_MCBPC_bits[cbpc],
+ inter_MCBPC_code[cbpc]);
+ cbpy = cbp >> 2;
+ cbpy ^= 0xf;
+ put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+
+ if(interleaved_stats){
+ bits= get_bit_count(&s->pb);
+ s->misc_bits+= bits - s->last_bits;
+ s->last_bits=bits;
+ }
+
+ /* motion vectors: 16x16 mode */
+ h263_pred_motion(s, 0, &pred_x, &pred_y);
+
+ h263_encode_motion(s, motion_x - pred_x, s->f_code);
+ h263_encode_motion(s, motion_y - pred_y, s->f_code);
+ }else{
+ cbpc = (cbp & 3)+16;
+ put_bits(&s->pb,
+ inter_MCBPC_bits[cbpc],
+ inter_MCBPC_code[cbpc]);
+ cbpy = cbp >> 2;
+ cbpy ^= 0xf;
+ put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+
+ if(interleaved_stats){
+ bits= get_bit_count(&s->pb);
+ s->misc_bits+= bits - s->last_bits;
+ s->last_bits=bits;
+ }
+
+ for(i=0; i<4; i++){
+ /* motion vectors: 8x8 mode*/
+ h263_pred_motion(s, i, &pred_x, &pred_y);
+
+ h263_encode_motion(s, s->motion_val[ s->block_index[i] ][0] - pred_x, s->f_code);
+ h263_encode_motion(s, s->motion_val[ s->block_index[i] ][1] - pred_y, s->f_code);
+ }
+ }
+
+ if(interleaved_stats){
+ bits= get_bit_count(&s->pb);
+ s->mv_bits+= bits - s->last_bits;
+ s->last_bits=bits;
+ }
+
+ /* encode each block */
+ for (i = 0; i < 6; i++) {
+ mpeg4_encode_block(s, block[i], i, 0, zigzag_direct, NULL, tex_pb);
+ }
+
+ if(interleaved_stats){
+ bits= get_bit_count(&s->pb);
+ s->p_tex_bits+= bits - s->last_bits;
+ s->last_bits=bits;
+ }
+ s->p_count++;
}
- bits= get_bit_count(&s->pb);
- s->p_tex_bits+= bits - s->last_bits;
- s->last_bits=bits;
- s->p_count++;
} else {
+ int cbp;
int dc_diff[6]; //dc values with the dc prediction subtracted
int dir[6]; //prediction direction
int zigzag_last_index[6];
@@ -381,22 +544,26 @@ void mpeg4_encode_mb(MpegEncContext * s,
inter_MCBPC_bits[cbpc + 4],
inter_MCBPC_code[cbpc + 4]);
}
- put_bits(&s->pb, 1, s->ac_pred);
+ put_bits(pb2, 1, s->ac_pred);
cbpy = cbp >> 2;
- put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+ put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
- bits= get_bit_count(&s->pb);
- s->misc_bits+= bits - s->last_bits;
- s->last_bits=bits;
+ if(interleaved_stats){
+ bits= get_bit_count(&s->pb);
+ s->misc_bits+= bits - s->last_bits;
+ s->last_bits=bits;
+ }
/* encode each block */
for (i = 0; i < 6; i++) {
- mpeg4_encode_block(s, block[i], i, dc_diff[i], scan_table[i]);
+ mpeg4_encode_block(s, block[i], i, dc_diff[i], scan_table[i], dc_pb, tex_pb);
}
- bits= get_bit_count(&s->pb);
- s->i_tex_bits+= bits - s->last_bits;
- s->last_bits=bits;
+ if(interleaved_stats){
+ bits= get_bit_count(&s->pb);
+ s->i_tex_bits+= bits - s->last_bits;
+ s->last_bits=bits;
+ }
s->i_count++;
/* restore ac coeffs & last_index stuff if we messed them up with the prediction */
@@ -425,76 +592,169 @@ void h263_encode_mb(MpegEncContext * s,
int motion_x, int motion_y)
{
int cbpc, cbpy, i, cbp, pred_x, pred_y;
-
- // printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
- if (!s->mb_intra) {
- /* compute cbp */
- cbp = 0;
- for (i = 0; i < 6; i++) {
- if (s->block_last_index[i] >= 0)
- cbp |= 1 << (5 - i);
- }
- if ((cbp | motion_x | motion_y) == 0) {
- /* skip macroblock */
- put_bits(&s->pb, 1, 1);
- return;
- }
- put_bits(&s->pb, 1, 0); /* mb coded */
- cbpc = cbp & 3;
- put_bits(&s->pb,
- inter_MCBPC_bits[cbpc],
- inter_MCBPC_code[cbpc]);
- cbpy = cbp >> 2;
- cbpy ^= 0xf;
- put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
-
- /* motion vectors: 16x16 mode only now */
- h263_pred_motion(s, 0, &pred_x, &pred_y);
+ INT16 pred_dc;
+ INT16 rec_intradc[6];
+ UINT16 *dc_ptr[6];
+
+ //printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
+ if (!s->mb_intra) {
+ /* compute cbp */
+ cbp = 0;
+ for (i = 0; i < 6; i++) {
+ if (s->block_last_index[i] >= 0)
+ cbp |= 1 << (5 - i);
+ }
+ if ((cbp | motion_x | motion_y) == 0) {
+ /* skip macroblock */
+ put_bits(&s->pb, 1, 1);
+ return;
+ }
+ put_bits(&s->pb, 1, 0); /* mb coded */
+ cbpc = cbp & 3;
+ put_bits(&s->pb,
+ inter_MCBPC_bits[cbpc],
+ inter_MCBPC_code[cbpc]);
+ cbpy = cbp >> 2;
+ cbpy ^= 0xf;
+ put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+
+ /* motion vectors: 16x16 mode only now */
+ h263_pred_motion(s, 0, &pred_x, &pred_y);
- if (!s->umvplus) {
- h263_encode_motion(s, motion_x - pred_x);
- h263_encode_motion(s, motion_y - pred_y);
- }
- else {
- h263p_encode_umotion(s, motion_x - pred_x);
- h263p_encode_umotion(s, motion_y - pred_y);
- if (((motion_x - pred_x) == 1) && ((motion_y - pred_y) == 1))
- /* To prevent Start Code emulation */
- put_bits(&s->pb,1,1);
- }
- } else {
- /* compute cbp */
- cbp = 0;
- for (i = 0; i < 6; i++) {
- if (s->block_last_index[i] >= 1)
- cbp |= 1 << (5 - i);
- }
+ if (!s->umvplus) {
+ h263_encode_motion(s, motion_x - pred_x, s->f_code);
+ h263_encode_motion(s, motion_y - pred_y, s->f_code);
+ }
+ else {
+ h263p_encode_umotion(s, motion_x - pred_x);
+ h263p_encode_umotion(s, motion_y - pred_y);
+ if (((motion_x - pred_x) == 1) && ((motion_y - pred_y) == 1))
+ /* To prevent Start Code emulation */
+ put_bits(&s->pb,1,1);
+ }
+ } else {
+ int li = s->h263_aic ? 0 : 1;
+
+ cbp = 0;
+ for(i=0; i<6; i++) {
+ /* Predict DC */
+ if (s->h263_aic && s->mb_intra) {
+ INT16 level = block[i][0];
+
+ pred_dc = h263_pred_dc(s, i, &dc_ptr[i]);
+ level -= pred_dc;
+ /* Quant */
+ if (level < 0)
+ level = (level + (s->qscale >> 1))/(s->y_dc_scale);
+ else
+ level = (level - (s->qscale >> 1))/(s->y_dc_scale);
+
+ /* AIC can change CBP */
+ if (level == 0 && s->block_last_index[i] == 0)
+ s->block_last_index[i] = -1;
+ else if (level < -127)
+ level = -127;
+ else if (level > 127)
+ level = 127;
+
+ block[i][0] = level;
+ /* Reconstruction */
+ rec_intradc[i] = (s->y_dc_scale*level) + pred_dc;
+ /* Oddify */
+ rec_intradc[i] |= 1;
+ //if ((rec_intradc[i] % 2) == 0)
+ // rec_intradc[i]++;
+ /* Clipping */
+ if (rec_intradc[i] < 0)
+ rec_intradc[i] = 0;
+ else if (rec_intradc[i] > 2047)
+ rec_intradc[i] = 2047;
+
+ /* Update AC/DC tables */
+ *dc_ptr[i] = rec_intradc[i];
+ }
+ /* compute cbp */
+ if (s->block_last_index[i] >= li)
+ cbp |= 1 << (5 - i);
+ }
- cbpc = cbp & 3;
- if (s->pict_type == I_TYPE) {
- put_bits(&s->pb,
- intra_MCBPC_bits[cbpc],
- intra_MCBPC_code[cbpc]);
- } else {
- put_bits(&s->pb, 1, 0); /* mb coded */
- put_bits(&s->pb,
- inter_MCBPC_bits[cbpc + 4],
- inter_MCBPC_code[cbpc + 4]);
- }
- if (s->h263_pred) {
- /* XXX: currently, we do not try to use ac prediction */
- put_bits(&s->pb, 1, 0); /* no ac prediction */
- }
- cbpy = cbp >> 2;
- put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+ cbpc = cbp & 3;
+ if (s->pict_type == I_TYPE) {
+ put_bits(&s->pb,
+ intra_MCBPC_bits[cbpc],
+ intra_MCBPC_code[cbpc]);
+ } else {
+ put_bits(&s->pb, 1, 0); /* mb coded */
+ put_bits(&s->pb,
+ inter_MCBPC_bits[cbpc + 4],
+ inter_MCBPC_code[cbpc + 4]);
+ }
+ if (s->h263_aic) {
+ /* XXX: currently, we do not try to use ac prediction */
+ put_bits(&s->pb, 1, 0); /* no AC prediction */
+ }
+ cbpy = cbp >> 2;
+ put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
}
- /* encode each block */
- for (i = 0; i < 6; i++) {
+ for(i=0; i<6; i++) {
+ /* encode each block */
h263_encode_block(s, block[i], i);
+
+ /* Update INTRADC for decoding */
+ if (s->h263_aic && s->mb_intra) {
+ block[i][0] = rec_intradc[i];
+
+ }
}
}
+static int h263_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr)
+{
+ int x, y, wrap, a, c, pred_dc, scale;
+ INT16 *dc_val, *ac_val;
+
+ /* find prediction */
+ if (n < 4) {
+ x = 2 * s->mb_x + 1 + (n & 1);
+ y = 2 * s->mb_y + 1 + ((n & 2) >> 1);
+ wrap = s->mb_width * 2 + 2;
+ dc_val = s->dc_val[0];
+ ac_val = s->ac_val[0][0];
+ scale = s->y_dc_scale;
+ } else {
+ x = s->mb_x + 1;
+ y = s->mb_y + 1;
+ wrap = s->mb_width + 2;
+ dc_val = s->dc_val[n - 4 + 1];
+ ac_val = s->ac_val[n - 4 + 1][0];
+ scale = s->c_dc_scale;
+ }
+ /* B C
+ * A X
+ */
+ a = dc_val[(x - 1) + (y) * wrap];
+ c = dc_val[(x) + (y - 1) * wrap];
+
+ /* No prediction outside GOB boundary */
+ if (s->first_slice_line && ((n < 2) || (n > 3)))
+ c = 1024;
+ pred_dc = 1024;
+ /* just DC prediction */
+ if (a != 1024 && c != 1024)
+ pred_dc = (a + c) >> 1;
+ else if (a != 1024)
+ pred_dc = a;
+ else
+ pred_dc = c;
+
+ /* we assume pred is positive */
+ //pred_dc = (pred_dc + (scale >> 1)) / scale;
+ *dc_val_ptr = &dc_val[x + y * wrap];
+ return pred_dc;
+}
+
+
void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n)
{
int x, y, wrap, a, c, pred_dc, scale, i;
@@ -526,6 +786,9 @@ void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n)
a = dc_val[(x - 1) + (y) * wrap];
c = dc_val[(x) + (y - 1) * wrap];
+ /* No prediction outside GOB boundary */
+ if (s->first_slice_line && ((n < 2) || (n > 3)))
+ c = 1024;
pred_dc = 1024;
if (s->ac_pred) {
if (s->h263_aic_dir) {
@@ -588,13 +851,46 @@ INT16 *h263_pred_motion(MpegEncContext * s, int block,
mot_val = s->motion_val[xy];
- /* special case for first line */
- if ((s->mb_y == 0 || s->first_slice_line || s->first_gob_line) && block<2) {
- A = s->motion_val[xy - 1];
- *px = A[0];
- *py = A[1];
+ A = s->motion_val[xy - 1];
+ /* special case for first (slice) line */
+ if ((s->mb_y == 0 || s->first_slice_line) && block<3) {
+ // we can't just change some MVs to simulate that as we need them for the B frames (and ME)
+ // and if we ever support non-rectangular objects then we need to do a few ifs here anyway :(
+ if(block==0){ //most common case
+ if(s->mb_x == s->resync_mb_x){ //rare
+ *px= *py = 0;
+ }else if(s->mb_x + 1 == s->resync_mb_x){ //rare
+ C = s->motion_val[xy + off[block] - wrap];
+ if(s->mb_x==0){
+ *px = C[0];
+ *py = C[1];
+ }else{
+ *px = mid_pred(A[0], 0, C[0]);
+ *py = mid_pred(A[1], 0, C[1]);
+ }
+ }else{
+ *px = A[0];
+ *py = A[1];
+ }
+ }else if(block==1){
+ if(s->mb_x + 1 == s->resync_mb_x){ //rare
+ C = s->motion_val[xy + off[block] - wrap];
+ *px = mid_pred(A[0], 0, C[0]);
+ *py = mid_pred(A[1], 0, C[1]);
+ }else{
+ *px = A[0];
+ *py = A[1];
+ }
+ }else{ /* block==2*/
+ B = s->motion_val[xy - wrap];
+ C = s->motion_val[xy + off[block] - wrap];
+ if(s->mb_x == s->resync_mb_x) //rare
+ A[0]=A[1]=0;
+
+ *px = mid_pred(A[0], B[0], C[0]);
+ *py = mid_pred(A[1], B[1], C[1]);
+ }
} else {
- A = s->motion_val[xy - 1];
B = s->motion_val[xy - wrap];
C = s->motion_val[xy + off[block] - wrap];
*px = mid_pred(A[0], B[0], C[0]);
@@ -603,7 +899,7 @@ INT16 *h263_pred_motion(MpegEncContext * s, int block,
return mot_val;
}
-static void h263_encode_motion(MpegEncContext * s, int val)
+static void h263_encode_motion(MpegEncContext * s, int val, int f_code)
{
int range, l, m, bit_size, sign, code, bits;
@@ -612,7 +908,7 @@ static void h263_encode_motion(MpegEncContext * s, int val)
code = 0;
put_bits(&s->pb, mvtab[code][1], mvtab[code][0]);
} else {
- bit_size = s->f_code - 1;
+ bit_size = f_code - 1;
range = 1 << bit_size;
/* modulo encoding */
l = range * 32;
@@ -624,17 +920,14 @@ static void h263_encode_motion(MpegEncContext * s, int val)
}
if (val >= 0) {
- val--;
- code = (val >> bit_size) + 1;
- bits = val & (range - 1);
sign = 0;
} else {
val = -val;
- val--;
- code = (val >> bit_size) + 1;
- bits = val & (range - 1);
sign = 1;
}
+ val--;
+ code = (val >> bit_size) + 1;
+ bits = val & (range - 1);
put_bits(&s->pb, mvtab[code][1] + 1, (mvtab[code][0] << 1) | sign);
if (bit_size > 0) {
@@ -724,11 +1017,11 @@ static void init_mv_penalty_and_fcode(MpegEncContext *s)
}
}
-static void init_uni_dc_tab()
+static void init_uni_dc_tab(void)
{
int level, uni_code, uni_len;
- for(level=-255; level<256; level++){
+ for(level=-256; level<256; level++){
int size, v, l;
/* find number of bits */
size = 0;
@@ -787,23 +1080,42 @@ void h263_encode_init(MpegEncContext *s)
init_rl(&rl_inter);
init_rl(&rl_intra);
+ init_rl(&rl_intra_aic);
init_mv_penalty_and_fcode(s);
}
s->mv_penalty= mv_penalty; //FIXME exact table for msmpeg4 & h263p
// use fcodes >1 only for mpeg4 & h263 & h263p FIXME
- if(s->h263_plus) s->fcode_tab= umv_fcode_tab;
- else if(s->h263_pred && !s->h263_msmpeg4) s->fcode_tab= fcode_tab;
+ switch(s->codec_id){
+ case CODEC_ID_MPEG4:
+ s->fcode_tab= fcode_tab;
+ s->min_qcoeff= -2048;
+ s->max_qcoeff= 2047;
+ break;
+ case CODEC_ID_H263P:
+ s->fcode_tab= umv_fcode_tab;
+ s->min_qcoeff= -128;
+ s->max_qcoeff= 127;
+ break;
+ default: //nothing needed, default table already set in mpegvideo.c
+ s->min_qcoeff= -128;
+ s->max_qcoeff= 127;
+ }
+
+ /* h263 type bias */
+ //FIXME mpeg4 mpeg quantizer
+ s->intra_quant_bias=0;
+ s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
}
static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
{
- int level, run, last, i, j, last_index, last_non_zero, sign, slevel;
- int code;
- RLTable *rl = &rl_inter;
+ int level, run, last, i, j, last_index, last_non_zero, sign, slevel, code;
+ RLTable *rl;
- if (s->mb_intra) {
+ rl = &rl_inter;
+ if (s->mb_intra && !s->h263_aic) {
/* DC coef */
level = block[0];
/* 255 cannot be represented, so we clamp */
@@ -823,23 +1135,25 @@ static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
i = 1;
} else {
i = 0;
+ if (s->h263_aic && s->mb_intra)
+ rl = &rl_intra_aic;
}
-
+
/* AC coefs */
last_index = s->block_last_index[n];
last_non_zero = i - 1;
for (; i <= last_index; i++) {
- j = zigzag_direct[i];
- level = block[j];
- if (level) {
- run = i - last_non_zero - 1;
- last = (i == last_index);
- sign = 0;
- slevel = level;
- if (level < 0) {
- sign = 1;
- level = -level;
- }
+ j = zigzag_direct[i];
+ level = block[j];
+ if (level) {
+ run = i - last_non_zero - 1;
+ last = (i == last_index);
+ sign = 0;
+ slevel = level;
+ if (level < 0) {
+ sign = 1;
+ level = -level;
+ }
code = get_rl_index(rl, last, run, level);
put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
if (code == rl->n) {
@@ -849,42 +1163,60 @@ static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
} else {
put_bits(&s->pb, 1, sign);
}
- last_non_zero = i;
- }
+ last_non_zero = i;
+ }
}
}
/***************************************************/
-static void mpeg4_stuffing(PutBitContext * pbc)
+void ff_mpeg4_stuffing(PutBitContext * pbc)
{
int length;
put_bits(pbc, 1, 0);
length= (-get_bit_count(pbc))&7;
- put_bits(pbc, length, (1<<length)-1);
+ if(length) put_bits(pbc, length, (1<<length)-1);
}
-static void put_string(PutBitContext * pbc, char *s)
-{
- while(*s){
- put_bits(pbc, 8, *s);
- s++;
+/* must be called before writing the header */
+void ff_set_mpeg4_time(MpegEncContext * s, int picture_number){
+ int time_div, time_mod;
+
+ if(s->pict_type==I_TYPE){ //we will encode a vol header
+ s->time_increment_resolution= s->frame_rate/ff_gcd(s->frame_rate, FRAME_RATE_BASE);
+ if(s->time_increment_resolution>=256*256) s->time_increment_resolution= 256*128;
+
+ s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1;
+ }
+
+ s->time= picture_number*(INT64)FRAME_RATE_BASE*s->time_increment_resolution/s->frame_rate;
+ time_div= s->time/s->time_increment_resolution;
+ time_mod= s->time%s->time_increment_resolution;
+
+ if(s->pict_type==B_TYPE){
+ s->bp_time= s->last_non_b_time - s->time;
+ }else{
+ s->last_time_base= s->time_base;
+ s->time_base= time_div;
+ s->pp_time= s->time - s->last_non_b_time;
+ s->last_non_b_time= s->time;
}
- put_bits(pbc, 8, 0);
}
static void mpeg4_encode_vol_header(MpegEncContext * s)
{
int vo_ver_id=1; //must be 2 if we want GMC or q-pel
+ char buf[255];
+
+ s->vo_type= s->has_b_frames ? CORE_VO_TYPE : SIMPLE_VO_TYPE;
- if(get_bit_count(&s->pb)!=0) mpeg4_stuffing(&s->pb);
put_bits(&s->pb, 16, 0);
put_bits(&s->pb, 16, 0x100); /* video obj */
put_bits(&s->pb, 16, 0);
put_bits(&s->pb, 16, 0x120); /* video obj layer */
put_bits(&s->pb, 1, 0); /* random access vol */
- put_bits(&s->pb, 8, 1); /* video obj type indication= simple obj */
+ put_bits(&s->pb, 8, s->vo_type); /* video obj type indication */
put_bits(&s->pb, 1, 1); /* is obj layer id= yes */
put_bits(&s->pb, 4, vo_ver_id); /* is obj layer ver id */
put_bits(&s->pb, 3, 1); /* is obj layer priority */
@@ -892,11 +1224,20 @@ static void mpeg4_encode_vol_header(MpegEncContext * s)
put_bits(&s->pb, 4, s->aspect_ratio_info);/* aspect ratio info */
else
put_bits(&s->pb, 4, 1); /* aspect ratio info= sqare pixel */
- put_bits(&s->pb, 1, 0); /* vol control parameters= no */
+
+ if(s->low_delay){
+ put_bits(&s->pb, 1, 1); /* vol control parameters= yes */
+ put_bits(&s->pb, 2, 1); /* chroma format YUV 420/YV12 */
+ put_bits(&s->pb, 1, s->low_delay);
+ put_bits(&s->pb, 1, 0); /* vbv parameters= no */
+ }else{
+ put_bits(&s->pb, 1, 0); /* vol control parameters= no */
+ }
+
put_bits(&s->pb, 2, RECT_SHAPE); /* vol shape= rectangle */
put_bits(&s->pb, 1, 1); /* marker bit */
- put_bits(&s->pb, 16, s->time_increment_resolution=30000);
- s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1;
+
+ put_bits(&s->pb, 16, s->time_increment_resolution);
if (s->time_increment_bits < 1)
s->time_increment_bits = 1;
put_bits(&s->pb, 1, 1); /* marker bit */
@@ -918,37 +1259,56 @@ static void mpeg4_encode_vol_header(MpegEncContext * s)
if (vo_ver_id != 1)
put_bits(&s->pb, 1, s->quarter_sample=0);
put_bits(&s->pb, 1, 1); /* complexity estimation disable */
- put_bits(&s->pb, 1, 1); /* resync marker disable */
- put_bits(&s->pb, 1, 0); /* data partitioned */
+ s->resync_marker= s->rtp_mode;
+ put_bits(&s->pb, 1, s->resync_marker ? 0 : 1);/* resync marker disable */
+ put_bits(&s->pb, 1, s->data_partitioning ? 1 : 0);
+ if(s->data_partitioning){
+ put_bits(&s->pb, 1, 0); /* no rvlc */
+ }
+
if (vo_ver_id != 1){
put_bits(&s->pb, 1, 0); /* newpred */
put_bits(&s->pb, 1, 0); /* reduced res vop */
}
put_bits(&s->pb, 1, 0); /* scalability */
- mpeg4_stuffing(&s->pb);
+ ff_mpeg4_stuffing(&s->pb);
put_bits(&s->pb, 16, 0);
put_bits(&s->pb, 16, 0x1B2); /* user_data */
- put_string(&s->pb, "ffmpeg"); //FIXME append some version ...
+ sprintf(buf, "FFmpeg%sb%s", FFMPEG_VERSION, LIBAVCODEC_BUILD_STR);
+ put_string(&s->pb, buf);
- s->no_rounding = 0;
+ ff_mpeg4_stuffing(&s->pb);
}
/* write mpeg4 VOP header */
void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
{
- if(s->pict_type==I_TYPE) mpeg4_encode_vol_header(s);
-
- if(get_bit_count(&s->pb)!=0) mpeg4_stuffing(&s->pb);
+ int time_incr;
+ int time_div, time_mod;
+
+ if(s->pict_type==I_TYPE){
+ s->no_rounding=0;
+ if(picture_number==0 || !s->strict_std_compliance)
+ mpeg4_encode_vol_header(s);
+ }
+
+//printf("num:%d rate:%d base:%d\n", s->picture_number, s->frame_rate, FRAME_RATE_BASE);
+
put_bits(&s->pb, 16, 0); /* vop header */
put_bits(&s->pb, 16, 0x1B6); /* vop header */
put_bits(&s->pb, 2, s->pict_type - 1); /* pict type: I = 0 , P = 1 */
- /* XXX: time base + 1 not always correct */
- put_bits(&s->pb, 1, 1);
+
+ time_div= s->time/s->time_increment_resolution;
+ time_mod= s->time%s->time_increment_resolution;
+ time_incr= time_div - s->last_time_base;
+ while(time_incr--)
+ put_bits(&s->pb, 1, 1);
+
put_bits(&s->pb, 1, 0);
put_bits(&s->pb, 1, 1); /* marker */
- put_bits(&s->pb, s->time_increment_bits, 1); /* XXX: correct time increment */
+ put_bits(&s->pb, s->time_increment_bits, time_mod); /* time increment */
put_bits(&s->pb, 1, 1); /* marker */
put_bits(&s->pb, 1, 1); /* vop coded */
if ( s->pict_type == P_TYPE
@@ -1106,16 +1466,17 @@ static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n,
}
}
-static inline void mpeg4_encode_dc(MpegEncContext * s, int level, int n)
+static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n)
{
#if 1
+// if(level<-255 || level>255) printf("dc overflow\n");
level+=256;
if (n < 4) {
/* luminance */
- put_bits(&s->pb, uni_DCtab_lum[level][1], uni_DCtab_lum[level][0]);
+ put_bits(s, uni_DCtab_lum[level][1], uni_DCtab_lum[level][0]);
} else {
/* chrominance */
- put_bits(&s->pb, uni_DCtab_chrom[level][1], uni_DCtab_chrom[level][0]);
+ put_bits(s, uni_DCtab_chrom[level][1], uni_DCtab_chrom[level][0]);
}
#else
int size, v;
@@ -1146,7 +1507,8 @@ static inline void mpeg4_encode_dc(MpegEncContext * s, int level, int n)
#endif
}
-static void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int intra_dc, UINT8 *scan_table)
+static void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int intra_dc,
+ UINT8 *scan_table, PutBitContext *dc_pb, PutBitContext *ac_pb)
{
int level, run, last, i, j, last_index, last_non_zero, sign, slevel;
int code;
@@ -1154,7 +1516,7 @@ static void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int i
if (s->mb_intra) {
/* mpeg4 based DC predictor */
- mpeg4_encode_dc(s, intra_dc, n);
+ mpeg4_encode_dc(dc_pb, intra_dc, n);
i = 1;
rl = &rl_intra;
} else {
@@ -1178,7 +1540,7 @@ static void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int i
level = -level;
}
code = get_rl_index(rl, last, run, level);
- put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
+ put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
if (code == rl->n) {
int level1, run1;
level1 = level - rl->max_level[last][run];
@@ -1187,7 +1549,7 @@ static void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int i
code = get_rl_index(rl, last, run, level1);
if (code == rl->n) {
esc2:
- put_bits(&s->pb, 1, 1);
+ put_bits(ac_pb, 1, 1);
if (level > MAX_LEVEL)
goto esc3;
run1 = run - rl->max_run[last][level] - 1;
@@ -1197,26 +1559,26 @@ static void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int i
if (code == rl->n) {
esc3:
/* third escape */
- put_bits(&s->pb, 1, 1);
- put_bits(&s->pb, 1, last);
- put_bits(&s->pb, 6, run);
- put_bits(&s->pb, 1, 1);
- put_bits(&s->pb, 12, slevel & 0xfff);
- put_bits(&s->pb, 1, 1);
+ put_bits(ac_pb, 1, 1);
+ put_bits(ac_pb, 1, last);
+ put_bits(ac_pb, 6, run);
+ put_bits(ac_pb, 1, 1);
+ put_bits(ac_pb, 12, slevel & 0xfff);
+ put_bits(ac_pb, 1, 1);
} else {
/* second escape */
- put_bits(&s->pb, 1, 0);
- put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
- put_bits(&s->pb, 1, sign);
+ put_bits(ac_pb, 1, 0);
+ put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
+ put_bits(ac_pb, 1, sign);
}
} else {
/* first escape */
- put_bits(&s->pb, 1, 0);
- put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
- put_bits(&s->pb, 1, sign);
+ put_bits(ac_pb, 1, 0);
+ put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
+ put_bits(ac_pb, 1, sign);
}
} else {
- put_bits(&s->pb, 1, sign);
+ put_bits(ac_pb, 1, sign);
}
last_non_zero = i;
}
@@ -1265,11 +1627,11 @@ void init_rl(RLTable *rl)
if (run > max_run[level])
max_run[level] = run;
}
- rl->max_level[last] = malloc(MAX_RUN + 1);
+ rl->max_level[last] = av_malloc(MAX_RUN + 1);
memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
- rl->max_run[last] = malloc(MAX_LEVEL + 1);
+ rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
- rl->index_run[last] = malloc(MAX_RUN + 1);
+ rl->index_run[last] = av_malloc(MAX_RUN + 1);
memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
}
}
@@ -1356,82 +1718,146 @@ static inline void memsetw(short *tab, int val, int n)
tab[i] = val;
}
-static int mpeg4_resync(MpegEncContext *s)
+void ff_mpeg4_init_partitions(MpegEncContext *s) /* (re)initializes the two extra bit writers tex_pb and pb2 on their own buffers */
+{
+ init_put_bits(&s->tex_pb, s->tex_pb_buffer, PB_BUFFER_SIZE, NULL, NULL); /* its bits are accounted as texture bits by ff_mpeg4_merge_partitions() */
+ init_put_bits(&s->pb2 , s->pb2_buffer , PB_BUFFER_SIZE, NULL, NULL); /* its bits are accounted as misc bits by ff_mpeg4_merge_partitions() */
+}
+
+void ff_mpeg4_merge_partitions(MpegEncContext *s) /* writes a marker to s->pb, appends the pb2 and tex_pb data behind it and updates the bit statistics */
+{
+ const int pb2_len = get_bit_count(&s->pb2 );
+ const int tex_pb_len= get_bit_count(&s->tex_pb);
+ const int bits= get_bit_count(&s->pb); /* sampled before the marker below is written, so it excludes the marker */
+
+ if(s->pict_type==I_TYPE){
+ put_bits(&s->pb, 19, DC_MARKER); /* 19 bit marker for I pictures */
+ s->misc_bits+=19 + pb2_len + bits - s->last_bits; /* marker + pb2 + everything written to s->pb since last_bits */
+ s->i_tex_bits+= tex_pb_len;
+ }else{
+ put_bits(&s->pb, 17, MOTION_MARKER); /* 17 bit marker for all other picture types */
+ s->misc_bits+=17 + pb2_len;;
+ s->mv_bits+= bits - s->last_bits; /* here the s->pb data since last_bits is accounted as motion vector bits */
+ s->p_tex_bits+= tex_pb_len;
+ }
+ /* the lengths were read above, before flushing */
+ flush_put_bits(&s->pb2);
+ flush_put_bits(&s->tex_pb);
+ /* order in s->pb: marker, pb2 data, tex_pb data */
+ ff_copy_bits(&s->pb, s->pb2_buffer , pb2_len);
+ ff_copy_bits(&s->pb, s->tex_pb_buffer, tex_pb_len);
+ s->last_bits= get_bit_count(&s->pb); /* reference point for the next call */
+}
+
+void ff_mpeg4_encode_video_packet_header(MpegEncContext *s)
{
- int state, v, bits;
int mb_num_bits= av_log2(s->mb_num - 1) + 1;
- int header_extension=0, mb_num;
- int c_wrap, c_xy, l_wrap, l_xy;
-//printf("resync at %d %d\n", s->mb_x, s->mb_y);
-//printf("%X\n", show_bits(&s->gb, 24));
- if( get_bits_count(&s->gb) > s->gb.size*8-32)
- return 0;
+ ff_mpeg4_stuffing(&s->pb);
+ if(s->pict_type==I_TYPE)
+ put_bits(&s->pb, 16, 0);
+ else if(s->pict_type==B_TYPE)
+ put_bits(&s->pb, MAX(MAX(s->f_code, s->b_code)+15, 17), 0);
+ else /* S/P_TYPE */
+ put_bits(&s->pb, s->f_code+15, 0);
+ put_bits(&s->pb, 1, 1);
+
+ put_bits(&s->pb, mb_num_bits, s->mb_x + s->mb_y*s->mb_width);
+ put_bits(&s->pb, 5, s->qscale);
+ put_bits(&s->pb, 1, 0); /* no HEC */
+}
- align_get_bits(&s->gb);
- state = 0xff;
- for(;;) {
- v = get_bits(&s->gb, 8);
-//printf("%X ", v);
- state = ((state << 8) | v) & 0xffff;
- if (state == 0) break;
- if( get_bits_count(&s->gb) > s->gb.size*8-32){
- printf("resync failed\n");
- return -1;
- }
+/**
+ * decodes the next video packet header and sets s->next_resync_qscale
+ * returns mb_num of the next packet or <0 if something went wrong
+ */
+static int decode_video_packet_header(MpegEncContext *s, GetBitContext *gb)
+{
+ int bits;
+ int mb_num_bits= av_log2(s->mb_num - 1) + 1;
+ int header_extension=0, mb_num;
+//printf("%X\n", show_bits(&gb, 24));
+//printf("parse_video_packet_header\n");
+// if(show_aligned_bits(gb, 1, 16) != 0) return -1;
+
+ /* is there enough space left for a video packet + header */
+ if( get_bits_count(gb) > gb->size*8-20) return -1;
+
+//printf("resync at %d %d\n", s->mb_x, s->mb_y);
+// skip_bits(gb, 1);
+// align_get_bits(gb);
+ if(get_bits(gb, 16)!=0){
+ printf("internal error while decoding video packet header\n");
}
-//printf("%X\n", show_bits(&s->gb, 24));
+
+//printf("%X\n", show_bits(gb, 24));
bits=0;
- while(!get_bits1(&s->gb) && bits<30) bits++;
- if(s->pict_type == P_TYPE && bits != s->f_code-1)
- printf("marker does not match f_code\n");
- //FIXME check bits for B-framess
-//printf("%X\n", show_bits(&s->gb, 24));
+ while(!get_bits1(gb) && bits<30) bits++;
+ if((s->pict_type == P_TYPE || s->pict_type == S_TYPE) && bits != s->f_code-1){
+ printf("marker does not match f_code (is: %d should be: %d pos: %d end %d x: %d y: %d)\n",
+ bits+1, s->f_code, get_bits_count(gb), gb->size*8, s->mb_x, s->mb_y);
+ return -1;
+ }else if(s->pict_type == I_TYPE && bits != 0){
+ printf("marker too long\n");
+ return -1;
+ }else if(s->pict_type == B_TYPE && bits != MAX(MAX(s->f_code, s->b_code)-1, 1)){
+ printf("marker does not match f/b_code\n");
+ return -1;
+ }
+//printf("%X\n", show_bits(gb, 24));
if(s->shape != RECT_SHAPE){
- header_extension= get_bits1(&s->gb);
+ header_extension= get_bits1(gb);
//FIXME more stuff here
}
- mb_num= get_bits(&s->gb, mb_num_bits);
- if(mb_num != s->mb_x + s->mb_y*s->mb_width){
- printf("MB-num change not supported %d %d\n", mb_num, s->mb_x + s->mb_y*s->mb_width);
-// s->mb_x= mb_num % s->mb_width;
-// s->mb_y= mb_num / s->mb_width;
- //FIXME many vars are wrong now
- }
+ mb_num= get_bits(gb, mb_num_bits);
+ if(mb_num < s->mb_x + s->mb_y*s->mb_width || mb_num>=s->mb_num){
+ fprintf(stderr, "illegal mb_num in video packet (%d %d) \n", mb_num, s->mb_x + s->mb_y*s->mb_width);
+ return -1;
+ }
if(s->shape != BIN_ONLY_SHAPE){
- s->qscale= get_bits(&s->gb, 5);
- h263_dc_scale(s);
+ s->next_resync_qscale= get_bits(gb, 5);
+ if(s->next_resync_qscale==0)
+ s->next_resync_qscale= s->qscale;
+ if(s->next_resync_qscale==0){
+ fprintf(stderr, "qscale==0\n");
+ return -1;
+ }
}
if(s->shape == RECT_SHAPE){
- header_extension= get_bits1(&s->gb);
+ header_extension= get_bits1(gb);
}
if(header_extension){
+ int time_increment;
int time_incr=0;
- printf("header extension not really supported\n");
- while (get_bits1(&s->gb) != 0)
+ printf("header extension not supported\n");
+ return -1;
+
+ while (get_bits1(gb) != 0)
time_incr++;
- check_marker(&s->gb, "before time_increment in video packed header");
- s->time_increment= get_bits(&s->gb, s->time_increment_bits);
+ check_marker(gb, "before time_increment in video packed header");
+ time_increment= get_bits(gb, s->time_increment_bits);
if(s->pict_type!=B_TYPE){
+ s->last_time_base= s->time_base;
s->time_base+= time_incr;
- s->last_non_b_time[1]= s->last_non_b_time[0];
- s->last_non_b_time[0]= s->time_base*s->time_increment_resolution + s->time_increment;
+ s->time= s->time_base*s->time_increment_resolution + time_increment;
+ s->pp_time= s->time - s->last_non_b_time;
+ s->last_non_b_time= s->time;
}else{
- s->time= (s->last_non_b_time[1]/s->time_increment_resolution + time_incr)*s->time_increment_resolution;
- s->time+= s->time_increment;
+ s->time= (s->last_time_base + time_incr)*s->time_increment_resolution + time_increment;
+ s->bp_time= s->last_non_b_time - s->time;
}
- check_marker(&s->gb, "before vop_coding_type in video packed header");
+ check_marker(gb, "before vop_coding_type in video packed header");
- skip_bits(&s->gb, 2); /* vop coding type */
+ skip_bits(gb, 2); /* vop coding type */
//FIXME not rect stuff here
if(s->shape != BIN_ONLY_SHAPE){
- skip_bits(&s->gb, 3); /* intra dc vlc threshold */
+ skip_bits(gb, 3); /* intra dc vlc threshold */
if(s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE && s->num_sprite_warping_points){
mpeg4_decode_sprite_trajectory(s);
@@ -1440,44 +1866,506 @@ static int mpeg4_resync(MpegEncContext *s)
//FIXME reduced res stuff here
if (s->pict_type != I_TYPE) {
- s->f_code = get_bits(&s->gb, 3); /* fcode_for */
+ s->f_code = get_bits(gb, 3); /* fcode_for */
if(s->f_code==0){
printf("Error, video packet header damaged or not MPEG4 header (f_code=0)\n");
return -1; // makes no sense to continue, as the MV decoding will break very quickly
}
}
if (s->pict_type == B_TYPE) {
- s->b_code = get_bits(&s->gb, 3);
+ s->b_code = get_bits(gb, 3);
}
}
-
}
//FIXME new-pred stuff
+
+//printf("parse ok %d %d %d %d\n", mb_num, s->mb_x + s->mb_y*s->mb_width, get_bits_count(gb), get_bits_count(&s->gb));
+
+ return mb_num;
+}
+
+void ff_mpeg4_clean_buffers(MpegEncContext *s)
+{
+ int c_wrap, c_xy, l_wrap, l_xy;
l_wrap= s->block_wrap[0];
- l_xy= s->mb_y*l_wrap*2;
+ l_xy= s->mb_y*l_wrap*2 + s->mb_x*2;
c_wrap= s->block_wrap[4];
- c_xy= s->mb_y*c_wrap;
+ c_xy= s->mb_y*c_wrap + s->mb_x;
/* clean DC */
- memsetw(s->dc_val[0] + l_xy, 1024, l_wrap*3);
- memsetw(s->dc_val[1] + c_xy, 1024, c_wrap*2);
- memsetw(s->dc_val[2] + c_xy, 1024, c_wrap*2);
+ memsetw(s->dc_val[0] + l_xy, 1024, l_wrap*2+1);
+ memsetw(s->dc_val[1] + c_xy, 1024, c_wrap+1);
+ memsetw(s->dc_val[2] + c_xy, 1024, c_wrap+1);
/* clean AC */
- memset(s->ac_val[0] + l_xy, 0, l_wrap*3*16*sizeof(INT16));
- memset(s->ac_val[1] + c_xy, 0, c_wrap*2*16*sizeof(INT16));
- memset(s->ac_val[2] + c_xy, 0, c_wrap*2*16*sizeof(INT16));
+ memset(s->ac_val[0] + l_xy, 0, (l_wrap*2+1)*16*sizeof(INT16));
+ memset(s->ac_val[1] + c_xy, 0, (c_wrap +1)*16*sizeof(INT16));
+ memset(s->ac_val[2] + c_xy, 0, (c_wrap +1)*16*sizeof(INT16));
/* clean MV */
- memset(s->motion_val + l_xy, 0, l_wrap*3*2*sizeof(INT16));
+ // we can't clear the MVs as they might be needed by a B-frame
+// memset(s->motion_val + l_xy, 0, (l_wrap*2+1)*2*sizeof(INT16));
// memset(s->motion_val, 0, 2*sizeof(INT16)*(2 + s->mb_width*2)*(2 + s->mb_height*2));
- s->resync_x_pos= s->mb_x;
+ s->last_mv[0][0][0]=
+ s->last_mv[0][0][1]=
+ s->last_mv[1][0][0]=
+ s->last_mv[1][0][1]= 0;
+}
+
+/* searches from s->next_resync_gb for the next video packet header; on success sets s->next_resync_gb and s->mb_num_left and returns 0; returns -1 (with s->mb_num_left = macroblocks left in the picture) if the end of the data or a start code comes first */
+int ff_mpeg4_resync(MpegEncContext *s)
+{
+ GetBitContext gb; /* local copy, s->next_resync_gb is only overwritten on success */
+
+ /* search & parse next resync marker */
+
+ gb= s->next_resync_gb;
+ align_get_bits(&gb);
+//printf("mpeg4_resync %d next:%d \n", get_bits_count(&gb), get_bits_count(&s->next_resync_gb));
+ for(;;) {
+ int v= show_bits(&gb, 24); /* peek only, nothing is consumed here */
+ if( get_bits_count(&gb) >= gb.size*8-24 || v == 1 /* start-code */){
+ s->mb_num_left= s->mb_num - s->mb_x - s->mb_y*s->mb_width; /* everything up to the end of the picture */
+//printf("mpeg4_resync end\n");
+ s->gb= s->next_resync_gb; //continue at the next resync marker
+ return -1;
+ }else if(v>>8 == 0){ /* the first 16 of the 24 peeked bits are zero: candidate header */
+ int next;
+ s->next_resync_pos= get_bits_count(&gb); /* recorded even if the header turns out to be invalid */
+
+ next= decode_video_packet_header(s, &gb);
+ if(next >= 0){
+ s->mb_num_left= next - s->mb_x - s->mb_y*s->mb_width; /* macroblocks between the current position and the next packet */
+ break;
+ }
+ /* header rejected: realign and keep searching */
+ align_get_bits(&gb);
+ }
+ skip_bits(&gb, 8); /* the search advances 8 bits per iteration */
+ }
+ s->next_resync_gb=gb; /* positioned behind the parsed header */
+
+ return 0;
+}
+
+static inline void init_block_index(MpegEncContext *s) /* computes s->block_index[0..5] from mb_x/mb_y; the values are one update_block_index() step short, the caller visible here (ff_mpeg4_decode_partitions) runs update_block_index() before using them */
+{
+ s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1 + s->mb_x*2; /* entries 0-3 use block_wrap[0] and 2 rows / 2 columns per macroblock */
+ s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1) + s->mb_x*2;
+ s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1 + s->mb_x*2;
+ s->block_index[3]= s->block_wrap[0]*(s->mb_y*2 + 2) + s->mb_x*2;
+ s->block_index[4]= s->block_wrap[4]*(s->mb_y + 1) + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x; /* entries 4-5 use block_wrap[4], offset by block_wrap[0]*(mb_height*2 + 2); presumably the chroma entries follow the luma ones in a shared table - TODO confirm */
+ s->block_index[5]= s->block_wrap[4]*(s->mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x; /* same as entry 4 plus a further (mb_height + 2) rows of block_wrap[4] */
+}
+
+static inline void update_block_index(MpegEncContext *s) /* moves s->block_index[0..5] one macroblock to the right */
+{
+ s->block_index[0]+=2; /* entries 0-3 advance by 2 per macroblock */
+ s->block_index[1]+=2;
+ s->block_index[2]+=2;
+ s->block_index[3]+=2;
+ s->block_index[4]++; /* entries 4-5 advance by 1 per macroblock */
+ s->block_index[5]++;
+}
+
+/**
+ * decodes the first & second partition of the packet starting at (resync_mb_x, resync_mb_y) into cbp_table, mb_type, qscale_table, mbintra_table, pred_dir_table and motion_val
+ * returns 0 if no error, otherwise DECODING_DESYNC, DECODING_AC_LOST or DECODING_ACDC_LOST
+ */
+int ff_mpeg4_decode_partitions(MpegEncContext *s)
+{
+ static const INT8 quant_tab[4] = { -1, -2, 1, 2 }; /* qscale delta selected by the 2 bit dquant code */
+ int mb_num;
+
+ /* decode first partition */
+ mb_num=0;
s->first_slice_line=1;
+ s->mb_x= s->resync_mb_x;
+ for(s->mb_y= s->resync_mb_y; mb_num < s->mb_num_left; s->mb_y++){
+ init_block_index(s);
+ for(; mb_num < s->mb_num_left && s->mb_x<s->mb_width; s->mb_x++){
+ const int xy= s->mb_x + s->mb_y*s->mb_width;
+ int cbpc;
+ int dir=0;
+
+ mb_num++;
+ update_block_index(s);
+ if(s->mb_x == s->resync_mb_x && s->mb_y == s->resync_mb_y+1) /* reached the macroblock directly below the packet start */
+ s->first_slice_line=0;
+
+ if(s->mb_x==0) PRINT_MB_TYPE("\n");
+
+ if(s->pict_type==I_TYPE){
+ int i;
+
+ PRINT_MB_TYPE("I");
+ cbpc = get_vlc(&s->gb, &intra_MCBPC_vlc);
+ if (cbpc < 0){
+ fprintf(stderr, "cbpc corrupted at %d %d\n", s->mb_x, s->mb_y);
+ return DECODING_DESYNC;
+ }
+ s->cbp_table[xy]= cbpc & 3; /* low 2 bits only, cbpy is merged in by the second partition loop */
+ s->mb_type[xy]= MB_TYPE_INTRA;
+ s->mb_intra = 1;
+
+ if(cbpc & 4) { /* bit 2 of cbpc signals a dquant code */
+ s->qscale += quant_tab[get_bits(&s->gb, 2)];
+ if (s->qscale < 1)
+ s->qscale = 1;
+ else if (s->qscale > 31)
+ s->qscale = 31;
+ h263_dc_scale(s);
+ }
+ s->qscale_table[xy]= s->qscale;
+
+ s->mbintra_table[xy]= 1;
+ for(i=0; i<6; i++){
+ int dc_pred_dir;
+ int dc= mpeg4_decode_dc(s, i, &dc_pred_dir);
+ if(dc < 0){
+ fprintf(stderr, "DC corrupted at %d %d\n", s->mb_x, s->mb_y);
+ return DECODING_DESYNC;
+ }
+ dir<<=1; /* one direction bit per block, block i ends up in bit 5-i */
+ if(dc_pred_dir) dir|=1;
+ }
+ s->pred_dir_table[xy]= dir;
+ }else{ /* P/S_TYPE */
+ int mx, my, pred_x, pred_y;
+ INT16 * const mot_val= s->motion_val[s->block_index[0]];
+ const int stride= s->block_wrap[0]*2; /* in INT16 units: 2 values (x,y) per motion_val entry */
+
+ if(get_bits1(&s->gb)){
+ /* skip mb */
+ s->mb_type[xy]= MB_TYPE_SKIPED;
+ if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE){
+ const int a= s->sprite_warping_accuracy;
+ PRINT_MB_TYPE("G");
+ if(s->divx_version==500 && s->divx_build==413){
+ mx = s->sprite_offset[0][0] / (1<<(a-s->quarter_sample));
+ my = s->sprite_offset[0][1] / (1<<(a-s->quarter_sample));
+ }else{
+ mx = RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample);
+ my = RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample);
+ s->mb_type[xy]= MB_TYPE_GMC | MB_TYPE_SKIPED;
+ }
+ }else{
+ PRINT_MB_TYPE("S");
+ mx = 0;
+ my = 0;
+ }
+ mot_val[0 ]= mot_val[2 ]=
+ mot_val[0+stride]= mot_val[2+stride]= mx; /* same vector for all 4 entries of the macroblock */
+ mot_val[1 ]= mot_val[3 ]=
+ mot_val[1+stride]= mot_val[3+stride]= my;
+
+ if(s->mbintra_table[xy])
+ ff_clean_intra_table_entries(s);
+
+ continue;
+ }
+ cbpc = get_vlc(&s->gb, &inter_MCBPC_vlc);
+ if (cbpc < 0){
+ fprintf(stderr, "cbpc corrupted at %d %d\n", s->mb_x, s->mb_y);
+ return DECODING_DESYNC;
+ }
+ if (cbpc > 20)
+ cbpc+=3;
+ else if (cbpc == 20)
+ fprintf(stderr, "Stuffing !");
+ s->cbp_table[xy]= cbpc&(8+3); //8 is dquant
+
+ s->mb_intra = ((cbpc & 4) != 0);
+
+ if(s->mb_intra){
+ PRINT_MB_TYPE("I");
+ s->mbintra_table[xy]= 1;
+ s->mb_type[xy]= MB_TYPE_INTRA;
+ mot_val[0 ]= mot_val[2 ]=
+ mot_val[0+stride]= mot_val[2+stride]= 0;
+ mot_val[1 ]= mot_val[3 ]=
+ mot_val[1+stride]= mot_val[3+stride]= 0;
+ }else{
+ if(s->mbintra_table[xy])
+ ff_clean_intra_table_entries(s);
+
+ if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE && (cbpc & 16) == 0)
+ s->mcsel= get_bits1(&s->gb);
+ else s->mcsel= 0;
+
+ if ((cbpc & 16) == 0) {
+ PRINT_MB_TYPE("P");
+ /* 16x16 motion prediction */
+ s->mb_type[xy]= MB_TYPE_INTER;
+
+ h263_pred_motion(s, 0, &pred_x, &pred_y);
+ if(!s->mcsel)
+ mx = h263_decode_motion(s, pred_x, s->f_code);
+ else {
+ const int a= s->sprite_warping_accuracy;
+ if(s->divx_version==500 && s->divx_build==413){
+ mx = s->sprite_offset[0][0] / (1<<(a-s->quarter_sample));
+ }else{
+ mx = RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample);
+ }
+ }
+ if (mx >= 0xffff) /* values >= 0xffff are treated as a decoding failure */
+ return DECODING_DESYNC;
+
+ if(!s->mcsel)
+ my = h263_decode_motion(s, pred_y, s->f_code);
+ else{
+ const int a= s->sprite_warping_accuracy;
+ if(s->divx_version==500 && s->divx_build==413){
+ my = s->sprite_offset[0][1] / (1<<(a-s->quarter_sample));
+ }else{
+ my = RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample);
+ }
+ }
+ if (my >= 0xffff)
+ return DECODING_DESYNC;
+ mot_val[0 ]= mot_val[2 ] =
+ mot_val[0+stride]= mot_val[2+stride]= mx;
+ mot_val[1 ]= mot_val[3 ]=
+ mot_val[1+stride]= mot_val[3+stride]= my;
+ } else {
+ int i;
+ PRINT_MB_TYPE("4");
+ s->mb_type[xy]= MB_TYPE_INTER4V;
+ for(i=0;i<4;i++) {
+ INT16 *mot_val= h263_pred_motion(s, i, &pred_x, &pred_y); /* shadows the outer mot_val, one vector per block */
+ mx = h263_decode_motion(s, pred_x, s->f_code);
+ if (mx >= 0xffff)
+ return DECODING_DESYNC;
+
+ my = h263_decode_motion(s, pred_y, s->f_code);
+ if (my >= 0xffff)
+ return DECODING_DESYNC;
+ mot_val[0] = mx;
+ mot_val[1] = my;
+ }
+ }
+ }
+ }
+ }
+ s->mb_x= 0; /* the following rows start at column 0 */
+ }
+ /* the first partition must be followed by a 19 bit DC_MARKER (I pictures) or a 17 bit MOTION_MARKER (otherwise) */
+ if (s->pict_type==I_TYPE && get_bits(&s->gb, 19)!=DC_MARKER ) s->decoding_error= DECODING_DESYNC;
+ else if(s->pict_type!=I_TYPE && get_bits(&s->gb, 17)!=MOTION_MARKER) s->decoding_error= DECODING_DESYNC;
+ if(s->decoding_error== DECODING_DESYNC){
+ fprintf(stderr, "marker missing after first partition at %d %d\n", s->mb_x, s->mb_y);
+ return DECODING_DESYNC;
+ }
+
+ /* decode second partition */
+ mb_num=0;
+ s->mb_x= s->resync_mb_x;
+ for(s->mb_y= s->resync_mb_y; mb_num < s->mb_num_left; s->mb_y++){
+ init_block_index(s);
+ for(; mb_num < s->mb_num_left && s->mb_x<s->mb_width; s->mb_x++){
+ const int xy= s->mb_x + s->mb_y*s->mb_width;
+
+ mb_num++;
+ update_block_index(s);
+
+ if(s->pict_type==I_TYPE){
+ int ac_pred= get_bits1(&s->gb);
+ int cbpy = get_vlc(&s->gb, &cbpy_vlc);
+ if(cbpy<0){
+ fprintf(stderr, "cbpy corrupted at %d %d\n", s->mb_x, s->mb_y);
+ return DECODING_AC_LOST;
+ }
+
+ s->cbp_table[xy]|= cbpy<<2; /* completes the 2 bits stored by the first partition loop */
+ s->pred_dir_table[xy]|= ac_pred<<7; /* bit 7 holds ac_pred, the lower bits the dc prediction directions */
+ }else{ /* P || S_TYPE */
+ if(s->mb_type[xy]&MB_TYPE_INTRA){
+ int dir=0,i;
+ int ac_pred = get_bits1(&s->gb);
+ int cbpy = get_vlc(&s->gb, &cbpy_vlc);
+
+ if(cbpy<0){
+ fprintf(stderr, "I cbpy corrupted at %d %d\n", s->mb_x, s->mb_y);
+ return DECODING_ACDC_LOST;
+ }
+
+ if(s->cbp_table[xy] & 8) { /* dquant flag kept from the first partition */
+ s->qscale += quant_tab[get_bits(&s->gb, 2)];
+ if (s->qscale < 1)
+ s->qscale = 1;
+ else if (s->qscale > 31)
+ s->qscale = 31;
+ h263_dc_scale(s);
+ }
+ s->qscale_table[xy]= s->qscale;
+
+ for(i=0; i<6; i++){
+ int dc_pred_dir;
+ int dc= mpeg4_decode_dc(s, i, &dc_pred_dir);
+ if(dc < 0){
+ fprintf(stderr, "DC corrupted at %d %d\n", s->mb_x, s->mb_y);
+ return DECODING_ACDC_LOST;
+ }
+ dir<<=1; /* block i ends up in bit 5-i */
+ if(dc_pred_dir) dir|=1;
+ }
+ s->cbp_table[xy]&= 3; //remove dquant
+ s->cbp_table[xy]|= cbpy<<2;
+ s->pred_dir_table[xy]= dir | (ac_pred<<7);
+ }else if(s->mb_type[xy]&MB_TYPE_SKIPED){
+ s->qscale_table[xy]= s->qscale; /* nothing is read from the bitstream for skipped macroblocks */
+ s->cbp_table[xy]= 0;
+ }else{
+ int cbpy = get_vlc(&s->gb, &cbpy_vlc);
+
+ if(cbpy<0){
+ fprintf(stderr, "P cbpy corrupted at %d %d\n", s->mb_x, s->mb_y);
+ return DECODING_ACDC_LOST;
+ }
+
+ if(s->cbp_table[xy] & 8) {
+//fprintf(stderr, "dquant\n");
+ s->qscale += quant_tab[get_bits(&s->gb, 2)];
+ if (s->qscale < 1)
+ s->qscale = 1;
+ else if (s->qscale > 31)
+ s->qscale = 31;
+ h263_dc_scale(s);
+ }
+ s->qscale_table[xy]= s->qscale;
+
+ s->cbp_table[xy]&= 3; //remove dquant
+ s->cbp_table[xy]|= (cbpy^0xf)<<2; /* unlike the intra cases, cbpy is inverted before it is stored */
+ }
+ }
+ }
+ s->mb_x= 0;
+ }
+
+
+ return 0;
+}
+
+static int mpeg4_decode_partitioned_mb(MpegEncContext *s,
+ DCTELEM block[6][64])
+{ /* decodes one macroblock of a data partitioned packet using the tables filled by ff_mpeg4_decode_partitions(); returns 0, or -1 on desync or on a dc error in an I picture */
+ int cbp, mb_type;
+ const int xy= s->mb_x + s->mb_y*s->mb_width;
+
+ if(s->mb_x==s->resync_mb_x && s->mb_y==s->resync_mb_y){ //Note resync_mb_{x,y}==0 at the start
+ int i;
+ int block_index_backup[6];
+ int qscale= s->qscale;
+ /* ff_mpeg4_decode_partitions() changes block_index, mb_x, mb_y, qscale and first_slice_line; they are saved / restored around the call */
+ for(i=0; i<6; i++) block_index_backup[i]= s->block_index[i];
+
+ s->decoding_error= ff_mpeg4_decode_partitions(s);
+
+ for(i=0; i<6; i++) s->block_index[i]= block_index_backup[i];
+ s->first_slice_line=1;
+ s->mb_x= s->resync_mb_x;
+ s->mb_y= s->resync_mb_y;
+ s->qscale= qscale;
+ h263_dc_scale(s);
+
+ if(s->decoding_error==DECODING_DESYNC) return -1;
+ }
+
+ mb_type= s->mb_type[xy];
+ if(s->decoding_error)
+ cbp=0; /* after any recorded error no coefficients are decoded */
+ else
+ cbp = s->cbp_table[xy];
+
+ if(s->decoding_error!=DECODING_ACDC_LOST && s->qscale_table[xy] != s->qscale){ /* follow the per macroblock qscale recorded while parsing the partitions */
+ s->qscale= s->qscale_table[xy];
+ h263_dc_scale(s);
+ }
+
+ if (s->pict_type == P_TYPE || s->pict_type==S_TYPE) {
+ int i;
+ for(i=0; i<4; i++){
+ s->mv[0][i][0] = s->motion_val[ s->block_index[i] ][0];
+ s->mv[0][i][1] = s->motion_val[ s->block_index[i] ][1];
+ }
+ s->mb_intra = mb_type&MB_TYPE_INTRA;
+
+ if (mb_type&MB_TYPE_SKIPED) {
+ /* skip mb */
+ for(i=0;i<6;i++)
+ s->block_last_index[i] = -1;
+ s->mv_dir = MV_DIR_FORWARD;
+ s->mv_type = MV_TYPE_16X16;
+ if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE){
+ s->mcsel=1;
+ s->mb_skiped = 0;
+ }else{
+ s->mcsel=0;
+ s->mb_skiped = 1;
+ }
+ return 0;
+ }else if(s->mb_intra && s->decoding_error!=DECODING_ACDC_LOST){
+ s->ac_pred = s->pred_dir_table[xy]>>7; /* ac_pred is kept in bit 7 */
+
+ /* decode each block */
+ for (i = 0; i < 6; i++) {
+ int ret= mpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1);
+ if(ret==DECODING_AC_LOST){
+ fprintf(stderr, "texture corrupted at %d %d (trying to continue with mc/dc only)\n", s->mb_x, s->mb_y);
+ s->decoding_error=DECODING_AC_LOST;
+ cbp=0; /* the remaining blocks are decoded as not coded */
+ }else if(ret==DECODING_ACDC_LOST){
+ fprintf(stderr, "dc corrupted at %d %d (trying to continue with mc only)\n", s->mb_x, s->mb_y);
+ s->decoding_error=DECODING_ACDC_LOST;
+ break;
+ }
+ }
+ }else if(!s->mb_intra){
+// s->mcsel= 0; //FIXME do we need to init that
+
+ s->mv_dir = MV_DIR_FORWARD;
+ if (mb_type&MB_TYPE_INTER4V) {
+ s->mv_type = MV_TYPE_8X8;
+ } else {
+ s->mv_type = MV_TYPE_16X16;
+ }
+ if(s->decoding_error==0 && cbp){
+ /* decode each block */
+ for (i = 0; i < 6; i++) {
+ int ret= mpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1);
+ if(ret==DECODING_AC_LOST){
+ fprintf(stderr, "texture corrupted at %d %d (trying to continue with mc/dc only)\n", s->mb_x, s->mb_y);
+ s->decoding_error=DECODING_AC_LOST;
+ break;
+ }
+ }
+ }
+ }
+ } else { /* I-Frame */
+ int i;
+ s->mb_intra = 1;
+ s->ac_pred = s->pred_dir_table[xy]>>7;
+
+ /* decode each block */
+ for (i = 0; i < 6; i++) {
+ int ret= mpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1);
+ if(ret==DECODING_AC_LOST){
+ fprintf(stderr, "texture corrupted at %d %d (trying to continue with dc only)\n", s->mb_x, s->mb_y);
+ s->decoding_error=DECODING_AC_LOST;
+ cbp=0;
+ }else if(ret==DECODING_ACDC_LOST){
+ fprintf(stderr, "dc corrupted at %d %d\n", s->mb_x, s->mb_y);
+ return -1; /* unlike the P/S case above, a dc error in an I picture is not recovered from */
+ }
+ }
+ }
return 0;
}
+
int h263_decode_mb(MpegEncContext *s,
DCTELEM block[6][64])
{
@@ -1485,27 +2373,17 @@ int h263_decode_mb(MpegEncContext *s,
INT16 *mot_val;
static INT8 quant_tab[4] = { -1, -2, 1, 2 };
- if(s->resync_marker){
- if( s->resync_x_pos == s->mb_x+1
- || s->resync_x_pos == s->mb_x){
- /* f*ck mpeg4
- this is here so we dont need to slowdown h263_pred_motion with it */
- if(s->resync_x_pos == s->mb_x+1 && s->mb_x==0){
- int xy= s->block_index[0] - s->block_wrap[0];
- s->motion_val[xy][0]= s->motion_val[xy+2][0];
- s->motion_val[xy][1]= s->motion_val[xy+2][1];
- }
+ if(s->mb_x==0) PRINT_MB_TYPE("\n")
+ if(s->resync_marker){
+ if(s->resync_mb_x == s->mb_x && s->resync_mb_y+1 == s->mb_y){
s->first_slice_line=0;
- s->resync_x_pos=0; // isnt needed but for cleanness sake ;)
- }
-
- if(show_aligned_bits(&s->gb, 1, 16) == 0){
- if( mpeg4_resync(s) < 0 ) return -1;
-
}
}
+ if(s->data_partitioning && s->pict_type!=B_TYPE)
+ return mpeg4_decode_partitioned_mb(s, block);
+
if (s->pict_type == P_TYPE || s->pict_type==S_TYPE) {
if (get_bits1(&s->gb)) {
/* skip mb */
@@ -1517,7 +2395,7 @@ int h263_decode_mb(MpegEncContext *s,
if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE){
const int a= s->sprite_warping_accuracy;
// int l = (1 << (s->f_code - 1)) * 32;
-
+ PRINT_MB_TYPE("G");
s->mcsel=1;
if(s->divx_version==500 && s->divx_build==413){
s->mv[0][0][0] = s->sprite_offset[0][0] / (1<<(a-s->quarter_sample));
@@ -1533,6 +2411,7 @@ int h263_decode_mb(MpegEncContext *s,
s->mb_skiped = 0;
}else{
+ PRINT_MB_TYPE("S");
s->mcsel=0;
s->mv[0][0][0] = 0;
s->mv[0][0][1] = 0;
@@ -1568,6 +2447,7 @@ int h263_decode_mb(MpegEncContext *s,
}
s->mv_dir = MV_DIR_FORWARD;
if ((cbpc & 16) == 0) {
+ PRINT_MB_TYPE("P");
/* 16x16 motion prediction */
s->mv_type = MV_TYPE_16X16;
h263_pred_motion(s, 0, &pred_x, &pred_y);
@@ -1615,6 +2495,7 @@ int h263_decode_mb(MpegEncContext *s,
skip_bits1(&s->gb); /* Bit stuffing to prevent PSC */
} else {
+ PRINT_MB_TYPE("4");
s->mv_type = MV_TYPE_8X8;
for(i=0;i<4;i++) {
mot_val = h263_pred_motion(s, i, &pred_x, &pred_y);
@@ -1643,8 +2524,8 @@ int h263_decode_mb(MpegEncContext *s,
int modb1; // first bit of modb
int modb2; // second bit of modb
int mb_type;
- int time_pp;
- int time_pb;
+ uint16_t time_pp;
+ uint16_t time_pb;
int xy;
s->mb_intra = 0; //B-frames never contain intra blocks
@@ -1674,7 +2555,7 @@ int h263_decode_mb(MpegEncContext *s,
//FIXME is this correct?
/* s->last_mv[0][0][0]=
s->last_mv[0][0][1]=0;*/
- s->mb_skiped = 1;
+ PRINT_MB_TYPE("s")
return 0;
}
@@ -1702,14 +2583,14 @@ int h263_decode_mb(MpegEncContext *s,
mx=my=0; //for case 4, we could put this to the mb_type=4 but than gcc compains about uninitalized mx/my
switch(mb_type)
{
- case 0:
+ case 0: /* direct */
mx = h263_decode_motion(s, 0, 1);
my = h263_decode_motion(s, 0, 1);
- case 4:
+ case 4: /* direct with mx=my=0 */
s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
xy= s->block_index[0];
- time_pp= s->last_non_b_time[0] - s->last_non_b_time[1];
- time_pb= s->time - s->last_non_b_time[1];
+ time_pp= s->pp_time;
+ time_pb= time_pp - s->bp_time;
//if(time_pp>3000 )printf("%d %d ", time_pp, time_pb);
//FIXME 4MV
//FIXME avoid divides
@@ -1723,6 +2604,7 @@ int h263_decode_mb(MpegEncContext *s,
s->mv[0][0][1] =
s->mv[1][0][0] =
s->mv[1][0][1] = 1000;*/
+ PRINT_MB_TYPE("D");
break;
case 1:
s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
@@ -1735,6 +2617,7 @@ int h263_decode_mb(MpegEncContext *s,
my = h263_decode_motion(s, s->last_mv[1][0][1], s->b_code);
s->last_mv[1][0][0]= s->mv[1][0][0] = mx;
s->last_mv[1][0][1]= s->mv[1][0][1] = my;
+ PRINT_MB_TYPE("i");
break;
case 2:
s->mv_dir = MV_DIR_BACKWARD;
@@ -1742,6 +2625,7 @@ int h263_decode_mb(MpegEncContext *s,
my = h263_decode_motion(s, s->last_mv[1][0][1], s->b_code);
s->last_mv[1][0][0]= s->mv[1][0][0] = mx;
s->last_mv[1][0][1]= s->mv[1][0][1] = my;
+ PRINT_MB_TYPE("B");
break;
case 3:
s->mv_dir = MV_DIR_FORWARD;
@@ -1749,8 +2633,11 @@ int h263_decode_mb(MpegEncContext *s,
my = h263_decode_motion(s, s->last_mv[0][0][1], s->f_code);
s->last_mv[0][0][0]= s->mv[0][0][0] = mx;
s->last_mv[0][0][1]= s->mv[0][0][1] = my;
+ PRINT_MB_TYPE("F");
break;
- default: return -1;
+ default:
+ printf("illegal MB_type\n");
+ return -1;
}
} else { /* I-Frame */
cbpc = get_vlc(&s->gb, &intra_MCBPC_vlc);
@@ -1759,6 +2646,7 @@ int h263_decode_mb(MpegEncContext *s,
dquant = cbpc & 4;
s->mb_intra = 1;
intra:
+ PRINT_MB_TYPE("I");
s->ac_pred = 0;
if (s->h263_pred || s->h263_aic) {
s->ac_pred = get_bits1(&s->gb);
@@ -1770,6 +2658,7 @@ intra:
s->c_dc_scale = 2 * s->qscale;
}
cbpy = get_vlc(&s->gb, &cbpy_vlc);
+ if(cbpy<0) return -1;
cbp = (cbpc & 3) | (cbpy << 2);
if (dquant) {
s->qscale += quant_tab[get_bits(&s->gb, 2)];
@@ -1815,7 +2704,7 @@ static int h263_decode_motion(MpegEncContext * s, int pred, int f_code)
if (sign)
val = -val;
val += pred;
-
+
/* modulo decoding */
if (!s->h263_long_vectors) {
l = (1 << (f_code - 1)) * 32;
@@ -1951,7 +2840,7 @@ not_coded:
return 0;
}
-static int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr)
+static inline int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr)
{
int level, pred, code;
UINT16 *dc_val;
@@ -1960,16 +2849,22 @@ static int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr)
code = get_vlc(&s->gb, &dc_lum);
else
code = get_vlc(&s->gb, &dc_chrom);
- if (code < 0)
+ if (code < 0 || code > 9 /* && s->nbit<9 */){
+ fprintf(stderr, "illegal dc vlc\n");
return -1;
+ }
if (code == 0) {
level = 0;
} else {
level = get_bits(&s->gb, code);
if ((level >> (code - 1)) == 0) /* if MSB not set it is negative*/
level = - (level ^ ((1 << code) - 1));
- if (code > 8)
- skip_bits1(&s->gb); /* marker */
+ if (code > 8){
+ if(get_bits1(&s->gb)==0){ /* marker */
+ fprintf(stderr, "dc marker bit missing\n");
+ return -1;
+ }
+ }
}
pred = mpeg4_pred_dc(s, n, &dc_val, dir_ptr);
@@ -1984,7 +2879,13 @@ static int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr)
return level;
}
-static int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
+/**
+ * decode a block
+ * returns 0 if everything went ok
+ * returns DECODING_AC_LOST if an error was detected during AC decoding
+ * returns DECODING_ACDC_LOST if an error was detected during DC decoding
+ */
+static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
int n, int coded)
{
int code, level, i, j, last, run;
@@ -1994,11 +2895,18 @@ static int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
if (s->mb_intra) {
/* DC coef */
- level = mpeg4_decode_dc(s, n, &dc_pred_dir);
- if (level < 0)
- return -1;
+ if(s->data_partitioning && s->pict_type!=B_TYPE){
+ level = s->dc_val[0][ s->block_index[n] ];
+ if(n<4) level= (level + (s->y_dc_scale>>1))/s->y_dc_scale; //FIXME optimizs
+ else level= (level + (s->c_dc_scale>>1))/s->c_dc_scale;
+ dc_pred_dir= (s->pred_dir_table[s->mb_x + s->mb_y*s->mb_width]<<n)&32;
+ }else{
+ level = mpeg4_decode_dc(s, n, &dc_pred_dir);
+ if (level < 0)
+ return DECODING_ACDC_LOST;
+ }
block[0] = level;
- i = 1;
+ i = 1;
if (!coded)
goto not_coded;
rl = &rl_intra;
@@ -2023,7 +2931,7 @@ static int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
for(;;) {
code = get_vlc(&s->gb, &rl->vlc);
if (code < 0)
- return -1;
+ return DECODING_AC_LOST;
if (code == rl->n) {
/* escape */
if (get_bits1(&s->gb) != 0) {
@@ -2031,15 +2939,46 @@ static int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
/* third escape */
last = get_bits1(&s->gb);
run = get_bits(&s->gb, 6);
- get_bits1(&s->gb); /* marker */
+ if(get_bits1(&s->gb)==0){
+ fprintf(stderr, "1. marker bit missing in 3. esc\n");
+ return DECODING_AC_LOST;
+ }
level = get_bits(&s->gb, 12);
level = (level << 20) >> 20; /* sign extend */
- skip_bits1(&s->gb); /* marker */
+ if(get_bits1(&s->gb)==0){
+ fprintf(stderr, "2. marker bit missing in 3. esc\n");
+ return DECODING_AC_LOST;
+ }
+ if(level>512 || level<-512){ //FIXME check that QP=1 is ok with this too
+ fprintf(stderr, "|level| overflow in 3. esc\n");
+ return DECODING_AC_LOST;
+ }
+#if 1
+ {
+ const int abs_level= ABS(level);
+ int run1;
+ if(abs_level<=MAX_LEVEL && run<=MAX_RUN && s->error_resilience>=0){
+ if(abs_level <= rl->max_level[last][run]){
+ fprintf(stderr, "illegal 3. esc, vlc encoding possible\n");
+ return DECODING_AC_LOST;
+ }
+ if(abs_level <= rl->max_level[last][run]*2){
+ fprintf(stderr, "illegal 3. esc, esc 1 encoding possible\n");
+ return DECODING_AC_LOST;
+ }
+ run1 = run - rl->max_run[last][abs_level] - 1;
+ if(run1 >= 0 && abs_level <= rl->max_level[last][run1]){
+ fprintf(stderr, "illegal 3. esc, esc 2 encoding possible\n");
+ return DECODING_AC_LOST;
+ }
+ }
+ }
+#endif
} else {
/* second escape */
code = get_vlc(&s->gb, &rl->vlc);
if (code < 0 || code >= rl->n)
- return -1;
+ return DECODING_AC_LOST;
run = rl->table_run[code];
level = rl->table_level[code];
last = code >= rl->last;
@@ -2051,7 +2990,7 @@ static int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
/* first escape */
code = get_vlc(&s->gb, &rl->vlc);
if (code < 0 || code >= rl->n)
- return -1;
+ return DECODING_AC_LOST;
run = rl->table_run[code];
level = rl->table_level[code];
last = code >= rl->last;
@@ -2068,7 +3007,7 @@ static int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
}
i += run;
if (i >= 64)
- return -1;
+ return DECODING_AC_LOST;
j = scan_table[i];
block[j] = level;
i++;
@@ -2091,15 +3030,24 @@ int h263_decode_picture_header(MpegEncContext *s)
{
int format, width, height;
- /* picture header */
- if (get_bits(&s->gb, 22) != 0x20)
+ /* picture start code */
+ if (get_bits(&s->gb, 22) != 0x20) {
+ fprintf(stderr, "Bad picture start code\n");
return -1;
+ }
+ /* temporal reference */
s->picture_number = get_bits(&s->gb, 8); /* picture timestamp */
-
- if (get_bits1(&s->gb) != 1)
- return -1; /* marker */
- if (get_bits1(&s->gb) != 0)
+
+ /* PTYPE starts here */
+ if (get_bits1(&s->gb) != 1) {
+ /* marker */
+ fprintf(stderr, "Bad marker\n");
+ return -1;
+ }
+ if (get_bits1(&s->gb) != 0) {
+ fprintf(stderr, "Bad H263 id\n");
return -1; /* h263 id */
+ }
skip_bits1(&s->gb); /* split screen off */
skip_bits1(&s->gb); /* camera off */
skip_bits1(&s->gb); /* freeze picture release off */
@@ -2108,6 +3056,12 @@ int h263_decode_picture_header(MpegEncContext *s)
s->gob_number = 0;
format = get_bits(&s->gb, 3);
+ /*
+ 0 forbidden
+ 1 sub-QCIF
+ 2 QCIF
+ 7 extended PTYPE (PLUSPTYPE)
+ */
if (format != 7 && format != 6) {
s->h263_plus = 0;
@@ -2124,15 +3078,18 @@ int h263_decode_picture_header(MpegEncContext *s)
s->unrestricted_mv = get_bits1(&s->gb);
s->h263_long_vectors = s->unrestricted_mv;
- if (get_bits1(&s->gb) != 0)
+ if (get_bits1(&s->gb) != 0) {
+ fprintf(stderr, "H263 SAC not supported\n");
return -1; /* SAC: off */
+ }
if (get_bits1(&s->gb) != 0) {
s->mv_type = MV_TYPE_8X8; /* Advanced prediction mode */
}
- if (get_bits1(&s->gb) != 0)
+ if (get_bits1(&s->gb) != 0) {
+ fprintf(stderr, "H263 PB frame not supported\n");
return -1; /* not PB frame */
-
+ }
s->qscale = get_bits(&s->gb, 5);
skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */
} else {
@@ -2141,10 +3098,12 @@ int h263_decode_picture_header(MpegEncContext *s)
/* H.263v2 */
s->h263_plus = 1;
ufep = get_bits(&s->gb, 3); /* Update Full Extended PTYPE */
-
+
+ /* ufep values other than 0 and 1 are reserved */
if (ufep == 1) {
/* OPPTYPE */
format = get_bits(&s->gb, 3);
+ dprintf("ufep=1, format: %d\n", format);
skip_bits(&s->gb,1); /* Custom PCF */
s->umvplus_dec = get_bits(&s->gb, 1); /* Unrestricted Motion Vector */
skip_bits1(&s->gb); /* Syntax-based Arithmetic Coding (SAC) */
@@ -2154,34 +3113,59 @@ int h263_decode_picture_header(MpegEncContext *s)
if (get_bits1(&s->gb) != 0) { /* Advanced Intra Coding (AIC) */
s->h263_aic = 1;
}
+
skip_bits(&s->gb, 7);
+ /* these are the 7 bits (in order of appearance): */
+ /* Deblocking Filter */
+ /* Slice Structured */
+ /* Reference Picture Selection */
+ /* Independent Segment Decoding */
+ /* Alternative Inter VLC */
+ /* Modified Quantization */
+ /* Prevent start code emulation */
+
skip_bits(&s->gb, 3); /* Reserved */
- } else if (ufep != 0)
+ } else if (ufep != 0) {
+ fprintf(stderr, "Bad UFEP type (%d)\n", ufep);
return -1;
+ }
/* MPPTYPE */
- s->pict_type = get_bits(&s->gb, 3) + 1;
+ s->pict_type = get_bits(&s->gb, 3) + I_TYPE;
+ dprintf("pict_type: %d\n", s->pict_type);
if (s->pict_type != I_TYPE &&
s->pict_type != P_TYPE)
return -1;
skip_bits(&s->gb, 2);
s->no_rounding = get_bits1(&s->gb);
- //fprintf(stderr, "\nRTYPE: %d", s->no_rounding);
+ dprintf("RTYPE: %d\n", s->no_rounding);
skip_bits(&s->gb, 4);
/* Get the picture dimensions */
if (ufep) {
if (format == 6) {
/* Custom Picture Format (CPFMT) */
- skip_bits(&s->gb, 4); /* aspect ratio */
+ s->aspect_ratio_info = get_bits(&s->gb, 4);
+ dprintf("aspect: %d\n", s->aspect_ratio_info);
+ /* aspect ratios:
+ 0 - forbidden
+ 1 - 1:1
+ 2 - 12:11 (CIF 4:3)
+ 3 - 10:11 (525-type 4:3)
+ 4 - 16:11 (CIF 16:9)
+ 5 - 40:33 (525-type 16:9)
+ 6-14 - reserved
+ */
width = (get_bits(&s->gb, 9) + 1) * 4;
skip_bits1(&s->gb);
height = get_bits(&s->gb, 9) * 4;
-#ifdef DEBUG
- fprintf(stderr,"\nH.263+ Custom picture: %dx%d\n",width,height);
-#endif
- }
- else {
+ dprintf("\nH.263+ Custom picture: %dx%d\n",width,height);
+ if (s->aspect_ratio_info == EXTENDED_PAR) {
+ /* extended pixel aspect ratio (PAR) dimensions */
+ skip_bits(&s->gb, 8); /* width */
+ skip_bits(&s->gb, 8); /* height */
+ }
+ } else {
width = h263_format[format][0];
height = h263_format[format][1];
}
@@ -2210,7 +3194,7 @@ static void mpeg4_decode_sprite_trajectory(MpegEncContext * s)
int a= 2<<s->sprite_warping_accuracy;
int rho= 3-s->sprite_warping_accuracy;
int r=16/a;
- int vop_ref[4][2]= {{0,0}, {s->width,0}, {0, s->height}, {s->width, s->height}}; // only true for rectangle shapes
+ const int vop_ref[4][2]= {{0,0}, {s->width,0}, {0, s->height}, {s->width, s->height}}; // only true for rectangle shapes
int d[4][2]={{0,0}, {0,0}, {0,0}, {0,0}};
int sprite_ref[4][2];
int virtual_ref[2][2];
@@ -2276,13 +3260,13 @@ static void mpeg4_decode_sprite_trajectory(MpegEncContext * s)
// the idea behind this virtual_ref mess is to be able to use shifts later per pixel instead of divides
// so the distance between points is converted from w&h based to w2&h2 based which are of the 2^x form
virtual_ref[0][0]= 16*(vop_ref[0][0] + w2)
- + RDIV(((w - w2)*(r*sprite_ref[0][0] - 16*vop_ref[0][0]) + w2*(r*sprite_ref[1][0] - 16*vop_ref[1][0])),w);
+ + ROUNDED_DIV(((w - w2)*(r*sprite_ref[0][0] - 16*vop_ref[0][0]) + w2*(r*sprite_ref[1][0] - 16*vop_ref[1][0])),w);
virtual_ref[0][1]= 16*vop_ref[0][1]
- + RDIV(((w - w2)*(r*sprite_ref[0][1] - 16*vop_ref[0][1]) + w2*(r*sprite_ref[1][1] - 16*vop_ref[1][1])),w);
+ + ROUNDED_DIV(((w - w2)*(r*sprite_ref[0][1] - 16*vop_ref[0][1]) + w2*(r*sprite_ref[1][1] - 16*vop_ref[1][1])),w);
virtual_ref[1][0]= 16*vop_ref[0][0]
- + RDIV(((h - h2)*(r*sprite_ref[0][0] - 16*vop_ref[0][0]) + h2*(r*sprite_ref[2][0] - 16*vop_ref[2][0])),h);
+ + ROUNDED_DIV(((h - h2)*(r*sprite_ref[0][0] - 16*vop_ref[0][0]) + h2*(r*sprite_ref[2][0] - 16*vop_ref[2][0])),h);
virtual_ref[1][1]= 16*(vop_ref[0][1] + h2)
- + RDIV(((h - h2)*(r*sprite_ref[0][1] - 16*vop_ref[0][1]) + h2*(r*sprite_ref[2][1] - 16*vop_ref[2][1])),h);
+ + ROUNDED_DIV(((h - h2)*(r*sprite_ref[0][1] - 16*vop_ref[0][1]) + h2*(r*sprite_ref[2][1] - 16*vop_ref[2][1])),h);
switch(s->num_sprite_warping_points)
{
@@ -2398,6 +3382,7 @@ printf("%d %d\n", s->sprite_delta[1][1][1], a<<s->sprite_shift[1][1]);*/
int mpeg4_decode_picture_header(MpegEncContext * s)
{
int time_incr, startcode, state, v;
+ int time_increment;
redo:
/* search next start code */
@@ -2412,8 +3397,13 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
}
state = ((state << 8) | v) & 0xffffff;
if( get_bits_count(&s->gb) > s->gb.size*8-32){
- printf("no VOP startcode found\n");
- return -1;
+ if(s->gb.size>50){
+ printf("no VOP startcode found, frame size was=%d\n", s->gb.size);
+ return -1;
+ }else{
+ printf("frame skip\n");
+ return FRAME_SKIPED;
+ }
}
}
//printf("startcode %X %d\n", startcode, get_bits_count(&s->gb));
@@ -2422,24 +3412,34 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
/* vol header */
skip_bits(&s->gb, 1); /* random access */
- skip_bits(&s->gb, 8); /* vo_type */
+ s->vo_type= get_bits(&s->gb, 8);
if (get_bits1(&s->gb) != 0) { /* is_ol_id */
vo_ver_id = get_bits(&s->gb, 4); /* vo_ver_id */
skip_bits(&s->gb, 3); /* vo_priority */
} else {
vo_ver_id = 1;
}
-
+//printf("vo type:%d\n",s->vo_type);
s->aspect_ratio_info= get_bits(&s->gb, 4);
- if(s->aspect_ratio_info == EXTENDET_PAR){
+ if(s->aspect_ratio_info == EXTENDED_PAR){
skip_bits(&s->gb, 8); //par_width
skip_bits(&s->gb, 8); // par_height
}
- if(get_bits1(&s->gb)){ /* vol control parameter */
- printf("vol control parameter not supported\n");
- return -1;
+ if ((s->vol_control_parameters=get_bits1(&s->gb))) { /* vol control parameter */
+ int chroma_format= get_bits(&s->gb, 2);
+ if(chroma_format!=1){
+ printf("illegal chroma format\n");
+ }
+ s->low_delay= get_bits1(&s->gb);
+ if(get_bits1(&s->gb)){ /* vbv parameters */
+ printf("vbv parameters not supported\n");
+ return -1;
+ }
+ }else{
+ s->low_delay=0;
}
+
s->shape = get_bits(&s->gb, 2); /* vol shape */
if(s->shape != RECT_SHAPE) printf("only rectangular vol supported\n");
if(s->shape == GRAY_SHAPE && vo_ver_id != 1){
@@ -2469,12 +3469,12 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
if(width && height){ /* they should be non zero but who knows ... */
s->width = width;
s->height = height;
-// printf("%d %d\n", width, height);
+// printf("width/height: %d %d\n", width, height);
}
}
if(get_bits1(&s->gb)) printf("interlaced not supported\n"); /* interlaced */
- if(!get_bits1(&s->gb)) printf("OBMC not supported\n"); /* OBMC Disable */
+ if(!get_bits1(&s->gb)) printf("OBMC not supported (very likely buggy encoder)\n"); /* OBMC Disable */
if (vo_ver_id == 1) {
s->vol_sprite_usage = get_bits1(&s->gb); /* vol_sprite_usage */
} else {
@@ -2509,7 +3509,57 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
}
// FIXME a bunch of grayscale shape things
- if(get_bits1(&s->gb)) printf("Quant-Type not supported\n"); /* vol_quant_type */ //FIXME
+
+ if(get_bits1(&s->gb)){ /* vol_quant_type */
+ int i, j, v;
+ /* load default matrices */
+ for(i=0; i<64; i++){
+ v= ff_mpeg4_default_intra_matrix[i];
+ s->intra_matrix[i]= v;
+ s->chroma_intra_matrix[i]= v;
+
+ v= ff_mpeg4_default_non_intra_matrix[i];
+ s->inter_matrix[i]= v;
+ s->chroma_inter_matrix[i]= v;
+ }
+
+ /* load custom intra matrix */
+ if(get_bits1(&s->gb)){
+ for(i=0; i<64; i++){
+ v= get_bits(&s->gb, 8);
+ if(v==0) break;
+
+ j= zigzag_direct[i];
+ s->intra_matrix[j]= v;
+ s->chroma_intra_matrix[j]= v;
+ }
+ }
+
+ /* load custom non intra matrix */
+ if(get_bits1(&s->gb)){
+ for(i=0; i<64; i++){
+ v= get_bits(&s->gb, 8);
+ if(v==0) break;
+
+ j= zigzag_direct[i];
+ s->inter_matrix[j]= v;
+ s->chroma_inter_matrix[j]= v;
+ }
+
+ /* replicate last value */
+ for(; i<64; i++){
+ j= zigzag_direct[i];
+ s->inter_matrix[j]= v;
+ s->chroma_inter_matrix[j]= v;
+ }
+ }
+
+ s->dct_unquantize= s->dct_unquantize_mpeg2;
+
+ // FIXME a bunch of grayscale shape things
+ }else
+ s->dct_unquantize= s->dct_unquantize_h263;
+
if(vo_ver_id != 1)
s->quarter_sample= get_bits1(&s->gb);
else s->quarter_sample=0;
@@ -2518,10 +3568,12 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
s->resync_marker= !get_bits1(&s->gb); /* resync_marker_disabled */
- s->data_partioning= get_bits1(&s->gb);
- if(s->data_partioning){
- printf("data partitioning not supported\n");
- skip_bits1(&s->gb); // reversible vlc
+ s->data_partitioning= get_bits1(&s->gb);
+ if(s->data_partitioning){
+ s->rvlc= get_bits1(&s->gb);
+ if(s->rvlc){
+ printf("reversible vlc not supported\n");
+ }
}
if(vo_ver_id != 1) {
@@ -2561,16 +3613,20 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
}
buf[255]=0;
e=sscanf(buf, "DivX%dBuild%d", &ver, &build);
+ if(e!=2)
+ e=sscanf(buf, "DivX%db%d", &ver, &build);
if(e==2){
s->divx_version= ver;
s->divx_build= build;
if(s->picture_number==0){
printf("This file was encoded with DivX%d Build%d\n", ver, build);
- if(ver==500 && build==413){ //most likely all version are indeed totally buggy but i dunno for sure ...
+ if(ver==500 && build==413){
printf("WARNING: this version of DivX is not MPEG4 compatible, trying to workaround these bugs...\n");
+#if 0
}else{
printf("hmm, i havnt seen that version of divx yet, lets assume they fixed these bugs ...\n"
"using mpeg4 decoder, if it fails contact the developers (of ffmpeg)\n");
+#endif
}
}
}
@@ -2580,21 +3636,34 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
goto redo;
}
- s->pict_type = get_bits(&s->gb, 2) + 1; /* pict type: I = 0 , P = 1 */
-//printf("pic: %d, qpel:%d\n", s->pict_type, s->quarter_sample);
+ s->pict_type = get_bits(&s->gb, 2) + I_TYPE; /* pict type: I = 0 , P = 1 */
+//if(s->pict_type!=I_TYPE) return FRAME_SKIPED;
+ if(s->pict_type==B_TYPE && s->low_delay && s->vol_control_parameters==0){
+ printf("low_delay flag set, but shouldnt, clearing it\n");
+ s->low_delay=0;
+ }
+// printf("pic: %d, qpel:%d\n", s->pict_type, s->quarter_sample);
+//printf("%d", s->pict_type);
time_incr=0;
while (get_bits1(&s->gb) != 0)
time_incr++;
check_marker(&s->gb, "before time_increment");
- s->time_increment= get_bits(&s->gb, s->time_increment_bits);
+ time_increment= get_bits(&s->gb, s->time_increment_bits);
+//printf(" type:%d incr:%d increment:%d\n", s->pict_type, time_incr, time_increment);
if(s->pict_type!=B_TYPE){
+ s->last_time_base= s->time_base;
s->time_base+= time_incr;
- s->last_non_b_time[1]= s->last_non_b_time[0];
- s->last_non_b_time[0]= s->time_base*s->time_increment_resolution + s->time_increment;
+ s->time= s->time_base*s->time_increment_resolution + time_increment;
+ s->pp_time= s->time - s->last_non_b_time;
+ s->last_non_b_time= s->time;
}else{
- s->time= (s->last_non_b_time[1]/s->time_increment_resolution + time_incr)*s->time_increment_resolution;
- s->time+= s->time_increment;
+ s->time= (s->last_time_base + time_incr)*s->time_increment_resolution + time_increment;
+ s->bp_time= s->last_non_b_time - s->time;
+ if(s->pp_time <=s->bp_time){
+// printf("messed up order, seeking?, skiping current b frame\n");
+ return FRAME_SKIPED;
+ }
}
if(check_marker(&s->gb, "before vop_coded")==0 && s->picture_number==0){
@@ -2674,14 +3743,23 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
s->b_code = get_bits(&s->gb, 3);
//printf("b-code %d\n", s->b_code);
}
-//printf("quant:%d fcode:%d\n", s->qscale, s->f_code);
+//printf("quant:%d fcode:%d bcode:%d type:%d\n", s->qscale, s->f_code, s->b_code, s->pict_type);
if(!s->scalability){
if (s->shape!=RECT_SHAPE && s->pict_type!=I_TYPE) {
skip_bits1(&s->gb); // vop shape coding type
}
}
}
+ /* detect buggy encoders which don't set the low_delay flag (divx4/xvid/opendivx) */
+ // note: we cannot easily detect divx5 without b-frames (although it is buggy too)
+ if(s->vo_type==0 && s->vol_control_parameters==0 && s->divx_version==0 && s->picture_number==0){
+ printf("looks like this file was encoded with (divx4/(old)xvid/opendivx) -> forcing low_delay flag\n");
+ s->low_delay=1;
+ }
+
s->picture_number++; // better than pic number==0 allways ;)
+//printf("done\n");
+
return 0;
}
@@ -2691,22 +3769,29 @@ int intel_h263_decode_picture_header(MpegEncContext *s)
int format;
/* picture header */
- if (get_bits(&s->gb, 22) != 0x20)
+ if (get_bits(&s->gb, 22) != 0x20) {
+ fprintf(stderr, "Bad picture start code\n");
return -1;
- skip_bits(&s->gb, 8); /* picture timestamp */
+ }
+ s->picture_number = get_bits(&s->gb, 8); /* picture timestamp */
- if (get_bits1(&s->gb) != 1)
+ if (get_bits1(&s->gb) != 1) {
+ fprintf(stderr, "Bad marker\n");
return -1; /* marker */
- if (get_bits1(&s->gb) != 0)
+ }
+ if (get_bits1(&s->gb) != 0) {
+ fprintf(stderr, "Bad H263 id\n");
return -1; /* h263 id */
+ }
skip_bits1(&s->gb); /* split screen off */
skip_bits1(&s->gb); /* camera off */
skip_bits1(&s->gb); /* freeze picture release off */
format = get_bits(&s->gb, 3);
- if (format != 7)
+ if (format != 7) {
+ fprintf(stderr, "Intel H263 free format not supported\n");
return -1;
-
+ }
s->h263_plus = 0;
s->pict_type = I_TYPE + get_bits1(&s->gb);
@@ -2714,12 +3799,18 @@ int intel_h263_decode_picture_header(MpegEncContext *s)
s->unrestricted_mv = get_bits1(&s->gb);
s->h263_long_vectors = s->unrestricted_mv;
- if (get_bits1(&s->gb) != 0)
+ if (get_bits1(&s->gb) != 0) {
+ fprintf(stderr, "SAC not supported\n");
return -1; /* SAC: off */
- if (get_bits1(&s->gb) != 0)
+ }
+ if (get_bits1(&s->gb) != 0) {
+ fprintf(stderr, "Advanced Prediction Mode not supported\n");
return -1; /* advanced prediction mode: off */
- if (get_bits1(&s->gb) != 0)
- return -1; /* not PB frame */
+ }
+ if (get_bits1(&s->gb) != 0) {
+ fprintf(stderr, "PB frame mode no supported\n");
+ return -1; /* PB frame mode */
+ }
/* skip unknown header garbage */
skip_bits(&s->gb, 41);
diff --git a/src/libffmpeg/libavcodec/h263data.h b/src/libffmpeg/libavcodec/h263data.h
index a129fd6bf..5a7b943ea 100644
--- a/src/libffmpeg/libavcodec/h263data.h
+++ b/src/libffmpeg/libavcodec/h263data.h
@@ -1,11 +1,11 @@
/* intra MCBPC, mb_type = (intra), then (intraq) */
-static const UINT8 intra_MCBPC_code[8] = { 1, 1, 2, 3, 1, 1, 2, 3 };
-static const UINT8 intra_MCBPC_bits[8] = { 1, 3, 3, 3, 4, 6, 6, 6 };
+const UINT8 intra_MCBPC_code[8] = { 1, 1, 2, 3, 1, 1, 2, 3 };
+const UINT8 intra_MCBPC_bits[8] = { 1, 3, 3, 3, 4, 6, 6, 6 };
/* inter MCBPC, mb_type = (inter), (intra), (interq), (intraq), (inter4v) */
/* Changed the tables for interq and inter4v+q, following the standard ** Juanjo ** */
-static const UINT8 inter_MCBPC_code[25] = {
+const UINT8 inter_MCBPC_code[25] = {
1, 3, 2, 5,
3, 4, 3, 3,
3, 7, 6, 5,
@@ -14,7 +14,7 @@ static const UINT8 inter_MCBPC_code[25] = {
1, /* Stuffing */
2, 12, 14, 15,
};
-static const UINT8 inter_MCBPC_bits[25] = {
+const UINT8 inter_MCBPC_bits[25] = {
1, 4, 4, 6,
5, 8, 8, 7,
3, 7, 7, 9,
@@ -125,45 +125,73 @@ static RLTable rl_inter = {
inter_level,
};
-/* table used for Advanced INTRA Coding, just RUN and LEVEL change */
-const INT8 inter_level_aic[102] = {
- 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 3, 2, 1, 2, 2, 4, 5,
- 6, 7, 3, 2, 3, 4, 5, 2,
- 3, 4, 2, 3, 1, 2, 25, 1,
- 2, 24, 8, 2, 7, 4, 6, 1,
- 9, 23, 2, 3, 1, 10, 12, 11,
- 18, 17, 16, 15, 14, 13, 20, 19,
- 22, 21, 1, 1, 1, 1, 1, 1,
- 1, 2, 1, 1, 1, 3, 1, 1,
- 1, 1, 1, 1, 1, 4, 1, 1,
- 1, 1, 2, 2, 6, 5, 2, 2,
- 3, 7, 3, 4, 9, 8, 1, 1,
- 1, 2, 2, 2, 3, 10,
+const UINT16 intra_vlc_aic[103][2] = {
+{ 0x2, 2 }, { 0x6, 3 }, { 0xe, 4 }, { 0xc, 5 },
+{ 0xd, 5 }, { 0x10, 6 }, { 0x11, 6 }, { 0x12, 6 },
+{ 0x16, 7 }, { 0x1b, 8 }, { 0x20, 9 }, { 0x21, 9 },
+{ 0x1a, 9 }, { 0x1b, 9 }, { 0x1c, 9 }, { 0x1d, 9 },
+{ 0x1e, 9 }, { 0x1f, 9 }, { 0x23, 11 }, { 0x22, 11 },
+{ 0x57, 12 }, { 0x56, 12 }, { 0x55, 12 }, { 0x54, 12 },
+{ 0x53, 12 }, { 0xf, 4 }, { 0x14, 6 }, { 0x14, 7 },
+{ 0x1e, 8 }, { 0xf, 10 }, { 0x21, 11 }, { 0x50, 12 },
+{ 0xb, 5 }, { 0x15, 7 }, { 0xe, 10 }, { 0x9, 10 },
+{ 0x15, 6 }, { 0x1d, 8 }, { 0xd, 10 }, { 0x51, 12 },
+{ 0x13, 6 }, { 0x23, 9 }, { 0x7, 11 }, { 0x17, 7 },
+{ 0x22, 9 }, { 0x52, 12 }, { 0x1c, 8 }, { 0xc, 10 },
+{ 0x1f, 8 }, { 0xb, 10 }, { 0x25, 9 }, { 0xa, 10 },
+{ 0x24, 9 }, { 0x6, 11 }, { 0x21, 10 }, { 0x20, 10 },
+{ 0x8, 10 }, { 0x20, 11 }, { 0x7, 4 }, { 0xc, 6 },
+{ 0x10, 7 }, { 0x13, 8 }, { 0x11, 9 }, { 0x12, 9 },
+{ 0x4, 10 }, { 0x27, 11 }, { 0x26, 11 }, { 0x5f, 12 },
+{ 0xf, 6 }, { 0x13, 9 }, { 0x5, 10 }, { 0x25, 11 },
+{ 0xe, 6 }, { 0x14, 9 }, { 0x24, 11 }, { 0xd, 6 },
+{ 0x6, 10 }, { 0x5e, 12 }, { 0x11, 7 }, { 0x7, 10 },
+{ 0x13, 7 }, { 0x5d, 12 }, { 0x12, 7 }, { 0x5c, 12 },
+{ 0x14, 8 }, { 0x5b, 12 }, { 0x15, 8 }, { 0x1a, 8 },
+{ 0x19, 8 }, { 0x18, 8 }, { 0x17, 8 }, { 0x16, 8 },
+{ 0x19, 9 }, { 0x15, 9 }, { 0x16, 9 }, { 0x18, 9 },
+{ 0x17, 9 }, { 0x4, 11 }, { 0x5, 11 }, { 0x58, 12 },
+{ 0x59, 12 }, { 0x5a, 12 }, { 0x3, 7 },
};
-const INT8 inter_run_aic[102] = {
- 0, 1, 3, 5, 7, 8, 9, 10,
- 11, 4, 9, 13, 0, 1, 1, 1,
- 1, 1, 0, 3, 2, 3, 0, 4,
- 3, 0, 5, 5, 2, 6, 0, 4,
- 7, 0, 0, 8, 0, 2, 0, 12,
- 0, 0, 2, 1, 6, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 14, 20, 1, 19, 2,
- 3, 0, 5, 6, 4, 0, 9, 10,
- 11, 12, 13, 8, 7, 0, 17, 18,
- 16, 15, 2, 1, 0, 0, 4, 3,
- 1, 0, 2, 1, 0, 0, 21, 22,
- 23, 7, 6, 5, 3, 0,
+const INT8 intra_run_aic[102] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 3, 3, 3, 3,
+ 4, 4, 4, 5, 5, 5, 6, 6,
+ 7, 7, 8, 8, 9, 9, 10, 11,
+12, 13, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 1, 1, 1, 1,
+ 2, 2, 2, 3, 3, 3, 4, 4,
+ 5, 5, 6, 6, 7, 7, 8, 9,
+10, 11, 12, 13, 14, 15, 16, 17,
+18, 19, 20, 21, 22, 23,
+};
+
+const INT8 intra_level_aic[102] = {
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+17, 18, 19, 20, 21, 22, 23, 24,
+25, 1, 2, 3, 4, 5, 6, 7,
+ 1, 2, 3, 4, 1, 2, 3, 4,
+ 1, 2, 3, 1, 2, 3, 1, 2,
+ 1, 2, 1, 2, 1, 2, 1, 1,
+ 1, 1, 1, 2, 3, 4, 5, 6,
+ 7, 8, 9, 10, 1, 2, 3, 4,
+ 1, 2, 3, 1, 2, 3, 1, 2,
+ 1, 2, 1, 2, 1, 2, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1,
};
static RLTable rl_intra_aic = {
102,
58,
- inter_vlc,
- inter_run_aic,
- inter_level_aic,
+ intra_vlc_aic,
+ intra_run_aic,
+ intra_level_aic,
};
static const UINT16 h263_format[8][2] = {
@@ -174,4 +202,3 @@ static const UINT16 h263_format[8][2] = {
{ 704, 576 },
{ 1408, 1152 },
};
-
diff --git a/src/libffmpeg/libavcodec/h263dec.c b/src/libffmpeg/libavcodec/h263dec.c
index e909ac56e..3c90a1e47 100644
--- a/src/libffmpeg/libavcodec/h263dec.c
+++ b/src/libffmpeg/libavcodec/h263dec.c
@@ -1,53 +1,60 @@
/*
* H263 decoder
- * Copyright (c) 2001 Gerard Lantau.
+ * Copyright (c) 2001 Fabrice Bellard.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
*
- * This program is distributed in the hope that it will be useful,
+ * This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-#include "config.h"
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include "dsputil.h"
#include "avcodec.h"
+#include "dsputil.h"
#include "mpegvideo.h"
-#include "xine-utils/xineutils.h"
//#define DEBUG
+//#define PRINT_FRAME_TIME
+#ifdef PRINT_FRAME_TIME
+static inline long long rdtsc()
+{
+ long long l;
+ asm volatile( "rdtsc\n\t"
+ : "=A" (l)
+ );
+// printf("%d\n", int(l/1000));
+ return l;
+}
+#endif
static int h263_decode_init(AVCodecContext *avctx)
{
MpegEncContext *s = avctx->priv_data;
- int i;
s->avctx = avctx;
s->out_format = FMT_H263;
s->width = avctx->width;
s->height = avctx->height;
+ s->workaround_bugs= avctx->workaround_bugs;
/* select sub codec */
switch(avctx->codec->id) {
case CODEC_ID_H263:
s->gob_number = 0;
- s->first_gob_line = 0;
+ s->first_slice_line = 0;
break;
case CODEC_ID_MPEG4:
s->time_increment_bits = 4; /* default value for broken headers */
s->h263_pred = 1;
- s->has_b_frames = 1;
+ s->has_b_frames = 1; //default, might be overriden in the vol header during header parsing
break;
case CODEC_ID_MSMPEG4V1:
s->h263_msmpeg4 = 1;
@@ -64,23 +71,25 @@ static int h263_decode_init(AVCodecContext *avctx)
s->h263_pred = 1;
s->msmpeg4_version=3;
break;
+ case CODEC_ID_WMV1:
+ s->h263_msmpeg4 = 1;
+ s->h263_pred = 1;
+ s->msmpeg4_version=4;
+ break;
case CODEC_ID_H263I:
s->h263_intel = 1;
break;
default:
return -1;
}
-
+ s->codec_id= avctx->codec->id;
+ avctx->mbskip_table= s->mbskip_table;
+
/* for h263, we allocate the images after having read the header */
if (avctx->codec->id != CODEC_ID_H263 && avctx->codec->id != CODEC_ID_MPEG4)
if (MPV_common_init(s) < 0)
return -1;
- /* XXX: suppress this matrix init, only needed because using mpeg1
- dequantize in mmx case */
- for(i=0;i<64;i++)
- s->non_intra_matrix[i] = default_non_intra_matrix[i];
-
if (s->h263_msmpeg4)
msmpeg4_decode_init_vlc(s);
else
@@ -104,25 +113,37 @@ static int h263_decode_frame(AVCodecContext *avctx,
MpegEncContext *s = avctx->priv_data;
int ret;
AVPicture *pict = data;
-
+#ifdef PRINT_FRAME_TIME
+uint64_t time= rdtsc();
+#endif
#ifdef DEBUG
printf("*****frame %d size=%d\n", avctx->frame_number, buf_size);
printf("bytes=%x %x %x %x\n", buf[0], buf[1], buf[2], buf[3]);
#endif
-
+
+ s->hurry_up= avctx->hurry_up;
+ s->error_resilience= avctx->error_resilience;
+ s->workaround_bugs= avctx->workaround_bugs;
+ s->flags= avctx->flags;
+
/* no supplementary picture */
if (buf_size == 0) {
*data_size = 0;
return 0;
}
- init_get_bits(&s->gb, buf, buf_size);
+ if(s->bitstream_buffer_size && buf_size<20){ //divx 5.01+ frame reorder
+ init_get_bits(&s->gb, s->bitstream_buffer, s->bitstream_buffer_size);
+ }else
+ init_get_bits(&s->gb, buf, buf_size);
+ s->bitstream_buffer_size=0;
/* let's go :-) */
if (s->h263_msmpeg4) {
ret = msmpeg4_decode_picture_header(s);
} else if (s->h263_pred) {
ret = mpeg4_decode_picture_header(s);
+ s->has_b_frames= !s->low_delay;
} else if (s->h263_intel) {
ret = intel_h263_decode_picture_header(s);
} else {
@@ -146,8 +167,21 @@ static int h263_decode_frame(AVCodecContext *avctx,
return -1;
}
+ if(ret==FRAME_SKIPED) return 0;
+ /* skip if the header was trashed */
if (ret < 0)
return -1;
+ /* skip b frames if we don't have reference frames */
+ if(s->num_available_buffers<2 && s->pict_type==B_TYPE) return 0;
+ /* skip b frames if we are in a hurry */
+ if(s->hurry_up && s->pict_type==B_TYPE) return 0;
+
+ if(s->next_p_frame_damaged){
+ if(s->pict_type==B_TYPE)
+ return 0;
+ else
+ s->next_p_frame_damaged=0;
+ }
MPV_frame_start(s);
@@ -155,6 +189,12 @@ static int h263_decode_frame(AVCodecContext *avctx,
printf("qscale=%d\n", s->qscale);
#endif
+ /* init resync/ error resilience specific variables */
+ s->next_resync_qscale= s->qscale;
+ s->next_resync_gb= s->gb;
+ if(s->resync_marker) s->mb_num_left= 0;
+ else s->mb_num_left= s->mb_num;
+
/* decode each macroblock */
s->block_wrap[0]=
s->block_wrap[1]=
@@ -167,7 +207,13 @@ static int h263_decode_frame(AVCodecContext *avctx,
/* FIXME: In the future H.263+ will have intra prediction */
/* and we are gonna need another way to detect MPEG4 */
if (s->mb_y && !s->h263_pred) {
- s->first_gob_line = h263_decode_gob_header(s);
+ s->first_slice_line = h263_decode_gob_header(s);
+ }
+
+ if(s->msmpeg4_version==1){
+ s->last_dc[0]=
+ s->last_dc[1]=
+ s->last_dc[2]= 128;
}
s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1;
@@ -186,35 +232,95 @@ static int h263_decode_frame(AVCodecContext *avctx,
#ifdef DEBUG
printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
#endif
+
+ if(s->resync_marker){
+ if(s->mb_num_left<=0){
+ /* except the first block */
+ if(s->mb_x!=0 || s->mb_y!=0){
+ /* did we miss the next resync marker without noticing an error yet */
+ if(((get_bits_count(&s->gb)+8)&(~7)) != s->next_resync_pos && s->decoding_error==0){
+ fprintf(stderr, "slice end missmatch x:%d y:%d %d %d\n",
+ s->mb_x, s->mb_y, get_bits_count(&s->gb), s->next_resync_pos);
+ ff_conceal_past_errors(s, 1);
+ }
+ }
+ s->qscale= s->next_resync_qscale;
+ s->gb= s->next_resync_gb;
+ s->resync_mb_x= s->mb_x; //we know that the marker is here cuz mb_num_left was the distance to it
+ s->resync_mb_y= s->mb_y;
+ s->first_slice_line=1;
+
+ if(s->codec_id==CODEC_ID_MPEG4){
+ ff_mpeg4_clean_buffers(s);
+ ff_mpeg4_resync(s);
+ }
+ }
+
+ if( s->resync_mb_x==s->mb_x
+ && s->resync_mb_y==s->mb_y && s->decoding_error!=0){
+ fprintf(stderr, "resynced at %d %d\n", s->mb_x, s->mb_y);
+ s->decoding_error= 0;
+ }
+ }
+
//fprintf(stderr,"\nFrame: %d\tMB: %d",avctx->frame_number, (s->mb_y * s->mb_width) + s->mb_x);
/* DCT & quantize */
- if (s->h263_msmpeg4) {
- msmpeg4_dc_scale(s);
- } else if (s->h263_pred) {
- h263_dc_scale(s);
+ if (s->h263_pred && !(s->msmpeg4_version==1 || s->msmpeg4_version==2)) {
+ /* old ffmpeg encoded msmpeg4v3 workaround */
+ if(s->workaround_bugs==1 && s->msmpeg4_version==3)
+ ff_old_msmpeg4_dc_scale(s);
+ else
+ h263_dc_scale(s);
} else {
/* default quantization values */
s->y_dc_scale = 8;
s->c_dc_scale = 8;
}
- clear_blocks(s->block[0]);
+
+ if(s->decoding_error!=DECODING_DESYNC){
+ int last_error= s->decoding_error;
+ clear_blocks(s->block[0]);
- s->mv_dir = MV_DIR_FORWARD;
- s->mv_type = MV_TYPE_16X16;
- if (s->h263_msmpeg4) {
- if (msmpeg4_decode_mb(s, s->block) < 0) {
- fprintf(stderr,"\nError at MB: %d\n", (s->mb_y * s->mb_width) + s->mb_x);
- return -1;
- }
- } else {
- if (h263_decode_mb(s, s->block) < 0) {
- fprintf(stderr,"\nError at MB: %d\n", (s->mb_y * s->mb_width) + s->mb_x);
- return -1;
+ s->mv_dir = MV_DIR_FORWARD;
+ s->mv_type = MV_TYPE_16X16;
+ if (s->h263_msmpeg4) {
+ if (msmpeg4_decode_mb(s, s->block) < 0) {
+ fprintf(stderr,"Error at MB: %d\n", (s->mb_y * s->mb_width) + s->mb_x);
+ s->decoding_error=DECODING_DESYNC;
+ }
+ } else {
+ if (h263_decode_mb(s, s->block) < 0) {
+ fprintf(stderr,"Error at MB: %d\n", (s->mb_y * s->mb_width) + s->mb_x);
+ s->decoding_error=DECODING_DESYNC;
+ }
+ }
+
+ if(s->decoding_error!=last_error){
+ ff_conceal_past_errors(s, 0);
}
}
+
+ /* conceal errors */
+ if( s->decoding_error==DECODING_DESYNC
+ || (s->decoding_error==DECODING_ACDC_LOST && s->mb_intra)){
+ s->mv_dir = MV_DIR_FORWARD;
+ s->mv_type = MV_TYPE_16X16;
+ s->mb_skiped=0;
+ s->mb_intra=0;
+ s->mv[0][0][0]=0; //FIXME this is not optimal
+ s->mv[0][0][1]=0;
+ clear_blocks(s->block[0]);
+ }else if(s->decoding_error && !s->mb_intra){
+ clear_blocks(s->block[0]);
+ }
+ //FIXME remove AC for intra
+
MPV_decode_mb(s, s->block);
+
+ s->mb_num_left--;
}
- if (avctx->draw_horiz_band) {
+ if ( avctx->draw_horiz_band
+ && (s->num_available_buffers>=1 || (!s->has_b_frames)) ) {
UINT8 *src_ptr[3];
int y, h, offset;
y = s->mb_y * 16;
@@ -236,11 +342,84 @@ static int h263_decode_frame(AVCodecContext *avctx,
}
}
- if (s->h263_msmpeg4 && s->pict_type==I_TYPE)
+ if (s->h263_msmpeg4 && s->msmpeg4_version<4 && s->pict_type==I_TYPE)
if(msmpeg4_decode_ext_header(s, buf_size) < 0) return -1;
+
+ /* divx 5.01+ bitstream reorder stuff */
+ if(s->codec_id==CODEC_ID_MPEG4 && s->bitstream_buffer_size==0){
+ int current_pos= get_bits_count(&s->gb)>>3;
+ if( buf_size - current_pos > 5
+ && buf_size - current_pos < BITSTREAM_BUFFER_SIZE){
+ int i;
+ int startcode_found=0;
+ for(i=current_pos; i<buf_size; i++){
+ if(buf[i]==0 && buf[i+1]==0 && buf[i+2]==1 && buf[i+3]==0xB6){
+ startcode_found=1;
+ break;
+ }
+ }
+ if(startcode_found){
+ memcpy(s->bitstream_buffer, buf + current_pos, buf_size - current_pos);
+ s->bitstream_buffer_size= buf_size - current_pos;
+ }
+ }
+ }
+
+ if(s->bitstream_buffer_size==0 && s->error_resilience>0){
+ int left= s->gb.size*8 - get_bits_count(&s->gb);
+ int max_extra=8;
+
+ if(s->codec_id==CODEC_ID_MPEG4) max_extra+=32;
+
+ if(left>max_extra){
+ fprintf(stderr, "discarding %d junk bits at end, next would be %X\n", left, show_bits(&s->gb, 24));
+ if(s->decoding_error==0)
+ ff_conceal_past_errors(s, 1);
+ }
+ if(left<0){
+ fprintf(stderr, "overreading %d bits\n", -left);
+ if(s->decoding_error==0)
+ ff_conceal_past_errors(s, 1);
+ }
+ }
+
MPV_frame_end(s);
-
+#if 0 //dirty show MVs, we should export the MV tables and write a filter to show them
+{
+ int mb_y;
+ s->has_b_frames=1;
+ for(mb_y=0; mb_y<s->mb_height; mb_y++){
+ int mb_x;
+ int y= mb_y*16 + 8;
+ for(mb_x=0; mb_x<s->mb_width; mb_x++){
+ int x= mb_x*16 + 8;
+ uint8_t *ptr= s->last_picture[0];
+ int xy= 1 + mb_x*2 + (mb_y*2 + 1)*(s->mb_width*2 + 2);
+ int mx= (s->motion_val[xy][0]>>1) + x;
+ int my= (s->motion_val[xy][1]>>1) + y;
+ int i;
+ int max;
+
+ if(mx<0) mx=0;
+ if(my<0) my=0;
+ if(mx>=s->width) mx= s->width -1;
+ if(my>=s->height) my= s->height-1;
+ max= ABS(mx-x);
+ if(ABS(my-y) > max) max= ABS(my-y);
+ /* the ugliest linedrawing routine ... */
+ for(i=0; i<max; i++){
+ int x1= x + (mx-x)*i/max;
+ int y1= y + (my-y)*i/max;
+ ptr[y1*s->linesize + x1]+=100;
+ }
+ ptr[y*s->linesize + x]+=100;
+ s->mbskip_table[mb_x + mb_y*s->mb_width]=0;
+ }
+ }
+
+}
+#endif
if(s->pict_type==B_TYPE || (!s->has_b_frames)){
pict->data[0] = s->current_picture[0];
pict->data[1] = s->current_picture[1];
@@ -260,7 +439,13 @@ static int h263_decode_frame(AVCodecContext *avctx,
/* we substract 1 because it is added on utils.c */
avctx->frame_number = s->picture_number - 1;
- *data_size = sizeof(AVPicture);
+ /* don't output the last pic after seeking;
+ note we already added +1 for the current pic in MPV_frame_end(s) */
+ if(s->num_available_buffers>=2 || (!s->has_b_frames))
+ *data_size = sizeof(AVPicture);
+#ifdef PRINT_FRAME_TIME
+printf("%Ld\n", rdtsc()-time);
+#endif
return buf_size;
}
@@ -324,6 +509,18 @@ AVCodec msmpeg4v3_decoder = {
CODEC_CAP_DRAW_HORIZ_BAND,
};
+/* Codec table entry named "wmv1" for CODEC_ID_WMV1. It reuses the
+ * h263_decode_init / h263_decode_end / h263_decode_frame callbacks and uses
+ * MpegEncContext as its private context size. The NULL slot is presumably the
+ * unused encode callback -- TODO confirm against the AVCodec declaration. */
+AVCodec wmv1_decoder = {
+ "wmv1",
+ CODEC_TYPE_VIDEO,
+ CODEC_ID_WMV1,
+ sizeof(MpegEncContext),
+ h263_decode_init,
+ NULL,
+ h263_decode_end,
+ h263_decode_frame,
+ CODEC_CAP_DRAW_HORIZ_BAND,
+};
+
AVCodec h263i_decoder = {
"h263i",
CODEC_TYPE_VIDEO,
diff --git a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c
index 2c71850ee..b8eaa5fbd 100644
--- a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c
+++ b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c
@@ -1,25 +1,24 @@
/*
* MMX optimized DSP utils
- * Copyright (c) 2000, 2001 Gerard Lantau.
+ * Copyright (c) 2000, 2001 Fabrice Bellard.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
*
- * This program is distributed in the hope that it will be useful,
+ * This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* MMX optimization by Nick Kurshev <nickols_k@mail.ru>
*/
-#include "xine-utils/xineutils.h"
#include "../dsputil.h"
#include "../simple_idct.h"
@@ -45,38 +44,124 @@ int pix_abs8x8_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs8x8_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs8x8_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
-
/* external functions, from idct_mmx.c */
void ff_mmx_idct(DCTELEM *block);
void ff_mmxext_idct(DCTELEM *block);
/* pixel operations */
-static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001LL;
-static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002LL;
-//static const unsigned short mm_wone[4] __attribute__ ((aligned(8))) = { 0x1, 0x1, 0x1, 0x1 };
-//static const unsigned short mm_wtwo[4] __attribute__ ((aligned(8))) = { 0x2, 0x2, 0x2, 0x2 };
+static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL;
+static const uint64_t mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
+static const uint64_t mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002ULL;
#define JUMPALIGN() __asm __volatile (".balign 8"::)
#define MOVQ_ZERO(regd) __asm __volatile ("pxor %%" #regd ", %%" #regd ::)
+// loads 0x0001 into every 16-bit word of regd without a memory access:
+// pcmpeqd of a register with itself sets all bits, psrlw $15 then leaves
+// only the lowest bit of each word
+#define MOVQ_WONE(regd) \
+ __asm __volatile ( \
+ "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
+ "psrlw $15, %%" #regd ::)
+
+// loads 0xFE into every byte of regd: all-ones (0xFF per byte) added to
+// itself with wrapping byte addition gives 0xFE
+#define MOVQ_BFE(regd) \
+ __asm __volatile ( \
+ "pcmpeqd %%" #regd ", %%" #regd " \n\t"\
+ "paddb %%" #regd ", %%" #regd " \n\t" ::)
+
#ifndef PIC
-#define MOVQ_WONE(regd) __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_wone))
+#define MOVQ_BONE(regd) __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_bone))
#define MOVQ_WTWO(regd) __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_wtwo))
#else
// for shared library it's better to use this way for accessing constants
// pcmpeqd -> -1
-#define MOVQ_WONE(regd) \
+#define MOVQ_BONE(regd) \
__asm __volatile ( \
- "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
- "psrlw $15, %%" #regd ::)
+ "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
+ "psrlw $15, %%" #regd " \n\t" \
+ "packuswb %%" #regd ", %%" #regd " \n\t" ::)
#define MOVQ_WTWO(regd) \
__asm __volatile ( \
- "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
- "psrlw $15, %%" #regd " \n\t" \
- "psllw $1, %%" #regd ::)
+ "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
+ "psrlw $15, %%" #regd " \n\t" \
+ "psllw $1, %%" #regd " \n\t"::)
+
#endif
+// Byte-wise averaging helpers built from plain MMX instructions.
+// using regr as temporary and for the output result
+// first argument is unmodified and second is trashed
+// regfe is supposed to contain 0xfefefefefefefefe
+// (the 0xfe mask clears the low bit of each byte before the 64-bit shift,
+// so no bit moves into the neighbouring byte)
+// no-rounding form: regr = (rega & regb) + (((rega ^ regb) & regfe) >> 1),
+// i.e. per byte (a + b) >> 1
+#define PAVGB_MMX_NO_RND(rega, regb, regr, regfe) \
+ "movq " #rega ", " #regr " \n\t"\
+ "pand " #regb ", " #regr " \n\t"\
+ "pxor " #rega ", " #regb " \n\t"\
+ "pand " #regfe "," #regb " \n\t"\
+ "psrlq $1, " #regb " \n\t"\
+ "paddb " #regb ", " #regr " \n\t"
+
+// rounding form: regr = (rega | regb) - (((rega ^ regb) & regfe) >> 1),
+// i.e. per byte (a + b + 1) >> 1
+#define PAVGB_MMX(rega, regb, regr, regfe) \
+ "movq " #rega ", " #regr " \n\t"\
+ "por " #regb ", " #regr " \n\t"\
+ "pxor " #rega ", " #regb " \n\t"\
+ "pand " #regfe "," #regb " \n\t"\
+ "psrlq $1, " #regb " \n\t"\
+ "psubb " #regb ", " #regr " \n\t"
+
+// paired variants: the same computation on two register pairs at once,
+// (rega, regb) -> regr and (regc, regd) -> regp; regb and regd are trashed
+// mm6 is supposed to contain 0xfefefefefefefefe
+#define PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp) \
+ "movq " #rega ", " #regr " \n\t"\
+ "movq " #regc ", " #regp " \n\t"\
+ "pand " #regb ", " #regr " \n\t"\
+ "pand " #regd ", " #regp " \n\t"\
+ "pxor " #rega ", " #regb " \n\t"\
+ "pxor " #regc ", " #regd " \n\t"\
+ "pand %%mm6, " #regb " \n\t"\
+ "pand %%mm6, " #regd " \n\t"\
+ "psrlq $1, " #regb " \n\t"\
+ "psrlq $1, " #regd " \n\t"\
+ "paddb " #regb ", " #regr " \n\t"\
+ "paddb " #regd ", " #regp " \n\t"
+
+// rounding counterpart of PAVGBP_MMX_NO_RND; also uses mm6 as the 0xfe mask
+#define PAVGBP_MMX(rega, regb, regr, regc, regd, regp) \
+ "movq " #rega ", " #regr " \n\t"\
+ "movq " #regc ", " #regp " \n\t"\
+ "por " #regb ", " #regr " \n\t"\
+ "por " #regd ", " #regp " \n\t"\
+ "pxor " #rega ", " #regb " \n\t"\
+ "pxor " #regc ", " #regd " \n\t"\
+ "pand %%mm6, " #regb " \n\t"\
+ "pand %%mm6, " #regd " \n\t"\
+ "psrlq $1, " #regd " \n\t"\
+ "psrlq $1, " #regb " \n\t"\
+ "psubb " #regb ", " #regr " \n\t"\
+ "psubb " #regd ", " #regp " \n\t"
+
+/***********************************/
+/* MMX no rounding */
+/* dsputil_mmx_rnd.h is included twice in this file, each time under a
+ different set of DEF / SET_RND / PAVGBP / PAVGB definitions. In this
+ pass DEF(x, y) pastes the name x_no_rnd_y_mmx, SET_RND loads the word
+ constant 1 and the PAVGB macros map to the no-rounding averages. */
+#define DEF(x, y) x ## _no_rnd_ ## y ##_mmx
+#define SET_RND MOVQ_WONE
+#define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX_NO_RND(a, b, c, d, e, f)
+#define PAVGB(a, b, c, e) PAVGB_MMX_NO_RND(a, b, c, e)
+
+#include "dsputil_mmx_rnd.h"
+
+#undef DEF
+#undef SET_RND
+#undef PAVGBP
+#undef PAVGB
+/***********************************/
+/* MMX rounding */
+
+/* second pass over the same header: DEF(x, y) pastes x_y_mmx, SET_RND
+ loads the word constant 2 and the PAVGB macros map to the rounding
+ averages */
+#define DEF(x, y) x ## _ ## y ##_mmx
+#define SET_RND MOVQ_WTWO
+#define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX(a, b, c, d, e, f)
+#define PAVGB(a, b, c, e) PAVGB_MMX(a, b, c, e)
+
+#include "dsputil_mmx_rnd.h"
+
+#undef DEF
+#undef SET_RND
+#undef PAVGBP
+#undef PAVGB
+
/***********************************/
/* 3Dnow specific */
@@ -92,7 +177,7 @@ static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x000
/***********************************/
/* MMX2 specific */
-#define DEF(x) x ## _sse
+#define DEF(x) x ## _mmx2
/* Introduced only in MMX2 set */
#define PAVGB "pavgb"
@@ -107,34 +192,59 @@ static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x000
+/**
+ * Copy an 8x8 block of bytes into 16-bit coefficients.
+ *
+ * Reads 8 rows of 8 bytes starting at pixels (rows are line_size bytes
+ * apart) and stores each byte zero-extended to 16 bits into block[0..63].
+ * The loop handles two rows per iteration; eax counts the byte offset from
+ * block+64 upwards from -128 to 0, which assumes 2-byte DCTELEMs.
+ *
+ * The stores go through a pointer passed as an input operand, so the asm
+ * carries a "memory" clobber to tell the compiler that block is modified.
+ */
static void get_pixels_mmx(DCTELEM *block, const UINT8 *pixels, int line_size)
{
- DCTELEM *p;
- const UINT8 *pix;
- int i;
+ asm volatile(
+ "movl $-128, %%eax \n\t"
+ "pxor %%mm7, %%mm7 \n\t"
+ ".balign 16 \n\t"
+ "1: \n\t"
+ "movq (%0), %%mm0 \n\t"
+ "movq (%0, %2), %%mm2 \n\t"
+ "movq %%mm0, %%mm1 \n\t"
+ "movq %%mm2, %%mm3 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "punpckhbw %%mm7, %%mm1 \n\t"
+ "punpcklbw %%mm7, %%mm2 \n\t"
+ "punpckhbw %%mm7, %%mm3 \n\t"
+ "movq %%mm0, (%1, %%eax)\n\t"
+ "movq %%mm1, 8(%1, %%eax)\n\t"
+ "movq %%mm2, 16(%1, %%eax)\n\t"
+ "movq %%mm3, 24(%1, %%eax)\n\t"
+ "addl %3, %0 \n\t"
+ "addl $32, %%eax \n\t"
+ "js 1b \n\t"
+ : "+r" (pixels)
+ : "r" (block+64), "r" (line_size), "r" (line_size*2)
+ : "%eax", "memory"
+ );
+}
- /* read the pixels */
- p = block;
- pix = pixels;
- MOVQ_ZERO(mm7);
- for(i=0;i<4;i++) {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %2, %%mm1\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "movq %%mm0, %0\n\t"
- "movq %%mm2, 8%0\n\t"
- "movq %%mm1, 16%0\n\t"
- "movq %%mm3, 24%0\n\t"
- :"=m"(*p)
- :"m"(*pix), "m"(*(pix+line_size))
- :"memory");
- pix += line_size*2;
- p += 16;
- }
+/**
+ * Store the 8x8 difference s1 - s2 as 16-bit coefficients.
+ *
+ * For each of 8 rows, 8 bytes are read from s1 and from s2, zero-extended
+ * to 16 bits and subtracted (s1 minus s2); the 8 results of a row are
+ * written to consecutive entries of block. Both source pointers advance by
+ * stride bytes per row. eax counts the byte offset from block+64 upwards
+ * from -128 to 0, which assumes 2-byte DCTELEMs.
+ *
+ * The stores go through a pointer passed as an input operand, so the asm
+ * carries a "memory" clobber to tell the compiler that block is modified.
+ */
+static void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride)
+{
+ asm volatile(
+ "pxor %%mm7, %%mm7 \n\t"
+ "movl $-128, %%eax \n\t"
+ ".balign 16 \n\t"
+ "1: \n\t"
+ "movq (%0), %%mm0 \n\t"
+ "movq (%1), %%mm2 \n\t"
+ "movq %%mm0, %%mm1 \n\t"
+ "movq %%mm2, %%mm3 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "punpckhbw %%mm7, %%mm1 \n\t"
+ "punpcklbw %%mm7, %%mm2 \n\t"
+ "punpckhbw %%mm7, %%mm3 \n\t"
+ "psubw %%mm2, %%mm0 \n\t"
+ "psubw %%mm3, %%mm1 \n\t"
+ "movq %%mm0, (%2, %%eax)\n\t"
+ "movq %%mm1, 8(%2, %%eax)\n\t"
+ "addl %3, %0 \n\t"
+ "addl %3, %1 \n\t"
+ "addl $16, %%eax \n\t"
+ "jnz 1b \n\t"
+ : "+r" (s1), "+r" (s2)
+ : "r" (block+64), "r" (stride)
+ : "%eax", "memory"
+ );
}
static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size)
@@ -203,12 +313,12 @@ static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line
pix = pixels;
MOVQ_ZERO(mm7);
i = 4;
- while (i) {
+ do {
__asm __volatile(
- "movq %2, %%mm0\n\t"
- "movq 8%2, %%mm1\n\t"
- "movq 16%2, %%mm2\n\t"
- "movq 24%2, %%mm3\n\t"
+ "movq (%2), %%mm0\n\t"
+ "movq 8(%2), %%mm1\n\t"
+ "movq 16(%2), %%mm2\n\t"
+ "movq 24(%2), %%mm3\n\t"
"movq %0, %%mm4\n\t"
"movq %1, %%mm6\n\t"
"movq %%mm4, %%mm5\n\t"
@@ -226,809 +336,42 @@ static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line
"movq %%mm0, %0\n\t"
"movq %%mm2, %1\n\t"
:"+m"(*pix), "+m"(*(pix+line_size))
- :"m"(*p)
+ :"r"(p)
:"memory");
pix += line_size*2;
p += 16;
- i--;
- };
+ } while (--i);
}
static void put_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h)
{
- int hh;
- UINT8 *p;
- const UINT8 *pix;
-
- p = block;
- pix = pixels; // 2s
-#if 0
- do {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix)
- :"memory");
- pix += line_size;
- p += line_size;
- } while (--h);
-#else
- // this optimized code is not very usefull
- // the above loop is definitely faster
- // at least on Celeron 500MHz
- hh = h & 3;
- while (hh) {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix)
- :"memory");
- pix += line_size;
- p += line_size;
- hh--;
- }
- hh=h>>2;
- while (hh) {
- __asm __volatile(
- "movq (%1), %%mm0 \n\t"
- "movq (%1, %2), %%mm1 \n\t"
- "movq (%1, %2, 2), %%mm2 \n\t"
- "movq (%1, %3), %%mm3 \n\t"
- "movq %%mm0, (%0) \n\t"
- "movq %%mm1, (%0, %2) \n\t"
- "movq %%mm2, (%0, %2, 2) \n\t"
- "movq %%mm3, (%0, %3) \n\t"
- ::"r"(p), "r"(pix), "r"(line_size), "r"(line_size*3)
- :"memory");
- pix += line_size*4;
- p += line_size*4;
- hh--;
- }
-#endif
-}
-
-static void put_pixels_x2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- MOVQ_ZERO(mm7);
- MOVQ_WONE(mm4);
- JUMPALIGN();
- do {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq 1%1, %%mm1\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "paddusw %%mm4, %%mm0\n\t"
- "paddusw %%mm4, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix)
- :"memory");
- pix += line_size; p += line_size;
- } while (--h);
-}
-
-static void put_pixels_y2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- MOVQ_ZERO(mm7);
- MOVQ_WONE(mm4);
- JUMPALIGN();
- do {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %2, %%mm1\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "paddusw %%mm4, %%mm0\n\t"
- "paddusw %%mm4, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix),
- "m"(*(pix+line_size))
- :"memory");
- pix += line_size;
- p += line_size;
- } while (--h);
-}
-
-static void put_pixels_xy2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels; // 1s
- MOVQ_ZERO(mm7);
- MOVQ_WTWO(mm6);
- JUMPALIGN();
- do {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %2, %%mm1\n\t"
- "movq 1%1, %%mm4\n\t"
- "movq 1%2, %%mm5\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "movq %%mm4, %%mm1\n\t"
- "movq %%mm5, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm4\n\t"
- "punpcklbw %%mm7, %%mm5\n\t"
- "punpckhbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm5, %%mm4\n\t"
- "paddusw %%mm3, %%mm1\n\t"
- "paddusw %%mm6, %%mm4\n\t"
- "paddusw %%mm6, %%mm1\n\t"
- "paddusw %%mm4, %%mm0\n\t"
- "paddusw %%mm1, %%mm2\n\t"
- "psrlw $2, %%mm0\n\t"
- "psrlw $2, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix),
- "m"(*(pix+line_size))
- :"memory");
- pix += line_size;
- p += line_size;
- } while(--h);
-}
-
-static void put_no_rnd_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- MOVQ_ZERO(mm7);
- do {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq 1%1, %%mm1\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix)
- :"memory");
- pix += line_size;
- p += line_size;
- } while (--h);
-}
-
-static void put_no_rnd_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- MOVQ_ZERO(mm7);
- JUMPALIGN();
- do {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %2, %%mm1\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix),
- "m"(*(pix+line_size))
- :"memory");
- pix += line_size;
- p += line_size;
- } while(--h);
-}
-
-static void put_no_rnd_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- MOVQ_ZERO(mm7);
- MOVQ_WONE(mm6);
- JUMPALIGN();
- do {
__asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %2, %%mm1\n\t"
- "movq 1%1, %%mm4\n\t"
- "movq 1%2, %%mm5\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "movq %%mm4, %%mm1\n\t"
- "movq %%mm5, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm4\n\t"
- "punpcklbw %%mm7, %%mm5\n\t"
- "punpckhbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm5, %%mm4\n\t"
- "paddusw %%mm3, %%mm1\n\t"
- "paddusw %%mm6, %%mm4\n\t"
- "paddusw %%mm6, %%mm1\n\t"
- "paddusw %%mm4, %%mm0\n\t"
- "paddusw %%mm1, %%mm2\n\t"
- "psrlw $2, %%mm0\n\t"
- "psrlw $2, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix),
- "m"(*(pix+line_size))
- :"memory");
- pix += line_size;
- p += line_size;
- } while(--h);
-}
-
-static void avg_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- MOVQ_ZERO(mm7);
- MOVQ_WONE(mm6);
- JUMPALIGN();
- do {
- __asm __volatile(
- "movq %0, %%mm0\n\t"
- "movq %1, %%mm1\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "paddusw %%mm6, %%mm0\n\t"
- "paddusw %%mm6, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"+m"(*p)
- :"m"(*pix)
- :"memory");
- pix += line_size;
- p += line_size;
- }
- while (--h);
-}
-
-static void avg_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- MOVQ_ZERO(mm7);
- MOVQ_WONE(mm6);
- JUMPALIGN();
- do {
- __asm __volatile(
- "movq %1, %%mm1\n\t"
- "movq %0, %%mm0\n\t"
- "movq 1%1, %%mm4\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "movq %%mm4, %%mm5\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm4\n\t"
- "punpckhbw %%mm7, %%mm5\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "paddusw %%mm4, %%mm1\n\t"
- "paddusw %%mm5, %%mm3\n\t"
- "paddusw %%mm6, %%mm1\n\t"
- "paddusw %%mm6, %%mm3\n\t"
- "psrlw $1, %%mm1\n\t"
- "psrlw $1, %%mm3\n\t"
- "paddusw %%mm6, %%mm0\n\t"
- "paddusw %%mm6, %%mm2\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"+m"(*p)
- :"m"(*pix)
- :"memory");
- pix += line_size;
- p += line_size;
- } while (--h);
-}
-
-static void avg_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- MOVQ_ZERO(mm7);
- MOVQ_WONE(mm6);
- JUMPALIGN();
- do {
- __asm __volatile(
- "movq %1, %%mm1\n\t"
- "movq %0, %%mm0\n\t"
- "movq %2, %%mm4\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "movq %%mm4, %%mm5\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm4\n\t"
- "punpckhbw %%mm7, %%mm5\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "paddusw %%mm4, %%mm1\n\t"
- "paddusw %%mm5, %%mm3\n\t"
- "paddusw %%mm6, %%mm1\n\t"
- "paddusw %%mm6, %%mm3\n\t"
- "psrlw $1, %%mm1\n\t"
- "psrlw $1, %%mm3\n\t"
- "paddusw %%mm6, %%mm0\n\t"
- "paddusw %%mm6, %%mm2\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"+m"(*p)
- :"m"(*pix), "m"(*(pix+line_size))
- :"memory");
- pix += line_size;
- p += line_size ;
- } while(--h);
-}
-
-static void avg_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- MOVQ_ZERO(mm7);
- // this doesn't seem to be used offten - so
- // the inside usage of mm_wone is not optimized
- MOVQ_WTWO(mm6);
- do {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %2, %%mm1\n\t"
- "movq 1%1, %%mm4\n\t"
- "movq 1%2, %%mm5\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "movq %%mm4, %%mm1\n\t"
- "movq %%mm5, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm4\n\t"
- "punpcklbw %%mm7, %%mm5\n\t"
- "punpckhbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm5, %%mm4\n\t"
- "paddusw %%mm3, %%mm1\n\t"
- "paddusw %%mm6, %%mm4\n\t"
- "paddusw %%mm6, %%mm1\n\t"
- "paddusw %%mm4, %%mm0\n\t"
- "paddusw %%mm1, %%mm2\n\t"
- "movq %3, %%mm5\n\t"
- "psrlw $2, %%mm0\n\t"
- "movq %0, %%mm1\n\t"
- "psrlw $2, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "paddusw %%mm5, %%mm0\n\t"
- "paddusw %%mm5, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"+m"(*p)
- :"m"(*pix),
- "m"(*(pix+line_size)), "m"(mm_wone)
- :"memory");
- pix += line_size;
- p += line_size ;
- } while(--h);
-}
-
-static void avg_no_rnd_pixels_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- MOVQ_ZERO(mm7);
- do {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %0, %%mm1\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"+m"(*p)
- :"m"(*pix)
- :"memory");
- pix += line_size;
- p += line_size ;
- } while (--h);
-}
-
-static void avg_no_rnd_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- MOVQ_ZERO(mm7);
- do {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq 1%1, %%mm1\n\t"
- "movq %0, %%mm4\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "movq %%mm4, %%mm5\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm4\n\t"
- "punpckhbw %%mm7, %%mm5\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "paddusw %%mm4, %%mm0\n\t"
- "paddusw %%mm5, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"+m"(*p)
- :"m"(*pix)
- :"memory");
- pix += line_size;
- p += line_size;
- } while (--h);
-}
-
-static void avg_no_rnd_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- MOVQ_ZERO(mm7);
- do {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %2, %%mm1\n\t"
- "movq %0, %%mm4\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "movq %%mm4, %%mm5\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm4\n\t"
- "punpckhbw %%mm7, %%mm5\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "paddusw %%mm4, %%mm0\n\t"
- "paddusw %%mm5, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"+m"(*p)
- :"m"(*pix), "m"(*(pix+line_size))
- :"memory");
- pix += line_size;
- p += line_size ;
- } while(--h);
-}
-
-static void avg_no_rnd_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h)
-{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- MOVQ_ZERO(mm7);
- MOVQ_WONE(mm6);
- JUMPALIGN();
- do {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %2, %%mm1\n\t"
- "movq 1%1, %%mm4\n\t"
- "movq 1%2, %%mm5\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "movq %%mm4, %%mm1\n\t"
- "movq %%mm5, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm4\n\t"
- "punpcklbw %%mm7, %%mm5\n\t"
- "punpckhbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm5, %%mm4\n\t"
- "paddusw %%mm3, %%mm1\n\t"
- "paddusw %%mm6, %%mm4\n\t"
- "paddusw %%mm6, %%mm1\n\t"
- "paddusw %%mm4, %%mm0\n\t"
- "paddusw %%mm1, %%mm2\n\t"
- "movq %0, %%mm1\n\t"
- "psrlw $2, %%mm0\n\t"
- "movq %%mm1, %%mm3\n\t"
- "psrlw $2, %%mm2\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "psrlw $1, %%mm0\n\t"
- "psrlw $1, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"+m"(*p)
- :"m"(*pix),
- "m"(*(pix+line_size))
- :"memory");
- pix += line_size;
- p += line_size;
- } while(--h);
-}
-
-static void sub_pixels_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h)
-{
- DCTELEM *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- MOVQ_ZERO(mm7);
- do {
- __asm __volatile(
- "movq %0, %%mm0\n\t"
- "movq %1, %%mm2\n\t"
- "movq 8%0, %%mm1\n\t"
- "movq %%mm2, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "psubsw %%mm2, %%mm0\n\t"
- "psubsw %%mm3, %%mm1\n\t"
- "movq %%mm0, %0\n\t"
- "movq %%mm1, 8%0\n\t"
- :"+m"(*p)
- :"m"(*pix)
- :"memory");
- pix += line_size;
- p += 8;
- } while (--h);
-}
-
-static void sub_pixels_x2_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h)
-{
- DCTELEM *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- MOVQ_ZERO(mm7);
- MOVQ_WONE(mm6);
- JUMPALIGN();
- do {
- __asm __volatile(
- "movq %0, %%mm0\n\t"
- "movq %1, %%mm2\n\t"
- "movq 8%0, %%mm1\n\t"
- "movq 1%1, %%mm4\n\t"
- "movq %%mm2, %%mm3\n\t"
- "movq %%mm4, %%mm5\n\t"
- "punpcklbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm4\n\t"
- "punpckhbw %%mm7, %%mm5\n\t"
- "paddusw %%mm4, %%mm2\n\t"
- "paddusw %%mm5, %%mm3\n\t"
- "paddusw %%mm6, %%mm2\n\t"
- "paddusw %%mm6, %%mm3\n\t"
- "psrlw $1, %%mm2\n\t"
- "psrlw $1, %%mm3\n\t"
- "psubsw %%mm2, %%mm0\n\t"
- "psubsw %%mm3, %%mm1\n\t"
- "movq %%mm0, %0\n\t"
- "movq %%mm1, 8%0\n\t"
- :"+m"(*p)
- :"m"(*pix)
- :"memory");
- pix += line_size;
- p += 8;
- } while (--h);
-}
-
-static void sub_pixels_y2_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h)
-{
- DCTELEM *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- MOVQ_ZERO(mm7);
- MOVQ_WONE(mm6);
- do {
- __asm __volatile(
- "movq %0, %%mm0\n\t"
- "movq %1, %%mm2\n\t"
- "movq 8%0, %%mm1\n\t"
- "movq %2, %%mm4\n\t"
- "movq %%mm2, %%mm3\n\t"
- "movq %%mm4, %%mm5\n\t"
- "punpcklbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm4\n\t"
- "punpckhbw %%mm7, %%mm5\n\t"
- "paddusw %%mm4, %%mm2\n\t"
- "paddusw %%mm5, %%mm3\n\t"
- "paddusw %%mm6, %%mm2\n\t"
- "paddusw %%mm6, %%mm3\n\t"
- "psrlw $1, %%mm2\n\t"
- "psrlw $1, %%mm3\n\t"
- "psubsw %%mm2, %%mm0\n\t"
- "psubsw %%mm3, %%mm1\n\t"
- "movq %%mm0, %0\n\t"
- "movq %%mm1, 8%0\n\t"
- :"+m"(*p)
- :"m"(*pix), "m"(*(pix+line_size))
- :"memory");
- pix += line_size;
- p += 8;
- } while (--h);
-}
-
-static void sub_pixels_xy2_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h)
-{
- DCTELEM *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- MOVQ_ZERO(mm7);
- MOVQ_WTWO(mm6);
- JUMPALIGN();
- do {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %2, %%mm1\n\t"
- "movq 1%1, %%mm4\n\t"
- "movq 1%2, %%mm5\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "movq %%mm4, %%mm1\n\t"
- "movq %%mm5, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm4\n\t"
- "punpcklbw %%mm7, %%mm5\n\t"
- "punpckhbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm5, %%mm4\n\t"
- "paddusw %%mm3, %%mm1\n\t"
- "paddusw %%mm6, %%mm4\n\t"
- "paddusw %%mm6, %%mm1\n\t"
- "paddusw %%mm4, %%mm0\n\t"
- "paddusw %%mm1, %%mm2\n\t"
- "movq %0, %%mm1\n\t"
- "movq 8%0, %%mm3\n\t"
- "psrlw $2, %%mm0\n\t"
- "psrlw $2, %%mm2\n\t"
- "psubsw %%mm0, %%mm1\n\t"
- "psubsw %%mm2, %%mm3\n\t"
- "movq %%mm1, %0\n\t"
- "movq %%mm3, 8%0\n\t"
- :"+m"(*p)
- :"m"(*pix),
- "m"(*(pix+line_size))
- :"memory");
- pix += line_size;
- p += 8 ;
- } while(--h);
+ "lea (%3, %3), %%eax \n\t"
+ ".balign 8 \n\t"
+ "1: \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "addl %%eax, %1 \n\t"
+ "addl %%eax, %2 \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "addl %%eax, %1 \n\t"
+ "addl %%eax, %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ : "+g"(h), "+r" (pixels), "+r" (block)
+ : "r"(line_size)
+ : "%eax", "memory"
+ );
}
static void clear_blocks_mmx(DCTELEM *blocks)
{
- asm volatile(
+ __asm __volatile(
"pxor %%mm7, %%mm7 \n\t"
"movl $-128*6, %%eax \n\t"
"1: \n\t"
@@ -1043,7 +386,9 @@ static void clear_blocks_mmx(DCTELEM *blocks)
);
}
+#if 0
static void just_return() { return; }
+#endif
void dsputil_init_mmx(void)
{
@@ -1065,10 +410,11 @@ void dsputil_init_mmx(void)
if (mm_flags & MM_MMX) {
get_pixels = get_pixels_mmx;
+ diff_pixels = diff_pixels_mmx;
put_pixels_clamped = put_pixels_clamped_mmx;
add_pixels_clamped = add_pixels_clamped_mmx;
clear_blocks= clear_blocks_mmx;
-
+
pix_abs16x16 = pix_abs16x16_mmx;
pix_abs16x16_x2 = pix_abs16x16_x2_mmx;
pix_abs16x16_y2 = pix_abs16x16_y2_mmx;
@@ -1088,7 +434,7 @@ void dsputil_init_mmx(void)
put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx;
put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx;
put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_mmx;
-
+
avg_pixels_tab[0] = avg_pixels_mmx;
avg_pixels_tab[1] = avg_pixels_x2_mmx;
avg_pixels_tab[2] = avg_pixels_y2_mmx;
@@ -1098,44 +444,37 @@ void dsputil_init_mmx(void)
avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels_x2_mmx;
avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels_y2_mmx;
avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels_xy2_mmx;
-
- sub_pixels_tab[0] = sub_pixels_mmx;
- sub_pixels_tab[1] = sub_pixels_x2_mmx;
- sub_pixels_tab[2] = sub_pixels_y2_mmx;
- sub_pixels_tab[3] = sub_pixels_xy2_mmx;
if (mm_flags & MM_MMXEXT) {
pix_abs16x16 = pix_abs16x16_mmx2;
pix_abs16x16_x2 = pix_abs16x16_x2_mmx2;
pix_abs16x16_y2 = pix_abs16x16_y2_mmx2;
pix_abs16x16_xy2= pix_abs16x16_xy2_mmx2;
-
+
pix_abs8x8 = pix_abs8x8_mmx2;
pix_abs8x8_x2 = pix_abs8x8_x2_mmx2;
pix_abs8x8_y2 = pix_abs8x8_y2_mmx2;
pix_abs8x8_xy2= pix_abs8x8_xy2_mmx2;
-
- put_pixels_tab[1] = put_pixels_x2_sse;
- put_pixels_tab[2] = put_pixels_y2_sse;
-
- avg_pixels_tab[0] = avg_pixels_sse;
- avg_pixels_tab[1] = avg_pixels_x2_sse;
- avg_pixels_tab[2] = avg_pixels_y2_sse;
- avg_pixels_tab[3] = avg_pixels_xy2_sse;
-
- sub_pixels_tab[1] = sub_pixels_x2_sse;
- sub_pixels_tab[2] = sub_pixels_y2_sse;
+
+ put_pixels_tab[1] = put_pixels_x2_mmx2;
+ put_pixels_tab[2] = put_pixels_y2_mmx2;
+ put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx2;
+ put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx2;
+
+ avg_pixels_tab[0] = avg_pixels_mmx2;
+ avg_pixels_tab[1] = avg_pixels_x2_mmx2;
+ avg_pixels_tab[2] = avg_pixels_y2_mmx2;
+ avg_pixels_tab[3] = avg_pixels_xy2_mmx2;
} else if (mm_flags & MM_3DNOW) {
put_pixels_tab[1] = put_pixels_x2_3dnow;
put_pixels_tab[2] = put_pixels_y2_3dnow;
-
+ put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_3dnow;
+ put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_3dnow;
+
avg_pixels_tab[0] = avg_pixels_3dnow;
avg_pixels_tab[1] = avg_pixels_x2_3dnow;
avg_pixels_tab[2] = avg_pixels_y2_3dnow;
avg_pixels_tab[3] = avg_pixels_xy2_3dnow;
-
- sub_pixels_tab[1] = sub_pixels_x2_3dnow;
- sub_pixels_tab[2] = sub_pixels_y2_3dnow;
}
/* idct */
@@ -1181,12 +520,25 @@ void dsputil_init_mmx(void)
avg_no_rnd_pixels_tab[2] = just_return;
avg_no_rnd_pixels_tab[3] = just_return;
- sub_pixels_tab[0] = just_return;
- sub_pixels_tab[1] = just_return;
- sub_pixels_tab[2] = just_return;
- sub_pixels_tab[3] = just_return;
-
//av_fdct = just_return;
//ff_idct = just_return;
#endif
}
+
+/* Remove any non-bit-exact operations (for testing purposes). NOTE that
+ this function should be kept as small as possible, because non-bit-exact
+ cases are always difficult to test automatically. */
+void dsputil_set_bit_exact_mmx(void)
+{
+ if (mm_flags & MM_MMX) {
+ if (mm_flags & MM_MMXEXT) {
+ put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx;
+ put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx;
+ avg_pixels_tab[3] = avg_pixels_xy2_mmx;
+ } else if (mm_flags & MM_3DNOW) {
+ put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx;
+ put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx;
+ avg_pixels_tab[3] = avg_pixels_xy2_mmx;
+ }
+ }
+}
diff --git a/src/libffmpeg/libavcodec/i386/dsputil_mmx_avg.h b/src/libffmpeg/libavcodec/i386/dsputil_mmx_avg.h
index 830fe9f3b..a16ccc88b 100644
--- a/src/libffmpeg/libavcodec/i386/dsputil_mmx_avg.h
+++ b/src/libffmpeg/libavcodec/i386/dsputil_mmx_avg.h
@@ -1,342 +1,296 @@
/*
* DSP utils : average functions are compiled twice for 3dnow/mmx2
- * Copyright (c) 2000, 2001 Gerard Lantau.
+ * Copyright (c) 2000, 2001 Fabrice Bellard.
+ * Copyright (c) 2002 Michael Niedermayer
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
*
- * This program is distributed in the hope that it will be useful,
+ * This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* MMX optimization by Nick Kurshev <nickols_k@mail.ru>
+ * mostly rewritten by Michael Niedermayer <michaelni@gmx.at>
+ * and improved by Zdenek Kabelac <kabi@users.sf.net>
+ */
+
+/* XXX: we use explicit registers to avoid a gcc 2.95.2 register asm
+ clobber bug - now it will work with 2.95.2 and also with -fPIC
*/
-
static void DEF(put_pixels_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
{
- int dh, hh;
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- hh=h>>2;
- dh=h&3;
- while(hh--) {
__asm __volatile(
- "movq (%1), %%mm0\n\t"
- "movq 1(%1), %%mm1\n\t"
- "movq (%1, %2), %%mm2\n\t"
- "movq 1(%1, %2), %%mm3\n\t"
- "movq (%1, %2, 2), %%mm4\n\t"
- "movq 1(%1, %2, 2), %%mm5\n\t"
- "movq (%1, %3), %%mm6\n\t"
- "movq 1(%1, %3), %%mm7\n\t"
- PAVGB" %%mm1, %%mm0\n\t"
- PAVGB" %%mm3, %%mm2\n\t"
- PAVGB" %%mm5, %%mm4\n\t"
- PAVGB" %%mm7, %%mm6\n\t"
- "movq %%mm0, (%0)\n\t"
- "movq %%mm2, (%0, %2)\n\t"
- "movq %%mm4, (%0, %2, 2)\n\t"
- "movq %%mm6, (%0, %3)\n\t"
- ::"r"(p), "r"(pix), "r" (line_size), "r" (line_size*3)
- :"memory");
- pix += line_size*4; p += line_size*4;
- }
- while(dh--) {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq 1%1, %%mm1\n\t"
- PAVGB" %%mm1, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix)
- :"memory");
- pix += line_size; p += line_size;
- }
+ "lea (%3, %3), %%eax \n\t"
+ "1: \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ PAVGB" 1(%1), %%mm0 \n\t"
+ PAVGB" 1(%1, %3), %%mm1 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "addl %%eax, %1 \n\t"
+ "addl %%eax, %2 \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ PAVGB" 1(%1), %%mm0 \n\t"
+ PAVGB" 1(%1, %3), %%mm1 \n\t"
+ "addl %%eax, %1 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "addl %%eax, %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels), "+D"(block)
+ :"r" (line_size)
+ :"%eax", "memory");
}
-
-static void DEF(put_pixels_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+
+/* GL: this function does incorrect rounding if overflow */
+static void DEF(put_no_rnd_pixels_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
{
- int dh, hh;
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
-
- hh=h>>1;
- dh=h&1;
- while(hh--) {
- __asm __volatile(
- "movq %2, %%mm0\n\t"
- "movq %3, %%mm1\n\t"
- "movq %4, %%mm2\n\t"
- PAVGB" %%mm1, %%mm0\n\t"
- PAVGB" %%mm2, %%mm1\n\t"
- "movq %%mm0, %0\n\t"
- "movq %%mm1, %1\n\t"
- :"=m"(*p), "=m"(*(p+line_size))
- :"m"(*pix), "m"(*(pix+line_size)),
- "m"(*(pix+line_size*2))
- :"memory");
- pix += line_size*2;
- p += line_size*2;
- }
- if(dh) {
+ MOVQ_BONE(mm6);
__asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %2, %%mm1\n\t"
- PAVGB" %%mm1, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"=m"(*p)
- :"m"(*pix),
- "m"(*(pix+line_size))
- :"memory");
- }
+ "lea (%3, %3), %%eax \n\t"
+ "1: \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1, %3), %%mm2 \n\t"
+ "movq 1(%1), %%mm1 \n\t"
+ "movq 1(%1, %3), %%mm3 \n\t"
+ "addl %%eax, %1 \n\t"
+ "psubusb %%mm6, %%mm0 \n\t"
+ "psubusb %%mm6, %%mm2 \n\t"
+ PAVGB" %%mm1, %%mm0 \n\t"
+ PAVGB" %%mm3, %%mm2 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm2, (%2, %3) \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 1(%1), %%mm1 \n\t"
+ "movq (%1, %3), %%mm2 \n\t"
+ "movq 1(%1, %3), %%mm3 \n\t"
+ "addl %%eax, %2 \n\t"
+ "addl %%eax, %1 \n\t"
+ "psubusb %%mm6, %%mm0 \n\t"
+ "psubusb %%mm6, %%mm2 \n\t"
+ PAVGB" %%mm1, %%mm0 \n\t"
+ PAVGB" %%mm3, %%mm2 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm2, (%2, %3) \n\t"
+ "addl %%eax, %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels), "+D"(block)
+ :"r" (line_size)
+ :"%eax", "memory");
}
-static void DEF(avg_pixels)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+static void DEF(put_pixels_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
{
- int dh, hh;
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- hh=h>>2;
- dh=h&3;
- while(hh--) {
__asm __volatile(
- "movq (%0), %%mm0\n\t"
- "movq (%1), %%mm1\n\t"
- "movq (%0, %2), %%mm2\n\t"
- "movq (%1, %2), %%mm3\n\t"
- "movq (%0, %2, 2), %%mm4\n\t"
- "movq (%1, %2, 2), %%mm5\n\t"
- "movq (%0, %3), %%mm6\n\t"
- "movq (%1, %3), %%mm7\n\t"
- PAVGB" %%mm1, %%mm0\n\t"
- PAVGB" %%mm3, %%mm2\n\t"
- PAVGB" %%mm5, %%mm4\n\t"
- PAVGB" %%mm7, %%mm6\n\t"
- "movq %%mm0, (%0)\n\t"
- "movq %%mm2, (%0, %2)\n\t"
- "movq %%mm4, (%0, %2, 2)\n\t"
- "movq %%mm6, (%0, %3)\n\t"
- ::"r"(p), "r"(pix), "r" (line_size), "r" (line_size*3)
- :"memory");
- pix += line_size*4; p += line_size*4;
- }
- while(dh--) {
- __asm __volatile(
- "movq %0, %%mm0\n\t"
- "movq %1, %%mm1\n\t"
- PAVGB" %%mm1, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"+m"(*p)
- :"m"(*pix)
- :"memory");
- pix += line_size; p += line_size;
- }
+ "lea (%3, %3), %%eax \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "subl %3, %2 \n\t"
+ "1: \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq (%1, %%eax), %%mm2 \n\t"
+ "addl %%eax, %1 \n\t"
+ PAVGB" %%mm1, %%mm0 \n\t"
+ PAVGB" %%mm2, %%mm1 \n\t"
+ "movq %%mm0, (%2, %3) \n\t"
+ "movq %%mm1, (%2, %%eax) \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq (%1, %%eax), %%mm0 \n\t"
+ "addl %%eax, %2 \n\t"
+ "addl %%eax, %1 \n\t"
+ PAVGB" %%mm1, %%mm2 \n\t"
+ PAVGB" %%mm0, %%mm1 \n\t"
+ "movq %%mm2, (%2, %3) \n\t"
+ "movq %%mm1, (%2, %%eax) \n\t"
+ "addl %%eax, %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels), "+D" (block)
+ :"r" (line_size)
+ :"%eax", "memory");
}
-static void DEF(avg_pixels_x2)( UINT8 *block, const UINT8 *pixels, int line_size, int h)
+/* GL: this function does incorrect rounding if overflow */
+static void DEF(put_no_rnd_pixels_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
{
- int dh, hh;
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- hh=h>>1;
- dh=h&1;
- while(hh--) {
- __asm __volatile(
- "movq %2, %%mm2\n\t"
- "movq 1%2, %%mm3\n\t"
- "movq %3, %%mm4\n\t"
- "movq 1%3, %%mm5\n\t"
- "movq %0, %%mm0\n\t"
- "movq %1, %%mm1\n\t"
- PAVGB" %%mm3, %%mm2\n\t"
- PAVGB" %%mm2, %%mm0\n\t"
- PAVGB" %%mm5, %%mm4\n\t"
- PAVGB" %%mm4, %%mm1\n\t"
- "movq %%mm0, %0\n\t"
- "movq %%mm1, %1\n\t"
- :"+m"(*p), "+m"(*(p+line_size))
- :"m"(*pix), "m"(*(pix+line_size))
- :"memory");
- pix += line_size*2;
- p += line_size*2;
- }
- if(dh) {
+ MOVQ_BONE(mm6);
__asm __volatile(
- "movq %1, %%mm1\n\t"
- "movq 1%1, %%mm2\n\t"
- "movq %0, %%mm0\n\t"
- PAVGB" %%mm2, %%mm1\n\t"
- PAVGB" %%mm1, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"+m"(*p)
- :"m"(*pix)
- :"memory");
- }
+ "lea (%3, %3), %%eax \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "subl %3, %2 \n\t"
+ "1: \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq (%1, %%eax), %%mm2 \n\t"
+ "addl %%eax, %1 \n\t"
+ "psubusb %%mm6, %%mm1 \n\t"
+ PAVGB" %%mm1, %%mm0 \n\t"
+ PAVGB" %%mm2, %%mm1 \n\t"
+ "movq %%mm0, (%2, %3) \n\t"
+ "movq %%mm1, (%2, %%eax) \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq (%1, %%eax), %%mm0 \n\t"
+ "addl %%eax, %2 \n\t"
+ "addl %%eax, %1 \n\t"
+ "psubusb %%mm6, %%mm1 \n\t"
+ PAVGB" %%mm1, %%mm2 \n\t"
+ PAVGB" %%mm0, %%mm1 \n\t"
+ "movq %%mm2, (%2, %3) \n\t"
+ "movq %%mm1, (%2, %%eax) \n\t"
+ "addl %%eax, %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels), "+D" (block)
+ :"r" (line_size)
+ :"%eax", "memory");
}
-static void DEF(avg_pixels_y2)( UINT8 *block, const UINT8 *pixels, int line_size, int h)
+static void DEF(avg_pixels)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
{
- int dh, hh;
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- hh=h>>1;
- dh=h&1;
- while(hh--) {
- __asm __volatile(
- "movq %2, %%mm2\n\t"
- "movq %3, %%mm3\n\t"
- "movq %3, %%mm4\n\t"
- "movq %4, %%mm5\n\t"
- "movq %0, %%mm0\n\t"
- "movq %1, %%mm1\n\t"
- PAVGB" %%mm3, %%mm2\n\t"
- PAVGB" %%mm2, %%mm0\n\t"
- PAVGB" %%mm5, %%mm4\n\t"
- PAVGB" %%mm4, %%mm1\n\t"
- "movq %%mm0, %0\n\t"
- "movq %%mm1, %1\n\t"
- :"+m"(*p), "+m"(*(p+line_size))
- :"m"(*pix), "m"(*(pix+line_size)), "m"(*(pix+line_size*2))
- :"memory");
- pix += line_size*2;
- p += line_size*2;
- }
- if(dh) {
__asm __volatile(
- "movq %1, %%mm1\n\t"
- "movq %2, %%mm2\n\t"
- "movq %0, %%mm0\n\t"
- PAVGB" %%mm2, %%mm1\n\t"
- PAVGB" %%mm1, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"+m"(*p)
- :"m"(*pix), "m"(*(pix+line_size))
- :"memory");
- }
+ "lea (%3, %3), %%eax \n\t"
+ "1: \n\t"
+ "movq (%2), %%mm0 \n\t"
+ "movq (%2, %3), %%mm1 \n\t"
+ PAVGB" (%1), %%mm0 \n\t"
+ PAVGB" (%1, %3), %%mm1 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "addl %%eax, %1 \n\t"
+ "addl %%eax, %2 \n\t"
+ "movq (%2), %%mm0 \n\t"
+ "movq (%2, %3), %%mm1 \n\t"
+ PAVGB" (%1), %%mm0 \n\t"
+ PAVGB" (%1, %3), %%mm1 \n\t"
+ "addl %%eax, %1 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "addl %%eax, %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels), "+D"(block)
+ :"r" (line_size)
+ :"%eax", "memory");
}
-static void DEF(avg_pixels_xy2)( UINT8 *block, const UINT8 *pixels, int line_size, int h)
+static void DEF(avg_pixels_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
{
- UINT8 *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7\n\t"
- "movq %0, %%mm6\n\t"
- ::"m"(mm_wtwo));
- do {
__asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %2, %%mm1\n\t"
- "movq 1%1, %%mm4\n\t"
- "movq 1%2, %%mm5\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm1, %%mm0\n\t"
- "paddusw %%mm3, %%mm2\n\t"
- "movq %%mm4, %%mm1\n\t"
- "movq %%mm5, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm4\n\t"
- "punpcklbw %%mm7, %%mm5\n\t"
- "punpckhbw %%mm7, %%mm1\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "paddusw %%mm5, %%mm4\n\t"
- "paddusw %%mm3, %%mm1\n\t"
- "paddusw %%mm6, %%mm4\n\t"
- "paddusw %%mm6, %%mm1\n\t"
- "paddusw %%mm4, %%mm0\n\t"
- "paddusw %%mm1, %%mm2\n\t"
- "psrlw $2, %%mm0\n\t"
- "psrlw $2, %%mm2\n\t"
- "packuswb %%mm2, %%mm0\n\t"
- PAVGB" %0, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
- :"+m"(*p)
- :"m"(*pix),
- "m"(*(pix+line_size))
- :"memory");
- pix += line_size;
- p += line_size ;
- } while(--h);
+ "lea (%3, %3), %%eax \n\t"
+ "1: \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1, %3), %%mm2 \n\t"
+ PAVGB" 1(%1), %%mm0 \n\t"
+ PAVGB" 1(%1, %3), %%mm2 \n\t"
+ PAVGB" (%2), %%mm0 \n\t"
+ PAVGB" (%2, %3), %%mm2 \n\t"
+ "addl %%eax, %1 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm2, (%2, %3) \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1, %3), %%mm2 \n\t"
+ PAVGB" 1(%1), %%mm0 \n\t"
+ PAVGB" 1(%1, %3), %%mm2 \n\t"
+ "addl %%eax, %2 \n\t"
+ "addl %%eax, %1 \n\t"
+ PAVGB" (%2), %%mm0 \n\t"
+ PAVGB" (%2, %3), %%mm2 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm2, (%2, %3) \n\t"
+ "addl %%eax, %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels), "+D"(block)
+ :"r" (line_size)
+ :"%eax", "memory");
}
-static void DEF(sub_pixels_x2)( DCTELEM *block, const UINT8 *pixels, int line_size, int h)
+static void DEF(avg_pixels_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
{
- DCTELEM *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7":);
- do {
__asm __volatile(
- "movq 1%1, %%mm2\n\t"
- "movq %0, %%mm0\n\t"
- PAVGB" %1, %%mm2\n\t"
- "movq 8%0, %%mm1\n\t"
- "movq %%mm2, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "psubsw %%mm2, %%mm0\n\t"
- "psubsw %%mm3, %%mm1\n\t"
- "movq %%mm0, %0\n\t"
- "movq %%mm1, 8%0\n\t"
- :"+m"(*p)
- :"m"(*pix)
- :"memory");
- pix += line_size;
- p += 8;
- } while (--h);
+ "lea (%3, %3), %%eax \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "subl %3, %2 \n\t"
+ "1: \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq (%1, %%eax), %%mm2 \n\t"
+ "addl %%eax, %1 \n\t"
+ PAVGB" %%mm1, %%mm0 \n\t"
+ PAVGB" %%mm2, %%mm1 \n\t"
+ "movq (%2, %3), %%mm3 \n\t"
+ "movq (%2, %%eax), %%mm4 \n\t"
+ PAVGB" %%mm3, %%mm0 \n\t"
+ PAVGB" %%mm4, %%mm1 \n\t"
+ "movq %%mm0, (%2, %3) \n\t"
+ "movq %%mm1, (%2, %%eax) \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq (%1, %%eax), %%mm0 \n\t"
+ PAVGB" %%mm1, %%mm2 \n\t"
+ PAVGB" %%mm0, %%mm1 \n\t"
+ "addl %%eax, %2 \n\t"
+ "addl %%eax, %1 \n\t"
+ "movq (%2, %3), %%mm3 \n\t"
+ "movq (%2, %%eax), %%mm4 \n\t"
+ PAVGB" %%mm3, %%mm2 \n\t"
+ PAVGB" %%mm4, %%mm1 \n\t"
+ "movq %%mm2, (%2, %3) \n\t"
+ "movq %%mm1, (%2, %%eax) \n\t"
+ "addl %%eax, %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels), "+D"(block)
+ :"r" (line_size)
+ :"%eax", "memory");
}
-static void DEF(sub_pixels_y2)( DCTELEM *block, const UINT8 *pixels, int line_size, int h)
+// Note: this is not correctly rounded, but this function is only used for B-frames, so it doesn't matter
+static void DEF(avg_pixels_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
{
- DCTELEM *p;
- const UINT8 *pix;
- p = block;
- pix = pixels;
- __asm __volatile(
- "pxor %%mm7, %%mm7":);
- do {
+ MOVQ_BONE(mm6);
__asm __volatile(
- "movq %2, %%mm2\n\t"
- "movq %0, %%mm0\n\t"
- PAVGB" %1, %%mm2\n\t"
- "movq 8%0, %%mm1\n\t"
- "movq %%mm2, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "psubsw %%mm2, %%mm0\n\t"
- "psubsw %%mm3, %%mm1\n\t"
- "movq %%mm0, %0\n\t"
- "movq %%mm1, 8%0\n\t"
- :"+m"(*p)
- :"m"(*pix), "m"(*(pix+line_size))
- :"memory");
- pix += line_size;
- p += 8;
- } while (--h);
+ "lea (%3, %3), %%eax \n\t"
+ "movq (%1), %%mm0 \n\t"
+ PAVGB" 1(%1), %%mm0 \n\t"
+ ".balign 8 \n\t"
+ "1: \n\t"
+ "movq (%1, %%eax), %%mm2 \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "psubusb %%mm6, %%mm2 \n\t"
+ PAVGB" 1(%1, %3), %%mm1 \n\t"
+ PAVGB" 1(%1, %%eax), %%mm2 \n\t"
+ "addl %%eax, %1 \n\t"
+ PAVGB" %%mm1, %%mm0 \n\t"
+ PAVGB" %%mm2, %%mm1 \n\t"
+ PAVGB" (%2), %%mm0 \n\t"
+ PAVGB" (%2, %3), %%mm1 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq (%1, %%eax), %%mm0 \n\t"
+ PAVGB" 1(%1, %3), %%mm1 \n\t"
+ PAVGB" 1(%1, %%eax), %%mm0 \n\t"
+ "addl %%eax, %2 \n\t"
+ "addl %%eax, %1 \n\t"
+ PAVGB" %%mm1, %%mm2 \n\t"
+ PAVGB" %%mm0, %%mm1 \n\t"
+ PAVGB" (%2), %%mm2 \n\t"
+ PAVGB" (%2, %3), %%mm1 \n\t"
+ "movq %%mm2, (%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "addl %%eax, %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels), "+D"(block)
+ :"r" (line_size)
+ :"%eax", "memory");
}
-
diff --git a/src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h b/src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h
new file mode 100644
index 000000000..dc70c9c8e
--- /dev/null
+++ b/src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h
@@ -0,0 +1,305 @@
+/*
+ * DSP utils mmx functions are compiled twice for rnd/no_rnd
+ * Copyright (c) 2000, 2001 Fabrice Bellard.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
+ * mostly rewritten by Michael Niedermayer <michaelni@gmx.at>
+ * and improved by Zdenek Kabelac <kabi@users.sf.net>
+ */
+
+// put_pixels
+static void DEF(put, pixels_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+ MOVQ_BFE(mm6);
+ __asm __volatile(
+ "lea (%3, %3), %%eax \n\t"
+ ".balign 8 \n\t"
+ "1: \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 1(%1), %%mm1 \n\t"
+ "movq (%1, %3), %%mm2 \n\t"
+ "movq 1(%1, %3), %%mm3 \n\t"
+ PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
+ "movq %%mm4, (%2) \n\t"
+ "movq %%mm5, (%2, %3) \n\t"
+ "addl %%eax, %1 \n\t"
+ "addl %%eax, %2 \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 1(%1), %%mm1 \n\t"
+ "movq (%1, %3), %%mm2 \n\t"
+ "movq 1(%1, %3), %%mm3 \n\t"
+ PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
+ "movq %%mm4, (%2) \n\t"
+ "movq %%mm5, (%2, %3) \n\t"
+ "addl %%eax, %1 \n\t"
+ "addl %%eax, %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels), "+D"(block)
+ :"r"(line_size)
+ :"eax", "memory");
+}
+
+static void DEF(put, pixels_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+ MOVQ_BFE(mm6);
+ __asm __volatile(
+ "lea (%3, %3), %%eax \n\t"
+ "movq (%1), %%mm0 \n\t"
+ ".balign 8 \n\t"
+ "1: \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq (%1, %%eax),%%mm2 \n\t"
+ PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
+ "movq %%mm4, (%2) \n\t"
+ "movq %%mm5, (%2, %3) \n\t"
+ "addl %%eax, %1 \n\t"
+ "addl %%eax, %2 \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq (%1, %%eax),%%mm0 \n\t"
+ PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
+ "movq %%mm4, (%2) \n\t"
+ "movq %%mm5, (%2, %3) \n\t"
+ "addl %%eax, %1 \n\t"
+ "addl %%eax, %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels), "+D"(block)
+ :"r"(line_size)
+ :"eax", "memory");
+}
+
+static void DEF(put, pixels_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+ MOVQ_ZERO(mm7);
+ SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
+ __asm __volatile(
+ "movq (%1), %%mm0 \n\t"
+ "movq 1(%1), %%mm4 \n\t"
+ "movq %%mm0, %%mm1 \n\t"
+ "movq %%mm4, %%mm5 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "punpcklbw %%mm7, %%mm4 \n\t"
+ "punpckhbw %%mm7, %%mm1 \n\t"
+ "punpckhbw %%mm7, %%mm5 \n\t"
+ "paddusw %%mm0, %%mm4 \n\t"
+ "paddusw %%mm1, %%mm5 \n\t"
+ "xorl %%eax, %%eax \n\t"
+ "addl %3, %1 \n\t"
+ ".balign 8 \n\t"
+ "1: \n\t"
+ "movq (%1, %%eax), %%mm0 \n\t"
+ "movq 1(%1, %%eax), %%mm2 \n\t"
+ "movq %%mm0, %%mm1 \n\t"
+ "movq %%mm2, %%mm3 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "punpcklbw %%mm7, %%mm2 \n\t"
+ "punpckhbw %%mm7, %%mm1 \n\t"
+ "punpckhbw %%mm7, %%mm3 \n\t"
+ "paddusw %%mm2, %%mm0 \n\t"
+ "paddusw %%mm3, %%mm1 \n\t"
+ "paddusw %%mm6, %%mm4 \n\t"
+ "paddusw %%mm6, %%mm5 \n\t"
+ "paddusw %%mm0, %%mm4 \n\t"
+ "paddusw %%mm1, %%mm5 \n\t"
+ "psrlw $2, %%mm4 \n\t"
+ "psrlw $2, %%mm5 \n\t"
+ "packuswb %%mm5, %%mm4 \n\t"
+ "movq %%mm4, (%2, %%eax) \n\t"
+ "addl %3, %%eax \n\t"
+
+ "movq (%1, %%eax), %%mm2 \n\t" // 0 <-> 2 1 <-> 3
+ "movq 1(%1, %%eax), %%mm4 \n\t"
+ "movq %%mm2, %%mm3 \n\t"
+ "movq %%mm4, %%mm5 \n\t"
+ "punpcklbw %%mm7, %%mm2 \n\t"
+ "punpcklbw %%mm7, %%mm4 \n\t"
+ "punpckhbw %%mm7, %%mm3 \n\t"
+ "punpckhbw %%mm7, %%mm5 \n\t"
+ "paddusw %%mm2, %%mm4 \n\t"
+ "paddusw %%mm3, %%mm5 \n\t"
+ "paddusw %%mm6, %%mm0 \n\t"
+ "paddusw %%mm6, %%mm1 \n\t"
+ "paddusw %%mm4, %%mm0 \n\t"
+ "paddusw %%mm5, %%mm1 \n\t"
+ "psrlw $2, %%mm0 \n\t"
+ "psrlw $2, %%mm1 \n\t"
+ "packuswb %%mm1, %%mm0 \n\t"
+ "movq %%mm0, (%2, %%eax) \n\t"
+ "addl %3, %%eax \n\t"
+
+ "subl $2, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels)
+ :"D"(block), "r"(line_size)
+ :"eax", "memory");
+}
+
+// avg_pixels
+// in case more speed is needed - unrolling would certainly help
+static void DEF(avg, pixels)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+ MOVQ_BFE(mm6);
+ JUMPALIGN();
+ do {
+ __asm __volatile(
+ "movq %0, %%mm0 \n\t"
+ "movq %1, %%mm1 \n\t"
+ PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+ "movq %%mm2, %0 \n\t"
+ :"+m"(*block)
+ :"m"(*pixels)
+ :"memory");
+ pixels += line_size;
+ block += line_size;
+ }
+ while (--h);
+}
+
+static void DEF(avg, pixels_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+ MOVQ_BFE(mm6);
+ JUMPALIGN();
+ do {
+ __asm __volatile(
+ "movq %1, %%mm0 \n\t"
+ "movq 1%1, %%mm1 \n\t"
+ "movq %0, %%mm3 \n\t"
+ PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+ PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
+ "movq %%mm0, %0 \n\t"
+ :"+m"(*block)
+ :"m"(*pixels)
+ :"memory");
+ pixels += line_size;
+ block += line_size;
+ } while (--h);
+}
+
+static void DEF(avg, pixels_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+ MOVQ_BFE(mm6);
+ __asm __volatile(
+ "lea (%3, %3), %%eax \n\t"
+ "movq (%1), %%mm0 \n\t"
+ ".balign 8 \n\t"
+ "1: \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq (%1, %%eax), %%mm2 \n\t"
+ PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
+ "movq (%2), %%mm3 \n\t"
+ PAVGB(%%mm3, %%mm4, %%mm0, %%mm6)
+ "movq (%2, %3), %%mm3 \n\t"
+ PAVGB(%%mm3, %%mm5, %%mm1, %%mm6)
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "addl %%eax, %1 \n\t"
+ "addl %%eax, %2 \n\t"
+
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq (%1, %%eax), %%mm0 \n\t"
+ PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
+ "movq (%2), %%mm3 \n\t"
+ PAVGB(%%mm3, %%mm4, %%mm0, %%mm6)
+ "movq (%2, %3), %%mm3 \n\t"
+ PAVGB(%%mm3, %%mm5, %%mm1, %%mm6)
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "addl %%eax, %1 \n\t"
+ "addl %%eax, %2 \n\t"
+
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels), "+D"(block)
+ :"r"(line_size)
+ :"eax", "memory");
+}
+
+// this routine is 'slightly' suboptimal but mostly unused
+static void DEF(avg, pixels_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+ MOVQ_ZERO(mm7);
+ SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
+ __asm __volatile(
+ "movq (%1), %%mm0 \n\t"
+ "movq 1(%1), %%mm4 \n\t"
+ "movq %%mm0, %%mm1 \n\t"
+ "movq %%mm4, %%mm5 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "punpcklbw %%mm7, %%mm4 \n\t"
+ "punpckhbw %%mm7, %%mm1 \n\t"
+ "punpckhbw %%mm7, %%mm5 \n\t"
+ "paddusw %%mm0, %%mm4 \n\t"
+ "paddusw %%mm1, %%mm5 \n\t"
+ "xorl %%eax, %%eax \n\t"
+ "addl %3, %1 \n\t"
+ ".balign 8 \n\t"
+ "1: \n\t"
+ "movq (%1, %%eax), %%mm0 \n\t"
+ "movq 1(%1, %%eax), %%mm2 \n\t"
+ "movq %%mm0, %%mm1 \n\t"
+ "movq %%mm2, %%mm3 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "punpcklbw %%mm7, %%mm2 \n\t"
+ "punpckhbw %%mm7, %%mm1 \n\t"
+ "punpckhbw %%mm7, %%mm3 \n\t"
+ "paddusw %%mm2, %%mm0 \n\t"
+ "paddusw %%mm3, %%mm1 \n\t"
+ "paddusw %%mm6, %%mm4 \n\t"
+ "paddusw %%mm6, %%mm5 \n\t"
+ "paddusw %%mm0, %%mm4 \n\t"
+ "paddusw %%mm1, %%mm5 \n\t"
+ "psrlw $2, %%mm4 \n\t"
+ "psrlw $2, %%mm5 \n\t"
+ "movq (%2, %%eax), %%mm3 \n\t"
+ "packuswb %%mm5, %%mm4 \n\t"
+ "pcmpeqd %%mm2, %%mm2 \n\t"
+ "paddb %%mm2, %%mm2 \n\t"
+ PAVGB(%%mm3, %%mm4, %%mm5, %%mm2)
+ "movq %%mm5, (%2, %%eax) \n\t"
+ "addl %3, %%eax \n\t"
+
+ "movq (%1, %%eax), %%mm2 \n\t" // 0 <-> 2 1 <-> 3
+ "movq 1(%1, %%eax), %%mm4 \n\t"
+ "movq %%mm2, %%mm3 \n\t"
+ "movq %%mm4, %%mm5 \n\t"
+ "punpcklbw %%mm7, %%mm2 \n\t"
+ "punpcklbw %%mm7, %%mm4 \n\t"
+ "punpckhbw %%mm7, %%mm3 \n\t"
+ "punpckhbw %%mm7, %%mm5 \n\t"
+ "paddusw %%mm2, %%mm4 \n\t"
+ "paddusw %%mm3, %%mm5 \n\t"
+ "paddusw %%mm6, %%mm0 \n\t"
+ "paddusw %%mm6, %%mm1 \n\t"
+ "paddusw %%mm4, %%mm0 \n\t"
+ "paddusw %%mm5, %%mm1 \n\t"
+ "psrlw $2, %%mm0 \n\t"
+ "psrlw $2, %%mm1 \n\t"
+ "movq (%2, %%eax), %%mm3 \n\t"
+ "packuswb %%mm1, %%mm0 \n\t"
+ "pcmpeqd %%mm2, %%mm2 \n\t"
+ "paddb %%mm2, %%mm2 \n\t"
+ PAVGB(%%mm3, %%mm0, %%mm1, %%mm2)
+ "movq %%mm1, (%2, %%eax) \n\t"
+ "addl %3, %%eax \n\t"
+
+ "subl $2, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels)
+ :"D"(block), "r"(line_size)
+ :"eax", "memory");
+}
diff --git a/src/libffmpeg/libavcodec/i386/fdct_mmx.c b/src/libffmpeg/libavcodec/i386/fdct_mmx.c
index e9d48383d..7135beb21 100644
--- a/src/libffmpeg/libavcodec/i386/fdct_mmx.c
+++ b/src/libffmpeg/libavcodec/i386/fdct_mmx.c
@@ -1,6 +1,6 @@
/*
* MMX optimized forward DCT
- * The gcc porting is Copyright (c) 2001 Gerard Lantau.
+ * The gcc porting is Copyright (c) 2001 Fabrice Bellard.
*
* from fdctam32.c - AP922 MMX(3D-Now) forward-DCT
*
@@ -10,7 +10,7 @@
#include "../common.h"
#include "mmx.h"
-//#define ATTR_ALIGN(align) __attribute__ ((__aligned__ (align)))
+#define ATTR_ALIGN(align) __attribute__ ((__aligned__ (align)))
//////////////////////////////////////////////////////////////////////
//
diff --git a/src/libffmpeg/libavcodec/i386/idct_mmx.c b/src/libffmpeg/libavcodec/i386/idct_mmx.c
index 618c1cfde..298c8a8b0 100644
--- a/src/libffmpeg/libavcodec/i386/idct_mmx.c
+++ b/src/libffmpeg/libavcodec/i386/idct_mmx.c
@@ -528,8 +528,12 @@ static inline void idct_col (int16_t * col, int offset)
movq_r2m (mm3, *(col+offset+4*8)); // save y4
movq_r2m (mm4, *(col+offset+3*8)); // save y3
-}
+#undef T1
+#undef T2
+#undef T3
+#undef C4
+}
static int32_t rounder0[] ATTR_ALIGN(8) =
rounder ((1 << (COL_SHIFT - 1)) - 0.5);
@@ -547,6 +551,8 @@ static int32_t rounder3[] ATTR_ALIGN(8) =
static int32_t rounder5[] ATTR_ALIGN(8) =
rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */
+#undef COL_SHIFT
+#undef ROW_SHIFT
#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \
void idct (int16_t * block) \
diff --git a/src/libffmpeg/libavcodec/i386/motion_est_mmx.c b/src/libffmpeg/libavcodec/i386/motion_est_mmx.c
index e704c4219..9b76cdb07 100644
--- a/src/libffmpeg/libavcodec/i386/motion_est_mmx.c
+++ b/src/libffmpeg/libavcodec/i386/motion_est_mmx.c
@@ -1,20 +1,20 @@
/*
* MMX optimized motion estimation
- * Copyright (c) 2001 Gerard Lantau.
+ * Copyright (c) 2001 Fabrice Bellard.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
*
- * This program is distributed in the hope that it will be useful,
+ * This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* mostly by Michael Niedermayer <michaelni@gmx.at>
*/
@@ -26,6 +26,8 @@ static const __attribute__ ((aligned(8))) UINT64 round_tab[3]={
0x0002000200020002,
};
+static __attribute__ ((aligned(8))) uint64_t bone= 0x0101010101010101LL;
+
static inline void sad8_mmx(UINT8 *blk1, UINT8 *blk2, int stride, int h)
{
int len= -(stride<<h);
@@ -115,6 +117,7 @@ static inline void sad8_4_mmx2(UINT8 *blk1, UINT8 *blk2, int stride, int h)
int len= -(stride<<h);
asm volatile(
".balign 16 \n\t"
+ "movq "MANGLE(bone)", %%mm5 \n\t"
"1: \n\t"
"movq (%1, %%eax), %%mm0 \n\t"
"movq (%2, %%eax), %%mm2 \n\t"
@@ -122,6 +125,7 @@ static inline void sad8_4_mmx2(UINT8 *blk1, UINT8 *blk2, int stride, int h)
"movq 1(%2, %%eax), %%mm3 \n\t"
"pavgb %%mm2, %%mm0 \n\t"
"pavgb %%mm1, %%mm3 \n\t"
+ "psubusb %%mm5, %%mm3 \n\t"
"pavgb %%mm3, %%mm0 \n\t"
"movq (%3, %%eax), %%mm2 \n\t"
"psadbw %%mm2, %%mm0 \n\t"
@@ -132,6 +136,7 @@ static inline void sad8_4_mmx2(UINT8 *blk1, UINT8 *blk2, int stride, int h)
"movq 1(%2, %%eax), %%mm4 \n\t"
"pavgb %%mm3, %%mm1 \n\t"
"pavgb %%mm4, %%mm2 \n\t"
+ "psubusb %%mm5, %%mm2 \n\t"
"pavgb %%mm1, %%mm2 \n\t"
"movq (%3, %%eax), %%mm1 \n\t"
"psadbw %%mm1, %%mm2 \n\t"
diff --git a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx.c b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx.c
index b7a782f56..390aa554c 100644
--- a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx.c
+++ b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx.c
@@ -1,34 +1,30 @@
/*
* The simplest mpeg encoder (well, it was the simplest!)
- * Copyright (c) 2000,2001 Gerard Lantau.
+ * Copyright (c) 2000,2001 Fabrice Bellard.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
*
- * This program is distributed in the hope that it will be useful,
+ * This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Optimized for ia32 cpus by Nick Kurshev <nickols_k@mail.ru>
- * h263 dequantizer by Michael Niedermayer <michaelni@gmx.at>
+ * h263, mpeg1, mpeg2 dequantizer & draw_edges by Michael Niedermayer <michaelni@gmx.at>
*/
-#include "xine-utils/xineutils.h"
#include "../dsputil.h"
#include "../mpegvideo.h"
#include "../avcodec.h"
-#include "../mangle.h"
extern UINT8 zigzag_end[64];
-extern void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w);
-extern int (*dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale);
extern UINT8 zigzag_direct_noperm[64];
extern UINT16 inv_zigzag_direct16[64];
@@ -195,103 +191,86 @@ asm volatile(
static void dct_unquantize_mpeg1_mmx(MpegEncContext *s,
DCTELEM *block, int n, int qscale)
{
- int i, level, nCoeffs;
+ int nCoeffs; // MMX MPEG-1 dequantizer; nCoeffs = number of coefficients the asm loops below cover
const UINT16 *quant_matrix;
if(s->alternate_scan) nCoeffs= 64;
else nCoeffs= nCoeffs= zigzag_end[ s->block_last_index[n] ];
if (s->mb_intra) {
+ int block0; // scaled DC term, stored after the loop because the loop also rewrites block[0]
if (n < 4)
- block[0] = block[0] * s->y_dc_scale;
+ block0 = block[0] * s->y_dc_scale;
else
- block[0] = block[0] * s->c_dc_scale;
- /* isnt used anymore (we have a h263 unquantizer since some time)
- if (s->out_format == FMT_H263) {
- i = 1;
- goto unquant_even;
- }*/
+ block0 = block[0] * s->c_dc_scale;
/* XXX: only mpeg1 */
quant_matrix = s->intra_matrix;
- i=1;
- /* Align on 4 elements boundary */
- while(i&3)
- {
- level = block[i];
- if (level) {
- if (level < 0) level = -level;
- level = (int)(level * qscale * quant_matrix[i]) >> 3;
- level = (level - 1) | 1;
- if (block[i] < 0) level = -level;
- block[i] = level;
- }
- i++;
- }
- __asm __volatile(
- "movd %0, %%mm6\n\t" /* mm6 = qscale | 0 */
- "punpckldq %%mm6, %%mm6\n\t" /* mm6 = qscale | qscale */
- "movq %2, %%mm4\n\t"
- "movq %%mm6, %%mm7\n\t"
- "movq %1, %%mm5\n\t"
- "packssdw %%mm6, %%mm7\n\t" /* mm7 = qscale | qscale | qscale | qscale */
- "pxor %%mm6, %%mm6\n\t"
- ::"g"(qscale),"m"(mm_wone),"m"(mm_wabs):"memory");
- for(;i<nCoeffs;i+=4) {
- __asm __volatile(
- "movq %1, %%mm0\n\t"
- "movq %%mm7, %%mm1\n\t"
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm0, %%mm3\n\t"
- "pcmpgtw %%mm6, %%mm2\n\t"
- "pmullw %2, %%mm1\n\t"
- "pandn %%mm4, %%mm2\n\t"
- "por %%mm5, %%mm2\n\t"
- "pmullw %%mm2, %%mm0\n\t" /* mm0 = abs(block[i]). */
-
- "pcmpeqw %%mm6, %%mm3\n\t"
- "pmullw %%mm0, %%mm1\n\t"
- "psraw $3, %%mm1\n\t"
- "psubw %%mm5, %%mm1\n\t" /* block[i] --; */
- "pandn %%mm4, %%mm3\n\t" /* fake of pcmpneqw : mm0 != 0 then mm1 = -1 */
- "por %%mm5, %%mm1\n\t" /* block[i] |= 1 */
- "pmullw %%mm2, %%mm1\n\t" /* change signs again */
-
- "pand %%mm3, %%mm1\n\t" /* nullify if was zero */
- "movq %%mm1, %0"
- :"=m"(block[i])
- :"m"(block[i]), "m"(quant_matrix[i])
- :"memory");
- }
- } else {
- i = 0;
-// unquant_even:
- quant_matrix = s->non_intra_matrix;
- /* Align on 4 elements boundary */
- while(i&7)
- {
- level = block[i];
- if (level) {
- if (level < 0) level = -level;
- level = (((level << 1) + 1) * qscale *
- ((int) quant_matrix[i])) >> 4;
- level = (level - 1) | 1;
- if(block[i] < 0) level = -level;
- block[i] = level;
- }
- i++;
- }
asm volatile(
"pcmpeqw %%mm7, %%mm7 \n\t"
"psrlw $15, %%mm7 \n\t"
"movd %2, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
+ "movl %3, %%eax \n\t" // eax = -2*nCoeffs: negative byte offset from the end pointers, counts up towards 0
".balign 16\n\t"
"1: \n\t"
- "movq (%0, %3), %%mm0 \n\t"
- "movq 8(%0, %3), %%mm1 \n\t"
- "movq (%1, %3), %%mm4 \n\t"
- "movq 8(%1, %3), %%mm5 \n\t"
+ "movq (%0, %%eax), %%mm0 \n\t"
+ "movq 8(%0, %%eax), %%mm1 \n\t"
+ "movq (%1, %%eax), %%mm4 \n\t"
+ "movq 8(%1, %%eax), %%mm5 \n\t"
+ "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
+ "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
+ "pxor %%mm2, %%mm2 \n\t"
+ "pxor %%mm3, %%mm3 \n\t"
+ "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
+ "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
+ "pxor %%mm2, %%mm0 \n\t"
+ "pxor %%mm3, %%mm1 \n\t"
+ "psubw %%mm2, %%mm0 \n\t" // abs(block[i])
+ "psubw %%mm3, %%mm1 \n\t" // abs(block[i])
+ "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q
+ "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
+ "pxor %%mm4, %%mm4 \n\t"
+ "pxor %%mm5, %%mm5 \n\t" // FIXME slow
+ "pcmpeqw (%0, %%eax), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
+ "pcmpeqw 8(%0, %%eax), %%mm5 \n\t" // block[i] == 0 ? -1 : 0
+ "psraw $3, %%mm0 \n\t" // (abs(block[i])*q) >> 3
+ "psraw $3, %%mm1 \n\t"
+ "psubw %%mm7, %%mm0 \n\t" // level - 1 (mm7 holds 1 in every word)
+ "psubw %%mm7, %%mm1 \n\t"
+ "por %%mm7, %%mm0 \n\t" // (level - 1) | 1
+ "por %%mm7, %%mm1 \n\t"
+ "pxor %%mm2, %%mm0 \n\t" // re-apply the sign of block[i] ...
+ "pxor %%mm3, %%mm1 \n\t"
+ "psubw %%mm2, %%mm0 \n\t" // ... via (x ^ mask) - mask
+ "psubw %%mm3, %%mm1 \n\t"
+ "pandn %%mm0, %%mm4 \n\t" // force 0 where block[i] was 0
+ "pandn %%mm1, %%mm5 \n\t"
+ "movq %%mm4, (%0, %%eax) \n\t"
+ "movq %%mm5, 8(%0, %%eax) \n\t"
+
+ "addl $16, %%eax \n\t" // advance by 16 bytes (two quadwords) per iteration
+ "js 1b \n\t" // loop while the offset is still negative
+ ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
+ : "%eax", "memory"
+ );
+ block[0]= block0; // the loop above started at block[0]; restore the separately scaled DC value
+
+ } else {
+ quant_matrix = s->inter_matrix;
+asm volatile(
+ "pcmpeqw %%mm7, %%mm7 \n\t"
+ "psrlw $15, %%mm7 \n\t" // mm7 = 1 in every word
+ "movd %2, %%mm6 \n\t"
+ "packssdw %%mm6, %%mm6 \n\t"
+ "packssdw %%mm6, %%mm6 \n\t" // mm6 = qscale in every word
+ "movl %3, %%eax \n\t" // eax = -2*nCoeffs: negative byte offset, counts up towards 0
+ ".balign 16\n\t"
+ "1: \n\t"
+ "movq (%0, %%eax), %%mm0 \n\t"
+ "movq 8(%0, %%eax), %%mm1 \n\t"
+ "movq (%1, %%eax), %%mm4 \n\t"
+ "movq 8(%1, %%eax), %%mm5 \n\t"
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
"pxor %%mm2, %%mm2 \n\t"
@@ -310,8 +289,8 @@ asm volatile(
"pmullw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
"pxor %%mm4, %%mm4 \n\t"
"pxor %%mm5, %%mm5 \n\t" // FIXME slow
- "pcmpeqw (%0, %3), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
- "pcmpeqw 8(%0, %3), %%mm5 \n\t" // block[i] == 0 ? -1 : 0
+ "pcmpeqw (%0, %%eax), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
+ "pcmpeqw 8(%0, %%eax), %%mm5 \n\t" // block[i] == 0 ? -1 : 0
"psraw $4, %%mm0 \n\t"
"psraw $4, %%mm1 \n\t"
"psubw %%mm7, %%mm0 \n\t"
@@ -324,13 +303,145 @@ asm volatile(
"psubw %%mm3, %%mm1 \n\t"
"pandn %%mm0, %%mm4 \n\t"
"pandn %%mm1, %%mm5 \n\t"
- "movq %%mm4, (%0, %3) \n\t"
- "movq %%mm5, 8(%0, %3) \n\t"
+ "movq %%mm4, (%0, %%eax) \n\t"
+ "movq %%mm5, 8(%0, %%eax) \n\t"
- "addl $16, %3 \n\t"
+ "addl $16, %%eax \n\t" // advance by 16 bytes (two quadwords) per iteration
"js 1b \n\t"
- ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (2*(i-nCoeffs))
- : "memory"
+ ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
+ : "%eax", "memory"
+ );
+ }
+}
+
+static void dct_unquantize_mpeg2_mmx(MpegEncContext *s,
+ DCTELEM *block, int n, int qscale)
+{
+ int nCoeffs; // MMX MPEG-2 dequantizer; nCoeffs = number of coefficients the asm loops below cover
+ const UINT16 *quant_matrix;
+
+ if(s->alternate_scan) nCoeffs= 64;
+ else nCoeffs= nCoeffs= zigzag_end[ s->block_last_index[n] ];
+
+ if (s->mb_intra) {
+ int block0; // scaled DC term, stored after the loop because the loop also rewrites block[0]
+ if (n < 4)
+ block0 = block[0] * s->y_dc_scale;
+ else
+ block0 = block[0] * s->c_dc_scale;
+ quant_matrix = s->intra_matrix;
+asm volatile(
+ "pcmpeqw %%mm7, %%mm7 \n\t"
+ "psrlw $15, %%mm7 \n\t"
+ "movd %2, %%mm6 \n\t"
+ "packssdw %%mm6, %%mm6 \n\t"
+ "packssdw %%mm6, %%mm6 \n\t" // mm6 = qscale in every word
+ "movl %3, %%eax \n\t" // eax = -2*nCoeffs: negative byte offset from the end pointers, counts up towards 0
+ ".balign 16\n\t"
+ "1: \n\t"
+ "movq (%0, %%eax), %%mm0 \n\t"
+ "movq 8(%0, %%eax), %%mm1 \n\t"
+ "movq (%1, %%eax), %%mm4 \n\t"
+ "movq 8(%1, %%eax), %%mm5 \n\t"
+ "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
+ "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
+ "pxor %%mm2, %%mm2 \n\t"
+ "pxor %%mm3, %%mm3 \n\t"
+ "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
+ "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
+ "pxor %%mm2, %%mm0 \n\t"
+ "pxor %%mm3, %%mm1 \n\t"
+ "psubw %%mm2, %%mm0 \n\t" // abs(block[i])
+ "psubw %%mm3, %%mm1 \n\t" // abs(block[i])
+ "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q
+ "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
+ "pxor %%mm4, %%mm4 \n\t"
+ "pxor %%mm5, %%mm5 \n\t" // FIXME slow
+ "pcmpeqw (%0, %%eax), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
+ "pcmpeqw 8(%0, %%eax), %%mm5 \n\t" // block[i] == 0 ? -1 : 0
+ "psraw $3, %%mm0 \n\t" // (abs(block[i])*q) >> 3
+ "psraw $3, %%mm1 \n\t"
+ "pxor %%mm2, %%mm0 \n\t" // re-apply the sign of block[i] ...
+ "pxor %%mm3, %%mm1 \n\t"
+ "psubw %%mm2, %%mm0 \n\t" // ... via (x ^ mask) - mask
+ "psubw %%mm3, %%mm1 \n\t"
+ "pandn %%mm0, %%mm4 \n\t" // force 0 where block[i] was 0
+ "pandn %%mm1, %%mm5 \n\t"
+ "movq %%mm4, (%0, %%eax) \n\t"
+ "movq %%mm5, 8(%0, %%eax) \n\t"
+
+ "addl $16, %%eax \n\t" // advance by 16 bytes (two quadwords) per iteration
+ "js 1b \n\t" // loop while the offset is still negative
+ ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
+ : "%eax", "memory"
+ );
+ block[0]= block0; // the loop above started at block[0]; restore the separately scaled DC value
+ // Note: we don't do mismatch control for intra, as errors cannot accumulate
+
+ } else {
+ quant_matrix = s->inter_matrix;
+asm volatile(
+ "pcmpeqw %%mm7, %%mm7 \n\t"
+ "psrlq $48, %%mm7 \n\t" // mm7 = 0x000000000000FFFF: XOR accumulator for mismatch control, low word starts as all ones
+ "movd %2, %%mm6 \n\t"
+ "packssdw %%mm6, %%mm6 \n\t"
+ "packssdw %%mm6, %%mm6 \n\t" // mm6 = qscale in every word
+ "movl %3, %%eax \n\t" // eax = -2*nCoeffs: negative byte offset, counts up towards 0
+ ".balign 16\n\t"
+ "1: \n\t"
+ "movq (%0, %%eax), %%mm0 \n\t"
+ "movq 8(%0, %%eax), %%mm1 \n\t"
+ "movq (%1, %%eax), %%mm4 \n\t"
+ "movq 8(%1, %%eax), %%mm5 \n\t"
+ "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
+ "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
+ "pxor %%mm2, %%mm2 \n\t"
+ "pxor %%mm3, %%mm3 \n\t"
+ "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
+ "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
+ "pxor %%mm2, %%mm0 \n\t"
+ "pxor %%mm3, %%mm1 \n\t"
+ "psubw %%mm2, %%mm0 \n\t" // abs(block[i])
+ "psubw %%mm3, %%mm1 \n\t" // abs(block[i])
+ "paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2
+ "paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2
+ "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*2*q
+ "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*2*q
+ "paddw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q
+ "paddw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
+ "pxor %%mm4, %%mm4 \n\t"
+ "pxor %%mm5, %%mm5 \n\t" // FIXME slow
+ "pcmpeqw (%0, %%eax), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
+ "pcmpeqw 8(%0, %%eax), %%mm5 \n\t" // block[i] == 0 ? -1 : 0
+ "psrlw $4, %%mm0 \n\t" // >> 4 (logical shift; the value is a magnitude here)
+ "psrlw $4, %%mm1 \n\t"
+ "pxor %%mm2, %%mm0 \n\t" // re-apply the sign of block[i] ...
+ "pxor %%mm3, %%mm1 \n\t"
+ "psubw %%mm2, %%mm0 \n\t" // ... via (x ^ mask) - mask
+ "psubw %%mm3, %%mm1 \n\t"
+ "pandn %%mm0, %%mm4 \n\t" // force 0 where block[i] was 0
+ "pandn %%mm1, %%mm5 \n\t"
+ "pxor %%mm4, %%mm7 \n\t" // accumulate the XOR of every output word
+ "pxor %%mm5, %%mm7 \n\t"
+ "movq %%mm4, (%0, %%eax) \n\t"
+ "movq %%mm5, 8(%0, %%eax) \n\t"
+
+ "addl $16, %%eax \n\t" // advance by 16 bytes (two quadwords) per iteration
+ "js 1b \n\t" // loop while the offset is still negative
+ "movd 124(%0, %3), %%mm0 \n\t" // %0+%3 is the start of block; byte offset 124 = block[62], block[63]
+ "movq %%mm7, %%mm6 \n\t"
+ "psrlq $32, %%mm7 \n\t"
+ "pxor %%mm6, %%mm7 \n\t" // fold the upper 32 bits onto the lower 32
+ "movq %%mm7, %%mm6 \n\t"
+ "psrlq $16, %%mm7 \n\t"
+ "pxor %%mm6, %%mm7 \n\t" // low word = XOR of the four word lanes
+ "pslld $31, %%mm7 \n\t" // keep only bit 0 (moved to bit 31) ...
+ "psrlq $15, %%mm7 \n\t" // ... and place it at bit 16, the LSB of block[63]
+ "pxor %%mm7, %%mm0 \n\t" // mismatch control: toggle the LSB of block[63] when the XOR of all output LSBs is 0
+ "movd %%mm0, 124(%0, %3) \n\t"
+
+ ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs)
+ : "%eax", "memory"
);
}
}
@@ -441,18 +552,16 @@ void unused_var_warning_killer(){
void MPV_common_init_mmx(MpegEncContext *s)
{
if (mm_flags & MM_MMX) {
- if (s->out_format == FMT_H263)
- s->dct_unquantize = dct_unquantize_h263_mmx;
- else
- s->dct_unquantize = dct_unquantize_mpeg1_mmx;
-
- draw_edges = draw_edges_mmx;
-
- if(mm_flags & MM_MMXEXT){
- dct_quantize= dct_quantize_MMX2;
- }else{
- dct_quantize= dct_quantize_MMX;
- }
+ s->dct_unquantize_h263 = dct_unquantize_h263_mmx; // install all three MMX dequantizers in the context
+ s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_mmx;
+ s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_mmx;
+
+ draw_edges = draw_edges_mmx; // draw_edges and dct_quantize are not per-context: assigned without going through s
+
+ if(mm_flags & MM_MMXEXT){ // MM_MMXEXT set: use the MMX2 build of the quantizer
+ dct_quantize= dct_quantize_MMX2;
+ } else { // plain MMX build otherwise
+ dct_quantize= dct_quantize_MMX;
+ }
}
}
-
diff --git a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c
index 2b3322915..aed537a23 100644
--- a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c
+++ b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c
@@ -1,21 +1,22 @@
/*
- Copyright (C) 2002 Michael Niedermayer <michaelni@gmx.at>
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-*/
-
+ * MPEG video MMX templates
+ *
+ * Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
#undef SPREADW
#undef PMAXW
#ifdef HAVE_MMX2
@@ -33,149 +34,165 @@
static int RENAME(dct_quantize)(MpegEncContext *s,
DCTELEM *block, int n,
- int qscale)
+ int qscale, int *overflow)
{
- int i, level, last_non_zero_p1, q;
- const UINT16 *qmat;
+ int level=0, last_non_zero_p1, q; // level=0 only because gcc otherwise reports it as possibly uninitialized
+ const UINT16 *qmat, *bias;
static __align8 INT16 temp_block[64];
- int minLevel, maxLevel;
-
- if(s->avctx!=NULL && s->avctx->codec->id==CODEC_ID_MPEG4){
- /* mpeg4 */
- minLevel= -2048;
- maxLevel= 2047;
- }else if(s->out_format==FMT_MPEG1){
- /* mpeg1 */
- minLevel= -255;
- maxLevel= 255;
- }else if(s->out_format==FMT_MJPEG){
- /* (m)jpeg */
- minLevel= -1023;
- maxLevel= 1023;
- }else{
- /* h263 / msmpeg4 */
- minLevel= -128;
- maxLevel= 127;
- }
av_fdct (block);
-
+
if (s->mb_intra) {
int dummy;
if (n < 4)
q = s->y_dc_scale;
else
q = s->c_dc_scale;
-
/* note: block[0] is assumed to be positive */
+ if (!s->h263_aic) {
#if 1
- asm volatile (
- "xorl %%edx, %%edx \n\t"
- "mul %%ecx \n\t"
- : "=d" (temp_block[0]), "=a"(dummy)
- : "a" (block[0] + (q >> 1)), "c" (inverse[q])
- );
+ asm volatile (
+ "xorl %%edx, %%edx \n\t"
+ "mul %%ecx \n\t"
+ : "=d" (level), "=a"(dummy)
+ : "a" (block[0] + (q >> 1)), "c" (inverse[q])
+ );
#else
- asm volatile (
- "xorl %%edx, %%edx \n\t"
- "divw %%cx \n\t"
- "movzwl %%ax, %%eax \n\t"
- : "=a" (temp_block[0])
- : "a" (block[0] + (q >> 1)), "c" (q)
- : "%edx"
- );
+ asm volatile (
+ "xorl %%edx, %%edx \n\t"
+ "divw %%cx \n\t"
+ "movzwl %%ax, %%eax \n\t"
+ : "=a" (level)
+ : "a" (block[0] + (q >> 1)), "c" (q)
+ : "%edx"
+ );
#endif
+ } else
+ /* For AIC we skip quant/dequant of INTRADC */
+ level = block[0];
+
+ block[0]=0; // avoid a fake overflow: the loops below also process block[0] and feed it into the overflow check
// temp_block[0] = (block[0] + (q >> 1)) / q;
- i = 1;
last_non_zero_p1 = 1;
- if (s->out_format == FMT_H263) {
- qmat = s->q_non_intra_matrix16;
- } else {
- qmat = s->q_intra_matrix16;
- }
- for(i=1;i<4;i++) {
- level = block[i] * qmat[i];
- level = level / (1 << (QMAT_SHIFT_MMX - 3));
- /* XXX: currently, this code is not optimal. the range should be:
- mpeg1: -255..255
- mpeg2: -2048..2047
- h263: -128..127
- mpeg4: -2048..2047
- */
- if (level > maxLevel)
- level = maxLevel;
- else if (level < minLevel)
- level = minLevel;
- temp_block[i] = level;
-
- if(level)
- if(last_non_zero_p1 < inv_zigzag_direct16[i]) last_non_zero_p1= inv_zigzag_direct16[i];
- block[i]=0;
- }
+ bias = s->q_intra_matrix16_bias[qscale];
+ qmat = s->q_intra_matrix16[qscale];
} else {
- i = 0;
last_non_zero_p1 = 0;
- qmat = s->q_non_intra_matrix16;
+ bias = s->q_inter_matrix16_bias[qscale];
+ qmat = s->q_inter_matrix16[qscale];
}
- asm volatile( /* XXX: small rounding bug, but it shouldnt matter */
- "movd %3, %%mm3 \n\t"
- SPREADW(%%mm3)
- "movd %4, %%mm4 \n\t"
- SPREADW(%%mm4)
-#ifndef HAVE_MMX2
- "movd %5, %%mm5 \n\t"
- SPREADW(%%mm5)
-#endif
- "pxor %%mm7, %%mm7 \n\t"
- "movd %%eax, %%mm2 \n\t"
- SPREADW(%%mm2)
- "movl %6, %%eax \n\t"
- ".balign 16 \n\t"
- "1: \n\t"
- "movq (%1, %%eax), %%mm0 \n\t"
- "movq (%2, %%eax), %%mm1 \n\t"
- "movq %%mm0, %%mm6 \n\t"
- "psraw $15, %%mm6 \n\t"
- "pmulhw %%mm0, %%mm1 \n\t"
- "psubsw %%mm6, %%mm1 \n\t"
-#ifdef HAVE_MMX2
- "pminsw %%mm3, %%mm1 \n\t"
- "pmaxsw %%mm4, %%mm1 \n\t"
-#else
- "paddsw %%mm3, %%mm1 \n\t"
- "psubusw %%mm4, %%mm1 \n\t"
- "paddsw %%mm5, %%mm1 \n\t"
-#endif
- "movq %%mm1, (%8, %%eax) \n\t"
- "pcmpeqw %%mm7, %%mm1 \n\t"
- "movq (%7, %%eax), %%mm0 \n\t"
- "movq %%mm7, (%1, %%eax) \n\t"
- "pandn %%mm0, %%mm1 \n\t"
- PMAXW(%%mm1, %%mm2)
- "addl $8, %%eax \n\t"
- " js 1b \n\t"
- "movq %%mm2, %%mm0 \n\t"
- "psrlq $32, %%mm2 \n\t"
- PMAXW(%%mm0, %%mm2)
- "movq %%mm2, %%mm0 \n\t"
- "psrlq $16, %%mm2 \n\t"
- PMAXW(%%mm0, %%mm2)
- "movd %%mm2, %%eax \n\t"
- "movzbl %%al, %%eax \n\t"
- : "+a" (last_non_zero_p1)
- : "r" (block+64), "r" (qmat+64),
-#ifdef HAVE_MMX2
- "m" (maxLevel), "m" (minLevel), "m" (minLevel /* dummy */), "g" (2*i - 128),
-#else
- "m" (0x7FFF - maxLevel), "m" (0x7FFF -maxLevel + minLevel), "m" (minLevel), "g" (2*i - 128),
-#endif
- "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
- );
+ if(s->out_format == FMT_H263){
+
+ asm volatile(
+ "movd %%eax, %%mm3 \n\t" // last_non_zero_p1
+ SPREADW(%%mm3)
+ "pxor %%mm7, %%mm7 \n\t" // 0
+ "pxor %%mm4, %%mm4 \n\t" // 0
+ "movq (%2), %%mm5 \n\t" // qmat[0]
+ "pxor %%mm6, %%mm6 \n\t"
+ "psubw (%3), %%mm6 \n\t" // -bias[0]
+ "movl $-128, %%eax \n\t" // byte offset from block+64: -128 .. 0 covers all 64 coefficients
+ ".balign 16 \n\t"
+ "1: \n\t"
+ "pxor %%mm1, %%mm1 \n\t" // 0
+ "movq (%1, %%eax), %%mm0 \n\t" // block[i]
+ "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] < 0 ? 0xFFFF : 0x0000
+ "pxor %%mm1, %%mm0 \n\t"
+ "psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
+ "psubusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0] (unsigned-saturating subtract of -bias[0])
+ "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
+ "por %%mm0, %%mm4 \n\t" // mm4 = OR of all quantized magnitudes, consumed by the overflow check below
+ "pxor %%mm1, %%mm0 \n\t"
+ "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16)*sign(block[i])
+ "movq %%mm0, (%5, %%eax) \n\t"
+ "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFFFF : 0x0000
+ "movq (%4, %%eax), %%mm1 \n\t"
+ "movq %%mm7, (%1, %%eax) \n\t" // 0
+ "pandn %%mm1, %%mm0 \n\t"
+ PMAXW(%%mm0, %%mm3)
+ "addl $8, %%eax \n\t"
+ " js 1b \n\t"
+ "movq %%mm3, %%mm0 \n\t"
+ "psrlq $32, %%mm3 \n\t"
+ PMAXW(%%mm0, %%mm3)
+ "movq %%mm3, %%mm0 \n\t"
+ "psrlq $16, %%mm3 \n\t"
+ PMAXW(%%mm0, %%mm3)
+ "movd %%mm3, %%eax \n\t"
+ "movzbl %%al, %%eax \n\t" // last_non_zero_p1
+ : "+a" (last_non_zero_p1)
+ : "r" (block+64), "r" (qmat), "r" (bias),
+ "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
+ );
+ // note: the asm is split because gcc does not accept that many operands; this part relies on mm4 still holding the value left by the asm above
+ asm volatile(
+ "movd %1, %%mm1 \n\t" // max_qcoeff
+ SPREADW(%%mm1)
+ "psubusw %%mm1, %%mm4 \n\t"
+ "packuswb %%mm4, %%mm4 \n\t"
+ "movd %%mm4, %0 \n\t" // *overflow
+ : "=g" (*overflow)
+ : "g" (s->max_qcoeff)
+ );
+ }else{ // not FMT_H263: qmat and bias are read per coefficient
+ asm volatile(
+ "movd %%eax, %%mm3 \n\t" // last_non_zero_p1
+ SPREADW(%%mm3)
+ "pxor %%mm7, %%mm7 \n\t" // 0
+ "pxor %%mm4, %%mm4 \n\t" // 0
+ "movl $-128, %%eax \n\t" // byte offset from block+64: -128 .. 0 covers all 64 coefficients
+ ".balign 16 \n\t"
+ "1: \n\t"
+ "pxor %%mm1, %%mm1 \n\t" // 0
+ "movq (%1, %%eax), %%mm0 \n\t" // block[i]
+ "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] < 0 ? 0xFFFF : 0x0000
+ "pxor %%mm1, %%mm0 \n\t"
+ "psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
+ "movq (%3, %%eax), %%mm6 \n\t" // bias[i]
+ "paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[i]
+ "movq (%2, %%eax), %%mm5 \n\t" // qmat[i]
+ "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[i] + bias[i]*qmat[i])>>16
+ "por %%mm0, %%mm4 \n\t" // mm4 = OR of all quantized magnitudes, consumed by the overflow check below
+ "pxor %%mm1, %%mm0 \n\t"
+ "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[i] + bias[i]*qmat[i])>>16)*sign(block[i])
+ "movq %%mm0, (%5, %%eax) \n\t"
+ "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFFFF : 0x0000
+ "movq (%4, %%eax), %%mm1 \n\t"
+ "movq %%mm7, (%1, %%eax) \n\t" // 0
+ "pandn %%mm1, %%mm0 \n\t"
+ PMAXW(%%mm0, %%mm3)
+ "addl $8, %%eax \n\t"
+ " js 1b \n\t"
+ "movq %%mm3, %%mm0 \n\t"
+ "psrlq $32, %%mm3 \n\t"
+ PMAXW(%%mm0, %%mm3)
+ "movq %%mm3, %%mm0 \n\t"
+ "psrlq $16, %%mm3 \n\t"
+ PMAXW(%%mm0, %%mm3)
+ "movd %%mm3, %%eax \n\t"
+ "movzbl %%al, %%eax \n\t" // last_non_zero_p1
+ : "+a" (last_non_zero_p1)
+ : "r" (block+64), "r" (qmat+64), "r" (bias+64),
+ "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
+ );
+ // note: the asm is split because gcc does not accept that many operands; this part relies on mm4 still holding the value left by the asm above
+ asm volatile(
+ "movd %1, %%mm1 \n\t" // max_qcoeff
+ SPREADW(%%mm1)
+ "psubusw %%mm1, %%mm4 \n\t"
+ "packuswb %%mm4, %%mm4 \n\t"
+ "movd %%mm4, %0 \n\t" // *overflow
+ : "=g" (*overflow)
+ : "g" (s->max_qcoeff)
+ );
+ }
+
+ if(s->mb_intra) temp_block[0]= level; //FIXME move after permute
// last_non_zero_p1=64;
/* permute for IDCT */
asm volatile(
- "movl %0, %%eax \n\t"
+ "movl %0, %%eax \n\t"
"pushl %%ebp \n\t"
"movl %%esp, " MANGLE(esp_temp) "\n\t"
"1: \n\t"
@@ -203,5 +220,6 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
}
*/
//block_permute(block);
+
return last_non_zero_p1 - 1;
}
diff --git a/src/libffmpeg/libavcodec/i386/simple_idct_mmx.c b/src/libffmpeg/libavcodec/i386/simple_idct_mmx.c
index 297f23724..4f19cc20a 100644
--- a/src/libffmpeg/libavcodec/i386/simple_idct_mmx.c
+++ b/src/libffmpeg/libavcodec/i386/simple_idct_mmx.c
@@ -1,29 +1,43 @@
/*
- Copyright (C) 2001 Michael Niedermayer (michaelni@gmx.at)
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-*/
-
-#include <inttypes.h>
+ * Simple IDCT MMX
+ *
+ * Copyright (c) 2001, 2002 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
#include "../dsputil.h"
+/*
+23170.475006
+22725.260826
+21406.727617
+19265.545870
+16384.000000
+12872.826198
+8866.956905
+4520.335430
+*/
#define C0 23170 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define C1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define C2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define C3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#if 0
#define C4 16384 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#else
+#define C4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) - 0.5
+#endif
#define C5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define C6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define C7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
@@ -31,8 +45,8 @@
#define ROW_SHIFT 11
#define COL_SHIFT 20 // 6
-static uint64_t __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000ULL;
-static uint64_t __attribute__((aligned(8))) d40000= 0x0000000000040000ULL;
+static const uint64_t __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000ULL;
+static const uint64_t __attribute__((aligned(8))) d40000= 0x0000000000040000ULL;
static int16_t __attribute__((aligned(8))) temp[64];
static int16_t __attribute__((aligned(8))) coeffs[]= {
1<<(ROW_SHIFT-1), 0, 1<<(ROW_SHIFT-1), 0,
@@ -43,27 +57,31 @@ static int16_t __attribute__((aligned(8))) coeffs[]= {
// 0, 0, 0, 0,
// 0, 0, 0, 0,
- C4, C2, C4, C2,
- C4, C6, C4, C6,
- C1, C3, C1, C3,
- C5, C7, C5, C7,
+ C4, C4, C4, C4,
+ C4, -C4, C4, -C4,
+
+ C2, C6, C2, C6,
+ C6, -C2, C6, -C2,
+
+ C1, C3, C1, C3,
+ C5, C7, C5, C7,
+
+ C3, -C7, C3, -C7,
+-C1, -C5, -C1, -C5,
+
+ C5, -C1, C5, -C1,
+ C7, C3, C7, C3,
+
+ C7, -C5, C7, -C5,
+ C3, -C1, C3, -C1
+};
- C4, C6, C4, C6,
- -C4, -C2, -C4, -C2,
- C3, -C7, C3, -C7,
- -C1, -C5, -C1, -C5,
-
- C4, -C6, C4, -C6,
- -C4, C2, -C4, C2,
- C5, -C1, C5, -C1,
- C7, C3, C7, C3,
-
- C4, -C2, C4, -C2,
- C4, -C6, C4, -C6,
- C7, -C5, C7, -C5,
- C3, -C1, C3, -C1
- };
#if 0
+static void unused_var_killer(){ // only references wm1010, d40000 and temp; presumably there to silence unused-variable warnings (per its name)
+ int a= wm1010 + d40000; // the 64-bit sum is truncated to int; the value itself is not meaningful
+ temp[0]=a;
+}
+
static void inline idctCol (int16_t * col, int16_t *input)
{
#undef C0
@@ -79,7 +97,7 @@ static void inline idctCol (int16_t * col, int16_t *input)
const int C1 = 22725; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
const int C2 = 21407; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
const int C3 = 19266; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
- const int C4 = 16384; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+ const int C4 = 16383; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
const int C5 = 12873; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
const int C6 = 8867; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
const int C7 = 4520; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
@@ -128,7 +146,7 @@ static void inline idctRow (int16_t * output, int16_t * input)
const int C1 = 22725; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
const int C2 = 21407; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
const int C3 = 19266; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
- const int C4 = 16384; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+ const int C4 = 16383; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
const int C5 = 12873; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
const int C6 = 8867; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
const int C7 = 4520; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
@@ -188,110 +206,160 @@ row[7] = input[13];
static inline void idct(int16_t *block)
{
- int i;
-//for(i=0; i<64; i++) temp[i]= block[ block_permute_op(i) ];
-//for(i=0; i<64; i++) temp[block_permute_op(i)]= block[ i ];
-//for(i=0; i<64; i++) block[i]= temp[i];
-//block_permute(block);
-/*
-idctRow(temp, block);
-idctRow(temp+16, block+16);
-idctRow(temp+1, block+2);
-idctRow(temp+17, block+18);
-idctRow(temp+32, block+32);
-idctRow(temp+48, block+48);
-idctRow(temp+33, block+34);
-idctRow(temp+49, block+50);
-*/
-
asm volatile(
-// "lea 64(%0), %%eax \n\t"
-//r0,r2,R0,R2 r4,r6,R4,R6 r1,r3,R1,R3 r5,r7,R5,R7
-//src0 src4 src1 src5
-//r0,R0,r7,R7 r1,R1,r6,R6 r2,R2,r5,R5 r3,R3,r4,R4
-//dst0 dst1 dst2 dst3
#if 0 //Alternative, simpler variant
-#define IDCT_CORE(src0, src4, src1, src5, dst, rounder, shift) \
- "movq " #src0 ", %%mm0 \n\t" /* R2 R0 r2 r0 */\
- "movq " #src4 ", %%mm1 \n\t" /* R6 R4 r6 r4 */\
+
+#define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
+ "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
"movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
"movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
- "movq 16(%2), %%mm4 \n\t" /* C2 C4 C2 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C2R2+C4R0 C2r2+C4r0 */\
- "movq 24(%2), %%mm5 \n\t" /* C6 C4 C6 C4 */\
- "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C4R4 C6r6+C4r4 */\
- "movq 32(%2), %%mm6 \n\t" /* C3 C1 C3 C1 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
- "movq 40(%2), %%mm7 \n\t" /* C7 C5 C7 C5 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
- "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
+ "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
+ "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
+ "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
+ "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
+ "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
#rounder ", %%mm4 \n\t"\
-\
- "movq 48(%2), %%mm5 \n\t" /* C6 C4 C6 C4 */\
- "pmaddwd %%mm0, %%mm5 \n\t" /* C6R2+C4R0 C6r2+C4r0 */\
- "paddd %%mm7, %%mm6 \n\t" /* B0 b0 */\
- "paddd %%mm4, %%mm6 \n\t" /* A0+B0 a0+b0 */\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
+ "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
+ "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\
+ "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
+ #rounder ", %%mm0 \n\t"\
+ "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\
+ "paddd %%mm0, %%mm0 \n\t" \
+ "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\
+ "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
+ "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\
+ "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\
+ "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
+ "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
"paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
- "psubd %%mm6, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\
+ "psrad $" #shift ", %%mm7 \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\
+ "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\
+ "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "psrad $" #shift ", %%mm1 \n\t"\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\
+ "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\
+ "movq %%mm7, " #dst " \n\t"\
+ "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\
+ "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
+ "movq %%mm2, 24+" #dst " \n\t"\
+ "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
+ "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
+ "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
+ "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
+ "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\
+ "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
+ "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
+ "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "psubd %%mm4, %%mm0 \n\t" /* a2-B2 a2-b2 */\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "psrad $" #shift ", %%mm0 \n\t"\
+ "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
+ "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\
+ "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\
"psrad $" #shift ", %%mm6 \n\t"\
+ "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\
+ "movq %%mm2, 8+" #dst " \n\t"\
"psrad $" #shift ", %%mm4 \n\t"\
- WRITE0(%%mm6, %%mm4, dst) \
-\
- "movq 56(%2), %%mm4 \n\t" /* -C2 -C4 -C2 -C4 */\
- "pmaddwd %%mm1, %%mm4 \n\t" /* -C2R6-C4R4 -C2r6-C4r4 */\
- "movq 64(%2), %%mm6 \n\t" /* -C7 C3 -C7 C3 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
- "movq 72(%2), %%mm7 \n\t" /* -C5 -C1 -C5 -C1 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
- "paddd %%mm5, %%mm4 \n\t" /* A1 a1 */\
+ "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\
+ "movq %%mm4, 16+" #dst " \n\t"\
+
+#define COL_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
+ "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
+ "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
+ "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
+ "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
+ "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
+ "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
#rounder ", %%mm4 \n\t"\
-\
- "movq 80(%2), %%mm5 \n\t" /* -C6 C4 -C6 C4 */\
- "pmaddwd %%mm0, %%mm5 \n\t" /* -C6R2+C4R0 -C6r2+C4r0 */\
- "paddd %%mm7, %%mm6 \n\t" /* B1 b1 */\
- "paddd %%mm4, %%mm6 \n\t" /* A1+B1 a1+b1 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A1 2a1 */\
- "psubd %%mm6, %%mm4 \n\t" /* A1-B1 a1-b1 */\
- "psrad $" #shift ", %%mm6 \n\t"\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
+ #rounder ", %%mm0 \n\t"\
+ "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
+ "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
+ "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
+ "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\
+ "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\
+ "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\
+ "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
+ "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
+ "paddd %%mm1, %%mm7 \n\t" /* B0 b0 */\
+ "movq 72(%2), %%mm1 \n\t" /* -C5 -C1 -C5 -C1 */\
+ "pmaddwd %%mm3, %%mm1 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
+ "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
+ "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
+ "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "paddd %%mm2, %%mm1 \n\t" /* B1 b1 */\
+ "psrad $" #shift ", %%mm7 \n\t"\
"psrad $" #shift ", %%mm4 \n\t"\
- WRITE1(%%mm6, %%mm4, dst, %%mm7) \
-\
- "movq 88(%2), %%mm4 \n\t" /* C2 -C4 C2 -C4 */\
- "pmaddwd %%mm1, %%mm4 \n\t" /* C2R6-C4R4 C2r6-C4r4 */\
- "movq 96(%2), %%mm6 \n\t" /* -C1 C5 -C1 C5 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
- "movq 104(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
+ "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\
+ "paddd %%mm1, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "psubd %%mm1, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "psrad $" #shift ", %%mm0 \n\t"\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\
+ "movd %%mm7, " #dst " \n\t"\
+ "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "movd %%mm0, 16+" #dst " \n\t"\
+ "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "movd %%mm2, 96+" #dst " \n\t"\
+ "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "movd %%mm4, 112+" #dst " \n\t"\
+ "movq " #src1 ", %%mm0 \n\t" /* R3 R1 r3 r1 */\
+ "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
+ "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
+ "pmaddwd 96(%2), %%mm0 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
"pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
- "paddd %%mm5, %%mm4 \n\t" /* A2 a2 */\
- #rounder ", %%mm4 \n\t"\
-\
- "pmaddwd 112(%2), %%mm0 \n\t" /* -C2R2+C4R0 -C2r2+C4r0 */\
- "paddd %%mm7, %%mm6 \n\t" /* B1 b1 */\
- "paddd %%mm4, %%mm6 \n\t" /* A1+B1 a1+b1 */\
- "pmaddwd 120(%2), %%mm1 \n\t" /* -C6R6+C4R4 -C6r6+C4r4 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A1 2a1 */\
- "psubd %%mm6, %%mm4 \n\t" /* A1-B1 a1-b1 */\
- "pmaddwd 128(%2), %%mm2 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
- "pmaddwd 136(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
+ "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\
+ "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
+ "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
+ "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "psubd %%mm4, %%mm5 \n\t" /* a2-B2 a2-b2 */\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "psrad $" #shift ", %%mm5 \n\t"\
+ "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
+ "paddd %%mm0, %%mm3 \n\t" /* B3 b3 */\
+ "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\
"psrad $" #shift ", %%mm6 \n\t"\
"psrad $" #shift ", %%mm4 \n\t"\
-\
- "paddd %%mm1, %%mm0 \n\t" /* A3 a3 */\
- #rounder ", %%mm0 \n\t"\
- "paddd %%mm3, %%mm2 \n\t" /* B3 b3 */\
- "paddd %%mm0, %%mm2 \n\t" /* A3+B3 a3+b3 */\
- "paddd %%mm0, %%mm0 \n\t" /* 2A3 2a3 */\
- "psubd %%mm2, %%mm0 \n\t" /* A3-B3 a3-b3 */\
- "psrad $" #shift ", %%mm2 \n\t"\
- "psrad $" #shift ", %%mm0 \n\t"\
- WRITE2(%%mm6, %%mm4, %%mm2, %%mm0, dst)
-
-#define DC_COND_IDCT_CORE(src0, src4, src1, src5, dst, rounder, shift) \
- "movq " #src0 ", %%mm0 \n\t" /* R2 R0 r2 r0 */\
- "movq " #src4 ", %%mm1 \n\t" /* R6 R4 r6 r4 */\
+ "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "movd %%mm2, 32+" #dst " \n\t"\
+ "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\
+ "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
+ "movd %%mm6, 48+" #dst " \n\t"\
+ "movd %%mm4, 64+" #dst " \n\t"\
+ "movd %%mm5, 80+" #dst " \n\t"\
+
+
+#define DC_COND_ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
+ "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
"movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
"movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
- "movq wm1010, %%mm4 \n\t"\
+ "movq "MANGLE(wm1010)", %%mm4 \n\t"\
"pand %%mm0, %%mm4 \n\t"\
"por %%mm1, %%mm4 \n\t"\
"por %%mm2, %%mm4 \n\t"\
@@ -300,234 +368,106 @@ idctRow(temp+49, block+50);
"movd %%mm4, %%eax \n\t"\
"orl %%eax, %%eax \n\t"\
"jz 1f \n\t"\
- "movq 16(%2), %%mm4 \n\t" /* C2 C4 C2 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C2R2+C4R0 C2r2+C4r0 */\
- "movq 24(%2), %%mm5 \n\t" /* C6 C4 C6 C4 */\
- "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C4R4 C6r6+C4r4 */\
- "movq 32(%2), %%mm6 \n\t" /* C3 C1 C3 C1 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
- "movq 40(%2), %%mm7 \n\t" /* C7 C5 C7 C5 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
- "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
+ "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
+ "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
+ "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
+ "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
+ "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
#rounder ", %%mm4 \n\t"\
-\
- "movq 48(%2), %%mm5 \n\t" /* C6 C4 C6 C4 */\
- "pmaddwd %%mm0, %%mm5 \n\t" /* C6R2+C4R0 C6r2+C4r0 */\
- "paddd %%mm7, %%mm6 \n\t" /* B0 b0 */\
- "paddd %%mm4, %%mm6 \n\t" /* A0+B0 a0+b0 */\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
+ "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
+ "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\
+ "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
+ #rounder ", %%mm0 \n\t"\
+ "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\
+ "paddd %%mm0, %%mm0 \n\t" \
+ "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\
+ "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
+ "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\
+ "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\
+ "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
+ "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
"paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
- "psubd %%mm6, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "psrad $" #shift ", %%mm6 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- WRITE0(%%mm6, %%mm4, dst) \
-\
- "movq 56(%2), %%mm4 \n\t" /* -C2 -C4 -C2 -C4 */\
- "pmaddwd %%mm1, %%mm4 \n\t" /* -C2R6-C4R4 -C2r6-C4r4 */\
- "movq 64(%2), %%mm6 \n\t" /* -C7 C3 -C7 C3 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
- "movq 72(%2), %%mm7 \n\t" /* -C5 -C1 -C5 -C1 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
- "paddd %%mm5, %%mm4 \n\t" /* A1 a1 */\
- #rounder ", %%mm4 \n\t"\
-\
- "movq 80(%2), %%mm5 \n\t" /* -C6 C4 -C6 C4 */\
- "pmaddwd %%mm0, %%mm5 \n\t" /* -C6R2+C4R0 -C6r2+C4r0 */\
- "paddd %%mm7, %%mm6 \n\t" /* B1 b1 */\
- "paddd %%mm4, %%mm6 \n\t" /* A1+B1 a1+b1 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A1 2a1 */\
- "psubd %%mm6, %%mm4 \n\t" /* A1-B1 a1-b1 */\
- "psrad $" #shift ", %%mm6 \n\t"\
+ "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\
+ "psrad $" #shift ", %%mm7 \n\t"\
"psrad $" #shift ", %%mm4 \n\t"\
- WRITE1(%%mm6, %%mm4, dst, %%mm7) \
-\
- "movq 88(%2), %%mm4 \n\t" /* C2 -C4 C2 -C4 */\
- "pmaddwd %%mm1, %%mm4 \n\t" /* C2R6-C4R4 C2r6-C4r4 */\
- "movq 96(%2), %%mm6 \n\t" /* -C1 C5 -C1 C5 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
- "movq 104(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
+ "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\
+ "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\
+ "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "psrad $" #shift ", %%mm1 \n\t"\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\
+ "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\
+ "movq %%mm7, " #dst " \n\t"\
+ "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\
+ "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
+ "movq %%mm2, 24+" #dst " \n\t"\
+ "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
+ "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
+ "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
"pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
- "paddd %%mm5, %%mm4 \n\t" /* A2 a2 */\
- #rounder ", %%mm4 \n\t"\
-\
- "pmaddwd 112(%2), %%mm0 \n\t" /* -C2R2+C4R0 -C2r2+C4r0 */\
- "paddd %%mm7, %%mm6 \n\t" /* B1 b1 */\
- "paddd %%mm4, %%mm6 \n\t" /* A1+B1 a1+b1 */\
- "pmaddwd 120(%2), %%mm1 \n\t" /* -C6R6+C4R4 -C6r6+C4r4 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A1 2a1 */\
- "psubd %%mm6, %%mm4 \n\t" /* A1-B1 a1-b1 */\
- "pmaddwd 128(%2), %%mm2 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
- "pmaddwd 136(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
- "psrad $" #shift ", %%mm6 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
-\
- "paddd %%mm1, %%mm0 \n\t" /* A3 a3 */\
- #rounder ", %%mm0 \n\t"\
- "paddd %%mm3, %%mm2 \n\t" /* B3 b3 */\
- "paddd %%mm0, %%mm2 \n\t" /* A3+B3 a3+b3 */\
- "paddd %%mm0, %%mm0 \n\t" /* 2A3 2a3 */\
- "psubd %%mm2, %%mm0 \n\t" /* A3-B3 a3-b3 */\
+ "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\
+ "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
+ "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
+ "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "psubd %%mm4, %%mm0 \n\t" /* a2-B2 a2-b2 */\
"psrad $" #shift ", %%mm2 \n\t"\
"psrad $" #shift ", %%mm0 \n\t"\
- WRITE2(%%mm6, %%mm4, %%mm2, %%mm0, dst)\
+ "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
+ "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\
+ "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\
+ "psrad $" #shift ", %%mm6 \n\t"\
+ "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\
+ "movq %%mm2, 8+" #dst " \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\
+ "movq %%mm4, 16+" #dst " \n\t"\
"jmp 2f \n\t"\
"1: \n\t"\
- WRITE3(%%mm0, dst)\
- "2: \n\t"\
-
-
-#define WRITE0(s0, s7, dst)\
- "movq " #s0 ", " #dst " \n\t" /* R0 r0 */\
- "movq " #s7 ", 24+" #dst " \n\t" /* R7 r7 */
-
-#define WRITE1(s1, s6, dst, tmp)\
- "movq " #dst ", " #tmp " \n\t" /* R0 r0 */\
- "packssdw " #s1 ", " #tmp " \n\t" /* R1 r1 R0 r0*/\
- "movq " #tmp ", " #dst " \n\t"\
- "movq 24+" #dst ", " #tmp " \n\t" /* R7 r7 */\
- "packssdw " #tmp ", " #s6 " \n\t" /* R7 r7 R6 r6*/\
- "movq " #s6 ", 24+" #dst " \n\t"
-
-#define WRITE2(s2, s5, s3, s4, dst)\
- "packssdw " #s3 ", " #s2 " \n\t" /* R3 r3 R2 r2*/\
- "packssdw " #s5 ", " #s4 " \n\t" /* R5 r5 R4 r4*/\
- "movq " #s2 ", 8+" #dst " \n\t"\
- "movq " #s4 ", 16+" #dst " \n\t"
-
-#define WRITE3(a, dst)\
- "pslld $16, " #a " \n\t"\
- "psrad $13, " #a " \n\t"\
- "packssdw " #a ", " #a " \n\t"\
- "movq " #a ", " #dst " \n\t"\
- "movq " #a ", 8+" #dst " \n\t"\
- "movq " #a ", 16+" #dst " \n\t"\
- "movq " #a ", 24+" #dst " \n\t"\
-
-//IDCT_CORE( src0, src4, src1, src5, dst, rounder, shift)
-IDCT_CORE( (%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
-/*
-DC_COND_IDCT_CORE( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11)
-DC_COND_IDCT_CORE( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11)
-DC_COND_IDCT_CORE( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11)
-*/
-IDCT_CORE( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11)
-IDCT_CORE( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11)
-IDCT_CORE( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11)
+ "pslld $16, %%mm0 \n\t"\
+ "#paddd "MANGLE(d40000)", %%mm0 \n\t"\
+ "psrad $13, %%mm0 \n\t"\
+ "packssdw %%mm0, %%mm0 \n\t"\
+ "movq %%mm0, " #dst " \n\t"\
+ "movq %%mm0, 8+" #dst " \n\t"\
+ "movq %%mm0, 16+" #dst " \n\t"\
+ "movq %%mm0, 24+" #dst " \n\t"\
+ "2: \n\t"
-#undef WRITE0
-#undef WRITE1
-#undef WRITE2
-#define WRITE0(s0, s7, dst)\
- "packssdw " #s0 ", " #s0 " \n\t" /* C0, c0, C0, c0 */\
- "packssdw " #s7 ", " #s7 " \n\t" /* C7, c7, C7, c7 */\
- "movd " #s0 ", " #dst " \n\t" /* C0, c0 */\
- "movd " #s7 ", 112+" #dst " \n\t" /* C7, c7 */
+//IDCT( src0, src4, src1, src5, dst, rounder, shift)
+ROW_IDCT( (%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
+/*ROW_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1), paddd (%2), 11)
+ROW_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1), paddd (%2), 11)
+ROW_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1), paddd (%2), 11)*/
-#define WRITE1(s1, s6, dst, tmp)\
- "packssdw " #s1 ", " #s1 " \n\t" /* C1, c1, C1, c1 */\
- "packssdw " #s6 ", " #s6 " \n\t" /* C6, c6, C6, c6 */\
- "movd " #s1 ", 16+" #dst " \n\t" /* C1, c1 */\
- "movd " #s6 ", 96+" #dst " \n\t" /* C6, c6 */
+DC_COND_ROW_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11)
+DC_COND_ROW_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11)
+DC_COND_ROW_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11)
-#define WRITE2(s2, s5, s3, s4, dst)\
- "packssdw " #s2 ", " #s2 " \n\t" /* C2, c2, C2, c2 */\
- "packssdw " #s3 ", " #s3 " \n\t" /* C3, c3, C3, c3 */\
- "movd " #s2 ", 32+" #dst " \n\t" /* C2, c2 */\
- "movd " #s3 ", 48+" #dst " \n\t" /* C3, c3 */\
- "packssdw " #s4 ", " #s4 " \n\t" /* C4, c4, C4, c4 */\
- "packssdw " #s5 ", " #s5 " \n\t" /* C5, c5, C5, c5 */\
- "movd " #s4 ", 64+" #dst " \n\t" /* C4, c4 */\
- "movd " #s5 ", 80+" #dst " \n\t" /* C5, c5 */\
-//IDCT_CORE( src0, src4, src1, src5, dst, rounder, shift)
-IDCT_CORE( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
-IDCT_CORE( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
-IDCT_CORE( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
-IDCT_CORE( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
+//IDCT( src0, src4, src1, src5, dst, rounder, shift)
+COL_IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
+COL_IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
+COL_IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
+COL_IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
#else
-#define IDCT_CORE(src0, src4, src1, src5, dst, rounder, shift) \
- "movq " #src0 ", %%mm0 \n\t" /* R2 R0 r2 r0 */\
- "movq " #src4 ", %%mm1 \n\t" /* R6 R4 r6 r4 */\
+#define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
+ "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
"movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
"movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
- "movq 16(%2), %%mm4 \n\t" /* C2 C4 C2 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C2R2+C4R0 C2r2+C4r0 */\
- "movq 24(%2), %%mm5 \n\t" /* C6 C4 C6 C4 */\
- "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C4R4 C6r6+C4r4 */\
- "movq 32(%2), %%mm6 \n\t" /* C3 C1 C3 C1 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
- "movq 40(%2), %%mm7 \n\t" /* C7 C5 C7 C5 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
- "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
- #rounder ", %%mm4 \n\t"\
-\
- "movq 48(%2), %%mm5 \n\t" /* C6 C4 C6 C4 */\
- "pmaddwd %%mm0, %%mm5 \n\t" /* C6R2+C4R0 C6r2+C4r0 */\
- "paddd %%mm7, %%mm6 \n\t" /* B0 b0 */\
- "paddd %%mm4, %%mm6 \n\t" /* A0+B0 a0+b0 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
- "psubd %%mm6, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "psrad $" #shift ", %%mm6 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- WRITE0(%%mm6, %%mm4, dst) \
-\
- "movq 56(%2), %%mm4 \n\t" /* -C2 -C4 -C2 -C4 */\
- "pmaddwd %%mm1, %%mm4 \n\t" /* -C2R6-C4R4 -C2r6-C4r4 */\
- "movq 64(%2), %%mm6 \n\t" /* -C7 C3 -C7 C3 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
- "movq 72(%2), %%mm7 \n\t" /* -C5 -C1 -C5 -C1 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
- "paddd %%mm5, %%mm4 \n\t" /* A1 a1 */\
- #rounder ", %%mm4 \n\t"\
-\
- "movq 80(%2), %%mm5 \n\t" /* -C6 C4 -C6 C4 */\
- "pmaddwd %%mm0, %%mm5 \n\t" /* -C6R2+C4R0 -C6r2+C4r0 */\
- "paddd %%mm7, %%mm6 \n\t" /* B1 b1 */\
- "paddd %%mm4, %%mm6 \n\t" /* A1+B1 a1+b1 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A1 2a1 */\
- "psubd %%mm6, %%mm4 \n\t" /* A1-B1 a1-b1 */\
- "psrad $" #shift ", %%mm6 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- WRITE1(%%mm6, %%mm4, dst, %%mm7) \
-\
- "movq 88(%2), %%mm4 \n\t" /* C2 -C4 C2 -C4 */\
- "pmaddwd %%mm1, %%mm4 \n\t" /* C2R6-C4R4 C2r6-C4r4 */\
- "movq 96(%2), %%mm6 \n\t" /* -C1 C5 -C1 C5 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
- "movq 104(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
- "paddd %%mm5, %%mm4 \n\t" /* A2 a2 */\
- #rounder ", %%mm4 \n\t"\
-\
- "pmaddwd 112(%2), %%mm0 \n\t" /* -C2R2+C4R0 -C2r2+C4r0 */\
- "paddd %%mm7, %%mm6 \n\t" /* B1 b1 */\
- "paddd %%mm4, %%mm6 \n\t" /* A1+B1 a1+b1 */\
- "pmaddwd 120(%2), %%mm1 \n\t" /* -C6R6+C4R4 -C6r6+C4r4 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A1 2a1 */\
- "psubd %%mm6, %%mm4 \n\t" /* A1-B1 a1-b1 */\
- "pmaddwd 128(%2), %%mm2 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
- "pmaddwd 136(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
- "psrad $" #shift ", %%mm6 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
-\
- "paddd %%mm1, %%mm0 \n\t" /* A3 a3 */\
- #rounder ", %%mm0 \n\t"\
- "paddd %%mm3, %%mm2 \n\t" /* B3 b3 */\
- "paddd %%mm0, %%mm2 \n\t" /* A3+B3 a3+b3 */\
- "paddd %%mm0, %%mm0 \n\t" /* 2A3 2a3 */\
- "psubd %%mm2, %%mm0 \n\t" /* A3-B3 a3-b3 */\
- "psrad $" #shift ", %%mm2 \n\t"\
- "psrad $" #shift ", %%mm0 \n\t"\
- WRITE2(%%mm6, %%mm4, %%mm2, %%mm0, dst)
-
-#define DC_COND_IDCT_CORE(src0, src4, src1, src5, dst, rounder, shift) \
- "movq " #src0 ", %%mm0 \n\t" /* R2 R0 r2 r0 */\
- "movq " #src4 ", %%mm1 \n\t" /* R6 R4 r6 r4 */\
- "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
- "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
- "movq wm1010, %%mm4 \n\t"\
+ "movq "MANGLE(wm1010)", %%mm4 \n\t"\
"pand %%mm0, %%mm4 \n\t"\
"por %%mm1, %%mm4 \n\t"\
"por %%mm2, %%mm4 \n\t"\
@@ -536,920 +476,822 @@ IDCT_CORE( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
"movd %%mm4, %%eax \n\t"\
"orl %%eax, %%eax \n\t"\
"jz 1f \n\t"\
- "movq 16(%2), %%mm4 \n\t" /* C2 C4 C2 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C2R2+C4R0 C2r2+C4r0 */\
- "movq 24(%2), %%mm5 \n\t" /* C6 C4 C6 C4 */\
- "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C4R4 C6r6+C4r4 */\
- "movq 32(%2), %%mm6 \n\t" /* C3 C1 C3 C1 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
- "movq 40(%2), %%mm7 \n\t" /* C7 C5 C7 C5 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
- "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
+ "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
+ "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
+ "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
+ "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
+ "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
#rounder ", %%mm4 \n\t"\
-\
- "movq 48(%2), %%mm5 \n\t" /* C6 C4 C6 C4 */\
- "pmaddwd %%mm0, %%mm5 \n\t" /* C6R2+C4R0 C6r2+C4r0 */\
- "paddd %%mm7, %%mm6 \n\t" /* B0 b0 */\
- "paddd %%mm4, %%mm6 \n\t" /* A0+B0 a0+b0 */\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
+ "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
+ "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\
+ "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
+ #rounder ", %%mm0 \n\t"\
+ "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\
+ "paddd %%mm0, %%mm0 \n\t" \
+ "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\
+ "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
+ "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\
+ "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\
+ "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
+ "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
"paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
- "psubd %%mm6, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "psrad $" #shift ", %%mm6 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- WRITE0(%%mm6, %%mm4, dst) \
-\
- "movq 56(%2), %%mm4 \n\t" /* -C2 -C4 -C2 -C4 */\
- "pmaddwd %%mm1, %%mm4 \n\t" /* -C2R6-C4R4 -C2r6-C4r4 */\
- "movq 64(%2), %%mm6 \n\t" /* -C7 C3 -C7 C3 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
- "movq 72(%2), %%mm7 \n\t" /* -C5 -C1 -C5 -C1 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
- "paddd %%mm5, %%mm4 \n\t" /* A1 a1 */\
- #rounder ", %%mm4 \n\t"\
-\
- "movq 80(%2), %%mm5 \n\t" /* -C6 C4 -C6 C4 */\
- "pmaddwd %%mm0, %%mm5 \n\t" /* -C6R2+C4R0 -C6r2+C4r0 */\
- "paddd %%mm7, %%mm6 \n\t" /* B1 b1 */\
- "paddd %%mm4, %%mm6 \n\t" /* A1+B1 a1+b1 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A1 2a1 */\
- "psubd %%mm6, %%mm4 \n\t" /* A1-B1 a1-b1 */\
- "psrad $" #shift ", %%mm6 \n\t"\
+ "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\
+ "psrad $" #shift ", %%mm7 \n\t"\
"psrad $" #shift ", %%mm4 \n\t"\
- WRITE1(%%mm6, %%mm4, dst, %%mm7) \
-\
- "movq 88(%2), %%mm4 \n\t" /* C2 -C4 C2 -C4 */\
- "pmaddwd %%mm1, %%mm4 \n\t" /* C2R6-C4R4 C2r6-C4r4 */\
- "movq 96(%2), %%mm6 \n\t" /* -C1 C5 -C1 C5 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
- "movq 104(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
+ "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\
+ "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\
+ "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "psrad $" #shift ", %%mm1 \n\t"\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\
+ "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\
+ "movq %%mm7, " #dst " \n\t"\
+ "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\
+ "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
+ "movq %%mm2, 24+" #dst " \n\t"\
+ "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
+ "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
+ "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
"pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
- "paddd %%mm5, %%mm4 \n\t" /* A2 a2 */\
- #rounder ", %%mm4 \n\t"\
-\
- "pmaddwd 112(%2), %%mm0 \n\t" /* -C2R2+C4R0 -C2r2+C4r0 */\
- "paddd %%mm7, %%mm6 \n\t" /* B1 b1 */\
- "paddd %%mm4, %%mm6 \n\t" /* A1+B1 a1+b1 */\
- "pmaddwd 120(%2), %%mm1 \n\t" /* -C6R6+C4R4 -C6r6+C4r4 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A1 2a1 */\
- "psubd %%mm6, %%mm4 \n\t" /* A1-B1 a1-b1 */\
- "pmaddwd 128(%2), %%mm2 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
- "pmaddwd 136(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
- "psrad $" #shift ", %%mm6 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
-\
- "paddd %%mm1, %%mm0 \n\t" /* A3 a3 */\
- #rounder ", %%mm0 \n\t"\
- "paddd %%mm3, %%mm2 \n\t" /* B3 b3 */\
- "paddd %%mm0, %%mm2 \n\t" /* A3+B3 a3+b3 */\
- "paddd %%mm0, %%mm0 \n\t" /* 2A3 2a3 */\
- "psubd %%mm2, %%mm0 \n\t" /* A3-B3 a3-b3 */\
+ "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\
+ "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
+ "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
+ "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "psubd %%mm4, %%mm0 \n\t" /* a2-B2 a2-b2 */\
"psrad $" #shift ", %%mm2 \n\t"\
"psrad $" #shift ", %%mm0 \n\t"\
- WRITE2(%%mm6, %%mm4, %%mm2, %%mm0, dst)\
+ "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
+ "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\
+ "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\
+ "psrad $" #shift ", %%mm6 \n\t"\
+ "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\
+ "movq %%mm2, 8+" #dst " \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\
+ "movq %%mm4, 16+" #dst " \n\t"\
"jmp 2f \n\t"\
- "#.balign 16 \n\t"\
"1: \n\t"\
- WRITE3(%%mm0, dst)\
- "2: \n\t"\
+ "pslld $16, %%mm0 \n\t"\
+ "paddd "MANGLE(d40000)", %%mm0 \n\t"\
+ "psrad $13, %%mm0 \n\t"\
+ "packssdw %%mm0, %%mm0 \n\t"\
+ "movq %%mm0, " #dst " \n\t"\
+ "movq %%mm0, 8+" #dst " \n\t"\
+ "movq %%mm0, 16+" #dst " \n\t"\
+ "movq %%mm0, 24+" #dst " \n\t"\
+ "2: \n\t"
-#define Z_COND_IDCT_CORE(src0, src4, src1, src5, dst, rounder, shift, bt) \
- "movq " #src0 ", %%mm0 \n\t" /* R2 R0 r2 r0 */\
- "movq " #src4 ", %%mm1 \n\t" /* R6 R4 r6 r4 */\
+#define Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift, bt) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
+ "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
"movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
"movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
"movq %%mm0, %%mm4 \n\t"\
"por %%mm1, %%mm4 \n\t"\
"por %%mm2, %%mm4 \n\t"\
"por %%mm3, %%mm4 \n\t"\
- "packssdw %%mm4, %%mm4 \n\t"\
+ "packssdw %%mm4,%%mm4 \n\t"\
"movd %%mm4, %%eax \n\t"\
"orl %%eax, %%eax \n\t"\
"jz " #bt " \n\t"\
- "movq 16(%2), %%mm4 \n\t" /* C2 C4 C2 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C2R2+C4R0 C2r2+C4r0 */\
- "movq 24(%2), %%mm5 \n\t" /* C6 C4 C6 C4 */\
- "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C4R4 C6r6+C4r4 */\
- "movq 32(%2), %%mm6 \n\t" /* C3 C1 C3 C1 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
- "movq 40(%2), %%mm7 \n\t" /* C7 C5 C7 C5 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
- "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
+ "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
+ "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
+ "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
+ "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
+ "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
#rounder ", %%mm4 \n\t"\
-\
- "movq 48(%2), %%mm5 \n\t" /* C6 C4 C6 C4 */\
- "pmaddwd %%mm0, %%mm5 \n\t" /* C6R2+C4R0 C6r2+C4r0 */\
- "paddd %%mm7, %%mm6 \n\t" /* B0 b0 */\
- "paddd %%mm4, %%mm6 \n\t" /* A0+B0 a0+b0 */\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
+ "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
+ "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\
+ "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
+ #rounder ", %%mm0 \n\t"\
+ "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\
+ "paddd %%mm0, %%mm0 \n\t" \
+ "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\
+ "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
+ "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\
+ "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\
+ "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
+ "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
"paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
- "psubd %%mm6, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "psrad $" #shift ", %%mm6 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- WRITE0(%%mm6, %%mm4, dst) \
-\
- "movq 56(%2), %%mm4 \n\t" /* -C2 -C4 -C2 -C4 */\
- "pmaddwd %%mm1, %%mm4 \n\t" /* -C2R6-C4R4 -C2r6-C4r4 */\
- "movq 64(%2), %%mm6 \n\t" /* -C7 C3 -C7 C3 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
- "movq 72(%2), %%mm7 \n\t" /* -C5 -C1 -C5 -C1 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
- "paddd %%mm5, %%mm4 \n\t" /* A1 a1 */\
- #rounder ", %%mm4 \n\t"\
-\
- "movq 80(%2), %%mm5 \n\t" /* -C6 C4 -C6 C4 */\
- "pmaddwd %%mm0, %%mm5 \n\t" /* -C6R2+C4R0 -C6r2+C4r0 */\
- "paddd %%mm7, %%mm6 \n\t" /* B1 b1 */\
- "paddd %%mm4, %%mm6 \n\t" /* A1+B1 a1+b1 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A1 2a1 */\
- "psubd %%mm6, %%mm4 \n\t" /* A1-B1 a1-b1 */\
- "psrad $" #shift ", %%mm6 \n\t"\
+ "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\
+ "psrad $" #shift ", %%mm7 \n\t"\
"psrad $" #shift ", %%mm4 \n\t"\
- WRITE1(%%mm6, %%mm4, dst, %%mm7) \
-\
- "movq 88(%2), %%mm4 \n\t" /* C2 -C4 C2 -C4 */\
- "pmaddwd %%mm1, %%mm4 \n\t" /* C2R6-C4R4 C2r6-C4r4 */\
- "movq 96(%2), %%mm6 \n\t" /* -C1 C5 -C1 C5 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
- "movq 104(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
+ "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\
+ "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\
+ "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "psrad $" #shift ", %%mm1 \n\t"\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\
+ "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\
+ "movq %%mm7, " #dst " \n\t"\
+ "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\
+ "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
+ "movq %%mm2, 24+" #dst " \n\t"\
+ "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
+ "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
+ "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
"pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
- "paddd %%mm5, %%mm4 \n\t" /* A2 a2 */\
- #rounder ", %%mm4 \n\t"\
-\
- "pmaddwd 112(%2), %%mm0 \n\t" /* -C2R2+C4R0 -C2r2+C4r0 */\
- "paddd %%mm7, %%mm6 \n\t" /* B1 b1 */\
- "paddd %%mm4, %%mm6 \n\t" /* A1+B1 a1+b1 */\
- "pmaddwd 120(%2), %%mm1 \n\t" /* -C6R6+C4R4 -C6r6+C4r4 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A1 2a1 */\
- "psubd %%mm6, %%mm4 \n\t" /* A1-B1 a1-b1 */\
- "pmaddwd 128(%2), %%mm2 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
- "pmaddwd 136(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
- "psrad $" #shift ", %%mm6 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
-\
- "paddd %%mm1, %%mm0 \n\t" /* A3 a3 */\
- #rounder ", %%mm0 \n\t"\
- "paddd %%mm3, %%mm2 \n\t" /* B3 b3 */\
- "paddd %%mm0, %%mm2 \n\t" /* A3+B3 a3+b3 */\
- "paddd %%mm0, %%mm0 \n\t" /* 2A3 2a3 */\
- "psubd %%mm2, %%mm0 \n\t" /* A3-B3 a3-b3 */\
+ "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\
+ "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
+ "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
+ "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "psubd %%mm4, %%mm0 \n\t" /* A2-B2 a2-b2 */\
"psrad $" #shift ", %%mm2 \n\t"\
"psrad $" #shift ", %%mm0 \n\t"\
- WRITE2(%%mm6, %%mm4, %%mm2, %%mm0, dst)\
-
-
-#define WRITE0(s0, s7, dst)\
- "movq " #s0 ", " #dst " \n\t" /* R0 r0 */\
- "movq " #s7 ", 24+" #dst " \n\t" /* R7 r7 */
-
-#define WRITE1(s1, s6, dst, tmp)\
- "movq " #dst ", " #tmp " \n\t" /* R0 r0 */\
- "packssdw " #s1 ", " #tmp " \n\t" /* R1 r1 R0 r0*/\
- "movq " #tmp ", " #dst " \n\t"\
- "movq 24+" #dst ", " #tmp " \n\t" /* R7 r7 */\
- "packssdw " #tmp ", " #s6 " \n\t" /* R7 r7 R6 r6*/\
- "movq " #s6 ", 24+" #dst " \n\t"
-
-#define WRITE2(s2, s5, s3, s4, dst)\
- "packssdw " #s3 ", " #s2 " \n\t" /* R3 r3 R2 r2*/\
- "packssdw " #s5 ", " #s4 " \n\t" /* R5 r5 R4 r4*/\
- "movq " #s2 ", 8+" #dst " \n\t"\
- "movq " #s4 ", 16+" #dst " \n\t"
-
-#define WRITE3(a, dst)\
- "pslld $16, " #a " \n\t"\
- "paddd d40000, " #a " \n\t"\
- "psrad $13, " #a " \n\t"\
- "packssdw " #a ", " #a " \n\t"\
- "movq " #a ", " #dst " \n\t"\
- "movq " #a ", 8+" #dst " \n\t"\
- "movq " #a ", 16+" #dst " \n\t"\
- "movq " #a ", 24+" #dst " \n\t"\
-
-#define WRITE0b(s0, s7, dst)\
- "packssdw " #s0 ", " #s0 " \n\t" /* C0, c0, C0, c0 */\
- "packssdw " #s7 ", " #s7 " \n\t" /* C7, c7, C7, c7 */\
- "movd " #s0 ", " #dst " \n\t" /* C0, c0 */\
- "movd " #s7 ", 112+" #dst " \n\t" /* C7, c7 */
-
-#define WRITE1b(s1, s6, dst, tmp)\
- "packssdw " #s1 ", " #s1 " \n\t" /* C1, c1, C1, c1 */\
- "packssdw " #s6 ", " #s6 " \n\t" /* C6, c6, C6, c6 */\
- "movd " #s1 ", 16+" #dst " \n\t" /* C1, c1 */\
- "movd " #s6 ", 96+" #dst " \n\t" /* C6, c6 */
-
-#define WRITE2b(s2, s5, s3, s4, dst)\
- "packssdw " #s2 ", " #s2 " \n\t" /* C2, c2, C2, c2 */\
- "packssdw " #s3 ", " #s3 " \n\t" /* C3, c3, C3, c3 */\
- "movd " #s2 ", 32+" #dst " \n\t" /* C2, c2 */\
- "movd " #s3 ", 48+" #dst " \n\t" /* C3, c3 */\
- "packssdw " #s4 ", " #s4 " \n\t" /* C4, c4, C4, c4 */\
- "packssdw " #s5 ", " #s5 " \n\t" /* C5, c5, C5, c5 */\
- "movd " #s4 ", 64+" #dst " \n\t" /* C4, c4 */\
- "movd " #s5 ", 80+" #dst " \n\t" /* C5, c5 */\
-
-
-//IDCT_CORE( src0, src4, src1, src5, dst, rounder, shift)
-DC_COND_IDCT_CORE( 0(%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
-Z_COND_IDCT_CORE( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11, 4f)
-Z_COND_IDCT_CORE( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 2f)
-Z_COND_IDCT_CORE( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 1f)
+ "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
+ "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\
+ "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "psubd %%mm3, %%mm4 \n\t" /* A3-B3 a3-b3 */\
+ "psrad $" #shift ", %%mm6 \n\t"\
+ "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\
+ "movq %%mm2, 8+" #dst " \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\
+ "movq %%mm4, 16+" #dst " \n\t"\
-#undef IDCT_CORE
-#define IDCT_CORE(src0, src4, src1, src5, dst, rounder, shift) \
- "movq " #src0 ", %%mm0 \n\t" /* R2 R0 r2 r0 */\
- "movq " #src4 ", %%mm1 \n\t" /* R6 R4 r6 r4 */\
+#define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
+ "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
"movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
"movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
- "movq 16(%2), %%mm4 \n\t" /* C2 C4 C2 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C2R2+C4R0 C2r2+C4r0 */\
- "movq 24(%2), %%mm5 \n\t" /* C6 C4 C6 C4 */\
- "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C4R4 C6r6+C4r4 */\
- "movq 32(%2), %%mm6 \n\t" /* C3 C1 C3 C1 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
- "movq 40(%2), %%mm7 \n\t" /* C7 C5 C7 C5 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
+ "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
+ "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
+ "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
+ "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
+ "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
+ #rounder ", %%mm4 \n\t"\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
"paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
-\
- "movq 48(%2), %%mm5 \n\t" /* C6 C4 C6 C4 */\
- "pmaddwd %%mm0, %%mm5 \n\t" /* C6R2+C4R0 C6r2+C4r0 */\
- "paddd %%mm7, %%mm6 \n\t" /* B0 b0 */\
- "paddd %%mm4, %%mm6 \n\t" /* A0+B0 a0+b0 */\
+ "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
+ "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\
+ "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
+ #rounder ", %%mm0 \n\t"\
+ "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\
+ "paddd %%mm0, %%mm0 \n\t" \
+ "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\
+ "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
+ "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\
+ "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\
+ "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
+ "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
"paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
- "psubd %%mm6, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "psrad $" #shift ", %%mm6 \n\t"\
+ "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\
+ "psrad $" #shift ", %%mm7 \n\t"\
"psrad $" #shift ", %%mm4 \n\t"\
- WRITE0b(%%mm6, %%mm4, dst) \
-\
- "movq 56(%2), %%mm4 \n\t" /* -C2 -C4 -C2 -C4 */\
- "pmaddwd %%mm1, %%mm4 \n\t" /* -C2R6-C4R4 -C2r6-C4r4 */\
- "movq 64(%2), %%mm6 \n\t" /* -C7 C3 -C7 C3 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
- "movq 72(%2), %%mm7 \n\t" /* -C5 -C1 -C5 -C1 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
- "paddd %%mm5, %%mm4 \n\t" /* A1 a1 */\
-\
- "movq 80(%2), %%mm5 \n\t" /* -C6 C4 -C6 C4 */\
- "pmaddwd %%mm0, %%mm5 \n\t" /* -C6R2+C4R0 -C6r2+C4r0 */\
- "paddd %%mm7, %%mm6 \n\t" /* B1 b1 */\
- "paddd %%mm4, %%mm6 \n\t" /* A1+B1 a1+b1 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A1 2a1 */\
- "psubd %%mm6, %%mm4 \n\t" /* A1-B1 a1-b1 */\
+ "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\
+ "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\
+ "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "psrad $" #shift ", %%mm1 \n\t"\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\
+ "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\
+ "movq %%mm7, " #dst " \n\t"\
+ "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\
+ "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
+ "movq %%mm2, 24+" #dst " \n\t"\
+ "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
+ "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
+ "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
+ "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
+ "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\
+ "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
+ "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
+ "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "psubd %%mm4, %%mm0 \n\t" /* A2-B2 a2-b2 */\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "psrad $" #shift ", %%mm0 \n\t"\
+ "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
+ "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\
+ "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "psubd %%mm3, %%mm4 \n\t" /* A3-B3 a3-b3 */\
"psrad $" #shift ", %%mm6 \n\t"\
+ "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\
+ "movq %%mm2, 8+" #dst " \n\t"\
"psrad $" #shift ", %%mm4 \n\t"\
- WRITE1b(%%mm6, %%mm4, dst, %%mm7) \
-\
- "movq 88(%2), %%mm4 \n\t" /* C2 -C4 C2 -C4 */\
- "pmaddwd %%mm1, %%mm4 \n\t" /* C2R6-C4R4 C2r6-C4r4 */\
- "movq 96(%2), %%mm6 \n\t" /* -C1 C5 -C1 C5 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
- "movq 104(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
+ "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\
+ "movq %%mm4, 16+" #dst " \n\t"\
+
+//IDCT( src0, src4, src1, src5, dst, rounder, shift)
+DC_COND_IDCT( 0(%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
+Z_COND_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11, 4f)
+Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 2f)
+Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 1f)
+
+#undef IDCT
+#define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
+ "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
+ "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
+ "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
+ "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
+ "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
+ "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
+ #rounder ", %%mm4 \n\t"\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
+ #rounder ", %%mm0 \n\t"\
+ "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
+ "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
+ "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
+ "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\
+ "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\
+ "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\
+ "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
+ "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
+ "paddd %%mm1, %%mm7 \n\t" /* B0 b0 */\
+ "movq 72(%2), %%mm1 \n\t" /* -C5 -C1 -C5 -C1 */\
+ "pmaddwd %%mm3, %%mm1 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
+ "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
+ "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
+ "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "paddd %%mm2, %%mm1 \n\t" /* B1 b1 */\
+ "psrad $" #shift ", %%mm7 \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\
+ "paddd %%mm1, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "psubd %%mm1, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "psrad $" #shift ", %%mm0 \n\t"\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\
+ "movd %%mm7, " #dst " \n\t"\
+ "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "movd %%mm0, 16+" #dst " \n\t"\
+ "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "movd %%mm2, 96+" #dst " \n\t"\
+ "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "movd %%mm4, 112+" #dst " \n\t"\
+ "movq " #src1 ", %%mm0 \n\t" /* R3 R1 r3 r1 */\
+ "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
+ "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
+ "pmaddwd 96(%2), %%mm0 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
"pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
- "paddd %%mm5, %%mm4 \n\t" /* A2 a2 */\
-\
- "pmaddwd 112(%2), %%mm0 \n\t" /* -C2R2+C4R0 -C2r2+C4r0 */\
- "paddd %%mm7, %%mm6 \n\t" /* B1 b1 */\
- "paddd %%mm4, %%mm6 \n\t" /* A1+B1 a1+b1 */\
- "pmaddwd 120(%2), %%mm1 \n\t" /* -C6R6+C4R4 -C6r6+C4r4 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A1 2a1 */\
- "psubd %%mm6, %%mm4 \n\t" /* A1-B1 a1-b1 */\
- "pmaddwd 128(%2), %%mm2 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
- "pmaddwd 136(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
+ "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\
+ "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
+ "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
+ "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "psubd %%mm4, %%mm5 \n\t" /* A2-B2 a2-b2 */\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "psrad $" #shift ", %%mm5 \n\t"\
+ "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
+ "paddd %%mm0, %%mm3 \n\t" /* B3 b3 */\
+ "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "psubd %%mm3, %%mm4 \n\t" /* A3-B3 a3-b3 */\
"psrad $" #shift ", %%mm6 \n\t"\
"psrad $" #shift ", %%mm4 \n\t"\
-\
- "paddd %%mm1, %%mm0 \n\t" /* A3 a3 */\
- "paddd %%mm3, %%mm2 \n\t" /* B3 b3 */\
- "paddd %%mm0, %%mm2 \n\t" /* A3+B3 a3+b3 */\
- "paddd %%mm0, %%mm0 \n\t" /* 2A3 2a3 */\
- "psubd %%mm2, %%mm0 \n\t" /* A3-B3 a3-b3 */\
- "psrad $" #shift ", %%mm2 \n\t"\
- "psrad $" #shift ", %%mm0 \n\t"\
- WRITE2b(%%mm6, %%mm4, %%mm2, %%mm0, dst)
-
-//IDCT_CORE( src0, src4, src1, src5, dst, rounder, shift)
-IDCT_CORE( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
-IDCT_CORE( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
-IDCT_CORE( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
-IDCT_CORE( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
+ "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "movd %%mm2, 32+" #dst " \n\t"\
+ "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\
+ "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
+ "movd %%mm6, 48+" #dst " \n\t"\
+ "movd %%mm4, 64+" #dst " \n\t"\
+ "movd %%mm5, 80+" #dst " \n\t"
+
+
+//IDCT( src0, src4, src1, src5, dst, rounder, shift)
+IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
+IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
+IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
+IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
"jmp 9f \n\t"
"#.balign 16 \n\t"\
"4: \n\t"
-Z_COND_IDCT_CORE( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 6f)
-Z_COND_IDCT_CORE( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5f)
+Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 6f)
+Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5f)
-#undef IDCT_CORE
-#define IDCT_CORE(src0, src4, src1, src5, dst, rounder, shift) \
- "movq " #src0 ", %%mm0 \n\t" /* R2 R0 r2 r0 */\
- "movq " #src4 ", %%mm1 \n\t" /* R6 R4 r6 r4 */\
+#undef IDCT
+#define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
+ "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
"movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
- "movq 16(%2), %%mm4 \n\t" /* C2 C4 C2 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C2R2+C4R0 C2r2+C4r0 */\
- "movq 24(%2), %%mm5 \n\t" /* C6 C4 C6 C4 */\
- "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C4R4 C6r6+C4r4 */\
- "movq 40(%2), %%mm7 \n\t" /* C7 C5 C7 C5 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
+ "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
+ "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
+ "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
+ #rounder ", %%mm4 \n\t"\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ #rounder ", %%mm0 \n\t"\
"paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
-\
- "movq 48(%2), %%mm5 \n\t" /* C6 C4 C6 C4 */\
- "pmaddwd %%mm0, %%mm5 \n\t" /* C6R2+C4R0 C6r2+C4r0 */\
- "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
- "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "psrad $" #shift ", %%mm7 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- WRITE0b(%%mm7, %%mm4, dst) \
-\
- "movq 56(%2), %%mm4 \n\t" /* -C2 -C4 -C2 -C4 */\
- "pmaddwd %%mm1, %%mm4 \n\t" /* -C2R6-C4R4 -C2r6-C4r4 */\
+ "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
+ "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\
+ "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\
+ "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\
+ "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
"movq 72(%2), %%mm7 \n\t" /* -C5 -C1 -C5 -C1 */\
"pmaddwd %%mm3, %%mm7 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
- "paddd %%mm5, %%mm4 \n\t" /* A1 a1 */\
-\
- "movq 80(%2), %%mm5 \n\t" /* -C6 C4 -C6 C4 */\
- "pmaddwd %%mm0, %%mm5 \n\t" /* -C6R2+C4R0 -C6r2+C4r0 */\
- "paddd %%mm4, %%mm7 \n\t" /* A1+B1 a1+b1 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A1 2a1 */\
- "psubd %%mm7, %%mm4 \n\t" /* A1-B1 a1-b1 */\
- "psrad $" #shift ", %%mm7 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- WRITE1b(%%mm7, %%mm4, dst, %%mm6) \
-\
- "movq 88(%2), %%mm4 \n\t" /* C2 -C4 C2 -C4 */\
- "pmaddwd %%mm1, %%mm4 \n\t" /* C2R6-C4R4 C2r6-C4r4 */\
- "movq 104(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
- "paddd %%mm5, %%mm4 \n\t" /* A2 a2 */\
-\
- "pmaddwd 112(%2), %%mm0 \n\t" /* -C2R2+C4R0 -C2r2+C4r0 */\
- "paddd %%mm4, %%mm7 \n\t" /* A1+B1 a1+b1 */\
- "pmaddwd 120(%2), %%mm1 \n\t" /* -C6R6+C4R4 -C6r6+C4r4 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A1 2a1 */\
- "psubd %%mm7, %%mm4 \n\t" /* A1-B1 a1-b1 */\
- "pmaddwd 136(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
- "psrad $" #shift ", %%mm7 \n\t"\
+ "paddd %%mm4, %%mm1 \n\t" /* A0+B0 a0+b0 */\
+ "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
+ "psubd %%mm1, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "psrad $" #shift ", %%mm1 \n\t"\
"psrad $" #shift ", %%mm4 \n\t"\
-\
- "paddd %%mm1, %%mm0 \n\t" /* A3 a3 */\
- "paddd %%mm0, %%mm3 \n\t" /* A3+B3 a3+b3 */\
- "paddd %%mm0, %%mm0 \n\t" /* 2A3 2a3 */\
- "psubd %%mm3, %%mm0 \n\t" /* A3-B3 a3-b3 */\
- "psrad $" #shift ", %%mm3 \n\t"\
+ "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\
+ "paddd %%mm7, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "psubd %%mm7, %%mm2 \n\t" /* A1-B1 a1-b1 */\
"psrad $" #shift ", %%mm0 \n\t"\
- WRITE2b(%%mm7, %%mm4, %%mm3, %%mm0, dst)
-
-//IDCT_CORE( src0, src4, src1, src5, dst, rounder, shift)
-IDCT_CORE( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
-IDCT_CORE( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
-IDCT_CORE( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
-IDCT_CORE( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "packssdw %%mm1, %%mm1 \n\t" /* A0+B0 a0+b0 */\
+ "movd %%mm1, " #dst " \n\t"\
+ "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "movd %%mm0, 16+" #dst " \n\t"\
+ "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "movd %%mm2, 96+" #dst " \n\t"\
+ "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "movd %%mm4, 112+" #dst " \n\t"\
+ "movq 88(%2), %%mm1 \n\t" /* C3 C7 C3 C7 */\
+ "pmaddwd %%mm3, %%mm1 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
+ "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\
+ "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
+ "paddd %%mm1, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "psubd %%mm1, %%mm5 \n\t" /* A2-B2 a2-b2 */\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "psrad $" #shift ", %%mm5 \n\t"\
+ "movq %%mm6, %%mm1 \n\t" /* A3 a3 */\
+ "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "psubd %%mm3, %%mm1 \n\t" /* A3-B3 a3-b3 */\
+ "psrad $" #shift ", %%mm6 \n\t"\
+ "psrad $" #shift ", %%mm1 \n\t"\
+ "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "movd %%mm2, 32+" #dst " \n\t"\
+ "packssdw %%mm1, %%mm1 \n\t" /* A3-B3 a3-b3 */\
+ "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
+ "movd %%mm6, 48+" #dst " \n\t"\
+ "movd %%mm1, 64+" #dst " \n\t"\
+ "movd %%mm5, 80+" #dst " \n\t"
+
+//IDCT( src0, src4, src1, src5, dst, rounder, shift)
+IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
+IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
+IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
+IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
"jmp 9f \n\t"
"#.balign 16 \n\t"\
"6: \n\t"
-Z_COND_IDCT_CORE( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7f)
+Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7f)
-#undef IDCT_CORE
-#define IDCT_CORE(src0, src4, src1, src5, dst, rounder, shift) \
- "movq " #src0 ", %%mm0 \n\t" /* R2 R0 r2 r0 */\
+#undef IDCT
+#define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
"movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
- "movq 16(%2), %%mm4 \n\t" /* C2 C4 C2 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C2R2+C4R0 C2r2+C4r0 */\
- "movq 40(%2), %%mm7 \n\t" /* C7 C5 C7 C5 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
-\
- "movq 48(%2), %%mm5 \n\t" /* C6 C4 C6 C4 */\
- "pmaddwd %%mm0, %%mm5 \n\t" /* C6R2+C4R0 C6r2+C4r0 */\
- "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
- "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "psrad $" #shift ", %%mm7 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- WRITE0b(%%mm7, %%mm4, dst) \
-\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ #rounder ", %%mm4 \n\t"\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ #rounder ", %%mm0 \n\t"\
+ "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\
+ "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
"movq 72(%2), %%mm7 \n\t" /* -C5 -C1 -C5 -C1 */\
"pmaddwd %%mm3, %%mm7 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
-\
- "movq 80(%2), %%mm4 \n\t" /* -C6 C4 -C6 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* -C6R2+C4R0 -C6r2+C4r0 */\
- "paddd %%mm5, %%mm7 \n\t" /* A1+B1 a1+b1 */\
- "paddd %%mm5, %%mm5 \n\t" /* 2A1 2a1 */\
- "psubd %%mm7, %%mm5 \n\t" /* A1-B1 a1-b1 */\
- "psrad $" #shift ", %%mm7 \n\t"\
- "psrad $" #shift ", %%mm5 \n\t"\
- WRITE1b(%%mm7, %%mm5, dst, %%mm6) \
-\
- "movq 104(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
-\
- "pmaddwd 112(%2), %%mm0 \n\t" /* -C2R2+C4R0 -C2r2+C4r0 */\
- "paddd %%mm4, %%mm7 \n\t" /* A1+B1 a1+b1 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A1 2a1 */\
- "psubd %%mm7, %%mm4 \n\t" /* A1-B1 a1-b1 */\
- "pmaddwd 136(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
- "psrad $" #shift ", %%mm7 \n\t"\
+ "paddd %%mm4, %%mm1 \n\t" /* A0+B0 a0+b0 */\
+ "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
+ "psubd %%mm1, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "psrad $" #shift ", %%mm1 \n\t"\
"psrad $" #shift ", %%mm4 \n\t"\
-\
- "paddd %%mm0, %%mm3 \n\t" /* A3+B3 a3+b3 */\
- "paddd %%mm0, %%mm0 \n\t" /* 2A3 2a3 */\
- "psubd %%mm3, %%mm0 \n\t" /* A3-B3 a3-b3 */\
- "psrad $" #shift ", %%mm3 \n\t"\
+ "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\
+ "paddd %%mm7, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "psubd %%mm7, %%mm2 \n\t" /* A1-B1 a1-b1 */\
"psrad $" #shift ", %%mm0 \n\t"\
- WRITE2b(%%mm7, %%mm4, %%mm3, %%mm0, dst)
-
-//IDCT_CORE( src0, src4, src1, src5, dst, rounder, shift)
-IDCT_CORE( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
-IDCT_CORE( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
-IDCT_CORE( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
-IDCT_CORE( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "packssdw %%mm1, %%mm1 \n\t" /* A0+B0 a0+b0 */\
+ "movd %%mm1, " #dst " \n\t"\
+ "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "movd %%mm0, 16+" #dst " \n\t"\
+ "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "movd %%mm2, 96+" #dst " \n\t"\
+ "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "movd %%mm4, 112+" #dst " \n\t"\
+ "movq 88(%2), %%mm1 \n\t" /* C3 C7 C3 C7 */\
+ "pmaddwd %%mm3, %%mm1 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
+ "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\
+ "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
+ "paddd %%mm1, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "psubd %%mm1, %%mm5 \n\t" /* A2-B2 a2-b2 */\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "psrad $" #shift ", %%mm5 \n\t"\
+ "movq %%mm6, %%mm1 \n\t" /* A3 a3 */\
+ "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "psubd %%mm3, %%mm1 \n\t" /* A3-B3 a3-b3 */\
+ "psrad $" #shift ", %%mm6 \n\t"\
+ "psrad $" #shift ", %%mm1 \n\t"\
+ "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "movd %%mm2, 32+" #dst " \n\t"\
+ "packssdw %%mm1, %%mm1 \n\t" /* A3-B3 a3-b3 */\
+ "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
+ "movd %%mm6, 48+" #dst " \n\t"\
+ "movd %%mm1, 64+" #dst " \n\t"\
+ "movd %%mm5, 80+" #dst " \n\t"
+
+
+//IDCT( src0, src4, src1, src5, dst, rounder, shift)
+IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
+IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
+IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
+IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
"jmp 9f \n\t"
"#.balign 16 \n\t"\
"2: \n\t"
-Z_COND_IDCT_CORE( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 3f)
+Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 3f)
-#undef IDCT_CORE
-#define IDCT_CORE(src0, src4, src1, src5, dst, rounder, shift) \
- "movq " #src0 ", %%mm0 \n\t" /* R2 R0 r2 r0 */\
+#undef IDCT
+#define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
"movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
"movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
- "movq 16(%2), %%mm4 \n\t" /* C2 C4 C2 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C2R2+C4R0 C2r2+C4r0 */\
- "movq 32(%2), %%mm6 \n\t" /* C3 C1 C3 C1 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
- "movq 40(%2), %%mm7 \n\t" /* C7 C5 C7 C5 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
-\
- "movq 48(%2), %%mm5 \n\t" /* C6 C4 C6 C4 */\
- "pmaddwd %%mm0, %%mm5 \n\t" /* C6R2+C4R0 C6r2+C4r0 */\
- "paddd %%mm7, %%mm6 \n\t" /* B0 b0 */\
- "paddd %%mm4, %%mm6 \n\t" /* A0+B0 a0+b0 */\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ #rounder ", %%mm4 \n\t"\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
+ #rounder ", %%mm0 \n\t"\
+ "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
+ "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\
+ "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
+ "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
+ "paddd %%mm1, %%mm7 \n\t" /* B0 b0 */\
+ "movq 72(%2), %%mm1 \n\t" /* -C5 -C1 -C5 -C1 */\
+ "pmaddwd %%mm3, %%mm1 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
+ "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
"paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
- "psubd %%mm6, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "psrad $" #shift ", %%mm6 \n\t"\
+ "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "paddd %%mm2, %%mm1 \n\t" /* B1 b1 */\
+ "psrad $" #shift ", %%mm7 \n\t"\
"psrad $" #shift ", %%mm4 \n\t"\
- WRITE0b(%%mm6, %%mm4, dst) \
-\
- "movq 64(%2), %%mm6 \n\t" /* -C7 C3 -C7 C3 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
- "movq 72(%2), %%mm7 \n\t" /* -C5 -C1 -C5 -C1 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
-\
- "movq 80(%2), %%mm4 \n\t" /* -C6 C4 -C6 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* -C6R2+C4R0 -C6r2+C4r0 */\
- "paddd %%mm7, %%mm6 \n\t" /* B1 b1 */\
- "paddd %%mm5, %%mm6 \n\t" /* A1+B1 a1+b1 */\
- "paddd %%mm5, %%mm5 \n\t" /* 2A1 2a1 */\
- "psubd %%mm6, %%mm5 \n\t" /* A1-B1 a1-b1 */\
- "psrad $" #shift ", %%mm6 \n\t"\
- "psrad $" #shift ", %%mm5 \n\t"\
- WRITE1b(%%mm6, %%mm5, dst, %%mm7) \
-\
- "movq 96(%2), %%mm6 \n\t" /* -C1 C5 -C1 C5 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
- "movq 104(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
+ "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\
+ "paddd %%mm1, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "psubd %%mm1, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "psrad $" #shift ", %%mm0 \n\t"\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\
+ "movd %%mm7, " #dst " \n\t"\
+ "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "movd %%mm0, 16+" #dst " \n\t"\
+ "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "movd %%mm2, 96+" #dst " \n\t"\
+ "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "movd %%mm4, 112+" #dst " \n\t"\
+ "movq " #src1 ", %%mm0 \n\t" /* R3 R1 r3 r1 */\
+ "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
+ "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
+ "pmaddwd 96(%2), %%mm0 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
"pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
-\
- "pmaddwd 112(%2), %%mm0 \n\t" /* -C2R2+C4R0 -C2r2+C4r0 */\
- "paddd %%mm7, %%mm6 \n\t" /* B1 b1 */\
- "paddd %%mm4, %%mm6 \n\t" /* A1+B1 a1+b1 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A1 2a1 */\
- "psubd %%mm6, %%mm4 \n\t" /* A1-B1 a1-b1 */\
- "pmaddwd 128(%2), %%mm2 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
- "pmaddwd 136(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
+ "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\
+ "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
+ "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
+ "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "psubd %%mm4, %%mm5 \n\t" /* A2-B2 a2-b2 */\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "psrad $" #shift ", %%mm5 \n\t"\
+ "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
+ "paddd %%mm0, %%mm3 \n\t" /* B3 b3 */\
+ "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "psubd %%mm3, %%mm4 \n\t" /* A3-B3 a3-b3 */\
"psrad $" #shift ", %%mm6 \n\t"\
"psrad $" #shift ", %%mm4 \n\t"\
-\
- "paddd %%mm3, %%mm2 \n\t" /* B3 b3 */\
- "paddd %%mm0, %%mm2 \n\t" /* A3+B3 a3+b3 */\
- "paddd %%mm0, %%mm0 \n\t" /* 2A3 2a3 */\
- "psubd %%mm2, %%mm0 \n\t" /* A3-B3 a3-b3 */\
- "psrad $" #shift ", %%mm2 \n\t"\
- "psrad $" #shift ", %%mm0 \n\t"\
- WRITE2b(%%mm6, %%mm4, %%mm2, %%mm0, dst)
-
-//IDCT_CORE( src0, src4, src1, src5, dst, rounder, shift)
-IDCT_CORE( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
-IDCT_CORE( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
-IDCT_CORE( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
-IDCT_CORE( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
+ "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "movd %%mm2, 32+" #dst " \n\t"\
+ "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\
+ "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
+ "movd %%mm6, 48+" #dst " \n\t"\
+ "movd %%mm4, 64+" #dst " \n\t"\
+ "movd %%mm5, 80+" #dst " \n\t"
+
+//IDCT( src0, src4, src1, src5, dst, rounder, shift)
+IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
+IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
+IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
+IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
"jmp 9f \n\t"
"#.balign 16 \n\t"\
"3: \n\t"
-#undef IDCT_CORE
-#define IDCT_CORE(src0, src4, src1, src5, dst, rounder, shift) \
- "movq " #src0 ", %%mm0 \n\t" /* R2 R0 r2 r0 */\
+#undef IDCT
+#define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
"movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
- "movq 16(%2), %%mm4 \n\t" /* C2 C4 C2 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C2R2+C4R0 C2r2+C4r0 */\
- "movq 32(%2), %%mm6 \n\t" /* C3 C1 C3 C1 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
-\
- "movq 48(%2), %%mm5 \n\t" /* C6 C4 C6 C4 */\
- "pmaddwd %%mm0, %%mm5 \n\t" /* C6R2+C4R0 C6r2+C4r0 */\
- "paddd %%mm4, %%mm6 \n\t" /* A0+B0 a0+b0 */\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ #rounder ", %%mm4 \n\t"\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
+ #rounder ", %%mm0 \n\t"\
+ "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
+ "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 64(%2), %%mm3 \n\t"\
+ "pmaddwd %%mm2, %%mm3 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
+ "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
"paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
- "psubd %%mm6, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "psrad $" #shift ", %%mm6 \n\t"\
+ "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "psrad $" #shift ", %%mm7 \n\t"\
"psrad $" #shift ", %%mm4 \n\t"\
- WRITE0b(%%mm6, %%mm4, dst) \
-\
- "movq 64(%2), %%mm6 \n\t" /* -C7 C3 -C7 C3 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
-\
- "movq 80(%2), %%mm4 \n\t" /* -C6 C4 -C6 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* -C6R2+C4R0 -C6r2+C4r0 */\
- "paddd %%mm5, %%mm6 \n\t" /* A1+B1 a1+b1 */\
- "paddd %%mm5, %%mm5 \n\t" /* 2A1 2a1 */\
- "psubd %%mm6, %%mm5 \n\t" /* A1-B1 a1-b1 */\
- "psrad $" #shift ", %%mm6 \n\t"\
+ "movq %%mm0, %%mm1 \n\t" /* A1 a1 */\
+ "paddd %%mm3, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "psubd %%mm3, %%mm1 \n\t" /* A1-B1 a1-b1 */\
+ "psrad $" #shift ", %%mm0 \n\t"\
+ "psrad $" #shift ", %%mm1 \n\t"\
+ "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\
+ "movd %%mm7, " #dst " \n\t"\
+ "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "movd %%mm0, 16+" #dst " \n\t"\
+ "packssdw %%mm1, %%mm1 \n\t" /* A1-B1 a1-b1 */\
+ "movd %%mm1, 96+" #dst " \n\t"\
+ "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "movd %%mm4, 112+" #dst " \n\t"\
+ "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
+ "pmaddwd %%mm2, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
+ "pmaddwd 96(%2), %%mm2 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
+ "movq %%mm5, %%mm1 \n\t" /* A2 a2 */\
+ "paddd %%mm4, %%mm1 \n\t" /* A2+B2 a2+b2 */\
+ "psubd %%mm4, %%mm5 \n\t" /* A2-B2 a2-b2 */\
+ "psrad $" #shift ", %%mm1 \n\t"\
"psrad $" #shift ", %%mm5 \n\t"\
- WRITE1b(%%mm6, %%mm5, dst, %%mm7) \
-\
- "movq 96(%2), %%mm6 \n\t" /* -C1 C5 -C1 C5 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
-\
- "pmaddwd 112(%2), %%mm0 \n\t" /* -C2R2+C4R0 -C2r2+C4r0 */\
- "paddd %%mm4, %%mm6 \n\t" /* A1+B1 a1+b1 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A1 2a1 */\
- "psubd %%mm6, %%mm4 \n\t" /* A1-B1 a1-b1 */\
- "pmaddwd 128(%2), %%mm2 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
+ "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
+ "paddd %%mm2, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "psubd %%mm2, %%mm4 \n\t" /* A3-B3 a3-b3 */\
"psrad $" #shift ", %%mm6 \n\t"\
"psrad $" #shift ", %%mm4 \n\t"\
-\
- "paddd %%mm0, %%mm2 \n\t" /* A3+B3 a3+b3 */\
- "paddd %%mm0, %%mm0 \n\t" /* 2A3 2a3 */\
- "psubd %%mm2, %%mm0 \n\t" /* A3-B3 a3-b3 */\
- "psrad $" #shift ", %%mm2 \n\t"\
- "psrad $" #shift ", %%mm0 \n\t"\
- WRITE2b(%%mm6, %%mm4, %%mm2, %%mm0, dst)
-
-//IDCT_CORE( src0, src4, src1, src5, dst, rounder, shift)
-IDCT_CORE( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
-IDCT_CORE( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
-IDCT_CORE( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
-IDCT_CORE( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
+ "packssdw %%mm1, %%mm1 \n\t" /* A2+B2 a2+b2 */\
+ "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "movd %%mm1, 32+" #dst " \n\t"\
+ "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\
+ "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
+ "movd %%mm6, 48+" #dst " \n\t"\
+ "movd %%mm4, 64+" #dst " \n\t"\
+ "movd %%mm5, 80+" #dst " \n\t"
+
+
+//IDCT( src0, src4, src1, src5, dst, rounder, shift)
+IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
+IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
+IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
+IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
"jmp 9f \n\t"
"#.balign 16 \n\t"\
"5: \n\t"
-#undef IDCT_CORE
-#define IDCT_CORE(src0, src4, src1, src5, dst, rounder, shift) \
- "movq " #src0 ", %%mm0 \n\t" /* R2 R0 r2 r0 */\
- "movq 16(%2), %%mm4 \n\t" /* C2 C4 C2 C4 */\
- "movq %%mm4, %%mm6\n\t"\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C2R2+C4R0 C2r2+C4r0 */\
- "movq " #src4 ", %%mm1 \n\t" /* R6 R4 r6 r4 */\
- "movq 24(%2), %%mm5 \n\t" /* C6 C4 C6 C4 */\
- "movq %%mm5, %%mm7\n\t"\
- "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C4R4 C6r6+C4r4 */\
- "movq 8+" #src0 ", %%mm2 \n\t" /*2R2 R0 r2 r0 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /*2C2R2+C4R0 C2r2+C4r0 */\
- "movq 8+" #src4 ", %%mm3 \n\t" /*2R6 R4 r6 r4 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /*2C6R6+C4R4 C6r6+C4r4 */\
-\
+#undef IDCT
+#define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
+ "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
+ "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
+ "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
+ "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
+ #rounder ", %%mm4 \n\t"\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
"paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
- "movq 48(%2), %%mm5 \n\t" /* C6 C4 C6 C4 */\
+ #rounder ", %%mm0 \n\t"\
+ "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
+ "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\
+ "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\
+ "movq 8+" #src0 ", %%mm2 \n\t" /* R4 R0 r4 r0 */\
+ "movq 8+" #src4 ", %%mm3 \n\t" /* R6 R2 r6 r2 */\
+ "movq 16(%2), %%mm1 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm2, %%mm1 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm7 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm7, %%mm2 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 32(%2), %%mm7 \n\t" /* C6 C2 C6 C2 */\
+ "pmaddwd %%mm3, %%mm7 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
+ "pmaddwd 40(%2), %%mm3 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
+ #rounder ", %%mm1 \n\t"\
+ "paddd %%mm1, %%mm7 \n\t" /* A0 a0 */\
+ "paddd %%mm1, %%mm1 \n\t" /* 2C0 2c0 */\
+ #rounder ", %%mm2 \n\t"\
+ "psubd %%mm7, %%mm1 \n\t" /* A3 a3 */\
+ "paddd %%mm2, %%mm3 \n\t" /* A1 a1 */\
+ "paddd %%mm2, %%mm2 \n\t" /* 2C1 2c1 */\
+ "psubd %%mm3, %%mm2 \n\t" /* A2 a2 */\
"psrad $" #shift ", %%mm4 \n\t"\
- "pmaddwd %%mm0, %%mm5 \n\t" /* C6R2+C4R0 C6r2+C4r0 */\
-\
- "paddd %%mm7, %%mm6 \n\t" /*2A0 a0 */\
- "movq 56(%2), %%mm7 \n\t" /* -C2 -C4 -C2 -C4 */\
- "psrad $" #shift ", %%mm6 \n\t"\
- "pmaddwd %%mm1, %%mm7 \n\t" /* -C2R6-C4R4 -C2r6-C4r4 */\
-\
- "packssdw %%mm6, %%mm4 \n\t" /* C0, c0, C0, c0 */\
- "movq 48(%2), %%mm6 \n\t" /* C6 C4 C6 C4 */\
- "movq %%mm4, " #dst " \n\t" /* C0, c0 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /*2C6R2+C4R0 C6r2+C4r0 */\
-\
- "movq %%mm4, 112+" #dst " \n\t" /* C0, c0 */\
- "movq 56(%2), %%mm4 \n\t" /* -C2 -C4 -C2 -C4 */\
- "pmaddwd %%mm3, %%mm4 \n\t" /*2-C2R6-C4R4 -C2r6-C4r4 */\
-\
- "paddd %%mm5, %%mm7 \n\t" /* A1 a1 */\
- "movq 80(%2), %%mm5 \n\t" /* -C6 C4 -C6 C4 */\
"psrad $" #shift ", %%mm7 \n\t"\
- "pmaddwd %%mm0, %%mm5 \n\t" /* -C6R2+C4R0 -C6r2+C4r0 */\
-\
- "paddd %%mm4, %%mm6 \n\t" /*2A1 a1 */\
- "pmaddwd 112(%2), %%mm0 \n\t" /* -C2R2+C4R0 -C2r2+C4r0 */\
-\
- "psrad $" #shift ", %%mm6 \n\t"\
- "movq 88(%2), %%mm4 \n\t" /* C2 -C4 C2 -C4 */\
- "pmaddwd %%mm1, %%mm4 \n\t" /* C2R6-C4R4 C2r6-C4r4 */\
-\
- "pmaddwd 120(%2), %%mm1 \n\t" /* -C6R6+C4R4 -C6r6+C4r4 */\
- "packssdw %%mm6, %%mm7 \n\t" /* C1, c1, C1, c1 */\
-\
- "movq 80(%2), %%mm6 \n\t" /* -C6 C4 -C6 C4 */\
- "movq %%mm7, 16+" #dst " \n\t" /* C1, c1 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /*2-C6R2+C4R0 -C6r2+C4r0 */\
-\
- "movq %%mm7, 96+" #dst " \n\t" /* C1, c1 */\
- "movq 88(%2), %%mm7 \n\t" /* C2 -C4 C2 -C4 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /*2C2R6-C4R4 C2r6-C4r4 */\
-\
- "pmaddwd 112(%2), %%mm2 \n\t" /*2-C2R2+C4R0 -C2r2+C4r0 */\
- "paddd %%mm5, %%mm4 \n\t" /* A2 a2 */\
-\
- "pmaddwd 120(%2), %%mm3 \n\t" /*2-C6R6+C4R4 -C6r6+C4r4 */\
- "psrad $" #shift ", %%mm4 \n\t"\
-\
- "paddd %%mm7, %%mm6 \n\t" /*2A2 a2 */\
- "paddd %%mm1, %%mm0 \n\t" /* A3 a3 */\
-\
- "psrad $" #shift ", %%mm6 \n\t"\
-\
- "packssdw %%mm6, %%mm4 \n\t" /* C2, c2, C2, c2 */\
- "movq %%mm4, 32+" #dst " \n\t" /* C2, c2 */\
+ "psrad $" #shift ", %%mm3 \n\t"\
+ "packssdw %%mm7, %%mm4 \n\t" /* A0 a0 */\
+ "movq %%mm4, " #dst " \n\t"\
"psrad $" #shift ", %%mm0 \n\t"\
- "paddd %%mm3, %%mm2 \n\t" /*2A3 a3 */\
-\
- "movq %%mm4, 80+" #dst " \n\t" /* C2, c2 */\
+ "packssdw %%mm3, %%mm0 \n\t" /* A1 a1 */\
+ "movq %%mm0, 16+" #dst " \n\t"\
+ "movq %%mm0, 96+" #dst " \n\t"\
+ "movq %%mm4, 112+" #dst " \n\t"\
+ "psrad $" #shift ", %%mm5 \n\t"\
+ "psrad $" #shift ", %%mm6 \n\t"\
"psrad $" #shift ", %%mm2 \n\t"\
-\
- "packssdw %%mm2, %%mm0 \n\t" /* C3, c3, C3, c3 */\
- "movq %%mm0, 48+" #dst " \n\t" /* C3, c3 */\
- "movq %%mm0, 64+" #dst " \n\t" /* C3, c3 */\
-
-//IDCT_CORE( src0, src4, src1, src5, dst, rounder, shift)
-IDCT_CORE( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
-//IDCT_CORE( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
-IDCT_CORE( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
-//IDCT_CORE( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
+ "packssdw %%mm2, %%mm5 \n\t" /* A2 a2 */\
+ "movq %%mm5, 32+" #dst " \n\t"\
+ "psrad $" #shift ", %%mm1 \n\t"\
+ "packssdw %%mm1, %%mm6 \n\t" /* A3 a3 */\
+ "movq %%mm6, 48+" #dst " \n\t"\
+ "movq %%mm6, 64+" #dst " \n\t"\
+ "movq %%mm5, 80+" #dst " \n\t"
+
+
+//IDCT( src0, src4, src1, src5, dst, rounder, shift)
+IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
+//IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
+IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
+//IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
"jmp 9f \n\t"
"#.balign 16 \n\t"\
"1: \n\t"
-#undef IDCT_CORE
-#define IDCT_CORE(src0, src4, src1, src5, dst, rounder, shift) \
- "movq " #src0 ", %%mm0 \n\t" /* R2 R0 r2 r0 */\
- "movq " #src4 ", %%mm1 \n\t" /* R6 R4 r6 r4 */\
+#undef IDCT
+#define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
+ "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
"movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
- "movq 16(%2), %%mm4 \n\t" /* C2 C4 C2 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C2R2+C4R0 C2r2+C4r0 */\
- "movq 24(%2), %%mm5 \n\t" /* C6 C4 C6 C4 */\
- "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C4R4 C6r6+C4r4 */\
- "movq 32(%2), %%mm6 \n\t" /* C3 C1 C3 C1 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
+ "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
+ "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
+ "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
+ #rounder ", %%mm4 \n\t"\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
+ #rounder ", %%mm0 \n\t"\
+ "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
"paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
-\
- "movq 48(%2), %%mm5 \n\t" /* C6 C4 C6 C4 */\
- "pmaddwd %%mm0, %%mm5 \n\t" /* C6R2+C4R0 C6r2+C4r0 */\
- "paddd %%mm4, %%mm6 \n\t" /* A0+B0 a0+b0 */\
+ "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
+ "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\
+ "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\
+ "movq 64(%2), %%mm1 \n\t"\
+ "pmaddwd %%mm2, %%mm1 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
+ "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
"paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
- "psubd %%mm6, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "psrad $" #shift ", %%mm6 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- WRITE0b(%%mm6, %%mm4, dst) \
-\
- "movq 56(%2), %%mm4 \n\t" /* -C2 -C4 -C2 -C4 */\
- "pmaddwd %%mm1, %%mm4 \n\t" /* -C2R6-C4R4 -C2r6-C4r4 */\
- "movq 64(%2), %%mm6 \n\t" /* -C7 C3 -C7 C3 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
- "paddd %%mm5, %%mm4 \n\t" /* A1 a1 */\
-\
- "movq 80(%2), %%mm5 \n\t" /* -C6 C4 -C6 C4 */\
- "pmaddwd %%mm0, %%mm5 \n\t" /* -C6R2+C4R0 -C6r2+C4r0 */\
- "paddd %%mm4, %%mm6 \n\t" /* A1+B1 a1+b1 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A1 2a1 */\
- "psubd %%mm6, %%mm4 \n\t" /* A1-B1 a1-b1 */\
- "psrad $" #shift ", %%mm6 \n\t"\
+ "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "psrad $" #shift ", %%mm7 \n\t"\
"psrad $" #shift ", %%mm4 \n\t"\
- WRITE1b(%%mm6, %%mm4, dst, %%mm7) \
-\
- "movq 88(%2), %%mm4 \n\t" /* C2 -C4 C2 -C4 */\
- "pmaddwd %%mm1, %%mm4 \n\t" /* C2R6-C4R4 C2r6-C4r4 */\
- "movq 96(%2), %%mm6 \n\t" /* -C1 C5 -C1 C5 */\
- "pmaddwd %%mm2, %%mm6 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
- "paddd %%mm5, %%mm4 \n\t" /* A2 a2 */\
-\
- "pmaddwd 112(%2), %%mm0 \n\t" /* -C2R2+C4R0 -C2r2+C4r0 */\
- "paddd %%mm4, %%mm6 \n\t" /* A1+B1 a1+b1 */\
- "pmaddwd 120(%2), %%mm1 \n\t" /* -C6R6+C4R4 -C6r6+C4r4 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A1 2a1 */\
- "psubd %%mm6, %%mm4 \n\t" /* A1-B1 a1-b1 */\
- "pmaddwd 128(%2), %%mm2 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
+ "movq %%mm0, %%mm3 \n\t" /* A1 a1 */\
+ "paddd %%mm1, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "psubd %%mm1, %%mm3 \n\t" /* A1-B1 a1-b1 */\
+ "psrad $" #shift ", %%mm0 \n\t"\
+ "psrad $" #shift ", %%mm3 \n\t"\
+ "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\
+ "movd %%mm7, " #dst " \n\t"\
+ "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "movd %%mm0, 16+" #dst " \n\t"\
+ "packssdw %%mm3, %%mm3 \n\t" /* A1-B1 a1-b1 */\
+ "movd %%mm3, 96+" #dst " \n\t"\
+ "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "movd %%mm4, 112+" #dst " \n\t"\
+ "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
+ "pmaddwd %%mm2, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
+ "pmaddwd 96(%2), %%mm2 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
+ "movq %%mm5, %%mm3 \n\t" /* A2 a2 */\
+ "paddd %%mm4, %%mm3 \n\t" /* A2+B2 a2+b2 */\
+ "psubd %%mm4, %%mm5 \n\t" /* A2-B2 a2-b2 */\
+ "psrad $" #shift ", %%mm3 \n\t"\
+ "psrad $" #shift ", %%mm5 \n\t"\
+ "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
+ "paddd %%mm2, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "psubd %%mm2, %%mm4 \n\t" /* A3-B3 a3-b3 */\
"psrad $" #shift ", %%mm6 \n\t"\
+ "packssdw %%mm3, %%mm3 \n\t" /* A2+B2 a2+b2 */\
+ "movd %%mm3, 32+" #dst " \n\t"\
"psrad $" #shift ", %%mm4 \n\t"\
-\
- "paddd %%mm1, %%mm0 \n\t" /* A3 a3 */\
- "paddd %%mm0, %%mm2 \n\t" /* A3+B3 a3+b3 */\
- "paddd %%mm0, %%mm0 \n\t" /* 2A3 2a3 */\
- "psubd %%mm2, %%mm0 \n\t" /* A3-B3 a3-b3 */\
- "psrad $" #shift ", %%mm2 \n\t"\
- "psrad $" #shift ", %%mm0 \n\t"\
- WRITE2b(%%mm6, %%mm4, %%mm2, %%mm0, dst)
-
-//IDCT_CORE( src0, src4, src1, src5, dst, rounder, shift)
-IDCT_CORE( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
-IDCT_CORE( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
-IDCT_CORE( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
-IDCT_CORE( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
+ "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "movd %%mm6, 48+" #dst " \n\t"\
+ "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\
+ "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
+ "movd %%mm4, 64+" #dst " \n\t"\
+ "movd %%mm5, 80+" #dst " \n\t"
+
+
+//IDCT( src0, src4, src1, src5, dst, rounder, shift)
+IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
+IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
+IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
+IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
"jmp 9f \n\t"
"#.balign 16 \n\t"
"7: \n\t"
-#undef IDCT_CORE
-#define IDCT_CORE(src0, src4, src1, src5, dst, rounder, shift) \
- "movq " #src0 ", %%mm0 \n\t" /* R2 R0 r2 r0 */\
- "movq 16(%2), %%mm2 \n\t" /* C2 C4 C2 C4 */\
- "movq 8+" #src0 ", %%mm1 \n\t" /* R2 R0 r2 r0 */\
- "pmaddwd %%mm0, %%mm2 \n\t" /* C2R2+C4R0 C2r2+C4r0 */\
- "movq 16(%2), %%mm3 \n\t" /* C2 C4 C2 C4 */\
- "pmaddwd %%mm1, %%mm3 \n\t" /* C2R2+C4R0 C2r2+C4r0 */\
-\
- "movq 48(%2), %%mm4 \n\t" /* C6 C4 C6 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C6R2+C4R0 C6r2+C4r0 */\
- "movq 48(%2), %%mm5 \n\t" /* C6 C4 C6 C4 */\
- "pmaddwd %%mm1, %%mm5 \n\t" /* C6R2+C4R0 C6r2+C4r0 */\
- "movq 80(%2), %%mm6 \n\t" /* -C6 C4 -C6 C4 */\
- "pmaddwd %%mm0, %%mm6 \n\t" /* -C6R2+C4R0 -C6r2+C4r0 */\
- "movq 80(%2), %%mm7 \n\t" /* -C6 C4 -C6 C4 */\
- "pmaddwd %%mm1, %%mm7 \n\t" /* -C6R2+C4R0 -C6r2+C4r0 */\
- "pmaddwd 112(%2), %%mm0 \n\t" /* -C2R2+C4R0 -C2r2+C4r0 */\
- "psrad $" #shift ", %%mm2 \n\t"\
- "psrad $" #shift ", %%mm3 \n\t"\
- "pmaddwd 112(%2), %%mm1 \n\t" /* -C2R2+C4R0 -C2r2+C4r0 */\
- "packssdw %%mm3, %%mm2 \n\t" /* C0, c0, C0, c0 */\
- "movq %%mm2, " #dst " \n\t" /* C0, c0 */\
+#undef IDCT
+#define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ #rounder ", %%mm4 \n\t"\
+ #rounder ", %%mm0 \n\t"\
"psrad $" #shift ", %%mm4 \n\t"\
- "psrad $" #shift ", %%mm5 \n\t"\
- "movq %%mm2, 112+" #dst " \n\t" /* C0, c0 */\
- "packssdw %%mm5, %%mm4 \n\t" /* C1, c1, C1, c1 */\
- "movq %%mm4, 16+" #dst " \n\t" /* C0, c0 */\
- "psrad $" #shift ", %%mm7 \n\t"\
- "psrad $" #shift ", %%mm6 \n\t"\
- "movq %%mm4, 96+" #dst " \n\t" /* C0, c0 */\
- "packssdw %%mm7, %%mm6 \n\t" /* C2, c2, C2, c2 */\
- "movq %%mm6, 32+" #dst " \n\t" /* C0, c0 */\
"psrad $" #shift ", %%mm0 \n\t"\
- "movq %%mm6, 80+" #dst " \n\t" /* C0, c0 */\
+ "movq 8+" #src0 ", %%mm2 \n\t" /* R4 R0 r4 r0 */\
+ "movq 16(%2), %%mm1 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm2, %%mm1 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm7 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm7, %%mm2 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 32(%2), %%mm7 \n\t" /* C6 C2 C6 C2 */\
+ #rounder ", %%mm1 \n\t"\
+ #rounder ", %%mm2 \n\t"\
"psrad $" #shift ", %%mm1 \n\t"\
- "packssdw %%mm1, %%mm0 \n\t" /* C3, c3, C3, c3 */\
- "movq %%mm0, 48+" #dst " \n\t" /* C0, c0 */\
- "movq %%mm0, 64+" #dst " \n\t" /* C0, c0 */\
-
-//IDCT_CORE( src0, src4, src1, src5, dst, rounder, shift)
-IDCT_CORE( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
-//IDCT_CORE( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
-IDCT_CORE( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
-//IDCT_CORE( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
+ "packssdw %%mm1, %%mm4 \n\t" /* A0 a0 */\
+ "movq %%mm4, " #dst " \n\t"\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "packssdw %%mm2, %%mm0 \n\t" /* A1 a1 */\
+ "movq %%mm0, 16+" #dst " \n\t"\
+ "movq %%mm0, 96+" #dst " \n\t"\
+ "movq %%mm4, 112+" #dst " \n\t"\
+ "movq %%mm0, 32+" #dst " \n\t"\
+ "movq %%mm4, 48+" #dst " \n\t"\
+ "movq %%mm4, 64+" #dst " \n\t"\
+ "movq %%mm0, 80+" #dst " \n\t"
+
+//IDCT( src0, src4, src1, src5, dst, rounder, shift)
+IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
+//IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
+IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
+//IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
#endif
/*
Input
- 00 20 02 22 40 60 42 62
- 10 30 12 32 50 70 52 72
- 01 21 03 23 41 61 43 63
+ 00 40 04 44 20 60 24 64
+ 10 30 14 34 50 70 54 74
+ 01 41 03 43 21 61 23 63
11 31 13 33 51 71 53 73
- 04 24 06 26 44 64 46 66
- 14 34 16 36 54 74 56 76
-...
-*/
-/*
+ 02 42 06 46 22 62 26 66
+ 12 32 16 36 52 72 56 76
+ 05 45 07 47 25 65 27 67
+ 15 35 17 37 55 75 57 77
+
Temp
- 00 02 10 12 20 22 30 32
- 40 42 50 52 60 62 70 72
+ 00 04 10 14 20 24 30 34
+ 40 44 50 54 60 64 70 74
01 03 11 13 21 23 31 33
41 43 51 53 61 63 71 73
- 04 06 14 16 24 26 34 36
- 44 46 54 56 64 66 74 76
+ 02 06 12 16 22 26 32 36
+ 42 46 52 56 62 66 72 76
05 07 15 17 25 27 35 37
45 47 55 57 65 67 75 77
*/
-/*
-Output
- 00 10 20 30 40 50 60 70
- 01 11 21 31 41 51 61 71
-...
-*/
-
"9: \n\t"
:: "r" (block), "r" (temp), "r" (coeffs)
: "%eax"
);
-/*
-idctCol(block, temp);
-idctCol(block+1, temp+2);
-idctCol(block+2, temp+4);
-idctCol(block+3, temp+6);
-idctCol(block+4, temp+8);
-idctCol(block+5, temp+10);
-idctCol(block+6, temp+12);
-idctCol(block+7, temp+14);
-*/
}
void simple_idct_mmx(int16_t *block)
{
- static int imax=0, imin=0;
- static int omax=0, omin=0;
- int i, j;
-/*
- for(i=0; i<64; i++)
- {
- if(block[i] > imax)
- {
- imax= block[i];
- printf("Input-Max: %d\n", imax);
- printf("Input-Min: %d\n", imin);
- printf("Output-Max: %d\n", omax);
- printf("Output-Min: %d\n", omin);
- }
- if(block[i] < imin)
- {
- imin= block[i];
- printf("Input-Max: %d\n", imax);
- printf("Input-Min: %d\n", imin);
- printf("Output-Max: %d\n", omax);
- printf("Output-Min: %d\n", omin);
- }
- }*/
-/* static int stat[64];
- for(j=0; j<4; j++)
- {
- static int line[8]={0,2,1,3,4,6,5,7};
- for(i=0; i<16; i++)
- {
- if(block[j*16+i])
- {
- stat[j*16+1]++;
- break;
- }
- }
- for(i=0; i<16; i++)
- {
- if(block[j*16+i] && i!=0 && i!=2)
- {
- stat[j*16+2]++;
- break;
- }
- }
- }
- stat[0]++;*/
-/* for(i=1; i<8; i++)
- {
- if(block[i] != 0)
- {
- stat[1]++;
- break;
- }
- }
- for(i=32; i<64; i++)
- {
- if(block[i] != 0)
- {
- stat[2]++;
- break;
- }
- }
- stat[0]++;
-*/
-// return;
idct(block);
-// memset(block, 0, 128);
-/*
- if(stat[0] > 100000)
- for(i=0; i<64; i++)
- {
- if((i&7) == 0) printf("\n");
- printf("%06d ", stat[i]);
- }
-*/
-/*
- for(i=0; i<4; i++) printf("%d", stat[1+i*16]);
- printf(" ");
- for(i=0; i<4; i++) printf("%d", stat[2+i*16]);
- printf("\n");
-*/
-// printf("%d", stat[2]);
-
-// memset(stat, 0, 256);
-
-/*
- for(i=0; i<64; i++)
- {
- if(block[i] > omax)
- {
- omax= block[i];
- printf("Input-Max: %d\n", imax);
- printf("Input-Min: %d\n", imin);
- printf("Output-Max: %d\n", omax);
- printf("Output-Min: %d\n", omin);
- }
- if(block[i] < omin)
- {
- omin= block[i];
- printf("Input-Max: %d\n", imax);
- printf("Input-Min: %d\n", imin);
- printf("Output-Max: %d\n", omax);
- printf("Output-Min: %d\n", omin);
- }
- }*/
}
diff --git a/src/libffmpeg/libavcodec/imgconvert.c b/src/libffmpeg/libavcodec/imgconvert.c
index d39b6c1e9..04300744f 100644
--- a/src/libffmpeg/libavcodec/imgconvert.c
+++ b/src/libffmpeg/libavcodec/imgconvert.c
@@ -1,20 +1,20 @@
/*
* Misc image convertion routines
- * Copyright (c) 2001 Gerard Lantau.
+ * Copyright (c) 2001, 2002 Fabrice Bellard.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
*
- * This program is distributed in the hope that it will be useful,
+ * This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "avcodec.h"
#include "dsputil.h"
@@ -361,6 +361,8 @@ int img_convert(AVPicture *dst, int dst_pix_fmt,
{
int i;
+ assert(pix_fmt != PIX_FMT_ANY && dst_pix_fmt != PIX_FMT_ANY);
+
if (dst_pix_fmt == pix_fmt) {
switch(pix_fmt) {
case PIX_FMT_YUV420P:
@@ -479,7 +481,7 @@ static void deinterlace_bottom_field(UINT8 *dst, int dst_wrap,
int y, y1, i;
UINT8 *buf;
- buf= (UINT8*) malloc(5 * width);
+ buf = (UINT8*)av_malloc(5 * width);
src = src1;
for(y=0;y<height;y+=2) {
@@ -509,7 +511,7 @@ static void deinterlace_bottom_field(UINT8 *dst, int dst_wrap,
dst += dst_wrap;
src += (2 + 1) * src_wrap;
}
- free(buf);
+ av_free(buf);
}
@@ -546,3 +548,5 @@ int avpicture_deinterlace(AVPicture *dst, AVPicture *src,
}
return 0;
}
+
+#undef FIX
diff --git a/src/libffmpeg/libavcodec/imgresample.c b/src/libffmpeg/libavcodec/imgresample.c
index fda5a31c4..26519bd38 100644
--- a/src/libffmpeg/libavcodec/imgresample.c
+++ b/src/libffmpeg/libavcodec/imgresample.c
@@ -1,27 +1,23 @@
/*
* High quality image resampling with polyphase filters
- * Copyright (c) 2001 Gerard Lantau.
+ * Copyright (c) 2001 Fabrice Bellard.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
*
- * This program is distributed in the hope that it will be useful,
+ * This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include "dsputil.h"
#include "avcodec.h"
+#include "dsputil.h"
#ifdef USE_FASTMEMCPY
#include "fastmemcpy.h"
@@ -454,7 +450,7 @@ ImgReSampleContext *img_resample_init(int owidth, int oheight,
return s;
fail:
- free(s);
+ av_free(s);
return NULL;
}
@@ -474,8 +470,8 @@ void img_resample(ImgReSampleContext *s,
void img_resample_close(ImgReSampleContext *s)
{
- free(s->line_buf);
- free(s);
+ av_free(s->line_buf);
+ av_free(s);
}
#ifdef TEST
@@ -522,7 +518,7 @@ static void dump_filter(INT16 *filter)
}
#ifdef HAVE_MMX
-int mm_flags;
+extern int mm_flags;
#endif
int main(int argc, char **argv)
@@ -609,7 +605,7 @@ int main(int argc, char **argv)
img, XSIZE, XSIZE, YSIZE);
if (memcmp(img1, img2, xsize * ysize) != 0) {
fprintf(stderr, "mmx error\n");
- abort();
+ exit(1);
}
printf("MMX OK\n");
#endif
diff --git a/src/libffmpeg/libavcodec/jfdctfst.c b/src/libffmpeg/libavcodec/jfdctfst.c
index cdc3b47f9..4e3b55bb5 100644
--- a/src/libffmpeg/libavcodec/jfdctfst.c
+++ b/src/libffmpeg/libavcodec/jfdctfst.c
@@ -113,7 +113,7 @@
*/
GLOBAL(void)
-jpeg_fdct_ifast (DCTELEM * data)
+fdct_ifast (DCTELEM * data)
{
DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
DCTELEM tmp10, tmp11, tmp12, tmp13;
@@ -222,3 +222,10 @@ jpeg_fdct_ifast (DCTELEM * data)
dataptr++; /* advance pointer to next column */
}
}
+
+
+#undef GLOBAL
+#undef CONST_BITS
+#undef DESCALE
+#undef FIX_0_541196100
+#undef FIX_1_306562965
diff --git a/src/libffmpeg/libavcodec/jrevdct.c b/src/libffmpeg/libavcodec/jrevdct.c
index 246f1b190..3ba91543d 100644
--- a/src/libffmpeg/libavcodec/jrevdct.c
+++ b/src/libffmpeg/libavcodec/jrevdct.c
@@ -1166,4 +1166,5 @@ void j_rev_dct(DCTBLOCK data)
}
}
-
+#undef FIX
+#undef CONST_BITS
diff --git a/src/libffmpeg/libavcodec/mjpeg.c b/src/libffmpeg/libavcodec/mjpeg.c
index 1eb35d2b8..577e9d884 100644
--- a/src/libffmpeg/libavcodec/mjpeg.c
+++ b/src/libffmpeg/libavcodec/mjpeg.c
@@ -1,28 +1,36 @@
/*
* MJPEG encoder and decoder
- * Copyright (c) 2000, 2001 Gerard Lantau.
+ * Copyright (c) 2000, 2001 Fabrice Bellard.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
*
- * This program is distributed in the hope that it will be useful,
+ * This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Support for external huffman table and various fixes (AVID workaround) by
+ * Alex Beregszaszi <alex@naxine.org>
*/
//#define DEBUG
-#include "config.h"
-
#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"
-#include "xine-utils/xineutils.h"
+
+#ifdef USE_FASTMEMCPY
+#include "fastmemcpy.h"
+#endif
+
+/* use two quantizer tables (one for luminance and one for chrominance) */
+/* not yet working */
+#undef TWOMATRIXES
typedef struct MJpegContext {
UINT8 huff_size_dc_luminance[12];
@@ -36,12 +44,87 @@ typedef struct MJpegContext {
UINT16 huff_code_ac_chrominance[256];
} MJpegContext;
-#define SOF0 0xc0
-#define SOI 0xd8
-#define EOI 0xd9
-#define DQT 0xdb
-#define DHT 0xc4
-#define SOS 0xda
+/* JPEG marker codes */
+typedef enum {
+ /* start of frame */
+ SOF0 = 0xc0, /* baseline */
+ SOF1 = 0xc1, /* extended sequential, huffman */
+ SOF2 = 0xc2, /* progressive, huffman */
+ SOF3 = 0xc3, /* lossless, huffman */
+
+ SOF5 = 0xc5, /* differential sequential, huffman */
+ SOF6 = 0xc6, /* differential progressive, huffman */
+ SOF7 = 0xc7, /* differential lossless, huffman */
+ JPG = 0xc8, /* reserved for JPEG extension */
+ SOF9 = 0xc9, /* extended sequential, arithmetic */
+ SOF10 = 0xca, /* progressive, arithmetic */
+ SOF11 = 0xcb, /* lossless, arithmetic */
+
+ SOF13 = 0xcd, /* differential sequential, arithmetic */
+ SOF14 = 0xce, /* differential progressive, arithmetic */
+ SOF15 = 0xcf, /* differential lossless, arithmetic */
+
+ DHT = 0xc4, /* define huffman tables */
+
+ DAC = 0xcc, /* define arithmetic-coding conditioning */
+
+ /* restart with modulo 8 count "m" */
+ RST0 = 0xd0, /* RST0..RST7 occupy the contiguous range 0xd0..0xd7 */
+ RST1 = 0xd1,
+ RST2 = 0xd2,
+ RST3 = 0xd3,
+ RST4 = 0xd4,
+ RST5 = 0xd5,
+ RST6 = 0xd6,
+ RST7 = 0xd7,
+
+ SOI = 0xd8, /* start of image */
+ EOI = 0xd9, /* end of image */
+ SOS = 0xda, /* start of scan */
+ DQT = 0xdb, /* define quantization tables */
+ DNL = 0xdc, /* define number of lines */
+ DRI = 0xdd, /* define restart interval */
+ DHP = 0xde, /* define hierarchical progression */
+ EXP = 0xdf, /* expand reference components */
+
+ APP0 = 0xe0, /* APP0..APP15: application markers, contiguous range 0xe0..0xef */
+ APP1 = 0xe1,
+ APP2 = 0xe2,
+ APP3 = 0xe3,
+ APP4 = 0xe4,
+ APP5 = 0xe5,
+ APP6 = 0xe6,
+ APP7 = 0xe7,
+ APP8 = 0xe8,
+ APP9 = 0xe9,
+ APP10 = 0xea,
+ APP11 = 0xeb,
+ APP12 = 0xec,
+ APP13 = 0xed,
+ APP14 = 0xee,
+ APP15 = 0xef,
+
+ JPG0 = 0xf0, /* JPG0..JPG13: presumably reserved for JPEG extensions like JPG above -- TODO confirm against the spec */
+ JPG1 = 0xf1,
+ JPG2 = 0xf2,
+ JPG3 = 0xf3,
+ JPG4 = 0xf4,
+ JPG5 = 0xf5,
+ JPG6 = 0xf6,
+ JPG7 = 0xf7,
+ JPG8 = 0xf8,
+ JPG9 = 0xf9,
+ JPG10 = 0xfa,
+ JPG11 = 0xfb,
+ JPG12 = 0xfc,
+ JPG13 = 0xfd,
+
+ COM = 0xfe, /* comment */
+
+ TEM = 0x01, /* temporary private use for arithmetic coding */
+
+ /* 0x02 -> 0xbf reserved */
+} JPEG_MARKER;
#if 0
/* These are the sample quantization tables given in JPEG spec section K.1.
@@ -135,7 +218,6 @@ static const UINT8 val_ac_chrominance[] =
0xf9, 0xfa
};
-
/* isn't this function nicer than the one in the libjpeg ? */
static void build_huffman_codes(UINT8 *huff_size, UINT16 *huff_code,
const UINT8 *bits_table, const UINT8 *val_table)
@@ -160,9 +242,13 @@ int mjpeg_init(MpegEncContext *s)
{
MJpegContext *m;
- m = malloc(sizeof(MJpegContext));
+ m = av_malloc(sizeof(MJpegContext));
if (!m)
return -1;
+
+ s->min_qcoeff=-1023;
+ s->max_qcoeff= 1023;
+ s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
/* build all the huffman tables */
build_huffman_codes(m->huff_size_dc_luminance,
@@ -188,7 +274,7 @@ int mjpeg_init(MpegEncContext *s)
void mjpeg_close(MpegEncContext *s)
{
- free(s->mjpeg_ctx);
+ av_free(s->mjpeg_ctx);
}
static inline void put_marker(PutBitContext *p, int code)
@@ -227,14 +313,18 @@ static void jpeg_table_header(MpegEncContext *s)
/* quant matrixes */
put_marker(p, DQT);
+#ifdef TWOMATRIXES
+ put_bits(p, 16, 2 + 2 * (1 + 64));
+#else
put_bits(p, 16, 2 + 1 * (1 + 64));
+#endif
put_bits(p, 4, 0); /* 8 bit precision */
put_bits(p, 4, 0); /* table 0 */
for(i=0;i<64;i++) {
j = zigzag_direct[i];
put_bits(p, 8, s->intra_matrix[j]);
}
-#if 0
+#ifdef TWOMATRIXES
put_bits(p, 4, 0); /* 8 bit precision */
put_bits(p, 4, 1); /* table 1 */
for(i=0;i<64;i++) {
@@ -258,10 +348,46 @@ static void jpeg_table_header(MpegEncContext *s)
ptr[1] = size;
}
+static void jpeg_put_comments(MpegEncContext *s) /* emit a COM segment carrying the encoder version string */
+{
+ PutBitContext *p = &s->pb;
+ int size;
+ UINT8 *ptr;
+
+#if 0
+ /* JFIF header */
+ put_marker(p, APP0);
+ put_bits(p, 16, 16);
+ put_string(p, "JFIF"); /* this puts the trailing zero-byte too */
+ put_bits(p, 16, 0x101);
+ put_bits(p, 8, 0); /* units type: 0 - aspect ratio */
+ put_bits(p, 16, 1); /* aspect: 1:1 */
+ put_bits(p, 16, 1);
+ put_bits(p, 8, 0); /* thumbnail width */
+ put_bits(p, 8, 0); /* thumbnail height */
+#endif
+
+ /* comment */
+ put_marker(p, COM);
+ flush_put_bits(p); /* flush first so the pointer taken next refers to the length field */
+ ptr = pbBufPtr(p); /* remember where the 16-bit length placeholder is written */
+ put_bits(p, 16, 0); /* patched later */
+#define VERSION "FFmpeg" LIBAVCODEC_VERSION "b" LIBAVCODEC_BUILD_STR
+ put_string(p, VERSION);
+ size = strlen(VERSION)+3; /* presumably 2 length bytes + the zero byte put_string appends -- confirm against put_string */
+#undef VERSION
+ ptr[0] = size >> 8; /* patch the placeholder: high byte first */
+ ptr[1] = size;
+}
+
void mjpeg_picture_header(MpegEncContext *s)
{
put_marker(&s->pb, SOI);
+ if (!s->mjpeg_data_only_frames)
+ {
+ jpeg_put_comments(s);
+
if (s->mjpeg_write_tables) jpeg_table_header(s);
put_marker(&s->pb, SOF0);
@@ -282,13 +408,22 @@ void mjpeg_picture_header(MpegEncContext *s)
put_bits(&s->pb, 8, 2); /* component number */
put_bits(&s->pb, 4, s->mjpeg_hsample[1]); /* H factor */
put_bits(&s->pb, 4, s->mjpeg_vsample[1]); /* V factor */
+#ifdef TWOMATRIXES
+ put_bits(&s->pb, 8, 1); /* select matrix */
+#else
put_bits(&s->pb, 8, 0); /* select matrix */
+#endif
/* Cr component */
put_bits(&s->pb, 8, 3); /* component number */
put_bits(&s->pb, 4, s->mjpeg_hsample[2]); /* H factor */
put_bits(&s->pb, 4, s->mjpeg_vsample[2]); /* V factor */
+#ifdef TWOMATRIXES
+ put_bits(&s->pb, 8, 1); /* select matrix */
+#else
put_bits(&s->pb, 8, 0); /* select matrix */
+#endif
+ }
/* scan header */
put_marker(&s->pb, SOS);
@@ -312,7 +447,7 @@ void mjpeg_picture_header(MpegEncContext *s)
put_bits(&s->pb, 8, 0); /* Ss (not used) */
put_bits(&s->pb, 8, 63); /* Se (not used) */
- put_bits(&s->pb, 8, 0); /* (not used) */
+ put_bits(&s->pb, 8, 0); /* Ah/Al (not used) */
}
void mjpeg_picture_trailer(MpegEncContext *s)
@@ -321,8 +456,8 @@ void mjpeg_picture_trailer(MpegEncContext *s)
put_marker(&s->pb, EOI);
}
-static inline void encode_dc(MpegEncContext *s, int val,
- UINT8 *huff_size, UINT16 *huff_code)
+static inline void mjpeg_encode_dc(MpegEncContext *s, int val,
+ UINT8 *huff_size, UINT16 *huff_code)
{
int mant, nbits;
@@ -361,11 +496,11 @@ static void encode_block(MpegEncContext *s, DCTELEM *block, int n)
dc = block[0]; /* overflow is impossible */
val = dc - s->last_dc[component];
if (n < 4) {
- encode_dc(s, val, m->huff_size_dc_luminance, m->huff_code_dc_luminance);
+ mjpeg_encode_dc(s, val, m->huff_size_dc_luminance, m->huff_code_dc_luminance);
huff_size_ac = m->huff_size_ac_luminance;
huff_code_ac = m->huff_code_ac_luminance;
} else {
- encode_dc(s, val, m->huff_size_dc_chrominance, m->huff_code_dc_chrominance);
+ mjpeg_encode_dc(s, val, m->huff_size_dc_chrominance, m->huff_code_dc_chrominance);
huff_size_ac = m->huff_size_ac_chrominance;
huff_code_ac = m->huff_code_ac_chrominance;
}
@@ -429,6 +564,7 @@ void mjpeg_encode_mb(MpegEncContext *s,
#define MAX_COMPONENTS 4
typedef struct MJpegDecodeContext {
+ AVCodecContext *avctx;
GetBitContext gb;
UINT32 header_state;
int start_code; /* current start code */
@@ -455,8 +591,21 @@ typedef struct MJpegDecodeContext {
int linesize[MAX_COMPONENTS];
DCTELEM block[64] __align8;
UINT8 buffer[PICTURE_BUFFER_SIZE];
+
+ int buggy_avid;
+ int restart_interval;
+ int restart_count;
+ int interleaved_rows;
} MJpegDecodeContext;
+/*
+ * Skip what is left of a marker segment; `len` is modified in place.
+ * A non-positive `len` skips nothing. The previous `if (len) while (--len)`
+ * form kept decrementing a negative `len` (reachable from a corrupt length
+ * field), reading far beyond the end of the input buffer.
+ * NOTE(review): `while (--len)` skips len-1 bytes rather than len; this is
+ * kept unchanged -- confirm whether it is intended.
+ */
+#define SKIP_REMAINING(gb, len) do { \
+    dprintf("reamining %d bytes in marker\n", len); \
+    if ((len) > 0) while (--(len)) \
+        skip_bits(gb, 8); \
+} while (0)
+
+static int mjpeg_decode_dht(MJpegDecodeContext *s, UINT8 *buf, int buf_size);
+
static void build_vlc(VLC *vlc, const UINT8 *bits_table, const UINT8 *val_table,
int nb_codes)
{
@@ -473,6 +622,8 @@ static int mjpeg_decode_init(AVCodecContext *avctx)
{
MJpegDecodeContext *s = avctx->priv_data;
+ s->avctx = avctx;
+
s->header_state = 0;
s->mpeg_enc_ctx_allocated = 0;
s->buffer_size = PICTURE_BUFFER_SIZE - 1; /* minus 1 to take into
@@ -487,6 +638,13 @@ static int mjpeg_decode_init(AVCodecContext *avctx)
build_vlc(&s->vlcs[0][1], bits_dc_chrominance, val_dc_chrominance, 12);
build_vlc(&s->vlcs[1][0], bits_ac_luminance, val_ac_luminance, 251);
build_vlc(&s->vlcs[1][1], bits_ac_chrominance, val_ac_chrominance, 251);
+
+ if (avctx->flags & CODEC_FLAG_EXTERN_HUFF)
+ {
+ printf("mjpeg: using external huffman table\n");
+ mjpeg_decode_dht(s, avctx->extradata, avctx->extradata_size);
+ /* should check for error - but dunno */
+ }
return 0;
}
@@ -496,14 +654,16 @@ static int mjpeg_decode_dqt(MJpegDecodeContext *s,
{
int len, index, i, j;
init_get_bits(&s->gb, buf, buf_size);
-
- len = get_bits(&s->gb, 16);
- len -= 2;
+
+ len = get_bits(&s->gb, 16) - 2;
while (len >= 65) {
/* only 8 bit precision handled */
if (get_bits(&s->gb, 4) != 0)
+ {
+ dprintf("dqt: 16bit precision\n");
return -1;
+ }
index = get_bits(&s->gb, 4);
if (index >= 4)
return -1;
@@ -511,10 +671,13 @@ static int mjpeg_decode_dqt(MJpegDecodeContext *s,
/* read quant table */
for(i=0;i<64;i++) {
j = zigzag_direct[i];
- s->quant_matrixes[index][j] = get_bits(&s->gb, 8);
+ s->quant_matrixes[index][j] = get_bits(&s->gb, 8);
}
len -= 65;
}
+
+ SKIP_REMAINING(&s->gb, len);
+
return 0;
}
@@ -581,6 +744,7 @@ static int mjpeg_decode_sof0(MJpegDecodeContext *s,
return -1;
height = get_bits(&s->gb, 16);
width = get_bits(&s->gb, 16);
+ dprintf("sof0: picture: %dx%d\n", width, height);
nb_components = get_bits(&s->gb, 8);
if (nb_components <= 0 ||
@@ -602,16 +766,15 @@ static int mjpeg_decode_sof0(MJpegDecodeContext *s,
s->quant_index[i] = get_bits(&s->gb, 8);
if (s->quant_index[i] >= 4)
return -1;
- dprintf("component %d %d:%d\n", i, s->h_count[i], s->v_count[i]);
+ dprintf("component %d %d:%d id: %d quant:%d\n", i, s->h_count[i],
+ s->v_count[i], s->component_id[i], s->quant_index[i]);
}
/* if different size, realloc/alloc picture */
/* XXX: also check h_count and v_count */
if (width != s->width || height != s->height) {
- for(i=0;i<MAX_COMPONENTS;i++) {
- free(s->current_picture[i]);
- s->current_picture[i] = NULL;
- }
+ for(i=0;i<MAX_COMPONENTS;i++)
+ av_freep(&s->current_picture[i]);
s->width = width;
s->height = height;
/* test interlaced mode */
@@ -619,7 +782,7 @@ static int mjpeg_decode_sof0(MJpegDecodeContext *s,
s->org_height != 0 &&
s->height < ((s->org_height * 3) / 4)) {
s->interlaced = 1;
- s->bottom_field = 0;
+ s->bottom_field = 0;
}
for(i=0;i<nb_components;i++) {
@@ -636,19 +799,26 @@ static int mjpeg_decode_sof0(MJpegDecodeContext *s,
}
s->first_picture = 0;
}
+
+ if (len != (8+(3*nb_components)))
+ {
+ dprintf("decode_sof0: error, len(%d) mismatch\n", len);
+ }
return 0;
}
-static inline int decode_dc(MJpegDecodeContext *s, int dc_index)
+static inline int mjpeg_decode_dc(MJpegDecodeContext *s, int dc_index)
{
- VLC *dc_vlc;
int code, diff;
- dc_vlc = &s->vlcs[0][dc_index];
- code = get_vlc(&s->gb, dc_vlc);
+ code = get_vlc(&s->gb, &s->vlcs[0][dc_index]);
if (code < 0)
+ {
+ dprintf("mjpeg_decode_dc: bad vlc: %d:%d (%p)\n", 0, dc_index,
+ &s->vlcs[0][dc_index]);
return 0xffff;
+ }
if (code == 0) {
diff = 0;
} else {
@@ -668,13 +838,13 @@ static int decode_block(MJpegDecodeContext *s, DCTELEM *block,
VLC *ac_vlc;
INT16 *quant_matrix;
- quant_matrix = s->quant_matrixes[quant_index];
/* DC coef */
- val = decode_dc(s, dc_index);
+ val = mjpeg_decode_dc(s, dc_index);
if (val == 0xffff) {
dprintf("error dc\n");
return -1;
}
+ quant_matrix = s->quant_matrixes[quant_index];
val = val * quant_matrix[0] + s->last_dc[component];
s->last_dc[component] = val;
block[0] = val;
@@ -731,17 +901,24 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s,
nb_components = get_bits(&s->gb, 8);
/* XXX: only interleaved scan accepted */
if (nb_components != 3)
+ {
+ dprintf("decode_sos: components(%d) mismatch\n", nb_components);
return -1;
+ }
vmax = 0;
hmax = 0;
for(i=0;i<nb_components;i++) {
id = get_bits(&s->gb, 8) - 1;
+ dprintf("component: %d\n", id);
/* find component index */
for(index=0;index<s->nb_components;index++)
if (id == s->component_id[index])
break;
if (index == s->nb_components)
+ {
+ dprintf("decode_sos: index(%d) out of components\n", index);
return -1;
+ }
comp_index[i] = index;
nb_blocks[i] = s->h_count[index] * s->v_count[index];
@@ -749,15 +926,31 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s,
v_count[i] = s->v_count[index];
dc_index[i] = get_bits(&s->gb, 4);
- if (dc_index[i] >= 4)
- return -1;
ac_index[i] = get_bits(&s->gb, 4);
- if (ac_index[i] >= 4)
- return -1;
+
+ if (dc_index[i] < 0 || ac_index[i] < 0 ||
+ dc_index[i] >= 4 || ac_index[i] >= 4)
+ goto out_of_range;
+ switch(s->start_code)
+ {
+ case SOF0:
+ if (dc_index[i] > 1 || ac_index[i] > 1)
+ goto out_of_range;
+ break;
+ case SOF1:
+ case SOF2:
+ if (dc_index[i] > 3 || ac_index[i] > 3)
+ goto out_of_range;
+ break;
+ case SOF3:
+ if (dc_index[i] > 3 || ac_index[i] != 0)
+ goto out_of_range;
+ break;
+ }
}
- get_bits(&s->gb, 8); /* Ss */
- get_bits(&s->gb, 8); /* Se */
- get_bits(&s->gb, 8); /* not used */
+ skip_bits(&s->gb, 8); /* Ss */
+ skip_bits(&s->gb, 8); /* Se */
+ skip_bits(&s->gb, 8); /* Ah and Al (each are 4 bits) */
for(i=0;i<nb_components;i++)
s->last_dc[i] = 1024;
@@ -787,22 +980,24 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s,
v = v_count[i];
x = 0;
y = 0;
+ if (s->restart_interval && !s->restart_count)
+ s->restart_count = s->restart_interval;
for(j=0;j<n;j++) {
memset(s->block, 0, sizeof(s->block));
if (decode_block(s, s->block, i,
dc_index[i], ac_index[i],
s->quant_index[c]) < 0) {
- dprintf("error %d %d\n", mb_y, mb_x);
+ dprintf("error y=%d x=%d\n", mb_y, mb_x);
ret = -1;
goto the_end;
}
- ff_idct (s->block);
+// dprintf("mb: %d %d processed\n", mb_y, mb_x);
ptr = s->current_picture[c] +
(s->linesize[c] * (v * mb_y + y) * 8) +
(h * mb_x + x) * 8;
if (s->interlaced && s->bottom_field)
ptr += s->linesize[c] >> 1;
- put_pixels_clamped(s->block, ptr, s->linesize[c]);
+ ff_idct_put(ptr, s->linesize[c], s->block);
if (++x == h) {
x = 0;
y++;
@@ -815,6 +1010,139 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s,
the_end:
emms_c();
return ret;
+ out_of_range:
+ dprintf("decode_sos: ac/dc index out of range\n");
+ return -1;
+}
+
+static int mjpeg_decode_dri(MJpegDecodeContext *s, /* parse a DRI segment held in buf; returns 0, or -1 on a bad first field */
+ UINT8 *buf, int buf_size)
+{
+ init_get_bits(&s->gb, buf, buf_size);
+
+ if (get_bits(&s->gb, 16) != 4) /* the leading 16-bit field must be exactly 4 */
+ return -1;
+ s->restart_interval = get_bits(&s->gb, 16); /* the following 16 bits are stored as the restart interval */
+ printf("restart interval: %d\n", s->restart_interval);
+
+ return 0;
+}
+
+/* Pack four character codes into one 32-bit tag, first character in the top byte. */
+#define FOURCC(a,b,c,d) (((a) << 24) | ((b) << 16) | ((c) << 8) | (d))
+/*
+ * Parse one APPn segment; buf starts at the 16-bit segment length.
+ *
+ * Recognises the 'AVI1' tag (sets buggy_avid and interleaved_rows), a 'JFIF'
+ * header (its version is printed) and, for APP1 only, the Apple 'mjpg' tag.
+ * Returns 0, or -1 when the length field is below 5.
+ */
+static int mjpeg_decode_app(MJpegDecodeContext *s,
+                            UINT8 *buf, int buf_size, int start_code)
+{
+    int len, id;
+    int ver_major, ver_minor;
+
+    init_get_bits(&s->gb, buf, buf_size);
+
+    /* XXX: verify len field validity */
+    len = get_bits(&s->gb, 16);
+    if (len < 5)
+        return -1;
+
+    /* two separate statements: the operands of '|' may be evaluated in
+       either order, which could swap the two halves of the tag */
+    id = get_bits(&s->gb, 16) << 16;
+    id |= get_bits(&s->gb, 16);
+    len -= 6;
+
+    /* buggy AVID, it puts EOI only at every 10th frame */
+    /* also this fourcc is used by non-avid files too, it means
+       interleaving, but it's always present in AVID files */
+    if (id == FOURCC('A','V','I','1'))
+    {
+        /* structure:
+            4bytes      AVI1
+            1bytes      polarity
+            1bytes      always zero
+            4bytes      field_size
+            4bytes      field_size_less_padding
+        */
+        s->buggy_avid = 1;
+        if (s->first_picture)
+            printf("mjpeg: workarounding buggy AVID\n");
+        s->interleaved_rows = get_bits(&s->gb, 8);
+#if 0
+        skip_bits(&s->gb, 8);
+        skip_bits(&s->gb, 32);
+        skip_bits(&s->gb, 32);
+        len -= 10;
+#endif
+        if (s->interleaved_rows)
+            printf("mjpeg: interleaved rows: %d\n", s->interleaved_rows);
+        goto out;
+    }
+
+    len -= 2;
+
+    if (id == FOURCC('J','F','I','F'))
+    {
+        skip_bits(&s->gb, 8); /* the trailing zero-byte */
+        /* read the two version bytes in stream order before printing:
+           the evaluation order of function arguments is unspecified */
+        ver_major = get_bits(&s->gb, 8);
+        ver_minor = get_bits(&s->gb, 8);
+        printf("mjpeg: JFIF header found (version: %x.%x)\n",
+               ver_major, ver_minor);
+        goto out;
+    }
+
+    /* Apple MJPEG-A */
+    if ((start_code == APP1) && (len > (0x28 - 8)))
+    {
+        id = get_bits(&s->gb, 16) << 16;
+        id |= get_bits(&s->gb, 16);
+        len -= 4;
+        if (id == FOURCC('m','j','p','g')) /* Apple MJPEG-A */
+        {
+#if 0
+            skip_bits(&s->gb, 32); /* field size */
+            skip_bits(&s->gb, 32); /* pad field size */
+            skip_bits(&s->gb, 32); /* next off */
+            skip_bits(&s->gb, 32); /* quant off */
+            skip_bits(&s->gb, 32); /* huff off */
+            skip_bits(&s->gb, 32); /* image off */
+            skip_bits(&s->gb, 32); /* scan off */
+            skip_bits(&s->gb, 32); /* data off */
+#endif
+            if (s->first_picture)
+                printf("mjpeg: Apple MJPEG-A header found\n");
+        }
+    }
+
+out:
+    /* should check for further values.. */
+    /* the subtractions above can push len below zero on short segments;
+       never hand a negative count to the skip loop */
+    if (len < 0)
+        len = 0;
+    SKIP_REMAINING(&s->gb, len);
+
+    return 0;
+}
+#undef FOURCC
+
+/*
+ * Parse a COM (comment) segment; buf starts at the 16-bit segment length.
+ *
+ * The comment text is copied into a temporary NUL-terminated buffer (one
+ * trailing '\n' is dropped), printed, and compared against "AVID" to enable
+ * the buggy_avid workaround.
+ * Returns 0 on success (including an empty comment), -1 on a malformed
+ * length field or allocation failure.
+ */
+static int mjpeg_decode_com(MJpegDecodeContext *s,
+                            UINT8 *buf, int buf_size)
+{
+    int len, i;
+    UINT8 *cbuf;
+
+    init_get_bits(&s->gb, buf, buf_size);
+
+    /* the 16-bit length field itself needs two bytes */
+    if (buf_size < 2)
+        return -1;
+
+    len = get_bits(&s->gb, 16)-2;
+    if (len < 0)
+        return -1;
+    /* never read more payload than the caller actually handed over */
+    if (len > buf_size - 2)
+        len = buf_size - 2;
+    /* empty comment: nothing to inspect (and cbuf[i-1] below needs len > 0) */
+    if (len == 0)
+        return 0;
+
+    cbuf = av_malloc(len+1);
+    if (!cbuf)
+        return -1;
+
+    for (i = 0; i < len; i++)
+        cbuf[i] = get_bits(&s->gb, 8);
+    /* terminate the string, replacing a single trailing newline if present */
+    if (cbuf[i-1] == '\n')
+        cbuf[i-1] = 0;
+    else
+        cbuf[i] = 0;
+
+    printf("mjpeg comment: '%s'\n", cbuf);
+
+    /* buggy avid, it puts EOI only at every 10th frame */
+    if (!strcmp(cbuf, "AVID"))
+    {
+        s->buggy_avid = 1;
+        if (s->first_picture)
+            printf("mjpeg: workarounding buggy AVID\n");
+    }
+
+    av_free(cbuf);
+
+    return 0;
}
/* return the 8 bit start code value and update the search
@@ -858,8 +1186,9 @@ static int mjpeg_decode_frame(AVCodecContext *avctx,
{
MJpegDecodeContext *s = avctx->priv_data;
UINT8 *buf_end, *buf_ptr, *buf_start;
- int len, code, start_code, input_size, i;
+ int len, code, input_size, i;
AVPicture *picture = data;
+ unsigned int start_code;
*data_size = 0;
@@ -883,10 +1212,13 @@ static int mjpeg_decode_frame(AVCodecContext *avctx,
} else {
memcpy(s->buf_ptr, buf_start, len);
s->buf_ptr += len;
- /* if we got FF 00, we copy FF to the stream to unescape FF 00 */
- if (code == 0) {
+ if (code < 0) {
+ /* nothing to do: wait next marker */
+ } else if (code == 0 || code == 0xff) {
+ /* if we got FF 00, we copy FF to the stream to unescape FF 00 */
+ /* valid marker code is between 00 and ff - alex */
s->buf_ptr--;
- } else if (code > 0) {
+ } else {
/* prepare data for next start code */
input_size = s->buf_ptr - s->buffer;
start_code = s->start_code;
@@ -895,6 +1227,7 @@ static int mjpeg_decode_frame(AVCodecContext *avctx,
dprintf("marker=%x\n", start_code);
switch(start_code) {
case SOI:
+ s->restart_interval = 0;
/* nothing to do on SOI */
break;
case DQT:
@@ -908,7 +1241,7 @@ static int mjpeg_decode_frame(AVCodecContext *avctx,
break;
case SOS:
mjpeg_decode_sos(s, s->buffer, input_size);
- if (s->start_code == EOI) {
+ if (s->start_code == EOI || s->buggy_avid || s->restart_interval) {
int l;
if (s->interlaced) {
s->bottom_field ^= 1;
@@ -943,11 +1276,41 @@ static int mjpeg_decode_frame(AVCodecContext *avctx,
}
/* dummy quality */
/* XXX: infer it with matrix */
- avctx->quality = 3;
+ avctx->quality = 3;
goto the_end;
}
break;
+ case DRI:
+ mjpeg_decode_dri(s, s->buffer, input_size);
+ break;
+ case SOF1:
+ case SOF2:
+ case SOF3:
+ case SOF5:
+ case SOF6:
+ case SOF7:
+ case SOF9:
+ case SOF10:
+ case SOF11:
+ case SOF13:
+ case SOF14:
+ case SOF15:
+ case JPG:
+ printf("mjpeg: unsupported coding type (%x)\n", start_code);
+ return -1;
}
+#if 1
+ if (start_code >= 0xd0 && start_code <= 0xd7) {
+ dprintf("restart marker: %d\n", start_code&0x0f);
+ } else if (s->first_picture) {
+ /* APP fields */
+ if (start_code >= 0xe0 && start_code <= 0xef)
+ mjpeg_decode_app(s, s->buffer, input_size, start_code);
+ /* Comment */
+ else if (start_code == COM)
+ mjpeg_decode_com(s, s->buffer, input_size);
+ }
+#endif
}
}
}
@@ -961,7 +1324,7 @@ static int mjpeg_decode_end(AVCodecContext *avctx)
int i, j;
for(i=0;i<MAX_COMPONENTS;i++)
- free(s->current_picture[i]);
+ av_free(s->current_picture[i]);
for(i=0;i<2;i++) {
for(j=0;j<4;j++)
free_vlc(&s->vlcs[i][j]);
@@ -978,4 +1341,6 @@ AVCodec mjpeg_decoder = {
NULL,
mjpeg_decode_end,
mjpeg_decode_frame,
+ 0,
+ NULL
};
diff --git a/src/libffmpeg/libavcodec/mlib/dsputil_mlib.c b/src/libffmpeg/libavcodec/mlib/dsputil_mlib.c
index 236c9206a..4539b6464 100644
--- a/src/libffmpeg/libavcodec/mlib/dsputil_mlib.c
+++ b/src/libffmpeg/libavcodec/mlib/dsputil_mlib.c
@@ -1,20 +1,20 @@
/*
* Sun mediaLib optimized DSP utils
- * Copyright (c) 2001 Gerard Lantau.
+ * Copyright (c) 2001 Fabrice Bellard.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
*
- * This program is distributed in the hope that it will be useful,
+ * This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "../dsputil.h"
diff --git a/src/libffmpeg/libavcodec/motion_est.c b/src/libffmpeg/libavcodec/motion_est.c
index 92724ac87..8f2ffa42e 100644
--- a/src/libffmpeg/libavcodec/motion_est.c
+++ b/src/libffmpeg/libavcodec/motion_est.c
@@ -1,43 +1,45 @@
/*
* Motion estimation
- * Copyright (c) 2000,2001 Gerard Lantau.
+ * Copyright (c) 2000,2001 Fabrice Bellard.
+ * Copyright (c) 2002 Michael Niedermayer
*
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
*
- * This program is distributed in the hope that it will be useful,
+ * This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* new Motion Estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
*/
-#include "config.h"
-#include "xine-utils/xineutils.h"
#include <stdlib.h>
#include <stdio.h>
#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"
-#define ABS(a) ((a)>0 ? (a) : -(a))
-#define MAX(a,b) ((a) > (b) ? (a) : (b))
+#define SQ(a) ((a)*(a))
#define INTER_BIAS 257
-static void halfpel_motion_search(MpegEncContext * s,
- int *mx_ptr, int *my_ptr, int dmin,
- int xmin, int ymin, int xmax, int ymax,
- int pred_x, int pred_y);
+#define P_LAST P[0]
+#define P_LEFT P[1]
+#define P_TOP P[2]
+#define P_TOPRIGHT P[3]
+#define P_MEDIAN P[4]
+#define P_LAST_LEFT P[5]
+#define P_LAST_RIGHT P[6]
+#define P_LAST_TOP P[7]
+#define P_LAST_BOTTOM P[8]
+#define P_MV1 P[9]
-/* config it to test motion vector encoding (send random vectors) */
-//#define CONFIG_TEST_MV_ENCODE
static int pix_sum(UINT8 * pix, int line_size)
{
@@ -140,7 +142,7 @@ static void no_motion_search(MpegEncContext * s,
static int full_motion_search(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int range,
- int xmin, int ymin, int xmax, int ymax)
+ int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
int x1, y1, x2, y2, xx, yy, x, y;
int mx, my, dmin, d;
@@ -166,7 +168,7 @@ static int full_motion_search(MpegEncContext * s,
my = 0;
for (y = y1; y <= y2; y++) {
for (x = x1; x <= x2; x++) {
- d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x,
+ d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x,
s->linesize);
if (d < dmin ||
(d == dmin &&
@@ -194,7 +196,7 @@ static int full_motion_search(MpegEncContext * s,
static int log_motion_search(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int range,
- int xmin, int ymin, int xmax, int ymax)
+ int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
int x1, y1, x2, y2, xx, yy, x, y;
int mx, my, dmin, d;
@@ -231,7 +233,7 @@ static int log_motion_search(MpegEncContext * s,
do {
for (y = y1; y <= y2; y += range) {
for (x = x1; x <= x2; x += range) {
- d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize);
+ d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
dmin = d;
mx = x;
@@ -270,7 +272,7 @@ static int log_motion_search(MpegEncContext * s,
static int phods_motion_search(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int range,
- int xmin, int ymin, int xmax, int ymax)
+ int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
int x1, y1, x2, y2, xx, yy, x, y, lastx, d;
int mx, my, dminx, dminy;
@@ -311,7 +313,7 @@ static int phods_motion_search(MpegEncContext * s,
lastx = x;
for (x = x1; x <= x2; x += range) {
- d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize);
+ d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
dminx = d;
mx = x;
@@ -320,7 +322,7 @@ static int phods_motion_search(MpegEncContext * s,
x = lastx;
for (y = y1; y <= y2; y += range) {
- d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize);
+ d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
dminy = d;
my = y;
@@ -364,62 +366,61 @@ static int phods_motion_search(MpegEncContext * s,
#define CHECK_MV(x,y)\
{\
- d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
- d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
- if(d<dmin){\
- best[0]=x;\
- best[1]=y;\
- dmin=d;\
+ const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
+ const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
+ if(map[index]!=key){\
+ d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
+ d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
+ COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
+ map[index]= key;\
+ score_map[index]= d;\
}\
}
#define CHECK_MV_DIR(x,y,new_dir)\
{\
- d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
- d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
- if(d<dmin){\
- best[0]=x;\
- best[1]=y;\
- dmin=d;\
- next_dir= new_dir;\
+ const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
+ const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
+ if(map[index]!=key){\
+ d = pix_abs(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
+ d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
+ if(d<dmin){\
+ best[0]=x;\
+ best[1]=y;\
+ dmin=d;\
+ next_dir= new_dir;\
+ }\
+ map[index]= key;\
+ score_map[index]= d;\
}\
}
#define CHECK_MV4(x,y)\
{\
- d = pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
- d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
- if(d<dmin){\
- best[0]=x;\
- best[1]=y;\
- dmin=d;\
+ const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
+ const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
+ if(map[index]!=key){\
+ d = pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
+ d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
+ COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
+ map[index]= key;\
+ score_map[index]= d;\
}\
}
-#define CHECK_MV4_DIR(x,y,new_dir)\
-{\
- d = pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
- d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
- if(d<dmin){\
- best[0]=x;\
- best[1]=y;\
- dmin=d;\
- next_dir= new_dir;\
- }\
-}
-
-
#define check(x,y,S,v)\
-if( (x)<(xmin<<(S)) ) printf("%d %d %d %d xmin" #v, (x), (y), s->mb_x, s->mb_y);\
-if( (x)>(xmax<<(S)) ) printf("%d %d %d %d xmax" #v, (x), (y), s->mb_x, s->mb_y);\
-if( (y)<(ymin<<(S)) ) printf("%d %d %d %d ymin" #v, (x), (y), s->mb_x, s->mb_y);\
-if( (y)>(ymax<<(S)) ) printf("%d %d %d %d ymax" #v, (x), (y), s->mb_x, s->mb_y);\
+if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
+if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
+if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
+if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
static inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
- int xmin, int ymin, int xmax, int ymax, int shift)
+ int xmin, int ymin, int xmax, int ymax, int shift,
+ uint32_t *map, uint16_t *score_map, int map_generation,
+ op_pixels_abs_func pix_abs)
{
int next_dir=-1;
@@ -467,36 +468,19 @@ static inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
*/
}
-static inline int small_diamond_search4MV(MpegEncContext * s, int *best, int dmin,
- UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
- int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
- int xmin, int ymin, int xmax, int ymax, int shift)
-{
- int next_dir=-1;
-
- for(;;){
- int d;
- const int dir= next_dir;
- const int x= best[0];
- const int y= best[1];
- next_dir=-1;
-
-//printf("%d", dir);
- if(dir!=2 && x>xmin) CHECK_MV4_DIR(x-1, y , 0)
- if(dir!=3 && y>ymin) CHECK_MV4_DIR(x , y-1, 1)
- if(dir!=0 && x<xmax) CHECK_MV4_DIR(x+1, y , 2)
- if(dir!=1 && y<ymax) CHECK_MV4_DIR(x , y+1, 3)
-
- if(next_dir==-1){
- return dmin;
- }
- }
-}
-
+#if 1
+#define SNAKE_1 3
+#define SNAKE_2 2
+#else
+#define SNAKE_1 7
+#define SNAKE_2 3
+#endif
static inline int snake_search(MpegEncContext * s, int *best, int dmin,
UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
- int xmin, int ymin, int xmax, int ymax, int shift)
+ int xmin, int ymin, int xmax, int ymax, int shift,
+ uint32_t *map, uint16_t *score_map,int map_generation,
+ op_pixels_abs_func pix_abs)
{
int dir=0;
int c=1;
@@ -522,8 +506,15 @@ if(256*256*256*64%point==0)
x+=x_dir[dir];
y+=y_dir[dir];
if(x>=xmin && x<=xmax && y>=ymin && y<=ymax){
- d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);
- d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;
+ const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;
+ const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);
+ if(map[index]!=key){
+ d = pix_abs(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);
+ d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;
+ map[index]=key;
+ score_map[index]=d;
+ }else
+ d= dmin+1;
}else{
d = dmin + 10000; //FIXME smarter boundary handling
}
@@ -542,21 +533,90 @@ if(256*256*256*64%point==0)
}else{
//bad++;
if(fails){
- if(fails>=3) return dmin;
+ if(fails>=SNAKE_1+1) return dmin;
}else{
- c= -c;
+ if(dir&1) dir-= c*3;
+ else c= -c;
+// c= -c;
}
- dir+=c*2;
+ dir+=c*SNAKE_2;
fails++;
}
dir&=7;
}
}
+static inline int cross_search(MpegEncContext * s, int *best, int dmin, /* Refine best[] by single unit steps along the x and y axes; returns the final dmin and leaves the winning position in best[] (s is not referenced in this body). */
+ UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
+ int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
+ int xmin, int ymin, int xmax, int ymax, int shift,
+ uint32_t *map, uint16_t *score_map,int map_generation, /* map/score_map: cache of positions already evaluated (keyed with map_generation) and their scores */
+ op_pixels_abs_func pix_abs) /* pix_abs: comparison function called on new_pic and the displaced old_pic */
+{
+ static int x_dir[4]= {-1, 0, 1, 0}; /* dir 0: x-1, dir 1: y-1, dir 2: x+1, dir 3: y+1; dir^2 reverses the step */
+ static int y_dir[4]= { 0,-1, 0, 1};
+ int improvement[2]={100000, 100000}; /* one entry per axis, indexed by dir&1 (0 = x, 1 = y); -1 marks the axis as finished */
+ int dirs[2]={2, 3}; /* direction to try next on each axis */
+ int dir;
+ int last_dir= -1;
+
+ for(;;){
+ dir= dirs[ improvement[0] > improvement[1] ? 0 : 1 ]; /* work on the axis with the larger improvement value (ties go to the y axis) */
+ if(improvement[dir&1]==-1) return dmin; /* the selected axis is marked finished: stop */
+
+ {
+ const int x= best[0] + x_dir[dir];
+ const int y= best[1] + y_dir[dir];
+ const int key= (y<<ME_MAP_MV_BITS) + x + map_generation;
+ const int index= ((y<<ME_MAP_SHIFT) + x)&(ME_MAP_SIZE-1);
+ int d;
+ if(x>=xmin && x<=xmax && y>=ymin && y<=ymax){
+ if(map[index]!=key){ /* no cache entry with this key: evaluate the position and store it */
+ d = pix_abs(new_pic, old_pic + x + y*pic_stride, pic_stride);
+ d += (mv_penalty[(x<<shift)-pred_x] + mv_penalty[(y<<shift)-pred_y])*quant; /* add the vector penalty relative to the predictor, scaled by quant */
+ map[index]=key;
+ score_map[index]=d;
+ if(d<dmin){ /* better score: accept the position as the new best and restart the loop */
+ improvement[dir&1]= dmin-d;
+ improvement[(dir&1)^1]++;
+ dmin=d;
+ best[0]= x;
+ best[1]= y;
+ last_dir=dir;
+ continue;
+ }
+ }else{
+ d= score_map[index]; /* reuse the cached score */
+ }
+ }else{
+ d= dmin + 1000; //FIXME is this a good idea?
+ }
+ /* evaluated point was cached or checked and worse */
+
+ if(last_dir==dir){
+ improvement[dir&1]= -1; /* this direction equals last_dir and did not improve: mark the axis finished */
+ }else{
+ improvement[dir&1]= d-dmin;
+ last_dir= dirs[dir&1]= dir^2; /* reverse the direction for this axis and remember it as last_dir */
+ }
+ }
+ }
+}
+
+static inline int update_map_generation(MpegEncContext * s) /* Advance s->me_map_generation by one step and return the new value. */
+{
+ s->me_map_generation+= 1<<(ME_MAP_MV_BITS*2); /* the step leaves the low 2*ME_MAP_MV_BITS bits free; the search code adds (y<<ME_MAP_MV_BITS)+x to the generation to build cache keys */
+ if(s->me_map_generation==0){ /* value reached 0 (presumably an unsigned wrap-around; the field type is not visible here) */
+ s->me_map_generation= 1<<(ME_MAP_MV_BITS*2);
+ memset(s->me_map, 0, sizeof(uint32_t)*ME_MAP_SIZE); /* clear the whole map before restarting at the first generation value */
+ }
+ return s->me_map_generation;
+}
+
static int epzs_motion_search(MpegEncContext * s,
int *mx_ptr, int *my_ptr,
- int P[5][2], int pred_x, int pred_y,
- int xmin, int ymin, int xmax, int ymax)
+ int P[10][2], int pred_x, int pred_y,
+ int xmin, int ymin, int xmax, int ymax, uint8_t * ref_picture)
{
int best[2]={0, 0};
int d, dmin;
@@ -566,42 +626,74 @@ static int epzs_motion_search(MpegEncContext * s,
UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
int quant= s->qscale; // qscale of the prev frame
const int shift= 1+s->quarter_sample;
+ uint32_t *map= s->me_map;
+ uint16_t *score_map= s->me_score_map;
+ int map_generation;
new_pic = s->new_picture[0] + pic_xy;
- old_pic = s->last_picture[0] + pic_xy;
-
+ old_pic = ref_picture + pic_xy;
+
+ map_generation= update_map_generation(s);
+
dmin = pix_abs16x16(new_pic, old_pic, pic_stride);
- if(dmin<Z_THRESHOLD){
- *mx_ptr= 0;
- *my_ptr= 0;
-//printf("Z");
- return dmin;
- }
+ map[0]= map_generation;
+ score_map[0]= dmin;
/* first line */
- if ((s->mb_y == 0 || s->first_slice_line || s->first_gob_line)) {
- CHECK_MV(P[1][0]>>shift, P[1][1]>>shift)
+ if ((s->mb_y == 0 || s->first_slice_line)) {
+ CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
+ CHECK_MV(P_LAST[0]>>shift, P_LAST[1]>>shift)
}else{
- CHECK_MV(P[4][0]>>shift, P[4][1]>>shift)
- if(dmin<Z_THRESHOLD){
- *mx_ptr= P[4][0]>>shift;
- *my_ptr= P[4][1]>>shift;
-//printf("M\n");
+ if(dmin<256 && ( P_LEFT[0] |P_LEFT[1]
+ |P_TOP[0] |P_TOP[1]
+ |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
+ *mx_ptr= 0;
+ *my_ptr= 0;
+ s->skip_me=1;
return dmin;
}
- CHECK_MV(P[1][0]>>shift, P[1][1]>>shift)
- CHECK_MV(P[2][0]>>shift, P[2][1]>>shift)
- CHECK_MV(P[3][0]>>shift, P[3][1]>>shift)
+ CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
+ if(dmin>256*2){
+ CHECK_MV(P_LAST[0] >>shift, P_LAST[1] >>shift)
+ CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift)
+ CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift)
+ CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
+ }
}
- CHECK_MV(P[0][0]>>shift, P[0][1]>>shift)
-
+ if(dmin>256*4){
+ CHECK_MV(P_LAST_RIGHT[0] >>shift, P_LAST_RIGHT[1] >>shift)
+ CHECK_MV(P_LAST_BOTTOM[0]>>shift, P_LAST_BOTTOM[1]>>shift)
+ }
+#if 0 //doest only slow things down
+ if(dmin>512*3){
+ int step;
+ dmin= score_map[0];
+ best[0]= best[1]=0;
+ for(step=128; step>0; step>>=1){
+ const int step2= step;
+ int y;
+ for(y=-step2+best[1]; y<=step2+best[1]; y+=step){
+ int x;
+ if(y<ymin || y>ymax) continue;
+
+ for(x=-step2+best[0]; x<=step2+best[0]; x+=step){
+ if(x<xmin || x>xmax) continue;
+ if(x==best[0] && y==best[1]) continue;
+ CHECK_MV(x,y)
+ }
+ }
+ }
+ }
+#endif
//check(best[0],best[1],0, b0)
- if(s->full_search==ME_EPZS)
+ if(s->me_method==ME_EPZS)
dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride,
- pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, shift);
+ pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax,
+ shift, map, score_map, map_generation, pix_abs16x16);
else
- dmin= snake_search(s, best, dmin, new_pic, old_pic, pic_stride,
- pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, shift);
+ dmin= cross_search(s, best, dmin, new_pic, old_pic, pic_stride,
+ pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax,
+ shift, map, score_map, map_generation, pix_abs16x16);
//check(best[0],best[1],0, b1)
*mx_ptr= best[0];
*my_ptr= best[1];
@@ -612,8 +704,8 @@ static int epzs_motion_search(MpegEncContext * s,
static int epzs_motion_search4(MpegEncContext * s, int block,
int *mx_ptr, int *my_ptr,
- int P[6][2], int pred_x, int pred_y,
- int xmin, int ymin, int xmax, int ymax)
+ int P[10][2], int pred_x, int pred_y,
+ int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
int best[2]={0, 0};
int d, dmin;
@@ -623,34 +715,47 @@ static int epzs_motion_search4(MpegEncContext * s, int block,
UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
int quant= s->qscale; // qscale of the prev frame
const int shift= 1+s->quarter_sample;
+ uint32_t *map= s->me_map;
+ uint16_t *score_map= s->me_score_map;
+ int map_generation;
new_pic = s->new_picture[0] + pic_xy;
- old_pic = s->last_picture[0] + pic_xy;
-
- dmin = pix_abs8x8(new_pic, old_pic, pic_stride);
+ old_pic = ref_picture + pic_xy;
+ map_generation= update_map_generation(s);
+
+ dmin = 1000000;
+//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
/* first line */
- if ((s->mb_y == 0 || s->first_slice_line || s->first_gob_line) && block<2) {
- CHECK_MV4(P[1][0]>>shift, P[1][1]>>shift)
+ if ((s->mb_y == 0 || s->first_slice_line) && block<2) {
+ CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
+ CHECK_MV4(P_LAST[0]>>shift, P_LAST[1]>>shift)
+ CHECK_MV4(P_MV1[0]>>shift, P_MV1[1]>>shift)
}else{
- CHECK_MV4(P[4][0]>>shift, P[4][1]>>shift)
- if(dmin<Z_THRESHOLD){
- *mx_ptr= P[4][0]>>shift;
- *my_ptr= P[4][1]>>shift;
-//printf("M\n");
- return dmin;
+ CHECK_MV4(P_MV1[0]>>shift, P_MV1[1]>>shift)
+ //FIXME try some early stop
+ if(dmin>64*2){
+ CHECK_MV4(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
+ CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
+ CHECK_MV4(P_TOP[0]>>shift, P_TOP[1]>>shift)
+ CHECK_MV4(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
+ CHECK_MV4(P_LAST[0]>>shift, P_LAST[1]>>shift)
}
- CHECK_MV4(P[1][0]>>shift, P[1][1]>>shift)
- CHECK_MV4(P[2][0]>>shift, P[2][1]>>shift)
- CHECK_MV4(P[3][0]>>shift, P[3][1]>>shift)
}
- CHECK_MV4(P[0][0]>>shift, P[0][1]>>shift)
- CHECK_MV4(P[5][0]>>shift, P[5][1]>>shift)
+ if(dmin>64*4){
+ CHECK_MV4(P_LAST_RIGHT[0]>>shift, P_LAST_RIGHT[1]>>shift)
+ CHECK_MV4(P_LAST_BOTTOM[0]>>shift, P_LAST_BOTTOM[1]>>shift)
+ }
+
+ if(s->me_method==ME_EPZS)
+ dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride,
+ pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax,
+ shift, map, score_map, map_generation, pix_abs8x8);
+ else
+ dmin= cross_search(s, best, dmin, new_pic, old_pic, pic_stride,
+ pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax,
+ shift, map, score_map, map_generation, pix_abs8x8);
-//check(best[0],best[1],0, b0)
- dmin= small_diamond_search4MV(s, best, dmin, new_pic, old_pic, pic_stride,
- pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, shift);
-//check(best[0],best[1],0, b1)
*mx_ptr= best[0];
*my_ptr= best[1];
@@ -659,56 +764,50 @@ static int epzs_motion_search4(MpegEncContext * s, int block,
}
#define CHECK_HALF_MV(suffix, x, y) \
- d= pix_abs16x16_ ## suffix(pix, ptr+((x)>>1), s->linesize);\
+{\
+ d= pix_abs_ ## suffix(pix, ptr+((x)>>1), s->linesize);\
d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*quant;\
- if(d<dminh){\
- dminh= d;\
- mx= mx1 + x;\
- my= my1 + y;\
- }
+ COPY3_IF_LT(dminh, d, dx, x, dy, y)\
+} /* CHECK_HALF_MV: scores one half-pel offset (x,y) with the caller's pix_abs_<suffix> function plus the vector penalty, then hands d, x, y to COPY3_IF_LT (defined outside this chunk; presumably updates dminh/dx/dy when d is smaller). Relies on d, pix, ptr, pen_x, pen_y, quant, mv_penalty, dminh, dx, dy and s in the enclosing scope. */
-#define CHECK_HALF_MV4(suffix, x, y) \
- d= pix_abs8x8_ ## suffix(pix, ptr+((x)>>1), s->linesize);\
- d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*quant;\
- if(d<dminh){\
- dminh= d;\
- mx= mx1 + x;\
- my= my1 + y;\
- }
/* The idea would be to make half pel ME after Inter/Intra decision to
save time. */
-static inline void halfpel_motion_search(MpegEncContext * s,
+static inline int halfpel_motion_search(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin,
int xmin, int ymin, int xmax, int ymax,
- int pred_x, int pred_y)
+ int pred_x, int pred_y, uint8_t *ref_picture,
+ op_pixels_abs_func pix_abs_x2,
+ op_pixels_abs_func pix_abs_y2, op_pixels_abs_func pix_abs_xy2, int n)
{
UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
const int quant= s->qscale;
- int pen_x, pen_y;
- int mx, my, mx1, my1, d, xx, yy, dminh;
+ int mx, my, xx, yy, dminh;
UINT8 *pix, *ptr;
- mx = *mx_ptr;
- my = *my_ptr;
- ptr = s->last_picture[0] + (my * s->linesize) + mx;
+ if(s->skip_me){
+ *mx_ptr = 0;
+ *my_ptr = 0;
+ return dmin;
+ }else
- xx = 16 * s->mb_x;
- yy = 16 * s->mb_y;
+ xx = 16 * s->mb_x + 8*(n&1);
+ yy = 16 * s->mb_y + 8*(n>>1);
pix = s->new_picture[0] + (yy * s->linesize) + xx;
+
+ mx = *mx_ptr;
+ my = *my_ptr;
+ ptr = ref_picture + ((yy + my) * s->linesize) + (xx + mx);
dminh = dmin;
if (mx > xmin && mx < xmax &&
my > ymin && my < ymax) {
+ int dx=0, dy=0;
+ int d, pen_x, pen_y;
- mx= mx1= 2*(mx - xx);
- my= my1= 2*(my - yy);
- if(dmin < Z_THRESHOLD && mx==0 && my==0){
- *mx_ptr = 0;
- *my_ptr = 0;
- return;
- }
+ mx<<=1;
+ my<<=1;
pen_x= pred_x + mx;
pen_y= pred_y + my;
@@ -725,80 +824,135 @@ static inline void halfpel_motion_search(MpegEncContext * s,
CHECK_HALF_MV(y2 , 0, +1)
CHECK_HALF_MV(xy2, +1, +1)
+ mx+=dx;
+ my+=dy;
}else{
- mx= 2*(mx - xx);
- my= 2*(my - yy);
+ mx<<=1;
+ my<<=1;
}
*mx_ptr = mx;
*my_ptr = my;
+ return dminh;
}
-static inline void halfpel_motion_search4(MpegEncContext * s,
+static inline int fast_halfpel_motion_search(MpegEncContext * s, /* Half-pel refinement that tests only 4 of the 8 neighbouring half-pel offsets, chosen from cached scores; writes the vector in half-pel units to *mx_ptr / *my_ptr and returns the best score. */
int *mx_ptr, int *my_ptr, int dmin,
int xmin, int ymin, int xmax, int ymax,
- int pred_x, int pred_y, int block_x, int block_y)
+ int pred_x, int pred_y, uint8_t *ref_picture,
+ op_pixels_abs_func pix_abs_x2, /* pix_abs_x2 / pix_abs_y2 / pix_abs_xy2 are reached through CHECK_HALF_MV, which pastes the suffix onto pix_abs_ */
+ op_pixels_abs_func pix_abs_y2, op_pixels_abs_func pix_abs_xy2, int n) /* n: sub-block number; bit 0 adds 8 to x, bit 1 adds 8 to y (see xx/yy below) */
{
UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
+ uint16_t *score_map= s->me_score_map;
const int quant= s->qscale;
- int pen_x, pen_y;
- int mx, my, mx1, my1, d, xx, yy, dminh;
+ int mx, my, xx, yy, dminh;
UINT8 *pix, *ptr;
- xx = 8 * block_x;
- yy = 8 * block_y;
+ if(s->skip_me){ /* skip flag set: report a zero vector and return dmin unchanged */
+// printf("S");
+ *mx_ptr = 0;
+ *my_ptr = 0;
+ return dmin;
+ }
+// printf("N");
+
+ xx = 16 * s->mb_x + 8*(n&1); /* pixel position of the block inside the picture */
+ yy = 16 * s->mb_y + 8*(n>>1);
pix = s->new_picture[0] + (yy * s->linesize) + xx;
-
+
mx = *mx_ptr;
my = *my_ptr;
- ptr = s->last_picture[0] + ((yy+my) * s->linesize) + xx + mx;
-
+ ptr = ref_picture + ((yy + my) * s->linesize) + (xx + mx); /* reference block displaced by the incoming (mx,my) */
+
dminh = dmin;
if (mx > xmin && mx < xmax &&
my > ymin && my < ymax) {
+ int dx=0, dy=0;
+ int d, pen_x, pen_y;
+ const int index= (my<<ME_MAP_SHIFT) + mx;
+ const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]; /* t/l/r/b: cached scores of the top, left, right and bottom neighbours; read without checking the map[] key, so this assumes the earlier search filled them - TODO confirm */
+ const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)];
+ const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)];
+ const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
+ mx<<=1; /* switch to half-pel units */
+ my<<=1;
- mx= mx1= 2*mx;
- my= my1= 2*my;
- if(dmin < Z_THRESHOLD && mx==0 && my==0){
- *mx_ptr = 0;
- *my_ptr = 0;
- return;
- }
pen_x= pred_x + mx;
pen_y= pred_y + my;
ptr-= s->linesize;
- CHECK_HALF_MV4(xy2, -1, -1)
- CHECK_HALF_MV4(y2 , 0, -1)
- CHECK_HALF_MV4(xy2, +1, -1)
-
- ptr+= s->linesize;
- CHECK_HALF_MV4(x2 , -1, 0)
- CHECK_HALF_MV4(x2 , +1, 0)
- CHECK_HALF_MV4(xy2, -1, +1)
- CHECK_HALF_MV4(y2 , 0, +1)
- CHECK_HALF_MV4(xy2, +1, +1)
+ if(t<=b){ /* top neighbour scored no worse than bottom: test the upper half-pel row first; ptr is one line up at this point */
+ CHECK_HALF_MV(y2 , 0, -1)
+ if(l<=r){
+ CHECK_HALF_MV(xy2, -1, -1)
+ if(t+r<=b+l){
+ CHECK_HALF_MV(xy2, +1, -1)
+ ptr+= s->linesize; /* move ptr back to the centre row before the y>=0 offsets */
+ }else{
+ ptr+= s->linesize;
+ CHECK_HALF_MV(xy2, -1, +1)
+ }
+ CHECK_HALF_MV(x2 , -1, 0)
+ }else{
+ CHECK_HALF_MV(xy2, +1, -1)
+ if(t+l<=b+r){
+ CHECK_HALF_MV(xy2, -1, -1)
+ ptr+= s->linesize;
+ }else{
+ ptr+= s->linesize;
+ CHECK_HALF_MV(xy2, +1, +1)
+ }
+ CHECK_HALF_MV(x2 , +1, 0)
+ }
+ }else{ /* bottom neighbour scored better: at most one upper-row offset is tested, the rest lie on the centre and lower rows */
+ if(l<=r){
+ if(t+l<=b+r){
+ CHECK_HALF_MV(xy2, -1, -1)
+ ptr+= s->linesize;
+ }else{
+ ptr+= s->linesize;
+ CHECK_HALF_MV(xy2, +1, +1)
+ }
+ CHECK_HALF_MV(x2 , -1, 0)
+ CHECK_HALF_MV(xy2, -1, +1)
+ }else{
+ if(t+r<=b+l){
+ CHECK_HALF_MV(xy2, +1, -1)
+ ptr+= s->linesize;
+ }else{
+ ptr+= s->linesize;
+ CHECK_HALF_MV(xy2, -1, +1)
+ }
+ CHECK_HALF_MV(x2 , +1, 0)
+ CHECK_HALF_MV(xy2, +1, +1)
+ }
+ CHECK_HALF_MV(y2 , 0, +1)
+ }
+ mx+=dx; /* apply the offset recorded by CHECK_HALF_MV; dx/dy stay 0 if they were never updated */
+ my+=dy;
}else{
- mx*=2;
- my*=2;
+ mx<<=1; /* vector lies on the search border: only convert to half-pel units */
+ my<<=1;
}
*mx_ptr = mx;
*my_ptr = my;
+ return dminh;
}
-static inline void set_mv_tables(MpegEncContext * s, int mx, int my)
+static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4)
{
- const int xy= s->mb_x + s->mb_y*s->mb_width;
+ const int xy= s->mb_x + 1 + (s->mb_y + 1)*(s->mb_width + 2);
- s->mv_table[0][xy] = mx;
- s->mv_table[1][xy] = my;
+ s->p_mv_table[xy][0] = mx;
+ s->p_mv_table[xy][1] = my;
/* has allready been set to the 4 MV if 4MV is done */
- if(!(s->flags&CODEC_FLAG_4MV)){
+ if(mv4){
int mot_xy= s->block_index[0];
s->motion_val[mot_xy ][0]= mx;
@@ -814,59 +968,158 @@ static inline void set_mv_tables(MpegEncContext * s, int mx, int my)
}
}
-#ifndef CONFIG_TEST_MV_ENCODE
-
-void estimate_motion(MpegEncContext * s,
- int mb_x, int mb_y)
+static inline void get_limits(MpegEncContext *s, int *range, int *xmin, int *ymin, int *xmax, int *ymax, int f_code) /* Writes the search range and the absolute x/y limits to the output pointers, derived from f_code and the codec settings in s. */
{
- UINT8 *pix, *ppix;
- int sum, varc, vard, mx, my, range, dmin, xx, yy;
- int xmin, ymin, xmax, ymax;
- int rel_xmin, rel_ymin, rel_xmax, rel_ymax;
- int pred_x=0, pred_y=0;
- int P[6][2];
- const int shift= 1+s->quarter_sample;
- int mb_type=0;
-
- range = 8 * (1 << (s->f_code - 1));
+ *range = 8 * (1 << (f_code - 1)); /* base range: 8 for f_code 1, doubling with each f_code step */
/* XXX: temporary kludge to avoid overflow for msmpeg4 */
if (s->out_format == FMT_H263 && !s->h263_msmpeg4)
- range = range * 2;
+ *range *= 2;
if (s->unrestricted_mv) {
- xmin = -16;
- ymin = -16;
+ *xmin = -16; /* unrestricted vectors: the lower limits start 16 before the picture origin */
+ *ymin = -16;
if (s->h263_plus)
- range *= 2;
+ *range *= 2;
if(s->avctx==NULL || s->avctx->codec->id!=CODEC_ID_MPEG4){
- xmax = s->mb_width*16;
- ymax = s->mb_height*16;
+ *xmax = s->mb_width*16;
+ *ymax = s->mb_height*16;
}else {
/* XXX: dunno if this is correct but ffmpeg4 decoder wont like it otherwise
(cuz the drawn edge isnt large enough))*/
- xmax = s->width;
- ymax = s->height;
+ *xmax = s->width; /* MPEG4 codec: upper limits use s->width / s->height instead of the macroblock-based size */
+ *ymax = s->height;
}
} else {
- xmin = 0;
- ymin = 0;
- xmax = s->mb_width*16 - 16;
- ymax = s->mb_height*16 - 16;
+ *xmin = 0; /* restricted vectors: limits run from 0 to (macroblock count - 1)*16 */
+ *ymin = 0;
+ *xmax = s->mb_width*16 - 16;
+ *ymax = s->mb_height*16 - 16;
+ }
+}
+
+static inline int mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, int ymax, int mx, int my, int shift) /* Searches a separate vector for each of the 4 sub-blocks, stores them in s->motion_val and returns the sum of the 4 scores; (mx,my) is offered to the search as candidate P_MV1. */
+{
+ int block;
+ int P[10][2]; /* candidate predictors; the P_* names used below are defined outside this chunk (presumably macros indexing P) */
+ uint8_t *ref_picture= s->last_picture[0];
+ int dmin_sum=0;
+
+ for(block=0; block<4; block++){
+ int mx4, my4;
+ int pred_x4, pred_y4;
+ int dmin4;
+ static const int off[4]= {2, 1, 1, -1}; /* per-block column offset used to fetch the top-right predictor */
+ const int mot_stride = s->block_wrap[0];
+ const int mot_xy = s->block_index[block];
+// const int block_x= (block&1);
+// const int block_y= (block>>1);
+#if 1 // this saves us a bit of clipping work and shouldn't affect compression in a negative way
+ const int rel_xmin4= xmin;
+ const int rel_xmax4= xmax;
+ const int rel_ymin4= ymin;
+ const int rel_ymax4= ymax;
+#else
+ const int rel_xmin4= xmin - block_x*8;
+ const int rel_xmax4= xmax - block_x*8 + 8;
+ const int rel_ymin4= ymin - block_y*8;
+ const int rel_ymax4= ymax - block_y*8 + 8;
+#endif
+ P_LAST[0] = s->motion_val[mot_xy ][0]; /* value currently stored for this block; it is overwritten at the end of the iteration */
+ P_LAST[1] = s->motion_val[mot_xy ][1];
+ P_LEFT[0] = s->motion_val[mot_xy - 1][0];
+ P_LEFT[1] = s->motion_val[mot_xy - 1][1];
+ P_LAST_RIGHT[0] = s->motion_val[mot_xy + 1][0];
+ P_LAST_RIGHT[1] = s->motion_val[mot_xy + 1][1];
+ P_LAST_BOTTOM[0]= s->motion_val[mot_xy + 1*mot_stride][0];
+ P_LAST_BOTTOM[1]= s->motion_val[mot_xy + 1*mot_stride][1];
+
+ if(P_LEFT[0] > (rel_xmax4<<shift)) P_LEFT[0] = (rel_xmax4<<shift); /* clip the predictors against the limits, scaled by shift */
+ if(P_LAST_RIGHT[0] < (rel_xmin4<<shift)) P_LAST_RIGHT[0] = (rel_xmin4<<shift);
+ if(P_LAST_BOTTOM[1]< (rel_ymin4<<shift)) P_LAST_BOTTOM[1]= (rel_ymin4<<shift);
+
+ /* special case for first line */
+ if ((s->mb_y == 0 || s->first_slice_line) && block<2) {
+ pred_x4= P_LEFT[0]; /* P_TOP, P_TOPRIGHT and P_MEDIAN are not filled in on this path */
+ pred_y4= P_LEFT[1];
+ } else {
+ P_TOP[0] = s->motion_val[mot_xy - mot_stride ][0];
+ P_TOP[1] = s->motion_val[mot_xy - mot_stride ][1];
+ P_TOPRIGHT[0] = s->motion_val[mot_xy - mot_stride + off[block]][0];
+ P_TOPRIGHT[1] = s->motion_val[mot_xy - mot_stride + off[block]][1];
+ if(P_TOP[1] > (rel_ymax4<<shift)) P_TOP[1] = (rel_ymax4<<shift);
+ if(P_TOPRIGHT[0] < (rel_xmin4<<shift)) P_TOPRIGHT[0]= (rel_xmin4<<shift);
+ if(P_TOPRIGHT[0] > (rel_xmax4<<shift)) P_TOPRIGHT[0]= (rel_xmax4<<shift);
+ if(P_TOPRIGHT[1] > (rel_ymax4<<shift)) P_TOPRIGHT[1]= (rel_ymax4<<shift);
+
+ P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
+ P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
+
+ if(s->out_format == FMT_H263){
+ pred_x4 = P_MEDIAN[0];
+ pred_y4 = P_MEDIAN[1];
+ }else { /* mpeg1 at least */
+ pred_x4= P_LEFT[0];
+ pred_y4= P_LEFT[1];
+ }
+ }
+ P_MV1[0]= mx; /* the caller's vector becomes one more candidate */
+ P_MV1[1]= my;
+
+ dmin4 = epzs_motion_search4(s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, ref_picture);
+
+ dmin4= fast_halfpel_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, /* half-pel refinement with the 8x8 comparison functions */
+ pred_x4, pred_y4, ref_picture, pix_abs8x8_x2,
+ pix_abs8x8_y2, pix_abs8x8_xy2, block);
+
+ s->motion_val[ s->block_index[block] ][0]= mx4; /* store the result so that later blocks read it as a neighbour */
+ s->motion_val[ s->block_index[block] ][1]= my4;
+ dmin_sum+= dmin4;
}
- switch(s->full_search) {
+ return dmin_sum;
+}
+
+void ff_estimate_p_frame_motion(MpegEncContext * s,
+ int mb_x, int mb_y)
+{
+ UINT8 *pix, *ppix;
+ int sum, varc, vard, mx, my, range, dmin, xx, yy;
+ int xmin, ymin, xmax, ymax;
+ int rel_xmin, rel_ymin, rel_xmax, rel_ymax;
+ int pred_x=0, pred_y=0;
+ int P[10][2];
+ const int shift= 1+s->quarter_sample;
+ int mb_type=0;
+ uint8_t *ref_picture= s->last_picture[0];
+
+ get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, s->f_code);
+ rel_xmin= xmin - mb_x*16;
+ rel_xmax= xmax - mb_x*16;
+ rel_ymin= ymin - mb_y*16;
+ rel_ymax= ymax - mb_y*16;
+ s->skip_me=0;
+
+ switch(s->me_method) {
case ME_ZERO:
default:
no_motion_search(s, &mx, &my);
+ mx-= mb_x*16;
+ my-= mb_y*16;
dmin = 0;
break;
case ME_FULL:
- dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax);
+ dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax, ref_picture);
+ mx-= mb_x*16;
+ my-= mb_y*16;
break;
case ME_LOG:
- dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax);
+ dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
+ mx-= mb_x*16;
+ my-= mb_y*16;
break;
case ME_PHODS:
- dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax);
+ dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
+ mx-= mb_x*16;
+ my-= mb_y*16;
break;
case ME_X1:
case ME_EPZS:
@@ -874,133 +1127,69 @@ void estimate_motion(MpegEncContext * s,
const int mot_stride = s->block_wrap[0];
const int mot_xy = s->block_index[0];
- rel_xmin= xmin - mb_x*16;
- rel_xmax= xmax - mb_x*16;
- rel_ymin= ymin - mb_y*16;
- rel_ymax= ymax - mb_y*16;
+ P_LAST[0] = s->motion_val[mot_xy ][0];
+ P_LAST[1] = s->motion_val[mot_xy ][1];
+ P_LEFT[0] = s->motion_val[mot_xy - 1][0];
+ P_LEFT[1] = s->motion_val[mot_xy - 1][1];
+ P_LAST_RIGHT[0] = s->motion_val[mot_xy + 2][0];
+ P_LAST_RIGHT[1] = s->motion_val[mot_xy + 2][1];
+ P_LAST_BOTTOM[0]= s->motion_val[mot_xy + 2*mot_stride][0];
+ P_LAST_BOTTOM[1]= s->motion_val[mot_xy + 2*mot_stride][1];
- P[0][0] = s->motion_val[mot_xy ][0];
- P[0][1] = s->motion_val[mot_xy ][1];
- P[1][0] = s->motion_val[mot_xy - 1][0];
- P[1][1] = s->motion_val[mot_xy - 1][1];
- if(P[1][0] > (rel_xmax<<shift)) P[1][0]= (rel_xmax<<shift);
+ if(P_LEFT[0] > (rel_xmax<<shift)) P_LEFT[0] = (rel_xmax<<shift);
+ if(P_LAST_RIGHT[0] < (rel_xmin<<shift)) P_LAST_RIGHT[0] = (rel_xmin<<shift);
+ if(P_LAST_BOTTOM[1]< (rel_ymin<<shift)) P_LAST_BOTTOM[1]= (rel_ymin<<shift);
/* special case for first line */
- if ((mb_y == 0 || s->first_slice_line || s->first_gob_line)) {
- P[4][0] = P[1][0];
- P[4][1] = P[1][1];
+ if ((mb_y == 0 || s->first_slice_line)) {
+ pred_x= P_LEFT[0];
+ pred_y= P_LEFT[1];
} else {
- P[2][0] = s->motion_val[mot_xy - mot_stride ][0];
- P[2][1] = s->motion_val[mot_xy - mot_stride ][1];
- P[3][0] = s->motion_val[mot_xy - mot_stride + 2 ][0];
- P[3][1] = s->motion_val[mot_xy - mot_stride + 2 ][1];
- if(P[2][1] > (rel_ymax<<shift)) P[2][1]= (rel_ymax<<shift);
- if(P[3][0] < (rel_xmin<<shift)) P[3][0]= (rel_xmin<<shift);
- if(P[3][1] > (rel_ymax<<shift)) P[3][1]= (rel_ymax<<shift);
+ P_TOP[0] = s->motion_val[mot_xy - mot_stride ][0];
+ P_TOP[1] = s->motion_val[mot_xy - mot_stride ][1];
+ P_TOPRIGHT[0] = s->motion_val[mot_xy - mot_stride + 2][0];
+ P_TOPRIGHT[1] = s->motion_val[mot_xy - mot_stride + 2][1];
+ if(P_TOP[1] > (rel_ymax<<shift)) P_TOP[1] = (rel_ymax<<shift);
+ if(P_TOPRIGHT[0] < (rel_xmin<<shift)) P_TOPRIGHT[0]= (rel_xmin<<shift);
+ if(P_TOPRIGHT[1] > (rel_ymax<<shift)) P_TOPRIGHT[1]= (rel_ymax<<shift);
- P[4][0]= mid_pred(P[1][0], P[2][0], P[3][0]);
- P[4][1]= mid_pred(P[1][1], P[2][1], P[3][1]);
- }
- if(s->out_format == FMT_H263){
- pred_x = P[4][0];
- pred_y = P[4][1];
- }else { /* mpeg1 at least */
- pred_x= P[1][0];
- pred_y= P[1][1];
+ P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
+ P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
+
+ if(s->out_format == FMT_H263){
+ pred_x = P_MEDIAN[0];
+ pred_y = P_MEDIAN[1];
+ }else { /* mpeg1 at least */
+ pred_x= P_LEFT[0];
+ pred_y= P_LEFT[1];
+ }
}
}
- dmin = epzs_motion_search(s, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax);
+ dmin = epzs_motion_search(s, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, ref_picture);
- mx+= mb_x*16;
- my+= mb_y*16;
break;
}
-
- if(s->flags&CODEC_FLAG_4MV){
- int block;
-
- mb_type|= MB_TYPE_INTER4V;
-
- for(block=0; block<4; block++){
- int mx4, my4;
- int pred_x4, pred_y4;
- int dmin4;
- static const int off[4]= {2, 1, 1, -1};
- const int mot_stride = s->block_wrap[0];
- const int mot_xy = s->block_index[block];
- const int block_x= mb_x*2 + (block&1);
- const int block_y= mb_y*2 + (block>>1);
-
- const int rel_xmin4= xmin - block_x*8;
- const int rel_xmax4= xmax - block_x*8 + 8;
- const int rel_ymin4= ymin - block_y*8;
- const int rel_ymax4= ymax - block_y*8 + 8;
-
- P[0][0] = s->motion_val[mot_xy ][0];
- P[0][1] = s->motion_val[mot_xy ][1];
- P[1][0] = s->motion_val[mot_xy - 1][0];
- P[1][1] = s->motion_val[mot_xy - 1][1];
- if(P[1][0] > (rel_xmax4<<shift)) P[1][0]= (rel_xmax4<<shift);
-
- /* special case for first line */
- if ((mb_y == 0 || s->first_slice_line || s->first_gob_line) && block<2) {
- P[4][0] = P[1][0];
- P[4][1] = P[1][1];
- } else {
- P[2][0] = s->motion_val[mot_xy - mot_stride ][0];
- P[2][1] = s->motion_val[mot_xy - mot_stride ][1];
- P[3][0] = s->motion_val[mot_xy - mot_stride + off[block]][0];
- P[3][1] = s->motion_val[mot_xy - mot_stride + off[block]][1];
- if(P[2][1] > (rel_ymax4<<shift)) P[2][1]= (rel_ymax4<<shift);
- if(P[3][0] < (rel_xmin4<<shift)) P[3][0]= (rel_xmin4<<shift);
- if(P[3][0] > (rel_xmax4<<shift)) P[3][0]= (rel_xmax4<<shift);
- if(P[3][1] > (rel_ymax4<<shift)) P[3][1]= (rel_ymax4<<shift);
-
- P[4][0]= mid_pred(P[1][0], P[2][0], P[3][0]);
- P[4][1]= mid_pred(P[1][1], P[2][1], P[3][1]);
- }
- if(s->out_format == FMT_H263){
- pred_x4 = P[4][0];
- pred_y4 = P[4][1];
- }else { /* mpeg1 at least */
- pred_x4= P[1][0];
- pred_y4= P[1][1];
- }
- P[5][0]= mx - mb_x*16;
- P[5][1]= my - mb_y*16;
-
- dmin4 = epzs_motion_search4(s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4);
-
- halfpel_motion_search4(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4,
- pred_x4, pred_y4, block_x, block_y);
-
- s->motion_val[ s->block_index[block] ][0]= mx4;
- s->motion_val[ s->block_index[block] ][1]= my4;
- }
- }
/* intra / predictive decision */
xx = mb_x * 16;
yy = mb_y * 16;
pix = s->new_picture[0] + (yy * s->linesize) + xx;
- /* At this point (mx,my) are full-pell and the absolute displacement */
- ppix = s->last_picture[0] + (my * s->linesize) + mx;
+ /* At this point (mx,my) are full-pell and the relative displacement */
+ ppix = ref_picture + ((yy+my) * s->linesize) + (xx+mx);
sum = pix_sum(pix, s->linesize);
-#if 0
- varc = pix_dev(pix, s->linesize, (sum+128)>>8) + INTER_BIAS;
- vard = pix_abs16x16(pix, ppix, s->linesize);
-#else
+
sum= (sum+8)>>4;
- varc = ((pix_norm1(pix, s->linesize) - sum*sum + 128 + 500)>>8);
+ varc = (pix_norm1(pix, s->linesize) - sum*sum + 500 + 128)>>8;
vard = (pix_norm(pix, ppix, s->linesize)+128)>>8;
-#endif
-
- s->mb_var[s->mb_width * mb_y + mb_x] = varc;
- s->avg_mb_var+= varc;
- s->mc_mb_var += vard;
-
+//printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout);
+ s->mb_var [s->mb_width * mb_y + mb_x] = varc;
+ s->mc_mb_var[s->mb_width * mb_y + mb_x] = vard;
+ s->mb_var_sum += varc;
+ s->mc_mb_var_sum += vard;
+//printf("E%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout);
+
#if 0
printf("varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n",
varc, s->avg_mb_var, sum, vard, mx - xx, my - yy);
@@ -1010,68 +1199,556 @@ void estimate_motion(MpegEncContext * s,
mb_type|= MB_TYPE_INTRA;
if (varc*2 + 200 > vard){
mb_type|= MB_TYPE_INTER;
- halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, pred_x, pred_y);
+ if(s->me_method >= ME_EPZS)
+ fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
+ pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2,
+ pix_abs16x16_xy2, 0);
+ else
+ halfpel_motion_search( s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
+ pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2,
+ pix_abs16x16_xy2, 0);
}else{
- mx = mx*2 - mb_x*32;
- my = my*2 - mb_y*32;
+ mx <<=1;
+ my <<=1;
}
+ if((s->flags&CODEC_FLAG_4MV)
+ && !s->skip_me && varc>50 && vard>10){
+ mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift);
+ mb_type|=MB_TYPE_INTER4V;
+
+ set_p_mv_tables(s, mx, my, 0);
+ }else
+ set_p_mv_tables(s, mx, my, 1);
}else{
if (vard <= 64 || vard < varc) {
mb_type|= MB_TYPE_INTER;
- if (s->full_search != ME_ZERO) {
- halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, pred_x, pred_y);
+ if (s->me_method != ME_ZERO) {
+ if(s->me_method >= ME_EPZS)
+ dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
+ pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2,
+ pix_abs16x16_xy2, 0);
+ else
+ dmin= halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
+ pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2,
+ pix_abs16x16_xy2, 0);
+ if((s->flags&CODEC_FLAG_4MV)
+ && !s->skip_me && varc>50 && vard>10){
+ int dmin4= mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift);
+ if(dmin4 + 128 <dmin)
+ mb_type= MB_TYPE_INTER4V;
+ }
+ set_p_mv_tables(s, mx, my, mb_type!=MB_TYPE_INTER4V);
+
} else {
- mx -= 16 * mb_x;
- my -= 16 * mb_y;
+ mx <<=1;
+ my <<=1;
+ }
+#if 0
+ if (vard < 10) {
+ skip++;
+ fprintf(stderr,"\nEarly skip: %d vard: %2d varc: %5d dmin: %d",
+ skip, vard, varc, dmin);
}
+#endif
}else{
mb_type|= MB_TYPE_INTRA;
- mx = 0;//mx*2 - 32 * mb_x;
- my = 0;//my*2 - 32 * mb_y;
+ mx = 0;
+ my = 0;
}
}
s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
- set_mv_tables(s, mx, my);
}
-#else
+/*
+ * Estimate one 16x16 motion vector for macroblock (mb_x, mb_y) against
+ * ref_picture, using the search selected by s->me_method followed by a
+ * half-pel refinement.  The vector is stored in
+ * mv_table[(mb_y + 1)*(s->mb_width + 2) + mb_x + 1]; the score returned by
+ * the half-pel refinement is the return value.
+ */
+int ff_estimate_motion_b(MpegEncContext * s,
+                       int mb_x, int mb_y, int16_t (*mv_table)[2], uint8_t *ref_picture, int f_code)
+{
+    const int shift = 1 + s->quarter_sample;
+    const int mot_stride = s->mb_width + 2;
+    const int mot_xy = (mb_y + 1)*mot_stride + mb_x + 1;
+    const int mb_px = mb_x*16;
+    const int mb_py = mb_y*16;
+    int P[10][2];
+    int pred_x = 0, pred_y = 0;
+    int mx, my, range, dmin;
+    int xmin, ymin, xmax, ymax;
+    int rel_xmin, rel_ymin, rel_xmax, rel_ymax;
+    /* every search except the EPZS one gets the macroblock position
+       subtracted from its result below */
+    int subtract_mb_pos = 1;
+
+    get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, f_code);
+    rel_xmin = xmin - mb_px;
+    rel_xmax = xmax - mb_px;
+    rel_ymin = ymin - mb_py;
+    rel_ymax = ymax - mb_py;
+
+    switch(s->me_method) {
+    case ME_FULL:
+        dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax, ref_picture);
+        break;
+    case ME_LOG:
+        dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
+        break;
+    case ME_PHODS:
+        dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
+        break;
+    case ME_X1:
+    case ME_EPZS:
+        {
+            const int xmax_s = rel_xmax<<shift;
+            const int xmin_s = rel_xmin<<shift;
+            const int ymin_s = rel_ymin<<shift;
+
+            subtract_mb_pos = 0;
+
+            /* candidate predictors taken from the neighbouring table entries */
+            P_LAST[0]        = mv_table[mot_xy             ][0];
+            P_LAST[1]        = mv_table[mot_xy             ][1];
+            P_LEFT[0]        = mv_table[mot_xy - 1         ][0];
+            P_LEFT[1]        = mv_table[mot_xy - 1         ][1];
+            P_LAST_RIGHT[0]  = mv_table[mot_xy + 1         ][0];
+            P_LAST_RIGHT[1]  = mv_table[mot_xy + 1         ][1];
+            P_LAST_BOTTOM[0] = mv_table[mot_xy + mot_stride][0];
+            P_LAST_BOTTOM[1] = mv_table[mot_xy + mot_stride][1];
+
+            if(P_LEFT[0]        > xmax_s) P_LEFT[0]        = xmax_s;
+            if(P_LAST_RIGHT[0]  < xmin_s) P_LAST_RIGHT[0]  = xmin_s;
+            if(P_LAST_BOTTOM[1] < ymin_s) P_LAST_BOTTOM[1] = ymin_s;
+
+            /* the row above is only used when this is not the first line */
+            if (mb_y != 0 && !s->first_slice_line) {
+                const int ymax_s = rel_ymax<<shift;
+
+                P_TOP[0]      = mv_table[mot_xy - mot_stride    ][0];
+                P_TOP[1]      = mv_table[mot_xy - mot_stride    ][1];
+                P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1][0];
+                P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1][1];
+                if(P_TOP[1]      > ymax_s) P_TOP[1]      = ymax_s;
+                if(P_TOPRIGHT[0] < xmin_s) P_TOPRIGHT[0] = xmin_s;
+                if(P_TOPRIGHT[1] > ymax_s) P_TOPRIGHT[1] = ymax_s;
+
+                P_MEDIAN[0] = mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
+                P_MEDIAN[1] = mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
+            }
+            pred_x = P_LEFT[0];
+            pred_y = P_LEFT[1];
+        }
+        dmin = epzs_motion_search(s, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, ref_picture);
+        break;
+    case ME_ZERO:
+    default:
+        no_motion_search(s, &mx, &my);
+        dmin = 0;
+        break;
+    }
+
+    if(subtract_mb_pos){
+        mx -= mb_px;
+        my -= mb_py;
+    }
+
+    dmin = fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
+                                      pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2,
+                                      pix_abs16x16_xy2, 0);
+
+    mv_table[mot_xy][0] = mx;
+    mv_table[mot_xy][1] = my;
+    return dmin;
+}
+
+
+/*
+ * Score a bidirectional prediction of macroblock (mb_x, mb_y).
+ * The forward vector (motion_fx, motion_fy) is applied to s->last_picture and
+ * the backward vector (motion_bx, motion_by) to s->next_picture; both
+ * predictions are averaged into s->me_scratchpad.  The return value is
+ * pix_abs16x16() of that average against s->new_picture plus the mv_penalty
+ * of both vector differences (vector minus pred_*) scaled by s->qscale.
+ */
+static inline int check_bidir_mv(MpegEncContext * s,
+                                 int mb_x, int mb_y,
+                                 int motion_fx, int motion_fy,
+                                 int motion_bx, int motion_by,
+                                 int pred_fx, int pred_fy,
+                                 int pred_bx, int pred_by)
+{
+    //FIXME optimize?
+    //FIXME direct mode penalty
+    UINT16 *penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
+    uint8_t *avg_dst = s->me_scratchpad;
+    uint8_t *ref;
+    int halfpel;
+    int ref_x, ref_y;
+    int score;
+
+    /* forward prediction is written to the scratchpad */
+    halfpel = ((motion_fy & 1) << 1) | (motion_fx & 1);
+    ref_x = mb_x * 16 + (motion_fx >> 1);
+    ref_y = mb_y * 16 + (motion_fy >> 1);
+    ref = s->last_picture[0] + (ref_y * s->linesize) + ref_x;
+    put_pixels_tab[halfpel](avg_dst    , ref    , s->linesize, 16);
+    put_pixels_tab[halfpel](avg_dst + 8, ref + 8, s->linesize, 16);
+
+    /* backward prediction is averaged on top of it */
+    halfpel = ((motion_by & 1) << 1) | (motion_bx & 1);
+    ref_x = mb_x * 16 + (motion_bx >> 1);
+    ref_y = mb_y * 16 + (motion_by >> 1);
+    ref = s->next_picture[0] + (ref_y * s->linesize) + ref_x;
+    avg_pixels_tab[halfpel](avg_dst    , ref    , s->linesize, 16);
+    avg_pixels_tab[halfpel](avg_dst + 8, ref + 8, s->linesize, 16);
+
+    score  = (penalty[motion_fx-pred_fx] + penalty[motion_fy-pred_fy])*s->qscale;
+    score += (penalty[motion_bx-pred_bx] + penalty[motion_by-pred_by])*s->qscale;
+    score += pix_abs16x16(s->new_picture[0] + mb_x*16 + mb_y*16*s->linesize, avg_dst, s->linesize);
+    return score;
+}
-/* test version which generates valid random vectors */
-int estimate_motion(MpegEncContext * s,
- int mb_x, int mb_y,
- int *mx_ptr, int *my_ptr)
+/*
+ * Copy the forward and backward vectors of macroblock (mb_x, mb_y) into the
+ * bidirectional tables and return their score from check_bidir_mv(), using
+ * the left neighbour's bidirectional vectors as predictors.
+ * No actual refinement is done yet (see the FIXME below).
+ */
+static inline int bidir_refine(MpegEncContext * s,
+                               int mb_x, int mb_y)
{
- int xx, yy, x1, y1, x2, y2, range;
-
- if ((random() % 10) >= 5) {
- range = 8 * (1 << (s->f_code - 1));
- if (s->out_format == FMT_H263 && !s->h263_msmpeg4)
- range = range * 2;
-
- xx = 16 * s->mb_x;
- yy = 16 * s->mb_y;
- x1 = xx - range;
- if (x1 < 0)
- x1 = 0;
- x2 = xx + range - 1;
- if (x2 > (s->width - 16))
- x2 = s->width - 16;
- y1 = yy - range;
- if (y1 < 0)
- y1 = 0;
- y2 = yy + range - 1;
- if (y2 > (s->height - 16))
- y2 = s->height - 16;
-
- *mx_ptr = (random() % (2 * (x2 - x1 + 1))) + 2 * (x1 - xx);
- *my_ptr = (random() % (2 * (y2 - y1 + 1))) + 2 * (y1 - yy);
- return 0;
+    const int mot_stride = s->mb_width + 2;
+    const int xy = (mb_y + 1)*mot_stride + mb_x + 1;
+    const int left = xy - 1;
+    /* predictors: the bidirectional vectors of the left neighbour */
+    const int pred_fx = s->b_bidir_forw_mv_table[left][0];
+    const int pred_fy = s->b_bidir_forw_mv_table[left][1];
+    const int pred_bx = s->b_bidir_back_mv_table[left][0];
+    const int pred_by = s->b_bidir_back_mv_table[left][1];
+    int motion_fx, motion_fy, motion_bx, motion_by;
+
+    /* start from the vectors found by the unidirectional searches */
+    s->b_bidir_forw_mv_table[xy][0] = s->b_forw_mv_table[xy][0];
+    s->b_bidir_forw_mv_table[xy][1] = s->b_forw_mv_table[xy][1];
+    s->b_bidir_back_mv_table[xy][0] = s->b_back_mv_table[xy][0];
+    s->b_bidir_back_mv_table[xy][1] = s->b_back_mv_table[xy][1];
+    motion_fx = s->b_bidir_forw_mv_table[xy][0];
+    motion_fy = s->b_bidir_forw_mv_table[xy][1];
+    motion_bx = s->b_bidir_back_mv_table[xy][0];
+    motion_by = s->b_bidir_back_mv_table[xy][1];
+
+    //FIXME do refinement and add flag
+
+    return check_bidir_mv(s, mb_x, mb_y,
+                          motion_fx, motion_fy,
+                          motion_bx, motion_by,
+                          pred_fx, pred_fy,
+                          pred_bx, pred_by);
+}
+
+/*
+ * Search the direct mode delta vector for macroblock (mb_x, mb_y).
+ *
+ * The forward/backward vectors are derived from the co-located entry of
+ * s->p_mv_table scaled by s->pp_time / s->bp_time.  A bidirectional
+ * prediction built with those vectors is rendered for the 3x3 macroblock
+ * neighbourhood into s->me_scratchpad, epzs_motion_search() is run on that
+ * buffer, and the result is refined by +-1 (in the doubled units) with
+ * check_bidir_mv().
+ *
+ * Writes s->b_direct_mv_table, s->b_direct_forw_mv_table and
+ * s->b_direct_back_mv_table at this macroblock and returns the best score.
+ */
+static inline int direct_search(MpegEncContext * s,
+                                int mb_x, int mb_y)
+{
+    int P[10][2];
+    const int mot_stride = s->mb_width + 2;
+    const int mot_xy = (mb_y + 1)*mot_stride + mb_x + 1;
+    int dmin, dmin2;
+    int motion_fx, motion_fy, motion_bx, motion_by, motion_bx0, motion_by0;
+    const int motion_px= s->p_mv_table[mot_xy][0];
+    const int motion_py= s->p_mv_table[mot_xy][1];
+    const int time_pp= s->pp_time;
+    const int time_bp= s->bp_time;
+    const int time_pb= time_pp - time_bp;
+    int bx, by;
+    int mx, my, mx2, my2;
+    /* NOTE(review): the tile for offset (bx,by) is written at
+       me_scratchpad + ((by+1)*linesize + (bx+1))*16 below; confirm against
+       epzs_motion_search() addressing that this base offset (+1, not -1)
+       really maps the current macroblock onto the centre tile */
+    uint8_t *ref_picture= s->me_scratchpad - (mb_x + 1 + (mb_y + 1)*s->linesize)*16;
+    int16_t (*mv_table)[2]= s->b_direct_mv_table;
+
+    /* thanks to iso-mpeg the rounding is different for the zero vector, so we need to handle that ... */
+    motion_fx= (motion_px*time_pb)/time_pp;
+    motion_fy= (motion_py*time_pb)/time_pp;
+    motion_bx0= (-motion_px*time_bp)/time_pp;
+    motion_by0= (-motion_py*time_bp)/time_pp;
+    /* score of the zero delta vector, which uses the specially rounded backward vector */
+    dmin2= check_bidir_mv(s, mb_x, mb_y,
+                          motion_fx, motion_fy,
+                          motion_bx0, motion_by0,
+                          motion_fx, motion_fy,
+                          motion_bx0, motion_by0) - s->qscale;
+
+    /* backward vector used for every non-zero delta */
+    motion_bx= motion_fx - motion_px;
+    motion_by= motion_fy - motion_py;
+    for(by=-1; by<2; by++){
+        for(bx=-1; bx<2; bx++){
+            uint8_t *dest_y = s->me_scratchpad + (by+1)*s->linesize*16 + (bx+1)*16;
+            uint8_t *ptr;
+            int dxy;
+            int src_x, src_y;
+            const int width= s->width;
+            const int height= s->height;
+
+            /* forward prediction from the previous picture */
+            dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
+            src_x = (mb_x + bx) * 16 + (motion_fx >> 1);
+            src_y = (mb_y + by) * 16 + (motion_fy >> 1);
+            src_x = clip(src_x, -16, width);
+            if (src_x == width) dxy &= ~1;
+            src_y = clip(src_y, -16, height);
+            if (src_y == height) dxy &= ~2;
+
+            ptr = s->last_picture[0] + (src_y * s->linesize) + src_x;
+            put_pixels_tab[dxy](dest_y    , ptr    , s->linesize, 16);
+            put_pixels_tab[dxy](dest_y + 8, ptr + 8, s->linesize, 16);
+
+            /* backward prediction from the next picture, averaged in */
+            dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
+            src_x = (mb_x + bx) * 16 + (motion_bx >> 1);
+            src_y = (mb_y + by) * 16 + (motion_by >> 1);
+            src_x = clip(src_x, -16, width);
+            if (src_x == width) dxy &= ~1;
+            src_y = clip(src_y, -16, height);
+            if (src_y == height) dxy &= ~2;
+
+            /* fix: ptr used to be left pointing into last_picture here, so
+               the backward position computed above was never used */
+            ptr = s->next_picture[0] + (src_y * s->linesize) + src_x;
+            avg_pixels_tab[dxy](dest_y    , ptr    , s->linesize, 16);
+            avg_pixels_tab[dxy](dest_y + 8, ptr + 8, s->linesize, 16);
+        }
+    }
+
+    /* candidate predictors taken from the neighbouring direct delta vectors */
+    P_LAST[0]        = mv_table[mot_xy    ][0];
+    P_LAST[1]        = mv_table[mot_xy    ][1];
+    P_LEFT[0]        = mv_table[mot_xy - 1][0];
+    P_LEFT[1]        = mv_table[mot_xy - 1][1];
+    P_LAST_RIGHT[0]  = mv_table[mot_xy + 1][0];
+    P_LAST_RIGHT[1]  = mv_table[mot_xy + 1][1];
+    P_LAST_BOTTOM[0] = mv_table[mot_xy + mot_stride][0];
+    P_LAST_BOTTOM[1] = mv_table[mot_xy + mot_stride][1];
+/*
+    if(P_LEFT[0]       > (rel_xmax<<shift)) P_LEFT[0]       = (rel_xmax<<shift);
+    if(P_LAST_RIGHT[0] < (rel_xmin<<shift)) P_LAST_RIGHT[0] = (rel_xmin<<shift);
+    if(P_LAST_BOTTOM[1]< (rel_ymin<<shift)) P_LAST_BOTTOM[1]= (rel_ymin<<shift);
+*/
+    /* special case for first line */
+    if ((mb_y == 0 || s->first_slice_line)) {
} else {
- *mx_ptr = 0;
- *my_ptr = 0;
- return 1;
+        P_TOP[0]      = mv_table[mot_xy - mot_stride    ][0];
+        P_TOP[1]      = mv_table[mot_xy - mot_stride    ][1];
+        P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1][0];
+        P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1][1];
+
+        P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
+        P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
}
+    dmin = epzs_motion_search(s, &mx, &my, P, 0, 0, -16, -16, 15, 15, ref_picture);
+    if(mx==0 && my==0) dmin=99999999; // not representable, due to rounding stuff
+    /* fall back to the zero delta vector scored above when it is better */
+    if(dmin2<dmin){
+        dmin= dmin2;
+        mx=0;
+        my=0;
+    }
+#if 1
+    /* double the vector, then try the 8 neighbours at distance 1 */
+    mx2= mx= mx*2;
+    my2= my= my*2;
+    for(by=-1; by<2; by++){
+        if(my2+by < -32) continue;
+        for(bx=-1; bx<2; bx++){
+            if(bx==0 && by==0) continue;
+            if(mx2+bx < -32) continue;
+            dmin2= check_bidir_mv(s, mb_x, mb_y,
+                                  mx2+bx+motion_fx, my2+by+motion_fy,
+                                  mx2+bx+motion_bx, my2+by+motion_by,
+                                  mx2+bx+motion_fx, my2+by+motion_fy,
+                                  motion_bx, motion_by) - s->qscale;
+
+            if(dmin2<dmin){
+                dmin=dmin2;
+                mx= mx2 + bx;
+                my= my2 + by;
+            }
+        }
+    }
+#else
+    mx*=2; my*=2;
+#endif
+    /* the zero delta vector uses the specially rounded backward vector */
+    if(mx==0 && my==0){
+        motion_bx= motion_bx0;
+        motion_by= motion_by0;
+    }
+
+    s->b_direct_mv_table[mot_xy][0]= mx;
+    s->b_direct_mv_table[mot_xy][1]= my;
+    s->b_direct_forw_mv_table[mot_xy][0]= motion_fx + mx;
+    s->b_direct_forw_mv_table[mot_xy][1]= motion_fy + my;
+    s->b_direct_back_mv_table[mot_xy][0]= motion_bx + mx;
+    s->b_direct_back_mv_table[mot_xy][1]= motion_by + my;
+    return dmin;
}
-#endif
+/*
+ * Run the direct, forward, backward and bidirectional searches for
+ * macroblock (mb_x, mb_y) and store the candidate type(s) in s->mb_type.
+ * With CODEC_FLAG_HQ all four types are flagged.  Otherwise only the type
+ * with the lowest score is kept, and that score is written to s->mc_mb_var
+ * and added to s->mc_mb_var_sum.
+ */
+void ff_estimate_b_frame_motion(MpegEncContext * s,
+                             int mb_x, int mb_y)
+{
+    const int quant= s->qscale;
+    int cand_score[4];
+    int cand_type[4];
+    int type;
+
+    /* bidir_refine() reads the tables filled by the two
+       ff_estimate_motion_b() calls, so it has to come last */
+    cand_type[0]= MB_TYPE_DIRECT;
+    cand_score[0]= direct_search(s, mb_x, mb_y);
+    cand_type[1]= MB_TYPE_FORWARD;
+    cand_score[1]= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, s->last_picture[0], s->f_code);
+    cand_type[2]= MB_TYPE_BACKWARD;
+    cand_score[2]= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, s->next_picture[0], s->b_code) - quant;
+    cand_type[3]= MB_TYPE_BIDIR;
+    cand_score[3]= bidir_refine(s, mb_x, mb_y);
+
+    if(s->flags&CODEC_FLAG_HQ){
+        type= MB_TYPE_FORWARD | MB_TYPE_BACKWARD | MB_TYPE_BIDIR | MB_TYPE_DIRECT;
+    }else{
+        int best= 0;
+        int k;
+
+        /* strictly smaller wins, so ties keep the earlier candidate */
+        for(k=1; k<4; k++){
+            if(cand_score[k] < cand_score[best])
+                best= k;
+        }
+        type= cand_type[best];
+        s->mc_mb_var_sum += cand_score[best];
+        s->mc_mb_var[mb_y*s->mb_width + mb_x] = cand_score[best];
+    }
+
+    s->mb_type[mb_y*s->mb_width + mb_x]= type;
+}
+
+/*
+ * Find the best f_code for motion estimation methods which do unlimited
+ * searches (s->me_method >= ME_EPZS); every other method gets 1.
+ *
+ * score[j] starts at s->mb_num*(8-j) and loses 170 for each macroblock of
+ * the given type whose vector maps (via s->fcode_tab) to a value above j,
+ * provided the picture is a B picture or the macroblock's mc_mb_var is below
+ * its mb_var.  The index in 1..7 with the highest score is returned; the
+ * lowest index wins ties.
+ */
+int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
+{
+    int score[8];
+    int i, y;
+    UINT8 * fcode_tab= s->fcode_tab;
+    int best_fcode;
+    int best_score;
+
+    if(s->me_method < ME_EPZS)
+        return 1;
+
+    for(i=0; i<8; i++) score[i]= s->mb_num*(8-i); //FIXME *2 and all others too, so it's the same but nicer
+
+    for(y=0; y<s->mb_height; y++){
+        int x;
+        int xy= (y+1)* (s->mb_width+2) + 1;
+        int mb= y*s->mb_width;
+        for(x=0; x<s->mb_width; x++, mb++, xy++){
+            int fcode;
+            int j;
+
+            if(!(s->mb_type[mb] & type))
+                continue;
+            /* this test does not depend on j, so do it once per macroblock */
+            if(s->pict_type!=B_TYPE && s->mc_mb_var[mb] >= s->mb_var[mb])
+                continue;
+
+            fcode= MAX(fcode_tab[mv_table[xy][0] + MAX_MV],
+                       fcode_tab[mv_table[xy][1] + MAX_MV]);
+            for(j=0; j<fcode && j<8; j++)
+                score[j]-= 170;
+        }
+    }
+
+    /* Start from score[1] instead of a fixed sentinel: the old start value of
+       -10000000 could be undercut on very large frames, which left the
+       result at -1. */
+    best_fcode= 1;
+    best_score= score[1];
+    for(i=2; i<8; i++){
+        if(score[i] > best_score){
+            best_score= score[i];
+            best_fcode= i;
+        }
+    }
+
+    return best_fcode;
+}
+
+/*
+ * Convert P-frame macroblocks whose motion vectors do not fit s->f_code to
+ * intra.  A vector component does not fit when its s->fcode_tab entry is 0
+ * or larger than s->f_code.
+ *
+ * Pass 1 checks the 16x16 vectors in s->p_mv_table (MB_TYPE_INTER) and also
+ * zeroes the offending vector.  Pass 2, only with CODEC_FLAG_4MV, checks the
+ * four 8x8 vectors in s->motion_val (MB_TYPE_INTER4V).
+ */
+void ff_fix_long_p_mvs(MpegEncContext * s)
+{
+    const int f_code= s->f_code;
+    int y;
+    UINT8 * fcode_tab= s->fcode_tab;
+
+    /* clip / convert to intra 16x16 type MVs */
+    for(y=0; y<s->mb_height; y++){
+        int x;
+        int xy= (y+1)* (s->mb_width+2)+1;
+        int i= y*s->mb_width;
+        for(x=0; x<s->mb_width; x++){
+            if(s->mb_type[i]&MB_TYPE_INTER){
+                if(   fcode_tab[s->p_mv_table[xy][0] + MAX_MV] > f_code
+                   || fcode_tab[s->p_mv_table[xy][0] + MAX_MV] == 0
+                   || fcode_tab[s->p_mv_table[xy][1] + MAX_MV] > f_code
+                   || fcode_tab[s->p_mv_table[xy][1] + MAX_MV] == 0 ){
+                    s->mb_type[i] &= ~MB_TYPE_INTER;
+                    s->mb_type[i] |= MB_TYPE_INTRA;
+                    s->p_mv_table[xy][0] = 0;
+                    s->p_mv_table[xy][1] = 0;
+                }
+            }
+            xy++;
+            i++;
+        }
+    }
+
+    if(s->flags&CODEC_FLAG_4MV){
+        const int wrap= 2+ s->mb_width*2;
+
+        /* clip / convert to intra 8x8 type MVs */
+        for(y=0; y<s->mb_height; y++){
+            int xy= (y*2 + 1)*wrap + 1;
+            int i= y*s->mb_width;
+            int x;
+
+            for(x=0; x<s->mb_width; x++){
+                if(s->mb_type[i]&MB_TYPE_INTER4V){
+                    int block;
+                    for(block=0; block<4; block++){
+                        int off= (block& 1) + (block>>1)*wrap;
+                        int mx= s->motion_val[ xy + off ][0];
+                        int my= s->motion_val[ xy + off ][1];
+
+                        if(   fcode_tab[mx + MAX_MV] > f_code
+                           || fcode_tab[mx + MAX_MV] == 0
+                           || fcode_tab[my + MAX_MV] > f_code
+                           || fcode_tab[my + MAX_MV] == 0 ){
+                            s->mb_type[i] &= ~MB_TYPE_INTER4V;
+                            s->mb_type[i] |= MB_TYPE_INTRA;
+                        }
+                    }
+                }
+                /* fix: advance for every macroblock; these two statements
+                   used to sit inside the INTER4V test, so the indices got
+                   stuck on the first macroblock that was not 4MV */
+                xy+=2;
+                i++;
+            }
+        }
+    }
+}
+
+/*
+ * Handle B-frame vectors of mv_table which do not fit f_code, for the
+ * macroblocks that have one of the bits in type set.  A vector component
+ * does not fit when its s->fcode_tab entry is 0 or larger than f_code.
+ * If the macroblock has other candidate types left, the type bits are
+ * cleared; otherwise the vector is reset to zero.
+ */
+void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type)
+{
+    UINT8 * fcode_tab= s->fcode_tab;
+    int y;
+
+    /* clip / convert to intra 16x16 type MVs */
+    for(y=0; y<s->mb_height; y++){
+        int xy= (y+1)* (s->mb_width+2)+1;
+        int i= y*s->mb_width;
+        int x;
+
+        for(x=0; x<s->mb_width; x++, xy++, i++){
+            if(!(s->mb_type[i]&type))
+                continue;
+
+            if(   fcode_tab[mv_table[xy][0] + MAX_MV] > f_code
+               || fcode_tab[mv_table[xy][0] + MAX_MV] == 0
+               || fcode_tab[mv_table[xy][1] + MAX_MV] > f_code
+               || fcode_tab[mv_table[xy][1] + MAX_MV] == 0 ){
+                if(s->mb_type[i]&(~type)){
+                    s->mb_type[i] &= ~type;
+                }else{
+                    mv_table[xy][0] = 0;
+                    mv_table[xy][1] = 0;
+                    //this is certainly bad FIXME
+                }
+            }
+        }
+    }
+}
diff --git a/src/libffmpeg/libavcodec/mpeg12.c b/src/libffmpeg/libavcodec/mpeg12.c
index ac614d5ce..37e9b70ac 100644
--- a/src/libffmpeg/libavcodec/mpeg12.c
+++ b/src/libffmpeg/libavcodec/mpeg12.c
@@ -1,26 +1,25 @@
/*
* MPEG1 encoder / MPEG2 decoder
- * Copyright (c) 2000,2001 Gerard Lantau.
+ * Copyright (c) 2000,2001 Fabrice Bellard.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
*
- * This program is distributed in the hope that it will be useful,
+ * This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
//#define DEBUG
#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"
-#include "xineutils.h"
#include "mpeg12data.h"
@@ -34,8 +33,6 @@
#define EXT_START_CODE 0x000001b5
#define USER_START_CODE 0x000001b2
-#define ABS(a) ((a)<0 ? -(a) : (a))
-
static void mpeg1_encode_block(MpegEncContext *s,
DCTELEM *block,
int component);
@@ -400,8 +397,11 @@ void mpeg1_encode_init(MpegEncContext *s)
}
}
s->mv_penalty= mv_penalty;
-
s->fcode_tab= fcode_tab;
+ s->min_qcoeff=-255;
+ s->max_qcoeff= 255;
+ s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
+ s->inter_quant_bias= 0;
}
static inline void encode_dc(MpegEncContext *s, int diff, int component)
@@ -853,6 +853,8 @@ static int mpeg_decode_mb(MpegEncContext *s,
if (cbp & (1 << (5 - i))) {
if (mpeg2_decode_block_intra(s, block[i], i) < 0)
return -1;
+ } else {
+ s->block_last_index[i] = -1;
}
}
} else {
@@ -860,6 +862,8 @@ static int mpeg_decode_mb(MpegEncContext *s,
if (cbp & (1 << (5 - i))) {
if (mpeg2_decode_block_non_intra(s, block[i], i) < 0)
return -1;
+ } else {
+ s->block_last_index[i] = -1;
}
}
}
@@ -868,6 +872,8 @@ static int mpeg_decode_mb(MpegEncContext *s,
if (cbp & (1 << (5 - i))) {
if (mpeg1_decode_block(s, block[i], i) < 0)
return -1;
+ } else {
+ s->block_last_index[i] = -1;
}
}
}
@@ -1028,9 +1034,9 @@ static int mpeg2_decode_block_non_intra(MpegEncContext *s,
UINT8 *buf_ptr;
i = 0;
if (n < 4)
- matrix = s->non_intra_matrix;
+ matrix = s->inter_matrix;
else
- matrix = s->chroma_non_intra_matrix;
+ matrix = s->chroma_inter_matrix;
/* special case for the first coef. no need to add a second vlc table */
SAVE_BITS(&s->gb);
@@ -1184,6 +1190,9 @@ static int mpeg_decode_init(AVCodecContext *avctx)
s->buf_ptr = s->buffer;
s->mpeg_enc_ctx.picture_number = 0;
s->repeat_field = 0;
+ s->mpeg_enc_ctx.codec_id= avctx->codec->id;
+ avctx->mbskip_table= s->mpeg_enc_ctx.mbskip_table;
+ s->mpeg_enc_ctx.flags= avctx->flags;
return 0;
}
@@ -1273,6 +1282,7 @@ static void mpeg_decode_sequence_extension(MpegEncContext *s)
s->frame_rate = (s->frame_rate * frame_rate_ext_n) / frame_rate_ext_d;
dprintf("sequence extension\n");
s->mpeg2 = 1;
+ s->avctx->sub_id = 2; /* indicates mpeg2 found */
}
static void mpeg_decode_quant_matrix_extension(MpegEncContext *s)
@@ -1293,8 +1303,8 @@ static void mpeg_decode_quant_matrix_extension(MpegEncContext *s)
for(i=0;i<64;i++) {
v = get_bits(&s->gb, 8);
j = zigzag_direct[i];
- s->non_intra_matrix[j] = v;
- s->chroma_non_intra_matrix[j] = v;
+ s->inter_matrix[j] = v;
+ s->chroma_inter_matrix[j] = v;
}
}
if (get_bits1(&s->gb)) {
@@ -1308,7 +1318,7 @@ static void mpeg_decode_quant_matrix_extension(MpegEncContext *s)
for(i=0;i<64;i++) {
v = get_bits(&s->gb, 8);
j = zigzag_direct[i];
- s->chroma_non_intra_matrix[j] = v;
+ s->chroma_inter_matrix[j] = v;
}
}
}
@@ -1334,6 +1344,8 @@ static void mpeg_decode_picture_coding_extension(MpegEncContext *s)
/* composite display not parsed */
dprintf("intra_dc_precision=%d\n", s->intra_dc_precision);
dprintf("picture_structure=%d\n", s->picture_structure);
+ dprintf("top field first=%d\n", s->top_field_first);
+ dprintf("repeat first field=%d\n", s->repeat_first_field);
dprintf("conceal=%d\n", s->concealment_motion_vectors);
dprintf("intra_vlc_format=%d\n", s->intra_vlc_format);
dprintf("alternate_scan=%d\n", s->alternate_scan);
@@ -1387,7 +1399,6 @@ static int mpeg_decode_slice(AVCodecContext *avctx,
s->mb_x = -1;
s->mb_y = start_code;
s->mb_incr = 0;
-
/* start frame decoding */
if (s->first_slice) {
s->first_slice = 0;
@@ -1404,6 +1415,7 @@ static int mpeg_decode_slice(AVCodecContext *avctx,
for(;;) {
clear_blocks(s->block[0]);
+ emms_c();
ret = mpeg_decode_mb(s, s->block);
dprintf("ret=%d\n", ret);
if (ret < 0)
@@ -1460,7 +1472,7 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
Mpeg1Context *s1 = avctx->priv_data;
MpegEncContext *s = &s1->mpeg_enc_ctx;
int width, height, i, v, j;
-
+
init_get_bits(&s->gb, buf, buf_size);
width = get_bits(&s->gb, 12);
@@ -1488,7 +1500,12 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
s->avctx = avctx;
avctx->width = width;
avctx->height = height;
- avctx->frame_rate = frame_rate_tab[s->frame_rate_index];
+ if (s->frame_rate_index >= 9) {
+ /* at least give a valid frame rate (some old mpeg1 have this) */
+ avctx->frame_rate = 25 * FRAME_RATE_BASE;
+ } else {
+ avctx->frame_rate = frame_rate_tab[s->frame_rate_index];
+ }
s->frame_rate = avctx->frame_rate;
avctx->bit_rate = s->bit_rate;
@@ -1526,20 +1543,20 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
for(i=0;i<64;i++) {
v = get_bits(&s->gb, 8);
j = zigzag_direct[i];
- s->non_intra_matrix[j] = v;
- s->chroma_non_intra_matrix[j] = v;
+ s->inter_matrix[j] = v;
+ s->chroma_inter_matrix[j] = v;
}
#ifdef DEBUG
dprintf("non intra matrix present\n");
for(i=0;i<64;i++)
- dprintf(" %d", s->non_intra_matrix[zigzag_direct[i]]);
+ dprintf(" %d", s->inter_matrix[zigzag_direct[i]]);
printf("\n");
#endif
} else {
for(i=0;i<64;i++) {
v = default_non_intra_matrix[i];
- s->non_intra_matrix[i] = v;
- s->chroma_non_intra_matrix[i] = v;
+ s->inter_matrix[i] = v;
+ s->chroma_inter_matrix[i] = v;
}
}
@@ -1549,6 +1566,7 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
s->picture_structure = PICT_FRAME;
s->frame_pred_frame_dct = 1;
s->mpeg2 = 0;
+ avctx->sub_id = 1; /* indicates mpeg1 */
return 0;
}
@@ -1566,7 +1584,7 @@ static int mpeg_decode_frame(AVCodecContext *avctx,
dprintf("fill_buffer\n");
*data_size = 0;
-
+
/* special case for last picture */
if (buf_size == 0) {
if (s2->picture_number > 0) {
@@ -1583,15 +1601,18 @@ static int mpeg_decode_frame(AVCodecContext *avctx,
buf_ptr = buf;
buf_end = buf + buf_size;
-
- if (s->repeat_field % 2 == 1) {
+
+#if 0
+ if (s->repeat_field % 2 == 1) {
s->repeat_field++;
//fprintf(stderr,"\nRepeating last frame: %d -> %d! pict: %d %d", avctx->frame_number-1, avctx->frame_number,
- // s2->picture_number, s->repeat_field);
- *data_size = sizeof(AVPicture);
- goto the_end;
+ // s2->picture_number, s->repeat_field);
+ if (avctx->flags & CODEC_FLAG_REPEAT_FIELD) {
+ *data_size = sizeof(AVPicture);
+ goto the_end;
+ }
}
-
+#endif
while (buf_ptr < buf_end) {
buf_start = buf_ptr;
/* find start next code */
@@ -1641,13 +1662,27 @@ static int mpeg_decode_frame(AVCodecContext *avctx,
if (ret == 1) {
/* got a picture: exit */
/* first check if we must repeat the frame */
+ avctx->repeat_pict = 0;
+#if 0
if (s2->progressive_frame && s2->repeat_first_field) {
//fprintf(stderr,"\nRepeat this frame: %d! pict: %d",avctx->frame_number,s2->picture_number);
- s2->repeat_first_field = 0;
- s2->progressive_frame = 0;
+ //s2->repeat_first_field = 0;
+ //s2->progressive_frame = 0;
if (++s->repeat_field > 2)
s->repeat_field = 0;
+ avctx->repeat_pict = 1;
}
+#endif
+ if (s2->repeat_first_field) {
+ if (s2->progressive_sequence) {
+ if (s2->top_field_first)
+ avctx->repeat_pict = 4;
+ else
+ avctx->repeat_pict = 2;
+ } else if (s2->progressive_frame) {
+ avctx->repeat_pict = 1;
+ }
+ }
*data_size = sizeof(AVPicture);
goto the_end;
}
diff --git a/src/libffmpeg/libavcodec/mpeg4data.h b/src/libffmpeg/libavcodec/mpeg4data.h
index 91b99625f..e972a7576 100644
--- a/src/libffmpeg/libavcodec/mpeg4data.h
+++ b/src/libffmpeg/libavcodec/mpeg4data.h
@@ -4,13 +4,20 @@
#define BIN_ONLY_SHAPE 2
#define GRAY_SHAPE 3
+#define SIMPLE_VO_TYPE 1
+#define CORE_VO_TYPE 3
+
// aspect_ratio_info
-#define EXTENDET_PAR 15
+#define EXTENDED_PAR 15
//vol_sprite_usage / sprite_enable
#define STATIC_SPRITE 1
#define GMC_SPRITE 2
+#define MOTION_MARKER 0x1F001
+#define DC_MARKER 0x6B001
+
+
/* dc encoding for mpeg4 */
const UINT8 DCtab_lum[13][2] =
{
@@ -122,3 +129,27 @@ static const UINT16 pixel_aspect[16][2]={
{0, 0},
{0, 0},
};
+
+/* these matrixes will be permuted for the idct */
+INT16 ff_mpeg4_default_intra_matrix[64] = {
+ 8, 17, 18, 19, 21, 23, 25, 27,
+ 17, 18, 19, 21, 23, 25, 27, 28,
+ 20, 21, 22, 23, 24, 26, 28, 30,
+ 21, 22, 23, 24, 26, 28, 30, 32,
+ 22, 23, 24, 26, 28, 30, 32, 35,
+ 23, 24, 26, 28, 30, 32, 35, 38,
+ 25, 26, 28, 30, 32, 35, 38, 41,
+ 27, 28, 30, 32, 35, 38, 41, 45,
+};
+
+INT16 ff_mpeg4_default_non_intra_matrix[64] = {
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 18, 19, 20, 21, 22, 23, 24, 25,
+ 19, 20, 21, 22, 23, 24, 26, 27,
+ 20, 21, 22, 23, 25, 26, 27, 28,
+ 21, 22, 23, 24, 26, 27, 28, 30,
+ 22, 23, 24, 26, 27, 28, 30, 31,
+ 23, 24, 25, 27, 28, 30, 31, 33,
+};
+
diff --git a/src/libffmpeg/libavcodec/mpegvideo.c b/src/libffmpeg/libavcodec/mpegvideo.c
index 9f572c3d9..63242c9de 100644
--- a/src/libffmpeg/libavcodec/mpegvideo.c
+++ b/src/libffmpeg/libavcodec/mpegvideo.c
@@ -1,49 +1,42 @@
/*
* The simplest mpeg encoder (well, it was the simplest!)
- * Copyright (c) 2000,2001 Gerard Lantau.
+ * Copyright (c) 2000,2001 Fabrice Bellard.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
*
- * This program is distributed in the hope that it will be useful,
+ * This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
- * 4MV & hq encoding stuff by Michael Niedermayer <michaelni@gmx.at>
+ * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
*/
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-#include <string.h>
#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"
-#include "config.h"
-#include "xine-utils/xineutils.h"
-
#ifdef USE_FASTMEMCPY
#include "fastmemcpy.h"
#endif
static void encode_picture(MpegEncContext *s, int picture_number);
-static void rate_control_init(MpegEncContext *s);
-static int rate_estimate_qscale(MpegEncContext *s);
static void dct_unquantize_mpeg1_c(MpegEncContext *s,
DCTELEM *block, int n, int qscale);
+static void dct_unquantize_mpeg2_c(MpegEncContext *s,
+ DCTELEM *block, int n, int qscale);
static void dct_unquantize_h263_c(MpegEncContext *s,
DCTELEM *block, int n, int qscale);
static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w);
-static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale);
+static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
-int (*dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale)= dct_quantize_c;
+int (*dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow)= dct_quantize_c;
void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w)= draw_edges_c;
#define EDGE_WIDTH 16
@@ -53,6 +46,7 @@ void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w)= draw_edg
//#define DEBUG
+
/* for jpeg fast DCT */
#define CONST_BITS 14
@@ -75,37 +69,55 @@ static UINT8 h263_chroma_roundtab[16] = {
static UINT16 default_mv_penalty[MAX_FCODE+1][MAX_MV*2+1];
static UINT8 default_fcode_tab[MAX_MV*2+1];
-/* default motion estimation */
-int motion_estimation_method = ME_LOG;
-
extern UINT8 zigzag_end[64];
-static void convert_matrix(int *qmat, UINT16 *qmat16, const UINT16 *quant_matrix, int qscale)
+/* default motion estimation */
+int motion_estimation_method = ME_EPZS;
+
+static void convert_matrix(int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64],
+ const UINT16 *quant_matrix, int bias)
{
- int i;
+ int qscale;
- if (av_fdct == jpeg_fdct_ifast) {
- for(i=0;i<64;i++) {
- /* 16 <= qscale * quant_matrix[i] <= 7905 */
- /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */
- /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
- /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
-
- qmat[block_permute_op(i)] = (int)((UINT64_C(1) << (QMAT_SHIFT + 11)) /
- (aanscales[i] * qscale * quant_matrix[block_permute_op(i)]));
- }
- } else {
- for(i=0;i<64;i++) {
- /* We can safely suppose that 16 <= quant_matrix[i] <= 255
- So 16 <= qscale * quant_matrix[i] <= 7905
- so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
- so 32768 >= (1<<19) / (qscale * quant_matrix[i]) >= 67
- */
- qmat[i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
- qmat16[i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[block_permute_op(i)]);
+ for(qscale=1; qscale<32; qscale++){
+ int i;
+ if (av_fdct == fdct_ifast) {
+ for(i=0;i<64;i++) {
+ const int j= block_permute_op(i);
+ /* 16 <= qscale * quant_matrix[i] <= 7905 */
+ /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */
+ /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
+ /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
+
+ qmat[qscale][j] = (int)((UINT64_C(1) << (QMAT_SHIFT + 11)) /
+ (aanscales[i] * qscale * quant_matrix[j]));
+ }
+ } else {
+ for(i=0;i<64;i++) {
+ /* We can safely suppose that 16 <= quant_matrix[i] <= 255
+ So 16 <= qscale * quant_matrix[i] <= 7905
+ so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
+ so 32768 >= (1<<19) / (qscale * quant_matrix[i]) >= 67
+ */
+ qmat [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
+ qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[block_permute_op(i)]);
+
+ if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1;
+
+ qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]);
+ }
}
}
}
+/* CHECKED_ALLOCZ(p, size): assigns av_mallocz(size) to p; if the result is NULL it
+ * calls perror("malloc") and jumps to the `fail` label, which the enclosing function
+ * must therefore provide. The expansion is a bare { } block, and call sites in this
+ * file appear both with and without a trailing semicolon, so it must stay usable in
+ * both forms. Both arguments are used unparenthesized. TODO: move into common.c perhaps. */
+#define CHECKED_ALLOCZ(p, size)\
+{\
+ p= av_mallocz(size);\
+ if(p==NULL){\
+ perror("malloc");\
+ goto fail;\
+ }\
+}
/* init common structure for both encoder and decoder */
int MPV_common_init(MpegEncContext *s)
@@ -113,14 +125,19 @@ int MPV_common_init(MpegEncContext *s)
int c_size, i;
UINT8 *pict;
- if (s->out_format == FMT_H263)
- s->dct_unquantize = dct_unquantize_h263_c;
- else
- s->dct_unquantize = dct_unquantize_mpeg1_c;
+ s->dct_unquantize_h263 = dct_unquantize_h263_c;
+ s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_c;
+ s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_c;
#ifdef HAVE_MMX
MPV_common_init_mmx(s);
#endif
+ //setup default unquantizers (mpeg4 might change it later)
+ if(s->out_format == FMT_H263)
+ s->dct_unquantize = s->dct_unquantize_h263;
+ else
+ s->dct_unquantize = s->dct_unquantize_mpeg1;
+
s->mb_width = (s->width + 15) / 16;
s->mb_height = (s->height + 15) / 16;
s->mb_num = s->mb_width * s->mb_height;
@@ -135,58 +152,79 @@ int MPV_common_init(MpegEncContext *s)
c_size = (w >> shift) * (h >> shift);
pict_start = (w >> shift) * (EDGE_WIDTH >> shift) + (EDGE_WIDTH >> shift);
- pict = av_mallocz(c_size);
- if (pict == NULL)
- goto fail;
+ CHECKED_ALLOCZ(pict, c_size)
s->last_picture_base[i] = pict;
s->last_picture[i] = pict + pict_start;
+ if(i>0) memset(s->last_picture_base[i], 128, c_size);
- pict = av_mallocz(c_size);
- if (pict == NULL)
- goto fail;
+ CHECKED_ALLOCZ(pict, c_size)
s->next_picture_base[i] = pict;
s->next_picture[i] = pict + pict_start;
-
- if (s->has_b_frames) {
- pict = av_mallocz(c_size);
- if (pict == NULL)
- goto fail;
+ if(i>0) memset(s->next_picture_base[i], 128, c_size);
+
+ if (s->has_b_frames || s->codec_id==CODEC_ID_MPEG4) {
+ /* Note: the MPEG-4 check is here because of buggy encoders which do not set the low_delay flag but
+ do low-delay encoding, so we cannot always distinguish B-frame-containing streams from low_delay streams */
+ CHECKED_ALLOCZ(pict, c_size)
s->aux_picture_base[i] = pict;
s->aux_picture[i] = pict + pict_start;
+ if(i>0) memset(s->aux_picture_base[i], 128, c_size);
}
}
if (s->encoding) {
- /* Allocate MB type table */
- s->mb_type = av_mallocz(s->mb_num * sizeof(char));
- if (s->mb_type == NULL) {
- perror("malloc");
- goto fail;
- }
+ int j;
+ int mv_table_size= (s->mb_width+2)*(s->mb_height+2);
+
+ CHECKED_ALLOCZ(s->mb_var , s->mb_num * sizeof(INT16))
+ CHECKED_ALLOCZ(s->mc_mb_var, s->mb_num * sizeof(INT16))
+
+ /* Allocate MV tables */
+ CHECKED_ALLOCZ(s->p_mv_table , mv_table_size * 2 * sizeof(INT16))
+ CHECKED_ALLOCZ(s->b_forw_mv_table , mv_table_size * 2 * sizeof(INT16))
+ CHECKED_ALLOCZ(s->b_back_mv_table , mv_table_size * 2 * sizeof(INT16))
+ CHECKED_ALLOCZ(s->b_bidir_forw_mv_table , mv_table_size * 2 * sizeof(INT16))
+ CHECKED_ALLOCZ(s->b_bidir_back_mv_table , mv_table_size * 2 * sizeof(INT16))
+ CHECKED_ALLOCZ(s->b_direct_forw_mv_table, mv_table_size * 2 * sizeof(INT16))
+ CHECKED_ALLOCZ(s->b_direct_back_mv_table, mv_table_size * 2 * sizeof(INT16))
+ CHECKED_ALLOCZ(s->b_direct_mv_table , mv_table_size * 2 * sizeof(INT16))
+
+ CHECKED_ALLOCZ(s->me_scratchpad, s->linesize*16*3*sizeof(uint8_t))
- s->mb_var = av_mallocz(s->mb_num * sizeof(INT16));
- if (s->mb_var == NULL) {
- perror("malloc");
- goto fail;
+ CHECKED_ALLOCZ(s->me_map , ME_MAP_SIZE*sizeof(uint32_t))
+ CHECKED_ALLOCZ(s->me_score_map, ME_MAP_SIZE*sizeof(uint16_t))
+
+ if(s->max_b_frames){
+ for(j=0; j<REORDER_BUFFER_SIZE; j++){
+ int i;
+ for(i=0;i<3;i++) {
+ int w, h, shift;
+
+ w = s->linesize;
+ h = s->mb_height * 16;
+ shift = (i == 0) ? 0 : 1;
+ c_size = (w >> shift) * (h >> shift);
+
+ CHECKED_ALLOCZ(pict, c_size);
+ s->picture_buffer[j][i] = pict;
+ }
+ }
}
- /* Allocate MV table */
- /* By now we just have one MV per MB */
- s->mv_table[0] = av_mallocz(s->mb_num * sizeof(INT16));
- s->mv_table[1] = av_mallocz(s->mb_num * sizeof(INT16));
- if (s->mv_table[1] == NULL || s->mv_table[0] == NULL) {
- perror("malloc");
- goto fail;
+
+ if(s->codec_id==CODEC_ID_MPEG4){
+ CHECKED_ALLOCZ(s->tex_pb_buffer, PB_BUFFER_SIZE);
+ CHECKED_ALLOCZ( s->pb2_buffer, PB_BUFFER_SIZE);
}
}
if (s->out_format == FMT_H263 || s->encoding) {
int size;
+ /* Allocate MB type table */
+ CHECKED_ALLOCZ(s->mb_type , s->mb_num * sizeof(UINT8))
+
/* MV prediction */
size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
- s->motion_val = malloc(size * 2 * sizeof(INT16));
- if (s->motion_val == NULL)
- goto fail;
- memset(s->motion_val, 0, size * 2 * sizeof(INT16));
+ CHECKED_ALLOCZ(s->motion_val, size * 2 * sizeof(INT16));
}
if (s->h263_pred || s->h263_plus) {
@@ -197,43 +235,40 @@ int MPV_common_init(MpegEncContext *s)
y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
c_size = (s->mb_width + 2) * (s->mb_height + 2);
size = y_size + 2 * c_size;
- s->dc_val[0] = malloc(size * sizeof(INT16));
- if (s->dc_val[0] == NULL)
- goto fail;
+ CHECKED_ALLOCZ(s->dc_val[0], size * sizeof(INT16));
s->dc_val[1] = s->dc_val[0] + y_size;
s->dc_val[2] = s->dc_val[1] + c_size;
for(i=0;i<size;i++)
s->dc_val[0][i] = 1024;
/* ac values */
- s->ac_val[0] = av_mallocz(size * sizeof(INT16) * 16);
- if (s->ac_val[0] == NULL)
- goto fail;
+ CHECKED_ALLOCZ(s->ac_val[0], size * sizeof(INT16) * 16);
s->ac_val[1] = s->ac_val[0] + y_size;
s->ac_val[2] = s->ac_val[1] + c_size;
/* cbp values */
- s->coded_block = av_mallocz(y_size);
- if (!s->coded_block)
- goto fail;
+ CHECKED_ALLOCZ(s->coded_block, y_size);
/* which mb is a intra block */
- s->mbintra_table = av_mallocz(s->mb_num);
- if (!s->mbintra_table)
- goto fail;
+ CHECKED_ALLOCZ(s->mbintra_table, s->mb_num);
memset(s->mbintra_table, 1, s->mb_num);
+
+ /* divx501 bitstream reorder buffer */
+ CHECKED_ALLOCZ(s->bitstream_buffer, BITSTREAM_BUFFER_SIZE);
+
+ /* cbp, ac_pred, pred_dir */
+ CHECKED_ALLOCZ(s->cbp_table , s->mb_num * sizeof(UINT8))
+ CHECKED_ALLOCZ(s->pred_dir_table, s->mb_num * sizeof(UINT8))
+
+ CHECKED_ALLOCZ(s->qscale_table , s->mb_num * sizeof(UINT8))
}
/* default structure is frame */
s->picture_structure = PICT_FRAME;
/* init macroblock skip table */
- if (!s->encoding) {
- s->mbskip_table = av_mallocz(s->mb_num);
- if (!s->mbskip_table)
- goto fail;
- }
+ CHECKED_ALLOCZ(s->mbskip_table, s->mb_num);
- s->block= s->intra_block;
+ s->block= s->blocks[0];
s->context_initialized = 1;
return 0;
@@ -242,39 +277,49 @@ int MPV_common_init(MpegEncContext *s)
return -1;
}
+
+//extern int sads;
+
/* init common structure for both encoder and decoder */
void MPV_common_end(MpegEncContext *s)
{
int i;
- if (s->mb_type)
- free(s->mb_type);
- if (s->mb_var)
- free(s->mb_var);
- if (s->mv_table[0])
- free(s->mv_table[0]);
- if (s->mv_table[1])
- free(s->mv_table[1]);
- if (s->motion_val)
- free(s->motion_val);
- if (s->dc_val[0])
- free(s->dc_val[0]);
- if (s->ac_val[0])
- free(s->ac_val[0]);
- if (s->coded_block)
- free(s->coded_block);
- if (s->mbintra_table)
- free(s->mbintra_table);
-
- if (s->mbskip_table)
- free(s->mbskip_table);
+ av_freep(&s->mb_type);
+ av_freep(&s->mb_var);
+ av_freep(&s->mc_mb_var);
+ av_freep(&s->p_mv_table);
+ av_freep(&s->b_forw_mv_table);
+ av_freep(&s->b_back_mv_table);
+ av_freep(&s->b_bidir_forw_mv_table);
+ av_freep(&s->b_bidir_back_mv_table);
+ av_freep(&s->b_direct_forw_mv_table);
+ av_freep(&s->b_direct_back_mv_table);
+ av_freep(&s->b_direct_mv_table);
+ av_freep(&s->motion_val);
+ av_freep(&s->dc_val[0]);
+ av_freep(&s->ac_val[0]);
+ av_freep(&s->coded_block);
+ av_freep(&s->mbintra_table);
+ av_freep(&s->cbp_table);
+ av_freep(&s->pred_dir_table);
+ av_freep(&s->qscale_table);
+ av_freep(&s->me_scratchpad);
+ av_freep(&s->me_map);
+ av_freep(&s->me_score_map);
+
+ av_freep(&s->mbskip_table);
+ av_freep(&s->bitstream_buffer);
+ av_freep(&s->tex_pb_buffer);
+ av_freep(&s->pb2_buffer);
for(i=0;i<3;i++) {
- if (s->last_picture_base[i])
- free(s->last_picture_base[i]);
- if (s->next_picture_base[i])
- free(s->next_picture_base[i]);
- if (s->has_b_frames)
- free(s->aux_picture_base[i]);
+ int j;
+ av_freep(&s->last_picture_base[i]);
+ av_freep(&s->next_picture_base[i]);
+ av_freep(&s->aux_picture_base[i]);
+ for(j=0; j<REORDER_BUFFER_SIZE; j++){
+ av_freep(&s->picture_buffer[j][i]);
+ }
}
s->context_initialized = 0;
}
@@ -285,11 +330,17 @@ int MPV_encode_init(AVCodecContext *avctx)
MpegEncContext *s = avctx->priv_data;
int i;
+ avctx->pix_fmt = PIX_FMT_YUV420P;
+
s->bit_rate = avctx->bit_rate;
s->bit_rate_tolerance = avctx->bit_rate_tolerance;
s->frame_rate = avctx->frame_rate;
s->width = avctx->width;
s->height = avctx->height;
+ if(avctx->gop_size > 600){
+ fprintf(stderr, "Warning keyframe interval too large! reducing it ...\n");
+ avctx->gop_size=600;
+ }
s->gop_size = avctx->gop_size;
s->rtp_mode = avctx->rtp_mode;
s->rtp_payload_size = avctx->rtp_payload_size;
@@ -300,36 +351,56 @@ int MPV_encode_init(AVCodecContext *avctx)
s->max_qdiff= avctx->max_qdiff;
s->qcompress= avctx->qcompress;
s->qblur= avctx->qblur;
+ s->b_quant_factor= avctx->b_quant_factor;
+ s->b_quant_offset= avctx->b_quant_offset;
s->avctx = avctx;
s->aspect_ratio_info= avctx->aspect_ratio_info;
s->flags= avctx->flags;
-
+ s->max_b_frames= avctx->max_b_frames;
+ s->rc_strategy= avctx->rc_strategy;
+ s->b_frame_strategy= avctx->b_frame_strategy;
+ s->codec_id= avctx->codec->id;
+ s->luma_elim_threshold = avctx->luma_elim_threshold;
+ s->chroma_elim_threshold= avctx->chroma_elim_threshold;
+ s->strict_std_compliance= avctx->strict_std_compliance;
+ s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
+
if (s->gop_size <= 1) {
s->intra_only = 1;
s->gop_size = 12;
} else {
s->intra_only = 0;
}
- s->full_search = motion_estimation_method;
-
+
+ /* ME algorithm */
+ if (avctx->me_method == 0)
+ /* For compatibility */
+ s->me_method = motion_estimation_method;
+ else
+ s->me_method = avctx->me_method;
+
+ /* Fixed QSCALE */
s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE);
switch(avctx->codec->id) {
case CODEC_ID_MPEG1VIDEO:
s->out_format = FMT_MPEG1;
+ avctx->delay=0; //FIXME not sure, should check the spec
break;
case CODEC_ID_MJPEG:
s->out_format = FMT_MJPEG;
s->intra_only = 1; /* force intra only for jpeg */
s->mjpeg_write_tables = 1; /* write all tables */
+ s->mjpeg_data_only_frames = 0; /* write all the needed headers */
s->mjpeg_vsample[0] = 2; /* set up default sampling factors */
s->mjpeg_vsample[1] = 1; /* the only currently supported values */
s->mjpeg_vsample[2] = 1;
- s->mjpeg_hsample[0] = 2;
+ s->mjpeg_hsample[0] = 2;
s->mjpeg_hsample[1] = 1;
s->mjpeg_hsample[2] = 1;
if (mjpeg_init(s) < 0)
return -1;
+ avctx->delay=0;
break;
case CODEC_ID_H263:
if (h263_get_picture_format(s->width, s->height) == 7) {
@@ -337,6 +408,7 @@ int MPV_encode_init(AVCodecContext *avctx)
return -1;
}
s->out_format = FMT_H263;
+ avctx->delay=0;
break;
case CODEC_ID_H263P:
s->out_format = FMT_H263;
@@ -344,19 +416,25 @@ int MPV_encode_init(AVCodecContext *avctx)
s->rtp_payload_size = 1200;
s->h263_plus = 1;
s->unrestricted_mv = 1;
+ s->h263_aic = 1;
/* These are just to be sure */
s->umvplus = 0;
s->umvplus_dec = 0;
+ avctx->delay=0;
break;
case CODEC_ID_RV10:
s->out_format = FMT_H263;
s->h263_rv10 = 1;
+ avctx->delay=0;
break;
case CODEC_ID_MPEG4:
s->out_format = FMT_H263;
s->h263_pred = 1;
s->unrestricted_mv = 1;
+ s->has_b_frames= s->max_b_frames ? 1 : 0;
+ s->low_delay=0;
+ avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
break;
case CODEC_ID_MSMPEG4V1:
s->out_format = FMT_H263;
@@ -364,6 +442,7 @@ int MPV_encode_init(AVCodecContext *avctx)
s->h263_pred = 1;
s->unrestricted_mv = 1;
s->msmpeg4_version= 1;
+ avctx->delay=0;
break;
case CODEC_ID_MSMPEG4V2:
s->out_format = FMT_H263;
@@ -371,6 +450,7 @@ int MPV_encode_init(AVCodecContext *avctx)
s->h263_pred = 1;
s->unrestricted_mv = 1;
s->msmpeg4_version= 2;
+ avctx->delay=0;
break;
case CODEC_ID_MSMPEG4V3:
s->out_format = FMT_H263;
@@ -378,16 +458,12 @@ int MPV_encode_init(AVCodecContext *avctx)
s->h263_pred = 1;
s->unrestricted_mv = 1;
s->msmpeg4_version= 3;
+ avctx->delay=0;
break;
default:
return -1;
}
- if((s->flags&CODEC_FLAG_4MV) && !(s->flags&CODEC_FLAG_HQ)){
- printf("4MV is currently only supported in HQ mode\n");
- return -1;
- }
-
{ /* set up some save defaults, some codecs might override them later */
static int done=0;
if(!done){
@@ -410,7 +486,7 @@ int MPV_encode_init(AVCodecContext *avctx)
mpeg1_encode_init(s);
/* dont use mv_penalty table for crap MV as it would be confused */
- if(s->full_search<4) s->mv_penalty= default_mv_penalty;
+ if (s->me_method < ME_EPZS) s->mv_penalty = default_mv_penalty;
s->encoding = 1;
@@ -420,18 +496,32 @@ int MPV_encode_init(AVCodecContext *avctx)
/* init default q matrix */
for(i=0;i<64;i++) {
- s->intra_matrix[i] = default_intra_matrix[i];
- s->non_intra_matrix[i] = default_non_intra_matrix[i];
+ if(s->out_format == FMT_H263)
+ s->intra_matrix[i] = default_non_intra_matrix[i];
+ else
+ s->intra_matrix[i] = default_intra_matrix[i];
+
+ s->inter_matrix[i] = default_non_intra_matrix[i];
}
- /* rate control init */
- rate_control_init(s);
+ /* precompute matrix */
+ /* for mjpeg, we do include qscale in the matrix */
+ if (s->out_format != FMT_MJPEG) {
+ convert_matrix(s->q_intra_matrix, s->q_intra_matrix16, s->q_intra_matrix16_bias,
+ s->intra_matrix, s->intra_quant_bias);
+ convert_matrix(s->q_inter_matrix, s->q_inter_matrix16, s->q_inter_matrix16_bias,
+ s->inter_matrix, s->inter_quant_bias);
+ }
+
+ if(ff_rate_control_init(s) < 0)
+ return -1;
s->picture_number = 0;
s->picture_in_gop_number = 0;
s->fake_picture_number = 0;
/* motion detector init */
s->f_code = 1;
+ s->b_code = 1;
return 0;
}
@@ -443,6 +533,9 @@ int MPV_encode_end(AVCodecContext *avctx)
#ifdef STATS
print_stats();
#endif
+
+ ff_rate_control_uninit(s);
+
MPV_common_end(s);
if (s->out_format == FMT_MJPEG)
mjpeg_close(s);
@@ -485,12 +578,13 @@ void MPV_frame_start(MpegEncContext *s)
UINT8 *tmp;
s->mb_skiped = 0;
+ s->decoding_error=0;
+
if (s->pict_type == B_TYPE) {
for(i=0;i<3;i++) {
s->current_picture[i] = s->aux_picture[i];
}
} else {
- s->last_non_b_pict_type= s->pict_type;
for(i=0;i<3;i++) {
/* swap next and last */
tmp = s->last_picture[i];
@@ -504,9 +598,11 @@ void MPV_frame_start(MpegEncContext *s)
/* generic function for encode/decode called after a frame has been coded/decoded */
void MPV_frame_end(MpegEncContext *s)
{
+// if((s->picture_number%100)==0 && s->encoding) printf("sads:%d //\n", sads);
+
/* draw edge for correct motion prediction if outside */
if (s->pict_type != B_TYPE && !s->intra_only) {
- if(s->avctx==NULL || s->avctx->codec->id!=CODEC_ID_MPEG4 || s->divx_version==500){
+ if(s->avctx==NULL || s->avctx->codec->id!=CODEC_ID_MPEG4 || s->divx_version>=500){
draw_edges(s->current_picture[0], s->linesize, s->mb_width*16, s->mb_height*16, EDGE_WIDTH);
draw_edges(s->current_picture[1], s->linesize/2, s->mb_width*8, s->mb_height*8, EDGE_WIDTH/2);
draw_edges(s->current_picture[2], s->linesize/2, s->mb_width*8, s->mb_height*8, EDGE_WIDTH/2);
@@ -518,107 +614,195 @@ void MPV_frame_end(MpegEncContext *s)
}
}
emms_c();
+
+ if(s->pict_type!=B_TYPE){
+ s->last_non_b_pict_type= s->pict_type;
+ s->last_non_b_qscale= s->qscale;
+ s->last_non_b_mc_mb_var= s->mc_mb_var_sum;
+ s->num_available_buffers++;
+ if(s->num_available_buffers>2) s->num_available_buffers= 2;
+ }
}
-int MPV_encode_picture(AVCodecContext *avctx,
- unsigned char *buf, int buf_size, void *data)
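+/* reorder input for encoding: shifts s->coded_order down by one slot, stores the new input picture (by pointer or by copy) in the slot chosen from s->input_pict_type, and points s->new_picture at s->coded_order[0] */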
+void reorder_input(MpegEncContext *s, AVPicture *pict)
{
- MpegEncContext *s = avctx->priv_data;
- AVPicture *pict = data;
- int i, j;
+ int i, j, index;
+
+ if(s->max_b_frames > FF_MAX_B_FRAMES) s->max_b_frames= FF_MAX_B_FRAMES;
- if (s->fixed_qscale)
- s->qscale = avctx->quality;
+// delay= s->max_b_frames+1; (or 0 if no b frames cuz decoder diff)
- init_put_bits(&s->pb, buf, buf_size, NULL, NULL);
-
- s->force_type= (avctx->flags&CODEC_FLAG_TYPE) ?
- (avctx->key_frame ? I_TYPE : P_TYPE) : 0;
- if (!s->intra_only) {
- /* first picture of GOP is intra */
- if (s->picture_in_gop_number % s->gop_size==0 || s->force_type==I_TYPE){
- s->picture_in_gop_number=0;
- s->pict_type = I_TYPE;
- }else
- s->pict_type = P_TYPE;
- } else {
- s->pict_type = I_TYPE;
+ for(j=0; j<REORDER_BUFFER_SIZE-1; j++){
+ s->coded_order[j]= s->coded_order[j+1];
}
-
- MPV_frame_start(s);
-
- for(i=0;i<3;i++) {
- UINT8 *src = pict->data[i];
- UINT8 *dest = s->current_picture[i];
- int src_wrap = pict->linesize[i];
- int dest_wrap = s->linesize;
- int w = s->width;
- int h = s->height;
-
- if (i >= 1) {
- dest_wrap >>= 1;
- w >>= 1;
- h >>= 1;
+ s->coded_order[j].picture[0]= s->coded_order[j].picture[1]= s->coded_order[j].picture[2]= NULL; //catch uninitalized buffers
+ s->coded_order[j].pict_type=0;
+
+ switch(s->input_pict_type){
+ default:
+ case I_TYPE:
+ case S_TYPE:
+ case P_TYPE:
+ index= s->max_b_frames - s->b_frames_since_non_b;
+ s->b_frames_since_non_b=0;
+ break;
+ case B_TYPE:
+ index= s->max_b_frames + 1;
+ s->b_frames_since_non_b++;
+ break;
+ }
+//printf("index:%d type:%d strides: %d %d\n", index, s->input_pict_type, pict->linesize[0], s->linesize);
+ if( (index==0 || (s->flags&CODEC_FLAG_INPUT_PRESERVED))
+ && pict->linesize[0] == s->linesize
+ && pict->linesize[1] == s->linesize>>1
+ && pict->linesize[2] == s->linesize>>1){
+//printf("ptr\n");
+ for(i=0; i<3; i++){
+ s->coded_order[index].picture[i]= pict->data[i];
}
+ }else{
+//printf("copy\n");
+ for(i=0; i<3; i++){
+ uint8_t *src = pict->data[i];
+ uint8_t *dest;
+ int src_wrap = pict->linesize[i];
+ int dest_wrap = s->linesize;
+ int w = s->width;
+ int h = s->height;
+
+ if(index==0) dest= s->last_picture[i]+16; //is current_picture indeed but the switch hapens after reordering
+ else dest= s->picture_buffer[s->picture_buffer_index][i];
+
+ if (i >= 1) {
+ dest_wrap >>= 1;
+ w >>= 1;
+ h >>= 1;
+ }
- if(dest_wrap==src_wrap){
- s->new_picture[i] = pict->data[i];
- } else {
+ s->coded_order[index].picture[i]= dest;
for(j=0;j<h;j++) {
memcpy(dest, src, w);
dest += dest_wrap;
src += src_wrap;
}
- s->new_picture[i] = s->current_picture[i];
- }
+ }
+ if(index!=0){
+ s->picture_buffer_index++;
+ if(s->picture_buffer_index >= REORDER_BUFFER_SIZE-1) s->picture_buffer_index=0;
+ }
+ }
+ s->coded_order[index].pict_type = s->input_pict_type;
+ s->coded_order[index].qscale = s->input_qscale;
+ s->coded_order[index].force_type= s->force_input_type;
+ s->coded_order[index].picture_in_gop_number= s->input_picture_in_gop_number;
+ s->coded_order[index].picture_number= s->input_picture_number;
+
+ for(i=0; i<3; i++){
+ s->new_picture[i]= s->coded_order[0].picture[i];
}
+}
+
+int MPV_encode_picture(AVCodecContext *avctx,
+ unsigned char *buf, int buf_size, void *data)
+{
+ MpegEncContext *s = avctx->priv_data;
+ AVPicture *pict = data;
- encode_picture(s, s->picture_number);
- avctx->key_frame = (s->pict_type == I_TYPE);
- avctx->header_bits = s->header_bits;
- avctx->mv_bits = s->mv_bits;
- avctx->misc_bits = s->misc_bits;
- avctx->i_tex_bits = s->i_tex_bits;
- avctx->p_tex_bits = s->p_tex_bits;
- avctx->i_count = s->i_count;
- avctx->p_count = s->p_count;
- avctx->skip_count = s->skip_count;
+ s->input_qscale = avctx->quality;
- MPV_frame_end(s);
- s->picture_number++;
- s->picture_in_gop_number++;
+ init_put_bits(&s->pb, buf, buf_size, NULL, NULL);
- if (s->out_format == FMT_MJPEG)
- mjpeg_picture_trailer(s);
+ if(avctx->flags&CODEC_FLAG_TYPE){
+ s->input_pict_type=
+ s->force_input_type= avctx->key_frame ? I_TYPE : P_TYPE;
+ }else if(s->flags&CODEC_FLAG_PASS2){
+ s->input_pict_type=
+ s->force_input_type= s->rc_context.entry[s->input_picture_number].new_pict_type;
+ }else{
+ s->force_input_type=0;
+ if (!s->intra_only) {
+ /* first picture of GOP is intra */
+ if (s->input_picture_in_gop_number % s->gop_size==0){
+ s->input_pict_type = I_TYPE;
+ }else if(s->max_b_frames==0){
+ s->input_pict_type = P_TYPE;
+ }else{
+ if(s->b_frames_since_non_b < s->max_b_frames) //FIXME more IQ
+ s->input_pict_type = B_TYPE;
+ else
+ s->input_pict_type = P_TYPE;
+ }
+ } else {
+ s->input_pict_type = I_TYPE;
+ }
+ }
+
+ if(s->input_pict_type==I_TYPE)
+ s->input_picture_in_gop_number=0;
+
+ reorder_input(s, pict);
+
+ /* output? */
+ if(s->coded_order[0].picture[0]){
+
+ s->pict_type= s->coded_order[0].pict_type;
+ if (s->fixed_qscale) /* the ratecontrol needs the last qscale so we dont touch it for CBR */
+ s->qscale= s->coded_order[0].qscale;
+ s->force_type= s->coded_order[0].force_type;
+ s->picture_in_gop_number= s->coded_order[0].picture_in_gop_number;
+ s->picture_number= s->coded_order[0].picture_number;
+
+ MPV_frame_start(s);
+
+ encode_picture(s, s->picture_number);
+ avctx->key_frame = (s->pict_type == I_TYPE);
+ avctx->pict_type = s->pict_type;
+ avctx->real_pict_num = s->picture_number;
+ avctx->header_bits = s->header_bits;
+ avctx->mv_bits = s->mv_bits;
+ avctx->misc_bits = s->misc_bits;
+ avctx->i_tex_bits = s->i_tex_bits;
+ avctx->p_tex_bits = s->p_tex_bits;
+ avctx->i_count = s->i_count;
+ avctx->p_count = s->p_count;
+ avctx->skip_count = s->skip_count;
+
+ MPV_frame_end(s);
+
+ if (s->out_format == FMT_MJPEG)
+ mjpeg_picture_trailer(s);
+
+ avctx->quality = s->qscale;
+
+ if(s->flags&CODEC_FLAG_PASS1)
+ ff_write_pass1_stats(s);
+
+ }
+
+ s->input_picture_number++;
+ s->input_picture_in_gop_number++;
flush_put_bits(&s->pb);
- s->last_frame_bits= s->frame_bits;
s->frame_bits = (pbBufPtr(&s->pb) - s->pb.buf) * 8;
+ if(s->pict_type==B_TYPE) s->pb_frame_bits+= s->frame_bits;
+ else s->pb_frame_bits= s->frame_bits;
+
s->total_bits += s->frame_bits;
avctx->frame_bits = s->frame_bits;
//printf("fcode: %d, type: %d, head: %d, mv: %d, misc: %d, frame: %d, itex: %d, ptex: %d\n",
//s->f_code, avctx->key_frame, s->header_bits, s->mv_bits, s->misc_bits, s->frame_bits, s->i_tex_bits, s->p_tex_bits);
- avctx->quality = s->qscale;
if (avctx->get_psnr) {
/* At this point pict->data should have the original frame */
/* an s->current_picture should have the coded/decoded frame */
get_psnr(pict->data, s->current_picture,
pict->linesize, s->linesize, avctx);
+// printf("%f\n", avctx->psnr_y);
}
return pbBufPtr(&s->pb) - s->pb.buf;
}
-static inline int clip(int a, int amin, int amax)
-{
- if (a < amin)
- return amin;
- else if (a > amax)
- return amax;
- else
- return a;
-}
-
static inline void gmc1_motion(MpegEncContext *s,
UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
int dest_offset,
@@ -626,7 +810,7 @@ static inline void gmc1_motion(MpegEncContext *s,
int h)
{
UINT8 *ptr;
- int dxy, offset, mx, my, src_x, src_y, height, linesize;
+ int offset, src_x, src_y, linesize;
int motion_x, motion_y;
if(s->real_sprite_warping_points>1) printf("more than 1 warp point isnt supported\n");
@@ -705,6 +889,8 @@ if(s->quarter_sample)
pix_op[dxy](dest_y, ptr, linesize, h);
pix_op[dxy](dest_y + 8, ptr + 8, linesize, h);
+ if(s->flags&CODEC_FLAG_GRAY) return;
+
if (s->out_format == FMT_H263) {
dxy = 0;
if ((motion_x & 3) != 0)
@@ -768,6 +954,8 @@ static inline void qpel_motion(MpegEncContext *s,
qpix_op[dxy](dest_y + linesize*8 , ptr + linesize*8 , linesize, linesize, motion_x&3, motion_y&3);
qpix_op[dxy](dest_y + linesize*8 + 8, ptr + linesize*8 + 8, linesize, linesize, motion_x&3, motion_y&3);
+ if(s->flags&CODEC_FLAG_GRAY) return;
+
mx= (motion_x>>1) | (motion_x&1);
my= (motion_y>>1) | (motion_y&1);
@@ -856,6 +1044,8 @@ static inline void MPV_motion(MpegEncContext *s,
dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
pix_op[dxy](dest, ptr, s->linesize, 8);
}
+
+ if(s->flags&CODEC_FLAG_GRAY) break;
/* In case of 8X8, we construct a single chroma motion vector
with a special rounding */
mx = 0;
@@ -922,8 +1112,7 @@ static inline void put_dct(MpegEncContext *s,
{
if (!s->mpeg2)
s->dct_unquantize(s, block, i, s->qscale);
- ff_idct (block);
- put_pixels_clamped(block, dest, line_size);
+ ff_idct_put (dest, line_size, block);
}
/* add block[] to dest[] */
@@ -931,14 +1120,53 @@ static inline void add_dct(MpegEncContext *s,
DCTELEM *block, int i, UINT8 *dest, int line_size)
{
if (s->block_last_index[i] >= 0) {
- if (!s->mpeg2)
- if(s->encoding || (!s->h263_msmpeg4))
- s->dct_unquantize(s, block, i, s->qscale);
- ff_idct (block);
- add_pixels_clamped(block, dest, line_size);
+ ff_idct_add (dest, line_size, block);
}
}
+static inline void add_dequant_dct(MpegEncContext *s,
+ DCTELEM *block, int i, UINT8 *dest, int line_size)
+{ /* unquantize block number i with s->qscale, then hand it to ff_idct_add together with dest/line_size */
+ if (s->block_last_index[i] >= 0) { /* a negative last index leaves dest untouched (presumably: block has no coded coefficients) */
+ s->dct_unquantize(s, block, i, s->qscale); /* unquantizer is whatever function pointer s currently holds */
+
+ ff_idct_add (dest, line_size, block);
+ }
+}
+
+/**
+ * Resets the intra prediction tables (dc, ac, coded_block) for the current
+ * macroblock; meant to be called for a macroblock that is not intra coded.
+ *
+ * Sets the four luma and the two chroma dc_val entries to 1024, zeroes the
+ * corresponding ac_val entries, zeroes the four coded_block entries when
+ * s->msmpeg4_version >= 3, and finally clears the macroblock's flag in
+ * s->mbintra_table.
+ *
+ * The luma position is taken from s->block_index[0] / s->block_wrap[0]; the
+ * chroma position is computed from s->mb_x, s->mb_y and s->block_wrap[4].
+ */
+void ff_clean_intra_table_entries(MpegEncContext *s)
+{
+ int wrap = s->block_wrap[0];
+ int xy = s->block_index[0];
+
+ s->dc_val[0][xy ] =
+ s->dc_val[0][xy + 1 ] =
+ s->dc_val[0][xy + wrap] =
+ s->dc_val[0][xy + 1 + wrap] = 1024;
+ /* ac pred */
+ memset(s->ac_val[0][xy ], 0, 32 * sizeof(INT16)); /* 32 = two adjacent 16-entry rows: xy and xy + 1 */
+ memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(INT16)); /* likewise xy + wrap and xy + 1 + wrap */
+ if (s->msmpeg4_version>=3) {
+ s->coded_block[xy ] =
+ s->coded_block[xy + 1 ] =
+ s->coded_block[xy + wrap] =
+ s->coded_block[xy + 1 + wrap] = 0;
+ }
+ /* chroma */
+ wrap = s->block_wrap[4];
+ xy = s->mb_x + 1 + (s->mb_y + 1) * wrap;
+ s->dc_val[1][xy] =
+ s->dc_val[2][xy] = 1024;
+ /* ac pred */
+ memset(s->ac_val[1][xy], 0, 16 * sizeof(INT16));
+ memset(s->ac_val[2][xy], 0, 16 * sizeof(INT16));
+
+ s->mbintra_table[s->mb_x + s->mb_y*s->mb_width]= 0; /* entry is clean now; callers test this flag before calling */
+}
+
/* generic function called after a macroblock has been parsed by the
decoder or after it has been encoded by the encoder.
@@ -952,69 +1180,39 @@ static inline void add_dct(MpegEncContext *s,
void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
{
int mb_x, mb_y;
- int dct_linesize, dct_offset;
- op_pixels_func *op_pix;
- qpel_mc_func *op_qpix;
+ const int mb_xy = s->mb_y * s->mb_width + s->mb_x;
mb_x = s->mb_x;
mb_y = s->mb_y;
#ifdef FF_POSTPROCESS
+ /* Obsolete. Exists for compatibility with mplayer only. */
quant_store[mb_y][mb_x]=s->qscale;
//printf("[%02d][%02d] %d\n",mb_x,mb_y,s->qscale);
+#else
+ if(s->avctx->quant_store) s->avctx->quant_store[mb_y*s->avctx->qstride+mb_x] = s->qscale;
#endif
/* update DC predictors for P macroblocks */
if (!s->mb_intra) {
if (s->h263_pred || s->h263_aic) {
- if(s->mbintra_table[mb_x + mb_y*s->mb_width])
- {
- int wrap, xy, v;
- s->mbintra_table[mb_x + mb_y*s->mb_width]=0;
- wrap = 2 * s->mb_width + 2;
- xy = 2 * mb_x + 1 + (2 * mb_y + 1) * wrap;
- v = 1024;
-
- s->dc_val[0][xy] = v;
- s->dc_val[0][xy + 1] = v;
- s->dc_val[0][xy + wrap] = v;
- s->dc_val[0][xy + 1 + wrap] = v;
- /* ac pred */
- memset(s->ac_val[0][xy], 0, 16 * sizeof(INT16));
- memset(s->ac_val[0][xy + 1], 0, 16 * sizeof(INT16));
- memset(s->ac_val[0][xy + wrap], 0, 16 * sizeof(INT16));
- memset(s->ac_val[0][xy + 1 + wrap], 0, 16 * sizeof(INT16));
- if (s->h263_msmpeg4) {
- s->coded_block[xy] = 0;
- s->coded_block[xy + 1] = 0;
- s->coded_block[xy + wrap] = 0;
- s->coded_block[xy + 1 + wrap] = 0;
- }
- /* chroma */
- wrap = s->mb_width + 2;
- xy = mb_x + 1 + (mb_y + 1) * wrap;
- s->dc_val[1][xy] = v;
- s->dc_val[2][xy] = v;
- /* ac pred */
- memset(s->ac_val[1][xy], 0, 16 * sizeof(INT16));
- memset(s->ac_val[2][xy], 0, 16 * sizeof(INT16));
- }
+ if(s->mbintra_table[mb_xy])
+ ff_clean_intra_table_entries(s);
} else {
- s->last_dc[0] = 128 << s->intra_dc_precision;
- s->last_dc[1] = 128 << s->intra_dc_precision;
+ s->last_dc[0] =
+ s->last_dc[1] =
s->last_dc[2] = 128 << s->intra_dc_precision;
}
}
else if (s->h263_pred || s->h263_aic)
- s->mbintra_table[mb_x + mb_y*s->mb_width]=1;
+ s->mbintra_table[mb_xy]=1;
/* update motion predictor, not for B-frames as they need the motion_val from the last P/S-Frame */
- if (s->out_format == FMT_H263) {
- if(s->pict_type!=B_TYPE){
- int xy, wrap, motion_x, motion_y;
+ if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE) { //FIXME move into h263.c if possible, format specific stuff shouldnt be here
+ int motion_x, motion_y;
- wrap = 2 * s->mb_width + 2;
- xy = 2 * mb_x + 1 + (2 * mb_y + 1) * wrap;
+ const int wrap = s->block_wrap[0];
+ const int xy = s->block_index[0];
if (s->mb_intra) {
motion_x = 0;
motion_y = 0;
@@ -1033,20 +1231,23 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
s->motion_val[xy + 1 + wrap][0] = motion_x;
s->motion_val[xy + 1 + wrap][1] = motion_y;
}
- }
}
- if (!s->intra_only) {
+ if (!(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) {
UINT8 *dest_y, *dest_cb, *dest_cr;
- UINT8 *mbskip_ptr;
-
- /* avoid copy if macroblock skipped in last frame too */
- if (!s->encoding && s->pict_type != B_TYPE) {
- mbskip_ptr = &s->mbskip_table[s->mb_y * s->mb_width + s->mb_x];
+ int dct_linesize, dct_offset;
+ op_pixels_func *op_pix;
+ qpel_mc_func *op_qpix;
+
+ /* avoid the copy if the macroblock was skipped in the last frame too;
+ don't touch the skip table for B-frames, as they need the skip info from the next P-frame */
+ if (s->pict_type != B_TYPE) {
+ UINT8 *mbskip_ptr = &s->mbskip_table[mb_xy];
if (s->mb_skiped) {
s->mb_skiped = 0;
- /* if previous was skipped too, then nothing to do ! */
- if (*mbskip_ptr != 0)
+ /* if the previous one was skipped too, there is nothing to do;
+ take this shortcut only when decoding, as the buffers may get trashed a bit during encoding */
+ if (*mbskip_ptr != 0 && !s->encoding)
goto the_end;
*mbskip_ptr = 1; /* indicate that this time we skiped it */
} else {
@@ -1068,33 +1269,53 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
if (!s->mb_intra) {
/* motion handling */
- if (!s->no_rounding){
- op_pix = put_pixels_tab;
- op_qpix= qpel_mc_rnd_tab;
- }else{
- op_pix = put_no_rnd_pixels_tab;
- op_qpix= qpel_mc_no_rnd_tab;
- }
+ /* decoding or more than one mb_type (MC was allready done otherwise) */
+ if((!s->encoding) || (s->mb_type[mb_xy]&(s->mb_type[mb_xy]-1))){
+ if ((!s->no_rounding) || s->pict_type==B_TYPE){
+ op_pix = put_pixels_tab;
+ op_qpix= qpel_mc_rnd_tab;
+ }else{
+ op_pix = put_no_rnd_pixels_tab;
+ op_qpix= qpel_mc_no_rnd_tab;
+ }
- if (s->mv_dir & MV_DIR_FORWARD) {
- MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix, op_qpix);
- if (!s->no_rounding)
- op_pix = avg_pixels_tab;
- else
- op_pix = avg_no_rnd_pixels_tab;
- }
- if (s->mv_dir & MV_DIR_BACKWARD) {
- MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture, op_pix, op_qpix);
+ if (s->mv_dir & MV_DIR_FORWARD) {
+ MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix, op_qpix);
+ if ((!s->no_rounding) || s->pict_type==B_TYPE)
+ op_pix = avg_pixels_tab;
+ else
+ op_pix = avg_no_rnd_pixels_tab;
+ }
+ if (s->mv_dir & MV_DIR_BACKWARD) {
+ MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture, op_pix, op_qpix);
+ }
}
- /* add dct residue */
- add_dct(s, block[0], 0, dest_y, dct_linesize);
- add_dct(s, block[1], 1, dest_y + 8, dct_linesize);
- add_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
- add_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
+ /* skip dequant / idct if we are really late ;) */
+ if(s->hurry_up>1) goto the_end;
- add_dct(s, block[4], 4, dest_cb, s->linesize >> 1);
- add_dct(s, block[5], 5, dest_cr, s->linesize >> 1);
+ /* add dct residue */
+ if(!s->mpeg2 && (s->encoding || (!s->h263_msmpeg4))){
+ add_dequant_dct(s, block[0], 0, dest_y, dct_linesize);
+ add_dequant_dct(s, block[1], 1, dest_y + 8, dct_linesize);
+ add_dequant_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
+ add_dequant_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
+
+ if(!(s->flags&CODEC_FLAG_GRAY)){
+ add_dequant_dct(s, block[4], 4, dest_cb, s->linesize >> 1);
+ add_dequant_dct(s, block[5], 5, dest_cr, s->linesize >> 1);
+ }
+ } else {
+ add_dct(s, block[0], 0, dest_y, dct_linesize);
+ add_dct(s, block[1], 1, dest_y + 8, dct_linesize);
+ add_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
+ add_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
+
+ if(!(s->flags&CODEC_FLAG_GRAY)){
+ add_dct(s, block[4], 4, dest_cb, s->linesize >> 1);
+ add_dct(s, block[5], 5, dest_cr, s->linesize >> 1);
+ }
+ }
} else {
/* dct only in intra block */
put_dct(s, block[0], 0, dest_y, dct_linesize);
@@ -1102,128 +1323,188 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
put_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
put_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
- put_dct(s, block[4], 4, dest_cb, s->linesize >> 1);
- put_dct(s, block[5], 5, dest_cr, s->linesize >> 1);
+ if(!(s->flags&CODEC_FLAG_GRAY)){
+ put_dct(s, block[4], 4, dest_cb, s->linesize >> 1);
+ put_dct(s, block[5], 5, dest_cr, s->linesize >> 1);
+ }
}
}
the_end:
emms_c(); //FIXME remove
}
-static void encode_mb(MpegEncContext *s)
+static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold, int skip_dc) /* clears block n of s when it holds only a few isolated +-1 levels; a non-zero skip_dc keeps scan index 0 out of the clear */
+{
+ static const char tab[64]= /* cost added for a +-1 level, indexed by the number of zero levels scanned since the previously scored +-1 */
+ {3,2,2,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,
+ 0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0};
+ int score=0; /* sum of tab[] costs of the +-1 levels seen so far */
+ int run=0; /* zero levels seen since the last scored +-1 */
+ int i;
+ DCTELEM *block= s->block[n];
+ const int last_index= s->block_last_index[n];
+ /* normalize skip_dc to 0/1: below it doubles as the first scan index that may be cleared */
+ if(skip_dc) skip_dc=1;
+
+ /* nothing to do if every coefficient we would be allowed to clear is already zero */
+ if(last_index<=skip_dc - 1) return;
+ /* walk the block in zigzag_direct order and score it; leave it untouched as soon as any |level| > 1 shows up */
+ for(i=0; i<=last_index; i++){
+ const int j = zigzag_direct[i];
+ const int level = ABS(block[j]);
+ if(level==1){
+ if(skip_dc && i==0) continue; /* a +-1 at scan index 0 is neither scored nor counted in run when it is going to be kept */
+ score+= tab[run];
+ run=0;
+ }else if(level>1){
+ return; /* NOTE(review): this also fires for scan index 0 when skip_dc is set -- confirm that is intended */
+ }else{
+ run++;
+ }
+ }
+ if(score >= threshold) return; /* score reached the threshold: keep the block as it is */
+ for(i=skip_dc; i<=last_index; i++){
+ const int j = zigzag_direct[i];
+ block[j]=0;
+ }
+ if(block[0]) s->block_last_index[n]= 0; /* block[0] survived the clear, so index 0 is now the last non-zero one */
+ else s->block_last_index[n]= -1; /* -1 is stored when block[0] is zero too */
+}
+
+static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index) /* clamps the coefficients at scan indices 0..last_index of block into [s->min_qcoeff, s->max_qcoeff] */
+{
+ int i;
+ const int maxlevel= s->max_qcoeff;
+ const int minlevel= s->min_qcoeff;
+ /* coefficients are visited in zigzag_direct order; positions after last_index are not touched */
+ for(i=0;i<=last_index; i++){
+ const int j = zigzag_direct[i];
+ int level = block[j];
+ /* saturate instead of letting an out-of-range level through */
+ if (level>maxlevel) level=maxlevel;
+ else if(level<minlevel) level=minlevel;
+ block[j]= level; /* written back unconditionally, also when nothing was clipped */
+ }
+}
+
+static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
{
- int wrap;
const int mb_x= s->mb_x;
const int mb_y= s->mb_y;
- UINT8 *ptr;
- const int motion_x= s->mv[0][0][0];
- const int motion_y= s->mv[0][0][1];
int i;
+ int skip_dct[6];
+#if 0
+ if (s->interlaced_dct) {
+ dct_linesize = s->linesize * 2;
+ dct_offset = s->linesize;
+ } else {
+ dct_linesize = s->linesize;
+ dct_offset = s->linesize * 8;
+ }
+#endif
+ for(i=0; i<6; i++) skip_dct[i]=0;
- /* get the pixels */
- wrap = s->linesize;
- ptr = s->new_picture[0] + (mb_y * 16 * wrap) + mb_x * 16;
- get_pixels(s->block[0], ptr, wrap);
- get_pixels(s->block[1], ptr + 8, wrap);
- get_pixels(s->block[2], ptr + 8 * wrap, wrap);
- get_pixels(s->block[3], ptr + 8 * wrap + 8, wrap);
- wrap = s->linesize >> 1;
- ptr = s->new_picture[1] + (mb_y * 8 * wrap) + mb_x * 8;
- get_pixels(s->block[4], ptr, wrap);
-
- wrap = s->linesize >> 1;
- ptr = s->new_picture[2] + (mb_y * 8 * wrap) + mb_x * 8;
- get_pixels(s->block[5], ptr, wrap);
-
- /* subtract previous frame if non intra */
- if (!s->mb_intra) {
- int dxy, offset, mx, my;
+ if (s->mb_intra) {
+ UINT8 *ptr;
+ int wrap;
+
+ wrap = s->linesize;
+ ptr = s->new_picture[0] + (mb_y * 16 * wrap) + mb_x * 16;
+ get_pixels(s->block[0], ptr , wrap);
+ get_pixels(s->block[1], ptr + 8, wrap);
+ get_pixels(s->block[2], ptr + 8 * wrap , wrap);
+ get_pixels(s->block[3], ptr + 8 * wrap + 8, wrap);
+
+ if(s->flags&CODEC_FLAG_GRAY){
+ skip_dct[4]= 1;
+ skip_dct[5]= 1;
+ }else{
+ wrap >>=1;
+ ptr = s->new_picture[1] + (mb_y * 8 * wrap) + mb_x * 8;
+ get_pixels(s->block[4], ptr, wrap);
+
+ ptr = s->new_picture[2] + (mb_y * 8 * wrap) + mb_x * 8;
+ get_pixels(s->block[5], ptr, wrap);
+ }
+ }else{
+ op_pixels_func *op_pix;
+ qpel_mc_func *op_qpix;
+ UINT8 *dest_y, *dest_cb, *dest_cr;
+ UINT8 *ptr_y, *ptr_cb, *ptr_cr;
+ int wrap_y, wrap_c;
+
+ dest_y = s->current_picture[0] + (mb_y * 16 * s->linesize ) + mb_x * 16;
+ dest_cb = s->current_picture[1] + (mb_y * 8 * (s->linesize >> 1)) + mb_x * 8;
+ dest_cr = s->current_picture[2] + (mb_y * 8 * (s->linesize >> 1)) + mb_x * 8;
+ wrap_y = s->linesize;
+ wrap_c = wrap_y>>1;
+ ptr_y = s->new_picture[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
+ ptr_cb = s->new_picture[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
+ ptr_cr = s->new_picture[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
+
+ if ((!s->no_rounding) || s->pict_type==B_TYPE){
+ op_pix = put_pixels_tab;
+ op_qpix= qpel_mc_rnd_tab;
+ }else{
+ op_pix = put_no_rnd_pixels_tab;
+ op_qpix= qpel_mc_no_rnd_tab;
+ }
+
+ if (s->mv_dir & MV_DIR_FORWARD) {
+ MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix, op_qpix);
+ if ((!s->no_rounding) || s->pict_type==B_TYPE)
+ op_pix = avg_pixels_tab;
+ else
+ op_pix = avg_no_rnd_pixels_tab;
+ }
+ if (s->mv_dir & MV_DIR_BACKWARD) {
+ MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture, op_pix, op_qpix);
+ }
+
+ diff_pixels(s->block[0], ptr_y , dest_y , wrap_y);
+ diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
+ diff_pixels(s->block[2], ptr_y + 8 * wrap_y , dest_y + 8 * wrap_y , wrap_y);
+ diff_pixels(s->block[3], ptr_y + 8 * wrap_y + 8, dest_y + 8 * wrap_y + 8, wrap_y);
- if(s->mv_type==MV_TYPE_16X16){
- dxy = ((motion_y & 1) << 1) | (motion_x & 1);
- ptr = s->last_picture[0] +
- ((mb_y * 16 + (motion_y >> 1)) * s->linesize) +
- (mb_x * 16 + (motion_x >> 1));
-
- sub_pixels_2(s->block[0], ptr, s->linesize, dxy);
- sub_pixels_2(s->block[1], ptr + 8, s->linesize, dxy);
- sub_pixels_2(s->block[2], ptr + s->linesize * 8, s->linesize, dxy);
- sub_pixels_2(s->block[3], ptr + 8 + s->linesize * 8, s->linesize ,dxy);
-
- if (s->out_format == FMT_H263) {
- /* special rounding for h263 */
- dxy = 0;
- if ((motion_x & 3) != 0)
- dxy |= 1;
- if ((motion_y & 3) != 0)
- dxy |= 2;
- mx = motion_x >> 2;
- my = motion_y >> 2;
- } else {
- mx = motion_x / 2;
- my = motion_y / 2;
- dxy = ((my & 1) << 1) | (mx & 1);
- mx >>= 1;
- my >>= 1;
- }
- offset = ((mb_y * 8 + my) * (s->linesize >> 1)) + (mb_x * 8 + mx);
- ptr = s->last_picture[1] + offset;
- sub_pixels_2(s->block[4], ptr, s->linesize >> 1, dxy);
- ptr = s->last_picture[2] + offset;
- sub_pixels_2(s->block[5], ptr, s->linesize >> 1, dxy);
+ if(s->flags&CODEC_FLAG_GRAY){
+ skip_dct[4]= 1;
+ skip_dct[5]= 1;
}else{
- int src_x, src_y;
-
- for(i=0;i<4;i++) {
- int motion_x = s->mv[0][i][0];
- int motion_y = s->mv[0][i][1];
-
- dxy = ((motion_y & 1) << 1) | (motion_x & 1);
- src_x = mb_x * 16 + (motion_x >> 1) + (i & 1) * 8;
- src_y = mb_y * 16 + (motion_y >> 1) + (i >>1) * 8;
-
- ptr = s->last_picture[0] + (src_y * s->linesize) + (src_x);
- sub_pixels_2(s->block[i], ptr, s->linesize, dxy);
- }
- /* In case of 8X8, we construct a single chroma motion vector
- with a special rounding */
- mx = 0;
- my = 0;
- for(i=0;i<4;i++) {
- mx += s->mv[0][i][0];
- my += s->mv[0][i][1];
- }
- if (mx >= 0)
- mx = (h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1));
- else {
- mx = -mx;
- mx = -(h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1));
- }
- if (my >= 0)
- my = (h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1));
- else {
- my = -my;
- my = -(h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1));
- }
- dxy = ((my & 1) << 1) | (mx & 1);
- mx >>= 1;
- my >>= 1;
-
- src_x = mb_x * 8 + mx;
- src_y = mb_y * 8 + my;
- src_x = clip(src_x, -8, s->width/2);
- if (src_x == s->width/2)
- dxy &= ~1;
- src_y = clip(src_y, -8, s->height/2);
- if (src_y == s->height/2)
- dxy &= ~2;
-
- offset = (src_y * (s->linesize >> 1)) + src_x;
- ptr = s->last_picture[1] + offset;
- sub_pixels_2(s->block[4], ptr, s->linesize >> 1, dxy);
- ptr = s->last_picture[2] + offset;
- sub_pixels_2(s->block[5], ptr, s->linesize >> 1, dxy);
+ diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
+ diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
}
+
+ /* pre quantization */
+ if(s->mc_mb_var[s->mb_width*mb_y+ mb_x]<2*s->qscale*s->qscale){
+ if(pix_abs8x8(ptr_y , dest_y , wrap_y) < 20*s->qscale) skip_dct[0]= 1;
+ if(pix_abs8x8(ptr_y + 8, dest_y + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1;
+ if(pix_abs8x8(ptr_y + 8*wrap_y , dest_y + 8*wrap_y , wrap_y) < 20*s->qscale) skip_dct[2]= 1;
+ if(pix_abs8x8(ptr_y + 8*wrap_y + 8, dest_y + 8*wrap_y + 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1;
+ if(pix_abs8x8(ptr_cb , dest_cb , wrap_y) < 20*s->qscale) skip_dct[4]= 1;
+ if(pix_abs8x8(ptr_cr , dest_cr , wrap_y) < 20*s->qscale) skip_dct[5]= 1;
+#if 0
+{
+ static int stat[7];
+ int num=0;
+ for(i=0; i<6; i++)
+ if(skip_dct[i]) num++;
+ stat[num]++;
+
+ if(s->mb_x==0 && s->mb_y==0){
+ for(i=0; i<7; i++){
+ printf("%6d %1d\n", stat[i], i);
+ }
+ }
+}
+#endif
+ }
+
}
#if 0
@@ -1240,17 +1521,47 @@ static void encode_mb(MpegEncContext *s)
}
#endif
/* DCT & quantize */
- if (s->h263_msmpeg4) {
- msmpeg4_dc_scale(s);
- } else if (s->h263_pred) {
+ if (s->h263_pred && !(s->msmpeg4_version==1 || s->msmpeg4_version==2)) {
h263_dc_scale(s);
+ } else if (s->h263_aic) {
+ s->y_dc_scale = 2*s->qscale;
+ s->c_dc_scale = 2*s->qscale;
} else {
/* default quantization values */
s->y_dc_scale = 8;
s->c_dc_scale = 8;
}
- for(i=0;i<6;i++) {
- s->block_last_index[i] = dct_quantize(s, s->block[i], i, s->qscale);
+ if(s->out_format==FMT_MJPEG){
+ for(i=0;i<6;i++) {
+ int overflow;
+ s->block_last_index[i] = dct_quantize(s, s->block[i], i, 8, &overflow);
+ if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
+ }
+ }else{
+ for(i=0;i<6;i++) {
+ if(!skip_dct[i]){
+ int overflow;
+ s->block_last_index[i] = dct_quantize(s, s->block[i], i, s->qscale, &overflow);
+ // FIXME we could decide to change to quantizer instead of clipping
+ // JS: I don't think that would be a good idea it could lower quality instead
+ // of improve it. Just INTRADC clipping deserves changes in quantizer
+ if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
+ }else
+ s->block_last_index[i]= -1;
+ }
+ if(s->luma_elim_threshold && !s->mb_intra)
+ for(i=0; i<4; i++)
+ dct_single_coeff_elimination(s, i, s->luma_elim_threshold, 0);
+ if(s->chroma_elim_threshold && !s->mb_intra)
+ for(i=4; i<6; i++)
+ dct_single_coeff_elimination(s, i, s->chroma_elim_threshold, 1);
+ }
+
+ if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
+ s->block_last_index[4]=
+ s->block_last_index[5]= 0;
+ s->block[4][0]=
+ s->block[5][0]= 128;
}
/* huffman encode */
@@ -1272,14 +1583,113 @@ static void encode_mb(MpegEncContext *s)
}
}
-static void copy_bits(PutBitContext *pb, UINT8 *src, int length)
+void ff_copy_bits(PutBitContext *pb, UINT8 *src, int length) /* appends the first length bits found at src to pb via put_bits() */
{
+#if 1
+ int bytes= length>>4; /* despite the name this is the number of whole 16-bit words */
+ int bits= length&15; /* bits left over after the whole words */
+ int i;
+
+ if(length==0) return;
+ /* src is read as 16-bit words passed through be2me_16 -- NOTE(review): the cast assumes src is suitably aligned for uint16_t, confirm against callers */
+ for(i=0; i<bytes; i++) put_bits(pb, 16, be2me_16(((uint16_t*)src)[i]));
+ put_bits(pb, bits, be2me_16(((uint16_t*)src)[i])>>(16-bits)); /* top `bits` bits of the next word; NOTE(review): when length is a non-zero multiple of 16, bits is 0 yet word i is still read and put_bits is called with 0 bits -- confirm both are safe */
+#else
int bytes= length>>3; /* disabled variant: copies 8 bits at a time */
int bits= length&7;
int i;
for(i=0; i<bytes; i++) put_bits(pb, 8, src[i]);
put_bits(pb, bits, src[i]>>(8-bits));
+#endif
+}
+
+static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){ /* copies from s into d the fields listed below; encode_mb_hq calls it as (s, backup, type) before each trial encode; type is not used */
+ int i;
+ /* copies 2*2*2 ints of last_mv -- presumably the whole array, TODO confirm against the struct declaration */
+ memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
+
+ /* mpeg1 */
+ d->mb_incr= s->mb_incr;
+ for(i=0; i<3; i++)
+ d->last_dc[i]= s->last_dc[i];
+
+ /* statistics */
+ d->mv_bits= s->mv_bits;
+ d->i_tex_bits= s->i_tex_bits;
+ d->p_tex_bits= s->p_tex_bits;
+ d->i_count= s->i_count;
+ d->p_count= s->p_count;
+ d->skip_count= s->skip_count;
+ d->misc_bits= s->misc_bits;
+ d->last_bits= 0; /* reset rather than copied from s */
+
+ d->mb_skiped= s->mb_skiped;
+}
+
+static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){ /* copies the outcome of an encode (vectors, statistics, MB mode, bit writers, block data) from s into d; type is not used */
+ int i;
+ /* sizes are hard-coded as 2*4*2 and 2*2*2 ints -- presumably the full mv / last_mv arrays, TODO confirm against the struct declaration */
+ memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
+ memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
+
+ /* mpeg1 */
+ d->mb_incr= s->mb_incr;
+ for(i=0; i<3; i++)
+ d->last_dc[i]= s->last_dc[i];
+
+ /* statistics */
+ d->mv_bits= s->mv_bits;
+ d->i_tex_bits= s->i_tex_bits;
+ d->p_tex_bits= s->p_tex_bits;
+ d->i_count= s->i_count;
+ d->p_count= s->p_count;
+ d->skip_count= s->skip_count;
+ d->misc_bits= s->misc_bits;
+ /* unlike copy_context_before_encode, last_bits is left alone and the MB mode fields are copied too */
+ d->mb_intra= s->mb_intra;
+ d->mb_skiped= s->mb_skiped;
+ d->mv_type= s->mv_type;
+ d->mv_dir= s->mv_dir;
+ d->pb= s->pb; /* the bit writer itself is assigned, not just a bit count */
+ if(s->data_partitioning){ /* the two extra writers are only carried over when s uses data partitioning */
+ d->pb2= s->pb2;
+ d->tex_pb= s->tex_pb;
+ }
+ d->block= s->block; /* only the pointer is copied; the coefficients are not duplicated */
+ for(i=0; i<6; i++)
+ d->block_last_index[i]= s->block_last_index[i];
+}
+
+static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type, /* trial-encodes the current MB with the mode already set up in s and remembers it in best if it needs fewer bits than *dmin */
+ PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2], /* two scratch writers per stream; *next_block selects the pair this trial writes into */
+ int *dmin, int *next_block, int motion_x, int motion_y) /* *dmin: smallest bit count so far (updated); motion_x/motion_y are passed straight to encode_mb() */
+{
+ int bits_count;
+ /* bring s back to the state saved in backup so every trial starts from the same point */
+ copy_context_before_encode(s, backup, type);
+ /* pb[] entries are assigned by value, so the array elements themselves are not advanced by the encode */
+ s->block= s->blocks[*next_block];
+ s->pb= pb[*next_block];
+ if(s->data_partitioning){
+ s->pb2 = pb2 [*next_block];
+ s->tex_pb= tex_pb[*next_block];
+ }
+
+ encode_mb(s, motion_x, motion_y);
+ /* cost of the trial = bits in s->pb, plus the two partition writers when data partitioning is on */
+ bits_count= get_bit_count(&s->pb);
+ if(s->data_partitioning){
+ bits_count+= get_bit_count(&s->pb2);
+ bits_count+= get_bit_count(&s->tex_pb);
+ }
+
+ if(bits_count<*dmin){ /* strictly better only: on a tie the earlier candidate stays */
+ *dmin= bits_count;
+ *next_block^=1; /* the next trial uses the other slot, so the winner now sits at index *next_block^1 */
+
+ copy_context_after_encode(best, s, type);
+ }
}
static void encode_picture(MpegEncContext *s, int picture_number)
@@ -1287,8 +1697,17 @@ static void encode_picture(MpegEncContext *s, int picture_number)
int mb_x, mb_y, last_gob, pdif = 0;
int i;
int bits;
- MpegEncContext best_s;
- UINT8 bit_buf[4][3000]; //FIXME check that this is ALLWAYS large enogh for a MB
+ MpegEncContext best_s, backup_s;
+ UINT8 bit_buf[2][3000];
+ UINT8 bit_buf2[2][3000];
+ UINT8 bit_buf_tex[2][3000];
+ PutBitContext pb[2], pb2[2], tex_pb[2];
+
+ for(i=0; i<2; i++){
+ init_put_bits(&pb [i], bit_buf [i], 3000, NULL, NULL);
+ init_put_bits(&pb2 [i], bit_buf2 [i], 3000, NULL, NULL);
+ init_put_bits(&tex_pb[i], bit_buf_tex[i], 3000, NULL, NULL);
+ }
s->picture_number = picture_number;
@@ -1299,12 +1718,16 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->block_wrap[4]=
s->block_wrap[5]= s->mb_width + 2;
- s->last_mc_mb_var = s->mc_mb_var;
/* Reset the average MB variance */
- s->avg_mb_var = 0;
- s->mc_mb_var = 0;
+ s->mb_var_sum = 0;
+ s->mc_mb_var_sum = 0;
+
+ /* we need to initialize some time vars before we can encode b-frames */
+ if (s->h263_pred && !s->h263_msmpeg4)
+ ff_set_mpeg4_time(s, s->picture_number);
+
/* Estimate motion for every MB */
- if(s->pict_type == P_TYPE){
+ if(s->pict_type != I_TYPE){
for(mb_y=0; mb_y < s->mb_height; mb_y++) {
s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1;
s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1);
@@ -1319,127 +1742,61 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->block_index[3]+=2;
/* compute motion vector & mb_type and store in context */
- estimate_motion(s, mb_x, mb_y);
+ if(s->pict_type==B_TYPE)
+ ff_estimate_b_frame_motion(s, mb_x, mb_y);
+ else
+ ff_estimate_p_frame_motion(s, mb_x, mb_y);
// s->mb_type[mb_y*s->mb_width + mb_x]=MB_TYPE_INTER;
}
}
emms_c();
- }else{
+ }else /* if(s->pict_type == I_TYPE) */{
/* I-Frame */
//FIXME do we need to zero them?
memset(s->motion_val[0], 0, sizeof(INT16)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2);
- memset(s->mv_table[0] , 0, sizeof(INT16)*s->mb_width*s->mb_height);
- memset(s->mv_table[1] , 0, sizeof(INT16)*s->mb_width*s->mb_height);
+ memset(s->p_mv_table , 0, sizeof(INT16)*(s->mb_width+2)*(s->mb_height+2)*2);
memset(s->mb_type , MB_TYPE_INTRA, sizeof(UINT8)*s->mb_width*s->mb_height);
}
- if(s->avg_mb_var < s->mc_mb_var && s->pict_type != B_TYPE && (!s->force_type)){ //FIXME subtract MV bits
+ if(s->mb_var_sum < s->mc_mb_var_sum && s->pict_type == P_TYPE){ //FIXME subtract MV bits
s->pict_type= I_TYPE;
- s->picture_in_gop_number=0;
memset(s->mb_type , MB_TYPE_INTRA, sizeof(UINT8)*s->mb_width*s->mb_height);
+ if(s->max_b_frames==0){
+ s->input_pict_type= I_TYPE;
+ s->input_picture_in_gop_number=0;
+ }
//printf("Scene change detected, encoding as I Frame\n");
}
-
- /* find best f_code for ME which do unlimited searches */
- if(s->pict_type==P_TYPE && s->full_search>3){
- int mv_num[8];
- int i;
- int loose=0;
- UINT8 * fcode_tab= s->fcode_tab;
-
- for(i=0; i<8; i++) mv_num[i]=0;
-
- for(i=0; i<s->mb_num; i++){
- if(s->mb_type[i] & MB_TYPE_INTER){
- mv_num[ fcode_tab[s->mv_table[0][i] + MAX_MV] ]++;
- mv_num[ fcode_tab[s->mv_table[1][i] + MAX_MV] ]++;
-//printf("%d %d %d\n", s->mv_table[0][i], fcode_tab[s->mv_table[0][i] + MAX_MV], i);
- }
-//else printf("I");
- }
-
- for(i=MAX_FCODE; i>1; i--){
- loose+= mv_num[i];
- if(loose > 10) break; //FIXME this is pretty ineffective
- }
- s->f_code= i;
-/* for(i=0; i<=MAX_FCODE; i++){
- printf("%d ", mv_num[i]);
- }
- printf("\n");*/
- }else{
- s->f_code= 1;
+
+ if(s->pict_type==P_TYPE || s->pict_type==S_TYPE)
+ s->f_code= ff_get_best_fcode(s, s->p_mv_table, MB_TYPE_INTER);
+ ff_fix_long_p_mvs(s);
+ if(s->pict_type==B_TYPE){
+ s->f_code= ff_get_best_fcode(s, s->b_forw_mv_table, MB_TYPE_FORWARD);
+ s->b_code= ff_get_best_fcode(s, s->b_back_mv_table, MB_TYPE_BACKWARD);
+
+ ff_fix_long_b_mvs(s, s->b_forw_mv_table, s->f_code, MB_TYPE_FORWARD);
+ ff_fix_long_b_mvs(s, s->b_back_mv_table, s->b_code, MB_TYPE_BACKWARD);
+ ff_fix_long_b_mvs(s, s->b_bidir_forw_mv_table, s->f_code, MB_TYPE_BIDIR);
+ ff_fix_long_b_mvs(s, s->b_bidir_back_mv_table, s->b_code, MB_TYPE_BIDIR);
}
-
+
//printf("f_code %d ///\n", s->f_code);
- /* convert MBs with too long MVs to I-Blocks */
- if(s->pict_type==P_TYPE){
- int i, x, y;
- const int f_code= s->f_code;
- UINT8 * fcode_tab= s->fcode_tab;
-//FIXME try to clip instead of intra izing ;)
- /* clip / convert to intra 16x16 type MVs */
- for(i=0; i<s->mb_num; i++){
- if(s->mb_type[i]&MB_TYPE_INTER){
- if( fcode_tab[s->mv_table[0][i] + MAX_MV] > f_code
- || fcode_tab[s->mv_table[0][i] + MAX_MV] == 0
- || fcode_tab[s->mv_table[1][i] + MAX_MV] > f_code
- || fcode_tab[s->mv_table[1][i] + MAX_MV] == 0 ){
- s->mb_type[i] &= ~MB_TYPE_INTER;
- s->mb_type[i] |= MB_TYPE_INTRA;
- s->mv_table[0][i] = 0;
- s->mv_table[1][i] = 0;
- }
- }
- }
-
- if(s->flags&CODEC_FLAG_4MV){
- int wrap= 2+ s->mb_width*2;
-
- /* clip / convert to intra 8x8 type MVs */
- for(y=0; y<s->mb_height; y++){
- int xy= (y*2 + 1)*wrap + 1;
- i= y*s->mb_width;
-
- for(x=0; x<s->mb_width; x++){
- if(s->mb_type[i]&MB_TYPE_INTER4V){
- int block;
- for(block=0; block<4; block++){
- int off= (block& 1) + (block>>1)*wrap;
- int mx= s->motion_val[ xy + off ][0];
- int my= s->motion_val[ xy + off ][1];
-
- if( fcode_tab[mx + MAX_MV] > f_code
- || fcode_tab[mx + MAX_MV] == 0
- || fcode_tab[my + MAX_MV] > f_code
- || fcode_tab[my + MAX_MV] == 0 ){
- s->mb_type[i] &= ~MB_TYPE_INTER4V;
- s->mb_type[i] |= MB_TYPE_INTRA;
- }
- }
- xy+=2;
- i++;
- }
- }
- }
- }
- }
// printf("%d %d\n", s->avg_mb_var, s->mc_mb_var);
- if (!s->fixed_qscale)
- s->qscale = rate_estimate_qscale(s);
+ if(s->flags&CODEC_FLAG_PASS2)
+ s->qscale = ff_rate_estimate_qscale_pass2(s);
+ else if (!s->fixed_qscale)
+ s->qscale = ff_rate_estimate_qscale(s);
- /* precompute matrix */
if (s->out_format == FMT_MJPEG) {
/* for mjpeg, we do include qscale in the matrix */
s->intra_matrix[0] = default_intra_matrix[0];
for(i=1;i<64;i++)
- s->intra_matrix[i] = (default_intra_matrix[i] * s->qscale) >> 3;
- convert_matrix(s->q_intra_matrix, s->q_intra_matrix16, s->intra_matrix, 8);
- } else {
- convert_matrix(s->q_intra_matrix, s->q_intra_matrix16, s->intra_matrix, s->qscale);
- convert_matrix(s->q_non_intra_matrix, s->q_non_intra_matrix16, s->non_intra_matrix, s->qscale);
+ s->intra_matrix[i] = CLAMP_TO_8BIT((default_intra_matrix[i] * s->qscale) >> 3);
+ convert_matrix(s->q_intra_matrix, s->q_intra_matrix16,
+ s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias);
}
s->last_bits= get_bit_count(&s->pb);
@@ -1489,21 +1846,31 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->gob_index = 2;
else
s->gob_index = 4;
+ }else if(s->codec_id==CODEC_ID_MPEG4){
+ s->gob_index = 1;
}
-
- s->avg_mb_var = s->avg_mb_var / s->mb_num;
-
+
+ if(s->codec_id==CODEC_ID_MPEG4 && s->data_partitioning && s->pict_type!=B_TYPE)
+ ff_mpeg4_init_partitions(s);
+
+ s->resync_mb_x=0;
+ s->resync_mb_y=0;
for(mb_y=0; mb_y < s->mb_height; mb_y++) {
- /* Put GOB header based on RTP MTU */
+ /* Put GOB header based on RTP MTU for formats which support it per line (H263*)*/
/* TODO: Put all this stuff in a separate generic function */
if (s->rtp_mode) {
if (!mb_y) {
s->ptr_lastgob = s->pb.buf;
s->ptr_last_mb_line = s->pb.buf;
} else if (s->out_format == FMT_H263 && !s->h263_pred && !s->h263_msmpeg4 && !(mb_y % s->gob_index)) {
+ // MN: we could move the space check from h263 -> here, as its not h263 specific
last_gob = h263_encode_gob_header(s, mb_y);
if (last_gob) {
- s->first_gob_line = 1;
+ s->first_slice_line = 1;
+ }else{
+ /*MN: we reset it here instead at the end of each line cuz mpeg4 can have
+ slice lines starting & ending in the middle*/
+ s->first_slice_line = 0;
}
}
}
@@ -1516,10 +1883,9 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->block_index[5]= s->block_wrap[4]*(mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2);
for(mb_x=0; mb_x < s->mb_width; mb_x++) {
const int mb_type= s->mb_type[mb_y * s->mb_width + mb_x];
- PutBitContext pb;
- int d;
+ const int xy= (mb_y+1) * (s->mb_width+2) + mb_x + 1;
+// int d;
int dmin=10000000;
- int best=0;
s->mb_x = mb_x;
s->mb_y = mb_y;
@@ -1529,114 +1895,216 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->block_index[3]+=2;
s->block_index[4]++;
s->block_index[5]++;
+
+ /* write gob / video packet header for formats which support it at any MB (MPEG4) */
+ if(s->rtp_mode && s->mb_y>0 && s->codec_id==CODEC_ID_MPEG4){
+ int pdif= pbBufPtr(&s->pb) - s->ptr_lastgob;
+
+ //the *2 is there so we stay below the requested size
+ if(pdif + s->mb_line_avgsize/s->mb_width >= s->rtp_payload_size){
+ if(s->codec_id==CODEC_ID_MPEG4){
+ if(s->data_partitioning && s->pict_type!=B_TYPE){
+ ff_mpeg4_merge_partitions(s);
+ ff_mpeg4_init_partitions(s);
+ }
+ ff_mpeg4_encode_video_packet_header(s);
+
+ if(s->flags&CODEC_FLAG_PASS1){
+ int bits= get_bit_count(&s->pb);
+ s->misc_bits+= bits - s->last_bits;
+ s->last_bits= bits;
+ }
+ ff_mpeg4_clean_buffers(s);
+ }
+ s->ptr_lastgob = pbBufPtr(&s->pb);
+ s->first_slice_line=1;
+ s->resync_mb_x=mb_x;
+ s->resync_mb_y=mb_y;
+ }
+
+ if( (s->resync_mb_x == s->mb_x)
+ && s->resync_mb_y+1 == s->mb_y){
+ s->first_slice_line=0;
+ }
+ }
- s->mv_dir = MV_DIR_FORWARD;
if(mb_type & (mb_type-1)){ // more than 1 MB type possible
- pb= s->pb;
+ int next_block=0;
+ int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
+
+ copy_context_before_encode(&backup_s, s, -1);
+ backup_s.pb= s->pb;
+ best_s.data_partitioning= s->data_partitioning;
+ if(s->data_partitioning){
+ backup_s.pb2= s->pb2;
+ backup_s.tex_pb= s->tex_pb;
+ }
+
if(mb_type&MB_TYPE_INTER){
+ s->mv_dir = MV_DIR_FORWARD;
s->mv_type = MV_TYPE_16X16;
s->mb_intra= 0;
- s->mv[0][0][0] = s->mv_table[0][mb_y * s->mb_width + mb_x];
- s->mv[0][0][1] = s->mv_table[1][mb_y * s->mb_width + mb_x];
- init_put_bits(&s->pb, bit_buf[1], 3000, NULL, NULL);
- s->block= s->inter_block;
-
- encode_mb(s);
- d= get_bit_count(&s->pb);
- if(d<dmin){
- flush_put_bits(&s->pb);
- dmin=d;
- best_s.mv[0][0][0]= s->mv[0][0][0];
- best_s.mv[0][0][1]= s->mv[0][0][1];
- best_s.mb_intra= 0;
- best_s.mv_type = MV_TYPE_16X16;
- best_s.pb=s->pb;
- best_s.block= s->block;
- best=1;
- for(i=0; i<6; i++)
- best_s.block_last_index[i]= s->block_last_index[i];
- }
+ s->mv[0][0][0] = s->p_mv_table[xy][0];
+ s->mv[0][0][1] = s->p_mv_table[xy][1];
+ encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER, pb, pb2, tex_pb,
+ &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
}
- if(mb_type&MB_TYPE_INTER4V){
+ if(mb_type&MB_TYPE_INTER4V){
+ s->mv_dir = MV_DIR_FORWARD;
s->mv_type = MV_TYPE_8X8;
s->mb_intra= 0;
for(i=0; i<4; i++){
s->mv[0][i][0] = s->motion_val[s->block_index[i]][0];
s->mv[0][i][1] = s->motion_val[s->block_index[i]][1];
}
- init_put_bits(&s->pb, bit_buf[2], 3000, NULL, NULL);
- s->block= s->inter4v_block;
-
- encode_mb(s);
- d= get_bit_count(&s->pb);
- if(d<dmin){
- flush_put_bits(&s->pb);
- dmin=d;
- for(i=0; i<4; i++){
- best_s.mv[0][i][0] = s->mv[0][i][0];
- best_s.mv[0][i][1] = s->mv[0][i][1];
- }
- best_s.mb_intra= 0;
- best_s.mv_type = MV_TYPE_8X8;
- best_s.pb=s->pb;
- best_s.block= s->block;
- best=2;
- for(i=0; i<6; i++)
- best_s.block_last_index[i]= s->block_last_index[i];
- }
+ encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER4V, pb, pb2, tex_pb,
+ &dmin, &next_block, 0, 0);
+ }
+ if(mb_type&MB_TYPE_FORWARD){
+ s->mv_dir = MV_DIR_FORWARD;
+ s->mv_type = MV_TYPE_16X16;
+ s->mb_intra= 0;
+ s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
+ s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
+ encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_FORWARD, pb, pb2, tex_pb,
+ &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
+ }
+ if(mb_type&MB_TYPE_BACKWARD){
+ s->mv_dir = MV_DIR_BACKWARD;
+ s->mv_type = MV_TYPE_16X16;
+ s->mb_intra= 0;
+ s->mv[1][0][0] = s->b_back_mv_table[xy][0];
+ s->mv[1][0][1] = s->b_back_mv_table[xy][1];
+ encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BACKWARD, pb, pb2, tex_pb,
+ &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
+ }
+ if(mb_type&MB_TYPE_BIDIR){
+ s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
+ s->mv_type = MV_TYPE_16X16;
+ s->mb_intra= 0;
+ s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
+ s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
+ s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
+ s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
+ encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BIDIR, pb, pb2, tex_pb,
+ &dmin, &next_block, 0, 0);
+ }
+ if(mb_type&MB_TYPE_DIRECT){
+ s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
+ s->mv_type = MV_TYPE_16X16; //FIXME
+ s->mb_intra= 0;
+ s->mv[0][0][0] = s->b_direct_forw_mv_table[xy][0];
+ s->mv[0][0][1] = s->b_direct_forw_mv_table[xy][1];
+ s->mv[1][0][0] = s->b_direct_back_mv_table[xy][0];
+ s->mv[1][0][1] = s->b_direct_back_mv_table[xy][1];
+ encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_DIRECT, pb, pb2, tex_pb,
+ &dmin, &next_block, s->b_direct_mv_table[xy][0], s->b_direct_mv_table[xy][1]);
}
if(mb_type&MB_TYPE_INTRA){
+ s->mv_dir = MV_DIR_FORWARD;
s->mv_type = MV_TYPE_16X16;
s->mb_intra= 1;
s->mv[0][0][0] = 0;
s->mv[0][0][1] = 0;
- init_put_bits(&s->pb, bit_buf[0], 3000, NULL, NULL);
- s->block= s->intra_block;
-
- encode_mb(s);
- d= get_bit_count(&s->pb);
- if(d<dmin){
- flush_put_bits(&s->pb);
- dmin=d;
- best_s.mv[0][0][0]= 0;
- best_s.mv[0][0][1]= 0;
- best_s.mb_intra= 1;
- best_s.mv_type = MV_TYPE_16X16;
- best_s.pb=s->pb;
- best_s.block= s->block;
- for(i=0; i<6; i++)
- best_s.block_last_index[i]= s->block_last_index[i];
- best=0;
- }
- /* force cleaning of ac/dc if needed ... */
- s->mbintra_table[mb_x + mb_y*s->mb_width]=1;
+ encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTRA, pb, pb2, tex_pb,
+ &dmin, &next_block, 0, 0);
+ /* force cleaning of ac/dc pred stuff if needed ... */
+ if(s->h263_pred || s->h263_aic)
+ s->mbintra_table[mb_x + mb_y*s->mb_width]=1;
}
- for(i=0; i<4; i++){
- s->mv[0][i][0] = best_s.mv[0][i][0];
- s->mv[0][i][1] = best_s.mv[0][i][1];
+ copy_context_after_encode(s, &best_s, -1);
+
+ pb_bits_count= get_bit_count(&s->pb);
+ flush_put_bits(&s->pb);
+ ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
+ s->pb= backup_s.pb;
+
+ if(s->data_partitioning){
+ pb2_bits_count= get_bit_count(&s->pb2);
+ flush_put_bits(&s->pb2);
+ ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
+ s->pb2= backup_s.pb2;
+
+ tex_pb_bits_count= get_bit_count(&s->tex_pb);
+ flush_put_bits(&s->tex_pb);
+ ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
+ s->tex_pb= backup_s.tex_pb;
}
- s->mb_intra= best_s.mb_intra;
- s->mv_type= best_s.mv_type;
- for(i=0; i<6; i++)
- s->block_last_index[i]= best_s.block_last_index[i];
- copy_bits(&pb, bit_buf[best], dmin);
- s->block= best_s.block;
- s->pb= pb;
+ s->last_bits= get_bit_count(&s->pb);
} else {
+ int motion_x, motion_y;
+ s->mv_type=MV_TYPE_16X16;
// only one MB-Type possible
- if(mb_type&MB_TYPE_INTRA){
+ switch(mb_type){
+ case MB_TYPE_INTRA:
+ s->mv_dir = MV_DIR_FORWARD;
s->mb_intra= 1;
- s->mv[0][0][0] = 0;
- s->mv[0][0][1] = 0;
- }else{
+ motion_x= s->mv[0][0][0] = 0;
+ motion_y= s->mv[0][0][1] = 0;
+ break;
+ case MB_TYPE_INTER:
+ s->mv_dir = MV_DIR_FORWARD;
+ s->mb_intra= 0;
+ motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
+ motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
+ break;
+ case MB_TYPE_INTER4V:
+ s->mv_dir = MV_DIR_FORWARD;
+ s->mv_type = MV_TYPE_8X8;
s->mb_intra= 0;
- s->mv[0][0][0] = s->mv_table[0][mb_y * s->mb_width + mb_x];
- s->mv[0][0][1] = s->mv_table[1][mb_y * s->mb_width + mb_x];
+ for(i=0; i<4; i++){
+ s->mv[0][i][0] = s->motion_val[s->block_index[i]][0];
+ s->mv[0][i][1] = s->motion_val[s->block_index[i]][1];
+ }
+ motion_x= motion_y= 0;
+ break;
+ case MB_TYPE_DIRECT:
+ s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
+ s->mb_intra= 0;
+ motion_x=s->b_direct_mv_table[xy][0];
+ motion_y=s->b_direct_mv_table[xy][1];
+ s->mv[0][0][0] = s->b_direct_forw_mv_table[xy][0];
+ s->mv[0][0][1] = s->b_direct_forw_mv_table[xy][1];
+ s->mv[1][0][0] = s->b_direct_back_mv_table[xy][0];
+ s->mv[1][0][1] = s->b_direct_back_mv_table[xy][1];
+ break;
+ case MB_TYPE_BIDIR:
+ s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
+ s->mb_intra= 0;
+ motion_x=0;
+ motion_y=0;
+ s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
+ s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
+ s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
+ s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
+ break;
+ case MB_TYPE_BACKWARD:
+ s->mv_dir = MV_DIR_BACKWARD;
+ s->mb_intra= 0;
+ motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
+ motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
+ break;
+ case MB_TYPE_FORWARD:
+ s->mv_dir = MV_DIR_FORWARD;
+ s->mb_intra= 0;
+ motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
+ motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
+// printf(" %d %d ", motion_x, motion_y);
+ break;
+ default:
+ motion_x=motion_y=0; //gcc warning fix
+ printf("illegal MB type\n");
}
- encode_mb(s);
+ encode_mb(s, motion_x, motion_y);
+ }
+ /* clean the MV table in IPS frames for direct mode in B frames */
+ if(s->mb_intra /* && I,P,S_TYPE */){
+ s->p_mv_table[xy][0]=0;
+ s->p_mv_table[xy][1]=0;
}
MPV_decode_mb(s, s->block);
+//printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_width, get_bit_count(&s->pb));
}
@@ -1650,14 +2118,20 @@ static void encode_picture(MpegEncContext *s, int picture_number)
}
//fprintf(stderr, "\nMB line: %d\tSize: %u\tAvg. Size: %u", s->mb_y,
// (s->pb.buf_ptr - s->ptr_last_mb_line), s->mb_line_avgsize);
- s->first_gob_line = 0;
+ if(s->codec_id!=CODEC_ID_MPEG4) s->first_slice_line = 0; //FIXME clean
}
}
emms_c();
- if (s->h263_msmpeg4 && s->pict_type == I_TYPE)
+ if(s->codec_id==CODEC_ID_MPEG4 && s->data_partitioning && s->pict_type!=B_TYPE)
+ ff_mpeg4_merge_partitions(s);
+
+ if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
msmpeg4_encode_ext_header(s);
+ if(s->codec_id==CODEC_ID_MPEG4)
+ ff_mpeg4_stuffing(&s->pb);
+
//if (s->gob_number)
// fprintf(stderr,"\nNumber of GOB: %d", s->gob_number);
@@ -1675,30 +2149,14 @@ static void encode_picture(MpegEncContext *s, int picture_number)
static int dct_quantize_c(MpegEncContext *s,
DCTELEM *block, int n,
- int qscale)
+ int qscale, int *overflow)
{
int i, j, level, last_non_zero, q;
const int *qmat;
- int minLevel, maxLevel;
-
- if(s->avctx!=NULL && s->avctx->codec->id==CODEC_ID_MPEG4){
- /* mpeg4 */
- minLevel= -2048;
- maxLevel= 2047;
- }else if(s->out_format==FMT_MPEG1){
- /* mpeg1 */
- minLevel= -255;
- maxLevel= 255;
- }else if(s->out_format==FMT_MJPEG){
- /* (m)jpeg */
- minLevel= -1023;
- maxLevel= 1023;
- }else{
- /* h263 / msmpeg4 */
- minLevel= -128;
- maxLevel= 127;
- }
-
+ int bias;
+ int max=0;
+ unsigned int threshold1, threshold2;
+
av_fdct (block);
/* we need this permutation so that we correct the IDCT
@@ -1706,81 +2164,54 @@ static int dct_quantize_c(MpegEncContext *s,
block_permute(block);
if (s->mb_intra) {
- if (n < 4)
- q = s->y_dc_scale;
- else
- q = s->c_dc_scale;
- q = q << 3;
-
+ if (!s->h263_aic) {
+ if (n < 4)
+ q = s->y_dc_scale;
+ else
+ q = s->c_dc_scale;
+ q = q << 3;
+ } else
+ /* For AIC we skip quant/dequant of INTRADC */
+ q = 1 << 3;
+
/* note: block[0] is assumed to be positive */
block[0] = (block[0] + (q >> 1)) / q;
i = 1;
last_non_zero = 0;
- if (s->out_format == FMT_H263) {
- qmat = s->q_non_intra_matrix;
- } else {
- qmat = s->q_intra_matrix;
- }
+ qmat = s->q_intra_matrix[qscale];
+ bias= s->intra_quant_bias<<(QMAT_SHIFT - 3 - QUANT_BIAS_SHIFT);
} else {
i = 0;
last_non_zero = -1;
- qmat = s->q_non_intra_matrix;
+ qmat = s->q_inter_matrix[qscale];
+ bias= s->inter_quant_bias<<(QMAT_SHIFT - 3 - QUANT_BIAS_SHIFT);
}
+ threshold1= (1<<(QMAT_SHIFT - 3)) - bias - 1;
+ threshold2= threshold1<<1;
for(;i<64;i++) {
j = zigzag_direct[i];
level = block[j];
level = level * qmat[j];
-#ifdef PARANOID
- {
- static int count = 0;
- int level1, level2, qmat1;
- double val;
- if (qmat == s->q_non_intra_matrix) {
- qmat1 = default_non_intra_matrix[j] * s->qscale;
- } else {
- qmat1 = default_intra_matrix[j] * s->qscale;
- }
- if (av_fdct != jpeg_fdct_ifast)
- val = ((double)block[j] * 8.0) / (double)qmat1;
- else
- val = ((double)block[j] * 8.0 * 2048.0) /
- ((double)qmat1 * aanscales[j]);
- level1 = (int)val;
- level2 = level / (1 << (QMAT_SHIFT - 3));
- if (level1 != level2) {
- fprintf(stderr, "%d: quant error qlevel=%d wanted=%d level=%d qmat1=%d qmat=%d wantedf=%0.6f\n",
- count, level2, level1, block[j], qmat1, qmat[j],
- val);
- count++;
- }
- }
-#endif
- /* XXX: slight error for the low range. Test should be equivalent to
- (level <= -(1 << (QMAT_SHIFT - 3)) || level >= (1 <<
- (QMAT_SHIFT - 3)))
- */
- if (((level << (31 - (QMAT_SHIFT - 3))) >> (31 - (QMAT_SHIFT - 3))) !=
- level) {
- level = level / (1 << (QMAT_SHIFT - 3));
- /* XXX: currently, this code is not optimal. the range should be:
- mpeg1: -255..255
- mpeg2: -2048..2047
- h263: -128..127
- mpeg4: -2048..2047
- */
- if (level > maxLevel)
- level = maxLevel;
- else if (level < minLevel)
- level = minLevel;
-
- block[j] = level;
+// if( bias+level >= (1<<(QMAT_SHIFT - 3))
+// || bias-level >= (1<<(QMAT_SHIFT - 3))){
+ if(((unsigned)(level+threshold1))>threshold2){
+ if(level>0){
+ level= (bias + level)>>(QMAT_SHIFT - 3);
+ block[j]= level;
+ }else{
+ level= (bias - level)>>(QMAT_SHIFT - 3);
+ block[j]= -level;
+ }
+ max |=level;
last_non_zero = i;
- } else {
- block[j] = 0;
+ }else{
+ block[j]=0;
}
}
+ *overflow= s->max_qcoeff < max; //overflow might have happend
+
return last_non_zero;
}
@@ -1822,7 +2253,7 @@ static void dct_unquantize_mpeg1_c(MpegEncContext *s,
}
} else {
i = 0;
- quant_matrix = s->non_intra_matrix;
+ quant_matrix = s->inter_matrix;
for(;i<nCoeffs;i++) {
int j= zigzag_direct[i];
level = block[j];
@@ -1848,6 +2279,69 @@ static void dct_unquantize_mpeg1_c(MpegEncContext *s,
}
}
+static void dct_unquantize_mpeg2_c(MpegEncContext *s, /* dequantizes block n in place; intra and inter macroblocks take separate paths below */
+ DCTELEM *block, int n, int qscale)
+{
+ int i, level, nCoeffs;
+ const UINT16 *quant_matrix;
+
+ if(s->alternate_scan) nCoeffs= 64; /* with alternate_scan set, all 64 positions are visited */
+ else nCoeffs= s->block_last_index[n]+1; /* otherwise stop after the last non-zero coefficient recorded for block n */
+
+ if (s->mb_intra) {
+ if (n < 4) /* blocks 0..3 scale DC by y_dc_scale, blocks 4 and up by c_dc_scale */
+ block[0] = block[0] * s->y_dc_scale;
+ else
+ block[0] = block[0] * s->c_dc_scale;
+ quant_matrix = s->intra_matrix;
+ for(i=1;i<nCoeffs;i++) { /* block[0] was handled above, so the loop starts at scan position 1 */
+ int j= zigzag_direct[i]; /* j: index into block[] and quant_matrix[] looked up for scan position i */
+ level = block[j];
+ if (level) { /* zero coefficients are left untouched */
+ if (level < 0) { /* scale the magnitude, then restore the sign, so the shift truncates toward zero */
+ level = -level;
+ level = (int)(level * qscale * quant_matrix[j]) >> 3;
+ level = -level;
+ } else {
+ level = (int)(level * qscale * quant_matrix[j]) >> 3;
+ }
+#ifdef PARANOID /* optional range check; only reports, does not clamp */
+ if (level < -2048 || level > 2047)
+ fprintf(stderr, "unquant error %d %d\n", i, level);
+#endif
+ block[j] = level;
+ }
+ }
+ } else {
+ int sum=-1; /* starts at -1 so that (sum & 1) ends up 1 exactly when the total of the dequantized levels is even */
+ i = 0;
+ quant_matrix = s->inter_matrix;
+ for(;i<nCoeffs;i++) { /* inter blocks have no separate DC handling; start at scan position 0 */
+ int j= zigzag_direct[i];
+ level = block[j];
+ if (level) {
+ if (level < 0) { /* same sign handling as the intra path, but with the (2*|level|+1) >> 4 reconstruction */
+ level = -level;
+ level = (((level << 1) + 1) * qscale *
+ ((int) (quant_matrix[j]))) >> 4;
+ level = -level;
+ } else {
+ level = (((level << 1) + 1) * qscale *
+ ((int) (quant_matrix[j]))) >> 4;
+ }
+#ifdef PARANOID /* optional range check; only reports, does not clamp */
+ if (level < -2048 || level > 2047)
+ fprintf(stderr, "unquant error %d %d\n", i, level);
+#endif
+ block[j] = level;
+ sum+=level; /* only the non-zero coefficients visited by this loop contribute */
+ }
+ }
+ block[63]^=sum&1; /* toggles the LSB of block[63] when the level total is even; NOTE(review): looks like MPEG-2 mismatch control, and the intra path above has no equivalent step - confirm against the spec */
+ }
+}
+
+
static void dct_unquantize_h263_c(MpegEncContext *s,
DCTELEM *block, int n, int qscale)
{
@@ -1891,188 +2385,117 @@ static void dct_unquantize_h263_c(MpegEncContext *s,
}
}
-/* rate control */
-
-/* an I frame is I_FRAME_SIZE_RATIO bigger than a P frame */
-#define I_FRAME_SIZE_RATIO 3.0
-#define QSCALE_K 20
-
-static void rate_control_init(MpegEncContext *s)
+static void remove_ac(MpegEncContext *s, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, int mb_x, int mb_y)
{
-#if 1
- emms_c();
-
- //initial values, they dont really matter as they will be totally different within a few frames
- s->i_pred.coeff= s->p_pred.coeff= 7.0;
- s->i_pred.count= s->p_pred.count= 1.0;
-
- s->i_pred.decay= s->p_pred.decay= 0.4;
-
- // use more bits at the beginning, otherwise high motion at the begin will look like shit
- s->qsum=100;
- s->qcount=100;
-
- s->short_term_qsum=0.001;
- s->short_term_qcount=0.001;
-#else
- s->wanted_bits = 0;
-
- if (s->intra_only) {
- s->I_frame_bits = ((INT64)s->bit_rate * FRAME_RATE_BASE) / s->frame_rate;
- s->P_frame_bits = s->I_frame_bits;
- } else {
- s->P_frame_bits = (int) ((float)(s->gop_size * s->bit_rate) /
- (float)((float)s->frame_rate / FRAME_RATE_BASE * (I_FRAME_SIZE_RATIO + s->gop_size - 1)));
- s->I_frame_bits = (int)(s->P_frame_bits * I_FRAME_SIZE_RATIO);
+ int dc, dcb, dcr, y, i;
+ for(i=0; i<4; i++){
+ dc= s->dc_val[0][mb_x*2+1 + (i&1) + (mb_y*2+1 + (i>>1))*(s->mb_width*2+2)];
+ for(y=0; y<8; y++){
+ int x;
+ for(x=0; x<8; x++){
+ dest_y[x + (i&1)*8 + (y + (i>>1)*8)*s->linesize]= dc/8;
+ }
+ }
+ }
+ dcb = s->dc_val[1][mb_x+1 + (mb_y+1)*(s->mb_width+2)];
+ dcr= s->dc_val[2][mb_x+1 + (mb_y+1)*(s->mb_width+2)];
+ for(y=0; y<8; y++){
+ int x;
+ for(x=0; x<8; x++){
+ dest_cb[x + y*(s->linesize>>1)]= dcb/8;
+ dest_cr[x + y*(s->linesize>>1)]= dcr/8;
+ }
}
-
-#if defined(DEBUG)
- printf("I_frame_size=%d P_frame_size=%d\n",
- s->I_frame_bits, s->P_frame_bits);
-#endif
-#endif
-}
-
-static double predict(Predictor *p, double q, double var)
-{
- return p->coeff*var / (q*p->count);
-}
-
-static void update_predictor(Predictor *p, double q, double var, double size)
-{
- double new_coeff= size*q / (var + 1);
- if(var<1000) return;
-/*{
-int pred= predict(p, q, var);
-int error= abs(pred-size);
-static double sum=0;
-static int count=0;
-if(count>5) sum+=error;
-count++;
-if(256*256*256*64%count==0){
- printf("%d %f %f\n", count, sum/count, p->coeff);
-}
-}*/
- p->count*= p->decay;
- p->coeff*= p->decay;
- p->count++;
- p->coeff+= new_coeff;
}
-static int rate_estimate_qscale(MpegEncContext *s)
+/**
+ * Conceals past errors, and also drops B-frames if needed.
+ *
+ */
+void ff_conceal_past_errors(MpegEncContext *s, int unknown_pos)
{
-#if 1
- int qmin= s->qmin;
- int qmax= s->qmax;
- int rate_q=5;
- float q;
- int qscale;
- float br_compensation;
- double diff;
- double short_term_q;
- double long_term_q;
- int last_qscale= s->qscale;
- double fps;
- INT64 wanted_bits;
- emms_c();
-
- fps= (double)s->frame_rate / FRAME_RATE_BASE;
- wanted_bits= s->bit_rate*(double)s->picture_number/fps;
-
+ int mb_x= s->mb_x;
+ int mb_y= s->mb_y;
+ int mb_dist=0;
+ int i, intra_count=0, inter_count=0;
+ int intra_conceal= s->msmpeg4_version ? 50 : 50; //FIXME finetune
+ int inter_conceal= s->msmpeg4_version ? 50 : 50;
- if(s->picture_number>2){
- /* update predictors */
- if(s->last_pict_type == I_TYPE){
- //FIXME
- }else{ //P Frame
-//printf("%d %d %d %f\n", s->qscale, s->last_mc_mb_var, s->frame_bits, s->p_pred.coeff);
- update_predictor(&s->p_pred, s->qscale, s->last_mc_mb_var, s->frame_bits);
- }
- }
+ // for last block
+ if(mb_x>=s->mb_width) mb_x= s->mb_width -1;
+ if(mb_y>=s->mb_height) mb_y= s->mb_height-1;
- if(s->pict_type == I_TYPE){
- //FIXME
- rate_q= s->qsum/s->qcount;
- }else{ //P Frame
- int i;
- int diff, best_diff=1000000000;
- for(i=1; i<=31; i++){
- diff= predict(&s->p_pred, i, s->mc_mb_var) - (double)s->bit_rate/fps;
- if(diff<0) diff= -diff;
- if(diff<best_diff){
- best_diff= diff;
- rate_q= i;
- }
- }
+ if(s->decoding_error==0 && unknown_pos){
+ if(s->data_partitioning && s->pict_type!=B_TYPE)
+ s->decoding_error= DECODING_AC_LOST;
+ else
+ s->decoding_error= DECODING_DESYNC;
}
- s->short_term_qsum*=s->qblur;
- s->short_term_qcount*=s->qblur;
+ if(s->decoding_error==DECODING_DESYNC && s->pict_type!=B_TYPE) s->next_p_frame_damaged=1;
- s->short_term_qsum+= rate_q;
- s->short_term_qcount++;
- short_term_q= s->short_term_qsum/s->short_term_qcount;
+ for(i=mb_x + mb_y*s->mb_width; i>=0; i--){
+ if(s->mbintra_table[i]) intra_count++;
+ else inter_count++;
+ }
- long_term_q= s->qsum/s->qcount*(s->total_bits+1)/(wanted_bits+1); //+1 to avoid nan & 0
-
-// q= (long_term_q - short_term_q)*s->qcompress + short_term_q;
- q= 1/((1/long_term_q - 1/short_term_q)*s->qcompress + 1/short_term_q);
+ if(s->decoding_error==DECODING_AC_LOST){
+ intra_conceal*=2;
+ inter_conceal*=2;
+ }else if(s->decoding_error==DECODING_ACDC_LOST){
+ intra_conceal*=2;
+ inter_conceal*=2;
+ }
- diff= s->total_bits - wanted_bits;
- br_compensation= (s->bit_rate_tolerance - diff)/s->bit_rate_tolerance;
- if(br_compensation<=0.0) br_compensation=0.001;
- q/=br_compensation;
+ if(unknown_pos && (intra_count<inter_count)){
+ intra_conceal= inter_conceal= s->mb_num;
+// printf("%d %d\n",intra_count, inter_count);
+ }
- qscale= (int)(q + 0.5);
- if (qscale<qmin) qscale=qmin;
- else if(qscale>qmax) qscale=qmax;
-
- if (qscale<last_qscale-s->max_qdiff) qscale=last_qscale-s->max_qdiff;
- else if(qscale>last_qscale+s->max_qdiff) qscale=last_qscale+s->max_qdiff;
+ fprintf(stderr, "concealing errors\n");
+
+ /* for all MBs from the current one back until the last resync marker */
+ for(; mb_y>=0 && mb_y>=s->resync_mb_y; mb_y--){
+ for(; mb_x>=0; mb_x--){
+ uint8_t *dest_y = s->current_picture[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
+ uint8_t *dest_cb = s->current_picture[1] + (mb_y * 8 * (s->linesize >> 1)) + mb_x * 8;
+ uint8_t *dest_cr = s->current_picture[2] + (mb_y * 8 * (s->linesize >> 1)) + mb_x * 8;
+ int mb_x_backup= s->mb_x; //FIXME pass xy to mpeg_motion
+ int mb_y_backup= s->mb_y;
+ s->mb_x=mb_x;
+ s->mb_y=mb_y;
+ if(s->mbintra_table[mb_y*s->mb_width + mb_x] && mb_dist<intra_conceal){
+ if(s->decoding_error==DECODING_AC_LOST){
+ remove_ac(s, dest_y, dest_cb, dest_cr, mb_x, mb_y);
+// printf("remove ac to %d %d\n", mb_x, mb_y);
+ }else{
+ mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
+ s->last_picture, 0, 0, put_pixels_tab,
+ 0/*mx*/, 0/*my*/, 16);
+ }
+ }
+ else if(!s->mbintra_table[mb_y*s->mb_width + mb_x] && mb_dist<inter_conceal){
+ int mx=0;
+ int my=0;
+
+ if(s->decoding_error!=DECODING_DESYNC){
+ int xy= mb_x*2+1 + (mb_y*2+1)*(s->mb_width*2+2);
+ mx= s->motion_val[ xy ][0];
+ my= s->motion_val[ xy ][1];
+ }
- s->qsum+= qscale;
- s->qcount++;
+ mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
+ s->last_picture, 0, 0, put_pixels_tab,
+ mx, my, 16);
+ }
+ s->mb_x= mb_x_backup;
+ s->mb_y= mb_y_backup;
- s->last_pict_type= s->pict_type;
-//printf("q:%d diff:%d comp:%f rate_q:%d st_q:%f fvar:%d last_size:%d\n", qscale, (int)diff, br_compensation,
-// rate_q, short_term_q, s->mc_mb_var, s->frame_bits);
-//printf("%d %d\n", s->bit_rate, (int)fps);
- return qscale;
-#else
- INT64 diff, total_bits = s->total_bits;
- float q;
- int qscale;
- if (s->pict_type == I_TYPE) {
- s->wanted_bits += s->I_frame_bits;
- } else {
- s->wanted_bits += s->P_frame_bits;
- }
- diff = s->wanted_bits - total_bits;
- q = 31.0 - (float)diff / (QSCALE_K * s->mb_height * s->mb_width);
- /* adjust for I frame */
- if (s->pict_type == I_TYPE && !s->intra_only) {
- q /= I_FRAME_SIZE_RATIO;
- }
-
- /* using a too small Q scale leeds to problems in mpeg1 and h263
- because AC coefficients are clamped to 255 or 127 */
- qmin = 3;
- if (q < qmin)
- q = qmin;
- else if (q > 31)
- q = 31;
- qscale = (int)(q + 0.5);
-#if defined(DEBUG)
- printf("\n%d: total=%0.0f wanted=%0.0f br=%0.1f diff=%d qest=%2.1f\n",
- s->picture_number,
- (double)total_bits,
- (double)s->wanted_bits,
- (float)s->frame_rate / FRAME_RATE_BASE *
- total_bits / s->picture_number,
- (int)diff, q);
-#endif
- return qscale;
-#endif
+ if(mb_x== s->resync_mb_x && mb_y== s->resync_mb_y) return;
+ if(!s->mbskip_table[mb_x + mb_y*s->mb_width]) mb_dist++;
+ }
+ mb_x=s->mb_width-1;
+ }
}
AVCodec mpeg1video_encoder = {
diff --git a/src/libffmpeg/libavcodec/mpegvideo.h b/src/libffmpeg/libavcodec/mpegvideo.h
index f809a1255..2e957451b 100644
--- a/src/libffmpeg/libavcodec/mpegvideo.h
+++ b/src/libffmpeg/libavcodec/mpegvideo.h
@@ -1,32 +1,31 @@
/*
* Generic DCT based hybrid video encoder
- * Copyright (c) 2000,2001 Gerard Lantau.
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
*
- * This program is distributed in the hope that it will be useful,
+ * This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-/* Macros for picture code type. */
-#define I_TYPE 1
-#define P_TYPE 2
-#define B_TYPE 3
-#define S_TYPE 4 //S(GMC)-VOP MPEG4
+#ifndef AVCODEC_MPEGVIDEO_H
+#define AVCODEC_MPEGVIDEO_H
+
+#define FRAME_SKIPED 100 // return value for header parsers if frame is not coded
enum OutputFormat {
FMT_MPEG1,
FMT_H263,
- FMT_MJPEG,
+ FMT_MJPEG,
};
#define MPEG_BUF_SIZE (16 * 1024)
@@ -36,6 +35,11 @@ enum OutputFormat {
#define MAX_FCODE 7
#define MAX_MV 2048
+#define REORDER_BUFFER_SIZE (FF_MAX_B_FRAMES+2)
+
+#define ME_MAP_SIZE 64
+#define ME_MAP_SHIFT 3
+#define ME_MAP_MV_BITS 11
typedef struct Predictor{
double coeff;
@@ -43,6 +47,33 @@ typedef struct Predictor{
double decay;
} Predictor;
+typedef struct RateControlEntry{ /* one rate-control record; RateControlContext.entry points at these. NOTE(review): presumably one record per picture - confirm */
+ int pict_type; /* presumably I_TYPE, P_TYPE, B_TYPE, ... as for MpegEncContext.pict_type - confirm */
+ int qscale;
+ int mv_bits; /* NOTE(review): same name as the MpegEncContext field listed under "statistics, used for 2-pass encoding" - confirm same meaning */
+ int i_tex_bits;
+ int p_tex_bits;
+ int misc_bits; /* NOTE(review): MpegEncContext.misc_bits is commented "cbp, mb_type" - confirm same meaning here */
+ UINT64 expected_bits;
+ int new_pict_type; /* NOTE(review): new_pict_type/new_qscale look like values chosen by a later pass - confirm against ratecontrol.c */
+ float new_qscale;
+}RateControlEntry;
+
+typedef struct RateControlContext{ /* rate-control state; embedded in MpegEncContext as rc_context */
+ FILE *stats_file; /* NOTE(review): whether this is read, written or both is not visible here - confirm */
+ int num_entries; /* presumably the number of elements behind entry - confirm */
+ RateControlEntry *entry;
+}RateControlContext;
+
+typedef struct ReorderBuffer{ /* element type of MpegEncContext.coded_order[REORDER_BUFFER_SIZE] */
+ UINT8 *picture[3]; /* presumably one pointer per image plane, like the other UINT8 *xxx_picture[3] members - confirm */
+ int pict_type; /* presumably I_TYPE, P_TYPE, B_TYPE, ... as for MpegEncContext.pict_type - confirm */
+ int qscale;
+ int force_type; /* MpegEncContext.force_type is documented as "0= no force, otherwise I_TYPE, P_TYPE, ..." - presumably the same here */
+ int picture_number;
+ int picture_in_gop_number; /* MpegEncContext documents its same-named field as "0-> first pic in gop" - presumably the same here */
+} ReorderBuffer;
+
typedef struct MpegEncContext {
struct AVCodecContext *avctx;
/* the following parameters must be initialized before encoding */
@@ -53,11 +84,15 @@ typedef struct MpegEncContext {
int bit_rate; /* wanted bit rate */
int bit_rate_tolerance; /* amount of +- bits (>0)*/
enum OutputFormat out_format; /* output format */
+ int h263_pred; /* use mpeg4/h263 ac/dc predictions */
+
+/* the following codec id fields are deprecated in favor of codec_id */
int h263_plus; /* h263 plus headers */
int h263_rv10; /* use RV10 variation for H263 */
- int h263_pred; /* use mpeg4/h263 ac/dc predictions */
- int h263_msmpeg4; /* generate MSMPEG4 compatible stream */
+ int h263_msmpeg4; /* generate MSMPEG4 compatible stream (deprecated, use msmpeg4_version instead)*/
int h263_intel; /* use I263 intel h263 header */
+
+ int codec_id; /* see CODEC_ID_xxx */
int fixed_qscale; /* fixed qscale if non zero */
float qcompress; /* amount of qscale change between easy & hard scenes (0.0-1.0) */
float qblur; /* amount of qscale smoothing over time (0.0-1.0) */
@@ -66,7 +101,16 @@ typedef struct MpegEncContext {
int max_qdiff; /* max qscale difference between frames */
int encoding; /* true if we are encoding (vs decoding) */
int flags; /* AVCodecContext.flags (HQ, MV4, ...) */
- int force_type; /* 0= no force, otherwise I_TYPE, P_TYPE, ... */
+ int force_input_type;/* 0= no force, otherwise I_TYPE, P_TYPE, ... */
+ int max_b_frames; /* max number of b-frames for encoding */
+ float b_quant_factor;/* qscale factor between ips and b frames */
+ float b_quant_offset;/* qscale offset between ips and b frames */
+ int rc_strategy;
+ int b_frame_strategy;
+ int luma_elim_threshold;
+ int chroma_elim_threshold;
+ int strict_std_compliance; /* strictly follow the std (MPEG4, ...) */
+ int workaround_bugs; /* workaround bugs in encoders which cannot be detected automatically */
/* the following fields are managed internally by the encoder */
/* bit output */
@@ -74,46 +118,71 @@ typedef struct MpegEncContext {
/* sequence parameters */
int context_initialized;
+ int input_picture_number;
+ int input_picture_in_gop_number; /* 0-> first pic in gop, ... */
int picture_number;
int fake_picture_number; /* picture number at the bitstream frame rate */
int gop_picture_number; /* index of the first picture of a GOP based on fake_pic_num & mpeg1 specific */
int picture_in_gop_number; /* 0-> first pic in gop, ... */
- int mb_width, mb_height;
+ int b_frames_since_non_b; /* used for encoding, relative to not yet reordered input */
+ int mb_width, mb_height; /* number of MBs horizontally & vertically */
int mb_num; /* number of MBs of a picture */
int linesize; /* line size, in bytes, may be different from width */
UINT8 *new_picture[3]; /* picture to be compressed */
- UINT8 *last_picture[3]; /* previous picture */
+ UINT8 *picture_buffer[REORDER_BUFFER_SIZE][3]; /* internal buffers used for reordering of input pictures */
+ int picture_buffer_index;
+ ReorderBuffer coded_order[REORDER_BUFFER_SIZE];
+ UINT8 *last_picture[3]; /* previous picture */
UINT8 *last_picture_base[3]; /* real start of the picture */
- UINT8 *next_picture[3]; /* previous picture (for bidir pred) */
+ UINT8 *next_picture[3]; /* previous picture (for bidir pred) */
UINT8 *next_picture_base[3]; /* real start of the picture */
- UINT8 *aux_picture[3]; /* aux picture (for B frames only) */
- UINT8 *aux_picture_base[3]; /* real start of the picture */
- UINT8 *current_picture[3]; /* buffer to store the decompressed current picture */
- int last_dc[3]; /* last DC values for MPEG1 */
- INT16 *dc_val[3]; /* used for mpeg4 DC prediction, all 3 arrays must be continuous */
+ UINT8 *aux_picture[3]; /* aux picture (for B frames only) */
+ UINT8 *aux_picture_base[3]; /* real start of the picture */
+ UINT8 *current_picture[3]; /* buffer to store the decompressed current picture */
+ int num_available_buffers; /* is 0 at the start & after seeking, after the first I frame its 1 after next I/P 2 */
+ int last_dc[3]; /* last DC values for MPEG1 */
+ INT16 *dc_val[3]; /* used for mpeg4 DC prediction, all 3 arrays must be continuous */
int y_dc_scale, c_dc_scale;
- UINT8 *coded_block; /* used for coded block pattern prediction */
- INT16 (*ac_val[3])[16]; /* used for for mpeg4 AC prediction, all 3 arrays must be continuous */
+ UINT8 *coded_block; /* used for coded block pattern prediction (msmpeg4v3, wmv1)*/
+ INT16 (*ac_val[3])[16]; /* used for for mpeg4 AC prediction, all 3 arrays must be continuous */
int ac_pred;
int mb_skiped; /* MUST BE SET only during DECODING */
- UINT8 *mbskip_table; /* used to avoid copy if macroblock
- skipped (for black regions for example) */
- UINT8 *mbintra_table; /* used to kill a few memsets */
-
- int qscale;
- int pict_type;
- int last_non_b_pict_type; /* used for mpeg4 gmc b-frames */
- int last_pict_type; /* used for bit rate stuff (needs that to update the right predictor) */
+ UINT8 *mbskip_table; /* used to avoid copy if macroblock skipped (for black regions for example)
+ and used for b-frame encoding & decoding (contains skip table of next P Frame) */
+ UINT8 *mbintra_table; /* used to avoid setting {ac, dc, cbp}-pred stuff to zero on inter MB decoding */
+ UINT8 *cbp_table; /* used to store cbp, ac_pred for partitioned decoding */
+ UINT8 *pred_dir_table; /* used to store pred_dir for partitioned decoding */
+ INT8 *qscale_table; /* used to store qscale for partitioned decoding (& postprocessing FIXME export) */
+
+ int input_qscale; /* qscale prior to reordering of frames */
+ int input_pict_type; /* pict_type prior to reordering of frames */
+ int force_type; /* 0= no force, otherwise I_TYPE, P_TYPE, ... */
+ int qscale; /* QP */
+ int last_non_b_qscale; /* QP of last non b frame used for b frame qscale*/
+ int pict_type; /* I_TYPE, P_TYPE, B_TYPE, ... */
+ int last_non_b_pict_type; /* used for mpeg4 gmc b-frames & ratecontrol */
int frame_rate_index;
/* motion compensation */
int unrestricted_mv;
int h263_long_vectors; /* use horrible h263v1 long vector mode */
- int f_code; /* resolution */
- int b_code; /* backward resolution for B Frames (mpeg4) */
- INT16 *mv_table[2]; /* MV table (1MV per MB)*/
- INT16 (*motion_val)[2]; /* used for MV prediction (4MV per MB)*/
- int full_search;
+ int f_code; /* forward MV resolution */
+ int b_code; /* backward MV resolution for B Frames (mpeg4) */
+ INT16 (*motion_val)[2]; /* used for MV prediction (4MV per MB) */
+ INT16 (*p_mv_table)[2]; /* MV table (1MV per MB) p-frame encoding */
+ INT16 (*b_forw_mv_table)[2]; /* MV table (1MV per MB) forward mode b-frame encoding */
+ INT16 (*b_back_mv_table)[2]; /* MV table (1MV per MB) backward mode b-frame encoding */
+ INT16 (*b_bidir_forw_mv_table)[2]; /* MV table (1MV per MB) bidir mode b-frame encoding */
+ INT16 (*b_bidir_back_mv_table)[2]; /* MV table (1MV per MB) bidir mode b-frame encoding */
+ INT16 (*b_direct_forw_mv_table)[2];/* MV table (1MV per MB) direct mode b-frame encoding */
+ INT16 (*b_direct_back_mv_table)[2];/* MV table (1MV per MB) direct mode b-frame encoding */
+ INT16 (*b_direct_mv_table)[2]; /* MV table (1MV per MB) direct mode b-frame encoding */
+ int me_method; /* ME algorithm */
+ uint8_t *me_scratchpad; /* data area for the me algo, so that the ME doesnt need to malloc/free */
+ uint32_t *me_map; /* map to avoid duplicate evaluations */
+ uint16_t *me_score_map; /* map to store the SADs */
+ int me_map_generation;
+ int skip_me; /* set if ME is skiped for the current MB */
int mv_dir;
#define MV_DIR_BACKWARD 1
#define MV_DIR_FORWARD 2
@@ -131,62 +200,76 @@ typedef struct MpegEncContext {
*/
int mv[2][4][2];
int field_select[2][2];
- int last_mv[2][2][2];
+ int last_mv[2][2][2]; /* last MV, used for MV prediction in MPEG1 & B-frame MPEG4 */
UINT16 (*mv_penalty)[MAX_MV*2+1]; /* amount of bits needed to encode a MV, used for ME */
UINT8 *fcode_tab; /* smallest fcode needed for each MV */
int has_b_frames;
- int no_rounding; /* apply no rounding to motion estimation (MPEG4) */
+ int no_rounding; /* apply no rounding to motion compensation (MPEG4, msmpeg4, ...)
+ for b-frames rounding mode is always 0 */
+
+ int hurry_up; /* when set to 1 during decoding, b frames will be skiped
+ when set to 2 idct/dequant will be skipped too */
/* macroblock layer */
int mb_x, mb_y;
int mb_incr;
int mb_intra;
- UINT16 *mb_var; /* Table for MB variances */
- UINT8 *mb_type; /* Table for MB type */
+ UINT16 *mb_var; /* Table for MB variances */
+ UINT16 *mc_mb_var; /* Table for motion compensated MB variances */
+ UINT8 *mb_type; /* Table for MB type */
#define MB_TYPE_INTRA 0x01
#define MB_TYPE_INTER 0x02
#define MB_TYPE_INTER4V 0x04
#define MB_TYPE_SKIPED 0x08
+#define MB_TYPE_GMC 0x10
+
#define MB_TYPE_DIRECT 0x10
#define MB_TYPE_FORWARD 0x20
-#define MB_TYPE_BACKWAD 0x40
+#define MB_TYPE_BACKWARD 0x40
#define MB_TYPE_BIDIR 0x80
- int block_index[6];
+ int block_index[6]; /* index to current MB in block based arrays with edges*/
int block_wrap[6];
/* matrix transmitted in the bitstream */
UINT16 intra_matrix[64];
UINT16 chroma_intra_matrix[64];
- UINT16 non_intra_matrix[64];
- UINT16 chroma_non_intra_matrix[64];
+ UINT16 inter_matrix[64];
+ UINT16 chroma_inter_matrix[64];
+#define QUANT_BIAS_SHIFT 4
+ int intra_quant_bias; /* bias for the quantizer */
+ int inter_quant_bias; /* bias for the quantizer */
+ int min_qcoeff; /* minimum encodable coefficient */
+ int max_qcoeff; /* maximum encodable coefficient */
/* precomputed matrix (combine qscale and DCT renorm) */
- int q_intra_matrix[64];
- int q_non_intra_matrix[64];
+ int q_intra_matrix[32][64];
+ int q_inter_matrix[32][64];
/* identical to the above but for MMX & these are not permutated */
- UINT16 __align8 q_intra_matrix16[64] ;
- UINT16 __align8 q_non_intra_matrix16[64];
+ UINT16 __align8 q_intra_matrix16[32][64];
+ UINT16 __align8 q_inter_matrix16[32][64];
+ UINT16 __align8 q_intra_matrix16_bias[32][64];
+ UINT16 __align8 q_inter_matrix16_bias[32][64];
int block_last_index[6]; /* last non zero coefficient in block */
void *opaque; /* private data for the user */
/* bit rate control */
- int I_frame_bits; /* wanted number of bits per I frame */
- int P_frame_bits; /* same for P frame */
- int avg_mb_var; /* average MB variance for current frame */
- int mc_mb_var; /* motion compensated MB variance for current frame */
- int last_mc_mb_var; /* motion compensated MB variance for last frame */
+ int I_frame_bits; //FIXME used in mpeg12 ...
+ int mb_var_sum; /* sum of MB variance for current frame */
+ int mc_mb_var_sum; /* motion compensated MB variance for current frame */
+ int last_non_b_mc_mb_var;/* motion compensated MB variance for last non b frame */
INT64 wanted_bits;
INT64 total_bits;
- int frame_bits; /* bits used for the current frame */
- int last_frame_bits; /* bits used for the last frame */
+ int frame_bits; /* bits used for the current frame */
+ int pb_frame_bits; /* bits of the last b...bp group */
Predictor i_pred;
Predictor p_pred;
double qsum; /* sum of qscales */
double qcount; /* count of qscales */
double short_term_qsum; /* sum of recent qscales */
double short_term_qcount; /* count of recent qscales */
+ RateControlContext rc_context;
/* statistics, used for 2-pass encoding */
int mv_bits;
@@ -198,11 +281,24 @@ typedef struct MpegEncContext {
int skip_count;
int misc_bits; // cbp, mb_type
int last_bits; //temp var used for calculating the above vars
+
+ /* error concealment / resync */
+ int resync_mb_x; /* x position of last resync marker */
+ int resync_mb_y; /* y position of last resync marker */
+ int mb_num_left; /* number of MBs left in this video packet */
+ GetBitContext next_resync_gb; /* starts at the next resync marker */
+ int next_resync_qscale; /* qscale of next resync marker */
+ int next_resync_pos; /* bitstream position of next resync marker */
+#define DECODING_AC_LOST -1
+#define DECODING_ACDC_LOST -2
+#define DECODING_DESYNC -3
+ int decoding_error;
+ int next_p_frame_damaged; /* set if the next p frame is damaged, to avoid showing trashed b frames */
+ int error_resilience;
/* H.263 specific */
int gob_number;
int gob_index;
- int first_gob_line;
/* H.263+ specific */
int umvplus;
@@ -212,11 +308,13 @@ typedef struct MpegEncContext {
/* mpeg4 specific */
int time_increment_resolution;
- int time_increment_bits;
- int time_increment;
- int time_base;
- int time;
- int last_non_b_time[2];
+ int time_increment_bits; /* number of bits to represent the fractional part of time */
+ int last_time_base;
+ int time_base; /* time in seconds of last I,P,S Frame */
+ INT64 time; /* time of current frame */
+ INT64 last_non_b_time;
+ UINT16 pp_time; /* time distance between the last 2 p,s,i frames */
+ UINT16 bp_time; /* time distance between the last b and p,s,i frame */
int shape;
int vol_sprite_usage;
int sprite_width;
@@ -231,21 +329,32 @@ typedef struct MpegEncContext {
int sprite_shift[2][2];
int mcsel;
int quant_precision;
- int quarter_sample;
+ int quarter_sample; /* 1->qpel, 0->half pel ME/MC */
int scalability;
int new_pred;
int reduced_res_vop;
int aspect_ratio_info;
int sprite_warping_accuracy;
int low_latency_sprite;
- int data_partioning;
- int resync_marker;
- int resync_x_pos;
+ int data_partitioning;
+ int rvlc; /* reversible vlc */
+ int resync_marker; /* could this stream contain resync markers*/
+ int low_delay; /* no reordering needed / has no b-frames */
+ int vo_type;
+ int vol_control_parameters; /* does the stream contain the low_delay flag, used to workaround buggy encoders */
+ PutBitContext tex_pb; /* used for data partitioned VOPs */
+ PutBitContext pb2; /* used for data partitioned VOPs */
+#define PB_BUFFER_SIZE 1024*256
+ uint8_t *tex_pb_buffer;
+ uint8_t *pb2_buffer;
/* divx specific, used to workaround (many) bugs in divx5 */
int divx_version;
int divx_build;
-
+#define BITSTREAM_BUFFER_SIZE 1024*256
+ UINT8 *bitstream_buffer; //Divx 5.01 puts several frames in a single one, this is used to reorder them
+ int bitstream_buffer_size;
+
/* RV10 specific */
int rv10_version; /* RV10 version: 0 or 3 */
int rv10_first_dc_coded[3];
@@ -256,6 +365,7 @@ typedef struct MpegEncContext {
int mjpeg_hsample[3]; /* horizontal sampling factors, default = {2, 1, 1} */
int mjpeg_write_tables; /* do we want to have quantisation- and
huffmantables in the jpeg file ? */
+ int mjpeg_data_only_frames; /* frames only with SOI, SOS and EOI markers */
/* MSMPEG4 specific */
int mv_table_index;
@@ -266,8 +376,7 @@ typedef struct MpegEncContext {
int slice_height; /* in macroblocks */
int first_slice_line; /* used in mpeg4 too to handle resync markers */
int flipflop_rounding;
- int bitrate;
- int msmpeg4_version; /* 1=mp41, 2=mp42, 3=mp43/divx3 */
+ int msmpeg4_version; /* 0=not msmpeg4, 1=mp41, 2=mp42, 3=mp43/divx3 */
/* decompression specific */
GetBitContext gb;
@@ -306,10 +415,14 @@ typedef struct MpegEncContext {
UINT32 mb_line_avgsize;
DCTELEM (*block)[64]; /* points to one of the following blocks */
- DCTELEM intra_block[6][64] __align8;
- DCTELEM inter_block[6][64] __align8;
- DCTELEM inter4v_block[6][64] __align8;
- void (*dct_unquantize)(struct MpegEncContext *s,
+ DCTELEM blocks[2][6][64] __align8; // for HQ mode we need to keep the best block
+ void (*dct_unquantize_mpeg1)(struct MpegEncContext *s,
+ DCTELEM *block, int n, int qscale);
+ void (*dct_unquantize_mpeg2)(struct MpegEncContext *s,
+ DCTELEM *block, int n, int qscale);
+ void (*dct_unquantize_h263)(struct MpegEncContext *s,
+ DCTELEM *block, int n, int qscale);
+ void (*dct_unquantize)(struct MpegEncContext *s, // unquantizer to use (mpeg4 can use both)
DCTELEM *block, int n, int qscale);
} MpegEncContext;
@@ -321,11 +434,20 @@ void MPV_frame_end(MpegEncContext *s);
#ifdef HAVE_MMX
void MPV_common_init_mmx(MpegEncContext *s);
#endif
+extern int (*dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
+extern void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w);
+void ff_conceal_past_errors(MpegEncContext *s, int conceal_all);
+void ff_copy_bits(PutBitContext *pb, UINT8 *src, int length);
+void ff_clean_intra_table_entries(MpegEncContext *s);
/* motion_est.c */
-
-void estimate_motion(MpegEncContext *s,
- int mb_x, int mb_y);
+void ff_estimate_p_frame_motion(MpegEncContext * s,
+ int mb_x, int mb_y);
+void ff_estimate_b_frame_motion(MpegEncContext * s,
+ int mb_x, int mb_y);
+int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type);
+void ff_fix_long_p_mvs(MpegEncContext * s);
+void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type);
/* mpeg12.c */
extern INT16 default_intra_matrix[64];
@@ -382,6 +504,7 @@ INT16 *h263_pred_motion(MpegEncContext * s, int block,
int *px, int *py);
void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n,
int dir);
+void ff_set_mpeg4_time(MpegEncContext * s, int picture_number);
void mpeg4_encode_picture_header(MpegEncContext *s, int picture_number);
void h263_encode_init(MpegEncContext *s);
@@ -393,6 +516,13 @@ int intel_h263_decode_picture_header(MpegEncContext *s);
int h263_decode_mb(MpegEncContext *s,
DCTELEM block[6][64]);
int h263_get_picture_format(int width, int height);
+int ff_mpeg4_decode_video_packet_header(MpegEncContext *s);
+int ff_mpeg4_resync(MpegEncContext *s);
+void ff_mpeg4_encode_video_packet_header(MpegEncContext *s);
+void ff_mpeg4_clean_buffers(MpegEncContext *s);
+void ff_mpeg4_stuffing(PutBitContext * pbc);
+void ff_mpeg4_init_partitions(MpegEncContext *s);
+void ff_mpeg4_merge_partitions(MpegEncContext *s);
/* rv10.c */
void rv10_encode_picture_header(MpegEncContext *s, int picture_number);
@@ -404,12 +534,12 @@ void msmpeg4_encode_ext_header(MpegEncContext * s);
void msmpeg4_encode_mb(MpegEncContext * s,
DCTELEM block[6][64],
int motion_x, int motion_y);
-void msmpeg4_dc_scale(MpegEncContext * s);
int msmpeg4_decode_picture_header(MpegEncContext * s);
int msmpeg4_decode_ext_header(MpegEncContext * s, int buf_size);
int msmpeg4_decode_mb(MpegEncContext *s,
DCTELEM block[6][64]);
int msmpeg4_decode_init_vlc(MpegEncContext *s);
+void ff_old_msmpeg4_dc_scale(MpegEncContext *s);
/* mjpegenc.c */
@@ -419,3 +549,12 @@ void mjpeg_encode_mb(MpegEncContext *s,
DCTELEM block[6][64]);
void mjpeg_picture_header(MpegEncContext *s);
void mjpeg_picture_trailer(MpegEncContext *s);
+
+/* rate control */
+int ff_rate_control_init(MpegEncContext *s);
+int ff_rate_estimate_qscale(MpegEncContext *s);
+int ff_rate_estimate_qscale_pass2(MpegEncContext *s);
+void ff_write_pass1_stats(MpegEncContext *s);
+void ff_rate_control_uninit(MpegEncContext *s);
+
+#endif /* AVCODEC_MPEGVIDEO_H */
diff --git a/src/libffmpeg/libavcodec/msmpeg4.c b/src/libffmpeg/libavcodec/msmpeg4.c
index 66fc5255e..629c74497 100644
--- a/src/libffmpeg/libavcodec/msmpeg4.c
+++ b/src/libffmpeg/libavcodec/msmpeg4.c
@@ -1,27 +1,27 @@
/*
* MSMPEG4 backend for ffmpeg encoder and decoder
- * Copyright (c) 2001 Gerard Lantau.
+ * Copyright (c) 2001 Fabrice Bellard.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
*
- * This program is distributed in the hope that it will be useful,
+ * This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * msmpeg4v1 & v2 stuff by Michael Niedermayer <michaelni@gmx.at>
*/
-#include <stdlib.h>
-#include <stdio.h>
-#include "common.h"
+#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"
-#include "avcodec.h"
+
/*
* You can also call this codec : MPEG4 with a twist !
@@ -53,7 +53,7 @@ static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr);
static int msmpeg4_decode_motion(MpegEncContext * s,
int *mx_ptr, int *my_ptr);
static void msmpeg4v2_encode_motion(MpegEncContext * s, int val);
-static void init_h263_dc_for_msmpeg4();
+static void init_h263_dc_for_msmpeg4(void);
extern UINT32 inverse[256];
@@ -137,7 +137,7 @@ static void init_mv_table(MVTable *tab)
{
int i, x, y;
- tab->table_mv_index = malloc(sizeof(UINT16) * 4096);
+ tab->table_mv_index = av_malloc(sizeof(UINT16) * 4096);
/* mark all entries as not used */
for(i=0;i<4096;i++)
tab->table_mv_index[i] = tab->n;
@@ -159,7 +159,7 @@ static void code012(PutBitContext *pb, int n)
}
}
-/* write MSMPEG4 V3 compatible frame header */
+/* write MSMPEG4 compatible frame header */
void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
{
int i;
@@ -171,7 +171,7 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
put_bits(&s->pb, 5, s->qscale);
s->rl_table_index = 2;
- if(s->msmpeg4_version==2)
+ if(s->msmpeg4_version<=2)
s->rl_chroma_table_index = 2; /* only for I frame */
else
s->rl_chroma_table_index = 1; /* only for I frame */
@@ -183,7 +183,7 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
if (s->pict_type == I_TYPE) {
put_bits(&s->pb, 5, 0x17); /* indicate only one "slice" */
- if(s->msmpeg4_version!=2){
+ if(s->msmpeg4_version>2){
code012(&s->pb, s->rl_chroma_table_index);
code012(&s->pb, s->rl_table_index);
@@ -194,7 +194,7 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
put_bits(&s->pb, 1, s->use_skip_mb_code);
s->rl_chroma_table_index = s->rl_table_index;
- if(s->msmpeg4_version!=2){
+ if(s->msmpeg4_version>2){
code012(&s->pb, s->rl_table_index);
put_bits(&s->pb, 1, s->dc_table_index);
@@ -228,14 +228,16 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
void msmpeg4_encode_ext_header(MpegEncContext * s)
{
- s->flipflop_rounding=1;
- s->bitrate= 910; // FIXME
-
put_bits(&s->pb, 5, s->frame_rate / FRAME_RATE_BASE); //yes 29.97 -> 29
- put_bits(&s->pb, 11, s->bitrate);
+ put_bits(&s->pb, 11, MIN(s->bit_rate, 2047));
- put_bits(&s->pb, 1, s->flipflop_rounding);
+ if(s->msmpeg4_version<3)
+ s->flipflop_rounding=0;
+ else{
+ s->flipflop_rounding=1;
+ put_bits(&s->pb, 1, s->flipflop_rounding);
+ }
}
/* predict coded block */
@@ -328,7 +330,7 @@ void msmpeg4_encode_mb(MpegEncContext * s,
if (s->use_skip_mb_code)
put_bits(&s->pb, 1, 0); /* mb coded */
- if(s->msmpeg4_version==2){
+ if(s->msmpeg4_version<=2){
put_bits(&s->pb,
v2_mb_type[cbp&3][1],
v2_mb_type[cbp&3][0]);
@@ -373,7 +375,7 @@ void msmpeg4_encode_mb(MpegEncContext * s,
printf("cbp=%x %x\n", cbp, coded_cbp);
#endif
- if(s->msmpeg4_version==2){
+ if(s->msmpeg4_version<=2){
if (s->pict_type == I_TYPE) {
put_bits(&s->pb,
v2_intra_cbpc[cbp&3][1], v2_intra_cbpc[cbp&3][0]);
@@ -410,11 +412,10 @@ void msmpeg4_encode_mb(MpegEncContext * s,
}
}
-
-/* strongly inspirated from MPEG4, but not exactly the same ! */
-void msmpeg4_dc_scale(MpegEncContext * s)
+/* old ffmpeg msmpeg4v3 mode */
+void ff_old_msmpeg4_dc_scale(MpegEncContext * s)
{
- if (s->qscale < 5 || s->msmpeg4_version==2){
+ if (s->qscale < 5){
s->y_dc_scale = 8;
s->c_dc_scale = 8;
}else if (s->qscale < 9){
@@ -426,6 +427,21 @@ void msmpeg4_dc_scale(MpegEncContext * s)
}
}
+static int msmpeg4v1_pred_dc(MpegEncContext * s, int n,
+ INT32 **dc_val_ptr)
+{
+ int i;
+
+ if (n < 4) {
+ i= 0;
+ } else {
+ i= n-3;
+ }
+
+ *dc_val_ptr= &s->last_dc[i];
+ return s->last_dc[i];
+}
+
/* dir = 0: left, dir = 1: top prediction */
static int msmpeg4_pred_dc(MpegEncContext * s, int n,
INT16 **dc_val_ptr, int *dir_ptr)
@@ -439,6 +455,7 @@ static int msmpeg4_pred_dc(MpegEncContext * s, int n,
} else {
scale = s->c_dc_scale;
}
+
wrap = s->block_wrap[n];
dc_val= s->dc_val[0] + s->block_index[n];
@@ -508,21 +525,29 @@ static void msmpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr
{
int sign, code;
int pred;
- INT16 *dc_val;
- pred = msmpeg4_pred_dc(s, n, &dc_val, dir_ptr);
+ if(s->msmpeg4_version==1){
+ INT32 *dc_val;
+ pred = msmpeg4v1_pred_dc(s, n, &dc_val);
+
+ /* update predictor */
+ *dc_val= level;
+ }else{
+ INT16 *dc_val;
+ pred = msmpeg4_pred_dc(s, n, &dc_val, dir_ptr);
- /* update predictor */
- if (n < 4) {
- *dc_val = level * s->y_dc_scale;
- } else {
- *dc_val = level * s->c_dc_scale;
+ /* update predictor */
+ if (n < 4) {
+ *dc_val = level * s->y_dc_scale;
+ } else {
+ *dc_val = level * s->c_dc_scale;
+ }
}
/* do the prediction */
level -= pred;
- if(s->msmpeg4_version==2){
+ if(s->msmpeg4_version<=2){
if (n < 4) {
put_bits(&s->pb,
v2_dc_lum_table[level+256][1],
@@ -589,7 +614,7 @@ static void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n)
} else {
i = 0;
rl = &rl_table[3 + s->rl_table_index];
- if(s->msmpeg4_version==2)
+ if(s->msmpeg4_version<=2)
run_diff = 0;
else
run_diff = 1;
@@ -669,9 +694,11 @@ static VLC cbpy_vlc;
static VLC v2_intra_cbpc_vlc;
static VLC v2_mb_type_vlc;
static VLC v2_mv_vlc;
+static VLC v1_intra_cbpc_vlc;
+static VLC v1_inter_cbpc_vlc;
/* this table is practically identical to the one from h263 except that its inverted */
-static void init_h263_dc_for_msmpeg4()
+static void init_h263_dc_for_msmpeg4(void)
{
static int inited=0;
@@ -733,60 +760,73 @@ static void init_h263_dc_for_msmpeg4()
/* init all vlc decoding tables */
int msmpeg4_decode_init_vlc(MpegEncContext *s)
{
+ static int done = 0;
int i;
MVTable *mv;
- for(i=0;i<NB_RL_TABLES;i++) {
- init_rl(&rl_table[i]);
- init_vlc_rl(&rl_table[i]);
- }
- for(i=0;i<2;i++) {
- mv = &mv_tables[i];
- init_vlc(&mv->vlc, 9, mv->n + 1,
- mv->table_mv_bits, 1, 1,
- mv->table_mv_code, 2, 2);
- }
- init_vlc(&dc_lum_vlc[0], 9, 120,
- &table0_dc_lum[0][1], 8, 4,
- &table0_dc_lum[0][0], 8, 4);
- init_vlc(&dc_chroma_vlc[0], 9, 120,
- &table0_dc_chroma[0][1], 8, 4,
- &table0_dc_chroma[0][0], 8, 4);
- init_vlc(&dc_lum_vlc[1], 9, 120,
- &table1_dc_lum[0][1], 8, 4,
- &table1_dc_lum[0][0], 8, 4);
- init_vlc(&dc_chroma_vlc[1], 9, 120,
- &table1_dc_chroma[0][1], 8, 4,
- &table1_dc_chroma[0][0], 8, 4);
+ if (!done) {
+ done = 1;
+
+ for(i=0;i<NB_RL_TABLES;i++) {
+ init_rl(&rl_table[i]);
+ init_vlc_rl(&rl_table[i]);
+ }
+ for(i=0;i<2;i++) {
+ mv = &mv_tables[i];
+ init_vlc(&mv->vlc, 9, mv->n + 1,
+ mv->table_mv_bits, 1, 1,
+ mv->table_mv_code, 2, 2);
+ }
+
+ init_vlc(&dc_lum_vlc[0], 9, 120,
+ &table0_dc_lum[0][1], 8, 4,
+ &table0_dc_lum[0][0], 8, 4);
+ init_vlc(&dc_chroma_vlc[0], 9, 120,
+ &table0_dc_chroma[0][1], 8, 4,
+ &table0_dc_chroma[0][0], 8, 4);
+ init_vlc(&dc_lum_vlc[1], 9, 120,
+ &table1_dc_lum[0][1], 8, 4,
+ &table1_dc_lum[0][0], 8, 4);
+ init_vlc(&dc_chroma_vlc[1], 9, 120,
+ &table1_dc_chroma[0][1], 8, 4,
+ &table1_dc_chroma[0][0], 8, 4);
- init_h263_dc_for_msmpeg4();
- init_vlc(&v2_dc_lum_vlc, 9, 512,
- &v2_dc_lum_table[0][1], 8, 4,
- &v2_dc_lum_table[0][0], 8, 4);
- init_vlc(&v2_dc_chroma_vlc, 9, 512,
- &v2_dc_chroma_table[0][1], 8, 4,
- &v2_dc_chroma_table[0][0], 8, 4);
+ init_h263_dc_for_msmpeg4();
+ init_vlc(&v2_dc_lum_vlc, 9, 512,
+ &v2_dc_lum_table[0][1], 8, 4,
+ &v2_dc_lum_table[0][0], 8, 4);
+ init_vlc(&v2_dc_chroma_vlc, 9, 512,
+ &v2_dc_chroma_table[0][1], 8, 4,
+ &v2_dc_chroma_table[0][0], 8, 4);
- init_vlc(&cbpy_vlc, 6, 16,
- &cbpy_tab[0][1], 2, 1,
- &cbpy_tab[0][0], 2, 1);
- init_vlc(&v2_intra_cbpc_vlc, 3, 4,
- &v2_intra_cbpc[0][1], 2, 1,
- &v2_intra_cbpc[0][0], 2, 1);
- init_vlc(&v2_mb_type_vlc, 5, 8,
- &v2_mb_type[0][1], 2, 1,
- &v2_mb_type[0][0], 2, 1);
- init_vlc(&v2_mv_vlc, 9, 33,
- &mvtab[0][1], 2, 1,
- &mvtab[0][0], 2, 1);
-
- init_vlc(&mb_non_intra_vlc, 9, 128,
- &table_mb_non_intra[0][1], 8, 4,
- &table_mb_non_intra[0][0], 8, 4);
- init_vlc(&mb_intra_vlc, 9, 64,
- &table_mb_intra[0][1], 4, 2,
- &table_mb_intra[0][0], 4, 2);
+ init_vlc(&cbpy_vlc, 6, 16,
+ &cbpy_tab[0][1], 2, 1,
+ &cbpy_tab[0][0], 2, 1);
+ init_vlc(&v2_intra_cbpc_vlc, 3, 4,
+ &v2_intra_cbpc[0][1], 2, 1,
+ &v2_intra_cbpc[0][0], 2, 1);
+ init_vlc(&v2_mb_type_vlc, 5, 8,
+ &v2_mb_type[0][1], 2, 1,
+ &v2_mb_type[0][0], 2, 1);
+ init_vlc(&v2_mv_vlc, 9, 33,
+ &mvtab[0][1], 2, 1,
+ &mvtab[0][0], 2, 1);
+
+ init_vlc(&mb_non_intra_vlc, 9, 128,
+ &table_mb_non_intra[0][1], 8, 4,
+ &table_mb_non_intra[0][0], 8, 4);
+ init_vlc(&mb_intra_vlc, 9, 64,
+ &table_mb_intra[0][1], 4, 2,
+ &table_mb_intra[0][0], 4, 2);
+
+ init_vlc(&v1_intra_cbpc_vlc, 6, 8,
+ intra_MCBPC_bits, 1, 1,
+ intra_MCBPC_code, 1, 1);
+ init_vlc(&v1_inter_cbpc_vlc, 6, 25,
+ inter_MCBPC_bits, 1, 1,
+ inter_MCBPC_code, 1, 1);
+ }
return 0;
}
@@ -802,31 +842,84 @@ static int decode012(GetBitContext *gb)
int msmpeg4_decode_picture_header(MpegEncContext * s)
{
- int code;
+ int code, code2;
+
+#if 0
+{
+int i;
+for(i=0; i<s->gb.size*8; i++)
+ printf("%d", get_bits1(&s->gb));
+// get_bits1(&s->gb);
+printf("END\n");
+return -1;
+}
+#endif
+
+ if(s->msmpeg4_version==1){
+ int start_code, num;
+ start_code = (get_bits(&s->gb, 16)<<16) | get_bits(&s->gb, 16);
+ if(start_code!=0x00000100){
+ fprintf(stderr, "invalid startcode\n");
+ return -1;
+ }
+
+ num= get_bits(&s->gb, 5); // frame number */
+ }
s->pict_type = get_bits(&s->gb, 2) + 1;
if (s->pict_type != I_TYPE &&
- s->pict_type != P_TYPE)
+ s->pict_type != P_TYPE){
+ fprintf(stderr, "invalid picture type\n");
return -1;
+ }
s->qscale = get_bits(&s->gb, 5);
if (s->pict_type == I_TYPE) {
code = get_bits(&s->gb, 5);
- /* 0x17: one slice, 0x18: two slices */
- if (code < 0x17)
- return -1;
- s->slice_height = s->mb_height / (code - 0x16);
- if(s->msmpeg4_version==2){
+ if(s->msmpeg4_version==1){
+ if(code==0 || code>s->mb_height){
+ fprintf(stderr, "invalid slice height %d\n", code);
+ return -1;
+ }
+
+ s->slice_height = code;
+ }else{
+ /* 0x17: one slice, 0x18: two slices, ... */
+ if (code < 0x17)
+ return -1;
+
+ s->slice_height = s->mb_height / (code - 0x16);
+ }
+
+ switch(s->msmpeg4_version){
+ case 1:
+ case 2:
s->rl_chroma_table_index = 2;
s->rl_table_index = 2;
s->dc_table_index = 0; //not used
- }else{
+ break;
+ case 3:
s->rl_chroma_table_index = decode012(&s->gb);
s->rl_table_index = decode012(&s->gb);
s->dc_table_index = get_bits1(&s->gb);
+ break;
+ case 4:
+ msmpeg4_decode_ext_header(s, 999 /* bufer size (useless here) */);
+ printf("%X\n", show_bits(&s->gb, 24));
+ code= get_bits(&s->gb, 2);
+ if(code==1){
+ code2= get_bits(&s->gb, 3);
+ if(code2==7) skip_bits(&s->gb, 1);
+ }
+ printf("%X\n", show_bits(&s->gb, 24));
+ s->rl_chroma_table_index = 2;
+ s->rl_table_index = 2;
+
+ s->dc_table_index = 0;
+ break;
}
s->no_rounding = 1;
/* printf(" %d %d %d %d \n",
@@ -835,22 +928,28 @@ int msmpeg4_decode_picture_header(MpegEncContext * s)
s->rl_table_index,
s->dc_table_index);*/
} else {
- s->use_skip_mb_code = get_bits1(&s->gb);
- if(s->msmpeg4_version==2){
+ switch(s->msmpeg4_version){
+ case 1:
+ case 2:
+ if(s->msmpeg4_version==1)
+ s->use_skip_mb_code = 1;
+ else
+ s->use_skip_mb_code = get_bits1(&s->gb);
s->rl_table_index = 2;
s->rl_chroma_table_index = s->rl_table_index;
-
s->dc_table_index = 0; //not used
-
s->mv_table_index = 0;
- }else{
+ break;
+ case 3:
+ s->use_skip_mb_code = get_bits1(&s->gb);
s->rl_table_index = decode012(&s->gb);
s->rl_chroma_table_index = s->rl_table_index;
s->dc_table_index = get_bits1(&s->gb);
s->mv_table_index = get_bits1(&s->gb);
+ break;
}
/* printf(" %d %d %d %d %d \n",
s->use_skip_mb_code,
@@ -864,6 +963,7 @@ int msmpeg4_decode_picture_header(MpegEncContext * s)
s->no_rounding = 0;
}
// printf("%d", s->no_rounding);
+//return -1;
}
#if 0
@@ -886,27 +986,36 @@ return -1;
int msmpeg4_decode_ext_header(MpegEncContext * s, int buf_size)
{
+ int left= buf_size*8 - get_bits_count(&s->gb);
+ int length= s->msmpeg4_version>=3 ? 17 : 16;
/* the alt_bitstream reader could read over the end so we need to check it */
- if(get_bits_count(&s->gb) + 16 < buf_size*8)
+ if(left>=length && left<length+8)
{
int fps;
fps= get_bits(&s->gb, 5);
- s->bitrate= get_bits(&s->gb, 11);
- s->flipflop_rounding= get_bits1(&s->gb);
+ s->bit_rate= get_bits(&s->gb, 11);
+ if(s->msmpeg4_version>=3)
+ s->flipflop_rounding= get_bits1(&s->gb);
+ else
+ s->flipflop_rounding= 0;
-// printf("fps:%2d bps:%2d roundingType:%1d\n", fps, s->bitrate, s->flipflop_rounding);
+// printf("fps:%2d bps:%2d roundingType:%1d\n", fps, s->bit_rate, s->flipflop_rounding);
}
- else
+ else if(left<length+8)
{
s->flipflop_rounding= 0;
- s->bitrate= 0;
+ printf("ext header missing, %d left\n", left);
+ }
+ else
+ {
+ fprintf(stderr, "I frame too long, ignoring ext header\n");
}
return 0;
}
-static inline void memsetw(short *tab, int val, int n)
+static inline void msmpeg4_memsetw(short *tab, int val, int n)
{
int i;
for(i=0;i<n;i++)
@@ -952,6 +1061,7 @@ static int msmpeg4v2_decode_motion(MpegEncContext * s, int pred, int f_code)
int code, val, sign, shift;
code = get_vlc(&s->gb, &v2_mv_vlc);
+// printf("MV code %d at %d %d pred: %d\n", code, s->mb_x,s->mb_y, pred);
if (code < 0)
return 0xffff;
@@ -965,8 +1075,8 @@ static int msmpeg4v2_decode_motion(MpegEncContext * s, int pred, int f_code)
val++;
if (sign)
val = -val;
- val += pred;
+ val += pred;
if (val <= -64)
val += 64;
else if (val >= 64)
@@ -976,7 +1086,7 @@ static int msmpeg4v2_decode_motion(MpegEncContext * s, int pred, int f_code)
}
-int msmpeg4v2_decode_mb(MpegEncContext *s,
+static int msmpeg4v12_decode_mb(MpegEncContext *s,
DCTELEM block[6][64])
{
int cbp, code, i;
@@ -996,20 +1106,41 @@ int msmpeg4v2_decode_mb(MpegEncContext *s,
}
}
- code = get_vlc(&s->gb, &v2_mb_type_vlc);
+ if(s->msmpeg4_version==2)
+ code = get_vlc(&s->gb, &v2_mb_type_vlc);
+ else
+ code = get_vlc(&s->gb, &v1_inter_cbpc_vlc);
+ if(code<0 || code>7){
+ fprintf(stderr, "cbpc %d invalid at %d %d\n", code, s->mb_x, s->mb_y);
+ return -1;
+ }
+
s->mb_intra = code >>2;
cbp = code & 0x3;
} else {
s->mb_intra = 1;
- cbp= get_vlc(&s->gb, &v2_intra_cbpc_vlc);
+ if(s->msmpeg4_version==2)
+ cbp= get_vlc(&s->gb, &v2_intra_cbpc_vlc);
+ else
+ cbp= get_vlc(&s->gb, &v1_intra_cbpc_vlc);
+ if(cbp<0 || cbp>3){
+ fprintf(stderr, "cbpc %d invalid at %d %d\n", cbp, s->mb_x, s->mb_y);
+ return -1;
+ }
}
if (!s->mb_intra) {
- int mx, my;
+ int mx, my, cbpy;
+
+ cbpy= get_vlc(&s->gb, &cbpy_vlc);
+ if(cbpy<0){
+ fprintf(stderr, "cbpy %d invalid at %d %d\n", cbp, s->mb_x, s->mb_y);
+ return -1;
+ }
- cbp|= get_vlc(&s->gb, &cbpy_vlc)<<2;
- if((cbp&3) != 3) cbp^= 0x3C;
+ cbp|= cbpy<<2;
+ if(s->msmpeg4_version==1 || (cbp&3) != 3) cbp^= 0x3C;
h263_pred_motion(s, 0, &mx, &my);
mx= msmpeg4v2_decode_motion(s, mx, 1);
@@ -1020,14 +1151,20 @@ int msmpeg4v2_decode_mb(MpegEncContext *s,
s->mv[0][0][0] = mx;
s->mv[0][0][1] = my;
} else {
- s->ac_pred = get_bits1(&s->gb);
- cbp|= get_vlc(&s->gb, &cbpy_vlc)<<2;
+ if(s->msmpeg4_version==2){
+ s->ac_pred = get_bits1(&s->gb);
+ cbp|= get_vlc(&s->gb, &cbpy_vlc)<<2; //FIXME check errors
+ } else{
+ s->ac_pred = 0;
+ cbp|= get_vlc(&s->gb, &cbpy_vlc)<<2; //FIXME check errors
+ if(s->pict_type==P_TYPE) cbp^=0x3C;
+ }
}
for (i = 0; i < 6; i++) {
if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0)
{
- fprintf(stderr,"\nIgnoring error while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
+ fprintf(stderr,"\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
return -1;
}
}
@@ -1046,23 +1183,23 @@ int msmpeg4_decode_mb(MpegEncContext *s,
int wrap;
/* reset DC pred (set previous line to 1024) */
wrap = 2 * s->mb_width + 2;
- memsetw(&s->dc_val[0][(1) + (2 * s->mb_y) * wrap],
- 1024, 2 * s->mb_width);
- wrap = s->mb_width + 2;
- memsetw(&s->dc_val[1][(1) + (s->mb_y) * wrap],
- 1024, s->mb_width);
- memsetw(&s->dc_val[2][(1) + (s->mb_y) * wrap],
- 1024, s->mb_width);
-
- /* reset AC pred (set previous line to 0) */
- wrap = s->mb_width * 2 + 2;
- memsetw(s->ac_val[0][0] + (1 + (2 * s->mb_y) * wrap)*16,
- 0, 2 * s->mb_width*16);
- wrap = s->mb_width + 2;
- memsetw(s->ac_val[1][0] + (1 + (s->mb_y) * wrap)*16,
- 0, s->mb_width*16);
- memsetw(s->ac_val[2][0] + (1 + (s->mb_y) * wrap)*16,
- 0, s->mb_width*16);
+ msmpeg4_memsetw(&s->dc_val[0][(1) + (2 * s->mb_y) * wrap],
+ 1024, 2 * s->mb_width);
+ wrap = s->mb_width + 2;
+ msmpeg4_memsetw(&s->dc_val[1][(1) + (s->mb_y) * wrap],
+ 1024, s->mb_width);
+ msmpeg4_memsetw(&s->dc_val[2][(1) + (s->mb_y) * wrap],
+ 1024, s->mb_width);
+
+ /* reset AC pred (set previous line to 0) */
+ wrap = s->mb_width * 2 + 2;
+ msmpeg4_memsetw(s->ac_val[0][0] + (1 + (2 * s->mb_y) * wrap)*16,
+ 0, 2 * s->mb_width*16);
+ wrap = s->mb_width + 2;
+ msmpeg4_memsetw(s->ac_val[1][0] + (1 + (s->mb_y) * wrap)*16,
+ 0, s->mb_width*16);
+ msmpeg4_memsetw(s->ac_val[2][0] + (1 + (s->mb_y) * wrap)*16,
+ 0, s->mb_width*16);
s->first_slice_line = 1;
} else {
@@ -1070,7 +1207,7 @@ int msmpeg4_decode_mb(MpegEncContext *s,
}
}
- if(s->msmpeg4_version==2) return msmpeg4v2_decode_mb(s, block); //FIXME merge if possible
+ if(s->msmpeg4_version<=2) return msmpeg4v12_decode_mb(s, block); //FIXME export function & call from outside perhaps
if (s->pict_type == P_TYPE) {
set_stat(ST_INTER_MB);
@@ -1133,10 +1270,11 @@ int msmpeg4_decode_mb(MpegEncContext *s,
for (i = 0; i < 6; i++) {
if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0)
{
- fprintf(stderr,"\nIgnoring error while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
- // return -1;
+ fprintf(stderr,"\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
+ return -1;
}
}
+
return 0;
}
@@ -1156,14 +1294,24 @@ static int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
/* DC coef */
set_stat(ST_DC);
level = msmpeg4_decode_dc(s, n, &dc_pred_dir);
- if (level < 0)
+ if (level < 0){
+ fprintf(stderr, "dc overflow-\n");
return -1;
- block[0] = level;
+ }
if (n < 4) {
rl = &rl_table[s->rl_table_index];
+ if(level > 256*s->y_dc_scale){
+ fprintf(stderr, "dc overflow+\n");
+ return -1;
+ }
} else {
rl = &rl_table[3 + s->rl_chroma_table_index];
+ if(level > 256*s->c_dc_scale){
+ fprintf(stderr, "dc overflow+\n");
+ return -1;
+ }
}
+ block[0] = level;
run_diff = 0;
i = 1;
@@ -1204,16 +1352,42 @@ static int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
return -1;
if (code == rl->n) {
/* escape */
- if (get_bits1(&s->gb) == 0) {
- if (get_bits1(&s->gb) == 0) {
+ if (s->msmpeg4_version==1 || get_bits1(&s->gb) == 0) {
+ if (s->msmpeg4_version==1 || get_bits1(&s->gb) == 0) {
/* third escape */
last = get_bits1(&s->gb);
run = get_bits(&s->gb, 6);
level = get_bits(&s->gb, 8);
level = (level << 24) >> 24; /* sign extend */
+#if 0 // waste of time / this will detect very few errors
+ {
+ const int abs_level= ABS(level);
+ const int run1= run - rl->max_run[last][abs_level] - run_diff;
+ if(abs_level<=MAX_LEVEL && run<=MAX_RUN){
+ if(abs_level <= rl->max_level[last][run]){
+ fprintf(stderr, "illegal 3. esc, vlc encoding possible\n");
+ return DECODING_AC_LOST;
+ }
+ if(abs_level <= rl->max_level[last][run]*2){
+ fprintf(stderr, "illegal 3. esc, esc 1 encoding possible\n");
+ return DECODING_AC_LOST;
+ }
+ if(abs_level <= rl->max_level[last][run1] && 0){
+ fprintf(stderr, "illegal 3. esc, esc 2 encoding possible\n");
+ return DECODING_AC_LOST;
+ }
+ }
+ }
+#endif
//level = level * qmul + (level>0) * qadd - (level<=0) * qadd ;
if (level>0) level= level * qmul + qadd;
- else level= level * qmul - qadd;
+ else level= level * qmul - qadd;
+#if 0 // waste of time too :(
+ if(level>2048 || level<-2048){
+ fprintf(stderr, "|level| overflow in 3. esc\n");
+ return DECODING_AC_LOST;
+ }
+#endif
} else {
/* second escape */
code = get_vlc(&s->gb, &rl->vlc);
@@ -1250,6 +1424,7 @@ static int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
i += run;
if (i >= 64)
return -1;
+
j = scan_table[i];
block[j] = level;
i++;
@@ -1271,9 +1446,8 @@ static int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr)
{
int level, pred;
- INT16 *dc_val;
- if(s->msmpeg4_version==2){
+ if(s->msmpeg4_version<=2){
if (n < 4) {
level = get_vlc(&s->gb, &v2_dc_lum_vlc);
} else {
@@ -1288,8 +1462,10 @@ static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr)
} else {
level = get_vlc(&s->gb, &dc_chroma_vlc[s->dc_table_index]);
}
- if (level < 0)
+ if (level < 0){
+ fprintf(stderr, "illegal dc vlc\n");
return -1;
+ }
if (level == DC_MAX) {
level = get_bits(&s->gb, 8);
@@ -1301,14 +1477,24 @@ static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr)
}
}
- pred = msmpeg4_pred_dc(s, n, &dc_val, dir_ptr);
- level += pred;
+ if(s->msmpeg4_version==1){
+ INT32 *dc_val;
+ pred = msmpeg4v1_pred_dc(s, n, &dc_val);
+ level += pred;
+
+ /* update predictor */
+ *dc_val= level;
+ }else{
+ INT16 *dc_val;
+ pred = msmpeg4_pred_dc(s, n, &dc_val, dir_ptr);
+ level += pred;
- /* update predictor */
- if (n < 4) {
- *dc_val = level * s->y_dc_scale;
- } else {
- *dc_val = level * s->c_dc_scale;
+ /* update predictor */
+ if (n < 4) {
+ *dc_val = level * s->y_dc_scale;
+ } else {
+ *dc_val = level * s->c_dc_scale;
+ }
}
return level;
diff --git a/src/libffmpeg/libavcodec/msmpeg4data.h b/src/libffmpeg/libavcodec/msmpeg4data.h
index 9dcb8276f..66e0a3d89 100644
--- a/src/libffmpeg/libavcodec/msmpeg4data.h
+++ b/src/libffmpeg/libavcodec/msmpeg4data.h
@@ -3,7 +3,7 @@
*/
/* intra picture macro block coded block pattern */
-const UINT16 table_mb_intra[64][2] = {
+static const UINT16 table_mb_intra[64][2] = {
{ 0x1, 1 },{ 0x17, 6 },{ 0x9, 5 },{ 0x5, 5 },
{ 0x6, 5 },{ 0x47, 9 },{ 0x20, 7 },{ 0x10, 7 },
{ 0x2, 5 },{ 0x7c, 9 },{ 0x3a, 7 },{ 0x1d, 7 },
@@ -23,7 +23,7 @@ const UINT16 table_mb_intra[64][2] = {
};
/* non intra picture macro block coded block pattern + mb type */
-const UINT32 table_mb_non_intra[128][2] = {
+static const UINT32 table_mb_non_intra[128][2] = {
{ 0x40, 7 },{ 0x13c9, 13 },{ 0x9fd, 12 },{ 0x1fc, 15 },
{ 0x9fc, 12 },{ 0xa83, 18 },{ 0x12d34, 17 },{ 0x83bc, 16 },
{ 0x83a, 12 },{ 0x7f8, 17 },{ 0x3fd, 16 },{ 0x3ff, 16 },
@@ -128,7 +128,7 @@ static const UINT32 table0_dc_chroma[120][2] = {
/* dc table 1 */
-const UINT32 table1_dc_lum[120][2] = {
+static const UINT32 table1_dc_lum[120][2] = {
{ 0x2, 2 },{ 0x3, 2 },{ 0x3, 3 },{ 0x2, 4 },
{ 0x5, 4 },{ 0x1, 5 },{ 0x3, 5 },{ 0x8, 5 },
{ 0x0, 6 },{ 0x5, 6 },{ 0xd, 6 },{ 0xf, 6 },
@@ -161,7 +161,7 @@ const UINT32 table1_dc_lum[120][2] = {
{ 0x1e6964, 26 },{ 0x1e6965, 26 },{ 0x1e6966, 26 },{ 0x1e6967, 26 },
};
-const UINT32 table1_dc_chroma[120][2] = {
+static const UINT32 table1_dc_chroma[120][2] = {
{ 0x0, 2 },{ 0x1, 2 },{ 0x4, 3 },{ 0x7, 3 },
{ 0xb, 4 },{ 0xd, 4 },{ 0x15, 5 },{ 0x28, 6 },
{ 0x30, 6 },{ 0x32, 6 },{ 0x52, 7 },{ 0x62, 7 },
@@ -233,7 +233,7 @@ static const UINT16 table0_vlc[133][2] = {
{ 0x16, 7 },
};
-const INT8 table0_level[132] = {
+static const INT8 table0_level[132] = {
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
1, 2, 3, 4, 5, 6, 7, 8,
@@ -253,7 +253,7 @@ const INT8 table0_level[132] = {
1, 1, 1, 1,
};
-const INT8 table0_run[132] = {
+static const INT8 table0_run[132] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1,
@@ -275,7 +275,7 @@ const INT8 table0_run[132] = {
/* vlc table 1, for intra chroma and P macro blocks */
-const UINT16 table1_vlc[149][2] = {
+static const UINT16 table1_vlc[149][2] = {
{ 0x4, 3 },{ 0x14, 5 },{ 0x17, 7 },{ 0x7f, 8 },
{ 0x154, 9 },{ 0x1f2, 10 },{ 0xbf, 11 },{ 0x65, 12 },
{ 0xaaa, 12 },{ 0x630, 13 },{ 0x1597, 13 },{ 0x3b7, 14 },
@@ -316,7 +316,7 @@ const UINT16 table1_vlc[149][2] = {
{ 0xd, 9 },
};
-const INT8 table1_level[148] = {
+static const INT8 table1_level[148] = {
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 1, 2,
3, 4, 5, 6, 7, 8, 9, 1,
@@ -338,7 +338,7 @@ const INT8 table1_level[148] = {
1, 1, 1, 1,
};
-const INT8 table1_run[148] = {
+static const INT8 table1_run[148] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 1,
1, 1, 1, 1, 1, 1, 1, 2,
@@ -362,7 +362,7 @@ const INT8 table1_run[148] = {
/* third vlc table */
-const UINT16 table2_vlc[186][2] = {
+static const UINT16 table2_vlc[186][2] = {
{ 0x1, 2 },{ 0x5, 3 },{ 0xd, 4 },{ 0x12, 5 },
{ 0xe, 6 },{ 0x15, 7 },{ 0x13, 8 },{ 0x3f, 8 },
{ 0x4b, 9 },{ 0x11f, 9 },{ 0xb8, 10 },{ 0x3e3, 10 },
@@ -412,7 +412,7 @@ const UINT16 table2_vlc[186][2] = {
{ 0x23dc, 14 },{ 0x4a, 9 },
};
-const INT8 table2_level[185] = {
+static const INT8 table2_level[185] = {
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 1, 2, 3, 4, 5,
@@ -439,7 +439,7 @@ const INT8 table2_level[185] = {
1,
};
-const INT8 table2_run[185] = {
+static const INT8 table2_run[185] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 1, 1, 1,
@@ -467,7 +467,7 @@ const INT8 table2_run[185] = {
};
/* second non intra vlc table */
-const UINT16 table4_vlc[169][2] = {
+static const UINT16 table4_vlc[169][2] = {
{ 0x0, 3 },{ 0x3, 4 },{ 0xb, 5 },{ 0x14, 6 },
{ 0x3f, 6 },{ 0x5d, 7 },{ 0xa2, 8 },{ 0xac, 9 },
{ 0x16e, 9 },{ 0x20a, 10 },{ 0x2e2, 10 },{ 0x432, 11 },
@@ -513,7 +513,7 @@ const UINT16 table4_vlc[169][2] = {
{ 0x169, 9 },
};
-const INT8 table4_level[168] = {
+static const INT8 table4_level[168] = {
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 1,
@@ -537,7 +537,7 @@ const INT8 table4_level[168] = {
1, 1, 1, 1, 1, 1, 1, 1,
};
-const INT8 table4_run[168] = {
+static const INT8 table4_run[168] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1,
@@ -575,6 +575,11 @@ extern const UINT8 DCtab_chrom[13][2];
extern const UINT8 cbpy_tab[16][2];
extern const UINT8 mvtab[33][2];
+extern const UINT8 intra_MCBPC_code[8];
+extern const UINT8 intra_MCBPC_bits[8];
+
+extern const UINT8 inter_MCBPC_code[8];
+extern const UINT8 inter_MCBPC_bits[8];
#define NB_RL_TABLES 6
@@ -627,7 +632,7 @@ static RLTable rl_table[NB_RL_TABLES] = {
/* motion vector table 0 */
-const UINT16 table0_mv_code[1100] = {
+static const UINT16 table0_mv_code[1100] = {
0x0001, 0x0003, 0x0005, 0x0007, 0x0003, 0x0008, 0x000c, 0x0001,
0x0002, 0x001b, 0x0006, 0x000b, 0x0015, 0x0002, 0x000e, 0x000f,
0x0014, 0x0020, 0x0022, 0x0025, 0x0027, 0x0029, 0x002d, 0x004b,
@@ -768,7 +773,7 @@ const UINT16 table0_mv_code[1100] = {
0x5f0d, 0x5f0e, 0x5f0f, 0x0000,
};
-const UINT8 table0_mv_bits[1100] = {
+static const UINT8 table0_mv_bits[1100] = {
1, 4, 4, 4, 5, 5, 5, 6,
6, 6, 7, 7, 7, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
@@ -909,7 +914,7 @@ const UINT8 table0_mv_bits[1100] = {
17, 17, 17, 8,
};
-const UINT8 table0_mvx[1099] = {
+static const UINT8 table0_mvx[1099] = {
32, 32, 31, 32, 33, 31, 33, 31,
33, 32, 34, 32, 30, 32, 31, 34,
35, 32, 34, 33, 29, 33, 30, 30,
@@ -1050,7 +1055,7 @@ const UINT8 table0_mvx[1099] = {
61, 19, 19,
};
-const UINT8 table0_mvy[1099] = {
+static const UINT8 table0_mvy[1099] = {
32, 31, 32, 33, 32, 31, 31, 33,
33, 34, 32, 30, 32, 35, 34, 31,
32, 29, 33, 30, 32, 34, 33, 31,
@@ -1192,7 +1197,7 @@ const UINT8 table0_mvy[1099] = {
};
/* motion vector table 1 */
-const UINT16 table1_mv_code[1100] = {
+static const UINT16 table1_mv_code[1100] = {
0x0000, 0x0007, 0x0009, 0x000f, 0x000a, 0x0011, 0x001a, 0x001c,
0x0011, 0x0031, 0x0025, 0x002d, 0x002f, 0x006f, 0x0075, 0x0041,
0x004c, 0x004e, 0x005c, 0x0060, 0x0062, 0x0066, 0x0068, 0x0069,
@@ -1333,7 +1338,7 @@ const UINT16 table1_mv_code[1100] = {
0x2473, 0x26a2, 0x26a3, 0x000b,
};
-const UINT8 table1_mv_bits[1100] = {
+static const UINT8 table1_mv_bits[1100] = {
2, 4, 4, 4, 5, 5, 5, 5,
6, 6, 7, 7, 7, 7, 7, 8,
8, 8, 8, 8, 8, 8, 8, 8,
@@ -1474,7 +1479,7 @@ const UINT8 table1_mv_bits[1100] = {
15, 15, 15, 4,
};
-const UINT8 table1_mvx[1099] = {
+static const UINT8 table1_mvx[1099] = {
32, 31, 32, 31, 33, 32, 33, 33,
31, 34, 30, 32, 32, 34, 35, 32,
34, 33, 29, 30, 30, 32, 31, 31,
@@ -1615,7 +1620,7 @@ const UINT8 table1_mvx[1099] = {
0, 12, 27,
};
-const UINT8 table1_mvy[1099] = {
+static const UINT8 table1_mvy[1099] = {
32, 32, 31, 31, 32, 33, 31, 33,
33, 32, 32, 30, 34, 31, 32, 29,
33, 30, 32, 33, 31, 35, 34, 30,
diff --git a/src/libffmpeg/libavcodec/ratecontrol.c b/src/libffmpeg/libavcodec/ratecontrol.c
new file mode 100644
index 000000000..8395eefad
--- /dev/null
+++ b/src/libffmpeg/libavcodec/ratecontrol.c
@@ -0,0 +1,402 @@
+/*
+ * Rate control for video encoders
+ *
+ * Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include "avcodec.h"
+#include "dsputil.h"
+#include "mpegvideo.h"
+
+#define STATS_FILE "lavc_stats.txt"
+
+static int init_pass2(MpegEncContext *s);
+
+void ff_write_pass1_stats(MpegEncContext *s){ // append one text line of per-picture statistics to rcc->stats_file
+ RateControlContext *rcc= &s->rc_context; // NOTE(review): stats_file is used below without a NULL check - presumably only called when CODEC_FLAG_PASS1 is set; confirm at the call site
+// fprintf(c->stats_file, "type:%d q:%d icount:%d pcount:%d scount:%d itex:%d ptex%d mv:%d misc:%d fcode:%d bcode:%d\")
+ fprintf(rcc->stats_file, "in:%d out:%d type:%d q:%d itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d\n", // field names and order are parsed back by the fscanf() formats in ff_rate_control_init
+ s->picture_number, s->input_picture_number - s->max_b_frames, s->pict_type, // values for "in:", "out:" and "type:"
+ s->qscale, s->i_tex_bits, s->p_tex_bits, s->mv_bits, s->misc_bits, s->f_code, s->b_code);
+}
+
+int ff_rate_control_init(MpegEncContext *s) // set up rate control state; returns 0 on success, -1 on any failure
+{
+ RateControlContext *rcc= &s->rc_context;
+ emms_c();
+
+ if(s->flags&CODEC_FLAG_PASS1){ // first pass: open the statistics file for writing
+ rcc->stats_file= fopen(STATS_FILE, "w");
+ if(!rcc->stats_file){
+ fprintf(stderr, "failed to open " STATS_FILE "\n");
+ return -1;
+ }
+ } else if(s->flags&CODEC_FLAG_PASS2){ // second pass: read the statistics back and plan the quantizers
+ int size;
+ int i;
+
+ rcc->stats_file= fopen(STATS_FILE, "r");
+ if(!rcc->stats_file){
+ fprintf(stderr, "failed to open " STATS_FILE "\n");
+ return -1;
+ }
+
+ /* find number of pics without reading the file twice :) */
+ fseek(rcc->stats_file, 0, SEEK_END);
+ size= ftell(rcc->stats_file);
+ fseek(rcc->stats_file, 0, SEEK_SET);
+
+ size/= 64; // we need at least 64 bytes to store a line, so file size / 64 is used as the table capacity
+ rcc->entry = (RateControlEntry*)av_mallocz(size*sizeof(RateControlEntry)); // NOTE(review): result is not checked before the loop below writes through it
+
+ for(i=0; !feof(rcc->stats_file); i++){ // one iteration per statistics line; i counts the lines parsed
+ RateControlEntry *rce;
+ int picture_number;
+ int e; // total number of fields converted by the two fscanf() calls
+
+ e= fscanf(rcc->stats_file, "in:%d ", &picture_number);
+ rce= &rcc->entry[picture_number]; // NOTE(review): picture_number comes straight from the file and is used as an index with no range check, even if the fscanf() above failed
+ e+=fscanf(rcc->stats_file, "out:%*d type:%d q:%d itex:%d ptex:%d mv:%d misc:%d fcode:%*d bcode:%*d\n", // the %*d fields are skipped and do not count towards the return value
+ &rce->pict_type, &rce->qscale, &rce->i_tex_bits, &rce->p_tex_bits, &rce->mv_bits, &rce->misc_bits);
+ if(e!=7){ // 1 field from the first call + 6 stored fields from the second
+ fprintf(stderr, STATS_FILE " is damaged\n");
+ return -1; // NOTE(review): stats_file and entry stay allocated on this path - presumably released by ff_rate_control_uninit; confirm the caller invokes it
+ }
+ }
+ rcc->num_entries= i;
+
+ if(init_pass2(s) < 0) return -1;
+ }
+
+ /* no 2pass stuff, just normal 1-pass (reached in every mode, there is no early return on success above) */
+ //initial values, they don't really matter as they will be totally different within a few frames
+ s->i_pred.coeff= s->p_pred.coeff= 7.0;
+ s->i_pred.count= s->p_pred.count= 1.0;
+
+ s->i_pred.decay= s->p_pred.decay= 0.4;
+
+ // use more bits at the beginning, otherwise high motion at the begin will look like shit
+ s->qsum=100 * s->qmin; // seeds the long-term average (qsum/qcount) at qmin with a weight of 100 pictures
+ s->qcount=100;
+
+ s->short_term_qsum=0.001;
+ s->short_term_qcount=0.001;
+
+ return 0;
+}
+
+void ff_rate_control_uninit(MpegEncContext *s) // release what ff_rate_control_init acquired: the statistics file and the entry table
+{
+ RateControlContext *rcc= &s->rc_context;
+ emms_c();
+
+ if(rcc->stats_file) // only close a file that was actually opened
+ fclose(rcc->stats_file);
+ rcc->stats_file = NULL;
+ av_freep(&rcc->entry); // takes the address of the pointer; presumably also resets it to NULL - see av_freep
+}
+
+//----------------------------------
+// 1 Pass Code
+
+static double predict(Predictor *p, double q, double var) // estimate a picture's size for quantizer q and variance var from the predictor's running average
+{
+ return p->coeff*var / (q*p->count); // coeff/count is the decayed mean of size*q/(var+1) accumulated by update_predictor
+}
+
+static void update_predictor(Predictor *p, double q, double var, double size) // feed one observed (q, var, size) sample into the predictor
+{
+ double new_coeff= size*q / (var + 1); // +1 keeps the divisor non-zero
+ if(var<1000) return; // samples with a variance below 1000 are ignored
+
+ p->count*= p->decay; // scale down the weight of all earlier samples ...
+ p->coeff*= p->decay;
+ p->count++; // ... then add the new sample with weight 1
+ p->coeff+= new_coeff;
+}
+
+int ff_rate_estimate_qscale(MpegEncContext *s) // one-pass rate control: returns the quantizer to use for the current picture
+{
+ int qmin= s->qmin;
+ int qmax= s->qmax;
+ int rate_q=5; // only overwritten by the P-frame search below
+ float q;
+ int qscale;
+ float br_compensation;
+ double diff;
+ double short_term_q;
+ double long_term_q;
+ double fps;
+ int picture_number= s->input_picture_number - s->max_b_frames;
+ int64_t wanted_bits;
+ emms_c();
+
+ fps= (double)s->frame_rate / FRAME_RATE_BASE;
+ wanted_bits= (uint64_t)(s->bit_rate*(double)picture_number/fps); // bits the target bit rate allows up to this picture; NOTE(review): picture_number may be negative here, making the unsigned cast undefined - confirm
+// printf("%d %d %d\n", picture_number, (int)wanted_bits, (int)s->total_bits);
+
+ if(s->pict_type==B_TYPE){ // B pictures use a quantizer range scaled by b_quant_factor and shifted by b_quant_offset
+ qmin= (int)(qmin*s->b_quant_factor+s->b_quant_offset + 0.5);
+ qmax= (int)(qmax*s->b_quant_factor+s->b_quant_offset + 0.5);
+ }
+ if(qmin<1) qmin=1; // keep the range inside 1..31 and non-empty
+ if(qmax>31) qmax=31;
+ if(qmax<=qmin) qmax= qmin;
+
+ /* update predictors */
+ if(picture_number>2){
+ if(s->pict_type!=B_TYPE && s->last_non_b_pict_type == P_TYPE){ // learn from the previous non-B picture when it was a P picture
+//printf("%d %d %d %f\n", s->qscale, s->last_mc_mb_var, s->frame_bits, s->p_pred.coeff);
+ update_predictor(&s->p_pred, s->last_non_b_qscale, s->last_non_b_mc_mb_var, s->pb_frame_bits);
+ }
+ }
+
+ if(s->pict_type == I_TYPE){ // I pictures reuse the running averages without updating the short-term one
+ short_term_q= s->short_term_qsum/s->short_term_qcount;
+
+ long_term_q= s->qsum/s->qcount*(s->total_bits+1)/(wanted_bits+1); //+1 to avoid nan & 0
+
+ q= 1/((1/long_term_q - 1/short_term_q)*s->qcompress + 1/short_term_q); // blend of the reciprocals: qcompress=0 gives short_term_q, qcompress=1 gives long_term_q
+ }else if(s->pict_type==B_TYPE){ // B pictures derive q from the last non-B quantizer
+ q= (int)(s->last_non_b_qscale*s->b_quant_factor+s->b_quant_offset + 0.5);
+ }else{ //P Frame
+ int i;
+ int diff, best_diff=1000000000; // this int diff shadows the outer double diff inside this branch
+ for(i=1; i<=31; i++){ // try every quantizer and keep the one whose predicted size is closest to bit_rate/fps
+ diff= predict(&s->p_pred, i, s->mc_mb_var_sum) - (double)s->bit_rate/fps; // truncated to int on assignment
+ if(diff<0) diff= -diff;
+ if(diff<best_diff){
+ best_diff= diff;
+ rate_q= i;
+ }
+ }
+ s->short_term_qsum*=s->qblur; // decay the short-term average by qblur, then add this picture's rate_q
+ s->short_term_qcount*=s->qblur;
+
+ s->short_term_qsum+= rate_q;
+ s->short_term_qcount++;
+ short_term_q= s->short_term_qsum/s->short_term_qcount;
+
+ long_term_q= s->qsum/s->qcount*(s->total_bits+1)/(wanted_bits+1); //+1 to avoid nan & 0
+
+// q= (long_term_q - short_term_q)*s->qcompress + short_term_q;
+ q= 1/((1/long_term_q - 1/short_term_q)*s->qcompress + 1/short_term_q); // same reciprocal blend as in the I-picture branch
+ }
+
+ diff= s->total_bits - wanted_bits; // positive when more bits were spent than planned
+ br_compensation= (s->bit_rate_tolerance - diff)/s->bit_rate_tolerance; // 1 when exactly on budget, smaller when over budget, larger when under
+ if(br_compensation<=0.0) br_compensation=0.001; // keep the divisor below positive
+ q/=br_compensation;
+//printf("%f %f %f\n", q, br_compensation, short_term_q);
+ qscale= (int)(q + 0.5); // round to nearest, then clamp to [qmin, qmax]
+ if (qscale<qmin) qscale=qmin;
+ else if(qscale>qmax) qscale=qmax;
+
+ if(s->pict_type!=B_TYPE){
+ s->qsum+= qscale; // the long-term average is updated with the value before the max_qdiff limit below
+ s->qcount++;
+ if (qscale<s->last_non_b_qscale-s->max_qdiff) qscale=s->last_non_b_qscale-s->max_qdiff; // limit the change against the previous non-B quantizer to max_qdiff
+ else if(qscale>s->last_non_b_qscale+s->max_qdiff) qscale=s->last_non_b_qscale+s->max_qdiff;
+ }
+//printf("q:%d diff:%d comp:%f rate_q:%d st_q:%f fvar:%d last_size:%d\n", qscale, (int)diff, br_compensation,
+// rate_q, short_term_q, s->mc_mb_var, s->frame_bits);
+//printf("%d %d\n", s->bit_rate, (int)fps);
+ return qscale;
+}
+
+//----------------------------------------------
+// 2-Pass code
+
+static int init_pass2(MpegEncContext *s) // plan the second pass from the loaded statistics: fills new_pict_type, new_qscale and expected_bits of every entry; returns 0, or -1 if the bit rate is too low
+{
+ RateControlContext *rcc= &s->rc_context;
+ int i;
+ double fps= (double)s->frame_rate / FRAME_RATE_BASE;
+ double complexity[5]={0,0,0,0,0}; // approximate bits at quant=1
+ double avg_quantizer[5]; // indexed by picture type; only the I_TYPE, P_TYPE and B_TYPE slots are assigned below
+ uint64_t const_bits[5]={0,0,0,0,0}; // quantizer independent bits
+ uint64_t available_bits[5];
+ uint64_t all_const_bits;
+ uint64_t all_available_bits= (uint64_t)(s->bit_rate*(double)rcc->num_entries/fps); // total budget for the whole sequence at the target bit rate
+ int num_frames[5]={0,0,0,0,0};
+ double rate_factor=0;
+ double step;
+ int last_i_frame=-10000000; // far in the past so the first I picture always passes the distance test
+
+ /* find complexity & const_bits & decide the pict_types */
+ for(i=0; i<rcc->num_entries; i++){
+ RateControlEntry *rce= &rcc->entry[i];
+
+ if(s->b_frame_strategy==0 || s->max_b_frames==0){ // keep the picture types chosen in the first pass
+ rce->new_pict_type= rce->pict_type;
+ }else{
+ int j;
+ int next_non_b_type=P_TYPE;
+
+ switch(rce->pict_type){ // no default case: new_pict_type is left unassigned for any other pict_type value
+ case I_TYPE:
+ if(i-last_i_frame>s->gop_size/2){ //FIXME this is not optimal
+ rce->new_pict_type= I_TYPE;
+ last_i_frame= i;
+ }else{
+ rce->new_pict_type= P_TYPE; // will be caught by the scene detection anyway
+ }
+ break;
+ case P_TYPE:
+ rce->new_pict_type= P_TYPE;
+ break;
+ case B_TYPE:
+ for(j=i+1; j<i+s->max_b_frames+2 && j<rcc->num_entries; j++){ // look ahead for the next non-B picture
+ if(rcc->entry[j].pict_type != B_TYPE){
+ next_non_b_type= rcc->entry[j].pict_type;
+ break;
+ }
+ }
+ if(next_non_b_type==I_TYPE) // a B picture directly followed (after other Bs) by an I picture is recoded as P
+ rce->new_pict_type= P_TYPE;
+ else
+ rce->new_pict_type= B_TYPE;
+ break;
+ }
+ }
+
+ complexity[rce->new_pict_type]+= (rce->i_tex_bits+ rce->p_tex_bits)*(double)rce->qscale; // texture bits times the quantizer they were coded with
+ const_bits[rce->new_pict_type]+= rce->mv_bits + rce->misc_bits;
+ num_frames[rce->new_pict_type]++;
+ }
+ all_const_bits= const_bits[I_TYPE] + const_bits[P_TYPE] + const_bits[B_TYPE];
+
+ if(all_available_bits < all_const_bits){
+ fprintf(stderr, "requested bitrate is to low\n");
+ return -1;
+ }
+
+// avg_complexity= complexity/rcc->num_entries;
+ avg_quantizer[P_TYPE]=
+ avg_quantizer[I_TYPE]= (complexity[I_TYPE]+complexity[P_TYPE] + complexity[B_TYPE]/s->b_quant_factor)
+ / (all_available_bits - all_const_bits); // NOTE(review): the check above uses '<', so this divisor is 0 when the two totals are equal
+ avg_quantizer[B_TYPE]= avg_quantizer[P_TYPE]*s->b_quant_factor + s->b_quant_offset;
+//printf("avg quantizer: %f %f\n", avg_quantizer[P_TYPE], avg_quantizer[B_TYPE]);
+
+ for(i=0; i<5; i++){ // NOTE(review): this also reads the avg_quantizer slots that were never assigned above
+ available_bits[i]= const_bits[i] + complexity[i]/avg_quantizer[i];
+ }
+//printf("%lld %lld %lld %lld\n", available_bits[I_TYPE], available_bits[P_TYPE], available_bits[B_TYPE], all_available_bits);
+
+ for(step=256*256; step>0.0000001; step*=0.5){ // search for rate_factor with a halving step: add the step, undo it at the bottom if the budget is exceeded
+ uint64_t expected_bits=0;
+ rate_factor+= step;
+ /* find qscale */
+ for(i=0; i<rcc->num_entries; i++){
+ RateControlEntry *rce= &rcc->entry[i];
+ double short_term_q, q, bits_left;
+ const int pict_type= rce->new_pict_type;
+ int qmin= s->qmin;
+ int qmax= s->qmax;
+
+ if(pict_type==B_TYPE){ // B pictures use a quantizer range scaled by b_quant_factor and shifted by b_quant_offset
+ qmin= (int)(qmin*s->b_quant_factor+s->b_quant_offset + 0.5);
+ qmax= (int)(qmax*s->b_quant_factor+s->b_quant_offset + 0.5);
+ }
+ if(qmin<1) qmin=1; // keep the range inside 1..31 and non-empty
+ if(qmax>31) qmax=31;
+ if(qmax<=qmin) qmax= qmin;
+
+ switch(s->rc_strategy){ // the strategies differ in how the constant (mv + misc) bits enter the per-picture budget
+ case 0: // per-picture share of the type's budget, minus this picture's own constant bits
+ bits_left= available_bits[pict_type]/num_frames[pict_type]*rate_factor - rce->misc_bits - rce->mv_bits;
+ if(bits_left<1.0) bits_left=1.0;
+ short_term_q= rce->qscale*(rce->i_tex_bits + rce->p_tex_bits)/bits_left;
+ break;
+ case 1: // the type's constant bits are removed from its budget before sharing it out
+ bits_left= (available_bits[pict_type] - const_bits[pict_type])/num_frames[pict_type]*rate_factor;
+ if(bits_left<1.0) bits_left=1.0;
+ short_term_q= rce->qscale*(rce->i_tex_bits + rce->p_tex_bits)/bits_left;
+ break;
+ case 2: // constant bits are scaled with the quantizer like texture bits
+ bits_left= available_bits[pict_type]/num_frames[pict_type]*rate_factor;
+ if(bits_left<1.0) bits_left=1.0;
+ short_term_q= rce->qscale*(rce->i_tex_bits + rce->p_tex_bits + rce->misc_bits + rce->mv_bits)/bits_left;
+ break;
+ default:
+ fprintf(stderr, "unknown strategy\n");
+ short_term_q=3; //gcc warning fix
+ }
+
+ if(short_term_q>31.0) short_term_q=31.0;
+ else if (short_term_q<1.0) short_term_q=1.0;
+
+ q= 1/((1/avg_quantizer[pict_type] - 1/short_term_q)*s->qcompress + 1/short_term_q); // blend of the reciprocals: qcompress=0 gives short_term_q, qcompress=1 gives the type's average quantizer
+ if (q<qmin) q=qmin;
+ else if(q>qmax) q=qmax;
+//printf("lq:%f, sq:%f t:%f q:%f\n", avg_quantizer[rce->pict_type], short_term_q, bits_left, q);
+ rce->new_qscale= q;
+ }
+
+ /* smooth curve */
+
+ /* find expected bits */
+ for(i=0; i<rcc->num_entries; i++){
+ RateControlEntry *rce= &rcc->entry[i];
+ double factor= rce->qscale / rce->new_qscale; // NOTE(review): if both fields are integers this is an integer division - check the RateControlEntry declaration
+
+ rce->expected_bits= expected_bits; // running total of the pictures before this one
+ expected_bits += (int)(rce->misc_bits + rce->mv_bits + (rce->i_tex_bits + rce->p_tex_bits)*factor + 0.5); // texture bits are rescaled by old/new quantizer, constant bits are kept
+ }
+
+// printf("%d %d %f\n", (int)expected_bits, (int)all_available_bits, rate_factor);
+ if(expected_bits > all_available_bits) rate_factor-= step; // over budget: take this step back
+ }
+
+ return 0;
+}
+
+int ff_rate_estimate_qscale_pass2(MpegEncContext *s) // second-pass rate control: returns the planned quantizer for the current picture, corrected for the bit budget drift so far
+{
+ int qmin= s->qmin;
+ int qmax= s->qmax;
+ float q;
+ int qscale;
+ float br_compensation;
+ double diff;
+ int picture_number= s->picture_number;
+ RateControlEntry *rce= &s->rc_context.entry[picture_number]; // NOTE(review): no check of picture_number against num_entries here - confirm the caller guarantees it
+ int64_t wanted_bits= rce->expected_bits; // cumulative bits planned by init_pass2 for the pictures before this one
+ emms_c();
+
+// printf("%d %d %d\n", picture_number, (int)wanted_bits, (int)s->total_bits);
+
+ if(s->pict_type==B_TYPE){ // B pictures use a quantizer range scaled by b_quant_factor and shifted by b_quant_offset
+ qmin= (int)(qmin*s->b_quant_factor+s->b_quant_offset + 0.5);
+ qmax= (int)(qmax*s->b_quant_factor+s->b_quant_offset + 0.5);
+ }
+ if(qmin<1) qmin=1; // keep the range inside 1..31 and non-empty
+ if(qmax>31) qmax=31;
+ if(qmax<=qmin) qmax= qmin;
+
+ q= rce->new_qscale; // quantizer chosen for this picture by init_pass2
+
+ diff= s->total_bits - wanted_bits; // positive when more bits were spent than planned
+ br_compensation= (s->bit_rate_tolerance - diff)/s->bit_rate_tolerance; // 1 when exactly on plan, smaller when over, larger when under
+ if(br_compensation<=0.0) br_compensation=0.001; // keep the divisor below positive
+ q/=br_compensation;
+
+ qscale= (int)(q + 0.5); // round to nearest, then clamp to [qmin, qmax]
+ if (qscale<qmin) qscale=qmin;
+ else if(qscale>qmax) qscale=qmax;
+// printf("%d %d %d %d type:%d\n", qmin, qscale, qmax, picture_number, s->pict_type); fflush(stdout);
+ return qscale;
+}
diff --git a/src/libffmpeg/libavcodec/rv10.c b/src/libffmpeg/libavcodec/rv10.c
index f4ebc9016..261c889de 100644
--- a/src/libffmpeg/libavcodec/rv10.c
+++ b/src/libffmpeg/libavcodec/rv10.c
@@ -1,27 +1,23 @@
/*
* RV10 codec
- * Copyright (c) 2000,2001 Gerard Lantau.
+ * Copyright (c) 2000,2001 Fabrice Bellard.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
*
- * This program is distributed in the hope that it will be useful,
+ * This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include "common.h"
-#include "dsputil.h"
#include "avcodec.h"
+#include "dsputil.h"
#include "mpegvideo.h"
//#define DEBUG
@@ -337,9 +333,9 @@ static int rv10_decode_picture_header(MpegEncContext *s)
static int rv10_decode_init(AVCodecContext *avctx)
{
MpegEncContext *s = avctx->priv_data;
- int i;
static int done;
+// s->avctx= avctx;
s->out_format = FMT_H263;
s->width = avctx->width;
@@ -351,11 +347,6 @@ static int rv10_decode_init(AVCodecContext *avctx)
if (MPV_common_init(s) < 0)
return -1;
- /* XXX: suppress this matrix init, only needed because using mpeg1
- dequantize in mmx case */
- for(i=0;i<64;i++)
- s->non_intra_matrix[i] = default_non_intra_matrix[i];
-
h263_decode_init_vlc(s);
/* init rv vlc */
@@ -439,9 +430,27 @@ static int rv10_decode_frame(AVCodecContext *avctx,
s->rv10_first_dc_coded[0] = 0;
s->rv10_first_dc_coded[1] = 0;
s->rv10_first_dc_coded[2] = 0;
-
+
+ s->block_wrap[0]=
+ s->block_wrap[1]=
+ s->block_wrap[2]=
+ s->block_wrap[3]= s->mb_width*2 + 2;
+ s->block_wrap[4]=
+ s->block_wrap[5]= s->mb_width + 2;
+ s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
+ s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1) + s->mb_x*2;
+ s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1 + s->mb_x*2;
+ s->block_index[3]= s->block_wrap[0]*(s->mb_y*2 + 2) + s->mb_x*2;
+ s->block_index[4]= s->block_wrap[4]*(s->mb_y + 1) + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x;
+ s->block_index[5]= s->block_wrap[4]*(s->mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x;
/* decode each macroblock */
for(i=0;i<mb_count;i++) {
+ s->block_index[0]+=2;
+ s->block_index[1]+=2;
+ s->block_index[2]+=2;
+ s->block_index[3]+=2;
+ s->block_index[4]++;
+ s->block_index[5]++;
#ifdef DEBUG
printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
#endif
@@ -459,6 +468,12 @@ static int rv10_decode_frame(AVCodecContext *avctx,
if (++s->mb_x == s->mb_width) {
s->mb_x = 0;
s->mb_y++;
+ s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1;
+ s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1);
+ s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1;
+ s->block_index[3]= s->block_wrap[0]*(s->mb_y*2 + 2);
+ s->block_index[4]= s->block_wrap[4]*(s->mb_y + 1) + s->block_wrap[0]*(s->mb_height*2 + 2);
+ s->block_index[5]= s->block_wrap[4]*(s->mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2);
}
}
diff --git a/src/libffmpeg/libavcodec/simple_idct.c b/src/libffmpeg/libavcodec/simple_idct.c
index a9653b187..0665f667a 100644
--- a/src/libffmpeg/libavcodec/simple_idct.c
+++ b/src/libffmpeg/libavcodec/simple_idct.c
@@ -1,29 +1,29 @@
/*
- Copyright (C) 2001 Michael Niedermayer (michaelni@gmx.at)
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-*/
-
+ * Simple IDCT
+ *
+ * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
/*
- based upon some outcommented c code from mpeg2dec (idct_mmx.c written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
-*/
-
-#include <inttypes.h>
-
+ based upon some outcommented c code from mpeg2dec (idct_mmx.c
+ written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
+ */
+#include "avcodec.h"
+#include "dsputil.h"
#include "simple_idct.h"
-#include "../config.h"
#if 0
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
@@ -39,7 +39,7 @@
#define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-#define W4 16384 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
@@ -47,65 +47,33 @@
#define COL_SHIFT 20 // 6
#endif
-/* 8x8 Matrix used to do a trivial (slow) 8 point IDCT */
-static int coeff[64]={
- W4, W4, W4, W4, W4, W4, W4, W4,
- W1, W3, W5, W7,-W7,-W5,-W3,-W1,
- W2, W6,-W6,-W2,-W2,-W6, W6, W2,
- W3,-W7,-W1,-W5, W5, W1, W7,-W3,
- W4,-W4,-W4, W4, W4,-W4,-W4, W4,
- W5,-W1, W7, W3,-W3,-W7, W1,-W5,
- W6,-W2, W2,-W6,-W6, W2,-W2, W6,
- W7,-W5, W3,-W1, W1,-W3, W5,-W7
-};
-
-static int inline idctRowCondZ (int16_t * row)
-{
- int a0, a1, a2, a3, b0, b1, b2, b3;
+#ifdef ARCH_ALPHA
+#define FAST_64BIT
+#endif
- if( !( ((uint32_t*)row)[0]|((uint32_t*)row)[1] |((uint32_t*)row)[2] |((uint32_t*)row)[3])) {
-/* row[0] = row[1] = row[2] = row[3] = row[4] =
- row[5] = row[6] = row[7] = 0;*/
- return 0;
- }
+#if defined(ARCH_POWERPC_405)
- if(!( ((uint32_t*)row)[2] |((uint32_t*)row)[3] )){
- a0 = W4*row[0] + W2*row[2] + (1<<(ROW_SHIFT-1));
- a1 = W4*row[0] + W6*row[2] + (1<<(ROW_SHIFT-1));
- a2 = W4*row[0] - W6*row[2] + (1<<(ROW_SHIFT-1));
- a3 = W4*row[0] - W2*row[2] + (1<<(ROW_SHIFT-1));
-
- b0 = W1*row[1] + W3*row[3];
- b1 = W3*row[1] - W7*row[3];
- b2 = W5*row[1] - W1*row[3];
- b3 = W7*row[1] - W5*row[3];
- }else{
- a0 = W4*row[0] + W2*row[2] + W4*row[4] + W6*row[6] + (1<<(ROW_SHIFT-1));
- a1 = W4*row[0] + W6*row[2] - W4*row[4] - W2*row[6] + (1<<(ROW_SHIFT-1));
- a2 = W4*row[0] - W6*row[2] - W4*row[4] + W2*row[6] + (1<<(ROW_SHIFT-1));
- a3 = W4*row[0] - W2*row[2] + W4*row[4] - W6*row[6] + (1<<(ROW_SHIFT-1));
-
- b0 = W1*row[1] + W3*row[3] + W5*row[5] + W7*row[7];
- b1 = W3*row[1] - W7*row[3] - W1*row[5] - W5*row[7];
- b2 = W5*row[1] - W1*row[3] + W7*row[5] + W3*row[7];
- b3 = W7*row[1] - W5*row[3] + W3*row[5] - W1*row[7];
- }
+/* signed 16x16 -> 32 multiply add accumulate */
+#define MAC16(rt, ra, rb) \
+ asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
- row[0] = (a0 + b0) >> ROW_SHIFT;
- row[1] = (a1 + b1) >> ROW_SHIFT;
- row[2] = (a2 + b2) >> ROW_SHIFT;
- row[3] = (a3 + b3) >> ROW_SHIFT;
- row[4] = (a3 - b3) >> ROW_SHIFT;
- row[5] = (a2 - b2) >> ROW_SHIFT;
- row[6] = (a1 - b1) >> ROW_SHIFT;
- row[7] = (a0 - b0) >> ROW_SHIFT;
-
- return 1;
-}
+/* signed 16x16 -> 32 multiply */
+#define MUL16(rt, ra, rb) \
+ asm ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb));
+
+#else
+
+/* signed 16x16 -> 32 multiply add accumulate */
+#define MAC16(rt, ra, rb) rt += (ra) * (rb)
+
+/* signed 16x16 -> 32 multiply */
+#define MUL16(rt, ra, rb) rt = (ra) * (rb)
+
+#endif
#ifdef ARCH_ALPHA
/* 0: all entries 0, 1: only first entry nonzero, 2: otherwise */
-static int inline idctRowCondDC(int16_t *row)
+static inline int idctRowCondDC(int16_t *row)
{
int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3;
uint64_t *lrow = (uint64_t *) row;
@@ -129,10 +97,10 @@ static int inline idctRowCondDC(int16_t *row)
}
}
- a0 = W4 * row[0];
- a1 = W4 * row[0];
- a2 = W4 * row[0];
- a3 = W4 * row[0];
+ a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
+ a1 = a0;
+ a2 = a0;
+ a3 = a0;
if (row[2]) {
a0 += W2 * row[2];
@@ -155,11 +123,6 @@ static int inline idctRowCondDC(int16_t *row)
a3 -= W6 * row[6];
}
- a0 += 1 << (ROW_SHIFT - 1);
- a1 += 1 << (ROW_SHIFT - 1);
- a2 += 1 << (ROW_SHIFT - 1);
- a3 += 1 << (ROW_SHIFT - 1);
-
if (row[1]) {
b0 = W1 * row[1];
b1 = W3 * row[1];
@@ -205,38 +168,86 @@ static int inline idctRowCondDC(int16_t *row)
return 2;
}
#else /* not ARCH_ALPHA */
-static int inline idctRowCondDC (int16_t * row)
+
+static inline void idctRowCondDC (int16_t * row)
{
int a0, a1, a2, a3, b0, b1, b2, b3;
+#ifdef FAST_64BIT
+ uint64_t temp;
+#else
+ uint32_t temp;
+#endif
- if( !( ((uint32_t*)row)[1] |((uint32_t*)row)[2] |((uint32_t*)row)[3]| row[1])) {
-// row[0] = row[1] = row[2] = row[3] = row[4] = row[5] = row[6] = row[7] = row[0]<<3;
- uint16_t temp= row[0]<<3;
- ((uint32_t*)row)[0]=((uint32_t*)row)[1]=
- ((uint32_t*)row)[2]=((uint32_t*)row)[3]= temp + (temp<<16);
- return 0;
+#ifdef FAST_64BIT
+#ifdef WORDS_BIGENDIAN
+#define ROW0_MASK 0xffff000000000000LL
+#else
+#define ROW0_MASK 0xffffLL
+#endif
+ if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) |
+ ((uint64_t *)row)[1]) == 0) {
+ temp = (row[0] << 3) & 0xffff;
+ temp += temp << 16;
+ temp += temp << 32;
+ ((uint64_t *)row)[0] = temp;
+ ((uint64_t *)row)[1] = temp;
+ return;
+ }
+#else
+ if (!(((uint32_t*)row)[1] |
+ ((uint32_t*)row)[2] |
+ ((uint32_t*)row)[3] |
+ row[1])) {
+ temp = (row[0] << 3) & 0xffff;
+ temp += temp << 16;
+ ((uint32_t*)row)[0]=((uint32_t*)row)[1] =
+ ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp;
+ return;
}
+#endif
- if(!( ((uint32_t*)row)[2] |((uint32_t*)row)[3] )){
- a0 = W4*row[0] + W2*row[2] + (1<<(ROW_SHIFT-1));
- a1 = W4*row[0] + W6*row[2] + (1<<(ROW_SHIFT-1));
- a2 = W4*row[0] - W6*row[2] + (1<<(ROW_SHIFT-1));
- a3 = W4*row[0] - W2*row[2] + (1<<(ROW_SHIFT-1));
-
- b0 = W1*row[1] + W3*row[3];
- b1 = W3*row[1] - W7*row[3];
- b2 = W5*row[1] - W1*row[3];
- b3 = W7*row[1] - W5*row[3];
- }else{
- a0 = W4*row[0] + W2*row[2] + W4*row[4] + W6*row[6] + (1<<(ROW_SHIFT-1));
- a1 = W4*row[0] + W6*row[2] - W4*row[4] - W2*row[6] + (1<<(ROW_SHIFT-1));
- a2 = W4*row[0] - W6*row[2] - W4*row[4] + W2*row[6] + (1<<(ROW_SHIFT-1));
- a3 = W4*row[0] - W2*row[2] + W4*row[4] - W6*row[6] + (1<<(ROW_SHIFT-1));
-
- b0 = W1*row[1] + W3*row[3] + W5*row[5] + W7*row[7];
- b1 = W3*row[1] - W7*row[3] - W1*row[5] - W5*row[7];
- b2 = W5*row[1] - W1*row[3] + W7*row[5] + W3*row[7];
- b3 = W7*row[1] - W5*row[3] + W3*row[5] - W1*row[7];
+ a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
+ a1 = a0;
+ a2 = a0;
+ a3 = a0;
+
+ /* no need to optimize : gcc does it */
+ a0 += W2 * row[2];
+ a1 += W6 * row[2];
+ a2 -= W6 * row[2];
+ a3 -= W2 * row[2];
+
+ MUL16(b0, W1, row[1]);
+ MAC16(b0, W3, row[3]);
+ MUL16(b1, W3, row[1]);
+ MAC16(b1, -W7, row[3]);
+ MUL16(b2, W5, row[1]);
+ MAC16(b2, -W1, row[3]);
+ MUL16(b3, W7, row[1]);
+ MAC16(b3, -W5, row[3]);
+
+#ifdef FAST_64BIT
+ temp = ((uint64_t*)row)[1];
+#else
+ temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3];
+#endif
+ if (temp != 0) {
+ a0 += W4*row[4] + W6*row[6];
+ a1 += - W4*row[4] - W2*row[6];
+ a2 += - W4*row[4] + W2*row[6];
+ a3 += W4*row[4] - W6*row[6];
+
+ MAC16(b0, W5, row[5]);
+ MAC16(b0, W7, row[7]);
+
+ MAC16(b1, -W1, row[5]);
+ MAC16(b1, -W5, row[7]);
+
+ MAC16(b2, W7, row[5]);
+ MAC16(b2, W3, row[7]);
+
+ MAC16(b3, W3, row[5]);
+ MAC16(b3, -W1, row[7]);
}
row[0] = (a0 + b0) >> ROW_SHIFT;
@@ -247,202 +258,151 @@ static int inline idctRowCondDC (int16_t * row)
row[5] = (a2 - b2) >> ROW_SHIFT;
row[3] = (a3 + b3) >> ROW_SHIFT;
row[4] = (a3 - b3) >> ROW_SHIFT;
-
- return 1;
}
#endif /* not ARCH_ALPHA */
-static void inline idctCol (int16_t * col)
+static inline void idctSparseColPut (UINT8 *dest, int line_size,
+ int16_t * col)
{
-
-/*
- if( !(col[8*1] | col[8*2] |col[8*3] |col[8*4] |col[8*5] |col[8*6] | col[8*7])) {
- col[8*0] = col[8*1] = col[8*2] = col[8*3] = col[8*4] =
- col[8*5] = col[8*6] = col[8*7] = col[8*0]<<3;
- return;
- }*/
-
int a0, a1, a2, a3, b0, b1, b2, b3;
- col[0] += (1<<(COL_SHIFT-1))/W4;
- a0 = W4*col[8*0] + W2*col[8*2] + W4*col[8*4] + W6*col[8*6];
- a1 = W4*col[8*0] + W6*col[8*2] - W4*col[8*4] - W2*col[8*6];
- a2 = W4*col[8*0] - W6*col[8*2] - W4*col[8*4] + W2*col[8*6];
- a3 = W4*col[8*0] - W2*col[8*2] + W4*col[8*4] - W6*col[8*6];
-
- b0 = W1*col[8*1] + W3*col[8*3] + W5*col[8*5] + W7*col[8*7];
- b1 = W3*col[8*1] - W7*col[8*3] - W1*col[8*5] - W5*col[8*7];
- b2 = W5*col[8*1] - W1*col[8*3] + W7*col[8*5] + W3*col[8*7];
- b3 = W7*col[8*1] - W5*col[8*3] + W3*col[8*5] - W1*col[8*7];
-
- col[8*0] = (a0 + b0) >> COL_SHIFT;
- col[8*7] = (a0 - b0) >> COL_SHIFT;
- col[8*1] = (a1 + b1) >> COL_SHIFT;
- col[8*6] = (a1 - b1) >> COL_SHIFT;
- col[8*2] = (a2 + b2) >> COL_SHIFT;
- col[8*5] = (a2 - b2) >> COL_SHIFT;
- col[8*3] = (a3 + b3) >> COL_SHIFT;
- col[8*4] = (a3 - b3) >> COL_SHIFT;
-}
+ UINT8 *cm = cropTbl + MAX_NEG_CROP;
-static void inline idctSparseCol (int16_t * col)
-{
- int a0, a1, a2, a3, b0, b1, b2, b3;
- col[0] += (1<<(COL_SHIFT-1))/W4;
- a0 = W4*col[8*0];
- a1 = W4*col[8*0];
- a2 = W4*col[8*0];
- a3 = W4*col[8*0];
-
- if(col[8*2]){
- a0 += + W2*col[8*2];
- a1 += + W6*col[8*2];
- a2 += - W6*col[8*2];
- a3 += - W2*col[8*2];
- }
+ /* XXX: I did that only to give same values as previous code */
+ a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
+ a1 = a0;
+ a2 = a0;
+ a3 = a0;
- if(col[8*4]){
- a0 += + W4*col[8*4];
- a1 += - W4*col[8*4];
- a2 += - W4*col[8*4];
- a3 += + W4*col[8*4];
- }
-
- if(col[8*6]){
- a0 += + W6*col[8*6];
- a1 += - W2*col[8*6];
- a2 += + W2*col[8*6];
- a3 += - W6*col[8*6];
- }
+ a0 += + W2*col[8*2];
+ a1 += + W6*col[8*2];
+ a2 += - W6*col[8*2];
+ a3 += - W2*col[8*2];
- if(col[8*1]){
- b0 = W1*col[8*1];
- b1 = W3*col[8*1];
- b2 = W5*col[8*1];
- b3 = W7*col[8*1];
- }else{
- b0 =
- b1 =
- b2 =
- b3 = 0;
- }
+ MUL16(b0, W1, col[8*1]);
+ MUL16(b1, W3, col[8*1]);
+ MUL16(b2, W5, col[8*1]);
+ MUL16(b3, W7, col[8*1]);
- if(col[8*3]){
- b0 += + W3*col[8*3];
- b1 += - W7*col[8*3];
- b2 += - W1*col[8*3];
- b3 += - W5*col[8*3];
- }
+ MAC16(b0, + W3, col[8*3]);
+ MAC16(b1, - W7, col[8*3]);
+ MAC16(b2, - W1, col[8*3]);
+ MAC16(b3, - W5, col[8*3]);
- if(col[8*5]){
- b0 += + W5*col[8*5];
- b1 += - W1*col[8*5];
- b2 += + W7*col[8*5];
- b3 += + W3*col[8*5];
+ if(col[8*4]){
+ a0 += + W4*col[8*4];
+ a1 += - W4*col[8*4];
+ a2 += - W4*col[8*4];
+ a3 += + W4*col[8*4];
}
- if(col[8*7]){
- b0 += + W7*col[8*7];
- b1 += - W5*col[8*7];
- b2 += + W3*col[8*7];
- b3 += - W1*col[8*7];
+ if (col[8*5]) {
+ MAC16(b0, + W5, col[8*5]);
+ MAC16(b1, - W1, col[8*5]);
+ MAC16(b2, + W7, col[8*5]);
+ MAC16(b3, + W3, col[8*5]);
}
-#ifndef ARCH_ALPHA
- if(!(b0|b1|b2|b3)){
- col[8*0] = (a0) >> COL_SHIFT;
- col[8*7] = (a0) >> COL_SHIFT;
- col[8*1] = (a1) >> COL_SHIFT;
- col[8*6] = (a1) >> COL_SHIFT;
- col[8*2] = (a2) >> COL_SHIFT;
- col[8*5] = (a2) >> COL_SHIFT;
- col[8*3] = (a3) >> COL_SHIFT;
- col[8*4] = (a3) >> COL_SHIFT;
- }else{
-#endif
- col[8*0] = (a0 + b0) >> COL_SHIFT;
- col[8*7] = (a0 - b0) >> COL_SHIFT;
- col[8*1] = (a1 + b1) >> COL_SHIFT;
- col[8*6] = (a1 - b1) >> COL_SHIFT;
- col[8*2] = (a2 + b2) >> COL_SHIFT;
- col[8*5] = (a2 - b2) >> COL_SHIFT;
- col[8*3] = (a3 + b3) >> COL_SHIFT;
- col[8*4] = (a3 - b3) >> COL_SHIFT;
-#ifndef ARCH_ALPHA
- }
-#endif
+ if(col[8*6]){
+ a0 += + W6*col[8*6];
+ a1 += - W2*col[8*6];
+ a2 += + W2*col[8*6];
+ a3 += - W6*col[8*6];
+ }
+
+ if (col[8*7]) {
+ MAC16(b0, + W7, col[8*7]);
+ MAC16(b1, - W5, col[8*7]);
+ MAC16(b2, + W3, col[8*7]);
+ MAC16(b3, - W1, col[8*7]);
+ }
+
+ dest[0] = cm[(a0 + b0) >> COL_SHIFT];
+ dest += line_size;
+ dest[0] = cm[(a1 + b1) >> COL_SHIFT];
+ dest += line_size;
+ dest[0] = cm[(a2 + b2) >> COL_SHIFT];
+ dest += line_size;
+ dest[0] = cm[(a3 + b3) >> COL_SHIFT];
+ dest += line_size;
+ dest[0] = cm[(a3 - b3) >> COL_SHIFT];
+ dest += line_size;
+ dest[0] = cm[(a2 - b2) >> COL_SHIFT];
+ dest += line_size;
+ dest[0] = cm[(a1 - b1) >> COL_SHIFT];
+ dest += line_size;
+ dest[0] = cm[(a0 - b0) >> COL_SHIFT];
}
-static void inline idctSparse2Col (int16_t * col)
+static inline void idctSparseColAdd (UINT8 *dest, int line_size,
+ int16_t * col)
{
int a0, a1, a2, a3, b0, b1, b2, b3;
- col[0] += (1<<(COL_SHIFT-1))/W4;
- a0 = W4*col[8*0];
- a1 = W4*col[8*0];
- a2 = W4*col[8*0];
- a3 = W4*col[8*0];
-
- if(col[8*2]){
- a0 += + W2*col[8*2];
- a1 += + W6*col[8*2];
- a2 += - W6*col[8*2];
- a3 += - W2*col[8*2];
- }
+ UINT8 *cm = cropTbl + MAX_NEG_CROP;
- if(col[8*4]){
- a0 += + W4*col[8*4];
- a1 += - W4*col[8*4];
- a2 += - W4*col[8*4];
- a3 += + W4*col[8*4];
- }
+ /* XXX: done this way only to give the same values as the previous code */
+ a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
+ a1 = a0;
+ a2 = a0;
+ a3 = a0;
- if(col[8*6]){
- a0 += + W6*col[8*6];
- a1 += - W2*col[8*6];
- a2 += + W2*col[8*6];
- a3 += - W6*col[8*6];
- }
+ a0 += + W2*col[8*2];
+ a1 += + W6*col[8*2];
+ a2 += - W6*col[8*2];
+ a3 += - W2*col[8*2];
- if(col[8*1] || 1){
- b0 = W1*col[8*1];
- b1 = W3*col[8*1];
- b2 = W5*col[8*1];
- b3 = W7*col[8*1];
- }else{
- b0 =
- b1 =
- b2 =
- b3 = 0;
- }
+ MUL16(b0, W1, col[8*1]);
+ MUL16(b1, W3, col[8*1]);
+ MUL16(b2, W5, col[8*1]);
+ MUL16(b3, W7, col[8*1]);
- if(col[8*3]){
- b0 += + W3*col[8*3];
- b1 += - W7*col[8*3];
- b2 += - W1*col[8*3];
- b3 += - W5*col[8*3];
- }
+ MAC16(b0, + W3, col[8*3]);
+ MAC16(b1, - W7, col[8*3]);
+ MAC16(b2, - W1, col[8*3]);
+ MAC16(b3, - W5, col[8*3]);
- if(col[8*5]){
- b0 += + W5*col[8*5];
- b1 += - W1*col[8*5];
- b2 += + W7*col[8*5];
- b3 += + W3*col[8*5];
+ if(col[8*4]){
+ a0 += + W4*col[8*4];
+ a1 += - W4*col[8*4];
+ a2 += - W4*col[8*4];
+ a3 += + W4*col[8*4];
}
- if(col[8*7]){
- b0 += + W7*col[8*7];
- b1 += - W5*col[8*7];
- b2 += + W3*col[8*7];
- b3 += - W1*col[8*7];
+ if (col[8*5]) {
+ MAC16(b0, + W5, col[8*5]);
+ MAC16(b1, - W1, col[8*5]);
+ MAC16(b2, + W7, col[8*5]);
+ MAC16(b3, + W3, col[8*5]);
}
- col[8*0] = (a0 + b0) >> COL_SHIFT;
- col[8*7] = (a0 - b0) >> COL_SHIFT;
- col[8*1] = (a1 + b1) >> COL_SHIFT;
- col[8*6] = (a1 - b1) >> COL_SHIFT;
- col[8*2] = (a2 + b2) >> COL_SHIFT;
- col[8*5] = (a2 - b2) >> COL_SHIFT;
- col[8*3] = (a3 + b3) >> COL_SHIFT;
- col[8*4] = (a3 - b3) >> COL_SHIFT;
+ if(col[8*6]){
+ a0 += + W6*col[8*6];
+ a1 += - W2*col[8*6];
+ a2 += + W2*col[8*6];
+ a3 += - W6*col[8*6];
+ }
+
+ if (col[8*7]) {
+ MAC16(b0, + W7, col[8*7]);
+ MAC16(b1, - W5, col[8*7]);
+ MAC16(b2, + W3, col[8*7]);
+ MAC16(b3, - W1, col[8*7]);
+ }
+
+ dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)];
+ dest += line_size;
+ dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)];
+ dest += line_size;
+ dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)];
+ dest += line_size;
+ dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)];
+ dest += line_size;
+ dest[0] = cm[dest[0] + ((a3 - b3) >> COL_SHIFT)];
+ dest += line_size;
+ dest[0] = cm[dest[0] + ((a2 - b2) >> COL_SHIFT)];
+ dest += line_size;
+ dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)];
+ dest += line_size;
+ dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)];
}
#ifdef ARCH_ALPHA
@@ -472,82 +432,11 @@ static inline void idctCol2(int16_t *col)
lcol[12] = l; lcol[13] = r;
lcol[14] = l; lcol[15] = r;
}
-#endif
void simple_idct (short *block)
{
int i;
-
-#if 0
- int nonZero[8];
- int buffer[64];
- int nNonZero=0;
-
- idctRowCondDC(block);
-
- for(i=1; i<8; i++)
- {
- nonZero[nNonZero]=i;
- nNonZero+= idctRowCondZ(block + i*8);
- }
-
- if(nNonZero==0)
- {
- for(i=0; i<8; i++)
- {
- block[i ]=
- block[i+8 ]=
- block[i+16]=
- block[i+24]=
- block[i+32]=
- block[i+40]=
- block[i+48]=
- block[i+56]= (W4*block[i] + (1<<(COL_SHIFT-1))) >> COL_SHIFT;
- }
- }
- else if(nNonZero==1)
- {
- int index= nonZero[0]*8;
- for(i=0; i<8; i++)
- {
- int bias= W4*block[i] + (1<<(COL_SHIFT-1));
- int c= block[i + index];
- block[i ]= (c*coeff[index ] + bias) >> COL_SHIFT;
- block[i+8 ]= (c*coeff[index+1] + bias) >> COL_SHIFT;
- block[i+16]= (c*coeff[index+2] + bias) >> COL_SHIFT;
- block[i+24]= (c*coeff[index+3] + bias) >> COL_SHIFT;
- block[i+32]= (c*coeff[index+4] + bias) >> COL_SHIFT;
- block[i+40]= (c*coeff[index+5] + bias) >> COL_SHIFT;
- block[i+48]= (c*coeff[index+6] + bias) >> COL_SHIFT;
- block[i+56]= (c*coeff[index+7] + bias) >> COL_SHIFT;
- }
- }
-/* else if(nNonZero==2)
- {
- int index1= nonZero[0]*8;
- int index2= nonZero[1]*8;
- for(i=0; i<8; i++)
- {
- int bias= W4*block[i] + (1<<(COL_SHIFT-1));
- int c1= block[i + index1];
- int c2= block[i + index2];
- block[i ]= (c1*coeff[index1 ] + c2*coeff[index2 ] + bias) >> COL_SHIFT;
- block[i+8 ]= (c1*coeff[index1+1] + c2*coeff[index2+1] + bias) >> COL_SHIFT;
- block[i+16]= (c1*coeff[index1+2] + c2*coeff[index2+2] + bias) >> COL_SHIFT;
- block[i+24]= (c1*coeff[index1+3] + c2*coeff[index2+3] + bias) >> COL_SHIFT;
- block[i+32]= (c1*coeff[index1+4] + c2*coeff[index2+4] + bias) >> COL_SHIFT;
- block[i+40]= (c1*coeff[index1+5] + c2*coeff[index2+5] + bias) >> COL_SHIFT;
- block[i+48]= (c1*coeff[index1+6] + c2*coeff[index2+6] + bias) >> COL_SHIFT;
- block[i+56]= (c1*coeff[index1+7] + c2*coeff[index2+7] + bias) >> COL_SHIFT;
- }
- }*/
- else
- {
- for(i=0; i<8; i++)
- idctSparse2Col(block + i);
- }
-#elif defined(ARCH_ALPHA)
int rowsZero = 1; /* all rows except row 0 zero */
int rowsConstant = 1; /* all rows consist of a constant value */
@@ -579,11 +468,43 @@ void simple_idct (short *block)
for (i = 0; i < 8; i++)
idctSparseCol(block + i);
}
+}
+
+/* XXX: suppress this mess */
+void simple_idct_put(UINT8 *dest, int line_size, DCTELEM *block)
+{
+ simple_idct(block);
+ put_pixels_clamped(block, dest, line_size);
+}
+
+void simple_idct_add(UINT8 *dest, int line_size, DCTELEM *block)
+{
+ simple_idct(block);
+ add_pixels_clamped(block, dest, line_size);
+}
+
#else
- for(i=0; i<8; i++)
- idctRowCondDC(block + i*8);
-
- for(i=0; i<8; i++)
- idctSparseCol(block + i);
-#endif
+
+void simple_idct_put(UINT8 *dest, int line_size, INT16 *block)
+{
+ int i;
+ for(i=0; i<8; i++)
+ idctRowCondDC(block + i*8);
+
+ for(i=0; i<8; i++)
+ idctSparseColPut(dest + i, line_size, block + i);
}
+
+void simple_idct_add(UINT8 *dest, int line_size, INT16 *block)
+{
+ int i;
+ for(i=0; i<8; i++)
+ idctRowCondDC(block + i*8);
+
+ for(i=0; i<8; i++)
+ idctSparseColAdd(dest + i, line_size, block + i);
+}
+
+#endif
+
+#undef COL_SHIFT
diff --git a/src/libffmpeg/libavcodec/simple_idct.h b/src/libffmpeg/libavcodec/simple_idct.h
index 54dff7396..233a7b841 100644
--- a/src/libffmpeg/libavcodec/simple_idct.h
+++ b/src/libffmpeg/libavcodec/simple_idct.h
@@ -1,20 +1,23 @@
/*
- Copyright (C) 2001 Michael Niedermayer (michaelni@gmx.at)
+ * Simple IDCT
+ *
+ * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-*/
-
-void simple_idct(short *block);
+void simple_idct_put(UINT8 *dest, int line_size, INT16 *block);
+void simple_idct_add(UINT8 *dest, int line_size, INT16 *block);
void simple_idct_mmx(short *block);
diff --git a/src/libffmpeg/libavcodec/utils.c b/src/libffmpeg/libavcodec/utils.c
index 180712314..f6d967757 100644
--- a/src/libffmpeg/libavcodec/utils.c
+++ b/src/libffmpeg/libavcodec/utils.c
@@ -1,35 +1,30 @@
/*
* utils for libavcodec
- * Copyright (c) 2001 Gerard Lantau.
+ * Copyright (c) 2001 Fabrice Bellard.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
*
- * This program is distributed in the hope that it will be useful,
+ * This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-#include <stdio.h>
-#include <string.h>
-#include <errno.h>
-#include "common.h"
-#include "dsputil.h"
#include "avcodec.h"
+#include "dsputil.h"
+#include "mpegvideo.h"
#ifdef HAVE_MALLOC_H
#include <malloc.h>
-#else
-#include <stdlib.h>
#endif
/* memory alloc */
-void *av_mallocz(int size)
+void *av_malloc(int size)
{
void *ptr;
#if defined ( ARCH_X86 ) && defined ( HAVE_MEMALIGN )
@@ -52,6 +47,31 @@ void *av_mallocz(int size)
return ptr;
}
+void *av_mallocz(int size)
+{
+ void *ptr;
+ ptr = av_malloc(size);
+ if (!ptr)
+ return NULL;
+ memset(ptr, 0, size);
+ return ptr;
+}
+
+/* NOTE: ptr = NULL is explicitly allowed */
+void av_free(void *ptr)
+{
+ /* XXX: this test should not be needed on most libcs */
+ if (ptr)
+ free(ptr);
+}
+
+/* cannot be called directly because the 'void **' cast is not automatic */
+void __av_freep(void **ptr)
+{
+ av_free(*ptr);
+ *ptr = NULL;
+}
+
/* encoder management */
AVCodec *first_avcodec;
@@ -70,13 +90,16 @@ int avcodec_open(AVCodecContext *avctx, AVCodec *codec)
avctx->codec = codec;
avctx->frame_number = 0;
- avctx->priv_data = av_mallocz(codec->priv_data_size);
- if (!avctx->priv_data)
- return -ENOMEM;
+ if (codec->priv_data_size > 0) {
+ avctx->priv_data = av_mallocz(codec->priv_data_size);
+ if (!avctx->priv_data)
+ return -ENOMEM;
+ } else {
+ avctx->priv_data = NULL;
+ }
ret = avctx->codec->init(avctx);
if (ret < 0) {
- free(avctx->priv_data);
- avctx->priv_data = NULL;
+ av_freep(&avctx->priv_data);
return ret;
}
return 0;
@@ -138,8 +161,7 @@ int avcodec_close(AVCodecContext *avctx)
{
if (avctx->codec->close)
avctx->codec->close(avctx);
- free(avctx->priv_data);
- avctx->priv_data = NULL;
+ av_freep(&avctx->priv_data);
avctx->codec = NULL;
return 0;
}
@@ -205,6 +227,7 @@ AVCodec *avcodec_find(enum CodecID id)
}
const char *pix_fmt_str[] = {
+ "??",
"yuv420p",
"yuv422",
"rgb24",
@@ -218,6 +241,7 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
const char *codec_name;
AVCodec *p;
char buf1[32];
+ char channels_str[100];
int bitrate;
if (encode)
@@ -259,19 +283,54 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
enc->width, enc->height,
(float)enc->frame_rate / FRAME_RATE_BASE);
}
+ snprintf(buf + strlen(buf), buf_size - strlen(buf),
+ ", q=%d-%d", enc->qmin, enc->qmax);
+
bitrate = enc->bit_rate;
break;
case CODEC_TYPE_AUDIO:
snprintf(buf, buf_size,
"Audio: %s",
codec_name);
+ switch (enc->channels) {
+ case 1:
+ strcpy(channels_str, "mono");
+ break;
+ case 2:
+ strcpy(channels_str, "stereo");
+ break;
+ case 6:
+ strcpy(channels_str, "5:1");
+ break;
+ default:
+ sprintf(channels_str, "%d channels", enc->channels);
+ break;
+ }
if (enc->sample_rate) {
snprintf(buf + strlen(buf), buf_size - strlen(buf),
", %d Hz, %s",
enc->sample_rate,
- enc->channels == 2 ? "stereo" : "mono");
+ channels_str);
+ }
+
+ /* for PCM codecs, compute bitrate directly */
+ switch(enc->codec_id) {
+ case CODEC_ID_PCM_S16LE:
+ case CODEC_ID_PCM_S16BE:
+ case CODEC_ID_PCM_U16LE:
+ case CODEC_ID_PCM_U16BE:
+ bitrate = enc->sample_rate * enc->channels * 16;
+ break;
+ case CODEC_ID_PCM_S8:
+ case CODEC_ID_PCM_U8:
+ case CODEC_ID_PCM_ALAW:
+ case CODEC_ID_PCM_MULAW:
+ bitrate = enc->sample_rate * enc->channels * 8;
+ break;
+ default:
+ bitrate = enc->bit_rate;
+ break;
}
- bitrate = enc->bit_rate;
break;
default:
abort();
@@ -364,6 +423,15 @@ int avpicture_get_size(int pix_fmt, int width, int height)
return size;
}
+unsigned avcodec_version( void )
+{
+ return LIBAVCODEC_VERSION_INT;
+}
+
+unsigned avcodec_build( void )
+{
+ return LIBAVCODEC_BUILD;
+}
/* must be called before any other functions */
void avcodec_init(void)
@@ -412,6 +480,7 @@ void avcodec_register_all(void)
register_avcodec(&msmpeg4v1_decoder);
register_avcodec(&msmpeg4v2_decoder);
register_avcodec(&msmpeg4v3_decoder);
+ register_avcodec(&wmv1_decoder);
register_avcodec(&mpeg_decoder);
register_avcodec(&h263i_decoder);
register_avcodec(&rv10_decoder);
@@ -423,20 +492,28 @@ void avcodec_register_all(void)
}
-static int encode_init(AVCodecContext *s)
+/* this should be called after seeking and before trying to decode the next frame */
+void avcodec_flush_buffers(AVCodecContext *avctx)
+{
+ MpegEncContext *s = avctx->priv_data;
+ s->num_available_buffers=0;
+}
+
+
+static int raw_encode_init(AVCodecContext *s)
{
return 0;
}
-static int decode_frame(AVCodecContext *avctx,
- void *data, int *data_size,
- UINT8 *buf, int buf_size)
+static int raw_decode_frame(AVCodecContext *avctx,
+ void *data, int *data_size,
+ UINT8 *buf, int buf_size)
{
return -1;
}
-static int encode_frame(AVCodecContext *avctx,
- unsigned char *frame, int buf_size, void *data)
+static int raw_encode_frame(AVCodecContext *avctx,
+ unsigned char *frame, int buf_size, void *data)
{
return -1;
}
@@ -446,8 +523,8 @@ AVCodec rawvideo_codec = {
CODEC_TYPE_VIDEO,
CODEC_ID_RAWVIDEO,
0,
- encode_init,
- encode_frame,
+ raw_encode_init,
+ raw_encode_frame,
NULL,
- decode_frame,
+ raw_decode_frame,
};