From 33c63119cc7398a0c7c8e0a43d98d682591eacb6 Mon Sep 17 00:00:00 2001
From: James Courtier-Dutton <jcdutton@users.sourceforge.net>
Date: Mon, 9 Jun 2003 17:27:51 +0000
Subject: Initial import of libmpeg2 version 0.3.2-cvs

CVS patchset: 5018
CVS date: 2003/06/09 17:27:51
---
 src/libmpeg2new/libmpeg2/Makefile.am           |   19 +
 src/libmpeg2new/libmpeg2/alloc.c               |   76 +
 src/libmpeg2new/libmpeg2/configure.incl        |   25 +
 src/libmpeg2new/libmpeg2/cpu_accel.c           |  182 +++
 src/libmpeg2new/libmpeg2/cpu_state.c           |  129 ++
 src/libmpeg2new/libmpeg2/decode.c              |  445 ++++++
 src/libmpeg2new/libmpeg2/header.c              |  725 ++++++++++
 src/libmpeg2new/libmpeg2/idct.c                |  294 ++++
 src/libmpeg2new/libmpeg2/idct_alpha.c          |  377 +++++
 src/libmpeg2new/libmpeg2/idct_altivec.c        |  260 ++++
 src/libmpeg2new/libmpeg2/idct_mlib.c           |   60 +
 src/libmpeg2new/libmpeg2/idct_mmx.c            |  814 +++++++++++
 src/libmpeg2new/libmpeg2/libmpeg2.pc.in        |   10 +
 src/libmpeg2new/libmpeg2/motion_comp.c         |  129 ++
 src/libmpeg2new/libmpeg2/motion_comp_alpha.c   |  252 ++++
 src/libmpeg2new/libmpeg2/motion_comp_altivec.c | 1009 +++++++++++++
 src/libmpeg2new/libmpeg2/motion_comp_mlib.c    |  190 +++
 src/libmpeg2new/libmpeg2/motion_comp_mmx.c     | 1005 +++++++++++++
 src/libmpeg2new/libmpeg2/mpeg2_internal.h      |  301 ++++
 src/libmpeg2new/libmpeg2/slice.c               | 1808 ++++++++++++++++++++++++
 src/libmpeg2new/libmpeg2/vlc.h                 |  429 ++++++
 21 files changed, 8539 insertions(+)
 create mode 100644 src/libmpeg2new/libmpeg2/Makefile.am
 create mode 100644 src/libmpeg2new/libmpeg2/alloc.c
 create mode 100644 src/libmpeg2new/libmpeg2/configure.incl
 create mode 100644 src/libmpeg2new/libmpeg2/cpu_accel.c
 create mode 100644 src/libmpeg2new/libmpeg2/cpu_state.c
 create mode 100644 src/libmpeg2new/libmpeg2/decode.c
 create mode 100644 src/libmpeg2new/libmpeg2/header.c
 create mode 100644 src/libmpeg2new/libmpeg2/idct.c
 create mode 100644 src/libmpeg2new/libmpeg2/idct_alpha.c
 create mode 100644 src/libmpeg2new/libmpeg2/idct_altivec.c
 create mode 100644 src/libmpeg2new/libmpeg2/idct_mlib.c
 create mode 100644 src/libmpeg2new/libmpeg2/idct_mmx.c
 create mode 100644 src/libmpeg2new/libmpeg2/libmpeg2.pc.in
 create mode 100644 src/libmpeg2new/libmpeg2/motion_comp.c
 create mode 100644 src/libmpeg2new/libmpeg2/motion_comp_alpha.c
 create mode 100644 src/libmpeg2new/libmpeg2/motion_comp_altivec.c
 create mode 100644 src/libmpeg2new/libmpeg2/motion_comp_mlib.c
 create mode 100644 src/libmpeg2new/libmpeg2/motion_comp_mmx.c
 create mode 100644 src/libmpeg2new/libmpeg2/mpeg2_internal.h
 create mode 100644 src/libmpeg2new/libmpeg2/slice.c
 create mode 100644 src/libmpeg2new/libmpeg2/vlc.h

(limited to 'src')

diff --git a/src/libmpeg2new/libmpeg2/Makefile.am b/src/libmpeg2new/libmpeg2/Makefile.am
new file mode 100644
index 000000000..ed9b50e21
--- /dev/null
+++ b/src/libmpeg2new/libmpeg2/Makefile.am
@@ -0,0 +1,19 @@
+AM_CFLAGS = $(OPT_CFLAGS) $(LIBMPEG2_CFLAGS)
+# Installed library: the portable C sources plus the mediaLib wrappers.
+lib_LTLIBRARIES = libmpeg2.la
+libmpeg2_la_SOURCES = alloc.c header.c decode.c slice.c motion_comp.c idct.c \
+		      motion_comp_mlib.c idct_mlib.c
+libmpeg2_la_LIBADD = libmpeg2arch.la $(LIBMPEG2_LIBS)
+libmpeg2_la_LDFLAGS = -no-undefined
+# Not installed: per-architecture code, built with ARCH_OPT_CFLAGS and linked into libmpeg2.la.
+noinst_LTLIBRARIES = libmpeg2arch.la
+libmpeg2arch_la_SOURCES = motion_comp_mmx.c idct_mmx.c \
+			  motion_comp_altivec.c idct_altivec.c \
+			  motion_comp_alpha.c idct_alpha.c \
+			  cpu_accel.c cpu_state.c
+libmpeg2arch_la_CFLAGS = $(OPT_CFLAGS) $(ARCH_OPT_CFLAGS) $(LIBMPEG2_CFLAGS)
+# libmpeg2.pc is installed under $(libdir)/pkgconfig.
+pkgconfigdir = $(libdir)/pkgconfig
+pkgconfig_DATA = libmpeg2.pc
+# Extra files for the distribution that are not listed as sources above.
+EXTRA_DIST = configure.incl vlc.h mpeg2_internal.h
diff --git a/src/libmpeg2new/libmpeg2/alloc.c b/src/libmpeg2new/libmpeg2/alloc.c
new file mode 100644
index 000000000..2e4792e94
--- /dev/null
+++ b/src/libmpeg2new/libmpeg2/alloc.c
@@ -0,0 +1,76 @@
+/*
+ * alloc.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "mpeg2_internal.h"
+
+#if defined(HAVE_MEMALIGN) && !defined(__cplusplus)
+/* some systems have memalign() but no declaration for it */
+void * memalign (size_t align, size_t size);
+#endif
+/* optional overrides, NULL by default; mpeg2_malloc and mpeg2_free try them first */
+void * (* mpeg2_malloc_hook) (int size, int reason) = NULL;
+int (* mpeg2_free_hook) (void * buf) = NULL;
+
+void * mpeg2_malloc (int size, int reason)
+{
+    char * buf;
+    /* the user hook gets the first try; NULL from it selects the default */
+    if (mpeg2_malloc_hook) {
+	buf = (char *) mpeg2_malloc_hook (size, reason);
+	if (buf)
+	    return buf;
+    }
+    if (size < 0)	/* would wrap to a tiny request once converted to size_t */
+	return NULL;
+#if defined(HAVE_MEMALIGN) && !defined(__cplusplus) && !defined(DEBUG)
+    return memalign (16, size);
+#else
+    /* over-allocate: room to align to 16 bytes plus the saved base pointer */
+    buf = (char *) malloc (size + 15 + sizeof (void **));
+    if (buf) {
+	char * align_buf = buf + 15 + sizeof (void **);
+	align_buf -= (uintptr_t)align_buf & 15;
+	*(((void **)align_buf) - 1) = buf;	/* read back by mpeg2_free */
+	return align_buf;
+    }
+    return NULL;
+#endif
+}
+
+void mpeg2_free (void * buf)	/* releases a buffer obtained from mpeg2_malloc */
+{
+    if (mpeg2_free_hook && mpeg2_free_hook (buf))
+	return;		/* non-zero: the user hook disposed of buf itself */
+#if defined(HAVE_MEMALIGN) && !defined(__cplusplus) && !defined(DEBUG)
+    free (buf);
+#else
+    if (buf)	/* NULL has no saved base pointer in front of it to read */
+	free (*(((void **)buf) - 1));
+#endif
+}
diff --git a/src/libmpeg2new/libmpeg2/configure.incl b/src/libmpeg2new/libmpeg2/configure.incl
new file mode 100644
index 000000000..aa9337774
--- /dev/null
+++ b/src/libmpeg2new/libmpeg2/configure.incl
@@ -0,0 +1,25 @@
+AC_SUBST([LIBMPEG2_CFLAGS])
+AC_SUBST([LIBMPEG2_LIBS])
+dnl both variables are extended by the checks below and consumed by Makefile.am
+dnl avoid -fPIC when possible
+AC_LIBTOOL_NON_PIC([LIBMPEG2_CFLAGS="$LIBMPEG2_CFLAGS -prefer-non-pic"])
+
+dnl ACCEL_DETECT is defined unless --disable-accel-detect is given
+AC_ARG_ENABLE([accel-detect],
+    [  --disable-accel-detect  make a version without accel detection code])
+if test x"$enable_accel_detect" != x"no"; then
+    AC_DEFINE([ACCEL_DETECT],,[autodetect accelerations])
+fi
+
+dnl unless --disable-mlib is given, probe for mediaLib under /opt/SUNWmlib
+AC_ARG_ENABLE([mlib],
+    [  --disable-mlib          make a version not using mediaLib])
+if test x"$enable_mlib" != x"no"; then
+    cflags_save="$CFLAGS"
+    CFLAGS="$OPT_CFLAGS -L/opt/SUNWmlib/lib -R/opt/SUNWmlib/lib $CFLAGS"
+    AC_CHECK_LIB([mlib],[mlib_VideoColorYUV2RGB420],
+        [AC_DEFINE([LIBMPEG2_MLIB],,[libmpeg2 mediaLib support])
+        LIBMPEG2_CFLAGS="$LIBMPEG2_CFLAGS -I/opt/SUNWmlib/include"
+        LIBMPEG2_LIBS="$LIBMPEG2_LIBS -L/opt/SUNWmlib/lib -R/opt/SUNWmlib/lib -lmlib"])
+    CFLAGS="$cflags_save"
+fi
diff --git a/src/libmpeg2new/libmpeg2/cpu_accel.c b/src/libmpeg2new/libmpeg2/cpu_accel.c
new file mode 100644
index 000000000..97e5ea3ca
--- /dev/null
+++ b/src/libmpeg2new/libmpeg2/cpu_accel.c
@@ -0,0 +1,182 @@
+/*
+ * cpu_accel.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#include <inttypes.h>
+
+#include "mpeg2.h"
+
+#ifdef ACCEL_DETECT
+#ifdef ARCH_X86
+static inline uint32_t arch_accel (void)
+{
+    uint32_t eax, ebx, ecx, edx;
+    int AMD;
+    uint32_t caps;
+    /* x86 probe: returns MPEG2_ACCEL_X86_* bits, or 0 without cpuid or MMX */
+#if !defined(PIC) && !defined(__PIC__)
+#define cpuid(op,eax,ebx,ecx,edx)	\
+    __asm__ ("cpuid"			\
+	     : "=a" (eax),		\
+	       "=b" (ebx),		\
+	       "=c" (ecx),		\
+	       "=d" (edx)		\
+	     : "a" (op)			\
+	     : "cc")
+#else	/* PIC version : save ebx */
+#define cpuid(op,eax,ebx,ecx,edx)	\
+    __asm__ ("push %%ebx\n\t"		\
+	     "cpuid\n\t"		\
+	     "movl %%ebx,%1\n\t"	\
+	     "pop %%ebx"		\
+	     : "=a" (eax),		\
+	       "=r" (ebx),		\
+	       "=c" (ecx),		\
+	       "=d" (edx)		\
+	     : "a" (op)			\
+	     : "cc")
+#endif
+    /* try to flip EFLAGS bit 0x200000; eax = flags after, ebx = flags before */
+    __asm__ ("pushf\n\t"
+	     "pushf\n\t"
+	     "pop %0\n\t"
+	     "movl %0,%1\n\t"
+	     "xorl $0x200000,%0\n\t"
+	     "push %0\n\t"
+	     "popf\n\t"
+	     "pushf\n\t"
+	     "pop %0\n\t"
+	     "popf"
+	     : "=r" (eax),
+	       "=r" (ebx)
+	     :
+	     : "cc");
+
+    if (eax == ebx)		/* no cpuid */
+	return 0;
+
+    cpuid (0x00000000, eax, ebx, ecx, edx);
+    if (!eax)			/* vendor string only */
+	return 0;
+    /* the three constants spell "AuthenticAMD" read in ebx, edx, ecx order */
+    AMD = (ebx == 0x68747541) && (ecx == 0x444d4163) && (edx == 0x69746e65);
+    /* leaf 1: the feature bits tested below are all in edx */
+    cpuid (0x00000001, eax, ebx, ecx, edx);
+    if (! (edx & 0x00800000))	/* no MMX */
+	return 0;
+
+    caps = MPEG2_ACCEL_X86_MMX;
+    if (edx & 0x02000000)	/* SSE - identical to AMD MMX extensions */
+	caps = MPEG2_ACCEL_X86_MMX | MPEG2_ACCEL_X86_MMXEXT;
+    /* leaf 0x80000000 reports in eax the highest extended leaf available */
+    cpuid (0x80000000, eax, ebx, ecx, edx);
+    if (eax < 0x80000001)	/* no extended capabilities */
+	return caps;
+
+    cpuid (0x80000001, eax, ebx, ecx, edx);
+
+    if (edx & 0x80000000)
+	caps |= MPEG2_ACCEL_X86_3DNOW;
+
+    if (AMD && (edx & 0x00400000))	/* AMD MMX extensions */
+	caps |= MPEG2_ACCEL_X86_MMXEXT;
+
+    return caps;
+}
+#endif /* ARCH_X86 */
+
+#ifdef ARCH_PPC
+#include <signal.h>
+#include <setjmp.h>
+
+static sigjmp_buf jmpbuf;
+static volatile sig_atomic_t canjump = 0;
+
+static RETSIGTYPE sigill_handler (int sig)
+{
+    if (canjump) {	/* the fault came from the probe inside arch_accel */
+	canjump = 0;
+	siglongjmp (jmpbuf, 1);
+    }
+    /* not ours: never longjmp through an unarmed jmpbuf, re-raise instead */
+    signal (sig, SIG_DFL);
+    raise (sig);
+}
+
+static inline uint32_t arch_accel (void)
+{
+    static RETSIGTYPE (* oldsig) (int);
+    /* PPC probe: run one vector instruction and see whether SIGILL fires */
+    oldsig = signal (SIGILL, sigill_handler);
+    if (sigsetjmp (jmpbuf, 1)) {
+	signal (SIGILL, oldsig);
+	return 0;
+    }
+    /* from here on sigill_handler jumps back to the sigsetjmp above */
+    canjump = 1;
+
+#ifdef HAVE_ALTIVEC_H	/* gnu */
+#define VAND(a,b,c) "vand " #a "," #b "," #c "\n\t"
+#else			/* apple */
+#define VAND(a,b,c) "vand v" #a ",v" #b ",v" #c "\n\t"
+#endif
+    asm volatile ("mtspr 256, %0\n\t"
+		  VAND (0, 0, 0)
+		  :
+		  : "r" (-1));
+    /* no SIGILL was raised: restore the previous handler and report AltiVec */
+    signal (SIGILL, oldsig);
+    return MPEG2_ACCEL_PPC_ALTIVEC;
+}
+#endif /* ARCH_PPC */
+
+#ifdef ARCH_ALPHA
+static inline uint32_t arch_accel (void)
+{
+    uint64_t no_mvi;
+    /* Alpha probe: a non-zero amask result for bit 256 is treated as "no MVI" */
+    asm volatile ("amask %1, %0"
+		  : "=r" (no_mvi)
+		  : "rI" (256));	/* AMASK_MVI */
+    return no_mvi ? MPEG2_ACCEL_ALPHA : (MPEG2_ACCEL_ALPHA |
+					 MPEG2_ACCEL_ALPHA_MVI);
+}
+#endif /* ARCH_ALPHA */
+#endif
+
+uint32_t mpeg2_detect_accel (void)
+{
+    /* capability bits found so far; stays 0 when ACCEL_DETECT is not defined */
+    uint32_t caps = 0;
+
+#ifdef ACCEL_DETECT
+#ifdef LIBMPEG2_MLIB
+    caps = MPEG2_ACCEL_MLIB;
+#endif
+#if defined (ARCH_X86) || defined (ARCH_PPC) || defined (ARCH_ALPHA)
+    caps |= arch_accel ();
+#endif
+#endif
+    return caps;
+}
diff --git a/src/libmpeg2new/libmpeg2/cpu_state.c b/src/libmpeg2new/libmpeg2/cpu_state.c
new file mode 100644
index 000000000..a94e5fedc
--- /dev/null
+++ b/src/libmpeg2new/libmpeg2/cpu_state.c
@@ -0,0 +1,129 @@
+/*
+ * cpu_state.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "mpeg2_internal.h"
+#include "attributes.h"
+#ifdef ARCH_X86
+#include "mmx.h"
+#endif
+/* state hooks: both stay NULL unless mpeg2_cpu_state_init installs one */
+void (* mpeg2_cpu_state_save) (cpu_state_t * state) = NULL;
+void (* mpeg2_cpu_state_restore) (cpu_state_t * state) = NULL;
+/* x86 restore hook: only issues emms (); the state argument is not used */
+#ifdef ARCH_X86
+static void state_restore_mmx (cpu_state_t * state)
+{
+    emms ();
+}
+#endif
+
+#ifdef ARCH_PPC
+#ifdef HAVE_ALTIVEC_H	/* gnu: operands are bare register numbers */
+#define LI(a,b) "li " #a "," #b "\n\t"
+#define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t"	/* b unused: index is 0 */
+#define STVX(a,b,c) "stvx " #a "," #b "," #c "\n\t"
+#define LVX0(a,b,c) "lvx " #a ",0," #c "\n\t"	/* b unused: index is 0 */
+#define LVX(a,b,c) "lvx " #a "," #b "," #c "\n\t"
+#else			/* apple: operands take r / v register prefixes */
+#define LI(a,b) "li r" #a "," #b "\n\t"
+#define STVX0(a,b,c) "stvx v" #a ",0,r" #c "\n\t"
+#define STVX(a,b,c) "stvx v" #a ",r" #b ",r" #c "\n\t"
+#define LVX0(a,b,c) "lvx v" #a ",0,r" #c "\n\t"
+#define LVX(a,b,c) "lvx v" #a ",r" #b ",r" #c "\n\t"
+#endif
+/* store v20-v31 at offsets 0, 16, ..., 176 from register 3 (r9/r11 = offsets) */
+static void state_save_altivec (cpu_state_t * state)
+{	/* NOTE(review): assumes state arrives in r3; no asm operands or clobbers - confirm */
+    asm (LI (9, 16)
+	 STVX0 (20, 0, 3)
+	 LI (11, 32)
+	 STVX (21, 9, 3)
+	 LI (9, 48)
+	 STVX (22, 11, 3)
+	 LI (11, 64)
+	 STVX (23, 9, 3)
+	 LI (9, 80)
+	 STVX (24, 11, 3)
+	 LI (11, 96)
+	 STVX (25, 9, 3)
+	 LI (9, 112)
+	 STVX (26, 11, 3)
+	 LI (11, 128)
+	 STVX (27, 9, 3)
+	 LI (9, 144)
+	 STVX (28, 11, 3)
+	 LI (11, 160)
+	 STVX (29, 9, 3)
+	 LI (9, 176)
+	 STVX (30, 11, 3)
+	 STVX (31, 9, 3));
+}
+/* reload v20-v31 from offsets 0, 16, ..., 176 from register 3 (r9/r11 = offsets) */
+static void state_restore_altivec (cpu_state_t * state)
+{	/* NOTE(review): assumes state arrives in r3; no asm operands or clobbers - confirm */
+    asm (LI (9, 16)
+	 LVX0 (20, 0, 3)
+	 LI (11, 32)
+	 LVX (21, 9, 3)
+	 LI (9, 48)
+	 LVX (22, 11, 3)
+	 LI (11, 64)
+	 LVX (23, 9, 3)
+	 LI (9, 80)
+	 LVX (24, 11, 3)
+	 LI (11, 96)
+	 LVX (25, 9, 3)
+	 LI (9, 112)
+	 LVX (26, 11, 3)
+	 LI (11, 128)
+	 LVX (27, 9, 3)
+	 LI (9, 144)
+	 LVX (28, 11, 3)
+	 LI (11, 160)
+	 LVX (29, 9, 3)
+	 LI (9, 176)
+	 LVX (30, 11, 3)
+	 LVX (31, 9, 3));
+}
+#endif
+
+void mpeg2_cpu_state_init (uint32_t accel)
+{
+#ifdef ARCH_X86
+    /* x86 with MMX: install the emms-based restore hook; no save hook is set */
+    if ((accel & MPEG2_ACCEL_X86_MMX) != 0)
+	mpeg2_cpu_state_restore = state_restore_mmx;
+#endif
+#ifdef ARCH_PPC
+    if ((accel & MPEG2_ACCEL_PPC_ALTIVEC) != 0) {
+	mpeg2_cpu_state_restore = state_restore_altivec;
+	mpeg2_cpu_state_save = state_save_altivec;
+    }
+#endif
+}
diff --git a/src/libmpeg2new/libmpeg2/decode.c b/src/libmpeg2new/libmpeg2/decode.c
new file mode 100644
index 000000000..df2ca2f28
--- /dev/null
+++ b/src/libmpeg2new/libmpeg2/decode.c
@@ -0,0 +1,445 @@
+/*
+ * decode.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#include <string.h>	/* memcmp/memset, try to remove */
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "mpeg2_internal.h"
+#include "convert.h"
+
+static int mpeg2_accels = 0;
+
+#define BUFFER_SIZE (1194 * 1024)
+
+const mpeg2_info_t * mpeg2_info (mpeg2dec_t * mpeg2dec)
+{
+    return &mpeg2dec->info;	/* points into the decoder, not a copy */
+}
+
+static inline int skip_chunk (mpeg2dec_t * mpeg2dec, int bytes)
+{
+    uint8_t * current;
+    uint32_t shift;
+    uint8_t * limit;
+    uint8_t byte;
+    /* consume input up to and including the next start code; 0 if none found */
+    if (!bytes)
+	return 0;
+
+    current = mpeg2dec->buf_start;
+    shift = mpeg2dec->shift;
+    limit = current + bytes;
+
+    /* shift keeps the previous three bytes in its upper 24 bits, so the
+     * value 0x00000100 means "00 00 01" was just seen and byte is the code */
+    do {
+	byte = *current++;
+	if (shift == 0x00000100) {
+	    int skipped;
+
+	    mpeg2dec->shift = 0xffffff00;
+	    skipped = current - mpeg2dec->buf_start;
+	    mpeg2dec->buf_start = current;
+	    return skipped;
+	}
+	shift = (shift | byte) << 8;
+    } while (current < limit);
+    /* no start code yet: keep the scan window for the next input buffer */
+    mpeg2dec->shift = shift;
+    mpeg2dec->buf_start = current;
+    return 0;
+}
+
+static inline int copy_chunk (mpeg2dec_t * mpeg2dec, int bytes)
+{
+    uint8_t * src, * src_end, * dst;
+    uint32_t window;
+
+    /* copy input into the chunk buffer until a start code has been consumed;
+     * returns the input bytes used through the code byte, or 0 if none */
+    if (bytes == 0)
+	return 0;
+
+    src = mpeg2dec->buf_start;
+    src_end = src + bytes;
+    dst = mpeg2dec->chunk_ptr;
+    window = mpeg2dec->shift;
+
+    for (;;) {
+	uint8_t value = *src++;
+
+	if (window == 0x00000100) {
+	    int used = src - mpeg2dec->buf_start;	/* includes the code byte */
+	    mpeg2dec->shift = 0xffffff00;
+	    mpeg2dec->chunk_ptr = dst + 1;	/* code byte itself is not stored */
+	    mpeg2dec->buf_start = src;
+	    return used;
+	}
+	window = (window | value) << 8;
+	*dst++ = value;
+	if (src >= src_end)
+	    break;
+    }
+    /* no start code: chunk_ptr is not updated here, the caller advances it */
+    mpeg2dec->shift = window;
+    mpeg2dec->buf_start = src;
+    return 0;
+}
+
+void mpeg2_buffer (mpeg2dec_t * mpeg2dec, uint8_t * start, uint8_t * end)
+{
+    mpeg2dec->buf_end = end;	/* exclusive: one past the last input byte */
+    mpeg2dec->buf_start = start;
+}
+
+int mpeg2_getpos (mpeg2dec_t * mpeg2dec)
+{
+    return (int) (mpeg2dec->buf_end - mpeg2dec->buf_start);	/* bytes left */
+}
+
+static inline mpeg2_state_t seek_chunk (mpeg2dec_t * mpeg2dec)
+{
+    int avail = mpeg2dec->buf_end - mpeg2dec->buf_start;
+    int consumed = skip_chunk (mpeg2dec, avail);
+
+    if (consumed) {
+	/* start code found: its code byte is the last byte consumed */
+	mpeg2dec->bytes_since_pts += consumed;
+	mpeg2dec->code = mpeg2dec->buf_start[-1];
+	return (mpeg2_state_t)-1;
+    }
+    mpeg2dec->bytes_since_pts += avail;
+    return STATE_BUFFER;
+}
+
+static mpeg2_state_t seek_header (mpeg2dec_t * mpeg2dec)
+{	/* skip start codes until 0xb3, or 0xb7/0xb8/0x00 once width != (unsigned)-1 */
+    while (mpeg2dec->code != 0xb3 &&
+	   ((mpeg2dec->code != 0xb7 && mpeg2dec->code != 0xb8 &&
+	     mpeg2dec->code) || mpeg2dec->sequence.width == (unsigned)-1))
+	if (seek_chunk (mpeg2dec) == STATE_BUFFER)
+	    return STATE_BUFFER;	/* ran out of input before a match */
+    mpeg2dec->chunk_start = mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer;
+    return (mpeg2dec->code ? mpeg2_parse_header (mpeg2dec) :
+	    mpeg2_header_picture_start (mpeg2dec));	/* code 0x00 */
+}
+
+mpeg2_state_t mpeg2_seek_sequence (mpeg2dec_t * mpeg2dec)
+{
+    mpeg2dec->sequence.width = ~0u;	/* the sentinel seek_header tests for */
+    return seek_header (mpeg2dec);
+}
+/* packs a parser state and a start code into one value usable as a case label */
+#define RECEIVED(code,state) (((state) << 8) + (code))
+
+mpeg2_state_t mpeg2_parse (mpeg2dec_t * mpeg2dec)
+{
+    int size_buffer, size_chunk, copied;
+    /* run the pending action first; a non-negative result is returned as is */
+    if (mpeg2dec->action) {
+	mpeg2_state_t state;
+
+	state = mpeg2dec->action (mpeg2dec);
+	if ((int)state >= 0)
+	    return state;
+    }
+    /* outer loop: decode slice chunks, skip every other code below 0xb0 */
+    while (1) {
+	while ((unsigned) (mpeg2dec->code - mpeg2dec->first_decode_slice) <
+	       mpeg2dec->nb_decode_slices) {
+	    size_buffer = mpeg2dec->buf_end - mpeg2dec->buf_start;
+	    size_chunk = (mpeg2dec->chunk_buffer + BUFFER_SIZE -
+			  mpeg2dec->chunk_ptr);
+	    if (size_buffer <= size_chunk) {
+		copied = copy_chunk (mpeg2dec, size_buffer);
+		if (!copied) {
+		    mpeg2dec->bytes_since_pts += size_buffer;
+		    mpeg2dec->chunk_ptr += size_buffer;
+		    return STATE_BUFFER;
+		}
+	    } else {
+		copied = copy_chunk (mpeg2dec, size_chunk);
+		if (!copied) {
+		    /* filled the chunk buffer without finding a start code */
+		    mpeg2dec->bytes_since_pts += size_chunk;
+		    mpeg2dec->action = seek_chunk;
+		    return STATE_INVALID;
+		}
+	    }
+	    mpeg2dec->bytes_since_pts += copied;
+	    /* chunk complete: decode it, then read the start code that ended it */
+	    mpeg2_slice (&(mpeg2dec->decoder), mpeg2dec->code,
+			 mpeg2dec->chunk_start);
+	    mpeg2dec->code = mpeg2dec->buf_start[-1];
+	    mpeg2dec->chunk_ptr = mpeg2dec->chunk_start;
+	}
+	if ((unsigned) (mpeg2dec->code - 1) >= 0xb0 - 1)
+	    break;
+	if (seek_chunk (mpeg2dec) == STATE_BUFFER)
+	    return STATE_BUFFER;
+    }
+    /* code is now 0x00 or at least 0xb0: pick the action for the next call */
+    switch (mpeg2dec->code) {
+    case 0x00:
+	mpeg2dec->action = mpeg2_header_picture_start;
+	return mpeg2dec->state;
+    case 0xb7:
+	mpeg2dec->action = mpeg2_header_end;
+	break;
+    case 0xb3:
+    case 0xb8:
+	mpeg2dec->action = mpeg2_parse_header;
+	break;
+    default:
+	mpeg2dec->action = seek_chunk;
+	return STATE_INVALID;
+    }
+    return (mpeg2dec->state == STATE_SLICE) ? STATE_SLICE : STATE_INVALID;
+}
+
+mpeg2_state_t mpeg2_parse_header (mpeg2dec_t * mpeg2dec)
+{
+    static int (* process_header[]) (mpeg2dec_t * mpeg2dec) = {
+	mpeg2_header_picture, mpeg2_header_extension, mpeg2_header_user_data,
+	mpeg2_header_sequence, NULL, NULL, NULL, NULL, mpeg2_header_gop
+    };
+    int size_buffer, size_chunk, copied;
+    /* table index is code & 0x0b: 0x00->0, 0xb5->1, 0xb2->2, 0xb3->3, 0xb8->8 */
+    mpeg2dec->action = mpeg2_parse_header;
+    while (1) {
+	size_buffer = mpeg2dec->buf_end - mpeg2dec->buf_start;
+	size_chunk = (mpeg2dec->chunk_buffer + BUFFER_SIZE -
+		      mpeg2dec->chunk_ptr);
+	if (size_buffer <= size_chunk) {
+	    copied = copy_chunk (mpeg2dec, size_buffer);
+	    if (!copied) {
+		mpeg2dec->bytes_since_pts += size_buffer;
+		mpeg2dec->chunk_ptr += size_buffer;
+		return STATE_BUFFER;
+	    }
+	} else {
+	    copied = copy_chunk (mpeg2dec, size_chunk);
+	    if (!copied) {
+		/* filled the chunk buffer without finding a start code */
+		mpeg2dec->bytes_since_pts += size_chunk;
+		mpeg2dec->code = 0xb4;
+		mpeg2dec->action = seek_header;
+		return STATE_INVALID;
+	    }
+	}
+	mpeg2dec->bytes_since_pts += copied;
+	/* the header chunk is complete; a non-zero parser result rejects it */
+	if (process_header[mpeg2dec->code & 0x0b] (mpeg2dec)) {
+	    mpeg2dec->code = mpeg2dec->buf_start[-1];
+	    mpeg2dec->action = seek_header;
+	    return STATE_INVALID;
+	}
+
+	mpeg2dec->code = mpeg2dec->buf_start[-1];
+	switch (RECEIVED (mpeg2dec->code, mpeg2dec->state)) {
+
+	/* state transition after a sequence header */
+	case RECEIVED (0x00, STATE_SEQUENCE):
+	    mpeg2dec->action = mpeg2_header_picture_start;	/* fall through */
+	case RECEIVED (0xb8, STATE_SEQUENCE):
+	    mpeg2_header_sequence_finalize (mpeg2dec);
+	    break;
+
+	/* other legal state transitions */
+	case RECEIVED (0x00, STATE_GOP):
+	    mpeg2dec->action = mpeg2_header_picture_start;
+	    break;
+	case RECEIVED (0x01, STATE_PICTURE):
+	case RECEIVED (0x01, STATE_PICTURE_2ND):
+	    mpeg2_header_matrix_finalize (mpeg2dec);
+	    mpeg2dec->action = mpeg2_header_slice_start;
+	    break;
+
+	/* legal headers within a given state */
+	case RECEIVED (0xb2, STATE_SEQUENCE):
+	case RECEIVED (0xb2, STATE_GOP):
+	case RECEIVED (0xb2, STATE_PICTURE):
+	case RECEIVED (0xb2, STATE_PICTURE_2ND):
+	case RECEIVED (0xb5, STATE_SEQUENCE):
+	case RECEIVED (0xb5, STATE_PICTURE):
+	case RECEIVED (0xb5, STATE_PICTURE_2ND):
+	    mpeg2dec->chunk_ptr = mpeg2dec->chunk_start;
+	    continue;
+
+	default:
+	    mpeg2dec->action = seek_header;
+	    return STATE_INVALID;
+	}
+	/* reached via break only: rewind the chunk buffer and report the state */
+	mpeg2dec->chunk_start = mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer;
+	return mpeg2dec->state;
+    }
+}
+
+void mpeg2_convert (mpeg2dec_t * mpeg2dec,
+		    void (* convert) (int, int, uint32_t, void *,
+				      struct convert_init_s *), void * arg)
+{	/* NOTE(review): none of the mpeg2_malloc results below are checked */
+    convert_init_t convert_init;
+    int size;
+    /* first call runs with id == NULL; id_size and the sizes are read back */
+    convert_init.id = NULL;
+    convert (mpeg2dec->decoder.width, mpeg2dec->decoder.height,
+	     mpeg2_accels, arg, &convert_init);
+    if (convert_init.id_size) {
+	convert_init.id = mpeg2dec->convert_id =
+	    mpeg2_malloc (convert_init.id_size, ALLOC_CONVERT_ID);
+	convert (mpeg2dec->decoder.width, mpeg2dec->decoder.height,
+		 mpeg2_accels, arg, &convert_init);	/* second call, id set */
+    }
+    mpeg2dec->convert_size[0] = size = convert_init.buf_size[0];
+    mpeg2dec->convert_size[1] = size += convert_init.buf_size[1];
+    mpeg2dec->convert_size[2] = size += convert_init.buf_size[2];
+    mpeg2dec->convert_start = convert_init.start;
+    mpeg2dec->convert_copy = convert_init.copy;
+    /* each yuv_buf is one 6*size block; planes 1, 2 start at 4*size, 5*size */
+    size = mpeg2dec->decoder.width * mpeg2dec->decoder.height >> 2;
+    mpeg2dec->yuv_buf[0][0] = (uint8_t *) mpeg2_malloc (6 * size, ALLOC_YUV);
+    mpeg2dec->yuv_buf[0][1] = mpeg2dec->yuv_buf[0][0] + 4 * size;
+    mpeg2dec->yuv_buf[0][2] = mpeg2dec->yuv_buf[0][0] + 5 * size;
+    mpeg2dec->yuv_buf[1][0] = (uint8_t *) mpeg2_malloc (6 * size, ALLOC_YUV);
+    mpeg2dec->yuv_buf[1][1] = mpeg2dec->yuv_buf[1][0] + 4 * size;
+    mpeg2dec->yuv_buf[1][2] = mpeg2dec->yuv_buf[1][0] + 5 * size;
+    size = mpeg2dec->decoder.width * 8;	/* third buffer is sized from width only */
+    mpeg2dec->yuv_buf[2][0] = (uint8_t *) mpeg2_malloc (6 * size, ALLOC_YUV);
+    mpeg2dec->yuv_buf[2][1] = mpeg2dec->yuv_buf[2][0] + 4 * size;
+    mpeg2dec->yuv_buf[2][2] = mpeg2dec->yuv_buf[2][0] + 5 * size;
+}
+
+void mpeg2_set_buf (mpeg2dec_t * mpeg2dec, uint8_t * buf[3], void * id)
+{
+    mpeg2_fbuf_t * target;
+    int plane;
+
+    if (!mpeg2dec->custom_fbuf) {	/* take the next slot of fbuf_alloc */
+	target = &(mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index].fbuf);
+	mpeg2dec->alloc_index_user = ++mpeg2dec->alloc_index;
+    } else {
+	mpeg2_set_fbuf (mpeg2dec, mpeg2dec->decoder.coding_type);
+	target = mpeg2dec->fbuf[0];
+	if (mpeg2dec->state == STATE_SEQUENCE) {
+	    mpeg2dec->fbuf[2] = mpeg2dec->fbuf[1];
+	    mpeg2dec->fbuf[1] = mpeg2dec->fbuf[0];
+	}
+    }
+    for (plane = 0; plane < 3; plane++)
+	target->buf[plane] = buf[plane];
+    target->id = id;
+}
+
+void mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf)
+{
+    mpeg2dec->custom_fbuf = custom_fbuf;	/* tested by mpeg2_set_buf and mpeg2_close */
+}
+
+void mpeg2_skip (mpeg2dec_t * mpeg2dec, int skip)
+{
+    mpeg2dec->nb_decode_slices = (skip != 0) ? 0 : 0xb0 - 1;	/* 0: decode none */
+    mpeg2dec->first_decode_slice = 1;
+}
+
+void mpeg2_slice_region (mpeg2dec_t * mpeg2dec, int start, int end)
+{	/* clamps start to [1, 0xb0] and end to [start, 0xb0] before storing */
+    if (start < 1) start = 1; else if (start > 0xb0) start = 0xb0;
+    if (end < start) end = start; else if (end > 0xb0) end = 0xb0;
+    mpeg2dec->nb_decode_slices = end - start;
+    mpeg2dec->first_decode_slice = start;
+}
+
+void mpeg2_pts (mpeg2dec_t * mpeg2dec, uint32_t pts)
+{
+    mpeg2dec->bytes_since_pts = 0;	/* byte count restarts at this pts */
+    mpeg2dec->num_pts += 1;
+    mpeg2dec->pts_previous = mpeg2dec->pts_current;
+    mpeg2dec->pts_current = pts;
+}
+
+uint32_t mpeg2_accel (uint32_t accel)
+{
+    if (mpeg2_accels == 0) {	/* only the first call configures anything */
+	accel |= (accel & MPEG2_ACCEL_DETECT) ? mpeg2_detect_accel () : 0;
+	accel |= MPEG2_ACCEL_DETECT;	/* keeps mpeg2_accels non-zero from now on */
+	mpeg2_accels = accel;
+	mpeg2_cpu_state_init (accel);
+	mpeg2_idct_init (accel);
+	mpeg2_mc_init (accel);
+    }
+    return mpeg2_accels & ~MPEG2_ACCEL_DETECT;
+}
+
+mpeg2dec_t * mpeg2_init (void)
+{
+    mpeg2dec_t * mpeg2dec;
+    /* returns a fresh decoder, or NULL when either allocation fails */
+    mpeg2_accel (MPEG2_ACCEL_DETECT);
+    mpeg2dec = (mpeg2dec_t *) mpeg2_malloc (sizeof (mpeg2dec_t),
+					    ALLOC_MPEG2DEC);
+    if (mpeg2dec == NULL)
+	return NULL;
+    memset (mpeg2dec, 0, sizeof (mpeg2dec_t));
+
+    mpeg2dec->chunk_buffer = (uint8_t *) mpeg2_malloc (BUFFER_SIZE + 4,
+						       ALLOC_CHUNK);
+    if (mpeg2dec->chunk_buffer == NULL) {
+	/* the parser cannot run without its chunk buffer: undo and fail */
+	mpeg2_free (mpeg2dec);
+	return NULL;
+    }
+    mpeg2dec->shift = 0xffffff00;
+    mpeg2dec->action = mpeg2_seek_sequence;
+    mpeg2dec->code = 0xb4;
+    mpeg2dec->first_decode_slice = 1;
+    mpeg2dec->nb_decode_slices = 0xb0 - 1;
+    mpeg2dec->convert_id = NULL;
+    /* initialize substructures */
+    mpeg2_header_state_init (mpeg2dec);
+    return mpeg2dec;
+}
+
+void mpeg2_close (mpeg2dec_t * mpeg2dec)
+{
+    int slot, plane;
+
+    /* release the buffers the decoder owns, then the decoder itself */
+    mpeg2_free (mpeg2dec->chunk_buffer);
+    if (mpeg2dec->custom_fbuf == 0) {
+	slot = mpeg2dec->alloc_index_user;
+	while (slot < mpeg2dec->alloc_index)
+	    mpeg2_free (mpeg2dec->fbuf_alloc[slot++].fbuf.buf[0]);
+    }
+    if (mpeg2dec->convert_start != NULL)
+	for (plane = 0; plane < 3; plane++)
+	    mpeg2_free (mpeg2dec->yuv_buf[plane][0]);
+    if (mpeg2dec->convert_id != NULL)
+	mpeg2_free (mpeg2dec->convert_id);
+    mpeg2_free (mpeg2dec);
+}
diff --git a/src/libmpeg2new/libmpeg2/header.c b/src/libmpeg2new/libmpeg2/header.c
new file mode 100644
index 000000000..55f16f1ee
--- /dev/null
+++ b/src/libmpeg2new/libmpeg2/header.c
@@ -0,0 +1,725 @@
+/*
+ * header.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#include <inttypes.h>
+#include <stdlib.h>	/* defines NULL */
+#include <string.h>	/* memcmp */
+
+#include "mpeg2.h"
+#include "mpeg2_internal.h"
+#include "convert.h"
+#include "attributes.h"
+
+#define SEQ_EXT 2	/* ext_state masks: 1 << id as in mpeg2_header_extension */
+#define SEQ_DISPLAY_EXT 4	/* 1 << 2 */
+#define QUANT_MATRIX_EXT 8	/* 1 << 3 */
+#define COPYRIGHT_EXT 0x10	/* 1 << 4 */
+#define PIC_DISPLAY_EXT 0x80	/* 1 << 7 */
+#define PIC_CODING_EXT 0x100	/* 1 << 8 */
+
+/* default intra quant matrix, in zig-zag order (entry i -> scan_norm[i]) */
+static const uint8_t default_intra_quantizer_matrix[64] ATTR_ALIGN(16) = {
+    8,
+    16, 16,
+    19, 16, 19,
+    22, 22, 22, 22,
+    22, 22, 26, 24, 26,
+    27, 27, 27, 26, 26, 26,
+    26, 27, 27, 27, 29, 29, 29,
+    34, 34, 34, 29, 29, 29, 27, 27,
+    29, 29, 32, 32, 34, 34, 37,
+    38, 37, 35, 35, 34, 35,
+    38, 38, 40, 40, 40,
+    48, 48, 46, 46,
+    56, 56, 58,
+    69, 69,
+    83
+};
+
+uint8_t mpeg2_scan_norm[64] ATTR_ALIGN(16) = {
+    /* Zig-Zag scan pattern; writable: mpeg2_idct_init() may permute it */
+     0,  1,  8, 16,  9,  2,  3, 10, 17, 24, 32, 25, 18, 11,  4,  5,
+    12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13,  6,  7, 14, 21, 28,
+    35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
+    58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
+};
+
+uint8_t mpeg2_scan_alt[64] ATTR_ALIGN(16) = {
+    /* Alternate scan pattern; writable: mpeg2_idct_init() may permute it */
+     0, 8,  16, 24,  1,  9,  2, 10, 17, 25, 32, 40, 48, 56, 57, 49,
+    41, 33, 26, 18,  3, 11,  4, 12, 19, 27, 34, 42, 50, 58, 35, 43,
+    51, 59, 20, 28,  5, 13,  6, 14, 21, 29, 36, 44, 52, 60, 37, 45,
+    53, 61, 22, 30,  7, 15, 23, 31, 38, 46, 54, 62, 39, 47, 55, 63
+};
+
+void mpeg2_header_state_init (mpeg2dec_t * mpeg2dec)
+{   /* point the decoder at its initial scan table, picture slot and fbufs */
+    int slot;
+
+    for (slot = 0; slot < 3; slot++)
+	mpeg2dec->fbuf[slot] = &mpeg2dec->fbuf_alloc[slot].fbuf;
+    mpeg2dec->alloc_index_user = mpeg2dec->alloc_index = 0;
+    mpeg2dec->picture = mpeg2dec->pictures;
+    mpeg2dec->decoder.scan = mpeg2_scan_norm;
+    mpeg2dec->first = 1;
+}
+
+static void reset_info (mpeg2_info_t * info)
+{   /* clear every per-event pointer in info (sequence and gop are kept) */
+    info->user_data_len = 0;	info->user_data = NULL;
+    info->discard_fbuf = info->display_fbuf = info->current_fbuf = NULL;
+    info->display_picture_2nd = info->display_picture = NULL;
+    info->current_picture_2nd = info->current_picture = NULL;
+}
+
+int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec)
+{   /* parse the sequence header at chunk_start; returns 1 if rejected */
+    uint8_t * buffer = mpeg2dec->chunk_start;
+    mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence);
+    static unsigned int frame_period[9] = {	/* indexed by buffer[3] & 15 */
+	0, 1126125, 1125000, 1080000, 900900, 900000, 540000, 450450, 450000
+    };
+    int i;
+
+    if ((buffer[6] & 0x20) != 0x20)	/* missing marker_bit */
+	return 1;
+    /* 12 bits of width, then 12 bits of height; a zero value is rejected */
+    i = (buffer[0] << 16) | (buffer[1] << 8) | buffer[2];
+    if (! (sequence->display_width = sequence->picture_width = i >> 12))
+	return 1;
+    if (! (sequence->display_height = sequence->picture_height = i & 0xfff))
+	return 1;
+    sequence->width = (sequence->picture_width + 15) & ~15;	/* round up */
+    sequence->height = (sequence->picture_height + 15) & ~15;
+    sequence->chroma_width = sequence->width >> 1;
+    sequence->chroma_height = sequence->height >> 1;
+
+    sequence->flags = (SEQ_FLAG_PROGRESSIVE_SEQUENCE |
+		       SEQ_VIDEO_FORMAT_UNSPECIFIED);
+
+    sequence->pixel_width = buffer[3] >> 4;	/* aspect ratio */
+    sequence->frame_period = 0;	/* stays 0 for codes outside the table */
+    if ((buffer[3] & 15) < 9)
+	sequence->frame_period = frame_period[buffer[3] & 15];
+
+    sequence->byte_rate = (buffer[4]<<10) | (buffer[5]<<2) | (buffer[6]>>6);
+
+    sequence->vbv_buffer_size = ((buffer[6]<<16)|(buffer[7]<<8))&0x1ff800;
+
+    if (buffer[7] & 4)
+	sequence->flags |= SEQ_FLAG_CONSTRAINED_PARAMETERS;
+
+    mpeg2dec->copy_matrix = 3;	/* both matrices get copied on finalize */
+    if (buffer[7] & 2) {	/* intra matrix present, offset by 7 bits */
+	for (i = 0; i < 64; i++)
+	    mpeg2dec->intra_quantizer_matrix[mpeg2_scan_norm[i]] =
+		(buffer[i+7] << 7) | (buffer[i+8] >> 1);
+	buffer += 64;	/* later offsets skip the 64 matrix bytes */
+    } else
+	for (i = 0; i < 64; i++)
+	    mpeg2dec->intra_quantizer_matrix[mpeg2_scan_norm[i]] =
+		default_intra_quantizer_matrix[i];
+
+    if (buffer[7] & 1)	/* non-intra matrix present, byte aligned */
+	for (i = 0; i < 64; i++)
+	    mpeg2dec->non_intra_quantizer_matrix[mpeg2_scan_norm[i]] =
+		buffer[i+8];
+    else
+	for (i = 0; i < 64; i++)
+	    mpeg2dec->non_intra_quantizer_matrix[i] = 16;	/* flat default */
+
+    sequence->profile_level_id = 0x80;
+    sequence->colour_primaries = 0;
+    sequence->transfer_characteristics = 0;
+    sequence->matrix_coefficients = 0;
+
+    mpeg2dec->ext_state = SEQ_EXT;	/* only a sequence extension may follow */
+    mpeg2dec->state = STATE_SEQUENCE;
+    mpeg2dec->display_offset_x = mpeg2dec->display_offset_y = 0;
+
+    reset_info (&(mpeg2dec->info));
+    mpeg2dec->info.gop = NULL;
+    return 0;
+}
+
+static int sequence_ext (mpeg2dec_t * mpeg2dec)
+{   /* parse the sequence extension; returns 1 if the chunk is rejected */
+    uint8_t * buffer = mpeg2dec->chunk_start;
+    mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence);
+    uint32_t flags;
+
+    if (!(buffer[3] & 1))	/* missing marker_bit */
+	return 1;
+
+    sequence->profile_level_id = (buffer[0] << 4) | (buffer[1] >> 4);
+
+    sequence->display_width = sequence->picture_width +=
+	((buffer[1] << 13) | (buffer[2] << 5)) & 0x3000;
+    sequence->display_height = sequence->picture_height +=
+	(buffer[2] << 7) & 0x3000;
+    sequence->width = (sequence->picture_width + 15) & ~15;
+    sequence->height = (sequence->picture_height + 15) & ~15;
+    flags = sequence->flags | SEQ_FLAG_MPEG2;
+    if (!(buffer[1] & 8)) {	/* not a progressive sequence */
+	flags &= ~SEQ_FLAG_PROGRESSIVE_SEQUENCE;
+	sequence->height = (sequence->height + 31) & ~31;
+    }
+    if (buffer[5] & 0x80)
+	flags |= SEQ_FLAG_LOW_DELAY;
+    sequence->flags = flags;
+    sequence->chroma_width = sequence->width;
+    sequence->chroma_height = sequence->height;
+    switch (buffer[1] & 6) {
+    case 0:	/* invalid */
+	return 1;
+    case 2:	/* 4:2:0 */
+	sequence->chroma_height >>= 1;	/* fall through: width halves too */
+    case 4:	/* 4:2:2 */
+	sequence->chroma_width >>= 1;
+    }
+
+    sequence->byte_rate += ((buffer[2]<<25) | (buffer[3]<<17)) & 0x3ffc0000;
+
+    sequence->vbv_buffer_size |= buffer[4] << 21;
+    /* buffer[5]: low_delay (bit 7), frame_rate_ext_n (6-5), _d (4-0) */
+    sequence->frame_period =
+	sequence->frame_period * ((buffer[5]&31)+1) / (((buffer[5]>>5)&3)+1);
+
+    mpeg2dec->ext_state = SEQ_DISPLAY_EXT;
+
+    return 0;
+}
+
+static int sequence_display_ext (mpeg2dec_t * mpeg2dec)
+{   /* parse the sequence display extension; returns 1 if rejected */
+    uint8_t * buffer = mpeg2dec->chunk_start;
+    mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence);
+    uint32_t flags;
+
+    flags = ((sequence->flags & ~SEQ_MASK_VIDEO_FORMAT) |
+	     ((buffer[0]<<4) & SEQ_MASK_VIDEO_FORMAT));
+    if (buffer[0] & 1) {	/* optional colour description present */
+	flags |= SEQ_FLAG_COLOUR_DESCRIPTION;
+	sequence->colour_primaries = buffer[1];
+	sequence->transfer_characteristics = buffer[2];
+	sequence->matrix_coefficients = buffer[3];
+	buffer += 3;	/* later offsets skip the three colour bytes */
+    }
+
+    if (!(buffer[2] & 2))	/* missing marker_bit */
+	return 1;
+    sequence->flags = flags;	/* was computed but never stored before */
+    sequence->display_width = (buffer[1] << 6) | (buffer[2] >> 2);
+    sequence->display_height =
+	((buffer[2]& 1 ) << 13) | (buffer[3] << 5) | (buffer[4] >> 3);
+
+    return 0;
+}
+
+static inline void finalize_sequence (mpeg2_sequence_t * sequence)
+{   /* turn raw header codes into a byte rate and a reduced pixel aspect */
+    int width;
+    int height;
+
+    sequence->byte_rate *= 50;
+
+    if (sequence->flags & SEQ_FLAG_MPEG2) {
+	switch (sequence->pixel_width) {
+	case 1:		/* square pixels */
+	    sequence->pixel_width = sequence->pixel_height = 1;	return;
+	case 2:		/* 4:3 aspect ratio */
+	    width = 4; height = 3;	break;
+	case 3:		/* 16:9 aspect ratio */
+	    width = 16; height = 9;	break;
+	case 4:		/* 2.21:1 aspect ratio */
+	    width = 221; height = 100;	break;
+	default:	/* illegal */
+	    sequence->pixel_width = sequence->pixel_height = 0;	return;
+	}
+	width *= sequence->display_height;	/* display aspect -> pixel aspect */
+	height *= sequence->display_width;
+    } else {
+	if (sequence->byte_rate == 50 * 0x3ffff) 
+	    sequence->byte_rate = 0;        /* mpeg-1 VBR */ 
+
+	switch (sequence->pixel_width) {
+	case 0:	case 15:	/* illegal */
+	    sequence->pixel_width = sequence->pixel_height = 0;		return;
+	case 1:	/* square pixels */
+	    sequence->pixel_width = sequence->pixel_height = 1;		return;
+	case 3:	/* 720x576 16:9 */
+	    sequence->pixel_width = 64;	sequence->pixel_height = 45;	return;
+	case 6:	/* 720x480 16:9 */
+	    sequence->pixel_width = 32;	sequence->pixel_height = 27;	return;
+	case 12:	/* 720*480 4:3 */
+	    sequence->pixel_width = 8;	sequence->pixel_height = 9;	return;
+	default:
+	    height = 88 * sequence->pixel_width + 1171;
+	    width = 2000;
+	}
+    }
+    sequence->pixel_width = width;
+    sequence->pixel_height = height;
+    while (width) {	/* find greatest common divisor */
+	int tmp = width;
+	width = height % tmp;
+	height = tmp;
+    }
+    if (!height)	/* display size 0x0: leave 0/0, do not divide by zero */
+	return;
+    sequence->pixel_width /= height;
+    sequence->pixel_height /= height;
+}
+
+void mpeg2_header_matrix_finalize (mpeg2dec_t * mpeg2dec)
+{   /* hand the matrices flagged in copy_matrix over to the slice decoder */
+    mpeg2_decoder_t * const dst = &(mpeg2dec->decoder);
+    const int want_intra = mpeg2dec->copy_matrix & 1;
+    const int want_non_intra = mpeg2dec->copy_matrix & 2;
+    int k;
+    for (k = 0; k < 64; k++) {
+	if (want_intra)
+	    dst->intra_quantizer_matrix[k] = mpeg2dec->intra_quantizer_matrix[k];
+	if (want_non_intra)
+	    dst->non_intra_quantizer_matrix[k] =
+		mpeg2dec->non_intra_quantizer_matrix[k];
+    }
+}
+
+void mpeg2_header_sequence_finalize (mpeg2dec_t * mpeg2dec)
+{   /* commit new_sequence to the decoder and detect a repeated header */
+    mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence);
+    mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
+
+    finalize_sequence (sequence);
+
+    mpeg2_header_matrix_finalize (mpeg2dec);
+    decoder->mpeg1 = !(sequence->flags & SEQ_FLAG_MPEG2);
+    decoder->width = sequence->width;
+    decoder->height = sequence->height;
+    decoder->vertical_position_extension = (sequence->picture_height > 2800);
+
+    /*
+     * according to 6.1.1.6, repeat sequence headers should be
+     * identical to the original. However some DVDs don't respect that
+     * and have different bitrates in the repeat sequence headers. So
+     * we'll ignore that in the comparison and still consider these as
+     * repeat sequence headers.
+     */
+    mpeg2dec->sequence.byte_rate = sequence->byte_rate;
+    if (!memcmp (&(mpeg2dec->sequence), sequence, sizeof (mpeg2_sequence_t)))
+	mpeg2dec->state = STATE_SEQUENCE_REPEATED;
+    mpeg2dec->sequence = *sequence;
+
+    mpeg2dec->info.sequence = &(mpeg2dec->sequence);
+}
+
+int mpeg2_header_gop (mpeg2dec_t * mpeg2dec)
+{   /* parse a GOP header into mpeg2dec->gop; returns 1 if rejected */
+    uint8_t * buffer = mpeg2dec->chunk_start;
+    mpeg2_gop_t * gop = &(mpeg2dec->gop);
+
+    reset_info (&(mpeg2dec->info));	/* done even when the header is bad */
+    if (! (buffer[1] & 8))	/* bit between minutes and seconds must be set */
+	return 1;
+    mpeg2dec->info.gop = gop;
+    gop->hours = (buffer[0] >> 2) & 31;
+    gop->minutes = ((buffer[0] << 4) | (buffer[1] >> 4)) & 63;
+    gop->seconds = ((buffer[1] << 3) | (buffer[2] >> 5)) & 63;
+    gop->pictures = ((buffer[2] << 1) | (buffer[3] >> 7)) & 63;
+    gop->flags = (buffer[0] >> 7) | ((buffer[3] >> 4) & 6);	/* 3 flag bits */
+    mpeg2dec->state = STATE_GOP;
+    return 0;
+}
+
+void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int coding_type)
+{   /* make fbuf[0] the first fbuf_alloc entry not held by fbuf[1]/fbuf[2] */
+    int i;
+
+    for (i = 0; i < 3; i++)
+	if (mpeg2dec->fbuf[1] != &mpeg2dec->fbuf_alloc[i].fbuf &&
+	    mpeg2dec->fbuf[2] != &mpeg2dec->fbuf_alloc[i].fbuf) {
+	    mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[i].fbuf;
+	    mpeg2dec->info.current_fbuf = mpeg2dec->fbuf[0];
+	    if ((coding_type == B_TYPE) ||
+		(mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY)) {
+		if ((coding_type == B_TYPE) || (mpeg2dec->convert_start))
+		    mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[0];
+		mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[0];
+	    }
+	    break;	/* the first free entry wins */
+	}
+}
+
+mpeg2_state_t mpeg2_header_picture_start (mpeg2dec_t * mpeg2dec)
+{   /* pick the picture slot for a new picture header and attach any PTS */
+    mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
+    mpeg2_picture_t * picture;
+
+    if (mpeg2dec->state != STATE_SLICE_1ST) {
+	mpeg2dec->state = STATE_PICTURE;
+	picture = mpeg2dec->pictures;	/* slot pair 0/1 or 2/3 */
+	if ((decoder->coding_type != PIC_FLAG_CODING_TYPE_B) ^
+	    (mpeg2dec->picture >= mpeg2dec->pictures + 2))
+	    picture += 2;
+    } else {
+	mpeg2dec->state = STATE_PICTURE_2ND;
+	picture = mpeg2dec->picture + 1;	/* second field picture */
+    }
+    mpeg2dec->picture = picture;
+    picture->flags = 0;
+    if (mpeg2dec->num_pts) {
+	if (mpeg2dec->bytes_since_pts >= 4) {	/* use the newest PTS */
+	    mpeg2dec->num_pts = 0;
+	    picture->pts = mpeg2dec->pts_current;
+	    picture->flags = PIC_FLAG_PTS;
+	} else if (mpeg2dec->num_pts > 1) {	/* fall back to the one before */
+	    mpeg2dec->num_pts = 1;
+	    picture->pts = mpeg2dec->pts_previous;
+	    picture->flags = PIC_FLAG_PTS;
+	}
+    }
+    picture->display_offset[0].x = picture->display_offset[1].x =
+	picture->display_offset[2].x = mpeg2dec->display_offset_x;
+    picture->display_offset[0].y = picture->display_offset[1].y =
+	picture->display_offset[2].y = mpeg2dec->display_offset_y;
+    return mpeg2_parse_header (mpeg2dec);
+}
+
+int mpeg2_header_picture (mpeg2dec_t * mpeg2dec)
+{   /* parse a picture header and set up info and frame buffers; returns 0 */
+    uint8_t * buffer = mpeg2dec->chunk_start;
+    mpeg2_picture_t * picture = mpeg2dec->picture;
+    mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
+    int type;
+    int low_delay;
+
+    type = (buffer [1] >> 3) & 7;
+    low_delay = mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY;
+
+    if (mpeg2dec->state == STATE_PICTURE) {	/* not a second field */
+	mpeg2_picture_t * other;
+
+	decoder->second_field = 0;
+	other = mpeg2dec->pictures;	/* the slot pair not used by picture */
+	if (other == picture)
+	    other += 2;
+	if (decoder->coding_type != PIC_FLAG_CODING_TYPE_B) {
+	    mpeg2dec->fbuf[2] = mpeg2dec->fbuf[1];	/* previous type, not new */
+	    mpeg2dec->fbuf[1] = mpeg2dec->fbuf[0];
+	}
+	mpeg2dec->fbuf[0] = NULL;
+	reset_info (&(mpeg2dec->info));
+	mpeg2dec->info.current_picture = picture;
+	mpeg2dec->info.display_picture = picture;
+	if (type != PIC_FLAG_CODING_TYPE_B) {
+	    if (!low_delay) {
+		if (mpeg2dec->first) {	/* nothing to display yet */
+		    mpeg2dec->info.display_picture = NULL;
+		    mpeg2dec->first = 0;
+		} else {
+		    mpeg2dec->info.display_picture = other;
+		    if (other->nb_fields == 1)
+			mpeg2dec->info.display_picture_2nd = other + 1;
+		    mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[1];
+		}
+	    }
+	    if (!low_delay + !mpeg2dec->convert_start)
+		mpeg2dec->info.discard_fbuf =
+		    mpeg2dec->fbuf[!low_delay + !mpeg2dec->convert_start];
+	}
+	if (!mpeg2dec->custom_fbuf) {
+	    while (mpeg2dec->alloc_index < 3) {	/* allocate on first use */
+		mpeg2_fbuf_t * fbuf;
+
+		fbuf = &(mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index++].fbuf);
+		fbuf->id = NULL;
+		if (mpeg2dec->convert_start) {    
+		    fbuf->buf[0] =
+			(uint8_t *) mpeg2_malloc (mpeg2dec->convert_size[0],
+						  ALLOC_CONVERTED);
+		    fbuf->buf[1] = fbuf->buf[0] + mpeg2dec->convert_size[1];
+		    fbuf->buf[2] = fbuf->buf[0] + mpeg2dec->convert_size[2];
+		} else {
+		    int size;
+		    size = mpeg2dec->decoder.width * mpeg2dec->decoder.height;
+		    fbuf->buf[0] = (uint8_t *) mpeg2_malloc (6 * size >> 2,
+							     ALLOC_YUV);
+		    fbuf->buf[1] = fbuf->buf[0] + size;	/* size + 2 * size/4 */
+		    fbuf->buf[2] = fbuf->buf[1] + (size >> 2);
+		}
+	    }
+	    mpeg2_set_fbuf (mpeg2dec, type);
+	}
+    } else {	/* second field picture */
+	decoder->second_field = 1;
+	mpeg2dec->info.current_picture_2nd = picture;
+	mpeg2dec->info.user_data = NULL; mpeg2dec->info.user_data_len = 0;
+	if (low_delay || type == PIC_FLAG_CODING_TYPE_B)
+	    mpeg2dec->info.display_picture_2nd = picture;
+    }
+    mpeg2dec->ext_state = PIC_CODING_EXT;
+
+    picture->temporal_reference = (buffer[0] << 2) | (buffer[1] >> 6);
+
+    decoder->coding_type = type;
+    picture->flags |= type;
+
+    if (type == PIC_FLAG_CODING_TYPE_P || type == PIC_FLAG_CODING_TYPE_B) {
+	/* forward_f_code and backward_f_code - used in mpeg1 only */
+	decoder->f_motion.f_code[1] = (buffer[3] >> 2) & 1;
+	decoder->f_motion.f_code[0] =
+	    (((buffer[3] << 1) | (buffer[4] >> 7)) & 7) - 1;
+	decoder->b_motion.f_code[1] = (buffer[4] >> 6) & 1;
+	decoder->b_motion.f_code[0] = ((buffer[4] >> 3) & 7) - 1;
+    }
+
+    /* XXXXXX decode extra_information_picture as well */
+
+    picture->nb_fields = 2;
+    /* defaults below stay in force unless picture_coding_ext replaces them */
+    decoder->intra_dc_precision = 0;
+    decoder->frame_pred_frame_dct = 1;
+    decoder->q_scale_type = 0;
+    decoder->concealment_motion_vectors = 0;
+    decoder->scan = mpeg2_scan_norm;
+    decoder->picture_structure = FRAME_PICTURE;
+    mpeg2dec->copy_matrix = 0;
+
+    return 0;
+}
+
+static int picture_coding_ext (mpeg2dec_t * mpeg2dec)
+{   /* parse the picture coding extension; returns 1 if rejected */
+    uint8_t * buffer = mpeg2dec->chunk_start;
+    mpeg2_picture_t * picture = mpeg2dec->picture;
+    mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
+    uint32_t flags;
+
+    /* pre subtract 1 for use later in compute_motion_vector */
+    decoder->f_motion.f_code[0] = (buffer[0] & 15) - 1;
+    decoder->f_motion.f_code[1] = (buffer[1] >> 4) - 1;
+    decoder->b_motion.f_code[0] = (buffer[1] & 15) - 1;
+    decoder->b_motion.f_code[1] = (buffer[2] >> 4) - 1;
+
+    flags = picture->flags;
+    decoder->intra_dc_precision = (buffer[2] >> 2) & 3;
+    decoder->picture_structure = buffer[2] & 3;
+    switch (decoder->picture_structure) {
+    case TOP_FIELD:
+	flags |= PIC_FLAG_TOP_FIELD_FIRST;	/* fall through */
+    case BOTTOM_FIELD:
+	picture->nb_fields = 1;
+	break;
+    case FRAME_PICTURE:
+	if (!(mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE)) {
+	    picture->nb_fields = (buffer[3] & 2) ? 3 : 2;
+	    flags |= (buffer[3] & 128) ? PIC_FLAG_TOP_FIELD_FIRST : 0;
+	} else
+	    picture->nb_fields = (buffer[3]&2) ? ((buffer[3]&128) ? 6 : 4) : 2;
+	break;
+    default:	/* picture_structure value 0 */
+	return 1;
+    }
+    decoder->top_field_first = buffer[3] >> 7;
+    decoder->frame_pred_frame_dct = (buffer[3] >> 6) & 1;
+    decoder->concealment_motion_vectors = (buffer[3] >> 5) & 1;
+    decoder->q_scale_type = (buffer[3] >> 4) & 1;
+    decoder->intra_vlc_format = (buffer[3] >> 3) & 1;
+    decoder->scan = (buffer[3] & 4) ? mpeg2_scan_alt : mpeg2_scan_norm;
+    flags |= (buffer[4] & 0x80) ? PIC_FLAG_PROGRESSIVE_FRAME : 0;
+    if (buffer[4] & 0x40)	/* composite display information present */
+	flags |= (((buffer[4]<<26) | (buffer[5]<<18) | (buffer[6]<<10)) &
+		  PIC_MASK_COMPOSITE_DISPLAY) | PIC_FLAG_COMPOSITE_DISPLAY;
+    picture->flags = flags;
+
+    mpeg2dec->ext_state = PIC_DISPLAY_EXT | COPYRIGHT_EXT | QUANT_MATRIX_EXT;
+
+    return 0;
+}
+
+static int picture_display_ext (mpeg2dec_t * mpeg2dec)
+{   /* read up to three display offsets; returns 1 if rejected */
+    uint8_t * buffer = mpeg2dec->chunk_start;
+    mpeg2_picture_t * picture = mpeg2dec->picture;
+    int i, nb_pos;
+
+    nb_pos = picture->nb_fields;	/* number of offset pairs to read */
+    if (mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE)
+	nb_pos >>= 1;
+
+    for (i = 0; i < nb_pos; i++) {
+	int x, y;
+
+	x = ((buffer[4*i] << 24) | (buffer[4*i+1] << 16) |
+	     (buffer[4*i+2] << 8) | buffer[4*i+3]) >> (11-2*i);
+	y = ((buffer[4*i+2] << 24) | (buffer[4*i+3] << 16) |
+	     (buffer[4*i+4] << 8) | buffer[4*i+5]) >> (10-2*i);
+	if (! (x & y & 1))	/* bit 0 of both must be set (marker bits) */
+	    return 1;
+	picture->display_offset[i].x = mpeg2dec->display_offset_x = x >> 1;
+	picture->display_offset[i].y = mpeg2dec->display_offset_y = y >> 1;
+    }
+    for (; i < 3; i++) {	/* remaining entries repeat the last offset */
+	picture->display_offset[i].x = mpeg2dec->display_offset_x;
+	picture->display_offset[i].y = mpeg2dec->display_offset_y;
+    }
+    return 0;
+}
+
+static int copyright_ext (mpeg2dec_t * mpeg2dec)
+{   /* the copyright extension is accepted but its content is not read */
+    return 0;
+}
+
+static int quant_matrix_ext (mpeg2dec_t * mpeg2dec)
+{   /* load the matrices present in the extension; always returns 0 */
+    uint8_t * buffer = mpeg2dec->chunk_start;
+    int i;
+
+    if (buffer[0] & 8) {	/* intra matrix present, offset by 5 bits */
+	for (i = 0; i < 64; i++)
+	    mpeg2dec->intra_quantizer_matrix[mpeg2_scan_norm[i]] =
+		(buffer[i] << 5) | (buffer[i+1] >> 3);
+	mpeg2dec->copy_matrix |= 1;
+	buffer += 64;	/* the next flag sits 64 bytes further on */
+    }
+
+    if (buffer[0] & 4) {	/* non-intra matrix present, offset by 6 bits */
+	for (i = 0; i < 64; i++)
+	    mpeg2dec->non_intra_quantizer_matrix[mpeg2_scan_norm[i]] =
+		(buffer[i] << 6) | (buffer[i+1] >> 2);
+	mpeg2dec->copy_matrix |= 2;
+    }
+
+    return 0;
+}
+
+int mpeg2_header_extension (mpeg2dec_t * mpeg2dec)
+{   /* dispatch an extension chunk by the id in its top four bits */
+    static int (* parser[]) (mpeg2dec_t *) = {	/* indexed by extension id */
+	0, sequence_ext, sequence_display_ext, quant_matrix_ext,
+	copyright_ext, 0, 0, picture_display_ext, picture_coding_ext
+    };
+    int ext, ext_bit;
+
+    ext = mpeg2dec->chunk_start[0] >> 4;
+    ext_bit = 1 << ext;
+    /* ext_state only ever holds the *_EXT masks, so parser[ext] is valid */
+    if (!(mpeg2dec->ext_state & ext_bit))
+	return 0;	/* ignore illegal extensions */
+    mpeg2dec->ext_state &= ~ext_bit;	/* each extension at most once */
+    return parser[ext] (mpeg2dec);
+}
+
+int mpeg2_header_user_data (mpeg2dec_t * mpeg2dec)
+{   /* expose a user data chunk through info.user_data; always returns 0 */
+    if (!mpeg2dec->info.user_data_len)	/* first chunk: remember its start */
+	mpeg2dec->info.user_data = mpeg2dec->chunk_start;
+    else	/* NOTE(review): the 3 presumably covers a start code - confirm */
+	mpeg2dec->info.user_data_len += 3;
+    mpeg2dec->info.user_data_len += (mpeg2dec->chunk_ptr - 4 -
+				     mpeg2dec->chunk_start);
+    mpeg2dec->chunk_start = mpeg2dec->chunk_ptr - 1;
+    
+    return 0;
+}
+
+mpeg2_state_t mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec)
+{   /* set state and decoder frame buffers before the first slice */
+    mpeg2dec->info.user_data = NULL; mpeg2dec->info.user_data_len = 0;
+    mpeg2dec->state = ((mpeg2dec->picture->nb_fields > 1 ||
+			mpeg2dec->state == STATE_PICTURE_2ND) ?
+		       STATE_SLICE : STATE_SLICE_1ST);
+
+    if (!(mpeg2dec->nb_decode_slices))	/* nothing will be decoded */
+	mpeg2dec->picture->flags |= PIC_FLAG_SKIP;
+    else if (mpeg2dec->convert_start) {	/* decode into yuv_buf, then convert */
+	int flags;
+
+	switch (mpeg2dec->decoder.picture_structure) {
+	case TOP_FIELD:		flags = CONVERT_TOP_FIELD;	break;
+	case BOTTOM_FIELD:	flags = CONVERT_BOTTOM_FIELD;	break;
+	default:
+	    flags =
+		((mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE) ?
+		 CONVERT_FRAME : CONVERT_BOTH_FIELDS);
+	}
+	mpeg2dec->convert_start (mpeg2dec->convert_id,
+				 mpeg2dec->fbuf[0]->buf, flags);
+
+	mpeg2dec->decoder.convert = mpeg2dec->convert_copy;
+	mpeg2dec->decoder.fbuf_id = mpeg2dec->convert_id;
+
+	if (mpeg2dec->decoder.coding_type == B_TYPE)
+	    mpeg2_init_fbuf (&(mpeg2dec->decoder), mpeg2dec->yuv_buf[2],
+			     mpeg2dec->yuv_buf[mpeg2dec->yuv_index ^ 1],
+			     mpeg2dec->yuv_buf[mpeg2dec->yuv_index]);
+	else {
+	    mpeg2_init_fbuf (&(mpeg2dec->decoder),
+			     mpeg2dec->yuv_buf[mpeg2dec->yuv_index ^ 1],
+			     mpeg2dec->yuv_buf[mpeg2dec->yuv_index],
+			     mpeg2dec->yuv_buf[mpeg2dec->yuv_index]);
+	    if (mpeg2dec->state == STATE_SLICE)	/* not after a first field */
+		mpeg2dec->yuv_index ^= 1;
+	}
+    } else {	/* decode straight into the frame buffers */
+	int b_type;
+
+	mpeg2dec->decoder.convert = NULL;
+	b_type = (mpeg2dec->decoder.coding_type == B_TYPE);
+	mpeg2_init_fbuf (&(mpeg2dec->decoder), mpeg2dec->fbuf[0]->buf,
+			 mpeg2dec->fbuf[b_type + 1]->buf,
+			 mpeg2dec->fbuf[b_type]->buf);
+    }
+    mpeg2dec->action = NULL;
+    return (mpeg2_state_t)-1;
+}
+
+mpeg2_state_t mpeg2_header_end (mpeg2dec_t * mpeg2dec)
+{   /* report the last picture/buffers and return to seeking a sequence */
+    mpeg2_picture_t * picture;
+    int b_type;
+
+    b_type = (mpeg2dec->decoder.coding_type == B_TYPE);
+    picture = mpeg2dec->pictures;	/* slot pair 0/1 or 2/3 */
+    if ((mpeg2dec->picture >= picture + 2) ^ b_type)
+	picture = mpeg2dec->pictures + 2;
+
+    mpeg2dec->state = STATE_END;
+    reset_info (&(mpeg2dec->info));
+    if (!(mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY)) {
+	mpeg2dec->info.display_picture = picture;
+	if (picture->nb_fields == 1)
+	    mpeg2dec->info.display_picture_2nd = picture + 1;
+	mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[b_type];
+	if (!mpeg2dec->convert_start)
+	    mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[b_type + 1];
+    } else if (!mpeg2dec->convert_start)
+	mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[b_type];
+    mpeg2dec->action = mpeg2_seek_sequence;
+    mpeg2dec->first = 1;	/* next picture starts a fresh display order */
+    return STATE_END;
+}
diff --git a/src/libmpeg2new/libmpeg2/idct.c b/src/libmpeg2new/libmpeg2/idct.c
new file mode 100644
index 000000000..932efcf2a
--- /dev/null
+++ b/src/libmpeg2new/libmpeg2/idct.c
@@ -0,0 +1,294 @@
+/*
+ * idct.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "mpeg2_internal.h"
+#include "attributes.h"
+
+#define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */
+#define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */
+#define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */
+#define W5 1609 /* 2048 * sqrt (2) * cos (5 * pi / 16) */
+#define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */
+#define W7 565  /* 2048 * sqrt (2) * cos (7 * pi / 16) */
+
+/* idct main entry point  */
+void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride);
+void (* mpeg2_idct_add) (int last, int16_t * block,
+			 uint8_t * dest, int stride);
+
+/*
+ * In legal streams, the IDCT output should be between -384 and +384.
+ * In corrupted streams, it is possible to force the IDCT output to go
+ * to +-3826 - this is the worst case for a column IDCT where the
+ * column inputs are 16-bit values.
+ */
+uint8_t mpeg2_clip[3840 * 2 + 256];
+#define CLIP(i) ((mpeg2_clip + 3840)[i])
+
+#if 0	/* direct form with four multiplications, kept for reference */
+#define BUTTERFLY(t0,t1,W0,W1,d0,d1)	\
+do {					\
+    t0 = W0 * d0 + W1 * d1;		\
+    t1 = W0 * d1 - W1 * d0;		\
+} while (0)
+#else	/* same t0/t1 as above, rearranged around the shared W0 * (d0 + d1) */
+#define BUTTERFLY(t0,t1,W0,W1,d0,d1)	\
+do {					\
+    int tmp = W0 * (d0 + d1);		\
+    t0 = tmp + (W1 - W0) * d1;		\
+    t1 = tmp - (W1 + W0) * d0;		\
+} while (0)
+#endif
+
+static void inline idct_row (int16_t * const block)
+{   /* in-place transform of the eight coefficients block[0..7] */
+    int d0, d1, d2, d3;
+    int a0, a1, a2, a3, b0, b1, b2, b3;
+    int t0, t1, t2, t3;
+
+    /* shortcut: block[1..7] are all zero, every output is block[0] << 3 */
+    if (likely (!(block[1] | ((int32_t *)block)[1] | ((int32_t *)block)[2] |
+		  ((int32_t *)block)[3]))) {
+	uint32_t tmp = (uint16_t) (block[0] << 3);
+	tmp |= tmp << 16;	/* same 16-bit value in both halves */
+	((int32_t *)block)[0] = tmp;
+	((int32_t *)block)[1] = tmp;
+	((int32_t *)block)[2] = tmp;
+	((int32_t *)block)[3] = tmp;
+	return;
+    }
+
+    d0 = (block[0] << 11) + 128;	/* 128 rounds the final >> 8 */
+    d1 = block[1];
+    d2 = block[2] << 11;
+    d3 = block[3];
+    t0 = d0 + d2;
+    t1 = d0 - d2;
+    BUTTERFLY (t2, t3, W6, W2, d3, d1);
+    a0 = t0 + t2;
+    a1 = t1 + t3;
+    a2 = t1 - t3;
+    a3 = t0 - t2;
+
+    d0 = block[4];
+    d1 = block[5];
+    d2 = block[6];
+    d3 = block[7];
+    BUTTERFLY (t0, t1, W7, W1, d3, d0);
+    BUTTERFLY (t2, t3, W3, W5, d1, d2);
+    b0 = t0 + t2;
+    b3 = t1 + t3;
+    t0 -= t2;
+    t1 -= t3;
+    b1 = ((t0 + t1) * 181) >> 8;	/* 181 / 256 is about 1 / sqrt (2) */
+    b2 = ((t0 - t1) * 181) >> 8;
+
+    block[0] = (a0 + b0) >> 8;
+    block[1] = (a1 + b1) >> 8;
+    block[2] = (a2 + b2) >> 8;
+    block[3] = (a3 + b3) >> 8;
+    block[4] = (a3 - b3) >> 8;
+    block[5] = (a2 - b2) >> 8;
+    block[6] = (a1 - b1) >> 8;
+    block[7] = (a0 - b0) >> 8;
+}
+
+static void inline idct_col (int16_t * const block)
+{   /* in-place transform of the eight entries block[0], block[8], ... */
+    int d0, d1, d2, d3;
+    int a0, a1, a2, a3, b0, b1, b2, b3;
+    int t0, t1, t2, t3;
+
+    d0 = (block[8*0] << 11) + 65536;	/* 65536 rounds the final >> 17 */
+    d1 = block[8*1];
+    d2 = block[8*2] << 11;
+    d3 = block[8*3];
+    t0 = d0 + d2;
+    t1 = d0 - d2;
+    BUTTERFLY (t2, t3, W6, W2, d3, d1);
+    a0 = t0 + t2;
+    a1 = t1 + t3;
+    a2 = t1 - t3;
+    a3 = t0 - t2;
+
+    d0 = block[8*4];
+    d1 = block[8*5];
+    d2 = block[8*6];
+    d3 = block[8*7];
+    BUTTERFLY (t0, t1, W7, W1, d3, d0);
+    BUTTERFLY (t2, t3, W3, W5, d1, d2);
+    b0 = t0 + t2;
+    b3 = t1 + t3;
+    t0 = (t0 - t2) >> 8;	/* unlike idct_row, shift before the multiply */
+    t1 = (t1 - t3) >> 8;
+    b1 = (t0 + t1) * 181;
+    b2 = (t0 - t1) * 181;
+
+    block[8*0] = (a0 + b0) >> 17;
+    block[8*1] = (a1 + b1) >> 17;
+    block[8*2] = (a2 + b2) >> 17;
+    block[8*3] = (a3 + b3) >> 17;
+    block[8*4] = (a3 - b3) >> 17;
+    block[8*5] = (a2 - b2) >> 17;
+    block[8*6] = (a1 - b1) >> 17;
+    block[8*7] = (a0 - b0) >> 17;
+}
+
+static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest,
+			       const int stride)
+{   /* transform block, store the clipped 8x8 result at dest, zero block */
+    int i;
+
+    for (i = 0; i < 8; i++)
+	idct_row (block + 8 * i);
+    for (i = 0; i < 8; i++)
+	idct_col (block + i);
+    do {	/* i is 8 here, left over from the loop above */
+	dest[0] = CLIP (block[0]);
+	dest[1] = CLIP (block[1]);
+	dest[2] = CLIP (block[2]);
+	dest[3] = CLIP (block[3]);
+	dest[4] = CLIP (block[4]);
+	dest[5] = CLIP (block[5]);
+	dest[6] = CLIP (block[6]);
+	dest[7] = CLIP (block[7]);
+
+	block[0] = 0;	block[1] = 0;	block[2] = 0;	block[3] = 0;
+	block[4] = 0;	block[5] = 0;	block[6] = 0;	block[7] = 0;
+
+	dest += stride;
+	block += 8;
+    } while (--i);
+}
+
+static void mpeg2_idct_add_c (const int last, int16_t * block,
+			      uint8_t * dest, const int stride)
+{   /* add the transformed block to dest with clipping, zero the block */
+    int i;
+
+    if (last != 129 || (block[0] & 7) == 4) {	/* full transform needed */
+	for (i = 0; i < 8; i++)
+	    idct_row (block + 8 * i);
+	for (i = 0; i < 8; i++)
+	    idct_col (block + i);
+	do {	/* i is 8 here, left over from the loop above */
+	    dest[0] = CLIP (block[0] + dest[0]);
+	    dest[1] = CLIP (block[1] + dest[1]);
+	    dest[2] = CLIP (block[2] + dest[2]);
+	    dest[3] = CLIP (block[3] + dest[3]);
+	    dest[4] = CLIP (block[4] + dest[4]);
+	    dest[5] = CLIP (block[5] + dest[5]);
+	    dest[6] = CLIP (block[6] + dest[6]);
+	    dest[7] = CLIP (block[7] + dest[7]);
+
+	    block[0] = 0;	block[1] = 0;	block[2] = 0;	block[3] = 0;
+	    block[4] = 0;	block[5] = 0;	block[6] = 0;	block[7] = 0;
+
+	    dest += stride;
+	    block += 8;
+	} while (--i);
+    } else {	/* shortcut: one rounded value is added to all 64 pixels */
+	int DC;
+
+	DC = (block[0] + 4) >> 3;
+	block[0] = block[63] = 0;	/* only these two entries are cleared */
+	i = 8;
+	do {
+	    dest[0] = CLIP (DC + dest[0]);
+	    dest[1] = CLIP (DC + dest[1]);
+	    dest[2] = CLIP (DC + dest[2]);
+	    dest[3] = CLIP (DC + dest[3]);
+	    dest[4] = CLIP (DC + dest[4]);
+	    dest[5] = CLIP (DC + dest[5]);
+	    dest[6] = CLIP (DC + dest[6]);
+	    dest[7] = CLIP (DC + dest[7]);
+	    dest += stride;
+	} while (--i);
+    }
+}
+
+void mpeg2_idct_init (uint32_t accel)
+{   /* bind mpeg2_idct_copy/add to the implementation selected by accel */
+#ifdef ARCH_X86
+    if (accel & MPEG2_ACCEL_X86_MMXEXT) {
+	mpeg2_idct_copy = mpeg2_idct_copy_mmxext;
+	mpeg2_idct_add = mpeg2_idct_add_mmxext;
+	mpeg2_idct_mmx_init ();
+    } else if (accel & MPEG2_ACCEL_X86_MMX) {
+	mpeg2_idct_copy = mpeg2_idct_copy_mmx;
+	mpeg2_idct_add = mpeg2_idct_add_mmx;
+	mpeg2_idct_mmx_init ();
+    } else
+#endif
+#ifdef ARCH_PPC
+    if (accel & MPEG2_ACCEL_PPC_ALTIVEC) {
+	mpeg2_idct_copy = mpeg2_idct_copy_altivec;
+	mpeg2_idct_add = mpeg2_idct_add_altivec;
+	mpeg2_idct_altivec_init ();
+    } else
+#endif
+#ifdef ARCH_ALPHA
+    if (accel & MPEG2_ACCEL_ALPHA_MVI) {
+	mpeg2_idct_copy = mpeg2_idct_copy_mvi;
+	mpeg2_idct_add = mpeg2_idct_add_mvi;
+	mpeg2_idct_alpha_init ();
+    } else if (accel & MPEG2_ACCEL_ALPHA) {
+	int i;
+
+	mpeg2_idct_copy = mpeg2_idct_copy_alpha;
+	mpeg2_idct_add = mpeg2_idct_add_alpha;
+	mpeg2_idct_alpha_init ();
+	for (i = -3840; i < 3840 + 256; i++)	/* fill the clipping table */
+	    CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 255 : i);
+    } else
+#endif
+#ifdef LIBMPEG2_MLIB
+    if (accel & MPEG2_ACCEL_MLIB) {
+	mpeg2_idct_copy = mpeg2_idct_copy_mlib_non_ieee;
+	mpeg2_idct_add = (getenv ("MLIB_NON_IEEE") ?
+			  mpeg2_idct_add_mlib_non_ieee : mpeg2_idct_add_mlib);
+    } else
+#endif
+    {	/* plain C fallback */
+	extern uint8_t mpeg2_scan_norm[64];
+	extern uint8_t mpeg2_scan_alt[64];
+	int i, j;
+
+	mpeg2_idct_copy = mpeg2_idct_copy_c;
+	mpeg2_idct_add = mpeg2_idct_add_c;
+	for (i = -3840; i < 3840 + 256; i++)	/* fill the clipping table */
+	    CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 255 : i);
+	for (i = 0; i < 64; i++) {	/* in place: must not be run twice */
+	    j = mpeg2_scan_norm[i];
+	    mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
+	    j = mpeg2_scan_alt[i];
+	    mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
+	}
+    }
+}
diff --git a/src/libmpeg2new/libmpeg2/idct_alpha.c b/src/libmpeg2new/libmpeg2/idct_alpha.c
new file mode 100644
index 000000000..68c605508
--- /dev/null
+++ b/src/libmpeg2new/libmpeg2/idct_alpha.c
@@ -0,0 +1,377 @@
+/*
+ * idct_alpha.c
+ * Copyright (C) 2002-2003 Falk Hueffner <falk@debian.org>
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#ifdef ARCH_ALPHA
+
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "alpha_asm.h"
+#include "attributes.h"
+
+#define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */
+#define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */
+#define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */
+#define W5 1609 /* 2048 * sqrt (2) * cos (5 * pi / 16) */
+#define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */
+#define W7 565  /* 2048 * sqrt (2) * cos (7 * pi / 16) */
+
+extern uint8_t mpeg2_clip[3840 * 2 + 256];
+#define CLIP(i) ((mpeg2_clip + 3840)[i])	/* in bounds for -3840 <= i <= 4095 */
+
+#if 0	/* disabled reference form: four multiplications */
+#define BUTTERFLY(t0,t1,W0,W1,d0,d1)	\
+do {					\
+    t0 = W0 * d0 + W1 * d1;			\
+    t1 = W0 * d1 - W1 * d0;			\
+} while (0)
+#else	/* algebraically the same t0/t1, sharing the product W0 * (d0 + d1) */
+#define BUTTERFLY(t0,t1,W0,W1,d0,d1)	\
+do {					\
+    int_fast32_t tmp = W0 * (d0 + d1);	\
+    t0 = tmp + (W1 - W0) * d1;		\
+    t1 = tmp - (W1 + W0) * d0;		\
+} while (0)
+#endif
+/* In-place 8-point IDCT of one row (the eight int16 values at block[0..7]). */
+static inline void idct_row (int16_t * const block)
+{
+    uint64_t l, r;
+    int_fast32_t d0, d1, d2, d3;
+    int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3;
+    int_fast32_t t0, t1, t2, t3;
+    /* l carries coefficients 0-3 and r coefficients 4-7, 16 bits each */
+    l = ldq (block);
+    r = ldq (block + 4);
+
+    /* shortcut: coefficients 1-7 are all zero, every output is block[0] << 3 */
+    if (likely (!((l & ~0xffffUL) | r))) {
+	uint64_t tmp = (uint16_t) (l << 3);
+	tmp |= tmp << 16;
+	tmp |= tmp << 32;
+	((int32_t *)block)[0] = tmp;
+	((int32_t *)block)[1] = tmp;
+	((int32_t *)block)[2] = tmp;
+	((int32_t *)block)[3] = tmp;
+	return;
+    }
+    /* scale by 2048 with a multiply: left-shifting a negative value is undefined */
+    d0 = (sextw (l) * 2048) + 128;	/* 128 rounds the final >> 8 */
+    d1 = sextw (extwl (l, 2));
+    d2 = sextw (extwl (l, 4)) * 2048;
+    d3 = sextw (extwl (l, 6));
+    t0 = d0 + d2;
+    t1 = d0 - d2;
+    BUTTERFLY (t2, t3, W6, W2, d3, d1);
+    a0 = t0 + t2;
+    a1 = t1 + t3;
+    a2 = t1 - t3;
+    a3 = t0 - t2;
+
+    d0 = sextw (r);
+    d1 = sextw (extwl (r, 2));
+    d2 = sextw (extwl (r, 4));
+    d3 = sextw (extwl (r, 6));
+    BUTTERFLY (t0, t1, W7, W1, d3, d0);
+    BUTTERFLY (t2, t3, W3, W5, d1, d2);
+    b0 = t0 + t2;
+    b3 = t1 + t3;
+    t0 -= t2;
+    t1 -= t3;
+    b1 = ((t0 + t1) * 181) >> 8;	/* 181/256 ~= 1/sqrt(2) */
+    b2 = ((t0 - t1) * 181) >> 8;
+
+    block[0] = (a0 + b0) >> 8;
+    block[1] = (a1 + b1) >> 8;
+    block[2] = (a2 + b2) >> 8;
+    block[3] = (a3 + b3) >> 8;
+    block[4] = (a3 - b3) >> 8;
+    block[5] = (a2 - b2) >> 8;
+    block[6] = (a1 - b1) >> 8;
+    block[7] = (a0 - b0) >> 8;
+}
+/* In-place 8-point IDCT of one column (block[0], block[8], ..., block[56]). */
+static inline void idct_col (int16_t * const block)
+{
+    int_fast32_t d0, d1, d2, d3;
+    int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3;
+    int_fast32_t t0, t1, t2, t3;
+    /* scale by 2048 with a multiply: left-shifting a negative value is undefined */
+    d0 = (block[8*0] * 2048) + 65536;	/* 65536 rounds the final >> 17 */
+    d1 = block[8*1];
+    d2 = block[8*2] * 2048;
+    d3 = block[8*3];
+    t0 = d0 + d2;
+    t1 = d0 - d2;
+    BUTTERFLY (t2, t3, W6, W2, d3, d1);
+    a0 = t0 + t2;
+    a1 = t1 + t3;
+    a2 = t1 - t3;
+    a3 = t0 - t2;
+
+    d0 = block[8*4];
+    d1 = block[8*5];
+    d2 = block[8*6];
+    d3 = block[8*7];
+    BUTTERFLY (t0, t1, W7, W1, d3, d0);
+    BUTTERFLY (t2, t3, W3, W5, d1, d2);
+    b0 = t0 + t2;
+    b3 = t1 + t3;
+    t0 = (t0 - t2) >> 8;	/* the >> 8 is applied before the * 181 here */
+    t1 = (t1 - t3) >> 8;
+    b1 = (t0 + t1) * 181;	/* 181/256 ~= 1/sqrt(2) */
+    b2 = (t0 - t1) * 181;
+
+    block[8*0] = (a0 + b0) >> 17;
+    block[8*1] = (a1 + b1) >> 17;
+    block[8*2] = (a2 + b2) >> 17;
+    block[8*3] = (a3 + b3) >> 17;
+    block[8*4] = (a3 - b3) >> 17;
+    block[8*5] = (a2 - b2) >> 17;
+    block[8*6] = (a1 - b1) >> 17;
+    block[8*7] = (a0 - b0) >> 17;
+}
+/* Alpha MVI IDCT of block; the clamped result overwrites dest, block is zeroed. */
+void mpeg2_idct_copy_mvi (int16_t * block, uint8_t * dest, const int stride)
+{
+    uint64_t clampmask;
+    int i;
+    /* two in-place passes: all eight rows, then all eight columns */
+    for (i = 0; i < 8; i++)
+	idct_row (block + 8 * i);
+
+    for (i = 0; i < 8; i++)
+	idct_col (block + i);
+    /* i is 8 here, so the do/while below runs once per output line */
+    clampmask = zap (-1, 0xaa);	/* 0x00ff00ff00ff00ff */
+    do {
+	uint64_t shorts0, shorts1;
+
+	shorts0 = ldq (block);
+	shorts0 = maxsw4 (shorts0, 0);	/* clamp each 16-bit lane to >= 0 ... */
+	shorts0 = minsw4 (shorts0, clampmask);	/* ... and to <= 255 */
+	stl (pkwb (shorts0), dest);	/* pack to bytes, store pixels 0-3 */
+
+	shorts1 = ldq (block + 4);
+	shorts1 = maxsw4 (shorts1, 0);
+	shorts1 = minsw4 (shorts1, clampmask);
+	stl (pkwb (shorts1), dest + 4);	/* pixels 4-7 */
+
+	stq (0, block);	/* clear the coefficients of this line */
+	stq (0, block + 4);
+
+	dest += stride;
+	block += 8;
+    } while (--i);
+}
+/* Alpha MVI IDCT of block, added to the 8x8 pixels at dest with saturation. */
+void mpeg2_idct_add_mvi (const int last, int16_t * block,
+			 uint8_t * dest, const int stride)
+{
+    uint64_t clampmask;
+    uint64_t signmask;
+    int i;
+    /* full transform unless last == 129 and (block[0] & 7) != 4 (DC-only path) */
+    if (last != 129 || (block[0] & 7) == 4) {
+	for (i = 0; i < 8; i++)
+	    idct_row (block + 8 * i);
+	for (i = 0; i < 8; i++)
+	    idct_col (block + i);
+	clampmask = zap (-1, 0xaa);	/* 0x00ff00ff00ff00ff */
+	signmask = zap (-1, 0x33);
+	signmask ^= signmask >> 1;	/* 0x8000800080008000 */
+	/* i is 8 here, so the do/while below runs once per output line */
+	do {
+	    uint64_t shorts0, pix0, signs0;
+	    uint64_t shorts1, pix1, signs1;
+
+	    shorts0 = ldq (block);
+	    shorts1 = ldq (block + 4);
+
+	    pix0 = unpkbw (ldl (dest));
+	    /* signed subword add (MMX paddw).  */
+	    signs0 = shorts0 & signmask;	/* set the sign bits aside ... */
+	    shorts0 &= ~signmask;
+	    shorts0 += pix0;	/* ... so this 64-bit add cannot carry between lanes */
+	    shorts0 ^= signs0;
+	    /* clamp. */
+	    shorts0 = maxsw4 (shorts0, 0);
+	    shorts0 = minsw4 (shorts0, clampmask);	
+
+	    /* next 4.  */
+	    pix1 = unpkbw (ldl (dest + 4));
+	    signs1 = shorts1 & signmask;
+	    shorts1 &= ~signmask;
+	    shorts1 += pix1;
+	    shorts1 ^= signs1;
+	    shorts1 = maxsw4 (shorts1, 0);
+	    shorts1 = minsw4 (shorts1, clampmask);
+
+	    stl (pkwb (shorts0), dest);
+	    stl (pkwb (shorts1), dest + 4);
+	    stq (0, block);	/* clear the coefficients of this line */
+	    stq (0, block + 4);
+
+	    dest += stride;
+	    block += 8;
+	} while (--i);
+    } else {
+	int DC;
+	uint64_t p0, p1, p2, p3, p4, p5, p6, p7;
+	uint64_t DCs;
+	/* DC-only path: one rounded offset is applied to all 64 pixels */
+	DC = (block[0] + 4) >> 3;
+	block[0] = block[63] = 0;
+
+	p0 = ldq (dest + 0 * stride);
+	p1 = ldq (dest + 1 * stride);
+	p2 = ldq (dest + 2 * stride);
+	p3 = ldq (dest + 3 * stride);
+	p4 = ldq (dest + 4 * stride);
+	p5 = ldq (dest + 5 * stride);
+	p6 = ldq (dest + 6 * stride);
+	p7 = ldq (dest + 7 * stride);
+	/* add at most ~p (= 255 - p) or subtract at most p per byte: no wraparound */
+	if (DC > 0) {
+	    DCs = BYTE_VEC (likely (DC <= 255) ? DC : 255);
+	    p0 += minub8 (DCs, ~p0);
+	    p1 += minub8 (DCs, ~p1);
+	    p2 += minub8 (DCs, ~p2);
+	    p3 += minub8 (DCs, ~p3);
+	    p4 += minub8 (DCs, ~p4);
+	    p5 += minub8 (DCs, ~p5);
+	    p6 += minub8 (DCs, ~p6);
+	    p7 += minub8 (DCs, ~p7);
+	} else {	/* also taken for DC == 0, which subtracts nothing */
+	    DCs = BYTE_VEC (likely (-DC <= 255) ? -DC : 255);
+	    p0 -= minub8 (DCs, p0);
+	    p1 -= minub8 (DCs, p1);
+	    p2 -= minub8 (DCs, p2);
+	    p3 -= minub8 (DCs, p3);
+	    p4 -= minub8 (DCs, p4);
+	    p5 -= minub8 (DCs, p5);
+	    p6 -= minub8 (DCs, p6);
+	    p7 -= minub8 (DCs, p7);
+	}
+
+	stq (p0, dest + 0 * stride);
+	stq (p1, dest + 1 * stride);
+	stq (p2, dest + 2 * stride);
+	stq (p3, dest + 3 * stride);
+	stq (p4, dest + 4 * stride);
+	stq (p5, dest + 5 * stride);
+	stq (p6, dest + 6 * stride);
+	stq (p7, dest + 7 * stride);
+    }
+}
+/* Scalar Alpha IDCT: replace the 8x8 area at dest with the clipped result. */
+void mpeg2_idct_copy_alpha (int16_t * block, uint8_t * dest, const int stride)
+{
+    int row, col;
+
+    /* two-pass transform, done in place: all rows, then all columns */
+    for (row = 0; row < 8; row++)
+	idct_row (block + 8 * row);
+    for (col = 0; col < 8; col++)
+	idct_col (block + col);
+
+    /*
+     * Write out one line of eight pixels per pass; CLIP maps each sample
+     * through the mpeg2_clip lookup table.
+     */
+    for (row = 0; row < 8; row++) {
+	for (col = 0; col < 8; col++)
+	    dest[col] = CLIP (block[col]);
+
+	/* clear the eight coefficients just consumed */
+	stq(0, block);
+	stq(0, block + 4);
+	dest += stride;
+	block += 8;
+    }
+}
+/*
+ * Scalar Alpha IDCT, "add" flavour: adds the decoded residual to the 8x8
+ * pixel area at dest (lines are stride bytes apart), mapping every sum
+ * through CLIP.
+ *
+ * When last == 129 and (block[0] & 7) != 4 only the DC term is applied and
+ * the full transform is skipped; that path zeroes block[0] and block[63].
+ * The full path zeroes all 64 coefficients.
+ */
+void mpeg2_idct_add_alpha (const int last, int16_t * block,
+			   uint8_t * dest, const int stride)
+{
+    int row, col;
+
+    if (last == 129 && (block[0] & 7) != 4) {
+	/* DC-only shortcut: every pixel gets the same rounded offset */
+	const int DC = (block[0] + 4) >> 3;
+
+	block[0] = block[63] = 0;
+	for (row = 0; row < 8; row++) {
+	    for (col = 0; col < 8; col++)
+		dest[col] = CLIP (DC + dest[col]);
+	    dest += stride;
+	}
+	return;
+    }
+
+    /* full transform, in place: all rows, then all columns */
+    for (row = 0; row < 8; row++)
+	idct_row (block + 8 * row);
+    for (col = 0; col < 8; col++)
+	idct_col (block + col);
+
+    /* one output line per pass */
+    for (row = 0; row < 8; row++) {
+	for (col = 0; col < 8; col++)
+	    dest[col] = CLIP (block[col] + dest[col]);
+
+	/* clear the eight coefficients just consumed */
+	stq(0, block);
+	stq(0, block + 4);
+
+	dest += stride;
+	block += 8;
+    }
+}
+/* Patch both scan tables in place; mpeg2_idct_init calls this for Alpha. */
+void mpeg2_idct_alpha_init (void)
+{
+    extern uint8_t mpeg2_scan_norm[64], mpeg2_scan_alt[64];
+    int idx;
+
+    for (idx = 0; idx < 64; idx++) {
+	const int norm = mpeg2_scan_norm[idx];
+	const int alt = mpeg2_scan_alt[idx];
+	/* index bits 0,3 move up two places; bits 1,2,4,5 move down one */
+	mpeg2_scan_norm[idx] = ((norm & 0x09) << 2) | ((norm & 0x36) >> 1);
+	mpeg2_scan_alt[idx] = ((alt & 0x09) << 2) | ((alt & 0x36) >> 1);
+    }
+}
+
+#endif /* ARCH_ALPHA */
diff --git a/src/libmpeg2new/libmpeg2/idct_altivec.c b/src/libmpeg2new/libmpeg2/idct_altivec.c
new file mode 100644
index 000000000..e9fc28bc4
--- /dev/null
+++ b/src/libmpeg2new/libmpeg2/idct_altivec.c
@@ -0,0 +1,260 @@
+/*
+ * idct_altivec.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#ifdef ARCH_PPC
+
+#ifdef HAVE_ALTIVEC_H
+#include <altivec.h>
+#endif
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "mpeg2_internal.h"
+#include "attributes.h"
+
+typedef vector signed char vector_s8_t;
+typedef vector unsigned char vector_u8_t;
+typedef vector signed short vector_s16_t;
+typedef vector unsigned short vector_u16_t;
+typedef vector signed int vector_s32_t;
+typedef vector unsigned int vector_u32_t;
+
+#if defined(HAVE_ALTIVEC_H) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303)
+/* work around gcc <3.3 vec_mergel bug */
+static inline vector_s16_t my_vec_mergel (vector_s16_t const A,
+					  vector_s16_t const B)
+{
+    static const vector_u8_t mergel = {	/* bytes 8-15 of A (0x0n) and B (0x1n), interleaved pairwise */
+	0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b,
+	0x0c, 0x0d, 0x1c, 0x1d, 0x0e, 0x0f, 0x1e, 0x1f
+    };
+    return vec_perm (A, B, mergel);
+}
+#undef vec_mergel
+#define vec_mergel my_vec_mergel	/* later vec_mergel uses in this file get the replacement */
+#endif
+/* VEC_S16 spells a vector_s16_t literal in the syntax of either compiler */
+#ifdef HAVE_ALTIVEC_H	/* gnu */
+#define VEC_S16(a,b,c,d,e,f,g,h) {a, b, c, d, e, f, g, h}
+#else			/* apple */
+#define VEC_S16(a,b,c,d,e,f,g,h) (vector_s16_t) (a, b, c, d, e, f, g, h)
+#endif
+/* IDCT splats elements 0-5 as c4, a0, a1, a2, mc4, ma2; 6-7 form the bias word */
+static const vector_s16_t constants ATTR_ALIGN(16) =
+    VEC_S16 (23170, 13573, 6518, 21895, -23170, -21895, 32, 31);
+static const vector_s16_t constants_1 ATTR_ALIGN(16) =	/* scales block[0], block[4] */
+    VEC_S16 (16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725);
+static const vector_s16_t constants_2 ATTR_ALIGN(16) =	/* scales block[1], block[7] */
+    VEC_S16 (22725, 31521, 29692, 26722, 22725, 26722, 29692, 31521);
+static const vector_s16_t constants_3 ATTR_ALIGN(16) =	/* scales block[2], block[6] */
+    VEC_S16 (21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692);
+static const vector_s16_t constants_4 ATTR_ALIGN(16) =	/* scales block[3], block[5] */
+    VEC_S16 (19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722);
+/* IDCT_HALF: one 1-D pass, reads vx0..vx7 and writes vy0..vy7 (t0..t8 scratch) */
+#define IDCT_HALF					\
+    /* 1st stage */					\
+    t1 = vec_mradds (a1, vx7, vx1 );			\
+    t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7));	\
+    t7 = vec_mradds (a2, vx5, vx3);			\
+    t3 = vec_mradds (ma2, vx3, vx5);			\
+							\
+    /* 2nd stage */					\
+    t5 = vec_adds (vx0, vx4);				\
+    t0 = vec_subs (vx0, vx4);				\
+    t2 = vec_mradds (a0, vx6, vx2);			\
+    t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6));	\
+    t6 = vec_adds (t8, t3);				\
+    t3 = vec_subs (t8, t3);				\
+    t8 = vec_subs (t1, t7);				\
+    t1 = vec_adds (t1, t7);				\
+							\
+    /* 3rd stage */					\
+    t7 = vec_adds (t5, t2);				\
+    t2 = vec_subs (t5, t2);				\
+    t5 = vec_adds (t0, t4);				\
+    t0 = vec_subs (t0, t4);				\
+    t4 = vec_subs (t8, t3);				\
+    t3 = vec_adds (t8, t3);				\
+							\
+    /* 4th stage */					\
+    vy0 = vec_adds (t7, t1);				\
+    vy7 = vec_subs (t7, t1);				\
+    vy1 = vec_mradds (c4, t3, t5);			\
+    vy6 = vec_mradds (mc4, t3, t5);			\
+    vy2 = vec_mradds (c4, t4, t0);			\
+    vy5 = vec_mradds (mc4, t4, t0);			\
+    vy3 = vec_adds (t2, t6);				\
+    vy4 = vec_subs (t2, t6);
+/* IDCT: declares its locals, transforms block[0..7], leaves the rows in vx0..vx7 */
+#define IDCT								\
+    vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7;		\
+    vector_s16_t vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7;		\
+    vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias;			\
+    vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8;			\
+    vector_u16_t shift;							\
+									\
+    c4 = vec_splat (constants, 0);					\
+    a0 = vec_splat (constants, 1);					\
+    a1 = vec_splat (constants, 2);					\
+    a2 = vec_splat (constants, 3);					\
+    mc4 = vec_splat (constants, 4);					\
+    ma2 = vec_splat (constants, 5);					\
+    bias = (vector_s16_t)vec_splat ((vector_s32_t)constants, 3);	\
+									\
+    zero = vec_splat_s16 (0);						\
+    shift = vec_splat_u16 (4);						\
+									\
+    vx0 = vec_mradds (vec_sl (block[0], shift), constants_1, zero);	\
+    vx1 = vec_mradds (vec_sl (block[1], shift), constants_2, zero);	\
+    vx2 = vec_mradds (vec_sl (block[2], shift), constants_3, zero);	\
+    vx3 = vec_mradds (vec_sl (block[3], shift), constants_4, zero);	\
+    vx4 = vec_mradds (vec_sl (block[4], shift), constants_1, zero);	\
+    vx5 = vec_mradds (vec_sl (block[5], shift), constants_4, zero);	\
+    vx6 = vec_mradds (vec_sl (block[6], shift), constants_3, zero);	\
+    vx7 = vec_mradds (vec_sl (block[7], shift), constants_2, zero);	\
+									\
+    IDCT_HALF								\
+									\
+    vx0 = vec_mergeh (vy0, vy4);					\
+    vx1 = vec_mergel (vy0, vy4);					\
+    vx2 = vec_mergeh (vy1, vy5);					\
+    vx3 = vec_mergel (vy1, vy5);					\
+    vx4 = vec_mergeh (vy2, vy6);					\
+    vx5 = vec_mergel (vy2, vy6);					\
+    vx6 = vec_mergeh (vy3, vy7);					\
+    vx7 = vec_mergel (vy3, vy7);					\
+									\
+    vy0 = vec_mergeh (vx0, vx4);					\
+    vy1 = vec_mergel (vx0, vx4);					\
+    vy2 = vec_mergeh (vx1, vx5);					\
+    vy3 = vec_mergel (vx1, vx5);					\
+    vy4 = vec_mergeh (vx2, vx6);					\
+    vy5 = vec_mergel (vx2, vx6);					\
+    vy6 = vec_mergeh (vx3, vx7);					\
+    vy7 = vec_mergel (vx3, vx7);					\
+									\
+    vx0 = vec_adds (vec_mergeh (vy0, vy4), bias);			\
+    vx1 = vec_mergel (vy0, vy4);					\
+    vx2 = vec_mergeh (vy1, vy5);					\
+    vx3 = vec_mergel (vy1, vy5);					\
+    vx4 = vec_mergeh (vy2, vy6);					\
+    vx5 = vec_mergel (vy2, vy6);					\
+    vx6 = vec_mergeh (vy3, vy7);					\
+    vx7 = vec_mergel (vy3, vy7);					\
+									\
+    IDCT_HALF								\
+									\
+    shift = vec_splat_u16 (6);						\
+    vx0 = vec_sra (vy0, shift);						\
+    vx1 = vec_sra (vy1, shift);						\
+    vx2 = vec_sra (vy2, shift);						\
+    vx3 = vec_sra (vy3, shift);						\
+    vx4 = vec_sra (vy4, shift);						\
+    vx5 = vec_sra (vy5, shift);						\
+    vx6 = vec_sra (vy6, shift);						\
+    vx7 = vec_sra (vy7, shift);
+/* AltiVec IDCT of _block; the 8x8 result overwrites dest, _block is zeroed. */
+void mpeg2_idct_copy_altivec (int16_t * const _block, uint8_t * dest,
+			      const int stride)
+{
+    vector_s16_t * const block = (vector_s16_t *)_block;
+    vector_u8_t tmp;
+    /* IDCT declares its own locals and leaves the result rows in vx0..vx7 */
+    IDCT
+    /* COPY packs one row to unsigned bytes and stores it as two 32-bit words */
+#define COPY(dest,src)						\
+    tmp = vec_packsu (src, src);				\
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);	\
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+
+    COPY (dest, vx0)	dest += stride;
+    COPY (dest, vx1)	dest += stride;
+    COPY (dest, vx2)	dest += stride;
+    COPY (dest, vx3)	dest += stride;
+    COPY (dest, vx4)	dest += stride;
+    COPY (dest, vx5)	dest += stride;
+    COPY (dest, vx6)	dest += stride;
+    COPY (dest, vx7)
+
+    block[0] = block[1] = block[2] = block[3] = zero;	/* clear all 64 coefficients */
+    block[4] = block[5] = block[6] = block[7] = zero;
+}
+/* AltiVec IDCT of _block, added to dest with saturation; last is not used. */
+void mpeg2_idct_add_altivec (const int last, int16_t * const _block,
+			     uint8_t * dest, const int stride)
+{
+    vector_s16_t * const block = (vector_s16_t *)_block;
+    vector_u8_t tmp;
+    vector_s16_t tmp2, tmp3;
+    vector_u8_t perm0;
+    vector_u8_t perm1;
+    vector_u8_t p0, p1, p;
+    /* IDCT declares its own locals and leaves the result rows in vx0..vx7 */
+    IDCT
+    /* perm0 follows the alignment of dest, perm1 that of dest + stride */
+    p0 = vec_lvsl (0, dest);
+    p1 = vec_lvsl (stride, dest);
+    p = vec_splat_u8 (-1);
+    perm0 = vec_mergeh (p, p0);
+    perm1 = vec_mergeh (p, p1);
+    /* ADD widens eight dest bytes to 16 bits via perm, adds src, packs, stores */
+#define ADD(dest,src,perm)						\
+    /* *(uint64_t *)&tmp = *(uint64_t *)dest; */			\
+    tmp = vec_ld (0, dest);						\
+    tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm);	\
+    tmp3 = vec_adds (tmp2, src);					\
+    tmp = vec_packsu (tmp3, tmp3);					\
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);		\
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+    /* NOTE(review): alternating perms presumes alignment repeats every 2 rows */
+    ADD (dest, vx0, perm0)	dest += stride;
+    ADD (dest, vx1, perm1)	dest += stride;
+    ADD (dest, vx2, perm0)	dest += stride;
+    ADD (dest, vx3, perm1)	dest += stride;
+    ADD (dest, vx4, perm0)	dest += stride;
+    ADD (dest, vx5, perm1)	dest += stride;
+    ADD (dest, vx6, perm0)	dest += stride;
+    ADD (dest, vx7, perm1)
+
+    block[0] = block[1] = block[2] = block[3] = zero;	/* clear all 64 coefficients */
+    block[4] = block[5] = block[6] = block[7] = zero;
+}
+/* Patch both scan tables in place; mpeg2_idct_init calls this for AltiVec. */
+void mpeg2_idct_altivec_init (void)
+{
+    extern uint8_t mpeg2_scan_norm[64];
+    extern uint8_t mpeg2_scan_alt[64];
+    int idx;
+
+    /* the altivec idct uses a transposed input, so we patch scan tables */
+    for (idx = 0; idx < 64; idx++) {
+	const int norm = mpeg2_scan_norm[idx];
+	const int alt = mpeg2_scan_alt[idx];
+	mpeg2_scan_norm[idx] = ((norm & 7) << 3) | (norm >> 3);
+	mpeg2_scan_alt[idx] = ((alt & 7) << 3) | (alt >> 3);
+    }
+}
+
+#endif
diff --git a/src/libmpeg2new/libmpeg2/idct_mlib.c b/src/libmpeg2new/libmpeg2/idct_mlib.c
new file mode 100644
index 000000000..83c39738d
--- /dev/null
+++ b/src/libmpeg2new/libmpeg2/idct_mlib.c
@@ -0,0 +1,60 @@
+/*
+ * idct_mlib.c
+ * Copyright (C) 1999-2003 Håkan Hjort <d95hjort@dtek.chalmers.se>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#ifdef LIBMPEG2_MLIB
+
+#include <mlib_types.h>
+#include <mlib_status.h>
+#include <mlib_sys.h>
+#include <mlib_video.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "mpeg2_internal.h"
+/* mediaLib IEEE IDCT of block (in place), added to dest; block is then zeroed. */
+void mpeg2_idct_add_mlib (const int last, int16_t * const block,
+			  uint8_t * const dest, const int stride)
+{
+    mlib_VideoIDCT_IEEE_S16_S16 (block, block);
+    mlib_VideoAddBlock_U8_S16 (dest, block, stride);
+    memset (block, 0, 64 * sizeof (*block));	/* last is not used here */
+}
+/* mediaLib 8x8 IDCT of block written straight to dest; block is then zeroed. */
+void mpeg2_idct_copy_mlib_non_ieee (int16_t * const block,
+				    uint8_t * const dest, const int stride)
+{
+    mlib_VideoIDCT8x8_U8_S16 (dest, block, stride);
+    memset (block, 0, 64 * sizeof (*block));
+}
+/* mediaLib 8x8 IDCT of block (in place), added to dest; block is then zeroed. */
+void mpeg2_idct_add_mlib_non_ieee (const int last, int16_t * const block,
+				   uint8_t * const dest, const int stride)
+{
+    mlib_VideoIDCT8x8_S16_S16 (block, block);
+    mlib_VideoAddBlock_U8_S16 (dest, block, stride);
+    memset (block, 0, 64 * sizeof (*block));	/* last is not used here */
+}
+
+#endif
diff --git a/src/libmpeg2new/libmpeg2/idct_mmx.c b/src/libmpeg2new/libmpeg2/idct_mmx.c
new file mode 100644
index 000000000..e2afe6bb4
--- /dev/null
+++ b/src/libmpeg2new/libmpeg2/idct_mmx.c
@@ -0,0 +1,814 @@
+/*
+ * idct_mmx.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#ifdef ARCH_X86
+
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "mpeg2_internal.h"
+#include "attributes.h"
+#include "mmx.h"
+
+#define ROW_SHIFT 11	/* right shift applied to the row pass results */
+#define COL_SHIFT 6
+/* round: (bias + 0.5) scaled by 2^ROW_SHIFT; rounder: that value as a pair */
+#define round(bias) ((int)(((bias)+0.5) * (1<<ROW_SHIFT)))
+#define rounder(bias) {round (bias), round (bias)}
+
+
+#if 0
+/* C row IDCT - it's just here to document the MMXEXT and MMX versions */
+static inline void idct_row (int16_t * row, int offset,
+			     int16_t * table, int32_t * rounder)
+{
+    int C1, C2, C3, C4, C5, C6, C7;
+    int a0, a1, a2, a3, b0, b1, b2, b3;
+
+    row += offset;
+
+    C1 = table[1];
+    C2 = table[2];
+    C3 = table[3];
+    C4 = table[4];
+    C5 = table[5];
+    C6 = table[6];
+    C7 = table[7];
+
+    a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + *rounder;	/* even inputs */
+    a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + *rounder;
+    a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + *rounder;
+    a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + *rounder;
+
+    b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];	/* odd inputs */
+    b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
+    b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
+    b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
+
+    row[0] = (a0 + b0) >> ROW_SHIFT;
+    row[1] = (a1 + b1) >> ROW_SHIFT;
+    row[2] = (a2 + b2) >> ROW_SHIFT;
+    row[3] = (a3 + b3) >> ROW_SHIFT;
+    row[4] = (a3 - b3) >> ROW_SHIFT;
+    row[5] = (a2 - b2) >> ROW_SHIFT;
+    row[6] = (a1 - b1) >> ROW_SHIFT;
+    row[7] = (a0 - b0) >> ROW_SHIFT;
+}
+#endif
+
+
+/* MMXEXT row IDCT */
+/* 32 coefficients in the order the mmxext_row* loads (table+0 .. table+28) use */
+#define mmxext_table(c1,c2,c3,c4,c5,c6,c7)	{  c4,  c2, -c4, -c2,	\
+						   c4,  c6,  c4,  c6,	\
+						   c1,  c3, -c1, -c5,	\
+						   c5,  c7,  c3, -c7,	\
+						   c4, -c6,  c4, -c6,	\
+						  -c4,  c2,  c4, -c2,	\
+						   c5, -c1,  c3, -c1,	\
+						   c7,  c3,  c7, -c5 }
+/* Row prologue: loads row[offset..offset+7] and table[0..7] for mmxext_row. */
+static inline void mmxext_row_head (int16_t * const row, const int offset,
+				    const int16_t * const table)
+{
+    movq_m2r (*(row+offset), mm2);	/* mm2 = x6 x4 x2 x0 */
+
+    movq_m2r (*(row+offset+4), mm5);	/* mm5 = x7 x5 x3 x1 */
+    movq_r2r (mm2, mm0);		/* mm0 = x6 x4 x2 x0 */
+
+    movq_m2r (*table, mm3);		/* mm3 = -C2 -C4 C2 C4 */
+    movq_r2r (mm5, mm6);		/* mm6 = x7 x5 x3 x1 */
+
+    movq_m2r (*(table+4), mm4);		/* mm4 = C6 C4 C6 C4 */
+    pmaddwd_r2r (mm0, mm3);		/* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */
+
+    pshufw_r2r (mm2, mm2, 0x4e);	/* mm2 = x2 x0 x6 x4 */
+}
+/* Row core: leaves y1 y0 in mm1, y6 y7 in mm3, unshifted sums in mm0 and mm4. */
+static inline void mmxext_row (const int16_t * const table,
+			       const int32_t * const rounder)
+{
+    movq_m2r (*(table+8), mm1);		/* mm1 = -C5 -C1 C3 C1 */
+    pmaddwd_r2r (mm2, mm4);		/* mm4 = C4*x0+C6*x2 C4*x4+C6*x6 */
+
+    pmaddwd_m2r (*(table+16), mm0);	/* mm0 = C4*x4-C6*x6 C4*x0-C6*x2 */
+    pshufw_r2r (mm6, mm6, 0x4e);	/* mm6 = x3 x1 x7 x5 */
+
+    movq_m2r (*(table+12), mm7);	/* mm7 = -C7 C3 C7 C5 */
+    pmaddwd_r2r (mm5, mm1);		/* mm1 = -C1*x5-C5*x7 C1*x1+C3*x3 */
+
+    paddd_m2r (*rounder, mm3);		/* mm3 += rounder */
+    pmaddwd_r2r (mm6, mm7);		/* mm7 = C3*x1-C7*x3 C5*x5+C7*x7 */
+
+    pmaddwd_m2r (*(table+20), mm2);	/* mm2 = C4*x0-C2*x2 -C4*x4+C2*x6 */
+    paddd_r2r (mm4, mm3);		/* mm3 = a1 a0 + rounder */
+
+    pmaddwd_m2r (*(table+24), mm5);	/* mm5 = C3*x5-C1*x7 C5*x1-C1*x3 */
+    movq_r2r (mm3, mm4);		/* mm4 = a1 a0 + rounder */
+
+    pmaddwd_m2r (*(table+28), mm6);	/* mm6 = C7*x1-C5*x3 C7*x5+C3*x7 */
+    paddd_r2r (mm7, mm1);		/* mm1 = b1 b0 */
+
+    paddd_m2r (*rounder, mm0);		/* mm0 += rounder */
+    psubd_r2r (mm1, mm3);		/* mm3 = a1-b1 a0-b0 + rounder */
+
+    psrad_i2r (ROW_SHIFT, mm3);		/* mm3 = y6 y7 */
+    paddd_r2r (mm4, mm1);		/* mm1 = a1+b1 a0+b0 + rounder */
+
+    paddd_r2r (mm2, mm0);		/* mm0 = a3 a2 + rounder */
+    psrad_i2r (ROW_SHIFT, mm1);		/* mm1 = y1 y0 */
+
+    paddd_r2r (mm6, mm5);		/* mm5 = b3 b2 */
+    movq_r2r (mm0, mm4);		/* mm4 = a3 a2 + rounder */
+
+    paddd_r2r (mm5, mm0);		/* mm0 = a3+b3 a2+b2 + rounder */
+    psubd_r2r (mm5, mm4);		/* mm4 = a3-b3 a2-b2 + rounder */
+}
+/* Row epilogue: final shifts, pack to 16 bits, store to row[store..store+7]. */
+static inline void mmxext_row_tail (int16_t * const row, const int store)
+{
+    psrad_i2r (ROW_SHIFT, mm0);		/* mm0 = y3 y2 */
+
+    psrad_i2r (ROW_SHIFT, mm4);		/* mm4 = y4 y5 */
+
+    packssdw_r2r (mm0, mm1);		/* mm1 = y3 y2 y1 y0 */
+
+    packssdw_r2r (mm3, mm4);		/* mm4 = y6 y7 y4 y5 */
+
+    movq_r2m (mm1, *(row+store));	/* save y3 y2 y1 y0 */
+    pshufw_r2r (mm4, mm4, 0xb1);	/* mm4 = y7 y6 y5 y4 */
+
+    /* slot */
+
+    movq_r2m (mm4, *(row+store+4));	/* save y7 y6 y5 y4 */
+}
+/* The tail steps for row+store interleaved with the head steps for row+offset. */
+static inline void mmxext_row_mid (int16_t * const row, const int store,
+				   const int offset,
+				   const int16_t * const table)
+{
+    movq_m2r (*(row+offset), mm2);	/* mm2 = x6 x4 x2 x0 */
+    psrad_i2r (ROW_SHIFT, mm0);		/* mm0 = y3 y2 */
+
+    movq_m2r (*(row+offset+4), mm5);	/* mm5 = x7 x5 x3 x1 */
+    psrad_i2r (ROW_SHIFT, mm4);		/* mm4 = y4 y5 */
+
+    packssdw_r2r (mm0, mm1);		/* mm1 = y3 y2 y1 y0 */
+    movq_r2r (mm5, mm6);		/* mm6 = x7 x5 x3 x1 */
+
+    packssdw_r2r (mm3, mm4);		/* mm4 = y6 y7 y4 y5 */
+    movq_r2r (mm2, mm0);		/* mm0 = x6 x4 x2 x0 */
+
+    movq_r2m (mm1, *(row+store));	/* save y3 y2 y1 y0 */
+    pshufw_r2r (mm4, mm4, 0xb1);	/* mm4 = y7 y6 y5 y4 */
+
+    movq_m2r (*table, mm3);		/* mm3 = -C2 -C4 C2 C4 */
+    movq_r2m (mm4, *(row+store+4));	/* save y7 y6 y5 y4 */
+
+    pmaddwd_r2r (mm0, mm3);		/* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */
+
+    movq_m2r (*(table+4), mm4);		/* mm4 = C6 C4 C6 C4 */
+    pshufw_r2r (mm2, mm2, 0x4e);	/* mm2 = x2 x0 x6 x4 */
+}
+
+
+/* MMX row IDCT */
+/* 32 coefficients in the order the mmx_row* loads (table+0 .. table+28) use */
+#define mmx_table(c1,c2,c3,c4,c5,c6,c7)	{  c4,  c2,  c4,  c6,	\
+					   c4,  c6, -c4, -c2,	\
+					   c1,  c3,  c3, -c7,	\
+					   c5,  c7, -c1, -c5,	\
+					   c4, -c6,  c4, -c2,	\
+					  -c4,  c2,  c4, -c6,	\
+					   c5, -c1,  c7, -c5,	\
+					   c7,  c3,  c3, -c1 }
+/* Row prologue: loads row[offset..offset+7] and table[0..11] for mmx_row. */
+static inline void mmx_row_head (int16_t * const row, const int offset,
+				 const int16_t * const table)
+{
+    movq_m2r (*(row+offset), mm2);	/* mm2 = x6 x4 x2 x0 */
+
+    movq_m2r (*(row+offset+4), mm5);	/* mm5 = x7 x5 x3 x1 */
+    movq_r2r (mm2, mm0);		/* mm0 = x6 x4 x2 x0 */
+
+    movq_m2r (*table, mm3);		/* mm3 = C6 C4 C2 C4 */
+    movq_r2r (mm5, mm6);		/* mm6 = x7 x5 x3 x1 */
+
+    punpckldq_r2r (mm0, mm0);		/* mm0 = x2 x0 x2 x0 */
+
+    movq_m2r (*(table+4), mm4);		/* mm4 = -C2 -C4 C6 C4 */
+    pmaddwd_r2r (mm0, mm3);		/* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */
+
+    movq_m2r (*(table+8), mm1);		/* mm1 = -C7 C3 C3 C1 */
+    punpckhdq_r2r (mm2, mm2);		/* mm2 = x6 x4 x6 x4 */
+}
+/* Row core: leaves y1 y0 in mm1, y6 y7 in mm3, unshifted sums in mm0 and mm7. */
+static inline void mmx_row (const int16_t * const table,
+			    const int32_t * const rounder)
+{
+    pmaddwd_r2r (mm2, mm4);		/* mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 */
+    punpckldq_r2r (mm5, mm5);		/* mm5 = x3 x1 x3 x1 */
+
+    pmaddwd_m2r (*(table+16), mm0);	/* mm0 = C4*x0-C2*x2 C4*x0-C6*x2 */
+    punpckhdq_r2r (mm6, mm6);		/* mm6 = x7 x5 x7 x5 */
+
+    movq_m2r (*(table+12), mm7);	/* mm7 = -C5 -C1 C7 C5 */
+    pmaddwd_r2r (mm5, mm1);		/* mm1 = C3*x1-C7*x3 C1*x1+C3*x3 */
+
+    paddd_m2r (*rounder, mm3);		/* mm3 += rounder */
+    pmaddwd_r2r (mm6, mm7);		/* mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 */
+
+    pmaddwd_m2r (*(table+20), mm2);	/* mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 */
+    paddd_r2r (mm4, mm3);		/* mm3 = a1 a0 + rounder */
+
+    pmaddwd_m2r (*(table+24), mm5);	/* mm5 = C7*x1-C5*x3 C5*x1-C1*x3 */
+    movq_r2r (mm3, mm4);		/* mm4 = a1 a0 + rounder */
+
+    pmaddwd_m2r (*(table+28), mm6);	/* mm6 = C3*x5-C1*x7 C7*x5+C3*x7 */
+    paddd_r2r (mm7, mm1);		/* mm1 = b1 b0 */
+
+    paddd_m2r (*rounder, mm0);		/* mm0 += rounder */
+    psubd_r2r (mm1, mm3);		/* mm3 = a1-b1 a0-b0 + rounder */
+
+    psrad_i2r (ROW_SHIFT, mm3);		/* mm3 = y6 y7 */
+    paddd_r2r (mm4, mm1);		/* mm1 = a1+b1 a0+b0 + rounder */
+
+    paddd_r2r (mm2, mm0);		/* mm0 = a3 a2 + rounder */
+    psrad_i2r (ROW_SHIFT, mm1);		/* mm1 = y1 y0 */
+
+    paddd_r2r (mm6, mm5);		/* mm5 = b3 b2 */
+    movq_r2r (mm0, mm7);		/* mm7 = a3 a2 + rounder */
+
+    paddd_r2r (mm5, mm0);		/* mm0 = a3+b3 a2+b2 + rounder */
+    psubd_r2r (mm5, mm7);		/* mm7 = a3-b3 a2-b2 + rounder */
+}
+/* Row epilogue: final shifts, pack to 16 bits, store to row[store..store+7]. */
+static inline void mmx_row_tail (int16_t * const row, const int store)
+{
+    psrad_i2r (ROW_SHIFT, mm0);		/* mm0 = y3 y2 */
+
+    psrad_i2r (ROW_SHIFT, mm7);		/* mm7 = y4 y5 */
+
+    packssdw_r2r (mm0, mm1);		/* mm1 = y3 y2 y1 y0 */
+
+    packssdw_r2r (mm3, mm7);		/* mm7 = y6 y7 y4 y5 */
+
+    movq_r2m (mm1, *(row+store));	/* save y3 y2 y1 y0 */
+    movq_r2r (mm7, mm4);		/* mm4 = y6 y7 y4 y5 */
+
+    pslld_i2r (16, mm7);		/* mm7 = y7 0 y5 0 */
+
+    psrld_i2r (16, mm4);		/* mm4 = 0 y6 0 y4 */
+
+    por_r2r (mm4, mm7);			/* mm7 = y7 y6 y5 y4 */
+
+    /* slot */
+
+    movq_r2m (mm7, *(row+store+4));	/* save y7 y6 y5 y4 */
+}
+/* The tail steps for row+store interleaved with the head steps for row+offset. */
+static inline void mmx_row_mid (int16_t * const row, const int store,
+				const int offset, const int16_t * const table)
+{
+    movq_m2r (*(row+offset), mm2);	/* mm2 = x6 x4 x2 x0 */
+    psrad_i2r (ROW_SHIFT, mm0);		/* mm0 = y3 y2 */
+
+    movq_m2r (*(row+offset+4), mm5);	/* mm5 = x7 x5 x3 x1 */
+    psrad_i2r (ROW_SHIFT, mm7);		/* mm7 = y4 y5 */
+
+    packssdw_r2r (mm0, mm1);		/* mm1 = y3 y2 y1 y0 */
+    movq_r2r (mm5, mm6);		/* mm6 = x7 x5 x3 x1 */
+
+    packssdw_r2r (mm3, mm7);		/* mm7 = y6 y7 y4 y5 */
+    movq_r2r (mm2, mm0);		/* mm0 = x6 x4 x2 x0 */
+
+    movq_r2m (mm1, *(row+store));	/* save y3 y2 y1 y0 */
+    movq_r2r (mm7, mm1);		/* mm1 = y6 y7 y4 y5 */
+
+    punpckldq_r2r (mm0, mm0);		/* mm0 = x2 x0 x2 x0 */
+    psrld_i2r (16, mm7);		/* mm7 = 0 y6 0 y4 */
+
+    movq_m2r (*table, mm3);		/* mm3 = C6 C4 C2 C4 */
+    pslld_i2r (16, mm1);		/* mm1 = y7 0 y5 0 */
+
+    movq_m2r (*(table+4), mm4);		/* mm4 = -C2 -C4 C6 C4 */
+    por_r2r (mm1, mm7);			/* mm7 = y7 y6 y5 y4 */
+
+    movq_m2r (*(table+8), mm1);		/* mm1 = -C7 C3 C3 C1 */
+    punpckhdq_r2r (mm2, mm2);		/* mm2 = x6 x4 x6 x4 */
+
+    movq_r2m (mm7, *(row+store+4));	/* save y7 y6 y5 y4 */
+    pmaddwd_r2r (mm0, mm3);		/* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */
+}
+
+
+#if 0
+/* C column IDCT - it's just here to document the MMXEXT and MMX versions */
+static inline void idct_col (int16_t * col, int offset)
+{
+/* multiplication - as implemented on mmx */
+#define F(c,x) (((c) * (x)) >> 16)
+
+/* saturation - it helps us handle torture test cases */
+#define S(x) (((x)>32767) ? 32767 : ((x)<-32768) ? -32768 : (x))
+
+    int16_t x0, x1, x2, x3, x4, x5, x6, x7;
+    int16_t y0, y1, y2, y3, y4, y5, y6, y7;
+    int16_t a0, a1, a2, a3, b0, b1, b2, b3;
+    int16_t u04, v04, u26, v26, u17, v17, u35, v35, u12, v12;
+
+    col += offset;		/* select the column to transform */
+
+    x0 = col[0*8];		/* consecutive rows are 8 coefficients apart */
+    x1 = col[1*8];
+    x2 = col[2*8];
+    x3 = col[3*8];
+    x4 = col[4*8];
+    x5 = col[5*8];
+    x6 = col[6*8];
+    x7 = col[7*8];
+
+    u04 = S (x0 + x4);		/* even part */
+    v04 = S (x0 - x4);
+    u26 = S (F (T2, x6) + x2);
+    v26 = S (F (T2, x2) - x6);
+
+    a0 = S (u04 + u26);
+    a1 = S (v04 + v26);
+    a2 = S (v04 - v26);
+    a3 = S (u04 - u26);
+
+    u17 = S (F (T1, x7) + x1);	/* odd part */
+    v17 = S (F (T1, x1) - x7);
+    u35 = S (F (T3, x5) + x3);
+    v35 = S (F (T3, x3) - x5);
+
+    b0 = S (u17 + u35);
+    b3 = S (v17 - v35);
+    u12 = S (u17 - u35);
+    v12 = S (v17 + v35);
+    u12 = S (2 * F (C4, u12));	/* F() shifts by 16, C4 is scaled by 2^15 */
+    v12 = S (2 * F (C4, v12));
+    b1 = S (u12 + v12);
+    b2 = S (u12 - v12);
+
+    y0 = S (a0 + b0) >> COL_SHIFT;
+    y1 = S (a1 + b1) >> COL_SHIFT;
+    y2 = S (a2 + b2) >> COL_SHIFT;
+    y3 = S (a3 + b3) >> COL_SHIFT;
+
+    y4 = S (a3 - b3) >> COL_SHIFT;
+    y5 = S (a2 - b2) >> COL_SHIFT;
+    y6 = S (a1 - b1) >> COL_SHIFT;
+    y7 = S (a0 - b0) >> COL_SHIFT;
+
+    col[0*8] = y0;		/* results overwrite the input column */
+    col[1*8] = y1;
+    col[2*8] = y2;
+    col[3*8] = y3;
+    col[4*8] = y4;
+    col[5*8] = y5;
+    col[6*8] = y6;
+    col[7*8] = y7;
+}
+#endif
+
+
+/* MMX column IDCT: 4 adjacent columns at col+offset, transformed in place */
+static inline void idct_col (int16_t * const col, const int offset)
+{
+#define T1 13036
+#define T2 27146
+#define T3 43790	/* exceeds a signed short; the code below uses it as T3-1 */
+#define C4 23170
+
+    static const short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1};
+    static const short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2};
+    static const short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3};
+    static const short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4};
+
+    /* column code adapted from peter gubanov */
+    /* http://www.elecard.com/peter/idct.shtml */
+
+    movq_m2r (*_T1, mm0);		/* mm0 = T1 */
+
+    movq_m2r (*(col+offset+1*8), mm1);	/* mm1 = x1 */
+    movq_r2r (mm0, mm2);		/* mm2 = T1 */
+
+    movq_m2r (*(col+offset+7*8), mm4);	/* mm4 = x7 */
+    pmulhw_r2r (mm1, mm0);		/* mm0 = T1*x1 */
+
+    movq_m2r (*_T3, mm5);		/* mm5 = T3-1 */
+    pmulhw_r2r (mm4, mm2);		/* mm2 = T1*x7 */
+
+    movq_m2r (*(col+offset+5*8), mm6);	/* mm6 = x5 */
+    movq_r2r (mm5, mm7);		/* mm7 = T3-1 */
+
+    movq_m2r (*(col+offset+3*8), mm3);	/* mm3 = x3 */
+    psubsw_r2r (mm4, mm0);		/* mm0 = v17 */
+
+    movq_m2r (*_T2, mm4);		/* mm4 = T2 */
+    pmulhw_r2r (mm3, mm5);		/* mm5 = (T3-1)*x3 */
+
+    paddsw_r2r (mm2, mm1);		/* mm1 = u17 */
+    pmulhw_r2r (mm6, mm7);		/* mm7 = (T3-1)*x5 */
+
+    /* slot */
+
+    movq_r2r (mm4, mm2);		/* mm2 = T2 */
+    paddsw_r2r (mm3, mm5);		/* mm5 = T3*x3 */
+
+    pmulhw_m2r (*(col+offset+2*8), mm4);/* mm4 = T2*x2 */
+    paddsw_r2r (mm6, mm7);		/* mm7 = T3*x5 */
+
+    psubsw_r2r (mm6, mm5);		/* mm5 = v35 */
+    paddsw_r2r (mm3, mm7);		/* mm7 = u35 */
+
+    movq_m2r (*(col+offset+6*8), mm3);	/* mm3 = x6 */
+    movq_r2r (mm0, mm6);		/* mm6 = v17 */
+
+    pmulhw_r2r (mm3, mm2);		/* mm2 = T2*x6 */
+    psubsw_r2r (mm5, mm0);		/* mm0 = b3 */
+
+    psubsw_r2r (mm3, mm4);		/* mm4 = v26 */
+    paddsw_r2r (mm6, mm5);		/* mm5 = v12 */
+
+    movq_r2m (mm0, *(col+offset+3*8));	/* save b3 in scratch0 (x3 slot) */
+    movq_r2r (mm1, mm6);		/* mm6 = u17 */
+
+    paddsw_m2r (*(col+offset+2*8), mm2);/* mm2 = u26 */
+    paddsw_r2r (mm7, mm6);		/* mm6 = b0 */
+
+    psubsw_r2r (mm7, mm1);		/* mm1 = u12 */
+    movq_r2r (mm1, mm7);		/* mm7 = u12 */
+
+    movq_m2r (*(col+offset+0*8), mm3);	/* mm3 = x0 */
+    paddsw_r2r (mm5, mm1);		/* mm1 = u12+v12 */
+
+    movq_m2r (*_C4, mm0);		/* mm0 = C4/2 */
+    psubsw_r2r (mm5, mm7);		/* mm7 = u12-v12 */
+
+    movq_r2m (mm6, *(col+offset+5*8));	/* save b0 in scratch1 (x5 slot) */
+    pmulhw_r2r (mm0, mm1);		/* mm1 = b1/2 */
+
+    movq_r2r (mm4, mm6);		/* mm6 = v26 */
+    pmulhw_r2r (mm0, mm7);		/* mm7 = b2/2 */
+
+    movq_m2r (*(col+offset+4*8), mm5);	/* mm5 = x4 */
+    movq_r2r (mm3, mm0);		/* mm0 = x0 */
+
+    psubsw_r2r (mm5, mm3);		/* mm3 = v04 */
+    paddsw_r2r (mm5, mm0);		/* mm0 = u04 */
+
+    paddsw_r2r (mm3, mm4);		/* mm4 = a1 */
+    movq_r2r (mm0, mm5);		/* mm5 = u04 */
+
+    psubsw_r2r (mm6, mm3);		/* mm3 = a2 */
+    paddsw_r2r (mm2, mm5);		/* mm5 = a0 */
+
+    paddsw_r2r (mm1, mm1);		/* mm1 = b1 */
+    psubsw_r2r (mm2, mm0);		/* mm0 = a3 */
+
+    paddsw_r2r (mm7, mm7);		/* mm7 = b2 */
+    movq_r2r (mm3, mm2);		/* mm2 = a2 */
+
+    movq_r2r (mm4, mm6);		/* mm6 = a1 */
+    paddsw_r2r (mm7, mm3);		/* mm3 = a2+b2 */
+
+    psraw_i2r (COL_SHIFT, mm3);		/* mm3 = y2 */
+    paddsw_r2r (mm1, mm4);		/* mm4 = a1+b1 */
+
+    psraw_i2r (COL_SHIFT, mm4);		/* mm4 = y1 */
+    psubsw_r2r (mm1, mm6);		/* mm6 = a1-b1 */
+
+    movq_m2r (*(col+offset+5*8), mm1);	/* mm1 = b0 (from scratch1) */
+    psubsw_r2r (mm7, mm2);		/* mm2 = a2-b2 */
+
+    psraw_i2r (COL_SHIFT, mm6);		/* mm6 = y6 */
+    movq_r2r (mm5, mm7);		/* mm7 = a0 */
+
+    movq_r2m (mm4, *(col+offset+1*8));	/* save y1 */
+    psraw_i2r (COL_SHIFT, mm2);		/* mm2 = y5 */
+
+    movq_r2m (mm3, *(col+offset+2*8));	/* save y2 */
+    paddsw_r2r (mm1, mm5);		/* mm5 = a0+b0 */
+
+    movq_m2r (*(col+offset+3*8), mm4);	/* mm4 = b3 (from scratch0) */
+    psubsw_r2r (mm1, mm7);		/* mm7 = a0-b0 */
+
+    psraw_i2r (COL_SHIFT, mm5);		/* mm5 = y0 */
+    movq_r2r (mm0, mm3);		/* mm3 = a3 */
+
+    movq_r2m (mm2, *(col+offset+5*8));	/* save y5 */
+    psubsw_r2r (mm4, mm3);		/* mm3 = a3-b3 */
+
+    psraw_i2r (COL_SHIFT, mm7);		/* mm7 = y7 */
+    paddsw_r2r (mm0, mm4);		/* mm4 = a3+b3 */
+
+    movq_r2m (mm5, *(col+offset+0*8));	/* save y0 */
+    psraw_i2r (COL_SHIFT, mm3);		/* mm3 = y4 */
+
+    movq_r2m (mm6, *(col+offset+6*8));	/* save y6 */
+    psraw_i2r (COL_SHIFT, mm4);		/* mm4 = y3 */
+
+    movq_r2m (mm7, *(col+offset+7*8));	/* save y7 */
+
+    movq_r2m (mm3, *(col+offset+4*8));	/* save y4 */
+
+    movq_r2m (mm4, *(col+offset+3*8));	/* save y3 */
+}
+
+
+static const int32_t rounder0[] ATTR_ALIGN(8) =	/* rounderN is used for row N */
+    rounder ((1 << (COL_SHIFT - 1)) - 0.5);	/* half of 1 << COL_SHIFT, less 0.5 */
+static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0);
+static const int32_t rounder1[] ATTR_ALIGN(8) =
+    rounder (1.25683487303);	/* C1*(C1/C4+C1+C7)/2 */
+static const int32_t rounder7[] ATTR_ALIGN(8) =
+    rounder (-0.25);		/* C1*(C7/C4+C7-C1)/2 */
+static const int32_t rounder2[] ATTR_ALIGN(8) =
+    rounder (0.60355339059);	/* C2 * (C6+C2)/2 */
+static const int32_t rounder6[] ATTR_ALIGN(8) =
+    rounder (-0.25);		/* C2 * (C6-C2)/2 */
+static const int32_t rounder3[] ATTR_ALIGN(8) =
+    rounder (0.087788325588);	/* C3*(-C3/C4+C3+C5)/2 */
+static const int32_t rounder5[] ATTR_ALIGN(8) =
+    rounder (-0.441341716183);	/* C3*(-C5/C4+C5-C3)/2 */
+
+
+#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid)	\
+static inline void idct (int16_t * const block)				\
+{   /* rows in the order 0,4,1,7,2,6,3,5, then both groups of 4 columns */ \
+    static const int16_t table04[] ATTR_ALIGN(16) =			\
+	table (22725, 21407, 19266, 16384, 12873,  8867, 4520);		\
+    static const int16_t table17[] ATTR_ALIGN(16) =			\
+	table (31521, 29692, 26722, 22725, 17855, 12299, 6270);		\
+    static const int16_t table26[] ATTR_ALIGN(16) =			\
+	table (29692, 27969, 25172, 21407, 16819, 11585, 5906);		\
+    static const int16_t table35[] ATTR_ALIGN(16) =			\
+	table (26722, 25172, 22654, 19266, 15137, 10426, 5315);		\
+    /* each idct_row_mid stores the previous row and loads the next */	\
+    idct_row_head (block, 0*8, table04);				\
+    idct_row (table04, rounder0);					\
+    idct_row_mid (block, 0*8, 4*8, table04);				\
+    idct_row (table04, rounder4);					\
+    idct_row_mid (block, 4*8, 1*8, table17);				\
+    idct_row (table17, rounder1);					\
+    idct_row_mid (block, 1*8, 7*8, table17);				\
+    idct_row (table17, rounder7);					\
+    idct_row_mid (block, 7*8, 2*8, table26);				\
+    idct_row (table26, rounder2);					\
+    idct_row_mid (block, 2*8, 6*8, table26);				\
+    idct_row (table26, rounder6);					\
+    idct_row_mid (block, 6*8, 3*8, table35);				\
+    idct_row (table35, rounder3);					\
+    idct_row_mid (block, 3*8, 5*8, table35);				\
+    idct_row (table35, rounder5);					\
+    idct_row_tail (block, 5*8);						\
+    /* column pass: idct_col handles 4 columns per call */		\
+    idct_col (block, 0);						\
+    idct_col (block, 4);						\
+}
+
+
+#define COPY_MMX(offset,r0,r1,r2)	/* load row at offset, store r2 */ \
+do {					\
+    movq_m2r (*(block+offset), r0);	\
+    dest += stride;			/* advance to the row r2 belongs to */ \
+    movq_m2r (*(block+offset+4), r1);	\
+    movq_r2m (r2, *dest);		\
+    packuswb_r2r (r1, r0);		/* 8 words -> 8 unsigned bytes */ \
+} while (0)
+
+static inline void block_copy (int16_t * const block, uint8_t * dest,
+			       const int stride)
+{   /* write the 8x8 int16 block to dest as 8 rows of saturated bytes */
+    movq_m2r (*(block+0*8), mm0);
+    movq_m2r (*(block+0*8+4), mm1);
+    movq_m2r (*(block+1*8), mm2);
+    packuswb_r2r (mm1, mm0);		/* mm0 = row 0 as bytes */
+    movq_m2r (*(block+1*8+4), mm3);
+    movq_r2m (mm0, *dest);		/* store row 0 */
+    packuswb_r2r (mm3, mm2);		/* mm2 = row 1 as bytes */
+    COPY_MMX (2*8, mm0, mm1, mm2);	/* stores row 1, packs row 2 */
+    COPY_MMX (3*8, mm2, mm3, mm0);
+    COPY_MMX (4*8, mm0, mm1, mm2);
+    COPY_MMX (5*8, mm2, mm3, mm0);
+    COPY_MMX (6*8, mm0, mm1, mm2);
+    COPY_MMX (7*8, mm2, mm3, mm0);	/* stores row 6, packs row 7 */
+    movq_r2m (mm2, *(dest+stride));	/* store row 7 */
+}
+
+
+#define ADD_MMX(offset,r1,r2,r3,r4)	/* store r3:r4, start next row */ \
+do {					\
+    movq_m2r (*(dest+2*stride), r1);	/* pixels of the row at offset */ \
+    packuswb_r2r (r4, r3);		\
+    movq_r2r (r1, r2);			\
+    dest += stride;			\
+    movq_r2m (r3, *dest);		\
+    punpcklbw_r2r (mm0, r1);		/* mm0 is zero: bytes -> words */ \
+    paddsw_m2r (*(block+offset), r1);	\
+    punpckhbw_r2r (mm0, r2);		\
+    paddsw_m2r (*(block+offset+4), r2);	\
+} while (0)
+
+static inline void block_add (int16_t * const block, uint8_t * dest,
+			      const int stride)
+{   /* dest[8x8] = saturate (dest + block), one 8-pixel row at a time */
+    movq_m2r (*dest, mm1);
+    pxor_r2r (mm0, mm0);		/* mm0 = 0, used to widen bytes */
+    movq_m2r (*(dest+stride), mm3);
+    movq_r2r (mm1, mm2);
+    punpcklbw_r2r (mm0, mm1);
+    movq_r2r (mm3, mm4);
+    paddsw_m2r (*(block+0*8), mm1);
+    punpckhbw_r2r (mm0, mm2);
+    paddsw_m2r (*(block+0*8+4), mm2);
+    punpcklbw_r2r (mm0, mm3);
+    paddsw_m2r (*(block+1*8), mm3);
+    packuswb_r2r (mm2, mm1);		/* mm1 = row 0 result as bytes */
+    punpckhbw_r2r (mm0, mm4);
+    movq_r2m (mm1, *dest);		/* store row 0 */
+    paddsw_m2r (*(block+1*8+4), mm4);
+    ADD_MMX (2*8, mm1, mm2, mm3, mm4);	/* stores row 1, sums row 2 */
+    ADD_MMX (3*8, mm3, mm4, mm1, mm2);
+    ADD_MMX (4*8, mm1, mm2, mm3, mm4);
+    ADD_MMX (5*8, mm3, mm4, mm1, mm2);
+    ADD_MMX (6*8, mm1, mm2, mm3, mm4);
+    ADD_MMX (7*8, mm3, mm4, mm1, mm2);	/* stores row 6, sums row 7 */
+    packuswb_r2r (mm4, mm3);
+    movq_r2m (mm3, *(dest+stride));	/* store row 7 */
+}
+
+
+static inline void block_zero (int16_t * const block)
+{   /* clear block[0..63]: 16 stores of 4 int16 each */
+    pxor_r2r (mm0, mm0);		/* mm0 = 0 */
+    movq_r2m (mm0, *(block+0*4));
+    movq_r2m (mm0, *(block+1*4));
+    movq_r2m (mm0, *(block+2*4));
+    movq_r2m (mm0, *(block+3*4));
+    movq_r2m (mm0, *(block+4*4));
+    movq_r2m (mm0, *(block+5*4));
+    movq_r2m (mm0, *(block+6*4));
+    movq_r2m (mm0, *(block+7*4));
+    movq_r2m (mm0, *(block+8*4));
+    movq_r2m (mm0, *(block+9*4));
+    movq_r2m (mm0, *(block+10*4));
+    movq_r2m (mm0, *(block+11*4));
+    movq_r2m (mm0, *(block+12*4));
+    movq_r2m (mm0, *(block+13*4));
+    movq_r2m (mm0, *(block+14*4));
+    movq_r2m (mm0, *(block+15*4));
+}
+
+
+#define CPU_MMXEXT 0
+#define CPU_MMX 1
+
+#define dup4(reg)			/* copy the low word to all 4 words */ \
+do {					\
+    if (cpu != CPU_MMXEXT) {		\
+	punpcklwd_r2r (reg, reg);	\
+	punpckldq_r2r (reg, reg);	\
+    } else				\
+	pshufw_r2r (reg, reg, 0x00);	\
+} while (0)
+
+static inline void block_add_DC (int16_t * const block, uint8_t * dest,
+				 const int stride, const int cpu)
+{   /* add the constant (block[0] + 4) >> 3 to all 8x8 pixels of dest */
+    movd_v2r ((block[0] + 4) >> 3, mm0);
+    pxor_r2r (mm1, mm1);
+    movq_m2r (*dest, mm2);
+    dup4 (mm0);				/* mm0 = DC in every word */
+    psubsw_r2r (mm0, mm1);		/* mm1 = -DC in every word */
+    packuswb_r2r (mm0, mm0);		/* mm0 = bytes of max (DC, 0) */
+    paddusb_r2r (mm0, mm2);
+    packuswb_r2r (mm1, mm1);		/* mm1 = bytes of max (-DC, 0) */
+    movq_m2r (*(dest + stride), mm3);
+    psubusb_r2r (mm1, mm2);		/* row 0 = dest + mm0 - mm1, saturated */
+    block[0] = 0;			/* DC consumed: leave the block clear */
+    paddusb_r2r (mm0, mm3);
+    movq_r2m (mm2, *dest);
+    psubusb_r2r (mm1, mm3);
+    movq_m2r (*(dest + 2*stride), mm2);
+    dest += stride;
+    movq_r2m (mm3, *dest);
+    paddusb_r2r (mm0, mm2);
+    movq_m2r (*(dest + 2*stride), mm3);
+    psubusb_r2r (mm1, mm2);
+    dest += stride;
+    paddusb_r2r (mm0, mm3);
+    movq_r2m (mm2, *dest);
+    psubusb_r2r (mm1, mm3);
+    movq_m2r (*(dest + 2*stride), mm2);
+    dest += stride;
+    movq_r2m (mm3, *dest);
+    paddusb_r2r (mm0, mm2);
+    movq_m2r (*(dest + 2*stride), mm3);
+    psubusb_r2r (mm1, mm2);
+    dest += stride;
+    paddusb_r2r (mm0, mm3);
+    movq_r2m (mm2, *dest);
+    psubusb_r2r (mm1, mm3);
+    movq_m2r (*(dest + 2*stride), mm2);
+    dest += stride;
+    movq_r2m (mm3, *dest);
+    paddusb_r2r (mm0, mm2);
+    movq_m2r (*(dest + 2*stride), mm3);
+    psubusb_r2r (mm1, mm2);
+    block[63] = 0;	/* NOTE(review): why block[63] may be set is not shown here */
+    paddusb_r2r (mm0, mm3);
+    movq_r2m (mm2, *(dest + stride));
+    psubusb_r2r (mm1, mm3);
+    movq_r2m (mm3, *(dest + 2*stride));
+}
+
+
+declare_idct (mmxext_idct, mmxext_table,	/* defines mmxext_idct () */
+	      mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid)
+
+void mpeg2_idct_copy_mmxext (int16_t * const block, uint8_t * const dest,
+			     const int stride)
+{   /* IDCT the block, write it to dest as bytes, then clear the block */
+    mmxext_idct (block);
+    block_copy (block, dest, stride);
+    block_zero (block);
+}
+
+void mpeg2_idct_add_mmxext (const int last, int16_t * const block,
+			    uint8_t * const dest, const int stride)
+{   /* add the IDCT of block to dest; NOTE(review): origin of 129 not shown */
+    if (last != 129 || (block[0] & 7) == 4) {
+	mmxext_idct (block);
+	block_add (block, dest, stride);
+	block_zero (block);
+    } else	/* last == 129 and (block[0] & 7) != 4: only block[0] is used */
+	block_add_DC (block, dest, stride, CPU_MMXEXT);
+}
+
+
+declare_idct (mmx_idct, mmx_table,	/* defines mmx_idct () */
+	      mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid)
+
+void mpeg2_idct_copy_mmx (int16_t * const block, uint8_t * const dest,
+			  const int stride)
+{   /* IDCT the block, write it to dest as bytes, then clear the block */
+    mmx_idct (block);
+    block_copy (block, dest, stride);
+    block_zero (block);
+}
+
+void mpeg2_idct_add_mmx (const int last, int16_t * const block,
+			 uint8_t * const dest, const int stride)
+{   /* add the IDCT of block to dest; NOTE(review): origin of 129 not shown */
+    if (last != 129 || (block[0] & 7) == 4) {
+	mmx_idct (block);
+	block_add (block, dest, stride);
+	block_zero (block);
+    } else	/* last == 129 and (block[0] & 7) != 4: only block[0] is used */
+	block_add_DC (block, dest, stride, CPU_MMX);
+}
+
+
+void mpeg2_idct_mmx_init (void)
+{
+    extern uint8_t mpeg2_scan_norm[64];
+    extern uint8_t mpeg2_scan_alt[64];
+    int i, j;
+
+    /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
+    /* in place: a second call would permute the tables once more */
+    for (i = 0; i < 64; i++) {
+	j = mpeg2_scan_norm[i];	/* keep bits 3-5, rotate the low 3 bits right */
+	mpeg2_scan_norm[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2);
+	j = mpeg2_scan_alt[i];	/* same mapping for the alternate scan */
+	mpeg2_scan_alt[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2);
+    }
+}
+
+#endif
diff --git a/src/libmpeg2new/libmpeg2/libmpeg2.pc.in b/src/libmpeg2new/libmpeg2/libmpeg2.pc.in
new file mode 100644
index 000000000..d54500b0e
--- /dev/null
+++ b/src/libmpeg2new/libmpeg2/libmpeg2.pc.in
@@ -0,0 +1,10 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: libmpeg2
+Description: A decoding library for MPEG-1 and MPEG-2 streams.
+Version: @VERSION@
+Libs: -L${libdir} -lmpeg2
+Cflags: -I${includedir}/@PACKAGE@
diff --git a/src/libmpeg2new/libmpeg2/motion_comp.c b/src/libmpeg2new/libmpeg2/motion_comp.c
new file mode 100644
index 000000000..24cfee1e1
--- /dev/null
+++ b/src/libmpeg2new/libmpeg2/motion_comp.c
@@ -0,0 +1,129 @@
+/*
+ * motion_comp.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "mpeg2_internal.h"
+
+mpeg2_mc_t mpeg2_mc;
+
+void mpeg2_mc_init (uint32_t accel)
+{   /* pick the first implementation that is compiled in and set in accel */
+#ifdef ARCH_X86
+    if (accel & MPEG2_ACCEL_X86_MMXEXT)
+	mpeg2_mc = mpeg2_mc_mmxext;
+    else if (accel & MPEG2_ACCEL_X86_3DNOW)
+	mpeg2_mc = mpeg2_mc_3dnow;
+    else if (accel & MPEG2_ACCEL_X86_MMX)
+	mpeg2_mc = mpeg2_mc_mmx;
+    else	/* binds to whichever statement survives preprocessing below */
+#endif
+#ifdef ARCH_PPC
+    if (accel & MPEG2_ACCEL_PPC_ALTIVEC)
+	mpeg2_mc = mpeg2_mc_altivec;
+    else
+#endif
+#ifdef ARCH_ALPHA
+    if (accel & MPEG2_ACCEL_ALPHA)
+	mpeg2_mc = mpeg2_mc_alpha;
+    else
+#endif
+#ifdef LIBMPEG2_MLIB
+    if (accel & MPEG2_ACCEL_MLIB)
+	mpeg2_mc = mpeg2_mc_mlib;
+    else
+#endif
+	mpeg2_mc = mpeg2_mc_c;	/* default when nothing above matched */
+}
+
+#define avg2(a,b) (((a)+(b)+1)>>1)		/* mean of two, halves round up */
+#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)	/* mean of four, rounded */
+/* predictors: ref and stride are taken from the enclosing MC function */
+#define predict_o(i) (ref[(i)])
+#define predict_x(i) (avg2 (ref[(i)], ref[(i)+1]))
+#define predict_y(i) (avg2 (ref[(i)], (ref+stride)[(i)]))
+#define predict_xy(i) (avg4 (ref[(i)], ref[(i)+1], \
+			     (ref+stride)[(i)], (ref+stride)[(i)+1]))
+/* put stores the prediction in dest; avg averages it with dest's content */
+#define put(predictor,i) dest[(i)] = predictor (i)
+#define avg(predictor,i) dest[(i)] = avg2 (predictor (i), dest[(i)])
+
+/* mc function template */
+
+#define MC_FUNC(op,xy)							\
+static void MC_##op##_##xy##_16_c (uint8_t * dest, const uint8_t * ref,	\
+				   const int stride, int height)	\
+{   /* 16 pixels per row; do-while, so height must be at least 1 */	\
+    do {								\
+	op (predict_##xy, 0);						\
+	op (predict_##xy, 1);						\
+	op (predict_##xy, 2);						\
+	op (predict_##xy, 3);						\
+	op (predict_##xy, 4);						\
+	op (predict_##xy, 5);						\
+	op (predict_##xy, 6);						\
+	op (predict_##xy, 7);						\
+	op (predict_##xy, 8);						\
+	op (predict_##xy, 9);						\
+	op (predict_##xy, 10);						\
+	op (predict_##xy, 11);						\
+	op (predict_##xy, 12);						\
+	op (predict_##xy, 13);						\
+	op (predict_##xy, 14);						\
+	op (predict_##xy, 15);						\
+	ref += stride;		/* next row of source and destination */ \
+	dest += stride;							\
+    } while (--height);							\
+}									\
+static void MC_##op##_##xy##_8_c (uint8_t * dest, const uint8_t * ref,	\
+				  const int stride, int height)		\
+{   /* 8 pixels per row; do-while, so height must be at least 1 */	\
+    do {								\
+	op (predict_##xy, 0);						\
+	op (predict_##xy, 1);						\
+	op (predict_##xy, 2);						\
+	op (predict_##xy, 3);						\
+	op (predict_##xy, 4);						\
+	op (predict_##xy, 5);						\
+	op (predict_##xy, 6);						\
+	op (predict_##xy, 7);						\
+	ref += stride;							\
+	dest += stride;							\
+    } while (--height);							\
+}
+
+/* definitions of the actual mc functions */
+
+MC_FUNC (put,o)		/* defines MC_put_o_16_c and MC_put_o_8_c */
+MC_FUNC (avg,o)
+MC_FUNC (put,x)		/* x: averaged with the pixel at i+1 */
+MC_FUNC (avg,x)
+MC_FUNC (put,y)		/* y: averaged with the pixel one stride further */
+MC_FUNC (avg,y)
+MC_FUNC (put,xy)	/* xy: average of the 2x2 neighbourhood */
+MC_FUNC (avg,xy)
+
+MPEG2_MC_EXTERN (c)	/* presumably defines mpeg2_mc_c from the above - confirm */
diff --git a/src/libmpeg2new/libmpeg2/motion_comp_alpha.c b/src/libmpeg2new/libmpeg2/motion_comp_alpha.c
new file mode 100644
index 000000000..662221b4d
--- /dev/null
+++ b/src/libmpeg2new/libmpeg2/motion_comp_alpha.c
@@ -0,0 +1,252 @@
+/*
+ * motion_comp_alpha.c
+ * Copyright (C) 2002-2003 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#ifdef ARCH_ALPHA
+
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "mpeg2_internal.h"
+#include "alpha_asm.h"
+
+static inline uint64_t avg2 (uint64_t a, uint64_t b)
+{   /* per-byte (a+b+1)>>1, assuming BYTE_VEC (x) fills all 8 bytes with x */
+    return (a | b) - (((a ^ b) & BYTE_VEC (0xfe)) >> 1);
+}
+
+// Load two unaligned quadwords from addr. This macro only works if
+// addr is actually unaligned; MAKE_OP picks ALOAD16 for aligned addr.
+#define ULOAD16(ret_l,ret_r,addr)			\
+    do {						\
+	uint64_t _l = ldq_u (addr +  0);		\
+	uint64_t _m = ldq_u (addr +  8);		\
+	uint64_t _r = ldq_u (addr + 16);		\
+	ret_l = extql (_l, addr) | extqh (_m, addr);	\
+	ret_r = extql (_m, addr) | extqh (_r, addr);	\
+    } while (0)
+
+// Load two aligned quadwords from addr (bytes 0-7 and 8-15).
+#define ALOAD16(ret_l,ret_r,addr)			\
+    do {						\
+	ret_l = ldq (addr);				\
+	ret_r = ldq (addr + 8);				\
+    } while (0)
+
+#define OP8(LOAD,LOAD16,STORE)			\
+    do {	/* one 8-byte row per pass */	\
+	STORE (LOAD (pixels), block);		\
+	pixels += line_size;			\
+	block += line_size;			\
+    } while (--h)
+
+#define OP16(LOAD,LOAD16,STORE)			\
+    do {	/* one 16-byte row per pass */	\
+	uint64_t l, r;				\
+	LOAD16 (l, r, pixels);			\
+	STORE (l, block);			\
+	STORE (r, block + 8);			\
+	pixels += line_size;			\
+	block += line_size;			\
+    } while (--h)
+
+#define OP8_X2(LOAD,LOAD16,STORE)			\
+    do {	/* average each pixel with pixel i+1 */	\
+	uint64_t p0, p1;				\
+							\
+	p0 = LOAD (pixels);				\
+	p1 = p0 >> 8 | ((uint64_t) pixels[8] << 56);	\
+	STORE (avg2 (p0, p1), block);			\
+	pixels += line_size;				\
+	block += line_size;				\
+    } while (--h)
+
+#define OP16_X2(LOAD,LOAD16,STORE)				\
+    do {	/* as OP8_X2, for two quadwords per row */	\
+	uint64_t p0, p1;					\
+								\
+	LOAD16 (p0, p1, pixels);				\
+	STORE (avg2(p0, p0 >> 8 | p1 << 56), block);		\
+	STORE (avg2(p1, p1 >> 8 | (uint64_t) pixels[16] << 56),	\
+	       block + 8);					\
+	pixels += line_size;					\
+	block += line_size;					\
+    } while (--h)
+
+#define OP8_Y2(LOAD,LOAD16,STORE)		\
+    do {	/* average rows i and i+1 */	\
+	uint64_t p0, p1;			\
+	p0 = LOAD (pixels);			\
+	pixels += line_size;			\
+	p1 = LOAD (pixels);			\
+	do {					\
+	    uint64_t av = avg2 (p0, p1);	\
+	    if (--h == 0) line_size = 0; /* last row: stop advancing */ \
+	    pixels += line_size;		\
+	    p0 = p1;				\
+	    p1 = LOAD (pixels);			\
+	    STORE (av, block);			\
+	    block += line_size;			\
+	} while (h);				\
+    } while (0)
+
+#define OP16_Y2(LOAD,LOAD16,STORE)		\
+    do {	/* as OP8_Y2, 16 bytes wide */	\
+	uint64_t p0l, p0r, p1l, p1r;		\
+	LOAD16 (p0l, p0r, pixels);		\
+	pixels += line_size;			\
+	LOAD16 (p1l, p1r, pixels);		\
+	do {					\
+	    uint64_t avl, avr;			\
+	    if (--h == 0) line_size = 0; /* last row: stop advancing */ \
+	    avl = avg2 (p0l, p1l);		\
+	    avr = avg2 (p0r, p1r);		\
+	    p0l = p1l;				\
+	    p0r = p1r;				\
+	    pixels += line_size;		\
+	    LOAD16 (p1l, p1r, pixels);		\
+	    STORE (avl, block);			\
+	    STORE (avr, block + 8);		\
+	    block += line_size;			\
+	} while (h);				\
+    } while (0)
+
+#define OP8_XY2(LOAD,LOAD16,STORE)				\
+    do {	/* 2x2 average; reads h+1 rows of pixels */	\
+	uint64_t pl, ph;					\
+	uint64_t p1 = LOAD (pixels);				\
+	uint64_t p2 = p1 >> 8 | ((uint64_t) pixels[8] << 56);	\
+	/* ph: sum of the values >> 2, pl: sum of the low 2 bits */ \
+	ph = (((p1 & ~BYTE_VEC (0x03)) >> 2) +			\
+	      ((p2 & ~BYTE_VEC (0x03)) >> 2));			\
+	pl = ((p1 & BYTE_VEC (0x03)) +				\
+	      (p2 & BYTE_VEC (0x03)));				\
+								\
+	do {							\
+	    uint64_t npl, nph;					\
+								\
+	    pixels += line_size;				\
+	    p1 = LOAD (pixels);					\
+	    p2 = (p1 >> 8) | ((uint64_t) pixels[8] << 56);	\
+	    nph = (((p1 & ~BYTE_VEC (0x03)) >> 2) +		\
+	           ((p2 & ~BYTE_VEC (0x03)) >> 2));		\
+	    npl = ((p1 & BYTE_VEC (0x03)) +			\
+	           (p2 & BYTE_VEC (0x03)));			\
+	    /* high parts plus the rounded carry of the low parts */ \
+	    STORE (ph + nph +					\
+		   (((pl + npl + BYTE_VEC (0x02)) >> 2) &	\
+		    BYTE_VEC (0x03)), block);			\
+								\
+	    block += line_size;					\
+            pl = npl;						\
+	    ph = nph;						\
+	} while (--h);						\
+    } while (0)
+
+#define OP16_XY2(LOAD,LOAD16,STORE)				\
+    do {	/* as OP8_XY2, with left and right quadwords */	\
+	uint64_t p0, p1, p2, p3, pl_l, ph_l, pl_r, ph_r;	\
+	LOAD16 (p0, p2, pixels);				\
+	p1 = p0 >> 8 | (p2 << 56);				\
+	p3 = p2 >> 8 | ((uint64_t)pixels[16] << 56);		\
+								\
+	ph_l = (((p0 & ~BYTE_VEC (0x03)) >> 2) +		\
+	        ((p1 & ~BYTE_VEC (0x03)) >> 2));		\
+	pl_l = ((p0 & BYTE_VEC (0x03)) +			\
+	        (p1 & BYTE_VEC(0x03)));				\
+	ph_r = (((p2 & ~BYTE_VEC (0x03)) >> 2) +		\
+	        ((p3 & ~BYTE_VEC (0x03)) >> 2));		\
+	pl_r = ((p2 & BYTE_VEC (0x03)) +			\
+	        (p3 & BYTE_VEC (0x03)));			\
+								\
+	do {							\
+	    uint64_t npl_l, nph_l, npl_r, nph_r;		\
+								\
+	    pixels += line_size;				\
+	    LOAD16 (p0, p2, pixels);				\
+	    p1 = p0 >> 8 | (p2 << 56);				\
+	    p3 = p2 >> 8 | ((uint64_t)pixels[16] << 56);	\
+	    nph_l = (((p0 & ~BYTE_VEC (0x03)) >> 2) +		\
+		     ((p1 & ~BYTE_VEC (0x03)) >> 2));		\
+	    npl_l = ((p0 & BYTE_VEC (0x03)) +			\
+		     (p1 & BYTE_VEC (0x03)));			\
+	    nph_r = (((p2 & ~BYTE_VEC (0x03)) >> 2) +		\
+		     ((p3 & ~BYTE_VEC (0x03)) >> 2));		\
+	    npl_r = ((p2 & BYTE_VEC (0x03)) +			\
+		     (p3 & BYTE_VEC (0x03)));			\
+								\
+	    STORE (ph_l + nph_l +				\
+		   (((pl_l + npl_l + BYTE_VEC (0x02)) >> 2) &	\
+		    BYTE_VEC(0x03)), block);			\
+	    STORE (ph_r + nph_r +				\
+		   (((pl_r + npl_r + BYTE_VEC (0x02)) >> 2) &	\
+		    BYTE_VEC(0x03)), block + 8);		\
+								\
+	    block += line_size;					\
+	    pl_l = npl_l;					\
+	    ph_l = nph_l;					\
+	    pl_r = npl_r;					\
+	    ph_r = nph_r;					\
+	} while (--h);						\
+    } while (0)
+
+#define MAKE_OP(OPNAME,SIZE,SUFF,OPKIND,STORE)				\
+static void MC_ ## OPNAME ## _ ## SUFF ## _ ## SIZE ## _alpha		\
+	(uint8_t *restrict block, const uint8_t *restrict pixels,	\
+	 int line_size, int h)						\
+{   /* OPKIND is expanded twice: unaligned and aligned source loads */	\
+    if ((uint64_t) pixels & 0x7) {					\
+	OPKIND (uldq, ULOAD16, STORE);					\
+    } else {								\
+	OPKIND (ldq, ALOAD16, STORE);					\
+    }									\
+}
+
+#define PIXOP(OPNAME,STORE)	/* the 8 MC functions of one kind */ \
+    MAKE_OP (OPNAME, 8,  o,  OP8,      STORE)	\
+    MAKE_OP (OPNAME, 8,  x,  OP8_X2,   STORE)	\
+    MAKE_OP (OPNAME, 8,  y,  OP8_Y2,   STORE)	\
+    MAKE_OP (OPNAME, 8,  xy, OP8_XY2,  STORE)	\
+    MAKE_OP (OPNAME, 16, o,  OP16,     STORE)	\
+    MAKE_OP (OPNAME, 16, x,  OP16_X2,  STORE)	\
+    MAKE_OP (OPNAME, 16, y,  OP16_Y2,  STORE)	\
+    MAKE_OP (OPNAME, 16, xy, OP16_XY2, STORE)
+/* no ';' after function definitions: a stray ';' at file scope is not ISO C */
+#define STORE(l,b) stq (l, b)			/* put: plain store */
+PIXOP (put, STORE)
+#undef STORE
+#define STORE(l,b) stq (avg2 (l, ldq (b)), b)	/* avg: blend with dest */
+PIXOP (avg, STORE)
+
+mpeg2_mc_t mpeg2_mc_alpha = {
+    { MC_put_o_16_alpha, MC_put_x_16_alpha,	/* put: 16 wide o, x, y, xy */
+      MC_put_y_16_alpha, MC_put_xy_16_alpha,
+      MC_put_o_8_alpha, MC_put_x_8_alpha,	/* put: 8 wide o, x, y, xy */
+      MC_put_y_8_alpha, MC_put_xy_8_alpha },
+    { MC_avg_o_16_alpha, MC_avg_x_16_alpha,	/* avg: same order as put */
+      MC_avg_y_16_alpha, MC_avg_xy_16_alpha,
+      MC_avg_o_8_alpha, MC_avg_x_8_alpha,
+      MC_avg_y_8_alpha, MC_avg_xy_8_alpha }
+};
+
+#endif
diff --git a/src/libmpeg2new/libmpeg2/motion_comp_altivec.c b/src/libmpeg2new/libmpeg2/motion_comp_altivec.c
new file mode 100644
index 000000000..f5d884e6e
--- /dev/null
+++ b/src/libmpeg2new/libmpeg2/motion_comp_altivec.c
@@ -0,0 +1,1009 @@
+/*
+ * motion_comp_altivec.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#ifdef ARCH_PPC
+
+#ifdef HAVE_ALTIVEC_H
+#include <altivec.h>
+#endif
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "mpeg2_internal.h"
+/* Shorthand names for the AltiVec vector types. */
+typedef vector signed char vector_s8_t;		/* not referenced in this file */
+typedef vector unsigned char vector_u8_t;
+typedef vector signed short vector_s16_t;	/* not referenced in this file */
+typedef vector unsigned short vector_u16_t;
+typedef vector signed int vector_s32_t;		/* not referenced in this file */
+typedef vector unsigned int vector_u32_t;
+
+#ifndef COFFEE_BREAK	/* Workarounds for gcc limitations */
+/* Typed wrappers: each pins one AltiVec intrinsic to its vector_u8_t form. */
+static inline vector_u8_t my_vec_ld (int const offset, const uint8_t * const addr)
+{
+    return vec_ld (offset, (uint8_t *)addr);	/* intrinsic gets a non-const ptr */
+}
+#undef vec_ld
+#define vec_ld my_vec_ld
+/* From here on vec_ld names the wrapper; same scheme for the two below. */
+static inline vector_u8_t my_vec_and (vector_u8_t const lhs, vector_u8_t const rhs)
+{
+    return vec_and (lhs, rhs);
+}
+#undef vec_and
+#define vec_and my_vec_and
+
+static inline vector_u8_t my_vec_avg (vector_u8_t const lhs, vector_u8_t const rhs)
+{
+    return vec_avg (lhs, rhs);
+}
+#undef vec_avg
+#define vec_avg my_vec_avg
+
+#endif
+/* put, no interpolation, 16 wide: copy `height` rows of 16 bytes, ref to dest. */
+static void MC_put_o_16_altivec (uint8_t * dest, const uint8_t * ref,
+				 const int stride, int height)
+{
+    vector_u8_t perm, ref0, ref1, tmp;
+    /* perm: shift mask from ref's alignment, reused to realign every row */
+    perm = vec_lvsl (0, ref);
+    /* two rows per pass, final pair peeled off: assumes an even height >= 4 */
+    height = (height >> 1) - 1;
+    /* prologue: fetch row 0 from the two vectors it may straddle */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (15, ref);
+    ref += stride;
+    tmp = vec_perm (ref0, ref1, perm);
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (15, ref);
+	ref += stride;
+	vec_st (tmp, 0, dest);
+	tmp = vec_perm (ref0, ref1, perm);
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (15, ref);
+	ref += stride;
+	vec_st (tmp, stride, dest);
+	dest += 2*stride;
+	tmp = vec_perm (ref0, ref1, perm);
+    } while (--height);
+    /* epilogue: store the pending row, then fetch and store the last one */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (15, ref);
+    vec_st (tmp, 0, dest);
+    tmp = vec_perm (ref0, ref1, perm);
+    vec_st (tmp, stride, dest);
+}
+/* put, no interpolation, 8 wide: copy `height` rows of 8 bytes, ref to dest. */
+static void MC_put_o_8_altivec (uint8_t * dest, const uint8_t * ref,
+				const int stride, int height)
+{
+    vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1;
+    /* perm0 realigns even rows; perm1 (alignment of ref + stride) odd rows */
+    tmp0 = vec_lvsl (0, ref);
+    tmp0 = vec_mergeh (tmp0, tmp0);
+    perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
+    tmp1 = vec_lvsl (stride, ref);
+    tmp1 = vec_mergeh (tmp1, tmp1);
+    perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
+    /* mergeh + pack repeat mask bytes 0-7 in 8-15: each row comes out doubled */
+    height = (height >> 1) - 1;
+    /* two rows per pass (even height >= 4 assumed); prologue fetches row 0 */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (7, ref);
+    ref += stride;
+    tmp0 = vec_perm (ref0, ref1, perm0);
+    /* each vec_ste pair stores one row: the words at dest + 0 and dest + 4 */
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (7, ref);
+	ref += stride;
+	vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
+	dest += stride;
+	tmp1 = vec_perm (ref0, ref1, perm1);
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (7, ref);
+	ref += stride;
+	vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
+	dest += stride;
+	tmp0 = vec_perm (ref0, ref1, perm0);
+    } while (--height);
+    /* epilogue: store the pending row, then fetch and store the last one */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (7, ref);
+    vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
+    dest += stride;
+    tmp1 = vec_perm (ref0, ref1, perm1);
+    vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
+}
+/* put, x-interpolated, 16 wide: dest = vec_avg (pixel, right-hand neighbour). */
+static void MC_put_x_16_altivec (uint8_t * dest, const uint8_t * ref,
+				 const int stride, int height)
+{
+    vector_u8_t permA, permB, ref0, ref1, tmp;
+    /* permA selects the 16 pixels at ref, permB the 16 one byte further on */
+    permA = vec_lvsl (0, ref);
+    permB = vec_add (permA, vec_splat_u8 (1));
+    /* two rows per pass, final pair peeled off: assumes an even height >= 4 */
+    height = (height >> 1) - 1;
+    /* prologue: row 0; second load is at +16 because 17 source bytes are used */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (16, ref);
+    ref += stride;
+    tmp = vec_avg (vec_perm (ref0, ref1, permA),
+		   vec_perm (ref0, ref1, permB));
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (16, ref);
+	ref += stride;
+	vec_st (tmp, 0, dest);
+	tmp = vec_avg (vec_perm (ref0, ref1, permA),
+		       vec_perm (ref0, ref1, permB));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (16, ref);
+	ref += stride;
+	vec_st (tmp, stride, dest);
+	dest += 2*stride;
+	tmp = vec_avg (vec_perm (ref0, ref1, permA),
+		       vec_perm (ref0, ref1, permB));
+    } while (--height);
+    /* epilogue: store the pending row, then compute and store the last one */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (16, ref);
+    vec_st (tmp, 0, dest);
+    tmp = vec_avg (vec_perm (ref0, ref1, permA),
+		   vec_perm (ref0, ref1, permB));
+    vec_st (tmp, stride, dest);
+}
+/* put, x-interpolated, 8 wide: dest = vec_avg (pixel, right-hand neighbour). */
+static void MC_put_x_8_altivec (uint8_t * dest, const uint8_t * ref,
+				const int stride, int height)
+{
+    vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
+    /* perm0x: even rows, perm1x: odd rows; xB = xA + 1 picks the next pixel */
+    ones = vec_splat_u8 (1);
+    tmp0 = vec_lvsl (0, ref);
+    tmp0 = vec_mergeh (tmp0, tmp0);
+    perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
+    perm0B = vec_add (perm0A, ones);
+    tmp1 = vec_lvsl (stride, ref);
+    tmp1 = vec_mergeh (tmp1, tmp1);
+    perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
+    perm1B = vec_add (perm1A, ones);
+    /* two rows per pass, final pair peeled off: assumes an even height >= 4 */
+    height = (height >> 1) - 1;
+    /* prologue: row 0; second load is at +8 because 9 source bytes are used */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (8, ref);
+    ref += stride;
+    tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
+		    vec_perm (ref0, ref1, perm0B));
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (8, ref);
+	ref += stride;
+	vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
+	dest += stride;
+	tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
+			vec_perm (ref0, ref1, perm1B));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (8, ref);
+	ref += stride;
+	vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
+	dest += stride;
+	tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
+			vec_perm (ref0, ref1, perm0B));
+    } while (--height);
+    /* epilogue: store the pending row, then compute and store the last one */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (8, ref);
+    vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
+    dest += stride;
+    tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
+		    vec_perm (ref0, ref1, perm1B));
+    vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
+}
+/* put, y-interpolated, 16 wide: dest row = vec_avg (ref row, ref row below). */
+static void MC_put_y_16_altivec (uint8_t * dest, const uint8_t * ref,
+				 const int stride, int height)
+{
+    vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp;
+    /* perm: shift mask from ref's alignment, reused to realign every row */
+    perm = vec_lvsl (0, ref);
+    /* two rows per pass, final pair peeled off: assumes an even height >= 4 */
+    height = (height >> 1) - 1;
+    /* prologue: rows 0 and 1; tmp0/tmp1 then alternate as the newest row */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (15, ref);
+    ref += stride;
+    tmp0 = vec_perm (ref0, ref1, perm);
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (15, ref);
+    ref += stride;
+    tmp1 = vec_perm (ref0, ref1, perm);
+    tmp = vec_avg (tmp0, tmp1);
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (15, ref);
+	ref += stride;
+	vec_st (tmp, 0, dest);
+	tmp0 = vec_perm (ref0, ref1, perm);
+	tmp = vec_avg (tmp0, tmp1);
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (15, ref);
+	ref += stride;
+	vec_st (tmp, stride, dest);
+	dest += 2*stride;
+	tmp1 = vec_perm (ref0, ref1, perm);
+	tmp = vec_avg (tmp0, tmp1);
+    } while (--height);
+    /* epilogue: one more source row is read, height + 1 rows in total */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (15, ref);
+    vec_st (tmp, 0, dest);
+    tmp0 = vec_perm (ref0, ref1, perm);
+    tmp = vec_avg (tmp0, tmp1);
+    vec_st (tmp, stride, dest);
+}
+/* put, y-interpolated, 8 wide: dest row = vec_avg (ref row, ref row below). */
+static void MC_put_y_8_altivec (uint8_t * dest, const uint8_t * ref,
+				const int stride, int height)
+{
+    vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1;
+    /* perm0 realigns even rows; perm1 (alignment of ref + stride) odd rows */
+    tmp0 = vec_lvsl (0, ref);
+    tmp0 = vec_mergeh (tmp0, tmp0);
+    perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
+    tmp1 = vec_lvsl (stride, ref);
+    tmp1 = vec_mergeh (tmp1, tmp1);
+    perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
+    /* two rows per pass, final pair peeled off: assumes an even height >= 4 */
+    height = (height >> 1) - 1;
+    /* prologue: rows 0 and 1; tmp0 holds even source rows, tmp1 odd ones */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (7, ref);
+    ref += stride;
+    tmp0 = vec_perm (ref0, ref1, perm0);
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (7, ref);
+    ref += stride;
+    tmp1 = vec_perm (ref0, ref1, perm1);
+    tmp = vec_avg (tmp0, tmp1);
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (7, ref);
+	ref += stride;
+	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+	dest += stride;
+	tmp0 = vec_perm (ref0, ref1, perm0);
+	tmp = vec_avg (tmp0, tmp1);
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (7, ref);
+	ref += stride;
+	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+	dest += stride;
+	tmp1 = vec_perm (ref0, ref1, perm1);
+	tmp = vec_avg (tmp0, tmp1);
+    } while (--height);
+    /* epilogue: one more source row is read, height + 1 rows in total */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (7, ref);
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+    dest += stride;
+    tmp0 = vec_perm (ref0, ref1, perm0);
+    tmp = vec_avg (tmp0, tmp1);
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+}
+/* put, xy-interpolated, 16 wide: dest = rounded mean of a 2x2 pixel square. */
+static void MC_put_xy_16_altivec (uint8_t * dest, const uint8_t * ref,
+				  const int stride, int height)
+{
+    vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
+    vector_u8_t ones;
+    /* permA selects the 16 pixels at ref, permB the 16 one byte further on */
+    ones = vec_splat_u8 (1);
+    permA = vec_lvsl (0, ref);
+    permB = vec_add (permA, ones);
+    /* two rows per pass, final pair peeled off: assumes an even height >= 4 */
+    height = (height >> 1) - 1;
+    /* avg0/xor0: vec_avg and XOR of the pixel pairs of the even source rows */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (16, ref);
+    ref += stride;
+    A = vec_perm (ref0, ref1, permA);
+    B = vec_perm (ref0, ref1, permB);
+    avg0 = vec_avg (A, B);
+    xor0 = vec_xor (A, B);
+    /* avg1/xor1: the same for the odd source rows */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (16, ref);
+    ref += stride;
+    A = vec_perm (ref0, ref1, permA);
+    B = vec_perm (ref0, ref1, permB);
+    avg1 = vec_avg (A, B);
+    xor1 = vec_xor (A, B);
+    tmp = vec_sub (vec_avg (avg0, avg1),
+		   vec_and (vec_and (ones, vec_or (xor0, xor1)),
+			    vec_xor (avg0, avg1)));
+    /* tmp = avg (avg0, avg1) minus a 1-bit fix == (a + b + c + d + 2) >> 2 */
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (16, ref);
+	ref += stride;
+	vec_st (tmp, 0, dest);
+	A = vec_perm (ref0, ref1, permA);
+	B = vec_perm (ref0, ref1, permB);
+	avg0 = vec_avg (A, B);
+	xor0 = vec_xor (A, B);
+	tmp = vec_sub (vec_avg (avg0, avg1),
+		       vec_and (vec_and (ones, vec_or (xor0, xor1)),
+				vec_xor (avg0, avg1)));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (16, ref);
+	ref += stride;
+	vec_st (tmp, stride, dest);
+	dest += 2*stride;
+	A = vec_perm (ref0, ref1, permA);
+	B = vec_perm (ref0, ref1, permB);
+	avg1 = vec_avg (A, B);
+	xor1 = vec_xor (A, B);
+	tmp = vec_sub (vec_avg (avg0, avg1),
+		       vec_and (vec_and (ones, vec_or (xor0, xor1)),
+				vec_xor (avg0, avg1)));
+    } while (--height);
+    /* epilogue: one more source row is read, height + 1 rows in total */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (16, ref);
+    vec_st (tmp, 0, dest);
+    A = vec_perm (ref0, ref1, permA);
+    B = vec_perm (ref0, ref1, permB);
+    avg0 = vec_avg (A, B);
+    xor0 = vec_xor (A, B);
+    tmp = vec_sub (vec_avg (avg0, avg1),
+		   vec_and (vec_and (ones, vec_or (xor0, xor1)),
+			    vec_xor (avg0, avg1)));
+    vec_st (tmp, stride, dest);
+}
+/* put, xy-interpolated, 8 wide: dest = rounded mean of a 2x2 pixel square. */
+static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
+				 const int stride, int height)
+{
+    vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
+    vector_u8_t avg0, avg1, xor0, xor1, tmp, ones;
+    /* perm0x: even rows, perm1x: odd rows; xB = xA + 1 picks the next pixel */
+    ones = vec_splat_u8 (1);
+    perm0A = vec_lvsl (0, ref);
+    perm0A = vec_mergeh (perm0A, perm0A);
+    perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
+    perm0B = vec_add (perm0A, ones);
+    perm1A = vec_lvsl (stride, ref);
+    perm1A = vec_mergeh (perm1A, perm1A);
+    perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
+    perm1B = vec_add (perm1A, ones);
+    /* two rows per pass, final pair peeled off: assumes an even height >= 4 */
+    height = (height >> 1) - 1;
+    /* avg0/xor0: vec_avg and XOR of the pixel pairs of the even source rows */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (8, ref);
+    ref += stride;
+    A = vec_perm (ref0, ref1, perm0A);
+    B = vec_perm (ref0, ref1, perm0B);
+    avg0 = vec_avg (A, B);
+    xor0 = vec_xor (A, B);
+    /* avg1/xor1: the same for the odd source rows */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (8, ref);
+    ref += stride;
+    A = vec_perm (ref0, ref1, perm1A);
+    B = vec_perm (ref0, ref1, perm1B);
+    avg1 = vec_avg (A, B);
+    xor1 = vec_xor (A, B);
+    tmp = vec_sub (vec_avg (avg0, avg1),
+		   vec_and (vec_and (ones, vec_or (xor0, xor1)),
+			    vec_xor (avg0, avg1)));
+    /* tmp = avg (avg0, avg1) minus a 1-bit fix == (a + b + c + d + 2) >> 2 */
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (8, ref);
+	ref += stride;
+	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+	dest += stride;
+	A = vec_perm (ref0, ref1, perm0A);
+	B = vec_perm (ref0, ref1, perm0B);
+	avg0 = vec_avg (A, B);
+	xor0 = vec_xor (A, B);
+	tmp = vec_sub (vec_avg (avg0, avg1),
+		       vec_and (vec_and (ones, vec_or (xor0, xor1)),
+				vec_xor (avg0, avg1)));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (8, ref);
+	ref += stride;
+	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+	dest += stride;
+	A = vec_perm (ref0, ref1, perm1A);
+	B = vec_perm (ref0, ref1, perm1B);
+	avg1 = vec_avg (A, B);
+	xor1 = vec_xor (A, B);
+	tmp = vec_sub (vec_avg (avg0, avg1),
+		       vec_and (vec_and (ones, vec_or (xor0, xor1)),
+				vec_xor (avg0, avg1)));
+    } while (--height);
+    /* epilogue: one more source row is read, height + 1 rows in total */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (8, ref);
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+    dest += stride;
+    A = vec_perm (ref0, ref1, perm0A);
+    B = vec_perm (ref0, ref1, perm0B);
+    avg0 = vec_avg (A, B);
+    xor0 = vec_xor (A, B);
+    tmp = vec_sub (vec_avg (avg0, avg1),
+		   vec_and (vec_and (ones, vec_or (xor0, xor1)),
+			    vec_xor (avg0, avg1)));
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+}
+/* Disabled alternative MC_put_xy_8_altivec that sums in 16 bits; not compiled. */
+#if 0
+static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
+				 const int stride, int height)
+{
+    vector_u8_t permA, permB, ref0, ref1, A, B, C, D, tmp, zero, ones;
+    vector_u16_t splat2, temp;
+    /* permA selects the pixels at ref, permB the ones one byte further on */
+    ones = vec_splat_u8 (1);
+    permA = vec_lvsl (0, ref);
+    permB = vec_add (permA, ones);
+    /* zero widens bytes to 16 bits via mergeh; splat2 is both bias and shift */
+    zero = vec_splat_u8 (0);
+    splat2 = vec_splat_u16 (2);
+    /* one row per pass: A, B from this row and C, D from the row below */
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (8, ref);
+	ref += stride;
+	A = vec_perm (ref0, ref1, permA);
+	B = vec_perm (ref0, ref1, permB);
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (8, ref);
+	C = vec_perm (ref0, ref1, permA);
+	D = vec_perm (ref0, ref1, permB);
+	/* temp = (A + B + C + D + 2) >> 2 in 16-bit lanes */
+	temp = vec_add (vec_add ((vector_u16_t)vec_mergeh (zero, A),
+				(vector_u16_t)vec_mergeh (zero, B)),
+		       vec_add ((vector_u16_t)vec_mergeh (zero, C),
+				(vector_u16_t)vec_mergeh (zero, D)));
+	temp = vec_sr (vec_add (temp, splat2), splat2);
+	tmp = vec_pack (temp, temp);
+	/* NOTE(review): vec_st writes 16 bytes; the tmp set below is never read */
+	vec_st (tmp, 0, dest);
+	dest += stride;
+	tmp = vec_avg (vec_perm (ref0, ref1, permA),
+		       vec_perm (ref0, ref1, permB));
+    } while (--height);
+}
+#endif
+/* avg, no interpolation, 16 wide: dest = vec_avg (dest, ref) for each row. */
+static void MC_avg_o_16_altivec (uint8_t * dest, const uint8_t * ref,
+				 const int stride, int height)
+{
+    vector_u8_t perm, ref0, ref1, tmp, prev;
+    /* perm: shift mask from ref's alignment, reused to realign every row */
+    perm = vec_lvsl (0, ref);
+    /* two rows per pass, final pair peeled off: assumes an even height >= 4 */
+    height = (height >> 1) - 1;
+    /* prologue: row 0; prev holds the bytes currently in dest */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (15, ref);
+    ref += stride;
+    prev = vec_ld (0, dest);
+    tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (15, ref);
+	ref += stride;
+	prev = vec_ld (stride, dest);
+	vec_st (tmp, 0, dest);
+	tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (15, ref);
+	ref += stride;
+	prev = vec_ld (2*stride, dest);
+	vec_st (tmp, stride, dest);
+	dest += 2*stride;
+	tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
+    } while (--height);
+    /* epilogue: store the pending row, then blend and store the last one */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (15, ref);
+    prev = vec_ld (stride, dest);
+    vec_st (tmp, 0, dest);
+    tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
+    vec_st (tmp, stride, dest);
+}
+/* avg, no interpolation, 8 wide: dest = vec_avg (dest, ref) for each row. */
+static void MC_avg_o_8_altivec (uint8_t * dest, const uint8_t * ref,
+				const int stride, int height)
+{
+    vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev;
+    /* perm0 realigns even rows; perm1 (alignment of ref + stride) odd rows */
+    tmp0 = vec_lvsl (0, ref);
+    tmp0 = vec_mergeh (tmp0, tmp0);
+    perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
+    tmp1 = vec_lvsl (stride, ref);
+    tmp1 = vec_mergeh (tmp1, tmp1);
+    perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
+    /* two rows per pass, final pair peeled off: assumes an even height >= 4 */
+    height = (height >> 1) - 1;
+    /* prologue: row 0; prev is a 16-byte load, only 8 blended bytes get stored */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (7, ref);
+    ref += stride;
+    prev = vec_ld (0, dest);
+    tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (7, ref);
+	ref += stride;
+	prev = vec_ld (stride, dest);
+	vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
+	dest += stride;
+	tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (7, ref);
+	ref += stride;
+	prev = vec_ld (stride, dest);
+	vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
+	dest += stride;
+	tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
+    } while (--height);
+    /* epilogue: store the pending row, then blend and store the last one */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (7, ref);
+    prev = vec_ld (stride, dest);
+    vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
+    dest += stride;
+    tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
+    vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
+}
+/* avg, x-interpolated, 16 wide: dest = vec_avg (dest, avg of pixel pair). */
+static void MC_avg_x_16_altivec (uint8_t * dest, const uint8_t * ref,
+				 const int stride, int height)
+{
+    vector_u8_t permA, permB, ref0, ref1, tmp, prev;
+    /* permA selects the 16 pixels at ref, permB the 16 one byte further on */
+    permA = vec_lvsl (0, ref);
+    permB = vec_add (permA, vec_splat_u8 (1));
+    /* two rows per pass, final pair peeled off: assumes an even height >= 4 */
+    height = (height >> 1) - 1;
+    /* prologue: row 0; prev holds the bytes currently in dest */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (16, ref);
+    prev = vec_ld (0, dest);
+    ref += stride;
+    tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
+				  vec_perm (ref0, ref1, permB)));
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (16, ref);
+	ref += stride;
+	prev = vec_ld (stride, dest);
+	vec_st (tmp, 0, dest);
+	tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
+				      vec_perm (ref0, ref1, permB)));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (16, ref);
+	ref += stride;
+	prev = vec_ld (2*stride, dest);
+	vec_st (tmp, stride, dest);
+	dest += 2*stride;
+	tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
+				      vec_perm (ref0, ref1, permB)));
+    } while (--height);
+    /* epilogue: store the pending row, then blend and store the last one */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (16, ref);
+    prev = vec_ld (stride, dest);
+    vec_st (tmp, 0, dest);
+    tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
+				  vec_perm (ref0, ref1, permB)));
+    vec_st (tmp, stride, dest);
+}
+/* avg, x-interpolated, 8 wide: dest = vec_avg (dest, avg of pixel pair). */
+static void MC_avg_x_8_altivec (uint8_t * dest, const uint8_t * ref,
+				const int stride, int height)
+{
+    vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
+    vector_u8_t prev;
+    /* perm0x: even rows, perm1x: odd rows; xB = xA + 1 picks the next pixel */
+    ones = vec_splat_u8 (1);
+    tmp0 = vec_lvsl (0, ref);
+    tmp0 = vec_mergeh (tmp0, tmp0);
+    perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
+    perm0B = vec_add (perm0A, ones);
+    tmp1 = vec_lvsl (stride, ref);
+    tmp1 = vec_mergeh (tmp1, tmp1);
+    perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
+    perm1B = vec_add (perm1A, ones);
+    /* two rows per pass, final pair peeled off: assumes an even height >= 4 */
+    height = (height >> 1) - 1;
+    /* prologue: row 0; prev holds the bytes currently in dest */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (8, ref);
+    prev = vec_ld (0, dest);
+    ref += stride;
+    tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
+				   vec_perm (ref0, ref1, perm0B)));
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (8, ref);
+	ref += stride;
+	prev = vec_ld (stride, dest);
+	vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
+	dest += stride;
+	tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
+				       vec_perm (ref0, ref1, perm1B)));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (8, ref);
+	ref += stride;
+	prev = vec_ld (stride, dest);
+	vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
+	dest += stride;
+	tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
+				       vec_perm (ref0, ref1, perm0B)));
+    } while (--height);
+    /* epilogue: store the pending row, then blend and store the last one */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (8, ref);
+    prev = vec_ld (stride, dest);
+    vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
+    dest += stride;
+    tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
+				   vec_perm (ref0, ref1, perm1B)));
+    vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
+}
+/* avg, y-interpolated, 16 wide: dest = vec_avg (dest, avg of row and next). */
+static void MC_avg_y_16_altivec (uint8_t * dest, const uint8_t * ref,
+				 const int stride, int height)
+{
+    vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev;
+    /* perm: shift mask from ref's alignment, reused to realign every row */
+    perm = vec_lvsl (0, ref);
+    /* two rows per pass, final pair peeled off: assumes an even height >= 4 */
+    height = (height >> 1) - 1;
+    /* prologue: rows 0 and 1; tmp0/tmp1 then alternate as the newest row */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (15, ref);
+    ref += stride;
+    tmp0 = vec_perm (ref0, ref1, perm);
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (15, ref);
+    ref += stride;
+    prev = vec_ld (0, dest);
+    tmp1 = vec_perm (ref0, ref1, perm);
+    tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (15, ref);
+	ref += stride;
+	prev = vec_ld (stride, dest);
+	vec_st (tmp, 0, dest);
+	tmp0 = vec_perm (ref0, ref1, perm);
+	tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (15, ref);
+	ref += stride;
+	prev = vec_ld (2*stride, dest);
+	vec_st (tmp, stride, dest);
+	dest += 2*stride;
+	tmp1 = vec_perm (ref0, ref1, perm);
+	tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
+    } while (--height);
+    /* epilogue: one more source row is read, height + 1 rows in total */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (15, ref);
+    prev = vec_ld (stride, dest);
+    vec_st (tmp, 0, dest);
+    tmp0 = vec_perm (ref0, ref1, perm);
+    tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
+    vec_st (tmp, stride, dest);
+}
+/* avg, y-interpolated, 8 wide: dest = vec_avg (dest, avg of row and next). */
+static void MC_avg_y_8_altivec (uint8_t * dest, const uint8_t * ref,
+				const int stride, int height)
+{
+    vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev;
+    /* perm0 realigns even rows; perm1 (alignment of ref + stride) odd rows */
+    tmp0 = vec_lvsl (0, ref);
+    tmp0 = vec_mergeh (tmp0, tmp0);
+    perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
+    tmp1 = vec_lvsl (stride, ref);
+    tmp1 = vec_mergeh (tmp1, tmp1);
+    perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
+    /* two rows per pass, final pair peeled off: assumes an even height >= 4 */
+    height = (height >> 1) - 1;
+    /* prologue: rows 0 and 1; tmp0 holds even source rows, tmp1 odd ones */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (7, ref);
+    ref += stride;
+    tmp0 = vec_perm (ref0, ref1, perm0);
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (7, ref);
+    ref += stride;
+    prev = vec_ld (0, dest);
+    tmp1 = vec_perm (ref0, ref1, perm1);
+    tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (7, ref);
+	ref += stride;
+	prev = vec_ld (stride, dest);
+	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+	dest += stride;
+	tmp0 = vec_perm (ref0, ref1, perm0);
+	tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (7, ref);
+	ref += stride;
+	prev = vec_ld (stride, dest);
+	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+	dest += stride;
+	tmp1 = vec_perm (ref0, ref1, perm1);
+	tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
+    } while (--height);
+    /* epilogue: one more source row is read, height + 1 rows in total */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (7, ref);
+    prev = vec_ld (stride, dest);
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+    dest += stride;
+    tmp0 = vec_perm (ref0, ref1, perm0);
+    tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+}
+/* avg, xy-interpolated, 16 wide: dest = vec_avg (dest, 2x2 rounded mean). */
+static void MC_avg_xy_16_altivec (uint8_t * dest, const uint8_t * ref,
+				  const int stride, int height)
+{
+    vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
+    vector_u8_t ones, prev;
+    /* permA selects the 16 pixels at ref, permB the 16 one byte further on */
+    ones = vec_splat_u8 (1);
+    permA = vec_lvsl (0, ref);
+    permB = vec_add (permA, ones);
+    /* two rows per pass, final pair peeled off: assumes an even height >= 4 */
+    height = (height >> 1) - 1;
+    /* avg0/xor0: vec_avg and XOR of the pixel pairs of the even source rows */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (16, ref);
+    ref += stride;
+    A = vec_perm (ref0, ref1, permA);
+    B = vec_perm (ref0, ref1, permB);
+    avg0 = vec_avg (A, B);
+    xor0 = vec_xor (A, B);
+    /* avg1/xor1: the same for the odd source rows */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (16, ref);
+    ref += stride;
+    prev = vec_ld (0, dest);
+    A = vec_perm (ref0, ref1, permA);
+    B = vec_perm (ref0, ref1, permB);
+    avg1 = vec_avg (A, B);
+    xor1 = vec_xor (A, B);
+    tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
+				  vec_and (vec_and (ones, vec_or (xor0, xor1)),
+					   vec_xor (avg0, avg1))));
+    /* the vec_sub term is (a + b + c + d + 2) >> 2; it is then blended with prev */
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (16, ref);
+	ref += stride;
+	prev = vec_ld (stride, dest);
+	vec_st (tmp, 0, dest);
+	A = vec_perm (ref0, ref1, permA);
+	B = vec_perm (ref0, ref1, permB);
+	avg0 = vec_avg (A, B);
+	xor0 = vec_xor (A, B);
+	tmp = vec_avg (prev,
+		       vec_sub (vec_avg (avg0, avg1),
+				vec_and (vec_and (ones, vec_or (xor0, xor1)),
+					 vec_xor (avg0, avg1))));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (16, ref);
+	ref += stride;
+	prev = vec_ld (2*stride, dest);
+	vec_st (tmp, stride, dest);
+	dest += 2*stride;
+	A = vec_perm (ref0, ref1, permA);
+	B = vec_perm (ref0, ref1, permB);
+	avg1 = vec_avg (A, B);
+	xor1 = vec_xor (A, B);
+	tmp = vec_avg (prev,
+		       vec_sub (vec_avg (avg0, avg1),
+				vec_and (vec_and (ones, vec_or (xor0, xor1)),
+					 vec_xor (avg0, avg1))));
+    } while (--height);
+    /* epilogue: one more source row is read, height + 1 rows in total */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (16, ref);
+    prev = vec_ld (stride, dest);
+    vec_st (tmp, 0, dest);
+    A = vec_perm (ref0, ref1, permA);
+    B = vec_perm (ref0, ref1, permB);
+    avg0 = vec_avg (A, B);
+    xor0 = vec_xor (A, B);
+    tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
+				  vec_and (vec_and (ones, vec_or (xor0, xor1)),
+					   vec_xor (avg0, avg1))));
+    vec_st (tmp, stride, dest);
+}
+/* avg, xy-interpolated, 8 wide: dest = vec_avg (dest, 2x2 rounded mean). */
+static void MC_avg_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
+				 const int stride, int height)
+{
+    vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
+    vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev;
+    /* perm0x: even rows, perm1x: odd rows; xB = xA + 1 picks the next pixel */
+    ones = vec_splat_u8 (1);
+    perm0A = vec_lvsl (0, ref);
+    perm0A = vec_mergeh (perm0A, perm0A);
+    perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
+    perm0B = vec_add (perm0A, ones);
+    perm1A = vec_lvsl (stride, ref);
+    perm1A = vec_mergeh (perm1A, perm1A);
+    perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
+    perm1B = vec_add (perm1A, ones);
+    /* two rows per pass, final pair peeled off: assumes an even height >= 4 */
+    height = (height >> 1) - 1;
+    /* avg0/xor0: vec_avg and XOR of the pixel pairs of the even source rows */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (8, ref);
+    ref += stride;
+    A = vec_perm (ref0, ref1, perm0A);
+    B = vec_perm (ref0, ref1, perm0B);
+    avg0 = vec_avg (A, B);
+    xor0 = vec_xor (A, B);
+    /* avg1/xor1: the same for the odd source rows */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (8, ref);
+    ref += stride;
+    prev = vec_ld (0, dest);
+    A = vec_perm (ref0, ref1, perm1A);
+    B = vec_perm (ref0, ref1, perm1B);
+    avg1 = vec_avg (A, B);
+    xor1 = vec_xor (A, B);
+    tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
+				  vec_and (vec_and (ones, vec_or (xor0, xor1)),
+					   vec_xor (avg0, avg1))));
+    /* the vec_sub term is (a + b + c + d + 2) >> 2; it is then blended with prev */
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (8, ref);
+	ref += stride;
+	prev = vec_ld (stride, dest);
+	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+	dest += stride;
+	A = vec_perm (ref0, ref1, perm0A);
+	B = vec_perm (ref0, ref1, perm0B);
+	avg0 = vec_avg (A, B);
+	xor0 = vec_xor (A, B);
+	tmp = vec_avg (prev,
+		       vec_sub (vec_avg (avg0, avg1),
+				vec_and (vec_and (ones, vec_or (xor0, xor1)),
+					 vec_xor (avg0, avg1))));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (8, ref);
+	ref += stride;
+	prev = vec_ld (stride, dest);
+	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+	dest += stride;
+	A = vec_perm (ref0, ref1, perm1A);
+	B = vec_perm (ref0, ref1, perm1B);
+	avg1 = vec_avg (A, B);
+	xor1 = vec_xor (A, B);
+	tmp = vec_avg (prev,
+		       vec_sub (vec_avg (avg0, avg1),
+				vec_and (vec_and (ones, vec_or (xor0, xor1)),
+					 vec_xor (avg0, avg1))));
+    } while (--height);
+    /* epilogue: one more source row is read, height + 1 rows in total */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (8, ref);
+    prev = vec_ld (stride, dest);
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+    dest += stride;
+    A = vec_perm (ref0, ref1, perm0A);
+    B = vec_perm (ref0, ref1, perm0B);
+    avg0 = vec_avg (A, B);
+    xor0 = vec_xor (A, B);
+    tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
+				  vec_and (vec_and (ones, vec_or (xor0, xor1)),
+					   vec_xor (avg0, avg1))));
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+}
+/* Presumably defines the mpeg2_mc_altivec table from the MC_*_altivec functions above (macro defined elsewhere) - confirm. */
+MPEG2_MC_EXTERN (altivec)
+
+#endif
diff --git a/src/libmpeg2new/libmpeg2/motion_comp_mlib.c b/src/libmpeg2new/libmpeg2/motion_comp_mlib.c
new file mode 100644
index 000000000..c7ed6b285
--- /dev/null
+++ b/src/libmpeg2new/libmpeg2/motion_comp_mlib.c
@@ -0,0 +1,190 @@
+/*
+ * motion_comp_mlib.c
+ * Copyright (C) 2000-2003 Håkan Hjort <d95hjort@dtek.chalmers.se>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#ifdef LIBMPEG2_MLIB
+
+#include <mlib_types.h>
+#include <mlib_status.h>
+#include <mlib_sys.h>
+#include <mlib_video.h>
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "mpeg2_internal.h"
+
+static void MC_put_o_16_mlib (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{	/* put, CopyRef: routine chosen by height, no interpolation */
+    if (height == 16)
+	mlib_VideoCopyRef_U8_U8_16x16 (dest, (uint8_t *) ref, stride);
+    else	/* every other height, unchecked */
+	mlib_VideoCopyRef_U8_U8_16x8 (dest, (uint8_t *) ref, stride);
+}
+
+static void MC_put_x_16_mlib (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{	/* put, InterpX; stride serves as both stride arguments */
+    if (height == 16)
+	mlib_VideoInterpX_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride);
+    else	/* every other height, unchecked */
+	mlib_VideoInterpX_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride);
+}
+
+static void MC_put_y_16_mlib (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{	/* put, InterpY; stride serves as both stride arguments */
+    if (height == 16)
+	mlib_VideoInterpY_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride);
+    else	/* every other height, unchecked */
+	mlib_VideoInterpY_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride);
+}
+
+static void MC_put_xy_16_mlib (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{	/* put, InterpXY; stride serves as both stride arguments */
+    if (height == 16)
+	mlib_VideoInterpXY_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride);
+    else	/* every other height, unchecked */
+	mlib_VideoInterpXY_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride);
+}
+
+static void MC_put_o_8_mlib (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{	/* put, CopyRef: routine chosen by height, no interpolation */
+    if (height == 8)
+	mlib_VideoCopyRef_U8_U8_8x8 (dest, (uint8_t *) ref, stride);
+    else	/* every other height, unchecked */
+	mlib_VideoCopyRef_U8_U8_8x4 (dest, (uint8_t *) ref, stride);
+}
+
+static void MC_put_x_8_mlib (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{	/* put, InterpX; stride serves as both stride arguments */
+    if (height == 8)
+	mlib_VideoInterpX_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride);
+    else	/* every other height, unchecked */
+	mlib_VideoInterpX_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride);
+}
+
+static void MC_put_y_8_mlib (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{	/* put, InterpY; stride serves as both stride arguments */
+    if (height == 8)
+	mlib_VideoInterpY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride);
+    else	/* every other height, unchecked */
+	mlib_VideoInterpY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride);
+}
+
+static void MC_put_xy_8_mlib (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{	/* put, InterpXY; stride serves as both stride arguments */
+    if (height == 8)
+	mlib_VideoInterpXY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride);
+    else	/* every other height, unchecked */
+	mlib_VideoInterpXY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride);
+}
+
+static void MC_avg_o_16_mlib (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{	/* avg, CopyRefAve: routine chosen by height, no interpolation */
+    if (height == 16)
+	mlib_VideoCopyRefAve_U8_U8_16x16 (dest, (uint8_t *) ref, stride);
+    else	/* every other height, unchecked */
+	mlib_VideoCopyRefAve_U8_U8_16x8 (dest, (uint8_t *) ref, stride);
+}
+
+static void MC_avg_x_16_mlib (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{	/* avg, InterpAveX; stride serves as both stride arguments */
+    if (height == 16)
+	mlib_VideoInterpAveX_U8_U8_16x16 (dest, (uint8_t *) ref,
+					  stride, stride);
+    else	/* every other height, unchecked */
+	mlib_VideoInterpAveX_U8_U8_16x8 (dest, (uint8_t *) ref,
+					 stride, stride);
+}
+
+static void MC_avg_y_16_mlib (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{	/* avg, InterpAveY; stride serves as both stride arguments */
+    if (height == 16)
+	mlib_VideoInterpAveY_U8_U8_16x16 (dest, (uint8_t *) ref,
+					  stride, stride);
+    else	/* every other height, unchecked */
+	mlib_VideoInterpAveY_U8_U8_16x8 (dest, (uint8_t *) ref,
+					 stride, stride);
+}
+
+static void MC_avg_xy_16_mlib (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{	/* avg, InterpAveXY; stride serves as both stride arguments */
+    if (height == 16)
+	mlib_VideoInterpAveXY_U8_U8_16x16 (dest, (uint8_t *) ref,
+					   stride, stride);
+    else	/* every other height, unchecked */
+	mlib_VideoInterpAveXY_U8_U8_16x8 (dest, (uint8_t *) ref,
+					  stride, stride);
+}
+
+static void MC_avg_o_8_mlib (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{	/* avg, CopyRefAve: routine chosen by height, no interpolation */
+    if (height == 8)
+	mlib_VideoCopyRefAve_U8_U8_8x8 (dest, (uint8_t *) ref, stride);
+    else	/* every other height, unchecked */
+	mlib_VideoCopyRefAve_U8_U8_8x4 (dest, (uint8_t *) ref, stride);
+}
+
+static void MC_avg_x_8_mlib (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{	/* avg, InterpAveX; stride serves as both stride arguments */
+    if (height == 8)
+	mlib_VideoInterpAveX_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride);
+    else	/* every other height, unchecked */
+	mlib_VideoInterpAveX_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride);
+}
+
+static void MC_avg_y_8_mlib (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{	/* avg, InterpAveY; stride serves as both stride arguments */
+    if (height == 8)
+	mlib_VideoInterpAveY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride);
+    else	/* every other height, unchecked */
+	mlib_VideoInterpAveY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride);
+}
+
+static void MC_avg_xy_8_mlib (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{	/* avg, InterpAveXY; stride serves as both stride arguments */
+    if (height == 8)
+	mlib_VideoInterpAveXY_U8_U8_8x8 (dest, (uint8_t *) ref,
+					 stride, stride);
+    else	/* every other height, unchecked */
+	mlib_VideoInterpAveXY_U8_U8_8x4 (dest, (uint8_t *) ref,
+					 stride, stride);
+}
+
+MPEG2_MC_EXTERN (mlib)	/* presumably emits the exported MC table - confirm */
+
+#endif
diff --git a/src/libmpeg2new/libmpeg2/motion_comp_mmx.c b/src/libmpeg2new/libmpeg2/motion_comp_mmx.c
new file mode 100644
index 000000000..2434ccee1
--- /dev/null
+++ b/src/libmpeg2new/libmpeg2/motion_comp_mmx.c
@@ -0,0 +1,1005 @@
+/*
+ * motion_comp_mmx.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#ifdef ARCH_X86
+
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "mpeg2_internal.h"
+#include "attributes.h"
+#include "mmx.h"
+
+#define CPU_MMXEXT 0	/* 'cpu' selector: pavg_* macros emit pavgb */
+#define CPU_3DNOW 1	/* 'cpu' selector: pavg_* macros emit pavgusb */
+
+
+/* MMX code - needs a rewrite */
+
+/*
+ * Motion Compensation frequently needs to average values using the
+ * formula (x+y+1)>>1. Both MMXEXT and 3Dnow include one instruction
+ * to compute this, but it's been left out of classic MMX.
+ *
+ * We need to be careful of overflows when doing this computation.
+ * Rather than unpacking data to 16-bits, which reduces parallelism,
+ * we use the following formulas:
+ *
+ * (x+y)>>1 == (x&y)+((x^y)>>1)
+ * (x+y+1)>>1 == (x|y)-((x^y)>>1)
+ */
+
+/* some rounding constants */
+static mmx_t mask1 = {0xfefefefefefefefeLL};	/* clears bit 0 of each byte */
+static mmx_t round4 = {0x0002000200020002LL};	/* 2 in each 16-bit word */
+
+/*
+ * This code should probably be compiled with loop unrolling
+ * (i.e., -funroll-loops in gcc) because some of the loops
+ * use a small static number of iterations. This was written
+ * with the assumption that the compiler knows best about when
+ * unrolling will help.
+ */
+
+static inline void mmx_zero_reg (void)
+{
+    /* clear mm0; the *_4_U8 helpers unpack bytes against this zero */
+    pxor_r2r (mm0, mm0);
+}
+
+static inline void mmx_average_2_U8 (uint8_t * dest, const uint8_t * src1,
+				     const uint8_t * src2)
+{
+    /* *dest = (*src1 + *src2 + 1)/ 2; on 8 bytes at once, evaluated as */
+    /* (x|y) - ((x^y)>>1).  Uses mm1-mm4; mm0 is not touched. */
+    movq_m2r (*src1, mm1);	/* load 8 src1 bytes */
+    movq_r2r (mm1, mm2);	/* copy 8 src1 bytes */
+
+    movq_m2r (*src2, mm3);	/* load 8 src2 bytes */
+    movq_r2r (mm3, mm4);	/* copy 8 src2 bytes */
+
+    pxor_r2r (mm1, mm3);	/* xor src1 and src2 */
+    pand_m2r (mask1, mm3);	/* clear bit 0 of every byte ... */
+    psrlq_i2r (1, mm3);		/* ... so this 64-bit shift is a per-byte /2 */
+    por_r2r (mm2, mm4);		/* or src1 and src2 */
+    psubb_r2r (mm3, mm4);	/* (x|y) - ((x^y)>>1) */
+    movq_r2m (mm4, *dest);	/* store result in dest */
+}
+
+static inline void mmx_interp_average_2_U8 (uint8_t * dest,
+					    const uint8_t * src1,
+					    const uint8_t * src2)
+{
+    /* *dest = (*dest + (*src1 + *src2 + 1)/ 2 + 1)/ 2; */
+    /* 8 bytes at once, two (x|y)-((x^y)>>1) passes; uses mm1-mm6 */
+    movq_m2r (*dest, mm1);	/* load 8 dest bytes */
+    movq_r2r (mm1, mm2);	/* copy 8 dest bytes */
+
+    movq_m2r (*src1, mm3);	/* load 8 src1 bytes */
+    movq_r2r (mm3, mm4);	/* copy 8 src1 bytes */
+
+    movq_m2r (*src2, mm5);	/* load 8 src2 bytes */
+    movq_r2r (mm5, mm6);	/* copy 8 src2 bytes */
+
+    pxor_r2r (mm3, mm5);	/* xor src1 and src2 */
+    pand_m2r (mask1, mm5);	/* clear bit 0 of every byte ... */
+    psrlq_i2r (1, mm5);		/* ... so this 64-bit shift is a per-byte /2 */
+    por_r2r (mm4, mm6);		/* or src1 and src2 */
+    psubb_r2r (mm5, mm6);	/* mm6 = rounded average of src1 and src2 */
+    movq_r2r (mm6, mm5);	/* copy subresult */
+
+    pxor_r2r (mm1, mm5);	/* xor srcavg and dest */
+    pand_m2r (mask1, mm5);	/* clear bit 0 of every byte ... */
+    psrlq_i2r (1, mm5);		/* ... so this 64-bit shift is a per-byte /2 */
+    por_r2r (mm2, mm6);		/* or srcavg and dest */
+    psubb_r2r (mm5, mm6);	/* mm6 = rounded average of srcavg and dest */
+    movq_r2m (mm6, *dest);	/* store result in dest */
+}
+
+static inline void mmx_average_4_U8 (uint8_t * dest, const uint8_t * src1,
+				     const uint8_t * src2,
+				     const uint8_t * src3,
+				     const uint8_t * src4)
+{
+    /* *dest = (*src1 + *src2 + *src3 + *src4 + 2)/ 4; */
+    /* 8 bytes at once; mm0 must already be zero; uses mm1-mm6 */
+    movq_m2r (*src1, mm1);	/* load 8 src1 bytes */
+    movq_r2r (mm1, mm2);	/* copy 8 src1 bytes */
+
+    punpcklbw_r2r (mm0, mm1);	/* unpack low src1 bytes */
+    punpckhbw_r2r (mm0, mm2);	/* unpack high src1 bytes */
+
+    movq_m2r (*src2, mm3);	/* load 8 src2 bytes */
+    movq_r2r (mm3, mm4);	/* copy 8 src2 bytes */
+
+    punpcklbw_r2r (mm0, mm3);	/* unpack low src2 bytes */
+    punpckhbw_r2r (mm0, mm4);	/* unpack high src2 bytes */
+
+    paddw_r2r (mm3, mm1);	/* add lows */
+    paddw_r2r (mm4, mm2);	/* add highs */
+
+    /* now have partials in mm1 and mm2 */
+
+    movq_m2r (*src3, mm3);	/* load 8 src3 bytes */
+    movq_r2r (mm3, mm4);	/* copy 8 src3 bytes */
+
+    punpcklbw_r2r (mm0, mm3);	/* unpack low src3 bytes */
+    punpckhbw_r2r (mm0, mm4);	/* unpack high src3 bytes */
+
+    paddw_r2r (mm3, mm1);	/* add lows */
+    paddw_r2r (mm4, mm2);	/* add highs */
+
+    movq_m2r (*src4, mm5);	/* load 8 src4 bytes */
+    movq_r2r (mm5, mm6);	/* copy 8 src4 bytes */
+
+    punpcklbw_r2r (mm0, mm5);	/* unpack low src4 bytes */
+    punpckhbw_r2r (mm0, mm6);	/* unpack high src4 bytes */
+
+    paddw_r2r (mm5, mm1);	/* add lows */
+    paddw_r2r (mm6, mm2);	/* add highs */
+
+    /* now have subtotal in mm1 and mm2 */
+
+    paddw_m2r (round4, mm1);	/* +2 in each word before the shift */
+    psraw_i2r (2, mm1);		/* /4 */
+    paddw_m2r (round4, mm2);	/* +2 in each word before the shift */
+    psraw_i2r (2, mm2);		/* /4 */
+
+    packuswb_r2r (mm2, mm1);	/* pack (w/ saturation) */
+    movq_r2m (mm1, *dest);	/* store result in dest */
+}
+
+static inline void mmx_interp_average_4_U8 (uint8_t * dest,
+					    const uint8_t * src1,
+					    const uint8_t * src2,
+					    const uint8_t * src3,
+					    const uint8_t * src4)
+{
+    /* *dest = (*dest + (*src1 + *src2 + *src3 + *src4 + 2)/ 4 + 1)/ 2; */
+    /* 8 bytes at once; mm0 must already be zero; uses mm1-mm6 */
+    movq_m2r (*src1, mm1);	/* load 8 src1 bytes */
+    movq_r2r (mm1, mm2);	/* copy 8 src1 bytes */
+
+    punpcklbw_r2r (mm0, mm1);	/* unpack low src1 bytes */
+    punpckhbw_r2r (mm0, mm2);	/* unpack high src1 bytes */
+
+    movq_m2r (*src2, mm3);	/* load 8 src2 bytes */
+    movq_r2r (mm3, mm4);	/* copy 8 src2 bytes */
+
+    punpcklbw_r2r (mm0, mm3);	/* unpack low src2 bytes */
+    punpckhbw_r2r (mm0, mm4);	/* unpack high src2 bytes */
+
+    paddw_r2r (mm3, mm1);	/* add lows */
+    paddw_r2r (mm4, mm2);	/* add highs */
+
+    /* now have partials in mm1 and mm2 */
+
+    movq_m2r (*src3, mm3);	/* load 8 src3 bytes */
+    movq_r2r (mm3, mm4);	/* copy 8 src3 bytes */
+
+    punpcklbw_r2r (mm0, mm3);	/* unpack low src3 bytes */
+    punpckhbw_r2r (mm0, mm4);	/* unpack high src3 bytes */
+
+    paddw_r2r (mm3, mm1);	/* add lows */
+    paddw_r2r (mm4, mm2);	/* add highs */
+
+    movq_m2r (*src4, mm5);	/* load 8 src4 bytes */
+    movq_r2r (mm5, mm6);	/* copy 8 src4 bytes */
+
+    punpcklbw_r2r (mm0, mm5);	/* unpack low src4 bytes */
+    punpckhbw_r2r (mm0, mm6);	/* unpack high src4 bytes */
+
+    paddw_r2r (mm5, mm1);	/* add lows */
+    paddw_r2r (mm6, mm2);	/* add highs */
+
+    paddw_m2r (round4, mm1);	/* +2 in each word before the shift */
+    psraw_i2r (2, mm1);		/* /4 */
+    paddw_m2r (round4, mm2);	/* +2 in each word before the shift */
+    psraw_i2r (2, mm2);		/* /4 */
+
+    /* now have subtotal/4 in mm1 and mm2 */
+
+    movq_m2r (*dest, mm3);	/* load 8 dest bytes */
+    movq_r2r (mm3, mm4);	/* copy 8 dest bytes */
+
+    packuswb_r2r (mm2, mm1);	/* pack (w/ saturation) */
+    movq_r2r (mm1,mm2);		/* copy subresult */
+
+    pxor_r2r (mm1, mm3);	/* xor srcavg and dest */
+    pand_m2r (mask1, mm3);	/* clear bit 0 of every byte ... */
+    psrlq_i2r (1, mm3);		/* ... so this 64-bit shift is a per-byte /2 */
+    por_r2r (mm2, mm4);		/* or srcavg and dest */
+    psubb_r2r (mm3, mm4);	/* mm4 = rounded average of srcavg and dest */
+    movq_r2m (mm4, *dest);	/* store result in dest */
+}
+
+/*-----------------------------------------------------------------------*/
+
+static inline void MC_avg_mmx (const int width, int height, uint8_t * dest,
+			       const uint8_t * ref, const int stride)
+{	/* dest = rounded average of dest and ref, row by row */
+    mmx_zero_reg ();	/* NOTE(review): mm0 is not read on this path */
+
+    do {		/* runs before the test: height must be >= 1 */
+	mmx_average_2_U8 (dest, dest, ref);
+
+	if (width == 16)	/* right 8 bytes of a 16-byte row */
+	    mmx_average_2_U8 (dest+8, dest+8, ref+8);
+
+	dest += stride;
+	ref += stride;
+    } while (--height);
+}
+
+static void MC_avg_o_16_mmx (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{	/* avg, no interpolation, 16-byte rows */
+    MC_avg_mmx (16, height, dest, ref, stride);
+}
+
+static void MC_avg_o_8_mmx (uint8_t * dest, const uint8_t * ref,
+			    int stride, int height)
+{	/* avg, no interpolation, 8-byte rows */
+    MC_avg_mmx (8, height, dest, ref, stride);
+}
+
+/*-----------------------------------------------------------------------*/
+
+static inline void MC_put_mmx (const int width, int height, uint8_t * dest,
+			       const uint8_t * ref, const int stride)
+{	/* dest = ref, row by row; 8 bytes per row, 16 when width == 16 */
+    mmx_zero_reg ();	/* NOTE(review): mm0 is not read on this path */
+
+    do {		/* runs before the test: height must be >= 1 */
+	movq_m2r (* ref, mm1);	/* load 8 ref bytes */
+	movq_r2m (mm1,* dest);	/* store 8 bytes at dest */
+
+	if (width == 16)
+	    {
+		movq_m2r (* (ref+8), mm1);	/* load next 8 ref bytes */
+		movq_r2m (mm1,* (dest+8));	/* store them at dest+8 */
+	    }
+
+	dest += stride;
+	ref += stride;
+    } while (--height);
+}
+
+static void MC_put_o_16_mmx (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{	/* put, no interpolation, 16-byte rows */
+    MC_put_mmx (16, height, dest, ref, stride);
+}
+
+static void MC_put_o_8_mmx (uint8_t * dest, const uint8_t * ref,
+			    int stride, int height)
+{	/* put, no interpolation, 8-byte rows */
+    MC_put_mmx (8, height, dest, ref, stride);
+}
+
+/*-----------------------------------------------------------------------*/
+
+/* Half pixel interpolation in the x direction */
+static inline void MC_avg_x_mmx (const int width, int height, uint8_t * dest,
+				 const uint8_t * ref, const int stride)
+{	/* dest = avg (dest, avg (ref, ref+1)), row by row */
+    mmx_zero_reg ();	/* NOTE(review): mm0 is not read on this path */
+
+    do {		/* runs before the test: height must be >= 1 */
+	mmx_interp_average_2_U8 (dest, ref, ref+1);
+
+	if (width == 16)	/* right 8 bytes of a 16-byte row */
+	    mmx_interp_average_2_U8 (dest+8, ref+8, ref+9);
+
+	dest += stride;
+	ref += stride;
+    } while (--height);
+}
+
+static void MC_avg_x_16_mmx (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{	/* avg, horizontal half-pel, 16-byte rows */
+    MC_avg_x_mmx (16, height, dest, ref, stride);
+}
+
+static void MC_avg_x_8_mmx (uint8_t * dest, const uint8_t * ref,
+			    int stride, int height)
+{	/* avg, horizontal half-pel, 8-byte rows */
+    MC_avg_x_mmx (8, height, dest, ref, stride);
+}
+
+/*-----------------------------------------------------------------------*/
+
+static inline void MC_put_x_mmx (const int width, int height, uint8_t * dest,
+				 const uint8_t * ref, const int stride)
+{	/* dest = avg (ref, ref+1), row by row */
+    mmx_zero_reg ();	/* NOTE(review): mm0 is not read on this path */
+
+    do {		/* runs before the test: height must be >= 1 */
+	mmx_average_2_U8 (dest, ref, ref+1);
+
+	if (width == 16)	/* right 8 bytes of a 16-byte row */
+	    mmx_average_2_U8 (dest+8, ref+8, ref+9);
+
+	dest += stride;
+	ref += stride;
+    } while (--height);
+}
+
+static void MC_put_x_16_mmx (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{	/* put, horizontal half-pel, 16-byte rows */
+    MC_put_x_mmx (16, height, dest, ref, stride);
+}
+
+static void MC_put_x_8_mmx (uint8_t * dest, const uint8_t * ref,
+			    int stride, int height)
+{	/* put, horizontal half-pel, 8-byte rows */
+    MC_put_x_mmx (8, height, dest, ref, stride);
+}
+
+/*-----------------------------------------------------------------------*/
+
+static inline void MC_avg_xy_mmx (const int width, int height, uint8_t * dest,
+				  const uint8_t * ref, const int stride)
+{	/* dest = avg (dest, 4-point average of ref and ref_next pairs) */
+    const uint8_t * ref_next = ref + stride;	/* row below ref */
+
+    mmx_zero_reg ();	/* the 4_U8 helper unpacks against mm0 == 0 */
+
+    do {		/* runs before the test: height must be >= 1 */
+	mmx_interp_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1);
+
+	if (width == 16)	/* right 8 bytes of a 16-byte row */
+	    mmx_interp_average_4_U8 (dest+8, ref+8, ref+9,
+				     ref_next+8, ref_next+9);
+
+	dest += stride;
+	ref += stride;
+	ref_next += stride;
+    } while (--height);
+}
+
+static void MC_avg_xy_16_mmx (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{	/* avg, horizontal + vertical half-pel, 16-byte rows */
+    MC_avg_xy_mmx (16, height, dest, ref, stride);
+}
+
+static void MC_avg_xy_8_mmx (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{	/* avg, horizontal + vertical half-pel, 8-byte rows */
+    MC_avg_xy_mmx (8, height, dest, ref, stride);
+}
+
+/*-----------------------------------------------------------------------*/
+
+static inline void MC_put_xy_mmx (const int width, int height, uint8_t * dest,
+				  const uint8_t * ref, const int stride)
+{	/* dest = 4-point average of the ref and ref_next pairs */
+    const uint8_t * ref_next = ref + stride;	/* row below ref */
+
+    mmx_zero_reg ();	/* the 4_U8 helper unpacks against mm0 == 0 */
+
+    do {		/* runs before the test: height must be >= 1 */
+	mmx_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1);
+
+	if (width == 16)	/* right 8 bytes of a 16-byte row */
+	    mmx_average_4_U8 (dest+8, ref+8, ref+9, ref_next+8, ref_next+9);
+
+	dest += stride;
+	ref += stride;
+	ref_next += stride;
+    } while (--height);
+}
+
+static void MC_put_xy_16_mmx (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{	/* put, horizontal + vertical half-pel, 16-byte rows */
+    MC_put_xy_mmx (16, height, dest, ref, stride);
+}
+
+static void MC_put_xy_8_mmx (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{	/* put, horizontal + vertical half-pel, 8-byte rows */
+    MC_put_xy_mmx (8, height, dest, ref, stride);
+}
+
+/*-----------------------------------------------------------------------*/
+
+static inline void MC_avg_y_mmx (const int width, int height, uint8_t * dest,
+				 const uint8_t * ref, const int stride)
+{	/* dest = avg (dest, avg (ref, ref_next)), row by row */
+    const uint8_t * ref_next = ref + stride;	/* row below ref */
+
+    mmx_zero_reg ();	/* NOTE(review): mm0 is not read on this path */
+
+    do {		/* runs before the test: height must be >= 1 */
+	mmx_interp_average_2_U8 (dest, ref, ref_next);
+
+	if (width == 16)	/* right 8 bytes of a 16-byte row */
+	    mmx_interp_average_2_U8 (dest+8, ref+8, ref_next+8);
+
+	dest += stride;
+	ref += stride;
+	ref_next += stride;
+    } while (--height);
+}
+
+static void MC_avg_y_16_mmx (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{	/* avg, vertical half-pel, 16-byte rows */
+    MC_avg_y_mmx (16, height, dest, ref, stride);
+}
+
+static void MC_avg_y_8_mmx (uint8_t * dest, const uint8_t * ref,
+			    int stride, int height)
+{	/* avg, vertical half-pel, 8-byte rows */
+    MC_avg_y_mmx (8, height, dest, ref, stride);
+}
+
+/*-----------------------------------------------------------------------*/
+
+static inline void MC_put_y_mmx (const int width, int height, uint8_t * dest,
+				 const uint8_t * ref, const int stride)
+{	/* dest = avg (ref, ref_next), row by row */
+    const uint8_t * ref_next = ref + stride;	/* row below ref */
+
+    mmx_zero_reg ();	/* NOTE(review): mm0 is not read on this path */
+
+    do {		/* runs before the test: height must be >= 1 */
+	mmx_average_2_U8 (dest, ref, ref_next);
+
+	if (width == 16)	/* right 8 bytes of a 16-byte row */
+	    mmx_average_2_U8 (dest+8, ref+8, ref_next+8);
+
+	dest += stride;
+	ref += stride;
+	ref_next += stride;
+    } while (--height);
+}
+
+static void MC_put_y_16_mmx (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{	/* put, vertical half-pel, 16-byte rows */
+    MC_put_y_mmx (16, height, dest, ref, stride);
+}
+
+static void MC_put_y_8_mmx (uint8_t * dest, const uint8_t * ref,
+			    int stride, int height)
+{	/* put, vertical half-pel, 8-byte rows */
+    MC_put_y_mmx (8, height, dest, ref, stride);
+}
+
+
+MPEG2_MC_EXTERN (mmx)	/* presumably emits the exported MC table - confirm */
+
+
+
+
+
+
+
+/* CPU_MMXEXT/CPU_3DNOW adaptation layer */
+
+#define pavg_r2r(src,dest)		\
+do {	/* reads a 'cpu' variable from the expanding scope */	\
+    if (cpu == CPU_MMXEXT)		\
+	pavgb_r2r (src, dest);		\
+    else	/* any other value: 3DNow! opcode */	\
+	pavgusb_r2r (src, dest);	\
+} while (0)
+
+#define pavg_m2r(src,dest)		\
+do {	/* reads a 'cpu' variable from the expanding scope */	\
+    if (cpu == CPU_MMXEXT)		\
+	pavgb_m2r (src, dest);		\
+    else	/* any other value: 3DNow! opcode */	\
+	pavgusb_m2r (src, dest);	\
+} while (0)
+
+
+/* code shared by the CPU_MMXEXT and CPU_3DNOW entry points */
+
+
+static inline void MC_put1_8 (int height, uint8_t * dest, const uint8_t * ref,
+			      const int stride)
+{	/* dest = ref, 8 bytes per row, moved through mm0 */
+    do {		/* runs before the test: height must be >= 1 */
+	movq_m2r (*ref, mm0);
+	movq_r2m (mm0, *dest);
+	ref += stride;
+	dest += stride;
+    } while (--height);
+}
+
+static inline void MC_put1_16 (int height, uint8_t * dest, const uint8_t * ref,
+			       const int stride)
+{	/* dest = ref, 16 bytes per row, moved through mm0 and mm1 */
+    do {		/* runs before the test: height must be >= 1 */
+	movq_m2r (*ref, mm0);
+	movq_m2r (*(ref+8), mm1);
+	ref += stride;
+	movq_r2m (mm0, *dest);
+	movq_r2m (mm1, *(dest+8));
+	dest += stride;
+    } while (--height);
+}
+
+static inline void MC_avg1_8 (int height, uint8_t * dest, const uint8_t * ref,
+			      const int stride, const int cpu)
+{	/* dest = pavg (ref, dest), 8 bytes per row */
+    do {		/* runs before the test: height must be >= 1 */
+	movq_m2r (*ref, mm0);
+	pavg_m2r (*dest, mm0);	/* macro reads 'cpu' to pick the opcode */
+	ref += stride;
+	movq_r2m (mm0, *dest);
+	dest += stride;
+    } while (--height);
+}
+
+static inline void MC_avg1_16 (int height, uint8_t * dest, const uint8_t * ref,
+			       const int stride, const int cpu)
+{	/* dest = pavg (ref, dest), 16 bytes per row */
+    do {		/* runs before the test: height must be >= 1 */
+	movq_m2r (*ref, mm0);
+	movq_m2r (*(ref+8), mm1);
+	pavg_m2r (*dest, mm0);	/* macro reads 'cpu' to pick the opcode */
+	pavg_m2r (*(dest+8), mm1);
+	movq_r2m (mm0, *dest);
+	ref += stride;
+	movq_r2m (mm1, *(dest+8));
+	dest += stride;
+    } while (--height);
+}
+
+static inline void MC_put2_8 (int height, uint8_t * dest, const uint8_t * ref,
+			      const int stride, const int offset,
+			      const int cpu)
+{	/* dest = pavg (ref, ref+offset), 8 bytes per row */
+    do {		/* runs before the test: height must be >= 1 */
+	movq_m2r (*ref, mm0);
+	pavg_m2r (*(ref+offset), mm0);	/* macro reads 'cpu' */
+	ref += stride;
+	movq_r2m (mm0, *dest);
+	dest += stride;
+    } while (--height);
+}
+
+static inline void MC_put2_16 (int height, uint8_t * dest, const uint8_t * ref,
+			       const int stride, const int offset,
+			       const int cpu)
+{	/* dest = pavg (ref, ref+offset), 16 bytes per row */
+    do {		/* runs before the test: height must be >= 1 */
+	movq_m2r (*ref, mm0);
+	movq_m2r (*(ref+8), mm1);
+	pavg_m2r (*(ref+offset), mm0);	/* macro reads 'cpu' */
+	pavg_m2r (*(ref+offset+8), mm1);
+	movq_r2m (mm0, *dest);
+	ref += stride;
+	movq_r2m (mm1, *(dest+8));
+	dest += stride;
+    } while (--height);
+}
+
+static inline void MC_avg2_8 (int height, uint8_t * dest, const uint8_t * ref,
+			      const int stride, const int offset,
+			      const int cpu)
+{	/* dest = pavg (pavg (ref, ref+offset), dest), 8 bytes per row */
+    do {		/* runs before the test: height must be >= 1 */
+	movq_m2r (*ref, mm0);
+	pavg_m2r (*(ref+offset), mm0);	/* macro reads 'cpu' */
+	pavg_m2r (*dest, mm0);
+	ref += stride;
+	movq_r2m (mm0, *dest);
+	dest += stride;
+    } while (--height);
+}
+
+static inline void MC_avg2_16 (int height, uint8_t * dest, const uint8_t * ref,
+			       const int stride, const int offset,
+			       const int cpu)
+{	/* dest = pavg (pavg (ref, ref+offset), dest), 16 bytes per row */
+    do {		/* runs before the test: height must be >= 1 */
+	movq_m2r (*ref, mm0);
+	movq_m2r (*(ref+8), mm1);
+	pavg_m2r (*(ref+offset), mm0);	/* macro reads 'cpu' */
+	pavg_m2r (*(ref+offset+8), mm1);
+	pavg_m2r (*dest, mm0);
+	pavg_m2r (*(dest+8), mm1);
+	ref += stride;
+	movq_r2m (mm0, *dest);
+	movq_r2m (mm1, *(dest+8));
+	dest += stride;
+    } while (--height);
+}
+
+static mmx_t mask_one = {0x0101010101010101LL};	/* bit 0 of each byte */
+
+static inline void MC_put4_8 (int height, uint8_t * dest, const uint8_t * ref,
+			      const int stride, const int cpu)
+{	/* dest = 4-point average of (ref, ref+1) and the pair one row down */
+    movq_m2r (*ref, mm0);
+    movq_m2r (*(ref+1), mm1);
+    movq_r2r (mm0, mm7);
+    pxor_r2r (mm1, mm7);	/* mm7 = xor of the upper pair */
+    pavg_r2r (mm1, mm0);	/* mm0 = pavg of the upper pair */
+    ref += stride;
+
+    do {		/* runs before the test: height must be >= 1 */
+	movq_m2r (*ref, mm2);
+	movq_r2r (mm0, mm5);
+
+	movq_m2r (*(ref+1), mm3);
+	movq_r2r (mm2, mm6);
+
+	pxor_r2r (mm3, mm6);	/* mm6 = xor of the lower pair */
+	pavg_r2r (mm3, mm2);	/* mm2 = pavg of the lower pair */
+
+	por_r2r (mm6, mm7);	/* either pair had an odd sum */
+	pxor_r2r (mm2, mm5);	/* upper pavg ^ lower pavg */
+
+	pand_r2r (mm5, mm7);
+	pavg_r2r (mm2, mm0);	/* pavg of the two pair averages */
+
+	pand_m2r (mask_one, mm7);	/* keep only bit 0 of each byte */
+
+	psubusb_r2r (mm7, mm0);	/* take back the double round-up */
+
+	ref += stride;
+	movq_r2m (mm0, *dest);
+	dest += stride;
+
+	movq_r2r (mm6, mm7);	/* lower xor is next upper xor (unroll !) */
+	movq_r2r (mm2, mm0);	/* lower pavg is next upper pavg (unroll !) */
+    } while (--height);
+}
+
+static inline void MC_put4_16 (int height, uint8_t * dest, const uint8_t * ref,
+			       const int stride, const int cpu)
+{	/* dest = 4-point average of each 2x2 ref neighbourhood, 16 wide */
+    do {		/* runs before the test: height must be >= 1 */
+	movq_m2r (*ref, mm0);
+	movq_m2r (*(ref+stride+1), mm1);
+	movq_r2r (mm0, mm7);
+	movq_m2r (*(ref+1), mm2);
+	pxor_r2r (mm1, mm7);	/* xor of one diagonal pair */
+	movq_m2r (*(ref+stride), mm3);
+	movq_r2r (mm2, mm6);
+	pxor_r2r (mm3, mm6);	/* xor of the other diagonal pair */
+	pavg_r2r (mm1, mm0);
+	pavg_r2r (mm3, mm2);
+	por_r2r (mm6, mm7);
+	movq_r2r (mm0, mm6);
+	pxor_r2r (mm2, mm6);	/* xor of the two pair averages */
+	pand_r2r (mm6, mm7);
+	pand_m2r (mask_one, mm7);	/* keep only bit 0 of each byte */
+	pavg_r2r (mm2, mm0);
+	psubusb_r2r (mm7, mm0);	/* take back the double round-up */
+	movq_r2m (mm0, *dest);
+
+	movq_m2r (*(ref+8), mm0);	/* same sequence for bytes 8-15 */
+	movq_m2r (*(ref+stride+9), mm1);
+	movq_r2r (mm0, mm7);
+	movq_m2r (*(ref+9), mm2);
+	pxor_r2r (mm1, mm7);
+	movq_m2r (*(ref+stride+8), mm3);
+	movq_r2r (mm2, mm6);
+	pxor_r2r (mm3, mm6);
+	pavg_r2r (mm1, mm0);
+	pavg_r2r (mm3, mm2);
+	por_r2r (mm6, mm7);
+	movq_r2r (mm0, mm6);
+	pxor_r2r (mm2, mm6);
+	pand_r2r (mm6, mm7);
+	pand_m2r (mask_one, mm7);
+	pavg_r2r (mm2, mm0);
+	psubusb_r2r (mm7, mm0);
+	ref += stride;
+	movq_r2m (mm0, *(dest+8));
+	dest += stride;
+    } while (--height);
+}
+
+static inline void MC_avg4_8 (int height, uint8_t * dest, const uint8_t * ref,
+			      const int stride, const int cpu)
+{	/* dest = pavg (dest, 4-point average of the 2x2 ref neighbourhood) */
+    do {		/* runs before the test: height must be >= 1 */
+	movq_m2r (*ref, mm0);
+	movq_m2r (*(ref+stride+1), mm1);
+	movq_r2r (mm0, mm7);
+	movq_m2r (*(ref+1), mm2);
+	pxor_r2r (mm1, mm7);	/* xor of one diagonal pair */
+	movq_m2r (*(ref+stride), mm3);
+	movq_r2r (mm2, mm6);
+	pxor_r2r (mm3, mm6);	/* xor of the other diagonal pair */
+	pavg_r2r (mm1, mm0);
+	pavg_r2r (mm3, mm2);
+	por_r2r (mm6, mm7);
+	movq_r2r (mm0, mm6);
+	pxor_r2r (mm2, mm6);	/* xor of the two pair averages */
+	pand_r2r (mm6, mm7);
+	pand_m2r (mask_one, mm7);	/* keep only bit 0 of each byte */
+	pavg_r2r (mm2, mm0);
+	psubusb_r2r (mm7, mm0);	/* take back the double round-up */
+	movq_m2r (*dest, mm1);
+	pavg_r2r (mm1, mm0);	/* blend with the existing dest bytes */
+	ref += stride;
+	movq_r2m (mm0, *dest);
+	dest += stride;
+    } while (--height);
+}
+
+static inline void MC_avg4_16 (int height, uint8_t * dest, const uint8_t * ref,
+			       const int stride, const int cpu)
+{	/* dest = pavg (dest, 4-point average of the 2x2 ref area), 16 wide */
+    do {		/* runs before the test: height must be >= 1 */
+	movq_m2r (*ref, mm0);
+	movq_m2r (*(ref+stride+1), mm1);
+	movq_r2r (mm0, mm7);
+	movq_m2r (*(ref+1), mm2);
+	pxor_r2r (mm1, mm7);	/* xor of one diagonal pair */
+	movq_m2r (*(ref+stride), mm3);
+	movq_r2r (mm2, mm6);
+	pxor_r2r (mm3, mm6);	/* xor of the other diagonal pair */
+	pavg_r2r (mm1, mm0);
+	pavg_r2r (mm3, mm2);
+	por_r2r (mm6, mm7);
+	movq_r2r (mm0, mm6);
+	pxor_r2r (mm2, mm6);	/* xor of the two pair averages */
+	pand_r2r (mm6, mm7);
+	pand_m2r (mask_one, mm7);	/* keep only bit 0 of each byte */
+	pavg_r2r (mm2, mm0);
+	psubusb_r2r (mm7, mm0);	/* take back the double round-up */
+	movq_m2r (*dest, mm1);
+	pavg_r2r (mm1, mm0);	/* blend with the existing dest bytes */
+	movq_r2m (mm0, *dest);
+
+	movq_m2r (*(ref+8), mm0);	/* same sequence for bytes 8-15 */
+	movq_m2r (*(ref+stride+9), mm1);
+	movq_r2r (mm0, mm7);
+	movq_m2r (*(ref+9), mm2);
+	pxor_r2r (mm1, mm7);
+	movq_m2r (*(ref+stride+8), mm3);
+	movq_r2r (mm2, mm6);
+	pxor_r2r (mm3, mm6);
+	pavg_r2r (mm1, mm0);
+	pavg_r2r (mm3, mm2);
+	por_r2r (mm6, mm7);
+	movq_r2r (mm0, mm6);
+	pxor_r2r (mm2, mm6);
+	pand_r2r (mm6, mm7);
+	pand_m2r (mask_one, mm7);
+	pavg_r2r (mm2, mm0);
+	psubusb_r2r (mm7, mm0);
+	movq_m2r (*(dest+8), mm1);
+	pavg_r2r (mm1, mm0);
+	ref += stride;
+	movq_r2m (mm0, *(dest+8));
+	dest += stride;
+    } while (--height);
+}
+
+static void MC_avg_o_16_mmxext (uint8_t * dest, const uint8_t * ref,
+				int stride, int height)
+{	/* avg, no interpolation, 16-byte rows, pavgb */
+    MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT);
+}
+
+static void MC_avg_o_8_mmxext (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{	/* avg, no interpolation, 8-byte rows, pavgb */
+    MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT);
+}
+
+static void MC_put_o_16_mmxext (uint8_t * dest, const uint8_t * ref,
+				int stride, int height)
+{	/* put, no interpolation, 16-byte rows; helper takes no cpu selector */
+    MC_put1_16 (height, dest, ref, stride);
+}
+
+static void MC_put_o_8_mmxext (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{	/* put, no interpolation, 8-byte rows; helper takes no cpu selector */
+    MC_put1_8 (height, dest, ref, stride);
+}
+
+static void MC_avg_x_16_mmxext (uint8_t * dest, const uint8_t * ref,
+				int stride, int height)
+{	/* avg, offset 1 = right neighbour, 16-byte rows, pavgb */
+    MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
+}
+
+static void MC_avg_x_8_mmxext (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{	/* avg, offset 1 = right neighbour, 8-byte rows, pavgb */
+    MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
+}
+
+static void MC_put_x_16_mmxext (uint8_t * dest, const uint8_t * ref,
+				int stride, int height)
+{	/* put, offset 1 = right neighbour, 16-byte rows, pavgb */
+    MC_put2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
+}
+
+static void MC_put_x_8_mmxext (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{	/* put, offset 1 = right neighbour, 8-byte rows, pavgb */
+    MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
+}
+
+static void MC_avg_y_16_mmxext (uint8_t * dest, const uint8_t * ref,
+				int stride, int height)
+{	/* avg, offset stride = row below, 16-byte rows, pavgb */
+    MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
+}
+
+static void MC_avg_y_8_mmxext (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{	/* avg, offset stride = row below, 8-byte rows, pavgb */
+    MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
+}
+
+static void MC_put_y_16_mmxext (uint8_t * dest, const uint8_t * ref,
+				int stride, int height)
+{	/* put, offset stride = row below, 16-byte rows, pavgb */
+    MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
+}
+
+static void MC_put_y_8_mmxext (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{	/* put, offset stride = row below, 8-byte rows, pavgb */
+    MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
+}
+
+static void MC_avg_xy_16_mmxext (uint8_t * dest, const uint8_t * ref,
+				 int stride, int height)
+{	/* avg, 2x2 neighbourhood, 16-byte rows, pavgb */
+    MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT);
+}
+
+static void MC_avg_xy_8_mmxext (uint8_t * dest, const uint8_t * ref,
+				int stride, int height)
+{	/* avg, 2x2 neighbourhood, 8-byte rows, pavgb */
+    MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT);
+}
+
+static void MC_put_xy_16_mmxext (uint8_t * dest, const uint8_t * ref,
+				 int stride, int height)
+{	/* put, 2x2 neighbourhood, 16-byte rows, pavgb */
+    MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT);
+}
+
+static void MC_put_xy_8_mmxext (uint8_t * dest, const uint8_t * ref,
+				int stride, int height)
+{	/* put, 2x2 neighbourhood, 8-byte rows, pavgb */
+    MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT);
+}
+
+
+MPEG2_MC_EXTERN (mmxext)	/* presumably emits the exported MC table - confirm */
+
+
+
+static void MC_avg_o_16_3dnow (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{	/* avg, no interpolation, 16-byte rows, pavgusb */
+    MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW);
+}
+
+static void MC_avg_o_8_3dnow (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{	/* avg, no interpolation, 8-byte rows, pavgusb */
+    MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW);
+}
+
+static void MC_put_o_16_3dnow (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{	/* put, no interpolation, 16-byte rows; helper takes no cpu selector */
+    MC_put1_16 (height, dest, ref, stride);
+}
+
+static void MC_put_o_8_3dnow (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{	/* put, no interpolation, 8-byte rows; helper takes no cpu selector */
+    MC_put1_8 (height, dest, ref, stride);
+}
+
+static void MC_avg_x_16_3dnow (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{	/* avg, offset 1 = right neighbour, 16-byte rows, pavgusb */
+    MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
+}
+
+static void MC_avg_x_8_3dnow (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{	/* avg, offset 1 = right neighbour, 8-byte rows, pavgusb */
+    MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
+}
+
+static void MC_put_x_16_3dnow (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{	/* put, offset 1 = right neighbour, 16-byte rows, pavgusb */
+    MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
+}
+
+static void MC_put_x_8_3dnow (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{	/* put, offset 1 = right neighbour, 8-byte rows, pavgusb */
+    MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
+}
+
+static void MC_avg_y_16_3dnow (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{	/* avg, offset stride = row below, 16-byte rows, pavgusb */
+    MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
+}
+
+static void MC_avg_y_8_3dnow (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{	/* avg, offset stride = row below, 8-byte rows, pavgusb */
+    MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
+}
+
+static void MC_put_y_16_3dnow (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{	/* put, offset stride = row below, 16-byte rows, pavgusb */
+    MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
+}
+
+static void MC_put_y_8_3dnow (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{	/* put, offset stride = row below, 8-byte rows, pavgusb */
+    MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
+}
+
+static void MC_avg_xy_16_3dnow (uint8_t * dest, const uint8_t * ref,
+				int stride, int height)
+{	/* avg, 2x2 neighbourhood, 16-byte rows, pavgusb */
+    MC_avg4_16 (height, dest, ref, stride, CPU_3DNOW);
+}
+
+static void MC_avg_xy_8_3dnow (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{	/* avg, 2x2 neighbourhood, 8-byte rows, pavgusb */
+    MC_avg4_8 (height, dest, ref, stride, CPU_3DNOW);
+}
+
+static void MC_put_xy_16_3dnow (uint8_t * dest, const uint8_t * ref,
+				int stride, int height)
+{	/* put, 2x2 neighbourhood, 16-byte rows, pavgusb */
+    MC_put4_16 (height, dest, ref, stride, CPU_3DNOW);
+}
+
+static void MC_put_xy_8_3dnow (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{	/* put, 2x2 neighbourhood, 8-byte rows, pavgusb */
+    MC_put4_8 (height, dest, ref, stride, CPU_3DNOW);
+}
+
+
+MPEG2_MC_EXTERN (3dnow)	/* presumably emits the exported MC table - confirm */
+
+#endif
diff --git a/src/libmpeg2new/libmpeg2/mpeg2_internal.h b/src/libmpeg2new/libmpeg2/mpeg2_internal.h
new file mode 100644
index 000000000..ccd1bc4b5
--- /dev/null
+++ b/src/libmpeg2new/libmpeg2/mpeg2_internal.h
@@ -0,0 +1,301 @@
+/*
+ * mpeg2_internal.h
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/* macroblock modes: bit flags combined in get_macroblock_modes()'s result */
+#define MACROBLOCK_INTRA 1
+#define MACROBLOCK_PATTERN 2
+#define MACROBLOCK_MOTION_BACKWARD 4
+#define MACROBLOCK_MOTION_FORWARD 8
+#define MACROBLOCK_QUANT 16
+#define DCT_TYPE_INTERLACED 32
+/* motion_type: 2-bit field kept in bits 6-7 of the same mode word */
+#define MOTION_TYPE_MASK (3*64)
+#define MOTION_TYPE_BASE 64
+#define MC_FIELD (1*64)
+#define MC_FRAME (2*64)
+#define MC_16X8 (2*64)	/* same code as MC_FRAME; presumably told apart by picture_structure -- confirm */
+#define MC_DMV (3*64)
+
+/* picture structure (values of decoder->picture_structure) */
+#define TOP_FIELD 1
+#define BOTTOM_FIELD 2
+#define FRAME_PICTURE 3	/* == TOP_FIELD | BOTTOM_FIELD */
+
+/* picture coding type (values of decoder->coding_type) */
+#define I_TYPE 1
+#define P_TYPE 2
+#define B_TYPE 3
+#define D_TYPE 4
+/* Per-direction motion state; mpeg2_decoder_s keeps one forward and one backward. */
+typedef struct {
+    uint8_t * ref[2][3];	/* reference pointers; presumably [field][plane] -- confirm */
+    uint8_t ** ref2[2];
+    int pmv[2][2];		/* presumably the motion vector predictors -- confirm */
+    int f_code[2];		/* presumably one f_code per vector component -- confirm */
+} motion_t;
+/* Slice decoder state; slice.c uses it as mpeg2_decoder_t (typedef presumably in mpeg2.h). */
+struct mpeg2_decoder_s {
+    /* first, state that carries information from one macroblock to the */
+    /* next inside a slice, and is never used outside of mpeg2_slice() */
+
+    /* DCT coefficients - should be kept aligned ! */
+    int16_t DCTblock[64];
+
+    /* bit parsing stuff */
+    uint32_t bitstream_buf;		/* current 32 bit working set */
+    int bitstream_bits;			/* used bits in working set */
+    const uint8_t * bitstream_ptr;	/* buffer with stream data */
+
+    uint8_t * dest[3];
+    uint8_t * picture_dest[3];
+    void (* convert) (void * fbuf_id, uint8_t * const * src,
+		      unsigned int v_offset);
+    void * fbuf_id;
+
+    int offset;
+    int stride;
+    int uv_stride;
+    unsigned int limit_x;
+    unsigned int limit_y_16;
+    unsigned int limit_y_8;
+    unsigned int limit_y;
+
+    /* Motion vectors */
+    /* The f_ and b_ correspond to the forward and backward motion */
+    /* predictors */
+    motion_t b_motion;
+    motion_t f_motion;
+
+    /* predictor for DC coefficients in intra blocks */
+    int16_t dc_dct_pred[3];
+
+    int quantizer_scale;	/* remove */
+    int dmv_offset;		/* remove */
+    unsigned int v_offset;	/* remove */
+
+    /* now non-slice-specific information */
+
+    /* sequence header stuff */
+    uint8_t intra_quantizer_matrix [64];
+    uint8_t non_intra_quantizer_matrix [64];
+
+    /* The width and height of the picture snapped to macroblock units */
+    int width;
+    int height;
+    int vertical_position_extension;
+
+    /* picture header stuff */
+
+    /* what type of picture this is (I, P, B, D) */
+    int coding_type;
+
+    /* picture coding extension stuff */
+
+    /* quantization factor for intra dc coefficients */
+    int intra_dc_precision;
+    /* top/bottom/both fields */
+    int picture_structure;
+    /* bool to indicate all predictions are frame based */
+    int frame_pred_frame_dct;
+    /* bool to indicate whether intra blocks have motion vectors */
+    /* (for concealment) */
+    int concealment_motion_vectors;
+    /* nonzero selects the non-linear quantizer_scale_code mapping */
+    int q_scale_type;
+    /* bool to use different vlc tables */
+    int intra_vlc_format;
+    /* used for DMV MC */
+    int top_field_first;
+
+    /* stuff derived from bitstream */
+
+    /* pointer to the zigzag scan we're supposed to be using */
+    const uint8_t * scan;
+
+    int second_field;
+
+    int mpeg1;	/* nonzero: slice_intra_DCT uses the MPEG-1 block decoder */
+};
+/* Holder for one frame buffer descriptor; mpeg2dec_s keeps three in fbuf_alloc[]. */
+typedef struct {
+    mpeg2_fbuf_t fbuf;
+} fbuf_alloc_t;
+/* Top-level decoder context: parser state, chunk buffer, headers and frame buffers. */
+struct mpeg2dec_s {
+    mpeg2_decoder_t decoder;
+
+    mpeg2_info_t info;
+
+    uint32_t shift;
+    int is_display_initialized;
+    mpeg2_state_t (* action) (struct mpeg2dec_s * mpeg2dec);
+    mpeg2_state_t state;
+    uint32_t ext_state;
+
+    /* allocated in init - gcc has problems allocating such big structures */
+    uint8_t * chunk_buffer;
+    /* pointer to start of the current chunk */
+    uint8_t * chunk_start;
+    /* pointer to current position in chunk_buffer */
+    uint8_t * chunk_ptr;
+    /* last start code ? */
+    uint8_t code;
+
+    /* PTS */
+    uint32_t pts_current, pts_previous;
+    int num_pts;
+    int bytes_since_pts;
+
+    int first;
+    int alloc_index_user;
+    int alloc_index;
+    uint8_t first_decode_slice;
+    uint8_t nb_decode_slices;
+
+    mpeg2_sequence_t new_sequence;
+    mpeg2_sequence_t sequence;
+    mpeg2_gop_t gop;
+    mpeg2_picture_t pictures[4];
+    mpeg2_picture_t * picture;
+    /*const*/ mpeg2_fbuf_t * fbuf[3];	/* 0: current fbuf, 1-2: prediction fbufs */
+
+    fbuf_alloc_t fbuf_alloc[3];
+    int custom_fbuf;
+
+    uint8_t * yuv_buf[3][3];
+    int yuv_index;
+    void * convert_id;
+    int convert_size[3];
+    void (* convert_start) (void * id, uint8_t * const * dest, int flags);
+    void (* convert_copy) (void * id, uint8_t * const * src,
+			   unsigned int v_offset);
+
+    uint8_t * buf_start;
+    uint8_t * buf_end;
+
+    int16_t display_offset_x, display_offset_y;
+    /* a second pair of quantizer matrices; the embedded decoder has its own */
+    int copy_matrix;
+    uint8_t intra_quantizer_matrix [64];
+    uint8_t non_intra_quantizer_matrix [64];
+};
+/* Argument type of mpeg2_cpu_state_save/restore; has a save area only on ARCH_PPC. */
+typedef struct {
+#ifdef ARCH_PPC
+    uint8_t regv[12*16];	/* 12 slots of 16 bytes; presumably vector registers -- confirm */
+#endif
+    int dummy;
+} cpu_state_t;
+
+/* alloc.c -- ALLOC_* are presumably the "reason" values for mpeg2_malloc() */
+#define ALLOC_MPEG2DEC 0
+#define ALLOC_CHUNK 1
+#define ALLOC_YUV 2
+#define ALLOC_CONVERT_ID 3
+#define ALLOC_CONVERTED 4
+void * mpeg2_malloc (int size, int reason);
+void mpeg2_free (void * buf);
+
+/* cpu_accel.c */
+uint32_t mpeg2_detect_accel (void);
+
+/* cpu_state.c */
+void mpeg2_cpu_state_init (uint32_t accel);
+
+/* decode.c */
+mpeg2_state_t mpeg2_seek_sequence (mpeg2dec_t * mpeg2dec);
+mpeg2_state_t mpeg2_parse_header (mpeg2dec_t * mpeg2dec);
+
+/* header.c */
+void mpeg2_header_state_init (mpeg2dec_t * mpeg2dec);
+int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec);
+int mpeg2_header_gop (mpeg2dec_t * mpeg2dec);
+mpeg2_state_t mpeg2_header_picture_start (mpeg2dec_t * mpeg2dec);
+int mpeg2_header_picture (mpeg2dec_t * mpeg2dec);
+int mpeg2_header_extension (mpeg2dec_t * mpeg2dec);
+int mpeg2_header_user_data (mpeg2dec_t * mpeg2dec);
+void mpeg2_header_matrix_finalize (mpeg2dec_t * mpeg2dec);
+void mpeg2_header_sequence_finalize (mpeg2dec_t * mpeg2dec);
+mpeg2_state_t mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec);
+mpeg2_state_t mpeg2_header_end (mpeg2dec_t * mpeg2dec);
+void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int coding_type);
+
+/* idct.c */
+void mpeg2_idct_init (uint32_t accel);
+
+/* idct_mlib.c */
+void mpeg2_idct_add_mlib (int last, int16_t * block,
+			  uint8_t * dest, int stride);
+void mpeg2_idct_copy_mlib_non_ieee (int16_t * block, uint8_t * dest,
+				    int stride);
+void mpeg2_idct_add_mlib_non_ieee (int last, int16_t * block,
+				   uint8_t * dest, int stride);
+
+/* idct_mmx.c */
+void mpeg2_idct_copy_mmxext (int16_t * block, uint8_t * dest, int stride);
+void mpeg2_idct_add_mmxext (int last, int16_t * block,
+			    uint8_t * dest, int stride);
+void mpeg2_idct_copy_mmx (int16_t * block, uint8_t * dest, int stride);
+void mpeg2_idct_add_mmx (int last, int16_t * block,
+			 uint8_t * dest, int stride);
+void mpeg2_idct_mmx_init (void);
+
+/* idct_altivec.c */
+void mpeg2_idct_copy_altivec (int16_t * block, uint8_t * dest, int stride);
+void mpeg2_idct_add_altivec (int last, int16_t * block,
+			     uint8_t * dest, int stride);
+void mpeg2_idct_altivec_init (void);
+
+/* idct_alpha.c */
+void mpeg2_idct_copy_mvi (int16_t * block, uint8_t * dest, int stride);
+void mpeg2_idct_add_mvi (int last, int16_t * block,
+			 uint8_t * dest, int stride);
+void mpeg2_idct_copy_alpha (int16_t * block, uint8_t * dest, int stride);
+void mpeg2_idct_add_alpha (int last, int16_t * block,
+			   uint8_t * dest, int stride);
+void mpeg2_idct_alpha_init (void);
+
+/* motion_comp.c */
+void mpeg2_mc_init (uint32_t accel);
+/* MC routine signature: (dest, ref, stride, height), as used by the MC_* functions. */
+typedef void mpeg2_mc_fct (uint8_t *, const uint8_t *, int, int);
+/* Dispatch table: eight put and eight avg routines, ordered as in MPEG2_MC_EXTERN. */
+typedef struct {
+    mpeg2_mc_fct * put [8];
+    mpeg2_mc_fct * avg [8];
+} mpeg2_mc_t;
+/* Defines mpeg2_mc_<x>; slots 0-3 are o, x, y, xy for _16, slots 4-7 the same for _8. */
+#define MPEG2_MC_EXTERN(x) mpeg2_mc_t mpeg2_mc_##x = {			  \
+    {MC_put_o_16_##x, MC_put_x_16_##x, MC_put_y_16_##x, MC_put_xy_16_##x, \
+     MC_put_o_8_##x,  MC_put_x_8_##x,  MC_put_y_8_##x,  MC_put_xy_8_##x}, \
+    {MC_avg_o_16_##x, MC_avg_x_16_##x, MC_avg_y_16_##x, MC_avg_xy_16_##x, \
+     MC_avg_o_8_##x,  MC_avg_x_8_##x,  MC_avg_y_8_##x,  MC_avg_xy_8_##x}  \
+};
+/* One table per implementation; each is presumably defined via MPEG2_MC_EXTERN. */
+extern mpeg2_mc_t mpeg2_mc_c;
+extern mpeg2_mc_t mpeg2_mc_mmx;
+extern mpeg2_mc_t mpeg2_mc_mmxext;
+extern mpeg2_mc_t mpeg2_mc_3dnow;
+extern mpeg2_mc_t mpeg2_mc_altivec;
+extern mpeg2_mc_t mpeg2_mc_alpha;
+extern mpeg2_mc_t mpeg2_mc_mlib;
diff --git a/src/libmpeg2new/libmpeg2/slice.c b/src/libmpeg2new/libmpeg2/slice.c
new file mode 100644
index 000000000..3e2db0803
--- /dev/null
+++ b/src/libmpeg2new/libmpeg2/slice.c
@@ -0,0 +1,1808 @@
+/*
+ * slice.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "mpeg2_internal.h"
+#include "attributes.h"
+/* Active MC table and IDCT / cpu-state hooks; defined elsewhere (presumably set at init). */
+extern mpeg2_mc_t mpeg2_mc;
+extern void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride);
+extern void (* mpeg2_idct_add) (int last, int16_t * block,
+				uint8_t * dest, int stride);
+extern void (* mpeg2_cpu_state_save) (cpu_state_t * state);
+extern void (* mpeg2_cpu_state_restore) (cpu_state_t * state);
+
+#include "vlc.h"
+/* Quantizer scale for q_scale_type != 0, indexed by the 5-bit quantizer_scale_code (read-only). */
+static const int non_linear_quantizer_scale [] = {
+     0,  1,  2,  3,  4,  5,   6,   7,
+     8, 10, 12, 14, 16, 18,  20,  22,
+    24, 28, 32, 36, 40, 44,  48,  52,
+    56, 64, 72, 80, 88, 96, 104, 112
+};
+/* Parse the macroblock type plus optional motion_type / dct_type bits into one mode word. */
+static inline int get_macroblock_modes (mpeg2_decoder_t * const decoder)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    int macroblock_modes;
+    const MBtab * tab;
+
+    switch (decoder->coding_type) {
+    case I_TYPE:
+	/* table lookup on the top bit (MB_I) */
+	tab = MB_I + UBITS (bit_buf, 1);
+	DUMPBITS (bit_buf, bits, tab->len);
+	macroblock_modes = tab->modes;
+	/* the dct_type bit is read only for frame pictures without frame_pred_frame_dct */
+	if ((! (decoder->frame_pred_frame_dct)) &&
+	    (decoder->picture_structure == FRAME_PICTURE)) {
+	    macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED;
+	    DUMPBITS (bit_buf, bits, 1);
+	}
+
+	return macroblock_modes;
+
+    case P_TYPE:
+	/* table lookup on the top 5 bits (MB_P) */
+	tab = MB_P + UBITS (bit_buf, 5);
+	DUMPBITS (bit_buf, bits, tab->len);
+	macroblock_modes = tab->modes;
+	/* three cases: field picture, frame_pred_frame_dct, general frame picture */
+	if (decoder->picture_structure != FRAME_PICTURE) {
+	    if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) {
+		macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE;
+		DUMPBITS (bit_buf, bits, 2);
+	    }
+	    return macroblock_modes;
+	} else if (decoder->frame_pred_frame_dct) {
+	    if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
+		macroblock_modes |= MC_FRAME;
+	    return macroblock_modes;
+	} else {
+	    if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) {
+		macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE;
+		DUMPBITS (bit_buf, bits, 2);
+	    }
+	    if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) {
+		macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED;
+		DUMPBITS (bit_buf, bits, 1);
+	    }
+	    return macroblock_modes;
+	}
+
+    case B_TYPE:
+	/* table lookup on the top 6 bits (MB_B) */
+	tab = MB_B + UBITS (bit_buf, 6);
+	DUMPBITS (bit_buf, bits, tab->len);
+	macroblock_modes = tab->modes;
+
+	if (decoder->picture_structure != FRAME_PICTURE) {
+	    if (! (macroblock_modes & MACROBLOCK_INTRA)) {
+		macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE;
+		DUMPBITS (bit_buf, bits, 2);
+	    }
+	    return macroblock_modes;
+	} else if (decoder->frame_pred_frame_dct) {
+	    /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */
+	    macroblock_modes |= MC_FRAME;
+	    return macroblock_modes;
+	} else {
+	    if (macroblock_modes & MACROBLOCK_INTRA)
+		goto intra;
+	    macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE;
+	    DUMPBITS (bit_buf, bits, 2);
+	    if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) {
+	    intra:
+		macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED;
+		DUMPBITS (bit_buf, bits, 1);
+	    }
+	    return macroblock_modes;
+	}
+
+    case D_TYPE:
+	/* a single bit is skipped; the result is always plain intra */
+	DUMPBITS (bit_buf, bits, 1);
+	return MACROBLOCK_INTRA;
+
+    default:
+	return 0;
+    }
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+/* Read the 5-bit quantizer_scale_code and convert it to a quantizer scale. */
+static inline int get_quantizer_scale (mpeg2_decoder_t * const decoder)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    /* the code is taken from the top five bits of the working set */
+    const int code = UBITS (bit_buf, 5);
+
+    DUMPBITS (bit_buf, bits, 5);
+
+    /* q_scale_type picks the non-linear table; otherwise the scale is
+     * twice the code */
+    return (decoder->q_scale_type ?
+	    non_linear_quantizer_scale [code] :
+	    code << 1);
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+/* Decode one motion vector delta: table magnitude, sign bit, then f_code extra bits. */
+static inline int get_motion_delta (mpeg2_decoder_t * const decoder,
+				    const int f_code)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+
+    int delta;
+    int sign;
+    const MVtab * tab;
+
+    if (bit_buf & 0x80000000) {
+	DUMPBITS (bit_buf, bits, 1);	/* a leading 1 bit means delta 0 */
+	return 0;
+    } else if (bit_buf >= 0x0c000000) {
+	/* table lookup on the top 4 bits; all bits are accounted for up front */
+	tab = MV_4 + UBITS (bit_buf, 4);
+	delta = (tab->delta << f_code) + 1;
+	bits += tab->len + f_code + 1;
+	bit_buf <<= tab->len;
+
+	sign = SBITS (bit_buf, 1);
+	bit_buf <<= 1;
+
+	if (f_code)
+	    delta += UBITS (bit_buf, f_code);
+	bit_buf <<= f_code;
+
+	return (delta ^ sign) - sign;
+
+    } else {
+	/* table lookup on the top 10 bits; NEEDBITS runs before the extra bits */
+	tab = MV_10 + UBITS (bit_buf, 10);
+	delta = (tab->delta << f_code) + 1;
+	bits += tab->len + 1;
+	bit_buf <<= tab->len;
+
+	sign = SBITS (bit_buf, 1);
+	bit_buf <<= 1;
+
+	if (f_code) {
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+	    delta += UBITS (bit_buf, f_code);
+	    DUMPBITS (bit_buf, bits, f_code);
+	}
+
+	return (delta ^ sign) - sign;
+
+    }
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+/* Sign-extend the low (5 + f_code) bits of vector; shifts left as unsigned to avoid signed-shift UB. */
+static inline int bound_motion_vector (const int vector, const int f_code)
+{
+    return ((int32_t)((uint32_t)vector << (27 - f_code))) >> (27 - f_code);
+}
+/* Decode one dmv value with a 2-bit lookup in DMV_2. */
+static inline int get_dmv (mpeg2_decoder_t * const decoder)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    /* the top two bits of the working set select the table entry */
+    const DMVtab * const entry = &DMV_2[UBITS (bit_buf, 2)];
+
+    /* drop only the bits the matched code occupies */
+    DUMPBITS (bit_buf, bits, entry->len);
+    return entry->dmv;
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+/* Decode the coded block pattern through the CBP_7 / CBP_9 tables. */
+static inline int get_coded_block_pattern (mpeg2_decoder_t * const decoder)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+
+    const CBPtab * entry;
+
+    /* NEEDBITS first (presumably tops up the working set) before peeking */
+    NEEDBITS (bit_buf, bits, bit_ptr);
+
+    if (bit_buf < 0x20000000) {
+	/* top three bits are zero: index CBP_9 with nine bits */
+	entry = &CBP_9[UBITS (bit_buf, 9)];
+    } else {
+	/* otherwise seven bits suffice; their value is at least 16 here */
+	entry = &CBP_7[UBITS (bit_buf, 7) - 16];
+    }
+
+    /* drop exactly the bits of the matched code */
+    DUMPBITS (bit_buf, bits, entry->len);
+    return entry->cbp;
+
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+/* Decode the luma DC differential: a size code followed by size value bits. */
+static inline int get_luma_dc_dct_diff (mpeg2_decoder_t * const decoder)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    const DCtab * tab;
+    int size;
+    int dc_diff;
+    /* unless the top five bits are all ones, a 5-bit lookup in DC_lum_5 is enough */
+    if (bit_buf < 0xf8000000) {
+	tab = DC_lum_5 + UBITS (bit_buf, 5);
+	size = tab->size;
+	if (size) {
+	    bits += tab->len + size;
+	    bit_buf <<= tab->len;
+	    dc_diff =	/* leading 0 bit: subtract 2^size - 1 (assuming SBITS sign-extends) */
+		UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
+	    bit_buf <<= size;
+	    return dc_diff;
+	} else {
+	    DUMPBITS (bit_buf, bits, 3);	/* size 0: only the 3-bit code is consumed */
+	    return 0;
+	}
+    } else {
+	tab = DC_long + (UBITS (bit_buf, 9) - 0x1e0);
+	size = tab->size;
+	DUMPBITS (bit_buf, bits, tab->len);
+	NEEDBITS (bit_buf, bits, bit_ptr);
+	dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
+	DUMPBITS (bit_buf, bits, size);
+	return dc_diff;
+    }
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+/* Decode the chroma DC differential: a size code followed by size value bits. */
+static inline int get_chroma_dc_dct_diff (mpeg2_decoder_t * const decoder)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    const DCtab * tab;
+    int size;
+    int dc_diff;
+    /* unless the top five bits are all ones, a 5-bit lookup in DC_chrom_5 is enough */
+    if (bit_buf < 0xf8000000) {
+	tab = DC_chrom_5 + UBITS (bit_buf, 5);
+	size = tab->size;
+	if (size) {
+	    bits += tab->len + size;
+	    bit_buf <<= tab->len;
+	    dc_diff =	/* leading 0 bit: subtract 2^size - 1 (assuming SBITS sign-extends) */
+		UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
+	    bit_buf <<= size;
+	    return dc_diff;
+	} else {
+	    DUMPBITS (bit_buf, bits, 2);	/* size 0: only the 2-bit code is consumed */
+	    return 0;
+	}
+    } else {
+	tab = DC_long + (UBITS (bit_buf, 10) - 0x3e0);
+	size = tab->size;
+	DUMPBITS (bit_buf, bits, tab->len + 1);	/* one bit more than tab->len here */
+	NEEDBITS (bit_buf, bits, bit_ptr);
+	dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
+	DUMPBITS (bit_buf, bits, size);
+	return dc_diff;
+    }
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+/* Clamp val to [-2048, 2047] (relies on SBITS (val, 1) being 0 or -1); val must be an lvalue. */
+#define SATURATE(val)					\
+do {							\
+    if (unlikely ((uint32_t)(val + 2048) > 4095))	\
+	val = SBITS (val, 1) ^ 2047;			\
+} while (0)
+/* Decode and dequantize one intra block's coefficients (DCT_B14* tables) into decoder->DCTblock. */
+static void get_intra_block_B14 (mpeg2_decoder_t * const decoder)
+{
+    int i;
+    int j;
+    int val;
+    const uint8_t * scan = decoder->scan;
+    const uint8_t * quant_matrix = decoder->intra_quantizer_matrix;
+    int quantizer_scale = decoder->quantizer_scale;
+    int mismatch;
+    const DCTtab * tab;
+    uint32_t bit_buf;
+    int bits;
+    const uint8_t * bit_ptr;
+    int16_t * dest;
+    /* dest[0] is only read here: it seeds the mismatch parity */
+    dest = decoder->DCTblock;
+    i = 0;
+    mismatch = ~dest[0];
+    /* work on local copies of the bitstream state, stored back on exit */
+    bit_buf = decoder->bitstream_buf;
+    bits = decoder->bitstream_bits;
+    bit_ptr = decoder->bitstream_ptr;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+
+    while (1) {
+	if (bit_buf >= 0x28000000) {
+	    /* table lookup on the top 5 bits */
+	    tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
+
+	    i += tab->run;
+	    if (i >= 64)
+		break;	/* end of block */
+
+	normal_code:
+	    j = scan[i];
+	    bit_buf <<= tab->len;
+	    bits += tab->len + 1;
+	    val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4;
+
+	    /* if (bitstream_get (1)) val = -val; */
+	    val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
+
+	    SATURATE (val);
+	    dest[j] = val;
+	    mismatch ^= val;
+
+	    bit_buf <<= 1;
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+
+	    continue;
+
+	} else if (bit_buf >= 0x04000000) {
+
+	    tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
+
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+
+	    /* escape code */
+
+	    i += UBITS (bit_buf << 6, 6) - 64;
+	    if (i >= 64)
+		break;	/* illegal, check needed to avoid buffer overflow */
+
+	    j = scan[i];
+
+	    DUMPBITS (bit_buf, bits, 12);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+	    val = (SBITS (bit_buf, 12) *
+		   quantizer_scale * quant_matrix[j]) / 16;
+
+	    SATURATE (val);
+	    dest[j] = val;
+	    mismatch ^= val;
+
+	    DUMPBITS (bit_buf, bits, 12);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+
+	    continue;
+
+	} else if (bit_buf >= 0x02000000) {
+	    tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00800000) {
+	    tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00200000) {
+	    tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else {
+	    tab = DCT_16 + UBITS (bit_buf, 16);
+	    bit_buf <<= 16;
+	    GETWORD (bit_buf, bits + 16, bit_ptr);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	}
+	break;	/* illegal, check needed to avoid buffer overflow */
+    }
+    dest[63] ^= mismatch & 1;	/* mismatch control: may flip the LSB of dest[63] */
+    DUMPBITS (bit_buf, bits, 2);	/* dump end of block code */
+    decoder->bitstream_buf = bit_buf;
+    decoder->bitstream_bits = bits;
+    decoder->bitstream_ptr = bit_ptr;
+}
+/* Decode and dequantize one intra block's coefficients (DCT_B15* tables) into decoder->DCTblock. */
+static void get_intra_block_B15 (mpeg2_decoder_t * const decoder)
+{
+    int i;
+    int j;
+    int val;
+    const uint8_t * scan = decoder->scan;
+    const uint8_t * quant_matrix = decoder->intra_quantizer_matrix;
+    int quantizer_scale = decoder->quantizer_scale;
+    int mismatch;
+    const DCTtab * tab;
+    uint32_t bit_buf;
+    int bits;
+    const uint8_t * bit_ptr;
+    int16_t * dest;
+    /* dest[0] is only read here: it seeds the mismatch parity */
+    dest = decoder->DCTblock;
+    i = 0;
+    mismatch = ~dest[0];
+    /* work on local copies of the bitstream state, stored back on exit */
+    bit_buf = decoder->bitstream_buf;
+    bits = decoder->bitstream_bits;
+    bit_ptr = decoder->bitstream_ptr;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+
+    while (1) {
+	if (bit_buf >= 0x04000000) {
+	    /* table lookup on the top 8 bits */
+	    tab = DCT_B15_8 + (UBITS (bit_buf, 8) - 4);
+
+	    i += tab->run;
+	    if (i < 64) {
+
+	    normal_code:
+		j = scan[i];
+		bit_buf <<= tab->len;
+		bits += tab->len + 1;
+		val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4;
+
+		/* if (bitstream_get (1)) val = -val; */
+		val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
+
+		SATURATE (val);
+		dest[j] = val;
+		mismatch ^= val;
+
+		bit_buf <<= 1;
+		NEEDBITS (bit_buf, bits, bit_ptr);
+
+		continue;
+
+	    } else {
+
+		/* end of block. I commented out this code because if we */
+		/* don't exit here we will still exit at the later test :) */
+
+		/* if (i >= 128) break;	*/	/* end of block */
+
+		/* escape code */
+
+		i += UBITS (bit_buf << 6, 6) - 64;
+		if (i >= 64)
+		    break;	/* illegal, check against buffer overflow */
+
+		j = scan[i];
+
+		DUMPBITS (bit_buf, bits, 12);
+		NEEDBITS (bit_buf, bits, bit_ptr);
+		val = (SBITS (bit_buf, 12) *
+		       quantizer_scale * quant_matrix[j]) / 16;
+
+		SATURATE (val);
+		dest[j] = val;
+		mismatch ^= val;
+
+		DUMPBITS (bit_buf, bits, 12);
+		NEEDBITS (bit_buf, bits, bit_ptr);
+
+		continue;
+
+	    }
+	} else if (bit_buf >= 0x02000000) {
+	    tab = DCT_B15_10 + (UBITS (bit_buf, 10) - 8);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00800000) {
+	    tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00200000) {
+	    tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else {
+	    tab = DCT_16 + UBITS (bit_buf, 16);
+	    bit_buf <<= 16;
+	    GETWORD (bit_buf, bits + 16, bit_ptr);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	}
+	break;	/* illegal, check needed to avoid buffer overflow */
+    }
+    dest[63] ^= mismatch & 1;	/* mismatch control: may flip the LSB of dest[63] */
+    DUMPBITS (bit_buf, bits, 4);	/* dump end of block code */
+    decoder->bitstream_buf = bit_buf;
+    decoder->bitstream_bits = bits;
+    decoder->bitstream_ptr = bit_ptr;
+}
+/* Decode and dequantize one non-intra block into decoder->DCTblock; returns the final scan index i. */
+static int get_non_intra_block (mpeg2_decoder_t * const decoder)
+{
+    int i;
+    int j;
+    int val;
+    const uint8_t * scan = decoder->scan;
+    const uint8_t * quant_matrix = decoder->non_intra_quantizer_matrix;
+    int quantizer_scale = decoder->quantizer_scale;
+    int mismatch;
+    const DCTtab * tab;
+    uint32_t bit_buf;
+    int bits;
+    const uint8_t * bit_ptr;
+    int16_t * dest;
+    /* i starts at -1, so the first run is counted from before position 0 */
+    i = -1;
+    mismatch = 1;
+    dest = decoder->DCTblock;
+    /* work on local copies of the bitstream state, stored back on exit */
+    bit_buf = decoder->bitstream_buf;
+    bits = decoder->bitstream_bits;
+    bit_ptr = decoder->bitstream_ptr;
+    /* the first coefficient uses DCT_B14DC_5 and enters the loop at entry_1 / entry_2 */
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    if (bit_buf >= 0x28000000) {
+	tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5);
+	goto entry_1;
+    } else
+	goto entry_2;
+
+    while (1) {
+	if (bit_buf >= 0x28000000) {
+
+	    tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
+
+	entry_1:
+	    i += tab->run;
+	    if (i >= 64)
+		break;	/* end of block */
+
+	normal_code:
+	    j = scan[i];
+	    bit_buf <<= tab->len;
+	    bits += tab->len + 1;
+	    val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5;
+
+	    /* if (bitstream_get (1)) val = -val; */
+	    val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
+
+	    SATURATE (val);
+	    dest[j] = val;
+	    mismatch ^= val;
+
+	    bit_buf <<= 1;
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+
+	    continue;
+
+	}
+
+    entry_2:
+	if (bit_buf >= 0x04000000) {
+
+	    tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
+
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+
+	    /* escape code */
+
+	    i += UBITS (bit_buf << 6, 6) - 64;
+	    if (i >= 64)
+		break;	/* illegal, check needed to avoid buffer overflow */
+
+	    j = scan[i];
+
+	    DUMPBITS (bit_buf, bits, 12);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+	    val = 2 * (SBITS (bit_buf, 12) + SBITS (bit_buf, 1)) + 1;
+	    val = (val * quantizer_scale * quant_matrix[j]) / 32;
+
+	    SATURATE (val);
+	    dest[j] = val;
+	    mismatch ^= val;
+
+	    DUMPBITS (bit_buf, bits, 12);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+
+	    continue;
+
+	} else if (bit_buf >= 0x02000000) {
+	    tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00800000) {
+	    tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00200000) {
+	    tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else {
+	    tab = DCT_16 + UBITS (bit_buf, 16);
+	    bit_buf <<= 16;
+	    GETWORD (bit_buf, bits + 16, bit_ptr);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	}
+	break;	/* illegal, check needed to avoid buffer overflow */
+    }
+    dest[63] ^= mismatch & 1;	/* mismatch control: may flip the LSB of dest[63] */
+    DUMPBITS (bit_buf, bits, 2);	/* dump end of block code */
+    decoder->bitstream_buf = bit_buf;
+    decoder->bitstream_bits = bits;
+    decoder->bitstream_ptr = bit_ptr;
+    return i;
+}
+/* MPEG-1 intra block decoder: 8/16-bit escape levels and oddification, no mismatch parity. */
+static void get_mpeg1_intra_block (mpeg2_decoder_t * const decoder)
+{
+    int i;
+    int j;
+    int val;
+    const uint8_t * scan = decoder->scan;
+    const uint8_t * quant_matrix = decoder->intra_quantizer_matrix;
+    int quantizer_scale = decoder->quantizer_scale;
+    const DCTtab * tab;
+    uint32_t bit_buf;
+    int bits;
+    const uint8_t * bit_ptr;
+    int16_t * dest;
+
+    i = 0;
+    dest = decoder->DCTblock;
+    /* work on local copies of the bitstream state, stored back on exit */
+    bit_buf = decoder->bitstream_buf;
+    bits = decoder->bitstream_bits;
+    bit_ptr = decoder->bitstream_ptr;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+
+    while (1) {
+	if (bit_buf >= 0x28000000) {
+	    /* table lookup on the top 5 bits */
+	    tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
+
+	    i += tab->run;
+	    if (i >= 64)
+		break;	/* end of block */
+
+	normal_code:
+	    j = scan[i];
+	    bit_buf <<= tab->len;
+	    bits += tab->len + 1;
+	    val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4;
+
+	    /* oddification */
+	    val = (val - 1) | 1;
+
+	    /* if (bitstream_get (1)) val = -val; */
+	    val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
+
+	    SATURATE (val);
+	    dest[j] = val;
+
+	    bit_buf <<= 1;
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+
+	    continue;
+
+	} else if (bit_buf >= 0x04000000) {
+
+	    tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
+
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+
+	    /* escape code */
+
+	    i += UBITS (bit_buf << 6, 6) - 64;
+	    if (i >= 64)
+		break;	/* illegal, check needed to avoid buffer overflow */
+
+	    j = scan[i];
+
+	    DUMPBITS (bit_buf, bits, 12);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+	    val = SBITS (bit_buf, 8);
+	    if (! (val & 0x7f)) {	/* low 7 bits zero: the level continues in the next byte */
+		DUMPBITS (bit_buf, bits, 8);
+		val = UBITS (bit_buf, 8) + 2 * val;
+	    }
+	    val = (val * quantizer_scale * quant_matrix[j]) / 16;
+
+	    /* oddification */
+	    val = (val + ~SBITS (val, 1)) | 1;
+
+	    SATURATE (val);
+	    dest[j] = val;
+
+	    DUMPBITS (bit_buf, bits, 8);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+
+	    continue;
+
+	} else if (bit_buf >= 0x02000000) {
+	    tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00800000) {
+	    tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00200000) {
+	    tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else {
+	    tab = DCT_16 + UBITS (bit_buf, 16);
+	    bit_buf <<= 16;
+	    GETWORD (bit_buf, bits + 16, bit_ptr);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	}
+	break;	/* illegal, check needed to avoid buffer overflow */
+    }
+    DUMPBITS (bit_buf, bits, 2);	/* dump end of block code */
+    decoder->bitstream_buf = bit_buf;
+    decoder->bitstream_bits = bits;
+    decoder->bitstream_ptr = bit_ptr;
+}
+/* MPEG-1 non-intra block decoder (escapes of 8/16 bits, oddification); returns the final scan index i. */
+static int get_mpeg1_non_intra_block (mpeg2_decoder_t * const decoder)
+{
+    int i;
+    int j;
+    int val;
+    const uint8_t * scan = decoder->scan;
+    const uint8_t * quant_matrix = decoder->non_intra_quantizer_matrix;
+    int quantizer_scale = decoder->quantizer_scale;
+    const DCTtab * tab;
+    uint32_t bit_buf;
+    int bits;
+    const uint8_t * bit_ptr;
+    int16_t * dest;
+
+    i = -1;
+    dest = decoder->DCTblock;
+    /* work on local copies of the bitstream state, stored back on exit */
+    bit_buf = decoder->bitstream_buf;
+    bits = decoder->bitstream_bits;
+    bit_ptr = decoder->bitstream_ptr;
+    /* the first coefficient uses DCT_B14DC_5 and enters the loop at entry_1 / entry_2 */
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    if (bit_buf >= 0x28000000) {
+	tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5);
+	goto entry_1;
+    } else
+	goto entry_2;
+
+    while (1) {
+	if (bit_buf >= 0x28000000) {
+
+	    tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
+
+	entry_1:
+	    i += tab->run;
+	    if (i >= 64)
+		break;	/* end of block */
+
+	normal_code:
+	    j = scan[i];
+	    bit_buf <<= tab->len;
+	    bits += tab->len + 1;
+	    val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5;
+
+	    /* oddification */
+	    val = (val - 1) | 1;
+
+	    /* if (bitstream_get (1)) val = -val; */
+	    val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
+
+	    SATURATE (val);
+	    dest[j] = val;
+
+	    bit_buf <<= 1;
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+
+	    continue;
+
+	}
+
+    entry_2:
+	if (bit_buf >= 0x04000000) {
+
+	    tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
+
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+
+	    /* escape code */
+
+	    i += UBITS (bit_buf << 6, 6) - 64;
+	    if (i >= 64)
+		break;	/* illegal, check needed to avoid buffer overflow */
+
+	    j = scan[i];
+
+	    DUMPBITS (bit_buf, bits, 12);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+	    val = SBITS (bit_buf, 8);
+	    if (! (val & 0x7f)) {	/* low 7 bits zero: the level continues in the next byte */
+		DUMPBITS (bit_buf, bits, 8);
+		val = UBITS (bit_buf, 8) + 2 * val;
+	    }
+	    val = 2 * (val + SBITS (val, 1)) + 1;
+	    val = (val * quantizer_scale * quant_matrix[j]) / 32;
+
+	    /* oddification */
+	    val = (val + ~SBITS (val, 1)) | 1;
+
+	    SATURATE (val);
+	    dest[j] = val;
+
+	    DUMPBITS (bit_buf, bits, 8);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+
+	    continue;
+
+	} else if (bit_buf >= 0x02000000) {
+	    tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00800000) {
+	    tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00200000) {
+	    tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else {
+	    tab = DCT_16 + UBITS (bit_buf, 16);
+	    bit_buf <<= 16;
+	    GETWORD (bit_buf, bits + 16, bit_ptr);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	}
+	break;	/* illegal, check needed to avoid buffer overflow */
+    }
+    DUMPBITS (bit_buf, bits, 2);	/* dump end of block code */
+    decoder->bitstream_buf = bit_buf;
+    decoder->bitstream_bits = bits;
+    decoder->bitstream_ptr = bit_ptr;
+    return i;
+}
+
+static inline void slice_intra_DCT (mpeg2_decoder_t * const decoder,
+				    const int cc,
+				    uint8_t * const dest, const int stride)
+{   /* Decode one intra block of component cc and pass it to mpeg2_idct_copy. */
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    /* Get the intra DC coefficient and inverse quantize it */
+    if (cc == 0)	/* cc 0 uses the luma DC decoder, other values the chroma one */
+	decoder->dc_dct_pred[0] += get_luma_dc_dct_diff (decoder);
+    else
+	decoder->dc_dct_pred[cc] += get_chroma_dc_dct_diff (decoder);
+    decoder->DCTblock[0] =
+	decoder->dc_dct_pred[cc] << (3 - decoder->intra_dc_precision);
+    /* remaining coefficients: nothing more is parsed for MPEG-1 D pictures */
+    if (decoder->mpeg1) {
+	if (decoder->coding_type != D_TYPE)
+	    get_mpeg1_intra_block (decoder);
+    } else if (decoder->intra_vlc_format)
+	get_intra_block_B15 (decoder);
+    else
+	get_intra_block_B14 (decoder);
+    mpeg2_idct_copy (decoder->DCTblock, dest, stride);
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+
+static inline void slice_non_intra_DCT (mpeg2_decoder_t * const decoder,
+					uint8_t * const dest, const int stride)
+{
+    /* Parse one coded non-intra block with the parser matching the stream
+     * type, then hand the parser's return value and the coefficients in
+     * decoder->DCTblock to mpeg2_idct_add for this dest/stride. */
+    const int coeff_end = (decoder->mpeg1 ?
+			   get_mpeg1_non_intra_block (decoder) :
+			   get_non_intra_block (decoder));
+    mpeg2_idct_add (coeff_end, decoder->DCTblock, dest, stride);
+}
+
+#define MOTION(table,ref,motion_x,motion_y,size,y) /* needs decoder, pos_x, pos_y, xy_half, offset in scope */ \
+    pos_x = 2 * decoder->offset + motion_x;	/* half-sample units */	      \
+    pos_y = 2 * decoder->v_offset + motion_y + 2 * y;			      \
+    if (unlikely (pos_x > decoder->limit_x)) {	/* clamp and fix up the vector */ \
+	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
+	motion_x = pos_x - 2 * decoder->offset;				      \
+    }									      \
+    if (unlikely (pos_y > decoder->limit_y_ ## size)) {			      \
+	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y_ ## size;	      \
+	motion_y = pos_y - 2 * decoder->v_offset - 2 * y;		      \
+    }									      \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);	/* low bits pick table[0..3] */ \
+    table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \
+		    ref[0] + (pos_x >> 1) + (pos_y >> 1) * decoder->stride,   \
+		    decoder->stride, size);				      \
+    motion_x /= 2;	motion_y /= 2;	/* vector for dest[1] and dest[2] */  \
+    xy_half = ((motion_y & 1) << 1) | (motion_x & 1);			      \
+    offset = (((decoder->offset + motion_x) >> 1) +			      \
+	      ((((decoder->v_offset + motion_y) >> 1) + y/2) *		      \
+	       decoder->uv_stride));					      \
+    table[4+xy_half] (decoder->dest[1] + y/2 * decoder->uv_stride +	      \
+		      (decoder->offset >> 1), ref[1] + offset,		      \
+		      decoder->uv_stride, size/2);			      \
+    table[4+xy_half] (decoder->dest[2] + y/2 * decoder->uv_stride +	      \
+		      (decoder->offset >> 1), ref[2] + offset,		      \
+		      decoder->uv_stride, size/2)
+
+#define MOTION_FIELD(table,ref,motion_x,motion_y,dest_field,op,src_field) /* needs decoder, pos_x, pos_y, xy_half, offset in scope */ \
+    pos_x = 2 * decoder->offset + motion_x;				      \
+    pos_y = decoder->v_offset + motion_y;				      \
+    if (unlikely (pos_x > decoder->limit_x)) {	/* clamp and fix up the vector */ \
+	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
+	motion_x = pos_x - 2 * decoder->offset;				      \
+    }									      \
+    if (unlikely (pos_y > decoder->limit_y)) {				      \
+	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;		      \
+	motion_y = pos_y - decoder->v_offset;				      \
+    }									      \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);	/* low bits pick table[0..3] */ \
+    table[xy_half] (decoder->dest[0] + dest_field * decoder->stride +	      \
+		    decoder->offset,					      \
+		    (ref[0] + (pos_x >> 1) +				      \
+		     ((pos_y op) + src_field) * decoder->stride),	      \
+		    2 * decoder->stride, 8);	/* every other line, 8 of them */ \
+    motion_x /= 2;	motion_y /= 2;	/* vector for dest[1] and dest[2] */  \
+    xy_half = ((motion_y & 1) << 1) | (motion_x & 1);			      \
+    offset = (((decoder->offset + motion_x) >> 1) +			      \
+	      (((decoder->v_offset >> 1) + (motion_y op) + src_field) *	      \
+	       decoder->uv_stride));					      \
+    table[4+xy_half] (decoder->dest[1] + dest_field * decoder->uv_stride +    \
+		      (decoder->offset >> 1), ref[1] + offset,		      \
+		      2 * decoder->uv_stride, 4);			      \
+    table[4+xy_half] (decoder->dest[2] + dest_field * decoder->uv_stride +    \
+		      (decoder->offset >> 1), ref[2] + offset,		      \
+		      2 * decoder->uv_stride, 4)
+
+static void motion_mp1 (mpeg2_decoder_t * const decoder,
+			motion_t * const motion,
+			mpeg2_mc_fct * const * const table)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    int motion_x, motion_y;
+    unsigned int pos_x, pos_y, xy_half, offset;
+    /* MPEG-1 vector: both deltas are read with f_code[0] and shifted by f_code[1] */
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    motion_x = (motion->pmv[0][0] +
+		(get_motion_delta (decoder,
+				   motion->f_code[0]) << motion->f_code[1]));
+    motion_x = bound_motion_vector (motion_x,
+				    motion->f_code[0] + motion->f_code[1]);
+    motion->pmv[0][0] = motion_x;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    motion_y = (motion->pmv[0][1] +
+		(get_motion_delta (decoder,
+				   motion->f_code[0]) << motion->f_code[1]));
+    motion_y = bound_motion_vector (motion_y,
+				    motion->f_code[0] + motion->f_code[1]);
+    motion->pmv[0][1] = motion_y;
+    /* only pmv[0] is updated here; predict 16 lines from ref[0] */
+    MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0);
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+
+static void motion_fr_frame (mpeg2_decoder_t * const decoder,
+			     motion_t * const motion,
+			     mpeg2_mc_fct * const * const table)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    int motion_x, motion_y;
+    unsigned int pos_x, pos_y, xy_half, offset;
+    /* Frame picture, frame prediction: one vector, stored in both pmv slots. */
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,
+						     motion->f_code[0]);
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
+    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    motion_y = motion->pmv[0][1] + get_motion_delta (decoder,
+						     motion->f_code[1]);
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);
+    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;
+
+    MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0);
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+
+static void motion_fr_field (mpeg2_decoder_t * const decoder,
+			     motion_t * const motion,
+			     mpeg2_mc_fct * const * const table)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    int motion_x, motion_y, field;
+    unsigned int pos_x, pos_y, xy_half, offset;
+    /* Frame picture, field prediction: a select bit plus a vector per dest field. */
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    field = UBITS (bit_buf, 1);	/* source field for destination field 0 */
+    DUMPBITS (bit_buf, bits, 1);
+
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,
+						     motion->f_code[0]);
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
+    motion->pmv[0][0] = motion_x;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    motion_y = (motion->pmv[0][1] >> 1) + get_motion_delta (decoder,
+							    motion->f_code[1]);
+    /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */
+    motion->pmv[0][1] = motion_y << 1;	/* pmv keeps the vertical part doubled */
+
+    MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 0, & ~1, field);
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    field = UBITS (bit_buf, 1);	/* source field for destination field 1 */
+    DUMPBITS (bit_buf, bits, 1);
+
+    motion_x = motion->pmv[1][0] + get_motion_delta (decoder,
+						     motion->f_code[0]);
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
+    motion->pmv[1][0] = motion_x;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    motion_y = (motion->pmv[1][1] >> 1) + get_motion_delta (decoder,
+							    motion->f_code[1]);
+    /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */
+    motion->pmv[1][1] = motion_y << 1;
+
+    MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 1, & ~1, field);
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+
+static void motion_fr_dmv (mpeg2_decoder_t * const decoder,
+			   motion_t * const motion,
+			   mpeg2_mc_fct * const * const table)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    int motion_x, motion_y, dmv_x, dmv_y, m, other_x, other_y;
+    unsigned int pos_x, pos_y, xy_half, offset;
+    /* Frame picture, MC_DMV: one vector plus a dmv per axis; `table` is unused. */
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,
+						     motion->f_code[0]);
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
+    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    dmv_x = get_dmv (decoder);
+
+    motion_y = (motion->pmv[0][1] >> 1) + get_motion_delta (decoder,
+							    motion->f_code[1]);
+    /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */
+    motion->pmv[1][1] = motion->pmv[0][1] = motion_y << 1;
+    dmv_y = get_dmv (decoder);
+    /* put: even dest lines from odd source lines, vector scaled by m/2 plus dmv */
+    m = decoder->top_field_first ? 1 : 3;
+    other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x;
+    other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y - 1;
+    MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 0, | 1, 0);
+    /* put: odd dest lines from even source lines, with the other scale factor */
+    m = decoder->top_field_first ? 3 : 1;
+    other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x;
+    other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y + 1;
+    MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 1, & ~1, 0);
+    /* avg: blend in the unscaled vector, each dest line set from the same-parity source lines */
+    pos_x = 2 * decoder->offset + motion_x;
+    pos_y = decoder->v_offset + motion_y;
+    if (unlikely (pos_x > decoder->limit_x)) {
+	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;
+	motion_x = pos_x - 2 * decoder->offset;
+    }
+    if (unlikely (pos_y > decoder->limit_y)) {
+	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;
+	motion_y = pos_y - decoder->v_offset;
+    }
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);
+    offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride;
+    mpeg2_mc.avg[xy_half]
+	(decoder->dest[0] + decoder->offset,
+	 motion->ref[0][0] + offset, 2 * decoder->stride, 8);
+    mpeg2_mc.avg[xy_half]
+	(decoder->dest[0] + decoder->stride + decoder->offset,
+	 motion->ref[0][0] + decoder->stride + offset, 2 * decoder->stride, 8);
+    motion_x /= 2;	motion_y /= 2;
+    xy_half = ((motion_y & 1) << 1) | (motion_x & 1);
+    offset = (((decoder->offset + motion_x) >> 1) +
+	      (((decoder->v_offset >> 1) + (motion_y & ~1)) *
+	       decoder->uv_stride));
+    mpeg2_mc.avg[4+xy_half]
+	(decoder->dest[1] + (decoder->offset >> 1),
+	 motion->ref[0][1] + offset, 2 * decoder->uv_stride, 4);
+    mpeg2_mc.avg[4+xy_half]
+	(decoder->dest[1] + decoder->uv_stride + (decoder->offset >> 1),
+	 motion->ref[0][1] + decoder->uv_stride + offset,
+	 2 * decoder->uv_stride, 4);
+    mpeg2_mc.avg[4+xy_half]
+	(decoder->dest[2] + (decoder->offset >> 1),
+	 motion->ref[0][2] + offset, 2 * decoder->uv_stride, 4);
+    mpeg2_mc.avg[4+xy_half]
+	(decoder->dest[2] + decoder->uv_stride + (decoder->offset >> 1),
+	 motion->ref[0][2] + decoder->uv_stride + offset,
+	 2 * decoder->uv_stride, 4);
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+
+static inline void motion_reuse (const mpeg2_decoder_t * const decoder,
+				 const motion_t * const motion,
+				 mpeg2_mc_fct * const * const table)
+{
+    /* Predict 16 lines from ref[0] with the stored pmv[0] vector; nothing is
+     * read from the bitstream and the stored vector itself is not modified. */
+    unsigned int pos_x, pos_y, xy_half, offset;	/* scratch for MOTION */
+    int mv_x = motion->pmv[0][0];
+    int mv_y = motion->pmv[0][1];
+
+    MOTION (table, motion->ref[0], mv_x, mv_y, 16, 0);
+}
+
+static inline void motion_zero (const mpeg2_decoder_t * const decoder,
+				const motion_t * const motion,
+				mpeg2_mc_fct * const * const table)
+{
+    /* Zero-vector prediction from ref[0]: table[0] handles dest[0] (16 lines),
+     * table[4] handles dest[1] and dest[2] (8 lines each, uv_stride). */
+    const unsigned int uv_pos = ((decoder->offset >> 1) +
+				 (decoder->v_offset >> 1) * decoder->uv_stride);
+
+    table[0] (decoder->dest[0] + decoder->offset,
+	      (motion->ref[0][0] + decoder->offset +
+	       decoder->v_offset * decoder->stride),
+	      decoder->stride, 16);
+    table[4] (decoder->dest[1] + (decoder->offset >> 1),
+	      motion->ref[0][1] + uv_pos, decoder->uv_stride, 8);
+    table[4] (decoder->dest[2] + (decoder->offset >> 1),
+	      motion->ref[0][2] + uv_pos, decoder->uv_stride, 8);
+}
+
+/* like motion_fr_frame on f_motion, but parsing only: no motion compensation */
+static void motion_fr_conceal (mpeg2_decoder_t * const decoder)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    int tmp;
+    /* the decoded vector is stored in both f_motion.pmv slots */
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    tmp = (decoder->f_motion.pmv[0][0] +
+	   get_motion_delta (decoder, decoder->f_motion.f_code[0]));
+    tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[0]);
+    decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[0][0] = tmp;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    tmp = (decoder->f_motion.pmv[0][1] +
+	   get_motion_delta (decoder, decoder->f_motion.f_code[1]));
+    tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[1]);
+    decoder->f_motion.pmv[1][1] = decoder->f_motion.pmv[0][1] = tmp;
+
+    DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+
+static void motion_fi_field (mpeg2_decoder_t * const decoder,
+			     motion_t * const motion,
+			     mpeg2_mc_fct * const * const table)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    int motion_x, motion_y;
+    uint8_t ** ref_field;
+    unsigned int pos_x, pos_y, xy_half, offset;
+    /* Field picture, field prediction: a select bit picks ref2[0] or ref2[1]. */
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    ref_field = motion->ref2[UBITS (bit_buf, 1)];
+    DUMPBITS (bit_buf, bits, 1);
+
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,
+						     motion->f_code[0]);
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
+    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    motion_y = motion->pmv[0][1] + get_motion_delta (decoder,
+						     motion->f_code[1]);
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);
+    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;
+
+    MOTION (table, ref_field, motion_x, motion_y, 16, 0);
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+
+static void motion_fi_16x8 (mpeg2_decoder_t * const decoder,
+			    motion_t * const motion,
+			    mpeg2_mc_fct * const * const table)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    int motion_x, motion_y;
+    uint8_t ** ref_field;
+    unsigned int pos_x, pos_y, xy_half, offset;
+    /* Field picture, 16x8: one select bit + vector for lines 0-7, one for 8-15. */
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    ref_field = motion->ref2[UBITS (bit_buf, 1)];
+    DUMPBITS (bit_buf, bits, 1);
+
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,
+						     motion->f_code[0]);
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
+    motion->pmv[0][0] = motion_x;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    motion_y = motion->pmv[0][1] + get_motion_delta (decoder,
+						     motion->f_code[1]);
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);
+    motion->pmv[0][1] = motion_y;
+
+    MOTION (table, ref_field, motion_x, motion_y, 8, 0);
+    /* second half is predicted from pmv[1] and updates pmv[1] only */
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    ref_field = motion->ref2[UBITS (bit_buf, 1)];
+    DUMPBITS (bit_buf, bits, 1);
+
+    motion_x = motion->pmv[1][0] + get_motion_delta (decoder,
+						     motion->f_code[0]);
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
+    motion->pmv[1][0] = motion_x;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    motion_y = motion->pmv[1][1] + get_motion_delta (decoder,
+						     motion->f_code[1]);
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);
+    motion->pmv[1][1] = motion_y;
+
+    MOTION (table, ref_field, motion_x, motion_y, 8, 8);
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+
+static void motion_fi_dmv (mpeg2_decoder_t * const decoder,
+			   motion_t * const motion,
+			   mpeg2_mc_fct * const * const table)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    int motion_x, motion_y, other_x, other_y;
+    unsigned int pos_x, pos_y, xy_half, offset;
+    /* Field picture, MC_DMV: put from ref[0], then avg from ref[1]; `table` is unused. */
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,
+						     motion->f_code[0]);
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
+    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    other_x = ((motion_x + (motion_x > 0)) >> 1) + get_dmv (decoder);
+
+    motion_y = motion->pmv[0][1] + get_motion_delta (decoder,
+						     motion->f_code[1]);
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);
+    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;
+    other_y = (((motion_y + (motion_y > 0)) >> 1) + get_dmv (decoder) +
+	       decoder->dmv_offset);
+    /* second vector: half the first (rounded), plus dmv, plus dmv_offset vertically */
+    MOTION (mpeg2_mc.put, motion->ref[0], motion_x, motion_y, 16, 0);
+    MOTION (mpeg2_mc.avg, motion->ref[1], other_x, other_y, 16, 0);
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+
+static void motion_fi_conceal (mpeg2_decoder_t * const decoder)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    int tmp;
+    /* Parse a field-picture concealment vector into f_motion.pmv; no prediction. */
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    DUMPBITS (bit_buf, bits, 1); /* remove field_select */
+
+    tmp = (decoder->f_motion.pmv[0][0] +
+	   get_motion_delta (decoder, decoder->f_motion.f_code[0]));
+    tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[0]);
+    decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[0][0] = tmp;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    tmp = (decoder->f_motion.pmv[0][1] +
+	   get_motion_delta (decoder, decoder->f_motion.f_code[1]));
+    tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[1]);
+    decoder->f_motion.pmv[1][1] = decoder->f_motion.pmv[0][1] = tmp;
+
+    DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+
+#define MOTION_CALL(routine,direction) /* uses `decoder` from the caller */ \
+do {								\
+    if ((direction) & MACROBLOCK_MOTION_FORWARD)		\
+	routine (decoder, &(decoder->f_motion), mpeg2_mc.put);	\
+    if ((direction) & MACROBLOCK_MOTION_BACKWARD) /* avg if forward ran too */ \
+	routine (decoder, &(decoder->b_motion),			\
+		 ((direction) & MACROBLOCK_MOTION_FORWARD ?	\
+		  mpeg2_mc.avg : mpeg2_mc.put));		\
+} while (0)
+
+#define NEXT_MACROBLOCK	/* uses decoder and cpu_state from the caller */	\
+do {									\
+    decoder->offset += 16;						\
+    if (decoder->offset == decoder->width) {	/* end of macroblock row */ \
+	do { /* just so we can use the break statement */		\
+	    if (decoder->convert) {					\
+		decoder->convert (decoder->fbuf_id, decoder->dest,	\
+				  decoder->v_offset);			\
+		if (decoder->coding_type == B_TYPE) /* dest stays put */ \
+		    break;						\
+	    }								\
+	    decoder->dest[0] += 16 * decoder->stride;			\
+	    decoder->dest[1] += 4 * decoder->stride;			\
+	    decoder->dest[2] += 4 * decoder->stride;			\
+	} while (0);							\
+	decoder->v_offset += 16;					\
+	if (decoder->v_offset > decoder->limit_y) { /* past the last row */ \
+	    if (mpeg2_cpu_state_restore)				\
+		mpeg2_cpu_state_restore (&cpu_state);			\
+	    return;	/* leaves the function that expanded the macro */ \
+	}								\
+	decoder->offset = 0;						\
+    }									\
+} while (0)
+
+void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3],
+		      uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3])
+{
+    /* Set up destination/reference plane pointers, strides and vector limits.
+     * Bottom fields start one line into each plane; fields double the stride. */
+    const int is_bottom = (decoder->picture_structure == BOTTOM_FIELD);
+    int line_step = decoder->width;
+    int lines = decoder->height;
+    int y_skip = is_bottom ? line_step : 0;
+    int uv_skip = y_skip >> 1;
+
+    decoder->picture_dest[0] = current_fbuf[0] + y_skip;
+    decoder->picture_dest[1] = current_fbuf[1] + uv_skip;
+    decoder->picture_dest[2] = current_fbuf[2] + uv_skip;
+
+    decoder->f_motion.ref[0][0] = forward_fbuf[0] + y_skip;
+    decoder->f_motion.ref[0][1] = forward_fbuf[1] + uv_skip;
+    decoder->f_motion.ref[0][2] = forward_fbuf[2] + uv_skip;
+
+    decoder->b_motion.ref[0][0] = backward_fbuf[0] + y_skip;
+    decoder->b_motion.ref[0][1] = backward_fbuf[1] + uv_skip;
+    decoder->b_motion.ref[0][2] = backward_fbuf[2] + uv_skip;
+
+    if (decoder->picture_structure != FRAME_PICTURE) {
+	/* ref[0] starts on the same line as the destination, ref[1] on the
+	 * other one; ref2[0] is whichever starts at line 0, ref2[1] at line 1 */
+	decoder->dmv_offset = is_bottom ? 1 : -1;
+	decoder->f_motion.ref2[0] = decoder->f_motion.ref[is_bottom];
+	decoder->f_motion.ref2[1] = decoder->f_motion.ref[!is_bottom];
+	decoder->b_motion.ref2[0] = decoder->b_motion.ref[is_bottom];
+	decoder->b_motion.ref2[1] = decoder->b_motion.ref[!is_bottom];
+	y_skip = line_step - y_skip;	/* switch to the other field's lines */
+	uv_skip = y_skip >> 1;
+	if (decoder->second_field && (decoder->coding_type != B_TYPE))
+	    forward_fbuf = current_fbuf;	/* other field of this same frame */
+	decoder->f_motion.ref[1][0] = forward_fbuf[0] + y_skip;
+	decoder->f_motion.ref[1][1] = forward_fbuf[1] + uv_skip;
+	decoder->f_motion.ref[1][2] = forward_fbuf[2] + uv_skip;
+	decoder->b_motion.ref[1][0] = backward_fbuf[0] + y_skip;
+	decoder->b_motion.ref[1][1] = backward_fbuf[1] + uv_skip;
+	decoder->b_motion.ref[1][2] = backward_fbuf[2] + uv_skip;
+	line_step <<= 1;
+	lines >>= 1;
+    }
+
+    decoder->stride = line_step;
+    decoder->uv_stride = line_step >> 1;
+    decoder->limit_x = 2 * decoder->width - 32;
+    decoder->limit_y_16 = 2 * lines - 32;
+    decoder->limit_y_8 = 2 * lines - 16;
+    decoder->limit_y = lines - 16;
+}
+
+static inline int slice_init (mpeg2_decoder_t * const decoder, int code)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    int offset;
+    const MBAtab * mba;
+    /* Reset predictors and position dest/v_offset/offset; returns nonzero on error. */
+    decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] =
+	decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision;
+
+    decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0;
+    decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0;
+    decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0;
+    decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0;
+
+    if (decoder->vertical_position_extension) {
+	code += UBITS (bit_buf, 3) << 7;
+	DUMPBITS (bit_buf, bits, 3);
+    }
+    decoder->v_offset = (code - 1) * 16;
+    offset = 0;	/* dest stays at picture_dest for B pictures with a convert callback */
+    if (!(decoder->convert) || decoder->coding_type != B_TYPE)
+	offset = (code - 1) * decoder->stride * 4;
+
+    decoder->dest[0] = decoder->picture_dest[0] + offset * 4;	/* 16 * stride per row */
+    decoder->dest[1] = decoder->picture_dest[1] + offset;	/* 4 * stride per row */
+    decoder->dest[2] = decoder->picture_dest[2] + offset;
+
+    decoder->quantizer_scale = get_quantizer_scale (decoder);
+
+    /* ignore intra_slice and all the extra data */
+    while (bit_buf & 0x80000000) {
+	DUMPBITS (bit_buf, bits, 9);
+	NEEDBITS (bit_buf, bits, bit_ptr);
+    }
+    /* the zero bit that ended the loop is still in bit_buf: lookups use one extra bit */
+    /* decode initial macroblock address increment */
+    offset = 0;
+    while (1) {
+	if (bit_buf >= 0x08000000) {
+	    mba = MBA_5 + (UBITS (bit_buf, 6) - 2);
+	    break;
+	} else if (bit_buf >= 0x01800000) {
+	    mba = MBA_11 + (UBITS (bit_buf, 12) - 24);
+	    break;
+	} else switch (UBITS (bit_buf, 12)) {
+	case 8:		/* macroblock_escape */
+	    offset += 33;
+	    DUMPBITS (bit_buf, bits, 11);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+	    continue;
+	case 15:	/* macroblock_stuffing (MPEG1 only) */
+	    bit_buf &= 0xfffff;	/* clear the 12 bits just examined */
+	    DUMPBITS (bit_buf, bits, 11);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+	    continue;
+	default:	/* error */
+	    return 1;
+	}
+    }
+    DUMPBITS (bit_buf, bits, mba->len + 1);	/* +1: the leading zero bit */
+    decoder->offset = (offset + mba->mba) << 4;
+    /* an increment beyond the row width wraps onto the following rows */
+    while (decoder->offset - decoder->width >= 0) {
+	decoder->offset -= decoder->width;
+	if (!(decoder->convert) || decoder->coding_type != B_TYPE) {
+	    decoder->dest[0] += 16 * decoder->stride;
+	    decoder->dest[1] += 4 * decoder->stride;
+	    decoder->dest[2] += 4 * decoder->stride;
+	}
+	decoder->v_offset += 16;
+    }
+    if (decoder->v_offset > decoder->limit_y)
+	return 1;
+
+    return 0;
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+
+void mpeg2_slice (mpeg2_decoder_t * const decoder, const int code,
+		  const uint8_t * const buffer)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    cpu_state_t cpu_state;
+    /* Decode one slice from buffer; `code` is handed to slice_init for positioning. */
+    bitstream_init (decoder, buffer);
+
+    if (slice_init (decoder, code))
+	return;
+    /* save/restore hooks may be NULL; every later exit path calls restore if set */
+    if (mpeg2_cpu_state_save)
+	mpeg2_cpu_state_save (&cpu_state);
+
+    while (1) {
+	int macroblock_modes;
+	int mba_inc;
+	const MBAtab * mba;
+
+	NEEDBITS (bit_buf, bits, bit_ptr);
+
+	macroblock_modes = get_macroblock_modes (decoder);
+
+	/* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? */
+	if (macroblock_modes & MACROBLOCK_QUANT)
+	    decoder->quantizer_scale = get_quantizer_scale (decoder);
+
+	if (macroblock_modes & MACROBLOCK_INTRA) {
+	    /* intra: concealment vectors (or pmv reset), then 4 + 1 + 1 blocks */
+	    int DCT_offset, DCT_stride;
+	    int offset;
+	    uint8_t * dest_y;
+
+	    if (decoder->concealment_motion_vectors) {
+		if (decoder->picture_structure == FRAME_PICTURE)
+		    motion_fr_conceal (decoder);
+		else
+		    motion_fi_conceal (decoder);
+	    } else {
+		decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0;
+		decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0;
+		decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0;
+		decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0;
+	    }
+	    /* interlaced DCT: the lower block pair starts one line down, lines alternate */
+	    if (macroblock_modes & DCT_TYPE_INTERLACED) {
+		DCT_offset = decoder->stride;
+		DCT_stride = decoder->stride * 2;
+	    } else {
+		DCT_offset = decoder->stride * 8;
+		DCT_stride = decoder->stride;
+	    }
+
+	    offset = decoder->offset;
+	    dest_y = decoder->dest[0] + offset;
+	    slice_intra_DCT (decoder, 0, dest_y, DCT_stride);
+	    slice_intra_DCT (decoder, 0, dest_y + 8, DCT_stride);
+	    slice_intra_DCT (decoder, 0, dest_y + DCT_offset, DCT_stride);
+	    slice_intra_DCT (decoder, 0, dest_y + DCT_offset + 8, DCT_stride);
+	    slice_intra_DCT (decoder, 1, decoder->dest[1] + (offset >> 1),
+			     decoder->uv_stride);
+	    slice_intra_DCT (decoder, 2, decoder->dest[2] + (offset >> 1),
+			     decoder->uv_stride);
+	    /* D pictures: one more bit is dropped after the six blocks */
+	    if (decoder->coding_type == D_TYPE) {
+		NEEDBITS (bit_buf, bits, bit_ptr);
+		DUMPBITS (bit_buf, bits, 1);
+	    }
+	} else {
+	    /* non-intra: motion compensation first, coded blocks added afterwards */
+	    if (decoder->picture_structure == FRAME_PICTURE)
+		switch (macroblock_modes & MOTION_TYPE_MASK) {
+		case MC_FRAME:
+		    if (decoder->mpeg1)
+			MOTION_CALL (motion_mp1, macroblock_modes);
+		    else
+			MOTION_CALL (motion_fr_frame, macroblock_modes);
+		    break;
+
+		case MC_FIELD:
+		    MOTION_CALL (motion_fr_field, macroblock_modes);
+		    break;
+
+		case MC_DMV:
+		    MOTION_CALL (motion_fr_dmv, MACROBLOCK_MOTION_FORWARD);
+		    break;
+
+		case 0:
+		    /* non-intra mb without forward mv in a P picture */
+		    decoder->f_motion.pmv[0][0] = 0;
+		    decoder->f_motion.pmv[0][1] = 0;
+		    decoder->f_motion.pmv[1][0] = 0;
+		    decoder->f_motion.pmv[1][1] = 0;
+		    MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD);
+		    break;
+		}
+	    else
+		switch (macroblock_modes & MOTION_TYPE_MASK) {
+		case MC_FIELD:
+		    MOTION_CALL (motion_fi_field, macroblock_modes);
+		    break;
+
+		case MC_16X8:
+		    MOTION_CALL (motion_fi_16x8, macroblock_modes);
+		    break;
+
+		case MC_DMV:
+		    MOTION_CALL (motion_fi_dmv, MACROBLOCK_MOTION_FORWARD);
+		    break;
+
+		case 0:
+		    /* non-intra mb without forward mv in a P picture */
+		    decoder->f_motion.pmv[0][0] = 0;
+		    decoder->f_motion.pmv[0][1] = 0;
+		    decoder->f_motion.pmv[1][0] = 0;
+		    decoder->f_motion.pmv[1][1] = 0;
+		    MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD);
+		    break;
+		}
+
+	    if (macroblock_modes & MACROBLOCK_PATTERN) {
+		int coded_block_pattern;
+		int DCT_offset, DCT_stride;
+		int offset;
+		uint8_t * dest_y;
+
+		if (macroblock_modes & DCT_TYPE_INTERLACED) {
+		    DCT_offset = decoder->stride;
+		    DCT_stride = decoder->stride * 2;
+		} else {
+		    DCT_offset = decoder->stride * 8;
+		    DCT_stride = decoder->stride;
+		}
+
+		coded_block_pattern = get_coded_block_pattern (decoder);
+		/* bits 0x20..0x01 select the six blocks in the order used for intra */
+		offset = decoder->offset;
+		dest_y = decoder->dest[0] + offset;
+		if (coded_block_pattern & 0x20)
+		    slice_non_intra_DCT (decoder, dest_y, DCT_stride);
+		if (coded_block_pattern & 0x10)
+		    slice_non_intra_DCT (decoder, dest_y + 8, DCT_stride);
+		if (coded_block_pattern & 0x08)
+		    slice_non_intra_DCT (decoder, dest_y + DCT_offset,
+					 DCT_stride);
+		if (coded_block_pattern & 0x04)
+		    slice_non_intra_DCT (decoder, dest_y + DCT_offset + 8,
+					 DCT_stride);
+		if (coded_block_pattern & 0x2)
+		    slice_non_intra_DCT (decoder,
+					 decoder->dest[1] + (offset >> 1),
+					 decoder->uv_stride);
+		if (coded_block_pattern & 0x1)
+		    slice_non_intra_DCT (decoder,
+					 decoder->dest[2] + (offset >> 1),
+					 decoder->uv_stride);
+	    }
+	    /* every non-intra macroblock resets the DC predictors */
+	    decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] =
+		decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision;
+	}
+
+	NEXT_MACROBLOCK;
+	/* next address increment; any code not handled below ends the slice */
+	NEEDBITS (bit_buf, bits, bit_ptr);
+	mba_inc = 0;
+	while (1) {
+	    if (bit_buf >= 0x10000000) {
+		mba = MBA_5 + (UBITS (bit_buf, 5) - 2);
+		break;
+	    } else if (bit_buf >= 0x03000000) {
+		mba = MBA_11 + (UBITS (bit_buf, 11) - 24);
+		break;
+	    } else switch (UBITS (bit_buf, 11)) {
+	    case 8:		/* macroblock_escape */
+		mba_inc += 33;
+		/* fall through */
+	    case 15:	/* macroblock_stuffing (MPEG1 only) */
+		DUMPBITS (bit_buf, bits, 11);
+		NEEDBITS (bit_buf, bits, bit_ptr);
+		continue;
+	    default:	/* end of slice, or error */
+		if (mpeg2_cpu_state_restore)
+		    mpeg2_cpu_state_restore (&cpu_state);
+		return;
+	    }
+	}
+	DUMPBITS (bit_buf, bits, mba->len);
+	mba_inc += mba->mba;
+	/* skipped macroblocks: zero vector in P pictures, otherwise reuse the last vectors */
+	if (mba_inc) {
+	    decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] =
+		decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision;
+
+	    if (decoder->coding_type == P_TYPE) {
+		decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0;
+		decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0;
+
+		do {
+		    MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD);
+		    NEXT_MACROBLOCK;
+		} while (--mba_inc);
+	    } else {
+		do {
+		    MOTION_CALL (motion_reuse, macroblock_modes);
+		    NEXT_MACROBLOCK;
+		} while (--mba_inc);
+	    }
+	}
+    }
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
diff --git a/src/libmpeg2new/libmpeg2/vlc.h b/src/libmpeg2new/libmpeg2/vlc.h
new file mode 100644
index 000000000..8fa6b75bd
--- /dev/null
+++ b/src/libmpeg2new/libmpeg2/vlc.h
@@ -0,0 +1,429 @@
+/*
+ * vlc.h
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#define GETWORD(bit_buf,shift,bit_ptr)	/* load next 2 bytes, MSB first */ \
+do {	/* cast: shifting the 16-bit word as int can overflow at shift 16 */ \
+    bit_buf |= (uint32_t) ((bit_ptr[0] << 8) | bit_ptr[1]) << (shift);	\
+    bit_ptr += 2;						\
+} while (0)
+
+static inline void bitstream_init (mpeg2_decoder_t * decoder,
+				   const uint8_t * start)
+{   /* prime the bit reader with the 4 bytes start[0..3], MSB first */
+    decoder->bitstream_buf =	/* cast first: (int) 0x80 << 24 would overflow */
+	((uint32_t) start[0] << 24) | (start[1] << 16) | (start[2] << 8) | start[3];
+    decoder->bitstream_ptr = start + 4;	/* first byte not yet loaded */
+    decoder->bitstream_bits = -16;	/* up to 16 bits may be dumped before NEEDBITS refills */
+}
+
+/* make sure that there are at least 16 valid bits in bit_buf */
+#define NEEDBITS(bit_buf,bits,bit_ptr)		\
+do {						\
+    if (unlikely (bits > 0)) {	/* refill needed only when bits > 0 */ \
+	GETWORD (bit_buf, bits, bit_ptr);	\
+	bits -= 16;		/* account for the 16 bits just loaded */ \
+    }						\
+} while (0)
+
+/* remove num valid bits from bit_buf (they leave from the high end) */
+#define DUMPBITS(bit_buf,bits,num)	\
+do {					\
+    bit_buf <<= (num);			\
+    bits += (num);	/* NEEDBITS reloads once bits > 0 */ \
+} while (0)
+
+/* take num bits from the high part of bit_buf and zero extend them; num must be 1..32 (num == 0 would shift by 32) */
+#define UBITS(bit_buf,num) (((uint32_t)(bit_buf)) >> (32 - (num)))
+
+/* take num bits from the high part of bit_buf and sign extend them; relies on >> of a negative int32_t being arithmetic */
+#define SBITS(bit_buf,num) (((int32_t)(bit_buf)) >> (32 - (num)))
+
+typedef struct {
+    uint8_t modes;	/* MACROBLOCK_* flag bits, see MB_I / MB_P / MB_B */
+    uint8_t len;	/* presumably the code length in bits - confirm at the lookup site */
+} MBtab;
+
+typedef struct {
+    uint8_t delta;	/* value stored in MV_4 / MV_10; meaning not visible here */
+    uint8_t len;	/* presumably the code length in bits - confirm at the lookup site */
+} MVtab;
+
+typedef struct {
+    int8_t dmv;		/* signed: DMV_2 holds 0, 1 and -1 */
+    uint8_t len;	/* presumably the code length in bits - confirm at the lookup site */
+} DMVtab;
+
+typedef struct {
+    uint8_t cbp;	/* presumably a coded_block_pattern bit mask - confirm in slice.c */
+    uint8_t len;	/* presumably the code length in bits - confirm at the lookup site */
+} CBPtab;
+
+typedef struct {
+    uint8_t size;	/* value stored in the DC_* tables; meaning not visible here */
+    uint8_t len;	/* presumably the code length in bits - confirm at the lookup site */
+} DCtab;
+
+typedef struct {
+    uint8_t run;	/* NOTE(review): the DCT_* tables also store 65 and 129 here - likely markers, confirm */
+    uint8_t level;
+    uint8_t len;	/* presumably the code length in bits - confirm at the lookup site */
+} DCTtab;
+
+typedef struct {
+    uint8_t mba;	/* added to mba_inc after the lookup in slice.c */
+    uint8_t len;	/* number of bits passed to DUMPBITS after the lookup */
+} MBAtab;
+
+
+#define INTRA MACROBLOCK_INTRA	/* short alias, #undef'd after MB_B */
+#define QUANT MACROBLOCK_QUANT	/* short alias, #undef'd after MB_B */
+
+static const MBtab MB_I [] = {	/* 2 entries; presumably for I pictures - confirm in slice.c */
+    {INTRA|QUANT, 2}, {INTRA, 1}
+};
+
+#define MC MACROBLOCK_MOTION_FORWARD	/* short alias, #undef'd after MB_B */
+#define CODED MACROBLOCK_PATTERN	/* short alias, #undef'd after MB_B */
+
+static const MBtab MB_P [] = {	/* 32 entries; presumably for P pictures - confirm in slice.c */
+    {INTRA|QUANT, 6}, {CODED|QUANT, 5}, {MC|CODED|QUANT, 5}, {INTRA,    5},
+    {MC,          3}, {MC,          3}, {MC,             3}, {MC,       3},
+    {CODED,       2}, {CODED,       2}, {CODED,          2}, {CODED,    2},
+    {CODED,       2}, {CODED,       2}, {CODED,          2}, {CODED,    2},
+    {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1},
+    {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1},
+    {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1},
+    {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1}
+};
+
+#define FWD MACROBLOCK_MOTION_FORWARD
+#define BWD MACROBLOCK_MOTION_BACKWARD
+#define INTER (MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD)	/* parenthesized so it is safe in any expression */
+
+static const MBtab MB_B [] = {	/* 64 entries; presumably for B pictures - confirm in slice.c */
+    {0,                 0}, {INTRA|QUANT,       6},
+    {BWD|CODED|QUANT,   6}, {FWD|CODED|QUANT,   6},
+    {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5},
+					{INTRA,       5}, {INTRA,       5},
+    {FWD,         4}, {FWD,         4}, {FWD,         4}, {FWD,         4},
+    {FWD|CODED,   4}, {FWD|CODED,   4}, {FWD|CODED,   4}, {FWD|CODED,   4},
+    {BWD,         3}, {BWD,         3}, {BWD,         3}, {BWD,         3},
+    {BWD,         3}, {BWD,         3}, {BWD,         3}, {BWD,         3},
+    {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3},
+    {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3},
+    {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
+    {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
+    {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
+    {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
+    {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
+    {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
+    {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
+    {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}
+};
+
+#undef INTRA	/* the short aliases are only meant for the MB_* tables above */
+#undef QUANT
+#undef MC
+#undef CODED
+#undef FWD
+#undef BWD
+#undef INTER
+
+
+static const MVtab MV_4 [] = {	/* 8 entries; lookup site not visible here - see slice.c */
+    { 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}
+};
+
+static const MVtab MV_10 [] = {	/* 48 entries; lookup site not visible here - see slice.c */
+    { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10},
+    { 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10},
+    {11,10}, {10,10}, { 9, 9}, { 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9},
+    { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7},
+    { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7},
+    { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}
+};
+
+
+static const DMVtab DMV_2 [] = {	/* 4 entries, presumably indexed by 2 bits of bit_buf - confirm */
+    { 0, 1}, { 0, 1}, { 1, 2}, {-1, 2}
+};
+
+
+static const CBPtab CBP_7 [] = {	/* 112 entries; lookup site not visible here - see slice.c */
+    {0x22, 7}, {0x12, 7}, {0x0a, 7}, {0x06, 7},
+    {0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7},
+    {0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6},
+    {0x24, 6}, {0x24, 6}, {0x18, 6}, {0x18, 6},
+    {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, {0x3e, 5},
+    {0x02, 5}, {0x02, 5}, {0x02, 5}, {0x02, 5},
+    {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, {0x3d, 5},
+    {0x01, 5}, {0x01, 5}, {0x01, 5}, {0x01, 5},
+    {0x38, 5}, {0x38, 5}, {0x38, 5}, {0x38, 5},
+    {0x34, 5}, {0x34, 5}, {0x34, 5}, {0x34, 5},
+    {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, {0x2c, 5},
+    {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, {0x1c, 5},
+    {0x28, 5}, {0x28, 5}, {0x28, 5}, {0x28, 5},
+    {0x14, 5}, {0x14, 5}, {0x14, 5}, {0x14, 5},
+    {0x30, 5}, {0x30, 5}, {0x30, 5}, {0x30, 5},
+    {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5},
+    {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4},
+    {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4},
+    {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4},
+    {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4},
+    {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4},
+    {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4},
+    {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4},
+    {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4},
+    {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3},
+    {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3},
+    {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3},
+    {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}
+};
+
+static const CBPtab CBP_9 [] = {	/* 64 entries; lookup site not visible here - see slice.c */
+    {0,    0}, {0x00, 9}, {0x27, 9}, {0x1b, 9},
+    {0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9},
+    {0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8},
+    {0x2e, 8}, {0x2e, 8}, {0x1e, 8}, {0x1e, 8},
+    {0x39, 8}, {0x39, 8}, {0x35, 8}, {0x35, 8},
+    {0x2d, 8}, {0x2d, 8}, {0x1d, 8}, {0x1d, 8},
+    {0x26, 8}, {0x26, 8}, {0x1a, 8}, {0x1a, 8},
+    {0x25, 8}, {0x25, 8}, {0x19, 8}, {0x19, 8},
+    {0x2b, 8}, {0x2b, 8}, {0x17, 8}, {0x17, 8},
+    {0x33, 8}, {0x33, 8}, {0x0f, 8}, {0x0f, 8},
+    {0x2a, 8}, {0x2a, 8}, {0x16, 8}, {0x16, 8},
+    {0x32, 8}, {0x32, 8}, {0x0e, 8}, {0x0e, 8},
+    {0x29, 8}, {0x29, 8}, {0x15, 8}, {0x15, 8},
+    {0x31, 8}, {0x31, 8}, {0x0d, 8}, {0x0d, 8},
+    {0x23, 8}, {0x23, 8}, {0x13, 8}, {0x13, 8},
+    {0x0b, 8}, {0x0b, 8}, {0x07, 8}, {0x07, 8}
+};
+
+
+static const DCtab DC_lum_5 [] = {	/* 31 entries; lookup site not visible here - see slice.c */
+    {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
+    {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+    {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3},
+    {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}
+};
+
+static const DCtab DC_chrom_5 [] = {	/* 31 entries; lookup site not visible here - see slice.c */
+    {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2},
+    {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
+    {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+    {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}
+};
+
+static const DCtab DC_long [] = {	/* 32 entries; lookup site not visible here - see slice.c */
+    {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5},
+    {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5},
+    {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, { 7, 6}, { 7, 6},
+    {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9}
+};
+
+
+static const DCTtab DCT_16 [] = {	/* 32 entries of {run, level, len}; every len here is 0 */
+    {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
+    {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
+    {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
+    {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
+    {  2,18, 0}, {  2,17, 0}, {  2,16, 0}, {  2,15, 0},
+    {  7, 3, 0}, { 17, 2, 0}, { 16, 2, 0}, { 15, 2, 0},
+    { 14, 2, 0}, { 13, 2, 0}, { 12, 2, 0}, { 32, 1, 0},
+    { 31, 1, 0}, { 30, 1, 0}, { 29, 1, 0}, { 28, 1, 0}
+};
+
+static const DCTtab DCT_15 [] = {	/* 48 entries of {run, level, len} */
+    {  1,40,15}, {  1,39,15}, {  1,38,15}, {  1,37,15},
+    {  1,36,15}, {  1,35,15}, {  1,34,15}, {  1,33,15},
+    {  1,32,15}, {  2,14,15}, {  2,13,15}, {  2,12,15},
+    {  2,11,15}, {  2,10,15}, {  2, 9,15}, {  2, 8,15},
+    {  1,31,14}, {  1,31,14}, {  1,30,14}, {  1,30,14},
+    {  1,29,14}, {  1,29,14}, {  1,28,14}, {  1,28,14},
+    {  1,27,14}, {  1,27,14}, {  1,26,14}, {  1,26,14},
+    {  1,25,14}, {  1,25,14}, {  1,24,14}, {  1,24,14},
+    {  1,23,14}, {  1,23,14}, {  1,22,14}, {  1,22,14},
+    {  1,21,14}, {  1,21,14}, {  1,20,14}, {  1,20,14},
+    {  1,19,14}, {  1,19,14}, {  1,18,14}, {  1,18,14},
+    {  1,17,14}, {  1,17,14}, {  1,16,14}, {  1,16,14}
+};
+
+static const DCTtab DCT_13 [] = {	/* 48 entries of {run, level, len} */
+    { 11, 2,13}, { 10, 2,13}, {  6, 3,13}, {  4, 4,13},
+    {  3, 5,13}, {  2, 7,13}, {  2, 6,13}, {  1,15,13},
+    {  1,14,13}, {  1,13,13}, {  1,12,13}, { 27, 1,13},
+    { 26, 1,13}, { 25, 1,13}, { 24, 1,13}, { 23, 1,13},
+    {  1,11,12}, {  1,11,12}, {  9, 2,12}, {  9, 2,12},
+    {  5, 3,12}, {  5, 3,12}, {  1,10,12}, {  1,10,12},
+    {  3, 4,12}, {  3, 4,12}, {  8, 2,12}, {  8, 2,12},
+    { 22, 1,12}, { 22, 1,12}, { 21, 1,12}, { 21, 1,12},
+    {  1, 9,12}, {  1, 9,12}, { 20, 1,12}, { 20, 1,12},
+    { 19, 1,12}, { 19, 1,12}, {  2, 5,12}, {  2, 5,12},
+    {  4, 3,12}, {  4, 3,12}, {  1, 8,12}, {  1, 8,12},
+    {  7, 2,12}, {  7, 2,12}, { 18, 1,12}, { 18, 1,12}
+};
+
+static const DCTtab DCT_B14_10 [] = {	/* 8 entries; "B14" presumably = MPEG-2 table B-14 - confirm */
+    { 17, 1,10}, {  6, 2,10}, {  1, 7,10}, {  3, 3,10},
+    {  2, 4,10}, { 16, 1,10}, { 15, 1,10}, {  5, 2,10}
+};
+
+static const DCTtab DCT_B14_8 [] = {	/* 36 entries of {run, level, len} */
+    { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6},
+    {  3, 2, 7}, {  3, 2, 7}, { 10, 1, 7}, { 10, 1, 7},
+    {  1, 4, 7}, {  1, 4, 7}, {  9, 1, 7}, {  9, 1, 7},
+    {  8, 1, 6}, {  8, 1, 6}, {  8, 1, 6}, {  8, 1, 6},
+    {  7, 1, 6}, {  7, 1, 6}, {  7, 1, 6}, {  7, 1, 6},
+    {  2, 2, 6}, {  2, 2, 6}, {  2, 2, 6}, {  2, 2, 6},
+    {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6},
+    { 14, 1, 8}, {  1, 6, 8}, { 13, 1, 8}, { 12, 1, 8},
+    {  4, 2, 8}, {  2, 3, 8}, {  1, 5, 8}, { 11, 1, 8}
+};
+
+static const DCTtab DCT_B14AC_5 [] = {	/* 27 entries; differs from DCT_B14DC_5 only in the last 16 */
+		 {  1, 3, 5}, {  5, 1, 5}, {  4, 1, 5},
+    {  1, 2, 4}, {  1, 2, 4}, {  3, 1, 4}, {  3, 1, 4},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2},
+    {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}
+};
+
+static const DCTtab DCT_B14DC_5 [] = {	/* 27 entries; differs from DCT_B14AC_5 only in the last 16 */
+		 {  1, 3, 5}, {  5, 1, 5}, {  4, 1, 5},
+    {  1, 2, 4}, {  1, 2, 4}, {  3, 1, 4}, {  3, 1, 4},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
+    {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
+    {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
+    {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}
+};
+
+static const DCTtab DCT_B15_10 [] = {	/* 8 entries; "B15" presumably = MPEG-2 table B-15 - confirm */
+    {  6, 2, 9}, {  6, 2, 9}, { 15, 1, 9}, { 15, 1, 9},
+    {  3, 4,10}, { 17, 1,10}, { 16, 1, 9}, { 16, 1, 9}
+};
+
+static const DCTtab DCT_B15_8 [] = {	/* 252 entries of {run, level, len} */
+    { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6},
+    {  8, 1, 7}, {  8, 1, 7}, {  9, 1, 7}, {  9, 1, 7},
+    {  7, 1, 7}, {  7, 1, 7}, {  3, 2, 7}, {  3, 2, 7},
+    {  1, 7, 6}, {  1, 7, 6}, {  1, 7, 6}, {  1, 7, 6},
+    {  1, 6, 6}, {  1, 6, 6}, {  1, 6, 6}, {  1, 6, 6},
+    {  5, 1, 6}, {  5, 1, 6}, {  5, 1, 6}, {  5, 1, 6},
+    {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6},
+    {  2, 5, 8}, { 12, 1, 8}, {  1,11, 8}, {  1,10, 8},
+    { 14, 1, 8}, { 13, 1, 8}, {  4, 2, 8}, {  2, 4, 8},
+    {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5},
+    {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5},
+    {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5},
+    {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5},
+    {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5},
+    {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
+    {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
+    {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
+    {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
+    {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
+    {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
+    {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
+    {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+    {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5},
+    {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5},
+    {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5},
+    {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5},
+    { 10, 1, 7}, { 10, 1, 7}, {  2, 3, 7}, {  2, 3, 7},
+    { 11, 1, 7}, { 11, 1, 7}, {  1, 8, 7}, {  1, 8, 7},
+    {  1, 9, 7}, {  1, 9, 7}, {  1,12, 8}, {  1,13, 8},
+    {  3, 3, 8}, {  5, 2, 8}, {  1,14, 8}, {  1,15, 8}
+};
+
+
+static const MBAtab MBA_5 [] = {	/* 30 entries; slice.c indexes it with UBITS (bit_buf, 5) - 2 */
+		    {6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4},
+    {2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3},
+    {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1},
+    {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}
+};
+
+static const MBAtab MBA_11 [] = {	/* 104 entries; slice.c indexes it with UBITS (bit_buf, 11) - 24 */
+    {32, 11}, {31, 11}, {30, 11}, {29, 11},
+    {28, 11}, {27, 11}, {26, 11}, {25, 11},
+    {24, 11}, {23, 11}, {22, 11}, {21, 11},
+    {20, 10}, {20, 10}, {19, 10}, {19, 10},
+    {18, 10}, {18, 10}, {17, 10}, {17, 10},
+    {16, 10}, {16, 10}, {15, 10}, {15, 10},
+    {14,  8}, {14,  8}, {14,  8}, {14,  8},
+    {14,  8}, {14,  8}, {14,  8}, {14,  8},
+    {13,  8}, {13,  8}, {13,  8}, {13,  8},
+    {13,  8}, {13,  8}, {13,  8}, {13,  8},
+    {12,  8}, {12,  8}, {12,  8}, {12,  8},
+    {12,  8}, {12,  8}, {12,  8}, {12,  8},
+    {11,  8}, {11,  8}, {11,  8}, {11,  8},
+    {11,  8}, {11,  8}, {11,  8}, {11,  8},
+    {10,  8}, {10,  8}, {10,  8}, {10,  8},
+    {10,  8}, {10,  8}, {10,  8}, {10,  8},
+    { 9,  8}, { 9,  8}, { 9,  8}, { 9,  8},
+    { 9,  8}, { 9,  8}, { 9,  8}, { 9,  8},
+    { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
+    { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
+    { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
+    { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
+    { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
+    { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
+    { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
+    { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7}
+};
-- 
cgit v1.2.3