From 5574ccbb47739ee876dcd49bf8d9f0cfc2528d2a Mon Sep 17 00:00:00 2001
From: Darren Salt <linux@youmustbejoking.demon.co.uk>
Date: Mon, 12 Jan 2009 20:23:42 +0000
Subject: Move libmpeg2new into src/video_dec.

--HG--
rename : src/libmpeg2new/Makefile.am => src/video_dec/libmpeg2new/Makefile.am
rename : src/libmpeg2new/include/Makefile.am => src/video_dec/libmpeg2new/include/Makefile.am
rename : src/libmpeg2new/include/alpha_asm.h => src/video_dec/libmpeg2new/include/alpha_asm.h
rename : src/libmpeg2new/include/attributes.h => src/video_dec/libmpeg2new/include/attributes.h
rename : src/libmpeg2new/include/mmx.h => src/video_dec/libmpeg2new/include/mmx.h
rename : src/libmpeg2new/include/mpeg2.h => src/video_dec/libmpeg2new/include/mpeg2.h
rename : src/libmpeg2new/include/mpeg2convert.h => src/video_dec/libmpeg2new/include/mpeg2convert.h
rename : src/libmpeg2new/include/sse.h => src/video_dec/libmpeg2new/include/sse.h
rename : src/libmpeg2new/include/tendra.h => src/video_dec/libmpeg2new/include/tendra.h
rename : src/libmpeg2new/include/video_out.h => src/video_dec/libmpeg2new/include/video_out.h
rename : src/libmpeg2new/include/vis.h => src/video_dec/libmpeg2new/include/vis.h
rename : src/libmpeg2new/libmpeg2/Makefile.am => src/video_dec/libmpeg2new/libmpeg2/Makefile.am
rename : src/libmpeg2new/libmpeg2/alloc.c => src/video_dec/libmpeg2new/libmpeg2/alloc.c
rename : src/libmpeg2new/libmpeg2/configure.incl => src/video_dec/libmpeg2new/libmpeg2/configure.incl
rename : src/libmpeg2new/libmpeg2/convert_internal.h => src/video_dec/libmpeg2new/libmpeg2/convert_internal.h
rename : src/libmpeg2new/libmpeg2/cpu_accel.c => src/video_dec/libmpeg2new/libmpeg2/cpu_accel.c
rename : src/libmpeg2new/libmpeg2/cpu_state.c => src/video_dec/libmpeg2new/libmpeg2/cpu_state.c
rename : src/libmpeg2new/libmpeg2/decode.c => src/video_dec/libmpeg2new/libmpeg2/decode.c
rename : src/libmpeg2new/libmpeg2/header.c => src/video_dec/libmpeg2new/libmpeg2/header.c
rename : src/libmpeg2new/libmpeg2/idct.c => src/video_dec/libmpeg2new/libmpeg2/idct.c
rename : src/libmpeg2new/libmpeg2/idct_alpha.c => src/video_dec/libmpeg2new/libmpeg2/idct_alpha.c
rename : src/libmpeg2new/libmpeg2/idct_altivec.c => src/video_dec/libmpeg2new/libmpeg2/idct_altivec.c
rename : src/libmpeg2new/libmpeg2/idct_mlib.c => src/video_dec/libmpeg2new/libmpeg2/idct_mlib.c
rename : src/libmpeg2new/libmpeg2/idct_mmx.c => src/video_dec/libmpeg2new/libmpeg2/idct_mmx.c
rename : src/libmpeg2new/libmpeg2/libmpeg2.pc.in => src/video_dec/libmpeg2new/libmpeg2/libmpeg2.pc.in
rename : src/libmpeg2new/libmpeg2/libmpeg2convert.pc.in => src/video_dec/libmpeg2new/libmpeg2/libmpeg2convert.pc.in
rename : src/libmpeg2new/libmpeg2/motion_comp.c => src/video_dec/libmpeg2new/libmpeg2/motion_comp.c
rename : src/libmpeg2new/libmpeg2/motion_comp_alpha.c => src/video_dec/libmpeg2new/libmpeg2/motion_comp_alpha.c
rename : src/libmpeg2new/libmpeg2/motion_comp_altivec.c => src/video_dec/libmpeg2new/libmpeg2/motion_comp_altivec.c
rename : src/libmpeg2new/libmpeg2/motion_comp_mlib.c => src/video_dec/libmpeg2new/libmpeg2/motion_comp_mlib.c
rename : src/libmpeg2new/libmpeg2/motion_comp_mmx.c => src/video_dec/libmpeg2new/libmpeg2/motion_comp_mmx.c
rename : src/libmpeg2new/libmpeg2/motion_comp_vis.c => src/video_dec/libmpeg2new/libmpeg2/motion_comp_vis.c
rename : src/libmpeg2new/libmpeg2/mpeg2_internal.h => src/video_dec/libmpeg2new/libmpeg2/mpeg2_internal.h
rename : src/libmpeg2new/libmpeg2/rgb.c => src/video_dec/libmpeg2new/libmpeg2/rgb.c
rename : src/libmpeg2new/libmpeg2/rgb_mmx.c => src/video_dec/libmpeg2new/libmpeg2/rgb_mmx.c
rename : src/libmpeg2new/libmpeg2/rgb_vis.c => src/video_dec/libmpeg2new/libmpeg2/rgb_vis.c
rename : src/libmpeg2new/libmpeg2/slice.c => src/video_dec/libmpeg2new/libmpeg2/slice.c
rename : src/libmpeg2new/libmpeg2/uyvy.c => src/video_dec/libmpeg2new/libmpeg2/uyvy.c
rename : src/libmpeg2new/libmpeg2/vlc.h => src/video_dec/libmpeg2new/libmpeg2/vlc.h
rename : src/libmpeg2new/xine_mpeg2new_decoder.c => src/video_dec/libmpeg2new/xine_mpeg2new_decoder.c
---
 src/libmpeg2new/Makefile.am                        |   12 -
 src/libmpeg2new/include/Makefile.am                |    3 -
 src/libmpeg2new/include/alpha_asm.h                |  181 --
 src/libmpeg2new/include/attributes.h               |   37 -
 src/libmpeg2new/include/mmx.h                      |  263 ---
 src/libmpeg2new/include/mpeg2.h                    |  202 --
 src/libmpeg2new/include/mpeg2convert.h             |   48 -
 src/libmpeg2new/include/sse.h                      |  256 ---
 src/libmpeg2new/include/tendra.h                   |   35 -
 src/libmpeg2new/include/video_out.h                |   58 -
 src/libmpeg2new/include/vis.h                      |  328 ----
 src/libmpeg2new/libmpeg2/Makefile.am               |   14 -
 src/libmpeg2new/libmpeg2/alloc.c                   |   70 -
 src/libmpeg2new/libmpeg2/configure.incl            |   11 -
 src/libmpeg2new/libmpeg2/convert_internal.h        |   42 -
 src/libmpeg2new/libmpeg2/cpu_accel.c               |  258 ---
 src/libmpeg2new/libmpeg2/cpu_state.c               |  129 --
 src/libmpeg2new/libmpeg2/decode.c                  |  439 -----
 src/libmpeg2new/libmpeg2/header.c                  |  961 ---------
 src/libmpeg2new/libmpeg2/idct.c                    |  287 ---
 src/libmpeg2new/libmpeg2/idct_alpha.c              |  379 ----
 src/libmpeg2new/libmpeg2/idct_altivec.c            |  288 ---
 src/libmpeg2new/libmpeg2/idct_mlib.c               |   60 -
 src/libmpeg2new/libmpeg2/idct_mmx.c                |  814 --------
 src/libmpeg2new/libmpeg2/libmpeg2.pc.in            |   10 -
 src/libmpeg2new/libmpeg2/libmpeg2convert.pc.in     |   10 -
 src/libmpeg2new/libmpeg2/motion_comp.c             |  130 --
 src/libmpeg2new/libmpeg2/motion_comp_alpha.c       |  253 ---
 src/libmpeg2new/libmpeg2/motion_comp_altivec.c     | 1010 ----------
 src/libmpeg2new/libmpeg2/motion_comp_mlib.c        |  190 --
 src/libmpeg2new/libmpeg2/motion_comp_mmx.c         | 1005 ----------
 src/libmpeg2new/libmpeg2/motion_comp_vis.c         | 2061 --------------------
 src/libmpeg2new/libmpeg2/mpeg2_internal.h          |  302 ---
 src/libmpeg2new/libmpeg2/rgb.c                     |  598 ------
 src/libmpeg2new/libmpeg2/rgb_mmx.c                 |  321 ---
 src/libmpeg2new/libmpeg2/rgb_vis.c                 |  384 ----
 src/libmpeg2new/libmpeg2/slice.c                   | 2058 -------------------
 src/libmpeg2new/libmpeg2/uyvy.c                    |  123 --
 src/libmpeg2new/libmpeg2/vlc.h                     |  429 ----
 src/libmpeg2new/xine_mpeg2new_decoder.c            |  504 -----
 src/video_dec/libmpeg2new/Makefile.am              |   12 +
 src/video_dec/libmpeg2new/include/Makefile.am      |    3 +
 src/video_dec/libmpeg2new/include/alpha_asm.h      |  181 ++
 src/video_dec/libmpeg2new/include/attributes.h     |   37 +
 src/video_dec/libmpeg2new/include/mmx.h            |  263 +++
 src/video_dec/libmpeg2new/include/mpeg2.h          |  202 ++
 src/video_dec/libmpeg2new/include/mpeg2convert.h   |   48 +
 src/video_dec/libmpeg2new/include/sse.h            |  256 +++
 src/video_dec/libmpeg2new/include/tendra.h         |   35 +
 src/video_dec/libmpeg2new/include/video_out.h      |   58 +
 src/video_dec/libmpeg2new/include/vis.h            |  328 ++++
 src/video_dec/libmpeg2new/libmpeg2/Makefile.am     |   14 +
 src/video_dec/libmpeg2new/libmpeg2/alloc.c         |   70 +
 src/video_dec/libmpeg2new/libmpeg2/configure.incl  |   11 +
 .../libmpeg2new/libmpeg2/convert_internal.h        |   42 +
 src/video_dec/libmpeg2new/libmpeg2/cpu_accel.c     |  258 +++
 src/video_dec/libmpeg2new/libmpeg2/cpu_state.c     |  129 ++
 src/video_dec/libmpeg2new/libmpeg2/decode.c        |  439 +++++
 src/video_dec/libmpeg2new/libmpeg2/header.c        |  961 +++++++++
 src/video_dec/libmpeg2new/libmpeg2/idct.c          |  287 +++
 src/video_dec/libmpeg2new/libmpeg2/idct_alpha.c    |  379 ++++
 src/video_dec/libmpeg2new/libmpeg2/idct_altivec.c  |  288 +++
 src/video_dec/libmpeg2new/libmpeg2/idct_mlib.c     |   60 +
 src/video_dec/libmpeg2new/libmpeg2/idct_mmx.c      |  814 ++++++++
 src/video_dec/libmpeg2new/libmpeg2/libmpeg2.pc.in  |   10 +
 .../libmpeg2new/libmpeg2/libmpeg2convert.pc.in     |   10 +
 src/video_dec/libmpeg2new/libmpeg2/motion_comp.c   |  130 ++
 .../libmpeg2new/libmpeg2/motion_comp_alpha.c       |  253 +++
 .../libmpeg2new/libmpeg2/motion_comp_altivec.c     | 1010 ++++++++++
 .../libmpeg2new/libmpeg2/motion_comp_mlib.c        |  190 ++
 .../libmpeg2new/libmpeg2/motion_comp_mmx.c         | 1005 ++++++++++
 .../libmpeg2new/libmpeg2/motion_comp_vis.c         | 2061 ++++++++++++++++++++
 .../libmpeg2new/libmpeg2/mpeg2_internal.h          |  302 +++
 src/video_dec/libmpeg2new/libmpeg2/rgb.c           |  598 ++++++
 src/video_dec/libmpeg2new/libmpeg2/rgb_mmx.c       |  321 +++
 src/video_dec/libmpeg2new/libmpeg2/rgb_vis.c       |  384 ++++
 src/video_dec/libmpeg2new/libmpeg2/slice.c         | 2058 +++++++++++++++++++
 src/video_dec/libmpeg2new/libmpeg2/uyvy.c          |  123 ++
 src/video_dec/libmpeg2new/libmpeg2/vlc.h           |  429 ++++
 src/video_dec/libmpeg2new/xine_mpeg2new_decoder.c  |  504 +++++
 80 files changed, 14563 insertions(+), 14563 deletions(-)
 delete mode 100644 src/libmpeg2new/Makefile.am
 delete mode 100644 src/libmpeg2new/include/Makefile.am
 delete mode 100644 src/libmpeg2new/include/alpha_asm.h
 delete mode 100644 src/libmpeg2new/include/attributes.h
 delete mode 100644 src/libmpeg2new/include/mmx.h
 delete mode 100644 src/libmpeg2new/include/mpeg2.h
 delete mode 100644 src/libmpeg2new/include/mpeg2convert.h
 delete mode 100644 src/libmpeg2new/include/sse.h
 delete mode 100644 src/libmpeg2new/include/tendra.h
 delete mode 100644 src/libmpeg2new/include/video_out.h
 delete mode 100644 src/libmpeg2new/include/vis.h
 delete mode 100644 src/libmpeg2new/libmpeg2/Makefile.am
 delete mode 100644 src/libmpeg2new/libmpeg2/alloc.c
 delete mode 100644 src/libmpeg2new/libmpeg2/configure.incl
 delete mode 100644 src/libmpeg2new/libmpeg2/convert_internal.h
 delete mode 100644 src/libmpeg2new/libmpeg2/cpu_accel.c
 delete mode 100644 src/libmpeg2new/libmpeg2/cpu_state.c
 delete mode 100644 src/libmpeg2new/libmpeg2/decode.c
 delete mode 100644 src/libmpeg2new/libmpeg2/header.c
 delete mode 100644 src/libmpeg2new/libmpeg2/idct.c
 delete mode 100644 src/libmpeg2new/libmpeg2/idct_alpha.c
 delete mode 100644 src/libmpeg2new/libmpeg2/idct_altivec.c
 delete mode 100644 src/libmpeg2new/libmpeg2/idct_mlib.c
 delete mode 100644 src/libmpeg2new/libmpeg2/idct_mmx.c
 delete mode 100644 src/libmpeg2new/libmpeg2/libmpeg2.pc.in
 delete mode 100644 src/libmpeg2new/libmpeg2/libmpeg2convert.pc.in
 delete mode 100644 src/libmpeg2new/libmpeg2/motion_comp.c
 delete mode 100644 src/libmpeg2new/libmpeg2/motion_comp_alpha.c
 delete mode 100644 src/libmpeg2new/libmpeg2/motion_comp_altivec.c
 delete mode 100644 src/libmpeg2new/libmpeg2/motion_comp_mlib.c
 delete mode 100644 src/libmpeg2new/libmpeg2/motion_comp_mmx.c
 delete mode 100644 src/libmpeg2new/libmpeg2/motion_comp_vis.c
 delete mode 100644 src/libmpeg2new/libmpeg2/mpeg2_internal.h
 delete mode 100644 src/libmpeg2new/libmpeg2/rgb.c
 delete mode 100644 src/libmpeg2new/libmpeg2/rgb_mmx.c
 delete mode 100644 src/libmpeg2new/libmpeg2/rgb_vis.c
 delete mode 100644 src/libmpeg2new/libmpeg2/slice.c
 delete mode 100644 src/libmpeg2new/libmpeg2/uyvy.c
 delete mode 100644 src/libmpeg2new/libmpeg2/vlc.h
 delete mode 100644 src/libmpeg2new/xine_mpeg2new_decoder.c
 create mode 100644 src/video_dec/libmpeg2new/Makefile.am
 create mode 100644 src/video_dec/libmpeg2new/include/Makefile.am
 create mode 100644 src/video_dec/libmpeg2new/include/alpha_asm.h
 create mode 100644 src/video_dec/libmpeg2new/include/attributes.h
 create mode 100644 src/video_dec/libmpeg2new/include/mmx.h
 create mode 100644 src/video_dec/libmpeg2new/include/mpeg2.h
 create mode 100644 src/video_dec/libmpeg2new/include/mpeg2convert.h
 create mode 100644 src/video_dec/libmpeg2new/include/sse.h
 create mode 100644 src/video_dec/libmpeg2new/include/tendra.h
 create mode 100644 src/video_dec/libmpeg2new/include/video_out.h
 create mode 100644 src/video_dec/libmpeg2new/include/vis.h
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/Makefile.am
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/alloc.c
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/configure.incl
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/convert_internal.h
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/cpu_accel.c
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/cpu_state.c
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/decode.c
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/header.c
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/idct.c
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/idct_alpha.c
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/idct_altivec.c
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/idct_mlib.c
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/idct_mmx.c
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/libmpeg2.pc.in
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/libmpeg2convert.pc.in
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/motion_comp.c
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/motion_comp_alpha.c
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/motion_comp_altivec.c
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/motion_comp_mlib.c
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/motion_comp_mmx.c
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/motion_comp_vis.c
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/mpeg2_internal.h
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/rgb.c
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/rgb_mmx.c
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/rgb_vis.c
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/slice.c
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/uyvy.c
 create mode 100644 src/video_dec/libmpeg2new/libmpeg2/vlc.h
 create mode 100644 src/video_dec/libmpeg2new/xine_mpeg2new_decoder.c

diff --git a/src/libmpeg2new/Makefile.am b/src/libmpeg2new/Makefile.am
deleted file mode 100644
index 8c248fdcb..000000000
--- a/src/libmpeg2new/Makefile.am
+++ /dev/null
@@ -1,12 +0,0 @@
-include $(top_srcdir)/misc/Makefile.common
-
-AM_CFLAGS  = $(DEFAULT_OCFLAGS) $(VISIBILITY_FLAG)
-AM_LDFLAGS = $(xineplug_ldflags)
-
-SUBDIRS = libmpeg2
-
-xineplug_LTLIBRARIES = xineplug_decode_mpeg2.la
-
-xineplug_decode_mpeg2_la_SOURCES = xine_mpeg2_decoder.c
-xineplug_decode_mpeg2_la_LIBADD = $(XINE_LIB) ./libmpeg2/libmpeg2.la 
-xineplug_decode_mpeg2_la_CFLAGS = $(AM_CFLAGS) $(MLIB_CFLAGS)
diff --git a/src/libmpeg2new/include/Makefile.am b/src/libmpeg2new/include/Makefile.am
deleted file mode 100644
index 302d01cb1..000000000
--- a/src/libmpeg2new/include/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-pkginclude_HEADERS = mpeg2.h mpeg2convert.h
-
-EXTRA_DIST = video_out.h mmx.h alpha_asm.h vis.h attributes.h tendra.h
diff --git a/src/libmpeg2new/include/alpha_asm.h b/src/libmpeg2new/include/alpha_asm.h
deleted file mode 100644
index bf1081f24..000000000
--- a/src/libmpeg2new/include/alpha_asm.h
+++ /dev/null
@@ -1,181 +0,0 @@
-/*
- * Alpha assembly macros
- * Copyright (c) 2002-2003 Falk Hueffner <falk@debian.org>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307	 USA
- */
-
-#ifndef ALPHA_ASM_H
-#define ALPHA_ASM_H
-
-#include <inttypes.h>
-
-#if defined __GNUC__
-# define GNUC_PREREQ(maj, min) \
-        ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
-#else
-# define GNUC_PREREQ(maj, min) 0
-#endif
-
-#define AMASK_BWX (1 << 0)
-#define AMASK_FIX (1 << 1)
-#define AMASK_CIX (1 << 2)
-#define AMASK_MVI (1 << 8)
-
-#ifdef __alpha_bwx__
-# define HAVE_BWX() 1
-#else
-# define HAVE_BWX() (amask(AMASK_BWX) == 0)
-#endif
-#ifdef __alpha_fix__
-# define HAVE_FIX() 1
-#else
-# define HAVE_FIX() (amask(AMASK_FIX) == 0)
-#endif
-#ifdef __alpha_max__
-# define HAVE_MVI() 1
-#else
-# define HAVE_MVI() (amask(AMASK_MVI) == 0)
-#endif
-#ifdef __alpha_cix__
-# define HAVE_CIX() 1
-#else
-# define HAVE_CIX() (amask(AMASK_CIX) == 0)
-#endif
-
-inline static uint64_t BYTE_VEC(uint64_t x)
-{
-    x |= x <<  8;
-    x |= x << 16;
-    x |= x << 32;
-    return x;
-}
-inline static uint64_t WORD_VEC(uint64_t x)
-{
-    x |= x << 16;
-    x |= x << 32;
-    return x;
-}
-
-#define ldq(p) (*(const uint64_t *) (p))
-#define ldl(p) (*(const int32_t *) (p))
-#define stl(l, p) do { *(uint32_t *) (p) = (l); } while (0)
-#define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0)
-#define sextw(x) ((int16_t) (x))
-
-#ifdef __GNUC__
-struct unaligned_long { uint64_t l; } __attribute__((packed));
-#define ldq_u(p)     (*(const uint64_t *) (((uint64_t) (p)) & ~7ul))
-#define uldq(a)	     (((const struct unaligned_long *) (a))->l)
-
-#if GNUC_PREREQ(3,3)
-#define prefetch(p)     __builtin_prefetch((p), 0, 1)
-#define prefetch_en(p)  __builtin_prefetch((p), 0, 0)
-#define prefetch_m(p)   __builtin_prefetch((p), 1, 1)
-#define prefetch_men(p) __builtin_prefetch((p), 1, 0)
-#define cmpbge	__builtin_alpha_cmpbge
-/* Avoid warnings.  */
-#define extql(a, b)	__builtin_alpha_extql(a, (uint64_t) (b))
-#define extwl(a, b)	__builtin_alpha_extwl(a, (uint64_t) (b))
-#define extqh(a, b)	__builtin_alpha_extqh(a, (uint64_t) (b))
-#define zap	__builtin_alpha_zap
-#define zapnot	__builtin_alpha_zapnot
-#define amask	__builtin_alpha_amask
-#define implver	__builtin_alpha_implver
-#define rpcc	__builtin_alpha_rpcc
-#else
-#define prefetch(p)     asm volatile("ldl $31,%0"  : : "m"(*(const char *) (p)) : "memory")
-#define prefetch_en(p)  asm volatile("ldq $31,%0"  : : "m"(*(const char *) (p)) : "memory")
-#define prefetch_m(p)   asm volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory")
-#define prefetch_men(p) asm volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory")
-#define cmpbge(a, b) ({ uint64_t __r; asm ("cmpbge  %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
-#define extql(a, b)  ({ uint64_t __r; asm ("extql   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
-#define extwl(a, b)  ({ uint64_t __r; asm ("extwl   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
-#define extqh(a, b)  ({ uint64_t __r; asm ("extqh   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
-#define zap(a, b)    ({ uint64_t __r; asm ("zap     %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
-#define zapnot(a, b) ({ uint64_t __r; asm ("zapnot  %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
-#define amask(a)     ({ uint64_t __r; asm ("amask   %1,%0"      : "=r" (__r) : "rI"  (a));	     __r; })
-#define implver()    ({ uint64_t __r; asm ("implver %0"         : "=r" (__r));			     __r; })
-#define rpcc()	     ({ uint64_t __r; asm volatile ("rpcc %0"   : "=r" (__r));			     __r; })
-#endif
-#define wh64(p) asm volatile("wh64 (%0)" : : "r"(p) : "memory")
-
-#if GNUC_PREREQ(3,3) && defined(__alpha_max__)
-#define minub8	__builtin_alpha_minub8
-#define minsb8	__builtin_alpha_minsb8
-#define minuw4	__builtin_alpha_minuw4
-#define minsw4	__builtin_alpha_minsw4
-#define maxub8	__builtin_alpha_maxub8
-#define maxsb8	__builtin_alpha_maxsb8
-#define maxuw4	__builtin_alpha_maxuw4	
-#define maxsw4	__builtin_alpha_maxsw4
-#define perr	__builtin_alpha_perr
-#define pklb	__builtin_alpha_pklb
-#define pkwb	__builtin_alpha_pkwb
-#define unpkbl	__builtin_alpha_unpkbl
-#define unpkbw	__builtin_alpha_unpkbw
-#else
-#define minub8(a, b) ({ uint64_t __r; asm (".arch ev6; minub8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define minsb8(a, b) ({ uint64_t __r; asm (".arch ev6; minsb8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define minuw4(a, b) ({ uint64_t __r; asm (".arch ev6; minuw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define minsw4(a, b) ({ uint64_t __r; asm (".arch ev6; minsw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define maxub8(a, b) ({ uint64_t __r; asm (".arch ev6; maxub8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define maxsb8(a, b) ({ uint64_t __r; asm (".arch ev6; maxsb8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define maxuw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxuw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define maxsw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxsw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define perr(a, b)   ({ uint64_t __r; asm (".arch ev6; perr    %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; })
-#define pklb(a)      ({ uint64_t __r; asm (".arch ev6; pklb    %r1,%0"     : "=r" (__r) : "rJ"  (a));	     __r; })
-#define pkwb(a)      ({ uint64_t __r; asm (".arch ev6; pkwb    %r1,%0"     : "=r" (__r) : "rJ"  (a));	     __r; })
-#define unpkbl(a)    ({ uint64_t __r; asm (".arch ev6; unpkbl  %r1,%0"     : "=r" (__r) : "rJ"  (a));	     __r; })
-#define unpkbw(a)    ({ uint64_t __r; asm (".arch ev6; unpkbw  %r1,%0"     : "=r" (__r) : "rJ"  (a));	     __r; })
-#endif
-
-#elif defined(__DECC)		/* Digital/Compaq/hp "ccc" compiler */
-
-#include <c_asm.h>
-#define ldq_u(a)     asm ("ldq_u   %v0,0(%a0)", a)
-#define uldq(a)	     (*(const __unaligned uint64_t *) (a))
-#define cmpbge(a, b) asm ("cmpbge  %a0,%a1,%v0", a, b)
-#define extql(a, b)  asm ("extql   %a0,%a1,%v0", a, b)
-#define extwl(a, b)  asm ("extwl   %a0,%a1,%v0", a, b)
-#define extqh(a, b)  asm ("extqh   %a0,%a1,%v0", a, b)
-#define zap(a, b)    asm ("zap     %a0,%a1,%v0", a, b)
-#define zapnot(a, b) asm ("zapnot  %a0,%a1,%v0", a, b)
-#define amask(a)     asm ("amask   %a0,%v0", a)
-#define implver()    asm ("implver %v0")
-#define rpcc()	     asm ("rpcc	   %v0")
-#define minub8(a, b) asm ("minub8  %a0,%a1,%v0", a, b)
-#define minsb8(a, b) asm ("minsb8  %a0,%a1,%v0", a, b)
-#define minuw4(a, b) asm ("minuw4  %a0,%a1,%v0", a, b)
-#define minsw4(a, b) asm ("minsw4  %a0,%a1,%v0", a, b)
-#define maxub8(a, b) asm ("maxub8  %a0,%a1,%v0", a, b)
-#define maxsb8(a, b) asm ("maxsb8  %a0,%a1,%v0", a, b)
-#define maxuw4(a, b) asm ("maxuw4  %a0,%a1,%v0", a, b)
-#define maxsw4(a, b) asm ("maxsw4  %a0,%a1,%v0", a, b)
-#define perr(a, b)   asm ("perr    %a0,%a1,%v0", a, b)
-#define pklb(a)      asm ("pklb    %a0,%v0", a)
-#define pkwb(a)      asm ("pkwb    %a0,%v0", a)
-#define unpkbl(a)    asm ("unpkbl  %a0,%v0", a)
-#define unpkbw(a)    asm ("unpkbw  %a0,%v0", a)
-#define wh64(a)      asm ("wh64    %a0", a)
-
-#else
-#error "Unknown compiler!"
-#endif
-
-#endif /* ALPHA_ASM_H */
diff --git a/src/libmpeg2new/include/attributes.h b/src/libmpeg2new/include/attributes.h
deleted file mode 100644
index eefbc0dd1..000000000
--- a/src/libmpeg2new/include/attributes.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * attributes.h
- * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-/* use gcc attribs to align critical data structures */
-#ifdef ATTRIBUTE_ALIGNED_MAX
-#define ATTR_ALIGN(align) __attribute__ ((__aligned__ ((ATTRIBUTE_ALIGNED_MAX < align) ? ATTRIBUTE_ALIGNED_MAX : align)))
-#else
-#define ATTR_ALIGN(align)
-#endif
-
-#ifdef HAVE_BUILTIN_EXPECT
-#define likely(x) __builtin_expect ((x) != 0, 1)
-#define unlikely(x) __builtin_expect ((x) != 0, 0)
-#else
-#define likely(x) (x)
-#define unlikely(x) (x)
-#endif
diff --git a/src/libmpeg2new/include/mmx.h b/src/libmpeg2new/include/mmx.h
deleted file mode 100644
index 08b4d4776..000000000
--- a/src/libmpeg2new/include/mmx.h
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
- * mmx.h
- * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-/*
- * The type of an value that fits in an MMX register (note that long
- * long constant values MUST be suffixed by LL and unsigned long long
- * values by ULL, lest they be truncated by the compiler)
- */
-
-typedef	union {
-	long long		q;	/* Quadword (64-bit) value */
-	unsigned long long	uq;	/* Unsigned Quadword */
-	int			d[2];	/* 2 Doubleword (32-bit) values */
-	unsigned int		ud[2];	/* 2 Unsigned Doubleword */
-	short			w[4];	/* 4 Word (16-bit) values */
-	unsigned short		uw[4];	/* 4 Unsigned Word */
-	char			b[8];	/* 8 Byte (8-bit) values */
-	unsigned char		ub[8];	/* 8 Unsigned Byte */
-	float			s[2];	/* Single-precision (32-bit) value */
-} ATTR_ALIGN(8) mmx_t;	/* On an 8-byte (64-bit) boundary */
-
-
-#define	mmx_i2r(op,imm,reg) \
-	__asm__ __volatile__ (#op " %0, %%" #reg \
-			      : /* nothing */ \
-			      : "i" (imm) )
-
-#define	mmx_m2r(op,mem,reg) \
-	__asm__ __volatile__ (#op " %0, %%" #reg \
-			      : /* nothing */ \
-			      : "m" (mem))
-
-#define	mmx_r2m(op,reg,mem) \
-	__asm__ __volatile__ (#op " %%" #reg ", %0" \
-			      : "=m" (mem) \
-			      : /* nothing */ )
-
-#define	mmx_r2r(op,regs,regd) \
-	__asm__ __volatile__ (#op " %" #regs ", %" #regd)
-
-
-#define	emms() __asm__ __volatile__ ("emms")
-
-#define	movd_m2r(var,reg)	mmx_m2r (movd, var, reg)
-#define	movd_r2m(reg,var)	mmx_r2m (movd, reg, var)
-#define	movd_v2r(var,reg)	__asm__ __volatile__ ("movd %0, %%" #reg \
-						      : /* nothing */ \
-						      : "rm" (var))
-#define	movd_r2v(reg,var)	__asm__ __volatile__ ("movd %%" #reg ", %0" \
-						      : "=rm" (var) \
-						      : /* nothing */ )
-
-#define	movq_m2r(var,reg)	mmx_m2r (movq, var, reg)
-#define	movq_r2m(reg,var)	mmx_r2m (movq, reg, var)
-#define	movq_r2r(regs,regd)	mmx_r2r (movq, regs, regd)
-
-#define	packssdw_m2r(var,reg)	mmx_m2r (packssdw, var, reg)
-#define	packssdw_r2r(regs,regd) mmx_r2r (packssdw, regs, regd)
-#define	packsswb_m2r(var,reg)	mmx_m2r (packsswb, var, reg)
-#define	packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd)
-
-#define	packuswb_m2r(var,reg)	mmx_m2r (packuswb, var, reg)
-#define	packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd)
-
-#define	paddb_m2r(var,reg)	mmx_m2r (paddb, var, reg)
-#define	paddb_r2r(regs,regd)	mmx_r2r (paddb, regs, regd)
-#define	paddd_m2r(var,reg)	mmx_m2r (paddd, var, reg)
-#define	paddd_r2r(regs,regd)	mmx_r2r (paddd, regs, regd)
-#define	paddw_m2r(var,reg)	mmx_m2r (paddw, var, reg)
-#define	paddw_r2r(regs,regd)	mmx_r2r (paddw, regs, regd)
-
-#define	paddsb_m2r(var,reg)	mmx_m2r (paddsb, var, reg)
-#define	paddsb_r2r(regs,regd)	mmx_r2r (paddsb, regs, regd)
-#define	paddsw_m2r(var,reg)	mmx_m2r (paddsw, var, reg)
-#define	paddsw_r2r(regs,regd)	mmx_r2r (paddsw, regs, regd)
-
-#define	paddusb_m2r(var,reg)	mmx_m2r (paddusb, var, reg)
-#define	paddusb_r2r(regs,regd)	mmx_r2r (paddusb, regs, regd)
-#define	paddusw_m2r(var,reg)	mmx_m2r (paddusw, var, reg)
-#define	paddusw_r2r(regs,regd)	mmx_r2r (paddusw, regs, regd)
-
-#define	pand_m2r(var,reg)	mmx_m2r (pand, var, reg)
-#define	pand_r2r(regs,regd)	mmx_r2r (pand, regs, regd)
-
-#define	pandn_m2r(var,reg)	mmx_m2r (pandn, var, reg)
-#define	pandn_r2r(regs,regd)	mmx_r2r (pandn, regs, regd)
-
-#define	pcmpeqb_m2r(var,reg)	mmx_m2r (pcmpeqb, var, reg)
-#define	pcmpeqb_r2r(regs,regd)	mmx_r2r (pcmpeqb, regs, regd)
-#define	pcmpeqd_m2r(var,reg)	mmx_m2r (pcmpeqd, var, reg)
-#define	pcmpeqd_r2r(regs,regd)	mmx_r2r (pcmpeqd, regs, regd)
-#define	pcmpeqw_m2r(var,reg)	mmx_m2r (pcmpeqw, var, reg)
-#define	pcmpeqw_r2r(regs,regd)	mmx_r2r (pcmpeqw, regs, regd)
-
-#define	pcmpgtb_m2r(var,reg)	mmx_m2r (pcmpgtb, var, reg)
-#define	pcmpgtb_r2r(regs,regd)	mmx_r2r (pcmpgtb, regs, regd)
-#define	pcmpgtd_m2r(var,reg)	mmx_m2r (pcmpgtd, var, reg)
-#define	pcmpgtd_r2r(regs,regd)	mmx_r2r (pcmpgtd, regs, regd)
-#define	pcmpgtw_m2r(var,reg)	mmx_m2r (pcmpgtw, var, reg)
-#define	pcmpgtw_r2r(regs,regd)	mmx_r2r (pcmpgtw, regs, regd)
-
-#define	pmaddwd_m2r(var,reg)	mmx_m2r (pmaddwd, var, reg)
-#define	pmaddwd_r2r(regs,regd)	mmx_r2r (pmaddwd, regs, regd)
-
-#define	pmulhw_m2r(var,reg)	mmx_m2r (pmulhw, var, reg)
-#define	pmulhw_r2r(regs,regd)	mmx_r2r (pmulhw, regs, regd)
-
-#define	pmullw_m2r(var,reg)	mmx_m2r (pmullw, var, reg)
-#define	pmullw_r2r(regs,regd)	mmx_r2r (pmullw, regs, regd)
-
-#define	por_m2r(var,reg)	mmx_m2r (por, var, reg)
-#define	por_r2r(regs,regd)	mmx_r2r (por, regs, regd)
-
-#define	pslld_i2r(imm,reg)	mmx_i2r (pslld, imm, reg)
-#define	pslld_m2r(var,reg)	mmx_m2r (pslld, var, reg)
-#define	pslld_r2r(regs,regd)	mmx_r2r (pslld, regs, regd)
-#define	psllq_i2r(imm,reg)	mmx_i2r (psllq, imm, reg)
-#define	psllq_m2r(var,reg)	mmx_m2r (psllq, var, reg)
-#define	psllq_r2r(regs,regd)	mmx_r2r (psllq, regs, regd)
-#define	psllw_i2r(imm,reg)	mmx_i2r (psllw, imm, reg)
-#define	psllw_m2r(var,reg)	mmx_m2r (psllw, var, reg)
-#define	psllw_r2r(regs,regd)	mmx_r2r (psllw, regs, regd)
-
-#define	psrad_i2r(imm,reg)	mmx_i2r (psrad, imm, reg)
-#define	psrad_m2r(var,reg)	mmx_m2r (psrad, var, reg)
-#define	psrad_r2r(regs,regd)	mmx_r2r (psrad, regs, regd)
-#define	psraw_i2r(imm,reg)	mmx_i2r (psraw, imm, reg)
-#define	psraw_m2r(var,reg)	mmx_m2r (psraw, var, reg)
-#define	psraw_r2r(regs,regd)	mmx_r2r (psraw, regs, regd)
-
-#define	psrld_i2r(imm,reg)	mmx_i2r (psrld, imm, reg)
-#define	psrld_m2r(var,reg)	mmx_m2r (psrld, var, reg)
-#define	psrld_r2r(regs,regd)	mmx_r2r (psrld, regs, regd)
-#define	psrlq_i2r(imm,reg)	mmx_i2r (psrlq, imm, reg)
-#define	psrlq_m2r(var,reg)	mmx_m2r (psrlq, var, reg)
-#define	psrlq_r2r(regs,regd)	mmx_r2r (psrlq, regs, regd)
-#define	psrlw_i2r(imm,reg)	mmx_i2r (psrlw, imm, reg)
-#define	psrlw_m2r(var,reg)	mmx_m2r (psrlw, var, reg)
-#define	psrlw_r2r(regs,regd)	mmx_r2r (psrlw, regs, regd)
-
-#define	psubb_m2r(var,reg)	mmx_m2r (psubb, var, reg)
-#define	psubb_r2r(regs,regd)	mmx_r2r (psubb, regs, regd)
-#define	psubd_m2r(var,reg)	mmx_m2r (psubd, var, reg)
-#define	psubd_r2r(regs,regd)	mmx_r2r (psubd, regs, regd)
-#define	psubw_m2r(var,reg)	mmx_m2r (psubw, var, reg)
-#define	psubw_r2r(regs,regd)	mmx_r2r (psubw, regs, regd)
-
-#define	psubsb_m2r(var,reg)	mmx_m2r (psubsb, var, reg)
-#define	psubsb_r2r(regs,regd)	mmx_r2r (psubsb, regs, regd)
-#define	psubsw_m2r(var,reg)	mmx_m2r (psubsw, var, reg)
-#define	psubsw_r2r(regs,regd)	mmx_r2r (psubsw, regs, regd)
-
-#define	psubusb_m2r(var,reg)	mmx_m2r (psubusb, var, reg)
-#define	psubusb_r2r(regs,regd)	mmx_r2r (psubusb, regs, regd)
-#define	psubusw_m2r(var,reg)	mmx_m2r (psubusw, var, reg)
-#define	psubusw_r2r(regs,regd)	mmx_r2r (psubusw, regs, regd)
-
-#define	punpckhbw_m2r(var,reg)		mmx_m2r (punpckhbw, var, reg)
-#define	punpckhbw_r2r(regs,regd)	mmx_r2r (punpckhbw, regs, regd)
-#define	punpckhdq_m2r(var,reg)		mmx_m2r (punpckhdq, var, reg)
-#define	punpckhdq_r2r(regs,regd)	mmx_r2r (punpckhdq, regs, regd)
-#define	punpckhwd_m2r(var,reg)		mmx_m2r (punpckhwd, var, reg)
-#define	punpckhwd_r2r(regs,regd)	mmx_r2r (punpckhwd, regs, regd)
-
-#define	punpcklbw_m2r(var,reg) 		mmx_m2r (punpcklbw, var, reg)
-#define	punpcklbw_r2r(regs,regd)	mmx_r2r (punpcklbw, regs, regd)
-#define	punpckldq_m2r(var,reg)		mmx_m2r (punpckldq, var, reg)
-#define	punpckldq_r2r(regs,regd)	mmx_r2r (punpckldq, regs, regd)
-#define	punpcklwd_m2r(var,reg)		mmx_m2r (punpcklwd, var, reg)
-#define	punpcklwd_r2r(regs,regd)	mmx_r2r (punpcklwd, regs, regd)
-
-#define	pxor_m2r(var,reg)	mmx_m2r (pxor, var, reg)
-#define	pxor_r2r(regs,regd)	mmx_r2r (pxor, regs, regd)
-
-
-/* 3DNOW extensions */
-
-#define pavgusb_m2r(var,reg)	mmx_m2r (pavgusb, var, reg)
-#define pavgusb_r2r(regs,regd)	mmx_r2r (pavgusb, regs, regd)
-
-
-/* AMD MMX extensions - also available in intel SSE */
-
-
-#define mmx_m2ri(op,mem,reg,imm) \
-	__asm__ __volatile__ (#op " %1, %0, %%" #reg \
-			      : /* nothing */ \
-			      : "m" (mem), "i" (imm))
-
-#define mmx_r2ri(op,regs,regd,imm) \
-	__asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \
-			      : /* nothing */ \
-			      : "i" (imm) )
-
-#define	mmx_fetch(mem,hint) \
-	__asm__ __volatile__ ("prefetch" #hint " %0" \
-			      : /* nothing */ \
-			      : "m" (mem))
-
-
-#define	maskmovq(regs,maskreg)		mmx_r2ri (maskmovq, regs, maskreg)
-
-#define	movntq_r2m(mmreg,var)		mmx_r2m (movntq, mmreg, var)
-
-#define	pavgb_m2r(var,reg)		mmx_m2r (pavgb, var, reg)
-#define	pavgb_r2r(regs,regd)		mmx_r2r (pavgb, regs, regd)
-#define	pavgw_m2r(var,reg)		mmx_m2r (pavgw, var, reg)
-#define	pavgw_r2r(regs,regd)		mmx_r2r (pavgw, regs, regd)
-
-#define	pextrw_r2r(mmreg,reg,imm)	mmx_r2ri (pextrw, mmreg, reg, imm)
-
-#define	pinsrw_r2r(reg,mmreg,imm)	mmx_r2ri (pinsrw, reg, mmreg, imm)
-
-#define	pmaxsw_m2r(var,reg)		mmx_m2r (pmaxsw, var, reg)
-#define	pmaxsw_r2r(regs,regd)		mmx_r2r (pmaxsw, regs, regd)
-
-#define	pmaxub_m2r(var,reg)		mmx_m2r (pmaxub, var, reg)
-#define	pmaxub_r2r(regs,regd)		mmx_r2r (pmaxub, regs, regd)
-
-#define	pminsw_m2r(var,reg)		mmx_m2r (pminsw, var, reg)
-#define	pminsw_r2r(regs,regd)		mmx_r2r (pminsw, regs, regd)
-
-#define	pminub_m2r(var,reg)		mmx_m2r (pminub, var, reg)
-#define	pminub_r2r(regs,regd)		mmx_r2r (pminub, regs, regd)
-
-#define	pmovmskb(mmreg,reg) \
-	__asm__ __volatile__ ("movmskps %" #mmreg ", %" #reg)
-
-#define	pmulhuw_m2r(var,reg)		mmx_m2r (pmulhuw, var, reg)
-#define	pmulhuw_r2r(regs,regd)		mmx_r2r (pmulhuw, regs, regd)
-
-#define	prefetcht0(mem)			mmx_fetch (mem, t0)
-#define	prefetcht1(mem)			mmx_fetch (mem, t1)
-#define	prefetcht2(mem)			mmx_fetch (mem, t2)
-#define	prefetchnta(mem)		mmx_fetch (mem, nta)
-
-#define	psadbw_m2r(var,reg)		mmx_m2r (psadbw, var, reg)
-#define	psadbw_r2r(regs,regd)		mmx_r2r (psadbw, regs, regd)
-
-#define	pshufw_m2r(var,reg,imm)		mmx_m2ri(pshufw, var, reg, imm)
-#define	pshufw_r2r(regs,regd,imm)	mmx_r2ri(pshufw, regs, regd, imm)
-
-#define	sfence() __asm__ __volatile__ ("sfence\n\t")
diff --git a/src/libmpeg2new/include/mpeg2.h b/src/libmpeg2new/include/mpeg2.h
deleted file mode 100644
index 6c1a3805b..000000000
--- a/src/libmpeg2new/include/mpeg2.h
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * mpeg2.h
- * Copyright (C) 2000-2004 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#ifndef MPEG2_H
-#define MPEG2_H
-
-#define MPEG2_VERSION(a,b,c) (((a)<<16)|((b)<<8)|(c))
-#define MPEG2_RELEASE MPEG2_VERSION (0, 4, 1)	/* 0.4.1 */
-
-#define SEQ_FLAG_MPEG2 1
-#define SEQ_FLAG_CONSTRAINED_PARAMETERS 2
-#define SEQ_FLAG_PROGRESSIVE_SEQUENCE 4
-#define SEQ_FLAG_LOW_DELAY 8
-#define SEQ_FLAG_COLOUR_DESCRIPTION 16
-
-#define SEQ_MASK_VIDEO_FORMAT 0xe0
-#define SEQ_VIDEO_FORMAT_COMPONENT 0
-#define SEQ_VIDEO_FORMAT_PAL 0x20
-#define SEQ_VIDEO_FORMAT_NTSC 0x40
-#define SEQ_VIDEO_FORMAT_SECAM 0x60
-#define SEQ_VIDEO_FORMAT_MAC 0x80
-#define SEQ_VIDEO_FORMAT_UNSPECIFIED 0xa0
-
-typedef struct mpeg2_sequence_s {
-    unsigned int width, height;
-    unsigned int chroma_width, chroma_height;
-    unsigned int byte_rate;
-    unsigned int vbv_buffer_size;
-    uint32_t flags;
-
-    unsigned int picture_width, picture_height;
-    unsigned int display_width, display_height;
-    unsigned int pixel_width, pixel_height;
-    unsigned int frame_period;
-
-    uint8_t profile_level_id;
-    uint8_t colour_primaries;
-    uint8_t transfer_characteristics;
-    uint8_t matrix_coefficients;
-} mpeg2_sequence_t;
-
-#define GOP_FLAG_DROP_FRAME 1
-#define GOP_FLAG_BROKEN_LINK 2
-#define GOP_FLAG_CLOSED_GOP 4
-
-typedef struct mpeg2_gop_s {
-    uint8_t hours;
-    uint8_t minutes;
-    uint8_t seconds;
-    uint8_t pictures;
-    uint32_t flags;
-} mpeg2_gop_t;
-
-#define PIC_MASK_CODING_TYPE 7
-#define PIC_FLAG_CODING_TYPE_I 1
-#define PIC_FLAG_CODING_TYPE_P 2
-#define PIC_FLAG_CODING_TYPE_B 3
-#define PIC_FLAG_CODING_TYPE_D 4
-
-#define PIC_FLAG_TOP_FIELD_FIRST 8
-#define PIC_FLAG_PROGRESSIVE_FRAME 16
-#define PIC_FLAG_COMPOSITE_DISPLAY 32
-#define PIC_FLAG_SKIP 64
-#define PIC_FLAG_TAGS 128
-#define PIC_MASK_COMPOSITE_DISPLAY 0xfffff000
-
-typedef struct mpeg2_picture_s {
-    unsigned int temporal_reference;
-    unsigned int nb_fields;
-    uint32_t tag, tag2;
-    uint32_t flags;
-    struct {
-	int x, y;
-    } display_offset[3];
-} mpeg2_picture_t;
-
-typedef struct mpeg2_fbuf_s {
-    uint8_t * buf[3];
-    void * id;
-} mpeg2_fbuf_t;
-
-typedef struct mpeg2_info_s {
-    const mpeg2_sequence_t * sequence;
-    const mpeg2_gop_t * gop;
-    const mpeg2_picture_t * current_picture;
-    const mpeg2_picture_t * current_picture_2nd;
-    const mpeg2_fbuf_t * current_fbuf;
-    const mpeg2_picture_t * display_picture;
-    const mpeg2_picture_t * display_picture_2nd;
-    const mpeg2_fbuf_t * display_fbuf;
-    const mpeg2_fbuf_t * discard_fbuf;
-    const uint8_t * user_data;
-    unsigned int user_data_len;
-} mpeg2_info_t;
-
-typedef struct mpeg2dec_s mpeg2dec_t;
-typedef struct mpeg2_decoder_s mpeg2_decoder_t;
-
-typedef enum {
-    STATE_BUFFER = 0,
-    STATE_SEQUENCE = 1,
-    STATE_SEQUENCE_REPEATED = 2,
-    STATE_SEQUENCE_MODIFIED = 3,
-    STATE_GOP = 4,
-    STATE_PICTURE = 5,
-    STATE_SLICE_1ST = 6,
-    STATE_PICTURE_2ND = 7,
-    STATE_SLICE = 8,
-    STATE_END = 9,
-    STATE_INVALID = 10,
-    STATE_INVALID_END = 11
-} mpeg2_state_t;
-
-typedef struct mpeg2_convert_init_s {
-    unsigned int id_size;
-    unsigned int buf_size[3];
-    void (* start) (void * id, const mpeg2_fbuf_t * fbuf,
-		    const mpeg2_picture_t * picture, const mpeg2_gop_t * gop);
-    void (* copy) (void * id, uint8_t * const * src, unsigned int v_offset);
-} mpeg2_convert_init_t;
-typedef enum {
-    MPEG2_CONVERT_SET = 0,
-    MPEG2_CONVERT_STRIDE = 1,
-    MPEG2_CONVERT_START = 2
-} mpeg2_convert_stage_t;
-typedef int mpeg2_convert_t (int stage, void * id,
-			     const mpeg2_sequence_t * sequence, int stride,
-			     uint32_t accel, void * arg,
-			     mpeg2_convert_init_t * result);
-int mpeg2_convert (mpeg2dec_t * mpeg2dec, mpeg2_convert_t convert, void * arg);
-int mpeg2_stride (mpeg2dec_t * mpeg2dec, int stride);
-void mpeg2_set_buf (mpeg2dec_t * mpeg2dec, uint8_t * buf[3], void * id);
-void mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf);
-
-#define MPEG2_ACCEL_X86_MMX 1
-#define MPEG2_ACCEL_X86_3DNOW 2
-#define MPEG2_ACCEL_X86_MMXEXT 4
-#define MPEG2_ACCEL_X86_SSE2 8
-#define MPEG2_ACCEL_X86_SSE3 16
-#define MPEG2_ACCEL_PPC_ALTIVEC 1
-#define MPEG2_ACCEL_ALPHA 1
-#define MPEG2_ACCEL_ALPHA_MVI 2
-#define MPEG2_ACCEL_SPARC_VIS 1
-#define MPEG2_ACCEL_SPARC_VIS2 2
-#define MPEG2_ACCEL_DETECT 0x80000000
-
-uint32_t mpeg2_accel (uint32_t accel);
-mpeg2dec_t * mpeg2_init (void);
-const mpeg2_info_t * mpeg2_info (mpeg2dec_t * mpeg2dec);
-void mpeg2_close (mpeg2dec_t * mpeg2dec);
-
-void mpeg2_buffer (mpeg2dec_t * mpeg2dec, uint8_t * start, uint8_t * end);
-int mpeg2_getpos (mpeg2dec_t * mpeg2dec);
-mpeg2_state_t mpeg2_parse (mpeg2dec_t * mpeg2dec);
-
-void mpeg2_reset (mpeg2dec_t * mpeg2dec, int full_reset);
-void mpeg2_skip (mpeg2dec_t * mpeg2dec, int skip);
-void mpeg2_slice_region (mpeg2dec_t * mpeg2dec, int start, int end);
-
-void mpeg2_tag_picture (mpeg2dec_t * mpeg2dec, uint32_t tag, uint32_t tag2);
-
-void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3],
-		      uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3]);
-void mpeg2_slice (mpeg2_decoder_t * decoder, int code, const uint8_t * buffer);
-int mpeg2_guess_aspect (const mpeg2_sequence_t * sequence,
-			unsigned int * pixel_width,
-			unsigned int * pixel_height);
-
-typedef enum {
-    MPEG2_ALLOC_MPEG2DEC = 0,
-    MPEG2_ALLOC_CHUNK = 1,
-    MPEG2_ALLOC_YUV = 2,
-    MPEG2_ALLOC_CONVERT_ID = 3,
-    MPEG2_ALLOC_CONVERTED = 4
-} mpeg2_alloc_t;
-
-void * mpeg2_malloc (unsigned size, mpeg2_alloc_t reason);
-void mpeg2_free (void * buf);
-void mpeg2_malloc_hooks (void * malloc (unsigned, mpeg2_alloc_t),
-			 int free (void *));
-
-#endif /* MPEG2_H */
diff --git a/src/libmpeg2new/include/mpeg2convert.h b/src/libmpeg2new/include/mpeg2convert.h
deleted file mode 100644
index aac5d1991..000000000
--- a/src/libmpeg2new/include/mpeg2convert.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * mpeg2convert.h
- * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#ifndef MPEG2CONVERT_H
-#define MPEG2CONVERT_H
-
-mpeg2_convert_t mpeg2convert_rgb32;
-mpeg2_convert_t mpeg2convert_rgb24;
-mpeg2_convert_t mpeg2convert_rgb16;
-mpeg2_convert_t mpeg2convert_rgb15;
-mpeg2_convert_t mpeg2convert_rgb8;
-mpeg2_convert_t mpeg2convert_bgr32;
-mpeg2_convert_t mpeg2convert_bgr24;
-mpeg2_convert_t mpeg2convert_bgr16;
-mpeg2_convert_t mpeg2convert_bgr15;
-mpeg2_convert_t mpeg2convert_bgr8;
-
-typedef enum {
-    MPEG2CONVERT_RGB = 0,
-    MPEG2CONVERT_BGR = 1
-} mpeg2convert_rgb_order_t;
-
-mpeg2_convert_t * mpeg2convert_rgb (mpeg2convert_rgb_order_t order,
-				    unsigned int bpp);
-
-mpeg2_convert_t mpeg2convert_uyvy;
-
-#endif /* MPEG2CONVERT_H */
diff --git a/src/libmpeg2new/include/sse.h b/src/libmpeg2new/include/sse.h
deleted file mode 100644
index 4bd853f8b..000000000
--- a/src/libmpeg2new/include/sse.h
+++ /dev/null
@@ -1,256 +0,0 @@
-/*
- * sse.h
- * Copyright (C) 1999-2003 R. Fisher
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-typedef	union {
-	float			sf[4];	/* Single-precision (32-bit) value */
-} ATTR_ALIGN(16) sse_t;	/* On a 16 byte (128-bit) boundary */
-
-
-#define	sse_i2r(op, imm, reg) \
-	__asm__ __volatile__ (#op " %0, %%" #reg \
-			      : /* nothing */ \
-			      : "X" (imm) )
-
-#define	sse_m2r(op, mem, reg) \
-	__asm__ __volatile__ (#op " %0, %%" #reg \
-			      : /* nothing */ \
-			      : "X" (mem))
-
-#define	sse_r2m(op, reg, mem) \
-	__asm__ __volatile__ (#op " %%" #reg ", %0" \
-			      : "=X" (mem) \
-			      : /* nothing */ )
-
-#define	sse_r2r(op, regs, regd) \
-	__asm__ __volatile__ (#op " %" #regs ", %" #regd)
-
-#define	sse_r2ri(op, regs, regd, imm) \
-	__asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \
-			      : /* nothing */ \
-			      : "X" (imm) )
-
-#define	sse_m2ri(op, mem, reg, subop) \
-	__asm__ __volatile__ (#op " %0, %%" #reg ", " #subop \
-			      : /* nothing */ \
-			      : "X" (mem))
-
-
-#define	movaps_m2r(var, reg)	sse_m2r(movaps, var, reg)
-#define	movaps_r2m(reg, var)	sse_r2m(movaps, reg, var)
-#define	movaps_r2r(regs, regd)	sse_r2r(movaps, regs, regd)
-
-#define	movntps_r2m(xmmreg, var)	sse_r2m(movntps, xmmreg, var)
-
-#define	movups_m2r(var, reg)	sse_m2r(movups, var, reg)
-#define	movups_r2m(reg, var)	sse_r2m(movups, reg, var)
-#define	movups_r2r(regs, regd)	sse_r2r(movups, regs, regd)
-
-#define	movhlps_r2r(regs, regd)	sse_r2r(movhlps, regs, regd)
-
-#define	movlhps_r2r(regs, regd)	sse_r2r(movlhps, regs, regd)
-
-#define	movhps_m2r(var, reg)	sse_m2r(movhps, var, reg)
-#define	movhps_r2m(reg, var)	sse_r2m(movhps, reg, var)
-
-#define	movlps_m2r(var, reg)	sse_m2r(movlps, var, reg)
-#define	movlps_r2m(reg, var)	sse_r2m(movlps, reg, var)
-
-#define	movss_m2r(var, reg)	sse_m2r(movss, var, reg)
-#define	movss_r2m(reg, var)	sse_r2m(movss, reg, var)
-#define	movss_r2r(regs, regd)	sse_r2r(movss, regs, regd)
-
-#define	shufps_m2r(var, reg, index)	sse_m2ri(shufps, var, reg, index)
-#define	shufps_r2r(regs, regd, index)	sse_r2ri(shufps, regs, regd, index)
-
-#define	cvtpi2ps_m2r(var, xmmreg)	sse_m2r(cvtpi2ps, var, xmmreg)
-#define	cvtpi2ps_r2r(mmreg, xmmreg)	sse_r2r(cvtpi2ps, mmreg, xmmreg)
-
-#define	cvtps2pi_m2r(var, mmreg)	sse_m2r(cvtps2pi, var, mmreg)
-#define	cvtps2pi_r2r(xmmreg, mmreg)	sse_r2r(cvtps2pi, mmreg, xmmreg)
-
-#define	cvttps2pi_m2r(var, mmreg)	sse_m2r(cvttps2pi, var, mmreg)
-#define	cvttps2pi_r2r(xmmreg, mmreg)	sse_r2r(cvttps2pi, mmreg, xmmreg)
-
-#define	cvtsi2ss_m2r(var, xmmreg)	sse_m2r(cvtsi2ss, var, xmmreg)
-#define	cvtsi2ss_r2r(reg, xmmreg)	sse_r2r(cvtsi2ss, reg, xmmreg)
-
-#define	cvtss2si_m2r(var, reg)		sse_m2r(cvtss2si, var, reg)
-#define	cvtss2si_r2r(xmmreg, reg)	sse_r2r(cvtss2si, xmmreg, reg)
-
-#define	cvttss2si_m2r(var, reg)		sse_m2r(cvtss2si, var, reg)
-#define	cvttss2si_r2r(xmmreg, reg)	sse_r2r(cvtss2si, xmmreg, reg)
-
-#define	movmskps(xmmreg, reg) \
-	__asm__ __volatile__ ("movmskps %" #xmmreg ", %" #reg)
-
-#define	addps_m2r(var, reg)		sse_m2r(addps, var, reg)
-#define	addps_r2r(regs, regd)		sse_r2r(addps, regs, regd)
-
-#define	addss_m2r(var, reg)		sse_m2r(addss, var, reg)
-#define	addss_r2r(regs, regd)		sse_r2r(addss, regs, regd)
-
-#define	subps_m2r(var, reg)		sse_m2r(subps, var, reg)
-#define	subps_r2r(regs, regd)		sse_r2r(subps, regs, regd)
-
-#define	subss_m2r(var, reg)		sse_m2r(subss, var, reg)
-#define	subss_r2r(regs, regd)		sse_r2r(subss, regs, regd)
-
-#define	mulps_m2r(var, reg)		sse_m2r(mulps, var, reg)
-#define	mulps_r2r(regs, regd)		sse_r2r(mulps, regs, regd)
-
-#define	mulss_m2r(var, reg)		sse_m2r(mulss, var, reg)
-#define	mulss_r2r(regs, regd)		sse_r2r(mulss, regs, regd)
-
-#define	divps_m2r(var, reg)		sse_m2r(divps, var, reg)
-#define	divps_r2r(regs, regd)		sse_r2r(divps, regs, regd)
-
-#define	divss_m2r(var, reg)		sse_m2r(divss, var, reg)
-#define	divss_r2r(regs, regd)		sse_r2r(divss, regs, regd)
-
-#define	rcpps_m2r(var, reg)		sse_m2r(rcpps, var, reg)
-#define	rcpps_r2r(regs, regd)		sse_r2r(rcpps, regs, regd)
-
-#define	rcpss_m2r(var, reg)		sse_m2r(rcpss, var, reg)
-#define	rcpss_r2r(regs, regd)		sse_r2r(rcpss, regs, regd)
-
-#define	rsqrtps_m2r(var, reg)		sse_m2r(rsqrtps, var, reg)
-#define	rsqrtps_r2r(regs, regd)		sse_r2r(rsqrtps, regs, regd)
-
-#define	rsqrtss_m2r(var, reg)		sse_m2r(rsqrtss, var, reg)
-#define	rsqrtss_r2r(regs, regd)		sse_r2r(rsqrtss, regs, regd)
-
-#define	sqrtps_m2r(var, reg)		sse_m2r(sqrtps, var, reg)
-#define	sqrtps_r2r(regs, regd)		sse_r2r(sqrtps, regs, regd)
-
-#define	sqrtss_m2r(var, reg)		sse_m2r(sqrtss, var, reg)
-#define	sqrtss_r2r(regs, regd)		sse_r2r(sqrtss, regs, regd)
-
-#define	andps_m2r(var, reg)		sse_m2r(andps, var, reg)
-#define	andps_r2r(regs, regd)		sse_r2r(andps, regs, regd)
-
-#define	andnps_m2r(var, reg)		sse_m2r(andnps, var, reg)
-#define	andnps_r2r(regs, regd)		sse_r2r(andnps, regs, regd)
-
-#define	orps_m2r(var, reg)		sse_m2r(orps, var, reg)
-#define	orps_r2r(regs, regd)		sse_r2r(orps, regs, regd)
-
-#define	xorps_m2r(var, reg)		sse_m2r(xorps, var, reg)
-#define	xorps_r2r(regs, regd)		sse_r2r(xorps, regs, regd)
-
-#define	maxps_m2r(var, reg)		sse_m2r(maxps, var, reg)
-#define	maxps_r2r(regs, regd)		sse_r2r(maxps, regs, regd)
-
-#define	maxss_m2r(var, reg)		sse_m2r(maxss, var, reg)
-#define	maxss_r2r(regs, regd)		sse_r2r(maxss, regs, regd)
-
-#define	minps_m2r(var, reg)		sse_m2r(minps, var, reg)
-#define	minps_r2r(regs, regd)		sse_r2r(minps, regs, regd)
-
-#define	minss_m2r(var, reg)		sse_m2r(minss, var, reg)
-#define	minss_r2r(regs, regd)		sse_r2r(minss, regs, regd)
-
-#define	cmpps_m2r(var, reg, op)		sse_m2ri(cmpps, var, reg, op)
-#define	cmpps_r2r(regs, regd, op)	sse_r2ri(cmpps, regs, regd, op)
-
-#define	cmpeqps_m2r(var, reg)		sse_m2ri(cmpps, var, reg, 0)
-#define	cmpeqps_r2r(regs, regd)		sse_r2ri(cmpps, regs, regd, 0)
-
-#define	cmpltps_m2r(var, reg)		sse_m2ri(cmpps, var, reg, 1)
-#define	cmpltps_r2r(regs, regd)		sse_r2ri(cmpps, regs, regd, 1)
-
-#define	cmpleps_m2r(var, reg)		sse_m2ri(cmpps, var, reg, 2)
-#define	cmpleps_r2r(regs, regd)		sse_r2ri(cmpps, regs, regd, 2)
-
-#define	cmpunordps_m2r(var, reg)	sse_m2ri(cmpps, var, reg, 3)
-#define	cmpunordps_r2r(regs, regd)	sse_r2ri(cmpps, regs, regd, 3)
-
-#define	cmpneqps_m2r(var, reg)		sse_m2ri(cmpps, var, reg, 4)
-#define	cmpneqps_r2r(regs, regd)	sse_r2ri(cmpps, regs, regd, 4)
-
-#define	cmpnltps_m2r(var, reg)		sse_m2ri(cmpps, var, reg, 5)
-#define	cmpnltps_r2r(regs, regd)	sse_r2ri(cmpps, regs, regd, 5)
-
-#define	cmpnleps_m2r(var, reg)		sse_m2ri(cmpps, var, reg, 6)
-#define	cmpnleps_r2r(regs, regd)	sse_r2ri(cmpps, regs, regd, 6)
-
-#define	cmpordps_m2r(var, reg)		sse_m2ri(cmpps, var, reg, 7)
-#define	cmpordps_r2r(regs, regd)	sse_r2ri(cmpps, regs, regd, 7)
-
-#define	cmpss_m2r(var, reg, op)		sse_m2ri(cmpss, var, reg, op)
-#define	cmpss_r2r(regs, regd, op)	sse_r2ri(cmpss, regs, regd, op)
-
-#define	cmpeqss_m2r(var, reg)		sse_m2ri(cmpss, var, reg, 0)
-#define	cmpeqss_r2r(regs, regd)		sse_r2ri(cmpss, regs, regd, 0)
-
-#define	cmpltss_m2r(var, reg)		sse_m2ri(cmpss, var, reg, 1)
-#define	cmpltss_r2r(regs, regd)		sse_r2ri(cmpss, regs, regd, 1)
-
-#define	cmpless_m2r(var, reg)		sse_m2ri(cmpss, var, reg, 2)
-#define	cmpless_r2r(regs, regd)		sse_r2ri(cmpss, regs, regd, 2)
-
-#define	cmpunordss_m2r(var, reg)	sse_m2ri(cmpss, var, reg, 3)
-#define	cmpunordss_r2r(regs, regd)	sse_r2ri(cmpss, regs, regd, 3)
-
-#define	cmpneqss_m2r(var, reg)		sse_m2ri(cmpss, var, reg, 4)
-#define	cmpneqss_r2r(regs, regd)	sse_r2ri(cmpss, regs, regd, 4)
-
-#define	cmpnltss_m2r(var, reg)		sse_m2ri(cmpss, var, reg, 5)
-#define	cmpnltss_r2r(regs, regd)	sse_r2ri(cmpss, regs, regd, 5)
-
-#define	cmpnless_m2r(var, reg)		sse_m2ri(cmpss, var, reg, 6)
-#define	cmpnless_r2r(regs, regd)	sse_r2ri(cmpss, regs, regd, 6)
-
-#define	cmpordss_m2r(var, reg)		sse_m2ri(cmpss, var, reg, 7)
-#define	cmpordss_r2r(regs, regd)	sse_r2ri(cmpss, regs, regd, 7)
-
-#define	comiss_m2r(var, reg)		sse_m2r(comiss, var, reg)
-#define	comiss_r2r(regs, regd)		sse_r2r(comiss, regs, regd)
-
-#define	ucomiss_m2r(var, reg)		sse_m2r(ucomiss, var, reg)
-#define	ucomiss_r2r(regs, regd)		sse_r2r(ucomiss, regs, regd)
-
-#define	unpcklps_m2r(var, reg)		sse_m2r(unpcklps, var, reg)
-#define	unpcklps_r2r(regs, regd)	sse_r2r(unpcklps, regs, regd)
-
-#define	unpckhps_m2r(var, reg)		sse_m2r(unpckhps, var, reg)
-#define	unpckhps_r2r(regs, regd)	sse_r2r(unpckhps, regs, regd)
-
-#define	fxrstor(mem) \
-	__asm__ __volatile__ ("fxrstor %0" \
-			      : /* nothing */ \
-			      : "X" (mem))
-
-#define	fxsave(mem) \
-	__asm__ __volatile__ ("fxsave %0" \
-			      : /* nothing */ \
-			      : "X" (mem))
-
-#define	stmxcsr(mem) \
-	__asm__ __volatile__ ("stmxcsr %0" \
-			      : /* nothing */ \
-			      : "X" (mem))
-
-#define	ldmxcsr(mem) \
-	__asm__ __volatile__ ("ldmxcsr %0" \
-			      : /* nothing */ \
-			      : "X" (mem))
-
diff --git a/src/libmpeg2new/include/tendra.h b/src/libmpeg2new/include/tendra.h
deleted file mode 100644
index 09900916a..000000000
--- a/src/libmpeg2new/include/tendra.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * tendra.h
- * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#pragma TenDRA begin
-#pragma TenDRA longlong type warning
-
-#ifdef TenDRA_check
-
-#pragma TenDRA conversion analysis (pointer-int explicit) off
-#pragma TenDRA implicit function declaration off
-
-/* avoid the "No declarations in translation unit" problem */
-int TenDRA;
-
-#endif /* TenDRA_check */
diff --git a/src/libmpeg2new/include/video_out.h b/src/libmpeg2new/include/video_out.h
deleted file mode 100644
index 342c55197..000000000
--- a/src/libmpeg2new/include/video_out.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * video_out.h
- * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-struct mpeg2_sequence_s;
-struct mpeg2_convert_init_s;
-typedef struct {
-    int (* convert) (int stage, void * id,
-		     const struct mpeg2_sequence_s * sequence,
-		     int stride, uint32_t accel, void * arg,
-		     struct mpeg2_convert_init_s * result);
-} vo_setup_result_t;
-
-typedef struct vo_instance_s vo_instance_t;
-struct vo_instance_s {
-    int (* setup) (vo_instance_t * instance, unsigned int width,
-		   unsigned int height, unsigned int chroma_width,
-		   unsigned int chroma_height, vo_setup_result_t * result);
-    void (* setup_fbuf) (vo_instance_t * instance, uint8_t ** buf, void ** id);
-    void (* set_fbuf) (vo_instance_t * instance, uint8_t ** buf, void ** id);
-    void (* start_fbuf) (vo_instance_t * instance,
-			 uint8_t * const * buf, void * id);
-    void (* draw) (vo_instance_t * instance, uint8_t * const * buf, void * id);
-    void (* discard) (vo_instance_t * instance,
-		      uint8_t * const * buf, void * id);
-    void (* close) (vo_instance_t * instance);
-};
-
-typedef vo_instance_t * vo_open_t (void);
-
-typedef struct {
-    char * name;
-    vo_open_t * open;
-} vo_driver_t;
-
-void vo_accel (uint32_t accel);
-
-/* return NULL terminated array of all drivers */
-vo_driver_t const * vo_drivers (void);
diff --git a/src/libmpeg2new/include/vis.h b/src/libmpeg2new/include/vis.h
deleted file mode 100644
index 69dd49075..000000000
--- a/src/libmpeg2new/include/vis.h
+++ /dev/null
@@ -1,328 +0,0 @@
-/*
- * vis.h
- * Copyright (C) 2003 David S. Miller <davem@redhat.com>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-/* You may be asking why I hard-code the instruction opcodes and don't
- * use the normal VIS assembler mnenomics for the VIS instructions.
- *
- * The reason is that Sun, in their infinite wisdom, decided that a binary
- * using a VIS instruction will cause it to be marked (in the ELF headers)
- * as doing so, and this prevents the OS from loading such binaries if the
- * current cpu doesn't have VIS.  There is no way to easily override this
- * behavior of the assembler that I am aware of.
- *
- * This totally defeats what libmpeg2 is trying to do which is allow a
- * single binary to be created, and then detect the availability of VIS
- * at runtime.
- *
- * I'm not saying that tainting the binary by default is bad, rather I'm
- * saying that not providing a way to override this easily unnecessarily
- * ties people's hands.
- *
- * Thus, we do the opcode encoding by hand and output 32-bit words in
- * the assembler to keep the binary from becoming tainted.
- */
-
-#define vis_opc_base	((0x1 << 31) | (0x36 << 19))
-#define vis_opf(X)	((X) << 5)
-#define vis_sreg(X)	(X)
-#define vis_dreg(X)	(((X)&0x1f)|((X)>>5))
-#define vis_rs1_s(X)	(vis_sreg(X) << 14)
-#define vis_rs1_d(X)	(vis_dreg(X) << 14)
-#define vis_rs2_s(X)	(vis_sreg(X) << 0)
-#define vis_rs2_d(X)	(vis_dreg(X) << 0)
-#define vis_rd_s(X)	(vis_sreg(X) << 25)
-#define vis_rd_d(X)	(vis_dreg(X) << 25)
-
-#define vis_ss2s(opf,rs1,rs2,rd) \
-	__asm__ __volatile__ (".word %0" \
-			      : : "i" (vis_opc_base | vis_opf(opf) | \
-                                       vis_rs1_s(rs1) | \
-                                       vis_rs2_s(rs2) | \
-                                       vis_rd_s(rd)))
-
-#define vis_dd2d(opf,rs1,rs2,rd) \
-	__asm__ __volatile__ (".word %0" \
-			      : : "i" (vis_opc_base | vis_opf(opf) | \
-                                       vis_rs1_d(rs1) | \
-                                       vis_rs2_d(rs2) | \
-                                       vis_rd_d(rd)))
-
-#define vis_ss2d(opf,rs1,rs2,rd) \
-	__asm__ __volatile__ (".word %0" \
-			      : : "i" (vis_opc_base | vis_opf(opf) | \
-                                       vis_rs1_s(rs1) | \
-                                       vis_rs2_s(rs2) | \
-                                       vis_rd_d(rd)))
-
-#define vis_sd2d(opf,rs1,rs2,rd) \
-	__asm__ __volatile__ (".word %0" \
-			      : : "i" (vis_opc_base | vis_opf(opf) | \
-                                       vis_rs1_s(rs1) | \
-                                       vis_rs2_d(rs2) | \
-                                       vis_rd_d(rd)))
-
-#define vis_d2s(opf,rs2,rd) \
-	__asm__ __volatile__ (".word %0" \
-			      : : "i" (vis_opc_base | vis_opf(opf) | \
-                                       vis_rs2_d(rs2) | \
-                                       vis_rd_s(rd)))
-
-#define vis_s2d(opf,rs2,rd) \
-	__asm__ __volatile__ (".word %0" \
-			      : : "i" (vis_opc_base | vis_opf(opf) | \
-                                       vis_rs2_s(rs2) | \
-                                       vis_rd_d(rd)))
-
-#define vis_d12d(opf,rs1,rd) \
-	__asm__ __volatile__ (".word %0" \
-			      : : "i" (vis_opc_base | vis_opf(opf) | \
-                                       vis_rs1_d(rs1) | \
-                                       vis_rd_d(rd)))
-
-#define vis_d22d(opf,rs2,rd) \
-	__asm__ __volatile__ (".word %0" \
-			      : : "i" (vis_opc_base | vis_opf(opf) | \
-                                       vis_rs2_d(rs2) | \
-                                       vis_rd_d(rd)))
-
-#define vis_s12s(opf,rs1,rd) \
-	__asm__ __volatile__ (".word %0" \
-			      : : "i" (vis_opc_base | vis_opf(opf) | \
-                                       vis_rs1_s(rs1) | \
-                                       vis_rd_s(rd)))
-
-#define vis_s22s(opf,rs2,rd) \
-	__asm__ __volatile__ (".word %0" \
-			      : : "i" (vis_opc_base | vis_opf(opf) | \
-                                       vis_rs2_s(rs2) | \
-                                       vis_rd_s(rd)))
-
-#define vis_s(opf,rd) \
-	__asm__ __volatile__ (".word %0" \
-			      : : "i" (vis_opc_base | vis_opf(opf) | \
-                                       vis_rd_s(rd)))
-
-#define vis_d(opf,rd) \
-	__asm__ __volatile__ (".word %0" \
-			      : : "i" (vis_opc_base | vis_opf(opf) | \
-                                       vis_rd_d(rd)))
-
-#define vis_r2m(op,rd,mem) \
-	__asm__ __volatile__ (#op "\t%%f" #rd ", [%0]" : : "r" (&(mem)) )
-
-#define vis_r2m_2(op,rd,mem1,mem2) \
-	__asm__ __volatile__ (#op "\t%%f" #rd ", [%0 + %1]" : : "r" (mem1), "r" (mem2) )
-
-#define vis_m2r(op,mem,rd) \
-	__asm__ __volatile__ (#op "\t[%0], %%f" #rd : : "r" (&(mem)) )
-
-#define vis_m2r_2(op,mem1,mem2,rd) \
-	__asm__ __volatile__ (#op "\t[%0 + %1], %%f" #rd : : "r" (mem1), "r" (mem2) )
-
-static inline void vis_set_gsr(unsigned int _val)
-{
-	register unsigned int val asm("g1");
-
-	val = _val;
-	__asm__ __volatile__(".word 0xa7804000"
-			     : : "r" (val));
-}
-
-#define VIS_GSR_ALIGNADDR_MASK	0x0000007
-#define VIS_GSR_ALIGNADDR_SHIFT	0
-#define VIS_GSR_SCALEFACT_MASK	0x0000078
-#define VIS_GSR_SCALEFACT_SHIFT	3
-
-#define vis_ld32(mem,rs1)		vis_m2r(ld, mem, rs1)
-#define vis_ld32_2(mem1,mem2,rs1)	vis_m2r_2(ld, mem1, mem2, rs1)
-#define vis_st32(rs1,mem)		vis_r2m(st, rs1, mem)
-#define vis_st32_2(rs1,mem1,mem2)	vis_r2m_2(st, rs1, mem1, mem2)
-#define vis_ld64(mem,rs1)		vis_m2r(ldd, mem, rs1)
-#define vis_ld64_2(mem1,mem2,rs1)	vis_m2r_2(ldd, mem1, mem2, rs1)
-#define vis_st64(rs1,mem)		vis_r2m(std, rs1, mem)
-#define vis_st64_2(rs1,mem1,mem2)	vis_r2m_2(std, rs1, mem1, mem2)
-
-#define vis_ldblk(mem, rd) \
-do {	register void *__mem asm("g1"); \
-	__mem = &(mem); \
-	__asm__ __volatile__(".word 0xc1985e00 | %1" \
-			     : \
-			     : "r" (__mem), \
-			       "i" (vis_rd_d(rd)) \
-			     : "memory"); \
-} while (0)
-
-#define vis_stblk(rd, mem) \
-do {	register void *__mem asm("g1"); \
-	__mem = &(mem); \
-	__asm__ __volatile__(".word 0xc1b85e00 | %1" \
-			     : \
-			     : "r" (__mem), \
-			       "i" (vis_rd_d(rd)) \
-			     : "memory"); \
-} while (0)
-
-#define vis_membar_storestore()	\
-	__asm__ __volatile__(".word 0x8143e008" : : : "memory")
-
-#define vis_membar_sync()	\
-	__asm__ __volatile__(".word 0x8143e040" : : : "memory")
-
-/* 16 and 32 bit partitioned addition and subtraction.  The normal
- * versions perform 4 16-bit or 2 32-bit additions or subtractions.
- * The 's' versions perform 2 16-bit or 2 32-bit additions or
- * subtractions.
- */
-
-#define vis_padd16(rs1,rs2,rd)		vis_dd2d(0x50, rs1, rs2, rd)
-#define vis_padd16s(rs1,rs2,rd)		vis_ss2s(0x51, rs1, rs2, rd)
-#define vis_padd32(rs1,rs2,rd)		vis_dd2d(0x52, rs1, rs2, rd)
-#define vis_padd32s(rs1,rs2,rd)		vis_ss2s(0x53, rs1, rs2, rd)
-#define vis_psub16(rs1,rs2,rd)		vis_dd2d(0x54, rs1, rs2, rd)
-#define vis_psub16s(rs1,rs2,rd)		vis_ss2s(0x55, rs1, rs2, rd)
-#define vis_psub32(rs1,rs2,rd)		vis_dd2d(0x56, rs1, rs2, rd)
-#define vis_psub32s(rs1,rs2,rd)		vis_ss2s(0x57, rs1, rs2, rd)
-
-/* Pixel formatting instructions.  */
-
-#define vis_pack16(rs2,rd)		vis_d2s( 0x3b,      rs2, rd)
-#define vis_pack32(rs1,rs2,rd)		vis_dd2d(0x3a, rs1, rs2, rd)
-#define vis_packfix(rs2,rd)		vis_d2s( 0x3d,      rs2, rd)
-#define vis_expand(rs2,rd)		vis_s2d( 0x4d,      rs2, rd)
-#define vis_pmerge(rs1,rs2,rd)		vis_ss2d(0x4b, rs1, rs2, rd)
-
-/* Partitioned multiply instructions.  */
-
-#define vis_mul8x16(rs1,rs2,rd)		vis_sd2d(0x31, rs1, rs2, rd)
-#define vis_mul8x16au(rs1,rs2,rd)	vis_ss2d(0x33, rs1, rs2, rd)
-#define vis_mul8x16al(rs1,rs2,rd)	vis_ss2d(0x35, rs1, rs2, rd)
-#define vis_mul8sux16(rs1,rs2,rd)	vis_dd2d(0x36, rs1, rs2, rd)
-#define vis_mul8ulx16(rs1,rs2,rd)	vis_dd2d(0x37, rs1, rs2, rd)
-#define vis_muld8sux16(rs1,rs2,rd)	vis_ss2d(0x38, rs1, rs2, rd)
-#define vis_muld8ulx16(rs1,rs2,rd)	vis_ss2d(0x39, rs1, rs2, rd)
-
-/* Alignment instructions.  */
-
-static inline void *vis_alignaddr(void *_ptr)
-{
-	register void *ptr asm("g1");
-
-	ptr = _ptr;
-
-	__asm__ __volatile__(".word %2"
-			     : "=&r" (ptr)
-			     : "0" (ptr),
-			       "i" (vis_opc_base | vis_opf(0x18) |
-				    vis_rs1_s(1) |
-				    vis_rs2_s(0) |
-				    vis_rd_s(1)));
-
-	return ptr;
-}
-
-static inline void vis_alignaddr_g0(void *_ptr)
-{
-	register void *ptr asm("g1");
-
-	ptr = _ptr;
-
-	__asm__ __volatile__(".word %2"
-			     : "=&r" (ptr)
-			     : "0" (ptr),
-			       "i" (vis_opc_base | vis_opf(0x18) |
-				    vis_rs1_s(1) |
-				    vis_rs2_s(0) |
-				    vis_rd_s(0)));
-}
-
-static inline void *vis_alignaddrl(void *_ptr)
-{
-	register void *ptr asm("g1");
-
-	ptr = _ptr;
-
-	__asm__ __volatile__(".word %2"
-			     : "=&r" (ptr)
-			     : "0" (ptr),
-			       "i" (vis_opc_base | vis_opf(0x19) |
-				    vis_rs1_s(1) |
-				    vis_rs2_s(0) |
-				    vis_rd_s(1)));
-
-	return ptr;
-}
-
-static inline void vis_alignaddrl_g0(void *_ptr)
-{
-	register void *ptr asm("g1");
-
-	ptr = _ptr;
-
-	__asm__ __volatile__(".word %2"
-			     : "=&r" (ptr)
-			     : "0" (ptr),
-			       "i" (vis_opc_base | vis_opf(0x19) |
-				    vis_rs1_s(1) |
-				    vis_rs2_s(0) |
-				    vis_rd_s(0)));
-}
-
-#define vis_faligndata(rs1,rs2,rd)	vis_dd2d(0x48, rs1, rs2, rd)
-
-/* Logical operate instructions.  */
-
-#define vis_fzero(rd)			vis_d(   0x60,           rd)
-#define vis_fzeros(rd)			vis_s(   0x61,           rd)
-#define vis_fone(rd)			vis_d(   0x7e,           rd)
-#define vis_fones(rd)			vis_s(   0x7f,           rd)
-#define vis_src1(rs1,rd)		vis_d12d(0x74, rs1,      rd)
-#define vis_src1s(rs1,rd)		vis_s12s(0x75, rs1,      rd)
-#define vis_src2(rs2,rd)		vis_d22d(0x78,      rs2, rd)
-#define vis_src2s(rs2,rd)		vis_s22s(0x79,      rs2, rd)
-#define vis_not1(rs1,rd)		vis_d12d(0x6a, rs1,      rd)
-#define vis_not1s(rs1,rd)		vis_s12s(0x6b, rs1,      rd)
-#define vis_not2(rs2,rd)		vis_d22d(0x66,      rs2, rd)
-#define vis_not2s(rs2,rd)		vis_s22s(0x67,      rs2, rd)
-#define vis_or(rs1,rs2,rd)		vis_dd2d(0x7c, rs1, rs2, rd)
-#define vis_ors(rs1,rs2,rd)		vis_ss2s(0x7d, rs1, rs2, rd)
-#define vis_nor(rs1,rs2,rd)		vis_dd2d(0x62, rs1, rs2, rd)
-#define vis_nors(rs1,rs2,rd)		vis_ss2s(0x63, rs1, rs2, rd)
-#define vis_and(rs1,rs2,rd)		vis_dd2d(0x70, rs1, rs2, rd)
-#define vis_ands(rs1,rs2,rd)		vis_ss2s(0x71, rs1, rs2, rd)
-#define vis_nand(rs1,rs2,rd)		vis_dd2d(0x6e, rs1, rs2, rd)
-#define vis_nands(rs1,rs2,rd)		vis_ss2s(0x6f, rs1, rs2, rd)
-#define vis_xor(rs1,rs2,rd)		vis_dd2d(0x6c, rs1, rs2, rd)
-#define vis_xors(rs1,rs2,rd)		vis_ss2s(0x6d, rs1, rs2, rd)
-#define vis_xnor(rs1,rs2,rd)		vis_dd2d(0x72, rs1, rs2, rd)
-#define vis_xnors(rs1,rs2,rd)		vis_ss2s(0x73, rs1, rs2, rd)
-#define vis_ornot1(rs1,rs2,rd)		vis_dd2d(0x7a, rs1, rs2, rd)
-#define vis_ornot1s(rs1,rs2,rd)		vis_ss2s(0x7b, rs1, rs2, rd)
-#define vis_ornot2(rs1,rs2,rd)		vis_dd2d(0x76, rs1, rs2, rd)
-#define vis_ornot2s(rs1,rs2,rd)		vis_ss2s(0x77, rs1, rs2, rd)
-#define vis_andnot1(rs1,rs2,rd)		vis_dd2d(0x68, rs1, rs2, rd)
-#define vis_andnot1s(rs1,rs2,rd)	vis_ss2s(0x69, rs1, rs2, rd)
-#define vis_andnot2(rs1,rs2,rd)		vis_dd2d(0x64, rs1, rs2, rd)
-#define vis_andnot2s(rs1,rs2,rd)	vis_ss2s(0x65, rs1, rs2, rd)
-
-/* Pixel component distance.  */
-
-#define vis_pdist(rs1,rs2,rd)		vis_dd2d(0x3e, rs1, rs2, rd)
diff --git a/src/libmpeg2new/libmpeg2/Makefile.am b/src/libmpeg2new/libmpeg2/Makefile.am
deleted file mode 100644
index 2caa3ddc2..000000000
--- a/src/libmpeg2new/libmpeg2/Makefile.am
+++ /dev/null
@@ -1,14 +0,0 @@
-include $(top_srcdir)/misc/Makefile.common
-
-AM_CFLAGS = $(DEFAULT_OCFLAGS) $(VISIBILITY_FLAG)
-
-noinst_LTLIBRARIES = libmpeg2.la libmpeg2arch.la
-
-libmpeg2_la_SOURCES = alloc.c header.c decode.c slice.c motion_comp.c idct.c
-libmpeg2_la_LIBADD = libmpeg2arch.la
-
-libmpeg2arch_la_SOURCES = motion_comp_mmx.c idct_mmx.c \
-                          motion_comp_altivec.c idct_altivec.c \
-                          motion_comp_alpha.c idct_alpha.c \
-                          motion_comp_vis.c \
-                          cpu_accel.c cpu_state.c
diff --git a/src/libmpeg2new/libmpeg2/alloc.c b/src/libmpeg2new/libmpeg2/alloc.c
deleted file mode 100644
index f1a7afa1c..000000000
--- a/src/libmpeg2new/libmpeg2/alloc.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * alloc.c
- * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include <stdlib.h>
-#include <inttypes.h>
-
-#include "../include/mpeg2.h"
-
-static void * (* malloc_hook) (unsigned size, mpeg2_alloc_t reason) = NULL;
-static int (* free_hook) (void * buf) = NULL;
-
-void * mpeg2_malloc (unsigned size, mpeg2_alloc_t reason)
-{
-    char * buf;
-
-    if (malloc_hook) {
-	buf = (char *) malloc_hook (size, reason);
-	if (buf)
-	    return buf;
-    }
-
-    if (size) {
-	buf = (char *) malloc (size + 63 + sizeof (void **));
-	if (buf) {
-	    char * align_buf;
-
-	    align_buf = buf + 63 + sizeof (void **);
-	    align_buf -= (long)align_buf & 63;
-	    *(((void **)align_buf) - 1) = buf;
-	    return align_buf;
-	}
-    }
-    return NULL;
-}
-
-void mpeg2_free (void * buf)
-{
-    if (free_hook && free_hook (buf))
-	return;
-
-    if (buf)
-	free (*(((void **)buf) - 1));
-}
-
-void mpeg2_malloc_hooks (void * malloc (unsigned, mpeg2_alloc_t),
-			 int free (void *))
-{
-    malloc_hook = malloc;
-    free_hook = free;
-}
diff --git a/src/libmpeg2new/libmpeg2/configure.incl b/src/libmpeg2new/libmpeg2/configure.incl
deleted file mode 100644
index f8dbd5aef..000000000
--- a/src/libmpeg2new/libmpeg2/configure.incl
+++ /dev/null
@@ -1,11 +0,0 @@
-AC_SUBST([LIBMPEG2_CFLAGS])
-
-dnl avoid -fPIC when possible
-AC_LIBTOOL_NON_PIC([LIBMPEG2_CFLAGS="$LIBMPEG2_CFLAGS -prefer-non-pic"])
-
-dnl check for cpudetect
-AC_ARG_ENABLE([accel-detect],
-    [  --disable-accel-detect  make a version without accel detection code])
-if test x"$enable_accel_detect" != x"no"; then
-    AC_DEFINE([ACCEL_DETECT],,[autodetect accelerations])
-fi
diff --git a/src/libmpeg2new/libmpeg2/convert_internal.h b/src/libmpeg2new/libmpeg2/convert_internal.h
deleted file mode 100644
index d1e63d5e3..000000000
--- a/src/libmpeg2new/libmpeg2/convert_internal.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * convert_internal.h
- * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-typedef struct {
-    uint8_t * rgb_ptr;
-    int width;
-    int field;
-    int y_stride, rgb_stride, y_increm, uv_increm, rgb_increm, rgb_slice;
-    int chroma420, convert420;
-    int dither_offset, dither_stride;
-    int y_stride_frame, uv_stride_frame, rgb_stride_frame, rgb_stride_min;
-} convert_rgb_t;
-
-typedef void mpeg2convert_copy_t (void * id, uint8_t * const * src,
-				  unsigned int v_offset);
-
-mpeg2convert_copy_t * mpeg2convert_rgb_mmxext (int bpp, int mode,
-					       const mpeg2_sequence_t * seq);
-mpeg2convert_copy_t * mpeg2convert_rgb_mmx (int bpp, int mode,
-					    const mpeg2_sequence_t * seq);
-mpeg2convert_copy_t * mpeg2convert_rgb_vis (int bpp, int mode,
-					    const mpeg2_sequence_t * seq);
diff --git a/src/libmpeg2new/libmpeg2/cpu_accel.c b/src/libmpeg2new/libmpeg2/cpu_accel.c
deleted file mode 100644
index 7846f1e88..000000000
--- a/src/libmpeg2new/libmpeg2/cpu_accel.c
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- * cpu_accel.c
- * Copyright (C) 2000-2004 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include "config.h"
-
-#include <inttypes.h>
-
-#include "../include/mpeg2.h"
-#include "../include/attributes.h"
-#include "mpeg2_internal.h"
-
-#ifdef ARCH_X86
-static inline uint32_t arch_accel (uint32_t accel)
-{
-    if (accel & (MPEG2_ACCEL_X86_3DNOW | MPEG2_ACCEL_X86_MMXEXT))
-	accel |= MPEG2_ACCEL_X86_MMX;
-	
-    if (accel & (MPEG2_ACCEL_X86_SSE2 | MPEG2_ACCEL_X86_SSE3))
-	accel |= MPEG2_ACCEL_X86_MMXEXT;
-	
-    if (accel & (MPEG2_ACCEL_X86_SSE3))
-	accel |= MPEG2_ACCEL_X86_SSE2;
-
-#ifdef ACCEL_DETECT
-    if (accel & MPEG2_ACCEL_DETECT) {
-	uint32_t eax, ebx, ecx, edx;
-	int AMD;
-
-#if !defined(PIC) && !defined(__PIC__)
-#define cpuid(op,eax,ebx,ecx,edx)	\
-    __asm__ ("cpuid"			\
-	     : "=a" (eax),		\
-	       "=b" (ebx),		\
-	       "=c" (ecx),		\
-	       "=d" (edx)		\
-	     : "a" (op)			\
-	     : "cc")
-#else	/* PIC version : save ebx */
-#define cpuid(op,eax,ebx,ecx,edx)	\
-    __asm__ ("push %%ebx\n\t"		\
-	     "cpuid\n\t"		\
-	     "movl %%ebx,%1\n\t"	\
-	     "pop %%ebx"		\
-	     : "=a" (eax),		\
-	       "=r" (ebx),		\
-	       "=c" (ecx),		\
-	       "=d" (edx)		\
-	     : "a" (op)			\
-	     : "cc")
-#endif
-
-	__asm__ ("pushf\n\t"
-		 "pushf\n\t"
-		 "pop %0\n\t"
-		 "movl %0,%1\n\t"
-		 "xorl $0x200000,%0\n\t"
-		 "push %0\n\t"
-		 "popf\n\t"
-		 "pushf\n\t"
-		 "pop %0\n\t"
-		 "popf"
-		 : "=r" (eax),
-		 "=r" (ebx)
-		 :
-		 : "cc");
-
-	if (eax == ebx)			/* no cpuid */
-	    return accel;
-
-	cpuid (0x00000000, eax, ebx, ecx, edx);
-	if (!eax)			/* vendor string only */
-	    return accel;
-
-	AMD = (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65);
-
-	cpuid (0x00000001, eax, ebx, ecx, edx);
-	if (! (edx & 0x00800000))	/* no MMX */
-	    return accel;
-
-	accel |= MPEG2_ACCEL_X86_MMX;
-	if (edx & 0x02000000)	/* SSE - identical to AMD MMX extensions */
-	    accel |= MPEG2_ACCEL_X86_MMXEXT;
-
-	if (edx & 0x04000000)	/* SSE2 */
-	    accel |= MPEG2_ACCEL_X86_SSE2;
-	    
-	if (ecx & 0x00000001)	/* SSE3 */
-	    accel |= MPEG2_ACCEL_X86_SSE3;
-	    
-	cpuid (0x80000000, eax, ebx, ecx, edx);
-	if (eax < 0x80000001)		/* no extended capabilities */
-	    return accel;
-
-	cpuid (0x80000001, eax, ebx, ecx, edx);
-
-	if (edx & 0x80000000)
-	    accel |= MPEG2_ACCEL_X86_3DNOW;
-
-	if (AMD && (edx & 0x00400000))	/* AMD MMX extensions */
-	    accel |= MPEG2_ACCEL_X86_MMXEXT;
-    }
-#endif /* ACCEL_DETECT */
-
-    return accel;
-}
-#endif /* ARCH_X86 */
-
-#if defined(ACCEL_DETECT) && (defined(ARCH_PPC) || defined(ARCH_SPARC))
-#include <signal.h>
-#include <setjmp.h>
-
-static sigjmp_buf jmpbuf;
-static volatile sig_atomic_t canjump = 0;
-
-static RETSIGTYPE sigill_handler (int sig)
-{
-    if (!canjump) {
-	signal (sig, SIG_DFL);
-	raise (sig);
-    }
-
-    canjump = 0;
-    siglongjmp (jmpbuf, 1);
-}
-#endif /* ACCEL_DETECT && (ARCH_PPC || ARCH_SPARC) */
-
-#ifdef ARCH_PPC
-static inline uint32_t arch_accel (uint32_t accel)
-{
-#ifdef ACCEL_DETECT
-    if (accel & (MPEG2_ACCEL_PPC_ALTIVEC | MPEG2_ACCEL_DETECT) ==
-	MPEG2_ACCEL_DETECT) {
-	static RETSIGTYPE (* oldsig) (int);
-
-	oldsig = signal (SIGILL, sigill_handler);
-	if (sigsetjmp (jmpbuf, 1)) {
-	    signal (SIGILL, oldsig);
-	    return accel;
-	}
-
-	canjump = 1;
-
-#ifdef HAVE_ALTIVEC_H	/* gnu */
-#define VAND(a,b,c) "vand " #a "," #b "," #c "\n\t"
-#else			/* apple */
-#define VAND(a,b,c) "vand v" #a ",v" #b ",v" #c "\n\t"
-#endif
-	asm volatile ("mtspr 256, %0\n\t"
-		      VAND (0, 0, 0)
-		      :
-		      : "r" (-1));
-
-	canjump = 0;
-	accel |= MPEG2_ACCEL_PPC_ALTIVEC;
-
-	signal (SIGILL, oldsig);
-    }
-#endif /* ACCEL_DETECT */
-
-    return accel;
-}
-#endif /* ARCH_PPC */
-
-#ifdef ARCH_SPARC
-static inline uint32_t arch_accel (uint32_t accel)
-{
-    if (accel & MPEG2_ACCEL_SPARC_VIS2)
-	accel |= MPEG2_ACCEL_SPARC_VIS;
-
-#ifdef ACCEL_DETECT
-    if (accel & (MPEG2_ACCEL_SPARC_VIS2 | MPEG2_ACCEL_DETECT) ==
-	MPEG2_ACCEL_DETECT) {
-	static RETSIGTYPE (* oldsig) (int);
-
-	oldsig = signal (SIGILL, sigill_handler);
-	if (sigsetjmp (jmpbuf, 1)) {
-	    signal (SIGILL, oldsig);
-	    return accel;
-	}
-
-	canjump = 1;
-
-	/* pdist %f0, %f0, %f0 */
-	__asm__ __volatile__(".word\t0x81b007c0");
-
-	canjump = 0;
-	accel |= MPEG2_ACCEL_SPARC_VIS;
-
-	if (sigsetjmp (jmpbuf, 1)) {
-	    signal (SIGILL, oldsig);
-	    return accel;
-	}
-
-	canjump = 1;
-
-	/* edge8n %g0, %g0, %g0 */
-	__asm__ __volatile__(".word\t0x81b00020");
-
-	canjump = 0;
-	accel |= MPEG2_ACCEL_SPARC_VIS2;
-
-	signal (SIGILL, oldsig);
-    }
-#endif /* ACCEL_DETECT */
-
-    return accel;
-}
-#endif /* ARCH_SPARC */
-
-#ifdef ARCH_ALPHA
-static inline uint32_t arch_accel (uint32_t accel)
-{
-    if (accel & MPEG2_ACCEL_ALPHA_MVI)
-	accel |= MPEG2_ACCEL_ALPHA;
-
-#ifdef ACCEL_DETECT
-    if (accel & MPEG2_ACCEL_DETECT) {
-	uint64_t no_mvi;
-
-	asm volatile ("amask %1, %0"
-		      : "=r" (no_mvi)
-		      : "rI" (256));	/* AMASK_MVI */
-	accel |= no_mvi ? MPEG2_ACCEL_ALPHA : (MPEG2_ACCEL_ALPHA |
-					       MPEG2_ACCEL_ALPHA_MVI);
-    }
-#endif /* ACCEL_DETECT */
-
-    return accel;
-}
-#endif /* ARCH_ALPHA */
-
-uint32_t mpeg2_detect_accel (uint32_t accel)
-{
-#if defined (ARCH_X86) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC)
-    accel = arch_accel (accel);
-#endif
-    return accel;
-}
diff --git a/src/libmpeg2new/libmpeg2/cpu_state.c b/src/libmpeg2new/libmpeg2/cpu_state.c
deleted file mode 100644
index edbf2dd28..000000000
--- a/src/libmpeg2new/libmpeg2/cpu_state.c
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * cpu_state.c
- * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include "config.h"
-
-#include <stdlib.h>
-#include <inttypes.h>
-
-#include "../include/mpeg2.h"
-#include "../include/attributes.h"
-#include "mpeg2_internal.h"
-#ifdef ARCH_X86
-#include "../include/mmx.h"
-#endif
-
-void (* mpeg2_cpu_state_save) (cpu_state_t * state) = NULL;
-void (* mpeg2_cpu_state_restore) (cpu_state_t * state) = NULL;
-
-#ifdef ARCH_X86
-static void state_restore_mmx (cpu_state_t * state)
-{
-    emms ();
-}
-#endif
-
-#ifdef ARCH_PPC
-#ifdef HAVE_ALTIVEC_H	/* gnu */
-#define LI(a,b) "li " #a "," #b "\n\t"
-#define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t"
-#define STVX(a,b,c) "stvx " #a "," #b "," #c "\n\t"
-#define LVX0(a,b,c) "lvx " #a ",0," #c "\n\t"
-#define LVX(a,b,c) "lvx " #a "," #b "," #c "\n\t"
-#else			/* apple */
-#define LI(a,b) "li r" #a "," #b "\n\t"
-#define STVX0(a,b,c) "stvx v" #a ",0,r" #c "\n\t"
-#define STVX(a,b,c) "stvx v" #a ",r" #b ",r" #c "\n\t"
-#define LVX0(a,b,c) "lvx v" #a ",0,r" #c "\n\t"
-#define LVX(a,b,c) "lvx v" #a ",r" #b ",r" #c "\n\t"
-#endif
-
-static void state_save_altivec (cpu_state_t * state)
-{
-    asm (LI (9, 16)
-	 STVX0 (20, 0, 3)
-	 LI (11, 32)
-	 STVX (21, 9, 3)
-	 LI (9, 48)
-	 STVX (22, 11, 3)
-	 LI (11, 64)
-	 STVX (23, 9, 3)
-	 LI (9, 80)
-	 STVX (24, 11, 3)
-	 LI (11, 96)
-	 STVX (25, 9, 3)
-	 LI (9, 112)
-	 STVX (26, 11, 3)
-	 LI (11, 128)
-	 STVX (27, 9, 3)
-	 LI (9, 144)
-	 STVX (28, 11, 3)
-	 LI (11, 160)
-	 STVX (29, 9, 3)
-	 LI (9, 176)
-	 STVX (30, 11, 3)
-	 STVX (31, 9, 3));
-}
-
-static void state_restore_altivec (cpu_state_t * state)
-{
-    asm (LI (9, 16)
-	 LVX0 (20, 0, 3)
-	 LI (11, 32)
-	 LVX (21, 9, 3)
-	 LI (9, 48)
-	 LVX (22, 11, 3)
-	 LI (11, 64)
-	 LVX (23, 9, 3)
-	 LI (9, 80)
-	 LVX (24, 11, 3)
-	 LI (11, 96)
-	 LVX (25, 9, 3)
-	 LI (9, 112)
-	 LVX (26, 11, 3)
-	 LI (11, 128)
-	 LVX (27, 9, 3)
-	 LI (9, 144)
-	 LVX (28, 11, 3)
-	 LI (11, 160)
-	 LVX (29, 9, 3)
-	 LI (9, 176)
-	 LVX (30, 11, 3)
-	 LVX (31, 9, 3));
-}
-#endif
-
-void mpeg2_cpu_state_init (uint32_t accel)
-{
-#ifdef ARCH_X86
-    if (accel & MPEG2_ACCEL_X86_MMX) {
-	mpeg2_cpu_state_restore = state_restore_mmx;
-    }
-#endif
-#ifdef ARCH_PPC
-    if (accel & MPEG2_ACCEL_PPC_ALTIVEC) {
-	mpeg2_cpu_state_save = state_save_altivec;
-	mpeg2_cpu_state_restore = state_restore_altivec;
-    }
-#endif
-}
diff --git a/src/libmpeg2new/libmpeg2/decode.c b/src/libmpeg2new/libmpeg2/decode.c
deleted file mode 100644
index 337ba4466..000000000
--- a/src/libmpeg2new/libmpeg2/decode.c
+++ /dev/null
@@ -1,439 +0,0 @@
-/*
- * decode.c
- * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include "config.h"
-
-#include <string.h>	/* memcmp/memset, try to remove */
-#include <stdlib.h>
-#include <inttypes.h>
-
-#include "../include/mpeg2.h"
-#include "../include/attributes.h"
-#include "mpeg2_internal.h"
-
-static int mpeg2_accels = 0;
-
-#define BUFFER_SIZE (1194 * 1024)
-
-const mpeg2_info_t * mpeg2_info (mpeg2dec_t * mpeg2dec)
-{
-    return &(mpeg2dec->info);
-}
-
-static inline int skip_chunk (mpeg2dec_t * mpeg2dec, int bytes)
-{
-    uint8_t * current;
-    uint32_t shift;
-    uint8_t * limit;
-    uint8_t byte;
-
-    if (!bytes)
-	return 0;
-
-    current = mpeg2dec->buf_start;
-    shift = mpeg2dec->shift;
-    limit = current + bytes;
-
-    do {
-	byte = *current++;
-	if (shift == 0x00000100) {
-	    int skipped;
-
-	    mpeg2dec->shift = 0xffffff00;
-	    skipped = current - mpeg2dec->buf_start;
-	    mpeg2dec->buf_start = current;
-	    return skipped;
-	}
-	shift = (shift | byte) << 8;
-    } while (current < limit);
-
-    mpeg2dec->shift = shift;
-    mpeg2dec->buf_start = current;
-    return 0;
-}
-
-static inline int copy_chunk (mpeg2dec_t * mpeg2dec, int bytes)
-{
-    uint8_t * current;
-    uint32_t shift;
-    uint8_t * chunk_ptr;
-    uint8_t * limit;
-    uint8_t byte;
-
-    if (!bytes)
-	return 0;
-
-    current = mpeg2dec->buf_start;
-    shift = mpeg2dec->shift;
-    chunk_ptr = mpeg2dec->chunk_ptr;
-    limit = current + bytes;
-
-    do {
-	byte = *current++;
-	if (shift == 0x00000100) {
-	    int copied;
-
-	    mpeg2dec->shift = 0xffffff00;
-	    mpeg2dec->chunk_ptr = chunk_ptr + 1;
-	    copied = current - mpeg2dec->buf_start;
-	    mpeg2dec->buf_start = current;
-	    return copied;
-	}
-	shift = (shift | byte) << 8;
-	*chunk_ptr++ = byte;
-    } while (current < limit);
-
-    mpeg2dec->shift = shift;
-    mpeg2dec->buf_start = current;
-    return 0;
-}
-
-void mpeg2_buffer (mpeg2dec_t * mpeg2dec, uint8_t * start, uint8_t * end)
-{
-    mpeg2dec->buf_start = start;
-    mpeg2dec->buf_end = end;
-}
-
-int mpeg2_getpos (mpeg2dec_t * mpeg2dec)
-{
-    return mpeg2dec->buf_end - mpeg2dec->buf_start;
-}
-
-static inline mpeg2_state_t seek_chunk (mpeg2dec_t * mpeg2dec)
-{
-    int size, skipped;
-
-    size = mpeg2dec->buf_end - mpeg2dec->buf_start;
-    skipped = skip_chunk (mpeg2dec, size);
-    if (!skipped) {
-	mpeg2dec->bytes_since_tag += size;
-	return STATE_BUFFER;
-    }
-    mpeg2dec->bytes_since_tag += skipped;
-    mpeg2dec->code = mpeg2dec->buf_start[-1];
-    return STATE_INTERNAL_NORETURN;
-}
-
-mpeg2_state_t mpeg2_seek_header (mpeg2dec_t * mpeg2dec)
-{
-    while (!(mpeg2dec->code == 0xb3 ||
-	     ((mpeg2dec->code == 0xb7 || mpeg2dec->code == 0xb8 ||
-	       !mpeg2dec->code) && mpeg2dec->sequence.width != (unsigned)-1)))
-	if (seek_chunk (mpeg2dec) == STATE_BUFFER)
-	    return STATE_BUFFER;
-    mpeg2dec->chunk_start = mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer;
-    mpeg2dec->user_data_len = 0;
-    return ((mpeg2dec->code == 0xb7) ?
-	    mpeg2_header_end (mpeg2dec) : mpeg2_parse_header (mpeg2dec));
-}
-
-#define RECEIVED(code,state) (((state) << 8) + (code))
-
-mpeg2_state_t mpeg2_parse (mpeg2dec_t * mpeg2dec)
-{
-    int size_buffer, size_chunk, copied;
-
-    if (mpeg2dec->action) {
-	mpeg2_state_t state;
-
-	state = mpeg2dec->action (mpeg2dec);
-	if ((int)state > (int)STATE_INTERNAL_NORETURN)
-	    return state;
-    }
-
-    while (1) {
-	while ((unsigned) (mpeg2dec->code - mpeg2dec->first_decode_slice) <
-	       mpeg2dec->nb_decode_slices) {
-	    size_buffer = mpeg2dec->buf_end - mpeg2dec->buf_start;
-	    size_chunk = (mpeg2dec->chunk_buffer + BUFFER_SIZE -
-			  mpeg2dec->chunk_ptr);
-	    if (size_buffer <= size_chunk) {
-		copied = copy_chunk (mpeg2dec, size_buffer);
-		if (!copied) {
-		    mpeg2dec->bytes_since_tag += size_buffer;
-		    mpeg2dec->chunk_ptr += size_buffer;
-		    return STATE_BUFFER;
-		}
-	    } else {
-		copied = copy_chunk (mpeg2dec, size_chunk);
-		if (!copied) {
-		    /* filled the chunk buffer without finding a start code */
-		    mpeg2dec->bytes_since_tag += size_chunk;
-		    mpeg2dec->action = seek_chunk;
-		    return STATE_INVALID;
-		}
-	    }
-	    mpeg2dec->bytes_since_tag += copied;
-
-	    mpeg2_slice (&(mpeg2dec->decoder), mpeg2dec->code,
-			 mpeg2dec->chunk_start);
-	    mpeg2dec->code = mpeg2dec->buf_start[-1];
-	    mpeg2dec->chunk_ptr = mpeg2dec->chunk_start;
-	}
-	if ((unsigned) (mpeg2dec->code - 1) >= 0xb0 - 1)
-	    break;
-	if (seek_chunk (mpeg2dec) == STATE_BUFFER)
-	    return STATE_BUFFER;
-    }
-
-    mpeg2dec->action = mpeg2_seek_header;
-    switch (mpeg2dec->code) {
-    case 0x00:
-	return mpeg2dec->state;
-    case 0xb3:
-    case 0xb7:
-    case 0xb8:
-	return (mpeg2dec->state == STATE_SLICE) ? STATE_SLICE : STATE_INVALID;
-    default:
-	mpeg2dec->action = seek_chunk;
-	return STATE_INVALID;
-    }
-}
-
-mpeg2_state_t mpeg2_parse_header (mpeg2dec_t * mpeg2dec)
-{
-    static int (* process_header[]) (mpeg2dec_t * mpeg2dec) = {
-	mpeg2_header_picture, mpeg2_header_extension, mpeg2_header_user_data,
-	mpeg2_header_sequence, NULL, NULL, NULL, NULL, mpeg2_header_gop
-    };
-    int size_buffer, size_chunk, copied;
-
-    mpeg2dec->action = mpeg2_parse_header;
-    mpeg2dec->info.user_data = NULL;	mpeg2dec->info.user_data_len = 0;
-    while (1) {
-	size_buffer = mpeg2dec->buf_end - mpeg2dec->buf_start;
-	size_chunk = (mpeg2dec->chunk_buffer + BUFFER_SIZE -
-		      mpeg2dec->chunk_ptr);
-	if (size_buffer <= size_chunk) {
-	    copied = copy_chunk (mpeg2dec, size_buffer);
-	    if (!copied) {
-		mpeg2dec->bytes_since_tag += size_buffer;
-		mpeg2dec->chunk_ptr += size_buffer;
-		return STATE_BUFFER;
-	    }
-	} else {
-	    copied = copy_chunk (mpeg2dec, size_chunk);
-	    if (!copied) {
-		/* filled the chunk buffer without finding a start code */
-		mpeg2dec->bytes_since_tag += size_chunk;
-		mpeg2dec->code = 0xb4;
-		mpeg2dec->action = mpeg2_seek_header;
-		return STATE_INVALID;
-	    }
-	}
-	mpeg2dec->bytes_since_tag += copied;
-
-	if (process_header[mpeg2dec->code & 0x0b] (mpeg2dec)) {
-	    mpeg2dec->code = mpeg2dec->buf_start[-1];
-	    mpeg2dec->action = mpeg2_seek_header;
-	    return STATE_INVALID;
-	}
-
-	mpeg2dec->code = mpeg2dec->buf_start[-1];
-	switch (RECEIVED (mpeg2dec->code, mpeg2dec->state)) {
-
-	/* state transition after a sequence header */
-	case RECEIVED (0x00, STATE_SEQUENCE):
-	case RECEIVED (0xb8, STATE_SEQUENCE):
-	    mpeg2_header_sequence_finalize (mpeg2dec);
-	    break;
-
-	/* other legal state transitions */
-	case RECEIVED (0x00, STATE_GOP):
-	    mpeg2_header_gop_finalize (mpeg2dec);
-	    break;
-	case RECEIVED (0x01, STATE_PICTURE):
-	case RECEIVED (0x01, STATE_PICTURE_2ND):
-	    mpeg2_header_picture_finalize (mpeg2dec, mpeg2_accels);
-	    mpeg2dec->action = mpeg2_header_slice_start;
-	    break;
-
-	/* legal headers within a given state */
-	case RECEIVED (0xb2, STATE_SEQUENCE):
-	case RECEIVED (0xb2, STATE_GOP):
-	case RECEIVED (0xb2, STATE_PICTURE):
-	case RECEIVED (0xb2, STATE_PICTURE_2ND):
-	case RECEIVED (0xb5, STATE_SEQUENCE):
-	case RECEIVED (0xb5, STATE_PICTURE):
-	case RECEIVED (0xb5, STATE_PICTURE_2ND):
-	    mpeg2dec->chunk_ptr = mpeg2dec->chunk_start;
-	    continue;
-
-	default:
-	    mpeg2dec->action = mpeg2_seek_header;
-	    return STATE_INVALID;
-	}
-
-	mpeg2dec->chunk_start = mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer;
-	mpeg2dec->user_data_len = 0;
-	return mpeg2dec->state;
-    }
-}
-
-int mpeg2_convert (mpeg2dec_t * mpeg2dec, mpeg2_convert_t convert, void * arg)
-{
-    mpeg2_convert_init_t convert_init;
-    int error;
-
-    error = convert (MPEG2_CONVERT_SET, NULL, &(mpeg2dec->sequence), 0,
-		     mpeg2_accels, arg, &convert_init);
-    if (!error) {
-	mpeg2dec->convert = convert;
-	mpeg2dec->convert_arg = arg;
-	mpeg2dec->convert_id_size = convert_init.id_size;
-	mpeg2dec->convert_stride = 0;
-    }
-    return error;
-}
-
-int mpeg2_stride (mpeg2dec_t * mpeg2dec, int stride)
-{
-    if (!mpeg2dec->convert) {
-	if (stride < (int) mpeg2dec->sequence.width)
-	    stride = mpeg2dec->sequence.width;
-	mpeg2dec->decoder.stride_frame = stride;
-    } else {
-	mpeg2_convert_init_t convert_init;
-
-	stride = mpeg2dec->convert (MPEG2_CONVERT_STRIDE, NULL,
-				    &(mpeg2dec->sequence), stride,
-				    mpeg2_accels, mpeg2dec->convert_arg,
-				    &convert_init);
-	mpeg2dec->convert_id_size = convert_init.id_size;
-	mpeg2dec->convert_stride = stride;
-    }
-    return stride;
-}
-
-void mpeg2_set_buf (mpeg2dec_t * mpeg2dec, uint8_t * buf[3], void * id)
-{
-    mpeg2_fbuf_t * fbuf;
-
-    if (mpeg2dec->custom_fbuf) {
-	if (mpeg2dec->state == STATE_SEQUENCE) {
-	    mpeg2dec->fbuf[2] = mpeg2dec->fbuf[1];
-	    mpeg2dec->fbuf[1] = mpeg2dec->fbuf[0];
-	}
-	mpeg2_set_fbuf (mpeg2dec, (mpeg2dec->decoder.coding_type ==
-				   PIC_FLAG_CODING_TYPE_B));
-	fbuf = mpeg2dec->fbuf[0];
-    } else {
-	fbuf = &(mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index].fbuf);
-	mpeg2dec->alloc_index_user = ++mpeg2dec->alloc_index;
-    }
-    fbuf->buf[0] = buf[0];
-    fbuf->buf[1] = buf[1];
-    fbuf->buf[2] = buf[2];
-    fbuf->id = id;
-}
-
-void mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf)
-{
-    mpeg2dec->custom_fbuf = custom_fbuf;
-}
-
-void mpeg2_skip (mpeg2dec_t * mpeg2dec, int skip)
-{
-    mpeg2dec->first_decode_slice = 1;
-    mpeg2dec->nb_decode_slices = skip ? 0 : (0xb0 - 1);
-}
-
-void mpeg2_slice_region (mpeg2dec_t * mpeg2dec, int start, int end)
-{
-    start = (start < 1) ? 1 : (start > 0xb0) ? 0xb0 : start;
-    end = (end < start) ? start : (end > 0xb0) ? 0xb0 : end;
-    mpeg2dec->first_decode_slice = start;
-    mpeg2dec->nb_decode_slices = end - start;
-}
-
-void mpeg2_tag_picture (mpeg2dec_t * mpeg2dec, uint32_t tag, uint32_t tag2)
-{
-    mpeg2dec->tag_previous = mpeg2dec->tag_current;
-    mpeg2dec->tag2_previous = mpeg2dec->tag2_current;
-    mpeg2dec->tag_current = tag;
-    mpeg2dec->tag2_current = tag2;
-    mpeg2dec->num_tags++;
-    mpeg2dec->bytes_since_tag = 0;
-}
-
-uint32_t mpeg2_accel (uint32_t accel)
-{
-    if (!mpeg2_accels) {
-	mpeg2_accels = mpeg2_detect_accel (accel) | MPEG2_ACCEL_DETECT;
-	mpeg2_cpu_state_init (mpeg2_accels);
-	mpeg2_idct_init (mpeg2_accels);
-	mpeg2_mc_init (mpeg2_accels);
-    }
-    return mpeg2_accels & ~MPEG2_ACCEL_DETECT;
-}
-
-void mpeg2_reset (mpeg2dec_t * mpeg2dec, int full_reset)
-{
-    mpeg2dec->buf_start = mpeg2dec->buf_end = NULL;
-    mpeg2dec->num_tags = 0;
-    mpeg2dec->shift = 0xffffff00;
-    mpeg2dec->code = 0xb4;
-    mpeg2dec->action = mpeg2_seek_header;
-    mpeg2dec->state = STATE_INVALID;
-    mpeg2dec->first = 1;
-
-    mpeg2_reset_info(&(mpeg2dec->info));
-    mpeg2dec->info.gop = NULL;
-    mpeg2dec->info.user_data = NULL;
-    mpeg2dec->info.user_data_len = 0;
-    if (full_reset) {
-	mpeg2dec->info.sequence = NULL;
-	mpeg2_header_state_init (mpeg2dec);
-    }
-
-}
-
-mpeg2dec_t * mpeg2_init (void)
-{
-    mpeg2dec_t * mpeg2dec;
-
-    mpeg2_accel (MPEG2_ACCEL_DETECT);
-
-    mpeg2dec = (mpeg2dec_t *) mpeg2_malloc (sizeof (mpeg2dec_t),
-					    MPEG2_ALLOC_MPEG2DEC);
-    if (mpeg2dec == NULL)
-	return NULL;
-
-    memset (mpeg2dec->decoder.DCTblock, 0, 64 * sizeof (int16_t));
-    memset (mpeg2dec->quantizer_matrix, 0, 4 * 64 * sizeof (uint8_t));
-
-    mpeg2dec->chunk_buffer = (uint8_t *) mpeg2_malloc (BUFFER_SIZE + 4,
-						       MPEG2_ALLOC_CHUNK);
-
-    mpeg2dec->sequence.width = (unsigned)-1;
-    mpeg2_reset (mpeg2dec, 1);
-
-    return mpeg2dec;
-}
-
-void mpeg2_close (mpeg2dec_t * mpeg2dec)
-{
-    mpeg2_header_state_init (mpeg2dec);
-    mpeg2_free (mpeg2dec->chunk_buffer);
-    mpeg2_free (mpeg2dec);
-}
diff --git a/src/libmpeg2new/libmpeg2/header.c b/src/libmpeg2new/libmpeg2/header.c
deleted file mode 100644
index 935a50aa3..000000000
--- a/src/libmpeg2new/libmpeg2/header.c
+++ /dev/null
@@ -1,961 +0,0 @@
-/*
- * header.c
- * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 2003      Regis Duchesne <hpreg@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include "config.h"
-
-#include <inttypes.h>
-#include <stdlib.h>	/* defines NULL */
-#include <string.h>	/* memcmp */
-
-#include "../include/mpeg2.h"
-#include "../include/attributes.h"
-#include "mpeg2_internal.h"
-
-#define SEQ_EXT 2
-#define SEQ_DISPLAY_EXT 4
-#define QUANT_MATRIX_EXT 8
-#define COPYRIGHT_EXT 0x10
-#define PIC_DISPLAY_EXT 0x80
-#define PIC_CODING_EXT 0x100
-
-/* default intra quant matrix, in zig-zag order */
-static const uint8_t default_intra_quantizer_matrix[64] ATTR_ALIGN(16) = {
-    8,
-    16, 16,
-    19, 16, 19,
-    22, 22, 22, 22,
-    22, 22, 26, 24, 26,
-    27, 27, 27, 26, 26, 26,
-    26, 27, 27, 27, 29, 29, 29,
-    34, 34, 34, 29, 29, 29, 27, 27,
-    29, 29, 32, 32, 34, 34, 37,
-    38, 37, 35, 35, 34, 35,
-    38, 38, 40, 40, 40,
-    48, 48, 46, 46,
-    56, 56, 58,
-    69, 69,
-    83
-};
-
-uint8_t mpeg2_scan_norm[64] ATTR_ALIGN(16) = {
-    /* Zig-Zag scan pattern */
-     0,  1,  8, 16,  9,  2,  3, 10, 17, 24, 32, 25, 18, 11,  4,  5,
-    12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13,  6,  7, 14, 21, 28,
-    35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
-    58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
-};
-
-uint8_t mpeg2_scan_alt[64] ATTR_ALIGN(16) = {
-    /* Alternate scan pattern */
-     0, 8,  16, 24,  1,  9,  2, 10, 17, 25, 32, 40, 48, 56, 57, 49,
-    41, 33, 26, 18,  3, 11,  4, 12, 19, 27, 34, 42, 50, 58, 35, 43,
-    51, 59, 20, 28,  5, 13,  6, 14, 21, 29, 36, 44, 52, 60, 37, 45,
-    53, 61, 22, 30,  7, 15, 23, 31, 38, 46, 54, 62, 39, 47, 55, 63
-};
-
-void mpeg2_header_state_init (mpeg2dec_t * mpeg2dec)
-{
-    if (mpeg2dec->sequence.width != (unsigned)-1) {
-	int i;
-
-	mpeg2dec->sequence.width = (unsigned)-1;
-	if (!mpeg2dec->custom_fbuf)
-	    for (i = mpeg2dec->alloc_index_user;
-		 i < mpeg2dec->alloc_index; i++) {
-		mpeg2_free (mpeg2dec->fbuf_alloc[i].fbuf.buf[0]);
-		mpeg2_free (mpeg2dec->fbuf_alloc[i].fbuf.buf[1]);
-		mpeg2_free (mpeg2dec->fbuf_alloc[i].fbuf.buf[2]);
-	    }
-	if (mpeg2dec->convert_start)
-	    for (i = 0; i < 3; i++) {
-		mpeg2_free (mpeg2dec->yuv_buf[i][0]);
-		mpeg2_free (mpeg2dec->yuv_buf[i][1]);
-		mpeg2_free (mpeg2dec->yuv_buf[i][2]);
-	    }
-	if (mpeg2dec->decoder.convert_id)
-	    mpeg2_free (mpeg2dec->decoder.convert_id);
-    }
-    mpeg2dec->decoder.coding_type = I_TYPE;
-    mpeg2dec->decoder.convert = NULL;
-    mpeg2dec->decoder.convert_id = NULL;
-    mpeg2dec->picture = mpeg2dec->pictures;
-    mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[0].fbuf;
-    mpeg2dec->fbuf[1] = &mpeg2dec->fbuf_alloc[1].fbuf;
-    mpeg2dec->fbuf[2] = &mpeg2dec->fbuf_alloc[2].fbuf;
-    mpeg2dec->first = 1;
-    mpeg2dec->alloc_index = 0;
-    mpeg2dec->alloc_index_user = 0;
-    mpeg2dec->first_decode_slice = 1;
-    mpeg2dec->nb_decode_slices = 0xb0 - 1;
-    mpeg2dec->convert = NULL;
-    mpeg2dec->convert_start = NULL;
-    mpeg2dec->custom_fbuf = 0;
-    mpeg2dec->yuv_index = 0;
-}
-
-void mpeg2_reset_info (mpeg2_info_t * info)
-{
-    info->current_picture = info->current_picture_2nd = NULL;
-    info->display_picture = info->display_picture_2nd = NULL;
-    info->current_fbuf = info->display_fbuf = info->discard_fbuf = NULL;
-}
-
-static void info_user_data (mpeg2dec_t * mpeg2dec)
-{
-    if (mpeg2dec->user_data_len) {
-	mpeg2dec->info.user_data = mpeg2dec->chunk_buffer;
-	mpeg2dec->info.user_data_len = mpeg2dec->user_data_len - 3;
-    }
-}
-
-int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec)
-{
-    uint8_t * buffer = mpeg2dec->chunk_start;
-    mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence);
-    static unsigned int frame_period[16] = {
-	0, 1126125, 1125000, 1080000, 900900, 900000, 540000, 450450, 450000,
-	/* unofficial: xing 15 fps */
-	1800000,
-	/* unofficial: libmpeg3 "Unofficial economy rates" 5/10/12/15 fps */
-	5400000, 2700000, 2250000, 1800000, 0, 0
-    };
-    int i;
-
-    if ((buffer[6] & 0x20) != 0x20)	/* missing marker_bit */
-	return 1;
-
-    i = (buffer[0] << 16) | (buffer[1] << 8) | buffer[2];
-    if (! (sequence->display_width = sequence->picture_width = i >> 12))
-	return 1;
-    if (! (sequence->display_height = sequence->picture_height = i & 0xfff))
-	return 1;
-    sequence->width = (sequence->picture_width + 15) & ~15;
-    sequence->height = (sequence->picture_height + 15) & ~15;
-    sequence->chroma_width = sequence->width >> 1;
-    sequence->chroma_height = sequence->height >> 1;
-
-    sequence->flags = (SEQ_FLAG_PROGRESSIVE_SEQUENCE |
-		       SEQ_VIDEO_FORMAT_UNSPECIFIED);
-
-    sequence->pixel_width = buffer[3] >> 4;	/* aspect ratio */
-    sequence->frame_period = frame_period[buffer[3] & 15];
-
-    sequence->byte_rate = (buffer[4]<<10) | (buffer[5]<<2) | (buffer[6]>>6);
-
-    sequence->vbv_buffer_size = ((buffer[6]<<16)|(buffer[7]<<8))&0x1ff800;
-
-    if (buffer[7] & 4)
-	sequence->flags |= SEQ_FLAG_CONSTRAINED_PARAMETERS;
-
-    mpeg2dec->copy_matrix = 3;
-    if (buffer[7] & 2) {
-	for (i = 0; i < 64; i++)
-	    mpeg2dec->new_quantizer_matrix[0][mpeg2_scan_norm[i]] =
-		(buffer[i+7] << 7) | (buffer[i+8] >> 1);
-	buffer += 64;
-    } else
-	for (i = 0; i < 64; i++)
-	    mpeg2dec->new_quantizer_matrix[0][mpeg2_scan_norm[i]] =
-		default_intra_quantizer_matrix[i];
-
-    if (buffer[7] & 1)
-	for (i = 0; i < 64; i++)
-	    mpeg2dec->new_quantizer_matrix[1][mpeg2_scan_norm[i]] =
-		buffer[i+8];
-    else
-	memset (mpeg2dec->new_quantizer_matrix[1], 16, 64);
-
-    sequence->profile_level_id = 0x80;
-    sequence->colour_primaries = 0;
-    sequence->transfer_characteristics = 0;
-    sequence->matrix_coefficients = 0;
-
-    mpeg2dec->ext_state = SEQ_EXT;
-    mpeg2dec->state = STATE_SEQUENCE;
-    mpeg2dec->display_offset_x = mpeg2dec->display_offset_y = 0;
-
-    return 0;
-}
-
-static int sequence_ext (mpeg2dec_t * mpeg2dec)
-{
-    uint8_t * buffer = mpeg2dec->chunk_start;
-    mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence);
-    uint32_t flags;
-
-    if (!(buffer[3] & 1))
-	return 1;
-
-    sequence->profile_level_id = (buffer[0] << 4) | (buffer[1] >> 4);
-
-    sequence->display_width = sequence->picture_width +=
-	((buffer[1] << 13) | (buffer[2] << 5)) & 0x3000;
-    sequence->display_height = sequence->picture_height +=
-	(buffer[2] << 7) & 0x3000;
-    sequence->width = (sequence->picture_width + 15) & ~15;
-    sequence->height = (sequence->picture_height + 15) & ~15;
-    flags = sequence->flags | SEQ_FLAG_MPEG2;
-    if (!(buffer[1] & 8)) {
-	flags &= ~SEQ_FLAG_PROGRESSIVE_SEQUENCE;
-	sequence->height = (sequence->height + 31) & ~31;
-    }
-    if (buffer[5] & 0x80)
-	flags |= SEQ_FLAG_LOW_DELAY;
-    sequence->flags = flags;
-    sequence->chroma_width = sequence->width;
-    sequence->chroma_height = sequence->height;
-    switch (buffer[1] & 6) {
-    case 0:	/* invalid */
-	return 1;
-    case 2:	/* 4:2:0 */
-	sequence->chroma_height >>= 1;
-    case 4:	/* 4:2:2 */
-	sequence->chroma_width >>= 1;
-    }
-
-    sequence->byte_rate += ((buffer[2]<<25) | (buffer[3]<<17)) & 0x3ffc0000;
-
-    sequence->vbv_buffer_size |= buffer[4] << 21;
-
-    sequence->frame_period =
-	sequence->frame_period * ((buffer[5]&31)+1) / (((buffer[5]>>2)&3)+1);
-
-    mpeg2dec->ext_state = SEQ_DISPLAY_EXT;
-
-    return 0;
-}
-
-static int sequence_display_ext (mpeg2dec_t * mpeg2dec)
-{
-    uint8_t * buffer = mpeg2dec->chunk_start;
-    mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence);
-
-    sequence->flags = ((sequence->flags & ~SEQ_MASK_VIDEO_FORMAT) |
-		       ((buffer[0]<<4) & SEQ_MASK_VIDEO_FORMAT));
-    if (buffer[0] & 1) {
-	sequence->flags |= SEQ_FLAG_COLOUR_DESCRIPTION;
-	sequence->colour_primaries = buffer[1];
-	sequence->transfer_characteristics = buffer[2];
-	sequence->matrix_coefficients = buffer[3];
-	buffer += 3;
-    }
-
-    if (!(buffer[2] & 2))	/* missing marker_bit */
-	return 1;
-
-    sequence->display_width = (buffer[1] << 6) | (buffer[2] >> 2);
-    sequence->display_height =
-	((buffer[2]& 1 ) << 13) | (buffer[3] << 5) | (buffer[4] >> 3);
-
-    return 0;
-}
-
-static inline void simplify (unsigned int * u, unsigned int * v)
-{
-    unsigned int a, b, tmp;
-
-    a = *u;	b = *v;
-    while (a) {	/* find greatest common divisor */
-	tmp = a;	a = b % tmp;	b = tmp;
-    }
-    *u /= b;	*v /= b;
-}
-
-static inline void finalize_sequence (mpeg2_sequence_t * sequence)
-{
-    int width;
-    int height;
-
-    sequence->byte_rate *= 50;
-
-    if (sequence->flags & SEQ_FLAG_MPEG2) {
-	switch (sequence->pixel_width) {
-	case 1:		/* square pixels */
-	    sequence->pixel_width = sequence->pixel_height = 1;	return;
-	case 2:		/* 4:3 aspect ratio */
-	    width = 4; height = 3;	break;
-	case 3:		/* 16:9 aspect ratio */
-	    width = 16; height = 9;	break;
-	case 4:		/* 2.21:1 aspect ratio */
-	    width = 221; height = 100;	break;
-	default:	/* illegal */
-	    sequence->pixel_width = sequence->pixel_height = 0;	return;
-	}
-	width *= sequence->display_height;
-	height *= sequence->display_width;
-
-    } else {
-	if (sequence->byte_rate == 50 * 0x3ffff) 
-	    sequence->byte_rate = 0;        /* mpeg-1 VBR */ 
-
-	switch (sequence->pixel_width) {
-	case 0:	case 15:	/* illegal */
-	    sequence->pixel_width = sequence->pixel_height = 0;		return;
-	case 1:	/* square pixels */
-	    sequence->pixel_width = sequence->pixel_height = 1;		return;
-	case 3:	/* 720x576 16:9 */
-	    sequence->pixel_width = 64;	sequence->pixel_height = 45;	return;
-	case 6:	/* 720x480 16:9 */
-	    sequence->pixel_width = 32;	sequence->pixel_height = 27;	return;
-	case 8: /* BT.601 625 lines 4:3 */
-	    sequence->pixel_width = 59;	sequence->pixel_height = 54;	return;
-	case 12: /* BT.601 525 lines 4:3 */
-	    sequence->pixel_width = 10;	sequence->pixel_height = 11;	return;
-	default:
-	    height = 88 * sequence->pixel_width + 1171;
-	    width = 2000;
-	}
-    }
-
-    sequence->pixel_width = width;
-    sequence->pixel_height = height;
-    simplify (&sequence->pixel_width, &sequence->pixel_height);
-}
-
-int mpeg2_guess_aspect (const mpeg2_sequence_t * sequence,
-			unsigned int * pixel_width,
-			unsigned int * pixel_height)
-{
-    static struct {
-	unsigned int width, height;
-    } video_modes[] = {
-	{720, 576}, /* 625 lines, 13.5 MHz (D1, DV, DVB, DVD) */
-	{704, 576}, /* 625 lines, 13.5 MHz (1/1 D1, DVB, DVD, 4CIF) */
-	{544, 576}, /* 625 lines, 10.125 MHz (DVB, laserdisc) */
-	{528, 576}, /* 625 lines, 10.125 MHz (3/4 D1, DVB, laserdisc) */
-	{480, 576}, /* 625 lines, 9 MHz (2/3 D1, DVB, SVCD) */
-	{352, 576}, /* 625 lines, 6.75 MHz (D2, 1/2 D1, CVD, DVB, DVD) */
-	{352, 288}, /* 625 lines, 6.75 MHz, 1 field (D4, VCD, DVB, DVD, CIF) */
-	{176, 144}, /* 625 lines, 3.375 MHz, half field (QCIF) */
-	{720, 486}, /* 525 lines, 13.5 MHz (D1) */
-	{704, 486}, /* 525 lines, 13.5 MHz */
-	{720, 480}, /* 525 lines, 13.5 MHz (DV, DSS, DVD) */
-	{704, 480}, /* 525 lines, 13.5 MHz (1/1 D1, ATSC, DVD) */
-	{544, 480}, /* 525 lines. 10.125 MHz (DSS, laserdisc) */
-	{528, 480}, /* 525 lines. 10.125 MHz (3/4 D1, laserdisc) */
-	{480, 480}, /* 525 lines, 9 MHz (2/3 D1, SVCD) */
-	{352, 480}, /* 525 lines, 6.75 MHz (D2, 1/2 D1, CVD, DVD) */
-	{352, 240}  /* 525  lines. 6.75 MHz, 1 field (D4, VCD, DSS, DVD) */
-    };
-    unsigned int width, height, pix_width, pix_height, i, DAR_16_9;
-
-    *pixel_width = sequence->pixel_width;
-    *pixel_height = sequence->pixel_height;
-    width = sequence->picture_width;
-    height = sequence->picture_height;
-    for (i = 0; i < sizeof (video_modes) / sizeof (video_modes[0]); i++)
-	if (width == video_modes[i].width && height == video_modes[i].height)
-	    break;
-    if (i == sizeof (video_modes) / sizeof (video_modes[0]) ||
-	(sequence->pixel_width == 1 && sequence->pixel_height == 1) ||
-	width != sequence->display_width || height != sequence->display_height)
-	return 0;
-
-    for (pix_height = 1; height * pix_height < 480; pix_height <<= 1);
-    height *= pix_height;
-    for (pix_width = 1; width * pix_width <= 352; pix_width <<= 1);
-    width *= pix_width;
-
-    if (! (sequence->flags & SEQ_FLAG_MPEG2)) {
-	static unsigned int mpeg1_check[2][2] = {{11, 54}, {27, 45}};
-	DAR_16_9 = (sequence->pixel_height == 27 ||
-		    sequence->pixel_height == 45);
-	if (width < 704 ||
-	    sequence->pixel_height != mpeg1_check[DAR_16_9][height == 576])
-	    return 0;
-    } else {
-	DAR_16_9 = (3 * sequence->picture_width * sequence->pixel_width >
-		    4 * sequence->picture_height * sequence->pixel_height);
-	switch (width) {
-	case 528: case 544:	pix_width *= 4; pix_height *= 3; break;
-	case 480:		pix_width *= 3; pix_height *= 2; break;
-	}
-    }
-    if (DAR_16_9) {
-	pix_width *= 4; pix_height *= 3;
-    }
-    if (height == 576) {
-	pix_width *= 59; pix_height *= 54;
-    } else {
-	pix_width *= 10; pix_height *= 11;
-    }
-    *pixel_width = pix_width;
-    *pixel_height = pix_height;
-    simplify (pixel_width, pixel_height);
-    return (height == 576) ? 1 : 2;
-}
-
-static void copy_matrix (mpeg2dec_t * mpeg2dec, int index)
-{
-    if (memcmp (mpeg2dec->quantizer_matrix[index],
-		mpeg2dec->new_quantizer_matrix[index], 64)) {
-	memcpy (mpeg2dec->quantizer_matrix[index],
-		mpeg2dec->new_quantizer_matrix[index], 64);
-	mpeg2dec->scaled[index] = -1;
-    }
-}
-
-static void finalize_matrix (mpeg2dec_t * mpeg2dec)
-{
-    mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
-    int i;
-
-    for (i = 0; i < 2; i++) {
-	if (mpeg2dec->copy_matrix & (1 << i))
-	    copy_matrix (mpeg2dec, i);
-	if ((mpeg2dec->copy_matrix & (4 << i)) &&
-	    memcmp (mpeg2dec->quantizer_matrix[i],
-		    mpeg2dec->new_quantizer_matrix[i+2], 64)) {
-	    copy_matrix (mpeg2dec, i + 2);
-	    decoder->chroma_quantizer[i] = decoder->quantizer_prescale[i+2];
-	} else if (mpeg2dec->copy_matrix & (5 << i))
-	    decoder->chroma_quantizer[i] = decoder->quantizer_prescale[i];
-    }
-}
-
-static mpeg2_state_t invalid_end_action (mpeg2dec_t * mpeg2dec)
-{
-    mpeg2_reset_info (&(mpeg2dec->info));
-    mpeg2dec->info.gop = NULL;
-    info_user_data (mpeg2dec);
-    mpeg2_header_state_init (mpeg2dec);
-    mpeg2dec->sequence = mpeg2dec->new_sequence;
-    mpeg2dec->action = mpeg2_seek_header;
-    mpeg2dec->state = STATE_SEQUENCE;
-    return STATE_SEQUENCE;
-}
-
-void mpeg2_header_sequence_finalize (mpeg2dec_t * mpeg2dec)
-{
-    mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence);
-    mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
-
-    finalize_sequence (sequence);
-    finalize_matrix (mpeg2dec);
-
-    decoder->mpeg1 = !(sequence->flags & SEQ_FLAG_MPEG2);
-    decoder->width = sequence->width;
-    decoder->height = sequence->height;
-    decoder->vertical_position_extension = (sequence->picture_height > 2800);
-    decoder->chroma_format = ((sequence->chroma_width == sequence->width) +
-			      (sequence->chroma_height == sequence->height));
-
-    if (mpeg2dec->sequence.width != (unsigned)-1) {
-	/*
-	 * According to 6.1.1.6, repeat sequence headers should be
-	 * identical to the original. However some encoders dont
-	 * respect that and change various fields (including bitrate
-	 * and aspect ratio) in the repeat sequence headers. So we
-	 * choose to be as conservative as possible and only restart
-	 * the decoder if the width, height, chroma_width,
-	 * chroma_height or low_delay flag are modified.
-	 */
-	if (sequence->width != mpeg2dec->sequence.width ||
-	    sequence->height != mpeg2dec->sequence.height ||
-	    sequence->chroma_width != mpeg2dec->sequence.chroma_width ||
-	    sequence->chroma_height != mpeg2dec->sequence.chroma_height ||
-	    ((sequence->flags ^ mpeg2dec->sequence.flags) &
-	     SEQ_FLAG_LOW_DELAY)) {
-	    decoder->stride_frame = sequence->width;
-	    mpeg2_header_end (mpeg2dec);
-	    mpeg2dec->action = invalid_end_action;
-	    mpeg2dec->state = STATE_INVALID_END;
-	    return;
-	}
-	mpeg2dec->state = (memcmp (&(mpeg2dec->sequence), sequence,
-				   sizeof (mpeg2_sequence_t)) ?
-			   STATE_SEQUENCE_MODIFIED : STATE_SEQUENCE_REPEATED);
-    } else
-	decoder->stride_frame = sequence->width;
-    mpeg2dec->sequence = *sequence;
-    mpeg2_reset_info (&(mpeg2dec->info));
-    mpeg2dec->info.sequence = &(mpeg2dec->sequence);
-    mpeg2dec->info.gop = NULL;
-    info_user_data (mpeg2dec);
-}
-
-int mpeg2_header_gop (mpeg2dec_t * mpeg2dec)
-{
-    uint8_t * buffer = mpeg2dec->chunk_start;
-    mpeg2_gop_t * gop = &(mpeg2dec->new_gop);
-
-    if (! (buffer[1] & 8))
-	return 1;
-    gop->hours = (buffer[0] >> 2) & 31;
-    gop->minutes = ((buffer[0] << 4) | (buffer[1] >> 4)) & 63;
-    gop->seconds = ((buffer[1] << 3) | (buffer[2] >> 5)) & 63;
-    gop->pictures = ((buffer[2] << 1) | (buffer[3] >> 7)) & 63;
-    gop->flags = (buffer[0] >> 7) | ((buffer[3] >> 4) & 6);
-    mpeg2dec->state = STATE_GOP;
-    return 0;
-}
-
-void mpeg2_header_gop_finalize (mpeg2dec_t * mpeg2dec)
-{
-    mpeg2dec->gop = mpeg2dec->new_gop;
-    mpeg2_reset_info (&(mpeg2dec->info));
-    mpeg2dec->info.gop = &(mpeg2dec->gop);
-    info_user_data (mpeg2dec);
-}
-
-void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int b_type)
-{
-    int i;
-
-    for (i = 0; i < 3; i++)
-	if (mpeg2dec->fbuf[1] != &mpeg2dec->fbuf_alloc[i].fbuf &&
-	    mpeg2dec->fbuf[2] != &mpeg2dec->fbuf_alloc[i].fbuf) {
-	    mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[i].fbuf;
-	    mpeg2dec->info.current_fbuf = mpeg2dec->fbuf[0];
-	    if (b_type || (mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY)) {
-		if (b_type || mpeg2dec->convert)
-		    mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[0];
-		mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[0];
-	    }
-	    break;
-	}
-}
-
-int mpeg2_header_picture (mpeg2dec_t * mpeg2dec)
-{
-    uint8_t * buffer = mpeg2dec->chunk_start;
-    mpeg2_picture_t * picture = &(mpeg2dec->new_picture);
-    mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
-    int type;
-
-    mpeg2dec->state = ((mpeg2dec->state != STATE_SLICE_1ST) ?
-		       STATE_PICTURE : STATE_PICTURE_2ND);
-    mpeg2dec->ext_state = PIC_CODING_EXT;
-
-    picture->temporal_reference = (buffer[0] << 2) | (buffer[1] >> 6);
-
-    type = (buffer [1] >> 3) & 7;
-    if (type == PIC_FLAG_CODING_TYPE_P || type == PIC_FLAG_CODING_TYPE_B) {
-	/* forward_f_code and backward_f_code - used in mpeg1 only */
-	decoder->f_motion.f_code[1] = (buffer[3] >> 2) & 1;
-	decoder->f_motion.f_code[0] =
-	    (((buffer[3] << 1) | (buffer[4] >> 7)) & 7) - 1;
-	decoder->b_motion.f_code[1] = (buffer[4] >> 6) & 1;
-	decoder->b_motion.f_code[0] = ((buffer[4] >> 3) & 7) - 1;
-    }
-
-    picture->flags = PIC_FLAG_PROGRESSIVE_FRAME | type;
-    picture->tag = picture->tag2 = 0;
-    if (mpeg2dec->num_tags) {
-	if (mpeg2dec->bytes_since_tag >= mpeg2dec->chunk_ptr - buffer + 4) {
-	    mpeg2dec->num_tags = 0;
-	    picture->tag = mpeg2dec->tag_current;
-	    picture->tag2 = mpeg2dec->tag2_current;
-	    picture->flags |= PIC_FLAG_TAGS;
-	} else if (mpeg2dec->num_tags > 1) {
-	    mpeg2dec->num_tags = 1;
-	    picture->tag = mpeg2dec->tag_previous;
-	    picture->tag2 = mpeg2dec->tag2_previous;
-	    picture->flags |= PIC_FLAG_TAGS;
-	}
-    }
-    picture->nb_fields = 2;
-    picture->display_offset[0].x = picture->display_offset[1].x =
-	picture->display_offset[2].x = mpeg2dec->display_offset_x;
-    picture->display_offset[0].y = picture->display_offset[1].y =
-	picture->display_offset[2].y = mpeg2dec->display_offset_y;
-
-    /* XXXXXX decode extra_information_picture as well */
-
-    mpeg2dec->q_scale_type = 0;
-    decoder->intra_dc_precision = 7;
-    decoder->frame_pred_frame_dct = 1;
-    decoder->concealment_motion_vectors = 0;
-    decoder->scan = mpeg2_scan_norm;
-    decoder->picture_structure = FRAME_PICTURE;
-    mpeg2dec->copy_matrix = 0;
-
-    return 0;
-}
-
-static int picture_coding_ext (mpeg2dec_t * mpeg2dec)
-{
-    uint8_t * buffer = mpeg2dec->chunk_start;
-    mpeg2_picture_t * picture = &(mpeg2dec->new_picture);
-    mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
-    uint32_t flags;
-
-    /* pre subtract 1 for use later in compute_motion_vector */
-    decoder->f_motion.f_code[0] = (buffer[0] & 15) - 1;
-    decoder->f_motion.f_code[1] = (buffer[1] >> 4) - 1;
-    decoder->b_motion.f_code[0] = (buffer[1] & 15) - 1;
-    decoder->b_motion.f_code[1] = (buffer[2] >> 4) - 1;
-
-    flags = picture->flags;
-    decoder->intra_dc_precision = 7 - ((buffer[2] >> 2) & 3);
-    decoder->picture_structure = buffer[2] & 3;
-    switch (decoder->picture_structure) {
-    case TOP_FIELD:
-	flags |= PIC_FLAG_TOP_FIELD_FIRST;
-    case BOTTOM_FIELD:
-	picture->nb_fields = 1;
-	break;
-    case FRAME_PICTURE:
-	if (!(mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE)) {
-	    picture->nb_fields = (buffer[3] & 2) ? 3 : 2;
-	    flags |= (buffer[3] & 128) ? PIC_FLAG_TOP_FIELD_FIRST : 0;
-	} else
-	    picture->nb_fields = (buffer[3]&2) ? ((buffer[3]&128) ? 6 : 4) : 2;
-	break;
-    default:
-	return 1;
-    }
-    decoder->top_field_first = buffer[3] >> 7;
-    decoder->frame_pred_frame_dct = (buffer[3] >> 6) & 1;
-    decoder->concealment_motion_vectors = (buffer[3] >> 5) & 1;
-    mpeg2dec->q_scale_type = buffer[3] & 16;
-    decoder->intra_vlc_format = (buffer[3] >> 3) & 1;
-    decoder->scan = (buffer[3] & 4) ? mpeg2_scan_alt : mpeg2_scan_norm;
-    if (!(buffer[4] & 0x80))
-	flags &= ~PIC_FLAG_PROGRESSIVE_FRAME;
-    if (buffer[4] & 0x40)
-	flags |= (((buffer[4]<<26) | (buffer[5]<<18) | (buffer[6]<<10)) &
-		  PIC_MASK_COMPOSITE_DISPLAY) | PIC_FLAG_COMPOSITE_DISPLAY;
-    picture->flags = flags;
-
-    mpeg2dec->ext_state = PIC_DISPLAY_EXT | COPYRIGHT_EXT | QUANT_MATRIX_EXT;
-
-    return 0;
-}
-
-static int picture_display_ext (mpeg2dec_t * mpeg2dec)
-{
-    uint8_t * buffer = mpeg2dec->chunk_start;
-    mpeg2_picture_t * picture = &(mpeg2dec->new_picture);
-    int i, nb_pos;
-
-    nb_pos = picture->nb_fields;
-    if (mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE)
-	nb_pos >>= 1;
-
-    for (i = 0; i < nb_pos; i++) {
-	int x, y;
-
-	x = ((buffer[4*i] << 24) | (buffer[4*i+1] << 16) |
-	     (buffer[4*i+2] << 8) | buffer[4*i+3]) >> (11-2*i);
-	y = ((buffer[4*i+2] << 24) | (buffer[4*i+3] << 16) |
-	     (buffer[4*i+4] << 8) | buffer[4*i+5]) >> (10-2*i);
-	if (! (x & y & 1))
-	    return 1;
-	picture->display_offset[i].x = mpeg2dec->display_offset_x = x >> 1;
-	picture->display_offset[i].y = mpeg2dec->display_offset_y = y >> 1;
-    }
-    for (; i < 3; i++) {
-	picture->display_offset[i].x = mpeg2dec->display_offset_x;
-	picture->display_offset[i].y = mpeg2dec->display_offset_y;
-    }
-    return 0;
-}
-
-void mpeg2_header_picture_finalize (mpeg2dec_t * mpeg2dec, uint32_t accels)
-{
-    mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
-    int old_type_b = (decoder->coding_type == B_TYPE);
-    int low_delay = mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY;
-
-    finalize_matrix (mpeg2dec);
-    decoder->coding_type = mpeg2dec->new_picture.flags & PIC_MASK_CODING_TYPE;
-
-    if (mpeg2dec->state == STATE_PICTURE) {
-	mpeg2_picture_t * picture;
-	mpeg2_picture_t * other;
-
-	decoder->second_field = 0;
-
-	picture = other = mpeg2dec->pictures;
-	if (old_type_b ^ (mpeg2dec->picture < mpeg2dec->pictures + 2))
-	    picture += 2;
-	else
-	    other += 2;
-	mpeg2dec->picture = picture;
-	*picture = mpeg2dec->new_picture;
-
-	if (!old_type_b) {
-	    mpeg2dec->fbuf[2] = mpeg2dec->fbuf[1];
-	    mpeg2dec->fbuf[1] = mpeg2dec->fbuf[0];
-	}
-	mpeg2dec->fbuf[0] = NULL;
-	mpeg2_reset_info (&(mpeg2dec->info));
-	mpeg2dec->info.current_picture = picture;
-	mpeg2dec->info.display_picture = picture;
-	if (decoder->coding_type != B_TYPE) {
-	    if (!low_delay) {
-		if (mpeg2dec->first) {
-		    mpeg2dec->info.display_picture = NULL;
-		    mpeg2dec->first = 0;
-		} else {
-		    mpeg2dec->info.display_picture = other;
-		    if (other->nb_fields == 1)
-			mpeg2dec->info.display_picture_2nd = other + 1;
-		    mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[1];
-		}
-	    }
-	    if (!low_delay + !mpeg2dec->convert)
-		mpeg2dec->info.discard_fbuf =
-		    mpeg2dec->fbuf[!low_delay + !mpeg2dec->convert];
-	}
-	if (mpeg2dec->convert) {
-	    mpeg2_convert_init_t convert_init;
-	    if (!mpeg2dec->convert_start) {
-		int y_size, uv_size;
-
-		mpeg2dec->decoder.convert_id =
-		    mpeg2_malloc (mpeg2dec->convert_id_size,
-				  MPEG2_ALLOC_CONVERT_ID);
-		mpeg2dec->convert (MPEG2_CONVERT_START,
-				   mpeg2dec->decoder.convert_id,
-				   &(mpeg2dec->sequence),
-				   mpeg2dec->convert_stride, accels,
-				   mpeg2dec->convert_arg, &convert_init);
-		mpeg2dec->convert_start = convert_init.start;
-		mpeg2dec->decoder.convert = convert_init.copy;
-
-		y_size = decoder->stride_frame * mpeg2dec->sequence.height;
-		uv_size = y_size >> (2 - mpeg2dec->decoder.chroma_format);
-		mpeg2dec->yuv_buf[0][0] =
-		    (uint8_t *) mpeg2_malloc (y_size, MPEG2_ALLOC_YUV);
-		mpeg2dec->yuv_buf[0][1] =
-		    (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV);
-		mpeg2dec->yuv_buf[0][2] =
-		    (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV);
-		mpeg2dec->yuv_buf[1][0] =
-		    (uint8_t *) mpeg2_malloc (y_size, MPEG2_ALLOC_YUV);
-		mpeg2dec->yuv_buf[1][1] =
-		    (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV);
-		mpeg2dec->yuv_buf[1][2] =
-		    (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV);
-		y_size = decoder->stride_frame * 32;
-		uv_size = y_size >> (2 - mpeg2dec->decoder.chroma_format);
-		mpeg2dec->yuv_buf[2][0] =
-		    (uint8_t *) mpeg2_malloc (y_size, MPEG2_ALLOC_YUV);
-		mpeg2dec->yuv_buf[2][1] =
-		    (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV);
-		mpeg2dec->yuv_buf[2][2] =
-		    (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV);
-	    }
-	    if (!mpeg2dec->custom_fbuf) {
-		while (mpeg2dec->alloc_index < 3) {
-		    mpeg2_fbuf_t * fbuf;
-
-		    fbuf = &mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index++].fbuf;
-		    fbuf->id = NULL;
-		    fbuf->buf[0] =
-			(uint8_t *) mpeg2_malloc (convert_init.buf_size[0],
-						  MPEG2_ALLOC_CONVERTED);
-		    fbuf->buf[1] =
-			(uint8_t *) mpeg2_malloc (convert_init.buf_size[1],
-						  MPEG2_ALLOC_CONVERTED);
-		    fbuf->buf[2] =
-			(uint8_t *) mpeg2_malloc (convert_init.buf_size[2],
-						  MPEG2_ALLOC_CONVERTED);
-		}
-		mpeg2_set_fbuf (mpeg2dec, (decoder->coding_type == B_TYPE));
-	    }
-	} else if (!mpeg2dec->custom_fbuf) {
-	    while (mpeg2dec->alloc_index < 3) {
-		mpeg2_fbuf_t * fbuf;
-		int y_size, uv_size;
-
-		fbuf = &(mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index++].fbuf);
-		fbuf->id = NULL;
-		y_size = decoder->stride_frame * mpeg2dec->sequence.height;
-		uv_size = y_size >> (2 - decoder->chroma_format);
-		fbuf->buf[0] = (uint8_t *) mpeg2_malloc (y_size,
-							 MPEG2_ALLOC_YUV);
-		fbuf->buf[1] = (uint8_t *) mpeg2_malloc (uv_size,
-							 MPEG2_ALLOC_YUV);
-		fbuf->buf[2] = (uint8_t *) mpeg2_malloc (uv_size,
-							 MPEG2_ALLOC_YUV);
-	    }
-	    mpeg2_set_fbuf (mpeg2dec, (decoder->coding_type == B_TYPE));
-	}
-    } else {
-	decoder->second_field = 1;
-	mpeg2dec->picture++;	/* second field picture */
-	*(mpeg2dec->picture) = mpeg2dec->new_picture;
-	mpeg2dec->info.current_picture_2nd = mpeg2dec->picture;
-	if (low_delay || decoder->coding_type == B_TYPE)
-	    mpeg2dec->info.display_picture_2nd = mpeg2dec->picture;
-    }
-
-    info_user_data (mpeg2dec);
-}
-
-static int copyright_ext (mpeg2dec_t * mpeg2dec)
-{
-    return 0;
-}
-
-static int quant_matrix_ext (mpeg2dec_t * mpeg2dec)
-{
-    uint8_t * buffer = mpeg2dec->chunk_start;
-    int i, j;
-
-    for (i = 0; i < 4; i++)
-	if (buffer[0] & (8 >> i)) {
-	    for (j = 0; j < 64; j++)
-		mpeg2dec->new_quantizer_matrix[i][mpeg2_scan_norm[j]] =
-		    (buffer[j] << (i+5)) | (buffer[j+1] >> (3-i));
-	    mpeg2dec->copy_matrix |= 1 << i;
-	    buffer += 64;
-	}
-
-    return 0;
-}
-
-int mpeg2_header_extension (mpeg2dec_t * mpeg2dec)
-{
-    static int (* parser[]) (mpeg2dec_t *) = {
-	0, sequence_ext, sequence_display_ext, quant_matrix_ext,
-	copyright_ext, 0, 0, picture_display_ext, picture_coding_ext
-    };
-    int ext, ext_bit;
-
-    ext = mpeg2dec->chunk_start[0] >> 4;
-    ext_bit = 1 << ext;
-
-    if (!(mpeg2dec->ext_state & ext_bit))
-	return 0;	/* ignore illegal extensions */
-    mpeg2dec->ext_state &= ~ext_bit;
-    return parser[ext] (mpeg2dec);
-}
-
-int mpeg2_header_user_data (mpeg2dec_t * mpeg2dec)
-{
-    mpeg2dec->user_data_len += mpeg2dec->chunk_ptr - 1 - mpeg2dec->chunk_start;
-    mpeg2dec->chunk_start = mpeg2dec->chunk_ptr - 1;
-    
-    return 0;
-}
-
-static void prescale (mpeg2dec_t * mpeg2dec, int index)
-{
-    static int non_linear_scale [] = {
-	 0,  1,  2,  3,  4,  5,   6,   7,
-	 8, 10, 12, 14, 16, 18,  20,  22,
-	24, 28, 32, 36, 40, 44,  48,  52,
-	56, 64, 72, 80, 88, 96, 104, 112
-    };
-    int i, j, k;
-    mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
-
-    if (mpeg2dec->scaled[index] != mpeg2dec->q_scale_type) {
-	mpeg2dec->scaled[index] = mpeg2dec->q_scale_type;
-	for (i = 0; i < 32; i++) {
-	    k = mpeg2dec->q_scale_type ? non_linear_scale[i] : (i << 1);
-	    for (j = 0; j < 64; j++)
-		decoder->quantizer_prescale[index][i][j] =
-		    k * mpeg2dec->quantizer_matrix[index][j];
-	}
-    }
-}
-
-mpeg2_state_t mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec)
-{
-    mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
-
-    mpeg2dec->info.user_data = NULL;	mpeg2dec->info.user_data_len = 0;
-    mpeg2dec->state = ((mpeg2dec->picture->nb_fields > 1 ||
-			mpeg2dec->state == STATE_PICTURE_2ND) ?
-		       STATE_SLICE : STATE_SLICE_1ST);
-
-    if (mpeg2dec->decoder.coding_type != D_TYPE) {
-	prescale (mpeg2dec, 0);
-	if (decoder->chroma_quantizer[0] == decoder->quantizer_prescale[2])
-	    prescale (mpeg2dec, 2);
-	if (mpeg2dec->decoder.coding_type != I_TYPE) {
-	    prescale (mpeg2dec, 1);
-	    if (decoder->chroma_quantizer[1] == decoder->quantizer_prescale[3])
-		prescale (mpeg2dec, 3);
-	}
-    }
-
-    if (!(mpeg2dec->nb_decode_slices))
-	mpeg2dec->picture->flags |= PIC_FLAG_SKIP;
-    else if (mpeg2dec->convert_start) {
-	mpeg2dec->convert_start (decoder->convert_id, mpeg2dec->fbuf[0],
-				 mpeg2dec->picture, mpeg2dec->info.gop);
-
-	if (mpeg2dec->decoder.coding_type == B_TYPE)
-	    mpeg2_init_fbuf (&(mpeg2dec->decoder), mpeg2dec->yuv_buf[2],
-			     mpeg2dec->yuv_buf[mpeg2dec->yuv_index ^ 1],
-			     mpeg2dec->yuv_buf[mpeg2dec->yuv_index]);
-	else {
-	    mpeg2_init_fbuf (&(mpeg2dec->decoder),
-			     mpeg2dec->yuv_buf[mpeg2dec->yuv_index ^ 1],
-			     mpeg2dec->yuv_buf[mpeg2dec->yuv_index],
-			     mpeg2dec->yuv_buf[mpeg2dec->yuv_index]);
-	    if (mpeg2dec->state == STATE_SLICE)
-		mpeg2dec->yuv_index ^= 1;
-	}
-    } else {
-	int b_type;
-
-	b_type = (mpeg2dec->decoder.coding_type == B_TYPE);
-	mpeg2_init_fbuf (&(mpeg2dec->decoder), mpeg2dec->fbuf[0]->buf,
-			 mpeg2dec->fbuf[b_type + 1]->buf,
-			 mpeg2dec->fbuf[b_type]->buf);
-    }
-    mpeg2dec->action = NULL;
-    return STATE_INTERNAL_NORETURN;
-}
-
-static mpeg2_state_t seek_sequence (mpeg2dec_t * mpeg2dec)
-{
-    mpeg2_reset_info (&(mpeg2dec->info));
-    mpeg2dec->info.sequence = NULL;
-    mpeg2dec->info.gop = NULL;
-    mpeg2_header_state_init (mpeg2dec);
-    mpeg2dec->action = mpeg2_seek_header;
-    return mpeg2_seek_header (mpeg2dec);
-}
-
-mpeg2_state_t mpeg2_header_end (mpeg2dec_t * mpeg2dec)
-{
-    mpeg2_picture_t * picture;
-    int b_type;
-
-    b_type = (mpeg2dec->decoder.coding_type == B_TYPE);
-    picture = mpeg2dec->pictures;
-    if ((mpeg2dec->picture >= picture + 2) ^ b_type)
-	picture = mpeg2dec->pictures + 2;
-
-    mpeg2_reset_info (&(mpeg2dec->info));
-    if (!(mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY)) {
-	mpeg2dec->info.display_picture = picture;
-	if (picture->nb_fields == 1)
-	    mpeg2dec->info.display_picture_2nd = picture + 1;
-	mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[b_type];
-	if (!mpeg2dec->convert)
-	    mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[b_type + 1];
-    } else if (!mpeg2dec->convert)
-	mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[b_type];
-    mpeg2dec->action = seek_sequence;
-    return STATE_END;
-}
diff --git a/src/libmpeg2new/libmpeg2/idct.c b/src/libmpeg2new/libmpeg2/idct.c
deleted file mode 100644
index 8b982bb33..000000000
--- a/src/libmpeg2new/libmpeg2/idct.c
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
- * idct.c
- * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include "config.h"
-
-#include <stdlib.h>
-#include <inttypes.h>
-
-#include "../include/mpeg2.h"
-#include "../include/attributes.h"
-#include "mpeg2_internal.h"
-
-#define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */
-#define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */
-#define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */
-#define W5 1609 /* 2048 * sqrt (2) * cos (5 * pi / 16) */
-#define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */
-#define W7 565  /* 2048 * sqrt (2) * cos (7 * pi / 16) */
-
-/* idct main entry point  */
-void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride);
-void (* mpeg2_idct_add) (int last, int16_t * block,
-			 uint8_t * dest, int stride);
-
-/*
- * In legal streams, the IDCT output should be between -384 and +384.
- * In corrupted streams, it is possible to force the IDCT output to go
- * to +-3826 - this is the worst case for a column IDCT where the
- * column inputs are 16-bit values.
- */
-uint8_t mpeg2_clip[3840 * 2 + 256];
-#define CLIP(i) ((mpeg2_clip + 3840)[i])
-
-#if 0
-#define BUTTERFLY(t0,t1,W0,W1,d0,d1)	\
-do {					\
-    t0 = W0 * d0 + W1 * d1;		\
-    t1 = W0 * d1 - W1 * d0;		\
-} while (0)
-#else
-#define BUTTERFLY(t0,t1,W0,W1,d0,d1)	\
-do {					\
-    int tmp = W0 * (d0 + d1);		\
-    t0 = tmp + (W1 - W0) * d1;		\
-    t1 = tmp - (W1 + W0) * d0;		\
-} while (0)
-#endif
-
-static void inline idct_row (int16_t * const block)
-{
-    int d0, d1, d2, d3;
-    int a0, a1, a2, a3, b0, b1, b2, b3;
-    int t0, t1, t2, t3;
-
-    /* shortcut */
-    if (likely (!(block[1] | ((int32_t *)block)[1] | ((int32_t *)block)[2] |
-		  ((int32_t *)block)[3]))) {
-	uint32_t tmp = (uint16_t) (block[0] >> 1);
-	tmp |= tmp << 16;
-	((int32_t *)block)[0] = tmp;
-	((int32_t *)block)[1] = tmp;
-	((int32_t *)block)[2] = tmp;
-	((int32_t *)block)[3] = tmp;
-	return;
-    }
-
-    d0 = (block[0] << 11) + 2048;
-    d1 = block[1];
-    d2 = block[2] << 11;
-    d3 = block[3];
-    t0 = d0 + d2;
-    t1 = d0 - d2;
-    BUTTERFLY (t2, t3, W6, W2, d3, d1);
-    a0 = t0 + t2;
-    a1 = t1 + t3;
-    a2 = t1 - t3;
-    a3 = t0 - t2;
-
-    d0 = block[4];
-    d1 = block[5];
-    d2 = block[6];
-    d3 = block[7];
-    BUTTERFLY (t0, t1, W7, W1, d3, d0);
-    BUTTERFLY (t2, t3, W3, W5, d1, d2);
-    b0 = t0 + t2;
-    b3 = t1 + t3;
-    t0 -= t2;
-    t1 -= t3;
-    b1 = ((t0 + t1) >> 8) * 181;
-    b2 = ((t0 - t1) >> 8) * 181;
-
-    block[0] = (a0 + b0) >> 12;
-    block[1] = (a1 + b1) >> 12;
-    block[2] = (a2 + b2) >> 12;
-    block[3] = (a3 + b3) >> 12;
-    block[4] = (a3 - b3) >> 12;
-    block[5] = (a2 - b2) >> 12;
-    block[6] = (a1 - b1) >> 12;
-    block[7] = (a0 - b0) >> 12;
-}
-
-static void inline idct_col (int16_t * const block)
-{
-    int d0, d1, d2, d3;
-    int a0, a1, a2, a3, b0, b1, b2, b3;
-    int t0, t1, t2, t3;
-
-    d0 = (block[8*0] << 11) + 65536;
-    d1 = block[8*1];
-    d2 = block[8*2] << 11;
-    d3 = block[8*3];
-    t0 = d0 + d2;
-    t1 = d0 - d2;
-    BUTTERFLY (t2, t3, W6, W2, d3, d1);
-    a0 = t0 + t2;
-    a1 = t1 + t3;
-    a2 = t1 - t3;
-    a3 = t0 - t2;
-
-    d0 = block[8*4];
-    d1 = block[8*5];
-    d2 = block[8*6];
-    d3 = block[8*7];
-    BUTTERFLY (t0, t1, W7, W1, d3, d0);
-    BUTTERFLY (t2, t3, W3, W5, d1, d2);
-    b0 = t0 + t2;
-    b3 = t1 + t3;
-    t0 -= t2;
-    t1 -= t3;
-    b1 = ((t0 + t1) >> 8) * 181;
-    b2 = ((t0 - t1) >> 8) * 181;
-
-    block[8*0] = (a0 + b0) >> 17;
-    block[8*1] = (a1 + b1) >> 17;
-    block[8*2] = (a2 + b2) >> 17;
-    block[8*3] = (a3 + b3) >> 17;
-    block[8*4] = (a3 - b3) >> 17;
-    block[8*5] = (a2 - b2) >> 17;
-    block[8*6] = (a1 - b1) >> 17;
-    block[8*7] = (a0 - b0) >> 17;
-}
-
-static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest,
-			       const int stride)
-{
-    int i;
-
-    for (i = 0; i < 8; i++)
-	idct_row (block + 8 * i);
-    for (i = 0; i < 8; i++)
-	idct_col (block + i);
-    do {
-	dest[0] = CLIP (block[0]);
-	dest[1] = CLIP (block[1]);
-	dest[2] = CLIP (block[2]);
-	dest[3] = CLIP (block[3]);
-	dest[4] = CLIP (block[4]);
-	dest[5] = CLIP (block[5]);
-	dest[6] = CLIP (block[6]);
-	dest[7] = CLIP (block[7]);
-
-	((int32_t *)block)[0] = 0;	((int32_t *)block)[1] = 0;
-	((int32_t *)block)[2] = 0;	((int32_t *)block)[3] = 0;
-
-	dest += stride;
-	block += 8;
-    } while (--i);
-}
-
-static void mpeg2_idct_add_c (const int last, int16_t * block,
-			      uint8_t * dest, const int stride)
-{
-    int i;
-
-    if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) {
-	for (i = 0; i < 8; i++)
-	    idct_row (block + 8 * i);
-	for (i = 0; i < 8; i++)
-	    idct_col (block + i);
-	do {
-	    dest[0] = CLIP (block[0] + dest[0]);
-	    dest[1] = CLIP (block[1] + dest[1]);
-	    dest[2] = CLIP (block[2] + dest[2]);
-	    dest[3] = CLIP (block[3] + dest[3]);
-	    dest[4] = CLIP (block[4] + dest[4]);
-	    dest[5] = CLIP (block[5] + dest[5]);
-	    dest[6] = CLIP (block[6] + dest[6]);
-	    dest[7] = CLIP (block[7] + dest[7]);
-
-	    ((int32_t *)block)[0] = 0;	((int32_t *)block)[1] = 0;
-	    ((int32_t *)block)[2] = 0;	((int32_t *)block)[3] = 0;
-
-	    dest += stride;
-	    block += 8;
-	} while (--i);
-    } else {
-	int DC;
-
-	DC = (block[0] + 64) >> 7;
-	block[0] = block[63] = 0;
-	i = 8;
-	do {
-	    dest[0] = CLIP (DC + dest[0]);
-	    dest[1] = CLIP (DC + dest[1]);
-	    dest[2] = CLIP (DC + dest[2]);
-	    dest[3] = CLIP (DC + dest[3]);
-	    dest[4] = CLIP (DC + dest[4]);
-	    dest[5] = CLIP (DC + dest[5]);
-	    dest[6] = CLIP (DC + dest[6]);
-	    dest[7] = CLIP (DC + dest[7]);
-	    dest += stride;
-	} while (--i);
-    }
-}
-
-void mpeg2_idct_init (uint32_t accel)
-{
-#ifdef ARCH_X86
-    if (accel & MPEG2_ACCEL_X86_MMXEXT) {
-	mpeg2_idct_copy = mpeg2_idct_copy_mmxext;
-	mpeg2_idct_add = mpeg2_idct_add_mmxext;
-	mpeg2_idct_mmx_init ();
-    } else if (accel & MPEG2_ACCEL_X86_MMX) {
-	mpeg2_idct_copy = mpeg2_idct_copy_mmx;
-	mpeg2_idct_add = mpeg2_idct_add_mmx;
-	mpeg2_idct_mmx_init ();
-    } else
-#endif
-#ifdef ARCH_PPC
-    if (accel & MPEG2_ACCEL_PPC_ALTIVEC) {
-	mpeg2_idct_copy = mpeg2_idct_copy_altivec;
-	mpeg2_idct_add = mpeg2_idct_add_altivec;
-	mpeg2_idct_altivec_init ();
-    } else
-#endif
-#ifdef ARCH_ALPHA
-    if (accel & MPEG2_ACCEL_ALPHA_MVI) {
-	mpeg2_idct_copy = mpeg2_idct_copy_mvi;
-	mpeg2_idct_add = mpeg2_idct_add_mvi;
-	mpeg2_idct_alpha_init ();
-    } else if (accel & MPEG2_ACCEL_ALPHA) {
-	int i;
-
-	mpeg2_idct_copy = mpeg2_idct_copy_alpha;
-	mpeg2_idct_add = mpeg2_idct_add_alpha;
-	mpeg2_idct_alpha_init ();
-	for (i = -3840; i < 3840 + 256; i++)
-	    CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 255 : i);
-    } else
-#endif
-    {
-	extern uint8_t mpeg2_scan_norm[64];
-	extern uint8_t mpeg2_scan_alt[64];
-	int i, j;
-
-	mpeg2_idct_copy = mpeg2_idct_copy_c;
-	mpeg2_idct_add = mpeg2_idct_add_c;
-	for (i = -3840; i < 3840 + 256; i++)
-	    CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 255 : i);
-	for (i = 0; i < 64; i++) {
-	    j = mpeg2_scan_norm[i];
-	    mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
-	    j = mpeg2_scan_alt[i];
-	    mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
-	}
-    }
-}
diff --git a/src/libmpeg2new/libmpeg2/idct_alpha.c b/src/libmpeg2new/libmpeg2/idct_alpha.c
deleted file mode 100644
index 1d8fd08ee..000000000
--- a/src/libmpeg2new/libmpeg2/idct_alpha.c
+++ /dev/null
@@ -1,379 +0,0 @@
-/*
- * idct_alpha.c
- * Copyright (C) 2002-2003 Falk Hueffner <falk@debian.org>
- * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include "config.h"
-
-#ifdef ARCH_ALPHA
-
-#include <stdlib.h>
-#include <inttypes.h>
-
-#include "mpeg2.h"
-#include <xine/attributes.h>
-#include "mpeg2_internal.h"
-#include "alpha_asm.h"
-
-#define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */
-#define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */
-#define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */
-#define W5 1609 /* 2048 * sqrt (2) * cos (5 * pi / 16) */
-#define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */
-#define W7 565  /* 2048 * sqrt (2) * cos (7 * pi / 16) */
-
-extern uint8_t mpeg2_clip[3840 * 2 + 256];
-#define CLIP(i) ((mpeg2_clip + 3840)[i])
-
-#if 0
-#define BUTTERFLY(t0,t1,W0,W1,d0,d1)	\
-do {					\
-    t0 = W0 * d0 + W1 * d1;			\
-    t1 = W0 * d1 - W1 * d0;			\
-} while (0)
-#else
-#define BUTTERFLY(t0,t1,W0,W1,d0,d1)	\
-do {					\
-    int_fast32_t tmp = W0 * (d0 + d1);	\
-    t0 = tmp + (W1 - W0) * d1;		\
-    t1 = tmp - (W1 + W0) * d0;		\
-} while (0)
-#endif
-
-static void inline idct_row (int16_t * const block)
-{
-    uint64_t l, r;
-    int_fast32_t d0, d1, d2, d3;
-    int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3;
-    int_fast32_t t0, t1, t2, t3;
-
-    l = ldq (block);
-    r = ldq (block + 4);
-
-    /* shortcut */
-    if (likely (!((l & ~0xffffUL) | r))) {
-	uint64_t tmp = (uint16_t) (l >> 1);
-	tmp |= tmp << 16;
-	tmp |= tmp << 32;
-	((int32_t *)block)[0] = tmp;
-	((int32_t *)block)[1] = tmp;
-	((int32_t *)block)[2] = tmp;
-	((int32_t *)block)[3] = tmp;
-	return;
-    }
-
-    d0 = (sextw (l) << 11) + 2048;
-    d1 = sextw (extwl (l, 2));
-    d2 = sextw (extwl (l, 4)) << 11;
-    d3 = sextw (extwl (l, 6));
-    t0 = d0 + d2;
-    t1 = d0 - d2;
-    BUTTERFLY (t2, t3, W6, W2, d3, d1);
-    a0 = t0 + t2;
-    a1 = t1 + t3;
-    a2 = t1 - t3;
-    a3 = t0 - t2;
-
-    d0 = sextw (r);
-    d1 = sextw (extwl (r, 2));
-    d2 = sextw (extwl (r, 4));
-    d3 = sextw (extwl (r, 6));
-    BUTTERFLY (t0, t1, W7, W1, d3, d0);
-    BUTTERFLY (t2, t3, W3, W5, d1, d2);
-    b0 = t0 + t2;
-    b3 = t1 + t3;
-    t0 -= t2;
-    t1 -= t3;
-    b1 = ((t0 + t1) >> 8) * 181;
-    b2 = ((t0 - t1) >> 8) * 181;
-
-    block[0] = (a0 + b0) >> 12;
-    block[1] = (a1 + b1) >> 12;
-    block[2] = (a2 + b2) >> 12;
-    block[3] = (a3 + b3) >> 12;
-    block[4] = (a3 - b3) >> 12;
-    block[5] = (a2 - b2) >> 12;
-    block[6] = (a1 - b1) >> 12;
-    block[7] = (a0 - b0) >> 12;
-}
-
-static void inline idct_col (int16_t * const block)
-{
-    int_fast32_t d0, d1, d2, d3;
-    int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3;
-    int_fast32_t t0, t1, t2, t3;
-
-    d0 = (block[8*0] << 11) + 65536;
-    d1 = block[8*1];
-    d2 = block[8*2] << 11;
-    d3 = block[8*3];
-    t0 = d0 + d2;
-    t1 = d0 - d2;
-    BUTTERFLY (t2, t3, W6, W2, d3, d1);
-    a0 = t0 + t2;
-    a1 = t1 + t3;
-    a2 = t1 - t3;
-    a3 = t0 - t2;
-
-    d0 = block[8*4];
-    d1 = block[8*5];
-    d2 = block[8*6];
-    d3 = block[8*7];
-    BUTTERFLY (t0, t1, W7, W1, d3, d0);
-    BUTTERFLY (t2, t3, W3, W5, d1, d2);
-    b0 = t0 + t2;
-    b3 = t1 + t3;
-    t0 -= t2;
-    t1 -= t3;
-    b1 = ((t0 + t1) >> 8) * 181;
-    b2 = ((t0 - t1) >> 8) * 181;
-
-    block[8*0] = (a0 + b0) >> 17;
-    block[8*1] = (a1 + b1) >> 17;
-    block[8*2] = (a2 + b2) >> 17;
-    block[8*3] = (a3 + b3) >> 17;
-    block[8*4] = (a3 - b3) >> 17;
-    block[8*5] = (a2 - b2) >> 17;
-    block[8*6] = (a1 - b1) >> 17;
-    block[8*7] = (a0 - b0) >> 17;
-}
-
-void mpeg2_idct_copy_mvi (int16_t * block, uint8_t * dest, const int stride)
-{
-    uint64_t clampmask;
-    int i;
-
-    for (i = 0; i < 8; i++)
-	idct_row (block + 8 * i);
-
-    for (i = 0; i < 8; i++)
-	idct_col (block + i);
-
-    clampmask = zap (-1, 0xaa);	/* 0x00ff00ff00ff00ff */
-    do {
-	uint64_t shorts0, shorts1;
-
-	shorts0 = ldq (block);
-	shorts0 = maxsw4 (shorts0, 0);
-	shorts0 = minsw4 (shorts0, clampmask);
-	stl (pkwb (shorts0), dest);
-
-	shorts1 = ldq (block + 4);
-	shorts1 = maxsw4 (shorts1, 0);
-	shorts1 = minsw4 (shorts1, clampmask);
-	stl (pkwb (shorts1), dest + 4);
-
-	stq (0, block);
-	stq (0, block + 4);
-
-	dest += stride;
-	block += 8;
-    } while (--i);
-}
-
-void mpeg2_idct_add_mvi (const int last, int16_t * block,
-			 uint8_t * dest, const int stride)
-{
-    uint64_t clampmask;
-    uint64_t signmask;
-    int i;
-
-    if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) {
-	for (i = 0; i < 8; i++)
-	    idct_row (block + 8 * i);
-	for (i = 0; i < 8; i++)
-	    idct_col (block + i);
-	clampmask = zap (-1, 0xaa);	/* 0x00ff00ff00ff00ff */
-	signmask = zap (-1, 0x33);
-	signmask ^= signmask >> 1;	/* 0x8000800080008000 */
-
-	do {
-	    uint64_t shorts0, pix0, signs0;
-	    uint64_t shorts1, pix1, signs1;
-
-	    shorts0 = ldq (block);
-	    shorts1 = ldq (block + 4);
-
-	    pix0 = unpkbw (ldl (dest));
-	    /* signed subword add (MMX paddw).  */
-	    signs0 = shorts0 & signmask;
-	    shorts0 &= ~signmask;
-	    shorts0 += pix0;
-	    shorts0 ^= signs0;
-	    /* clamp. */
-	    shorts0 = maxsw4 (shorts0, 0);
-	    shorts0 = minsw4 (shorts0, clampmask);	
-
-	    /* next 4.  */
-	    pix1 = unpkbw (ldl (dest + 4));
-	    signs1 = shorts1 & signmask;
-	    shorts1 &= ~signmask;
-	    shorts1 += pix1;
-	    shorts1 ^= signs1;
-	    shorts1 = maxsw4 (shorts1, 0);
-	    shorts1 = minsw4 (shorts1, clampmask);
-
-	    stl (pkwb (shorts0), dest);
-	    stl (pkwb (shorts1), dest + 4);
-	    stq (0, block);
-	    stq (0, block + 4);
-
-	    dest += stride;
-	    block += 8;
-	} while (--i);
-    } else {
-	int DC;
-	uint64_t p0, p1, p2, p3, p4, p5, p6, p7;
-	uint64_t DCs;
-
-	DC = (block[0] + 64) >> 7;
-	block[0] = block[63] = 0;
-
-	p0 = ldq (dest + 0 * stride);
-	p1 = ldq (dest + 1 * stride);
-	p2 = ldq (dest + 2 * stride);
-	p3 = ldq (dest + 3 * stride);
-	p4 = ldq (dest + 4 * stride);
-	p5 = ldq (dest + 5 * stride);
-	p6 = ldq (dest + 6 * stride);
-	p7 = ldq (dest + 7 * stride);
-
-	if (DC > 0) {
-	    DCs = BYTE_VEC (likely (DC <= 255) ? DC : 255);
-	    p0 += minub8 (DCs, ~p0);
-	    p1 += minub8 (DCs, ~p1);
-	    p2 += minub8 (DCs, ~p2);
-	    p3 += minub8 (DCs, ~p3);
-	    p4 += minub8 (DCs, ~p4);
-	    p5 += minub8 (DCs, ~p5);
-	    p6 += minub8 (DCs, ~p6);
-	    p7 += minub8 (DCs, ~p7);
-	} else {
-	    DCs = BYTE_VEC (likely (-DC <= 255) ? -DC : 255);
-	    p0 -= minub8 (DCs, p0);
-	    p1 -= minub8 (DCs, p1);
-	    p2 -= minub8 (DCs, p2);
-	    p3 -= minub8 (DCs, p3);
-	    p4 -= minub8 (DCs, p4);
-	    p5 -= minub8 (DCs, p5);
-	    p6 -= minub8 (DCs, p6);
-	    p7 -= minub8 (DCs, p7);
-	}
-
-	stq (p0, dest + 0 * stride);
-	stq (p1, dest + 1 * stride);
-	stq (p2, dest + 2 * stride);
-	stq (p3, dest + 3 * stride);
-	stq (p4, dest + 4 * stride);
-	stq (p5, dest + 5 * stride);
-	stq (p6, dest + 6 * stride);
-	stq (p7, dest + 7 * stride);
-    }
-}
-
-void mpeg2_idct_copy_alpha (int16_t * block, uint8_t * dest, const int stride)
-{
-    int i;
-
-    for (i = 0; i < 8; i++)
-	idct_row (block + 8 * i);
-    for (i = 0; i < 8; i++)
-	idct_col (block + i);
-    do {
-	dest[0] = CLIP (block[0]);
-	dest[1] = CLIP (block[1]);
-	dest[2] = CLIP (block[2]);
-	dest[3] = CLIP (block[3]);
-	dest[4] = CLIP (block[4]);
-	dest[5] = CLIP (block[5]);
-	dest[6] = CLIP (block[6]);
-	dest[7] = CLIP (block[7]);
-
-	stq(0, block);
-	stq(0, block + 4);
-
-	dest += stride;
-	block += 8;
-    } while (--i);
-}
-
-void mpeg2_idct_add_alpha (const int last, int16_t * block,
-			   uint8_t * dest, const int stride)
-{
-    int i;
-
-    if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) {
-	for (i = 0; i < 8; i++)
-	    idct_row (block + 8 * i);
-	for (i = 0; i < 8; i++)
-	    idct_col (block + i);
-	do {
-	    dest[0] = CLIP (block[0] + dest[0]);
-	    dest[1] = CLIP (block[1] + dest[1]);
-	    dest[2] = CLIP (block[2] + dest[2]);
-	    dest[3] = CLIP (block[3] + dest[3]);
-	    dest[4] = CLIP (block[4] + dest[4]);
-	    dest[5] = CLIP (block[5] + dest[5]);
-	    dest[6] = CLIP (block[6] + dest[6]);
-	    dest[7] = CLIP (block[7] + dest[7]);
-
-	    stq(0, block);
-	    stq(0, block + 4);
-
-	    dest += stride;
-	    block += 8;
-	} while (--i);
-    } else {
-	int DC;
-
-	DC = (block[0] + 64) >> 7;
-	block[0] = block[63] = 0;
-	i = 8;
-	do {
-	    dest[0] = CLIP (DC + dest[0]);
-	    dest[1] = CLIP (DC + dest[1]);
-	    dest[2] = CLIP (DC + dest[2]);
-	    dest[3] = CLIP (DC + dest[3]);
-	    dest[4] = CLIP (DC + dest[4]);
-	    dest[5] = CLIP (DC + dest[5]);
-	    dest[6] = CLIP (DC + dest[6]);
-	    dest[7] = CLIP (DC + dest[7]);
-	    dest += stride;
-	} while (--i);
-    }
-}
-
-void mpeg2_idct_alpha_init (void)
-{
-    extern uint8_t mpeg2_scan_norm[64];
-    extern uint8_t mpeg2_scan_alt[64];
-    int i, j;
-
-    for (i = 0; i < 64; i++) {
-	j = mpeg2_scan_norm[i];
-	mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
-	j = mpeg2_scan_alt[i];
-	mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
-    }
-}
-
-#endif /* ARCH_ALPHA */
diff --git a/src/libmpeg2new/libmpeg2/idct_altivec.c b/src/libmpeg2new/libmpeg2/idct_altivec.c
deleted file mode 100644
index f15bca165..000000000
--- a/src/libmpeg2new/libmpeg2/idct_altivec.c
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * idct_altivec.c
- * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include "config.h"
-
-#ifdef ARCH_PPC
-
-#ifdef HAVE_ALTIVEC_H
-#include <altivec.h>
-#endif
-#include <inttypes.h>
-
-#include "mpeg2.h"
-#include <xine/attributes.h>
-#include "mpeg2_internal.h"
-
-typedef vector signed char vector_s8_t;
-typedef vector unsigned char vector_u8_t;
-typedef vector signed short vector_s16_t;
-typedef vector unsigned short vector_u16_t;
-typedef vector signed int vector_s32_t;
-typedef vector unsigned int vector_u32_t;
-
-#if defined(HAVE_ALTIVEC_H) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303)
-/* work around gcc <3.3 vec_mergel bug */
-static inline vector_s16_t my_vec_mergel (vector_s16_t const A,
-					  vector_s16_t const B)
-{
-    static const vector_u8_t mergel = {
-	0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b,
-	0x0c, 0x0d, 0x1c, 0x1d, 0x0e, 0x0f, 0x1e, 0x1f
-    };
-    return vec_perm (A, B, mergel);
-}
-#undef vec_mergel
-#define vec_mergel my_vec_mergel
-#endif
-
-#ifdef HAVE_ALTIVEC_H	/* gnu */
-#define VEC_S16(a,b,c,d,e,f,g,h) {a, b, c, d, e, f, g, h}
-#else			/* apple */
-#define VEC_S16(a,b,c,d,e,f,g,h) (vector_s16_t) (a, b, c, d, e, f, g, h)
-#endif
-
-static const vector_s16_t constants ATTR_ALIGN(16) =
-    VEC_S16 (23170, 13573, 6518, 21895, -23170, -21895, 32, 31);
-static const vector_s16_t constants_1 ATTR_ALIGN(16) =
-    VEC_S16 (16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725);
-static const vector_s16_t constants_2 ATTR_ALIGN(16) =
-    VEC_S16 (16069, 22289, 20995, 18895, 16069, 18895, 20995, 22289);
-static const vector_s16_t constants_3 ATTR_ALIGN(16) =
-    VEC_S16 (21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692);
-static const vector_s16_t constants_4 ATTR_ALIGN(16) =
-    VEC_S16 (13623, 18895, 17799, 16019, 13623, 16019, 17799, 18895);
-
-#define IDCT								\
-    vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7;		\
-    vector_s16_t vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7;		\
-    vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias;			\
-    vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8;			\
-    vector_u16_t shift;							\
-									\
-    c4 = vec_splat (constants, 0);					\
-    a0 = vec_splat (constants, 1);					\
-    a1 = vec_splat (constants, 2);					\
-    a2 = vec_splat (constants, 3);					\
-    mc4 = vec_splat (constants, 4);					\
-    ma2 = vec_splat (constants, 5);					\
-    bias = (vector_s16_t)vec_splat ((vector_s32_t)constants, 3);	\
-									\
-    zero = vec_splat_s16 (0);						\
-									\
-    vx0 = vec_adds (block[0], block[4]);				\
-    vx4 = vec_subs (block[0], block[4]);				\
-    t5 = vec_mradds (vx0, constants_1, zero);				\
-    t0 = vec_mradds (vx4, constants_1, zero);				\
-									\
-    vx1 = vec_mradds (a1, block[7], block[1]);				\
-    vx7 = vec_mradds (a1, block[1], vec_subs (zero, block[7]));		\
-    t1 = vec_mradds (vx1, constants_2, zero);				\
-    t8 = vec_mradds (vx7, constants_2, zero);				\
-									\
-    vx2 = vec_mradds (a0, block[6], block[2]);				\
-    vx6 = vec_mradds (a0, block[2], vec_subs (zero, block[6]));		\
-    t2 = vec_mradds (vx2, constants_3, zero);				\
-    t4 = vec_mradds (vx6, constants_3, zero);				\
-									\
-    vx3 = vec_mradds (block[3], constants_4, zero);			\
-    vx5 = vec_mradds (block[5], constants_4, zero);			\
-    t7 = vec_mradds (a2, vx5, vx3);					\
-    t3 = vec_mradds (ma2, vx3, vx5);					\
-									\
-    t6 = vec_adds (t8, t3);						\
-    t3 = vec_subs (t8, t3);						\
-    t8 = vec_subs (t1, t7);						\
-    t1 = vec_adds (t1, t7);						\
-    t6 = vec_mradds (a0, t6, t6);	/* a0+1 == 2*c4 */		\
-    t1 = vec_mradds (a0, t1, t1);	/* a0+1 == 2*c4 */		\
-									\
-    t7 = vec_adds (t5, t2);						\
-    t2 = vec_subs (t5, t2);						\
-    t5 = vec_adds (t0, t4);						\
-    t0 = vec_subs (t0, t4);						\
-    t4 = vec_subs (t8, t3);						\
-    t3 = vec_adds (t8, t3);						\
-									\
-    vy0 = vec_adds (t7, t1);						\
-    vy7 = vec_subs (t7, t1);						\
-    vy1 = vec_adds (t5, t3);						\
-    vy6 = vec_subs (t5, t3);						\
-    vy2 = vec_adds (t0, t4);						\
-    vy5 = vec_subs (t0, t4);						\
-    vy3 = vec_adds (t2, t6);						\
-    vy4 = vec_subs (t2, t6);						\
-									\
-    vx0 = vec_mergeh (vy0, vy4);					\
-    vx1 = vec_mergel (vy0, vy4);					\
-    vx2 = vec_mergeh (vy1, vy5);					\
-    vx3 = vec_mergel (vy1, vy5);					\
-    vx4 = vec_mergeh (vy2, vy6);					\
-    vx5 = vec_mergel (vy2, vy6);					\
-    vx6 = vec_mergeh (vy3, vy7);					\
-    vx7 = vec_mergel (vy3, vy7);					\
-									\
-    vy0 = vec_mergeh (vx0, vx4);					\
-    vy1 = vec_mergel (vx0, vx4);					\
-    vy2 = vec_mergeh (vx1, vx5);					\
-    vy3 = vec_mergel (vx1, vx5);					\
-    vy4 = vec_mergeh (vx2, vx6);					\
-    vy5 = vec_mergel (vx2, vx6);					\
-    vy6 = vec_mergeh (vx3, vx7);					\
-    vy7 = vec_mergel (vx3, vx7);					\
-									\
-    vx0 = vec_mergeh (vy0, vy4);					\
-    vx1 = vec_mergel (vy0, vy4);					\
-    vx2 = vec_mergeh (vy1, vy5);					\
-    vx3 = vec_mergel (vy1, vy5);					\
-    vx4 = vec_mergeh (vy2, vy6);					\
-    vx5 = vec_mergel (vy2, vy6);					\
-    vx6 = vec_mergeh (vy3, vy7);					\
-    vx7 = vec_mergel (vy3, vy7);					\
-									\
-    vx0 = vec_adds (vx0, bias);						\
-    t5 = vec_adds (vx0, vx4);						\
-    t0 = vec_subs (vx0, vx4);						\
-									\
-    t1 = vec_mradds (a1, vx7, vx1);					\
-    t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7));			\
-									\
-    t2 = vec_mradds (a0, vx6, vx2);					\
-    t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6));			\
-									\
-    t7 = vec_mradds (a2, vx5, vx3);					\
-    t3 = vec_mradds (ma2, vx3, vx5);					\
-									\
-    t6 = vec_adds (t8, t3);						\
-    t3 = vec_subs (t8, t3);						\
-    t8 = vec_subs (t1, t7);						\
-    t1 = vec_adds (t1, t7);						\
-									\
-    t7 = vec_adds (t5, t2);						\
-    t2 = vec_subs (t5, t2);						\
-    t5 = vec_adds (t0, t4);						\
-    t0 = vec_subs (t0, t4);						\
-    t4 = vec_subs (t8, t3);						\
-    t3 = vec_adds (t8, t3);						\
-									\
-    vy0 = vec_adds (t7, t1);						\
-    vy7 = vec_subs (t7, t1);						\
-    vy1 = vec_mradds (c4, t3, t5);					\
-    vy6 = vec_mradds (mc4, t3, t5);					\
-    vy2 = vec_mradds (c4, t4, t0);					\
-    vy5 = vec_mradds (mc4, t4, t0);					\
-    vy3 = vec_adds (t2, t6);						\
-    vy4 = vec_subs (t2, t6);						\
-									\
-    shift = vec_splat_u16 (6);						\
-    vx0 = vec_sra (vy0, shift);						\
-    vx1 = vec_sra (vy1, shift);						\
-    vx2 = vec_sra (vy2, shift);						\
-    vx3 = vec_sra (vy3, shift);						\
-    vx4 = vec_sra (vy4, shift);						\
-    vx5 = vec_sra (vy5, shift);						\
-    vx6 = vec_sra (vy6, shift);						\
-    vx7 = vec_sra (vy7, shift);
-
-void mpeg2_idct_copy_altivec (int16_t * const _block, uint8_t * dest,
-			      const int stride)
-{
-    vector_s16_t * const block = (vector_s16_t *)_block;
-    vector_u8_t tmp;
-
-    IDCT
-
-#define COPY(dest,src)						\
-    tmp = vec_packsu (src, src);				\
-    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);	\
-    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
-
-    COPY (dest, vx0)	dest += stride;
-    COPY (dest, vx1)	dest += stride;
-    COPY (dest, vx2)	dest += stride;
-    COPY (dest, vx3)	dest += stride;
-    COPY (dest, vx4)	dest += stride;
-    COPY (dest, vx5)	dest += stride;
-    COPY (dest, vx6)	dest += stride;
-    COPY (dest, vx7)
-
-    block[0] = block[1] = block[2] = block[3] = zero;
-    block[4] = block[5] = block[6] = block[7] = zero;
-}
-
-void mpeg2_idct_add_altivec (const int last, int16_t * const _block,
-			     uint8_t * dest, const int stride)
-{
-    vector_s16_t * const block = (vector_s16_t *)_block;
-    vector_u8_t tmp;
-    vector_s16_t tmp2, tmp3;
-    vector_u8_t perm0;
-    vector_u8_t perm1;
-    vector_u8_t p0, p1, p;
-
-    IDCT
-
-    p0 = vec_lvsl (0, dest);
-    p1 = vec_lvsl (stride, dest);
-    p = vec_splat_u8 (-1);
-    perm0 = vec_mergeh (p, p0);
-    perm1 = vec_mergeh (p, p1);
-
-#define ADD(dest,src,perm)						\
-    /* *(uint64_t *)&tmp = *(uint64_t *)dest; */			\
-    tmp = vec_ld (0, dest);						\
-    tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm);	\
-    tmp3 = vec_adds (tmp2, src);					\
-    tmp = vec_packsu (tmp3, tmp3);					\
-    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);		\
-    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
-
-    ADD (dest, vx0, perm0)	dest += stride;
-    ADD (dest, vx1, perm1)	dest += stride;
-    ADD (dest, vx2, perm0)	dest += stride;
-    ADD (dest, vx3, perm1)	dest += stride;
-    ADD (dest, vx4, perm0)	dest += stride;
-    ADD (dest, vx5, perm1)	dest += stride;
-    ADD (dest, vx6, perm0)	dest += stride;
-    ADD (dest, vx7, perm1)
-
-    block[0] = block[1] = block[2] = block[3] = zero;
-    block[4] = block[5] = block[6] = block[7] = zero;
-}
-
-void mpeg2_idct_altivec_init (void)
-{
-    extern uint8_t mpeg2_scan_norm[64];
-    extern uint8_t mpeg2_scan_alt[64];
-    int i, j;
-
-    /* the altivec idct uses a transposed input, so we patch scan tables */
-    for (i = 0; i < 64; i++) {
-	j = mpeg2_scan_norm[i];
-	mpeg2_scan_norm[i] = (j >> 3) | ((j & 7) << 3);
-	j = mpeg2_scan_alt[i];
-	mpeg2_scan_alt[i] = (j >> 3) | ((j & 7) << 3);
-    }
-}
-
-#endif
diff --git a/src/libmpeg2new/libmpeg2/idct_mlib.c b/src/libmpeg2new/libmpeg2/idct_mlib.c
deleted file mode 100644
index 55a2e9b64..000000000
--- a/src/libmpeg2new/libmpeg2/idct_mlib.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * idct_mlib.c
- * Copyright (C) 1999-2003 Håkan Hjort <d95hjort@dtek.chalmers.se>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include "config.h"
-
-#ifdef LIBMPEG2_MLIB
-
-#include <mlib_types.h>
-#include <mlib_status.h>
-#include <mlib_sys.h>
-#include <mlib_video.h>
-#include <string.h>
-#include <inttypes.h>
-
-#include "../include/mpeg2.h"
-#include "mpeg2_internal.h"
-
-void mpeg2_idct_add_mlib (const int last, int16_t * const block,
-			  uint8_t * const dest, const int stride)
-{
-    mlib_VideoIDCT_IEEE_S16_S16 (block, block);
-    mlib_VideoAddBlock_U8_S16 (dest, block, stride);
-    memset (block, 0, 64 * sizeof (uint16_t));
-}
-
-void mpeg2_idct_copy_mlib_non_ieee (int16_t * const block,
-				    uint8_t * const dest, const int stride)
-{
-    mlib_VideoIDCT8x8_U8_S16 (dest, block, stride);
-    memset (block, 0, 64 * sizeof (uint16_t));
-}
-
-void mpeg2_idct_add_mlib_non_ieee (const int last, int16_t * const block,
-				   uint8_t * const dest, const int stride)
-{
-    mlib_VideoIDCT8x8_S16_S16 (block, block);
-    mlib_VideoAddBlock_U8_S16 (dest, block, stride);
-    memset (block, 0, 64 * sizeof (uint16_t));
-}
-
-#endif
diff --git a/src/libmpeg2new/libmpeg2/idct_mmx.c b/src/libmpeg2new/libmpeg2/idct_mmx.c
deleted file mode 100644
index d5a5c08a4..000000000
--- a/src/libmpeg2new/libmpeg2/idct_mmx.c
+++ /dev/null
@@ -1,814 +0,0 @@
-/*
- * idct_mmx.c
- * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include "config.h"
-
-#ifdef ARCH_X86
-
-#include <inttypes.h>
-
-#include "../include/mpeg2.h"
-#include "../include/attributes.h"
-#include "mpeg2_internal.h"
-#include "../include/mmx.h"
-
-#define ROW_SHIFT 15
-#define COL_SHIFT 6
-
-#define round(bias) ((int)(((bias)+0.5) * (1<<ROW_SHIFT)))
-#define rounder(bias) {round (bias), round (bias)}
-
-
-#if 0
-/* C row IDCT - its just here to document the MMXEXT and MMX versions */
-static inline void idct_row (int16_t * row, int offset,
-			     int16_t * table, int32_t * rounder)
-{
-    int C1, C2, C3, C4, C5, C6, C7;
-    int a0, a1, a2, a3, b0, b1, b2, b3;
-
-    row += offset;
-
-    C1 = table[1];
-    C2 = table[2];
-    C3 = table[3];
-    C4 = table[4];
-    C5 = table[5];
-    C6 = table[6];
-    C7 = table[7];
-
-    a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + *rounder;
-    a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + *rounder;
-    a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + *rounder;
-    a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + *rounder;
-
-    b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
-    b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
-    b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
-    b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
-
-    row[0] = (a0 + b0) >> ROW_SHIFT;
-    row[1] = (a1 + b1) >> ROW_SHIFT;
-    row[2] = (a2 + b2) >> ROW_SHIFT;
-    row[3] = (a3 + b3) >> ROW_SHIFT;
-    row[4] = (a3 - b3) >> ROW_SHIFT;
-    row[5] = (a2 - b2) >> ROW_SHIFT;
-    row[6] = (a1 - b1) >> ROW_SHIFT;
-    row[7] = (a0 - b0) >> ROW_SHIFT;
-}
-#endif
-
-
-/* MMXEXT row IDCT */
-
-#define mmxext_table(c1,c2,c3,c4,c5,c6,c7)	{  c4,  c2, -c4, -c2,	\
-						   c4,  c6,  c4,  c6,	\
-						   c1,  c3, -c1, -c5,	\
-						   c5,  c7,  c3, -c7,	\
-						   c4, -c6,  c4, -c6,	\
-						  -c4,  c2,  c4, -c2,	\
-						   c5, -c1,  c3, -c1,	\
-						   c7,  c3,  c7, -c5 }
-
-static inline void mmxext_row_head (int16_t * const row, const int offset,
-				    const int16_t * const table)
-{
-    movq_m2r (*(row+offset), mm2);	/* mm2 = x6 x4 x2 x0 */
-
-    movq_m2r (*(row+offset+4), mm5);	/* mm5 = x7 x5 x3 x1 */
-    movq_r2r (mm2, mm0);		/* mm0 = x6 x4 x2 x0 */
-
-    movq_m2r (*table, mm3);		/* mm3 = -C2 -C4 C2 C4 */
-    movq_r2r (mm5, mm6);		/* mm6 = x7 x5 x3 x1 */
-
-    movq_m2r (*(table+4), mm4);		/* mm4 = C6 C4 C6 C4 */
-    pmaddwd_r2r (mm0, mm3);		/* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */
-
-    pshufw_r2r (mm2, mm2, 0x4e);	/* mm2 = x2 x0 x6 x4 */
-}
-
-static inline void mmxext_row (const int16_t * const table,
-			       const int32_t * const rounder)
-{
-    movq_m2r (*(table+8), mm1);		/* mm1 = -C5 -C1 C3 C1 */
-    pmaddwd_r2r (mm2, mm4);		/* mm4 = C4*x0+C6*x2 C4*x4+C6*x6 */
-
-    pmaddwd_m2r (*(table+16), mm0);	/* mm0 = C4*x4-C6*x6 C4*x0-C6*x2 */
-    pshufw_r2r (mm6, mm6, 0x4e);	/* mm6 = x3 x1 x7 x5 */
-
-    movq_m2r (*(table+12), mm7);	/* mm7 = -C7 C3 C7 C5 */
-    pmaddwd_r2r (mm5, mm1);		/* mm1 = -C1*x5-C5*x7 C1*x1+C3*x3 */
-
-    paddd_m2r (*rounder, mm3);		/* mm3 += rounder */
-    pmaddwd_r2r (mm6, mm7);		/* mm7 = C3*x1-C7*x3 C5*x5+C7*x7 */
-
-    pmaddwd_m2r (*(table+20), mm2);	/* mm2 = C4*x0-C2*x2 -C4*x4+C2*x6 */
-    paddd_r2r (mm4, mm3);		/* mm3 = a1 a0 + rounder */
-
-    pmaddwd_m2r (*(table+24), mm5);	/* mm5 = C3*x5-C1*x7 C5*x1-C1*x3 */
-    movq_r2r (mm3, mm4);		/* mm4 = a1 a0 + rounder */
-
-    pmaddwd_m2r (*(table+28), mm6);	/* mm6 = C7*x1-C5*x3 C7*x5+C3*x7 */
-    paddd_r2r (mm7, mm1);		/* mm1 = b1 b0 */
-
-    paddd_m2r (*rounder, mm0);		/* mm0 += rounder */
-    psubd_r2r (mm1, mm3);		/* mm3 = a1-b1 a0-b0 + rounder */
-
-    psrad_i2r (ROW_SHIFT, mm3);		/* mm3 = y6 y7 */
-    paddd_r2r (mm4, mm1);		/* mm1 = a1+b1 a0+b0 + rounder */
-
-    paddd_r2r (mm2, mm0);		/* mm0 = a3 a2 + rounder */
-    psrad_i2r (ROW_SHIFT, mm1);		/* mm1 = y1 y0 */
-
-    paddd_r2r (mm6, mm5);		/* mm5 = b3 b2 */
-    movq_r2r (mm0, mm4);		/* mm4 = a3 a2 + rounder */
-
-    paddd_r2r (mm5, mm0);		/* mm0 = a3+b3 a2+b2 + rounder */
-    psubd_r2r (mm5, mm4);		/* mm4 = a3-b3 a2-b2 + rounder */
-}
-
-static inline void mmxext_row_tail (int16_t * const row, const int store)
-{
-    psrad_i2r (ROW_SHIFT, mm0);		/* mm0 = y3 y2 */
-
-    psrad_i2r (ROW_SHIFT, mm4);		/* mm4 = y4 y5 */
-
-    packssdw_r2r (mm0, mm1);		/* mm1 = y3 y2 y1 y0 */
-
-    packssdw_r2r (mm3, mm4);		/* mm4 = y6 y7 y4 y5 */
-
-    movq_r2m (mm1, *(row+store));	/* save y3 y2 y1 y0 */
-    pshufw_r2r (mm4, mm4, 0xb1);	/* mm4 = y7 y6 y5 y4 */
-
-    /* slot */
-
-    movq_r2m (mm4, *(row+store+4));	/* save y7 y6 y5 y4 */
-}
-
-static inline void mmxext_row_mid (int16_t * const row, const int store,
-				   const int offset,
-				   const int16_t * const table)
-{
-    movq_m2r (*(row+offset), mm2);	/* mm2 = x6 x4 x2 x0 */
-    psrad_i2r (ROW_SHIFT, mm0);		/* mm0 = y3 y2 */
-
-    movq_m2r (*(row+offset+4), mm5);	/* mm5 = x7 x5 x3 x1 */
-    psrad_i2r (ROW_SHIFT, mm4);		/* mm4 = y4 y5 */
-
-    packssdw_r2r (mm0, mm1);		/* mm1 = y3 y2 y1 y0 */
-    movq_r2r (mm5, mm6);		/* mm6 = x7 x5 x3 x1 */
-
-    packssdw_r2r (mm3, mm4);		/* mm4 = y6 y7 y4 y5 */
-    movq_r2r (mm2, mm0);		/* mm0 = x6 x4 x2 x0 */
-
-    movq_r2m (mm1, *(row+store));	/* save y3 y2 y1 y0 */
-    pshufw_r2r (mm4, mm4, 0xb1);	/* mm4 = y7 y6 y5 y4 */
-
-    movq_m2r (*table, mm3);		/* mm3 = -C2 -C4 C2 C4 */
-    movq_r2m (mm4, *(row+store+4));	/* save y7 y6 y5 y4 */
-
-    pmaddwd_r2r (mm0, mm3);		/* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */
-
-    movq_m2r (*(table+4), mm4);		/* mm4 = C6 C4 C6 C4 */
-    pshufw_r2r (mm2, mm2, 0x4e);	/* mm2 = x2 x0 x6 x4 */
-}
-
-
-/* MMX row IDCT */
-
-#define mmx_table(c1,c2,c3,c4,c5,c6,c7)	{  c4,  c2,  c4,  c6,	\
-					   c4,  c6, -c4, -c2,	\
-					   c1,  c3,  c3, -c7,	\
-					   c5,  c7, -c1, -c5,	\
-					   c4, -c6,  c4, -c2,	\
-					  -c4,  c2,  c4, -c6,	\
-					   c5, -c1,  c7, -c5,	\
-					   c7,  c3,  c3, -c1 }
-
-static inline void mmx_row_head (int16_t * const row, const int offset,
-				 const int16_t * const table)
-{
-    movq_m2r (*(row+offset), mm2);	/* mm2 = x6 x4 x2 x0 */
-
-    movq_m2r (*(row+offset+4), mm5);	/* mm5 = x7 x5 x3 x1 */
-    movq_r2r (mm2, mm0);		/* mm0 = x6 x4 x2 x0 */
-
-    movq_m2r (*table, mm3);		/* mm3 = C6 C4 C2 C4 */
-    movq_r2r (mm5, mm6);		/* mm6 = x7 x5 x3 x1 */
-
-    punpckldq_r2r (mm0, mm0);		/* mm0 = x2 x0 x2 x0 */
-
-    movq_m2r (*(table+4), mm4);		/* mm4 = -C2 -C4 C6 C4 */
-    pmaddwd_r2r (mm0, mm3);		/* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */
-
-    movq_m2r (*(table+8), mm1);		/* mm1 = -C7 C3 C3 C1 */
-    punpckhdq_r2r (mm2, mm2);		/* mm2 = x6 x4 x6 x4 */
-}
-
-static inline void mmx_row (const int16_t * const table,
-			    const int32_t * const rounder)
-{
-    pmaddwd_r2r (mm2, mm4);		/* mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 */
-    punpckldq_r2r (mm5, mm5);		/* mm5 = x3 x1 x3 x1 */
-
-    pmaddwd_m2r (*(table+16), mm0);	/* mm0 = C4*x0-C2*x2 C4*x0-C6*x2 */
-    punpckhdq_r2r (mm6, mm6);		/* mm6 = x7 x5 x7 x5 */
-
-    movq_m2r (*(table+12), mm7);	/* mm7 = -C5 -C1 C7 C5 */
-    pmaddwd_r2r (mm5, mm1);		/* mm1 = C3*x1-C7*x3 C1*x1+C3*x3 */
-
-    paddd_m2r (*rounder, mm3);		/* mm3 += rounder */
-    pmaddwd_r2r (mm6, mm7);		/* mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 */
-
-    pmaddwd_m2r (*(table+20), mm2);	/* mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 */
-    paddd_r2r (mm4, mm3);		/* mm3 = a1 a0 + rounder */
-
-    pmaddwd_m2r (*(table+24), mm5);	/* mm5 = C7*x1-C5*x3 C5*x1-C1*x3 */
-    movq_r2r (mm3, mm4);		/* mm4 = a1 a0 + rounder */
-
-    pmaddwd_m2r (*(table+28), mm6);	/* mm6 = C3*x5-C1*x7 C7*x5+C3*x7 */
-    paddd_r2r (mm7, mm1);		/* mm1 = b1 b0 */
-
-    paddd_m2r (*rounder, mm0);		/* mm0 += rounder */
-    psubd_r2r (mm1, mm3);		/* mm3 = a1-b1 a0-b0 + rounder */
-
-    psrad_i2r (ROW_SHIFT, mm3);		/* mm3 = y6 y7 */
-    paddd_r2r (mm4, mm1);		/* mm1 = a1+b1 a0+b0 + rounder */
-
-    paddd_r2r (mm2, mm0);		/* mm0 = a3 a2 + rounder */
-    psrad_i2r (ROW_SHIFT, mm1);		/* mm1 = y1 y0 */
-
-    paddd_r2r (mm6, mm5);		/* mm5 = b3 b2 */
-    movq_r2r (mm0, mm7);		/* mm7 = a3 a2 + rounder */
-
-    paddd_r2r (mm5, mm0);		/* mm0 = a3+b3 a2+b2 + rounder */
-    psubd_r2r (mm5, mm7);		/* mm7 = a3-b3 a2-b2 + rounder */
-}
-
-static inline void mmx_row_tail (int16_t * const row, const int store)
-{
-    psrad_i2r (ROW_SHIFT, mm0);		/* mm0 = y3 y2 */
-
-    psrad_i2r (ROW_SHIFT, mm7);		/* mm7 = y4 y5 */
-
-    packssdw_r2r (mm0, mm1);		/* mm1 = y3 y2 y1 y0 */
-
-    packssdw_r2r (mm3, mm7);		/* mm7 = y6 y7 y4 y5 */
-
-    movq_r2m (mm1, *(row+store));	/* save y3 y2 y1 y0 */
-    movq_r2r (mm7, mm4);		/* mm4 = y6 y7 y4 y5 */
-
-    pslld_i2r (16, mm7);		/* mm7 = y7 0 y5 0 */
-
-    psrld_i2r (16, mm4);		/* mm4 = 0 y6 0 y4 */
-
-    por_r2r (mm4, mm7);			/* mm7 = y7 y6 y5 y4 */
-
-    /* slot */
-
-    movq_r2m (mm7, *(row+store+4));	/* save y7 y6 y5 y4 */
-}
-
-static inline void mmx_row_mid (int16_t * const row, const int store,
-				const int offset, const int16_t * const table)
-{
-    movq_m2r (*(row+offset), mm2);	/* mm2 = x6 x4 x2 x0 */
-    psrad_i2r (ROW_SHIFT, mm0);		/* mm0 = y3 y2 */
-
-    movq_m2r (*(row+offset+4), mm5);	/* mm5 = x7 x5 x3 x1 */
-    psrad_i2r (ROW_SHIFT, mm7);		/* mm7 = y4 y5 */
-
-    packssdw_r2r (mm0, mm1);		/* mm1 = y3 y2 y1 y0 */
-    movq_r2r (mm5, mm6);		/* mm6 = x7 x5 x3 x1 */
-
-    packssdw_r2r (mm3, mm7);		/* mm7 = y6 y7 y4 y5 */
-    movq_r2r (mm2, mm0);		/* mm0 = x6 x4 x2 x0 */
-
-    movq_r2m (mm1, *(row+store));	/* save y3 y2 y1 y0 */
-    movq_r2r (mm7, mm1);		/* mm1 = y6 y7 y4 y5 */
-
-    punpckldq_r2r (mm0, mm0);		/* mm0 = x2 x0 x2 x0 */
-    psrld_i2r (16, mm7);		/* mm7 = 0 y6 0 y4 */
-
-    movq_m2r (*table, mm3);		/* mm3 = C6 C4 C2 C4 */
-    pslld_i2r (16, mm1);		/* mm1 = y7 0 y5 0 */
-
-    movq_m2r (*(table+4), mm4);		/* mm4 = -C2 -C4 C6 C4 */
-    por_r2r (mm1, mm7);			/* mm7 = y7 y6 y5 y4 */
-
-    movq_m2r (*(table+8), mm1);		/* mm1 = -C7 C3 C3 C1 */
-    punpckhdq_r2r (mm2, mm2);		/* mm2 = x6 x4 x6 x4 */
-
-    movq_r2m (mm7, *(row+store+4));	/* save y7 y6 y5 y4 */
-    pmaddwd_r2r (mm0, mm3);		/* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */
-}
-
-
-#if 0
-/* C column IDCT - its just here to document the MMXEXT and MMX versions */
-static inline void idct_col (int16_t * col, int offset)
-{
-/* multiplication - as implemented on mmx */
-#define F(c,x) (((c) * (x)) >> 16)
-
-/* saturation - it helps us handle torture test cases */
-#define S(x) (((x)>32767) ? 32767 : ((x)<-32768) ? -32768 : (x))
-
-    int16_t x0, x1, x2, x3, x4, x5, x6, x7;
-    int16_t y0, y1, y2, y3, y4, y5, y6, y7;
-    int16_t a0, a1, a2, a3, b0, b1, b2, b3;
-    int16_t u04, v04, u26, v26, u17, v17, u35, v35, u12, v12;
-
-    col += offset;
-
-    x0 = col[0*8];
-    x1 = col[1*8];
-    x2 = col[2*8];
-    x3 = col[3*8];
-    x4 = col[4*8];
-    x5 = col[5*8];
-    x6 = col[6*8];
-    x7 = col[7*8];
-
-    u04 = S (x0 + x4);
-    v04 = S (x0 - x4);
-    u26 = S (F (T2, x6) + x2);
-    v26 = S (F (T2, x2) - x6);
-
-    a0 = S (u04 + u26);
-    a1 = S (v04 + v26);
-    a2 = S (v04 - v26);
-    a3 = S (u04 - u26);
-
-    u17 = S (F (T1, x7) + x1);
-    v17 = S (F (T1, x1) - x7);
-    u35 = S (F (T3, x5) + x3);
-    v35 = S (F (T3, x3) - x5);
-
-    b0 = S (u17 + u35);
-    b3 = S (v17 - v35);
-    u12 = S (u17 - u35);
-    v12 = S (v17 + v35);
-    u12 = S (2 * F (C4, u12));
-    v12 = S (2 * F (C4, v12));
-    b1 = S (u12 + v12);
-    b2 = S (u12 - v12);
-
-    y0 = S (a0 + b0) >> COL_SHIFT;
-    y1 = S (a1 + b1) >> COL_SHIFT;
-    y2 = S (a2 + b2) >> COL_SHIFT;
-    y3 = S (a3 + b3) >> COL_SHIFT;
-
-    y4 = S (a3 - b3) >> COL_SHIFT;
-    y5 = S (a2 - b2) >> COL_SHIFT;
-    y6 = S (a1 - b1) >> COL_SHIFT;
-    y7 = S (a0 - b0) >> COL_SHIFT;
-
-    col[0*8] = y0;
-    col[1*8] = y1;
-    col[2*8] = y2;
-    col[3*8] = y3;
-    col[4*8] = y4;
-    col[5*8] = y5;
-    col[6*8] = y6;
-    col[7*8] = y7;
-}
-#endif
-
-
-/* MMX column IDCT */
-static inline void idct_col (int16_t * const col, const int offset)
-{
-#define T1 13036
-#define T2 27146
-#define T3 43790
-#define C4 23170
-
-    static const short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1};
-    static const short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2};
-    static const short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3};
-    static const short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4};
-
-    /* column code adapted from peter gubanov */
-    /* http://www.elecard.com/peter/idct.shtml */
-
-    movq_m2r (*_T1, mm0);		/* mm0 = T1 */
-
-    movq_m2r (*(col+offset+1*8), mm1);	/* mm1 = x1 */
-    movq_r2r (mm0, mm2);		/* mm2 = T1 */
-
-    movq_m2r (*(col+offset+7*8), mm4);	/* mm4 = x7 */
-    pmulhw_r2r (mm1, mm0);		/* mm0 = T1*x1 */
-
-    movq_m2r (*_T3, mm5);		/* mm5 = T3 */
-    pmulhw_r2r (mm4, mm2);		/* mm2 = T1*x7 */
-
-    movq_m2r (*(col+offset+5*8), mm6);	/* mm6 = x5 */
-    movq_r2r (mm5, mm7);		/* mm7 = T3-1 */
-
-    movq_m2r (*(col+offset+3*8), mm3);	/* mm3 = x3 */
-    psubsw_r2r (mm4, mm0);		/* mm0 = v17 */
-
-    movq_m2r (*_T2, mm4);		/* mm4 = T2 */
-    pmulhw_r2r (mm3, mm5);		/* mm5 = (T3-1)*x3 */
-
-    paddsw_r2r (mm2, mm1);		/* mm1 = u17 */
-    pmulhw_r2r (mm6, mm7);		/* mm7 = (T3-1)*x5 */
-
-    /* slot */
-
-    movq_r2r (mm4, mm2);		/* mm2 = T2 */
-    paddsw_r2r (mm3, mm5);		/* mm5 = T3*x3 */
-
-    pmulhw_m2r (*(col+offset+2*8), mm4);/* mm4 = T2*x2 */
-    paddsw_r2r (mm6, mm7);		/* mm7 = T3*x5 */
-
-    psubsw_r2r (mm6, mm5);		/* mm5 = v35 */
-    paddsw_r2r (mm3, mm7);		/* mm7 = u35 */
-
-    movq_m2r (*(col+offset+6*8), mm3);	/* mm3 = x6 */
-    movq_r2r (mm0, mm6);		/* mm6 = v17 */
-
-    pmulhw_r2r (mm3, mm2);		/* mm2 = T2*x6 */
-    psubsw_r2r (mm5, mm0);		/* mm0 = b3 */
-
-    psubsw_r2r (mm3, mm4);		/* mm4 = v26 */
-    paddsw_r2r (mm6, mm5);		/* mm5 = v12 */
-
-    movq_r2m (mm0, *(col+offset+3*8));	/* save b3 in scratch0 */
-    movq_r2r (mm1, mm6);		/* mm6 = u17 */
-
-    paddsw_m2r (*(col+offset+2*8), mm2);/* mm2 = u26 */
-    paddsw_r2r (mm7, mm6);		/* mm6 = b0 */
-
-    psubsw_r2r (mm7, mm1);		/* mm1 = u12 */
-    movq_r2r (mm1, mm7);		/* mm7 = u12 */
-
-    movq_m2r (*(col+offset+0*8), mm3);	/* mm3 = x0 */
-    paddsw_r2r (mm5, mm1);		/* mm1 = u12+v12 */
-
-    movq_m2r (*_C4, mm0);		/* mm0 = C4/2 */
-    psubsw_r2r (mm5, mm7);		/* mm7 = u12-v12 */
-
-    movq_r2m (mm6, *(col+offset+5*8));	/* save b0 in scratch1 */
-    pmulhw_r2r (mm0, mm1);		/* mm1 = b1/2 */
-
-    movq_r2r (mm4, mm6);		/* mm6 = v26 */
-    pmulhw_r2r (mm0, mm7);		/* mm7 = b2/2 */
-
-    movq_m2r (*(col+offset+4*8), mm5);	/* mm5 = x4 */
-    movq_r2r (mm3, mm0);		/* mm0 = x0 */
-
-    psubsw_r2r (mm5, mm3);		/* mm3 = v04 */
-    paddsw_r2r (mm5, mm0);		/* mm0 = u04 */
-
-    paddsw_r2r (mm3, mm4);		/* mm4 = a1 */
-    movq_r2r (mm0, mm5);		/* mm5 = u04 */
-
-    psubsw_r2r (mm6, mm3);		/* mm3 = a2 */
-    paddsw_r2r (mm2, mm5);		/* mm5 = a0 */
-
-    paddsw_r2r (mm1, mm1);		/* mm1 = b1 */
-    psubsw_r2r (mm2, mm0);		/* mm0 = a3 */
-
-    paddsw_r2r (mm7, mm7);		/* mm7 = b2 */
-    movq_r2r (mm3, mm2);		/* mm2 = a2 */
-
-    movq_r2r (mm4, mm6);		/* mm6 = a1 */
-    paddsw_r2r (mm7, mm3);		/* mm3 = a2+b2 */
-
-    psraw_i2r (COL_SHIFT, mm3);		/* mm3 = y2 */
-    paddsw_r2r (mm1, mm4);		/* mm4 = a1+b1 */
-
-    psraw_i2r (COL_SHIFT, mm4);		/* mm4 = y1 */
-    psubsw_r2r (mm1, mm6);		/* mm6 = a1-b1 */
-
-    movq_m2r (*(col+offset+5*8), mm1);	/* mm1 = b0 */
-    psubsw_r2r (mm7, mm2);		/* mm2 = a2-b2 */
-
-    psraw_i2r (COL_SHIFT, mm6);		/* mm6 = y6 */
-    movq_r2r (mm5, mm7);		/* mm7 = a0 */
-
-    movq_r2m (mm4, *(col+offset+1*8));	/* save y1 */
-    psraw_i2r (COL_SHIFT, mm2);		/* mm2 = y5 */
-
-    movq_r2m (mm3, *(col+offset+2*8));	/* save y2 */
-    paddsw_r2r (mm1, mm5);		/* mm5 = a0+b0 */
-
-    movq_m2r (*(col+offset+3*8), mm4);	/* mm4 = b3 */
-    psubsw_r2r (mm1, mm7);		/* mm7 = a0-b0 */
-
-    psraw_i2r (COL_SHIFT, mm5);		/* mm5 = y0 */
-    movq_r2r (mm0, mm3);		/* mm3 = a3 */
-
-    movq_r2m (mm2, *(col+offset+5*8));	/* save y5 */
-    psubsw_r2r (mm4, mm3);		/* mm3 = a3-b3 */
-
-    psraw_i2r (COL_SHIFT, mm7);		/* mm7 = y7 */
-    paddsw_r2r (mm0, mm4);		/* mm4 = a3+b3 */
-
-    movq_r2m (mm5, *(col+offset+0*8));	/* save y0 */
-    psraw_i2r (COL_SHIFT, mm3);		/* mm3 = y4 */
-
-    movq_r2m (mm6, *(col+offset+6*8));	/* save y6 */
-    psraw_i2r (COL_SHIFT, mm4);		/* mm4 = y3 */
-
-    movq_r2m (mm7, *(col+offset+7*8));	/* save y7 */
-
-    movq_r2m (mm3, *(col+offset+4*8));	/* save y4 */
-
-    movq_r2m (mm4, *(col+offset+3*8));	/* save y3 */
-}
-
-
-static const int32_t rounder0[] ATTR_ALIGN(8) =
-    rounder ((1 << (COL_SHIFT - 1)) - 0.5);
-static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0);
-static const int32_t rounder1[] ATTR_ALIGN(8) =
-    rounder (1.25683487303);	/* C1*(C1/C4+C1+C7)/2 */
-static const int32_t rounder7[] ATTR_ALIGN(8) =
-    rounder (-0.25);		/* C1*(C7/C4+C7-C1)/2 */
-static const int32_t rounder2[] ATTR_ALIGN(8) =
-    rounder (0.60355339059);	/* C2 * (C6+C2)/2 */
-static const int32_t rounder6[] ATTR_ALIGN(8) =
-    rounder (-0.25);		/* C2 * (C6-C2)/2 */
-static const int32_t rounder3[] ATTR_ALIGN(8) =
-    rounder (0.087788325588);	/* C3*(-C3/C4+C3+C5)/2 */
-static const int32_t rounder5[] ATTR_ALIGN(8) =
-    rounder (-0.441341716183);	/* C3*(-C5/C4+C5-C3)/2 */
-
-
-#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid)	\
-static inline void idct (int16_t * const block)				\
-{									\
-    static const int16_t table04[] ATTR_ALIGN(16) =			\
-	table (22725, 21407, 19266, 16384, 12873,  8867, 4520);		\
-    static const int16_t table17[] ATTR_ALIGN(16) =			\
-	table (31521, 29692, 26722, 22725, 17855, 12299, 6270);		\
-    static const int16_t table26[] ATTR_ALIGN(16) =			\
-	table (29692, 27969, 25172, 21407, 16819, 11585, 5906);		\
-    static const int16_t table35[] ATTR_ALIGN(16) =			\
-	table (26722, 25172, 22654, 19266, 15137, 10426, 5315);		\
-									\
-    idct_row_head (block, 0*8, table04);				\
-    idct_row (table04, rounder0);					\
-    idct_row_mid (block, 0*8, 4*8, table04);				\
-    idct_row (table04, rounder4);					\
-    idct_row_mid (block, 4*8, 1*8, table17);				\
-    idct_row (table17, rounder1);					\
-    idct_row_mid (block, 1*8, 7*8, table17);				\
-    idct_row (table17, rounder7);					\
-    idct_row_mid (block, 7*8, 2*8, table26);				\
-    idct_row (table26, rounder2);					\
-    idct_row_mid (block, 2*8, 6*8, table26);				\
-    idct_row (table26, rounder6);					\
-    idct_row_mid (block, 6*8, 3*8, table35);				\
-    idct_row (table35, rounder3);					\
-    idct_row_mid (block, 3*8, 5*8, table35);				\
-    idct_row (table35, rounder5);					\
-    idct_row_tail (block, 5*8);						\
-									\
-    idct_col (block, 0);						\
-    idct_col (block, 4);						\
-}
-
-
-#define COPY_MMX(offset,r0,r1,r2)	\
-do {					\
-    movq_m2r (*(block+offset), r0);	\
-    dest += stride;			\
-    movq_m2r (*(block+offset+4), r1);	\
-    movq_r2m (r2, *dest);		\
-    packuswb_r2r (r1, r0);		\
-} while (0)
-
-static inline void block_copy (int16_t * const block, uint8_t * dest,
-			       const int stride)
-{
-    movq_m2r (*(block+0*8), mm0);
-    movq_m2r (*(block+0*8+4), mm1);
-    movq_m2r (*(block+1*8), mm2);
-    packuswb_r2r (mm1, mm0);
-    movq_m2r (*(block+1*8+4), mm3);
-    movq_r2m (mm0, *dest);
-    packuswb_r2r (mm3, mm2);
-    COPY_MMX (2*8, mm0, mm1, mm2);
-    COPY_MMX (3*8, mm2, mm3, mm0);
-    COPY_MMX (4*8, mm0, mm1, mm2);
-    COPY_MMX (5*8, mm2, mm3, mm0);
-    COPY_MMX (6*8, mm0, mm1, mm2);
-    COPY_MMX (7*8, mm2, mm3, mm0);
-    movq_r2m (mm2, *(dest+stride));
-}
-
-
-#define ADD_MMX(offset,r1,r2,r3,r4)	\
-do {					\
-    movq_m2r (*(dest+2*stride), r1);	\
-    packuswb_r2r (r4, r3);		\
-    movq_r2r (r1, r2);			\
-    dest += stride;			\
-    movq_r2m (r3, *dest);		\
-    punpcklbw_r2r (mm0, r1);		\
-    paddsw_m2r (*(block+offset), r1);	\
-    punpckhbw_r2r (mm0, r2);		\
-    paddsw_m2r (*(block+offset+4), r2);	\
-} while (0)
-
-static inline void block_add (int16_t * const block, uint8_t * dest,
-			      const int stride)
-{
-    movq_m2r (*dest, mm1);
-    pxor_r2r (mm0, mm0);
-    movq_m2r (*(dest+stride), mm3);
-    movq_r2r (mm1, mm2);
-    punpcklbw_r2r (mm0, mm1);
-    movq_r2r (mm3, mm4);
-    paddsw_m2r (*(block+0*8), mm1);
-    punpckhbw_r2r (mm0, mm2);
-    paddsw_m2r (*(block+0*8+4), mm2);
-    punpcklbw_r2r (mm0, mm3);
-    paddsw_m2r (*(block+1*8), mm3);
-    packuswb_r2r (mm2, mm1);
-    punpckhbw_r2r (mm0, mm4);
-    movq_r2m (mm1, *dest);
-    paddsw_m2r (*(block+1*8+4), mm4);
-    ADD_MMX (2*8, mm1, mm2, mm3, mm4);
-    ADD_MMX (3*8, mm3, mm4, mm1, mm2);
-    ADD_MMX (4*8, mm1, mm2, mm3, mm4);
-    ADD_MMX (5*8, mm3, mm4, mm1, mm2);
-    ADD_MMX (6*8, mm1, mm2, mm3, mm4);
-    ADD_MMX (7*8, mm3, mm4, mm1, mm2);
-    packuswb_r2r (mm4, mm3);
-    movq_r2m (mm3, *(dest+stride));
-}
-
-
-static inline void block_zero (int16_t * const block)
-{
-    pxor_r2r (mm0, mm0);
-    movq_r2m (mm0, *(block+0*4));
-    movq_r2m (mm0, *(block+1*4));
-    movq_r2m (mm0, *(block+2*4));
-    movq_r2m (mm0, *(block+3*4));
-    movq_r2m (mm0, *(block+4*4));
-    movq_r2m (mm0, *(block+5*4));
-    movq_r2m (mm0, *(block+6*4));
-    movq_r2m (mm0, *(block+7*4));
-    movq_r2m (mm0, *(block+8*4));
-    movq_r2m (mm0, *(block+9*4));
-    movq_r2m (mm0, *(block+10*4));
-    movq_r2m (mm0, *(block+11*4));
-    movq_r2m (mm0, *(block+12*4));
-    movq_r2m (mm0, *(block+13*4));
-    movq_r2m (mm0, *(block+14*4));
-    movq_r2m (mm0, *(block+15*4));
-}
-
-
-#define CPU_MMXEXT 0
-#define CPU_MMX 1
-
-#define dup4(reg)			\
-do {					\
-    if (cpu != CPU_MMXEXT) {		\
-	punpcklwd_r2r (reg, reg);	\
-	punpckldq_r2r (reg, reg);	\
-    } else				\
-	pshufw_r2r (reg, reg, 0x00);	\
-} while (0)
-
-static inline void block_add_DC (int16_t * const block, uint8_t * dest,
-				 const int stride, const int cpu)
-{
-    movd_v2r ((block[0] + 64) >> 7, mm0);
-    pxor_r2r (mm1, mm1);
-    movq_m2r (*dest, mm2);
-    dup4 (mm0);
-    psubsw_r2r (mm0, mm1);
-    packuswb_r2r (mm0, mm0);
-    paddusb_r2r (mm0, mm2);
-    packuswb_r2r (mm1, mm1);
-    movq_m2r (*(dest + stride), mm3);
-    psubusb_r2r (mm1, mm2);
-    block[0] = 0;
-    paddusb_r2r (mm0, mm3);
-    movq_r2m (mm2, *dest);
-    psubusb_r2r (mm1, mm3);
-    movq_m2r (*(dest + 2*stride), mm2);
-    dest += stride;
-    movq_r2m (mm3, *dest);
-    paddusb_r2r (mm0, mm2);
-    movq_m2r (*(dest + 2*stride), mm3);
-    psubusb_r2r (mm1, mm2);
-    dest += stride;
-    paddusb_r2r (mm0, mm3);
-    movq_r2m (mm2, *dest);
-    psubusb_r2r (mm1, mm3);
-    movq_m2r (*(dest + 2*stride), mm2);
-    dest += stride;
-    movq_r2m (mm3, *dest);
-    paddusb_r2r (mm0, mm2);
-    movq_m2r (*(dest + 2*stride), mm3);
-    psubusb_r2r (mm1, mm2);
-    dest += stride;
-    paddusb_r2r (mm0, mm3);
-    movq_r2m (mm2, *dest);
-    psubusb_r2r (mm1, mm3);
-    movq_m2r (*(dest + 2*stride), mm2);
-    dest += stride;
-    movq_r2m (mm3, *dest);
-    paddusb_r2r (mm0, mm2);
-    movq_m2r (*(dest + 2*stride), mm3);
-    psubusb_r2r (mm1, mm2);
-    block[63] = 0;
-    paddusb_r2r (mm0, mm3);
-    movq_r2m (mm2, *(dest + stride));
-    psubusb_r2r (mm1, mm3);
-    movq_r2m (mm3, *(dest + 2*stride));
-}
-
-
-declare_idct (mmxext_idct, mmxext_table,
-	      mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid)
-
-void mpeg2_idct_copy_mmxext (int16_t * const block, uint8_t * const dest,
-			     const int stride)
-{
-    mmxext_idct (block);
-    block_copy (block, dest, stride);
-    block_zero (block);
-}
-
-void mpeg2_idct_add_mmxext (const int last, int16_t * const block,
-			    uint8_t * const dest, const int stride)
-{
-    if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) {
-	mmxext_idct (block);
-	block_add (block, dest, stride);
-	block_zero (block);
-    } else
-	block_add_DC (block, dest, stride, CPU_MMXEXT);
-}
-
-
-declare_idct (mmx_idct, mmx_table,
-	      mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid)
-
-void mpeg2_idct_copy_mmx (int16_t * const block, uint8_t * const dest,
-			  const int stride)
-{
-    mmx_idct (block);
-    block_copy (block, dest, stride);
-    block_zero (block);
-}
-
-void mpeg2_idct_add_mmx (const int last, int16_t * const block,
-			 uint8_t * const dest, const int stride)
-{
-    if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) {
-	mmx_idct (block);
-	block_add (block, dest, stride);
-	block_zero (block);
-    } else
-	block_add_DC (block, dest, stride, CPU_MMX);
-}
-
-
-void mpeg2_idct_mmx_init (void)
-{
-    extern uint8_t mpeg2_scan_norm[64];
-    extern uint8_t mpeg2_scan_alt[64];
-    int i, j;
-
-    /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
-
-    for (i = 0; i < 64; i++) {
-	j = mpeg2_scan_norm[i];
-	mpeg2_scan_norm[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2);
-	j = mpeg2_scan_alt[i];
-	mpeg2_scan_alt[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2);
-    }
-}
-
-#endif
diff --git a/src/libmpeg2new/libmpeg2/libmpeg2.pc.in b/src/libmpeg2new/libmpeg2/libmpeg2.pc.in
deleted file mode 100644
index d54500b0e..000000000
--- a/src/libmpeg2new/libmpeg2/libmpeg2.pc.in
+++ /dev/null
@@ -1,10 +0,0 @@
-prefix=@prefix@
-exec_prefix=@exec_prefix@
-libdir=@libdir@
-includedir=@includedir@
-
-Name: libmpeg2
-Description: A decoding library for MPEG-1 and MPEG-2 streams.
-Version: @VERSION@
-Libs: -L${libdir} -lmpeg2
-Cflags: -I${includedir}/@PACKAGE@
diff --git a/src/libmpeg2new/libmpeg2/libmpeg2convert.pc.in b/src/libmpeg2new/libmpeg2/libmpeg2convert.pc.in
deleted file mode 100644
index 42383a6e2..000000000
--- a/src/libmpeg2new/libmpeg2/libmpeg2convert.pc.in
+++ /dev/null
@@ -1,10 +0,0 @@
-prefix=@prefix@
-exec_prefix=@exec_prefix@
-libdir=@libdir@
-includedir=@includedir@
-
-Name: libmpeg2convert
-Description: libmpeg2 helper functions for converting to various formats.
-Version: @VERSION@
-Libs: -L${libdir} -lmpeg2convert
-Cflags: -I${includedir}/@PACKAGE@
diff --git a/src/libmpeg2new/libmpeg2/motion_comp.c b/src/libmpeg2new/libmpeg2/motion_comp.c
deleted file mode 100644
index d5a265d5c..000000000
--- a/src/libmpeg2new/libmpeg2/motion_comp.c
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * motion_comp.c
- * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include "config.h"
-
-#include <inttypes.h>
-
-#include "../include/mpeg2.h"
-#include "../include/attributes.h"
-#include "mpeg2_internal.h"
-
-mpeg2_mc_t mpeg2_mc;
-
-void mpeg2_mc_init (uint32_t accel)
-{
-#ifdef ARCH_X86
-    if (accel & MPEG2_ACCEL_X86_MMXEXT)
-	mpeg2_mc = mpeg2_mc_mmxext;
-    else if (accel & MPEG2_ACCEL_X86_3DNOW)
-	mpeg2_mc = mpeg2_mc_3dnow;
-    else if (accel & MPEG2_ACCEL_X86_MMX)
-	mpeg2_mc = mpeg2_mc_mmx;
-    else
-#endif
-#ifdef ARCH_PPC
-    if (accel & MPEG2_ACCEL_PPC_ALTIVEC)
-	mpeg2_mc = mpeg2_mc_altivec;
-    else
-#endif
-#ifdef ARCH_ALPHA
-    if (accel & MPEG2_ACCEL_ALPHA)
-	mpeg2_mc = mpeg2_mc_alpha;
-    else
-#endif
-#ifdef ARCH_SPARC
-    if (accel & MPEG2_ACCEL_SPARC_VIS)
-	mpeg2_mc = mpeg2_mc_vis;
-    else
-#endif
-	mpeg2_mc = mpeg2_mc_c;
-}
-
-#define avg2(a,b) ((a+b+1)>>1)
-#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
-
-#define predict_o(i) (ref[i])
-#define predict_x(i) (avg2 (ref[i], ref[i+1]))
-#define predict_y(i) (avg2 (ref[i], (ref+stride)[i]))
-#define predict_xy(i) (avg4 (ref[i], ref[i+1], \
-			     (ref+stride)[i], (ref+stride)[i+1]))
-
-#define put(predictor,i) dest[i] = predictor (i)
-#define avg(predictor,i) dest[i] = avg2 (predictor (i), dest[i])
-
-/* mc function template */
-
-#define MC_FUNC(op,xy)							\
-static void MC_##op##_##xy##_16_c (uint8_t * dest, const uint8_t * ref,	\
-				   const int stride, int height)	\
-{									\
-    do {								\
-	op (predict_##xy, 0);						\
-	op (predict_##xy, 1);						\
-	op (predict_##xy, 2);						\
-	op (predict_##xy, 3);						\
-	op (predict_##xy, 4);						\
-	op (predict_##xy, 5);						\
-	op (predict_##xy, 6);						\
-	op (predict_##xy, 7);						\
-	op (predict_##xy, 8);						\
-	op (predict_##xy, 9);						\
-	op (predict_##xy, 10);						\
-	op (predict_##xy, 11);						\
-	op (predict_##xy, 12);						\
-	op (predict_##xy, 13);						\
-	op (predict_##xy, 14);						\
-	op (predict_##xy, 15);						\
-	ref += stride;							\
-	dest += stride;							\
-    } while (--height);							\
-}									\
-static void MC_##op##_##xy##_8_c (uint8_t * dest, const uint8_t * ref,	\
-				  const int stride, int height)		\
-{									\
-    do {								\
-	op (predict_##xy, 0);						\
-	op (predict_##xy, 1);						\
-	op (predict_##xy, 2);						\
-	op (predict_##xy, 3);						\
-	op (predict_##xy, 4);						\
-	op (predict_##xy, 5);						\
-	op (predict_##xy, 6);						\
-	op (predict_##xy, 7);						\
-	ref += stride;							\
-	dest += stride;							\
-    } while (--height);							\
-}
-
-/* definitions of the actual mc functions */
-
-MC_FUNC (put,o)
-MC_FUNC (avg,o)
-MC_FUNC (put,x)
-MC_FUNC (avg,x)
-MC_FUNC (put,y)
-MC_FUNC (avg,y)
-MC_FUNC (put,xy)
-MC_FUNC (avg,xy)
-
-MPEG2_MC_EXTERN (c)
diff --git a/src/libmpeg2new/libmpeg2/motion_comp_alpha.c b/src/libmpeg2new/libmpeg2/motion_comp_alpha.c
deleted file mode 100644
index 1b3712a1a..000000000
--- a/src/libmpeg2new/libmpeg2/motion_comp_alpha.c
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * motion_comp_alpha.c
- * Copyright (C) 2002-2003 Falk Hueffner <falk@debian.org>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include "config.h"
-
-#ifdef ARCH_ALPHA
-
-#include <inttypes.h>
-
-#include "mpeg2.h"
-#include <xine/attributes.h>
-#include "mpeg2_internal.h"
-#include "alpha_asm.h"
-
-static inline uint64_t avg2 (uint64_t a, uint64_t b)
-{
-    return (a | b) - (((a ^ b) & BYTE_VEC (0xfe)) >> 1);
-}
-
-// Load two unaligned quadwords from addr. This macro only works if
-// addr is actually unaligned.
-#define ULOAD16(ret_l,ret_r,addr)			\
-    do {						\
-	uint64_t _l = ldq_u (addr +  0);		\
-	uint64_t _m = ldq_u (addr +  8);		\
-	uint64_t _r = ldq_u (addr + 16);		\
-	ret_l = extql (_l, addr) | extqh (_m, addr);	\
-	ret_r = extql (_m, addr) | extqh (_r, addr);	\
-    } while (0)
-
-// Load two aligned quadwords from addr.
-#define ALOAD16(ret_l,ret_r,addr)			\
-    do {						\
-	ret_l = ldq (addr);				\
-	ret_r = ldq (addr + 8);				\
-    } while (0)
-
-#define OP8(LOAD,LOAD16,STORE)			\
-    do {					\
-	STORE (LOAD (pixels), block);		\
-	pixels += line_size;			\
-	block += line_size;			\
-    } while (--h)
-
-#define OP16(LOAD,LOAD16,STORE)			\
-    do {					\
-	uint64_t l, r;				\
-	LOAD16 (l, r, pixels);			\
-	STORE (l, block);			\
-	STORE (r, block + 8);			\
-	pixels += line_size;			\
-	block += line_size;			\
-    } while (--h)
-
-#define OP8_X2(LOAD,LOAD16,STORE)			\
-    do {						\
-	uint64_t p0, p1;				\
-							\
-	p0 = LOAD (pixels);				\
-	p1 = p0 >> 8 | ((uint64_t) pixels[8] << 56);	\
-	STORE (avg2 (p0, p1), block);			\
-	pixels += line_size;				\
-	block += line_size;				\
-    } while (--h)
-
-#define OP16_X2(LOAD,LOAD16,STORE)				\
-    do {							\
-	uint64_t p0, p1;					\
-								\
-	LOAD16 (p0, p1, pixels);				\
-	STORE (avg2(p0, p0 >> 8 | p1 << 56), block);		\
-	STORE (avg2(p1, p1 >> 8 | (uint64_t) pixels[16] << 56),	\
-	       block + 8);					\
-	pixels += line_size;					\
-	block += line_size;					\
-    } while (--h)
-
-#define OP8_Y2(LOAD,LOAD16,STORE)		\
-    do {					\
-	uint64_t p0, p1;			\
-	p0 = LOAD (pixels);			\
-	pixels += line_size;			\
-	p1 = LOAD (pixels);			\
-	do {					\
-	    uint64_t av = avg2 (p0, p1);	\
-	    if (--h == 0) line_size = 0;	\
-	    pixels += line_size;		\
-	    p0 = p1;				\
-	    p1 = LOAD (pixels);			\
-	    STORE (av, block);			\
-	    block += line_size;			\
-	} while (h);				\
-    } while (0)
-
-#define OP16_Y2(LOAD,LOAD16,STORE)		\
-    do {					\
-	uint64_t p0l, p0r, p1l, p1r;		\
-	LOAD16 (p0l, p0r, pixels);		\
-	pixels += line_size;			\
-	LOAD16 (p1l, p1r, pixels);		\
-	do {					\
-	    uint64_t avl, avr;			\
-	    if (--h == 0) line_size = 0;	\
-	    avl = avg2 (p0l, p1l);		\
-	    avr = avg2 (p0r, p1r);		\
-	    p0l = p1l;				\
-	    p0r = p1r;				\
-	    pixels += line_size;		\
-	    LOAD16 (p1l, p1r, pixels);		\
-	    STORE (avl, block);			\
-	    STORE (avr, block + 8);		\
-	    block += line_size;			\
-	} while (h);				\
-    } while (0)
-
-#define OP8_XY2(LOAD,LOAD16,STORE)				\
-    do {							\
-	uint64_t pl, ph;					\
-	uint64_t p1 = LOAD (pixels);				\
-	uint64_t p2 = p1 >> 8 | ((uint64_t) pixels[8] << 56);	\
-								\
-	ph = (((p1 & ~BYTE_VEC (0x03)) >> 2) +			\
-	      ((p2 & ~BYTE_VEC (0x03)) >> 2));			\
-	pl = ((p1 & BYTE_VEC (0x03)) +				\
-	      (p2 & BYTE_VEC (0x03)));				\
-								\
-	do {							\
-	    uint64_t npl, nph;					\
-								\
-	    pixels += line_size;				\
-	    p1 = LOAD (pixels);					\
-	    p2 = (p1 >> 8) | ((uint64_t) pixels[8] << 56);	\
-	    nph = (((p1 & ~BYTE_VEC (0x03)) >> 2) +		\
-	           ((p2 & ~BYTE_VEC (0x03)) >> 2));		\
-	    npl = ((p1 & BYTE_VEC (0x03)) +			\
-	           (p2 & BYTE_VEC (0x03)));			\
-								\
-	    STORE (ph + nph +					\
-		   (((pl + npl + BYTE_VEC (0x02)) >> 2) &	\
-		    BYTE_VEC (0x03)), block);			\
-								\
-	    block += line_size;					\
-            pl = npl;						\
-	    ph = nph;						\
-	} while (--h);						\
-    } while (0)
-
-#define OP16_XY2(LOAD,LOAD16,STORE)				\
-    do {							\
-	uint64_t p0, p1, p2, p3, pl_l, ph_l, pl_r, ph_r;	\
-	LOAD16 (p0, p2, pixels);				\
-	p1 = p0 >> 8 | (p2 << 56);				\
-	p3 = p2 >> 8 | ((uint64_t)pixels[16] << 56);		\
-								\
-	ph_l = (((p0 & ~BYTE_VEC (0x03)) >> 2) +		\
-	        ((p1 & ~BYTE_VEC (0x03)) >> 2));		\
-	pl_l = ((p0 & BYTE_VEC (0x03)) +			\
-	        (p1 & BYTE_VEC(0x03)));				\
-	ph_r = (((p2 & ~BYTE_VEC (0x03)) >> 2) +		\
-	        ((p3 & ~BYTE_VEC (0x03)) >> 2));		\
-	pl_r = ((p2 & BYTE_VEC (0x03)) +			\
-	        (p3 & BYTE_VEC (0x03)));			\
-								\
-	do {							\
-	    uint64_t npl_l, nph_l, npl_r, nph_r;		\
-								\
-	    pixels += line_size;				\
-	    LOAD16 (p0, p2, pixels);				\
-	    p1 = p0 >> 8 | (p2 << 56);				\
-	    p3 = p2 >> 8 | ((uint64_t)pixels[16] << 56);	\
-	    nph_l = (((p0 & ~BYTE_VEC (0x03)) >> 2) +		\
-		     ((p1 & ~BYTE_VEC (0x03)) >> 2));		\
-	    npl_l = ((p0 & BYTE_VEC (0x03)) +			\
-		     (p1 & BYTE_VEC (0x03)));			\
-	    nph_r = (((p2 & ~BYTE_VEC (0x03)) >> 2) +		\
-		     ((p3 & ~BYTE_VEC (0x03)) >> 2));		\
-	    npl_r = ((p2 & BYTE_VEC (0x03)) +			\
-		     (p3 & BYTE_VEC (0x03)));			\
-								\
-	    STORE (ph_l + nph_l +				\
-		   (((pl_l + npl_l + BYTE_VEC (0x02)) >> 2) &	\
-		    BYTE_VEC(0x03)), block);			\
-	    STORE (ph_r + nph_r +				\
-		   (((pl_r + npl_r + BYTE_VEC (0x02)) >> 2) &	\
-		    BYTE_VEC(0x03)), block + 8);		\
-								\
-	    block += line_size;					\
-	    pl_l = npl_l;					\
-	    ph_l = nph_l;					\
-	    pl_r = npl_r;					\
-	    ph_r = nph_r;					\
-	} while (--h);						\
-    } while (0)
-
-#define MAKE_OP(OPNAME,SIZE,SUFF,OPKIND,STORE)				\
-static void MC_ ## OPNAME ## _ ## SUFF ## _ ## SIZE ## _alpha		\
-	(uint8_t *restrict block, const uint8_t *restrict pixels,	\
-	 int line_size, int h)						\
-{									\
-    if ((uint64_t) pixels & 0x7) {					\
-	OPKIND (uldq, ULOAD16, STORE);					\
-    } else {								\
-	OPKIND (ldq, ALOAD16, STORE);					\
-    }									\
-}
-
-#define PIXOP(OPNAME,STORE)			\
-    MAKE_OP (OPNAME, 8,  o,  OP8,      STORE);	\
-    MAKE_OP (OPNAME, 8,  x,  OP8_X2,   STORE);	\
-    MAKE_OP (OPNAME, 8,  y,  OP8_Y2,   STORE);	\
-    MAKE_OP (OPNAME, 8,  xy, OP8_XY2,  STORE);	\
-    MAKE_OP (OPNAME, 16, o,  OP16,     STORE);	\
-    MAKE_OP (OPNAME, 16, x,  OP16_X2,  STORE);	\
-    MAKE_OP (OPNAME, 16, y,  OP16_Y2,  STORE);	\
-    MAKE_OP (OPNAME, 16, xy, OP16_XY2, STORE);
-
-#define STORE(l,b) stq (l, b)
-PIXOP (put, STORE);
-#undef STORE
-#define STORE(l,b) stq (avg2 (l, ldq (b)), b);
-PIXOP (avg, STORE);
-
-mpeg2_mc_t mpeg2_mc_alpha = {
-    { MC_put_o_16_alpha, MC_put_x_16_alpha,
-      MC_put_y_16_alpha, MC_put_xy_16_alpha,
-      MC_put_o_8_alpha, MC_put_x_8_alpha,
-      MC_put_y_8_alpha, MC_put_xy_8_alpha },
-    { MC_avg_o_16_alpha, MC_avg_x_16_alpha,
-      MC_avg_y_16_alpha, MC_avg_xy_16_alpha,
-      MC_avg_o_8_alpha, MC_avg_x_8_alpha,
-      MC_avg_y_8_alpha, MC_avg_xy_8_alpha }
-};
-
-#endif
diff --git a/src/libmpeg2new/libmpeg2/motion_comp_altivec.c b/src/libmpeg2new/libmpeg2/motion_comp_altivec.c
deleted file mode 100644
index ee740e14e..000000000
--- a/src/libmpeg2new/libmpeg2/motion_comp_altivec.c
+++ /dev/null
@@ -1,1010 +0,0 @@
-/*
- * motion_comp_altivec.c
- * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include "config.h"
-
-#ifdef ARCH_PPC
-
-#ifdef HAVE_ALTIVEC_H
-#include <altivec.h>
-#endif
-#include <inttypes.h>
-
-#include "mpeg2.h"
-#include <xine/attributes.h>
-#include "mpeg2_internal.h"
-
-typedef vector signed char vector_s8_t;
-typedef vector unsigned char vector_u8_t;
-typedef vector signed short vector_s16_t;
-typedef vector unsigned short vector_u16_t;
-typedef vector signed int vector_s32_t;
-typedef vector unsigned int vector_u32_t;
-
-#ifndef COFFEE_BREAK	/* Workarounds for gcc suckage */
-
-static inline vector_u8_t my_vec_ld (int const A, const uint8_t * const B)
-{
-    return vec_ld (A, (uint8_t *)B);
-}
-#undef vec_ld
-#define vec_ld my_vec_ld
-
-static inline vector_u8_t my_vec_and (vector_u8_t const A, vector_u8_t const B)
-{
-    return vec_and (A, B);
-}
-#undef vec_and
-#define vec_and my_vec_and
-
-static inline vector_u8_t my_vec_avg (vector_u8_t const A, vector_u8_t const B)
-{
-    return vec_avg (A, B);
-}
-#undef vec_avg
-#define vec_avg my_vec_avg
-
-#endif
-
-static void MC_put_o_16_altivec (uint8_t * dest, const uint8_t * ref,
-				 const int stride, int height)
-{
-    vector_u8_t perm, ref0, ref1, tmp;
-
-    perm = vec_lvsl (0, ref);
-
-    height = (height >> 1) - 1;
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (15, ref);
-    ref += stride;
-    tmp = vec_perm (ref0, ref1, perm);
-
-    do {
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (15, ref);
-	ref += stride;
-	vec_st (tmp, 0, dest);
-	tmp = vec_perm (ref0, ref1, perm);
-
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (15, ref);
-	ref += stride;
-	vec_st (tmp, stride, dest);
-	dest += 2*stride;
-	tmp = vec_perm (ref0, ref1, perm);
-    } while (--height);
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (15, ref);
-    vec_st (tmp, 0, dest);
-    tmp = vec_perm (ref0, ref1, perm);
-    vec_st (tmp, stride, dest);
-}
-
-static void MC_put_o_8_altivec (uint8_t * dest, const uint8_t * ref,
-				const int stride, int height)
-{
-    vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1;
-
-    tmp0 = vec_lvsl (0, ref);
-    tmp0 = vec_mergeh (tmp0, tmp0);
-    perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
-    tmp1 = vec_lvsl (stride, ref);
-    tmp1 = vec_mergeh (tmp1, tmp1);
-    perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
-
-    height = (height >> 1) - 1;
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (7, ref);
-    ref += stride;
-    tmp0 = vec_perm (ref0, ref1, perm0);
-
-    do {
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (7, ref);
-	ref += stride;
-	vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
-	vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
-	dest += stride;
-	tmp1 = vec_perm (ref0, ref1, perm1);
-
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (7, ref);
-	ref += stride;
-	vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
-	vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
-	dest += stride;
-	tmp0 = vec_perm (ref0, ref1, perm0);
-    } while (--height);
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (7, ref);
-    vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
-    vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
-    dest += stride;
-    tmp1 = vec_perm (ref0, ref1, perm1);
-    vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
-    vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
-}
-
-static void MC_put_x_16_altivec (uint8_t * dest, const uint8_t * ref,
-				 const int stride, int height)
-{
-    vector_u8_t permA, permB, ref0, ref1, tmp;
-
-    permA = vec_lvsl (0, ref);
-    permB = vec_add (permA, vec_splat_u8 (1));
-
-    height = (height >> 1) - 1;
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (16, ref);
-    ref += stride;
-    tmp = vec_avg (vec_perm (ref0, ref1, permA),
-		   vec_perm (ref0, ref1, permB));
-
-    do {
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (16, ref);
-	ref += stride;
-	vec_st (tmp, 0, dest);
-	tmp = vec_avg (vec_perm (ref0, ref1, permA),
-		       vec_perm (ref0, ref1, permB));
-
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (16, ref);
-	ref += stride;
-	vec_st (tmp, stride, dest);
-	dest += 2*stride;
-	tmp = vec_avg (vec_perm (ref0, ref1, permA),
-		       vec_perm (ref0, ref1, permB));
-    } while (--height);
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (16, ref);
-    vec_st (tmp, 0, dest);
-    tmp = vec_avg (vec_perm (ref0, ref1, permA),
-		   vec_perm (ref0, ref1, permB));
-    vec_st (tmp, stride, dest);
-}
-
-static void MC_put_x_8_altivec (uint8_t * dest, const uint8_t * ref,
-				const int stride, int height)
-{
-    vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
-
-    ones = vec_splat_u8 (1);
-    tmp0 = vec_lvsl (0, ref);
-    tmp0 = vec_mergeh (tmp0, tmp0);
-    perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
-    perm0B = vec_add (perm0A, ones);
-    tmp1 = vec_lvsl (stride, ref);
-    tmp1 = vec_mergeh (tmp1, tmp1);
-    perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
-    perm1B = vec_add (perm1A, ones);
-
-    height = (height >> 1) - 1;
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (8, ref);
-    ref += stride;
-    tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
-		    vec_perm (ref0, ref1, perm0B));
-
-    do {
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (8, ref);
-	ref += stride;
-	vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
-	vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
-	dest += stride;
-	tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
-			vec_perm (ref0, ref1, perm1B));
-
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (8, ref);
-	ref += stride;
-	vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
-	vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
-	dest += stride;
-	tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
-			vec_perm (ref0, ref1, perm0B));
-    } while (--height);
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (8, ref);
-    vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
-    vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
-    dest += stride;
-    tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
-		    vec_perm (ref0, ref1, perm1B));
-    vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
-    vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
-}
-
-static void MC_put_y_16_altivec (uint8_t * dest, const uint8_t * ref,
-				 const int stride, int height)
-{
-    vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp;
-
-    perm = vec_lvsl (0, ref);
-
-    height = (height >> 1) - 1;
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (15, ref);
-    ref += stride;
-    tmp0 = vec_perm (ref0, ref1, perm);
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (15, ref);
-    ref += stride;
-    tmp1 = vec_perm (ref0, ref1, perm);
-    tmp = vec_avg (tmp0, tmp1);
-
-    do {
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (15, ref);
-	ref += stride;
-	vec_st (tmp, 0, dest);
-	tmp0 = vec_perm (ref0, ref1, perm);
-	tmp = vec_avg (tmp0, tmp1);
-
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (15, ref);
-	ref += stride;
-	vec_st (tmp, stride, dest);
-	dest += 2*stride;
-	tmp1 = vec_perm (ref0, ref1, perm);
-	tmp = vec_avg (tmp0, tmp1);
-    } while (--height);
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (15, ref);
-    vec_st (tmp, 0, dest);
-    tmp0 = vec_perm (ref0, ref1, perm);
-    tmp = vec_avg (tmp0, tmp1);
-    vec_st (tmp, stride, dest);
-}
-
-static void MC_put_y_8_altivec (uint8_t * dest, const uint8_t * ref,
-				const int stride, int height)
-{
-    vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1;
-
-    tmp0 = vec_lvsl (0, ref);
-    tmp0 = vec_mergeh (tmp0, tmp0);
-    perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
-    tmp1 = vec_lvsl (stride, ref);
-    tmp1 = vec_mergeh (tmp1, tmp1);
-    perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
-
-    height = (height >> 1) - 1;
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (7, ref);
-    ref += stride;
-    tmp0 = vec_perm (ref0, ref1, perm0);
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (7, ref);
-    ref += stride;
-    tmp1 = vec_perm (ref0, ref1, perm1);
-    tmp = vec_avg (tmp0, tmp1);
-
-    do {
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (7, ref);
-	ref += stride;
-	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
-	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
-	dest += stride;
-	tmp0 = vec_perm (ref0, ref1, perm0);
-	tmp = vec_avg (tmp0, tmp1);
-
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (7, ref);
-	ref += stride;
-	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
-	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
-	dest += stride;
-	tmp1 = vec_perm (ref0, ref1, perm1);
-	tmp = vec_avg (tmp0, tmp1);
-    } while (--height);
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (7, ref);
-    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
-    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
-    dest += stride;
-    tmp0 = vec_perm (ref0, ref1, perm0);
-    tmp = vec_avg (tmp0, tmp1);
-    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
-    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
-}
-
-static void MC_put_xy_16_altivec (uint8_t * dest, const uint8_t * ref,
-				  const int stride, int height)
-{
-    vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
-    vector_u8_t ones;
-
-    ones = vec_splat_u8 (1);
-    permA = vec_lvsl (0, ref);
-    permB = vec_add (permA, ones);
-
-    height = (height >> 1) - 1;
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (16, ref);
-    ref += stride;
-    A = vec_perm (ref0, ref1, permA);
-    B = vec_perm (ref0, ref1, permB);
-    avg0 = vec_avg (A, B);
-    xor0 = vec_xor (A, B);
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (16, ref);
-    ref += stride;
-    A = vec_perm (ref0, ref1, permA);
-    B = vec_perm (ref0, ref1, permB);
-    avg1 = vec_avg (A, B);
-    xor1 = vec_xor (A, B);
-    tmp = vec_sub (vec_avg (avg0, avg1),
-		   vec_and (vec_and (ones, vec_or (xor0, xor1)),
-			    vec_xor (avg0, avg1)));
-
-    do {
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (16, ref);
-	ref += stride;
-	vec_st (tmp, 0, dest);
-	A = vec_perm (ref0, ref1, permA);
-	B = vec_perm (ref0, ref1, permB);
-	avg0 = vec_avg (A, B);
-	xor0 = vec_xor (A, B);
-	tmp = vec_sub (vec_avg (avg0, avg1),
-		       vec_and (vec_and (ones, vec_or (xor0, xor1)),
-				vec_xor (avg0, avg1)));
-
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (16, ref);
-	ref += stride;
-	vec_st (tmp, stride, dest);
-	dest += 2*stride;
-	A = vec_perm (ref0, ref1, permA);
-	B = vec_perm (ref0, ref1, permB);
-	avg1 = vec_avg (A, B);
-	xor1 = vec_xor (A, B);
-	tmp = vec_sub (vec_avg (avg0, avg1),
-		       vec_and (vec_and (ones, vec_or (xor0, xor1)),
-				vec_xor (avg0, avg1)));
-    } while (--height);
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (16, ref);
-    vec_st (tmp, 0, dest);
-    A = vec_perm (ref0, ref1, permA);
-    B = vec_perm (ref0, ref1, permB);
-    avg0 = vec_avg (A, B);
-    xor0 = vec_xor (A, B);
-    tmp = vec_sub (vec_avg (avg0, avg1),
-		   vec_and (vec_and (ones, vec_or (xor0, xor1)),
-			    vec_xor (avg0, avg1)));
-    vec_st (tmp, stride, dest);
-}
-
-static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
-				 const int stride, int height)
-{
-    vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
-    vector_u8_t avg0, avg1, xor0, xor1, tmp, ones;
-
-    ones = vec_splat_u8 (1);
-    perm0A = vec_lvsl (0, ref);
-    perm0A = vec_mergeh (perm0A, perm0A);
-    perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
-    perm0B = vec_add (perm0A, ones);
-    perm1A = vec_lvsl (stride, ref);
-    perm1A = vec_mergeh (perm1A, perm1A);
-    perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
-    perm1B = vec_add (perm1A, ones);
-
-    height = (height >> 1) - 1;
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (8, ref);
-    ref += stride;
-    A = vec_perm (ref0, ref1, perm0A);
-    B = vec_perm (ref0, ref1, perm0B);
-    avg0 = vec_avg (A, B);
-    xor0 = vec_xor (A, B);
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (8, ref);
-    ref += stride;
-    A = vec_perm (ref0, ref1, perm1A);
-    B = vec_perm (ref0, ref1, perm1B);
-    avg1 = vec_avg (A, B);
-    xor1 = vec_xor (A, B);
-    tmp = vec_sub (vec_avg (avg0, avg1),
-		   vec_and (vec_and (ones, vec_or (xor0, xor1)),
-			    vec_xor (avg0, avg1)));
-
-    do {
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (8, ref);
-	ref += stride;
-	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
-	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
-	dest += stride;
-	A = vec_perm (ref0, ref1, perm0A);
-	B = vec_perm (ref0, ref1, perm0B);
-	avg0 = vec_avg (A, B);
-	xor0 = vec_xor (A, B);
-	tmp = vec_sub (vec_avg (avg0, avg1),
-		       vec_and (vec_and (ones, vec_or (xor0, xor1)),
-				vec_xor (avg0, avg1)));
-
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (8, ref);
-	ref += stride;
-	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
-	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
-	dest += stride;
-	A = vec_perm (ref0, ref1, perm1A);
-	B = vec_perm (ref0, ref1, perm1B);
-	avg1 = vec_avg (A, B);
-	xor1 = vec_xor (A, B);
-	tmp = vec_sub (vec_avg (avg0, avg1),
-		       vec_and (vec_and (ones, vec_or (xor0, xor1)),
-				vec_xor (avg0, avg1)));
-    } while (--height);
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (8, ref);
-    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
-    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
-    dest += stride;
-    A = vec_perm (ref0, ref1, perm0A);
-    B = vec_perm (ref0, ref1, perm0B);
-    avg0 = vec_avg (A, B);
-    xor0 = vec_xor (A, B);
-    tmp = vec_sub (vec_avg (avg0, avg1),
-		   vec_and (vec_and (ones, vec_or (xor0, xor1)),
-			    vec_xor (avg0, avg1)));
-    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
-    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
-}
-
-#if 0
-static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
-				 const int stride, int height)
-{
-    vector_u8_t permA, permB, ref0, ref1, A, B, C, D, tmp, zero, ones;
-    vector_u16_t splat2, temp;
-
-    ones = vec_splat_u8 (1);
-    permA = vec_lvsl (0, ref);
-    permB = vec_add (permA, ones);
-
-    zero = vec_splat_u8 (0);
-    splat2 = vec_splat_u16 (2);
-
-    do {
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (8, ref);
-	ref += stride;
-	A = vec_perm (ref0, ref1, permA);
-	B = vec_perm (ref0, ref1, permB);
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (8, ref);
-	C = vec_perm (ref0, ref1, permA);
-	D = vec_perm (ref0, ref1, permB);
-
-	temp = vec_add (vec_add ((vector_u16_t)vec_mergeh (zero, A),
-				(vector_u16_t)vec_mergeh (zero, B)),
-		       vec_add ((vector_u16_t)vec_mergeh (zero, C),
-				(vector_u16_t)vec_mergeh (zero, D)));
-	temp = vec_sr (vec_add (temp, splat2), splat2);
-	tmp = vec_pack (temp, temp);
-
-	vec_st (tmp, 0, dest);
-	dest += stride;
-	tmp = vec_avg (vec_perm (ref0, ref1, permA),
-		       vec_perm (ref0, ref1, permB));
-    } while (--height);
-}
-#endif
-
-static void MC_avg_o_16_altivec (uint8_t * dest, const uint8_t * ref,
-				 const int stride, int height)
-{
-    vector_u8_t perm, ref0, ref1, tmp, prev;
-
-    perm = vec_lvsl (0, ref);
-
-    height = (height >> 1) - 1;
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (15, ref);
-    ref += stride;
-    prev = vec_ld (0, dest);
-    tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
-
-    do {
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (15, ref);
-	ref += stride;
-	prev = vec_ld (stride, dest);
-	vec_st (tmp, 0, dest);
-	tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
-
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (15, ref);
-	ref += stride;
-	prev = vec_ld (2*stride, dest);
-	vec_st (tmp, stride, dest);
-	dest += 2*stride;
-	tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
-    } while (--height);
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (15, ref);
-    prev = vec_ld (stride, dest);
-    vec_st (tmp, 0, dest);
-    tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
-    vec_st (tmp, stride, dest);
-}
-
-static void MC_avg_o_8_altivec (uint8_t * dest, const uint8_t * ref,
-				const int stride, int height)
-{
-    vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev;
-
-    tmp0 = vec_lvsl (0, ref);
-    tmp0 = vec_mergeh (tmp0, tmp0);
-    perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
-    tmp1 = vec_lvsl (stride, ref);
-    tmp1 = vec_mergeh (tmp1, tmp1);
-    perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
-
-    height = (height >> 1) - 1;
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (7, ref);
-    ref += stride;
-    prev = vec_ld (0, dest);
-    tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
-
-    do {
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (7, ref);
-	ref += stride;
-	prev = vec_ld (stride, dest);
-	vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
-	vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
-	dest += stride;
-	tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
-
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (7, ref);
-	ref += stride;
-	prev = vec_ld (stride, dest);
-	vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
-	vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
-	dest += stride;
-	tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
-    } while (--height);
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (7, ref);
-    prev = vec_ld (stride, dest);
-    vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
-    vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
-    dest += stride;
-    tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
-    vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
-    vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
-}
-
-static void MC_avg_x_16_altivec (uint8_t * dest, const uint8_t * ref,
-				 const int stride, int height)
-{
-    vector_u8_t permA, permB, ref0, ref1, tmp, prev;
-
-    permA = vec_lvsl (0, ref);
-    permB = vec_add (permA, vec_splat_u8 (1));
-
-    height = (height >> 1) - 1;
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (16, ref);
-    prev = vec_ld (0, dest);
-    ref += stride;
-    tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
-				  vec_perm (ref0, ref1, permB)));
-
-    do {
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (16, ref);
-	ref += stride;
-	prev = vec_ld (stride, dest);
-	vec_st (tmp, 0, dest);
-	tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
-				      vec_perm (ref0, ref1, permB)));
-
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (16, ref);
-	ref += stride;
-	prev = vec_ld (2*stride, dest);
-	vec_st (tmp, stride, dest);
-	dest += 2*stride;
-	tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
-				      vec_perm (ref0, ref1, permB)));
-    } while (--height);
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (16, ref);
-    prev = vec_ld (stride, dest);
-    vec_st (tmp, 0, dest);
-    tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
-				  vec_perm (ref0, ref1, permB)));
-    vec_st (tmp, stride, dest);
-}
-
-static void MC_avg_x_8_altivec (uint8_t * dest, const uint8_t * ref,
-				const int stride, int height)
-{
-    vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
-    vector_u8_t prev;
-
-    ones = vec_splat_u8 (1);
-    tmp0 = vec_lvsl (0, ref);
-    tmp0 = vec_mergeh (tmp0, tmp0);
-    perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
-    perm0B = vec_add (perm0A, ones);
-    tmp1 = vec_lvsl (stride, ref);
-    tmp1 = vec_mergeh (tmp1, tmp1);
-    perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
-    perm1B = vec_add (perm1A, ones);
-
-    height = (height >> 1) - 1;
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (8, ref);
-    prev = vec_ld (0, dest);
-    ref += stride;
-    tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
-				   vec_perm (ref0, ref1, perm0B)));
-
-    do {
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (8, ref);
-	ref += stride;
-	prev = vec_ld (stride, dest);
-	vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
-	vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
-	dest += stride;
-	tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
-				       vec_perm (ref0, ref1, perm1B)));
-
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (8, ref);
-	ref += stride;
-	prev = vec_ld (stride, dest);
-	vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
-	vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
-	dest += stride;
-	tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
-				       vec_perm (ref0, ref1, perm0B)));
-    } while (--height);
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (8, ref);
-    prev = vec_ld (stride, dest);
-    vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
-    vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
-    dest += stride;
-    tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
-				   vec_perm (ref0, ref1, perm1B)));
-    vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
-    vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
-}
-
-static void MC_avg_y_16_altivec (uint8_t * dest, const uint8_t * ref,
-				 const int stride, int height)
-{
-    vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev;
-
-    perm = vec_lvsl (0, ref);
-
-    height = (height >> 1) - 1;
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (15, ref);
-    ref += stride;
-    tmp0 = vec_perm (ref0, ref1, perm);
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (15, ref);
-    ref += stride;
-    prev = vec_ld (0, dest);
-    tmp1 = vec_perm (ref0, ref1, perm);
-    tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
-
-    do {
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (15, ref);
-	ref += stride;
-	prev = vec_ld (stride, dest);
-	vec_st (tmp, 0, dest);
-	tmp0 = vec_perm (ref0, ref1, perm);
-	tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
-
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (15, ref);
-	ref += stride;
-	prev = vec_ld (2*stride, dest);
-	vec_st (tmp, stride, dest);
-	dest += 2*stride;
-	tmp1 = vec_perm (ref0, ref1, perm);
-	tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
-    } while (--height);
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (15, ref);
-    prev = vec_ld (stride, dest);
-    vec_st (tmp, 0, dest);
-    tmp0 = vec_perm (ref0, ref1, perm);
-    tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
-    vec_st (tmp, stride, dest);
-}
-
-static void MC_avg_y_8_altivec (uint8_t * dest, const uint8_t * ref,
-				const int stride, int height)
-{
-    vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev;
-
-    tmp0 = vec_lvsl (0, ref);
-    tmp0 = vec_mergeh (tmp0, tmp0);
-    perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
-    tmp1 = vec_lvsl (stride, ref);
-    tmp1 = vec_mergeh (tmp1, tmp1);
-    perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
-
-    height = (height >> 1) - 1;
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (7, ref);
-    ref += stride;
-    tmp0 = vec_perm (ref0, ref1, perm0);
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (7, ref);
-    ref += stride;
-    prev = vec_ld (0, dest);
-    tmp1 = vec_perm (ref0, ref1, perm1);
-    tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
-
-    do {
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (7, ref);
-	ref += stride;
-	prev = vec_ld (stride, dest);
-	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
-	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
-	dest += stride;
-	tmp0 = vec_perm (ref0, ref1, perm0);
-	tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
-
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (7, ref);
-	ref += stride;
-	prev = vec_ld (stride, dest);
-	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
-	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
-	dest += stride;
-	tmp1 = vec_perm (ref0, ref1, perm1);
-	tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
-    } while (--height);
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (7, ref);
-    prev = vec_ld (stride, dest);
-    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
-    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
-    dest += stride;
-    tmp0 = vec_perm (ref0, ref1, perm0);
-    tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
-    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
-    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
-}
-
-static void MC_avg_xy_16_altivec (uint8_t * dest, const uint8_t * ref,
-				  const int stride, int height)
-{
-    vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
-    vector_u8_t ones, prev;
-
-    ones = vec_splat_u8 (1);
-    permA = vec_lvsl (0, ref);
-    permB = vec_add (permA, ones);
-
-    height = (height >> 1) - 1;
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (16, ref);
-    ref += stride;
-    A = vec_perm (ref0, ref1, permA);
-    B = vec_perm (ref0, ref1, permB);
-    avg0 = vec_avg (A, B);
-    xor0 = vec_xor (A, B);
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (16, ref);
-    ref += stride;
-    prev = vec_ld (0, dest);
-    A = vec_perm (ref0, ref1, permA);
-    B = vec_perm (ref0, ref1, permB);
-    avg1 = vec_avg (A, B);
-    xor1 = vec_xor (A, B);
-    tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
-				  vec_and (vec_and (ones, vec_or (xor0, xor1)),
-					   vec_xor (avg0, avg1))));
-
-    do {
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (16, ref);
-	ref += stride;
-	prev = vec_ld (stride, dest);
-	vec_st (tmp, 0, dest);
-	A = vec_perm (ref0, ref1, permA);
-	B = vec_perm (ref0, ref1, permB);
-	avg0 = vec_avg (A, B);
-	xor0 = vec_xor (A, B);
-	tmp = vec_avg (prev,
-		       vec_sub (vec_avg (avg0, avg1),
-				vec_and (vec_and (ones, vec_or (xor0, xor1)),
-					 vec_xor (avg0, avg1))));
-
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (16, ref);
-	ref += stride;
-	prev = vec_ld (2*stride, dest);
-	vec_st (tmp, stride, dest);
-	dest += 2*stride;
-	A = vec_perm (ref0, ref1, permA);
-	B = vec_perm (ref0, ref1, permB);
-	avg1 = vec_avg (A, B);
-	xor1 = vec_xor (A, B);
-	tmp = vec_avg (prev,
-		       vec_sub (vec_avg (avg0, avg1),
-				vec_and (vec_and (ones, vec_or (xor0, xor1)),
-					 vec_xor (avg0, avg1))));
-    } while (--height);
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (16, ref);
-    prev = vec_ld (stride, dest);
-    vec_st (tmp, 0, dest);
-    A = vec_perm (ref0, ref1, permA);
-    B = vec_perm (ref0, ref1, permB);
-    avg0 = vec_avg (A, B);
-    xor0 = vec_xor (A, B);
-    tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
-				  vec_and (vec_and (ones, vec_or (xor0, xor1)),
-					   vec_xor (avg0, avg1))));
-    vec_st (tmp, stride, dest);
-}
-
-static void MC_avg_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
-				 const int stride, int height)
-{
-    vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
-    vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev;
-
-    ones = vec_splat_u8 (1);
-    perm0A = vec_lvsl (0, ref);
-    perm0A = vec_mergeh (perm0A, perm0A);
-    perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
-    perm0B = vec_add (perm0A, ones);
-    perm1A = vec_lvsl (stride, ref);
-    perm1A = vec_mergeh (perm1A, perm1A);
-    perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
-    perm1B = vec_add (perm1A, ones);
-
-    height = (height >> 1) - 1;
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (8, ref);
-    ref += stride;
-    A = vec_perm (ref0, ref1, perm0A);
-    B = vec_perm (ref0, ref1, perm0B);
-    avg0 = vec_avg (A, B);
-    xor0 = vec_xor (A, B);
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (8, ref);
-    ref += stride;
-    prev = vec_ld (0, dest);
-    A = vec_perm (ref0, ref1, perm1A);
-    B = vec_perm (ref0, ref1, perm1B);
-    avg1 = vec_avg (A, B);
-    xor1 = vec_xor (A, B);
-    tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
-				  vec_and (vec_and (ones, vec_or (xor0, xor1)),
-					   vec_xor (avg0, avg1))));
-
-    do {
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (8, ref);
-	ref += stride;
-	prev = vec_ld (stride, dest);
-	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
-	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
-	dest += stride;
-	A = vec_perm (ref0, ref1, perm0A);
-	B = vec_perm (ref0, ref1, perm0B);
-	avg0 = vec_avg (A, B);
-	xor0 = vec_xor (A, B);
-	tmp = vec_avg (prev,
-		       vec_sub (vec_avg (avg0, avg1),
-				vec_and (vec_and (ones, vec_or (xor0, xor1)),
-					 vec_xor (avg0, avg1))));
-
-	ref0 = vec_ld (0, ref);
-	ref1 = vec_ld (8, ref);
-	ref += stride;
-	prev = vec_ld (stride, dest);
-	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
-	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
-	dest += stride;
-	A = vec_perm (ref0, ref1, perm1A);
-	B = vec_perm (ref0, ref1, perm1B);
-	avg1 = vec_avg (A, B);
-	xor1 = vec_xor (A, B);
-	tmp = vec_avg (prev,
-		       vec_sub (vec_avg (avg0, avg1),
-				vec_and (vec_and (ones, vec_or (xor0, xor1)),
-					 vec_xor (avg0, avg1))));
-    } while (--height);
-
-    ref0 = vec_ld (0, ref);
-    ref1 = vec_ld (8, ref);
-    prev = vec_ld (stride, dest);
-    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
-    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
-    dest += stride;
-    A = vec_perm (ref0, ref1, perm0A);
-    B = vec_perm (ref0, ref1, perm0B);
-    avg0 = vec_avg (A, B);
-    xor0 = vec_xor (A, B);
-    tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
-				  vec_and (vec_and (ones, vec_or (xor0, xor1)),
-					   vec_xor (avg0, avg1))));
-    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
-    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
-}
-
-MPEG2_MC_EXTERN (altivec)
-
-#endif
diff --git a/src/libmpeg2new/libmpeg2/motion_comp_mlib.c b/src/libmpeg2new/libmpeg2/motion_comp_mlib.c
deleted file mode 100644
index 71c085029..000000000
--- a/src/libmpeg2new/libmpeg2/motion_comp_mlib.c
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * motion_comp_mlib.c
- * Copyright (C) 2000-2003 Håkan Hjort <d95hjort@dtek.chalmers.se>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include "config.h"
-
-#ifdef LIBMPEG2_MLIB
-
-#include <mlib_types.h>
-#include <mlib_status.h>
-#include <mlib_sys.h>
-#include <mlib_video.h>
-#include <inttypes.h>
-
-#include "../include/mpeg2.h"
-#include "mpeg2_internal.h"
-
-static void MC_put_o_16_mlib (uint8_t * dest, const uint8_t * ref,
-			      int stride, int height)
-{
-    if (height == 16)
-	mlib_VideoCopyRef_U8_U8_16x16 (dest, (uint8_t *) ref, stride);
-    else
-	mlib_VideoCopyRef_U8_U8_16x8 (dest, (uint8_t *) ref, stride);
-}
-
-static void MC_put_x_16_mlib (uint8_t * dest, const uint8_t * ref,
-			      int stride, int height)
-{
-    if (height == 16)
-	mlib_VideoInterpX_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride);
-    else
-	mlib_VideoInterpX_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride);
-}
-
-static void MC_put_y_16_mlib (uint8_t * dest, const uint8_t * ref,
-			      int stride, int height)
-{
-    if (height == 16)
-	mlib_VideoInterpY_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride);
-    else
-	mlib_VideoInterpY_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride);
-}
-
-static void MC_put_xy_16_mlib (uint8_t * dest, const uint8_t * ref,
-			       int stride, int height)
-{
-    if (height == 16)
-	mlib_VideoInterpXY_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride);
-    else
-	mlib_VideoInterpXY_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride);
-}
-
-static void MC_put_o_8_mlib (uint8_t * dest, const uint8_t * ref,
-			     int stride, int height)
-{
-    if (height == 8)
-	mlib_VideoCopyRef_U8_U8_8x8 (dest, (uint8_t *) ref, stride);
-    else
-	mlib_VideoCopyRef_U8_U8_8x4 (dest, (uint8_t *) ref, stride);
-}
-
-static void MC_put_x_8_mlib (uint8_t * dest, const uint8_t * ref,
-			     int stride, int height)
-{
-    if (height == 8)
-	mlib_VideoInterpX_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride);
-    else
-	mlib_VideoInterpX_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride);
-}
-
-static void MC_put_y_8_mlib (uint8_t * dest, const uint8_t * ref,
-			     int stride, int height)
-{
-    if (height == 8)
-	mlib_VideoInterpY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride);
-    else
-	mlib_VideoInterpY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride);
-}
-
-static void MC_put_xy_8_mlib (uint8_t * dest, const uint8_t * ref,
-			      int stride, int height)
-{
-    if (height == 8)
-	mlib_VideoInterpXY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride);
-    else
-	mlib_VideoInterpXY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride);
-}
-
-static void MC_avg_o_16_mlib (uint8_t * dest, const uint8_t * ref,
-			      int stride, int height)
-{
-    if (height == 16)
-	mlib_VideoCopyRefAve_U8_U8_16x16 (dest, (uint8_t *) ref, stride);
-    else
-	mlib_VideoCopyRefAve_U8_U8_16x8 (dest, (uint8_t *) ref, stride);
-}
-
-static void MC_avg_x_16_mlib (uint8_t * dest, const uint8_t * ref,
-			      int stride, int height)
-{
-    if (height == 16)
-	mlib_VideoInterpAveX_U8_U8_16x16 (dest, (uint8_t *) ref,
-					  stride, stride);
-    else
-	mlib_VideoInterpAveX_U8_U8_16x8 (dest, (uint8_t *) ref,
-					 stride, stride);
-}
-
-static void MC_avg_y_16_mlib (uint8_t * dest, const uint8_t * ref,
-			      int stride, int height)
-{
-    if (height == 16)
-	mlib_VideoInterpAveY_U8_U8_16x16 (dest, (uint8_t *) ref,
-					  stride, stride);
-    else
-	mlib_VideoInterpAveY_U8_U8_16x8 (dest, (uint8_t *) ref,
-					 stride, stride);
-}
-
-static void MC_avg_xy_16_mlib (uint8_t * dest, const uint8_t * ref,
-			       int stride, int height)
-{
-    if (height == 16)
-	mlib_VideoInterpAveXY_U8_U8_16x16 (dest, (uint8_t *) ref,
-					   stride, stride);
-    else
-	mlib_VideoInterpAveXY_U8_U8_16x8 (dest, (uint8_t *) ref,
-					  stride, stride);
-}
-
-static void MC_avg_o_8_mlib (uint8_t * dest, const uint8_t * ref,
-			     int stride, int height)
-{
-    if (height == 8)
-	mlib_VideoCopyRefAve_U8_U8_8x8 (dest, (uint8_t *) ref, stride);
-    else
-	mlib_VideoCopyRefAve_U8_U8_8x4 (dest, (uint8_t *) ref, stride);
-}
-
-static void MC_avg_x_8_mlib (uint8_t * dest, const uint8_t * ref,
-			     int stride, int height)
-{
-    if (height == 8)
-	mlib_VideoInterpAveX_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride);
-    else
-	mlib_VideoInterpAveX_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride);
-}
-
-static void MC_avg_y_8_mlib (uint8_t * dest, const uint8_t * ref,
-			     int stride, int height)
-{
-    if (height == 8)
-	mlib_VideoInterpAveY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride);
-    else
-	mlib_VideoInterpAveY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride);
-}
-
-static void MC_avg_xy_8_mlib (uint8_t * dest, const uint8_t * ref,
-			      int stride, int height)
-{
-    if (height == 8)
-	mlib_VideoInterpAveXY_U8_U8_8x8 (dest, (uint8_t *) ref,
-					 stride, stride);
-    else
-	mlib_VideoInterpAveXY_U8_U8_8x4 (dest, (uint8_t *) ref,
-					 stride, stride);
-}
-
-MPEG2_MC_EXTERN (mlib)
-
-#endif
diff --git a/src/libmpeg2new/libmpeg2/motion_comp_mmx.c b/src/libmpeg2new/libmpeg2/motion_comp_mmx.c
deleted file mode 100644
index 8694bdfea..000000000
--- a/src/libmpeg2new/libmpeg2/motion_comp_mmx.c
+++ /dev/null
@@ -1,1005 +0,0 @@
-/*
- * motion_comp_mmx.c
- * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include "config.h"
-
-#ifdef ARCH_X86
-
-#include <inttypes.h>
-
-#include "../include/mpeg2.h"
-#include "../include/attributes.h"
-#include "mpeg2_internal.h"
-#include "../include/mmx.h"
-
-#define CPU_MMXEXT 0
-#define CPU_3DNOW 1
-
-
-/* MMX code - needs a rewrite */
-
-/*
- * Motion Compensation frequently needs to average values using the
- * formula (x+y+1)>>1. Both MMXEXT and 3Dnow include one instruction
- * to compute this, but it's been left out of classic MMX.
- *
- * We need to be careful of overflows when doing this computation.
- * Rather than unpacking data to 16-bits, which reduces parallelism,
- * we use the following formulas:
- *
- * (x+y)>>1 == (x&y)+((x^y)>>1)
- * (x+y+1)>>1 == (x|y)-((x^y)>>1)
- */
-
-/* some rounding constants */
-static mmx_t mask1 = {0xfefefefefefefefeLL};
-static mmx_t round4 = {0x0002000200020002LL};
-
-/*
- * This code should probably be compiled with loop unrolling
- * (ie, -funroll-loops in gcc)becuase some of the loops
- * use a small static number of iterations. This was written
- * with the assumption the compiler knows best about when
- * unrolling will help
- */
-
-static inline void mmx_zero_reg ()
-{
-    /* load 0 into mm0 */
-    pxor_r2r (mm0, mm0);
-}
-
-static inline void mmx_average_2_U8 (uint8_t * dest, const uint8_t * src1,
-				     const uint8_t * src2)
-{
-    /* *dest = (*src1 + *src2 + 1)/ 2; */
-
-    movq_m2r (*src1, mm1);	/* load 8 src1 bytes */
-    movq_r2r (mm1, mm2);	/* copy 8 src1 bytes */
-
-    movq_m2r (*src2, mm3);	/* load 8 src2 bytes */
-    movq_r2r (mm3, mm4);	/* copy 8 src2 bytes */
-
-    pxor_r2r (mm1, mm3);	/* xor src1 and src2 */
-    pand_m2r (mask1, mm3);	/* mask lower bits */
-    psrlq_i2r (1, mm3);		/* /2 */
-    por_r2r (mm2, mm4);		/* or src1 and src2 */
-    psubb_r2r (mm3, mm4);	/* subtract subresults */
-    movq_r2m (mm4, *dest);	/* store result in dest */
-}
-
-static inline void mmx_interp_average_2_U8 (uint8_t * dest,
-					    const uint8_t * src1,
-					    const uint8_t * src2)
-{
-    /* *dest = (*dest + (*src1 + *src2 + 1)/ 2 + 1)/ 2; */
-
-    movq_m2r (*dest, mm1);	/* load 8 dest bytes */
-    movq_r2r (mm1, mm2);	/* copy 8 dest bytes */
-
-    movq_m2r (*src1, mm3);	/* load 8 src1 bytes */
-    movq_r2r (mm3, mm4);	/* copy 8 src1 bytes */
-
-    movq_m2r (*src2, mm5);	/* load 8 src2 bytes */
-    movq_r2r (mm5, mm6);	/* copy 8 src2 bytes */
-
-    pxor_r2r (mm3, mm5);	/* xor src1 and src2 */
-    pand_m2r (mask1, mm5);	/* mask lower bits */
-    psrlq_i2r (1, mm5);		/* /2 */
-    por_r2r (mm4, mm6);		/* or src1 and src2 */
-    psubb_r2r (mm5, mm6);	/* subtract subresults */
-    movq_r2r (mm6, mm5);	/* copy subresult */
-
-    pxor_r2r (mm1, mm5);	/* xor srcavg and dest */
-    pand_m2r (mask1, mm5);	/* mask lower bits */
-    psrlq_i2r (1, mm5);		/* /2 */
-    por_r2r (mm2, mm6);		/* or srcavg and dest */
-    psubb_r2r (mm5, mm6);	/* subtract subresults */
-    movq_r2m (mm6, *dest);	/* store result in dest */
-}
-
-static inline void mmx_average_4_U8 (uint8_t * dest, const uint8_t * src1,
-				     const uint8_t * src2,
-				     const uint8_t * src3,
-				     const uint8_t * src4)
-{
-    /* *dest = (*src1 + *src2 + *src3 + *src4 + 2)/ 4; */
-
-    movq_m2r (*src1, mm1);	/* load 8 src1 bytes */
-    movq_r2r (mm1, mm2);	/* copy 8 src1 bytes */
-
-    punpcklbw_r2r (mm0, mm1);	/* unpack low src1 bytes */
-    punpckhbw_r2r (mm0, mm2);	/* unpack high src1 bytes */
-
-    movq_m2r (*src2, mm3);	/* load 8 src2 bytes */
-    movq_r2r (mm3, mm4);	/* copy 8 src2 bytes */
-
-    punpcklbw_r2r (mm0, mm3);	/* unpack low src2 bytes */
-    punpckhbw_r2r (mm0, mm4);	/* unpack high src2 bytes */
-
-    paddw_r2r (mm3, mm1);	/* add lows */
-    paddw_r2r (mm4, mm2);	/* add highs */
-
-    /* now have partials in mm1 and mm2 */
-
-    movq_m2r (*src3, mm3);	/* load 8 src3 bytes */
-    movq_r2r (mm3, mm4);	/* copy 8 src3 bytes */
-
-    punpcklbw_r2r (mm0, mm3);	/* unpack low src3 bytes */
-    punpckhbw_r2r (mm0, mm4);	/* unpack high src3 bytes */
-
-    paddw_r2r (mm3, mm1);	/* add lows */
-    paddw_r2r (mm4, mm2);	/* add highs */
-
-    movq_m2r (*src4, mm5);	/* load 8 src4 bytes */
-    movq_r2r (mm5, mm6);	/* copy 8 src4 bytes */
-
-    punpcklbw_r2r (mm0, mm5);	/* unpack low src4 bytes */
-    punpckhbw_r2r (mm0, mm6);	/* unpack high src4 bytes */
-
-    paddw_r2r (mm5, mm1);	/* add lows */
-    paddw_r2r (mm6, mm2);	/* add highs */
-
-    /* now have subtotal in mm1 and mm2 */
-
-    paddw_m2r (round4, mm1);
-    psraw_i2r (2, mm1);		/* /4 */
-    paddw_m2r (round4, mm2);
-    psraw_i2r (2, mm2);		/* /4 */
-
-    packuswb_r2r (mm2, mm1);	/* pack (w/ saturation) */
-    movq_r2m (mm1, *dest);	/* store result in dest */
-}
-
-static inline void mmx_interp_average_4_U8 (uint8_t * dest,
-					    const uint8_t * src1,
-					    const uint8_t * src2,
-					    const uint8_t * src3,
-					    const uint8_t * src4)
-{
-    /* *dest = (*dest + (*src1 + *src2 + *src3 + *src4 + 2)/ 4 + 1)/ 2; */
-
-    movq_m2r (*src1, mm1);	/* load 8 src1 bytes */
-    movq_r2r (mm1, mm2);	/* copy 8 src1 bytes */
-
-    punpcklbw_r2r (mm0, mm1);	/* unpack low src1 bytes */
-    punpckhbw_r2r (mm0, mm2);	/* unpack high src1 bytes */
-
-    movq_m2r (*src2, mm3);	/* load 8 src2 bytes */
-    movq_r2r (mm3, mm4);	/* copy 8 src2 bytes */
-
-    punpcklbw_r2r (mm0, mm3);	/* unpack low src2 bytes */
-    punpckhbw_r2r (mm0, mm4);	/* unpack high src2 bytes */
-
-    paddw_r2r (mm3, mm1);	/* add lows */
-    paddw_r2r (mm4, mm2);	/* add highs */
-
-    /* now have partials in mm1 and mm2 */
-
-    movq_m2r (*src3, mm3);	/* load 8 src3 bytes */
-    movq_r2r (mm3, mm4);	/* copy 8 src3 bytes */
-
-    punpcklbw_r2r (mm0, mm3);	/* unpack low src3 bytes */
-    punpckhbw_r2r (mm0, mm4);	/* unpack high src3 bytes */
-
-    paddw_r2r (mm3, mm1);	/* add lows */
-    paddw_r2r (mm4, mm2);	/* add highs */
-
-    movq_m2r (*src4, mm5);	/* load 8 src4 bytes */
-    movq_r2r (mm5, mm6);	/* copy 8 src4 bytes */
-
-    punpcklbw_r2r (mm0, mm5);	/* unpack low src4 bytes */
-    punpckhbw_r2r (mm0, mm6);	/* unpack high src4 bytes */
-
-    paddw_r2r (mm5, mm1);	/* add lows */
-    paddw_r2r (mm6, mm2);	/* add highs */
-
-    paddw_m2r (round4, mm1);
-    psraw_i2r (2, mm1);		/* /4 */
-    paddw_m2r (round4, mm2);
-    psraw_i2r (2, mm2);		/* /4 */
-
-    /* now have subtotal/4 in mm1 and mm2 */
-
-    movq_m2r (*dest, mm3);	/* load 8 dest bytes */
-    movq_r2r (mm3, mm4);	/* copy 8 dest bytes */
-
-    packuswb_r2r (mm2, mm1);	/* pack (w/ saturation) */
-    movq_r2r (mm1,mm2);		/* copy subresult */
-
-    pxor_r2r (mm1, mm3);	/* xor srcavg and dest */
-    pand_m2r (mask1, mm3);	/* mask lower bits */
-    psrlq_i2r (1, mm3);		/* /2 */
-    por_r2r (mm2, mm4);		/* or srcavg and dest */
-    psubb_r2r (mm3, mm4);	/* subtract subresults */
-    movq_r2m (mm4, *dest);	/* store result in dest */
-}
-
-/*-----------------------------------------------------------------------*/
-
-static inline void MC_avg_mmx (const int width, int height, uint8_t * dest,
-			       const uint8_t * ref, const int stride)
-{
-    mmx_zero_reg ();
-
-    do {
-	mmx_average_2_U8 (dest, dest, ref);
-
-	if (width == 16)
-	    mmx_average_2_U8 (dest+8, dest+8, ref+8);
-
-	dest += stride;
-	ref += stride;
-    } while (--height);
-}
-
-static void MC_avg_o_16_mmx (uint8_t * dest, const uint8_t * ref,
-			     int stride, int height)
-{
-    MC_avg_mmx (16, height, dest, ref, stride);
-}
-
-static void MC_avg_o_8_mmx (uint8_t * dest, const uint8_t * ref,
-			    int stride, int height)
-{
-    MC_avg_mmx (8, height, dest, ref, stride);
-}
-
-/*-----------------------------------------------------------------------*/
-
-static inline void MC_put_mmx (const int width, int height, uint8_t * dest,
-			       const uint8_t * ref, const int stride)
-{
-    mmx_zero_reg ();
-
-    do {
-	movq_m2r (* ref, mm1);	/* load 8 ref bytes */
-	movq_r2m (mm1,* dest);	/* store 8 bytes at curr */
-
-	if (width == 16)
-	    {
-		movq_m2r (* (ref+8), mm1);	/* load 8 ref bytes */
-		movq_r2m (mm1,* (dest+8));	/* store 8 bytes at curr */
-	    }
-
-	dest += stride;
-	ref += stride;
-    } while (--height);
-}
-
-static void MC_put_o_16_mmx (uint8_t * dest, const uint8_t * ref,
-			     int stride, int height)
-{
-    MC_put_mmx (16, height, dest, ref, stride);
-}
-
-static void MC_put_o_8_mmx (uint8_t * dest, const uint8_t * ref,
-			    int stride, int height)
-{
-    MC_put_mmx (8, height, dest, ref, stride);
-}
-
-/*-----------------------------------------------------------------------*/
-
-/* Half pixel interpolation in the x direction */
-static inline void MC_avg_x_mmx (const int width, int height, uint8_t * dest,
-				 const uint8_t * ref, const int stride)
-{
-    mmx_zero_reg ();
-
-    do {
-	mmx_interp_average_2_U8 (dest, ref, ref+1);
-
-	if (width == 16)
-	    mmx_interp_average_2_U8 (dest+8, ref+8, ref+9);
-
-	dest += stride;
-	ref += stride;
-    } while (--height);
-}
-
-static void MC_avg_x_16_mmx (uint8_t * dest, const uint8_t * ref,
-			     int stride, int height)
-{
-    MC_avg_x_mmx (16, height, dest, ref, stride);
-}
-
-static void MC_avg_x_8_mmx (uint8_t * dest, const uint8_t * ref,
-			    int stride, int height)
-{
-    MC_avg_x_mmx (8, height, dest, ref, stride);
-}
-
-/*-----------------------------------------------------------------------*/
-
-static inline void MC_put_x_mmx (const int width, int height, uint8_t * dest,
-				 const uint8_t * ref, const int stride)
-{
-    mmx_zero_reg ();
-
-    do {
-	mmx_average_2_U8 (dest, ref, ref+1);
-
-	if (width == 16)
-	    mmx_average_2_U8 (dest+8, ref+8, ref+9);
-
-	dest += stride;
-	ref += stride;
-    } while (--height);
-}
-
-static void MC_put_x_16_mmx (uint8_t * dest, const uint8_t * ref,
-			     int stride, int height)
-{
-    MC_put_x_mmx (16, height, dest, ref, stride);
-}
-
-static void MC_put_x_8_mmx (uint8_t * dest, const uint8_t * ref,
-			    int stride, int height)
-{
-    MC_put_x_mmx (8, height, dest, ref, stride);
-}
-
-/*-----------------------------------------------------------------------*/
-
-static inline void MC_avg_xy_mmx (const int width, int height, uint8_t * dest,
-				  const uint8_t * ref, const int stride)
-{
-    const uint8_t * ref_next = ref + stride;
-
-    mmx_zero_reg ();
-
-    do {
-	mmx_interp_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1);
-
-	if (width == 16)
-	    mmx_interp_average_4_U8 (dest+8, ref+8, ref+9,
-				     ref_next+8, ref_next+9);
-
-	dest += stride;
-	ref += stride;
-	ref_next += stride;
-    } while (--height);
-}
-
-static void MC_avg_xy_16_mmx (uint8_t * dest, const uint8_t * ref,
-			      int stride, int height)
-{
-    MC_avg_xy_mmx (16, height, dest, ref, stride);
-}
-
-static void MC_avg_xy_8_mmx (uint8_t * dest, const uint8_t * ref,
-			     int stride, int height)
-{
-    MC_avg_xy_mmx (8, height, dest, ref, stride);
-}
-
-/*-----------------------------------------------------------------------*/
-
-static inline void MC_put_xy_mmx (const int width, int height, uint8_t * dest,
-				  const uint8_t * ref, const int stride)
-{
-    const uint8_t * ref_next = ref + stride;
-
-    mmx_zero_reg ();
-
-    do {
-	mmx_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1);
-
-	if (width == 16)
-	    mmx_average_4_U8 (dest+8, ref+8, ref+9, ref_next+8, ref_next+9);
-
-	dest += stride;
-	ref += stride;
-	ref_next += stride;
-    } while (--height);
-}
-
-static void MC_put_xy_16_mmx (uint8_t * dest, const uint8_t * ref,
-			      int stride, int height)
-{
-    MC_put_xy_mmx (16, height, dest, ref, stride);
-}
-
-static void MC_put_xy_8_mmx (uint8_t * dest, const uint8_t * ref,
-			     int stride, int height)
-{
-    MC_put_xy_mmx (8, height, dest, ref, stride);
-}
-
-/*-----------------------------------------------------------------------*/
-
-static inline void MC_avg_y_mmx (const int width, int height, uint8_t * dest,
-				 const uint8_t * ref, const int stride)
-{
-    const uint8_t * ref_next = ref + stride;
-
-    mmx_zero_reg ();
-
-    do {
-	mmx_interp_average_2_U8 (dest, ref, ref_next);
-
-	if (width == 16)
-	    mmx_interp_average_2_U8 (dest+8, ref+8, ref_next+8);
-
-	dest += stride;
-	ref += stride;
-	ref_next += stride;
-    } while (--height);
-}
-
-static void MC_avg_y_16_mmx (uint8_t * dest, const uint8_t * ref,
-			     int stride, int height)
-{
-    MC_avg_y_mmx (16, height, dest, ref, stride);
-}
-
-static void MC_avg_y_8_mmx (uint8_t * dest, const uint8_t * ref,
-			    int stride, int height)
-{
-    MC_avg_y_mmx (8, height, dest, ref, stride);
-}
-
-/*-----------------------------------------------------------------------*/
-
-static inline void MC_put_y_mmx (const int width, int height, uint8_t * dest,
-				 const uint8_t * ref, const int stride)
-{
-    const uint8_t * ref_next = ref + stride;
-
-    mmx_zero_reg ();
-
-    do {
-	mmx_average_2_U8 (dest, ref, ref_next);
-
-	if (width == 16)
-	    mmx_average_2_U8 (dest+8, ref+8, ref_next+8);
-
-	dest += stride;
-	ref += stride;
-	ref_next += stride;
-    } while (--height);
-}
-
-static void MC_put_y_16_mmx (uint8_t * dest, const uint8_t * ref,
-			     int stride, int height)
-{
-    MC_put_y_mmx (16, height, dest, ref, stride);
-}
-
-static void MC_put_y_8_mmx (uint8_t * dest, const uint8_t * ref,
-			    int stride, int height)
-{
-    MC_put_y_mmx (8, height, dest, ref, stride);
-}
-
-
-MPEG2_MC_EXTERN (mmx)
-
-
-
-
-
-
-
-/* CPU_MMXEXT/CPU_3DNOW adaptation layer */
-
-#define pavg_r2r(src,dest)		\
-do {					\
-    if (cpu == CPU_MMXEXT)		\
-	pavgb_r2r (src, dest);		\
-    else				\
-	pavgusb_r2r (src, dest);	\
-} while (0)
-
-#define pavg_m2r(src,dest)		\
-do {					\
-    if (cpu == CPU_MMXEXT)		\
-	pavgb_m2r (src, dest);		\
-    else				\
-	pavgusb_m2r (src, dest);	\
-} while (0)
-
-
-/* CPU_MMXEXT code */
-
-
-static inline void MC_put1_8 (int height, uint8_t * dest, const uint8_t * ref,
-			      const int stride)
-{
-    do {
-	movq_m2r (*ref, mm0);
-	movq_r2m (mm0, *dest);
-	ref += stride;
-	dest += stride;
-    } while (--height);
-}
-
-static inline void MC_put1_16 (int height, uint8_t * dest, const uint8_t * ref,
-			       const int stride)
-{
-    do {
-	movq_m2r (*ref, mm0);
-	movq_m2r (*(ref+8), mm1);
-	ref += stride;
-	movq_r2m (mm0, *dest);
-	movq_r2m (mm1, *(dest+8));
-	dest += stride;
-    } while (--height);
-}
-
-static inline void MC_avg1_8 (int height, uint8_t * dest, const uint8_t * ref,
-			      const int stride, const int cpu)
-{
-    do {
-	movq_m2r (*ref, mm0);
-	pavg_m2r (*dest, mm0);
-	ref += stride;
-	movq_r2m (mm0, *dest);
-	dest += stride;
-    } while (--height);
-}
-
-static inline void MC_avg1_16 (int height, uint8_t * dest, const uint8_t * ref,
-			       const int stride, const int cpu)
-{
-    do {
-	movq_m2r (*ref, mm0);
-	movq_m2r (*(ref+8), mm1);
-	pavg_m2r (*dest, mm0);
-	pavg_m2r (*(dest+8), mm1);
-	movq_r2m (mm0, *dest);
-	ref += stride;
-	movq_r2m (mm1, *(dest+8));
-	dest += stride;
-    } while (--height);
-}
-
-static inline void MC_put2_8 (int height, uint8_t * dest, const uint8_t * ref,
-			      const int stride, const int offset,
-			      const int cpu)
-{
-    do {
-	movq_m2r (*ref, mm0);
-	pavg_m2r (*(ref+offset), mm0);
-	ref += stride;
-	movq_r2m (mm0, *dest);
-	dest += stride;
-    } while (--height);
-}
-
-static inline void MC_put2_16 (int height, uint8_t * dest, const uint8_t * ref,
-			       const int stride, const int offset,
-			       const int cpu)
-{
-    do {
-	movq_m2r (*ref, mm0);
-	movq_m2r (*(ref+8), mm1);
-	pavg_m2r (*(ref+offset), mm0);
-	pavg_m2r (*(ref+offset+8), mm1);
-	movq_r2m (mm0, *dest);
-	ref += stride;
-	movq_r2m (mm1, *(dest+8));
-	dest += stride;
-    } while (--height);
-}
-
-static inline void MC_avg2_8 (int height, uint8_t * dest, const uint8_t * ref,
-			      const int stride, const int offset,
-			      const int cpu)
-{
-    do {
-	movq_m2r (*ref, mm0);
-	pavg_m2r (*(ref+offset), mm0);
-	pavg_m2r (*dest, mm0);
-	ref += stride;
-	movq_r2m (mm0, *dest);
-	dest += stride;
-    } while (--height);
-}
-
-static inline void MC_avg2_16 (int height, uint8_t * dest, const uint8_t * ref,
-			       const int stride, const int offset,
-			       const int cpu)
-{
-    do {
-	movq_m2r (*ref, mm0);
-	movq_m2r (*(ref+8), mm1);
-	pavg_m2r (*(ref+offset), mm0);
-	pavg_m2r (*(ref+offset+8), mm1);
-	pavg_m2r (*dest, mm0);
-	pavg_m2r (*(dest+8), mm1);
-	ref += stride;
-	movq_r2m (mm0, *dest);
-	movq_r2m (mm1, *(dest+8));
-	dest += stride;
-    } while (--height);
-}
-
-static mmx_t mask_one = {0x0101010101010101LL};
-
-static inline void MC_put4_8 (int height, uint8_t * dest, const uint8_t * ref,
-			      const int stride, const int cpu)
-{
-    movq_m2r (*ref, mm0);
-    movq_m2r (*(ref+1), mm1);
-    movq_r2r (mm0, mm7);
-    pxor_r2r (mm1, mm7);
-    pavg_r2r (mm1, mm0);
-    ref += stride;
-
-    do {
-	movq_m2r (*ref, mm2);
-	movq_r2r (mm0, mm5);
-
-	movq_m2r (*(ref+1), mm3);
-	movq_r2r (mm2, mm6);
-
-	pxor_r2r (mm3, mm6);
-	pavg_r2r (mm3, mm2);
-
-	por_r2r (mm6, mm7);
-	pxor_r2r (mm2, mm5);
-
-	pand_r2r (mm5, mm7);
-	pavg_r2r (mm2, mm0);
-
-	pand_m2r (mask_one, mm7);
-
-	psubusb_r2r (mm7, mm0);
-
-	ref += stride;
-	movq_r2m (mm0, *dest);
-	dest += stride;
-
-	movq_r2r (mm6, mm7);	/* unroll ! */
-	movq_r2r (mm2, mm0);	/* unroll ! */
-    } while (--height);
-}
-
-static inline void MC_put4_16 (int height, uint8_t * dest, const uint8_t * ref,
-			       const int stride, const int cpu)
-{
-    do {
-	movq_m2r (*ref, mm0);
-	movq_m2r (*(ref+stride+1), mm1);
-	movq_r2r (mm0, mm7);
-	movq_m2r (*(ref+1), mm2);
-	pxor_r2r (mm1, mm7);
-	movq_m2r (*(ref+stride), mm3);
-	movq_r2r (mm2, mm6);
-	pxor_r2r (mm3, mm6);
-	pavg_r2r (mm1, mm0);
-	pavg_r2r (mm3, mm2);
-	por_r2r (mm6, mm7);
-	movq_r2r (mm0, mm6);
-	pxor_r2r (mm2, mm6);
-	pand_r2r (mm6, mm7);
-	pand_m2r (mask_one, mm7);
-	pavg_r2r (mm2, mm0);
-	psubusb_r2r (mm7, mm0);
-	movq_r2m (mm0, *dest);
-
-	movq_m2r (*(ref+8), mm0);
-	movq_m2r (*(ref+stride+9), mm1);
-	movq_r2r (mm0, mm7);
-	movq_m2r (*(ref+9), mm2);
-	pxor_r2r (mm1, mm7);
-	movq_m2r (*(ref+stride+8), mm3);
-	movq_r2r (mm2, mm6);
-	pxor_r2r (mm3, mm6);
-	pavg_r2r (mm1, mm0);
-	pavg_r2r (mm3, mm2);
-	por_r2r (mm6, mm7);
-	movq_r2r (mm0, mm6);
-	pxor_r2r (mm2, mm6);
-	pand_r2r (mm6, mm7);
-	pand_m2r (mask_one, mm7);
-	pavg_r2r (mm2, mm0);
-	psubusb_r2r (mm7, mm0);
-	ref += stride;
-	movq_r2m (mm0, *(dest+8));
-	dest += stride;
-    } while (--height);
-}
-
-static inline void MC_avg4_8 (int height, uint8_t * dest, const uint8_t * ref,
-			      const int stride, const int cpu)
-{
-    do {
-	movq_m2r (*ref, mm0);
-	movq_m2r (*(ref+stride+1), mm1);
-	movq_r2r (mm0, mm7);
-	movq_m2r (*(ref+1), mm2);
-	pxor_r2r (mm1, mm7);
-	movq_m2r (*(ref+stride), mm3);
-	movq_r2r (mm2, mm6);
-	pxor_r2r (mm3, mm6);
-	pavg_r2r (mm1, mm0);
-	pavg_r2r (mm3, mm2);
-	por_r2r (mm6, mm7);
-	movq_r2r (mm0, mm6);
-	pxor_r2r (mm2, mm6);
-	pand_r2r (mm6, mm7);
-	pand_m2r (mask_one, mm7);
-	pavg_r2r (mm2, mm0);
-	psubusb_r2r (mm7, mm0);
-	movq_m2r (*dest, mm1);
-	pavg_r2r (mm1, mm0);
-	ref += stride;
-	movq_r2m (mm0, *dest);
-	dest += stride;
-    } while (--height);
-}
-
-static inline void MC_avg4_16 (int height, uint8_t * dest, const uint8_t * ref,
-			       const int stride, const int cpu)
-{
-    do {
-	movq_m2r (*ref, mm0);
-	movq_m2r (*(ref+stride+1), mm1);
-	movq_r2r (mm0, mm7);
-	movq_m2r (*(ref+1), mm2);
-	pxor_r2r (mm1, mm7);
-	movq_m2r (*(ref+stride), mm3);
-	movq_r2r (mm2, mm6);
-	pxor_r2r (mm3, mm6);
-	pavg_r2r (mm1, mm0);
-	pavg_r2r (mm3, mm2);
-	por_r2r (mm6, mm7);
-	movq_r2r (mm0, mm6);
-	pxor_r2r (mm2, mm6);
-	pand_r2r (mm6, mm7);
-	pand_m2r (mask_one, mm7);
-	pavg_r2r (mm2, mm0);
-	psubusb_r2r (mm7, mm0);
-	movq_m2r (*dest, mm1);
-	pavg_r2r (mm1, mm0);
-	movq_r2m (mm0, *dest);
-
-	movq_m2r (*(ref+8), mm0);
-	movq_m2r (*(ref+stride+9), mm1);
-	movq_r2r (mm0, mm7);
-	movq_m2r (*(ref+9), mm2);
-	pxor_r2r (mm1, mm7);
-	movq_m2r (*(ref+stride+8), mm3);
-	movq_r2r (mm2, mm6);
-	pxor_r2r (mm3, mm6);
-	pavg_r2r (mm1, mm0);
-	pavg_r2r (mm3, mm2);
-	por_r2r (mm6, mm7);
-	movq_r2r (mm0, mm6);
-	pxor_r2r (mm2, mm6);
-	pand_r2r (mm6, mm7);
-	pand_m2r (mask_one, mm7);
-	pavg_r2r (mm2, mm0);
-	psubusb_r2r (mm7, mm0);
-	movq_m2r (*(dest+8), mm1);
-	pavg_r2r (mm1, mm0);
-	ref += stride;
-	movq_r2m (mm0, *(dest+8));
-	dest += stride;
-    } while (--height);
-}
-
-static void MC_avg_o_16_mmxext (uint8_t * dest, const uint8_t * ref,
-				int stride, int height)
-{
-    MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT);
-}
-
-static void MC_avg_o_8_mmxext (uint8_t * dest, const uint8_t * ref,
-			       int stride, int height)
-{
-    MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT);
-}
-
-static void MC_put_o_16_mmxext (uint8_t * dest, const uint8_t * ref,
-				int stride, int height)
-{
-    MC_put1_16 (height, dest, ref, stride);
-}
-
-static void MC_put_o_8_mmxext (uint8_t * dest, const uint8_t * ref,
-			       int stride, int height)
-{
-    MC_put1_8 (height, dest, ref, stride);
-}
-
-static void MC_avg_x_16_mmxext (uint8_t * dest, const uint8_t * ref,
-				int stride, int height)
-{
-    MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
-}
-
-static void MC_avg_x_8_mmxext (uint8_t * dest, const uint8_t * ref,
-			       int stride, int height)
-{
-    MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
-}
-
-static void MC_put_x_16_mmxext (uint8_t * dest, const uint8_t * ref,
-				int stride, int height)
-{
-    MC_put2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
-}
-
-static void MC_put_x_8_mmxext (uint8_t * dest, const uint8_t * ref,
-			       int stride, int height)
-{
-    MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
-}
-
-static void MC_avg_y_16_mmxext (uint8_t * dest, const uint8_t * ref,
-				int stride, int height)
-{
-    MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
-}
-
-static void MC_avg_y_8_mmxext (uint8_t * dest, const uint8_t * ref,
-			       int stride, int height)
-{
-    MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
-}
-
-static void MC_put_y_16_mmxext (uint8_t * dest, const uint8_t * ref,
-				int stride, int height)
-{
-    MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
-}
-
-static void MC_put_y_8_mmxext (uint8_t * dest, const uint8_t * ref,
-			       int stride, int height)
-{
-    MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
-}
-
-static void MC_avg_xy_16_mmxext (uint8_t * dest, const uint8_t * ref,
-				 int stride, int height)
-{
-    MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT);
-}
-
-static void MC_avg_xy_8_mmxext (uint8_t * dest, const uint8_t * ref,
-				int stride, int height)
-{
-    MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT);
-}
-
-static void MC_put_xy_16_mmxext (uint8_t * dest, const uint8_t * ref,
-				 int stride, int height)
-{
-    MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT);
-}
-
-static void MC_put_xy_8_mmxext (uint8_t * dest, const uint8_t * ref,
-				int stride, int height)
-{
-    MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT);
-}
-
-
-MPEG2_MC_EXTERN (mmxext)
-
-
-
-static void MC_avg_o_16_3dnow (uint8_t * dest, const uint8_t * ref,
-			       int stride, int height)
-{
-    MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW);
-}
-
-static void MC_avg_o_8_3dnow (uint8_t * dest, const uint8_t * ref,
-			      int stride, int height)
-{
-    MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW);
-}
-
-static void MC_put_o_16_3dnow (uint8_t * dest, const uint8_t * ref,
-			       int stride, int height)
-{
-    MC_put1_16 (height, dest, ref, stride);
-}
-
-static void MC_put_o_8_3dnow (uint8_t * dest, const uint8_t * ref,
-			      int stride, int height)
-{
-    MC_put1_8 (height, dest, ref, stride);
-}
-
-static void MC_avg_x_16_3dnow (uint8_t * dest, const uint8_t * ref,
-			       int stride, int height)
-{
-    MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
-}
-
-static void MC_avg_x_8_3dnow (uint8_t * dest, const uint8_t * ref,
-			      int stride, int height)
-{
-    MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
-}
-
-static void MC_put_x_16_3dnow (uint8_t * dest, const uint8_t * ref,
-			       int stride, int height)
-{
-    MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
-}
-
-static void MC_put_x_8_3dnow (uint8_t * dest, const uint8_t * ref,
-			      int stride, int height)
-{
-    MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
-}
-
-static void MC_avg_y_16_3dnow (uint8_t * dest, const uint8_t * ref,
-			       int stride, int height)
-{
-    MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
-}
-
-static void MC_avg_y_8_3dnow (uint8_t * dest, const uint8_t * ref,
-			      int stride, int height)
-{
-    MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
-}
-
-static void MC_put_y_16_3dnow (uint8_t * dest, const uint8_t * ref,
-			       int stride, int height)
-{
-    MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
-}
-
-static void MC_put_y_8_3dnow (uint8_t * dest, const uint8_t * ref,
-			      int stride, int height)
-{
-    MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
-}
-
-static void MC_avg_xy_16_3dnow (uint8_t * dest, const uint8_t * ref,
-				int stride, int height)
-{
-    MC_avg4_16 (height, dest, ref, stride, CPU_3DNOW);
-}
-
-static void MC_avg_xy_8_3dnow (uint8_t * dest, const uint8_t * ref,
-			       int stride, int height)
-{
-    MC_avg4_8 (height, dest, ref, stride, CPU_3DNOW);
-}
-
-static void MC_put_xy_16_3dnow (uint8_t * dest, const uint8_t * ref,
-				int stride, int height)
-{
-    MC_put4_16 (height, dest, ref, stride, CPU_3DNOW);
-}
-
-static void MC_put_xy_8_3dnow (uint8_t * dest, const uint8_t * ref,
-			       int stride, int height)
-{
-    MC_put4_8 (height, dest, ref, stride, CPU_3DNOW);
-}
-
-
-MPEG2_MC_EXTERN (3dnow)
-
-#endif
diff --git a/src/libmpeg2new/libmpeg2/motion_comp_vis.c b/src/libmpeg2new/libmpeg2/motion_comp_vis.c
deleted file mode 100644
index e724d28a2..000000000
--- a/src/libmpeg2new/libmpeg2/motion_comp_vis.c
+++ /dev/null
@@ -1,2061 +0,0 @@
-/*
- * motion_comp_vis.c
- * Copyright (C) 2003 David S. Miller <davem@redhat.com>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include "config.h"
-
-#ifdef ARCH_SPARC
-
-#include <inttypes.h>
-
-#include "mpeg2.h"
-#include <xine/attributes.h>
-#include "mpeg2_internal.h"
-#include "vis.h"
-
-/* The trick used in some of this file is the formula from the MMX
- * motion comp code, which is:
- *
- * (x+y+1)>>1 == (x|y)-((x^y)>>1)
- *
- * This allows us to average 8 bytes at a time in a 64-bit FPU reg.
- * We avoid overflows by masking before we do the shift, and we
- * implement the shift by multiplying by 1/2 using mul8x16.  So in
- * VIS this is (assume 'x' is in f0, 'y' is in f2, a repeating mask
- * of '0xfe' is in f4, a repeating mask of '0x7f' is in f6, and
- * the value 0x80808080 is in f8):
- *
- *	fxor		f0, f2, f10
- *	fand		f10, f4, f10
- *	fmul8x16	f8, f10, f10
- *	fand		f10, f6, f10
- *	for		f0, f2, f12
- *	fpsub16		f12, f10, f10
- */
-
-#define DUP4(x) {x, x, x, x}
-#define DUP8(x) {x, x, x, x, x, x, x, x}
-static const int16_t constants1[] ATTR_ALIGN(8) = DUP4 (1);
-static const int16_t constants2[] ATTR_ALIGN(8) = DUP4 (2);
-static const int16_t constants3[] ATTR_ALIGN(8) = DUP4 (3);
-static const int16_t constants6[] ATTR_ALIGN(8) = DUP4 (6);
-static const int8_t constants_fe[] ATTR_ALIGN(8) = DUP8 (0xfe);
-static const int8_t constants_7f[] ATTR_ALIGN(8) = DUP8 (0x7f);
-static const int8_t constants128[] ATTR_ALIGN(8) = DUP8 (128);
-static const int16_t constants256_512[] ATTR_ALIGN(8) =
-	{256, 512, 256, 512};
-static const int16_t constants256_1024[] ATTR_ALIGN(8) =
-	{256, 1024, 256, 1024};
-
-#define REF_0		0
-#define REF_0_1		1
-#define REF_2		2
-#define REF_2_1		3
-#define REF_4		4
-#define REF_4_1		5
-#define REF_6		6
-#define REF_6_1		7
-#define REF_S0		8
-#define REF_S0_1	9
-#define REF_S2		10
-#define REF_S2_1	11
-#define REF_S4		12
-#define REF_S4_1	13
-#define REF_S6		14
-#define REF_S6_1	15
-#define DST_0		16
-#define DST_1		17
-#define DST_2		18
-#define DST_3		19
-#define CONST_1		20
-#define CONST_2		20
-#define CONST_3		20
-#define CONST_6		20
-#define MASK_fe		20
-#define CONST_128	22
-#define CONST_256	22
-#define CONST_512	22
-#define CONST_1024	22
-#define TMP0		24
-#define TMP1		25
-#define TMP2		26
-#define TMP3		27
-#define TMP4		28
-#define TMP5		29
-#define ZERO		30
-#define MASK_7f		30
-
-#define TMP6		32
-#define TMP8		34
-#define TMP10		36
-#define TMP12		38
-#define TMP14		40
-#define TMP16		42
-#define TMP18		44
-#define TMP20		46
-#define TMP22		48
-#define TMP24		50
-#define TMP26		52
-#define TMP28		54
-#define TMP30		56
-#define TMP32		58
-
-static void MC_put_o_16_vis (uint8_t * dest, const uint8_t * _ref,
-			     const int stride, int height)
-{
-	uint8_t *ref = (uint8_t *) _ref;
-	int offset;
-
-	ref = vis_alignaddr(ref);
-	offset = (ref != _ref) ? 16 : 0;
-	do {	/* 5 cycles */
-		vis_ld64(ref[0], TMP0);
-
-		vis_ld64_2(ref, 8, TMP2);
-
-		vis_ld64_2(ref, offset, TMP4);
-		ref += stride;
-
-		vis_faligndata(TMP0, TMP2, REF_0);
-		vis_st64(REF_0, dest[0]);
-
-		vis_faligndata(TMP2, TMP4, REF_2);
-		vis_st64_2(REF_2, dest, 8);
-		dest += stride;
-	} while (--height);
-}
-
-static void MC_put_o_8_vis (uint8_t * dest, const uint8_t * _ref,
-			    const int stride, int height)
-{
-	uint8_t *ref = (uint8_t *) _ref;
-	int offset;
-
-	ref = vis_alignaddr(ref);
-	offset = (ref != _ref) ? 8 : 0;
-	do {	/* 4 cycles */
-		vis_ld64(ref[0], TMP0);
-
-		vis_ld64_2(ref, offset, TMP2);
-		ref += stride;
-
-		/* stall */
-
-		vis_faligndata(TMP0, TMP2, REF_0);
-		vis_st64(REF_0, dest[0]);
-		dest += stride;
-	} while (--height);
-}
-
-
-static void MC_avg_o_16_vis (uint8_t * dest, const uint8_t * _ref,
-			     const int stride, int height)
-{
-	uint8_t *ref = (uint8_t *) _ref;
-	int stride_8 = stride + 8;
-	int offset;
-
-	ref = vis_alignaddr(ref);
-	offset = (ref != _ref) ? 16 : 0;
-
-	vis_ld64(ref[0], TMP0);
-
-	vis_ld64(ref[8], TMP2);
-
-	vis_ld64_2(ref, offset, TMP4);
-
-	vis_ld64(dest[0], DST_0);
-
-	vis_ld64(dest[8], DST_2);
-
-	vis_ld64(constants_fe[0], MASK_fe);
-	vis_faligndata(TMP0, TMP2, REF_0);
-
-	vis_ld64(constants_7f[0], MASK_7f);
-	vis_faligndata(TMP2, TMP4, REF_2);
-
-	vis_ld64(constants128[0], CONST_128);
-
-	ref += stride;
-	height = (height >> 1) - 1;
-
-	do {	/* 24 cycles */
-		vis_ld64(ref[0], TMP0);
-		vis_xor(DST_0, REF_0, TMP6);
-
-		vis_ld64_2(ref, 8, TMP2);
-		vis_and(TMP6, MASK_fe, TMP6);
-
-		vis_ld64_2(ref, offset, TMP4);
-		ref += stride;
-		vis_mul8x16(CONST_128, TMP6, TMP6);
-		vis_xor(DST_2, REF_2, TMP8);
-
-		vis_and(TMP8, MASK_fe, TMP8);
-
-		vis_or(DST_0, REF_0, TMP10);
-		vis_ld64_2(dest, stride, DST_0);
-		vis_mul8x16(CONST_128, TMP8, TMP8);
-
-		vis_or(DST_2, REF_2, TMP12);
-		vis_ld64_2(dest, stride_8, DST_2);
-
-		vis_ld64(ref[0], TMP14);
-		vis_and(TMP6, MASK_7f, TMP6);
-
-		vis_and(TMP8, MASK_7f, TMP8);
-
-		vis_psub16(TMP10, TMP6, TMP6);
-		vis_st64(TMP6, dest[0]);
-
-		vis_psub16(TMP12, TMP8, TMP8);
-		vis_st64_2(TMP8, dest, 8);
-
-		dest += stride;
-		vis_ld64_2(ref, 8, TMP16);
-		vis_faligndata(TMP0, TMP2, REF_0);
-
-		vis_ld64_2(ref, offset, TMP18);
-		vis_faligndata(TMP2, TMP4, REF_2);
-		ref += stride;
-
-		vis_xor(DST_0, REF_0, TMP20);
-
-		vis_and(TMP20, MASK_fe, TMP20);
-
-		vis_xor(DST_2, REF_2, TMP22);
-		vis_mul8x16(CONST_128, TMP20, TMP20);
-
-		vis_and(TMP22, MASK_fe, TMP22);
-
-		vis_or(DST_0, REF_0, TMP24);
-		vis_mul8x16(CONST_128, TMP22, TMP22);
-
-		vis_or(DST_2, REF_2, TMP26);
-
-		vis_ld64_2(dest, stride, DST_0);
-		vis_faligndata(TMP14, TMP16, REF_0);
-
-		vis_ld64_2(dest, stride_8, DST_2);
-		vis_faligndata(TMP16, TMP18, REF_2);
-
-		vis_and(TMP20, MASK_7f, TMP20);
-
-		vis_and(TMP22, MASK_7f, TMP22);
-
-		vis_psub16(TMP24, TMP20, TMP20);
-		vis_st64(TMP20, dest[0]);
-
-		vis_psub16(TMP26, TMP22, TMP22);
-		vis_st64_2(TMP22, dest, 8);
-		dest += stride;
-	} while (--height);
-
-	vis_ld64(ref[0], TMP0);
-	vis_xor(DST_0, REF_0, TMP6);
-
-	vis_ld64_2(ref, 8, TMP2);
-	vis_and(TMP6, MASK_fe, TMP6);
-
-	vis_ld64_2(ref, offset, TMP4);
-	vis_mul8x16(CONST_128, TMP6, TMP6);
-	vis_xor(DST_2, REF_2, TMP8);
-
-	vis_and(TMP8, MASK_fe, TMP8);
-
-	vis_or(DST_0, REF_0, TMP10);
-	vis_ld64_2(dest, stride, DST_0);
-	vis_mul8x16(CONST_128, TMP8, TMP8);
-
-	vis_or(DST_2, REF_2, TMP12);
-	vis_ld64_2(dest, stride_8, DST_2);
-
-	vis_ld64(ref[0], TMP14);
-	vis_and(TMP6, MASK_7f, TMP6);
-
-	vis_and(TMP8, MASK_7f, TMP8);
-
-	vis_psub16(TMP10, TMP6, TMP6);
-	vis_st64(TMP6, dest[0]);
-
-	vis_psub16(TMP12, TMP8, TMP8);
-	vis_st64_2(TMP8, dest, 8);
-
-	dest += stride;
-	vis_faligndata(TMP0, TMP2, REF_0);
-
-	vis_faligndata(TMP2, TMP4, REF_2);
-
-	vis_xor(DST_0, REF_0, TMP20);
-
-	vis_and(TMP20, MASK_fe, TMP20);
-
-	vis_xor(DST_2, REF_2, TMP22);
-	vis_mul8x16(CONST_128, TMP20, TMP20);
-
-	vis_and(TMP22, MASK_fe, TMP22);
-
-	vis_or(DST_0, REF_0, TMP24);
-	vis_mul8x16(CONST_128, TMP22, TMP22);
-
-	vis_or(DST_2, REF_2, TMP26);
-
-	vis_and(TMP20, MASK_7f, TMP20);
-
-	vis_and(TMP22, MASK_7f, TMP22);
-
-	vis_psub16(TMP24, TMP20, TMP20);
-	vis_st64(TMP20, dest[0]);
-
-	vis_psub16(TMP26, TMP22, TMP22);
-	vis_st64_2(TMP22, dest, 8);
-}
-
-static void MC_avg_o_8_vis (uint8_t * dest, const uint8_t * _ref,
-			    const int stride, int height)
-{
-	uint8_t *ref = (uint8_t *) _ref;
-	int offset;
-
-	ref = vis_alignaddr(ref);
-	offset = (ref != _ref) ? 8 : 0;
-
-	vis_ld64(ref[0], TMP0);
-
-	vis_ld64_2(ref, offset, TMP2);
-
-	vis_ld64(dest[0], DST_0);
-
-	vis_ld64(constants_fe[0], MASK_fe);
-
-	vis_ld64(constants_7f[0], MASK_7f);
-	vis_faligndata(TMP0, TMP2, REF_0);
-
-	vis_ld64(constants128[0], CONST_128);
-
-	ref += stride;
-	height = (height >> 1) - 1;
-
-	do {	/* 12 cycles */
-		vis_ld64(ref[0], TMP0);
-		vis_xor(DST_0, REF_0, TMP4);
-
-		vis_ld64_2(ref, offset, TMP2);
-		vis_and(TMP4, MASK_fe, TMP4);
-
-		vis_or(DST_0, REF_0, TMP6);
-		vis_ld64_2(dest, stride, DST_0);
-		ref += stride;
-		vis_mul8x16(CONST_128, TMP4, TMP4);
-
-		vis_ld64(ref[0], TMP12);
-		vis_faligndata(TMP0, TMP2, REF_0);
-
-		vis_ld64_2(ref, offset, TMP2);
-		vis_xor(DST_0, REF_0, TMP0);
-		ref += stride;
-
-		vis_and(TMP0, MASK_fe, TMP0);
-
-		vis_and(TMP4, MASK_7f, TMP4);
-
-		vis_psub16(TMP6, TMP4, TMP4);
-		vis_st64(TMP4, dest[0]);
-		dest += stride;
-		vis_mul8x16(CONST_128, TMP0, TMP0);
-
-		vis_or(DST_0, REF_0, TMP6);
-		vis_ld64_2(dest, stride, DST_0);
-
-		vis_faligndata(TMP12, TMP2, REF_0);
-
-		vis_and(TMP0, MASK_7f, TMP0);
-
-		vis_psub16(TMP6, TMP0, TMP4);
-		vis_st64(TMP4, dest[0]);
-		dest += stride;
-	} while (--height);
-
-	vis_ld64(ref[0], TMP0);
-	vis_xor(DST_0, REF_0, TMP4);
-
-	vis_ld64_2(ref, offset, TMP2);
-	vis_and(TMP4, MASK_fe, TMP4);
-
-	vis_or(DST_0, REF_0, TMP6);
-	vis_ld64_2(dest, stride, DST_0);
-	vis_mul8x16(CONST_128, TMP4, TMP4);
-
-	vis_faligndata(TMP0, TMP2, REF_0);
-
-	vis_xor(DST_0, REF_0, TMP0);
-
-	vis_and(TMP0, MASK_fe, TMP0);
-
-	vis_and(TMP4, MASK_7f, TMP4);
-
-	vis_psub16(TMP6, TMP4, TMP4);
-	vis_st64(TMP4, dest[0]);
-	dest += stride;
-	vis_mul8x16(CONST_128, TMP0, TMP0);
-
-	vis_or(DST_0, REF_0, TMP6);
-
-	vis_and(TMP0, MASK_7f, TMP0);
-
-	vis_psub16(TMP6, TMP0, TMP4);
-	vis_st64(TMP4, dest[0]);
-}
-
-static void MC_put_x_16_vis (uint8_t * dest, const uint8_t * _ref,
-			     const int stride, int height)
-{
-	uint8_t *ref = (uint8_t *) _ref;
-	unsigned long off = (unsigned long) ref & 0x7;
-	unsigned long off_plus_1 = off + 1;
-
-	ref = vis_alignaddr(ref);
-
-	vis_ld64(ref[0],    TMP0);
-
-	vis_ld64_2(ref, 8,  TMP2);
-
-	vis_ld64_2(ref, 16, TMP4);
-
-	vis_ld64(constants_fe[0], MASK_fe);
-
-	vis_ld64(constants_7f[0], MASK_7f);
-	vis_faligndata(TMP0, TMP2, REF_0);
-
-	vis_ld64(constants128[0], CONST_128);
-	vis_faligndata(TMP2, TMP4, REF_4);
-
-	if (off != 0x7) {
-		vis_alignaddr_g0((void *)off_plus_1);
-		vis_faligndata(TMP0, TMP2, REF_2);
-		vis_faligndata(TMP2, TMP4, REF_6);
-	} else {
-		vis_src1(TMP2, REF_2);
-		vis_src1(TMP4, REF_6);
-	}
-
-	ref += stride;
-	height = (height >> 1) - 1;
-
-	do {	/* 34 cycles */
-		vis_ld64(ref[0],    TMP0);
-		vis_xor(REF_0, REF_2, TMP6);
-
-		vis_ld64_2(ref, 8,  TMP2);
-		vis_xor(REF_4, REF_6, TMP8);
-
-		vis_ld64_2(ref, 16, TMP4);
-		vis_and(TMP6, MASK_fe, TMP6);
-		ref += stride;
-
-		vis_ld64(ref[0],    TMP14);
-		vis_mul8x16(CONST_128, TMP6, TMP6);
-		vis_and(TMP8, MASK_fe, TMP8);
-
-		vis_ld64_2(ref, 8,  TMP16);
-		vis_mul8x16(CONST_128, TMP8, TMP8);
-		vis_or(REF_0, REF_2, TMP10);
-
-		vis_ld64_2(ref, 16, TMP18);
-		ref += stride;
-		vis_or(REF_4, REF_6, TMP12);
-
-		vis_alignaddr_g0((void *)off);
-
-		vis_faligndata(TMP0, TMP2, REF_0);
-
-		vis_faligndata(TMP2, TMP4, REF_4);
-
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP0, TMP2, REF_2);
-			vis_faligndata(TMP2, TMP4, REF_6);
-		} else {
-			vis_src1(TMP2, REF_2);
-			vis_src1(TMP4, REF_6);
-		}
-
-		vis_and(TMP6, MASK_7f, TMP6);
-
-		vis_and(TMP8, MASK_7f, TMP8);
-
-		vis_psub16(TMP10, TMP6, TMP6);
-		vis_st64(TMP6, dest[0]);
-
-		vis_psub16(TMP12, TMP8, TMP8);
-		vis_st64_2(TMP8, dest, 8);
-		dest += stride;
-
-		vis_xor(REF_0, REF_2, TMP6);
-
-		vis_xor(REF_4, REF_6, TMP8);
-
-		vis_and(TMP6, MASK_fe, TMP6);
-
-		vis_mul8x16(CONST_128, TMP6, TMP6);
-		vis_and(TMP8, MASK_fe, TMP8);
-
-		vis_mul8x16(CONST_128, TMP8, TMP8);
-		vis_or(REF_0, REF_2, TMP10);
-
-		vis_or(REF_4, REF_6, TMP12);
-
-		vis_alignaddr_g0((void *)off);
-
-		vis_faligndata(TMP14, TMP16, REF_0);
-
-		vis_faligndata(TMP16, TMP18, REF_4);
-
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP14, TMP16, REF_2);
-			vis_faligndata(TMP16, TMP18, REF_6);
-		} else {
-			vis_src1(TMP16, REF_2);
-			vis_src1(TMP18, REF_6);
-		}
-
-		vis_and(TMP6, MASK_7f, TMP6);
-
-		vis_and(TMP8, MASK_7f, TMP8);
-
-		vis_psub16(TMP10, TMP6, TMP6);
-		vis_st64(TMP6, dest[0]);
-
-		vis_psub16(TMP12, TMP8, TMP8);
-		vis_st64_2(TMP8, dest, 8);
-		dest += stride;
-	} while (--height);
-
-	vis_ld64(ref[0],    TMP0);
-	vis_xor(REF_0, REF_2, TMP6);
-
-	vis_ld64_2(ref, 8,  TMP2);
-	vis_xor(REF_4, REF_6, TMP8);
-
-	vis_ld64_2(ref, 16, TMP4);
-	vis_and(TMP6, MASK_fe, TMP6);
-
-	vis_mul8x16(CONST_128, TMP6, TMP6);
-	vis_and(TMP8, MASK_fe, TMP8);
-
-	vis_mul8x16(CONST_128, TMP8, TMP8);
-	vis_or(REF_0, REF_2, TMP10);
-
-	vis_or(REF_4, REF_6, TMP12);
-
-	vis_alignaddr_g0((void *)off);
-
-	vis_faligndata(TMP0, TMP2, REF_0);
-
-	vis_faligndata(TMP2, TMP4, REF_4);
-
-	if (off != 0x7) {
-		vis_alignaddr_g0((void *)off_plus_1);
-		vis_faligndata(TMP0, TMP2, REF_2);
-		vis_faligndata(TMP2, TMP4, REF_6);
-	} else {
-		vis_src1(TMP2, REF_2);
-		vis_src1(TMP4, REF_6);
-	}
-
-	vis_and(TMP6, MASK_7f, TMP6);
-
-	vis_and(TMP8, MASK_7f, TMP8);
-
-	vis_psub16(TMP10, TMP6, TMP6);
-	vis_st64(TMP6, dest[0]);
-
-	vis_psub16(TMP12, TMP8, TMP8);
-	vis_st64_2(TMP8, dest, 8);
-	dest += stride;
-
-	vis_xor(REF_0, REF_2, TMP6);
-
-	vis_xor(REF_4, REF_6, TMP8);
-
-	vis_and(TMP6, MASK_fe, TMP6);
-
-	vis_mul8x16(CONST_128, TMP6, TMP6);
-	vis_and(TMP8, MASK_fe, TMP8);
-
-	vis_mul8x16(CONST_128, TMP8, TMP8);
-	vis_or(REF_0, REF_2, TMP10);
-
-	vis_or(REF_4, REF_6, TMP12);
-
-	vis_and(TMP6, MASK_7f, TMP6);
-
-	vis_and(TMP8, MASK_7f, TMP8);
-
-	vis_psub16(TMP10, TMP6, TMP6);
-	vis_st64(TMP6, dest[0]);
-
-	vis_psub16(TMP12, TMP8, TMP8);
-	vis_st64_2(TMP8, dest, 8);
-}
-
-static void MC_put_x_8_vis (uint8_t * dest, const uint8_t * _ref,
-			    const int stride, int height)
-{
-	uint8_t *ref = (uint8_t *) _ref;
-	unsigned long off = (unsigned long) ref & 0x7;
-	unsigned long off_plus_1 = off + 1;
-
-	ref = vis_alignaddr(ref);
-
-	vis_ld64(ref[0], TMP0);
-
-	vis_ld64(ref[8], TMP2);
-
-	vis_ld64(constants_fe[0], MASK_fe);
-
-	vis_ld64(constants_7f[0], MASK_7f);
-
-	vis_ld64(constants128[0], CONST_128);
-	vis_faligndata(TMP0, TMP2, REF_0);
-
-	if (off != 0x7) {
-		vis_alignaddr_g0((void *)off_plus_1);
-		vis_faligndata(TMP0, TMP2, REF_2);
-	} else {
-		vis_src1(TMP2, REF_2);
-	}
-
-	ref += stride;
-	height = (height >> 1) - 1;
-
-	do {	/* 20 cycles */
-		vis_ld64(ref[0], TMP0);
-		vis_xor(REF_0, REF_2, TMP4);
-
-		vis_ld64_2(ref, 8, TMP2);
-		vis_and(TMP4, MASK_fe, TMP4);
-		ref += stride;
-
-		vis_ld64(ref[0], TMP8);
-		vis_or(REF_0, REF_2, TMP6);
-		vis_mul8x16(CONST_128, TMP4, TMP4);
-
-		vis_alignaddr_g0((void *)off);
-
-		vis_ld64_2(ref, 8, TMP10);
-		ref += stride;
-		vis_faligndata(TMP0, TMP2, REF_0);
-
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP0, TMP2, REF_2);
-		} else {
-			vis_src1(TMP2, REF_2);
-		}
-
-		vis_and(TMP4, MASK_7f, TMP4);
-
-		vis_psub16(TMP6, TMP4, DST_0);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
-
-		vis_xor(REF_0, REF_2, TMP12);
-
-		vis_and(TMP12, MASK_fe, TMP12);
-
-		vis_or(REF_0, REF_2, TMP14);
-		vis_mul8x16(CONST_128, TMP12, TMP12);
-
-		vis_alignaddr_g0((void *)off);
-		vis_faligndata(TMP8, TMP10, REF_0);
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP8, TMP10, REF_2);
-		} else {
-			vis_src1(TMP10, REF_2);
-		}
-
-		vis_and(TMP12, MASK_7f, TMP12);
-
-		vis_psub16(TMP14, TMP12, DST_0);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
-	} while (--height);
-
-	vis_ld64(ref[0], TMP0);
-	vis_xor(REF_0, REF_2, TMP4);
-
-	vis_ld64_2(ref, 8, TMP2);
-	vis_and(TMP4, MASK_fe, TMP4);
-
-	vis_or(REF_0, REF_2, TMP6);
-	vis_mul8x16(CONST_128, TMP4, TMP4);
-
-	vis_alignaddr_g0((void *)off);
-
-	vis_faligndata(TMP0, TMP2, REF_0);
-
-	if (off != 0x7) {
-		vis_alignaddr_g0((void *)off_plus_1);
-		vis_faligndata(TMP0, TMP2, REF_2);
-	} else {
-		vis_src1(TMP2, REF_2);
-	}
-
-	vis_and(TMP4, MASK_7f, TMP4);
-
-	vis_psub16(TMP6, TMP4, DST_0);
-	vis_st64(DST_0, dest[0]);
-	dest += stride;
-
-	vis_xor(REF_0, REF_2, TMP12);
-
-	vis_and(TMP12, MASK_fe, TMP12);
-
-	vis_or(REF_0, REF_2, TMP14);
-	vis_mul8x16(CONST_128, TMP12, TMP12);
-
-	vis_and(TMP12, MASK_7f, TMP12);
-
-	vis_psub16(TMP14, TMP12, DST_0);
-	vis_st64(DST_0, dest[0]);
-	dest += stride;
-}
-
-static void MC_avg_x_16_vis (uint8_t * dest, const uint8_t * _ref,
-			     const int stride, int height)
-{
-	uint8_t *ref = (uint8_t *) _ref;
-	unsigned long off = (unsigned long) ref & 0x7;
-	unsigned long off_plus_1 = off + 1;
-
-	vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
-
-	vis_ld64(constants3[0], CONST_3);
-	vis_fzero(ZERO);
-	vis_ld64(constants256_512[0], CONST_256);
-
-	ref = vis_alignaddr(ref);
-	do {	/* 26 cycles */
-		vis_ld64(ref[0], TMP0);
-
-		vis_ld64(ref[8], TMP2);
-
-		vis_alignaddr_g0((void *)off);
-
-		vis_ld64(ref[16], TMP4);
-
-		vis_ld64(dest[0], DST_0);
-		vis_faligndata(TMP0, TMP2, REF_0);
-
-		vis_ld64(dest[8], DST_2);
-		vis_faligndata(TMP2, TMP4, REF_4);
-
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP0, TMP2, REF_2);
-			vis_faligndata(TMP2, TMP4, REF_6);
-		} else {
-			vis_src1(TMP2, REF_2);
-			vis_src1(TMP4, REF_6);
-		}
-
-		vis_mul8x16au(REF_0,   CONST_256, TMP0);
-
-		vis_pmerge(ZERO,     REF_2,     TMP4);
-		vis_mul8x16au(REF_0_1, CONST_256, TMP2);
-
-		vis_pmerge(ZERO, REF_2_1, TMP6);
-
-		vis_padd16(TMP0, TMP4, TMP0);
-
-		vis_mul8x16al(DST_0,   CONST_512, TMP4);
-		vis_padd16(TMP2, TMP6, TMP2);
-
-		vis_mul8x16al(DST_1,   CONST_512, TMP6);
-
-		vis_mul8x16au(REF_6,   CONST_256, TMP12);
-
-		vis_padd16(TMP0, TMP4, TMP0);
-		vis_mul8x16au(REF_6_1, CONST_256, TMP14);
-
-		vis_padd16(TMP2, TMP6, TMP2);
-		vis_mul8x16au(REF_4,   CONST_256, TMP16);
-
-		vis_padd16(TMP0, CONST_3, TMP8);
-		vis_mul8x16au(REF_4_1, CONST_256, TMP18);
-
-		vis_padd16(TMP2, CONST_3, TMP10);
-		vis_pack16(TMP8, DST_0);
-
-		vis_pack16(TMP10, DST_1);
-		vis_padd16(TMP16, TMP12, TMP0);
-
-		vis_st64(DST_0, dest[0]);
-		vis_mul8x16al(DST_2,   CONST_512, TMP4);
-		vis_padd16(TMP18, TMP14, TMP2);
-
-		vis_mul8x16al(DST_3,   CONST_512, TMP6);
-		vis_padd16(TMP0, CONST_3, TMP0);
-
-		vis_padd16(TMP2, CONST_3, TMP2);
-
-		vis_padd16(TMP0, TMP4, TMP0);
-
-		vis_padd16(TMP2, TMP6, TMP2);
-		vis_pack16(TMP0, DST_2);
-
-		vis_pack16(TMP2, DST_3);
-		vis_st64(DST_2, dest[8]);
-
-		ref += stride;
-		dest += stride;
-	} while (--height);
-}
-
-static void MC_avg_x_8_vis (uint8_t * dest, const uint8_t * _ref,
-			    const int stride, int height)
-{
-	uint8_t *ref = (uint8_t *) _ref;
-	unsigned long off = (unsigned long) ref & 0x7;
-	unsigned long off_plus_1 = off + 1;
-	int stride_times_2 = stride << 1;
-
-	vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
-
-	vis_ld64(constants3[0], CONST_3);
-	vis_fzero(ZERO);
-	vis_ld64(constants256_512[0], CONST_256);
-
-	ref = vis_alignaddr(ref);
-	height >>= 2;
-	do {	/* 47 cycles */
-		vis_ld64(ref[0],   TMP0);
-
-		vis_ld64_2(ref, 8, TMP2);
-		ref += stride;
-
-		vis_alignaddr_g0((void *)off);
-
-		vis_ld64(ref[0],   TMP4);
-		vis_faligndata(TMP0, TMP2, REF_0);
-
-		vis_ld64_2(ref, 8, TMP6);
-		ref += stride;
-
-		vis_ld64(ref[0],   TMP8);
-
-		vis_ld64_2(ref, 8, TMP10);
-		ref += stride;
-		vis_faligndata(TMP4, TMP6, REF_4);
-
-		vis_ld64(ref[0],   TMP12);
-
-		vis_ld64_2(ref, 8, TMP14);
-		ref += stride;
-		vis_faligndata(TMP8, TMP10, REF_S0);
-
-		vis_faligndata(TMP12, TMP14, REF_S4);
-
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-
-			vis_ld64(dest[0], DST_0);
-			vis_faligndata(TMP0, TMP2, REF_2);
-
-			vis_ld64_2(dest, stride, DST_2);
-			vis_faligndata(TMP4, TMP6, REF_6);
-
-			vis_faligndata(TMP8, TMP10, REF_S2);
-
-			vis_faligndata(TMP12, TMP14, REF_S6);
-		} else {
-			vis_ld64(dest[0], DST_0);
-			vis_src1(TMP2, REF_2);
-
-			vis_ld64_2(dest, stride, DST_2);
-			vis_src1(TMP6, REF_6);
-
-			vis_src1(TMP10, REF_S2);
-
-			vis_src1(TMP14, REF_S6);
-		}
-
-		vis_pmerge(ZERO,     REF_0,     TMP0);
-		vis_mul8x16au(REF_0_1, CONST_256, TMP2);
-
-		vis_pmerge(ZERO,     REF_2,     TMP4);
-		vis_mul8x16au(REF_2_1, CONST_256, TMP6);
-
-		vis_padd16(TMP0, CONST_3, TMP0);
-		vis_mul8x16al(DST_0,   CONST_512, TMP16);
-
-		vis_padd16(TMP2, CONST_3, TMP2);
-		vis_mul8x16al(DST_1,   CONST_512, TMP18);
-
-		vis_padd16(TMP0, TMP4, TMP0);
-		vis_mul8x16au(REF_4, CONST_256, TMP8);
-
-		vis_padd16(TMP2, TMP6, TMP2);
-		vis_mul8x16au(REF_4_1, CONST_256, TMP10);
-
-		vis_padd16(TMP0, TMP16, TMP0);
-		vis_mul8x16au(REF_6, CONST_256, TMP12);
-
-		vis_padd16(TMP2, TMP18, TMP2);
-		vis_mul8x16au(REF_6_1, CONST_256, TMP14);
-
-		vis_padd16(TMP8, CONST_3, TMP8);
-		vis_mul8x16al(DST_2, CONST_512, TMP16);
-
-		vis_padd16(TMP8, TMP12, TMP8);
-		vis_mul8x16al(DST_3, CONST_512, TMP18);
-
-		vis_padd16(TMP10, TMP14, TMP10);
-		vis_pack16(TMP0, DST_0);
-
-		vis_pack16(TMP2, DST_1);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
-		vis_padd16(TMP10, CONST_3, TMP10);
-
-		vis_ld64_2(dest, stride, DST_0);
-		vis_padd16(TMP8, TMP16, TMP8);
-
-		vis_ld64_2(dest, stride_times_2, TMP4/*DST_2*/);
-		vis_padd16(TMP10, TMP18, TMP10);
-		vis_pack16(TMP8, DST_2);
-
-		vis_pack16(TMP10, DST_3);
-		vis_st64(DST_2, dest[0]);
-		dest += stride;
-
-		vis_mul8x16au(REF_S0_1, CONST_256, TMP2);
-		vis_pmerge(ZERO,     REF_S0,     TMP0);
-
-		vis_pmerge(ZERO,     REF_S2,     TMP24);
-		vis_mul8x16au(REF_S2_1, CONST_256, TMP6);
-
-		vis_padd16(TMP0, CONST_3, TMP0);
-		vis_mul8x16au(REF_S4, CONST_256, TMP8);
-
-		vis_padd16(TMP2, CONST_3, TMP2);
-		vis_mul8x16au(REF_S4_1, CONST_256, TMP10);
-
-		vis_padd16(TMP0, TMP24, TMP0);
-		vis_mul8x16au(REF_S6, CONST_256, TMP12);
-
-		vis_padd16(TMP2, TMP6, TMP2);
-		vis_mul8x16au(REF_S6_1, CONST_256, TMP14);
-
-		vis_padd16(TMP8, CONST_3, TMP8);
-		vis_mul8x16al(DST_0,   CONST_512, TMP16);
-
-		vis_padd16(TMP10, CONST_3, TMP10);
-		vis_mul8x16al(DST_1,   CONST_512, TMP18);
-
-		vis_padd16(TMP8, TMP12, TMP8);
-		vis_mul8x16al(TMP4/*DST_2*/, CONST_512, TMP20);
-
-		vis_mul8x16al(TMP5/*DST_3*/, CONST_512, TMP22);
-		vis_padd16(TMP0, TMP16, TMP0);
-
-		vis_padd16(TMP2, TMP18, TMP2);
-		vis_pack16(TMP0, DST_0);
-
-		vis_padd16(TMP10, TMP14, TMP10);
-		vis_pack16(TMP2, DST_1);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
-
-		vis_padd16(TMP8, TMP20, TMP8);
-
-		vis_padd16(TMP10, TMP22, TMP10);
-		vis_pack16(TMP8, DST_2);
-
-		vis_pack16(TMP10, DST_3);
-		vis_st64(DST_2, dest[0]);
-		dest += stride;
-	} while (--height);
-}
-
-static void MC_put_y_16_vis (uint8_t * dest, const uint8_t * _ref,
-			     const int stride, int height)
-{
-	uint8_t *ref = (uint8_t *) _ref;
-	int offset;
-
-	ref = vis_alignaddr(ref);
-	offset = (ref != _ref) ? 16 : 0;
-
-	vis_ld64(ref[0], TMP0);
-
-	vis_ld64_2(ref, 8, TMP2);
-
-	vis_ld64_2(ref, offset, TMP4);
-	ref += stride;
-
-	vis_ld64(ref[0], TMP6);
-	vis_faligndata(TMP0, TMP2, REF_0);
-
-	vis_ld64_2(ref, 8, TMP8);
-	vis_faligndata(TMP2, TMP4, REF_4);
-
-	vis_ld64_2(ref, offset, TMP10);
-	ref += stride;
-
-	vis_ld64(constants_fe[0], MASK_fe);
-	vis_faligndata(TMP6, TMP8, REF_2);
-
-	vis_ld64(constants_7f[0], MASK_7f);
-	vis_faligndata(TMP8, TMP10, REF_6);
-
-	vis_ld64(constants128[0], CONST_128);
-	height = (height >> 1) - 1;
-	do {	/* 24 cycles */
-		vis_ld64(ref[0], TMP0);
-		vis_xor(REF_0, REF_2, TMP12);
-
-		vis_ld64_2(ref, 8, TMP2);
-		vis_xor(REF_4, REF_6, TMP16);
-
-		vis_ld64_2(ref, offset, TMP4);
-		ref += stride;
-		vis_or(REF_0, REF_2, TMP14);
-
-		vis_ld64(ref[0], TMP6);
-		vis_or(REF_4, REF_6, TMP18);
-
-		vis_ld64_2(ref, 8, TMP8);
-		vis_faligndata(TMP0, TMP2, REF_0);
-
-		vis_ld64_2(ref, offset, TMP10);
-		ref += stride;
-		vis_faligndata(TMP2, TMP4, REF_4);
-
-		vis_and(TMP12, MASK_fe, TMP12);
-
-		vis_and(TMP16, MASK_fe, TMP16);
-		vis_mul8x16(CONST_128, TMP12, TMP12);
-
-		vis_mul8x16(CONST_128, TMP16, TMP16);
-		vis_xor(REF_0, REF_2, TMP0);
-
-		vis_xor(REF_4, REF_6, TMP2);
-
-		vis_or(REF_0, REF_2, TMP20);
-
-		vis_and(TMP12, MASK_7f, TMP12);
-
-		vis_and(TMP16, MASK_7f, TMP16);
-
-		vis_psub16(TMP14, TMP12, TMP12);
-		vis_st64(TMP12, dest[0]);
-
-		vis_psub16(TMP18, TMP16, TMP16);
-		vis_st64_2(TMP16, dest, 8);
-		dest += stride;
-
-		vis_or(REF_4, REF_6, TMP18);
-
-		vis_and(TMP0, MASK_fe, TMP0);
-
-		vis_and(TMP2, MASK_fe, TMP2);
-		vis_mul8x16(CONST_128, TMP0, TMP0);
-
-		vis_faligndata(TMP6, TMP8, REF_2);
-		vis_mul8x16(CONST_128, TMP2, TMP2);
-
-		vis_faligndata(TMP8, TMP10, REF_6);
-
-		vis_and(TMP0, MASK_7f, TMP0);
-
-		vis_and(TMP2, MASK_7f, TMP2);
-
-		vis_psub16(TMP20, TMP0, TMP0);
-		vis_st64(TMP0, dest[0]);
-
-		vis_psub16(TMP18, TMP2, TMP2);
-		vis_st64_2(TMP2, dest, 8);
-		dest += stride;
-	} while (--height);
-
-	vis_ld64(ref[0], TMP0);
-	vis_xor(REF_0, REF_2, TMP12);
-
-	vis_ld64_2(ref, 8, TMP2);
-	vis_xor(REF_4, REF_6, TMP16);
-
-	vis_ld64_2(ref, offset, TMP4);
-	vis_or(REF_0, REF_2, TMP14);
-
-	vis_or(REF_4, REF_6, TMP18);
-
-	vis_faligndata(TMP0, TMP2, REF_0);
-
-	vis_faligndata(TMP2, TMP4, REF_4);
-
-	vis_and(TMP12, MASK_fe, TMP12);
-
-	vis_and(TMP16, MASK_fe, TMP16);
-	vis_mul8x16(CONST_128, TMP12, TMP12);
-
-	vis_mul8x16(CONST_128, TMP16, TMP16);
-	vis_xor(REF_0, REF_2, TMP0);
-
-	vis_xor(REF_4, REF_6, TMP2);
-
-	vis_or(REF_0, REF_2, TMP20);
-
-	vis_and(TMP12, MASK_7f, TMP12);
-
-	vis_and(TMP16, MASK_7f, TMP16);
-
-	vis_psub16(TMP14, TMP12, TMP12);
-	vis_st64(TMP12, dest[0]);
-
-	vis_psub16(TMP18, TMP16, TMP16);
-	vis_st64_2(TMP16, dest, 8);
-	dest += stride;
-
-	vis_or(REF_4, REF_6, TMP18);
-
-	vis_and(TMP0, MASK_fe, TMP0);
-
-	vis_and(TMP2, MASK_fe, TMP2);
-	vis_mul8x16(CONST_128, TMP0, TMP0);
-
-	vis_mul8x16(CONST_128, TMP2, TMP2);
-
-	vis_and(TMP0, MASK_7f, TMP0);
-
-	vis_and(TMP2, MASK_7f, TMP2);
-
-	vis_psub16(TMP20, TMP0, TMP0);
-	vis_st64(TMP0, dest[0]);
-
-	vis_psub16(TMP18, TMP2, TMP2);
-	vis_st64_2(TMP2, dest, 8);
-}
-
-static void MC_put_y_8_vis (uint8_t * dest, const uint8_t * _ref,
-			    const int stride, int height)
-{
-	uint8_t *ref = (uint8_t *) _ref;
-	int offset;
-
-	ref = vis_alignaddr(ref);
-	offset = (ref != _ref) ? 8 : 0;
-
-	vis_ld64(ref[0], TMP0);
-
-	vis_ld64_2(ref, offset, TMP2);
-	ref += stride;
-
-	vis_ld64(ref[0], TMP4);
-
-	vis_ld64_2(ref, offset, TMP6);
-	ref += stride;
-
-	vis_ld64(constants_fe[0], MASK_fe);
-	vis_faligndata(TMP0, TMP2, REF_0);
-
-	vis_ld64(constants_7f[0], MASK_7f);
-	vis_faligndata(TMP4, TMP6, REF_2);
-
-	vis_ld64(constants128[0], CONST_128);
-	height = (height >> 1) - 1;
-	do {	/* 12 cycles */
-		vis_ld64(ref[0], TMP0);
-		vis_xor(REF_0, REF_2, TMP4);
-
-		vis_ld64_2(ref, offset, TMP2);
-		ref += stride;
-		vis_and(TMP4, MASK_fe, TMP4);
-
-		vis_or(REF_0, REF_2, TMP6);
-		vis_mul8x16(CONST_128, TMP4, TMP4);
-
-		vis_faligndata(TMP0, TMP2, REF_0);
-		vis_ld64(ref[0], TMP0);
-
-		vis_ld64_2(ref, offset, TMP2);
-		ref += stride;
-		vis_xor(REF_0, REF_2, TMP12);
-
-		vis_and(TMP4, MASK_7f, TMP4);
-
-		vis_and(TMP12, MASK_fe, TMP12);
-
-		vis_mul8x16(CONST_128, TMP12, TMP12);
-		vis_or(REF_0, REF_2, TMP14);
-
-		vis_psub16(TMP6, TMP4, DST_0);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
-
-		vis_faligndata(TMP0, TMP2, REF_2);
-
-		vis_and(TMP12, MASK_7f, TMP12);
-
-		vis_psub16(TMP14, TMP12, DST_0);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
-	} while (--height);
-
-	vis_ld64(ref[0], TMP0);
-	vis_xor(REF_0, REF_2, TMP4);
-
-	vis_ld64_2(ref, offset, TMP2);
-	vis_and(TMP4, MASK_fe, TMP4);
-
-	vis_or(REF_0, REF_2, TMP6);
-	vis_mul8x16(CONST_128, TMP4, TMP4);
-
-	vis_faligndata(TMP0, TMP2, REF_0);
-
-	vis_xor(REF_0, REF_2, TMP12);
-
-	vis_and(TMP4, MASK_7f, TMP4);
-
-	vis_and(TMP12, MASK_fe, TMP12);
-
-	vis_mul8x16(CONST_128, TMP12, TMP12);
-	vis_or(REF_0, REF_2, TMP14);
-
-	vis_psub16(TMP6, TMP4, DST_0);
-	vis_st64(DST_0, dest[0]);
-	dest += stride;
-
-	vis_and(TMP12, MASK_7f, TMP12);
-
-	vis_psub16(TMP14, TMP12, DST_0);
-	vis_st64(DST_0, dest[0]);
-}
-
-static void MC_avg_y_16_vis (uint8_t * dest, const uint8_t * _ref,
-			     const int stride, int height)
-{
-	uint8_t *ref = (uint8_t *) _ref;
-	int stride_8 = stride + 8;
-	int stride_16;
-	int offset;
-
-	vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
-
-	ref = vis_alignaddr(ref);
-	offset = (ref != _ref) ? 16 : 0;
-
-	vis_ld64(ref[ 0], TMP0);
-	vis_fzero(ZERO);
-
-	vis_ld64(ref[ 8], TMP2);
-
-	vis_ld64_2(ref, offset, TMP4);
-	stride_16 = stride + offset;
-
-	vis_ld64(constants3[0], CONST_3);
-	vis_faligndata(TMP0, TMP2, REF_2);
-
-	vis_ld64(constants256_512[0], CONST_256);
-	vis_faligndata(TMP2, TMP4, REF_6);
-	height >>= 1;
-
-	do {	/* 31 cycles */
-		vis_ld64_2(ref, stride, TMP0);
-		vis_pmerge(ZERO,       REF_2,     TMP12);
-		vis_mul8x16au(REF_2_1, CONST_256, TMP14);
-
-		vis_ld64_2(ref, stride_8, TMP2);
-		vis_pmerge(ZERO,       REF_6,     TMP16);
-		vis_mul8x16au(REF_6_1, CONST_256, TMP18);
-
-		vis_ld64_2(ref, stride_16, TMP4);
-		ref += stride;
-
-		vis_ld64(dest[0], DST_0);
-		vis_faligndata(TMP0, TMP2, REF_0);
-
-		vis_ld64_2(dest, 8, DST_2);
-		vis_faligndata(TMP2, TMP4, REF_4);
-
-		vis_ld64_2(ref, stride, TMP6);
-		vis_pmerge(ZERO,     REF_0,     TMP0);
-		vis_mul8x16au(REF_0_1, CONST_256, TMP2);
-
-		vis_ld64_2(ref, stride_8, TMP8);
-		vis_pmerge(ZERO,     REF_4,     TMP4);
-
-		vis_ld64_2(ref, stride_16, TMP10);
-		ref += stride;
-
-		vis_ld64_2(dest, stride, REF_S0/*DST_4*/);
-		vis_faligndata(TMP6, TMP8, REF_2);
-		vis_mul8x16au(REF_4_1, CONST_256, TMP6);
-
-		vis_ld64_2(dest, stride_8, REF_S2/*DST_6*/);
-		vis_faligndata(TMP8, TMP10, REF_6);
-		vis_mul8x16al(DST_0,   CONST_512, TMP20);
-
-		vis_padd16(TMP0, CONST_3, TMP0);
-		vis_mul8x16al(DST_1,   CONST_512, TMP22);
-
-		vis_padd16(TMP2, CONST_3, TMP2);
-		vis_mul8x16al(DST_2,   CONST_512, TMP24);
-
-		vis_padd16(TMP4, CONST_3, TMP4);
-		vis_mul8x16al(DST_3,   CONST_512, TMP26);
-
-		vis_padd16(TMP6, CONST_3, TMP6);
-
-		vis_padd16(TMP12, TMP20, TMP12);
-		vis_mul8x16al(REF_S0,   CONST_512, TMP20);
-
-		vis_padd16(TMP14, TMP22, TMP14);
-		vis_mul8x16al(REF_S0_1, CONST_512, TMP22);
-
-		vis_padd16(TMP16, TMP24, TMP16);
-		vis_mul8x16al(REF_S2,   CONST_512, TMP24);
-
-		vis_padd16(TMP18, TMP26, TMP18);
-		vis_mul8x16al(REF_S2_1, CONST_512, TMP26);
-
-		vis_padd16(TMP12, TMP0, TMP12);
-		vis_mul8x16au(REF_2,   CONST_256, TMP28);
-
-		vis_padd16(TMP14, TMP2, TMP14);
-		vis_mul8x16au(REF_2_1, CONST_256, TMP30);
-
-		vis_padd16(TMP16, TMP4, TMP16);
-		vis_mul8x16au(REF_6,   CONST_256, REF_S4);
-
-		vis_padd16(TMP18, TMP6, TMP18);
-		vis_mul8x16au(REF_6_1, CONST_256, REF_S6);
-
-		vis_pack16(TMP12, DST_0);
-		vis_padd16(TMP28, TMP0, TMP12);
-
-		vis_pack16(TMP14, DST_1);
-		vis_st64(DST_0, dest[0]);
-		vis_padd16(TMP30, TMP2, TMP14);
-
-		vis_pack16(TMP16, DST_2);
-		vis_padd16(REF_S4, TMP4, TMP16);
-
-		vis_pack16(TMP18, DST_3);
-		vis_st64_2(DST_2, dest, 8);
-		dest += stride;
-		vis_padd16(REF_S6, TMP6, TMP18);
-
-		vis_padd16(TMP12, TMP20, TMP12);
-
-		vis_padd16(TMP14, TMP22, TMP14);
-		vis_pack16(TMP12, DST_0);
-
-		vis_padd16(TMP16, TMP24, TMP16);
-		vis_pack16(TMP14, DST_1);
-		vis_st64(DST_0, dest[0]);
-
-		vis_padd16(TMP18, TMP26, TMP18);
-		vis_pack16(TMP16, DST_2);
-
-		vis_pack16(TMP18, DST_3);
-		vis_st64_2(DST_2, dest, 8);
-		dest += stride;
-	} while (--height);
-}
-
-static void MC_avg_y_8_vis (uint8_t * dest, const uint8_t * _ref,
-			    const int stride, int height)
-{
-	uint8_t *ref = (uint8_t *) _ref;
-	int stride_8;
-	int offset;
-
-	vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
-
-	ref = vis_alignaddr(ref);
-	offset = (ref != _ref) ? 8 : 0;
-
-	vis_ld64(ref[ 0], TMP0);
-	vis_fzero(ZERO);
-
-	vis_ld64_2(ref, offset, TMP2);
-	stride_8 = stride + offset;
-
-	vis_ld64(constants3[0], CONST_3);
-	vis_faligndata(TMP0, TMP2, REF_2);
-
-	vis_ld64(constants256_512[0], CONST_256);
-
-	height >>= 1;
-	do {	/* 20 cycles */
-		vis_ld64_2(ref, stride, TMP0);
-		vis_pmerge(ZERO,       REF_2,     TMP8);
-		vis_mul8x16au(REF_2_1, CONST_256, TMP10);
-
-		vis_ld64_2(ref, stride_8, TMP2);
-		ref += stride;
-
-		vis_ld64(dest[0], DST_0);
-
-		vis_ld64_2(dest, stride, DST_2);
-		vis_faligndata(TMP0, TMP2, REF_0);
-
-		vis_ld64_2(ref, stride, TMP4);
-		vis_mul8x16al(DST_0,   CONST_512, TMP16);
-		vis_pmerge(ZERO,       REF_0,     TMP12);
-
-		vis_ld64_2(ref, stride_8, TMP6);
-		ref += stride;
-		vis_mul8x16al(DST_1,   CONST_512, TMP18);
-		vis_pmerge(ZERO,       REF_0_1,   TMP14);
-
-		vis_padd16(TMP12, CONST_3, TMP12);
-		vis_mul8x16al(DST_2,   CONST_512, TMP24);
-
-		vis_padd16(TMP14, CONST_3, TMP14);
-		vis_mul8x16al(DST_3,   CONST_512, TMP26);
-
-		vis_faligndata(TMP4, TMP6, REF_2);
-
-		vis_padd16(TMP8, TMP12, TMP8);
-
-		vis_padd16(TMP10, TMP14, TMP10);
-		vis_mul8x16au(REF_2,   CONST_256, TMP20);
-
-		vis_padd16(TMP8, TMP16, TMP0);
-		vis_mul8x16au(REF_2_1, CONST_256, TMP22);
-
-		vis_padd16(TMP10, TMP18, TMP2);
-		vis_pack16(TMP0, DST_0);
-
-		vis_pack16(TMP2, DST_1);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
-		vis_padd16(TMP12, TMP20, TMP12);
-
-		vis_padd16(TMP14, TMP22, TMP14);
-
-		vis_padd16(TMP12, TMP24, TMP0);
-
-		vis_padd16(TMP14, TMP26, TMP2);
-		vis_pack16(TMP0, DST_2);
-
-		vis_pack16(TMP2, DST_3);
-		vis_st64(DST_2, dest[0]);
-		dest += stride;
-	} while (--height);
-}
-
-static void MC_put_xy_16_vis (uint8_t * dest, const uint8_t * _ref,
-			      const int stride, int height)
-{
-	uint8_t *ref = (uint8_t *) _ref;
-	unsigned long off = (unsigned long) ref & 0x7;
-	unsigned long off_plus_1 = off + 1;
-	int stride_8 = stride + 8;
-	int stride_16 = stride + 16;
-
-	vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
-
-	ref = vis_alignaddr(ref);
-
-	vis_ld64(ref[ 0], TMP0);
-	vis_fzero(ZERO);
-
-	vis_ld64(ref[ 8], TMP2);
-
-	vis_ld64(ref[16], TMP4);
-
-	vis_ld64(constants2[0], CONST_2);
-	vis_faligndata(TMP0, TMP2, REF_S0);
-
-	vis_ld64(constants256_512[0], CONST_256);
-	vis_faligndata(TMP2, TMP4, REF_S4);
-
-	if (off != 0x7) {
-		vis_alignaddr_g0((void *)off_plus_1);
-		vis_faligndata(TMP0, TMP2, REF_S2);
-		vis_faligndata(TMP2, TMP4, REF_S6);
-	} else {
-		vis_src1(TMP2, REF_S2);
-		vis_src1(TMP4, REF_S6);
-	}
-
-	height >>= 1;
-	do {
-		vis_ld64_2(ref, stride, TMP0);
-		vis_mul8x16au(REF_S0, CONST_256, TMP12);
-		vis_pmerge(ZERO,      REF_S0_1,  TMP14);
-
-		vis_alignaddr_g0((void *)off);
-
-		vis_ld64_2(ref, stride_8, TMP2);
-		vis_mul8x16au(REF_S2, CONST_256, TMP16);
-		vis_pmerge(ZERO,      REF_S2_1,  TMP18);
-
-		vis_ld64_2(ref, stride_16, TMP4);
-		ref += stride;
-		vis_mul8x16au(REF_S4, CONST_256, TMP20);
-		vis_pmerge(ZERO,      REF_S4_1,  TMP22);
-
-		vis_ld64_2(ref, stride, TMP6);
-		vis_mul8x16au(REF_S6, CONST_256, TMP24);
-		vis_pmerge(ZERO,      REF_S6_1,  TMP26);
-
-		vis_ld64_2(ref, stride_8, TMP8);
-		vis_faligndata(TMP0, TMP2, REF_0);
-
-		vis_ld64_2(ref, stride_16, TMP10);
-		ref += stride;
-		vis_faligndata(TMP2, TMP4, REF_4);
-
-		vis_faligndata(TMP6, TMP8, REF_S0);
-
-		vis_faligndata(TMP8, TMP10, REF_S4);
-
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP0, TMP2, REF_2);
-			vis_faligndata(TMP2, TMP4, REF_6);
-			vis_faligndata(TMP6, TMP8, REF_S2);
-			vis_faligndata(TMP8, TMP10, REF_S6);
-		} else {
-			vis_src1(TMP2, REF_2);
-			vis_src1(TMP4, REF_6);
-			vis_src1(TMP8, REF_S2);
-			vis_src1(TMP10, REF_S6);
-		}
-
-		vis_mul8x16au(REF_0, CONST_256, TMP0);
-		vis_pmerge(ZERO,      REF_0_1,  TMP2);
-
-		vis_mul8x16au(REF_2, CONST_256, TMP4);
-		vis_pmerge(ZERO,      REF_2_1,  TMP6);
-
-		vis_padd16(TMP0, CONST_2, TMP8);
-		vis_mul8x16au(REF_4, CONST_256, TMP0);
-
-		vis_padd16(TMP2, CONST_2, TMP10);
-		vis_mul8x16au(REF_4_1, CONST_256, TMP2);
-
-		vis_padd16(TMP8, TMP4, TMP8);
-		vis_mul8x16au(REF_6, CONST_256, TMP4);
-
-		vis_padd16(TMP10, TMP6, TMP10);
-		vis_mul8x16au(REF_6_1, CONST_256, TMP6);
-
-		vis_padd16(TMP12, TMP8, TMP12);
-
-		vis_padd16(TMP14, TMP10, TMP14);
-
-		vis_padd16(TMP12, TMP16, TMP12);
-
-		vis_padd16(TMP14, TMP18, TMP14);
-		vis_pack16(TMP12, DST_0);
-
-		vis_pack16(TMP14, DST_1);
-		vis_st64(DST_0, dest[0]);
-		vis_padd16(TMP0, CONST_2, TMP12);
-
-		vis_mul8x16au(REF_S0, CONST_256, TMP0);
-		vis_padd16(TMP2, CONST_2, TMP14);
-
-		vis_mul8x16au(REF_S0_1, CONST_256, TMP2);
-		vis_padd16(TMP12, TMP4, TMP12);
-
-		vis_mul8x16au(REF_S2, CONST_256, TMP4);
-		vis_padd16(TMP14, TMP6, TMP14);
-
-		vis_mul8x16au(REF_S2_1, CONST_256, TMP6);
-		vis_padd16(TMP20, TMP12, TMP20);
-
-		vis_padd16(TMP22, TMP14, TMP22);
-
-		vis_padd16(TMP20, TMP24, TMP20);
-
-		vis_padd16(TMP22, TMP26, TMP22);
-		vis_pack16(TMP20, DST_2);
-
-		vis_pack16(TMP22, DST_3);
-		vis_st64_2(DST_2, dest, 8);
-		dest += stride;
-		vis_padd16(TMP0, TMP4, TMP24);
-
-		vis_mul8x16au(REF_S4, CONST_256, TMP0);
-		vis_padd16(TMP2, TMP6, TMP26);
-
-		vis_mul8x16au(REF_S4_1, CONST_256, TMP2);
-		vis_padd16(TMP24, TMP8, TMP24);
-
-		vis_padd16(TMP26, TMP10, TMP26);
-		vis_pack16(TMP24, DST_0);
-
-		vis_pack16(TMP26, DST_1);
-		vis_st64(DST_0, dest[0]);
-		vis_pmerge(ZERO, REF_S6, TMP4);
-
-		vis_pmerge(ZERO,      REF_S6_1,  TMP6);
-
-		vis_padd16(TMP0, TMP4, TMP0);
-
-		vis_padd16(TMP2, TMP6, TMP2);
-
-		vis_padd16(TMP0, TMP12, TMP0);
-
-		vis_padd16(TMP2, TMP14, TMP2);
-		vis_pack16(TMP0, DST_2);
-
-		vis_pack16(TMP2, DST_3);
-		vis_st64_2(DST_2, dest, 8);
-		dest += stride;
-	} while (--height);
-}
-
-static void MC_put_xy_8_vis (uint8_t * dest, const uint8_t * _ref,
-			     const int stride, int height)
-{
-	uint8_t *ref = (uint8_t *) _ref;
-	unsigned long off = (unsigned long) ref & 0x7;
-	unsigned long off_plus_1 = off + 1;
-	int stride_8 = stride + 8;
-
-	vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
-
-	ref = vis_alignaddr(ref);
-
-	vis_ld64(ref[ 0], TMP0);
-	vis_fzero(ZERO);
-
-	vis_ld64(ref[ 8], TMP2);
-
-	vis_ld64(constants2[0], CONST_2);
-
-	vis_ld64(constants256_512[0], CONST_256);
-	vis_faligndata(TMP0, TMP2, REF_S0);
-
-	if (off != 0x7) {
-		vis_alignaddr_g0((void *)off_plus_1);
-		vis_faligndata(TMP0, TMP2, REF_S2);
-	} else {
-		vis_src1(TMP2, REF_S2);
-	}
-
-	height >>= 1;
-	do {	/* 26 cycles */
-		vis_ld64_2(ref, stride, TMP0);
-		vis_mul8x16au(REF_S0,   CONST_256, TMP8);
-		vis_pmerge(ZERO,        REF_S2,    TMP12);
-
-		vis_alignaddr_g0((void *)off);
-
-		vis_ld64_2(ref, stride_8, TMP2);
-		ref += stride;
-		vis_mul8x16au(REF_S0_1, CONST_256, TMP10);
-		vis_pmerge(ZERO,        REF_S2_1,  TMP14);
-
-		vis_ld64_2(ref, stride, TMP4);
-
-		vis_ld64_2(ref, stride_8, TMP6);
-		ref += stride;
-		vis_faligndata(TMP0, TMP2, REF_S4);
-
-		vis_pmerge(ZERO, REF_S4, TMP18);
-
-		vis_pmerge(ZERO, REF_S4_1, TMP20);
-
-		vis_faligndata(TMP4, TMP6, REF_S0);
-
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP0, TMP2, REF_S6);
-			vis_faligndata(TMP4, TMP6, REF_S2);
-		} else {
-			vis_src1(TMP2, REF_S6);
-			vis_src1(TMP6, REF_S2);
-		}
-
-		vis_padd16(TMP18, CONST_2, TMP18);
-		vis_mul8x16au(REF_S6,   CONST_256, TMP22);
-
-		vis_padd16(TMP20, CONST_2, TMP20);
-		vis_mul8x16au(REF_S6_1, CONST_256, TMP24);
-
-		vis_mul8x16au(REF_S0,   CONST_256, TMP26);
-		vis_pmerge(ZERO, REF_S0_1, TMP28);
-
-		vis_mul8x16au(REF_S2,   CONST_256, TMP30);
-		vis_padd16(TMP18, TMP22, TMP18);
-
-		vis_mul8x16au(REF_S2_1, CONST_256, TMP32);
-		vis_padd16(TMP20, TMP24, TMP20);
-
-		vis_padd16(TMP8,  TMP18, TMP8);
-
-		vis_padd16(TMP10, TMP20, TMP10);
-
-		vis_padd16(TMP8,  TMP12, TMP8);
-
-		vis_padd16(TMP10, TMP14, TMP10);
-		vis_pack16(TMP8,  DST_0);
-
-		vis_pack16(TMP10, DST_1);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
-		vis_padd16(TMP18, TMP26, TMP18);
-
-		vis_padd16(TMP20, TMP28, TMP20);
-
-		vis_padd16(TMP18, TMP30, TMP18);
-
-		vis_padd16(TMP20, TMP32, TMP20);
-		vis_pack16(TMP18, DST_2);
-
-		vis_pack16(TMP20, DST_3);
-		vis_st64(DST_2, dest[0]);
-		dest += stride;
-	} while (--height);
-}
-
-static void MC_avg_xy_16_vis (uint8_t * dest, const uint8_t * _ref,
-			      const int stride, int height)
-{
-	uint8_t *ref = (uint8_t *) _ref;
-	unsigned long off = (unsigned long) ref & 0x7;
-	unsigned long off_plus_1 = off + 1;
-	int stride_8 = stride + 8;
-	int stride_16 = stride + 16;
-
-	vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT);
-
-	ref = vis_alignaddr(ref);
-
-	vis_ld64(ref[ 0], TMP0);
-	vis_fzero(ZERO);
-
-	vis_ld64(ref[ 8], TMP2);
-
-	vis_ld64(ref[16], TMP4);
-
-	vis_ld64(constants6[0], CONST_6);
-	vis_faligndata(TMP0, TMP2, REF_S0);
-
-	vis_ld64(constants256_1024[0], CONST_256);
-	vis_faligndata(TMP2, TMP4, REF_S4);
-
-	if (off != 0x7) {
-		vis_alignaddr_g0((void *)off_plus_1);
-		vis_faligndata(TMP0, TMP2, REF_S2);
-		vis_faligndata(TMP2, TMP4, REF_S6);
-	} else {
-		vis_src1(TMP2, REF_S2);
-		vis_src1(TMP4, REF_S6);
-	}
-
-	height >>= 1;
-	do {	/* 55 cycles */
-		vis_ld64_2(ref, stride, TMP0);
-		vis_mul8x16au(REF_S0, CONST_256, TMP12);
-		vis_pmerge(ZERO,      REF_S0_1,  TMP14);
-
-		vis_alignaddr_g0((void *)off);
-
-		vis_ld64_2(ref, stride_8, TMP2);
-		vis_mul8x16au(REF_S2, CONST_256, TMP16);
-		vis_pmerge(ZERO,      REF_S2_1,  TMP18);
-
-		vis_ld64_2(ref, stride_16, TMP4);
-		ref += stride;
-		vis_mul8x16au(REF_S4, CONST_256, TMP20);
-		vis_pmerge(ZERO,      REF_S4_1,  TMP22);
-
-		vis_ld64_2(ref, stride, TMP6);
-		vis_mul8x16au(REF_S6, CONST_256, TMP24);
-		vis_pmerge(ZERO,      REF_S6_1,  TMP26);
-
-		vis_ld64_2(ref, stride_8, TMP8);
-		vis_faligndata(TMP0, TMP2, REF_0);
-
-		vis_ld64_2(ref, stride_16, TMP10);
-		ref += stride;
-		vis_faligndata(TMP2, TMP4, REF_4);
-
-		vis_ld64(dest[0], DST_0);
-		vis_faligndata(TMP6, TMP8, REF_S0);
-
-		vis_ld64_2(dest, 8, DST_2);
-		vis_faligndata(TMP8, TMP10, REF_S4);
-
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP0, TMP2, REF_2);
-			vis_faligndata(TMP2, TMP4, REF_6);
-			vis_faligndata(TMP6, TMP8, REF_S2);
-			vis_faligndata(TMP8, TMP10, REF_S6);
-		} else {
-			vis_src1(TMP2, REF_2);
-			vis_src1(TMP4, REF_6);
-			vis_src1(TMP8, REF_S2);
-			vis_src1(TMP10, REF_S6);
-		}
-
-		vis_mul8x16al(DST_0,   CONST_1024, TMP30);
-		vis_pmerge(ZERO, REF_0, TMP0);
-
-		vis_mul8x16al(DST_1,   CONST_1024, TMP32);
-		vis_pmerge(ZERO,      REF_0_1,  TMP2);
-
-		vis_mul8x16au(REF_2, CONST_256, TMP4);
-		vis_pmerge(ZERO,      REF_2_1,  TMP6);
-
-		vis_mul8x16al(DST_2,   CONST_1024, REF_0);
-		vis_padd16(TMP0, CONST_6, TMP0);
-
-		vis_mul8x16al(DST_3,   CONST_1024, REF_2);
-		vis_padd16(TMP2, CONST_6, TMP2);
-
-		vis_padd16(TMP0, TMP4, TMP0);
-		vis_mul8x16au(REF_4, CONST_256, TMP4);
-
-		vis_padd16(TMP2, TMP6, TMP2);
-		vis_mul8x16au(REF_4_1, CONST_256, TMP6);
-
-		vis_padd16(TMP12, TMP0, TMP12);
-		vis_mul8x16au(REF_6, CONST_256, TMP8);
-
-		vis_padd16(TMP14, TMP2, TMP14);
-		vis_mul8x16au(REF_6_1, CONST_256, TMP10);
-
-		vis_padd16(TMP12, TMP16, TMP12);
-		vis_mul8x16au(REF_S0, CONST_256, REF_4);
-
-		vis_padd16(TMP14, TMP18, TMP14);
-		vis_mul8x16au(REF_S0_1, CONST_256, REF_6);
-
-		vis_padd16(TMP12, TMP30, TMP12);
-
-		vis_padd16(TMP14, TMP32, TMP14);
-		vis_pack16(TMP12, DST_0);
-
-		vis_pack16(TMP14, DST_1);
-		vis_st64(DST_0, dest[0]);
-		vis_padd16(TMP4, CONST_6, TMP4);
-
-		vis_ld64_2(dest, stride, DST_0);
-		vis_padd16(TMP6, CONST_6, TMP6);
-		vis_mul8x16au(REF_S2, CONST_256, TMP12);
-
-		vis_padd16(TMP4, TMP8, TMP4);
-		vis_mul8x16au(REF_S2_1, CONST_256,  TMP14);
-
-		vis_padd16(TMP6, TMP10, TMP6);
-
-		vis_padd16(TMP20, TMP4, TMP20);
-
-		vis_padd16(TMP22, TMP6, TMP22);
-
-		vis_padd16(TMP20, TMP24, TMP20);
-
-		vis_padd16(TMP22, TMP26, TMP22);
-
-		vis_padd16(TMP20, REF_0, TMP20);
-		vis_mul8x16au(REF_S4, CONST_256, REF_0);
-
-		vis_padd16(TMP22, REF_2, TMP22);
-		vis_pack16(TMP20, DST_2);
-
-		vis_pack16(TMP22, DST_3);
-		vis_st64_2(DST_2, dest, 8);
-		dest += stride;
-
-		vis_ld64_2(dest, 8, DST_2);
-		vis_mul8x16al(DST_0,   CONST_1024, TMP30);
-		vis_pmerge(ZERO,      REF_S4_1,  REF_2);
-
-		vis_mul8x16al(DST_1,   CONST_1024, TMP32);
-		vis_padd16(REF_4, TMP0, TMP8);
-
-		vis_mul8x16au(REF_S6, CONST_256, REF_4);
-		vis_padd16(REF_6, TMP2, TMP10);
-
-		vis_mul8x16au(REF_S6_1, CONST_256, REF_6);
-		vis_padd16(TMP8, TMP12, TMP8);
-
-		vis_padd16(TMP10, TMP14, TMP10);
-
-		vis_padd16(TMP8, TMP30, TMP8);
-
-		vis_padd16(TMP10, TMP32, TMP10);
-		vis_pack16(TMP8, DST_0);
-
-		vis_pack16(TMP10, DST_1);
-		vis_st64(DST_0, dest[0]);
-
-		vis_padd16(REF_0, TMP4, REF_0);
-
-		vis_mul8x16al(DST_2,   CONST_1024, TMP30);
-		vis_padd16(REF_2, TMP6, REF_2);
-
-		vis_mul8x16al(DST_3,   CONST_1024, TMP32);
-		vis_padd16(REF_0, REF_4, REF_0);
-
-		vis_padd16(REF_2, REF_6, REF_2);
-
-		vis_padd16(REF_0, TMP30, REF_0);
-
-		/* stall */
-
-		vis_padd16(REF_2, TMP32, REF_2);
-		vis_pack16(REF_0, DST_2);
-
-		vis_pack16(REF_2, DST_3);
-		vis_st64_2(DST_2, dest, 8);
-		dest += stride;
-	} while (--height);
-}
-
-static void MC_avg_xy_8_vis (uint8_t * dest, const uint8_t * _ref,
-			     const int stride, int height)
-{
-	uint8_t *ref = (uint8_t *) _ref;
-	unsigned long off = (unsigned long) ref & 0x7;
-	unsigned long off_plus_1 = off + 1;
-	int stride_8 = stride + 8;
-
-	vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT);
-
-	ref = vis_alignaddr(ref);
-
-	vis_ld64(ref[0], TMP0);
-	vis_fzero(ZERO);
-
-	vis_ld64_2(ref, 8, TMP2);
-
-	vis_ld64(constants6[0], CONST_6);
-
-	vis_ld64(constants256_1024[0], CONST_256);
-	vis_faligndata(TMP0, TMP2, REF_S0);
-
-	if (off != 0x7) {
-		vis_alignaddr_g0((void *)off_plus_1);
-		vis_faligndata(TMP0, TMP2, REF_S2);
-	} else {
-		vis_src1(TMP2, REF_S2);
-	}
-
-	height >>= 1;
-	do {	/* 31 cycles */
-		vis_ld64_2(ref, stride, TMP0);
-		vis_mul8x16au(REF_S0, CONST_256, TMP8);
-		vis_pmerge(ZERO,      REF_S0_1,  TMP10);
-
-		vis_ld64_2(ref, stride_8, TMP2);
-		ref += stride;
-		vis_mul8x16au(REF_S2, CONST_256, TMP12);
-		vis_pmerge(ZERO,      REF_S2_1,  TMP14);
-
-		vis_alignaddr_g0((void *)off);
-
-		vis_ld64_2(ref, stride, TMP4);
-		vis_faligndata(TMP0, TMP2, REF_S4);
-
-		vis_ld64_2(ref, stride_8, TMP6);
-		ref += stride;
-
-		vis_ld64(dest[0], DST_0);
-		vis_faligndata(TMP4, TMP6, REF_S0);
-
-		vis_ld64_2(dest, stride, DST_2);
-
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP0, TMP2, REF_S6);
-			vis_faligndata(TMP4, TMP6, REF_S2);
-		} else {
-			vis_src1(TMP2, REF_S6);
-			vis_src1(TMP6, REF_S2);
-		}
-
-		vis_mul8x16al(DST_0,   CONST_1024, TMP30);
-		vis_pmerge(ZERO, REF_S4, TMP22);
-
-		vis_mul8x16al(DST_1,   CONST_1024, TMP32);
-		vis_pmerge(ZERO,      REF_S4_1,  TMP24);
-
-		vis_mul8x16au(REF_S6, CONST_256, TMP26);
-		vis_pmerge(ZERO,      REF_S6_1,  TMP28);
-
-		vis_mul8x16au(REF_S0, CONST_256, REF_S4);
-		vis_padd16(TMP22, CONST_6, TMP22);
-
-		vis_mul8x16au(REF_S0_1, CONST_256, REF_S6);
-		vis_padd16(TMP24, CONST_6, TMP24);
-
-		vis_mul8x16al(DST_2,   CONST_1024, REF_0);
-		vis_padd16(TMP22, TMP26, TMP22);
-
-		vis_mul8x16al(DST_3,   CONST_1024, REF_2);
-		vis_padd16(TMP24, TMP28, TMP24);
-
-		vis_mul8x16au(REF_S2, CONST_256, TMP26);
-		vis_padd16(TMP8, TMP22, TMP8);
-
-		vis_mul8x16au(REF_S2_1, CONST_256, TMP28);
-		vis_padd16(TMP10, TMP24, TMP10);
-
-		vis_padd16(TMP8, TMP12, TMP8);
-
-		vis_padd16(TMP10, TMP14, TMP10);
-
-		vis_padd16(TMP8, TMP30, TMP8);
-
-		vis_padd16(TMP10, TMP32, TMP10);
-		vis_pack16(TMP8, DST_0);
-
-		vis_pack16(TMP10, DST_1);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
-
-		vis_padd16(REF_S4, TMP22, TMP12);
-
-		vis_padd16(REF_S6, TMP24, TMP14);
-
-		vis_padd16(TMP12, TMP26, TMP12);
-
-		vis_padd16(TMP14, TMP28, TMP14);
-
-		vis_padd16(TMP12, REF_0, TMP12);
-
-		vis_padd16(TMP14, REF_2, TMP14);
-		vis_pack16(TMP12, DST_2);
-
-		vis_pack16(TMP14, DST_3);
-		vis_st64(DST_2, dest[0]);
-		dest += stride;
-	} while (--height);
-}
-
-MPEG2_MC_EXTERN(vis);
-
-#endif  /* !(ARCH_SPARC) */
diff --git a/src/libmpeg2new/libmpeg2/mpeg2_internal.h b/src/libmpeg2new/libmpeg2/mpeg2_internal.h
deleted file mode 100644
index fec7d4744..000000000
--- a/src/libmpeg2new/libmpeg2/mpeg2_internal.h
+++ /dev/null
@@ -1,302 +0,0 @@
-/*
- * mpeg2_internal.h
- * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#define STATE_INTERNAL_NORETURN ((mpeg2_state_t)-1)
-
-/* macroblock modes */
-#define MACROBLOCK_INTRA 1
-#define MACROBLOCK_PATTERN 2
-#define MACROBLOCK_MOTION_BACKWARD 4
-#define MACROBLOCK_MOTION_FORWARD 8
-#define MACROBLOCK_QUANT 16
-#define DCT_TYPE_INTERLACED 32
-/* motion_type */
-#define MOTION_TYPE_SHIFT 6
-#define MC_FIELD 1
-#define MC_FRAME 2
-#define MC_16X8 2
-#define MC_DMV 3
-
-/* picture structure */
-#define TOP_FIELD 1
-#define BOTTOM_FIELD 2
-#define FRAME_PICTURE 3
-
-/* picture coding type */
-#define I_TYPE 1
-#define P_TYPE 2
-#define B_TYPE 3
-#define D_TYPE 4
-
-typedef void mpeg2_mc_fct (uint8_t *, const uint8_t *, int, int);
-
-typedef struct {
-    uint8_t * ref[2][3];
-    uint8_t ** ref2[2];
-    int pmv[2][2];
-    int f_code[2];
-} motion_t;
-
-typedef void motion_parser_t (mpeg2_decoder_t * decoder,
-			      motion_t * motion,
-			      mpeg2_mc_fct * const * table);
-
-struct mpeg2_decoder_s {
-    /* first, state that carries information from one macroblock to the */
-    /* next inside a slice, and is never used outside of mpeg2_slice() */
-
-    /* bit parsing stuff */
-    uint32_t bitstream_buf;		/* current 32 bit working set */
-    int bitstream_bits;			/* used bits in working set */
-    const uint8_t * bitstream_ptr;	/* buffer with stream data */
-
-    uint8_t * dest[3];
-
-    int offset;
-    int stride;
-    int uv_stride;
-    int slice_stride;
-    int slice_uv_stride;
-    int stride_frame;
-    unsigned int limit_x;
-    unsigned int limit_y_16;
-    unsigned int limit_y_8;
-    unsigned int limit_y;
-
-    /* Motion vectors */
-    /* The f_ and b_ correspond to the forward and backward motion */
-    /* predictors */
-    motion_t b_motion;
-    motion_t f_motion;
-    motion_parser_t * motion_parser[5];
-
-    /* predictor for DC coefficients in intra blocks */
-    int16_t dc_dct_pred[3];
-
-    /* DCT coefficients */
-    int16_t DCTblock[64] ATTR_ALIGN(64);
-
-    uint8_t * picture_dest[3];
-    void (* convert) (void * convert_id, uint8_t * const * src,
-		      unsigned int v_offset);
-    void * convert_id;
-
-    int dmv_offset;
-    unsigned int v_offset;
-
-    /* now non-slice-specific information */
-
-    /* sequence header stuff */
-    uint16_t * quantizer_matrix[4];
-    uint16_t (* chroma_quantizer[2])[64];
-    uint16_t quantizer_prescale[4][32][64];
-
-    /* The width and height of the picture snapped to macroblock units */
-    int width;
-    int height;
-    int vertical_position_extension;
-    int chroma_format;
-
-    /* picture header stuff */
-
-    /* what type of picture this is (I, P, B, D) */
-    int coding_type;
-
-    /* picture coding extension stuff */
-
-    /* quantization factor for intra dc coefficients */
-    int intra_dc_precision;
-    /* top/bottom/both fields */
-    int picture_structure;
-    /* bool to indicate all predictions are frame based */
-    int frame_pred_frame_dct;
-    /* bool to indicate whether intra blocks have motion vectors */
-    /* (for concealment) */
-    int concealment_motion_vectors;
-    /* bool to use different vlc tables */
-    int intra_vlc_format;
-    /* used for DMV MC */
-    int top_field_first;
-
-    /* stuff derived from bitstream */
-
-    /* pointer to the zigzag scan we're supposed to be using */
-    const uint8_t * scan;
-
-    int second_field;
-
-    int mpeg1;
-};
-
-typedef struct {
-    mpeg2_fbuf_t fbuf;
-} fbuf_alloc_t;
-
-struct mpeg2dec_s {
-    mpeg2_decoder_t decoder;
-
-    mpeg2_info_t info;
-
-    uint32_t shift;
-    int is_display_initialized;
-    mpeg2_state_t (* action) (struct mpeg2dec_s * mpeg2dec);
-    mpeg2_state_t state;
-    uint32_t ext_state;
-
-    /* allocated in init - gcc has problems allocating such big structures */
-    uint8_t * chunk_buffer;
-    /* pointer to start of the current chunk */
-    uint8_t * chunk_start;
-    /* pointer to current position in chunk_buffer */
-    uint8_t * chunk_ptr;
-    /* last start code ? */
-    uint8_t code;
-
-    /* picture tags */
-    uint32_t tag_current, tag2_current, tag_previous, tag2_previous;
-    int num_tags;
-    int bytes_since_tag;
-
-    int first;
-    int alloc_index_user;
-    int alloc_index;
-    uint8_t first_decode_slice;
-    uint8_t nb_decode_slices;
-
-    unsigned int user_data_len;
-
-    mpeg2_sequence_t new_sequence;
-    mpeg2_sequence_t sequence;
-    mpeg2_gop_t new_gop;
-    mpeg2_gop_t gop;
-    mpeg2_picture_t new_picture;
-    mpeg2_picture_t pictures[4];
-    mpeg2_picture_t * picture;
-    /*const*/ mpeg2_fbuf_t * fbuf[3];	/* 0: current fbuf, 1-2: prediction fbufs */
-
-    fbuf_alloc_t fbuf_alloc[3];
-    int custom_fbuf;
-
-    uint8_t * yuv_buf[3][3];
-    int yuv_index;
-    mpeg2_convert_t * convert;
-    void * convert_arg;
-    unsigned int convert_id_size;
-    int convert_stride;
-    void (* convert_start) (void * id, const mpeg2_fbuf_t * fbuf,
-			    const mpeg2_picture_t * picture,
-			    const mpeg2_gop_t * gop);
-
-    uint8_t * buf_start;
-    uint8_t * buf_end;
-
-    int16_t display_offset_x, display_offset_y;
-
-    int copy_matrix;
-    int8_t q_scale_type, scaled[4];
-    uint8_t quantizer_matrix[4][64];
-    uint8_t new_quantizer_matrix[4][64];
-};
-
-typedef struct {
-#ifdef ARCH_PPC
-    uint8_t regv[12*16];
-#endif
-    int dummy;
-} cpu_state_t;
-
-/* cpu_accel.c */
-uint32_t mpeg2_detect_accel (uint32_t accel);
-
-/* cpu_state.c */
-void mpeg2_cpu_state_init (uint32_t accel);
-
-/* decode.c */
-mpeg2_state_t mpeg2_seek_header (mpeg2dec_t * mpeg2dec);
-mpeg2_state_t mpeg2_parse_header (mpeg2dec_t * mpeg2dec);
-
-/* header.c */
-void mpeg2_header_state_init (mpeg2dec_t * mpeg2dec);
-void mpeg2_reset_info (mpeg2_info_t * info);
-int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec);
-int mpeg2_header_gop (mpeg2dec_t * mpeg2dec);
-mpeg2_state_t mpeg2_header_picture_start (mpeg2dec_t * mpeg2dec);
-int mpeg2_header_picture (mpeg2dec_t * mpeg2dec);
-int mpeg2_header_extension (mpeg2dec_t * mpeg2dec);
-int mpeg2_header_user_data (mpeg2dec_t * mpeg2dec);
-void mpeg2_header_sequence_finalize (mpeg2dec_t * mpeg2dec);
-void mpeg2_header_gop_finalize (mpeg2dec_t * mpeg2dec);
-void mpeg2_header_picture_finalize (mpeg2dec_t * mpeg2dec, uint32_t accels);
-mpeg2_state_t mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec);
-mpeg2_state_t mpeg2_header_end (mpeg2dec_t * mpeg2dec);
-void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int b_type);
-
-/* idct.c */
-void mpeg2_idct_init (uint32_t accel);
-
-/* idct_mmx.c */
-void mpeg2_idct_copy_mmxext (int16_t * block, uint8_t * dest, int stride);
-void mpeg2_idct_add_mmxext (int last, int16_t * block,
-			    uint8_t * dest, int stride);
-void mpeg2_idct_copy_mmx (int16_t * block, uint8_t * dest, int stride);
-void mpeg2_idct_add_mmx (int last, int16_t * block,
-			 uint8_t * dest, int stride);
-void mpeg2_idct_mmx_init (void);
-
-/* idct_altivec.c */
-void mpeg2_idct_copy_altivec (int16_t * block, uint8_t * dest, int stride);
-void mpeg2_idct_add_altivec (int last, int16_t * block,
-			     uint8_t * dest, int stride);
-void mpeg2_idct_altivec_init (void);
-
-/* idct_alpha.c */
-void mpeg2_idct_copy_mvi (int16_t * block, uint8_t * dest, int stride);
-void mpeg2_idct_add_mvi (int last, int16_t * block,
-			 uint8_t * dest, int stride);
-void mpeg2_idct_copy_alpha (int16_t * block, uint8_t * dest, int stride);
-void mpeg2_idct_add_alpha (int last, int16_t * block,
-			   uint8_t * dest, int stride);
-void mpeg2_idct_alpha_init (void);
-
-/* motion_comp.c */
-void mpeg2_mc_init (uint32_t accel);
-
-typedef struct {
-    mpeg2_mc_fct * put [8];
-    mpeg2_mc_fct * avg [8];
-} mpeg2_mc_t;
-
-#define MPEG2_MC_EXTERN(x) mpeg2_mc_t mpeg2_mc_##x = {			  \
-    {MC_put_o_16_##x, MC_put_x_16_##x, MC_put_y_16_##x, MC_put_xy_16_##x, \
-     MC_put_o_8_##x,  MC_put_x_8_##x,  MC_put_y_8_##x,  MC_put_xy_8_##x}, \
-    {MC_avg_o_16_##x, MC_avg_x_16_##x, MC_avg_y_16_##x, MC_avg_xy_16_##x, \
-     MC_avg_o_8_##x,  MC_avg_x_8_##x,  MC_avg_y_8_##x,  MC_avg_xy_8_##x}  \
-};
-
-extern mpeg2_mc_t mpeg2_mc_c;
-extern mpeg2_mc_t mpeg2_mc_mmx;
-extern mpeg2_mc_t mpeg2_mc_mmxext;
-extern mpeg2_mc_t mpeg2_mc_3dnow;
-extern mpeg2_mc_t mpeg2_mc_altivec;
-extern mpeg2_mc_t mpeg2_mc_alpha;
-extern mpeg2_mc_t mpeg2_mc_vis;
diff --git a/src/libmpeg2new/libmpeg2/rgb.c b/src/libmpeg2new/libmpeg2/rgb.c
deleted file mode 100644
index e4abcacc2..000000000
--- a/src/libmpeg2new/libmpeg2/rgb.c
+++ /dev/null
@@ -1,598 +0,0 @@
-/*
- * rgb.c
- * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include "config.h"
-#include <xine/attributes.h>
-
-#include <inttypes.h>
-
-#include "mpeg2.h"
-#include "mpeg2convert.h"
-#include "convert_internal.h"
-
-static int matrix_coefficients = 6;
-
-static const int Inverse_Table_6_9[8][4] = {
-    {117504, 138453, 13954, 34903}, /* no sequence_display_extension */
-    {117504, 138453, 13954, 34903}, /* ITU-R Rec. 709 (1990) */
-    {104597, 132201, 25675, 53279}, /* unspecified */
-    {104597, 132201, 25675, 53279}, /* reserved */
-    {104448, 132798, 24759, 53109}, /* FCC */
-    {104597, 132201, 25675, 53279}, /* ITU-R Rec. 624-4 System B, G */
-    {104597, 132201, 25675, 53279}, /* SMPTE 170M */
-    {117579, 136230, 16907, 35559}  /* SMPTE 240M (1987) */
-};
-
-static const uint8_t dither[] ATTR_ALIGN(32) = {
-     0,  0, 23, 54,  5, 13, 29, 68,  1,  3, 24, 58,  7, 17, 30, 71,
-     0,  0, 23, 54,  5, 13, 29, 68,  1,  3, 24, 58,  7, 17, 30, 71,
-     0,  0, 23, 54,  5, 13, 29, 68,  1,  3, 24, 58,  7, 17, 30, 71,
-     0,  0, 23, 54,  5, 13, 29, 68,  1,  3, 24, 58,  7, 17, 30, 71,
-    15, 36,  7, 18, 21, 50, 13, 31, 17, 39,  9, 21, 22, 53, 15, 35,
-    15, 36,  7, 18, 21, 50, 13, 31, 17, 39,  9, 21, 22, 53, 15, 35,
-    15, 36,  7, 18, 21, 50, 13, 31, 17, 39,  9, 21, 22, 53, 15, 35,
-    15, 36,  7, 18, 21, 50, 13, 31, 17, 39,  9, 21, 22, 53, 15, 35,
-     3,  9, 27, 63,  1,  4, 25, 59,  5, 12, 28, 67,  3,  7, 26, 62,
-     3,  9, 27, 63,  1,  4, 25, 59,  5, 12, 28, 67,  3,  7, 26, 62,
-     3,  9, 27, 63,  1,  4, 25, 59,  5, 12, 28, 67,  3,  7, 26, 62,
-     3,  9, 27, 63,  1,  4, 25, 59,  5, 12, 28, 67,  3,  7, 26, 62,
-    19, 45, 11, 27, 17, 41,  9, 22, 21, 49, 13, 30, 19, 44, 11, 26,
-    19, 45, 11, 27, 17, 41,  9, 22, 21, 49, 13, 30, 19, 44, 11, 26,
-    19, 45, 11, 27, 17, 41,  9, 22, 21, 49, 13, 30, 19, 44, 11, 26,
-    19, 45, 11, 27, 17, 41,  9, 22, 21, 49, 13, 30, 19, 44, 11, 26,
-     0,  2, 24, 57,  6, 15, 30, 70,  0,  1, 23, 55,  6, 14, 29, 69,
-     0,  2, 24, 57,  6, 15, 30, 70,  0,  1, 23, 55,  6, 14, 29, 69,
-     0,  2, 24, 57,  6, 15, 30, 70,  0,  1, 23, 55,  6, 14, 29, 69,
-     0,  2, 24, 57,  6, 15, 30, 70,  0,  1, 23, 55,  6, 14, 29, 69,
-    16, 38,  8, 20, 22, 52, 14, 34, 16, 37,  8, 19, 21, 51, 14, 33,
-    16, 38,  8, 20, 22, 52, 14, 34, 16, 37,  8, 19, 21, 51, 14, 33,
-    16, 38,  8, 20, 22, 52, 14, 34, 16, 37,  8, 19, 21, 51, 14, 33,
-    16, 38,  8, 20, 22, 52, 14, 34, 16, 37,  8, 19, 21, 51, 14, 33,
-     4, 11, 28, 66,  2,  6, 26, 61,  4, 10, 27, 65,  2,  5, 25, 60,
-     4, 11, 28, 66,  2,  6, 26, 61,  4, 10, 27, 65,  2,  5, 25, 60,
-     4, 11, 28, 66,  2,  6, 26, 61,  4, 10, 27, 65,  2,  5, 25, 60,
-     4, 11, 28, 66,  2,  6, 26, 61,  4, 10, 27, 65,  2,  5, 25, 60,
-    20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23,
-    20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23,
-    20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23,
-    20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23,
-     0,  0, 23, 54,  5, 13, 29, 68,  1,  3, 24, 58,  7, 17, 30, 71,
-     0,  0, 23, 54,  5, 13, 29, 68,  1,  3, 24, 58,  7, 17, 30, 71,
-     0,  0, 23, 54,  5, 13, 29, 68,  1,  3, 24, 58,  7, 17, 30, 71,
-     0,  0, 23, 54,  5, 13, 29, 68,  1,  3, 24, 58,  7, 17, 30, 71,
-    15, 36,  7, 18, 21, 50, 13, 31, 17, 39,  9, 21, 22, 53, 15, 35,
-    15, 36,  7, 18, 21, 50, 13, 31, 17, 39,  9, 21, 22, 53, 15, 35
-};
-
-static const uint8_t dither_temporal[64] = {
-    0x00, 0x20, 0x21, 0x01, 0x40, 0x60, 0x61, 0x41,
-    0x42, 0x62, 0x63, 0x43, 0x02, 0x22, 0x23, 0x03,
-    0x80, 0xa0, 0xa1, 0x81, 0xc0, 0xe0, 0xe1, 0xc1,
-    0xc2, 0xe2, 0xe3, 0xc3, 0x82, 0xa2, 0xa3, 0x83,
-    0x84, 0xa4, 0xa5, 0x85, 0xc4, 0xe4, 0xe5, 0xc5,
-    0xc6, 0xe6, 0xe7, 0xc7, 0x86, 0xa6, 0xa7, 0x87,
-    0x04, 0x24, 0x25, 0x05, 0x44, 0x64, 0x65, 0x45,
-    0x46, 0x66, 0x67, 0x47, 0x06, 0x26, 0x27, 0x07
-};
-
-typedef struct {
-    convert_rgb_t base;
-    void * table_rV[256];
-    void * table_gU[256];
-    int table_gV[256];
-    void * table_bU[256];
-} convert_rgb_c_t;
-
-#define RGB(type,i)							\
-    U = pu[i];								\
-    V = pv[i];								\
-    r = (type *) id->table_rV[V];					\
-    g = (type *) (((uint8_t *)id->table_gU[U]) + id->table_gV[V]);	\
-    b = (type *) id->table_bU[U];
-
-#define DST(py,dst,i,j)			\
-    Y = py[i];				\
-    dst[i] = r[Y] + g[Y] + b[Y];
-
-#define DSTRGB(py,dst,i,j)					\
-    Y = py[i];							\
-    dst[3*i] = r[Y]; dst[3*i+1] = g[Y]; dst[3*i+2] = b[Y];
-
-#define DSTBGR(py,dst,i,j)					\
-    Y = py[i];							\
-    dst[3*i] = b[Y]; dst[3*i+1] = g[Y]; dst[3*i+2] = r[Y];
-
-#define DSTDITHER(py,dst,i,j)						  \
-    Y = py[i];								  \
-    dst[i] = r[Y+pd[2*i+96*j]] + g[Y-pd[2*i+96*j]] + b[Y+pd[2*i+1+96*j]];
-
-#define DO(x) x
-#define SKIP(x)
-
-#define DECLARE_420(func,type,num,DST,DITHER)				\
-static void func (void * _id, uint8_t * const * src,			\
-		  unsigned int v_offset)				\
-{									\
-    const convert_rgb_c_t * const id = (convert_rgb_c_t *) _id;		\
-    type * dst_1;							\
-    const uint8_t * py_1, * pu, * pv;					\
-    int i;								\
-    DITHER(uint8_t dithpos = id->base.dither_offset;)			\
-									\
-    dst_1 = (type *)(id->base.rgb_ptr + id->base.rgb_slice * v_offset);	\
-    py_1 = src[0];	pu = src[1];	pv = src[2];			\
-									\
-    i = 8;								\
-    do {								\
-	const uint8_t * py_2;						\
-	int j, U, V, Y;							\
-	const type * r, * g, * b;					\
-	type * dst_2;							\
-	DITHER(const uint8_t * const pd = dither + 2 * dithpos;)	\
-									\
-	dst_2 = (type *)((char *)dst_1 + id->base.rgb_stride);		\
-	py_2 = py_1 + id->base.y_stride;				\
-	j = id->base.width;						\
-	do {								\
-	    RGB (type, 0)						\
-	    DST (py_1, dst_1, 0, 0)					\
-	    DST (py_1, dst_1, 1, 0)					\
-	    DST (py_2, dst_2, 0, 1)					\
-	    DST (py_2, dst_2, 1, 1)					\
-									\
-	    RGB (type, 1)						\
-	    DST (py_2, dst_2, 2, 1)					\
-	    DST (py_2, dst_2, 3, 1)					\
-	    DST (py_1, dst_1, 2, 0)					\
-	    DST (py_1, dst_1, 3, 0)					\
-									\
-	    RGB (type, 2)						\
-	    DST (py_1, dst_1, 4, 0)					\
-	    DST (py_1, dst_1, 5, 0)					\
-	    DST (py_2, dst_2, 4, 1)					\
-	    DST (py_2, dst_2, 5, 1)					\
-									\
-	    RGB (type, 3)						\
-	    DST (py_2, dst_2, 6, 1)					\
-	    DST (py_2, dst_2, 7, 1)					\
-	    DST (py_1, dst_1, 6, 0)					\
-	    DST (py_1, dst_1, 7, 0)					\
-									\
-	    pu += 4;							\
-	    pv += 4;							\
-	    py_1 += 8;							\
-	    py_2 += 8;							\
-	    dst_1 += 8 * num;						\
-	    dst_2 += 8 * num;						\
-	} while (--j);							\
-	if (--i == id->base.field) {					\
-	    dst_1 = (type *)(id->base.rgb_ptr +				\
-			     id->base.rgb_slice * (v_offset + 1));	\
-	    py_1 = src[0] + id->base.y_stride_frame;			\
-	    pu = src[1] + id->base.uv_stride_frame;			\
-	    pv = src[2] + id->base.uv_stride_frame;			\
-	} else {							\
-	    py_1 += id->base.y_increm;					\
-	    pu += id->base.uv_increm;					\
-	    pv += id->base.uv_increm;					\
-	    dst_1 = (type *)((char *)dst_1 + id->base.rgb_increm);	\
-	    DITHER(dithpos += id->base.dither_stride;)			\
-	}								\
-    } while (i);							\
-}
-
-DECLARE_420 (rgb_c_32_420, uint32_t, 1, DST, SKIP)
-DECLARE_420 (rgb_c_24_rgb_420, uint8_t, 3, DSTRGB, SKIP)
-DECLARE_420 (rgb_c_24_bgr_420, uint8_t, 3, DSTBGR, SKIP)
-DECLARE_420 (rgb_c_16_420, uint16_t, 1, DST, SKIP)
-DECLARE_420 (rgb_c_8_420, uint8_t, 1, DSTDITHER, DO)
-
-#define DECLARE_422(func,type,num,DST,DITHER)				\
-static void func (void * _id, uint8_t * const * src,			\
-		  unsigned int v_offset)				\
-{									\
-    const convert_rgb_c_t * const id = (convert_rgb_c_t *) _id;		\
-    type * dst;								\
-    const uint8_t * py, * pu, * pv;					\
-    int i;								\
-    DITHER(uint8_t dithpos = id->base.dither_offset;)			\
-									\
-    dst = (type *)(id->base.rgb_ptr + id->base.rgb_stride * v_offset);	\
-    py = src[0];	pu = src[1];	pv = src[2];			\
-									\
-    i = 16;								\
-    do {								\
-	int j, U, V, Y;							\
-	const type * r, * g, * b;					\
-	DITHER(const uint8_t * const pd = dither + 2 * dithpos;)	\
-									\
-	j = id->base.width;						\
-	do {								\
-	    RGB (type, 0)						\
-	    DST (py, dst, 0, 0)						\
-	    DST (py, dst, 1, 0)						\
-									\
-	    RGB (type, 1)						\
-	    DST (py, dst, 2, 0)						\
-	    DST (py, dst, 3, 0)						\
-									\
-	    RGB (type, 2)						\
-	    DST (py, dst, 4, 0)						\
-	    DST (py, dst, 5, 0)						\
-									\
-	    RGB (type, 3)						\
-	    DST (py, dst, 6, 0)						\
-	    DST (py, dst, 7, 0)						\
-									\
-	    pu += 4;							\
-	    pv += 4;							\
-	    py += 8;							\
-	    dst += 8 * num;						\
-	} while (--j);							\
-	py += id->base.y_increm;					\
-	pu += id->base.uv_increm;					\
-	pv += id->base.uv_increm;					\
-	dst = (type *)((char *)dst + id->base.rgb_increm);		\
-	DITHER(dithpos += id->base.dither_stride;)			\
-    } while (--i);							\
-}
-
-DECLARE_422 (rgb_c_32_422, uint32_t, 1, DST, SKIP)
-DECLARE_422 (rgb_c_24_rgb_422, uint8_t, 3, DSTRGB, SKIP)
-DECLARE_422 (rgb_c_24_bgr_422, uint8_t, 3, DSTBGR, SKIP)
-DECLARE_422 (rgb_c_16_422, uint16_t, 1, DST, SKIP)
-DECLARE_422 (rgb_c_8_422, uint8_t, 1, DSTDITHER, DO)
-
-#define DECLARE_444(func,type,num,DST,DITHER)				\
-static void func (void * _id, uint8_t * const * src,			\
-		  unsigned int v_offset)				\
-{									\
-    const convert_rgb_c_t * const id = (convert_rgb_c_t *) _id;		\
-    type * dst;								\
-    const uint8_t * py, * pu, * pv;					\
-    int i;								\
-    DITHER(uint8_t dithpos = id->base.dither_offset;)			\
-									\
-    dst = (type *)(id->base.rgb_ptr + id->base.rgb_stride * v_offset);	\
-    py = src[0];	pu = src[1];	pv = src[2];			\
-									\
-    i = 16;								\
-    do {								\
-	int j, U, V, Y;							\
-	const type * r, * g, * b;					\
-	DITHER(const uint8_t * const pd = dither + 2 * dithpos;)	\
-									\
-	j = id->base.width;						\
-	do {								\
-	    RGB (type, 0)						\
-	    DST (py, dst, 0, 0)						\
-	    RGB (type, 1)						\
-	    DST (py, dst, 1, 0)						\
-	    RGB (type, 2)						\
-	    DST (py, dst, 2, 0)						\
-	    RGB (type, 3)						\
-	    DST (py, dst, 3, 0)						\
-	    RGB (type, 4)						\
-	    DST (py, dst, 4, 0)						\
-	    RGB (type, 5)						\
-	    DST (py, dst, 5, 0)						\
-	    RGB (type, 6)						\
-	    DST (py, dst, 6, 0)						\
-	    RGB (type, 7)						\
-	    DST (py, dst, 7, 0)						\
-									\
-	    pu += 8;							\
-	    pv += 8;							\
-	    py += 8;							\
-	    dst += 8 * num;						\
-	} while (--j);							\
-	py += id->base.y_increm;				   	\
-	pu += id->base.y_increm;				   	\
-	pv += id->base.y_increm;				   	\
-	dst = (type *)((char *)dst + id->base.rgb_increm);		\
-	DITHER(dithpos += id->base.dither_stride;)			\
-    } while (--i);							\
-}
-
-DECLARE_444 (rgb_c_32_444, uint32_t, 1, DST, SKIP)
-DECLARE_444 (rgb_c_24_rgb_444, uint8_t, 3, DSTRGB, SKIP)
-DECLARE_444 (rgb_c_24_bgr_444, uint8_t, 3, DSTBGR, SKIP)
-DECLARE_444 (rgb_c_16_444, uint16_t, 1, DST, SKIP)
-DECLARE_444 (rgb_c_8_444, uint8_t, 1, DSTDITHER, DO)
-
-static void rgb_start (void * _id, const mpeg2_fbuf_t * fbuf,
-		       const mpeg2_picture_t * picture,
-		       const mpeg2_gop_t * gop)
-{
-    convert_rgb_t * id = (convert_rgb_t *) _id;
-    int uv_stride = id->uv_stride_frame;
-    id->y_stride = id->y_stride_frame;
-    id->rgb_ptr = fbuf->buf[0];
-    id->rgb_slice = id->rgb_stride = id->rgb_stride_frame;
-    id->dither_stride = 32;
-    id->dither_offset = dither_temporal[picture->temporal_reference & 63];
-    id->field = 0;
-    if ((picture->nb_fields == 1) ||
-	(id->chroma420 && !(picture->flags & PIC_FLAG_PROGRESSIVE_FRAME))) {
-	uv_stride <<= 1;
-	id->y_stride <<= 1;
-	id->rgb_stride <<= 1;
-	id->dither_stride <<= 1;
-	id->dither_offset += 16;
-	if (picture->nb_fields == 1) {
-	    id->rgb_slice <<= 1;
-	    if (!(picture->flags & PIC_FLAG_TOP_FIELD_FIRST)) {
-		id->rgb_ptr += id->rgb_stride_frame;
-		id->dither_offset += 32;
-	    }
-	} else
-	    id->field = 8 >> id->convert420;
-    }
-    id->y_increm = (id->y_stride << id->convert420) - id->y_stride_frame;
-    id->uv_increm = uv_stride - id->uv_stride_frame;
-    id->rgb_increm = (id->rgb_stride << id->convert420) - id->rgb_stride_min;
-    id->dither_stride <<= id->convert420;
-}
-
-static inline int div_round (int dividend, int divisor)
-{
-    if (dividend > 0)
-	return (dividend + (divisor>>1)) / divisor;
-    else
-	return -((-dividend + (divisor>>1)) / divisor);
-}
-
-static unsigned int rgb_c_init (convert_rgb_c_t * id,
-				mpeg2convert_rgb_order_t order,
-				unsigned int bpp)
-{
-    int i;
-    uint8_t table_Y[1024];
-    uint32_t * table_32 = 0;
-    uint16_t * table_16 = 0;
-    uint8_t * table_8 = 0;
-    uint8_t * table_332 = 0;
-    int entry_size = 0;
-    void * table_r = 0;
-    void * table_g = 0;
-    void * table_b = 0;
-
-    int crv = Inverse_Table_6_9[matrix_coefficients][0];
-    int cbu = Inverse_Table_6_9[matrix_coefficients][1];
-    int cgu = -Inverse_Table_6_9[matrix_coefficients][2];
-    int cgv = -Inverse_Table_6_9[matrix_coefficients][3];
-
-    for (i = 0; i < 1024; i++) {
-	int j;
-
-	j = (76309 * (i - 384 - 16) + 32768) >> 16;
-	table_Y[i] = (j < 0) ? 0 : ((j > 255) ? 255 : j);
-    }
-
-    switch (bpp) {
-    case 32:
-	if (!id)
-	    return (197 + 2*682 + 256 + 132) * sizeof (uint32_t);
-	table_32 = (uint32_t *) (id + 1);
-	entry_size = sizeof (uint32_t);
-	table_r = table_32 + 197;
-	table_b = table_32 + 197 + 685;
-	table_g = table_32 + 197 + 2*682;
-
-	for (i = -197; i < 256+197; i++)
-	    ((uint32_t *) table_r)[i] =
-		table_Y[i+384] << ((order == MPEG2CONVERT_RGB) ? 16 : 0);
-	for (i = -132; i < 256+132; i++)
-	    ((uint32_t *) table_g)[i] = table_Y[i+384] << 8;
-	for (i = -232; i < 256+232; i++)
-	    ((uint32_t *) table_b)[i] =
-		table_Y[i+384] << ((order == MPEG2CONVERT_RGB) ? 0 : 16);
-	break;
-
-    case 24:
-	if (!id)
-	    return (256 + 2*232) * sizeof (uint8_t);
-	table_8 = (uint8_t *) (id + 1);
-	entry_size = sizeof (uint8_t);
-	table_r = table_g = table_b = table_8 + 232;
-
-	for (i = -232; i < 256+232; i++)
-	    ((uint8_t * )table_b)[i] = table_Y[i+384];
-	break;
-
-    case 15:
-    case 16:
-	if (!id)
-	    return (197 + 2*682 + 256 + 132) * sizeof (uint16_t);
-	table_16 = (uint16_t *) (id + 1);
-	entry_size = sizeof (uint16_t);
-	table_r = table_16 + 197;
-	table_b = table_16 + 197 + 685;
-	table_g = table_16 + 197 + 2*682;
-
-	for (i = -197; i < 256+197; i++) {
-	    int j = table_Y[i+384] >> 3;
-
-	    if (order == MPEG2CONVERT_RGB)
-		j <<= ((bpp==16) ? 11 : 10);
-
-	    ((uint16_t *)table_r)[i] = j;
-	}
-	for (i = -132; i < 256+132; i++) {
-	    int j = table_Y[i+384] >> ((bpp==16) ? 2 : 3);
-
-	    ((uint16_t *)table_g)[i] = j << 5;
-	}
-	for (i = -232; i < 256+232; i++) {
-	    int j = table_Y[i+384] >> 3;
-
-	    if (order == MPEG2CONVERT_BGR)
-		j <<= ((bpp==16) ? 11 : 10);
-
-	    ((uint16_t *)table_b)[i] = j;
-	}
-	break;
-
-    case 8:
-	if (!id)
-	    return (197 + 2*682 + 256 + 232 + 71) * sizeof (uint8_t);
-	table_332 = (uint8_t *) (id + 1);
-	entry_size = sizeof (uint8_t);
-	table_r = table_332 + 197;
-	table_g = table_332 + 197 + 682 + 30;
-	table_b = table_332 + 197 + 2*682;
-
-	for (i = -197; i < 256+197+30; i++)
-	    ((uint8_t *)table_r)[i] = ((table_Y[i+384] * 7 / 255) <<
-				       (order == MPEG2CONVERT_RGB ? 5 : 0));
-	for (i = -132; i < 256+132+30; i++)
-	    ((uint8_t *)table_g)[i-30] = ((table_Y[i+384] * 7 / 255) <<
-					  (order == MPEG2CONVERT_RGB ? 2 : 3));
-	for (i = -232; i < 256+232+71; i++)
-	    ((uint8_t *)table_b)[i] = ((table_Y[i+384] / 85) <<
-				       (order == MPEG2CONVERT_RGB ? 0 : 6));
-	break;
-    }
-
-    for (i = 0; i < 256; i++) {
-	id->table_rV[i] = (((uint8_t *)table_r) +
-			   entry_size * div_round (crv * (i-128), 76309));
-	id->table_gU[i] = (((uint8_t *)table_g) +
-			   entry_size * div_round (cgu * (i-128), 76309));
-	id->table_gV[i] = entry_size * div_round (cgv * (i-128), 76309);
-	id->table_bU[i] = (((uint8_t *)table_b) +
-			   entry_size * div_round (cbu * (i-128), 76309));
-    }
-
-    return 0;
-}
-
-static int rgb_internal (mpeg2convert_rgb_order_t order, unsigned int bpp,
-			 int stage, void * _id, const mpeg2_sequence_t * seq,
-			 int stride, uint32_t accel, void * arg,
-			 mpeg2_convert_init_t * result)
-{
-    convert_rgb_t * id = (convert_rgb_t *) _id;
-    mpeg2convert_copy_t * copy = (mpeg2convert_copy_t *) 0;
-    unsigned int id_size = sizeof (convert_rgb_t);
-    int chroma420 = (seq->chroma_height < seq->height);
-    int convert420 = 0;
-    int rgb_stride_min = ((bpp + 7) >> 3) * seq->width;
-
-#ifdef ARCH_X86
-    if (!copy && (accel & MPEG2_ACCEL_X86_MMXEXT)) {
-	convert420 = 0;
-	copy = mpeg2convert_rgb_mmxext (order, bpp, seq);
-    }
-    if (!copy && (accel & MPEG2_ACCEL_X86_MMX)) {
-	convert420 = 0;
-	copy = mpeg2convert_rgb_mmx (order, bpp, seq);
-    }
-#endif
-#ifdef ARCH_SPARC
-    if (!copy && (accel & MPEG2_ACCEL_SPARC_VIS)) {
-	convert420 = chroma420;
-	copy = mpeg2convert_rgb_vis (order, bpp, seq);
-    }
-#endif
-    if (!copy) {
-	int src, dest;
-	static void (* rgb_c[3][5]) (void *, uint8_t * const *,
-				     unsigned int) =
-	    {{rgb_c_24_bgr_420, rgb_c_8_420, rgb_c_16_420,
-	      rgb_c_24_rgb_420, rgb_c_32_420},
-	     {rgb_c_24_bgr_422, rgb_c_8_422, rgb_c_16_422,
-	      rgb_c_24_rgb_422, rgb_c_32_422},
-	     {rgb_c_24_bgr_444, rgb_c_8_444, rgb_c_16_444,
-	      rgb_c_24_rgb_444, rgb_c_32_444}};
-
-	convert420 = chroma420;
-	id_size = (sizeof (convert_rgb_c_t) +
-		   rgb_c_init ((convert_rgb_c_t *) id, order, bpp));
-	src = ((seq->chroma_width == seq->width) +
-	       (seq->chroma_height == seq->height));
-	dest = ((bpp == 24 && order == MPEG2CONVERT_BGR) ? 0 : (bpp + 7) >> 3);
-	copy = rgb_c[src][dest];
-    }
-
-    result->id_size = id_size;
-
-    if (stride < rgb_stride_min)
-	stride = rgb_stride_min;
-
-    if (stage == MPEG2_CONVERT_STRIDE)
-	return stride;
-    else if (stage == MPEG2_CONVERT_START) {
-	id->width = seq->width >> 3;
-	id->y_stride_frame = seq->width;
-	id->uv_stride_frame = seq->chroma_width;
-	id->rgb_stride_frame = stride;
-	id->rgb_stride_min = rgb_stride_min;
-	id->chroma420 = chroma420;
-	id->convert420 = convert420;
-	result->buf_size[0] = stride * seq->height;
-	result->buf_size[1] = result->buf_size[2] = 0;
-	result->start = rgb_start;
-	result->copy = copy;
-    }
-    return 0;
-}
-
-#define DECLARE(func,order,bpp)						\
-int func (int stage, void * id,						\
-	  const mpeg2_sequence_t * sequence, int stride,		\
-	  uint32_t accel, void * arg, mpeg2_convert_init_t * result)	\
-{									\
-    return rgb_internal (order, bpp, stage, id, sequence, stride,	\
-			 accel, arg, result);				\
-}
-
-DECLARE (mpeg2convert_rgb32, MPEG2CONVERT_RGB, 32)
-DECLARE (mpeg2convert_rgb24, MPEG2CONVERT_RGB, 24)
-DECLARE (mpeg2convert_rgb16, MPEG2CONVERT_RGB, 16)
-DECLARE (mpeg2convert_rgb15, MPEG2CONVERT_RGB, 15)
-DECLARE (mpeg2convert_rgb8, MPEG2CONVERT_RGB, 8)
-DECLARE (mpeg2convert_bgr32, MPEG2CONVERT_BGR, 32)
-DECLARE (mpeg2convert_bgr24, MPEG2CONVERT_BGR, 24)
-DECLARE (mpeg2convert_bgr16, MPEG2CONVERT_BGR, 16)
-DECLARE (mpeg2convert_bgr15, MPEG2CONVERT_BGR, 15)
-DECLARE (mpeg2convert_bgr8, MPEG2CONVERT_BGR, 8)
-
-mpeg2_convert_t * mpeg2convert_rgb (mpeg2convert_rgb_order_t order,
-				    unsigned int bpp)
-{
-    static mpeg2_convert_t * table[5][2] =
-	{{mpeg2convert_rgb15, mpeg2convert_bgr15},
-	 {mpeg2convert_rgb8, mpeg2convert_bgr8},
-	 {mpeg2convert_rgb16, mpeg2convert_bgr16},
-	 {mpeg2convert_rgb24, mpeg2convert_bgr24},
-	 {mpeg2convert_rgb32, mpeg2convert_bgr32}};
-
-    if (order == MPEG2CONVERT_RGB || order == MPEG2CONVERT_BGR) {
-	if (bpp == 15)
-	    return table[0][order == MPEG2CONVERT_BGR];
-	else if (bpp >= 8 && bpp <= 32 && (bpp & 7) == 0)
-	    return table[bpp >> 3][order == MPEG2CONVERT_BGR];
-    }
-    return (mpeg2_convert_t *) 0;
-}
diff --git a/src/libmpeg2new/libmpeg2/rgb_mmx.c b/src/libmpeg2new/libmpeg2/rgb_mmx.c
deleted file mode 100644
index 6ca7e65a8..000000000
--- a/src/libmpeg2new/libmpeg2/rgb_mmx.c
+++ /dev/null
@@ -1,321 +0,0 @@
-/*
- * rgb_mmx.c
- * Copyright (C) 2000-2003 Silicon Integrated System Corp.
- * All Rights Reserved.
- *
- * Author: Olie Lho <ollie@sis.com.tw>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include "config.h"
-
-#ifdef ARCH_X86
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <inttypes.h>
-
-#include "mpeg2.h"
-#include "mpeg2convert.h"
-#include "convert_internal.h"
-#include <xine/attributes.h>
-#include "mmx.h"
-
-#define CPU_MMXEXT 0
-#define CPU_MMX 1
-
-/* CPU_MMXEXT/CPU_MMX adaptation layer */
-
-#define movntq(src,dest)	\
-do {				\
-    if (cpu == CPU_MMXEXT)	\
-	movntq_r2m (src, dest);	\
-    else			\
-	movq_r2m (src, dest);	\
-} while (0)
-
-static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv)
-{
-    static mmx_t mmx_80w = {0x0080008000800080LL};
-    static mmx_t mmx_U_green = {0xf37df37df37df37dLL};
-    static mmx_t mmx_U_blue = {0x4093409340934093LL};
-    static mmx_t mmx_V_red = {0x3312331233123312LL};
-    static mmx_t mmx_V_green = {0xe5fce5fce5fce5fcLL};
-    static mmx_t mmx_10w = {0x1010101010101010LL};
-    static mmx_t mmx_00ffw = {0x00ff00ff00ff00ffLL};
-    static mmx_t mmx_Y_coeff = {0x253f253f253f253fLL};
-
-    movd_m2r (*pu, mm0);		/* mm0 = 00 00 00 00 u3 u2 u1 u0 */
-    movd_m2r (*pv, mm1);		/* mm1 = 00 00 00 00 v3 v2 v1 v0 */
-    movq_m2r (*py, mm6);		/* mm6 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
-    pxor_r2r (mm4, mm4);		/* mm4 = 0 */
-    /* XXX might do cache preload for image here */
-
-    /*
-     * Do the multiply part of the conversion for even and odd pixels
-     * register usage:
-     * mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels
-     * mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd  pixels
-     * mm6 -> Y even, mm7 -> Y odd
-     */
-
-    punpcklbw_r2r (mm4, mm0);		/* mm0 = u3 u2 u1 u0 */
-    punpcklbw_r2r (mm4, mm1);		/* mm1 = v3 v2 v1 v0 */
-    psubsw_m2r (mmx_80w, mm0);		/* u -= 128 */
-    psubsw_m2r (mmx_80w, mm1);		/* v -= 128 */
-    psllw_i2r (3, mm0);			/* promote precision */
-    psllw_i2r (3, mm1);			/* promote precision */
-    movq_r2r (mm0, mm2);		/* mm2 = u3 u2 u1 u0 */
-    movq_r2r (mm1, mm3);		/* mm3 = v3 v2 v1 v0 */
-    pmulhw_m2r (mmx_U_green, mm2);	/* mm2 = u * u_green */
-    pmulhw_m2r (mmx_V_green, mm3);	/* mm3 = v * v_green */
-    pmulhw_m2r (mmx_U_blue, mm0);	/* mm0 = chroma_b */
-    pmulhw_m2r (mmx_V_red, mm1);	/* mm1 = chroma_r */
-    paddsw_r2r (mm3, mm2);		/* mm2 = chroma_g */
-
-    psubusb_m2r (mmx_10w, mm6);		/* Y -= 16 */
-    movq_r2r (mm6, mm7);		/* mm7 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
-    pand_m2r (mmx_00ffw, mm6);		/* mm6 =    Y6    Y4    Y2    Y0 */
-    psrlw_i2r (8, mm7);			/* mm7 =    Y7    Y5    Y3    Y1 */
-    psllw_i2r (3, mm6);			/* promote precision */
-    psllw_i2r (3, mm7);			/* promote precision */
-    pmulhw_m2r (mmx_Y_coeff, mm6);	/* mm6 = luma_rgb even */
-    pmulhw_m2r (mmx_Y_coeff, mm7);	/* mm7 = luma_rgb odd */
-
-    /*
-     * Do the addition part of the conversion for even and odd pixels
-     * register usage:
-     * mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels
-     * mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd  pixels
-     * mm6 -> Y even, mm7 -> Y odd
-     */
-
-    movq_r2r (mm0, mm3);		/* mm3 = chroma_b */
-    movq_r2r (mm1, mm4);		/* mm4 = chroma_r */
-    movq_r2r (mm2, mm5);		/* mm5 = chroma_g */
-    paddsw_r2r (mm6, mm0);		/* mm0 = B6 B4 B2 B0 */
-    paddsw_r2r (mm7, mm3);		/* mm3 = B7 B5 B3 B1 */
-    paddsw_r2r (mm6, mm1);		/* mm1 = R6 R4 R2 R0 */
-    paddsw_r2r (mm7, mm4);		/* mm4 = R7 R5 R3 R1 */
-    paddsw_r2r (mm6, mm2);		/* mm2 = G6 G4 G2 G0 */
-    paddsw_r2r (mm7, mm5);		/* mm5 = G7 G5 G3 G1 */
-    packuswb_r2r (mm0, mm0);		/* saturate to 0-255 */
-    packuswb_r2r (mm1, mm1);		/* saturate to 0-255 */
-    packuswb_r2r (mm2, mm2);		/* saturate to 0-255 */
-    packuswb_r2r (mm3, mm3);		/* saturate to 0-255 */
-    packuswb_r2r (mm4, mm4);		/* saturate to 0-255 */
-    packuswb_r2r (mm5, mm5);		/* saturate to 0-255 */
-    punpcklbw_r2r (mm3, mm0);		/* mm0 = B7 B6 B5 B4 B3 B2 B1 B0 */
-    punpcklbw_r2r (mm4, mm1);		/* mm1 = R7 R6 R5 R4 R3 R2 R1 R0 */
-    punpcklbw_r2r (mm5, mm2);		/* mm2 = G7 G6 G5 G4 G3 G2 G1 G0 */
-}
-
-static inline void mmx_unpack_16rgb (uint8_t * image, const int cpu)
-{
-    static mmx_t mmx_bluemask = {0xf8f8f8f8f8f8f8f8LL};
-    static mmx_t mmx_greenmask = {0xfcfcfcfcfcfcfcfcLL};
-    static mmx_t mmx_redmask = {0xf8f8f8f8f8f8f8f8LL};
-
-    /*
-     * convert RGB plane to RGB 16 bits
-     * mm0 -> B, mm1 -> R, mm2 -> G
-     * mm4 -> GB, mm5 -> AR pixel 4-7
-     * mm6 -> GB, mm7 -> AR pixel 0-3
-     */
-
-    pand_m2r (mmx_bluemask, mm0);	/* mm0 = b7b6b5b4b3______ */
-    pand_m2r (mmx_greenmask, mm2);	/* mm2 = g7g6g5g4g3g2____ */
-    pand_m2r (mmx_redmask, mm1);	/* mm1 = r7r6r5r4r3______ */
-    psrlq_i2r (3, mm0);			/* mm0 = ______b7b6b5b4b3 */
-    pxor_r2r (mm4, mm4);		/* mm4 = 0 */
-    movq_r2r (mm0, mm5);		/* mm5 = ______b7b6b5b4b3 */
-    movq_r2r (mm2, mm7);		/* mm7 = g7g6g5g4g3g2____ */
-
-    punpcklbw_r2r (mm4, mm2);
-    punpcklbw_r2r (mm1, mm0);
-    psllq_i2r (3, mm2);
-    por_r2r (mm2, mm0);
-    movntq (mm0, *image);
-
-    punpckhbw_r2r (mm4, mm7);
-    punpckhbw_r2r (mm1, mm5);
-    psllq_i2r (3, mm7);
-    por_r2r (mm7, mm5);
-    movntq (mm5, *(image+8));
-}
-
-static inline void mmx_unpack_32rgb (uint8_t * image, const int cpu)
-{
-    /*
-     * convert RGB plane to RGB packed format,
-     * mm0 -> B, mm1 -> R, mm2 -> G, mm3 -> 0,
-     * mm4 -> GB, mm5 -> AR pixel 4-7,
-     * mm6 -> GB, mm7 -> AR pixel 0-3
-     */
-
-    pxor_r2r (mm3, mm3);
-    movq_r2r (mm0, mm6);
-    movq_r2r (mm1, mm7);
-    movq_r2r (mm0, mm4);
-    movq_r2r (mm1, mm5);
-    punpcklbw_r2r (mm2, mm6);
-    punpcklbw_r2r (mm3, mm7);
-    punpcklwd_r2r (mm7, mm6);
-    movntq (mm6, *image);
-    movq_r2r (mm0, mm6);
-    punpcklbw_r2r (mm2, mm6);
-    punpckhwd_r2r (mm7, mm6);
-    movntq (mm6, *(image+8));
-    punpckhbw_r2r (mm2, mm4);
-    punpckhbw_r2r (mm3, mm5);
-    punpcklwd_r2r (mm5, mm4);
-    movntq (mm4, *(image+16));
-    movq_r2r (mm0, mm4);
-    punpckhbw_r2r (mm2, mm4);
-    punpckhwd_r2r (mm5, mm4);
-    movntq (mm4, *(image+24));
-}
-
-static inline void rgb16 (void * const _id, uint8_t * const * src,
-			  const unsigned int v_offset, const int cpu)
-{
-    convert_rgb_t * const id = (convert_rgb_t *) _id;
-    uint8_t * dst;
-    uint8_t * py, * pu, * pv;
-    int i, j;
-
-    dst = id->rgb_ptr + id->rgb_slice * v_offset;
-    py = src[0];	pu = src[1];	pv = src[2];
-
-    i = 16;
-    do {
-	j = id->width;
-	do {
-	    mmx_yuv2rgb (py, pu, pv);
-	    mmx_unpack_16rgb (dst, cpu);
-	    py += 8;
-	    pu += 4;
-	    pv += 4;
-	    dst += 16;
-	} while (--j);
-
-	dst += id->rgb_increm;
-	py += id->y_increm;
-	if (--i == id->field) {
-	    dst = id->rgb_ptr + id->rgb_slice * (v_offset + 1);
-	    py = src[0] + id->y_stride_frame;
-	    pu = src[1] + id->uv_stride_frame;
-	    pv = src[2] + id->uv_stride_frame;
-	} else if (! (i & id->chroma420)) {
-	    pu += id->uv_increm;
-	    pv += id->uv_increm;
-	} else {
-	    pu -= id->uv_stride_frame;
-	    pv -= id->uv_stride_frame;
-	}
-    } while (i);
-}
-
-static inline void argb32 (void * const _id, uint8_t * const * src,
-			   const unsigned int v_offset, const int cpu)
-{
-    convert_rgb_t * const id = (convert_rgb_t *) _id;
-    uint8_t * dst;
-    uint8_t * py, * pu, * pv;
-    int i, j;
-
-    dst = id->rgb_ptr + id->rgb_slice * v_offset;
-    py = src[0];	pu = src[1];	pv = src[2];
-
-    i = 16;
-    do {
-	j = id->width;
-	do {
-	    mmx_yuv2rgb (py, pu, pv);
-	    mmx_unpack_32rgb (dst, cpu);
-	    py += 8;
-	    pu += 4;
-	    pv += 4;
-	    dst += 32;
-	} while (--j);
-
-	dst += id->rgb_increm;
-	py += id->y_increm;
-	if (--i == id->field) {
-	    dst = id->rgb_ptr + id->rgb_slice * (v_offset + 1);
-	    py = src[0] + id->y_stride_frame;
-	    pu = src[1] + id->uv_stride_frame;
-	    pv = src[2] + id->uv_stride_frame;
-	} else if (! (i & id->chroma420)) {
-	    pu += id->uv_increm;
-	    pv += id->uv_increm;
-	} else {
-	    pu -= id->uv_stride_frame;
-	    pv -= id->uv_stride_frame;
-	}
-    } while (i);
-}
-
-static void mmxext_rgb16 (void * id, uint8_t * const * src,
-			  unsigned int v_offset)
-{
-    rgb16 (id, src, v_offset, CPU_MMXEXT);
-}
-
-static void mmxext_argb32 (void * id, uint8_t * const * src,
-			   unsigned int v_offset)
-{
-    argb32 (id, src, v_offset, CPU_MMXEXT);
-}
-
-static void mmx_rgb16 (void * id, uint8_t * const * src, unsigned int v_offset)
-{
-    rgb16 (id, src, v_offset, CPU_MMX);
-}
-
-static void mmx_argb32 (void * id, uint8_t * const * src,
-			unsigned int v_offset)
-{
-    argb32 (id, src, v_offset, CPU_MMX);
-}
-
-mpeg2convert_copy_t * mpeg2convert_rgb_mmxext (int order, int bpp,
-					       const mpeg2_sequence_t * seq)
-{
-    if (order == MPEG2CONVERT_RGB && seq->chroma_width < seq->width) {
-	if (bpp == 16)
-	    return mmxext_rgb16;
-	else if (bpp == 32)
-	    return mmxext_argb32;
-    }
-    return NULL;	/* Fallback to C */
-}
-
-mpeg2convert_copy_t * mpeg2convert_rgb_mmx (int order, int bpp,
-					    const mpeg2_sequence_t * seq)
-{
-    if (order == MPEG2CONVERT_RGB && seq->chroma_width < seq->width) {
-	if (bpp == 16)
-	    return mmx_rgb16;
-	else if (bpp == 32)
-	    return mmx_argb32;
-    }
-    return NULL;	/* Fallback to C */
-}
-#endif
diff --git a/src/libmpeg2new/libmpeg2/rgb_vis.c b/src/libmpeg2new/libmpeg2/rgb_vis.c
deleted file mode 100644
index cbd7c7072..000000000
--- a/src/libmpeg2new/libmpeg2/rgb_vis.c
+++ /dev/null
@@ -1,384 +0,0 @@
-/*
- * rgb_vis.c
- * Copyright (C) 2003 David S. Miller <davem@redhat.com>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include "config.h"
-
-#ifdef ARCH_SPARC
-
-#include <stddef.h>
-#include <inttypes.h>
-
-#include "mpeg2.h"
-#include "mpeg2convert.h"
-#include "convert_internal.h"
-#include <xine/attributes.h>
-#include "vis.h"
-
-/* Based partially upon the MMX yuv2rgb code, see there for credits.
- *
- * The difference here is that since we have enough registers we
- * process both even and odd scanlines in one pass.
- */
-
-static const uint16_t const_2048[] ATTR_ALIGN(8) = {2048, 2048, 2048, 2048};
-static const uint16_t const_1024[] ATTR_ALIGN(8) = {1024, 1024, 1024, 1024};
-static const uint16_t const_128[] ATTR_ALIGN(8) = {128, 128, 128, 128};
-static const uint8_t const_Ugreen[] ATTR_ALIGN(8) =
-	{0xf3, 0x00, 0xf3, 0x00, 0xf3, 0x00, 0xf3, 0x00};
-static const uint8_t const_Vgreen[] ATTR_ALIGN(8) =
-	{0xe6, 0x00, 0xe6, 0x00, 0xe6, 0x00, 0xe6, 0x00};
-static const uint8_t const_Ublue_Vred[] ATTR_ALIGN(8) =
-	{0x41, 0x41, 0x41, 0x41, 0x33, 0x33, 0x33, 0x33};
-static const uint8_t const_Ycoeff[] ATTR_ALIGN(4) = {0x25, 0x25, 0x25, 0x25};
-
-#define TMP0		0
-#define TMP1		1
-#define TMP2		2
-#define TMP3		3
-#define TMP4		4
-#define TMP5		5
-#define TMP6		6
-#define TMP7		7
-#define TMP8		8
-#define TMP9		9
-#define TMP10		10
-#define TMP11		11
-#define TMP12		12
-#define TMP13		13
-
-#define CONST_UBLUE	14
-#define CONST_VRED	15
-#define CONST_2048	16
-
-#define BLUE8_EVEN	18
-#define BLUE8_ODD	19
-#define RED8_EVEN	20
-#define RED8_ODD	21
-#define GREEN8_EVEN	22
-#define GREEN8_ODD	23
-
-#define BLUE8_2_EVEN	24
-#define BLUE8_2_ODD	25
-#define RED8_2_EVEN	26
-#define RED8_2_ODD	27
-#define GREEN8_2_EVEN	28
-#define GREEN8_2_ODD	29
-
-#define CONST_YCOEFF	30
-#define ZEROS		31
-
-#define PU_0		32
-#define PU_2		34
-#define PV_0		36
-#define PV_2		38
-#define PY_0		40
-#define PY_2		42
-#define PY_4		44
-#define PY_6		46
-
-#define CONST_128	56
-#define CONST_1024	58
-#define CONST_VGREEN	60
-#define CONST_UGREEN	62
-
-static inline void vis_init_consts(void)
-{
-	vis_set_gsr(7 << VIS_GSR_SCALEFACT_SHIFT);
-
-	vis_ld64(const_2048[0], CONST_2048);
-	vis_ld64(const_1024[0], CONST_1024);
-	vis_ld64(const_Ugreen[0], CONST_UGREEN);
-	vis_ld64(const_Vgreen[0], CONST_VGREEN);
-	vis_fzeros(ZEROS);
-	vis_ld64(const_Ublue_Vred[0], CONST_UBLUE);
-	vis_ld32(const_Ycoeff[0], CONST_YCOEFF);
-	vis_ld64(const_128[0],  CONST_128);
-}
-
-static inline void vis_yuv2rgb(uint8_t *py, uint8_t *pu, uint8_t *pv,
-			       int y_stride)
-{
-	vis_ld32(pu[0], TMP0);
-
-	vis_ld32(pv[0], TMP2);
-
-	vis_ld64(py[0], TMP4);
-	vis_mul8x16au(TMP0, CONST_2048, PU_0);
-
-	vis_ld64_2(py, y_stride, TMP8);
-	vis_mul8x16au(TMP2, CONST_2048, PV_0);
-
-	vis_pmerge(TMP4, TMP5, TMP6);
-
-	vis_pmerge(TMP6, TMP7, TMP4);
-
-	vis_pmerge(TMP8, TMP9, TMP10);
-
-	vis_pmerge(TMP10, TMP11, TMP8);
-	vis_mul8x16au(TMP4, CONST_2048, PY_0);
-
-	vis_psub16(PU_0, CONST_1024, PU_0);
-	vis_mul8x16au(TMP5, CONST_2048, PY_2);
-
-	vis_psub16(PV_0, CONST_1024, PV_0);
-	vis_mul8x16au(TMP8, CONST_2048, PY_4);
-
-	vis_psub16(PY_0, CONST_128, PY_0);
-	vis_mul8x16au(TMP9, CONST_2048, PY_6);
-
-	vis_psub16(PY_2, CONST_128, PY_2);
-	vis_mul8x16(CONST_YCOEFF, PY_0, PY_0);
-
-	vis_psub16(PY_4, CONST_128, PY_4);
-	vis_mul8x16(CONST_YCOEFF, PY_2, PY_2);
-
-	vis_psub16(PY_6, CONST_128, PY_6);
-	vis_mul8x16(CONST_YCOEFF, PY_4, PY_4);
-
-	vis_mul8x16(CONST_YCOEFF, PY_6, PY_6);
-
-	vis_mul8sux16(CONST_UGREEN, PU_0, TMP0);
-
-	vis_mul8sux16(CONST_VGREEN, PV_0, TMP2);
-
-	vis_mul8x16(CONST_UBLUE, PU_0, TMP4);
-
-	vis_mul8x16(CONST_VRED, PV_0, TMP6);
-	vis_padd16(TMP0, TMP2, TMP10);
-
-	vis_padd16(PY_0, TMP4, TMP0);
-
-	vis_padd16(PY_2, TMP4, TMP2);
-	vis_pack16(TMP0, BLUE8_EVEN);
-
-	vis_padd16(PY_4, TMP4, TMP0);
-	vis_pack16(TMP2, BLUE8_ODD);
-
-	vis_padd16(PY_6, TMP4, TMP2);
-	vis_pack16(TMP0, BLUE8_2_EVEN);
-
-	vis_padd16(PY_0, TMP6, TMP0);
-	vis_pack16(TMP2, BLUE8_2_ODD);
-
-	vis_padd16(PY_2, TMP6, TMP2);
-	vis_pack16(TMP0, RED8_EVEN);
-
-	vis_padd16(PY_4, TMP6, TMP0);
-	vis_pack16(TMP2, RED8_ODD);
-
-	vis_padd16(PY_6, TMP6, TMP2);
-	vis_pack16(TMP0, RED8_2_EVEN);
-
-	vis_padd16(PY_0, TMP10, TMP0);
-	vis_pack16(TMP2, RED8_2_ODD);
-
-	vis_padd16(PY_2, TMP10, TMP2);
-	vis_pack16(TMP0, GREEN8_EVEN);
-
-	vis_padd16(PY_4, TMP10, TMP0);
-	vis_pack16(TMP2, GREEN8_ODD);
-
-	vis_padd16(PY_6, TMP10, TMP2);
-	vis_pack16(TMP0, GREEN8_2_EVEN);
-
-	vis_pack16(TMP2, GREEN8_2_ODD);
-	vis_pmerge(BLUE8_EVEN, BLUE8_ODD, BLUE8_EVEN);
-
-	vis_pmerge(BLUE8_2_EVEN, BLUE8_2_ODD, BLUE8_2_EVEN);
-
-	vis_pmerge(RED8_EVEN, RED8_ODD, RED8_EVEN);
-
-	vis_pmerge(RED8_2_EVEN, RED8_2_ODD, RED8_2_EVEN);
-
-	vis_pmerge(GREEN8_EVEN, GREEN8_ODD, GREEN8_EVEN);
-
-	vis_pmerge(GREEN8_2_EVEN, GREEN8_2_ODD, GREEN8_2_EVEN);
-}
-
-static inline void vis_unpack_32rgb(uint8_t *image, int stride)
-{
-	vis_pmerge(ZEROS, GREEN8_EVEN, TMP0);
-	vis_pmerge(RED8_EVEN, BLUE8_EVEN, TMP2);
-
-	vis_pmerge(TMP0, TMP2, TMP4);
-	vis_st64(TMP4, image[0]);
-
-	vis_pmerge(TMP1, TMP3, TMP6);
-	vis_st64_2(TMP6, image, 8);
-
-	vis_pmerge(ZEROS, GREEN8_ODD, TMP8);
-	vis_pmerge(RED8_ODD, BLUE8_ODD, TMP10);
-
-	vis_pmerge(TMP8, TMP10, TMP0);
-	vis_st64_2(TMP0, image, 16);
-
-	vis_pmerge(TMP9, TMP11, TMP2);
-	vis_st64_2(TMP2, image, 24);
-
-	image += stride;
-
-	vis_pmerge(ZEROS, GREEN8_2_EVEN, TMP0);
-	vis_pmerge(RED8_2_EVEN, BLUE8_2_EVEN, TMP2);
-
-	vis_pmerge(TMP0, TMP2, TMP4);
-	vis_st64(TMP4, image[0]);
-
-	vis_pmerge(TMP1, TMP3, TMP6);
-	vis_st64_2(TMP6, image, 8);
-
-	vis_pmerge(ZEROS, GREEN8_2_ODD, TMP8);
-	vis_pmerge(RED8_2_ODD, BLUE8_2_ODD, TMP10);
-
-	vis_pmerge(TMP8, TMP10, TMP0);
-	vis_st64_2(TMP0, image, 16);
-
-	vis_pmerge(TMP9, TMP11, TMP2);
-	vis_st64_2(TMP2, image, 24);
-}
-
-static inline void vis_unpack_32bgr(uint8_t *image, int stride)
-{
-	vis_pmerge(ZEROS, GREEN8_EVEN, TMP0);
-	vis_pmerge(BLUE8_EVEN, RED8_EVEN, TMP2);
-
-	vis_pmerge(TMP0, TMP2, TMP4);
-	vis_st64(TMP4, image[0]);
-
-	vis_pmerge(TMP1, TMP3, TMP6);
-	vis_st64_2(TMP6, image, 8);
-
-	vis_pmerge(ZEROS, GREEN8_ODD, TMP8);
-	vis_pmerge(BLUE8_ODD, RED8_ODD, TMP10);
-
-	vis_pmerge(TMP8, TMP10, TMP0);
-	vis_st64_2(TMP0, image, 16);
-
-	vis_pmerge(TMP9, TMP11, TMP2);
-	vis_st64_2(TMP2, image, 24);
-
-	image += stride;
-
-	vis_pmerge(ZEROS, GREEN8_2_EVEN, TMP0);
-	vis_pmerge(BLUE8_2_EVEN, RED8_2_EVEN, TMP2);
-
-	vis_pmerge(TMP0, TMP2, TMP4);
-	vis_st64(TMP4, image[0]);
-
-	vis_pmerge(TMP1, TMP3, TMP6);
-	vis_st64_2(TMP6, image, 8);
-
-	vis_pmerge(ZEROS, GREEN8_2_ODD, TMP8);
-	vis_pmerge(BLUE8_2_ODD, RED8_2_ODD, TMP10);
-
-	vis_pmerge(TMP8, TMP10, TMP0);
-	vis_st64_2(TMP0, image, 16);
-
-	vis_pmerge(TMP9, TMP11, TMP2);
-	vis_st64_2(TMP2, image, 24);
-}
-
-static inline void vis_yuv420_argb32(uint8_t *image,
-				     uint8_t *py, uint8_t *pu, uint8_t *pv,
-				     int width, int height, int rgb_stride,
-				     int y_stride, int uv_stride)
-{
-	height >>= 1;
-	uv_stride -= width >> 1;
-	do {
-		int i = width >> 3;
-		do {
-			vis_yuv2rgb(py, pu, pv, y_stride);
-			vis_unpack_32rgb(image, rgb_stride);
-			py += 8;
-			pu += 4;
-			pv += 4;
-			image += 32;
-		} while (--i);
-
-		py    += (y_stride << 1) - width;
-		image += (rgb_stride << 1) - 4 * width;
-		pu    += uv_stride;
-		pv    += uv_stride;
-	} while (--height);
-}
-
-static inline void vis_yuv420_abgr32(uint8_t *image,
-				     uint8_t *py, uint8_t *pu, uint8_t *pv,
-				     int width, int height, int rgb_stride,
-				     int y_stride, int uv_stride)
-{
-	height >>= 1;
-	uv_stride -= width >> 1;
-	do {
-		int i = width >> 3;
-		do {
-			vis_yuv2rgb(py, pu, pv, y_stride);
-			vis_unpack_32bgr(image, rgb_stride);
-			py += 8;
-			pu += 4;
-			pv += 4;
-			image += 32;
-		} while (--i);
-
-		py    += (y_stride << 1) - width;
-		image += (rgb_stride << 1) - 4 * width;
-		pu    += uv_stride;
-		pv    += uv_stride;
-	} while (--height);
-}
-
-static void vis_argb32(void *_id, uint8_t * const *src,
-		       unsigned int v_offset)
-{
-	convert_rgb_t *id = (convert_rgb_t *) _id;
-
-	vis_init_consts();
-	vis_yuv420_argb32(id->rgb_ptr + id->rgb_stride * v_offset,
-			  src[0], src[1], src[2], id->width, 16,
-			  id->rgb_stride, id->y_stride, id->y_stride >> 1);
-}
-
-static void vis_abgr32(void *_id, uint8_t * const *src,
-		       unsigned int v_offset)
-{
-	convert_rgb_t *id = (convert_rgb_t *) _id;
-
-	vis_init_consts();
-	vis_yuv420_abgr32(id->rgb_ptr + id->rgb_stride * v_offset,
-			  src[0], src[1], src[2], id->width, 16,
-			  id->rgb_stride, id->y_stride, id->y_stride >> 1);
-}
-
-mpeg2convert_copy_t *mpeg2convert_rgb_vis(int order, int bpp,
-					  const mpeg2_sequence_t * seq)
-{
-	if (bpp == 32 && seq->chroma_height < seq->height) {
-		if (order == MPEG2CONVERT_RGB)
-			return vis_argb32;
-		if (order == MPEG2CONVERT_BGR)
-			return vis_abgr32;
-	}
-
-	return NULL;	/* Fallback to C */
-}
-
-#endif /* ARCH_SPARC */
diff --git a/src/libmpeg2new/libmpeg2/slice.c b/src/libmpeg2new/libmpeg2/slice.c
deleted file mode 100644
index ce4508639..000000000
--- a/src/libmpeg2new/libmpeg2/slice.c
+++ /dev/null
@@ -1,2058 +0,0 @@
-/*
- * slice.c
- * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 2003      Peter Gubanov <peter@elecard.net.ru>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include "config.h"
-
-#include <inttypes.h>
-
-#include "../include/mpeg2.h"
-#include "../include/attributes.h"
-#include "mpeg2_internal.h"
-
-extern mpeg2_mc_t mpeg2_mc;
-extern void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride);
-extern void (* mpeg2_idct_add) (int last, int16_t * block,
-				uint8_t * dest, int stride);
-extern void (* mpeg2_cpu_state_save) (cpu_state_t * state);
-extern void (* mpeg2_cpu_state_restore) (cpu_state_t * state);
-
-#include "vlc.h"
-
-static inline int get_macroblock_modes (mpeg2_decoder_t * const decoder)
-{
-#define bit_buf (decoder->bitstream_buf)
-#define bits (decoder->bitstream_bits)
-#define bit_ptr (decoder->bitstream_ptr)
-    int macroblock_modes;
-    const MBtab * tab;
-
-    switch (decoder->coding_type) {
-    case I_TYPE:
-
-	tab = MB_I + UBITS (bit_buf, 1);
-	DUMPBITS (bit_buf, bits, tab->len);
-	macroblock_modes = tab->modes;
-
-	if ((! (decoder->frame_pred_frame_dct)) &&
-	    (decoder->picture_structure == FRAME_PICTURE)) {
-	    macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED;
-	    DUMPBITS (bit_buf, bits, 1);
-	}
-
-	return macroblock_modes;
-
-    case P_TYPE:
-
-	tab = MB_P + UBITS (bit_buf, 5);
-	DUMPBITS (bit_buf, bits, tab->len);
-	macroblock_modes = tab->modes;
-
-	if (decoder->picture_structure != FRAME_PICTURE) {
-	    if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) {
-		macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT;
-		DUMPBITS (bit_buf, bits, 2);
-	    }
-	    return macroblock_modes | MACROBLOCK_MOTION_FORWARD;
-	} else if (decoder->frame_pred_frame_dct) {
-	    if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
-		macroblock_modes |= MC_FRAME << MOTION_TYPE_SHIFT;
-	    return macroblock_modes | MACROBLOCK_MOTION_FORWARD;
-	} else {
-	    if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) {
-		macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT;
-		DUMPBITS (bit_buf, bits, 2);
-	    }
-	    if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) {
-		macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED;
-		DUMPBITS (bit_buf, bits, 1);
-	    }
-	    return macroblock_modes | MACROBLOCK_MOTION_FORWARD;
-	}
-
-    case B_TYPE:
-
-	tab = MB_B + UBITS (bit_buf, 6);
-	DUMPBITS (bit_buf, bits, tab->len);
-	macroblock_modes = tab->modes;
-
-	if (decoder->picture_structure != FRAME_PICTURE) {
-	    if (! (macroblock_modes & MACROBLOCK_INTRA)) {
-		macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT;
-		DUMPBITS (bit_buf, bits, 2);
-	    }
-	    return macroblock_modes;
-	} else if (decoder->frame_pred_frame_dct) {
-	    /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */
-	    macroblock_modes |= MC_FRAME << MOTION_TYPE_SHIFT;
-	    return macroblock_modes;
-	} else {
-	    if (macroblock_modes & MACROBLOCK_INTRA)
-		goto intra;
-	    macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT;
-	    DUMPBITS (bit_buf, bits, 2);
-	    if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) {
-	    intra:
-		macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED;
-		DUMPBITS (bit_buf, bits, 1);
-	    }
-	    return macroblock_modes;
-	}
-
-    case D_TYPE:
-
-	DUMPBITS (bit_buf, bits, 1);
-	return MACROBLOCK_INTRA;
-
-    default:
-	return 0;
-    }
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
-
-static inline void get_quantizer_scale (mpeg2_decoder_t * const decoder)
-{
-#define bit_buf (decoder->bitstream_buf)
-#define bits (decoder->bitstream_bits)
-#define bit_ptr (decoder->bitstream_ptr)
-
-    int quantizer_scale_code;
-
-    quantizer_scale_code = UBITS (bit_buf, 5);
-    DUMPBITS (bit_buf, bits, 5);
-
-    decoder->quantizer_matrix[0] =
-	decoder->quantizer_prescale[0][quantizer_scale_code];
-    decoder->quantizer_matrix[1] =
-	decoder->quantizer_prescale[1][quantizer_scale_code];
-    decoder->quantizer_matrix[2] =
-	decoder->chroma_quantizer[0][quantizer_scale_code];
-    decoder->quantizer_matrix[3] =
-	decoder->chroma_quantizer[1][quantizer_scale_code];
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
-
-static inline int get_motion_delta (mpeg2_decoder_t * const decoder,
-				    const int f_code)
-{
-#define bit_buf (decoder->bitstream_buf)
-#define bits (decoder->bitstream_bits)
-#define bit_ptr (decoder->bitstream_ptr)
-
-    int delta;
-    int sign;
-    const MVtab * tab;
-
-    if (bit_buf & 0x80000000) {
-	DUMPBITS (bit_buf, bits, 1);
-	return 0;
-    } else if (bit_buf >= 0x0c000000) {
-
-	tab = MV_4 + UBITS (bit_buf, 4);
-	delta = (tab->delta << f_code) + 1;
-	bits += tab->len + f_code + 1;
-	bit_buf <<= tab->len;
-
-	sign = SBITS (bit_buf, 1);
-	bit_buf <<= 1;
-
-	if (f_code)
-	    delta += UBITS (bit_buf, f_code);
-	bit_buf <<= f_code;
-
-	return (delta ^ sign) - sign;
-
-    } else {
-
-	tab = MV_10 + UBITS (bit_buf, 10);
-	delta = (tab->delta << f_code) + 1;
-	bits += tab->len + 1;
-	bit_buf <<= tab->len;
-
-	sign = SBITS (bit_buf, 1);
-	bit_buf <<= 1;
-
-	if (f_code) {
-	    NEEDBITS (bit_buf, bits, bit_ptr);
-	    delta += UBITS (bit_buf, f_code);
-	    DUMPBITS (bit_buf, bits, f_code);
-	}
-
-	return (delta ^ sign) - sign;
-
-    }
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
-
-static inline int bound_motion_vector (const int vector, const int f_code)
-{
-    return ((int32_t)vector << (27 - f_code)) >> (27 - f_code);
-}
-
-static inline int get_dmv (mpeg2_decoder_t * const decoder)
-{
-#define bit_buf (decoder->bitstream_buf)
-#define bits (decoder->bitstream_bits)
-#define bit_ptr (decoder->bitstream_ptr)
-
-    const DMVtab * tab;
-
-    tab = DMV_2 + UBITS (bit_buf, 2);
-    DUMPBITS (bit_buf, bits, tab->len);
-    return tab->dmv;
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
-
-static inline int get_coded_block_pattern (mpeg2_decoder_t * const decoder)
-{
-#define bit_buf (decoder->bitstream_buf)
-#define bits (decoder->bitstream_bits)
-#define bit_ptr (decoder->bitstream_ptr)
-
-    const CBPtab * tab;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-
-    if (bit_buf >= 0x20000000) {
-
-	tab = CBP_7 + (UBITS (bit_buf, 7) - 16);
-	DUMPBITS (bit_buf, bits, tab->len);
-	return tab->cbp;
-
-    } else {
-
-	tab = CBP_9 + UBITS (bit_buf, 9);
-	DUMPBITS (bit_buf, bits, tab->len);
-	return tab->cbp;
-    }
-
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
-
-static inline int get_luma_dc_dct_diff (mpeg2_decoder_t * const decoder)
-{
-#define bit_buf (decoder->bitstream_buf)
-#define bits (decoder->bitstream_bits)
-#define bit_ptr (decoder->bitstream_ptr)
-    const DCtab * tab;
-    int size;
-    int dc_diff;
-
-    if (bit_buf < 0xf8000000) {
-	tab = DC_lum_5 + UBITS (bit_buf, 5);
-	size = tab->size;
-	if (size) {
-	    bits += tab->len + size;
-	    bit_buf <<= tab->len;
-	    dc_diff =
-		UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
-	    bit_buf <<= size;
-	    return dc_diff << decoder->intra_dc_precision;
-	} else {
-	    DUMPBITS (bit_buf, bits, 3);
-	    return 0;
-	}
-    } else {
-	tab = DC_long + (UBITS (bit_buf, 9) - 0x1e0);
-	size = tab->size;
-	DUMPBITS (bit_buf, bits, tab->len);
-	NEEDBITS (bit_buf, bits, bit_ptr);
-	dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
-	DUMPBITS (bit_buf, bits, size);
-	return dc_diff << decoder->intra_dc_precision;
-    }
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
-
-static inline int get_chroma_dc_dct_diff (mpeg2_decoder_t * const decoder)
-{
-#define bit_buf (decoder->bitstream_buf)
-#define bits (decoder->bitstream_bits)
-#define bit_ptr (decoder->bitstream_ptr)
-    const DCtab * tab;
-    int size;
-    int dc_diff;
-
-    if (bit_buf < 0xf8000000) {
-	tab = DC_chrom_5 + UBITS (bit_buf, 5);
-	size = tab->size;
-	if (size) {
-	    bits += tab->len + size;
-	    bit_buf <<= tab->len;
-	    dc_diff =
-		UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
-	    bit_buf <<= size;
-	    return dc_diff << decoder->intra_dc_precision;
-	} else {
-	    DUMPBITS (bit_buf, bits, 2);
-	    return 0;
-	}
-    } else {
-	tab = DC_long + (UBITS (bit_buf, 10) - 0x3e0);
-	size = tab->size;
-	DUMPBITS (bit_buf, bits, tab->len + 1);
-	NEEDBITS (bit_buf, bits, bit_ptr);
-	dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
-	DUMPBITS (bit_buf, bits, size);
-	return dc_diff << decoder->intra_dc_precision;
-    }
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
-
-#define SATURATE(val)				\
-do {						\
-    val <<= 4;					\
-    if (unlikely (val != (int16_t) val))	\
-	val = (SBITS (val, 1) ^ 2047) << 4;	\
-} while (0)
-
-static void get_intra_block_B14 (mpeg2_decoder_t * const decoder,
-				 const uint16_t * const quant_matrix)
-{
-    int i;
-    int j;
-    int val;
-    const uint8_t * const scan = decoder->scan;
-    int mismatch;
-    const DCTtab * tab;
-    uint32_t bit_buf;
-    int bits;
-    const uint8_t * bit_ptr;
-    int16_t * const dest = decoder->DCTblock;
-
-    i = 0;
-    mismatch = ~dest[0];
-
-    bit_buf = decoder->bitstream_buf;
-    bits = decoder->bitstream_bits;
-    bit_ptr = decoder->bitstream_ptr;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-
-    while (1) {
-	if (bit_buf >= 0x28000000) {
-
-	    tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
-
-	    i += tab->run;
-	    if (i >= 64)
-		break;	/* end of block */
-
-	normal_code:
-	    j = scan[i];
-	    bit_buf <<= tab->len;
-	    bits += tab->len + 1;
-	    val = (tab->level * quant_matrix[j]) >> 4;
-
-	    /* if (bitstream_get (1)) val = -val; */
-	    val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
-
-	    SATURATE (val);
-	    dest[j] = val;
-	    mismatch ^= val;
-
-	    bit_buf <<= 1;
-	    NEEDBITS (bit_buf, bits, bit_ptr);
-
-	    continue;
-
-	} else if (bit_buf >= 0x04000000) {
-
-	    tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
-
-	    i += tab->run;
-	    if (i < 64)
-		goto normal_code;
-
-	    /* escape code */
-
-	    i += UBITS (bit_buf << 6, 6) - 64;
-	    if (i >= 64)
-		break;	/* illegal, check needed to avoid buffer overflow */
-
-	    j = scan[i];
-
-	    DUMPBITS (bit_buf, bits, 12);
-	    NEEDBITS (bit_buf, bits, bit_ptr);
-	    val = (SBITS (bit_buf, 12) * quant_matrix[j]) / 16;
-
-	    SATURATE (val);
-	    dest[j] = val;
-	    mismatch ^= val;
-
-	    DUMPBITS (bit_buf, bits, 12);
-	    NEEDBITS (bit_buf, bits, bit_ptr);
-
-	    continue;
-
-	} else if (bit_buf >= 0x02000000) {
-	    tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
-	    i += tab->run;
-	    if (i < 64)
-		goto normal_code;
-	} else if (bit_buf >= 0x00800000) {
-	    tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
-	    i += tab->run;
-	    if (i < 64)
-		goto normal_code;
-	} else if (bit_buf >= 0x00200000) {
-	    tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
-	    i += tab->run;
-	    if (i < 64)
-		goto normal_code;
-	} else {
-	    tab = DCT_16 + UBITS (bit_buf, 16);
-	    bit_buf <<= 16;
-	    GETWORD (bit_buf, bits + 16, bit_ptr);
-	    i += tab->run;
-	    if (i < 64)
-		goto normal_code;
-	}
-	break;	/* illegal, check needed to avoid buffer overflow */
-    }
-    dest[63] ^= mismatch & 16;
-    DUMPBITS (bit_buf, bits, tab->len);	/* dump end of block code */
-    decoder->bitstream_buf = bit_buf;
-    decoder->bitstream_bits = bits;
-    decoder->bitstream_ptr = bit_ptr;
-}
-
-static void get_intra_block_B15 (mpeg2_decoder_t * const decoder,
-				 const uint16_t * const quant_matrix)
-{
-    int i;
-    int j;
-    int val;
-    const uint8_t * const scan = decoder->scan;
-    int mismatch;
-    const DCTtab * tab;
-    uint32_t bit_buf;
-    int bits;
-    const uint8_t * bit_ptr;
-    int16_t * const dest = decoder->DCTblock;
-
-    i = 0;
-    mismatch = ~dest[0];
-
-    bit_buf = decoder->bitstream_buf;
-    bits = decoder->bitstream_bits;
-    bit_ptr = decoder->bitstream_ptr;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-
-    while (1) {
-	if (bit_buf >= 0x04000000) {
-
-	    tab = DCT_B15_8 + (UBITS (bit_buf, 8) - 4);
-
-	    i += tab->run;
-	    if (i < 64) {
-
-	    normal_code:
-		j = scan[i];
-		bit_buf <<= tab->len;
-		bits += tab->len + 1;
-		val = (tab->level * quant_matrix[j]) >> 4;
-
-		/* if (bitstream_get (1)) val = -val; */
-		val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
-
-		SATURATE (val);
-		dest[j] = val;
-		mismatch ^= val;
-
-		bit_buf <<= 1;
-		NEEDBITS (bit_buf, bits, bit_ptr);
-
-		continue;
-
-	    } else {
-
-		/* end of block. I commented out this code because if we */
-		/* dont exit here we will still exit at the later test :) */
-
-		/* if (i >= 128) break;	*/	/* end of block */
-
-		/* escape code */
-
-		i += UBITS (bit_buf << 6, 6) - 64;
-		if (i >= 64)
-		    break;	/* illegal, check against buffer overflow */
-
-		j = scan[i];
-
-		DUMPBITS (bit_buf, bits, 12);
-		NEEDBITS (bit_buf, bits, bit_ptr);
-		val = (SBITS (bit_buf, 12) * quant_matrix[j]) / 16;
-
-		SATURATE (val);
-		dest[j] = val;
-		mismatch ^= val;
-
-		DUMPBITS (bit_buf, bits, 12);
-		NEEDBITS (bit_buf, bits, bit_ptr);
-
-		continue;
-
-	    }
-	} else if (bit_buf >= 0x02000000) {
-	    tab = DCT_B15_10 + (UBITS (bit_buf, 10) - 8);
-	    i += tab->run;
-	    if (i < 64)
-		goto normal_code;
-	} else if (bit_buf >= 0x00800000) {
-	    tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
-	    i += tab->run;
-	    if (i < 64)
-		goto normal_code;
-	} else if (bit_buf >= 0x00200000) {
-	    tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
-	    i += tab->run;
-	    if (i < 64)
-		goto normal_code;
-	} else {
-	    tab = DCT_16 + UBITS (bit_buf, 16);
-	    bit_buf <<= 16;
-	    GETWORD (bit_buf, bits + 16, bit_ptr);
-	    i += tab->run;
-	    if (i < 64)
-		goto normal_code;
-	}
-	break;	/* illegal, check needed to avoid buffer overflow */
-    }
-    dest[63] ^= mismatch & 16;
-    DUMPBITS (bit_buf, bits, tab->len);	/* dump end of block code */
-    decoder->bitstream_buf = bit_buf;
-    decoder->bitstream_bits = bits;
-    decoder->bitstream_ptr = bit_ptr;
-}
-
-static int get_non_intra_block (mpeg2_decoder_t * const decoder,
-				const uint16_t * const quant_matrix)
-{
-    int i;
-    int j;
-    int val;
-    const uint8_t * const scan = decoder->scan;
-    int mismatch;
-    const DCTtab * tab;
-    uint32_t bit_buf;
-    int bits;
-    const uint8_t * bit_ptr;
-    int16_t * const dest = decoder->DCTblock;
-
-    i = -1;
-    mismatch = -1;
-
-    bit_buf = decoder->bitstream_buf;
-    bits = decoder->bitstream_bits;
-    bit_ptr = decoder->bitstream_ptr;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    if (bit_buf >= 0x28000000) {
-	tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5);
-	goto entry_1;
-    } else
-	goto entry_2;
-
-    while (1) {
-	if (bit_buf >= 0x28000000) {
-
-	    tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
-
-	entry_1:
-	    i += tab->run;
-	    if (i >= 64)
-		break;	/* end of block */
-
-	normal_code:
-	    j = scan[i];
-	    bit_buf <<= tab->len;
-	    bits += tab->len + 1;
-	    val = ((2 * tab->level + 1) * quant_matrix[j]) >> 5;
-
-	    /* if (bitstream_get (1)) val = -val; */
-	    val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
-
-	    SATURATE (val);
-	    dest[j] = val;
-	    mismatch ^= val;
-
-	    bit_buf <<= 1;
-	    NEEDBITS (bit_buf, bits, bit_ptr);
-
-	    continue;
-
-	}
-
-    entry_2:
-	if (bit_buf >= 0x04000000) {
-
-	    tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
-
-	    i += tab->run;
-	    if (i < 64)
-		goto normal_code;
-
-	    /* escape code */
-
-	    i += UBITS (bit_buf << 6, 6) - 64;
-	    if (i >= 64)
-		break;	/* illegal, check needed to avoid buffer overflow */
-
-	    j = scan[i];
-
-	    DUMPBITS (bit_buf, bits, 12);
-	    NEEDBITS (bit_buf, bits, bit_ptr);
-	    val = 2 * (SBITS (bit_buf, 12) + SBITS (bit_buf, 1)) + 1;
-	    val = (val * quant_matrix[j]) / 32;
-
-	    SATURATE (val);
-	    dest[j] = val;
-	    mismatch ^= val;
-
-	    DUMPBITS (bit_buf, bits, 12);
-	    NEEDBITS (bit_buf, bits, bit_ptr);
-
-	    continue;
-
-	} else if (bit_buf >= 0x02000000) {
-	    tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
-	    i += tab->run;
-	    if (i < 64)
-		goto normal_code;
-	} else if (bit_buf >= 0x00800000) {
-	    tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
-	    i += tab->run;
-	    if (i < 64)
-		goto normal_code;
-	} else if (bit_buf >= 0x00200000) {
-	    tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
-	    i += tab->run;
-	    if (i < 64)
-		goto normal_code;
-	} else {
-	    tab = DCT_16 + UBITS (bit_buf, 16);
-	    bit_buf <<= 16;
-	    GETWORD (bit_buf, bits + 16, bit_ptr);
-	    i += tab->run;
-	    if (i < 64)
-		goto normal_code;
-	}
-	break;	/* illegal, check needed to avoid buffer overflow */
-    }
-    dest[63] ^= mismatch & 16;
-    DUMPBITS (bit_buf, bits, tab->len);	/* dump end of block code */
-    decoder->bitstream_buf = bit_buf;
-    decoder->bitstream_bits = bits;
-    decoder->bitstream_ptr = bit_ptr;
-    return i;
-}
-
-static void get_mpeg1_intra_block (mpeg2_decoder_t * const decoder)
-{
-    int i;
-    int j;
-    int val;
-    const uint8_t * const scan = decoder->scan;
-    const uint16_t * const quant_matrix = decoder->quantizer_matrix[0];
-    const DCTtab * tab;
-    uint32_t bit_buf;
-    int bits;
-    const uint8_t * bit_ptr;
-    int16_t * const dest = decoder->DCTblock;
-
-    i = 0;
-
-    bit_buf = decoder->bitstream_buf;
-    bits = decoder->bitstream_bits;
-    bit_ptr = decoder->bitstream_ptr;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-
-    while (1) {
-	if (bit_buf >= 0x28000000) {
-
-	    tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
-
-	    i += tab->run;
-	    if (i >= 64)
-		break;	/* end of block */
-
-	normal_code:
-	    j = scan[i];
-	    bit_buf <<= tab->len;
-	    bits += tab->len + 1;
-	    val = (tab->level * quant_matrix[j]) >> 4;
-
-	    /* oddification */
-	    val = (val - 1) | 1;
-
-	    /* if (bitstream_get (1)) val = -val; */
-	    val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
-
-	    SATURATE (val);
-	    dest[j] = val;
-
-	    bit_buf <<= 1;
-	    NEEDBITS (bit_buf, bits, bit_ptr);
-
-	    continue;
-
-	} else if (bit_buf >= 0x04000000) {
-
-	    tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
-
-	    i += tab->run;
-	    if (i < 64)
-		goto normal_code;
-
-	    /* escape code */
-
-	    i += UBITS (bit_buf << 6, 6) - 64;
-	    if (i >= 64)
-		break;	/* illegal, check needed to avoid buffer overflow */
-
-	    j = scan[i];
-
-	    DUMPBITS (bit_buf, bits, 12);
-	    NEEDBITS (bit_buf, bits, bit_ptr);
-	    val = SBITS (bit_buf, 8);
-	    if (! (val & 0x7f)) {
-		DUMPBITS (bit_buf, bits, 8);
-		val = UBITS (bit_buf, 8) + 2 * val;
-	    }
-	    val = (val * quant_matrix[j]) / 16;
-
-	    /* oddification */
-	    val = (val + ~SBITS (val, 1)) | 1;
-
-	    SATURATE (val);
-	    dest[j] = val;
-
-	    DUMPBITS (bit_buf, bits, 8);
-	    NEEDBITS (bit_buf, bits, bit_ptr);
-
-	    continue;
-
-	} else if (bit_buf >= 0x02000000) {
-	    tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
-	    i += tab->run;
-	    if (i < 64)
-		goto normal_code;
-	} else if (bit_buf >= 0x00800000) {
-	    tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
-	    i += tab->run;
-	    if (i < 64)
-		goto normal_code;
-	} else if (bit_buf >= 0x00200000) {
-	    tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
-	    i += tab->run;
-	    if (i < 64)
-		goto normal_code;
-	} else {
-	    tab = DCT_16 + UBITS (bit_buf, 16);
-	    bit_buf <<= 16;
-	    GETWORD (bit_buf, bits + 16, bit_ptr);
-	    i += tab->run;
-	    if (i < 64)
-		goto normal_code;
-	}
-	break;	/* illegal, check needed to avoid buffer overflow */
-    }
-    DUMPBITS (bit_buf, bits, tab->len);	/* dump end of block code */
-    decoder->bitstream_buf = bit_buf;
-    decoder->bitstream_bits = bits;
-    decoder->bitstream_ptr = bit_ptr;
-}
-
-static int get_mpeg1_non_intra_block (mpeg2_decoder_t * const decoder)
-{
-    int i;
-    int j;
-    int val;
-    const uint8_t * const scan = decoder->scan;
-    const uint16_t * const quant_matrix = decoder->quantizer_matrix[1];
-    const DCTtab * tab;
-    uint32_t bit_buf;
-    int bits;
-    const uint8_t * bit_ptr;
-    int16_t * const dest = decoder->DCTblock;
-
-    i = -1;
-
-    bit_buf = decoder->bitstream_buf;
-    bits = decoder->bitstream_bits;
-    bit_ptr = decoder->bitstream_ptr;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    if (bit_buf >= 0x28000000) {
-	tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5);
-	goto entry_1;
-    } else
-	goto entry_2;
-
-    while (1) {
-	if (bit_buf >= 0x28000000) {
-
-	    tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
-
-	entry_1:
-	    i += tab->run;
-	    if (i >= 64)
-		break;	/* end of block */
-
-	normal_code:
-	    j = scan[i];
-	    bit_buf <<= tab->len;
-	    bits += tab->len + 1;
-	    val = ((2 * tab->level + 1) * quant_matrix[j]) >> 5;
-
-	    /* oddification */
-	    val = (val - 1) | 1;
-
-	    /* if (bitstream_get (1)) val = -val; */
-	    val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
-
-	    SATURATE (val);
-	    dest[j] = val;
-
-	    bit_buf <<= 1;
-	    NEEDBITS (bit_buf, bits, bit_ptr);
-
-	    continue;
-
-	}
-
-    entry_2:
-	if (bit_buf >= 0x04000000) {
-
-	    tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
-
-	    i += tab->run;
-	    if (i < 64)
-		goto normal_code;
-
-	    /* escape code */
-
-	    i += UBITS (bit_buf << 6, 6) - 64;
-	    if (i >= 64)
-		break;	/* illegal, check needed to avoid buffer overflow */
-
-	    j = scan[i];
-
-	    DUMPBITS (bit_buf, bits, 12);
-	    NEEDBITS (bit_buf, bits, bit_ptr);
-	    val = SBITS (bit_buf, 8);
-	    if (! (val & 0x7f)) {
-		DUMPBITS (bit_buf, bits, 8);
-		val = UBITS (bit_buf, 8) + 2 * val;
-	    }
-	    val = 2 * (val + SBITS (val, 1)) + 1;
-	    val = (val * quant_matrix[j]) / 32;
-
-	    /* oddification */
-	    val = (val + ~SBITS (val, 1)) | 1;
-
-	    SATURATE (val);
-	    dest[j] = val;
-
-	    DUMPBITS (bit_buf, bits, 8);
-	    NEEDBITS (bit_buf, bits, bit_ptr);
-
-	    continue;
-
-	} else if (bit_buf >= 0x02000000) {
-	    tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
-	    i += tab->run;
-	    if (i < 64)
-		goto normal_code;
-	} else if (bit_buf >= 0x00800000) {
-	    tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
-	    i += tab->run;
-	    if (i < 64)
-		goto normal_code;
-	} else if (bit_buf >= 0x00200000) {
-	    tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
-	    i += tab->run;
-	    if (i < 64)
-		goto normal_code;
-	} else {
-	    tab = DCT_16 + UBITS (bit_buf, 16);
-	    bit_buf <<= 16;
-	    GETWORD (bit_buf, bits + 16, bit_ptr);
-	    i += tab->run;
-	    if (i < 64)
-		goto normal_code;
-	}
-	break;	/* illegal, check needed to avoid buffer overflow */
-    }
-    DUMPBITS (bit_buf, bits, tab->len);	/* dump end of block code */
-    decoder->bitstream_buf = bit_buf;
-    decoder->bitstream_bits = bits;
-    decoder->bitstream_ptr = bit_ptr;
-    return i;
-}
-
-static inline void slice_intra_DCT (mpeg2_decoder_t * const decoder,
-				    const int cc,
-				    uint8_t * const dest, const int stride)
-{
-#define bit_buf (decoder->bitstream_buf)
-#define bits (decoder->bitstream_bits)
-#define bit_ptr (decoder->bitstream_ptr)
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    /* Get the intra DC coefficient and inverse quantize it */
-    if (cc == 0)
-	decoder->DCTblock[0] =
-	    decoder->dc_dct_pred[0] += get_luma_dc_dct_diff (decoder);
-    else
-	decoder->DCTblock[0] =
-	    decoder->dc_dct_pred[cc] += get_chroma_dc_dct_diff (decoder);
-
-    if (decoder->mpeg1) {
-	if (decoder->coding_type != D_TYPE)
-	    get_mpeg1_intra_block (decoder);
-    } else if (decoder->intra_vlc_format)
-	get_intra_block_B15 (decoder, decoder->quantizer_matrix[cc ? 2 : 0]);
-    else
-	get_intra_block_B14 (decoder, decoder->quantizer_matrix[cc ? 2 : 0]);
-    mpeg2_idct_copy (decoder->DCTblock, dest, stride);
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
-
-static inline void slice_non_intra_DCT (mpeg2_decoder_t * const decoder,
-					const int cc,
-					uint8_t * const dest, const int stride)
-{
-    int last;
-
-    if (decoder->mpeg1)
-	last = get_mpeg1_non_intra_block (decoder);
-    else
-	last = get_non_intra_block (decoder,
-				    decoder->quantizer_matrix[cc ? 3 : 1]);
-    mpeg2_idct_add (last, decoder->DCTblock, dest, stride);
-}
-
-#define MOTION_420(table,ref,motion_x,motion_y,size,y)			      \
-    pos_x = 2 * decoder->offset + motion_x;				      \
-    pos_y = 2 * decoder->v_offset + motion_y + 2 * y;			      \
-    if (unlikely (pos_x > decoder->limit_x)) {				      \
-	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
-	motion_x = pos_x - 2 * decoder->offset;				      \
-    }									      \
-    if (unlikely (pos_y > decoder->limit_y_ ## size)) {			      \
-	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y_ ## size;	      \
-	motion_y = pos_y - 2 * decoder->v_offset - 2 * y;		      \
-    }									      \
-    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
-    table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \
-		    ref[0] + (pos_x >> 1) + (pos_y >> 1) * decoder->stride,   \
-		    decoder->stride, size);				      \
-    motion_x /= 2;	motion_y /= 2;					      \
-    xy_half = ((motion_y & 1) << 1) | (motion_x & 1);			      \
-    offset = (((decoder->offset + motion_x) >> 1) +			      \
-	      ((((decoder->v_offset + motion_y) >> 1) + y/2) *		      \
-	       decoder->uv_stride));					      \
-    table[4+xy_half] (decoder->dest[1] + y/2 * decoder->uv_stride +	      \
-		      (decoder->offset >> 1), ref[1] + offset,		      \
-		      decoder->uv_stride, size/2);			      \
-    table[4+xy_half] (decoder->dest[2] + y/2 * decoder->uv_stride +	      \
-		      (decoder->offset >> 1), ref[2] + offset,		      \
-		      decoder->uv_stride, size/2)
-
-#define MOTION_FIELD_420(table,ref,motion_x,motion_y,dest_field,op,src_field) \
-    pos_x = 2 * decoder->offset + motion_x;				      \
-    pos_y = decoder->v_offset + motion_y;				      \
-    if (unlikely (pos_x > decoder->limit_x)) {				      \
-	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
-	motion_x = pos_x - 2 * decoder->offset;				      \
-    }									      \
-    if (unlikely (pos_y > decoder->limit_y)) {				      \
-	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;		      \
-	motion_y = pos_y - decoder->v_offset;				      \
-    }									      \
-    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
-    table[xy_half] (decoder->dest[0] + dest_field * decoder->stride +	      \
-		    decoder->offset,					      \
-		    (ref[0] + (pos_x >> 1) +				      \
-		     ((pos_y op) + src_field) * decoder->stride),	      \
-		    2 * decoder->stride, 8);				      \
-    motion_x /= 2;	motion_y /= 2;					      \
-    xy_half = ((motion_y & 1) << 1) | (motion_x & 1);			      \
-    offset = (((decoder->offset + motion_x) >> 1) +			      \
-	      (((decoder->v_offset >> 1) + (motion_y op) + src_field) *	      \
-	       decoder->uv_stride));					      \
-    table[4+xy_half] (decoder->dest[1] + dest_field * decoder->uv_stride +    \
-		      (decoder->offset >> 1), ref[1] + offset,		      \
-		      2 * decoder->uv_stride, 4);			      \
-    table[4+xy_half] (decoder->dest[2] + dest_field * decoder->uv_stride +    \
-		      (decoder->offset >> 1), ref[2] + offset,		      \
-		      2 * decoder->uv_stride, 4)
-
-#define MOTION_DMV_420(table,ref,motion_x,motion_y)			      \
-    pos_x = 2 * decoder->offset + motion_x;				      \
-    pos_y = decoder->v_offset + motion_y;				      \
-    if (unlikely (pos_x > decoder->limit_x)) {				      \
-	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
-	motion_x = pos_x - 2 * decoder->offset;				      \
-    }									      \
-    if (unlikely (pos_y > decoder->limit_y)) {				      \
-	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;		      \
-	motion_y = pos_y - decoder->v_offset;				      \
-    }									      \
-    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
-    offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride;		      \
-    table[xy_half] (decoder->dest[0] + decoder->offset,			      \
-		    ref[0] + offset, 2 * decoder->stride, 8);		      \
-    table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset,     \
-		    ref[0] + decoder->stride + offset,			      \
-		    2 * decoder->stride, 8);				      \
-    motion_x /= 2;	motion_y /= 2;					      \
-    xy_half = ((motion_y & 1) << 1) | (motion_x & 1);			      \
-    offset = (((decoder->offset + motion_x) >> 1) +			      \
-	      (((decoder->v_offset >> 1) + (motion_y & ~1)) *		      \
-	       decoder->uv_stride));					      \
-    table[4+xy_half] (decoder->dest[1] + (decoder->offset >> 1),	      \
-		      ref[1] + offset, 2 * decoder->uv_stride, 4);	      \
-    table[4+xy_half] (decoder->dest[1] + decoder->uv_stride +		      \
-		      (decoder->offset >> 1),				      \
-		      ref[1] + decoder->uv_stride + offset,		      \
-		      2 * decoder->uv_stride, 4);			      \
-    table[4+xy_half] (decoder->dest[2] + (decoder->offset >> 1),	      \
-		      ref[2] + offset, 2 * decoder->uv_stride, 4);	      \
-    table[4+xy_half] (decoder->dest[2] + decoder->uv_stride +		      \
-		      (decoder->offset >> 1),				      \
-		      ref[2] + decoder->uv_stride + offset,		      \
-		      2 * decoder->uv_stride, 4)
-
-#define MOTION_ZERO_420(table,ref)					      \
-    table[0] (decoder->dest[0] + decoder->offset,			      \
-	      (ref[0] + decoder->offset +				      \
-	       decoder->v_offset * decoder->stride), decoder->stride, 16);    \
-    offset = ((decoder->offset >> 1) +					      \
-	      (decoder->v_offset >> 1) * decoder->uv_stride);		      \
-    table[4] (decoder->dest[1] + (decoder->offset >> 1),		      \
-	      ref[1] + offset, decoder->uv_stride, 8);			      \
-    table[4] (decoder->dest[2] + (decoder->offset >> 1),		      \
-	      ref[2] + offset, decoder->uv_stride, 8)
-
-#define MOTION_422(table,ref,motion_x,motion_y,size,y)			      \
-    pos_x = 2 * decoder->offset + motion_x;				      \
-    pos_y = 2 * decoder->v_offset + motion_y + 2 * y;			      \
-    if (unlikely (pos_x > decoder->limit_x)) {				      \
-	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
-	motion_x = pos_x - 2 * decoder->offset;				      \
-    }									      \
-    if (unlikely (pos_y > decoder->limit_y_ ## size)) {			      \
-	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y_ ## size;	      \
-	motion_y = pos_y - 2 * decoder->v_offset - 2 * y;		      \
-    }									      \
-    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
-    offset = (pos_x >> 1) + (pos_y >> 1) * decoder->stride;		      \
-    table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \
-		    ref[0] + offset, decoder->stride, size);		      \
-    offset = (offset + (motion_x & (motion_x < 0))) >> 1;		      \
-    motion_x /= 2;							      \
-    xy_half = ((pos_y & 1) << 1) | (motion_x & 1);			      \
-    table[4+xy_half] (decoder->dest[1] + y * decoder->uv_stride +	      \
-		      (decoder->offset >> 1), ref[1] + offset,		      \
-		      decoder->uv_stride, size);			      \
-    table[4+xy_half] (decoder->dest[2] + y * decoder->uv_stride +	      \
-		      (decoder->offset >> 1), ref[2] + offset,		      \
-		      decoder->uv_stride, size)
-
-#define MOTION_FIELD_422(table,ref,motion_x,motion_y,dest_field,op,src_field) \
-    pos_x = 2 * decoder->offset + motion_x;				      \
-    pos_y = decoder->v_offset + motion_y;				      \
-    if (unlikely (pos_x > decoder->limit_x)) {				      \
-	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
-	motion_x = pos_x - 2 * decoder->offset;				      \
-    }									      \
-    if (unlikely (pos_y > decoder->limit_y)) {				      \
-	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;		      \
-	motion_y = pos_y - decoder->v_offset;				      \
-    }									      \
-    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
-    offset = (pos_x >> 1) + ((pos_y op) + src_field) * decoder->stride;	      \
-    table[xy_half] (decoder->dest[0] + dest_field * decoder->stride +	      \
-		    decoder->offset, ref[0] + offset,			      \
-		    2 * decoder->stride, 8);				      \
-    offset = (offset + (motion_x & (motion_x < 0))) >> 1;		      \
-    motion_x /= 2;							      \
-    xy_half = ((pos_y & 1) << 1) | (motion_x & 1);			      \
-    table[4+xy_half] (decoder->dest[1] + dest_field * decoder->uv_stride +    \
-		      (decoder->offset >> 1), ref[1] + offset,		      \
-		      2 * decoder->uv_stride, 8);			      \
-    table[4+xy_half] (decoder->dest[2] + dest_field * decoder->uv_stride +    \
-		      (decoder->offset >> 1), ref[2] + offset,		      \
-		      2 * decoder->uv_stride, 8)
-
-#define MOTION_DMV_422(table,ref,motion_x,motion_y)			      \
-    pos_x = 2 * decoder->offset + motion_x;				      \
-    pos_y = decoder->v_offset + motion_y;				      \
-    if (unlikely (pos_x > decoder->limit_x)) {				      \
-	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
-	motion_x = pos_x - 2 * decoder->offset;				      \
-    }									      \
-    if (unlikely (pos_y > decoder->limit_y)) {				      \
-	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;		      \
-	motion_y = pos_y - decoder->v_offset;				      \
-    }									      \
-    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
-    offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride;		      \
-    table[xy_half] (decoder->dest[0] + decoder->offset,			      \
-		    ref[0] + offset, 2 * decoder->stride, 8);		      \
-    table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset,     \
-		    ref[0] + decoder->stride + offset,			      \
-		    2 * decoder->stride, 8);				      \
-    offset = (offset + (motion_x & (motion_x < 0))) >> 1;		      \
-    motion_x /= 2;							      \
-    xy_half = ((pos_y & 1) << 1) | (motion_x & 1);			      \
-    table[4+xy_half] (decoder->dest[1] + (decoder->offset >> 1),	      \
-		      ref[1] + offset, 2 * decoder->uv_stride, 8);	      \
-    table[4+xy_half] (decoder->dest[1] + decoder->uv_stride +		      \
-		      (decoder->offset >> 1),				      \
-		      ref[1] + decoder->uv_stride + offset,		      \
-		      2 * decoder->uv_stride, 8);			      \
-    table[4+xy_half] (decoder->dest[2] + (decoder->offset >> 1),	      \
-		      ref[2] + offset, 2 * decoder->uv_stride, 8);	      \
-    table[4+xy_half] (decoder->dest[2] + decoder->uv_stride +		      \
-		      (decoder->offset >> 1),				      \
-		      ref[2] + decoder->uv_stride + offset,		      \
-		      2 * decoder->uv_stride, 8)
-
-#define MOTION_ZERO_422(table,ref)					      \
-    offset = decoder->offset + decoder->v_offset * decoder->stride;	      \
-    table[0] (decoder->dest[0] + decoder->offset,			      \
-	      ref[0] + offset, decoder->stride, 16);			      \
-    offset >>= 1;							      \
-    table[4] (decoder->dest[1] + (decoder->offset >> 1),		      \
-	      ref[1] + offset, decoder->uv_stride, 16);			      \
-    table[4] (decoder->dest[2] + (decoder->offset >> 1),		      \
-	      ref[2] + offset, decoder->uv_stride, 16)
-
-#define MOTION_444(table,ref,motion_x,motion_y,size,y)			      \
-    pos_x = 2 * decoder->offset + motion_x;				      \
-    pos_y = 2 * decoder->v_offset + motion_y + 2 * y;			      \
-    if (unlikely (pos_x > decoder->limit_x)) {				      \
-	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
-	motion_x = pos_x - 2 * decoder->offset;				      \
-    }									      \
-    if (unlikely (pos_y > decoder->limit_y_ ## size)) {			      \
-	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y_ ## size;	      \
-	motion_y = pos_y - 2 * decoder->v_offset - 2 * y;		      \
-    }									      \
-    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
-    offset = (pos_x >> 1) + (pos_y >> 1) * decoder->stride;		      \
-    table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \
-		    ref[0] + offset, decoder->stride, size);		      \
-    table[xy_half] (decoder->dest[1] + y * decoder->stride + decoder->offset, \
-		    ref[1] + offset, decoder->stride, size);		      \
-    table[xy_half] (decoder->dest[2] + y * decoder->stride + decoder->offset, \
-		    ref[2] + offset, decoder->stride, size)
-
-#define MOTION_FIELD_444(table,ref,motion_x,motion_y,dest_field,op,src_field) \
-    pos_x = 2 * decoder->offset + motion_x;				      \
-    pos_y = decoder->v_offset + motion_y;				      \
-    if (unlikely (pos_x > decoder->limit_x)) {				      \
-	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
-	motion_x = pos_x - 2 * decoder->offset;				      \
-    }									      \
-    if (unlikely (pos_y > decoder->limit_y)) {				      \
-	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;		      \
-	motion_y = pos_y - decoder->v_offset;				      \
-    }									      \
-    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
-    offset = (pos_x >> 1) + ((pos_y op) + src_field) * decoder->stride;	      \
-    table[xy_half] (decoder->dest[0] + dest_field * decoder->stride +	      \
-		    decoder->offset, ref[0] + offset,			      \
-		    2 * decoder->stride, 8);				      \
-    table[xy_half] (decoder->dest[1] + dest_field * decoder->stride +	      \
-		    decoder->offset, ref[1] + offset,			      \
-		    2 * decoder->stride, 8);				      \
-    table[xy_half] (decoder->dest[2] + dest_field * decoder->stride +	      \
-		    decoder->offset, ref[2] + offset,			      \
-		    2 * decoder->stride, 8)
-
-#define MOTION_DMV_444(table,ref,motion_x,motion_y)			      \
-    pos_x = 2 * decoder->offset + motion_x;				      \
-    pos_y = decoder->v_offset + motion_y;				      \
-    if (unlikely (pos_x > decoder->limit_x)) {				      \
-	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
-	motion_x = pos_x - 2 * decoder->offset;				      \
-    }									      \
-    if (unlikely (pos_y > decoder->limit_y)) {				      \
-	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;		      \
-	motion_y = pos_y - decoder->v_offset;				      \
-    }									      \
-    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
-    offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride;		      \
-    table[xy_half] (decoder->dest[0] + decoder->offset,			      \
-		    ref[0] + offset, 2 * decoder->stride, 8);		      \
-    table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset,     \
-		    ref[0] + decoder->stride + offset,			      \
-		    2 * decoder->stride, 8);				      \
-    table[xy_half] (decoder->dest[1] + decoder->offset,			      \
-		    ref[1] + offset, 2 * decoder->stride, 8);		      \
-    table[xy_half] (decoder->dest[1] + decoder->stride + decoder->offset,     \
-		    ref[1] + decoder->stride + offset,			      \
-		    2 * decoder->stride, 8);				      \
-    table[xy_half] (decoder->dest[2] + decoder->offset,			      \
-		    ref[2] + offset, 2 * decoder->stride, 8);		      \
-    table[xy_half] (decoder->dest[2] + decoder->stride + decoder->offset,     \
-		    ref[2] + decoder->stride + offset,			      \
-		    2 * decoder->stride, 8)
-
-#define MOTION_ZERO_444(table,ref)					      \
-    offset = decoder->offset + decoder->v_offset * decoder->stride;	      \
-    table[0] (decoder->dest[0] + decoder->offset,			      \
-	      ref[0] + offset, decoder->stride, 16);			      \
-    table[4] (decoder->dest[1] + decoder->offset,			      \
-	      ref[1] + offset, decoder->stride, 16);			      \
-    table[4] (decoder->dest[2] + (decoder->offset >> 1),		      \
-	      ref[2] + offset, decoder->stride, 16)
-
-#define bit_buf (decoder->bitstream_buf)
-#define bits (decoder->bitstream_bits)
-#define bit_ptr (decoder->bitstream_ptr)
-
-static void motion_mp1 (mpeg2_decoder_t * const decoder,
-			motion_t * const motion,
-			mpeg2_mc_fct * const * const table)
-{
-    int motion_x, motion_y;
-    unsigned int pos_x, pos_y, xy_half, offset;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_x = (motion->pmv[0][0] +
-		(get_motion_delta (decoder,
-				   motion->f_code[0]) << motion->f_code[1]));
-    motion_x = bound_motion_vector (motion_x,
-				    motion->f_code[0] + motion->f_code[1]);
-    motion->pmv[0][0] = motion_x;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    motion_y = (motion->pmv[0][1] +
-		(get_motion_delta (decoder,
-				   motion->f_code[0]) << motion->f_code[1]));
-    motion_y = bound_motion_vector (motion_y,
-				    motion->f_code[0] + motion->f_code[1]);
-    motion->pmv[0][1] = motion_y;
-
-    MOTION_420 (table, motion->ref[0], motion_x, motion_y, 16, 0);
-}
-
-#define MOTION_FUNCTIONS(FORMAT,MOTION,MOTION_FIELD,MOTION_DMV,MOTION_ZERO)   \
-									      \
-static void motion_fr_frame_##FORMAT (mpeg2_decoder_t * const decoder,	      \
-				      motion_t * const motion,		      \
-				      mpeg2_mc_fct * const * const table)     \
-{									      \
-    int motion_x, motion_y;						      \
-    unsigned int pos_x, pos_y, xy_half, offset;				      \
-									      \
-    NEEDBITS (bit_buf, bits, bit_ptr);					      \
-    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,		      \
-						     motion->f_code[0]);      \
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);	      \
-    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;			      \
-									      \
-    NEEDBITS (bit_buf, bits, bit_ptr);					      \
-    motion_y = motion->pmv[0][1] + get_motion_delta (decoder,		      \
-						     motion->f_code[1]);      \
-    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);	      \
-    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;			      \
-									      \
-    MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0);		      \
-}									      \
-									      \
-static void motion_fr_field_##FORMAT (mpeg2_decoder_t * const decoder,	      \
-				      motion_t * const motion,		      \
-				      mpeg2_mc_fct * const * const table)     \
-{									      \
-    int motion_x, motion_y, field;					      \
-    unsigned int pos_x, pos_y, xy_half, offset;				      \
-									      \
-    NEEDBITS (bit_buf, bits, bit_ptr);					      \
-    field = UBITS (bit_buf, 1);						      \
-    DUMPBITS (bit_buf, bits, 1);					      \
-									      \
-    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,		      \
-						     motion->f_code[0]);      \
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);	      \
-    motion->pmv[0][0] = motion_x;					      \
-									      \
-    NEEDBITS (bit_buf, bits, bit_ptr);					      \
-    motion_y = ((motion->pmv[0][1] >> 1) +				      \
-		get_motion_delta (decoder, motion->f_code[1]));		      \
-    /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */	      \
-    motion->pmv[0][1] = motion_y << 1;					      \
-									      \
-    MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 0, & ~1, field); \
-									      \
-    NEEDBITS (bit_buf, bits, bit_ptr);					      \
-    field = UBITS (bit_buf, 1);						      \
-    DUMPBITS (bit_buf, bits, 1);					      \
-									      \
-    motion_x = motion->pmv[1][0] + get_motion_delta (decoder,		      \
-						     motion->f_code[0]);      \
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);	      \
-    motion->pmv[1][0] = motion_x;					      \
-									      \
-    NEEDBITS (bit_buf, bits, bit_ptr);					      \
-    motion_y = ((motion->pmv[1][1] >> 1) +				      \
-		get_motion_delta (decoder, motion->f_code[1]));		      \
-    /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */	      \
-    motion->pmv[1][1] = motion_y << 1;					      \
-									      \
-    MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 1, & ~1, field); \
-}									      \
-									      \
-static void motion_fr_dmv_##FORMAT (mpeg2_decoder_t * const decoder,	      \
-				    motion_t * const motion,		      \
-				    mpeg2_mc_fct * const * const table)	      \
-{									      \
-    int motion_x, motion_y, dmv_x, dmv_y, m, other_x, other_y;		      \
-    unsigned int pos_x, pos_y, xy_half, offset;				      \
-									      \
-    NEEDBITS (bit_buf, bits, bit_ptr);					      \
-    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,		      \
-						     motion->f_code[0]);      \
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);	      \
-    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;			      \
-    NEEDBITS (bit_buf, bits, bit_ptr);					      \
-    dmv_x = get_dmv (decoder);						      \
-									      \
-    motion_y = ((motion->pmv[0][1] >> 1) +				      \
-		get_motion_delta (decoder, motion->f_code[1]));		      \
-    /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */	      \
-    motion->pmv[1][1] = motion->pmv[0][1] = motion_y << 1;		      \
-    dmv_y = get_dmv (decoder);						      \
-									      \
-    m = decoder->top_field_first ? 1 : 3;				      \
-    other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x;		      \
-    other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y - 1;	      \
-    MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 0, | 1, 0); \
-									      \
-    m = decoder->top_field_first ? 3 : 1;				      \
-    other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x;		      \
-    other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y + 1;	      \
-    MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 1, & ~1, 0);\
-									      \
-    MOTION_DMV (mpeg2_mc.avg, motion->ref[0], motion_x, motion_y);	      \
-}									      \
-									      \
-static void motion_reuse_##FORMAT (mpeg2_decoder_t * const decoder,	      \
-				   motion_t * const motion,		      \
-				   mpeg2_mc_fct * const * const table)	      \
-{									      \
-    int motion_x, motion_y;						      \
-    unsigned int pos_x, pos_y, xy_half, offset;				      \
-									      \
-    motion_x = motion->pmv[0][0];					      \
-    motion_y = motion->pmv[0][1];					      \
-									      \
-    MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0);		      \
-}									      \
-									      \
-static void motion_zero_##FORMAT (mpeg2_decoder_t * const decoder,	      \
-				  motion_t * const motion,		      \
-				  mpeg2_mc_fct * const * const table)	      \
-{									      \
-    unsigned int offset;						      \
-									      \
-    motion->pmv[0][0] = motion->pmv[0][1] = 0;				      \
-    motion->pmv[1][0] = motion->pmv[1][1] = 0;				      \
-									      \
-    MOTION_ZERO (table, motion->ref[0]);				      \
-}									      \
-									      \
-static void motion_fi_field_##FORMAT (mpeg2_decoder_t * const decoder,	      \
-				      motion_t * const motion,		      \
-				      mpeg2_mc_fct * const * const table)     \
-{									      \
-    int motion_x, motion_y;						      \
-    uint8_t ** ref_field;						      \
-    unsigned int pos_x, pos_y, xy_half, offset;				      \
-									      \
-    NEEDBITS (bit_buf, bits, bit_ptr);					      \
-    ref_field = motion->ref2[UBITS (bit_buf, 1)];			      \
-    DUMPBITS (bit_buf, bits, 1);					      \
-									      \
-    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,		      \
-						     motion->f_code[0]);      \
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);	      \
-    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;			      \
-									      \
-    NEEDBITS (bit_buf, bits, bit_ptr);					      \
-    motion_y = motion->pmv[0][1] + get_motion_delta (decoder,		      \
-						     motion->f_code[1]);      \
-    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);	      \
-    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;			      \
-									      \
-    MOTION (table, ref_field, motion_x, motion_y, 16, 0);		      \
-}									      \
-									      \
-static void motion_fi_16x8_##FORMAT (mpeg2_decoder_t * const decoder,	      \
-				     motion_t * const motion,		      \
-				     mpeg2_mc_fct * const * const table)      \
-{									      \
-    int motion_x, motion_y;						      \
-    uint8_t ** ref_field;						      \
-    unsigned int pos_x, pos_y, xy_half, offset;				      \
-									      \
-    NEEDBITS (bit_buf, bits, bit_ptr);					      \
-    ref_field = motion->ref2[UBITS (bit_buf, 1)];			      \
-    DUMPBITS (bit_buf, bits, 1);					      \
-									      \
-    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,		      \
-						     motion->f_code[0]);      \
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);	      \
-    motion->pmv[0][0] = motion_x;					      \
-									      \
-    NEEDBITS (bit_buf, bits, bit_ptr);					      \
-    motion_y = motion->pmv[0][1] + get_motion_delta (decoder,		      \
-						     motion->f_code[1]);      \
-    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);	      \
-    motion->pmv[0][1] = motion_y;					      \
-									      \
-    MOTION (table, ref_field, motion_x, motion_y, 8, 0);		      \
-									      \
-    NEEDBITS (bit_buf, bits, bit_ptr);					      \
-    ref_field = motion->ref2[UBITS (bit_buf, 1)];			      \
-    DUMPBITS (bit_buf, bits, 1);					      \
-									      \
-    motion_x = motion->pmv[1][0] + get_motion_delta (decoder,		      \
-						     motion->f_code[0]);      \
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);	      \
-    motion->pmv[1][0] = motion_x;					      \
-									      \
-    NEEDBITS (bit_buf, bits, bit_ptr);					      \
-    motion_y = motion->pmv[1][1] + get_motion_delta (decoder,		      \
-						     motion->f_code[1]);      \
-    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);	      \
-    motion->pmv[1][1] = motion_y;					      \
-									      \
-    MOTION (table, ref_field, motion_x, motion_y, 8, 8);		      \
-}									      \
-									      \
-static void motion_fi_dmv_##FORMAT (mpeg2_decoder_t * const decoder,	      \
-				    motion_t * const motion,		      \
-				    mpeg2_mc_fct * const * const table)	      \
-{									      \
-    int motion_x, motion_y, other_x, other_y;				      \
-    unsigned int pos_x, pos_y, xy_half, offset;				      \
-									      \
-    NEEDBITS (bit_buf, bits, bit_ptr);					      \
-    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,		      \
-						     motion->f_code[0]);      \
-    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);	      \
-    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;			      \
-    NEEDBITS (bit_buf, bits, bit_ptr);					      \
-    other_x = ((motion_x + (motion_x > 0)) >> 1) + get_dmv (decoder);	      \
-									      \
-    motion_y = motion->pmv[0][1] + get_motion_delta (decoder,		      \
-						     motion->f_code[1]);      \
-    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);	      \
-    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;			      \
-    other_y = (((motion_y + (motion_y > 0)) >> 1) + get_dmv (decoder) +	      \
-	       decoder->dmv_offset);					      \
-									      \
-    MOTION (mpeg2_mc.put, motion->ref[0], motion_x, motion_y, 16, 0);	      \
-    MOTION (mpeg2_mc.avg, motion->ref[1], other_x, other_y, 16, 0);	      \
-}									      \
-
-MOTION_FUNCTIONS (420, MOTION_420, MOTION_FIELD_420, MOTION_DMV_420,
-		  MOTION_ZERO_420)
-MOTION_FUNCTIONS (422, MOTION_422, MOTION_FIELD_422, MOTION_DMV_422,
-		  MOTION_ZERO_422)
-MOTION_FUNCTIONS (444, MOTION_444, MOTION_FIELD_444, MOTION_DMV_444,
-		  MOTION_ZERO_444)
-
-/* like motion_frame, but parsing without actual motion compensation */
-static void motion_fr_conceal (mpeg2_decoder_t * const decoder)
-{
-    int tmp;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    tmp = (decoder->f_motion.pmv[0][0] +
-	   get_motion_delta (decoder, decoder->f_motion.f_code[0]));
-    tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[0]);
-    decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[0][0] = tmp;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    tmp = (decoder->f_motion.pmv[0][1] +
-	   get_motion_delta (decoder, decoder->f_motion.f_code[1]));
-    tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[1]);
-    decoder->f_motion.pmv[1][1] = decoder->f_motion.pmv[0][1] = tmp;
-
-    DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */
-}
-
-static void motion_fi_conceal (mpeg2_decoder_t * const decoder)
-{
-    int tmp;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    DUMPBITS (bit_buf, bits, 1); /* remove field_select */
-
-    tmp = (decoder->f_motion.pmv[0][0] +
-	   get_motion_delta (decoder, decoder->f_motion.f_code[0]));
-    tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[0]);
-    decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[0][0] = tmp;
-
-    NEEDBITS (bit_buf, bits, bit_ptr);
-    tmp = (decoder->f_motion.pmv[0][1] +
-	   get_motion_delta (decoder, decoder->f_motion.f_code[1]));
-    tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[1]);
-    decoder->f_motion.pmv[1][1] = decoder->f_motion.pmv[0][1] = tmp;
-
-    DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */
-}
-
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-
-#define MOTION_CALL(routine,direction)				\
-do {								\
-    if ((direction) & MACROBLOCK_MOTION_FORWARD)		\
-	routine (decoder, &(decoder->f_motion), mpeg2_mc.put);	\
-    if ((direction) & MACROBLOCK_MOTION_BACKWARD)		\
-	routine (decoder, &(decoder->b_motion),			\
-		 ((direction) & MACROBLOCK_MOTION_FORWARD ?	\
-		  mpeg2_mc.avg : mpeg2_mc.put));		\
-} while (0)
-
-#define NEXT_MACROBLOCK							\
-do {									\
-    decoder->offset += 16;						\
-    if (decoder->offset == decoder->width) {				\
-	do { /* just so we can use the break statement */		\
-	    if (decoder->convert) {					\
-		decoder->convert (decoder->convert_id, decoder->dest,	\
-				  decoder->v_offset);			\
-		if (decoder->coding_type == B_TYPE)			\
-		    break;						\
-	    }								\
-	    decoder->dest[0] += decoder->slice_stride;			\
-	    decoder->dest[1] += decoder->slice_uv_stride;		\
-	    decoder->dest[2] += decoder->slice_uv_stride;		\
-	} while (0);							\
-	decoder->v_offset += 16;					\
-	if (decoder->v_offset > decoder->limit_y) {			\
-	    if (mpeg2_cpu_state_restore)				\
-		mpeg2_cpu_state_restore (&cpu_state);			\
-	    return;							\
-	}								\
-	decoder->offset = 0;						\
-    }									\
-} while (0)
-
-void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3],
-		      uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3])
-{
-    int offset, stride, height, bottom_field;
-
-    stride = decoder->stride_frame;
-    bottom_field = (decoder->picture_structure == BOTTOM_FIELD);
-    offset = bottom_field ? stride : 0;
-    height = decoder->height;
-
-    decoder->picture_dest[0] = current_fbuf[0] + offset;
-    decoder->picture_dest[1] = current_fbuf[1] + (offset >> 1);
-    decoder->picture_dest[2] = current_fbuf[2] + (offset >> 1);
-
-    decoder->f_motion.ref[0][0] = forward_fbuf[0] + offset;
-    decoder->f_motion.ref[0][1] = forward_fbuf[1] + (offset >> 1);
-    decoder->f_motion.ref[0][2] = forward_fbuf[2] + (offset >> 1);
-
-    decoder->b_motion.ref[0][0] = backward_fbuf[0] + offset;
-    decoder->b_motion.ref[0][1] = backward_fbuf[1] + (offset >> 1);
-    decoder->b_motion.ref[0][2] = backward_fbuf[2] + (offset >> 1);
-
-    if (decoder->picture_structure != FRAME_PICTURE) {
-	decoder->dmv_offset = bottom_field ? 1 : -1;
-	decoder->f_motion.ref2[0] = decoder->f_motion.ref[bottom_field];
-	decoder->f_motion.ref2[1] = decoder->f_motion.ref[!bottom_field];
-	decoder->b_motion.ref2[0] = decoder->b_motion.ref[bottom_field];
-	decoder->b_motion.ref2[1] = decoder->b_motion.ref[!bottom_field];
-	offset = stride - offset;
-
-	if (decoder->second_field && (decoder->coding_type != B_TYPE))
-	    forward_fbuf = current_fbuf;
-
-	decoder->f_motion.ref[1][0] = forward_fbuf[0] + offset;
-	decoder->f_motion.ref[1][1] = forward_fbuf[1] + (offset >> 1);
-	decoder->f_motion.ref[1][2] = forward_fbuf[2] + (offset >> 1);
-
-	decoder->b_motion.ref[1][0] = backward_fbuf[0] + offset;
-	decoder->b_motion.ref[1][1] = backward_fbuf[1] + (offset >> 1);
-	decoder->b_motion.ref[1][2] = backward_fbuf[2] + (offset >> 1);
-
-	stride <<= 1;
-	height >>= 1;
-    }
-
-    decoder->stride = stride;
-    decoder->uv_stride = stride >> 1;
-    decoder->slice_stride = 16 * stride;
-    decoder->slice_uv_stride =
-	decoder->slice_stride >> (2 - decoder->chroma_format);
-    decoder->limit_x = 2 * decoder->width - 32;
-    decoder->limit_y_16 = 2 * height - 32;
-    decoder->limit_y_8 = 2 * height - 16;
-    decoder->limit_y = height - 16;
-
-    if (decoder->mpeg1) {
-	decoder->motion_parser[0] = motion_zero_420;
-	decoder->motion_parser[MC_FRAME] = motion_mp1;
-	decoder->motion_parser[4] = motion_reuse_420;
-    } else if (decoder->picture_structure == FRAME_PICTURE) {
-	if (decoder->chroma_format == 0) {
-	    decoder->motion_parser[0] = motion_zero_420;
-	    decoder->motion_parser[MC_FIELD] = motion_fr_field_420;
-	    decoder->motion_parser[MC_FRAME] = motion_fr_frame_420;
-	    decoder->motion_parser[MC_DMV] = motion_fr_dmv_420;
-	    decoder->motion_parser[4] = motion_reuse_420;
-	} else if (decoder->chroma_format == 1) {
-	    decoder->motion_parser[0] = motion_zero_422;
-	    decoder->motion_parser[MC_FIELD] = motion_fr_field_422;
-	    decoder->motion_parser[MC_FRAME] = motion_fr_frame_422;
-	    decoder->motion_parser[MC_DMV] = motion_fr_dmv_422;
-	    decoder->motion_parser[4] = motion_reuse_422;
-	} else {
-	    decoder->motion_parser[0] = motion_zero_444;
-	    decoder->motion_parser[MC_FIELD] = motion_fr_field_444;
-	    decoder->motion_parser[MC_FRAME] = motion_fr_frame_444;
-	    decoder->motion_parser[MC_DMV] = motion_fr_dmv_444;
-	    decoder->motion_parser[4] = motion_reuse_444;
-	}
-    } else {
-	if (decoder->chroma_format == 0) {
-	    decoder->motion_parser[0] = motion_zero_420;
-	    decoder->motion_parser[MC_FIELD] = motion_fi_field_420;
-	    decoder->motion_parser[MC_16X8] = motion_fi_16x8_420;
-	    decoder->motion_parser[MC_DMV] = motion_fi_dmv_420;
-	    decoder->motion_parser[4] = motion_reuse_420;
-	} else if (decoder->chroma_format == 1) {
-	    decoder->motion_parser[0] = motion_zero_422;
-	    decoder->motion_parser[MC_FIELD] = motion_fi_field_422;
-	    decoder->motion_parser[MC_16X8] = motion_fi_16x8_422;
-	    decoder->motion_parser[MC_DMV] = motion_fi_dmv_422;
-	    decoder->motion_parser[4] = motion_reuse_422;
-	} else {
-	    decoder->motion_parser[0] = motion_zero_444;
-	    decoder->motion_parser[MC_FIELD] = motion_fi_field_444;
-	    decoder->motion_parser[MC_16X8] = motion_fi_16x8_444;
-	    decoder->motion_parser[MC_DMV] = motion_fi_dmv_444;
-	    decoder->motion_parser[4] = motion_reuse_444;
-	}
-    }
-}
-
-static inline int slice_init (mpeg2_decoder_t * const decoder, int code)
-{
-#define bit_buf (decoder->bitstream_buf)
-#define bits (decoder->bitstream_bits)
-#define bit_ptr (decoder->bitstream_ptr)
-    int offset;
-    const MBAtab * mba;
-
-    decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] =
-	decoder->dc_dct_pred[2] = 16384;
-
-    decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0;
-    decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0;
-    decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0;
-    decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0;
-
-    if (decoder->vertical_position_extension) {
-	code += UBITS (bit_buf, 3) << 7;
-	DUMPBITS (bit_buf, bits, 3);
-    }
-    decoder->v_offset = (code - 1) * 16;
-    offset = 0;
-    if (!(decoder->convert) || decoder->coding_type != B_TYPE)
-	offset = (code - 1) * decoder->slice_stride;
-
-    decoder->dest[0] = decoder->picture_dest[0] + offset;
-    offset >>= (2 - decoder->chroma_format);
-    decoder->dest[1] = decoder->picture_dest[1] + offset;
-    decoder->dest[2] = decoder->picture_dest[2] + offset;
-
-    get_quantizer_scale (decoder);
-
-    /* ignore intra_slice and all the extra data */
-    while (bit_buf & 0x80000000) {
-	DUMPBITS (bit_buf, bits, 9);
-	NEEDBITS (bit_buf, bits, bit_ptr);
-    }
-
-    /* decode initial macroblock address increment */
-    offset = 0;
-    while (1) {
-	if (bit_buf >= 0x08000000) {
-	    mba = MBA_5 + (UBITS (bit_buf, 6) - 2);
-	    break;
-	} else if (bit_buf >= 0x01800000) {
-	    mba = MBA_11 + (UBITS (bit_buf, 12) - 24);
-	    break;
-	} else switch (UBITS (bit_buf, 12)) {
-	case 8:		/* macroblock_escape */
-	    offset += 33;
-	    DUMPBITS (bit_buf, bits, 11);
-	    NEEDBITS (bit_buf, bits, bit_ptr);
-	    continue;
-	case 15:	/* macroblock_stuffing (MPEG1 only) */
-	    bit_buf &= 0xfffff;
-	    DUMPBITS (bit_buf, bits, 11);
-	    NEEDBITS (bit_buf, bits, bit_ptr);
-	    continue;
-	default:	/* error */
-	    return 1;
-	}
-    }
-    DUMPBITS (bit_buf, bits, mba->len + 1);
-    decoder->offset = (offset + mba->mba) << 4;
-
-    while (decoder->offset - decoder->width >= 0) {
-	decoder->offset -= decoder->width;
-	if (!(decoder->convert) || decoder->coding_type != B_TYPE) {
-	    decoder->dest[0] += decoder->slice_stride;
-	    decoder->dest[1] += decoder->slice_uv_stride;
-	    decoder->dest[2] += decoder->slice_uv_stride;
-	}
-	decoder->v_offset += 16;
-    }
-    if (decoder->v_offset > decoder->limit_y)
-	return 1;
-
-    return 0;
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
-
-void mpeg2_slice (mpeg2_decoder_t * const decoder, const int code,
-		  const uint8_t * const buffer)
-{
-#define bit_buf (decoder->bitstream_buf)
-#define bits (decoder->bitstream_bits)
-#define bit_ptr (decoder->bitstream_ptr)
-    cpu_state_t cpu_state;
-
-    bitstream_init (decoder, buffer);
-
-    if (slice_init (decoder, code))
-	return;
-
-    if (mpeg2_cpu_state_save)
-	mpeg2_cpu_state_save (&cpu_state);
-
-    while (1) {
-	int macroblock_modes;
-	int mba_inc;
-	const MBAtab * mba;
-
-	NEEDBITS (bit_buf, bits, bit_ptr);
-
-	macroblock_modes = get_macroblock_modes (decoder);
-
-	/* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? */
-	if (macroblock_modes & MACROBLOCK_QUANT)
-	    get_quantizer_scale (decoder);
-
-	if (macroblock_modes & MACROBLOCK_INTRA) {
-
-	    int DCT_offset, DCT_stride;
-	    int offset;
-	    uint8_t * dest_y;
-
-	    if (decoder->concealment_motion_vectors) {
-		if (decoder->picture_structure == FRAME_PICTURE)
-		    motion_fr_conceal (decoder);
-		else
-		    motion_fi_conceal (decoder);
-	    } else {
-		decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0;
-		decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0;
-		decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0;
-		decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0;
-	    }
-
-	    if (macroblock_modes & DCT_TYPE_INTERLACED) {
-		DCT_offset = decoder->stride;
-		DCT_stride = decoder->stride * 2;
-	    } else {
-		DCT_offset = decoder->stride * 8;
-		DCT_stride = decoder->stride;
-	    }
-
-	    offset = decoder->offset;
-	    dest_y = decoder->dest[0] + offset;
-	    slice_intra_DCT (decoder, 0, dest_y, DCT_stride);
-	    slice_intra_DCT (decoder, 0, dest_y + 8, DCT_stride);
-	    slice_intra_DCT (decoder, 0, dest_y + DCT_offset, DCT_stride);
-	    slice_intra_DCT (decoder, 0, dest_y + DCT_offset + 8, DCT_stride);
-	    if (likely (decoder->chroma_format == 0)) {
-		slice_intra_DCT (decoder, 1, decoder->dest[1] + (offset >> 1),
-				 decoder->uv_stride);
-		slice_intra_DCT (decoder, 2, decoder->dest[2] + (offset >> 1),
-				 decoder->uv_stride);
-		if (decoder->coding_type == D_TYPE) {
-		    NEEDBITS (bit_buf, bits, bit_ptr);
-		    DUMPBITS (bit_buf, bits, 1);
-		}
-	    } else if (likely (decoder->chroma_format == 1)) {
-		uint8_t * dest_u = decoder->dest[1] + (offset >> 1);
-		uint8_t * dest_v = decoder->dest[2] + (offset >> 1);
-		DCT_stride >>= 1;
-		DCT_offset >>= 1;
-		slice_intra_DCT (decoder, 1, dest_u, DCT_stride);
-		slice_intra_DCT (decoder, 2, dest_v, DCT_stride);
-		slice_intra_DCT (decoder, 1, dest_u + DCT_offset, DCT_stride);
-		slice_intra_DCT (decoder, 2, dest_v + DCT_offset, DCT_stride);
-	    } else {
-		uint8_t * dest_u = decoder->dest[1] + offset;
-		uint8_t * dest_v = decoder->dest[2] + offset;
-		slice_intra_DCT (decoder, 1, dest_u, DCT_stride);
-		slice_intra_DCT (decoder, 2, dest_v, DCT_stride);
-		slice_intra_DCT (decoder, 1, dest_u + DCT_offset, DCT_stride);
-		slice_intra_DCT (decoder, 2, dest_v + DCT_offset, DCT_stride);
-		slice_intra_DCT (decoder, 1, dest_u + 8, DCT_stride);
-		slice_intra_DCT (decoder, 2, dest_v + 8, DCT_stride);
-		slice_intra_DCT (decoder, 1, dest_u + DCT_offset + 8,
-				 DCT_stride);
-		slice_intra_DCT (decoder, 2, dest_v + DCT_offset + 8,
-				 DCT_stride);
-	    }
-	} else {
-
-	    motion_parser_t * parser;
-
-	    parser =
-		decoder->motion_parser[macroblock_modes >> MOTION_TYPE_SHIFT];
-	    MOTION_CALL (parser, macroblock_modes);
-
-	    if (macroblock_modes & MACROBLOCK_PATTERN) {
-		int coded_block_pattern;
-		int DCT_offset, DCT_stride;
-
-		if (macroblock_modes & DCT_TYPE_INTERLACED) {
-		    DCT_offset = decoder->stride;
-		    DCT_stride = decoder->stride * 2;
-		} else {
-		    DCT_offset = decoder->stride * 8;
-		    DCT_stride = decoder->stride;
-		}
-
-		coded_block_pattern = get_coded_block_pattern (decoder);
-
-		if (likely (decoder->chroma_format == 0)) {
-		    int offset = decoder->offset;
-		    uint8_t * dest_y = decoder->dest[0] + offset;
-		    if (coded_block_pattern & 1)
-			slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride);
-		    if (coded_block_pattern & 2)
-			slice_non_intra_DCT (decoder, 0, dest_y + 8,
-					     DCT_stride);
-		    if (coded_block_pattern & 4)
-			slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset,
-					     DCT_stride);
-		    if (coded_block_pattern & 8)
-			slice_non_intra_DCT (decoder, 0,
-					     dest_y + DCT_offset + 8,
-					     DCT_stride);
-		    if (coded_block_pattern & 16)
-			slice_non_intra_DCT (decoder, 1,
-					     decoder->dest[1] + (offset >> 1),
-					     decoder->uv_stride);
-		    if (coded_block_pattern & 32)
-			slice_non_intra_DCT (decoder, 2,
-					     decoder->dest[2] + (offset >> 1),
-					     decoder->uv_stride);
-		} else if (likely (decoder->chroma_format == 1)) {
-		    int offset;
-		    uint8_t * dest_y;
-
-		    coded_block_pattern |= bit_buf & (3 << 30);
-		    DUMPBITS (bit_buf, bits, 2);
-
-		    offset = decoder->offset;
-		    dest_y = decoder->dest[0] + offset;
-		    if (coded_block_pattern & 1)
-			slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride);
-		    if (coded_block_pattern & 2)
-			slice_non_intra_DCT (decoder, 0, dest_y + 8,
-					     DCT_stride);
-		    if (coded_block_pattern & 4)
-			slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset,
-					     DCT_stride);
-		    if (coded_block_pattern & 8)
-			slice_non_intra_DCT (decoder, 0,
-					     dest_y + DCT_offset + 8,
-					     DCT_stride);
-
-		    DCT_stride >>= 1;
-		    DCT_offset = (DCT_offset + offset) >> 1;
-		    if (coded_block_pattern & 16)
-			slice_non_intra_DCT (decoder, 1,
-					     decoder->dest[1] + (offset >> 1),
-					     DCT_stride);
-		    if (coded_block_pattern & 32)
-			slice_non_intra_DCT (decoder, 2,
-					     decoder->dest[2] + (offset >> 1),
-					     DCT_stride);
-		    if (coded_block_pattern & (2 << 30))
-			slice_non_intra_DCT (decoder, 1,
-					     decoder->dest[1] + DCT_offset,
-					     DCT_stride);
-		    if (coded_block_pattern & (1 << 30))
-			slice_non_intra_DCT (decoder, 2,
-					     decoder->dest[2] + DCT_offset,
-					     DCT_stride);
-		} else {
-		    int offset;
-		    uint8_t * dest_y, * dest_u, * dest_v;
-
-		    coded_block_pattern |= bit_buf & (63 << 26);
-		    DUMPBITS (bit_buf, bits, 6);
-
-		    offset = decoder->offset;
-		    dest_y = decoder->dest[0] + offset;
-		    dest_u = decoder->dest[1] + offset;
-		    dest_v = decoder->dest[2] + offset;
-
-		    if (coded_block_pattern & 1)
-			slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride);
-		    if (coded_block_pattern & 2)
-			slice_non_intra_DCT (decoder, 0, dest_y + 8,
-					     DCT_stride);
-		    if (coded_block_pattern & 4)
-			slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset,
-					     DCT_stride);
-		    if (coded_block_pattern & 8)
-			slice_non_intra_DCT (decoder, 0,
-					     dest_y + DCT_offset + 8,
-					     DCT_stride);
-
-		    if (coded_block_pattern & 16)
-			slice_non_intra_DCT (decoder, 1, dest_u, DCT_stride);
-		    if (coded_block_pattern & 32)
-			slice_non_intra_DCT (decoder, 2, dest_v, DCT_stride);
-		    if (coded_block_pattern & (32 << 26))
-			slice_non_intra_DCT (decoder, 1, dest_u + DCT_offset,
-					     DCT_stride);
-		    if (coded_block_pattern & (16 << 26))
-			slice_non_intra_DCT (decoder, 2, dest_v + DCT_offset,
-					     DCT_stride);
-		    if (coded_block_pattern & (8 << 26))
-			slice_non_intra_DCT (decoder, 1, dest_u + 8,
-					     DCT_stride);
-		    if (coded_block_pattern & (4 << 26))
-			slice_non_intra_DCT (decoder, 2, dest_v + 8,
-					     DCT_stride);
-		    if (coded_block_pattern & (2 << 26))
-			slice_non_intra_DCT (decoder, 1,
-					     dest_u + DCT_offset + 8,
-					     DCT_stride);
-		    if (coded_block_pattern & (1 << 26))
-			slice_non_intra_DCT (decoder, 2,
-					     dest_v + DCT_offset + 8,
-					     DCT_stride);
-		}
-	    }
-
-	    decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] =
-		decoder->dc_dct_pred[2] = 16384;
-	}
-
-	NEXT_MACROBLOCK;
-
-	NEEDBITS (bit_buf, bits, bit_ptr);
-	mba_inc = 0;
-	while (1) {
-	    if (bit_buf >= 0x10000000) {
-		mba = MBA_5 + (UBITS (bit_buf, 5) - 2);
-		break;
-	    } else if (bit_buf >= 0x03000000) {
-		mba = MBA_11 + (UBITS (bit_buf, 11) - 24);
-		break;
-	    } else switch (UBITS (bit_buf, 11)) {
-	    case 8:		/* macroblock_escape */
-		mba_inc += 33;
-		/* pass through */
-	    case 15:	/* macroblock_stuffing (MPEG1 only) */
-		DUMPBITS (bit_buf, bits, 11);
-		NEEDBITS (bit_buf, bits, bit_ptr);
-		continue;
-	    default:	/* end of slice, or error */
-		if (mpeg2_cpu_state_restore)
-		    mpeg2_cpu_state_restore (&cpu_state);
-		return;
-	    }
-	}
-	DUMPBITS (bit_buf, bits, mba->len);
-	mba_inc += mba->mba;
-
-	if (mba_inc) {
-	    decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] =
-		decoder->dc_dct_pred[2] = 16384;
-
-	    if (decoder->coding_type == P_TYPE) {
-		do {
-		    MOTION_CALL (decoder->motion_parser[0],
-				 MACROBLOCK_MOTION_FORWARD);
-		    NEXT_MACROBLOCK;
-		} while (--mba_inc);
-	    } else {
-		do {
-		    MOTION_CALL (decoder->motion_parser[4], macroblock_modes);
-		    NEXT_MACROBLOCK;
-		} while (--mba_inc);
-	    }
-	}
-    }
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
diff --git a/src/libmpeg2new/libmpeg2/uyvy.c b/src/libmpeg2new/libmpeg2/uyvy.c
deleted file mode 100644
index 7f107ffad..000000000
--- a/src/libmpeg2new/libmpeg2/uyvy.c
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * uyvy.c
- * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 2003      Regis Duchesne <hpreg@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include "config.h"
-
-#include <inttypes.h>
-
-#include "mpeg2.h"
-#include "mpeg2convert.h"
-
-typedef struct {
-    int width;
-    int stride;
-    int chroma420;
-    uint8_t * out;
-} convert_uyvy_t;
-
-static void uyvy_start (void * _id, const mpeg2_fbuf_t * fbuf,
-			const mpeg2_picture_t * picture,
-			const mpeg2_gop_t * gop)
-{
-    convert_uyvy_t * instance = (convert_uyvy_t *) _id;
-
-    instance->out = fbuf->buf[0];
-    instance->stride = instance->width;
-    if (picture->nb_fields == 1) {
-	if (! (picture->flags & PIC_FLAG_TOP_FIELD_FIRST))
-	    instance->out += 2 * instance->stride;
-	instance->stride <<= 1;
-    }
-}
-
-#ifdef WORDS_BIGENDIAN
-#define PACK(a,b,c,d) (((a) << 24) | ((b) << 16) | ((c) << 8) | (d))
-#else
-#define PACK(a,b,c,d) (((d) << 24) | ((c) << 16) | ((b) << 8) | (a))
-#endif
-
-static void uyvy_copy (void * const _id, uint8_t * const * src,
-		       const unsigned int v_offset)
-{
-    const convert_uyvy_t * const id = (convert_uyvy_t *) _id;
-    uint8_t * _dst;
-    uint8_t * py, * pu, * pv;
-    int i, j;
-
-    _dst = id->out + 2 * id->stride * v_offset;
-    py = src[0]; pu = src[1]; pv = src[2];
-
-    i = 16;
-    do {
-	uint32_t * dst = (uint32_t *) _dst;
-
-	j = id->width >> 4;
-	do {
-	    dst[0] = PACK (pu[0],  py[0], pv[0],  py[1]);
-	    dst[1] = PACK (pu[1],  py[2], pv[1],  py[3]);
-	    dst[2] = PACK (pu[2],  py[4], pv[2],  py[5]);
-	    dst[3] = PACK (pu[3],  py[6], pv[3],  py[7]);
-	    dst[4] = PACK (pu[4],  py[8], pv[4],  py[9]);
-	    dst[5] = PACK (pu[5], py[10], pv[5], py[11]);
-	    dst[6] = PACK (pu[6], py[12], pv[6], py[13]);
-	    dst[7] = PACK (pu[7], py[14], pv[7], py[15]);
-	    py += 16;
-	    pu += 8;
-	    pv += 8;
-	    dst += 8;
-	} while (--j);
-	py -= id->width;
-	pu -= id->width >> 1;
-	pv -= id->width >> 1;
-	_dst += 2 * id->stride;
-	py += id->stride;
-	if (! (--i & id->chroma420)) {
-	    pu += id->stride >> 1;
-	    pv += id->stride >> 1;
-	}
-    } while (i);
-}
-
-int mpeg2convert_uyvy (int stage, void * _id, const mpeg2_sequence_t * seq,
-		       int stride, uint32_t accel, void * arg,
-		       mpeg2_convert_init_t * result)
-{
-    convert_uyvy_t * instance = (convert_uyvy_t *) _id;
-
-    if (seq->chroma_width == seq->width)
-	return 1;
-
-    if (instance) {
-	instance->width = seq->width;
-	instance->chroma420 = (seq->chroma_height < seq->height);
-	result->buf_size[0] = seq->width * seq->height * 2;
-	result->buf_size[1] = result->buf_size[2] = 0;
-	result->start = uyvy_start;
-	result->copy = uyvy_copy;
-    } else {
-	result->id_size = sizeof (convert_uyvy_t);
-    }
-
-    return 0;
-}
diff --git a/src/libmpeg2new/libmpeg2/vlc.h b/src/libmpeg2new/libmpeg2/vlc.h
deleted file mode 100644
index 57448ce04..000000000
--- a/src/libmpeg2new/libmpeg2/vlc.h
+++ /dev/null
@@ -1,429 +0,0 @@
-/*
- * vlc.h
- * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#define GETWORD(bit_buf,shift,bit_ptr)				\
-do {								\
-    bit_buf |= ((bit_ptr[0] << 8) | bit_ptr[1]) << (shift);	\
-    bit_ptr += 2;						\
-} while (0)
-
-static inline void bitstream_init (mpeg2_decoder_t * decoder,
-				   const uint8_t * start)
-{
-    decoder->bitstream_buf =
-	(start[0] << 24) | (start[1] << 16) | (start[2] << 8) | start[3];
-    decoder->bitstream_ptr = start + 4;
-    decoder->bitstream_bits = -16;
-}
-
-/* make sure that there are at least 16 valid bits in bit_buf */
-#define NEEDBITS(bit_buf,bits,bit_ptr)		\
-do {						\
-    if (unlikely (bits > 0)) {			\
-	GETWORD (bit_buf, bits, bit_ptr);	\
-	bits -= 16;				\
-    }						\
-} while (0)
-
-/* remove num valid bits from bit_buf */
-#define DUMPBITS(bit_buf,bits,num)	\
-do {					\
-    bit_buf <<= (num);			\
-    bits += (num);			\
-} while (0)
-
-/* take num bits from the high part of bit_buf and zero extend them */
-#define UBITS(bit_buf,num) (((uint32_t)(bit_buf)) >> (32 - (num)))
-
-/* take num bits from the high part of bit_buf and sign extend them */
-#define SBITS(bit_buf,num) (((int32_t)(bit_buf)) >> (32 - (num)))
-
-typedef struct {
-    uint8_t modes;
-    uint8_t len;
-} MBtab;
-
-typedef struct {
-    uint8_t delta;
-    uint8_t len;
-} MVtab;
-
-typedef struct {
-    int8_t dmv;
-    uint8_t len;
-} DMVtab;
-
-typedef struct {
-    uint8_t cbp;
-    uint8_t len;
-} CBPtab;
-
-typedef struct {
-    uint8_t size;
-    uint8_t len;
-} DCtab;
-
-typedef struct {
-    uint8_t run;
-    uint8_t level;
-    uint8_t len;
-} DCTtab;
-
-typedef struct {
-    uint8_t mba;
-    uint8_t len;
-} MBAtab;
-
-
-#define INTRA MACROBLOCK_INTRA
-#define QUANT MACROBLOCK_QUANT
-
-static const MBtab MB_I [] = {
-    {INTRA|QUANT, 2}, {INTRA, 1}
-};
-
-#define MC MACROBLOCK_MOTION_FORWARD
-#define CODED MACROBLOCK_PATTERN
-
-static const MBtab MB_P [] = {
-    {INTRA|QUANT, 6}, {CODED|QUANT, 5}, {MC|CODED|QUANT, 5}, {INTRA,    5},
-    {MC,          3}, {MC,          3}, {MC,             3}, {MC,       3},
-    {CODED,       2}, {CODED,       2}, {CODED,          2}, {CODED,    2},
-    {CODED,       2}, {CODED,       2}, {CODED,          2}, {CODED,    2},
-    {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1},
-    {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1},
-    {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1},
-    {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1}
-};
-
-#define FWD MACROBLOCK_MOTION_FORWARD
-#define BWD MACROBLOCK_MOTION_BACKWARD
-#define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD
-
-static const MBtab MB_B [] = {
-    {0,                 6}, {INTRA|QUANT,       6},
-    {BWD|CODED|QUANT,   6}, {FWD|CODED|QUANT,   6},
-    {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5},
-					{INTRA,       5}, {INTRA,       5},
-    {FWD,         4}, {FWD,         4}, {FWD,         4}, {FWD,         4},
-    {FWD|CODED,   4}, {FWD|CODED,   4}, {FWD|CODED,   4}, {FWD|CODED,   4},
-    {BWD,         3}, {BWD,         3}, {BWD,         3}, {BWD,         3},
-    {BWD,         3}, {BWD,         3}, {BWD,         3}, {BWD,         3},
-    {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3},
-    {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3},
-    {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
-    {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
-    {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
-    {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
-    {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
-    {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
-    {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
-    {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}
-};
-
-#undef INTRA
-#undef QUANT
-#undef MC
-#undef CODED
-#undef FWD
-#undef BWD
-#undef INTER
-
-
-static const MVtab MV_4 [] = {
-    { 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}
-};
-
-static const MVtab MV_10 [] = {
-    { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10},
-    { 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10},
-    {11,10}, {10,10}, { 9, 9}, { 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9},
-    { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7},
-    { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7},
-    { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}
-};
-
-
-static const DMVtab DMV_2 [] = {
-    { 0, 1}, { 0, 1}, { 1, 2}, {-1, 2}
-};
-
-
-static const CBPtab CBP_7 [] = {
-    {0x11, 7}, {0x12, 7}, {0x14, 7}, {0x18, 7},
-    {0x21, 7}, {0x22, 7}, {0x24, 7}, {0x28, 7},
-    {0x3f, 6}, {0x3f, 6}, {0x30, 6}, {0x30, 6},
-    {0x09, 6}, {0x09, 6}, {0x06, 6}, {0x06, 6},
-    {0x1f, 5}, {0x1f, 5}, {0x1f, 5}, {0x1f, 5},
-    {0x10, 5}, {0x10, 5}, {0x10, 5}, {0x10, 5},
-    {0x2f, 5}, {0x2f, 5}, {0x2f, 5}, {0x2f, 5},
-    {0x20, 5}, {0x20, 5}, {0x20, 5}, {0x20, 5},
-    {0x07, 5}, {0x07, 5}, {0x07, 5}, {0x07, 5},
-    {0x0b, 5}, {0x0b, 5}, {0x0b, 5}, {0x0b, 5},
-    {0x0d, 5}, {0x0d, 5}, {0x0d, 5}, {0x0d, 5},
-    {0x0e, 5}, {0x0e, 5}, {0x0e, 5}, {0x0e, 5},
-    {0x05, 5}, {0x05, 5}, {0x05, 5}, {0x05, 5},
-    {0x0a, 5}, {0x0a, 5}, {0x0a, 5}, {0x0a, 5},
-    {0x03, 5}, {0x03, 5}, {0x03, 5}, {0x03, 5},
-    {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5},
-    {0x01, 4}, {0x01, 4}, {0x01, 4}, {0x01, 4},
-    {0x01, 4}, {0x01, 4}, {0x01, 4}, {0x01, 4},
-    {0x02, 4}, {0x02, 4}, {0x02, 4}, {0x02, 4},
-    {0x02, 4}, {0x02, 4}, {0x02, 4}, {0x02, 4},
-    {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4},
-    {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4},
-    {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4},
-    {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4},
-    {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3},
-    {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3},
-    {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3},
-    {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3}
-};
-
-static const CBPtab CBP_9 [] = {
-    {0,    9}, {0x00, 9}, {0x39, 9}, {0x36, 9},
-    {0x37, 9}, {0x3b, 9}, {0x3d, 9}, {0x3e, 9},
-    {0x17, 8}, {0x17, 8}, {0x1b, 8}, {0x1b, 8},
-    {0x1d, 8}, {0x1d, 8}, {0x1e, 8}, {0x1e, 8},
-    {0x27, 8}, {0x27, 8}, {0x2b, 8}, {0x2b, 8},
-    {0x2d, 8}, {0x2d, 8}, {0x2e, 8}, {0x2e, 8},
-    {0x19, 8}, {0x19, 8}, {0x16, 8}, {0x16, 8},
-    {0x29, 8}, {0x29, 8}, {0x26, 8}, {0x26, 8},
-    {0x35, 8}, {0x35, 8}, {0x3a, 8}, {0x3a, 8},
-    {0x33, 8}, {0x33, 8}, {0x3c, 8}, {0x3c, 8},
-    {0x15, 8}, {0x15, 8}, {0x1a, 8}, {0x1a, 8},
-    {0x13, 8}, {0x13, 8}, {0x1c, 8}, {0x1c, 8},
-    {0x25, 8}, {0x25, 8}, {0x2a, 8}, {0x2a, 8},
-    {0x23, 8}, {0x23, 8}, {0x2c, 8}, {0x2c, 8},
-    {0x31, 8}, {0x31, 8}, {0x32, 8}, {0x32, 8},
-    {0x34, 8}, {0x34, 8}, {0x38, 8}, {0x38, 8}
-};
-
-
-static const DCtab DC_lum_5 [] = {
-    {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
-    {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
-    {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3},
-    {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}
-};
-
-static const DCtab DC_chrom_5 [] = {
-    {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2},
-    {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
-    {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
-    {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}
-};
-
-static const DCtab DC_long [] = {
-    {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5},
-    {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5},
-    {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, { 7, 6}, { 7, 6},
-    {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9}
-};
-
-
-static const DCTtab DCT_16 [] = {
-    {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
-    {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
-    {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
-    {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
-    {  2,18, 0}, {  2,17, 0}, {  2,16, 0}, {  2,15, 0},
-    {  7, 3, 0}, { 17, 2, 0}, { 16, 2, 0}, { 15, 2, 0},
-    { 14, 2, 0}, { 13, 2, 0}, { 12, 2, 0}, { 32, 1, 0},
-    { 31, 1, 0}, { 30, 1, 0}, { 29, 1, 0}, { 28, 1, 0}
-};
-
-static const DCTtab DCT_15 [] = {
-    {  1,40,15}, {  1,39,15}, {  1,38,15}, {  1,37,15},
-    {  1,36,15}, {  1,35,15}, {  1,34,15}, {  1,33,15},
-    {  1,32,15}, {  2,14,15}, {  2,13,15}, {  2,12,15},
-    {  2,11,15}, {  2,10,15}, {  2, 9,15}, {  2, 8,15},
-    {  1,31,14}, {  1,31,14}, {  1,30,14}, {  1,30,14},
-    {  1,29,14}, {  1,29,14}, {  1,28,14}, {  1,28,14},
-    {  1,27,14}, {  1,27,14}, {  1,26,14}, {  1,26,14},
-    {  1,25,14}, {  1,25,14}, {  1,24,14}, {  1,24,14},
-    {  1,23,14}, {  1,23,14}, {  1,22,14}, {  1,22,14},
-    {  1,21,14}, {  1,21,14}, {  1,20,14}, {  1,20,14},
-    {  1,19,14}, {  1,19,14}, {  1,18,14}, {  1,18,14},
-    {  1,17,14}, {  1,17,14}, {  1,16,14}, {  1,16,14}
-};
-
-static const DCTtab DCT_13 [] = {
-    { 11, 2,13}, { 10, 2,13}, {  6, 3,13}, {  4, 4,13},
-    {  3, 5,13}, {  2, 7,13}, {  2, 6,13}, {  1,15,13},
-    {  1,14,13}, {  1,13,13}, {  1,12,13}, { 27, 1,13},
-    { 26, 1,13}, { 25, 1,13}, { 24, 1,13}, { 23, 1,13},
-    {  1,11,12}, {  1,11,12}, {  9, 2,12}, {  9, 2,12},
-    {  5, 3,12}, {  5, 3,12}, {  1,10,12}, {  1,10,12},
-    {  3, 4,12}, {  3, 4,12}, {  8, 2,12}, {  8, 2,12},
-    { 22, 1,12}, { 22, 1,12}, { 21, 1,12}, { 21, 1,12},
-    {  1, 9,12}, {  1, 9,12}, { 20, 1,12}, { 20, 1,12},
-    { 19, 1,12}, { 19, 1,12}, {  2, 5,12}, {  2, 5,12},
-    {  4, 3,12}, {  4, 3,12}, {  1, 8,12}, {  1, 8,12},
-    {  7, 2,12}, {  7, 2,12}, { 18, 1,12}, { 18, 1,12}
-};
-
-static const DCTtab DCT_B14_10 [] = {
-    { 17, 1,10}, {  6, 2,10}, {  1, 7,10}, {  3, 3,10},
-    {  2, 4,10}, { 16, 1,10}, { 15, 1,10}, {  5, 2,10}
-};
-
-static const DCTtab DCT_B14_8 [] = {
-    { 65, 0,12}, { 65, 0,12}, { 65, 0,12}, { 65, 0,12},
-    {  3, 2, 7}, {  3, 2, 7}, { 10, 1, 7}, { 10, 1, 7},
-    {  1, 4, 7}, {  1, 4, 7}, {  9, 1, 7}, {  9, 1, 7},
-    {  8, 1, 6}, {  8, 1, 6}, {  8, 1, 6}, {  8, 1, 6},
-    {  7, 1, 6}, {  7, 1, 6}, {  7, 1, 6}, {  7, 1, 6},
-    {  2, 2, 6}, {  2, 2, 6}, {  2, 2, 6}, {  2, 2, 6},
-    {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6},
-    { 14, 1, 8}, {  1, 6, 8}, { 13, 1, 8}, { 12, 1, 8},
-    {  4, 2, 8}, {  2, 3, 8}, {  1, 5, 8}, { 11, 1, 8}
-};
-
-static const DCTtab DCT_B14AC_5 [] = {
-		 {  1, 3, 5}, {  5, 1, 5}, {  4, 1, 5},
-    {  1, 2, 4}, {  1, 2, 4}, {  3, 1, 4}, {  3, 1, 4},
-    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-    {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2},
-    {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}
-};
-
-static const DCTtab DCT_B14DC_5 [] = {
-		 {  1, 3, 5}, {  5, 1, 5}, {  4, 1, 5},
-    {  1, 2, 4}, {  1, 2, 4}, {  3, 1, 4}, {  3, 1, 4},
-    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-    {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
-    {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
-    {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
-    {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}
-};
-
-static const DCTtab DCT_B15_10 [] = {
-    {  6, 2, 9}, {  6, 2, 9}, { 15, 1, 9}, { 15, 1, 9},
-    {  3, 4,10}, { 17, 1,10}, { 16, 1, 9}, { 16, 1, 9}
-};
-
-static const DCTtab DCT_B15_8 [] = {
-    { 65, 0,12}, { 65, 0,12}, { 65, 0,12}, { 65, 0,12},
-    {  8, 1, 7}, {  8, 1, 7}, {  9, 1, 7}, {  9, 1, 7},
-    {  7, 1, 7}, {  7, 1, 7}, {  3, 2, 7}, {  3, 2, 7},
-    {  1, 7, 6}, {  1, 7, 6}, {  1, 7, 6}, {  1, 7, 6},
-    {  1, 6, 6}, {  1, 6, 6}, {  1, 6, 6}, {  1, 6, 6},
-    {  5, 1, 6}, {  5, 1, 6}, {  5, 1, 6}, {  5, 1, 6},
-    {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6},
-    {  2, 5, 8}, { 12, 1, 8}, {  1,11, 8}, {  1,10, 8},
-    { 14, 1, 8}, { 13, 1, 8}, {  4, 2, 8}, {  2, 4, 8},
-    {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5},
-    {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5},
-    {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5},
-    {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5},
-    {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5},
-    {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5},
-    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-    {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
-    {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
-    {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
-    {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
-    {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
-    {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
-    {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
-    {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-    {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5},
-    {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5},
-    {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5},
-    {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5},
-    { 10, 1, 7}, { 10, 1, 7}, {  2, 3, 7}, {  2, 3, 7},
-    { 11, 1, 7}, { 11, 1, 7}, {  1, 8, 7}, {  1, 8, 7},
-    {  1, 9, 7}, {  1, 9, 7}, {  1,12, 8}, {  1,13, 8},
-    {  3, 3, 8}, {  5, 2, 8}, {  1,14, 8}, {  1,15, 8}
-};
-
-
-static const MBAtab MBA_5 [] = {
-		    {6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4},
-    {2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3},
-    {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1},
-    {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}
-};
-
-static const MBAtab MBA_11 [] = {
-    {32, 11}, {31, 11}, {30, 11}, {29, 11},
-    {28, 11}, {27, 11}, {26, 11}, {25, 11},
-    {24, 11}, {23, 11}, {22, 11}, {21, 11},
-    {20, 10}, {20, 10}, {19, 10}, {19, 10},
-    {18, 10}, {18, 10}, {17, 10}, {17, 10},
-    {16, 10}, {16, 10}, {15, 10}, {15, 10},
-    {14,  8}, {14,  8}, {14,  8}, {14,  8},
-    {14,  8}, {14,  8}, {14,  8}, {14,  8},
-    {13,  8}, {13,  8}, {13,  8}, {13,  8},
-    {13,  8}, {13,  8}, {13,  8}, {13,  8},
-    {12,  8}, {12,  8}, {12,  8}, {12,  8},
-    {12,  8}, {12,  8}, {12,  8}, {12,  8},
-    {11,  8}, {11,  8}, {11,  8}, {11,  8},
-    {11,  8}, {11,  8}, {11,  8}, {11,  8},
-    {10,  8}, {10,  8}, {10,  8}, {10,  8},
-    {10,  8}, {10,  8}, {10,  8}, {10,  8},
-    { 9,  8}, { 9,  8}, { 9,  8}, { 9,  8},
-    { 9,  8}, { 9,  8}, { 9,  8}, { 9,  8},
-    { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
-    { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
-    { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
-    { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
-    { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
-    { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
-    { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
-    { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7}
-};
diff --git a/src/libmpeg2new/xine_mpeg2new_decoder.c b/src/libmpeg2new/xine_mpeg2new_decoder.c
deleted file mode 100644
index 7494791b1..000000000
--- a/src/libmpeg2new/xine_mpeg2new_decoder.c
+++ /dev/null
@@ -1,504 +0,0 @@
-/* 
- * Copyright (C) 2000-2004 the xine project
- * 
- * This file is part of xine, a free video player.
- * 
- * xine is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- * 
- * xine is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
- *
- * stuff needed to turn libmpeg2 into a xine decoder plugin
- */
-
-
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <inttypes.h>
-#include <assert.h>
-
-#include "./include/mpeg2.h"
-#include <xine/xine_internal.h>
-#include <xine/video_out.h>
-#include <xine/buffer.h>
-
-
-
-#define LOG
-#define LOG_FRAME_ALLOC_FREE
-#define LOG_ENTRY
-#define LOG_FRAME_COUNTER
-
-
-typedef struct {
-  video_decoder_class_t   decoder_class;
-} mpeg2_class_t;
-
-typedef struct {
-  uint32_t id;
-  vo_frame_t * img;
-} img_state_t;
-
-typedef struct mpeg2_video_decoder_s {
-  video_decoder_t  video_decoder;
-  mpeg2dec_t      *mpeg2dec;
-  mpeg2_class_t   *class;
-  xine_stream_t   *stream;
-  int32_t         force_aspect;
-  int             force_pan_scan;
-  double          ratio;
-  img_state_t     img_state[30];
-  uint32_t	  frame_number;
-  uint32_t        rff_pattern;
-  
-} mpeg2_video_decoder_t;
-
-
-static void mpeg2_video_print_bad_state(img_state_t * img_state) {
-  int32_t n,m;
-  m=0;
-  for(n=0;n<30;n++) {
-    if (img_state[n].id>0) {
-      printf("%d = %u\n",n, img_state[n].id);
-      m++;
-    }
-  }
-  if (m > 3) _x_abort();
-  if (m == 0) printf("NO FRAMES\n");
-} 
-
-static void mpeg2_video_free_all(img_state_t * img_state) {
-  int32_t n,m;
-  vo_frame_t * img;
-  printf("libmpeg2new:free_all\n");
-  for(n=0;n<30;n++) {
-    if (img_state[n].id>0) {
-      img = img_state[n].img;
-      img->free(img);
-      img_state[n].id = 0;
-    }
-  }
-} 
-
-
-static void mpeg2_video_print_fbuf(const mpeg2_fbuf_t * fbuf) {
-  printf("%p",fbuf);
-  vo_frame_t * img;
-  if (fbuf) {
-    img = (vo_frame_t *) fbuf->id;
-    if (img) {
-      printf (", img=%p, (id=%d)\n",
-             img, img->id);
-    } else {
-      printf (", img=NULL\n");
-    }
-  } else {
-    printf ("\n");
-  }
-}
-
-static void mpeg2_video_decode_data (video_decoder_t *this_gen, buf_element_t *buf_element) {
-  mpeg2_video_decoder_t *this = (mpeg2_video_decoder_t *) this_gen;
-  uint8_t * current = buf_element->content;
-  uint8_t * end = buf_element->content + buf_element->size;
-  const mpeg2_info_t * info;
-  mpeg2_state_t state;
-  vo_frame_t * img;
-  uint32_t picture_structure;
-  int32_t frame_skipping;
-
-  /* handle aspect hints from xine-dvdnav */
-  if (buf_element->decoder_flags & BUF_FLAG_SPECIAL) {
-    if (buf_element->decoder_info[1] == BUF_SPECIAL_ASPECT) {
-      this->force_aspect = buf_element->decoder_info[2];
-      if (buf_element->decoder_info[3] == 0x1 && buf_element->decoder_info[2] == 3)
-	/* letterboxing is denied, we have to do pan&scan */
-	this->force_pan_scan = 1;
-      else
-	this->force_pan_scan = 0;
-    }
-    
-    return;
-  }
-
-  if (buf_element->decoder_flags != 0) return;
-
-#ifdef LOG_ENTRY
-  printf ("libmpeg2: decode_data: enter\n");
-#endif
-
-  mpeg2_buffer (this->mpeg2dec, current, end);
-
-  info = mpeg2_info (this->mpeg2dec);
-  
-  while ((state = mpeg2_parse (this->mpeg2dec)) != STATE_BUFFER) {
-    switch (state) {
-      case STATE_SEQUENCE:
-        /* might set nb fbuf, convert format, stride */
-        /* might set fbufs */
-        _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_BITRATE,   info->sequence->byte_rate * 8);
-        _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_WIDTH,     info->sequence->picture_width);
-        _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_HEIGHT,    info->sequence->picture_height);
-        _x_stream_info_set(this->stream, XINE_STREAM_INFO_FRAME_DURATION,  info->sequence->frame_period / 300);
-        if (this->force_aspect) info->sequence->pixel_width = this->force_aspect;
-        switch (info->sequence->pixel_width) {
-	case 3:
-	  this->ratio = 16.0 / 9.0;
-	  break;
-	case 4:
-	  this->ratio = 2.11;
-	  break;
-	case 2:
-	  this->ratio = 4.0 / 3.0;
-	  break;
-	case 1:
-	default:
-	  this->ratio = (double)info->sequence->picture_width/(double)info->sequence->picture_height;
-	  break;
-        }
-        _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_RATIO, (int)(10000*this->ratio));
-
-        if (info->sequence->flags & SEQ_FLAG_MPEG2) {
-          _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "MPEG 2 (libmpeg2new)");
-        } else {
-          _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "MPEG 1 (libmpeg2new)");
-        }
-
-        break;
-      case STATE_PICTURE:
-        /* might skip */
-        /* might set fbuf */
-        if (info->current_picture->nb_fields == 1) {
-          picture_structure = info->current_picture->flags & PIC_FLAG_TOP_FIELD_FIRST ? VO_TOP_FIELD : VO_BOTTOM_FIELD;
-        } else {
-          picture_structure = VO_BOTH_FIELDS;
-        }
-          
-        img = this->stream->video_out->get_frame (this->stream->video_out,
-                                              info->sequence->picture_width,
-                                              info->sequence->picture_height,
-                                              this->ratio,
-                                              XINE_IMGFMT_YV12,
-                                              picture_structure);
-        this->frame_number++;
-#ifdef LOG_FRAME_COUNTER
-        printf("libmpeg2:frame_number=%d\n",this->frame_number);
-#endif
-        img->top_field_first = info->current_picture->flags & PIC_FLAG_TOP_FIELD_FIRST ? 1 : 0;
-        img->repeat_first_field = (info->current_picture->nb_fields > 2) ? 1 : 0;
-        img->duration=info->sequence->frame_period / 300;
-        if( ((this->rff_pattern & 0xff) == 0xaa ||
-             (this->rff_pattern & 0xff) == 0x55) ) {
-          /* special case for ntsc 3:2 pulldown */
-            img->duration += img->duration/4;
-        } else {
-          if( img->repeat_first_field ) {
-            img->duration = (img->duration * info->current_picture->nb_fields) / 2; 
-          }
-        }
-
-        if ((info->current_picture->flags & 7) == 1) {
-          img->pts=buf_element->pts; /* If an I frame, use PTS */
-        } else {
-          img->pts=0;
-        }
-
- 
-#ifdef LOG_FRAME_ALLOC_FREE
-        printf ("libmpeg2:decode_data:get_frame xine=%p (id=%d)\n", img,img->id);
-#endif
-        if (this->img_state[img->id].id != 0) {
-          printf ("libmpeg2:decode_data:get_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id].id);
-          _x_abort();
-        }
-
-        this->img_state[img->id].id = 1;
-        this->img_state[img->id].img = img;
-
-        mpeg2_set_buf (this->mpeg2dec, img->base, img);
-        break;
-      case STATE_SLICE:
-      case STATE_END:
-#if 0
-    printf("libmpeg2:decode_data:current_fbuf=");
-    mpeg2_video_print_fbuf(info->current_fbuf);
-    printf("libmpeg2:decode_data:display_fbuf=");
-    mpeg2_video_print_fbuf(info->display_fbuf);
-    printf("libmpeg2:decode_data:discard_fbuf=");
-    mpeg2_video_print_fbuf(info->discard_fbuf);
-#endif
-        /* draw current picture */
-        /* might free frame buffer */
-        if (info->display_fbuf && info->display_fbuf->id) {
-          img = (vo_frame_t *) info->display_fbuf->id;
-          /* this should be used to detect any special rff pattern */
-          this->rff_pattern = this->rff_pattern << 1;
-          this->rff_pattern |= img->repeat_first_field;
-
-#ifdef LOG_FRAME_ALLOC_FREE
-          printf ("libmpeg2:decode_data:draw_frame xine=%p, fbuf=%p, id=%d \n", img, info->display_fbuf, img->id);
-#endif
-          if (this->img_state[img->id].id != 1) {
-            printf ("libmpeg2:decode_data:draw_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id].id);
-            _x_abort();
-          }
-          if (this->img_state[img->id].id == 1) {
-            frame_skipping = img->draw (img, this->stream);
-            /* FIXME: Handle skipping */
-            this->img_state[img->id].id = 2;
-          }
-
-        }
-        if (info->discard_fbuf && !info->discard_fbuf->id) {
-          printf ("libmpeg2:decode_data:BAD free_frame discard: xine=%p, fbuf=%p\n", info->discard_fbuf->id, info->discard_fbuf);
-          //_x_abort();
-        }
-        if (info->discard_fbuf && info->discard_fbuf->id) {
-          img = (vo_frame_t *) info->discard_fbuf->id;
-#ifdef LOG_FRAME_ALLOC_FREE
-          printf ("libmpeg2:decode_data:free_frame xine=%p, fbuf=%p,id=%d\n", img, info->discard_fbuf, img->id);
-#endif
-          if (this->img_state[img->id].id != 2) {
-            printf ("libmpeg2:decode_data:free_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id].id);
-            _x_abort();
-          }
-          if (this->img_state[img->id].id == 2) {
-            img->free(img);
-            this->img_state[img->id].id = 0;
-          }
-        }
-#ifdef LOG_FRAME_ALLOC_FREE
-        mpeg2_video_print_bad_state(this->img_state);
-#endif
-        break;
-      case STATE_GOP:
-        break;
-      default:
-	printf("libmpeg2new: STATE unknown %d\n",state);
-        break;
-   }
-
- }
-#ifdef LOG_ENTRY
-  printf ("libmpeg2: decode_data: exit\n");
-#endif
-
-}
-
-static void mpeg2_video_flush (video_decoder_t *this_gen) {
-  mpeg2_video_decoder_t *this = (mpeg2_video_decoder_t *) this_gen;
-
-#ifdef LOG_ENTRY
-  printf ("libmpeg2: flush\n");
-#endif
-
-/*  mpeg2_flush (&this->mpeg2); */
-}
-
-static void mpeg2_video_reset (video_decoder_t *this_gen) {
-  mpeg2_video_decoder_t *this = (mpeg2_video_decoder_t *) this_gen;
-  int32_t state;
-  const mpeg2_info_t * info;
-  vo_frame_t * img;
-  int32_t frame_skipping;
-
-#ifdef LOG_ENTRY
-  printf ("libmpeg2: reset\n");
-#endif
-  mpeg2_reset (this->mpeg2dec, 1); /* 1 for full reset */
-  mpeg2_video_free_all(this->img_state);
-
-
-#if 0  /* This bit of code does not work yet. */
-  info = mpeg2_info (this->mpeg2dec);
-  state = mpeg2_reset (this->mpeg2dec);
-  printf("reset state1:%d\n",state);
-  if (info->display_fbuf && info->display_fbuf->id) {
-    img = (vo_frame_t *) info->display_fbuf->id;
-
-    if (this->img_state[img->id] != 1) {
-      printf ("libmpeg2:decode_data:draw_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id]);
-      _x_abort();
-    }
-    if (this->img_state[img->id] == 1) {
-      frame_skipping = img->draw (img, this->stream);
-      /* FIXME: Handle skipping */
-      this->img_state[img->id] = 2;
-    }
-  }
-
-  if (info->discard_fbuf && !info->discard_fbuf->id) {
-    printf ("libmpeg2:decode_data:BAD free_frame discard_fbuf=%p\n", info->discard_fbuf);
-    _x_abort();
-  }
-  if (info->discard_fbuf && info->discard_fbuf->id) {
-    img = (vo_frame_t *) info->discard_fbuf->id;
-    if (this->img_state[img->id] != 2) {
-      printf ("libmpeg2:decode_data:free_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id]);
-      _x_abort();
-    }
-    if (this->img_state[img->id] == 2) {
-      img->free(img);
-      this->img_state[img->id] = 0;
-    }
-  }
-  state = mpeg2_parse (this->mpeg2dec);
-  printf("reset state2:%d\n",state);
-  if (info->display_fbuf && info->display_fbuf->id) {
-    img = (vo_frame_t *) info->display_fbuf->id;
-
-    if (this->img_state[img->id] != 1) {
-      printf ("libmpeg2:decode_data:draw_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id]);
-      _x_abort();
-    }
-    if (this->img_state[img->id] == 1) {
-      frame_skipping = img->draw (img, this->stream);
-      /* FIXME: Handle skipping */
-      this->img_state[img->id] = 2;
-    }
-  }
-
-  if (info->discard_fbuf && !info->discard_fbuf->id) {
-    printf ("libmpeg2:decode_data:BAD free_frame discard_fbuf=%p\n", info->discard_fbuf);
-    _x_abort();
-  }
-  if (info->discard_fbuf && info->discard_fbuf->id) {
-    img = (vo_frame_t *) info->discard_fbuf->id;
-    if (this->img_state[img->id] != 2) {
-      printf ("libmpeg2:decode_data:free_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id]);
-      _x_abort();
-    }
-    if (this->img_state[img->id] == 2) {
-      img->free(img);
-      this->img_state[img->id] = 0;
-    }
-  }
-  state = mpeg2_parse (this->mpeg2dec);
-  printf("reset state3:%d\n",state);
-  if (info->display_fbuf && info->display_fbuf->id) {
-    img = (vo_frame_t *) info->display_fbuf->id;
-
-    if (this->img_state[img->id] != 1) {
-      printf ("libmpeg2:decode_data:draw_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id]);
-      _x_abort();
-    }
-    if (this->img_state[img->id] == 1) {
-      frame_skipping = img->draw (img, this->stream);
-      /* FIXME: Handle skipping */
-      this->img_state[img->id] = 2;
-    }
-  }
-
-  if (info->discard_fbuf && !info->discard_fbuf->id) {
-    printf ("libmpeg2:decode_data:BAD free_frame discard_fbuf=%p\n", info->discard_fbuf);
-    _x_abort();
-  }
-  if (info->discard_fbuf && info->discard_fbuf->id) {
-    img = (vo_frame_t *) info->discard_fbuf->id;
-    if (this->img_state[img->id] != 2) {
-      printf ("libmpeg2:decode_data:free_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id]);
-      _x_abort();
-    }
-    if (this->img_state[img->id] == 2) {
-      img->free(img);
-      this->img_state[img->id] = 0;
-    }
-  }
-#endif
-
-}
-
-static void mpeg2_video_discontinuity (video_decoder_t *this_gen) {
-  mpeg2_video_decoder_t *this = (mpeg2_video_decoder_t *) this_gen;
-
-#ifdef LOG_ENTRY
-  printf ("libmpeg2: dicontinuity\n");
-#endif
-/*  mpeg2_discontinuity (&this->mpeg2dec); */
-}
-
-static void mpeg2_video_dispose (video_decoder_t *this_gen) {
-
-  mpeg2_video_decoder_t *this = (mpeg2_video_decoder_t *) this_gen;
-
-#ifdef LOG_ENTRY
-  printf ("libmpeg2: close\n");
-#endif
-
-  mpeg2_close (this->mpeg2dec);
-
-  this->stream->video_out->close(this->stream->video_out, this->stream);
-
-  free (this);
-}
-
-static video_decoder_t *open_plugin (video_decoder_class_t *class_gen, xine_stream_t *stream) {
-  mpeg2_video_decoder_t *this ;
-  int32_t n;
-
-  this = (mpeg2_video_decoder_t *) calloc(1, sizeof(mpeg2_video_decoder_t));
-
-  this->video_decoder.decode_data         = mpeg2_video_decode_data;
-  this->video_decoder.flush               = mpeg2_video_flush;
-  this->video_decoder.reset               = mpeg2_video_reset;
-  this->video_decoder.discontinuity       = mpeg2_video_discontinuity;
-  this->video_decoder.dispose             = mpeg2_video_dispose;
-  this->stream                            = stream;
-  this->class                             = (mpeg2_class_t *) class_gen;
-  this->frame_number=0;
-  this->rff_pattern=0;
-
-  this->mpeg2dec = mpeg2_init ();
-  mpeg2_custom_fbuf (this->mpeg2dec, 1);  /* <- Force libmpeg2 to use xine frame buffers. */
-  (stream->video_out->open) (stream->video_out, stream);
-  this->force_aspect = this->force_pan_scan = 0;
-  for(n=0;n<30;n++) this->img_state[n].id=0;
-
-  return &this->video_decoder;
-}
-
-/*
- * mpeg2 plugin class
- */
-static void *init_plugin (xine_t *xine, void *data) {
-
-  mpeg2_class_t *this;
-
-  this = (mpeg2_class_t *) calloc(1, sizeof(mpeg2_class_t));
-
-  this->decoder_class.open_plugin     = open_plugin;
-  this->decoder_class.identifier      = "mpeg2new";
-  this->decoder_class.description     = N_("mpeg2 based video decoder plugin");
-  this->decoder_class.dispose         = default_video_decoder_class_dispose;
-
-  return this;
-}
-/*
- * exported plugin catalog entry
- */
-
-static uint32_t supported_types[] = { BUF_VIDEO_MPEG, 0 };
-
-static decoder_info_t dec_info_mpeg2 = {
-  supported_types,     /* supported types */
-  6                    /* priority        */
-};
-
-plugin_info_t xine_plugin_info[] = {
-  /* type, API, "name", version, special_info, init_function */  
-  { PLUGIN_VIDEO_DECODER, 19, "mpeg2new", XINE_VERSION_CODE, &dec_info_mpeg2, init_plugin },
-  { PLUGIN_NONE, 0, "", 0, NULL, NULL }
-};
diff --git a/src/video_dec/libmpeg2new/Makefile.am b/src/video_dec/libmpeg2new/Makefile.am
new file mode 100644
index 000000000..8c248fdcb
--- /dev/null
+++ b/src/video_dec/libmpeg2new/Makefile.am
@@ -0,0 +1,12 @@
+include $(top_srcdir)/misc/Makefile.common
+
+AM_CFLAGS  = $(DEFAULT_OCFLAGS) $(VISIBILITY_FLAG)
+AM_LDFLAGS = $(xineplug_ldflags)
+
+SUBDIRS = libmpeg2
+
+xineplug_LTLIBRARIES = xineplug_decode_mpeg2.la
+
+xineplug_decode_mpeg2_la_SOURCES = xine_mpeg2_decoder.c
+xineplug_decode_mpeg2_la_LIBADD = $(XINE_LIB) ./libmpeg2/libmpeg2.la 
+xineplug_decode_mpeg2_la_CFLAGS = $(AM_CFLAGS) $(MLIB_CFLAGS)
diff --git a/src/video_dec/libmpeg2new/include/Makefile.am b/src/video_dec/libmpeg2new/include/Makefile.am
new file mode 100644
index 000000000..302d01cb1
--- /dev/null
+++ b/src/video_dec/libmpeg2new/include/Makefile.am
@@ -0,0 +1,3 @@
+pkginclude_HEADERS = mpeg2.h mpeg2convert.h
+
+EXTRA_DIST = video_out.h mmx.h sse.h alpha_asm.h vis.h attributes.h tendra.h
diff --git a/src/video_dec/libmpeg2new/include/alpha_asm.h b/src/video_dec/libmpeg2new/include/alpha_asm.h
new file mode 100644
index 000000000..bf1081f24
--- /dev/null
+++ b/src/video_dec/libmpeg2new/include/alpha_asm.h
@@ -0,0 +1,181 @@
+/*
+ * Alpha assembly macros
+ * Copyright (c) 2002-2003 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307	 USA
+ */
+
+#ifndef ALPHA_ASM_H
+#define ALPHA_ASM_H
+
+#include <inttypes.h>
+
+#if defined __GNUC__
+# define GNUC_PREREQ(maj, min) \
+        ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
+#else
+# define GNUC_PREREQ(maj, min) 0
+#endif
+
+#define AMASK_BWX (1 << 0)
+#define AMASK_FIX (1 << 1)
+#define AMASK_CIX (1 << 2)
+#define AMASK_MVI (1 << 8)
+
+#ifdef __alpha_bwx__
+# define HAVE_BWX() 1
+#else
+# define HAVE_BWX() (amask(AMASK_BWX) == 0)
+#endif
+#ifdef __alpha_fix__
+# define HAVE_FIX() 1
+#else
+# define HAVE_FIX() (amask(AMASK_FIX) == 0)
+#endif
+#ifdef __alpha_max__
+# define HAVE_MVI() 1
+#else
+# define HAVE_MVI() (amask(AMASK_MVI) == 0)
+#endif
+#ifdef __alpha_cix__
+# define HAVE_CIX() 1
+#else
+# define HAVE_CIX() (amask(AMASK_CIX) == 0)
+#endif
+
+/* OR x with copies of itself shifted left by 8, 16, ..., 56 bits. */
+static inline uint64_t BYTE_VEC(uint64_t x)
+{
+    x |= x << 8;
+    x |= x << 16;
+    return x | (x << 32);
+}
+/* OR x with copies of itself shifted left by 16, 32 and 48 bits. */
+static inline uint64_t WORD_VEC(uint64_t x)
+{
+    x |= x << 16;
+    return x | (x << 32);
+}
+
+#define ldq(p) (*(const uint64_t *) (p))
+#define ldl(p) (*(const int32_t *) (p))
+#define stl(l, p) do { *(uint32_t *) (p) = (l); } while (0)
+#define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0)
+#define sextw(x) ((int16_t) (x))
+
+#ifdef __GNUC__
+struct unaligned_long { uint64_t l; } __attribute__((packed));
+#define ldq_u(p)     (*(const uint64_t *) (((uint64_t) (p)) & ~7ul))
+#define uldq(a)	     (((const struct unaligned_long *) (a))->l)
+
+#if GNUC_PREREQ(3,3)
+#define prefetch(p)     __builtin_prefetch((p), 0, 1)
+#define prefetch_en(p)  __builtin_prefetch((p), 0, 0)
+#define prefetch_m(p)   __builtin_prefetch((p), 1, 1)
+#define prefetch_men(p) __builtin_prefetch((p), 1, 0)
+#define cmpbge	__builtin_alpha_cmpbge
+/* Avoid warnings.  */
+#define extql(a, b)	__builtin_alpha_extql(a, (uint64_t) (b))
+#define extwl(a, b)	__builtin_alpha_extwl(a, (uint64_t) (b))
+#define extqh(a, b)	__builtin_alpha_extqh(a, (uint64_t) (b))
+#define zap	__builtin_alpha_zap
+#define zapnot	__builtin_alpha_zapnot
+#define amask	__builtin_alpha_amask
+#define implver	__builtin_alpha_implver
+#define rpcc	__builtin_alpha_rpcc
+#else
+#define prefetch(p)     asm volatile("ldl $31,%0"  : : "m"(*(const char *) (p)) : "memory")
+#define prefetch_en(p)  asm volatile("ldq $31,%0"  : : "m"(*(const char *) (p)) : "memory")
+#define prefetch_m(p)   asm volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory")
+#define prefetch_men(p) asm volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory")
+#define cmpbge(a, b) ({ uint64_t __r; asm ("cmpbge  %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
+#define extql(a, b)  ({ uint64_t __r; asm ("extql   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
+#define extwl(a, b)  ({ uint64_t __r; asm ("extwl   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
+#define extqh(a, b)  ({ uint64_t __r; asm ("extqh   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
+#define zap(a, b)    ({ uint64_t __r; asm ("zap     %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
+#define zapnot(a, b) ({ uint64_t __r; asm ("zapnot  %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
+#define amask(a)     ({ uint64_t __r; asm ("amask   %1,%0"      : "=r" (__r) : "rI"  (a));	     __r; })
+#define implver()    ({ uint64_t __r; asm ("implver %0"         : "=r" (__r));			     __r; })
+#define rpcc()	     ({ uint64_t __r; asm volatile ("rpcc %0"   : "=r" (__r));			     __r; })
+#endif
+#define wh64(p) asm volatile("wh64 (%0)" : : "r"(p) : "memory")
+
+#if GNUC_PREREQ(3,3) && defined(__alpha_max__)
+#define minub8	__builtin_alpha_minub8
+#define minsb8	__builtin_alpha_minsb8
+#define minuw4	__builtin_alpha_minuw4
+#define minsw4	__builtin_alpha_minsw4
+#define maxub8	__builtin_alpha_maxub8
+#define maxsb8	__builtin_alpha_maxsb8
+#define maxuw4	__builtin_alpha_maxuw4	
+#define maxsw4	__builtin_alpha_maxsw4
+#define perr	__builtin_alpha_perr
+#define pklb	__builtin_alpha_pklb
+#define pkwb	__builtin_alpha_pkwb
+#define unpkbl	__builtin_alpha_unpkbl
+#define unpkbw	__builtin_alpha_unpkbw
+#else
+#define minub8(a, b) ({ uint64_t __r; asm (".arch ev6; minub8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define minsb8(a, b) ({ uint64_t __r; asm (".arch ev6; minsb8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define minuw4(a, b) ({ uint64_t __r; asm (".arch ev6; minuw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define minsw4(a, b) ({ uint64_t __r; asm (".arch ev6; minsw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define maxub8(a, b) ({ uint64_t __r; asm (".arch ev6; maxub8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define maxsb8(a, b) ({ uint64_t __r; asm (".arch ev6; maxsb8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define maxuw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxuw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define maxsw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxsw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define perr(a, b)   ({ uint64_t __r; asm (".arch ev6; perr    %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; })
+#define pklb(a)      ({ uint64_t __r; asm (".arch ev6; pklb    %r1,%0"     : "=r" (__r) : "rJ"  (a));	     __r; })
+#define pkwb(a)      ({ uint64_t __r; asm (".arch ev6; pkwb    %r1,%0"     : "=r" (__r) : "rJ"  (a));	     __r; })
+#define unpkbl(a)    ({ uint64_t __r; asm (".arch ev6; unpkbl  %r1,%0"     : "=r" (__r) : "rJ"  (a));	     __r; })
+#define unpkbw(a)    ({ uint64_t __r; asm (".arch ev6; unpkbw  %r1,%0"     : "=r" (__r) : "rJ"  (a));	     __r; })
+#endif
+
+#elif defined(__DECC)		/* Digital/Compaq/hp "ccc" compiler */
+
+#include <c_asm.h>
+#define ldq_u(a)     asm ("ldq_u   %v0,0(%a0)", a)
+#define uldq(a)	     (*(const __unaligned uint64_t *) (a))
+#define cmpbge(a, b) asm ("cmpbge  %a0,%a1,%v0", a, b)
+#define extql(a, b)  asm ("extql   %a0,%a1,%v0", a, b)
+#define extwl(a, b)  asm ("extwl   %a0,%a1,%v0", a, b)
+#define extqh(a, b)  asm ("extqh   %a0,%a1,%v0", a, b)
+#define zap(a, b)    asm ("zap     %a0,%a1,%v0", a, b)
+#define zapnot(a, b) asm ("zapnot  %a0,%a1,%v0", a, b)
+#define amask(a)     asm ("amask   %a0,%v0", a)
+#define implver()    asm ("implver %v0")
+#define rpcc()	     asm ("rpcc	   %v0")
+#define minub8(a, b) asm ("minub8  %a0,%a1,%v0", a, b)
+#define minsb8(a, b) asm ("minsb8  %a0,%a1,%v0", a, b)
+#define minuw4(a, b) asm ("minuw4  %a0,%a1,%v0", a, b)
+#define minsw4(a, b) asm ("minsw4  %a0,%a1,%v0", a, b)
+#define maxub8(a, b) asm ("maxub8  %a0,%a1,%v0", a, b)
+#define maxsb8(a, b) asm ("maxsb8  %a0,%a1,%v0", a, b)
+#define maxuw4(a, b) asm ("maxuw4  %a0,%a1,%v0", a, b)
+#define maxsw4(a, b) asm ("maxsw4  %a0,%a1,%v0", a, b)
+#define perr(a, b)   asm ("perr    %a0,%a1,%v0", a, b)
+#define pklb(a)      asm ("pklb    %a0,%v0", a)
+#define pkwb(a)      asm ("pkwb    %a0,%v0", a)
+#define unpkbl(a)    asm ("unpkbl  %a0,%v0", a)
+#define unpkbw(a)    asm ("unpkbw  %a0,%v0", a)
+#define wh64(a)      asm ("wh64    %a0", a)
+
+#else
+#error "Unknown compiler!"
+#endif
+
+#endif /* ALPHA_ASM_H */
diff --git a/src/video_dec/libmpeg2new/include/attributes.h b/src/video_dec/libmpeg2new/include/attributes.h
new file mode 100644
index 000000000..eefbc0dd1
--- /dev/null
+++ b/src/video_dec/libmpeg2new/include/attributes.h
@@ -0,0 +1,37 @@
+/*
+ * attributes.h
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/* ATTR_ALIGN(n): gcc alignment attribute, n capped at ATTRIBUTE_ALIGNED_MAX; empty if that is undefined */
+#ifdef ATTRIBUTE_ALIGNED_MAX
+#define ATTR_ALIGN(align) __attribute__ ((__aligned__ ((ATTRIBUTE_ALIGNED_MAX < (align)) ? ATTRIBUTE_ALIGNED_MAX : (align))))
+#else
+#define ATTR_ALIGN(align)
+#endif
+
+#ifdef HAVE_BUILTIN_EXPECT
+#define likely(x) __builtin_expect ((x) != 0, 1)
+#define unlikely(x) __builtin_expect ((x) != 0, 0)
+#else
+#define likely(x) (x)
+#define unlikely(x) (x)
+#endif
diff --git a/src/video_dec/libmpeg2new/include/mmx.h b/src/video_dec/libmpeg2new/include/mmx.h
new file mode 100644
index 000000000..08b4d4776
--- /dev/null
+++ b/src/video_dec/libmpeg2new/include/mmx.h
@@ -0,0 +1,263 @@
+/*
+ * mmx.h
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * The type of a value that fits in an MMX register (note that long
+ * long constant values MUST be suffixed by LL and unsigned long long
+ * values by ULL, lest they be truncated by the compiler)
+ */
+
+typedef	union {
+	long long		q;	/* Quadword (64-bit) value */
+	unsigned long long	uq;	/* Unsigned Quadword */
+	int			d[2];	/* 2 Doubleword (32-bit) values */
+	unsigned int		ud[2];	/* 2 Unsigned Doubleword */
+	short			w[4];	/* 4 Word (16-bit) values */
+	unsigned short		uw[4];	/* 4 Unsigned Word */
+	char			b[8];	/* 8 Byte (8-bit) values */
+	unsigned char		ub[8];	/* 8 Unsigned Byte */
+	float			s[2];	/* 2 Single-precision (32-bit) values */
+} ATTR_ALIGN(8) mmx_t;	/* On an 8-byte (64-bit) boundary */
+
+
+#define	mmx_i2r(op,imm,reg) \
+	__asm__ __volatile__ (#op " %0, %%" #reg \
+			      : /* nothing */ \
+			      : "i" (imm) )
+
+#define	mmx_m2r(op,mem,reg) \
+	__asm__ __volatile__ (#op " %0, %%" #reg \
+			      : /* nothing */ \
+			      : "m" (mem))
+
+#define	mmx_r2m(op,reg,mem) \
+	__asm__ __volatile__ (#op " %%" #reg ", %0" \
+			      : "=m" (mem) \
+			      : /* nothing */ )
+
+#define	mmx_r2r(op,regs,regd) \
+	__asm__ __volatile__ (#op " %" #regs ", %" #regd)
+
+
+#define	emms() __asm__ __volatile__ ("emms")
+
+#define	movd_m2r(var,reg)	mmx_m2r (movd, var, reg)
+#define	movd_r2m(reg,var)	mmx_r2m (movd, reg, var)
+#define	movd_v2r(var,reg)	__asm__ __volatile__ ("movd %0, %%" #reg \
+						      : /* nothing */ \
+						      : "rm" (var))
+#define	movd_r2v(reg,var)	__asm__ __volatile__ ("movd %%" #reg ", %0" \
+						      : "=rm" (var) \
+						      : /* nothing */ )
+
+#define	movq_m2r(var,reg)	mmx_m2r (movq, var, reg)
+#define	movq_r2m(reg,var)	mmx_r2m (movq, reg, var)
+#define	movq_r2r(regs,regd)	mmx_r2r (movq, regs, regd)
+
+#define	packssdw_m2r(var,reg)	mmx_m2r (packssdw, var, reg)
+#define	packssdw_r2r(regs,regd) mmx_r2r (packssdw, regs, regd)
+#define	packsswb_m2r(var,reg)	mmx_m2r (packsswb, var, reg)
+#define	packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd)
+
+#define	packuswb_m2r(var,reg)	mmx_m2r (packuswb, var, reg)
+#define	packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd)
+
+#define	paddb_m2r(var,reg)	mmx_m2r (paddb, var, reg)
+#define	paddb_r2r(regs,regd)	mmx_r2r (paddb, regs, regd)
+#define	paddd_m2r(var,reg)	mmx_m2r (paddd, var, reg)
+#define	paddd_r2r(regs,regd)	mmx_r2r (paddd, regs, regd)
+#define	paddw_m2r(var,reg)	mmx_m2r (paddw, var, reg)
+#define	paddw_r2r(regs,regd)	mmx_r2r (paddw, regs, regd)
+
+#define	paddsb_m2r(var,reg)	mmx_m2r (paddsb, var, reg)
+#define	paddsb_r2r(regs,regd)	mmx_r2r (paddsb, regs, regd)
+#define	paddsw_m2r(var,reg)	mmx_m2r (paddsw, var, reg)
+#define	paddsw_r2r(regs,regd)	mmx_r2r (paddsw, regs, regd)
+
+#define	paddusb_m2r(var,reg)	mmx_m2r (paddusb, var, reg)
+#define	paddusb_r2r(regs,regd)	mmx_r2r (paddusb, regs, regd)
+#define	paddusw_m2r(var,reg)	mmx_m2r (paddusw, var, reg)
+#define	paddusw_r2r(regs,regd)	mmx_r2r (paddusw, regs, regd)
+
+#define	pand_m2r(var,reg)	mmx_m2r (pand, var, reg)
+#define	pand_r2r(regs,regd)	mmx_r2r (pand, regs, regd)
+
+#define	pandn_m2r(var,reg)	mmx_m2r (pandn, var, reg)
+#define	pandn_r2r(regs,regd)	mmx_r2r (pandn, regs, regd)
+
+#define	pcmpeqb_m2r(var,reg)	mmx_m2r (pcmpeqb, var, reg)
+#define	pcmpeqb_r2r(regs,regd)	mmx_r2r (pcmpeqb, regs, regd)
+#define	pcmpeqd_m2r(var,reg)	mmx_m2r (pcmpeqd, var, reg)
+#define	pcmpeqd_r2r(regs,regd)	mmx_r2r (pcmpeqd, regs, regd)
+#define	pcmpeqw_m2r(var,reg)	mmx_m2r (pcmpeqw, var, reg)
+#define	pcmpeqw_r2r(regs,regd)	mmx_r2r (pcmpeqw, regs, regd)
+
+#define	pcmpgtb_m2r(var,reg)	mmx_m2r (pcmpgtb, var, reg)
+#define	pcmpgtb_r2r(regs,regd)	mmx_r2r (pcmpgtb, regs, regd)
+#define	pcmpgtd_m2r(var,reg)	mmx_m2r (pcmpgtd, var, reg)
+#define	pcmpgtd_r2r(regs,regd)	mmx_r2r (pcmpgtd, regs, regd)
+#define	pcmpgtw_m2r(var,reg)	mmx_m2r (pcmpgtw, var, reg)
+#define	pcmpgtw_r2r(regs,regd)	mmx_r2r (pcmpgtw, regs, regd)
+
+#define	pmaddwd_m2r(var,reg)	mmx_m2r (pmaddwd, var, reg)
+#define	pmaddwd_r2r(regs,regd)	mmx_r2r (pmaddwd, regs, regd)
+
+#define	pmulhw_m2r(var,reg)	mmx_m2r (pmulhw, var, reg)
+#define	pmulhw_r2r(regs,regd)	mmx_r2r (pmulhw, regs, regd)
+
+#define	pmullw_m2r(var,reg)	mmx_m2r (pmullw, var, reg)
+#define	pmullw_r2r(regs,regd)	mmx_r2r (pmullw, regs, regd)
+
+#define	por_m2r(var,reg)	mmx_m2r (por, var, reg)
+#define	por_r2r(regs,regd)	mmx_r2r (por, regs, regd)
+
+#define	pslld_i2r(imm,reg)	mmx_i2r (pslld, imm, reg)
+#define	pslld_m2r(var,reg)	mmx_m2r (pslld, var, reg)
+#define	pslld_r2r(regs,regd)	mmx_r2r (pslld, regs, regd)
+#define	psllq_i2r(imm,reg)	mmx_i2r (psllq, imm, reg)
+#define	psllq_m2r(var,reg)	mmx_m2r (psllq, var, reg)
+#define	psllq_r2r(regs,regd)	mmx_r2r (psllq, regs, regd)
+#define	psllw_i2r(imm,reg)	mmx_i2r (psllw, imm, reg)
+#define	psllw_m2r(var,reg)	mmx_m2r (psllw, var, reg)
+#define	psllw_r2r(regs,regd)	mmx_r2r (psllw, regs, regd)
+
+#define	psrad_i2r(imm,reg)	mmx_i2r (psrad, imm, reg)
+#define	psrad_m2r(var,reg)	mmx_m2r (psrad, var, reg)
+#define	psrad_r2r(regs,regd)	mmx_r2r (psrad, regs, regd)
+#define	psraw_i2r(imm,reg)	mmx_i2r (psraw, imm, reg)
+#define	psraw_m2r(var,reg)	mmx_m2r (psraw, var, reg)
+#define	psraw_r2r(regs,regd)	mmx_r2r (psraw, regs, regd)
+
+#define	psrld_i2r(imm,reg)	mmx_i2r (psrld, imm, reg)
+#define	psrld_m2r(var,reg)	mmx_m2r (psrld, var, reg)
+#define	psrld_r2r(regs,regd)	mmx_r2r (psrld, regs, regd)
+#define	psrlq_i2r(imm,reg)	mmx_i2r (psrlq, imm, reg)
+#define	psrlq_m2r(var,reg)	mmx_m2r (psrlq, var, reg)
+#define	psrlq_r2r(regs,regd)	mmx_r2r (psrlq, regs, regd)
+#define	psrlw_i2r(imm,reg)	mmx_i2r (psrlw, imm, reg)
+#define	psrlw_m2r(var,reg)	mmx_m2r (psrlw, var, reg)
+#define	psrlw_r2r(regs,regd)	mmx_r2r (psrlw, regs, regd)
+
+#define	psubb_m2r(var,reg)	mmx_m2r (psubb, var, reg)
+#define	psubb_r2r(regs,regd)	mmx_r2r (psubb, regs, regd)
+#define	psubd_m2r(var,reg)	mmx_m2r (psubd, var, reg)
+#define	psubd_r2r(regs,regd)	mmx_r2r (psubd, regs, regd)
+#define	psubw_m2r(var,reg)	mmx_m2r (psubw, var, reg)
+#define	psubw_r2r(regs,regd)	mmx_r2r (psubw, regs, regd)
+
+#define	psubsb_m2r(var,reg)	mmx_m2r (psubsb, var, reg)
+#define	psubsb_r2r(regs,regd)	mmx_r2r (psubsb, regs, regd)
+#define	psubsw_m2r(var,reg)	mmx_m2r (psubsw, var, reg)
+#define	psubsw_r2r(regs,regd)	mmx_r2r (psubsw, regs, regd)
+
+#define	psubusb_m2r(var,reg)	mmx_m2r (psubusb, var, reg)
+#define	psubusb_r2r(regs,regd)	mmx_r2r (psubusb, regs, regd)
+#define	psubusw_m2r(var,reg)	mmx_m2r (psubusw, var, reg)
+#define	psubusw_r2r(regs,regd)	mmx_r2r (psubusw, regs, regd)
+
+#define	punpckhbw_m2r(var,reg)		mmx_m2r (punpckhbw, var, reg)
+#define	punpckhbw_r2r(regs,regd)	mmx_r2r (punpckhbw, regs, regd)
+#define	punpckhdq_m2r(var,reg)		mmx_m2r (punpckhdq, var, reg)
+#define	punpckhdq_r2r(regs,regd)	mmx_r2r (punpckhdq, regs, regd)
+#define	punpckhwd_m2r(var,reg)		mmx_m2r (punpckhwd, var, reg)
+#define	punpckhwd_r2r(regs,regd)	mmx_r2r (punpckhwd, regs, regd)
+
+#define	punpcklbw_m2r(var,reg) 		mmx_m2r (punpcklbw, var, reg)
+#define	punpcklbw_r2r(regs,regd)	mmx_r2r (punpcklbw, regs, regd)
+#define	punpckldq_m2r(var,reg)		mmx_m2r (punpckldq, var, reg)
+#define	punpckldq_r2r(regs,regd)	mmx_r2r (punpckldq, regs, regd)
+#define	punpcklwd_m2r(var,reg)		mmx_m2r (punpcklwd, var, reg)
+#define	punpcklwd_r2r(regs,regd)	mmx_r2r (punpcklwd, regs, regd)
+
+#define	pxor_m2r(var,reg)	mmx_m2r (pxor, var, reg)
+#define	pxor_r2r(regs,regd)	mmx_r2r (pxor, regs, regd)
+
+
+/* 3DNOW extensions */
+
+#define pavgusb_m2r(var,reg)	mmx_m2r (pavgusb, var, reg)
+#define pavgusb_r2r(regs,regd)	mmx_r2r (pavgusb, regs, regd)
+
+
+/* AMD MMX extensions - also available in intel SSE */
+
+
+#define mmx_m2ri(op,mem,reg,imm) \
+	__asm__ __volatile__ (#op " %1, %0, %%" #reg \
+			      : /* nothing */ \
+			      : "m" (mem), "i" (imm))
+
+#define mmx_r2ri(op,regs,regd,imm) \
+	__asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \
+			      : /* nothing */ \
+			      : "i" (imm) )
+
+#define	mmx_fetch(mem,hint) \
+	__asm__ __volatile__ ("prefetch" #hint " %0" \
+			      : /* nothing */ \
+			      : "m" (mem))
+
+
+#define	maskmovq(regs,maskreg)		mmx_r2r (maskmovq, regs, maskreg)
+
+#define	movntq_r2m(mmreg,var)		mmx_r2m (movntq, mmreg, var)
+
+#define	pavgb_m2r(var,reg)		mmx_m2r (pavgb, var, reg)
+#define	pavgb_r2r(regs,regd)		mmx_r2r (pavgb, regs, regd)
+#define	pavgw_m2r(var,reg)		mmx_m2r (pavgw, var, reg)
+#define	pavgw_r2r(regs,regd)		mmx_r2r (pavgw, regs, regd)
+
+#define	pextrw_r2r(mmreg,reg,imm)	mmx_r2ri (pextrw, mmreg, reg, imm)
+
+#define	pinsrw_r2r(reg,mmreg,imm)	mmx_r2ri (pinsrw, reg, mmreg, imm)
+
+#define	pmaxsw_m2r(var,reg)		mmx_m2r (pmaxsw, var, reg)
+#define	pmaxsw_r2r(regs,regd)		mmx_r2r (pmaxsw, regs, regd)
+
+#define	pmaxub_m2r(var,reg)		mmx_m2r (pmaxub, var, reg)
+#define	pmaxub_r2r(regs,regd)		mmx_r2r (pmaxub, regs, regd)
+
+#define	pminsw_m2r(var,reg)		mmx_m2r (pminsw, var, reg)
+#define	pminsw_r2r(regs,regd)		mmx_r2r (pminsw, regs, regd)
+
+#define	pminub_m2r(var,reg)		mmx_m2r (pminub, var, reg)
+#define	pminub_r2r(regs,regd)		mmx_r2r (pminub, regs, regd)
+
+#define	pmovmskb(mmreg,reg) \
+	__asm__ __volatile__ ("pmovmskb %" #mmreg ", %" #reg)
+
+#define	pmulhuw_m2r(var,reg)		mmx_m2r (pmulhuw, var, reg)
+#define	pmulhuw_r2r(regs,regd)		mmx_r2r (pmulhuw, regs, regd)
+
+#define	prefetcht0(mem)			mmx_fetch (mem, t0)
+#define	prefetcht1(mem)			mmx_fetch (mem, t1)
+#define	prefetcht2(mem)			mmx_fetch (mem, t2)
+#define	prefetchnta(mem)		mmx_fetch (mem, nta)
+
+#define	psadbw_m2r(var,reg)		mmx_m2r (psadbw, var, reg)
+#define	psadbw_r2r(regs,regd)		mmx_r2r (psadbw, regs, regd)
+
+#define	pshufw_m2r(var,reg,imm)		mmx_m2ri(pshufw, var, reg, imm)
+#define	pshufw_r2r(regs,regd,imm)	mmx_r2ri(pshufw, regs, regd, imm)
+
+#define	sfence() __asm__ __volatile__ ("sfence\n\t")
diff --git a/src/video_dec/libmpeg2new/include/mpeg2.h b/src/video_dec/libmpeg2new/include/mpeg2.h
new file mode 100644
index 000000000..6c1a3805b
--- /dev/null
+++ b/src/video_dec/libmpeg2new/include/mpeg2.h
@@ -0,0 +1,202 @@
+/*
+ * mpeg2.h
+ * Copyright (C) 2000-2004 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef MPEG2_H
+#define MPEG2_H
+
+#define MPEG2_VERSION(a,b,c) (((a)<<16)|((b)<<8)|(c))
+#define MPEG2_RELEASE MPEG2_VERSION (0, 4, 1)	/* 0.4.1 */
+
+#define SEQ_FLAG_MPEG2 1
+#define SEQ_FLAG_CONSTRAINED_PARAMETERS 2
+#define SEQ_FLAG_PROGRESSIVE_SEQUENCE 4
+#define SEQ_FLAG_LOW_DELAY 8
+#define SEQ_FLAG_COLOUR_DESCRIPTION 16
+
+#define SEQ_MASK_VIDEO_FORMAT 0xe0
+#define SEQ_VIDEO_FORMAT_COMPONENT 0
+#define SEQ_VIDEO_FORMAT_PAL 0x20
+#define SEQ_VIDEO_FORMAT_NTSC 0x40
+#define SEQ_VIDEO_FORMAT_SECAM 0x60
+#define SEQ_VIDEO_FORMAT_MAC 0x80
+#define SEQ_VIDEO_FORMAT_UNSPECIFIED 0xa0
+
+typedef struct mpeg2_sequence_s {
+    unsigned int width, height;
+    unsigned int chroma_width, chroma_height;
+    unsigned int byte_rate;
+    unsigned int vbv_buffer_size;
+    uint32_t flags;
+
+    unsigned int picture_width, picture_height;
+    unsigned int display_width, display_height;
+    unsigned int pixel_width, pixel_height;
+    unsigned int frame_period;
+
+    uint8_t profile_level_id;
+    uint8_t colour_primaries;
+    uint8_t transfer_characteristics;
+    uint8_t matrix_coefficients;
+} mpeg2_sequence_t;
+
+#define GOP_FLAG_DROP_FRAME 1
+#define GOP_FLAG_BROKEN_LINK 2
+#define GOP_FLAG_CLOSED_GOP 4
+
+typedef struct mpeg2_gop_s {
+    uint8_t hours;
+    uint8_t minutes;
+    uint8_t seconds;
+    uint8_t pictures;
+    uint32_t flags;
+} mpeg2_gop_t;
+
+#define PIC_MASK_CODING_TYPE 7
+#define PIC_FLAG_CODING_TYPE_I 1
+#define PIC_FLAG_CODING_TYPE_P 2
+#define PIC_FLAG_CODING_TYPE_B 3
+#define PIC_FLAG_CODING_TYPE_D 4
+
+#define PIC_FLAG_TOP_FIELD_FIRST 8
+#define PIC_FLAG_PROGRESSIVE_FRAME 16
+#define PIC_FLAG_COMPOSITE_DISPLAY 32
+#define PIC_FLAG_SKIP 64
+#define PIC_FLAG_TAGS 128
+#define PIC_MASK_COMPOSITE_DISPLAY 0xfffff000
+
+typedef struct mpeg2_picture_s {
+    unsigned int temporal_reference;
+    unsigned int nb_fields;
+    uint32_t tag, tag2;
+    uint32_t flags;
+    struct {
+	int x, y;
+    } display_offset[3];
+} mpeg2_picture_t;
+
+typedef struct mpeg2_fbuf_s {
+    uint8_t * buf[3];
+    void * id;
+} mpeg2_fbuf_t;
+
+typedef struct mpeg2_info_s {
+    const mpeg2_sequence_t * sequence;
+    const mpeg2_gop_t * gop;
+    const mpeg2_picture_t * current_picture;
+    const mpeg2_picture_t * current_picture_2nd;
+    const mpeg2_fbuf_t * current_fbuf;
+    const mpeg2_picture_t * display_picture;
+    const mpeg2_picture_t * display_picture_2nd;
+    const mpeg2_fbuf_t * display_fbuf;
+    const mpeg2_fbuf_t * discard_fbuf;
+    const uint8_t * user_data;
+    unsigned int user_data_len;
+} mpeg2_info_t;
+
+typedef struct mpeg2dec_s mpeg2dec_t;
+typedef struct mpeg2_decoder_s mpeg2_decoder_t;
+
+typedef enum {
+    STATE_BUFFER = 0,
+    STATE_SEQUENCE = 1,
+    STATE_SEQUENCE_REPEATED = 2,
+    STATE_SEQUENCE_MODIFIED = 3,
+    STATE_GOP = 4,
+    STATE_PICTURE = 5,
+    STATE_SLICE_1ST = 6,
+    STATE_PICTURE_2ND = 7,
+    STATE_SLICE = 8,
+    STATE_END = 9,
+    STATE_INVALID = 10,
+    STATE_INVALID_END = 11
+} mpeg2_state_t;
+
+typedef struct mpeg2_convert_init_s {
+    unsigned int id_size;
+    unsigned int buf_size[3];
+    void (* start) (void * id, const mpeg2_fbuf_t * fbuf,
+		    const mpeg2_picture_t * picture, const mpeg2_gop_t * gop);
+    void (* copy) (void * id, uint8_t * const * src, unsigned int v_offset);
+} mpeg2_convert_init_t;
+typedef enum {
+    MPEG2_CONVERT_SET = 0,
+    MPEG2_CONVERT_STRIDE = 1,
+    MPEG2_CONVERT_START = 2
+} mpeg2_convert_stage_t;
+typedef int mpeg2_convert_t (int stage, void * id,
+			     const mpeg2_sequence_t * sequence, int stride,
+			     uint32_t accel, void * arg,
+			     mpeg2_convert_init_t * result);
+int mpeg2_convert (mpeg2dec_t * mpeg2dec, mpeg2_convert_t convert, void * arg);
+int mpeg2_stride (mpeg2dec_t * mpeg2dec, int stride);
+void mpeg2_set_buf (mpeg2dec_t * mpeg2dec, uint8_t * buf[3], void * id);
+void mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf);
+
+#define MPEG2_ACCEL_X86_MMX 1
+#define MPEG2_ACCEL_X86_3DNOW 2
+#define MPEG2_ACCEL_X86_MMXEXT 4
+#define MPEG2_ACCEL_X86_SSE2 8
+#define MPEG2_ACCEL_X86_SSE3 16
+#define MPEG2_ACCEL_PPC_ALTIVEC 1
+#define MPEG2_ACCEL_ALPHA 1
+#define MPEG2_ACCEL_ALPHA_MVI 2
+#define MPEG2_ACCEL_SPARC_VIS 1
+#define MPEG2_ACCEL_SPARC_VIS2 2
+#define MPEG2_ACCEL_DETECT 0x80000000
+
+uint32_t mpeg2_accel (uint32_t accel);
+mpeg2dec_t * mpeg2_init (void);
+const mpeg2_info_t * mpeg2_info (mpeg2dec_t * mpeg2dec);
+void mpeg2_close (mpeg2dec_t * mpeg2dec);
+
+void mpeg2_buffer (mpeg2dec_t * mpeg2dec, uint8_t * start, uint8_t * end);
+int mpeg2_getpos (mpeg2dec_t * mpeg2dec);
+mpeg2_state_t mpeg2_parse (mpeg2dec_t * mpeg2dec);
+
+void mpeg2_reset (mpeg2dec_t * mpeg2dec, int full_reset);
+void mpeg2_skip (mpeg2dec_t * mpeg2dec, int skip);
+void mpeg2_slice_region (mpeg2dec_t * mpeg2dec, int start, int end);
+
+void mpeg2_tag_picture (mpeg2dec_t * mpeg2dec, uint32_t tag, uint32_t tag2);
+
+void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3],
+		      uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3]);
+void mpeg2_slice (mpeg2_decoder_t * decoder, int code, const uint8_t * buffer);
+int mpeg2_guess_aspect (const mpeg2_sequence_t * sequence,
+			unsigned int * pixel_width,
+			unsigned int * pixel_height);
+
+typedef enum {
+    MPEG2_ALLOC_MPEG2DEC = 0,
+    MPEG2_ALLOC_CHUNK = 1,
+    MPEG2_ALLOC_YUV = 2,
+    MPEG2_ALLOC_CONVERT_ID = 3,
+    MPEG2_ALLOC_CONVERTED = 4
+} mpeg2_alloc_t;
+
+void * mpeg2_malloc (unsigned size, mpeg2_alloc_t reason);
+void mpeg2_free (void * buf);
+void mpeg2_malloc_hooks (void * malloc (unsigned, mpeg2_alloc_t),
+			 int free (void *));
+
+#endif /* MPEG2_H */
diff --git a/src/video_dec/libmpeg2new/include/mpeg2convert.h b/src/video_dec/libmpeg2new/include/mpeg2convert.h
new file mode 100644
index 000000000..aac5d1991
--- /dev/null
+++ b/src/video_dec/libmpeg2new/include/mpeg2convert.h
@@ -0,0 +1,48 @@
+/*
+ * mpeg2convert.h
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef MPEG2CONVERT_H
+#define MPEG2CONVERT_H
+
+mpeg2_convert_t mpeg2convert_rgb32;
+mpeg2_convert_t mpeg2convert_rgb24;
+mpeg2_convert_t mpeg2convert_rgb16;
+mpeg2_convert_t mpeg2convert_rgb15;
+mpeg2_convert_t mpeg2convert_rgb8;
+mpeg2_convert_t mpeg2convert_bgr32;
+mpeg2_convert_t mpeg2convert_bgr24;
+mpeg2_convert_t mpeg2convert_bgr16;
+mpeg2_convert_t mpeg2convert_bgr15;
+mpeg2_convert_t mpeg2convert_bgr8;
+
+typedef enum {
+    MPEG2CONVERT_RGB = 0,
+    MPEG2CONVERT_BGR = 1
+} mpeg2convert_rgb_order_t;
+
+mpeg2_convert_t * mpeg2convert_rgb (mpeg2convert_rgb_order_t order,
+				    unsigned int bpp);
+
+mpeg2_convert_t mpeg2convert_uyvy;
+
+#endif /* MPEG2CONVERT_H */
diff --git a/src/video_dec/libmpeg2new/include/sse.h b/src/video_dec/libmpeg2new/include/sse.h
new file mode 100644
index 000000000..4bd853f8b
--- /dev/null
+++ b/src/video_dec/libmpeg2new/include/sse.h
@@ -0,0 +1,256 @@
+/*
+ * sse.h
+ * Copyright (C) 1999-2003 R. Fisher
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+typedef	union {
+	float			sf[4];	/* Four single-precision (32-bit) values */
+} ATTR_ALIGN(16) sse_t;	/* On a 16 byte (128-bit) boundary; ATTR_ALIGN is not defined in this header */
+
+
+#define	sse_i2r(op, imm, reg) /* emits "op imm, %reg" */ \
+	__asm__ __volatile__ (#op " %0, %%" #reg \
+			      : /* nothing */ \
+			      : "X" (imm) )
+
+#define	sse_m2r(op, mem, reg) /* emits "op mem, %reg"; mem is an input */ \
+	__asm__ __volatile__ (#op " %0, %%" #reg \
+			      : /* nothing */ \
+			      : "X" (mem))	/* NOTE(review): "X" admits any operand kind, not only memory */
+
+#define	sse_r2m(op, reg, mem) /* emits "op %reg, mem"; mem is the output */ \
+	__asm__ __volatile__ (#op " %%" #reg ", %0" \
+			      : "=X" (mem) \
+			      : /* nothing */ )
+
+#define	sse_r2r(op, regs, regd) /* emits "op %regs, %regd"; no operands or clobbers declared */ \
+	__asm__ __volatile__ (#op " %" #regs ", %" #regd)
+
+#define	sse_r2ri(op, regs, regd, imm) /* emits "op imm, %regs, %regd" */ \
+	__asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \
+			      : /* nothing */ \
+			      : "X" (imm) )
+
+#define	sse_m2ri(op, mem, reg, subop) /* emits "op $subop, mem, %reg": AT&T puts the immediate first; subop must be a constant */ \
+	__asm__ __volatile__ (#op " %1, %0, %%" #reg \
+			      : /* nothing */ \
+			      : "X" (mem), "i" (subop))
+
+
+#define	movaps_m2r(var, reg)	sse_m2r(movaps, var, reg)	/* movaps: aligned packed move */
+#define	movaps_r2m(reg, var)	sse_r2m(movaps, reg, var)
+#define	movaps_r2r(regs, regd)	sse_r2r(movaps, regs, regd)
+
+#define	movntps_r2m(xmmreg, var)	sse_r2m(movntps, xmmreg, var)	/* store-only form */
+
+#define	movups_m2r(var, reg)	sse_m2r(movups, var, reg)	/* movups: unaligned packed move */
+#define	movups_r2m(reg, var)	sse_r2m(movups, reg, var)
+#define	movups_r2r(regs, regd)	sse_r2r(movups, regs, regd)
+
+#define	movhlps_r2r(regs, regd)	sse_r2r(movhlps, regs, regd)	/* register-to-register only */
+
+#define	movlhps_r2r(regs, regd)	sse_r2r(movlhps, regs, regd)	/* register-to-register only */
+
+#define	movhps_m2r(var, reg)	sse_m2r(movhps, var, reg)	/* memory forms only */
+#define	movhps_r2m(reg, var)	sse_r2m(movhps, reg, var)
+
+#define	movlps_m2r(var, reg)	sse_m2r(movlps, var, reg)	/* memory forms only */
+#define	movlps_r2m(reg, var)	sse_r2m(movlps, reg, var)
+
+#define	movss_m2r(var, reg)	sse_m2r(movss, var, reg)	/* movss: scalar single move */
+#define	movss_r2m(reg, var)	sse_r2m(movss, reg, var)
+#define	movss_r2r(regs, regd)	sse_r2r(movss, regs, regd)
+
+#define	shufps_m2r(var, reg, index)	sse_m2ri(shufps, var, reg, index)	/* index is the shuffle immediate */
+#define	shufps_r2r(regs, regd, index)	sse_r2ri(shufps, regs, regd, index)
+
+#define	cvtpi2ps_m2r(var, xmmreg)	sse_m2r(cvtpi2ps, var, xmmreg)	/* every wrapper here passes the source first, the destination last */
+#define	cvtpi2ps_r2r(mmreg, xmmreg)	sse_r2r(cvtpi2ps, mmreg, xmmreg)
+
+#define	cvtps2pi_m2r(var, mmreg)	sse_m2r(cvtps2pi, var, mmreg)
+#define	cvtps2pi_r2r(xmmreg, mmreg)	sse_r2r(cvtps2pi, xmmreg, mmreg)	/* source is the xmm register */
+
+#define	cvttps2pi_m2r(var, mmreg)	sse_m2r(cvttps2pi, var, mmreg)
+#define	cvttps2pi_r2r(xmmreg, mmreg)	sse_r2r(cvttps2pi, xmmreg, mmreg)	/* source is the xmm register */
+
+#define	cvtsi2ss_m2r(var, xmmreg)	sse_m2r(cvtsi2ss, var, xmmreg)
+#define	cvtsi2ss_r2r(reg, xmmreg)	sse_r2r(cvtsi2ss, reg, xmmreg)
+
+#define	cvtss2si_m2r(var, reg)		sse_m2r(cvtss2si, var, reg)
+#define	cvtss2si_r2r(xmmreg, reg)	sse_r2r(cvtss2si, xmmreg, reg)
+
+#define	cvttss2si_m2r(var, reg)		sse_m2r(cvttss2si, var, reg)	/* truncating form: must emit cvttss2si, not cvtss2si */
+#define	cvttss2si_r2r(xmmreg, reg)	sse_r2r(cvttss2si, xmmreg, reg)
+
+#define	movmskps(xmmreg, reg) /* emits "movmskps %xmmreg, %reg"; basic asm, no operands declared */ \
+	__asm__ __volatile__ ("movmskps %" #xmmreg ", %" #reg)
+
+#define	addps_m2r(var, reg)		sse_m2r(addps, var, reg)	/* *ps = packed, *ss = scalar single; _m2r reads var, _r2r reads regs */
+#define	addps_r2r(regs, regd)		sse_r2r(addps, regs, regd)
+
+#define	addss_m2r(var, reg)		sse_m2r(addss, var, reg)
+#define	addss_r2r(regs, regd)		sse_r2r(addss, regs, regd)
+
+#define	subps_m2r(var, reg)		sse_m2r(subps, var, reg)
+#define	subps_r2r(regs, regd)		sse_r2r(subps, regs, regd)
+
+#define	subss_m2r(var, reg)		sse_m2r(subss, var, reg)
+#define	subss_r2r(regs, regd)		sse_r2r(subss, regs, regd)
+
+#define	mulps_m2r(var, reg)		sse_m2r(mulps, var, reg)
+#define	mulps_r2r(regs, regd)		sse_r2r(mulps, regs, regd)
+
+#define	mulss_m2r(var, reg)		sse_m2r(mulss, var, reg)
+#define	mulss_r2r(regs, regd)		sse_r2r(mulss, regs, regd)
+
+#define	divps_m2r(var, reg)		sse_m2r(divps, var, reg)
+#define	divps_r2r(regs, regd)		sse_r2r(divps, regs, regd)
+
+#define	divss_m2r(var, reg)		sse_m2r(divss, var, reg)
+#define	divss_r2r(regs, regd)		sse_r2r(divss, regs, regd)
+
+#define	rcpps_m2r(var, reg)		sse_m2r(rcpps, var, reg)	/* rcp* = reciprocal */
+#define	rcpps_r2r(regs, regd)		sse_r2r(rcpps, regs, regd)
+
+#define	rcpss_m2r(var, reg)		sse_m2r(rcpss, var, reg)
+#define	rcpss_r2r(regs, regd)		sse_r2r(rcpss, regs, regd)
+
+#define	rsqrtps_m2r(var, reg)		sse_m2r(rsqrtps, var, reg)	/* rsqrt* = reciprocal square root */
+#define	rsqrtps_r2r(regs, regd)		sse_r2r(rsqrtps, regs, regd)
+
+#define	rsqrtss_m2r(var, reg)		sse_m2r(rsqrtss, var, reg)
+#define	rsqrtss_r2r(regs, regd)		sse_r2r(rsqrtss, regs, regd)
+
+#define	sqrtps_m2r(var, reg)		sse_m2r(sqrtps, var, reg)
+#define	sqrtps_r2r(regs, regd)		sse_r2r(sqrtps, regs, regd)
+
+#define	sqrtss_m2r(var, reg)		sse_m2r(sqrtss, var, reg)
+#define	sqrtss_r2r(regs, regd)		sse_r2r(sqrtss, regs, regd)
+
+#define	andps_m2r(var, reg)		sse_m2r(andps, var, reg)	/* bitwise ops exist in packed (*ps) form only */
+#define	andps_r2r(regs, regd)		sse_r2r(andps, regs, regd)
+
+#define	andnps_m2r(var, reg)		sse_m2r(andnps, var, reg)
+#define	andnps_r2r(regs, regd)		sse_r2r(andnps, regs, regd)
+
+#define	orps_m2r(var, reg)		sse_m2r(orps, var, reg)
+#define	orps_r2r(regs, regd)		sse_r2r(orps, regs, regd)
+
+#define	xorps_m2r(var, reg)		sse_m2r(xorps, var, reg)
+#define	xorps_r2r(regs, regd)		sse_r2r(xorps, regs, regd)
+
+#define	maxps_m2r(var, reg)		sse_m2r(maxps, var, reg)	/* max/min come in packed and scalar forms */
+#define	maxps_r2r(regs, regd)		sse_r2r(maxps, regs, regd)
+
+#define	maxss_m2r(var, reg)		sse_m2r(maxss, var, reg)
+#define	maxss_r2r(regs, regd)		sse_r2r(maxss, regs, regd)
+
+#define	minps_m2r(var, reg)		sse_m2r(minps, var, reg)
+#define	minps_r2r(regs, regd)		sse_r2r(minps, regs, regd)
+
+#define	minss_m2r(var, reg)		sse_m2r(minss, var, reg)
+#define	minss_r2r(regs, regd)		sse_r2r(minss, regs, regd)
+
+#define	cmpps_m2r(var, reg, op)		sse_m2ri(cmpps, var, reg, op)	/* generic packed compare; op is the predicate immediate */
+#define	cmpps_r2r(regs, regd, op)	sse_r2ri(cmpps, regs, regd, op)
+
+#define	cmpeqps_m2r(var, reg)		sse_m2ri(cmpps, var, reg, 0)	/* predicate 0: eq */
+#define	cmpeqps_r2r(regs, regd)		sse_r2ri(cmpps, regs, regd, 0)
+
+#define	cmpltps_m2r(var, reg)		sse_m2ri(cmpps, var, reg, 1)	/* predicate 1: lt */
+#define	cmpltps_r2r(regs, regd)		sse_r2ri(cmpps, regs, regd, 1)
+
+#define	cmpleps_m2r(var, reg)		sse_m2ri(cmpps, var, reg, 2)	/* predicate 2: le */
+#define	cmpleps_r2r(regs, regd)		sse_r2ri(cmpps, regs, regd, 2)
+
+#define	cmpunordps_m2r(var, reg)	sse_m2ri(cmpps, var, reg, 3)	/* predicate 3: unord */
+#define	cmpunordps_r2r(regs, regd)	sse_r2ri(cmpps, regs, regd, 3)
+
+#define	cmpneqps_m2r(var, reg)		sse_m2ri(cmpps, var, reg, 4)	/* predicate 4: neq */
+#define	cmpneqps_r2r(regs, regd)	sse_r2ri(cmpps, regs, regd, 4)
+
+#define	cmpnltps_m2r(var, reg)		sse_m2ri(cmpps, var, reg, 5)	/* predicate 5: nlt */
+#define	cmpnltps_r2r(regs, regd)	sse_r2ri(cmpps, regs, regd, 5)
+
+#define	cmpnleps_m2r(var, reg)		sse_m2ri(cmpps, var, reg, 6)	/* predicate 6: nle */
+#define	cmpnleps_r2r(regs, regd)	sse_r2ri(cmpps, regs, regd, 6)
+
+#define	cmpordps_m2r(var, reg)		sse_m2ri(cmpps, var, reg, 7)	/* predicate 7: ord */
+#define	cmpordps_r2r(regs, regd)	sse_r2ri(cmpps, regs, regd, 7)
+
+#define	cmpss_m2r(var, reg, op)		sse_m2ri(cmpss, var, reg, op)	/* scalar compares: same predicate numbering as cmpps */
+#define	cmpss_r2r(regs, regd, op)	sse_r2ri(cmpss, regs, regd, op)
+
+#define	cmpeqss_m2r(var, reg)		sse_m2ri(cmpss, var, reg, 0)
+#define	cmpeqss_r2r(regs, regd)		sse_r2ri(cmpss, regs, regd, 0)
+
+#define	cmpltss_m2r(var, reg)		sse_m2ri(cmpss, var, reg, 1)
+#define	cmpltss_r2r(regs, regd)		sse_r2ri(cmpss, regs, regd, 1)
+
+#define	cmpless_m2r(var, reg)		sse_m2ri(cmpss, var, reg, 2)
+#define	cmpless_r2r(regs, regd)		sse_r2ri(cmpss, regs, regd, 2)
+
+#define	cmpunordss_m2r(var, reg)	sse_m2ri(cmpss, var, reg, 3)
+#define	cmpunordss_r2r(regs, regd)	sse_r2ri(cmpss, regs, regd, 3)
+
+#define	cmpneqss_m2r(var, reg)		sse_m2ri(cmpss, var, reg, 4)
+#define	cmpneqss_r2r(regs, regd)	sse_r2ri(cmpss, regs, regd, 4)
+
+#define	cmpnltss_m2r(var, reg)		sse_m2ri(cmpss, var, reg, 5)
+#define	cmpnltss_r2r(regs, regd)	sse_r2ri(cmpss, regs, regd, 5)
+
+#define	cmpnless_m2r(var, reg)		sse_m2ri(cmpss, var, reg, 6)
+#define	cmpnless_r2r(regs, regd)	sse_r2ri(cmpss, regs, regd, 6)
+
+#define	cmpordss_m2r(var, reg)		sse_m2ri(cmpss, var, reg, 7)
+#define	cmpordss_r2r(regs, regd)	sse_r2ri(cmpss, regs, regd, 7)
+
+#define	comiss_m2r(var, reg)		sse_m2r(comiss, var, reg)	/* NOTE(review): no "cc" clobber is declared by sse_m2r/sse_r2r */
+#define	comiss_r2r(regs, regd)		sse_r2r(comiss, regs, regd)
+
+#define	ucomiss_m2r(var, reg)		sse_m2r(ucomiss, var, reg)
+#define	ucomiss_r2r(regs, regd)		sse_r2r(ucomiss, regs, regd)
+
+#define	unpcklps_m2r(var, reg)		sse_m2r(unpcklps, var, reg)	/* unpck{l,h}ps: packed forms only */
+#define	unpcklps_r2r(regs, regd)	sse_r2r(unpcklps, regs, regd)
+
+#define	unpckhps_m2r(var, reg)		sse_m2r(unpckhps, var, reg)
+#define	unpckhps_r2r(regs, regd)	sse_r2r(unpckhps, regs, regd)
+
+#define	fxrstor(mem) /* reload state from the lvalue mem; "m" forces a memory operand */ \
+	__asm__ __volatile__ ("fxrstor %0" \
+			      : /* nothing */ \
+			      : "m" (mem))
+
+#define	fxsave(mem) /* save state into the lvalue mem; declared as an output because the asm writes it */ \
+	__asm__ __volatile__ ("fxsave %0" \
+			      : "=m" (mem) \
+			      : /* nothing */ )
+
+#define	stmxcsr(mem) /* store MXCSR into the lvalue mem; declared as an output because the asm writes it */ \
+	__asm__ __volatile__ ("stmxcsr %0" \
+			      : "=m" (mem) \
+			      : /* nothing */ )
+
+#define	ldmxcsr(mem) /* load MXCSR from the lvalue mem; "m" forces a memory operand */ \
+	__asm__ __volatile__ ("ldmxcsr %0" \
+			      : /* nothing */ \
+			      : "m" (mem))
+
diff --git a/src/video_dec/libmpeg2new/include/tendra.h b/src/video_dec/libmpeg2new/include/tendra.h
new file mode 100644
index 000000000..09900916a
--- /dev/null
+++ b/src/video_dec/libmpeg2new/include/tendra.h
@@ -0,0 +1,35 @@
+/*
+ * tendra.h
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#pragma TenDRA begin
+#pragma TenDRA longlong type warning
+
+#ifdef TenDRA_check
+
+#pragma TenDRA conversion analysis (pointer-int explicit) off
+#pragma TenDRA implicit function declaration off
+
+/* avoid the "No declarations in translation unit" problem: this object gives any unit that includes the header one declaration */
+int TenDRA;	/* only compiled when TenDRA_check is defined (see the enclosing #ifdef) */
+
+#endif /* TenDRA_check */
diff --git a/src/video_dec/libmpeg2new/include/video_out.h b/src/video_dec/libmpeg2new/include/video_out.h
new file mode 100644
index 000000000..342c55197
--- /dev/null
+++ b/src/video_dec/libmpeg2new/include/video_out.h
@@ -0,0 +1,58 @@
+/*
+ * video_out.h
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+struct mpeg2_sequence_s;	/* forward declarations: only pointers to */
+struct mpeg2_convert_init_s;	/* these two structs are used below */
+typedef struct {
+    int (* convert) (int stage, void * id,	/* sole member: a conversion callback */
+		     const struct mpeg2_sequence_s * sequence,
+		     int stride, uint32_t accel, void * arg,	/* uint32_t must be declared by the includer: this header has no #include */
+		     struct mpeg2_convert_init_s * result);
+} vo_setup_result_t;	/* passed by pointer as the last argument of vo_instance_s.setup */
+
+typedef struct vo_instance_s vo_instance_t;
+struct vo_instance_s {	/* table of callbacks; each one receives the instance as its first argument */
+    int (* setup) (vo_instance_t * instance, unsigned int width,
+		   unsigned int height, unsigned int chroma_width,
+		   unsigned int chroma_height, vo_setup_result_t * result);	/* result is an out-parameter, presumably filled by the driver */
+    void (* setup_fbuf) (vo_instance_t * instance, uint8_t ** buf, void ** id);	/* buf/id are writable here ... */
+    void (* set_fbuf) (vo_instance_t * instance, uint8_t ** buf, void ** id);
+    void (* start_fbuf) (vo_instance_t * instance,
+			 uint8_t * const * buf, void * id);	/* ... and read-only pointers from here on */
+    void (* draw) (vo_instance_t * instance, uint8_t * const * buf, void * id);
+    void (* discard) (vo_instance_t * instance,
+		      uint8_t * const * buf, void * id);
+    void (* close) (vo_instance_t * instance);
+};
+
+typedef vo_instance_t * vo_open_t (void);	/* function type (not pointer): takes nothing, returns an instance */
+
+typedef struct {
+    char * name;	/* driver name string */
+    vo_open_t * open;	/* pointer to the function that creates an instance */
+} vo_driver_t;
+
+void vo_accel (uint32_t accel);	/* defined elsewhere; accel is presumably an acceleration flag mask — confirm */
+
+/* return NULL terminated array of all drivers */
+vo_driver_t const * vo_drivers (void);	/* entries are const: callers must not modify them */
diff --git a/src/video_dec/libmpeg2new/include/vis.h b/src/video_dec/libmpeg2new/include/vis.h
new file mode 100644
index 000000000..69dd49075
--- /dev/null
+++ b/src/video_dec/libmpeg2new/include/vis.h
@@ -0,0 +1,328 @@
+/*
+ * vis.h
+ * Copyright (C) 2003 David S. Miller <davem@redhat.com>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/* You may be asking why I hard-code the instruction opcodes and don't
+ * use the normal VIS assembler mnemonics for the VIS instructions.
+ *
+ * The reason is that Sun, in their infinite wisdom, decided that a binary
+ * using a VIS instruction will cause it to be marked (in the ELF headers)
+ * as doing so, and this prevents the OS from loading such binaries if the
+ * current cpu doesn't have VIS.  There is no way to easily override this
+ * behavior of the assembler that I am aware of.
+ *
+ * This totally defeats what libmpeg2 is trying to do which is allow a
+ * single binary to be created, and then detect the availability of VIS
+ * at runtime.
+ *
+ * I'm not saying that tainting the binary by default is bad, rather I'm
+ * saying that not providing a way to override this easily unnecessarily
+ * ties people's hands.
+ *
+ * Thus, we do the opcode encoding by hand and output 32-bit words in
+ * the assembler to keep the binary from becoming tainted.
+ */
+
+#define vis_opc_base	((0x1U << 31) | (0x36 << 19))	/* fixed opcode bits; unsigned so the shift into bit 31 is well defined */
+#define vis_opf(X)	((X) << 5)	/* opf field starts at bit 5 */
+#define vis_sreg(X)	(X)	/* single register: number used as is */
+#define vis_dreg(X)	(((X)&0x1f)|((X)>>5))	/* double register: bit 5 of the number folds into bit 0 */
+#define vis_rs1_s(X)	(vis_sreg(X) << 14)	/* rs1 field starts at bit 14 */
+#define vis_rs1_d(X)	(vis_dreg(X) << 14)
+#define vis_rs2_s(X)	(vis_sreg(X) << 0)	/* rs2 field starts at bit 0 */
+#define vis_rs2_d(X)	(vis_dreg(X) << 0)
+#define vis_rd_s(X)	(vis_sreg(X) << 25)	/* rd field starts at bit 25 */
+#define vis_rd_d(X)	(vis_dreg(X) << 25)
+
+#define vis_ss2s(opf,rs1,rs2,rd) /* rs1, rs2 and rd all use the single-register encoding */ \
+	__asm__ __volatile__ (".word %0" \
+			      : : "i" (vis_opc_base | vis_opf(opf) | \
+                                       vis_rs1_s(rs1) | \
+                                       vis_rs2_s(rs2) | \
+                                       vis_rd_s(rd)))
+
+#define vis_dd2d(opf,rs1,rs2,rd) /* rs1, rs2 and rd all use the double-register encoding */ \
+	__asm__ __volatile__ (".word %0" \
+			      : : "i" (vis_opc_base | vis_opf(opf) | \
+                                       vis_rs1_d(rs1) | \
+                                       vis_rs2_d(rs2) | \
+                                       vis_rd_d(rd)))
+
+#define vis_ss2d(opf,rs1,rs2,rd) /* single rs1 and rs2, double rd */ \
+	__asm__ __volatile__ (".word %0" \
+			      : : "i" (vis_opc_base | vis_opf(opf) | \
+                                       vis_rs1_s(rs1) | \
+                                       vis_rs2_s(rs2) | \
+                                       vis_rd_d(rd)))
+
+#define vis_sd2d(opf,rs1,rs2,rd) /* single rs1, double rs2, double rd */ \
+	__asm__ __volatile__ (".word %0" \
+			      : : "i" (vis_opc_base | vis_opf(opf) | \
+                                       vis_rs1_s(rs1) | \
+                                       vis_rs2_d(rs2) | \
+                                       vis_rd_d(rd)))
+
+#define vis_d2s(opf,rs2,rd) /* double rs2, single rd; rs1 field left zero */ \
+	__asm__ __volatile__ (".word %0" \
+			      : : "i" (vis_opc_base | vis_opf(opf) | \
+                                       vis_rs2_d(rs2) | \
+                                       vis_rd_s(rd)))
+
+#define vis_s2d(opf,rs2,rd) /* single rs2, double rd; rs1 field left zero */ \
+	__asm__ __volatile__ (".word %0" \
+			      : : "i" (vis_opc_base | vis_opf(opf) | \
+                                       vis_rs2_s(rs2) | \
+                                       vis_rd_d(rd)))
+
+#define vis_d12d(opf,rs1,rd) /* double rs1, double rd; rs2 field left zero */ \
+	__asm__ __volatile__ (".word %0" \
+			      : : "i" (vis_opc_base | vis_opf(opf) | \
+                                       vis_rs1_d(rs1) | \
+                                       vis_rd_d(rd)))
+
+#define vis_d22d(opf,rs2,rd) /* double rs2, double rd; rs1 field left zero */ \
+	__asm__ __volatile__ (".word %0" \
+			      : : "i" (vis_opc_base | vis_opf(opf) | \
+                                       vis_rs2_d(rs2) | \
+                                       vis_rd_d(rd)))
+
+#define vis_s12s(opf,rs1,rd) /* single rs1, single rd; rs2 field left zero */ \
+	__asm__ __volatile__ (".word %0" \
+			      : : "i" (vis_opc_base | vis_opf(opf) | \
+                                       vis_rs1_s(rs1) | \
+                                       vis_rd_s(rd)))
+
+#define vis_s22s(opf,rs2,rd) /* single rs2, single rd; rs1 field left zero */ \
+	__asm__ __volatile__ (".word %0" \
+			      : : "i" (vis_opc_base | vis_opf(opf) | \
+                                       vis_rs2_s(rs2) | \
+                                       vis_rd_s(rd)))
+
+#define vis_s(opf,rd) /* single rd only */ \
+	__asm__ __volatile__ (".word %0" \
+			      : : "i" (vis_opc_base | vis_opf(opf) | \
+                                       vis_rd_s(rd)))
+
+#define vis_d(opf,rd) /* double rd only */ \
+	__asm__ __volatile__ (".word %0" \
+			      : : "i" (vis_opc_base | vis_opf(opf) | \
+                                       vis_rd_d(rd)))
+
+#define vis_r2m(op,rd,mem) /* emits "op %f<rd>, [&mem]"; the clobber tells the compiler the asm writes memory */ \
+	__asm__ __volatile__ (#op "\t%%f" #rd ", [%0]" : : "r" (&(mem)) : "memory")
+
+#define vis_r2m_2(op,rd,mem1,mem2) /* emits "op %f<rd>, [mem1 + mem2]"; writes memory, hence the clobber */ \
+	__asm__ __volatile__ (#op "\t%%f" #rd ", [%0 + %1]" : : "r" (mem1), "r" (mem2) : "memory")
+
+#define vis_m2r(op,mem,rd) /* emits "op [&mem], %f<rd>"; the clobber makes earlier stores to mem visible to the asm */ \
+	__asm__ __volatile__ (#op "\t[%0], %%f" #rd : : "r" (&(mem)) : "memory")
+
+#define vis_m2r_2(op,mem1,mem2,rd) /* emits "op [mem1 + mem2], %f<rd>"; reads memory, hence the clobber */ \
+	__asm__ __volatile__ (#op "\t[%0 + %1], %%f" #rd : : "r" (mem1), "r" (mem2) : "memory")
+
+/* Write _val to the GSR using a hand-encoded instruction word that reads %g1. */
+static inline void vis_set_gsr(unsigned int _val)
+{
+	/* pinned to %g1; __asm__ (not asm) so strict ISO modes accept it too */
+	register unsigned int val __asm__("g1") = _val;
+	__asm__ __volatile__(".word 0xa7804000"
+			     : : "r" (val));
+}
+
+#define VIS_GSR_ALIGNADDR_MASK	0x0000007	/* bits 0-2 */
+#define VIS_GSR_ALIGNADDR_SHIFT	0
+#define VIS_GSR_SCALEFACT_MASK	0x0000078	/* bits 3-6 */
+#define VIS_GSR_SCALEFACT_SHIFT	3
+
+#define vis_ld32(mem,rs1)		vis_m2r(ld, mem, rs1)	/* 32-bit forms use ld/st; mem is an lvalue (its address is taken) */
+#define vis_ld32_2(mem1,mem2,rs1)	vis_m2r_2(ld, mem1, mem2, rs1)	/* _2 forms address [mem1 + mem2] */
+#define vis_st32(rs1,mem)		vis_r2m(st, rs1, mem)
+#define vis_st32_2(rs1,mem1,mem2)	vis_r2m_2(st, rs1, mem1, mem2)
+#define vis_ld64(mem,rs1)		vis_m2r(ldd, mem, rs1)	/* 64-bit forms use ldd/std */
+#define vis_ld64_2(mem1,mem2,rs1)	vis_m2r_2(ldd, mem1, mem2, rs1)
+#define vis_st64(rs1,mem)		vis_r2m(std, rs1, mem)
+#define vis_st64_2(rs1,mem1,mem2)	vis_r2m_2(std, rs1, mem1, mem2)
+
+#define vis_ldblk(mem, rd) /* hand-encoded load from &mem into double register rd; the address must sit in %g1 */ \
+do {	register void *__mem __asm__("g1"); /* __asm__, not asm: accepted in strict ISO modes */ \
+	__mem = &(mem); \
+	__asm__ __volatile__(".word 0xc1985e00 | %1" \
+			     : \
+			     : "r" (__mem), \
+			       "i" (vis_rd_d(rd)) \
+			     : "memory"); \
+} while (0)
+
+#define vis_stblk(rd, mem) /* hand-encoded store of double register rd to &mem; the address must sit in %g1 */ \
+do {	register void *__mem __asm__("g1"); /* __asm__, not asm: accepted in strict ISO modes */ \
+	__mem = &(mem); \
+	__asm__ __volatile__(".word 0xc1b85e00 | %1" \
+			     : \
+			     : "r" (__mem), \
+			       "i" (vis_rd_d(rd)) \
+			     : "memory"); \
+} while (0)
+
+#define vis_membar_storestore()	/* hand-encoded barrier; also a compiler barrier via the "memory" clobber */ \
+	__asm__ __volatile__(".word 0x8143e008" : : : "memory")
+
+#define vis_membar_sync()	/* same encoding with a different mask field (0x40 instead of 0x08) */ \
+	__asm__ __volatile__(".word 0x8143e040" : : : "memory")
+
+/* 16 and 32 bit partitioned addition and subtraction.  The normal
+ * versions perform 4 16-bit or 2 32-bit additions or subtractions.
+ * The 's' versions perform 2 16-bit or 1 32-bit additions or
+ * subtractions.
+ */
+
+#define vis_padd16(rs1,rs2,rd)		vis_dd2d(0x50, rs1, rs2, rd)	/* even opf: double-register operands */
+#define vis_padd16s(rs1,rs2,rd)		vis_ss2s(0x51, rs1, rs2, rd)	/* odd opf ('s'): single-register operands */
+#define vis_padd32(rs1,rs2,rd)		vis_dd2d(0x52, rs1, rs2, rd)
+#define vis_padd32s(rs1,rs2,rd)		vis_ss2s(0x53, rs1, rs2, rd)
+#define vis_psub16(rs1,rs2,rd)		vis_dd2d(0x54, rs1, rs2, rd)
+#define vis_psub16s(rs1,rs2,rd)		vis_ss2s(0x55, rs1, rs2, rd)
+#define vis_psub32(rs1,rs2,rd)		vis_dd2d(0x56, rs1, rs2, rd)
+#define vis_psub32s(rs1,rs2,rd)		vis_ss2s(0x57, rs1, rs2, rd)
+
+/* Pixel formatting instructions.  */
+
+#define vis_pack16(rs2,rd)		vis_d2s( 0x3b,      rs2, rd)	/* double source, single destination */
+#define vis_pack32(rs1,rs2,rd)		vis_dd2d(0x3a, rs1, rs2, rd)	/* all double registers */
+#define vis_packfix(rs2,rd)		vis_d2s( 0x3d,      rs2, rd)	/* double source, single destination */
+#define vis_expand(rs2,rd)		vis_s2d( 0x4d,      rs2, rd)	/* single source, double destination */
+#define vis_pmerge(rs1,rs2,rd)		vis_ss2d(0x4b, rs1, rs2, rd)	/* two single sources, double destination */
+
+/* Partitioned multiply instructions.  */
+
+#define vis_mul8x16(rs1,rs2,rd)		vis_sd2d(0x31, rs1, rs2, rd)	/* single rs1, double rs2, double rd */
+#define vis_mul8x16au(rs1,rs2,rd)	vis_ss2d(0x33, rs1, rs2, rd)	/* single sources, double rd */
+#define vis_mul8x16al(rs1,rs2,rd)	vis_ss2d(0x35, rs1, rs2, rd)
+#define vis_mul8sux16(rs1,rs2,rd)	vis_dd2d(0x36, rs1, rs2, rd)	/* all double registers */
+#define vis_mul8ulx16(rs1,rs2,rd)	vis_dd2d(0x37, rs1, rs2, rd)
+#define vis_muld8sux16(rs1,rs2,rd)	vis_ss2d(0x38, rs1, rs2, rd)	/* single sources, double rd */
+#define vis_muld8ulx16(rs1,rs2,rd)	vis_ss2d(0x39, rs1, rs2, rd)
+
+/* Alignment instructions.  */
+
+/* Hand-encoded opf 0x18 with rs1 = rd = %g1 and rs2 = %g0 (presumably the VIS
+ * ALIGNADDRESS instruction); returns the value the instruction leaves in %g1. */
+static inline void *vis_alignaddr(void *_ptr)
+{
+	register void *ptr __asm__("g1") = _ptr;	/* pinned: the encoded word names %g1 */
+
+	__asm__ __volatile__(".word %2"
+			     : "=&r" (ptr)
+			     : "0" (ptr),
+			       "i" (vis_opc_base | vis_opf(0x18) |
+				    vis_rs1_s(1) |
+				    vis_rs2_s(0) |
+				    vis_rd_s(1)));
+
+	return ptr;
+}
+
+/* Same encoding as vis_alignaddr (opf 0x18, rs1 = %g1, rs2 = %g0) but rd = %g0,
+ * so no result is returned; presumably used only for its side effect on GSR. */
+static inline void vis_alignaddr_g0(void *_ptr)
+{
+	register void *ptr __asm__("g1") = _ptr;	/* pinned: the encoded word names %g1 */
+
+	__asm__ __volatile__(".word %2"
+			     : "=&r" (ptr)
+			     : "0" (ptr),
+			       "i" (vis_opc_base | vis_opf(0x18) |
+				    vis_rs1_s(1) |
+				    vis_rs2_s(0) |
+				    vis_rd_s(0)));
+}
+
+/* Hand-encoded opf 0x19 with rs1 = rd = %g1 and rs2 = %g0 (presumably the
+ * little-endian ALIGNADDRESS variant); returns the value left in %g1. */
+static inline void *vis_alignaddrl(void *_ptr)
+{
+	register void *ptr __asm__("g1") = _ptr;	/* pinned: the encoded word names %g1 */
+
+	__asm__ __volatile__(".word %2"
+			     : "=&r" (ptr)
+			     : "0" (ptr),
+			       "i" (vis_opc_base | vis_opf(0x19) |
+				    vis_rs1_s(1) |
+				    vis_rs2_s(0) |
+				    vis_rd_s(1)));
+
+	return ptr;
+}
+
+/* Same encoding as vis_alignaddrl (opf 0x19, rs1 = %g1, rs2 = %g0) but rd = %g0,
+ * so no result is returned; presumably used only for its side effect on GSR. */
+static inline void vis_alignaddrl_g0(void *_ptr)
+{
+	register void *ptr __asm__("g1") = _ptr;	/* pinned: the encoded word names %g1 */
+
+	__asm__ __volatile__(".word %2"
+			     : "=&r" (ptr)
+			     : "0" (ptr),
+			       "i" (vis_opc_base | vis_opf(0x19) |
+				    vis_rs1_s(1) |
+				    vis_rs2_s(0) |
+				    vis_rd_s(0)));
+}
+
+#define vis_faligndata(rs1,rs2,rd)	vis_dd2d(0x48, rs1, rs2, rd)	/* all double registers; presumably consumes the GSR alignment set by vis_alignaddr* */
+
+/* Logical operate instructions.  */
+
+#define vis_fzero(rd)			vis_d(   0x60,           rd)	/* even opf: double-register form */
+#define vis_fzeros(rd)			vis_s(   0x61,           rd)	/* odd opf ('s'): single-register form */
+#define vis_fone(rd)			vis_d(   0x7e,           rd)
+#define vis_fones(rd)			vis_s(   0x7f,           rd)
+#define vis_src1(rs1,rd)		vis_d12d(0x74, rs1,      rd)	/* *1 variants take rs1 only */
+#define vis_src1s(rs1,rd)		vis_s12s(0x75, rs1,      rd)
+#define vis_src2(rs2,rd)		vis_d22d(0x78,      rs2, rd)	/* *2 variants take rs2 only */
+#define vis_src2s(rs2,rd)		vis_s22s(0x79,      rs2, rd)
+#define vis_not1(rs1,rd)		vis_d12d(0x6a, rs1,      rd)
+#define vis_not1s(rs1,rd)		vis_s12s(0x6b, rs1,      rd)
+#define vis_not2(rs2,rd)		vis_d22d(0x66,      rs2, rd)
+#define vis_not2s(rs2,rd)		vis_s22s(0x67,      rs2, rd)
+#define vis_or(rs1,rs2,rd)		vis_dd2d(0x7c, rs1, rs2, rd)	/* two-source operations from here on */
+#define vis_ors(rs1,rs2,rd)		vis_ss2s(0x7d, rs1, rs2, rd)
+#define vis_nor(rs1,rs2,rd)		vis_dd2d(0x62, rs1, rs2, rd)
+#define vis_nors(rs1,rs2,rd)		vis_ss2s(0x63, rs1, rs2, rd)
+#define vis_and(rs1,rs2,rd)		vis_dd2d(0x70, rs1, rs2, rd)
+#define vis_ands(rs1,rs2,rd)		vis_ss2s(0x71, rs1, rs2, rd)
+#define vis_nand(rs1,rs2,rd)		vis_dd2d(0x6e, rs1, rs2, rd)
+#define vis_nands(rs1,rs2,rd)		vis_ss2s(0x6f, rs1, rs2, rd)
+#define vis_xor(rs1,rs2,rd)		vis_dd2d(0x6c, rs1, rs2, rd)
+#define vis_xors(rs1,rs2,rd)		vis_ss2s(0x6d, rs1, rs2, rd)
+#define vis_xnor(rs1,rs2,rd)		vis_dd2d(0x72, rs1, rs2, rd)
+#define vis_xnors(rs1,rs2,rd)		vis_ss2s(0x73, rs1, rs2, rd)
+#define vis_ornot1(rs1,rs2,rd)		vis_dd2d(0x7a, rs1, rs2, rd)
+#define vis_ornot1s(rs1,rs2,rd)		vis_ss2s(0x7b, rs1, rs2, rd)
+#define vis_ornot2(rs1,rs2,rd)		vis_dd2d(0x76, rs1, rs2, rd)
+#define vis_ornot2s(rs1,rs2,rd)		vis_ss2s(0x77, rs1, rs2, rd)
+#define vis_andnot1(rs1,rs2,rd)		vis_dd2d(0x68, rs1, rs2, rd)
+#define vis_andnot1s(rs1,rs2,rd)	vis_ss2s(0x69, rs1, rs2, rd)
+#define vis_andnot2(rs1,rs2,rd)		vis_dd2d(0x64, rs1, rs2, rd)
+#define vis_andnot2s(rs1,rs2,rd)	vis_ss2s(0x65, rs1, rs2, rd)
+
+/* Pixel component distance.  */
+
+#define vis_pdist(rs1,rs2,rd)		vis_dd2d(0x3e, rs1, rs2, rd)	/* all double registers */
diff --git a/src/video_dec/libmpeg2new/libmpeg2/Makefile.am b/src/video_dec/libmpeg2new/libmpeg2/Makefile.am
new file mode 100644
index 000000000..2caa3ddc2
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/Makefile.am
@@ -0,0 +1,14 @@
+include $(top_srcdir)/misc/Makefile.common
+
+AM_CFLAGS = $(DEFAULT_OCFLAGS) $(VISIBILITY_FLAG)
+
+noinst_LTLIBRARIES = libmpeg2.la libmpeg2arch.la
+
+libmpeg2_la_SOURCES = alloc.c header.c decode.c slice.c motion_comp.c idct.c
+libmpeg2_la_LIBADD = libmpeg2arch.la
+
+libmpeg2arch_la_SOURCES = motion_comp_mmx.c idct_mmx.c \
+                          motion_comp_altivec.c idct_altivec.c \
+                          motion_comp_alpha.c idct_alpha.c \
+                          motion_comp_vis.c \
+                          cpu_accel.c cpu_state.c
diff --git a/src/video_dec/libmpeg2new/libmpeg2/alloc.c b/src/video_dec/libmpeg2new/libmpeg2/alloc.c
new file mode 100644
index 000000000..f1a7afa1c
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/alloc.c
@@ -0,0 +1,70 @@
+/*
+ * alloc.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "../include/mpeg2.h"
+
+static void * (* malloc_hook) (unsigned size, mpeg2_alloc_t reason) = NULL;
+static int (* free_hook) (void * buf) = NULL;	/* both hooks start unset; mpeg2_malloc_hooks () installs them */
+
+/* Return size bytes aligned to 64 (hook tried first when set); NULL for size 0,
+ * size_t overflow or OOM.  The raw malloc pointer is kept just below the result. */
+void * mpeg2_malloc (unsigned size, mpeg2_alloc_t reason)
+{
+    const size_t overhead = 63 + sizeof (void **);
+    char * buf, * align_buf;
+
+    if (malloc_hook) {
+	buf = (char *) malloc_hook (size, reason);
+	if (buf)
+	    return buf;
+    }
+
+    if (!size || size > SIZE_MAX - overhead)
+	return NULL;
+    buf = (char *) malloc (size + overhead);
+    if (!buf)
+	return NULL;
+    align_buf = buf + overhead;
+    align_buf -= (uintptr_t) align_buf & 63;
+    *(((void **) align_buf) - 1) = buf;
+    return align_buf;
+}
+
+/* Free buf: hook first (nonzero = handled), else the raw pointer kept below it. */
+void mpeg2_free (void * buf)
+{
+    void ** header = (void **) buf;
+
+    if (!(free_hook && free_hook (buf)) && header)
+	free (header[-1]);
+}
+
+void mpeg2_malloc_hooks (void * (* alloc_fn) (unsigned, mpeg2_alloc_t),
+			 int (* release_fn) (void *))
+{   /* install the hooks that mpeg2_malloc / mpeg2_free try first */
+    free_hook = release_fn;	/* NULL: mpeg2_free goes straight to free () */
+    malloc_hook = alloc_fn;	/* NULL: mpeg2_malloc goes straight to malloc () */
+}
diff --git a/src/video_dec/libmpeg2new/libmpeg2/configure.incl b/src/video_dec/libmpeg2new/libmpeg2/configure.incl
new file mode 100644
index 000000000..f8dbd5aef
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/configure.incl
@@ -0,0 +1,11 @@
+AC_SUBST([LIBMPEG2_CFLAGS])
+
+dnl avoid -fPIC when possible
+AC_LIBTOOL_NON_PIC([LIBMPEG2_CFLAGS="$LIBMPEG2_CFLAGS -prefer-non-pic"])
+
+dnl check for cpudetect
+AC_ARG_ENABLE([accel-detect],
+    [  --disable-accel-detect  make a version without accel detection code])
+if test x"$enable_accel_detect" != x"no"; then
+    AC_DEFINE([ACCEL_DETECT],,[autodetect accelerations])
+fi
diff --git a/src/video_dec/libmpeg2new/libmpeg2/convert_internal.h b/src/video_dec/libmpeg2new/libmpeg2/convert_internal.h
new file mode 100644
index 000000000..d1e63d5e3
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/convert_internal.h
@@ -0,0 +1,42 @@
+/*
+ * convert_internal.h
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+typedef struct {	/* uint8_t must be declared by the includer: this header has no #include */
+    uint8_t * rgb_ptr;	/* presumably the current output position — confirm in the converters */
+    int width;
+    int field;
+    int y_stride, rgb_stride, y_increm, uv_increm, rgb_increm, rgb_slice;	/* presumably byte strides/increments per plane — TODO confirm units */
+    int chroma420, convert420;
+    int dither_offset, dither_stride;
+    int y_stride_frame, uv_stride_frame, rgb_stride_frame, rgb_stride_min;
+} convert_rgb_t;	/* only declared here; no user of the fields is visible in this header */
+
+typedef void mpeg2convert_copy_t (void * id, uint8_t * const * src,	/* function type; src planes are read-only pointers */
+				  unsigned int v_offset);
+
+mpeg2convert_copy_t * mpeg2convert_rgb_mmxext (int bpp, int mode,	/* the three selectors share one signature */
+					       const mpeg2_sequence_t * seq);
+mpeg2convert_copy_t * mpeg2convert_rgb_mmx (int bpp, int mode,	/* mpeg2_sequence_t is not declared in this header */
+					    const mpeg2_sequence_t * seq);
+mpeg2convert_copy_t * mpeg2convert_rgb_vis (int bpp, int mode,	/* bodies not visible here; presumably return NULL when unsupported — confirm */
+					    const mpeg2_sequence_t * seq);
diff --git a/src/video_dec/libmpeg2new/libmpeg2/cpu_accel.c b/src/video_dec/libmpeg2new/libmpeg2/cpu_accel.c
new file mode 100644
index 000000000..7846f1e88
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/cpu_accel.c
@@ -0,0 +1,258 @@
+/*
+ * cpu_accel.c
+ * Copyright (C) 2000-2004 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#include <inttypes.h>
+
+#include "../include/mpeg2.h"
+#include "../include/attributes.h"
+#include "mpeg2_internal.h"
+
+#ifdef ARCH_X86
+static inline uint32_t arch_accel (uint32_t accel)
+{	/* x86: complete the requested feature mask, then probe CPUID on request */
+    /* Apply implications newest-first (SSE3 -> SSE2 -> MMXEXT -> MMX) so a
+     * mask that forces only SSE2/SSE3 still ends up with the MMX bit set. */
+    if (accel & (MPEG2_ACCEL_X86_SSE3))
+	accel |= MPEG2_ACCEL_X86_SSE2;
+    if (accel & (MPEG2_ACCEL_X86_SSE2 | MPEG2_ACCEL_X86_SSE3))
+	accel |= MPEG2_ACCEL_X86_MMXEXT;
+    if (accel & (MPEG2_ACCEL_X86_3DNOW | MPEG2_ACCEL_X86_MMXEXT))
+	accel |= MPEG2_ACCEL_X86_MMX;
+
+#ifdef ACCEL_DETECT
+    if (accel & MPEG2_ACCEL_DETECT) {
+	uint32_t eax, ebx, ecx, edx;
+	int AMD;
+
+#if !defined(PIC) && !defined(__PIC__)
+#define cpuid(op,eax,ebx,ecx,edx)	\
+    __asm__ ("cpuid"			\
+	     : "=a" (eax),		\
+	       "=b" (ebx),		\
+	       "=c" (ecx),		\
+	       "=d" (edx)		\
+	     : "a" (op)			\
+	     : "cc")
+#else	/* PIC version : save ebx */
+#define cpuid(op,eax,ebx,ecx,edx)	\
+    __asm__ ("push %%ebx\n\t"		\
+	     "cpuid\n\t"		\
+	     "movl %%ebx,%1\n\t"	\
+	     "pop %%ebx"		\
+	     : "=a" (eax),		\
+	       "=r" (ebx),		\
+	       "=c" (ecx),		\
+	       "=d" (edx)		\
+	     : "a" (op)			\
+	     : "cc")
+#endif
+	/* Try to flip EFLAGS bit 21 (0x200000); eax gets the new value, ebx the old. */
+	__asm__ ("pushf\n\t"
+		 "pushf\n\t"
+		 "pop %0\n\t"
+		 "movl %0,%1\n\t"
+		 "xorl $0x200000,%0\n\t"
+		 "push %0\n\t"
+		 "popf\n\t"
+		 "pushf\n\t"
+		 "pop %0\n\t"
+		 "popf"
+		 : "=r" (eax),
+		 "=r" (ebx)
+		 :
+		 : "cc");
+
+	if (eax == ebx)			/* no cpuid */
+	    return accel;
+
+	cpuid (0x00000000, eax, ebx, ecx, edx);
+	if (!eax)			/* vendor string only */
+	    return accel;
+
+	AMD = (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65);	/* "AuthenticAMD" */
+
+	cpuid (0x00000001, eax, ebx, ecx, edx);
+	if (! (edx & 0x00800000))	/* no MMX */
+	    return accel;
+
+	accel |= MPEG2_ACCEL_X86_MMX;
+	if (edx & 0x02000000)	/* SSE - identical to AMD MMX extensions */
+	    accel |= MPEG2_ACCEL_X86_MMXEXT;
+
+	if (edx & 0x04000000)	/* SSE2 */
+	    accel |= MPEG2_ACCEL_X86_SSE2;
+
+	if (ecx & 0x00000001)	/* SSE3 */
+	    accel |= MPEG2_ACCEL_X86_SSE3;
+
+	cpuid (0x80000000, eax, ebx, ecx, edx);
+	if (eax < 0x80000001)		/* no extended capabilities */
+	    return accel;
+
+	cpuid (0x80000001, eax, ebx, ecx, edx);
+
+	if (edx & 0x80000000)
+	    accel |= MPEG2_ACCEL_X86_3DNOW;
+
+	if (AMD && (edx & 0x00400000))	/* AMD MMX extensions */
+	    accel |= MPEG2_ACCEL_X86_MMXEXT;
+    }
+#endif /* ACCEL_DETECT */
+
+    return accel;
+}
+#endif /* ARCH_X86 */
+
+#if defined(ACCEL_DETECT) && (defined(ARCH_PPC) || defined(ARCH_SPARC))
+#include <signal.h>
+#include <setjmp.h>
+
+static sigjmp_buf jmpbuf;
+static volatile sig_atomic_t canjump = 0;
+
+static RETSIGTYPE sigill_handler (int sig)
+{
+    /* Not inside a probe: restore the default action and re-deliver. */
+    if (canjump == 0) {
+	signal (sig, SIG_DFL);
+	raise (sig);
+    }
+    canjump = 0;	/* disarm before unwinding to the sigsetjmp point */
+    siglongjmp (jmpbuf, 1);
+}
+#endif /* ACCEL_DETECT && (ARCH_PPC || ARCH_SPARC) */
+
+#ifdef ARCH_PPC
+static inline uint32_t arch_accel (uint32_t accel)
+{	/* PowerPC: probe for AltiVec when detection is requested */
+#ifdef ACCEL_DETECT
+    if ((accel & (MPEG2_ACCEL_PPC_ALTIVEC | MPEG2_ACCEL_DETECT)) ==
+	MPEG2_ACCEL_DETECT) {	/* DETECT set and AltiVec not already forced */
+	static RETSIGTYPE (* oldsig) (int);
+
+	oldsig = signal (SIGILL, sigill_handler);
+	if (sigsetjmp (jmpbuf, 1)) {
+	    signal (SIGILL, oldsig);	/* the probe trapped: leave the bit clear */
+	    return accel;
+	}
+
+	canjump = 1;
+
+#ifdef HAVE_ALTIVEC_H	/* gnu */
+#define VAND(a,b,c) "vand " #a "," #b "," #c "\n\t"
+#else			/* apple */
+#define VAND(a,b,c) "vand v" #a ",v" #b ",v" #c "\n\t"
+#endif
+	asm volatile ("mtspr 256, %0\n\t"
+		      VAND (0, 0, 0)
+		      :
+		      : "r" (-1));
+
+	canjump = 0;
+	accel |= MPEG2_ACCEL_PPC_ALTIVEC;
+
+	signal (SIGILL, oldsig);
+    }
+#endif /* ACCEL_DETECT */
+
+    return accel;
+}
+#endif /* ARCH_PPC */
+
+#ifdef ARCH_SPARC
+static inline uint32_t arch_accel (uint32_t accel)
+{	/* SPARC: probe for VIS, then VIS2, when detection is requested */
+    if (accel & MPEG2_ACCEL_SPARC_VIS2)
+	accel |= MPEG2_ACCEL_SPARC_VIS;
+
+#ifdef ACCEL_DETECT
+    if ((accel & (MPEG2_ACCEL_SPARC_VIS2 | MPEG2_ACCEL_DETECT)) ==
+	MPEG2_ACCEL_DETECT) {	/* DETECT set and VIS2 not already forced */
+	static RETSIGTYPE (* oldsig) (int);
+
+	oldsig = signal (SIGILL, sigill_handler);
+	if (sigsetjmp (jmpbuf, 1)) {
+	    signal (SIGILL, oldsig);	/* first probe trapped: no VIS */
+	    return accel;
+	}
+
+	canjump = 1;
+
+	/* pdist %f0, %f0, %f0 */
+	__asm__ __volatile__(".word\t0x81b007c0");
+
+	canjump = 0;
+	accel |= MPEG2_ACCEL_SPARC_VIS;
+
+	if (sigsetjmp (jmpbuf, 1)) {
+	    signal (SIGILL, oldsig);	/* second probe trapped: VIS only */
+	    return accel;
+	}
+
+	canjump = 1;
+
+	/* edge8n %g0, %g0, %g0 */
+	__asm__ __volatile__(".word\t0x81b00020");
+
+	canjump = 0;
+	accel |= MPEG2_ACCEL_SPARC_VIS2;
+
+	signal (SIGILL, oldsig);
+    }
+#endif /* ACCEL_DETECT */
+
+    return accel;
+}
+#endif /* ARCH_SPARC */
+
+#ifdef ARCH_ALPHA
+static inline uint32_t arch_accel (uint32_t accel)
+{
+    /* A forced MVI request implies the base Alpha bit. */
+    if (accel & MPEG2_ACCEL_ALPHA_MVI)
+	accel |= MPEG2_ACCEL_ALPHA;
+#ifdef ACCEL_DETECT
+    if (accel & MPEG2_ACCEL_DETECT) {
+	uint64_t mvi_missing;
+
+	asm volatile ("amask %1, %0"
+		      : "=r" (mvi_missing)
+		      : "rI" (256));	/* AMASK_MVI */
+	accel |= MPEG2_ACCEL_ALPHA;
+	if (!mvi_missing)
+	    accel |= MPEG2_ACCEL_ALPHA_MVI;
+    }
+#endif /* ACCEL_DETECT */
+    return accel;
+}
+#endif /* ARCH_ALPHA */
+
+uint32_t mpeg2_detect_accel (uint32_t accel)
+{	/* targets without an arch_accel probe return the mask unchanged */
+#if defined (ARCH_X86) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC)
+    accel = arch_accel (accel);
+#endif
+    return accel;
+}
diff --git a/src/video_dec/libmpeg2new/libmpeg2/cpu_state.c b/src/video_dec/libmpeg2new/libmpeg2/cpu_state.c
new file mode 100644
index 000000000..edbf2dd28
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/cpu_state.c
@@ -0,0 +1,129 @@
+/*
+ * cpu_state.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "../include/mpeg2.h"
+#include "../include/attributes.h"
+#include "mpeg2_internal.h"
+#ifdef ARCH_X86
+#include "../include/mmx.h"
+#endif
+
+void (* mpeg2_cpu_state_save) (cpu_state_t * state) = NULL;
+void (* mpeg2_cpu_state_restore) (cpu_state_t * state) = NULL;
+
+#ifdef ARCH_X86
+static void state_restore_mmx (cpu_state_t * state)
+{	/* restore hook for MMX builds: only issues emms(); `state` is not read */
+    emms ();
+}
+#endif
+
+#ifdef ARCH_PPC
+#ifdef HAVE_ALTIVEC_H	/* gnu */
+#define LI(a,b) "li " #a "," #b "\n\t"
+#define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t"
+#define STVX(a,b,c) "stvx " #a "," #b "," #c "\n\t"
+#define LVX0(a,b,c) "lvx " #a ",0," #c "\n\t"
+#define LVX(a,b,c) "lvx " #a "," #b "," #c "\n\t"
+#else			/* apple */
+#define LI(a,b) "li r" #a "," #b "\n\t"
+#define STVX0(a,b,c) "stvx v" #a ",0,r" #c "\n\t"
+#define STVX(a,b,c) "stvx v" #a ",r" #b ",r" #c "\n\t"
+#define LVX0(a,b,c) "lvx v" #a ",0,r" #c "\n\t"
+#define LVX(a,b,c) "lvx v" #a ",r" #b ",r" #c "\n\t"
+#endif
+
+static void state_save_altivec (cpu_state_t * state)
+{	/* Stores vector registers 20..31 at byte offsets 0,16,...,176 from the address in r3. NOTE(review): the asm names r3/r9/r11 directly with no operands or clobbers — presumably relies on `state` arriving in r3; confirm against the ABI */
+    asm (LI (9, 16)
+	 STVX0 (20, 0, 3)
+	 LI (11, 32)
+	 STVX (21, 9, 3)
+	 LI (9, 48)
+	 STVX (22, 11, 3)
+	 LI (11, 64)
+	 STVX (23, 9, 3)
+	 LI (9, 80)
+	 STVX (24, 11, 3)
+	 LI (11, 96)
+	 STVX (25, 9, 3)
+	 LI (9, 112)
+	 STVX (26, 11, 3)
+	 LI (11, 128)
+	 STVX (27, 9, 3)
+	 LI (9, 144)
+	 STVX (28, 11, 3)
+	 LI (11, 160)
+	 STVX (29, 9, 3)
+	 LI (9, 176)
+	 STVX (30, 11, 3)
+	 STVX (31, 9, 3));
+}
+
+static void state_restore_altivec (cpu_state_t * state)
+{	/* Loads vector registers 20..31 from byte offsets 0,16,...,176 from the address in r3. NOTE(review): the asm names r3/r9/r11 directly with no operands or clobbers — presumably relies on `state` arriving in r3; confirm against the ABI */
+    asm (LI (9, 16)
+	 LVX0 (20, 0, 3)
+	 LI (11, 32)
+	 LVX (21, 9, 3)
+	 LI (9, 48)
+	 LVX (22, 11, 3)
+	 LI (11, 64)
+	 LVX (23, 9, 3)
+	 LI (9, 80)
+	 LVX (24, 11, 3)
+	 LI (11, 96)
+	 LVX (25, 9, 3)
+	 LI (9, 112)
+	 LVX (26, 11, 3)
+	 LI (11, 128)
+	 LVX (27, 9, 3)
+	 LI (9, 144)
+	 LVX (28, 11, 3)
+	 LI (11, 160)
+	 LVX (29, 9, 3)
+	 LI (9, 176)
+	 LVX (30, 11, 3)
+	 LVX (31, 9, 3));
+}
+#endif
+
+void mpeg2_cpu_state_init (uint32_t accel)
+{
+#ifdef ARCH_X86
+    /* MMX installs only a restore hook; the save hook stays as it was. */
+    if (accel & MPEG2_ACCEL_X86_MMX)
+	mpeg2_cpu_state_restore = state_restore_mmx;
+#endif
+#ifdef ARCH_PPC
+    if (accel & MPEG2_ACCEL_PPC_ALTIVEC) {
+	mpeg2_cpu_state_restore = state_restore_altivec;
+	mpeg2_cpu_state_save = state_save_altivec;
+    }
+#endif
+}
diff --git a/src/video_dec/libmpeg2new/libmpeg2/decode.c b/src/video_dec/libmpeg2new/libmpeg2/decode.c
new file mode 100644
index 000000000..337ba4466
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/decode.c
@@ -0,0 +1,439 @@
+/*
+ * decode.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#include <string.h>	/* memcmp/memset, try to remove */
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "../include/mpeg2.h"
+#include "../include/attributes.h"
+#include "mpeg2_internal.h"
+
+static int mpeg2_accels = 0;
+
+#define BUFFER_SIZE (1194 * 1024)
+
+const mpeg2_info_t * mpeg2_info (mpeg2dec_t * mpeg2dec)
+{	/* read-only view of the info structure embedded in the decoder */
+    return &(mpeg2dec->info);
+}
+
+static inline int skip_chunk (mpeg2dec_t * mpeg2dec, int bytes)
+{
+    uint8_t * scan;		/* next input byte to examine */
+    uint8_t * stop;		/* one past the last byte to examine */
+    uint32_t window;		/* rolling start-code detector */
+
+    /* Returns bytes consumed up to and including a start-code value, else 0. */
+    if (bytes == 0)
+	return 0;
+
+    scan = mpeg2dec->buf_start;
+    stop = scan + bytes;
+    window = mpeg2dec->shift;
+
+    do {
+	const uint8_t octet = *scan++;
+
+	if (window == 0x00000100) {
+	    /* 00 00 01 came just before octet: octet is the start-code value */
+	    const int consumed = scan - mpeg2dec->buf_start;
+	    mpeg2dec->shift = 0xffffff00;
+	    mpeg2dec->buf_start = scan;
+	    return consumed;
+	}
+	window = (window | octet) << 8;
+    } while (scan < stop);
+
+    mpeg2dec->shift = window;
+    mpeg2dec->buf_start = scan;
+    return 0;
+}
+
+static inline int copy_chunk (mpeg2dec_t * mpeg2dec, int bytes)
+{
+    uint8_t * in;		/* read cursor in the input buffer */
+    uint8_t * in_end;		/* one past the last byte to read */
+    uint8_t * out;		/* write cursor in the chunk buffer */
+    uint32_t window;		/* rolling start-code detector */
+
+    if (bytes == 0)
+	return 0;
+
+    in = mpeg2dec->buf_start;
+    in_end = in + bytes;
+    out = mpeg2dec->chunk_ptr;
+    window = mpeg2dec->shift;
+
+    do {
+	const uint8_t octet = *in++;
+
+	if (window == 0x00000100) {
+	    /* 00 00 01 came just before octet: it is not copied as payload */
+	    const int consumed = in - mpeg2dec->buf_start;
+
+	    mpeg2dec->shift = 0xffffff00;
+	    mpeg2dec->chunk_ptr = out + 1;
+	    mpeg2dec->buf_start = in;
+	    return consumed;
+	}
+	*out++ = octet;
+	window = (window | octet) << 8;
+    } while (in < in_end);
+
+    mpeg2dec->shift = window;	/* chunk_ptr is left for the caller to advance */
+    mpeg2dec->buf_start = in;
+    return 0;
+}
+
+void mpeg2_buffer (mpeg2dec_t * mpeg2dec, uint8_t * start, uint8_t * end)
+{	/* records the input range [start, end) to scan; the pointers are stored, no data is copied here */
+    mpeg2dec->buf_start = start;
+    mpeg2dec->buf_end = end;
+}
+
+int mpeg2_getpos (mpeg2dec_t * mpeg2dec)
+{	/* buf_end - buf_start: bytes of the current input buffer not yet consumed */
+    return mpeg2dec->buf_end - mpeg2dec->buf_start;
+}
+
+static inline mpeg2_state_t seek_chunk (mpeg2dec_t * mpeg2dec)
+{
+    const int avail = mpeg2dec->buf_end - mpeg2dec->buf_start;
+    const int used = skip_chunk (mpeg2dec, avail);
+
+    /* Account for what was scanned whether or not a start code turned up. */
+    mpeg2dec->bytes_since_tag += used ? used : avail;
+    if (used == 0)
+	return STATE_BUFFER;	/* input exhausted without a start code */
+
+    /* skip_chunk left buf_start just past the start-code value byte. */
+    mpeg2dec->code = mpeg2dec->buf_start[-1];
+    return STATE_INTERNAL_NORETURN;
+}
+
+mpeg2_state_t mpeg2_seek_header (mpeg2dec_t * mpeg2dec)
+{	/* skip chunks until 0xb3, or (once sequence.width is set) 0xb7, 0xb8 or 0x00 */
+    while (mpeg2dec->code != 0xb3 &&
+	   (mpeg2dec->sequence.width == (unsigned)-1 ||
+	    (mpeg2dec->code != 0xb7 && mpeg2dec->code != 0xb8 && mpeg2dec->code)))
+	if (seek_chunk (mpeg2dec) == STATE_BUFFER)
+	    return STATE_BUFFER;
+    mpeg2dec->chunk_start = mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer;
+    mpeg2dec->user_data_len = 0;
+    return ((mpeg2dec->code != 0xb7) ?
+	    mpeg2_parse_header (mpeg2dec) : mpeg2_header_end (mpeg2dec));
+}
+
+#define RECEIVED(code,state) (((state) << 8) + (code))
+
+mpeg2_state_t mpeg2_parse (mpeg2dec_t * mpeg2dec)
+{	/* slice-level parser: runs any pending action, then decodes slices until a non-slice code or the input runs out */
+    int size_buffer, size_chunk, copied;
+
+    if (mpeg2dec->action) {
+	mpeg2_state_t state;
+
+	state = mpeg2dec->action (mpeg2dec);
+	if ((int)state > (int)STATE_INTERNAL_NORETURN)	/* only values above the internal sentinel are returned */
+	    return state;
+    }
+
+    while (1) {
+	while ((unsigned) (mpeg2dec->code - mpeg2dec->first_decode_slice) <
+	       mpeg2dec->nb_decode_slices) {	/* code lies in the range of slices selected for decoding */
+	    size_buffer = mpeg2dec->buf_end - mpeg2dec->buf_start;
+	    size_chunk = (mpeg2dec->chunk_buffer + BUFFER_SIZE -
+			  mpeg2dec->chunk_ptr);
+	    if (size_buffer <= size_chunk) {
+		copied = copy_chunk (mpeg2dec, size_buffer);
+		if (!copied) {
+		    mpeg2dec->bytes_since_tag += size_buffer;
+		    mpeg2dec->chunk_ptr += size_buffer;	/* copy_chunk does not store chunk_ptr on this path */
+		    return STATE_BUFFER;
+		}
+	    } else {
+		copied = copy_chunk (mpeg2dec, size_chunk);
+		if (!copied) {
+		    /* filled the chunk buffer without finding a start code */
+		    mpeg2dec->bytes_since_tag += size_chunk;
+		    mpeg2dec->action = seek_chunk;
+		    return STATE_INVALID;
+		}
+	    }
+	    mpeg2dec->bytes_since_tag += copied;
+
+	    mpeg2_slice (&(mpeg2dec->decoder), mpeg2dec->code,
+			 mpeg2dec->chunk_start);
+	    mpeg2dec->code = mpeg2dec->buf_start[-1];	/* value byte of the start code just found */
+	    mpeg2dec->chunk_ptr = mpeg2dec->chunk_start;
+	}
+	if ((unsigned) (mpeg2dec->code - 1) >= 0xb0 - 1)	/* code is 0x00 or >= 0xb0 */
+	    break;
+	if (seek_chunk (mpeg2dec) == STATE_BUFFER)
+	    return STATE_BUFFER;
+    }
+
+    mpeg2dec->action = mpeg2_seek_header;
+    switch (mpeg2dec->code) {
+    case 0x00:
+	return mpeg2dec->state;
+    case 0xb3:
+    case 0xb7:
+    case 0xb8:
+	return (mpeg2dec->state == STATE_SLICE) ? STATE_SLICE : STATE_INVALID;
+    default:
+	mpeg2dec->action = seek_chunk;
+	return STATE_INVALID;
+    }
+}
+
+mpeg2_state_t mpeg2_parse_header (mpeg2dec_t * mpeg2dec)
+{	/* header-level parser: copies one chunk at a time, dispatches it, then follows the state table below */
+    static int (* process_header[]) (mpeg2dec_t * mpeg2dec) = {	/* indexed by (code & 0x0b) */
+	mpeg2_header_picture, mpeg2_header_extension, mpeg2_header_user_data,
+	mpeg2_header_sequence, NULL, NULL, NULL, NULL, mpeg2_header_gop
+    };
+    int size_buffer, size_chunk, copied;
+
+    mpeg2dec->action = mpeg2_parse_header;
+    mpeg2dec->info.user_data = NULL;	mpeg2dec->info.user_data_len = 0;
+    while (1) {
+	size_buffer = mpeg2dec->buf_end - mpeg2dec->buf_start;
+	size_chunk = (mpeg2dec->chunk_buffer + BUFFER_SIZE -
+		      mpeg2dec->chunk_ptr);
+	if (size_buffer <= size_chunk) {
+	    copied = copy_chunk (mpeg2dec, size_buffer);
+	    if (!copied) {
+		mpeg2dec->bytes_since_tag += size_buffer;
+		mpeg2dec->chunk_ptr += size_buffer;	/* copy_chunk does not store chunk_ptr on this path */
+		return STATE_BUFFER;
+	    }
+	} else {
+	    copied = copy_chunk (mpeg2dec, size_chunk);
+	    if (!copied) {
+		/* filled the chunk buffer without finding a start code */
+		mpeg2dec->bytes_since_tag += size_chunk;
+		mpeg2dec->code = 0xb4;
+		mpeg2dec->action = mpeg2_seek_header;
+		return STATE_INVALID;
+	    }
+	}
+	mpeg2dec->bytes_since_tag += copied;
+
+	if (process_header[mpeg2dec->code & 0x0b] (mpeg2dec)) {	/* non-zero return: header rejected */
+	    mpeg2dec->code = mpeg2dec->buf_start[-1];
+	    mpeg2dec->action = mpeg2_seek_header;
+	    return STATE_INVALID;
+	}
+
+	mpeg2dec->code = mpeg2dec->buf_start[-1];	/* value byte of the next start code */
+	switch (RECEIVED (mpeg2dec->code, mpeg2dec->state)) {
+
+	/* state transition after a sequence header */
+	case RECEIVED (0x00, STATE_SEQUENCE):
+	case RECEIVED (0xb8, STATE_SEQUENCE):
+	    mpeg2_header_sequence_finalize (mpeg2dec);
+	    break;
+
+	/* other legal state transitions */
+	case RECEIVED (0x00, STATE_GOP):
+	    mpeg2_header_gop_finalize (mpeg2dec);
+	    break;
+	case RECEIVED (0x01, STATE_PICTURE):
+	case RECEIVED (0x01, STATE_PICTURE_2ND):
+	    mpeg2_header_picture_finalize (mpeg2dec, mpeg2_accels);
+	    mpeg2dec->action = mpeg2_header_slice_start;
+	    break;
+
+	/* legal headers within a given state */
+	case RECEIVED (0xb2, STATE_SEQUENCE):
+	case RECEIVED (0xb2, STATE_GOP):
+	case RECEIVED (0xb2, STATE_PICTURE):
+	case RECEIVED (0xb2, STATE_PICTURE_2ND):
+	case RECEIVED (0xb5, STATE_SEQUENCE):
+	case RECEIVED (0xb5, STATE_PICTURE):
+	case RECEIVED (0xb5, STATE_PICTURE_2ND):
+	    mpeg2dec->chunk_ptr = mpeg2dec->chunk_start;
+	    continue;
+
+	default:
+	    mpeg2dec->action = mpeg2_seek_header;
+	    return STATE_INVALID;
+	}
+
+	mpeg2dec->chunk_start = mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer;
+	mpeg2dec->user_data_len = 0;
+	return mpeg2dec->state;
+    }
+}
+
+int mpeg2_convert (mpeg2dec_t * mpeg2dec, mpeg2_convert_t convert, void * arg)
+{
+    mpeg2_convert_init_t init;
+    const int rc = convert (MPEG2_CONVERT_SET, NULL, &mpeg2dec->sequence, 0,
+			    mpeg2_accels, arg, &init);
+
+    if (rc)
+	return rc;	/* converter reported an error: decoder left untouched */
+
+    mpeg2dec->convert_stride = 0;
+    mpeg2dec->convert_id_size = init.id_size;
+    mpeg2dec->convert_arg = arg;
+    mpeg2dec->convert = convert;
+    return 0;
+}
+
+int mpeg2_stride (mpeg2dec_t * mpeg2dec, int stride)
+{
+    if (mpeg2dec->convert) {
+	/* A converter is installed: it decides the stride and id size. */
+	mpeg2_convert_init_t init;
+
+	stride = mpeg2dec->convert (MPEG2_CONVERT_STRIDE, NULL,
+				    &mpeg2dec->sequence, stride, mpeg2_accels,
+				    mpeg2dec->convert_arg, &init);
+	mpeg2dec->convert_id_size = init.id_size;
+	mpeg2dec->convert_stride = stride;
+	return stride;
+    }
+    if (stride < (int) mpeg2dec->sequence.width)
+	stride = mpeg2dec->sequence.width;	/* never below sequence.width */
+    mpeg2dec->decoder.stride_frame = stride;
+    return stride;
+}
+
+void mpeg2_set_buf (mpeg2dec_t * mpeg2dec, uint8_t * buf[3], void * id)
+{
+    mpeg2_fbuf_t * target;	/* frame buffer slot that receives buf[] and id */
+    int plane;
+
+    if (!mpeg2dec->custom_fbuf) {
+	target = &mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index].fbuf;
+	mpeg2dec->alloc_index_user = ++mpeg2dec->alloc_index;
+    } else {
+	if (mpeg2dec->state == STATE_SEQUENCE) {
+	    mpeg2dec->fbuf[2] = mpeg2dec->fbuf[1];
+	    mpeg2dec->fbuf[1] = mpeg2dec->fbuf[0];
+	}
+	mpeg2_set_fbuf (mpeg2dec, (mpeg2dec->decoder.coding_type ==
+				   PIC_FLAG_CODING_TYPE_B));
+	target = mpeg2dec->fbuf[0];
+    }
+    for (plane = 0; plane < 3; plane++)
+	target->buf[plane] = buf[plane];
+    target->id = id;
+}
+
+void mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf)
+{	/* stores the flag that mpeg2_set_buf and mpeg2_header_state_init branch on */
+    mpeg2dec->custom_fbuf = custom_fbuf;
+}
+
+void mpeg2_skip (mpeg2dec_t * mpeg2dec, int skip)
+{	/* non-zero skip selects zero slices for decoding; zero restores the full range 0x01..0xaf */
+    mpeg2dec->first_decode_slice = 1;
+    mpeg2dec->nb_decode_slices = skip ? 0 : (0xb0 - 1);
+}
+
+void mpeg2_slice_region (mpeg2dec_t * mpeg2dec, int start, int end)
+{	/* clamp start to [1, 0xb0] and end to [start, 0xb0], then store the range */
+    if (start < 1) start = 1; else if (start > 0xb0) start = 0xb0;
+    if (end < start) end = start; else if (end > 0xb0) end = 0xb0;
+    mpeg2dec->nb_decode_slices = end - start;
+    mpeg2dec->first_decode_slice = start;
+}
+
+void mpeg2_tag_picture (mpeg2dec_t * mpeg2dec, uint32_t tag, uint32_t tag2)
+{	/* shift the current tag pair to "previous", then record the new pair */
+    mpeg2dec->tag_previous = mpeg2dec->tag_current;
+    mpeg2dec->tag_current = tag;
+    mpeg2dec->tag2_previous = mpeg2dec->tag2_current;
+    mpeg2dec->tag2_current = tag2;
+    mpeg2dec->bytes_since_tag = 0;
+    mpeg2dec->num_tags++;
+}
+
+uint32_t mpeg2_accel (uint32_t accel)
+{
+    if (mpeg2_accels)	/* already initialised: the argument is ignored */
+	return mpeg2_accels & ~MPEG2_ACCEL_DETECT;
+    mpeg2_accels = mpeg2_detect_accel (accel) | MPEG2_ACCEL_DETECT;
+    mpeg2_cpu_state_init (mpeg2_accels);
+    mpeg2_idct_init (mpeg2_accels);
+    mpeg2_mc_init (mpeg2_accels);
+    return mpeg2_accels & ~MPEG2_ACCEL_DETECT;
+}
+
+void mpeg2_reset (mpeg2dec_t * mpeg2dec, int full_reset)
+{
+    /* Drop pending input and restart scanning; full_reset also drops the sequence. */
+    mpeg2dec->buf_end = NULL;
+    mpeg2dec->buf_start = NULL;
+    mpeg2dec->shift = 0xffffff00;
+    mpeg2dec->code = 0xb4;
+    mpeg2dec->action = mpeg2_seek_header;
+    mpeg2dec->state = STATE_INVALID;
+    mpeg2dec->num_tags = 0;
+    mpeg2dec->first = 1;
+    mpeg2_reset_info (&mpeg2dec->info);
+    mpeg2dec->info.user_data_len = 0;
+    mpeg2dec->info.user_data = NULL;
+    mpeg2dec->info.gop = NULL;
+    if (full_reset) {
+	mpeg2dec->info.sequence = NULL;
+	mpeg2_header_state_init (mpeg2dec);
+    }
+}
+
+mpeg2dec_t * mpeg2_init (void)
+{	/* allocate and reset a decoder; returns NULL if either allocation fails */
+    mpeg2dec_t * mpeg2dec;
+
+    mpeg2_accel (MPEG2_ACCEL_DETECT);
+    mpeg2dec = (mpeg2dec_t *) mpeg2_malloc (sizeof (mpeg2dec_t),
+					    MPEG2_ALLOC_MPEG2DEC);
+    if (mpeg2dec == NULL)
+	return NULL;
+    memset (mpeg2dec->decoder.DCTblock, 0, 64 * sizeof (int16_t));
+    memset (mpeg2dec->quantizer_matrix, 0, 4 * 64 * sizeof (uint8_t));
+
+    mpeg2dec->chunk_buffer = (uint8_t *) mpeg2_malloc (BUFFER_SIZE + 4,
+						       MPEG2_ALLOC_CHUNK);
+    if (mpeg2dec->chunk_buffer == NULL) {	/* never hand out a decoder without a chunk buffer */
+	mpeg2_free (mpeg2dec);
+	return NULL;
+    }
+    mpeg2dec->sequence.width = (unsigned)-1;	/* makes mpeg2_header_state_init skip its free calls */
+    mpeg2_reset (mpeg2dec, 1);
+    return mpeg2dec;
+}
+
+void mpeg2_close (mpeg2dec_t * mpeg2dec)
+{	/* frees per-sequence buffers via mpeg2_header_state_init, then the chunk buffer and the decoder itself */
+    mpeg2_header_state_init (mpeg2dec);
+    mpeg2_free (mpeg2dec->chunk_buffer);
+    mpeg2_free (mpeg2dec);
+}
diff --git a/src/video_dec/libmpeg2new/libmpeg2/header.c b/src/video_dec/libmpeg2new/libmpeg2/header.c
new file mode 100644
index 000000000..935a50aa3
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/header.c
@@ -0,0 +1,961 @@
+/*
+ * header.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 2003      Regis Duchesne <hpreg@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#include <inttypes.h>
+#include <stdlib.h>	/* defines NULL */
+#include <string.h>	/* memcmp */
+
+#include "../include/mpeg2.h"
+#include "../include/attributes.h"
+#include "mpeg2_internal.h"
+
+#define SEQ_EXT 2
+#define SEQ_DISPLAY_EXT 4
+#define QUANT_MATRIX_EXT 8
+#define COPYRIGHT_EXT 0x10
+#define PIC_DISPLAY_EXT 0x80
+#define PIC_CODING_EXT 0x100
+
+/* default intra quant matrix, in zig-zag order */
+static const uint8_t default_intra_quantizer_matrix[64] ATTR_ALIGN(16) = {
+    8,
+    16, 16,
+    19, 16, 19,
+    22, 22, 22, 22,
+    22, 22, 26, 24, 26,
+    27, 27, 27, 26, 26, 26,
+    26, 27, 27, 27, 29, 29, 29,
+    34, 34, 34, 29, 29, 29, 27, 27,
+    29, 29, 32, 32, 34, 34, 37,
+    38, 37, 35, 35, 34, 35,
+    38, 38, 40, 40, 40,
+    48, 48, 46, 46,
+    56, 56, 58,
+    69, 69,
+    83
+};
+
+uint8_t mpeg2_scan_norm[64] ATTR_ALIGN(16) = {
+    /* Zig-Zag scan pattern */
+     0,  1,  8, 16,  9,  2,  3, 10, 17, 24, 32, 25, 18, 11,  4,  5,
+    12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13,  6,  7, 14, 21, 28,
+    35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
+    58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
+};
+
+uint8_t mpeg2_scan_alt[64] ATTR_ALIGN(16) = {
+    /* Alternate scan pattern */
+     0, 8,  16, 24,  1,  9,  2, 10, 17, 25, 32, 40, 48, 56, 57, 49,
+    41, 33, 26, 18,  3, 11,  4, 12, 19, 27, 34, 42, 50, 58, 35, 43,
+    51, 59, 20, 28,  5, 13,  6, 14, 21, 29, 36, 44, 52, 60, 37, 45,
+    53, 61, 22, 30,  7, 15, 23, 31, 38, 46, 54, 62, 39, 47, 55, 63
+};
+
+void mpeg2_header_state_init (mpeg2dec_t * mpeg2dec)
+{
+    if (mpeg2dec->sequence.width != (unsigned)-1) {
+	int idx, plane;
+
+	mpeg2dec->sequence.width = (unsigned)-1;
+	/* Frame buffers in [alloc_index_user, alloc_index) are freed here,
+	 * unless custom_fbuf is set. */
+	if (!mpeg2dec->custom_fbuf)
+	    for (idx = mpeg2dec->alloc_index_user;
+		 idx < mpeg2dec->alloc_index; idx++)
+		for (plane = 0; plane < 3; plane++)
+		    mpeg2_free (mpeg2dec->fbuf_alloc[idx].fbuf.buf[plane]);
+	/* The three yuv_buf sets are freed only when convert_start is
+	 * set. */
+	if (mpeg2dec->convert_start)
+	    for (idx = 0; idx < 3; idx++)
+		for (plane = 0; plane < 3; plane++)
+		    mpeg2_free (mpeg2dec->yuv_buf[idx][plane]);
+	if (mpeg2dec->decoder.convert_id)
+	    mpeg2_free (mpeg2dec->decoder.convert_id);
+    }
+    /* decoder sub-structure */
+    mpeg2dec->decoder.convert_id = NULL;
+    mpeg2dec->decoder.convert = NULL;
+    mpeg2dec->decoder.coding_type = I_TYPE;
+    /* picture and frame-buffer bookkeeping */
+    mpeg2dec->picture = mpeg2dec->pictures;
+    mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[0].fbuf;
+    mpeg2dec->fbuf[1] = &mpeg2dec->fbuf_alloc[1].fbuf;
+    mpeg2dec->fbuf[2] = &mpeg2dec->fbuf_alloc[2].fbuf;
+    mpeg2dec->alloc_index_user = mpeg2dec->alloc_index = 0;
+    mpeg2dec->custom_fbuf = mpeg2dec->yuv_index = 0;
+    mpeg2dec->first = 1;
+    mpeg2dec->first_decode_slice = 1;
+    mpeg2dec->nb_decode_slices = 0xb0 - 1;
+    mpeg2dec->convert_start = NULL;
+    mpeg2dec->convert = NULL;
+}
+
+void mpeg2_reset_info (mpeg2_info_t * info)
+{	/* clears the picture and frame-buffer pointers only; sequence, gop and user_data are not touched here */
+    info->current_picture = info->current_picture_2nd = NULL;
+    info->display_picture = info->display_picture_2nd = NULL;
+    info->current_fbuf = info->display_fbuf = info->discard_fbuf = NULL;
+}
+
+static void info_user_data (mpeg2dec_t * mpeg2dec)
+{
+    if (!mpeg2dec->user_data_len)	/* nothing collected: leave info untouched */
+	return;
+    mpeg2dec->info.user_data_len = mpeg2dec->user_data_len - 3;
+    mpeg2dec->info.user_data = mpeg2dec->chunk_buffer;
+}
+
+int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec)
+{	/* parses a sequence header from chunk_start into new_sequence; returns 1 when rejected, 0 on success */
+    uint8_t * buffer = mpeg2dec->chunk_start;
+    mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence);
+    static unsigned int frame_period[16] = {	/* NOTE(review): presumably 27 MHz ticks per frame (1080000 = 27e6 / 25) — confirm */
+	0, 1126125, 1125000, 1080000, 900900, 900000, 540000, 450450, 450000,
+	/* unofficial: xing 15 fps */
+	1800000,
+	/* unofficial: libmpeg3 "Unofficial economy rates" 5/10/12/15 fps */
+	5400000, 2700000, 2250000, 1800000, 0, 0
+    };
+    int i;
+
+    if ((buffer[6] & 0x20) != 0x20)	/* missing marker_bit */
+	return 1;
+
+    i = (buffer[0] << 16) | (buffer[1] << 8) | buffer[2];	/* 24 bits: 12-bit width, then 12-bit height */
+    if (! (sequence->display_width = sequence->picture_width = i >> 12))
+	return 1;
+    if (! (sequence->display_height = sequence->picture_height = i & 0xfff))
+	return 1;
+    sequence->width = (sequence->picture_width + 15) & ~15;	/* rounded up to a multiple of 16 */
+    sequence->height = (sequence->picture_height + 15) & ~15;
+    sequence->chroma_width = sequence->width >> 1;
+    sequence->chroma_height = sequence->height >> 1;
+
+    sequence->flags = (SEQ_FLAG_PROGRESSIVE_SEQUENCE |
+		       SEQ_VIDEO_FORMAT_UNSPECIFIED);
+
+    sequence->pixel_width = buffer[3] >> 4;	/* aspect ratio */
+    sequence->frame_period = frame_period[buffer[3] & 15];
+
+    sequence->byte_rate = (buffer[4]<<10) | (buffer[5]<<2) | (buffer[6]>>6);
+
+    sequence->vbv_buffer_size = ((buffer[6]<<16)|(buffer[7]<<8))&0x1ff800;
+
+    if (buffer[7] & 4)
+	sequence->flags |= SEQ_FLAG_CONSTRAINED_PARAMETERS;
+
+    mpeg2dec->copy_matrix = 3;
+    if (buffer[7] & 2) {	/* an intra matrix is present, offset by one bit across bytes 7..71 */
+	for (i = 0; i < 64; i++)
+	    mpeg2dec->new_quantizer_matrix[0][mpeg2_scan_norm[i]] =
+		(buffer[i+7] << 7) | (buffer[i+8] >> 1);
+	buffer += 64;	/* later offsets are relative to the end of the matrix */
+    } else
+	for (i = 0; i < 64; i++)
+	    mpeg2dec->new_quantizer_matrix[0][mpeg2_scan_norm[i]] =
+		default_intra_quantizer_matrix[i];
+
+    if (buffer[7] & 1)	/* a second matrix is present, byte-aligned */
+	for (i = 0; i < 64; i++)
+	    mpeg2dec->new_quantizer_matrix[1][mpeg2_scan_norm[i]] =
+		buffer[i+8];
+    else
+	memset (mpeg2dec->new_quantizer_matrix[1], 16, 64);
+
+    sequence->profile_level_id = 0x80;
+    sequence->colour_primaries = 0;
+    sequence->transfer_characteristics = 0;
+    sequence->matrix_coefficients = 0;
+
+    mpeg2dec->ext_state = SEQ_EXT;
+    mpeg2dec->state = STATE_SEQUENCE;
+    mpeg2dec->display_offset_x = mpeg2dec->display_offset_y = 0;
+
+    return 0;
+}
+
+static int sequence_ext (mpeg2dec_t * mpeg2dec)
+{	/* applies a sequence extension to new_sequence; returns 1 when rejected, 0 on success */
+    uint8_t * buffer = mpeg2dec->chunk_start;
+    mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence);
+    uint32_t flags;
+
+    if (!(buffer[3] & 1))	/* missing marker_bit */
+	return 1;
+
+    sequence->profile_level_id = (buffer[0] << 4) | (buffer[1] >> 4);
+
+    sequence->display_width = sequence->picture_width +=
+	((buffer[1] << 13) | (buffer[2] << 5)) & 0x3000;
+    sequence->display_height = sequence->picture_height +=
+	(buffer[2] << 7) & 0x3000;
+    sequence->width = (sequence->picture_width + 15) & ~15;
+    sequence->height = (sequence->picture_height + 15) & ~15;
+    flags = sequence->flags | SEQ_FLAG_MPEG2;
+    if (!(buffer[1] & 8)) {
+	flags &= ~SEQ_FLAG_PROGRESSIVE_SEQUENCE;
+	sequence->height = (sequence->height + 31) & ~31;
+    }
+    if (buffer[5] & 0x80)
+	flags |= SEQ_FLAG_LOW_DELAY;
+    sequence->flags = flags;
+    sequence->chroma_width = sequence->width;
+    sequence->chroma_height = sequence->height;
+    switch (buffer[1] & 6) {
+    case 0:	/* invalid */
+	return 1;
+    case 2:	/* 4:2:0 */
+	sequence->chroma_height >>= 1;	/* fall through: 4:2:0 also halves the width */
+    case 4:	/* 4:2:2 */
+	sequence->chroma_width >>= 1;
+    }
+
+    sequence->byte_rate += ((buffer[2]<<25) | (buffer[3]<<17)) & 0x3ffc0000;
+
+    sequence->vbv_buffer_size |= buffer[4] << 21;
+
+    sequence->frame_period =	/* byte 5: low_delay(1) frame_rate_extension_n(2) frame_rate_extension_d(5) */
+	sequence->frame_period * ((buffer[5]&31)+1) / (((buffer[5]>>5)&3)+1);
+
+    mpeg2dec->ext_state = SEQ_DISPLAY_EXT;
+
+    return 0;
+}
+
+static int sequence_display_ext (mpeg2dec_t * mpeg2dec)
+{
+    mpeg2_sequence_t * const seq = &mpeg2dec->new_sequence;
+    const uint8_t * p = mpeg2dec->chunk_start;
+
+    /* Replace the video-format bits of flags with those from byte 0. */
+    seq->flags &= ~SEQ_MASK_VIDEO_FORMAT;
+    seq->flags |= (p[0] << 4) & SEQ_MASK_VIDEO_FORMAT;
+    if (p[0] & 1) {
+	/* colour description present: three extra bytes follow byte 0 */
+	seq->flags |= SEQ_FLAG_COLOUR_DESCRIPTION;
+	seq->colour_primaries = p[1];
+	seq->transfer_characteristics = p[2];
+	seq->matrix_coefficients = p[3];
+	p += 3;
+    }
+    if ((p[2] & 2) == 0)	/* missing marker_bit */
+	return 1;
+
+    seq->display_width = (p[1] << 6) | (p[2] >> 2);
+    seq->display_height = ((p[2] & 1) << 13) | (p[3] << 5) | (p[4] >> 3);
+
+    return 0;
+}
+
+/* Reduce *u / *v to lowest terms; a 0/0 pair is left alone (gcd would be 0). */
+static inline void simplify (unsigned int * u, unsigned int * v)
+{
+    unsigned int a = *u, b = *v, tmp;
+
+    while (a) {	/* Euclid: on exit b holds the greatest common divisor */
+	tmp = a;	a = b % tmp;	b = tmp;
+    }
+    if (b) { *u /= b; *v /= b; }	/* b == 0 only when *u == *v == 0 */
+}
+
+/* Scale byte_rate and turn the aspect code held in pixel_width into a ratio. */
+static inline void finalize_sequence (mpeg2_sequence_t * sequence)
+{
+    int width;
+    int height;
+
+    sequence->byte_rate *= 50;
+    if (sequence->flags & SEQ_FLAG_MPEG2) {
+	switch (sequence->pixel_width) {
+	case 1:		/* square pixels */
+	    sequence->pixel_width = sequence->pixel_height = 1;	return;
+	case 2:		/* 4:3 aspect ratio */
+	    width = 4; height = 3;	break;
+	case 3:		/* 16:9 aspect ratio */
+	    width = 16; height = 9;	break;
+	case 4:		/* 2.21:1 aspect ratio */
+	    width = 221; height = 100;	break;
+	default:	/* illegal */
+	    sequence->pixel_width = sequence->pixel_height = 0;	return;
+	}
+	width *= sequence->display_height;	/* display ratio -> pixel ratio */
+	height *= sequence->display_width;
+
+    } else {
+	if (sequence->byte_rate == 50 * 0x3ffff) 
+	    sequence->byte_rate = 0;        /* mpeg-1 VBR */ 
+
+	switch (sequence->pixel_width) {
+	case 0:	case 15:	/* illegal */
+	    sequence->pixel_width = sequence->pixel_height = 0;		return;
+	case 1:	/* square pixels */
+	    sequence->pixel_width = sequence->pixel_height = 1;		return;
+	case 3:	/* 720x576 16:9 */
+	    sequence->pixel_width = 64;	sequence->pixel_height = 45;	return;
+	case 6:	/* 720x480 16:9 */
+	    sequence->pixel_width = 32;	sequence->pixel_height = 27;	return;
+	case 8: /* BT.601 625 lines 4:3 */
+	    sequence->pixel_width = 59;	sequence->pixel_height = 54;	return;
+	case 12: /* BT.601 525 lines 4:3 */
+	    sequence->pixel_width = 10;	sequence->pixel_height = 11;	return;
+	default:	/* any other code: ratio 2000 : (88 * code + 1171) */
+	    height = 88 * sequence->pixel_width + 1171;
+	    width = 2000;
+	}
+    }
+    /* store the computed ratio reduced to lowest terms */
+    sequence->pixel_width = width;
+    sequence->pixel_height = height;
+    simplify (&sequence->pixel_width, &sequence->pixel_height);
+}
+
+/* Guess the pixel aspect of known SD sizes; 0 = kept as is, 1 = 576, 2 = other */
+int mpeg2_guess_aspect (const mpeg2_sequence_t * sequence,
+			unsigned int * pixel_width,
+			unsigned int * pixel_height)
+{
+    static struct {
+	unsigned int width, height;
+    } video_modes[] = {
+	{720, 576}, /* 625 lines, 13.5 MHz (D1, DV, DVB, DVD) */
+	{704, 576}, /* 625 lines, 13.5 MHz (1/1 D1, DVB, DVD, 4CIF) */
+	{544, 576}, /* 625 lines, 10.125 MHz (DVB, laserdisc) */
+	{528, 576}, /* 625 lines, 10.125 MHz (3/4 D1, DVB, laserdisc) */
+	{480, 576}, /* 625 lines, 9 MHz (2/3 D1, DVB, SVCD) */
+	{352, 576}, /* 625 lines, 6.75 MHz (D2, 1/2 D1, CVD, DVB, DVD) */
+	{352, 288}, /* 625 lines, 6.75 MHz, 1 field (D4, VCD, DVB, DVD, CIF) */
+	{176, 144}, /* 625 lines, 3.375 MHz, half field (QCIF) */
+	{720, 486}, /* 525 lines, 13.5 MHz (D1) */
+	{704, 486}, /* 525 lines, 13.5 MHz */
+	{720, 480}, /* 525 lines, 13.5 MHz (DV, DSS, DVD) */
+	{704, 480}, /* 525 lines, 13.5 MHz (1/1 D1, ATSC, DVD) */
+	{544, 480}, /* 525 lines. 10.125 MHz (DSS, laserdisc) */
+	{528, 480}, /* 525 lines. 10.125 MHz (3/4 D1, laserdisc) */
+	{480, 480}, /* 525 lines, 9 MHz (2/3 D1, SVCD) */
+	{352, 480}, /* 525 lines, 6.75 MHz (D2, 1/2 D1, CVD, DVD) */
+	{352, 240}  /* 525  lines. 6.75 MHz, 1 field (D4, VCD, DSS, DVD) */
+    };
+    unsigned int width, height, pix_width, pix_height, i, DAR_16_9;
+    *pixel_width = sequence->pixel_width;	/* default: the sequence values */
+    *pixel_height = sequence->pixel_height;
+    width = sequence->picture_width;
+    height = sequence->picture_height;
+    for (i = 0; i < sizeof (video_modes) / sizeof (video_modes[0]); i++)
+	if (width == video_modes[i].width && height == video_modes[i].height)
+	    break;
+    if (i == sizeof (video_modes) / sizeof (video_modes[0]) ||
+	(sequence->pixel_width == 1 && sequence->pixel_height == 1) ||
+	width != sequence->display_width || height != sequence->display_height)
+	return 0;	/* unknown size, square pixels, or cropped display */
+    /* double the factors until height >= 480 and width > 352 */
+    for (pix_height = 1; height * pix_height < 480; pix_height <<= 1);
+    height *= pix_height;
+    for (pix_width = 1; width * pix_width <= 352; pix_width <<= 1);
+    width *= pix_width;
+    /* decide whether the stream claims a 16:9 display */
+    if (! (sequence->flags & SEQ_FLAG_MPEG2)) {
+	static unsigned int mpeg1_check[2][2] = {{11, 54}, {27, 45}};
+	DAR_16_9 = (sequence->pixel_height == 27 ||
+		    sequence->pixel_height == 45);
+	if (width < 704 ||
+	    sequence->pixel_height != mpeg1_check[DAR_16_9][height == 576])
+	    return 0;
+    } else {
+	DAR_16_9 = (3 * sequence->picture_width * sequence->pixel_width >
+		    4 * sequence->picture_height * sequence->pixel_height);
+	switch (width) {
+	case 528: case 544:	pix_width *= 4; pix_height *= 3; break;
+	case 480:		pix_width *= 3; pix_height *= 2; break;
+	}
+    }
+    if (DAR_16_9) {
+	pix_width *= 4; pix_height *= 3;
+    }
+    if (height == 576) {	/* 59:54 for 576-line modes, 10:11 otherwise */
+	pix_width *= 59; pix_height *= 54;
+    } else {
+	pix_width *= 10; pix_height *= 11;
+    }
+    *pixel_width = pix_width;
+    *pixel_height = pix_height;
+    simplify (pixel_width, pixel_height);
+    return (height == 576) ? 1 : 2;
+}
+
+/* Adopt new_quantizer_matrix[index] when it differs from the active one. */
+static void copy_matrix (mpeg2dec_t * mpeg2dec, int index)
+{
+    void * active = mpeg2dec->quantizer_matrix[index];
+    const void * pending = mpeg2dec->new_quantizer_matrix[index];
+    if (!memcmp (active, pending, 64)) return;	/* nothing changed */
+    memcpy (active, pending, 64);
+    mpeg2dec->scaled[index] = -1;	/* never equals q_scale_type */
+}
+
+/* Apply the matrices flagged in copy_matrix and pick the chroma quantizers. */
+static void finalize_matrix (mpeg2dec_t * mpeg2dec)
+{
+    mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
+    int i;
+    for (i = 0; i < 2; i++) {	/* bit i: matrix i, bit i+2: matrix i+2 */
+	if (mpeg2dec->copy_matrix & (1 << i))
+	    copy_matrix (mpeg2dec, i);
+	if ((mpeg2dec->copy_matrix & (4 << i)) &&
+	    memcmp (mpeg2dec->quantizer_matrix[i],
+		    mpeg2dec->new_quantizer_matrix[i+2], 64)) {
+	    copy_matrix (mpeg2dec, i + 2);	/* differs from matrix i */
+	    decoder->chroma_quantizer[i] = decoder->quantizer_prescale[i+2];
+	} else if (mpeg2dec->copy_matrix & (5 << i))	/* either bit was set */
+	    decoder->chroma_quantizer[i] = decoder->quantizer_prescale[i];
+    }
+}
+
+static mpeg2_state_t invalid_end_action (mpeg2dec_t * mpeg2dec)
+{   /* action installed by mpeg2_header_sequence_finalize on a size change */
+    mpeg2_reset_info (&(mpeg2dec->info));
+    mpeg2dec->info.gop = NULL;
+    info_user_data (mpeg2dec);
+    mpeg2_header_state_init (mpeg2dec);
+    mpeg2dec->sequence = mpeg2dec->new_sequence;	/* commit new header */
+    mpeg2dec->action = mpeg2_seek_header;
+    mpeg2dec->state = STATE_SEQUENCE;
+    return STATE_SEQUENCE;
+}
+
+/* Finish a sequence header: commit new_sequence or schedule a restart. */
+void mpeg2_header_sequence_finalize (mpeg2dec_t * mpeg2dec)
+{
+    mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence);
+    mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
+    finalize_sequence (sequence);
+    finalize_matrix (mpeg2dec);
+
+    decoder->mpeg1 = !(sequence->flags & SEQ_FLAG_MPEG2);
+    decoder->width = sequence->width;
+    decoder->height = sequence->height;
+    decoder->vertical_position_extension = (sequence->picture_height > 2800);
+    decoder->chroma_format = ((sequence->chroma_width == sequence->width) +
+			      (sequence->chroma_height == sequence->height));
+
+    if (mpeg2dec->sequence.width != (unsigned)-1) {	/* not the first one */
+	/*
+	 * According to 6.1.1.6, repeat sequence headers should be
+	 * identical to the original. However some encoders don't
+	 * respect that and change various fields (including bitrate
+	 * and aspect ratio) in the repeat sequence headers. So we
+	 * choose to be as conservative as possible and only restart
+	 * the decoder if the width, height, chroma_width,
+	 * chroma_height or low_delay flag are modified.
+	 */
+	if (sequence->width != mpeg2dec->sequence.width ||
+	    sequence->height != mpeg2dec->sequence.height ||
+	    sequence->chroma_width != mpeg2dec->sequence.chroma_width ||
+	    sequence->chroma_height != mpeg2dec->sequence.chroma_height ||
+	    ((sequence->flags ^ mpeg2dec->sequence.flags) &
+	     SEQ_FLAG_LOW_DELAY)) {
+	    decoder->stride_frame = sequence->width;
+	    mpeg2_header_end (mpeg2dec);
+	    mpeg2dec->action = invalid_end_action;	/* commits it later */
+	    mpeg2dec->state = STATE_INVALID_END;
+	    return;
+	}
+	mpeg2dec->state = (memcmp (&(mpeg2dec->sequence), sequence,
+				   sizeof (mpeg2_sequence_t)) ?
+			   STATE_SEQUENCE_MODIFIED : STATE_SEQUENCE_REPEATED);
+    } else
+	decoder->stride_frame = sequence->width;
+    mpeg2dec->sequence = *sequence;
+    mpeg2_reset_info (&(mpeg2dec->info));
+    mpeg2dec->info.sequence = &(mpeg2dec->sequence);
+    mpeg2dec->info.gop = NULL;
+    info_user_data (mpeg2dec);
+}
+
+/* Decode a GOP header into new_gop; returns 1 when bit 3 of byte 1 is clear. */
+int mpeg2_header_gop (mpeg2dec_t * mpeg2dec)
+{
+    uint8_t * buffer = mpeg2dec->chunk_start;
+    mpeg2_gop_t * gop = &(mpeg2dec->new_gop);
+    if (! (buffer[1] & 8))	/* presumably the time-code marker bit */
+	return 1;
+    gop->hours = (buffer[0] >> 2) & 31;
+    gop->minutes = ((buffer[0] << 4) | (buffer[1] >> 4)) & 63;
+    gop->seconds = ((buffer[1] << 3) | (buffer[2] >> 5)) & 63;
+    gop->pictures = ((buffer[2] << 1) | (buffer[3] >> 7)) & 63;
+    gop->flags = (buffer[0] >> 7) | ((buffer[3] >> 4) & 6);
+    mpeg2dec->state = STATE_GOP;
+    return 0;
+}
+
+void mpeg2_header_gop_finalize (mpeg2dec_t * mpeg2dec)
+{   /* commit the parsed GOP header and publish it through info.gop */
+    mpeg2dec->gop = mpeg2dec->new_gop;
+    mpeg2_reset_info (&(mpeg2dec->info));
+    mpeg2dec->info.gop = &(mpeg2dec->gop);
+    info_user_data (mpeg2dec);
+}
+
+/* Select the first fbuf_alloc slot that is neither fbuf[1] nor fbuf[2]. */
+void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int b_type)
+{
+    int slot;
+    for (slot = 0; slot < 3; slot++) {
+	mpeg2_fbuf_t * fb = &mpeg2dec->fbuf_alloc[slot].fbuf;
+	if (fb == mpeg2dec->fbuf[1] || fb == mpeg2dec->fbuf[2]) continue;
+	mpeg2dec->fbuf[0] = fb;
+	mpeg2dec->info.current_fbuf = fb;
+	if (!b_type && !(mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY))
+	    return;	/* display/discard pointers stay untouched */
+	if (b_type || mpeg2dec->convert)
+	    mpeg2dec->info.discard_fbuf = fb;
+	mpeg2dec->info.display_fbuf = fb;
+	return;
+    }
+}
+
+/* Parse a picture header into new_picture and reset per-picture defaults. */
+int mpeg2_header_picture (mpeg2dec_t * mpeg2dec)
+{
+    uint8_t * buffer = mpeg2dec->chunk_start;
+    mpeg2_picture_t * picture = &(mpeg2dec->new_picture);
+    mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
+    int type;
+    mpeg2dec->state = ((mpeg2dec->state != STATE_SLICE_1ST) ?
+		       STATE_PICTURE : STATE_PICTURE_2ND);
+    mpeg2dec->ext_state = PIC_CODING_EXT;
+
+    picture->temporal_reference = (buffer[0] << 2) | (buffer[1] >> 6);
+
+    type = (buffer [1] >> 3) & 7;
+    if (type == PIC_FLAG_CODING_TYPE_P || type == PIC_FLAG_CODING_TYPE_B) {
+	/* forward_f_code and backward_f_code - used in mpeg1 only */
+	decoder->f_motion.f_code[1] = (buffer[3] >> 2) & 1;
+	decoder->f_motion.f_code[0] =
+	    (((buffer[3] << 1) | (buffer[4] >> 7)) & 7) - 1;
+	decoder->b_motion.f_code[1] = (buffer[4] >> 6) & 1;
+	decoder->b_motion.f_code[0] = ((buffer[4] >> 3) & 7) - 1;
+    }
+
+    picture->flags = PIC_FLAG_PROGRESSIVE_FRAME | type;
+    picture->tag = picture->tag2 = 0;
+    if (mpeg2dec->num_tags) {	/* attach a pending tag pair, if any */
+	if (mpeg2dec->bytes_since_tag >= mpeg2dec->chunk_ptr - buffer + 4) {
+	    mpeg2dec->num_tags = 0;
+	    picture->tag = mpeg2dec->tag_current;
+	    picture->tag2 = mpeg2dec->tag2_current;
+	    picture->flags |= PIC_FLAG_TAGS;
+	} else if (mpeg2dec->num_tags > 1) {
+	    mpeg2dec->num_tags = 1;
+	    picture->tag = mpeg2dec->tag_previous;
+	    picture->tag2 = mpeg2dec->tag2_previous;
+	    picture->flags |= PIC_FLAG_TAGS;
+	}
+    }
+    picture->nb_fields = 2;
+    picture->display_offset[0].x = picture->display_offset[1].x =
+	picture->display_offset[2].x = mpeg2dec->display_offset_x;
+    picture->display_offset[0].y = picture->display_offset[1].y =
+	picture->display_offset[2].y = mpeg2dec->display_offset_y;
+
+    /* XXXXXX decode extra_information_picture as well */
+
+    /* defaults; picture_coding_ext () overrides most of them when present */
+    mpeg2dec->q_scale_type = 0;
+    decoder->intra_dc_precision = 7;
+    decoder->frame_pred_frame_dct = 1;
+    decoder->concealment_motion_vectors = 0;
+    decoder->scan = mpeg2_scan_norm;
+    decoder->picture_structure = FRAME_PICTURE;
+    mpeg2dec->copy_matrix = 0;
+    return 0;
+}
+
+/* Parse a picture coding extension; 1 = unrecognised picture_structure. */
+static int picture_coding_ext (mpeg2dec_t * mpeg2dec)
+{
+    uint8_t * buffer = mpeg2dec->chunk_start;
+    mpeg2_picture_t * picture = &(mpeg2dec->new_picture);
+    mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
+    uint32_t flags;
+    /* pre subtract 1 for use later in compute_motion_vector */
+    decoder->f_motion.f_code[0] = (buffer[0] & 15) - 1;
+    decoder->f_motion.f_code[1] = (buffer[1] >> 4) - 1;
+    decoder->b_motion.f_code[0] = (buffer[1] & 15) - 1;
+    decoder->b_motion.f_code[1] = (buffer[2] >> 4) - 1;
+
+    flags = picture->flags;
+    decoder->intra_dc_precision = 7 - ((buffer[2] >> 2) & 3);
+    decoder->picture_structure = buffer[2] & 3;
+    switch (decoder->picture_structure) {
+    case TOP_FIELD:
+	flags |= PIC_FLAG_TOP_FIELD_FIRST;	/* fallthrough */
+    case BOTTOM_FIELD:
+	picture->nb_fields = 1;
+	break;
+    case FRAME_PICTURE:
+	if (!(mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE)) {
+	    picture->nb_fields = (buffer[3] & 2) ? 3 : 2;
+	    flags |= (buffer[3] & 128) ? PIC_FLAG_TOP_FIELD_FIRST : 0;
+	} else
+	    picture->nb_fields = (buffer[3]&2) ? ((buffer[3]&128) ? 6 : 4) : 2;
+	break;
+    default:
+	return 1;
+    }
+    decoder->top_field_first = buffer[3] >> 7;
+    decoder->frame_pred_frame_dct = (buffer[3] >> 6) & 1;
+    decoder->concealment_motion_vectors = (buffer[3] >> 5) & 1;
+    mpeg2dec->q_scale_type = buffer[3] & 16;	/* kept as 0 or 16 */
+    decoder->intra_vlc_format = (buffer[3] >> 3) & 1;
+    decoder->scan = (buffer[3] & 4) ? mpeg2_scan_alt : mpeg2_scan_norm;
+    if (!(buffer[4] & 0x80))
+	flags &= ~PIC_FLAG_PROGRESSIVE_FRAME;
+    if (buffer[4] & 0x40)
+	flags |= (((buffer[4]<<26) | (buffer[5]<<18) | (buffer[6]<<10)) &
+		  PIC_MASK_COMPOSITE_DISPLAY) | PIC_FLAG_COMPOSITE_DISPLAY;
+    picture->flags = flags;
+
+    mpeg2dec->ext_state = PIC_DISPLAY_EXT | COPYRIGHT_EXT | QUANT_MATRIX_EXT;
+
+    return 0;
+}
+
+/* Parse per-picture display offsets; 1 = an offset pair lacks its low bits. */
+static int picture_display_ext (mpeg2dec_t * mpeg2dec)
+{
+    uint8_t * buffer = mpeg2dec->chunk_start;
+    mpeg2_picture_t * picture = &(mpeg2dec->new_picture);
+    int i, nb_pos;
+    nb_pos = picture->nb_fields;
+    if (mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE)
+	nb_pos >>= 1;
+
+    for (i = 0; i < nb_pos; i++) {
+	int x, y;
+
+	x = ((buffer[4*i] << 24) | (buffer[4*i+1] << 16) |
+	     (buffer[4*i+2] << 8) | buffer[4*i+3]) >> (11-2*i);
+	y = ((buffer[4*i+2] << 24) | (buffer[4*i+3] << 16) |
+	     (buffer[4*i+4] << 8) | buffer[4*i+5]) >> (10-2*i);
+	if (! (x & y & 1))	/* low bit of both must be set (marker bits?) */
+	    return 1;
+	picture->display_offset[i].x = mpeg2dec->display_offset_x = x >> 1;
+	picture->display_offset[i].y = mpeg2dec->display_offset_y = y >> 1;
+    }
+    for (; i < 3; i++) {	/* remaining slots repeat the last offset */
+	picture->display_offset[i].x = mpeg2dec->display_offset_x;
+	picture->display_offset[i].y = mpeg2dec->display_offset_y;
+    }
+    return 0;
+}
+
+/* Commit new_picture, update the info pointers and allocate missing buffers. */
+void mpeg2_header_picture_finalize (mpeg2dec_t * mpeg2dec, uint32_t accels)
+{
+    mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
+    int old_type_b = (decoder->coding_type == B_TYPE);
+    int low_delay = mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY;
+    finalize_matrix (mpeg2dec);
+    decoder->coding_type = mpeg2dec->new_picture.flags & PIC_MASK_CODING_TYPE;
+
+    if (mpeg2dec->state == STATE_PICTURE) {
+	mpeg2_picture_t * picture;
+	mpeg2_picture_t * other;
+
+	decoder->second_field = 0;
+
+	/* pictures[0..1] and pictures[2..3] are used as two slots */
+	picture = other = mpeg2dec->pictures;
+	if (old_type_b ^ (mpeg2dec->picture < mpeg2dec->pictures + 2))
+	    picture += 2;
+	else
+	    other += 2;
+	mpeg2dec->picture = picture;
+	*picture = mpeg2dec->new_picture;
+
+	if (!old_type_b) {	/* previous picture was not B: rotate fbufs */
+	    mpeg2dec->fbuf[2] = mpeg2dec->fbuf[1];
+	    mpeg2dec->fbuf[1] = mpeg2dec->fbuf[0];
+	}
+	mpeg2dec->fbuf[0] = NULL;
+	mpeg2_reset_info (&(mpeg2dec->info));
+	mpeg2dec->info.current_picture = picture;
+	mpeg2dec->info.display_picture = picture;
+	if (decoder->coding_type != B_TYPE) {
+	    if (!low_delay) {
+		if (mpeg2dec->first) {
+		    mpeg2dec->info.display_picture = NULL;
+		    mpeg2dec->first = 0;
+		} else {
+		    mpeg2dec->info.display_picture = other;
+		    if (other->nb_fields == 1)
+			mpeg2dec->info.display_picture_2nd = other + 1;
+		    mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[1];
+		}
+	    }
+	    if (!low_delay + !mpeg2dec->convert)
+		mpeg2dec->info.discard_fbuf =
+		    mpeg2dec->fbuf[!low_delay + !mpeg2dec->convert];
+	}
+	if (mpeg2dec->convert) {
+	    mpeg2_convert_init_t convert_init;
+	    if (!mpeg2dec->convert_start) {	/* first use of the converter */
+		int y_size, uv_size;
+
+		mpeg2dec->decoder.convert_id =
+		    mpeg2_malloc (mpeg2dec->convert_id_size,
+				  MPEG2_ALLOC_CONVERT_ID);
+		mpeg2dec->convert (MPEG2_CONVERT_START,
+				   mpeg2dec->decoder.convert_id,
+				   &(mpeg2dec->sequence),
+				   mpeg2dec->convert_stride, accels,
+				   mpeg2dec->convert_arg, &convert_init);
+		mpeg2dec->convert_start = convert_init.start;
+		mpeg2dec->decoder.convert = convert_init.copy;
+
+		y_size = decoder->stride_frame * mpeg2dec->sequence.height;
+		uv_size = y_size >> (2 - mpeg2dec->decoder.chroma_format);
+		mpeg2dec->yuv_buf[0][0] =
+		    (uint8_t *) mpeg2_malloc (y_size, MPEG2_ALLOC_YUV);
+		mpeg2dec->yuv_buf[0][1] =
+		    (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV);
+		mpeg2dec->yuv_buf[0][2] =
+		    (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV);
+		mpeg2dec->yuv_buf[1][0] =
+		    (uint8_t *) mpeg2_malloc (y_size, MPEG2_ALLOC_YUV);
+		mpeg2dec->yuv_buf[1][1] =
+		    (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV);
+		mpeg2dec->yuv_buf[1][2] =
+		    (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV);
+		y_size = decoder->stride_frame * 32;	/* third set: 32 rows */
+		uv_size = y_size >> (2 - mpeg2dec->decoder.chroma_format);
+		mpeg2dec->yuv_buf[2][0] =
+		    (uint8_t *) mpeg2_malloc (y_size, MPEG2_ALLOC_YUV);
+		mpeg2dec->yuv_buf[2][1] =
+		    (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV);
+		mpeg2dec->yuv_buf[2][2] =
+		    (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV);
+	    }
+	    /* NOTE(review): convert_init is only filled in above - confirm */
+	    if (!mpeg2dec->custom_fbuf) {
+		while (mpeg2dec->alloc_index < 3) {
+		    mpeg2_fbuf_t * fbuf;
+
+		    fbuf = &mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index++].fbuf;
+		    fbuf->id = NULL;
+		    fbuf->buf[0] =
+			(uint8_t *) mpeg2_malloc (convert_init.buf_size[0],
+						  MPEG2_ALLOC_CONVERTED);
+		    fbuf->buf[1] =
+			(uint8_t *) mpeg2_malloc (convert_init.buf_size[1],
+						  MPEG2_ALLOC_CONVERTED);
+		    fbuf->buf[2] =
+			(uint8_t *) mpeg2_malloc (convert_init.buf_size[2],
+						  MPEG2_ALLOC_CONVERTED);
+		}
+		mpeg2_set_fbuf (mpeg2dec, (decoder->coding_type == B_TYPE));
+	    }
+	} else if (!mpeg2dec->custom_fbuf) {
+	    while (mpeg2dec->alloc_index < 3) {
+		mpeg2_fbuf_t * fbuf;
+		int y_size, uv_size;
+
+		fbuf = &(mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index++].fbuf);
+		fbuf->id = NULL;
+		y_size = decoder->stride_frame * mpeg2dec->sequence.height;
+		uv_size = y_size >> (2 - decoder->chroma_format);
+		fbuf->buf[0] = (uint8_t *) mpeg2_malloc (y_size,
+							 MPEG2_ALLOC_YUV);
+		fbuf->buf[1] = (uint8_t *) mpeg2_malloc (uv_size,
+							 MPEG2_ALLOC_YUV);
+		fbuf->buf[2] = (uint8_t *) mpeg2_malloc (uv_size,
+							 MPEG2_ALLOC_YUV);
+	    }
+	    mpeg2_set_fbuf (mpeg2dec, (decoder->coding_type == B_TYPE));
+	}
+    } else {
+	decoder->second_field = 1;
+	mpeg2dec->picture++;	/* second field picture */
+	*(mpeg2dec->picture) = mpeg2dec->new_picture;
+	mpeg2dec->info.current_picture_2nd = mpeg2dec->picture;
+	if (low_delay || decoder->coding_type == B_TYPE)
+	    mpeg2dec->info.display_picture_2nd = mpeg2dec->picture;
+    }
+
+    info_user_data (mpeg2dec);
+}
+
+static int copyright_ext (mpeg2dec_t * mpeg2dec)
+{   /* the extension payload is not parsed; always reports success */
+    return 0;
+}
+
+/* Read up to four quantizer matrices into new_quantizer_matrix; returns 0. */
+static int quant_matrix_ext (mpeg2dec_t * mpeg2dec)
+{
+    uint8_t * buffer = mpeg2dec->chunk_start;
+    int i, j;
+    for (i = 0; i < 4; i++)
+	if (buffer[0] & (8 >> i)) {	/* load flag for matrix i */
+	    for (j = 0; j < 64; j++)	/* entries straddle byte boundaries */
+		mpeg2dec->new_quantizer_matrix[i][mpeg2_scan_norm[j]] =
+		    (buffer[j] << (i+5)) | (buffer[j+1] >> (3-i));
+	    mpeg2dec->copy_matrix |= 1 << i;	/* used by finalize_matrix */
+	    buffer += 64;
+	}
+
+    return 0;
+}
+
+/* Dispatch an extension chunk to its parser if ext_state still allows it. */
+int mpeg2_header_extension (mpeg2dec_t * mpeg2dec)
+{
+    static int (* parser[]) (mpeg2dec_t *) = {
+	0, sequence_ext, sequence_display_ext, quant_matrix_ext,
+	copyright_ext, 0, 0, picture_display_ext, picture_coding_ext
+    };
+    const int id = mpeg2dec->chunk_start[0] >> 4;	/* high nibble */
+    const int mask = 1 << id;
+
+    if (mpeg2dec->ext_state & mask) {
+	mpeg2dec->ext_state &= ~mask;	/* accepted once per ext_state reload */
+	return parser[id] (mpeg2dec);
+    }
+    return 0;	/* ignore illegal extensions */
+}
+
+/* Add this chunk's bytes to user_data_len and advance chunk_start past it. */
+int mpeg2_header_user_data (mpeg2dec_t * mpeg2dec)
+{
+    mpeg2dec->user_data_len += mpeg2dec->chunk_ptr - 1 - mpeg2dec->chunk_start;
+    mpeg2dec->chunk_start = mpeg2dec->chunk_ptr - 1;
+    return 0;
+}
+
+/* Rebuild quantizer_prescale[index] unless it matches q_scale_type already. */
+static void prescale (mpeg2dec_t * mpeg2dec, int index)
+{
+    static int non_linear_scale [] = {
+	 0,  1,  2,  3,  4,  5,   6,   7,
+	 8, 10, 12, 14, 16, 18,  20,  22,
+	24, 28, 32, 36, 40, 44,  48,  52,
+	56, 64, 72, 80, 88, 96, 104, 112
+    };
+    mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
+    int code, pos, scale;
+    if (mpeg2dec->scaled[index] == mpeg2dec->q_scale_type)
+	return;		/* table already built for this scale type */
+    mpeg2dec->scaled[index] = mpeg2dec->q_scale_type;
+    for (code = 0; code < 32; code++) {
+	scale = mpeg2dec->q_scale_type ? non_linear_scale[code] : (code << 1);
+	for (pos = 0; pos < 64; pos++)
+	    decoder->quantizer_prescale[index][code][pos] =
+		scale * mpeg2dec->quantizer_matrix[index][pos];
+    }
+}
+
+/* Prepare for the first slice: prescale tables, then bind the frame buffers. */
+mpeg2_state_t mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec)
+{
+    mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
+    mpeg2dec->info.user_data = NULL;	mpeg2dec->info.user_data_len = 0;
+    mpeg2dec->state = ((mpeg2dec->picture->nb_fields > 1 ||
+			mpeg2dec->state == STATE_PICTURE_2ND) ?
+		       STATE_SLICE : STATE_SLICE_1ST);
+
+    if (mpeg2dec->decoder.coding_type != D_TYPE) {
+	prescale (mpeg2dec, 0);
+	if (decoder->chroma_quantizer[0] == decoder->quantizer_prescale[2])
+	    prescale (mpeg2dec, 2);	/* separate chroma table in use */
+	if (mpeg2dec->decoder.coding_type != I_TYPE) {
+	    prescale (mpeg2dec, 1);
+	    if (decoder->chroma_quantizer[1] == decoder->quantizer_prescale[3])
+		prescale (mpeg2dec, 3);
+	}
+    }
+
+    if (!(mpeg2dec->nb_decode_slices))
+	mpeg2dec->picture->flags |= PIC_FLAG_SKIP;
+    else if (mpeg2dec->convert_start) {
+	mpeg2dec->convert_start (decoder->convert_id, mpeg2dec->fbuf[0],
+				 mpeg2dec->picture, mpeg2dec->info.gop);
+
+	if (mpeg2dec->decoder.coding_type == B_TYPE)
+	    mpeg2_init_fbuf (&(mpeg2dec->decoder), mpeg2dec->yuv_buf[2],
+			     mpeg2dec->yuv_buf[mpeg2dec->yuv_index ^ 1],
+			     mpeg2dec->yuv_buf[mpeg2dec->yuv_index]);
+	else {
+	    mpeg2_init_fbuf (&(mpeg2dec->decoder),
+			     mpeg2dec->yuv_buf[mpeg2dec->yuv_index ^ 1],
+			     mpeg2dec->yuv_buf[mpeg2dec->yuv_index],
+			     mpeg2dec->yuv_buf[mpeg2dec->yuv_index]);
+	    if (mpeg2dec->state == STATE_SLICE)	/* not after a first field */
+		mpeg2dec->yuv_index ^= 1;
+	}
+    } else {
+	int b_type;
+
+	b_type = (mpeg2dec->decoder.coding_type == B_TYPE);
+	mpeg2_init_fbuf (&(mpeg2dec->decoder), mpeg2dec->fbuf[0]->buf,
+			 mpeg2dec->fbuf[b_type + 1]->buf,
+			 mpeg2dec->fbuf[b_type]->buf);
+    }
+    mpeg2dec->action = NULL;
+    return STATE_INTERNAL_NORETURN;
+}
+
+static mpeg2_state_t seek_sequence (mpeg2dec_t * mpeg2dec)
+{   /* action installed by mpeg2_header_end: forget the sequence and reseek */
+    mpeg2_reset_info (&(mpeg2dec->info));
+    mpeg2dec->info.sequence = NULL;
+    mpeg2dec->info.gop = NULL;
+    mpeg2_header_state_init (mpeg2dec);
+    mpeg2dec->action = mpeg2_seek_header;
+    return mpeg2_seek_header (mpeg2dec);
+}
+
+/* End of sequence: publish the last display/discard buffers; STATE_END. */
+mpeg2_state_t mpeg2_header_end (mpeg2dec_t * mpeg2dec)
+{
+    mpeg2_picture_t * picture;
+    int b_type;
+    b_type = (mpeg2dec->decoder.coding_type == B_TYPE);
+    picture = mpeg2dec->pictures;
+    if ((mpeg2dec->picture >= picture + 2) ^ b_type)
+	picture = mpeg2dec->pictures + 2;
+
+    mpeg2_reset_info (&(mpeg2dec->info));
+    if (!(mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY)) {
+	mpeg2dec->info.display_picture = picture;
+	if (picture->nb_fields == 1)
+	    mpeg2dec->info.display_picture_2nd = picture + 1;
+	mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[b_type];
+	if (!mpeg2dec->convert)
+	    mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[b_type + 1];
+    } else if (!mpeg2dec->convert)
+	mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[b_type];
+    mpeg2dec->action = seek_sequence;	/* next step looks for a new header */
+    return STATE_END;
+}
diff --git a/src/video_dec/libmpeg2new/libmpeg2/idct.c b/src/video_dec/libmpeg2new/libmpeg2/idct.c
new file mode 100644
index 000000000..8b982bb33
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/idct.c
@@ -0,0 +1,287 @@
+/*
+ * idct.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "../include/mpeg2.h"
+#include "../include/attributes.h"
+#include "mpeg2_internal.h"
+
+#define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */
+#define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */
+#define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */
+#define W5 1609 /* 2048 * sqrt (2) * cos (5 * pi / 16) */
+#define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */
+#define W7 565  /* 2048 * sqrt (2) * cos (7 * pi / 16) */
+
+/* idct entry points; mpeg2_idct_init () assigns the implementation */
+void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride);
+void (* mpeg2_idct_add) (int last, int16_t * block,
+			 uint8_t * dest, int stride);
+
+/*
+ * In legal streams, the IDCT output should be between -384 and +384.
+ * In corrupted streams, it is possible to force the IDCT output to go
+ * to +-3826 - this is the worst case for a column IDCT where the
+ * column inputs are 16-bit values.
+ */
+uint8_t mpeg2_clip[3840 * 2 + 256];
+#define CLIP(i) ((mpeg2_clip + 3840)[i])	/* i in [-3840, 3840 + 256) */
+/* BUTTERFLY: t0 = W0*d0 + W1*d1, t1 = W0*d1 - W1*d0 (3 multiplies below) */
+#if 0
+#define BUTTERFLY(t0,t1,W0,W1,d0,d1)	\
+do {					\
+    t0 = W0 * d0 + W1 * d1;		\
+    t1 = W0 * d1 - W1 * d0;		\
+} while (0)
+#else
+#define BUTTERFLY(t0,t1,W0,W1,d0,d1)	\
+do {					\
+    int tmp = W0 * (d0 + d1);		\
+    t0 = tmp + (W1 - W0) * d1;		\
+    t1 = tmp - (W1 + W0) * d0;		\
+} while (0)
+#endif
+
+/* One-dimensional 8-point inverse DCT over one row of block, in place. */
+static void inline idct_row (int16_t * const block)
+{
+    int d0, d1, d2, d3;
+    int a0, a1, a2, a3, b0, b1, b2, b3;
+    int t0, t1, t2, t3;
+    /* shortcut */
+    /* block[1..7] all zero: every output becomes block[0] >> 1 */
+    /* NOTE(review): int32_t access assumes aligned block, tolerated punning */
+    if (likely (!(block[1] | ((int32_t *)block)[1] | ((int32_t *)block)[2] |
+		  ((int32_t *)block)[3]))) {
+	uint32_t tmp = (uint16_t) (block[0] >> 1);
+	tmp |= tmp << 16;
+	((int32_t *)block)[0] = tmp;
+	((int32_t *)block)[1] = tmp;
+	((int32_t *)block)[2] = tmp;
+	((int32_t *)block)[3] = tmp;
+	return;
+    }
+    d0 = (block[0] << 11) + 2048;	/* 2048: rounding for the >> 12 below */
+    d1 = block[1];
+    d2 = block[2] << 11;
+    d3 = block[3];
+    t0 = d0 + d2;
+    t1 = d0 - d2;
+    BUTTERFLY (t2, t3, W6, W2, d3, d1);
+    a0 = t0 + t2;
+    a1 = t1 + t3;
+    a2 = t1 - t3;
+    a3 = t0 - t2;
+    d0 = block[4];
+    d1 = block[5];
+    d2 = block[6];
+    d3 = block[7];
+    BUTTERFLY (t0, t1, W7, W1, d3, d0);
+    BUTTERFLY (t2, t3, W3, W5, d1, d2);
+    b0 = t0 + t2;
+    b3 = t1 + t3;
+    t0 -= t2;
+    t1 -= t3;
+    b1 = ((t0 + t1) >> 8) * 181;	/* 181 / 256 approximates 1 / sqrt (2) */
+    b2 = ((t0 - t1) >> 8) * 181;
+
+    block[0] = (a0 + b0) >> 12;
+    block[1] = (a1 + b1) >> 12;
+    block[2] = (a2 + b2) >> 12;
+    block[3] = (a3 + b3) >> 12;
+    block[4] = (a3 - b3) >> 12;
+    block[5] = (a2 - b2) >> 12;
+    block[6] = (a1 - b1) >> 12;
+    block[7] = (a0 - b0) >> 12;
+}
+
+/* One-dimensional 8-point inverse DCT over one column (stride 8), in place. */
+static void inline idct_col (int16_t * const block)
+{
+    int d0, d1, d2, d3;
+    int a0, a1, a2, a3, b0, b1, b2, b3;
+    int t0, t1, t2, t3;
+    d0 = (block[8*0] << 11) + 65536;	/* 65536: rounding for the >> 17 */
+    d1 = block[8*1];
+    d2 = block[8*2] << 11;
+    d3 = block[8*3];
+    t0 = d0 + d2;
+    t1 = d0 - d2;
+    BUTTERFLY (t2, t3, W6, W2, d3, d1);
+    a0 = t0 + t2;
+    a1 = t1 + t3;
+    a2 = t1 - t3;
+    a3 = t0 - t2;
+
+    d0 = block[8*4];
+    d1 = block[8*5];
+    d2 = block[8*6];
+    d3 = block[8*7];
+    BUTTERFLY (t0, t1, W7, W1, d3, d0);
+    BUTTERFLY (t2, t3, W3, W5, d1, d2);
+    b0 = t0 + t2;
+    b3 = t1 + t3;
+    t0 -= t2;
+    t1 -= t3;
+    b1 = ((t0 + t1) >> 8) * 181;	/* 181 / 256 approximates 1 / sqrt (2) */
+    b2 = ((t0 - t1) >> 8) * 181;
+
+    block[8*0] = (a0 + b0) >> 17;
+    block[8*1] = (a1 + b1) >> 17;
+    block[8*2] = (a2 + b2) >> 17;
+    block[8*3] = (a3 + b3) >> 17;
+    block[8*4] = (a3 - b3) >> 17;
+    block[8*5] = (a2 - b2) >> 17;
+    block[8*6] = (a1 - b1) >> 17;
+    block[8*7] = (a0 - b0) >> 17;
+}
+
+/*
+ * Plain C inverse DCT, "copy" variant.
+ * Runs the row pass over all eight rows, then the column pass over all
+ * eight columns, and stores the clipped results into dest (one row every
+ * stride bytes), overwriting what was there.  Every coefficient in block
+ * is reset to zero on the way out.
+ */
+static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest,
+			       const int stride)
+{
+    int row, col;
+
+    for (row = 0; row < 8; row++)
+	idct_row (block + 8 * row);
+    for (col = 0; col < 8; col++)
+	idct_col (block + col);
+
+    for (row = 0; row < 8; row++) {
+	for (col = 0; col < 8; col++) {
+	    dest[col] = CLIP (block[col]);
+	    block[col] = 0;
+	}
+	dest += stride;
+	block += 8;
+    }
+}
+
+/* C IDCT "add" variant: adds the result to dest, with a DC-only fast path. */
+static void mpeg2_idct_add_c (const int last, int16_t * block,
+			      uint8_t * dest, const int stride)
+{
+    int i;
+    if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) {
+	for (i = 0; i < 8; i++)
+	    idct_row (block + 8 * i);
+	for (i = 0; i < 8; i++)
+	    idct_col (block + i);
+	do {	/* i is 8 here, left over from the loop above */
+	    dest[0] = CLIP (block[0] + dest[0]);
+	    dest[1] = CLIP (block[1] + dest[1]);
+	    dest[2] = CLIP (block[2] + dest[2]);
+	    dest[3] = CLIP (block[3] + dest[3]);
+	    dest[4] = CLIP (block[4] + dest[4]);
+	    dest[5] = CLIP (block[5] + dest[5]);
+	    dest[6] = CLIP (block[6] + dest[6]);
+	    dest[7] = CLIP (block[7] + dest[7]);
+
+	    /* clear this row of coefficients (8 x int16_t) */
+	    ((int32_t *)block)[0] = 0;	((int32_t *)block)[1] = 0;
+	    ((int32_t *)block)[2] = 0;	((int32_t *)block)[3] = 0;
+
+	    dest += stride;
+	    block += 8;
+	} while (--i);
+    } else {
+	int DC;
+
+	DC = (block[0] + 64) >> 7;	/* same rounded value for all pixels */
+	block[0] = block[63] = 0;	/* NOTE(review): why [63]? - confirm */
+	i = 8;
+	do {
+	    dest[0] = CLIP (DC + dest[0]);
+	    dest[1] = CLIP (DC + dest[1]);
+	    dest[2] = CLIP (DC + dest[2]);
+	    dest[3] = CLIP (DC + dest[3]);
+	    dest[4] = CLIP (DC + dest[4]);
+	    dest[5] = CLIP (DC + dest[5]);
+	    dest[6] = CLIP (DC + dest[6]);
+	    dest[7] = CLIP (DC + dest[7]);
+	    dest += stride;
+	} while (--i);
+    }
+}
+
+/* Select the IDCT implementation for accel and set up the tables it needs. */
+void mpeg2_idct_init (uint32_t accel)
+{
+#ifdef ARCH_X86
+    if (accel & MPEG2_ACCEL_X86_MMXEXT) {
+	mpeg2_idct_copy = mpeg2_idct_copy_mmxext;
+	mpeg2_idct_add = mpeg2_idct_add_mmxext;
+	mpeg2_idct_mmx_init ();
+    } else if (accel & MPEG2_ACCEL_X86_MMX) {
+	mpeg2_idct_copy = mpeg2_idct_copy_mmx;
+	mpeg2_idct_add = mpeg2_idct_add_mmx;
+	mpeg2_idct_mmx_init ();
+    } else
+#endif
+#ifdef ARCH_PPC
+    if (accel & MPEG2_ACCEL_PPC_ALTIVEC) {
+	mpeg2_idct_copy = mpeg2_idct_copy_altivec;
+	mpeg2_idct_add = mpeg2_idct_add_altivec;
+	mpeg2_idct_altivec_init ();
+    } else
+#endif
+#ifdef ARCH_ALPHA
+    if (accel & MPEG2_ACCEL_ALPHA_MVI) {
+	mpeg2_idct_copy = mpeg2_idct_copy_mvi;
+	mpeg2_idct_add = mpeg2_idct_add_mvi;
+	mpeg2_idct_alpha_init ();
+    } else if (accel & MPEG2_ACCEL_ALPHA) {
+	int i;
+
+	mpeg2_idct_copy = mpeg2_idct_copy_alpha;
+	mpeg2_idct_add = mpeg2_idct_add_alpha;
+	mpeg2_idct_alpha_init ();
+	for (i = -3840; i < 3840 + 256; i++)	/* saturate to 0..255 */
+	    CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 255 : i);
+    } else
+#endif
+    {	/* plain C fallback */
+	extern uint8_t mpeg2_scan_norm[64];
+	extern uint8_t mpeg2_scan_alt[64];
+	int i, j;
+	mpeg2_idct_copy = mpeg2_idct_copy_c;
+	mpeg2_idct_add = mpeg2_idct_add_c;
+	for (i = -3840; i < 3840 + 256; i++)	/* saturate to 0..255 */
+	    CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 255 : i);
+	/* NOTE(review): in-place permutation; a second call would redo it */
+	for (i = 0; i < 64; i++) {
+	    j = mpeg2_scan_norm[i];
+	    mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
+	    j = mpeg2_scan_alt[i];
+	    mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
+	}
+    }
+}
diff --git a/src/video_dec/libmpeg2new/libmpeg2/idct_alpha.c b/src/video_dec/libmpeg2new/libmpeg2/idct_alpha.c
new file mode 100644
index 000000000..1d8fd08ee
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/idct_alpha.c
@@ -0,0 +1,379 @@
+/*
+ * idct_alpha.c
+ * Copyright (C) 2002-2003 Falk Hueffner <falk@debian.org>
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#ifdef ARCH_ALPHA
+
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include <xine/attributes.h>
+#include "mpeg2_internal.h"
+#include "alpha_asm.h"
+
+#define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */
+#define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */
+#define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */
+#define W5 1609 /* 2048 * sqrt (2) * cos (5 * pi / 16) */
+#define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */
+#define W7 565  /* 2048 * sqrt (2) * cos (7 * pi / 16) */
+
+extern uint8_t mpeg2_clip[3840 * 2 + 256];
+#define CLIP(i) ((mpeg2_clip + 3840)[i])
+
+#if 0	/* reference form of the butterfly: four multiplications */
+#define BUTTERFLY(t0,t1,W0,W1,d0,d1)	\
+do {					\
+    t0 = W0 * d0 + W1 * d1;			\
+    t1 = W0 * d1 - W1 * d0;			\
+} while (0)
+#else	/* same t0/t1 as above, factored so that W0 * (d0 + d1) is computed once and shared */
+#define BUTTERFLY(t0,t1,W0,W1,d0,d1)	\
+do {					\
+    int_fast32_t tmp = W0 * (d0 + d1);	\
+    t0 = tmp + (W1 - W0) * d1;		\
+    t1 = tmp - (W1 + W0) * d0;		\
+} while (0)
+#endif
+
+static inline void idct_row (int16_t * const block)
+{   /* In-place 8-point row pass over block[0..7]: weights scaled by 2048, results rounded and shifted right by 12. */
+    uint64_t l, r;
+    int_fast32_t d0, d1, d2, d3;
+    int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3;
+    int_fast32_t t0, t1, t2, t3;
+
+    l = ldq (block);		/* block[0..3] as one quadword */
+    r = ldq (block + 4);	/* block[4..7] as one quadword */
+
+    /* shortcut: only block[0] is nonzero, so all eight outputs are block[0] >> 1 */
+    if (likely (!((l & ~0xffffUL) | r))) {
+	uint64_t tmp = (uint16_t) (sextw (l) >> 1);	/* sign-extend first: shifting the zero-extended quadword would clear a negative value's sign bit */
+	tmp |= tmp << 16;
+	tmp |= tmp << 32;
+	((int32_t *)block)[0] = tmp;
+	((int32_t *)block)[1] = tmp;
+	((int32_t *)block)[2] = tmp;
+	((int32_t *)block)[3] = tmp;
+	return;
+    }
+
+    d0 = (sextw (l) << 11) + 2048;	/* + 2048 rounds the final >> 12 */
+    d1 = sextw (extwl (l, 2));
+    d2 = sextw (extwl (l, 4)) << 11;
+    d3 = sextw (extwl (l, 6));
+    t0 = d0 + d2;
+    t1 = d0 - d2;
+    BUTTERFLY (t2, t3, W6, W2, d3, d1);
+    a0 = t0 + t2;
+    a1 = t1 + t3;
+    a2 = t1 - t3;
+    a3 = t0 - t2;
+
+    d0 = sextw (r);
+    d1 = sextw (extwl (r, 2));
+    d2 = sextw (extwl (r, 4));
+    d3 = sextw (extwl (r, 6));
+    BUTTERFLY (t0, t1, W7, W1, d3, d0);
+    BUTTERFLY (t2, t3, W3, W5, d1, d2);
+    b0 = t0 + t2;
+    b3 = t1 + t3;
+    t0 -= t2;
+    t1 -= t3;
+    b1 = ((t0 + t1) >> 8) * 181;	/* 181 / 256 is roughly 1 / sqrt (2) */
+    b2 = ((t0 - t1) >> 8) * 181;
+
+    block[0] = (a0 + b0) >> 12;
+    block[1] = (a1 + b1) >> 12;
+    block[2] = (a2 + b2) >> 12;
+    block[3] = (a3 + b3) >> 12;
+    block[4] = (a3 - b3) >> 12;
+    block[5] = (a2 - b2) >> 12;
+    block[6] = (a1 - b1) >> 12;
+    block[7] = (a0 - b0) >> 12;
+}
+
+static inline void idct_col (int16_t * const block)
+{   /* In-place 8-point column pass over block[0], block[8], ..., block[56]; results rounded and shifted right by 17. */
+    int_fast32_t d0, d1, d2, d3;
+    int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3;
+    int_fast32_t t0, t1, t2, t3;
+
+    d0 = (block[8*0] << 11) + 65536;	/* + 65536 rounds the final >> 17 */
+    d1 = block[8*1];
+    d2 = block[8*2] << 11;
+    d3 = block[8*3];
+    t0 = d0 + d2;
+    t1 = d0 - d2;
+    BUTTERFLY (t2, t3, W6, W2, d3, d1);
+    a0 = t0 + t2;
+    a1 = t1 + t3;
+    a2 = t1 - t3;
+    a3 = t0 - t2;
+
+    d0 = block[8*4];
+    d1 = block[8*5];
+    d2 = block[8*6];
+    d3 = block[8*7];
+    BUTTERFLY (t0, t1, W7, W1, d3, d0);
+    BUTTERFLY (t2, t3, W3, W5, d1, d2);
+    b0 = t0 + t2;
+    b3 = t1 + t3;
+    t0 -= t2;
+    t1 -= t3;
+    b1 = ((t0 + t1) >> 8) * 181;	/* 181 / 256 is roughly 1 / sqrt (2) */
+    b2 = ((t0 - t1) >> 8) * 181;
+
+    block[8*0] = (a0 + b0) >> 17;
+    block[8*1] = (a1 + b1) >> 17;
+    block[8*2] = (a2 + b2) >> 17;
+    block[8*3] = (a3 + b3) >> 17;
+    block[8*4] = (a3 - b3) >> 17;
+    block[8*5] = (a2 - b2) >> 17;
+    block[8*6] = (a1 - b1) >> 17;
+    block[8*7] = (a0 - b0) >> 17;
+}
+
+void mpeg2_idct_copy_mvi (int16_t * block, uint8_t * dest, const int stride)
+{   /* Transform block in place, store it clamped to bytes as 8 rows at dest (stride bytes apart), and zero block. */
+    uint64_t clampmask;
+    int i;
+
+    for (i = 0; i < 8; i++)
+	idct_row (block + 8 * i);
+
+    for (i = 0; i < 8; i++)
+	idct_col (block + i);
+
+    clampmask = zap (-1, 0xaa);	/* 0x00ff00ff00ff00ff */
+    do {	/* i == 8 here (left by the column loop), so this runs once per row */
+	uint64_t shorts0, shorts1;
+
+	shorts0 = ldq (block);
+	shorts0 = maxsw4 (shorts0, 0);	/* with the next line: clamp each 16-bit lane to 0..255 (assuming per-word signed max/min) */
+	shorts0 = minsw4 (shorts0, clampmask);
+	stl (pkwb (shorts0), dest);	/* four packed result bytes */
+
+	shorts1 = ldq (block + 4);
+	shorts1 = maxsw4 (shorts1, 0);
+	shorts1 = minsw4 (shorts1, clampmask);
+	stl (pkwb (shorts1), dest + 4);
+
+	stq (0, block);		/* clear the eight coefficients of this row */
+	stq (0, block + 4);
+
+	dest += stride;
+	block += 8;
+    } while (--i);
+}
+
+void mpeg2_idct_add_mvi (const int last, int16_t * block,
+			 uint8_t * dest, const int stride)
+{   /* Add the transformed block to the 8x8 pixels at dest (rows stride bytes apart) with clamping; clears what it consumed. */
+    uint64_t clampmask;
+    uint64_t signmask;
+    int i;
+
+    if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) {	/* full transform; the else branch handles the DC-only case */
+	for (i = 0; i < 8; i++)
+	    idct_row (block + 8 * i);
+	for (i = 0; i < 8; i++)
+	    idct_col (block + i);
+	clampmask = zap (-1, 0xaa);	/* 0x00ff00ff00ff00ff */
+	signmask = zap (-1, 0x33);
+	signmask ^= signmask >> 1;	/* 0x8000800080008000 */
+
+	do {	/* i == 8 here (left by the column loop), so this runs once per row */
+	    uint64_t shorts0, pix0, signs0;
+	    uint64_t shorts1, pix1, signs1;
+
+	    shorts0 = ldq (block);
+	    shorts1 = ldq (block + 4);
+
+	    pix0 = unpkbw (ldl (dest));
+	    /* signed subword add (MMX paddw).  */
+	    signs0 = shorts0 & signmask;
+	    shorts0 &= ~signmask;	/* sign bits removed so a carry cannot cross into the next lane */
+	    shorts0 += pix0;
+	    shorts0 ^= signs0;
+	    /* clamp. */
+	    shorts0 = maxsw4 (shorts0, 0);
+	    shorts0 = minsw4 (shorts0, clampmask);	
+
+	    /* next 4.  */
+	    pix1 = unpkbw (ldl (dest + 4));
+	    signs1 = shorts1 & signmask;
+	    shorts1 &= ~signmask;
+	    shorts1 += pix1;
+	    shorts1 ^= signs1;
+	    shorts1 = maxsw4 (shorts1, 0);
+	    shorts1 = minsw4 (shorts1, clampmask);
+
+	    stl (pkwb (shorts0), dest);
+	    stl (pkwb (shorts1), dest + 4);
+	    stq (0, block);		/* clear the eight coefficients of this row */
+	    stq (0, block + 4);
+
+	    dest += stride;
+	    block += 8;
+	} while (--i);
+    } else {	/* DC-only path: every pixel receives the same offset */
+	int DC;
+	uint64_t p0, p1, p2, p3, p4, p5, p6, p7;
+	uint64_t DCs;
+
+	DC = (block[0] + 64) >> 7;	/* block[0] / 128, rounded */
+	block[0] = block[63] = 0;
+	/* NOTE(review): quadword access to dest presumably needs dest and stride 8-byte aligned - confirm */
+	p0 = ldq (dest + 0 * stride);
+	p1 = ldq (dest + 1 * stride);
+	p2 = ldq (dest + 2 * stride);
+	p3 = ldq (dest + 3 * stride);
+	p4 = ldq (dest + 4 * stride);
+	p5 = ldq (dest + 5 * stride);
+	p6 = ldq (dest + 6 * stride);
+	p7 = ldq (dest + 7 * stride);
+
+	if (DC > 0) {	/* p += min (DC, 255 - p) per byte, so no byte can overflow (assuming minub8 is a per-byte unsigned min) */
+	    DCs = BYTE_VEC (likely (DC <= 255) ? DC : 255);
+	    p0 += minub8 (DCs, ~p0);
+	    p1 += minub8 (DCs, ~p1);
+	    p2 += minub8 (DCs, ~p2);
+	    p3 += minub8 (DCs, ~p3);
+	    p4 += minub8 (DCs, ~p4);
+	    p5 += minub8 (DCs, ~p5);
+	    p6 += minub8 (DCs, ~p6);
+	    p7 += minub8 (DCs, ~p7);
+	} else {	/* p -= min (-DC, p) per byte, so no byte can drop below zero */
+	    DCs = BYTE_VEC (likely (-DC <= 255) ? -DC : 255);
+	    p0 -= minub8 (DCs, p0);
+	    p1 -= minub8 (DCs, p1);
+	    p2 -= minub8 (DCs, p2);
+	    p3 -= minub8 (DCs, p3);
+	    p4 -= minub8 (DCs, p4);
+	    p5 -= minub8 (DCs, p5);
+	    p6 -= minub8 (DCs, p6);
+	    p7 -= minub8 (DCs, p7);
+	}
+
+	stq (p0, dest + 0 * stride);
+	stq (p1, dest + 1 * stride);
+	stq (p2, dest + 2 * stride);
+	stq (p3, dest + 3 * stride);
+	stq (p4, dest + 4 * stride);
+	stq (p5, dest + 5 * stride);
+	stq (p6, dest + 6 * stride);
+	stq (p7, dest + 7 * stride);
+    }
+}
+
+void mpeg2_idct_copy_alpha (int16_t * block, uint8_t * dest, const int stride)
+{   /* Transform block in place, write it through the CLIP table as 8 rows at dest (stride bytes apart), and zero block. */
+    int i;
+
+    for (i = 0; i < 8; i++)
+	idct_row (block + 8 * i);
+    for (i = 0; i < 8; i++)
+	idct_col (block + i);
+    do {	/* i == 8 here (left by the column loop), so this runs once per row */
+	dest[0] = CLIP (block[0]);
+	dest[1] = CLIP (block[1]);
+	dest[2] = CLIP (block[2]);
+	dest[3] = CLIP (block[3]);
+	dest[4] = CLIP (block[4]);
+	dest[5] = CLIP (block[5]);
+	dest[6] = CLIP (block[6]);
+	dest[7] = CLIP (block[7]);
+
+	stq(0, block);		/* clear the eight coefficients of this row */
+	stq(0, block + 4);
+
+	dest += stride;
+	block += 8;
+    } while (--i);
+}
+
+void mpeg2_idct_add_alpha (const int last, int16_t * block,
+			   uint8_t * dest, const int stride)
+{   /* Add the transformed block to the 8x8 pixels at dest (rows stride bytes apart), clamping through the CLIP table. */
+    int i;
+
+    if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) {	/* full transform; the else branch handles the DC-only case */
+	for (i = 0; i < 8; i++)
+	    idct_row (block + 8 * i);
+	for (i = 0; i < 8; i++)
+	    idct_col (block + i);
+	do {	/* i == 8 here (left by the column loop), so this runs once per row */
+	    dest[0] = CLIP (block[0] + dest[0]);
+	    dest[1] = CLIP (block[1] + dest[1]);
+	    dest[2] = CLIP (block[2] + dest[2]);
+	    dest[3] = CLIP (block[3] + dest[3]);
+	    dest[4] = CLIP (block[4] + dest[4]);
+	    dest[5] = CLIP (block[5] + dest[5]);
+	    dest[6] = CLIP (block[6] + dest[6]);
+	    dest[7] = CLIP (block[7] + dest[7]);
+
+	    stq(0, block);		/* clear the eight coefficients of this row */
+	    stq(0, block + 4);
+
+	    dest += stride;
+	    block += 8;
+	} while (--i);
+    } else {	/* DC-only path: every pixel receives the same offset */
+	int DC;
+
+	DC = (block[0] + 64) >> 7;	/* block[0] / 128, rounded */
+	block[0] = block[63] = 0;
+	i = 8;
+	do {
+	    dest[0] = CLIP (DC + dest[0]);
+	    dest[1] = CLIP (DC + dest[1]);
+	    dest[2] = CLIP (DC + dest[2]);
+	    dest[3] = CLIP (DC + dest[3]);
+	    dest[4] = CLIP (DC + dest[4]);
+	    dest[5] = CLIP (DC + dest[5]);
+	    dest[6] = CLIP (DC + dest[6]);
+	    dest[7] = CLIP (DC + dest[7]);
+	    dest += stride;
+	} while (--i);
+    }
+}
+
+void mpeg2_idct_alpha_init (void)
+{   /* Pass every entry of both scan tables through one fixed bit shuffle, in place. */
+    extern uint8_t mpeg2_scan_norm[64];
+    extern uint8_t mpeg2_scan_alt[64];
+    uint8_t * const tables[2] = { mpeg2_scan_norm, mpeg2_scan_alt };
+    int t, pos;
+
+    for (pos = 0; pos < 64; pos++)
+	for (t = 0; t < 2; t++) {
+	    const int old = tables[t][pos];
+	    /* bits 1, 2, 4, 5 move down one place; bits 0 and 3 move up two */
+	    tables[t][pos] = ((old & 0x36) >> 1) | ((old & 0x09) << 2);
+	}
+}
+
+#endif /* ARCH_ALPHA */
diff --git a/src/video_dec/libmpeg2new/libmpeg2/idct_altivec.c b/src/video_dec/libmpeg2new/libmpeg2/idct_altivec.c
new file mode 100644
index 000000000..f15bca165
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/idct_altivec.c
@@ -0,0 +1,288 @@
+/*
+ * idct_altivec.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#ifdef ARCH_PPC
+
+#ifdef HAVE_ALTIVEC_H
+#include <altivec.h>
+#endif
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include <xine/attributes.h>
+#include "mpeg2_internal.h"
+
+typedef vector signed char vector_s8_t;
+typedef vector unsigned char vector_u8_t;
+typedef vector signed short vector_s16_t;
+typedef vector unsigned short vector_u16_t;
+typedef vector signed int vector_s32_t;
+typedef vector unsigned int vector_u32_t;
+
+#if defined(HAVE_ALTIVEC_H) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303)
+/* work around gcc <3.3 vec_mergel bug */
+static inline vector_s16_t my_vec_mergel (vector_s16_t const A,
+					  vector_s16_t const B)
+{   /* vec_perm stand-in for vec_mergel: interleaves bytes 8-15 of A (0x08..) with bytes 8-15 of B (0x18..), two at a time. */
+    static const vector_u8_t mergel = {
+	0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b,
+	0x0c, 0x0d, 0x1c, 0x1d, 0x0e, 0x0f, 0x1e, 0x1f
+    };
+    return vec_perm (A, B, mergel);
+}
+#undef vec_mergel
+#define vec_mergel my_vec_mergel
+#endif
+
+#ifdef HAVE_ALTIVEC_H	/* gnu */
+#define VEC_S16(a,b,c,d,e,f,g,h) {a, b, c, d, e, f, g, h}
+#else			/* apple */
+#define VEC_S16(a,b,c,d,e,f,g,h) (vector_s16_t) (a, b, c, d, e, f, g, h)
+#endif
+
+static const vector_s16_t constants ATTR_ALIGN(16) =	/* IDCT splats shorts 0..5 as c4, a0, a1, a2, mc4, ma2 and the (32, 31) pair as bias */
+    VEC_S16 (23170, 13573, 6518, 21895, -23170, -21895, 32, 31);
+static const vector_s16_t constants_1 ATTR_ALIGN(16) =	/* first-pass multiplier for block[0] +/- block[4] */
+    VEC_S16 (16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725);
+static const vector_s16_t constants_2 ATTR_ALIGN(16) =	/* first-pass multiplier for the block[1] / block[7] pair */
+    VEC_S16 (16069, 22289, 20995, 18895, 16069, 18895, 20995, 22289);
+static const vector_s16_t constants_3 ATTR_ALIGN(16) =	/* first-pass multiplier for the block[2] / block[6] pair */
+    VEC_S16 (21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692);
+static const vector_s16_t constants_4 ATTR_ALIGN(16) =	/* first-pass multiplier for block[3] and block[5] */
+    VEC_S16 (13623, 18895, 17799, 16019, 13623, 16019, 17799, 18895);
+
+#define IDCT								\
+    vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7;		\
+    vector_s16_t vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7;		\
+    vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias;			\
+    vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8;			\
+    vector_u16_t shift;							\
+    /* expects `block` (vector_s16_t *) in scope; declares locals */	\
+    c4 = vec_splat (constants, 0);					\
+    a0 = vec_splat (constants, 1);					\
+    a1 = vec_splat (constants, 2);					\
+    a2 = vec_splat (constants, 3);					\
+    mc4 = vec_splat (constants, 4);					\
+    ma2 = vec_splat (constants, 5);					\
+    bias = (vector_s16_t)vec_splat ((vector_s32_t)constants, 3);	\
+									\
+    zero = vec_splat_s16 (0);						\
+    /* first pass: block[0..7] scaled by constants_1..4 */		\
+    vx0 = vec_adds (block[0], block[4]);				\
+    vx4 = vec_subs (block[0], block[4]);				\
+    t5 = vec_mradds (vx0, constants_1, zero);				\
+    t0 = vec_mradds (vx4, constants_1, zero);				\
+									\
+    vx1 = vec_mradds (a1, block[7], block[1]);				\
+    vx7 = vec_mradds (a1, block[1], vec_subs (zero, block[7]));		\
+    t1 = vec_mradds (vx1, constants_2, zero);				\
+    t8 = vec_mradds (vx7, constants_2, zero);				\
+									\
+    vx2 = vec_mradds (a0, block[6], block[2]);				\
+    vx6 = vec_mradds (a0, block[2], vec_subs (zero, block[6]));		\
+    t2 = vec_mradds (vx2, constants_3, zero);				\
+    t4 = vec_mradds (vx6, constants_3, zero);				\
+									\
+    vx3 = vec_mradds (block[3], constants_4, zero);			\
+    vx5 = vec_mradds (block[5], constants_4, zero);			\
+    t7 = vec_mradds (a2, vx5, vx3);					\
+    t3 = vec_mradds (ma2, vx3, vx5);					\
+									\
+    t6 = vec_adds (t8, t3);						\
+    t3 = vec_subs (t8, t3);						\
+    t8 = vec_subs (t1, t7);						\
+    t1 = vec_adds (t1, t7);						\
+    t6 = vec_mradds (a0, t6, t6);	/* a0+1 == 2*c4 */		\
+    t1 = vec_mradds (a0, t1, t1);	/* a0+1 == 2*c4 */		\
+									\
+    t7 = vec_adds (t5, t2);						\
+    t2 = vec_subs (t5, t2);						\
+    t5 = vec_adds (t0, t4);						\
+    t0 = vec_subs (t0, t4);						\
+    t4 = vec_subs (t8, t3);						\
+    t3 = vec_adds (t8, t3);						\
+									\
+    vy0 = vec_adds (t7, t1);						\
+    vy7 = vec_subs (t7, t1);						\
+    vy1 = vec_adds (t5, t3);						\
+    vy6 = vec_subs (t5, t3);						\
+    vy2 = vec_adds (t0, t4);						\
+    vy5 = vec_subs (t0, t4);						\
+    vy3 = vec_adds (t2, t6);						\
+    vy4 = vec_subs (t2, t6);						\
+    /* three merge rounds: transposes the 8x8 of shorts */		\
+    vx0 = vec_mergeh (vy0, vy4);					\
+    vx1 = vec_mergel (vy0, vy4);					\
+    vx2 = vec_mergeh (vy1, vy5);					\
+    vx3 = vec_mergel (vy1, vy5);					\
+    vx4 = vec_mergeh (vy2, vy6);					\
+    vx5 = vec_mergel (vy2, vy6);					\
+    vx6 = vec_mergeh (vy3, vy7);					\
+    vx7 = vec_mergel (vy3, vy7);					\
+									\
+    vy0 = vec_mergeh (vx0, vx4);					\
+    vy1 = vec_mergel (vx0, vx4);					\
+    vy2 = vec_mergeh (vx1, vx5);					\
+    vy3 = vec_mergel (vx1, vx5);					\
+    vy4 = vec_mergeh (vx2, vx6);					\
+    vy5 = vec_mergel (vx2, vx6);					\
+    vy6 = vec_mergeh (vx3, vx7);					\
+    vy7 = vec_mergel (vx3, vx7);					\
+									\
+    vx0 = vec_mergeh (vy0, vy4);					\
+    vx1 = vec_mergel (vy0, vy4);					\
+    vx2 = vec_mergeh (vy1, vy5);					\
+    vx3 = vec_mergel (vy1, vy5);					\
+    vx4 = vec_mergeh (vy2, vy6);					\
+    vx5 = vec_mergel (vy2, vy6);					\
+    vx6 = vec_mergeh (vy3, vy7);					\
+    vx7 = vec_mergel (vy3, vy7);					\
+    /* second pass; bias is added ahead of the final shift */		\
+    vx0 = vec_adds (vx0, bias);						\
+    t5 = vec_adds (vx0, vx4);						\
+    t0 = vec_subs (vx0, vx4);						\
+									\
+    t1 = vec_mradds (a1, vx7, vx1);					\
+    t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7));			\
+									\
+    t2 = vec_mradds (a0, vx6, vx2);					\
+    t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6));			\
+									\
+    t7 = vec_mradds (a2, vx5, vx3);					\
+    t3 = vec_mradds (ma2, vx3, vx5);					\
+									\
+    t6 = vec_adds (t8, t3);						\
+    t3 = vec_subs (t8, t3);						\
+    t8 = vec_subs (t1, t7);						\
+    t1 = vec_adds (t1, t7);						\
+									\
+    t7 = vec_adds (t5, t2);						\
+    t2 = vec_subs (t5, t2);						\
+    t5 = vec_adds (t0, t4);						\
+    t0 = vec_subs (t0, t4);						\
+    t4 = vec_subs (t8, t3);						\
+    t3 = vec_adds (t8, t3);						\
+									\
+    vy0 = vec_adds (t7, t1);						\
+    vy7 = vec_subs (t7, t1);						\
+    vy1 = vec_mradds (c4, t3, t5);					\
+    vy6 = vec_mradds (mc4, t3, t5);					\
+    vy2 = vec_mradds (c4, t4, t0);					\
+    vy5 = vec_mradds (mc4, t4, t0);					\
+    vy3 = vec_adds (t2, t6);						\
+    vy4 = vec_subs (t2, t6);						\
+    /* results: vx0..vx7 = vy0..vy7 >> 6 (arithmetic) */		\
+    shift = vec_splat_u16 (6);						\
+    vx0 = vec_sra (vy0, shift);						\
+    vx1 = vec_sra (vy1, shift);						\
+    vx2 = vec_sra (vy2, shift);						\
+    vx3 = vec_sra (vy3, shift);						\
+    vx4 = vec_sra (vy4, shift);						\
+    vx5 = vec_sra (vy5, shift);						\
+    vx6 = vec_sra (vy6, shift);						\
+    vx7 = vec_sra (vy7, shift);
+
+void mpeg2_idct_copy_altivec (int16_t * const _block, uint8_t * dest,
+			      const int stride)
+{   /* Run IDCT on _block, store the 8 result rows as saturated bytes at dest (stride bytes apart), then zero _block. */
+    vector_s16_t * const block = (vector_s16_t *)_block;
+    vector_u8_t tmp;
+
+    IDCT
+    /* COPY: pack src to unsigned bytes (same 8 bytes in both halves), then store them as two 32-bit elements */
+#define COPY(dest,src)						\
+    tmp = vec_packsu (src, src);				\
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);	\
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+
+    COPY (dest, vx0)	dest += stride;
+    COPY (dest, vx1)	dest += stride;
+    COPY (dest, vx2)	dest += stride;
+    COPY (dest, vx3)	dest += stride;
+    COPY (dest, vx4)	dest += stride;
+    COPY (dest, vx5)	dest += stride;
+    COPY (dest, vx6)	dest += stride;
+    COPY (dest, vx7)
+    /* clear all 64 coefficients (8 vectors of 8 shorts) */
+    block[0] = block[1] = block[2] = block[3] = zero;
+    block[4] = block[5] = block[6] = block[7] = zero;
+}
+
+void mpeg2_idct_add_altivec (const int last, int16_t * const _block,
+			     uint8_t * dest, const int stride)
+{   /* Run IDCT on _block, add the rows to the pixels at dest with saturation, then zero _block; `last` is not used. */
+    vector_s16_t * const block = (vector_s16_t *)_block;
+    vector_u8_t tmp;
+    vector_s16_t tmp2, tmp3;
+    vector_u8_t perm0;
+    vector_u8_t perm1;
+    vector_u8_t p0, p1, p;
+
+    IDCT
+    /* perm0/perm1: permute masks that zero-extend the eight pixel bytes of a row to shorts (0xff picks a byte of `zero`) */
+    p0 = vec_lvsl (0, dest);
+    p1 = vec_lvsl (stride, dest);
+    p = vec_splat_u8 (-1);
+    perm0 = vec_mergeh (p, p0);
+    perm1 = vec_mergeh (p, p1);
+    /* NOTE(review): reusing perm0 for even rows and perm1 for odd rows presumably relies on dest alignment repeating every two rows - confirm */
+#define ADD(dest,src,perm)						\
+    /* *(uint64_t *)&tmp = *(uint64_t *)dest; */			\
+    tmp = vec_ld (0, dest);						\
+    tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm);	\
+    tmp3 = vec_adds (tmp2, src);					\
+    tmp = vec_packsu (tmp3, tmp3);					\
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);		\
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+
+    ADD (dest, vx0, perm0)	dest += stride;
+    ADD (dest, vx1, perm1)	dest += stride;
+    ADD (dest, vx2, perm0)	dest += stride;
+    ADD (dest, vx3, perm1)	dest += stride;
+    ADD (dest, vx4, perm0)	dest += stride;
+    ADD (dest, vx5, perm1)	dest += stride;
+    ADD (dest, vx6, perm0)	dest += stride;
+    ADD (dest, vx7, perm1)
+    /* clear all 64 coefficients (8 vectors of 8 shorts) */
+    block[0] = block[1] = block[2] = block[3] = zero;
+    block[4] = block[5] = block[6] = block[7] = zero;
+}
+
+void mpeg2_idct_altivec_init (void)
+{
+    extern uint8_t mpeg2_scan_norm[64];
+    extern uint8_t mpeg2_scan_alt[64];
+    uint8_t * const tables[2] = { mpeg2_scan_norm, mpeg2_scan_alt };
+    int t, pos;
+
+    /* the altivec idct takes transposed input: swap the two 3-bit fields of every scan-table entry */
+    for (pos = 0; pos < 64; pos++)
+	for (t = 0; t < 2; t++) {
+	    const int old = tables[t][pos];
+	    tables[t][pos] = (old >> 3) | ((old & 7) << 3);
+	}
+}
+
+#endif
diff --git a/src/video_dec/libmpeg2new/libmpeg2/idct_mlib.c b/src/video_dec/libmpeg2new/libmpeg2/idct_mlib.c
new file mode 100644
index 000000000..55a2e9b64
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/idct_mlib.c
@@ -0,0 +1,60 @@
+/*
+ * idct_mlib.c
+ * Copyright (C) 1999-2003 Håkan Hjort <d95hjort@dtek.chalmers.se>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#ifdef LIBMPEG2_MLIB
+
+#include <mlib_types.h>
+#include <mlib_status.h>
+#include <mlib_sys.h>
+#include <mlib_video.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include "../include/mpeg2.h"
+#include "mpeg2_internal.h"
+
+void mpeg2_idct_add_mlib (const int last, int16_t * const block,
+			  uint8_t * const dest, const int stride)
+{   /* mediaLib path: IEEE-variant IDCT in place, add the result to dest, then zero block; `last` is not used. */
+    mlib_VideoIDCT_IEEE_S16_S16 (block, block);
+    mlib_VideoAddBlock_U8_S16 (dest, block, stride);
+    memset (block, 0, 64 * sizeof (*block));	/* sized from the pointee rather than a differently-signed type name */
+}
+
+void mpeg2_idct_copy_mlib_non_ieee (int16_t * const block,
+				    uint8_t * const dest, const int stride)
+{   /* mediaLib path: IDCT of block written straight to dest as bytes, then zero block. */
+    mlib_VideoIDCT8x8_U8_S16 (dest, block, stride);
+    memset (block, 0, 64 * sizeof (*block));	/* sized from the pointee rather than a differently-signed type name */
+}
+
+void mpeg2_idct_add_mlib_non_ieee (const int last, int16_t * const block,
+				   uint8_t * const dest, const int stride)
+{   /* mediaLib path: 8x8 IDCT in place, add the result to dest, then zero block; `last` is not used. */
+    mlib_VideoIDCT8x8_S16_S16 (block, block);
+    mlib_VideoAddBlock_U8_S16 (dest, block, stride);
+    memset (block, 0, 64 * sizeof (*block));	/* sized from the pointee rather than a differently-signed type name */
+}
+
+#endif
diff --git a/src/video_dec/libmpeg2new/libmpeg2/idct_mmx.c b/src/video_dec/libmpeg2new/libmpeg2/idct_mmx.c
new file mode 100644
index 000000000..d5a5c08a4
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/idct_mmx.c
@@ -0,0 +1,814 @@
+/*
+ * idct_mmx.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#ifdef ARCH_X86
+
+#include <inttypes.h>
+
+#include "../include/mpeg2.h"
+#include "../include/attributes.h"
+#include "mpeg2_internal.h"
+#include "../include/mmx.h"
+
+#define ROW_SHIFT 15
+#define COL_SHIFT 6
+
+#define round(bias) ((int)(((bias)+0.5) * (1<<ROW_SHIFT)))	/* (bias + 0.5) as an integer scaled by 2^ROW_SHIFT */
+#define rounder(bias) {round (bias), round (bias)}	/* initializer holding that value twice */
+
+
+#if 0
+/* C row IDCT - it's just here to document the MMXEXT and MMX versions */
+static inline void idct_row (int16_t * row, int offset,
+			     int16_t * table, int32_t * rounder)
+{   /* Reference only (never compiled): transforms row[offset .. offset+7] in place using C1..C7 from table. */
+    int C1, C2, C3, C4, C5, C6, C7;
+    int a0, a1, a2, a3, b0, b1, b2, b3;
+
+    row += offset;
+
+    C1 = table[1];
+    C2 = table[2];
+    C3 = table[3];
+    C4 = table[4];
+    C5 = table[5];
+    C6 = table[6];
+    C7 = table[7];
+    /* even inputs, with *rounder folded in ahead of the shift */
+    a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + *rounder;
+    a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + *rounder;
+    a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + *rounder;
+    a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + *rounder;
+    /* odd inputs */
+    b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
+    b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
+    b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
+    b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
+
+    row[0] = (a0 + b0) >> ROW_SHIFT;
+    row[1] = (a1 + b1) >> ROW_SHIFT;
+    row[2] = (a2 + b2) >> ROW_SHIFT;
+    row[3] = (a3 + b3) >> ROW_SHIFT;
+    row[4] = (a3 - b3) >> ROW_SHIFT;
+    row[5] = (a2 - b2) >> ROW_SHIFT;
+    row[6] = (a1 - b1) >> ROW_SHIFT;
+    row[7] = (a0 - b0) >> ROW_SHIFT;
+}
+#endif
+
+
+/* MMXEXT row IDCT */
+/* mmxext_table: 32 coefficients, read by the row code four at a time at table+0, +4, ..., +28 */
+#define mmxext_table(c1,c2,c3,c4,c5,c6,c7)	{  c4,  c2, -c4, -c2,	\
+						   c4,  c6,  c4,  c6,	\
+						   c1,  c3, -c1, -c5,	\
+						   c5,  c7,  c3, -c7,	\
+						   c4, -c6,  c4, -c6,	\
+						  -c4,  c2,  c4, -c2,	\
+						   c5, -c1,  c3, -c1,	\
+						   c7,  c3,  c7, -c5 }
+
+static inline void mmxext_row_head (int16_t * const row, const int offset,
+				    const int16_t * const table)
+{   /* Start one row: load the 8 inputs at row+offset and issue the first multiply; leaves mm0, mm2-mm6 set up for mmxext_row. */
+    movq_m2r (*(row+offset), mm2);	/* mm2 = x6 x4 x2 x0 */
+
+    movq_m2r (*(row+offset+4), mm5);	/* mm5 = x7 x5 x3 x1 */
+    movq_r2r (mm2, mm0);		/* mm0 = x6 x4 x2 x0 */
+
+    movq_m2r (*table, mm3);		/* mm3 = -C2 -C4 C2 C4 */
+    movq_r2r (mm5, mm6);		/* mm6 = x7 x5 x3 x1 */
+
+    movq_m2r (*(table+4), mm4);		/* mm4 = C6 C4 C6 C4 */
+    pmaddwd_r2r (mm0, mm3);		/* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */
+
+    pshufw_r2r (mm2, mm2, 0x4e);	/* mm2 = x2 x0 x6 x4 */
+}
+
+static inline void mmxext_row (const int16_t * const table,
+			       const int32_t * const rounder)
+{   /* Row core, run after a head/mid step: on exit mm1 and mm3 are already shifted, mm0 and mm4 still await their shift. */
+    movq_m2r (*(table+8), mm1);		/* mm1 = -C5 -C1 C3 C1 */
+    pmaddwd_r2r (mm2, mm4);		/* mm4 = C4*x0+C6*x2 C4*x4+C6*x6 */
+
+    pmaddwd_m2r (*(table+16), mm0);	/* mm0 = C4*x4-C6*x6 C4*x0-C6*x2 */
+    pshufw_r2r (mm6, mm6, 0x4e);	/* mm6 = x3 x1 x7 x5 */
+
+    movq_m2r (*(table+12), mm7);	/* mm7 = -C7 C3 C7 C5 */
+    pmaddwd_r2r (mm5, mm1);		/* mm1 = -C1*x5-C5*x7 C1*x1+C3*x3 */
+
+    paddd_m2r (*rounder, mm3);		/* mm3 += rounder */
+    pmaddwd_r2r (mm6, mm7);		/* mm7 = C3*x1-C7*x3 C5*x5+C7*x7 */
+
+    pmaddwd_m2r (*(table+20), mm2);	/* mm2 = C4*x0-C2*x2 -C4*x4+C2*x6 */
+    paddd_r2r (mm4, mm3);		/* mm3 = a1 a0 + rounder */
+
+    pmaddwd_m2r (*(table+24), mm5);	/* mm5 = C3*x5-C1*x7 C5*x1-C1*x3 */
+    movq_r2r (mm3, mm4);		/* mm4 = a1 a0 + rounder */
+
+    pmaddwd_m2r (*(table+28), mm6);	/* mm6 = C7*x1-C5*x3 C7*x5+C3*x7 */
+    paddd_r2r (mm7, mm1);		/* mm1 = b1 b0 */
+
+    paddd_m2r (*rounder, mm0);		/* mm0 += rounder */
+    psubd_r2r (mm1, mm3);		/* mm3 = a1-b1 a0-b0 + rounder */
+
+    psrad_i2r (ROW_SHIFT, mm3);		/* mm3 = y6 y7 */
+    paddd_r2r (mm4, mm1);		/* mm1 = a1+b1 a0+b0 + rounder */
+
+    paddd_r2r (mm2, mm0);		/* mm0 = a3 a2 + rounder */
+    psrad_i2r (ROW_SHIFT, mm1);		/* mm1 = y1 y0 */
+
+    paddd_r2r (mm6, mm5);		/* mm5 = b3 b2 */
+    movq_r2r (mm0, mm4);		/* mm4 = a3 a2 + rounder */
+
+    paddd_r2r (mm5, mm0);		/* mm0 = a3+b3 a2+b2 + rounder */
+    psubd_r2r (mm5, mm4);		/* mm4 = a3-b3 a2-b2 + rounder */
+}
+
+static inline void mmxext_row_tail (int16_t * const row, const int store)
+{   /* Finish one row after mmxext_row: shift the remaining sums, pack to 16 bits and store y0..y7 at row+store. */
+    psrad_i2r (ROW_SHIFT, mm0);		/* mm0 = y3 y2 */
+
+    psrad_i2r (ROW_SHIFT, mm4);		/* mm4 = y4 y5 */
+
+    packssdw_r2r (mm0, mm1);		/* mm1 = y3 y2 y1 y0 */
+
+    packssdw_r2r (mm3, mm4);		/* mm4 = y6 y7 y4 y5 */
+
+    movq_r2m (mm1, *(row+store));	/* save y3 y2 y1 y0 */
+    pshufw_r2r (mm4, mm4, 0xb1);	/* mm4 = y7 y6 y5 y4 */
+
+    /* slot */
+
+    movq_r2m (mm4, *(row+store+4));	/* save y7 y6 y5 y4 */
+}
+
+static inline void mmxext_row_mid (int16_t * const row, const int store,
+				   const int offset,
+				   const int16_t * const table)
+{   /* mmxext_row_tail for the row at row+store interleaved with mmxext_row_head for the row at row+offset. */
+    movq_m2r (*(row+offset), mm2);	/* mm2 = x6 x4 x2 x0 */
+    psrad_i2r (ROW_SHIFT, mm0);		/* mm0 = y3 y2 */
+
+    movq_m2r (*(row+offset+4), mm5);	/* mm5 = x7 x5 x3 x1 */
+    psrad_i2r (ROW_SHIFT, mm4);		/* mm4 = y4 y5 */
+
+    packssdw_r2r (mm0, mm1);		/* mm1 = y3 y2 y1 y0 */
+    movq_r2r (mm5, mm6);		/* mm6 = x7 x5 x3 x1 */
+
+    packssdw_r2r (mm3, mm4);		/* mm4 = y6 y7 y4 y5 */
+    movq_r2r (mm2, mm0);		/* mm0 = x6 x4 x2 x0 */
+
+    movq_r2m (mm1, *(row+store));	/* save y3 y2 y1 y0 */
+    pshufw_r2r (mm4, mm4, 0xb1);	/* mm4 = y7 y6 y5 y4 */
+
+    movq_m2r (*table, mm3);		/* mm3 = -C2 -C4 C2 C4 */
+    movq_r2m (mm4, *(row+store+4));	/* save y7 y6 y5 y4 */
+
+    pmaddwd_r2r (mm0, mm3);		/* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */
+
+    movq_m2r (*(table+4), mm4);		/* mm4 = C6 C4 C6 C4 */
+    pshufw_r2r (mm2, mm2, 0x4e);	/* mm2 = x2 x0 x6 x4 */
+}
+
+
+/* MMX row IDCT */
+/* mmx_table: 32 coefficients, read by the row code four at a time at table+0, +4, ..., +28 */
+#define mmx_table(c1,c2,c3,c4,c5,c6,c7)	{  c4,  c2,  c4,  c6,	\
+					   c4,  c6, -c4, -c2,	\
+					   c1,  c3,  c3, -c7,	\
+					   c5,  c7, -c1, -c5,	\
+					   c4, -c6,  c4, -c2,	\
+					  -c4,  c2,  c4, -c6,	\
+					   c5, -c1,  c7, -c5,	\
+					   c7,  c3,  c3, -c1 }
+
+static inline void mmx_row_head (int16_t * const row, const int offset,
+				 const int16_t * const table)
+{   /* Start one row: load the 8 inputs at row+offset and issue the first multiply; leaves mm0-mm6 set up for mmx_row. */
+    movq_m2r (*(row+offset), mm2);	/* mm2 = x6 x4 x2 x0 */
+
+    movq_m2r (*(row+offset+4), mm5);	/* mm5 = x7 x5 x3 x1 */
+    movq_r2r (mm2, mm0);		/* mm0 = x6 x4 x2 x0 */
+
+    movq_m2r (*table, mm3);		/* mm3 = C6 C4 C2 C4 */
+    movq_r2r (mm5, mm6);		/* mm6 = x7 x5 x3 x1 */
+
+    punpckldq_r2r (mm0, mm0);		/* mm0 = x2 x0 x2 x0 */
+
+    movq_m2r (*(table+4), mm4);		/* mm4 = -C2 -C4 C6 C4 */
+    pmaddwd_r2r (mm0, mm3);		/* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */
+
+    movq_m2r (*(table+8), mm1);		/* mm1 = -C7 C3 C3 C1 */
+    punpckhdq_r2r (mm2, mm2);		/* mm2 = x6 x4 x6 x4 */
+}
+
+static inline void mmx_row (const int16_t * const table,
+			    const int32_t * const rounder)
+{   /* Row core, run after a head/mid step: on exit mm1 and mm3 are already shifted, mm0 and mm7 still await their shift. */
+    pmaddwd_r2r (mm2, mm4);		/* mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 */
+    punpckldq_r2r (mm5, mm5);		/* mm5 = x3 x1 x3 x1 */
+
+    pmaddwd_m2r (*(table+16), mm0);	/* mm0 = C4*x0-C2*x2 C4*x0-C6*x2 */
+    punpckhdq_r2r (mm6, mm6);		/* mm6 = x7 x5 x7 x5 */
+
+    movq_m2r (*(table+12), mm7);	/* mm7 = -C5 -C1 C7 C5 */
+    pmaddwd_r2r (mm5, mm1);		/* mm1 = C3*x1-C7*x3 C1*x1+C3*x3 */
+
+    paddd_m2r (*rounder, mm3);		/* mm3 += rounder */
+    pmaddwd_r2r (mm6, mm7);		/* mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 */
+
+    pmaddwd_m2r (*(table+20), mm2);	/* mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 */
+    paddd_r2r (mm4, mm3);		/* mm3 = a1 a0 + rounder */
+
+    pmaddwd_m2r (*(table+24), mm5);	/* mm5 = C7*x1-C5*x3 C5*x1-C1*x3 */
+    movq_r2r (mm3, mm4);		/* mm4 = a1 a0 + rounder */
+
+    pmaddwd_m2r (*(table+28), mm6);	/* mm6 = C3*x5-C1*x7 C7*x5+C3*x7 */
+    paddd_r2r (mm7, mm1);		/* mm1 = b1 b0 */
+
+    paddd_m2r (*rounder, mm0);		/* mm0 += rounder */
+    psubd_r2r (mm1, mm3);		/* mm3 = a1-b1 a0-b0 + rounder */
+
+    psrad_i2r (ROW_SHIFT, mm3);		/* mm3 = y6 y7 */
+    paddd_r2r (mm4, mm1);		/* mm1 = a1+b1 a0+b0 + rounder */
+
+    paddd_r2r (mm2, mm0);		/* mm0 = a3 a2 + rounder */
+    psrad_i2r (ROW_SHIFT, mm1);		/* mm1 = y1 y0 */
+
+    paddd_r2r (mm6, mm5);		/* mm5 = b3 b2 */
+    movq_r2r (mm0, mm7);		/* mm7 = a3 a2 + rounder */
+
+    paddd_r2r (mm5, mm0);		/* mm0 = a3+b3 a2+b2 + rounder */
+    psubd_r2r (mm5, mm7);		/* mm7 = a3-b3 a2-b2 + rounder */
+}
+/* Finish the last row: shift and pack the results still held in registers and store all 8 outputs at row+store. */
+static inline void mmx_row_tail (int16_t * const row, const int store)
+{
+    psrad_i2r (ROW_SHIFT, mm0);		/* mm0 = y3 y2 */
+
+    psrad_i2r (ROW_SHIFT, mm7);		/* mm7 = y4 y5 */
+
+    packssdw_r2r (mm0, mm1);		/* mm1 = y3 y2 y1 y0 */
+
+    packssdw_r2r (mm3, mm7);		/* mm7 = y6 y7 y4 y5 */
+
+    movq_r2m (mm1, *(row+store));	/* save y3 y2 y1 y0 */
+    movq_r2r (mm7, mm4);		/* mm4 = y6 y7 y4 y5 */
+
+    pslld_i2r (16, mm7);		/* mm7 = y7 0 y5 0 */
+
+    psrld_i2r (16, mm4);		/* mm4 = 0 y6 0 y4 */
+
+    por_r2r (mm4, mm7);			/* mm7 = y7 y6 y5 y4 */
+
+    /* slot */
+
+    movq_r2m (mm7, *(row+store+4));	/* save y7 y6 y5 y4 */
+}
+/* Interleaved step: store the previous row's outputs at row+store while loading row+offset and table for the next row. */
+static inline void mmx_row_mid (int16_t * const row, const int store,
+				const int offset, const int16_t * const table)
+{
+    movq_m2r (*(row+offset), mm2);	/* mm2 = x6 x4 x2 x0 */
+    psrad_i2r (ROW_SHIFT, mm0);		/* mm0 = y3 y2 */
+
+    movq_m2r (*(row+offset+4), mm5);	/* mm5 = x7 x5 x3 x1 */
+    psrad_i2r (ROW_SHIFT, mm7);		/* mm7 = y4 y5 */
+
+    packssdw_r2r (mm0, mm1);		/* mm1 = y3 y2 y1 y0 */
+    movq_r2r (mm5, mm6);		/* mm6 = x7 x5 x3 x1 */
+
+    packssdw_r2r (mm3, mm7);		/* mm7 = y6 y7 y4 y5 */
+    movq_r2r (mm2, mm0);		/* mm0 = x6 x4 x2 x0 */
+
+    movq_r2m (mm1, *(row+store));	/* save y3 y2 y1 y0 */
+    movq_r2r (mm7, mm1);		/* mm1 = y6 y7 y4 y5 */
+
+    punpckldq_r2r (mm0, mm0);		/* mm0 = x2 x0 x2 x0 */
+    psrld_i2r (16, mm7);		/* mm7 = 0 y6 0 y4 */
+
+    movq_m2r (*table, mm3);		/* mm3 = C6 C4 C2 C4 */
+    pslld_i2r (16, mm1);		/* mm1 = y7 0 y5 0 */
+
+    movq_m2r (*(table+4), mm4);		/* mm4 = -C2 -C4 C6 C4 */
+    por_r2r (mm1, mm7);			/* mm7 = y7 y6 y5 y4 */
+
+    movq_m2r (*(table+8), mm1);		/* mm1 = -C7 C3 C3 C1 */
+    punpckhdq_r2r (mm2, mm2);		/* mm2 = x6 x4 x6 x4 */
+
+    movq_r2m (mm7, *(row+store+4));	/* save y7 y6 y5 y4 */
+    pmaddwd_r2r (mm0, mm3);		/* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */
+}
+
+
+#if 0
+/* C column IDCT - it's just here to document the MMXEXT and MMX versions */
+static inline void idct_col (int16_t * col, int offset)
+{
+/* multiplication - as implemented on mmx */
+#define F(c,x) (((c) * (x)) >> 16)
+
+/* saturation - it helps us handle torture test cases */
+#define S(x) (((x)>32767) ? 32767 : ((x)<-32768) ? -32768 : (x))
+
+    int16_t x0, x1, x2, x3, x4, x5, x6, x7;
+    int16_t y0, y1, y2, y3, y4, y5, y6, y7;
+    int16_t a0, a1, a2, a3, b0, b1, b2, b3;
+    int16_t u04, v04, u26, v26, u17, v17, u35, v35, u12, v12;
+
+    col += offset;
+
+    x0 = col[0*8];
+    x1 = col[1*8];
+    x2 = col[2*8];
+    x3 = col[3*8];
+    x4 = col[4*8];
+    x5 = col[5*8];
+    x6 = col[6*8];
+    x7 = col[7*8];
+
+    u04 = S (x0 + x4);
+    v04 = S (x0 - x4);
+    u26 = S (F (T2, x6) + x2);
+    v26 = S (F (T2, x2) - x6);
+
+    a0 = S (u04 + u26);
+    a1 = S (v04 + v26);
+    a2 = S (v04 - v26);
+    a3 = S (u04 - u26);
+
+    u17 = S (F (T1, x7) + x1);
+    v17 = S (F (T1, x1) - x7);
+    u35 = S (F (T3, x5) + x3);
+    v35 = S (F (T3, x3) - x5);
+
+    b0 = S (u17 + u35);
+    b3 = S (v17 - v35);
+    u12 = S (u17 - u35);
+    v12 = S (v17 + v35);
+    u12 = S (2 * F (C4, u12));
+    v12 = S (2 * F (C4, v12));
+    b1 = S (u12 + v12);
+    b2 = S (u12 - v12);
+
+    y0 = S (a0 + b0) >> COL_SHIFT;
+    y1 = S (a1 + b1) >> COL_SHIFT;
+    y2 = S (a2 + b2) >> COL_SHIFT;
+    y3 = S (a3 + b3) >> COL_SHIFT;
+
+    y4 = S (a3 - b3) >> COL_SHIFT;
+    y5 = S (a2 - b2) >> COL_SHIFT;
+    y6 = S (a1 - b1) >> COL_SHIFT;
+    y7 = S (a0 - b0) >> COL_SHIFT;
+
+    col[0*8] = y0;
+    col[1*8] = y1;
+    col[2*8] = y2;
+    col[3*8] = y3;
+    col[4*8] = y4;
+    col[5*8] = y5;
+    col[6*8] = y6;
+    col[7*8] = y7;
+}
+#endif
+
+/* MMX column IDCT: transforms, in place, the four 16-bit columns starting at col+offset (rows are 8 int16 apart). */
+/* Rows 3 and 5 of those columns double as scratch storage for b3 and b0 until the final outputs are written. */
+static inline void idct_col (int16_t * const col, const int offset)
+{
+#define T1 13036
+#define T2 27146
+#define T3 43790
+#define C4 23170
+
+    static const short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1};
+    static const short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2};
+    static const short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3};
+    static const short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4};
+
+    /* column code adapted from peter gubanov */
+    /* http://www.elecard.com/peter/idct.shtml */
+
+    movq_m2r (*_T1, mm0);		/* mm0 = T1 */
+
+    movq_m2r (*(col+offset+1*8), mm1);	/* mm1 = x1 */
+    movq_r2r (mm0, mm2);		/* mm2 = T1 */
+
+    movq_m2r (*(col+offset+7*8), mm4);	/* mm4 = x7 */
+    pmulhw_r2r (mm1, mm0);		/* mm0 = T1*x1 */
+
+    movq_m2r (*_T3, mm5);		/* mm5 = T3 */
+    pmulhw_r2r (mm4, mm2);		/* mm2 = T1*x7 */
+
+    movq_m2r (*(col+offset+5*8), mm6);	/* mm6 = x5 */
+    movq_r2r (mm5, mm7);		/* mm7 = T3-1 */
+
+    movq_m2r (*(col+offset+3*8), mm3);	/* mm3 = x3 */
+    psubsw_r2r (mm4, mm0);		/* mm0 = v17 */
+
+    movq_m2r (*_T2, mm4);		/* mm4 = T2 */
+    pmulhw_r2r (mm3, mm5);		/* mm5 = (T3-1)*x3 */
+
+    paddsw_r2r (mm2, mm1);		/* mm1 = u17 */
+    pmulhw_r2r (mm6, mm7);		/* mm7 = (T3-1)*x5 */
+
+    /* slot */
+
+    movq_r2r (mm4, mm2);		/* mm2 = T2 */
+    paddsw_r2r (mm3, mm5);		/* mm5 = T3*x3 */
+
+    pmulhw_m2r (*(col+offset+2*8), mm4);/* mm4 = T2*x2 */
+    paddsw_r2r (mm6, mm7);		/* mm7 = T3*x5 */
+
+    psubsw_r2r (mm6, mm5);		/* mm5 = v35 */
+    paddsw_r2r (mm3, mm7);		/* mm7 = u35 */
+
+    movq_m2r (*(col+offset+6*8), mm3);	/* mm3 = x6 */
+    movq_r2r (mm0, mm6);		/* mm6 = v17 */
+
+    pmulhw_r2r (mm3, mm2);		/* mm2 = T2*x6 */
+    psubsw_r2r (mm5, mm0);		/* mm0 = b3 */
+
+    psubsw_r2r (mm3, mm4);		/* mm4 = v26 */
+    paddsw_r2r (mm6, mm5);		/* mm5 = v12 */
+
+    movq_r2m (mm0, *(col+offset+3*8));	/* save b3 in scratch0 */
+    movq_r2r (mm1, mm6);		/* mm6 = u17 */
+
+    paddsw_m2r (*(col+offset+2*8), mm2);/* mm2 = u26 */
+    paddsw_r2r (mm7, mm6);		/* mm6 = b0 */
+
+    psubsw_r2r (mm7, mm1);		/* mm1 = u12 */
+    movq_r2r (mm1, mm7);		/* mm7 = u12 */
+
+    movq_m2r (*(col+offset+0*8), mm3);	/* mm3 = x0 */
+    paddsw_r2r (mm5, mm1);		/* mm1 = u12+v12 */
+
+    movq_m2r (*_C4, mm0);		/* mm0 = C4/2 */
+    psubsw_r2r (mm5, mm7);		/* mm7 = u12-v12 */
+
+    movq_r2m (mm6, *(col+offset+5*8));	/* save b0 in scratch1 */
+    pmulhw_r2r (mm0, mm1);		/* mm1 = b1/2 */
+
+    movq_r2r (mm4, mm6);		/* mm6 = v26 */
+    pmulhw_r2r (mm0, mm7);		/* mm7 = b2/2 */
+
+    movq_m2r (*(col+offset+4*8), mm5);	/* mm5 = x4 */
+    movq_r2r (mm3, mm0);		/* mm0 = x0 */
+
+    psubsw_r2r (mm5, mm3);		/* mm3 = v04 */
+    paddsw_r2r (mm5, mm0);		/* mm0 = u04 */
+
+    paddsw_r2r (mm3, mm4);		/* mm4 = a1 */
+    movq_r2r (mm0, mm5);		/* mm5 = u04 */
+
+    psubsw_r2r (mm6, mm3);		/* mm3 = a2 */
+    paddsw_r2r (mm2, mm5);		/* mm5 = a0 */
+
+    paddsw_r2r (mm1, mm1);		/* mm1 = b1 */
+    psubsw_r2r (mm2, mm0);		/* mm0 = a3 */
+
+    paddsw_r2r (mm7, mm7);		/* mm7 = b2 */
+    movq_r2r (mm3, mm2);		/* mm2 = a2 */
+
+    movq_r2r (mm4, mm6);		/* mm6 = a1 */
+    paddsw_r2r (mm7, mm3);		/* mm3 = a2+b2 */
+
+    psraw_i2r (COL_SHIFT, mm3);		/* mm3 = y2 */
+    paddsw_r2r (mm1, mm4);		/* mm4 = a1+b1 */
+
+    psraw_i2r (COL_SHIFT, mm4);		/* mm4 = y1 */
+    psubsw_r2r (mm1, mm6);		/* mm6 = a1-b1 */
+
+    movq_m2r (*(col+offset+5*8), mm1);	/* mm1 = b0 */
+    psubsw_r2r (mm7, mm2);		/* mm2 = a2-b2 */
+
+    psraw_i2r (COL_SHIFT, mm6);		/* mm6 = y6 */
+    movq_r2r (mm5, mm7);		/* mm7 = a0 */
+
+    movq_r2m (mm4, *(col+offset+1*8));	/* save y1 */
+    psraw_i2r (COL_SHIFT, mm2);		/* mm2 = y5 */
+
+    movq_r2m (mm3, *(col+offset+2*8));	/* save y2 */
+    paddsw_r2r (mm1, mm5);		/* mm5 = a0+b0 */
+
+    movq_m2r (*(col+offset+3*8), mm4);	/* mm4 = b3 */
+    psubsw_r2r (mm1, mm7);		/* mm7 = a0-b0 */
+
+    psraw_i2r (COL_SHIFT, mm5);		/* mm5 = y0 */
+    movq_r2r (mm0, mm3);		/* mm3 = a3 */
+
+    movq_r2m (mm2, *(col+offset+5*8));	/* save y5 */
+    psubsw_r2r (mm4, mm3);		/* mm3 = a3-b3 */
+
+    psraw_i2r (COL_SHIFT, mm7);		/* mm7 = y7 */
+    paddsw_r2r (mm0, mm4);		/* mm4 = a3+b3 */
+
+    movq_r2m (mm5, *(col+offset+0*8));	/* save y0 */
+    psraw_i2r (COL_SHIFT, mm3);		/* mm3 = y4 */
+
+    movq_r2m (mm6, *(col+offset+6*8));	/* save y6 */
+    psraw_i2r (COL_SHIFT, mm4);		/* mm4 = y3 */
+
+    movq_r2m (mm7, *(col+offset+7*8));	/* save y7 */
+
+    movq_r2m (mm3, *(col+offset+4*8));	/* save y4 */
+
+    movq_r2m (mm4, *(col+offset+3*8));	/* save y3 */
+}
+
+/* Rounding constants for the row pass; rounderN is the one declare_idct pairs with block row N. */
+static const int32_t rounder0[] ATTR_ALIGN(8) =
+    rounder ((1 << (COL_SHIFT - 1)) - 0.5);
+static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0);
+static const int32_t rounder1[] ATTR_ALIGN(8) =
+    rounder (1.25683487303);	/* C1*(C1/C4+C1+C7)/2 */
+static const int32_t rounder7[] ATTR_ALIGN(8) =
+    rounder (-0.25);		/* C1*(C7/C4+C7-C1)/2 */
+static const int32_t rounder2[] ATTR_ALIGN(8) =
+    rounder (0.60355339059);	/* C2 * (C6+C2)/2 */
+static const int32_t rounder6[] ATTR_ALIGN(8) =
+    rounder (-0.25);		/* C2 * (C6-C2)/2 */
+static const int32_t rounder3[] ATTR_ALIGN(8) =
+    rounder (0.087788325588);	/* C3*(-C3/C4+C3+C5)/2 */
+static const int32_t rounder5[] ATTR_ALIGN(8) =
+    rounder (-0.441341716183);	/* C3*(-C5/C4+C5-C3)/2 */
+
+/* Defines idct(block): row pass over rows 0,4,1,7,2,6,3,5 (each row_mid stores one row and loads the next), then two 4-column passes. */
+#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid)	\
+static inline void idct (int16_t * const block)				\
+{									\
+    static const int16_t table04[] ATTR_ALIGN(16) =			\
+	table (22725, 21407, 19266, 16384, 12873,  8867, 4520);		\
+    static const int16_t table17[] ATTR_ALIGN(16) =			\
+	table (31521, 29692, 26722, 22725, 17855, 12299, 6270);		\
+    static const int16_t table26[] ATTR_ALIGN(16) =			\
+	table (29692, 27969, 25172, 21407, 16819, 11585, 5906);		\
+    static const int16_t table35[] ATTR_ALIGN(16) =			\
+	table (26722, 25172, 22654, 19266, 15137, 10426, 5315);		\
+									\
+    idct_row_head (block, 0*8, table04);				\
+    idct_row (table04, rounder0);					\
+    idct_row_mid (block, 0*8, 4*8, table04);				\
+    idct_row (table04, rounder4);					\
+    idct_row_mid (block, 4*8, 1*8, table17);				\
+    idct_row (table17, rounder1);					\
+    idct_row_mid (block, 1*8, 7*8, table17);				\
+    idct_row (table17, rounder7);					\
+    idct_row_mid (block, 7*8, 2*8, table26);				\
+    idct_row (table26, rounder2);					\
+    idct_row_mid (block, 2*8, 6*8, table26);				\
+    idct_row (table26, rounder6);					\
+    idct_row_mid (block, 6*8, 3*8, table35);				\
+    idct_row (table35, rounder3);					\
+    idct_row_mid (block, 3*8, 5*8, table35);				\
+    idct_row (table35, rounder5);					\
+    idct_row_tail (block, 5*8);						\
+									\
+    idct_col (block, 0);						\
+    idct_col (block, 4);						\
+}
+
+/* Advance dest one line and store the already packed row r2 there, while loading block row `offset` and packing it into r0 (r1 is scratch). */
+#define COPY_MMX(offset,r0,r1,r2)	\
+do {					\
+    movq_m2r (*(block+offset), r0);	\
+    dest += stride;			\
+    movq_m2r (*(block+offset+4), r1);	\
+    movq_r2m (r2, *dest);		\
+    packuswb_r2r (r1, r0);		\
+} while (0)
+/* Write the 8x8 int16 block to dest as bytes (packuswb: unsigned saturation), one row per `stride` bytes. */
+static inline void block_copy (int16_t * const block, uint8_t * dest,
+			       const int stride)
+{
+    movq_m2r (*(block+0*8), mm0);
+    movq_m2r (*(block+0*8+4), mm1);
+    movq_m2r (*(block+1*8), mm2);
+    packuswb_r2r (mm1, mm0);
+    movq_m2r (*(block+1*8+4), mm3);
+    movq_r2m (mm0, *dest);
+    packuswb_r2r (mm3, mm2);
+    COPY_MMX (2*8, mm0, mm1, mm2);
+    COPY_MMX (3*8, mm2, mm3, mm0);
+    COPY_MMX (4*8, mm0, mm1, mm2);
+    COPY_MMX (5*8, mm2, mm3, mm0);
+    COPY_MMX (6*8, mm0, mm1, mm2);
+    COPY_MMX (7*8, mm2, mm3, mm0);
+    movq_r2m (mm2, *(dest+stride));
+}
+
+/* Pack the finished row r3:r4 and store it on the next dest line, while widening the line after it into r1:r2 and adding block row `offset`; mm0 must be 0. */
+#define ADD_MMX(offset,r1,r2,r3,r4)	\
+do {					\
+    movq_m2r (*(dest+2*stride), r1);	\
+    packuswb_r2r (r4, r3);		\
+    movq_r2r (r1, r2);			\
+    dest += stride;			\
+    movq_r2m (r3, *dest);		\
+    punpcklbw_r2r (mm0, r1);		\
+    paddsw_m2r (*(block+offset), r1);	\
+    punpckhbw_r2r (mm0, r2);		\
+    paddsw_m2r (*(block+offset+4), r2);	\
+} while (0)
+/* Add the 8x8 int16 block to the bytes at dest: widen to words, paddsw, then packuswb back to bytes; one row per `stride`. */
+static inline void block_add (int16_t * const block, uint8_t * dest,
+			      const int stride)
+{
+    movq_m2r (*dest, mm1);
+    pxor_r2r (mm0, mm0);
+    movq_m2r (*(dest+stride), mm3);
+    movq_r2r (mm1, mm2);
+    punpcklbw_r2r (mm0, mm1);
+    movq_r2r (mm3, mm4);
+    paddsw_m2r (*(block+0*8), mm1);
+    punpckhbw_r2r (mm0, mm2);
+    paddsw_m2r (*(block+0*8+4), mm2);
+    punpcklbw_r2r (mm0, mm3);
+    paddsw_m2r (*(block+1*8), mm3);
+    packuswb_r2r (mm2, mm1);
+    punpckhbw_r2r (mm0, mm4);
+    movq_r2m (mm1, *dest);
+    paddsw_m2r (*(block+1*8+4), mm4);
+    ADD_MMX (2*8, mm1, mm2, mm3, mm4);
+    ADD_MMX (3*8, mm3, mm4, mm1, mm2);
+    ADD_MMX (4*8, mm1, mm2, mm3, mm4);
+    ADD_MMX (5*8, mm3, mm4, mm1, mm2);
+    ADD_MMX (6*8, mm1, mm2, mm3, mm4);
+    ADD_MMX (7*8, mm3, mm4, mm1, mm2);
+    packuswb_r2r (mm4, mm3);
+    movq_r2m (mm3, *(dest+stride));
+}
+
+/* Clear all 64 int16 coefficients of block with sixteen 8-byte stores of a zeroed mm0. */
+static inline void block_zero (int16_t * const block)
+{
+    pxor_r2r (mm0, mm0);
+    movq_r2m (mm0, *(block+0*4));
+    movq_r2m (mm0, *(block+1*4));
+    movq_r2m (mm0, *(block+2*4));
+    movq_r2m (mm0, *(block+3*4));
+    movq_r2m (mm0, *(block+4*4));
+    movq_r2m (mm0, *(block+5*4));
+    movq_r2m (mm0, *(block+6*4));
+    movq_r2m (mm0, *(block+7*4));
+    movq_r2m (mm0, *(block+8*4));
+    movq_r2m (mm0, *(block+9*4));
+    movq_r2m (mm0, *(block+10*4));
+    movq_r2m (mm0, *(block+11*4));
+    movq_r2m (mm0, *(block+12*4));
+    movq_r2m (mm0, *(block+13*4));
+    movq_r2m (mm0, *(block+14*4));
+    movq_r2m (mm0, *(block+15*4));
+}
+
+/* Values passed as the `cpu` argument of block_add_DC. */
+#define CPU_MMXEXT 0
+#define CPU_MMX 1
+/* Broadcast the low 16-bit word of reg to all four words; reads `cpu` from the enclosing scope to pick the instruction sequence. */
+#define dup4(reg)			\
+do {					\
+    if (cpu != CPU_MMXEXT) {		\
+	punpcklwd_r2r (reg, reg);	\
+	punpckldq_r2r (reg, reg);	\
+    } else				\
+	pshufw_r2r (reg, reg, 0x00);	\
+} while (0)
+/* DC-only add: applies (block[0]+64)>>7 to all 8 rows at dest with unsigned byte saturation, then clears block[0] and block[63]. */
+static inline void block_add_DC (int16_t * const block, uint8_t * dest,
+				 const int stride, const int cpu)
+{
+    movd_v2r ((block[0] + 64) >> 7, mm0);
+    pxor_r2r (mm1, mm1);
+    movq_m2r (*dest, mm2);
+    dup4 (mm0);
+    psubsw_r2r (mm0, mm1);
+    packuswb_r2r (mm0, mm0);
+    paddusb_r2r (mm0, mm2);
+    packuswb_r2r (mm1, mm1);
+    movq_m2r (*(dest + stride), mm3);
+    psubusb_r2r (mm1, mm2);
+    block[0] = 0;
+    paddusb_r2r (mm0, mm3);
+    movq_r2m (mm2, *dest);
+    psubusb_r2r (mm1, mm3);
+    movq_m2r (*(dest + 2*stride), mm2);
+    dest += stride;
+    movq_r2m (mm3, *dest);
+    paddusb_r2r (mm0, mm2);
+    movq_m2r (*(dest + 2*stride), mm3);
+    psubusb_r2r (mm1, mm2);
+    dest += stride;
+    paddusb_r2r (mm0, mm3);
+    movq_r2m (mm2, *dest);
+    psubusb_r2r (mm1, mm3);
+    movq_m2r (*(dest + 2*stride), mm2);
+    dest += stride;
+    movq_r2m (mm3, *dest);
+    paddusb_r2r (mm0, mm2);
+    movq_m2r (*(dest + 2*stride), mm3);
+    psubusb_r2r (mm1, mm2);
+    dest += stride;
+    paddusb_r2r (mm0, mm3);
+    movq_r2m (mm2, *dest);
+    psubusb_r2r (mm1, mm3);
+    movq_m2r (*(dest + 2*stride), mm2);
+    dest += stride;
+    movq_r2m (mm3, *dest);
+    paddusb_r2r (mm0, mm2);
+    movq_m2r (*(dest + 2*stride), mm3);
+    psubusb_r2r (mm1, mm2);
+    block[63] = 0;
+    paddusb_r2r (mm0, mm3);
+    movq_r2m (mm2, *(dest + stride));
+    psubusb_r2r (mm1, mm3);
+    movq_r2m (mm3, *(dest + 2*stride));
+}
+
+/* Instantiate mmxext_idct() from the MMXEXT table layout and row helpers. */
+declare_idct (mmxext_idct, mmxext_table,
+	      mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid)
+/* MMXEXT entry point: inverse-transform block in place, write the result to dest as pixels, then zero block. */
+void mpeg2_idct_copy_mmxext (int16_t * const block, uint8_t * const dest,
+			     const int stride)
+{
+    mmxext_idct (block);
+    block_copy (block, dest, stride);
+    block_zero (block);
+}
+/* MMXEXT IDCT-and-add; when last == 129 and bits 4-6 of block[0] are not 100b only the DC term is added. */
+void mpeg2_idct_add_mmxext (const int last, int16_t * const block,
+			    uint8_t * const dest, const int stride)
+{
+    if (last == 129 && (block[0] & (7 << 4)) != (4 << 4)) {
+	block_add_DC (block, dest, stride, CPU_MMXEXT);
+	return;
+    }
+    mmxext_idct (block);
+    block_add (block, dest, stride);
+    block_zero (block);
+}
+/* Instantiate mmx_idct() from the plain-MMX table layout and row helpers. */
+declare_idct (mmx_idct, mmx_table,
+	      mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid)
+/* MMX entry point: inverse-transform block in place, write the result to dest as pixels, then zero block. */
+void mpeg2_idct_copy_mmx (int16_t * const block, uint8_t * const dest,
+			  const int stride)
+{
+    mmx_idct (block);
+    block_copy (block, dest, stride);
+    block_zero (block);
+}
+/* MMX IDCT-and-add; when last == 129 and bits 4-6 of block[0] are not 100b only the DC term is added. */
+void mpeg2_idct_add_mmx (const int last, int16_t * const block,
+			 uint8_t * const dest, const int stride)
+{
+    if (last == 129 && (block[0] & (7 << 4)) != (4 << 4)) {
+	block_add_DC (block, dest, stride, CPU_MMX);
+	return;
+    }
+    mmx_idct (block);
+    block_add (block, dest, stride);
+    block_zero (block);
+}
+/* Permute both scan tables in place for the MMX/MMXEXT IDCT input order; running it a second time does not undo it. */
+void mpeg2_idct_mmx_init (void)
+{
+    extern uint8_t mpeg2_scan_norm[64];
+    extern uint8_t mpeg2_scan_alt[64];
+    uint8_t * const scan[2] = { mpeg2_scan_norm, mpeg2_scan_alt };
+    int t, n;
+
+    /* the mmx/mmxext idct uses a reordered input, so we patch scan tables: */
+    /* bits 3-5 of each entry are kept, the low three bits are rotated right by one */
+    for (t = 0; t < 2; t++)
+	for (n = 0; n < 64; n++) {
+	    const int pos = scan[t][n];
+	    scan[t][n] = (pos & 0x38) | ((pos & 6) >> 1) | ((pos & 1) << 2);
+	}
+}
+
+#endif
diff --git a/src/video_dec/libmpeg2new/libmpeg2/libmpeg2.pc.in b/src/video_dec/libmpeg2new/libmpeg2/libmpeg2.pc.in
new file mode 100644
index 000000000..d54500b0e
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/libmpeg2.pc.in
@@ -0,0 +1,10 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: libmpeg2
+Description: A decoding library for MPEG-1 and MPEG-2 streams.
+Version: @VERSION@
+Libs: -L${libdir} -lmpeg2
+Cflags: -I${includedir}/@PACKAGE@
diff --git a/src/video_dec/libmpeg2new/libmpeg2/libmpeg2convert.pc.in b/src/video_dec/libmpeg2new/libmpeg2/libmpeg2convert.pc.in
new file mode 100644
index 000000000..42383a6e2
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/libmpeg2convert.pc.in
@@ -0,0 +1,10 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: libmpeg2convert
+Description: libmpeg2 helper functions for converting to various formats.
+Version: @VERSION@
+Libs: -L${libdir} -lmpeg2convert
+Cflags: -I${includedir}/@PACKAGE@
diff --git a/src/video_dec/libmpeg2new/libmpeg2/motion_comp.c b/src/video_dec/libmpeg2new/libmpeg2/motion_comp.c
new file mode 100644
index 000000000..d5a265d5c
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/motion_comp.c
@@ -0,0 +1,130 @@
+/*
+ * motion_comp.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#include <inttypes.h>
+
+#include "../include/mpeg2.h"
+#include "../include/attributes.h"
+#include "mpeg2_internal.h"
+/* Motion-compensation function table; filled in by mpeg2_mc_init. */
+mpeg2_mc_t mpeg2_mc;
+/* Pick mpeg2_mc from the accel flags: the first matching variant compiled in for this architecture, else the C one. */
+void mpeg2_mc_init (uint32_t accel)
+{
+#ifdef ARCH_X86
+    if (accel & MPEG2_ACCEL_X86_MMXEXT)
+	mpeg2_mc = mpeg2_mc_mmxext;
+    else if (accel & MPEG2_ACCEL_X86_3DNOW)
+	mpeg2_mc = mpeg2_mc_3dnow;
+    else if (accel & MPEG2_ACCEL_X86_MMX)
+	mpeg2_mc = mpeg2_mc_mmx;
+    else
+#endif
+#ifdef ARCH_PPC
+    if (accel & MPEG2_ACCEL_PPC_ALTIVEC)
+	mpeg2_mc = mpeg2_mc_altivec;
+    else
+#endif
+#ifdef ARCH_ALPHA
+    if (accel & MPEG2_ACCEL_ALPHA)
+	mpeg2_mc = mpeg2_mc_alpha;
+    else
+#endif
+#ifdef ARCH_SPARC
+    if (accel & MPEG2_ACCEL_SPARC_VIS)
+	mpeg2_mc = mpeg2_mc_vis;
+    else
+#endif
+	mpeg2_mc = mpeg2_mc_c;
+}
+/* Rounded averages; every argument is parenthesised so compound expressions expand with the intended precedence. */
+#define avg2(a,b) (((a)+(b)+1)>>1)
+#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
+/* Predictors for pixel i: plain copy, horizontal, vertical and 2x2 average; they read `ref` and `stride` from the caller. */
+#define predict_o(i) (ref[i])
+#define predict_x(i) (avg2 (ref[i], ref[(i)+1]))
+#define predict_y(i) (avg2 (ref[i], (ref+stride)[i]))
+#define predict_xy(i) (avg4 (ref[i], ref[(i)+1], \
+			     (ref+stride)[i], (ref+stride)[(i)+1]))
+/* Store operations on `dest`: put overwrites dest[i]; avg averages the predictor with the value already in dest[i]. */
+#define put(predictor,i) dest[i] = predictor (i)
+#define avg(predictor,i) dest[i] = avg2 (predictor (i), dest[i])
+
+/* mc function template: defines MC_<op>_<xy>_16_c and MC_<op>_<xy>_8_c, which apply op with predict_<xy> */
+/* to each pixel of a 16- or 8-wide row; the row body runs once before `height` is first tested (do/while). */
+#define MC_FUNC(op,xy)							\
+static void MC_##op##_##xy##_16_c (uint8_t * dest, const uint8_t * ref,	\
+				   const int stride, int height)	\
+{									\
+    do {								\
+	op (predict_##xy, 0);						\
+	op (predict_##xy, 1);						\
+	op (predict_##xy, 2);						\
+	op (predict_##xy, 3);						\
+	op (predict_##xy, 4);						\
+	op (predict_##xy, 5);						\
+	op (predict_##xy, 6);						\
+	op (predict_##xy, 7);						\
+	op (predict_##xy, 8);						\
+	op (predict_##xy, 9);						\
+	op (predict_##xy, 10);						\
+	op (predict_##xy, 11);						\
+	op (predict_##xy, 12);						\
+	op (predict_##xy, 13);						\
+	op (predict_##xy, 14);						\
+	op (predict_##xy, 15);						\
+	ref += stride;							\
+	dest += stride;							\
+    } while (--height);							\
+}									\
+static void MC_##op##_##xy##_8_c (uint8_t * dest, const uint8_t * ref,	\
+				  const int stride, int height)		\
+{									\
+    do {								\
+	op (predict_##xy, 0);						\
+	op (predict_##xy, 1);						\
+	op (predict_##xy, 2);						\
+	op (predict_##xy, 3);						\
+	op (predict_##xy, 4);						\
+	op (predict_##xy, 5);						\
+	op (predict_##xy, 6);						\
+	op (predict_##xy, 7);						\
+	ref += stride;							\
+	dest += stride;							\
+    } while (--height);							\
+}
+
+/* definitions of the actual mc functions: put/avg for each of the o, x, y and xy predictors */
+
+MC_FUNC (put,o)
+MC_FUNC (avg,o)
+MC_FUNC (put,x)
+MC_FUNC (avg,x)
+MC_FUNC (put,y)
+MC_FUNC (avg,y)
+MC_FUNC (put,xy)
+MC_FUNC (avg,xy)
+/* MPEG2_MC_EXTERN is not defined in this file; presumably it builds the mpeg2_mc_c table from the functions above (confirm in mpeg2_internal.h). */
+MPEG2_MC_EXTERN (c)
diff --git a/src/video_dec/libmpeg2new/libmpeg2/motion_comp_alpha.c b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_alpha.c
new file mode 100644
index 000000000..1b3712a1a
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_alpha.c
@@ -0,0 +1,253 @@
+/*
+ * motion_comp_alpha.c
+ * Copyright (C) 2002-2003 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#ifdef ARCH_ALPHA
+
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include <xine/attributes.h>
+#include "mpeg2_internal.h"
+#include "alpha_asm.h"
+// Rounded-up average via (a | b) - ((a ^ b) >> 1); the BYTE_VEC (0xfe) mask presumably keeps it per byte (confirm in alpha_asm.h).
+static inline uint64_t avg2 (uint64_t a, uint64_t b)
+{
+    return (a | b) - (((a ^ b) & BYTE_VEC (0xfe)) >> 1);
+}
+
+// Load two unaligned quadwords (16 bytes) from addr into ret_l and ret_r, assembled from three ldq_u loads.
+// This macro only works if addr is actually unaligned; MAKE_OP selects ALOAD16 when the low 3 address bits are 0.
+#define ULOAD16(ret_l,ret_r,addr)			\
+    do {						\
+	uint64_t _l = ldq_u (addr +  0);		\
+	uint64_t _m = ldq_u (addr +  8);		\
+	uint64_t _r = ldq_u (addr + 16);		\
+	ret_l = extql (_l, addr) | extqh (_m, addr);	\
+	ret_r = extql (_m, addr) | extqh (_r, addr);	\
+    } while (0)
+
+// Load two aligned quadwords from addr: ret_l from addr, ret_r from addr + 8.
+#define ALOAD16(ret_l,ret_r,addr)			\
+    do {						\
+	ret_l = ldq (addr);				\
+	ret_r = ldq (addr + 8);				\
+    } while (0)
+// Plain copy, 8 pixels wide: STORE one LOADed quadword per row for h rows (LOAD16 is unused). Uses pixels/block/line_size/h.
+#define OP8(LOAD,LOAD16,STORE)			\
+    do {					\
+	STORE (LOAD (pixels), block);		\
+	pixels += line_size;			\
+	block += line_size;			\
+    } while (--h)
+// Plain copy, 16 pixels wide: LOAD16 two quadwords per row and STORE them at block and block + 8.
+#define OP16(LOAD,LOAD16,STORE)			\
+    do {					\
+	uint64_t l, r;				\
+	LOAD16 (l, r, pixels);			\
+	STORE (l, block);			\
+	STORE (r, block + 8);			\
+	pixels += line_size;			\
+	block += line_size;			\
+    } while (--h)
+// Horizontal average, 8 wide: average each row with itself shifted down one byte (pixels[8] fills the top byte).
+#define OP8_X2(LOAD,LOAD16,STORE)			\
+    do {						\
+	uint64_t p0, p1;				\
+							\
+	p0 = LOAD (pixels);				\
+	p1 = p0 >> 8 | ((uint64_t) pixels[8] << 56);	\
+	STORE (avg2 (p0, p1), block);			\
+	pixels += line_size;				\
+	block += line_size;				\
+    } while (--h)
+// Horizontal average, 16 wide: as OP8_X2 on both quadwords; pixels[16] fills the top byte of the second one.
+#define OP16_X2(LOAD,LOAD16,STORE)				\
+    do {							\
+	uint64_t p0, p1;					\
+								\
+	LOAD16 (p0, p1, pixels);				\
+	STORE (avg2(p0, p0 >> 8 | p1 << 56), block);		\
+	STORE (avg2(p1, p1 >> 8 | (uint64_t) pixels[16] << 56),	\
+	       block + 8);					\
+	pixels += line_size;					\
+	block += line_size;					\
+    } while (--h)
+// Vertical average, 8 wide: average each row with the next; line_size is zeroed on the final pass so the pointers stop advancing.
+#define OP8_Y2(LOAD,LOAD16,STORE)		\
+    do {					\
+	uint64_t p0, p1;			\
+	p0 = LOAD (pixels);			\
+	pixels += line_size;			\
+	p1 = LOAD (pixels);			\
+	do {					\
+	    uint64_t av = avg2 (p0, p1);	\
+	    if (--h == 0) line_size = 0;	\
+	    pixels += line_size;		\
+	    p0 = p1;				\
+	    p1 = LOAD (pixels);			\
+	    STORE (av, block);			\
+	    block += line_size;			\
+	} while (h);				\
+    } while (0)
+// Vertical average, 16 wide: the OP8_Y2 scheme applied to the left and right quadword of each row.
+#define OP16_Y2(LOAD,LOAD16,STORE)		\
+    do {					\
+	uint64_t p0l, p0r, p1l, p1r;		\
+	LOAD16 (p0l, p0r, pixels);		\
+	pixels += line_size;			\
+	LOAD16 (p1l, p1r, pixels);		\
+	do {					\
+	    uint64_t avl, avr;			\
+	    if (--h == 0) line_size = 0;	\
+	    avl = avg2 (p0l, p1l);		\
+	    avr = avg2 (p0r, p1r);		\
+	    p0l = p1l;				\
+	    p0r = p1r;				\
+	    pixels += line_size;		\
+	    LOAD16 (p1l, p1r, pixels);		\
+	    STORE (avl, block);			\
+	    STORE (avr, block + 8);		\
+	    block += line_size;			\
+	} while (h);				\
+    } while (0)
+// 2x2 average, 8 wide: each row's horizontal pair sum is kept as a pre-shifted upper part (ph) and a low-two-bit part (pl).
+#define OP8_XY2(LOAD,LOAD16,STORE)				\
+    do {							\
+	uint64_t pl, ph;					\
+	uint64_t p1 = LOAD (pixels);				\
+	uint64_t p2 = p1 >> 8 | ((uint64_t) pixels[8] << 56);	\
+								\
+	ph = (((p1 & ~BYTE_VEC (0x03)) >> 2) +			\
+	      ((p2 & ~BYTE_VEC (0x03)) >> 2));			\
+	pl = ((p1 & BYTE_VEC (0x03)) +				\
+	      (p2 & BYTE_VEC (0x03)));				\
+								\
+	do {							\
+	    uint64_t npl, nph;					\
+								\
+	    pixels += line_size;				\
+	    p1 = LOAD (pixels);					\
+	    p2 = (p1 >> 8) | ((uint64_t) pixels[8] << 56);	\
+	    nph = (((p1 & ~BYTE_VEC (0x03)) >> 2) +		\
+	           ((p2 & ~BYTE_VEC (0x03)) >> 2));		\
+	    npl = ((p1 & BYTE_VEC (0x03)) +			\
+	           (p2 & BYTE_VEC (0x03)));			\
+								\
+	    STORE (ph + nph +					\
+		   (((pl + npl + BYTE_VEC (0x02)) >> 2) &	\
+		    BYTE_VEC (0x03)), block);			\
+								\
+	    block += line_size;					\
+            pl = npl;						\
+	    ph = nph;						\
+	} while (--h);						\
+    } while (0)
+// 2x2 average, 16 wide: the OP8_XY2 scheme run on the left (_l) and right (_r) quadwords of each row.
+#define OP16_XY2(LOAD,LOAD16,STORE)				\
+    do {							\
+	uint64_t p0, p1, p2, p3, pl_l, ph_l, pl_r, ph_r;	\
+	LOAD16 (p0, p2, pixels);				\
+	p1 = p0 >> 8 | (p2 << 56);				\
+	p3 = p2 >> 8 | ((uint64_t)pixels[16] << 56);		\
+								\
+	ph_l = (((p0 & ~BYTE_VEC (0x03)) >> 2) +		\
+	        ((p1 & ~BYTE_VEC (0x03)) >> 2));		\
+	pl_l = ((p0 & BYTE_VEC (0x03)) +			\
+	        (p1 & BYTE_VEC(0x03)));				\
+	ph_r = (((p2 & ~BYTE_VEC (0x03)) >> 2) +		\
+	        ((p3 & ~BYTE_VEC (0x03)) >> 2));		\
+	pl_r = ((p2 & BYTE_VEC (0x03)) +			\
+	        (p3 & BYTE_VEC (0x03)));			\
+								\
+	do {							\
+	    uint64_t npl_l, nph_l, npl_r, nph_r;		\
+								\
+	    pixels += line_size;				\
+	    LOAD16 (p0, p2, pixels);				\
+	    p1 = p0 >> 8 | (p2 << 56);				\
+	    p3 = p2 >> 8 | ((uint64_t)pixels[16] << 56);	\
+	    nph_l = (((p0 & ~BYTE_VEC (0x03)) >> 2) +		\
+		     ((p1 & ~BYTE_VEC (0x03)) >> 2));		\
+	    npl_l = ((p0 & BYTE_VEC (0x03)) +			\
+		     (p1 & BYTE_VEC (0x03)));			\
+	    nph_r = (((p2 & ~BYTE_VEC (0x03)) >> 2) +		\
+		     ((p3 & ~BYTE_VEC (0x03)) >> 2));		\
+	    npl_r = ((p2 & BYTE_VEC (0x03)) +			\
+		     (p3 & BYTE_VEC (0x03)));			\
+								\
+	    STORE (ph_l + nph_l +				\
+		   (((pl_l + npl_l + BYTE_VEC (0x02)) >> 2) &	\
+		    BYTE_VEC(0x03)), block);			\
+	    STORE (ph_r + nph_r +				\
+		   (((pl_r + npl_r + BYTE_VEC (0x02)) >> 2) &	\
+		    BYTE_VEC(0x03)), block + 8);		\
+								\
+	    block += line_size;					\
+	    pl_l = npl_l;					\
+	    ph_l = nph_l;					\
+	    pl_r = npl_r;					\
+	    ph_r = nph_r;					\
+	} while (--h);						\
+    } while (0)
+// Define MC_<OPNAME>_<SUFF>_<SIZE>_alpha; it runs OPKIND with unaligned loads when pixels has any of its low 3 address bits set, else aligned loads.
+#define MAKE_OP(OPNAME,SIZE,SUFF,OPKIND,STORE)				\
+static void MC_ ## OPNAME ## _ ## SUFF ## _ ## SIZE ## _alpha		\
+	(uint8_t *restrict block, const uint8_t *restrict pixels,	\
+	 int line_size, int h)						\
+{									\
+    if ((uint64_t) pixels & 0x7) {					\
+	OPKIND (uldq, ULOAD16, STORE);					\
+    } else {								\
+	OPKIND (ldq, ALOAD16, STORE);					\
+    }									\
+}
+// Emit all eight MC functions (8/16 wide x o/x/y/xy) for one STORE flavour; no ';' follows the generated definitions.
+#define PIXOP(OPNAME,STORE)			\
+    MAKE_OP (OPNAME, 8,  o,  OP8,      STORE)	\
+    MAKE_OP (OPNAME, 8,  x,  OP8_X2,   STORE)	\
+    MAKE_OP (OPNAME, 8,  y,  OP8_Y2,   STORE)	\
+    MAKE_OP (OPNAME, 8,  xy, OP8_XY2,  STORE)	\
+    MAKE_OP (OPNAME, 16, o,  OP16,     STORE)	\
+    MAKE_OP (OPNAME, 16, x,  OP16_X2,  STORE)	\
+    MAKE_OP (OPNAME, 16, y,  OP16_Y2,  STORE)	\
+    MAKE_OP (OPNAME, 16, xy, OP16_XY2, STORE)
+// put stores the value as is; avg averages it with the quadword already at the destination. STORE is an expression: callers add the ';'.
+#define STORE(l,b) stq (l, b)
+PIXOP (put, STORE)
+#undef STORE
+#define STORE(l,b) stq (avg2 (l, ldq (b)), b)
+PIXOP (avg, STORE)
+// Alpha function table: first group put, second avg; each lists o, x, y, xy for 16-wide and then 8-wide blocks.
+mpeg2_mc_t mpeg2_mc_alpha = {
+    { MC_put_o_16_alpha, MC_put_x_16_alpha,
+      MC_put_y_16_alpha, MC_put_xy_16_alpha,
+      MC_put_o_8_alpha, MC_put_x_8_alpha,
+      MC_put_y_8_alpha, MC_put_xy_8_alpha },
+    { MC_avg_o_16_alpha, MC_avg_x_16_alpha,
+      MC_avg_y_16_alpha, MC_avg_xy_16_alpha,
+      MC_avg_o_8_alpha, MC_avg_x_8_alpha,
+      MC_avg_y_8_alpha, MC_avg_xy_8_alpha }
+};
+
+#endif
diff --git a/src/video_dec/libmpeg2new/libmpeg2/motion_comp_altivec.c b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_altivec.c
new file mode 100644
index 000000000..ee740e14e
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_altivec.c
@@ -0,0 +1,1010 @@
+/*
+ * motion_comp_altivec.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#ifdef ARCH_PPC
+
+#ifdef HAVE_ALTIVEC_H
+#include <altivec.h>
+#endif
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include <xine/attributes.h>
+#include "mpeg2_internal.h"
+
+typedef vector signed char vector_s8_t;	/* short aliases for AltiVec types */
+typedef vector unsigned char vector_u8_t;
+typedef vector signed short vector_s16_t;
+typedef vector unsigned short vector_u16_t;
+typedef vector signed int vector_s32_t;
+typedef vector unsigned int vector_u32_t;
+
+#ifndef COFFEE_BREAK	/* Workarounds for gcc suckage */
+
+static inline vector_u8_t my_vec_ld (int const A, const uint8_t * const B)
+{   /* vec_ld pinned to byte pointers; the const on B is cast away */
+    return vec_ld (A, (uint8_t *)B);
+}
+#undef vec_ld
+#define vec_ld my_vec_ld	/* later vec_ld uses in this file hit the wrapper */
+
+static inline vector_u8_t my_vec_and (vector_u8_t const A, vector_u8_t const B)
+{   /* vec_and pinned to the unsigned-byte vector type */
+    return vec_and (A, B);
+}
+#undef vec_and
+#define vec_and my_vec_and	/* later vec_and uses hit the wrapper */
+
+static inline vector_u8_t my_vec_avg (vector_u8_t const A, vector_u8_t const B)
+{   /* vec_avg pinned to the unsigned-byte vector type */
+    return vec_avg (A, B);
+}
+#undef vec_avg
+#define vec_avg my_vec_avg	/* later vec_avg uses hit the wrapper */
+
+#endif
+
+static void MC_put_o_16_altivec (uint8_t * dest, const uint8_t * ref,
+				 const int stride, int height)
+{
+    vector_u8_t perm, ref0, ref1, tmp;
+    /* Copy a 16-wide block, height rows, from unaligned ref to dest. */
+    perm = vec_lvsl (0, ref);	/* permute mask for ref's misalignment */
+    /* Two rows per pass, one pair peeled: needs an even height >= 4. */
+    height = (height >> 1) - 1;
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (15, ref);	/* aligned line that holds ref[15] */
+    ref += stride;
+    tmp = vec_perm (ref0, ref1, perm);
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (15, ref);
+	ref += stride;
+	vec_st (tmp, 0, dest);	/* row fetched on the previous step */
+	tmp = vec_perm (ref0, ref1, perm);
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (15, ref);
+	ref += stride;
+	vec_st (tmp, stride, dest);
+	dest += 2*stride;
+	tmp = vec_perm (ref0, ref1, perm);
+    } while (--height);
+    /* Epilogue: load the last row, store the last two results. */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (15, ref);
+    vec_st (tmp, 0, dest);
+    tmp = vec_perm (ref0, ref1, perm);
+    vec_st (tmp, stride, dest);
+}
+
+static void MC_put_o_8_altivec (uint8_t * dest, const uint8_t * ref,
+				const int stride, int height)
+{
+    vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1;
+    /* Copy an 8-wide block, height rows, from unaligned ref to dest. */
+    tmp0 = vec_lvsl (0, ref);	/* even rows: alignment of ref */
+    tmp0 = vec_mergeh (tmp0, tmp0);	/* + vec_pack: 8 indices, in both halves */
+    perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
+    tmp1 = vec_lvsl (stride, ref);	/* odd rows: alignment of ref + stride */
+    tmp1 = vec_mergeh (tmp1, tmp1);
+    perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
+    /* Two rows per pass, one pair peeled: needs an even height >= 4. */
+    height = (height >> 1) - 1;
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (7, ref);	/* aligned line that holds ref[7] */
+    ref += stride;
+    tmp0 = vec_perm (ref0, ref1, perm0);
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (7, ref);
+	ref += stride;
+	vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);	/* word at dest */
+	vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);	/* word at dest+4 */
+	dest += stride;
+	tmp1 = vec_perm (ref0, ref1, perm1);
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (7, ref);
+	ref += stride;
+	vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
+	dest += stride;
+	tmp0 = vec_perm (ref0, ref1, perm0);
+    } while (--height);
+    /* Epilogue: load the last row, store the last two results. */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (7, ref);
+    vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
+    dest += stride;
+    tmp1 = vec_perm (ref0, ref1, perm1);
+    vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
+}
+
+static void MC_put_x_16_altivec (uint8_t * dest, const uint8_t * ref,
+				 const int stride, int height)
+{
+    vector_u8_t permA, permB, ref0, ref1, tmp;
+    /* Horizontal half-pel, 16 wide: dest[i] = vec_avg (ref[i], ref[i+1]). */
+    permA = vec_lvsl (0, ref);	/* selects ref[0..15] */
+    permB = vec_add (permA, vec_splat_u8 (1));	/* selects ref[1..16] */
+    /* Two rows per pass, one pair peeled: needs an even height >= 4. */
+    height = (height >> 1) - 1;
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (16, ref);	/* offset 16: ref[16] is needed too */
+    ref += stride;
+    tmp = vec_avg (vec_perm (ref0, ref1, permA),
+		   vec_perm (ref0, ref1, permB));
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (16, ref);
+	ref += stride;
+	vec_st (tmp, 0, dest);	/* row computed on the previous step */
+	tmp = vec_avg (vec_perm (ref0, ref1, permA),
+		       vec_perm (ref0, ref1, permB));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (16, ref);
+	ref += stride;
+	vec_st (tmp, stride, dest);
+	dest += 2*stride;
+	tmp = vec_avg (vec_perm (ref0, ref1, permA),
+		       vec_perm (ref0, ref1, permB));
+    } while (--height);
+    /* Epilogue: load the last row, store the last two results. */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (16, ref);
+    vec_st (tmp, 0, dest);
+    tmp = vec_avg (vec_perm (ref0, ref1, permA),
+		   vec_perm (ref0, ref1, permB));
+    vec_st (tmp, stride, dest);
+}
+
+static void MC_put_x_8_altivec (uint8_t * dest, const uint8_t * ref,
+				const int stride, int height)
+{
+    vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
+    /* Horizontal half-pel, 8 wide: dest[i] = vec_avg (ref[i], ref[i+1]). */
+    ones = vec_splat_u8 (1);
+    tmp0 = vec_lvsl (0, ref);	/* even rows: alignment of ref */
+    tmp0 = vec_mergeh (tmp0, tmp0);	/* + vec_pack: 8 indices, in both halves */
+    perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
+    perm0B = vec_add (perm0A, ones);	/* same bytes, one to the right */
+    tmp1 = vec_lvsl (stride, ref);	/* odd rows: alignment of ref + stride */
+    tmp1 = vec_mergeh (tmp1, tmp1);
+    perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
+    perm1B = vec_add (perm1A, ones);
+    /* Two rows per pass, one pair peeled: needs an even height >= 4. */
+    height = (height >> 1) - 1;
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (8, ref);	/* offset 8: ref[8] is needed too */
+    ref += stride;
+    tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
+		    vec_perm (ref0, ref1, perm0B));
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (8, ref);
+	ref += stride;
+	vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);	/* word at dest */
+	vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);	/* word at dest+4 */
+	dest += stride;
+	tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
+			vec_perm (ref0, ref1, perm1B));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (8, ref);
+	ref += stride;
+	vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
+	dest += stride;
+	tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
+			vec_perm (ref0, ref1, perm0B));
+    } while (--height);
+    /* Epilogue: load the last row, store the last two results. */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (8, ref);
+    vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
+    dest += stride;
+    tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
+		    vec_perm (ref0, ref1, perm1B));
+    vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
+}
+
+static void MC_put_y_16_altivec (uint8_t * dest, const uint8_t * ref,
+				 const int stride, int height)
+{
+    vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp;
+    /* Vertical half-pel, 16 wide: each row is vec_avg of two ref rows. */
+    perm = vec_lvsl (0, ref);	/* permute mask for ref's misalignment */
+    /* Two rows per pass, one pair peeled: needs an even height >= 4. */
+    height = (height >> 1) - 1;
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (15, ref);
+    ref += stride;
+    tmp0 = vec_perm (ref0, ref1, perm);	/* ref row 0 */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (15, ref);
+    ref += stride;
+    tmp1 = vec_perm (ref0, ref1, perm);	/* ref row 1 */
+    tmp = vec_avg (tmp0, tmp1);
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (15, ref);
+	ref += stride;
+	vec_st (tmp, 0, dest);
+	tmp0 = vec_perm (ref0, ref1, perm);	/* newest row; tmp1 is the one above */
+	tmp = vec_avg (tmp0, tmp1);
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (15, ref);
+	ref += stride;
+	vec_st (tmp, stride, dest);
+	dest += 2*stride;
+	tmp1 = vec_perm (ref0, ref1, perm);	/* newest row; tmp0 is the one above */
+	tmp = vec_avg (tmp0, tmp1);
+    } while (--height);
+    /* Epilogue: load the last row, store the last two results. */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (15, ref);
+    vec_st (tmp, 0, dest);
+    tmp0 = vec_perm (ref0, ref1, perm);
+    tmp = vec_avg (tmp0, tmp1);
+    vec_st (tmp, stride, dest);
+}
+
+static void MC_put_y_8_altivec (uint8_t * dest, const uint8_t * ref,
+				const int stride, int height)
+{
+    vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1;
+    /* Vertical half-pel, 8 wide: each row is vec_avg of two ref rows. */
+    tmp0 = vec_lvsl (0, ref);	/* even rows: alignment of ref */
+    tmp0 = vec_mergeh (tmp0, tmp0);	/* + vec_pack: 8 indices, in both halves */
+    perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
+    tmp1 = vec_lvsl (stride, ref);	/* odd rows: alignment of ref + stride */
+    tmp1 = vec_mergeh (tmp1, tmp1);
+    perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
+    /* Two rows per pass, one pair peeled: needs an even height >= 4. */
+    height = (height >> 1) - 1;
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (7, ref);
+    ref += stride;
+    tmp0 = vec_perm (ref0, ref1, perm0);	/* ref row 0 */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (7, ref);
+    ref += stride;
+    tmp1 = vec_perm (ref0, ref1, perm1);	/* ref row 1 */
+    tmp = vec_avg (tmp0, tmp1);
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (7, ref);
+	ref += stride;
+	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);	/* word at dest */
+	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);	/* word at dest+4 */
+	dest += stride;
+	tmp0 = vec_perm (ref0, ref1, perm0);
+	tmp = vec_avg (tmp0, tmp1);
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (7, ref);
+	ref += stride;
+	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+	dest += stride;
+	tmp1 = vec_perm (ref0, ref1, perm1);
+	tmp = vec_avg (tmp0, tmp1);
+    } while (--height);
+    /* Epilogue: load the last row, store the last two results. */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (7, ref);
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+    dest += stride;
+    tmp0 = vec_perm (ref0, ref1, perm0);
+    tmp = vec_avg (tmp0, tmp1);
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+}
+
+static void MC_put_xy_16_altivec (uint8_t * dest, const uint8_t * ref,
+				  const int stride, int height)
+{
+    vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
+    vector_u8_t ones;
+    /* 2-D half-pel, 16 wide: (A + B + C + D + 2) >> 2 built from vec_avg. */
+    ones = vec_splat_u8 (1);
+    permA = vec_lvsl (0, ref);	/* selects ref[0..15] */
+    permB = vec_add (permA, ones);	/* selects ref[1..16] */
+    /* Two rows per pass, one pair peeled: needs an even height >= 4. */
+    height = (height >> 1) - 1;
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (16, ref);
+    ref += stride;
+    A = vec_perm (ref0, ref1, permA);
+    B = vec_perm (ref0, ref1, permB);
+    avg0 = vec_avg (A, B);
+    xor0 = vec_xor (A, B);	/* low bit set: avg0 was rounded up */
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (16, ref);
+    ref += stride;
+    A = vec_perm (ref0, ref1, permA);
+    B = vec_perm (ref0, ref1, permB);
+    avg1 = vec_avg (A, B);
+    xor1 = vec_xor (A, B);
+    tmp = vec_sub (vec_avg (avg0, avg1),	/* minus the rounding excess */
+		   vec_and (vec_and (ones, vec_or (xor0, xor1)),
+			    vec_xor (avg0, avg1)));
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (16, ref);
+	ref += stride;
+	vec_st (tmp, 0, dest);
+	A = vec_perm (ref0, ref1, permA);
+	B = vec_perm (ref0, ref1, permB);
+	avg0 = vec_avg (A, B);
+	xor0 = vec_xor (A, B);
+	tmp = vec_sub (vec_avg (avg0, avg1),
+		       vec_and (vec_and (ones, vec_or (xor0, xor1)),
+				vec_xor (avg0, avg1)));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (16, ref);
+	ref += stride;
+	vec_st (tmp, stride, dest);
+	dest += 2*stride;
+	A = vec_perm (ref0, ref1, permA);
+	B = vec_perm (ref0, ref1, permB);
+	avg1 = vec_avg (A, B);
+	xor1 = vec_xor (A, B);
+	tmp = vec_sub (vec_avg (avg0, avg1),
+		       vec_and (vec_and (ones, vec_or (xor0, xor1)),
+				vec_xor (avg0, avg1)));
+    } while (--height);
+    /* Epilogue: load the last row, store the last two results. */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (16, ref);
+    vec_st (tmp, 0, dest);
+    A = vec_perm (ref0, ref1, permA);
+    B = vec_perm (ref0, ref1, permB);
+    avg0 = vec_avg (A, B);
+    xor0 = vec_xor (A, B);
+    tmp = vec_sub (vec_avg (avg0, avg1),
+		   vec_and (vec_and (ones, vec_or (xor0, xor1)),
+			    vec_xor (avg0, avg1)));
+    vec_st (tmp, stride, dest);
+}
+
+static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
+				 const int stride, int height)
+{
+    vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
+    vector_u8_t avg0, avg1, xor0, xor1, tmp, ones;
+    /* 2-D half-pel, 8 wide: (A + B + C + D + 2) >> 2 built from vec_avg. */
+    ones = vec_splat_u8 (1);
+    perm0A = vec_lvsl (0, ref);	/* even rows: alignment of ref */
+    perm0A = vec_mergeh (perm0A, perm0A);	/* + vec_pack: 8 indices, twice */
+    perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
+    perm0B = vec_add (perm0A, ones);	/* same bytes, one to the right */
+    perm1A = vec_lvsl (stride, ref);	/* odd rows: alignment of ref + stride */
+    perm1A = vec_mergeh (perm1A, perm1A);
+    perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
+    perm1B = vec_add (perm1A, ones);
+    /* Two rows per pass, one pair peeled: needs an even height >= 4. */
+    height = (height >> 1) - 1;
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (8, ref);
+    ref += stride;
+    A = vec_perm (ref0, ref1, perm0A);
+    B = vec_perm (ref0, ref1, perm0B);
+    avg0 = vec_avg (A, B);
+    xor0 = vec_xor (A, B);	/* low bit set: avg0 was rounded up */
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (8, ref);
+    ref += stride;
+    A = vec_perm (ref0, ref1, perm1A);
+    B = vec_perm (ref0, ref1, perm1B);
+    avg1 = vec_avg (A, B);
+    xor1 = vec_xor (A, B);
+    tmp = vec_sub (vec_avg (avg0, avg1),	/* minus the rounding excess */
+		   vec_and (vec_and (ones, vec_or (xor0, xor1)),
+			    vec_xor (avg0, avg1)));
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (8, ref);
+	ref += stride;
+	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);	/* word at dest */
+	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);	/* word at dest+4 */
+	dest += stride;
+	A = vec_perm (ref0, ref1, perm0A);
+	B = vec_perm (ref0, ref1, perm0B);
+	avg0 = vec_avg (A, B);
+	xor0 = vec_xor (A, B);
+	tmp = vec_sub (vec_avg (avg0, avg1),
+		       vec_and (vec_and (ones, vec_or (xor0, xor1)),
+				vec_xor (avg0, avg1)));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (8, ref);
+	ref += stride;
+	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+	dest += stride;
+	A = vec_perm (ref0, ref1, perm1A);
+	B = vec_perm (ref0, ref1, perm1B);
+	avg1 = vec_avg (A, B);
+	xor1 = vec_xor (A, B);
+	tmp = vec_sub (vec_avg (avg0, avg1),
+		       vec_and (vec_and (ones, vec_or (xor0, xor1)),
+				vec_xor (avg0, avg1)));
+    } while (--height);
+    /* Epilogue: load the last row, store the last two results. */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (8, ref);
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+    dest += stride;
+    A = vec_perm (ref0, ref1, perm0A);
+    B = vec_perm (ref0, ref1, perm0B);
+    avg0 = vec_avg (A, B);
+    xor0 = vec_xor (A, B);
+    tmp = vec_sub (vec_avg (avg0, avg1),
+		   vec_and (vec_and (ones, vec_or (xor0, xor1)),
+			    vec_xor (avg0, avg1)));
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+}
+
+#if 0	/* disabled alternative to the MC_put_xy_8_altivec defined above */
+static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
+				 const int stride, int height)
+{
+    vector_u8_t permA, permB, ref0, ref1, A, B, C, D, tmp, zero, ones;
+    vector_u16_t splat2, temp;
+    /* Widens to 16-bit lanes and forms (A + B + C + D + 2) >> 2 directly. */
+    ones = vec_splat_u8 (1);
+    permA = vec_lvsl (0, ref);
+    permB = vec_add (permA, ones);
+
+    zero = vec_splat_u8 (0);
+    splat2 = vec_splat_u16 (2);	/* both the +2 bias and the shift count */
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (8, ref);
+	ref += stride;
+	A = vec_perm (ref0, ref1, permA);
+	B = vec_perm (ref0, ref1, permB);
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (8, ref);
+	C = vec_perm (ref0, ref1, permA);
+	D = vec_perm (ref0, ref1, permB);
+
+	temp = vec_add (vec_add ((vector_u16_t)vec_mergeh (zero, A),
+				(vector_u16_t)vec_mergeh (zero, B)),
+		       vec_add ((vector_u16_t)vec_mergeh (zero, C),
+				(vector_u16_t)vec_mergeh (zero, D)));
+	temp = vec_sr (vec_add (temp, splat2), splat2);
+	tmp = vec_pack (temp, temp);
+
+	vec_st (tmp, 0, dest);	/* NOTE(review): writes a full 16-byte vector */
+	dest += stride;
+	tmp = vec_avg (vec_perm (ref0, ref1, permA),	/* NOTE(review): unused */
+		       vec_perm (ref0, ref1, permB));
+    } while (--height);
+}
+#endif
+
+static void MC_avg_o_16_altivec (uint8_t * dest, const uint8_t * ref,
+				 const int stride, int height)
+{
+    vector_u8_t perm, ref0, ref1, tmp, prev;
+    /* 16 wide: dest = vec_avg (dest, ref); ref may be unaligned. */
+    perm = vec_lvsl (0, ref);	/* permute mask for ref's misalignment */
+    /* Two rows per pass, one pair peeled: needs an even height >= 4. */
+    height = (height >> 1) - 1;
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (15, ref);	/* aligned line that holds ref[15] */
+    ref += stride;
+    prev = vec_ld (0, dest);	/* current contents of the dest row */
+    tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (15, ref);
+	ref += stride;
+	prev = vec_ld (stride, dest);	/* next dest row, read ahead */
+	vec_st (tmp, 0, dest);
+	tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (15, ref);
+	ref += stride;
+	prev = vec_ld (2*stride, dest);
+	vec_st (tmp, stride, dest);
+	dest += 2*stride;
+	tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
+    } while (--height);
+    /* Epilogue: load the last row, store the last two results. */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (15, ref);
+    prev = vec_ld (stride, dest);
+    vec_st (tmp, 0, dest);
+    tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
+    vec_st (tmp, stride, dest);
+}
+
+static void MC_avg_o_8_altivec (uint8_t * dest, const uint8_t * ref,
+				const int stride, int height)
+{
+    vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev;
+    /* 8 wide: dest = vec_avg (dest, ref); ref may be unaligned. */
+    tmp0 = vec_lvsl (0, ref);	/* even rows: alignment of ref */
+    tmp0 = vec_mergeh (tmp0, tmp0);	/* + vec_pack: 8 indices, in both halves */
+    perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
+    tmp1 = vec_lvsl (stride, ref);	/* odd rows: alignment of ref + stride */
+    tmp1 = vec_mergeh (tmp1, tmp1);
+    perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
+    /* Two rows per pass, one pair peeled: needs an even height >= 4. */
+    height = (height >> 1) - 1;
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (7, ref);
+    ref += stride;
+    prev = vec_ld (0, dest);	/* aligned 16-byte line holding the dest row */
+    tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (7, ref);
+	ref += stride;
+	prev = vec_ld (stride, dest);	/* next dest row, read ahead */
+	vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);	/* word at dest */
+	vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);	/* word at dest+4 */
+	dest += stride;
+	tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (7, ref);
+	ref += stride;
+	prev = vec_ld (stride, dest);
+	vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
+	dest += stride;
+	tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
+    } while (--height);
+    /* Epilogue: load the last row, store the last two results. */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (7, ref);
+    prev = vec_ld (stride, dest);
+    vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
+    dest += stride;
+    tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
+    vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
+}
+
+static void MC_avg_x_16_altivec (uint8_t * dest, const uint8_t * ref,
+				 const int stride, int height)
+{
+    vector_u8_t permA, permB, ref0, ref1, tmp, prev;
+    /* 16 wide: dest = vec_avg (dest, horizontal half-pel of ref). */
+    permA = vec_lvsl (0, ref);	/* selects ref[0..15] */
+    permB = vec_add (permA, vec_splat_u8 (1));	/* selects ref[1..16] */
+    /* Two rows per pass, one pair peeled: needs an even height >= 4. */
+    height = (height >> 1) - 1;
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (16, ref);	/* offset 16: ref[16] is needed too */
+    prev = vec_ld (0, dest);	/* current contents of the dest row */
+    ref += stride;
+    tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
+				  vec_perm (ref0, ref1, permB)));
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (16, ref);
+	ref += stride;
+	prev = vec_ld (stride, dest);	/* next dest row, read ahead */
+	vec_st (tmp, 0, dest);
+	tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
+				      vec_perm (ref0, ref1, permB)));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (16, ref);
+	ref += stride;
+	prev = vec_ld (2*stride, dest);
+	vec_st (tmp, stride, dest);
+	dest += 2*stride;
+	tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
+				      vec_perm (ref0, ref1, permB)));
+    } while (--height);
+    /* Epilogue: load the last row, store the last two results. */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (16, ref);
+    prev = vec_ld (stride, dest);
+    vec_st (tmp, 0, dest);
+    tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
+				  vec_perm (ref0, ref1, permB)));
+    vec_st (tmp, stride, dest);
+}
+
+static void MC_avg_x_8_altivec (uint8_t * dest, const uint8_t * ref,
+				const int stride, int height)
+{
+    vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
+    vector_u8_t prev;
+    /* 8 wide: dest = vec_avg (dest, horizontal half-pel of ref). */
+    ones = vec_splat_u8 (1);
+    tmp0 = vec_lvsl (0, ref);	/* even rows: alignment of ref */
+    tmp0 = vec_mergeh (tmp0, tmp0);	/* + vec_pack: 8 indices, in both halves */
+    perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
+    perm0B = vec_add (perm0A, ones);	/* same bytes, one to the right */
+    tmp1 = vec_lvsl (stride, ref);	/* odd rows: alignment of ref + stride */
+    tmp1 = vec_mergeh (tmp1, tmp1);
+    perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
+    perm1B = vec_add (perm1A, ones);
+    /* Two rows per pass, one pair peeled: needs an even height >= 4. */
+    height = (height >> 1) - 1;
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (8, ref);	/* offset 8: ref[8] is needed too */
+    prev = vec_ld (0, dest);	/* aligned 16-byte line holding the dest row */
+    ref += stride;
+    tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
+				   vec_perm (ref0, ref1, perm0B)));
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (8, ref);
+	ref += stride;
+	prev = vec_ld (stride, dest);	/* next dest row, read ahead */
+	vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);	/* word at dest */
+	vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);	/* word at dest+4 */
+	dest += stride;
+	tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
+				       vec_perm (ref0, ref1, perm1B)));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (8, ref);
+	ref += stride;
+	prev = vec_ld (stride, dest);
+	vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
+	dest += stride;
+	tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
+				       vec_perm (ref0, ref1, perm0B)));
+    } while (--height);
+    /* Epilogue: load the last row, store the last two results. */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (8, ref);
+    prev = vec_ld (stride, dest);
+    vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
+    dest += stride;
+    tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
+				   vec_perm (ref0, ref1, perm1B)));
+    vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
+}
+
+static void MC_avg_y_16_altivec (uint8_t * dest, const uint8_t * ref,
+				 const int stride, int height)
+{
+    vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev;
+    /* 16 wide: dest = vec_avg (dest, vertical half-pel of ref). */
+    perm = vec_lvsl (0, ref);	/* permute mask for ref's misalignment */
+    /* Two rows per pass, one pair peeled: needs an even height >= 4. */
+    height = (height >> 1) - 1;
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (15, ref);
+    ref += stride;
+    tmp0 = vec_perm (ref0, ref1, perm);	/* ref row 0 */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (15, ref);
+    ref += stride;
+    prev = vec_ld (0, dest);	/* current contents of the dest row */
+    tmp1 = vec_perm (ref0, ref1, perm);	/* ref row 1 */
+    tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (15, ref);
+	ref += stride;
+	prev = vec_ld (stride, dest);	/* next dest row, read ahead */
+	vec_st (tmp, 0, dest);
+	tmp0 = vec_perm (ref0, ref1, perm);
+	tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (15, ref);
+	ref += stride;
+	prev = vec_ld (2*stride, dest);
+	vec_st (tmp, stride, dest);
+	dest += 2*stride;
+	tmp1 = vec_perm (ref0, ref1, perm);
+	tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
+    } while (--height);
+    /* Epilogue: load the last row, store the last two results. */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (15, ref);
+    prev = vec_ld (stride, dest);
+    vec_st (tmp, 0, dest);
+    tmp0 = vec_perm (ref0, ref1, perm);
+    tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
+    vec_st (tmp, stride, dest);
+}
+
+static void MC_avg_y_8_altivec (uint8_t * dest, const uint8_t * ref,
+				const int stride, int height)
+{
+    vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev;
+    /* 8 wide: dest = vec_avg (dest, vertical half-pel of ref). */
+    tmp0 = vec_lvsl (0, ref);	/* even rows: alignment of ref */
+    tmp0 = vec_mergeh (tmp0, tmp0);	/* + vec_pack: 8 indices, in both halves */
+    perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
+    tmp1 = vec_lvsl (stride, ref);	/* odd rows: alignment of ref + stride */
+    tmp1 = vec_mergeh (tmp1, tmp1);
+    perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
+    /* Two rows per pass, one pair peeled: needs an even height >= 4. */
+    height = (height >> 1) - 1;
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (7, ref);
+    ref += stride;
+    tmp0 = vec_perm (ref0, ref1, perm0);	/* ref row 0 */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (7, ref);
+    ref += stride;
+    prev = vec_ld (0, dest);	/* aligned 16-byte line holding the dest row */
+    tmp1 = vec_perm (ref0, ref1, perm1);	/* ref row 1 */
+    tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (7, ref);
+	ref += stride;
+	prev = vec_ld (stride, dest);	/* next dest row, read ahead */
+	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);	/* word at dest */
+	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);	/* word at dest+4 */
+	dest += stride;
+	tmp0 = vec_perm (ref0, ref1, perm0);
+	tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (7, ref);
+	ref += stride;
+	prev = vec_ld (stride, dest);
+	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+	dest += stride;
+	tmp1 = vec_perm (ref0, ref1, perm1);
+	tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
+    } while (--height);
+    /* Epilogue: load the last row, store the last two results. */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (7, ref);
+    prev = vec_ld (stride, dest);
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+    dest += stride;
+    tmp0 = vec_perm (ref0, ref1, perm0);
+    tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+}
+
+static void MC_avg_xy_16_altivec (uint8_t * dest, const uint8_t * ref,
+				  const int stride, int height)
+{
+    vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
+    vector_u8_t ones, prev;
+    /* 16 wide: dest = vec_avg (dest, (A + B + C + D + 2) >> 2 of ref). */
+    ones = vec_splat_u8 (1);
+    permA = vec_lvsl (0, ref);	/* selects ref[0..15] */
+    permB = vec_add (permA, ones);	/* selects ref[1..16] */
+    /* Two rows per pass, one pair peeled: needs an even height >= 4. */
+    height = (height >> 1) - 1;
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (16, ref);
+    ref += stride;
+    A = vec_perm (ref0, ref1, permA);
+    B = vec_perm (ref0, ref1, permB);
+    avg0 = vec_avg (A, B);
+    xor0 = vec_xor (A, B);	/* low bit set: avg0 was rounded up */
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (16, ref);
+    ref += stride;
+    prev = vec_ld (0, dest);	/* current contents of the dest row */
+    A = vec_perm (ref0, ref1, permA);
+    B = vec_perm (ref0, ref1, permB);
+    avg1 = vec_avg (A, B);
+    xor1 = vec_xor (A, B);
+    tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
+				  vec_and (vec_and (ones, vec_or (xor0, xor1)),
+					   vec_xor (avg0, avg1))));
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (16, ref);
+	ref += stride;
+	prev = vec_ld (stride, dest);	/* next dest row, read ahead */
+	vec_st (tmp, 0, dest);
+	A = vec_perm (ref0, ref1, permA);
+	B = vec_perm (ref0, ref1, permB);
+	avg0 = vec_avg (A, B);
+	xor0 = vec_xor (A, B);
+	tmp = vec_avg (prev,
+		       vec_sub (vec_avg (avg0, avg1),
+				vec_and (vec_and (ones, vec_or (xor0, xor1)),
+					 vec_xor (avg0, avg1))));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (16, ref);
+	ref += stride;
+	prev = vec_ld (2*stride, dest);
+	vec_st (tmp, stride, dest);
+	dest += 2*stride;
+	A = vec_perm (ref0, ref1, permA);
+	B = vec_perm (ref0, ref1, permB);
+	avg1 = vec_avg (A, B);
+	xor1 = vec_xor (A, B);
+	tmp = vec_avg (prev,
+		       vec_sub (vec_avg (avg0, avg1),
+				vec_and (vec_and (ones, vec_or (xor0, xor1)),
+					 vec_xor (avg0, avg1))));
+    } while (--height);
+    /* Epilogue: load the last row, store the last two results. */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (16, ref);
+    prev = vec_ld (stride, dest);
+    vec_st (tmp, 0, dest);
+    A = vec_perm (ref0, ref1, permA);
+    B = vec_perm (ref0, ref1, permB);
+    avg0 = vec_avg (A, B);
+    xor0 = vec_xor (A, B);
+    tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
+				  vec_and (vec_and (ones, vec_or (xor0, xor1)),
+					   vec_xor (avg0, avg1))));
+    vec_st (tmp, stride, dest);
+}
+
+static void MC_avg_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
+				 const int stride, int height)
+{
+    vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
+    vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev;
+    /* 8 wide: dest = vec_avg (dest, (A + B + C + D + 2) >> 2 of ref). */
+    ones = vec_splat_u8 (1);
+    perm0A = vec_lvsl (0, ref);	/* even rows: alignment of ref */
+    perm0A = vec_mergeh (perm0A, perm0A);	/* + vec_pack: 8 indices, twice */
+    perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
+    perm0B = vec_add (perm0A, ones);	/* same bytes, one to the right */
+    perm1A = vec_lvsl (stride, ref);	/* odd rows: alignment of ref + stride */
+    perm1A = vec_mergeh (perm1A, perm1A);
+    perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
+    perm1B = vec_add (perm1A, ones);
+    /* Two rows per pass, one pair peeled: needs an even height >= 4. */
+    height = (height >> 1) - 1;
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (8, ref);
+    ref += stride;
+    A = vec_perm (ref0, ref1, perm0A);
+    B = vec_perm (ref0, ref1, perm0B);
+    avg0 = vec_avg (A, B);
+    xor0 = vec_xor (A, B);	/* low bit set: avg0 was rounded up */
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (8, ref);
+    ref += stride;
+    prev = vec_ld (0, dest);	/* aligned 16-byte line holding the dest row */
+    A = vec_perm (ref0, ref1, perm1A);
+    B = vec_perm (ref0, ref1, perm1B);
+    avg1 = vec_avg (A, B);
+    xor1 = vec_xor (A, B);
+    tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
+				  vec_and (vec_and (ones, vec_or (xor0, xor1)),
+					   vec_xor (avg0, avg1))));
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (8, ref);
+	ref += stride;
+	prev = vec_ld (stride, dest);	/* next dest row, read ahead */
+	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);	/* word at dest */
+	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);	/* word at dest+4 */
+	dest += stride;
+	A = vec_perm (ref0, ref1, perm0A);
+	B = vec_perm (ref0, ref1, perm0B);
+	avg0 = vec_avg (A, B);
+	xor0 = vec_xor (A, B);
+	tmp = vec_avg (prev,
+		       vec_sub (vec_avg (avg0, avg1),
+				vec_and (vec_and (ones, vec_or (xor0, xor1)),
+					 vec_xor (avg0, avg1))));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (8, ref);
+	ref += stride;
+	prev = vec_ld (stride, dest);
+	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+	dest += stride;
+	A = vec_perm (ref0, ref1, perm1A);
+	B = vec_perm (ref0, ref1, perm1B);
+	avg1 = vec_avg (A, B);
+	xor1 = vec_xor (A, B);
+	tmp = vec_avg (prev,
+		       vec_sub (vec_avg (avg0, avg1),
+				vec_and (vec_and (ones, vec_or (xor0, xor1)),
+					 vec_xor (avg0, avg1))));
+    } while (--height);
+    /* Epilogue: load the last row, store the last two results. */
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (8, ref);
+    prev = vec_ld (stride, dest);
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+    dest += stride;
+    A = vec_perm (ref0, ref1, perm0A);
+    B = vec_perm (ref0, ref1, perm0B);
+    avg0 = vec_avg (A, B);
+    xor0 = vec_xor (A, B);
+    tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
+				  vec_and (vec_and (ones, vec_or (xor0, xor1)),
+					   vec_xor (avg0, avg1))));
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+}
+
+MPEG2_MC_EXTERN (altivec)	/* macro defined outside this chunk; presumably emits the mpeg2_mc_altivec table -- confirm */
+
+#endif
diff --git a/src/video_dec/libmpeg2new/libmpeg2/motion_comp_mlib.c b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_mlib.c
new file mode 100644
index 000000000..71c085029
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_mlib.c
@@ -0,0 +1,190 @@
+/*
+ * motion_comp_mlib.c
+ * Copyright (C) 2000-2003 Håkan Hjort <d95hjort@dtek.chalmers.se>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#ifdef LIBMPEG2_MLIB
+
+#include <mlib_types.h>
+#include <mlib_status.h>
+#include <mlib_sys.h>
+#include <mlib_video.h>
+#include <inttypes.h>
+
+#include "../include/mpeg2.h"
+#include "mpeg2_internal.h"
+
+static void MC_put_o_16_mlib (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{	/* dispatches to a mediaLib CopyRef routine chosen by height */
+    if (height != 16)	/* every height other than 16 takes the 16x8 routine */
+	mlib_VideoCopyRef_U8_U8_16x8 (dest, (uint8_t *) ref, stride);
+    else
+	mlib_VideoCopyRef_U8_U8_16x16 (dest, (uint8_t *) ref, stride);
+}
+
+static void MC_put_x_16_mlib (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{	/* dispatches to a mediaLib InterpX routine; stride fills both stride arguments */
+    if (height != 16)	/* non-16 heights take the 16x8 routine */
+	mlib_VideoInterpX_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride);
+    else
+	mlib_VideoInterpX_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride);
+}
+
+static void MC_put_y_16_mlib (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{	/* dispatches to a mediaLib InterpY routine; stride fills both stride arguments */
+    if (height != 16)	/* non-16 heights take the 16x8 routine */
+	mlib_VideoInterpY_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride);
+    else
+	mlib_VideoInterpY_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride);
+}
+
+static void MC_put_xy_16_mlib (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{	/* dispatches to a mediaLib InterpXY routine; stride fills both stride arguments */
+    if (height != 16)	/* non-16 heights take the 16x8 routine */
+	mlib_VideoInterpXY_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride);
+    else
+	mlib_VideoInterpXY_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride);
+}
+
+static void MC_put_o_8_mlib (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{	/* dispatches to a mediaLib CopyRef routine chosen by height */
+    if (height != 8)	/* every height other than 8 takes the 8x4 routine */
+	mlib_VideoCopyRef_U8_U8_8x4 (dest, (uint8_t *) ref, stride);
+    else
+	mlib_VideoCopyRef_U8_U8_8x8 (dest, (uint8_t *) ref, stride);
+}
+
+static void MC_put_x_8_mlib (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{	/* dispatches to a mediaLib InterpX routine; stride fills both stride arguments */
+    if (height != 8)	/* non-8 heights take the 8x4 routine */
+	mlib_VideoInterpX_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride);
+    else
+	mlib_VideoInterpX_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride);
+}
+
+static void MC_put_y_8_mlib (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{	/* dispatches to a mediaLib InterpY routine; stride fills both stride arguments */
+    if (height != 8)	/* non-8 heights take the 8x4 routine */
+	mlib_VideoInterpY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride);
+    else
+	mlib_VideoInterpY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride);
+}
+
+static void MC_put_xy_8_mlib (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{	/* dispatches to a mediaLib InterpXY routine; stride fills both stride arguments */
+    if (height != 8)	/* non-8 heights take the 8x4 routine */
+	mlib_VideoInterpXY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride);
+    else
+	mlib_VideoInterpXY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride);
+}
+
+static void MC_avg_o_16_mlib (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{	/* dispatches to a mediaLib CopyRefAve routine chosen by height */
+    if (height != 16)	/* every height other than 16 takes the 16x8 routine */
+	mlib_VideoCopyRefAve_U8_U8_16x8 (dest, (uint8_t *) ref, stride);
+    else
+	mlib_VideoCopyRefAve_U8_U8_16x16 (dest, (uint8_t *) ref, stride);
+}
+
+static void MC_avg_x_16_mlib (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{	/* dispatches to a mediaLib InterpAveX routine; stride fills both stride arguments */
+    if (height != 16)	/* non-16 heights take the 16x8 routine */
+	mlib_VideoInterpAveX_U8_U8_16x8 (dest, (uint8_t *) ref,
+					 stride, stride);
+    else
+	mlib_VideoInterpAveX_U8_U8_16x16 (dest, (uint8_t *) ref,
+					  stride, stride);
+}
+
+static void MC_avg_y_16_mlib (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{	/* dispatches to a mediaLib InterpAveY routine; stride fills both stride arguments */
+    if (height != 16)	/* non-16 heights take the 16x8 routine */
+	mlib_VideoInterpAveY_U8_U8_16x8 (dest, (uint8_t *) ref,
+					 stride, stride);
+    else
+	mlib_VideoInterpAveY_U8_U8_16x16 (dest, (uint8_t *) ref,
+					  stride, stride);
+}
+
+static void MC_avg_xy_16_mlib (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{	/* dispatches to a mediaLib InterpAveXY routine; stride fills both stride arguments */
+    if (height != 16)	/* non-16 heights take the 16x8 routine */
+	mlib_VideoInterpAveXY_U8_U8_16x8 (dest, (uint8_t *) ref,
+					  stride, stride);
+    else
+	mlib_VideoInterpAveXY_U8_U8_16x16 (dest, (uint8_t *) ref,
+					   stride, stride);
+}
+
+static void MC_avg_o_8_mlib (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{	/* dispatches to a mediaLib CopyRefAve routine chosen by height */
+    if (height != 8)	/* every height other than 8 takes the 8x4 routine */
+	mlib_VideoCopyRefAve_U8_U8_8x4 (dest, (uint8_t *) ref, stride);
+    else
+	mlib_VideoCopyRefAve_U8_U8_8x8 (dest, (uint8_t *) ref, stride);
+}
+
+static void MC_avg_x_8_mlib (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{	/* dispatches to a mediaLib InterpAveX routine; stride fills both stride arguments */
+    if (height != 8)	/* non-8 heights take the 8x4 routine */
+	mlib_VideoInterpAveX_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride);
+    else
+	mlib_VideoInterpAveX_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride);
+}
+
+static void MC_avg_y_8_mlib (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{	/* dispatches to a mediaLib InterpAveY routine; stride fills both stride arguments */
+    if (height != 8)	/* non-8 heights take the 8x4 routine */
+	mlib_VideoInterpAveY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride);
+    else
+	mlib_VideoInterpAveY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride);
+}
+
+static void MC_avg_xy_8_mlib (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{	/* dispatches to a mediaLib InterpAveXY routine; stride fills both stride arguments */
+    if (height != 8)	/* non-8 heights take the 8x4 routine */
+	mlib_VideoInterpAveXY_U8_U8_8x4 (dest, (uint8_t *) ref,
+					 stride, stride);
+    else
+	mlib_VideoInterpAveXY_U8_U8_8x8 (dest, (uint8_t *) ref,
+					 stride, stride);
+}
+
+MPEG2_MC_EXTERN (mlib)
+
+#endif
diff --git a/src/video_dec/libmpeg2new/libmpeg2/motion_comp_mmx.c b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_mmx.c
new file mode 100644
index 000000000..8694bdfea
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_mmx.c
@@ -0,0 +1,1005 @@
+/*
+ * motion_comp_mmx.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#ifdef ARCH_X86
+
+#include <inttypes.h>
+
+#include "../include/mpeg2.h"
+#include "../include/attributes.h"
+#include "mpeg2_internal.h"
+#include "../include/mmx.h"
+
+#define CPU_MMXEXT 0	/* selector for pavg_r2r/pavg_m2r: this value emits pavgb */
+#define CPU_3DNOW 1	/* any value other than CPU_MMXEXT emits pavgusb */
+
+
+/* MMX code - needs a rewrite */
+
+/*
+ * Motion Compensation frequently needs to average values using the
+ * formula (x+y+1)>>1. Both MMXEXT and 3Dnow include one instruction
+ * to compute this, but it's been left out of classic MMX.
+ *
+ * We need to be careful of overflows when doing this computation.
+ * Rather than unpacking data to 16-bits, which reduces parallelism,
+ * we use the following formulas:
+ *
+ * (x+y)>>1 == (x&y)+((x^y)>>1)
+ * (x+y+1)>>1 == (x|y)-((x^y)>>1)
+ */
+
+/* some rounding constants */
+static mmx_t mask1 = {0xfefefefefefefefeLL};	/* 0xfe per byte: clears bit 0 ahead of a 64-bit >>1 */
+static mmx_t round4 = {0x0002000200020002LL};	/* 2 per 16-bit word: added before the >>2 of the 4-input averages */
+
+/*
+ * This code should probably be compiled with loop unrolling
+ * (ie, -funroll-loops in gcc) because some of the loops
+ * use a small static number of iterations. This was written
+ * with the assumption the compiler knows best about when
+ * unrolling will help
+ */
+
+static inline void mmx_zero_reg (void)	/* (void) gives a real prototype; () would accept any arguments */
+{	/* clears mm0, which the 4-input averaging helpers interleave bytes against */
+    /* load 0 into mm0 */
+    pxor_r2r (mm0, mm0);
+}
+
+static inline void mmx_average_2_U8 (uint8_t * dest, const uint8_t * src1,
+				     const uint8_t * src2)
+{	/* handles 8 bytes per call; works in mm1-mm4 and never reads mm0 */
+    /* *dest = (*src1 + *src2 + 1)/ 2; */
+
+    movq_m2r (*src1, mm1);	/* load 8 src1 bytes */
+    movq_r2r (mm1, mm2);	/* copy 8 src1 bytes */
+
+    movq_m2r (*src2, mm3);	/* load 8 src2 bytes */
+    movq_r2r (mm3, mm4);	/* copy 8 src2 bytes */
+
+    pxor_r2r (mm1, mm3);	/* xor src1 and src2 */
+    pand_m2r (mask1, mm3);	/* clear bit 0 of each byte so the shift cannot leak into the next byte */
+    psrlq_i2r (1, mm3);		/* /2 (one 64-bit shift covers all 8 bytes) */
+    por_r2r (mm2, mm4);		/* or src1 and src2 */
+    psubb_r2r (mm3, mm4);	/* (x|y) - ((x^y)>>1), i.e. (x+y+1)>>1 */
+    movq_r2m (mm4, *dest);	/* store result in dest */
+}
+
+static inline void mmx_interp_average_2_U8 (uint8_t * dest,
+					    const uint8_t * src1,
+					    const uint8_t * src2)
+{	/* handles 8 bytes per call; works in mm1-mm6 and never reads mm0 */
+    /* *dest = (*dest + (*src1 + *src2 + 1)/ 2 + 1)/ 2; */
+
+    movq_m2r (*dest, mm1);	/* load 8 dest bytes */
+    movq_r2r (mm1, mm2);	/* copy 8 dest bytes */
+
+    movq_m2r (*src1, mm3);	/* load 8 src1 bytes */
+    movq_r2r (mm3, mm4);	/* copy 8 src1 bytes */
+
+    movq_m2r (*src2, mm5);	/* load 8 src2 bytes */
+    movq_r2r (mm5, mm6);	/* copy 8 src2 bytes */
+
+    /* first stage: rounded average of src1 and src2, left in mm6 */
+    pxor_r2r (mm3, mm5);	/* xor src1 and src2 */
+    pand_m2r (mask1, mm5);	/* mask lower bits */
+    psrlq_i2r (1, mm5);		/* /2 */
+    por_r2r (mm4, mm6);		/* or src1 and src2 */
+    psubb_r2r (mm5, mm6);	/* subtract subresults */
+    movq_r2r (mm6, mm5);	/* copy subresult */
+
+    /* second stage: same formula applied to that average and dest */
+    pxor_r2r (mm1, mm5);	/* xor srcavg and dest */
+    pand_m2r (mask1, mm5);	/* mask lower bits */
+    psrlq_i2r (1, mm5);		/* /2 */
+    por_r2r (mm2, mm6);		/* or srcavg and dest */
+    psubb_r2r (mm5, mm6);	/* subtract subresults */
+    movq_r2m (mm6, *dest);	/* store result in dest */
+}
+
+static inline void mmx_average_4_U8 (uint8_t * dest, const uint8_t * src1,
+				     const uint8_t * src2,
+				     const uint8_t * src3,
+				     const uint8_t * src4)
+{	/* requires mm0 == 0 on entry (callers run mmx_zero_reg first); works in mm1-mm6 */
+    /* *dest = (*src1 + *src2 + *src3 + *src4 + 2)/ 4; */
+
+    movq_m2r (*src1, mm1);	/* load 8 src1 bytes */
+    movq_r2r (mm1, mm2);	/* copy 8 src1 bytes */
+
+    punpcklbw_r2r (mm0, mm1);	/* unpack low src1 bytes (interleaved with mm0) */
+    punpckhbw_r2r (mm0, mm2);	/* unpack high src1 bytes */
+
+    movq_m2r (*src2, mm3);	/* load 8 src2 bytes */
+    movq_r2r (mm3, mm4);	/* copy 8 src2 bytes */
+
+    punpcklbw_r2r (mm0, mm3);	/* unpack low src2 bytes */
+    punpckhbw_r2r (mm0, mm4);	/* unpack high src2 bytes */
+
+    paddw_r2r (mm3, mm1);	/* add lows */
+    paddw_r2r (mm4, mm2);	/* add highs */
+
+    /* now have partials in mm1 and mm2 */
+
+    movq_m2r (*src3, mm3);	/* load 8 src3 bytes */
+    movq_r2r (mm3, mm4);	/* copy 8 src3 bytes */
+
+    punpcklbw_r2r (mm0, mm3);	/* unpack low src3 bytes */
+    punpckhbw_r2r (mm0, mm4);	/* unpack high src3 bytes */
+
+    paddw_r2r (mm3, mm1);	/* add lows */
+    paddw_r2r (mm4, mm2);	/* add highs */
+
+    movq_m2r (*src4, mm5);	/* load 8 src4 bytes */
+    movq_r2r (mm5, mm6);	/* copy 8 src4 bytes */
+
+    punpcklbw_r2r (mm0, mm5);	/* unpack low src4 bytes */
+    punpckhbw_r2r (mm0, mm6);	/* unpack high src4 bytes */
+
+    paddw_r2r (mm5, mm1);	/* add lows */
+    paddw_r2r (mm6, mm2);	/* add highs */
+
+    /* now have subtotal in mm1 and mm2 */
+
+    paddw_m2r (round4, mm1);	/* +2 in every 16-bit word */
+    psraw_i2r (2, mm1);		/* /4 */
+    paddw_m2r (round4, mm2);	/* +2 in every 16-bit word */
+    psraw_i2r (2, mm2);		/* /4 */
+
+    packuswb_r2r (mm2, mm1);	/* pack (w/ saturation) */
+    movq_r2m (mm1, *dest);	/* store result in dest */
+}
+
+static inline void mmx_interp_average_4_U8 (uint8_t * dest,
+					    const uint8_t * src1,
+					    const uint8_t * src2,
+					    const uint8_t * src3,
+					    const uint8_t * src4)
+{	/* requires mm0 == 0 on entry (callers run mmx_zero_reg first); works in mm1-mm6 */
+    /* *dest = (*dest + (*src1 + *src2 + *src3 + *src4 + 2)/ 4 + 1)/ 2; */
+
+    movq_m2r (*src1, mm1);	/* load 8 src1 bytes */
+    movq_r2r (mm1, mm2);	/* copy 8 src1 bytes */
+
+    punpcklbw_r2r (mm0, mm1);	/* unpack low src1 bytes (interleaved with mm0) */
+    punpckhbw_r2r (mm0, mm2);	/* unpack high src1 bytes */
+
+    movq_m2r (*src2, mm3);	/* load 8 src2 bytes */
+    movq_r2r (mm3, mm4);	/* copy 8 src2 bytes */
+
+    punpcklbw_r2r (mm0, mm3);	/* unpack low src2 bytes */
+    punpckhbw_r2r (mm0, mm4);	/* unpack high src2 bytes */
+
+    paddw_r2r (mm3, mm1);	/* add lows */
+    paddw_r2r (mm4, mm2);	/* add highs */
+
+    /* now have partials in mm1 and mm2 */
+
+    movq_m2r (*src3, mm3);	/* load 8 src3 bytes */
+    movq_r2r (mm3, mm4);	/* copy 8 src3 bytes */
+
+    punpcklbw_r2r (mm0, mm3);	/* unpack low src3 bytes */
+    punpckhbw_r2r (mm0, mm4);	/* unpack high src3 bytes */
+
+    paddw_r2r (mm3, mm1);	/* add lows */
+    paddw_r2r (mm4, mm2);	/* add highs */
+
+    movq_m2r (*src4, mm5);	/* load 8 src4 bytes */
+    movq_r2r (mm5, mm6);	/* copy 8 src4 bytes */
+
+    punpcklbw_r2r (mm0, mm5);	/* unpack low src4 bytes */
+    punpckhbw_r2r (mm0, mm6);	/* unpack high src4 bytes */
+
+    paddw_r2r (mm5, mm1);	/* add lows */
+    paddw_r2r (mm6, mm2);	/* add highs */
+
+    paddw_m2r (round4, mm1);	/* +2 in every 16-bit word */
+    psraw_i2r (2, mm1);		/* /4 */
+    paddw_m2r (round4, mm2);	/* +2 in every 16-bit word */
+    psraw_i2r (2, mm2);		/* /4 */
+
+    /* now have subtotal/4 in mm1 and mm2 */
+
+    movq_m2r (*dest, mm3);	/* load 8 dest bytes */
+    movq_r2r (mm3, mm4);	/* copy 8 dest bytes */
+
+    packuswb_r2r (mm2, mm1);	/* pack (w/ saturation) */
+    movq_r2r (mm1,mm2);		/* copy subresult */
+
+    /* second stage: (x|y) - ((x^y)>>1) on the packed average and dest */
+    pxor_r2r (mm1, mm3);	/* xor srcavg and dest */
+    pand_m2r (mask1, mm3);	/* mask lower bits */
+    psrlq_i2r (1, mm3);		/* /2 */
+    por_r2r (mm2, mm4);		/* or srcavg and dest */
+    psubb_r2r (mm3, mm4);	/* subtract subresults */
+    movq_r2m (mm4, *dest);	/* store result in dest */
+}
+
+/*-----------------------------------------------------------------------*/
+
+static inline void MC_avg_mmx (const int width, int height, uint8_t * dest,
+			       const uint8_t * ref, const int stride)
+{	/* per row: dest = rounded average of dest and ref */
+    mmx_zero_reg ();	/* clears mm0, although mmx_average_2_U8 never reads it */
+
+    do {	/* do/while: the body runs once before height is tested */
+	mmx_average_2_U8 (dest, dest, ref);
+
+	if (width == 16)	/* second 8-byte group of a 16-wide row */
+	    mmx_average_2_U8 (dest+8, dest+8, ref+8);
+
+	dest += stride;
+	ref += stride;
+    } while (--height);
+}
+
+static void MC_avg_o_16_mmx (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{
+    MC_avg_mmx (16, height, dest, ref, stride);	/* width 16: both 8-byte groups of each row */
+}
+
+static void MC_avg_o_8_mmx (uint8_t * dest, const uint8_t * ref,
+			    int stride, int height)
+{
+    MC_avg_mmx (8, height, dest, ref, stride);	/* width 8: one 8-byte group per row */
+}
+
+/*-----------------------------------------------------------------------*/
+
+static inline void MC_put_mmx (const int width, int height, uint8_t * dest,
+			       const uint8_t * ref, const int stride)
+{	/* per row: copy 8 or 16 bytes from ref to dest through mm1 */
+    mmx_zero_reg ();	/* clears mm0, although the copy below only uses mm1 */
+
+    do {	/* do/while: the body runs once before height is tested */
+	movq_m2r (* ref, mm1);	/* load 8 ref bytes */
+	movq_r2m (mm1,* dest);	/* store 8 bytes at curr */
+
+	if (width == 16)
+	    {
+		movq_m2r (* (ref+8), mm1);	/* load 8 ref bytes */
+		movq_r2m (mm1,* (dest+8));	/* store 8 bytes at curr */
+	    }
+
+	dest += stride;
+	ref += stride;
+    } while (--height);
+}
+
+static void MC_put_o_16_mmx (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{
+    MC_put_mmx (16, height, dest, ref, stride);	/* width 16: both 8-byte groups of each row */
+}
+
+static void MC_put_o_8_mmx (uint8_t * dest, const uint8_t * ref,
+			    int stride, int height)
+{
+    MC_put_mmx (8, height, dest, ref, stride);	/* width 8: one 8-byte group per row */
+}
+
+/*-----------------------------------------------------------------------*/
+
+/* Half pixel interpolation in the x direction */
+static inline void MC_avg_x_mmx (const int width, int height, uint8_t * dest,
+				 const uint8_t * ref, const int stride)
+{	/* per row: average ref[x] with ref[x+1], then average that with dest */
+    mmx_zero_reg ();	/* clears mm0, although mmx_interp_average_2_U8 never reads it */
+
+    do {	/* do/while: the body runs once before height is tested */
+	mmx_interp_average_2_U8 (dest, ref, ref+1);
+
+	if (width == 16)	/* second 8-byte group of a 16-wide row */
+	    mmx_interp_average_2_U8 (dest+8, ref+8, ref+9);
+
+	dest += stride;
+	ref += stride;
+    } while (--height);
+}
+
+static void MC_avg_x_16_mmx (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{
+    MC_avg_x_mmx (16, height, dest, ref, stride);	/* width 16: both 8-byte groups of each row */
+}
+
+static void MC_avg_x_8_mmx (uint8_t * dest, const uint8_t * ref,
+			    int stride, int height)
+{
+    MC_avg_x_mmx (8, height, dest, ref, stride);	/* width 8: one 8-byte group per row */
+}
+
+/*-----------------------------------------------------------------------*/
+
+static inline void MC_put_x_mmx (const int width, int height, uint8_t * dest,
+				 const uint8_t * ref, const int stride)
+{	/* per row: dest = rounded average of ref[x] and ref[x+1] */
+    mmx_zero_reg ();	/* clears mm0, although mmx_average_2_U8 never reads it */
+
+    do {	/* do/while: the body runs once before height is tested */
+	mmx_average_2_U8 (dest, ref, ref+1);
+
+	if (width == 16)	/* second 8-byte group of a 16-wide row */
+	    mmx_average_2_U8 (dest+8, ref+8, ref+9);
+
+	dest += stride;
+	ref += stride;
+    } while (--height);
+}
+
+static void MC_put_x_16_mmx (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{
+    MC_put_x_mmx (16, height, dest, ref, stride);	/* width 16: both 8-byte groups of each row */
+}
+
+static void MC_put_x_8_mmx (uint8_t * dest, const uint8_t * ref,
+			    int stride, int height)
+{
+    MC_put_x_mmx (8, height, dest, ref, stride);	/* width 8: one 8-byte group per row */
+}
+
+/*-----------------------------------------------------------------------*/
+
+static inline void MC_avg_xy_mmx (const int width, int height, uint8_t * dest,
+				  const uint8_t * ref, const int stride)
+{	/* per row: 4-sample average of a 2x2 ref neighbourhood, then averaged with dest */
+    const uint8_t * ref_next = ref + stride;	/* the row below ref */
+
+    mmx_zero_reg ();	/* needed: mmx_interp_average_4_U8 interleaves bytes against mm0 */
+
+    do {	/* do/while: the body runs once before height is tested */
+	mmx_interp_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1);
+
+	if (width == 16)	/* second 8-byte group of a 16-wide row */
+	    mmx_interp_average_4_U8 (dest+8, ref+8, ref+9,
+				     ref_next+8, ref_next+9);
+
+	dest += stride;
+	ref += stride;
+	ref_next += stride;
+    } while (--height);
+}
+
+static void MC_avg_xy_16_mmx (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{
+    MC_avg_xy_mmx (16, height, dest, ref, stride);	/* width 16: both 8-byte groups of each row */
+}
+
+static void MC_avg_xy_8_mmx (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{
+    MC_avg_xy_mmx (8, height, dest, ref, stride);	/* width 8: one 8-byte group per row */
+}
+
+/*-----------------------------------------------------------------------*/
+
+static inline void MC_put_xy_mmx (const int width, int height, uint8_t * dest,
+				  const uint8_t * ref, const int stride)
+{	/* per row: dest = 4-sample average of a 2x2 ref neighbourhood */
+    const uint8_t * ref_next = ref + stride;	/* the row below ref */
+
+    mmx_zero_reg ();	/* needed: mmx_average_4_U8 interleaves bytes against mm0 */
+
+    do {	/* do/while: the body runs once before height is tested */
+	mmx_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1);
+
+	if (width == 16)	/* second 8-byte group of a 16-wide row */
+	    mmx_average_4_U8 (dest+8, ref+8, ref+9, ref_next+8, ref_next+9);
+
+	dest += stride;
+	ref += stride;
+	ref_next += stride;
+    } while (--height);
+}
+
+static void MC_put_xy_16_mmx (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{
+    MC_put_xy_mmx (16, height, dest, ref, stride);	/* width 16: both 8-byte groups of each row */
+}
+
+static void MC_put_xy_8_mmx (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{
+    MC_put_xy_mmx (8, height, dest, ref, stride);	/* width 8: one 8-byte group per row */
+}
+
+/*-----------------------------------------------------------------------*/
+
+static inline void MC_avg_y_mmx (const int width, int height, uint8_t * dest,
+				 const uint8_t * ref, const int stride)
+{	/* per row: average ref with the row below it, then average that with dest */
+    const uint8_t * ref_next = ref + stride;	/* the row below ref */
+
+    mmx_zero_reg ();	/* clears mm0, although mmx_interp_average_2_U8 never reads it */
+
+    do {	/* do/while: the body runs once before height is tested */
+	mmx_interp_average_2_U8 (dest, ref, ref_next);
+
+	if (width == 16)	/* second 8-byte group of a 16-wide row */
+	    mmx_interp_average_2_U8 (dest+8, ref+8, ref_next+8);
+
+	dest += stride;
+	ref += stride;
+	ref_next += stride;
+    } while (--height);
+}
+
+static void MC_avg_y_16_mmx (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{
+    MC_avg_y_mmx (16, height, dest, ref, stride);	/* width 16: both 8-byte groups of each row */
+}
+
+static void MC_avg_y_8_mmx (uint8_t * dest, const uint8_t * ref,
+			    int stride, int height)
+{
+    MC_avg_y_mmx (8, height, dest, ref, stride);	/* width 8: one 8-byte group per row */
+}
+
+/*-----------------------------------------------------------------------*/
+
+static inline void MC_put_y_mmx (const int width, int height, uint8_t * dest,
+				 const uint8_t * ref, const int stride)
+{	/* per row: dest = rounded average of ref and the row below it */
+    const uint8_t * ref_next = ref + stride;	/* the row below ref */
+
+    mmx_zero_reg ();	/* clears mm0, although mmx_average_2_U8 never reads it */
+
+    do {	/* do/while: the body runs once before height is tested */
+	mmx_average_2_U8 (dest, ref, ref_next);
+
+	if (width == 16)	/* second 8-byte group of a 16-wide row */
+	    mmx_average_2_U8 (dest+8, ref+8, ref_next+8);
+
+	dest += stride;
+	ref += stride;
+	ref_next += stride;
+    } while (--height);
+}
+
+static void MC_put_y_16_mmx (uint8_t * dest, const uint8_t * ref,
+			     int stride, int height)
+{
+    MC_put_y_mmx (16, height, dest, ref, stride);	/* width 16: both 8-byte groups of each row */
+}
+
+static void MC_put_y_8_mmx (uint8_t * dest, const uint8_t * ref,
+			    int stride, int height)
+{
+    MC_put_y_mmx (8, height, dest, ref, stride);	/* width 8: one 8-byte group per row */
+}
+
+
+MPEG2_MC_EXTERN (mmx)
+
+
+
+
+
+
+
+/* CPU_MMXEXT/CPU_3DNOW adaptation layer */
+
+#define pavg_r2r(src,dest)		\
+do {					\
+    if (cpu == CPU_MMXEXT)		\
+	pavgb_r2r (src, dest);		\
+    else				\
+	pavgusb_r2r (src, dest);	\
+} while (0)	/* register-to-register average; needs an int named cpu in scope where it expands */
+
+#define pavg_m2r(src,dest)		\
+do {					\
+    if (cpu == CPU_MMXEXT)		\
+	pavgb_m2r (src, dest);		\
+    else				\
+	pavgusb_m2r (src, dest);	\
+} while (0)	/* memory-to-register average; needs an int named cpu in scope where it expands */
+
+
+/* CPU_MMXEXT code */
+
+
+static inline void MC_put1_8 (int height, uint8_t * dest, const uint8_t * ref,
+			      const int stride)
+{	/* copies 8 bytes per row from ref to dest through mm0 */
+    do {	/* do/while: the body runs once before height is tested */
+	movq_m2r (*ref, mm0);
+	movq_r2m (mm0, *dest);
+	ref += stride;
+	dest += stride;
+    } while (--height);
+}
+
+static inline void MC_put1_16 (int height, uint8_t * dest, const uint8_t * ref,
+			       const int stride)
+{	/* copies 16 bytes per row from ref to dest through mm0 and mm1 */
+    do {	/* do/while: the body runs once before height is tested */
+	movq_m2r (*ref, mm0);
+	movq_m2r (*(ref+8), mm1);
+	ref += stride;
+	movq_r2m (mm0, *dest);
+	movq_r2m (mm1, *(dest+8));
+	dest += stride;
+    } while (--height);
+}
+
+static inline void MC_avg1_8 (int height, uint8_t * dest, const uint8_t * ref,
+			      const int stride, const int cpu)
+{	/* per row: dest = average of ref and dest, 8 bytes; cpu picks the opcode in pavg_m2r */
+    do {	/* do/while: the body runs once before height is tested */
+	movq_m2r (*ref, mm0);
+	pavg_m2r (*dest, mm0);	/* fold the existing dest bytes into mm0 */
+	ref += stride;
+	movq_r2m (mm0, *dest);
+	dest += stride;
+    } while (--height);
+}
+
+static inline void MC_avg1_16 (int height, uint8_t * dest, const uint8_t * ref,
+			       const int stride, const int cpu)
+{	/* per row: dest = average of ref and dest, 16 bytes; cpu picks the opcode in pavg_m2r */
+    do {	/* do/while: the body runs once before height is tested */
+	movq_m2r (*ref, mm0);
+	movq_m2r (*(ref+8), mm1);
+	pavg_m2r (*dest, mm0);	/* fold the existing dest bytes into mm0 */
+	pavg_m2r (*(dest+8), mm1);
+	movq_r2m (mm0, *dest);
+	ref += stride;
+	movq_r2m (mm1, *(dest+8));
+	dest += stride;
+    } while (--height);
+}
+
+static inline void MC_put2_8 (int height, uint8_t * dest, const uint8_t * ref,
+			      const int stride, const int offset,
+			      const int cpu)
+{	/* per row: dest = average of ref and ref+offset, 8 bytes; callers pass offset 1 or stride */
+    do {	/* do/while: the body runs once before height is tested */
+	movq_m2r (*ref, mm0);
+	pavg_m2r (*(ref+offset), mm0);
+	ref += stride;
+	movq_r2m (mm0, *dest);
+	dest += stride;
+    } while (--height);
+}
+
+static inline void MC_put2_16 (int height, uint8_t * dest, const uint8_t * ref,
+			       const int stride, const int offset,
+			       const int cpu)
+{	/* per row: dest = average of ref and ref+offset, 16 bytes; callers pass offset 1 or stride */
+    do {	/* do/while: the body runs once before height is tested */
+	movq_m2r (*ref, mm0);
+	movq_m2r (*(ref+8), mm1);
+	pavg_m2r (*(ref+offset), mm0);
+	pavg_m2r (*(ref+offset+8), mm1);
+	movq_r2m (mm0, *dest);
+	ref += stride;
+	movq_r2m (mm1, *(dest+8));
+	dest += stride;
+    } while (--height);
+}
+
+static inline void MC_avg2_8 (int height, uint8_t * dest, const uint8_t * ref,
+			      const int stride, const int offset,
+			      const int cpu)
+{	/* per row: average ref with ref+offset, then with dest, 8 bytes */
+    do {	/* do/while: the body runs once before height is tested */
+	movq_m2r (*ref, mm0);
+	pavg_m2r (*(ref+offset), mm0);	/* interpolate between the two ref samples */
+	pavg_m2r (*dest, mm0);	/* fold the existing dest bytes in */
+	ref += stride;
+	movq_r2m (mm0, *dest);
+	dest += stride;
+    } while (--height);
+}
+
+static inline void MC_avg2_16 (int height, uint8_t * dest, const uint8_t * ref,
+			       const int stride, const int offset,
+			       const int cpu)
+{	/* per row: average ref with ref+offset, then with dest, 16 bytes */
+    do {	/* do/while: the body runs once before height is tested */
+	movq_m2r (*ref, mm0);
+	movq_m2r (*(ref+8), mm1);
+	pavg_m2r (*(ref+offset), mm0);	/* interpolate between the two ref samples */
+	pavg_m2r (*(ref+offset+8), mm1);
+	pavg_m2r (*dest, mm0);	/* fold the existing dest bytes in */
+	pavg_m2r (*(dest+8), mm1);
+	ref += stride;
+	movq_r2m (mm0, *dest);
+	movq_r2m (mm1, *(dest+8));
+	dest += stride;
+    } while (--height);
+}
+
+static mmx_t mask_one = {0x0101010101010101LL};	/* 0x01 per byte: keeps only bit 0 of the rounding-correction term */
+
+static inline void MC_put4_8 (int height, uint8_t * dest, const uint8_t * ref,
+			      const int stride, const int cpu)
+{	/* 8-wide 4-sample put; each ref row is loaded once and carried to the next pass in mm0/mm7 */
+    movq_m2r (*ref, mm0);
+    movq_m2r (*(ref+1), mm1);
+    movq_r2r (mm0, mm7);
+    pxor_r2r (mm1, mm7);	/* mm7 = ref[x] ^ ref[x+1] for the first row */
+    pavg_r2r (mm1, mm0);	/* mm0 = horizontal average of the first row */
+    ref += stride;
+
+    do {	/* do/while: the body runs once before height is tested */
+	movq_m2r (*ref, mm2);
+	movq_r2r (mm0, mm5);
+
+	movq_m2r (*(ref+1), mm3);
+	movq_r2r (mm2, mm6);
+
+	pxor_r2r (mm3, mm6);	/* mm6 = xor of the new row's two samples */
+	pavg_r2r (mm3, mm2);	/* mm2 = horizontal average of the new row */
+
+	por_r2r (mm6, mm7);	/* or of both rows' xor terms */
+	pxor_r2r (mm2, mm5);	/* mm5 = previous row average ^ new row average */
+
+	pand_r2r (mm5, mm7);
+	pavg_r2r (mm2, mm0);	/* average of the two row averages */
+
+	pand_m2r (mask_one, mm7);	/* keep bit 0 only: a 0-or-1 correction per byte */
+
+	psubusb_r2r (mm7, mm0);	/* take the correction off the doubly averaged value */
+
+	ref += stride;
+	movq_r2m (mm0, *dest);
+	dest += stride;
+
+	movq_r2r (mm6, mm7);	/* unroll ! */
+	movq_r2r (mm2, mm0);	/* unroll ! */
+    } while (--height);
+}
+
+static inline void MC_put4_16 (int height, uint8_t * dest, const uint8_t * ref,
+			       const int stride, const int cpu)
+{	/* 16-wide 4-sample put; samples are paired diagonally and both rows are reloaded every pass */
+    do {	/* do/while: the body runs once before height is tested */
+	movq_m2r (*ref, mm0);
+	movq_m2r (*(ref+stride+1), mm1);
+	movq_r2r (mm0, mm7);
+	movq_m2r (*(ref+1), mm2);
+	pxor_r2r (mm1, mm7);	/* mm7 = ref[0] ^ ref[stride+1] */
+	movq_m2r (*(ref+stride), mm3);
+	movq_r2r (mm2, mm6);
+	pxor_r2r (mm3, mm6);	/* mm6 = ref[1] ^ ref[stride] */
+	pavg_r2r (mm1, mm0);	/* average of ref[0] and ref[stride+1] */
+	pavg_r2r (mm3, mm2);	/* average of ref[1] and ref[stride] */
+	por_r2r (mm6, mm7);
+	movq_r2r (mm0, mm6);
+	pxor_r2r (mm2, mm6);	/* xor of the two pair averages */
+	pand_r2r (mm6, mm7);
+	pand_m2r (mask_one, mm7);	/* keep bit 0 only: a 0-or-1 correction per byte */
+	pavg_r2r (mm2, mm0);
+	psubusb_r2r (mm7, mm0);	/* take the correction off the doubly averaged value */
+	movq_r2m (mm0, *dest);
+
+	movq_m2r (*(ref+8), mm0);	/* same sequence for bytes 8..15 of the row */
+	movq_m2r (*(ref+stride+9), mm1);
+	movq_r2r (mm0, mm7);
+	movq_m2r (*(ref+9), mm2);
+	pxor_r2r (mm1, mm7);
+	movq_m2r (*(ref+stride+8), mm3);
+	movq_r2r (mm2, mm6);
+	pxor_r2r (mm3, mm6);
+	pavg_r2r (mm1, mm0);
+	pavg_r2r (mm3, mm2);
+	por_r2r (mm6, mm7);
+	movq_r2r (mm0, mm6);
+	pxor_r2r (mm2, mm6);
+	pand_r2r (mm6, mm7);
+	pand_m2r (mask_one, mm7);
+	pavg_r2r (mm2, mm0);
+	psubusb_r2r (mm7, mm0);
+	ref += stride;
+	movq_r2m (mm0, *(dest+8));
+	dest += stride;
+    } while (--height);
+}
+
+static inline void MC_avg4_8 (int height, uint8_t * dest, const uint8_t * ref,
+			      const int stride, const int cpu)
+{	/* 8-wide 4-sample average that is then averaged with the existing dest bytes */
+    do {	/* do/while: the body runs once before height is tested */
+	movq_m2r (*ref, mm0);
+	movq_m2r (*(ref+stride+1), mm1);
+	movq_r2r (mm0, mm7);
+	movq_m2r (*(ref+1), mm2);
+	pxor_r2r (mm1, mm7);	/* mm7 = ref[0] ^ ref[stride+1] */
+	movq_m2r (*(ref+stride), mm3);
+	movq_r2r (mm2, mm6);
+	pxor_r2r (mm3, mm6);	/* mm6 = ref[1] ^ ref[stride] */
+	pavg_r2r (mm1, mm0);	/* average of ref[0] and ref[stride+1] */
+	pavg_r2r (mm3, mm2);	/* average of ref[1] and ref[stride] */
+	por_r2r (mm6, mm7);
+	movq_r2r (mm0, mm6);
+	pxor_r2r (mm2, mm6);	/* xor of the two pair averages */
+	pand_r2r (mm6, mm7);
+	pand_m2r (mask_one, mm7);	/* keep bit 0 only: a 0-or-1 correction per byte */
+	pavg_r2r (mm2, mm0);
+	psubusb_r2r (mm7, mm0);	/* take the correction off the doubly averaged value */
+	movq_m2r (*dest, mm1);
+	pavg_r2r (mm1, mm0);	/* fold the existing dest bytes in */
+	ref += stride;
+	movq_r2m (mm0, *dest);
+	dest += stride;
+    } while (--height);
+}
+
+static inline void MC_avg4_16 (int height, uint8_t * dest, const uint8_t * ref,
+			       const int stride, const int cpu)
+{	/* 16-wide 4-sample average that is then averaged with the existing dest bytes */
+    do {	/* do/while: the body runs once before height is tested */
+	movq_m2r (*ref, mm0);
+	movq_m2r (*(ref+stride+1), mm1);
+	movq_r2r (mm0, mm7);
+	movq_m2r (*(ref+1), mm2);
+	pxor_r2r (mm1, mm7);	/* mm7 = ref[0] ^ ref[stride+1] */
+	movq_m2r (*(ref+stride), mm3);
+	movq_r2r (mm2, mm6);
+	pxor_r2r (mm3, mm6);	/* mm6 = ref[1] ^ ref[stride] */
+	pavg_r2r (mm1, mm0);	/* average of ref[0] and ref[stride+1] */
+	pavg_r2r (mm3, mm2);	/* average of ref[1] and ref[stride] */
+	por_r2r (mm6, mm7);
+	movq_r2r (mm0, mm6);
+	pxor_r2r (mm2, mm6);	/* xor of the two pair averages */
+	pand_r2r (mm6, mm7);
+	pand_m2r (mask_one, mm7);	/* keep bit 0 only: a 0-or-1 correction per byte */
+	pavg_r2r (mm2, mm0);
+	psubusb_r2r (mm7, mm0);	/* take the correction off the doubly averaged value */
+	movq_m2r (*dest, mm1);
+	pavg_r2r (mm1, mm0);	/* fold the existing dest bytes in */
+	movq_r2m (mm0, *dest);
+
+	movq_m2r (*(ref+8), mm0);	/* same sequence for bytes 8..15 of the row */
+	movq_m2r (*(ref+stride+9), mm1);
+	movq_r2r (mm0, mm7);
+	movq_m2r (*(ref+9), mm2);
+	pxor_r2r (mm1, mm7);
+	movq_m2r (*(ref+stride+8), mm3);
+	movq_r2r (mm2, mm6);
+	pxor_r2r (mm3, mm6);
+	pavg_r2r (mm1, mm0);
+	pavg_r2r (mm3, mm2);
+	por_r2r (mm6, mm7);
+	movq_r2r (mm0, mm6);
+	pxor_r2r (mm2, mm6);
+	pand_r2r (mm6, mm7);
+	pand_m2r (mask_one, mm7);
+	pavg_r2r (mm2, mm0);
+	psubusb_r2r (mm7, mm0);
+	movq_m2r (*(dest+8), mm1);
+	pavg_r2r (mm1, mm0);
+	ref += stride;
+	movq_r2m (mm0, *(dest+8));
+	dest += stride;
+    } while (--height);
+}
+
+static void MC_avg_o_16_mmxext (uint8_t * dest, const uint8_t * ref,
+				int stride, int height)
+{
+    MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT);	/* CPU_MMXEXT makes the pavg_* macros emit pavgb */
+}
+
+static void MC_avg_o_8_mmxext (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{
+    MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT);	/* CPU_MMXEXT makes the pavg_* macros emit pavgb */
+}
+
+static void MC_put_o_16_mmxext (uint8_t * dest, const uint8_t * ref,
+				int stride, int height)
+{
+    MC_put1_16 (height, dest, ref, stride);	/* plain copy: no cpu selector because no averaging opcode is used */
+}
+
+static void MC_put_o_8_mmxext (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{
+    MC_put1_8 (height, dest, ref, stride);	/* plain copy: no cpu selector because no averaging opcode is used */
+}
+
+static void MC_avg_x_16_mmxext (uint8_t * dest, const uint8_t * ref,
+				int stride, int height)
+{
+    MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);	/* offset 1: second sample is ref+1 */
+}
+
+static void MC_avg_x_8_mmxext (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{
+    MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);	/* offset 1: second sample is ref+1 */
+}
+
+static void MC_put_x_16_mmxext (uint8_t * dest, const uint8_t * ref,
+				int stride, int height)
+{
+    MC_put2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);	/* offset 1: second sample is ref+1 */
+}
+
+static void MC_put_x_8_mmxext (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{
+    MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);	/* offset 1: second sample is ref+1 */
+}
+
+static void MC_avg_y_16_mmxext (uint8_t * dest, const uint8_t * ref,
+				int stride, int height)
+{
+    MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);	/* offset stride: second sample is one row down */
+}
+
+static void MC_avg_y_8_mmxext (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{
+    MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);	/* offset stride: second sample is one row down */
+}
+
+static void MC_put_y_16_mmxext (uint8_t * dest, const uint8_t * ref,
+				int stride, int height)
+{
+    MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);	/* offset stride: second sample is one row down */
+}
+
+static void MC_put_y_8_mmxext (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{
+    MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);	/* offset stride: second sample is one row down */
+}
+
+static void MC_avg_xy_16_mmxext (uint8_t * dest, const uint8_t * ref,
+				 int stride, int height)
+{
+    MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT);	/* 4-sample average folded into dest, pavgb flavour */
+}
+
+static void MC_avg_xy_8_mmxext (uint8_t * dest, const uint8_t * ref,
+				int stride, int height)
+{
+    MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT);	/* 4-sample average folded into dest, pavgb flavour */
+}
+
+static void MC_put_xy_16_mmxext (uint8_t * dest, const uint8_t * ref,
+				 int stride, int height)
+{
+    MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT);	/* 4-sample average stored to dest, pavgb flavour */
+}
+
+static void MC_put_xy_8_mmxext (uint8_t * dest, const uint8_t * ref,
+				int stride, int height)
+{
+    MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT);	/* 4-sample average stored to dest, pavgb flavour */
+}
+
+
+MPEG2_MC_EXTERN (mmxext)
+
+
+
+static void MC_avg_o_16_3dnow (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{
+    MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW);	/* CPU_3DNOW makes the pavg_* macros emit pavgusb */
+}
+
+static void MC_avg_o_8_3dnow (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{
+    MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW);	/* CPU_3DNOW makes the pavg_* macros emit pavgusb */
+}
+
+static void MC_put_o_16_3dnow (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{
+    MC_put1_16 (height, dest, ref, stride);	/* plain copy: same helper call as the mmxext variant */
+}
+
+static void MC_put_o_8_3dnow (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{
+    MC_put1_8 (height, dest, ref, stride);	/* plain copy: same helper call as the mmxext variant */
+}
+
+static void MC_avg_x_16_3dnow (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{
+    MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW);	/* offset 1: second sample is ref+1 */
+}
+
+static void MC_avg_x_8_3dnow (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{
+    MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW);	/* offset 1: second sample is ref+1 */
+}
+
+static void MC_put_x_16_3dnow (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{
+    MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW);	/* offset 1: second sample is ref+1 */
+}
+
+static void MC_put_x_8_3dnow (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{
+    MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW);	/* offset 1: second sample is ref+1 */
+}
+
+static void MC_avg_y_16_3dnow (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{
+    MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW);	/* offset stride: second sample is one row down */
+}
+
+static void MC_avg_y_8_3dnow (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{
+    MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW);	/* offset stride: second sample is one row down */
+}
+
+static void MC_put_y_16_3dnow (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{
+    MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW);	/* offset stride: second sample is one row down */
+}
+
+static void MC_put_y_8_3dnow (uint8_t * dest, const uint8_t * ref,
+			      int stride, int height)
+{
+    MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW);	/* offset stride: second sample is one row down */
+}
+
+static void MC_avg_xy_16_3dnow (uint8_t * dest, const uint8_t * ref,
+				int stride, int height)
+{
+    MC_avg4_16 (height, dest, ref, stride, CPU_3DNOW);	/* 4-sample average folded into dest, pavgusb flavour */
+}
+
+static void MC_avg_xy_8_3dnow (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{
+    MC_avg4_8 (height, dest, ref, stride, CPU_3DNOW);	/* 4-sample average folded into dest, pavgusb flavour */
+}
+
+static void MC_put_xy_16_3dnow (uint8_t * dest, const uint8_t * ref,
+				int stride, int height)
+{
+    MC_put4_16 (height, dest, ref, stride, CPU_3DNOW);	/* 4-sample average stored to dest, pavgusb flavour */
+}
+
+static void MC_put_xy_8_3dnow (uint8_t * dest, const uint8_t * ref,
+			       int stride, int height)
+{
+    MC_put4_8 (height, dest, ref, stride, CPU_3DNOW);	/* 4-sample average stored to dest, pavgusb flavour */
+}
+
+
+MPEG2_MC_EXTERN (3dnow)
+
+#endif
diff --git a/src/video_dec/libmpeg2new/libmpeg2/motion_comp_vis.c b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_vis.c
new file mode 100644
index 000000000..e724d28a2
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_vis.c
@@ -0,0 +1,2061 @@
+/*
+ * motion_comp_vis.c
+ * Copyright (C) 2003 David S. Miller <davem@redhat.com>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#ifdef ARCH_SPARC
+
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include <xine/attributes.h>
+#include "mpeg2_internal.h"
+#include "vis.h"
+
+/* The trick used in some of this file is the formula from the MMX
+ * motion comp code, which is:
+ *
+ * (x+y+1)>>1 == (x|y)-((x^y)>>1)
+ *
+ * This allows us to average 8 bytes at a time in a 64-bit FPU reg.
+ * We avoid overflows by masking before we do the shift, and we
+ * implement the shift by multiplying by 1/2 using mul8x16.  So in
+ * VIS this is (assume 'x' is in f0, 'y' is in f2, a repeating mask
+ * of '0xfe' is in f4, a repeating mask of '0x7f' is in f6, and
+ * the value 0x80808080 is in f8):
+ *
+ *	fxor		f0, f2, f10
+ *	fand		f10, f4, f10
+ *	fmul8x16	f8, f10, f10
+ *	fand		f10, f6, f10
+ *	for		f0, f2, f12
+ *	fpsub16		f12, f10, f10
+ */
+
+#define DUP4(x) {x, x, x, x}	/* initializer repeating x over four 16-bit lanes (8 bytes) */
+#define DUP8(x) {x, x, x, x, x, x, x, x}	/* initializer repeating x over eight bytes */
+static const int16_t constants1[] ATTR_ALIGN(8) = DUP4 (1);	/* 8-byte tables, each read whole with vis_ld64(table[0], reg) */
+static const int16_t constants2[] ATTR_ALIGN(8) = DUP4 (2);
+static const int16_t constants3[] ATTR_ALIGN(8) = DUP4 (3);
+static const int16_t constants6[] ATTR_ALIGN(8) = DUP4 (6);
+static const uint8_t constants_fe[] ATTR_ALIGN(8) = DUP8 (0xfe);	/* unsigned: 0xfe does not fit in int8_t; stored bytes are unchanged */
+static const uint8_t constants_7f[] ATTR_ALIGN(8) = DUP8 (0x7f);	/* unsigned for consistency with the other byte masks */
+static const uint8_t constants128[] ATTR_ALIGN(8) = DUP8 (128);	/* unsigned: 128 does not fit in int8_t; stored bytes are unchanged */
+static const int16_t constants256_512[] ATTR_ALIGN(8) =
+	{256, 512, 256, 512};	/* loaded into the register named both CONST_256 and CONST_512 */
+static const int16_t constants256_1024[] ATTR_ALIGN(8) =
+	{256, 1024, 256, 1024};	/* NOTE(review): not used in this chunk; presumably the CONST_256/CONST_1024 pair - confirm */
+
+#define REF_0		0	/* Register indices handed to the vis_* macros (the file header speaks of 64-bit FPU regs f0, f2, ...). */
+#define REF_0_1		1	/* *_1 names are the odd index after an even one; NOTE(review): presumably the second 32-bit half - confirm */
+#define REF_2		2
+#define REF_2_1		3
+#define REF_4		4
+#define REF_4_1		5
+#define REF_6		6
+#define REF_6_1		7
+#define REF_S0		8	/* REF_S* : a second bank of reference registers (also borrowed as extra dest rows in MC_avg_y_16_vis) */
+#define REF_S0_1	9
+#define REF_S2		10
+#define REF_S2_1	11
+#define REF_S4		12
+#define REF_S4_1	13
+#define REF_S6		14
+#define REF_S6_1	15
+#define DST_0		16	/* DST_0..DST_3: destination data as loaded from and stored to dest */
+#define DST_1		17
+#define DST_2		18
+#define DST_3		19
+#define CONST_1		20	/* index 20 carries five names (CONST_1/2/3/6, MASK_fe): only one of them can be loaded at a time */
+#define CONST_2		20
+#define CONST_3		20
+#define CONST_6		20
+#define MASK_fe		20
+#define CONST_128	22	/* index 22 likewise: CONST_128, or a 256/512 or 256/1024 pair table */
+#define CONST_256	22
+#define CONST_512	22
+#define CONST_1024	22
+#define TMP0		24
+#define TMP1		25
+#define TMP2		26
+#define TMP3		27
+#define TMP4		28
+#define TMP5		29
+#define ZERO		30	/* index 30 is either ZERO (vis_fzero) or MASK_7f, depending on the routine */
+#define MASK_7f		30
+/* From 32 upward only even indices are named. NOTE(review): presumably these are double-precision-only registers - confirm in vis.h. */
+#define TMP6		32
+#define TMP8		34
+#define TMP10		36
+#define TMP12		38
+#define TMP14		40
+#define TMP16		42
+#define TMP18		44
+#define TMP20		46
+#define TMP22		48
+#define TMP24		50
+#define TMP26		52
+#define TMP28		54
+#define TMP30		56
+#define TMP32		58
+
+static void MC_put_o_16_vis (uint8_t * dest, const uint8_t * _ref,
+			     const int stride, int height)
+{	/* put, no interpolation: copies `height` rows of 16 bytes from _ref to dest, both advancing by stride. */
+	uint8_t *ref = (uint8_t *) _ref;
+	int offset;
+	/* NOTE(review): vis_alignaddr() lives in vis.h; presumably it rounds ref down to 8 bytes and latches the byte offset used by vis_faligndata - confirm. */
+	ref = vis_alignaddr(ref);
+	offset = (ref != _ref) ? 16 : 0;
+	do {	/* 5 cycles */
+		vis_ld64(ref[0], TMP0);
+
+		vis_ld64_2(ref, 8, TMP2);
+		/* Third word: when vis_alignaddr left ref unchanged, offset is 0 and this re-reads ref[0], so nothing past ref+15 is touched. */
+		vis_ld64_2(ref, offset, TMP4);
+		ref += stride;
+
+		vis_faligndata(TMP0, TMP2, REF_0);
+		vis_st64(REF_0, dest[0]);
+
+		vis_faligndata(TMP2, TMP4, REF_2);
+		vis_st64_2(REF_2, dest, 8);
+		dest += stride;
+	} while (--height);	/* do/while: height must be at least 1 on entry */
+}
+
+static void MC_put_o_8_vis (uint8_t * dest, const uint8_t * _ref,
+			    const int stride, int height)
+{	/* put, no interpolation: copies `height` rows of 8 bytes from _ref to dest, both advancing by stride. */
+	uint8_t *ref = (uint8_t *) _ref;
+	int offset;
+	/* NOTE(review): vis_alignaddr() lives in vis.h; presumably it rounds ref down to 8 bytes and latches the byte offset used by vis_faligndata - confirm. */
+	ref = vis_alignaddr(ref);
+	offset = (ref != _ref) ? 8 : 0;
+	do {	/* 4 cycles */
+		vis_ld64(ref[0], TMP0);
+		/* Second word: offset is 0 when vis_alignaddr left ref unchanged, so this re-reads ref[0] and nothing past ref+7 is touched. */
+		vis_ld64_2(ref, offset, TMP2);
+		ref += stride;
+
+		/* stall */
+
+		vis_faligndata(TMP0, TMP2, REF_0);
+		vis_st64(REF_0, dest[0]);
+		dest += stride;
+	} while (--height);	/* do/while: height must be at least 1 on entry */
+}
+
+
+static void MC_avg_o_16_vis (uint8_t * dest, const uint8_t * _ref,
+			     const int stride, int height)
+{	/* avg, no interpolation: each 16-byte dest row becomes the (x+y+1)>>1 mean of itself and the matching _ref row. */
+	uint8_t *ref = (uint8_t *) _ref;
+	int stride_8 = stride + 8;
+	int offset;
+	/* stride_8 addresses the second 8 bytes of the dest row one stride ahead; offset works as in MC_put_o_16_vis. */
+	ref = vis_alignaddr(ref);
+	offset = (ref != _ref) ? 16 : 0;
+	/* Prologue: first ref row, first dest row, and the masks/constant used by the averaging trick from the file header. */
+	vis_ld64(ref[0], TMP0);
+
+	vis_ld64(ref[8], TMP2);
+
+	vis_ld64_2(ref, offset, TMP4);
+
+	vis_ld64(dest[0], DST_0);
+
+	vis_ld64(dest[8], DST_2);
+
+	vis_ld64(constants_fe[0], MASK_fe);
+	vis_faligndata(TMP0, TMP2, REF_0);
+
+	vis_ld64(constants_7f[0], MASK_7f);
+	vis_faligndata(TMP2, TMP4, REF_2);
+
+	vis_ld64(constants128[0], CONST_128);
+	/* Two rows per iteration, last two rows done by the tail below: the loop count must be >= 1, i.e. height even and >= 4. */
+	ref += stride;
+	height = (height >> 1) - 1;
+
+	do {	/* 24 cycles */
+		vis_ld64(ref[0], TMP0);
+		vis_xor(DST_0, REF_0, TMP6);
+		/* File-header formula: (dest ^ ref) & 0xfe is halved by the CONST_128 multiply, masked with 0x7f, then taken off (dest | ref). */
+		vis_ld64_2(ref, 8, TMP2);
+		vis_and(TMP6, MASK_fe, TMP6);
+
+		vis_ld64_2(ref, offset, TMP4);
+		ref += stride;
+		vis_mul8x16(CONST_128, TMP6, TMP6);
+		vis_xor(DST_2, REF_2, TMP8);
+
+		vis_and(TMP8, MASK_fe, TMP8);
+
+		vis_or(DST_0, REF_0, TMP10);
+		vis_ld64_2(dest, stride, DST_0);
+		vis_mul8x16(CONST_128, TMP8, TMP8);
+
+		vis_or(DST_2, REF_2, TMP12);
+		vis_ld64_2(dest, stride_8, DST_2);
+
+		vis_ld64(ref[0], TMP14);
+		vis_and(TMP6, MASK_7f, TMP6);
+
+		vis_and(TMP8, MASK_7f, TMP8);
+
+		vis_psub16(TMP10, TMP6, TMP6);
+		vis_st64(TMP6, dest[0]);
+
+		vis_psub16(TMP12, TMP8, TMP8);
+		vis_st64_2(TMP8, dest, 8);
+
+		dest += stride;
+		vis_ld64_2(ref, 8, TMP16);
+		vis_faligndata(TMP0, TMP2, REF_0);
+
+		vis_ld64_2(ref, offset, TMP18);
+		vis_faligndata(TMP2, TMP4, REF_2);
+		ref += stride;
+
+		vis_xor(DST_0, REF_0, TMP20);
+
+		vis_and(TMP20, MASK_fe, TMP20);
+
+		vis_xor(DST_2, REF_2, TMP22);
+		vis_mul8x16(CONST_128, TMP20, TMP20);
+
+		vis_and(TMP22, MASK_fe, TMP22);
+
+		vis_or(DST_0, REF_0, TMP24);
+		vis_mul8x16(CONST_128, TMP22, TMP22);
+
+		vis_or(DST_2, REF_2, TMP26);
+
+		vis_ld64_2(dest, stride, DST_0);
+		vis_faligndata(TMP14, TMP16, REF_0);
+
+		vis_ld64_2(dest, stride_8, DST_2);
+		vis_faligndata(TMP16, TMP18, REF_2);
+
+		vis_and(TMP20, MASK_7f, TMP20);
+
+		vis_and(TMP22, MASK_7f, TMP22);
+
+		vis_psub16(TMP24, TMP20, TMP20);
+		vis_st64(TMP20, dest[0]);
+
+		vis_psub16(TMP26, TMP22, TMP22);
+		vis_st64_2(TMP22, dest, 8);
+		dest += stride;
+	} while (--height);
+	/* Tail: the final two rows, same arithmetic as the loop body but without fetching rows beyond the block. */
+	vis_ld64(ref[0], TMP0);
+	vis_xor(DST_0, REF_0, TMP6);
+
+	vis_ld64_2(ref, 8, TMP2);
+	vis_and(TMP6, MASK_fe, TMP6);
+
+	vis_ld64_2(ref, offset, TMP4);
+	vis_mul8x16(CONST_128, TMP6, TMP6);
+	vis_xor(DST_2, REF_2, TMP8);
+
+	vis_and(TMP8, MASK_fe, TMP8);
+
+	vis_or(DST_0, REF_0, TMP10);
+	vis_ld64_2(dest, stride, DST_0);
+	vis_mul8x16(CONST_128, TMP8, TMP8);
+
+	vis_or(DST_2, REF_2, TMP12);
+	vis_ld64_2(dest, stride_8, DST_2);
+	/* NOTE(review): TMP14 is never read again in this tail; the next load looks like a leftover from the loop body. */
+	vis_ld64(ref[0], TMP14);
+	vis_and(TMP6, MASK_7f, TMP6);
+
+	vis_and(TMP8, MASK_7f, TMP8);
+
+	vis_psub16(TMP10, TMP6, TMP6);
+	vis_st64(TMP6, dest[0]);
+
+	vis_psub16(TMP12, TMP8, TMP8);
+	vis_st64_2(TMP8, dest, 8);
+
+	dest += stride;
+	vis_faligndata(TMP0, TMP2, REF_0);
+
+	vis_faligndata(TMP2, TMP4, REF_2);
+
+	vis_xor(DST_0, REF_0, TMP20);
+
+	vis_and(TMP20, MASK_fe, TMP20);
+
+	vis_xor(DST_2, REF_2, TMP22);
+	vis_mul8x16(CONST_128, TMP20, TMP20);
+
+	vis_and(TMP22, MASK_fe, TMP22);
+
+	vis_or(DST_0, REF_0, TMP24);
+	vis_mul8x16(CONST_128, TMP22, TMP22);
+
+	vis_or(DST_2, REF_2, TMP26);
+
+	vis_and(TMP20, MASK_7f, TMP20);
+
+	vis_and(TMP22, MASK_7f, TMP22);
+
+	vis_psub16(TMP24, TMP20, TMP20);
+	vis_st64(TMP20, dest[0]);
+
+	vis_psub16(TMP26, TMP22, TMP22);
+	vis_st64_2(TMP22, dest, 8);
+}
+
+static void MC_avg_o_8_vis (uint8_t * dest, const uint8_t * _ref,
+			    const int stride, int height)
+{	/* avg, no interpolation: each 8-byte dest row becomes the (x+y+1)>>1 mean of itself and the matching _ref row. */
+	uint8_t *ref = (uint8_t *) _ref;
+	int offset;
+	/* offset is 0 when vis_alignaddr left ref unchanged, so the second load then re-reads ref[0] instead of reading past the row. */
+	ref = vis_alignaddr(ref);
+	offset = (ref != _ref) ? 8 : 0;
+
+	vis_ld64(ref[0], TMP0);
+
+	vis_ld64_2(ref, offset, TMP2);
+
+	vis_ld64(dest[0], DST_0);
+
+	vis_ld64(constants_fe[0], MASK_fe);
+
+	vis_ld64(constants_7f[0], MASK_7f);
+	vis_faligndata(TMP0, TMP2, REF_0);
+
+	vis_ld64(constants128[0], CONST_128);
+	/* Two rows per iteration, last two rows done by the tail below: the loop count must be >= 1, i.e. height even and >= 4. */
+	ref += stride;
+	height = (height >> 1) - 1;
+
+	do {	/* 12 cycles */
+		vis_ld64(ref[0], TMP0);
+		vis_xor(DST_0, REF_0, TMP4);
+
+		vis_ld64_2(ref, offset, TMP2);
+		vis_and(TMP4, MASK_fe, TMP4);
+
+		vis_or(DST_0, REF_0, TMP6);
+		vis_ld64_2(dest, stride, DST_0);
+		ref += stride;
+		vis_mul8x16(CONST_128, TMP4, TMP4);
+
+		vis_ld64(ref[0], TMP12);
+		vis_faligndata(TMP0, TMP2, REF_0);
+
+		vis_ld64_2(ref, offset, TMP2);
+		vis_xor(DST_0, REF_0, TMP0);
+		ref += stride;
+
+		vis_and(TMP0, MASK_fe, TMP0);
+
+		vis_and(TMP4, MASK_7f, TMP4);
+
+		vis_psub16(TMP6, TMP4, TMP4);
+		vis_st64(TMP4, dest[0]);
+		dest += stride;
+		vis_mul8x16(CONST_128, TMP0, TMP0);
+
+		vis_or(DST_0, REF_0, TMP6);
+		vis_ld64_2(dest, stride, DST_0);
+
+		vis_faligndata(TMP12, TMP2, REF_0);
+
+		vis_and(TMP0, MASK_7f, TMP0);
+
+		vis_psub16(TMP6, TMP0, TMP4);
+		vis_st64(TMP4, dest[0]);
+		dest += stride;
+	} while (--height);
+	/* Tail: the final two rows, same arithmetic as the loop body but without fetching rows beyond the block. */
+	vis_ld64(ref[0], TMP0);
+	vis_xor(DST_0, REF_0, TMP4);
+
+	vis_ld64_2(ref, offset, TMP2);
+	vis_and(TMP4, MASK_fe, TMP4);
+
+	vis_or(DST_0, REF_0, TMP6);
+	vis_ld64_2(dest, stride, DST_0);
+	vis_mul8x16(CONST_128, TMP4, TMP4);
+
+	vis_faligndata(TMP0, TMP2, REF_0);
+
+	vis_xor(DST_0, REF_0, TMP0);
+
+	vis_and(TMP0, MASK_fe, TMP0);
+
+	vis_and(TMP4, MASK_7f, TMP4);
+
+	vis_psub16(TMP6, TMP4, TMP4);
+	vis_st64(TMP4, dest[0]);
+	dest += stride;
+	vis_mul8x16(CONST_128, TMP0, TMP0);
+
+	vis_or(DST_0, REF_0, TMP6);
+
+	vis_and(TMP0, MASK_7f, TMP0);
+
+	vis_psub16(TMP6, TMP0, TMP4);
+	vis_st64(TMP4, dest[0]);
+}
+
+static void MC_put_x_16_vis (uint8_t * dest, const uint8_t * _ref,
+			     const int stride, int height)
+{	/* put, x interpolation: each 16-byte dest row is the (x+y+1)>>1 mean of the _ref row and the same row one byte to the right. */
+	uint8_t *ref = (uint8_t *) _ref;
+	unsigned long off = (unsigned long) ref & 0x7;
+	unsigned long off_plus_1 = off + 1;
+	/* off is the byte misalignment of _ref; off_plus_1 selects the same data shifted right by one byte. */
+	ref = vis_alignaddr(ref);
+
+	vis_ld64(ref[0],    TMP0);
+
+	vis_ld64_2(ref, 8,  TMP2);
+
+	vis_ld64_2(ref, 16, TMP4);
+
+	vis_ld64(constants_fe[0], MASK_fe);
+
+	vis_ld64(constants_7f[0], MASK_7f);
+	vis_faligndata(TMP0, TMP2, REF_0);
+
+	vis_ld64(constants128[0], CONST_128);
+	vis_faligndata(TMP2, TMP4, REF_4);
+	/* off == 7: the +1 data starts exactly at the next aligned word, so it is copied with vis_src1 instead of aligned with offset 8. */
+	if (off != 0x7) {
+		vis_alignaddr_g0((void *)off_plus_1);
+		vis_faligndata(TMP0, TMP2, REF_2);
+		vis_faligndata(TMP2, TMP4, REF_6);
+	} else {
+		vis_src1(TMP2, REF_2);
+		vis_src1(TMP4, REF_6);
+	}
+	/* Two rows per iteration, last two rows done by the tail below: the loop count must be >= 1, i.e. height even and >= 4. */
+	ref += stride;
+	height = (height >> 1) - 1;
+
+	do {	/* 34 cycles */
+		vis_ld64(ref[0],    TMP0);
+		vis_xor(REF_0, REF_2, TMP6);
+
+		vis_ld64_2(ref, 8,  TMP2);
+		vis_xor(REF_4, REF_6, TMP8);
+
+		vis_ld64_2(ref, 16, TMP4);
+		vis_and(TMP6, MASK_fe, TMP6);
+		ref += stride;
+
+		vis_ld64(ref[0],    TMP14);
+		vis_mul8x16(CONST_128, TMP6, TMP6);
+		vis_and(TMP8, MASK_fe, TMP8);
+
+		vis_ld64_2(ref, 8,  TMP16);
+		vis_mul8x16(CONST_128, TMP8, TMP8);
+		vis_or(REF_0, REF_2, TMP10);
+
+		vis_ld64_2(ref, 16, TMP18);
+		ref += stride;
+		vis_or(REF_4, REF_6, TMP12);
+		/* Switch the alignment offset back to off before extracting the unshifted pixels of the next row. */
+		vis_alignaddr_g0((void *)off);
+
+		vis_faligndata(TMP0, TMP2, REF_0);
+
+		vis_faligndata(TMP2, TMP4, REF_4);
+
+		if (off != 0x7) {
+			vis_alignaddr_g0((void *)off_plus_1);
+			vis_faligndata(TMP0, TMP2, REF_2);
+			vis_faligndata(TMP2, TMP4, REF_6);
+		} else {
+			vis_src1(TMP2, REF_2);
+			vis_src1(TMP4, REF_6);
+		}
+
+		vis_and(TMP6, MASK_7f, TMP6);
+
+		vis_and(TMP8, MASK_7f, TMP8);
+
+		vis_psub16(TMP10, TMP6, TMP6);
+		vis_st64(TMP6, dest[0]);
+
+		vis_psub16(TMP12, TMP8, TMP8);
+		vis_st64_2(TMP8, dest, 8);
+		dest += stride;
+
+		vis_xor(REF_0, REF_2, TMP6);
+
+		vis_xor(REF_4, REF_6, TMP8);
+
+		vis_and(TMP6, MASK_fe, TMP6);
+
+		vis_mul8x16(CONST_128, TMP6, TMP6);
+		vis_and(TMP8, MASK_fe, TMP8);
+
+		vis_mul8x16(CONST_128, TMP8, TMP8);
+		vis_or(REF_0, REF_2, TMP10);
+
+		vis_or(REF_4, REF_6, TMP12);
+
+		vis_alignaddr_g0((void *)off);
+
+		vis_faligndata(TMP14, TMP16, REF_0);
+
+		vis_faligndata(TMP16, TMP18, REF_4);
+
+		if (off != 0x7) {
+			vis_alignaddr_g0((void *)off_plus_1);
+			vis_faligndata(TMP14, TMP16, REF_2);
+			vis_faligndata(TMP16, TMP18, REF_6);
+		} else {
+			vis_src1(TMP16, REF_2);
+			vis_src1(TMP18, REF_6);
+		}
+
+		vis_and(TMP6, MASK_7f, TMP6);
+
+		vis_and(TMP8, MASK_7f, TMP8);
+
+		vis_psub16(TMP10, TMP6, TMP6);
+		vis_st64(TMP6, dest[0]);
+
+		vis_psub16(TMP12, TMP8, TMP8);
+		vis_st64_2(TMP8, dest, 8);
+		dest += stride;
+	} while (--height);
+	/* Tail: the final two rows; only one more ref row is loaded here. */
+	vis_ld64(ref[0],    TMP0);
+	vis_xor(REF_0, REF_2, TMP6);
+
+	vis_ld64_2(ref, 8,  TMP2);
+	vis_xor(REF_4, REF_6, TMP8);
+
+	vis_ld64_2(ref, 16, TMP4);
+	vis_and(TMP6, MASK_fe, TMP6);
+
+	vis_mul8x16(CONST_128, TMP6, TMP6);
+	vis_and(TMP8, MASK_fe, TMP8);
+
+	vis_mul8x16(CONST_128, TMP8, TMP8);
+	vis_or(REF_0, REF_2, TMP10);
+
+	vis_or(REF_4, REF_6, TMP12);
+
+	vis_alignaddr_g0((void *)off);
+
+	vis_faligndata(TMP0, TMP2, REF_0);
+
+	vis_faligndata(TMP2, TMP4, REF_4);
+
+	if (off != 0x7) {
+		vis_alignaddr_g0((void *)off_plus_1);
+		vis_faligndata(TMP0, TMP2, REF_2);
+		vis_faligndata(TMP2, TMP4, REF_6);
+	} else {
+		vis_src1(TMP2, REF_2);
+		vis_src1(TMP4, REF_6);
+	}
+
+	vis_and(TMP6, MASK_7f, TMP6);
+
+	vis_and(TMP8, MASK_7f, TMP8);
+
+	vis_psub16(TMP10, TMP6, TMP6);
+	vis_st64(TMP6, dest[0]);
+
+	vis_psub16(TMP12, TMP8, TMP8);
+	vis_st64_2(TMP8, dest, 8);
+	dest += stride;
+
+	vis_xor(REF_0, REF_2, TMP6);
+
+	vis_xor(REF_4, REF_6, TMP8);
+
+	vis_and(TMP6, MASK_fe, TMP6);
+
+	vis_mul8x16(CONST_128, TMP6, TMP6);
+	vis_and(TMP8, MASK_fe, TMP8);
+
+	vis_mul8x16(CONST_128, TMP8, TMP8);
+	vis_or(REF_0, REF_2, TMP10);
+
+	vis_or(REF_4, REF_6, TMP12);
+
+	vis_and(TMP6, MASK_7f, TMP6);
+
+	vis_and(TMP8, MASK_7f, TMP8);
+
+	vis_psub16(TMP10, TMP6, TMP6);
+	vis_st64(TMP6, dest[0]);
+
+	vis_psub16(TMP12, TMP8, TMP8);
+	vis_st64_2(TMP8, dest, 8);
+}
+
+static void MC_put_x_8_vis (uint8_t * dest, const uint8_t * _ref,
+			    const int stride, int height)
+{	/* put, x interpolation: each 8-byte dest row is the (x+y+1)>>1 mean of the _ref row and the same row one byte to the right. */
+	uint8_t *ref = (uint8_t *) _ref;
+	unsigned long off = (unsigned long) ref & 0x7;
+	unsigned long off_plus_1 = off + 1;
+	/* off is the byte misalignment of _ref; off_plus_1 selects the same data shifted right by one byte. */
+	ref = vis_alignaddr(ref);
+
+	vis_ld64(ref[0], TMP0);
+
+	vis_ld64(ref[8], TMP2);
+
+	vis_ld64(constants_fe[0], MASK_fe);
+
+	vis_ld64(constants_7f[0], MASK_7f);
+
+	vis_ld64(constants128[0], CONST_128);
+	vis_faligndata(TMP0, TMP2, REF_0);
+	/* off == 7: the +1 data starts exactly at the next aligned word, so it is copied with vis_src1 instead of aligned with offset 8. */
+	if (off != 0x7) {
+		vis_alignaddr_g0((void *)off_plus_1);
+		vis_faligndata(TMP0, TMP2, REF_2);
+	} else {
+		vis_src1(TMP2, REF_2);
+	}
+	/* Two rows per iteration, last two rows done by the tail below: the loop count must be >= 1, i.e. height even and >= 4. */
+	ref += stride;
+	height = (height >> 1) - 1;
+
+	do {	/* 20 cycles */
+		vis_ld64(ref[0], TMP0);
+		vis_xor(REF_0, REF_2, TMP4);
+
+		vis_ld64_2(ref, 8, TMP2);
+		vis_and(TMP4, MASK_fe, TMP4);
+		ref += stride;
+
+		vis_ld64(ref[0], TMP8);
+		vis_or(REF_0, REF_2, TMP6);
+		vis_mul8x16(CONST_128, TMP4, TMP4);
+
+		vis_alignaddr_g0((void *)off);
+
+		vis_ld64_2(ref, 8, TMP10);
+		ref += stride;
+		vis_faligndata(TMP0, TMP2, REF_0);
+
+		if (off != 0x7) {
+			vis_alignaddr_g0((void *)off_plus_1);
+			vis_faligndata(TMP0, TMP2, REF_2);
+		} else {
+			vis_src1(TMP2, REF_2);
+		}
+
+		vis_and(TMP4, MASK_7f, TMP4);
+
+		vis_psub16(TMP6, TMP4, DST_0);
+		vis_st64(DST_0, dest[0]);
+		dest += stride;
+
+		vis_xor(REF_0, REF_2, TMP12);
+
+		vis_and(TMP12, MASK_fe, TMP12);
+
+		vis_or(REF_0, REF_2, TMP14);
+		vis_mul8x16(CONST_128, TMP12, TMP12);
+
+		vis_alignaddr_g0((void *)off);
+		vis_faligndata(TMP8, TMP10, REF_0);
+		if (off != 0x7) {
+			vis_alignaddr_g0((void *)off_plus_1);
+			vis_faligndata(TMP8, TMP10, REF_2);
+		} else {
+			vis_src1(TMP10, REF_2);
+		}
+
+		vis_and(TMP12, MASK_7f, TMP12);
+
+		vis_psub16(TMP14, TMP12, DST_0);
+		vis_st64(DST_0, dest[0]);
+		dest += stride;
+	} while (--height);
+	/* Tail: the final two rows; only one more ref row is loaded here. */
+	vis_ld64(ref[0], TMP0);
+	vis_xor(REF_0, REF_2, TMP4);
+
+	vis_ld64_2(ref, 8, TMP2);
+	vis_and(TMP4, MASK_fe, TMP4);
+
+	vis_or(REF_0, REF_2, TMP6);
+	vis_mul8x16(CONST_128, TMP4, TMP4);
+
+	vis_alignaddr_g0((void *)off);
+
+	vis_faligndata(TMP0, TMP2, REF_0);
+
+	if (off != 0x7) {
+		vis_alignaddr_g0((void *)off_plus_1);
+		vis_faligndata(TMP0, TMP2, REF_2);
+	} else {
+		vis_src1(TMP2, REF_2);
+	}
+
+	vis_and(TMP4, MASK_7f, TMP4);
+
+	vis_psub16(TMP6, TMP4, DST_0);
+	vis_st64(DST_0, dest[0]);
+	dest += stride;
+
+	vis_xor(REF_0, REF_2, TMP12);
+
+	vis_and(TMP12, MASK_fe, TMP12);
+
+	vis_or(REF_0, REF_2, TMP14);
+	vis_mul8x16(CONST_128, TMP12, TMP12);
+
+	vis_and(TMP12, MASK_7f, TMP12);
+
+	vis_psub16(TMP14, TMP12, DST_0);
+	vis_st64(DST_0, dest[0]);
+	dest += stride;	/* no effect: dest is not used after this point */
+}
+
+static void MC_avg_x_16_vis (uint8_t * dest, const uint8_t * _ref,
+			     const int stride, int height)
+{	/* avg, x interpolation: blends each 16-byte dest row with the _ref row and the _ref row shifted one byte right. */
+	uint8_t *ref = (uint8_t *) _ref;
+	unsigned long off = (unsigned long) ref & 0x7;
+	unsigned long off_plus_1 = off + 1;
+	/* NOTE(review): GSR scale factor 5 presumably makes vis_pack16 divide the 16-bit sums by 4 - confirm against the VIS manual. */
+	vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+	/* CONST_256 and CONST_512 name the same register: it holds the {256, 512, 256, 512} table. */
+	vis_ld64(constants3[0], CONST_3);
+	vis_fzero(ZERO);
+	vis_ld64(constants256_512[0], CONST_256);
+
+	ref = vis_alignaddr(ref);
+	do {	/* 26 cycles */
+		vis_ld64(ref[0], TMP0);
+
+		vis_ld64(ref[8], TMP2);
+
+		vis_alignaddr_g0((void *)off);
+
+		vis_ld64(ref[16], TMP4);
+
+		vis_ld64(dest[0], DST_0);
+		vis_faligndata(TMP0, TMP2, REF_0);
+
+		vis_ld64(dest[8], DST_2);
+		vis_faligndata(TMP2, TMP4, REF_4);
+
+		if (off != 0x7) {
+			vis_alignaddr_g0((void *)off_plus_1);
+			vis_faligndata(TMP0, TMP2, REF_2);
+			vis_faligndata(TMP2, TMP4, REF_6);
+		} else {
+			vis_src1(TMP2, REF_2);
+			vis_src1(TMP4, REF_6);
+		}
+		/* Per 16-bit lane: widened ref + widened shifted ref + (dest x CONST_512) + CONST_3, then packed back to bytes. */
+		vis_mul8x16au(REF_0,   CONST_256, TMP0);
+
+		vis_pmerge(ZERO,     REF_2,     TMP4);
+		vis_mul8x16au(REF_0_1, CONST_256, TMP2);
+
+		vis_pmerge(ZERO, REF_2_1, TMP6);
+
+		vis_padd16(TMP0, TMP4, TMP0);
+
+		vis_mul8x16al(DST_0,   CONST_512, TMP4);
+		vis_padd16(TMP2, TMP6, TMP2);
+
+		vis_mul8x16al(DST_1,   CONST_512, TMP6);
+
+		vis_mul8x16au(REF_6,   CONST_256, TMP12);
+
+		vis_padd16(TMP0, TMP4, TMP0);
+		vis_mul8x16au(REF_6_1, CONST_256, TMP14);
+
+		vis_padd16(TMP2, TMP6, TMP2);
+		vis_mul8x16au(REF_4,   CONST_256, TMP16);
+
+		vis_padd16(TMP0, CONST_3, TMP8);
+		vis_mul8x16au(REF_4_1, CONST_256, TMP18);
+
+		vis_padd16(TMP2, CONST_3, TMP10);
+		vis_pack16(TMP8, DST_0);
+
+		vis_pack16(TMP10, DST_1);
+		vis_padd16(TMP16, TMP12, TMP0);
+
+		vis_st64(DST_0, dest[0]);
+		vis_mul8x16al(DST_2,   CONST_512, TMP4);
+		vis_padd16(TMP18, TMP14, TMP2);
+
+		vis_mul8x16al(DST_3,   CONST_512, TMP6);
+		vis_padd16(TMP0, CONST_3, TMP0);
+
+		vis_padd16(TMP2, CONST_3, TMP2);
+
+		vis_padd16(TMP0, TMP4, TMP0);
+
+		vis_padd16(TMP2, TMP6, TMP2);
+		vis_pack16(TMP0, DST_2);
+
+		vis_pack16(TMP2, DST_3);
+		vis_st64(DST_2, dest[8]);
+
+		ref += stride;
+		dest += stride;
+	} while (--height);	/* one row per iteration: height must be at least 1 on entry */
+}
+
+static void MC_avg_x_8_vis (uint8_t * dest, const uint8_t * _ref,
+			    const int stride, int height)
+{	/* avg, x interpolation: blends each 8-byte dest row with the _ref row and the _ref row shifted one byte right. */
+	uint8_t *ref = (uint8_t *) _ref;
+	unsigned long off = (unsigned long) ref & 0x7;
+	unsigned long off_plus_1 = off + 1;
+	int stride_times_2 = stride << 1;
+	/* NOTE(review): GSR scale factor 5 presumably makes vis_pack16 divide the 16-bit sums by 4 - confirm against the VIS manual. */
+	vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+
+	vis_ld64(constants3[0], CONST_3);
+	vis_fzero(ZERO);
+	vis_ld64(constants256_512[0], CONST_256);
+	/* Four rows per iteration: height must be a non-zero multiple of 4 for the do/while count to be exact. */
+	ref = vis_alignaddr(ref);
+	height >>= 2;
+	do {	/* 47 cycles */
+		vis_ld64(ref[0],   TMP0);
+
+		vis_ld64_2(ref, 8, TMP2);
+		ref += stride;
+
+		vis_alignaddr_g0((void *)off);
+
+		vis_ld64(ref[0],   TMP4);
+		vis_faligndata(TMP0, TMP2, REF_0);
+
+		vis_ld64_2(ref, 8, TMP6);
+		ref += stride;
+
+		vis_ld64(ref[0],   TMP8);
+
+		vis_ld64_2(ref, 8, TMP10);
+		ref += stride;
+		vis_faligndata(TMP4, TMP6, REF_4);
+
+		vis_ld64(ref[0],   TMP12);
+
+		vis_ld64_2(ref, 8, TMP14);
+		ref += stride;
+		vis_faligndata(TMP8, TMP10, REF_S0);
+
+		vis_faligndata(TMP12, TMP14, REF_S4);
+		/* Shifted-by-one copies of the four rows; off == 7 means the next aligned word already is that data (vis_src1). */
+		if (off != 0x7) {
+			vis_alignaddr_g0((void *)off_plus_1);
+
+			vis_ld64(dest[0], DST_0);
+			vis_faligndata(TMP0, TMP2, REF_2);
+
+			vis_ld64_2(dest, stride, DST_2);
+			vis_faligndata(TMP4, TMP6, REF_6);
+
+			vis_faligndata(TMP8, TMP10, REF_S2);
+
+			vis_faligndata(TMP12, TMP14, REF_S6);
+		} else {
+			vis_ld64(dest[0], DST_0);
+			vis_src1(TMP2, REF_2);
+
+			vis_ld64_2(dest, stride, DST_2);
+			vis_src1(TMP6, REF_6);
+
+			vis_src1(TMP10, REF_S2);
+
+			vis_src1(TMP14, REF_S6);
+		}
+		/* Rows 0 and 1: REF_0/REF_2 with DST_0/DST_1, and REF_4/REF_6 with DST_2/DST_3. */
+		vis_pmerge(ZERO,     REF_0,     TMP0);
+		vis_mul8x16au(REF_0_1, CONST_256, TMP2);
+
+		vis_pmerge(ZERO,     REF_2,     TMP4);
+		vis_mul8x16au(REF_2_1, CONST_256, TMP6);
+
+		vis_padd16(TMP0, CONST_3, TMP0);
+		vis_mul8x16al(DST_0,   CONST_512, TMP16);
+
+		vis_padd16(TMP2, CONST_3, TMP2);
+		vis_mul8x16al(DST_1,   CONST_512, TMP18);
+
+		vis_padd16(TMP0, TMP4, TMP0);
+		vis_mul8x16au(REF_4, CONST_256, TMP8);
+
+		vis_padd16(TMP2, TMP6, TMP2);
+		vis_mul8x16au(REF_4_1, CONST_256, TMP10);
+
+		vis_padd16(TMP0, TMP16, TMP0);
+		vis_mul8x16au(REF_6, CONST_256, TMP12);
+
+		vis_padd16(TMP2, TMP18, TMP2);
+		vis_mul8x16au(REF_6_1, CONST_256, TMP14);
+
+		vis_padd16(TMP8, CONST_3, TMP8);
+		vis_mul8x16al(DST_2, CONST_512, TMP16);
+
+		vis_padd16(TMP8, TMP12, TMP8);
+		vis_mul8x16al(DST_3, CONST_512, TMP18);
+
+		vis_padd16(TMP10, TMP14, TMP10);
+		vis_pack16(TMP0, DST_0);
+
+		vis_pack16(TMP2, DST_1);
+		vis_st64(DST_0, dest[0]);
+		dest += stride;
+		vis_padd16(TMP10, CONST_3, TMP10);
+		/* dest now points at row 1: fetch dest rows 2 and 3. Row 3 goes to TMP4/TMP5 because DST_2/DST_3 are about to receive row 1's result. */
+		vis_ld64_2(dest, stride, DST_0);
+		vis_padd16(TMP8, TMP16, TMP8);
+
+		vis_ld64_2(dest, stride_times_2, TMP4/*DST_2*/);
+		vis_padd16(TMP10, TMP18, TMP10);
+		vis_pack16(TMP8, DST_2);
+
+		vis_pack16(TMP10, DST_3);
+		vis_st64(DST_2, dest[0]);
+		dest += stride;
+		/* Rows 2 and 3: same arithmetic on the REF_S* bank. */
+		vis_mul8x16au(REF_S0_1, CONST_256, TMP2);
+		vis_pmerge(ZERO,     REF_S0,     TMP0);
+
+		vis_pmerge(ZERO,     REF_S2,     TMP24);
+		vis_mul8x16au(REF_S2_1, CONST_256, TMP6);
+
+		vis_padd16(TMP0, CONST_3, TMP0);
+		vis_mul8x16au(REF_S4, CONST_256, TMP8);
+
+		vis_padd16(TMP2, CONST_3, TMP2);
+		vis_mul8x16au(REF_S4_1, CONST_256, TMP10);
+
+		vis_padd16(TMP0, TMP24, TMP0);
+		vis_mul8x16au(REF_S6, CONST_256, TMP12);
+
+		vis_padd16(TMP2, TMP6, TMP2);
+		vis_mul8x16au(REF_S6_1, CONST_256, TMP14);
+
+		vis_padd16(TMP8, CONST_3, TMP8);
+		vis_mul8x16al(DST_0,   CONST_512, TMP16);
+
+		vis_padd16(TMP10, CONST_3, TMP10);
+		vis_mul8x16al(DST_1,   CONST_512, TMP18);
+
+		vis_padd16(TMP8, TMP12, TMP8);
+		vis_mul8x16al(TMP4/*DST_2*/, CONST_512, TMP20);
+
+		vis_mul8x16al(TMP5/*DST_3*/, CONST_512, TMP22);
+		vis_padd16(TMP0, TMP16, TMP0);
+
+		vis_padd16(TMP2, TMP18, TMP2);
+		vis_pack16(TMP0, DST_0);
+
+		vis_padd16(TMP10, TMP14, TMP10);
+		vis_pack16(TMP2, DST_1);
+		vis_st64(DST_0, dest[0]);
+		dest += stride;
+
+		vis_padd16(TMP8, TMP20, TMP8);
+
+		vis_padd16(TMP10, TMP22, TMP10);
+		vis_pack16(TMP8, DST_2);
+
+		vis_pack16(TMP10, DST_3);
+		vis_st64(DST_2, dest[0]);
+		dest += stride;
+	} while (--height);
+}
+
+static void MC_put_y_16_vis (uint8_t * dest, const uint8_t * _ref,
+			     const int stride, int height)
+{	/* put, y interpolation: each 16-byte dest row is the (x+y+1)>>1 mean of a _ref row and the row one stride below it. */
+	uint8_t *ref = (uint8_t *) _ref;
+	int offset;
+	/* offset is 0 when vis_alignaddr left ref unchanged, so the third load of a row then re-reads ref[0] instead of reading past the row. */
+	ref = vis_alignaddr(ref);
+	offset = (ref != _ref) ? 16 : 0;
+	/* Prologue: the first row goes to REF_0/REF_4, the second row to REF_2/REF_6. */
+	vis_ld64(ref[0], TMP0);
+
+	vis_ld64_2(ref, 8, TMP2);
+
+	vis_ld64_2(ref, offset, TMP4);
+	ref += stride;
+
+	vis_ld64(ref[0], TMP6);
+	vis_faligndata(TMP0, TMP2, REF_0);
+
+	vis_ld64_2(ref, 8, TMP8);
+	vis_faligndata(TMP2, TMP4, REF_4);
+
+	vis_ld64_2(ref, offset, TMP10);
+	ref += stride;
+
+	vis_ld64(constants_fe[0], MASK_fe);
+	vis_faligndata(TMP6, TMP8, REF_2);
+
+	vis_ld64(constants_7f[0], MASK_7f);
+	vis_faligndata(TMP8, TMP10, REF_6);
+
+	vis_ld64(constants128[0], CONST_128);
+	height = (height >> 1) - 1;	/* two rows per iteration plus a two-row tail: needs height even and >= 4 */
+	do {	/* 24 cycles */
+		vis_ld64(ref[0], TMP0);
+		vis_xor(REF_0, REF_2, TMP12);
+
+		vis_ld64_2(ref, 8, TMP2);
+		vis_xor(REF_4, REF_6, TMP16);
+
+		vis_ld64_2(ref, offset, TMP4);
+		ref += stride;
+		vis_or(REF_0, REF_2, TMP14);
+
+		vis_ld64(ref[0], TMP6);
+		vis_or(REF_4, REF_6, TMP18);
+
+		vis_ld64_2(ref, 8, TMP8);
+		vis_faligndata(TMP0, TMP2, REF_0);
+
+		vis_ld64_2(ref, offset, TMP10);
+		ref += stride;
+		vis_faligndata(TMP2, TMP4, REF_4);
+
+		vis_and(TMP12, MASK_fe, TMP12);
+
+		vis_and(TMP16, MASK_fe, TMP16);
+		vis_mul8x16(CONST_128, TMP12, TMP12);
+
+		vis_mul8x16(CONST_128, TMP16, TMP16);
+		vis_xor(REF_0, REF_2, TMP0);
+
+		vis_xor(REF_4, REF_6, TMP2);
+
+		vis_or(REF_0, REF_2, TMP20);
+
+		vis_and(TMP12, MASK_7f, TMP12);
+
+		vis_and(TMP16, MASK_7f, TMP16);
+
+		vis_psub16(TMP14, TMP12, TMP12);
+		vis_st64(TMP12, dest[0]);
+
+		vis_psub16(TMP18, TMP16, TMP16);
+		vis_st64_2(TMP16, dest, 8);
+		dest += stride;
+
+		vis_or(REF_4, REF_6, TMP18);
+
+		vis_and(TMP0, MASK_fe, TMP0);
+
+		vis_and(TMP2, MASK_fe, TMP2);
+		vis_mul8x16(CONST_128, TMP0, TMP0);
+
+		vis_faligndata(TMP6, TMP8, REF_2);
+		vis_mul8x16(CONST_128, TMP2, TMP2);
+
+		vis_faligndata(TMP8, TMP10, REF_6);
+
+		vis_and(TMP0, MASK_7f, TMP0);
+
+		vis_and(TMP2, MASK_7f, TMP2);
+
+		vis_psub16(TMP20, TMP0, TMP0);
+		vis_st64(TMP0, dest[0]);
+
+		vis_psub16(TMP18, TMP2, TMP2);
+		vis_st64_2(TMP2, dest, 8);
+		dest += stride;
+	} while (--height);
+	/* Tail: the final two output rows; only one more ref row is loaded here. */
+	vis_ld64(ref[0], TMP0);
+	vis_xor(REF_0, REF_2, TMP12);
+
+	vis_ld64_2(ref, 8, TMP2);
+	vis_xor(REF_4, REF_6, TMP16);
+
+	vis_ld64_2(ref, offset, TMP4);
+	vis_or(REF_0, REF_2, TMP14);
+
+	vis_or(REF_4, REF_6, TMP18);
+
+	vis_faligndata(TMP0, TMP2, REF_0);
+
+	vis_faligndata(TMP2, TMP4, REF_4);
+
+	vis_and(TMP12, MASK_fe, TMP12);
+
+	vis_and(TMP16, MASK_fe, TMP16);
+	vis_mul8x16(CONST_128, TMP12, TMP12);
+
+	vis_mul8x16(CONST_128, TMP16, TMP16);
+	vis_xor(REF_0, REF_2, TMP0);
+
+	vis_xor(REF_4, REF_6, TMP2);
+
+	vis_or(REF_0, REF_2, TMP20);
+
+	vis_and(TMP12, MASK_7f, TMP12);
+
+	vis_and(TMP16, MASK_7f, TMP16);
+
+	vis_psub16(TMP14, TMP12, TMP12);
+	vis_st64(TMP12, dest[0]);
+
+	vis_psub16(TMP18, TMP16, TMP16);
+	vis_st64_2(TMP16, dest, 8);
+	dest += stride;
+
+	vis_or(REF_4, REF_6, TMP18);
+
+	vis_and(TMP0, MASK_fe, TMP0);
+
+	vis_and(TMP2, MASK_fe, TMP2);
+	vis_mul8x16(CONST_128, TMP0, TMP0);
+
+	vis_mul8x16(CONST_128, TMP2, TMP2);
+
+	vis_and(TMP0, MASK_7f, TMP0);
+
+	vis_and(TMP2, MASK_7f, TMP2);
+
+	vis_psub16(TMP20, TMP0, TMP0);
+	vis_st64(TMP0, dest[0]);
+
+	vis_psub16(TMP18, TMP2, TMP2);
+	vis_st64_2(TMP2, dest, 8);
+}
+
+static void MC_put_y_8_vis (uint8_t * dest, const uint8_t * _ref,
+			    const int stride, int height)
+{	/* put, y interpolation: each 8-byte dest row is the (x+y+1)>>1 mean of a _ref row and the row one stride below it. */
+	uint8_t *ref = (uint8_t *) _ref;
+	int offset;
+	/* offset is 0 when vis_alignaddr left ref unchanged, so the second load of a row then re-reads ref[0] instead of reading past the row. */
+	ref = vis_alignaddr(ref);
+	offset = (ref != _ref) ? 8 : 0;
+	/* Prologue: the first row goes to REF_0, the second row to REF_2. */
+	vis_ld64(ref[0], TMP0);
+
+	vis_ld64_2(ref, offset, TMP2);
+	ref += stride;
+
+	vis_ld64(ref[0], TMP4);
+
+	vis_ld64_2(ref, offset, TMP6);
+	ref += stride;
+
+	vis_ld64(constants_fe[0], MASK_fe);
+	vis_faligndata(TMP0, TMP2, REF_0);
+
+	vis_ld64(constants_7f[0], MASK_7f);
+	vis_faligndata(TMP4, TMP6, REF_2);
+
+	vis_ld64(constants128[0], CONST_128);
+	height = (height >> 1) - 1;	/* two rows per iteration plus a two-row tail: needs height even and >= 4 */
+	do {	/* 12 cycles */
+		vis_ld64(ref[0], TMP0);
+		vis_xor(REF_0, REF_2, TMP4);
+
+		vis_ld64_2(ref, offset, TMP2);
+		ref += stride;
+		vis_and(TMP4, MASK_fe, TMP4);
+
+		vis_or(REF_0, REF_2, TMP6);
+		vis_mul8x16(CONST_128, TMP4, TMP4);
+
+		vis_faligndata(TMP0, TMP2, REF_0);
+		vis_ld64(ref[0], TMP0);
+
+		vis_ld64_2(ref, offset, TMP2);
+		ref += stride;
+		vis_xor(REF_0, REF_2, TMP12);
+
+		vis_and(TMP4, MASK_7f, TMP4);
+
+		vis_and(TMP12, MASK_fe, TMP12);
+
+		vis_mul8x16(CONST_128, TMP12, TMP12);
+		vis_or(REF_0, REF_2, TMP14);
+
+		vis_psub16(TMP6, TMP4, DST_0);
+		vis_st64(DST_0, dest[0]);
+		dest += stride;
+
+		vis_faligndata(TMP0, TMP2, REF_2);
+
+		vis_and(TMP12, MASK_7f, TMP12);
+
+		vis_psub16(TMP14, TMP12, DST_0);
+		vis_st64(DST_0, dest[0]);
+		dest += stride;
+	} while (--height);
+	/* Tail: the final two output rows; only one more ref row is loaded here. */
+	vis_ld64(ref[0], TMP0);
+	vis_xor(REF_0, REF_2, TMP4);
+
+	vis_ld64_2(ref, offset, TMP2);
+	vis_and(TMP4, MASK_fe, TMP4);
+
+	vis_or(REF_0, REF_2, TMP6);
+	vis_mul8x16(CONST_128, TMP4, TMP4);
+
+	vis_faligndata(TMP0, TMP2, REF_0);
+
+	vis_xor(REF_0, REF_2, TMP12);
+
+	vis_and(TMP4, MASK_7f, TMP4);
+
+	vis_and(TMP12, MASK_fe, TMP12);
+
+	vis_mul8x16(CONST_128, TMP12, TMP12);
+	vis_or(REF_0, REF_2, TMP14);
+
+	vis_psub16(TMP6, TMP4, DST_0);
+	vis_st64(DST_0, dest[0]);
+	dest += stride;
+
+	vis_and(TMP12, MASK_7f, TMP12);
+
+	vis_psub16(TMP14, TMP12, DST_0);
+	vis_st64(DST_0, dest[0]);
+}
+
+static void MC_avg_y_16_vis (uint8_t * dest, const uint8_t * _ref,
+			     const int stride, int height)
+{	/* avg, y interpolation: blends each 16-byte dest row with a _ref row and the _ref row one stride below it. */
+	uint8_t *ref = (uint8_t *) _ref;
+	int stride_8 = stride + 8;
+	int stride_16;
+	int offset;
+	/* NOTE(review): GSR scale factor 5 presumably makes vis_pack16 divide the 16-bit sums by 4 - confirm against the VIS manual. */
+	vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+
+	ref = vis_alignaddr(ref);
+	offset = (ref != _ref) ? 16 : 0;
+	/* Prologue: the first ref row is aligned into REF_2/REF_6; each iteration then fetches the rows below via ref + stride. */
+	vis_ld64(ref[ 0], TMP0);
+	vis_fzero(ZERO);
+
+	vis_ld64(ref[ 8], TMP2);
+
+	vis_ld64_2(ref, offset, TMP4);
+	stride_16 = stride + offset;	/* third-word position in the row one stride ahead (equals stride when offset is 0) */
+
+	vis_ld64(constants3[0], CONST_3);
+	vis_faligndata(TMP0, TMP2, REF_2);
+
+	vis_ld64(constants256_512[0], CONST_256);
+	vis_faligndata(TMP2, TMP4, REF_6);
+	height >>= 1;	/* two rows per iteration: height must be even and >= 2 */
+
+	do {	/* 31 cycles */
+		vis_ld64_2(ref, stride, TMP0);
+		vis_pmerge(ZERO,       REF_2,     TMP12);
+		vis_mul8x16au(REF_2_1, CONST_256, TMP14);
+
+		vis_ld64_2(ref, stride_8, TMP2);
+		vis_pmerge(ZERO,       REF_6,     TMP16);
+		vis_mul8x16au(REF_6_1, CONST_256, TMP18);
+
+		vis_ld64_2(ref, stride_16, TMP4);
+		ref += stride;
+
+		vis_ld64(dest[0], DST_0);
+		vis_faligndata(TMP0, TMP2, REF_0);
+
+		vis_ld64_2(dest, 8, DST_2);
+		vis_faligndata(TMP2, TMP4, REF_4);
+
+		vis_ld64_2(ref, stride, TMP6);
+		vis_pmerge(ZERO,     REF_0,     TMP0);
+		vis_mul8x16au(REF_0_1, CONST_256, TMP2);
+
+		vis_ld64_2(ref, stride_8, TMP8);
+		vis_pmerge(ZERO,     REF_4,     TMP4);
+
+		vis_ld64_2(ref, stride_16, TMP10);
+		ref += stride;
+		/* REF_S0/REF_S2 are borrowed here to hold the second dest row (the inline DST_4/DST_6 notes). */
+		vis_ld64_2(dest, stride, REF_S0/*DST_4*/);
+		vis_faligndata(TMP6, TMP8, REF_2);
+		vis_mul8x16au(REF_4_1, CONST_256, TMP6);
+
+		vis_ld64_2(dest, stride_8, REF_S2/*DST_6*/);
+		vis_faligndata(TMP8, TMP10, REF_6);
+		vis_mul8x16al(DST_0,   CONST_512, TMP20);
+
+		vis_padd16(TMP0, CONST_3, TMP0);
+		vis_mul8x16al(DST_1,   CONST_512, TMP22);
+
+		vis_padd16(TMP2, CONST_3, TMP2);
+		vis_mul8x16al(DST_2,   CONST_512, TMP24);
+
+		vis_padd16(TMP4, CONST_3, TMP4);
+		vis_mul8x16al(DST_3,   CONST_512, TMP26);
+
+		vis_padd16(TMP6, CONST_3, TMP6);
+
+		vis_padd16(TMP12, TMP20, TMP12);
+		vis_mul8x16al(REF_S0,   CONST_512, TMP20);
+
+		vis_padd16(TMP14, TMP22, TMP14);
+		vis_mul8x16al(REF_S0_1, CONST_512, TMP22);
+
+		vis_padd16(TMP16, TMP24, TMP16);
+		vis_mul8x16al(REF_S2,   CONST_512, TMP24);
+
+		vis_padd16(TMP18, TMP26, TMP18);
+		vis_mul8x16al(REF_S2_1, CONST_512, TMP26);
+
+		vis_padd16(TMP12, TMP0, TMP12);
+		vis_mul8x16au(REF_2,   CONST_256, TMP28);
+
+		vis_padd16(TMP14, TMP2, TMP14);
+		vis_mul8x16au(REF_2_1, CONST_256, TMP30);
+
+		vis_padd16(TMP16, TMP4, TMP16);
+		vis_mul8x16au(REF_6,   CONST_256, REF_S4);
+
+		vis_padd16(TMP18, TMP6, TMP18);
+		vis_mul8x16au(REF_6_1, CONST_256, REF_S6);
+
+		vis_pack16(TMP12, DST_0);
+		vis_padd16(TMP28, TMP0, TMP12);
+
+		vis_pack16(TMP14, DST_1);
+		vis_st64(DST_0, dest[0]);
+		vis_padd16(TMP30, TMP2, TMP14);
+
+		vis_pack16(TMP16, DST_2);
+		vis_padd16(REF_S4, TMP4, TMP16);
+
+		vis_pack16(TMP18, DST_3);
+		vis_st64_2(DST_2, dest, 8);
+		dest += stride;
+		vis_padd16(REF_S6, TMP6, TMP18);
+
+		vis_padd16(TMP12, TMP20, TMP12);
+
+		vis_padd16(TMP14, TMP22, TMP14);
+		vis_pack16(TMP12, DST_0);
+
+		vis_padd16(TMP16, TMP24, TMP16);
+		vis_pack16(TMP14, DST_1);
+		vis_st64(DST_0, dest[0]);
+
+		vis_padd16(TMP18, TMP26, TMP18);
+		vis_pack16(TMP16, DST_2);
+
+		vis_pack16(TMP18, DST_3);
+		vis_st64_2(DST_2, dest, 8);
+		dest += stride;
+	} while (--height);
+}
+
+static void MC_avg_y_8_vis (uint8_t * dest, const uint8_t * _ref,
+			    const int stride, int height)
+{	/* avg, y interpolation: blends each 8-byte dest row with a _ref row and the _ref row one stride below it. */
+	uint8_t *ref = (uint8_t *) _ref;
+	int stride_8;
+	int offset;
+	/* NOTE(review): GSR scale factor 5 presumably makes vis_pack16 divide the 16-bit sums by 4 - confirm against the VIS manual. */
+	vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+
+	ref = vis_alignaddr(ref);
+	offset = (ref != _ref) ? 8 : 0;
+	/* Prologue: the first ref row is aligned into REF_2; each iteration then fetches the rows below via ref + stride. */
+	vis_ld64(ref[ 0], TMP0);
+	vis_fzero(ZERO);
+
+	vis_ld64_2(ref, offset, TMP2);
+	stride_8 = stride + offset;	/* second-word position in the row one stride ahead (equals stride when offset is 0) */
+
+	vis_ld64(constants3[0], CONST_3);
+	vis_faligndata(TMP0, TMP2, REF_2);
+
+	vis_ld64(constants256_512[0], CONST_256);
+
+	height >>= 1;	/* two rows per iteration: height must be even and >= 2 */
+	do {	/* 20 cycles */
+		vis_ld64_2(ref, stride, TMP0);
+		vis_pmerge(ZERO,       REF_2,     TMP8);
+		vis_mul8x16au(REF_2_1, CONST_256, TMP10);
+
+		vis_ld64_2(ref, stride_8, TMP2);
+		ref += stride;
+
+		vis_ld64(dest[0], DST_0);
+
+		vis_ld64_2(dest, stride, DST_2);
+		vis_faligndata(TMP0, TMP2, REF_0);
+
+		vis_ld64_2(ref, stride, TMP4);
+		vis_mul8x16al(DST_0,   CONST_512, TMP16);
+		vis_pmerge(ZERO,       REF_0,     TMP12);
+
+		vis_ld64_2(ref, stride_8, TMP6);
+		ref += stride;
+		vis_mul8x16al(DST_1,   CONST_512, TMP18);
+		vis_pmerge(ZERO,       REF_0_1,   TMP14);
+
+		vis_padd16(TMP12, CONST_3, TMP12);
+		vis_mul8x16al(DST_2,   CONST_512, TMP24);
+
+		vis_padd16(TMP14, CONST_3, TMP14);
+		vis_mul8x16al(DST_3,   CONST_512, TMP26);
+
+		vis_faligndata(TMP4, TMP6, REF_2);
+
+		vis_padd16(TMP8, TMP12, TMP8);
+
+		vis_padd16(TMP10, TMP14, TMP10);
+		vis_mul8x16au(REF_2,   CONST_256, TMP20);
+
+		vis_padd16(TMP8, TMP16, TMP0);
+		vis_mul8x16au(REF_2_1, CONST_256, TMP22);
+
+		vis_padd16(TMP10, TMP18, TMP2);
+		vis_pack16(TMP0, DST_0);
+
+		vis_pack16(TMP2, DST_1);
+		vis_st64(DST_0, dest[0]);
+		dest += stride;
+		vis_padd16(TMP12, TMP20, TMP12);
+
+		vis_padd16(TMP14, TMP22, TMP14);
+
+		vis_padd16(TMP12, TMP24, TMP0);
+
+		vis_padd16(TMP14, TMP26, TMP2);
+		vis_pack16(TMP0, DST_2);
+
+		vis_pack16(TMP2, DST_3);
+		vis_st64(DST_2, dest[0]);
+		dest += stride;
+	} while (--height);
+}
+
+static void MC_put_xy_16_vis (uint8_t * dest, const uint8_t * _ref,
+			      const int stride, int height)
+{
+	uint8_t *ref = (uint8_t *) _ref;
+	unsigned long off = (unsigned long) ref & 0x7;
+	unsigned long off_plus_1 = off + 1;
+	int stride_8 = stride + 8;
+	int stride_16 = stride + 16;
+	/* 16 bytes per row: mixes ref data at byte offsets off and off + 1 of two consecutive rows; dest is only written. */
+	vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);	/* NOTE(review): presumably the scale used by vis_pack16 -- confirm in vis.h */
+
+	ref = vis_alignaddr(ref);
+
+	vis_ld64(ref[ 0], TMP0);
+	vis_fzero(ZERO);
+
+	vis_ld64(ref[ 8], TMP2);
+
+	vis_ld64(ref[16], TMP4);
+
+	vis_ld64(constants2[0], CONST_2);
+	vis_faligndata(TMP0, TMP2, REF_S0);
+
+	vis_ld64(constants256_512[0], CONST_256);
+	vis_faligndata(TMP2, TMP4, REF_S4);
+
+	if (off != 0x7) {
+		vis_alignaddr_g0((void *)off_plus_1);
+		vis_faligndata(TMP0, TMP2, REF_S2);
+		vis_faligndata(TMP2, TMP4, REF_S6);
+	} else {	/* off == 7: the data at off + 1 begins exactly at the next 8-byte word */
+		vis_src1(TMP2, REF_S2);
+		vis_src1(TMP4, REF_S6);
+	}
+
+	height >>= 1;	/* the loop body stores two rows per pass */
+	do {
+		vis_ld64_2(ref, stride, TMP0);
+		vis_mul8x16au(REF_S0, CONST_256, TMP12);
+		vis_pmerge(ZERO,      REF_S0_1,  TMP14);
+
+		vis_alignaddr_g0((void *)off);	/* back to offset off; it was last set to off + 1 unless off == 7 */
+
+		vis_ld64_2(ref, stride_8, TMP2);
+		vis_mul8x16au(REF_S2, CONST_256, TMP16);
+		vis_pmerge(ZERO,      REF_S2_1,  TMP18);
+
+		vis_ld64_2(ref, stride_16, TMP4);
+		ref += stride;
+		vis_mul8x16au(REF_S4, CONST_256, TMP20);
+		vis_pmerge(ZERO,      REF_S4_1,  TMP22);
+
+		vis_ld64_2(ref, stride, TMP6);
+		vis_mul8x16au(REF_S6, CONST_256, TMP24);
+		vis_pmerge(ZERO,      REF_S6_1,  TMP26);
+
+		vis_ld64_2(ref, stride_8, TMP8);
+		vis_faligndata(TMP0, TMP2, REF_0);
+
+		vis_ld64_2(ref, stride_16, TMP10);
+		ref += stride;
+		vis_faligndata(TMP2, TMP4, REF_4);
+
+		vis_faligndata(TMP6, TMP8, REF_S0);
+
+		vis_faligndata(TMP8, TMP10, REF_S4);
+
+		if (off != 0x7) {
+			vis_alignaddr_g0((void *)off_plus_1);
+			vis_faligndata(TMP0, TMP2, REF_2);
+			vis_faligndata(TMP2, TMP4, REF_6);
+			vis_faligndata(TMP6, TMP8, REF_S2);
+			vis_faligndata(TMP8, TMP10, REF_S6);
+		} else {
+			vis_src1(TMP2, REF_2);
+			vis_src1(TMP4, REF_6);
+			vis_src1(TMP8, REF_S2);
+			vis_src1(TMP10, REF_S6);
+		}
+
+		vis_mul8x16au(REF_0, CONST_256, TMP0);
+		vis_pmerge(ZERO,      REF_0_1,  TMP2);
+
+		vis_mul8x16au(REF_2, CONST_256, TMP4);
+		vis_pmerge(ZERO,      REF_2_1,  TMP6);
+
+		vis_padd16(TMP0, CONST_2, TMP8);
+		vis_mul8x16au(REF_4, CONST_256, TMP0);
+
+		vis_padd16(TMP2, CONST_2, TMP10);
+		vis_mul8x16au(REF_4_1, CONST_256, TMP2);
+
+		vis_padd16(TMP8, TMP4, TMP8);
+		vis_mul8x16au(REF_6, CONST_256, TMP4);
+
+		vis_padd16(TMP10, TMP6, TMP10);
+		vis_mul8x16au(REF_6_1, CONST_256, TMP6);
+
+		vis_padd16(TMP12, TMP8, TMP12);
+
+		vis_padd16(TMP14, TMP10, TMP14);
+
+		vis_padd16(TMP12, TMP16, TMP12);
+
+		vis_padd16(TMP14, TMP18, TMP14);
+		vis_pack16(TMP12, DST_0);
+
+		vis_pack16(TMP14, DST_1);
+		vis_st64(DST_0, dest[0]);	/* first row, bytes 0-7 */
+		vis_padd16(TMP0, CONST_2, TMP12);
+
+		vis_mul8x16au(REF_S0, CONST_256, TMP0);
+		vis_padd16(TMP2, CONST_2, TMP14);
+
+		vis_mul8x16au(REF_S0_1, CONST_256, TMP2);
+		vis_padd16(TMP12, TMP4, TMP12);
+
+		vis_mul8x16au(REF_S2, CONST_256, TMP4);
+		vis_padd16(TMP14, TMP6, TMP14);
+
+		vis_mul8x16au(REF_S2_1, CONST_256, TMP6);
+		vis_padd16(TMP20, TMP12, TMP20);
+
+		vis_padd16(TMP22, TMP14, TMP22);
+
+		vis_padd16(TMP20, TMP24, TMP20);
+
+		vis_padd16(TMP22, TMP26, TMP22);
+		vis_pack16(TMP20, DST_2);
+
+		vis_pack16(TMP22, DST_3);
+		vis_st64_2(DST_2, dest, 8);	/* first row, bytes 8-15 */
+		dest += stride;
+		vis_padd16(TMP0, TMP4, TMP24);
+
+		vis_mul8x16au(REF_S4, CONST_256, TMP0);
+		vis_padd16(TMP2, TMP6, TMP26);
+
+		vis_mul8x16au(REF_S4_1, CONST_256, TMP2);
+		vis_padd16(TMP24, TMP8, TMP24);
+
+		vis_padd16(TMP26, TMP10, TMP26);
+		vis_pack16(TMP24, DST_0);
+
+		vis_pack16(TMP26, DST_1);
+		vis_st64(DST_0, dest[0]);	/* second row, bytes 0-7 */
+		vis_pmerge(ZERO, REF_S6, TMP4);
+
+		vis_pmerge(ZERO,      REF_S6_1,  TMP6);
+
+		vis_padd16(TMP0, TMP4, TMP0);
+
+		vis_padd16(TMP2, TMP6, TMP2);
+
+		vis_padd16(TMP0, TMP12, TMP0);
+
+		vis_padd16(TMP2, TMP14, TMP2);
+		vis_pack16(TMP0, DST_2);
+
+		vis_pack16(TMP2, DST_3);
+		vis_st64_2(DST_2, dest, 8);	/* second row, bytes 8-15 */
+		dest += stride;
+	} while (--height);
+}
+
+static void MC_put_xy_8_vis (uint8_t * dest, const uint8_t * _ref,
+			     const int stride, int height)
+{
+	uint8_t *ref = (uint8_t *) _ref;
+	unsigned long off = (unsigned long) ref & 0x7;
+	unsigned long off_plus_1 = off + 1;
+	int stride_8 = stride + 8;
+	/* 8 bytes per row: mixes ref data at byte offsets off and off + 1 of two consecutive rows; dest is only written. */
+	vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);	/* NOTE(review): presumably the scale used by vis_pack16 -- confirm in vis.h */
+
+	ref = vis_alignaddr(ref);
+
+	vis_ld64(ref[ 0], TMP0);
+	vis_fzero(ZERO);
+
+	vis_ld64(ref[ 8], TMP2);
+
+	vis_ld64(constants2[0], CONST_2);
+
+	vis_ld64(constants256_512[0], CONST_256);
+	vis_faligndata(TMP0, TMP2, REF_S0);
+
+	if (off != 0x7) {
+		vis_alignaddr_g0((void *)off_plus_1);
+		vis_faligndata(TMP0, TMP2, REF_S2);
+	} else {	/* off == 7: the data at off + 1 begins exactly at the next 8-byte word */
+		vis_src1(TMP2, REF_S2);
+	}
+
+	height >>= 1;	/* the loop body stores two rows per pass */
+	do {	/* 26 cycles */
+		vis_ld64_2(ref, stride, TMP0);
+		vis_mul8x16au(REF_S0,   CONST_256, TMP8);
+		vis_pmerge(ZERO,        REF_S2,    TMP12);
+
+		vis_alignaddr_g0((void *)off);	/* back to offset off; it was last set to off + 1 unless off == 7 */
+
+		vis_ld64_2(ref, stride_8, TMP2);
+		ref += stride;
+		vis_mul8x16au(REF_S0_1, CONST_256, TMP10);
+		vis_pmerge(ZERO,        REF_S2_1,  TMP14);
+
+		vis_ld64_2(ref, stride, TMP4);
+
+		vis_ld64_2(ref, stride_8, TMP6);
+		ref += stride;
+		vis_faligndata(TMP0, TMP2, REF_S4);
+
+		vis_pmerge(ZERO, REF_S4, TMP18);
+
+		vis_pmerge(ZERO, REF_S4_1, TMP20);
+
+		vis_faligndata(TMP4, TMP6, REF_S0);	/* REF_S0/REF_S2 are carried into the next pass */
+
+		if (off != 0x7) {
+			vis_alignaddr_g0((void *)off_plus_1);
+			vis_faligndata(TMP0, TMP2, REF_S6);
+			vis_faligndata(TMP4, TMP6, REF_S2);
+		} else {
+			vis_src1(TMP2, REF_S6);
+			vis_src1(TMP6, REF_S2);
+		}
+
+		vis_padd16(TMP18, CONST_2, TMP18);
+		vis_mul8x16au(REF_S6,   CONST_256, TMP22);
+
+		vis_padd16(TMP20, CONST_2, TMP20);
+		vis_mul8x16au(REF_S6_1, CONST_256, TMP24);
+
+		vis_mul8x16au(REF_S0,   CONST_256, TMP26);
+		vis_pmerge(ZERO, REF_S0_1, TMP28);
+
+		vis_mul8x16au(REF_S2,   CONST_256, TMP30);
+		vis_padd16(TMP18, TMP22, TMP18);
+
+		vis_mul8x16au(REF_S2_1, CONST_256, TMP32);
+		vis_padd16(TMP20, TMP24, TMP20);
+
+		vis_padd16(TMP8,  TMP18, TMP8);
+
+		vis_padd16(TMP10, TMP20, TMP10);
+
+		vis_padd16(TMP8,  TMP12, TMP8);
+
+		vis_padd16(TMP10, TMP14, TMP10);
+		vis_pack16(TMP8,  DST_0);
+
+		vis_pack16(TMP10, DST_1);
+		vis_st64(DST_0, dest[0]);	/* first row of this pass */
+		dest += stride;
+		vis_padd16(TMP18, TMP26, TMP18);
+
+		vis_padd16(TMP20, TMP28, TMP20);
+
+		vis_padd16(TMP18, TMP30, TMP18);
+
+		vis_padd16(TMP20, TMP32, TMP20);
+		vis_pack16(TMP18, DST_2);
+
+		vis_pack16(TMP20, DST_3);
+		vis_st64(DST_2, dest[0]);	/* second row of this pass */
+		dest += stride;
+	} while (--height);
+}
+
+static void MC_avg_xy_16_vis (uint8_t * dest, const uint8_t * _ref,
+			      const int stride, int height)
+{
+	uint8_t *ref = (uint8_t *) _ref;
+	unsigned long off = (unsigned long) ref & 0x7;
+	unsigned long off_plus_1 = off + 1;
+	int stride_8 = stride + 8;
+	int stride_16 = stride + 16;
+	/* 16 bytes per row: ref data at offsets off and off + 1 of two consecutive rows plus the old dest row feed each stored row. */
+	vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT);	/* 4 here, 5 in the put_xy variants; NOTE(review): presumably the vis_pack16 scale -- confirm */
+
+	ref = vis_alignaddr(ref);
+
+	vis_ld64(ref[ 0], TMP0);
+	vis_fzero(ZERO);
+
+	vis_ld64(ref[ 8], TMP2);
+
+	vis_ld64(ref[16], TMP4);
+
+	vis_ld64(constants6[0], CONST_6);
+	vis_faligndata(TMP0, TMP2, REF_S0);
+
+	vis_ld64(constants256_1024[0], CONST_256);
+	vis_faligndata(TMP2, TMP4, REF_S4);
+
+	if (off != 0x7) {
+		vis_alignaddr_g0((void *)off_plus_1);
+		vis_faligndata(TMP0, TMP2, REF_S2);
+		vis_faligndata(TMP2, TMP4, REF_S6);
+	} else {	/* off == 7: the data at off + 1 begins exactly at the next 8-byte word */
+		vis_src1(TMP2, REF_S2);
+		vis_src1(TMP4, REF_S6);
+	}
+
+	height >>= 1;	/* the loop body stores two rows per pass */
+	do {	/* 55 cycles */
+		vis_ld64_2(ref, stride, TMP0);
+		vis_mul8x16au(REF_S0, CONST_256, TMP12);
+		vis_pmerge(ZERO,      REF_S0_1,  TMP14);
+
+		vis_alignaddr_g0((void *)off);	/* back to offset off; it was last set to off + 1 unless off == 7 */
+
+		vis_ld64_2(ref, stride_8, TMP2);
+		vis_mul8x16au(REF_S2, CONST_256, TMP16);
+		vis_pmerge(ZERO,      REF_S2_1,  TMP18);
+
+		vis_ld64_2(ref, stride_16, TMP4);
+		ref += stride;
+		vis_mul8x16au(REF_S4, CONST_256, TMP20);
+		vis_pmerge(ZERO,      REF_S4_1,  TMP22);
+
+		vis_ld64_2(ref, stride, TMP6);
+		vis_mul8x16au(REF_S6, CONST_256, TMP24);
+		vis_pmerge(ZERO,      REF_S6_1,  TMP26);
+
+		vis_ld64_2(ref, stride_8, TMP8);
+		vis_faligndata(TMP0, TMP2, REF_0);
+
+		vis_ld64_2(ref, stride_16, TMP10);
+		ref += stride;
+		vis_faligndata(TMP2, TMP4, REF_4);
+
+		vis_ld64(dest[0], DST_0);
+		vis_faligndata(TMP6, TMP8, REF_S0);
+
+		vis_ld64_2(dest, 8, DST_2);
+		vis_faligndata(TMP8, TMP10, REF_S4);
+
+		if (off != 0x7) {
+			vis_alignaddr_g0((void *)off_plus_1);
+			vis_faligndata(TMP0, TMP2, REF_2);
+			vis_faligndata(TMP2, TMP4, REF_6);
+			vis_faligndata(TMP6, TMP8, REF_S2);
+			vis_faligndata(TMP8, TMP10, REF_S6);
+		} else {
+			vis_src1(TMP2, REF_2);
+			vis_src1(TMP4, REF_6);
+			vis_src1(TMP8, REF_S2);
+			vis_src1(TMP10, REF_S6);
+		}
+
+		vis_mul8x16al(DST_0,   CONST_1024, TMP30);
+		vis_pmerge(ZERO, REF_0, TMP0);
+
+		vis_mul8x16al(DST_1,   CONST_1024, TMP32);
+		vis_pmerge(ZERO,      REF_0_1,  TMP2);
+
+		vis_mul8x16au(REF_2, CONST_256, TMP4);
+		vis_pmerge(ZERO,      REF_2_1,  TMP6);
+
+		vis_mul8x16al(DST_2,   CONST_1024, REF_0);	/* from here on REF_0/REF_2 are reused as scratch registers */
+		vis_padd16(TMP0, CONST_6, TMP0);
+
+		vis_mul8x16al(DST_3,   CONST_1024, REF_2);
+		vis_padd16(TMP2, CONST_6, TMP2);
+
+		vis_padd16(TMP0, TMP4, TMP0);
+		vis_mul8x16au(REF_4, CONST_256, TMP4);
+
+		vis_padd16(TMP2, TMP6, TMP2);
+		vis_mul8x16au(REF_4_1, CONST_256, TMP6);
+
+		vis_padd16(TMP12, TMP0, TMP12);
+		vis_mul8x16au(REF_6, CONST_256, TMP8);
+
+		vis_padd16(TMP14, TMP2, TMP14);
+		vis_mul8x16au(REF_6_1, CONST_256, TMP10);
+
+		vis_padd16(TMP12, TMP16, TMP12);
+		vis_mul8x16au(REF_S0, CONST_256, REF_4);
+
+		vis_padd16(TMP14, TMP18, TMP14);
+		vis_mul8x16au(REF_S0_1, CONST_256, REF_6);
+
+		vis_padd16(TMP12, TMP30, TMP12);
+
+		vis_padd16(TMP14, TMP32, TMP14);
+		vis_pack16(TMP12, DST_0);
+
+		vis_pack16(TMP14, DST_1);
+		vis_st64(DST_0, dest[0]);	/* first row, bytes 0-7 */
+		vis_padd16(TMP4, CONST_6, TMP4);
+
+		vis_ld64_2(dest, stride, DST_0);	/* old contents of the second row, read before dest is advanced */
+		vis_padd16(TMP6, CONST_6, TMP6);
+		vis_mul8x16au(REF_S2, CONST_256, TMP12);
+
+		vis_padd16(TMP4, TMP8, TMP4);
+		vis_mul8x16au(REF_S2_1, CONST_256,  TMP14);
+
+		vis_padd16(TMP6, TMP10, TMP6);
+
+		vis_padd16(TMP20, TMP4, TMP20);
+
+		vis_padd16(TMP22, TMP6, TMP22);
+
+		vis_padd16(TMP20, TMP24, TMP20);
+
+		vis_padd16(TMP22, TMP26, TMP22);
+
+		vis_padd16(TMP20, REF_0, TMP20);
+		vis_mul8x16au(REF_S4, CONST_256, REF_0);
+
+		vis_padd16(TMP22, REF_2, TMP22);
+		vis_pack16(TMP20, DST_2);
+
+		vis_pack16(TMP22, DST_3);
+		vis_st64_2(DST_2, dest, 8);	/* first row, bytes 8-15 */
+		dest += stride;
+
+		vis_ld64_2(dest, 8, DST_2);
+		vis_mul8x16al(DST_0,   CONST_1024, TMP30);
+		vis_pmerge(ZERO,      REF_S4_1,  REF_2);
+
+		vis_mul8x16al(DST_1,   CONST_1024, TMP32);
+		vis_padd16(REF_4, TMP0, TMP8);
+
+		vis_mul8x16au(REF_S6, CONST_256, REF_4);
+		vis_padd16(REF_6, TMP2, TMP10);
+
+		vis_mul8x16au(REF_S6_1, CONST_256, REF_6);
+		vis_padd16(TMP8, TMP12, TMP8);
+
+		vis_padd16(TMP10, TMP14, TMP10);
+
+		vis_padd16(TMP8, TMP30, TMP8);
+
+		vis_padd16(TMP10, TMP32, TMP10);
+		vis_pack16(TMP8, DST_0);
+
+		vis_pack16(TMP10, DST_1);
+		vis_st64(DST_0, dest[0]);	/* second row, bytes 0-7 */
+
+		vis_padd16(REF_0, TMP4, REF_0);
+
+		vis_mul8x16al(DST_2,   CONST_1024, TMP30);
+		vis_padd16(REF_2, TMP6, REF_2);
+
+		vis_mul8x16al(DST_3,   CONST_1024, TMP32);
+		vis_padd16(REF_0, REF_4, REF_0);
+
+		vis_padd16(REF_2, REF_6, REF_2);
+
+		vis_padd16(REF_0, TMP30, REF_0);
+
+		/* stall */
+
+		vis_padd16(REF_2, TMP32, REF_2);
+		vis_pack16(REF_0, DST_2);
+
+		vis_pack16(REF_2, DST_3);
+		vis_st64_2(DST_2, dest, 8);	/* second row, bytes 8-15 */
+		dest += stride;
+	} while (--height);
+}
+
+static void MC_avg_xy_8_vis (uint8_t * dest, const uint8_t * _ref,
+			     const int stride, int height)
+{
+	uint8_t *ref = (uint8_t *) _ref;
+	unsigned long off = (unsigned long) ref & 0x7;
+	unsigned long off_plus_1 = off + 1;
+	int stride_8 = stride + 8;
+	/* 8 bytes per row: ref data at offsets off and off + 1 of two consecutive rows plus the old dest row feed each stored row. */
+	vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT);	/* 4 here, 5 in the put_xy variants; NOTE(review): presumably the vis_pack16 scale -- confirm */
+
+	ref = vis_alignaddr(ref);
+
+	vis_ld64(ref[0], TMP0);
+	vis_fzero(ZERO);
+
+	vis_ld64_2(ref, 8, TMP2);
+
+	vis_ld64(constants6[0], CONST_6);
+
+	vis_ld64(constants256_1024[0], CONST_256);
+	vis_faligndata(TMP0, TMP2, REF_S0);
+
+	if (off != 0x7) {
+		vis_alignaddr_g0((void *)off_plus_1);
+		vis_faligndata(TMP0, TMP2, REF_S2);
+	} else {	/* off == 7: the data at off + 1 begins exactly at the next 8-byte word */
+		vis_src1(TMP2, REF_S2);
+	}
+
+	height >>= 1;	/* the loop body stores two rows per pass */
+	do {	/* 31 cycles */
+		vis_ld64_2(ref, stride, TMP0);
+		vis_mul8x16au(REF_S0, CONST_256, TMP8);
+		vis_pmerge(ZERO,      REF_S0_1,  TMP10);
+
+		vis_ld64_2(ref, stride_8, TMP2);
+		ref += stride;
+		vis_mul8x16au(REF_S2, CONST_256, TMP12);
+		vis_pmerge(ZERO,      REF_S2_1,  TMP14);
+
+		vis_alignaddr_g0((void *)off);	/* back to offset off; it was last set to off + 1 unless off == 7 */
+
+		vis_ld64_2(ref, stride, TMP4);
+		vis_faligndata(TMP0, TMP2, REF_S4);
+
+		vis_ld64_2(ref, stride_8, TMP6);
+		ref += stride;
+
+		vis_ld64(dest[0], DST_0);
+		vis_faligndata(TMP4, TMP6, REF_S0);
+
+		vis_ld64_2(dest, stride, DST_2);	/* old contents of the second row of this pass */
+
+		if (off != 0x7) {
+			vis_alignaddr_g0((void *)off_plus_1);
+			vis_faligndata(TMP0, TMP2, REF_S6);
+			vis_faligndata(TMP4, TMP6, REF_S2);
+		} else {
+			vis_src1(TMP2, REF_S6);
+			vis_src1(TMP6, REF_S2);
+		}
+
+		vis_mul8x16al(DST_0,   CONST_1024, TMP30);
+		vis_pmerge(ZERO, REF_S4, TMP22);
+
+		vis_mul8x16al(DST_1,   CONST_1024, TMP32);
+		vis_pmerge(ZERO,      REF_S4_1,  TMP24);
+
+		vis_mul8x16au(REF_S6, CONST_256, TMP26);
+		vis_pmerge(ZERO,      REF_S6_1,  TMP28);
+
+		vis_mul8x16au(REF_S0, CONST_256, REF_S4);	/* REF_S4/REF_S6 are reused as scratch registers from here on */
+		vis_padd16(TMP22, CONST_6, TMP22);
+
+		vis_mul8x16au(REF_S0_1, CONST_256, REF_S6);
+		vis_padd16(TMP24, CONST_6, TMP24);
+
+		vis_mul8x16al(DST_2,   CONST_1024, REF_0);
+		vis_padd16(TMP22, TMP26, TMP22);
+
+		vis_mul8x16al(DST_3,   CONST_1024, REF_2);
+		vis_padd16(TMP24, TMP28, TMP24);
+
+		vis_mul8x16au(REF_S2, CONST_256, TMP26);
+		vis_padd16(TMP8, TMP22, TMP8);
+
+		vis_mul8x16au(REF_S2_1, CONST_256, TMP28);
+		vis_padd16(TMP10, TMP24, TMP10);
+
+		vis_padd16(TMP8, TMP12, TMP8);
+
+		vis_padd16(TMP10, TMP14, TMP10);
+
+		vis_padd16(TMP8, TMP30, TMP8);
+
+		vis_padd16(TMP10, TMP32, TMP10);
+		vis_pack16(TMP8, DST_0);
+
+		vis_pack16(TMP10, DST_1);
+		vis_st64(DST_0, dest[0]);	/* first row of this pass */
+		dest += stride;
+
+		vis_padd16(REF_S4, TMP22, TMP12);
+
+		vis_padd16(REF_S6, TMP24, TMP14);
+
+		vis_padd16(TMP12, TMP26, TMP12);
+
+		vis_padd16(TMP14, TMP28, TMP14);
+
+		vis_padd16(TMP12, REF_0, TMP12);
+
+		vis_padd16(TMP14, REF_2, TMP14);
+		vis_pack16(TMP12, DST_2);
+
+		vis_pack16(TMP14, DST_3);
+		vis_st64(DST_2, dest[0]);	/* second row of this pass */
+		dest += stride;
+	} while (--height);
+}
+
+MPEG2_MC_EXTERN(vis)	/* defines mpeg2_mc_vis; no ';' here because the macro expansion already ends in "};" */
+
+#endif  /* !(ARCH_SPARC) */
diff --git a/src/video_dec/libmpeg2new/libmpeg2/mpeg2_internal.h b/src/video_dec/libmpeg2new/libmpeg2/mpeg2_internal.h
new file mode 100644
index 000000000..fec7d4744
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/mpeg2_internal.h
@@ -0,0 +1,302 @@
+/*
+ * mpeg2_internal.h
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#define STATE_INTERNAL_NORETURN ((mpeg2_state_t)-1)	/* NOTE(review): presumably an internal-only state value -- confirm in decode.c */
+
+/* macroblock modes: one bit each, bits 0-5 */
+#define MACROBLOCK_INTRA 1
+#define MACROBLOCK_PATTERN 2
+#define MACROBLOCK_MOTION_BACKWARD 4
+#define MACROBLOCK_MOTION_FORWARD 8
+#define MACROBLOCK_QUANT 16
+#define DCT_TYPE_INTERLACED 32
+/* motion_type: presumably kept above the six mode bits, hence the shift of 6 */
+#define MOTION_TYPE_SHIFT 6
+#define MC_FIELD 1
+#define MC_FRAME 2
+#define MC_16X8 2	/* same code as MC_FRAME; which one applies presumably depends on the picture structure */
+#define MC_DMV 3
+
+/* picture structure */
+#define TOP_FIELD 1
+#define BOTTOM_FIELD 2
+#define FRAME_PICTURE 3	/* == TOP_FIELD | BOTTOM_FIELD */
+
+/* picture coding type */
+#define I_TYPE 1
+#define P_TYPE 2
+#define B_TYPE 3
+#define D_TYPE 4
+
+typedef void mpeg2_mc_fct (uint8_t *, const uint8_t *, int, int);	/* motion compensation routine; NOTE(review): presumably (dest, ref, stride, height) */
+
+typedef struct {	/* per-direction motion state; see b_motion / f_motion in struct mpeg2_decoder_s */
+    uint8_t * ref[2][3];	/* NOTE(review): presumably [field][plane] reference pointers -- confirm */
+    uint8_t ** ref2[2];
+    int pmv[2][2];	/* NOTE(review): presumably the predicted motion vectors -- confirm */
+    int f_code[2];
+} motion_t;
+
+typedef void motion_parser_t (mpeg2_decoder_t * decoder,	/* handler type stored in mpeg2_decoder_s.motion_parser[] */
+			      motion_t * motion,
+			      mpeg2_mc_fct * const * table);	/* table: array of mpeg2_mc_fct pointers, e.g. mpeg2_mc_t.put / .avg */
+
+struct mpeg2_decoder_s {
+    /* first, state that carries information from one macroblock to the */
+    /* next inside a slice, and is never used outside of mpeg2_slice() */
+
+    /* bit parsing stuff */
+    uint32_t bitstream_buf;		/* current 32 bit working set */
+    int bitstream_bits;			/* used bits in working set */
+    const uint8_t * bitstream_ptr;	/* buffer with stream data */
+
+    uint8_t * dest[3];	/* NOTE(review): presumably one output pointer per Y/U/V plane -- confirm */
+
+    int offset;
+    int stride;
+    int uv_stride;
+    int slice_stride;
+    int slice_uv_stride;
+    int stride_frame;
+    unsigned int limit_x;
+    unsigned int limit_y_16;
+    unsigned int limit_y_8;
+    unsigned int limit_y;
+
+    /* Motion vectors */
+    /* The f_ and b_ correspond to the forward and backward motion */
+    /* predictors */
+    motion_t b_motion;
+    motion_t f_motion;
+    motion_parser_t * motion_parser[5];
+
+    /* predictor for DC coefficients in intra blocks */
+    int16_t dc_dct_pred[3];
+
+    /* DCT coefficients */
+    int16_t DCTblock[64] ATTR_ALIGN(64);
+
+    uint8_t * picture_dest[3];
+    void (* convert) (void * convert_id, uint8_t * const * src,
+		      unsigned int v_offset);
+    void * convert_id;	/* passed as the first argument of convert() above, going by the parameter name */
+
+    int dmv_offset;
+    unsigned int v_offset;
+
+    /* now non-slice-specific information */
+
+    /* sequence header stuff */
+    uint16_t * quantizer_matrix[4];
+    uint16_t (* chroma_quantizer[2])[64];
+    uint16_t quantizer_prescale[4][32][64];	/* NOTE(review): presumably [matrix][quantizer scale][coefficient] -- confirm */
+
+    /* The width and height of the picture snapped to macroblock units */
+    int width;
+    int height;
+    int vertical_position_extension;
+    int chroma_format;
+
+    /* picture header stuff */
+
+    /* what type of picture this is (I, P, B, D) */
+    int coding_type;
+
+    /* picture coding extension stuff */
+
+    /* quantization factor for intra dc coefficients */
+    int intra_dc_precision;
+    /* top/bottom/both fields */
+    int picture_structure;
+    /* bool to indicate all predictions are frame based */
+    int frame_pred_frame_dct;
+    /* bool to indicate whether intra blocks have motion vectors */
+    /* (for concealment) */
+    int concealment_motion_vectors;
+    /* bool to use different vlc tables */
+    int intra_vlc_format;
+    /* used for DMV MC */
+    int top_field_first;
+
+    /* stuff derived from bitstream */
+
+    /* pointer to the zigzag scan we're supposed to be using */
+    const uint8_t * scan;
+
+    int second_field;
+
+    int mpeg1;	/* NOTE(review): looks like a flag for MPEG-1 streams -- confirm */
+};
+
+typedef struct {	/* element type of mpeg2dec_s.fbuf_alloc[] */
+    mpeg2_fbuf_t fbuf;	/* the only member */
+} fbuf_alloc_t;
+
+struct mpeg2dec_s {
+    mpeg2_decoder_t decoder;	/* embedded by value as the first member */
+
+    mpeg2_info_t info;
+
+    uint32_t shift;
+    int is_display_initialized;
+    mpeg2_state_t (* action) (struct mpeg2dec_s * mpeg2dec);	/* NOTE(review): presumably the next parsing step to run -- confirm in decode.c */
+    mpeg2_state_t state;
+    uint32_t ext_state;
+
+    /* allocated in init - gcc has problems allocating such big structures */
+    uint8_t * chunk_buffer;
+    /* pointer to start of the current chunk */
+    uint8_t * chunk_start;
+    /* pointer to current position in chunk_buffer */
+    uint8_t * chunk_ptr;
+    /* last start code ? */
+    uint8_t code;
+
+    /* picture tags */
+    uint32_t tag_current, tag2_current, tag_previous, tag2_previous;
+    int num_tags;
+    int bytes_since_tag;
+
+    int first;
+    int alloc_index_user;
+    int alloc_index;
+    uint8_t first_decode_slice;
+    uint8_t nb_decode_slices;
+
+    unsigned int user_data_len;
+
+    mpeg2_sequence_t new_sequence;
+    mpeg2_sequence_t sequence;
+    mpeg2_gop_t new_gop;
+    mpeg2_gop_t gop;
+    mpeg2_picture_t new_picture;
+    mpeg2_picture_t pictures[4];
+    mpeg2_picture_t * picture;
+    /*const*/ mpeg2_fbuf_t * fbuf[3];	/* 0: current fbuf, 1-2: prediction fbufs */
+
+    fbuf_alloc_t fbuf_alloc[3];
+    int custom_fbuf;
+
+    uint8_t * yuv_buf[3][3];	/* NOTE(review): presumably [buffer][plane] -- confirm */
+    int yuv_index;
+    mpeg2_convert_t * convert;
+    void * convert_arg;
+    unsigned int convert_id_size;
+    int convert_stride;
+    void (* convert_start) (void * id, const mpeg2_fbuf_t * fbuf,
+			    const mpeg2_picture_t * picture,
+			    const mpeg2_gop_t * gop);
+
+    uint8_t * buf_start;
+    uint8_t * buf_end;
+
+    int16_t display_offset_x, display_offset_y;
+
+    int copy_matrix;
+    int8_t q_scale_type, scaled[4];
+    uint8_t quantizer_matrix[4][64];	/* same [4][64] shape as new_quantizer_matrix below */
+    uint8_t new_quantizer_matrix[4][64];
+};
+
+typedef struct {
+#ifdef ARCH_PPC
+    uint8_t regv[12*16];	/* 192 bytes; NOTE(review): presumably save space for twelve 16-byte vector registers -- confirm */
+#endif
+    int dummy;	/* the only member when ARCH_PPC is not defined */
+} cpu_state_t;
+
+/* cpu_accel.c -- takes accel flags and returns accel flags */
+uint32_t mpeg2_detect_accel (uint32_t accel);
+
+/* cpu_state.c */
+void mpeg2_cpu_state_init (uint32_t accel);
+
+/* decode.c -- both take the decoder context and return an mpeg2_state_t */
+mpeg2_state_t mpeg2_seek_header (mpeg2dec_t * mpeg2dec);
+mpeg2_state_t mpeg2_parse_header (mpeg2dec_t * mpeg2dec);
+
+/* header.c -- NOTE(review): the int-returning parsers presumably return non-zero on error; confirm in header.c */
+void mpeg2_header_state_init (mpeg2dec_t * mpeg2dec);
+void mpeg2_reset_info (mpeg2_info_t * info);
+int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec);
+int mpeg2_header_gop (mpeg2dec_t * mpeg2dec);
+mpeg2_state_t mpeg2_header_picture_start (mpeg2dec_t * mpeg2dec);
+int mpeg2_header_picture (mpeg2dec_t * mpeg2dec);
+int mpeg2_header_extension (mpeg2dec_t * mpeg2dec);
+int mpeg2_header_user_data (mpeg2dec_t * mpeg2dec);
+void mpeg2_header_sequence_finalize (mpeg2dec_t * mpeg2dec);
+void mpeg2_header_gop_finalize (mpeg2dec_t * mpeg2dec);
+void mpeg2_header_picture_finalize (mpeg2dec_t * mpeg2dec, uint32_t accels);	/* the only finalize step that also takes accel flags */
+mpeg2_state_t mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec);
+mpeg2_state_t mpeg2_header_end (mpeg2dec_t * mpeg2dec);
+void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int b_type);
+
+/* idct.c -- takes accel flags, like mpeg2_cpu_state_init and mpeg2_mc_init */
+void mpeg2_idct_init (uint32_t accel);
+
+/* idct_mmx.c -- here and below, the *_add_* variants take an extra leading "last" argument */
+void mpeg2_idct_copy_mmxext (int16_t * block, uint8_t * dest, int stride);
+void mpeg2_idct_add_mmxext (int last, int16_t * block,
+			    uint8_t * dest, int stride);
+void mpeg2_idct_copy_mmx (int16_t * block, uint8_t * dest, int stride);
+void mpeg2_idct_add_mmx (int last, int16_t * block,
+			 uint8_t * dest, int stride);
+void mpeg2_idct_mmx_init (void);
+
+/* idct_altivec.c -- same copy/add/init trio as the MMX set */
+void mpeg2_idct_copy_altivec (int16_t * block, uint8_t * dest, int stride);
+void mpeg2_idct_add_altivec (int last, int16_t * block,
+			     uint8_t * dest, int stride);
+void mpeg2_idct_altivec_init (void);
+
+/* idct_alpha.c -- two copy/add pairs (mvi and plain alpha) sharing one init */
+void mpeg2_idct_copy_mvi (int16_t * block, uint8_t * dest, int stride);
+void mpeg2_idct_add_mvi (int last, int16_t * block,
+			 uint8_t * dest, int stride);
+void mpeg2_idct_copy_alpha (int16_t * block, uint8_t * dest, int stride);
+void mpeg2_idct_add_alpha (int last, int16_t * block,
+			   uint8_t * dest, int stride);
+void mpeg2_idct_alpha_init (void);
+
+/* motion_comp.c */
+void mpeg2_mc_init (uint32_t accel);
+
+typedef struct {
+    mpeg2_mc_fct * put [8];	/* order: o, x, y, xy of the _16 functions, then o, x, y, xy of the _8 functions */
+    mpeg2_mc_fct * avg [8];	/* same order as put[] */
+} mpeg2_mc_t;
+/* Defines the table mpeg2_mc_<x> from the sixteen MC_{put,avg}_*_<x> functions; the expansion already ends in ';'. */
+#define MPEG2_MC_EXTERN(x) mpeg2_mc_t mpeg2_mc_##x = {			  \
+    {MC_put_o_16_##x, MC_put_x_16_##x, MC_put_y_16_##x, MC_put_xy_16_##x, \
+     MC_put_o_8_##x,  MC_put_x_8_##x,  MC_put_y_8_##x,  MC_put_xy_8_##x}, \
+    {MC_avg_o_16_##x, MC_avg_x_16_##x, MC_avg_y_16_##x, MC_avg_xy_16_##x, \
+     MC_avg_o_8_##x,  MC_avg_x_8_##x,  MC_avg_y_8_##x,  MC_avg_xy_8_##x}  \
+};
+
+extern mpeg2_mc_t mpeg2_mc_c;	/* names follow the mpeg2_mc_<x> pattern produced by MPEG2_MC_EXTERN(<x>) */
+extern mpeg2_mc_t mpeg2_mc_mmx;
+extern mpeg2_mc_t mpeg2_mc_mmxext;
+extern mpeg2_mc_t mpeg2_mc_3dnow;
+extern mpeg2_mc_t mpeg2_mc_altivec;
+extern mpeg2_mc_t mpeg2_mc_alpha;
+extern mpeg2_mc_t mpeg2_mc_vis;	/* defined in the VIS motion compensation file via MPEG2_MC_EXTERN(vis) */
diff --git a/src/video_dec/libmpeg2new/libmpeg2/rgb.c b/src/video_dec/libmpeg2new/libmpeg2/rgb.c
new file mode 100644
index 000000000..e4abcacc2
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/rgb.c
@@ -0,0 +1,598 @@
+/*
+ * rgb.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+#include <xine/attributes.h>
+
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "mpeg2convert.h"
+#include "convert_internal.h"
+/* Row of Inverse_Table_6_9 used by rgb_c_init; 6 is the SMPTE 170M row. */
+static int matrix_coefficients = 6;
+/* Columns as read by rgb_c_init: {crv, cbu, -cgu, -cgv}. */
+static const int Inverse_Table_6_9[8][4] = {
+    {117504, 138453, 13954, 34903}, /* no sequence_display_extension */
+    {117504, 138453, 13954, 34903}, /* ITU-R Rec. 709 (1990) */
+    {104597, 132201, 25675, 53279}, /* unspecified */
+    {104597, 132201, 25675, 53279}, /* reserved */
+    {104448, 132798, 24759, 53109}, /* FCC */
+    {104597, 132201, 25675, 53279}, /* ITU-R Rec. 624-4 System B, G */
+    {104597, 132201, 25675, 53279}, /* SMPTE 170M */
+    {117579, 136230, 16907, 35559}  /* SMPTE 240M (1987) */
+};
+/* Dither offsets; DSTDITHER indexes this through pd = dither + 2 * dithpos. */
+static const uint8_t dither[] ATTR_ALIGN(32) = {
+     0,  0, 23, 54,  5, 13, 29, 68,  1,  3, 24, 58,  7, 17, 30, 71,
+     0,  0, 23, 54,  5, 13, 29, 68,  1,  3, 24, 58,  7, 17, 30, 71,
+     0,  0, 23, 54,  5, 13, 29, 68,  1,  3, 24, 58,  7, 17, 30, 71,
+     0,  0, 23, 54,  5, 13, 29, 68,  1,  3, 24, 58,  7, 17, 30, 71,
+    15, 36,  7, 18, 21, 50, 13, 31, 17, 39,  9, 21, 22, 53, 15, 35,
+    15, 36,  7, 18, 21, 50, 13, 31, 17, 39,  9, 21, 22, 53, 15, 35,
+    15, 36,  7, 18, 21, 50, 13, 31, 17, 39,  9, 21, 22, 53, 15, 35,
+    15, 36,  7, 18, 21, 50, 13, 31, 17, 39,  9, 21, 22, 53, 15, 35,
+     3,  9, 27, 63,  1,  4, 25, 59,  5, 12, 28, 67,  3,  7, 26, 62,
+     3,  9, 27, 63,  1,  4, 25, 59,  5, 12, 28, 67,  3,  7, 26, 62,
+     3,  9, 27, 63,  1,  4, 25, 59,  5, 12, 28, 67,  3,  7, 26, 62,
+     3,  9, 27, 63,  1,  4, 25, 59,  5, 12, 28, 67,  3,  7, 26, 62,
+    19, 45, 11, 27, 17, 41,  9, 22, 21, 49, 13, 30, 19, 44, 11, 26,
+    19, 45, 11, 27, 17, 41,  9, 22, 21, 49, 13, 30, 19, 44, 11, 26,
+    19, 45, 11, 27, 17, 41,  9, 22, 21, 49, 13, 30, 19, 44, 11, 26,
+    19, 45, 11, 27, 17, 41,  9, 22, 21, 49, 13, 30, 19, 44, 11, 26,
+     0,  2, 24, 57,  6, 15, 30, 70,  0,  1, 23, 55,  6, 14, 29, 69,
+     0,  2, 24, 57,  6, 15, 30, 70,  0,  1, 23, 55,  6, 14, 29, 69,
+     0,  2, 24, 57,  6, 15, 30, 70,  0,  1, 23, 55,  6, 14, 29, 69,
+     0,  2, 24, 57,  6, 15, 30, 70,  0,  1, 23, 55,  6, 14, 29, 69,
+    16, 38,  8, 20, 22, 52, 14, 34, 16, 37,  8, 19, 21, 51, 14, 33,
+    16, 38,  8, 20, 22, 52, 14, 34, 16, 37,  8, 19, 21, 51, 14, 33,
+    16, 38,  8, 20, 22, 52, 14, 34, 16, 37,  8, 19, 21, 51, 14, 33,
+    16, 38,  8, 20, 22, 52, 14, 34, 16, 37,  8, 19, 21, 51, 14, 33,
+     4, 11, 28, 66,  2,  6, 26, 61,  4, 10, 27, 65,  2,  5, 25, 60,
+     4, 11, 28, 66,  2,  6, 26, 61,  4, 10, 27, 65,  2,  5, 25, 60,
+     4, 11, 28, 66,  2,  6, 26, 61,  4, 10, 27, 65,  2,  5, 25, 60,
+     4, 11, 28, 66,  2,  6, 26, 61,  4, 10, 27, 65,  2,  5, 25, 60,
+    20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23,
+    20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23,
+    20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23,
+    20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23,
+     0,  0, 23, 54,  5, 13, 29, 68,  1,  3, 24, 58,  7, 17, 30, 71,
+     0,  0, 23, 54,  5, 13, 29, 68,  1,  3, 24, 58,  7, 17, 30, 71,
+     0,  0, 23, 54,  5, 13, 29, 68,  1,  3, 24, 58,  7, 17, 30, 71,
+     0,  0, 23, 54,  5, 13, 29, 68,  1,  3, 24, 58,  7, 17, 30, 71,
+    15, 36,  7, 18, 21, 50, 13, 31, 17, 39,  9, 21, 22, 53, 15, 35,
+    15, 36,  7, 18, 21, 50, 13, 31, 17, 39,  9, 21, 22, 53, 15, 35
+};
+/* rgb_start picks dither_offset from here using temporal_reference & 63. */
+static const uint8_t dither_temporal[64] = {
+    0x00, 0x20, 0x21, 0x01, 0x40, 0x60, 0x61, 0x41,
+    0x42, 0x62, 0x63, 0x43, 0x02, 0x22, 0x23, 0x03,
+    0x80, 0xa0, 0xa1, 0x81, 0xc0, 0xe0, 0xe1, 0xc1,
+    0xc2, 0xe2, 0xe3, 0xc3, 0x82, 0xa2, 0xa3, 0x83,
+    0x84, 0xa4, 0xa5, 0x85, 0xc4, 0xe4, 0xe5, 0xc5,
+    0xc6, 0xe6, 0xe7, 0xc7, 0x86, 0xa6, 0xa7, 0x87,
+    0x04, 0x24, 0x25, 0x05, 0x44, 0x64, 0x65, 0x45,
+    0x46, 0x66, 0x67, 0x47, 0x06, 0x26, 0x27, 0x07
+};
+/* State of the C converters: the shared base plus lookup pointers set up by rgb_c_init. */
+typedef struct {
+    convert_rgb_t base;
+    void * table_rV[256];	/* per V value: red table pointer, already offset */
+    void * table_gU[256];	/* per U value: green table pointer, already offset */
+    int table_gV[256];		/* per V value: byte offset added to table_gU[U] */
+    void * table_bU[256];	/* per U value: blue table pointer, already offset */
+} convert_rgb_c_t;
+/* Reads chroma sample i and points r, g and b at the matching lookup tables. */
+#define RGB(type,i)							\
+    U = pu[i];								\
+    V = pv[i];								\
+    r = (type *) id->table_rV[V];					\
+    g = (type *) (((uint8_t *)id->table_gU[U]) + id->table_gV[V]);	\
+    b = (type *) id->table_bU[U];
+/* Stores pixel i as the sum of the r, g and b table entries for its luma; j is unused. */
+#define DST(py,dst,i,j)			\
+    Y = py[i];				\
+    dst[i] = r[Y] + g[Y] + b[Y];
+/* Stores pixel i as three consecutive elements in r, g, b order; j is unused. */
+#define DSTRGB(py,dst,i,j)					\
+    Y = py[i];							\
+    dst[3*i] = r[Y]; dst[3*i+1] = g[Y]; dst[3*i+2] = b[Y];
+/* Stores pixel i as three consecutive elements in b, g, r order; j is unused. */
+#define DSTBGR(py,dst,i,j)					\
+    Y = py[i];							\
+    dst[3*i] = b[Y]; dst[3*i+1] = g[Y]; dst[3*i+2] = r[Y];
+/* Like DST, with the luma index biased by dither values from pd; j selects the pd row (96 * j). */
+#define DSTDITHER(py,dst,i,j)						  \
+    Y = py[i];								  \
+    dst[i] = r[Y+pd[2*i+96*j]] + g[Y-pd[2*i+96*j]] + b[Y+pd[2*i+1+96*j]];
+/* DO keeps its argument and SKIP drops it; one of them is passed as DITHER to the DECLARE_4xx macros. */
+#define DO(x) x
+#define SKIP(x)
+/* Generates func: converts a slice two rows at a time, one chroma sample per 2x2 pixels. */
+#define DECLARE_420(func,type,num,DST,DITHER)				\
+static void func (void * _id, uint8_t * const * src,			\
+		  unsigned int v_offset)				\
+{									\
+    const convert_rgb_c_t * const id = (convert_rgb_c_t *) _id;		\
+    type * dst_1;							\
+    const uint8_t * py_1, * pu, * pv;					\
+    int i;								\
+    DITHER(uint8_t dithpos = id->base.dither_offset;)			\
+    /* src[0], src[1], src[2] are read as the Y, U and V planes */	\
+    dst_1 = (type *)(id->base.rgb_ptr + id->base.rgb_slice * v_offset);	\
+    py_1 = src[0];	pu = src[1];	pv = src[2];			\
+    /* i counts passes; each pass converts two luma rows */		\
+    i = 8;								\
+    do {								\
+	const uint8_t * py_2;						\
+	int j, U, V, Y;							\
+	const type * r, * g, * b;					\
+	type * dst_2;							\
+	DITHER(const uint8_t * const pd = dither + 2 * dithpos;)	\
+									\
+	dst_2 = (type *)((char *)dst_1 + id->base.rgb_stride);		\
+	py_2 = py_1 + id->base.y_stride;				\
+	j = id->base.width;	/* rgb_internal stores seq->width >> 3 */	\
+	do {								\
+	    RGB (type, 0)						\
+	    DST (py_1, dst_1, 0, 0)					\
+	    DST (py_1, dst_1, 1, 0)					\
+	    DST (py_2, dst_2, 0, 1)					\
+	    DST (py_2, dst_2, 1, 1)					\
+									\
+	    RGB (type, 1)						\
+	    DST (py_2, dst_2, 2, 1)					\
+	    DST (py_2, dst_2, 3, 1)					\
+	    DST (py_1, dst_1, 2, 0)					\
+	    DST (py_1, dst_1, 3, 0)					\
+									\
+	    RGB (type, 2)						\
+	    DST (py_1, dst_1, 4, 0)					\
+	    DST (py_1, dst_1, 5, 0)					\
+	    DST (py_2, dst_2, 4, 1)					\
+	    DST (py_2, dst_2, 5, 1)					\
+									\
+	    RGB (type, 3)						\
+	    DST (py_2, dst_2, 6, 1)					\
+	    DST (py_2, dst_2, 7, 1)					\
+	    DST (py_1, dst_1, 6, 0)					\
+	    DST (py_1, dst_1, 7, 0)					\
+									\
+	    pu += 4;							\
+	    pv += 4;							\
+	    py_1 += 8;							\
+	    py_2 += 8;							\
+	    dst_1 += 8 * num;						\
+	    dst_2 += 8 * num;						\
+	} while (--j);							\
+	if (--i == id->base.field) {	/* restart one frame row below the slice start */	\
+	    dst_1 = (type *)(id->base.rgb_ptr +				\
+			     id->base.rgb_slice * (v_offset + 1));	\
+	    py_1 = src[0] + id->base.y_stride_frame;			\
+	    pu = src[1] + id->base.uv_stride_frame;			\
+	    pv = src[2] + id->base.uv_stride_frame;			\
+	} else {	/* otherwise step on to the next pair of rows */	\
+	    py_1 += id->base.y_increm;					\
+	    pu += id->base.uv_increm;					\
+	    pv += id->base.uv_increm;					\
+	    dst_1 = (type *)((char *)dst_1 + id->base.rgb_increm);	\
+	    DITHER(dithpos += id->base.dither_stride;)			\
+	}								\
+    } while (i);							\
+}
+/* Arguments: function name, pixel type, dst elements per pixel, store macro, DO or SKIP for dithering. */
+DECLARE_420 (rgb_c_32_420, uint32_t, 1, DST, SKIP)
+DECLARE_420 (rgb_c_24_rgb_420, uint8_t, 3, DSTRGB, SKIP)
+DECLARE_420 (rgb_c_24_bgr_420, uint8_t, 3, DSTBGR, SKIP)
+DECLARE_420 (rgb_c_16_420, uint16_t, 1, DST, SKIP)
+DECLARE_420 (rgb_c_8_420, uint8_t, 1, DSTDITHER, DO)
+/* Generates func: converts 16 rows one at a time, one chroma sample per two pixels of a row. */
+#define DECLARE_422(func,type,num,DST,DITHER)				\
+static void func (void * _id, uint8_t * const * src,			\
+		  unsigned int v_offset)				\
+{									\
+    const convert_rgb_c_t * const id = (convert_rgb_c_t *) _id;		\
+    type * dst;								\
+    const uint8_t * py, * pu, * pv;					\
+    int i;								\
+    DITHER(uint8_t dithpos = id->base.dither_offset;)			\
+    /* src[0], src[1], src[2] are read as the Y, U and V planes */	\
+    dst = (type *)(id->base.rgb_ptr + id->base.rgb_stride * v_offset);	\
+    py = src[0];	pu = src[1];	pv = src[2];			\
+    /* i counts the rows of the slice */				\
+    i = 16;								\
+    do {								\
+	int j, U, V, Y;							\
+	const type * r, * g, * b;					\
+	DITHER(const uint8_t * const pd = dither + 2 * dithpos;)	\
+									\
+	j = id->base.width;	/* rgb_internal stores seq->width >> 3 */	\
+	do {								\
+	    RGB (type, 0)						\
+	    DST (py, dst, 0, 0)						\
+	    DST (py, dst, 1, 0)						\
+									\
+	    RGB (type, 1)						\
+	    DST (py, dst, 2, 0)						\
+	    DST (py, dst, 3, 0)						\
+									\
+	    RGB (type, 2)						\
+	    DST (py, dst, 4, 0)						\
+	    DST (py, dst, 5, 0)						\
+									\
+	    RGB (type, 3)						\
+	    DST (py, dst, 6, 0)						\
+	    DST (py, dst, 7, 0)						\
+									\
+	    pu += 4;							\
+	    pv += 4;							\
+	    py += 8;							\
+	    dst += 8 * num;						\
+	} while (--j);							\
+	py += id->base.y_increm;					\
+	pu += id->base.uv_increm;					\
+	pv += id->base.uv_increm;					\
+	dst = (type *)((char *)dst + id->base.rgb_increm);		\
+	DITHER(dithpos += id->base.dither_stride;)			\
+    } while (--i);							\
+}
+/* Arguments: function name, pixel type, dst elements per pixel, store macro, DO or SKIP for dithering. */
+DECLARE_422 (rgb_c_32_422, uint32_t, 1, DST, SKIP)
+DECLARE_422 (rgb_c_24_rgb_422, uint8_t, 3, DSTRGB, SKIP)
+DECLARE_422 (rgb_c_24_bgr_422, uint8_t, 3, DSTBGR, SKIP)
+DECLARE_422 (rgb_c_16_422, uint16_t, 1, DST, SKIP)
+DECLARE_422 (rgb_c_8_422, uint8_t, 1, DSTDITHER, DO)
+/* Generates func: converts 16 rows one at a time, one chroma sample per pixel. */
+#define DECLARE_444(func,type,num,DST,DITHER)				\
+static void func (void * _id, uint8_t * const * src,			\
+		  unsigned int v_offset)				\
+{									\
+    const convert_rgb_c_t * const id = (convert_rgb_c_t *) _id;		\
+    type * dst;								\
+    const uint8_t * py, * pu, * pv;					\
+    int i;								\
+    DITHER(uint8_t dithpos = id->base.dither_offset;)			\
+    /* src[0], src[1], src[2] are read as the Y, U and V planes */	\
+    dst = (type *)(id->base.rgb_ptr + id->base.rgb_stride * v_offset);	\
+    py = src[0];	pu = src[1];	pv = src[2];			\
+    /* i counts the rows of the slice */				\
+    i = 16;								\
+    do {								\
+	int j, U, V, Y;							\
+	const type * r, * g, * b;					\
+	DITHER(const uint8_t * const pd = dither + 2 * dithpos;)	\
+									\
+	j = id->base.width;	/* rgb_internal stores seq->width >> 3 */	\
+	do {								\
+	    RGB (type, 0)						\
+	    DST (py, dst, 0, 0)						\
+	    RGB (type, 1)						\
+	    DST (py, dst, 1, 0)						\
+	    RGB (type, 2)						\
+	    DST (py, dst, 2, 0)						\
+	    RGB (type, 3)						\
+	    DST (py, dst, 3, 0)						\
+	    RGB (type, 4)						\
+	    DST (py, dst, 4, 0)						\
+	    RGB (type, 5)						\
+	    DST (py, dst, 5, 0)						\
+	    RGB (type, 6)						\
+	    DST (py, dst, 6, 0)						\
+	    RGB (type, 7)						\
+	    DST (py, dst, 7, 0)						\
+									\
+	    pu += 8;							\
+	    pv += 8;							\
+	    py += 8;							\
+	    dst += 8 * num;						\
+	} while (--j);							\
+	py += id->base.y_increm;				   	\
+	pu += id->base.y_increm;	/* chroma steps by y_increm here, not uv_increm */	\
+	pv += id->base.y_increm;				   	\
+	dst = (type *)((char *)dst + id->base.rgb_increm);		\
+	DITHER(dithpos += id->base.dither_stride;)			\
+    } while (--i);							\
+}
+/* Arguments: function name, pixel type, dst elements per pixel, store macro, DO or SKIP for dithering. */
+DECLARE_444 (rgb_c_32_444, uint32_t, 1, DST, SKIP)
+DECLARE_444 (rgb_c_24_rgb_444, uint8_t, 3, DSTRGB, SKIP)
+DECLARE_444 (rgb_c_24_bgr_444, uint8_t, 3, DSTBGR, SKIP)
+DECLARE_444 (rgb_c_16_444, uint16_t, 1, DST, SKIP)
+DECLARE_444 (rgb_c_8_444, uint8_t, 1, DSTDITHER, DO)
+/* Per-picture setup: derives the strides, increments and dither state that the copy functions read from id. */
+static void rgb_start (void * _id, const mpeg2_fbuf_t * fbuf,
+		       const mpeg2_picture_t * picture,
+		       const mpeg2_gop_t * gop)	/* gop is not referenced */
+{
+    convert_rgb_t * id = (convert_rgb_t *) _id;
+    int uv_stride = id->uv_stride_frame;
+    id->y_stride = id->y_stride_frame;
+    id->rgb_ptr = fbuf->buf[0];
+    id->rgb_slice = id->rgb_stride = id->rgb_stride_frame;
+    id->dither_stride = 32;
+    id->dither_offset = dither_temporal[picture->temporal_reference & 63];
+    id->field = 0;	/* 0 is reached by the copy loop counters only on their final pass */
+    if ((picture->nb_fields == 1) ||
+	(id->chroma420 && !(picture->flags & PIC_FLAG_PROGRESSIVE_FRAME))) {
+	uv_stride <<= 1;	/* all strides double: rows are taken two frame rows apart */
+	id->y_stride <<= 1;
+	id->rgb_stride <<= 1;
+	id->dither_stride <<= 1;
+	id->dither_offset += 16;
+	if (picture->nb_fields == 1) {
+	    id->rgb_slice <<= 1;
+	    if (!(picture->flags & PIC_FLAG_TOP_FIELD_FIRST)) {
+		id->rgb_ptr += id->rgb_stride_frame;	/* output starts one frame row down */
+		id->dither_offset += 32;
+	    }
+	} else
+	    id->field = 8 >> id->convert420;	/* half of the copy loop's initial count (8 or 16) */
+    }
+    id->y_increm = (id->y_stride << id->convert420) - id->y_stride_frame;
+    id->uv_increm = uv_stride - id->uv_stride_frame;
+    id->rgb_increm = (id->rgb_stride << id->convert420) - id->rgb_stride_min;
+    id->dither_stride <<= id->convert420;
+}
+/* Division rounded to the nearest integer, symmetric about zero; written for a positive divisor. */
+static inline int div_round (int dividend, int divisor)
+{
+    if (dividend > 0)
+	return (dividend + (divisor>>1)) / divisor;
+    else
+	return -((-dividend + (divisor>>1)) / divisor);	/* round the magnitude, then restore the sign */
+}
+/* With id == NULL, returns the table bytes needed for a supported bpp; otherwise fills id's tables and returns 0. */
+static unsigned int rgb_c_init (convert_rgb_c_t * id,
+				mpeg2convert_rgb_order_t order,
+				unsigned int bpp)
+{
+    int i;
+    uint8_t table_Y[1024];	/* clamped luma values; entry i stands for input i - 384 */
+    uint32_t * table_32 = 0;
+    uint16_t * table_16 = 0;
+    uint8_t * table_8 = 0;
+    uint8_t * table_332 = 0;
+    int entry_size = 0;
+    void * table_r = 0;
+    void * table_g = 0;
+    void * table_b = 0;
+
+    int crv = Inverse_Table_6_9[matrix_coefficients][0];
+    int cbu = Inverse_Table_6_9[matrix_coefficients][1];
+    int cgu = -Inverse_Table_6_9[matrix_coefficients][2];
+    int cgv = -Inverse_Table_6_9[matrix_coefficients][3];
+    /* table_Y[i] = 76309/65536 * (i - 384 - 16), rounded and clamped to 0..255 */
+    for (i = 0; i < 1024; i++) {
+	int j;
+
+	j = (76309 * (i - 384 - 16) + 32768) >> 16;
+	table_Y[i] = (j < 0) ? 0 : ((j > 255) ? 255 : j);
+    }
+
+    switch (bpp) {
+    case 32:
+	if (!id)
+	    return (197 + 2*682 + 256 + 132) * sizeof (uint32_t);
+	table_32 = (uint32_t *) (id + 1);	/* tables start right after *id */
+	entry_size = sizeof (uint32_t);
+	table_r = table_32 + 197;
+	table_b = table_32 + 197 + 685;
+	table_g = table_32 + 197 + 2*682;
+
+	for (i = -197; i < 256+197; i++)
+	    ((uint32_t *) table_r)[i] =
+		table_Y[i+384] << ((order == MPEG2CONVERT_RGB) ? 16 : 0);
+	for (i = -132; i < 256+132; i++)
+	    ((uint32_t *) table_g)[i] = table_Y[i+384] << 8;
+	for (i = -232; i < 256+232; i++)
+	    ((uint32_t *) table_b)[i] =
+		table_Y[i+384] << ((order == MPEG2CONVERT_RGB) ? 0 : 16);
+	break;
+
+    case 24:
+	if (!id)
+	    return (256 + 2*232) * sizeof (uint8_t);
+	table_8 = (uint8_t *) (id + 1);	/* tables start right after *id */
+	entry_size = sizeof (uint8_t);
+	table_r = table_g = table_b = table_8 + 232;	/* one table shared by all three components */
+
+	for (i = -232; i < 256+232; i++)
+	    ((uint8_t * )table_b)[i] = table_Y[i+384];
+	break;
+
+    case 15:
+    case 16:
+	if (!id)
+	    return (197 + 2*682 + 256 + 132) * sizeof (uint16_t);
+	table_16 = (uint16_t *) (id + 1);	/* tables start right after *id */
+	entry_size = sizeof (uint16_t);
+	table_r = table_16 + 197;
+	table_b = table_16 + 197 + 685;
+	table_g = table_16 + 197 + 2*682;
+
+	for (i = -197; i < 256+197; i++) {
+	    int j = table_Y[i+384] >> 3;
+
+	    if (order == MPEG2CONVERT_RGB)
+		j <<= ((bpp==16) ? 11 : 10);
+
+	    ((uint16_t *)table_r)[i] = j;
+	}
+	for (i = -132; i < 256+132; i++) {
+	    int j = table_Y[i+384] >> ((bpp==16) ? 2 : 3);	/* green keeps 6 bits at 16 bpp, 5 at 15 bpp */
+
+	    ((uint16_t *)table_g)[i] = j << 5;
+	}
+	for (i = -232; i < 256+232; i++) {
+	    int j = table_Y[i+384] >> 3;
+
+	    if (order == MPEG2CONVERT_BGR)
+		j <<= ((bpp==16) ? 11 : 10);
+
+	    ((uint16_t *)table_b)[i] = j;
+	}
+	break;
+
+    case 8:
+	if (!id)
+	    return (197 + 2*682 + 256 + 232 + 71) * sizeof (uint8_t);
+	table_332 = (uint8_t *) (id + 1);	/* tables start right after *id */
+	entry_size = sizeof (uint8_t);
+	table_r = table_332 + 197;
+	table_g = table_332 + 197 + 682 + 30;
+	table_b = table_332 + 197 + 2*682;
+
+	for (i = -197; i < 256+197+30; i++)
+	    ((uint8_t *)table_r)[i] = ((table_Y[i+384] * 7 / 255) <<
+				       (order == MPEG2CONVERT_RGB ? 5 : 0));
+	for (i = -132; i < 256+132+30; i++)
+	    ((uint8_t *)table_g)[i-30] = ((table_Y[i+384] * 7 / 255) <<
+					  (order == MPEG2CONVERT_RGB ? 2 : 3));
+	for (i = -232; i < 256+232+71; i++)
+	    ((uint8_t *)table_b)[i] = ((table_Y[i+384] / 85) <<
+				       (order == MPEG2CONVERT_RGB ? 0 : 6));
+	break;
+    }
+    /* Offset each table pointer by the rounded chroma contribution for every U/V value. */
+    for (i = 0; i < 256; i++) {
+	id->table_rV[i] = (((uint8_t *)table_r) +
+			   entry_size * div_round (crv * (i-128), 76309));
+	id->table_gU[i] = (((uint8_t *)table_g) +
+			   entry_size * div_round (cgu * (i-128), 76309));
+	id->table_gV[i] = entry_size * div_round (cgv * (i-128), 76309);
+	id->table_bU[i] = (((uint8_t *)table_b) +
+			   entry_size * div_round (cbu * (i-128), 76309));
+    }
+
+    return 0;
+}
+/* Shared body of the entry points generated by DECLARE: picks a copy function and fills *result. */
+static int rgb_internal (mpeg2convert_rgb_order_t order, unsigned int bpp,
+			 int stage, void * _id, const mpeg2_sequence_t * seq,
+			 int stride, uint32_t accel, void * arg,
+			 mpeg2_convert_init_t * result)
+{
+    convert_rgb_t * id = (convert_rgb_t *) _id;
+    mpeg2convert_copy_t * copy = (mpeg2convert_copy_t *) 0;
+    unsigned int id_size = sizeof (convert_rgb_t);
+    int chroma420 = (seq->chroma_height < seq->height);	/* 1 when chroma has fewer rows than luma */
+    int convert420 = 0;
+    int rgb_stride_min = ((bpp + 7) >> 3) * seq->width;	/* bytes in one output row */
+
+#ifdef ARCH_X86
+    if (!copy && (accel & MPEG2_ACCEL_X86_MMXEXT)) {
+	convert420 = 0;
+	copy = mpeg2convert_rgb_mmxext (order, bpp, seq);
+    }
+    if (!copy && (accel & MPEG2_ACCEL_X86_MMX)) {
+	convert420 = 0;
+	copy = mpeg2convert_rgb_mmx (order, bpp, seq);
+    }
+#endif
+#ifdef ARCH_SPARC
+    if (!copy && (accel & MPEG2_ACCEL_SPARC_VIS)) {
+	convert420 = chroma420;
+	copy = mpeg2convert_rgb_vis (order, bpp, seq);
+    }
+#endif
+    if (!copy) {	/* no accelerated version was selected: use the C converters */
+	int src, dest;	/* row and column of rgb_c */
+	static void (* rgb_c[3][5]) (void *, uint8_t * const *,
+				     unsigned int) =
+	    {{rgb_c_24_bgr_420, rgb_c_8_420, rgb_c_16_420,
+	      rgb_c_24_rgb_420, rgb_c_32_420},
+	     {rgb_c_24_bgr_422, rgb_c_8_422, rgb_c_16_422,
+	      rgb_c_24_rgb_422, rgb_c_32_422},
+	     {rgb_c_24_bgr_444, rgb_c_8_444, rgb_c_16_444,
+	      rgb_c_24_rgb_444, rgb_c_32_444}};
+
+	convert420 = chroma420;
+	id_size = (sizeof (convert_rgb_c_t) +	/* rgb_c_init adds the table size only when id is NULL */
+		   rgb_c_init ((convert_rgb_c_t *) id, order, bpp));
+	src = ((seq->chroma_width == seq->width) +	/* 0: 420 row, 1: 422 row, 2: 444 row */
+	       (seq->chroma_height == seq->height));
+	dest = ((bpp == 24 && order == MPEG2CONVERT_BGR) ? 0 : (bpp + 7) >> 3);
+	copy = rgb_c[src][dest];
+    }
+
+    result->id_size = id_size;
+
+    if (stride < rgb_stride_min)
+	stride = rgb_stride_min;
+
+    if (stage == MPEG2_CONVERT_STRIDE)
+	return stride;
+    else if (stage == MPEG2_CONVERT_START) {
+	id->width = seq->width >> 3;	/* the copy loops handle 8 pixels per step */
+	id->y_stride_frame = seq->width;
+	id->uv_stride_frame = seq->chroma_width;
+	id->rgb_stride_frame = stride;
+	id->rgb_stride_min = rgb_stride_min;
+	id->chroma420 = chroma420;
+	id->convert420 = convert420;
+	result->buf_size[0] = stride * seq->height;
+	result->buf_size[1] = result->buf_size[2] = 0;
+	result->start = rgb_start;
+	result->copy = copy;
+    }
+    return 0;
+}
+/* Defines the public entry point func as rgb_internal with a fixed order and bpp. */
+#define DECLARE(func,order,bpp)						\
+int func (int stage, void * id,						\
+	  const mpeg2_sequence_t * sequence, int stride,		\
+	  uint32_t accel, void * arg, mpeg2_convert_init_t * result)	\
+{									\
+    return rgb_internal (order, bpp, stage, id, sequence, stride,	\
+			 accel, arg, result);				\
+}
+/* One entry point per component order (RGB, BGR) and depth (32, 24, 16, 15, 8). */
+DECLARE (mpeg2convert_rgb32, MPEG2CONVERT_RGB, 32)
+DECLARE (mpeg2convert_rgb24, MPEG2CONVERT_RGB, 24)
+DECLARE (mpeg2convert_rgb16, MPEG2CONVERT_RGB, 16)
+DECLARE (mpeg2convert_rgb15, MPEG2CONVERT_RGB, 15)
+DECLARE (mpeg2convert_rgb8, MPEG2CONVERT_RGB, 8)
+DECLARE (mpeg2convert_bgr32, MPEG2CONVERT_BGR, 32)
+DECLARE (mpeg2convert_bgr24, MPEG2CONVERT_BGR, 24)
+DECLARE (mpeg2convert_bgr16, MPEG2CONVERT_BGR, 16)
+DECLARE (mpeg2convert_bgr15, MPEG2CONVERT_BGR, 15)
+DECLARE (mpeg2convert_bgr8, MPEG2CONVERT_BGR, 8)
+/* Returns the converter for order (RGB or BGR) and bpp (8, 15, 16, 24 or 32), else a null pointer. */
+mpeg2_convert_t * mpeg2convert_rgb (mpeg2convert_rgb_order_t order,
+				    unsigned int bpp)
+{
+    static mpeg2_convert_t * table[5][2] =	/* row 0 is 15 bpp, row bpp >> 3 otherwise; column 1 is BGR */
+	{{mpeg2convert_rgb15, mpeg2convert_bgr15},
+	 {mpeg2convert_rgb8, mpeg2convert_bgr8},
+	 {mpeg2convert_rgb16, mpeg2convert_bgr16},
+	 {mpeg2convert_rgb24, mpeg2convert_bgr24},
+	 {mpeg2convert_rgb32, mpeg2convert_bgr32}};
+
+    if (order == MPEG2CONVERT_RGB || order == MPEG2CONVERT_BGR) {
+	if (bpp == 15)
+	    return table[0][order == MPEG2CONVERT_BGR];
+	else if (bpp >= 8 && bpp <= 32 && (bpp & 7) == 0)	/* 8, 16, 24 or 32 */
+	    return table[bpp >> 3][order == MPEG2CONVERT_BGR];
+    }
+    return (mpeg2_convert_t *) 0;
+}
diff --git a/src/video_dec/libmpeg2new/libmpeg2/rgb_mmx.c b/src/video_dec/libmpeg2new/libmpeg2/rgb_mmx.c
new file mode 100644
index 000000000..6ca7e65a8
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/rgb_mmx.c
@@ -0,0 +1,321 @@
+/*
+ * rgb_mmx.c
+ * Copyright (C) 2000-2003 Silicon Integrated System Corp.
+ * All Rights Reserved.
+ *
+ * Author: Olie Lho <ollie@sis.com.tw>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#ifdef ARCH_X86
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "mpeg2convert.h"
+#include "convert_internal.h"
+#include <xine/attributes.h>
+#include "mmx.h"
+/* Values passed as the cpu argument; the movntq macro tests for CPU_MMXEXT. */
+#define CPU_MMXEXT 0
+#define CPU_MMX 1
+
+/* CPU_MMXEXT/CPU_MMX adaptation layer */
+/* Stores src to dest with movntq_r2m when cpu is CPU_MMXEXT, else with movq_r2m. */
+#define movntq(src,dest)	\
+do {				\
+    if (cpu == CPU_MMXEXT)	\
+	movntq_r2m (src, dest);	\
+    else			\
+	movq_r2m (src, dest);	\
+} while (0)
+/* Converts 8 luma and 4 U/V samples; leaves the B bytes in mm0, R in mm1 and G in mm2. */
+static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv)
+{
+    static mmx_t mmx_80w = {0x0080008000800080LL};
+    static mmx_t mmx_U_green = {0xf37df37df37df37dLL};
+    static mmx_t mmx_U_blue = {0x4093409340934093LL};
+    static mmx_t mmx_V_red = {0x3312331233123312LL};
+    static mmx_t mmx_V_green = {0xe5fce5fce5fce5fcLL};
+    static mmx_t mmx_10w = {0x1010101010101010LL};
+    static mmx_t mmx_00ffw = {0x00ff00ff00ff00ffLL};
+    static mmx_t mmx_Y_coeff = {0x253f253f253f253fLL};
+
+    movd_m2r (*pu, mm0);		/* mm0 = 00 00 00 00 u3 u2 u1 u0 */
+    movd_m2r (*pv, mm1);		/* mm1 = 00 00 00 00 v3 v2 v1 v0 */
+    movq_m2r (*py, mm6);		/* mm6 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
+    pxor_r2r (mm4, mm4);		/* mm4 = 0 */
+    /* XXX might do cache preload for image here */
+
+    /*
+     * Do the multiply part of the conversion for even and odd pixels
+     * register usage:
+     * mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels
+     * mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd  pixels
+     * mm6 -> Y even, mm7 -> Y odd
+     */
+
+    punpcklbw_r2r (mm4, mm0);		/* mm0 = u3 u2 u1 u0 */
+    punpcklbw_r2r (mm4, mm1);		/* mm1 = v3 v2 v1 v0 */
+    psubsw_m2r (mmx_80w, mm0);		/* u -= 128 */
+    psubsw_m2r (mmx_80w, mm1);		/* v -= 128 */
+    psllw_i2r (3, mm0);			/* promote precision */
+    psllw_i2r (3, mm1);			/* promote precision */
+    movq_r2r (mm0, mm2);		/* mm2 = u3 u2 u1 u0 */
+    movq_r2r (mm1, mm3);		/* mm3 = v3 v2 v1 v0 */
+    pmulhw_m2r (mmx_U_green, mm2);	/* mm2 = u * u_green */
+    pmulhw_m2r (mmx_V_green, mm3);	/* mm3 = v * v_green */
+    pmulhw_m2r (mmx_U_blue, mm0);	/* mm0 = chroma_b */
+    pmulhw_m2r (mmx_V_red, mm1);	/* mm1 = chroma_r */
+    paddsw_r2r (mm3, mm2);		/* mm2 = chroma_g */
+
+    psubusb_m2r (mmx_10w, mm6);		/* Y -= 16 */
+    movq_r2r (mm6, mm7);		/* mm7 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
+    pand_m2r (mmx_00ffw, mm6);		/* mm6 =    Y6    Y4    Y2    Y0 */
+    psrlw_i2r (8, mm7);			/* mm7 =    Y7    Y5    Y3    Y1 */
+    psllw_i2r (3, mm6);			/* promote precision */
+    psllw_i2r (3, mm7);			/* promote precision */
+    pmulhw_m2r (mmx_Y_coeff, mm6);	/* mm6 = luma_rgb even */
+    pmulhw_m2r (mmx_Y_coeff, mm7);	/* mm7 = luma_rgb odd */
+
+    /*
+     * Do the addition part of the conversion for even and odd pixels
+     * register usage:
+     * mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels
+     * mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd  pixels
+     * mm6 -> Y even, mm7 -> Y odd
+     */
+
+    movq_r2r (mm0, mm3);		/* mm3 = chroma_b */
+    movq_r2r (mm1, mm4);		/* mm4 = chroma_r */
+    movq_r2r (mm2, mm5);		/* mm5 = chroma_g */
+    paddsw_r2r (mm6, mm0);		/* mm0 = B6 B4 B2 B0 */
+    paddsw_r2r (mm7, mm3);		/* mm3 = B7 B5 B3 B1 */
+    paddsw_r2r (mm6, mm1);		/* mm1 = R6 R4 R2 R0 */
+    paddsw_r2r (mm7, mm4);		/* mm4 = R7 R5 R3 R1 */
+    paddsw_r2r (mm6, mm2);		/* mm2 = G6 G4 G2 G0 */
+    paddsw_r2r (mm7, mm5);		/* mm5 = G7 G5 G3 G1 */
+    packuswb_r2r (mm0, mm0);		/* saturate to 0-255 */
+    packuswb_r2r (mm1, mm1);		/* saturate to 0-255 */
+    packuswb_r2r (mm2, mm2);		/* saturate to 0-255 */
+    packuswb_r2r (mm3, mm3);		/* saturate to 0-255 */
+    packuswb_r2r (mm4, mm4);		/* saturate to 0-255 */
+    packuswb_r2r (mm5, mm5);		/* saturate to 0-255 */
+    punpcklbw_r2r (mm3, mm0);		/* mm0 = B7 B6 B5 B4 B3 B2 B1 B0 */
+    punpcklbw_r2r (mm4, mm1);		/* mm1 = R7 R6 R5 R4 R3 R2 R1 R0 */
+    punpcklbw_r2r (mm5, mm2);		/* mm2 = G7 G6 G5 G4 G3 G2 G1 G0 */
+}
+/* Packs the B (mm0), R (mm1) and G (mm2) bytes into 16-bit pixels, stored at image and image+8. */
+static inline void mmx_unpack_16rgb (uint8_t * image, const int cpu)
+{
+    static mmx_t mmx_bluemask = {0xf8f8f8f8f8f8f8f8LL};
+    static mmx_t mmx_greenmask = {0xfcfcfcfcfcfcfcfcLL};
+    static mmx_t mmx_redmask = {0xf8f8f8f8f8f8f8f8LL};
+
+    /*
+     * convert RGB plane to RGB 16 bits
+     * mm0 -> B, mm1 -> R, mm2 -> G
+     * mm4 -> GB, mm5 -> AR pixel 4-7
+     * mm6 -> GB, mm7 -> AR pixel 0-3
+     */
+
+    pand_m2r (mmx_bluemask, mm0);	/* mm0 = b7b6b5b4b3______ */
+    pand_m2r (mmx_greenmask, mm2);	/* mm2 = g7g6g5g4g3g2____ */
+    pand_m2r (mmx_redmask, mm1);	/* mm1 = r7r6r5r4r3______ */
+    psrlq_i2r (3, mm0);			/* mm0 = ______b7b6b5b4b3 */
+    pxor_r2r (mm4, mm4);		/* mm4 = 0 */
+    movq_r2r (mm0, mm5);		/* mm5 = ______b7b6b5b4b3 */
+    movq_r2r (mm2, mm7);		/* mm7 = g7g6g5g4g3g2____ */
+
+    punpcklbw_r2r (mm4, mm2);		/* low half: widen G with zero bytes */
+    punpcklbw_r2r (mm1, mm0);		/* low half: interleave B with R */
+    psllq_i2r (3, mm2);
+    por_r2r (mm2, mm0);
+    movntq (mm0, *image);
+
+    punpckhbw_r2r (mm4, mm7);		/* high half: widen G with zero bytes */
+    punpckhbw_r2r (mm1, mm5);		/* high half: interleave B with R */
+    psllq_i2r (3, mm7);
+    por_r2r (mm7, mm5);
+    movntq (mm5, *(image+8));
+}
+/* Interleaves the B (mm0), G (mm2), R (mm1) and zero bytes into 32-bit pixels, stored at image .. image+24. */
+static inline void mmx_unpack_32rgb (uint8_t * image, const int cpu)
+{
+    /*
+     * convert RGB plane to RGB packed format,
+     * mm0 -> B, mm1 -> R, mm2 -> G, mm3 -> 0,
+     * mm4 -> GB, mm5 -> AR pixel 4-7,
+     * mm6 -> GB, mm7 -> AR pixel 0-3
+     */
+
+    pxor_r2r (mm3, mm3);
+    movq_r2r (mm0, mm6);
+    movq_r2r (mm1, mm7);
+    movq_r2r (mm0, mm4);
+    movq_r2r (mm1, mm5);
+    punpcklbw_r2r (mm2, mm6);
+    punpcklbw_r2r (mm3, mm7);
+    punpcklwd_r2r (mm7, mm6);
+    movntq (mm6, *image);
+    movq_r2r (mm0, mm6);
+    punpcklbw_r2r (mm2, mm6);
+    punpckhwd_r2r (mm7, mm6);
+    movntq (mm6, *(image+8));
+    punpckhbw_r2r (mm2, mm4);
+    punpckhbw_r2r (mm3, mm5);
+    punpcklwd_r2r (mm5, mm4);
+    movntq (mm4, *(image+16));
+    movq_r2r (mm0, mm4);
+    punpckhbw_r2r (mm2, mm4);
+    punpckhwd_r2r (mm5, mm4);
+    movntq (mm4, *(image+24));
+}
+/* Converts 16 rows to 16-bit pixels, 8 pixels per inner step; cpu selects the store used by movntq. */
+static inline void rgb16 (void * const _id, uint8_t * const * src,
+			  const unsigned int v_offset, const int cpu)
+{
+    convert_rgb_t * const id = (convert_rgb_t *) _id;
+    uint8_t * dst;
+    uint8_t * py, * pu, * pv;
+    int i, j;
+
+    dst = id->rgb_ptr + id->rgb_slice * v_offset;
+    py = src[0];	pu = src[1];	pv = src[2];
+
+    i = 16;
+    do {
+	j = id->width;
+	do {
+	    mmx_yuv2rgb (py, pu, pv);
+	    mmx_unpack_16rgb (dst, cpu);
+	    py += 8;
+	    pu += 4;
+	    pv += 4;
+	    dst += 16;
+	} while (--j);
+
+	dst += id->rgb_increm;
+	py += id->y_increm;
+	if (--i == id->field) {		/* restart one frame row below the slice start */
+	    dst = id->rgb_ptr + id->rgb_slice * (v_offset + 1);
+	    py = src[0] + id->y_stride_frame;
+	    pu = src[1] + id->uv_stride_frame;
+	    pv = src[2] + id->uv_stride_frame;
+	} else if (! (i & id->chroma420)) {	/* move on to the next chroma row */
+	    pu += id->uv_increm;
+	    pv += id->uv_increm;
+	} else {	/* chroma420 set and i odd: step chroma back by uv_stride_frame */
+	    pu -= id->uv_stride_frame;
+	    pv -= id->uv_stride_frame;
+	}
+    } while (i);
+}
+/* Converts 16 rows to 32-bit pixels, 8 pixels per inner step; cpu selects the store used by movntq. */
+static inline void argb32 (void * const _id, uint8_t * const * src,
+			   const unsigned int v_offset, const int cpu)
+{
+    convert_rgb_t * const id = (convert_rgb_t *) _id;
+    uint8_t * dst;
+    uint8_t * py, * pu, * pv;
+    int i, j;
+
+    dst = id->rgb_ptr + id->rgb_slice * v_offset;
+    py = src[0];	pu = src[1];	pv = src[2];
+
+    i = 16;
+    do {
+	j = id->width;
+	do {
+	    mmx_yuv2rgb (py, pu, pv);
+	    mmx_unpack_32rgb (dst, cpu);
+	    py += 8;
+	    pu += 4;
+	    pv += 4;
+	    dst += 32;
+	} while (--j);
+
+	dst += id->rgb_increm;
+	py += id->y_increm;
+	if (--i == id->field) {		/* restart one frame row below the slice start */
+	    dst = id->rgb_ptr + id->rgb_slice * (v_offset + 1);
+	    py = src[0] + id->y_stride_frame;
+	    pu = src[1] + id->uv_stride_frame;
+	    pv = src[2] + id->uv_stride_frame;
+	} else if (! (i & id->chroma420)) {	/* move on to the next chroma row */
+	    pu += id->uv_increm;
+	    pv += id->uv_increm;
+	} else {	/* chroma420 set and i odd: step chroma back by uv_stride_frame */
+	    pu -= id->uv_stride_frame;
+	    pv -= id->uv_stride_frame;
+	}
+    } while (i);
+}
+/* Copy function: rgb16 with cpu fixed to CPU_MMXEXT. */
+static void mmxext_rgb16 (void * id, uint8_t * const * src,
+			  unsigned int v_offset)
+{
+    rgb16 (id, src, v_offset, CPU_MMXEXT);
+}
+/* Copy function: argb32 with cpu fixed to CPU_MMXEXT. */
+static void mmxext_argb32 (void * id, uint8_t * const * src,
+			   unsigned int v_offset)
+{
+    argb32 (id, src, v_offset, CPU_MMXEXT);
+}
+/* Copy function: rgb16 with cpu fixed to CPU_MMX. */
+static void mmx_rgb16 (void * id, uint8_t * const * src, unsigned int v_offset)
+{
+    rgb16 (id, src, v_offset, CPU_MMX);
+}
+/* Copy function: argb32 with cpu fixed to CPU_MMX. */
+static void mmx_argb32 (void * id, uint8_t * const * src,
+			unsigned int v_offset)
+{
+    argb32 (id, src, v_offset, CPU_MMX);
+}
+/* Returns the MMXEXT copy function for RGB order at 16 or 32 bpp when chroma is narrower than luma, else NULL. */
+mpeg2convert_copy_t * mpeg2convert_rgb_mmxext (int order, int bpp,
+					       const mpeg2_sequence_t * seq)
+{
+    if (order == MPEG2CONVERT_RGB && seq->chroma_width < seq->width) {
+	if (bpp == 16)
+	    return mmxext_rgb16;
+	else if (bpp == 32)
+	    return mmxext_argb32;
+    }
+    return NULL;	/* Fallback to C */
+}
+/* Returns the MMX copy function for RGB order at 16 or 32 bpp when chroma is narrower than luma, else NULL. */
+mpeg2convert_copy_t * mpeg2convert_rgb_mmx (int order, int bpp,
+					    const mpeg2_sequence_t * seq)
+{
+    if (order == MPEG2CONVERT_RGB && seq->chroma_width < seq->width) {
+	if (bpp == 16)
+	    return mmx_rgb16;
+	else if (bpp == 32)
+	    return mmx_argb32;
+    }
+    return NULL;	/* Fallback to C */
+}
+#endif
diff --git a/src/video_dec/libmpeg2new/libmpeg2/rgb_vis.c b/src/video_dec/libmpeg2new/libmpeg2/rgb_vis.c
new file mode 100644
index 000000000..cbd7c7072
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/rgb_vis.c
@@ -0,0 +1,384 @@
+/*
+ * rgb_vis.c
+ * Copyright (C) 2003 David S. Miller <davem@redhat.com>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#ifdef ARCH_SPARC
+
+#include <stddef.h>
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "mpeg2convert.h"
+#include "convert_internal.h"
+#include <xine/attributes.h>
+#include "vis.h"
+
+/* Based partially upon the MMX yuv2rgb code, see there for credits.
+ *
+ * The difference here is that since we have enough registers we
+ * process both even and odd scanlines in one pass.
+ */
+
+static const uint16_t const_2048[] ATTR_ALIGN(8) = {2048, 2048, 2048, 2048};
+static const uint16_t const_1024[] ATTR_ALIGN(8) = {1024, 1024, 1024, 1024};
+static const uint16_t const_128[] ATTR_ALIGN(8) = {128, 128, 128, 128};
+static const uint8_t const_Ugreen[] ATTR_ALIGN(8) =
+	{0xf3, 0x00, 0xf3, 0x00, 0xf3, 0x00, 0xf3, 0x00};
+static const uint8_t const_Vgreen[] ATTR_ALIGN(8) =
+	{0xe6, 0x00, 0xe6, 0x00, 0xe6, 0x00, 0xe6, 0x00};
+static const uint8_t const_Ublue_Vred[] ATTR_ALIGN(8) =
+	{0x41, 0x41, 0x41, 0x41, 0x33, 0x33, 0x33, 0x33};
+static const uint8_t const_Ycoeff[] ATTR_ALIGN(4) = {0x25, 0x25, 0x25, 0x25};
+/* Operand numbers handed to the vis_*() macros (presumably VIS register numbers - see vis.h). Scratch: */
+#define TMP0		0
+#define TMP1		1
+#define TMP2		2
+#define TMP3		3
+#define TMP4		4
+#define TMP5		5
+#define TMP6		6
+#define TMP7		7
+#define TMP8		8
+#define TMP9		9
+#define TMP10		10
+#define TMP11		11
+#define TMP12		12
+#define TMP13		13
+/* Constants set up by vis_init_consts(); CONST_VRED is never loaded by name there. */
+#define CONST_UBLUE	14
+#define CONST_VRED	15
+#define CONST_2048	16
+/* 8-bit colour results for the first scanline, written by vis_yuv2rgb(). */
+#define BLUE8_EVEN	18
+#define BLUE8_ODD	19
+#define RED8_EVEN	20
+#define RED8_ODD	21
+#define GREEN8_EVEN	22
+#define GREEN8_ODD	23
+/* The same results for the second scanline (py + y_stride). */
+#define BLUE8_2_EVEN	24
+#define BLUE8_2_ODD	25
+#define RED8_2_EVEN	26
+#define RED8_2_ODD	27
+#define GREEN8_2_EVEN	28
+#define GREEN8_2_ODD	29
+/* Luma coefficient, and an operand cleared with vis_fzeros(). */
+#define CONST_YCOEFF	30
+#define ZEROS		31
+/* Intermediate chroma (PU/PV) and luma (PY) products of vis_yuv2rgb(). */
+#define PU_0		32
+#define PU_2		34
+#define PV_0		36
+#define PV_2		38
+#define PY_0		40
+#define PY_2		42
+#define PY_4		44
+#define PY_6		46
+/* Further constants loaded by vis_init_consts(). */
+#define CONST_128	56
+#define CONST_1024	58
+#define CONST_VGREEN	60
+#define CONST_UGREEN	62
+/* Set the GSR scale factor and load the constant operands that vis_yuv2rgb() reads. */
+static inline void vis_init_consts(void)
+{
+	vis_set_gsr(7 << VIS_GSR_SCALEFACT_SHIFT);
+	/* NOTE(review): CONST_VRED (15) is presumably filled by the 64-bit load into CONST_UBLUE (14) - confirm */
+	vis_ld64(const_2048[0], CONST_2048);
+	vis_ld64(const_1024[0], CONST_1024);
+	vis_ld64(const_Ugreen[0], CONST_UGREEN);
+	vis_ld64(const_Vgreen[0], CONST_VGREEN);
+	vis_fzeros(ZEROS);
+	vis_ld64(const_Ublue_Vred[0], CONST_UBLUE);
+	vis_ld32(const_Ycoeff[0], CONST_YCOEFF);
+	vis_ld64(const_128[0],  CONST_128);
+}
+/* Convert luma from rows py and py + y_stride plus U/V from pu/pv; results land in the *8_EVEN/ODD operands. */
+static inline void vis_yuv2rgb(uint8_t *py, uint8_t *pu, uint8_t *pv,
+			       int y_stride)
+{
+	vis_ld32(pu[0], TMP0);
+
+	vis_ld32(pv[0], TMP2);
+
+	vis_ld64(py[0], TMP4);
+	vis_mul8x16au(TMP0, CONST_2048, PU_0);
+
+	vis_ld64_2(py, y_stride, TMP8);
+	vis_mul8x16au(TMP2, CONST_2048, PV_0);
+	/* presumably splits each luma row into even (TMP4, TMP8) and odd (TMP5, TMP9) pixels - confirm in vis.h */
+	vis_pmerge(TMP4, TMP5, TMP6);
+
+	vis_pmerge(TMP6, TMP7, TMP4);
+
+	vis_pmerge(TMP8, TMP9, TMP10);
+
+	vis_pmerge(TMP10, TMP11, TMP8);
+	vis_mul8x16au(TMP4, CONST_2048, PY_0);
+	/* subtract CONST_1024 from chroma and CONST_128 from luma, then multiply luma by CONST_YCOEFF */
+	vis_psub16(PU_0, CONST_1024, PU_0);
+	vis_mul8x16au(TMP5, CONST_2048, PY_2);
+
+	vis_psub16(PV_0, CONST_1024, PV_0);
+	vis_mul8x16au(TMP8, CONST_2048, PY_4);
+
+	vis_psub16(PY_0, CONST_128, PY_0);
+	vis_mul8x16au(TMP9, CONST_2048, PY_6);
+
+	vis_psub16(PY_2, CONST_128, PY_2);
+	vis_mul8x16(CONST_YCOEFF, PY_0, PY_0);
+
+	vis_psub16(PY_4, CONST_128, PY_4);
+	vis_mul8x16(CONST_YCOEFF, PY_2, PY_2);
+
+	vis_psub16(PY_6, CONST_128, PY_6);
+	vis_mul8x16(CONST_YCOEFF, PY_4, PY_4);
+
+	vis_mul8x16(CONST_YCOEFF, PY_6, PY_6);
+	/* chroma terms: TMP10 = green (U part + V part), TMP4 = blue (from U), TMP6 = red (from V) */
+	vis_mul8sux16(CONST_UGREEN, PU_0, TMP0);
+
+	vis_mul8sux16(CONST_VGREEN, PV_0, TMP2);
+
+	vis_mul8x16(CONST_UBLUE, PU_0, TMP4);
+
+	vis_mul8x16(CONST_VRED, PV_0, TMP6);
+	vis_padd16(TMP0, TMP2, TMP10);
+	/* add each chroma term to the four luma operands and pack every sum into its 8-bit result */
+	vis_padd16(PY_0, TMP4, TMP0);
+
+	vis_padd16(PY_2, TMP4, TMP2);
+	vis_pack16(TMP0, BLUE8_EVEN);
+
+	vis_padd16(PY_4, TMP4, TMP0);
+	vis_pack16(TMP2, BLUE8_ODD);
+
+	vis_padd16(PY_6, TMP4, TMP2);
+	vis_pack16(TMP0, BLUE8_2_EVEN);
+
+	vis_padd16(PY_0, TMP6, TMP0);
+	vis_pack16(TMP2, BLUE8_2_ODD);
+
+	vis_padd16(PY_2, TMP6, TMP2);
+	vis_pack16(TMP0, RED8_EVEN);
+
+	vis_padd16(PY_4, TMP6, TMP0);
+	vis_pack16(TMP2, RED8_ODD);
+
+	vis_padd16(PY_6, TMP6, TMP2);
+	vis_pack16(TMP0, RED8_2_EVEN);
+
+	vis_padd16(PY_0, TMP10, TMP0);
+	vis_pack16(TMP2, RED8_2_ODD);
+
+	vis_padd16(PY_2, TMP10, TMP2);
+	vis_pack16(TMP0, GREEN8_EVEN);
+
+	vis_padd16(PY_4, TMP10, TMP0);
+	vis_pack16(TMP2, GREEN8_ODD);
+
+	vis_padd16(PY_6, TMP10, TMP2);
+	vis_pack16(TMP0, GREEN8_2_EVEN);
+	/* last pack, then each EVEN/ODD pair is merged in place (presumably restoring pixel order) */
+	vis_pack16(TMP2, GREEN8_2_ODD);
+	vis_pmerge(BLUE8_EVEN, BLUE8_ODD, BLUE8_EVEN);
+
+	vis_pmerge(BLUE8_2_EVEN, BLUE8_2_ODD, BLUE8_2_EVEN);
+
+	vis_pmerge(RED8_EVEN, RED8_ODD, RED8_EVEN);
+
+	vis_pmerge(RED8_2_EVEN, RED8_2_ODD, RED8_2_EVEN);
+
+	vis_pmerge(GREEN8_EVEN, GREEN8_ODD, GREEN8_EVEN);
+
+	vis_pmerge(GREEN8_2_EVEN, GREEN8_2_ODD, GREEN8_2_EVEN);
+}
+/* Write the results of vis_yuv2rgb() as four 64-bit stores per scanline at image and image + stride. */
+static inline void vis_unpack_32rgb(uint8_t *image, int stride)
+{
+	vis_pmerge(ZEROS, GREEN8_EVEN, TMP0);
+	vis_pmerge(RED8_EVEN, BLUE8_EVEN, TMP2);
+	/* NOTE(review): the merge order suggests bytes 0,R,G,B per pixel - confirm against vis_pmerge in vis.h */
+	vis_pmerge(TMP0, TMP2, TMP4);
+	vis_st64(TMP4, image[0]);
+
+	vis_pmerge(TMP1, TMP3, TMP6);
+	vis_st64_2(TMP6, image, 8);
+	/* the _ODD operands supply the stores at offsets 16 and 24 */
+	vis_pmerge(ZEROS, GREEN8_ODD, TMP8);
+	vis_pmerge(RED8_ODD, BLUE8_ODD, TMP10);
+
+	vis_pmerge(TMP8, TMP10, TMP0);
+	vis_st64_2(TMP0, image, 16);
+
+	vis_pmerge(TMP9, TMP11, TMP2);
+	vis_st64_2(TMP2, image, 24);
+	/* second scanline: same sequence using the *_2_* operands */
+	image += stride;
+
+	vis_pmerge(ZEROS, GREEN8_2_EVEN, TMP0);
+	vis_pmerge(RED8_2_EVEN, BLUE8_2_EVEN, TMP2);
+
+	vis_pmerge(TMP0, TMP2, TMP4);
+	vis_st64(TMP4, image[0]);
+
+	vis_pmerge(TMP1, TMP3, TMP6);
+	vis_st64_2(TMP6, image, 8);
+
+	vis_pmerge(ZEROS, GREEN8_2_ODD, TMP8);
+	vis_pmerge(RED8_2_ODD, BLUE8_2_ODD, TMP10);
+
+	vis_pmerge(TMP8, TMP10, TMP0);
+	vis_st64_2(TMP0, image, 16);
+
+	vis_pmerge(TMP9, TMP11, TMP2);
+	vis_st64_2(TMP2, image, 24);
+}
+/* Same as vis_unpack_32rgb() but with the red and blue operands swapped in every merge. */
+static inline void vis_unpack_32bgr(uint8_t *image, int stride)
+{
+	vis_pmerge(ZEROS, GREEN8_EVEN, TMP0);
+	vis_pmerge(BLUE8_EVEN, RED8_EVEN, TMP2);
+	/* NOTE(review): the merge order suggests bytes 0,B,G,R per pixel - confirm against vis_pmerge in vis.h */
+	vis_pmerge(TMP0, TMP2, TMP4);
+	vis_st64(TMP4, image[0]);
+
+	vis_pmerge(TMP1, TMP3, TMP6);
+	vis_st64_2(TMP6, image, 8);
+	/* the _ODD operands supply the stores at offsets 16 and 24 */
+	vis_pmerge(ZEROS, GREEN8_ODD, TMP8);
+	vis_pmerge(BLUE8_ODD, RED8_ODD, TMP10);
+
+	vis_pmerge(TMP8, TMP10, TMP0);
+	vis_st64_2(TMP0, image, 16);
+
+	vis_pmerge(TMP9, TMP11, TMP2);
+	vis_st64_2(TMP2, image, 24);
+	/* second scanline: same sequence using the *_2_* operands */
+	image += stride;
+
+	vis_pmerge(ZEROS, GREEN8_2_EVEN, TMP0);
+	vis_pmerge(BLUE8_2_EVEN, RED8_2_EVEN, TMP2);
+
+	vis_pmerge(TMP0, TMP2, TMP4);
+	vis_st64(TMP4, image[0]);
+
+	vis_pmerge(TMP1, TMP3, TMP6);
+	vis_st64_2(TMP6, image, 8);
+
+	vis_pmerge(ZEROS, GREEN8_2_ODD, TMP8);
+	vis_pmerge(BLUE8_2_ODD, RED8_2_ODD, TMP10);
+
+	vis_pmerge(TMP8, TMP10, TMP0);
+	vis_st64_2(TMP0, image, 16);
+
+	vis_pmerge(TMP9, TMP11, TMP2);
+	vis_st64_2(TMP2, image, 24);
+}
+/* Convert a width x height area to 32-bit pixels via vis_unpack_32rgb(), two scanlines per pass. */
+static inline void vis_yuv420_argb32(uint8_t *image,
+				     uint8_t *py, uint8_t *pu, uint8_t *pv,
+				     int width, int height, int rgb_stride,
+				     int y_stride, int uv_stride)
+{
+	/* Per-pass pointer fix-ups: a pass consumes two luma rows and one chroma row. */
+	const int luma_skip = (y_stride << 1) - width;
+	const int rgb_skip = (rgb_stride << 1) - 4 * width;
+	const int chroma_skip = uv_stride - (width >> 1);
+	int passes = height >> 1;
+
+	do {
+		int blocks = width >> 3;	/* 8 pixels per inner step */
+		do {
+			vis_yuv2rgb(py, pu, pv, y_stride);
+			vis_unpack_32rgb(image, rgb_stride);
+			py += 8, pu += 4, pv += 4;
+			image += 32;
+		} while (--blocks);
+
+		py += luma_skip, image += rgb_skip;
+		pu += chroma_skip, pv += chroma_skip;
+	} while (--passes);
+}
+/* Convert a width x height area to 32-bit pixels via vis_unpack_32bgr(), two scanlines per pass. */
+static inline void vis_yuv420_abgr32(uint8_t *image,
+				     uint8_t *py, uint8_t *pu, uint8_t *pv,
+				     int width, int height, int rgb_stride,
+				     int y_stride, int uv_stride)
+{
+	/* Per-pass pointer fix-ups: a pass consumes two luma rows and one chroma row. */
+	const int luma_skip = (y_stride << 1) - width;
+	const int rgb_skip = (rgb_stride << 1) - 4 * width;
+	const int chroma_skip = uv_stride - (width >> 1);
+	int passes = height >> 1;
+
+	do {
+		int blocks = width >> 3;	/* 8 pixels per inner step */
+		do {
+			vis_yuv2rgb(py, pu, pv, y_stride);
+			vis_unpack_32bgr(image, rgb_stride);
+			py += 8, pu += 4, pv += 4;
+			image += 32;
+		} while (--blocks);
+
+		py += luma_skip, image += rgb_skip;
+		pu += chroma_skip, pv += chroma_skip;
+	} while (--passes);
+}
+/* Copy callback: convert one 16-line slice with vis_yuv420_argb32(), starting v_offset strides into rgb_ptr. */
+static void vis_argb32(void *_id, uint8_t * const *src,
+		       unsigned int v_offset)
+{
+	convert_rgb_t *conv = _id;
+	vis_init_consts();
+	vis_yuv420_argb32(conv->rgb_ptr + conv->rgb_stride * v_offset,
+			  src[0], src[1], src[2], conv->width, 16,
+			  conv->rgb_stride, conv->y_stride,
+			  conv->y_stride >> 1);
+}
+/* Copy callback: convert one 16-line slice with vis_yuv420_abgr32(), starting v_offset strides into rgb_ptr. */
+static void vis_abgr32(void *_id, uint8_t * const *src,
+		       unsigned int v_offset)
+{
+	convert_rgb_t *conv = _id;
+	vis_init_consts();
+	vis_yuv420_abgr32(conv->rgb_ptr + conv->rgb_stride * v_offset,
+			  src[0], src[1], src[2], conv->width, 16,
+			  conv->rgb_stride, conv->y_stride,
+			  conv->y_stride >> 1);
+}
+/* Pick the VIS converter for (order, bpp); NULL means "use the C fallback". */
+mpeg2convert_copy_t *mpeg2convert_rgb_vis(int order, int bpp,
+					  const mpeg2_sequence_t * seq)
+{
+	/* Only 32 bpp output with chroma_height below height is accelerated. */
+	if (bpp != 32 || seq->chroma_height >= seq->height)
+		return NULL;	/* Fallback to C */
+	if (order == MPEG2CONVERT_RGB)
+		return vis_argb32;
+	if (order == MPEG2CONVERT_BGR)
+		return vis_abgr32;
+	return NULL;	/* Fallback to C */
+}
+
+#endif /* ARCH_SPARC */
diff --git a/src/video_dec/libmpeg2new/libmpeg2/slice.c b/src/video_dec/libmpeg2new/libmpeg2/slice.c
new file mode 100644
index 000000000..ce4508639
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/slice.c
@@ -0,0 +1,2058 @@
+/*
+ * slice.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 2003      Peter Gubanov <peter@elecard.net.ru>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#include <inttypes.h>
+
+#include "../include/mpeg2.h"
+#include "../include/attributes.h"
+#include "mpeg2_internal.h"
+/* Objects and function pointers defined elsewhere in the library; only declared here. */
+extern mpeg2_mc_t mpeg2_mc;
+extern void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride);
+extern void (* mpeg2_idct_add) (int last, int16_t * block,
+				uint8_t * dest, int stride);
+extern void (* mpeg2_cpu_state_save) (cpu_state_t * state);
+extern void (* mpeg2_cpu_state_restore) (cpu_state_t * state);
+
+#include "vlc.h"
+/* Decode the macroblock modes for the current coding_type; returns table flags plus motion-type/DCT-type bits. */
+static inline int get_macroblock_modes (mpeg2_decoder_t * const decoder)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    int macroblock_modes;
+    const MBtab * tab;
+
+    switch (decoder->coding_type) {
+    case I_TYPE:
+	/* 1-bit lookup; a DCT-type bit follows only in frame pictures without frame_pred_frame_dct */
+	tab = MB_I + UBITS (bit_buf, 1);
+	DUMPBITS (bit_buf, bits, tab->len);
+	macroblock_modes = tab->modes;
+
+	if ((! (decoder->frame_pred_frame_dct)) &&
+	    (decoder->picture_structure == FRAME_PICTURE)) {
+	    macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED;
+	    DUMPBITS (bit_buf, bits, 1);
+	}
+
+	return macroblock_modes;
+
+    case P_TYPE:
+	/* 5-bit lookup; every return path below forces MACROBLOCK_MOTION_FORWARD on */
+	tab = MB_P + UBITS (bit_buf, 5);
+	DUMPBITS (bit_buf, bits, tab->len);
+	macroblock_modes = tab->modes;
+
+	if (decoder->picture_structure != FRAME_PICTURE) {
+	    if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) {
+		macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT;
+		DUMPBITS (bit_buf, bits, 2);
+	    }
+	    return macroblock_modes | MACROBLOCK_MOTION_FORWARD;
+	} else if (decoder->frame_pred_frame_dct) {
+	    if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
+		macroblock_modes |= MC_FRAME << MOTION_TYPE_SHIFT;
+	    return macroblock_modes | MACROBLOCK_MOTION_FORWARD;
+	} else {
+	    if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) {
+		macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT;
+		DUMPBITS (bit_buf, bits, 2);
+	    }
+	    if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) {
+		macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED;
+		DUMPBITS (bit_buf, bits, 1);
+	    }
+	    return macroblock_modes | MACROBLOCK_MOTION_FORWARD;
+	}
+
+    case B_TYPE:
+	/* 6-bit lookup; with frame_pred_frame_dct in a frame picture MC_FRAME is set without reading bits */
+	tab = MB_B + UBITS (bit_buf, 6);
+	DUMPBITS (bit_buf, bits, tab->len);
+	macroblock_modes = tab->modes;
+
+	if (decoder->picture_structure != FRAME_PICTURE) {
+	    if (! (macroblock_modes & MACROBLOCK_INTRA)) {
+		macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT;
+		DUMPBITS (bit_buf, bits, 2);
+	    }
+	    return macroblock_modes;
+	} else if (decoder->frame_pred_frame_dct) {
+	    /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */
+	    macroblock_modes |= MC_FRAME << MOTION_TYPE_SHIFT;
+	    return macroblock_modes;
+	} else {
+	    if (macroblock_modes & MACROBLOCK_INTRA)
+		goto intra;
+	    macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT;
+	    DUMPBITS (bit_buf, bits, 2);
+	    if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) {
+	    intra:
+		macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED;
+		DUMPBITS (bit_buf, bits, 1);
+	    }
+	    return macroblock_modes;
+	}
+
+    case D_TYPE:
+	/* one bit is consumed and the macroblock is always reported as intra */
+	DUMPBITS (bit_buf, bits, 1);
+	return MACROBLOCK_INTRA;
+
+    default:
+	return 0;
+    }
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+/* Read the 5-bit quantizer scale code and select the matching entries for quantizer_matrix[0..3]. */
+static inline void get_quantizer_scale (mpeg2_decoder_t * const decoder)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+
+    const int code = UBITS (bit_buf, 5);
+
+    DUMPBITS (bit_buf, bits, 5);
+
+    /*
+     * quantizer_matrix[0] and [1] are taken from quantizer_prescale,
+     * [2] and [3] from chroma_quantizer, all indexed by the same code.
+     */
+    decoder->quantizer_matrix[0] = decoder->quantizer_prescale[0][code];
+    decoder->quantizer_matrix[1] = decoder->quantizer_prescale[1][code];
+    decoder->quantizer_matrix[2] = decoder->chroma_quantizer[0][code];
+    decoder->quantizer_matrix[3] = decoder->chroma_quantizer[1][code];
+
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+/* Decode one motion vector delta (table code, sign bit, then f_code extra bits); returns the signed delta. */
+static inline int get_motion_delta (mpeg2_decoder_t * const decoder,
+				    const int f_code)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+
+    int delta;
+    int sign;
+    const MVtab * tab;
+
+    if (bit_buf & 0x80000000) {
+	DUMPBITS (bit_buf, bits, 1);
+	return 0;
+    } else if (bit_buf >= 0x0c000000) {
+	/* short codes: 4-bit table index; code, sign and residual bits are all counted up front */
+	tab = MV_4 + UBITS (bit_buf, 4);
+	delta = (tab->delta << f_code) + 1;
+	bits += tab->len + f_code + 1;
+	bit_buf <<= tab->len;
+
+	sign = SBITS (bit_buf, 1);
+	bit_buf <<= 1;
+
+	if (f_code)
+	    delta += UBITS (bit_buf, f_code);
+	bit_buf <<= f_code;
+	/* conditional negate; sign is presumably 0 or -1 (SBITS is defined outside this file) */
+	return (delta ^ sign) - sign;
+
+    } else {
+	/* long codes: 10-bit table index; the residual bits are read after a refill */
+	tab = MV_10 + UBITS (bit_buf, 10);
+	delta = (tab->delta << f_code) + 1;
+	bits += tab->len + 1;
+	bit_buf <<= tab->len;
+
+	sign = SBITS (bit_buf, 1);
+	bit_buf <<= 1;
+
+	if (f_code) {
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+	    delta += UBITS (bit_buf, f_code);
+	    DUMPBITS (bit_buf, bits, f_code);
+	}
+
+	return (delta ^ sign) - sign;
+
+    }
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+/* Sign-extend the low (5 + f_code) bits of vector; the left shift is done on uint32_t because shifting a negative int is undefined. */
+static inline int bound_motion_vector (const int vector, const int f_code)
+{
+    return (int32_t) ((uint32_t) vector << (27 - f_code)) >> (27 - f_code);
+}
+/* Decode one dmv code by a table lookup on the next 2 bits and return its value. */
+static inline int get_dmv (mpeg2_decoder_t * const decoder)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    /* the table entry carries both the code length and the decoded value */
+    const DMVtab * const entry = &DMV_2[UBITS (bit_buf, 2)];
+    const int dmv = entry->dmv;
+
+    DUMPBITS (bit_buf, bits, entry->len);
+    return dmv;
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+/* Refill the bit buffer, decode one coded_block_pattern code and return its cbp value. */
+static inline int get_coded_block_pattern (mpeg2_decoder_t * const decoder)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    const CBPtab * entry;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+
+    /*
+     * bit_buf >= 0x20000000 means its top 7 bits are >= 16, hence the
+     * -16 bias on the CBP_7 index (assuming UBITS extracts the top bits);
+     * smaller values index CBP_9 with 9 bits.
+     */
+    if (bit_buf < 0x20000000)
+	entry = CBP_9 + UBITS (bit_buf, 9);
+    else
+	entry = CBP_7 + (UBITS (bit_buf, 7) - 16);
+
+    /* both tables give the code length to drop and the pattern to return */
+    DUMPBITS (bit_buf, bits, entry->len);
+    return entry->cbp;
+
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+/* Decode one luma DC differential and return it shifted left by intra_dc_precision. */
+static inline int get_luma_dc_dct_diff (mpeg2_decoder_t * const decoder)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    const DCtab * tab;
+    int size;
+    int dc_diff;
+    /* values below 0xf8000000 use the 5-bit table DC_lum_5; the rest use DC_long and refill before the value */
+    if (bit_buf < 0xf8000000) {
+	tab = DC_lum_5 + UBITS (bit_buf, 5);
+	size = tab->size;
+	if (size) {
+	    bits += tab->len + size;	/* code and value bits counted together */
+	    bit_buf <<= tab->len;
+	    dc_diff =
+		UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
+	    bit_buf <<= size;
+	    return dc_diff << decoder->intra_dc_precision;
+	} else {
+	    DUMPBITS (bit_buf, bits, 3);	/* size 0: only the 3-bit code is consumed */
+	    return 0;
+	}
+    } else {
+	tab = DC_long + (UBITS (bit_buf, 9) - 0x1e0);
+	size = tab->size;
+	DUMPBITS (bit_buf, bits, tab->len);
+	NEEDBITS (bit_buf, bits, bit_ptr);
+	dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
+	DUMPBITS (bit_buf, bits, size);
+	return dc_diff << decoder->intra_dc_precision;
+    }
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+/* Decode one chroma DC differential and return it shifted left by intra_dc_precision. */
+static inline int get_chroma_dc_dct_diff (mpeg2_decoder_t * const decoder)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    const DCtab * tab;
+    int size;
+    int dc_diff;
+    /* values below 0xf8000000 use the 5-bit table DC_chrom_5; the rest use DC_long and refill before the value */
+    if (bit_buf < 0xf8000000) {
+	tab = DC_chrom_5 + UBITS (bit_buf, 5);
+	size = tab->size;
+	if (size) {
+	    bits += tab->len + size;	/* code and value bits counted together */
+	    bit_buf <<= tab->len;
+	    dc_diff =
+		UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
+	    bit_buf <<= size;
+	    return dc_diff << decoder->intra_dc_precision;
+	} else {
+	    DUMPBITS (bit_buf, bits, 2);	/* size 0: only the 2-bit code is consumed */
+	    return 0;
+	}
+    } else {
+	tab = DC_long + (UBITS (bit_buf, 10) - 0x3e0);
+	size = tab->size;
+	DUMPBITS (bit_buf, bits, tab->len + 1);	/* one bit more than the DC_long length */
+	NEEDBITS (bit_buf, bits, bit_ptr);
+	dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
+	DUMPBITS (bit_buf, bits, size);
+	return dc_diff << decoder->intra_dc_precision;
+    }
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+/* val <<= 4; if that no longer fits int16_t, clamp to 2047 << 4 or -2048 << 4 (assuming SBITS (val, 1) is 0 or -1). */
+#define SATURATE(val)				\
+do {						\
+    val <<= 4;					\
+    if (unlikely (val != (int16_t) val))	\
+	val = (SBITS (val, 1) ^ 2047) << 4;	\
+} while (0)
+/* Decode one intra block's run/level codes (B14 tables) into decoder->DCTblock, scaling by quant_matrix. */
+static void get_intra_block_B14 (mpeg2_decoder_t * const decoder,
+				 const uint16_t * const quant_matrix)
+{
+    int i;
+    int j;
+    int val;
+    const uint8_t * const scan = decoder->scan;
+    int mismatch;
+    const DCTtab * tab;
+    uint32_t bit_buf;
+    int bits;
+    const uint8_t * bit_ptr;
+    int16_t * const dest = decoder->DCTblock;
+    /* i indexes scan[]; mismatch is seeded from dest[0], presumably already set by the caller */
+    i = 0;
+    mismatch = ~dest[0];
+
+    bit_buf = decoder->bitstream_buf;
+    bits = decoder->bitstream_bits;
+    bit_ptr = decoder->bitstream_ptr;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+
+    while (1) {
+	if (bit_buf >= 0x28000000) {
+
+	    tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
+
+	    i += tab->run;
+	    if (i >= 64)
+		break;	/* end of block */
+	/* shared tail for every table hit: scale, apply the sign bit, saturate, store, refill */
+	normal_code:
+	    j = scan[i];
+	    bit_buf <<= tab->len;
+	    bits += tab->len + 1;
+	    val = (tab->level * quant_matrix[j]) >> 4;
+
+	    /* if (bitstream_get (1)) val = -val; */
+	    val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
+
+	    SATURATE (val);
+	    dest[j] = val;
+	    mismatch ^= val;
+
+	    bit_buf <<= 1;
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+
+	    continue;
+
+	} else if (bit_buf >= 0x04000000) {
+
+	    tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
+
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+
+	    /* escape code */
+	    /* a 6-bit run follows the first 6 bits; the level is then read as a 12-bit signed field */
+	    i += UBITS (bit_buf << 6, 6) - 64;
+	    if (i >= 64)
+		break;	/* illegal, check needed to avoid buffer overflow */
+
+	    j = scan[i];
+
+	    DUMPBITS (bit_buf, bits, 12);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+	    val = (SBITS (bit_buf, 12) * quant_matrix[j]) / 16;
+
+	    SATURATE (val);
+	    dest[j] = val;
+	    mismatch ^= val;
+
+	    DUMPBITS (bit_buf, bits, 12);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+
+	    continue;
+
+	} else if (bit_buf >= 0x02000000) {
+	    tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00800000) {
+	    tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00200000) {
+	    tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else {
+	    tab = DCT_16 + UBITS (bit_buf, 16);
+	    bit_buf <<= 16;
+	    GETWORD (bit_buf, bits + 16, bit_ptr);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	}
+	break;	/* illegal, check needed to avoid buffer overflow */
+    }
+    dest[63] ^= mismatch & 16;	/* mismatch control on bit 4: stored values are scaled by 16 (see SATURATE) */
+    DUMPBITS (bit_buf, bits, tab->len);	/* dump end of block code */
+    decoder->bitstream_buf = bit_buf;
+    decoder->bitstream_bits = bits;
+    decoder->bitstream_ptr = bit_ptr;
+}
+/* Decode one intra block's run/level codes (B15 tables) into decoder->DCTblock, scaling by quant_matrix. */
+static void get_intra_block_B15 (mpeg2_decoder_t * const decoder,
+				 const uint16_t * const quant_matrix)
+{
+    int i;
+    int j;
+    int val;
+    const uint8_t * const scan = decoder->scan;
+    int mismatch;
+    const DCTtab * tab;
+    uint32_t bit_buf;
+    int bits;
+    const uint8_t * bit_ptr;
+    int16_t * const dest = decoder->DCTblock;
+    /* i indexes scan[]; mismatch is seeded from dest[0], presumably already set by the caller */
+    i = 0;
+    mismatch = ~dest[0];
+
+    bit_buf = decoder->bitstream_buf;
+    bits = decoder->bitstream_bits;
+    bit_ptr = decoder->bitstream_ptr;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+
+    while (1) {
+	if (bit_buf >= 0x04000000) {
+
+	    tab = DCT_B15_8 + (UBITS (bit_buf, 8) - 4);
+
+	    i += tab->run;
+	    if (i < 64) {
+	    /* shared tail for every table hit: scale, apply the sign bit, saturate, store, refill */
+	    normal_code:
+		j = scan[i];
+		bit_buf <<= tab->len;
+		bits += tab->len + 1;
+		val = (tab->level * quant_matrix[j]) >> 4;
+
+		/* if (bitstream_get (1)) val = -val; */
+		val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
+
+		SATURATE (val);
+		dest[j] = val;
+		mismatch ^= val;
+
+		bit_buf <<= 1;
+		NEEDBITS (bit_buf, bits, bit_ptr);
+
+		continue;
+
+	    } else {
+
+		/* end of block. I commented out this code because if we */
+		/* don't exit here we will still exit at the later test :) */
+
+		/* if (i >= 128) break;	*/	/* end of block */
+
+		/* escape code */
+		/* a 6-bit run follows the first 6 bits; the level is then read as a 12-bit signed field */
+		i += UBITS (bit_buf << 6, 6) - 64;
+		if (i >= 64)
+		    break;	/* illegal, check against buffer overflow */
+
+		j = scan[i];
+
+		DUMPBITS (bit_buf, bits, 12);
+		NEEDBITS (bit_buf, bits, bit_ptr);
+		val = (SBITS (bit_buf, 12) * quant_matrix[j]) / 16;
+
+		SATURATE (val);
+		dest[j] = val;
+		mismatch ^= val;
+
+		DUMPBITS (bit_buf, bits, 12);
+		NEEDBITS (bit_buf, bits, bit_ptr);
+
+		continue;
+
+	    }
+	} else if (bit_buf >= 0x02000000) {
+	    tab = DCT_B15_10 + (UBITS (bit_buf, 10) - 8);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00800000) {
+	    tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00200000) {
+	    tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else {
+	    tab = DCT_16 + UBITS (bit_buf, 16);
+	    bit_buf <<= 16;
+	    GETWORD (bit_buf, bits + 16, bit_ptr);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	}
+	break;	/* illegal, check needed to avoid buffer overflow */
+    }
+    dest[63] ^= mismatch & 16;	/* mismatch control on bit 4: stored values are scaled by 16 (see SATURATE) */
+    DUMPBITS (bit_buf, bits, tab->len);	/* dump end of block code */
+    decoder->bitstream_buf = bit_buf;
+    decoder->bitstream_bits = bits;
+    decoder->bitstream_ptr = bit_ptr;
+}
+/* Decode one non-intra block into decoder->DCTblock, scaling by quant_matrix; returns the final value of i. */
+static int get_non_intra_block (mpeg2_decoder_t * const decoder,
+				const uint16_t * const quant_matrix)
+{
+    int i;
+    int j;
+    int val;
+    const uint8_t * const scan = decoder->scan;
+    int mismatch;
+    const DCTtab * tab;
+    uint32_t bit_buf;
+    int bits;
+    const uint8_t * bit_ptr;
+    int16_t * const dest = decoder->DCTblock;
+    /* i starts at -1, so the first i += tab->run can land on scan position 0 if that run is 1 */
+    i = -1;
+    mismatch = -1;
+
+    bit_buf = decoder->bitstream_buf;
+    bits = decoder->bitstream_bits;
+    bit_ptr = decoder->bitstream_ptr;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    if (bit_buf >= 0x28000000) {
+	tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5);
+	goto entry_1;
+    } else
+	goto entry_2;
+    /* the first code uses DCT_B14DC_5 instead of DCT_B14AC_5, so the loop is entered at entry_1 / entry_2 */
+    while (1) {
+	if (bit_buf >= 0x28000000) {
+
+	    tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
+
+	entry_1:
+	    i += tab->run;
+	    if (i >= 64)
+		break;	/* end of block */
+
+	normal_code:
+	    j = scan[i];
+	    bit_buf <<= tab->len;
+	    bits += tab->len + 1;
+	    val = ((2 * tab->level + 1) * quant_matrix[j]) >> 5;
+
+	    /* if (bitstream_get (1)) val = -val; */
+	    val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
+
+	    SATURATE (val);
+	    dest[j] = val;
+	    mismatch ^= val;
+
+	    bit_buf <<= 1;
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+
+	    continue;
+
+	}
+
+    entry_2:
+	if (bit_buf >= 0x04000000) {
+
+	    tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
+
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+
+	    /* escape code */
+	    /* 6-bit run, then a 12-bit signed level scaled as (2 * (level + sign) + 1) * quant / 32 */
+	    i += UBITS (bit_buf << 6, 6) - 64;
+	    if (i >= 64)
+		break;	/* illegal, check needed to avoid buffer overflow */
+
+	    j = scan[i];
+
+	    DUMPBITS (bit_buf, bits, 12);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+	    val = 2 * (SBITS (bit_buf, 12) + SBITS (bit_buf, 1)) + 1;
+	    val = (val * quant_matrix[j]) / 32;
+
+	    SATURATE (val);
+	    dest[j] = val;
+	    mismatch ^= val;
+
+	    DUMPBITS (bit_buf, bits, 12);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+
+	    continue;
+
+	} else if (bit_buf >= 0x02000000) {
+	    tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00800000) {
+	    tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00200000) {
+	    tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else {
+	    tab = DCT_16 + UBITS (bit_buf, 16);
+	    bit_buf <<= 16;
+	    GETWORD (bit_buf, bits + 16, bit_ptr);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	}
+	break;	/* illegal, check needed to avoid buffer overflow */
+    }
+    dest[63] ^= mismatch & 16;	/* mismatch control on bit 4: stored values are scaled by 16 (see SATURATE) */
+    DUMPBITS (bit_buf, bits, tab->len);	/* dump end of block code */
+    decoder->bitstream_buf = bit_buf;
+    decoder->bitstream_bits = bits;
+    decoder->bitstream_ptr = bit_ptr;
+    return i;
+}
+/* MPEG-1 intra block: decode run/level codes into decoder->DCTblock using quantizer_matrix[0]; values are forced odd. */
+static void get_mpeg1_intra_block (mpeg2_decoder_t * const decoder)
+{
+    int i;
+    int j;
+    int val;
+    const uint8_t * const scan = decoder->scan;
+    const uint16_t * const quant_matrix = decoder->quantizer_matrix[0];
+    const DCTtab * tab;
+    uint32_t bit_buf;
+    int bits;
+    const uint8_t * bit_ptr;
+    int16_t * const dest = decoder->DCTblock;
+    /* unlike the MPEG-2 variants no mismatch accumulator is kept; each value is made odd instead */
+    i = 0;
+
+    bit_buf = decoder->bitstream_buf;
+    bits = decoder->bitstream_bits;
+    bit_ptr = decoder->bitstream_ptr;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+
+    while (1) {
+	if (bit_buf >= 0x28000000) {
+
+	    tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
+
+	    i += tab->run;
+	    if (i >= 64)
+		break;	/* end of block */
+	/* shared tail for every table hit: scale, make odd, apply the sign bit, saturate, store, refill */
+	normal_code:
+	    j = scan[i];
+	    bit_buf <<= tab->len;
+	    bits += tab->len + 1;
+	    val = (tab->level * quant_matrix[j]) >> 4;
+
+	    /* oddification */
+	    val = (val - 1) | 1;
+
+	    /* if (bitstream_get (1)) val = -val; */
+	    val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
+
+	    SATURATE (val);
+	    dest[j] = val;
+
+	    bit_buf <<= 1;
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+
+	    continue;
+
+	} else if (bit_buf >= 0x04000000) {
+
+	    tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
+
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+
+	    /* escape code */
+	    /* 6-bit run, then an 8-bit signed level; a second byte is read when its low 7 bits are zero */
+	    i += UBITS (bit_buf << 6, 6) - 64;
+	    if (i >= 64)
+		break;	/* illegal, check needed to avoid buffer overflow */
+
+	    j = scan[i];
+
+	    DUMPBITS (bit_buf, bits, 12);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+	    val = SBITS (bit_buf, 8);
+	    if (! (val & 0x7f)) {
+		DUMPBITS (bit_buf, bits, 8);
+		val = UBITS (bit_buf, 8) + 2 * val;
+	    }
+	    val = (val * quant_matrix[j]) / 16;
+
+	    /* oddification */
+	    val = (val + ~SBITS (val, 1)) | 1;
+
+	    SATURATE (val);
+	    dest[j] = val;
+
+	    DUMPBITS (bit_buf, bits, 8);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+
+	    continue;
+
+	} else if (bit_buf >= 0x02000000) {
+	    tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00800000) {
+	    tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00200000) {
+	    tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else {
+	    tab = DCT_16 + UBITS (bit_buf, 16);
+	    bit_buf <<= 16;
+	    GETWORD (bit_buf, bits + 16, bit_ptr);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	}
+	break;	/* illegal, check needed to avoid buffer overflow */
+    }
+    DUMPBITS (bit_buf, bits, tab->len);	/* dump end of block code */
+    decoder->bitstream_buf = bit_buf;
+    decoder->bitstream_bits = bits;
+    decoder->bitstream_ptr = bit_ptr;
+}
+/* MPEG-1 non-intra block: decode into decoder->DCTblock using quantizer_matrix[1]; returns the final value of i. */
+static int get_mpeg1_non_intra_block (mpeg2_decoder_t * const decoder)
+{
+    int i;
+    int j;
+    int val;
+    const uint8_t * const scan = decoder->scan;
+    const uint16_t * const quant_matrix = decoder->quantizer_matrix[1];
+    const DCTtab * tab;
+    uint32_t bit_buf;
+    int bits;
+    const uint8_t * bit_ptr;
+    int16_t * const dest = decoder->DCTblock;
+    /* i starts at -1, so the first i += tab->run can land on scan position 0 if that run is 1 */
+    i = -1;
+
+    bit_buf = decoder->bitstream_buf;
+    bits = decoder->bitstream_bits;
+    bit_ptr = decoder->bitstream_ptr;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    if (bit_buf >= 0x28000000) {
+	tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5);
+	goto entry_1;
+    } else
+	goto entry_2;
+    /* the first code uses DCT_B14DC_5 instead of DCT_B14AC_5, so the loop is entered at entry_1 / entry_2 */
+    while (1) {
+	if (bit_buf >= 0x28000000) {
+
+	    tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
+
+	entry_1:
+	    i += tab->run;
+	    if (i >= 64)
+		break;	/* end of block */
+
+	normal_code:
+	    j = scan[i];
+	    bit_buf <<= tab->len;
+	    bits += tab->len + 1;
+	    val = ((2 * tab->level + 1) * quant_matrix[j]) >> 5;
+
+	    /* oddification */
+	    val = (val - 1) | 1;
+
+	    /* if (bitstream_get (1)) val = -val; */
+	    val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
+
+	    SATURATE (val);
+	    dest[j] = val;
+
+	    bit_buf <<= 1;
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+
+	    continue;
+
+	}
+
+    entry_2:
+	if (bit_buf >= 0x04000000) {
+
+	    tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
+
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+
+	    /* escape code */
+	    /* 6-bit run, then an 8-bit signed level; a second byte is read when its low 7 bits are zero */
+	    i += UBITS (bit_buf << 6, 6) - 64;
+	    if (i >= 64)
+		break;	/* illegal, check needed to avoid buffer overflow */
+
+	    j = scan[i];
+
+	    DUMPBITS (bit_buf, bits, 12);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+	    val = SBITS (bit_buf, 8);
+	    if (! (val & 0x7f)) {
+		DUMPBITS (bit_buf, bits, 8);
+		val = UBITS (bit_buf, 8) + 2 * val;
+	    }
+	    val = 2 * (val + SBITS (val, 1)) + 1;
+	    val = (val * quant_matrix[j]) / 32;
+
+	    /* oddification */
+	    val = (val + ~SBITS (val, 1)) | 1;
+
+	    SATURATE (val);
+	    dest[j] = val;
+
+	    DUMPBITS (bit_buf, bits, 8);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+
+	    continue;
+
+	} else if (bit_buf >= 0x02000000) {
+	    tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00800000) {
+	    tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00200000) {
+	    tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else {
+	    tab = DCT_16 + UBITS (bit_buf, 16);
+	    bit_buf <<= 16;
+	    GETWORD (bit_buf, bits + 16, bit_ptr);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	}
+	break;	/* illegal, check needed to avoid buffer overflow */
+    }
+    DUMPBITS (bit_buf, bits, tab->len);	/* dump end of block code */
+    decoder->bitstream_buf = bit_buf;
+    decoder->bitstream_bits = bits;
+    decoder->bitstream_ptr = bit_ptr;
+    return i;
+}
+/* Decode one intra block of component cc and pass it to mpeg2_idct_copy. */
+static inline void slice_intra_DCT (mpeg2_decoder_t * const decoder,
+				    const int cc,
+				    uint8_t * const dest, const int stride)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    /* Get the intra DC coefficient and inverse quantize it */
+    if (cc == 0)
+	decoder->DCTblock[0] =
+	    decoder->dc_dct_pred[0] += get_luma_dc_dct_diff (decoder);
+    else
+	decoder->DCTblock[0] =
+	    decoder->dc_dct_pred[cc] += get_chroma_dc_dct_diff (decoder);
+    /* rest of block: none for MPEG-1 D_TYPE; else intra_vlc_format picks B15/B14 */
+    if (decoder->mpeg1) {
+	if (decoder->coding_type != D_TYPE)
+	    get_mpeg1_intra_block (decoder);
+    } else if (decoder->intra_vlc_format)
+	get_intra_block_B15 (decoder, decoder->quantizer_matrix[cc ? 2 : 0]);
+    else
+	get_intra_block_B14 (decoder, decoder->quantizer_matrix[cc ? 2 : 0]);
+    mpeg2_idct_copy (decoder->DCTblock, dest, stride);
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+/* Decode one non-intra block and hand it to mpeg2_idct_add for dest. */
+static inline void slice_non_intra_DCT (mpeg2_decoder_t * const decoder,
+					const int cc,
+					uint8_t * const dest, const int stride)
+{
+    /* component 0 selects quantizer matrix 1, every other component 3 */
+    const int matrix = cc ? 3 : 1;
+    const int last = (decoder->mpeg1 ?
+		      get_mpeg1_non_intra_block (decoder) :
+		      get_non_intra_block (decoder,
+					   decoder->quantizer_matrix[matrix]));
+
+    mpeg2_idct_add (last, decoder->DCTblock, dest, stride);
+}
+/* 4:2:0: clamp the vector, predict `size` luma rows at y, chroma at half size */
+#define MOTION_420(table,ref,motion_x,motion_y,size,y)			      \
+    pos_x = 2 * decoder->offset + motion_x;				      \
+    pos_y = 2 * decoder->v_offset + motion_y + 2 * y;			      \
+    if (unlikely (pos_x > decoder->limit_x)) {				      \
+	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
+	motion_x = pos_x - 2 * decoder->offset;				      \
+    }									      \
+    if (unlikely (pos_y > decoder->limit_y_ ## size)) {			      \
+	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y_ ## size;	      \
+	motion_y = pos_y - 2 * decoder->v_offset - 2 * y;		      \
+    }									      \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
+    table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \
+		    ref[0] + (pos_x >> 1) + (pos_y >> 1) * decoder->stride,   \
+		    decoder->stride, size);				      \
+    motion_x /= 2;	motion_y /= 2;					      \
+    xy_half = ((motion_y & 1) << 1) | (motion_x & 1);			      \
+    offset = (((decoder->offset + motion_x) >> 1) +			      \
+	      ((((decoder->v_offset + motion_y) >> 1) + y/2) *		      \
+	       decoder->uv_stride));					      \
+    table[4+xy_half] (decoder->dest[1] + y/2 * decoder->uv_stride +	      \
+		      (decoder->offset >> 1), ref[1] + offset,		      \
+		      decoder->uv_stride, size/2);			      \
+    table[4+xy_half] (decoder->dest[2] + y/2 * decoder->uv_stride +	      \
+		      (decoder->offset >> 1), ref[2] + offset,		      \
+		      decoder->uv_stride, size/2)
+/* 4:2:0 field prediction: 8 luma / 4 chroma rows at doubled stride */
+#define MOTION_FIELD_420(table,ref,motion_x,motion_y,dest_field,op,src_field) \
+    pos_x = 2 * decoder->offset + motion_x;				      \
+    pos_y = decoder->v_offset + motion_y;				      \
+    if (unlikely (pos_x > decoder->limit_x)) {				      \
+	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
+	motion_x = pos_x - 2 * decoder->offset;				      \
+    }									      \
+    if (unlikely (pos_y > decoder->limit_y)) {				      \
+	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;		      \
+	motion_y = pos_y - decoder->v_offset;				      \
+    }									      \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
+    table[xy_half] (decoder->dest[0] + dest_field * decoder->stride +	      \
+		    decoder->offset,					      \
+		    (ref[0] + (pos_x >> 1) +				      \
+		     ((pos_y op) + src_field) * decoder->stride),	      \
+		    2 * decoder->stride, 8);				      \
+    motion_x /= 2;	motion_y /= 2;					      \
+    xy_half = ((motion_y & 1) << 1) | (motion_x & 1);			      \
+    offset = (((decoder->offset + motion_x) >> 1) +			      \
+	      (((decoder->v_offset >> 1) + (motion_y op) + src_field) *	      \
+	       decoder->uv_stride));					      \
+    table[4+xy_half] (decoder->dest[1] + dest_field * decoder->uv_stride +    \
+		      (decoder->offset >> 1), ref[1] + offset,		      \
+		      2 * decoder->uv_stride, 4);			      \
+    table[4+xy_half] (decoder->dest[2] + dest_field * decoder->uv_stride +    \
+		      (decoder->offset >> 1), ref[2] + offset,		      \
+		      2 * decoder->uv_stride, 4)
+/* 4:2:0: one vector applied twice per plane, to rows 0 and 1 at doubled stride */
+#define MOTION_DMV_420(table,ref,motion_x,motion_y)			      \
+    pos_x = 2 * decoder->offset + motion_x;				      \
+    pos_y = decoder->v_offset + motion_y;				      \
+    if (unlikely (pos_x > decoder->limit_x)) {				      \
+	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
+	motion_x = pos_x - 2 * decoder->offset;				      \
+    }									      \
+    if (unlikely (pos_y > decoder->limit_y)) {				      \
+	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;		      \
+	motion_y = pos_y - decoder->v_offset;				      \
+    }									      \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
+    offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride;		      \
+    table[xy_half] (decoder->dest[0] + decoder->offset,			      \
+		    ref[0] + offset, 2 * decoder->stride, 8);		      \
+    table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset,     \
+		    ref[0] + decoder->stride + offset,			      \
+		    2 * decoder->stride, 8);				      \
+    motion_x /= 2;	motion_y /= 2;					      \
+    xy_half = ((motion_y & 1) << 1) | (motion_x & 1);			      \
+    offset = (((decoder->offset + motion_x) >> 1) +			      \
+	      (((decoder->v_offset >> 1) + (motion_y & ~1)) *		      \
+	       decoder->uv_stride));					      \
+    table[4+xy_half] (decoder->dest[1] + (decoder->offset >> 1),	      \
+		      ref[1] + offset, 2 * decoder->uv_stride, 4);	      \
+    table[4+xy_half] (decoder->dest[1] + decoder->uv_stride +		      \
+		      (decoder->offset >> 1),				      \
+		      ref[1] + decoder->uv_stride + offset,		      \
+		      2 * decoder->uv_stride, 4);			      \
+    table[4+xy_half] (decoder->dest[2] + (decoder->offset >> 1),	      \
+		      ref[2] + offset, 2 * decoder->uv_stride, 4);	      \
+    table[4+xy_half] (decoder->dest[2] + decoder->uv_stride +		      \
+		      (decoder->offset >> 1),				      \
+		      ref[2] + decoder->uv_stride + offset,		      \
+		      2 * decoder->uv_stride, 4)
+/* 4:2:0 prediction with no displacement: 16 luma rows, 8 rows per chroma plane */
+#define MOTION_ZERO_420(table,ref)					      \
+    table[0] (decoder->dest[0] + decoder->offset,			      \
+	      (ref[0] + decoder->offset +				      \
+	       decoder->v_offset * decoder->stride), decoder->stride, 16);    \
+    offset = ((decoder->offset >> 1) +					      \
+	      (decoder->v_offset >> 1) * decoder->uv_stride);		      \
+    table[4] (decoder->dest[1] + (decoder->offset >> 1),		      \
+	      ref[1] + offset, decoder->uv_stride, 8);			      \
+    table[4] (decoder->dest[2] + (decoder->offset >> 1),		      \
+	      ref[2] + offset, decoder->uv_stride, 8)
+/* 4:2:2: like the 4:2:0 form, but chroma keeps all `size` rows (half x only) */
+#define MOTION_422(table,ref,motion_x,motion_y,size,y)			      \
+    pos_x = 2 * decoder->offset + motion_x;				      \
+    pos_y = 2 * decoder->v_offset + motion_y + 2 * y;			      \
+    if (unlikely (pos_x > decoder->limit_x)) {				      \
+	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
+	motion_x = pos_x - 2 * decoder->offset;				      \
+    }									      \
+    if (unlikely (pos_y > decoder->limit_y_ ## size)) {			      \
+	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y_ ## size;	      \
+	motion_y = pos_y - 2 * decoder->v_offset - 2 * y;		      \
+    }									      \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
+    offset = (pos_x >> 1) + (pos_y >> 1) * decoder->stride;		      \
+    table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \
+		    ref[0] + offset, decoder->stride, size);		      \
+    offset = (offset + (motion_x & (motion_x < 0))) >> 1;		      \
+    motion_x /= 2;							      \
+    xy_half = ((pos_y & 1) << 1) | (motion_x & 1);			      \
+    table[4+xy_half] (decoder->dest[1] + y * decoder->uv_stride +	      \
+		      (decoder->offset >> 1), ref[1] + offset,		      \
+		      decoder->uv_stride, size);			      \
+    table[4+xy_half] (decoder->dest[2] + y * decoder->uv_stride +	      \
+		      (decoder->offset >> 1), ref[2] + offset,		      \
+		      decoder->uv_stride, size)
+/* 4:2:2 field prediction: 8 rows in every plane at doubled stride */
+#define MOTION_FIELD_422(table,ref,motion_x,motion_y,dest_field,op,src_field) \
+    pos_x = 2 * decoder->offset + motion_x;				      \
+    pos_y = decoder->v_offset + motion_y;				      \
+    if (unlikely (pos_x > decoder->limit_x)) {				      \
+	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
+	motion_x = pos_x - 2 * decoder->offset;				      \
+    }									      \
+    if (unlikely (pos_y > decoder->limit_y)) {				      \
+	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;		      \
+	motion_y = pos_y - decoder->v_offset;				      \
+    }									      \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
+    offset = (pos_x >> 1) + ((pos_y op) + src_field) * decoder->stride;	      \
+    table[xy_half] (decoder->dest[0] + dest_field * decoder->stride +	      \
+		    decoder->offset, ref[0] + offset,			      \
+		    2 * decoder->stride, 8);				      \
+    offset = (offset + (motion_x & (motion_x < 0))) >> 1;		      \
+    motion_x /= 2;							      \
+    xy_half = ((pos_y & 1) << 1) | (motion_x & 1);			      \
+    table[4+xy_half] (decoder->dest[1] + dest_field * decoder->uv_stride +    \
+		      (decoder->offset >> 1), ref[1] + offset,		      \
+		      2 * decoder->uv_stride, 8);			      \
+    table[4+xy_half] (decoder->dest[2] + dest_field * decoder->uv_stride +    \
+		      (decoder->offset >> 1), ref[2] + offset,		      \
+		      2 * decoder->uv_stride, 8)
+/* 4:2:2: one vector applied twice per plane (rows 0 and 1), 8 rows each */
+#define MOTION_DMV_422(table,ref,motion_x,motion_y)			      \
+    pos_x = 2 * decoder->offset + motion_x;				      \
+    pos_y = decoder->v_offset + motion_y;				      \
+    if (unlikely (pos_x > decoder->limit_x)) {				      \
+	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
+	motion_x = pos_x - 2 * decoder->offset;				      \
+    }									      \
+    if (unlikely (pos_y > decoder->limit_y)) {				      \
+	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;		      \
+	motion_y = pos_y - decoder->v_offset;				      \
+    }									      \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
+    offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride;		      \
+    table[xy_half] (decoder->dest[0] + decoder->offset,			      \
+		    ref[0] + offset, 2 * decoder->stride, 8);		      \
+    table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset,     \
+		    ref[0] + decoder->stride + offset,			      \
+		    2 * decoder->stride, 8);				      \
+    offset = (offset + (motion_x & (motion_x < 0))) >> 1;		      \
+    motion_x /= 2;							      \
+    xy_half = ((pos_y & 1) << 1) | (motion_x & 1);			      \
+    table[4+xy_half] (decoder->dest[1] + (decoder->offset >> 1),	      \
+		      ref[1] + offset, 2 * decoder->uv_stride, 8);	      \
+    table[4+xy_half] (decoder->dest[1] + decoder->uv_stride +		      \
+		      (decoder->offset >> 1),				      \
+		      ref[1] + decoder->uv_stride + offset,		      \
+		      2 * decoder->uv_stride, 8);			      \
+    table[4+xy_half] (decoder->dest[2] + (decoder->offset >> 1),	      \
+		      ref[2] + offset, 2 * decoder->uv_stride, 8);	      \
+    table[4+xy_half] (decoder->dest[2] + decoder->uv_stride +		      \
+		      (decoder->offset >> 1),				      \
+		      ref[2] + decoder->uv_stride + offset,		      \
+		      2 * decoder->uv_stride, 8)
+/* 4:2:2 prediction with no displacement: 16 rows in every plane */
+#define MOTION_ZERO_422(table,ref)					      \
+    offset = decoder->offset + decoder->v_offset * decoder->stride;	      \
+    table[0] (decoder->dest[0] + decoder->offset,			      \
+	      ref[0] + offset, decoder->stride, 16);			      \
+    offset >>= 1;							      \
+    table[4] (decoder->dest[1] + (decoder->offset >> 1),		      \
+	      ref[1] + offset, decoder->uv_stride, 16);			      \
+    table[4] (decoder->dest[2] + (decoder->offset >> 1),		      \
+	      ref[2] + offset, decoder->uv_stride, 16)
+/* 4:4:4: all three planes share one offset, decoder->stride and table entry */
+#define MOTION_444(table,ref,motion_x,motion_y,size,y)			      \
+    pos_x = 2 * decoder->offset + motion_x;				      \
+    pos_y = 2 * decoder->v_offset + motion_y + 2 * y;			      \
+    if (unlikely (pos_x > decoder->limit_x)) {				      \
+	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
+	motion_x = pos_x - 2 * decoder->offset;				      \
+    }									      \
+    if (unlikely (pos_y > decoder->limit_y_ ## size)) {			      \
+	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y_ ## size;	      \
+	motion_y = pos_y - 2 * decoder->v_offset - 2 * y;		      \
+    }									      \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
+    offset = (pos_x >> 1) + (pos_y >> 1) * decoder->stride;		      \
+    table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \
+		    ref[0] + offset, decoder->stride, size);		      \
+    table[xy_half] (decoder->dest[1] + y * decoder->stride + decoder->offset, \
+		    ref[1] + offset, decoder->stride, size);		      \
+    table[xy_half] (decoder->dest[2] + y * decoder->stride + decoder->offset, \
+		    ref[2] + offset, decoder->stride, size)
+/* 4:4:4 field prediction: the same 8-row call for each of the three planes */
+#define MOTION_FIELD_444(table,ref,motion_x,motion_y,dest_field,op,src_field) \
+    pos_x = 2 * decoder->offset + motion_x;				      \
+    pos_y = decoder->v_offset + motion_y;				      \
+    if (unlikely (pos_x > decoder->limit_x)) {				      \
+	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
+	motion_x = pos_x - 2 * decoder->offset;				      \
+    }									      \
+    if (unlikely (pos_y > decoder->limit_y)) {				      \
+	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;		      \
+	motion_y = pos_y - decoder->v_offset;				      \
+    }									      \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
+    offset = (pos_x >> 1) + ((pos_y op) + src_field) * decoder->stride;	      \
+    table[xy_half] (decoder->dest[0] + dest_field * decoder->stride +	      \
+		    decoder->offset, ref[0] + offset,			      \
+		    2 * decoder->stride, 8);				      \
+    table[xy_half] (decoder->dest[1] + dest_field * decoder->stride +	      \
+		    decoder->offset, ref[1] + offset,			      \
+		    2 * decoder->stride, 8);				      \
+    table[xy_half] (decoder->dest[2] + dest_field * decoder->stride +	      \
+		    decoder->offset, ref[2] + offset,			      \
+		    2 * decoder->stride, 8)
+/* 4:4:4: one vector applied twice (rows 0 and 1) to each of the three planes */
+#define MOTION_DMV_444(table,ref,motion_x,motion_y)			      \
+    pos_x = 2 * decoder->offset + motion_x;				      \
+    pos_y = decoder->v_offset + motion_y;				      \
+    if (unlikely (pos_x > decoder->limit_x)) {				      \
+	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
+	motion_x = pos_x - 2 * decoder->offset;				      \
+    }									      \
+    if (unlikely (pos_y > decoder->limit_y)) {				      \
+	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;		      \
+	motion_y = pos_y - decoder->v_offset;				      \
+    }									      \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
+    offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride;		      \
+    table[xy_half] (decoder->dest[0] + decoder->offset,			      \
+		    ref[0] + offset, 2 * decoder->stride, 8);		      \
+    table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset,     \
+		    ref[0] + decoder->stride + offset,			      \
+		    2 * decoder->stride, 8);				      \
+    table[xy_half] (decoder->dest[1] + decoder->offset,			      \
+		    ref[1] + offset, 2 * decoder->stride, 8);		      \
+    table[xy_half] (decoder->dest[1] + decoder->stride + decoder->offset,     \
+		    ref[1] + decoder->stride + offset,			      \
+		    2 * decoder->stride, 8);				      \
+    table[xy_half] (decoder->dest[2] + decoder->offset,			      \
+		    ref[2] + offset, 2 * decoder->stride, 8);		      \
+    table[xy_half] (decoder->dest[2] + decoder->stride + decoder->offset,     \
+		    ref[2] + decoder->stride + offset,			      \
+		    2 * decoder->stride, 8)
+/* zero-vector MOTION_444: same table[0] and offset for all three planes */
+#define MOTION_ZERO_444(table,ref)					      \
+    offset = decoder->offset + decoder->v_offset * decoder->stride;	      \
+    table[0] (decoder->dest[0] + decoder->offset,			      \
+	      ref[0] + offset, decoder->stride, 16);			      \
+    table[0] (decoder->dest[1] + decoder->offset,			      \
+	      ref[1] + offset, decoder->stride, 16);			      \
+    table[0] (decoder->dest[2] + decoder->offset,			      \
+	      ref[2] + offset, decoder->stride, 16)
+/* let the bitstream macros below operate directly on the decoder's fields */
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+/* Read an x and a y delta, update pmv[0], then run MOTION_420 over 16 rows. */
+static void motion_mp1 (mpeg2_decoder_t * const decoder,
+			motion_t * const motion,
+			mpeg2_mc_fct * const * const table)
+{
+    int motion_x, motion_y;
+    unsigned int pos_x, pos_y, xy_half, offset;
+    /* both components read with f_code[0] and are shifted left by f_code[1] */
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    motion_x = (motion->pmv[0][0] +
+		(get_motion_delta (decoder,
+				   motion->f_code[0]) << motion->f_code[1]));
+    motion_x = bound_motion_vector (motion_x,
+				    motion->f_code[0] + motion->f_code[1]);
+    motion->pmv[0][0] = motion_x;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    motion_y = (motion->pmv[0][1] +
+		(get_motion_delta (decoder,
+				   motion->f_code[0]) << motion->f_code[1]));
+    motion_y = bound_motion_vector (motion_y,
+				    motion->f_code[0] + motion->f_code[1]);
+    motion->pmv[0][1] = motion_y;
+
+    MOTION_420 (table, motion->ref[0], motion_x, motion_y, 16, 0);
+}
+/* Defines the motion_*_##FORMAT functions from the four per-format macros. */
+#define MOTION_FUNCTIONS(FORMAT,MOTION,MOTION_FIELD,MOTION_DMV,MOTION_ZERO)   \
+/* fr_frame: one vector kept in both pmv rows, applied to 16 rows */	      \
+static void motion_fr_frame_##FORMAT (mpeg2_decoder_t * const decoder,	      \
+				      motion_t * const motion,		      \
+				      mpeg2_mc_fct * const * const table)     \
+{									      \
+    int motion_x, motion_y;						      \
+    unsigned int pos_x, pos_y, xy_half, offset;				      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,		      \
+						     motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);	      \
+    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;			      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    motion_y = motion->pmv[0][1] + get_motion_delta (decoder,		      \
+						     motion->f_code[1]);      \
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);	      \
+    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;			      \
+									      \
+    MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0);		      \
+}									      \
+/* fr_field: two vectors, each preceded by a 1-bit field select */	      \
+static void motion_fr_field_##FORMAT (mpeg2_decoder_t * const decoder,	      \
+				      motion_t * const motion,		      \
+				      mpeg2_mc_fct * const * const table)     \
+{									      \
+    int motion_x, motion_y, field;					      \
+    unsigned int pos_x, pos_y, xy_half, offset;				      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    field = UBITS (bit_buf, 1);						      \
+    DUMPBITS (bit_buf, bits, 1);					      \
+									      \
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,		      \
+						     motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);	      \
+    motion->pmv[0][0] = motion_x;					      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    motion_y = ((motion->pmv[0][1] >> 1) +				      \
+		get_motion_delta (decoder, motion->f_code[1]));		      \
+    /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */	      \
+    motion->pmv[0][1] = motion_y << 1;					      \
+									      \
+    MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 0, & ~1, field); \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    field = UBITS (bit_buf, 1);						      \
+    DUMPBITS (bit_buf, bits, 1);					      \
+									      \
+    motion_x = motion->pmv[1][0] + get_motion_delta (decoder,		      \
+						     motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);	      \
+    motion->pmv[1][0] = motion_x;					      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    motion_y = ((motion->pmv[1][1] >> 1) +				      \
+		get_motion_delta (decoder, motion->f_code[1]));		      \
+    /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */	      \
+    motion->pmv[1][1] = motion_y << 1;					      \
+									      \
+    MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 1, & ~1, field); \
+}									      \
+/* fr_dmv: ignores `table`; put for the derived vectors, avg last */	      \
+static void motion_fr_dmv_##FORMAT (mpeg2_decoder_t * const decoder,	      \
+				    motion_t * const motion,		      \
+				    mpeg2_mc_fct * const * const table)	      \
+{									      \
+    int motion_x, motion_y, dmv_x, dmv_y, m, other_x, other_y;		      \
+    unsigned int pos_x, pos_y, xy_half, offset;				      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,		      \
+						     motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);	      \
+    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;			      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    dmv_x = get_dmv (decoder);						      \
+									      \
+    motion_y = ((motion->pmv[0][1] >> 1) +				      \
+		get_motion_delta (decoder, motion->f_code[1]));		      \
+    /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */	      \
+    motion->pmv[1][1] = motion->pmv[0][1] = motion_y << 1;		      \
+    dmv_y = get_dmv (decoder);						      \
+									      \
+    m = decoder->top_field_first ? 1 : 3;				      \
+    other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x;		      \
+    other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y - 1;	      \
+    MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 0, | 1, 0); \
+									      \
+    m = decoder->top_field_first ? 3 : 1;				      \
+    other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x;		      \
+    other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y + 1;	      \
+    MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 1, & ~1, 0);\
+									      \
+    MOTION_DMV (mpeg2_mc.avg, motion->ref[0], motion_x, motion_y);	      \
+}									      \
+/* reuse: no parsing, applies the stored pmv[0] vector once more */	      \
+static void motion_reuse_##FORMAT (mpeg2_decoder_t * const decoder,	      \
+				   motion_t * const motion,		      \
+				   mpeg2_mc_fct * const * const table)	      \
+{									      \
+    int motion_x, motion_y;						      \
+    unsigned int pos_x, pos_y, xy_half, offset;				      \
+									      \
+    motion_x = motion->pmv[0][0];					      \
+    motion_y = motion->pmv[0][1];					      \
+									      \
+    MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0);		      \
+}									      \
+/* zero: clears all four predictors, then predicts undisplaced */	      \
+static void motion_zero_##FORMAT (mpeg2_decoder_t * const decoder,	      \
+				  motion_t * const motion,		      \
+				  mpeg2_mc_fct * const * const table)	      \
+{									      \
+    unsigned int offset;						      \
+									      \
+    motion->pmv[0][0] = motion->pmv[0][1] = 0;				      \
+    motion->pmv[1][0] = motion->pmv[1][1] = 0;				      \
+									      \
+    MOTION_ZERO (table, motion->ref[0]);				      \
+}									      \
+/* fi_field: one vector; a leading bit indexes motion->ref2 first */	      \
+static void motion_fi_field_##FORMAT (mpeg2_decoder_t * const decoder,	      \
+				      motion_t * const motion,		      \
+				      mpeg2_mc_fct * const * const table)     \
+{									      \
+    int motion_x, motion_y;						      \
+    uint8_t ** ref_field;						      \
+    unsigned int pos_x, pos_y, xy_half, offset;				      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    ref_field = motion->ref2[UBITS (bit_buf, 1)];			      \
+    DUMPBITS (bit_buf, bits, 1);					      \
+									      \
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,		      \
+						     motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);	      \
+    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;			      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    motion_y = motion->pmv[0][1] + get_motion_delta (decoder,		      \
+						     motion->f_code[1]);      \
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);	      \
+    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;			      \
+									      \
+    MOTION (table, ref_field, motion_x, motion_y, 16, 0);		      \
+}									      \
+/* fi_16x8: two vectors with own ref2 bit, for rows 0-7 and 8-15 */	      \
+static void motion_fi_16x8_##FORMAT (mpeg2_decoder_t * const decoder,	      \
+				     motion_t * const motion,		      \
+				     mpeg2_mc_fct * const * const table)      \
+{									      \
+    int motion_x, motion_y;						      \
+    uint8_t ** ref_field;						      \
+    unsigned int pos_x, pos_y, xy_half, offset;				      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    ref_field = motion->ref2[UBITS (bit_buf, 1)];			      \
+    DUMPBITS (bit_buf, bits, 1);					      \
+									      \
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,		      \
+						     motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);	      \
+    motion->pmv[0][0] = motion_x;					      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    motion_y = motion->pmv[0][1] + get_motion_delta (decoder,		      \
+						     motion->f_code[1]);      \
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);	      \
+    motion->pmv[0][1] = motion_y;					      \
+									      \
+    MOTION (table, ref_field, motion_x, motion_y, 8, 0);		      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    ref_field = motion->ref2[UBITS (bit_buf, 1)];			      \
+    DUMPBITS (bit_buf, bits, 1);					      \
+									      \
+    motion_x = motion->pmv[1][0] + get_motion_delta (decoder,		      \
+						     motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);	      \
+    motion->pmv[1][0] = motion_x;					      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    motion_y = motion->pmv[1][1] + get_motion_delta (decoder,		      \
+						     motion->f_code[1]);      \
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);	      \
+    motion->pmv[1][1] = motion_y;					      \
+									      \
+    MOTION (table, ref_field, motion_x, motion_y, 8, 8);		      \
+}									      \
+/* fi_dmv: put from ref[0], then avg from ref[1] with dmv vector */	      \
+static void motion_fi_dmv_##FORMAT (mpeg2_decoder_t * const decoder,	      \
+				    motion_t * const motion,		      \
+				    mpeg2_mc_fct * const * const table)	      \
+{									      \
+    int motion_x, motion_y, other_x, other_y;				      \
+    unsigned int pos_x, pos_y, xy_half, offset;				      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,		      \
+						     motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);	      \
+    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;			      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    other_x = ((motion_x + (motion_x > 0)) >> 1) + get_dmv (decoder);	      \
+									      \
+    motion_y = motion->pmv[0][1] + get_motion_delta (decoder,		      \
+						     motion->f_code[1]);      \
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);	      \
+    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;			      \
+    other_y = (((motion_y + (motion_y > 0)) >> 1) + get_dmv (decoder) +	      \
+	       decoder->dmv_offset);					      \
+									      \
+    MOTION (mpeg2_mc.put, motion->ref[0], motion_x, motion_y, 16, 0);	      \
+    MOTION (mpeg2_mc.avg, motion->ref[1], other_x, other_y, 16, 0);	      \
+}									      \
+
+MOTION_FUNCTIONS (420, MOTION_420, MOTION_FIELD_420, MOTION_DMV_420,
+		  MOTION_ZERO_420)	/* defines motion_*_420 */
+MOTION_FUNCTIONS (422, MOTION_422, MOTION_FIELD_422, MOTION_DMV_422,
+		  MOTION_ZERO_422)	/* defines motion_*_422 */
+MOTION_FUNCTIONS (444, MOTION_444, MOTION_FIELD_444, MOTION_DMV_444,
+		  MOTION_ZERO_444)	/* defines motion_*_444 */
+
+/* like motion_frame, but parsing without actual motion compensation */
+static void motion_fr_conceal (mpeg2_decoder_t * const decoder)
+{
+    motion_t * const fwd = &(decoder->f_motion);
+    int vec;
+
+    /* x first, then y; both predictor rows receive each bounded value */
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    vec = fwd->pmv[0][0] + get_motion_delta (decoder, fwd->f_code[0]);
+    vec = bound_motion_vector (vec, fwd->f_code[0]);
+    fwd->pmv[1][0] = fwd->pmv[0][0] = vec;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    vec = fwd->pmv[0][1] + get_motion_delta (decoder, fwd->f_code[1]);
+    vec = bound_motion_vector (vec, fwd->f_code[1]);
+    fwd->pmv[1][1] = fwd->pmv[0][1] = vec;
+
+    DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */
+}
+/* as motion_fr_conceal, but first drops one field_select bit */
+static void motion_fi_conceal (mpeg2_decoder_t * const decoder)
+{
+    motion_t * const fwd = &(decoder->f_motion);
+    int vec;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    DUMPBITS (bit_buf, bits, 1); /* remove field_select */
+
+    /* vectors are parsed only to update the predictors; no MOTION call */
+    vec = fwd->pmv[0][0] + get_motion_delta (decoder, fwd->f_code[0]);
+    vec = bound_motion_vector (vec, fwd->f_code[0]);
+    fwd->pmv[1][0] = fwd->pmv[0][0] = vec;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    vec = fwd->pmv[0][1] + get_motion_delta (decoder, fwd->f_code[1]);
+    vec = bound_motion_vector (vec, fwd->f_code[1]);
+    fwd->pmv[1][1] = fwd->pmv[0][1] = vec;
+
+    DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */
+}
+/* the decoder-field aliases are not wanted past this point */
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+/* forward uses put; backward uses avg if forward is also set, else put */
+#define MOTION_CALL(routine,direction)				\
+do {								\
+    if ((direction) & MACROBLOCK_MOTION_FORWARD)		\
+	routine (decoder, &(decoder->f_motion), mpeg2_mc.put);	\
+    if ((direction) & MACROBLOCK_MOTION_BACKWARD)		\
+	routine (decoder, &(decoder->b_motion),			\
+		 ((direction) & MACROBLOCK_MOTION_FORWARD ?	\
+		  mpeg2_mc.avg : mpeg2_mc.put));		\
+} while (0)
+
+#define NEXT_MACROBLOCK	/* step to the next macroblock; contains a return, see the last line */	\
+do {									\
+    decoder->offset += 16;						\
+    if (decoder->offset == decoder->width) {	/* end of the macroblock row */	\
+	do { /* just so we can use the break statement */		\
+	    if (decoder->convert) {					\
+		decoder->convert (decoder->convert_id, decoder->dest,	\
+				  decoder->v_offset);			\
+		if (decoder->coding_type == B_TYPE)	/* converted B pictures keep dest on the same row */	\
+		    break;						\
+	    }								\
+	    decoder->dest[0] += decoder->slice_stride;			\
+	    decoder->dest[1] += decoder->slice_uv_stride;		\
+	    decoder->dest[2] += decoder->slice_uv_stride;		\
+	} while (0);							\
+	decoder->v_offset += 16;					\
+	if (decoder->v_offset > decoder->limit_y) {	/* past the last row: leave the enclosing function */	\
+	    if (mpeg2_cpu_state_restore)				\
+		mpeg2_cpu_state_restore (&cpu_state);			\
+	    return;							\
+	}								\
+	decoder->offset = 0;						\
+    }									\
+} while (0)	/* only usable inside a void function that has decoder and cpu_state in scope */
+
+void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3],
+		      uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3])
+{
+    int offset, stride, height, bottom_field;
+    /* Per-picture setup: derives destination and reference plane pointers, strides, limits and the motion parser table from the three frame buffers (index 0/1/2 = the three planes). */
+    stride = decoder->stride_frame;
+    bottom_field = (decoder->picture_structure == BOTTOM_FIELD);
+    offset = bottom_field ? stride : 0;	/* a bottom field starts one frame line down */
+    height = decoder->height;
+
+    decoder->picture_dest[0] = current_fbuf[0] + offset;
+    decoder->picture_dest[1] = current_fbuf[1] + (offset >> 1);	/* planes 1 and 2 get half the plane-0 offset */
+    decoder->picture_dest[2] = current_fbuf[2] + (offset >> 1);
+
+    decoder->f_motion.ref[0][0] = forward_fbuf[0] + offset;
+    decoder->f_motion.ref[0][1] = forward_fbuf[1] + (offset >> 1);
+    decoder->f_motion.ref[0][2] = forward_fbuf[2] + (offset >> 1);
+
+    decoder->b_motion.ref[0][0] = backward_fbuf[0] + offset;
+    decoder->b_motion.ref[0][1] = backward_fbuf[1] + (offset >> 1);
+    decoder->b_motion.ref[0][2] = backward_fbuf[2] + (offset >> 1);
+
+    if (decoder->picture_structure != FRAME_PICTURE) {	/* field picture: also set up ref[1], the opposite-parity field */
+	decoder->dmv_offset = bottom_field ? 1 : -1;
+	decoder->f_motion.ref2[0] = decoder->f_motion.ref[bottom_field];
+	decoder->f_motion.ref2[1] = decoder->f_motion.ref[!bottom_field];
+	decoder->b_motion.ref2[0] = decoder->b_motion.ref[bottom_field];
+	decoder->b_motion.ref2[1] = decoder->b_motion.ref[!bottom_field];
+	offset = stride - offset;	/* flips between 0 and one frame line: start of the other field */
+
+	if (decoder->second_field && (decoder->coding_type != B_TYPE))
+	    forward_fbuf = current_fbuf;	/* second field of a non-B picture: the opposite-parity forward reference is read from the current buffer */
+
+	decoder->f_motion.ref[1][0] = forward_fbuf[0] + offset;
+	decoder->f_motion.ref[1][1] = forward_fbuf[1] + (offset >> 1);
+	decoder->f_motion.ref[1][2] = forward_fbuf[2] + (offset >> 1);
+
+	decoder->b_motion.ref[1][0] = backward_fbuf[0] + offset;
+	decoder->b_motion.ref[1][1] = backward_fbuf[1] + (offset >> 1);
+	decoder->b_motion.ref[1][2] = backward_fbuf[2] + (offset >> 1);
+
+	stride <<= 1;	/* a field uses every second frame line */
+	height >>= 1;
+    }
+
+    decoder->stride = stride;
+    decoder->uv_stride = stride >> 1;
+    decoder->slice_stride = 16 * stride;	/* one macroblock row = 16 lines */
+    decoder->slice_uv_stride =
+	decoder->slice_stride >> (2 - decoder->chroma_format);	/* chroma_format 0 -> 1/4, 1 -> 1/2, 2 -> same as slice_stride */
+    decoder->limit_x = 2 * decoder->width - 32;	/* NOTE(review): the factor 2 suggests half-sample units - confirm against the motion code */
+    decoder->limit_y_16 = 2 * height - 32;
+    decoder->limit_y_8 = 2 * height - 16;
+    decoder->limit_y = height - 16;	/* largest v_offset accepted by slice_init and NEXT_MACROBLOCK */
+
+    if (decoder->mpeg1) {
+	decoder->motion_parser[0] = motion_zero_420;	/* slot 0: used by mpeg2_slice for skipped macroblocks of P pictures */
+	decoder->motion_parser[MC_FRAME] = motion_mp1;
+	decoder->motion_parser[4] = motion_reuse_420;	/* slot 4: used by mpeg2_slice for skipped macroblocks of other pictures */
+    } else if (decoder->picture_structure == FRAME_PICTURE) {
+	if (decoder->chroma_format == 0) {
+	    decoder->motion_parser[0] = motion_zero_420;
+	    decoder->motion_parser[MC_FIELD] = motion_fr_field_420;
+	    decoder->motion_parser[MC_FRAME] = motion_fr_frame_420;
+	    decoder->motion_parser[MC_DMV] = motion_fr_dmv_420;
+	    decoder->motion_parser[4] = motion_reuse_420;
+	} else if (decoder->chroma_format == 1) {
+	    decoder->motion_parser[0] = motion_zero_422;
+	    decoder->motion_parser[MC_FIELD] = motion_fr_field_422;
+	    decoder->motion_parser[MC_FRAME] = motion_fr_frame_422;
+	    decoder->motion_parser[MC_DMV] = motion_fr_dmv_422;
+	    decoder->motion_parser[4] = motion_reuse_422;
+	} else {
+	    decoder->motion_parser[0] = motion_zero_444;
+	    decoder->motion_parser[MC_FIELD] = motion_fr_field_444;
+	    decoder->motion_parser[MC_FRAME] = motion_fr_frame_444;
+	    decoder->motion_parser[MC_DMV] = motion_fr_dmv_444;
+	    decoder->motion_parser[4] = motion_reuse_444;
+	}
+    } else {	/* field pictures: MC_16X8 replaces MC_FRAME */
+	if (decoder->chroma_format == 0) {
+	    decoder->motion_parser[0] = motion_zero_420;
+	    decoder->motion_parser[MC_FIELD] = motion_fi_field_420;
+	    decoder->motion_parser[MC_16X8] = motion_fi_16x8_420;
+	    decoder->motion_parser[MC_DMV] = motion_fi_dmv_420;
+	    decoder->motion_parser[4] = motion_reuse_420;
+	} else if (decoder->chroma_format == 1) {
+	    decoder->motion_parser[0] = motion_zero_422;
+	    decoder->motion_parser[MC_FIELD] = motion_fi_field_422;
+	    decoder->motion_parser[MC_16X8] = motion_fi_16x8_422;
+	    decoder->motion_parser[MC_DMV] = motion_fi_dmv_422;
+	    decoder->motion_parser[4] = motion_reuse_422;
+	} else {
+	    decoder->motion_parser[0] = motion_zero_444;
+	    decoder->motion_parser[MC_FIELD] = motion_fi_field_444;
+	    decoder->motion_parser[MC_16X8] = motion_fi_16x8_444;
+	    decoder->motion_parser[MC_DMV] = motion_fi_dmv_444;
+	    decoder->motion_parser[4] = motion_reuse_444;
+	}
+    }
+}
+
+static inline int slice_init (mpeg2_decoder_t * const decoder, int code)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    int offset;
+    const MBAtab * mba;
+    /* Prepare decoding of one slice: reset predictors, position dest[] on row (code - 1), read the quantizer scale and the first macroblock address. Returns 0 on success, 1 on a bad increment code or a start row beyond limit_y. */
+    decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] =
+	decoder->dc_dct_pred[2] = 16384;
+
+    decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0;
+    decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0;
+    decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0;
+    decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0;
+
+    if (decoder->vertical_position_extension) {
+	code += UBITS (bit_buf, 3) << 7;	/* three extra bits extend the row number above bit 6 */
+	DUMPBITS (bit_buf, bits, 3);
+    }
+    decoder->v_offset = (code - 1) * 16;
+    offset = 0;
+    if (!(decoder->convert) || decoder->coding_type != B_TYPE)	/* converted B pictures always write at picture_dest (offset stays 0) */
+	offset = (code - 1) * decoder->slice_stride;
+
+    decoder->dest[0] = decoder->picture_dest[0] + offset;
+    offset >>= (2 - decoder->chroma_format);	/* same scaling as slice_uv_stride in mpeg2_init_fbuf */
+    decoder->dest[1] = decoder->picture_dest[1] + offset;
+    decoder->dest[2] = decoder->picture_dest[2] + offset;
+
+    get_quantizer_scale (decoder);
+
+    /* ignore intra_slice and all the extra data */
+    while (bit_buf & 0x80000000) {	/* leading bit set: drop a 9-bit group and look again */
+	DUMPBITS (bit_buf, bits, 9);
+	NEEDBITS (bit_buf, bits, bit_ptr);
+    }
+
+    /* decode initial macroblock address increment */
+    offset = 0;	/* from here on, offset accumulates macroblock_escape increments */
+    while (1) {	/* one bit is still in front of the code here: hence 6/12-bit lookups and len + 1 below (mpeg2_slice uses 5/11 and len) */
+	if (bit_buf >= 0x08000000) {
+	    mba = MBA_5 + (UBITS (bit_buf, 6) - 2);
+	    break;
+	} else if (bit_buf >= 0x01800000) {
+	    mba = MBA_11 + (UBITS (bit_buf, 12) - 24);
+	    break;
+	} else switch (UBITS (bit_buf, 12)) {
+	case 8:		/* macroblock_escape */
+	    offset += 33;
+	    DUMPBITS (bit_buf, bits, 11);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+	    continue;
+	case 15:	/* macroblock_stuffing (MPEG1 only) */
+	    bit_buf &= 0xfffff;	/* keep only the low 20 bits, i.e. clear the 12 bits just matched */
+	    DUMPBITS (bit_buf, bits, 11);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+	    continue;
+	default:	/* error */
+	    return 1;
+	}
+    }
+    DUMPBITS (bit_buf, bits, mba->len + 1);
+    decoder->offset = (offset + mba->mba) << 4;	/* macroblock index -> pixel column (16 pixels each) */
+
+    while (decoder->offset - decoder->width >= 0) {	/* start column lies beyond this row: wrap onto following rows */
+	decoder->offset -= decoder->width;
+	if (!(decoder->convert) || decoder->coding_type != B_TYPE) {
+	    decoder->dest[0] += decoder->slice_stride;
+	    decoder->dest[1] += decoder->slice_uv_stride;
+	    decoder->dest[2] += decoder->slice_uv_stride;
+	}
+	decoder->v_offset += 16;
+    }
+    if (decoder->v_offset > decoder->limit_y)
+	return 1;
+
+    return 0;
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+
+void mpeg2_slice (mpeg2_decoder_t * const decoder, const int code,
+		  const uint8_t * const buffer)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    cpu_state_t cpu_state;
+    /* Decode one slice. code is handed to slice_init (it selects the macroblock row); buffer points at the slice data. Returns nothing: decoding ends on slice_init failure, on an unknown address increment code, or when NEXT_MACROBLOCK passes limit_y. */
+    bitstream_init (decoder, buffer);
+
+    if (slice_init (decoder, code))
+	return;
+
+    if (mpeg2_cpu_state_save)
+	mpeg2_cpu_state_save (&cpu_state);	/* every exit below (including the one hidden in NEXT_MACROBLOCK) restores it */
+
+    while (1) {
+	int macroblock_modes;
+	int mba_inc;
+	const MBAtab * mba;
+
+	NEEDBITS (bit_buf, bits, bit_ptr);
+
+	macroblock_modes = get_macroblock_modes (decoder);
+
+	/* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? */
+	if (macroblock_modes & MACROBLOCK_QUANT)
+	    get_quantizer_scale (decoder);
+
+	if (macroblock_modes & MACROBLOCK_INTRA) {
+
+	    int DCT_offset, DCT_stride;
+	    int offset;
+	    uint8_t * dest_y;
+
+	    if (decoder->concealment_motion_vectors) {
+		if (decoder->picture_structure == FRAME_PICTURE)
+		    motion_fr_conceal (decoder);
+		else
+		    motion_fi_conceal (decoder);
+	    } else {
+		decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0;
+		decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0;
+		decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0;
+		decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0;
+	    }
+
+	    if (macroblock_modes & DCT_TYPE_INTERLACED) {
+		DCT_offset = decoder->stride;	/* lower blocks start on the next line, rows interleaved */
+		DCT_stride = decoder->stride * 2;
+	    } else {
+		DCT_offset = decoder->stride * 8;	/* lower blocks start eight lines down */
+		DCT_stride = decoder->stride;
+	    }
+
+	    offset = decoder->offset;
+	    dest_y = decoder->dest[0] + offset;
+	    slice_intra_DCT (decoder, 0, dest_y, DCT_stride);
+	    slice_intra_DCT (decoder, 0, dest_y + 8, DCT_stride);
+	    slice_intra_DCT (decoder, 0, dest_y + DCT_offset, DCT_stride);
+	    slice_intra_DCT (decoder, 0, dest_y + DCT_offset + 8, DCT_stride);
+	    if (likely (decoder->chroma_format == 0)) {
+		slice_intra_DCT (decoder, 1, decoder->dest[1] + (offset >> 1),
+				 decoder->uv_stride);
+		slice_intra_DCT (decoder, 2, decoder->dest[2] + (offset >> 1),
+				 decoder->uv_stride);
+		if (decoder->coding_type == D_TYPE) {
+		    NEEDBITS (bit_buf, bits, bit_ptr);
+		    DUMPBITS (bit_buf, bits, 1);
+		}
+	    } else if (likely (decoder->chroma_format == 1)) {
+		uint8_t * dest_u = decoder->dest[1] + (offset >> 1);
+		uint8_t * dest_v = decoder->dest[2] + (offset >> 1);
+		DCT_stride >>= 1;
+		DCT_offset >>= 1;
+		slice_intra_DCT (decoder, 1, dest_u, DCT_stride);
+		slice_intra_DCT (decoder, 2, dest_v, DCT_stride);
+		slice_intra_DCT (decoder, 1, dest_u + DCT_offset, DCT_stride);
+		slice_intra_DCT (decoder, 2, dest_v + DCT_offset, DCT_stride);
+	    } else {
+		uint8_t * dest_u = decoder->dest[1] + offset;
+		uint8_t * dest_v = decoder->dest[2] + offset;
+		slice_intra_DCT (decoder, 1, dest_u, DCT_stride);
+		slice_intra_DCT (decoder, 2, dest_v, DCT_stride);
+		slice_intra_DCT (decoder, 1, dest_u + DCT_offset, DCT_stride);
+		slice_intra_DCT (decoder, 2, dest_v + DCT_offset, DCT_stride);
+		slice_intra_DCT (decoder, 1, dest_u + 8, DCT_stride);
+		slice_intra_DCT (decoder, 2, dest_v + 8, DCT_stride);
+		slice_intra_DCT (decoder, 1, dest_u + DCT_offset + 8,
+				 DCT_stride);
+		slice_intra_DCT (decoder, 2, dest_v + DCT_offset + 8,
+				 DCT_stride);
+	    }
+	} else {
+
+	    motion_parser_t * parser;
+
+	    parser =
+		decoder->motion_parser[macroblock_modes >> MOTION_TYPE_SHIFT];
+	    MOTION_CALL (parser, macroblock_modes);
+
+	    if (macroblock_modes & MACROBLOCK_PATTERN) {
+		int coded_block_pattern;
+		int DCT_offset, DCT_stride;
+
+		if (macroblock_modes & DCT_TYPE_INTERLACED) {
+		    DCT_offset = decoder->stride;
+		    DCT_stride = decoder->stride * 2;
+		} else {
+		    DCT_offset = decoder->stride * 8;
+		    DCT_stride = decoder->stride;
+		}
+
+		coded_block_pattern = get_coded_block_pattern (decoder);
+
+		if (likely (decoder->chroma_format == 0)) {
+		    int offset = decoder->offset;
+		    uint8_t * dest_y = decoder->dest[0] + offset;
+		    if (coded_block_pattern & 1)
+			slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride);
+		    if (coded_block_pattern & 2)
+			slice_non_intra_DCT (decoder, 0, dest_y + 8,
+					     DCT_stride);
+		    if (coded_block_pattern & 4)
+			slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset,
+					     DCT_stride);
+		    if (coded_block_pattern & 8)
+			slice_non_intra_DCT (decoder, 0,
+					     dest_y + DCT_offset + 8,
+					     DCT_stride);
+		    if (coded_block_pattern & 16)
+			slice_non_intra_DCT (decoder, 1,
+					     decoder->dest[1] + (offset >> 1),
+					     decoder->uv_stride);
+		    if (coded_block_pattern & 32)
+			slice_non_intra_DCT (decoder, 2,
+					     decoder->dest[2] + (offset >> 1),
+					     decoder->uv_stride);
+		} else if (likely (decoder->chroma_format == 1)) {
+		    int offset;
+		    uint8_t * dest_y;
+		    /* the two extra pattern bits stay in bit positions 31..30; masks are unsigned because 3 << 30 and 2 << 30 overflow a signed int */
+		    coded_block_pattern |= bit_buf & (3u << 30);
+		    DUMPBITS (bit_buf, bits, 2);
+
+		    offset = decoder->offset;
+		    dest_y = decoder->dest[0] + offset;
+		    if (coded_block_pattern & 1)
+			slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride);
+		    if (coded_block_pattern & 2)
+			slice_non_intra_DCT (decoder, 0, dest_y + 8,
+					     DCT_stride);
+		    if (coded_block_pattern & 4)
+			slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset,
+					     DCT_stride);
+		    if (coded_block_pattern & 8)
+			slice_non_intra_DCT (decoder, 0,
+					     dest_y + DCT_offset + 8,
+					     DCT_stride);
+
+		    DCT_stride >>= 1;
+		    DCT_offset = (DCT_offset + offset) >> 1;	/* from here DCT_offset already includes the column offset */
+		    if (coded_block_pattern & 16)
+			slice_non_intra_DCT (decoder, 1,
+					     decoder->dest[1] + (offset >> 1),
+					     DCT_stride);
+		    if (coded_block_pattern & 32)
+			slice_non_intra_DCT (decoder, 2,
+					     decoder->dest[2] + (offset >> 1),
+					     DCT_stride);
+		    if (coded_block_pattern & (2u << 30))
+			slice_non_intra_DCT (decoder, 1,
+					     decoder->dest[1] + DCT_offset,
+					     DCT_stride);
+		    if (coded_block_pattern & (1u << 30))
+			slice_non_intra_DCT (decoder, 2,
+					     decoder->dest[2] + DCT_offset,
+					     DCT_stride);
+		} else {
+		    int offset;
+		    uint8_t * dest_y, * dest_u, * dest_v;
+		    /* six extra pattern bits stay in bit positions 31..26; masks are unsigned because 63 << 26 and 32 << 26 overflow a signed int */
+		    coded_block_pattern |= bit_buf & (63u << 26);
+		    DUMPBITS (bit_buf, bits, 6);
+
+		    offset = decoder->offset;
+		    dest_y = decoder->dest[0] + offset;
+		    dest_u = decoder->dest[1] + offset;
+		    dest_v = decoder->dest[2] + offset;
+
+		    if (coded_block_pattern & 1)
+			slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride);
+		    if (coded_block_pattern & 2)
+			slice_non_intra_DCT (decoder, 0, dest_y + 8,
+					     DCT_stride);
+		    if (coded_block_pattern & 4)
+			slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset,
+					     DCT_stride);
+		    if (coded_block_pattern & 8)
+			slice_non_intra_DCT (decoder, 0,
+					     dest_y + DCT_offset + 8,
+					     DCT_stride);
+
+		    if (coded_block_pattern & 16)
+			slice_non_intra_DCT (decoder, 1, dest_u, DCT_stride);
+		    if (coded_block_pattern & 32)
+			slice_non_intra_DCT (decoder, 2, dest_v, DCT_stride);
+		    if (coded_block_pattern & (32u << 26))
+			slice_non_intra_DCT (decoder, 1, dest_u + DCT_offset,
+					     DCT_stride);
+		    if (coded_block_pattern & (16u << 26))
+			slice_non_intra_DCT (decoder, 2, dest_v + DCT_offset,
+					     DCT_stride);
+		    if (coded_block_pattern & (8u << 26))
+			slice_non_intra_DCT (decoder, 1, dest_u + 8,
+					     DCT_stride);
+		    if (coded_block_pattern & (4u << 26))
+			slice_non_intra_DCT (decoder, 2, dest_v + 8,
+					     DCT_stride);
+		    if (coded_block_pattern & (2u << 26))
+			slice_non_intra_DCT (decoder, 1,
+					     dest_u + DCT_offset + 8,
+					     DCT_stride);
+		    if (coded_block_pattern & (1u << 26))
+			slice_non_intra_DCT (decoder, 2,
+					     dest_v + DCT_offset + 8,
+					     DCT_stride);
+		}
+	    }
+
+	    decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] =
+		decoder->dc_dct_pred[2] = 16384;
+	}
+
+	NEXT_MACROBLOCK;
+
+	NEEDBITS (bit_buf, bits, bit_ptr);
+	mba_inc = 0;	/* number of macroblocks to skip before the next coded one */
+	while (1) {
+	    if (bit_buf >= 0x10000000) {
+		mba = MBA_5 + (UBITS (bit_buf, 5) - 2);
+		break;
+	    } else if (bit_buf >= 0x03000000) {
+		mba = MBA_11 + (UBITS (bit_buf, 11) - 24);
+		break;
+	    } else switch (UBITS (bit_buf, 11)) {
+	    case 8:		/* macroblock_escape */
+		mba_inc += 33;
+		/* fall through */
+	    case 15:	/* macroblock_stuffing (MPEG1 only) */
+		DUMPBITS (bit_buf, bits, 11);
+		NEEDBITS (bit_buf, bits, bit_ptr);
+		continue;
+	    default:	/* end of slice, or error */
+		if (mpeg2_cpu_state_restore)
+		    mpeg2_cpu_state_restore (&cpu_state);
+		return;
+	    }
+	}
+	DUMPBITS (bit_buf, bits, mba->len);
+	mba_inc += mba->mba;
+
+	if (mba_inc) {
+	    decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] =
+		decoder->dc_dct_pred[2] = 16384;
+	    /* skipped macroblocks: P pictures use parser slot 0 with forward prediction, other pictures use slot 4 with the previous macroblock_modes */
+	    if (decoder->coding_type == P_TYPE) {
+		do {
+		    MOTION_CALL (decoder->motion_parser[0],
+				 MACROBLOCK_MOTION_FORWARD);
+		    NEXT_MACROBLOCK;
+		} while (--mba_inc);
+	    } else {
+		do {
+		    MOTION_CALL (decoder->motion_parser[4], macroblock_modes);
+		    NEXT_MACROBLOCK;
+		} while (--mba_inc);
+	    }
+	}
+    }
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
diff --git a/src/video_dec/libmpeg2new/libmpeg2/uyvy.c b/src/video_dec/libmpeg2new/libmpeg2/uyvy.c
new file mode 100644
index 000000000..7f107ffad
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/uyvy.c
@@ -0,0 +1,123 @@
+/*
+ * uyvy.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 2003      Regis Duchesne <hpreg@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config.h"
+
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "mpeg2convert.h"
+
+typedef struct {	/* per-instance state of the UYVY converter */
+    int width;		/* picture width in pixels, copied from seq->width by mpeg2convert_uyvy */
+    int stride;		/* line pitch in pixels, set per picture by uyvy_start (width, doubled when nb_fields == 1) */
+    int chroma420;	/* 1 when the chroma planes have fewer lines than luma, else 0 */
+    uint8_t * out;	/* where uyvy_copy writes: fbuf->buf[0], possibly advanced by one output line in uyvy_start */
+} convert_uyvy_t;
+
+static void uyvy_start (void * _id, const mpeg2_fbuf_t * fbuf,
+			const mpeg2_picture_t * picture,
+			const mpeg2_gop_t * gop)	/* gop is not used */
+{
+    convert_uyvy_t * instance = (convert_uyvy_t *) _id;
+    /* Per-picture setup: choose the output base pointer and the line pitch that uyvy_copy will use. */
+    instance->out = fbuf->buf[0];
+    instance->stride = instance->width;
+    if (picture->nb_fields == 1) {	/* single-field picture: write every second output line */
+	if (! (picture->flags & PIC_FLAG_TOP_FIELD_FIRST))
+	    instance->out += 2 * instance->stride;	/* skip one output line (2 bytes per pixel) */
+	instance->stride <<= 1;
+    }
+}
+
+#ifdef WORDS_BIGENDIAN	/* PACK(a,b,c,d): 32-bit word whose bytes are a, b, c, d in memory order */
+#define PACK(a,b,c,d) (((uint32_t) (a) << 24) | ((uint32_t) (b) << 16) | ((uint32_t) (c) << 8) | (uint32_t) (d))
+#else	/* operands are widened to uint32_t first: a byte >= 0x80 promoted to int and shifted left by 24 overflows */
+#define PACK(a,b,c,d) (((uint32_t) (d) << 24) | ((uint32_t) (c) << 16) | ((uint32_t) (b) << 8) | (uint32_t) (a))
+#endif
+
+static void uyvy_copy (void * const _id, uint8_t * const * src,
+		       const unsigned int v_offset)
+{
+    const convert_uyvy_t * const id = (convert_uyvy_t *) _id;
+    uint8_t * _dst;
+    uint8_t * py, * pu, * pv;
+    int i, j;
+    /* Pack 16 lines of planar data (src[0], src[1], src[2]) into 2-bytes-per-pixel output, starting at output line v_offset. */
+    _dst = id->out + 2 * id->stride * v_offset;
+    py = src[0]; pu = src[1]; pv = src[2];
+
+    i = 16;	/* lines left in this strip */
+    do {
+	uint32_t * dst = (uint32_t *) _dst;	/* NOTE(review): assumes the output line is 4-byte aligned */
+	/* NOTE(review): a width below 16 gives j == 0 and the do/while below would not stop after one pass - presumably callers guarantee width >= 16 */
+	j = id->width >> 4;
+	do {	/* 16 pixels per pass: each word holds bytes pu, py, pv, py in memory order (see PACK) */
+	    dst[0] = PACK (pu[0],  py[0], pv[0],  py[1]);
+	    dst[1] = PACK (pu[1],  py[2], pv[1],  py[3]);
+	    dst[2] = PACK (pu[2],  py[4], pv[2],  py[5]);
+	    dst[3] = PACK (pu[3],  py[6], pv[3],  py[7]);
+	    dst[4] = PACK (pu[4],  py[8], pv[4],  py[9]);
+	    dst[5] = PACK (pu[5], py[10], pv[5], py[11]);
+	    dst[6] = PACK (pu[6], py[12], pv[6], py[13]);
+	    dst[7] = PACK (pu[7], py[14], pv[7], py[15]);
+	    py += 16;
+	    pu += 8;
+	    pv += 8;
+	    dst += 8;
+	} while (--j);
+	py -= id->width;	/* rewind the source pointers to the start of the line */
+	pu -= id->width >> 1;
+	pv -= id->width >> 1;
+	_dst += 2 * id->stride;
+	py += id->stride;
+	if (! (--i & id->chroma420)) {	/* chroma420 == 1: advance chroma only when the remaining count is even, i.e. every second line; 0: every line */
+	    pu += id->stride >> 1;
+	    pv += id->stride >> 1;
+	}
+    } while (i);
+}
+
+int mpeg2convert_uyvy (int stage, void * _id, const mpeg2_sequence_t * seq,
+		       int stride, uint32_t accel, void * arg,
+		       mpeg2_convert_init_t * result)	/* stage, stride, accel and arg are not used */
+{
+    convert_uyvy_t * instance = (convert_uyvy_t *) _id;
+    /* Converter setup entry point. Returns 1 when chroma is as wide as luma (not handled here), otherwise 0. */
+    if (seq->chroma_width == seq->width)
+	return 1;
+    /* with an instance: initialise it and publish buffer size and callbacks; without one: only report the instance size needed */
+    if (instance) {
+	instance->width = seq->width;
+	instance->chroma420 = (seq->chroma_height < seq->height);
+	result->buf_size[0] = seq->width * seq->height * 2;	/* one packed plane, 2 bytes per pixel */
+	result->buf_size[1] = result->buf_size[2] = 0;
+	result->start = uyvy_start;
+	result->copy = uyvy_copy;
+    } else {
+	result->id_size = sizeof (convert_uyvy_t);
+    }
+
+    return 0;
+}
diff --git a/src/video_dec/libmpeg2new/libmpeg2/vlc.h b/src/video_dec/libmpeg2new/libmpeg2/vlc.h
new file mode 100644
index 000000000..57448ce04
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/vlc.h
@@ -0,0 +1,429 @@
+/*
+ * vlc.h
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#define GETWORD(bit_buf,shift,bit_ptr)	/* OR the next two bytes, as a big-endian 16-bit word shifted left by shift, into bit_buf; advance bit_ptr by 2 */	\
+do {								\
+    bit_buf |= ((uint32_t) ((bit_ptr[0] << 8) | bit_ptr[1])) << (shift);	/* unsigned shift: a word with its top bit set shifted by 16 would overflow int */	\
+    bit_ptr += 2;						\
+} while (0)
+
+static inline void bitstream_init (mpeg2_decoder_t * decoder,
+				   const uint8_t * start)
+{	/* Prime the bit reader with the first four bytes at start, most significant byte first. */
+    decoder->bitstream_buf =	/* start[0] is widened before shifting: a byte >= 0x80 promoted to int and shifted left by 24 overflows */
+	((uint32_t) start[0] << 24) | (start[1] << 16) | (start[2] << 8) | start[3];
+    decoder->bitstream_ptr = start + 4;
+    decoder->bitstream_bits = -16;	/* 32 bits buffered; NEEDBITS refills once this counter becomes positive */
+}
+
+/* make sure that there are at least 16 valid bits in bit_buf */
+#define NEEDBITS(bit_buf,bits,bit_ptr)	/* bits > 0 means fewer than 16 valid bits are left (bits is -16 right after bitstream_init) */	\
+do {						\
+    if (unlikely (bits > 0)) {			\
+	GETWORD (bit_buf, bits, bit_ptr);	/* the new word is shifted up by bits, landing just below the remaining valid bits */	\
+	bits -= 16;				\
+    }						\
+} while (0)
+
+/* remove num valid bits from bit_buf (the bits are consumed from the most significant end) */
+#define DUMPBITS(bit_buf,bits,num)	\
+do {					\
+    bit_buf <<= (num);			\
+    bits += (num);	/* NEEDBITS refills once this becomes positive */	\
+} while (0)
+
+/* take num bits from the high part of bit_buf and zero extend them (num must be 1..32: num == 0 would shift by the full 32-bit width) */
+#define UBITS(bit_buf,num) (((uint32_t)(bit_buf)) >> (32 - (num)))
+
+/* take num bits from the high part of bit_buf and sign extend them (relies on >> of a negative int32_t being an arithmetic shift, which C leaves implementation-defined) */
+#define SBITS(bit_buf,num) (((int32_t)(bit_buf)) >> (32 - (num)))
+
+typedef struct {	/* entry of the macroblock-mode tables MB_I, MB_P, MB_B */
+    uint8_t modes;	/* OR of MACROBLOCK_* flags */
+    uint8_t len;	/* presumably the code length in bits - the lookup code is not in this header */
+} MBtab;
+
+typedef struct {	/* entry of the MV_* tables */
+    uint8_t delta;
+    uint8_t len;
+} MVtab;
+
+typedef struct {	/* entry of DMV_2 */
+    int8_t dmv;		/* signed: DMV_2 holds -1, 0 and 1 */
+    uint8_t len;
+} DMVtab;
+
+typedef struct {	/* entry of the CBP_* tables */
+    uint8_t cbp;	/* values in the tables fit in six bits (0x00..0x3f) */
+    uint8_t len;
+} CBPtab;
+
+typedef struct {	/* entry of the DC_* tables */
+    uint8_t size;
+    uint8_t len;
+} DCtab;
+
+typedef struct {	/* entry of the DCT_* tables */
+    uint8_t run;
+    uint8_t level;
+    uint8_t len;
+} DCTtab;
+
+typedef struct {	/* entry of the MBA_* tables used by slice_init and mpeg2_slice in slice.c */
+    uint8_t mba;	/* added to the skip count (mba_inc) or to the initial macroblock index */
+    uint8_t len;	/* number of bits passed to DUMPBITS after a match */
+} MBAtab;
+
+
+#define INTRA MACROBLOCK_INTRA	/* short local aliases, all removed again after MB_B */
+#define QUANT MACROBLOCK_QUANT
+
+static const MBtab MB_I [] = {	/* 2 {modes, len} entries; name suggests I pictures - lookup code not shown here */
+    {INTRA|QUANT, 2}, {INTRA, 1}
+};
+
+#define MC MACROBLOCK_MOTION_FORWARD
+#define CODED MACROBLOCK_PATTERN
+
+static const MBtab MB_P [] = {	/* 32 {modes, len} entries; shorter codes fill several consecutive slots */
+    {INTRA|QUANT, 6}, {CODED|QUANT, 5}, {MC|CODED|QUANT, 5}, {INTRA,    5},
+    {MC,          3}, {MC,          3}, {MC,             3}, {MC,       3},
+    {CODED,       2}, {CODED,       2}, {CODED,          2}, {CODED,    2},
+    {CODED,       2}, {CODED,       2}, {CODED,          2}, {CODED,    2},
+    {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1},
+    {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1},
+    {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1},
+    {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1}
+};
+
+#define FWD MACROBLOCK_MOTION_FORWARD
+#define BWD MACROBLOCK_MOTION_BACKWARD
+#define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD	/* unparenthesized: only safe because it is combined with | below */
+
+static const MBtab MB_B [] = {	/* 64 {modes, len} entries; entry 0 has no mode flags */
+    {0,                 6}, {INTRA|QUANT,       6},
+    {BWD|CODED|QUANT,   6}, {FWD|CODED|QUANT,   6},
+    {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5},
+					{INTRA,       5}, {INTRA,       5},
+    {FWD,         4}, {FWD,         4}, {FWD,         4}, {FWD,         4},
+    {FWD|CODED,   4}, {FWD|CODED,   4}, {FWD|CODED,   4}, {FWD|CODED,   4},
+    {BWD,         3}, {BWD,         3}, {BWD,         3}, {BWD,         3},
+    {BWD,         3}, {BWD,         3}, {BWD,         3}, {BWD,         3},
+    {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3},
+    {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3},
+    {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
+    {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
+    {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
+    {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
+    {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
+    {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
+    {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
+    {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}
+};
+
+#undef INTRA
+#undef QUANT
+#undef MC
+#undef CODED
+#undef FWD
+#undef BWD
+#undef INTER
+
+
+static const MVtab MV_4 [] = {	/* 8 {delta, len} entries; presumably indexed by leading bitstream bits - lookup code not in this header */
+    { 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}
+};
+
+static const MVtab MV_10 [] = {	/* 48 {delta, len} entries for the longer codes (len 7..10); the first 12 slots all hold {0, 10} */
+    { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10},
+    { 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10},
+    {11,10}, {10,10}, { 9, 9}, { 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9},
+    { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7},
+    { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7},
+    { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}
+};
+
+
+static const DMVtab DMV_2 [] = {	/* 4 {dmv, len} entries: 0 with len 1 (two slots), +1 and -1 with len 2 */
+    { 0, 1}, { 0, 1}, { 1, 2}, {-1, 2}
+};
+
+
+static const CBPtab CBP_7 [] = {	/* 112 {cbp, len} entries with len 3..7; shorter codes fill several consecutive slots */
+    {0x11, 7}, {0x12, 7}, {0x14, 7}, {0x18, 7},
+    {0x21, 7}, {0x22, 7}, {0x24, 7}, {0x28, 7},
+    {0x3f, 6}, {0x3f, 6}, {0x30, 6}, {0x30, 6},
+    {0x09, 6}, {0x09, 6}, {0x06, 6}, {0x06, 6},
+    {0x1f, 5}, {0x1f, 5}, {0x1f, 5}, {0x1f, 5},
+    {0x10, 5}, {0x10, 5}, {0x10, 5}, {0x10, 5},
+    {0x2f, 5}, {0x2f, 5}, {0x2f, 5}, {0x2f, 5},
+    {0x20, 5}, {0x20, 5}, {0x20, 5}, {0x20, 5},
+    {0x07, 5}, {0x07, 5}, {0x07, 5}, {0x07, 5},
+    {0x0b, 5}, {0x0b, 5}, {0x0b, 5}, {0x0b, 5},
+    {0x0d, 5}, {0x0d, 5}, {0x0d, 5}, {0x0d, 5},
+    {0x0e, 5}, {0x0e, 5}, {0x0e, 5}, {0x0e, 5},
+    {0x05, 5}, {0x05, 5}, {0x05, 5}, {0x05, 5},
+    {0x0a, 5}, {0x0a, 5}, {0x0a, 5}, {0x0a, 5},
+    {0x03, 5}, {0x03, 5}, {0x03, 5}, {0x03, 5},
+    {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5},
+    {0x01, 4}, {0x01, 4}, {0x01, 4}, {0x01, 4},
+    {0x01, 4}, {0x01, 4}, {0x01, 4}, {0x01, 4},
+    {0x02, 4}, {0x02, 4}, {0x02, 4}, {0x02, 4},
+    {0x02, 4}, {0x02, 4}, {0x02, 4}, {0x02, 4},
+    {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4},
+    {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4},
+    {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4},
+    {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4},
+    {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3},
+    {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3},
+    {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3},
+    {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3}
+};
+
+static const CBPtab CBP_9 [] = {	/* 64 {cbp, len} entries with len 8..9; the first two slots carry cbp 0 */
+    {0,    9}, {0x00, 9}, {0x39, 9}, {0x36, 9},
+    {0x37, 9}, {0x3b, 9}, {0x3d, 9}, {0x3e, 9},
+    {0x17, 8}, {0x17, 8}, {0x1b, 8}, {0x1b, 8},
+    {0x1d, 8}, {0x1d, 8}, {0x1e, 8}, {0x1e, 8},
+    {0x27, 8}, {0x27, 8}, {0x2b, 8}, {0x2b, 8},
+    {0x2d, 8}, {0x2d, 8}, {0x2e, 8}, {0x2e, 8},
+    {0x19, 8}, {0x19, 8}, {0x16, 8}, {0x16, 8},
+    {0x29, 8}, {0x29, 8}, {0x26, 8}, {0x26, 8},
+    {0x35, 8}, {0x35, 8}, {0x3a, 8}, {0x3a, 8},
+    {0x33, 8}, {0x33, 8}, {0x3c, 8}, {0x3c, 8},
+    {0x15, 8}, {0x15, 8}, {0x1a, 8}, {0x1a, 8},
+    {0x13, 8}, {0x13, 8}, {0x1c, 8}, {0x1c, 8},
+    {0x25, 8}, {0x25, 8}, {0x2a, 8}, {0x2a, 8},
+    {0x23, 8}, {0x23, 8}, {0x2c, 8}, {0x2c, 8},
+    {0x31, 8}, {0x31, 8}, {0x32, 8}, {0x32, 8},
+    {0x34, 8}, {0x34, 8}, {0x38, 8}, {0x38, 8}
+};
+
+
+static const DCtab DC_lum_5 [] = {	/* 31 {size, len} entries - only indices 0..30 exist; presumably longer codes go through DC_long - confirm in the caller */
+    {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
+    {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+    {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3},
+    {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}
+};
+
+static const DCtab DC_chrom_5 [] = {	/* 31 {size, len} entries - only indices 0..30 exist; presumably longer codes go through DC_long - confirm in the caller */
+    {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2},
+    {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
+    {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+    {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}
+};
+
+static const DCtab DC_long [] = {	/* 32 {size, len} entries covering sizes 6..11 with len 5..9 */
+    {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5},
+    {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5},
+    {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, { 7, 6}, { 7, 6},
+    {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9}
+};
+
+
+static const DCTtab DCT_16 [] = {	/* 32 {run, level, len} entries, all with len 0; NOTE(review): the {129, 0, 0} slots look like an invalid-code marker - confirm in the DCT parsing code */
+    {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
+    {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
+    {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
+    {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
+    {  2,18, 0}, {  2,17, 0}, {  2,16, 0}, {  2,15, 0},
+    {  7, 3, 0}, { 17, 2, 0}, { 16, 2, 0}, { 15, 2, 0},
+    { 14, 2, 0}, { 13, 2, 0}, { 12, 2, 0}, { 32, 1, 0},
+    { 31, 1, 0}, { 30, 1, 0}, { 29, 1, 0}, { 28, 1, 0}
+};
+
+static const DCTtab DCT_15 [] = {	/* 48 {run, level, len} entries with len 14..15 */
+    {  1,40,15}, {  1,39,15}, {  1,38,15}, {  1,37,15},
+    {  1,36,15}, {  1,35,15}, {  1,34,15}, {  1,33,15},
+    {  1,32,15}, {  2,14,15}, {  2,13,15}, {  2,12,15},
+    {  2,11,15}, {  2,10,15}, {  2, 9,15}, {  2, 8,15},
+    {  1,31,14}, {  1,31,14}, {  1,30,14}, {  1,30,14},
+    {  1,29,14}, {  1,29,14}, {  1,28,14}, {  1,28,14},
+    {  1,27,14}, {  1,27,14}, {  1,26,14}, {  1,26,14},
+    {  1,25,14}, {  1,25,14}, {  1,24,14}, {  1,24,14},
+    {  1,23,14}, {  1,23,14}, {  1,22,14}, {  1,22,14},
+    {  1,21,14}, {  1,21,14}, {  1,20,14}, {  1,20,14},
+    {  1,19,14}, {  1,19,14}, {  1,18,14}, {  1,18,14},
+    {  1,17,14}, {  1,17,14}, {  1,16,14}, {  1,16,14}
+};
+
+static const DCTtab DCT_13 [] = {	/* 48 {run, level, len} entries with len 12..13 */
+    { 11, 2,13}, { 10, 2,13}, {  6, 3,13}, {  4, 4,13},
+    {  3, 5,13}, {  2, 7,13}, {  2, 6,13}, {  1,15,13},
+    {  1,14,13}, {  1,13,13}, {  1,12,13}, { 27, 1,13},
+    { 26, 1,13}, { 25, 1,13}, { 24, 1,13}, { 23, 1,13},
+    {  1,11,12}, {  1,11,12}, {  9, 2,12}, {  9, 2,12},
+    {  5, 3,12}, {  5, 3,12}, {  1,10,12}, {  1,10,12},
+    {  3, 4,12}, {  3, 4,12}, {  8, 2,12}, {  8, 2,12},
+    { 22, 1,12}, { 22, 1,12}, { 21, 1,12}, { 21, 1,12},
+    {  1, 9,12}, {  1, 9,12}, { 20, 1,12}, { 20, 1,12},
+    { 19, 1,12}, { 19, 1,12}, {  2, 5,12}, {  2, 5,12},
+    {  4, 3,12}, {  4, 3,12}, {  1, 8,12}, {  1, 8,12},
+    {  7, 2,12}, {  7, 2,12}, { 18, 1,12}, { 18, 1,12}
+};
+
+static const DCTtab DCT_B14_10 [] = {	/* 8 entries, third field always 10. NOTE(review): "B14" presumably refers to MPEG-2 VLC table B.14 - confirm */
+    { 17, 1,10}, {  6, 2,10}, {  1, 7,10}, {  3, 3,10},
+    {  2, 4,10}, { 16, 1,10}, { 15, 1,10}, {  5, 2,10}
+};
+
+static const DCTtab DCT_B14_8 [] = {	/* 36 entries; the first four are {65, 0, 12}. NOTE(review): {65, 0, ...} presumably marks an escape code - confirm against the decoder that indexes this table */
+    { 65, 0,12}, { 65, 0,12}, { 65, 0,12}, { 65, 0,12},
+    {  3, 2, 7}, {  3, 2, 7}, { 10, 1, 7}, { 10, 1, 7},
+    {  1, 4, 7}, {  1, 4, 7}, {  9, 1, 7}, {  9, 1, 7},
+    {  8, 1, 6}, {  8, 1, 6}, {  8, 1, 6}, {  8, 1, 6},
+    {  7, 1, 6}, {  7, 1, 6}, {  7, 1, 6}, {  7, 1, 6},
+    {  2, 2, 6}, {  2, 2, 6}, {  2, 2, 6}, {  2, 2, 6},
+    {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6},
+    { 14, 1, 8}, {  1, 6, 8}, { 13, 1, 8}, { 12, 1, 8},
+    {  4, 2, 8}, {  2, 3, 8}, {  1, 5, 8}, { 11, 1, 8}
+};
+
+static const DCTtab DCT_B14AC_5 [] = {	/* 27 entries; the first row holds only 3. NOTE(review): presumably indexed with an offset so the missing leading entries are never addressed - confirm in the user of this table */
+		 {  1, 3, 5}, {  5, 1, 5}, {  4, 1, 5},
+    {  1, 2, 4}, {  1, 2, 4}, {  3, 1, 4}, {  3, 1, 4},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2},
+    {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}
+};
+
+static const DCTtab DCT_B14DC_5 [] = {	/* 27 entries; same as DCT_B14AC_5 except the last 16 are all {1, 1, 1} instead of 8x {129, 0, 2} + 8x {1, 1, 2} */
+		 {  1, 3, 5}, {  5, 1, 5}, {  4, 1, 5},
+    {  1, 2, 4}, {  1, 2, 4}, {  3, 1, 4}, {  3, 1, 4},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
+    {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
+    {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
+    {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}
+};
+
+static const DCTtab DCT_B15_10 [] = {	/* 8 entries; third field is 9 (entries listed twice) or 10 (listed once). NOTE(review): "B15" presumably refers to MPEG-2 VLC table B.15 - confirm */
+    {  6, 2, 9}, {  6, 2, 9}, { 15, 1, 9}, { 15, 1, 9},
+    {  3, 4,10}, { 17, 1,10}, { 16, 1, 9}, { 16, 1, 9}
+};
+
+static const DCTtab DCT_B15_8 [] = {	/* 252 entries; entries with a smaller third field are repeated more often. NOTE(review): 252 = 256 - 4, so presumably indexed with an offset of 4 - confirm in the user of this table */
+    { 65, 0,12}, { 65, 0,12}, { 65, 0,12}, { 65, 0,12},
+    {  8, 1, 7}, {  8, 1, 7}, {  9, 1, 7}, {  9, 1, 7},
+    {  7, 1, 7}, {  7, 1, 7}, {  3, 2, 7}, {  3, 2, 7},
+    {  1, 7, 6}, {  1, 7, 6}, {  1, 7, 6}, {  1, 7, 6},
+    {  1, 6, 6}, {  1, 6, 6}, {  1, 6, 6}, {  1, 6, 6},
+    {  5, 1, 6}, {  5, 1, 6}, {  5, 1, 6}, {  5, 1, 6},
+    {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6},
+    {  2, 5, 8}, { 12, 1, 8}, {  1,11, 8}, {  1,10, 8},
+    { 14, 1, 8}, { 13, 1, 8}, {  4, 2, 8}, {  2, 4, 8},
+    {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5},
+    {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5},
+    {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5},
+    {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5},
+    {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5},
+    {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
+    {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
+    {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
+    {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
+    {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
+    {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
+    {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
+    {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+    {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5},
+    {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5},
+    {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5},
+    {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5},
+    { 10, 1, 7}, { 10, 1, 7}, {  2, 3, 7}, {  2, 3, 7},
+    { 11, 1, 7}, { 11, 1, 7}, {  1, 8, 7}, {  1, 8, 7},
+    {  1, 9, 7}, {  1, 9, 7}, {  1,12, 8}, {  1,13, 8},
+    {  3, 3, 8}, {  5, 2, 8}, {  1,14, 8}, {  1,15, 8}
+};
+
+
+static const MBAtab MBA_5 [] = {	/* 30 entries; the first row holds only 6. NOTE(review): fields presumably {macroblock address increment, code length} and the table presumably indexed with an offset - confirm against the MBAtab typedef and its user */
+		    {6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4},
+    {2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3},
+    {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1},
+    {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}
+};
+
+static const MBAtab MBA_11 [] = {	/* 104 entries, first field descending from 32 to 7; second field is 11, 10, 8 or 7. NOTE(review): second field presumably a code length in bits - confirm against the MBAtab typedef */
+    {32, 11}, {31, 11}, {30, 11}, {29, 11},
+    {28, 11}, {27, 11}, {26, 11}, {25, 11},
+    {24, 11}, {23, 11}, {22, 11}, {21, 11},
+    {20, 10}, {20, 10}, {19, 10}, {19, 10},
+    {18, 10}, {18, 10}, {17, 10}, {17, 10},
+    {16, 10}, {16, 10}, {15, 10}, {15, 10},
+    {14,  8}, {14,  8}, {14,  8}, {14,  8},
+    {14,  8}, {14,  8}, {14,  8}, {14,  8},
+    {13,  8}, {13,  8}, {13,  8}, {13,  8},
+    {13,  8}, {13,  8}, {13,  8}, {13,  8},
+    {12,  8}, {12,  8}, {12,  8}, {12,  8},
+    {12,  8}, {12,  8}, {12,  8}, {12,  8},
+    {11,  8}, {11,  8}, {11,  8}, {11,  8},
+    {11,  8}, {11,  8}, {11,  8}, {11,  8},
+    {10,  8}, {10,  8}, {10,  8}, {10,  8},
+    {10,  8}, {10,  8}, {10,  8}, {10,  8},
+    { 9,  8}, { 9,  8}, { 9,  8}, { 9,  8},
+    { 9,  8}, { 9,  8}, { 9,  8}, { 9,  8},
+    { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
+    { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
+    { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
+    { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
+    { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
+    { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
+    { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
+    { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7}
+};
diff --git a/src/video_dec/libmpeg2new/xine_mpeg2new_decoder.c b/src/video_dec/libmpeg2new/xine_mpeg2new_decoder.c
new file mode 100644
index 000000000..7494791b1
--- /dev/null
+++ b/src/video_dec/libmpeg2new/xine_mpeg2new_decoder.c
@@ -0,0 +1,504 @@
+/* 
+ * Copyright (C) 2000-2004 the xine project
+ * 
+ * This file is part of xine, a free video player.
+ * 
+ * xine is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * xine is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
+ *
+ * stuff needed to turn libmpeg2 into a xine decoder plugin
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <assert.h>
+
+#include "./include/mpeg2.h"
+#include <xine/xine_internal.h>
+#include <xine/video_out.h>
+#include <xine/buffer.h>
+
+
+
+#define LOG				/* not tested by any #ifdef in this file. NOTE(review): possibly meant for xine's logging headers, but those are included above this line - confirm */
+#define LOG_FRAME_ALLOC_FREE		/* enables the get_frame/draw_frame/free_frame printf()s and the per-picture slot dump in decode_data */
+#define LOG_ENTRY			/* enables the enter/exit printf()s in the decoder callbacks */
+#define LOG_FRAME_COUNTER		/* enables the frame_number printf() in decode_data */
+
+
+typedef struct {	/* plugin class object, allocated in init_plugin() */
+  video_decoder_class_t   decoder_class;	/* sole member; open_plugin() casts the video_decoder_class_t pointer it receives back to mpeg2_class_t */
+} mpeg2_class_t;
+
+typedef struct {	/* tracking slot for one output frame; slots are indexed by the frame's id */
+  uint32_t id;		/* despite the name this is a state: 0 = slot unused, 1 = frame obtained with get_frame(), 2 = frame drawn */
+  vo_frame_t * img;	/* frame stored when the slot enters state 1; used by mpeg2_video_free_all() to free it */
+} img_state_t;
+
+typedef struct mpeg2_video_decoder_s {	/* per-stream decoder instance, allocated in open_plugin() */
+  video_decoder_t  video_decoder;	/* first member: the callbacks cast the video_decoder_t pointer back to this struct */
+  mpeg2dec_t      *mpeg2dec;		/* libmpeg2 handle from mpeg2_init(), released with mpeg2_close() */
+  mpeg2_class_t   *class;		/* plugin class passed to open_plugin() */
+  xine_stream_t   *stream;		/* stream passed to open_plugin(); its video_out supplies the frames */
+  int32_t         force_aspect;		/* value taken from a BUF_SPECIAL_ASPECT buffer; when non-zero it replaces the sequence's pixel_width */
+  int             force_pan_scan;	/* set from the same BUF_SPECIAL_ASPECT buffer; written but never read in this file */
+  double          ratio;		/* aspect ratio chosen in STATE_SEQUENCE and passed to get_frame() */
+  img_state_t     img_state[30];	/* frame tracking slots, indexed by vo_frame_t id */
+  uint32_t	  frame_number;		/* incremented for every frame obtained with get_frame() */
+  uint32_t        rff_pattern;		/* shift register of repeat_first_field bits of displayed frames, newest in bit 0 */
+  
+} mpeg2_video_decoder_t;
+
+
+/* Print every occupied img_state slot; abort when more than three are held. */
+static void mpeg2_video_print_bad_state(img_state_t * img_state) {
+  int32_t slot;
+  int32_t held = 0;
+  for (slot = 0; slot < 30; slot++) {
+    if (img_state[slot].id == 0) continue;  /* id is unsigned: "> 0" equals "!= 0" */
+    printf("%d = %u\n", slot, img_state[slot].id);
+    held++;
+  }
+  if (held > 3) _x_abort();
+  if (!held) printf("NO FRAMES\n");
+}
+
+/* Free every frame still tracked in img_state (any non-zero state) and mark its slot unused. */
+static void mpeg2_video_free_all(img_state_t * img_state) {
+  int32_t n;
+  printf("libmpeg2new:free_all\n");
+  for(n=0;n<30;n++) {
+    if (img_state[n].id == 0) continue;   /* slot not in use */
+    vo_frame_t * img = img_state[n].img;
+    img->free(img);
+    img_state[n].id = 0;
+    img_state[n].img = NULL;              /* do not keep a pointer to a released frame */
+  }
+}
+
+
+/* Debug helper: print the fbuf pointer, then the frame stored in fbuf->id
+ * (pointer and frame id), or "img=NULL" when no frame is attached. */
+static void mpeg2_video_print_fbuf(const mpeg2_fbuf_t * fbuf) {
+  vo_frame_t * frame;
+  printf("%p",fbuf);
+  if (!fbuf) {
+    printf ("\n");
+    return;
+  }
+  frame = (vo_frame_t *) fbuf->id;
+  if (frame)
+    printf (", img=%p, (id=%d)\n", frame, frame->id);
+  else
+    printf (", img=NULL\n");
+}
+
+/*
+ * Feed one buffer element to libmpeg2 and handle each parser state it reports:
+ * STATE_SEQUENCE publishes stream info, STATE_PICTURE gets an output frame and passes
+ * it to libmpeg2, STATE_SLICE/STATE_END draw and free frames. Frames are tracked in
+ * this->img_state[] by img->id: 0 = unused, 1 = allocated, 2 = drawn.
+ */
+static void mpeg2_video_decode_data (video_decoder_t *this_gen, buf_element_t *buf_element) {
+  mpeg2_video_decoder_t *this = (mpeg2_video_decoder_t *) this_gen;
+  uint8_t * current = buf_element->content;
+  uint8_t * end = buf_element->content + buf_element->size;
+  const mpeg2_info_t * info;
+  mpeg2_state_t state;
+  vo_frame_t * img;
+  uint32_t picture_structure;
+  int32_t frame_skipping;
+
+  /* handle aspect hints from xine-dvdnav */
+  if (buf_element->decoder_flags & BUF_FLAG_SPECIAL) {
+    if (buf_element->decoder_info[1] == BUF_SPECIAL_ASPECT) {
+      this->force_aspect = buf_element->decoder_info[2];
+      if (buf_element->decoder_info[3] == 0x1 && buf_element->decoder_info[2] == 3)
+        /* letterboxing is denied, we have to do pan&scan */
+        this->force_pan_scan = 1;
+      else
+        this->force_pan_scan = 0;
+    }
+    return;
+  }
+
+  if (buf_element->decoder_flags != 0) return;
+
+#ifdef LOG_ENTRY
+  printf ("libmpeg2: decode_data: enter\n");
+#endif
+
+  mpeg2_buffer (this->mpeg2dec, current, end);
+  info = mpeg2_info (this->mpeg2dec);
+  while ((state = mpeg2_parse (this->mpeg2dec)) != STATE_BUFFER) {
+    switch (state) {
+      case STATE_SEQUENCE:
+        /* might set nb fbuf, convert format, stride */
+        /* might set fbufs */
+        _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_BITRATE,   info->sequence->byte_rate * 8);
+        _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_WIDTH,     info->sequence->picture_width);
+        _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_HEIGHT,    info->sequence->picture_height);
+        _x_stream_info_set(this->stream, XINE_STREAM_INFO_FRAME_DURATION,  info->sequence->frame_period / 300);
+        if (this->force_aspect) info->sequence->pixel_width = this->force_aspect; /* NOTE(review): writes into libmpeg2's sequence data - confirm this is intended */
+        switch (info->sequence->pixel_width) {
+        case 3:
+          this->ratio = 16.0 / 9.0;
+          break;
+        case 4:
+          this->ratio = 2.11;
+          break;
+        case 2:
+          this->ratio = 4.0 / 3.0;
+          break;
+        case 1:
+        default:
+          this->ratio = (double)info->sequence->picture_width/(double)info->sequence->picture_height;
+          break;
+        }
+        _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_RATIO, (int)(10000*this->ratio));
+
+        if (info->sequence->flags & SEQ_FLAG_MPEG2) {
+          _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "MPEG 2 (libmpeg2new)");
+        } else {
+          _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "MPEG 1 (libmpeg2new)");
+        }
+        break;
+      case STATE_PICTURE:
+        /* might skip */
+        /* might set fbuf */
+        if (info->current_picture->nb_fields == 1) {
+          picture_structure = info->current_picture->flags & PIC_FLAG_TOP_FIELD_FIRST ? VO_TOP_FIELD : VO_BOTTOM_FIELD;
+        } else {
+          picture_structure = VO_BOTH_FIELDS;
+        }
+        img = this->stream->video_out->get_frame (this->stream->video_out,
+                                              info->sequence->picture_width,
+                                              info->sequence->picture_height,
+                                              this->ratio,
+                                              XINE_IMGFMT_YV12,
+                                              picture_structure);
+        this->frame_number++;
+#ifdef LOG_FRAME_COUNTER
+        printf("libmpeg2:frame_number=%" PRIu32 "\n",this->frame_number);
+#endif
+        img->top_field_first = info->current_picture->flags & PIC_FLAG_TOP_FIELD_FIRST ? 1 : 0;
+        img->repeat_first_field = (info->current_picture->nb_fields > 2) ? 1 : 0;
+        img->duration=info->sequence->frame_period / 300;
+        if( ((this->rff_pattern & 0xff) == 0xaa ||
+             (this->rff_pattern & 0xff) == 0x55) ) {
+          /* special case for ntsc 3:2 pulldown */
+            img->duration += img->duration/4;
+        } else {
+          if( img->repeat_first_field ) {
+            img->duration = (img->duration * info->current_picture->nb_fields) / 2;
+          }
+        }
+        if ((info->current_picture->flags & 7) == 1) {
+          img->pts=buf_element->pts; /* If an I frame, use PTS */
+        } else {
+          img->pts=0;
+        }
+#ifdef LOG_FRAME_ALLOC_FREE
+        printf ("libmpeg2:decode_data:get_frame xine=%p (id=%d)\n", img,img->id);
+#endif
+        /* img->id indexes img_state[30]: reject ids outside it before the first access */
+        if (img->id < 0 || img->id >= 30) {
+          printf ("libmpeg2:decode_data:get_frame id=%d OUT OF RANGE\n", img->id);
+          _x_abort();
+        }
+        if (this->img_state[img->id].id != 0) {
+          printf ("libmpeg2:decode_data:get_frame id=%d BAD STATE:%" PRIu32 "\n", img->id, this->img_state[img->id].id);
+          _x_abort();
+        }
+
+        this->img_state[img->id].id = 1;
+        this->img_state[img->id].img = img;
+
+        mpeg2_set_buf (this->mpeg2dec, img->base, img);
+        break;
+      case STATE_SLICE:
+      case STATE_END:
+#if 0
+    printf("libmpeg2:decode_data:current_fbuf=");
+    mpeg2_video_print_fbuf(info->current_fbuf);
+    printf("libmpeg2:decode_data:display_fbuf=");
+    mpeg2_video_print_fbuf(info->display_fbuf);
+    printf("libmpeg2:decode_data:discard_fbuf=");
+    mpeg2_video_print_fbuf(info->discard_fbuf);
+#endif
+        /* draw current picture */
+        /* might free frame buffer */
+        if (info->display_fbuf && info->display_fbuf->id) {
+          img = (vo_frame_t *) info->display_fbuf->id; /* presumably the frame handed to mpeg2_set_buf() above, so its id was range-checked - confirm */
+          /* this should be used to detect any special rff pattern */
+          this->rff_pattern = this->rff_pattern << 1;
+          this->rff_pattern |= img->repeat_first_field;
+
+#ifdef LOG_FRAME_ALLOC_FREE
+          printf ("libmpeg2:decode_data:draw_frame xine=%p, fbuf=%p, id=%d \n", img, info->display_fbuf, img->id);
+#endif
+          if (this->img_state[img->id].id != 1) {
+            printf ("libmpeg2:decode_data:draw_frame id=%d BAD STATE:%" PRIu32 "\n", img->id, this->img_state[img->id].id);
+            _x_abort();
+          }
+          if (this->img_state[img->id].id == 1) {
+            frame_skipping = img->draw (img, this->stream);
+            /* FIXME: Handle skipping */
+            this->img_state[img->id].id = 2;
+          }
+        }
+        if (info->discard_fbuf && !info->discard_fbuf->id) {
+          printf ("libmpeg2:decode_data:BAD free_frame discard: xine=%p, fbuf=%p\n", info->discard_fbuf->id, info->discard_fbuf);
+          //_x_abort();
+        }
+        if (info->discard_fbuf && info->discard_fbuf->id) {
+          img = (vo_frame_t *) info->discard_fbuf->id;
+#ifdef LOG_FRAME_ALLOC_FREE
+          printf ("libmpeg2:decode_data:free_frame xine=%p, fbuf=%p,id=%d\n", img, info->discard_fbuf, img->id);
+#endif
+          if (this->img_state[img->id].id != 2) {
+            printf ("libmpeg2:decode_data:free_frame id=%d BAD STATE:%" PRIu32 "\n", img->id, this->img_state[img->id].id);
+            _x_abort();
+          }
+          if (this->img_state[img->id].id == 2) {
+            img->free(img);
+            this->img_state[img->id].id = 0;
+          }
+        }
+#ifdef LOG_FRAME_ALLOC_FREE
+        mpeg2_video_print_bad_state(this->img_state);
+#endif
+        break;
+      case STATE_GOP:
+        break;
+      default:
+        printf("libmpeg2new: STATE unknown %d\n",state);
+        break;
+   }
+ }
+#ifdef LOG_ENTRY
+  printf ("libmpeg2: decode_data: exit\n");
+#endif
+}
+
+/* Flush callback: currently only logs; the libmpeg2 flush call below is still disabled. */
+static void mpeg2_video_flush (video_decoder_t *this_gen) {
+  (void) this_gen;  /* unused until the disabled call below is enabled */
+
+#ifdef LOG_ENTRY
+  printf ("libmpeg2: flush\n");
+#endif
+/*  mpeg2_flush (&this->mpeg2); */
+}
+
+/* Reset callback: fully reset libmpeg2, then free every frame still tracked in img_state. */
+static void mpeg2_video_reset (video_decoder_t *this_gen) {
+  mpeg2_video_decoder_t *this = (mpeg2_video_decoder_t *) this_gen;
+
+#ifdef LOG_ENTRY
+  printf ("libmpeg2: reset\n");
+#endif
+  mpeg2_reset (this->mpeg2dec, 1); /* 1 for full reset */
+  mpeg2_video_free_all(this->img_state);
+
+#if 0  /* This bit of code does not work yet. */
+  int32_t state;               /* these locals are used only by the disabled code */
+  const mpeg2_info_t * info;
+  vo_frame_t * img;
+  int32_t frame_skipping;
+  info = mpeg2_info (this->mpeg2dec);
+  state = mpeg2_reset (this->mpeg2dec);
+  printf("reset state1:%d\n",state);
+  if (info->display_fbuf && info->display_fbuf->id) {
+    img = (vo_frame_t *) info->display_fbuf->id;
+
+    if (this->img_state[img->id] != 1) {
+      printf ("libmpeg2:decode_data:draw_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id]);
+      _x_abort();
+    }
+    if (this->img_state[img->id] == 1) {
+      frame_skipping = img->draw (img, this->stream);
+      /* FIXME: Handle skipping */
+      this->img_state[img->id] = 2;
+    }
+  }
+
+  if (info->discard_fbuf && !info->discard_fbuf->id) {
+    printf ("libmpeg2:decode_data:BAD free_frame discard_fbuf=%p\n", info->discard_fbuf);
+    _x_abort();
+  }
+  if (info->discard_fbuf && info->discard_fbuf->id) {
+    img = (vo_frame_t *) info->discard_fbuf->id;
+    if (this->img_state[img->id] != 2) {
+      printf ("libmpeg2:decode_data:free_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id]);
+      _x_abort();
+    }
+    if (this->img_state[img->id] == 2) {
+      img->free(img);
+      this->img_state[img->id] = 0;
+    }
+  }
+  state = mpeg2_parse (this->mpeg2dec);
+  printf("reset state2:%d\n",state);
+  if (info->display_fbuf && info->display_fbuf->id) {
+    img = (vo_frame_t *) info->display_fbuf->id;
+
+    if (this->img_state[img->id] != 1) {
+      printf ("libmpeg2:decode_data:draw_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id]);
+      _x_abort();
+    }
+    if (this->img_state[img->id] == 1) {
+      frame_skipping = img->draw (img, this->stream);
+      /* FIXME: Handle skipping */
+      this->img_state[img->id] = 2;
+    }
+  }
+
+  if (info->discard_fbuf && !info->discard_fbuf->id) {
+    printf ("libmpeg2:decode_data:BAD free_frame discard_fbuf=%p\n", info->discard_fbuf);
+    _x_abort();
+  }
+  if (info->discard_fbuf && info->discard_fbuf->id) {
+    img = (vo_frame_t *) info->discard_fbuf->id;
+    if (this->img_state[img->id] != 2) {
+      printf ("libmpeg2:decode_data:free_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id]);
+      _x_abort();
+    }
+    if (this->img_state[img->id] == 2) {
+      img->free(img);
+      this->img_state[img->id] = 0;
+    }
+  }
+  state = mpeg2_parse (this->mpeg2dec);
+  printf("reset state3:%d\n",state);
+  if (info->display_fbuf && info->display_fbuf->id) {
+    img = (vo_frame_t *) info->display_fbuf->id;
+
+    if (this->img_state[img->id] != 1) {
+      printf ("libmpeg2:decode_data:draw_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id]);
+      _x_abort();
+    }
+    if (this->img_state[img->id] == 1) {
+      frame_skipping = img->draw (img, this->stream);
+      /* FIXME: Handle skipping */
+      this->img_state[img->id] = 2;
+    }
+  }
+
+  if (info->discard_fbuf && !info->discard_fbuf->id) {
+    printf ("libmpeg2:decode_data:BAD free_frame discard_fbuf=%p\n", info->discard_fbuf);
+    _x_abort();
+  }
+  if (info->discard_fbuf && info->discard_fbuf->id) {
+    img = (vo_frame_t *) info->discard_fbuf->id;
+    if (this->img_state[img->id] != 2) {
+      printf ("libmpeg2:decode_data:free_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id]);
+      _x_abort();
+    }
+    if (this->img_state[img->id] == 2) {
+      img->free(img);
+      this->img_state[img->id] = 0;
+    }
+  }
+#endif
+
+}
+
+/* Discontinuity callback: currently only logs; the libmpeg2 call below is still disabled. */
+static void mpeg2_video_discontinuity (video_decoder_t *this_gen) {
+  (void) this_gen;  /* unused until the disabled call below is enabled */
+#ifdef LOG_ENTRY
+  printf ("libmpeg2: discontinuity\n");
+#endif
+/*  mpeg2_discontinuity (&this->mpeg2dec); */
+}
+
+/* Dispose callback: close libmpeg2, free any frames still tracked in img_state,
+ * close the video output for this stream, then free the decoder instance. */
+static void mpeg2_video_dispose (video_decoder_t *this_gen) {
+  mpeg2_video_decoder_t *this = (mpeg2_video_decoder_t *) this_gen;
+
+#ifdef LOG_ENTRY
+  printf ("libmpeg2: close\n");
+#endif
+  mpeg2_close (this->mpeg2dec);
+  /* frames obtained in decode_data but not yet freed would otherwise never be released */
+  mpeg2_video_free_all (this->img_state);
+  this->stream->video_out->close(this->stream->video_out, this->stream);
+  free (this);
+}
+
+/* Create a decoder instance for stream; returns NULL when allocation or mpeg2_init() fails. */
+static video_decoder_t *open_plugin (video_decoder_class_t *class_gen, xine_stream_t *stream) {
+  /* calloc zeroes frame_number, rff_pattern, force_aspect, force_pan_scan and every img_state[].id */
+  mpeg2_video_decoder_t *this = calloc(1, sizeof(*this));
+
+  if (!this) return NULL;
+  this->mpeg2dec = mpeg2_init ();
+  if (!this->mpeg2dec) {
+    free (this);
+    return NULL;
+  }
+
+  this->video_decoder.decode_data         = mpeg2_video_decode_data;
+  this->video_decoder.flush               = mpeg2_video_flush;
+  this->video_decoder.reset               = mpeg2_video_reset;
+  this->video_decoder.discontinuity       = mpeg2_video_discontinuity;
+  this->video_decoder.dispose             = mpeg2_video_dispose;
+  this->stream                            = stream;
+  this->class                             = (mpeg2_class_t *) class_gen;
+
+  mpeg2_custom_fbuf (this->mpeg2dec, 1);  /* <- Force libmpeg2 to use xine frame buffers. */
+  (stream->video_out->open) (stream->video_out, stream);
+  return &this->video_decoder;
+}
+
+/*
+ * mpeg2 plugin class
+ */
+/* Allocate and fill in the plugin class; returns NULL if the allocation fails. */
+static void *init_plugin (xine_t *xine, void *data) {
+  mpeg2_class_t *this = calloc(1, sizeof(*this));
+
+  if (!this) return NULL;
+
+  this->decoder_class.open_plugin     = open_plugin;
+  this->decoder_class.identifier      = "mpeg2new";
+  this->decoder_class.description     = N_("mpeg2 based video decoder plugin");
+  this->decoder_class.dispose         = default_video_decoder_class_dispose;
+
+  return this;
+}
+/*
+ * exported plugin catalog entry
+ */
+
+static uint32_t supported_types[] = { BUF_VIDEO_MPEG, 0 };	/* buffer types this decoder accepts; the trailing 0 presumably terminates the list - confirm against the plugin loader */
+
+static decoder_info_t dec_info_mpeg2 = {	/* decoder-specific info referenced from the xine_plugin_info[] entry below */
+  supported_types,     /* supported types */
+  6                    /* priority        */
+};
+
+plugin_info_t xine_plugin_info[] = {	/* the only non-static symbol in this file: the plugin catalog */
+  /* type, API, "name", version, special_info, init_function */  
+  { PLUGIN_VIDEO_DECODER, 19, "mpeg2new", XINE_VERSION_CODE, &dec_info_mpeg2, init_plugin },
+  { PLUGIN_NONE, 0, "", 0, NULL, NULL }	/* presumably the end-of-list marker - confirm against the plugin loader */
+};
-- 
cgit v1.2.3