From 5574ccbb47739ee876dcd49bf8d9f0cfc2528d2a Mon Sep 17 00:00:00 2001 From: Darren Salt Date: Mon, 12 Jan 2009 20:23:42 +0000 Subject: Move libmpeg2new into src/video_dec. --HG-- rename : src/libmpeg2new/Makefile.am => src/video_dec/libmpeg2new/Makefile.am rename : src/libmpeg2new/include/Makefile.am => src/video_dec/libmpeg2new/include/Makefile.am rename : src/libmpeg2new/include/alpha_asm.h => src/video_dec/libmpeg2new/include/alpha_asm.h rename : src/libmpeg2new/include/attributes.h => src/video_dec/libmpeg2new/include/attributes.h rename : src/libmpeg2new/include/mmx.h => src/video_dec/libmpeg2new/include/mmx.h rename : src/libmpeg2new/include/mpeg2.h => src/video_dec/libmpeg2new/include/mpeg2.h rename : src/libmpeg2new/include/mpeg2convert.h => src/video_dec/libmpeg2new/include/mpeg2convert.h rename : src/libmpeg2new/include/sse.h => src/video_dec/libmpeg2new/include/sse.h rename : src/libmpeg2new/include/tendra.h => src/video_dec/libmpeg2new/include/tendra.h rename : src/libmpeg2new/include/video_out.h => src/video_dec/libmpeg2new/include/video_out.h rename : src/libmpeg2new/include/vis.h => src/video_dec/libmpeg2new/include/vis.h rename : src/libmpeg2new/libmpeg2/Makefile.am => src/video_dec/libmpeg2new/libmpeg2/Makefile.am rename : src/libmpeg2new/libmpeg2/alloc.c => src/video_dec/libmpeg2new/libmpeg2/alloc.c rename : src/libmpeg2new/libmpeg2/configure.incl => src/video_dec/libmpeg2new/libmpeg2/configure.incl rename : src/libmpeg2new/libmpeg2/convert_internal.h => src/video_dec/libmpeg2new/libmpeg2/convert_internal.h rename : src/libmpeg2new/libmpeg2/cpu_accel.c => src/video_dec/libmpeg2new/libmpeg2/cpu_accel.c rename : src/libmpeg2new/libmpeg2/cpu_state.c => src/video_dec/libmpeg2new/libmpeg2/cpu_state.c rename : src/libmpeg2new/libmpeg2/decode.c => src/video_dec/libmpeg2new/libmpeg2/decode.c rename : src/libmpeg2new/libmpeg2/header.c => src/video_dec/libmpeg2new/libmpeg2/header.c rename : src/libmpeg2new/libmpeg2/idct.c => 
src/video_dec/libmpeg2new/libmpeg2/idct.c rename : src/libmpeg2new/libmpeg2/idct_alpha.c => src/video_dec/libmpeg2new/libmpeg2/idct_alpha.c rename : src/libmpeg2new/libmpeg2/idct_altivec.c => src/video_dec/libmpeg2new/libmpeg2/idct_altivec.c rename : src/libmpeg2new/libmpeg2/idct_mlib.c => src/video_dec/libmpeg2new/libmpeg2/idct_mlib.c rename : src/libmpeg2new/libmpeg2/idct_mmx.c => src/video_dec/libmpeg2new/libmpeg2/idct_mmx.c rename : src/libmpeg2new/libmpeg2/libmpeg2.pc.in => src/video_dec/libmpeg2new/libmpeg2/libmpeg2.pc.in rename : src/libmpeg2new/libmpeg2/libmpeg2convert.pc.in => src/video_dec/libmpeg2new/libmpeg2/libmpeg2convert.pc.in rename : src/libmpeg2new/libmpeg2/motion_comp.c => src/video_dec/libmpeg2new/libmpeg2/motion_comp.c rename : src/libmpeg2new/libmpeg2/motion_comp_alpha.c => src/video_dec/libmpeg2new/libmpeg2/motion_comp_alpha.c rename : src/libmpeg2new/libmpeg2/motion_comp_altivec.c => src/video_dec/libmpeg2new/libmpeg2/motion_comp_altivec.c rename : src/libmpeg2new/libmpeg2/motion_comp_mlib.c => src/video_dec/libmpeg2new/libmpeg2/motion_comp_mlib.c rename : src/libmpeg2new/libmpeg2/motion_comp_mmx.c => src/video_dec/libmpeg2new/libmpeg2/motion_comp_mmx.c rename : src/libmpeg2new/libmpeg2/motion_comp_vis.c => src/video_dec/libmpeg2new/libmpeg2/motion_comp_vis.c rename : src/libmpeg2new/libmpeg2/mpeg2_internal.h => src/video_dec/libmpeg2new/libmpeg2/mpeg2_internal.h rename : src/libmpeg2new/libmpeg2/rgb.c => src/video_dec/libmpeg2new/libmpeg2/rgb.c rename : src/libmpeg2new/libmpeg2/rgb_mmx.c => src/video_dec/libmpeg2new/libmpeg2/rgb_mmx.c rename : src/libmpeg2new/libmpeg2/rgb_vis.c => src/video_dec/libmpeg2new/libmpeg2/rgb_vis.c rename : src/libmpeg2new/libmpeg2/slice.c => src/video_dec/libmpeg2new/libmpeg2/slice.c rename : src/libmpeg2new/libmpeg2/uyvy.c => src/video_dec/libmpeg2new/libmpeg2/uyvy.c rename : src/libmpeg2new/libmpeg2/vlc.h => src/video_dec/libmpeg2new/libmpeg2/vlc.h rename : src/libmpeg2new/xine_mpeg2new_decoder.c => 
src/video_dec/libmpeg2new/xine_mpeg2new_decoder.c --- src/libmpeg2new/Makefile.am | 12 - src/libmpeg2new/include/Makefile.am | 3 - src/libmpeg2new/include/alpha_asm.h | 181 -- src/libmpeg2new/include/attributes.h | 37 - src/libmpeg2new/include/mmx.h | 263 --- src/libmpeg2new/include/mpeg2.h | 202 -- src/libmpeg2new/include/mpeg2convert.h | 48 - src/libmpeg2new/include/sse.h | 256 --- src/libmpeg2new/include/tendra.h | 35 - src/libmpeg2new/include/video_out.h | 58 - src/libmpeg2new/include/vis.h | 328 ---- src/libmpeg2new/libmpeg2/Makefile.am | 14 - src/libmpeg2new/libmpeg2/alloc.c | 70 - src/libmpeg2new/libmpeg2/configure.incl | 11 - src/libmpeg2new/libmpeg2/convert_internal.h | 42 - src/libmpeg2new/libmpeg2/cpu_accel.c | 258 --- src/libmpeg2new/libmpeg2/cpu_state.c | 129 -- src/libmpeg2new/libmpeg2/decode.c | 439 ----- src/libmpeg2new/libmpeg2/header.c | 961 --------- src/libmpeg2new/libmpeg2/idct.c | 287 --- src/libmpeg2new/libmpeg2/idct_alpha.c | 379 ---- src/libmpeg2new/libmpeg2/idct_altivec.c | 288 --- src/libmpeg2new/libmpeg2/idct_mlib.c | 60 - src/libmpeg2new/libmpeg2/idct_mmx.c | 814 -------- src/libmpeg2new/libmpeg2/libmpeg2.pc.in | 10 - src/libmpeg2new/libmpeg2/libmpeg2convert.pc.in | 10 - src/libmpeg2new/libmpeg2/motion_comp.c | 130 -- src/libmpeg2new/libmpeg2/motion_comp_alpha.c | 253 --- src/libmpeg2new/libmpeg2/motion_comp_altivec.c | 1010 ---------- src/libmpeg2new/libmpeg2/motion_comp_mlib.c | 190 -- src/libmpeg2new/libmpeg2/motion_comp_mmx.c | 1005 ---------- src/libmpeg2new/libmpeg2/motion_comp_vis.c | 2061 -------------------- src/libmpeg2new/libmpeg2/mpeg2_internal.h | 302 --- src/libmpeg2new/libmpeg2/rgb.c | 598 ------ src/libmpeg2new/libmpeg2/rgb_mmx.c | 321 --- src/libmpeg2new/libmpeg2/rgb_vis.c | 384 ---- src/libmpeg2new/libmpeg2/slice.c | 2058 ------------------- src/libmpeg2new/libmpeg2/uyvy.c | 123 -- src/libmpeg2new/libmpeg2/vlc.h | 429 ---- src/libmpeg2new/xine_mpeg2new_decoder.c | 504 ----- src/video_dec/libmpeg2new/Makefile.am | 12 + 
src/video_dec/libmpeg2new/include/Makefile.am | 3 + src/video_dec/libmpeg2new/include/alpha_asm.h | 181 ++ src/video_dec/libmpeg2new/include/attributes.h | 37 + src/video_dec/libmpeg2new/include/mmx.h | 263 +++ src/video_dec/libmpeg2new/include/mpeg2.h | 202 ++ src/video_dec/libmpeg2new/include/mpeg2convert.h | 48 + src/video_dec/libmpeg2new/include/sse.h | 256 +++ src/video_dec/libmpeg2new/include/tendra.h | 35 + src/video_dec/libmpeg2new/include/video_out.h | 58 + src/video_dec/libmpeg2new/include/vis.h | 328 ++++ src/video_dec/libmpeg2new/libmpeg2/Makefile.am | 14 + src/video_dec/libmpeg2new/libmpeg2/alloc.c | 70 + src/video_dec/libmpeg2new/libmpeg2/configure.incl | 11 + .../libmpeg2new/libmpeg2/convert_internal.h | 42 + src/video_dec/libmpeg2new/libmpeg2/cpu_accel.c | 258 +++ src/video_dec/libmpeg2new/libmpeg2/cpu_state.c | 129 ++ src/video_dec/libmpeg2new/libmpeg2/decode.c | 439 +++++ src/video_dec/libmpeg2new/libmpeg2/header.c | 961 +++++++++ src/video_dec/libmpeg2new/libmpeg2/idct.c | 287 +++ src/video_dec/libmpeg2new/libmpeg2/idct_alpha.c | 379 ++++ src/video_dec/libmpeg2new/libmpeg2/idct_altivec.c | 288 +++ src/video_dec/libmpeg2new/libmpeg2/idct_mlib.c | 60 + src/video_dec/libmpeg2new/libmpeg2/idct_mmx.c | 814 ++++++++ src/video_dec/libmpeg2new/libmpeg2/libmpeg2.pc.in | 10 + .../libmpeg2new/libmpeg2/libmpeg2convert.pc.in | 10 + src/video_dec/libmpeg2new/libmpeg2/motion_comp.c | 130 ++ .../libmpeg2new/libmpeg2/motion_comp_alpha.c | 253 +++ .../libmpeg2new/libmpeg2/motion_comp_altivec.c | 1010 ++++++++++ .../libmpeg2new/libmpeg2/motion_comp_mlib.c | 190 ++ .../libmpeg2new/libmpeg2/motion_comp_mmx.c | 1005 ++++++++++ .../libmpeg2new/libmpeg2/motion_comp_vis.c | 2061 ++++++++++++++++++++ .../libmpeg2new/libmpeg2/mpeg2_internal.h | 302 +++ src/video_dec/libmpeg2new/libmpeg2/rgb.c | 598 ++++++ src/video_dec/libmpeg2new/libmpeg2/rgb_mmx.c | 321 +++ src/video_dec/libmpeg2new/libmpeg2/rgb_vis.c | 384 ++++ src/video_dec/libmpeg2new/libmpeg2/slice.c | 2058 
+++++++++++++++++++ src/video_dec/libmpeg2new/libmpeg2/uyvy.c | 123 ++ src/video_dec/libmpeg2new/libmpeg2/vlc.h | 429 ++++ src/video_dec/libmpeg2new/xine_mpeg2new_decoder.c | 504 +++++ 80 files changed, 14563 insertions(+), 14563 deletions(-) delete mode 100644 src/libmpeg2new/Makefile.am delete mode 100644 src/libmpeg2new/include/Makefile.am delete mode 100644 src/libmpeg2new/include/alpha_asm.h delete mode 100644 src/libmpeg2new/include/attributes.h delete mode 100644 src/libmpeg2new/include/mmx.h delete mode 100644 src/libmpeg2new/include/mpeg2.h delete mode 100644 src/libmpeg2new/include/mpeg2convert.h delete mode 100644 src/libmpeg2new/include/sse.h delete mode 100644 src/libmpeg2new/include/tendra.h delete mode 100644 src/libmpeg2new/include/video_out.h delete mode 100644 src/libmpeg2new/include/vis.h delete mode 100644 src/libmpeg2new/libmpeg2/Makefile.am delete mode 100644 src/libmpeg2new/libmpeg2/alloc.c delete mode 100644 src/libmpeg2new/libmpeg2/configure.incl delete mode 100644 src/libmpeg2new/libmpeg2/convert_internal.h delete mode 100644 src/libmpeg2new/libmpeg2/cpu_accel.c delete mode 100644 src/libmpeg2new/libmpeg2/cpu_state.c delete mode 100644 src/libmpeg2new/libmpeg2/decode.c delete mode 100644 src/libmpeg2new/libmpeg2/header.c delete mode 100644 src/libmpeg2new/libmpeg2/idct.c delete mode 100644 src/libmpeg2new/libmpeg2/idct_alpha.c delete mode 100644 src/libmpeg2new/libmpeg2/idct_altivec.c delete mode 100644 src/libmpeg2new/libmpeg2/idct_mlib.c delete mode 100644 src/libmpeg2new/libmpeg2/idct_mmx.c delete mode 100644 src/libmpeg2new/libmpeg2/libmpeg2.pc.in delete mode 100644 src/libmpeg2new/libmpeg2/libmpeg2convert.pc.in delete mode 100644 src/libmpeg2new/libmpeg2/motion_comp.c delete mode 100644 src/libmpeg2new/libmpeg2/motion_comp_alpha.c delete mode 100644 src/libmpeg2new/libmpeg2/motion_comp_altivec.c delete mode 100644 src/libmpeg2new/libmpeg2/motion_comp_mlib.c delete mode 100644 src/libmpeg2new/libmpeg2/motion_comp_mmx.c delete mode 
100644 src/libmpeg2new/libmpeg2/motion_comp_vis.c delete mode 100644 src/libmpeg2new/libmpeg2/mpeg2_internal.h delete mode 100644 src/libmpeg2new/libmpeg2/rgb.c delete mode 100644 src/libmpeg2new/libmpeg2/rgb_mmx.c delete mode 100644 src/libmpeg2new/libmpeg2/rgb_vis.c delete mode 100644 src/libmpeg2new/libmpeg2/slice.c delete mode 100644 src/libmpeg2new/libmpeg2/uyvy.c delete mode 100644 src/libmpeg2new/libmpeg2/vlc.h delete mode 100644 src/libmpeg2new/xine_mpeg2new_decoder.c create mode 100644 src/video_dec/libmpeg2new/Makefile.am create mode 100644 src/video_dec/libmpeg2new/include/Makefile.am create mode 100644 src/video_dec/libmpeg2new/include/alpha_asm.h create mode 100644 src/video_dec/libmpeg2new/include/attributes.h create mode 100644 src/video_dec/libmpeg2new/include/mmx.h create mode 100644 src/video_dec/libmpeg2new/include/mpeg2.h create mode 100644 src/video_dec/libmpeg2new/include/mpeg2convert.h create mode 100644 src/video_dec/libmpeg2new/include/sse.h create mode 100644 src/video_dec/libmpeg2new/include/tendra.h create mode 100644 src/video_dec/libmpeg2new/include/video_out.h create mode 100644 src/video_dec/libmpeg2new/include/vis.h create mode 100644 src/video_dec/libmpeg2new/libmpeg2/Makefile.am create mode 100644 src/video_dec/libmpeg2new/libmpeg2/alloc.c create mode 100644 src/video_dec/libmpeg2new/libmpeg2/configure.incl create mode 100644 src/video_dec/libmpeg2new/libmpeg2/convert_internal.h create mode 100644 src/video_dec/libmpeg2new/libmpeg2/cpu_accel.c create mode 100644 src/video_dec/libmpeg2new/libmpeg2/cpu_state.c create mode 100644 src/video_dec/libmpeg2new/libmpeg2/decode.c create mode 100644 src/video_dec/libmpeg2new/libmpeg2/header.c create mode 100644 src/video_dec/libmpeg2new/libmpeg2/idct.c create mode 100644 src/video_dec/libmpeg2new/libmpeg2/idct_alpha.c create mode 100644 src/video_dec/libmpeg2new/libmpeg2/idct_altivec.c create mode 100644 src/video_dec/libmpeg2new/libmpeg2/idct_mlib.c create mode 100644 
src/video_dec/libmpeg2new/libmpeg2/idct_mmx.c create mode 100644 src/video_dec/libmpeg2new/libmpeg2/libmpeg2.pc.in create mode 100644 src/video_dec/libmpeg2new/libmpeg2/libmpeg2convert.pc.in create mode 100644 src/video_dec/libmpeg2new/libmpeg2/motion_comp.c create mode 100644 src/video_dec/libmpeg2new/libmpeg2/motion_comp_alpha.c create mode 100644 src/video_dec/libmpeg2new/libmpeg2/motion_comp_altivec.c create mode 100644 src/video_dec/libmpeg2new/libmpeg2/motion_comp_mlib.c create mode 100644 src/video_dec/libmpeg2new/libmpeg2/motion_comp_mmx.c create mode 100644 src/video_dec/libmpeg2new/libmpeg2/motion_comp_vis.c create mode 100644 src/video_dec/libmpeg2new/libmpeg2/mpeg2_internal.h create mode 100644 src/video_dec/libmpeg2new/libmpeg2/rgb.c create mode 100644 src/video_dec/libmpeg2new/libmpeg2/rgb_mmx.c create mode 100644 src/video_dec/libmpeg2new/libmpeg2/rgb_vis.c create mode 100644 src/video_dec/libmpeg2new/libmpeg2/slice.c create mode 100644 src/video_dec/libmpeg2new/libmpeg2/uyvy.c create mode 100644 src/video_dec/libmpeg2new/libmpeg2/vlc.h create mode 100644 src/video_dec/libmpeg2new/xine_mpeg2new_decoder.c diff --git a/src/libmpeg2new/Makefile.am b/src/libmpeg2new/Makefile.am deleted file mode 100644 index 8c248fdcb..000000000 --- a/src/libmpeg2new/Makefile.am +++ /dev/null @@ -1,12 +0,0 @@ -include $(top_srcdir)/misc/Makefile.common - -AM_CFLAGS = $(DEFAULT_OCFLAGS) $(VISIBILITY_FLAG) -AM_LDFLAGS = $(xineplug_ldflags) - -SUBDIRS = libmpeg2 - -xineplug_LTLIBRARIES = xineplug_decode_mpeg2.la - -xineplug_decode_mpeg2_la_SOURCES = xine_mpeg2_decoder.c -xineplug_decode_mpeg2_la_LIBADD = $(XINE_LIB) ./libmpeg2/libmpeg2.la -xineplug_decode_mpeg2_la_CFLAGS = $(AM_CFLAGS) $(MLIB_CFLAGS) diff --git a/src/libmpeg2new/include/Makefile.am b/src/libmpeg2new/include/Makefile.am deleted file mode 100644 index 302d01cb1..000000000 --- a/src/libmpeg2new/include/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -pkginclude_HEADERS = mpeg2.h mpeg2convert.h - -EXTRA_DIST = 
video_out.h mmx.h alpha_asm.h vis.h attributes.h tendra.h diff --git a/src/libmpeg2new/include/alpha_asm.h b/src/libmpeg2new/include/alpha_asm.h deleted file mode 100644 index bf1081f24..000000000 --- a/src/libmpeg2new/include/alpha_asm.h +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Alpha assembly macros - * Copyright (c) 2002-2003 Falk Hueffner - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef ALPHA_ASM_H -#define ALPHA_ASM_H - -#include - -#if defined __GNUC__ -# define GNUC_PREREQ(maj, min) \ - ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) -#else -# define GNUC_PREREQ(maj, min) 0 -#endif - -#define AMASK_BWX (1 << 0) -#define AMASK_FIX (1 << 1) -#define AMASK_CIX (1 << 2) -#define AMASK_MVI (1 << 8) - -#ifdef __alpha_bwx__ -# define HAVE_BWX() 1 -#else -# define HAVE_BWX() (amask(AMASK_BWX) == 0) -#endif -#ifdef __alpha_fix__ -# define HAVE_FIX() 1 -#else -# define HAVE_FIX() (amask(AMASK_FIX) == 0) -#endif -#ifdef __alpha_max__ -# define HAVE_MVI() 1 -#else -# define HAVE_MVI() (amask(AMASK_MVI) == 0) -#endif -#ifdef __alpha_cix__ -# define HAVE_CIX() 1 -#else -# define HAVE_CIX() (amask(AMASK_CIX) == 0) -#endif - -inline static uint64_t 
BYTE_VEC(uint64_t x) -{ - x |= x << 8; - x |= x << 16; - x |= x << 32; - return x; -} -inline static uint64_t WORD_VEC(uint64_t x) -{ - x |= x << 16; - x |= x << 32; - return x; -} - -#define ldq(p) (*(const uint64_t *) (p)) -#define ldl(p) (*(const int32_t *) (p)) -#define stl(l, p) do { *(uint32_t *) (p) = (l); } while (0) -#define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0) -#define sextw(x) ((int16_t) (x)) - -#ifdef __GNUC__ -struct unaligned_long { uint64_t l; } __attribute__((packed)); -#define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul)) -#define uldq(a) (((const struct unaligned_long *) (a))->l) - -#if GNUC_PREREQ(3,3) -#define prefetch(p) __builtin_prefetch((p), 0, 1) -#define prefetch_en(p) __builtin_prefetch((p), 0, 0) -#define prefetch_m(p) __builtin_prefetch((p), 1, 1) -#define prefetch_men(p) __builtin_prefetch((p), 1, 0) -#define cmpbge __builtin_alpha_cmpbge -/* Avoid warnings. */ -#define extql(a, b) __builtin_alpha_extql(a, (uint64_t) (b)) -#define extwl(a, b) __builtin_alpha_extwl(a, (uint64_t) (b)) -#define extqh(a, b) __builtin_alpha_extqh(a, (uint64_t) (b)) -#define zap __builtin_alpha_zap -#define zapnot __builtin_alpha_zapnot -#define amask __builtin_alpha_amask -#define implver __builtin_alpha_implver -#define rpcc __builtin_alpha_rpcc -#else -#define prefetch(p) asm volatile("ldl $31,%0" : : "m"(*(const char *) (p)) : "memory") -#define prefetch_en(p) asm volatile("ldq $31,%0" : : "m"(*(const char *) (p)) : "memory") -#define prefetch_m(p) asm volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory") -#define prefetch_men(p) asm volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory") -#define cmpbge(a, b) ({ uint64_t __r; asm ("cmpbge %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) -#define extql(a, b) ({ uint64_t __r; asm ("extql %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) -#define extwl(a, b) ({ uint64_t __r; asm ("extwl %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) -#define 
extqh(a, b) ({ uint64_t __r; asm ("extqh %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) -#define zap(a, b) ({ uint64_t __r; asm ("zap %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) -#define zapnot(a, b) ({ uint64_t __r; asm ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) -#define amask(a) ({ uint64_t __r; asm ("amask %1,%0" : "=r" (__r) : "rI" (a)); __r; }) -#define implver() ({ uint64_t __r; asm ("implver %0" : "=r" (__r)); __r; }) -#define rpcc() ({ uint64_t __r; asm volatile ("rpcc %0" : "=r" (__r)); __r; }) -#endif -#define wh64(p) asm volatile("wh64 (%0)" : : "r"(p) : "memory") - -#if GNUC_PREREQ(3,3) && defined(__alpha_max__) -#define minub8 __builtin_alpha_minub8 -#define minsb8 __builtin_alpha_minsb8 -#define minuw4 __builtin_alpha_minuw4 -#define minsw4 __builtin_alpha_minsw4 -#define maxub8 __builtin_alpha_maxub8 -#define maxsb8 __builtin_alpha_maxsb8 -#define maxuw4 __builtin_alpha_maxuw4 -#define maxsw4 __builtin_alpha_maxsw4 -#define perr __builtin_alpha_perr -#define pklb __builtin_alpha_pklb -#define pkwb __builtin_alpha_pkwb -#define unpkbl __builtin_alpha_unpkbl -#define unpkbw __builtin_alpha_unpkbw -#else -#define minub8(a, b) ({ uint64_t __r; asm (".arch ev6; minub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) -#define minsb8(a, b) ({ uint64_t __r; asm (".arch ev6; minsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) -#define minuw4(a, b) ({ uint64_t __r; asm (".arch ev6; minuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) -#define minsw4(a, b) ({ uint64_t __r; asm (".arch ev6; minsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) -#define maxub8(a, b) ({ uint64_t __r; asm (".arch ev6; maxub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) -#define maxsb8(a, b) ({ uint64_t __r; asm (".arch ev6; maxsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) -#define maxuw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); 
__r; }) -#define maxsw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) -#define perr(a, b) ({ uint64_t __r; asm (".arch ev6; perr %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; }) -#define pklb(a) ({ uint64_t __r; asm (".arch ev6; pklb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) -#define pkwb(a) ({ uint64_t __r; asm (".arch ev6; pkwb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) -#define unpkbl(a) ({ uint64_t __r; asm (".arch ev6; unpkbl %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) -#define unpkbw(a) ({ uint64_t __r; asm (".arch ev6; unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) -#endif - -#elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */ - -#include -#define ldq_u(a) asm ("ldq_u %v0,0(%a0)", a) -#define uldq(a) (*(const __unaligned uint64_t *) (a)) -#define cmpbge(a, b) asm ("cmpbge %a0,%a1,%v0", a, b) -#define extql(a, b) asm ("extql %a0,%a1,%v0", a, b) -#define extwl(a, b) asm ("extwl %a0,%a1,%v0", a, b) -#define extqh(a, b) asm ("extqh %a0,%a1,%v0", a, b) -#define zap(a, b) asm ("zap %a0,%a1,%v0", a, b) -#define zapnot(a, b) asm ("zapnot %a0,%a1,%v0", a, b) -#define amask(a) asm ("amask %a0,%v0", a) -#define implver() asm ("implver %v0") -#define rpcc() asm ("rpcc %v0") -#define minub8(a, b) asm ("minub8 %a0,%a1,%v0", a, b) -#define minsb8(a, b) asm ("minsb8 %a0,%a1,%v0", a, b) -#define minuw4(a, b) asm ("minuw4 %a0,%a1,%v0", a, b) -#define minsw4(a, b) asm ("minsw4 %a0,%a1,%v0", a, b) -#define maxub8(a, b) asm ("maxub8 %a0,%a1,%v0", a, b) -#define maxsb8(a, b) asm ("maxsb8 %a0,%a1,%v0", a, b) -#define maxuw4(a, b) asm ("maxuw4 %a0,%a1,%v0", a, b) -#define maxsw4(a, b) asm ("maxsw4 %a0,%a1,%v0", a, b) -#define perr(a, b) asm ("perr %a0,%a1,%v0", a, b) -#define pklb(a) asm ("pklb %a0,%v0", a) -#define pkwb(a) asm ("pkwb %a0,%v0", a) -#define unpkbl(a) asm ("unpkbl %a0,%v0", a) -#define unpkbw(a) asm ("unpkbw %a0,%v0", a) -#define wh64(a) asm ("wh64 %a0", a) - -#else -#error "Unknown compiler!" 
-#endif - -#endif /* ALPHA_ASM_H */ diff --git a/src/libmpeg2new/include/attributes.h b/src/libmpeg2new/include/attributes.h deleted file mode 100644 index eefbc0dd1..000000000 --- a/src/libmpeg2new/include/attributes.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * attributes.h - * Copyright (C) 2000-2003 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -/* use gcc attribs to align critical data structures */ -#ifdef ATTRIBUTE_ALIGNED_MAX -#define ATTR_ALIGN(align) __attribute__ ((__aligned__ ((ATTRIBUTE_ALIGNED_MAX < align) ? 
ATTRIBUTE_ALIGNED_MAX : align))) -#else -#define ATTR_ALIGN(align) -#endif - -#ifdef HAVE_BUILTIN_EXPECT -#define likely(x) __builtin_expect ((x) != 0, 1) -#define unlikely(x) __builtin_expect ((x) != 0, 0) -#else -#define likely(x) (x) -#define unlikely(x) (x) -#endif diff --git a/src/libmpeg2new/include/mmx.h b/src/libmpeg2new/include/mmx.h deleted file mode 100644 index 08b4d4776..000000000 --- a/src/libmpeg2new/include/mmx.h +++ /dev/null @@ -1,263 +0,0 @@ -/* - * mmx.h - * Copyright (C) 2000-2003 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -/* - * The type of an value that fits in an MMX register (note that long - * long constant values MUST be suffixed by LL and unsigned long long - * values by ULL, lest they be truncated by the compiler) - */ - -typedef union { - long long q; /* Quadword (64-bit) value */ - unsigned long long uq; /* Unsigned Quadword */ - int d[2]; /* 2 Doubleword (32-bit) values */ - unsigned int ud[2]; /* 2 Unsigned Doubleword */ - short w[4]; /* 4 Word (16-bit) values */ - unsigned short uw[4]; /* 4 Unsigned Word */ - char b[8]; /* 8 Byte (8-bit) values */ - unsigned char ub[8]; /* 8 Unsigned Byte */ - float s[2]; /* Single-precision (32-bit) value */ -} ATTR_ALIGN(8) mmx_t; /* On an 8-byte (64-bit) boundary */ - - -#define mmx_i2r(op,imm,reg) \ - __asm__ __volatile__ (#op " %0, %%" #reg \ - : /* nothing */ \ - : "i" (imm) ) - -#define mmx_m2r(op,mem,reg) \ - __asm__ __volatile__ (#op " %0, %%" #reg \ - : /* nothing */ \ - : "m" (mem)) - -#define mmx_r2m(op,reg,mem) \ - __asm__ __volatile__ (#op " %%" #reg ", %0" \ - : "=m" (mem) \ - : /* nothing */ ) - -#define mmx_r2r(op,regs,regd) \ - __asm__ __volatile__ (#op " %" #regs ", %" #regd) - - -#define emms() __asm__ __volatile__ ("emms") - -#define movd_m2r(var,reg) mmx_m2r (movd, var, reg) -#define movd_r2m(reg,var) mmx_r2m (movd, reg, var) -#define movd_v2r(var,reg) __asm__ __volatile__ ("movd %0, %%" #reg \ - : /* nothing */ \ - : "rm" (var)) -#define movd_r2v(reg,var) __asm__ __volatile__ ("movd %%" #reg ", %0" \ - : "=rm" (var) \ - : /* nothing */ ) - -#define movq_m2r(var,reg) mmx_m2r (movq, var, reg) -#define movq_r2m(reg,var) mmx_r2m (movq, reg, var) -#define movq_r2r(regs,regd) mmx_r2r (movq, regs, regd) - -#define packssdw_m2r(var,reg) mmx_m2r (packssdw, var, reg) -#define packssdw_r2r(regs,regd) mmx_r2r 
(packssdw, regs, regd) -#define packsswb_m2r(var,reg) mmx_m2r (packsswb, var, reg) -#define packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd) - -#define packuswb_m2r(var,reg) mmx_m2r (packuswb, var, reg) -#define packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd) - -#define paddb_m2r(var,reg) mmx_m2r (paddb, var, reg) -#define paddb_r2r(regs,regd) mmx_r2r (paddb, regs, regd) -#define paddd_m2r(var,reg) mmx_m2r (paddd, var, reg) -#define paddd_r2r(regs,regd) mmx_r2r (paddd, regs, regd) -#define paddw_m2r(var,reg) mmx_m2r (paddw, var, reg) -#define paddw_r2r(regs,regd) mmx_r2r (paddw, regs, regd) - -#define paddsb_m2r(var,reg) mmx_m2r (paddsb, var, reg) -#define paddsb_r2r(regs,regd) mmx_r2r (paddsb, regs, regd) -#define paddsw_m2r(var,reg) mmx_m2r (paddsw, var, reg) -#define paddsw_r2r(regs,regd) mmx_r2r (paddsw, regs, regd) - -#define paddusb_m2r(var,reg) mmx_m2r (paddusb, var, reg) -#define paddusb_r2r(regs,regd) mmx_r2r (paddusb, regs, regd) -#define paddusw_m2r(var,reg) mmx_m2r (paddusw, var, reg) -#define paddusw_r2r(regs,regd) mmx_r2r (paddusw, regs, regd) - -#define pand_m2r(var,reg) mmx_m2r (pand, var, reg) -#define pand_r2r(regs,regd) mmx_r2r (pand, regs, regd) - -#define pandn_m2r(var,reg) mmx_m2r (pandn, var, reg) -#define pandn_r2r(regs,regd) mmx_r2r (pandn, regs, regd) - -#define pcmpeqb_m2r(var,reg) mmx_m2r (pcmpeqb, var, reg) -#define pcmpeqb_r2r(regs,regd) mmx_r2r (pcmpeqb, regs, regd) -#define pcmpeqd_m2r(var,reg) mmx_m2r (pcmpeqd, var, reg) -#define pcmpeqd_r2r(regs,regd) mmx_r2r (pcmpeqd, regs, regd) -#define pcmpeqw_m2r(var,reg) mmx_m2r (pcmpeqw, var, reg) -#define pcmpeqw_r2r(regs,regd) mmx_r2r (pcmpeqw, regs, regd) - -#define pcmpgtb_m2r(var,reg) mmx_m2r (pcmpgtb, var, reg) -#define pcmpgtb_r2r(regs,regd) mmx_r2r (pcmpgtb, regs, regd) -#define pcmpgtd_m2r(var,reg) mmx_m2r (pcmpgtd, var, reg) -#define pcmpgtd_r2r(regs,regd) mmx_r2r (pcmpgtd, regs, regd) -#define pcmpgtw_m2r(var,reg) mmx_m2r (pcmpgtw, var, reg) -#define 
pcmpgtw_r2r(regs,regd) mmx_r2r (pcmpgtw, regs, regd) - -#define pmaddwd_m2r(var,reg) mmx_m2r (pmaddwd, var, reg) -#define pmaddwd_r2r(regs,regd) mmx_r2r (pmaddwd, regs, regd) - -#define pmulhw_m2r(var,reg) mmx_m2r (pmulhw, var, reg) -#define pmulhw_r2r(regs,regd) mmx_r2r (pmulhw, regs, regd) - -#define pmullw_m2r(var,reg) mmx_m2r (pmullw, var, reg) -#define pmullw_r2r(regs,regd) mmx_r2r (pmullw, regs, regd) - -#define por_m2r(var,reg) mmx_m2r (por, var, reg) -#define por_r2r(regs,regd) mmx_r2r (por, regs, regd) - -#define pslld_i2r(imm,reg) mmx_i2r (pslld, imm, reg) -#define pslld_m2r(var,reg) mmx_m2r (pslld, var, reg) -#define pslld_r2r(regs,regd) mmx_r2r (pslld, regs, regd) -#define psllq_i2r(imm,reg) mmx_i2r (psllq, imm, reg) -#define psllq_m2r(var,reg) mmx_m2r (psllq, var, reg) -#define psllq_r2r(regs,regd) mmx_r2r (psllq, regs, regd) -#define psllw_i2r(imm,reg) mmx_i2r (psllw, imm, reg) -#define psllw_m2r(var,reg) mmx_m2r (psllw, var, reg) -#define psllw_r2r(regs,regd) mmx_r2r (psllw, regs, regd) - -#define psrad_i2r(imm,reg) mmx_i2r (psrad, imm, reg) -#define psrad_m2r(var,reg) mmx_m2r (psrad, var, reg) -#define psrad_r2r(regs,regd) mmx_r2r (psrad, regs, regd) -#define psraw_i2r(imm,reg) mmx_i2r (psraw, imm, reg) -#define psraw_m2r(var,reg) mmx_m2r (psraw, var, reg) -#define psraw_r2r(regs,regd) mmx_r2r (psraw, regs, regd) - -#define psrld_i2r(imm,reg) mmx_i2r (psrld, imm, reg) -#define psrld_m2r(var,reg) mmx_m2r (psrld, var, reg) -#define psrld_r2r(regs,regd) mmx_r2r (psrld, regs, regd) -#define psrlq_i2r(imm,reg) mmx_i2r (psrlq, imm, reg) -#define psrlq_m2r(var,reg) mmx_m2r (psrlq, var, reg) -#define psrlq_r2r(regs,regd) mmx_r2r (psrlq, regs, regd) -#define psrlw_i2r(imm,reg) mmx_i2r (psrlw, imm, reg) -#define psrlw_m2r(var,reg) mmx_m2r (psrlw, var, reg) -#define psrlw_r2r(regs,regd) mmx_r2r (psrlw, regs, regd) - -#define psubb_m2r(var,reg) mmx_m2r (psubb, var, reg) -#define psubb_r2r(regs,regd) mmx_r2r (psubb, regs, regd) -#define psubd_m2r(var,reg) 
mmx_m2r (psubd, var, reg) -#define psubd_r2r(regs,regd) mmx_r2r (psubd, regs, regd) -#define psubw_m2r(var,reg) mmx_m2r (psubw, var, reg) -#define psubw_r2r(regs,regd) mmx_r2r (psubw, regs, regd) - -#define psubsb_m2r(var,reg) mmx_m2r (psubsb, var, reg) -#define psubsb_r2r(regs,regd) mmx_r2r (psubsb, regs, regd) -#define psubsw_m2r(var,reg) mmx_m2r (psubsw, var, reg) -#define psubsw_r2r(regs,regd) mmx_r2r (psubsw, regs, regd) - -#define psubusb_m2r(var,reg) mmx_m2r (psubusb, var, reg) -#define psubusb_r2r(regs,regd) mmx_r2r (psubusb, regs, regd) -#define psubusw_m2r(var,reg) mmx_m2r (psubusw, var, reg) -#define psubusw_r2r(regs,regd) mmx_r2r (psubusw, regs, regd) - -#define punpckhbw_m2r(var,reg) mmx_m2r (punpckhbw, var, reg) -#define punpckhbw_r2r(regs,regd) mmx_r2r (punpckhbw, regs, regd) -#define punpckhdq_m2r(var,reg) mmx_m2r (punpckhdq, var, reg) -#define punpckhdq_r2r(regs,regd) mmx_r2r (punpckhdq, regs, regd) -#define punpckhwd_m2r(var,reg) mmx_m2r (punpckhwd, var, reg) -#define punpckhwd_r2r(regs,regd) mmx_r2r (punpckhwd, regs, regd) - -#define punpcklbw_m2r(var,reg) mmx_m2r (punpcklbw, var, reg) -#define punpcklbw_r2r(regs,regd) mmx_r2r (punpcklbw, regs, regd) -#define punpckldq_m2r(var,reg) mmx_m2r (punpckldq, var, reg) -#define punpckldq_r2r(regs,regd) mmx_r2r (punpckldq, regs, regd) -#define punpcklwd_m2r(var,reg) mmx_m2r (punpcklwd, var, reg) -#define punpcklwd_r2r(regs,regd) mmx_r2r (punpcklwd, regs, regd) - -#define pxor_m2r(var,reg) mmx_m2r (pxor, var, reg) -#define pxor_r2r(regs,regd) mmx_r2r (pxor, regs, regd) - - -/* 3DNOW extensions */ - -#define pavgusb_m2r(var,reg) mmx_m2r (pavgusb, var, reg) -#define pavgusb_r2r(regs,regd) mmx_r2r (pavgusb, regs, regd) - - -/* AMD MMX extensions - also available in intel SSE */ - - -#define mmx_m2ri(op,mem,reg,imm) \ - __asm__ __volatile__ (#op " %1, %0, %%" #reg \ - : /* nothing */ \ - : "m" (mem), "i" (imm)) - -#define mmx_r2ri(op,regs,regd,imm) \ - __asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \ - 
: /* nothing */ \ - : "i" (imm) ) - -#define mmx_fetch(mem,hint) \ - __asm__ __volatile__ ("prefetch" #hint " %0" \ - : /* nothing */ \ - : "m" (mem)) - - -#define maskmovq(regs,maskreg) mmx_r2ri (maskmovq, regs, maskreg) - -#define movntq_r2m(mmreg,var) mmx_r2m (movntq, mmreg, var) - -#define pavgb_m2r(var,reg) mmx_m2r (pavgb, var, reg) -#define pavgb_r2r(regs,regd) mmx_r2r (pavgb, regs, regd) -#define pavgw_m2r(var,reg) mmx_m2r (pavgw, var, reg) -#define pavgw_r2r(regs,regd) mmx_r2r (pavgw, regs, regd) - -#define pextrw_r2r(mmreg,reg,imm) mmx_r2ri (pextrw, mmreg, reg, imm) - -#define pinsrw_r2r(reg,mmreg,imm) mmx_r2ri (pinsrw, reg, mmreg, imm) - -#define pmaxsw_m2r(var,reg) mmx_m2r (pmaxsw, var, reg) -#define pmaxsw_r2r(regs,regd) mmx_r2r (pmaxsw, regs, regd) - -#define pmaxub_m2r(var,reg) mmx_m2r (pmaxub, var, reg) -#define pmaxub_r2r(regs,regd) mmx_r2r (pmaxub, regs, regd) - -#define pminsw_m2r(var,reg) mmx_m2r (pminsw, var, reg) -#define pminsw_r2r(regs,regd) mmx_r2r (pminsw, regs, regd) - -#define pminub_m2r(var,reg) mmx_m2r (pminub, var, reg) -#define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd) - -#define pmovmskb(mmreg,reg) \ - __asm__ __volatile__ ("movmskps %" #mmreg ", %" #reg) - -#define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg) -#define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd) - -#define prefetcht0(mem) mmx_fetch (mem, t0) -#define prefetcht1(mem) mmx_fetch (mem, t1) -#define prefetcht2(mem) mmx_fetch (mem, t2) -#define prefetchnta(mem) mmx_fetch (mem, nta) - -#define psadbw_m2r(var,reg) mmx_m2r (psadbw, var, reg) -#define psadbw_r2r(regs,regd) mmx_r2r (psadbw, regs, regd) - -#define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm) -#define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm) - -#define sfence() __asm__ __volatile__ ("sfence\n\t") diff --git a/src/libmpeg2new/include/mpeg2.h b/src/libmpeg2new/include/mpeg2.h deleted file mode 100644 index 6c1a3805b..000000000 --- a/src/libmpeg2new/include/mpeg2.h 
+++ /dev/null @@ -1,202 +0,0 @@ -/* - * mpeg2.h - * Copyright (C) 2000-2004 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef MPEG2_H -#define MPEG2_H - -#define MPEG2_VERSION(a,b,c) (((a)<<16)|((b)<<8)|(c)) -#define MPEG2_RELEASE MPEG2_VERSION (0, 4, 1) /* 0.4.1 */ - -#define SEQ_FLAG_MPEG2 1 -#define SEQ_FLAG_CONSTRAINED_PARAMETERS 2 -#define SEQ_FLAG_PROGRESSIVE_SEQUENCE 4 -#define SEQ_FLAG_LOW_DELAY 8 -#define SEQ_FLAG_COLOUR_DESCRIPTION 16 - -#define SEQ_MASK_VIDEO_FORMAT 0xe0 -#define SEQ_VIDEO_FORMAT_COMPONENT 0 -#define SEQ_VIDEO_FORMAT_PAL 0x20 -#define SEQ_VIDEO_FORMAT_NTSC 0x40 -#define SEQ_VIDEO_FORMAT_SECAM 0x60 -#define SEQ_VIDEO_FORMAT_MAC 0x80 -#define SEQ_VIDEO_FORMAT_UNSPECIFIED 0xa0 - -typedef struct mpeg2_sequence_s { - unsigned int width, height; - unsigned int chroma_width, chroma_height; - unsigned int byte_rate; - unsigned int vbv_buffer_size; - uint32_t flags; - - unsigned int picture_width, picture_height; - unsigned int display_width, display_height; - unsigned int pixel_width, pixel_height; - unsigned int frame_period; - - uint8_t profile_level_id; - uint8_t colour_primaries; - 
uint8_t transfer_characteristics; - uint8_t matrix_coefficients; -} mpeg2_sequence_t; - -#define GOP_FLAG_DROP_FRAME 1 -#define GOP_FLAG_BROKEN_LINK 2 -#define GOP_FLAG_CLOSED_GOP 4 - -typedef struct mpeg2_gop_s { - uint8_t hours; - uint8_t minutes; - uint8_t seconds; - uint8_t pictures; - uint32_t flags; -} mpeg2_gop_t; - -#define PIC_MASK_CODING_TYPE 7 -#define PIC_FLAG_CODING_TYPE_I 1 -#define PIC_FLAG_CODING_TYPE_P 2 -#define PIC_FLAG_CODING_TYPE_B 3 -#define PIC_FLAG_CODING_TYPE_D 4 - -#define PIC_FLAG_TOP_FIELD_FIRST 8 -#define PIC_FLAG_PROGRESSIVE_FRAME 16 -#define PIC_FLAG_COMPOSITE_DISPLAY 32 -#define PIC_FLAG_SKIP 64 -#define PIC_FLAG_TAGS 128 -#define PIC_MASK_COMPOSITE_DISPLAY 0xfffff000 - -typedef struct mpeg2_picture_s { - unsigned int temporal_reference; - unsigned int nb_fields; - uint32_t tag, tag2; - uint32_t flags; - struct { - int x, y; - } display_offset[3]; -} mpeg2_picture_t; - -typedef struct mpeg2_fbuf_s { - uint8_t * buf[3]; - void * id; -} mpeg2_fbuf_t; - -typedef struct mpeg2_info_s { - const mpeg2_sequence_t * sequence; - const mpeg2_gop_t * gop; - const mpeg2_picture_t * current_picture; - const mpeg2_picture_t * current_picture_2nd; - const mpeg2_fbuf_t * current_fbuf; - const mpeg2_picture_t * display_picture; - const mpeg2_picture_t * display_picture_2nd; - const mpeg2_fbuf_t * display_fbuf; - const mpeg2_fbuf_t * discard_fbuf; - const uint8_t * user_data; - unsigned int user_data_len; -} mpeg2_info_t; - -typedef struct mpeg2dec_s mpeg2dec_t; -typedef struct mpeg2_decoder_s mpeg2_decoder_t; - -typedef enum { - STATE_BUFFER = 0, - STATE_SEQUENCE = 1, - STATE_SEQUENCE_REPEATED = 2, - STATE_SEQUENCE_MODIFIED = 3, - STATE_GOP = 4, - STATE_PICTURE = 5, - STATE_SLICE_1ST = 6, - STATE_PICTURE_2ND = 7, - STATE_SLICE = 8, - STATE_END = 9, - STATE_INVALID = 10, - STATE_INVALID_END = 11 -} mpeg2_state_t; - -typedef struct mpeg2_convert_init_s { - unsigned int id_size; - unsigned int buf_size[3]; - void (* start) (void * id, const mpeg2_fbuf_t 
* fbuf, - const mpeg2_picture_t * picture, const mpeg2_gop_t * gop); - void (* copy) (void * id, uint8_t * const * src, unsigned int v_offset); -} mpeg2_convert_init_t; -typedef enum { - MPEG2_CONVERT_SET = 0, - MPEG2_CONVERT_STRIDE = 1, - MPEG2_CONVERT_START = 2 -} mpeg2_convert_stage_t; -typedef int mpeg2_convert_t (int stage, void * id, - const mpeg2_sequence_t * sequence, int stride, - uint32_t accel, void * arg, - mpeg2_convert_init_t * result); -int mpeg2_convert (mpeg2dec_t * mpeg2dec, mpeg2_convert_t convert, void * arg); -int mpeg2_stride (mpeg2dec_t * mpeg2dec, int stride); -void mpeg2_set_buf (mpeg2dec_t * mpeg2dec, uint8_t * buf[3], void * id); -void mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf); - -#define MPEG2_ACCEL_X86_MMX 1 -#define MPEG2_ACCEL_X86_3DNOW 2 -#define MPEG2_ACCEL_X86_MMXEXT 4 -#define MPEG2_ACCEL_X86_SSE2 8 -#define MPEG2_ACCEL_X86_SSE3 16 -#define MPEG2_ACCEL_PPC_ALTIVEC 1 -#define MPEG2_ACCEL_ALPHA 1 -#define MPEG2_ACCEL_ALPHA_MVI 2 -#define MPEG2_ACCEL_SPARC_VIS 1 -#define MPEG2_ACCEL_SPARC_VIS2 2 -#define MPEG2_ACCEL_DETECT 0x80000000 - -uint32_t mpeg2_accel (uint32_t accel); -mpeg2dec_t * mpeg2_init (void); -const mpeg2_info_t * mpeg2_info (mpeg2dec_t * mpeg2dec); -void mpeg2_close (mpeg2dec_t * mpeg2dec); - -void mpeg2_buffer (mpeg2dec_t * mpeg2dec, uint8_t * start, uint8_t * end); -int mpeg2_getpos (mpeg2dec_t * mpeg2dec); -mpeg2_state_t mpeg2_parse (mpeg2dec_t * mpeg2dec); - -void mpeg2_reset (mpeg2dec_t * mpeg2dec, int full_reset); -void mpeg2_skip (mpeg2dec_t * mpeg2dec, int skip); -void mpeg2_slice_region (mpeg2dec_t * mpeg2dec, int start, int end); - -void mpeg2_tag_picture (mpeg2dec_t * mpeg2dec, uint32_t tag, uint32_t tag2); - -void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3], - uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3]); -void mpeg2_slice (mpeg2_decoder_t * decoder, int code, const uint8_t * buffer); -int mpeg2_guess_aspect (const mpeg2_sequence_t * sequence, - unsigned 
int * pixel_width, - unsigned int * pixel_height); - -typedef enum { - MPEG2_ALLOC_MPEG2DEC = 0, - MPEG2_ALLOC_CHUNK = 1, - MPEG2_ALLOC_YUV = 2, - MPEG2_ALLOC_CONVERT_ID = 3, - MPEG2_ALLOC_CONVERTED = 4 -} mpeg2_alloc_t; - -void * mpeg2_malloc (unsigned size, mpeg2_alloc_t reason); -void mpeg2_free (void * buf); -void mpeg2_malloc_hooks (void * malloc (unsigned, mpeg2_alloc_t), - int free (void *)); - -#endif /* MPEG2_H */ diff --git a/src/libmpeg2new/include/mpeg2convert.h b/src/libmpeg2new/include/mpeg2convert.h deleted file mode 100644 index aac5d1991..000000000 --- a/src/libmpeg2new/include/mpeg2convert.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * mpeg2convert.h - * Copyright (C) 2000-2003 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef MPEG2CONVERT_H -#define MPEG2CONVERT_H - -mpeg2_convert_t mpeg2convert_rgb32; -mpeg2_convert_t mpeg2convert_rgb24; -mpeg2_convert_t mpeg2convert_rgb16; -mpeg2_convert_t mpeg2convert_rgb15; -mpeg2_convert_t mpeg2convert_rgb8; -mpeg2_convert_t mpeg2convert_bgr32; -mpeg2_convert_t mpeg2convert_bgr24; -mpeg2_convert_t mpeg2convert_bgr16; -mpeg2_convert_t mpeg2convert_bgr15; -mpeg2_convert_t mpeg2convert_bgr8; - -typedef enum { - MPEG2CONVERT_RGB = 0, - MPEG2CONVERT_BGR = 1 -} mpeg2convert_rgb_order_t; - -mpeg2_convert_t * mpeg2convert_rgb (mpeg2convert_rgb_order_t order, - unsigned int bpp); - -mpeg2_convert_t mpeg2convert_uyvy; - -#endif /* MPEG2CONVERT_H */ diff --git a/src/libmpeg2new/include/sse.h b/src/libmpeg2new/include/sse.h deleted file mode 100644 index 4bd853f8b..000000000 --- a/src/libmpeg2new/include/sse.h +++ /dev/null @@ -1,256 +0,0 @@ -/* - * sse.h - * Copyright (C) 1999-2003 R. Fisher - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -typedef union { - float sf[4]; /* Single-precision (32-bit) value */ -} ATTR_ALIGN(16) sse_t; /* On a 16 byte (128-bit) boundary */ - - -#define sse_i2r(op, imm, reg) \ - __asm__ __volatile__ (#op " %0, %%" #reg \ - : /* nothing */ \ - : "X" (imm) ) - -#define sse_m2r(op, mem, reg) \ - __asm__ __volatile__ (#op " %0, %%" #reg \ - : /* nothing */ \ - : "X" (mem)) - -#define sse_r2m(op, reg, mem) \ - __asm__ __volatile__ (#op " %%" #reg ", %0" \ - : "=X" (mem) \ - : /* nothing */ ) - -#define sse_r2r(op, regs, regd) \ - __asm__ __volatile__ (#op " %" #regs ", %" #regd) - -#define sse_r2ri(op, regs, regd, imm) \ - __asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \ - : /* nothing */ \ - : "X" (imm) ) - -#define sse_m2ri(op, mem, reg, subop) \ - __asm__ __volatile__ (#op " %0, %%" #reg ", " #subop \ - : /* nothing */ \ - : "X" (mem)) - - -#define movaps_m2r(var, reg) sse_m2r(movaps, var, reg) -#define movaps_r2m(reg, var) sse_r2m(movaps, reg, var) -#define movaps_r2r(regs, regd) sse_r2r(movaps, regs, regd) - -#define movntps_r2m(xmmreg, var) sse_r2m(movntps, xmmreg, var) - -#define movups_m2r(var, reg) sse_m2r(movups, var, reg) -#define movups_r2m(reg, var) sse_r2m(movups, reg, var) -#define movups_r2r(regs, regd) sse_r2r(movups, regs, regd) - -#define movhlps_r2r(regs, regd) sse_r2r(movhlps, regs, regd) - -#define movlhps_r2r(regs, regd) sse_r2r(movlhps, regs, regd) - -#define movhps_m2r(var, reg) sse_m2r(movhps, var, reg) -#define movhps_r2m(reg, var) sse_r2m(movhps, reg, var) - -#define movlps_m2r(var, reg) sse_m2r(movlps, var, reg) -#define movlps_r2m(reg, var) sse_r2m(movlps, reg, var) - -#define movss_m2r(var, reg) sse_m2r(movss, var, reg) -#define movss_r2m(reg, var) sse_r2m(movss, reg, var) -#define movss_r2r(regs, regd) sse_r2r(movss, 
regs, regd) - -#define shufps_m2r(var, reg, index) sse_m2ri(shufps, var, reg, index) -#define shufps_r2r(regs, regd, index) sse_r2ri(shufps, regs, regd, index) - -#define cvtpi2ps_m2r(var, xmmreg) sse_m2r(cvtpi2ps, var, xmmreg) -#define cvtpi2ps_r2r(mmreg, xmmreg) sse_r2r(cvtpi2ps, mmreg, xmmreg) - -#define cvtps2pi_m2r(var, mmreg) sse_m2r(cvtps2pi, var, mmreg) -#define cvtps2pi_r2r(xmmreg, mmreg) sse_r2r(cvtps2pi, mmreg, xmmreg) - -#define cvttps2pi_m2r(var, mmreg) sse_m2r(cvttps2pi, var, mmreg) -#define cvttps2pi_r2r(xmmreg, mmreg) sse_r2r(cvttps2pi, mmreg, xmmreg) - -#define cvtsi2ss_m2r(var, xmmreg) sse_m2r(cvtsi2ss, var, xmmreg) -#define cvtsi2ss_r2r(reg, xmmreg) sse_r2r(cvtsi2ss, reg, xmmreg) - -#define cvtss2si_m2r(var, reg) sse_m2r(cvtss2si, var, reg) -#define cvtss2si_r2r(xmmreg, reg) sse_r2r(cvtss2si, xmmreg, reg) - -#define cvttss2si_m2r(var, reg) sse_m2r(cvtss2si, var, reg) -#define cvttss2si_r2r(xmmreg, reg) sse_r2r(cvtss2si, xmmreg, reg) - -#define movmskps(xmmreg, reg) \ - __asm__ __volatile__ ("movmskps %" #xmmreg ", %" #reg) - -#define addps_m2r(var, reg) sse_m2r(addps, var, reg) -#define addps_r2r(regs, regd) sse_r2r(addps, regs, regd) - -#define addss_m2r(var, reg) sse_m2r(addss, var, reg) -#define addss_r2r(regs, regd) sse_r2r(addss, regs, regd) - -#define subps_m2r(var, reg) sse_m2r(subps, var, reg) -#define subps_r2r(regs, regd) sse_r2r(subps, regs, regd) - -#define subss_m2r(var, reg) sse_m2r(subss, var, reg) -#define subss_r2r(regs, regd) sse_r2r(subss, regs, regd) - -#define mulps_m2r(var, reg) sse_m2r(mulps, var, reg) -#define mulps_r2r(regs, regd) sse_r2r(mulps, regs, regd) - -#define mulss_m2r(var, reg) sse_m2r(mulss, var, reg) -#define mulss_r2r(regs, regd) sse_r2r(mulss, regs, regd) - -#define divps_m2r(var, reg) sse_m2r(divps, var, reg) -#define divps_r2r(regs, regd) sse_r2r(divps, regs, regd) - -#define divss_m2r(var, reg) sse_m2r(divss, var, reg) -#define divss_r2r(regs, regd) sse_r2r(divss, regs, regd) - -#define rcpps_m2r(var, 
reg) sse_m2r(rcpps, var, reg) -#define rcpps_r2r(regs, regd) sse_r2r(rcpps, regs, regd) - -#define rcpss_m2r(var, reg) sse_m2r(rcpss, var, reg) -#define rcpss_r2r(regs, regd) sse_r2r(rcpss, regs, regd) - -#define rsqrtps_m2r(var, reg) sse_m2r(rsqrtps, var, reg) -#define rsqrtps_r2r(regs, regd) sse_r2r(rsqrtps, regs, regd) - -#define rsqrtss_m2r(var, reg) sse_m2r(rsqrtss, var, reg) -#define rsqrtss_r2r(regs, regd) sse_r2r(rsqrtss, regs, regd) - -#define sqrtps_m2r(var, reg) sse_m2r(sqrtps, var, reg) -#define sqrtps_r2r(regs, regd) sse_r2r(sqrtps, regs, regd) - -#define sqrtss_m2r(var, reg) sse_m2r(sqrtss, var, reg) -#define sqrtss_r2r(regs, regd) sse_r2r(sqrtss, regs, regd) - -#define andps_m2r(var, reg) sse_m2r(andps, var, reg) -#define andps_r2r(regs, regd) sse_r2r(andps, regs, regd) - -#define andnps_m2r(var, reg) sse_m2r(andnps, var, reg) -#define andnps_r2r(regs, regd) sse_r2r(andnps, regs, regd) - -#define orps_m2r(var, reg) sse_m2r(orps, var, reg) -#define orps_r2r(regs, regd) sse_r2r(orps, regs, regd) - -#define xorps_m2r(var, reg) sse_m2r(xorps, var, reg) -#define xorps_r2r(regs, regd) sse_r2r(xorps, regs, regd) - -#define maxps_m2r(var, reg) sse_m2r(maxps, var, reg) -#define maxps_r2r(regs, regd) sse_r2r(maxps, regs, regd) - -#define maxss_m2r(var, reg) sse_m2r(maxss, var, reg) -#define maxss_r2r(regs, regd) sse_r2r(maxss, regs, regd) - -#define minps_m2r(var, reg) sse_m2r(minps, var, reg) -#define minps_r2r(regs, regd) sse_r2r(minps, regs, regd) - -#define minss_m2r(var, reg) sse_m2r(minss, var, reg) -#define minss_r2r(regs, regd) sse_r2r(minss, regs, regd) - -#define cmpps_m2r(var, reg, op) sse_m2ri(cmpps, var, reg, op) -#define cmpps_r2r(regs, regd, op) sse_r2ri(cmpps, regs, regd, op) - -#define cmpeqps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 0) -#define cmpeqps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 0) - -#define cmpltps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 1) -#define cmpltps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 1) - -#define 
cmpleps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 2) -#define cmpleps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 2) - -#define cmpunordps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 3) -#define cmpunordps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 3) - -#define cmpneqps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 4) -#define cmpneqps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 4) - -#define cmpnltps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 5) -#define cmpnltps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 5) - -#define cmpnleps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 6) -#define cmpnleps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 6) - -#define cmpordps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 7) -#define cmpordps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 7) - -#define cmpss_m2r(var, reg, op) sse_m2ri(cmpss, var, reg, op) -#define cmpss_r2r(regs, regd, op) sse_r2ri(cmpss, regs, regd, op) - -#define cmpeqss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 0) -#define cmpeqss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 0) - -#define cmpltss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 1) -#define cmpltss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 1) - -#define cmpless_m2r(var, reg) sse_m2ri(cmpss, var, reg, 2) -#define cmpless_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 2) - -#define cmpunordss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 3) -#define cmpunordss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 3) - -#define cmpneqss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 4) -#define cmpneqss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 4) - -#define cmpnltss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 5) -#define cmpnltss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 5) - -#define cmpnless_m2r(var, reg) sse_m2ri(cmpss, var, reg, 6) -#define cmpnless_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 6) - -#define cmpordss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 7) -#define cmpordss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 7) - -#define comiss_m2r(var, reg) sse_m2r(comiss, var, reg) -#define comiss_r2r(regs, 
regd) sse_r2r(comiss, regs, regd) - -#define ucomiss_m2r(var, reg) sse_m2r(ucomiss, var, reg) -#define ucomiss_r2r(regs, regd) sse_r2r(ucomiss, regs, regd) - -#define unpcklps_m2r(var, reg) sse_m2r(unpcklps, var, reg) -#define unpcklps_r2r(regs, regd) sse_r2r(unpcklps, regs, regd) - -#define unpckhps_m2r(var, reg) sse_m2r(unpckhps, var, reg) -#define unpckhps_r2r(regs, regd) sse_r2r(unpckhps, regs, regd) - -#define fxrstor(mem) \ - __asm__ __volatile__ ("fxrstor %0" \ - : /* nothing */ \ - : "X" (mem)) - -#define fxsave(mem) \ - __asm__ __volatile__ ("fxsave %0" \ - : /* nothing */ \ - : "X" (mem)) - -#define stmxcsr(mem) \ - __asm__ __volatile__ ("stmxcsr %0" \ - : /* nothing */ \ - : "X" (mem)) - -#define ldmxcsr(mem) \ - __asm__ __volatile__ ("ldmxcsr %0" \ - : /* nothing */ \ - : "X" (mem)) - diff --git a/src/libmpeg2new/include/tendra.h b/src/libmpeg2new/include/tendra.h deleted file mode 100644 index 09900916a..000000000 --- a/src/libmpeg2new/include/tendra.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * tendra.h - * Copyright (C) 2000-2003 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#pragma TenDRA begin -#pragma TenDRA longlong type warning - -#ifdef TenDRA_check - -#pragma TenDRA conversion analysis (pointer-int explicit) off -#pragma TenDRA implicit function declaration off - -/* avoid the "No declarations in translation unit" problem */ -int TenDRA; - -#endif /* TenDRA_check */ diff --git a/src/libmpeg2new/include/video_out.h b/src/libmpeg2new/include/video_out.h deleted file mode 100644 index 342c55197..000000000 --- a/src/libmpeg2new/include/video_out.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * video_out.h - * Copyright (C) 2000-2003 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -struct mpeg2_sequence_s; -struct mpeg2_convert_init_s; -typedef struct { - int (* convert) (int stage, void * id, - const struct mpeg2_sequence_s * sequence, - int stride, uint32_t accel, void * arg, - struct mpeg2_convert_init_s * result); -} vo_setup_result_t; - -typedef struct vo_instance_s vo_instance_t; -struct vo_instance_s { - int (* setup) (vo_instance_t * instance, unsigned int width, - unsigned int height, unsigned int chroma_width, - unsigned int chroma_height, vo_setup_result_t * result); - void (* setup_fbuf) (vo_instance_t * instance, uint8_t ** buf, void ** id); - void (* set_fbuf) (vo_instance_t * instance, uint8_t ** buf, void ** id); - void (* start_fbuf) (vo_instance_t * instance, - uint8_t * const * buf, void * id); - void (* draw) (vo_instance_t * instance, uint8_t * const * buf, void * id); - void (* discard) (vo_instance_t * instance, - uint8_t * const * buf, void * id); - void (* close) (vo_instance_t * instance); -}; - -typedef vo_instance_t * vo_open_t (void); - -typedef struct { - char * name; - vo_open_t * open; -} vo_driver_t; - -void vo_accel (uint32_t accel); - -/* return NULL terminated array of all drivers */ -vo_driver_t const * vo_drivers (void); diff --git a/src/libmpeg2new/include/vis.h b/src/libmpeg2new/include/vis.h deleted file mode 100644 index 69dd49075..000000000 --- a/src/libmpeg2new/include/vis.h +++ /dev/null @@ -1,328 +0,0 @@ -/* - * vis.h - * Copyright (C) 2003 David S. Miller - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. 
- * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -/* You may be asking why I hard-code the instruction opcodes and don't - * use the normal VIS assembler mnenomics for the VIS instructions. - * - * The reason is that Sun, in their infinite wisdom, decided that a binary - * using a VIS instruction will cause it to be marked (in the ELF headers) - * as doing so, and this prevents the OS from loading such binaries if the - * current cpu doesn't have VIS. There is no way to easily override this - * behavior of the assembler that I am aware of. - * - * This totally defeats what libmpeg2 is trying to do which is allow a - * single binary to be created, and then detect the availability of VIS - * at runtime. - * - * I'm not saying that tainting the binary by default is bad, rather I'm - * saying that not providing a way to override this easily unnecessarily - * ties people's hands. - * - * Thus, we do the opcode encoding by hand and output 32-bit words in - * the assembler to keep the binary from becoming tainted. 
- */ - -#define vis_opc_base ((0x1 << 31) | (0x36 << 19)) -#define vis_opf(X) ((X) << 5) -#define vis_sreg(X) (X) -#define vis_dreg(X) (((X)&0x1f)|((X)>>5)) -#define vis_rs1_s(X) (vis_sreg(X) << 14) -#define vis_rs1_d(X) (vis_dreg(X) << 14) -#define vis_rs2_s(X) (vis_sreg(X) << 0) -#define vis_rs2_d(X) (vis_dreg(X) << 0) -#define vis_rd_s(X) (vis_sreg(X) << 25) -#define vis_rd_d(X) (vis_dreg(X) << 25) - -#define vis_ss2s(opf,rs1,rs2,rd) \ - __asm__ __volatile__ (".word %0" \ - : : "i" (vis_opc_base | vis_opf(opf) | \ - vis_rs1_s(rs1) | \ - vis_rs2_s(rs2) | \ - vis_rd_s(rd))) - -#define vis_dd2d(opf,rs1,rs2,rd) \ - __asm__ __volatile__ (".word %0" \ - : : "i" (vis_opc_base | vis_opf(opf) | \ - vis_rs1_d(rs1) | \ - vis_rs2_d(rs2) | \ - vis_rd_d(rd))) - -#define vis_ss2d(opf,rs1,rs2,rd) \ - __asm__ __volatile__ (".word %0" \ - : : "i" (vis_opc_base | vis_opf(opf) | \ - vis_rs1_s(rs1) | \ - vis_rs2_s(rs2) | \ - vis_rd_d(rd))) - -#define vis_sd2d(opf,rs1,rs2,rd) \ - __asm__ __volatile__ (".word %0" \ - : : "i" (vis_opc_base | vis_opf(opf) | \ - vis_rs1_s(rs1) | \ - vis_rs2_d(rs2) | \ - vis_rd_d(rd))) - -#define vis_d2s(opf,rs2,rd) \ - __asm__ __volatile__ (".word %0" \ - : : "i" (vis_opc_base | vis_opf(opf) | \ - vis_rs2_d(rs2) | \ - vis_rd_s(rd))) - -#define vis_s2d(opf,rs2,rd) \ - __asm__ __volatile__ (".word %0" \ - : : "i" (vis_opc_base | vis_opf(opf) | \ - vis_rs2_s(rs2) | \ - vis_rd_d(rd))) - -#define vis_d12d(opf,rs1,rd) \ - __asm__ __volatile__ (".word %0" \ - : : "i" (vis_opc_base | vis_opf(opf) | \ - vis_rs1_d(rs1) | \ - vis_rd_d(rd))) - -#define vis_d22d(opf,rs2,rd) \ - __asm__ __volatile__ (".word %0" \ - : : "i" (vis_opc_base | vis_opf(opf) | \ - vis_rs2_d(rs2) | \ - vis_rd_d(rd))) - -#define vis_s12s(opf,rs1,rd) \ - __asm__ __volatile__ (".word %0" \ - : : "i" (vis_opc_base | vis_opf(opf) | \ - vis_rs1_s(rs1) | \ - vis_rd_s(rd))) - -#define vis_s22s(opf,rs2,rd) \ - __asm__ __volatile__ (".word %0" \ - : : "i" (vis_opc_base | vis_opf(opf) | \ - 
vis_rs2_s(rs2) | \ - vis_rd_s(rd))) - -#define vis_s(opf,rd) \ - __asm__ __volatile__ (".word %0" \ - : : "i" (vis_opc_base | vis_opf(opf) | \ - vis_rd_s(rd))) - -#define vis_d(opf,rd) \ - __asm__ __volatile__ (".word %0" \ - : : "i" (vis_opc_base | vis_opf(opf) | \ - vis_rd_d(rd))) - -#define vis_r2m(op,rd,mem) \ - __asm__ __volatile__ (#op "\t%%f" #rd ", [%0]" : : "r" (&(mem)) ) - -#define vis_r2m_2(op,rd,mem1,mem2) \ - __asm__ __volatile__ (#op "\t%%f" #rd ", [%0 + %1]" : : "r" (mem1), "r" (mem2) ) - -#define vis_m2r(op,mem,rd) \ - __asm__ __volatile__ (#op "\t[%0], %%f" #rd : : "r" (&(mem)) ) - -#define vis_m2r_2(op,mem1,mem2,rd) \ - __asm__ __volatile__ (#op "\t[%0 + %1], %%f" #rd : : "r" (mem1), "r" (mem2) ) - -static inline void vis_set_gsr(unsigned int _val) -{ - register unsigned int val asm("g1"); - - val = _val; - __asm__ __volatile__(".word 0xa7804000" - : : "r" (val)); -} - -#define VIS_GSR_ALIGNADDR_MASK 0x0000007 -#define VIS_GSR_ALIGNADDR_SHIFT 0 -#define VIS_GSR_SCALEFACT_MASK 0x0000078 -#define VIS_GSR_SCALEFACT_SHIFT 3 - -#define vis_ld32(mem,rs1) vis_m2r(ld, mem, rs1) -#define vis_ld32_2(mem1,mem2,rs1) vis_m2r_2(ld, mem1, mem2, rs1) -#define vis_st32(rs1,mem) vis_r2m(st, rs1, mem) -#define vis_st32_2(rs1,mem1,mem2) vis_r2m_2(st, rs1, mem1, mem2) -#define vis_ld64(mem,rs1) vis_m2r(ldd, mem, rs1) -#define vis_ld64_2(mem1,mem2,rs1) vis_m2r_2(ldd, mem1, mem2, rs1) -#define vis_st64(rs1,mem) vis_r2m(std, rs1, mem) -#define vis_st64_2(rs1,mem1,mem2) vis_r2m_2(std, rs1, mem1, mem2) - -#define vis_ldblk(mem, rd) \ -do { register void *__mem asm("g1"); \ - __mem = &(mem); \ - __asm__ __volatile__(".word 0xc1985e00 | %1" \ - : \ - : "r" (__mem), \ - "i" (vis_rd_d(rd)) \ - : "memory"); \ -} while (0) - -#define vis_stblk(rd, mem) \ -do { register void *__mem asm("g1"); \ - __mem = &(mem); \ - __asm__ __volatile__(".word 0xc1b85e00 | %1" \ - : \ - : "r" (__mem), \ - "i" (vis_rd_d(rd)) \ - : "memory"); \ -} while (0) - -#define vis_membar_storestore() \ - 
__asm__ __volatile__(".word 0x8143e008" : : : "memory") - -#define vis_membar_sync() \ - __asm__ __volatile__(".word 0x8143e040" : : : "memory") - -/* 16 and 32 bit partitioned addition and subtraction. The normal - * versions perform 4 16-bit or 2 32-bit additions or subtractions. - * The 's' versions perform 2 16-bit or 2 32-bit additions or - * subtractions. - */ - -#define vis_padd16(rs1,rs2,rd) vis_dd2d(0x50, rs1, rs2, rd) -#define vis_padd16s(rs1,rs2,rd) vis_ss2s(0x51, rs1, rs2, rd) -#define vis_padd32(rs1,rs2,rd) vis_dd2d(0x52, rs1, rs2, rd) -#define vis_padd32s(rs1,rs2,rd) vis_ss2s(0x53, rs1, rs2, rd) -#define vis_psub16(rs1,rs2,rd) vis_dd2d(0x54, rs1, rs2, rd) -#define vis_psub16s(rs1,rs2,rd) vis_ss2s(0x55, rs1, rs2, rd) -#define vis_psub32(rs1,rs2,rd) vis_dd2d(0x56, rs1, rs2, rd) -#define vis_psub32s(rs1,rs2,rd) vis_ss2s(0x57, rs1, rs2, rd) - -/* Pixel formatting instructions. */ - -#define vis_pack16(rs2,rd) vis_d2s( 0x3b, rs2, rd) -#define vis_pack32(rs1,rs2,rd) vis_dd2d(0x3a, rs1, rs2, rd) -#define vis_packfix(rs2,rd) vis_d2s( 0x3d, rs2, rd) -#define vis_expand(rs2,rd) vis_s2d( 0x4d, rs2, rd) -#define vis_pmerge(rs1,rs2,rd) vis_ss2d(0x4b, rs1, rs2, rd) - -/* Partitioned multiply instructions. */ - -#define vis_mul8x16(rs1,rs2,rd) vis_sd2d(0x31, rs1, rs2, rd) -#define vis_mul8x16au(rs1,rs2,rd) vis_ss2d(0x33, rs1, rs2, rd) -#define vis_mul8x16al(rs1,rs2,rd) vis_ss2d(0x35, rs1, rs2, rd) -#define vis_mul8sux16(rs1,rs2,rd) vis_dd2d(0x36, rs1, rs2, rd) -#define vis_mul8ulx16(rs1,rs2,rd) vis_dd2d(0x37, rs1, rs2, rd) -#define vis_muld8sux16(rs1,rs2,rd) vis_ss2d(0x38, rs1, rs2, rd) -#define vis_muld8ulx16(rs1,rs2,rd) vis_ss2d(0x39, rs1, rs2, rd) - -/* Alignment instructions. 
*/ - -static inline void *vis_alignaddr(void *_ptr) -{ - register void *ptr asm("g1"); - - ptr = _ptr; - - __asm__ __volatile__(".word %2" - : "=&r" (ptr) - : "0" (ptr), - "i" (vis_opc_base | vis_opf(0x18) | - vis_rs1_s(1) | - vis_rs2_s(0) | - vis_rd_s(1))); - - return ptr; -} - -static inline void vis_alignaddr_g0(void *_ptr) -{ - register void *ptr asm("g1"); - - ptr = _ptr; - - __asm__ __volatile__(".word %2" - : "=&r" (ptr) - : "0" (ptr), - "i" (vis_opc_base | vis_opf(0x18) | - vis_rs1_s(1) | - vis_rs2_s(0) | - vis_rd_s(0))); -} - -static inline void *vis_alignaddrl(void *_ptr) -{ - register void *ptr asm("g1"); - - ptr = _ptr; - - __asm__ __volatile__(".word %2" - : "=&r" (ptr) - : "0" (ptr), - "i" (vis_opc_base | vis_opf(0x19) | - vis_rs1_s(1) | - vis_rs2_s(0) | - vis_rd_s(1))); - - return ptr; -} - -static inline void vis_alignaddrl_g0(void *_ptr) -{ - register void *ptr asm("g1"); - - ptr = _ptr; - - __asm__ __volatile__(".word %2" - : "=&r" (ptr) - : "0" (ptr), - "i" (vis_opc_base | vis_opf(0x19) | - vis_rs1_s(1) | - vis_rs2_s(0) | - vis_rd_s(0))); -} - -#define vis_faligndata(rs1,rs2,rd) vis_dd2d(0x48, rs1, rs2, rd) - -/* Logical operate instructions. 
*/ - -#define vis_fzero(rd) vis_d( 0x60, rd) -#define vis_fzeros(rd) vis_s( 0x61, rd) -#define vis_fone(rd) vis_d( 0x7e, rd) -#define vis_fones(rd) vis_s( 0x7f, rd) -#define vis_src1(rs1,rd) vis_d12d(0x74, rs1, rd) -#define vis_src1s(rs1,rd) vis_s12s(0x75, rs1, rd) -#define vis_src2(rs2,rd) vis_d22d(0x78, rs2, rd) -#define vis_src2s(rs2,rd) vis_s22s(0x79, rs2, rd) -#define vis_not1(rs1,rd) vis_d12d(0x6a, rs1, rd) -#define vis_not1s(rs1,rd) vis_s12s(0x6b, rs1, rd) -#define vis_not2(rs2,rd) vis_d22d(0x66, rs2, rd) -#define vis_not2s(rs2,rd) vis_s22s(0x67, rs2, rd) -#define vis_or(rs1,rs2,rd) vis_dd2d(0x7c, rs1, rs2, rd) -#define vis_ors(rs1,rs2,rd) vis_ss2s(0x7d, rs1, rs2, rd) -#define vis_nor(rs1,rs2,rd) vis_dd2d(0x62, rs1, rs2, rd) -#define vis_nors(rs1,rs2,rd) vis_ss2s(0x63, rs1, rs2, rd) -#define vis_and(rs1,rs2,rd) vis_dd2d(0x70, rs1, rs2, rd) -#define vis_ands(rs1,rs2,rd) vis_ss2s(0x71, rs1, rs2, rd) -#define vis_nand(rs1,rs2,rd) vis_dd2d(0x6e, rs1, rs2, rd) -#define vis_nands(rs1,rs2,rd) vis_ss2s(0x6f, rs1, rs2, rd) -#define vis_xor(rs1,rs2,rd) vis_dd2d(0x6c, rs1, rs2, rd) -#define vis_xors(rs1,rs2,rd) vis_ss2s(0x6d, rs1, rs2, rd) -#define vis_xnor(rs1,rs2,rd) vis_dd2d(0x72, rs1, rs2, rd) -#define vis_xnors(rs1,rs2,rd) vis_ss2s(0x73, rs1, rs2, rd) -#define vis_ornot1(rs1,rs2,rd) vis_dd2d(0x7a, rs1, rs2, rd) -#define vis_ornot1s(rs1,rs2,rd) vis_ss2s(0x7b, rs1, rs2, rd) -#define vis_ornot2(rs1,rs2,rd) vis_dd2d(0x76, rs1, rs2, rd) -#define vis_ornot2s(rs1,rs2,rd) vis_ss2s(0x77, rs1, rs2, rd) -#define vis_andnot1(rs1,rs2,rd) vis_dd2d(0x68, rs1, rs2, rd) -#define vis_andnot1s(rs1,rs2,rd) vis_ss2s(0x69, rs1, rs2, rd) -#define vis_andnot2(rs1,rs2,rd) vis_dd2d(0x64, rs1, rs2, rd) -#define vis_andnot2s(rs1,rs2,rd) vis_ss2s(0x65, rs1, rs2, rd) - -/* Pixel component distance. 
*/ - -#define vis_pdist(rs1,rs2,rd) vis_dd2d(0x3e, rs1, rs2, rd) diff --git a/src/libmpeg2new/libmpeg2/Makefile.am b/src/libmpeg2new/libmpeg2/Makefile.am deleted file mode 100644 index 2caa3ddc2..000000000 --- a/src/libmpeg2new/libmpeg2/Makefile.am +++ /dev/null @@ -1,14 +0,0 @@ -include $(top_srcdir)/misc/Makefile.common - -AM_CFLAGS = $(DEFAULT_OCFLAGS) $(VISIBILITY_FLAG) - -noinst_LTLIBRARIES = libmpeg2.la libmpeg2arch.la - -libmpeg2_la_SOURCES = alloc.c header.c decode.c slice.c motion_comp.c idct.c -libmpeg2_la_LIBADD = libmpeg2arch.la - -libmpeg2arch_la_SOURCES = motion_comp_mmx.c idct_mmx.c \ - motion_comp_altivec.c idct_altivec.c \ - motion_comp_alpha.c idct_alpha.c \ - motion_comp_vis.c \ - cpu_accel.c cpu_state.c diff --git a/src/libmpeg2new/libmpeg2/alloc.c b/src/libmpeg2new/libmpeg2/alloc.c deleted file mode 100644 index f1a7afa1c..000000000 --- a/src/libmpeg2new/libmpeg2/alloc.c +++ /dev/null @@ -1,70 +0,0 @@ -/* - * alloc.c - * Copyright (C) 2000-2003 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include -#include - -#include "../include/mpeg2.h" - -static void * (* malloc_hook) (unsigned size, mpeg2_alloc_t reason) = NULL; -static int (* free_hook) (void * buf) = NULL; - -void * mpeg2_malloc (unsigned size, mpeg2_alloc_t reason) -{ - char * buf; - - if (malloc_hook) { - buf = (char *) malloc_hook (size, reason); - if (buf) - return buf; - } - - if (size) { - buf = (char *) malloc (size + 63 + sizeof (void **)); - if (buf) { - char * align_buf; - - align_buf = buf + 63 + sizeof (void **); - align_buf -= (long)align_buf & 63; - *(((void **)align_buf) - 1) = buf; - return align_buf; - } - } - return NULL; -} - -void mpeg2_free (void * buf) -{ - if (free_hook && free_hook (buf)) - return; - - if (buf) - free (*(((void **)buf) - 1)); -} - -void mpeg2_malloc_hooks (void * malloc (unsigned, mpeg2_alloc_t), - int free (void *)) -{ - malloc_hook = malloc; - free_hook = free; -} diff --git a/src/libmpeg2new/libmpeg2/configure.incl b/src/libmpeg2new/libmpeg2/configure.incl deleted file mode 100644 index f8dbd5aef..000000000 --- a/src/libmpeg2new/libmpeg2/configure.incl +++ /dev/null @@ -1,11 +0,0 @@ -AC_SUBST([LIBMPEG2_CFLAGS]) - -dnl avoid -fPIC when possible -AC_LIBTOOL_NON_PIC([LIBMPEG2_CFLAGS="$LIBMPEG2_CFLAGS -prefer-non-pic"]) - -dnl check for cpudetect -AC_ARG_ENABLE([accel-detect], - [ --disable-accel-detect make a version without accel detection code]) -if test x"$enable_accel_detect" != x"no"; then - AC_DEFINE([ACCEL_DETECT],,[autodetect accelerations]) -fi diff --git a/src/libmpeg2new/libmpeg2/convert_internal.h b/src/libmpeg2new/libmpeg2/convert_internal.h deleted file mode 100644 index d1e63d5e3..000000000 --- a/src/libmpeg2new/libmpeg2/convert_internal.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * convert_internal.h - * Copyright (C) 
2000-2003 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -typedef struct { - uint8_t * rgb_ptr; - int width; - int field; - int y_stride, rgb_stride, y_increm, uv_increm, rgb_increm, rgb_slice; - int chroma420, convert420; - int dither_offset, dither_stride; - int y_stride_frame, uv_stride_frame, rgb_stride_frame, rgb_stride_min; -} convert_rgb_t; - -typedef void mpeg2convert_copy_t (void * id, uint8_t * const * src, - unsigned int v_offset); - -mpeg2convert_copy_t * mpeg2convert_rgb_mmxext (int bpp, int mode, - const mpeg2_sequence_t * seq); -mpeg2convert_copy_t * mpeg2convert_rgb_mmx (int bpp, int mode, - const mpeg2_sequence_t * seq); -mpeg2convert_copy_t * mpeg2convert_rgb_vis (int bpp, int mode, - const mpeg2_sequence_t * seq); diff --git a/src/libmpeg2new/libmpeg2/cpu_accel.c b/src/libmpeg2new/libmpeg2/cpu_accel.c deleted file mode 100644 index 7846f1e88..000000000 --- a/src/libmpeg2new/libmpeg2/cpu_accel.c +++ /dev/null @@ -1,258 +0,0 @@ -/* - * cpu_accel.c - * Copyright (C) 2000-2004 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video 
stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#include - -#include "../include/mpeg2.h" -#include "../include/attributes.h" -#include "mpeg2_internal.h" - -#ifdef ARCH_X86 -static inline uint32_t arch_accel (uint32_t accel) -{ - if (accel & (MPEG2_ACCEL_X86_3DNOW | MPEG2_ACCEL_X86_MMXEXT)) - accel |= MPEG2_ACCEL_X86_MMX; - - if (accel & (MPEG2_ACCEL_X86_SSE2 | MPEG2_ACCEL_X86_SSE3)) - accel |= MPEG2_ACCEL_X86_MMXEXT; - - if (accel & (MPEG2_ACCEL_X86_SSE3)) - accel |= MPEG2_ACCEL_X86_SSE2; - -#ifdef ACCEL_DETECT - if (accel & MPEG2_ACCEL_DETECT) { - uint32_t eax, ebx, ecx, edx; - int AMD; - -#if !defined(PIC) && !defined(__PIC__) -#define cpuid(op,eax,ebx,ecx,edx) \ - __asm__ ("cpuid" \ - : "=a" (eax), \ - "=b" (ebx), \ - "=c" (ecx), \ - "=d" (edx) \ - : "a" (op) \ - : "cc") -#else /* PIC version : save ebx */ -#define cpuid(op,eax,ebx,ecx,edx) \ - __asm__ ("push %%ebx\n\t" \ - "cpuid\n\t" \ - "movl %%ebx,%1\n\t" \ - "pop %%ebx" \ - : "=a" (eax), \ - "=r" (ebx), \ - "=c" (ecx), \ - "=d" (edx) \ - : "a" (op) \ - : "cc") -#endif - - __asm__ ("pushf\n\t" - "pushf\n\t" - "pop %0\n\t" - "movl %0,%1\n\t" - "xorl $0x200000,%0\n\t" - "push %0\n\t" - "popf\n\t" - "pushf\n\t" - "pop %0\n\t" - "popf" - : "=r" (eax), - 
"=r" (ebx) - : - : "cc"); - - if (eax == ebx) /* no cpuid */ - return accel; - - cpuid (0x00000000, eax, ebx, ecx, edx); - if (!eax) /* vendor string only */ - return accel; - - AMD = (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65); - - cpuid (0x00000001, eax, ebx, ecx, edx); - if (! (edx & 0x00800000)) /* no MMX */ - return accel; - - accel |= MPEG2_ACCEL_X86_MMX; - if (edx & 0x02000000) /* SSE - identical to AMD MMX extensions */ - accel |= MPEG2_ACCEL_X86_MMXEXT; - - if (edx & 0x04000000) /* SSE2 */ - accel |= MPEG2_ACCEL_X86_SSE2; - - if (ecx & 0x00000001) /* SSE3 */ - accel |= MPEG2_ACCEL_X86_SSE3; - - cpuid (0x80000000, eax, ebx, ecx, edx); - if (eax < 0x80000001) /* no extended capabilities */ - return accel; - - cpuid (0x80000001, eax, ebx, ecx, edx); - - if (edx & 0x80000000) - accel |= MPEG2_ACCEL_X86_3DNOW; - - if (AMD && (edx & 0x00400000)) /* AMD MMX extensions */ - accel |= MPEG2_ACCEL_X86_MMXEXT; - } -#endif /* ACCEL_DETECT */ - - return accel; -} -#endif /* ARCH_X86 */ - -#if defined(ACCEL_DETECT) && (defined(ARCH_PPC) || defined(ARCH_SPARC)) -#include -#include - -static sigjmp_buf jmpbuf; -static volatile sig_atomic_t canjump = 0; - -static RETSIGTYPE sigill_handler (int sig) -{ - if (!canjump) { - signal (sig, SIG_DFL); - raise (sig); - } - - canjump = 0; - siglongjmp (jmpbuf, 1); -} -#endif /* ACCEL_DETECT && (ARCH_PPC || ARCH_SPARC) */ - -#ifdef ARCH_PPC -static inline uint32_t arch_accel (uint32_t accel) -{ -#ifdef ACCEL_DETECT - if (accel & (MPEG2_ACCEL_PPC_ALTIVEC | MPEG2_ACCEL_DETECT) == - MPEG2_ACCEL_DETECT) { - static RETSIGTYPE (* oldsig) (int); - - oldsig = signal (SIGILL, sigill_handler); - if (sigsetjmp (jmpbuf, 1)) { - signal (SIGILL, oldsig); - return accel; - } - - canjump = 1; - -#ifdef HAVE_ALTIVEC_H /* gnu */ -#define VAND(a,b,c) "vand " #a "," #b "," #c "\n\t" -#else /* apple */ -#define VAND(a,b,c) "vand v" #a ",v" #b ",v" #c "\n\t" -#endif - asm volatile ("mtspr 256, %0\n\t" - VAND (0, 0, 0) - : - : "r" (-1)); - 
- canjump = 0; - accel |= MPEG2_ACCEL_PPC_ALTIVEC; - - signal (SIGILL, oldsig); - } -#endif /* ACCEL_DETECT */ - - return accel; -} -#endif /* ARCH_PPC */ - -#ifdef ARCH_SPARC -static inline uint32_t arch_accel (uint32_t accel) -{ - if (accel & MPEG2_ACCEL_SPARC_VIS2) - accel |= MPEG2_ACCEL_SPARC_VIS; - -#ifdef ACCEL_DETECT - if (accel & (MPEG2_ACCEL_SPARC_VIS2 | MPEG2_ACCEL_DETECT) == - MPEG2_ACCEL_DETECT) { - static RETSIGTYPE (* oldsig) (int); - - oldsig = signal (SIGILL, sigill_handler); - if (sigsetjmp (jmpbuf, 1)) { - signal (SIGILL, oldsig); - return accel; - } - - canjump = 1; - - /* pdist %f0, %f0, %f0 */ - __asm__ __volatile__(".word\t0x81b007c0"); - - canjump = 0; - accel |= MPEG2_ACCEL_SPARC_VIS; - - if (sigsetjmp (jmpbuf, 1)) { - signal (SIGILL, oldsig); - return accel; - } - - canjump = 1; - - /* edge8n %g0, %g0, %g0 */ - __asm__ __volatile__(".word\t0x81b00020"); - - canjump = 0; - accel |= MPEG2_ACCEL_SPARC_VIS2; - - signal (SIGILL, oldsig); - } -#endif /* ACCEL_DETECT */ - - return accel; -} -#endif /* ARCH_SPARC */ - -#ifdef ARCH_ALPHA -static inline uint32_t arch_accel (uint32_t accel) -{ - if (accel & MPEG2_ACCEL_ALPHA_MVI) - accel |= MPEG2_ACCEL_ALPHA; - -#ifdef ACCEL_DETECT - if (accel & MPEG2_ACCEL_DETECT) { - uint64_t no_mvi; - - asm volatile ("amask %1, %0" - : "=r" (no_mvi) - : "rI" (256)); /* AMASK_MVI */ - accel |= no_mvi ? 
MPEG2_ACCEL_ALPHA : (MPEG2_ACCEL_ALPHA | - MPEG2_ACCEL_ALPHA_MVI); - } -#endif /* ACCEL_DETECT */ - - return accel; -} -#endif /* ARCH_ALPHA */ - -uint32_t mpeg2_detect_accel (uint32_t accel) -{ -#if defined (ARCH_X86) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC) - accel = arch_accel (accel); -#endif - return accel; -} diff --git a/src/libmpeg2new/libmpeg2/cpu_state.c b/src/libmpeg2new/libmpeg2/cpu_state.c deleted file mode 100644 index edbf2dd28..000000000 --- a/src/libmpeg2new/libmpeg2/cpu_state.c +++ /dev/null @@ -1,129 +0,0 @@ -/* - * cpu_state.c - * Copyright (C) 2000-2003 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#include -#include - -#include "../include/mpeg2.h" -#include "../include/attributes.h" -#include "mpeg2_internal.h" -#ifdef ARCH_X86 -#include "../include/mmx.h" -#endif - -void (* mpeg2_cpu_state_save) (cpu_state_t * state) = NULL; -void (* mpeg2_cpu_state_restore) (cpu_state_t * state) = NULL; - -#ifdef ARCH_X86 -static void state_restore_mmx (cpu_state_t * state) -{ - emms (); -} -#endif - -#ifdef ARCH_PPC -#ifdef HAVE_ALTIVEC_H /* gnu */ -#define LI(a,b) "li " #a "," #b "\n\t" -#define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t" -#define STVX(a,b,c) "stvx " #a "," #b "," #c "\n\t" -#define LVX0(a,b,c) "lvx " #a ",0," #c "\n\t" -#define LVX(a,b,c) "lvx " #a "," #b "," #c "\n\t" -#else /* apple */ -#define LI(a,b) "li r" #a "," #b "\n\t" -#define STVX0(a,b,c) "stvx v" #a ",0,r" #c "\n\t" -#define STVX(a,b,c) "stvx v" #a ",r" #b ",r" #c "\n\t" -#define LVX0(a,b,c) "lvx v" #a ",0,r" #c "\n\t" -#define LVX(a,b,c) "lvx v" #a ",r" #b ",r" #c "\n\t" -#endif - -static void state_save_altivec (cpu_state_t * state) -{ - asm (LI (9, 16) - STVX0 (20, 0, 3) - LI (11, 32) - STVX (21, 9, 3) - LI (9, 48) - STVX (22, 11, 3) - LI (11, 64) - STVX (23, 9, 3) - LI (9, 80) - STVX (24, 11, 3) - LI (11, 96) - STVX (25, 9, 3) - LI (9, 112) - STVX (26, 11, 3) - LI (11, 128) - STVX (27, 9, 3) - LI (9, 144) - STVX (28, 11, 3) - LI (11, 160) - STVX (29, 9, 3) - LI (9, 176) - STVX (30, 11, 3) - STVX (31, 9, 3)); -} - -static void state_restore_altivec (cpu_state_t * state) -{ - asm (LI (9, 16) - LVX0 (20, 0, 3) - LI (11, 32) - LVX (21, 9, 3) - LI (9, 48) - LVX (22, 11, 3) - LI (11, 64) - LVX (23, 9, 3) - LI (9, 80) - LVX (24, 11, 3) - LI (11, 96) - LVX (25, 9, 3) - LI (9, 112) - LVX (26, 11, 3) - LI (11, 128) - LVX (27, 9, 3) - LI (9, 144) - LVX (28, 11, 
3) - LI (11, 160) - LVX (29, 9, 3) - LI (9, 176) - LVX (30, 11, 3) - LVX (31, 9, 3)); -} -#endif - -void mpeg2_cpu_state_init (uint32_t accel) -{ -#ifdef ARCH_X86 - if (accel & MPEG2_ACCEL_X86_MMX) { - mpeg2_cpu_state_restore = state_restore_mmx; - } -#endif -#ifdef ARCH_PPC - if (accel & MPEG2_ACCEL_PPC_ALTIVEC) { - mpeg2_cpu_state_save = state_save_altivec; - mpeg2_cpu_state_restore = state_restore_altivec; - } -#endif -} diff --git a/src/libmpeg2new/libmpeg2/decode.c b/src/libmpeg2new/libmpeg2/decode.c deleted file mode 100644 index 337ba4466..000000000 --- a/src/libmpeg2new/libmpeg2/decode.c +++ /dev/null @@ -1,439 +0,0 @@ -/* - * decode.c - * Copyright (C) 2000-2003 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#include /* memcmp/memset, try to remove */ -#include -#include - -#include "../include/mpeg2.h" -#include "../include/attributes.h" -#include "mpeg2_internal.h" - -static int mpeg2_accels = 0; - -#define BUFFER_SIZE (1194 * 1024) - -const mpeg2_info_t * mpeg2_info (mpeg2dec_t * mpeg2dec) -{ - return &(mpeg2dec->info); -} - -static inline int skip_chunk (mpeg2dec_t * mpeg2dec, int bytes) -{ - uint8_t * current; - uint32_t shift; - uint8_t * limit; - uint8_t byte; - - if (!bytes) - return 0; - - current = mpeg2dec->buf_start; - shift = mpeg2dec->shift; - limit = current + bytes; - - do { - byte = *current++; - if (shift == 0x00000100) { - int skipped; - - mpeg2dec->shift = 0xffffff00; - skipped = current - mpeg2dec->buf_start; - mpeg2dec->buf_start = current; - return skipped; - } - shift = (shift | byte) << 8; - } while (current < limit); - - mpeg2dec->shift = shift; - mpeg2dec->buf_start = current; - return 0; -} - -static inline int copy_chunk (mpeg2dec_t * mpeg2dec, int bytes) -{ - uint8_t * current; - uint32_t shift; - uint8_t * chunk_ptr; - uint8_t * limit; - uint8_t byte; - - if (!bytes) - return 0; - - current = mpeg2dec->buf_start; - shift = mpeg2dec->shift; - chunk_ptr = mpeg2dec->chunk_ptr; - limit = current + bytes; - - do { - byte = *current++; - if (shift == 0x00000100) { - int copied; - - mpeg2dec->shift = 0xffffff00; - mpeg2dec->chunk_ptr = chunk_ptr + 1; - copied = current - mpeg2dec->buf_start; - mpeg2dec->buf_start = current; - return copied; - } - shift = (shift | byte) << 8; - *chunk_ptr++ = byte; - } while (current < limit); - - mpeg2dec->shift = shift; - mpeg2dec->buf_start = current; - return 0; -} - -void mpeg2_buffer (mpeg2dec_t * mpeg2dec, uint8_t * start, uint8_t * end) -{ - mpeg2dec->buf_start = 
start; - mpeg2dec->buf_end = end; -} - -int mpeg2_getpos (mpeg2dec_t * mpeg2dec) -{ - return mpeg2dec->buf_end - mpeg2dec->buf_start; -} - -static inline mpeg2_state_t seek_chunk (mpeg2dec_t * mpeg2dec) -{ - int size, skipped; - - size = mpeg2dec->buf_end - mpeg2dec->buf_start; - skipped = skip_chunk (mpeg2dec, size); - if (!skipped) { - mpeg2dec->bytes_since_tag += size; - return STATE_BUFFER; - } - mpeg2dec->bytes_since_tag += skipped; - mpeg2dec->code = mpeg2dec->buf_start[-1]; - return STATE_INTERNAL_NORETURN; -} - -mpeg2_state_t mpeg2_seek_header (mpeg2dec_t * mpeg2dec) -{ - while (!(mpeg2dec->code == 0xb3 || - ((mpeg2dec->code == 0xb7 || mpeg2dec->code == 0xb8 || - !mpeg2dec->code) && mpeg2dec->sequence.width != (unsigned)-1))) - if (seek_chunk (mpeg2dec) == STATE_BUFFER) - return STATE_BUFFER; - mpeg2dec->chunk_start = mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer; - mpeg2dec->user_data_len = 0; - return ((mpeg2dec->code == 0xb7) ? - mpeg2_header_end (mpeg2dec) : mpeg2_parse_header (mpeg2dec)); -} - -#define RECEIVED(code,state) (((state) << 8) + (code)) - -mpeg2_state_t mpeg2_parse (mpeg2dec_t * mpeg2dec) -{ - int size_buffer, size_chunk, copied; - - if (mpeg2dec->action) { - mpeg2_state_t state; - - state = mpeg2dec->action (mpeg2dec); - if ((int)state > (int)STATE_INTERNAL_NORETURN) - return state; - } - - while (1) { - while ((unsigned) (mpeg2dec->code - mpeg2dec->first_decode_slice) < - mpeg2dec->nb_decode_slices) { - size_buffer = mpeg2dec->buf_end - mpeg2dec->buf_start; - size_chunk = (mpeg2dec->chunk_buffer + BUFFER_SIZE - - mpeg2dec->chunk_ptr); - if (size_buffer <= size_chunk) { - copied = copy_chunk (mpeg2dec, size_buffer); - if (!copied) { - mpeg2dec->bytes_since_tag += size_buffer; - mpeg2dec->chunk_ptr += size_buffer; - return STATE_BUFFER; - } - } else { - copied = copy_chunk (mpeg2dec, size_chunk); - if (!copied) { - /* filled the chunk buffer without finding a start code */ - mpeg2dec->bytes_since_tag += size_chunk; - mpeg2dec->action = 
seek_chunk; - return STATE_INVALID; - } - } - mpeg2dec->bytes_since_tag += copied; - - mpeg2_slice (&(mpeg2dec->decoder), mpeg2dec->code, - mpeg2dec->chunk_start); - mpeg2dec->code = mpeg2dec->buf_start[-1]; - mpeg2dec->chunk_ptr = mpeg2dec->chunk_start; - } - if ((unsigned) (mpeg2dec->code - 1) >= 0xb0 - 1) - break; - if (seek_chunk (mpeg2dec) == STATE_BUFFER) - return STATE_BUFFER; - } - - mpeg2dec->action = mpeg2_seek_header; - switch (mpeg2dec->code) { - case 0x00: - return mpeg2dec->state; - case 0xb3: - case 0xb7: - case 0xb8: - return (mpeg2dec->state == STATE_SLICE) ? STATE_SLICE : STATE_INVALID; - default: - mpeg2dec->action = seek_chunk; - return STATE_INVALID; - } -} - -mpeg2_state_t mpeg2_parse_header (mpeg2dec_t * mpeg2dec) -{ - static int (* process_header[]) (mpeg2dec_t * mpeg2dec) = { - mpeg2_header_picture, mpeg2_header_extension, mpeg2_header_user_data, - mpeg2_header_sequence, NULL, NULL, NULL, NULL, mpeg2_header_gop - }; - int size_buffer, size_chunk, copied; - - mpeg2dec->action = mpeg2_parse_header; - mpeg2dec->info.user_data = NULL; mpeg2dec->info.user_data_len = 0; - while (1) { - size_buffer = mpeg2dec->buf_end - mpeg2dec->buf_start; - size_chunk = (mpeg2dec->chunk_buffer + BUFFER_SIZE - - mpeg2dec->chunk_ptr); - if (size_buffer <= size_chunk) { - copied = copy_chunk (mpeg2dec, size_buffer); - if (!copied) { - mpeg2dec->bytes_since_tag += size_buffer; - mpeg2dec->chunk_ptr += size_buffer; - return STATE_BUFFER; - } - } else { - copied = copy_chunk (mpeg2dec, size_chunk); - if (!copied) { - /* filled the chunk buffer without finding a start code */ - mpeg2dec->bytes_since_tag += size_chunk; - mpeg2dec->code = 0xb4; - mpeg2dec->action = mpeg2_seek_header; - return STATE_INVALID; - } - } - mpeg2dec->bytes_since_tag += copied; - - if (process_header[mpeg2dec->code & 0x0b] (mpeg2dec)) { - mpeg2dec->code = mpeg2dec->buf_start[-1]; - mpeg2dec->action = mpeg2_seek_header; - return STATE_INVALID; - } - - mpeg2dec->code = mpeg2dec->buf_start[-1]; - 
switch (RECEIVED (mpeg2dec->code, mpeg2dec->state)) { - - /* state transition after a sequence header */ - case RECEIVED (0x00, STATE_SEQUENCE): - case RECEIVED (0xb8, STATE_SEQUENCE): - mpeg2_header_sequence_finalize (mpeg2dec); - break; - - /* other legal state transitions */ - case RECEIVED (0x00, STATE_GOP): - mpeg2_header_gop_finalize (mpeg2dec); - break; - case RECEIVED (0x01, STATE_PICTURE): - case RECEIVED (0x01, STATE_PICTURE_2ND): - mpeg2_header_picture_finalize (mpeg2dec, mpeg2_accels); - mpeg2dec->action = mpeg2_header_slice_start; - break; - - /* legal headers within a given state */ - case RECEIVED (0xb2, STATE_SEQUENCE): - case RECEIVED (0xb2, STATE_GOP): - case RECEIVED (0xb2, STATE_PICTURE): - case RECEIVED (0xb2, STATE_PICTURE_2ND): - case RECEIVED (0xb5, STATE_SEQUENCE): - case RECEIVED (0xb5, STATE_PICTURE): - case RECEIVED (0xb5, STATE_PICTURE_2ND): - mpeg2dec->chunk_ptr = mpeg2dec->chunk_start; - continue; - - default: - mpeg2dec->action = mpeg2_seek_header; - return STATE_INVALID; - } - - mpeg2dec->chunk_start = mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer; - mpeg2dec->user_data_len = 0; - return mpeg2dec->state; - } -} - -int mpeg2_convert (mpeg2dec_t * mpeg2dec, mpeg2_convert_t convert, void * arg) -{ - mpeg2_convert_init_t convert_init; - int error; - - error = convert (MPEG2_CONVERT_SET, NULL, &(mpeg2dec->sequence), 0, - mpeg2_accels, arg, &convert_init); - if (!error) { - mpeg2dec->convert = convert; - mpeg2dec->convert_arg = arg; - mpeg2dec->convert_id_size = convert_init.id_size; - mpeg2dec->convert_stride = 0; - } - return error; -} - -int mpeg2_stride (mpeg2dec_t * mpeg2dec, int stride) -{ - if (!mpeg2dec->convert) { - if (stride < (int) mpeg2dec->sequence.width) - stride = mpeg2dec->sequence.width; - mpeg2dec->decoder.stride_frame = stride; - } else { - mpeg2_convert_init_t convert_init; - - stride = mpeg2dec->convert (MPEG2_CONVERT_STRIDE, NULL, - &(mpeg2dec->sequence), stride, - mpeg2_accels, mpeg2dec->convert_arg, - 
&convert_init); - mpeg2dec->convert_id_size = convert_init.id_size; - mpeg2dec->convert_stride = stride; - } - return stride; -} - -void mpeg2_set_buf (mpeg2dec_t * mpeg2dec, uint8_t * buf[3], void * id) -{ - mpeg2_fbuf_t * fbuf; - - if (mpeg2dec->custom_fbuf) { - if (mpeg2dec->state == STATE_SEQUENCE) { - mpeg2dec->fbuf[2] = mpeg2dec->fbuf[1]; - mpeg2dec->fbuf[1] = mpeg2dec->fbuf[0]; - } - mpeg2_set_fbuf (mpeg2dec, (mpeg2dec->decoder.coding_type == - PIC_FLAG_CODING_TYPE_B)); - fbuf = mpeg2dec->fbuf[0]; - } else { - fbuf = &(mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index].fbuf); - mpeg2dec->alloc_index_user = ++mpeg2dec->alloc_index; - } - fbuf->buf[0] = buf[0]; - fbuf->buf[1] = buf[1]; - fbuf->buf[2] = buf[2]; - fbuf->id = id; -} - -void mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf) -{ - mpeg2dec->custom_fbuf = custom_fbuf; -} - -void mpeg2_skip (mpeg2dec_t * mpeg2dec, int skip) -{ - mpeg2dec->first_decode_slice = 1; - mpeg2dec->nb_decode_slices = skip ? 0 : (0xb0 - 1); -} - -void mpeg2_slice_region (mpeg2dec_t * mpeg2dec, int start, int end) -{ - start = (start < 1) ? 1 : (start > 0xb0) ? 0xb0 : start; - end = (end < start) ? start : (end > 0xb0) ? 
0xb0 : end; - mpeg2dec->first_decode_slice = start; - mpeg2dec->nb_decode_slices = end - start; -} - -void mpeg2_tag_picture (mpeg2dec_t * mpeg2dec, uint32_t tag, uint32_t tag2) -{ - mpeg2dec->tag_previous = mpeg2dec->tag_current; - mpeg2dec->tag2_previous = mpeg2dec->tag2_current; - mpeg2dec->tag_current = tag; - mpeg2dec->tag2_current = tag2; - mpeg2dec->num_tags++; - mpeg2dec->bytes_since_tag = 0; -} - -uint32_t mpeg2_accel (uint32_t accel) -{ - if (!mpeg2_accels) { - mpeg2_accels = mpeg2_detect_accel (accel) | MPEG2_ACCEL_DETECT; - mpeg2_cpu_state_init (mpeg2_accels); - mpeg2_idct_init (mpeg2_accels); - mpeg2_mc_init (mpeg2_accels); - } - return mpeg2_accels & ~MPEG2_ACCEL_DETECT; -} - -void mpeg2_reset (mpeg2dec_t * mpeg2dec, int full_reset) -{ - mpeg2dec->buf_start = mpeg2dec->buf_end = NULL; - mpeg2dec->num_tags = 0; - mpeg2dec->shift = 0xffffff00; - mpeg2dec->code = 0xb4; - mpeg2dec->action = mpeg2_seek_header; - mpeg2dec->state = STATE_INVALID; - mpeg2dec->first = 1; - - mpeg2_reset_info(&(mpeg2dec->info)); - mpeg2dec->info.gop = NULL; - mpeg2dec->info.user_data = NULL; - mpeg2dec->info.user_data_len = 0; - if (full_reset) { - mpeg2dec->info.sequence = NULL; - mpeg2_header_state_init (mpeg2dec); - } - -} - -mpeg2dec_t * mpeg2_init (void) -{ - mpeg2dec_t * mpeg2dec; - - mpeg2_accel (MPEG2_ACCEL_DETECT); - - mpeg2dec = (mpeg2dec_t *) mpeg2_malloc (sizeof (mpeg2dec_t), - MPEG2_ALLOC_MPEG2DEC); - if (mpeg2dec == NULL) - return NULL; - - memset (mpeg2dec->decoder.DCTblock, 0, 64 * sizeof (int16_t)); - memset (mpeg2dec->quantizer_matrix, 0, 4 * 64 * sizeof (uint8_t)); - - mpeg2dec->chunk_buffer = (uint8_t *) mpeg2_malloc (BUFFER_SIZE + 4, - MPEG2_ALLOC_CHUNK); - - mpeg2dec->sequence.width = (unsigned)-1; - mpeg2_reset (mpeg2dec, 1); - - return mpeg2dec; -} - -void mpeg2_close (mpeg2dec_t * mpeg2dec) -{ - mpeg2_header_state_init (mpeg2dec); - mpeg2_free (mpeg2dec->chunk_buffer); - mpeg2_free (mpeg2dec); -} diff --git a/src/libmpeg2new/libmpeg2/header.c 
b/src/libmpeg2new/libmpeg2/header.c deleted file mode 100644 index 935a50aa3..000000000 --- a/src/libmpeg2new/libmpeg2/header.c +++ /dev/null @@ -1,961 +0,0 @@ -/* - * header.c - * Copyright (C) 2000-2003 Michel Lespinasse - * Copyright (C) 2003 Regis Duchesne - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#include -#include /* defines NULL */ -#include /* memcmp */ - -#include "../include/mpeg2.h" -#include "../include/attributes.h" -#include "mpeg2_internal.h" - -#define SEQ_EXT 2 -#define SEQ_DISPLAY_EXT 4 -#define QUANT_MATRIX_EXT 8 -#define COPYRIGHT_EXT 0x10 -#define PIC_DISPLAY_EXT 0x80 -#define PIC_CODING_EXT 0x100 - -/* default intra quant matrix, in zig-zag order */ -static const uint8_t default_intra_quantizer_matrix[64] ATTR_ALIGN(16) = { - 8, - 16, 16, - 19, 16, 19, - 22, 22, 22, 22, - 22, 22, 26, 24, 26, - 27, 27, 27, 26, 26, 26, - 26, 27, 27, 27, 29, 29, 29, - 34, 34, 34, 29, 29, 29, 27, 27, - 29, 29, 32, 32, 34, 34, 37, - 38, 37, 35, 35, 34, 35, - 38, 38, 40, 40, 40, - 48, 48, 46, 46, - 56, 56, 58, - 69, 69, - 83 -}; - -uint8_t mpeg2_scan_norm[64] ATTR_ALIGN(16) = { - /* Zig-Zag scan pattern 
*/ - 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, - 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, - 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, - 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63 -}; - -uint8_t mpeg2_scan_alt[64] ATTR_ALIGN(16) = { - /* Alternate scan pattern */ - 0, 8, 16, 24, 1, 9, 2, 10, 17, 25, 32, 40, 48, 56, 57, 49, - 41, 33, 26, 18, 3, 11, 4, 12, 19, 27, 34, 42, 50, 58, 35, 43, - 51, 59, 20, 28, 5, 13, 6, 14, 21, 29, 36, 44, 52, 60, 37, 45, - 53, 61, 22, 30, 7, 15, 23, 31, 38, 46, 54, 62, 39, 47, 55, 63 -}; - -void mpeg2_header_state_init (mpeg2dec_t * mpeg2dec) -{ - if (mpeg2dec->sequence.width != (unsigned)-1) { - int i; - - mpeg2dec->sequence.width = (unsigned)-1; - if (!mpeg2dec->custom_fbuf) - for (i = mpeg2dec->alloc_index_user; - i < mpeg2dec->alloc_index; i++) { - mpeg2_free (mpeg2dec->fbuf_alloc[i].fbuf.buf[0]); - mpeg2_free (mpeg2dec->fbuf_alloc[i].fbuf.buf[1]); - mpeg2_free (mpeg2dec->fbuf_alloc[i].fbuf.buf[2]); - } - if (mpeg2dec->convert_start) - for (i = 0; i < 3; i++) { - mpeg2_free (mpeg2dec->yuv_buf[i][0]); - mpeg2_free (mpeg2dec->yuv_buf[i][1]); - mpeg2_free (mpeg2dec->yuv_buf[i][2]); - } - if (mpeg2dec->decoder.convert_id) - mpeg2_free (mpeg2dec->decoder.convert_id); - } - mpeg2dec->decoder.coding_type = I_TYPE; - mpeg2dec->decoder.convert = NULL; - mpeg2dec->decoder.convert_id = NULL; - mpeg2dec->picture = mpeg2dec->pictures; - mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[0].fbuf; - mpeg2dec->fbuf[1] = &mpeg2dec->fbuf_alloc[1].fbuf; - mpeg2dec->fbuf[2] = &mpeg2dec->fbuf_alloc[2].fbuf; - mpeg2dec->first = 1; - mpeg2dec->alloc_index = 0; - mpeg2dec->alloc_index_user = 0; - mpeg2dec->first_decode_slice = 1; - mpeg2dec->nb_decode_slices = 0xb0 - 1; - mpeg2dec->convert = NULL; - mpeg2dec->convert_start = NULL; - mpeg2dec->custom_fbuf = 0; - mpeg2dec->yuv_index = 0; -} - -void mpeg2_reset_info (mpeg2_info_t * info) -{ - info->current_picture = info->current_picture_2nd = 
NULL; - info->display_picture = info->display_picture_2nd = NULL; - info->current_fbuf = info->display_fbuf = info->discard_fbuf = NULL; -} - -static void info_user_data (mpeg2dec_t * mpeg2dec) -{ - if (mpeg2dec->user_data_len) { - mpeg2dec->info.user_data = mpeg2dec->chunk_buffer; - mpeg2dec->info.user_data_len = mpeg2dec->user_data_len - 3; - } -} - -int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec) -{ - uint8_t * buffer = mpeg2dec->chunk_start; - mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence); - static unsigned int frame_period[16] = { - 0, 1126125, 1125000, 1080000, 900900, 900000, 540000, 450450, 450000, - /* unofficial: xing 15 fps */ - 1800000, - /* unofficial: libmpeg3 "Unofficial economy rates" 5/10/12/15 fps */ - 5400000, 2700000, 2250000, 1800000, 0, 0 - }; - int i; - - if ((buffer[6] & 0x20) != 0x20) /* missing marker_bit */ - return 1; - - i = (buffer[0] << 16) | (buffer[1] << 8) | buffer[2]; - if (! (sequence->display_width = sequence->picture_width = i >> 12)) - return 1; - if (! 
(sequence->display_height = sequence->picture_height = i & 0xfff)) - return 1; - sequence->width = (sequence->picture_width + 15) & ~15; - sequence->height = (sequence->picture_height + 15) & ~15; - sequence->chroma_width = sequence->width >> 1; - sequence->chroma_height = sequence->height >> 1; - - sequence->flags = (SEQ_FLAG_PROGRESSIVE_SEQUENCE | - SEQ_VIDEO_FORMAT_UNSPECIFIED); - - sequence->pixel_width = buffer[3] >> 4; /* aspect ratio */ - sequence->frame_period = frame_period[buffer[3] & 15]; - - sequence->byte_rate = (buffer[4]<<10) | (buffer[5]<<2) | (buffer[6]>>6); - - sequence->vbv_buffer_size = ((buffer[6]<<16)|(buffer[7]<<8))&0x1ff800; - - if (buffer[7] & 4) - sequence->flags |= SEQ_FLAG_CONSTRAINED_PARAMETERS; - - mpeg2dec->copy_matrix = 3; - if (buffer[7] & 2) { - for (i = 0; i < 64; i++) - mpeg2dec->new_quantizer_matrix[0][mpeg2_scan_norm[i]] = - (buffer[i+7] << 7) | (buffer[i+8] >> 1); - buffer += 64; - } else - for (i = 0; i < 64; i++) - mpeg2dec->new_quantizer_matrix[0][mpeg2_scan_norm[i]] = - default_intra_quantizer_matrix[i]; - - if (buffer[7] & 1) - for (i = 0; i < 64; i++) - mpeg2dec->new_quantizer_matrix[1][mpeg2_scan_norm[i]] = - buffer[i+8]; - else - memset (mpeg2dec->new_quantizer_matrix[1], 16, 64); - - sequence->profile_level_id = 0x80; - sequence->colour_primaries = 0; - sequence->transfer_characteristics = 0; - sequence->matrix_coefficients = 0; - - mpeg2dec->ext_state = SEQ_EXT; - mpeg2dec->state = STATE_SEQUENCE; - mpeg2dec->display_offset_x = mpeg2dec->display_offset_y = 0; - - return 0; -} - -static int sequence_ext (mpeg2dec_t * mpeg2dec) -{ - uint8_t * buffer = mpeg2dec->chunk_start; - mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence); - uint32_t flags; - - if (!(buffer[3] & 1)) - return 1; - - sequence->profile_level_id = (buffer[0] << 4) | (buffer[1] >> 4); - - sequence->display_width = sequence->picture_width += - ((buffer[1] << 13) | (buffer[2] << 5)) & 0x3000; - sequence->display_height = sequence->picture_height += - 
(buffer[2] << 7) & 0x3000; - sequence->width = (sequence->picture_width + 15) & ~15; - sequence->height = (sequence->picture_height + 15) & ~15; - flags = sequence->flags | SEQ_FLAG_MPEG2; - if (!(buffer[1] & 8)) { - flags &= ~SEQ_FLAG_PROGRESSIVE_SEQUENCE; - sequence->height = (sequence->height + 31) & ~31; - } - if (buffer[5] & 0x80) - flags |= SEQ_FLAG_LOW_DELAY; - sequence->flags = flags; - sequence->chroma_width = sequence->width; - sequence->chroma_height = sequence->height; - switch (buffer[1] & 6) { - case 0: /* invalid */ - return 1; - case 2: /* 4:2:0 */ - sequence->chroma_height >>= 1; - case 4: /* 4:2:2 */ - sequence->chroma_width >>= 1; - } - - sequence->byte_rate += ((buffer[2]<<25) | (buffer[3]<<17)) & 0x3ffc0000; - - sequence->vbv_buffer_size |= buffer[4] << 21; - - sequence->frame_period = - sequence->frame_period * ((buffer[5]&31)+1) / (((buffer[5]>>2)&3)+1); - - mpeg2dec->ext_state = SEQ_DISPLAY_EXT; - - return 0; -} - -static int sequence_display_ext (mpeg2dec_t * mpeg2dec) -{ - uint8_t * buffer = mpeg2dec->chunk_start; - mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence); - - sequence->flags = ((sequence->flags & ~SEQ_MASK_VIDEO_FORMAT) | - ((buffer[0]<<4) & SEQ_MASK_VIDEO_FORMAT)); - if (buffer[0] & 1) { - sequence->flags |= SEQ_FLAG_COLOUR_DESCRIPTION; - sequence->colour_primaries = buffer[1]; - sequence->transfer_characteristics = buffer[2]; - sequence->matrix_coefficients = buffer[3]; - buffer += 3; - } - - if (!(buffer[2] & 2)) /* missing marker_bit */ - return 1; - - sequence->display_width = (buffer[1] << 6) | (buffer[2] >> 2); - sequence->display_height = - ((buffer[2]& 1 ) << 13) | (buffer[3] << 5) | (buffer[4] >> 3); - - return 0; -} - -static inline void simplify (unsigned int * u, unsigned int * v) -{ - unsigned int a, b, tmp; - - a = *u; b = *v; - while (a) { /* find greatest common divisor */ - tmp = a; a = b % tmp; b = tmp; - } - *u /= b; *v /= b; -} - -static inline void finalize_sequence (mpeg2_sequence_t * sequence) -{ - 
int width; - int height; - - sequence->byte_rate *= 50; - - if (sequence->flags & SEQ_FLAG_MPEG2) { - switch (sequence->pixel_width) { - case 1: /* square pixels */ - sequence->pixel_width = sequence->pixel_height = 1; return; - case 2: /* 4:3 aspect ratio */ - width = 4; height = 3; break; - case 3: /* 16:9 aspect ratio */ - width = 16; height = 9; break; - case 4: /* 2.21:1 aspect ratio */ - width = 221; height = 100; break; - default: /* illegal */ - sequence->pixel_width = sequence->pixel_height = 0; return; - } - width *= sequence->display_height; - height *= sequence->display_width; - - } else { - if (sequence->byte_rate == 50 * 0x3ffff) - sequence->byte_rate = 0; /* mpeg-1 VBR */ - - switch (sequence->pixel_width) { - case 0: case 15: /* illegal */ - sequence->pixel_width = sequence->pixel_height = 0; return; - case 1: /* square pixels */ - sequence->pixel_width = sequence->pixel_height = 1; return; - case 3: /* 720x576 16:9 */ - sequence->pixel_width = 64; sequence->pixel_height = 45; return; - case 6: /* 720x480 16:9 */ - sequence->pixel_width = 32; sequence->pixel_height = 27; return; - case 8: /* BT.601 625 lines 4:3 */ - sequence->pixel_width = 59; sequence->pixel_height = 54; return; - case 12: /* BT.601 525 lines 4:3 */ - sequence->pixel_width = 10; sequence->pixel_height = 11; return; - default: - height = 88 * sequence->pixel_width + 1171; - width = 2000; - } - } - - sequence->pixel_width = width; - sequence->pixel_height = height; - simplify (&sequence->pixel_width, &sequence->pixel_height); -} - -int mpeg2_guess_aspect (const mpeg2_sequence_t * sequence, - unsigned int * pixel_width, - unsigned int * pixel_height) -{ - static struct { - unsigned int width, height; - } video_modes[] = { - {720, 576}, /* 625 lines, 13.5 MHz (D1, DV, DVB, DVD) */ - {704, 576}, /* 625 lines, 13.5 MHz (1/1 D1, DVB, DVD, 4CIF) */ - {544, 576}, /* 625 lines, 10.125 MHz (DVB, laserdisc) */ - {528, 576}, /* 625 lines, 10.125 MHz (3/4 D1, DVB, laserdisc) */ - {480, 576}, /* 
625 lines, 9 MHz (2/3 D1, DVB, SVCD) */ - {352, 576}, /* 625 lines, 6.75 MHz (D2, 1/2 D1, CVD, DVB, DVD) */ - {352, 288}, /* 625 lines, 6.75 MHz, 1 field (D4, VCD, DVB, DVD, CIF) */ - {176, 144}, /* 625 lines, 3.375 MHz, half field (QCIF) */ - {720, 486}, /* 525 lines, 13.5 MHz (D1) */ - {704, 486}, /* 525 lines, 13.5 MHz */ - {720, 480}, /* 525 lines, 13.5 MHz (DV, DSS, DVD) */ - {704, 480}, /* 525 lines, 13.5 MHz (1/1 D1, ATSC, DVD) */ - {544, 480}, /* 525 lines. 10.125 MHz (DSS, laserdisc) */ - {528, 480}, /* 525 lines. 10.125 MHz (3/4 D1, laserdisc) */ - {480, 480}, /* 525 lines, 9 MHz (2/3 D1, SVCD) */ - {352, 480}, /* 525 lines, 6.75 MHz (D2, 1/2 D1, CVD, DVD) */ - {352, 240} /* 525 lines. 6.75 MHz, 1 field (D4, VCD, DSS, DVD) */ - }; - unsigned int width, height, pix_width, pix_height, i, DAR_16_9; - - *pixel_width = sequence->pixel_width; - *pixel_height = sequence->pixel_height; - width = sequence->picture_width; - height = sequence->picture_height; - for (i = 0; i < sizeof (video_modes) / sizeof (video_modes[0]); i++) - if (width == video_modes[i].width && height == video_modes[i].height) - break; - if (i == sizeof (video_modes) / sizeof (video_modes[0]) || - (sequence->pixel_width == 1 && sequence->pixel_height == 1) || - width != sequence->display_width || height != sequence->display_height) - return 0; - - for (pix_height = 1; height * pix_height < 480; pix_height <<= 1); - height *= pix_height; - for (pix_width = 1; width * pix_width <= 352; pix_width <<= 1); - width *= pix_width; - - if (! 
(sequence->flags & SEQ_FLAG_MPEG2)) { - static unsigned int mpeg1_check[2][2] = {{11, 54}, {27, 45}}; - DAR_16_9 = (sequence->pixel_height == 27 || - sequence->pixel_height == 45); - if (width < 704 || - sequence->pixel_height != mpeg1_check[DAR_16_9][height == 576]) - return 0; - } else { - DAR_16_9 = (3 * sequence->picture_width * sequence->pixel_width > - 4 * sequence->picture_height * sequence->pixel_height); - switch (width) { - case 528: case 544: pix_width *= 4; pix_height *= 3; break; - case 480: pix_width *= 3; pix_height *= 2; break; - } - } - if (DAR_16_9) { - pix_width *= 4; pix_height *= 3; - } - if (height == 576) { - pix_width *= 59; pix_height *= 54; - } else { - pix_width *= 10; pix_height *= 11; - } - *pixel_width = pix_width; - *pixel_height = pix_height; - simplify (pixel_width, pixel_height); - return (height == 576) ? 1 : 2; -} - -static void copy_matrix (mpeg2dec_t * mpeg2dec, int index) -{ - if (memcmp (mpeg2dec->quantizer_matrix[index], - mpeg2dec->new_quantizer_matrix[index], 64)) { - memcpy (mpeg2dec->quantizer_matrix[index], - mpeg2dec->new_quantizer_matrix[index], 64); - mpeg2dec->scaled[index] = -1; - } -} - -static void finalize_matrix (mpeg2dec_t * mpeg2dec) -{ - mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); - int i; - - for (i = 0; i < 2; i++) { - if (mpeg2dec->copy_matrix & (1 << i)) - copy_matrix (mpeg2dec, i); - if ((mpeg2dec->copy_matrix & (4 << i)) && - memcmp (mpeg2dec->quantizer_matrix[i], - mpeg2dec->new_quantizer_matrix[i+2], 64)) { - copy_matrix (mpeg2dec, i + 2); - decoder->chroma_quantizer[i] = decoder->quantizer_prescale[i+2]; - } else if (mpeg2dec->copy_matrix & (5 << i)) - decoder->chroma_quantizer[i] = decoder->quantizer_prescale[i]; - } -} - -static mpeg2_state_t invalid_end_action (mpeg2dec_t * mpeg2dec) -{ - mpeg2_reset_info (&(mpeg2dec->info)); - mpeg2dec->info.gop = NULL; - info_user_data (mpeg2dec); - mpeg2_header_state_init (mpeg2dec); - mpeg2dec->sequence = mpeg2dec->new_sequence; - mpeg2dec->action = 
mpeg2_seek_header; - mpeg2dec->state = STATE_SEQUENCE; - return STATE_SEQUENCE; -} - -void mpeg2_header_sequence_finalize (mpeg2dec_t * mpeg2dec) -{ - mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence); - mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); - - finalize_sequence (sequence); - finalize_matrix (mpeg2dec); - - decoder->mpeg1 = !(sequence->flags & SEQ_FLAG_MPEG2); - decoder->width = sequence->width; - decoder->height = sequence->height; - decoder->vertical_position_extension = (sequence->picture_height > 2800); - decoder->chroma_format = ((sequence->chroma_width == sequence->width) + - (sequence->chroma_height == sequence->height)); - - if (mpeg2dec->sequence.width != (unsigned)-1) { - /* - * According to 6.1.1.6, repeat sequence headers should be - * identical to the original. However some encoders dont - * respect that and change various fields (including bitrate - * and aspect ratio) in the repeat sequence headers. So we - * choose to be as conservative as possible and only restart - * the decoder if the width, height, chroma_width, - * chroma_height or low_delay flag are modified. - */ - if (sequence->width != mpeg2dec->sequence.width || - sequence->height != mpeg2dec->sequence.height || - sequence->chroma_width != mpeg2dec->sequence.chroma_width || - sequence->chroma_height != mpeg2dec->sequence.chroma_height || - ((sequence->flags ^ mpeg2dec->sequence.flags) & - SEQ_FLAG_LOW_DELAY)) { - decoder->stride_frame = sequence->width; - mpeg2_header_end (mpeg2dec); - mpeg2dec->action = invalid_end_action; - mpeg2dec->state = STATE_INVALID_END; - return; - } - mpeg2dec->state = (memcmp (&(mpeg2dec->sequence), sequence, - sizeof (mpeg2_sequence_t)) ? 
- STATE_SEQUENCE_MODIFIED : STATE_SEQUENCE_REPEATED); - } else - decoder->stride_frame = sequence->width; - mpeg2dec->sequence = *sequence; - mpeg2_reset_info (&(mpeg2dec->info)); - mpeg2dec->info.sequence = &(mpeg2dec->sequence); - mpeg2dec->info.gop = NULL; - info_user_data (mpeg2dec); -} - -int mpeg2_header_gop (mpeg2dec_t * mpeg2dec) -{ - uint8_t * buffer = mpeg2dec->chunk_start; - mpeg2_gop_t * gop = &(mpeg2dec->new_gop); - - if (! (buffer[1] & 8)) - return 1; - gop->hours = (buffer[0] >> 2) & 31; - gop->minutes = ((buffer[0] << 4) | (buffer[1] >> 4)) & 63; - gop->seconds = ((buffer[1] << 3) | (buffer[2] >> 5)) & 63; - gop->pictures = ((buffer[2] << 1) | (buffer[3] >> 7)) & 63; - gop->flags = (buffer[0] >> 7) | ((buffer[3] >> 4) & 6); - mpeg2dec->state = STATE_GOP; - return 0; -} - -void mpeg2_header_gop_finalize (mpeg2dec_t * mpeg2dec) -{ - mpeg2dec->gop = mpeg2dec->new_gop; - mpeg2_reset_info (&(mpeg2dec->info)); - mpeg2dec->info.gop = &(mpeg2dec->gop); - info_user_data (mpeg2dec); -} - -void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int b_type) -{ - int i; - - for (i = 0; i < 3; i++) - if (mpeg2dec->fbuf[1] != &mpeg2dec->fbuf_alloc[i].fbuf && - mpeg2dec->fbuf[2] != &mpeg2dec->fbuf_alloc[i].fbuf) { - mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[i].fbuf; - mpeg2dec->info.current_fbuf = mpeg2dec->fbuf[0]; - if (b_type || (mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY)) { - if (b_type || mpeg2dec->convert) - mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[0]; - mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[0]; - } - break; - } -} - -int mpeg2_header_picture (mpeg2dec_t * mpeg2dec) -{ - uint8_t * buffer = mpeg2dec->chunk_start; - mpeg2_picture_t * picture = &(mpeg2dec->new_picture); - mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); - int type; - - mpeg2dec->state = ((mpeg2dec->state != STATE_SLICE_1ST) ? 
- STATE_PICTURE : STATE_PICTURE_2ND); - mpeg2dec->ext_state = PIC_CODING_EXT; - - picture->temporal_reference = (buffer[0] << 2) | (buffer[1] >> 6); - - type = (buffer [1] >> 3) & 7; - if (type == PIC_FLAG_CODING_TYPE_P || type == PIC_FLAG_CODING_TYPE_B) { - /* forward_f_code and backward_f_code - used in mpeg1 only */ - decoder->f_motion.f_code[1] = (buffer[3] >> 2) & 1; - decoder->f_motion.f_code[0] = - (((buffer[3] << 1) | (buffer[4] >> 7)) & 7) - 1; - decoder->b_motion.f_code[1] = (buffer[4] >> 6) & 1; - decoder->b_motion.f_code[0] = ((buffer[4] >> 3) & 7) - 1; - } - - picture->flags = PIC_FLAG_PROGRESSIVE_FRAME | type; - picture->tag = picture->tag2 = 0; - if (mpeg2dec->num_tags) { - if (mpeg2dec->bytes_since_tag >= mpeg2dec->chunk_ptr - buffer + 4) { - mpeg2dec->num_tags = 0; - picture->tag = mpeg2dec->tag_current; - picture->tag2 = mpeg2dec->tag2_current; - picture->flags |= PIC_FLAG_TAGS; - } else if (mpeg2dec->num_tags > 1) { - mpeg2dec->num_tags = 1; - picture->tag = mpeg2dec->tag_previous; - picture->tag2 = mpeg2dec->tag2_previous; - picture->flags |= PIC_FLAG_TAGS; - } - } - picture->nb_fields = 2; - picture->display_offset[0].x = picture->display_offset[1].x = - picture->display_offset[2].x = mpeg2dec->display_offset_x; - picture->display_offset[0].y = picture->display_offset[1].y = - picture->display_offset[2].y = mpeg2dec->display_offset_y; - - /* XXXXXX decode extra_information_picture as well */ - - mpeg2dec->q_scale_type = 0; - decoder->intra_dc_precision = 7; - decoder->frame_pred_frame_dct = 1; - decoder->concealment_motion_vectors = 0; - decoder->scan = mpeg2_scan_norm; - decoder->picture_structure = FRAME_PICTURE; - mpeg2dec->copy_matrix = 0; - - return 0; -} - -static int picture_coding_ext (mpeg2dec_t * mpeg2dec) -{ - uint8_t * buffer = mpeg2dec->chunk_start; - mpeg2_picture_t * picture = &(mpeg2dec->new_picture); - mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); - uint32_t flags; - - /* pre subtract 1 for use later in 
compute_motion_vector */ - decoder->f_motion.f_code[0] = (buffer[0] & 15) - 1; - decoder->f_motion.f_code[1] = (buffer[1] >> 4) - 1; - decoder->b_motion.f_code[0] = (buffer[1] & 15) - 1; - decoder->b_motion.f_code[1] = (buffer[2] >> 4) - 1; - - flags = picture->flags; - decoder->intra_dc_precision = 7 - ((buffer[2] >> 2) & 3); - decoder->picture_structure = buffer[2] & 3; - switch (decoder->picture_structure) { - case TOP_FIELD: - flags |= PIC_FLAG_TOP_FIELD_FIRST; - case BOTTOM_FIELD: - picture->nb_fields = 1; - break; - case FRAME_PICTURE: - if (!(mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE)) { - picture->nb_fields = (buffer[3] & 2) ? 3 : 2; - flags |= (buffer[3] & 128) ? PIC_FLAG_TOP_FIELD_FIRST : 0; - } else - picture->nb_fields = (buffer[3]&2) ? ((buffer[3]&128) ? 6 : 4) : 2; - break; - default: - return 1; - } - decoder->top_field_first = buffer[3] >> 7; - decoder->frame_pred_frame_dct = (buffer[3] >> 6) & 1; - decoder->concealment_motion_vectors = (buffer[3] >> 5) & 1; - mpeg2dec->q_scale_type = buffer[3] & 16; - decoder->intra_vlc_format = (buffer[3] >> 3) & 1; - decoder->scan = (buffer[3] & 4) ? 
mpeg2_scan_alt : mpeg2_scan_norm; - if (!(buffer[4] & 0x80)) - flags &= ~PIC_FLAG_PROGRESSIVE_FRAME; - if (buffer[4] & 0x40) - flags |= (((buffer[4]<<26) | (buffer[5]<<18) | (buffer[6]<<10)) & - PIC_MASK_COMPOSITE_DISPLAY) | PIC_FLAG_COMPOSITE_DISPLAY; - picture->flags = flags; - - mpeg2dec->ext_state = PIC_DISPLAY_EXT | COPYRIGHT_EXT | QUANT_MATRIX_EXT; - - return 0; -} - -static int picture_display_ext (mpeg2dec_t * mpeg2dec) -{ - uint8_t * buffer = mpeg2dec->chunk_start; - mpeg2_picture_t * picture = &(mpeg2dec->new_picture); - int i, nb_pos; - - nb_pos = picture->nb_fields; - if (mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE) - nb_pos >>= 1; - - for (i = 0; i < nb_pos; i++) { - int x, y; - - x = ((buffer[4*i] << 24) | (buffer[4*i+1] << 16) | - (buffer[4*i+2] << 8) | buffer[4*i+3]) >> (11-2*i); - y = ((buffer[4*i+2] << 24) | (buffer[4*i+3] << 16) | - (buffer[4*i+4] << 8) | buffer[4*i+5]) >> (10-2*i); - if (! (x & y & 1)) - return 1; - picture->display_offset[i].x = mpeg2dec->display_offset_x = x >> 1; - picture->display_offset[i].y = mpeg2dec->display_offset_y = y >> 1; - } - for (; i < 3; i++) { - picture->display_offset[i].x = mpeg2dec->display_offset_x; - picture->display_offset[i].y = mpeg2dec->display_offset_y; - } - return 0; -} - -void mpeg2_header_picture_finalize (mpeg2dec_t * mpeg2dec, uint32_t accels) -{ - mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); - int old_type_b = (decoder->coding_type == B_TYPE); - int low_delay = mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY; - - finalize_matrix (mpeg2dec); - decoder->coding_type = mpeg2dec->new_picture.flags & PIC_MASK_CODING_TYPE; - - if (mpeg2dec->state == STATE_PICTURE) { - mpeg2_picture_t * picture; - mpeg2_picture_t * other; - - decoder->second_field = 0; - - picture = other = mpeg2dec->pictures; - if (old_type_b ^ (mpeg2dec->picture < mpeg2dec->pictures + 2)) - picture += 2; - else - other += 2; - mpeg2dec->picture = picture; - *picture = mpeg2dec->new_picture; - - if (!old_type_b) { - 
mpeg2dec->fbuf[2] = mpeg2dec->fbuf[1]; - mpeg2dec->fbuf[1] = mpeg2dec->fbuf[0]; - } - mpeg2dec->fbuf[0] = NULL; - mpeg2_reset_info (&(mpeg2dec->info)); - mpeg2dec->info.current_picture = picture; - mpeg2dec->info.display_picture = picture; - if (decoder->coding_type != B_TYPE) { - if (!low_delay) { - if (mpeg2dec->first) { - mpeg2dec->info.display_picture = NULL; - mpeg2dec->first = 0; - } else { - mpeg2dec->info.display_picture = other; - if (other->nb_fields == 1) - mpeg2dec->info.display_picture_2nd = other + 1; - mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[1]; - } - } - if (!low_delay + !mpeg2dec->convert) - mpeg2dec->info.discard_fbuf = - mpeg2dec->fbuf[!low_delay + !mpeg2dec->convert]; - } - if (mpeg2dec->convert) { - mpeg2_convert_init_t convert_init; - if (!mpeg2dec->convert_start) { - int y_size, uv_size; - - mpeg2dec->decoder.convert_id = - mpeg2_malloc (mpeg2dec->convert_id_size, - MPEG2_ALLOC_CONVERT_ID); - mpeg2dec->convert (MPEG2_CONVERT_START, - mpeg2dec->decoder.convert_id, - &(mpeg2dec->sequence), - mpeg2dec->convert_stride, accels, - mpeg2dec->convert_arg, &convert_init); - mpeg2dec->convert_start = convert_init.start; - mpeg2dec->decoder.convert = convert_init.copy; - - y_size = decoder->stride_frame * mpeg2dec->sequence.height; - uv_size = y_size >> (2 - mpeg2dec->decoder.chroma_format); - mpeg2dec->yuv_buf[0][0] = - (uint8_t *) mpeg2_malloc (y_size, MPEG2_ALLOC_YUV); - mpeg2dec->yuv_buf[0][1] = - (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); - mpeg2dec->yuv_buf[0][2] = - (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); - mpeg2dec->yuv_buf[1][0] = - (uint8_t *) mpeg2_malloc (y_size, MPEG2_ALLOC_YUV); - mpeg2dec->yuv_buf[1][1] = - (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); - mpeg2dec->yuv_buf[1][2] = - (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); - y_size = decoder->stride_frame * 32; - uv_size = y_size >> (2 - mpeg2dec->decoder.chroma_format); - mpeg2dec->yuv_buf[2][0] = - (uint8_t *) mpeg2_malloc (y_size, 
MPEG2_ALLOC_YUV); - mpeg2dec->yuv_buf[2][1] = - (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); - mpeg2dec->yuv_buf[2][2] = - (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); - } - if (!mpeg2dec->custom_fbuf) { - while (mpeg2dec->alloc_index < 3) { - mpeg2_fbuf_t * fbuf; - - fbuf = &mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index++].fbuf; - fbuf->id = NULL; - fbuf->buf[0] = - (uint8_t *) mpeg2_malloc (convert_init.buf_size[0], - MPEG2_ALLOC_CONVERTED); - fbuf->buf[1] = - (uint8_t *) mpeg2_malloc (convert_init.buf_size[1], - MPEG2_ALLOC_CONVERTED); - fbuf->buf[2] = - (uint8_t *) mpeg2_malloc (convert_init.buf_size[2], - MPEG2_ALLOC_CONVERTED); - } - mpeg2_set_fbuf (mpeg2dec, (decoder->coding_type == B_TYPE)); - } - } else if (!mpeg2dec->custom_fbuf) { - while (mpeg2dec->alloc_index < 3) { - mpeg2_fbuf_t * fbuf; - int y_size, uv_size; - - fbuf = &(mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index++].fbuf); - fbuf->id = NULL; - y_size = decoder->stride_frame * mpeg2dec->sequence.height; - uv_size = y_size >> (2 - decoder->chroma_format); - fbuf->buf[0] = (uint8_t *) mpeg2_malloc (y_size, - MPEG2_ALLOC_YUV); - fbuf->buf[1] = (uint8_t *) mpeg2_malloc (uv_size, - MPEG2_ALLOC_YUV); - fbuf->buf[2] = (uint8_t *) mpeg2_malloc (uv_size, - MPEG2_ALLOC_YUV); - } - mpeg2_set_fbuf (mpeg2dec, (decoder->coding_type == B_TYPE)); - } - } else { - decoder->second_field = 1; - mpeg2dec->picture++; /* second field picture */ - *(mpeg2dec->picture) = mpeg2dec->new_picture; - mpeg2dec->info.current_picture_2nd = mpeg2dec->picture; - if (low_delay || decoder->coding_type == B_TYPE) - mpeg2dec->info.display_picture_2nd = mpeg2dec->picture; - } - - info_user_data (mpeg2dec); -} - -static int copyright_ext (mpeg2dec_t * mpeg2dec) -{ - return 0; -} - -static int quant_matrix_ext (mpeg2dec_t * mpeg2dec) -{ - uint8_t * buffer = mpeg2dec->chunk_start; - int i, j; - - for (i = 0; i < 4; i++) - if (buffer[0] & (8 >> i)) { - for (j = 0; j < 64; j++) - 
mpeg2dec->new_quantizer_matrix[i][mpeg2_scan_norm[j]] = - (buffer[j] << (i+5)) | (buffer[j+1] >> (3-i)); - mpeg2dec->copy_matrix |= 1 << i; - buffer += 64; - } - - return 0; -} - -int mpeg2_header_extension (mpeg2dec_t * mpeg2dec) -{ - static int (* parser[]) (mpeg2dec_t *) = { - 0, sequence_ext, sequence_display_ext, quant_matrix_ext, - copyright_ext, 0, 0, picture_display_ext, picture_coding_ext - }; - int ext, ext_bit; - - ext = mpeg2dec->chunk_start[0] >> 4; - ext_bit = 1 << ext; - - if (!(mpeg2dec->ext_state & ext_bit)) - return 0; /* ignore illegal extensions */ - mpeg2dec->ext_state &= ~ext_bit; - return parser[ext] (mpeg2dec); -} - -int mpeg2_header_user_data (mpeg2dec_t * mpeg2dec) -{ - mpeg2dec->user_data_len += mpeg2dec->chunk_ptr - 1 - mpeg2dec->chunk_start; - mpeg2dec->chunk_start = mpeg2dec->chunk_ptr - 1; - - return 0; -} - -static void prescale (mpeg2dec_t * mpeg2dec, int index) -{ - static int non_linear_scale [] = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 10, 12, 14, 16, 18, 20, 22, - 24, 28, 32, 36, 40, 44, 48, 52, - 56, 64, 72, 80, 88, 96, 104, 112 - }; - int i, j, k; - mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); - - if (mpeg2dec->scaled[index] != mpeg2dec->q_scale_type) { - mpeg2dec->scaled[index] = mpeg2dec->q_scale_type; - for (i = 0; i < 32; i++) { - k = mpeg2dec->q_scale_type ? non_linear_scale[i] : (i << 1); - for (j = 0; j < 64; j++) - decoder->quantizer_prescale[index][i][j] = - k * mpeg2dec->quantizer_matrix[index][j]; - } - } -} - -mpeg2_state_t mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec) -{ - mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); - - mpeg2dec->info.user_data = NULL; mpeg2dec->info.user_data_len = 0; - mpeg2dec->state = ((mpeg2dec->picture->nb_fields > 1 || - mpeg2dec->state == STATE_PICTURE_2ND) ? 
- STATE_SLICE : STATE_SLICE_1ST); - - if (mpeg2dec->decoder.coding_type != D_TYPE) { - prescale (mpeg2dec, 0); - if (decoder->chroma_quantizer[0] == decoder->quantizer_prescale[2]) - prescale (mpeg2dec, 2); - if (mpeg2dec->decoder.coding_type != I_TYPE) { - prescale (mpeg2dec, 1); - if (decoder->chroma_quantizer[1] == decoder->quantizer_prescale[3]) - prescale (mpeg2dec, 3); - } - } - - if (!(mpeg2dec->nb_decode_slices)) - mpeg2dec->picture->flags |= PIC_FLAG_SKIP; - else if (mpeg2dec->convert_start) { - mpeg2dec->convert_start (decoder->convert_id, mpeg2dec->fbuf[0], - mpeg2dec->picture, mpeg2dec->info.gop); - - if (mpeg2dec->decoder.coding_type == B_TYPE) - mpeg2_init_fbuf (&(mpeg2dec->decoder), mpeg2dec->yuv_buf[2], - mpeg2dec->yuv_buf[mpeg2dec->yuv_index ^ 1], - mpeg2dec->yuv_buf[mpeg2dec->yuv_index]); - else { - mpeg2_init_fbuf (&(mpeg2dec->decoder), - mpeg2dec->yuv_buf[mpeg2dec->yuv_index ^ 1], - mpeg2dec->yuv_buf[mpeg2dec->yuv_index], - mpeg2dec->yuv_buf[mpeg2dec->yuv_index]); - if (mpeg2dec->state == STATE_SLICE) - mpeg2dec->yuv_index ^= 1; - } - } else { - int b_type; - - b_type = (mpeg2dec->decoder.coding_type == B_TYPE); - mpeg2_init_fbuf (&(mpeg2dec->decoder), mpeg2dec->fbuf[0]->buf, - mpeg2dec->fbuf[b_type + 1]->buf, - mpeg2dec->fbuf[b_type]->buf); - } - mpeg2dec->action = NULL; - return STATE_INTERNAL_NORETURN; -} - -static mpeg2_state_t seek_sequence (mpeg2dec_t * mpeg2dec) -{ - mpeg2_reset_info (&(mpeg2dec->info)); - mpeg2dec->info.sequence = NULL; - mpeg2dec->info.gop = NULL; - mpeg2_header_state_init (mpeg2dec); - mpeg2dec->action = mpeg2_seek_header; - return mpeg2_seek_header (mpeg2dec); -} - -mpeg2_state_t mpeg2_header_end (mpeg2dec_t * mpeg2dec) -{ - mpeg2_picture_t * picture; - int b_type; - - b_type = (mpeg2dec->decoder.coding_type == B_TYPE); - picture = mpeg2dec->pictures; - if ((mpeg2dec->picture >= picture + 2) ^ b_type) - picture = mpeg2dec->pictures + 2; - - mpeg2_reset_info (&(mpeg2dec->info)); - if (!(mpeg2dec->sequence.flags & 
SEQ_FLAG_LOW_DELAY)) { - mpeg2dec->info.display_picture = picture; - if (picture->nb_fields == 1) - mpeg2dec->info.display_picture_2nd = picture + 1; - mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[b_type]; - if (!mpeg2dec->convert) - mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[b_type + 1]; - } else if (!mpeg2dec->convert) - mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[b_type]; - mpeg2dec->action = seek_sequence; - return STATE_END; -} diff --git a/src/libmpeg2new/libmpeg2/idct.c b/src/libmpeg2new/libmpeg2/idct.c deleted file mode 100644 index 8b982bb33..000000000 --- a/src/libmpeg2new/libmpeg2/idct.c +++ /dev/null @@ -1,287 +0,0 @@ -/* - * idct.c - * Copyright (C) 2000-2003 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#include -#include - -#include "../include/mpeg2.h" -#include "../include/attributes.h" -#include "mpeg2_internal.h" - -#define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */ -#define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */ -#define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */ -#define W5 1609 /* 2048 * sqrt (2) * cos (5 * pi / 16) */ -#define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */ -#define W7 565 /* 2048 * sqrt (2) * cos (7 * pi / 16) */ - -/* idct main entry point */ -void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride); -void (* mpeg2_idct_add) (int last, int16_t * block, - uint8_t * dest, int stride); - -/* - * In legal streams, the IDCT output should be between -384 and +384. - * In corrupted streams, it is possible to force the IDCT output to go - * to +-3826 - this is the worst case for a column IDCT where the - * column inputs are 16-bit values. 
- */ -uint8_t mpeg2_clip[3840 * 2 + 256]; -#define CLIP(i) ((mpeg2_clip + 3840)[i]) - -#if 0 -#define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ -do { \ - t0 = W0 * d0 + W1 * d1; \ - t1 = W0 * d1 - W1 * d0; \ -} while (0) -#else -#define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ -do { \ - int tmp = W0 * (d0 + d1); \ - t0 = tmp + (W1 - W0) * d1; \ - t1 = tmp - (W1 + W0) * d0; \ -} while (0) -#endif - -static void inline idct_row (int16_t * const block) -{ - int d0, d1, d2, d3; - int a0, a1, a2, a3, b0, b1, b2, b3; - int t0, t1, t2, t3; - - /* shortcut */ - if (likely (!(block[1] | ((int32_t *)block)[1] | ((int32_t *)block)[2] | - ((int32_t *)block)[3]))) { - uint32_t tmp = (uint16_t) (block[0] >> 1); - tmp |= tmp << 16; - ((int32_t *)block)[0] = tmp; - ((int32_t *)block)[1] = tmp; - ((int32_t *)block)[2] = tmp; - ((int32_t *)block)[3] = tmp; - return; - } - - d0 = (block[0] << 11) + 2048; - d1 = block[1]; - d2 = block[2] << 11; - d3 = block[3]; - t0 = d0 + d2; - t1 = d0 - d2; - BUTTERFLY (t2, t3, W6, W2, d3, d1); - a0 = t0 + t2; - a1 = t1 + t3; - a2 = t1 - t3; - a3 = t0 - t2; - - d0 = block[4]; - d1 = block[5]; - d2 = block[6]; - d3 = block[7]; - BUTTERFLY (t0, t1, W7, W1, d3, d0); - BUTTERFLY (t2, t3, W3, W5, d1, d2); - b0 = t0 + t2; - b3 = t1 + t3; - t0 -= t2; - t1 -= t3; - b1 = ((t0 + t1) >> 8) * 181; - b2 = ((t0 - t1) >> 8) * 181; - - block[0] = (a0 + b0) >> 12; - block[1] = (a1 + b1) >> 12; - block[2] = (a2 + b2) >> 12; - block[3] = (a3 + b3) >> 12; - block[4] = (a3 - b3) >> 12; - block[5] = (a2 - b2) >> 12; - block[6] = (a1 - b1) >> 12; - block[7] = (a0 - b0) >> 12; -} - -static void inline idct_col (int16_t * const block) -{ - int d0, d1, d2, d3; - int a0, a1, a2, a3, b0, b1, b2, b3; - int t0, t1, t2, t3; - - d0 = (block[8*0] << 11) + 65536; - d1 = block[8*1]; - d2 = block[8*2] << 11; - d3 = block[8*3]; - t0 = d0 + d2; - t1 = d0 - d2; - BUTTERFLY (t2, t3, W6, W2, d3, d1); - a0 = t0 + t2; - a1 = t1 + t3; - a2 = t1 - t3; - a3 = t0 - t2; - - d0 = block[8*4]; - d1 = block[8*5]; - d2 = 
block[8*6]; - d3 = block[8*7]; - BUTTERFLY (t0, t1, W7, W1, d3, d0); - BUTTERFLY (t2, t3, W3, W5, d1, d2); - b0 = t0 + t2; - b3 = t1 + t3; - t0 -= t2; - t1 -= t3; - b1 = ((t0 + t1) >> 8) * 181; - b2 = ((t0 - t1) >> 8) * 181; - - block[8*0] = (a0 + b0) >> 17; - block[8*1] = (a1 + b1) >> 17; - block[8*2] = (a2 + b2) >> 17; - block[8*3] = (a3 + b3) >> 17; - block[8*4] = (a3 - b3) >> 17; - block[8*5] = (a2 - b2) >> 17; - block[8*6] = (a1 - b1) >> 17; - block[8*7] = (a0 - b0) >> 17; -} - -static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest, - const int stride) -{ - int i; - - for (i = 0; i < 8; i++) - idct_row (block + 8 * i); - for (i = 0; i < 8; i++) - idct_col (block + i); - do { - dest[0] = CLIP (block[0]); - dest[1] = CLIP (block[1]); - dest[2] = CLIP (block[2]); - dest[3] = CLIP (block[3]); - dest[4] = CLIP (block[4]); - dest[5] = CLIP (block[5]); - dest[6] = CLIP (block[6]); - dest[7] = CLIP (block[7]); - - ((int32_t *)block)[0] = 0; ((int32_t *)block)[1] = 0; - ((int32_t *)block)[2] = 0; ((int32_t *)block)[3] = 0; - - dest += stride; - block += 8; - } while (--i); -} - -static void mpeg2_idct_add_c (const int last, int16_t * block, - uint8_t * dest, const int stride) -{ - int i; - - if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) { - for (i = 0; i < 8; i++) - idct_row (block + 8 * i); - for (i = 0; i < 8; i++) - idct_col (block + i); - do { - dest[0] = CLIP (block[0] + dest[0]); - dest[1] = CLIP (block[1] + dest[1]); - dest[2] = CLIP (block[2] + dest[2]); - dest[3] = CLIP (block[3] + dest[3]); - dest[4] = CLIP (block[4] + dest[4]); - dest[5] = CLIP (block[5] + dest[5]); - dest[6] = CLIP (block[6] + dest[6]); - dest[7] = CLIP (block[7] + dest[7]); - - ((int32_t *)block)[0] = 0; ((int32_t *)block)[1] = 0; - ((int32_t *)block)[2] = 0; ((int32_t *)block)[3] = 0; - - dest += stride; - block += 8; - } while (--i); - } else { - int DC; - - DC = (block[0] + 64) >> 7; - block[0] = block[63] = 0; - i = 8; - do { - dest[0] = CLIP (DC + dest[0]); - dest[1] 
= CLIP (DC + dest[1]); - dest[2] = CLIP (DC + dest[2]); - dest[3] = CLIP (DC + dest[3]); - dest[4] = CLIP (DC + dest[4]); - dest[5] = CLIP (DC + dest[5]); - dest[6] = CLIP (DC + dest[6]); - dest[7] = CLIP (DC + dest[7]); - dest += stride; - } while (--i); - } -} - -void mpeg2_idct_init (uint32_t accel) -{ -#ifdef ARCH_X86 - if (accel & MPEG2_ACCEL_X86_MMXEXT) { - mpeg2_idct_copy = mpeg2_idct_copy_mmxext; - mpeg2_idct_add = mpeg2_idct_add_mmxext; - mpeg2_idct_mmx_init (); - } else if (accel & MPEG2_ACCEL_X86_MMX) { - mpeg2_idct_copy = mpeg2_idct_copy_mmx; - mpeg2_idct_add = mpeg2_idct_add_mmx; - mpeg2_idct_mmx_init (); - } else -#endif -#ifdef ARCH_PPC - if (accel & MPEG2_ACCEL_PPC_ALTIVEC) { - mpeg2_idct_copy = mpeg2_idct_copy_altivec; - mpeg2_idct_add = mpeg2_idct_add_altivec; - mpeg2_idct_altivec_init (); - } else -#endif -#ifdef ARCH_ALPHA - if (accel & MPEG2_ACCEL_ALPHA_MVI) { - mpeg2_idct_copy = mpeg2_idct_copy_mvi; - mpeg2_idct_add = mpeg2_idct_add_mvi; - mpeg2_idct_alpha_init (); - } else if (accel & MPEG2_ACCEL_ALPHA) { - int i; - - mpeg2_idct_copy = mpeg2_idct_copy_alpha; - mpeg2_idct_add = mpeg2_idct_add_alpha; - mpeg2_idct_alpha_init (); - for (i = -3840; i < 3840 + 256; i++) - CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 255 : i); - } else -#endif - { - extern uint8_t mpeg2_scan_norm[64]; - extern uint8_t mpeg2_scan_alt[64]; - int i, j; - - mpeg2_idct_copy = mpeg2_idct_copy_c; - mpeg2_idct_add = mpeg2_idct_add_c; - for (i = -3840; i < 3840 + 256; i++) - CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 
255 : i); - for (i = 0; i < 64; i++) { - j = mpeg2_scan_norm[i]; - mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); - j = mpeg2_scan_alt[i]; - mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); - } - } -} diff --git a/src/libmpeg2new/libmpeg2/idct_alpha.c b/src/libmpeg2new/libmpeg2/idct_alpha.c deleted file mode 100644 index 1d8fd08ee..000000000 --- a/src/libmpeg2new/libmpeg2/idct_alpha.c +++ /dev/null @@ -1,379 +0,0 @@ -/* - * idct_alpha.c - * Copyright (C) 2002-2003 Falk Hueffner - * Copyright (C) 2000-2003 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#ifdef ARCH_ALPHA - -#include -#include - -#include "mpeg2.h" -#include -#include "mpeg2_internal.h" -#include "alpha_asm.h" - -#define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */ -#define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */ -#define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */ -#define W5 1609 /* 2048 * sqrt (2) * cos (5 * pi / 16) */ -#define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */ -#define W7 565 /* 2048 * sqrt (2) * cos (7 * pi / 16) */ - -extern uint8_t mpeg2_clip[3840 * 2 + 256]; -#define CLIP(i) ((mpeg2_clip + 3840)[i]) - -#if 0 -#define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ -do { \ - t0 = W0 * d0 + W1 * d1; \ - t1 = W0 * d1 - W1 * d0; \ -} while (0) -#else -#define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ -do { \ - int_fast32_t tmp = W0 * (d0 + d1); \ - t0 = tmp + (W1 - W0) * d1; \ - t1 = tmp - (W1 + W0) * d0; \ -} while (0) -#endif - -static void inline idct_row (int16_t * const block) -{ - uint64_t l, r; - int_fast32_t d0, d1, d2, d3; - int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3; - int_fast32_t t0, t1, t2, t3; - - l = ldq (block); - r = ldq (block + 4); - - /* shortcut */ - if (likely (!((l & ~0xffffUL) | r))) { - uint64_t tmp = (uint16_t) (l >> 1); - tmp |= tmp << 16; - tmp |= tmp << 32; - ((int32_t *)block)[0] = tmp; - ((int32_t *)block)[1] = tmp; - ((int32_t *)block)[2] = tmp; - ((int32_t *)block)[3] = tmp; - return; - } - - d0 = (sextw (l) << 11) + 2048; - d1 = sextw (extwl (l, 2)); - d2 = sextw (extwl (l, 4)) << 11; - d3 = sextw (extwl (l, 6)); - t0 = d0 + d2; - t1 = d0 - d2; - BUTTERFLY (t2, t3, W6, W2, d3, d1); - a0 = t0 + t2; - a1 = t1 + t3; - a2 = t1 - t3; - a3 = t0 - t2; - - d0 = sextw (r); - d1 = sextw (extwl (r, 2)); - d2 = sextw (extwl (r, 4)); - d3 = sextw (extwl (r, 6)); - 
BUTTERFLY (t0, t1, W7, W1, d3, d0); - BUTTERFLY (t2, t3, W3, W5, d1, d2); - b0 = t0 + t2; - b3 = t1 + t3; - t0 -= t2; - t1 -= t3; - b1 = ((t0 + t1) >> 8) * 181; - b2 = ((t0 - t1) >> 8) * 181; - - block[0] = (a0 + b0) >> 12; - block[1] = (a1 + b1) >> 12; - block[2] = (a2 + b2) >> 12; - block[3] = (a3 + b3) >> 12; - block[4] = (a3 - b3) >> 12; - block[5] = (a2 - b2) >> 12; - block[6] = (a1 - b1) >> 12; - block[7] = (a0 - b0) >> 12; -} - -static void inline idct_col (int16_t * const block) -{ - int_fast32_t d0, d1, d2, d3; - int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3; - int_fast32_t t0, t1, t2, t3; - - d0 = (block[8*0] << 11) + 65536; - d1 = block[8*1]; - d2 = block[8*2] << 11; - d3 = block[8*3]; - t0 = d0 + d2; - t1 = d0 - d2; - BUTTERFLY (t2, t3, W6, W2, d3, d1); - a0 = t0 + t2; - a1 = t1 + t3; - a2 = t1 - t3; - a3 = t0 - t2; - - d0 = block[8*4]; - d1 = block[8*5]; - d2 = block[8*6]; - d3 = block[8*7]; - BUTTERFLY (t0, t1, W7, W1, d3, d0); - BUTTERFLY (t2, t3, W3, W5, d1, d2); - b0 = t0 + t2; - b3 = t1 + t3; - t0 -= t2; - t1 -= t3; - b1 = ((t0 + t1) >> 8) * 181; - b2 = ((t0 - t1) >> 8) * 181; - - block[8*0] = (a0 + b0) >> 17; - block[8*1] = (a1 + b1) >> 17; - block[8*2] = (a2 + b2) >> 17; - block[8*3] = (a3 + b3) >> 17; - block[8*4] = (a3 - b3) >> 17; - block[8*5] = (a2 - b2) >> 17; - block[8*6] = (a1 - b1) >> 17; - block[8*7] = (a0 - b0) >> 17; -} - -void mpeg2_idct_copy_mvi (int16_t * block, uint8_t * dest, const int stride) -{ - uint64_t clampmask; - int i; - - for (i = 0; i < 8; i++) - idct_row (block + 8 * i); - - for (i = 0; i < 8; i++) - idct_col (block + i); - - clampmask = zap (-1, 0xaa); /* 0x00ff00ff00ff00ff */ - do { - uint64_t shorts0, shorts1; - - shorts0 = ldq (block); - shorts0 = maxsw4 (shorts0, 0); - shorts0 = minsw4 (shorts0, clampmask); - stl (pkwb (shorts0), dest); - - shorts1 = ldq (block + 4); - shorts1 = maxsw4 (shorts1, 0); - shorts1 = minsw4 (shorts1, clampmask); - stl (pkwb (shorts1), dest + 4); - - stq (0, block); - stq (0, block + 4); - 
- dest += stride; - block += 8; - } while (--i); -} - -void mpeg2_idct_add_mvi (const int last, int16_t * block, - uint8_t * dest, const int stride) -{ - uint64_t clampmask; - uint64_t signmask; - int i; - - if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) { - for (i = 0; i < 8; i++) - idct_row (block + 8 * i); - for (i = 0; i < 8; i++) - idct_col (block + i); - clampmask = zap (-1, 0xaa); /* 0x00ff00ff00ff00ff */ - signmask = zap (-1, 0x33); - signmask ^= signmask >> 1; /* 0x8000800080008000 */ - - do { - uint64_t shorts0, pix0, signs0; - uint64_t shorts1, pix1, signs1; - - shorts0 = ldq (block); - shorts1 = ldq (block + 4); - - pix0 = unpkbw (ldl (dest)); - /* signed subword add (MMX paddw). */ - signs0 = shorts0 & signmask; - shorts0 &= ~signmask; - shorts0 += pix0; - shorts0 ^= signs0; - /* clamp. */ - shorts0 = maxsw4 (shorts0, 0); - shorts0 = minsw4 (shorts0, clampmask); - - /* next 4. */ - pix1 = unpkbw (ldl (dest + 4)); - signs1 = shorts1 & signmask; - shorts1 &= ~signmask; - shorts1 += pix1; - shorts1 ^= signs1; - shorts1 = maxsw4 (shorts1, 0); - shorts1 = minsw4 (shorts1, clampmask); - - stl (pkwb (shorts0), dest); - stl (pkwb (shorts1), dest + 4); - stq (0, block); - stq (0, block + 4); - - dest += stride; - block += 8; - } while (--i); - } else { - int DC; - uint64_t p0, p1, p2, p3, p4, p5, p6, p7; - uint64_t DCs; - - DC = (block[0] + 64) >> 7; - block[0] = block[63] = 0; - - p0 = ldq (dest + 0 * stride); - p1 = ldq (dest + 1 * stride); - p2 = ldq (dest + 2 * stride); - p3 = ldq (dest + 3 * stride); - p4 = ldq (dest + 4 * stride); - p5 = ldq (dest + 5 * stride); - p6 = ldq (dest + 6 * stride); - p7 = ldq (dest + 7 * stride); - - if (DC > 0) { - DCs = BYTE_VEC (likely (DC <= 255) ? 
DC : 255); - p0 += minub8 (DCs, ~p0); - p1 += minub8 (DCs, ~p1); - p2 += minub8 (DCs, ~p2); - p3 += minub8 (DCs, ~p3); - p4 += minub8 (DCs, ~p4); - p5 += minub8 (DCs, ~p5); - p6 += minub8 (DCs, ~p6); - p7 += minub8 (DCs, ~p7); - } else { - DCs = BYTE_VEC (likely (-DC <= 255) ? -DC : 255); - p0 -= minub8 (DCs, p0); - p1 -= minub8 (DCs, p1); - p2 -= minub8 (DCs, p2); - p3 -= minub8 (DCs, p3); - p4 -= minub8 (DCs, p4); - p5 -= minub8 (DCs, p5); - p6 -= minub8 (DCs, p6); - p7 -= minub8 (DCs, p7); - } - - stq (p0, dest + 0 * stride); - stq (p1, dest + 1 * stride); - stq (p2, dest + 2 * stride); - stq (p3, dest + 3 * stride); - stq (p4, dest + 4 * stride); - stq (p5, dest + 5 * stride); - stq (p6, dest + 6 * stride); - stq (p7, dest + 7 * stride); - } -} - -void mpeg2_idct_copy_alpha (int16_t * block, uint8_t * dest, const int stride) -{ - int i; - - for (i = 0; i < 8; i++) - idct_row (block + 8 * i); - for (i = 0; i < 8; i++) - idct_col (block + i); - do { - dest[0] = CLIP (block[0]); - dest[1] = CLIP (block[1]); - dest[2] = CLIP (block[2]); - dest[3] = CLIP (block[3]); - dest[4] = CLIP (block[4]); - dest[5] = CLIP (block[5]); - dest[6] = CLIP (block[6]); - dest[7] = CLIP (block[7]); - - stq(0, block); - stq(0, block + 4); - - dest += stride; - block += 8; - } while (--i); -} - -void mpeg2_idct_add_alpha (const int last, int16_t * block, - uint8_t * dest, const int stride) -{ - int i; - - if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) { - for (i = 0; i < 8; i++) - idct_row (block + 8 * i); - for (i = 0; i < 8; i++) - idct_col (block + i); - do { - dest[0] = CLIP (block[0] + dest[0]); - dest[1] = CLIP (block[1] + dest[1]); - dest[2] = CLIP (block[2] + dest[2]); - dest[3] = CLIP (block[3] + dest[3]); - dest[4] = CLIP (block[4] + dest[4]); - dest[5] = CLIP (block[5] + dest[5]); - dest[6] = CLIP (block[6] + dest[6]); - dest[7] = CLIP (block[7] + dest[7]); - - stq(0, block); - stq(0, block + 4); - - dest += stride; - block += 8; - } while (--i); - } else { - int DC; - 
- DC = (block[0] + 64) >> 7; - block[0] = block[63] = 0; - i = 8; - do { - dest[0] = CLIP (DC + dest[0]); - dest[1] = CLIP (DC + dest[1]); - dest[2] = CLIP (DC + dest[2]); - dest[3] = CLIP (DC + dest[3]); - dest[4] = CLIP (DC + dest[4]); - dest[5] = CLIP (DC + dest[5]); - dest[6] = CLIP (DC + dest[6]); - dest[7] = CLIP (DC + dest[7]); - dest += stride; - } while (--i); - } -} - -void mpeg2_idct_alpha_init (void) -{ - extern uint8_t mpeg2_scan_norm[64]; - extern uint8_t mpeg2_scan_alt[64]; - int i, j; - - for (i = 0; i < 64; i++) { - j = mpeg2_scan_norm[i]; - mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); - j = mpeg2_scan_alt[i]; - mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); - } -} - -#endif /* ARCH_ALPHA */ diff --git a/src/libmpeg2new/libmpeg2/idct_altivec.c b/src/libmpeg2new/libmpeg2/idct_altivec.c deleted file mode 100644 index f15bca165..000000000 --- a/src/libmpeg2new/libmpeg2/idct_altivec.c +++ /dev/null @@ -1,288 +0,0 @@ -/* - * idct_altivec.c - * Copyright (C) 2000-2003 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#ifdef ARCH_PPC - -#ifdef HAVE_ALTIVEC_H -#include -#endif -#include - -#include "mpeg2.h" -#include -#include "mpeg2_internal.h" - -typedef vector signed char vector_s8_t; -typedef vector unsigned char vector_u8_t; -typedef vector signed short vector_s16_t; -typedef vector unsigned short vector_u16_t; -typedef vector signed int vector_s32_t; -typedef vector unsigned int vector_u32_t; - -#if defined(HAVE_ALTIVEC_H) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303) -/* work around gcc <3.3 vec_mergel bug */ -static inline vector_s16_t my_vec_mergel (vector_s16_t const A, - vector_s16_t const B) -{ - static const vector_u8_t mergel = { - 0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b, - 0x0c, 0x0d, 0x1c, 0x1d, 0x0e, 0x0f, 0x1e, 0x1f - }; - return vec_perm (A, B, mergel); -} -#undef vec_mergel -#define vec_mergel my_vec_mergel -#endif - -#ifdef HAVE_ALTIVEC_H /* gnu */ -#define VEC_S16(a,b,c,d,e,f,g,h) {a, b, c, d, e, f, g, h} -#else /* apple */ -#define VEC_S16(a,b,c,d,e,f,g,h) (vector_s16_t) (a, b, c, d, e, f, g, h) -#endif - -static const vector_s16_t constants ATTR_ALIGN(16) = - VEC_S16 (23170, 13573, 6518, 21895, -23170, -21895, 32, 31); -static const vector_s16_t constants_1 ATTR_ALIGN(16) = - VEC_S16 (16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725); -static const vector_s16_t constants_2 ATTR_ALIGN(16) = - VEC_S16 (16069, 22289, 20995, 18895, 16069, 18895, 20995, 22289); -static const vector_s16_t constants_3 ATTR_ALIGN(16) = - VEC_S16 (21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692); -static const vector_s16_t constants_4 ATTR_ALIGN(16) = - VEC_S16 (13623, 18895, 17799, 16019, 13623, 16019, 17799, 18895); - -#define IDCT \ - vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \ - vector_s16_t vy0, vy1, vy2, 
vy3, vy4, vy5, vy6, vy7; \ - vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias; \ - vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8; \ - vector_u16_t shift; \ - \ - c4 = vec_splat (constants, 0); \ - a0 = vec_splat (constants, 1); \ - a1 = vec_splat (constants, 2); \ - a2 = vec_splat (constants, 3); \ - mc4 = vec_splat (constants, 4); \ - ma2 = vec_splat (constants, 5); \ - bias = (vector_s16_t)vec_splat ((vector_s32_t)constants, 3); \ - \ - zero = vec_splat_s16 (0); \ - \ - vx0 = vec_adds (block[0], block[4]); \ - vx4 = vec_subs (block[0], block[4]); \ - t5 = vec_mradds (vx0, constants_1, zero); \ - t0 = vec_mradds (vx4, constants_1, zero); \ - \ - vx1 = vec_mradds (a1, block[7], block[1]); \ - vx7 = vec_mradds (a1, block[1], vec_subs (zero, block[7])); \ - t1 = vec_mradds (vx1, constants_2, zero); \ - t8 = vec_mradds (vx7, constants_2, zero); \ - \ - vx2 = vec_mradds (a0, block[6], block[2]); \ - vx6 = vec_mradds (a0, block[2], vec_subs (zero, block[6])); \ - t2 = vec_mradds (vx2, constants_3, zero); \ - t4 = vec_mradds (vx6, constants_3, zero); \ - \ - vx3 = vec_mradds (block[3], constants_4, zero); \ - vx5 = vec_mradds (block[5], constants_4, zero); \ - t7 = vec_mradds (a2, vx5, vx3); \ - t3 = vec_mradds (ma2, vx3, vx5); \ - \ - t6 = vec_adds (t8, t3); \ - t3 = vec_subs (t8, t3); \ - t8 = vec_subs (t1, t7); \ - t1 = vec_adds (t1, t7); \ - t6 = vec_mradds (a0, t6, t6); /* a0+1 == 2*c4 */ \ - t1 = vec_mradds (a0, t1, t1); /* a0+1 == 2*c4 */ \ - \ - t7 = vec_adds (t5, t2); \ - t2 = vec_subs (t5, t2); \ - t5 = vec_adds (t0, t4); \ - t0 = vec_subs (t0, t4); \ - t4 = vec_subs (t8, t3); \ - t3 = vec_adds (t8, t3); \ - \ - vy0 = vec_adds (t7, t1); \ - vy7 = vec_subs (t7, t1); \ - vy1 = vec_adds (t5, t3); \ - vy6 = vec_subs (t5, t3); \ - vy2 = vec_adds (t0, t4); \ - vy5 = vec_subs (t0, t4); \ - vy3 = vec_adds (t2, t6); \ - vy4 = vec_subs (t2, t6); \ - \ - vx0 = vec_mergeh (vy0, vy4); \ - vx1 = vec_mergel (vy0, vy4); \ - vx2 = vec_mergeh (vy1, vy5); \ - vx3 = vec_mergel 
(vy1, vy5); \ - vx4 = vec_mergeh (vy2, vy6); \ - vx5 = vec_mergel (vy2, vy6); \ - vx6 = vec_mergeh (vy3, vy7); \ - vx7 = vec_mergel (vy3, vy7); \ - \ - vy0 = vec_mergeh (vx0, vx4); \ - vy1 = vec_mergel (vx0, vx4); \ - vy2 = vec_mergeh (vx1, vx5); \ - vy3 = vec_mergel (vx1, vx5); \ - vy4 = vec_mergeh (vx2, vx6); \ - vy5 = vec_mergel (vx2, vx6); \ - vy6 = vec_mergeh (vx3, vx7); \ - vy7 = vec_mergel (vx3, vx7); \ - \ - vx0 = vec_mergeh (vy0, vy4); \ - vx1 = vec_mergel (vy0, vy4); \ - vx2 = vec_mergeh (vy1, vy5); \ - vx3 = vec_mergel (vy1, vy5); \ - vx4 = vec_mergeh (vy2, vy6); \ - vx5 = vec_mergel (vy2, vy6); \ - vx6 = vec_mergeh (vy3, vy7); \ - vx7 = vec_mergel (vy3, vy7); \ - \ - vx0 = vec_adds (vx0, bias); \ - t5 = vec_adds (vx0, vx4); \ - t0 = vec_subs (vx0, vx4); \ - \ - t1 = vec_mradds (a1, vx7, vx1); \ - t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \ - \ - t2 = vec_mradds (a0, vx6, vx2); \ - t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6)); \ - \ - t7 = vec_mradds (a2, vx5, vx3); \ - t3 = vec_mradds (ma2, vx3, vx5); \ - \ - t6 = vec_adds (t8, t3); \ - t3 = vec_subs (t8, t3); \ - t8 = vec_subs (t1, t7); \ - t1 = vec_adds (t1, t7); \ - \ - t7 = vec_adds (t5, t2); \ - t2 = vec_subs (t5, t2); \ - t5 = vec_adds (t0, t4); \ - t0 = vec_subs (t0, t4); \ - t4 = vec_subs (t8, t3); \ - t3 = vec_adds (t8, t3); \ - \ - vy0 = vec_adds (t7, t1); \ - vy7 = vec_subs (t7, t1); \ - vy1 = vec_mradds (c4, t3, t5); \ - vy6 = vec_mradds (mc4, t3, t5); \ - vy2 = vec_mradds (c4, t4, t0); \ - vy5 = vec_mradds (mc4, t4, t0); \ - vy3 = vec_adds (t2, t6); \ - vy4 = vec_subs (t2, t6); \ - \ - shift = vec_splat_u16 (6); \ - vx0 = vec_sra (vy0, shift); \ - vx1 = vec_sra (vy1, shift); \ - vx2 = vec_sra (vy2, shift); \ - vx3 = vec_sra (vy3, shift); \ - vx4 = vec_sra (vy4, shift); \ - vx5 = vec_sra (vy5, shift); \ - vx6 = vec_sra (vy6, shift); \ - vx7 = vec_sra (vy7, shift); - -void mpeg2_idct_copy_altivec (int16_t * const _block, uint8_t * dest, - const int stride) -{ - vector_s16_t * const 
block = (vector_s16_t *)_block; - vector_u8_t tmp; - - IDCT - -#define COPY(dest,src) \ - tmp = vec_packsu (src, src); \ - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \ - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - - COPY (dest, vx0) dest += stride; - COPY (dest, vx1) dest += stride; - COPY (dest, vx2) dest += stride; - COPY (dest, vx3) dest += stride; - COPY (dest, vx4) dest += stride; - COPY (dest, vx5) dest += stride; - COPY (dest, vx6) dest += stride; - COPY (dest, vx7) - - block[0] = block[1] = block[2] = block[3] = zero; - block[4] = block[5] = block[6] = block[7] = zero; -} - -void mpeg2_idct_add_altivec (const int last, int16_t * const _block, - uint8_t * dest, const int stride) -{ - vector_s16_t * const block = (vector_s16_t *)_block; - vector_u8_t tmp; - vector_s16_t tmp2, tmp3; - vector_u8_t perm0; - vector_u8_t perm1; - vector_u8_t p0, p1, p; - - IDCT - - p0 = vec_lvsl (0, dest); - p1 = vec_lvsl (stride, dest); - p = vec_splat_u8 (-1); - perm0 = vec_mergeh (p, p0); - perm1 = vec_mergeh (p, p1); - -#define ADD(dest,src,perm) \ - /* *(uint64_t *)&tmp = *(uint64_t *)dest; */ \ - tmp = vec_ld (0, dest); \ - tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm); \ - tmp3 = vec_adds (tmp2, src); \ - tmp = vec_packsu (tmp3, tmp3); \ - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \ - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - - ADD (dest, vx0, perm0) dest += stride; - ADD (dest, vx1, perm1) dest += stride; - ADD (dest, vx2, perm0) dest += stride; - ADD (dest, vx3, perm1) dest += stride; - ADD (dest, vx4, perm0) dest += stride; - ADD (dest, vx5, perm1) dest += stride; - ADD (dest, vx6, perm0) dest += stride; - ADD (dest, vx7, perm1) - - block[0] = block[1] = block[2] = block[3] = zero; - block[4] = block[5] = block[6] = block[7] = zero; -} - -void mpeg2_idct_altivec_init (void) -{ - extern uint8_t mpeg2_scan_norm[64]; - extern uint8_t mpeg2_scan_alt[64]; - int i, j; - - /* the altivec idct uses a transposed 
input, so we patch scan tables */ - for (i = 0; i < 64; i++) { - j = mpeg2_scan_norm[i]; - mpeg2_scan_norm[i] = (j >> 3) | ((j & 7) << 3); - j = mpeg2_scan_alt[i]; - mpeg2_scan_alt[i] = (j >> 3) | ((j & 7) << 3); - } -} - -#endif diff --git a/src/libmpeg2new/libmpeg2/idct_mlib.c b/src/libmpeg2new/libmpeg2/idct_mlib.c deleted file mode 100644 index 55a2e9b64..000000000 --- a/src/libmpeg2new/libmpeg2/idct_mlib.c +++ /dev/null @@ -1,60 +0,0 @@ -/* - * idct_mlib.c - * Copyright (C) 1999-2003 HÃ¥kan Hjort - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#ifdef LIBMPEG2_MLIB - -#include -#include -#include -#include -#include -#include - -#include "../include/mpeg2.h" -#include "mpeg2_internal.h" - -void mpeg2_idct_add_mlib (const int last, int16_t * const block, - uint8_t * const dest, const int stride) -{ - mlib_VideoIDCT_IEEE_S16_S16 (block, block); - mlib_VideoAddBlock_U8_S16 (dest, block, stride); - memset (block, 0, 64 * sizeof (uint16_t)); -} - -void mpeg2_idct_copy_mlib_non_ieee (int16_t * const block, - uint8_t * const dest, const int stride) -{ - mlib_VideoIDCT8x8_U8_S16 (dest, block, stride); - memset (block, 0, 64 * sizeof (uint16_t)); -} - -void mpeg2_idct_add_mlib_non_ieee (const int last, int16_t * const block, - uint8_t * const dest, const int stride) -{ - mlib_VideoIDCT8x8_S16_S16 (block, block); - mlib_VideoAddBlock_U8_S16 (dest, block, stride); - memset (block, 0, 64 * sizeof (uint16_t)); -} - -#endif diff --git a/src/libmpeg2new/libmpeg2/idct_mmx.c b/src/libmpeg2new/libmpeg2/idct_mmx.c deleted file mode 100644 index d5a5c08a4..000000000 --- a/src/libmpeg2new/libmpeg2/idct_mmx.c +++ /dev/null @@ -1,814 +0,0 @@ -/* - * idct_mmx.c - * Copyright (C) 2000-2003 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#ifdef ARCH_X86 - -#include - -#include "../include/mpeg2.h" -#include "../include/attributes.h" -#include "mpeg2_internal.h" -#include "../include/mmx.h" - -#define ROW_SHIFT 15 -#define COL_SHIFT 6 - -#define round(bias) ((int)(((bias)+0.5) * (1<> ROW_SHIFT; - row[1] = (a1 + b1) >> ROW_SHIFT; - row[2] = (a2 + b2) >> ROW_SHIFT; - row[3] = (a3 + b3) >> ROW_SHIFT; - row[4] = (a3 - b3) >> ROW_SHIFT; - row[5] = (a2 - b2) >> ROW_SHIFT; - row[6] = (a1 - b1) >> ROW_SHIFT; - row[7] = (a0 - b0) >> ROW_SHIFT; -} -#endif - - -/* MMXEXT row IDCT */ - -#define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \ - c4, c6, c4, c6, \ - c1, c3, -c1, -c5, \ - c5, c7, c3, -c7, \ - c4, -c6, c4, -c6, \ - -c4, c2, c4, -c2, \ - c5, -c1, c3, -c1, \ - c7, c3, c7, -c5 } - -static inline void mmxext_row_head (int16_t * const row, const int offset, - const int16_t * const table) -{ - movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */ - - movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */ - movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */ - - movq_m2r (*table, mm3); /* mm3 = -C2 -C4 C2 C4 */ - movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */ - - movq_m2r (*(table+4), mm4); /* mm4 = C6 C4 C6 C4 */ - pmaddwd_r2r (mm0, mm3); /* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */ - - pshufw_r2r (mm2, mm2, 0x4e); /* mm2 = x2 x0 x6 x4 */ -} - -static inline void mmxext_row (const int16_t * const table, - const int32_t * const rounder) -{ - movq_m2r (*(table+8), mm1); /* mm1 = -C5 -C1 C3 C1 */ - pmaddwd_r2r (mm2, mm4); /* mm4 
= C4*x0+C6*x2 C4*x4+C6*x6 */ - - pmaddwd_m2r (*(table+16), mm0); /* mm0 = C4*x4-C6*x6 C4*x0-C6*x2 */ - pshufw_r2r (mm6, mm6, 0x4e); /* mm6 = x3 x1 x7 x5 */ - - movq_m2r (*(table+12), mm7); /* mm7 = -C7 C3 C7 C5 */ - pmaddwd_r2r (mm5, mm1); /* mm1 = -C1*x5-C5*x7 C1*x1+C3*x3 */ - - paddd_m2r (*rounder, mm3); /* mm3 += rounder */ - pmaddwd_r2r (mm6, mm7); /* mm7 = C3*x1-C7*x3 C5*x5+C7*x7 */ - - pmaddwd_m2r (*(table+20), mm2); /* mm2 = C4*x0-C2*x2 -C4*x4+C2*x6 */ - paddd_r2r (mm4, mm3); /* mm3 = a1 a0 + rounder */ - - pmaddwd_m2r (*(table+24), mm5); /* mm5 = C3*x5-C1*x7 C5*x1-C1*x3 */ - movq_r2r (mm3, mm4); /* mm4 = a1 a0 + rounder */ - - pmaddwd_m2r (*(table+28), mm6); /* mm6 = C7*x1-C5*x3 C7*x5+C3*x7 */ - paddd_r2r (mm7, mm1); /* mm1 = b1 b0 */ - - paddd_m2r (*rounder, mm0); /* mm0 += rounder */ - psubd_r2r (mm1, mm3); /* mm3 = a1-b1 a0-b0 + rounder */ - - psrad_i2r (ROW_SHIFT, mm3); /* mm3 = y6 y7 */ - paddd_r2r (mm4, mm1); /* mm1 = a1+b1 a0+b0 + rounder */ - - paddd_r2r (mm2, mm0); /* mm0 = a3 a2 + rounder */ - psrad_i2r (ROW_SHIFT, mm1); /* mm1 = y1 y0 */ - - paddd_r2r (mm6, mm5); /* mm5 = b3 b2 */ - movq_r2r (mm0, mm4); /* mm4 = a3 a2 + rounder */ - - paddd_r2r (mm5, mm0); /* mm0 = a3+b3 a2+b2 + rounder */ - psubd_r2r (mm5, mm4); /* mm4 = a3-b3 a2-b2 + rounder */ -} - -static inline void mmxext_row_tail (int16_t * const row, const int store) -{ - psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */ - - psrad_i2r (ROW_SHIFT, mm4); /* mm4 = y4 y5 */ - - packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */ - - packssdw_r2r (mm3, mm4); /* mm4 = y6 y7 y4 y5 */ - - movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */ - pshufw_r2r (mm4, mm4, 0xb1); /* mm4 = y7 y6 y5 y4 */ - - /* slot */ - - movq_r2m (mm4, *(row+store+4)); /* save y7 y6 y5 y4 */ -} - -static inline void mmxext_row_mid (int16_t * const row, const int store, - const int offset, - const int16_t * const table) -{ - movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */ - psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */ - 
- movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */ - psrad_i2r (ROW_SHIFT, mm4); /* mm4 = y4 y5 */ - - packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */ - movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */ - - packssdw_r2r (mm3, mm4); /* mm4 = y6 y7 y4 y5 */ - movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */ - - movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */ - pshufw_r2r (mm4, mm4, 0xb1); /* mm4 = y7 y6 y5 y4 */ - - movq_m2r (*table, mm3); /* mm3 = -C2 -C4 C2 C4 */ - movq_r2m (mm4, *(row+store+4)); /* save y7 y6 y5 y4 */ - - pmaddwd_r2r (mm0, mm3); /* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */ - - movq_m2r (*(table+4), mm4); /* mm4 = C6 C4 C6 C4 */ - pshufw_r2r (mm2, mm2, 0x4e); /* mm2 = x2 x0 x6 x4 */ -} - - -/* MMX row IDCT */ - -#define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \ - c4, c6, -c4, -c2, \ - c1, c3, c3, -c7, \ - c5, c7, -c1, -c5, \ - c4, -c6, c4, -c2, \ - -c4, c2, c4, -c6, \ - c5, -c1, c7, -c5, \ - c7, c3, c3, -c1 } - -static inline void mmx_row_head (int16_t * const row, const int offset, - const int16_t * const table) -{ - movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */ - - movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */ - movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */ - - movq_m2r (*table, mm3); /* mm3 = C6 C4 C2 C4 */ - movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */ - - punpckldq_r2r (mm0, mm0); /* mm0 = x2 x0 x2 x0 */ - - movq_m2r (*(table+4), mm4); /* mm4 = -C2 -C4 C6 C4 */ - pmaddwd_r2r (mm0, mm3); /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */ - - movq_m2r (*(table+8), mm1); /* mm1 = -C7 C3 C3 C1 */ - punpckhdq_r2r (mm2, mm2); /* mm2 = x6 x4 x6 x4 */ -} - -static inline void mmx_row (const int16_t * const table, - const int32_t * const rounder) -{ - pmaddwd_r2r (mm2, mm4); /* mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 */ - punpckldq_r2r (mm5, mm5); /* mm5 = x3 x1 x3 x1 */ - - pmaddwd_m2r (*(table+16), mm0); /* mm0 = C4*x0-C2*x2 C4*x0-C6*x2 */ - punpckhdq_r2r (mm6, mm6); /* mm6 = x7 x5 x7 x5 */ - - movq_m2r (*(table+12), mm7); /* mm7 = -C5 -C1 C7 C5 */ 
- pmaddwd_r2r (mm5, mm1); /* mm1 = C3*x1-C7*x3 C1*x1+C3*x3 */ - - paddd_m2r (*rounder, mm3); /* mm3 += rounder */ - pmaddwd_r2r (mm6, mm7); /* mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 */ - - pmaddwd_m2r (*(table+20), mm2); /* mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 */ - paddd_r2r (mm4, mm3); /* mm3 = a1 a0 + rounder */ - - pmaddwd_m2r (*(table+24), mm5); /* mm5 = C7*x1-C5*x3 C5*x1-C1*x3 */ - movq_r2r (mm3, mm4); /* mm4 = a1 a0 + rounder */ - - pmaddwd_m2r (*(table+28), mm6); /* mm6 = C3*x5-C1*x7 C7*x5+C3*x7 */ - paddd_r2r (mm7, mm1); /* mm1 = b1 b0 */ - - paddd_m2r (*rounder, mm0); /* mm0 += rounder */ - psubd_r2r (mm1, mm3); /* mm3 = a1-b1 a0-b0 + rounder */ - - psrad_i2r (ROW_SHIFT, mm3); /* mm3 = y6 y7 */ - paddd_r2r (mm4, mm1); /* mm1 = a1+b1 a0+b0 + rounder */ - - paddd_r2r (mm2, mm0); /* mm0 = a3 a2 + rounder */ - psrad_i2r (ROW_SHIFT, mm1); /* mm1 = y1 y0 */ - - paddd_r2r (mm6, mm5); /* mm5 = b3 b2 */ - movq_r2r (mm0, mm7); /* mm7 = a3 a2 + rounder */ - - paddd_r2r (mm5, mm0); /* mm0 = a3+b3 a2+b2 + rounder */ - psubd_r2r (mm5, mm7); /* mm7 = a3-b3 a2-b2 + rounder */ -} - -static inline void mmx_row_tail (int16_t * const row, const int store) -{ - psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */ - - psrad_i2r (ROW_SHIFT, mm7); /* mm7 = y4 y5 */ - - packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */ - - packssdw_r2r (mm3, mm7); /* mm7 = y6 y7 y4 y5 */ - - movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */ - movq_r2r (mm7, mm4); /* mm4 = y6 y7 y4 y5 */ - - pslld_i2r (16, mm7); /* mm7 = y7 0 y5 0 */ - - psrld_i2r (16, mm4); /* mm4 = 0 y6 0 y4 */ - - por_r2r (mm4, mm7); /* mm7 = y7 y6 y5 y4 */ - - /* slot */ - - movq_r2m (mm7, *(row+store+4)); /* save y7 y6 y5 y4 */ -} - -static inline void mmx_row_mid (int16_t * const row, const int store, - const int offset, const int16_t * const table) -{ - movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */ - psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */ - - movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */ - psrad_i2r (ROW_SHIFT, mm7); 
/* mm7 = y4 y5 */ - - packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */ - movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */ - - packssdw_r2r (mm3, mm7); /* mm7 = y6 y7 y4 y5 */ - movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */ - - movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */ - movq_r2r (mm7, mm1); /* mm1 = y6 y7 y4 y5 */ - - punpckldq_r2r (mm0, mm0); /* mm0 = x2 x0 x2 x0 */ - psrld_i2r (16, mm7); /* mm7 = 0 y6 0 y4 */ - - movq_m2r (*table, mm3); /* mm3 = C6 C4 C2 C4 */ - pslld_i2r (16, mm1); /* mm1 = y7 0 y5 0 */ - - movq_m2r (*(table+4), mm4); /* mm4 = -C2 -C4 C6 C4 */ - por_r2r (mm1, mm7); /* mm7 = y7 y6 y5 y4 */ - - movq_m2r (*(table+8), mm1); /* mm1 = -C7 C3 C3 C1 */ - punpckhdq_r2r (mm2, mm2); /* mm2 = x6 x4 x6 x4 */ - - movq_r2m (mm7, *(row+store+4)); /* save y7 y6 y5 y4 */ - pmaddwd_r2r (mm0, mm3); /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */ -} - - -#if 0 -/* C column IDCT - its just here to document the MMXEXT and MMX versions */ -static inline void idct_col (int16_t * col, int offset) -{ -/* multiplication - as implemented on mmx */ -#define F(c,x) (((c) * (x)) >> 16) - -/* saturation - it helps us handle torture test cases */ -#define S(x) (((x)>32767) ? 32767 : ((x)<-32768) ? 
-32768 : (x)) - - int16_t x0, x1, x2, x3, x4, x5, x6, x7; - int16_t y0, y1, y2, y3, y4, y5, y6, y7; - int16_t a0, a1, a2, a3, b0, b1, b2, b3; - int16_t u04, v04, u26, v26, u17, v17, u35, v35, u12, v12; - - col += offset; - - x0 = col[0*8]; - x1 = col[1*8]; - x2 = col[2*8]; - x3 = col[3*8]; - x4 = col[4*8]; - x5 = col[5*8]; - x6 = col[6*8]; - x7 = col[7*8]; - - u04 = S (x0 + x4); - v04 = S (x0 - x4); - u26 = S (F (T2, x6) + x2); - v26 = S (F (T2, x2) - x6); - - a0 = S (u04 + u26); - a1 = S (v04 + v26); - a2 = S (v04 - v26); - a3 = S (u04 - u26); - - u17 = S (F (T1, x7) + x1); - v17 = S (F (T1, x1) - x7); - u35 = S (F (T3, x5) + x3); - v35 = S (F (T3, x3) - x5); - - b0 = S (u17 + u35); - b3 = S (v17 - v35); - u12 = S (u17 - u35); - v12 = S (v17 + v35); - u12 = S (2 * F (C4, u12)); - v12 = S (2 * F (C4, v12)); - b1 = S (u12 + v12); - b2 = S (u12 - v12); - - y0 = S (a0 + b0) >> COL_SHIFT; - y1 = S (a1 + b1) >> COL_SHIFT; - y2 = S (a2 + b2) >> COL_SHIFT; - y3 = S (a3 + b3) >> COL_SHIFT; - - y4 = S (a3 - b3) >> COL_SHIFT; - y5 = S (a2 - b2) >> COL_SHIFT; - y6 = S (a1 - b1) >> COL_SHIFT; - y7 = S (a0 - b0) >> COL_SHIFT; - - col[0*8] = y0; - col[1*8] = y1; - col[2*8] = y2; - col[3*8] = y3; - col[4*8] = y4; - col[5*8] = y5; - col[6*8] = y6; - col[7*8] = y7; -} -#endif - - -/* MMX column IDCT */ -static inline void idct_col (int16_t * const col, const int offset) -{ -#define T1 13036 -#define T2 27146 -#define T3 43790 -#define C4 23170 - - static const short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1}; - static const short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2}; - static const short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3}; - static const short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4}; - - /* column code adapted from peter gubanov */ - /* http://www.elecard.com/peter/idct.shtml */ - - movq_m2r (*_T1, mm0); /* mm0 = T1 */ - - movq_m2r (*(col+offset+1*8), mm1); /* mm1 = x1 */ - movq_r2r (mm0, mm2); /* mm2 = T1 */ - - movq_m2r (*(col+offset+7*8), mm4); /* mm4 = x7 */ - pmulhw_r2r (mm1, mm0); /* 
mm0 = T1*x1 */ - - movq_m2r (*_T3, mm5); /* mm5 = T3 */ - pmulhw_r2r (mm4, mm2); /* mm2 = T1*x7 */ - - movq_m2r (*(col+offset+5*8), mm6); /* mm6 = x5 */ - movq_r2r (mm5, mm7); /* mm7 = T3-1 */ - - movq_m2r (*(col+offset+3*8), mm3); /* mm3 = x3 */ - psubsw_r2r (mm4, mm0); /* mm0 = v17 */ - - movq_m2r (*_T2, mm4); /* mm4 = T2 */ - pmulhw_r2r (mm3, mm5); /* mm5 = (T3-1)*x3 */ - - paddsw_r2r (mm2, mm1); /* mm1 = u17 */ - pmulhw_r2r (mm6, mm7); /* mm7 = (T3-1)*x5 */ - - /* slot */ - - movq_r2r (mm4, mm2); /* mm2 = T2 */ - paddsw_r2r (mm3, mm5); /* mm5 = T3*x3 */ - - pmulhw_m2r (*(col+offset+2*8), mm4);/* mm4 = T2*x2 */ - paddsw_r2r (mm6, mm7); /* mm7 = T3*x5 */ - - psubsw_r2r (mm6, mm5); /* mm5 = v35 */ - paddsw_r2r (mm3, mm7); /* mm7 = u35 */ - - movq_m2r (*(col+offset+6*8), mm3); /* mm3 = x6 */ - movq_r2r (mm0, mm6); /* mm6 = v17 */ - - pmulhw_r2r (mm3, mm2); /* mm2 = T2*x6 */ - psubsw_r2r (mm5, mm0); /* mm0 = b3 */ - - psubsw_r2r (mm3, mm4); /* mm4 = v26 */ - paddsw_r2r (mm6, mm5); /* mm5 = v12 */ - - movq_r2m (mm0, *(col+offset+3*8)); /* save b3 in scratch0 */ - movq_r2r (mm1, mm6); /* mm6 = u17 */ - - paddsw_m2r (*(col+offset+2*8), mm2);/* mm2 = u26 */ - paddsw_r2r (mm7, mm6); /* mm6 = b0 */ - - psubsw_r2r (mm7, mm1); /* mm1 = u12 */ - movq_r2r (mm1, mm7); /* mm7 = u12 */ - - movq_m2r (*(col+offset+0*8), mm3); /* mm3 = x0 */ - paddsw_r2r (mm5, mm1); /* mm1 = u12+v12 */ - - movq_m2r (*_C4, mm0); /* mm0 = C4/2 */ - psubsw_r2r (mm5, mm7); /* mm7 = u12-v12 */ - - movq_r2m (mm6, *(col+offset+5*8)); /* save b0 in scratch1 */ - pmulhw_r2r (mm0, mm1); /* mm1 = b1/2 */ - - movq_r2r (mm4, mm6); /* mm6 = v26 */ - pmulhw_r2r (mm0, mm7); /* mm7 = b2/2 */ - - movq_m2r (*(col+offset+4*8), mm5); /* mm5 = x4 */ - movq_r2r (mm3, mm0); /* mm0 = x0 */ - - psubsw_r2r (mm5, mm3); /* mm3 = v04 */ - paddsw_r2r (mm5, mm0); /* mm0 = u04 */ - - paddsw_r2r (mm3, mm4); /* mm4 = a1 */ - movq_r2r (mm0, mm5); /* mm5 = u04 */ - - psubsw_r2r (mm6, mm3); /* mm3 = a2 */ - paddsw_r2r (mm2, mm5); /* 
mm5 = a0 */ - - paddsw_r2r (mm1, mm1); /* mm1 = b1 */ - psubsw_r2r (mm2, mm0); /* mm0 = a3 */ - - paddsw_r2r (mm7, mm7); /* mm7 = b2 */ - movq_r2r (mm3, mm2); /* mm2 = a2 */ - - movq_r2r (mm4, mm6); /* mm6 = a1 */ - paddsw_r2r (mm7, mm3); /* mm3 = a2+b2 */ - - psraw_i2r (COL_SHIFT, mm3); /* mm3 = y2 */ - paddsw_r2r (mm1, mm4); /* mm4 = a1+b1 */ - - psraw_i2r (COL_SHIFT, mm4); /* mm4 = y1 */ - psubsw_r2r (mm1, mm6); /* mm6 = a1-b1 */ - - movq_m2r (*(col+offset+5*8), mm1); /* mm1 = b0 */ - psubsw_r2r (mm7, mm2); /* mm2 = a2-b2 */ - - psraw_i2r (COL_SHIFT, mm6); /* mm6 = y6 */ - movq_r2r (mm5, mm7); /* mm7 = a0 */ - - movq_r2m (mm4, *(col+offset+1*8)); /* save y1 */ - psraw_i2r (COL_SHIFT, mm2); /* mm2 = y5 */ - - movq_r2m (mm3, *(col+offset+2*8)); /* save y2 */ - paddsw_r2r (mm1, mm5); /* mm5 = a0+b0 */ - - movq_m2r (*(col+offset+3*8), mm4); /* mm4 = b3 */ - psubsw_r2r (mm1, mm7); /* mm7 = a0-b0 */ - - psraw_i2r (COL_SHIFT, mm5); /* mm5 = y0 */ - movq_r2r (mm0, mm3); /* mm3 = a3 */ - - movq_r2m (mm2, *(col+offset+5*8)); /* save y5 */ - psubsw_r2r (mm4, mm3); /* mm3 = a3-b3 */ - - psraw_i2r (COL_SHIFT, mm7); /* mm7 = y7 */ - paddsw_r2r (mm0, mm4); /* mm4 = a3+b3 */ - - movq_r2m (mm5, *(col+offset+0*8)); /* save y0 */ - psraw_i2r (COL_SHIFT, mm3); /* mm3 = y4 */ - - movq_r2m (mm6, *(col+offset+6*8)); /* save y6 */ - psraw_i2r (COL_SHIFT, mm4); /* mm4 = y3 */ - - movq_r2m (mm7, *(col+offset+7*8)); /* save y7 */ - - movq_r2m (mm3, *(col+offset+4*8)); /* save y4 */ - - movq_r2m (mm4, *(col+offset+3*8)); /* save y3 */ -} - - -static const int32_t rounder0[] ATTR_ALIGN(8) = - rounder ((1 << (COL_SHIFT - 1)) - 0.5); -static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0); -static const int32_t rounder1[] ATTR_ALIGN(8) = - rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */ -static const int32_t rounder7[] ATTR_ALIGN(8) = - rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */ -static const int32_t rounder2[] ATTR_ALIGN(8) = - rounder (0.60355339059); /* C2 * (C6+C2)/2 */ -static 
const int32_t rounder6[] ATTR_ALIGN(8) = - rounder (-0.25); /* C2 * (C6-C2)/2 */ -static const int32_t rounder3[] ATTR_ALIGN(8) = - rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */ -static const int32_t rounder5[] ATTR_ALIGN(8) = - rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */ - - -#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \ -static inline void idct (int16_t * const block) \ -{ \ - static const int16_t table04[] ATTR_ALIGN(16) = \ - table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \ - static const int16_t table17[] ATTR_ALIGN(16) = \ - table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \ - static const int16_t table26[] ATTR_ALIGN(16) = \ - table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \ - static const int16_t table35[] ATTR_ALIGN(16) = \ - table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \ - \ - idct_row_head (block, 0*8, table04); \ - idct_row (table04, rounder0); \ - idct_row_mid (block, 0*8, 4*8, table04); \ - idct_row (table04, rounder4); \ - idct_row_mid (block, 4*8, 1*8, table17); \ - idct_row (table17, rounder1); \ - idct_row_mid (block, 1*8, 7*8, table17); \ - idct_row (table17, rounder7); \ - idct_row_mid (block, 7*8, 2*8, table26); \ - idct_row (table26, rounder2); \ - idct_row_mid (block, 2*8, 6*8, table26); \ - idct_row (table26, rounder6); \ - idct_row_mid (block, 6*8, 3*8, table35); \ - idct_row (table35, rounder3); \ - idct_row_mid (block, 3*8, 5*8, table35); \ - idct_row (table35, rounder5); \ - idct_row_tail (block, 5*8); \ - \ - idct_col (block, 0); \ - idct_col (block, 4); \ -} - - -#define COPY_MMX(offset,r0,r1,r2) \ -do { \ - movq_m2r (*(block+offset), r0); \ - dest += stride; \ - movq_m2r (*(block+offset+4), r1); \ - movq_r2m (r2, *dest); \ - packuswb_r2r (r1, r0); \ -} while (0) - -static inline void block_copy (int16_t * const block, uint8_t * dest, - const int stride) -{ - movq_m2r (*(block+0*8), mm0); - movq_m2r (*(block+0*8+4), mm1); - movq_m2r (*(block+1*8), 
mm2); - packuswb_r2r (mm1, mm0); - movq_m2r (*(block+1*8+4), mm3); - movq_r2m (mm0, *dest); - packuswb_r2r (mm3, mm2); - COPY_MMX (2*8, mm0, mm1, mm2); - COPY_MMX (3*8, mm2, mm3, mm0); - COPY_MMX (4*8, mm0, mm1, mm2); - COPY_MMX (5*8, mm2, mm3, mm0); - COPY_MMX (6*8, mm0, mm1, mm2); - COPY_MMX (7*8, mm2, mm3, mm0); - movq_r2m (mm2, *(dest+stride)); -} - - -#define ADD_MMX(offset,r1,r2,r3,r4) \ -do { \ - movq_m2r (*(dest+2*stride), r1); \ - packuswb_r2r (r4, r3); \ - movq_r2r (r1, r2); \ - dest += stride; \ - movq_r2m (r3, *dest); \ - punpcklbw_r2r (mm0, r1); \ - paddsw_m2r (*(block+offset), r1); \ - punpckhbw_r2r (mm0, r2); \ - paddsw_m2r (*(block+offset+4), r2); \ -} while (0) - -static inline void block_add (int16_t * const block, uint8_t * dest, - const int stride) -{ - movq_m2r (*dest, mm1); - pxor_r2r (mm0, mm0); - movq_m2r (*(dest+stride), mm3); - movq_r2r (mm1, mm2); - punpcklbw_r2r (mm0, mm1); - movq_r2r (mm3, mm4); - paddsw_m2r (*(block+0*8), mm1); - punpckhbw_r2r (mm0, mm2); - paddsw_m2r (*(block+0*8+4), mm2); - punpcklbw_r2r (mm0, mm3); - paddsw_m2r (*(block+1*8), mm3); - packuswb_r2r (mm2, mm1); - punpckhbw_r2r (mm0, mm4); - movq_r2m (mm1, *dest); - paddsw_m2r (*(block+1*8+4), mm4); - ADD_MMX (2*8, mm1, mm2, mm3, mm4); - ADD_MMX (3*8, mm3, mm4, mm1, mm2); - ADD_MMX (4*8, mm1, mm2, mm3, mm4); - ADD_MMX (5*8, mm3, mm4, mm1, mm2); - ADD_MMX (6*8, mm1, mm2, mm3, mm4); - ADD_MMX (7*8, mm3, mm4, mm1, mm2); - packuswb_r2r (mm4, mm3); - movq_r2m (mm3, *(dest+stride)); -} - - -static inline void block_zero (int16_t * const block) -{ - pxor_r2r (mm0, mm0); - movq_r2m (mm0, *(block+0*4)); - movq_r2m (mm0, *(block+1*4)); - movq_r2m (mm0, *(block+2*4)); - movq_r2m (mm0, *(block+3*4)); - movq_r2m (mm0, *(block+4*4)); - movq_r2m (mm0, *(block+5*4)); - movq_r2m (mm0, *(block+6*4)); - movq_r2m (mm0, *(block+7*4)); - movq_r2m (mm0, *(block+8*4)); - movq_r2m (mm0, *(block+9*4)); - movq_r2m (mm0, *(block+10*4)); - movq_r2m (mm0, *(block+11*4)); - movq_r2m (mm0, 
*(block+12*4)); - movq_r2m (mm0, *(block+13*4)); - movq_r2m (mm0, *(block+14*4)); - movq_r2m (mm0, *(block+15*4)); -} - - -#define CPU_MMXEXT 0 -#define CPU_MMX 1 - -#define dup4(reg) \ -do { \ - if (cpu != CPU_MMXEXT) { \ - punpcklwd_r2r (reg, reg); \ - punpckldq_r2r (reg, reg); \ - } else \ - pshufw_r2r (reg, reg, 0x00); \ -} while (0) - -static inline void block_add_DC (int16_t * const block, uint8_t * dest, - const int stride, const int cpu) -{ - movd_v2r ((block[0] + 64) >> 7, mm0); - pxor_r2r (mm1, mm1); - movq_m2r (*dest, mm2); - dup4 (mm0); - psubsw_r2r (mm0, mm1); - packuswb_r2r (mm0, mm0); - paddusb_r2r (mm0, mm2); - packuswb_r2r (mm1, mm1); - movq_m2r (*(dest + stride), mm3); - psubusb_r2r (mm1, mm2); - block[0] = 0; - paddusb_r2r (mm0, mm3); - movq_r2m (mm2, *dest); - psubusb_r2r (mm1, mm3); - movq_m2r (*(dest + 2*stride), mm2); - dest += stride; - movq_r2m (mm3, *dest); - paddusb_r2r (mm0, mm2); - movq_m2r (*(dest + 2*stride), mm3); - psubusb_r2r (mm1, mm2); - dest += stride; - paddusb_r2r (mm0, mm3); - movq_r2m (mm2, *dest); - psubusb_r2r (mm1, mm3); - movq_m2r (*(dest + 2*stride), mm2); - dest += stride; - movq_r2m (mm3, *dest); - paddusb_r2r (mm0, mm2); - movq_m2r (*(dest + 2*stride), mm3); - psubusb_r2r (mm1, mm2); - dest += stride; - paddusb_r2r (mm0, mm3); - movq_r2m (mm2, *dest); - psubusb_r2r (mm1, mm3); - movq_m2r (*(dest + 2*stride), mm2); - dest += stride; - movq_r2m (mm3, *dest); - paddusb_r2r (mm0, mm2); - movq_m2r (*(dest + 2*stride), mm3); - psubusb_r2r (mm1, mm2); - block[63] = 0; - paddusb_r2r (mm0, mm3); - movq_r2m (mm2, *(dest + stride)); - psubusb_r2r (mm1, mm3); - movq_r2m (mm3, *(dest + 2*stride)); -} - - -declare_idct (mmxext_idct, mmxext_table, - mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid) - -void mpeg2_idct_copy_mmxext (int16_t * const block, uint8_t * const dest, - const int stride) -{ - mmxext_idct (block); - block_copy (block, dest, stride); - block_zero (block); -} - -void mpeg2_idct_add_mmxext (const int 
last, int16_t * const block, - uint8_t * const dest, const int stride) -{ - if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) { - mmxext_idct (block); - block_add (block, dest, stride); - block_zero (block); - } else - block_add_DC (block, dest, stride, CPU_MMXEXT); -} - - -declare_idct (mmx_idct, mmx_table, - mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid) - -void mpeg2_idct_copy_mmx (int16_t * const block, uint8_t * const dest, - const int stride) -{ - mmx_idct (block); - block_copy (block, dest, stride); - block_zero (block); -} - -void mpeg2_idct_add_mmx (const int last, int16_t * const block, - uint8_t * const dest, const int stride) -{ - if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) { - mmx_idct (block); - block_add (block, dest, stride); - block_zero (block); - } else - block_add_DC (block, dest, stride, CPU_MMX); -} - - -void mpeg2_idct_mmx_init (void) -{ - extern uint8_t mpeg2_scan_norm[64]; - extern uint8_t mpeg2_scan_alt[64]; - int i, j; - - /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */ - - for (i = 0; i < 64; i++) { - j = mpeg2_scan_norm[i]; - mpeg2_scan_norm[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2); - j = mpeg2_scan_alt[i]; - mpeg2_scan_alt[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2); - } -} - -#endif diff --git a/src/libmpeg2new/libmpeg2/libmpeg2.pc.in b/src/libmpeg2new/libmpeg2/libmpeg2.pc.in deleted file mode 100644 index d54500b0e..000000000 --- a/src/libmpeg2new/libmpeg2/libmpeg2.pc.in +++ /dev/null @@ -1,10 +0,0 @@ -prefix=@prefix@ -exec_prefix=@exec_prefix@ -libdir=@libdir@ -includedir=@includedir@ - -Name: libmpeg2 -Description: A decoding library for MPEG-1 and MPEG-2 streams. 
-Version: @VERSION@ -Libs: -L${libdir} -lmpeg2 -Cflags: -I${includedir}/@PACKAGE@ diff --git a/src/libmpeg2new/libmpeg2/libmpeg2convert.pc.in b/src/libmpeg2new/libmpeg2/libmpeg2convert.pc.in deleted file mode 100644 index 42383a6e2..000000000 --- a/src/libmpeg2new/libmpeg2/libmpeg2convert.pc.in +++ /dev/null @@ -1,10 +0,0 @@ -prefix=@prefix@ -exec_prefix=@exec_prefix@ -libdir=@libdir@ -includedir=@includedir@ - -Name: libmpeg2convert -Description: libmpeg2 helper functions for converting to various formats. -Version: @VERSION@ -Libs: -L${libdir} -lmpeg2convert -Cflags: -I${includedir}/@PACKAGE@ diff --git a/src/libmpeg2new/libmpeg2/motion_comp.c b/src/libmpeg2new/libmpeg2/motion_comp.c deleted file mode 100644 index d5a265d5c..000000000 --- a/src/libmpeg2new/libmpeg2/motion_comp.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - * motion_comp.c - * Copyright (C) 2000-2003 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#include - -#include "../include/mpeg2.h" -#include "../include/attributes.h" -#include "mpeg2_internal.h" - -mpeg2_mc_t mpeg2_mc; - -void mpeg2_mc_init (uint32_t accel) -{ -#ifdef ARCH_X86 - if (accel & MPEG2_ACCEL_X86_MMXEXT) - mpeg2_mc = mpeg2_mc_mmxext; - else if (accel & MPEG2_ACCEL_X86_3DNOW) - mpeg2_mc = mpeg2_mc_3dnow; - else if (accel & MPEG2_ACCEL_X86_MMX) - mpeg2_mc = mpeg2_mc_mmx; - else -#endif -#ifdef ARCH_PPC - if (accel & MPEG2_ACCEL_PPC_ALTIVEC) - mpeg2_mc = mpeg2_mc_altivec; - else -#endif -#ifdef ARCH_ALPHA - if (accel & MPEG2_ACCEL_ALPHA) - mpeg2_mc = mpeg2_mc_alpha; - else -#endif -#ifdef ARCH_SPARC - if (accel & MPEG2_ACCEL_SPARC_VIS) - mpeg2_mc = mpeg2_mc_vis; - else -#endif - mpeg2_mc = mpeg2_mc_c; -} - -#define avg2(a,b) ((a+b+1)>>1) -#define avg4(a,b,c,d) ((a+b+c+d+2)>>2) - -#define predict_o(i) (ref[i]) -#define predict_x(i) (avg2 (ref[i], ref[i+1])) -#define predict_y(i) (avg2 (ref[i], (ref+stride)[i])) -#define predict_xy(i) (avg4 (ref[i], ref[i+1], \ - (ref+stride)[i], (ref+stride)[i+1])) - -#define put(predictor,i) dest[i] = predictor (i) -#define avg(predictor,i) dest[i] = avg2 (predictor (i), dest[i]) - -/* mc function template */ - -#define MC_FUNC(op,xy) \ -static void MC_##op##_##xy##_16_c (uint8_t * dest, const uint8_t * ref, \ - const int stride, int height) \ -{ \ - do { \ - op (predict_##xy, 0); \ - op (predict_##xy, 1); \ - op (predict_##xy, 2); \ - op (predict_##xy, 3); \ - op (predict_##xy, 4); \ - op (predict_##xy, 5); \ - op (predict_##xy, 6); \ - op (predict_##xy, 7); \ - op (predict_##xy, 8); \ - op (predict_##xy, 9); \ - op (predict_##xy, 10); \ - op (predict_##xy, 11); \ - op (predict_##xy, 12); \ - op (predict_##xy, 13); \ - op (predict_##xy, 14); \ - op (predict_##xy, 15); \ 
- ref += stride; \ - dest += stride; \ - } while (--height); \ -} \ -static void MC_##op##_##xy##_8_c (uint8_t * dest, const uint8_t * ref, \ - const int stride, int height) \ -{ \ - do { \ - op (predict_##xy, 0); \ - op (predict_##xy, 1); \ - op (predict_##xy, 2); \ - op (predict_##xy, 3); \ - op (predict_##xy, 4); \ - op (predict_##xy, 5); \ - op (predict_##xy, 6); \ - op (predict_##xy, 7); \ - ref += stride; \ - dest += stride; \ - } while (--height); \ -} - -/* definitions of the actual mc functions */ - -MC_FUNC (put,o) -MC_FUNC (avg,o) -MC_FUNC (put,x) -MC_FUNC (avg,x) -MC_FUNC (put,y) -MC_FUNC (avg,y) -MC_FUNC (put,xy) -MC_FUNC (avg,xy) - -MPEG2_MC_EXTERN (c) diff --git a/src/libmpeg2new/libmpeg2/motion_comp_alpha.c b/src/libmpeg2new/libmpeg2/motion_comp_alpha.c deleted file mode 100644 index 1b3712a1a..000000000 --- a/src/libmpeg2new/libmpeg2/motion_comp_alpha.c +++ /dev/null @@ -1,253 +0,0 @@ -/* - * motion_comp_alpha.c - * Copyright (C) 2002-2003 Falk Hueffner - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#ifdef ARCH_ALPHA - -#include - -#include "mpeg2.h" -#include -#include "mpeg2_internal.h" -#include "alpha_asm.h" - -static inline uint64_t avg2 (uint64_t a, uint64_t b) -{ - return (a | b) - (((a ^ b) & BYTE_VEC (0xfe)) >> 1); -} - -// Load two unaligned quadwords from addr. This macro only works if -// addr is actually unaligned. -#define ULOAD16(ret_l,ret_r,addr) \ - do { \ - uint64_t _l = ldq_u (addr + 0); \ - uint64_t _m = ldq_u (addr + 8); \ - uint64_t _r = ldq_u (addr + 16); \ - ret_l = extql (_l, addr) | extqh (_m, addr); \ - ret_r = extql (_m, addr) | extqh (_r, addr); \ - } while (0) - -// Load two aligned quadwords from addr. -#define ALOAD16(ret_l,ret_r,addr) \ - do { \ - ret_l = ldq (addr); \ - ret_r = ldq (addr + 8); \ - } while (0) - -#define OP8(LOAD,LOAD16,STORE) \ - do { \ - STORE (LOAD (pixels), block); \ - pixels += line_size; \ - block += line_size; \ - } while (--h) - -#define OP16(LOAD,LOAD16,STORE) \ - do { \ - uint64_t l, r; \ - LOAD16 (l, r, pixels); \ - STORE (l, block); \ - STORE (r, block + 8); \ - pixels += line_size; \ - block += line_size; \ - } while (--h) - -#define OP8_X2(LOAD,LOAD16,STORE) \ - do { \ - uint64_t p0, p1; \ - \ - p0 = LOAD (pixels); \ - p1 = p0 >> 8 | ((uint64_t) pixels[8] << 56); \ - STORE (avg2 (p0, p1), block); \ - pixels += line_size; \ - block += line_size; \ - } while (--h) - -#define OP16_X2(LOAD,LOAD16,STORE) \ - do { \ - uint64_t p0, p1; \ - \ - LOAD16 (p0, p1, pixels); \ - STORE (avg2(p0, p0 >> 8 | p1 << 56), block); \ - STORE (avg2(p1, p1 >> 8 | (uint64_t) pixels[16] << 56), \ - block + 8); \ - pixels += line_size; \ - block += line_size; \ - } while (--h) - -#define OP8_Y2(LOAD,LOAD16,STORE) \ - do { \ - uint64_t p0, p1; \ - p0 = LOAD (pixels); \ - pixels += 
line_size; \ - p1 = LOAD (pixels); \ - do { \ - uint64_t av = avg2 (p0, p1); \ - if (--h == 0) line_size = 0; \ - pixels += line_size; \ - p0 = p1; \ - p1 = LOAD (pixels); \ - STORE (av, block); \ - block += line_size; \ - } while (h); \ - } while (0) - -#define OP16_Y2(LOAD,LOAD16,STORE) \ - do { \ - uint64_t p0l, p0r, p1l, p1r; \ - LOAD16 (p0l, p0r, pixels); \ - pixels += line_size; \ - LOAD16 (p1l, p1r, pixels); \ - do { \ - uint64_t avl, avr; \ - if (--h == 0) line_size = 0; \ - avl = avg2 (p0l, p1l); \ - avr = avg2 (p0r, p1r); \ - p0l = p1l; \ - p0r = p1r; \ - pixels += line_size; \ - LOAD16 (p1l, p1r, pixels); \ - STORE (avl, block); \ - STORE (avr, block + 8); \ - block += line_size; \ - } while (h); \ - } while (0) - -#define OP8_XY2(LOAD,LOAD16,STORE) \ - do { \ - uint64_t pl, ph; \ - uint64_t p1 = LOAD (pixels); \ - uint64_t p2 = p1 >> 8 | ((uint64_t) pixels[8] << 56); \ - \ - ph = (((p1 & ~BYTE_VEC (0x03)) >> 2) + \ - ((p2 & ~BYTE_VEC (0x03)) >> 2)); \ - pl = ((p1 & BYTE_VEC (0x03)) + \ - (p2 & BYTE_VEC (0x03))); \ - \ - do { \ - uint64_t npl, nph; \ - \ - pixels += line_size; \ - p1 = LOAD (pixels); \ - p2 = (p1 >> 8) | ((uint64_t) pixels[8] << 56); \ - nph = (((p1 & ~BYTE_VEC (0x03)) >> 2) + \ - ((p2 & ~BYTE_VEC (0x03)) >> 2)); \ - npl = ((p1 & BYTE_VEC (0x03)) + \ - (p2 & BYTE_VEC (0x03))); \ - \ - STORE (ph + nph + \ - (((pl + npl + BYTE_VEC (0x02)) >> 2) & \ - BYTE_VEC (0x03)), block); \ - \ - block += line_size; \ - pl = npl; \ - ph = nph; \ - } while (--h); \ - } while (0) - -#define OP16_XY2(LOAD,LOAD16,STORE) \ - do { \ - uint64_t p0, p1, p2, p3, pl_l, ph_l, pl_r, ph_r; \ - LOAD16 (p0, p2, pixels); \ - p1 = p0 >> 8 | (p2 << 56); \ - p3 = p2 >> 8 | ((uint64_t)pixels[16] << 56); \ - \ - ph_l = (((p0 & ~BYTE_VEC (0x03)) >> 2) + \ - ((p1 & ~BYTE_VEC (0x03)) >> 2)); \ - pl_l = ((p0 & BYTE_VEC (0x03)) + \ - (p1 & BYTE_VEC(0x03))); \ - ph_r = (((p2 & ~BYTE_VEC (0x03)) >> 2) + \ - ((p3 & ~BYTE_VEC (0x03)) >> 2)); \ - pl_r = ((p2 & BYTE_VEC (0x03)) + \ - 
(p3 & BYTE_VEC (0x03))); \ - \ - do { \ - uint64_t npl_l, nph_l, npl_r, nph_r; \ - \ - pixels += line_size; \ - LOAD16 (p0, p2, pixels); \ - p1 = p0 >> 8 | (p2 << 56); \ - p3 = p2 >> 8 | ((uint64_t)pixels[16] << 56); \ - nph_l = (((p0 & ~BYTE_VEC (0x03)) >> 2) + \ - ((p1 & ~BYTE_VEC (0x03)) >> 2)); \ - npl_l = ((p0 & BYTE_VEC (0x03)) + \ - (p1 & BYTE_VEC (0x03))); \ - nph_r = (((p2 & ~BYTE_VEC (0x03)) >> 2) + \ - ((p3 & ~BYTE_VEC (0x03)) >> 2)); \ - npl_r = ((p2 & BYTE_VEC (0x03)) + \ - (p3 & BYTE_VEC (0x03))); \ - \ - STORE (ph_l + nph_l + \ - (((pl_l + npl_l + BYTE_VEC (0x02)) >> 2) & \ - BYTE_VEC(0x03)), block); \ - STORE (ph_r + nph_r + \ - (((pl_r + npl_r + BYTE_VEC (0x02)) >> 2) & \ - BYTE_VEC(0x03)), block + 8); \ - \ - block += line_size; \ - pl_l = npl_l; \ - ph_l = nph_l; \ - pl_r = npl_r; \ - ph_r = nph_r; \ - } while (--h); \ - } while (0) - -#define MAKE_OP(OPNAME,SIZE,SUFF,OPKIND,STORE) \ -static void MC_ ## OPNAME ## _ ## SUFF ## _ ## SIZE ## _alpha \ - (uint8_t *restrict block, const uint8_t *restrict pixels, \ - int line_size, int h) \ -{ \ - if ((uint64_t) pixels & 0x7) { \ - OPKIND (uldq, ULOAD16, STORE); \ - } else { \ - OPKIND (ldq, ALOAD16, STORE); \ - } \ -} - -#define PIXOP(OPNAME,STORE) \ - MAKE_OP (OPNAME, 8, o, OP8, STORE); \ - MAKE_OP (OPNAME, 8, x, OP8_X2, STORE); \ - MAKE_OP (OPNAME, 8, y, OP8_Y2, STORE); \ - MAKE_OP (OPNAME, 8, xy, OP8_XY2, STORE); \ - MAKE_OP (OPNAME, 16, o, OP16, STORE); \ - MAKE_OP (OPNAME, 16, x, OP16_X2, STORE); \ - MAKE_OP (OPNAME, 16, y, OP16_Y2, STORE); \ - MAKE_OP (OPNAME, 16, xy, OP16_XY2, STORE); - -#define STORE(l,b) stq (l, b) -PIXOP (put, STORE); -#undef STORE -#define STORE(l,b) stq (avg2 (l, ldq (b)), b); -PIXOP (avg, STORE); - -mpeg2_mc_t mpeg2_mc_alpha = { - { MC_put_o_16_alpha, MC_put_x_16_alpha, - MC_put_y_16_alpha, MC_put_xy_16_alpha, - MC_put_o_8_alpha, MC_put_x_8_alpha, - MC_put_y_8_alpha, MC_put_xy_8_alpha }, - { MC_avg_o_16_alpha, MC_avg_x_16_alpha, - MC_avg_y_16_alpha, MC_avg_xy_16_alpha, - 
MC_avg_o_8_alpha, MC_avg_x_8_alpha, - MC_avg_y_8_alpha, MC_avg_xy_8_alpha } -}; - -#endif diff --git a/src/libmpeg2new/libmpeg2/motion_comp_altivec.c b/src/libmpeg2new/libmpeg2/motion_comp_altivec.c deleted file mode 100644 index ee740e14e..000000000 --- a/src/libmpeg2new/libmpeg2/motion_comp_altivec.c +++ /dev/null @@ -1,1010 +0,0 @@ -/* - * motion_comp_altivec.c - * Copyright (C) 2000-2003 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#ifdef ARCH_PPC - -#ifdef HAVE_ALTIVEC_H -#include -#endif -#include - -#include "mpeg2.h" -#include -#include "mpeg2_internal.h" - -typedef vector signed char vector_s8_t; -typedef vector unsigned char vector_u8_t; -typedef vector signed short vector_s16_t; -typedef vector unsigned short vector_u16_t; -typedef vector signed int vector_s32_t; -typedef vector unsigned int vector_u32_t; - -#ifndef COFFEE_BREAK /* Workarounds for gcc suckage */ - -static inline vector_u8_t my_vec_ld (int const A, const uint8_t * const B) -{ - return vec_ld (A, (uint8_t *)B); -} -#undef vec_ld -#define vec_ld my_vec_ld - -static inline vector_u8_t my_vec_and (vector_u8_t const A, vector_u8_t const B) -{ - return vec_and (A, B); -} -#undef vec_and -#define vec_and my_vec_and - -static inline vector_u8_t my_vec_avg (vector_u8_t const A, vector_u8_t const B) -{ - return vec_avg (A, B); -} -#undef vec_avg -#define vec_avg my_vec_avg - -#endif - -static void MC_put_o_16_altivec (uint8_t * dest, const uint8_t * ref, - const int stride, int height) -{ - vector_u8_t perm, ref0, ref1, tmp; - - perm = vec_lvsl (0, ref); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - tmp = vec_perm (ref0, ref1, perm); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - vec_st (tmp, 0, dest); - tmp = vec_perm (ref0, ref1, perm); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - vec_st (tmp, stride, dest); - dest += 2*stride; - tmp = vec_perm (ref0, ref1, perm); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - vec_st (tmp, 0, dest); - tmp = vec_perm (ref0, ref1, perm); - vec_st (tmp, stride, dest); -} - -static void MC_put_o_8_altivec 
(uint8_t * dest, const uint8_t * ref, - const int stride, int height) -{ - vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1; - - tmp0 = vec_lvsl (0, ref); - tmp0 = vec_mergeh (tmp0, tmp0); - perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); - tmp1 = vec_lvsl (stride, ref); - tmp1 = vec_mergeh (tmp1, tmp1); - perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - tmp0 = vec_perm (ref0, ref1, perm0); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); - dest += stride; - tmp1 = vec_perm (ref0, ref1, perm1); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); - dest += stride; - tmp0 = vec_perm (ref0, ref1, perm0); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); - dest += stride; - tmp1 = vec_perm (ref0, ref1, perm1); - vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); -} - -static void MC_put_x_16_altivec (uint8_t * dest, const uint8_t * ref, - const int stride, int height) -{ - vector_u8_t permA, permB, ref0, ref1, tmp; - - permA = vec_lvsl (0, ref); - permB = vec_add (permA, vec_splat_u8 (1)); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - tmp = vec_avg (vec_perm (ref0, ref1, permA), - vec_perm (ref0, ref1, permB)); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - vec_st (tmp, 0, dest); - tmp = vec_avg (vec_perm (ref0, ref1, permA), - vec_perm (ref0, ref1, permB)); - - ref0 = vec_ld (0, ref); 
- ref1 = vec_ld (16, ref); - ref += stride; - vec_st (tmp, stride, dest); - dest += 2*stride; - tmp = vec_avg (vec_perm (ref0, ref1, permA), - vec_perm (ref0, ref1, permB)); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - vec_st (tmp, 0, dest); - tmp = vec_avg (vec_perm (ref0, ref1, permA), - vec_perm (ref0, ref1, permB)); - vec_st (tmp, stride, dest); -} - -static void MC_put_x_8_altivec (uint8_t * dest, const uint8_t * ref, - const int stride, int height) -{ - vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1; - - ones = vec_splat_u8 (1); - tmp0 = vec_lvsl (0, ref); - tmp0 = vec_mergeh (tmp0, tmp0); - perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); - perm0B = vec_add (perm0A, ones); - tmp1 = vec_lvsl (stride, ref); - tmp1 = vec_mergeh (tmp1, tmp1); - perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); - perm1B = vec_add (perm1A, ones); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - ref += stride; - tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A), - vec_perm (ref0, ref1, perm0B)); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - ref += stride; - vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); - dest += stride; - tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A), - vec_perm (ref0, ref1, perm1B)); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - ref += stride; - vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); - dest += stride; - tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A), - vec_perm (ref0, ref1, perm0B)); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); - dest += stride; - tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A), - vec_perm (ref0, ref1, perm1B)); - vec_ste 
((vector_u32_t)tmp1, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); -} - -static void MC_put_y_16_altivec (uint8_t * dest, const uint8_t * ref, - const int stride, int height) -{ - vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp; - - perm = vec_lvsl (0, ref); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - tmp0 = vec_perm (ref0, ref1, perm); - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - tmp1 = vec_perm (ref0, ref1, perm); - tmp = vec_avg (tmp0, tmp1); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - vec_st (tmp, 0, dest); - tmp0 = vec_perm (ref0, ref1, perm); - tmp = vec_avg (tmp0, tmp1); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - vec_st (tmp, stride, dest); - dest += 2*stride; - tmp1 = vec_perm (ref0, ref1, perm); - tmp = vec_avg (tmp0, tmp1); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - vec_st (tmp, 0, dest); - tmp0 = vec_perm (ref0, ref1, perm); - tmp = vec_avg (tmp0, tmp1); - vec_st (tmp, stride, dest); -} - -static void MC_put_y_8_altivec (uint8_t * dest, const uint8_t * ref, - const int stride, int height) -{ - vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1; - - tmp0 = vec_lvsl (0, ref); - tmp0 = vec_mergeh (tmp0, tmp0); - perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); - tmp1 = vec_lvsl (stride, ref); - tmp1 = vec_mergeh (tmp1, tmp1); - perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - tmp0 = vec_perm (ref0, ref1, perm0); - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - tmp1 = vec_perm (ref0, ref1, perm1); - tmp = vec_avg (tmp0, tmp1); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 
4, (unsigned int *)dest); - dest += stride; - tmp0 = vec_perm (ref0, ref1, perm0); - tmp = vec_avg (tmp0, tmp1); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - dest += stride; - tmp1 = vec_perm (ref0, ref1, perm1); - tmp = vec_avg (tmp0, tmp1); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - dest += stride; - tmp0 = vec_perm (ref0, ref1, perm0); - tmp = vec_avg (tmp0, tmp1); - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); -} - -static void MC_put_xy_16_altivec (uint8_t * dest, const uint8_t * ref, - const int stride, int height) -{ - vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp; - vector_u8_t ones; - - ones = vec_splat_u8 (1); - permA = vec_lvsl (0, ref); - permB = vec_add (permA, ones); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - A = vec_perm (ref0, ref1, permA); - B = vec_perm (ref0, ref1, permB); - avg0 = vec_avg (A, B); - xor0 = vec_xor (A, B); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - A = vec_perm (ref0, ref1, permA); - B = vec_perm (ref0, ref1, permB); - avg1 = vec_avg (A, B); - xor1 = vec_xor (A, B); - tmp = vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1))); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - vec_st (tmp, 0, dest); - A = vec_perm (ref0, ref1, permA); - B = vec_perm (ref0, ref1, permB); - avg0 = vec_avg (A, B); - xor0 = vec_xor (A, B); - tmp = vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1))); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - 
ref += stride; - vec_st (tmp, stride, dest); - dest += 2*stride; - A = vec_perm (ref0, ref1, permA); - B = vec_perm (ref0, ref1, permB); - avg1 = vec_avg (A, B); - xor1 = vec_xor (A, B); - tmp = vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1))); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - vec_st (tmp, 0, dest); - A = vec_perm (ref0, ref1, permA); - B = vec_perm (ref0, ref1, permB); - avg0 = vec_avg (A, B); - xor0 = vec_xor (A, B); - tmp = vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1))); - vec_st (tmp, stride, dest); -} - -static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref, - const int stride, int height) -{ - vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B; - vector_u8_t avg0, avg1, xor0, xor1, tmp, ones; - - ones = vec_splat_u8 (1); - perm0A = vec_lvsl (0, ref); - perm0A = vec_mergeh (perm0A, perm0A); - perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A); - perm0B = vec_add (perm0A, ones); - perm1A = vec_lvsl (stride, ref); - perm1A = vec_mergeh (perm1A, perm1A); - perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A); - perm1B = vec_add (perm1A, ones); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - ref += stride; - A = vec_perm (ref0, ref1, perm0A); - B = vec_perm (ref0, ref1, perm0B); - avg0 = vec_avg (A, B); - xor0 = vec_xor (A, B); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - ref += stride; - A = vec_perm (ref0, ref1, perm1A); - B = vec_perm (ref0, ref1, perm1B); - avg1 = vec_avg (A, B); - xor1 = vec_xor (A, B); - tmp = vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1))); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - ref += stride; - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - dest 
+= stride; - A = vec_perm (ref0, ref1, perm0A); - B = vec_perm (ref0, ref1, perm0B); - avg0 = vec_avg (A, B); - xor0 = vec_xor (A, B); - tmp = vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1))); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - ref += stride; - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - dest += stride; - A = vec_perm (ref0, ref1, perm1A); - B = vec_perm (ref0, ref1, perm1B); - avg1 = vec_avg (A, B); - xor1 = vec_xor (A, B); - tmp = vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1))); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - dest += stride; - A = vec_perm (ref0, ref1, perm0A); - B = vec_perm (ref0, ref1, perm0B); - avg0 = vec_avg (A, B); - xor0 = vec_xor (A, B); - tmp = vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1))); - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); -} - -#if 0 -static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref, - const int stride, int height) -{ - vector_u8_t permA, permB, ref0, ref1, A, B, C, D, tmp, zero, ones; - vector_u16_t splat2, temp; - - ones = vec_splat_u8 (1); - permA = vec_lvsl (0, ref); - permB = vec_add (permA, ones); - - zero = vec_splat_u8 (0); - splat2 = vec_splat_u16 (2); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - ref += stride; - A = vec_perm (ref0, ref1, permA); - B = vec_perm (ref0, ref1, permB); - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - C = vec_perm (ref0, ref1, permA); - D = vec_perm (ref0, ref1, permB); - - temp = vec_add (vec_add ((vector_u16_t)vec_mergeh (zero, A), - (vector_u16_t)vec_mergeh (zero, B)), - vec_add 
((vector_u16_t)vec_mergeh (zero, C), - (vector_u16_t)vec_mergeh (zero, D))); - temp = vec_sr (vec_add (temp, splat2), splat2); - tmp = vec_pack (temp, temp); - - vec_st (tmp, 0, dest); - dest += stride; - tmp = vec_avg (vec_perm (ref0, ref1, permA), - vec_perm (ref0, ref1, permB)); - } while (--height); -} -#endif - -static void MC_avg_o_16_altivec (uint8_t * dest, const uint8_t * ref, - const int stride, int height) -{ - vector_u8_t perm, ref0, ref1, tmp, prev; - - perm = vec_lvsl (0, ref); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - prev = vec_ld (0, dest); - tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - prev = vec_ld (stride, dest); - vec_st (tmp, 0, dest); - tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - prev = vec_ld (2*stride, dest); - vec_st (tmp, stride, dest); - dest += 2*stride; - tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - prev = vec_ld (stride, dest); - vec_st (tmp, 0, dest); - tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); - vec_st (tmp, stride, dest); -} - -static void MC_avg_o_8_altivec (uint8_t * dest, const uint8_t * ref, - const int stride, int height) -{ - vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev; - - tmp0 = vec_lvsl (0, ref); - tmp0 = vec_mergeh (tmp0, tmp0); - perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); - tmp1 = vec_lvsl (stride, ref); - tmp1 = vec_mergeh (tmp1, tmp1); - perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - prev = vec_ld (0, dest); - tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0)); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - prev = vec_ld 
(stride, dest); - vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); - dest += stride; - tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1)); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - prev = vec_ld (stride, dest); - vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); - dest += stride; - tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0)); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - prev = vec_ld (stride, dest); - vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); - dest += stride; - tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1)); - vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); -} - -static void MC_avg_x_16_altivec (uint8_t * dest, const uint8_t * ref, - const int stride, int height) -{ - vector_u8_t permA, permB, ref0, ref1, tmp, prev; - - permA = vec_lvsl (0, ref); - permB = vec_add (permA, vec_splat_u8 (1)); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - prev = vec_ld (0, dest); - ref += stride; - tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), - vec_perm (ref0, ref1, permB))); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - prev = vec_ld (stride, dest); - vec_st (tmp, 0, dest); - tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), - vec_perm (ref0, ref1, permB))); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - prev = vec_ld (2*stride, dest); - vec_st (tmp, stride, dest); - dest += 2*stride; - tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), - vec_perm (ref0, ref1, permB))); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - prev = vec_ld (stride, dest); - vec_st (tmp, 0, dest); - tmp = 
vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), - vec_perm (ref0, ref1, permB))); - vec_st (tmp, stride, dest); -} - -static void MC_avg_x_8_altivec (uint8_t * dest, const uint8_t * ref, - const int stride, int height) -{ - vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1; - vector_u8_t prev; - - ones = vec_splat_u8 (1); - tmp0 = vec_lvsl (0, ref); - tmp0 = vec_mergeh (tmp0, tmp0); - perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); - perm0B = vec_add (perm0A, ones); - tmp1 = vec_lvsl (stride, ref); - tmp1 = vec_mergeh (tmp1, tmp1); - perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); - perm1B = vec_add (perm1A, ones); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - prev = vec_ld (0, dest); - ref += stride; - tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A), - vec_perm (ref0, ref1, perm0B))); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - ref += stride; - prev = vec_ld (stride, dest); - vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); - dest += stride; - tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A), - vec_perm (ref0, ref1, perm1B))); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - ref += stride; - prev = vec_ld (stride, dest); - vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); - dest += stride; - tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A), - vec_perm (ref0, ref1, perm0B))); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - prev = vec_ld (stride, dest); - vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); - dest += stride; - tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A), - vec_perm (ref0, ref1, perm1B))); - vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); - vec_ste 
((vector_u32_t)tmp1, 4, (unsigned int *)dest); -} - -static void MC_avg_y_16_altivec (uint8_t * dest, const uint8_t * ref, - const int stride, int height) -{ - vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev; - - perm = vec_lvsl (0, ref); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - tmp0 = vec_perm (ref0, ref1, perm); - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - prev = vec_ld (0, dest); - tmp1 = vec_perm (ref0, ref1, perm); - tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - prev = vec_ld (stride, dest); - vec_st (tmp, 0, dest); - tmp0 = vec_perm (ref0, ref1, perm); - tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - prev = vec_ld (2*stride, dest); - vec_st (tmp, stride, dest); - dest += 2*stride; - tmp1 = vec_perm (ref0, ref1, perm); - tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - prev = vec_ld (stride, dest); - vec_st (tmp, 0, dest); - tmp0 = vec_perm (ref0, ref1, perm); - tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); - vec_st (tmp, stride, dest); -} - -static void MC_avg_y_8_altivec (uint8_t * dest, const uint8_t * ref, - const int stride, int height) -{ - vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev; - - tmp0 = vec_lvsl (0, ref); - tmp0 = vec_mergeh (tmp0, tmp0); - perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); - tmp1 = vec_lvsl (stride, ref); - tmp1 = vec_mergeh (tmp1, tmp1); - perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - tmp0 = vec_perm (ref0, ref1, perm0); - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - prev = vec_ld (0, dest); - tmp1 = vec_perm (ref0, ref1, perm1); - tmp = vec_avg (prev, 
vec_avg (tmp0, tmp1)); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - prev = vec_ld (stride, dest); - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - dest += stride; - tmp0 = vec_perm (ref0, ref1, perm0); - tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - prev = vec_ld (stride, dest); - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - dest += stride; - tmp1 = vec_perm (ref0, ref1, perm1); - tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - prev = vec_ld (stride, dest); - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - dest += stride; - tmp0 = vec_perm (ref0, ref1, perm0); - tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); -} - -static void MC_avg_xy_16_altivec (uint8_t * dest, const uint8_t * ref, - const int stride, int height) -{ - vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp; - vector_u8_t ones, prev; - - ones = vec_splat_u8 (1); - permA = vec_lvsl (0, ref); - permB = vec_add (permA, ones); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - A = vec_perm (ref0, ref1, permA); - B = vec_perm (ref0, ref1, permB); - avg0 = vec_avg (A, B); - xor0 = vec_xor (A, B); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - prev = vec_ld (0, dest); - A = vec_perm (ref0, ref1, permA); - B = vec_perm (ref0, ref1, permB); - avg1 = vec_avg (A, B); - xor1 = vec_xor (A, B); - tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1)))); - - do { - ref0 = 
vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - prev = vec_ld (stride, dest); - vec_st (tmp, 0, dest); - A = vec_perm (ref0, ref1, permA); - B = vec_perm (ref0, ref1, permB); - avg0 = vec_avg (A, B); - xor0 = vec_xor (A, B); - tmp = vec_avg (prev, - vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1)))); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - prev = vec_ld (2*stride, dest); - vec_st (tmp, stride, dest); - dest += 2*stride; - A = vec_perm (ref0, ref1, permA); - B = vec_perm (ref0, ref1, permB); - avg1 = vec_avg (A, B); - xor1 = vec_xor (A, B); - tmp = vec_avg (prev, - vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1)))); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - prev = vec_ld (stride, dest); - vec_st (tmp, 0, dest); - A = vec_perm (ref0, ref1, permA); - B = vec_perm (ref0, ref1, permB); - avg0 = vec_avg (A, B); - xor0 = vec_xor (A, B); - tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1)))); - vec_st (tmp, stride, dest); -} - -static void MC_avg_xy_8_altivec (uint8_t * dest, const uint8_t * ref, - const int stride, int height) -{ - vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B; - vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev; - - ones = vec_splat_u8 (1); - perm0A = vec_lvsl (0, ref); - perm0A = vec_mergeh (perm0A, perm0A); - perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A); - perm0B = vec_add (perm0A, ones); - perm1A = vec_lvsl (stride, ref); - perm1A = vec_mergeh (perm1A, perm1A); - perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A); - perm1B = vec_add (perm1A, ones); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - ref += stride; - A = vec_perm (ref0, ref1, perm0A); - B = vec_perm (ref0, ref1, perm0B); - avg0 = vec_avg (A, 
B); - xor0 = vec_xor (A, B); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - ref += stride; - prev = vec_ld (0, dest); - A = vec_perm (ref0, ref1, perm1A); - B = vec_perm (ref0, ref1, perm1B); - avg1 = vec_avg (A, B); - xor1 = vec_xor (A, B); - tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1)))); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - ref += stride; - prev = vec_ld (stride, dest); - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - dest += stride; - A = vec_perm (ref0, ref1, perm0A); - B = vec_perm (ref0, ref1, perm0B); - avg0 = vec_avg (A, B); - xor0 = vec_xor (A, B); - tmp = vec_avg (prev, - vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1)))); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - ref += stride; - prev = vec_ld (stride, dest); - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - dest += stride; - A = vec_perm (ref0, ref1, perm1A); - B = vec_perm (ref0, ref1, perm1B); - avg1 = vec_avg (A, B); - xor1 = vec_xor (A, B); - tmp = vec_avg (prev, - vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1)))); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - prev = vec_ld (stride, dest); - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - dest += stride; - A = vec_perm (ref0, ref1, perm0A); - B = vec_perm (ref0, ref1, perm0B); - avg0 = vec_avg (A, B); - xor0 = vec_xor (A, B); - tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1)))); - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); -} - -MPEG2_MC_EXTERN (altivec) - 
-#endif diff --git a/src/libmpeg2new/libmpeg2/motion_comp_mlib.c b/src/libmpeg2new/libmpeg2/motion_comp_mlib.c deleted file mode 100644 index 71c085029..000000000 --- a/src/libmpeg2new/libmpeg2/motion_comp_mlib.c +++ /dev/null @@ -1,190 +0,0 @@ -/* - * motion_comp_mlib.c - * Copyright (C) 2000-2003 HÃ¥kan Hjort - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#ifdef LIBMPEG2_MLIB - -#include -#include -#include -#include -#include - -#include "../include/mpeg2.h" -#include "mpeg2_internal.h" - -static void MC_put_o_16_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoCopyRef_U8_U8_16x16 (dest, (uint8_t *) ref, stride); - else - mlib_VideoCopyRef_U8_U8_16x8 (dest, (uint8_t *) ref, stride); -} - -static void MC_put_x_16_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoInterpX_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride); - else - mlib_VideoInterpX_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride); -} - -static void MC_put_y_16_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 16) - 
mlib_VideoInterpY_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride); - else - mlib_VideoInterpY_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride); -} - -static void MC_put_xy_16_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoInterpXY_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride); - else - mlib_VideoInterpXY_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride); -} - -static void MC_put_o_8_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoCopyRef_U8_U8_8x8 (dest, (uint8_t *) ref, stride); - else - mlib_VideoCopyRef_U8_U8_8x4 (dest, (uint8_t *) ref, stride); -} - -static void MC_put_x_8_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoInterpX_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); - else - mlib_VideoInterpX_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); -} - -static void MC_put_y_8_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoInterpY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); - else - mlib_VideoInterpY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); -} - -static void MC_put_xy_8_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoInterpXY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); - else - mlib_VideoInterpXY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); -} - -static void MC_avg_o_16_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoCopyRefAve_U8_U8_16x16 (dest, (uint8_t *) ref, stride); - else - mlib_VideoCopyRefAve_U8_U8_16x8 (dest, (uint8_t *) ref, stride); -} - -static void MC_avg_x_16_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoInterpAveX_U8_U8_16x16 (dest, (uint8_t *) ref, - stride, stride); - else - mlib_VideoInterpAveX_U8_U8_16x8 
(dest, (uint8_t *) ref, - stride, stride); -} - -static void MC_avg_y_16_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoInterpAveY_U8_U8_16x16 (dest, (uint8_t *) ref, - stride, stride); - else - mlib_VideoInterpAveY_U8_U8_16x8 (dest, (uint8_t *) ref, - stride, stride); -} - -static void MC_avg_xy_16_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoInterpAveXY_U8_U8_16x16 (dest, (uint8_t *) ref, - stride, stride); - else - mlib_VideoInterpAveXY_U8_U8_16x8 (dest, (uint8_t *) ref, - stride, stride); -} - -static void MC_avg_o_8_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoCopyRefAve_U8_U8_8x8 (dest, (uint8_t *) ref, stride); - else - mlib_VideoCopyRefAve_U8_U8_8x4 (dest, (uint8_t *) ref, stride); -} - -static void MC_avg_x_8_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoInterpAveX_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); - else - mlib_VideoInterpAveX_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); -} - -static void MC_avg_y_8_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoInterpAveY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); - else - mlib_VideoInterpAveY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); -} - -static void MC_avg_xy_8_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoInterpAveXY_U8_U8_8x8 (dest, (uint8_t *) ref, - stride, stride); - else - mlib_VideoInterpAveXY_U8_U8_8x4 (dest, (uint8_t *) ref, - stride, stride); -} - -MPEG2_MC_EXTERN (mlib) - -#endif diff --git a/src/libmpeg2new/libmpeg2/motion_comp_mmx.c b/src/libmpeg2new/libmpeg2/motion_comp_mmx.c deleted file mode 100644 index 8694bdfea..000000000 --- a/src/libmpeg2new/libmpeg2/motion_comp_mmx.c +++ /dev/null @@ -1,1005 +0,0 @@ -/* - * 
motion_comp_mmx.c - * Copyright (C) 2000-2003 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#ifdef ARCH_X86 - -#include - -#include "../include/mpeg2.h" -#include "../include/attributes.h" -#include "mpeg2_internal.h" -#include "../include/mmx.h" - -#define CPU_MMXEXT 0 -#define CPU_3DNOW 1 - - -/* MMX code - needs a rewrite */ - -/* - * Motion Compensation frequently needs to average values using the - * formula (x+y+1)>>1. Both MMXEXT and 3Dnow include one instruction - * to compute this, but it's been left out of classic MMX. - * - * We need to be careful of overflows when doing this computation. - * Rather than unpacking data to 16-bits, which reduces parallelism, - * we use the following formulas: - * - * (x+y)>>1 == (x&y)+((x^y)>>1) - * (x+y+1)>>1 == (x|y)-((x^y)>>1) - */ - -/* some rounding constants */ -static mmx_t mask1 = {0xfefefefefefefefeLL}; -static mmx_t round4 = {0x0002000200020002LL}; - -/* - * This code should probably be compiled with loop unrolling - * (ie, -funroll-loops in gcc)becuase some of the loops - * use a small static number of iterations. 
This was written - * with the assumption the compiler knows best about when - * unrolling will help - */ - -static inline void mmx_zero_reg () -{ - /* load 0 into mm0 */ - pxor_r2r (mm0, mm0); -} - -static inline void mmx_average_2_U8 (uint8_t * dest, const uint8_t * src1, - const uint8_t * src2) -{ - /* *dest = (*src1 + *src2 + 1)/ 2; */ - - movq_m2r (*src1, mm1); /* load 8 src1 bytes */ - movq_r2r (mm1, mm2); /* copy 8 src1 bytes */ - - movq_m2r (*src2, mm3); /* load 8 src2 bytes */ - movq_r2r (mm3, mm4); /* copy 8 src2 bytes */ - - pxor_r2r (mm1, mm3); /* xor src1 and src2 */ - pand_m2r (mask1, mm3); /* mask lower bits */ - psrlq_i2r (1, mm3); /* /2 */ - por_r2r (mm2, mm4); /* or src1 and src2 */ - psubb_r2r (mm3, mm4); /* subtract subresults */ - movq_r2m (mm4, *dest); /* store result in dest */ -} - -static inline void mmx_interp_average_2_U8 (uint8_t * dest, - const uint8_t * src1, - const uint8_t * src2) -{ - /* *dest = (*dest + (*src1 + *src2 + 1)/ 2 + 1)/ 2; */ - - movq_m2r (*dest, mm1); /* load 8 dest bytes */ - movq_r2r (mm1, mm2); /* copy 8 dest bytes */ - - movq_m2r (*src1, mm3); /* load 8 src1 bytes */ - movq_r2r (mm3, mm4); /* copy 8 src1 bytes */ - - movq_m2r (*src2, mm5); /* load 8 src2 bytes */ - movq_r2r (mm5, mm6); /* copy 8 src2 bytes */ - - pxor_r2r (mm3, mm5); /* xor src1 and src2 */ - pand_m2r (mask1, mm5); /* mask lower bits */ - psrlq_i2r (1, mm5); /* /2 */ - por_r2r (mm4, mm6); /* or src1 and src2 */ - psubb_r2r (mm5, mm6); /* subtract subresults */ - movq_r2r (mm6, mm5); /* copy subresult */ - - pxor_r2r (mm1, mm5); /* xor srcavg and dest */ - pand_m2r (mask1, mm5); /* mask lower bits */ - psrlq_i2r (1, mm5); /* /2 */ - por_r2r (mm2, mm6); /* or srcavg and dest */ - psubb_r2r (mm5, mm6); /* subtract subresults */ - movq_r2m (mm6, *dest); /* store result in dest */ -} - -static inline void mmx_average_4_U8 (uint8_t * dest, const uint8_t * src1, - const uint8_t * src2, - const uint8_t * src3, - const uint8_t * src4) -{ - /* *dest = (*src1 
+ *src2 + *src3 + *src4 + 2)/ 4; */ - - movq_m2r (*src1, mm1); /* load 8 src1 bytes */ - movq_r2r (mm1, mm2); /* copy 8 src1 bytes */ - - punpcklbw_r2r (mm0, mm1); /* unpack low src1 bytes */ - punpckhbw_r2r (mm0, mm2); /* unpack high src1 bytes */ - - movq_m2r (*src2, mm3); /* load 8 src2 bytes */ - movq_r2r (mm3, mm4); /* copy 8 src2 bytes */ - - punpcklbw_r2r (mm0, mm3); /* unpack low src2 bytes */ - punpckhbw_r2r (mm0, mm4); /* unpack high src2 bytes */ - - paddw_r2r (mm3, mm1); /* add lows */ - paddw_r2r (mm4, mm2); /* add highs */ - - /* now have partials in mm1 and mm2 */ - - movq_m2r (*src3, mm3); /* load 8 src3 bytes */ - movq_r2r (mm3, mm4); /* copy 8 src3 bytes */ - - punpcklbw_r2r (mm0, mm3); /* unpack low src3 bytes */ - punpckhbw_r2r (mm0, mm4); /* unpack high src3 bytes */ - - paddw_r2r (mm3, mm1); /* add lows */ - paddw_r2r (mm4, mm2); /* add highs */ - - movq_m2r (*src4, mm5); /* load 8 src4 bytes */ - movq_r2r (mm5, mm6); /* copy 8 src4 bytes */ - - punpcklbw_r2r (mm0, mm5); /* unpack low src4 bytes */ - punpckhbw_r2r (mm0, mm6); /* unpack high src4 bytes */ - - paddw_r2r (mm5, mm1); /* add lows */ - paddw_r2r (mm6, mm2); /* add highs */ - - /* now have subtotal in mm1 and mm2 */ - - paddw_m2r (round4, mm1); - psraw_i2r (2, mm1); /* /4 */ - paddw_m2r (round4, mm2); - psraw_i2r (2, mm2); /* /4 */ - - packuswb_r2r (mm2, mm1); /* pack (w/ saturation) */ - movq_r2m (mm1, *dest); /* store result in dest */ -} - -static inline void mmx_interp_average_4_U8 (uint8_t * dest, - const uint8_t * src1, - const uint8_t * src2, - const uint8_t * src3, - const uint8_t * src4) -{ - /* *dest = (*dest + (*src1 + *src2 + *src3 + *src4 + 2)/ 4 + 1)/ 2; */ - - movq_m2r (*src1, mm1); /* load 8 src1 bytes */ - movq_r2r (mm1, mm2); /* copy 8 src1 bytes */ - - punpcklbw_r2r (mm0, mm1); /* unpack low src1 bytes */ - punpckhbw_r2r (mm0, mm2); /* unpack high src1 bytes */ - - movq_m2r (*src2, mm3); /* load 8 src2 bytes */ - movq_r2r (mm3, mm4); /* copy 8 src2 bytes */ - - 
punpcklbw_r2r (mm0, mm3); /* unpack low src2 bytes */ - punpckhbw_r2r (mm0, mm4); /* unpack high src2 bytes */ - - paddw_r2r (mm3, mm1); /* add lows */ - paddw_r2r (mm4, mm2); /* add highs */ - - /* now have partials in mm1 and mm2 */ - - movq_m2r (*src3, mm3); /* load 8 src3 bytes */ - movq_r2r (mm3, mm4); /* copy 8 src3 bytes */ - - punpcklbw_r2r (mm0, mm3); /* unpack low src3 bytes */ - punpckhbw_r2r (mm0, mm4); /* unpack high src3 bytes */ - - paddw_r2r (mm3, mm1); /* add lows */ - paddw_r2r (mm4, mm2); /* add highs */ - - movq_m2r (*src4, mm5); /* load 8 src4 bytes */ - movq_r2r (mm5, mm6); /* copy 8 src4 bytes */ - - punpcklbw_r2r (mm0, mm5); /* unpack low src4 bytes */ - punpckhbw_r2r (mm0, mm6); /* unpack high src4 bytes */ - - paddw_r2r (mm5, mm1); /* add lows */ - paddw_r2r (mm6, mm2); /* add highs */ - - paddw_m2r (round4, mm1); - psraw_i2r (2, mm1); /* /4 */ - paddw_m2r (round4, mm2); - psraw_i2r (2, mm2); /* /4 */ - - /* now have subtotal/4 in mm1 and mm2 */ - - movq_m2r (*dest, mm3); /* load 8 dest bytes */ - movq_r2r (mm3, mm4); /* copy 8 dest bytes */ - - packuswb_r2r (mm2, mm1); /* pack (w/ saturation) */ - movq_r2r (mm1,mm2); /* copy subresult */ - - pxor_r2r (mm1, mm3); /* xor srcavg and dest */ - pand_m2r (mask1, mm3); /* mask lower bits */ - psrlq_i2r (1, mm3); /* /2 */ - por_r2r (mm2, mm4); /* or srcavg and dest */ - psubb_r2r (mm3, mm4); /* subtract subresults */ - movq_r2m (mm4, *dest); /* store result in dest */ -} - -/*-----------------------------------------------------------------------*/ - -static inline void MC_avg_mmx (const int width, int height, uint8_t * dest, - const uint8_t * ref, const int stride) -{ - mmx_zero_reg (); - - do { - mmx_average_2_U8 (dest, dest, ref); - - if (width == 16) - mmx_average_2_U8 (dest+8, dest+8, ref+8); - - dest += stride; - ref += stride; - } while (--height); -} - -static void MC_avg_o_16_mmx (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_avg_mmx (16, height, dest, ref, 
stride); -} - -static void MC_avg_o_8_mmx (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_avg_mmx (8, height, dest, ref, stride); -} - -/*-----------------------------------------------------------------------*/ - -static inline void MC_put_mmx (const int width, int height, uint8_t * dest, - const uint8_t * ref, const int stride) -{ - mmx_zero_reg (); - - do { - movq_m2r (* ref, mm1); /* load 8 ref bytes */ - movq_r2m (mm1,* dest); /* store 8 bytes at curr */ - - if (width == 16) - { - movq_m2r (* (ref+8), mm1); /* load 8 ref bytes */ - movq_r2m (mm1,* (dest+8)); /* store 8 bytes at curr */ - } - - dest += stride; - ref += stride; - } while (--height); -} - -static void MC_put_o_16_mmx (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_put_mmx (16, height, dest, ref, stride); -} - -static void MC_put_o_8_mmx (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_put_mmx (8, height, dest, ref, stride); -} - -/*-----------------------------------------------------------------------*/ - -/* Half pixel interpolation in the x direction */ -static inline void MC_avg_x_mmx (const int width, int height, uint8_t * dest, - const uint8_t * ref, const int stride) -{ - mmx_zero_reg (); - - do { - mmx_interp_average_2_U8 (dest, ref, ref+1); - - if (width == 16) - mmx_interp_average_2_U8 (dest+8, ref+8, ref+9); - - dest += stride; - ref += stride; - } while (--height); -} - -static void MC_avg_x_16_mmx (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_avg_x_mmx (16, height, dest, ref, stride); -} - -static void MC_avg_x_8_mmx (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_avg_x_mmx (8, height, dest, ref, stride); -} - -/*-----------------------------------------------------------------------*/ - -static inline void MC_put_x_mmx (const int width, int height, uint8_t * dest, - const uint8_t * ref, const int stride) -{ - mmx_zero_reg (); - - do { - mmx_average_2_U8 (dest, 
ref, ref+1); - - if (width == 16) - mmx_average_2_U8 (dest+8, ref+8, ref+9); - - dest += stride; - ref += stride; - } while (--height); -} - -static void MC_put_x_16_mmx (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_put_x_mmx (16, height, dest, ref, stride); -} - -static void MC_put_x_8_mmx (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_put_x_mmx (8, height, dest, ref, stride); -} - -/*-----------------------------------------------------------------------*/ - -static inline void MC_avg_xy_mmx (const int width, int height, uint8_t * dest, - const uint8_t * ref, const int stride) -{ - const uint8_t * ref_next = ref + stride; - - mmx_zero_reg (); - - do { - mmx_interp_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1); - - if (width == 16) - mmx_interp_average_4_U8 (dest+8, ref+8, ref+9, - ref_next+8, ref_next+9); - - dest += stride; - ref += stride; - ref_next += stride; - } while (--height); -} - -static void MC_avg_xy_16_mmx (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_avg_xy_mmx (16, height, dest, ref, stride); -} - -static void MC_avg_xy_8_mmx (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_avg_xy_mmx (8, height, dest, ref, stride); -} - -/*-----------------------------------------------------------------------*/ - -static inline void MC_put_xy_mmx (const int width, int height, uint8_t * dest, - const uint8_t * ref, const int stride) -{ - const uint8_t * ref_next = ref + stride; - - mmx_zero_reg (); - - do { - mmx_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1); - - if (width == 16) - mmx_average_4_U8 (dest+8, ref+8, ref+9, ref_next+8, ref_next+9); - - dest += stride; - ref += stride; - ref_next += stride; - } while (--height); -} - -static void MC_put_xy_16_mmx (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_put_xy_mmx (16, height, dest, ref, stride); -} - -static void MC_put_xy_8_mmx (uint8_t * dest, const uint8_t * ref, - int 
stride, int height) -{ - MC_put_xy_mmx (8, height, dest, ref, stride); -} - -/*-----------------------------------------------------------------------*/ - -static inline void MC_avg_y_mmx (const int width, int height, uint8_t * dest, - const uint8_t * ref, const int stride) -{ - const uint8_t * ref_next = ref + stride; - - mmx_zero_reg (); - - do { - mmx_interp_average_2_U8 (dest, ref, ref_next); - - if (width == 16) - mmx_interp_average_2_U8 (dest+8, ref+8, ref_next+8); - - dest += stride; - ref += stride; - ref_next += stride; - } while (--height); -} - -static void MC_avg_y_16_mmx (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_avg_y_mmx (16, height, dest, ref, stride); -} - -static void MC_avg_y_8_mmx (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_avg_y_mmx (8, height, dest, ref, stride); -} - -/*-----------------------------------------------------------------------*/ - -static inline void MC_put_y_mmx (const int width, int height, uint8_t * dest, - const uint8_t * ref, const int stride) -{ - const uint8_t * ref_next = ref + stride; - - mmx_zero_reg (); - - do { - mmx_average_2_U8 (dest, ref, ref_next); - - if (width == 16) - mmx_average_2_U8 (dest+8, ref+8, ref_next+8); - - dest += stride; - ref += stride; - ref_next += stride; - } while (--height); -} - -static void MC_put_y_16_mmx (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_put_y_mmx (16, height, dest, ref, stride); -} - -static void MC_put_y_8_mmx (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_put_y_mmx (8, height, dest, ref, stride); -} - - -MPEG2_MC_EXTERN (mmx) - - - - - - - -/* CPU_MMXEXT/CPU_3DNOW adaptation layer */ - -#define pavg_r2r(src,dest) \ -do { \ - if (cpu == CPU_MMXEXT) \ - pavgb_r2r (src, dest); \ - else \ - pavgusb_r2r (src, dest); \ -} while (0) - -#define pavg_m2r(src,dest) \ -do { \ - if (cpu == CPU_MMXEXT) \ - pavgb_m2r (src, dest); \ - else \ - pavgusb_m2r (src, dest); \ -} while (0) 
- - -/* CPU_MMXEXT code */ - - -static inline void MC_put1_8 (int height, uint8_t * dest, const uint8_t * ref, - const int stride) -{ - do { - movq_m2r (*ref, mm0); - movq_r2m (mm0, *dest); - ref += stride; - dest += stride; - } while (--height); -} - -static inline void MC_put1_16 (int height, uint8_t * dest, const uint8_t * ref, - const int stride) -{ - do { - movq_m2r (*ref, mm0); - movq_m2r (*(ref+8), mm1); - ref += stride; - movq_r2m (mm0, *dest); - movq_r2m (mm1, *(dest+8)); - dest += stride; - } while (--height); -} - -static inline void MC_avg1_8 (int height, uint8_t * dest, const uint8_t * ref, - const int stride, const int cpu) -{ - do { - movq_m2r (*ref, mm0); - pavg_m2r (*dest, mm0); - ref += stride; - movq_r2m (mm0, *dest); - dest += stride; - } while (--height); -} - -static inline void MC_avg1_16 (int height, uint8_t * dest, const uint8_t * ref, - const int stride, const int cpu) -{ - do { - movq_m2r (*ref, mm0); - movq_m2r (*(ref+8), mm1); - pavg_m2r (*dest, mm0); - pavg_m2r (*(dest+8), mm1); - movq_r2m (mm0, *dest); - ref += stride; - movq_r2m (mm1, *(dest+8)); - dest += stride; - } while (--height); -} - -static inline void MC_put2_8 (int height, uint8_t * dest, const uint8_t * ref, - const int stride, const int offset, - const int cpu) -{ - do { - movq_m2r (*ref, mm0); - pavg_m2r (*(ref+offset), mm0); - ref += stride; - movq_r2m (mm0, *dest); - dest += stride; - } while (--height); -} - -static inline void MC_put2_16 (int height, uint8_t * dest, const uint8_t * ref, - const int stride, const int offset, - const int cpu) -{ - do { - movq_m2r (*ref, mm0); - movq_m2r (*(ref+8), mm1); - pavg_m2r (*(ref+offset), mm0); - pavg_m2r (*(ref+offset+8), mm1); - movq_r2m (mm0, *dest); - ref += stride; - movq_r2m (mm1, *(dest+8)); - dest += stride; - } while (--height); -} - -static inline void MC_avg2_8 (int height, uint8_t * dest, const uint8_t * ref, - const int stride, const int offset, - const int cpu) -{ - do { - movq_m2r (*ref, mm0); - pavg_m2r 
(*(ref+offset), mm0); - pavg_m2r (*dest, mm0); - ref += stride; - movq_r2m (mm0, *dest); - dest += stride; - } while (--height); -} - -static inline void MC_avg2_16 (int height, uint8_t * dest, const uint8_t * ref, - const int stride, const int offset, - const int cpu) -{ - do { - movq_m2r (*ref, mm0); - movq_m2r (*(ref+8), mm1); - pavg_m2r (*(ref+offset), mm0); - pavg_m2r (*(ref+offset+8), mm1); - pavg_m2r (*dest, mm0); - pavg_m2r (*(dest+8), mm1); - ref += stride; - movq_r2m (mm0, *dest); - movq_r2m (mm1, *(dest+8)); - dest += stride; - } while (--height); -} - -static mmx_t mask_one = {0x0101010101010101LL}; - -static inline void MC_put4_8 (int height, uint8_t * dest, const uint8_t * ref, - const int stride, const int cpu) -{ - movq_m2r (*ref, mm0); - movq_m2r (*(ref+1), mm1); - movq_r2r (mm0, mm7); - pxor_r2r (mm1, mm7); - pavg_r2r (mm1, mm0); - ref += stride; - - do { - movq_m2r (*ref, mm2); - movq_r2r (mm0, mm5); - - movq_m2r (*(ref+1), mm3); - movq_r2r (mm2, mm6); - - pxor_r2r (mm3, mm6); - pavg_r2r (mm3, mm2); - - por_r2r (mm6, mm7); - pxor_r2r (mm2, mm5); - - pand_r2r (mm5, mm7); - pavg_r2r (mm2, mm0); - - pand_m2r (mask_one, mm7); - - psubusb_r2r (mm7, mm0); - - ref += stride; - movq_r2m (mm0, *dest); - dest += stride; - - movq_r2r (mm6, mm7); /* unroll ! */ - movq_r2r (mm2, mm0); /* unroll ! 
*/ - } while (--height); -} - -static inline void MC_put4_16 (int height, uint8_t * dest, const uint8_t * ref, - const int stride, const int cpu) -{ - do { - movq_m2r (*ref, mm0); - movq_m2r (*(ref+stride+1), mm1); - movq_r2r (mm0, mm7); - movq_m2r (*(ref+1), mm2); - pxor_r2r (mm1, mm7); - movq_m2r (*(ref+stride), mm3); - movq_r2r (mm2, mm6); - pxor_r2r (mm3, mm6); - pavg_r2r (mm1, mm0); - pavg_r2r (mm3, mm2); - por_r2r (mm6, mm7); - movq_r2r (mm0, mm6); - pxor_r2r (mm2, mm6); - pand_r2r (mm6, mm7); - pand_m2r (mask_one, mm7); - pavg_r2r (mm2, mm0); - psubusb_r2r (mm7, mm0); - movq_r2m (mm0, *dest); - - movq_m2r (*(ref+8), mm0); - movq_m2r (*(ref+stride+9), mm1); - movq_r2r (mm0, mm7); - movq_m2r (*(ref+9), mm2); - pxor_r2r (mm1, mm7); - movq_m2r (*(ref+stride+8), mm3); - movq_r2r (mm2, mm6); - pxor_r2r (mm3, mm6); - pavg_r2r (mm1, mm0); - pavg_r2r (mm3, mm2); - por_r2r (mm6, mm7); - movq_r2r (mm0, mm6); - pxor_r2r (mm2, mm6); - pand_r2r (mm6, mm7); - pand_m2r (mask_one, mm7); - pavg_r2r (mm2, mm0); - psubusb_r2r (mm7, mm0); - ref += stride; - movq_r2m (mm0, *(dest+8)); - dest += stride; - } while (--height); -} - -static inline void MC_avg4_8 (int height, uint8_t * dest, const uint8_t * ref, - const int stride, const int cpu) -{ - do { - movq_m2r (*ref, mm0); - movq_m2r (*(ref+stride+1), mm1); - movq_r2r (mm0, mm7); - movq_m2r (*(ref+1), mm2); - pxor_r2r (mm1, mm7); - movq_m2r (*(ref+stride), mm3); - movq_r2r (mm2, mm6); - pxor_r2r (mm3, mm6); - pavg_r2r (mm1, mm0); - pavg_r2r (mm3, mm2); - por_r2r (mm6, mm7); - movq_r2r (mm0, mm6); - pxor_r2r (mm2, mm6); - pand_r2r (mm6, mm7); - pand_m2r (mask_one, mm7); - pavg_r2r (mm2, mm0); - psubusb_r2r (mm7, mm0); - movq_m2r (*dest, mm1); - pavg_r2r (mm1, mm0); - ref += stride; - movq_r2m (mm0, *dest); - dest += stride; - } while (--height); -} - -static inline void MC_avg4_16 (int height, uint8_t * dest, const uint8_t * ref, - const int stride, const int cpu) -{ - do { - movq_m2r (*ref, mm0); - movq_m2r (*(ref+stride+1), 
mm1); - movq_r2r (mm0, mm7); - movq_m2r (*(ref+1), mm2); - pxor_r2r (mm1, mm7); - movq_m2r (*(ref+stride), mm3); - movq_r2r (mm2, mm6); - pxor_r2r (mm3, mm6); - pavg_r2r (mm1, mm0); - pavg_r2r (mm3, mm2); - por_r2r (mm6, mm7); - movq_r2r (mm0, mm6); - pxor_r2r (mm2, mm6); - pand_r2r (mm6, mm7); - pand_m2r (mask_one, mm7); - pavg_r2r (mm2, mm0); - psubusb_r2r (mm7, mm0); - movq_m2r (*dest, mm1); - pavg_r2r (mm1, mm0); - movq_r2m (mm0, *dest); - - movq_m2r (*(ref+8), mm0); - movq_m2r (*(ref+stride+9), mm1); - movq_r2r (mm0, mm7); - movq_m2r (*(ref+9), mm2); - pxor_r2r (mm1, mm7); - movq_m2r (*(ref+stride+8), mm3); - movq_r2r (mm2, mm6); - pxor_r2r (mm3, mm6); - pavg_r2r (mm1, mm0); - pavg_r2r (mm3, mm2); - por_r2r (mm6, mm7); - movq_r2r (mm0, mm6); - pxor_r2r (mm2, mm6); - pand_r2r (mm6, mm7); - pand_m2r (mask_one, mm7); - pavg_r2r (mm2, mm0); - psubusb_r2r (mm7, mm0); - movq_m2r (*(dest+8), mm1); - pavg_r2r (mm1, mm0); - ref += stride; - movq_r2m (mm0, *(dest+8)); - dest += stride; - } while (--height); -} - -static void MC_avg_o_16_mmxext (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT); -} - -static void MC_avg_o_8_mmxext (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT); -} - -static void MC_put_o_16_mmxext (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_put1_16 (height, dest, ref, stride); -} - -static void MC_put_o_8_mmxext (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_put1_8 (height, dest, ref, stride); -} - -static void MC_avg_x_16_mmxext (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT); -} - -static void MC_avg_x_8_mmxext (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT); -} - -static void MC_put_x_16_mmxext (uint8_t * dest, 
const uint8_t * ref, - int stride, int height) -{ - MC_put2_16 (height, dest, ref, stride, 1, CPU_MMXEXT); -} - -static void MC_put_x_8_mmxext (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT); -} - -static void MC_avg_y_16_mmxext (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT); -} - -static void MC_avg_y_8_mmxext (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT); -} - -static void MC_put_y_16_mmxext (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT); -} - -static void MC_put_y_8_mmxext (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT); -} - -static void MC_avg_xy_16_mmxext (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT); -} - -static void MC_avg_xy_8_mmxext (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT); -} - -static void MC_put_xy_16_mmxext (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT); -} - -static void MC_put_xy_8_mmxext (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT); -} - - -MPEG2_MC_EXTERN (mmxext) - - - -static void MC_avg_o_16_3dnow (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW); -} - -static void MC_avg_o_8_3dnow (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW); -} - -static void MC_put_o_16_3dnow (uint8_t * dest, const uint8_t * ref, - int stride, int 
height) -{ - MC_put1_16 (height, dest, ref, stride); -} - -static void MC_put_o_8_3dnow (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_put1_8 (height, dest, ref, stride); -} - -static void MC_avg_x_16_3dnow (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW); -} - -static void MC_avg_x_8_3dnow (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW); -} - -static void MC_put_x_16_3dnow (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW); -} - -static void MC_put_x_8_3dnow (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW); -} - -static void MC_avg_y_16_3dnow (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW); -} - -static void MC_avg_y_8_3dnow (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW); -} - -static void MC_put_y_16_3dnow (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW); -} - -static void MC_put_y_8_3dnow (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW); -} - -static void MC_avg_xy_16_3dnow (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_avg4_16 (height, dest, ref, stride, CPU_3DNOW); -} - -static void MC_avg_xy_8_3dnow (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_avg4_8 (height, dest, ref, stride, CPU_3DNOW); -} - -static void MC_put_xy_16_3dnow (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - MC_put4_16 (height, dest, ref, stride, CPU_3DNOW); -} - -static void MC_put_xy_8_3dnow (uint8_t * 
dest, const uint8_t * ref, - int stride, int height) -{ - MC_put4_8 (height, dest, ref, stride, CPU_3DNOW); -} - - -MPEG2_MC_EXTERN (3dnow) - -#endif diff --git a/src/libmpeg2new/libmpeg2/motion_comp_vis.c b/src/libmpeg2new/libmpeg2/motion_comp_vis.c deleted file mode 100644 index e724d28a2..000000000 --- a/src/libmpeg2new/libmpeg2/motion_comp_vis.c +++ /dev/null @@ -1,2061 +0,0 @@ -/* - * motion_comp_vis.c - * Copyright (C) 2003 David S. Miller - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#ifdef ARCH_SPARC - -#include - -#include "mpeg2.h" -#include -#include "mpeg2_internal.h" -#include "vis.h" - -/* The trick used in some of this file is the formula from the MMX - * motion comp code, which is: - * - * (x+y+1)>>1 == (x|y)-((x^y)>>1) - * - * This allows us to average 8 bytes at a time in a 64-bit FPU reg. - * We avoid overflows by masking before we do the shift, and we - * implement the shift by multiplying by 1/2 using mul8x16. 
So in - * VIS this is (assume 'x' is in f0, 'y' is in f2, a repeating mask - * of '0xfe' is in f4, a repeating mask of '0x7f' is in f6, and - * the value 0x80808080 is in f8): - * - * fxor f0, f2, f10 - * fand f10, f4, f10 - * fmul8x16 f8, f10, f10 - * fand f10, f6, f10 - * for f0, f2, f12 - * fpsub16 f12, f10, f10 - */ - -#define DUP4(x) {x, x, x, x} -#define DUP8(x) {x, x, x, x, x, x, x, x} -static const int16_t constants1[] ATTR_ALIGN(8) = DUP4 (1); -static const int16_t constants2[] ATTR_ALIGN(8) = DUP4 (2); -static const int16_t constants3[] ATTR_ALIGN(8) = DUP4 (3); -static const int16_t constants6[] ATTR_ALIGN(8) = DUP4 (6); -static const int8_t constants_fe[] ATTR_ALIGN(8) = DUP8 (0xfe); -static const int8_t constants_7f[] ATTR_ALIGN(8) = DUP8 (0x7f); -static const int8_t constants128[] ATTR_ALIGN(8) = DUP8 (128); -static const int16_t constants256_512[] ATTR_ALIGN(8) = - {256, 512, 256, 512}; -static const int16_t constants256_1024[] ATTR_ALIGN(8) = - {256, 1024, 256, 1024}; - -#define REF_0 0 -#define REF_0_1 1 -#define REF_2 2 -#define REF_2_1 3 -#define REF_4 4 -#define REF_4_1 5 -#define REF_6 6 -#define REF_6_1 7 -#define REF_S0 8 -#define REF_S0_1 9 -#define REF_S2 10 -#define REF_S2_1 11 -#define REF_S4 12 -#define REF_S4_1 13 -#define REF_S6 14 -#define REF_S6_1 15 -#define DST_0 16 -#define DST_1 17 -#define DST_2 18 -#define DST_3 19 -#define CONST_1 20 -#define CONST_2 20 -#define CONST_3 20 -#define CONST_6 20 -#define MASK_fe 20 -#define CONST_128 22 -#define CONST_256 22 -#define CONST_512 22 -#define CONST_1024 22 -#define TMP0 24 -#define TMP1 25 -#define TMP2 26 -#define TMP3 27 -#define TMP4 28 -#define TMP5 29 -#define ZERO 30 -#define MASK_7f 30 - -#define TMP6 32 -#define TMP8 34 -#define TMP10 36 -#define TMP12 38 -#define TMP14 40 -#define TMP16 42 -#define TMP18 44 -#define TMP20 46 -#define TMP22 48 -#define TMP24 50 -#define TMP26 52 -#define TMP28 54 -#define TMP30 56 -#define TMP32 58 - -static void MC_put_o_16_vis (uint8_t * 
dest, const uint8_t * _ref, - const int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - int offset; - - ref = vis_alignaddr(ref); - offset = (ref != _ref) ? 16 : 0; - do { /* 5 cycles */ - vis_ld64(ref[0], TMP0); - - vis_ld64_2(ref, 8, TMP2); - - vis_ld64_2(ref, offset, TMP4); - ref += stride; - - vis_faligndata(TMP0, TMP2, REF_0); - vis_st64(REF_0, dest[0]); - - vis_faligndata(TMP2, TMP4, REF_2); - vis_st64_2(REF_2, dest, 8); - dest += stride; - } while (--height); -} - -static void MC_put_o_8_vis (uint8_t * dest, const uint8_t * _ref, - const int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - int offset; - - ref = vis_alignaddr(ref); - offset = (ref != _ref) ? 8 : 0; - do { /* 4 cycles */ - vis_ld64(ref[0], TMP0); - - vis_ld64_2(ref, offset, TMP2); - ref += stride; - - /* stall */ - - vis_faligndata(TMP0, TMP2, REF_0); - vis_st64(REF_0, dest[0]); - dest += stride; - } while (--height); -} - - -static void MC_avg_o_16_vis (uint8_t * dest, const uint8_t * _ref, - const int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - int stride_8 = stride + 8; - int offset; - - ref = vis_alignaddr(ref); - offset = (ref != _ref) ? 
16 : 0; - - vis_ld64(ref[0], TMP0); - - vis_ld64(ref[8], TMP2); - - vis_ld64_2(ref, offset, TMP4); - - vis_ld64(dest[0], DST_0); - - vis_ld64(dest[8], DST_2); - - vis_ld64(constants_fe[0], MASK_fe); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64(constants_7f[0], MASK_7f); - vis_faligndata(TMP2, TMP4, REF_2); - - vis_ld64(constants128[0], CONST_128); - - ref += stride; - height = (height >> 1) - 1; - - do { /* 24 cycles */ - vis_ld64(ref[0], TMP0); - vis_xor(DST_0, REF_0, TMP6); - - vis_ld64_2(ref, 8, TMP2); - vis_and(TMP6, MASK_fe, TMP6); - - vis_ld64_2(ref, offset, TMP4); - ref += stride; - vis_mul8x16(CONST_128, TMP6, TMP6); - vis_xor(DST_2, REF_2, TMP8); - - vis_and(TMP8, MASK_fe, TMP8); - - vis_or(DST_0, REF_0, TMP10); - vis_ld64_2(dest, stride, DST_0); - vis_mul8x16(CONST_128, TMP8, TMP8); - - vis_or(DST_2, REF_2, TMP12); - vis_ld64_2(dest, stride_8, DST_2); - - vis_ld64(ref[0], TMP14); - vis_and(TMP6, MASK_7f, TMP6); - - vis_and(TMP8, MASK_7f, TMP8); - - vis_psub16(TMP10, TMP6, TMP6); - vis_st64(TMP6, dest[0]); - - vis_psub16(TMP12, TMP8, TMP8); - vis_st64_2(TMP8, dest, 8); - - dest += stride; - vis_ld64_2(ref, 8, TMP16); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64_2(ref, offset, TMP18); - vis_faligndata(TMP2, TMP4, REF_2); - ref += stride; - - vis_xor(DST_0, REF_0, TMP20); - - vis_and(TMP20, MASK_fe, TMP20); - - vis_xor(DST_2, REF_2, TMP22); - vis_mul8x16(CONST_128, TMP20, TMP20); - - vis_and(TMP22, MASK_fe, TMP22); - - vis_or(DST_0, REF_0, TMP24); - vis_mul8x16(CONST_128, TMP22, TMP22); - - vis_or(DST_2, REF_2, TMP26); - - vis_ld64_2(dest, stride, DST_0); - vis_faligndata(TMP14, TMP16, REF_0); - - vis_ld64_2(dest, stride_8, DST_2); - vis_faligndata(TMP16, TMP18, REF_2); - - vis_and(TMP20, MASK_7f, TMP20); - - vis_and(TMP22, MASK_7f, TMP22); - - vis_psub16(TMP24, TMP20, TMP20); - vis_st64(TMP20, dest[0]); - - vis_psub16(TMP26, TMP22, TMP22); - vis_st64_2(TMP22, dest, 8); - dest += stride; - } while (--height); - - vis_ld64(ref[0], TMP0); - 
vis_xor(DST_0, REF_0, TMP6); - - vis_ld64_2(ref, 8, TMP2); - vis_and(TMP6, MASK_fe, TMP6); - - vis_ld64_2(ref, offset, TMP4); - vis_mul8x16(CONST_128, TMP6, TMP6); - vis_xor(DST_2, REF_2, TMP8); - - vis_and(TMP8, MASK_fe, TMP8); - - vis_or(DST_0, REF_0, TMP10); - vis_ld64_2(dest, stride, DST_0); - vis_mul8x16(CONST_128, TMP8, TMP8); - - vis_or(DST_2, REF_2, TMP12); - vis_ld64_2(dest, stride_8, DST_2); - - vis_ld64(ref[0], TMP14); - vis_and(TMP6, MASK_7f, TMP6); - - vis_and(TMP8, MASK_7f, TMP8); - - vis_psub16(TMP10, TMP6, TMP6); - vis_st64(TMP6, dest[0]); - - vis_psub16(TMP12, TMP8, TMP8); - vis_st64_2(TMP8, dest, 8); - - dest += stride; - vis_faligndata(TMP0, TMP2, REF_0); - - vis_faligndata(TMP2, TMP4, REF_2); - - vis_xor(DST_0, REF_0, TMP20); - - vis_and(TMP20, MASK_fe, TMP20); - - vis_xor(DST_2, REF_2, TMP22); - vis_mul8x16(CONST_128, TMP20, TMP20); - - vis_and(TMP22, MASK_fe, TMP22); - - vis_or(DST_0, REF_0, TMP24); - vis_mul8x16(CONST_128, TMP22, TMP22); - - vis_or(DST_2, REF_2, TMP26); - - vis_and(TMP20, MASK_7f, TMP20); - - vis_and(TMP22, MASK_7f, TMP22); - - vis_psub16(TMP24, TMP20, TMP20); - vis_st64(TMP20, dest[0]); - - vis_psub16(TMP26, TMP22, TMP22); - vis_st64_2(TMP22, dest, 8); -} - -static void MC_avg_o_8_vis (uint8_t * dest, const uint8_t * _ref, - const int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - int offset; - - ref = vis_alignaddr(ref); - offset = (ref != _ref) ? 
8 : 0; - - vis_ld64(ref[0], TMP0); - - vis_ld64_2(ref, offset, TMP2); - - vis_ld64(dest[0], DST_0); - - vis_ld64(constants_fe[0], MASK_fe); - - vis_ld64(constants_7f[0], MASK_7f); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64(constants128[0], CONST_128); - - ref += stride; - height = (height >> 1) - 1; - - do { /* 12 cycles */ - vis_ld64(ref[0], TMP0); - vis_xor(DST_0, REF_0, TMP4); - - vis_ld64_2(ref, offset, TMP2); - vis_and(TMP4, MASK_fe, TMP4); - - vis_or(DST_0, REF_0, TMP6); - vis_ld64_2(dest, stride, DST_0); - ref += stride; - vis_mul8x16(CONST_128, TMP4, TMP4); - - vis_ld64(ref[0], TMP12); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64_2(ref, offset, TMP2); - vis_xor(DST_0, REF_0, TMP0); - ref += stride; - - vis_and(TMP0, MASK_fe, TMP0); - - vis_and(TMP4, MASK_7f, TMP4); - - vis_psub16(TMP6, TMP4, TMP4); - vis_st64(TMP4, dest[0]); - dest += stride; - vis_mul8x16(CONST_128, TMP0, TMP0); - - vis_or(DST_0, REF_0, TMP6); - vis_ld64_2(dest, stride, DST_0); - - vis_faligndata(TMP12, TMP2, REF_0); - - vis_and(TMP0, MASK_7f, TMP0); - - vis_psub16(TMP6, TMP0, TMP4); - vis_st64(TMP4, dest[0]); - dest += stride; - } while (--height); - - vis_ld64(ref[0], TMP0); - vis_xor(DST_0, REF_0, TMP4); - - vis_ld64_2(ref, offset, TMP2); - vis_and(TMP4, MASK_fe, TMP4); - - vis_or(DST_0, REF_0, TMP6); - vis_ld64_2(dest, stride, DST_0); - vis_mul8x16(CONST_128, TMP4, TMP4); - - vis_faligndata(TMP0, TMP2, REF_0); - - vis_xor(DST_0, REF_0, TMP0); - - vis_and(TMP0, MASK_fe, TMP0); - - vis_and(TMP4, MASK_7f, TMP4); - - vis_psub16(TMP6, TMP4, TMP4); - vis_st64(TMP4, dest[0]); - dest += stride; - vis_mul8x16(CONST_128, TMP0, TMP0); - - vis_or(DST_0, REF_0, TMP6); - - vis_and(TMP0, MASK_7f, TMP0); - - vis_psub16(TMP6, TMP0, TMP4); - vis_st64(TMP4, dest[0]); -} - -static void MC_put_x_16_vis (uint8_t * dest, const uint8_t * _ref, - const int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - unsigned long off = (unsigned long) ref & 0x7; - unsigned long off_plus_1 = off + 1; 
- - ref = vis_alignaddr(ref); - - vis_ld64(ref[0], TMP0); - - vis_ld64_2(ref, 8, TMP2); - - vis_ld64_2(ref, 16, TMP4); - - vis_ld64(constants_fe[0], MASK_fe); - - vis_ld64(constants_7f[0], MASK_7f); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64(constants128[0], CONST_128); - vis_faligndata(TMP2, TMP4, REF_4); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_2); - vis_faligndata(TMP2, TMP4, REF_6); - } else { - vis_src1(TMP2, REF_2); - vis_src1(TMP4, REF_6); - } - - ref += stride; - height = (height >> 1) - 1; - - do { /* 34 cycles */ - vis_ld64(ref[0], TMP0); - vis_xor(REF_0, REF_2, TMP6); - - vis_ld64_2(ref, 8, TMP2); - vis_xor(REF_4, REF_6, TMP8); - - vis_ld64_2(ref, 16, TMP4); - vis_and(TMP6, MASK_fe, TMP6); - ref += stride; - - vis_ld64(ref[0], TMP14); - vis_mul8x16(CONST_128, TMP6, TMP6); - vis_and(TMP8, MASK_fe, TMP8); - - vis_ld64_2(ref, 8, TMP16); - vis_mul8x16(CONST_128, TMP8, TMP8); - vis_or(REF_0, REF_2, TMP10); - - vis_ld64_2(ref, 16, TMP18); - ref += stride; - vis_or(REF_4, REF_6, TMP12); - - vis_alignaddr_g0((void *)off); - - vis_faligndata(TMP0, TMP2, REF_0); - - vis_faligndata(TMP2, TMP4, REF_4); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_2); - vis_faligndata(TMP2, TMP4, REF_6); - } else { - vis_src1(TMP2, REF_2); - vis_src1(TMP4, REF_6); - } - - vis_and(TMP6, MASK_7f, TMP6); - - vis_and(TMP8, MASK_7f, TMP8); - - vis_psub16(TMP10, TMP6, TMP6); - vis_st64(TMP6, dest[0]); - - vis_psub16(TMP12, TMP8, TMP8); - vis_st64_2(TMP8, dest, 8); - dest += stride; - - vis_xor(REF_0, REF_2, TMP6); - - vis_xor(REF_4, REF_6, TMP8); - - vis_and(TMP6, MASK_fe, TMP6); - - vis_mul8x16(CONST_128, TMP6, TMP6); - vis_and(TMP8, MASK_fe, TMP8); - - vis_mul8x16(CONST_128, TMP8, TMP8); - vis_or(REF_0, REF_2, TMP10); - - vis_or(REF_4, REF_6, TMP12); - - vis_alignaddr_g0((void *)off); - - vis_faligndata(TMP14, TMP16, REF_0); - - vis_faligndata(TMP16, TMP18, REF_4); - - if (off 
!= 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP14, TMP16, REF_2); - vis_faligndata(TMP16, TMP18, REF_6); - } else { - vis_src1(TMP16, REF_2); - vis_src1(TMP18, REF_6); - } - - vis_and(TMP6, MASK_7f, TMP6); - - vis_and(TMP8, MASK_7f, TMP8); - - vis_psub16(TMP10, TMP6, TMP6); - vis_st64(TMP6, dest[0]); - - vis_psub16(TMP12, TMP8, TMP8); - vis_st64_2(TMP8, dest, 8); - dest += stride; - } while (--height); - - vis_ld64(ref[0], TMP0); - vis_xor(REF_0, REF_2, TMP6); - - vis_ld64_2(ref, 8, TMP2); - vis_xor(REF_4, REF_6, TMP8); - - vis_ld64_2(ref, 16, TMP4); - vis_and(TMP6, MASK_fe, TMP6); - - vis_mul8x16(CONST_128, TMP6, TMP6); - vis_and(TMP8, MASK_fe, TMP8); - - vis_mul8x16(CONST_128, TMP8, TMP8); - vis_or(REF_0, REF_2, TMP10); - - vis_or(REF_4, REF_6, TMP12); - - vis_alignaddr_g0((void *)off); - - vis_faligndata(TMP0, TMP2, REF_0); - - vis_faligndata(TMP2, TMP4, REF_4); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_2); - vis_faligndata(TMP2, TMP4, REF_6); - } else { - vis_src1(TMP2, REF_2); - vis_src1(TMP4, REF_6); - } - - vis_and(TMP6, MASK_7f, TMP6); - - vis_and(TMP8, MASK_7f, TMP8); - - vis_psub16(TMP10, TMP6, TMP6); - vis_st64(TMP6, dest[0]); - - vis_psub16(TMP12, TMP8, TMP8); - vis_st64_2(TMP8, dest, 8); - dest += stride; - - vis_xor(REF_0, REF_2, TMP6); - - vis_xor(REF_4, REF_6, TMP8); - - vis_and(TMP6, MASK_fe, TMP6); - - vis_mul8x16(CONST_128, TMP6, TMP6); - vis_and(TMP8, MASK_fe, TMP8); - - vis_mul8x16(CONST_128, TMP8, TMP8); - vis_or(REF_0, REF_2, TMP10); - - vis_or(REF_4, REF_6, TMP12); - - vis_and(TMP6, MASK_7f, TMP6); - - vis_and(TMP8, MASK_7f, TMP8); - - vis_psub16(TMP10, TMP6, TMP6); - vis_st64(TMP6, dest[0]); - - vis_psub16(TMP12, TMP8, TMP8); - vis_st64_2(TMP8, dest, 8); -} - -static void MC_put_x_8_vis (uint8_t * dest, const uint8_t * _ref, - const int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - unsigned long off = (unsigned long) ref & 0x7; - unsigned long 
off_plus_1 = off + 1; - - ref = vis_alignaddr(ref); - - vis_ld64(ref[0], TMP0); - - vis_ld64(ref[8], TMP2); - - vis_ld64(constants_fe[0], MASK_fe); - - vis_ld64(constants_7f[0], MASK_7f); - - vis_ld64(constants128[0], CONST_128); - vis_faligndata(TMP0, TMP2, REF_0); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_2); - } else { - vis_src1(TMP2, REF_2); - } - - ref += stride; - height = (height >> 1) - 1; - - do { /* 20 cycles */ - vis_ld64(ref[0], TMP0); - vis_xor(REF_0, REF_2, TMP4); - - vis_ld64_2(ref, 8, TMP2); - vis_and(TMP4, MASK_fe, TMP4); - ref += stride; - - vis_ld64(ref[0], TMP8); - vis_or(REF_0, REF_2, TMP6); - vis_mul8x16(CONST_128, TMP4, TMP4); - - vis_alignaddr_g0((void *)off); - - vis_ld64_2(ref, 8, TMP10); - ref += stride; - vis_faligndata(TMP0, TMP2, REF_0); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_2); - } else { - vis_src1(TMP2, REF_2); - } - - vis_and(TMP4, MASK_7f, TMP4); - - vis_psub16(TMP6, TMP4, DST_0); - vis_st64(DST_0, dest[0]); - dest += stride; - - vis_xor(REF_0, REF_2, TMP12); - - vis_and(TMP12, MASK_fe, TMP12); - - vis_or(REF_0, REF_2, TMP14); - vis_mul8x16(CONST_128, TMP12, TMP12); - - vis_alignaddr_g0((void *)off); - vis_faligndata(TMP8, TMP10, REF_0); - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP8, TMP10, REF_2); - } else { - vis_src1(TMP10, REF_2); - } - - vis_and(TMP12, MASK_7f, TMP12); - - vis_psub16(TMP14, TMP12, DST_0); - vis_st64(DST_0, dest[0]); - dest += stride; - } while (--height); - - vis_ld64(ref[0], TMP0); - vis_xor(REF_0, REF_2, TMP4); - - vis_ld64_2(ref, 8, TMP2); - vis_and(TMP4, MASK_fe, TMP4); - - vis_or(REF_0, REF_2, TMP6); - vis_mul8x16(CONST_128, TMP4, TMP4); - - vis_alignaddr_g0((void *)off); - - vis_faligndata(TMP0, TMP2, REF_0); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_2); - } else { - vis_src1(TMP2, REF_2); - } - - 
vis_and(TMP4, MASK_7f, TMP4); - - vis_psub16(TMP6, TMP4, DST_0); - vis_st64(DST_0, dest[0]); - dest += stride; - - vis_xor(REF_0, REF_2, TMP12); - - vis_and(TMP12, MASK_fe, TMP12); - - vis_or(REF_0, REF_2, TMP14); - vis_mul8x16(CONST_128, TMP12, TMP12); - - vis_and(TMP12, MASK_7f, TMP12); - - vis_psub16(TMP14, TMP12, DST_0); - vis_st64(DST_0, dest[0]); - dest += stride; -} - -static void MC_avg_x_16_vis (uint8_t * dest, const uint8_t * _ref, - const int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - unsigned long off = (unsigned long) ref & 0x7; - unsigned long off_plus_1 = off + 1; - - vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); - - vis_ld64(constants3[0], CONST_3); - vis_fzero(ZERO); - vis_ld64(constants256_512[0], CONST_256); - - ref = vis_alignaddr(ref); - do { /* 26 cycles */ - vis_ld64(ref[0], TMP0); - - vis_ld64(ref[8], TMP2); - - vis_alignaddr_g0((void *)off); - - vis_ld64(ref[16], TMP4); - - vis_ld64(dest[0], DST_0); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64(dest[8], DST_2); - vis_faligndata(TMP2, TMP4, REF_4); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_2); - vis_faligndata(TMP2, TMP4, REF_6); - } else { - vis_src1(TMP2, REF_2); - vis_src1(TMP4, REF_6); - } - - vis_mul8x16au(REF_0, CONST_256, TMP0); - - vis_pmerge(ZERO, REF_2, TMP4); - vis_mul8x16au(REF_0_1, CONST_256, TMP2); - - vis_pmerge(ZERO, REF_2_1, TMP6); - - vis_padd16(TMP0, TMP4, TMP0); - - vis_mul8x16al(DST_0, CONST_512, TMP4); - vis_padd16(TMP2, TMP6, TMP2); - - vis_mul8x16al(DST_1, CONST_512, TMP6); - - vis_mul8x16au(REF_6, CONST_256, TMP12); - - vis_padd16(TMP0, TMP4, TMP0); - vis_mul8x16au(REF_6_1, CONST_256, TMP14); - - vis_padd16(TMP2, TMP6, TMP2); - vis_mul8x16au(REF_4, CONST_256, TMP16); - - vis_padd16(TMP0, CONST_3, TMP8); - vis_mul8x16au(REF_4_1, CONST_256, TMP18); - - vis_padd16(TMP2, CONST_3, TMP10); - vis_pack16(TMP8, DST_0); - - vis_pack16(TMP10, DST_1); - vis_padd16(TMP16, TMP12, TMP0); - - vis_st64(DST_0, 
dest[0]); - vis_mul8x16al(DST_2, CONST_512, TMP4); - vis_padd16(TMP18, TMP14, TMP2); - - vis_mul8x16al(DST_3, CONST_512, TMP6); - vis_padd16(TMP0, CONST_3, TMP0); - - vis_padd16(TMP2, CONST_3, TMP2); - - vis_padd16(TMP0, TMP4, TMP0); - - vis_padd16(TMP2, TMP6, TMP2); - vis_pack16(TMP0, DST_2); - - vis_pack16(TMP2, DST_3); - vis_st64(DST_2, dest[8]); - - ref += stride; - dest += stride; - } while (--height); -} - -static void MC_avg_x_8_vis (uint8_t * dest, const uint8_t * _ref, - const int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - unsigned long off = (unsigned long) ref & 0x7; - unsigned long off_plus_1 = off + 1; - int stride_times_2 = stride << 1; - - vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); - - vis_ld64(constants3[0], CONST_3); - vis_fzero(ZERO); - vis_ld64(constants256_512[0], CONST_256); - - ref = vis_alignaddr(ref); - height >>= 2; - do { /* 47 cycles */ - vis_ld64(ref[0], TMP0); - - vis_ld64_2(ref, 8, TMP2); - ref += stride; - - vis_alignaddr_g0((void *)off); - - vis_ld64(ref[0], TMP4); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64_2(ref, 8, TMP6); - ref += stride; - - vis_ld64(ref[0], TMP8); - - vis_ld64_2(ref, 8, TMP10); - ref += stride; - vis_faligndata(TMP4, TMP6, REF_4); - - vis_ld64(ref[0], TMP12); - - vis_ld64_2(ref, 8, TMP14); - ref += stride; - vis_faligndata(TMP8, TMP10, REF_S0); - - vis_faligndata(TMP12, TMP14, REF_S4); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - - vis_ld64(dest[0], DST_0); - vis_faligndata(TMP0, TMP2, REF_2); - - vis_ld64_2(dest, stride, DST_2); - vis_faligndata(TMP4, TMP6, REF_6); - - vis_faligndata(TMP8, TMP10, REF_S2); - - vis_faligndata(TMP12, TMP14, REF_S6); - } else { - vis_ld64(dest[0], DST_0); - vis_src1(TMP2, REF_2); - - vis_ld64_2(dest, stride, DST_2); - vis_src1(TMP6, REF_6); - - vis_src1(TMP10, REF_S2); - - vis_src1(TMP14, REF_S6); - } - - vis_pmerge(ZERO, REF_0, TMP0); - vis_mul8x16au(REF_0_1, CONST_256, TMP2); - - vis_pmerge(ZERO, REF_2, TMP4); - vis_mul8x16au(REF_2_1, 
CONST_256, TMP6); - - vis_padd16(TMP0, CONST_3, TMP0); - vis_mul8x16al(DST_0, CONST_512, TMP16); - - vis_padd16(TMP2, CONST_3, TMP2); - vis_mul8x16al(DST_1, CONST_512, TMP18); - - vis_padd16(TMP0, TMP4, TMP0); - vis_mul8x16au(REF_4, CONST_256, TMP8); - - vis_padd16(TMP2, TMP6, TMP2); - vis_mul8x16au(REF_4_1, CONST_256, TMP10); - - vis_padd16(TMP0, TMP16, TMP0); - vis_mul8x16au(REF_6, CONST_256, TMP12); - - vis_padd16(TMP2, TMP18, TMP2); - vis_mul8x16au(REF_6_1, CONST_256, TMP14); - - vis_padd16(TMP8, CONST_3, TMP8); - vis_mul8x16al(DST_2, CONST_512, TMP16); - - vis_padd16(TMP8, TMP12, TMP8); - vis_mul8x16al(DST_3, CONST_512, TMP18); - - vis_padd16(TMP10, TMP14, TMP10); - vis_pack16(TMP0, DST_0); - - vis_pack16(TMP2, DST_1); - vis_st64(DST_0, dest[0]); - dest += stride; - vis_padd16(TMP10, CONST_3, TMP10); - - vis_ld64_2(dest, stride, DST_0); - vis_padd16(TMP8, TMP16, TMP8); - - vis_ld64_2(dest, stride_times_2, TMP4/*DST_2*/); - vis_padd16(TMP10, TMP18, TMP10); - vis_pack16(TMP8, DST_2); - - vis_pack16(TMP10, DST_3); - vis_st64(DST_2, dest[0]); - dest += stride; - - vis_mul8x16au(REF_S0_1, CONST_256, TMP2); - vis_pmerge(ZERO, REF_S0, TMP0); - - vis_pmerge(ZERO, REF_S2, TMP24); - vis_mul8x16au(REF_S2_1, CONST_256, TMP6); - - vis_padd16(TMP0, CONST_3, TMP0); - vis_mul8x16au(REF_S4, CONST_256, TMP8); - - vis_padd16(TMP2, CONST_3, TMP2); - vis_mul8x16au(REF_S4_1, CONST_256, TMP10); - - vis_padd16(TMP0, TMP24, TMP0); - vis_mul8x16au(REF_S6, CONST_256, TMP12); - - vis_padd16(TMP2, TMP6, TMP2); - vis_mul8x16au(REF_S6_1, CONST_256, TMP14); - - vis_padd16(TMP8, CONST_3, TMP8); - vis_mul8x16al(DST_0, CONST_512, TMP16); - - vis_padd16(TMP10, CONST_3, TMP10); - vis_mul8x16al(DST_1, CONST_512, TMP18); - - vis_padd16(TMP8, TMP12, TMP8); - vis_mul8x16al(TMP4/*DST_2*/, CONST_512, TMP20); - - vis_mul8x16al(TMP5/*DST_3*/, CONST_512, TMP22); - vis_padd16(TMP0, TMP16, TMP0); - - vis_padd16(TMP2, TMP18, TMP2); - vis_pack16(TMP0, DST_0); - - vis_padd16(TMP10, TMP14, TMP10); - 
vis_pack16(TMP2, DST_1); - vis_st64(DST_0, dest[0]); - dest += stride; - - vis_padd16(TMP8, TMP20, TMP8); - - vis_padd16(TMP10, TMP22, TMP10); - vis_pack16(TMP8, DST_2); - - vis_pack16(TMP10, DST_3); - vis_st64(DST_2, dest[0]); - dest += stride; - } while (--height); -} - -static void MC_put_y_16_vis (uint8_t * dest, const uint8_t * _ref, - const int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - int offset; - - ref = vis_alignaddr(ref); - offset = (ref != _ref) ? 16 : 0; - - vis_ld64(ref[0], TMP0); - - vis_ld64_2(ref, 8, TMP2); - - vis_ld64_2(ref, offset, TMP4); - ref += stride; - - vis_ld64(ref[0], TMP6); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64_2(ref, 8, TMP8); - vis_faligndata(TMP2, TMP4, REF_4); - - vis_ld64_2(ref, offset, TMP10); - ref += stride; - - vis_ld64(constants_fe[0], MASK_fe); - vis_faligndata(TMP6, TMP8, REF_2); - - vis_ld64(constants_7f[0], MASK_7f); - vis_faligndata(TMP8, TMP10, REF_6); - - vis_ld64(constants128[0], CONST_128); - height = (height >> 1) - 1; - do { /* 24 cycles */ - vis_ld64(ref[0], TMP0); - vis_xor(REF_0, REF_2, TMP12); - - vis_ld64_2(ref, 8, TMP2); - vis_xor(REF_4, REF_6, TMP16); - - vis_ld64_2(ref, offset, TMP4); - ref += stride; - vis_or(REF_0, REF_2, TMP14); - - vis_ld64(ref[0], TMP6); - vis_or(REF_4, REF_6, TMP18); - - vis_ld64_2(ref, 8, TMP8); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64_2(ref, offset, TMP10); - ref += stride; - vis_faligndata(TMP2, TMP4, REF_4); - - vis_and(TMP12, MASK_fe, TMP12); - - vis_and(TMP16, MASK_fe, TMP16); - vis_mul8x16(CONST_128, TMP12, TMP12); - - vis_mul8x16(CONST_128, TMP16, TMP16); - vis_xor(REF_0, REF_2, TMP0); - - vis_xor(REF_4, REF_6, TMP2); - - vis_or(REF_0, REF_2, TMP20); - - vis_and(TMP12, MASK_7f, TMP12); - - vis_and(TMP16, MASK_7f, TMP16); - - vis_psub16(TMP14, TMP12, TMP12); - vis_st64(TMP12, dest[0]); - - vis_psub16(TMP18, TMP16, TMP16); - vis_st64_2(TMP16, dest, 8); - dest += stride; - - vis_or(REF_4, REF_6, TMP18); - - vis_and(TMP0, MASK_fe, TMP0); - - 
vis_and(TMP2, MASK_fe, TMP2); - vis_mul8x16(CONST_128, TMP0, TMP0); - - vis_faligndata(TMP6, TMP8, REF_2); - vis_mul8x16(CONST_128, TMP2, TMP2); - - vis_faligndata(TMP8, TMP10, REF_6); - - vis_and(TMP0, MASK_7f, TMP0); - - vis_and(TMP2, MASK_7f, TMP2); - - vis_psub16(TMP20, TMP0, TMP0); - vis_st64(TMP0, dest[0]); - - vis_psub16(TMP18, TMP2, TMP2); - vis_st64_2(TMP2, dest, 8); - dest += stride; - } while (--height); - - vis_ld64(ref[0], TMP0); - vis_xor(REF_0, REF_2, TMP12); - - vis_ld64_2(ref, 8, TMP2); - vis_xor(REF_4, REF_6, TMP16); - - vis_ld64_2(ref, offset, TMP4); - vis_or(REF_0, REF_2, TMP14); - - vis_or(REF_4, REF_6, TMP18); - - vis_faligndata(TMP0, TMP2, REF_0); - - vis_faligndata(TMP2, TMP4, REF_4); - - vis_and(TMP12, MASK_fe, TMP12); - - vis_and(TMP16, MASK_fe, TMP16); - vis_mul8x16(CONST_128, TMP12, TMP12); - - vis_mul8x16(CONST_128, TMP16, TMP16); - vis_xor(REF_0, REF_2, TMP0); - - vis_xor(REF_4, REF_6, TMP2); - - vis_or(REF_0, REF_2, TMP20); - - vis_and(TMP12, MASK_7f, TMP12); - - vis_and(TMP16, MASK_7f, TMP16); - - vis_psub16(TMP14, TMP12, TMP12); - vis_st64(TMP12, dest[0]); - - vis_psub16(TMP18, TMP16, TMP16); - vis_st64_2(TMP16, dest, 8); - dest += stride; - - vis_or(REF_4, REF_6, TMP18); - - vis_and(TMP0, MASK_fe, TMP0); - - vis_and(TMP2, MASK_fe, TMP2); - vis_mul8x16(CONST_128, TMP0, TMP0); - - vis_mul8x16(CONST_128, TMP2, TMP2); - - vis_and(TMP0, MASK_7f, TMP0); - - vis_and(TMP2, MASK_7f, TMP2); - - vis_psub16(TMP20, TMP0, TMP0); - vis_st64(TMP0, dest[0]); - - vis_psub16(TMP18, TMP2, TMP2); - vis_st64_2(TMP2, dest, 8); -} - -static void MC_put_y_8_vis (uint8_t * dest, const uint8_t * _ref, - const int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - int offset; - - ref = vis_alignaddr(ref); - offset = (ref != _ref) ? 
8 : 0; - - vis_ld64(ref[0], TMP0); - - vis_ld64_2(ref, offset, TMP2); - ref += stride; - - vis_ld64(ref[0], TMP4); - - vis_ld64_2(ref, offset, TMP6); - ref += stride; - - vis_ld64(constants_fe[0], MASK_fe); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64(constants_7f[0], MASK_7f); - vis_faligndata(TMP4, TMP6, REF_2); - - vis_ld64(constants128[0], CONST_128); - height = (height >> 1) - 1; - do { /* 12 cycles */ - vis_ld64(ref[0], TMP0); - vis_xor(REF_0, REF_2, TMP4); - - vis_ld64_2(ref, offset, TMP2); - ref += stride; - vis_and(TMP4, MASK_fe, TMP4); - - vis_or(REF_0, REF_2, TMP6); - vis_mul8x16(CONST_128, TMP4, TMP4); - - vis_faligndata(TMP0, TMP2, REF_0); - vis_ld64(ref[0], TMP0); - - vis_ld64_2(ref, offset, TMP2); - ref += stride; - vis_xor(REF_0, REF_2, TMP12); - - vis_and(TMP4, MASK_7f, TMP4); - - vis_and(TMP12, MASK_fe, TMP12); - - vis_mul8x16(CONST_128, TMP12, TMP12); - vis_or(REF_0, REF_2, TMP14); - - vis_psub16(TMP6, TMP4, DST_0); - vis_st64(DST_0, dest[0]); - dest += stride; - - vis_faligndata(TMP0, TMP2, REF_2); - - vis_and(TMP12, MASK_7f, TMP12); - - vis_psub16(TMP14, TMP12, DST_0); - vis_st64(DST_0, dest[0]); - dest += stride; - } while (--height); - - vis_ld64(ref[0], TMP0); - vis_xor(REF_0, REF_2, TMP4); - - vis_ld64_2(ref, offset, TMP2); - vis_and(TMP4, MASK_fe, TMP4); - - vis_or(REF_0, REF_2, TMP6); - vis_mul8x16(CONST_128, TMP4, TMP4); - - vis_faligndata(TMP0, TMP2, REF_0); - - vis_xor(REF_0, REF_2, TMP12); - - vis_and(TMP4, MASK_7f, TMP4); - - vis_and(TMP12, MASK_fe, TMP12); - - vis_mul8x16(CONST_128, TMP12, TMP12); - vis_or(REF_0, REF_2, TMP14); - - vis_psub16(TMP6, TMP4, DST_0); - vis_st64(DST_0, dest[0]); - dest += stride; - - vis_and(TMP12, MASK_7f, TMP12); - - vis_psub16(TMP14, TMP12, DST_0); - vis_st64(DST_0, dest[0]); -} - -static void MC_avg_y_16_vis (uint8_t * dest, const uint8_t * _ref, - const int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - int stride_8 = stride + 8; - int stride_16; - int offset; - - vis_set_gsr(5 << 
VIS_GSR_SCALEFACT_SHIFT); - - ref = vis_alignaddr(ref); - offset = (ref != _ref) ? 16 : 0; - - vis_ld64(ref[ 0], TMP0); - vis_fzero(ZERO); - - vis_ld64(ref[ 8], TMP2); - - vis_ld64_2(ref, offset, TMP4); - stride_16 = stride + offset; - - vis_ld64(constants3[0], CONST_3); - vis_faligndata(TMP0, TMP2, REF_2); - - vis_ld64(constants256_512[0], CONST_256); - vis_faligndata(TMP2, TMP4, REF_6); - height >>= 1; - - do { /* 31 cycles */ - vis_ld64_2(ref, stride, TMP0); - vis_pmerge(ZERO, REF_2, TMP12); - vis_mul8x16au(REF_2_1, CONST_256, TMP14); - - vis_ld64_2(ref, stride_8, TMP2); - vis_pmerge(ZERO, REF_6, TMP16); - vis_mul8x16au(REF_6_1, CONST_256, TMP18); - - vis_ld64_2(ref, stride_16, TMP4); - ref += stride; - - vis_ld64(dest[0], DST_0); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64_2(dest, 8, DST_2); - vis_faligndata(TMP2, TMP4, REF_4); - - vis_ld64_2(ref, stride, TMP6); - vis_pmerge(ZERO, REF_0, TMP0); - vis_mul8x16au(REF_0_1, CONST_256, TMP2); - - vis_ld64_2(ref, stride_8, TMP8); - vis_pmerge(ZERO, REF_4, TMP4); - - vis_ld64_2(ref, stride_16, TMP10); - ref += stride; - - vis_ld64_2(dest, stride, REF_S0/*DST_4*/); - vis_faligndata(TMP6, TMP8, REF_2); - vis_mul8x16au(REF_4_1, CONST_256, TMP6); - - vis_ld64_2(dest, stride_8, REF_S2/*DST_6*/); - vis_faligndata(TMP8, TMP10, REF_6); - vis_mul8x16al(DST_0, CONST_512, TMP20); - - vis_padd16(TMP0, CONST_3, TMP0); - vis_mul8x16al(DST_1, CONST_512, TMP22); - - vis_padd16(TMP2, CONST_3, TMP2); - vis_mul8x16al(DST_2, CONST_512, TMP24); - - vis_padd16(TMP4, CONST_3, TMP4); - vis_mul8x16al(DST_3, CONST_512, TMP26); - - vis_padd16(TMP6, CONST_3, TMP6); - - vis_padd16(TMP12, TMP20, TMP12); - vis_mul8x16al(REF_S0, CONST_512, TMP20); - - vis_padd16(TMP14, TMP22, TMP14); - vis_mul8x16al(REF_S0_1, CONST_512, TMP22); - - vis_padd16(TMP16, TMP24, TMP16); - vis_mul8x16al(REF_S2, CONST_512, TMP24); - - vis_padd16(TMP18, TMP26, TMP18); - vis_mul8x16al(REF_S2_1, CONST_512, TMP26); - - vis_padd16(TMP12, TMP0, TMP12); - vis_mul8x16au(REF_2, 
CONST_256, TMP28); - - vis_padd16(TMP14, TMP2, TMP14); - vis_mul8x16au(REF_2_1, CONST_256, TMP30); - - vis_padd16(TMP16, TMP4, TMP16); - vis_mul8x16au(REF_6, CONST_256, REF_S4); - - vis_padd16(TMP18, TMP6, TMP18); - vis_mul8x16au(REF_6_1, CONST_256, REF_S6); - - vis_pack16(TMP12, DST_0); - vis_padd16(TMP28, TMP0, TMP12); - - vis_pack16(TMP14, DST_1); - vis_st64(DST_0, dest[0]); - vis_padd16(TMP30, TMP2, TMP14); - - vis_pack16(TMP16, DST_2); - vis_padd16(REF_S4, TMP4, TMP16); - - vis_pack16(TMP18, DST_3); - vis_st64_2(DST_2, dest, 8); - dest += stride; - vis_padd16(REF_S6, TMP6, TMP18); - - vis_padd16(TMP12, TMP20, TMP12); - - vis_padd16(TMP14, TMP22, TMP14); - vis_pack16(TMP12, DST_0); - - vis_padd16(TMP16, TMP24, TMP16); - vis_pack16(TMP14, DST_1); - vis_st64(DST_0, dest[0]); - - vis_padd16(TMP18, TMP26, TMP18); - vis_pack16(TMP16, DST_2); - - vis_pack16(TMP18, DST_3); - vis_st64_2(DST_2, dest, 8); - dest += stride; - } while (--height); -} - -static void MC_avg_y_8_vis (uint8_t * dest, const uint8_t * _ref, - const int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - int stride_8; - int offset; - - vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); - - ref = vis_alignaddr(ref); - offset = (ref != _ref) ? 
8 : 0; - - vis_ld64(ref[ 0], TMP0); - vis_fzero(ZERO); - - vis_ld64_2(ref, offset, TMP2); - stride_8 = stride + offset; - - vis_ld64(constants3[0], CONST_3); - vis_faligndata(TMP0, TMP2, REF_2); - - vis_ld64(constants256_512[0], CONST_256); - - height >>= 1; - do { /* 20 cycles */ - vis_ld64_2(ref, stride, TMP0); - vis_pmerge(ZERO, REF_2, TMP8); - vis_mul8x16au(REF_2_1, CONST_256, TMP10); - - vis_ld64_2(ref, stride_8, TMP2); - ref += stride; - - vis_ld64(dest[0], DST_0); - - vis_ld64_2(dest, stride, DST_2); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64_2(ref, stride, TMP4); - vis_mul8x16al(DST_0, CONST_512, TMP16); - vis_pmerge(ZERO, REF_0, TMP12); - - vis_ld64_2(ref, stride_8, TMP6); - ref += stride; - vis_mul8x16al(DST_1, CONST_512, TMP18); - vis_pmerge(ZERO, REF_0_1, TMP14); - - vis_padd16(TMP12, CONST_3, TMP12); - vis_mul8x16al(DST_2, CONST_512, TMP24); - - vis_padd16(TMP14, CONST_3, TMP14); - vis_mul8x16al(DST_3, CONST_512, TMP26); - - vis_faligndata(TMP4, TMP6, REF_2); - - vis_padd16(TMP8, TMP12, TMP8); - - vis_padd16(TMP10, TMP14, TMP10); - vis_mul8x16au(REF_2, CONST_256, TMP20); - - vis_padd16(TMP8, TMP16, TMP0); - vis_mul8x16au(REF_2_1, CONST_256, TMP22); - - vis_padd16(TMP10, TMP18, TMP2); - vis_pack16(TMP0, DST_0); - - vis_pack16(TMP2, DST_1); - vis_st64(DST_0, dest[0]); - dest += stride; - vis_padd16(TMP12, TMP20, TMP12); - - vis_padd16(TMP14, TMP22, TMP14); - - vis_padd16(TMP12, TMP24, TMP0); - - vis_padd16(TMP14, TMP26, TMP2); - vis_pack16(TMP0, DST_2); - - vis_pack16(TMP2, DST_3); - vis_st64(DST_2, dest[0]); - dest += stride; - } while (--height); -} - -static void MC_put_xy_16_vis (uint8_t * dest, const uint8_t * _ref, - const int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - unsigned long off = (unsigned long) ref & 0x7; - unsigned long off_plus_1 = off + 1; - int stride_8 = stride + 8; - int stride_16 = stride + 16; - - vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); - - ref = vis_alignaddr(ref); - - vis_ld64(ref[ 0], TMP0); - 
vis_fzero(ZERO); - - vis_ld64(ref[ 8], TMP2); - - vis_ld64(ref[16], TMP4); - - vis_ld64(constants2[0], CONST_2); - vis_faligndata(TMP0, TMP2, REF_S0); - - vis_ld64(constants256_512[0], CONST_256); - vis_faligndata(TMP2, TMP4, REF_S4); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_S2); - vis_faligndata(TMP2, TMP4, REF_S6); - } else { - vis_src1(TMP2, REF_S2); - vis_src1(TMP4, REF_S6); - } - - height >>= 1; - do { - vis_ld64_2(ref, stride, TMP0); - vis_mul8x16au(REF_S0, CONST_256, TMP12); - vis_pmerge(ZERO, REF_S0_1, TMP14); - - vis_alignaddr_g0((void *)off); - - vis_ld64_2(ref, stride_8, TMP2); - vis_mul8x16au(REF_S2, CONST_256, TMP16); - vis_pmerge(ZERO, REF_S2_1, TMP18); - - vis_ld64_2(ref, stride_16, TMP4); - ref += stride; - vis_mul8x16au(REF_S4, CONST_256, TMP20); - vis_pmerge(ZERO, REF_S4_1, TMP22); - - vis_ld64_2(ref, stride, TMP6); - vis_mul8x16au(REF_S6, CONST_256, TMP24); - vis_pmerge(ZERO, REF_S6_1, TMP26); - - vis_ld64_2(ref, stride_8, TMP8); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64_2(ref, stride_16, TMP10); - ref += stride; - vis_faligndata(TMP2, TMP4, REF_4); - - vis_faligndata(TMP6, TMP8, REF_S0); - - vis_faligndata(TMP8, TMP10, REF_S4); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_2); - vis_faligndata(TMP2, TMP4, REF_6); - vis_faligndata(TMP6, TMP8, REF_S2); - vis_faligndata(TMP8, TMP10, REF_S6); - } else { - vis_src1(TMP2, REF_2); - vis_src1(TMP4, REF_6); - vis_src1(TMP8, REF_S2); - vis_src1(TMP10, REF_S6); - } - - vis_mul8x16au(REF_0, CONST_256, TMP0); - vis_pmerge(ZERO, REF_0_1, TMP2); - - vis_mul8x16au(REF_2, CONST_256, TMP4); - vis_pmerge(ZERO, REF_2_1, TMP6); - - vis_padd16(TMP0, CONST_2, TMP8); - vis_mul8x16au(REF_4, CONST_256, TMP0); - - vis_padd16(TMP2, CONST_2, TMP10); - vis_mul8x16au(REF_4_1, CONST_256, TMP2); - - vis_padd16(TMP8, TMP4, TMP8); - vis_mul8x16au(REF_6, CONST_256, TMP4); - - vis_padd16(TMP10, TMP6, TMP10); - 
vis_mul8x16au(REF_6_1, CONST_256, TMP6); - - vis_padd16(TMP12, TMP8, TMP12); - - vis_padd16(TMP14, TMP10, TMP14); - - vis_padd16(TMP12, TMP16, TMP12); - - vis_padd16(TMP14, TMP18, TMP14); - vis_pack16(TMP12, DST_0); - - vis_pack16(TMP14, DST_1); - vis_st64(DST_0, dest[0]); - vis_padd16(TMP0, CONST_2, TMP12); - - vis_mul8x16au(REF_S0, CONST_256, TMP0); - vis_padd16(TMP2, CONST_2, TMP14); - - vis_mul8x16au(REF_S0_1, CONST_256, TMP2); - vis_padd16(TMP12, TMP4, TMP12); - - vis_mul8x16au(REF_S2, CONST_256, TMP4); - vis_padd16(TMP14, TMP6, TMP14); - - vis_mul8x16au(REF_S2_1, CONST_256, TMP6); - vis_padd16(TMP20, TMP12, TMP20); - - vis_padd16(TMP22, TMP14, TMP22); - - vis_padd16(TMP20, TMP24, TMP20); - - vis_padd16(TMP22, TMP26, TMP22); - vis_pack16(TMP20, DST_2); - - vis_pack16(TMP22, DST_3); - vis_st64_2(DST_2, dest, 8); - dest += stride; - vis_padd16(TMP0, TMP4, TMP24); - - vis_mul8x16au(REF_S4, CONST_256, TMP0); - vis_padd16(TMP2, TMP6, TMP26); - - vis_mul8x16au(REF_S4_1, CONST_256, TMP2); - vis_padd16(TMP24, TMP8, TMP24); - - vis_padd16(TMP26, TMP10, TMP26); - vis_pack16(TMP24, DST_0); - - vis_pack16(TMP26, DST_1); - vis_st64(DST_0, dest[0]); - vis_pmerge(ZERO, REF_S6, TMP4); - - vis_pmerge(ZERO, REF_S6_1, TMP6); - - vis_padd16(TMP0, TMP4, TMP0); - - vis_padd16(TMP2, TMP6, TMP2); - - vis_padd16(TMP0, TMP12, TMP0); - - vis_padd16(TMP2, TMP14, TMP2); - vis_pack16(TMP0, DST_2); - - vis_pack16(TMP2, DST_3); - vis_st64_2(DST_2, dest, 8); - dest += stride; - } while (--height); -} - -static void MC_put_xy_8_vis (uint8_t * dest, const uint8_t * _ref, - const int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - unsigned long off = (unsigned long) ref & 0x7; - unsigned long off_plus_1 = off + 1; - int stride_8 = stride + 8; - - vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); - - ref = vis_alignaddr(ref); - - vis_ld64(ref[ 0], TMP0); - vis_fzero(ZERO); - - vis_ld64(ref[ 8], TMP2); - - vis_ld64(constants2[0], CONST_2); - - vis_ld64(constants256_512[0], CONST_256); - 
vis_faligndata(TMP0, TMP2, REF_S0); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_S2); - } else { - vis_src1(TMP2, REF_S2); - } - - height >>= 1; - do { /* 26 cycles */ - vis_ld64_2(ref, stride, TMP0); - vis_mul8x16au(REF_S0, CONST_256, TMP8); - vis_pmerge(ZERO, REF_S2, TMP12); - - vis_alignaddr_g0((void *)off); - - vis_ld64_2(ref, stride_8, TMP2); - ref += stride; - vis_mul8x16au(REF_S0_1, CONST_256, TMP10); - vis_pmerge(ZERO, REF_S2_1, TMP14); - - vis_ld64_2(ref, stride, TMP4); - - vis_ld64_2(ref, stride_8, TMP6); - ref += stride; - vis_faligndata(TMP0, TMP2, REF_S4); - - vis_pmerge(ZERO, REF_S4, TMP18); - - vis_pmerge(ZERO, REF_S4_1, TMP20); - - vis_faligndata(TMP4, TMP6, REF_S0); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_S6); - vis_faligndata(TMP4, TMP6, REF_S2); - } else { - vis_src1(TMP2, REF_S6); - vis_src1(TMP6, REF_S2); - } - - vis_padd16(TMP18, CONST_2, TMP18); - vis_mul8x16au(REF_S6, CONST_256, TMP22); - - vis_padd16(TMP20, CONST_2, TMP20); - vis_mul8x16au(REF_S6_1, CONST_256, TMP24); - - vis_mul8x16au(REF_S0, CONST_256, TMP26); - vis_pmerge(ZERO, REF_S0_1, TMP28); - - vis_mul8x16au(REF_S2, CONST_256, TMP30); - vis_padd16(TMP18, TMP22, TMP18); - - vis_mul8x16au(REF_S2_1, CONST_256, TMP32); - vis_padd16(TMP20, TMP24, TMP20); - - vis_padd16(TMP8, TMP18, TMP8); - - vis_padd16(TMP10, TMP20, TMP10); - - vis_padd16(TMP8, TMP12, TMP8); - - vis_padd16(TMP10, TMP14, TMP10); - vis_pack16(TMP8, DST_0); - - vis_pack16(TMP10, DST_1); - vis_st64(DST_0, dest[0]); - dest += stride; - vis_padd16(TMP18, TMP26, TMP18); - - vis_padd16(TMP20, TMP28, TMP20); - - vis_padd16(TMP18, TMP30, TMP18); - - vis_padd16(TMP20, TMP32, TMP20); - vis_pack16(TMP18, DST_2); - - vis_pack16(TMP20, DST_3); - vis_st64(DST_2, dest[0]); - dest += stride; - } while (--height); -} - -static void MC_avg_xy_16_vis (uint8_t * dest, const uint8_t * _ref, - const int stride, int height) -{ - uint8_t *ref 
= (uint8_t *) _ref; - unsigned long off = (unsigned long) ref & 0x7; - unsigned long off_plus_1 = off + 1; - int stride_8 = stride + 8; - int stride_16 = stride + 16; - - vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT); - - ref = vis_alignaddr(ref); - - vis_ld64(ref[ 0], TMP0); - vis_fzero(ZERO); - - vis_ld64(ref[ 8], TMP2); - - vis_ld64(ref[16], TMP4); - - vis_ld64(constants6[0], CONST_6); - vis_faligndata(TMP0, TMP2, REF_S0); - - vis_ld64(constants256_1024[0], CONST_256); - vis_faligndata(TMP2, TMP4, REF_S4); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_S2); - vis_faligndata(TMP2, TMP4, REF_S6); - } else { - vis_src1(TMP2, REF_S2); - vis_src1(TMP4, REF_S6); - } - - height >>= 1; - do { /* 55 cycles */ - vis_ld64_2(ref, stride, TMP0); - vis_mul8x16au(REF_S0, CONST_256, TMP12); - vis_pmerge(ZERO, REF_S0_1, TMP14); - - vis_alignaddr_g0((void *)off); - - vis_ld64_2(ref, stride_8, TMP2); - vis_mul8x16au(REF_S2, CONST_256, TMP16); - vis_pmerge(ZERO, REF_S2_1, TMP18); - - vis_ld64_2(ref, stride_16, TMP4); - ref += stride; - vis_mul8x16au(REF_S4, CONST_256, TMP20); - vis_pmerge(ZERO, REF_S4_1, TMP22); - - vis_ld64_2(ref, stride, TMP6); - vis_mul8x16au(REF_S6, CONST_256, TMP24); - vis_pmerge(ZERO, REF_S6_1, TMP26); - - vis_ld64_2(ref, stride_8, TMP8); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64_2(ref, stride_16, TMP10); - ref += stride; - vis_faligndata(TMP2, TMP4, REF_4); - - vis_ld64(dest[0], DST_0); - vis_faligndata(TMP6, TMP8, REF_S0); - - vis_ld64_2(dest, 8, DST_2); - vis_faligndata(TMP8, TMP10, REF_S4); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_2); - vis_faligndata(TMP2, TMP4, REF_6); - vis_faligndata(TMP6, TMP8, REF_S2); - vis_faligndata(TMP8, TMP10, REF_S6); - } else { - vis_src1(TMP2, REF_2); - vis_src1(TMP4, REF_6); - vis_src1(TMP8, REF_S2); - vis_src1(TMP10, REF_S6); - } - - vis_mul8x16al(DST_0, CONST_1024, TMP30); - vis_pmerge(ZERO, REF_0, TMP0); - - 
vis_mul8x16al(DST_1, CONST_1024, TMP32); - vis_pmerge(ZERO, REF_0_1, TMP2); - - vis_mul8x16au(REF_2, CONST_256, TMP4); - vis_pmerge(ZERO, REF_2_1, TMP6); - - vis_mul8x16al(DST_2, CONST_1024, REF_0); - vis_padd16(TMP0, CONST_6, TMP0); - - vis_mul8x16al(DST_3, CONST_1024, REF_2); - vis_padd16(TMP2, CONST_6, TMP2); - - vis_padd16(TMP0, TMP4, TMP0); - vis_mul8x16au(REF_4, CONST_256, TMP4); - - vis_padd16(TMP2, TMP6, TMP2); - vis_mul8x16au(REF_4_1, CONST_256, TMP6); - - vis_padd16(TMP12, TMP0, TMP12); - vis_mul8x16au(REF_6, CONST_256, TMP8); - - vis_padd16(TMP14, TMP2, TMP14); - vis_mul8x16au(REF_6_1, CONST_256, TMP10); - - vis_padd16(TMP12, TMP16, TMP12); - vis_mul8x16au(REF_S0, CONST_256, REF_4); - - vis_padd16(TMP14, TMP18, TMP14); - vis_mul8x16au(REF_S0_1, CONST_256, REF_6); - - vis_padd16(TMP12, TMP30, TMP12); - - vis_padd16(TMP14, TMP32, TMP14); - vis_pack16(TMP12, DST_0); - - vis_pack16(TMP14, DST_1); - vis_st64(DST_0, dest[0]); - vis_padd16(TMP4, CONST_6, TMP4); - - vis_ld64_2(dest, stride, DST_0); - vis_padd16(TMP6, CONST_6, TMP6); - vis_mul8x16au(REF_S2, CONST_256, TMP12); - - vis_padd16(TMP4, TMP8, TMP4); - vis_mul8x16au(REF_S2_1, CONST_256, TMP14); - - vis_padd16(TMP6, TMP10, TMP6); - - vis_padd16(TMP20, TMP4, TMP20); - - vis_padd16(TMP22, TMP6, TMP22); - - vis_padd16(TMP20, TMP24, TMP20); - - vis_padd16(TMP22, TMP26, TMP22); - - vis_padd16(TMP20, REF_0, TMP20); - vis_mul8x16au(REF_S4, CONST_256, REF_0); - - vis_padd16(TMP22, REF_2, TMP22); - vis_pack16(TMP20, DST_2); - - vis_pack16(TMP22, DST_3); - vis_st64_2(DST_2, dest, 8); - dest += stride; - - vis_ld64_2(dest, 8, DST_2); - vis_mul8x16al(DST_0, CONST_1024, TMP30); - vis_pmerge(ZERO, REF_S4_1, REF_2); - - vis_mul8x16al(DST_1, CONST_1024, TMP32); - vis_padd16(REF_4, TMP0, TMP8); - - vis_mul8x16au(REF_S6, CONST_256, REF_4); - vis_padd16(REF_6, TMP2, TMP10); - - vis_mul8x16au(REF_S6_1, CONST_256, REF_6); - vis_padd16(TMP8, TMP12, TMP8); - - vis_padd16(TMP10, TMP14, TMP10); - - vis_padd16(TMP8, TMP30, TMP8); 
- - vis_padd16(TMP10, TMP32, TMP10); - vis_pack16(TMP8, DST_0); - - vis_pack16(TMP10, DST_1); - vis_st64(DST_0, dest[0]); - - vis_padd16(REF_0, TMP4, REF_0); - - vis_mul8x16al(DST_2, CONST_1024, TMP30); - vis_padd16(REF_2, TMP6, REF_2); - - vis_mul8x16al(DST_3, CONST_1024, TMP32); - vis_padd16(REF_0, REF_4, REF_0); - - vis_padd16(REF_2, REF_6, REF_2); - - vis_padd16(REF_0, TMP30, REF_0); - - /* stall */ - - vis_padd16(REF_2, TMP32, REF_2); - vis_pack16(REF_0, DST_2); - - vis_pack16(REF_2, DST_3); - vis_st64_2(DST_2, dest, 8); - dest += stride; - } while (--height); -} - -static void MC_avg_xy_8_vis (uint8_t * dest, const uint8_t * _ref, - const int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - unsigned long off = (unsigned long) ref & 0x7; - unsigned long off_plus_1 = off + 1; - int stride_8 = stride + 8; - - vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT); - - ref = vis_alignaddr(ref); - - vis_ld64(ref[0], TMP0); - vis_fzero(ZERO); - - vis_ld64_2(ref, 8, TMP2); - - vis_ld64(constants6[0], CONST_6); - - vis_ld64(constants256_1024[0], CONST_256); - vis_faligndata(TMP0, TMP2, REF_S0); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_S2); - } else { - vis_src1(TMP2, REF_S2); - } - - height >>= 1; - do { /* 31 cycles */ - vis_ld64_2(ref, stride, TMP0); - vis_mul8x16au(REF_S0, CONST_256, TMP8); - vis_pmerge(ZERO, REF_S0_1, TMP10); - - vis_ld64_2(ref, stride_8, TMP2); - ref += stride; - vis_mul8x16au(REF_S2, CONST_256, TMP12); - vis_pmerge(ZERO, REF_S2_1, TMP14); - - vis_alignaddr_g0((void *)off); - - vis_ld64_2(ref, stride, TMP4); - vis_faligndata(TMP0, TMP2, REF_S4); - - vis_ld64_2(ref, stride_8, TMP6); - ref += stride; - - vis_ld64(dest[0], DST_0); - vis_faligndata(TMP4, TMP6, REF_S0); - - vis_ld64_2(dest, stride, DST_2); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_S6); - vis_faligndata(TMP4, TMP6, REF_S2); - } else { - vis_src1(TMP2, REF_S6); - vis_src1(TMP6, 
REF_S2); - } - - vis_mul8x16al(DST_0, CONST_1024, TMP30); - vis_pmerge(ZERO, REF_S4, TMP22); - - vis_mul8x16al(DST_1, CONST_1024, TMP32); - vis_pmerge(ZERO, REF_S4_1, TMP24); - - vis_mul8x16au(REF_S6, CONST_256, TMP26); - vis_pmerge(ZERO, REF_S6_1, TMP28); - - vis_mul8x16au(REF_S0, CONST_256, REF_S4); - vis_padd16(TMP22, CONST_6, TMP22); - - vis_mul8x16au(REF_S0_1, CONST_256, REF_S6); - vis_padd16(TMP24, CONST_6, TMP24); - - vis_mul8x16al(DST_2, CONST_1024, REF_0); - vis_padd16(TMP22, TMP26, TMP22); - - vis_mul8x16al(DST_3, CONST_1024, REF_2); - vis_padd16(TMP24, TMP28, TMP24); - - vis_mul8x16au(REF_S2, CONST_256, TMP26); - vis_padd16(TMP8, TMP22, TMP8); - - vis_mul8x16au(REF_S2_1, CONST_256, TMP28); - vis_padd16(TMP10, TMP24, TMP10); - - vis_padd16(TMP8, TMP12, TMP8); - - vis_padd16(TMP10, TMP14, TMP10); - - vis_padd16(TMP8, TMP30, TMP8); - - vis_padd16(TMP10, TMP32, TMP10); - vis_pack16(TMP8, DST_0); - - vis_pack16(TMP10, DST_1); - vis_st64(DST_0, dest[0]); - dest += stride; - - vis_padd16(REF_S4, TMP22, TMP12); - - vis_padd16(REF_S6, TMP24, TMP14); - - vis_padd16(TMP12, TMP26, TMP12); - - vis_padd16(TMP14, TMP28, TMP14); - - vis_padd16(TMP12, REF_0, TMP12); - - vis_padd16(TMP14, REF_2, TMP14); - vis_pack16(TMP12, DST_2); - - vis_pack16(TMP14, DST_3); - vis_st64(DST_2, dest[0]); - dest += stride; - } while (--height); -} - -MPEG2_MC_EXTERN(vis); - -#endif /* !(ARCH_SPARC) */ diff --git a/src/libmpeg2new/libmpeg2/mpeg2_internal.h b/src/libmpeg2new/libmpeg2/mpeg2_internal.h deleted file mode 100644 index fec7d4744..000000000 --- a/src/libmpeg2new/libmpeg2/mpeg2_internal.h +++ /dev/null @@ -1,302 +0,0 @@ -/* - * mpeg2_internal.h - * Copyright (C) 2000-2003 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. 
- * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#define STATE_INTERNAL_NORETURN ((mpeg2_state_t)-1) - -/* macroblock modes */ -#define MACROBLOCK_INTRA 1 -#define MACROBLOCK_PATTERN 2 -#define MACROBLOCK_MOTION_BACKWARD 4 -#define MACROBLOCK_MOTION_FORWARD 8 -#define MACROBLOCK_QUANT 16 -#define DCT_TYPE_INTERLACED 32 -/* motion_type */ -#define MOTION_TYPE_SHIFT 6 -#define MC_FIELD 1 -#define MC_FRAME 2 -#define MC_16X8 2 -#define MC_DMV 3 - -/* picture structure */ -#define TOP_FIELD 1 -#define BOTTOM_FIELD 2 -#define FRAME_PICTURE 3 - -/* picture coding type */ -#define I_TYPE 1 -#define P_TYPE 2 -#define B_TYPE 3 -#define D_TYPE 4 - -typedef void mpeg2_mc_fct (uint8_t *, const uint8_t *, int, int); - -typedef struct { - uint8_t * ref[2][3]; - uint8_t ** ref2[2]; - int pmv[2][2]; - int f_code[2]; -} motion_t; - -typedef void motion_parser_t (mpeg2_decoder_t * decoder, - motion_t * motion, - mpeg2_mc_fct * const * table); - -struct mpeg2_decoder_s { - /* first, state that carries information from one macroblock to the */ - /* next inside a slice, and is never used outside of mpeg2_slice() */ - - /* bit parsing stuff */ - uint32_t bitstream_buf; /* current 32 bit working set */ - int bitstream_bits; /* used bits in working set */ - const uint8_t * bitstream_ptr; /* buffer with stream data */ - - 
uint8_t * dest[3]; - - int offset; - int stride; - int uv_stride; - int slice_stride; - int slice_uv_stride; - int stride_frame; - unsigned int limit_x; - unsigned int limit_y_16; - unsigned int limit_y_8; - unsigned int limit_y; - - /* Motion vectors */ - /* The f_ and b_ correspond to the forward and backward motion */ - /* predictors */ - motion_t b_motion; - motion_t f_motion; - motion_parser_t * motion_parser[5]; - - /* predictor for DC coefficients in intra blocks */ - int16_t dc_dct_pred[3]; - - /* DCT coefficients */ - int16_t DCTblock[64] ATTR_ALIGN(64); - - uint8_t * picture_dest[3]; - void (* convert) (void * convert_id, uint8_t * const * src, - unsigned int v_offset); - void * convert_id; - - int dmv_offset; - unsigned int v_offset; - - /* now non-slice-specific information */ - - /* sequence header stuff */ - uint16_t * quantizer_matrix[4]; - uint16_t (* chroma_quantizer[2])[64]; - uint16_t quantizer_prescale[4][32][64]; - - /* The width and height of the picture snapped to macroblock units */ - int width; - int height; - int vertical_position_extension; - int chroma_format; - - /* picture header stuff */ - - /* what type of picture this is (I, P, B, D) */ - int coding_type; - - /* picture coding extension stuff */ - - /* quantization factor for intra dc coefficients */ - int intra_dc_precision; - /* top/bottom/both fields */ - int picture_structure; - /* bool to indicate all predictions are frame based */ - int frame_pred_frame_dct; - /* bool to indicate whether intra blocks have motion vectors */ - /* (for concealment) */ - int concealment_motion_vectors; - /* bool to use different vlc tables */ - int intra_vlc_format; - /* used for DMV MC */ - int top_field_first; - - /* stuff derived from bitstream */ - - /* pointer to the zigzag scan we're supposed to be using */ - const uint8_t * scan; - - int second_field; - - int mpeg1; -}; - -typedef struct { - mpeg2_fbuf_t fbuf; -} fbuf_alloc_t; - -struct mpeg2dec_s { - mpeg2_decoder_t decoder; - - 
mpeg2_info_t info; - - uint32_t shift; - int is_display_initialized; - mpeg2_state_t (* action) (struct mpeg2dec_s * mpeg2dec); - mpeg2_state_t state; - uint32_t ext_state; - - /* allocated in init - gcc has problems allocating such big structures */ - uint8_t * chunk_buffer; - /* pointer to start of the current chunk */ - uint8_t * chunk_start; - /* pointer to current position in chunk_buffer */ - uint8_t * chunk_ptr; - /* last start code ? */ - uint8_t code; - - /* picture tags */ - uint32_t tag_current, tag2_current, tag_previous, tag2_previous; - int num_tags; - int bytes_since_tag; - - int first; - int alloc_index_user; - int alloc_index; - uint8_t first_decode_slice; - uint8_t nb_decode_slices; - - unsigned int user_data_len; - - mpeg2_sequence_t new_sequence; - mpeg2_sequence_t sequence; - mpeg2_gop_t new_gop; - mpeg2_gop_t gop; - mpeg2_picture_t new_picture; - mpeg2_picture_t pictures[4]; - mpeg2_picture_t * picture; - /*const*/ mpeg2_fbuf_t * fbuf[3]; /* 0: current fbuf, 1-2: prediction fbufs */ - - fbuf_alloc_t fbuf_alloc[3]; - int custom_fbuf; - - uint8_t * yuv_buf[3][3]; - int yuv_index; - mpeg2_convert_t * convert; - void * convert_arg; - unsigned int convert_id_size; - int convert_stride; - void (* convert_start) (void * id, const mpeg2_fbuf_t * fbuf, - const mpeg2_picture_t * picture, - const mpeg2_gop_t * gop); - - uint8_t * buf_start; - uint8_t * buf_end; - - int16_t display_offset_x, display_offset_y; - - int copy_matrix; - int8_t q_scale_type, scaled[4]; - uint8_t quantizer_matrix[4][64]; - uint8_t new_quantizer_matrix[4][64]; -}; - -typedef struct { -#ifdef ARCH_PPC - uint8_t regv[12*16]; -#endif - int dummy; -} cpu_state_t; - -/* cpu_accel.c */ -uint32_t mpeg2_detect_accel (uint32_t accel); - -/* cpu_state.c */ -void mpeg2_cpu_state_init (uint32_t accel); - -/* decode.c */ -mpeg2_state_t mpeg2_seek_header (mpeg2dec_t * mpeg2dec); -mpeg2_state_t mpeg2_parse_header (mpeg2dec_t * mpeg2dec); - -/* header.c */ -void mpeg2_header_state_init 
(mpeg2dec_t * mpeg2dec); -void mpeg2_reset_info (mpeg2_info_t * info); -int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec); -int mpeg2_header_gop (mpeg2dec_t * mpeg2dec); -mpeg2_state_t mpeg2_header_picture_start (mpeg2dec_t * mpeg2dec); -int mpeg2_header_picture (mpeg2dec_t * mpeg2dec); -int mpeg2_header_extension (mpeg2dec_t * mpeg2dec); -int mpeg2_header_user_data (mpeg2dec_t * mpeg2dec); -void mpeg2_header_sequence_finalize (mpeg2dec_t * mpeg2dec); -void mpeg2_header_gop_finalize (mpeg2dec_t * mpeg2dec); -void mpeg2_header_picture_finalize (mpeg2dec_t * mpeg2dec, uint32_t accels); -mpeg2_state_t mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec); -mpeg2_state_t mpeg2_header_end (mpeg2dec_t * mpeg2dec); -void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int b_type); - -/* idct.c */ -void mpeg2_idct_init (uint32_t accel); - -/* idct_mmx.c */ -void mpeg2_idct_copy_mmxext (int16_t * block, uint8_t * dest, int stride); -void mpeg2_idct_add_mmxext (int last, int16_t * block, - uint8_t * dest, int stride); -void mpeg2_idct_copy_mmx (int16_t * block, uint8_t * dest, int stride); -void mpeg2_idct_add_mmx (int last, int16_t * block, - uint8_t * dest, int stride); -void mpeg2_idct_mmx_init (void); - -/* idct_altivec.c */ -void mpeg2_idct_copy_altivec (int16_t * block, uint8_t * dest, int stride); -void mpeg2_idct_add_altivec (int last, int16_t * block, - uint8_t * dest, int stride); -void mpeg2_idct_altivec_init (void); - -/* idct_alpha.c */ -void mpeg2_idct_copy_mvi (int16_t * block, uint8_t * dest, int stride); -void mpeg2_idct_add_mvi (int last, int16_t * block, - uint8_t * dest, int stride); -void mpeg2_idct_copy_alpha (int16_t * block, uint8_t * dest, int stride); -void mpeg2_idct_add_alpha (int last, int16_t * block, - uint8_t * dest, int stride); -void mpeg2_idct_alpha_init (void); - -/* motion_comp.c */ -void mpeg2_mc_init (uint32_t accel); - -typedef struct { - mpeg2_mc_fct * put [8]; - mpeg2_mc_fct * avg [8]; -} mpeg2_mc_t; - -#define MPEG2_MC_EXTERN(x) mpeg2_mc_t 
mpeg2_mc_##x = { \ - {MC_put_o_16_##x, MC_put_x_16_##x, MC_put_y_16_##x, MC_put_xy_16_##x, \ - MC_put_o_8_##x, MC_put_x_8_##x, MC_put_y_8_##x, MC_put_xy_8_##x}, \ - {MC_avg_o_16_##x, MC_avg_x_16_##x, MC_avg_y_16_##x, MC_avg_xy_16_##x, \ - MC_avg_o_8_##x, MC_avg_x_8_##x, MC_avg_y_8_##x, MC_avg_xy_8_##x} \ -}; - -extern mpeg2_mc_t mpeg2_mc_c; -extern mpeg2_mc_t mpeg2_mc_mmx; -extern mpeg2_mc_t mpeg2_mc_mmxext; -extern mpeg2_mc_t mpeg2_mc_3dnow; -extern mpeg2_mc_t mpeg2_mc_altivec; -extern mpeg2_mc_t mpeg2_mc_alpha; -extern mpeg2_mc_t mpeg2_mc_vis; diff --git a/src/libmpeg2new/libmpeg2/rgb.c b/src/libmpeg2new/libmpeg2/rgb.c deleted file mode 100644 index e4abcacc2..000000000 --- a/src/libmpeg2new/libmpeg2/rgb.c +++ /dev/null @@ -1,598 +0,0 @@ -/* - * rgb.c - * Copyright (C) 2000-2003 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" -#include - -#include - -#include "mpeg2.h" -#include "mpeg2convert.h" -#include "convert_internal.h" - -static int matrix_coefficients = 6; - -static const int Inverse_Table_6_9[8][4] = { - {117504, 138453, 13954, 34903}, /* no sequence_display_extension */ - {117504, 138453, 13954, 34903}, /* ITU-R Rec. 709 (1990) */ - {104597, 132201, 25675, 53279}, /* unspecified */ - {104597, 132201, 25675, 53279}, /* reserved */ - {104448, 132798, 24759, 53109}, /* FCC */ - {104597, 132201, 25675, 53279}, /* ITU-R Rec. 624-4 System B, G */ - {104597, 132201, 25675, 53279}, /* SMPTE 170M */ - {117579, 136230, 16907, 35559} /* SMPTE 240M (1987) */ -}; - -static const uint8_t dither[] ATTR_ALIGN(32) = { - 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, - 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, - 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, - 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, - 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35, - 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35, - 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35, - 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35, - 3, 9, 27, 63, 1, 4, 25, 59, 5, 12, 28, 67, 3, 7, 26, 62, - 3, 9, 27, 63, 1, 4, 25, 59, 5, 12, 28, 67, 3, 7, 26, 62, - 3, 9, 27, 63, 1, 4, 25, 59, 5, 12, 28, 67, 3, 7, 26, 62, - 3, 9, 27, 63, 1, 4, 25, 59, 5, 12, 28, 67, 3, 7, 26, 62, - 19, 45, 11, 27, 17, 41, 9, 22, 21, 49, 13, 30, 19, 44, 11, 26, - 19, 45, 11, 27, 17, 41, 9, 22, 21, 49, 13, 30, 19, 44, 11, 26, - 19, 45, 11, 27, 17, 41, 9, 22, 21, 49, 13, 30, 19, 44, 11, 26, - 19, 45, 11, 27, 17, 41, 9, 22, 21, 49, 13, 30, 19, 44, 11, 26, - 0, 2, 24, 57, 6, 15, 30, 70, 0, 1, 23, 55, 6, 14, 29, 69, - 0, 
2, 24, 57, 6, 15, 30, 70, 0, 1, 23, 55, 6, 14, 29, 69, - 0, 2, 24, 57, 6, 15, 30, 70, 0, 1, 23, 55, 6, 14, 29, 69, - 0, 2, 24, 57, 6, 15, 30, 70, 0, 1, 23, 55, 6, 14, 29, 69, - 16, 38, 8, 20, 22, 52, 14, 34, 16, 37, 8, 19, 21, 51, 14, 33, - 16, 38, 8, 20, 22, 52, 14, 34, 16, 37, 8, 19, 21, 51, 14, 33, - 16, 38, 8, 20, 22, 52, 14, 34, 16, 37, 8, 19, 21, 51, 14, 33, - 16, 38, 8, 20, 22, 52, 14, 34, 16, 37, 8, 19, 21, 51, 14, 33, - 4, 11, 28, 66, 2, 6, 26, 61, 4, 10, 27, 65, 2, 5, 25, 60, - 4, 11, 28, 66, 2, 6, 26, 61, 4, 10, 27, 65, 2, 5, 25, 60, - 4, 11, 28, 66, 2, 6, 26, 61, 4, 10, 27, 65, 2, 5, 25, 60, - 4, 11, 28, 66, 2, 6, 26, 61, 4, 10, 27, 65, 2, 5, 25, 60, - 20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23, - 20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23, - 20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23, - 20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23, - 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, - 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, - 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, - 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, - 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35, - 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35 -}; - -static const uint8_t dither_temporal[64] = { - 0x00, 0x20, 0x21, 0x01, 0x40, 0x60, 0x61, 0x41, - 0x42, 0x62, 0x63, 0x43, 0x02, 0x22, 0x23, 0x03, - 0x80, 0xa0, 0xa1, 0x81, 0xc0, 0xe0, 0xe1, 0xc1, - 0xc2, 0xe2, 0xe3, 0xc3, 0x82, 0xa2, 0xa3, 0x83, - 0x84, 0xa4, 0xa5, 0x85, 0xc4, 0xe4, 0xe5, 0xc5, - 0xc6, 0xe6, 0xe7, 0xc7, 0x86, 0xa6, 0xa7, 0x87, - 0x04, 0x24, 0x25, 0x05, 0x44, 0x64, 0x65, 0x45, - 0x46, 0x66, 0x67, 0x47, 0x06, 0x26, 0x27, 0x07 -}; - -typedef struct { - convert_rgb_t base; - void * table_rV[256]; - void * table_gU[256]; - int table_gV[256]; - void * table_bU[256]; -} convert_rgb_c_t; - -#define RGB(type,i) \ - U = pu[i]; \ - V = pv[i]; \ - r = (type *) id->table_rV[V]; 
\ - g = (type *) (((uint8_t *)id->table_gU[U]) + id->table_gV[V]); \ - b = (type *) id->table_bU[U]; - -#define DST(py,dst,i,j) \ - Y = py[i]; \ - dst[i] = r[Y] + g[Y] + b[Y]; - -#define DSTRGB(py,dst,i,j) \ - Y = py[i]; \ - dst[3*i] = r[Y]; dst[3*i+1] = g[Y]; dst[3*i+2] = b[Y]; - -#define DSTBGR(py,dst,i,j) \ - Y = py[i]; \ - dst[3*i] = b[Y]; dst[3*i+1] = g[Y]; dst[3*i+2] = r[Y]; - -#define DSTDITHER(py,dst,i,j) \ - Y = py[i]; \ - dst[i] = r[Y+pd[2*i+96*j]] + g[Y-pd[2*i+96*j]] + b[Y+pd[2*i+1+96*j]]; - -#define DO(x) x -#define SKIP(x) - -#define DECLARE_420(func,type,num,DST,DITHER) \ -static void func (void * _id, uint8_t * const * src, \ - unsigned int v_offset) \ -{ \ - const convert_rgb_c_t * const id = (convert_rgb_c_t *) _id; \ - type * dst_1; \ - const uint8_t * py_1, * pu, * pv; \ - int i; \ - DITHER(uint8_t dithpos = id->base.dither_offset;) \ - \ - dst_1 = (type *)(id->base.rgb_ptr + id->base.rgb_slice * v_offset); \ - py_1 = src[0]; pu = src[1]; pv = src[2]; \ - \ - i = 8; \ - do { \ - const uint8_t * py_2; \ - int j, U, V, Y; \ - const type * r, * g, * b; \ - type * dst_2; \ - DITHER(const uint8_t * const pd = dither + 2 * dithpos;) \ - \ - dst_2 = (type *)((char *)dst_1 + id->base.rgb_stride); \ - py_2 = py_1 + id->base.y_stride; \ - j = id->base.width; \ - do { \ - RGB (type, 0) \ - DST (py_1, dst_1, 0, 0) \ - DST (py_1, dst_1, 1, 0) \ - DST (py_2, dst_2, 0, 1) \ - DST (py_2, dst_2, 1, 1) \ - \ - RGB (type, 1) \ - DST (py_2, dst_2, 2, 1) \ - DST (py_2, dst_2, 3, 1) \ - DST (py_1, dst_1, 2, 0) \ - DST (py_1, dst_1, 3, 0) \ - \ - RGB (type, 2) \ - DST (py_1, dst_1, 4, 0) \ - DST (py_1, dst_1, 5, 0) \ - DST (py_2, dst_2, 4, 1) \ - DST (py_2, dst_2, 5, 1) \ - \ - RGB (type, 3) \ - DST (py_2, dst_2, 6, 1) \ - DST (py_2, dst_2, 7, 1) \ - DST (py_1, dst_1, 6, 0) \ - DST (py_1, dst_1, 7, 0) \ - \ - pu += 4; \ - pv += 4; \ - py_1 += 8; \ - py_2 += 8; \ - dst_1 += 8 * num; \ - dst_2 += 8 * num; \ - } while (--j); \ - if (--i == id->base.field) { \ - dst_1 = 
(type *)(id->base.rgb_ptr + \ - id->base.rgb_slice * (v_offset + 1)); \ - py_1 = src[0] + id->base.y_stride_frame; \ - pu = src[1] + id->base.uv_stride_frame; \ - pv = src[2] + id->base.uv_stride_frame; \ - } else { \ - py_1 += id->base.y_increm; \ - pu += id->base.uv_increm; \ - pv += id->base.uv_increm; \ - dst_1 = (type *)((char *)dst_1 + id->base.rgb_increm); \ - DITHER(dithpos += id->base.dither_stride;) \ - } \ - } while (i); \ -} - -DECLARE_420 (rgb_c_32_420, uint32_t, 1, DST, SKIP) -DECLARE_420 (rgb_c_24_rgb_420, uint8_t, 3, DSTRGB, SKIP) -DECLARE_420 (rgb_c_24_bgr_420, uint8_t, 3, DSTBGR, SKIP) -DECLARE_420 (rgb_c_16_420, uint16_t, 1, DST, SKIP) -DECLARE_420 (rgb_c_8_420, uint8_t, 1, DSTDITHER, DO) - -#define DECLARE_422(func,type,num,DST,DITHER) \ -static void func (void * _id, uint8_t * const * src, \ - unsigned int v_offset) \ -{ \ - const convert_rgb_c_t * const id = (convert_rgb_c_t *) _id; \ - type * dst; \ - const uint8_t * py, * pu, * pv; \ - int i; \ - DITHER(uint8_t dithpos = id->base.dither_offset;) \ - \ - dst = (type *)(id->base.rgb_ptr + id->base.rgb_stride * v_offset); \ - py = src[0]; pu = src[1]; pv = src[2]; \ - \ - i = 16; \ - do { \ - int j, U, V, Y; \ - const type * r, * g, * b; \ - DITHER(const uint8_t * const pd = dither + 2 * dithpos;) \ - \ - j = id->base.width; \ - do { \ - RGB (type, 0) \ - DST (py, dst, 0, 0) \ - DST (py, dst, 1, 0) \ - \ - RGB (type, 1) \ - DST (py, dst, 2, 0) \ - DST (py, dst, 3, 0) \ - \ - RGB (type, 2) \ - DST (py, dst, 4, 0) \ - DST (py, dst, 5, 0) \ - \ - RGB (type, 3) \ - DST (py, dst, 6, 0) \ - DST (py, dst, 7, 0) \ - \ - pu += 4; \ - pv += 4; \ - py += 8; \ - dst += 8 * num; \ - } while (--j); \ - py += id->base.y_increm; \ - pu += id->base.uv_increm; \ - pv += id->base.uv_increm; \ - dst = (type *)((char *)dst + id->base.rgb_increm); \ - DITHER(dithpos += id->base.dither_stride;) \ - } while (--i); \ -} - -DECLARE_422 (rgb_c_32_422, uint32_t, 1, DST, SKIP) -DECLARE_422 (rgb_c_24_rgb_422, uint8_t, 3, 
DSTRGB, SKIP) -DECLARE_422 (rgb_c_24_bgr_422, uint8_t, 3, DSTBGR, SKIP) -DECLARE_422 (rgb_c_16_422, uint16_t, 1, DST, SKIP) -DECLARE_422 (rgb_c_8_422, uint8_t, 1, DSTDITHER, DO) - -#define DECLARE_444(func,type,num,DST,DITHER) \ -static void func (void * _id, uint8_t * const * src, \ - unsigned int v_offset) \ -{ \ - const convert_rgb_c_t * const id = (convert_rgb_c_t *) _id; \ - type * dst; \ - const uint8_t * py, * pu, * pv; \ - int i; \ - DITHER(uint8_t dithpos = id->base.dither_offset;) \ - \ - dst = (type *)(id->base.rgb_ptr + id->base.rgb_stride * v_offset); \ - py = src[0]; pu = src[1]; pv = src[2]; \ - \ - i = 16; \ - do { \ - int j, U, V, Y; \ - const type * r, * g, * b; \ - DITHER(const uint8_t * const pd = dither + 2 * dithpos;) \ - \ - j = id->base.width; \ - do { \ - RGB (type, 0) \ - DST (py, dst, 0, 0) \ - RGB (type, 1) \ - DST (py, dst, 1, 0) \ - RGB (type, 2) \ - DST (py, dst, 2, 0) \ - RGB (type, 3) \ - DST (py, dst, 3, 0) \ - RGB (type, 4) \ - DST (py, dst, 4, 0) \ - RGB (type, 5) \ - DST (py, dst, 5, 0) \ - RGB (type, 6) \ - DST (py, dst, 6, 0) \ - RGB (type, 7) \ - DST (py, dst, 7, 0) \ - \ - pu += 8; \ - pv += 8; \ - py += 8; \ - dst += 8 * num; \ - } while (--j); \ - py += id->base.y_increm; \ - pu += id->base.y_increm; \ - pv += id->base.y_increm; \ - dst = (type *)((char *)dst + id->base.rgb_increm); \ - DITHER(dithpos += id->base.dither_stride;) \ - } while (--i); \ -} - -DECLARE_444 (rgb_c_32_444, uint32_t, 1, DST, SKIP) -DECLARE_444 (rgb_c_24_rgb_444, uint8_t, 3, DSTRGB, SKIP) -DECLARE_444 (rgb_c_24_bgr_444, uint8_t, 3, DSTBGR, SKIP) -DECLARE_444 (rgb_c_16_444, uint16_t, 1, DST, SKIP) -DECLARE_444 (rgb_c_8_444, uint8_t, 1, DSTDITHER, DO) - -static void rgb_start (void * _id, const mpeg2_fbuf_t * fbuf, - const mpeg2_picture_t * picture, - const mpeg2_gop_t * gop) -{ - convert_rgb_t * id = (convert_rgb_t *) _id; - int uv_stride = id->uv_stride_frame; - id->y_stride = id->y_stride_frame; - id->rgb_ptr = fbuf->buf[0]; - id->rgb_slice = 
id->rgb_stride = id->rgb_stride_frame; - id->dither_stride = 32; - id->dither_offset = dither_temporal[picture->temporal_reference & 63]; - id->field = 0; - if ((picture->nb_fields == 1) || - (id->chroma420 && !(picture->flags & PIC_FLAG_PROGRESSIVE_FRAME))) { - uv_stride <<= 1; - id->y_stride <<= 1; - id->rgb_stride <<= 1; - id->dither_stride <<= 1; - id->dither_offset += 16; - if (picture->nb_fields == 1) { - id->rgb_slice <<= 1; - if (!(picture->flags & PIC_FLAG_TOP_FIELD_FIRST)) { - id->rgb_ptr += id->rgb_stride_frame; - id->dither_offset += 32; - } - } else - id->field = 8 >> id->convert420; - } - id->y_increm = (id->y_stride << id->convert420) - id->y_stride_frame; - id->uv_increm = uv_stride - id->uv_stride_frame; - id->rgb_increm = (id->rgb_stride << id->convert420) - id->rgb_stride_min; - id->dither_stride <<= id->convert420; -} - -static inline int div_round (int dividend, int divisor) -{ - if (dividend > 0) - return (dividend + (divisor>>1)) / divisor; - else - return -((-dividend + (divisor>>1)) / divisor); -} - -static unsigned int rgb_c_init (convert_rgb_c_t * id, - mpeg2convert_rgb_order_t order, - unsigned int bpp) -{ - int i; - uint8_t table_Y[1024]; - uint32_t * table_32 = 0; - uint16_t * table_16 = 0; - uint8_t * table_8 = 0; - uint8_t * table_332 = 0; - int entry_size = 0; - void * table_r = 0; - void * table_g = 0; - void * table_b = 0; - - int crv = Inverse_Table_6_9[matrix_coefficients][0]; - int cbu = Inverse_Table_6_9[matrix_coefficients][1]; - int cgu = -Inverse_Table_6_9[matrix_coefficients][2]; - int cgv = -Inverse_Table_6_9[matrix_coefficients][3]; - - for (i = 0; i < 1024; i++) { - int j; - - j = (76309 * (i - 384 - 16) + 32768) >> 16; - table_Y[i] = (j < 0) ? 0 : ((j > 255) ? 
255 : j); - } - - switch (bpp) { - case 32: - if (!id) - return (197 + 2*682 + 256 + 132) * sizeof (uint32_t); - table_32 = (uint32_t *) (id + 1); - entry_size = sizeof (uint32_t); - table_r = table_32 + 197; - table_b = table_32 + 197 + 685; - table_g = table_32 + 197 + 2*682; - - for (i = -197; i < 256+197; i++) - ((uint32_t *) table_r)[i] = - table_Y[i+384] << ((order == MPEG2CONVERT_RGB) ? 16 : 0); - for (i = -132; i < 256+132; i++) - ((uint32_t *) table_g)[i] = table_Y[i+384] << 8; - for (i = -232; i < 256+232; i++) - ((uint32_t *) table_b)[i] = - table_Y[i+384] << ((order == MPEG2CONVERT_RGB) ? 0 : 16); - break; - - case 24: - if (!id) - return (256 + 2*232) * sizeof (uint8_t); - table_8 = (uint8_t *) (id + 1); - entry_size = sizeof (uint8_t); - table_r = table_g = table_b = table_8 + 232; - - for (i = -232; i < 256+232; i++) - ((uint8_t * )table_b)[i] = table_Y[i+384]; - break; - - case 15: - case 16: - if (!id) - return (197 + 2*682 + 256 + 132) * sizeof (uint16_t); - table_16 = (uint16_t *) (id + 1); - entry_size = sizeof (uint16_t); - table_r = table_16 + 197; - table_b = table_16 + 197 + 685; - table_g = table_16 + 197 + 2*682; - - for (i = -197; i < 256+197; i++) { - int j = table_Y[i+384] >> 3; - - if (order == MPEG2CONVERT_RGB) - j <<= ((bpp==16) ? 11 : 10); - - ((uint16_t *)table_r)[i] = j; - } - for (i = -132; i < 256+132; i++) { - int j = table_Y[i+384] >> ((bpp==16) ? 2 : 3); - - ((uint16_t *)table_g)[i] = j << 5; - } - for (i = -232; i < 256+232; i++) { - int j = table_Y[i+384] >> 3; - - if (order == MPEG2CONVERT_BGR) - j <<= ((bpp==16) ? 
11 : 10); - - ((uint16_t *)table_b)[i] = j; - } - break; - - case 8: - if (!id) - return (197 + 2*682 + 256 + 232 + 71) * sizeof (uint8_t); - table_332 = (uint8_t *) (id + 1); - entry_size = sizeof (uint8_t); - table_r = table_332 + 197; - table_g = table_332 + 197 + 682 + 30; - table_b = table_332 + 197 + 2*682; - - for (i = -197; i < 256+197+30; i++) - ((uint8_t *)table_r)[i] = ((table_Y[i+384] * 7 / 255) << - (order == MPEG2CONVERT_RGB ? 5 : 0)); - for (i = -132; i < 256+132+30; i++) - ((uint8_t *)table_g)[i-30] = ((table_Y[i+384] * 7 / 255) << - (order == MPEG2CONVERT_RGB ? 2 : 3)); - for (i = -232; i < 256+232+71; i++) - ((uint8_t *)table_b)[i] = ((table_Y[i+384] / 85) << - (order == MPEG2CONVERT_RGB ? 0 : 6)); - break; - } - - for (i = 0; i < 256; i++) { - id->table_rV[i] = (((uint8_t *)table_r) + - entry_size * div_round (crv * (i-128), 76309)); - id->table_gU[i] = (((uint8_t *)table_g) + - entry_size * div_round (cgu * (i-128), 76309)); - id->table_gV[i] = entry_size * div_round (cgv * (i-128), 76309); - id->table_bU[i] = (((uint8_t *)table_b) + - entry_size * div_round (cbu * (i-128), 76309)); - } - - return 0; -} - -static int rgb_internal (mpeg2convert_rgb_order_t order, unsigned int bpp, - int stage, void * _id, const mpeg2_sequence_t * seq, - int stride, uint32_t accel, void * arg, - mpeg2_convert_init_t * result) -{ - convert_rgb_t * id = (convert_rgb_t *) _id; - mpeg2convert_copy_t * copy = (mpeg2convert_copy_t *) 0; - unsigned int id_size = sizeof (convert_rgb_t); - int chroma420 = (seq->chroma_height < seq->height); - int convert420 = 0; - int rgb_stride_min = ((bpp + 7) >> 3) * seq->width; - -#ifdef ARCH_X86 - if (!copy && (accel & MPEG2_ACCEL_X86_MMXEXT)) { - convert420 = 0; - copy = mpeg2convert_rgb_mmxext (order, bpp, seq); - } - if (!copy && (accel & MPEG2_ACCEL_X86_MMX)) { - convert420 = 0; - copy = mpeg2convert_rgb_mmx (order, bpp, seq); - } -#endif -#ifdef ARCH_SPARC - if (!copy && (accel & MPEG2_ACCEL_SPARC_VIS)) { - convert420 = 
chroma420; - copy = mpeg2convert_rgb_vis (order, bpp, seq); - } -#endif - if (!copy) { - int src, dest; - static void (* rgb_c[3][5]) (void *, uint8_t * const *, - unsigned int) = - {{rgb_c_24_bgr_420, rgb_c_8_420, rgb_c_16_420, - rgb_c_24_rgb_420, rgb_c_32_420}, - {rgb_c_24_bgr_422, rgb_c_8_422, rgb_c_16_422, - rgb_c_24_rgb_422, rgb_c_32_422}, - {rgb_c_24_bgr_444, rgb_c_8_444, rgb_c_16_444, - rgb_c_24_rgb_444, rgb_c_32_444}}; - - convert420 = chroma420; - id_size = (sizeof (convert_rgb_c_t) + - rgb_c_init ((convert_rgb_c_t *) id, order, bpp)); - src = ((seq->chroma_width == seq->width) + - (seq->chroma_height == seq->height)); - dest = ((bpp == 24 && order == MPEG2CONVERT_BGR) ? 0 : (bpp + 7) >> 3); - copy = rgb_c[src][dest]; - } - - result->id_size = id_size; - - if (stride < rgb_stride_min) - stride = rgb_stride_min; - - if (stage == MPEG2_CONVERT_STRIDE) - return stride; - else if (stage == MPEG2_CONVERT_START) { - id->width = seq->width >> 3; - id->y_stride_frame = seq->width; - id->uv_stride_frame = seq->chroma_width; - id->rgb_stride_frame = stride; - id->rgb_stride_min = rgb_stride_min; - id->chroma420 = chroma420; - id->convert420 = convert420; - result->buf_size[0] = stride * seq->height; - result->buf_size[1] = result->buf_size[2] = 0; - result->start = rgb_start; - result->copy = copy; - } - return 0; -} - -#define DECLARE(func,order,bpp) \ -int func (int stage, void * id, \ - const mpeg2_sequence_t * sequence, int stride, \ - uint32_t accel, void * arg, mpeg2_convert_init_t * result) \ -{ \ - return rgb_internal (order, bpp, stage, id, sequence, stride, \ - accel, arg, result); \ -} - -DECLARE (mpeg2convert_rgb32, MPEG2CONVERT_RGB, 32) -DECLARE (mpeg2convert_rgb24, MPEG2CONVERT_RGB, 24) -DECLARE (mpeg2convert_rgb16, MPEG2CONVERT_RGB, 16) -DECLARE (mpeg2convert_rgb15, MPEG2CONVERT_RGB, 15) -DECLARE (mpeg2convert_rgb8, MPEG2CONVERT_RGB, 8) -DECLARE (mpeg2convert_bgr32, MPEG2CONVERT_BGR, 32) -DECLARE (mpeg2convert_bgr24, MPEG2CONVERT_BGR, 24) -DECLARE 
(mpeg2convert_bgr16, MPEG2CONVERT_BGR, 16) -DECLARE (mpeg2convert_bgr15, MPEG2CONVERT_BGR, 15) -DECLARE (mpeg2convert_bgr8, MPEG2CONVERT_BGR, 8) - -mpeg2_convert_t * mpeg2convert_rgb (mpeg2convert_rgb_order_t order, - unsigned int bpp) -{ - static mpeg2_convert_t * table[5][2] = - {{mpeg2convert_rgb15, mpeg2convert_bgr15}, - {mpeg2convert_rgb8, mpeg2convert_bgr8}, - {mpeg2convert_rgb16, mpeg2convert_bgr16}, - {mpeg2convert_rgb24, mpeg2convert_bgr24}, - {mpeg2convert_rgb32, mpeg2convert_bgr32}}; - - if (order == MPEG2CONVERT_RGB || order == MPEG2CONVERT_BGR) { - if (bpp == 15) - return table[0][order == MPEG2CONVERT_BGR]; - else if (bpp >= 8 && bpp <= 32 && (bpp & 7) == 0) - return table[bpp >> 3][order == MPEG2CONVERT_BGR]; - } - return (mpeg2_convert_t *) 0; -} diff --git a/src/libmpeg2new/libmpeg2/rgb_mmx.c b/src/libmpeg2new/libmpeg2/rgb_mmx.c deleted file mode 100644 index 6ca7e65a8..000000000 --- a/src/libmpeg2new/libmpeg2/rgb_mmx.c +++ /dev/null @@ -1,321 +0,0 @@ -/* - * rgb_mmx.c - * Copyright (C) 2000-2003 Silicon Integrated System Corp. - * All Rights Reserved. - * - * Author: Olie Lho - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#ifdef ARCH_X86 - -#include -#include -#include - -#include "mpeg2.h" -#include "mpeg2convert.h" -#include "convert_internal.h" -#include -#include "mmx.h" - -#define CPU_MMXEXT 0 -#define CPU_MMX 1 - -/* CPU_MMXEXT/CPU_MMX adaptation layer */ - -#define movntq(src,dest) \ -do { \ - if (cpu == CPU_MMXEXT) \ - movntq_r2m (src, dest); \ - else \ - movq_r2m (src, dest); \ -} while (0) - -static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv) -{ - static mmx_t mmx_80w = {0x0080008000800080LL}; - static mmx_t mmx_U_green = {0xf37df37df37df37dLL}; - static mmx_t mmx_U_blue = {0x4093409340934093LL}; - static mmx_t mmx_V_red = {0x3312331233123312LL}; - static mmx_t mmx_V_green = {0xe5fce5fce5fce5fcLL}; - static mmx_t mmx_10w = {0x1010101010101010LL}; - static mmx_t mmx_00ffw = {0x00ff00ff00ff00ffLL}; - static mmx_t mmx_Y_coeff = {0x253f253f253f253fLL}; - - movd_m2r (*pu, mm0); /* mm0 = 00 00 00 00 u3 u2 u1 u0 */ - movd_m2r (*pv, mm1); /* mm1 = 00 00 00 00 v3 v2 v1 v0 */ - movq_m2r (*py, mm6); /* mm6 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ - pxor_r2r (mm4, mm4); /* mm4 = 0 */ - /* XXX might do cache preload for image here */ - - /* - * Do the multiply part of the conversion for even and odd pixels - * register usage: - * mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels - * mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd pixels - * mm6 -> Y even, mm7 -> Y odd - */ - - punpcklbw_r2r (mm4, mm0); /* mm0 = u3 u2 u1 u0 */ - punpcklbw_r2r (mm4, mm1); /* mm1 = v3 v2 v1 v0 */ - psubsw_m2r (mmx_80w, mm0); /* u -= 128 */ - psubsw_m2r (mmx_80w, mm1); /* v -= 128 */ - psllw_i2r (3, mm0); /* promote precision */ - psllw_i2r (3, mm1); /* promote precision */ - movq_r2r (mm0, mm2); /* mm2 = u3 u2 u1 u0 */ - movq_r2r (mm1, mm3); /* mm3 = v3 
v2 v1 v0 */ - pmulhw_m2r (mmx_U_green, mm2); /* mm2 = u * u_green */ - pmulhw_m2r (mmx_V_green, mm3); /* mm3 = v * v_green */ - pmulhw_m2r (mmx_U_blue, mm0); /* mm0 = chroma_b */ - pmulhw_m2r (mmx_V_red, mm1); /* mm1 = chroma_r */ - paddsw_r2r (mm3, mm2); /* mm2 = chroma_g */ - - psubusb_m2r (mmx_10w, mm6); /* Y -= 16 */ - movq_r2r (mm6, mm7); /* mm7 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ - pand_m2r (mmx_00ffw, mm6); /* mm6 = Y6 Y4 Y2 Y0 */ - psrlw_i2r (8, mm7); /* mm7 = Y7 Y5 Y3 Y1 */ - psllw_i2r (3, mm6); /* promote precision */ - psllw_i2r (3, mm7); /* promote precision */ - pmulhw_m2r (mmx_Y_coeff, mm6); /* mm6 = luma_rgb even */ - pmulhw_m2r (mmx_Y_coeff, mm7); /* mm7 = luma_rgb odd */ - - /* - * Do the addition part of the conversion for even and odd pixels - * register usage: - * mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels - * mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd pixels - * mm6 -> Y even, mm7 -> Y odd - */ - - movq_r2r (mm0, mm3); /* mm3 = chroma_b */ - movq_r2r (mm1, mm4); /* mm4 = chroma_r */ - movq_r2r (mm2, mm5); /* mm5 = chroma_g */ - paddsw_r2r (mm6, mm0); /* mm0 = B6 B4 B2 B0 */ - paddsw_r2r (mm7, mm3); /* mm3 = B7 B5 B3 B1 */ - paddsw_r2r (mm6, mm1); /* mm1 = R6 R4 R2 R0 */ - paddsw_r2r (mm7, mm4); /* mm4 = R7 R5 R3 R1 */ - paddsw_r2r (mm6, mm2); /* mm2 = G6 G4 G2 G0 */ - paddsw_r2r (mm7, mm5); /* mm5 = G7 G5 G3 G1 */ - packuswb_r2r (mm0, mm0); /* saturate to 0-255 */ - packuswb_r2r (mm1, mm1); /* saturate to 0-255 */ - packuswb_r2r (mm2, mm2); /* saturate to 0-255 */ - packuswb_r2r (mm3, mm3); /* saturate to 0-255 */ - packuswb_r2r (mm4, mm4); /* saturate to 0-255 */ - packuswb_r2r (mm5, mm5); /* saturate to 0-255 */ - punpcklbw_r2r (mm3, mm0); /* mm0 = B7 B6 B5 B4 B3 B2 B1 B0 */ - punpcklbw_r2r (mm4, mm1); /* mm1 = R7 R6 R5 R4 R3 R2 R1 R0 */ - punpcklbw_r2r (mm5, mm2); /* mm2 = G7 G6 G5 G4 G3 G2 G1 G0 */ -} - -static inline void mmx_unpack_16rgb (uint8_t * image, const int cpu) -{ - static mmx_t mmx_bluemask = {0xf8f8f8f8f8f8f8f8LL}; - 
static mmx_t mmx_greenmask = {0xfcfcfcfcfcfcfcfcLL}; - static mmx_t mmx_redmask = {0xf8f8f8f8f8f8f8f8LL}; - - /* - * convert RGB plane to RGB 16 bits - * mm0 -> B, mm1 -> R, mm2 -> G - * mm4 -> GB, mm5 -> AR pixel 4-7 - * mm6 -> GB, mm7 -> AR pixel 0-3 - */ - - pand_m2r (mmx_bluemask, mm0); /* mm0 = b7b6b5b4b3______ */ - pand_m2r (mmx_greenmask, mm2); /* mm2 = g7g6g5g4g3g2____ */ - pand_m2r (mmx_redmask, mm1); /* mm1 = r7r6r5r4r3______ */ - psrlq_i2r (3, mm0); /* mm0 = ______b7b6b5b4b3 */ - pxor_r2r (mm4, mm4); /* mm4 = 0 */ - movq_r2r (mm0, mm5); /* mm5 = ______b7b6b5b4b3 */ - movq_r2r (mm2, mm7); /* mm7 = g7g6g5g4g3g2____ */ - - punpcklbw_r2r (mm4, mm2); - punpcklbw_r2r (mm1, mm0); - psllq_i2r (3, mm2); - por_r2r (mm2, mm0); - movntq (mm0, *image); - - punpckhbw_r2r (mm4, mm7); - punpckhbw_r2r (mm1, mm5); - psllq_i2r (3, mm7); - por_r2r (mm7, mm5); - movntq (mm5, *(image+8)); -} - -static inline void mmx_unpack_32rgb (uint8_t * image, const int cpu) -{ - /* - * convert RGB plane to RGB packed format, - * mm0 -> B, mm1 -> R, mm2 -> G, mm3 -> 0, - * mm4 -> GB, mm5 -> AR pixel 4-7, - * mm6 -> GB, mm7 -> AR pixel 0-3 - */ - - pxor_r2r (mm3, mm3); - movq_r2r (mm0, mm6); - movq_r2r (mm1, mm7); - movq_r2r (mm0, mm4); - movq_r2r (mm1, mm5); - punpcklbw_r2r (mm2, mm6); - punpcklbw_r2r (mm3, mm7); - punpcklwd_r2r (mm7, mm6); - movntq (mm6, *image); - movq_r2r (mm0, mm6); - punpcklbw_r2r (mm2, mm6); - punpckhwd_r2r (mm7, mm6); - movntq (mm6, *(image+8)); - punpckhbw_r2r (mm2, mm4); - punpckhbw_r2r (mm3, mm5); - punpcklwd_r2r (mm5, mm4); - movntq (mm4, *(image+16)); - movq_r2r (mm0, mm4); - punpckhbw_r2r (mm2, mm4); - punpckhwd_r2r (mm5, mm4); - movntq (mm4, *(image+24)); -} - -static inline void rgb16 (void * const _id, uint8_t * const * src, - const unsigned int v_offset, const int cpu) -{ - convert_rgb_t * const id = (convert_rgb_t *) _id; - uint8_t * dst; - uint8_t * py, * pu, * pv; - int i, j; - - dst = id->rgb_ptr + id->rgb_slice * v_offset; - py = src[0]; pu = src[1]; 
pv = src[2]; - - i = 16; - do { - j = id->width; - do { - mmx_yuv2rgb (py, pu, pv); - mmx_unpack_16rgb (dst, cpu); - py += 8; - pu += 4; - pv += 4; - dst += 16; - } while (--j); - - dst += id->rgb_increm; - py += id->y_increm; - if (--i == id->field) { - dst = id->rgb_ptr + id->rgb_slice * (v_offset + 1); - py = src[0] + id->y_stride_frame; - pu = src[1] + id->uv_stride_frame; - pv = src[2] + id->uv_stride_frame; - } else if (! (i & id->chroma420)) { - pu += id->uv_increm; - pv += id->uv_increm; - } else { - pu -= id->uv_stride_frame; - pv -= id->uv_stride_frame; - } - } while (i); -} - -static inline void argb32 (void * const _id, uint8_t * const * src, - const unsigned int v_offset, const int cpu) -{ - convert_rgb_t * const id = (convert_rgb_t *) _id; - uint8_t * dst; - uint8_t * py, * pu, * pv; - int i, j; - - dst = id->rgb_ptr + id->rgb_slice * v_offset; - py = src[0]; pu = src[1]; pv = src[2]; - - i = 16; - do { - j = id->width; - do { - mmx_yuv2rgb (py, pu, pv); - mmx_unpack_32rgb (dst, cpu); - py += 8; - pu += 4; - pv += 4; - dst += 32; - } while (--j); - - dst += id->rgb_increm; - py += id->y_increm; - if (--i == id->field) { - dst = id->rgb_ptr + id->rgb_slice * (v_offset + 1); - py = src[0] + id->y_stride_frame; - pu = src[1] + id->uv_stride_frame; - pv = src[2] + id->uv_stride_frame; - } else if (! 
(i & id->chroma420)) { - pu += id->uv_increm; - pv += id->uv_increm; - } else { - pu -= id->uv_stride_frame; - pv -= id->uv_stride_frame; - } - } while (i); -} - -static void mmxext_rgb16 (void * id, uint8_t * const * src, - unsigned int v_offset) -{ - rgb16 (id, src, v_offset, CPU_MMXEXT); -} - -static void mmxext_argb32 (void * id, uint8_t * const * src, - unsigned int v_offset) -{ - argb32 (id, src, v_offset, CPU_MMXEXT); -} - -static void mmx_rgb16 (void * id, uint8_t * const * src, unsigned int v_offset) -{ - rgb16 (id, src, v_offset, CPU_MMX); -} - -static void mmx_argb32 (void * id, uint8_t * const * src, - unsigned int v_offset) -{ - argb32 (id, src, v_offset, CPU_MMX); -} - -mpeg2convert_copy_t * mpeg2convert_rgb_mmxext (int order, int bpp, - const mpeg2_sequence_t * seq) -{ - if (order == MPEG2CONVERT_RGB && seq->chroma_width < seq->width) { - if (bpp == 16) - return mmxext_rgb16; - else if (bpp == 32) - return mmxext_argb32; - } - return NULL; /* Fallback to C */ -} - -mpeg2convert_copy_t * mpeg2convert_rgb_mmx (int order, int bpp, - const mpeg2_sequence_t * seq) -{ - if (order == MPEG2CONVERT_RGB && seq->chroma_width < seq->width) { - if (bpp == 16) - return mmx_rgb16; - else if (bpp == 32) - return mmx_argb32; - } - return NULL; /* Fallback to C */ -} -#endif diff --git a/src/libmpeg2new/libmpeg2/rgb_vis.c b/src/libmpeg2new/libmpeg2/rgb_vis.c deleted file mode 100644 index cbd7c7072..000000000 --- a/src/libmpeg2new/libmpeg2/rgb_vis.c +++ /dev/null @@ -1,384 +0,0 @@ -/* - * rgb_vis.c - * Copyright (C) 2003 David S. Miller - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#ifdef ARCH_SPARC - -#include -#include - -#include "mpeg2.h" -#include "mpeg2convert.h" -#include "convert_internal.h" -#include -#include "vis.h" - -/* Based partially upon the MMX yuv2rgb code, see there for credits. - * - * The difference here is that since we have enough registers we - * process both even and odd scanlines in one pass. - */ - -static const uint16_t const_2048[] ATTR_ALIGN(8) = {2048, 2048, 2048, 2048}; -static const uint16_t const_1024[] ATTR_ALIGN(8) = {1024, 1024, 1024, 1024}; -static const uint16_t const_128[] ATTR_ALIGN(8) = {128, 128, 128, 128}; -static const uint8_t const_Ugreen[] ATTR_ALIGN(8) = - {0xf3, 0x00, 0xf3, 0x00, 0xf3, 0x00, 0xf3, 0x00}; -static const uint8_t const_Vgreen[] ATTR_ALIGN(8) = - {0xe6, 0x00, 0xe6, 0x00, 0xe6, 0x00, 0xe6, 0x00}; -static const uint8_t const_Ublue_Vred[] ATTR_ALIGN(8) = - {0x41, 0x41, 0x41, 0x41, 0x33, 0x33, 0x33, 0x33}; -static const uint8_t const_Ycoeff[] ATTR_ALIGN(4) = {0x25, 0x25, 0x25, 0x25}; - -#define TMP0 0 -#define TMP1 1 -#define TMP2 2 -#define TMP3 3 -#define TMP4 4 -#define TMP5 5 -#define TMP6 6 -#define TMP7 7 -#define TMP8 8 -#define TMP9 9 -#define TMP10 10 -#define TMP11 11 -#define TMP12 12 -#define TMP13 13 - -#define CONST_UBLUE 14 -#define CONST_VRED 15 -#define CONST_2048 16 - -#define BLUE8_EVEN 18 -#define BLUE8_ODD 19 -#define RED8_EVEN 20 -#define RED8_ODD 21 -#define GREEN8_EVEN 22 -#define GREEN8_ODD 23 - -#define BLUE8_2_EVEN 24 -#define BLUE8_2_ODD 25 -#define RED8_2_EVEN 26 -#define 
RED8_2_ODD 27 -#define GREEN8_2_EVEN 28 -#define GREEN8_2_ODD 29 - -#define CONST_YCOEFF 30 -#define ZEROS 31 - -#define PU_0 32 -#define PU_2 34 -#define PV_0 36 -#define PV_2 38 -#define PY_0 40 -#define PY_2 42 -#define PY_4 44 -#define PY_6 46 - -#define CONST_128 56 -#define CONST_1024 58 -#define CONST_VGREEN 60 -#define CONST_UGREEN 62 - -static inline void vis_init_consts(void) -{ - vis_set_gsr(7 << VIS_GSR_SCALEFACT_SHIFT); - - vis_ld64(const_2048[0], CONST_2048); - vis_ld64(const_1024[0], CONST_1024); - vis_ld64(const_Ugreen[0], CONST_UGREEN); - vis_ld64(const_Vgreen[0], CONST_VGREEN); - vis_fzeros(ZEROS); - vis_ld64(const_Ublue_Vred[0], CONST_UBLUE); - vis_ld32(const_Ycoeff[0], CONST_YCOEFF); - vis_ld64(const_128[0], CONST_128); -} - -static inline void vis_yuv2rgb(uint8_t *py, uint8_t *pu, uint8_t *pv, - int y_stride) -{ - vis_ld32(pu[0], TMP0); - - vis_ld32(pv[0], TMP2); - - vis_ld64(py[0], TMP4); - vis_mul8x16au(TMP0, CONST_2048, PU_0); - - vis_ld64_2(py, y_stride, TMP8); - vis_mul8x16au(TMP2, CONST_2048, PV_0); - - vis_pmerge(TMP4, TMP5, TMP6); - - vis_pmerge(TMP6, TMP7, TMP4); - - vis_pmerge(TMP8, TMP9, TMP10); - - vis_pmerge(TMP10, TMP11, TMP8); - vis_mul8x16au(TMP4, CONST_2048, PY_0); - - vis_psub16(PU_0, CONST_1024, PU_0); - vis_mul8x16au(TMP5, CONST_2048, PY_2); - - vis_psub16(PV_0, CONST_1024, PV_0); - vis_mul8x16au(TMP8, CONST_2048, PY_4); - - vis_psub16(PY_0, CONST_128, PY_0); - vis_mul8x16au(TMP9, CONST_2048, PY_6); - - vis_psub16(PY_2, CONST_128, PY_2); - vis_mul8x16(CONST_YCOEFF, PY_0, PY_0); - - vis_psub16(PY_4, CONST_128, PY_4); - vis_mul8x16(CONST_YCOEFF, PY_2, PY_2); - - vis_psub16(PY_6, CONST_128, PY_6); - vis_mul8x16(CONST_YCOEFF, PY_4, PY_4); - - vis_mul8x16(CONST_YCOEFF, PY_6, PY_6); - - vis_mul8sux16(CONST_UGREEN, PU_0, TMP0); - - vis_mul8sux16(CONST_VGREEN, PV_0, TMP2); - - vis_mul8x16(CONST_UBLUE, PU_0, TMP4); - - vis_mul8x16(CONST_VRED, PV_0, TMP6); - vis_padd16(TMP0, TMP2, TMP10); - - vis_padd16(PY_0, TMP4, TMP0); - - 
vis_padd16(PY_2, TMP4, TMP2); - vis_pack16(TMP0, BLUE8_EVEN); - - vis_padd16(PY_4, TMP4, TMP0); - vis_pack16(TMP2, BLUE8_ODD); - - vis_padd16(PY_6, TMP4, TMP2); - vis_pack16(TMP0, BLUE8_2_EVEN); - - vis_padd16(PY_0, TMP6, TMP0); - vis_pack16(TMP2, BLUE8_2_ODD); - - vis_padd16(PY_2, TMP6, TMP2); - vis_pack16(TMP0, RED8_EVEN); - - vis_padd16(PY_4, TMP6, TMP0); - vis_pack16(TMP2, RED8_ODD); - - vis_padd16(PY_6, TMP6, TMP2); - vis_pack16(TMP0, RED8_2_EVEN); - - vis_padd16(PY_0, TMP10, TMP0); - vis_pack16(TMP2, RED8_2_ODD); - - vis_padd16(PY_2, TMP10, TMP2); - vis_pack16(TMP0, GREEN8_EVEN); - - vis_padd16(PY_4, TMP10, TMP0); - vis_pack16(TMP2, GREEN8_ODD); - - vis_padd16(PY_6, TMP10, TMP2); - vis_pack16(TMP0, GREEN8_2_EVEN); - - vis_pack16(TMP2, GREEN8_2_ODD); - vis_pmerge(BLUE8_EVEN, BLUE8_ODD, BLUE8_EVEN); - - vis_pmerge(BLUE8_2_EVEN, BLUE8_2_ODD, BLUE8_2_EVEN); - - vis_pmerge(RED8_EVEN, RED8_ODD, RED8_EVEN); - - vis_pmerge(RED8_2_EVEN, RED8_2_ODD, RED8_2_EVEN); - - vis_pmerge(GREEN8_EVEN, GREEN8_ODD, GREEN8_EVEN); - - vis_pmerge(GREEN8_2_EVEN, GREEN8_2_ODD, GREEN8_2_EVEN); -} - -static inline void vis_unpack_32rgb(uint8_t *image, int stride) -{ - vis_pmerge(ZEROS, GREEN8_EVEN, TMP0); - vis_pmerge(RED8_EVEN, BLUE8_EVEN, TMP2); - - vis_pmerge(TMP0, TMP2, TMP4); - vis_st64(TMP4, image[0]); - - vis_pmerge(TMP1, TMP3, TMP6); - vis_st64_2(TMP6, image, 8); - - vis_pmerge(ZEROS, GREEN8_ODD, TMP8); - vis_pmerge(RED8_ODD, BLUE8_ODD, TMP10); - - vis_pmerge(TMP8, TMP10, TMP0); - vis_st64_2(TMP0, image, 16); - - vis_pmerge(TMP9, TMP11, TMP2); - vis_st64_2(TMP2, image, 24); - - image += stride; - - vis_pmerge(ZEROS, GREEN8_2_EVEN, TMP0); - vis_pmerge(RED8_2_EVEN, BLUE8_2_EVEN, TMP2); - - vis_pmerge(TMP0, TMP2, TMP4); - vis_st64(TMP4, image[0]); - - vis_pmerge(TMP1, TMP3, TMP6); - vis_st64_2(TMP6, image, 8); - - vis_pmerge(ZEROS, GREEN8_2_ODD, TMP8); - vis_pmerge(RED8_2_ODD, BLUE8_2_ODD, TMP10); - - vis_pmerge(TMP8, TMP10, TMP0); - vis_st64_2(TMP0, image, 16); - - vis_pmerge(TMP9, 
TMP11, TMP2); - vis_st64_2(TMP2, image, 24); -} - -static inline void vis_unpack_32bgr(uint8_t *image, int stride) -{ - vis_pmerge(ZEROS, GREEN8_EVEN, TMP0); - vis_pmerge(BLUE8_EVEN, RED8_EVEN, TMP2); - - vis_pmerge(TMP0, TMP2, TMP4); - vis_st64(TMP4, image[0]); - - vis_pmerge(TMP1, TMP3, TMP6); - vis_st64_2(TMP6, image, 8); - - vis_pmerge(ZEROS, GREEN8_ODD, TMP8); - vis_pmerge(BLUE8_ODD, RED8_ODD, TMP10); - - vis_pmerge(TMP8, TMP10, TMP0); - vis_st64_2(TMP0, image, 16); - - vis_pmerge(TMP9, TMP11, TMP2); - vis_st64_2(TMP2, image, 24); - - image += stride; - - vis_pmerge(ZEROS, GREEN8_2_EVEN, TMP0); - vis_pmerge(BLUE8_2_EVEN, RED8_2_EVEN, TMP2); - - vis_pmerge(TMP0, TMP2, TMP4); - vis_st64(TMP4, image[0]); - - vis_pmerge(TMP1, TMP3, TMP6); - vis_st64_2(TMP6, image, 8); - - vis_pmerge(ZEROS, GREEN8_2_ODD, TMP8); - vis_pmerge(BLUE8_2_ODD, RED8_2_ODD, TMP10); - - vis_pmerge(TMP8, TMP10, TMP0); - vis_st64_2(TMP0, image, 16); - - vis_pmerge(TMP9, TMP11, TMP2); - vis_st64_2(TMP2, image, 24); -} - -static inline void vis_yuv420_argb32(uint8_t *image, - uint8_t *py, uint8_t *pu, uint8_t *pv, - int width, int height, int rgb_stride, - int y_stride, int uv_stride) -{ - height >>= 1; - uv_stride -= width >> 1; - do { - int i = width >> 3; - do { - vis_yuv2rgb(py, pu, pv, y_stride); - vis_unpack_32rgb(image, rgb_stride); - py += 8; - pu += 4; - pv += 4; - image += 32; - } while (--i); - - py += (y_stride << 1) - width; - image += (rgb_stride << 1) - 4 * width; - pu += uv_stride; - pv += uv_stride; - } while (--height); -} - -static inline void vis_yuv420_abgr32(uint8_t *image, - uint8_t *py, uint8_t *pu, uint8_t *pv, - int width, int height, int rgb_stride, - int y_stride, int uv_stride) -{ - height >>= 1; - uv_stride -= width >> 1; - do { - int i = width >> 3; - do { - vis_yuv2rgb(py, pu, pv, y_stride); - vis_unpack_32bgr(image, rgb_stride); - py += 8; - pu += 4; - pv += 4; - image += 32; - } while (--i); - - py += (y_stride << 1) - width; - image += (rgb_stride << 1) - 4 * 
width; - pu += uv_stride; - pv += uv_stride; - } while (--height); -} - -static void vis_argb32(void *_id, uint8_t * const *src, - unsigned int v_offset) -{ - convert_rgb_t *id = (convert_rgb_t *) _id; - - vis_init_consts(); - vis_yuv420_argb32(id->rgb_ptr + id->rgb_stride * v_offset, - src[0], src[1], src[2], id->width, 16, - id->rgb_stride, id->y_stride, id->y_stride >> 1); -} - -static void vis_abgr32(void *_id, uint8_t * const *src, - unsigned int v_offset) -{ - convert_rgb_t *id = (convert_rgb_t *) _id; - - vis_init_consts(); - vis_yuv420_abgr32(id->rgb_ptr + id->rgb_stride * v_offset, - src[0], src[1], src[2], id->width, 16, - id->rgb_stride, id->y_stride, id->y_stride >> 1); -} - -mpeg2convert_copy_t *mpeg2convert_rgb_vis(int order, int bpp, - const mpeg2_sequence_t * seq) -{ - if (bpp == 32 && seq->chroma_height < seq->height) { - if (order == MPEG2CONVERT_RGB) - return vis_argb32; - if (order == MPEG2CONVERT_BGR) - return vis_abgr32; - } - - return NULL; /* Fallback to C */ -} - -#endif /* ARCH_SPARC */ diff --git a/src/libmpeg2new/libmpeg2/slice.c b/src/libmpeg2new/libmpeg2/slice.c deleted file mode 100644 index ce4508639..000000000 --- a/src/libmpeg2new/libmpeg2/slice.c +++ /dev/null @@ -1,2058 +0,0 @@ -/* - * slice.c - * Copyright (C) 2000-2003 Michel Lespinasse - * Copyright (C) 2003 Peter Gubanov - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#include - -#include "../include/mpeg2.h" -#include "../include/attributes.h" -#include "mpeg2_internal.h" - -extern mpeg2_mc_t mpeg2_mc; -extern void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride); -extern void (* mpeg2_idct_add) (int last, int16_t * block, - uint8_t * dest, int stride); -extern void (* mpeg2_cpu_state_save) (cpu_state_t * state); -extern void (* mpeg2_cpu_state_restore) (cpu_state_t * state); - -#include "vlc.h" - -static inline int get_macroblock_modes (mpeg2_decoder_t * const decoder) -{ -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - int macroblock_modes; - const MBtab * tab; - - switch (decoder->coding_type) { - case I_TYPE: - - tab = MB_I + UBITS (bit_buf, 1); - DUMPBITS (bit_buf, bits, tab->len); - macroblock_modes = tab->modes; - - if ((! 
(decoder->frame_pred_frame_dct)) && - (decoder->picture_structure == FRAME_PICTURE)) { - macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; - DUMPBITS (bit_buf, bits, 1); - } - - return macroblock_modes; - - case P_TYPE: - - tab = MB_P + UBITS (bit_buf, 5); - DUMPBITS (bit_buf, bits, tab->len); - macroblock_modes = tab->modes; - - if (decoder->picture_structure != FRAME_PICTURE) { - if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { - macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT; - DUMPBITS (bit_buf, bits, 2); - } - return macroblock_modes | MACROBLOCK_MOTION_FORWARD; - } else if (decoder->frame_pred_frame_dct) { - if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) - macroblock_modes |= MC_FRAME << MOTION_TYPE_SHIFT; - return macroblock_modes | MACROBLOCK_MOTION_FORWARD; - } else { - if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { - macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT; - DUMPBITS (bit_buf, bits, 2); - } - if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { - macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; - DUMPBITS (bit_buf, bits, 1); - } - return macroblock_modes | MACROBLOCK_MOTION_FORWARD; - } - - case B_TYPE: - - tab = MB_B + UBITS (bit_buf, 6); - DUMPBITS (bit_buf, bits, tab->len); - macroblock_modes = tab->modes; - - if (decoder->picture_structure != FRAME_PICTURE) { - if (! (macroblock_modes & MACROBLOCK_INTRA)) { - macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT; - DUMPBITS (bit_buf, bits, 2); - } - return macroblock_modes; - } else if (decoder->frame_pred_frame_dct) { - /* if (! 
(macroblock_modes & MACROBLOCK_INTRA)) */ - macroblock_modes |= MC_FRAME << MOTION_TYPE_SHIFT; - return macroblock_modes; - } else { - if (macroblock_modes & MACROBLOCK_INTRA) - goto intra; - macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT; - DUMPBITS (bit_buf, bits, 2); - if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { - intra: - macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; - DUMPBITS (bit_buf, bits, 1); - } - return macroblock_modes; - } - - case D_TYPE: - - DUMPBITS (bit_buf, bits, 1); - return MACROBLOCK_INTRA; - - default: - return 0; - } -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static inline void get_quantizer_scale (mpeg2_decoder_t * const decoder) -{ -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - - int quantizer_scale_code; - - quantizer_scale_code = UBITS (bit_buf, 5); - DUMPBITS (bit_buf, bits, 5); - - decoder->quantizer_matrix[0] = - decoder->quantizer_prescale[0][quantizer_scale_code]; - decoder->quantizer_matrix[1] = - decoder->quantizer_prescale[1][quantizer_scale_code]; - decoder->quantizer_matrix[2] = - decoder->chroma_quantizer[0][quantizer_scale_code]; - decoder->quantizer_matrix[3] = - decoder->chroma_quantizer[1][quantizer_scale_code]; -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static inline int get_motion_delta (mpeg2_decoder_t * const decoder, - const int f_code) -{ -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - - int delta; - int sign; - const MVtab * tab; - - if (bit_buf & 0x80000000) { - DUMPBITS (bit_buf, bits, 1); - return 0; - } else if (bit_buf >= 0x0c000000) { - - tab = MV_4 + UBITS (bit_buf, 4); - delta = (tab->delta << f_code) + 1; - bits += tab->len + f_code + 1; - bit_buf <<= tab->len; - - sign = SBITS (bit_buf, 1); - bit_buf <<= 1; - - if (f_code) - delta += UBITS (bit_buf, f_code); - bit_buf <<= f_code; - - 
return (delta ^ sign) - sign; - - } else { - - tab = MV_10 + UBITS (bit_buf, 10); - delta = (tab->delta << f_code) + 1; - bits += tab->len + 1; - bit_buf <<= tab->len; - - sign = SBITS (bit_buf, 1); - bit_buf <<= 1; - - if (f_code) { - NEEDBITS (bit_buf, bits, bit_ptr); - delta += UBITS (bit_buf, f_code); - DUMPBITS (bit_buf, bits, f_code); - } - - return (delta ^ sign) - sign; - - } -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static inline int bound_motion_vector (const int vector, const int f_code) -{ - return ((int32_t)vector << (27 - f_code)) >> (27 - f_code); -} - -static inline int get_dmv (mpeg2_decoder_t * const decoder) -{ -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - - const DMVtab * tab; - - tab = DMV_2 + UBITS (bit_buf, 2); - DUMPBITS (bit_buf, bits, tab->len); - return tab->dmv; -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static inline int get_coded_block_pattern (mpeg2_decoder_t * const decoder) -{ -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - - const CBPtab * tab; - - NEEDBITS (bit_buf, bits, bit_ptr); - - if (bit_buf >= 0x20000000) { - - tab = CBP_7 + (UBITS (bit_buf, 7) - 16); - DUMPBITS (bit_buf, bits, tab->len); - return tab->cbp; - - } else { - - tab = CBP_9 + UBITS (bit_buf, 9); - DUMPBITS (bit_buf, bits, tab->len); - return tab->cbp; - } - -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static inline int get_luma_dc_dct_diff (mpeg2_decoder_t * const decoder) -{ -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - const DCtab * tab; - int size; - int dc_diff; - - if (bit_buf < 0xf8000000) { - tab = DC_lum_5 + UBITS (bit_buf, 5); - size = tab->size; - if (size) { - bits += tab->len + size; - bit_buf <<= tab->len; - dc_diff = - UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); - bit_buf <<= 
size; - return dc_diff << decoder->intra_dc_precision; - } else { - DUMPBITS (bit_buf, bits, 3); - return 0; - } - } else { - tab = DC_long + (UBITS (bit_buf, 9) - 0x1e0); - size = tab->size; - DUMPBITS (bit_buf, bits, tab->len); - NEEDBITS (bit_buf, bits, bit_ptr); - dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); - DUMPBITS (bit_buf, bits, size); - return dc_diff << decoder->intra_dc_precision; - } -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static inline int get_chroma_dc_dct_diff (mpeg2_decoder_t * const decoder) -{ -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - const DCtab * tab; - int size; - int dc_diff; - - if (bit_buf < 0xf8000000) { - tab = DC_chrom_5 + UBITS (bit_buf, 5); - size = tab->size; - if (size) { - bits += tab->len + size; - bit_buf <<= tab->len; - dc_diff = - UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); - bit_buf <<= size; - return dc_diff << decoder->intra_dc_precision; - } else { - DUMPBITS (bit_buf, bits, 2); - return 0; - } - } else { - tab = DC_long + (UBITS (bit_buf, 10) - 0x3e0); - size = tab->size; - DUMPBITS (bit_buf, bits, tab->len + 1); - NEEDBITS (bit_buf, bits, bit_ptr); - dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); - DUMPBITS (bit_buf, bits, size); - return dc_diff << decoder->intra_dc_precision; - } -#undef bit_buf -#undef bits -#undef bit_ptr -} - -#define SATURATE(val) \ -do { \ - val <<= 4; \ - if (unlikely (val != (int16_t) val)) \ - val = (SBITS (val, 1) ^ 2047) << 4; \ -} while (0) - -static void get_intra_block_B14 (mpeg2_decoder_t * const decoder, - const uint16_t * const quant_matrix) -{ - int i; - int j; - int val; - const uint8_t * const scan = decoder->scan; - int mismatch; - const DCTtab * tab; - uint32_t bit_buf; - int bits; - const uint8_t * bit_ptr; - int16_t * const dest = decoder->DCTblock; - - i = 0; - mismatch = ~dest[0]; - - bit_buf = decoder->bitstream_buf; - bits 
= decoder->bitstream_bits; - bit_ptr = decoder->bitstream_ptr; - - NEEDBITS (bit_buf, bits, bit_ptr); - - while (1) { - if (bit_buf >= 0x28000000) { - - tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); - - i += tab->run; - if (i >= 64) - break; /* end of block */ - - normal_code: - j = scan[i]; - bit_buf <<= tab->len; - bits += tab->len + 1; - val = (tab->level * quant_matrix[j]) >> 4; - - /* if (bitstream_get (1)) val = -val; */ - val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); - - SATURATE (val); - dest[j] = val; - mismatch ^= val; - - bit_buf <<= 1; - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } else if (bit_buf >= 0x04000000) { - - tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); - - i += tab->run; - if (i < 64) - goto normal_code; - - /* escape code */ - - i += UBITS (bit_buf << 6, 6) - 64; - if (i >= 64) - break; /* illegal, check needed to avoid buffer overflow */ - - j = scan[i]; - - DUMPBITS (bit_buf, bits, 12); - NEEDBITS (bit_buf, bits, bit_ptr); - val = (SBITS (bit_buf, 12) * quant_matrix[j]) / 16; - - SATURATE (val); - dest[j] = val; - mismatch ^= val; - - DUMPBITS (bit_buf, bits, 12); - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } else if (bit_buf >= 0x02000000) { - tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00800000) { - tab = DCT_13 + (UBITS (bit_buf, 13) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00200000) { - tab = DCT_15 + (UBITS (bit_buf, 15) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else { - tab = DCT_16 + UBITS (bit_buf, 16); - bit_buf <<= 16; - GETWORD (bit_buf, bits + 16, bit_ptr); - i += tab->run; - if (i < 64) - goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ - } - dest[63] ^= mismatch & 16; - DUMPBITS (bit_buf, bits, tab->len); /* dump end of block code */ - decoder->bitstream_buf = bit_buf; - decoder->bitstream_bits = bits; - 
decoder->bitstream_ptr = bit_ptr; -} - -static void get_intra_block_B15 (mpeg2_decoder_t * const decoder, - const uint16_t * const quant_matrix) -{ - int i; - int j; - int val; - const uint8_t * const scan = decoder->scan; - int mismatch; - const DCTtab * tab; - uint32_t bit_buf; - int bits; - const uint8_t * bit_ptr; - int16_t * const dest = decoder->DCTblock; - - i = 0; - mismatch = ~dest[0]; - - bit_buf = decoder->bitstream_buf; - bits = decoder->bitstream_bits; - bit_ptr = decoder->bitstream_ptr; - - NEEDBITS (bit_buf, bits, bit_ptr); - - while (1) { - if (bit_buf >= 0x04000000) { - - tab = DCT_B15_8 + (UBITS (bit_buf, 8) - 4); - - i += tab->run; - if (i < 64) { - - normal_code: - j = scan[i]; - bit_buf <<= tab->len; - bits += tab->len + 1; - val = (tab->level * quant_matrix[j]) >> 4; - - /* if (bitstream_get (1)) val = -val; */ - val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); - - SATURATE (val); - dest[j] = val; - mismatch ^= val; - - bit_buf <<= 1; - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } else { - - /* end of block. 
I commented out this code because if we */ - /* dont exit here we will still exit at the later test :) */ - - /* if (i >= 128) break; */ /* end of block */ - - /* escape code */ - - i += UBITS (bit_buf << 6, 6) - 64; - if (i >= 64) - break; /* illegal, check against buffer overflow */ - - j = scan[i]; - - DUMPBITS (bit_buf, bits, 12); - NEEDBITS (bit_buf, bits, bit_ptr); - val = (SBITS (bit_buf, 12) * quant_matrix[j]) / 16; - - SATURATE (val); - dest[j] = val; - mismatch ^= val; - - DUMPBITS (bit_buf, bits, 12); - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } - } else if (bit_buf >= 0x02000000) { - tab = DCT_B15_10 + (UBITS (bit_buf, 10) - 8); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00800000) { - tab = DCT_13 + (UBITS (bit_buf, 13) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00200000) { - tab = DCT_15 + (UBITS (bit_buf, 15) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else { - tab = DCT_16 + UBITS (bit_buf, 16); - bit_buf <<= 16; - GETWORD (bit_buf, bits + 16, bit_ptr); - i += tab->run; - if (i < 64) - goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ - } - dest[63] ^= mismatch & 16; - DUMPBITS (bit_buf, bits, tab->len); /* dump end of block code */ - decoder->bitstream_buf = bit_buf; - decoder->bitstream_bits = bits; - decoder->bitstream_ptr = bit_ptr; -} - -static int get_non_intra_block (mpeg2_decoder_t * const decoder, - const uint16_t * const quant_matrix) -{ - int i; - int j; - int val; - const uint8_t * const scan = decoder->scan; - int mismatch; - const DCTtab * tab; - uint32_t bit_buf; - int bits; - const uint8_t * bit_ptr; - int16_t * const dest = decoder->DCTblock; - - i = -1; - mismatch = -1; - - bit_buf = decoder->bitstream_buf; - bits = decoder->bitstream_bits; - bit_ptr = decoder->bitstream_ptr; - - NEEDBITS (bit_buf, bits, bit_ptr); - if (bit_buf >= 0x28000000) { - tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5); - 
goto entry_1; - } else - goto entry_2; - - while (1) { - if (bit_buf >= 0x28000000) { - - tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); - - entry_1: - i += tab->run; - if (i >= 64) - break; /* end of block */ - - normal_code: - j = scan[i]; - bit_buf <<= tab->len; - bits += tab->len + 1; - val = ((2 * tab->level + 1) * quant_matrix[j]) >> 5; - - /* if (bitstream_get (1)) val = -val; */ - val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); - - SATURATE (val); - dest[j] = val; - mismatch ^= val; - - bit_buf <<= 1; - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } - - entry_2: - if (bit_buf >= 0x04000000) { - - tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); - - i += tab->run; - if (i < 64) - goto normal_code; - - /* escape code */ - - i += UBITS (bit_buf << 6, 6) - 64; - if (i >= 64) - break; /* illegal, check needed to avoid buffer overflow */ - - j = scan[i]; - - DUMPBITS (bit_buf, bits, 12); - NEEDBITS (bit_buf, bits, bit_ptr); - val = 2 * (SBITS (bit_buf, 12) + SBITS (bit_buf, 1)) + 1; - val = (val * quant_matrix[j]) / 32; - - SATURATE (val); - dest[j] = val; - mismatch ^= val; - - DUMPBITS (bit_buf, bits, 12); - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } else if (bit_buf >= 0x02000000) { - tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00800000) { - tab = DCT_13 + (UBITS (bit_buf, 13) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00200000) { - tab = DCT_15 + (UBITS (bit_buf, 15) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else { - tab = DCT_16 + UBITS (bit_buf, 16); - bit_buf <<= 16; - GETWORD (bit_buf, bits + 16, bit_ptr); - i += tab->run; - if (i < 64) - goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ - } - dest[63] ^= mismatch & 16; - DUMPBITS (bit_buf, bits, tab->len); /* dump end of block code */ - decoder->bitstream_buf = bit_buf; - decoder->bitstream_bits = bits; - 
decoder->bitstream_ptr = bit_ptr; - return i; -} - -static void get_mpeg1_intra_block (mpeg2_decoder_t * const decoder) -{ - int i; - int j; - int val; - const uint8_t * const scan = decoder->scan; - const uint16_t * const quant_matrix = decoder->quantizer_matrix[0]; - const DCTtab * tab; - uint32_t bit_buf; - int bits; - const uint8_t * bit_ptr; - int16_t * const dest = decoder->DCTblock; - - i = 0; - - bit_buf = decoder->bitstream_buf; - bits = decoder->bitstream_bits; - bit_ptr = decoder->bitstream_ptr; - - NEEDBITS (bit_buf, bits, bit_ptr); - - while (1) { - if (bit_buf >= 0x28000000) { - - tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); - - i += tab->run; - if (i >= 64) - break; /* end of block */ - - normal_code: - j = scan[i]; - bit_buf <<= tab->len; - bits += tab->len + 1; - val = (tab->level * quant_matrix[j]) >> 4; - - /* oddification */ - val = (val - 1) | 1; - - /* if (bitstream_get (1)) val = -val; */ - val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); - - SATURATE (val); - dest[j] = val; - - bit_buf <<= 1; - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } else if (bit_buf >= 0x04000000) { - - tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); - - i += tab->run; - if (i < 64) - goto normal_code; - - /* escape code */ - - i += UBITS (bit_buf << 6, 6) - 64; - if (i >= 64) - break; /* illegal, check needed to avoid buffer overflow */ - - j = scan[i]; - - DUMPBITS (bit_buf, bits, 12); - NEEDBITS (bit_buf, bits, bit_ptr); - val = SBITS (bit_buf, 8); - if (! 
(val & 0x7f)) { - DUMPBITS (bit_buf, bits, 8); - val = UBITS (bit_buf, 8) + 2 * val; - } - val = (val * quant_matrix[j]) / 16; - - /* oddification */ - val = (val + ~SBITS (val, 1)) | 1; - - SATURATE (val); - dest[j] = val; - - DUMPBITS (bit_buf, bits, 8); - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } else if (bit_buf >= 0x02000000) { - tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00800000) { - tab = DCT_13 + (UBITS (bit_buf, 13) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00200000) { - tab = DCT_15 + (UBITS (bit_buf, 15) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else { - tab = DCT_16 + UBITS (bit_buf, 16); - bit_buf <<= 16; - GETWORD (bit_buf, bits + 16, bit_ptr); - i += tab->run; - if (i < 64) - goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ - } - DUMPBITS (bit_buf, bits, tab->len); /* dump end of block code */ - decoder->bitstream_buf = bit_buf; - decoder->bitstream_bits = bits; - decoder->bitstream_ptr = bit_ptr; -} - -static int get_mpeg1_non_intra_block (mpeg2_decoder_t * const decoder) -{ - int i; - int j; - int val; - const uint8_t * const scan = decoder->scan; - const uint16_t * const quant_matrix = decoder->quantizer_matrix[1]; - const DCTtab * tab; - uint32_t bit_buf; - int bits; - const uint8_t * bit_ptr; - int16_t * const dest = decoder->DCTblock; - - i = -1; - - bit_buf = decoder->bitstream_buf; - bits = decoder->bitstream_bits; - bit_ptr = decoder->bitstream_ptr; - - NEEDBITS (bit_buf, bits, bit_ptr); - if (bit_buf >= 0x28000000) { - tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5); - goto entry_1; - } else - goto entry_2; - - while (1) { - if (bit_buf >= 0x28000000) { - - tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); - - entry_1: - i += tab->run; - if (i >= 64) - break; /* end of block */ - - normal_code: - j = scan[i]; - bit_buf <<= tab->len; - bits += tab->len + 1; - 
val = ((2 * tab->level + 1) * quant_matrix[j]) >> 5; - - /* oddification */ - val = (val - 1) | 1; - - /* if (bitstream_get (1)) val = -val; */ - val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); - - SATURATE (val); - dest[j] = val; - - bit_buf <<= 1; - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } - - entry_2: - if (bit_buf >= 0x04000000) { - - tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); - - i += tab->run; - if (i < 64) - goto normal_code; - - /* escape code */ - - i += UBITS (bit_buf << 6, 6) - 64; - if (i >= 64) - break; /* illegal, check needed to avoid buffer overflow */ - - j = scan[i]; - - DUMPBITS (bit_buf, bits, 12); - NEEDBITS (bit_buf, bits, bit_ptr); - val = SBITS (bit_buf, 8); - if (! (val & 0x7f)) { - DUMPBITS (bit_buf, bits, 8); - val = UBITS (bit_buf, 8) + 2 * val; - } - val = 2 * (val + SBITS (val, 1)) + 1; - val = (val * quant_matrix[j]) / 32; - - /* oddification */ - val = (val + ~SBITS (val, 1)) | 1; - - SATURATE (val); - dest[j] = val; - - DUMPBITS (bit_buf, bits, 8); - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } else if (bit_buf >= 0x02000000) { - tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00800000) { - tab = DCT_13 + (UBITS (bit_buf, 13) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00200000) { - tab = DCT_15 + (UBITS (bit_buf, 15) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else { - tab = DCT_16 + UBITS (bit_buf, 16); - bit_buf <<= 16; - GETWORD (bit_buf, bits + 16, bit_ptr); - i += tab->run; - if (i < 64) - goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ - } - DUMPBITS (bit_buf, bits, tab->len); /* dump end of block code */ - decoder->bitstream_buf = bit_buf; - decoder->bitstream_bits = bits; - decoder->bitstream_ptr = bit_ptr; - return i; -} - -static inline void slice_intra_DCT (mpeg2_decoder_t * const decoder, - const int cc, - uint8_t * const 
dest, const int stride) -{ -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - NEEDBITS (bit_buf, bits, bit_ptr); - /* Get the intra DC coefficient and inverse quantize it */ - if (cc == 0) - decoder->DCTblock[0] = - decoder->dc_dct_pred[0] += get_luma_dc_dct_diff (decoder); - else - decoder->DCTblock[0] = - decoder->dc_dct_pred[cc] += get_chroma_dc_dct_diff (decoder); - - if (decoder->mpeg1) { - if (decoder->coding_type != D_TYPE) - get_mpeg1_intra_block (decoder); - } else if (decoder->intra_vlc_format) - get_intra_block_B15 (decoder, decoder->quantizer_matrix[cc ? 2 : 0]); - else - get_intra_block_B14 (decoder, decoder->quantizer_matrix[cc ? 2 : 0]); - mpeg2_idct_copy (decoder->DCTblock, dest, stride); -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static inline void slice_non_intra_DCT (mpeg2_decoder_t * const decoder, - const int cc, - uint8_t * const dest, const int stride) -{ - int last; - - if (decoder->mpeg1) - last = get_mpeg1_non_intra_block (decoder); - else - last = get_non_intra_block (decoder, - decoder->quantizer_matrix[cc ? 3 : 1]); - mpeg2_idct_add (last, decoder->DCTblock, dest, stride); -} - -#define MOTION_420(table,ref,motion_x,motion_y,size,y) \ - pos_x = 2 * decoder->offset + motion_x; \ - pos_y = 2 * decoder->v_offset + motion_y + 2 * y; \ - if (unlikely (pos_x > decoder->limit_x)) { \ - pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ - motion_x = pos_x - 2 * decoder->offset; \ - } \ - if (unlikely (pos_y > decoder->limit_y_ ## size)) { \ - pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y_ ## size; \ - motion_y = pos_y - 2 * decoder->v_offset - 2 * y; \ - } \ - xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ - table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \ - ref[0] + (pos_x >> 1) + (pos_y >> 1) * decoder->stride, \ - decoder->stride, size); \ - motion_x /= 2; motion_y /= 2; \ - xy_half = ((motion_y & 1) << 1) | (motion_x & 1); \ - offset = (((decoder->offset + motion_x) >> 1) + \ - ((((decoder->v_offset + motion_y) >> 1) + y/2) * \ - decoder->uv_stride)); \ - table[4+xy_half] (decoder->dest[1] + y/2 * decoder->uv_stride + \ - (decoder->offset >> 1), ref[1] + offset, \ - decoder->uv_stride, size/2); \ - table[4+xy_half] (decoder->dest[2] + y/2 * decoder->uv_stride + \ - (decoder->offset >> 1), ref[2] + offset, \ - decoder->uv_stride, size/2) - -#define MOTION_FIELD_420(table,ref,motion_x,motion_y,dest_field,op,src_field) \ - pos_x = 2 * decoder->offset + motion_x; \ - pos_y = decoder->v_offset + motion_y; \ - if (unlikely (pos_x > decoder->limit_x)) { \ - pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ - motion_x = pos_x - 2 * decoder->offset; \ - } \ - if (unlikely (pos_y > decoder->limit_y)) { \ - pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y; \ - motion_y = pos_y - decoder->v_offset; \ - } \ - xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ - table[xy_half] (decoder->dest[0] + dest_field * decoder->stride + \ - decoder->offset, \ - (ref[0] + (pos_x >> 1) + \ - ((pos_y op) + src_field) * decoder->stride), \ - 2 * decoder->stride, 8); \ - motion_x /= 2; motion_y /= 2; \ - xy_half = ((motion_y & 1) << 1) | (motion_x & 1); \ - offset = (((decoder->offset + motion_x) >> 1) + \ - (((decoder->v_offset >> 1) + (motion_y op) + src_field) * \ - decoder->uv_stride)); \ - table[4+xy_half] (decoder->dest[1] + dest_field * decoder->uv_stride + \ - (decoder->offset >> 1), ref[1] + offset, \ - 2 * decoder->uv_stride, 4); \ - table[4+xy_half] (decoder->dest[2] + dest_field * decoder->uv_stride + \ - (decoder->offset >> 1), ref[2] + offset, \ - 2 * decoder->uv_stride, 4) - -#define MOTION_DMV_420(table,ref,motion_x,motion_y) \ - pos_x = 2 * decoder->offset + motion_x; \ - pos_y = decoder->v_offset + motion_y; \ - if (unlikely (pos_x > decoder->limit_x)) { \ - pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ - motion_x = pos_x - 2 * decoder->offset; \ - } \ - if (unlikely (pos_y > decoder->limit_y)) { \ - pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y; \ - motion_y = pos_y - decoder->v_offset; \ - } \ - xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ - offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride; \ - table[xy_half] (decoder->dest[0] + decoder->offset, \ - ref[0] + offset, 2 * decoder->stride, 8); \ - table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset, \ - ref[0] + decoder->stride + offset, \ - 2 * decoder->stride, 8); \ - motion_x /= 2; motion_y /= 2; \ - xy_half = ((motion_y & 1) << 1) | (motion_x & 1); \ - offset = (((decoder->offset + motion_x) >> 1) + \ - (((decoder->v_offset >> 1) + (motion_y & ~1)) * \ - decoder->uv_stride)); \ - table[4+xy_half] (decoder->dest[1] + (decoder->offset >> 1), \ - ref[1] + offset, 2 * decoder->uv_stride, 4); \ - table[4+xy_half] (decoder->dest[1] + decoder->uv_stride + \ - (decoder->offset >> 1), \ - ref[1] + decoder->uv_stride + offset, \ - 2 * decoder->uv_stride, 4); \ - table[4+xy_half] (decoder->dest[2] + (decoder->offset >> 1), \ - ref[2] + offset, 2 * decoder->uv_stride, 4); \ - table[4+xy_half] (decoder->dest[2] + decoder->uv_stride + \ - (decoder->offset >> 1), \ - ref[2] + decoder->uv_stride + offset, \ - 2 * decoder->uv_stride, 4) - -#define MOTION_ZERO_420(table,ref) \ - table[0] (decoder->dest[0] + decoder->offset, \ - (ref[0] + decoder->offset + \ - decoder->v_offset * decoder->stride), decoder->stride, 16); \ - offset = ((decoder->offset >> 1) + \ - (decoder->v_offset >> 1) * decoder->uv_stride); \ - table[4] (decoder->dest[1] + (decoder->offset >> 1), \ - ref[1] + offset, decoder->uv_stride, 8); \ - table[4] (decoder->dest[2] + (decoder->offset >> 1), \ - ref[2] + offset, decoder->uv_stride, 8) - -#define MOTION_422(table,ref,motion_x,motion_y,size,y) \ - pos_x = 2 * decoder->offset + motion_x; \ - pos_y = 2 * decoder->v_offset + motion_y + 2 * y; \ - if (unlikely (pos_x > decoder->limit_x)) { \ - pos_x = ((int)pos_x < 0) ? 
0 : decoder->limit_x; \ - motion_x = pos_x - 2 * decoder->offset; \ - } \ - if (unlikely (pos_y > decoder->limit_y_ ## size)) { \ - pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y_ ## size; \ - motion_y = pos_y - 2 * decoder->v_offset - 2 * y; \ - } \ - xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ - offset = (pos_x >> 1) + (pos_y >> 1) * decoder->stride; \ - table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \ - ref[0] + offset, decoder->stride, size); \ - offset = (offset + (motion_x & (motion_x < 0))) >> 1; \ - motion_x /= 2; \ - xy_half = ((pos_y & 1) << 1) | (motion_x & 1); \ - table[4+xy_half] (decoder->dest[1] + y * decoder->uv_stride + \ - (decoder->offset >> 1), ref[1] + offset, \ - decoder->uv_stride, size); \ - table[4+xy_half] (decoder->dest[2] + y * decoder->uv_stride + \ - (decoder->offset >> 1), ref[2] + offset, \ - decoder->uv_stride, size) - -#define MOTION_FIELD_422(table,ref,motion_x,motion_y,dest_field,op,src_field) \ - pos_x = 2 * decoder->offset + motion_x; \ - pos_y = decoder->v_offset + motion_y; \ - if (unlikely (pos_x > decoder->limit_x)) { \ - pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ - motion_x = pos_x - 2 * decoder->offset; \ - } \ - if (unlikely (pos_y > decoder->limit_y)) { \ - pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y; \ - motion_y = pos_y - decoder->v_offset; \ - } \ - xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ - offset = (pos_x >> 1) + ((pos_y op) + src_field) * decoder->stride; \ - table[xy_half] (decoder->dest[0] + dest_field * decoder->stride + \ - decoder->offset, ref[0] + offset, \ - 2 * decoder->stride, 8); \ - offset = (offset + (motion_x & (motion_x < 0))) >> 1; \ - motion_x /= 2; \ - xy_half = ((pos_y & 1) << 1) | (motion_x & 1); \ - table[4+xy_half] (decoder->dest[1] + dest_field * decoder->uv_stride + \ - (decoder->offset >> 1), ref[1] + offset, \ - 2 * decoder->uv_stride, 8); \ - table[4+xy_half] (decoder->dest[2] + dest_field * decoder->uv_stride + \ - (decoder->offset >> 1), ref[2] + offset, \ - 2 * decoder->uv_stride, 8) - -#define MOTION_DMV_422(table,ref,motion_x,motion_y) \ - pos_x = 2 * decoder->offset + motion_x; \ - pos_y = decoder->v_offset + motion_y; \ - if (unlikely (pos_x > decoder->limit_x)) { \ - pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ - motion_x = pos_x - 2 * decoder->offset; \ - } \ - if (unlikely (pos_y > decoder->limit_y)) { \ - pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y; \ - motion_y = pos_y - decoder->v_offset; \ - } \ - xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ - offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride; \ - table[xy_half] (decoder->dest[0] + decoder->offset, \ - ref[0] + offset, 2 * decoder->stride, 8); \ - table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset, \ - ref[0] + decoder->stride + offset, \ - 2 * decoder->stride, 8); \ - offset = (offset + (motion_x & (motion_x < 0))) >> 1; \ - motion_x /= 2; \ - xy_half = ((pos_y & 1) << 1) | (motion_x & 1); \ - table[4+xy_half] (decoder->dest[1] + (decoder->offset >> 1), \ - ref[1] + offset, 2 * decoder->uv_stride, 8); \ - table[4+xy_half] (decoder->dest[1] + decoder->uv_stride + \ - (decoder->offset >> 1), \ - ref[1] + decoder->uv_stride + offset, \ - 2 * decoder->uv_stride, 8); \ - table[4+xy_half] (decoder->dest[2] + (decoder->offset >> 1), \ - ref[2] + offset, 2 * decoder->uv_stride, 8); \ - table[4+xy_half] (decoder->dest[2] + decoder->uv_stride + \ - (decoder->offset >> 1), \ - ref[2] + decoder->uv_stride + offset, \ - 2 * decoder->uv_stride, 8) - -#define MOTION_ZERO_422(table,ref) \ - offset = decoder->offset + decoder->v_offset * decoder->stride; \ - table[0] (decoder->dest[0] + decoder->offset, \ - ref[0] + offset, decoder->stride, 16); \ - offset >>= 1; \ - table[4] (decoder->dest[1] + (decoder->offset >> 1), \ - ref[1] + offset, decoder->uv_stride, 16); \ - table[4] (decoder->dest[2] + (decoder->offset >> 1), \ - ref[2] + offset, decoder->uv_stride, 16) - -#define MOTION_444(table,ref,motion_x,motion_y,size,y) \ - pos_x = 2 * decoder->offset + motion_x; \ - pos_y = 2 * decoder->v_offset + motion_y + 2 * y; \ - if (unlikely (pos_x > decoder->limit_x)) { \ - pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ - motion_x = pos_x - 2 * decoder->offset; \ - } \ - if (unlikely (pos_y > decoder->limit_y_ ## size)) { \ - pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y_ ## size; \ - motion_y = pos_y - 2 * decoder->v_offset - 2 * y; \ - } \ - xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ - offset = (pos_x >> 1) + (pos_y >> 1) * decoder->stride; \ - table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \ - ref[0] + offset, decoder->stride, size); \ - table[xy_half] (decoder->dest[1] + y * decoder->stride + decoder->offset, \ - ref[1] + offset, decoder->stride, size); \ - table[xy_half] (decoder->dest[2] + y * decoder->stride + decoder->offset, \ - ref[2] + offset, decoder->stride, size) - -#define MOTION_FIELD_444(table,ref,motion_x,motion_y,dest_field,op,src_field) \ - pos_x = 2 * decoder->offset + motion_x; \ - pos_y = decoder->v_offset + motion_y; \ - if (unlikely (pos_x > decoder->limit_x)) { \ - pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ - motion_x = pos_x - 2 * decoder->offset; \ - } \ - if (unlikely (pos_y > decoder->limit_y)) { \ - pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y; \ - motion_y = pos_y - decoder->v_offset; \ - } \ - xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ - offset = (pos_x >> 1) + ((pos_y op) + src_field) * decoder->stride; \ - table[xy_half] (decoder->dest[0] + dest_field * decoder->stride + \ - decoder->offset, ref[0] + offset, \ - 2 * decoder->stride, 8); \ - table[xy_half] (decoder->dest[1] + dest_field * decoder->stride + \ - decoder->offset, ref[1] + offset, \ - 2 * decoder->stride, 8); \ - table[xy_half] (decoder->dest[2] + dest_field * decoder->stride + \ - decoder->offset, ref[2] + offset, \ - 2 * decoder->stride, 8) - -#define MOTION_DMV_444(table,ref,motion_x,motion_y) \ - pos_x = 2 * decoder->offset + motion_x; \ - pos_y = decoder->v_offset + motion_y; \ - if (unlikely (pos_x > decoder->limit_x)) { \ - pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ - motion_x = pos_x - 2 * decoder->offset; \ - } \ - if (unlikely (pos_y > decoder->limit_y)) { \ - pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y; \ - motion_y = pos_y - decoder->v_offset; \ - } \ - xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ - offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride; \ - table[xy_half] (decoder->dest[0] + decoder->offset, \ - ref[0] + offset, 2 * decoder->stride, 8); \ - table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset, \ - ref[0] + decoder->stride + offset, \ - 2 * decoder->stride, 8); \ - table[xy_half] (decoder->dest[1] + decoder->offset, \ - ref[1] + offset, 2 * decoder->stride, 8); \ - table[xy_half] (decoder->dest[1] + decoder->stride + decoder->offset, \ - ref[1] + decoder->stride + offset, \ - 2 * decoder->stride, 8); \ - table[xy_half] (decoder->dest[2] + decoder->offset, \ - ref[2] + offset, 2 * decoder->stride, 8); \ - table[xy_half] (decoder->dest[2] + decoder->stride + decoder->offset, \ - ref[2] + decoder->stride + offset, \ - 2 * decoder->stride, 8) - -#define MOTION_ZERO_444(table,ref) \ - offset = decoder->offset + decoder->v_offset * decoder->stride; \ - table[0] (decoder->dest[0] + decoder->offset, \ - ref[0] + offset, decoder->stride, 16); \ - table[4] (decoder->dest[1] + decoder->offset, \ - ref[1] + offset, decoder->stride, 16); \ - table[4] (decoder->dest[2] + (decoder->offset >> 1), \ - ref[2] + offset, decoder->stride, 16) - -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - -static void motion_mp1 (mpeg2_decoder_t * const decoder, - motion_t * const motion, - mpeg2_mc_fct * const * const table) -{ - int motion_x, motion_y; - unsigned int pos_x, pos_y, xy_half, offset; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_x = (motion->pmv[0][0] + - (get_motion_delta (decoder, - motion->f_code[0]) << motion->f_code[1])); - motion_x = bound_motion_vector (motion_x, - motion->f_code[0] + motion->f_code[1]); - motion->pmv[0][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = (motion->pmv[0][1] + - (get_motion_delta (decoder, - 
motion->f_code[0]) << motion->f_code[1])); - motion_y = bound_motion_vector (motion_y, - motion->f_code[0] + motion->f_code[1]); - motion->pmv[0][1] = motion_y; - - MOTION_420 (table, motion->ref[0], motion_x, motion_y, 16, 0); -} - -#define MOTION_FUNCTIONS(FORMAT,MOTION,MOTION_FIELD,MOTION_DMV,MOTION_ZERO) \ - \ -static void motion_fr_frame_##FORMAT (mpeg2_decoder_t * const decoder, \ - motion_t * const motion, \ - mpeg2_mc_fct * const * const table) \ -{ \ - int motion_x, motion_y; \ - unsigned int pos_x, pos_y, xy_half, offset; \ - \ - NEEDBITS (bit_buf, bits, bit_ptr); \ - motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ - motion->f_code[0]); \ - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ - motion->pmv[1][0] = motion->pmv[0][0] = motion_x; \ - \ - NEEDBITS (bit_buf, bits, bit_ptr); \ - motion_y = motion->pmv[0][1] + get_motion_delta (decoder, \ - motion->f_code[1]); \ - motion_y = bound_motion_vector (motion_y, motion->f_code[1]); \ - motion->pmv[1][1] = motion->pmv[0][1] = motion_y; \ - \ - MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); \ -} \ - \ -static void motion_fr_field_##FORMAT (mpeg2_decoder_t * const decoder, \ - motion_t * const motion, \ - mpeg2_mc_fct * const * const table) \ -{ \ - int motion_x, motion_y, field; \ - unsigned int pos_x, pos_y, xy_half, offset; \ - \ - NEEDBITS (bit_buf, bits, bit_ptr); \ - field = UBITS (bit_buf, 1); \ - DUMPBITS (bit_buf, bits, 1); \ - \ - motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ - motion->f_code[0]); \ - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ - motion->pmv[0][0] = motion_x; \ - \ - NEEDBITS (bit_buf, bits, bit_ptr); \ - motion_y = ((motion->pmv[0][1] >> 1) + \ - get_motion_delta (decoder, motion->f_code[1])); \ - /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ \ - motion->pmv[0][1] = motion_y << 1; \ - \ - MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 0, & ~1, field); \ - \ - NEEDBITS (bit_buf, 
bits, bit_ptr); \ - field = UBITS (bit_buf, 1); \ - DUMPBITS (bit_buf, bits, 1); \ - \ - motion_x = motion->pmv[1][0] + get_motion_delta (decoder, \ - motion->f_code[0]); \ - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ - motion->pmv[1][0] = motion_x; \ - \ - NEEDBITS (bit_buf, bits, bit_ptr); \ - motion_y = ((motion->pmv[1][1] >> 1) + \ - get_motion_delta (decoder, motion->f_code[1])); \ - /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ \ - motion->pmv[1][1] = motion_y << 1; \ - \ - MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 1, & ~1, field); \ -} \ - \ -static void motion_fr_dmv_##FORMAT (mpeg2_decoder_t * const decoder, \ - motion_t * const motion, \ - mpeg2_mc_fct * const * const table) \ -{ \ - int motion_x, motion_y, dmv_x, dmv_y, m, other_x, other_y; \ - unsigned int pos_x, pos_y, xy_half, offset; \ - \ - NEEDBITS (bit_buf, bits, bit_ptr); \ - motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ - motion->f_code[0]); \ - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ - motion->pmv[1][0] = motion->pmv[0][0] = motion_x; \ - NEEDBITS (bit_buf, bits, bit_ptr); \ - dmv_x = get_dmv (decoder); \ - \ - motion_y = ((motion->pmv[0][1] >> 1) + \ - get_motion_delta (decoder, motion->f_code[1])); \ - /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ \ - motion->pmv[1][1] = motion->pmv[0][1] = motion_y << 1; \ - dmv_y = get_dmv (decoder); \ - \ - m = decoder->top_field_first ? 1 : 3; \ - other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; \ - other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y - 1; \ - MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 0, | 1, 0); \ - \ - m = decoder->top_field_first ? 
3 : 1; \ - other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; \ - other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y + 1; \ - MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 1, & ~1, 0);\ - \ - MOTION_DMV (mpeg2_mc.avg, motion->ref[0], motion_x, motion_y); \ -} \ - \ -static void motion_reuse_##FORMAT (mpeg2_decoder_t * const decoder, \ - motion_t * const motion, \ - mpeg2_mc_fct * const * const table) \ -{ \ - int motion_x, motion_y; \ - unsigned int pos_x, pos_y, xy_half, offset; \ - \ - motion_x = motion->pmv[0][0]; \ - motion_y = motion->pmv[0][1]; \ - \ - MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); \ -} \ - \ -static void motion_zero_##FORMAT (mpeg2_decoder_t * const decoder, \ - motion_t * const motion, \ - mpeg2_mc_fct * const * const table) \ -{ \ - unsigned int offset; \ - \ - motion->pmv[0][0] = motion->pmv[0][1] = 0; \ - motion->pmv[1][0] = motion->pmv[1][1] = 0; \ - \ - MOTION_ZERO (table, motion->ref[0]); \ -} \ - \ -static void motion_fi_field_##FORMAT (mpeg2_decoder_t * const decoder, \ - motion_t * const motion, \ - mpeg2_mc_fct * const * const table) \ -{ \ - int motion_x, motion_y; \ - uint8_t ** ref_field; \ - unsigned int pos_x, pos_y, xy_half, offset; \ - \ - NEEDBITS (bit_buf, bits, bit_ptr); \ - ref_field = motion->ref2[UBITS (bit_buf, 1)]; \ - DUMPBITS (bit_buf, bits, 1); \ - \ - motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ - motion->f_code[0]); \ - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ - motion->pmv[1][0] = motion->pmv[0][0] = motion_x; \ - \ - NEEDBITS (bit_buf, bits, bit_ptr); \ - motion_y = motion->pmv[0][1] + get_motion_delta (decoder, \ - motion->f_code[1]); \ - motion_y = bound_motion_vector (motion_y, motion->f_code[1]); \ - motion->pmv[1][1] = motion->pmv[0][1] = motion_y; \ - \ - MOTION (table, ref_field, motion_x, motion_y, 16, 0); \ -} \ - \ -static void motion_fi_16x8_##FORMAT (mpeg2_decoder_t * const decoder, \ - motion_t * const motion, \ - 
mpeg2_mc_fct * const * const table) \ -{ \ - int motion_x, motion_y; \ - uint8_t ** ref_field; \ - unsigned int pos_x, pos_y, xy_half, offset; \ - \ - NEEDBITS (bit_buf, bits, bit_ptr); \ - ref_field = motion->ref2[UBITS (bit_buf, 1)]; \ - DUMPBITS (bit_buf, bits, 1); \ - \ - motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ - motion->f_code[0]); \ - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ - motion->pmv[0][0] = motion_x; \ - \ - NEEDBITS (bit_buf, bits, bit_ptr); \ - motion_y = motion->pmv[0][1] + get_motion_delta (decoder, \ - motion->f_code[1]); \ - motion_y = bound_motion_vector (motion_y, motion->f_code[1]); \ - motion->pmv[0][1] = motion_y; \ - \ - MOTION (table, ref_field, motion_x, motion_y, 8, 0); \ - \ - NEEDBITS (bit_buf, bits, bit_ptr); \ - ref_field = motion->ref2[UBITS (bit_buf, 1)]; \ - DUMPBITS (bit_buf, bits, 1); \ - \ - motion_x = motion->pmv[1][0] + get_motion_delta (decoder, \ - motion->f_code[0]); \ - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ - motion->pmv[1][0] = motion_x; \ - \ - NEEDBITS (bit_buf, bits, bit_ptr); \ - motion_y = motion->pmv[1][1] + get_motion_delta (decoder, \ - motion->f_code[1]); \ - motion_y = bound_motion_vector (motion_y, motion->f_code[1]); \ - motion->pmv[1][1] = motion_y; \ - \ - MOTION (table, ref_field, motion_x, motion_y, 8, 8); \ -} \ - \ -static void motion_fi_dmv_##FORMAT (mpeg2_decoder_t * const decoder, \ - motion_t * const motion, \ - mpeg2_mc_fct * const * const table) \ -{ \ - int motion_x, motion_y, other_x, other_y; \ - unsigned int pos_x, pos_y, xy_half, offset; \ - \ - NEEDBITS (bit_buf, bits, bit_ptr); \ - motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ - motion->f_code[0]); \ - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ - motion->pmv[1][0] = motion->pmv[0][0] = motion_x; \ - NEEDBITS (bit_buf, bits, bit_ptr); \ - other_x = ((motion_x + (motion_x > 0)) >> 1) + get_dmv (decoder); \ - \ - motion_y = motion->pmv[0][1] 
+ get_motion_delta (decoder, \ - motion->f_code[1]); \ - motion_y = bound_motion_vector (motion_y, motion->f_code[1]); \ - motion->pmv[1][1] = motion->pmv[0][1] = motion_y; \ - other_y = (((motion_y + (motion_y > 0)) >> 1) + get_dmv (decoder) + \ - decoder->dmv_offset); \ - \ - MOTION (mpeg2_mc.put, motion->ref[0], motion_x, motion_y, 16, 0); \ - MOTION (mpeg2_mc.avg, motion->ref[1], other_x, other_y, 16, 0); \ -} \ - -MOTION_FUNCTIONS (420, MOTION_420, MOTION_FIELD_420, MOTION_DMV_420, - MOTION_ZERO_420) -MOTION_FUNCTIONS (422, MOTION_422, MOTION_FIELD_422, MOTION_DMV_422, - MOTION_ZERO_422) -MOTION_FUNCTIONS (444, MOTION_444, MOTION_FIELD_444, MOTION_DMV_444, - MOTION_ZERO_444) - -/* like motion_frame, but parsing without actual motion compensation */ -static void motion_fr_conceal (mpeg2_decoder_t * const decoder) -{ - int tmp; - - NEEDBITS (bit_buf, bits, bit_ptr); - tmp = (decoder->f_motion.pmv[0][0] + - get_motion_delta (decoder, decoder->f_motion.f_code[0])); - tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[0]); - decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[0][0] = tmp; - - NEEDBITS (bit_buf, bits, bit_ptr); - tmp = (decoder->f_motion.pmv[0][1] + - get_motion_delta (decoder, decoder->f_motion.f_code[1])); - tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[1]); - decoder->f_motion.pmv[1][1] = decoder->f_motion.pmv[0][1] = tmp; - - DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */ -} - -static void motion_fi_conceal (mpeg2_decoder_t * const decoder) -{ - int tmp; - - NEEDBITS (bit_buf, bits, bit_ptr); - DUMPBITS (bit_buf, bits, 1); /* remove field_select */ - - tmp = (decoder->f_motion.pmv[0][0] + - get_motion_delta (decoder, decoder->f_motion.f_code[0])); - tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[0]); - decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[0][0] = tmp; - - NEEDBITS (bit_buf, bits, bit_ptr); - tmp = (decoder->f_motion.pmv[0][1] + - get_motion_delta (decoder, decoder->f_motion.f_code[1])); - tmp = 
bound_motion_vector (tmp, decoder->f_motion.f_code[1]); - decoder->f_motion.pmv[1][1] = decoder->f_motion.pmv[0][1] = tmp; - - DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */ -} - -#undef bit_buf -#undef bits -#undef bit_ptr - -#define MOTION_CALL(routine,direction) \ -do { \ - if ((direction) & MACROBLOCK_MOTION_FORWARD) \ - routine (decoder, &(decoder->f_motion), mpeg2_mc.put); \ - if ((direction) & MACROBLOCK_MOTION_BACKWARD) \ - routine (decoder, &(decoder->b_motion), \ - ((direction) & MACROBLOCK_MOTION_FORWARD ? \ - mpeg2_mc.avg : mpeg2_mc.put)); \ -} while (0) - -#define NEXT_MACROBLOCK \ -do { \ - decoder->offset += 16; \ - if (decoder->offset == decoder->width) { \ - do { /* just so we can use the break statement */ \ - if (decoder->convert) { \ - decoder->convert (decoder->convert_id, decoder->dest, \ - decoder->v_offset); \ - if (decoder->coding_type == B_TYPE) \ - break; \ - } \ - decoder->dest[0] += decoder->slice_stride; \ - decoder->dest[1] += decoder->slice_uv_stride; \ - decoder->dest[2] += decoder->slice_uv_stride; \ - } while (0); \ - decoder->v_offset += 16; \ - if (decoder->v_offset > decoder->limit_y) { \ - if (mpeg2_cpu_state_restore) \ - mpeg2_cpu_state_restore (&cpu_state); \ - return; \ - } \ - decoder->offset = 0; \ - } \ -} while (0) - -void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3], - uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3]) -{ - int offset, stride, height, bottom_field; - - stride = decoder->stride_frame; - bottom_field = (decoder->picture_structure == BOTTOM_FIELD); - offset = bottom_field ? 
stride : 0; - height = decoder->height; - - decoder->picture_dest[0] = current_fbuf[0] + offset; - decoder->picture_dest[1] = current_fbuf[1] + (offset >> 1); - decoder->picture_dest[2] = current_fbuf[2] + (offset >> 1); - - decoder->f_motion.ref[0][0] = forward_fbuf[0] + offset; - decoder->f_motion.ref[0][1] = forward_fbuf[1] + (offset >> 1); - decoder->f_motion.ref[0][2] = forward_fbuf[2] + (offset >> 1); - - decoder->b_motion.ref[0][0] = backward_fbuf[0] + offset; - decoder->b_motion.ref[0][1] = backward_fbuf[1] + (offset >> 1); - decoder->b_motion.ref[0][2] = backward_fbuf[2] + (offset >> 1); - - if (decoder->picture_structure != FRAME_PICTURE) { - decoder->dmv_offset = bottom_field ? 1 : -1; - decoder->f_motion.ref2[0] = decoder->f_motion.ref[bottom_field]; - decoder->f_motion.ref2[1] = decoder->f_motion.ref[!bottom_field]; - decoder->b_motion.ref2[0] = decoder->b_motion.ref[bottom_field]; - decoder->b_motion.ref2[1] = decoder->b_motion.ref[!bottom_field]; - offset = stride - offset; - - if (decoder->second_field && (decoder->coding_type != B_TYPE)) - forward_fbuf = current_fbuf; - - decoder->f_motion.ref[1][0] = forward_fbuf[0] + offset; - decoder->f_motion.ref[1][1] = forward_fbuf[1] + (offset >> 1); - decoder->f_motion.ref[1][2] = forward_fbuf[2] + (offset >> 1); - - decoder->b_motion.ref[1][0] = backward_fbuf[0] + offset; - decoder->b_motion.ref[1][1] = backward_fbuf[1] + (offset >> 1); - decoder->b_motion.ref[1][2] = backward_fbuf[2] + (offset >> 1); - - stride <<= 1; - height >>= 1; - } - - decoder->stride = stride; - decoder->uv_stride = stride >> 1; - decoder->slice_stride = 16 * stride; - decoder->slice_uv_stride = - decoder->slice_stride >> (2 - decoder->chroma_format); - decoder->limit_x = 2 * decoder->width - 32; - decoder->limit_y_16 = 2 * height - 32; - decoder->limit_y_8 = 2 * height - 16; - decoder->limit_y = height - 16; - - if (decoder->mpeg1) { - decoder->motion_parser[0] = motion_zero_420; - decoder->motion_parser[MC_FRAME] = motion_mp1; - 
decoder->motion_parser[4] = motion_reuse_420; - } else if (decoder->picture_structure == FRAME_PICTURE) { - if (decoder->chroma_format == 0) { - decoder->motion_parser[0] = motion_zero_420; - decoder->motion_parser[MC_FIELD] = motion_fr_field_420; - decoder->motion_parser[MC_FRAME] = motion_fr_frame_420; - decoder->motion_parser[MC_DMV] = motion_fr_dmv_420; - decoder->motion_parser[4] = motion_reuse_420; - } else if (decoder->chroma_format == 1) { - decoder->motion_parser[0] = motion_zero_422; - decoder->motion_parser[MC_FIELD] = motion_fr_field_422; - decoder->motion_parser[MC_FRAME] = motion_fr_frame_422; - decoder->motion_parser[MC_DMV] = motion_fr_dmv_422; - decoder->motion_parser[4] = motion_reuse_422; - } else { - decoder->motion_parser[0] = motion_zero_444; - decoder->motion_parser[MC_FIELD] = motion_fr_field_444; - decoder->motion_parser[MC_FRAME] = motion_fr_frame_444; - decoder->motion_parser[MC_DMV] = motion_fr_dmv_444; - decoder->motion_parser[4] = motion_reuse_444; - } - } else { - if (decoder->chroma_format == 0) { - decoder->motion_parser[0] = motion_zero_420; - decoder->motion_parser[MC_FIELD] = motion_fi_field_420; - decoder->motion_parser[MC_16X8] = motion_fi_16x8_420; - decoder->motion_parser[MC_DMV] = motion_fi_dmv_420; - decoder->motion_parser[4] = motion_reuse_420; - } else if (decoder->chroma_format == 1) { - decoder->motion_parser[0] = motion_zero_422; - decoder->motion_parser[MC_FIELD] = motion_fi_field_422; - decoder->motion_parser[MC_16X8] = motion_fi_16x8_422; - decoder->motion_parser[MC_DMV] = motion_fi_dmv_422; - decoder->motion_parser[4] = motion_reuse_422; - } else { - decoder->motion_parser[0] = motion_zero_444; - decoder->motion_parser[MC_FIELD] = motion_fi_field_444; - decoder->motion_parser[MC_16X8] = motion_fi_16x8_444; - decoder->motion_parser[MC_DMV] = motion_fi_dmv_444; - decoder->motion_parser[4] = motion_reuse_444; - } - } -} - -static inline int slice_init (mpeg2_decoder_t * const decoder, int code) -{ -#define bit_buf 
(decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - int offset; - const MBAtab * mba; - - decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] = - decoder->dc_dct_pred[2] = 16384; - - decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0; - decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0; - decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0; - decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0; - - if (decoder->vertical_position_extension) { - code += UBITS (bit_buf, 3) << 7; - DUMPBITS (bit_buf, bits, 3); - } - decoder->v_offset = (code - 1) * 16; - offset = 0; - if (!(decoder->convert) || decoder->coding_type != B_TYPE) - offset = (code - 1) * decoder->slice_stride; - - decoder->dest[0] = decoder->picture_dest[0] + offset; - offset >>= (2 - decoder->chroma_format); - decoder->dest[1] = decoder->picture_dest[1] + offset; - decoder->dest[2] = decoder->picture_dest[2] + offset; - - get_quantizer_scale (decoder); - - /* ignore intra_slice and all the extra data */ - while (bit_buf & 0x80000000) { - DUMPBITS (bit_buf, bits, 9); - NEEDBITS (bit_buf, bits, bit_ptr); - } - - /* decode initial macroblock address increment */ - offset = 0; - while (1) { - if (bit_buf >= 0x08000000) { - mba = MBA_5 + (UBITS (bit_buf, 6) - 2); - break; - } else if (bit_buf >= 0x01800000) { - mba = MBA_11 + (UBITS (bit_buf, 12) - 24); - break; - } else switch (UBITS (bit_buf, 12)) { - case 8: /* macroblock_escape */ - offset += 33; - DUMPBITS (bit_buf, bits, 11); - NEEDBITS (bit_buf, bits, bit_ptr); - continue; - case 15: /* macroblock_stuffing (MPEG1 only) */ - bit_buf &= 0xfffff; - DUMPBITS (bit_buf, bits, 11); - NEEDBITS (bit_buf, bits, bit_ptr); - continue; - default: /* error */ - return 1; - } - } - DUMPBITS (bit_buf, bits, mba->len + 1); - decoder->offset = (offset + mba->mba) << 4; - - while (decoder->offset - decoder->width >= 0) { - decoder->offset -= decoder->width; - if 
(!(decoder->convert) || decoder->coding_type != B_TYPE) { - decoder->dest[0] += decoder->slice_stride; - decoder->dest[1] += decoder->slice_uv_stride; - decoder->dest[2] += decoder->slice_uv_stride; - } - decoder->v_offset += 16; - } - if (decoder->v_offset > decoder->limit_y) - return 1; - - return 0; -#undef bit_buf -#undef bits -#undef bit_ptr -} - -void mpeg2_slice (mpeg2_decoder_t * const decoder, const int code, - const uint8_t * const buffer) -{ -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - cpu_state_t cpu_state; - - bitstream_init (decoder, buffer); - - if (slice_init (decoder, code)) - return; - - if (mpeg2_cpu_state_save) - mpeg2_cpu_state_save (&cpu_state); - - while (1) { - int macroblock_modes; - int mba_inc; - const MBAtab * mba; - - NEEDBITS (bit_buf, bits, bit_ptr); - - macroblock_modes = get_macroblock_modes (decoder); - - /* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? */ - if (macroblock_modes & MACROBLOCK_QUANT) - get_quantizer_scale (decoder); - - if (macroblock_modes & MACROBLOCK_INTRA) { - - int DCT_offset, DCT_stride; - int offset; - uint8_t * dest_y; - - if (decoder->concealment_motion_vectors) { - if (decoder->picture_structure == FRAME_PICTURE) - motion_fr_conceal (decoder); - else - motion_fi_conceal (decoder); - } else { - decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0; - decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0; - decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0; - decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0; - } - - if (macroblock_modes & DCT_TYPE_INTERLACED) { - DCT_offset = decoder->stride; - DCT_stride = decoder->stride * 2; - } else { - DCT_offset = decoder->stride * 8; - DCT_stride = decoder->stride; - } - - offset = decoder->offset; - dest_y = decoder->dest[0] + offset; - slice_intra_DCT (decoder, 0, dest_y, DCT_stride); - slice_intra_DCT (decoder, 0, dest_y + 8, 
DCT_stride); - slice_intra_DCT (decoder, 0, dest_y + DCT_offset, DCT_stride); - slice_intra_DCT (decoder, 0, dest_y + DCT_offset + 8, DCT_stride); - if (likely (decoder->chroma_format == 0)) { - slice_intra_DCT (decoder, 1, decoder->dest[1] + (offset >> 1), - decoder->uv_stride); - slice_intra_DCT (decoder, 2, decoder->dest[2] + (offset >> 1), - decoder->uv_stride); - if (decoder->coding_type == D_TYPE) { - NEEDBITS (bit_buf, bits, bit_ptr); - DUMPBITS (bit_buf, bits, 1); - } - } else if (likely (decoder->chroma_format == 1)) { - uint8_t * dest_u = decoder->dest[1] + (offset >> 1); - uint8_t * dest_v = decoder->dest[2] + (offset >> 1); - DCT_stride >>= 1; - DCT_offset >>= 1; - slice_intra_DCT (decoder, 1, dest_u, DCT_stride); - slice_intra_DCT (decoder, 2, dest_v, DCT_stride); - slice_intra_DCT (decoder, 1, dest_u + DCT_offset, DCT_stride); - slice_intra_DCT (decoder, 2, dest_v + DCT_offset, DCT_stride); - } else { - uint8_t * dest_u = decoder->dest[1] + offset; - uint8_t * dest_v = decoder->dest[2] + offset; - slice_intra_DCT (decoder, 1, dest_u, DCT_stride); - slice_intra_DCT (decoder, 2, dest_v, DCT_stride); - slice_intra_DCT (decoder, 1, dest_u + DCT_offset, DCT_stride); - slice_intra_DCT (decoder, 2, dest_v + DCT_offset, DCT_stride); - slice_intra_DCT (decoder, 1, dest_u + 8, DCT_stride); - slice_intra_DCT (decoder, 2, dest_v + 8, DCT_stride); - slice_intra_DCT (decoder, 1, dest_u + DCT_offset + 8, - DCT_stride); - slice_intra_DCT (decoder, 2, dest_v + DCT_offset + 8, - DCT_stride); - } - } else { - - motion_parser_t * parser; - - parser = - decoder->motion_parser[macroblock_modes >> MOTION_TYPE_SHIFT]; - MOTION_CALL (parser, macroblock_modes); - - if (macroblock_modes & MACROBLOCK_PATTERN) { - int coded_block_pattern; - int DCT_offset, DCT_stride; - - if (macroblock_modes & DCT_TYPE_INTERLACED) { - DCT_offset = decoder->stride; - DCT_stride = decoder->stride * 2; - } else { - DCT_offset = decoder->stride * 8; - DCT_stride = decoder->stride; - } - - 
coded_block_pattern = get_coded_block_pattern (decoder); - - if (likely (decoder->chroma_format == 0)) { - int offset = decoder->offset; - uint8_t * dest_y = decoder->dest[0] + offset; - if (coded_block_pattern & 1) - slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride); - if (coded_block_pattern & 2) - slice_non_intra_DCT (decoder, 0, dest_y + 8, - DCT_stride); - if (coded_block_pattern & 4) - slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset, - DCT_stride); - if (coded_block_pattern & 8) - slice_non_intra_DCT (decoder, 0, - dest_y + DCT_offset + 8, - DCT_stride); - if (coded_block_pattern & 16) - slice_non_intra_DCT (decoder, 1, - decoder->dest[1] + (offset >> 1), - decoder->uv_stride); - if (coded_block_pattern & 32) - slice_non_intra_DCT (decoder, 2, - decoder->dest[2] + (offset >> 1), - decoder->uv_stride); - } else if (likely (decoder->chroma_format == 1)) { - int offset; - uint8_t * dest_y; - - coded_block_pattern |= bit_buf & (3 << 30); - DUMPBITS (bit_buf, bits, 2); - - offset = decoder->offset; - dest_y = decoder->dest[0] + offset; - if (coded_block_pattern & 1) - slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride); - if (coded_block_pattern & 2) - slice_non_intra_DCT (decoder, 0, dest_y + 8, - DCT_stride); - if (coded_block_pattern & 4) - slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset, - DCT_stride); - if (coded_block_pattern & 8) - slice_non_intra_DCT (decoder, 0, - dest_y + DCT_offset + 8, - DCT_stride); - - DCT_stride >>= 1; - DCT_offset = (DCT_offset + offset) >> 1; - if (coded_block_pattern & 16) - slice_non_intra_DCT (decoder, 1, - decoder->dest[1] + (offset >> 1), - DCT_stride); - if (coded_block_pattern & 32) - slice_non_intra_DCT (decoder, 2, - decoder->dest[2] + (offset >> 1), - DCT_stride); - if (coded_block_pattern & (2 << 30)) - slice_non_intra_DCT (decoder, 1, - decoder->dest[1] + DCT_offset, - DCT_stride); - if (coded_block_pattern & (1 << 30)) - slice_non_intra_DCT (decoder, 2, - decoder->dest[2] + DCT_offset, - DCT_stride); - } 
else { - int offset; - uint8_t * dest_y, * dest_u, * dest_v; - - coded_block_pattern |= bit_buf & (63 << 26); - DUMPBITS (bit_buf, bits, 6); - - offset = decoder->offset; - dest_y = decoder->dest[0] + offset; - dest_u = decoder->dest[1] + offset; - dest_v = decoder->dest[2] + offset; - - if (coded_block_pattern & 1) - slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride); - if (coded_block_pattern & 2) - slice_non_intra_DCT (decoder, 0, dest_y + 8, - DCT_stride); - if (coded_block_pattern & 4) - slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset, - DCT_stride); - if (coded_block_pattern & 8) - slice_non_intra_DCT (decoder, 0, - dest_y + DCT_offset + 8, - DCT_stride); - - if (coded_block_pattern & 16) - slice_non_intra_DCT (decoder, 1, dest_u, DCT_stride); - if (coded_block_pattern & 32) - slice_non_intra_DCT (decoder, 2, dest_v, DCT_stride); - if (coded_block_pattern & (32 << 26)) - slice_non_intra_DCT (decoder, 1, dest_u + DCT_offset, - DCT_stride); - if (coded_block_pattern & (16 << 26)) - slice_non_intra_DCT (decoder, 2, dest_v + DCT_offset, - DCT_stride); - if (coded_block_pattern & (8 << 26)) - slice_non_intra_DCT (decoder, 1, dest_u + 8, - DCT_stride); - if (coded_block_pattern & (4 << 26)) - slice_non_intra_DCT (decoder, 2, dest_v + 8, - DCT_stride); - if (coded_block_pattern & (2 << 26)) - slice_non_intra_DCT (decoder, 1, - dest_u + DCT_offset + 8, - DCT_stride); - if (coded_block_pattern & (1 << 26)) - slice_non_intra_DCT (decoder, 2, - dest_v + DCT_offset + 8, - DCT_stride); - } - } - - decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] = - decoder->dc_dct_pred[2] = 16384; - } - - NEXT_MACROBLOCK; - - NEEDBITS (bit_buf, bits, bit_ptr); - mba_inc = 0; - while (1) { - if (bit_buf >= 0x10000000) { - mba = MBA_5 + (UBITS (bit_buf, 5) - 2); - break; - } else if (bit_buf >= 0x03000000) { - mba = MBA_11 + (UBITS (bit_buf, 11) - 24); - break; - } else switch (UBITS (bit_buf, 11)) { - case 8: /* macroblock_escape */ - mba_inc += 33; - /* pass through */ - case 15: 
/* macroblock_stuffing (MPEG1 only) */ - DUMPBITS (bit_buf, bits, 11); - NEEDBITS (bit_buf, bits, bit_ptr); - continue; - default: /* end of slice, or error */ - if (mpeg2_cpu_state_restore) - mpeg2_cpu_state_restore (&cpu_state); - return; - } - } - DUMPBITS (bit_buf, bits, mba->len); - mba_inc += mba->mba; - - if (mba_inc) { - decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] = - decoder->dc_dct_pred[2] = 16384; - - if (decoder->coding_type == P_TYPE) { - do { - MOTION_CALL (decoder->motion_parser[0], - MACROBLOCK_MOTION_FORWARD); - NEXT_MACROBLOCK; - } while (--mba_inc); - } else { - do { - MOTION_CALL (decoder->motion_parser[4], macroblock_modes); - NEXT_MACROBLOCK; - } while (--mba_inc); - } - } - } -#undef bit_buf -#undef bits -#undef bit_ptr -} diff --git a/src/libmpeg2new/libmpeg2/uyvy.c b/src/libmpeg2new/libmpeg2/uyvy.c deleted file mode 100644 index 7f107ffad..000000000 --- a/src/libmpeg2new/libmpeg2/uyvy.c +++ /dev/null @@ -1,123 +0,0 @@ -/* - * uyvy.c - * Copyright (C) 2000-2003 Michel Lespinasse - * Copyright (C) 2003 Regis Duchesne - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#include - -#include "mpeg2.h" -#include "mpeg2convert.h" - -typedef struct { - int width; - int stride; - int chroma420; - uint8_t * out; -} convert_uyvy_t; - -static void uyvy_start (void * _id, const mpeg2_fbuf_t * fbuf, - const mpeg2_picture_t * picture, - const mpeg2_gop_t * gop) -{ - convert_uyvy_t * instance = (convert_uyvy_t *) _id; - - instance->out = fbuf->buf[0]; - instance->stride = instance->width; - if (picture->nb_fields == 1) { - if (! (picture->flags & PIC_FLAG_TOP_FIELD_FIRST)) - instance->out += 2 * instance->stride; - instance->stride <<= 1; - } -} - -#ifdef WORDS_BIGENDIAN -#define PACK(a,b,c,d) (((a) << 24) | ((b) << 16) | ((c) << 8) | (d)) -#else -#define PACK(a,b,c,d) (((d) << 24) | ((c) << 16) | ((b) << 8) | (a)) -#endif - -static void uyvy_copy (void * const _id, uint8_t * const * src, - const unsigned int v_offset) -{ - const convert_uyvy_t * const id = (convert_uyvy_t *) _id; - uint8_t * _dst; - uint8_t * py, * pu, * pv; - int i, j; - - _dst = id->out + 2 * id->stride * v_offset; - py = src[0]; pu = src[1]; pv = src[2]; - - i = 16; - do { - uint32_t * dst = (uint32_t *) _dst; - - j = id->width >> 4; - do { - dst[0] = PACK (pu[0], py[0], pv[0], py[1]); - dst[1] = PACK (pu[1], py[2], pv[1], py[3]); - dst[2] = PACK (pu[2], py[4], pv[2], py[5]); - dst[3] = PACK (pu[3], py[6], pv[3], py[7]); - dst[4] = PACK (pu[4], py[8], pv[4], py[9]); - dst[5] = PACK (pu[5], py[10], pv[5], py[11]); - dst[6] = PACK (pu[6], py[12], pv[6], py[13]); - dst[7] = PACK (pu[7], py[14], pv[7], py[15]); - py += 16; - pu += 8; - pv += 8; - dst += 8; - } while (--j); - py -= id->width; - pu -= id->width >> 1; - pv -= id->width >> 1; - _dst += 2 * id->stride; - py += id->stride; - if (! 
(--i & id->chroma420)) { - pu += id->stride >> 1; - pv += id->stride >> 1; - } - } while (i); -} - -int mpeg2convert_uyvy (int stage, void * _id, const mpeg2_sequence_t * seq, - int stride, uint32_t accel, void * arg, - mpeg2_convert_init_t * result) -{ - convert_uyvy_t * instance = (convert_uyvy_t *) _id; - - if (seq->chroma_width == seq->width) - return 1; - - if (instance) { - instance->width = seq->width; - instance->chroma420 = (seq->chroma_height < seq->height); - result->buf_size[0] = seq->width * seq->height * 2; - result->buf_size[1] = result->buf_size[2] = 0; - result->start = uyvy_start; - result->copy = uyvy_copy; - } else { - result->id_size = sizeof (convert_uyvy_t); - } - - return 0; -} diff --git a/src/libmpeg2new/libmpeg2/vlc.h b/src/libmpeg2new/libmpeg2/vlc.h deleted file mode 100644 index 57448ce04..000000000 --- a/src/libmpeg2new/libmpeg2/vlc.h +++ /dev/null @@ -1,429 +0,0 @@ -/* - * vlc.h - * Copyright (C) 2000-2003 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#define GETWORD(bit_buf,shift,bit_ptr) \ -do { \ - bit_buf |= ((bit_ptr[0] << 8) | bit_ptr[1]) << (shift); \ - bit_ptr += 2; \ -} while (0) - -static inline void bitstream_init (mpeg2_decoder_t * decoder, - const uint8_t * start) -{ - decoder->bitstream_buf = - (start[0] << 24) | (start[1] << 16) | (start[2] << 8) | start[3]; - decoder->bitstream_ptr = start + 4; - decoder->bitstream_bits = -16; -} - -/* make sure that there are at least 16 valid bits in bit_buf */ -#define NEEDBITS(bit_buf,bits,bit_ptr) \ -do { \ - if (unlikely (bits > 0)) { \ - GETWORD (bit_buf, bits, bit_ptr); \ - bits -= 16; \ - } \ -} while (0) - -/* remove num valid bits from bit_buf */ -#define DUMPBITS(bit_buf,bits,num) \ -do { \ - bit_buf <<= (num); \ - bits += (num); \ -} while (0) - -/* take num bits from the high part of bit_buf and zero extend them */ -#define UBITS(bit_buf,num) (((uint32_t)(bit_buf)) >> (32 - (num))) - -/* take num bits from the high part of bit_buf and sign extend them */ -#define SBITS(bit_buf,num) (((int32_t)(bit_buf)) >> (32 - (num))) - -typedef struct { - uint8_t modes; - uint8_t len; -} MBtab; - -typedef struct { - uint8_t delta; - uint8_t len; -} MVtab; - -typedef struct { - int8_t dmv; - uint8_t len; -} DMVtab; - -typedef struct { - uint8_t cbp; - uint8_t len; -} CBPtab; - -typedef struct { - uint8_t size; - uint8_t len; -} DCtab; - -typedef struct { - uint8_t run; - uint8_t level; - uint8_t len; -} DCTtab; - -typedef struct { - uint8_t mba; - uint8_t len; -} MBAtab; - - -#define INTRA MACROBLOCK_INTRA -#define QUANT MACROBLOCK_QUANT - -static const MBtab MB_I [] = { - {INTRA|QUANT, 2}, {INTRA, 1} -}; - -#define MC MACROBLOCK_MOTION_FORWARD -#define CODED MACROBLOCK_PATTERN - -static const MBtab MB_P [] = { - {INTRA|QUANT, 6}, {CODED|QUANT, 
5}, {MC|CODED|QUANT, 5}, {INTRA, 5}, - {MC, 3}, {MC, 3}, {MC, 3}, {MC, 3}, - {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, - {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, - {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, - {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, - {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, - {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1} -}; - -#define FWD MACROBLOCK_MOTION_FORWARD -#define BWD MACROBLOCK_MOTION_BACKWARD -#define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD - -static const MBtab MB_B [] = { - {0, 6}, {INTRA|QUANT, 6}, - {BWD|CODED|QUANT, 6}, {FWD|CODED|QUANT, 6}, - {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5}, - {INTRA, 5}, {INTRA, 5}, - {FWD, 4}, {FWD, 4}, {FWD, 4}, {FWD, 4}, - {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, - {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, - {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, - {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, - {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, - {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, - {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, - {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, - {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, - {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, - {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, - {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, - {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2} -}; - -#undef INTRA -#undef QUANT -#undef MC -#undef CODED -#undef FWD -#undef BWD -#undef INTER - - -static const MVtab MV_4 [] = { - { 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2} -}; - -static const MVtab MV_10 [] = { - { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, - { 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10}, - {11,10}, {10,10}, { 9, 9}, 
{ 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9}, - { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, - { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, - { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7} -}; - - -static const DMVtab DMV_2 [] = { - { 0, 1}, { 0, 1}, { 1, 2}, {-1, 2} -}; - - -static const CBPtab CBP_7 [] = { - {0x11, 7}, {0x12, 7}, {0x14, 7}, {0x18, 7}, - {0x21, 7}, {0x22, 7}, {0x24, 7}, {0x28, 7}, - {0x3f, 6}, {0x3f, 6}, {0x30, 6}, {0x30, 6}, - {0x09, 6}, {0x09, 6}, {0x06, 6}, {0x06, 6}, - {0x1f, 5}, {0x1f, 5}, {0x1f, 5}, {0x1f, 5}, - {0x10, 5}, {0x10, 5}, {0x10, 5}, {0x10, 5}, - {0x2f, 5}, {0x2f, 5}, {0x2f, 5}, {0x2f, 5}, - {0x20, 5}, {0x20, 5}, {0x20, 5}, {0x20, 5}, - {0x07, 5}, {0x07, 5}, {0x07, 5}, {0x07, 5}, - {0x0b, 5}, {0x0b, 5}, {0x0b, 5}, {0x0b, 5}, - {0x0d, 5}, {0x0d, 5}, {0x0d, 5}, {0x0d, 5}, - {0x0e, 5}, {0x0e, 5}, {0x0e, 5}, {0x0e, 5}, - {0x05, 5}, {0x05, 5}, {0x05, 5}, {0x05, 5}, - {0x0a, 5}, {0x0a, 5}, {0x0a, 5}, {0x0a, 5}, - {0x03, 5}, {0x03, 5}, {0x03, 5}, {0x03, 5}, - {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, - {0x01, 4}, {0x01, 4}, {0x01, 4}, {0x01, 4}, - {0x01, 4}, {0x01, 4}, {0x01, 4}, {0x01, 4}, - {0x02, 4}, {0x02, 4}, {0x02, 4}, {0x02, 4}, - {0x02, 4}, {0x02, 4}, {0x02, 4}, {0x02, 4}, - {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, - {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, - {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, - {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, - {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, - {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, - {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, - {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3} -}; - -static const CBPtab CBP_9 [] = { - {0, 9}, {0x00, 9}, {0x39, 9}, {0x36, 9}, - {0x37, 9}, {0x3b, 9}, {0x3d, 9}, {0x3e, 9}, - {0x17, 8}, {0x17, 8}, {0x1b, 8}, {0x1b, 8}, - {0x1d, 8}, {0x1d, 8}, {0x1e, 8}, {0x1e, 8}, - {0x27, 8}, {0x27, 8}, {0x2b, 8}, {0x2b, 8}, - {0x2d, 8}, {0x2d, 8}, {0x2e, 8}, {0x2e, 8}, - {0x19, 8}, 
{0x19, 8}, {0x16, 8}, {0x16, 8}, - {0x29, 8}, {0x29, 8}, {0x26, 8}, {0x26, 8}, - {0x35, 8}, {0x35, 8}, {0x3a, 8}, {0x3a, 8}, - {0x33, 8}, {0x33, 8}, {0x3c, 8}, {0x3c, 8}, - {0x15, 8}, {0x15, 8}, {0x1a, 8}, {0x1a, 8}, - {0x13, 8}, {0x13, 8}, {0x1c, 8}, {0x1c, 8}, - {0x25, 8}, {0x25, 8}, {0x2a, 8}, {0x2a, 8}, - {0x23, 8}, {0x23, 8}, {0x2c, 8}, {0x2c, 8}, - {0x31, 8}, {0x31, 8}, {0x32, 8}, {0x32, 8}, - {0x34, 8}, {0x34, 8}, {0x38, 8}, {0x38, 8} -}; - - -static const DCtab DC_lum_5 [] = { - {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, - {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, - {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, - {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5} -}; - -static const DCtab DC_chrom_5 [] = { - {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, - {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, - {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, - {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5} -}; - -static const DCtab DC_long [] = { - {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, - {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, - {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, { 7, 6}, { 7, 6}, - {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9} -}; - - -static const DCTtab DCT_16 [] = { - {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, - {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, - {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, - {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, - { 2,18, 0}, { 2,17, 0}, { 2,16, 0}, { 2,15, 0}, - { 7, 3, 0}, { 17, 2, 0}, { 16, 2, 0}, { 15, 2, 0}, - { 14, 2, 0}, { 13, 2, 0}, { 12, 2, 0}, { 32, 1, 0}, - { 31, 1, 0}, { 30, 1, 0}, { 29, 1, 0}, { 28, 1, 0} -}; - -static const DCTtab DCT_15 [] = { - { 1,40,15}, { 1,39,15}, { 1,38,15}, { 1,37,15}, - { 1,36,15}, { 1,35,15}, { 1,34,15}, { 1,33,15}, - { 1,32,15}, { 2,14,15}, { 
2,13,15}, { 2,12,15}, - { 2,11,15}, { 2,10,15}, { 2, 9,15}, { 2, 8,15}, - { 1,31,14}, { 1,31,14}, { 1,30,14}, { 1,30,14}, - { 1,29,14}, { 1,29,14}, { 1,28,14}, { 1,28,14}, - { 1,27,14}, { 1,27,14}, { 1,26,14}, { 1,26,14}, - { 1,25,14}, { 1,25,14}, { 1,24,14}, { 1,24,14}, - { 1,23,14}, { 1,23,14}, { 1,22,14}, { 1,22,14}, - { 1,21,14}, { 1,21,14}, { 1,20,14}, { 1,20,14}, - { 1,19,14}, { 1,19,14}, { 1,18,14}, { 1,18,14}, - { 1,17,14}, { 1,17,14}, { 1,16,14}, { 1,16,14} -}; - -static const DCTtab DCT_13 [] = { - { 11, 2,13}, { 10, 2,13}, { 6, 3,13}, { 4, 4,13}, - { 3, 5,13}, { 2, 7,13}, { 2, 6,13}, { 1,15,13}, - { 1,14,13}, { 1,13,13}, { 1,12,13}, { 27, 1,13}, - { 26, 1,13}, { 25, 1,13}, { 24, 1,13}, { 23, 1,13}, - { 1,11,12}, { 1,11,12}, { 9, 2,12}, { 9, 2,12}, - { 5, 3,12}, { 5, 3,12}, { 1,10,12}, { 1,10,12}, - { 3, 4,12}, { 3, 4,12}, { 8, 2,12}, { 8, 2,12}, - { 22, 1,12}, { 22, 1,12}, { 21, 1,12}, { 21, 1,12}, - { 1, 9,12}, { 1, 9,12}, { 20, 1,12}, { 20, 1,12}, - { 19, 1,12}, { 19, 1,12}, { 2, 5,12}, { 2, 5,12}, - { 4, 3,12}, { 4, 3,12}, { 1, 8,12}, { 1, 8,12}, - { 7, 2,12}, { 7, 2,12}, { 18, 1,12}, { 18, 1,12} -}; - -static const DCTtab DCT_B14_10 [] = { - { 17, 1,10}, { 6, 2,10}, { 1, 7,10}, { 3, 3,10}, - { 2, 4,10}, { 16, 1,10}, { 15, 1,10}, { 5, 2,10} -}; - -static const DCTtab DCT_B14_8 [] = { - { 65, 0,12}, { 65, 0,12}, { 65, 0,12}, { 65, 0,12}, - { 3, 2, 7}, { 3, 2, 7}, { 10, 1, 7}, { 10, 1, 7}, - { 1, 4, 7}, { 1, 4, 7}, { 9, 1, 7}, { 9, 1, 7}, - { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, - { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, - { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, - { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, - { 14, 1, 8}, { 1, 6, 8}, { 13, 1, 8}, { 12, 1, 8}, - { 4, 2, 8}, { 2, 3, 8}, { 1, 5, 8}, { 11, 1, 8} -}; - -static const DCTtab DCT_B14AC_5 [] = { - { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, - { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - {129, 0, 2}, {129, 0, 2}, {129, 0, 
2}, {129, 0, 2}, - {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2} -}; - -static const DCTtab DCT_B14DC_5 [] = { - { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, - { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, - { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, - { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, - { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1} -}; - -static const DCTtab DCT_B15_10 [] = { - { 6, 2, 9}, { 6, 2, 9}, { 15, 1, 9}, { 15, 1, 9}, - { 3, 4,10}, { 17, 1,10}, { 16, 1, 9}, { 16, 1, 9} -}; - -static const DCTtab DCT_B15_8 [] = { - { 65, 0,12}, { 65, 0,12}, { 65, 0,12}, { 65, 0,12}, - { 8, 1, 7}, { 8, 1, 7}, { 9, 1, 7}, { 9, 1, 7}, - { 7, 1, 7}, { 7, 1, 7}, { 3, 2, 7}, { 3, 2, 7}, - { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, - { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, - { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, - { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, - { 2, 5, 8}, { 12, 1, 8}, { 1,11, 8}, { 1,10, 8}, - { 14, 1, 8}, { 13, 1, 8}, { 4, 2, 8}, { 2, 4, 8}, - { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, - { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, - { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, - { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, - { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, - { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, - {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, 
{129, 0, 4}, - {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, - {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, - { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, - { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, - { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, - { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, - { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, - { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, - { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, - { 10, 1, 7}, { 10, 1, 7}, { 2, 3, 7}, { 2, 3, 7}, - { 11, 1, 7}, { 11, 1, 7}, { 1, 8, 7}, { 1, 8, 7}, - { 1, 9, 7}, { 1, 9, 7}, { 1,12, 8}, { 1,13, 8}, - { 3, 3, 8}, { 5, 2, 8}, { 1,14, 8}, { 1,15, 8} -}; - - -static const MBAtab MBA_5 [] = { - {6, 5}, {5, 5}, {4, 4}, {4, 4}, 
{3, 4}, {3, 4}, - {2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, - {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, - {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1} -}; - -static const MBAtab MBA_11 [] = { - {32, 11}, {31, 11}, {30, 11}, {29, 11}, - {28, 11}, {27, 11}, {26, 11}, {25, 11}, - {24, 11}, {23, 11}, {22, 11}, {21, 11}, - {20, 10}, {20, 10}, {19, 10}, {19, 10}, - {18, 10}, {18, 10}, {17, 10}, {17, 10}, - {16, 10}, {16, 10}, {15, 10}, {15, 10}, - {14, 8}, {14, 8}, {14, 8}, {14, 8}, - {14, 8}, {14, 8}, {14, 8}, {14, 8}, - {13, 8}, {13, 8}, {13, 8}, {13, 8}, - {13, 8}, {13, 8}, {13, 8}, {13, 8}, - {12, 8}, {12, 8}, {12, 8}, {12, 8}, - {12, 8}, {12, 8}, {12, 8}, {12, 8}, - {11, 8}, {11, 8}, {11, 8}, {11, 8}, - {11, 8}, {11, 8}, {11, 8}, {11, 8}, - {10, 8}, {10, 8}, {10, 8}, {10, 8}, - {10, 8}, {10, 8}, {10, 8}, {10, 8}, - { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, - { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, - { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, - { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, - { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, - { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, - { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, - { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, - { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, - { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7} -}; diff --git a/src/libmpeg2new/xine_mpeg2new_decoder.c b/src/libmpeg2new/xine_mpeg2new_decoder.c deleted file mode 100644 index 7494791b1..000000000 --- a/src/libmpeg2new/xine_mpeg2new_decoder.c +++ /dev/null @@ -1,504 +0,0 @@ -/* - * Copyright (C) 2000-2004 the xine project - * - * This file is part of xine, a free video player. - * - * xine is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * xine is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * stuff needed to turn libmpeg2 into a xine decoder plugin - */ - - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "./include/mpeg2.h" -#include -#include -#include - - - -#define LOG -#define LOG_FRAME_ALLOC_FREE -#define LOG_ENTRY -#define LOG_FRAME_COUNTER - - -typedef struct { - video_decoder_class_t decoder_class; -} mpeg2_class_t; - -typedef struct { - uint32_t id; - vo_frame_t * img; -} img_state_t; - -typedef struct mpeg2_video_decoder_s { - video_decoder_t video_decoder; - mpeg2dec_t *mpeg2dec; - mpeg2_class_t *class; - xine_stream_t *stream; - int32_t force_aspect; - int force_pan_scan; - double ratio; - img_state_t img_state[30]; - uint32_t frame_number; - uint32_t rff_pattern; - -} mpeg2_video_decoder_t; - - -static void mpeg2_video_print_bad_state(img_state_t * img_state) { - int32_t n,m; - m=0; - for(n=0;n<30;n++) { - if (img_state[n].id>0) { - printf("%d = %u\n",n, img_state[n].id); - m++; - } - } - if (m > 3) _x_abort(); - if (m == 0) printf("NO FRAMES\n"); -} - -static void mpeg2_video_free_all(img_state_t * img_state) { - int32_t n,m; - vo_frame_t * img; - printf("libmpeg2new:free_all\n"); - for(n=0;n<30;n++) { - if (img_state[n].id>0) { - img = img_state[n].img; - img->free(img); - img_state[n].id = 0; - } - } -} - - -static void mpeg2_video_print_fbuf(const mpeg2_fbuf_t * fbuf) { - printf("%p",fbuf); - vo_frame_t * img; - if (fbuf) { - img = (vo_frame_t *) fbuf->id; - if (img) { - printf (", img=%p, (id=%d)\n", - img, img->id); - } else { - 
printf (", img=NULL\n"); - } - } else { - printf ("\n"); - } -} - -static void mpeg2_video_decode_data (video_decoder_t *this_gen, buf_element_t *buf_element) { - mpeg2_video_decoder_t *this = (mpeg2_video_decoder_t *) this_gen; - uint8_t * current = buf_element->content; - uint8_t * end = buf_element->content + buf_element->size; - const mpeg2_info_t * info; - mpeg2_state_t state; - vo_frame_t * img; - uint32_t picture_structure; - int32_t frame_skipping; - - /* handle aspect hints from xine-dvdnav */ - if (buf_element->decoder_flags & BUF_FLAG_SPECIAL) { - if (buf_element->decoder_info[1] == BUF_SPECIAL_ASPECT) { - this->force_aspect = buf_element->decoder_info[2]; - if (buf_element->decoder_info[3] == 0x1 && buf_element->decoder_info[2] == 3) - /* letterboxing is denied, we have to do pan&scan */ - this->force_pan_scan = 1; - else - this->force_pan_scan = 0; - } - - return; - } - - if (buf_element->decoder_flags != 0) return; - -#ifdef LOG_ENTRY - printf ("libmpeg2: decode_data: enter\n"); -#endif - - mpeg2_buffer (this->mpeg2dec, current, end); - - info = mpeg2_info (this->mpeg2dec); - - while ((state = mpeg2_parse (this->mpeg2dec)) != STATE_BUFFER) { - switch (state) { - case STATE_SEQUENCE: - /* might set nb fbuf, convert format, stride */ - /* might set fbufs */ - _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_BITRATE, info->sequence->byte_rate * 8); - _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_WIDTH, info->sequence->picture_width); - _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_HEIGHT, info->sequence->picture_height); - _x_stream_info_set(this->stream, XINE_STREAM_INFO_FRAME_DURATION, info->sequence->frame_period / 300); - if (this->force_aspect) info->sequence->pixel_width = this->force_aspect; - switch (info->sequence->pixel_width) { - case 3: - this->ratio = 16.0 / 9.0; - break; - case 4: - this->ratio = 2.11; - break; - case 2: - this->ratio = 4.0 / 3.0; - break; - case 1: - default: - this->ratio = 
(double)info->sequence->picture_width/(double)info->sequence->picture_height; - break; - } - _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_RATIO, (int)(10000*this->ratio)); - - if (info->sequence->flags & SEQ_FLAG_MPEG2) { - _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "MPEG 2 (libmpeg2new)"); - } else { - _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "MPEG 1 (libmpeg2new)"); - } - - break; - case STATE_PICTURE: - /* might skip */ - /* might set fbuf */ - if (info->current_picture->nb_fields == 1) { - picture_structure = info->current_picture->flags & PIC_FLAG_TOP_FIELD_FIRST ? VO_TOP_FIELD : VO_BOTTOM_FIELD; - } else { - picture_structure = VO_BOTH_FIELDS; - } - - img = this->stream->video_out->get_frame (this->stream->video_out, - info->sequence->picture_width, - info->sequence->picture_height, - this->ratio, - XINE_IMGFMT_YV12, - picture_structure); - this->frame_number++; -#ifdef LOG_FRAME_COUNTER - printf("libmpeg2:frame_number=%d\n",this->frame_number); -#endif - img->top_field_first = info->current_picture->flags & PIC_FLAG_TOP_FIELD_FIRST ? 1 : 0; - img->repeat_first_field = (info->current_picture->nb_fields > 2) ? 
1 : 0; - img->duration=info->sequence->frame_period / 300; - if( ((this->rff_pattern & 0xff) == 0xaa || - (this->rff_pattern & 0xff) == 0x55) ) { - /* special case for ntsc 3:2 pulldown */ - img->duration += img->duration/4; - } else { - if( img->repeat_first_field ) { - img->duration = (img->duration * info->current_picture->nb_fields) / 2; - } - } - - if ((info->current_picture->flags & 7) == 1) { - img->pts=buf_element->pts; /* If an I frame, use PTS */ - } else { - img->pts=0; - } - - -#ifdef LOG_FRAME_ALLOC_FREE - printf ("libmpeg2:decode_data:get_frame xine=%p (id=%d)\n", img,img->id); -#endif - if (this->img_state[img->id].id != 0) { - printf ("libmpeg2:decode_data:get_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id].id); - _x_abort(); - } - - this->img_state[img->id].id = 1; - this->img_state[img->id].img = img; - - mpeg2_set_buf (this->mpeg2dec, img->base, img); - break; - case STATE_SLICE: - case STATE_END: -#if 0 - printf("libmpeg2:decode_data:current_fbuf="); - mpeg2_video_print_fbuf(info->current_fbuf); - printf("libmpeg2:decode_data:display_fbuf="); - mpeg2_video_print_fbuf(info->display_fbuf); - printf("libmpeg2:decode_data:discard_fbuf="); - mpeg2_video_print_fbuf(info->discard_fbuf); -#endif - /* draw current picture */ - /* might free frame buffer */ - if (info->display_fbuf && info->display_fbuf->id) { - img = (vo_frame_t *) info->display_fbuf->id; - /* this should be used to detect any special rff pattern */ - this->rff_pattern = this->rff_pattern << 1; - this->rff_pattern |= img->repeat_first_field; - -#ifdef LOG_FRAME_ALLOC_FREE - printf ("libmpeg2:decode_data:draw_frame xine=%p, fbuf=%p, id=%d \n", img, info->display_fbuf, img->id); -#endif - if (this->img_state[img->id].id != 1) { - printf ("libmpeg2:decode_data:draw_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id].id); - _x_abort(); - } - if (this->img_state[img->id].id == 1) { - frame_skipping = img->draw (img, this->stream); - /* FIXME: Handle skipping */ - 
this->img_state[img->id].id = 2; - } - - } - if (info->discard_fbuf && !info->discard_fbuf->id) { - printf ("libmpeg2:decode_data:BAD free_frame discard: xine=%p, fbuf=%p\n", info->discard_fbuf->id, info->discard_fbuf); - //_x_abort(); - } - if (info->discard_fbuf && info->discard_fbuf->id) { - img = (vo_frame_t *) info->discard_fbuf->id; -#ifdef LOG_FRAME_ALLOC_FREE - printf ("libmpeg2:decode_data:free_frame xine=%p, fbuf=%p,id=%d\n", img, info->discard_fbuf, img->id); -#endif - if (this->img_state[img->id].id != 2) { - printf ("libmpeg2:decode_data:free_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id].id); - _x_abort(); - } - if (this->img_state[img->id].id == 2) { - img->free(img); - this->img_state[img->id].id = 0; - } - } -#ifdef LOG_FRAME_ALLOC_FREE - mpeg2_video_print_bad_state(this->img_state); -#endif - break; - case STATE_GOP: - break; - default: - printf("libmpeg2new: STATE unknown %d\n",state); - break; - } - - } -#ifdef LOG_ENTRY - printf ("libmpeg2: decode_data: exit\n"); -#endif - -} - -static void mpeg2_video_flush (video_decoder_t *this_gen) { - mpeg2_video_decoder_t *this = (mpeg2_video_decoder_t *) this_gen; - -#ifdef LOG_ENTRY - printf ("libmpeg2: flush\n"); -#endif - -/* mpeg2_flush (&this->mpeg2); */ -} - -static void mpeg2_video_reset (video_decoder_t *this_gen) { - mpeg2_video_decoder_t *this = (mpeg2_video_decoder_t *) this_gen; - int32_t state; - const mpeg2_info_t * info; - vo_frame_t * img; - int32_t frame_skipping; - -#ifdef LOG_ENTRY - printf ("libmpeg2: reset\n"); -#endif - mpeg2_reset (this->mpeg2dec, 1); /* 1 for full reset */ - mpeg2_video_free_all(this->img_state); - - -#if 0 /* This bit of code does not work yet. 
*/ - info = mpeg2_info (this->mpeg2dec); - state = mpeg2_reset (this->mpeg2dec); - printf("reset state1:%d\n",state); - if (info->display_fbuf && info->display_fbuf->id) { - img = (vo_frame_t *) info->display_fbuf->id; - - if (this->img_state[img->id] != 1) { - printf ("libmpeg2:decode_data:draw_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id]); - _x_abort(); - } - if (this->img_state[img->id] == 1) { - frame_skipping = img->draw (img, this->stream); - /* FIXME: Handle skipping */ - this->img_state[img->id] = 2; - } - } - - if (info->discard_fbuf && !info->discard_fbuf->id) { - printf ("libmpeg2:decode_data:BAD free_frame discard_fbuf=%p\n", info->discard_fbuf); - _x_abort(); - } - if (info->discard_fbuf && info->discard_fbuf->id) { - img = (vo_frame_t *) info->discard_fbuf->id; - if (this->img_state[img->id] != 2) { - printf ("libmpeg2:decode_data:free_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id]); - _x_abort(); - } - if (this->img_state[img->id] == 2) { - img->free(img); - this->img_state[img->id] = 0; - } - } - state = mpeg2_parse (this->mpeg2dec); - printf("reset state2:%d\n",state); - if (info->display_fbuf && info->display_fbuf->id) { - img = (vo_frame_t *) info->display_fbuf->id; - - if (this->img_state[img->id] != 1) { - printf ("libmpeg2:decode_data:draw_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id]); - _x_abort(); - } - if (this->img_state[img->id] == 1) { - frame_skipping = img->draw (img, this->stream); - /* FIXME: Handle skipping */ - this->img_state[img->id] = 2; - } - } - - if (info->discard_fbuf && !info->discard_fbuf->id) { - printf ("libmpeg2:decode_data:BAD free_frame discard_fbuf=%p\n", info->discard_fbuf); - _x_abort(); - } - if (info->discard_fbuf && info->discard_fbuf->id) { - img = (vo_frame_t *) info->discard_fbuf->id; - if (this->img_state[img->id] != 2) { - printf ("libmpeg2:decode_data:free_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id]); - _x_abort(); - } - if 
(this->img_state[img->id] == 2) { - img->free(img); - this->img_state[img->id] = 0; - } - } - state = mpeg2_parse (this->mpeg2dec); - printf("reset state3:%d\n",state); - if (info->display_fbuf && info->display_fbuf->id) { - img = (vo_frame_t *) info->display_fbuf->id; - - if (this->img_state[img->id] != 1) { - printf ("libmpeg2:decode_data:draw_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id]); - _x_abort(); - } - if (this->img_state[img->id] == 1) { - frame_skipping = img->draw (img, this->stream); - /* FIXME: Handle skipping */ - this->img_state[img->id] = 2; - } - } - - if (info->discard_fbuf && !info->discard_fbuf->id) { - printf ("libmpeg2:decode_data:BAD free_frame discard_fbuf=%p\n", info->discard_fbuf); - _x_abort(); - } - if (info->discard_fbuf && info->discard_fbuf->id) { - img = (vo_frame_t *) info->discard_fbuf->id; - if (this->img_state[img->id] != 2) { - printf ("libmpeg2:decode_data:free_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id]); - _x_abort(); - } - if (this->img_state[img->id] == 2) { - img->free(img); - this->img_state[img->id] = 0; - } - } -#endif - -} - -static void mpeg2_video_discontinuity (video_decoder_t *this_gen) { - mpeg2_video_decoder_t *this = (mpeg2_video_decoder_t *) this_gen; - -#ifdef LOG_ENTRY - printf ("libmpeg2: dicontinuity\n"); -#endif -/* mpeg2_discontinuity (&this->mpeg2dec); */ -} - -static void mpeg2_video_dispose (video_decoder_t *this_gen) { - - mpeg2_video_decoder_t *this = (mpeg2_video_decoder_t *) this_gen; - -#ifdef LOG_ENTRY - printf ("libmpeg2: close\n"); -#endif - - mpeg2_close (this->mpeg2dec); - - this->stream->video_out->close(this->stream->video_out, this->stream); - - free (this); -} - -static video_decoder_t *open_plugin (video_decoder_class_t *class_gen, xine_stream_t *stream) { - mpeg2_video_decoder_t *this ; - int32_t n; - - this = (mpeg2_video_decoder_t *) calloc(1, sizeof(mpeg2_video_decoder_t)); - - this->video_decoder.decode_data = mpeg2_video_decode_data; - 
this->video_decoder.flush = mpeg2_video_flush; - this->video_decoder.reset = mpeg2_video_reset; - this->video_decoder.discontinuity = mpeg2_video_discontinuity; - this->video_decoder.dispose = mpeg2_video_dispose; - this->stream = stream; - this->class = (mpeg2_class_t *) class_gen; - this->frame_number=0; - this->rff_pattern=0; - - this->mpeg2dec = mpeg2_init (); - mpeg2_custom_fbuf (this->mpeg2dec, 1); /* <- Force libmpeg2 to use xine frame buffers. */ - (stream->video_out->open) (stream->video_out, stream); - this->force_aspect = this->force_pan_scan = 0; - for(n=0;n<30;n++) this->img_state[n].id=0; - - return &this->video_decoder; -} - -/* - * mpeg2 plugin class - */ -static void *init_plugin (xine_t *xine, void *data) { - - mpeg2_class_t *this; - - this = (mpeg2_class_t *) calloc(1, sizeof(mpeg2_class_t)); - - this->decoder_class.open_plugin = open_plugin; - this->decoder_class.identifier = "mpeg2new"; - this->decoder_class.description = N_("mpeg2 based video decoder plugin"); - this->decoder_class.dispose = default_video_decoder_class_dispose; - - return this; -} -/* - * exported plugin catalog entry - */ - -static uint32_t supported_types[] = { BUF_VIDEO_MPEG, 0 }; - -static decoder_info_t dec_info_mpeg2 = { - supported_types, /* supported types */ - 6 /* priority */ -}; - -plugin_info_t xine_plugin_info[] = { - /* type, API, "name", version, special_info, init_function */ - { PLUGIN_VIDEO_DECODER, 19, "mpeg2new", XINE_VERSION_CODE, &dec_info_mpeg2, init_plugin }, - { PLUGIN_NONE, 0, "", 0, NULL, NULL } -}; diff --git a/src/video_dec/libmpeg2new/Makefile.am b/src/video_dec/libmpeg2new/Makefile.am new file mode 100644 index 000000000..8c248fdcb --- /dev/null +++ b/src/video_dec/libmpeg2new/Makefile.am @@ -0,0 +1,12 @@ +include $(top_srcdir)/misc/Makefile.common + +AM_CFLAGS = $(DEFAULT_OCFLAGS) $(VISIBILITY_FLAG) +AM_LDFLAGS = $(xineplug_ldflags) + +SUBDIRS = libmpeg2 + +xineplug_LTLIBRARIES = xineplug_decode_mpeg2.la + +xineplug_decode_mpeg2_la_SOURCES = 
xine_mpeg2_decoder.c +xineplug_decode_mpeg2_la_LIBADD = $(XINE_LIB) ./libmpeg2/libmpeg2.la +xineplug_decode_mpeg2_la_CFLAGS = $(AM_CFLAGS) $(MLIB_CFLAGS) diff --git a/src/video_dec/libmpeg2new/include/Makefile.am b/src/video_dec/libmpeg2new/include/Makefile.am new file mode 100644 index 000000000..302d01cb1 --- /dev/null +++ b/src/video_dec/libmpeg2new/include/Makefile.am @@ -0,0 +1,3 @@ +pkginclude_HEADERS = mpeg2.h mpeg2convert.h + +EXTRA_DIST = video_out.h mmx.h alpha_asm.h vis.h attributes.h tendra.h diff --git a/src/video_dec/libmpeg2new/include/alpha_asm.h b/src/video_dec/libmpeg2new/include/alpha_asm.h new file mode 100644 index 000000000..bf1081f24 --- /dev/null +++ b/src/video_dec/libmpeg2new/include/alpha_asm.h @@ -0,0 +1,181 @@ +/* + * Alpha assembly macros + * Copyright (c) 2002-2003 Falk Hueffner + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef ALPHA_ASM_H +#define ALPHA_ASM_H + +#include + +#if defined __GNUC__ +# define GNUC_PREREQ(maj, min) \ + ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) +#else +# define GNUC_PREREQ(maj, min) 0 +#endif + +#define AMASK_BWX (1 << 0) +#define AMASK_FIX (1 << 1) +#define AMASK_CIX (1 << 2) +#define AMASK_MVI (1 << 8) + +#ifdef __alpha_bwx__ +# define HAVE_BWX() 1 +#else +# define HAVE_BWX() (amask(AMASK_BWX) == 0) +#endif +#ifdef __alpha_fix__ +# define HAVE_FIX() 1 +#else +# define HAVE_FIX() (amask(AMASK_FIX) == 0) +#endif +#ifdef __alpha_max__ +# define HAVE_MVI() 1 +#else +# define HAVE_MVI() (amask(AMASK_MVI) == 0) +#endif +#ifdef __alpha_cix__ +# define HAVE_CIX() 1 +#else +# define HAVE_CIX() (amask(AMASK_CIX) == 0) +#endif + +inline static uint64_t BYTE_VEC(uint64_t x) +{ + x |= x << 8; + x |= x << 16; + x |= x << 32; + return x; +} +inline static uint64_t WORD_VEC(uint64_t x) +{ + x |= x << 16; + x |= x << 32; + return x; +} + +#define ldq(p) (*(const uint64_t *) (p)) +#define ldl(p) (*(const int32_t *) (p)) +#define stl(l, p) do { *(uint32_t *) (p) = (l); } while (0) +#define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0) +#define sextw(x) ((int16_t) (x)) + +#ifdef __GNUC__ +struct unaligned_long { uint64_t l; } __attribute__((packed)); +#define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul)) +#define uldq(a) (((const struct unaligned_long *) (a))->l) + +#if GNUC_PREREQ(3,3) +#define prefetch(p) __builtin_prefetch((p), 0, 1) +#define prefetch_en(p) __builtin_prefetch((p), 0, 0) +#define prefetch_m(p) __builtin_prefetch((p), 1, 1) +#define prefetch_men(p) __builtin_prefetch((p), 1, 0) +#define cmpbge __builtin_alpha_cmpbge +/* Avoid warnings. 
*/ +#define extql(a, b) __builtin_alpha_extql(a, (uint64_t) (b)) +#define extwl(a, b) __builtin_alpha_extwl(a, (uint64_t) (b)) +#define extqh(a, b) __builtin_alpha_extqh(a, (uint64_t) (b)) +#define zap __builtin_alpha_zap +#define zapnot __builtin_alpha_zapnot +#define amask __builtin_alpha_amask +#define implver __builtin_alpha_implver +#define rpcc __builtin_alpha_rpcc +#else +#define prefetch(p) asm volatile("ldl $31,%0" : : "m"(*(const char *) (p)) : "memory") +#define prefetch_en(p) asm volatile("ldq $31,%0" : : "m"(*(const char *) (p)) : "memory") +#define prefetch_m(p) asm volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory") +#define prefetch_men(p) asm volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory") +#define cmpbge(a, b) ({ uint64_t __r; asm ("cmpbge %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) +#define extql(a, b) ({ uint64_t __r; asm ("extql %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) +#define extwl(a, b) ({ uint64_t __r; asm ("extwl %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) +#define extqh(a, b) ({ uint64_t __r; asm ("extqh %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) +#define zap(a, b) ({ uint64_t __r; asm ("zap %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) +#define zapnot(a, b) ({ uint64_t __r; asm ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) +#define amask(a) ({ uint64_t __r; asm ("amask %1,%0" : "=r" (__r) : "rI" (a)); __r; }) +#define implver() ({ uint64_t __r; asm ("implver %0" : "=r" (__r)); __r; }) +#define rpcc() ({ uint64_t __r; asm volatile ("rpcc %0" : "=r" (__r)); __r; }) +#endif +#define wh64(p) asm volatile("wh64 (%0)" : : "r"(p) : "memory") + +#if GNUC_PREREQ(3,3) && defined(__alpha_max__) +#define minub8 __builtin_alpha_minub8 +#define minsb8 __builtin_alpha_minsb8 +#define minuw4 __builtin_alpha_minuw4 +#define minsw4 __builtin_alpha_minsw4 +#define maxub8 __builtin_alpha_maxub8 +#define maxsb8 __builtin_alpha_maxsb8 +#define maxuw4 
__builtin_alpha_maxuw4 +#define maxsw4 __builtin_alpha_maxsw4 +#define perr __builtin_alpha_perr +#define pklb __builtin_alpha_pklb +#define pkwb __builtin_alpha_pkwb +#define unpkbl __builtin_alpha_unpkbl +#define unpkbw __builtin_alpha_unpkbw +#else +#define minub8(a, b) ({ uint64_t __r; asm (".arch ev6; minub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define minsb8(a, b) ({ uint64_t __r; asm (".arch ev6; minsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define minuw4(a, b) ({ uint64_t __r; asm (".arch ev6; minuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define minsw4(a, b) ({ uint64_t __r; asm (".arch ev6; minsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define maxub8(a, b) ({ uint64_t __r; asm (".arch ev6; maxub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define maxsb8(a, b) ({ uint64_t __r; asm (".arch ev6; maxsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define maxuw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define maxsw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define perr(a, b) ({ uint64_t __r; asm (".arch ev6; perr %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; }) +#define pklb(a) ({ uint64_t __r; asm (".arch ev6; pklb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) +#define pkwb(a) ({ uint64_t __r; asm (".arch ev6; pkwb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) +#define unpkbl(a) ({ uint64_t __r; asm (".arch ev6; unpkbl %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) +#define unpkbw(a) ({ uint64_t __r; asm (".arch ev6; unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) +#endif + +#elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */ + +#include +#define ldq_u(a) asm ("ldq_u %v0,0(%a0)", a) +#define uldq(a) (*(const __unaligned uint64_t *) (a)) +#define cmpbge(a, b) asm ("cmpbge %a0,%a1,%v0", a, b) +#define extql(a, b) asm ("extql 
%a0,%a1,%v0", a, b) +#define extwl(a, b) asm ("extwl %a0,%a1,%v0", a, b) +#define extqh(a, b) asm ("extqh %a0,%a1,%v0", a, b) +#define zap(a, b) asm ("zap %a0,%a1,%v0", a, b) +#define zapnot(a, b) asm ("zapnot %a0,%a1,%v0", a, b) +#define amask(a) asm ("amask %a0,%v0", a) +#define implver() asm ("implver %v0") +#define rpcc() asm ("rpcc %v0") +#define minub8(a, b) asm ("minub8 %a0,%a1,%v0", a, b) +#define minsb8(a, b) asm ("minsb8 %a0,%a1,%v0", a, b) +#define minuw4(a, b) asm ("minuw4 %a0,%a1,%v0", a, b) +#define minsw4(a, b) asm ("minsw4 %a0,%a1,%v0", a, b) +#define maxub8(a, b) asm ("maxub8 %a0,%a1,%v0", a, b) +#define maxsb8(a, b) asm ("maxsb8 %a0,%a1,%v0", a, b) +#define maxuw4(a, b) asm ("maxuw4 %a0,%a1,%v0", a, b) +#define maxsw4(a, b) asm ("maxsw4 %a0,%a1,%v0", a, b) +#define perr(a, b) asm ("perr %a0,%a1,%v0", a, b) +#define pklb(a) asm ("pklb %a0,%v0", a) +#define pkwb(a) asm ("pkwb %a0,%v0", a) +#define unpkbl(a) asm ("unpkbl %a0,%v0", a) +#define unpkbw(a) asm ("unpkbw %a0,%v0", a) +#define wh64(a) asm ("wh64 %a0", a) + +#else +#error "Unknown compiler!" +#endif + +#endif /* ALPHA_ASM_H */ diff --git a/src/video_dec/libmpeg2new/include/attributes.h b/src/video_dec/libmpeg2new/include/attributes.h new file mode 100644 index 000000000..eefbc0dd1 --- /dev/null +++ b/src/video_dec/libmpeg2new/include/attributes.h @@ -0,0 +1,37 @@ +/* + * attributes.h + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* use gcc attribs to align critical data structures */ +#ifdef ATTRIBUTE_ALIGNED_MAX +#define ATTR_ALIGN(align) __attribute__ ((__aligned__ ((ATTRIBUTE_ALIGNED_MAX < align) ? ATTRIBUTE_ALIGNED_MAX : align))) +#else +#define ATTR_ALIGN(align) +#endif + +#ifdef HAVE_BUILTIN_EXPECT +#define likely(x) __builtin_expect ((x) != 0, 1) +#define unlikely(x) __builtin_expect ((x) != 0, 0) +#else +#define likely(x) (x) +#define unlikely(x) (x) +#endif diff --git a/src/video_dec/libmpeg2new/include/mmx.h b/src/video_dec/libmpeg2new/include/mmx.h new file mode 100644 index 000000000..08b4d4776 --- /dev/null +++ b/src/video_dec/libmpeg2new/include/mmx.h @@ -0,0 +1,263 @@ +/* + * mmx.h + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * The type of an value that fits in an MMX register (note that long + * long constant values MUST be suffixed by LL and unsigned long long + * values by ULL, lest they be truncated by the compiler) + */ + +typedef union { + long long q; /* Quadword (64-bit) value */ + unsigned long long uq; /* Unsigned Quadword */ + int d[2]; /* 2 Doubleword (32-bit) values */ + unsigned int ud[2]; /* 2 Unsigned Doubleword */ + short w[4]; /* 4 Word (16-bit) values */ + unsigned short uw[4]; /* 4 Unsigned Word */ + char b[8]; /* 8 Byte (8-bit) values */ + unsigned char ub[8]; /* 8 Unsigned Byte */ + float s[2]; /* Single-precision (32-bit) value */ +} ATTR_ALIGN(8) mmx_t; /* On an 8-byte (64-bit) boundary */ + + +#define mmx_i2r(op,imm,reg) \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "i" (imm) ) + +#define mmx_m2r(op,mem,reg) \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "m" (mem)) + +#define mmx_r2m(op,reg,mem) \ + __asm__ __volatile__ (#op " %%" #reg ", %0" \ + : "=m" (mem) \ + : /* nothing */ ) + +#define mmx_r2r(op,regs,regd) \ + __asm__ __volatile__ (#op " %" #regs ", %" #regd) + + +#define emms() __asm__ __volatile__ ("emms") + +#define movd_m2r(var,reg) mmx_m2r (movd, var, reg) +#define movd_r2m(reg,var) mmx_r2m (movd, reg, var) +#define movd_v2r(var,reg) __asm__ __volatile__ ("movd %0, %%" #reg \ + : /* nothing */ \ + : "rm" (var)) +#define movd_r2v(reg,var) __asm__ __volatile__ ("movd %%" #reg ", %0" \ + : "=rm" (var) \ + : /* nothing */ ) + +#define movq_m2r(var,reg) mmx_m2r (movq, var, reg) +#define movq_r2m(reg,var) mmx_r2m (movq, reg, var) +#define movq_r2r(regs,regd) mmx_r2r (movq, regs, regd) + +#define packssdw_m2r(var,reg) mmx_m2r (packssdw, var, reg) +#define packssdw_r2r(regs,regd) mmx_r2r 
(packssdw, regs, regd) +#define packsswb_m2r(var,reg) mmx_m2r (packsswb, var, reg) +#define packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd) + +#define packuswb_m2r(var,reg) mmx_m2r (packuswb, var, reg) +#define packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd) + +#define paddb_m2r(var,reg) mmx_m2r (paddb, var, reg) +#define paddb_r2r(regs,regd) mmx_r2r (paddb, regs, regd) +#define paddd_m2r(var,reg) mmx_m2r (paddd, var, reg) +#define paddd_r2r(regs,regd) mmx_r2r (paddd, regs, regd) +#define paddw_m2r(var,reg) mmx_m2r (paddw, var, reg) +#define paddw_r2r(regs,regd) mmx_r2r (paddw, regs, regd) + +#define paddsb_m2r(var,reg) mmx_m2r (paddsb, var, reg) +#define paddsb_r2r(regs,regd) mmx_r2r (paddsb, regs, regd) +#define paddsw_m2r(var,reg) mmx_m2r (paddsw, var, reg) +#define paddsw_r2r(regs,regd) mmx_r2r (paddsw, regs, regd) + +#define paddusb_m2r(var,reg) mmx_m2r (paddusb, var, reg) +#define paddusb_r2r(regs,regd) mmx_r2r (paddusb, regs, regd) +#define paddusw_m2r(var,reg) mmx_m2r (paddusw, var, reg) +#define paddusw_r2r(regs,regd) mmx_r2r (paddusw, regs, regd) + +#define pand_m2r(var,reg) mmx_m2r (pand, var, reg) +#define pand_r2r(regs,regd) mmx_r2r (pand, regs, regd) + +#define pandn_m2r(var,reg) mmx_m2r (pandn, var, reg) +#define pandn_r2r(regs,regd) mmx_r2r (pandn, regs, regd) + +#define pcmpeqb_m2r(var,reg) mmx_m2r (pcmpeqb, var, reg) +#define pcmpeqb_r2r(regs,regd) mmx_r2r (pcmpeqb, regs, regd) +#define pcmpeqd_m2r(var,reg) mmx_m2r (pcmpeqd, var, reg) +#define pcmpeqd_r2r(regs,regd) mmx_r2r (pcmpeqd, regs, regd) +#define pcmpeqw_m2r(var,reg) mmx_m2r (pcmpeqw, var, reg) +#define pcmpeqw_r2r(regs,regd) mmx_r2r (pcmpeqw, regs, regd) + +#define pcmpgtb_m2r(var,reg) mmx_m2r (pcmpgtb, var, reg) +#define pcmpgtb_r2r(regs,regd) mmx_r2r (pcmpgtb, regs, regd) +#define pcmpgtd_m2r(var,reg) mmx_m2r (pcmpgtd, var, reg) +#define pcmpgtd_r2r(regs,regd) mmx_r2r (pcmpgtd, regs, regd) +#define pcmpgtw_m2r(var,reg) mmx_m2r (pcmpgtw, var, reg) +#define 
pcmpgtw_r2r(regs,regd) mmx_r2r (pcmpgtw, regs, regd) + +#define pmaddwd_m2r(var,reg) mmx_m2r (pmaddwd, var, reg) +#define pmaddwd_r2r(regs,regd) mmx_r2r (pmaddwd, regs, regd) + +#define pmulhw_m2r(var,reg) mmx_m2r (pmulhw, var, reg) +#define pmulhw_r2r(regs,regd) mmx_r2r (pmulhw, regs, regd) + +#define pmullw_m2r(var,reg) mmx_m2r (pmullw, var, reg) +#define pmullw_r2r(regs,regd) mmx_r2r (pmullw, regs, regd) + +#define por_m2r(var,reg) mmx_m2r (por, var, reg) +#define por_r2r(regs,regd) mmx_r2r (por, regs, regd) + +#define pslld_i2r(imm,reg) mmx_i2r (pslld, imm, reg) +#define pslld_m2r(var,reg) mmx_m2r (pslld, var, reg) +#define pslld_r2r(regs,regd) mmx_r2r (pslld, regs, regd) +#define psllq_i2r(imm,reg) mmx_i2r (psllq, imm, reg) +#define psllq_m2r(var,reg) mmx_m2r (psllq, var, reg) +#define psllq_r2r(regs,regd) mmx_r2r (psllq, regs, regd) +#define psllw_i2r(imm,reg) mmx_i2r (psllw, imm, reg) +#define psllw_m2r(var,reg) mmx_m2r (psllw, var, reg) +#define psllw_r2r(regs,regd) mmx_r2r (psllw, regs, regd) + +#define psrad_i2r(imm,reg) mmx_i2r (psrad, imm, reg) +#define psrad_m2r(var,reg) mmx_m2r (psrad, var, reg) +#define psrad_r2r(regs,regd) mmx_r2r (psrad, regs, regd) +#define psraw_i2r(imm,reg) mmx_i2r (psraw, imm, reg) +#define psraw_m2r(var,reg) mmx_m2r (psraw, var, reg) +#define psraw_r2r(regs,regd) mmx_r2r (psraw, regs, regd) + +#define psrld_i2r(imm,reg) mmx_i2r (psrld, imm, reg) +#define psrld_m2r(var,reg) mmx_m2r (psrld, var, reg) +#define psrld_r2r(regs,regd) mmx_r2r (psrld, regs, regd) +#define psrlq_i2r(imm,reg) mmx_i2r (psrlq, imm, reg) +#define psrlq_m2r(var,reg) mmx_m2r (psrlq, var, reg) +#define psrlq_r2r(regs,regd) mmx_r2r (psrlq, regs, regd) +#define psrlw_i2r(imm,reg) mmx_i2r (psrlw, imm, reg) +#define psrlw_m2r(var,reg) mmx_m2r (psrlw, var, reg) +#define psrlw_r2r(regs,regd) mmx_r2r (psrlw, regs, regd) + +#define psubb_m2r(var,reg) mmx_m2r (psubb, var, reg) +#define psubb_r2r(regs,regd) mmx_r2r (psubb, regs, regd) +#define psubd_m2r(var,reg) 
mmx_m2r (psubd, var, reg) +#define psubd_r2r(regs,regd) mmx_r2r (psubd, regs, regd) +#define psubw_m2r(var,reg) mmx_m2r (psubw, var, reg) +#define psubw_r2r(regs,regd) mmx_r2r (psubw, regs, regd) + +#define psubsb_m2r(var,reg) mmx_m2r (psubsb, var, reg) +#define psubsb_r2r(regs,regd) mmx_r2r (psubsb, regs, regd) +#define psubsw_m2r(var,reg) mmx_m2r (psubsw, var, reg) +#define psubsw_r2r(regs,regd) mmx_r2r (psubsw, regs, regd) + +#define psubusb_m2r(var,reg) mmx_m2r (psubusb, var, reg) +#define psubusb_r2r(regs,regd) mmx_r2r (psubusb, regs, regd) +#define psubusw_m2r(var,reg) mmx_m2r (psubusw, var, reg) +#define psubusw_r2r(regs,regd) mmx_r2r (psubusw, regs, regd) + +#define punpckhbw_m2r(var,reg) mmx_m2r (punpckhbw, var, reg) +#define punpckhbw_r2r(regs,regd) mmx_r2r (punpckhbw, regs, regd) +#define punpckhdq_m2r(var,reg) mmx_m2r (punpckhdq, var, reg) +#define punpckhdq_r2r(regs,regd) mmx_r2r (punpckhdq, regs, regd) +#define punpckhwd_m2r(var,reg) mmx_m2r (punpckhwd, var, reg) +#define punpckhwd_r2r(regs,regd) mmx_r2r (punpckhwd, regs, regd) + +#define punpcklbw_m2r(var,reg) mmx_m2r (punpcklbw, var, reg) +#define punpcklbw_r2r(regs,regd) mmx_r2r (punpcklbw, regs, regd) +#define punpckldq_m2r(var,reg) mmx_m2r (punpckldq, var, reg) +#define punpckldq_r2r(regs,regd) mmx_r2r (punpckldq, regs, regd) +#define punpcklwd_m2r(var,reg) mmx_m2r (punpcklwd, var, reg) +#define punpcklwd_r2r(regs,regd) mmx_r2r (punpcklwd, regs, regd) + +#define pxor_m2r(var,reg) mmx_m2r (pxor, var, reg) +#define pxor_r2r(regs,regd) mmx_r2r (pxor, regs, regd) + + +/* 3DNOW extensions */ + +#define pavgusb_m2r(var,reg) mmx_m2r (pavgusb, var, reg) +#define pavgusb_r2r(regs,regd) mmx_r2r (pavgusb, regs, regd) + + +/* AMD MMX extensions - also available in intel SSE */ + + +#define mmx_m2ri(op,mem,reg,imm) \ + __asm__ __volatile__ (#op " %1, %0, %%" #reg \ + : /* nothing */ \ + : "m" (mem), "i" (imm)) + +#define mmx_r2ri(op,regs,regd,imm) \ + __asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \ + 
: /* nothing */ \ + : "i" (imm) ) + +#define mmx_fetch(mem,hint) \ + __asm__ __volatile__ ("prefetch" #hint " %0" \ + : /* nothing */ \ + : "m" (mem)) + + +#define maskmovq(regs,maskreg) mmx_r2ri (maskmovq, regs, maskreg) + +#define movntq_r2m(mmreg,var) mmx_r2m (movntq, mmreg, var) + +#define pavgb_m2r(var,reg) mmx_m2r (pavgb, var, reg) +#define pavgb_r2r(regs,regd) mmx_r2r (pavgb, regs, regd) +#define pavgw_m2r(var,reg) mmx_m2r (pavgw, var, reg) +#define pavgw_r2r(regs,regd) mmx_r2r (pavgw, regs, regd) + +#define pextrw_r2r(mmreg,reg,imm) mmx_r2ri (pextrw, mmreg, reg, imm) + +#define pinsrw_r2r(reg,mmreg,imm) mmx_r2ri (pinsrw, reg, mmreg, imm) + +#define pmaxsw_m2r(var,reg) mmx_m2r (pmaxsw, var, reg) +#define pmaxsw_r2r(regs,regd) mmx_r2r (pmaxsw, regs, regd) + +#define pmaxub_m2r(var,reg) mmx_m2r (pmaxub, var, reg) +#define pmaxub_r2r(regs,regd) mmx_r2r (pmaxub, regs, regd) + +#define pminsw_m2r(var,reg) mmx_m2r (pminsw, var, reg) +#define pminsw_r2r(regs,regd) mmx_r2r (pminsw, regs, regd) + +#define pminub_m2r(var,reg) mmx_m2r (pminub, var, reg) +#define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd) + +#define pmovmskb(mmreg,reg) \ + __asm__ __volatile__ ("movmskps %" #mmreg ", %" #reg) + +#define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg) +#define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd) + +#define prefetcht0(mem) mmx_fetch (mem, t0) +#define prefetcht1(mem) mmx_fetch (mem, t1) +#define prefetcht2(mem) mmx_fetch (mem, t2) +#define prefetchnta(mem) mmx_fetch (mem, nta) + +#define psadbw_m2r(var,reg) mmx_m2r (psadbw, var, reg) +#define psadbw_r2r(regs,regd) mmx_r2r (psadbw, regs, regd) + +#define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm) +#define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm) + +#define sfence() __asm__ __volatile__ ("sfence\n\t") diff --git a/src/video_dec/libmpeg2new/include/mpeg2.h b/src/video_dec/libmpeg2new/include/mpeg2.h new file mode 100644 index 000000000..6c1a3805b --- /dev/null +++ 
b/src/video_dec/libmpeg2new/include/mpeg2.h @@ -0,0 +1,202 @@ +/* + * mpeg2.h + * Copyright (C) 2000-2004 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef MPEG2_H +#define MPEG2_H + +#define MPEG2_VERSION(a,b,c) (((a)<<16)|((b)<<8)|(c)) +#define MPEG2_RELEASE MPEG2_VERSION (0, 4, 1) /* 0.4.1 */ + +#define SEQ_FLAG_MPEG2 1 +#define SEQ_FLAG_CONSTRAINED_PARAMETERS 2 +#define SEQ_FLAG_PROGRESSIVE_SEQUENCE 4 +#define SEQ_FLAG_LOW_DELAY 8 +#define SEQ_FLAG_COLOUR_DESCRIPTION 16 + +#define SEQ_MASK_VIDEO_FORMAT 0xe0 +#define SEQ_VIDEO_FORMAT_COMPONENT 0 +#define SEQ_VIDEO_FORMAT_PAL 0x20 +#define SEQ_VIDEO_FORMAT_NTSC 0x40 +#define SEQ_VIDEO_FORMAT_SECAM 0x60 +#define SEQ_VIDEO_FORMAT_MAC 0x80 +#define SEQ_VIDEO_FORMAT_UNSPECIFIED 0xa0 + +typedef struct mpeg2_sequence_s { + unsigned int width, height; + unsigned int chroma_width, chroma_height; + unsigned int byte_rate; + unsigned int vbv_buffer_size; + uint32_t flags; + + unsigned int picture_width, picture_height; + unsigned int display_width, display_height; + unsigned int pixel_width, pixel_height; + unsigned int frame_period; + + uint8_t profile_level_id; + 
uint8_t colour_primaries; + uint8_t transfer_characteristics; + uint8_t matrix_coefficients; +} mpeg2_sequence_t; + +#define GOP_FLAG_DROP_FRAME 1 +#define GOP_FLAG_BROKEN_LINK 2 +#define GOP_FLAG_CLOSED_GOP 4 + +typedef struct mpeg2_gop_s { + uint8_t hours; + uint8_t minutes; + uint8_t seconds; + uint8_t pictures; + uint32_t flags; +} mpeg2_gop_t; + +#define PIC_MASK_CODING_TYPE 7 +#define PIC_FLAG_CODING_TYPE_I 1 +#define PIC_FLAG_CODING_TYPE_P 2 +#define PIC_FLAG_CODING_TYPE_B 3 +#define PIC_FLAG_CODING_TYPE_D 4 + +#define PIC_FLAG_TOP_FIELD_FIRST 8 +#define PIC_FLAG_PROGRESSIVE_FRAME 16 +#define PIC_FLAG_COMPOSITE_DISPLAY 32 +#define PIC_FLAG_SKIP 64 +#define PIC_FLAG_TAGS 128 +#define PIC_MASK_COMPOSITE_DISPLAY 0xfffff000 + +typedef struct mpeg2_picture_s { + unsigned int temporal_reference; + unsigned int nb_fields; + uint32_t tag, tag2; + uint32_t flags; + struct { + int x, y; + } display_offset[3]; +} mpeg2_picture_t; + +typedef struct mpeg2_fbuf_s { + uint8_t * buf[3]; + void * id; +} mpeg2_fbuf_t; + +typedef struct mpeg2_info_s { + const mpeg2_sequence_t * sequence; + const mpeg2_gop_t * gop; + const mpeg2_picture_t * current_picture; + const mpeg2_picture_t * current_picture_2nd; + const mpeg2_fbuf_t * current_fbuf; + const mpeg2_picture_t * display_picture; + const mpeg2_picture_t * display_picture_2nd; + const mpeg2_fbuf_t * display_fbuf; + const mpeg2_fbuf_t * discard_fbuf; + const uint8_t * user_data; + unsigned int user_data_len; +} mpeg2_info_t; + +typedef struct mpeg2dec_s mpeg2dec_t; +typedef struct mpeg2_decoder_s mpeg2_decoder_t; + +typedef enum { + STATE_BUFFER = 0, + STATE_SEQUENCE = 1, + STATE_SEQUENCE_REPEATED = 2, + STATE_SEQUENCE_MODIFIED = 3, + STATE_GOP = 4, + STATE_PICTURE = 5, + STATE_SLICE_1ST = 6, + STATE_PICTURE_2ND = 7, + STATE_SLICE = 8, + STATE_END = 9, + STATE_INVALID = 10, + STATE_INVALID_END = 11 +} mpeg2_state_t; + +typedef struct mpeg2_convert_init_s { + unsigned int id_size; + unsigned int buf_size[3]; + void (* start) 
(void * id, const mpeg2_fbuf_t * fbuf, + const mpeg2_picture_t * picture, const mpeg2_gop_t * gop); + void (* copy) (void * id, uint8_t * const * src, unsigned int v_offset); +} mpeg2_convert_init_t; +typedef enum { + MPEG2_CONVERT_SET = 0, + MPEG2_CONVERT_STRIDE = 1, + MPEG2_CONVERT_START = 2 +} mpeg2_convert_stage_t; +typedef int mpeg2_convert_t (int stage, void * id, + const mpeg2_sequence_t * sequence, int stride, + uint32_t accel, void * arg, + mpeg2_convert_init_t * result); +int mpeg2_convert (mpeg2dec_t * mpeg2dec, mpeg2_convert_t convert, void * arg); +int mpeg2_stride (mpeg2dec_t * mpeg2dec, int stride); +void mpeg2_set_buf (mpeg2dec_t * mpeg2dec, uint8_t * buf[3], void * id); +void mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf); + +#define MPEG2_ACCEL_X86_MMX 1 +#define MPEG2_ACCEL_X86_3DNOW 2 +#define MPEG2_ACCEL_X86_MMXEXT 4 +#define MPEG2_ACCEL_X86_SSE2 8 +#define MPEG2_ACCEL_X86_SSE3 16 +#define MPEG2_ACCEL_PPC_ALTIVEC 1 +#define MPEG2_ACCEL_ALPHA 1 +#define MPEG2_ACCEL_ALPHA_MVI 2 +#define MPEG2_ACCEL_SPARC_VIS 1 +#define MPEG2_ACCEL_SPARC_VIS2 2 +#define MPEG2_ACCEL_DETECT 0x80000000 + +uint32_t mpeg2_accel (uint32_t accel); +mpeg2dec_t * mpeg2_init (void); +const mpeg2_info_t * mpeg2_info (mpeg2dec_t * mpeg2dec); +void mpeg2_close (mpeg2dec_t * mpeg2dec); + +void mpeg2_buffer (mpeg2dec_t * mpeg2dec, uint8_t * start, uint8_t * end); +int mpeg2_getpos (mpeg2dec_t * mpeg2dec); +mpeg2_state_t mpeg2_parse (mpeg2dec_t * mpeg2dec); + +void mpeg2_reset (mpeg2dec_t * mpeg2dec, int full_reset); +void mpeg2_skip (mpeg2dec_t * mpeg2dec, int skip); +void mpeg2_slice_region (mpeg2dec_t * mpeg2dec, int start, int end); + +void mpeg2_tag_picture (mpeg2dec_t * mpeg2dec, uint32_t tag, uint32_t tag2); + +void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3], + uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3]); +void mpeg2_slice (mpeg2_decoder_t * decoder, int code, const uint8_t * buffer); +int mpeg2_guess_aspect (const 
mpeg2_sequence_t * sequence, + unsigned int * pixel_width, + unsigned int * pixel_height); + +typedef enum { + MPEG2_ALLOC_MPEG2DEC = 0, + MPEG2_ALLOC_CHUNK = 1, + MPEG2_ALLOC_YUV = 2, + MPEG2_ALLOC_CONVERT_ID = 3, + MPEG2_ALLOC_CONVERTED = 4 +} mpeg2_alloc_t; + +void * mpeg2_malloc (unsigned size, mpeg2_alloc_t reason); +void mpeg2_free (void * buf); +void mpeg2_malloc_hooks (void * malloc (unsigned, mpeg2_alloc_t), + int free (void *)); + +#endif /* MPEG2_H */ diff --git a/src/video_dec/libmpeg2new/include/mpeg2convert.h b/src/video_dec/libmpeg2new/include/mpeg2convert.h new file mode 100644 index 000000000..aac5d1991 --- /dev/null +++ b/src/video_dec/libmpeg2new/include/mpeg2convert.h @@ -0,0 +1,48 @@ +/* + * mpeg2convert.h + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef MPEG2CONVERT_H +#define MPEG2CONVERT_H + +mpeg2_convert_t mpeg2convert_rgb32; +mpeg2_convert_t mpeg2convert_rgb24; +mpeg2_convert_t mpeg2convert_rgb16; +mpeg2_convert_t mpeg2convert_rgb15; +mpeg2_convert_t mpeg2convert_rgb8; +mpeg2_convert_t mpeg2convert_bgr32; +mpeg2_convert_t mpeg2convert_bgr24; +mpeg2_convert_t mpeg2convert_bgr16; +mpeg2_convert_t mpeg2convert_bgr15; +mpeg2_convert_t mpeg2convert_bgr8; + +typedef enum { + MPEG2CONVERT_RGB = 0, + MPEG2CONVERT_BGR = 1 +} mpeg2convert_rgb_order_t; + +mpeg2_convert_t * mpeg2convert_rgb (mpeg2convert_rgb_order_t order, + unsigned int bpp); + +mpeg2_convert_t mpeg2convert_uyvy; + +#endif /* MPEG2CONVERT_H */ diff --git a/src/video_dec/libmpeg2new/include/sse.h b/src/video_dec/libmpeg2new/include/sse.h new file mode 100644 index 000000000..4bd853f8b --- /dev/null +++ b/src/video_dec/libmpeg2new/include/sse.h @@ -0,0 +1,256 @@ +/* + * sse.h + * Copyright (C) 1999-2003 R. Fisher + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +typedef union { + float sf[4]; /* Single-precision (32-bit) value */ +} ATTR_ALIGN(16) sse_t; /* On a 16 byte (128-bit) boundary */ + + +#define sse_i2r(op, imm, reg) \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "X" (imm) ) + +#define sse_m2r(op, mem, reg) \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "X" (mem)) + +#define sse_r2m(op, reg, mem) \ + __asm__ __volatile__ (#op " %%" #reg ", %0" \ + : "=X" (mem) \ + : /* nothing */ ) + +#define sse_r2r(op, regs, regd) \ + __asm__ __volatile__ (#op " %" #regs ", %" #regd) + +#define sse_r2ri(op, regs, regd, imm) \ + __asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \ + : /* nothing */ \ + : "X" (imm) ) + +#define sse_m2ri(op, mem, reg, subop) \ + __asm__ __volatile__ (#op " %0, %%" #reg ", " #subop \ + : /* nothing */ \ + : "X" (mem)) + + +#define movaps_m2r(var, reg) sse_m2r(movaps, var, reg) +#define movaps_r2m(reg, var) sse_r2m(movaps, reg, var) +#define movaps_r2r(regs, regd) sse_r2r(movaps, regs, regd) + +#define movntps_r2m(xmmreg, var) sse_r2m(movntps, xmmreg, var) + +#define movups_m2r(var, reg) sse_m2r(movups, var, reg) +#define movups_r2m(reg, var) sse_r2m(movups, reg, var) +#define movups_r2r(regs, regd) sse_r2r(movups, regs, regd) + +#define movhlps_r2r(regs, regd) sse_r2r(movhlps, regs, regd) + +#define movlhps_r2r(regs, regd) sse_r2r(movlhps, regs, regd) + +#define movhps_m2r(var, reg) sse_m2r(movhps, var, reg) +#define movhps_r2m(reg, var) sse_r2m(movhps, reg, var) + +#define movlps_m2r(var, reg) sse_m2r(movlps, var, reg) +#define movlps_r2m(reg, var) sse_r2m(movlps, reg, var) + +#define movss_m2r(var, reg) sse_m2r(movss, var, reg) +#define movss_r2m(reg, var) sse_r2m(movss, reg, var) +#define movss_r2r(regs, regd) sse_r2r(movss, 
regs, regd) + +#define shufps_m2r(var, reg, index) sse_m2ri(shufps, var, reg, index) +#define shufps_r2r(regs, regd, index) sse_r2ri(shufps, regs, regd, index) + +#define cvtpi2ps_m2r(var, xmmreg) sse_m2r(cvtpi2ps, var, xmmreg) +#define cvtpi2ps_r2r(mmreg, xmmreg) sse_r2r(cvtpi2ps, mmreg, xmmreg) + +#define cvtps2pi_m2r(var, mmreg) sse_m2r(cvtps2pi, var, mmreg) +#define cvtps2pi_r2r(xmmreg, mmreg) sse_r2r(cvtps2pi, xmmreg, mmreg) + +#define cvttps2pi_m2r(var, mmreg) sse_m2r(cvttps2pi, var, mmreg) +#define cvttps2pi_r2r(xmmreg, mmreg) sse_r2r(cvttps2pi, xmmreg, mmreg) + +#define cvtsi2ss_m2r(var, xmmreg) sse_m2r(cvtsi2ss, var, xmmreg) +#define cvtsi2ss_r2r(reg, xmmreg) sse_r2r(cvtsi2ss, reg, xmmreg) + +#define cvtss2si_m2r(var, reg) sse_m2r(cvtss2si, var, reg) +#define cvtss2si_r2r(xmmreg, reg) sse_r2r(cvtss2si, xmmreg, reg) + +#define cvttss2si_m2r(var, reg) sse_m2r(cvttss2si, var, reg) +#define cvttss2si_r2r(xmmreg, reg) sse_r2r(cvttss2si, xmmreg, reg) + +#define movmskps(xmmreg, reg) \ + __asm__ __volatile__ ("movmskps %" #xmmreg ", %" #reg) + +#define addps_m2r(var, reg) sse_m2r(addps, var, reg) +#define addps_r2r(regs, regd) sse_r2r(addps, regs, regd) + +#define addss_m2r(var, reg) sse_m2r(addss, var, reg) +#define addss_r2r(regs, regd) sse_r2r(addss, regs, regd) + +#define subps_m2r(var, reg) sse_m2r(subps, var, reg) +#define subps_r2r(regs, regd) sse_r2r(subps, regs, regd) + +#define subss_m2r(var, reg) sse_m2r(subss, var, reg) +#define subss_r2r(regs, regd) sse_r2r(subss, regs, regd) + +#define mulps_m2r(var, reg) sse_m2r(mulps, var, reg) +#define mulps_r2r(regs, regd) sse_r2r(mulps, regs, regd) + +#define mulss_m2r(var, reg) sse_m2r(mulss, var, reg) +#define mulss_r2r(regs, regd) sse_r2r(mulss, regs, regd) + +#define divps_m2r(var, reg) sse_m2r(divps, var, reg) +#define divps_r2r(regs, regd) sse_r2r(divps, regs, regd) + +#define divss_m2r(var, reg) sse_m2r(divss, var, reg) +#define divss_r2r(regs, regd) sse_r2r(divss, regs, regd) + +#define rcpps_m2r(var,
reg) sse_m2r(rcpps, var, reg) +#define rcpps_r2r(regs, regd) sse_r2r(rcpps, regs, regd) + +#define rcpss_m2r(var, reg) sse_m2r(rcpss, var, reg) +#define rcpss_r2r(regs, regd) sse_r2r(rcpss, regs, regd) + +#define rsqrtps_m2r(var, reg) sse_m2r(rsqrtps, var, reg) +#define rsqrtps_r2r(regs, regd) sse_r2r(rsqrtps, regs, regd) + +#define rsqrtss_m2r(var, reg) sse_m2r(rsqrtss, var, reg) +#define rsqrtss_r2r(regs, regd) sse_r2r(rsqrtss, regs, regd) + +#define sqrtps_m2r(var, reg) sse_m2r(sqrtps, var, reg) +#define sqrtps_r2r(regs, regd) sse_r2r(sqrtps, regs, regd) + +#define sqrtss_m2r(var, reg) sse_m2r(sqrtss, var, reg) +#define sqrtss_r2r(regs, regd) sse_r2r(sqrtss, regs, regd) + +#define andps_m2r(var, reg) sse_m2r(andps, var, reg) +#define andps_r2r(regs, regd) sse_r2r(andps, regs, regd) + +#define andnps_m2r(var, reg) sse_m2r(andnps, var, reg) +#define andnps_r2r(regs, regd) sse_r2r(andnps, regs, regd) + +#define orps_m2r(var, reg) sse_m2r(orps, var, reg) +#define orps_r2r(regs, regd) sse_r2r(orps, regs, regd) + +#define xorps_m2r(var, reg) sse_m2r(xorps, var, reg) +#define xorps_r2r(regs, regd) sse_r2r(xorps, regs, regd) + +#define maxps_m2r(var, reg) sse_m2r(maxps, var, reg) +#define maxps_r2r(regs, regd) sse_r2r(maxps, regs, regd) + +#define maxss_m2r(var, reg) sse_m2r(maxss, var, reg) +#define maxss_r2r(regs, regd) sse_r2r(maxss, regs, regd) + +#define minps_m2r(var, reg) sse_m2r(minps, var, reg) +#define minps_r2r(regs, regd) sse_r2r(minps, regs, regd) + +#define minss_m2r(var, reg) sse_m2r(minss, var, reg) +#define minss_r2r(regs, regd) sse_r2r(minss, regs, regd) + +#define cmpps_m2r(var, reg, op) sse_m2ri(cmpps, var, reg, op) +#define cmpps_r2r(regs, regd, op) sse_r2ri(cmpps, regs, regd, op) + +#define cmpeqps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 0) +#define cmpeqps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 0) + +#define cmpltps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 1) +#define cmpltps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 1) + +#define 
cmpleps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 2) +#define cmpleps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 2) + +#define cmpunordps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 3) +#define cmpunordps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 3) + +#define cmpneqps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 4) +#define cmpneqps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 4) + +#define cmpnltps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 5) +#define cmpnltps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 5) + +#define cmpnleps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 6) +#define cmpnleps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 6) + +#define cmpordps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 7) +#define cmpordps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 7) + +#define cmpss_m2r(var, reg, op) sse_m2ri(cmpss, var, reg, op) +#define cmpss_r2r(regs, regd, op) sse_r2ri(cmpss, regs, regd, op) + +#define cmpeqss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 0) +#define cmpeqss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 0) + +#define cmpltss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 1) +#define cmpltss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 1) + +#define cmpless_m2r(var, reg) sse_m2ri(cmpss, var, reg, 2) +#define cmpless_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 2) + +#define cmpunordss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 3) +#define cmpunordss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 3) + +#define cmpneqss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 4) +#define cmpneqss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 4) + +#define cmpnltss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 5) +#define cmpnltss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 5) + +#define cmpnless_m2r(var, reg) sse_m2ri(cmpss, var, reg, 6) +#define cmpnless_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 6) + +#define cmpordss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 7) +#define cmpordss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 7) + +#define comiss_m2r(var, reg) sse_m2r(comiss, var, reg) +#define comiss_r2r(regs, 
regd) sse_r2r(comiss, regs, regd) + +#define ucomiss_m2r(var, reg) sse_m2r(ucomiss, var, reg) +#define ucomiss_r2r(regs, regd) sse_r2r(ucomiss, regs, regd) + +#define unpcklps_m2r(var, reg) sse_m2r(unpcklps, var, reg) +#define unpcklps_r2r(regs, regd) sse_r2r(unpcklps, regs, regd) + +#define unpckhps_m2r(var, reg) sse_m2r(unpckhps, var, reg) +#define unpckhps_r2r(regs, regd) sse_r2r(unpckhps, regs, regd) + +#define fxrstor(mem) \ + __asm__ __volatile__ ("fxrstor %0" \ + : /* nothing */ \ + : "X" (mem)) + +#define fxsave(mem) \ + __asm__ __volatile__ ("fxsave %0" \ + : /* nothing */ \ + : "X" (mem)) + +#define stmxcsr(mem) \ + __asm__ __volatile__ ("stmxcsr %0" \ + : /* nothing */ \ + : "X" (mem)) + +#define ldmxcsr(mem) \ + __asm__ __volatile__ ("ldmxcsr %0" \ + : /* nothing */ \ + : "X" (mem)) + diff --git a/src/video_dec/libmpeg2new/include/tendra.h b/src/video_dec/libmpeg2new/include/tendra.h new file mode 100644 index 000000000..09900916a --- /dev/null +++ b/src/video_dec/libmpeg2new/include/tendra.h @@ -0,0 +1,35 @@ +/* + * tendra.h + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#pragma TenDRA begin +#pragma TenDRA longlong type warning + +#ifdef TenDRA_check + +#pragma TenDRA conversion analysis (pointer-int explicit) off +#pragma TenDRA implicit function declaration off + +/* avoid the "No declarations in translation unit" problem */ +int TenDRA; + +#endif /* TenDRA_check */ diff --git a/src/video_dec/libmpeg2new/include/video_out.h b/src/video_dec/libmpeg2new/include/video_out.h new file mode 100644 index 000000000..342c55197 --- /dev/null +++ b/src/video_dec/libmpeg2new/include/video_out.h @@ -0,0 +1,58 @@ +/* + * video_out.h + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +struct mpeg2_sequence_s; +struct mpeg2_convert_init_s; +typedef struct { + int (* convert) (int stage, void * id, + const struct mpeg2_sequence_s * sequence, + int stride, uint32_t accel, void * arg, + struct mpeg2_convert_init_s * result); +} vo_setup_result_t; + +typedef struct vo_instance_s vo_instance_t; +struct vo_instance_s { + int (* setup) (vo_instance_t * instance, unsigned int width, + unsigned int height, unsigned int chroma_width, + unsigned int chroma_height, vo_setup_result_t * result); + void (* setup_fbuf) (vo_instance_t * instance, uint8_t ** buf, void ** id); + void (* set_fbuf) (vo_instance_t * instance, uint8_t ** buf, void ** id); + void (* start_fbuf) (vo_instance_t * instance, + uint8_t * const * buf, void * id); + void (* draw) (vo_instance_t * instance, uint8_t * const * buf, void * id); + void (* discard) (vo_instance_t * instance, + uint8_t * const * buf, void * id); + void (* close) (vo_instance_t * instance); +}; + +typedef vo_instance_t * vo_open_t (void); + +typedef struct { + char * name; + vo_open_t * open; +} vo_driver_t; + +void vo_accel (uint32_t accel); + +/* return NULL terminated array of all drivers */ +vo_driver_t const * vo_drivers (void); diff --git a/src/video_dec/libmpeg2new/include/vis.h b/src/video_dec/libmpeg2new/include/vis.h new file mode 100644 index 000000000..69dd49075 --- /dev/null +++ b/src/video_dec/libmpeg2new/include/vis.h @@ -0,0 +1,328 @@ +/* + * vis.h + * Copyright (C) 2003 David S. Miller + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. 
+ * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* You may be asking why I hard-code the instruction opcodes and don't + * use the normal VIS assembler mnemonics for the VIS instructions. + * + * The reason is that Sun, in their infinite wisdom, decided that a binary + * using a VIS instruction will cause it to be marked (in the ELF headers) + * as doing so, and this prevents the OS from loading such binaries if the + * current cpu doesn't have VIS. There is no way to easily override this + * behavior of the assembler that I am aware of. + * + * This totally defeats what libmpeg2 is trying to do which is allow a + * single binary to be created, and then detect the availability of VIS + * at runtime. + * + * I'm not saying that tainting the binary by default is bad, rather I'm + * saying that not providing a way to override this easily unnecessarily + * ties people's hands. + * + * Thus, we do the opcode encoding by hand and output 32-bit words in + * the assembler to keep the binary from becoming tainted.
+ */ + +#define vis_opc_base ((0x1 << 31) | (0x36 << 19)) +#define vis_opf(X) ((X) << 5) +#define vis_sreg(X) (X) +#define vis_dreg(X) (((X)&0x1f)|((X)>>5)) +#define vis_rs1_s(X) (vis_sreg(X) << 14) +#define vis_rs1_d(X) (vis_dreg(X) << 14) +#define vis_rs2_s(X) (vis_sreg(X) << 0) +#define vis_rs2_d(X) (vis_dreg(X) << 0) +#define vis_rd_s(X) (vis_sreg(X) << 25) +#define vis_rd_d(X) (vis_dreg(X) << 25) + +#define vis_ss2s(opf,rs1,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs1_s(rs1) | \ + vis_rs2_s(rs2) | \ + vis_rd_s(rd))) + +#define vis_dd2d(opf,rs1,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs1_d(rs1) | \ + vis_rs2_d(rs2) | \ + vis_rd_d(rd))) + +#define vis_ss2d(opf,rs1,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs1_s(rs1) | \ + vis_rs2_s(rs2) | \ + vis_rd_d(rd))) + +#define vis_sd2d(opf,rs1,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs1_s(rs1) | \ + vis_rs2_d(rs2) | \ + vis_rd_d(rd))) + +#define vis_d2s(opf,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs2_d(rs2) | \ + vis_rd_s(rd))) + +#define vis_s2d(opf,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs2_s(rs2) | \ + vis_rd_d(rd))) + +#define vis_d12d(opf,rs1,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs1_d(rs1) | \ + vis_rd_d(rd))) + +#define vis_d22d(opf,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs2_d(rs2) | \ + vis_rd_d(rd))) + +#define vis_s12s(opf,rs1,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs1_s(rs1) | \ + vis_rd_s(rd))) + +#define vis_s22s(opf,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + 
vis_rs2_s(rs2) | \ + vis_rd_s(rd))) + +#define vis_s(opf,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rd_s(rd))) + +#define vis_d(opf,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rd_d(rd))) + +#define vis_r2m(op,rd,mem) \ + __asm__ __volatile__ (#op "\t%%f" #rd ", [%0]" : : "r" (&(mem)) ) + +#define vis_r2m_2(op,rd,mem1,mem2) \ + __asm__ __volatile__ (#op "\t%%f" #rd ", [%0 + %1]" : : "r" (mem1), "r" (mem2) ) + +#define vis_m2r(op,mem,rd) \ + __asm__ __volatile__ (#op "\t[%0], %%f" #rd : : "r" (&(mem)) ) + +#define vis_m2r_2(op,mem1,mem2,rd) \ + __asm__ __volatile__ (#op "\t[%0 + %1], %%f" #rd : : "r" (mem1), "r" (mem2) ) + +static inline void vis_set_gsr(unsigned int _val) +{ + register unsigned int val asm("g1"); + + val = _val; + __asm__ __volatile__(".word 0xa7804000" + : : "r" (val)); +} + +#define VIS_GSR_ALIGNADDR_MASK 0x0000007 +#define VIS_GSR_ALIGNADDR_SHIFT 0 +#define VIS_GSR_SCALEFACT_MASK 0x0000078 +#define VIS_GSR_SCALEFACT_SHIFT 3 + +#define vis_ld32(mem,rs1) vis_m2r(ld, mem, rs1) +#define vis_ld32_2(mem1,mem2,rs1) vis_m2r_2(ld, mem1, mem2, rs1) +#define vis_st32(rs1,mem) vis_r2m(st, rs1, mem) +#define vis_st32_2(rs1,mem1,mem2) vis_r2m_2(st, rs1, mem1, mem2) +#define vis_ld64(mem,rs1) vis_m2r(ldd, mem, rs1) +#define vis_ld64_2(mem1,mem2,rs1) vis_m2r_2(ldd, mem1, mem2, rs1) +#define vis_st64(rs1,mem) vis_r2m(std, rs1, mem) +#define vis_st64_2(rs1,mem1,mem2) vis_r2m_2(std, rs1, mem1, mem2) + +#define vis_ldblk(mem, rd) \ +do { register void *__mem asm("g1"); \ + __mem = &(mem); \ + __asm__ __volatile__(".word 0xc1985e00 | %1" \ + : \ + : "r" (__mem), \ + "i" (vis_rd_d(rd)) \ + : "memory"); \ +} while (0) + +#define vis_stblk(rd, mem) \ +do { register void *__mem asm("g1"); \ + __mem = &(mem); \ + __asm__ __volatile__(".word 0xc1b85e00 | %1" \ + : \ + : "r" (__mem), \ + "i" (vis_rd_d(rd)) \ + : "memory"); \ +} while (0) + +#define vis_membar_storestore() \ + 
__asm__ __volatile__(".word 0x8143e008" : : : "memory") + +#define vis_membar_sync() \ + __asm__ __volatile__(".word 0x8143e040" : : : "memory") + +/* 16 and 32 bit partitioned addition and subtraction. The normal + * versions perform 4 16-bit or 2 32-bit additions or subtractions. + * The 's' versions perform 2 16-bit or 1 32-bit additions or + * subtractions. + */ + +#define vis_padd16(rs1,rs2,rd) vis_dd2d(0x50, rs1, rs2, rd) +#define vis_padd16s(rs1,rs2,rd) vis_ss2s(0x51, rs1, rs2, rd) +#define vis_padd32(rs1,rs2,rd) vis_dd2d(0x52, rs1, rs2, rd) +#define vis_padd32s(rs1,rs2,rd) vis_ss2s(0x53, rs1, rs2, rd) +#define vis_psub16(rs1,rs2,rd) vis_dd2d(0x54, rs1, rs2, rd) +#define vis_psub16s(rs1,rs2,rd) vis_ss2s(0x55, rs1, rs2, rd) +#define vis_psub32(rs1,rs2,rd) vis_dd2d(0x56, rs1, rs2, rd) +#define vis_psub32s(rs1,rs2,rd) vis_ss2s(0x57, rs1, rs2, rd) + +/* Pixel formatting instructions. */ + +#define vis_pack16(rs2,rd) vis_d2s( 0x3b, rs2, rd) +#define vis_pack32(rs1,rs2,rd) vis_dd2d(0x3a, rs1, rs2, rd) +#define vis_packfix(rs2,rd) vis_d2s( 0x3d, rs2, rd) +#define vis_expand(rs2,rd) vis_s2d( 0x4d, rs2, rd) +#define vis_pmerge(rs1,rs2,rd) vis_ss2d(0x4b, rs1, rs2, rd) + +/* Partitioned multiply instructions. */ + +#define vis_mul8x16(rs1,rs2,rd) vis_sd2d(0x31, rs1, rs2, rd) +#define vis_mul8x16au(rs1,rs2,rd) vis_ss2d(0x33, rs1, rs2, rd) +#define vis_mul8x16al(rs1,rs2,rd) vis_ss2d(0x35, rs1, rs2, rd) +#define vis_mul8sux16(rs1,rs2,rd) vis_dd2d(0x36, rs1, rs2, rd) +#define vis_mul8ulx16(rs1,rs2,rd) vis_dd2d(0x37, rs1, rs2, rd) +#define vis_muld8sux16(rs1,rs2,rd) vis_ss2d(0x38, rs1, rs2, rd) +#define vis_muld8ulx16(rs1,rs2,rd) vis_ss2d(0x39, rs1, rs2, rd) + +/* Alignment instructions.
*/ + +static inline void *vis_alignaddr(void *_ptr) +{ + register void *ptr asm("g1"); + + ptr = _ptr; + + __asm__ __volatile__(".word %2" + : "=&r" (ptr) + : "0" (ptr), + "i" (vis_opc_base | vis_opf(0x18) | + vis_rs1_s(1) | + vis_rs2_s(0) | + vis_rd_s(1))); + + return ptr; +} + +static inline void vis_alignaddr_g0(void *_ptr) +{ + register void *ptr asm("g1"); + + ptr = _ptr; + + __asm__ __volatile__(".word %2" + : "=&r" (ptr) + : "0" (ptr), + "i" (vis_opc_base | vis_opf(0x18) | + vis_rs1_s(1) | + vis_rs2_s(0) | + vis_rd_s(0))); +} + +static inline void *vis_alignaddrl(void *_ptr) +{ + register void *ptr asm("g1"); + + ptr = _ptr; + + __asm__ __volatile__(".word %2" + : "=&r" (ptr) + : "0" (ptr), + "i" (vis_opc_base | vis_opf(0x19) | + vis_rs1_s(1) | + vis_rs2_s(0) | + vis_rd_s(1))); + + return ptr; +} + +static inline void vis_alignaddrl_g0(void *_ptr) +{ + register void *ptr asm("g1"); + + ptr = _ptr; + + __asm__ __volatile__(".word %2" + : "=&r" (ptr) + : "0" (ptr), + "i" (vis_opc_base | vis_opf(0x19) | + vis_rs1_s(1) | + vis_rs2_s(0) | + vis_rd_s(0))); +} + +#define vis_faligndata(rs1,rs2,rd) vis_dd2d(0x48, rs1, rs2, rd) + +/* Logical operate instructions. 
*/ + +#define vis_fzero(rd) vis_d( 0x60, rd) +#define vis_fzeros(rd) vis_s( 0x61, rd) +#define vis_fone(rd) vis_d( 0x7e, rd) +#define vis_fones(rd) vis_s( 0x7f, rd) +#define vis_src1(rs1,rd) vis_d12d(0x74, rs1, rd) +#define vis_src1s(rs1,rd) vis_s12s(0x75, rs1, rd) +#define vis_src2(rs2,rd) vis_d22d(0x78, rs2, rd) +#define vis_src2s(rs2,rd) vis_s22s(0x79, rs2, rd) +#define vis_not1(rs1,rd) vis_d12d(0x6a, rs1, rd) +#define vis_not1s(rs1,rd) vis_s12s(0x6b, rs1, rd) +#define vis_not2(rs2,rd) vis_d22d(0x66, rs2, rd) +#define vis_not2s(rs2,rd) vis_s22s(0x67, rs2, rd) +#define vis_or(rs1,rs2,rd) vis_dd2d(0x7c, rs1, rs2, rd) +#define vis_ors(rs1,rs2,rd) vis_ss2s(0x7d, rs1, rs2, rd) +#define vis_nor(rs1,rs2,rd) vis_dd2d(0x62, rs1, rs2, rd) +#define vis_nors(rs1,rs2,rd) vis_ss2s(0x63, rs1, rs2, rd) +#define vis_and(rs1,rs2,rd) vis_dd2d(0x70, rs1, rs2, rd) +#define vis_ands(rs1,rs2,rd) vis_ss2s(0x71, rs1, rs2, rd) +#define vis_nand(rs1,rs2,rd) vis_dd2d(0x6e, rs1, rs2, rd) +#define vis_nands(rs1,rs2,rd) vis_ss2s(0x6f, rs1, rs2, rd) +#define vis_xor(rs1,rs2,rd) vis_dd2d(0x6c, rs1, rs2, rd) +#define vis_xors(rs1,rs2,rd) vis_ss2s(0x6d, rs1, rs2, rd) +#define vis_xnor(rs1,rs2,rd) vis_dd2d(0x72, rs1, rs2, rd) +#define vis_xnors(rs1,rs2,rd) vis_ss2s(0x73, rs1, rs2, rd) +#define vis_ornot1(rs1,rs2,rd) vis_dd2d(0x7a, rs1, rs2, rd) +#define vis_ornot1s(rs1,rs2,rd) vis_ss2s(0x7b, rs1, rs2, rd) +#define vis_ornot2(rs1,rs2,rd) vis_dd2d(0x76, rs1, rs2, rd) +#define vis_ornot2s(rs1,rs2,rd) vis_ss2s(0x77, rs1, rs2, rd) +#define vis_andnot1(rs1,rs2,rd) vis_dd2d(0x68, rs1, rs2, rd) +#define vis_andnot1s(rs1,rs2,rd) vis_ss2s(0x69, rs1, rs2, rd) +#define vis_andnot2(rs1,rs2,rd) vis_dd2d(0x64, rs1, rs2, rd) +#define vis_andnot2s(rs1,rs2,rd) vis_ss2s(0x65, rs1, rs2, rd) + +/* Pixel component distance. 
*/ + +#define vis_pdist(rs1,rs2,rd) vis_dd2d(0x3e, rs1, rs2, rd) diff --git a/src/video_dec/libmpeg2new/libmpeg2/Makefile.am b/src/video_dec/libmpeg2new/libmpeg2/Makefile.am new file mode 100644 index 000000000..2caa3ddc2 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/Makefile.am @@ -0,0 +1,14 @@ +include $(top_srcdir)/misc/Makefile.common + +AM_CFLAGS = $(DEFAULT_OCFLAGS) $(VISIBILITY_FLAG) + +noinst_LTLIBRARIES = libmpeg2.la libmpeg2arch.la + +libmpeg2_la_SOURCES = alloc.c header.c decode.c slice.c motion_comp.c idct.c +libmpeg2_la_LIBADD = libmpeg2arch.la + +libmpeg2arch_la_SOURCES = motion_comp_mmx.c idct_mmx.c \ + motion_comp_altivec.c idct_altivec.c \ + motion_comp_alpha.c idct_alpha.c \ + motion_comp_vis.c \ + cpu_accel.c cpu_state.c diff --git a/src/video_dec/libmpeg2new/libmpeg2/alloc.c b/src/video_dec/libmpeg2new/libmpeg2/alloc.c new file mode 100644 index 000000000..f1a7afa1c --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/alloc.c @@ -0,0 +1,70 @@ +/* + * alloc.c + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include + +#include "../include/mpeg2.h" + +static void * (* malloc_hook) (unsigned size, mpeg2_alloc_t reason) = NULL; +static int (* free_hook) (void * buf) = NULL; + +void * mpeg2_malloc (unsigned size, mpeg2_alloc_t reason) +{ + char * buf; + + if (malloc_hook) { + buf = (char *) malloc_hook (size, reason); + if (buf) + return buf; + } + + if (size) { + buf = (char *) malloc (size + 63 + sizeof (void **)); + if (buf) { + char * align_buf; + + align_buf = buf + 63 + sizeof (void **); + align_buf -= (long)align_buf & 63; + *(((void **)align_buf) - 1) = buf; + return align_buf; + } + } + return NULL; +} + +void mpeg2_free (void * buf) +{ + if (free_hook && free_hook (buf)) + return; + + if (buf) + free (*(((void **)buf) - 1)); +} + +void mpeg2_malloc_hooks (void * malloc (unsigned, mpeg2_alloc_t), + int free (void *)) +{ + malloc_hook = malloc; + free_hook = free; +} diff --git a/src/video_dec/libmpeg2new/libmpeg2/configure.incl b/src/video_dec/libmpeg2new/libmpeg2/configure.incl new file mode 100644 index 000000000..f8dbd5aef --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/configure.incl @@ -0,0 +1,11 @@ +AC_SUBST([LIBMPEG2_CFLAGS]) + +dnl avoid -fPIC when possible +AC_LIBTOOL_NON_PIC([LIBMPEG2_CFLAGS="$LIBMPEG2_CFLAGS -prefer-non-pic"]) + +dnl check for cpudetect +AC_ARG_ENABLE([accel-detect], + [ --disable-accel-detect make a version without accel detection code]) +if test x"$enable_accel_detect" != x"no"; then + AC_DEFINE([ACCEL_DETECT],,[autodetect accelerations]) +fi diff --git a/src/video_dec/libmpeg2new/libmpeg2/convert_internal.h b/src/video_dec/libmpeg2new/libmpeg2/convert_internal.h new file mode 100644 index 000000000..d1e63d5e3 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/convert_internal.h @@ -0,0 +1,42 
@@ +/* + * convert_internal.h + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +typedef struct { + uint8_t * rgb_ptr; + int width; + int field; + int y_stride, rgb_stride, y_increm, uv_increm, rgb_increm, rgb_slice; + int chroma420, convert420; + int dither_offset, dither_stride; + int y_stride_frame, uv_stride_frame, rgb_stride_frame, rgb_stride_min; +} convert_rgb_t; + +typedef void mpeg2convert_copy_t (void * id, uint8_t * const * src, + unsigned int v_offset); + +mpeg2convert_copy_t * mpeg2convert_rgb_mmxext (int bpp, int mode, + const mpeg2_sequence_t * seq); +mpeg2convert_copy_t * mpeg2convert_rgb_mmx (int bpp, int mode, + const mpeg2_sequence_t * seq); +mpeg2convert_copy_t * mpeg2convert_rgb_vis (int bpp, int mode, + const mpeg2_sequence_t * seq); diff --git a/src/video_dec/libmpeg2new/libmpeg2/cpu_accel.c b/src/video_dec/libmpeg2new/libmpeg2/cpu_accel.c new file mode 100644 index 000000000..7846f1e88 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/cpu_accel.c @@ -0,0 +1,258 @@ +/* + * cpu_accel.c + * Copyright (C) 2000-2004 Michel Lespinasse + * Copyright (C) 1999-2000 
Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include + +#include "../include/mpeg2.h" +#include "../include/attributes.h" +#include "mpeg2_internal.h" + +#ifdef ARCH_X86 +static inline uint32_t arch_accel (uint32_t accel) +{ + if (accel & (MPEG2_ACCEL_X86_3DNOW | MPEG2_ACCEL_X86_MMXEXT)) + accel |= MPEG2_ACCEL_X86_MMX; + + if (accel & (MPEG2_ACCEL_X86_SSE2 | MPEG2_ACCEL_X86_SSE3)) + accel |= MPEG2_ACCEL_X86_MMXEXT; + + if (accel & (MPEG2_ACCEL_X86_SSE3)) + accel |= MPEG2_ACCEL_X86_SSE2; + +#ifdef ACCEL_DETECT + if (accel & MPEG2_ACCEL_DETECT) { + uint32_t eax, ebx, ecx, edx; + int AMD; + +#if !defined(PIC) && !defined(__PIC__) +#define cpuid(op,eax,ebx,ecx,edx) \ + __asm__ ("cpuid" \ + : "=a" (eax), \ + "=b" (ebx), \ + "=c" (ecx), \ + "=d" (edx) \ + : "a" (op) \ + : "cc") +#else /* PIC version : save ebx */ +#define cpuid(op,eax,ebx,ecx,edx) \ + __asm__ ("push %%ebx\n\t" \ + "cpuid\n\t" \ + "movl %%ebx,%1\n\t" \ + "pop %%ebx" \ + : "=a" (eax), \ + "=r" (ebx), \ + "=c" (ecx), \ + "=d" (edx) \ + : "a" (op) \ + : "cc") +#endif + + __asm__ ("pushf\n\t" + "pushf\n\t" + "pop %0\n\t" + "movl %0,%1\n\t" + "xorl $0x200000,%0\n\t" + "push 
#if defined(ACCEL_DETECT) && (defined(ARCH_PPC) || defined(ARCH_SPARC))
#include <signal.h>
#include <setjmp.h>

/* Jump target and arming flag shared by the SIGILL-based probes below. */
static sigjmp_buf jmpbuf;
static volatile sig_atomic_t canjump = 0;

/*
 * SIGILL handler used while probing for optional instructions.
 * When no probe is armed the default disposition is restored and the
 * signal re-raised; otherwise control returns to the pending sigsetjmp().
 */
static RETSIGTYPE sigill_handler (int sig)
{
    if (!canjump) {
        signal (sig, SIG_DFL);
        raise (sig);
    }

    canjump = 0;
    siglongjmp (jmpbuf, 1);
}
#endif /* ACCEL_DETECT && (ARCH_PPC || ARCH_SPARC) */

#ifdef ARCH_PPC
/*
 * PowerPC: when detection is requested and AltiVec is not already forced,
 * execute one AltiVec instruction under a SIGILL guard and set
 * MPEG2_ACCEL_PPC_ALTIVEC if it does not trap.
 */
static inline uint32_t arch_accel (uint32_t accel)
{
#ifdef ACCEL_DETECT
    /*
     * The mask must be parenthesised: "==" binds tighter than "&", so the
     * unparenthesised form compared the two constants first and the probe
     * never ran.
     */
    if ((accel & (MPEG2_ACCEL_PPC_ALTIVEC | MPEG2_ACCEL_DETECT)) ==
        MPEG2_ACCEL_DETECT) {
        static RETSIGTYPE (* oldsig) (int);

        oldsig = signal (SIGILL, sigill_handler);
        if (sigsetjmp (jmpbuf, 1)) {
            /* the probe trapped: no AltiVec */
            signal (SIGILL, oldsig);
            return accel;
        }

        canjump = 1;

#ifdef HAVE_ALTIVEC_H   /* gnu */
#define VAND(a,b,c) "vand " #a "," #b "," #c "\n\t"
#else                   /* apple */
#define VAND(a,b,c) "vand v" #a ",v" #b ",v" #c "\n\t"
#endif
        asm volatile ("mtspr 256, %0\n\t"
                      VAND (0, 0, 0)
                      :
                      : "r" (-1));

        canjump = 0;
        accel |= MPEG2_ACCEL_PPC_ALTIVEC;

        signal (SIGILL, oldsig);
    }
#endif /* ACCEL_DETECT */

    return accel;
}
#endif /* ARCH_PPC */

#ifdef ARCH_SPARC
/*
 * SPARC: VIS2 implies VIS.  When detection is requested and VIS2 is not
 * already forced, probe one VIS and then one VIS2 instruction under a
 * SIGILL guard, setting the matching flag for each that does not trap.
 */
static inline uint32_t arch_accel (uint32_t accel)
{
    if (accel & MPEG2_ACCEL_SPARC_VIS2)
        accel |= MPEG2_ACCEL_SPARC_VIS;

#ifdef ACCEL_DETECT
    /* Parenthesised for the same precedence reason as the PowerPC variant. */
    if ((accel & (MPEG2_ACCEL_SPARC_VIS2 | MPEG2_ACCEL_DETECT)) ==
        MPEG2_ACCEL_DETECT) {
        static RETSIGTYPE (* oldsig) (int);

        oldsig = signal (SIGILL, sigill_handler);
        if (sigsetjmp (jmpbuf, 1)) {
            /* VIS probe trapped */
            signal (SIGILL, oldsig);
            return accel;
        }

        canjump = 1;

        /* pdist %f0, %f0, %f0 */
        __asm__ __volatile__(".word\t0x81b007c0");

        canjump = 0;
        accel |= MPEG2_ACCEL_SPARC_VIS;

        if (sigsetjmp (jmpbuf, 1)) {
            /* VIS2 probe trapped: keep the VIS flag only */
            signal (SIGILL, oldsig);
            return accel;
        }

        canjump = 1;

        /* edge8n %g0, %g0, %g0 */
        __asm__ __volatile__(".word\t0x81b00020");

        canjump = 0;
        accel |= MPEG2_ACCEL_SPARC_VIS2;

        signal (SIGILL, oldsig);
    }
#endif /* ACCEL_DETECT */

    return accel;
}
#endif /* ARCH_SPARC */
MPEG2_ACCEL_ALPHA : (MPEG2_ACCEL_ALPHA | + MPEG2_ACCEL_ALPHA_MVI); + } +#endif /* ACCEL_DETECT */ + + return accel; +} +#endif /* ARCH_ALPHA */ + +uint32_t mpeg2_detect_accel (uint32_t accel) +{ +#if defined (ARCH_X86) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC) + accel = arch_accel (accel); +#endif + return accel; +} diff --git a/src/video_dec/libmpeg2new/libmpeg2/cpu_state.c b/src/video_dec/libmpeg2new/libmpeg2/cpu_state.c new file mode 100644 index 000000000..edbf2dd28 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/cpu_state.c @@ -0,0 +1,129 @@ +/* + * cpu_state.c + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include +#include + +#include "../include/mpeg2.h" +#include "../include/attributes.h" +#include "mpeg2_internal.h" +#ifdef ARCH_X86 +#include "../include/mmx.h" +#endif + +void (* mpeg2_cpu_state_save) (cpu_state_t * state) = NULL; +void (* mpeg2_cpu_state_restore) (cpu_state_t * state) = NULL; + +#ifdef ARCH_X86 +static void state_restore_mmx (cpu_state_t * state) +{ + emms (); +} +#endif + +#ifdef ARCH_PPC +#ifdef HAVE_ALTIVEC_H /* gnu */ +#define LI(a,b) "li " #a "," #b "\n\t" +#define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t" +#define STVX(a,b,c) "stvx " #a "," #b "," #c "\n\t" +#define LVX0(a,b,c) "lvx " #a ",0," #c "\n\t" +#define LVX(a,b,c) "lvx " #a "," #b "," #c "\n\t" +#else /* apple */ +#define LI(a,b) "li r" #a "," #b "\n\t" +#define STVX0(a,b,c) "stvx v" #a ",0,r" #c "\n\t" +#define STVX(a,b,c) "stvx v" #a ",r" #b ",r" #c "\n\t" +#define LVX0(a,b,c) "lvx v" #a ",0,r" #c "\n\t" +#define LVX(a,b,c) "lvx v" #a ",r" #b ",r" #c "\n\t" +#endif + +static void state_save_altivec (cpu_state_t * state) +{ + asm (LI (9, 16) + STVX0 (20, 0, 3) + LI (11, 32) + STVX (21, 9, 3) + LI (9, 48) + STVX (22, 11, 3) + LI (11, 64) + STVX (23, 9, 3) + LI (9, 80) + STVX (24, 11, 3) + LI (11, 96) + STVX (25, 9, 3) + LI (9, 112) + STVX (26, 11, 3) + LI (11, 128) + STVX (27, 9, 3) + LI (9, 144) + STVX (28, 11, 3) + LI (11, 160) + STVX (29, 9, 3) + LI (9, 176) + STVX (30, 11, 3) + STVX (31, 9, 3)); +} + +static void state_restore_altivec (cpu_state_t * state) +{ + asm (LI (9, 16) + LVX0 (20, 0, 3) + LI (11, 32) + LVX (21, 9, 3) + LI (9, 48) + LVX (22, 11, 3) + LI (11, 64) + LVX (23, 9, 3) + LI (9, 80) + LVX (24, 11, 3) + LI (11, 96) + LVX (25, 9, 3) + LI (9, 112) + LVX (26, 11, 3) + LI (11, 128) + LVX (27, 9, 3) + LI (9, 144) + LVX (28, 11, 
/*
 * Select the CPU-state save/restore callbacks that match the acceleration
 * flags in accel.  Callbacks not selected here keep their current value.
 */
void mpeg2_cpu_state_init (uint32_t accel)
{
#if defined (ARCH_X86)
    /* MMX: only a restore step (emms) is needed. */
    if ((accel & MPEG2_ACCEL_X86_MMX) != 0)
        mpeg2_cpu_state_restore = state_restore_mmx;
#endif
#if defined (ARCH_PPC)
    /* AltiVec: both save and restore are installed together. */
    if ((accel & MPEG2_ACCEL_PPC_ALTIVEC) == 0)
        return;
    mpeg2_cpu_state_save = state_save_altivec;
    mpeg2_cpu_state_restore = state_restore_altivec;
#endif
}
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include /* memcmp/memset, try to remove */ +#include +#include + +#include "../include/mpeg2.h" +#include "../include/attributes.h" +#include "mpeg2_internal.h" + +static int mpeg2_accels = 0; + +#define BUFFER_SIZE (1194 * 1024) + +const mpeg2_info_t * mpeg2_info (mpeg2dec_t * mpeg2dec) +{ + return &(mpeg2dec->info); +} + +static inline int skip_chunk (mpeg2dec_t * mpeg2dec, int bytes) +{ + uint8_t * current; + uint32_t shift; + uint8_t * limit; + uint8_t byte; + + if (!bytes) + return 0; + + current = mpeg2dec->buf_start; + shift = mpeg2dec->shift; + limit = current + bytes; + + do { + byte = *current++; + if (shift == 0x00000100) { + int skipped; + + mpeg2dec->shift = 0xffffff00; + skipped = current - mpeg2dec->buf_start; + mpeg2dec->buf_start = current; + return skipped; + } + shift = (shift | byte) << 8; + } while (current < limit); + + mpeg2dec->shift = shift; + mpeg2dec->buf_start = current; + return 0; +} + +static inline int copy_chunk (mpeg2dec_t * mpeg2dec, int bytes) +{ + uint8_t * current; + uint32_t shift; + uint8_t * chunk_ptr; + uint8_t * limit; + uint8_t byte; + + if (!bytes) + return 0; + + current = mpeg2dec->buf_start; + shift = mpeg2dec->shift; + chunk_ptr = mpeg2dec->chunk_ptr; + limit = current + bytes; + + do { + byte = *current++; + if (shift == 0x00000100) { + int copied; + + mpeg2dec->shift = 0xffffff00; + mpeg2dec->chunk_ptr = chunk_ptr + 1; + copied = current - mpeg2dec->buf_start; + mpeg2dec->buf_start = current; + return copied; + } + shift = (shift | byte) << 8; + *chunk_ptr++ = byte; + } while (current < limit); + + mpeg2dec->shift = shift; + mpeg2dec->buf_start = current; + return 0; +} + +void mpeg2_buffer (mpeg2dec_t * mpeg2dec, uint8_t * start, uint8_t * end) +{ + mpeg2dec->buf_start = 
start; + mpeg2dec->buf_end = end; +} + +int mpeg2_getpos (mpeg2dec_t * mpeg2dec) +{ + return mpeg2dec->buf_end - mpeg2dec->buf_start; +} + +static inline mpeg2_state_t seek_chunk (mpeg2dec_t * mpeg2dec) +{ + int size, skipped; + + size = mpeg2dec->buf_end - mpeg2dec->buf_start; + skipped = skip_chunk (mpeg2dec, size); + if (!skipped) { + mpeg2dec->bytes_since_tag += size; + return STATE_BUFFER; + } + mpeg2dec->bytes_since_tag += skipped; + mpeg2dec->code = mpeg2dec->buf_start[-1]; + return STATE_INTERNAL_NORETURN; +} + +mpeg2_state_t mpeg2_seek_header (mpeg2dec_t * mpeg2dec) +{ + while (!(mpeg2dec->code == 0xb3 || + ((mpeg2dec->code == 0xb7 || mpeg2dec->code == 0xb8 || + !mpeg2dec->code) && mpeg2dec->sequence.width != (unsigned)-1))) + if (seek_chunk (mpeg2dec) == STATE_BUFFER) + return STATE_BUFFER; + mpeg2dec->chunk_start = mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer; + mpeg2dec->user_data_len = 0; + return ((mpeg2dec->code == 0xb7) ? + mpeg2_header_end (mpeg2dec) : mpeg2_parse_header (mpeg2dec)); +} + +#define RECEIVED(code,state) (((state) << 8) + (code)) + +mpeg2_state_t mpeg2_parse (mpeg2dec_t * mpeg2dec) +{ + int size_buffer, size_chunk, copied; + + if (mpeg2dec->action) { + mpeg2_state_t state; + + state = mpeg2dec->action (mpeg2dec); + if ((int)state > (int)STATE_INTERNAL_NORETURN) + return state; + } + + while (1) { + while ((unsigned) (mpeg2dec->code - mpeg2dec->first_decode_slice) < + mpeg2dec->nb_decode_slices) { + size_buffer = mpeg2dec->buf_end - mpeg2dec->buf_start; + size_chunk = (mpeg2dec->chunk_buffer + BUFFER_SIZE - + mpeg2dec->chunk_ptr); + if (size_buffer <= size_chunk) { + copied = copy_chunk (mpeg2dec, size_buffer); + if (!copied) { + mpeg2dec->bytes_since_tag += size_buffer; + mpeg2dec->chunk_ptr += size_buffer; + return STATE_BUFFER; + } + } else { + copied = copy_chunk (mpeg2dec, size_chunk); + if (!copied) { + /* filled the chunk buffer without finding a start code */ + mpeg2dec->bytes_since_tag += size_chunk; + mpeg2dec->action = 
seek_chunk; + return STATE_INVALID; + } + } + mpeg2dec->bytes_since_tag += copied; + + mpeg2_slice (&(mpeg2dec->decoder), mpeg2dec->code, + mpeg2dec->chunk_start); + mpeg2dec->code = mpeg2dec->buf_start[-1]; + mpeg2dec->chunk_ptr = mpeg2dec->chunk_start; + } + if ((unsigned) (mpeg2dec->code - 1) >= 0xb0 - 1) + break; + if (seek_chunk (mpeg2dec) == STATE_BUFFER) + return STATE_BUFFER; + } + + mpeg2dec->action = mpeg2_seek_header; + switch (mpeg2dec->code) { + case 0x00: + return mpeg2dec->state; + case 0xb3: + case 0xb7: + case 0xb8: + return (mpeg2dec->state == STATE_SLICE) ? STATE_SLICE : STATE_INVALID; + default: + mpeg2dec->action = seek_chunk; + return STATE_INVALID; + } +} + +mpeg2_state_t mpeg2_parse_header (mpeg2dec_t * mpeg2dec) +{ + static int (* process_header[]) (mpeg2dec_t * mpeg2dec) = { + mpeg2_header_picture, mpeg2_header_extension, mpeg2_header_user_data, + mpeg2_header_sequence, NULL, NULL, NULL, NULL, mpeg2_header_gop + }; + int size_buffer, size_chunk, copied; + + mpeg2dec->action = mpeg2_parse_header; + mpeg2dec->info.user_data = NULL; mpeg2dec->info.user_data_len = 0; + while (1) { + size_buffer = mpeg2dec->buf_end - mpeg2dec->buf_start; + size_chunk = (mpeg2dec->chunk_buffer + BUFFER_SIZE - + mpeg2dec->chunk_ptr); + if (size_buffer <= size_chunk) { + copied = copy_chunk (mpeg2dec, size_buffer); + if (!copied) { + mpeg2dec->bytes_since_tag += size_buffer; + mpeg2dec->chunk_ptr += size_buffer; + return STATE_BUFFER; + } + } else { + copied = copy_chunk (mpeg2dec, size_chunk); + if (!copied) { + /* filled the chunk buffer without finding a start code */ + mpeg2dec->bytes_since_tag += size_chunk; + mpeg2dec->code = 0xb4; + mpeg2dec->action = mpeg2_seek_header; + return STATE_INVALID; + } + } + mpeg2dec->bytes_since_tag += copied; + + if (process_header[mpeg2dec->code & 0x0b] (mpeg2dec)) { + mpeg2dec->code = mpeg2dec->buf_start[-1]; + mpeg2dec->action = mpeg2_seek_header; + return STATE_INVALID; + } + + mpeg2dec->code = mpeg2dec->buf_start[-1]; + 
switch (RECEIVED (mpeg2dec->code, mpeg2dec->state)) { + + /* state transition after a sequence header */ + case RECEIVED (0x00, STATE_SEQUENCE): + case RECEIVED (0xb8, STATE_SEQUENCE): + mpeg2_header_sequence_finalize (mpeg2dec); + break; + + /* other legal state transitions */ + case RECEIVED (0x00, STATE_GOP): + mpeg2_header_gop_finalize (mpeg2dec); + break; + case RECEIVED (0x01, STATE_PICTURE): + case RECEIVED (0x01, STATE_PICTURE_2ND): + mpeg2_header_picture_finalize (mpeg2dec, mpeg2_accels); + mpeg2dec->action = mpeg2_header_slice_start; + break; + + /* legal headers within a given state */ + case RECEIVED (0xb2, STATE_SEQUENCE): + case RECEIVED (0xb2, STATE_GOP): + case RECEIVED (0xb2, STATE_PICTURE): + case RECEIVED (0xb2, STATE_PICTURE_2ND): + case RECEIVED (0xb5, STATE_SEQUENCE): + case RECEIVED (0xb5, STATE_PICTURE): + case RECEIVED (0xb5, STATE_PICTURE_2ND): + mpeg2dec->chunk_ptr = mpeg2dec->chunk_start; + continue; + + default: + mpeg2dec->action = mpeg2_seek_header; + return STATE_INVALID; + } + + mpeg2dec->chunk_start = mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer; + mpeg2dec->user_data_len = 0; + return mpeg2dec->state; + } +} + +int mpeg2_convert (mpeg2dec_t * mpeg2dec, mpeg2_convert_t convert, void * arg) +{ + mpeg2_convert_init_t convert_init; + int error; + + error = convert (MPEG2_CONVERT_SET, NULL, &(mpeg2dec->sequence), 0, + mpeg2_accels, arg, &convert_init); + if (!error) { + mpeg2dec->convert = convert; + mpeg2dec->convert_arg = arg; + mpeg2dec->convert_id_size = convert_init.id_size; + mpeg2dec->convert_stride = 0; + } + return error; +} + +int mpeg2_stride (mpeg2dec_t * mpeg2dec, int stride) +{ + if (!mpeg2dec->convert) { + if (stride < (int) mpeg2dec->sequence.width) + stride = mpeg2dec->sequence.width; + mpeg2dec->decoder.stride_frame = stride; + } else { + mpeg2_convert_init_t convert_init; + + stride = mpeg2dec->convert (MPEG2_CONVERT_STRIDE, NULL, + &(mpeg2dec->sequence), stride, + mpeg2_accels, mpeg2dec->convert_arg, + 
&convert_init); + mpeg2dec->convert_id_size = convert_init.id_size; + mpeg2dec->convert_stride = stride; + } + return stride; +} + +void mpeg2_set_buf (mpeg2dec_t * mpeg2dec, uint8_t * buf[3], void * id) +{ + mpeg2_fbuf_t * fbuf; + + if (mpeg2dec->custom_fbuf) { + if (mpeg2dec->state == STATE_SEQUENCE) { + mpeg2dec->fbuf[2] = mpeg2dec->fbuf[1]; + mpeg2dec->fbuf[1] = mpeg2dec->fbuf[0]; + } + mpeg2_set_fbuf (mpeg2dec, (mpeg2dec->decoder.coding_type == + PIC_FLAG_CODING_TYPE_B)); + fbuf = mpeg2dec->fbuf[0]; + } else { + fbuf = &(mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index].fbuf); + mpeg2dec->alloc_index_user = ++mpeg2dec->alloc_index; + } + fbuf->buf[0] = buf[0]; + fbuf->buf[1] = buf[1]; + fbuf->buf[2] = buf[2]; + fbuf->id = id; +} + +void mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf) +{ + mpeg2dec->custom_fbuf = custom_fbuf; +} + +void mpeg2_skip (mpeg2dec_t * mpeg2dec, int skip) +{ + mpeg2dec->first_decode_slice = 1; + mpeg2dec->nb_decode_slices = skip ? 0 : (0xb0 - 1); +} + +void mpeg2_slice_region (mpeg2dec_t * mpeg2dec, int start, int end) +{ + start = (start < 1) ? 1 : (start > 0xb0) ? 0xb0 : start; + end = (end < start) ? start : (end > 0xb0) ? 
0xb0 : end; + mpeg2dec->first_decode_slice = start; + mpeg2dec->nb_decode_slices = end - start; +} + +void mpeg2_tag_picture (mpeg2dec_t * mpeg2dec, uint32_t tag, uint32_t tag2) +{ + mpeg2dec->tag_previous = mpeg2dec->tag_current; + mpeg2dec->tag2_previous = mpeg2dec->tag2_current; + mpeg2dec->tag_current = tag; + mpeg2dec->tag2_current = tag2; + mpeg2dec->num_tags++; + mpeg2dec->bytes_since_tag = 0; +} + +uint32_t mpeg2_accel (uint32_t accel) +{ + if (!mpeg2_accels) { + mpeg2_accels = mpeg2_detect_accel (accel) | MPEG2_ACCEL_DETECT; + mpeg2_cpu_state_init (mpeg2_accels); + mpeg2_idct_init (mpeg2_accels); + mpeg2_mc_init (mpeg2_accels); + } + return mpeg2_accels & ~MPEG2_ACCEL_DETECT; +} + +void mpeg2_reset (mpeg2dec_t * mpeg2dec, int full_reset) +{ + mpeg2dec->buf_start = mpeg2dec->buf_end = NULL; + mpeg2dec->num_tags = 0; + mpeg2dec->shift = 0xffffff00; + mpeg2dec->code = 0xb4; + mpeg2dec->action = mpeg2_seek_header; + mpeg2dec->state = STATE_INVALID; + mpeg2dec->first = 1; + + mpeg2_reset_info(&(mpeg2dec->info)); + mpeg2dec->info.gop = NULL; + mpeg2dec->info.user_data = NULL; + mpeg2dec->info.user_data_len = 0; + if (full_reset) { + mpeg2dec->info.sequence = NULL; + mpeg2_header_state_init (mpeg2dec); + } + +} + +mpeg2dec_t * mpeg2_init (void) +{ + mpeg2dec_t * mpeg2dec; + + mpeg2_accel (MPEG2_ACCEL_DETECT); + + mpeg2dec = (mpeg2dec_t *) mpeg2_malloc (sizeof (mpeg2dec_t), + MPEG2_ALLOC_MPEG2DEC); + if (mpeg2dec == NULL) + return NULL; + + memset (mpeg2dec->decoder.DCTblock, 0, 64 * sizeof (int16_t)); + memset (mpeg2dec->quantizer_matrix, 0, 4 * 64 * sizeof (uint8_t)); + + mpeg2dec->chunk_buffer = (uint8_t *) mpeg2_malloc (BUFFER_SIZE + 4, + MPEG2_ALLOC_CHUNK); + + mpeg2dec->sequence.width = (unsigned)-1; + mpeg2_reset (mpeg2dec, 1); + + return mpeg2dec; +} + +void mpeg2_close (mpeg2dec_t * mpeg2dec) +{ + mpeg2_header_state_init (mpeg2dec); + mpeg2_free (mpeg2dec->chunk_buffer); + mpeg2_free (mpeg2dec); +} diff --git a/src/video_dec/libmpeg2new/libmpeg2/header.c 
b/src/video_dec/libmpeg2new/libmpeg2/header.c new file mode 100644 index 000000000..935a50aa3 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/header.c @@ -0,0 +1,961 @@ +/* + * header.c + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 2003 Regis Duchesne + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include +#include /* defines NULL */ +#include /* memcmp */ + +#include "../include/mpeg2.h" +#include "../include/attributes.h" +#include "mpeg2_internal.h" + +#define SEQ_EXT 2 +#define SEQ_DISPLAY_EXT 4 +#define QUANT_MATRIX_EXT 8 +#define COPYRIGHT_EXT 0x10 +#define PIC_DISPLAY_EXT 0x80 +#define PIC_CODING_EXT 0x100 + +/* default intra quant matrix, in zig-zag order */ +static const uint8_t default_intra_quantizer_matrix[64] ATTR_ALIGN(16) = { + 8, + 16, 16, + 19, 16, 19, + 22, 22, 22, 22, + 22, 22, 26, 24, 26, + 27, 27, 27, 26, 26, 26, + 26, 27, 27, 27, 29, 29, 29, + 34, 34, 34, 29, 29, 29, 27, 27, + 29, 29, 32, 32, 34, 34, 37, + 38, 37, 35, 35, 34, 35, + 38, 38, 40, 40, 40, + 48, 48, 46, 46, + 56, 56, 58, + 69, 69, + 83 +}; + +uint8_t mpeg2_scan_norm[64] ATTR_ALIGN(16) = { + /* 
Zig-Zag scan pattern */ + 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63 +}; + +uint8_t mpeg2_scan_alt[64] ATTR_ALIGN(16) = { + /* Alternate scan pattern */ + 0, 8, 16, 24, 1, 9, 2, 10, 17, 25, 32, 40, 48, 56, 57, 49, + 41, 33, 26, 18, 3, 11, 4, 12, 19, 27, 34, 42, 50, 58, 35, 43, + 51, 59, 20, 28, 5, 13, 6, 14, 21, 29, 36, 44, 52, 60, 37, 45, + 53, 61, 22, 30, 7, 15, 23, 31, 38, 46, 54, 62, 39, 47, 55, 63 +}; + +void mpeg2_header_state_init (mpeg2dec_t * mpeg2dec) +{ + if (mpeg2dec->sequence.width != (unsigned)-1) { + int i; + + mpeg2dec->sequence.width = (unsigned)-1; + if (!mpeg2dec->custom_fbuf) + for (i = mpeg2dec->alloc_index_user; + i < mpeg2dec->alloc_index; i++) { + mpeg2_free (mpeg2dec->fbuf_alloc[i].fbuf.buf[0]); + mpeg2_free (mpeg2dec->fbuf_alloc[i].fbuf.buf[1]); + mpeg2_free (mpeg2dec->fbuf_alloc[i].fbuf.buf[2]); + } + if (mpeg2dec->convert_start) + for (i = 0; i < 3; i++) { + mpeg2_free (mpeg2dec->yuv_buf[i][0]); + mpeg2_free (mpeg2dec->yuv_buf[i][1]); + mpeg2_free (mpeg2dec->yuv_buf[i][2]); + } + if (mpeg2dec->decoder.convert_id) + mpeg2_free (mpeg2dec->decoder.convert_id); + } + mpeg2dec->decoder.coding_type = I_TYPE; + mpeg2dec->decoder.convert = NULL; + mpeg2dec->decoder.convert_id = NULL; + mpeg2dec->picture = mpeg2dec->pictures; + mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[0].fbuf; + mpeg2dec->fbuf[1] = &mpeg2dec->fbuf_alloc[1].fbuf; + mpeg2dec->fbuf[2] = &mpeg2dec->fbuf_alloc[2].fbuf; + mpeg2dec->first = 1; + mpeg2dec->alloc_index = 0; + mpeg2dec->alloc_index_user = 0; + mpeg2dec->first_decode_slice = 1; + mpeg2dec->nb_decode_slices = 0xb0 - 1; + mpeg2dec->convert = NULL; + mpeg2dec->convert_start = NULL; + mpeg2dec->custom_fbuf = 0; + mpeg2dec->yuv_index = 0; +} + +void mpeg2_reset_info (mpeg2_info_t * info) +{ + info->current_picture = 
info->current_picture_2nd = NULL; + info->display_picture = info->display_picture_2nd = NULL; + info->current_fbuf = info->display_fbuf = info->discard_fbuf = NULL; +} + +static void info_user_data (mpeg2dec_t * mpeg2dec) +{ + if (mpeg2dec->user_data_len) { + mpeg2dec->info.user_data = mpeg2dec->chunk_buffer; + mpeg2dec->info.user_data_len = mpeg2dec->user_data_len - 3; + } +} + +int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec) +{ + uint8_t * buffer = mpeg2dec->chunk_start; + mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence); + static unsigned int frame_period[16] = { + 0, 1126125, 1125000, 1080000, 900900, 900000, 540000, 450450, 450000, + /* unofficial: xing 15 fps */ + 1800000, + /* unofficial: libmpeg3 "Unofficial economy rates" 5/10/12/15 fps */ + 5400000, 2700000, 2250000, 1800000, 0, 0 + }; + int i; + + if ((buffer[6] & 0x20) != 0x20) /* missing marker_bit */ + return 1; + + i = (buffer[0] << 16) | (buffer[1] << 8) | buffer[2]; + if (! (sequence->display_width = sequence->picture_width = i >> 12)) + return 1; + if (! 
(sequence->display_height = sequence->picture_height = i & 0xfff)) + return 1; + sequence->width = (sequence->picture_width + 15) & ~15; + sequence->height = (sequence->picture_height + 15) & ~15; + sequence->chroma_width = sequence->width >> 1; + sequence->chroma_height = sequence->height >> 1; + + sequence->flags = (SEQ_FLAG_PROGRESSIVE_SEQUENCE | + SEQ_VIDEO_FORMAT_UNSPECIFIED); + + sequence->pixel_width = buffer[3] >> 4; /* aspect ratio */ + sequence->frame_period = frame_period[buffer[3] & 15]; + + sequence->byte_rate = (buffer[4]<<10) | (buffer[5]<<2) | (buffer[6]>>6); + + sequence->vbv_buffer_size = ((buffer[6]<<16)|(buffer[7]<<8))&0x1ff800; + + if (buffer[7] & 4) + sequence->flags |= SEQ_FLAG_CONSTRAINED_PARAMETERS; + + mpeg2dec->copy_matrix = 3; + if (buffer[7] & 2) { + for (i = 0; i < 64; i++) + mpeg2dec->new_quantizer_matrix[0][mpeg2_scan_norm[i]] = + (buffer[i+7] << 7) | (buffer[i+8] >> 1); + buffer += 64; + } else + for (i = 0; i < 64; i++) + mpeg2dec->new_quantizer_matrix[0][mpeg2_scan_norm[i]] = + default_intra_quantizer_matrix[i]; + + if (buffer[7] & 1) + for (i = 0; i < 64; i++) + mpeg2dec->new_quantizer_matrix[1][mpeg2_scan_norm[i]] = + buffer[i+8]; + else + memset (mpeg2dec->new_quantizer_matrix[1], 16, 64); + + sequence->profile_level_id = 0x80; + sequence->colour_primaries = 0; + sequence->transfer_characteristics = 0; + sequence->matrix_coefficients = 0; + + mpeg2dec->ext_state = SEQ_EXT; + mpeg2dec->state = STATE_SEQUENCE; + mpeg2dec->display_offset_x = mpeg2dec->display_offset_y = 0; + + return 0; +} + +static int sequence_ext (mpeg2dec_t * mpeg2dec) +{ + uint8_t * buffer = mpeg2dec->chunk_start; + mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence); + uint32_t flags; + + if (!(buffer[3] & 1)) + return 1; + + sequence->profile_level_id = (buffer[0] << 4) | (buffer[1] >> 4); + + sequence->display_width = sequence->picture_width += + ((buffer[1] << 13) | (buffer[2] << 5)) & 0x3000; + sequence->display_height = sequence->picture_height += + 
/*
 * Reduce the fraction *u / *v to lowest terms, in place, by dividing
 * both terms by their greatest common divisor.
 *
 * If both terms are zero there is no common divisor to divide by; the
 * pair is left untouched (previously this divided by zero).  If exactly
 * one term is zero the other term is the gcd, so the result is 0/1 or
 * 1/0 respectively.
 */
static inline void simplify (unsigned int * u, unsigned int * v)
{
    unsigned int a, b, tmp;

    a = *u;
    b = *v;
    while (a) { /* Euclid: on exit b holds gcd (*u, *v) */
        tmp = a;
        a = b % tmp;
        b = tmp;
    }
    if (!b)     /* only when *u == 0 and *v == 0 */
        return;
    *u /= b;
    *v /= b;
}
int width; + int height; + + sequence->byte_rate *= 50; + + if (sequence->flags & SEQ_FLAG_MPEG2) { + switch (sequence->pixel_width) { + case 1: /* square pixels */ + sequence->pixel_width = sequence->pixel_height = 1; return; + case 2: /* 4:3 aspect ratio */ + width = 4; height = 3; break; + case 3: /* 16:9 aspect ratio */ + width = 16; height = 9; break; + case 4: /* 2.21:1 aspect ratio */ + width = 221; height = 100; break; + default: /* illegal */ + sequence->pixel_width = sequence->pixel_height = 0; return; + } + width *= sequence->display_height; + height *= sequence->display_width; + + } else { + if (sequence->byte_rate == 50 * 0x3ffff) + sequence->byte_rate = 0; /* mpeg-1 VBR */ + + switch (sequence->pixel_width) { + case 0: case 15: /* illegal */ + sequence->pixel_width = sequence->pixel_height = 0; return; + case 1: /* square pixels */ + sequence->pixel_width = sequence->pixel_height = 1; return; + case 3: /* 720x576 16:9 */ + sequence->pixel_width = 64; sequence->pixel_height = 45; return; + case 6: /* 720x480 16:9 */ + sequence->pixel_width = 32; sequence->pixel_height = 27; return; + case 8: /* BT.601 625 lines 4:3 */ + sequence->pixel_width = 59; sequence->pixel_height = 54; return; + case 12: /* BT.601 525 lines 4:3 */ + sequence->pixel_width = 10; sequence->pixel_height = 11; return; + default: + height = 88 * sequence->pixel_width + 1171; + width = 2000; + } + } + + sequence->pixel_width = width; + sequence->pixel_height = height; + simplify (&sequence->pixel_width, &sequence->pixel_height); +} + +int mpeg2_guess_aspect (const mpeg2_sequence_t * sequence, + unsigned int * pixel_width, + unsigned int * pixel_height) +{ + static struct { + unsigned int width, height; + } video_modes[] = { + {720, 576}, /* 625 lines, 13.5 MHz (D1, DV, DVB, DVD) */ + {704, 576}, /* 625 lines, 13.5 MHz (1/1 D1, DVB, DVD, 4CIF) */ + {544, 576}, /* 625 lines, 10.125 MHz (DVB, laserdisc) */ + {528, 576}, /* 625 lines, 10.125 MHz (3/4 D1, DVB, laserdisc) */ + {480, 576}, /* 
625 lines, 9 MHz (2/3 D1, DVB, SVCD) */ + {352, 576}, /* 625 lines, 6.75 MHz (D2, 1/2 D1, CVD, DVB, DVD) */ + {352, 288}, /* 625 lines, 6.75 MHz, 1 field (D4, VCD, DVB, DVD, CIF) */ + {176, 144}, /* 625 lines, 3.375 MHz, half field (QCIF) */ + {720, 486}, /* 525 lines, 13.5 MHz (D1) */ + {704, 486}, /* 525 lines, 13.5 MHz */ + {720, 480}, /* 525 lines, 13.5 MHz (DV, DSS, DVD) */ + {704, 480}, /* 525 lines, 13.5 MHz (1/1 D1, ATSC, DVD) */ + {544, 480}, /* 525 lines. 10.125 MHz (DSS, laserdisc) */ + {528, 480}, /* 525 lines. 10.125 MHz (3/4 D1, laserdisc) */ + {480, 480}, /* 525 lines, 9 MHz (2/3 D1, SVCD) */ + {352, 480}, /* 525 lines, 6.75 MHz (D2, 1/2 D1, CVD, DVD) */ + {352, 240} /* 525 lines. 6.75 MHz, 1 field (D4, VCD, DSS, DVD) */ + }; + unsigned int width, height, pix_width, pix_height, i, DAR_16_9; + + *pixel_width = sequence->pixel_width; + *pixel_height = sequence->pixel_height; + width = sequence->picture_width; + height = sequence->picture_height; + for (i = 0; i < sizeof (video_modes) / sizeof (video_modes[0]); i++) + if (width == video_modes[i].width && height == video_modes[i].height) + break; + if (i == sizeof (video_modes) / sizeof (video_modes[0]) || + (sequence->pixel_width == 1 && sequence->pixel_height == 1) || + width != sequence->display_width || height != sequence->display_height) + return 0; + + for (pix_height = 1; height * pix_height < 480; pix_height <<= 1); + height *= pix_height; + for (pix_width = 1; width * pix_width <= 352; pix_width <<= 1); + width *= pix_width; + + if (! 
(sequence->flags & SEQ_FLAG_MPEG2)) { + static unsigned int mpeg1_check[2][2] = {{11, 54}, {27, 45}}; + DAR_16_9 = (sequence->pixel_height == 27 || + sequence->pixel_height == 45); + if (width < 704 || + sequence->pixel_height != mpeg1_check[DAR_16_9][height == 576]) + return 0; + } else { + DAR_16_9 = (3 * sequence->picture_width * sequence->pixel_width > + 4 * sequence->picture_height * sequence->pixel_height); + switch (width) { + case 528: case 544: pix_width *= 4; pix_height *= 3; break; + case 480: pix_width *= 3; pix_height *= 2; break; + } + } + if (DAR_16_9) { + pix_width *= 4; pix_height *= 3; + } + if (height == 576) { + pix_width *= 59; pix_height *= 54; + } else { + pix_width *= 10; pix_height *= 11; + } + *pixel_width = pix_width; + *pixel_height = pix_height; + simplify (pixel_width, pixel_height); + return (height == 576) ? 1 : 2; +} + +static void copy_matrix (mpeg2dec_t * mpeg2dec, int index) +{ + if (memcmp (mpeg2dec->quantizer_matrix[index], + mpeg2dec->new_quantizer_matrix[index], 64)) { + memcpy (mpeg2dec->quantizer_matrix[index], + mpeg2dec->new_quantizer_matrix[index], 64); + mpeg2dec->scaled[index] = -1; + } +} + +static void finalize_matrix (mpeg2dec_t * mpeg2dec) +{ + mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); + int i; + + for (i = 0; i < 2; i++) { + if (mpeg2dec->copy_matrix & (1 << i)) + copy_matrix (mpeg2dec, i); + if ((mpeg2dec->copy_matrix & (4 << i)) && + memcmp (mpeg2dec->quantizer_matrix[i], + mpeg2dec->new_quantizer_matrix[i+2], 64)) { + copy_matrix (mpeg2dec, i + 2); + decoder->chroma_quantizer[i] = decoder->quantizer_prescale[i+2]; + } else if (mpeg2dec->copy_matrix & (5 << i)) + decoder->chroma_quantizer[i] = decoder->quantizer_prescale[i]; + } +} + +static mpeg2_state_t invalid_end_action (mpeg2dec_t * mpeg2dec) +{ + mpeg2_reset_info (&(mpeg2dec->info)); + mpeg2dec->info.gop = NULL; + info_user_data (mpeg2dec); + mpeg2_header_state_init (mpeg2dec); + mpeg2dec->sequence = mpeg2dec->new_sequence; + mpeg2dec->action = 
mpeg2_seek_header; + mpeg2dec->state = STATE_SEQUENCE; + return STATE_SEQUENCE; +} + +void mpeg2_header_sequence_finalize (mpeg2dec_t * mpeg2dec) +{ + mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence); + mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); + + finalize_sequence (sequence); + finalize_matrix (mpeg2dec); + + decoder->mpeg1 = !(sequence->flags & SEQ_FLAG_MPEG2); + decoder->width = sequence->width; + decoder->height = sequence->height; + decoder->vertical_position_extension = (sequence->picture_height > 2800); + decoder->chroma_format = ((sequence->chroma_width == sequence->width) + + (sequence->chroma_height == sequence->height)); + + if (mpeg2dec->sequence.width != (unsigned)-1) { + /* + * According to 6.1.1.6, repeat sequence headers should be + * identical to the original. However some encoders dont + * respect that and change various fields (including bitrate + * and aspect ratio) in the repeat sequence headers. So we + * choose to be as conservative as possible and only restart + * the decoder if the width, height, chroma_width, + * chroma_height or low_delay flag are modified. + */ + if (sequence->width != mpeg2dec->sequence.width || + sequence->height != mpeg2dec->sequence.height || + sequence->chroma_width != mpeg2dec->sequence.chroma_width || + sequence->chroma_height != mpeg2dec->sequence.chroma_height || + ((sequence->flags ^ mpeg2dec->sequence.flags) & + SEQ_FLAG_LOW_DELAY)) { + decoder->stride_frame = sequence->width; + mpeg2_header_end (mpeg2dec); + mpeg2dec->action = invalid_end_action; + mpeg2dec->state = STATE_INVALID_END; + return; + } + mpeg2dec->state = (memcmp (&(mpeg2dec->sequence), sequence, + sizeof (mpeg2_sequence_t)) ? 
+ STATE_SEQUENCE_MODIFIED : STATE_SEQUENCE_REPEATED); + } else + decoder->stride_frame = sequence->width; + mpeg2dec->sequence = *sequence; + mpeg2_reset_info (&(mpeg2dec->info)); + mpeg2dec->info.sequence = &(mpeg2dec->sequence); + mpeg2dec->info.gop = NULL; + info_user_data (mpeg2dec); +} + +int mpeg2_header_gop (mpeg2dec_t * mpeg2dec) +{ + uint8_t * buffer = mpeg2dec->chunk_start; + mpeg2_gop_t * gop = &(mpeg2dec->new_gop); + + if (! (buffer[1] & 8)) + return 1; + gop->hours = (buffer[0] >> 2) & 31; + gop->minutes = ((buffer[0] << 4) | (buffer[1] >> 4)) & 63; + gop->seconds = ((buffer[1] << 3) | (buffer[2] >> 5)) & 63; + gop->pictures = ((buffer[2] << 1) | (buffer[3] >> 7)) & 63; + gop->flags = (buffer[0] >> 7) | ((buffer[3] >> 4) & 6); + mpeg2dec->state = STATE_GOP; + return 0; +} + +void mpeg2_header_gop_finalize (mpeg2dec_t * mpeg2dec) +{ + mpeg2dec->gop = mpeg2dec->new_gop; + mpeg2_reset_info (&(mpeg2dec->info)); + mpeg2dec->info.gop = &(mpeg2dec->gop); + info_user_data (mpeg2dec); +} + +void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int b_type) +{ + int i; + + for (i = 0; i < 3; i++) + if (mpeg2dec->fbuf[1] != &mpeg2dec->fbuf_alloc[i].fbuf && + mpeg2dec->fbuf[2] != &mpeg2dec->fbuf_alloc[i].fbuf) { + mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[i].fbuf; + mpeg2dec->info.current_fbuf = mpeg2dec->fbuf[0]; + if (b_type || (mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY)) { + if (b_type || mpeg2dec->convert) + mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[0]; + mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[0]; + } + break; + } +} + +int mpeg2_header_picture (mpeg2dec_t * mpeg2dec) +{ + uint8_t * buffer = mpeg2dec->chunk_start; + mpeg2_picture_t * picture = &(mpeg2dec->new_picture); + mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); + int type; + + mpeg2dec->state = ((mpeg2dec->state != STATE_SLICE_1ST) ? 
+ STATE_PICTURE : STATE_PICTURE_2ND); + mpeg2dec->ext_state = PIC_CODING_EXT; + + picture->temporal_reference = (buffer[0] << 2) | (buffer[1] >> 6); + + type = (buffer [1] >> 3) & 7; + if (type == PIC_FLAG_CODING_TYPE_P || type == PIC_FLAG_CODING_TYPE_B) { + /* forward_f_code and backward_f_code - used in mpeg1 only */ + decoder->f_motion.f_code[1] = (buffer[3] >> 2) & 1; + decoder->f_motion.f_code[0] = + (((buffer[3] << 1) | (buffer[4] >> 7)) & 7) - 1; + decoder->b_motion.f_code[1] = (buffer[4] >> 6) & 1; + decoder->b_motion.f_code[0] = ((buffer[4] >> 3) & 7) - 1; + } + + picture->flags = PIC_FLAG_PROGRESSIVE_FRAME | type; + picture->tag = picture->tag2 = 0; + if (mpeg2dec->num_tags) { + if (mpeg2dec->bytes_since_tag >= mpeg2dec->chunk_ptr - buffer + 4) { + mpeg2dec->num_tags = 0; + picture->tag = mpeg2dec->tag_current; + picture->tag2 = mpeg2dec->tag2_current; + picture->flags |= PIC_FLAG_TAGS; + } else if (mpeg2dec->num_tags > 1) { + mpeg2dec->num_tags = 1; + picture->tag = mpeg2dec->tag_previous; + picture->tag2 = mpeg2dec->tag2_previous; + picture->flags |= PIC_FLAG_TAGS; + } + } + picture->nb_fields = 2; + picture->display_offset[0].x = picture->display_offset[1].x = + picture->display_offset[2].x = mpeg2dec->display_offset_x; + picture->display_offset[0].y = picture->display_offset[1].y = + picture->display_offset[2].y = mpeg2dec->display_offset_y; + + /* XXXXXX decode extra_information_picture as well */ + + mpeg2dec->q_scale_type = 0; + decoder->intra_dc_precision = 7; + decoder->frame_pred_frame_dct = 1; + decoder->concealment_motion_vectors = 0; + decoder->scan = mpeg2_scan_norm; + decoder->picture_structure = FRAME_PICTURE; + mpeg2dec->copy_matrix = 0; + + return 0; +} + +static int picture_coding_ext (mpeg2dec_t * mpeg2dec) +{ + uint8_t * buffer = mpeg2dec->chunk_start; + mpeg2_picture_t * picture = &(mpeg2dec->new_picture); + mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); + uint32_t flags; + + /* pre subtract 1 for use later in 
compute_motion_vector */ + decoder->f_motion.f_code[0] = (buffer[0] & 15) - 1; + decoder->f_motion.f_code[1] = (buffer[1] >> 4) - 1; + decoder->b_motion.f_code[0] = (buffer[1] & 15) - 1; + decoder->b_motion.f_code[1] = (buffer[2] >> 4) - 1; + + flags = picture->flags; + decoder->intra_dc_precision = 7 - ((buffer[2] >> 2) & 3); + decoder->picture_structure = buffer[2] & 3; + switch (decoder->picture_structure) { + case TOP_FIELD: + flags |= PIC_FLAG_TOP_FIELD_FIRST; + case BOTTOM_FIELD: + picture->nb_fields = 1; + break; + case FRAME_PICTURE: + if (!(mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE)) { + picture->nb_fields = (buffer[3] & 2) ? 3 : 2; + flags |= (buffer[3] & 128) ? PIC_FLAG_TOP_FIELD_FIRST : 0; + } else + picture->nb_fields = (buffer[3]&2) ? ((buffer[3]&128) ? 6 : 4) : 2; + break; + default: + return 1; + } + decoder->top_field_first = buffer[3] >> 7; + decoder->frame_pred_frame_dct = (buffer[3] >> 6) & 1; + decoder->concealment_motion_vectors = (buffer[3] >> 5) & 1; + mpeg2dec->q_scale_type = buffer[3] & 16; + decoder->intra_vlc_format = (buffer[3] >> 3) & 1; + decoder->scan = (buffer[3] & 4) ? 
mpeg2_scan_alt : mpeg2_scan_norm; + if (!(buffer[4] & 0x80)) + flags &= ~PIC_FLAG_PROGRESSIVE_FRAME; + if (buffer[4] & 0x40) + flags |= (((buffer[4]<<26) | (buffer[5]<<18) | (buffer[6]<<10)) & + PIC_MASK_COMPOSITE_DISPLAY) | PIC_FLAG_COMPOSITE_DISPLAY; + picture->flags = flags; + + mpeg2dec->ext_state = PIC_DISPLAY_EXT | COPYRIGHT_EXT | QUANT_MATRIX_EXT; + + return 0; +} + +static int picture_display_ext (mpeg2dec_t * mpeg2dec) +{ + uint8_t * buffer = mpeg2dec->chunk_start; + mpeg2_picture_t * picture = &(mpeg2dec->new_picture); + int i, nb_pos; + + nb_pos = picture->nb_fields; + if (mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE) + nb_pos >>= 1; + + for (i = 0; i < nb_pos; i++) { + int x, y; + + x = ((buffer[4*i] << 24) | (buffer[4*i+1] << 16) | + (buffer[4*i+2] << 8) | buffer[4*i+3]) >> (11-2*i); + y = ((buffer[4*i+2] << 24) | (buffer[4*i+3] << 16) | + (buffer[4*i+4] << 8) | buffer[4*i+5]) >> (10-2*i); + if (! (x & y & 1)) + return 1; + picture->display_offset[i].x = mpeg2dec->display_offset_x = x >> 1; + picture->display_offset[i].y = mpeg2dec->display_offset_y = y >> 1; + } + for (; i < 3; i++) { + picture->display_offset[i].x = mpeg2dec->display_offset_x; + picture->display_offset[i].y = mpeg2dec->display_offset_y; + } + return 0; +} + +void mpeg2_header_picture_finalize (mpeg2dec_t * mpeg2dec, uint32_t accels) +{ + mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); + int old_type_b = (decoder->coding_type == B_TYPE); + int low_delay = mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY; + + finalize_matrix (mpeg2dec); + decoder->coding_type = mpeg2dec->new_picture.flags & PIC_MASK_CODING_TYPE; + + if (mpeg2dec->state == STATE_PICTURE) { + mpeg2_picture_t * picture; + mpeg2_picture_t * other; + + decoder->second_field = 0; + + picture = other = mpeg2dec->pictures; + if (old_type_b ^ (mpeg2dec->picture < mpeg2dec->pictures + 2)) + picture += 2; + else + other += 2; + mpeg2dec->picture = picture; + *picture = mpeg2dec->new_picture; + + if (!old_type_b) { + 
mpeg2dec->fbuf[2] = mpeg2dec->fbuf[1]; + mpeg2dec->fbuf[1] = mpeg2dec->fbuf[0]; + } + mpeg2dec->fbuf[0] = NULL; + mpeg2_reset_info (&(mpeg2dec->info)); + mpeg2dec->info.current_picture = picture; + mpeg2dec->info.display_picture = picture; + if (decoder->coding_type != B_TYPE) { + if (!low_delay) { + if (mpeg2dec->first) { + mpeg2dec->info.display_picture = NULL; + mpeg2dec->first = 0; + } else { + mpeg2dec->info.display_picture = other; + if (other->nb_fields == 1) + mpeg2dec->info.display_picture_2nd = other + 1; + mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[1]; + } + } + if (!low_delay + !mpeg2dec->convert) + mpeg2dec->info.discard_fbuf = + mpeg2dec->fbuf[!low_delay + !mpeg2dec->convert]; + } + if (mpeg2dec->convert) { + mpeg2_convert_init_t convert_init; + if (!mpeg2dec->convert_start) { + int y_size, uv_size; + + mpeg2dec->decoder.convert_id = + mpeg2_malloc (mpeg2dec->convert_id_size, + MPEG2_ALLOC_CONVERT_ID); + mpeg2dec->convert (MPEG2_CONVERT_START, + mpeg2dec->decoder.convert_id, + &(mpeg2dec->sequence), + mpeg2dec->convert_stride, accels, + mpeg2dec->convert_arg, &convert_init); + mpeg2dec->convert_start = convert_init.start; + mpeg2dec->decoder.convert = convert_init.copy; + + y_size = decoder->stride_frame * mpeg2dec->sequence.height; + uv_size = y_size >> (2 - mpeg2dec->decoder.chroma_format); + mpeg2dec->yuv_buf[0][0] = + (uint8_t *) mpeg2_malloc (y_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[0][1] = + (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[0][2] = + (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[1][0] = + (uint8_t *) mpeg2_malloc (y_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[1][1] = + (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[1][2] = + (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); + y_size = decoder->stride_frame * 32; + uv_size = y_size >> (2 - mpeg2dec->decoder.chroma_format); + mpeg2dec->yuv_buf[2][0] = + (uint8_t *) mpeg2_malloc (y_size, 
MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[2][1] = + (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[2][2] = + (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); + } + if (!mpeg2dec->custom_fbuf) { + while (mpeg2dec->alloc_index < 3) { + mpeg2_fbuf_t * fbuf; + + fbuf = &mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index++].fbuf; + fbuf->id = NULL; + fbuf->buf[0] = + (uint8_t *) mpeg2_malloc (convert_init.buf_size[0], + MPEG2_ALLOC_CONVERTED); + fbuf->buf[1] = + (uint8_t *) mpeg2_malloc (convert_init.buf_size[1], + MPEG2_ALLOC_CONVERTED); + fbuf->buf[2] = + (uint8_t *) mpeg2_malloc (convert_init.buf_size[2], + MPEG2_ALLOC_CONVERTED); + } + mpeg2_set_fbuf (mpeg2dec, (decoder->coding_type == B_TYPE)); + } + } else if (!mpeg2dec->custom_fbuf) { + while (mpeg2dec->alloc_index < 3) { + mpeg2_fbuf_t * fbuf; + int y_size, uv_size; + + fbuf = &(mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index++].fbuf); + fbuf->id = NULL; + y_size = decoder->stride_frame * mpeg2dec->sequence.height; + uv_size = y_size >> (2 - decoder->chroma_format); + fbuf->buf[0] = (uint8_t *) mpeg2_malloc (y_size, + MPEG2_ALLOC_YUV); + fbuf->buf[1] = (uint8_t *) mpeg2_malloc (uv_size, + MPEG2_ALLOC_YUV); + fbuf->buf[2] = (uint8_t *) mpeg2_malloc (uv_size, + MPEG2_ALLOC_YUV); + } + mpeg2_set_fbuf (mpeg2dec, (decoder->coding_type == B_TYPE)); + } + } else { + decoder->second_field = 1; + mpeg2dec->picture++; /* second field picture */ + *(mpeg2dec->picture) = mpeg2dec->new_picture; + mpeg2dec->info.current_picture_2nd = mpeg2dec->picture; + if (low_delay || decoder->coding_type == B_TYPE) + mpeg2dec->info.display_picture_2nd = mpeg2dec->picture; + } + + info_user_data (mpeg2dec); +} + +static int copyright_ext (mpeg2dec_t * mpeg2dec) +{ + return 0; +} + +static int quant_matrix_ext (mpeg2dec_t * mpeg2dec) +{ + uint8_t * buffer = mpeg2dec->chunk_start; + int i, j; + + for (i = 0; i < 4; i++) + if (buffer[0] & (8 >> i)) { + for (j = 0; j < 64; j++) + 
mpeg2dec->new_quantizer_matrix[i][mpeg2_scan_norm[j]] = + (buffer[j] << (i+5)) | (buffer[j+1] >> (3-i)); + mpeg2dec->copy_matrix |= 1 << i; + buffer += 64; + } + + return 0; +} + +int mpeg2_header_extension (mpeg2dec_t * mpeg2dec) +{ + static int (* parser[]) (mpeg2dec_t *) = { + 0, sequence_ext, sequence_display_ext, quant_matrix_ext, + copyright_ext, 0, 0, picture_display_ext, picture_coding_ext + }; + int ext, ext_bit; + + ext = mpeg2dec->chunk_start[0] >> 4; + ext_bit = 1 << ext; + + if (!(mpeg2dec->ext_state & ext_bit)) + return 0; /* ignore illegal extensions */ + mpeg2dec->ext_state &= ~ext_bit; + return parser[ext] (mpeg2dec); +} + +int mpeg2_header_user_data (mpeg2dec_t * mpeg2dec) +{ + mpeg2dec->user_data_len += mpeg2dec->chunk_ptr - 1 - mpeg2dec->chunk_start; + mpeg2dec->chunk_start = mpeg2dec->chunk_ptr - 1; + + return 0; +} + +static void prescale (mpeg2dec_t * mpeg2dec, int index) +{ + static int non_linear_scale [] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 10, 12, 14, 16, 18, 20, 22, + 24, 28, 32, 36, 40, 44, 48, 52, + 56, 64, 72, 80, 88, 96, 104, 112 + }; + int i, j, k; + mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); + + if (mpeg2dec->scaled[index] != mpeg2dec->q_scale_type) { + mpeg2dec->scaled[index] = mpeg2dec->q_scale_type; + for (i = 0; i < 32; i++) { + k = mpeg2dec->q_scale_type ? non_linear_scale[i] : (i << 1); + for (j = 0; j < 64; j++) + decoder->quantizer_prescale[index][i][j] = + k * mpeg2dec->quantizer_matrix[index][j]; + } + } +} + +mpeg2_state_t mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec) +{ + mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); + + mpeg2dec->info.user_data = NULL; mpeg2dec->info.user_data_len = 0; + mpeg2dec->state = ((mpeg2dec->picture->nb_fields > 1 || + mpeg2dec->state == STATE_PICTURE_2ND) ? 
+ STATE_SLICE : STATE_SLICE_1ST); + + if (mpeg2dec->decoder.coding_type != D_TYPE) { + prescale (mpeg2dec, 0); + if (decoder->chroma_quantizer[0] == decoder->quantizer_prescale[2]) + prescale (mpeg2dec, 2); + if (mpeg2dec->decoder.coding_type != I_TYPE) { + prescale (mpeg2dec, 1); + if (decoder->chroma_quantizer[1] == decoder->quantizer_prescale[3]) + prescale (mpeg2dec, 3); + } + } + + if (!(mpeg2dec->nb_decode_slices)) + mpeg2dec->picture->flags |= PIC_FLAG_SKIP; + else if (mpeg2dec->convert_start) { + mpeg2dec->convert_start (decoder->convert_id, mpeg2dec->fbuf[0], + mpeg2dec->picture, mpeg2dec->info.gop); + + if (mpeg2dec->decoder.coding_type == B_TYPE) + mpeg2_init_fbuf (&(mpeg2dec->decoder), mpeg2dec->yuv_buf[2], + mpeg2dec->yuv_buf[mpeg2dec->yuv_index ^ 1], + mpeg2dec->yuv_buf[mpeg2dec->yuv_index]); + else { + mpeg2_init_fbuf (&(mpeg2dec->decoder), + mpeg2dec->yuv_buf[mpeg2dec->yuv_index ^ 1], + mpeg2dec->yuv_buf[mpeg2dec->yuv_index], + mpeg2dec->yuv_buf[mpeg2dec->yuv_index]); + if (mpeg2dec->state == STATE_SLICE) + mpeg2dec->yuv_index ^= 1; + } + } else { + int b_type; + + b_type = (mpeg2dec->decoder.coding_type == B_TYPE); + mpeg2_init_fbuf (&(mpeg2dec->decoder), mpeg2dec->fbuf[0]->buf, + mpeg2dec->fbuf[b_type + 1]->buf, + mpeg2dec->fbuf[b_type]->buf); + } + mpeg2dec->action = NULL; + return STATE_INTERNAL_NORETURN; +} + +static mpeg2_state_t seek_sequence (mpeg2dec_t * mpeg2dec) +{ + mpeg2_reset_info (&(mpeg2dec->info)); + mpeg2dec->info.sequence = NULL; + mpeg2dec->info.gop = NULL; + mpeg2_header_state_init (mpeg2dec); + mpeg2dec->action = mpeg2_seek_header; + return mpeg2_seek_header (mpeg2dec); +} + +mpeg2_state_t mpeg2_header_end (mpeg2dec_t * mpeg2dec) +{ + mpeg2_picture_t * picture; + int b_type; + + b_type = (mpeg2dec->decoder.coding_type == B_TYPE); + picture = mpeg2dec->pictures; + if ((mpeg2dec->picture >= picture + 2) ^ b_type) + picture = mpeg2dec->pictures + 2; + + mpeg2_reset_info (&(mpeg2dec->info)); + if (!(mpeg2dec->sequence.flags & 
SEQ_FLAG_LOW_DELAY)) { + mpeg2dec->info.display_picture = picture; + if (picture->nb_fields == 1) + mpeg2dec->info.display_picture_2nd = picture + 1; + mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[b_type]; + if (!mpeg2dec->convert) + mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[b_type + 1]; + } else if (!mpeg2dec->convert) + mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[b_type]; + mpeg2dec->action = seek_sequence; + return STATE_END; +} diff --git a/src/video_dec/libmpeg2new/libmpeg2/idct.c b/src/video_dec/libmpeg2new/libmpeg2/idct.c new file mode 100644 index 000000000..8b982bb33 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/idct.c @@ -0,0 +1,287 @@ +/* + * idct.c + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include +#include + +#include "../include/mpeg2.h" +#include "../include/attributes.h" +#include "mpeg2_internal.h" + +#define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */ +#define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */ +#define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */ +#define W5 1609 /* 2048 * sqrt (2) * cos (5 * pi / 16) */ +#define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */ +#define W7 565 /* 2048 * sqrt (2) * cos (7 * pi / 16) */ + +/* idct main entry point */ +void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride); +void (* mpeg2_idct_add) (int last, int16_t * block, + uint8_t * dest, int stride); + +/* + * In legal streams, the IDCT output should be between -384 and +384. + * In corrupted streams, it is possible to force the IDCT output to go + * to +-3826 - this is the worst case for a column IDCT where the + * column inputs are 16-bit values. 
+ */ +uint8_t mpeg2_clip[3840 * 2 + 256]; +#define CLIP(i) ((mpeg2_clip + 3840)[i]) + +#if 0 +#define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ +do { \ + t0 = W0 * d0 + W1 * d1; \ + t1 = W0 * d1 - W1 * d0; \ +} while (0) +#else +#define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ +do { \ + int tmp = W0 * (d0 + d1); \ + t0 = tmp + (W1 - W0) * d1; \ + t1 = tmp - (W1 + W0) * d0; \ +} while (0) +#endif + +static void inline idct_row (int16_t * const block) +{ + int d0, d1, d2, d3; + int a0, a1, a2, a3, b0, b1, b2, b3; + int t0, t1, t2, t3; + + /* shortcut */ + if (likely (!(block[1] | ((int32_t *)block)[1] | ((int32_t *)block)[2] | + ((int32_t *)block)[3]))) { + uint32_t tmp = (uint16_t) (block[0] >> 1); + tmp |= tmp << 16; + ((int32_t *)block)[0] = tmp; + ((int32_t *)block)[1] = tmp; + ((int32_t *)block)[2] = tmp; + ((int32_t *)block)[3] = tmp; + return; + } + + d0 = (block[0] << 11) + 2048; + d1 = block[1]; + d2 = block[2] << 11; + d3 = block[3]; + t0 = d0 + d2; + t1 = d0 - d2; + BUTTERFLY (t2, t3, W6, W2, d3, d1); + a0 = t0 + t2; + a1 = t1 + t3; + a2 = t1 - t3; + a3 = t0 - t2; + + d0 = block[4]; + d1 = block[5]; + d2 = block[6]; + d3 = block[7]; + BUTTERFLY (t0, t1, W7, W1, d3, d0); + BUTTERFLY (t2, t3, W3, W5, d1, d2); + b0 = t0 + t2; + b3 = t1 + t3; + t0 -= t2; + t1 -= t3; + b1 = ((t0 + t1) >> 8) * 181; + b2 = ((t0 - t1) >> 8) * 181; + + block[0] = (a0 + b0) >> 12; + block[1] = (a1 + b1) >> 12; + block[2] = (a2 + b2) >> 12; + block[3] = (a3 + b3) >> 12; + block[4] = (a3 - b3) >> 12; + block[5] = (a2 - b2) >> 12; + block[6] = (a1 - b1) >> 12; + block[7] = (a0 - b0) >> 12; +} + +static void inline idct_col (int16_t * const block) +{ + int d0, d1, d2, d3; + int a0, a1, a2, a3, b0, b1, b2, b3; + int t0, t1, t2, t3; + + d0 = (block[8*0] << 11) + 65536; + d1 = block[8*1]; + d2 = block[8*2] << 11; + d3 = block[8*3]; + t0 = d0 + d2; + t1 = d0 - d2; + BUTTERFLY (t2, t3, W6, W2, d3, d1); + a0 = t0 + t2; + a1 = t1 + t3; + a2 = t1 - t3; + a3 = t0 - t2; + + d0 = block[8*4]; + d1 = block[8*5]; + d2 = 
/*
 * Plain C implementation of the "IDCT and copy" entry point.
 *
 * Runs the row pass and then the column pass over the 8x8 block,
 * writes the clipped result to dest (eight rows, `stride` bytes
 * apart) and clears the block to zero as it goes.
 *
 * The block is cleared with int16_t stores; the earlier version wrote
 * through int32_t pointers, which violates the C effective-type
 * (strict aliasing) rules.
 */
static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest,
                               const int stride)
{
    int row, col;

    for (row = 0; row < 8; row++)
        idct_row (block + 8 * row);
    for (col = 0; col < 8; col++)
        idct_col (block + col);

    for (row = 0; row < 8; row++) {
        /* emit the whole row before clearing it, as before */
        for (col = 0; col < 8; col++)
            dest[col] = CLIP (block[col]);
        for (col = 0; col < 8; col++)
            block[col] = 0;

        dest += stride;
        block += 8;
    }
}
= CLIP (DC + dest[1]); + dest[2] = CLIP (DC + dest[2]); + dest[3] = CLIP (DC + dest[3]); + dest[4] = CLIP (DC + dest[4]); + dest[5] = CLIP (DC + dest[5]); + dest[6] = CLIP (DC + dest[6]); + dest[7] = CLIP (DC + dest[7]); + dest += stride; + } while (--i); + } +} + /* Point mpeg2_idct_copy and mpeg2_idct_add at the implementation selected by the flags in accel, testing only the architectures compiled in, and call that implementation's own init routine. If no flag matches, the portable C functions are installed, the CLIP table is filled and both scan tables are permuted. */ +void mpeg2_idct_init (uint32_t accel) +{ +#ifdef ARCH_X86 + if (accel & MPEG2_ACCEL_X86_MMXEXT) { /* MMXEXT is tested before plain MMX */ + mpeg2_idct_copy = mpeg2_idct_copy_mmxext; + mpeg2_idct_add = mpeg2_idct_add_mmxext; + mpeg2_idct_mmx_init (); + } else if (accel & MPEG2_ACCEL_X86_MMX) { + mpeg2_idct_copy = mpeg2_idct_copy_mmx; + mpeg2_idct_add = mpeg2_idct_add_mmx; + mpeg2_idct_mmx_init (); + } else +#endif +#ifdef ARCH_PPC + if (accel & MPEG2_ACCEL_PPC_ALTIVEC) { + mpeg2_idct_copy = mpeg2_idct_copy_altivec; + mpeg2_idct_add = mpeg2_idct_add_altivec; + mpeg2_idct_altivec_init (); + } else +#endif +#ifdef ARCH_ALPHA + if (accel & MPEG2_ACCEL_ALPHA_MVI) { /* MVI is tested before plain Alpha; this branch does not fill the CLIP table */ + mpeg2_idct_copy = mpeg2_idct_copy_mvi; + mpeg2_idct_add = mpeg2_idct_add_mvi; + mpeg2_idct_alpha_init (); + } else if (accel & MPEG2_ACCEL_ALPHA) { + int i; + + mpeg2_idct_copy = mpeg2_idct_copy_alpha; + mpeg2_idct_add = mpeg2_idct_add_alpha; + mpeg2_idct_alpha_init (); + for (i = -3840; i < 3840 + 256; i++) /* fill the CLIP table: every index maps to itself saturated to 0..255 */ + CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 255 : i); + } else +#endif + { /* portable fallback: each compiled-in chain above ends in a dangling "else" that binds to this block */ + extern uint8_t mpeg2_scan_norm[64]; + extern uint8_t mpeg2_scan_alt[64]; + int i, j; + + mpeg2_idct_copy = mpeg2_idct_copy_c; + mpeg2_idct_add = mpeg2_idct_add_c; + for (i = -3840; i < 3840 + 256; i++) /* same CLIP table fill as in the Alpha branch */ + CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 
255 : i); + for (i = 0; i < 64; i++) { + j = mpeg2_scan_norm[i]; + mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); + j = mpeg2_scan_alt[i]; + mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); + } + } +} diff --git a/src/video_dec/libmpeg2new/libmpeg2/idct_alpha.c b/src/video_dec/libmpeg2new/libmpeg2/idct_alpha.c new file mode 100644 index 000000000..1d8fd08ee --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/idct_alpha.c @@ -0,0 +1,379 @@ +/* + * idct_alpha.c + * Copyright (C) 2002-2003 Falk Hueffner + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef ARCH_ALPHA + +#include +#include + +#include "mpeg2.h" +#include +#include "mpeg2_internal.h" +#include "alpha_asm.h" + +#define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */ +#define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */ +#define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */ +#define W5 1609 /* 2048 * sqrt (2) * cos (5 * pi / 16) */ +#define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */ +#define W7 565 /* 2048 * sqrt (2) * cos (7 * pi / 16) */ + +extern uint8_t mpeg2_clip[3840 * 2 + 256]; +#define CLIP(i) ((mpeg2_clip + 3840)[i]) + +#if 0 +#define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ +do { \ + t0 = W0 * d0 + W1 * d1; \ + t1 = W0 * d1 - W1 * d0; \ +} while (0) +#else +#define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ +do { \ + int_fast32_t tmp = W0 * (d0 + d1); \ + t0 = tmp + (W1 - W0) * d1; \ + t1 = tmp - (W1 + W0) * d0; \ +} while (0) +#endif + +static void inline idct_row (int16_t * const block) +{ + uint64_t l, r; + int_fast32_t d0, d1, d2, d3; + int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3; + int_fast32_t t0, t1, t2, t3; + + l = ldq (block); + r = ldq (block + 4); + + /* shortcut */ + if (likely (!((l & ~0xffffUL) | r))) { + uint64_t tmp = (uint16_t) (l >> 1); + tmp |= tmp << 16; + tmp |= tmp << 32; + ((int32_t *)block)[0] = tmp; + ((int32_t *)block)[1] = tmp; + ((int32_t *)block)[2] = tmp; + ((int32_t *)block)[3] = tmp; + return; + } + + d0 = (sextw (l) << 11) + 2048; + d1 = sextw (extwl (l, 2)); + d2 = sextw (extwl (l, 4)) << 11; + d3 = sextw (extwl (l, 6)); + t0 = d0 + d2; + t1 = d0 - d2; + BUTTERFLY (t2, t3, W6, W2, d3, d1); + a0 = t0 + t2; + a1 = t1 + t3; + a2 = t1 - t3; + a3 = t0 - t2; + + d0 = sextw (r); + d1 = sextw (extwl (r, 2)); + d2 = sextw (extwl (r, 4)); + d3 = sextw (extwl (r, 6)); + 
BUTTERFLY (t0, t1, W7, W1, d3, d0); + BUTTERFLY (t2, t3, W3, W5, d1, d2); + b0 = t0 + t2; + b3 = t1 + t3; + t0 -= t2; + t1 -= t3; + b1 = ((t0 + t1) >> 8) * 181; + b2 = ((t0 - t1) >> 8) * 181; + + block[0] = (a0 + b0) >> 12; + block[1] = (a1 + b1) >> 12; + block[2] = (a2 + b2) >> 12; + block[3] = (a3 + b3) >> 12; + block[4] = (a3 - b3) >> 12; + block[5] = (a2 - b2) >> 12; + block[6] = (a1 - b1) >> 12; + block[7] = (a0 - b0) >> 12; +} + +static void inline idct_col (int16_t * const block) +{ + int_fast32_t d0, d1, d2, d3; + int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3; + int_fast32_t t0, t1, t2, t3; + + d0 = (block[8*0] << 11) + 65536; + d1 = block[8*1]; + d2 = block[8*2] << 11; + d3 = block[8*3]; + t0 = d0 + d2; + t1 = d0 - d2; + BUTTERFLY (t2, t3, W6, W2, d3, d1); + a0 = t0 + t2; + a1 = t1 + t3; + a2 = t1 - t3; + a3 = t0 - t2; + + d0 = block[8*4]; + d1 = block[8*5]; + d2 = block[8*6]; + d3 = block[8*7]; + BUTTERFLY (t0, t1, W7, W1, d3, d0); + BUTTERFLY (t2, t3, W3, W5, d1, d2); + b0 = t0 + t2; + b3 = t1 + t3; + t0 -= t2; + t1 -= t3; + b1 = ((t0 + t1) >> 8) * 181; + b2 = ((t0 - t1) >> 8) * 181; + + block[8*0] = (a0 + b0) >> 17; + block[8*1] = (a1 + b1) >> 17; + block[8*2] = (a2 + b2) >> 17; + block[8*3] = (a3 + b3) >> 17; + block[8*4] = (a3 - b3) >> 17; + block[8*5] = (a2 - b2) >> 17; + block[8*6] = (a1 - b1) >> 17; + block[8*7] = (a0 - b0) >> 17; +} + +void mpeg2_idct_copy_mvi (int16_t * block, uint8_t * dest, const int stride) +{ + uint64_t clampmask; + int i; + + for (i = 0; i < 8; i++) + idct_row (block + 8 * i); + + for (i = 0; i < 8; i++) + idct_col (block + i); + + clampmask = zap (-1, 0xaa); /* 0x00ff00ff00ff00ff */ + do { + uint64_t shorts0, shorts1; + + shorts0 = ldq (block); + shorts0 = maxsw4 (shorts0, 0); + shorts0 = minsw4 (shorts0, clampmask); + stl (pkwb (shorts0), dest); + + shorts1 = ldq (block + 4); + shorts1 = maxsw4 (shorts1, 0); + shorts1 = minsw4 (shorts1, clampmask); + stl (pkwb (shorts1), dest + 4); + + stq (0, block); + stq (0, block + 4); + 
+ dest += stride; + block += 8; + } while (--i); +} + +void mpeg2_idct_add_mvi (const int last, int16_t * block, + uint8_t * dest, const int stride) +{ + uint64_t clampmask; + uint64_t signmask; + int i; + + if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) { + for (i = 0; i < 8; i++) + idct_row (block + 8 * i); + for (i = 0; i < 8; i++) + idct_col (block + i); + clampmask = zap (-1, 0xaa); /* 0x00ff00ff00ff00ff */ + signmask = zap (-1, 0x33); + signmask ^= signmask >> 1; /* 0x8000800080008000 */ + + do { + uint64_t shorts0, pix0, signs0; + uint64_t shorts1, pix1, signs1; + + shorts0 = ldq (block); + shorts1 = ldq (block + 4); + + pix0 = unpkbw (ldl (dest)); + /* signed subword add (MMX paddw). */ + signs0 = shorts0 & signmask; + shorts0 &= ~signmask; + shorts0 += pix0; + shorts0 ^= signs0; + /* clamp. */ + shorts0 = maxsw4 (shorts0, 0); + shorts0 = minsw4 (shorts0, clampmask); + + /* next 4. */ + pix1 = unpkbw (ldl (dest + 4)); + signs1 = shorts1 & signmask; + shorts1 &= ~signmask; + shorts1 += pix1; + shorts1 ^= signs1; + shorts1 = maxsw4 (shorts1, 0); + shorts1 = minsw4 (shorts1, clampmask); + + stl (pkwb (shorts0), dest); + stl (pkwb (shorts1), dest + 4); + stq (0, block); + stq (0, block + 4); + + dest += stride; + block += 8; + } while (--i); + } else { + int DC; + uint64_t p0, p1, p2, p3, p4, p5, p6, p7; + uint64_t DCs; + + DC = (block[0] + 64) >> 7; + block[0] = block[63] = 0; + + p0 = ldq (dest + 0 * stride); + p1 = ldq (dest + 1 * stride); + p2 = ldq (dest + 2 * stride); + p3 = ldq (dest + 3 * stride); + p4 = ldq (dest + 4 * stride); + p5 = ldq (dest + 5 * stride); + p6 = ldq (dest + 6 * stride); + p7 = ldq (dest + 7 * stride); + + if (DC > 0) { + DCs = BYTE_VEC (likely (DC <= 255) ? 
DC : 255); + p0 += minub8 (DCs, ~p0); + p1 += minub8 (DCs, ~p1); + p2 += minub8 (DCs, ~p2); + p3 += minub8 (DCs, ~p3); + p4 += minub8 (DCs, ~p4); + p5 += minub8 (DCs, ~p5); + p6 += minub8 (DCs, ~p6); + p7 += minub8 (DCs, ~p7); + } else { + DCs = BYTE_VEC (likely (-DC <= 255) ? -DC : 255); + p0 -= minub8 (DCs, p0); + p1 -= minub8 (DCs, p1); + p2 -= minub8 (DCs, p2); + p3 -= minub8 (DCs, p3); + p4 -= minub8 (DCs, p4); + p5 -= minub8 (DCs, p5); + p6 -= minub8 (DCs, p6); + p7 -= minub8 (DCs, p7); + } + + stq (p0, dest + 0 * stride); + stq (p1, dest + 1 * stride); + stq (p2, dest + 2 * stride); + stq (p3, dest + 3 * stride); + stq (p4, dest + 4 * stride); + stq (p5, dest + 5 * stride); + stq (p6, dest + 6 * stride); + stq (p7, dest + 7 * stride); + } +} + +void mpeg2_idct_copy_alpha (int16_t * block, uint8_t * dest, const int stride) +{ + int i; + + for (i = 0; i < 8; i++) + idct_row (block + 8 * i); + for (i = 0; i < 8; i++) + idct_col (block + i); + do { + dest[0] = CLIP (block[0]); + dest[1] = CLIP (block[1]); + dest[2] = CLIP (block[2]); + dest[3] = CLIP (block[3]); + dest[4] = CLIP (block[4]); + dest[5] = CLIP (block[5]); + dest[6] = CLIP (block[6]); + dest[7] = CLIP (block[7]); + + stq(0, block); + stq(0, block + 4); + + dest += stride; + block += 8; + } while (--i); +} + +void mpeg2_idct_add_alpha (const int last, int16_t * block, + uint8_t * dest, const int stride) +{ + int i; + + if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) { + for (i = 0; i < 8; i++) + idct_row (block + 8 * i); + for (i = 0; i < 8; i++) + idct_col (block + i); + do { + dest[0] = CLIP (block[0] + dest[0]); + dest[1] = CLIP (block[1] + dest[1]); + dest[2] = CLIP (block[2] + dest[2]); + dest[3] = CLIP (block[3] + dest[3]); + dest[4] = CLIP (block[4] + dest[4]); + dest[5] = CLIP (block[5] + dest[5]); + dest[6] = CLIP (block[6] + dest[6]); + dest[7] = CLIP (block[7] + dest[7]); + + stq(0, block); + stq(0, block + 4); + + dest += stride; + block += 8; + } while (--i); + } else { + int DC; + 
+ DC = (block[0] + 64) >> 7; /* add 64, then shift right by 7: rounds block[0] / 128 to nearest */ + block[0] = block[63] = 0; /* the only two entries cleared on this path */ + i = 8; + do { /* add the same DC value to each of the 8 rows of 8 pixels */ + dest[0] = CLIP (DC + dest[0]); + dest[1] = CLIP (DC + dest[1]); + dest[2] = CLIP (DC + dest[2]); + dest[3] = CLIP (DC + dest[3]); + dest[4] = CLIP (DC + dest[4]); + dest[5] = CLIP (DC + dest[5]); + dest[6] = CLIP (DC + dest[6]); + dest[7] = CLIP (DC + dest[7]); + dest += stride; + } while (--i); + } +} + /* Rewrite every entry of mpeg2_scan_norm and mpeg2_scan_alt in place. Each 6-bit entry has both of its 3-bit halves rotated right by one bit, so within a half the values 0,2,4,6 become 0,1,2,3 and 1,3,5,7 become 4,5,6,7. The tables are modified where they live, so running this twice rotates them twice. NOTE(review): presumably this is the coefficient layout idct_row / idct_col in this file read -- confirm against their load order. */ +void mpeg2_idct_alpha_init (void) +{ + extern uint8_t mpeg2_scan_norm[64]; + extern uint8_t mpeg2_scan_alt[64]; + int i, j; + + for (i = 0; i < 64; i++) { + j = mpeg2_scan_norm[i]; + mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); /* 0x36 selects bits 1,2,4,5 (moved down one); 0x09 selects bits 0,3 (moved up two) */ + j = mpeg2_scan_alt[i]; + mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); + } +} + +#endif /* ARCH_ALPHA */ diff --git a/src/video_dec/libmpeg2new/libmpeg2/idct_altivec.c b/src/video_dec/libmpeg2new/libmpeg2/idct_altivec.c new file mode 100644 index 000000000..f15bca165 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/idct_altivec.c @@ -0,0 +1,288 @@ +/* + * idct_altivec.c + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef ARCH_PPC + +#ifdef HAVE_ALTIVEC_H +#include +#endif +#include + +#include "mpeg2.h" +#include +#include "mpeg2_internal.h" + +typedef vector signed char vector_s8_t; +typedef vector unsigned char vector_u8_t; +typedef vector signed short vector_s16_t; +typedef vector unsigned short vector_u16_t; +typedef vector signed int vector_s32_t; +typedef vector unsigned int vector_u32_t; + +#if defined(HAVE_ALTIVEC_H) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303) +/* work around gcc <3.3 vec_mergel bug */ +static inline vector_s16_t my_vec_mergel (vector_s16_t const A, + vector_s16_t const B) +{ + static const vector_u8_t mergel = { + 0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b, + 0x0c, 0x0d, 0x1c, 0x1d, 0x0e, 0x0f, 0x1e, 0x1f + }; + return vec_perm (A, B, mergel); +} +#undef vec_mergel +#define vec_mergel my_vec_mergel +#endif + +#ifdef HAVE_ALTIVEC_H /* gnu */ +#define VEC_S16(a,b,c,d,e,f,g,h) {a, b, c, d, e, f, g, h} +#else /* apple */ +#define VEC_S16(a,b,c,d,e,f,g,h) (vector_s16_t) (a, b, c, d, e, f, g, h) +#endif + +static const vector_s16_t constants ATTR_ALIGN(16) = + VEC_S16 (23170, 13573, 6518, 21895, -23170, -21895, 32, 31); +static const vector_s16_t constants_1 ATTR_ALIGN(16) = + VEC_S16 (16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725); +static const vector_s16_t constants_2 ATTR_ALIGN(16) = + VEC_S16 (16069, 22289, 20995, 18895, 16069, 18895, 20995, 22289); +static const vector_s16_t constants_3 ATTR_ALIGN(16) = + VEC_S16 (21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692); +static const vector_s16_t constants_4 ATTR_ALIGN(16) = + VEC_S16 (13623, 18895, 17799, 16019, 13623, 16019, 17799, 18895); + +#define IDCT \ + vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \ + vector_s16_t vy0, vy1, vy2, 
vy3, vy4, vy5, vy6, vy7; \ + vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias; \ + vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8; \ + vector_u16_t shift; \ + \ + c4 = vec_splat (constants, 0); \ + a0 = vec_splat (constants, 1); \ + a1 = vec_splat (constants, 2); \ + a2 = vec_splat (constants, 3); \ + mc4 = vec_splat (constants, 4); \ + ma2 = vec_splat (constants, 5); \ + bias = (vector_s16_t)vec_splat ((vector_s32_t)constants, 3); \ + \ + zero = vec_splat_s16 (0); \ + \ + vx0 = vec_adds (block[0], block[4]); \ + vx4 = vec_subs (block[0], block[4]); \ + t5 = vec_mradds (vx0, constants_1, zero); \ + t0 = vec_mradds (vx4, constants_1, zero); \ + \ + vx1 = vec_mradds (a1, block[7], block[1]); \ + vx7 = vec_mradds (a1, block[1], vec_subs (zero, block[7])); \ + t1 = vec_mradds (vx1, constants_2, zero); \ + t8 = vec_mradds (vx7, constants_2, zero); \ + \ + vx2 = vec_mradds (a0, block[6], block[2]); \ + vx6 = vec_mradds (a0, block[2], vec_subs (zero, block[6])); \ + t2 = vec_mradds (vx2, constants_3, zero); \ + t4 = vec_mradds (vx6, constants_3, zero); \ + \ + vx3 = vec_mradds (block[3], constants_4, zero); \ + vx5 = vec_mradds (block[5], constants_4, zero); \ + t7 = vec_mradds (a2, vx5, vx3); \ + t3 = vec_mradds (ma2, vx3, vx5); \ + \ + t6 = vec_adds (t8, t3); \ + t3 = vec_subs (t8, t3); \ + t8 = vec_subs (t1, t7); \ + t1 = vec_adds (t1, t7); \ + t6 = vec_mradds (a0, t6, t6); /* a0+1 == 2*c4 */ \ + t1 = vec_mradds (a0, t1, t1); /* a0+1 == 2*c4 */ \ + \ + t7 = vec_adds (t5, t2); \ + t2 = vec_subs (t5, t2); \ + t5 = vec_adds (t0, t4); \ + t0 = vec_subs (t0, t4); \ + t4 = vec_subs (t8, t3); \ + t3 = vec_adds (t8, t3); \ + \ + vy0 = vec_adds (t7, t1); \ + vy7 = vec_subs (t7, t1); \ + vy1 = vec_adds (t5, t3); \ + vy6 = vec_subs (t5, t3); \ + vy2 = vec_adds (t0, t4); \ + vy5 = vec_subs (t0, t4); \ + vy3 = vec_adds (t2, t6); \ + vy4 = vec_subs (t2, t6); \ + \ + vx0 = vec_mergeh (vy0, vy4); \ + vx1 = vec_mergel (vy0, vy4); \ + vx2 = vec_mergeh (vy1, vy5); \ + vx3 = vec_mergel 
(vy1, vy5); \ + vx4 = vec_mergeh (vy2, vy6); \ + vx5 = vec_mergel (vy2, vy6); \ + vx6 = vec_mergeh (vy3, vy7); \ + vx7 = vec_mergel (vy3, vy7); \ + \ + vy0 = vec_mergeh (vx0, vx4); \ + vy1 = vec_mergel (vx0, vx4); \ + vy2 = vec_mergeh (vx1, vx5); \ + vy3 = vec_mergel (vx1, vx5); \ + vy4 = vec_mergeh (vx2, vx6); \ + vy5 = vec_mergel (vx2, vx6); \ + vy6 = vec_mergeh (vx3, vx7); \ + vy7 = vec_mergel (vx3, vx7); \ + \ + vx0 = vec_mergeh (vy0, vy4); \ + vx1 = vec_mergel (vy0, vy4); \ + vx2 = vec_mergeh (vy1, vy5); \ + vx3 = vec_mergel (vy1, vy5); \ + vx4 = vec_mergeh (vy2, vy6); \ + vx5 = vec_mergel (vy2, vy6); \ + vx6 = vec_mergeh (vy3, vy7); \ + vx7 = vec_mergel (vy3, vy7); \ + \ + vx0 = vec_adds (vx0, bias); \ + t5 = vec_adds (vx0, vx4); \ + t0 = vec_subs (vx0, vx4); \ + \ + t1 = vec_mradds (a1, vx7, vx1); \ + t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \ + \ + t2 = vec_mradds (a0, vx6, vx2); \ + t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6)); \ + \ + t7 = vec_mradds (a2, vx5, vx3); \ + t3 = vec_mradds (ma2, vx3, vx5); \ + \ + t6 = vec_adds (t8, t3); \ + t3 = vec_subs (t8, t3); \ + t8 = vec_subs (t1, t7); \ + t1 = vec_adds (t1, t7); \ + \ + t7 = vec_adds (t5, t2); \ + t2 = vec_subs (t5, t2); \ + t5 = vec_adds (t0, t4); \ + t0 = vec_subs (t0, t4); \ + t4 = vec_subs (t8, t3); \ + t3 = vec_adds (t8, t3); \ + \ + vy0 = vec_adds (t7, t1); \ + vy7 = vec_subs (t7, t1); \ + vy1 = vec_mradds (c4, t3, t5); \ + vy6 = vec_mradds (mc4, t3, t5); \ + vy2 = vec_mradds (c4, t4, t0); \ + vy5 = vec_mradds (mc4, t4, t0); \ + vy3 = vec_adds (t2, t6); \ + vy4 = vec_subs (t2, t6); \ + \ + shift = vec_splat_u16 (6); \ + vx0 = vec_sra (vy0, shift); \ + vx1 = vec_sra (vy1, shift); \ + vx2 = vec_sra (vy2, shift); \ + vx3 = vec_sra (vy3, shift); \ + vx4 = vec_sra (vy4, shift); \ + vx5 = vec_sra (vy5, shift); \ + vx6 = vec_sra (vy6, shift); \ + vx7 = vec_sra (vy7, shift); + +void mpeg2_idct_copy_altivec (int16_t * const _block, uint8_t * dest, + const int stride) +{ + vector_s16_t * const 
block = (vector_s16_t *)_block; + vector_u8_t tmp; + + IDCT + +#define COPY(dest,src) \ + tmp = vec_packsu (src, src); \ + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \ + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + + COPY (dest, vx0) dest += stride; + COPY (dest, vx1) dest += stride; + COPY (dest, vx2) dest += stride; + COPY (dest, vx3) dest += stride; + COPY (dest, vx4) dest += stride; + COPY (dest, vx5) dest += stride; + COPY (dest, vx6) dest += stride; + COPY (dest, vx7) + + block[0] = block[1] = block[2] = block[3] = zero; + block[4] = block[5] = block[6] = block[7] = zero; +} + +void mpeg2_idct_add_altivec (const int last, int16_t * const _block, + uint8_t * dest, const int stride) +{ + vector_s16_t * const block = (vector_s16_t *)_block; + vector_u8_t tmp; + vector_s16_t tmp2, tmp3; + vector_u8_t perm0; + vector_u8_t perm1; + vector_u8_t p0, p1, p; + + IDCT + + p0 = vec_lvsl (0, dest); + p1 = vec_lvsl (stride, dest); + p = vec_splat_u8 (-1); + perm0 = vec_mergeh (p, p0); + perm1 = vec_mergeh (p, p1); + +#define ADD(dest,src,perm) \ + /* *(uint64_t *)&tmp = *(uint64_t *)dest; */ \ + tmp = vec_ld (0, dest); \ + tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm); \ + tmp3 = vec_adds (tmp2, src); \ + tmp = vec_packsu (tmp3, tmp3); \ + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \ + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + + ADD (dest, vx0, perm0) dest += stride; + ADD (dest, vx1, perm1) dest += stride; + ADD (dest, vx2, perm0) dest += stride; + ADD (dest, vx3, perm1) dest += stride; + ADD (dest, vx4, perm0) dest += stride; + ADD (dest, vx5, perm1) dest += stride; + ADD (dest, vx6, perm0) dest += stride; + ADD (dest, vx7, perm1) + + block[0] = block[1] = block[2] = block[3] = zero; + block[4] = block[5] = block[6] = block[7] = zero; +} + +void mpeg2_idct_altivec_init (void) +{ + extern uint8_t mpeg2_scan_norm[64]; + extern uint8_t mpeg2_scan_alt[64]; + int i, j; + + /* the altivec idct uses a transposed 
input, so we patch scan tables */ + for (i = 0; i < 64; i++) { + j = mpeg2_scan_norm[i]; + mpeg2_scan_norm[i] = (j >> 3) | ((j & 7) << 3); + j = mpeg2_scan_alt[i]; + mpeg2_scan_alt[i] = (j >> 3) | ((j & 7) << 3); + } +} + +#endif diff --git a/src/video_dec/libmpeg2new/libmpeg2/idct_mlib.c b/src/video_dec/libmpeg2new/libmpeg2/idct_mlib.c new file mode 100644 index 000000000..55a2e9b64 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/idct_mlib.c @@ -0,0 +1,60 @@ +/* + * idct_mlib.c + * Copyright (C) 1999-2003 Håkan Hjort + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef LIBMPEG2_MLIB + +#include +#include +#include +#include +#include +#include + +#include "../include/mpeg2.h" +#include "mpeg2_internal.h" + /* Calls mlib_VideoIDCT_IEEE_S16_S16 with block as both arguments, passes the result to mlib_VideoAddBlock_U8_S16 together with dest and stride, then zeroes 64 coefficients. The `last` parameter is not used. NOTE(review): the exact mediaLib semantics are not visible in this file -- presumably an in-place inverse DCT followed by a saturating add into dest; confirm in the mediaLib reference. */ +void mpeg2_idct_add_mlib (const int last, int16_t * const block, + uint8_t * const dest, const int stride) +{ + mlib_VideoIDCT_IEEE_S16_S16 (block, block); + mlib_VideoAddBlock_U8_S16 (dest, block, stride); + memset (block, 0, 64 * sizeof (uint16_t)); /* uint16_t has the same size as the int16_t elements, so 64 coefficients are cleared */ +} + /* Calls mlib_VideoIDCT8x8_U8_S16 with dest, block and stride, then zeroes 64 coefficients. NOTE(review): presumably the mediaLib routine writes the transformed 8x8 block to dest as unsigned bytes -- confirm in the mediaLib reference. */ +void mpeg2_idct_copy_mlib_non_ieee (int16_t * const block, + uint8_t * const dest, const int stride) +{ + mlib_VideoIDCT8x8_U8_S16 (dest, block, stride); + memset (block, 0, 64 * sizeof (uint16_t)); /* clear the coefficients for the next block */ +} + /* Same sequence as mpeg2_idct_add_mlib, but using mlib_VideoIDCT8x8_S16_S16 for the transform step. The `last` parameter is not used. */ +void mpeg2_idct_add_mlib_non_ieee (const int last, int16_t * const block, + uint8_t * const dest, const int stride) +{ + mlib_VideoIDCT8x8_S16_S16 (block, block); + mlib_VideoAddBlock_U8_S16 (dest, block, stride); + memset (block, 0, 64 * sizeof (uint16_t)); /* clear the coefficients for the next block */ +} + +#endif diff --git a/src/video_dec/libmpeg2new/libmpeg2/idct_mmx.c b/src/video_dec/libmpeg2new/libmpeg2/idct_mmx.c new file mode 100644 index 000000000..d5a5c08a4 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/idct_mmx.c @@ -0,0 +1,814 @@ +/* + * idct_mmx.c + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef ARCH_X86 + +#include + +#include "../include/mpeg2.h" +#include "../include/attributes.h" +#include "mpeg2_internal.h" +#include "../include/mmx.h" + +#define ROW_SHIFT 15 +#define COL_SHIFT 6 + +#define round(bias) ((int)(((bias)+0.5) * (1<> ROW_SHIFT; + row[1] = (a1 + b1) >> ROW_SHIFT; + row[2] = (a2 + b2) >> ROW_SHIFT; + row[3] = (a3 + b3) >> ROW_SHIFT; + row[4] = (a3 - b3) >> ROW_SHIFT; + row[5] = (a2 - b2) >> ROW_SHIFT; + row[6] = (a1 - b1) >> ROW_SHIFT; + row[7] = (a0 - b0) >> ROW_SHIFT; +} +#endif + + +/* MMXEXT row IDCT */ + +#define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \ + c4, c6, c4, c6, \ + c1, c3, -c1, -c5, \ + c5, c7, c3, -c7, \ + c4, -c6, c4, -c6, \ + -c4, c2, c4, -c2, \ + c5, -c1, c3, -c1, \ + c7, c3, c7, -c5 } + +static inline void mmxext_row_head (int16_t * const row, const int offset, + const int16_t * const table) +{ + movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */ + + movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */ + movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */ + + movq_m2r (*table, mm3); /* mm3 = -C2 -C4 C2 C4 */ + movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */ + + movq_m2r (*(table+4), mm4); /* mm4 = C6 C4 C6 C4 */ + pmaddwd_r2r (mm0, mm3); /* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */ + + pshufw_r2r (mm2, mm2, 0x4e); /* mm2 = x2 x0 x6 x4 */ +} + +static inline void mmxext_row (const int16_t * const table, + const int32_t * const rounder) +{ + movq_m2r (*(table+8), mm1); /* mm1 = -C5 -C1 C3 C1 */ + pmaddwd_r2r (mm2, mm4); /* mm4 
= C4*x0+C6*x2 C4*x4+C6*x6 */ + + pmaddwd_m2r (*(table+16), mm0); /* mm0 = C4*x4-C6*x6 C4*x0-C6*x2 */ + pshufw_r2r (mm6, mm6, 0x4e); /* mm6 = x3 x1 x7 x5 */ + + movq_m2r (*(table+12), mm7); /* mm7 = -C7 C3 C7 C5 */ + pmaddwd_r2r (mm5, mm1); /* mm1 = -C1*x5-C5*x7 C1*x1+C3*x3 */ + + paddd_m2r (*rounder, mm3); /* mm3 += rounder */ + pmaddwd_r2r (mm6, mm7); /* mm7 = C3*x1-C7*x3 C5*x5+C7*x7 */ + + pmaddwd_m2r (*(table+20), mm2); /* mm2 = C4*x0-C2*x2 -C4*x4+C2*x6 */ + paddd_r2r (mm4, mm3); /* mm3 = a1 a0 + rounder */ + + pmaddwd_m2r (*(table+24), mm5); /* mm5 = C3*x5-C1*x7 C5*x1-C1*x3 */ + movq_r2r (mm3, mm4); /* mm4 = a1 a0 + rounder */ + + pmaddwd_m2r (*(table+28), mm6); /* mm6 = C7*x1-C5*x3 C7*x5+C3*x7 */ + paddd_r2r (mm7, mm1); /* mm1 = b1 b0 */ + + paddd_m2r (*rounder, mm0); /* mm0 += rounder */ + psubd_r2r (mm1, mm3); /* mm3 = a1-b1 a0-b0 + rounder */ + + psrad_i2r (ROW_SHIFT, mm3); /* mm3 = y6 y7 */ + paddd_r2r (mm4, mm1); /* mm1 = a1+b1 a0+b0 + rounder */ + + paddd_r2r (mm2, mm0); /* mm0 = a3 a2 + rounder */ + psrad_i2r (ROW_SHIFT, mm1); /* mm1 = y1 y0 */ + + paddd_r2r (mm6, mm5); /* mm5 = b3 b2 */ + movq_r2r (mm0, mm4); /* mm4 = a3 a2 + rounder */ + + paddd_r2r (mm5, mm0); /* mm0 = a3+b3 a2+b2 + rounder */ + psubd_r2r (mm5, mm4); /* mm4 = a3-b3 a2-b2 + rounder */ +} + +static inline void mmxext_row_tail (int16_t * const row, const int store) +{ + psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */ + + psrad_i2r (ROW_SHIFT, mm4); /* mm4 = y4 y5 */ + + packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */ + + packssdw_r2r (mm3, mm4); /* mm4 = y6 y7 y4 y5 */ + + movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */ + pshufw_r2r (mm4, mm4, 0xb1); /* mm4 = y7 y6 y5 y4 */ + + /* slot */ + + movq_r2m (mm4, *(row+store+4)); /* save y7 y6 y5 y4 */ +} + +static inline void mmxext_row_mid (int16_t * const row, const int store, + const int offset, + const int16_t * const table) +{ + movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */ + psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */ + 
+ movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */ + psrad_i2r (ROW_SHIFT, mm4); /* mm4 = y4 y5 */ + + packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */ + movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */ + + packssdw_r2r (mm3, mm4); /* mm4 = y6 y7 y4 y5 */ + movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */ + + movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */ + pshufw_r2r (mm4, mm4, 0xb1); /* mm4 = y7 y6 y5 y4 */ + + movq_m2r (*table, mm3); /* mm3 = -C2 -C4 C2 C4 */ + movq_r2m (mm4, *(row+store+4)); /* save y7 y6 y5 y4 */ + + pmaddwd_r2r (mm0, mm3); /* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */ + + movq_m2r (*(table+4), mm4); /* mm4 = C6 C4 C6 C4 */ + pshufw_r2r (mm2, mm2, 0x4e); /* mm2 = x2 x0 x6 x4 */ +} + + +/* MMX row IDCT */ + +#define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \ + c4, c6, -c4, -c2, \ + c1, c3, c3, -c7, \ + c5, c7, -c1, -c5, \ + c4, -c6, c4, -c2, \ + -c4, c2, c4, -c6, \ + c5, -c1, c7, -c5, \ + c7, c3, c3, -c1 } + +static inline void mmx_row_head (int16_t * const row, const int offset, + const int16_t * const table) +{ + movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */ + + movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */ + movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */ + + movq_m2r (*table, mm3); /* mm3 = C6 C4 C2 C4 */ + movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */ + + punpckldq_r2r (mm0, mm0); /* mm0 = x2 x0 x2 x0 */ + + movq_m2r (*(table+4), mm4); /* mm4 = -C2 -C4 C6 C4 */ + pmaddwd_r2r (mm0, mm3); /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */ + + movq_m2r (*(table+8), mm1); /* mm1 = -C7 C3 C3 C1 */ + punpckhdq_r2r (mm2, mm2); /* mm2 = x6 x4 x6 x4 */ +} + +static inline void mmx_row (const int16_t * const table, + const int32_t * const rounder) +{ + pmaddwd_r2r (mm2, mm4); /* mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 */ + punpckldq_r2r (mm5, mm5); /* mm5 = x3 x1 x3 x1 */ + + pmaddwd_m2r (*(table+16), mm0); /* mm0 = C4*x0-C2*x2 C4*x0-C6*x2 */ + punpckhdq_r2r (mm6, mm6); /* mm6 = x7 x5 x7 x5 */ + + movq_m2r (*(table+12), mm7); /* mm7 = -C5 -C1 C7 C5 */ 
+ pmaddwd_r2r (mm5, mm1); /* mm1 = C3*x1-C7*x3 C1*x1+C3*x3 */ + + paddd_m2r (*rounder, mm3); /* mm3 += rounder */ + pmaddwd_r2r (mm6, mm7); /* mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 */ + + pmaddwd_m2r (*(table+20), mm2); /* mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 */ + paddd_r2r (mm4, mm3); /* mm3 = a1 a0 + rounder */ + + pmaddwd_m2r (*(table+24), mm5); /* mm5 = C7*x1-C5*x3 C5*x1-C1*x3 */ + movq_r2r (mm3, mm4); /* mm4 = a1 a0 + rounder */ + + pmaddwd_m2r (*(table+28), mm6); /* mm6 = C3*x5-C1*x7 C7*x5+C3*x7 */ + paddd_r2r (mm7, mm1); /* mm1 = b1 b0 */ + + paddd_m2r (*rounder, mm0); /* mm0 += rounder */ + psubd_r2r (mm1, mm3); /* mm3 = a1-b1 a0-b0 + rounder */ + + psrad_i2r (ROW_SHIFT, mm3); /* mm3 = y6 y7 */ + paddd_r2r (mm4, mm1); /* mm1 = a1+b1 a0+b0 + rounder */ + + paddd_r2r (mm2, mm0); /* mm0 = a3 a2 + rounder */ + psrad_i2r (ROW_SHIFT, mm1); /* mm1 = y1 y0 */ + + paddd_r2r (mm6, mm5); /* mm5 = b3 b2 */ + movq_r2r (mm0, mm7); /* mm7 = a3 a2 + rounder */ + + paddd_r2r (mm5, mm0); /* mm0 = a3+b3 a2+b2 + rounder */ + psubd_r2r (mm5, mm7); /* mm7 = a3-b3 a2-b2 + rounder */ +} + +static inline void mmx_row_tail (int16_t * const row, const int store) +{ + psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */ + + psrad_i2r (ROW_SHIFT, mm7); /* mm7 = y4 y5 */ + + packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */ + + packssdw_r2r (mm3, mm7); /* mm7 = y6 y7 y4 y5 */ + + movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */ + movq_r2r (mm7, mm4); /* mm4 = y6 y7 y4 y5 */ + + pslld_i2r (16, mm7); /* mm7 = y7 0 y5 0 */ + + psrld_i2r (16, mm4); /* mm4 = 0 y6 0 y4 */ + + por_r2r (mm4, mm7); /* mm7 = y7 y6 y5 y4 */ + + /* slot */ + + movq_r2m (mm7, *(row+store+4)); /* save y7 y6 y5 y4 */ +} + +static inline void mmx_row_mid (int16_t * const row, const int store, + const int offset, const int16_t * const table) +{ + movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */ + psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */ + + movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */ + psrad_i2r (ROW_SHIFT, mm7); 
/* mm7 = y4 y5 */ + + packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */ + movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */ + + packssdw_r2r (mm3, mm7); /* mm7 = y6 y7 y4 y5 */ + movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */ + + movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */ + movq_r2r (mm7, mm1); /* mm1 = y6 y7 y4 y5 */ + + punpckldq_r2r (mm0, mm0); /* mm0 = x2 x0 x2 x0 */ + psrld_i2r (16, mm7); /* mm7 = 0 y6 0 y4 */ + + movq_m2r (*table, mm3); /* mm3 = C6 C4 C2 C4 */ + pslld_i2r (16, mm1); /* mm1 = y7 0 y5 0 */ + + movq_m2r (*(table+4), mm4); /* mm4 = -C2 -C4 C6 C4 */ + por_r2r (mm1, mm7); /* mm7 = y7 y6 y5 y4 */ + + movq_m2r (*(table+8), mm1); /* mm1 = -C7 C3 C3 C1 */ + punpckhdq_r2r (mm2, mm2); /* mm2 = x6 x4 x6 x4 */ + + movq_r2m (mm7, *(row+store+4)); /* save y7 y6 y5 y4 */ + pmaddwd_r2r (mm0, mm3); /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */ +} + + +#if 0 +/* C column IDCT - its just here to document the MMXEXT and MMX versions */ +static inline void idct_col (int16_t * col, int offset) +{ +/* multiplication - as implemented on mmx */ +#define F(c,x) (((c) * (x)) >> 16) + +/* saturation - it helps us handle torture test cases */ +#define S(x) (((x)>32767) ? 32767 : ((x)<-32768) ? 
-32768 : (x)) + + int16_t x0, x1, x2, x3, x4, x5, x6, x7; + int16_t y0, y1, y2, y3, y4, y5, y6, y7; + int16_t a0, a1, a2, a3, b0, b1, b2, b3; + int16_t u04, v04, u26, v26, u17, v17, u35, v35, u12, v12; + + col += offset; + + x0 = col[0*8]; + x1 = col[1*8]; + x2 = col[2*8]; + x3 = col[3*8]; + x4 = col[4*8]; + x5 = col[5*8]; + x6 = col[6*8]; + x7 = col[7*8]; + + u04 = S (x0 + x4); + v04 = S (x0 - x4); + u26 = S (F (T2, x6) + x2); + v26 = S (F (T2, x2) - x6); + + a0 = S (u04 + u26); + a1 = S (v04 + v26); + a2 = S (v04 - v26); + a3 = S (u04 - u26); + + u17 = S (F (T1, x7) + x1); + v17 = S (F (T1, x1) - x7); + u35 = S (F (T3, x5) + x3); + v35 = S (F (T3, x3) - x5); + + b0 = S (u17 + u35); + b3 = S (v17 - v35); + u12 = S (u17 - u35); + v12 = S (v17 + v35); + u12 = S (2 * F (C4, u12)); + v12 = S (2 * F (C4, v12)); + b1 = S (u12 + v12); + b2 = S (u12 - v12); + + y0 = S (a0 + b0) >> COL_SHIFT; + y1 = S (a1 + b1) >> COL_SHIFT; + y2 = S (a2 + b2) >> COL_SHIFT; + y3 = S (a3 + b3) >> COL_SHIFT; + + y4 = S (a3 - b3) >> COL_SHIFT; + y5 = S (a2 - b2) >> COL_SHIFT; + y6 = S (a1 - b1) >> COL_SHIFT; + y7 = S (a0 - b0) >> COL_SHIFT; + + col[0*8] = y0; + col[1*8] = y1; + col[2*8] = y2; + col[3*8] = y3; + col[4*8] = y4; + col[5*8] = y5; + col[6*8] = y6; + col[7*8] = y7; +} +#endif + + +/* MMX column IDCT */ +static inline void idct_col (int16_t * const col, const int offset) +{ +#define T1 13036 +#define T2 27146 +#define T3 43790 +#define C4 23170 + + static const short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1}; + static const short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2}; + static const short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3}; + static const short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4}; + + /* column code adapted from peter gubanov */ + /* http://www.elecard.com/peter/idct.shtml */ + + movq_m2r (*_T1, mm0); /* mm0 = T1 */ + + movq_m2r (*(col+offset+1*8), mm1); /* mm1 = x1 */ + movq_r2r (mm0, mm2); /* mm2 = T1 */ + + movq_m2r (*(col+offset+7*8), mm4); /* mm4 = x7 */ + pmulhw_r2r (mm1, mm0); /* 
mm0 = T1*x1 */ + + movq_m2r (*_T3, mm5); /* mm5 = T3 */ + pmulhw_r2r (mm4, mm2); /* mm2 = T1*x7 */ + + movq_m2r (*(col+offset+5*8), mm6); /* mm6 = x5 */ + movq_r2r (mm5, mm7); /* mm7 = T3-1 */ + + movq_m2r (*(col+offset+3*8), mm3); /* mm3 = x3 */ + psubsw_r2r (mm4, mm0); /* mm0 = v17 */ + + movq_m2r (*_T2, mm4); /* mm4 = T2 */ + pmulhw_r2r (mm3, mm5); /* mm5 = (T3-1)*x3 */ + + paddsw_r2r (mm2, mm1); /* mm1 = u17 */ + pmulhw_r2r (mm6, mm7); /* mm7 = (T3-1)*x5 */ + + /* slot */ + + movq_r2r (mm4, mm2); /* mm2 = T2 */ + paddsw_r2r (mm3, mm5); /* mm5 = T3*x3 */ + + pmulhw_m2r (*(col+offset+2*8), mm4);/* mm4 = T2*x2 */ + paddsw_r2r (mm6, mm7); /* mm7 = T3*x5 */ + + psubsw_r2r (mm6, mm5); /* mm5 = v35 */ + paddsw_r2r (mm3, mm7); /* mm7 = u35 */ + + movq_m2r (*(col+offset+6*8), mm3); /* mm3 = x6 */ + movq_r2r (mm0, mm6); /* mm6 = v17 */ + + pmulhw_r2r (mm3, mm2); /* mm2 = T2*x6 */ + psubsw_r2r (mm5, mm0); /* mm0 = b3 */ + + psubsw_r2r (mm3, mm4); /* mm4 = v26 */ + paddsw_r2r (mm6, mm5); /* mm5 = v12 */ + + movq_r2m (mm0, *(col+offset+3*8)); /* save b3 in scratch0 */ + movq_r2r (mm1, mm6); /* mm6 = u17 */ + + paddsw_m2r (*(col+offset+2*8), mm2);/* mm2 = u26 */ + paddsw_r2r (mm7, mm6); /* mm6 = b0 */ + + psubsw_r2r (mm7, mm1); /* mm1 = u12 */ + movq_r2r (mm1, mm7); /* mm7 = u12 */ + + movq_m2r (*(col+offset+0*8), mm3); /* mm3 = x0 */ + paddsw_r2r (mm5, mm1); /* mm1 = u12+v12 */ + + movq_m2r (*_C4, mm0); /* mm0 = C4/2 */ + psubsw_r2r (mm5, mm7); /* mm7 = u12-v12 */ + + movq_r2m (mm6, *(col+offset+5*8)); /* save b0 in scratch1 */ + pmulhw_r2r (mm0, mm1); /* mm1 = b1/2 */ + + movq_r2r (mm4, mm6); /* mm6 = v26 */ + pmulhw_r2r (mm0, mm7); /* mm7 = b2/2 */ + + movq_m2r (*(col+offset+4*8), mm5); /* mm5 = x4 */ + movq_r2r (mm3, mm0); /* mm0 = x0 */ + + psubsw_r2r (mm5, mm3); /* mm3 = v04 */ + paddsw_r2r (mm5, mm0); /* mm0 = u04 */ + + paddsw_r2r (mm3, mm4); /* mm4 = a1 */ + movq_r2r (mm0, mm5); /* mm5 = u04 */ + + psubsw_r2r (mm6, mm3); /* mm3 = a2 */ + paddsw_r2r (mm2, mm5); /* 
mm5 = a0 */ + + paddsw_r2r (mm1, mm1); /* mm1 = b1 */ + psubsw_r2r (mm2, mm0); /* mm0 = a3 */ + + paddsw_r2r (mm7, mm7); /* mm7 = b2 */ + movq_r2r (mm3, mm2); /* mm2 = a2 */ + + movq_r2r (mm4, mm6); /* mm6 = a1 */ + paddsw_r2r (mm7, mm3); /* mm3 = a2+b2 */ + + psraw_i2r (COL_SHIFT, mm3); /* mm3 = y2 */ + paddsw_r2r (mm1, mm4); /* mm4 = a1+b1 */ + + psraw_i2r (COL_SHIFT, mm4); /* mm4 = y1 */ + psubsw_r2r (mm1, mm6); /* mm6 = a1-b1 */ + + movq_m2r (*(col+offset+5*8), mm1); /* mm1 = b0 */ + psubsw_r2r (mm7, mm2); /* mm2 = a2-b2 */ + + psraw_i2r (COL_SHIFT, mm6); /* mm6 = y6 */ + movq_r2r (mm5, mm7); /* mm7 = a0 */ + + movq_r2m (mm4, *(col+offset+1*8)); /* save y1 */ + psraw_i2r (COL_SHIFT, mm2); /* mm2 = y5 */ + + movq_r2m (mm3, *(col+offset+2*8)); /* save y2 */ + paddsw_r2r (mm1, mm5); /* mm5 = a0+b0 */ + + movq_m2r (*(col+offset+3*8), mm4); /* mm4 = b3 */ + psubsw_r2r (mm1, mm7); /* mm7 = a0-b0 */ + + psraw_i2r (COL_SHIFT, mm5); /* mm5 = y0 */ + movq_r2r (mm0, mm3); /* mm3 = a3 */ + + movq_r2m (mm2, *(col+offset+5*8)); /* save y5 */ + psubsw_r2r (mm4, mm3); /* mm3 = a3-b3 */ + + psraw_i2r (COL_SHIFT, mm7); /* mm7 = y7 */ + paddsw_r2r (mm0, mm4); /* mm4 = a3+b3 */ + + movq_r2m (mm5, *(col+offset+0*8)); /* save y0 */ + psraw_i2r (COL_SHIFT, mm3); /* mm3 = y4 */ + + movq_r2m (mm6, *(col+offset+6*8)); /* save y6 */ + psraw_i2r (COL_SHIFT, mm4); /* mm4 = y3 */ + + movq_r2m (mm7, *(col+offset+7*8)); /* save y7 */ + + movq_r2m (mm3, *(col+offset+4*8)); /* save y4 */ + + movq_r2m (mm4, *(col+offset+3*8)); /* save y3 */ +} + + +static const int32_t rounder0[] ATTR_ALIGN(8) = + rounder ((1 << (COL_SHIFT - 1)) - 0.5); +static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0); +static const int32_t rounder1[] ATTR_ALIGN(8) = + rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */ +static const int32_t rounder7[] ATTR_ALIGN(8) = + rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */ +static const int32_t rounder2[] ATTR_ALIGN(8) = + rounder (0.60355339059); /* C2 * (C6+C2)/2 */ +static 
/*
 * declare_idct - generate one complete 8x8 inverse DCT routine.
 *
 *   idct           name of the generated static inline function
 *   table          macro that builds a coefficient table from 7 constants
 *   idct_row_head  loads the first row            (block, offset, table)
 *   idct_row       transforms the loaded row      (table, rounder)
 *   idct_row_mid   stores the finished row and loads the next one
 *                                        (block, store, offset, table)
 *   idct_row_tail  stores the last row            (block, store)
 *
 * Rows are processed in the order 0, 4, 1, 7, 2, 6, 3, 5 so that each pair
 * sharing a coefficient table (table04, table17, table26, table35) is
 * handled back to back; every row gets its own rounderN constant.  The
 * column pass then runs twice, on columns 0-3 and on columns 4-7.
 *
 * The generated function transforms `block` in place.
 */
#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \
static inline void idct (int16_t * const block)				\
{									\
    static const int16_t table04[] ATTR_ALIGN(16) =			\
	table (22725, 21407, 19266, 16384, 12873,  8867, 4520);		\
    static const int16_t table17[] ATTR_ALIGN(16) =			\
	table (31521, 29692, 26722, 22725, 17855, 12299, 6270);		\
    static const int16_t table26[] ATTR_ALIGN(16) =			\
	table (29692, 27969, 25172, 21407, 16819, 11585, 5906);		\
    static const int16_t table35[] ATTR_ALIGN(16) =			\
	table (26722, 25172, 22654, 19266, 15137, 10426, 5315);		\
									\
    idct_row_head (block, 0*8, table04);				\
    idct_row (table04, rounder0);					\
    idct_row_mid (block, 0*8, 4*8, table04);				\
    idct_row (table04, rounder4);					\
    idct_row_mid (block, 4*8, 1*8, table17);				\
    idct_row (table17, rounder1);					\
    idct_row_mid (block, 1*8, 7*8, table17);				\
    idct_row (table17, rounder7);					\
    idct_row_mid (block, 7*8, 2*8, table26);				\
    idct_row (table26, rounder2);					\
    idct_row_mid (block, 2*8, 6*8, table26);				\
    idct_row (table26, rounder6);					\
    idct_row_mid (block, 6*8, 3*8, table35);				\
    idct_row (table35, rounder3);					\
    idct_row_mid (block, 3*8, 5*8, table35);				\
    idct_row (table35, rounder5);					\
    idct_row_tail (block, 5*8);						\
									\
    idct_col (block, 0);						\
    idct_col (block, 4);						\
}
/*
 * ADD_MMX - one pipelined step of block_add().
 *
 * Packs and stores the row that is already summed in r3/r4, while loading
 * the destination row two lines ahead into r1/r2, widening its bytes to
 * words (mm0 must be zero) and adding the residual row at block+offset.
 * Advances `dest` by one line.  Uses `dest`, `stride` and `block` from the
 * enclosing scope.
 */
#define ADD_MMX(offset,r1,r2,r3,r4)			\
do {							\
    movq_m2r (*(dest+2*stride), r1);			\
    packuswb_r2r (r4, r3);				\
    movq_r2r (r1, r2);					\
    dest += stride;					\
    movq_r2m (r3, *dest);				\
    punpcklbw_r2r (mm0, r1);				\
    paddsw_m2r (*(block+offset), r1);			\
    punpckhbw_r2r (mm0, r2);				\
    paddsw_m2r (*(block+offset+4), r2);			\
} while (0)

/*
 * Add the 8x8 block of int16 residuals to the 8x8 area of 8-bit pixels at
 * `dest` (line pitch `stride`), saturating each result to 0..255.
 *
 * block  - 64 residual values, row-major, read only here
 * dest   - top-left pixel of the destination area, updated in place
 * stride - distance in bytes between destination lines
 *
 * Two destination rows are kept in flight: the register pairs mm1/mm2 and
 * mm3/mm4 alternate between "being summed" and "being stored".
 */
static inline void block_add (int16_t * const block, uint8_t * dest,
			      const int stride)
{
    movq_m2r (*dest, mm1);
    pxor_r2r (mm0, mm0);		/* mm0 = 0, used to widen bytes */
    movq_m2r (*(dest+stride), mm3);
    movq_r2r (mm1, mm2);
    punpcklbw_r2r (mm0, mm1);
    movq_r2r (mm3, mm4);
    paddsw_m2r (*(block+0*8), mm1);
    punpckhbw_r2r (mm0, mm2);
    paddsw_m2r (*(block+0*8+4), mm2);
    punpcklbw_r2r (mm0, mm3);
    paddsw_m2r (*(block+1*8), mm3);
    packuswb_r2r (mm2, mm1);
    punpckhbw_r2r (mm0, mm4);
    movq_r2m (mm1, *dest);		/* row 0 done */
    paddsw_m2r (*(block+1*8+4), mm4);
    ADD_MMX (2*8, mm1, mm2, mm3, mm4);
    ADD_MMX (3*8, mm3, mm4, mm1, mm2);
    ADD_MMX (4*8, mm1, mm2, mm3, mm4);
    ADD_MMX (5*8, mm3, mm4, mm1, mm2);
    ADD_MMX (6*8, mm1, mm2, mm3, mm4);
    ADD_MMX (7*8, mm3, mm4, mm1, mm2);
    /* drain the pipeline: the last row is still unpacked in mm3/mm4 */
    packuswb_r2r (mm4, mm3);
    movq_r2m (mm3, *(dest+stride));
}
#define CPU_MMXEXT 0
#define CPU_MMX 1

/*
 * dup4 - replicate the low 16-bit word of `reg` into all four words.
 * Reads `cpu` from the enclosing scope: plain MMX needs two unpacks,
 * any other value of `cpu` but CPU_MMXEXT takes that same path, and
 * CPU_MMXEXT uses a single pshufw.
 */
#define dup4(reg)			\
do {					\
    if (cpu != CPU_MMXEXT) {		\
	punpcklwd_r2r (reg, reg);	\
	punpckldq_r2r (reg, reg);	\
    } else				\
	pshufw_r2r (reg, reg, 0x00);	\
} while (0)

/*
 * Add a constant to an 8x8 area of 8-bit pixels, for blocks that carry
 * only a DC term.
 *
 * block  - coefficient block; block[0] supplies the DC value and both
 *          block[0] and block[63] are cleared before returning
 * dest   - top-left pixel of the destination area (line pitch `stride`)
 * stride - distance in bytes between destination lines
 * cpu    - CPU_MMX or CPU_MMXEXT, selects the dup4() implementation
 *
 * The value added is (block[0] + 64) >> 7.  It is split into a
 * non-negative part (mm0) and the magnitude of a negative part (mm1), both
 * clamped to bytes, so that one unsigned saturating add followed by one
 * unsigned saturating subtract per row yields the clamped sum.
 */
static inline void block_add_DC (int16_t * const block, uint8_t * dest,
				 const int stride, const int cpu)
{
    movd_v2r ((block[0] + 64) >> 7, mm0);
    pxor_r2r (mm1, mm1);
    movq_m2r (*dest, mm2);
    dup4 (mm0);				/* mm0 = DC in every word */
    psubsw_r2r (mm0, mm1);		/* mm1 = -DC in every word */
    packuswb_r2r (mm0, mm0);		/* mm0 = bytes of max (DC, 0) */
    paddusb_r2r (mm0, mm2);
    packuswb_r2r (mm1, mm1);		/* mm1 = bytes of max (-DC, 0) */
    movq_m2r (*(dest + stride), mm3);
    psubusb_r2r (mm1, mm2);
    block[0] = 0;
    paddusb_r2r (mm0, mm3);
    movq_r2m (mm2, *dest);
    psubusb_r2r (mm1, mm3);
    movq_m2r (*(dest + 2*stride), mm2);
    dest += stride;
    movq_r2m (mm3, *dest);
    paddusb_r2r (mm0, mm2);
    movq_m2r (*(dest + 2*stride), mm3);
    psubusb_r2r (mm1, mm2);
    dest += stride;
    paddusb_r2r (mm0, mm3);
    movq_r2m (mm2, *dest);
    psubusb_r2r (mm1, mm3);
    movq_m2r (*(dest + 2*stride), mm2);
    dest += stride;
    movq_r2m (mm3, *dest);
    paddusb_r2r (mm0, mm2);
    movq_m2r (*(dest + 2*stride), mm3);
    psubusb_r2r (mm1, mm2);
    dest += stride;
    paddusb_r2r (mm0, mm3);
    movq_r2m (mm2, *dest);
    psubusb_r2r (mm1, mm3);
    movq_m2r (*(dest + 2*stride), mm2);
    dest += stride;
    movq_r2m (mm3, *dest);
    paddusb_r2r (mm0, mm2);
    movq_m2r (*(dest + 2*stride), mm3);
    psubusb_r2r (mm1, mm2);
    block[63] = 0;
    paddusb_r2r (mm0, mm3);
    movq_r2m (mm2, *(dest + stride));
    psubusb_r2r (mm1, mm3);
    movq_r2m (mm3, *(dest + 2*stride));
}
/*
 * Patch the two global scan tables for the MMX/MMXEXT IDCT, which expects
 * its input coefficients in a reordered layout.
 *
 * Every entry keeps its row bits (0x38) while its three column bits are
 * rotated right by one position: bit 0 moves to bit 2, bits 2..1 move to
 * bits 1..0.
 *
 * NOTE(review): the tables are rewritten in place and the rotation is not
 * its own inverse, so calling this more than once leaves them in a
 * different order - callers must invoke it exactly once.
 */
void mpeg2_idct_mmx_init (void)
{
    extern uint8_t mpeg2_scan_norm[64];
    extern uint8_t mpeg2_scan_alt[64];
    uint8_t * const scans[2] = { mpeg2_scan_norm, mpeg2_scan_alt };
    int which, pos;

    for (which = 0; which < 2; which++) {
	uint8_t * const scan = scans[which];

	for (pos = 0; pos < 64; pos++) {
	    const int entry = scan[pos];
	    const int row_bits = entry & 0x38;
	    const int column = entry & 7;

	    scan[pos] = row_bits | (column >> 1) | ((column & 1) << 2);
	}
    }
}
+Version: @VERSION@ +Libs: -L${libdir} -lmpeg2 +Cflags: -I${includedir}/@PACKAGE@ diff --git a/src/video_dec/libmpeg2new/libmpeg2/libmpeg2convert.pc.in b/src/video_dec/libmpeg2new/libmpeg2/libmpeg2convert.pc.in new file mode 100644 index 000000000..42383a6e2 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/libmpeg2convert.pc.in @@ -0,0 +1,10 @@ +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: libmpeg2convert +Description: libmpeg2 helper functions for converting to various formats. +Version: @VERSION@ +Libs: -L${libdir} -lmpeg2convert +Cflags: -I${includedir}/@PACKAGE@ diff --git a/src/video_dec/libmpeg2new/libmpeg2/motion_comp.c b/src/video_dec/libmpeg2new/libmpeg2/motion_comp.c new file mode 100644 index 000000000..d5a265d5c --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/motion_comp.c @@ -0,0 +1,130 @@ +/* + * motion_comp.c + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
/*
 * Portable C motion-compensation kernels.
 *
 * avg2/avg4 are rounding averages of two and four samples.  Their
 * arguments are fully parenthesised so that any expression can be passed.
 */
#define avg2(a,b) (((a) + (b) + 1) >> 1)
#define avg4(a,b,c,d) (((a) + (b) + (c) + (d) + 2) >> 2)

/*
 * Predictors for pixel i of the current line.  They read `ref` and
 * `stride` from the function they are expanded in:
 *   o  - full-pel copy
 *   x  - horizontal half-pel (needs pixel i+1)
 *   y  - vertical half-pel (needs the next line)
 *   xy - both (needs pixel i+1 on this line and the next)
 */
#define predict_o(i) (ref[i])
#define predict_x(i) (avg2 (ref[i], ref[(i) + 1]))
#define predict_y(i) (avg2 (ref[i], (ref + stride)[i]))
#define predict_xy(i) (avg4 (ref[i], ref[(i) + 1], \
			     (ref + stride)[i], (ref + stride)[(i) + 1]))

/* put overwrites the destination, avg blends the prediction with it */
#define put(predictor,i) dest[i] = predictor (i)
#define avg(predictor,i) dest[i] = avg2 (predictor (i), dest[i])

/* eight consecutive pixels of one line, starting at column `base` */
#define MC_ROW8(op,xy,base) \
    op (predict_##xy, (base) + 0); \
    op (predict_##xy, (base) + 1); \
    op (predict_##xy, (base) + 2); \
    op (predict_##xy, (base) + 3); \
    op (predict_##xy, (base) + 4); \
    op (predict_##xy, (base) + 5); \
    op (predict_##xy, (base) + 6); \
    op (predict_##xy, (base) + 7)

/*
 * mc function template
 *
 * Generates MC_<op>_<xy>_16_c and MC_<op>_<xy>_8_c, which process `height`
 * lines of 16 or 8 pixels.  `dest` and `ref` both advance by `stride`
 * bytes per line.  A height of zero or less is a no-op (the previous
 * do/while form decremented past zero and kept writing).
 */
#define MC_FUNC(op,xy) \
static void MC_##op##_##xy##_16_c (uint8_t * dest, const uint8_t * ref, \
				   const int stride, int height) \
{ \
    for (; height > 0; height--) { \
	MC_ROW8 (op, xy, 0); \
	MC_ROW8 (op, xy, 8); \
	ref += stride; \
	dest += stride; \
    } \
} \
static void MC_##op##_##xy##_8_c (uint8_t * dest, const uint8_t * ref, \
				  const int stride, int height) \
{ \
    for (; height > 0; height--) { \
	MC_ROW8 (op, xy, 0); \
	ref += stride; \
	dest += stride; \
    } \
}

/* definitions of the actual mc functions */

MC_FUNC (put,o)
MC_FUNC (avg,o)
MC_FUNC (put,x)
MC_FUNC (avg,x)
MC_FUNC (put,y)
MC_FUNC (avg,y)
MC_FUNC (put,xy)
MC_FUNC (avg,xy)
/*
 * Rounded-up average of the bytes packed in two 64-bit words, computed for
 * all lanes at once as (a | b) - ((a ^ b) >> 1).
 *
 * The 0xfe mask drops the low bit of each byte of a ^ b before the shift
 * so that nothing shifts across a byte boundary (presumably BYTE_VEC
 * replicates its argument into every byte of the word - confirm in
 * alpha_asm.h).
 */
static inline uint64_t avg2 (uint64_t a, uint64_t b)
{
    const uint64_t any_bit = a | b;
    const uint64_t half_diff = ((a ^ b) & BYTE_VEC (0xfe)) >> 1;

    return any_bit - half_diff;
}
line_size; \ + p1 = LOAD (pixels); \ + do { \ + uint64_t av = avg2 (p0, p1); \ + if (--h == 0) line_size = 0; \ + pixels += line_size; \ + p0 = p1; \ + p1 = LOAD (pixels); \ + STORE (av, block); \ + block += line_size; \ + } while (h); \ + } while (0) + +#define OP16_Y2(LOAD,LOAD16,STORE) \ + do { \ + uint64_t p0l, p0r, p1l, p1r; \ + LOAD16 (p0l, p0r, pixels); \ + pixels += line_size; \ + LOAD16 (p1l, p1r, pixels); \ + do { \ + uint64_t avl, avr; \ + if (--h == 0) line_size = 0; \ + avl = avg2 (p0l, p1l); \ + avr = avg2 (p0r, p1r); \ + p0l = p1l; \ + p0r = p1r; \ + pixels += line_size; \ + LOAD16 (p1l, p1r, pixels); \ + STORE (avl, block); \ + STORE (avr, block + 8); \ + block += line_size; \ + } while (h); \ + } while (0) + +#define OP8_XY2(LOAD,LOAD16,STORE) \ + do { \ + uint64_t pl, ph; \ + uint64_t p1 = LOAD (pixels); \ + uint64_t p2 = p1 >> 8 | ((uint64_t) pixels[8] << 56); \ + \ + ph = (((p1 & ~BYTE_VEC (0x03)) >> 2) + \ + ((p2 & ~BYTE_VEC (0x03)) >> 2)); \ + pl = ((p1 & BYTE_VEC (0x03)) + \ + (p2 & BYTE_VEC (0x03))); \ + \ + do { \ + uint64_t npl, nph; \ + \ + pixels += line_size; \ + p1 = LOAD (pixels); \ + p2 = (p1 >> 8) | ((uint64_t) pixels[8] << 56); \ + nph = (((p1 & ~BYTE_VEC (0x03)) >> 2) + \ + ((p2 & ~BYTE_VEC (0x03)) >> 2)); \ + npl = ((p1 & BYTE_VEC (0x03)) + \ + (p2 & BYTE_VEC (0x03))); \ + \ + STORE (ph + nph + \ + (((pl + npl + BYTE_VEC (0x02)) >> 2) & \ + BYTE_VEC (0x03)), block); \ + \ + block += line_size; \ + pl = npl; \ + ph = nph; \ + } while (--h); \ + } while (0) + +#define OP16_XY2(LOAD,LOAD16,STORE) \ + do { \ + uint64_t p0, p1, p2, p3, pl_l, ph_l, pl_r, ph_r; \ + LOAD16 (p0, p2, pixels); \ + p1 = p0 >> 8 | (p2 << 56); \ + p3 = p2 >> 8 | ((uint64_t)pixels[16] << 56); \ + \ + ph_l = (((p0 & ~BYTE_VEC (0x03)) >> 2) + \ + ((p1 & ~BYTE_VEC (0x03)) >> 2)); \ + pl_l = ((p0 & BYTE_VEC (0x03)) + \ + (p1 & BYTE_VEC(0x03))); \ + ph_r = (((p2 & ~BYTE_VEC (0x03)) >> 2) + \ + ((p3 & ~BYTE_VEC (0x03)) >> 2)); \ + pl_r = ((p2 & BYTE_VEC (0x03)) + \ + 
(p3 & BYTE_VEC (0x03))); \ + \ + do { \ + uint64_t npl_l, nph_l, npl_r, nph_r; \ + \ + pixels += line_size; \ + LOAD16 (p0, p2, pixels); \ + p1 = p0 >> 8 | (p2 << 56); \ + p3 = p2 >> 8 | ((uint64_t)pixels[16] << 56); \ + nph_l = (((p0 & ~BYTE_VEC (0x03)) >> 2) + \ + ((p1 & ~BYTE_VEC (0x03)) >> 2)); \ + npl_l = ((p0 & BYTE_VEC (0x03)) + \ + (p1 & BYTE_VEC (0x03))); \ + nph_r = (((p2 & ~BYTE_VEC (0x03)) >> 2) + \ + ((p3 & ~BYTE_VEC (0x03)) >> 2)); \ + npl_r = ((p2 & BYTE_VEC (0x03)) + \ + (p3 & BYTE_VEC (0x03))); \ + \ + STORE (ph_l + nph_l + \ + (((pl_l + npl_l + BYTE_VEC (0x02)) >> 2) & \ + BYTE_VEC(0x03)), block); \ + STORE (ph_r + nph_r + \ + (((pl_r + npl_r + BYTE_VEC (0x02)) >> 2) & \ + BYTE_VEC(0x03)), block + 8); \ + \ + block += line_size; \ + pl_l = npl_l; \ + ph_l = nph_l; \ + pl_r = npl_r; \ + ph_r = nph_r; \ + } while (--h); \ + } while (0) + +#define MAKE_OP(OPNAME,SIZE,SUFF,OPKIND,STORE) \ +static void MC_ ## OPNAME ## _ ## SUFF ## _ ## SIZE ## _alpha \ + (uint8_t *restrict block, const uint8_t *restrict pixels, \ + int line_size, int h) \ +{ \ + if ((uint64_t) pixels & 0x7) { \ + OPKIND (uldq, ULOAD16, STORE); \ + } else { \ + OPKIND (ldq, ALOAD16, STORE); \ + } \ +} + +#define PIXOP(OPNAME,STORE) \ + MAKE_OP (OPNAME, 8, o, OP8, STORE); \ + MAKE_OP (OPNAME, 8, x, OP8_X2, STORE); \ + MAKE_OP (OPNAME, 8, y, OP8_Y2, STORE); \ + MAKE_OP (OPNAME, 8, xy, OP8_XY2, STORE); \ + MAKE_OP (OPNAME, 16, o, OP16, STORE); \ + MAKE_OP (OPNAME, 16, x, OP16_X2, STORE); \ + MAKE_OP (OPNAME, 16, y, OP16_Y2, STORE); \ + MAKE_OP (OPNAME, 16, xy, OP16_XY2, STORE); + +#define STORE(l,b) stq (l, b) +PIXOP (put, STORE); +#undef STORE +#define STORE(l,b) stq (avg2 (l, ldq (b)), b); +PIXOP (avg, STORE); + +mpeg2_mc_t mpeg2_mc_alpha = { + { MC_put_o_16_alpha, MC_put_x_16_alpha, + MC_put_y_16_alpha, MC_put_xy_16_alpha, + MC_put_o_8_alpha, MC_put_x_8_alpha, + MC_put_y_8_alpha, MC_put_xy_8_alpha }, + { MC_avg_o_16_alpha, MC_avg_x_16_alpha, + MC_avg_y_16_alpha, MC_avg_xy_16_alpha, + 
MC_avg_o_8_alpha, MC_avg_x_8_alpha, + MC_avg_y_8_alpha, MC_avg_xy_8_alpha } +}; + +#endif diff --git a/src/video_dec/libmpeg2new/libmpeg2/motion_comp_altivec.c b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_altivec.c new file mode 100644 index 000000000..ee740e14e --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_altivec.c @@ -0,0 +1,1010 @@ +/* + * motion_comp_altivec.c + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
/*
 * Full-pel copy of a 16-pixel-wide block from `ref` to `dest`.
 *
 * dest   - destination; written with vec_st, i.e. as whole aligned
 *          16-byte vectors
 * ref    - source, may be unaligned
 * stride - distance in bytes between lines, for both pointers
 * height - number of lines; must be even and at least 4, because the loop
 *          below runs (height/2 - 1) times as a do/while and would not
 *          terminate correctly for a count of zero
 *
 * The alignment permute vector is computed once from the initial `ref`
 * and reused for every line (assumes stride is a multiple of 16 so the
 * misalignment is the same on each line - TODO confirm with callers).
 *
 * The loop is software pipelined: each half-iteration loads and realigns
 * the next line while storing the line prepared just before it.
 */
static void MC_put_o_16_altivec (uint8_t * dest, const uint8_t * ref,
				 const int stride, int height)
{
    vector_u8_t perm, ref0, ref1, tmp;

    perm = vec_lvsl (0, ref);

    /* two lines per iteration; the first load and the last two stores
       happen outside the loop */
    height = (height >> 1) - 1;

    /* prologue: fetch and realign line 0 */
    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    ref += stride;
    tmp = vec_perm (ref0, ref1, perm);

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (15, ref);
	ref += stride;
	vec_st (tmp, 0, dest);
	tmp = vec_perm (ref0, ref1, perm);

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (15, ref);
	ref += stride;
	vec_st (tmp, stride, dest);
	dest += 2*stride;
	tmp = vec_perm (ref0, ref1, perm);
    } while (--height);

    /* epilogue: store the pending line, then fetch and store the last one */
    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    vec_st (tmp, 0, dest);
    tmp = vec_perm (ref0, ref1, perm);
    vec_st (tmp, stride, dest);
}
(uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1; + + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_perm (ref0, ref1, perm1); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_perm (ref0, ref1, perm1); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); +} + +static void MC_put_x_16_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t permA, permB, ref0, ref1, tmp; + + permA = vec_lvsl (0, ref); + permB = vec_add (permA, vec_splat_u8 (1)); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + tmp = vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + vec_st (tmp, 0, dest); + tmp = vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB)); + + ref0 = vec_ld (0, ref); 
+ ref1 = vec_ld (16, ref); + ref += stride; + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp = vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + vec_st (tmp, 0, dest); + tmp = vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB)); + vec_st (tmp, stride, dest); +} + +static void MC_put_x_8_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1; + + ones = vec_splat_u8 (1); + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + perm0B = vec_add (perm0A, ones); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + perm1B = vec_add (perm1A, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A), + vec_perm (ref0, ref1, perm0B)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A), + vec_perm (ref0, ref1, perm1B)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A), + vec_perm (ref0, ref1, perm0B)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A), + vec_perm (ref0, ref1, perm1B)); + vec_ste 
((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); +} + +static void MC_put_y_16_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp; + + perm = vec_lvsl (0, ref); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + tmp0 = vec_perm (ref0, ref1, perm); + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + tmp1 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (tmp0, tmp1); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + vec_st (tmp, 0, dest); + tmp0 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (tmp0, tmp1); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp1 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (tmp0, tmp1); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + vec_st (tmp, 0, dest); + tmp0 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (tmp0, tmp1); + vec_st (tmp, stride, dest); +} + +static void MC_put_y_8_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1; + + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + tmp1 = vec_perm (ref0, ref1, perm1); + tmp = vec_avg (tmp0, tmp1); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 
4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + tmp = vec_avg (tmp0, tmp1); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_perm (ref0, ref1, perm1); + tmp = vec_avg (tmp0, tmp1); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + tmp = vec_avg (tmp0, tmp1); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); +} + +static void MC_put_xy_16_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp; + vector_u8_t ones; + + ones = vec_splat_u8 (1); + permA = vec_lvsl (0, ref); + permB = vec_add (permA, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + vec_st (tmp, 0, dest); + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + 
ref += stride; + vec_st (tmp, stride, dest); + dest += 2*stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + vec_st (tmp, 0, dest); + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + vec_st (tmp, stride, dest); +} + +static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B; + vector_u8_t avg0, avg1, xor0, xor1, tmp, ones; + + ones = vec_splat_u8 (1); + perm0A = vec_lvsl (0, ref); + perm0A = vec_mergeh (perm0A, perm0A); + perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A); + perm0B = vec_add (perm0A, ones); + perm1A = vec_lvsl (stride, ref); + perm1A = vec_mergeh (perm1A, perm1A); + perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A); + perm1B = vec_add (perm1A, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + A = vec_perm (ref0, ref1, perm1A); + B = vec_perm (ref0, ref1, perm1B); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest 
+= stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm1A); + B = vec_perm (ref0, ref1, perm1B); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); +} + +#if 0 +static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t permA, permB, ref0, ref1, A, B, C, D, tmp, zero, ones; + vector_u16_t splat2, temp; + + ones = vec_splat_u8 (1); + permA = vec_lvsl (0, ref); + permB = vec_add (permA, ones); + + zero = vec_splat_u8 (0); + splat2 = vec_splat_u16 (2); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + C = vec_perm (ref0, ref1, permA); + D = vec_perm (ref0, ref1, permB); + + temp = vec_add (vec_add ((vector_u16_t)vec_mergeh (zero, A), + (vector_u16_t)vec_mergeh (zero, B)), + vec_add 
((vector_u16_t)vec_mergeh (zero, C), + (vector_u16_t)vec_mergeh (zero, D))); + temp = vec_sr (vec_add (temp, splat2), splat2); + tmp = vec_pack (temp, temp); + + vec_st (tmp, 0, dest); + dest += stride; + tmp = vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB)); + } while (--height); +} +#endif + +static void MC_avg_o_16_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm, ref0, ref1, tmp, prev; + + perm = vec_lvsl (0, ref); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (0, dest); + tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (2*stride, dest); + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); + vec_st (tmp, stride, dest); +} + +static void MC_avg_o_8_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev; + + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (0, dest); + tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld 
(stride, dest); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1)); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); +} + +static void MC_avg_x_16_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t permA, permB, ref0, ref1, tmp, prev; + + permA = vec_lvsl (0, ref); + permB = vec_add (permA, vec_splat_u8 (1)); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + prev = vec_ld (0, dest); + ref += stride; + tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB))); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (2*stride, dest); + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp = 
vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB))); + vec_st (tmp, stride, dest); +} + +static void MC_avg_x_8_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1; + vector_u8_t prev; + + ones = vec_splat_u8 (1); + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + perm0B = vec_add (perm0A, ones); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + perm1B = vec_add (perm1A, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + prev = vec_ld (0, dest); + ref += stride; + tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A), + vec_perm (ref0, ref1, perm0B))); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A), + vec_perm (ref0, ref1, perm1B))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A), + vec_perm (ref0, ref1, perm0B))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A), + vec_perm (ref0, ref1, perm1B))); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste 
((vector_u32_t)tmp1, 4, (unsigned int *)dest); +} + +static void MC_avg_y_16_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev; + + perm = vec_lvsl (0, ref); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + tmp0 = vec_perm (ref0, ref1, perm); + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (0, dest); + tmp1 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp0 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (2*stride, dest); + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp1 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp0 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + vec_st (tmp, stride, dest); +} + +static void MC_avg_y_8_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev; + + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (0, dest); + tmp1 = vec_perm (ref0, ref1, perm1); + tmp = vec_avg (prev, 
vec_avg (tmp0, tmp1)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_perm (ref0, ref1, perm1); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); +} + +static void MC_avg_xy_16_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp; + vector_u8_t ones, prev; + + ones = vec_splat_u8 (1); + permA = vec_lvsl (0, ref); + permB = vec_add (permA, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (0, dest); + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + + do { + ref0 = 
vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_avg (prev, + vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (2*stride, dest); + vec_st (tmp, stride, dest); + dest += 2*stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_avg (prev, + vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + vec_st (tmp, stride, dest); +} + +static void MC_avg_xy_8_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B; + vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev; + + ones = vec_splat_u8 (1); + perm0A = vec_lvsl (0, ref); + perm0A = vec_mergeh (perm0A, perm0A); + perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A); + perm0B = vec_add (perm0A, ones); + perm1A = vec_lvsl (stride, ref); + perm1A = vec_mergeh (perm1A, perm1A); + perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A); + perm1B = vec_add (perm1A, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, 
B); + xor0 = vec_xor (A, B); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + prev = vec_ld (0, dest); + A = vec_perm (ref0, ref1, perm1A); + B = vec_perm (ref0, ref1, perm1B); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_avg (prev, + vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm1A); + B = vec_perm (ref0, ref1, perm1B); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_avg (prev, + vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); +} + +MPEG2_MC_EXTERN (altivec) + 
+#endif diff --git a/src/video_dec/libmpeg2new/libmpeg2/motion_comp_mlib.c b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_mlib.c new file mode 100644 index 000000000..71c085029 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_mlib.c @@ -0,0 +1,190 @@ +/* + * motion_comp_mlib.c + * Copyright (C) 2000-2003 Håkan Hjort + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef LIBMPEG2_MLIB + +#include +#include +#include +#include +#include + +#include "../include/mpeg2.h" +#include "mpeg2_internal.h" +/* + * mediaLib-backed motion compensation wrappers. Every function below has + * the same shape: it compares height against the block width and calls the + * square mediaLib routine (16x16 or 8x8) on a match, otherwise the + * half-height one (16x8 or 8x4); no other height is distinguished. The + * const qualifier on ref is cast away for the mediaLib call, and the + * Interp routines are passed stride twice (presumably once per buffer - + * TODO confirm against the mediaLib prototypes). + */ +static void MC_put_o_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoCopyRef_U8_U8_16x16 (dest, (uint8_t *) ref, stride); + else + mlib_VideoCopyRef_U8_U8_16x8 (dest, (uint8_t *) ref, stride); +} + +static void MC_put_x_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpX_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride); + else + mlib_VideoInterpX_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride); +} + +static void MC_put_y_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height ==
16) + mlib_VideoInterpY_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride); + else + mlib_VideoInterpY_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride); +} + +static void MC_put_xy_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpXY_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride); + else + mlib_VideoInterpXY_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride); +} +/* 8-wide put variants: height 8 selects the 8x8 routine, anything else 8x4. */ +static void MC_put_o_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoCopyRef_U8_U8_8x8 (dest, (uint8_t *) ref, stride); + else + mlib_VideoCopyRef_U8_U8_8x4 (dest, (uint8_t *) ref, stride); +} + +static void MC_put_x_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpX_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); + else + mlib_VideoInterpX_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); +} + +static void MC_put_y_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); + else + mlib_VideoInterpY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); +} + +static void MC_put_xy_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpXY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); + else + mlib_VideoInterpXY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); +} +/* 16-wide avg variants: same height dispatch, routed to the mediaLib routines with Ave in their names. */ +static void MC_avg_o_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoCopyRefAve_U8_U8_16x16 (dest, (uint8_t *) ref, stride); + else + mlib_VideoCopyRefAve_U8_U8_16x8 (dest, (uint8_t *) ref, stride); +} + +static void MC_avg_x_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpAveX_U8_U8_16x16 (dest, (uint8_t *) ref, + stride, stride); + else +
mlib_VideoInterpAveX_U8_U8_16x8 (dest, (uint8_t *) ref, + stride, stride); +} + +static void MC_avg_y_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpAveY_U8_U8_16x16 (dest, (uint8_t *) ref, + stride, stride); + else + mlib_VideoInterpAveY_U8_U8_16x8 (dest, (uint8_t *) ref, + stride, stride); +} + +static void MC_avg_xy_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpAveXY_U8_U8_16x16 (dest, (uint8_t *) ref, + stride, stride); + else + mlib_VideoInterpAveXY_U8_U8_16x8 (dest, (uint8_t *) ref, + stride, stride); +} +/* 8-wide avg variants: height 8 selects the 8x8 routine, anything else 8x4. */ +static void MC_avg_o_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoCopyRefAve_U8_U8_8x8 (dest, (uint8_t *) ref, stride); + else + mlib_VideoCopyRefAve_U8_U8_8x4 (dest, (uint8_t *) ref, stride); +} + +static void MC_avg_x_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpAveX_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); + else + mlib_VideoInterpAveX_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); +} + +static void MC_avg_y_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpAveY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); + else + mlib_VideoInterpAveY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); +} + +static void MC_avg_xy_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpAveXY_U8_U8_8x8 (dest, (uint8_t *) ref, + stride, stride); + else + mlib_VideoInterpAveXY_U8_U8_8x4 (dest, (uint8_t *) ref, + stride, stride); +} +/* NOTE(review): MPEG2_MC_EXTERN is defined outside this file; presumably it gathers the MC_*_mlib functions above into one table - confirm at its definition. */ +MPEG2_MC_EXTERN (mlib) + +#endif diff --git a/src/video_dec/libmpeg2new/libmpeg2/motion_comp_mmx.c b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_mmx.c new file mode 100644 index 000000000..8694bdfea --- /dev/null +++
b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_mmx.c @@ -0,0 +1,1005 @@ +/* + * motion_comp_mmx.c + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef ARCH_X86 + +#include + +#include "../include/mpeg2.h" +#include "../include/attributes.h" +#include "mpeg2_internal.h" +#include "../include/mmx.h" + +#define CPU_MMXEXT 0 +#define CPU_3DNOW 1 + + +/* MMX code - needs a rewrite */ + +/* + * Motion Compensation frequently needs to average values using the + * formula (x+y+1)>>1. Both MMXEXT and 3Dnow include one instruction + * to compute this, but it's been left out of classic MMX. + * + * We need to be careful of overflows when doing this computation. 
+ * Rather than unpacking data to 16-bits, which reduces parallelism, + * we use the following formulas: + * + * (x+y)>>1 == (x&y)+((x^y)>>1) + * (x+y+1)>>1 == (x|y)-((x^y)>>1) + */ + +/* + * Rounding constants. mask1 holds 0xfe in every byte: the 2-way averaging + * helpers AND with it before their 64-bit shift right by one, so bit 0 of + * each byte is already clear and cannot slide into the neighbouring byte. + * round4 holds 2 in every 16-bit word: the 4-way averaging helpers add it + * before shifting each word right by 2. + */ +static mmx_t mask1 = {0xfefefefefefefefeLL}; +static mmx_t round4 = {0x0002000200020002LL}; + +/* + * This code should probably be compiled with loop unrolling + * (ie, -funroll-loops in gcc) because some of the loops + * use a small static number of iterations. This was written + * with the assumption the compiler knows best about when + * unrolling will help + */ +/* + * Clears mm0. The 4-way averaging helpers below read mm0 in their punpck + * steps and never write it, so they expect it to be zero when they run. + */ +static inline void mmx_zero_reg () +{ + /* load 0 into mm0 */ + pxor_r2r (mm0, mm0); +} +/* + * Rounded-up average of the 8 bytes at src1 and the 8 bytes at src2, stored + * to dest, computed per byte as (x|y) - (((x^y) & 0xfe) >> 1). + * Overwrites mm1-mm4. + */ +static inline void mmx_average_2_U8 (uint8_t * dest, const uint8_t * src1, + const uint8_t * src2) +{ + /* *dest = (*src1 + *src2 + 1)/ 2; */ + + movq_m2r (*src1, mm1); /* load 8 src1 bytes */ + movq_r2r (mm1, mm2); /* copy 8 src1 bytes */ + + movq_m2r (*src2, mm3); /* load 8 src2 bytes */ + movq_r2r (mm3, mm4); /* copy 8 src2 bytes */ + + pxor_r2r (mm1, mm3); /* xor src1 and src2 */ + pand_m2r (mask1, mm3); /* mask lower bits */ + psrlq_i2r (1, mm3); /* /2 */ + por_r2r (mm2, mm4); /* or src1 and src2 */ + psubb_r2r (mm3, mm4); /* subtract subresults */ + movq_r2m (mm4, *dest); /* store result in dest */ +} +/* + * Two chained rounded-up averages over 8 bytes: first src1 with src2, then + * that result with the bytes already at dest. Overwrites mm1-mm6. + */ +static inline void mmx_interp_average_2_U8 (uint8_t * dest, + const uint8_t * src1, + const uint8_t * src2) +{ + /* *dest = (*dest + (*src1 + *src2 + 1)/ 2 + 1)/ 2; */ + + movq_m2r (*dest, mm1); /* load 8 dest bytes */ + movq_r2r (mm1, mm2); /* copy 8 dest bytes */ + + movq_m2r (*src1, mm3); /* load 8 src1 bytes */ + movq_r2r (mm3, mm4); /* copy 8 src1 bytes */ + + movq_m2r (*src2, mm5); /* load 8 src2 bytes */ + movq_r2r (mm5, mm6); /* copy 8 src2 bytes */ + + pxor_r2r (mm3, mm5); /* xor src1 and src2 */ + pand_m2r (mask1, mm5); /* mask lower bits */ + psrlq_i2r (1, mm5); /* /2 */ + por_r2r (mm4, mm6); /* or src1 and src2 */ + psubb_r2r (mm5, mm6); /* subtract subresults */ + movq_r2r (mm6,
mm5); /* copy subresult */ + + pxor_r2r (mm1, mm5); /* xor srcavg and dest */ + pand_m2r (mask1, mm5); /* mask lower bits */ + psrlq_i2r (1, mm5); /* /2 */ + por_r2r (mm2, mm6); /* or srcavg and dest */ + psubb_r2r (mm5, mm6); /* subtract subresults */ + movq_r2m (mm6, *dest); /* store result in dest */ +} +/* + * Rounded average of four 8-byte inputs, stored to dest. Each input is + * widened to 16-bit words against mm0, the words are summed, round4 is + * added, the sums are shifted right by 2 and packed back to bytes with + * saturation. Reads mm0 (expected to be zero, see mmx_zero_reg) and + * overwrites mm1-mm6. + */ +static inline void mmx_average_4_U8 (uint8_t * dest, const uint8_t * src1, + const uint8_t * src2, + const uint8_t * src3, + const uint8_t * src4) +{ + /* *dest = (*src1 + *src2 + *src3 + *src4 + 2)/ 4; */ + + movq_m2r (*src1, mm1); /* load 8 src1 bytes */ + movq_r2r (mm1, mm2); /* copy 8 src1 bytes */ + + punpcklbw_r2r (mm0, mm1); /* unpack low src1 bytes */ + punpckhbw_r2r (mm0, mm2); /* unpack high src1 bytes */ + + movq_m2r (*src2, mm3); /* load 8 src2 bytes */ + movq_r2r (mm3, mm4); /* copy 8 src2 bytes */ + + punpcklbw_r2r (mm0, mm3); /* unpack low src2 bytes */ + punpckhbw_r2r (mm0, mm4); /* unpack high src2 bytes */ + + paddw_r2r (mm3, mm1); /* add lows */ + paddw_r2r (mm4, mm2); /* add highs */ + + /* now have partials in mm1 and mm2 */ + + movq_m2r (*src3, mm3); /* load 8 src3 bytes */ + movq_r2r (mm3, mm4); /* copy 8 src3 bytes */ + + punpcklbw_r2r (mm0, mm3); /* unpack low src3 bytes */ + punpckhbw_r2r (mm0, mm4); /* unpack high src3 bytes */ + + paddw_r2r (mm3, mm1); /* add lows */ + paddw_r2r (mm4, mm2); /* add highs */ + + movq_m2r (*src4, mm5); /* load 8 src4 bytes */ + movq_r2r (mm5, mm6); /* copy 8 src4 bytes */ + + punpcklbw_r2r (mm0, mm5); /* unpack low src4 bytes */ + punpckhbw_r2r (mm0, mm6); /* unpack high src4 bytes */ + + paddw_r2r (mm5, mm1); /* add lows */ + paddw_r2r (mm6, mm2); /* add highs */ + + /* now have subtotal in mm1 and mm2 */ + + paddw_m2r (round4, mm1); /* add rounding term to lows */ + psraw_i2r (2, mm1); /* /4 */ + paddw_m2r (round4, mm2); /* add rounding term to highs */ + psraw_i2r (2, mm2); /* /4 */ + + packuswb_r2r (mm2, mm1); /* pack (w/ saturation) */ + movq_r2m (mm1, *dest); /* store result in dest */ +} +/* + * Same 4-way rounded average as mmx_average_4_U8, followed by a rounded-up + * average of that result with the 8 bytes already at dest. Reads mm0 + * (expected to be zero) and overwrites mm1-mm6. + */ +static inline void mmx_interp_average_4_U8 (uint8_t * dest, + const
uint8_t * src1, + const uint8_t * src2, + const uint8_t * src3, + const uint8_t * src4) +{ + /* *dest = (*dest + (*src1 + *src2 + *src3 + *src4 + 2)/ 4 + 1)/ 2; */ + + movq_m2r (*src1, mm1); /* load 8 src1 bytes */ + movq_r2r (mm1, mm2); /* copy 8 src1 bytes */ + + punpcklbw_r2r (mm0, mm1); /* unpack low src1 bytes */ + punpckhbw_r2r (mm0, mm2); /* unpack high src1 bytes */ + + movq_m2r (*src2, mm3); /* load 8 src2 bytes */ + movq_r2r (mm3, mm4); /* copy 8 src2 bytes */ + + punpcklbw_r2r (mm0, mm3); /* unpack low src2 bytes */ + punpckhbw_r2r (mm0, mm4); /* unpack high src2 bytes */ + + paddw_r2r (mm3, mm1); /* add lows */ + paddw_r2r (mm4, mm2); /* add highs */ + + /* now have partials in mm1 and mm2 */ + + movq_m2r (*src3, mm3); /* load 8 src3 bytes */ + movq_r2r (mm3, mm4); /* copy 8 src3 bytes */ + + punpcklbw_r2r (mm0, mm3); /* unpack low src3 bytes */ + punpckhbw_r2r (mm0, mm4); /* unpack high src3 bytes */ + + paddw_r2r (mm3, mm1); /* add lows */ + paddw_r2r (mm4, mm2); /* add highs */ + + movq_m2r (*src4, mm5); /* load 8 src4 bytes */ + movq_r2r (mm5, mm6); /* copy 8 src4 bytes */ + + punpcklbw_r2r (mm0, mm5); /* unpack low src4 bytes */ + punpckhbw_r2r (mm0, mm6); /* unpack high src4 bytes */ + + paddw_r2r (mm5, mm1); /* add lows */ + paddw_r2r (mm6, mm2); /* add highs */ + + paddw_m2r (round4, mm1); /* add rounding term to lows */ + psraw_i2r (2, mm1); /* /4 */ + paddw_m2r (round4, mm2); /* add rounding term to highs */ + psraw_i2r (2, mm2); /* /4 */ + + /* now have subtotal/4 in mm1 and mm2 */ + + movq_m2r (*dest, mm3); /* load 8 dest bytes */ + movq_r2r (mm3, mm4); /* copy 8 dest bytes */ + + packuswb_r2r (mm2, mm1); /* pack (w/ saturation) */ + movq_r2r (mm1,mm2); /* copy subresult */ + + pxor_r2r (mm1, mm3); /* xor srcavg and dest */ + pand_m2r (mask1, mm3); /* mask lower bits */ + psrlq_i2r (1, mm3); /* /2 */ + por_r2r (mm2, mm4); /* or srcavg and dest */ + psubb_r2r (mm3, mm4); /* subtract subresults */ + movq_r2m (mm4, *dest); /* store result in dest */ +} +
+/*-----------------------------------------------------------------------
+ * avg, no interpolation: every row of dest is replaced by the
+ * rounded-up byte average of dest and ref (mmx_average_2_U8 with dest
+ * as both destination and first source).  width selects 8 or 16 bytes
+ * per row; dest and ref both advance by stride after each row.  The
+ * do/while body runs before height is tested, so height must be >= 1.
+ * MC_avg_o_16_mmx and MC_avg_o_8_mmx are the fixed-width entry points.
+ *-----------------------------------------------------------------------*/
+
+static inline void MC_avg_mmx (const int width, int height, uint8_t * dest,
+                               const uint8_t * ref, const int stride)
+{
+    mmx_zero_reg ();
+
+    do {
+        mmx_average_2_U8 (dest, dest, ref);
+
+        if (width == 16)
+            mmx_average_2_U8 (dest+8, dest+8, ref+8);
+
+        dest += stride;
+        ref += stride;
+    } while (--height);
+}
+
+static void MC_avg_o_16_mmx (uint8_t * dest, const uint8_t * ref,
+                             int stride, int height)
+{
+    MC_avg_mmx (16, height, dest, ref, stride);
+}
+
+static void MC_avg_o_8_mmx (uint8_t * dest, const uint8_t * ref,
+                            int stride, int height)
+{
+    MC_avg_mmx (8, height, dest, ref, stride);
+}
+
+/*-----------------------------------------------------------------------
+ * put, no interpolation: every row of 8 (or 16 when width == 16) bytes
+ * is copied from ref to dest through mm1.  mm0 is cleared by
+ * mmx_zero_reg although this loop itself only uses mm1.  height must
+ * be >= 1 (do/while).  MC_put_o_16_mmx and MC_put_o_8_mmx are the
+ * fixed-width entry points.
+ *-----------------------------------------------------------------------*/
+
+static inline void MC_put_mmx (const int width, int height, uint8_t * dest,
+                               const uint8_t * ref, const int stride)
+{
+    mmx_zero_reg ();
+
+    do {
+        movq_m2r (* ref, mm1);  /* load 8 ref bytes */
+        movq_r2m (mm1,* dest);  /* store 8 bytes at curr */
+
+        if (width == 16)
+            {
+                movq_m2r (* (ref+8), mm1);      /* load 8 ref bytes */
+                movq_r2m (mm1,* (dest+8));      /* store 8 bytes at curr */
+            }
+
+        dest += stride;
+        ref += stride;
+    } while (--height);
+}
+
+static void MC_put_o_16_mmx (uint8_t * dest, const uint8_t * ref,
+                             int stride, int height)
+{
+    MC_put_mmx (16, height, dest, ref, stride);
+}
+
+static void MC_put_o_8_mmx (uint8_t * dest, const uint8_t * ref,
+                            int stride, int height)
+{
+    MC_put_mmx (8, height, dest, ref, stride);
+}
+
+/*-----------------------------------------------------------------------
+ * avg, x direction: each output byte is the rounded-up average of
+ * ref[i] and ref[i+1], which is then averaged with the byte already in
+ * dest (mmx_interp_average_2_U8).  Reads one byte past the row width
+ * from ref.  height must be >= 1 (do/while).
+ *-----------------------------------------------------------------------*/
+
+/* Half pixel interpolation in the x direction */
+static inline void MC_avg_x_mmx (const int width, int height, uint8_t * dest,
+                                 const uint8_t * ref, const int stride)
+{
+    mmx_zero_reg ();
+
+    do {
+        mmx_interp_average_2_U8 (dest, ref, ref+1);
+
+        if (width == 16)
+            mmx_interp_average_2_U8 (dest+8, ref+8, ref+9);
+
+        dest += stride;
+        ref += stride;
+    } while (--height);
+}
+
+static void MC_avg_x_16_mmx (uint8_t * dest, const uint8_t * ref,
+                             int stride, int height)
+{
+    MC_avg_x_mmx (16, height, dest, ref, stride);
+}
+
+static void MC_avg_x_8_mmx (uint8_t * dest, const uint8_t * ref,
+                            int stride, int height)
+{
+    MC_avg_x_mmx (8, height, dest, ref, stride);
+}
+
+/*-----------------------------------------------------------------------
+ * put, x direction: each output byte is the rounded-up average of
+ * ref[i] and ref[i+1] (mmx_average_2_U8); the previous contents of
+ * dest are not read.  height must be >= 1 (do/while).
+ *-----------------------------------------------------------------------*/
+
+static inline void MC_put_x_mmx (const int width, int height, uint8_t * dest,
+                                 const uint8_t * ref, const int stride)
+{
+    mmx_zero_reg ();
+
+    do {
+        mmx_average_2_U8 (dest, ref, ref+1);
+
+        if (width == 16)
+            mmx_average_2_U8 (dest+8, ref+8, ref+9);
+
+        dest += stride;
+        ref += stride;
+    } while (--height);
+}
+
+static void MC_put_x_16_mmx (uint8_t * dest, const uint8_t * ref,
+                             int stride, int height)
+{
+    MC_put_x_mmx (16, height, dest, ref, stride);
+}
+
+static void MC_put_x_8_mmx (uint8_t * dest, const uint8_t * ref,
+                            int stride, int height)
+{
+    MC_put_x_mmx (8, height, dest, ref, stride);
+}
+
+/*-----------------------------------------------------------------------
+ * avg, x and y direction: each output byte is the four-sample average
+ * of ref[i], ref[i+1] and the same two bytes one row down (ref_next is
+ * ref + stride), which is then averaged with the byte already in dest
+ * (mmx_interp_average_4_U8).  The mmx_zero_reg call is required here:
+ * the four-sample helper unpacks against mm0.  Reads one row more than
+ * height from ref.  height must be >= 1 (do/while).
+ *-----------------------------------------------------------------------*/
+
+static inline void MC_avg_xy_mmx (const int width, int height, uint8_t * dest,
+                                  const uint8_t * ref, const int stride)
+{
+    const uint8_t * ref_next = ref + stride;
+
+    mmx_zero_reg ();
+
+    do {
+        mmx_interp_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1);
+
+        if (width == 16)
+            mmx_interp_average_4_U8 (dest+8, ref+8, ref+9,
+                                     ref_next+8, ref_next+9);
+
+        dest += stride;
+        ref += stride;
+        ref_next += stride;
+    } while (--height);
+}
+
+static void MC_avg_xy_16_mmx (uint8_t * dest, const uint8_t * ref,
+                              int stride, int height)
+{
+    MC_avg_xy_mmx (16, height, dest, ref, stride);
+}
+
+static void MC_avg_xy_8_mmx (uint8_t * dest, const uint8_t * ref,
+                             int stride, int height)
+{
+    MC_avg_xy_mmx (8, height, dest, ref, stride);
+}
+
+/*-----------------------------------------------------------------------
+ * put, x and y direction: each output byte is the four-sample average
+ * of ref[i], ref[i+1] and the same two bytes one row down
+ * (mmx_average_4_U8); the previous contents of dest are not read.
+ * The mmx_zero_reg call is required by the four-sample helper.
+ * height must be >= 1 (do/while).
+ *-----------------------------------------------------------------------*/
+
+static inline void MC_put_xy_mmx (const int width, int height, uint8_t * dest,
+                                  const uint8_t * ref, const int stride)
+{
+    const uint8_t * ref_next = ref + stride;
+
+    mmx_zero_reg ();
+
+    do {
+        mmx_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1);
+
+        if (width == 16)
+            mmx_average_4_U8 (dest+8, ref+8, ref+9, ref_next+8, ref_next+9);
+
+        dest += stride;
+        ref += stride;
+        ref_next += stride;
+    } while (--height);
+}
+
+static void MC_put_xy_16_mmx (uint8_t * dest, const uint8_t * ref,
+                              int stride, int height)
+{
+    MC_put_xy_mmx (16, height, dest, ref, stride);
+}
+
+static void MC_put_xy_8_mmx (uint8_t * dest, const uint8_t * ref,
+                             int stride, int height)
+{
+    MC_put_xy_mmx (8, height, dest, ref, stride);
+}
+
+/*-----------------------------------------------------------------------
+ * avg, y direction: each output byte is the rounded-up average of
+ * ref[i] and the byte one row down (ref_next is ref + stride), which
+ * is then averaged with the byte already in dest
+ * (mmx_interp_average_2_U8).  height must be >= 1 (do/while).
+ *-----------------------------------------------------------------------*/
+
+static inline void MC_avg_y_mmx (const int width, int height, uint8_t * dest,
+                                 const uint8_t * ref, const int stride)
+{
+    const uint8_t * ref_next = ref + stride;
+
+    mmx_zero_reg ();
+
+    do {
+        mmx_interp_average_2_U8 (dest, ref, ref_next);
+
+        if (width == 16)
+            mmx_interp_average_2_U8 (dest+8, ref+8, ref_next+8);
+
+        dest += stride;
+        ref += stride;
+        ref_next += stride;
+    } while (--height);
+}
+
+static void MC_avg_y_16_mmx (uint8_t * dest, const uint8_t * ref,
+                             int stride, int height)
+{
+    MC_avg_y_mmx (16, height, dest, ref, stride);
+}
+
+static void MC_avg_y_8_mmx (uint8_t * dest, const uint8_t * ref,
+                            int stride, int height)
+{
+    MC_avg_y_mmx (8, height, dest, ref, stride);
+}
+
+/*-----------------------------------------------------------------------
+ * put, y direction: each output byte is the rounded-up average of
+ * ref[i] and the byte one row down (mmx_average_2_U8); the previous
+ * contents of dest are not read.  height must be >= 1 (do/while).
+ *-----------------------------------------------------------------------*/
+
+static inline void MC_put_y_mmx (const int width, int height, uint8_t * dest,
+                                 const uint8_t * ref, const int stride)
+{
+    const uint8_t * ref_next = ref + stride;
+
+    mmx_zero_reg ();
+
+    do {
+        mmx_average_2_U8 (dest, ref, ref_next);
+
+        if (width == 16)
+            mmx_average_2_U8 (dest+8, ref+8, ref_next+8);
+
+        dest += stride;
+        ref += stride;
+        ref_next += stride;
+    } while (--height);
+}
+
+static void MC_put_y_16_mmx (uint8_t * dest, const uint8_t * ref,
+                             int stride, int height)
+{
+    MC_put_y_mmx (16, height, dest, ref, stride);
+}
+
+static void MC_put_y_8_mmx (uint8_t * dest, const uint8_t * ref,
+                            int stride, int height)
+{
+    MC_put_y_mmx (8, height, dest, ref, stride);
+}
+
+
+MPEG2_MC_EXTERN (mmx)   /* NOTE(review): macro defined outside this chunk; presumably builds the table of the 16 MC_*_mmx entry points above - confirm in mpeg2_internal.h */
+
+
+
+
+
+
+
+/* CPU_MMXEXT/CPU_3DNOW adaptation layer
+ *
+ * pavg_r2r and pavg_m2r expand to pavgb_* when cpu == CPU_MMXEXT and
+ * to pavgusb_* otherwise.  They read a variable named cpu from the
+ * expansion site, so they can only be used inside a function that has
+ * cpu in scope.  Every caller in this file passes the constant
+ * CPU_MMXEXT or CPU_3DNOW for it.
+ */
+
+#define pavg_r2r(src,dest)              \
+do {                                    \
+    if (cpu == CPU_MMXEXT)              \
+        pavgb_r2r (src, dest);          \
+    else                                \
+        pavgusb_r2r (src, dest);        \
+} while (0)
+
+#define pavg_m2r(src,dest)              \
+do {                                    \
+    if (cpu == CPU_MMXEXT)              \
+        pavgb_m2r (src, dest);          \
+    else                                \
+        pavgusb_m2r (src, dest);        \
+} while (0)
+
+
+/* CPU_MMXEXT code
+ *
+ * These helpers are shared by the mmxext and the 3dnow entry points
+ * further down; the cpu argument selects the averaging instruction.
+ * Naming: put stores the result to dest, avg additionally averages it
+ * with the bytes already in dest; 1, 2 or 4 is the number of reference
+ * samples combined per output byte; the _8 or _16 suffix is the number
+ * of bytes handled per row.  For the 2-sample helpers, offset is the
+ * distance from ref to the second sample (the callers pass 1 or
+ * stride).  All loops are do/while, so height must be >= 1.
+ */
+
+
+static inline void MC_put1_8 (int height, uint8_t * dest, const uint8_t * ref,
+                              const int stride)
+{
+    do {
+        movq_m2r (*ref, mm0);
+        movq_r2m (mm0, *dest);
+        ref += stride;
+        dest += stride;
+    } while (--height);
+}
+
+static inline void MC_put1_16 (int height, uint8_t * dest, const uint8_t * ref,
+                               const int stride)
+{
+    do {
+        movq_m2r (*ref, mm0);
+        movq_m2r (*(ref+8), mm1);
+        ref += stride;
+        movq_r2m (mm0, *dest);
+        movq_r2m (mm1, *(dest+8));
+        dest += stride;
+    } while (--height);
+}
+
+static inline void MC_avg1_8 (int height, uint8_t * dest, const uint8_t * ref,
+                              const int stride, const int cpu)
+{
+    do {
+        movq_m2r (*ref, mm0);
+        pavg_m2r (*dest, mm0);  /* average ref row with existing dest */
+        ref += stride;
+        movq_r2m (mm0, *dest);
+        dest += stride;
+    } while (--height);
+}
+
+static inline void MC_avg1_16 (int height, uint8_t * dest, const uint8_t * ref,
+                               const int stride, const int cpu)
+{
+    do {
+        movq_m2r (*ref, mm0);
+        movq_m2r (*(ref+8), mm1);
+        pavg_m2r (*dest, mm0);
+        pavg_m2r (*(dest+8), mm1);
+        movq_r2m (mm0, *dest);
+        ref += stride;
+        movq_r2m (mm1, *(dest+8));
+        dest += stride;
+    } while (--height);
+}
+
+static inline void MC_put2_8 (int height, uint8_t * dest, const uint8_t * ref,
+                              const int stride, const int offset,
+                              const int cpu)
+{
+    do {
+        movq_m2r (*ref, mm0);
+        pavg_m2r (*(ref+offset), mm0);  /* average with second sample */
+        ref += stride;
+        movq_r2m (mm0, *dest);
+        dest += stride;
+    } while (--height);
+}
+
+static inline void MC_put2_16 (int height, uint8_t * dest, const uint8_t * ref,
+                               const int stride, const int offset,
+                               const int cpu)
+{
+    do {
+        movq_m2r (*ref, mm0);
+        movq_m2r (*(ref+8), mm1);
+        pavg_m2r (*(ref+offset), mm0);
+        pavg_m2r (*(ref+offset+8), mm1);
+        movq_r2m (mm0, *dest);
+        ref += stride;
+        movq_r2m (mm1, *(dest+8));
+        dest += stride;
+    } while (--height);
+}
+
+static inline void MC_avg2_8 (int height, uint8_t * dest, const uint8_t * ref,
+                              const int stride, const int offset,
+                              const int cpu)
+{
+    do {
+        movq_m2r (*ref, mm0);
+        pavg_m2r (*(ref+offset), mm0);  /* average the two ref samples */
+        pavg_m2r (*dest, mm0);          /* then average with existing dest */
+        ref += stride;
+        movq_r2m (mm0, *dest);
+        dest += stride;
+    } while (--height);
+}
+
+static inline void MC_avg2_16 (int height, uint8_t * dest, const uint8_t * ref,
+                               const int stride, const int offset,
+                               const int cpu)
+{
+    do {
+        movq_m2r (*ref, mm0);
+        movq_m2r (*(ref+8), mm1);
+        pavg_m2r (*(ref+offset), mm0);
+        pavg_m2r (*(ref+offset+8), mm1);
+        pavg_m2r (*dest, mm0);
+        pavg_m2r (*(dest+8), mm1);
+        ref += stride;
+        movq_r2m (mm0, *dest);
+        movq_r2m (mm1, *(dest+8));
+        dest += stride;
+    } while (--height);
+}
+
+static mmx_t mask_one = {0x0101010101010101LL}; /* bit 0 of every byte; isolates the per-byte correction in the 4-sample helpers below */
+/*
+ * 4-sample put, 8 bytes per row.  The first reference row is loaded
+ * before the loop; inside the loop only the next row is loaded, and
+ * the pair average (mm0) and pair xor (mm7) of the current row are
+ * carried over from the previous iteration by the two moves marked
+ * unroll at the bottom.  The output is the pavg of the two row-pair
+ * averages, minus one (with unsigned saturation) in every byte where
+ * bit 0 of (xor_prev | xor_cur) & (avg_prev ^ avg_cur) is set.
+ * NOTE(review): that correction presumably cancels the extra round-up
+ * of the nested averages - confirm against a scalar reference.
+ */
+static inline void MC_put4_8 (int height, uint8_t * dest, const uint8_t * ref,
+                              const int stride, const int cpu)
+{
+    movq_m2r (*ref, mm0);
+    movq_m2r (*(ref+1), mm1);
+    movq_r2r (mm0, mm7);
+    pxor_r2r (mm1, mm7);
+    pavg_r2r (mm1, mm0);
+    ref += stride;
+
+    do {
+        movq_m2r (*ref, mm2);
+        movq_r2r (mm0, mm5);
+
+        movq_m2r (*(ref+1), mm3);
+        movq_r2r (mm2, mm6);
+
+        pxor_r2r (mm3, mm6);
+        pavg_r2r (mm3, mm2);
+
+        por_r2r (mm6, mm7);
+        pxor_r2r (mm2, mm5);
+
+        pand_r2r (mm5, mm7);
+        pavg_r2r (mm2, mm0);
+
+        pand_m2r (mask_one, mm7);
+
+        psubusb_r2r (mm7, mm0);
+
+        ref += stride;
+        movq_r2m (mm0, *dest);
+        dest += stride;
+
+        movq_r2r (mm6, mm7);    /* unroll ! */
+        movq_r2r (mm2, mm0);    /* unroll ! */
+    } while (--height);
+}
+/*
+ * 4-sample put, 16 bytes per row, done as two independent 8-byte
+ * halves.  Unlike MC_put4_8, both reference rows are loaded in every
+ * iteration and the samples are paired diagonally: ref[0] with
+ * ref[stride+1], and ref[1] with ref[stride].  The same mask_one
+ * correction is applied before each store.
+ */
+static inline void MC_put4_16 (int height, uint8_t * dest, const uint8_t * ref,
+                               const int stride, const int cpu)
+{
+    do {
+        movq_m2r (*ref, mm0);
+        movq_m2r (*(ref+stride+1), mm1);
+        movq_r2r (mm0, mm7);
+        movq_m2r (*(ref+1), mm2);
+        pxor_r2r (mm1, mm7);
+        movq_m2r (*(ref+stride), mm3);
+        movq_r2r (mm2, mm6);
+        pxor_r2r (mm3, mm6);
+        pavg_r2r (mm1, mm0);
+        pavg_r2r (mm3, mm2);
+        por_r2r (mm6, mm7);
+        movq_r2r (mm0, mm6);
+        pxor_r2r (mm2, mm6);
+        pand_r2r (mm6, mm7);
+        pand_m2r (mask_one, mm7);
+        pavg_r2r (mm2, mm0);
+        psubusb_r2r (mm7, mm0);
+        movq_r2m (mm0, *dest);
+
+        movq_m2r (*(ref+8), mm0);
+        movq_m2r (*(ref+stride+9), mm1);
+        movq_r2r (mm0, mm7);
+        movq_m2r (*(ref+9), mm2);
+        pxor_r2r (mm1, mm7);
+        movq_m2r (*(ref+stride+8), mm3);
+        movq_r2r (mm2, mm6);
+        pxor_r2r (mm3, mm6);
+        pavg_r2r (mm1, mm0);
+        pavg_r2r (mm3, mm2);
+        por_r2r (mm6, mm7);
+        movq_r2r (mm0, mm6);
+        pxor_r2r (mm2, mm6);
+        pand_r2r (mm6, mm7);
+        pand_m2r (mask_one, mm7);
+        pavg_r2r (mm2, mm0);
+        psubusb_r2r (mm7, mm0);
+        ref += stride;
+        movq_r2m (mm0, *(dest+8));
+        dest += stride;
+    } while (--height);
+}
+/*
+ * 4-sample avg, 8 bytes per row: the same instruction sequence as one
+ * half of MC_put4_16, followed by a pavg with the bytes already in
+ * dest before the store.
+ */
+static inline void MC_avg4_8 (int height, uint8_t * dest, const uint8_t * ref,
+                              const int stride, const int cpu)
+{
+    do {
+        movq_m2r (*ref, mm0);
+        movq_m2r (*(ref+stride+1), mm1);
+        movq_r2r (mm0, mm7);
+        movq_m2r (*(ref+1), mm2);
+        pxor_r2r (mm1, mm7);
+        movq_m2r (*(ref+stride), mm3);
+        movq_r2r (mm2, mm6);
+        pxor_r2r (mm3, mm6);
+        pavg_r2r (mm1, mm0);
+        pavg_r2r (mm3, mm2);
+        por_r2r (mm6, mm7);
+        movq_r2r (mm0, mm6);
+        pxor_r2r (mm2, mm6);
+        pand_r2r (mm6, mm7);
+        pand_m2r (mask_one, mm7);
+        pavg_r2r (mm2, mm0);
+        psubusb_r2r (mm7, mm0);
+        movq_m2r (*dest, mm1);
+        pavg_r2r (mm1, mm0);
+        ref += stride;
+        movq_r2m (mm0, *dest);
+        dest += stride;
+    } while (--height);
+}
+/*
+ * 4-sample avg, 16 bytes per row: MC_avg4_8 applied to the left and
+ * then to the right 8-byte half of each row.
+ */
+static inline void MC_avg4_16 (int height, uint8_t * dest, const uint8_t * ref,
+                               const int stride, const int cpu)
+{
+    do {
+        movq_m2r (*ref, mm0);
+        movq_m2r (*(ref+stride+1), mm1);
+        movq_r2r (mm0, mm7);
+        movq_m2r (*(ref+1), mm2);
+        pxor_r2r (mm1, mm7);
+        movq_m2r (*(ref+stride), mm3);
+        movq_r2r (mm2, mm6);
+        pxor_r2r (mm3, mm6);
+        pavg_r2r (mm1, mm0);
+        pavg_r2r (mm3, mm2);
+        por_r2r (mm6, mm7);
+        movq_r2r (mm0, mm6);
+        pxor_r2r (mm2, mm6);
+        pand_r2r (mm6, mm7);
+        pand_m2r (mask_one, mm7);
+        pavg_r2r (mm2, mm0);
+        psubusb_r2r (mm7, mm0);
+        movq_m2r (*dest, mm1);
+        pavg_r2r (mm1, mm0);
+        movq_r2m (mm0, *dest);
+
+        movq_m2r (*(ref+8), mm0);
+        movq_m2r (*(ref+stride+9), mm1);
+        movq_r2r (mm0, mm7);
+        movq_m2r (*(ref+9), mm2);
+        pxor_r2r (mm1, mm7);
+        movq_m2r (*(ref+stride+8), mm3);
+        movq_r2r (mm2, mm6);
+        pxor_r2r (mm3, mm6);
+        pavg_r2r (mm1, mm0);
+        pavg_r2r (mm3, mm2);
+        por_r2r (mm6, mm7);
+        movq_r2r (mm0, mm6);
+        pxor_r2r (mm2, mm6);
+        pand_r2r (mm6, mm7);
+        pand_m2r (mask_one, mm7);
+        pavg_r2r (mm2, mm0);
+        psubusb_r2r (mm7, mm0);
+        movq_m2r (*(dest+8), mm1);
+        pavg_r2r (mm1, mm0);
+        ref += stride;
+        movq_r2m (mm0, *(dest+8));
+        dest += stride;
+    } while (--height);
+}
+/*
+ * mmxext entry points.  All take (dest, ref, stride, height) and
+ * forward to the shared helpers with cpu = CPU_MMXEXT (the two put_o
+ * helpers take no cpu argument):
+ *   o  = 1-sample helpers, no interpolation
+ *   x  = 2-sample helpers with offset 1
+ *   y  = 2-sample helpers with offset stride
+ *   xy = 4-sample helpers
+ */
+static void MC_avg_o_16_mmxext (uint8_t * dest, const uint8_t * ref,
+                                int stride, int height)
+{
+    MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT);
+}
+
+static void MC_avg_o_8_mmxext (uint8_t * dest, const uint8_t * ref,
+                               int stride, int height)
+{
+    MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT);
+}
+
+static void MC_put_o_16_mmxext (uint8_t * dest, const uint8_t * ref,
+                                int stride, int height)
+{
+    MC_put1_16 (height, dest, ref, stride);
+}
+
+static void MC_put_o_8_mmxext (uint8_t * dest, const uint8_t * ref,
+                               int stride, int height)
+{
+    MC_put1_8 (height, dest, ref, stride);
+}
+
+static void MC_avg_x_16_mmxext (uint8_t * dest, const uint8_t * ref,
+                                int stride, int height)
+{
+    MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
+}
+
+static void MC_avg_x_8_mmxext (uint8_t * dest, const uint8_t * ref,
+                               int stride, int height)
+{
+    MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
+}
+
+static void MC_put_x_16_mmxext (uint8_t * dest, const uint8_t * ref,
+                                int stride, int height)
+{
+    MC_put2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
+}
+
+static void MC_put_x_8_mmxext (uint8_t * dest, const uint8_t * ref,
+                               int stride, int height)
+{
+    MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
+}
+
+static void MC_avg_y_16_mmxext (uint8_t * dest, const uint8_t * ref,
+                                int stride, int height)
+{
+    MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
+}
+
+static void MC_avg_y_8_mmxext (uint8_t * dest, const uint8_t * ref,
+                               int stride, int height)
+{
+    MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
+}
+
+static void MC_put_y_16_mmxext (uint8_t * dest, const uint8_t * ref,
+                                int stride, int height)
+{
+    MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
+}
+
+static void MC_put_y_8_mmxext (uint8_t * dest, const uint8_t * ref,
+                               int stride, int height)
+{
+    MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
+}
+
+static void MC_avg_xy_16_mmxext (uint8_t * dest, const uint8_t * ref,
+                                 int stride, int height)
+{
+    MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT);
+}
+
+static void MC_avg_xy_8_mmxext (uint8_t * dest, const uint8_t * ref,
+                                int stride, int height)
+{
+    MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT);
+}
+
+static void MC_put_xy_16_mmxext (uint8_t * dest, const uint8_t * ref,
+                                 int stride, int height)
+{
+    MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT);
+}
+
+static void MC_put_xy_8_mmxext (uint8_t * dest, const uint8_t * ref,
+                                int stride, int height)
+{
+    MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT);
+}
+
+
+MPEG2_MC_EXTERN (mmxext)
+
+
+/*
+ * 3dnow entry points: the same 16 wrappers as the mmxext set, calling
+ * the same shared helpers but with cpu = CPU_3DNOW, which makes the
+ * pavg_r2r and pavg_m2r macros take their pavgusb branch.
+ */
+static void MC_avg_o_16_3dnow (uint8_t * dest, const uint8_t * ref,
+                               int stride, int height)
+{
+    MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW);
+}
+
+static void MC_avg_o_8_3dnow (uint8_t * dest, const uint8_t * ref,
+                              int stride, int height)
+{
+    MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW);
+}
+
+static void MC_put_o_16_3dnow (uint8_t * dest, const uint8_t * ref,
+                               int stride, int height)
+{
+    MC_put1_16 (height, dest, ref, stride);
+}
+
+static void MC_put_o_8_3dnow (uint8_t * dest, const uint8_t * ref,
+                              int stride, int height)
+{
+    MC_put1_8 (height, dest, ref, stride);
+}
+
+static void MC_avg_x_16_3dnow (uint8_t * dest, const uint8_t * ref,
+                               int stride, int height)
+{
+    MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
+}
+
+static void MC_avg_x_8_3dnow (uint8_t * dest, const uint8_t * ref,
+                              int stride, int height)
+{
+    MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
+}
+
+static void MC_put_x_16_3dnow (uint8_t * dest, const uint8_t * ref,
+                               int stride, int height)
+{
+    MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
+}
+
+static void MC_put_x_8_3dnow (uint8_t * dest, const uint8_t * ref,
+                              int stride, int height)
+{
+    MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
+}
+
+static void MC_avg_y_16_3dnow (uint8_t * dest, const uint8_t * ref,
+                               int stride, int height)
+{
+    MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
+}
+
+static void MC_avg_y_8_3dnow (uint8_t * dest, const uint8_t * ref,
+                              int stride, int height)
+{
+    MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
+}
+
+static void MC_put_y_16_3dnow (uint8_t * dest, const uint8_t * ref,
+                               int stride, int height)
+{
+    MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
+}
+
+static void MC_put_y_8_3dnow (uint8_t * dest, const uint8_t * ref,
+                              int stride, int height)
+{
+    MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
+}
+
+static void MC_avg_xy_16_3dnow (uint8_t * dest, const uint8_t * ref,
+                                int stride, int height)
+{
+    MC_avg4_16 (height, dest, ref, stride, CPU_3DNOW);
+}
+
+static void MC_avg_xy_8_3dnow (uint8_t * dest, const uint8_t * ref,
+                               int stride, int height)
+{
+    MC_avg4_8 (height, dest, ref, stride, CPU_3DNOW);
+}
+
+static void MC_put_xy_16_3dnow (uint8_t * dest, const uint8_t * ref,
+                                int stride, int height)
+{
+    MC_put4_16 (height, dest, ref, stride, CPU_3DNOW);
+}
+
+static void MC_put_xy_8_3dnow (uint8_t * dest, const uint8_t * ref,
+                               int stride, int height)
+{
+    MC_put4_8 (height, dest, ref, stride, CPU_3DNOW);
+}
+
+
+MPEG2_MC_EXTERN (3dnow)
+
+#endif  /* closes a conditional opened before this chunk */
diff --git a/src/video_dec/libmpeg2new/libmpeg2/motion_comp_vis.c b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_vis.c
new file mode 100644
index 000000000..e724d28a2
--- /dev/null
+++ b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_vis.c
@@ -0,0 +1,2061 @@
+/*
+ * motion_comp_vis.c
+ * Copyright (C) 2003 David S. Miller
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+
+#ifdef ARCH_SPARC
+
+#include        /* NOTE(review): header name is missing in this copy of the patch, probably an angle-bracket name lost in extraction - restore from the original file */
+
+#include "mpeg2.h"
+#include        /* NOTE(review): header name is missing here as well - restore from the original file */
+#include "mpeg2_internal.h"
+#include "vis.h"
+
+/* The trick used in some of this file is the formula from the MMX
+ * motion comp code, which is:
+ *
+ * (x+y+1)>>1 == (x|y)-((x^y)>>1)
+ *
+ * This allows us to average 8 bytes at a time in a 64-bit FPU reg.
+ * We avoid overflows by masking before we do the shift, and we
+ * implement the shift by multiplying by 1/2 using mul8x16.
So in + * VIS this is (assume 'x' is in f0, 'y' is in f2, a repeating mask + * of '0xfe' is in f4, a repeating mask of '0x7f' is in f6, and + * the value 0x80808080 is in f8): + * + * fxor f0, f2, f10 + * fand f10, f4, f10 + * fmul8x16 f8, f10, f10 + * fand f10, f6, f10 + * for f0, f2, f12 + * fpsub16 f12, f10, f10 + */ + +#define DUP4(x) {x, x, x, x} +#define DUP8(x) {x, x, x, x, x, x, x, x} +static const int16_t constants1[] ATTR_ALIGN(8) = DUP4 (1); +static const int16_t constants2[] ATTR_ALIGN(8) = DUP4 (2); +static const int16_t constants3[] ATTR_ALIGN(8) = DUP4 (3); +static const int16_t constants6[] ATTR_ALIGN(8) = DUP4 (6); +static const int8_t constants_fe[] ATTR_ALIGN(8) = DUP8 (0xfe); +static const int8_t constants_7f[] ATTR_ALIGN(8) = DUP8 (0x7f); +static const int8_t constants128[] ATTR_ALIGN(8) = DUP8 (128); +static const int16_t constants256_512[] ATTR_ALIGN(8) = + {256, 512, 256, 512}; +static const int16_t constants256_1024[] ATTR_ALIGN(8) = + {256, 1024, 256, 1024}; + +#define REF_0 0 +#define REF_0_1 1 +#define REF_2 2 +#define REF_2_1 3 +#define REF_4 4 +#define REF_4_1 5 +#define REF_6 6 +#define REF_6_1 7 +#define REF_S0 8 +#define REF_S0_1 9 +#define REF_S2 10 +#define REF_S2_1 11 +#define REF_S4 12 +#define REF_S4_1 13 +#define REF_S6 14 +#define REF_S6_1 15 +#define DST_0 16 +#define DST_1 17 +#define DST_2 18 +#define DST_3 19 +#define CONST_1 20 +#define CONST_2 20 +#define CONST_3 20 +#define CONST_6 20 +#define MASK_fe 20 +#define CONST_128 22 +#define CONST_256 22 +#define CONST_512 22 +#define CONST_1024 22 +#define TMP0 24 +#define TMP1 25 +#define TMP2 26 +#define TMP3 27 +#define TMP4 28 +#define TMP5 29 +#define ZERO 30 +#define MASK_7f 30 + +#define TMP6 32 +#define TMP8 34 +#define TMP10 36 +#define TMP12 38 +#define TMP14 40 +#define TMP16 42 +#define TMP18 44 +#define TMP20 46 +#define TMP22 48 +#define TMP24 50 +#define TMP26 52 +#define TMP28 54 +#define TMP30 56 +#define TMP32 58 + +static void MC_put_o_16_vis (uint8_t * 
dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 16 : 0; + do { /* 5 cycles */ + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, 8, TMP2); + + vis_ld64_2(ref, offset, TMP4); + ref += stride; + + vis_faligndata(TMP0, TMP2, REF_0); + vis_st64(REF_0, dest[0]); + + vis_faligndata(TMP2, TMP4, REF_2); + vis_st64_2(REF_2, dest, 8); + dest += stride; + } while (--height); +} + +static void MC_put_o_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 8 : 0; + do { /* 4 cycles */ + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, offset, TMP2); + ref += stride; + + /* stall */ + + vis_faligndata(TMP0, TMP2, REF_0); + vis_st64(REF_0, dest[0]); + dest += stride; + } while (--height); +} + + +static void MC_avg_o_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int stride_8 = stride + 8; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 
16 : 0; + + vis_ld64(ref[0], TMP0); + + vis_ld64(ref[8], TMP2); + + vis_ld64_2(ref, offset, TMP4); + + vis_ld64(dest[0], DST_0); + + vis_ld64(dest[8], DST_2); + + vis_ld64(constants_fe[0], MASK_fe); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP2, TMP4, REF_2); + + vis_ld64(constants128[0], CONST_128); + + ref += stride; + height = (height >> 1) - 1; + + do { /* 24 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(DST_0, REF_0, TMP6); + + vis_ld64_2(ref, 8, TMP2); + vis_and(TMP6, MASK_fe, TMP6); + + vis_ld64_2(ref, offset, TMP4); + ref += stride; + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_xor(DST_2, REF_2, TMP8); + + vis_and(TMP8, MASK_fe, TMP8); + + vis_or(DST_0, REF_0, TMP10); + vis_ld64_2(dest, stride, DST_0); + vis_mul8x16(CONST_128, TMP8, TMP8); + + vis_or(DST_2, REF_2, TMP12); + vis_ld64_2(dest, stride_8, DST_2); + + vis_ld64(ref[0], TMP14); + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + + dest += stride; + vis_ld64_2(ref, 8, TMP16); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, offset, TMP18); + vis_faligndata(TMP2, TMP4, REF_2); + ref += stride; + + vis_xor(DST_0, REF_0, TMP20); + + vis_and(TMP20, MASK_fe, TMP20); + + vis_xor(DST_2, REF_2, TMP22); + vis_mul8x16(CONST_128, TMP20, TMP20); + + vis_and(TMP22, MASK_fe, TMP22); + + vis_or(DST_0, REF_0, TMP24); + vis_mul8x16(CONST_128, TMP22, TMP22); + + vis_or(DST_2, REF_2, TMP26); + + vis_ld64_2(dest, stride, DST_0); + vis_faligndata(TMP14, TMP16, REF_0); + + vis_ld64_2(dest, stride_8, DST_2); + vis_faligndata(TMP16, TMP18, REF_2); + + vis_and(TMP20, MASK_7f, TMP20); + + vis_and(TMP22, MASK_7f, TMP22); + + vis_psub16(TMP24, TMP20, TMP20); + vis_st64(TMP20, dest[0]); + + vis_psub16(TMP26, TMP22, TMP22); + vis_st64_2(TMP22, dest, 8); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + 
vis_xor(DST_0, REF_0, TMP6); + + vis_ld64_2(ref, 8, TMP2); + vis_and(TMP6, MASK_fe, TMP6); + + vis_ld64_2(ref, offset, TMP4); + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_xor(DST_2, REF_2, TMP8); + + vis_and(TMP8, MASK_fe, TMP8); + + vis_or(DST_0, REF_0, TMP10); + vis_ld64_2(dest, stride, DST_0); + vis_mul8x16(CONST_128, TMP8, TMP8); + + vis_or(DST_2, REF_2, TMP12); + vis_ld64_2(dest, stride_8, DST_2); + + vis_ld64(ref[0], TMP14); + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + + dest += stride; + vis_faligndata(TMP0, TMP2, REF_0); + + vis_faligndata(TMP2, TMP4, REF_2); + + vis_xor(DST_0, REF_0, TMP20); + + vis_and(TMP20, MASK_fe, TMP20); + + vis_xor(DST_2, REF_2, TMP22); + vis_mul8x16(CONST_128, TMP20, TMP20); + + vis_and(TMP22, MASK_fe, TMP22); + + vis_or(DST_0, REF_0, TMP24); + vis_mul8x16(CONST_128, TMP22, TMP22); + + vis_or(DST_2, REF_2, TMP26); + + vis_and(TMP20, MASK_7f, TMP20); + + vis_and(TMP22, MASK_7f, TMP22); + + vis_psub16(TMP24, TMP20, TMP20); + vis_st64(TMP20, dest[0]); + + vis_psub16(TMP26, TMP22, TMP22); + vis_st64_2(TMP22, dest, 8); +} + +static void MC_avg_o_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 
8 : 0; + + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, offset, TMP2); + + vis_ld64(dest[0], DST_0); + + vis_ld64(constants_fe[0], MASK_fe); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(constants128[0], CONST_128); + + ref += stride; + height = (height >> 1) - 1; + + do { /* 12 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(DST_0, REF_0, TMP4); + + vis_ld64_2(ref, offset, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(DST_0, REF_0, TMP6); + vis_ld64_2(dest, stride, DST_0); + ref += stride; + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_ld64(ref[0], TMP12); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, offset, TMP2); + vis_xor(DST_0, REF_0, TMP0); + ref += stride; + + vis_and(TMP0, MASK_fe, TMP0); + + vis_and(TMP4, MASK_7f, TMP4); + + vis_psub16(TMP6, TMP4, TMP4); + vis_st64(TMP4, dest[0]); + dest += stride; + vis_mul8x16(CONST_128, TMP0, TMP0); + + vis_or(DST_0, REF_0, TMP6); + vis_ld64_2(dest, stride, DST_0); + + vis_faligndata(TMP12, TMP2, REF_0); + + vis_and(TMP0, MASK_7f, TMP0); + + vis_psub16(TMP6, TMP0, TMP4); + vis_st64(TMP4, dest[0]); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(DST_0, REF_0, TMP4); + + vis_ld64_2(ref, offset, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(DST_0, REF_0, TMP6); + vis_ld64_2(dest, stride, DST_0); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_xor(DST_0, REF_0, TMP0); + + vis_and(TMP0, MASK_fe, TMP0); + + vis_and(TMP4, MASK_7f, TMP4); + + vis_psub16(TMP6, TMP4, TMP4); + vis_st64(TMP4, dest[0]); + dest += stride; + vis_mul8x16(CONST_128, TMP0, TMP0); + + vis_or(DST_0, REF_0, TMP6); + + vis_and(TMP0, MASK_7f, TMP0); + + vis_psub16(TMP6, TMP0, TMP4); + vis_st64(TMP4, dest[0]); +} + +static void MC_put_x_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; 
+ + ref = vis_alignaddr(ref); + + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, 8, TMP2); + + vis_ld64_2(ref, 16, TMP4); + + vis_ld64(constants_fe[0], MASK_fe); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(constants128[0], CONST_128); + vis_faligndata(TMP2, TMP4, REF_4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + } + + ref += stride; + height = (height >> 1) - 1; + + do { /* 34 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP6); + + vis_ld64_2(ref, 8, TMP2); + vis_xor(REF_4, REF_6, TMP8); + + vis_ld64_2(ref, 16, TMP4); + vis_and(TMP6, MASK_fe, TMP6); + ref += stride; + + vis_ld64(ref[0], TMP14); + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_and(TMP8, MASK_fe, TMP8); + + vis_ld64_2(ref, 8, TMP16); + vis_mul8x16(CONST_128, TMP8, TMP8); + vis_or(REF_0, REF_2, TMP10); + + vis_ld64_2(ref, 16, TMP18); + ref += stride; + vis_or(REF_4, REF_6, TMP12); + + vis_alignaddr_g0((void *)off); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_faligndata(TMP2, TMP4, REF_4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + } + + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + dest += stride; + + vis_xor(REF_0, REF_2, TMP6); + + vis_xor(REF_4, REF_6, TMP8); + + vis_and(TMP6, MASK_fe, TMP6); + + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_and(TMP8, MASK_fe, TMP8); + + vis_mul8x16(CONST_128, TMP8, TMP8); + vis_or(REF_0, REF_2, TMP10); + + vis_or(REF_4, REF_6, TMP12); + + vis_alignaddr_g0((void *)off); + + vis_faligndata(TMP14, TMP16, REF_0); + + vis_faligndata(TMP16, TMP18, REF_4); + + if (off 
!= 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP14, TMP16, REF_2); + vis_faligndata(TMP16, TMP18, REF_6); + } else { + vis_src1(TMP16, REF_2); + vis_src1(TMP18, REF_6); + } + + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP6); + + vis_ld64_2(ref, 8, TMP2); + vis_xor(REF_4, REF_6, TMP8); + + vis_ld64_2(ref, 16, TMP4); + vis_and(TMP6, MASK_fe, TMP6); + + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_and(TMP8, MASK_fe, TMP8); + + vis_mul8x16(CONST_128, TMP8, TMP8); + vis_or(REF_0, REF_2, TMP10); + + vis_or(REF_4, REF_6, TMP12); + + vis_alignaddr_g0((void *)off); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_faligndata(TMP2, TMP4, REF_4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + } + + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + dest += stride; + + vis_xor(REF_0, REF_2, TMP6); + + vis_xor(REF_4, REF_6, TMP8); + + vis_and(TMP6, MASK_fe, TMP6); + + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_and(TMP8, MASK_fe, TMP8); + + vis_mul8x16(CONST_128, TMP8, TMP8); + vis_or(REF_0, REF_2, TMP10); + + vis_or(REF_4, REF_6, TMP12); + + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); +} + +static void MC_put_x_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long 
off_plus_1 = off + 1; + + ref = vis_alignaddr(ref); + + vis_ld64(ref[0], TMP0); + + vis_ld64(ref[8], TMP2); + + vis_ld64(constants_fe[0], MASK_fe); + + vis_ld64(constants_7f[0], MASK_7f); + + vis_ld64(constants128[0], CONST_128); + vis_faligndata(TMP0, TMP2, REF_0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + } else { + vis_src1(TMP2, REF_2); + } + + ref += stride; + height = (height >> 1) - 1; + + do { /* 20 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP4); + + vis_ld64_2(ref, 8, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + ref += stride; + + vis_ld64(ref[0], TMP8); + vis_or(REF_0, REF_2, TMP6); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, 8, TMP10); + ref += stride; + vis_faligndata(TMP0, TMP2, REF_0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + } else { + vis_src1(TMP2, REF_2); + } + + vis_and(TMP4, MASK_7f, TMP4); + + vis_psub16(TMP6, TMP4, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_xor(REF_0, REF_2, TMP12); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_or(REF_0, REF_2, TMP14); + vis_mul8x16(CONST_128, TMP12, TMP12); + + vis_alignaddr_g0((void *)off); + vis_faligndata(TMP8, TMP10, REF_0); + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP8, TMP10, REF_2); + } else { + vis_src1(TMP10, REF_2); + } + + vis_and(TMP12, MASK_7f, TMP12); + + vis_psub16(TMP14, TMP12, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP4); + + vis_ld64_2(ref, 8, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(REF_0, REF_2, TMP6); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_alignaddr_g0((void *)off); + + vis_faligndata(TMP0, TMP2, REF_0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + } else { + vis_src1(TMP2, REF_2); + } + + 
vis_and(TMP4, MASK_7f, TMP4); + + vis_psub16(TMP6, TMP4, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_xor(REF_0, REF_2, TMP12); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_or(REF_0, REF_2, TMP14); + vis_mul8x16(CONST_128, TMP12, TMP12); + + vis_and(TMP12, MASK_7f, TMP12); + + vis_psub16(TMP14, TMP12, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; +} + +static void MC_avg_x_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + vis_ld64(constants3[0], CONST_3); + vis_fzero(ZERO); + vis_ld64(constants256_512[0], CONST_256); + + ref = vis_alignaddr(ref); + do { /* 26 cycles */ + vis_ld64(ref[0], TMP0); + + vis_ld64(ref[8], TMP2); + + vis_alignaddr_g0((void *)off); + + vis_ld64(ref[16], TMP4); + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(dest[8], DST_2); + vis_faligndata(TMP2, TMP4, REF_4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + } + + vis_mul8x16au(REF_0, CONST_256, TMP0); + + vis_pmerge(ZERO, REF_2, TMP4); + vis_mul8x16au(REF_0_1, CONST_256, TMP2); + + vis_pmerge(ZERO, REF_2_1, TMP6); + + vis_padd16(TMP0, TMP4, TMP0); + + vis_mul8x16al(DST_0, CONST_512, TMP4); + vis_padd16(TMP2, TMP6, TMP2); + + vis_mul8x16al(DST_1, CONST_512, TMP6); + + vis_mul8x16au(REF_6, CONST_256, TMP12); + + vis_padd16(TMP0, TMP4, TMP0); + vis_mul8x16au(REF_6_1, CONST_256, TMP14); + + vis_padd16(TMP2, TMP6, TMP2); + vis_mul8x16au(REF_4, CONST_256, TMP16); + + vis_padd16(TMP0, CONST_3, TMP8); + vis_mul8x16au(REF_4_1, CONST_256, TMP18); + + vis_padd16(TMP2, CONST_3, TMP10); + vis_pack16(TMP8, DST_0); + + vis_pack16(TMP10, DST_1); + vis_padd16(TMP16, TMP12, TMP0); + + vis_st64(DST_0, 
dest[0]); + vis_mul8x16al(DST_2, CONST_512, TMP4); + vis_padd16(TMP18, TMP14, TMP2); + + vis_mul8x16al(DST_3, CONST_512, TMP6); + vis_padd16(TMP0, CONST_3, TMP0); + + vis_padd16(TMP2, CONST_3, TMP2); + + vis_padd16(TMP0, TMP4, TMP0); + + vis_padd16(TMP2, TMP6, TMP2); + vis_pack16(TMP0, DST_2); + + vis_pack16(TMP2, DST_3); + vis_st64(DST_2, dest[8]); + + ref += stride; + dest += stride; + } while (--height); +} + +static void MC_avg_x_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_times_2 = stride << 1; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + vis_ld64(constants3[0], CONST_3); + vis_fzero(ZERO); + vis_ld64(constants256_512[0], CONST_256); + + ref = vis_alignaddr(ref); + height >>= 2; + do { /* 47 cycles */ + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, 8, TMP2); + ref += stride; + + vis_alignaddr_g0((void *)off); + + vis_ld64(ref[0], TMP4); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, 8, TMP6); + ref += stride; + + vis_ld64(ref[0], TMP8); + + vis_ld64_2(ref, 8, TMP10); + ref += stride; + vis_faligndata(TMP4, TMP6, REF_4); + + vis_ld64(ref[0], TMP12); + + vis_ld64_2(ref, 8, TMP14); + ref += stride; + vis_faligndata(TMP8, TMP10, REF_S0); + + vis_faligndata(TMP12, TMP14, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP0, TMP2, REF_2); + + vis_ld64_2(dest, stride, DST_2); + vis_faligndata(TMP4, TMP6, REF_6); + + vis_faligndata(TMP8, TMP10, REF_S2); + + vis_faligndata(TMP12, TMP14, REF_S6); + } else { + vis_ld64(dest[0], DST_0); + vis_src1(TMP2, REF_2); + + vis_ld64_2(dest, stride, DST_2); + vis_src1(TMP6, REF_6); + + vis_src1(TMP10, REF_S2); + + vis_src1(TMP14, REF_S6); + } + + vis_pmerge(ZERO, REF_0, TMP0); + vis_mul8x16au(REF_0_1, CONST_256, TMP2); + + vis_pmerge(ZERO, REF_2, TMP4); + vis_mul8x16au(REF_2_1, 
CONST_256, TMP6); + + vis_padd16(TMP0, CONST_3, TMP0); + vis_mul8x16al(DST_0, CONST_512, TMP16); + + vis_padd16(TMP2, CONST_3, TMP2); + vis_mul8x16al(DST_1, CONST_512, TMP18); + + vis_padd16(TMP0, TMP4, TMP0); + vis_mul8x16au(REF_4, CONST_256, TMP8); + + vis_padd16(TMP2, TMP6, TMP2); + vis_mul8x16au(REF_4_1, CONST_256, TMP10); + + vis_padd16(TMP0, TMP16, TMP0); + vis_mul8x16au(REF_6, CONST_256, TMP12); + + vis_padd16(TMP2, TMP18, TMP2); + vis_mul8x16au(REF_6_1, CONST_256, TMP14); + + vis_padd16(TMP8, CONST_3, TMP8); + vis_mul8x16al(DST_2, CONST_512, TMP16); + + vis_padd16(TMP8, TMP12, TMP8); + vis_mul8x16al(DST_3, CONST_512, TMP18); + + vis_padd16(TMP10, TMP14, TMP10); + vis_pack16(TMP0, DST_0); + + vis_pack16(TMP2, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + vis_padd16(TMP10, CONST_3, TMP10); + + vis_ld64_2(dest, stride, DST_0); + vis_padd16(TMP8, TMP16, TMP8); + + vis_ld64_2(dest, stride_times_2, TMP4/*DST_2*/); + vis_padd16(TMP10, TMP18, TMP10); + vis_pack16(TMP8, DST_2); + + vis_pack16(TMP10, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + + vis_mul8x16au(REF_S0_1, CONST_256, TMP2); + vis_pmerge(ZERO, REF_S0, TMP0); + + vis_pmerge(ZERO, REF_S2, TMP24); + vis_mul8x16au(REF_S2_1, CONST_256, TMP6); + + vis_padd16(TMP0, CONST_3, TMP0); + vis_mul8x16au(REF_S4, CONST_256, TMP8); + + vis_padd16(TMP2, CONST_3, TMP2); + vis_mul8x16au(REF_S4_1, CONST_256, TMP10); + + vis_padd16(TMP0, TMP24, TMP0); + vis_mul8x16au(REF_S6, CONST_256, TMP12); + + vis_padd16(TMP2, TMP6, TMP2); + vis_mul8x16au(REF_S6_1, CONST_256, TMP14); + + vis_padd16(TMP8, CONST_3, TMP8); + vis_mul8x16al(DST_0, CONST_512, TMP16); + + vis_padd16(TMP10, CONST_3, TMP10); + vis_mul8x16al(DST_1, CONST_512, TMP18); + + vis_padd16(TMP8, TMP12, TMP8); + vis_mul8x16al(TMP4/*DST_2*/, CONST_512, TMP20); + + vis_mul8x16al(TMP5/*DST_3*/, CONST_512, TMP22); + vis_padd16(TMP0, TMP16, TMP0); + + vis_padd16(TMP2, TMP18, TMP2); + vis_pack16(TMP0, DST_0); + + vis_padd16(TMP10, TMP14, TMP10); + 
vis_pack16(TMP2, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_padd16(TMP8, TMP20, TMP8); + + vis_padd16(TMP10, TMP22, TMP10); + vis_pack16(TMP8, DST_2); + + vis_pack16(TMP10, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + } while (--height); +} + +static void MC_put_y_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 16 : 0; + + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, 8, TMP2); + + vis_ld64_2(ref, offset, TMP4); + ref += stride; + + vis_ld64(ref[0], TMP6); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, 8, TMP8); + vis_faligndata(TMP2, TMP4, REF_4); + + vis_ld64_2(ref, offset, TMP10); + ref += stride; + + vis_ld64(constants_fe[0], MASK_fe); + vis_faligndata(TMP6, TMP8, REF_2); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP8, TMP10, REF_6); + + vis_ld64(constants128[0], CONST_128); + height = (height >> 1) - 1; + do { /* 24 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP12); + + vis_ld64_2(ref, 8, TMP2); + vis_xor(REF_4, REF_6, TMP16); + + vis_ld64_2(ref, offset, TMP4); + ref += stride; + vis_or(REF_0, REF_2, TMP14); + + vis_ld64(ref[0], TMP6); + vis_or(REF_4, REF_6, TMP18); + + vis_ld64_2(ref, 8, TMP8); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, offset, TMP10); + ref += stride; + vis_faligndata(TMP2, TMP4, REF_4); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_and(TMP16, MASK_fe, TMP16); + vis_mul8x16(CONST_128, TMP12, TMP12); + + vis_mul8x16(CONST_128, TMP16, TMP16); + vis_xor(REF_0, REF_2, TMP0); + + vis_xor(REF_4, REF_6, TMP2); + + vis_or(REF_0, REF_2, TMP20); + + vis_and(TMP12, MASK_7f, TMP12); + + vis_and(TMP16, MASK_7f, TMP16); + + vis_psub16(TMP14, TMP12, TMP12); + vis_st64(TMP12, dest[0]); + + vis_psub16(TMP18, TMP16, TMP16); + vis_st64_2(TMP16, dest, 8); + dest += stride; + + vis_or(REF_4, REF_6, TMP18); + + vis_and(TMP0, MASK_fe, TMP0); + + 
vis_and(TMP2, MASK_fe, TMP2); + vis_mul8x16(CONST_128, TMP0, TMP0); + + vis_faligndata(TMP6, TMP8, REF_2); + vis_mul8x16(CONST_128, TMP2, TMP2); + + vis_faligndata(TMP8, TMP10, REF_6); + + vis_and(TMP0, MASK_7f, TMP0); + + vis_and(TMP2, MASK_7f, TMP2); + + vis_psub16(TMP20, TMP0, TMP0); + vis_st64(TMP0, dest[0]); + + vis_psub16(TMP18, TMP2, TMP2); + vis_st64_2(TMP2, dest, 8); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP12); + + vis_ld64_2(ref, 8, TMP2); + vis_xor(REF_4, REF_6, TMP16); + + vis_ld64_2(ref, offset, TMP4); + vis_or(REF_0, REF_2, TMP14); + + vis_or(REF_4, REF_6, TMP18); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_faligndata(TMP2, TMP4, REF_4); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_and(TMP16, MASK_fe, TMP16); + vis_mul8x16(CONST_128, TMP12, TMP12); + + vis_mul8x16(CONST_128, TMP16, TMP16); + vis_xor(REF_0, REF_2, TMP0); + + vis_xor(REF_4, REF_6, TMP2); + + vis_or(REF_0, REF_2, TMP20); + + vis_and(TMP12, MASK_7f, TMP12); + + vis_and(TMP16, MASK_7f, TMP16); + + vis_psub16(TMP14, TMP12, TMP12); + vis_st64(TMP12, dest[0]); + + vis_psub16(TMP18, TMP16, TMP16); + vis_st64_2(TMP16, dest, 8); + dest += stride; + + vis_or(REF_4, REF_6, TMP18); + + vis_and(TMP0, MASK_fe, TMP0); + + vis_and(TMP2, MASK_fe, TMP2); + vis_mul8x16(CONST_128, TMP0, TMP0); + + vis_mul8x16(CONST_128, TMP2, TMP2); + + vis_and(TMP0, MASK_7f, TMP0); + + vis_and(TMP2, MASK_7f, TMP2); + + vis_psub16(TMP20, TMP0, TMP0); + vis_st64(TMP0, dest[0]); + + vis_psub16(TMP18, TMP2, TMP2); + vis_st64_2(TMP2, dest, 8); +} + +static void MC_put_y_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 
8 : 0; + + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, offset, TMP2); + ref += stride; + + vis_ld64(ref[0], TMP4); + + vis_ld64_2(ref, offset, TMP6); + ref += stride; + + vis_ld64(constants_fe[0], MASK_fe); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP4, TMP6, REF_2); + + vis_ld64(constants128[0], CONST_128); + height = (height >> 1) - 1; + do { /* 12 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP4); + + vis_ld64_2(ref, offset, TMP2); + ref += stride; + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(REF_0, REF_2, TMP6); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_faligndata(TMP0, TMP2, REF_0); + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, offset, TMP2); + ref += stride; + vis_xor(REF_0, REF_2, TMP12); + + vis_and(TMP4, MASK_7f, TMP4); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_mul8x16(CONST_128, TMP12, TMP12); + vis_or(REF_0, REF_2, TMP14); + + vis_psub16(TMP6, TMP4, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_faligndata(TMP0, TMP2, REF_2); + + vis_and(TMP12, MASK_7f, TMP12); + + vis_psub16(TMP14, TMP12, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP4); + + vis_ld64_2(ref, offset, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(REF_0, REF_2, TMP6); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_xor(REF_0, REF_2, TMP12); + + vis_and(TMP4, MASK_7f, TMP4); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_mul8x16(CONST_128, TMP12, TMP12); + vis_or(REF_0, REF_2, TMP14); + + vis_psub16(TMP6, TMP4, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_and(TMP12, MASK_7f, TMP12); + + vis_psub16(TMP14, TMP12, DST_0); + vis_st64(DST_0, dest[0]); +} + +static void MC_avg_y_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int stride_8 = stride + 8; + int stride_16; + int offset; + + vis_set_gsr(5 << 
VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 16 : 0; + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + + vis_ld64(ref[ 8], TMP2); + + vis_ld64_2(ref, offset, TMP4); + stride_16 = stride + offset; + + vis_ld64(constants3[0], CONST_3); + vis_faligndata(TMP0, TMP2, REF_2); + + vis_ld64(constants256_512[0], CONST_256); + vis_faligndata(TMP2, TMP4, REF_6); + height >>= 1; + + do { /* 31 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_pmerge(ZERO, REF_2, TMP12); + vis_mul8x16au(REF_2_1, CONST_256, TMP14); + + vis_ld64_2(ref, stride_8, TMP2); + vis_pmerge(ZERO, REF_6, TMP16); + vis_mul8x16au(REF_6_1, CONST_256, TMP18); + + vis_ld64_2(ref, stride_16, TMP4); + ref += stride; + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(dest, 8, DST_2); + vis_faligndata(TMP2, TMP4, REF_4); + + vis_ld64_2(ref, stride, TMP6); + vis_pmerge(ZERO, REF_0, TMP0); + vis_mul8x16au(REF_0_1, CONST_256, TMP2); + + vis_ld64_2(ref, stride_8, TMP8); + vis_pmerge(ZERO, REF_4, TMP4); + + vis_ld64_2(ref, stride_16, TMP10); + ref += stride; + + vis_ld64_2(dest, stride, REF_S0/*DST_4*/); + vis_faligndata(TMP6, TMP8, REF_2); + vis_mul8x16au(REF_4_1, CONST_256, TMP6); + + vis_ld64_2(dest, stride_8, REF_S2/*DST_6*/); + vis_faligndata(TMP8, TMP10, REF_6); + vis_mul8x16al(DST_0, CONST_512, TMP20); + + vis_padd16(TMP0, CONST_3, TMP0); + vis_mul8x16al(DST_1, CONST_512, TMP22); + + vis_padd16(TMP2, CONST_3, TMP2); + vis_mul8x16al(DST_2, CONST_512, TMP24); + + vis_padd16(TMP4, CONST_3, TMP4); + vis_mul8x16al(DST_3, CONST_512, TMP26); + + vis_padd16(TMP6, CONST_3, TMP6); + + vis_padd16(TMP12, TMP20, TMP12); + vis_mul8x16al(REF_S0, CONST_512, TMP20); + + vis_padd16(TMP14, TMP22, TMP14); + vis_mul8x16al(REF_S0_1, CONST_512, TMP22); + + vis_padd16(TMP16, TMP24, TMP16); + vis_mul8x16al(REF_S2, CONST_512, TMP24); + + vis_padd16(TMP18, TMP26, TMP18); + vis_mul8x16al(REF_S2_1, CONST_512, TMP26); + + vis_padd16(TMP12, TMP0, TMP12); + vis_mul8x16au(REF_2, 
CONST_256, TMP28); + + vis_padd16(TMP14, TMP2, TMP14); + vis_mul8x16au(REF_2_1, CONST_256, TMP30); + + vis_padd16(TMP16, TMP4, TMP16); + vis_mul8x16au(REF_6, CONST_256, REF_S4); + + vis_padd16(TMP18, TMP6, TMP18); + vis_mul8x16au(REF_6_1, CONST_256, REF_S6); + + vis_pack16(TMP12, DST_0); + vis_padd16(TMP28, TMP0, TMP12); + + vis_pack16(TMP14, DST_1); + vis_st64(DST_0, dest[0]); + vis_padd16(TMP30, TMP2, TMP14); + + vis_pack16(TMP16, DST_2); + vis_padd16(REF_S4, TMP4, TMP16); + + vis_pack16(TMP18, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + vis_padd16(REF_S6, TMP6, TMP18); + + vis_padd16(TMP12, TMP20, TMP12); + + vis_padd16(TMP14, TMP22, TMP14); + vis_pack16(TMP12, DST_0); + + vis_padd16(TMP16, TMP24, TMP16); + vis_pack16(TMP14, DST_1); + vis_st64(DST_0, dest[0]); + + vis_padd16(TMP18, TMP26, TMP18); + vis_pack16(TMP16, DST_2); + + vis_pack16(TMP18, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + } while (--height); +} + +static void MC_avg_y_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int stride_8; + int offset; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 
8 : 0; + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + + vis_ld64_2(ref, offset, TMP2); + stride_8 = stride + offset; + + vis_ld64(constants3[0], CONST_3); + vis_faligndata(TMP0, TMP2, REF_2); + + vis_ld64(constants256_512[0], CONST_256); + + height >>= 1; + do { /* 20 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_pmerge(ZERO, REF_2, TMP8); + vis_mul8x16au(REF_2_1, CONST_256, TMP10); + + vis_ld64_2(ref, stride_8, TMP2); + ref += stride; + + vis_ld64(dest[0], DST_0); + + vis_ld64_2(dest, stride, DST_2); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, stride, TMP4); + vis_mul8x16al(DST_0, CONST_512, TMP16); + vis_pmerge(ZERO, REF_0, TMP12); + + vis_ld64_2(ref, stride_8, TMP6); + ref += stride; + vis_mul8x16al(DST_1, CONST_512, TMP18); + vis_pmerge(ZERO, REF_0_1, TMP14); + + vis_padd16(TMP12, CONST_3, TMP12); + vis_mul8x16al(DST_2, CONST_512, TMP24); + + vis_padd16(TMP14, CONST_3, TMP14); + vis_mul8x16al(DST_3, CONST_512, TMP26); + + vis_faligndata(TMP4, TMP6, REF_2); + + vis_padd16(TMP8, TMP12, TMP8); + + vis_padd16(TMP10, TMP14, TMP10); + vis_mul8x16au(REF_2, CONST_256, TMP20); + + vis_padd16(TMP8, TMP16, TMP0); + vis_mul8x16au(REF_2_1, CONST_256, TMP22); + + vis_padd16(TMP10, TMP18, TMP2); + vis_pack16(TMP0, DST_0); + + vis_pack16(TMP2, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + vis_padd16(TMP12, TMP20, TMP12); + + vis_padd16(TMP14, TMP22, TMP14); + + vis_padd16(TMP12, TMP24, TMP0); + + vis_padd16(TMP14, TMP26, TMP2); + vis_pack16(TMP0, DST_2); + + vis_pack16(TMP2, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + } while (--height); +} + +static void MC_put_xy_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_8 = stride + 8; + int stride_16 = stride + 16; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + + vis_ld64(ref[ 0], TMP0); + 
vis_fzero(ZERO); + + vis_ld64(ref[ 8], TMP2); + + vis_ld64(ref[16], TMP4); + + vis_ld64(constants2[0], CONST_2); + vis_faligndata(TMP0, TMP2, REF_S0); + + vis_ld64(constants256_512[0], CONST_256); + vis_faligndata(TMP2, TMP4, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S2); + vis_faligndata(TMP2, TMP4, REF_S6); + } else { + vis_src1(TMP2, REF_S2); + vis_src1(TMP4, REF_S6); + } + + height >>= 1; + do { + vis_ld64_2(ref, stride, TMP0); + vis_mul8x16au(REF_S0, CONST_256, TMP12); + vis_pmerge(ZERO, REF_S0_1, TMP14); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, stride_8, TMP2); + vis_mul8x16au(REF_S2, CONST_256, TMP16); + vis_pmerge(ZERO, REF_S2_1, TMP18); + + vis_ld64_2(ref, stride_16, TMP4); + ref += stride; + vis_mul8x16au(REF_S4, CONST_256, TMP20); + vis_pmerge(ZERO, REF_S4_1, TMP22); + + vis_ld64_2(ref, stride, TMP6); + vis_mul8x16au(REF_S6, CONST_256, TMP24); + vis_pmerge(ZERO, REF_S6_1, TMP26); + + vis_ld64_2(ref, stride_8, TMP8); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, stride_16, TMP10); + ref += stride; + vis_faligndata(TMP2, TMP4, REF_4); + + vis_faligndata(TMP6, TMP8, REF_S0); + + vis_faligndata(TMP8, TMP10, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + vis_faligndata(TMP6, TMP8, REF_S2); + vis_faligndata(TMP8, TMP10, REF_S6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + vis_src1(TMP8, REF_S2); + vis_src1(TMP10, REF_S6); + } + + vis_mul8x16au(REF_0, CONST_256, TMP0); + vis_pmerge(ZERO, REF_0_1, TMP2); + + vis_mul8x16au(REF_2, CONST_256, TMP4); + vis_pmerge(ZERO, REF_2_1, TMP6); + + vis_padd16(TMP0, CONST_2, TMP8); + vis_mul8x16au(REF_4, CONST_256, TMP0); + + vis_padd16(TMP2, CONST_2, TMP10); + vis_mul8x16au(REF_4_1, CONST_256, TMP2); + + vis_padd16(TMP8, TMP4, TMP8); + vis_mul8x16au(REF_6, CONST_256, TMP4); + + vis_padd16(TMP10, TMP6, TMP10); + 
vis_mul8x16au(REF_6_1, CONST_256, TMP6); + + vis_padd16(TMP12, TMP8, TMP12); + + vis_padd16(TMP14, TMP10, TMP14); + + vis_padd16(TMP12, TMP16, TMP12); + + vis_padd16(TMP14, TMP18, TMP14); + vis_pack16(TMP12, DST_0); + + vis_pack16(TMP14, DST_1); + vis_st64(DST_0, dest[0]); + vis_padd16(TMP0, CONST_2, TMP12); + + vis_mul8x16au(REF_S0, CONST_256, TMP0); + vis_padd16(TMP2, CONST_2, TMP14); + + vis_mul8x16au(REF_S0_1, CONST_256, TMP2); + vis_padd16(TMP12, TMP4, TMP12); + + vis_mul8x16au(REF_S2, CONST_256, TMP4); + vis_padd16(TMP14, TMP6, TMP14); + + vis_mul8x16au(REF_S2_1, CONST_256, TMP6); + vis_padd16(TMP20, TMP12, TMP20); + + vis_padd16(TMP22, TMP14, TMP22); + + vis_padd16(TMP20, TMP24, TMP20); + + vis_padd16(TMP22, TMP26, TMP22); + vis_pack16(TMP20, DST_2); + + vis_pack16(TMP22, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + vis_padd16(TMP0, TMP4, TMP24); + + vis_mul8x16au(REF_S4, CONST_256, TMP0); + vis_padd16(TMP2, TMP6, TMP26); + + vis_mul8x16au(REF_S4_1, CONST_256, TMP2); + vis_padd16(TMP24, TMP8, TMP24); + + vis_padd16(TMP26, TMP10, TMP26); + vis_pack16(TMP24, DST_0); + + vis_pack16(TMP26, DST_1); + vis_st64(DST_0, dest[0]); + vis_pmerge(ZERO, REF_S6, TMP4); + + vis_pmerge(ZERO, REF_S6_1, TMP6); + + vis_padd16(TMP0, TMP4, TMP0); + + vis_padd16(TMP2, TMP6, TMP2); + + vis_padd16(TMP0, TMP12, TMP0); + + vis_padd16(TMP2, TMP14, TMP2); + vis_pack16(TMP0, DST_2); + + vis_pack16(TMP2, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + } while (--height); +} + +static void MC_put_xy_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_8 = stride + 8; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + + vis_ld64(ref[ 8], TMP2); + + vis_ld64(constants2[0], CONST_2); + + vis_ld64(constants256_512[0], CONST_256); + 
vis_faligndata(TMP0, TMP2, REF_S0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S2); + } else { + vis_src1(TMP2, REF_S2); + } + + height >>= 1; + do { /* 26 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_mul8x16au(REF_S0, CONST_256, TMP8); + vis_pmerge(ZERO, REF_S2, TMP12); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, stride_8, TMP2); + ref += stride; + vis_mul8x16au(REF_S0_1, CONST_256, TMP10); + vis_pmerge(ZERO, REF_S2_1, TMP14); + + vis_ld64_2(ref, stride, TMP4); + + vis_ld64_2(ref, stride_8, TMP6); + ref += stride; + vis_faligndata(TMP0, TMP2, REF_S4); + + vis_pmerge(ZERO, REF_S4, TMP18); + + vis_pmerge(ZERO, REF_S4_1, TMP20); + + vis_faligndata(TMP4, TMP6, REF_S0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S6); + vis_faligndata(TMP4, TMP6, REF_S2); + } else { + vis_src1(TMP2, REF_S6); + vis_src1(TMP6, REF_S2); + } + + vis_padd16(TMP18, CONST_2, TMP18); + vis_mul8x16au(REF_S6, CONST_256, TMP22); + + vis_padd16(TMP20, CONST_2, TMP20); + vis_mul8x16au(REF_S6_1, CONST_256, TMP24); + + vis_mul8x16au(REF_S0, CONST_256, TMP26); + vis_pmerge(ZERO, REF_S0_1, TMP28); + + vis_mul8x16au(REF_S2, CONST_256, TMP30); + vis_padd16(TMP18, TMP22, TMP18); + + vis_mul8x16au(REF_S2_1, CONST_256, TMP32); + vis_padd16(TMP20, TMP24, TMP20); + + vis_padd16(TMP8, TMP18, TMP8); + + vis_padd16(TMP10, TMP20, TMP10); + + vis_padd16(TMP8, TMP12, TMP8); + + vis_padd16(TMP10, TMP14, TMP10); + vis_pack16(TMP8, DST_0); + + vis_pack16(TMP10, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + vis_padd16(TMP18, TMP26, TMP18); + + vis_padd16(TMP20, TMP28, TMP20); + + vis_padd16(TMP18, TMP30, TMP18); + + vis_padd16(TMP20, TMP32, TMP20); + vis_pack16(TMP18, DST_2); + + vis_pack16(TMP20, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + } while (--height); +} + +static void MC_avg_xy_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref 
= (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_8 = stride + 8; + int stride_16 = stride + 16; + + vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + + vis_ld64(ref[ 8], TMP2); + + vis_ld64(ref[16], TMP4); + + vis_ld64(constants6[0], CONST_6); + vis_faligndata(TMP0, TMP2, REF_S0); + + vis_ld64(constants256_1024[0], CONST_256); + vis_faligndata(TMP2, TMP4, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S2); + vis_faligndata(TMP2, TMP4, REF_S6); + } else { + vis_src1(TMP2, REF_S2); + vis_src1(TMP4, REF_S6); + } + + height >>= 1; + do { /* 55 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_mul8x16au(REF_S0, CONST_256, TMP12); + vis_pmerge(ZERO, REF_S0_1, TMP14); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, stride_8, TMP2); + vis_mul8x16au(REF_S2, CONST_256, TMP16); + vis_pmerge(ZERO, REF_S2_1, TMP18); + + vis_ld64_2(ref, stride_16, TMP4); + ref += stride; + vis_mul8x16au(REF_S4, CONST_256, TMP20); + vis_pmerge(ZERO, REF_S4_1, TMP22); + + vis_ld64_2(ref, stride, TMP6); + vis_mul8x16au(REF_S6, CONST_256, TMP24); + vis_pmerge(ZERO, REF_S6_1, TMP26); + + vis_ld64_2(ref, stride_8, TMP8); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, stride_16, TMP10); + ref += stride; + vis_faligndata(TMP2, TMP4, REF_4); + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP6, TMP8, REF_S0); + + vis_ld64_2(dest, 8, DST_2); + vis_faligndata(TMP8, TMP10, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + vis_faligndata(TMP6, TMP8, REF_S2); + vis_faligndata(TMP8, TMP10, REF_S6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + vis_src1(TMP8, REF_S2); + vis_src1(TMP10, REF_S6); + } + + vis_mul8x16al(DST_0, CONST_1024, TMP30); + vis_pmerge(ZERO, REF_0, TMP0); + + 
vis_mul8x16al(DST_1, CONST_1024, TMP32); + vis_pmerge(ZERO, REF_0_1, TMP2); + + vis_mul8x16au(REF_2, CONST_256, TMP4); + vis_pmerge(ZERO, REF_2_1, TMP6); + + vis_mul8x16al(DST_2, CONST_1024, REF_0); + vis_padd16(TMP0, CONST_6, TMP0); + + vis_mul8x16al(DST_3, CONST_1024, REF_2); + vis_padd16(TMP2, CONST_6, TMP2); + + vis_padd16(TMP0, TMP4, TMP0); + vis_mul8x16au(REF_4, CONST_256, TMP4); + + vis_padd16(TMP2, TMP6, TMP2); + vis_mul8x16au(REF_4_1, CONST_256, TMP6); + + vis_padd16(TMP12, TMP0, TMP12); + vis_mul8x16au(REF_6, CONST_256, TMP8); + + vis_padd16(TMP14, TMP2, TMP14); + vis_mul8x16au(REF_6_1, CONST_256, TMP10); + + vis_padd16(TMP12, TMP16, TMP12); + vis_mul8x16au(REF_S0, CONST_256, REF_4); + + vis_padd16(TMP14, TMP18, TMP14); + vis_mul8x16au(REF_S0_1, CONST_256, REF_6); + + vis_padd16(TMP12, TMP30, TMP12); + + vis_padd16(TMP14, TMP32, TMP14); + vis_pack16(TMP12, DST_0); + + vis_pack16(TMP14, DST_1); + vis_st64(DST_0, dest[0]); + vis_padd16(TMP4, CONST_6, TMP4); + + vis_ld64_2(dest, stride, DST_0); + vis_padd16(TMP6, CONST_6, TMP6); + vis_mul8x16au(REF_S2, CONST_256, TMP12); + + vis_padd16(TMP4, TMP8, TMP4); + vis_mul8x16au(REF_S2_1, CONST_256, TMP14); + + vis_padd16(TMP6, TMP10, TMP6); + + vis_padd16(TMP20, TMP4, TMP20); + + vis_padd16(TMP22, TMP6, TMP22); + + vis_padd16(TMP20, TMP24, TMP20); + + vis_padd16(TMP22, TMP26, TMP22); + + vis_padd16(TMP20, REF_0, TMP20); + vis_mul8x16au(REF_S4, CONST_256, REF_0); + + vis_padd16(TMP22, REF_2, TMP22); + vis_pack16(TMP20, DST_2); + + vis_pack16(TMP22, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + + vis_ld64_2(dest, 8, DST_2); + vis_mul8x16al(DST_0, CONST_1024, TMP30); + vis_pmerge(ZERO, REF_S4_1, REF_2); + + vis_mul8x16al(DST_1, CONST_1024, TMP32); + vis_padd16(REF_4, TMP0, TMP8); + + vis_mul8x16au(REF_S6, CONST_256, REF_4); + vis_padd16(REF_6, TMP2, TMP10); + + vis_mul8x16au(REF_S6_1, CONST_256, REF_6); + vis_padd16(TMP8, TMP12, TMP8); + + vis_padd16(TMP10, TMP14, TMP10); + + vis_padd16(TMP8, TMP30, TMP8); 
+ + vis_padd16(TMP10, TMP32, TMP10); + vis_pack16(TMP8, DST_0); + + vis_pack16(TMP10, DST_1); + vis_st64(DST_0, dest[0]); + + vis_padd16(REF_0, TMP4, REF_0); + + vis_mul8x16al(DST_2, CONST_1024, TMP30); + vis_padd16(REF_2, TMP6, REF_2); + + vis_mul8x16al(DST_3, CONST_1024, TMP32); + vis_padd16(REF_0, REF_4, REF_0); + + vis_padd16(REF_2, REF_6, REF_2); + + vis_padd16(REF_0, TMP30, REF_0); + + /* stall */ + + vis_padd16(REF_2, TMP32, REF_2); + vis_pack16(REF_0, DST_2); + + vis_pack16(REF_2, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + } while (--height); +} + +/* VIS kernel that MPEG2_MC_EXTERN(vis) installs in the xy_8 slot of the avg table. Before the loop it loads one reference row at byte alignment off and at off plus 1. Each loop pass then loads two further reference rows the same way, reads the 8 bytes already at dest for two rows, combines them, and stores two 8-byte rows. height is halved up front, so an even height is presumably required -- TODO confirm with callers. */ static void MC_avg_xy_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_8 = stride + 8; + + vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + + vis_ld64(ref[0], TMP0); + vis_fzero(ZERO); + + vis_ld64_2(ref, 8, TMP2); + + vis_ld64(constants6[0], CONST_6); + + vis_ld64(constants256_1024[0], CONST_256); + vis_faligndata(TMP0, TMP2, REF_S0); + + /* When off is 7 the one-byte-shifted data is taken straight from the second 8-byte word (TMP2) instead of re-aligning. */ if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S2); + } else { + vis_src1(TMP2, REF_S2); + } + + height >>= 1; + do { /* 31 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_mul8x16au(REF_S0, CONST_256, TMP8); + vis_pmerge(ZERO, REF_S0_1, TMP10); + + vis_ld64_2(ref, stride_8, TMP2); + ref += stride; + vis_mul8x16au(REF_S2, CONST_256, TMP12); + vis_pmerge(ZERO, REF_S2_1, TMP14); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, stride, TMP4); + vis_faligndata(TMP0, TMP2, REF_S4); + + vis_ld64_2(ref, stride_8, TMP6); + ref += stride; + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP4, TMP6, REF_S0); + + vis_ld64_2(dest, stride, DST_2); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S6); + vis_faligndata(TMP4, TMP6, REF_S2); + } else { + vis_src1(TMP2, REF_S6); + vis_src1(TMP6,
REF_S2); + } + + vis_mul8x16al(DST_0, CONST_1024, TMP30); + vis_pmerge(ZERO, REF_S4, TMP22); + + vis_mul8x16al(DST_1, CONST_1024, TMP32); + vis_pmerge(ZERO, REF_S4_1, TMP24); + + vis_mul8x16au(REF_S6, CONST_256, TMP26); + vis_pmerge(ZERO, REF_S6_1, TMP28); + + vis_mul8x16au(REF_S0, CONST_256, REF_S4); + vis_padd16(TMP22, CONST_6, TMP22); + + vis_mul8x16au(REF_S0_1, CONST_256, REF_S6); + vis_padd16(TMP24, CONST_6, TMP24); + + vis_mul8x16al(DST_2, CONST_1024, REF_0); + vis_padd16(TMP22, TMP26, TMP22); + + vis_mul8x16al(DST_3, CONST_1024, REF_2); + vis_padd16(TMP24, TMP28, TMP24); + + vis_mul8x16au(REF_S2, CONST_256, TMP26); + vis_padd16(TMP8, TMP22, TMP8); + + vis_mul8x16au(REF_S2_1, CONST_256, TMP28); + vis_padd16(TMP10, TMP24, TMP10); + + vis_padd16(TMP8, TMP12, TMP8); + + vis_padd16(TMP10, TMP14, TMP10); + + vis_padd16(TMP8, TMP30, TMP8); + + vis_padd16(TMP10, TMP32, TMP10); + vis_pack16(TMP8, DST_0); + + vis_pack16(TMP10, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_padd16(REF_S4, TMP22, TMP12); + + vis_padd16(REF_S6, TMP24, TMP14); + + vis_padd16(TMP12, TMP26, TMP12); + + vis_padd16(TMP14, TMP28, TMP14); + + vis_padd16(TMP12, REF_0, TMP12); + + vis_padd16(TMP14, REF_2, TMP14); + vis_pack16(TMP12, DST_2); + + vis_pack16(TMP14, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + } while (--height); +} + +/* Defines the mpeg2_mc_vis table from the MC_put_*_vis and MC_avg_*_vis functions. */ MPEG2_MC_EXTERN(vis); + +#endif /* !(ARCH_SPARC) */ diff --git a/src/video_dec/libmpeg2new/libmpeg2/mpeg2_internal.h b/src/video_dec/libmpeg2new/libmpeg2/mpeg2_internal.h new file mode 100644 index 000000000..fec7d4744 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/mpeg2_internal.h @@ -0,0 +1,302 @@ +/* + * mpeg2_internal.h + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates.
+ * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define STATE_INTERNAL_NORETURN ((mpeg2_state_t)-1) + +/* macroblock modes */ +#define MACROBLOCK_INTRA 1 +#define MACROBLOCK_PATTERN 2 +#define MACROBLOCK_MOTION_BACKWARD 4 +#define MACROBLOCK_MOTION_FORWARD 8 +#define MACROBLOCK_QUANT 16 +#define DCT_TYPE_INTERLACED 32 +/* motion_type */ /* NOTE(review): MC_FRAME and MC_16X8 share the value 2 -- presumably they are used with different picture structures; confirm against the slice decoder. */ +#define MOTION_TYPE_SHIFT 6 +#define MC_FIELD 1 +#define MC_FRAME 2 +#define MC_16X8 2 +#define MC_DMV 3 + +/* picture structure */ +#define TOP_FIELD 1 +#define BOTTOM_FIELD 2 +#define FRAME_PICTURE 3 + +/* picture coding type */ +#define I_TYPE 1 +#define P_TYPE 2 +#define B_TYPE 3 +#define D_TYPE 4 + +/* Signature shared by every motion-compensation routine stored in mpeg2_mc_t; MC_avg_xy_8_vis names the arguments dest, ref, stride, height. */ typedef void mpeg2_mc_fct (uint8_t *, const uint8_t *, int, int); + +/* Motion state for one prediction direction; mpeg2_decoder_s holds one for forward (f_motion) and one for backward (b_motion). */ typedef struct { + uint8_t * ref[2][3]; + uint8_t ** ref2[2]; + int pmv[2][2]; + int f_code[2]; +} motion_t; + +/* Motion handler signature: receives the decoder, one motion_t and a table of mpeg2_mc_fct pointers. */ typedef void motion_parser_t (mpeg2_decoder_t * decoder, + motion_t * motion, + mpeg2_mc_fct * const * table); + +/* Slice decoder state: per-slice working data comes first, followed by sequence-level and picture-level settings. */ struct mpeg2_decoder_s { + /* first, state that carries information from one macroblock to the */ + /* next inside a slice, and is never used outside of mpeg2_slice() */ + + /* bit parsing stuff */ + uint32_t bitstream_buf; /* current 32 bit working set */ + int bitstream_bits; /* used bits in working set */ + const uint8_t * bitstream_ptr; /* buffer with stream data */ + +
uint8_t * dest[3]; + + int offset; + int stride; + int uv_stride; + int slice_stride; + int slice_uv_stride; + int stride_frame; + unsigned int limit_x; + unsigned int limit_y_16; + unsigned int limit_y_8; + unsigned int limit_y; + + /* Motion vectors */ + /* The f_ and b_ correspond to the forward and backward motion */ + /* predictors */ + motion_t b_motion; + motion_t f_motion; + motion_parser_t * motion_parser[5]; + + /* predictor for DC coefficients in intra blocks */ + int16_t dc_dct_pred[3]; + + /* DCT coefficients */ + int16_t DCTblock[64] ATTR_ALIGN(64); + + uint8_t * picture_dest[3]; + void (* convert) (void * convert_id, uint8_t * const * src, + unsigned int v_offset); + void * convert_id; + + int dmv_offset; + unsigned int v_offset; + + /* now non-slice-specific information */ + + /* sequence header stuff */ + uint16_t * quantizer_matrix[4]; + uint16_t (* chroma_quantizer[2])[64]; + uint16_t quantizer_prescale[4][32][64]; + + /* The width and height of the picture snapped to macroblock units */ + int width; + int height; + int vertical_position_extension; + int chroma_format; + + /* picture header stuff */ + + /* what type of picture this is (I, P, B, D) */ + int coding_type; + + /* picture coding extension stuff */ + + /* quantization factor for intra dc coefficients */ + int intra_dc_precision; + /* top/bottom/both fields */ + int picture_structure; + /* bool to indicate all predictions are frame based */ + int frame_pred_frame_dct; + /* bool to indicate whether intra blocks have motion vectors */ + /* (for concealment) */ + int concealment_motion_vectors; + /* bool to use different vlc tables */ + int intra_vlc_format; + /* used for DMV MC */ + int top_field_first; + + /* stuff derived from bitstream */ + + /* pointer to the zigzag scan we're supposed to be using */ + const uint8_t * scan; + + int second_field; + + int mpeg1; +}; + +/* Holder for one mpeg2_fbuf_t; mpeg2dec_s keeps an array of three of these (fbuf_alloc). */ typedef struct { + mpeg2_fbuf_t fbuf; +} fbuf_alloc_t; + +/* Top-level decoder context; the slice decoder state is embedded as its first member. */ struct mpeg2dec_s { + mpeg2_decoder_t decoder; + +
mpeg2_info_t info; + + uint32_t shift; + int is_display_initialized; + mpeg2_state_t (* action) (struct mpeg2dec_s * mpeg2dec); + mpeg2_state_t state; + uint32_t ext_state; + + /* allocated in init - gcc has problems allocating such big structures */ + uint8_t * chunk_buffer; + /* pointer to start of the current chunk */ + uint8_t * chunk_start; + /* pointer to current position in chunk_buffer */ + uint8_t * chunk_ptr; + /* last start code ? */ + uint8_t code; + + /* picture tags */ + uint32_t tag_current, tag2_current, tag_previous, tag2_previous; + int num_tags; + int bytes_since_tag; + + int first; + int alloc_index_user; + int alloc_index; + uint8_t first_decode_slice; + uint8_t nb_decode_slices; + + unsigned int user_data_len; + + mpeg2_sequence_t new_sequence; + mpeg2_sequence_t sequence; + mpeg2_gop_t new_gop; + mpeg2_gop_t gop; + mpeg2_picture_t new_picture; + mpeg2_picture_t pictures[4]; + mpeg2_picture_t * picture; + /*const*/ mpeg2_fbuf_t * fbuf[3]; /* 0: current fbuf, 1-2: prediction fbufs */ + + fbuf_alloc_t fbuf_alloc[3]; + int custom_fbuf; + + uint8_t * yuv_buf[3][3]; + int yuv_index; + mpeg2_convert_t * convert; + void * convert_arg; + unsigned int convert_id_size; + int convert_stride; + /* per-picture converter hook; rgb_start in rgb.c has this signature */ + void (* convert_start) (void * id, const mpeg2_fbuf_t * fbuf, + const mpeg2_picture_t * picture, + const mpeg2_gop_t * gop); + + uint8_t * buf_start; + uint8_t * buf_end; + + int16_t display_offset_x, display_offset_y; + + int copy_matrix; + int8_t q_scale_type, scaled[4]; + uint8_t quantizer_matrix[4][64]; + uint8_t new_quantizer_matrix[4][64]; +}; + +/* CPU state buffer: ARCH_PPC builds reserve 12*16 bytes in regv; on every other build dummy is the only member (presumably just to keep the struct non-empty). */ typedef struct { +#ifdef ARCH_PPC + uint8_t regv[12*16]; +#endif + int dummy; +} cpu_state_t; + +/* cpu_accel.c */ +uint32_t mpeg2_detect_accel (uint32_t accel); + +/* cpu_state.c */ +void mpeg2_cpu_state_init (uint32_t accel); + +/* decode.c */ +mpeg2_state_t mpeg2_seek_header (mpeg2dec_t * mpeg2dec); +mpeg2_state_t mpeg2_parse_header (mpeg2dec_t * mpeg2dec); + +/* header.c */ +void mpeg2_header_state_init
(mpeg2dec_t * mpeg2dec); +void mpeg2_reset_info (mpeg2_info_t * info); +int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec); +int mpeg2_header_gop (mpeg2dec_t * mpeg2dec); +mpeg2_state_t mpeg2_header_picture_start (mpeg2dec_t * mpeg2dec); +int mpeg2_header_picture (mpeg2dec_t * mpeg2dec); +int mpeg2_header_extension (mpeg2dec_t * mpeg2dec); +int mpeg2_header_user_data (mpeg2dec_t * mpeg2dec); +void mpeg2_header_sequence_finalize (mpeg2dec_t * mpeg2dec); +void mpeg2_header_gop_finalize (mpeg2dec_t * mpeg2dec); +void mpeg2_header_picture_finalize (mpeg2dec_t * mpeg2dec, uint32_t accels); +mpeg2_state_t mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec); +mpeg2_state_t mpeg2_header_end (mpeg2dec_t * mpeg2dec); +void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int b_type); + +/* idct.c */ +void mpeg2_idct_init (uint32_t accel); + +/* idct_mmx.c */ +void mpeg2_idct_copy_mmxext (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_add_mmxext (int last, int16_t * block, + uint8_t * dest, int stride); +void mpeg2_idct_copy_mmx (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_add_mmx (int last, int16_t * block, + uint8_t * dest, int stride); +void mpeg2_idct_mmx_init (void); + +/* idct_altivec.c */ +void mpeg2_idct_copy_altivec (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_add_altivec (int last, int16_t * block, + uint8_t * dest, int stride); +void mpeg2_idct_altivec_init (void); + +/* idct_alpha.c */ +void mpeg2_idct_copy_mvi (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_add_mvi (int last, int16_t * block, + uint8_t * dest, int stride); +void mpeg2_idct_copy_alpha (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_add_alpha (int last, int16_t * block, + uint8_t * dest, int stride); +void mpeg2_idct_alpha_init (void); + +/* motion_comp.c */ +void mpeg2_mc_init (uint32_t accel); + +/* Table of motion-compensation routines: eight put and eight avg entries. MPEG2_MC_EXTERN fills each row in the order o_16, x_16, y_16, xy_16, o_8, x_8, y_8, xy_8. */ typedef struct { + mpeg2_mc_fct * put [8]; + mpeg2_mc_fct * avg [8]; +} mpeg2_mc_t; + +/* Defines the global table mpeg2_mc_<x> from the sixteen MC_put_*_<x> and MC_avg_*_<x> functions, which must already be declared where the macro is used. The expansion already ends with a semicolon. */ #define MPEG2_MC_EXTERN(x) mpeg2_mc_t
mpeg2_mc_##x = { \ + {MC_put_o_16_##x, MC_put_x_16_##x, MC_put_y_16_##x, MC_put_xy_16_##x, \ + MC_put_o_8_##x, MC_put_x_8_##x, MC_put_y_8_##x, MC_put_xy_8_##x}, \ + {MC_avg_o_16_##x, MC_avg_x_16_##x, MC_avg_y_16_##x, MC_avg_xy_16_##x, \ + MC_avg_o_8_##x, MC_avg_x_8_##x, MC_avg_y_8_##x, MC_avg_xy_8_##x} \ +}; + +/* One routine table per implementation; each is expected to be defined through MPEG2_MC_EXTERN. */ extern mpeg2_mc_t mpeg2_mc_c; +extern mpeg2_mc_t mpeg2_mc_mmx; +extern mpeg2_mc_t mpeg2_mc_mmxext; +extern mpeg2_mc_t mpeg2_mc_3dnow; +extern mpeg2_mc_t mpeg2_mc_altivec; +extern mpeg2_mc_t mpeg2_mc_alpha; +extern mpeg2_mc_t mpeg2_mc_vis; diff --git a/src/video_dec/libmpeg2new/libmpeg2/rgb.c b/src/video_dec/libmpeg2new/libmpeg2/rgb.c new file mode 100644 index 000000000..e4abcacc2 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/rgb.c @@ -0,0 +1,598 @@ +/* + * rgb.c + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" +#include + +#include + +#include "mpeg2.h" +#include "mpeg2convert.h" +#include "convert_internal.h" + +/* Row of Inverse_Table_6_9 used by rgb_c_init; nothing in this file assigns a value other than 6 (the SMPTE 170M row). */ static int matrix_coefficients = 6; + +/* Chroma conversion coefficients, one row per matrix_coefficients code. rgb_c_init reads the four columns as crv, cbu, and the negated cgu and cgv. */ static const int Inverse_Table_6_9[8][4] = { + {117504, 138453, 13954, 34903}, /* no sequence_display_extension */ + {117504, 138453, 13954, 34903}, /* ITU-R Rec. 709 (1990) */ + {104597, 132201, 25675, 53279}, /* unspecified */ + {104597, 132201, 25675, 53279}, /* reserved */ + {104448, 132798, 24759, 53109}, /* FCC */ + {104597, 132201, 25675, 53279}, /* ITU-R Rec. 624-4 System B, G */ + {104597, 132201, 25675, 53279}, /* SMPTE 170M */ + {117579, 136230, 16907, 35559} /* SMPTE 240M (1987) */ +}; + +/* Dither offsets for the 8 bpp converters. DSTDITHER reads them through pd = dither plus twice dithpos: the even entry of each pair offsets the r and g lookups (added for r, subtracted for g) and the odd entry offsets the b lookup. */ static const uint8_t dither[] ATTR_ALIGN(32) = { + 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, + 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, + 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, + 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, + 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35, + 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35, + 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35, + 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35, + 3, 9, 27, 63, 1, 4, 25, 59, 5, 12, 28, 67, 3, 7, 26, 62, + 3, 9, 27, 63, 1, 4, 25, 59, 5, 12, 28, 67, 3, 7, 26, 62, + 3, 9, 27, 63, 1, 4, 25, 59, 5, 12, 28, 67, 3, 7, 26, 62, + 3, 9, 27, 63, 1, 4, 25, 59, 5, 12, 28, 67, 3, 7, 26, 62, + 19, 45, 11, 27, 17, 41, 9, 22, 21, 49, 13, 30, 19, 44, 11, 26, + 19, 45, 11, 27, 17, 41, 9, 22, 21, 49, 13, 30, 19, 44, 11, 26, + 19, 45, 11, 27, 17, 41, 9, 22, 21, 49, 13, 30, 19, 44, 11, 26, + 19, 45, 11, 27, 17, 41, 9, 22, 21, 49, 13, 30, 19, 44, 11, 26, + 0, 2, 24, 57, 6, 15, 30, 70, 0, 1, 23, 55, 6, 14, 29, 69, + 0,
2, 24, 57, 6, 15, 30, 70, 0, 1, 23, 55, 6, 14, 29, 69, + 0, 2, 24, 57, 6, 15, 30, 70, 0, 1, 23, 55, 6, 14, 29, 69, + 0, 2, 24, 57, 6, 15, 30, 70, 0, 1, 23, 55, 6, 14, 29, 69, + 16, 38, 8, 20, 22, 52, 14, 34, 16, 37, 8, 19, 21, 51, 14, 33, + 16, 38, 8, 20, 22, 52, 14, 34, 16, 37, 8, 19, 21, 51, 14, 33, + 16, 38, 8, 20, 22, 52, 14, 34, 16, 37, 8, 19, 21, 51, 14, 33, + 16, 38, 8, 20, 22, 52, 14, 34, 16, 37, 8, 19, 21, 51, 14, 33, + 4, 11, 28, 66, 2, 6, 26, 61, 4, 10, 27, 65, 2, 5, 25, 60, + 4, 11, 28, 66, 2, 6, 26, 61, 4, 10, 27, 65, 2, 5, 25, 60, + 4, 11, 28, 66, 2, 6, 26, 61, 4, 10, 27, 65, 2, 5, 25, 60, + 4, 11, 28, 66, 2, 6, 26, 61, 4, 10, 27, 65, 2, 5, 25, 60, + 20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23, + 20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23, + 20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23, + 20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23, + 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, + 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, + 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, + 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, + 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35, + 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35 +}; + +/* Initial dither offset for a picture; rgb_start indexes it with temporal_reference & 63. */ static const uint8_t dither_temporal[64] = { + 0x00, 0x20, 0x21, 0x01, 0x40, 0x60, 0x61, 0x41, + 0x42, 0x62, 0x63, 0x43, 0x02, 0x22, 0x23, 0x03, + 0x80, 0xa0, 0xa1, 0x81, 0xc0, 0xe0, 0xe1, 0xc1, + 0xc2, 0xe2, 0xe3, 0xc3, 0x82, 0xa2, 0xa3, 0x83, + 0x84, 0xa4, 0xa5, 0x85, 0xc4, 0xe4, 0xe5, 0xc5, + 0xc6, 0xe6, 0xe7, 0xc7, 0x86, 0xa6, 0xa7, 0x87, + 0x04, 0x24, 0x25, 0x05, 0x44, 0x64, 0x65, 0x45, + 0x46, 0x66, 0x67, 0x47, 0x06, 0x26, 0x27, 0x07 +}; + +/* Converter instance for the C code path: the common state plus one lookup entry per chroma value. table_rV, table_gU and table_bU hold pointers into the colour tables built by rgb_c_init; table_gV holds a byte offset that the RGB macro adds to the table_gU pointer. */ typedef struct { + convert_rgb_t base; + void * table_rV[256]; + void * table_gU[256]; + int table_gV[256]; + void * table_bU[256]; +} convert_rgb_c_t; + +/* Loads chroma sample i and selects the r, g and b lookup rows for it. Expects U, V, r, g, b, pu, pv and id in the expanding scope. */ #define RGB(type,i) \ + U = pu[i]; \ + V = pv[i]; \ + r = (type *) id->table_rV[V];
\ + g = (type *) (((uint8_t *)id->table_gU[U]) + id->table_gV[V]); \ + b = (type *) id->table_bU[U]; + +/* Pixel writers used by the DECLARE_4xx templates. DST sums the three table entries into one packed pixel. DSTRGB and DSTBGR store three separate bytes per pixel. DSTDITHER offsets the luma index with dither entries before the lookup. The j argument is only used by DSTDITHER. */ #define DST(py,dst,i,j) \ + Y = py[i]; \ + dst[i] = r[Y] + g[Y] + b[Y]; + +#define DSTRGB(py,dst,i,j) \ + Y = py[i]; \ + dst[3*i] = r[Y]; dst[3*i+1] = g[Y]; dst[3*i+2] = b[Y]; + +#define DSTBGR(py,dst,i,j) \ + Y = py[i]; \ + dst[3*i] = b[Y]; dst[3*i+1] = g[Y]; dst[3*i+2] = r[Y]; + +#define DSTDITHER(py,dst,i,j) \ + Y = py[i]; \ + dst[i] = r[Y+pd[2*i+96*j]] + g[Y-pd[2*i+96*j]] + b[Y+pd[2*i+1+96*j]]; + +/* DO keeps and SKIP drops the dither-only statements inside the DECLARE_4xx templates. */ #define DO(x) x +#define SKIP(x) + +/* Generates a 4:2:0 slice converter named func. It makes 8 passes; each pass writes two output rows (dst_1 and dst_2) that share one chroma row, handling 8 pixels per inner iteration, with id->base.width inner iterations. When the pass counter reaches id->base.field the pointers restart one frame line down, presumably to process the second field -- confirm against rgb_start. */ #define DECLARE_420(func,type,num,DST,DITHER) \ +static void func (void * _id, uint8_t * const * src, \ + unsigned int v_offset) \ +{ \ + const convert_rgb_c_t * const id = (convert_rgb_c_t *) _id; \ + type * dst_1; \ + const uint8_t * py_1, * pu, * pv; \ + int i; \ + DITHER(uint8_t dithpos = id->base.dither_offset;) \ + \ + dst_1 = (type *)(id->base.rgb_ptr + id->base.rgb_slice * v_offset); \ + py_1 = src[0]; pu = src[1]; pv = src[2]; \ + \ + i = 8; \ + do { \ + const uint8_t * py_2; \ + int j, U, V, Y; \ + const type * r, * g, * b; \ + type * dst_2; \ + DITHER(const uint8_t * const pd = dither + 2 * dithpos;) \ + \ + dst_2 = (type *)((char *)dst_1 + id->base.rgb_stride); \ + py_2 = py_1 + id->base.y_stride; \ + j = id->base.width; \ + do { \ + RGB (type, 0) \ + DST (py_1, dst_1, 0, 0) \ + DST (py_1, dst_1, 1, 0) \ + DST (py_2, dst_2, 0, 1) \ + DST (py_2, dst_2, 1, 1) \ + \ + RGB (type, 1) \ + DST (py_2, dst_2, 2, 1) \ + DST (py_2, dst_2, 3, 1) \ + DST (py_1, dst_1, 2, 0) \ + DST (py_1, dst_1, 3, 0) \ + \ + RGB (type, 2) \ + DST (py_1, dst_1, 4, 0) \ + DST (py_1, dst_1, 5, 0) \ + DST (py_2, dst_2, 4, 1) \ + DST (py_2, dst_2, 5, 1) \ + \ + RGB (type, 3) \ + DST (py_2, dst_2, 6, 1) \ + DST (py_2, dst_2, 7, 1) \ + DST (py_1, dst_1, 6, 0) \ + DST (py_1, dst_1, 7, 0) \ + \ + pu += 4; \ + pv += 4; \ + py_1 += 8; \ + py_2 += 8; \ + dst_1 += 8 * num; \ + dst_2 += 8 * num; \ + } while (--j); \ + if (--i == id->base.field) { \ + dst_1 =
(type *)(id->base.rgb_ptr + \ + id->base.rgb_slice * (v_offset + 1)); \ + py_1 = src[0] + id->base.y_stride_frame; \ + pu = src[1] + id->base.uv_stride_frame; \ + pv = src[2] + id->base.uv_stride_frame; \ + } else { \ + py_1 += id->base.y_increm; \ + pu += id->base.uv_increm; \ + pv += id->base.uv_increm; \ + dst_1 = (type *)((char *)dst_1 + id->base.rgb_increm); \ + DITHER(dithpos += id->base.dither_stride;) \ + } \ + } while (i); \ +} + +DECLARE_420 (rgb_c_32_420, uint32_t, 1, DST, SKIP) +DECLARE_420 (rgb_c_24_rgb_420, uint8_t, 3, DSTRGB, SKIP) +DECLARE_420 (rgb_c_24_bgr_420, uint8_t, 3, DSTBGR, SKIP) +DECLARE_420 (rgb_c_16_420, uint16_t, 1, DST, SKIP) +DECLARE_420 (rgb_c_8_420, uint8_t, 1, DSTDITHER, DO) + +/* Generates a 4:2:2 slice converter named func: 16 rows, one chroma sample for every two pixels, 8 pixels per inner iteration. */ #define DECLARE_422(func,type,num,DST,DITHER) \ +static void func (void * _id, uint8_t * const * src, \ + unsigned int v_offset) \ +{ \ + const convert_rgb_c_t * const id = (convert_rgb_c_t *) _id; \ + type * dst; \ + const uint8_t * py, * pu, * pv; \ + int i; \ + DITHER(uint8_t dithpos = id->base.dither_offset;) \ + \ + dst = (type *)(id->base.rgb_ptr + id->base.rgb_stride * v_offset); \ + py = src[0]; pu = src[1]; pv = src[2]; \ + \ + i = 16; \ + do { \ + int j, U, V, Y; \ + const type * r, * g, * b; \ + DITHER(const uint8_t * const pd = dither + 2 * dithpos;) \ + \ + j = id->base.width; \ + do { \ + RGB (type, 0) \ + DST (py, dst, 0, 0) \ + DST (py, dst, 1, 0) \ + \ + RGB (type, 1) \ + DST (py, dst, 2, 0) \ + DST (py, dst, 3, 0) \ + \ + RGB (type, 2) \ + DST (py, dst, 4, 0) \ + DST (py, dst, 5, 0) \ + \ + RGB (type, 3) \ + DST (py, dst, 6, 0) \ + DST (py, dst, 7, 0) \ + \ + pu += 4; \ + pv += 4; \ + py += 8; \ + dst += 8 * num; \ + } while (--j); \ + py += id->base.y_increm; \ + pu += id->base.uv_increm; \ + pv += id->base.uv_increm; \ + dst = (type *)((char *)dst + id->base.rgb_increm); \ + DITHER(dithpos += id->base.dither_stride;) \ + } while (--i); \ +} + +DECLARE_422 (rgb_c_32_422, uint32_t, 1, DST, SKIP) +DECLARE_422 (rgb_c_24_rgb_422, uint8_t, 3,
DSTRGB, SKIP) +DECLARE_422 (rgb_c_24_bgr_422, uint8_t, 3, DSTBGR, SKIP) +DECLARE_422 (rgb_c_16_422, uint16_t, 1, DST, SKIP) +DECLARE_422 (rgb_c_8_422, uint8_t, 1, DSTDITHER, DO) + +/* Generates a 4:4:4 slice converter named func: 16 rows, one chroma sample per pixel, 8 pixels per inner iteration. The chroma pointers advance by y_increm at the end of each row, the same step as luma. */ #define DECLARE_444(func,type,num,DST,DITHER) \ +static void func (void * _id, uint8_t * const * src, \ + unsigned int v_offset) \ +{ \ + const convert_rgb_c_t * const id = (convert_rgb_c_t *) _id; \ + type * dst; \ + const uint8_t * py, * pu, * pv; \ + int i; \ + DITHER(uint8_t dithpos = id->base.dither_offset;) \ + \ + dst = (type *)(id->base.rgb_ptr + id->base.rgb_stride * v_offset); \ + py = src[0]; pu = src[1]; pv = src[2]; \ + \ + i = 16; \ + do { \ + int j, U, V, Y; \ + const type * r, * g, * b; \ + DITHER(const uint8_t * const pd = dither + 2 * dithpos;) \ + \ + j = id->base.width; \ + do { \ + RGB (type, 0) \ + DST (py, dst, 0, 0) \ + RGB (type, 1) \ + DST (py, dst, 1, 0) \ + RGB (type, 2) \ + DST (py, dst, 2, 0) \ + RGB (type, 3) \ + DST (py, dst, 3, 0) \ + RGB (type, 4) \ + DST (py, dst, 4, 0) \ + RGB (type, 5) \ + DST (py, dst, 5, 0) \ + RGB (type, 6) \ + DST (py, dst, 6, 0) \ + RGB (type, 7) \ + DST (py, dst, 7, 0) \ + \ + pu += 8; \ + pv += 8; \ + py += 8; \ + dst += 8 * num; \ + } while (--j); \ + py += id->base.y_increm; \ + pu += id->base.y_increm; \ + pv += id->base.y_increm; \ + dst = (type *)((char *)dst + id->base.rgb_increm); \ + DITHER(dithpos += id->base.dither_stride;) \ + } while (--i); \ +} + +DECLARE_444 (rgb_c_32_444, uint32_t, 1, DST, SKIP) +DECLARE_444 (rgb_c_24_rgb_444, uint8_t, 3, DSTRGB, SKIP) +DECLARE_444 (rgb_c_24_bgr_444, uint8_t, 3, DSTBGR, SKIP) +DECLARE_444 (rgb_c_16_444, uint16_t, 1, DST, SKIP) +DECLARE_444 (rgb_c_8_444, uint8_t, 1, DSTDITHER, DO) + +/* Per-picture setup installed as result->start by rgb_internal. Points rgb_ptr at fbuf->buf[0] and derives the strides, row increments, dither phase and field counter from the frame strides and the picture's field count and flags. gop is not used. */ static void rgb_start (void * _id, const mpeg2_fbuf_t * fbuf, + const mpeg2_picture_t * picture, + const mpeg2_gop_t * gop) +{ + convert_rgb_t * id = (convert_rgb_t *) _id; + int uv_stride = id->uv_stride_frame; + id->y_stride = id->y_stride_frame; + id->rgb_ptr = fbuf->buf[0]; + id->rgb_slice =
id->rgb_stride = id->rgb_stride_frame; + id->dither_stride = 32; + id->dither_offset = dither_temporal[picture->temporal_reference & 63]; + id->field = 0; + /* field pictures, and 4:2:0 frames not flagged progressive, step two frame lines at a time */ + if ((picture->nb_fields == 1) || + (id->chroma420 && !(picture->flags & PIC_FLAG_PROGRESSIVE_FRAME))) { + uv_stride <<= 1; + id->y_stride <<= 1; + id->rgb_stride <<= 1; + id->dither_stride <<= 1; + id->dither_offset += 16; + if (picture->nb_fields == 1) { + id->rgb_slice <<= 1; + if (!(picture->flags & PIC_FLAG_TOP_FIELD_FIRST)) { + id->rgb_ptr += id->rgb_stride_frame; + id->dither_offset += 32; + } + } else + id->field = 8 >> id->convert420; + } + id->y_increm = (id->y_stride << id->convert420) - id->y_stride_frame; + id->uv_increm = uv_stride - id->uv_stride_frame; + id->rgb_increm = (id->rgb_stride << id->convert420) - id->rgb_stride_min; + id->dither_stride <<= id->convert420; +} + +/* Integer division rounded to the nearest integer, symmetric around zero. Written for a positive divisor; the callers in this file always pass 76309. */ static inline int div_round (int dividend, int divisor) +{ + if (dividend > 0) + return (dividend + (divisor>>1)) / divisor; + else + return -((-dividend + (divisor>>1)) / divisor); +} + +/* Builds the colour lookup tables stored directly after *id for the requested order and bpp, then fills id->table_rV, table_gU, table_gV and table_bU for all 256 chroma values. When id is NULL it only returns the number of table bytes needed for that bpp; otherwise it returns 0. NOTE(review): with id == NULL and a bpp outside 8, 15, 16, 24, 32 the switch falls through and id is dereferenced; the entry points defined in this file never pass such a bpp. */ static unsigned int rgb_c_init (convert_rgb_c_t * id, + mpeg2convert_rgb_order_t order, + unsigned int bpp) +{ + int i; + uint8_t table_Y[1024]; + uint32_t * table_32 = 0; + uint16_t * table_16 = 0; + uint8_t * table_8 = 0; + uint8_t * table_332 = 0; + int entry_size = 0; + void * table_r = 0; + void * table_g = 0; + void * table_b = 0; + + int crv = Inverse_Table_6_9[matrix_coefficients][0]; + int cbu = Inverse_Table_6_9[matrix_coefficients][1]; + int cgu = -Inverse_Table_6_9[matrix_coefficients][2]; + int cgv = -Inverse_Table_6_9[matrix_coefficients][3]; + + /* table_Y[i] is the scaled luma for index i - 384, clamped to 0..255; the 384 bias leaves room for negative indices */ + for (i = 0; i < 1024; i++) { + int j; + + j = (76309 * (i - 384 - 16) + 32768) >> 16; + table_Y[i] = (j < 0) ? 0 : ((j > 255) ?
255 : j); + } + + switch (bpp) { + case 32: + if (!id) + return (197 + 2*682 + 256 + 132) * sizeof (uint32_t); + table_32 = (uint32_t *) (id + 1); + entry_size = sizeof (uint32_t); + table_r = table_32 + 197; + table_b = table_32 + 197 + 685; + table_g = table_32 + 197 + 2*682; + + for (i = -197; i < 256+197; i++) + ((uint32_t *) table_r)[i] = + table_Y[i+384] << ((order == MPEG2CONVERT_RGB) ? 16 : 0); + for (i = -132; i < 256+132; i++) + ((uint32_t *) table_g)[i] = table_Y[i+384] << 8; + for (i = -232; i < 256+232; i++) + ((uint32_t *) table_b)[i] = + table_Y[i+384] << ((order == MPEG2CONVERT_RGB) ? 0 : 16); + break; + + case 24: + if (!id) + return (256 + 2*232) * sizeof (uint8_t); + table_8 = (uint8_t *) (id + 1); + entry_size = sizeof (uint8_t); + /* one shared byte table serves all three components */ + table_r = table_g = table_b = table_8 + 232; + + for (i = -232; i < 256+232; i++) + ((uint8_t * )table_b)[i] = table_Y[i+384]; + break; + + case 15: + case 16: + if (!id) + return (197 + 2*682 + 256 + 132) * sizeof (uint16_t); + table_16 = (uint16_t *) (id + 1); + entry_size = sizeof (uint16_t); + table_r = table_16 + 197; + table_b = table_16 + 197 + 685; + table_g = table_16 + 197 + 2*682; + + for (i = -197; i < 256+197; i++) { + int j = table_Y[i+384] >> 3; + + if (order == MPEG2CONVERT_RGB) + j <<= ((bpp==16) ? 11 : 10); + + ((uint16_t *)table_r)[i] = j; + } + for (i = -132; i < 256+132; i++) { + int j = table_Y[i+384] >> ((bpp==16) ? 2 : 3); + + ((uint16_t *)table_g)[i] = j << 5; + } + for (i = -232; i < 256+232; i++) { + int j = table_Y[i+384] >> 3; + + if (order == MPEG2CONVERT_BGR) + j <<= ((bpp==16) ? 
11 : 10); + + ((uint16_t *)table_b)[i] = j; + } + break; + + case 8: + if (!id) + return (197 + 2*682 + 256 + 232 + 71) * sizeof (uint8_t); + table_332 = (uint8_t *) (id + 1); + entry_size = sizeof (uint8_t); + table_r = table_332 + 197; + table_g = table_332 + 197 + 682 + 30; + table_b = table_332 + 197 + 2*682; + + /* the extra 30 and 71 entries extend each table beyond the 256-entry range of the other depths; DSTDITHER indexes with Y plus or minus a dither value */ + for (i = -197; i < 256+197+30; i++) + ((uint8_t *)table_r)[i] = ((table_Y[i+384] * 7 / 255) << + (order == MPEG2CONVERT_RGB ? 5 : 0)); + for (i = -132; i < 256+132+30; i++) + ((uint8_t *)table_g)[i-30] = ((table_Y[i+384] * 7 / 255) << + (order == MPEG2CONVERT_RGB ? 2 : 3)); + for (i = -232; i < 256+232+71; i++) + ((uint8_t *)table_b)[i] = ((table_Y[i+384] / 85) << + (order == MPEG2CONVERT_RGB ? 0 : 6)); + break; + } + + /* per chroma value: pointer (or, for gV, byte offset) into the tables above, shifted by that chroma value's contribution */ + for (i = 0; i < 256; i++) { + id->table_rV[i] = (((uint8_t *)table_r) + + entry_size * div_round (crv * (i-128), 76309)); + id->table_gU[i] = (((uint8_t *)table_g) + + entry_size * div_round (cgu * (i-128), 76309)); + id->table_gV[i] = entry_size * div_round (cgv * (i-128), 76309); + id->table_bU[i] = (((uint8_t *)table_b) + + entry_size * div_round (cbu * (i-128), 76309)); + } + + return 0; +} + +/* Shared implementation behind the mpeg2convert_rgbNN and mpeg2convert_bgrNN entry points. Chooses an accelerated copy routine when one is available for accel, otherwise a C routine, and always stores the required instance size in result->id_size. For MPEG2_CONVERT_STRIDE it returns the stride, raised to at least rgb_stride_min. For MPEG2_CONVERT_START it fills *id and the remaining fields of *result and returns 0. Any other stage returns 0. arg is not used. */ static int rgb_internal (mpeg2convert_rgb_order_t order, unsigned int bpp, + int stage, void * _id, const mpeg2_sequence_t * seq, + int stride, uint32_t accel, void * arg, + mpeg2_convert_init_t * result) +{ + convert_rgb_t * id = (convert_rgb_t *) _id; + mpeg2convert_copy_t * copy = (mpeg2convert_copy_t *) 0; + unsigned int id_size = sizeof (convert_rgb_t); + int chroma420 = (seq->chroma_height < seq->height); + int convert420 = 0; + int rgb_stride_min = ((bpp + 7) >> 3) * seq->width; + +#ifdef ARCH_X86 + if (!copy && (accel & MPEG2_ACCEL_X86_MMXEXT)) { + convert420 = 0; + copy = mpeg2convert_rgb_mmxext (order, bpp, seq); + } + if (!copy && (accel & MPEG2_ACCEL_X86_MMX)) { + convert420 = 0; + copy = mpeg2convert_rgb_mmx (order, bpp, seq); + } +#endif +#ifdef ARCH_SPARC + if (!copy && (accel & MPEG2_ACCEL_SPARC_VIS)) { + convert420 =
chroma420; + copy = mpeg2convert_rgb_vis (order, bpp, seq); + } +#endif + if (!copy) { + int src, dest; + /* C converters indexed [src][dest]. src 0, 1, 2 selects the 420, 422 and 444 variants. dest 0 is 24-bit BGR; otherwise dest is the number of bytes per pixel. */ + static void (* rgb_c[3][5]) (void *, uint8_t * const *, + unsigned int) = + {{rgb_c_24_bgr_420, rgb_c_8_420, rgb_c_16_420, + rgb_c_24_rgb_420, rgb_c_32_420}, + {rgb_c_24_bgr_422, rgb_c_8_422, rgb_c_16_422, + rgb_c_24_rgb_422, rgb_c_32_422}, + {rgb_c_24_bgr_444, rgb_c_8_444, rgb_c_16_444, + rgb_c_24_rgb_444, rgb_c_32_444}}; + + convert420 = chroma420; + /* rgb_c_init returns the table size when id is NULL and builds the tables (returning 0) otherwise */ + id_size = (sizeof (convert_rgb_c_t) + + rgb_c_init ((convert_rgb_c_t *) id, order, bpp)); + src = ((seq->chroma_width == seq->width) + + (seq->chroma_height == seq->height)); + dest = ((bpp == 24 && order == MPEG2CONVERT_BGR) ? 0 : (bpp + 7) >> 3); + copy = rgb_c[src][dest]; + } + + result->id_size = id_size; + + if (stride < rgb_stride_min) + stride = rgb_stride_min; + + if (stage == MPEG2_CONVERT_STRIDE) + return stride; + else if (stage == MPEG2_CONVERT_START) { + /* width is stored in groups of 8 pixels, matching the 8-pixel inner loops of the converters */ + id->width = seq->width >> 3; + id->y_stride_frame = seq->width; + id->uv_stride_frame = seq->chroma_width; + id->rgb_stride_frame = stride; + id->rgb_stride_min = rgb_stride_min; + id->chroma420 = chroma420; + id->convert420 = convert420; + result->buf_size[0] = stride * seq->height; + result->buf_size[1] = result->buf_size[2] = 0; + result->start = rgb_start; + result->copy = copy; + } + return 0; +} + +/* Defines the public converter func as a thin wrapper that calls rgb_internal with a fixed component order and bit depth. */ #define DECLARE(func,order,bpp) \ +int func (int stage, void * id, \ + const mpeg2_sequence_t * sequence, int stride, \ + uint32_t accel, void * arg, mpeg2_convert_init_t * result) \ +{ \ + return rgb_internal (order, bpp, stage, id, sequence, stride, \ + accel, arg, result); \ +} + +DECLARE (mpeg2convert_rgb32, MPEG2CONVERT_RGB, 32) +DECLARE (mpeg2convert_rgb24, MPEG2CONVERT_RGB, 24) +DECLARE (mpeg2convert_rgb16, MPEG2CONVERT_RGB, 16) +DECLARE (mpeg2convert_rgb15, MPEG2CONVERT_RGB, 15) +DECLARE (mpeg2convert_rgb8, MPEG2CONVERT_RGB, 8) +DECLARE (mpeg2convert_bgr32, MPEG2CONVERT_BGR, 32) +DECLARE (mpeg2convert_bgr24, MPEG2CONVERT_BGR, 24) +DECLARE
(mpeg2convert_bgr16, MPEG2CONVERT_BGR, 16) +DECLARE (mpeg2convert_bgr15, MPEG2CONVERT_BGR, 15) +DECLARE (mpeg2convert_bgr8, MPEG2CONVERT_BGR, 8) + +/* Returns the converter for the requested component order and bit depth. Returns a null pointer when order is neither MPEG2CONVERT_RGB nor MPEG2CONVERT_BGR, or when bpp is not one of 8, 15, 16, 24, 32. */ mpeg2_convert_t * mpeg2convert_rgb (mpeg2convert_rgb_order_t order, + unsigned int bpp) +{ + /* row 0 is 15 bpp; the other rows are indexed by bpp / 8. Column 1 is the BGR variant. */ + static mpeg2_convert_t * table[5][2] = + {{mpeg2convert_rgb15, mpeg2convert_bgr15}, + {mpeg2convert_rgb8, mpeg2convert_bgr8}, + {mpeg2convert_rgb16, mpeg2convert_bgr16}, + {mpeg2convert_rgb24, mpeg2convert_bgr24}, + {mpeg2convert_rgb32, mpeg2convert_bgr32}}; + + if (order == MPEG2CONVERT_RGB || order == MPEG2CONVERT_BGR) { + if (bpp == 15) + return table[0][order == MPEG2CONVERT_BGR]; + else if (bpp >= 8 && bpp <= 32 && (bpp & 7) == 0) + return table[bpp >> 3][order == MPEG2CONVERT_BGR]; + } + return (mpeg2_convert_t *) 0; +} diff --git a/src/video_dec/libmpeg2new/libmpeg2/rgb_mmx.c b/src/video_dec/libmpeg2new/libmpeg2/rgb_mmx.c new file mode 100644 index 000000000..6ca7e65a8 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/rgb_mmx.c @@ -0,0 +1,321 @@ +/* + * rgb_mmx.c + * Copyright (C) 2000-2003 Silicon Integrated System Corp. + * All Rights Reserved. + * + * Author: Olie Lho + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef ARCH_X86 + +#include +#include +#include + +#include "mpeg2.h" +#include "mpeg2convert.h" +#include "convert_internal.h" +#include +#include "mmx.h" + +#define CPU_MMXEXT 0 +#define CPU_MMX 1 + +/* CPU_MMXEXT/CPU_MMX adaptation layer */ + +/* Stores src to dest with movntq_r2m when cpu is CPU_MMXEXT and with movq_r2m otherwise. Relies on a variable named cpu being in scope where the macro is expanded. */ #define movntq(src,dest) \ +do { \ + if (cpu == CPU_MMXEXT) \ + movntq_r2m (src, dest); \ + else \ + movq_r2m (src, dest); \ +} while (0) + +/* Converts 8 luma samples at py and 4 chroma samples each at pu and pv. Per the register comments below, the result is left in MMX registers as 8 bytes per component: mm0 = B, mm1 = R, mm2 = G. Nothing is written to memory. */ static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv) +{ + static mmx_t mmx_80w = {0x0080008000800080LL}; + static mmx_t mmx_U_green = {0xf37df37df37df37dLL}; + static mmx_t mmx_U_blue = {0x4093409340934093LL}; + static mmx_t mmx_V_red = {0x3312331233123312LL}; + static mmx_t mmx_V_green = {0xe5fce5fce5fce5fcLL}; + static mmx_t mmx_10w = {0x1010101010101010LL}; + static mmx_t mmx_00ffw = {0x00ff00ff00ff00ffLL}; + static mmx_t mmx_Y_coeff = {0x253f253f253f253fLL}; + + movd_m2r (*pu, mm0); /* mm0 = 00 00 00 00 u3 u2 u1 u0 */ + movd_m2r (*pv, mm1); /* mm1 = 00 00 00 00 v3 v2 v1 v0 */ + movq_m2r (*py, mm6); /* mm6 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ + pxor_r2r (mm4, mm4); /* mm4 = 0 */ + /* XXX might do cache preload for image here */ + + /* + * Do the multiply part of the conversion for even and odd pixels + * register usage: + * mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels + * mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd pixels + * mm6 -> Y even, mm7 -> Y odd + */ + + punpcklbw_r2r (mm4, mm0); /* mm0 = u3 u2 u1 u0 */ + punpcklbw_r2r (mm4, mm1); /* mm1 = v3 v2 v1 v0 */ + psubsw_m2r (mmx_80w, mm0); /* u -= 128 */ + psubsw_m2r (mmx_80w, mm1); /* v -= 128 */ + psllw_i2r (3, mm0); /* promote precision */ + psllw_i2r (3, mm1); /* promote precision */ + movq_r2r (mm0, mm2); /* mm2 = u3 u2 u1 u0 */ + movq_r2r (mm1, mm3); /* mm3 = v3
v2 v1 v0 */ + pmulhw_m2r (mmx_U_green, mm2); /* mm2 = u * u_green */ + pmulhw_m2r (mmx_V_green, mm3); /* mm3 = v * v_green */ + pmulhw_m2r (mmx_U_blue, mm0); /* mm0 = chroma_b */ + pmulhw_m2r (mmx_V_red, mm1); /* mm1 = chroma_r */ + paddsw_r2r (mm3, mm2); /* mm2 = chroma_g */ + + psubusb_m2r (mmx_10w, mm6); /* Y -= 16 */ + movq_r2r (mm6, mm7); /* mm7 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ + pand_m2r (mmx_00ffw, mm6); /* mm6 = Y6 Y4 Y2 Y0 */ + psrlw_i2r (8, mm7); /* mm7 = Y7 Y5 Y3 Y1 */ + psllw_i2r (3, mm6); /* promote precision */ + psllw_i2r (3, mm7); /* promote precision */ + pmulhw_m2r (mmx_Y_coeff, mm6); /* mm6 = luma_rgb even */ + pmulhw_m2r (mmx_Y_coeff, mm7); /* mm7 = luma_rgb odd */ + + /* + * Do the addition part of the conversion for even and odd pixels + * register usage: + * mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels + * mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd pixels + * mm6 -> Y even, mm7 -> Y odd + */ + + movq_r2r (mm0, mm3); /* mm3 = chroma_b */ + movq_r2r (mm1, mm4); /* mm4 = chroma_r */ + movq_r2r (mm2, mm5); /* mm5 = chroma_g */ + paddsw_r2r (mm6, mm0); /* mm0 = B6 B4 B2 B0 */ + paddsw_r2r (mm7, mm3); /* mm3 = B7 B5 B3 B1 */ + paddsw_r2r (mm6, mm1); /* mm1 = R6 R4 R2 R0 */ + paddsw_r2r (mm7, mm4); /* mm4 = R7 R5 R3 R1 */ + paddsw_r2r (mm6, mm2); /* mm2 = G6 G4 G2 G0 */ + paddsw_r2r (mm7, mm5); /* mm5 = G7 G5 G3 G1 */ + packuswb_r2r (mm0, mm0); /* saturate to 0-255 */ + packuswb_r2r (mm1, mm1); /* saturate to 0-255 */ + packuswb_r2r (mm2, mm2); /* saturate to 0-255 */ + packuswb_r2r (mm3, mm3); /* saturate to 0-255 */ + packuswb_r2r (mm4, mm4); /* saturate to 0-255 */ + packuswb_r2r (mm5, mm5); /* saturate to 0-255 */ + punpcklbw_r2r (mm3, mm0); /* mm0 = B7 B6 B5 B4 B3 B2 B1 B0 */ + punpcklbw_r2r (mm4, mm1); /* mm1 = R7 R6 R5 R4 R3 R2 R1 R0 */ + punpcklbw_r2r (mm5, mm2); /* mm2 = G7 G6 G5 G4 G3 G2 G1 G0 */ +} + +/* Packs the component bytes held in mm0 (B), mm1 (R) and mm2 (G) into 16-bit pixels and stores 16 bytes at image using two movntq stores. Presumably meant to run right after mmx_yuv2rgb -- confirm at the call sites. */ static inline void mmx_unpack_16rgb (uint8_t * image, const int cpu) +{ + static mmx_t mmx_bluemask = {0xf8f8f8f8f8f8f8f8LL}; +
static mmx_t mmx_greenmask = {0xfcfcfcfcfcfcfcfcLL}; + static mmx_t mmx_redmask = {0xf8f8f8f8f8f8f8f8LL}; + + /* + * convert RGB plane to RGB 16 bits + * mm0 -> B, mm1 -> R, mm2 -> G + * mm4 -> GB, mm5 -> AR pixel 4-7 + * mm6 -> GB, mm7 -> AR pixel 0-3 + */ + + pand_m2r (mmx_bluemask, mm0); /* mm0 = b7b6b5b4b3______ */ + pand_m2r (mmx_greenmask, mm2); /* mm2 = g7g6g5g4g3g2____ */ + pand_m2r (mmx_redmask, mm1); /* mm1 = r7r6r5r4r3______ */ + psrlq_i2r (3, mm0); /* mm0 = ______b7b6b5b4b3 */ + pxor_r2r (mm4, mm4); /* mm4 = 0 */ + movq_r2r (mm0, mm5); /* mm5 = ______b7b6b5b4b3 */ + movq_r2r (mm2, mm7); /* mm7 = g7g6g5g4g3g2____ */ + + punpcklbw_r2r (mm4, mm2); + punpcklbw_r2r (mm1, mm0); + psllq_i2r (3, mm2); + por_r2r (mm2, mm0); + movntq (mm0, *image); + + punpckhbw_r2r (mm4, mm7); + punpckhbw_r2r (mm1, mm5); + psllq_i2r (3, mm7); + por_r2r (mm7, mm5); + movntq (mm5, *(image+8)); +} +/* Interleave the planar bytes in mm0 (blue), mm2 (green) and mm1 (red) with a zero byte taken from mm3 into four-byte pixels and store 32 bytes in four movntq writes at image, image+8, image+16 and image+24. */ +static inline void mmx_unpack_32rgb (uint8_t * image, const int cpu) +{ + /* + * convert RGB plane to RGB packed format, + * mm0 -> B, mm1 -> R, mm2 -> G, mm3 -> 0, + * mm4 -> GB, mm5 -> AR pixel 4-7, + * mm6 -> GB, mm7 -> AR pixel 0-3 + */ + + pxor_r2r (mm3, mm3); + movq_r2r (mm0, mm6); + movq_r2r (mm1, mm7); + movq_r2r (mm0, mm4); + movq_r2r (mm1, mm5); + punpcklbw_r2r (mm2, mm6); + punpcklbw_r2r (mm3, mm7); + punpcklwd_r2r (mm7, mm6); + movntq (mm6, *image); + movq_r2r (mm0, mm6); + punpcklbw_r2r (mm2, mm6); + punpckhwd_r2r (mm7, mm6); + movntq (mm6, *(image+8)); + punpckhbw_r2r (mm2, mm4); + punpckhbw_r2r (mm3, mm5); + punpcklwd_r2r (mm5, mm4); + movntq (mm4, *(image+16)); + movq_r2r (mm0, mm4); + punpckhbw_r2r (mm2, mm4); + punpckhwd_r2r (mm5, mm4); + movntq (mm4, *(image+24)); +} +/* Convert 16 rows (i counts down from 16) of the planes in src[0..2] to 16-bit pixels, writing from id->rgb_ptr + id->rgb_slice * v_offset. id->width is used as the number of inner iterations per row; each iteration consumes 8 luma and 4+4 chroma bytes and emits 16 output bytes. */ +static inline void rgb16 (void * const _id, uint8_t * const * src, + const unsigned int v_offset, const int cpu) +{ + convert_rgb_t * const id = (convert_rgb_t *) _id; + uint8_t * dst; + uint8_t * py, * pu, * pv; + int i, j; + + dst = id->rgb_ptr + id->rgb_slice * v_offset; + py = src[0]; pu = src[1]; 
pv = src[2]; + + i = 16; + do { + j = id->width; + do { + mmx_yuv2rgb (py, pu, pv); + mmx_unpack_16rgb (dst, cpu); + py += 8; + pu += 4; + pv += 4; + dst += 16; + } while (--j); + /* row done: advance dst and py, then either restart all pointers one row in (when the decremented i equals id->field), advance chroma by uv_increm, or step chroma back by uv_stride_frame, depending on i & id->chroma420 */ + dst += id->rgb_increm; + py += id->y_increm; + if (--i == id->field) { + dst = id->rgb_ptr + id->rgb_slice * (v_offset + 1); + py = src[0] + id->y_stride_frame; + pu = src[1] + id->uv_stride_frame; + pv = src[2] + id->uv_stride_frame; + } else if (! (i & id->chroma420)) { + pu += id->uv_increm; + pv += id->uv_increm; + } else { + pu -= id->uv_stride_frame; + pv -= id->uv_stride_frame; + } + } while (i); +} +/* Same row/pointer walk as rgb16, but each inner iteration calls mmx_unpack_32rgb and advances dst by 32 bytes. */ +static inline void argb32 (void * const _id, uint8_t * const * src, + const unsigned int v_offset, const int cpu) +{ + convert_rgb_t * const id = (convert_rgb_t *) _id; + uint8_t * dst; + uint8_t * py, * pu, * pv; + int i, j; + + dst = id->rgb_ptr + id->rgb_slice * v_offset; + py = src[0]; pu = src[1]; pv = src[2]; + + i = 16; + do { + j = id->width; + do { + mmx_yuv2rgb (py, pu, pv); + mmx_unpack_32rgb (dst, cpu); + py += 8; + pu += 4; + pv += 4; + dst += 32; + } while (--j); + + dst += id->rgb_increm; + py += id->y_increm; + if (--i == id->field) { + dst = id->rgb_ptr + id->rgb_slice * (v_offset + 1); + py = src[0] + id->y_stride_frame; + pu = src[1] + id->uv_stride_frame; + pv = src[2] + id->uv_stride_frame; + } else if (! 
(i & id->chroma420)) { + pu += id->uv_increm; + pv += id->uv_increm; + } else { + pu -= id->uv_stride_frame; + pv -= id->uv_stride_frame; + } + } while (i); +} +/* The four wrappers below bind the cpu argument of rgb16/argb32 to a constant (CPU_MMXEXT or CPU_MMX) so they can be returned as mpeg2convert_copy_t callbacks. */ +static void mmxext_rgb16 (void * id, uint8_t * const * src, + unsigned int v_offset) +{ + rgb16 (id, src, v_offset, CPU_MMXEXT); +} + +static void mmxext_argb32 (void * id, uint8_t * const * src, + unsigned int v_offset) +{ + argb32 (id, src, v_offset, CPU_MMXEXT); +} + +static void mmx_rgb16 (void * id, uint8_t * const * src, unsigned int v_offset) +{ + rgb16 (id, src, v_offset, CPU_MMX); +} + +static void mmx_argb32 (void * id, uint8_t * const * src, + unsigned int v_offset) +{ + argb32 (id, src, v_offset, CPU_MMX); +} +/* Return the MMXEXT converter for 16 or 32 bpp when order is MPEG2CONVERT_RGB and seq->chroma_width < seq->width; return NULL for every other combination so the caller can fall back to C. */ +mpeg2convert_copy_t * mpeg2convert_rgb_mmxext (int order, int bpp, + const mpeg2_sequence_t * seq) +{ + if (order == MPEG2CONVERT_RGB && seq->chroma_width < seq->width) { + if (bpp == 16) + return mmxext_rgb16; + else if (bpp == 32) + return mmxext_argb32; + } + return NULL; /* Fallback to C */ +} +/* Plain-MMX counterpart of mpeg2convert_rgb_mmxext: same conditions, returns mmx_rgb16 / mmx_argb32. */ +mpeg2convert_copy_t * mpeg2convert_rgb_mmx (int order, int bpp, + const mpeg2_sequence_t * seq) +{ + if (order == MPEG2CONVERT_RGB && seq->chroma_width < seq->width) { + if (bpp == 16) + return mmx_rgb16; + else if (bpp == 32) + return mmx_argb32; + } + return NULL; /* Fallback to C */ +} +#endif diff --git a/src/video_dec/libmpeg2new/libmpeg2/rgb_vis.c b/src/video_dec/libmpeg2new/libmpeg2/rgb_vis.c new file mode 100644 index 000000000..cbd7c7072 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/rgb_vis.c @@ -0,0 +1,384 @@ +/* + * rgb_vis.c + * Copyright (C) 2003 David S. Miller + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef ARCH_SPARC + +#include +#include + +#include "mpeg2.h" +#include "mpeg2convert.h" +#include "convert_internal.h" +#include +#include "vis.h" + +/* Based partially upon the MMX yuv2rgb code, see there for credits. + * + * The difference here is that since we have enough registers we + * process both even and odd scanlines in one pass. + */ +/* Constant tables; vis_init_consts() loads each of them into the matching CONST_* operand. */ +static const uint16_t const_2048[] ATTR_ALIGN(8) = {2048, 2048, 2048, 2048}; +static const uint16_t const_1024[] ATTR_ALIGN(8) = {1024, 1024, 1024, 1024}; +static const uint16_t const_128[] ATTR_ALIGN(8) = {128, 128, 128, 128}; +static const uint8_t const_Ugreen[] ATTR_ALIGN(8) = + {0xf3, 0x00, 0xf3, 0x00, 0xf3, 0x00, 0xf3, 0x00}; +static const uint8_t const_Vgreen[] ATTR_ALIGN(8) = + {0xe6, 0x00, 0xe6, 0x00, 0xe6, 0x00, 0xe6, 0x00}; +static const uint8_t const_Ublue_Vred[] ATTR_ALIGN(8) = + {0x41, 0x41, 0x41, 0x41, 0x33, 0x33, 0x33, 0x33}; +static const uint8_t const_Ycoeff[] ATTR_ALIGN(4) = {0x25, 0x25, 0x25, 0x25}; +/* Operand numbers handed to the vis_* macros; presumably VIS floating-point register indices -- confirm against vis.h. NOTE(review): CONST_VRED (15) is never loaded on its own; presumably it is filled as the second half of the 64-bit load of const_Ublue_Vred into CONST_UBLUE (14) -- confirm. */ +#define TMP0 0 +#define TMP1 1 +#define TMP2 2 +#define TMP3 3 +#define TMP4 4 +#define TMP5 5 +#define TMP6 6 +#define TMP7 7 +#define TMP8 8 +#define TMP9 9 +#define TMP10 10 +#define TMP11 11 +#define TMP12 12 +#define TMP13 13 + +#define CONST_UBLUE 14 +#define CONST_VRED 15 +#define CONST_2048 16 + +#define BLUE8_EVEN 18 +#define BLUE8_ODD 19 +#define RED8_EVEN 20 +#define RED8_ODD 21 +#define GREEN8_EVEN 22 +#define GREEN8_ODD 23 + +#define BLUE8_2_EVEN 24 +#define BLUE8_2_ODD 25 +#define RED8_2_EVEN 26 +#define 
RED8_2_ODD 27 +#define GREEN8_2_EVEN 28 +#define GREEN8_2_ODD 29 + +#define CONST_YCOEFF 30 +#define ZEROS 31 + +#define PU_0 32 +#define PU_2 34 +#define PV_0 36 +#define PV_2 38 +#define PY_0 40 +#define PY_2 42 +#define PY_4 44 +#define PY_6 46 + +#define CONST_128 56 +#define CONST_1024 58 +#define CONST_VGREEN 60 +#define CONST_UGREEN 62 +/* Program the GSR with 7 << VIS_GSR_SCALEFACT_SHIFT, load every constant table into its CONST_* operand and clear ZEROS. Both public converters call this once before converting a 16-row slice. */ +static inline void vis_init_consts(void) +{ + vis_set_gsr(7 << VIS_GSR_SCALEFACT_SHIFT); + + vis_ld64(const_2048[0], CONST_2048); + vis_ld64(const_1024[0], CONST_1024); + vis_ld64(const_Ugreen[0], CONST_UGREEN); + vis_ld64(const_Vgreen[0], CONST_VGREEN); + vis_fzeros(ZEROS); + vis_ld64(const_Ublue_Vred[0], CONST_UBLUE); + vis_ld32(const_Ycoeff[0], CONST_YCOEFF); + vis_ld64(const_128[0], CONST_128); +} +/* Convert one group of samples for two scanlines at once: 32 bits are loaded from pu and from pv, 64 bits from py, and a second 64-bit luma load takes py and y_stride (presumably the row at py + y_stride -- confirm against vis_ld64_2). Results are left in the BLUE8_, RED8_ and GREEN8_ operands for the first row and in their _2_ counterparts for the second; nothing is stored to memory here. */ +static inline void vis_yuv2rgb(uint8_t *py, uint8_t *pu, uint8_t *pv, + int y_stride) +{ + vis_ld32(pu[0], TMP0); + + vis_ld32(pv[0], TMP2); + + vis_ld64(py[0], TMP4); + vis_mul8x16au(TMP0, CONST_2048, PU_0); + + vis_ld64_2(py, y_stride, TMP8); + vis_mul8x16au(TMP2, CONST_2048, PV_0); + + vis_pmerge(TMP4, TMP5, TMP6); + + vis_pmerge(TMP6, TMP7, TMP4); + + vis_pmerge(TMP8, TMP9, TMP10); + + vis_pmerge(TMP10, TMP11, TMP8); + vis_mul8x16au(TMP4, CONST_2048, PY_0); + + vis_psub16(PU_0, CONST_1024, PU_0); + vis_mul8x16au(TMP5, CONST_2048, PY_2); + + vis_psub16(PV_0, CONST_1024, PV_0); + vis_mul8x16au(TMP8, CONST_2048, PY_4); + + vis_psub16(PY_0, CONST_128, PY_0); + vis_mul8x16au(TMP9, CONST_2048, PY_6); + + vis_psub16(PY_2, CONST_128, PY_2); + vis_mul8x16(CONST_YCOEFF, PY_0, PY_0); + + vis_psub16(PY_4, CONST_128, PY_4); + vis_mul8x16(CONST_YCOEFF, PY_2, PY_2); + + vis_psub16(PY_6, CONST_128, PY_6); + vis_mul8x16(CONST_YCOEFF, PY_4, PY_4); + + vis_mul8x16(CONST_YCOEFF, PY_6, PY_6); + + vis_mul8sux16(CONST_UGREEN, PU_0, TMP0); + + vis_mul8sux16(CONST_VGREEN, PV_0, TMP2); + + vis_mul8x16(CONST_UBLUE, PU_0, TMP4); + + vis_mul8x16(CONST_VRED, PV_0, TMP6); + vis_padd16(TMP0, TMP2, TMP10); + + vis_padd16(PY_0, TMP4, TMP0); + + 
vis_padd16(PY_2, TMP4, TMP2); + vis_pack16(TMP0, BLUE8_EVEN); + + vis_padd16(PY_4, TMP4, TMP0); + vis_pack16(TMP2, BLUE8_ODD); + + vis_padd16(PY_6, TMP4, TMP2); + vis_pack16(TMP0, BLUE8_2_EVEN); + + vis_padd16(PY_0, TMP6, TMP0); + vis_pack16(TMP2, BLUE8_2_ODD); + + vis_padd16(PY_2, TMP6, TMP2); + vis_pack16(TMP0, RED8_EVEN); + + vis_padd16(PY_4, TMP6, TMP0); + vis_pack16(TMP2, RED8_ODD); + + vis_padd16(PY_6, TMP6, TMP2); + vis_pack16(TMP0, RED8_2_EVEN); + + vis_padd16(PY_0, TMP10, TMP0); + vis_pack16(TMP2, RED8_2_ODD); + + vis_padd16(PY_2, TMP10, TMP2); + vis_pack16(TMP0, GREEN8_EVEN); + + vis_padd16(PY_4, TMP10, TMP0); + vis_pack16(TMP2, GREEN8_ODD); + + vis_padd16(PY_6, TMP10, TMP2); + vis_pack16(TMP0, GREEN8_2_EVEN); + + vis_pack16(TMP2, GREEN8_2_ODD); + vis_pmerge(BLUE8_EVEN, BLUE8_ODD, BLUE8_EVEN); + + vis_pmerge(BLUE8_2_EVEN, BLUE8_2_ODD, BLUE8_2_EVEN); + + vis_pmerge(RED8_EVEN, RED8_ODD, RED8_EVEN); + + vis_pmerge(RED8_2_EVEN, RED8_2_ODD, RED8_2_EVEN); + + vis_pmerge(GREEN8_EVEN, GREEN8_ODD, GREEN8_EVEN); + + vis_pmerge(GREEN8_2_EVEN, GREEN8_2_ODD, GREEN8_2_EVEN); +} +/* Merge the colour operands produced by vis_yuv2rgb with ZEROS into packed 32-bit pixels and store 32 bytes (four 64-bit stores at offsets 0, 8, 16 and 24) on the row at image, then the same for the _2_ operands on the row at image + stride. */ +static inline void vis_unpack_32rgb(uint8_t *image, int stride) +{ + vis_pmerge(ZEROS, GREEN8_EVEN, TMP0); + vis_pmerge(RED8_EVEN, BLUE8_EVEN, TMP2); + + vis_pmerge(TMP0, TMP2, TMP4); + vis_st64(TMP4, image[0]); + + vis_pmerge(TMP1, TMP3, TMP6); + vis_st64_2(TMP6, image, 8); + + vis_pmerge(ZEROS, GREEN8_ODD, TMP8); + vis_pmerge(RED8_ODD, BLUE8_ODD, TMP10); + + vis_pmerge(TMP8, TMP10, TMP0); + vis_st64_2(TMP0, image, 16); + + vis_pmerge(TMP9, TMP11, TMP2); + vis_st64_2(TMP2, image, 24); + /* second scanline: same sequence using the _2_ operands */ + image += stride; + + vis_pmerge(ZEROS, GREEN8_2_EVEN, TMP0); + vis_pmerge(RED8_2_EVEN, BLUE8_2_EVEN, TMP2); + + vis_pmerge(TMP0, TMP2, TMP4); + vis_st64(TMP4, image[0]); + + vis_pmerge(TMP1, TMP3, TMP6); + vis_st64_2(TMP6, image, 8); + + vis_pmerge(ZEROS, GREEN8_2_ODD, TMP8); + vis_pmerge(RED8_2_ODD, BLUE8_2_ODD, TMP10); + + vis_pmerge(TMP8, TMP10, TMP0); + vis_st64_2(TMP0, image, 16); + + vis_pmerge(TMP9, 
TMP11, TMP2); + vis_st64_2(TMP2, image, 24); +} +/* Identical to vis_unpack_32rgb except that the red and blue operands are swapped in every vis_pmerge pair. */ +static inline void vis_unpack_32bgr(uint8_t *image, int stride) +{ + vis_pmerge(ZEROS, GREEN8_EVEN, TMP0); + vis_pmerge(BLUE8_EVEN, RED8_EVEN, TMP2); + + vis_pmerge(TMP0, TMP2, TMP4); + vis_st64(TMP4, image[0]); + + vis_pmerge(TMP1, TMP3, TMP6); + vis_st64_2(TMP6, image, 8); + + vis_pmerge(ZEROS, GREEN8_ODD, TMP8); + vis_pmerge(BLUE8_ODD, RED8_ODD, TMP10); + + vis_pmerge(TMP8, TMP10, TMP0); + vis_st64_2(TMP0, image, 16); + + vis_pmerge(TMP9, TMP11, TMP2); + vis_st64_2(TMP2, image, 24); + + image += stride; + + vis_pmerge(ZEROS, GREEN8_2_EVEN, TMP0); + vis_pmerge(BLUE8_2_EVEN, RED8_2_EVEN, TMP2); + + vis_pmerge(TMP0, TMP2, TMP4); + vis_st64(TMP4, image[0]); + + vis_pmerge(TMP1, TMP3, TMP6); + vis_st64_2(TMP6, image, 8); + + vis_pmerge(ZEROS, GREEN8_2_ODD, TMP8); + vis_pmerge(BLUE8_2_ODD, RED8_2_ODD, TMP10); + + vis_pmerge(TMP8, TMP10, TMP0); + vis_st64_2(TMP0, image, 16); + + vis_pmerge(TMP9, TMP11, TMP2); + vis_st64_2(TMP2, image, 24); +} +/* Walk the image two rows at a time (height is halved up front): each inner iteration converts 8 pixels of both rows and advances image by 32 bytes, and the row epilogue skips a full second row of luma and output. uv_stride is reduced by width >> 1 on entry so it can be added as an end-of-row increment. Both loops are do/while, so width >> 3 and height >> 1 must be non-zero. */ +static inline void vis_yuv420_argb32(uint8_t *image, + uint8_t *py, uint8_t *pu, uint8_t *pv, + int width, int height, int rgb_stride, + int y_stride, int uv_stride) +{ + height >>= 1; + uv_stride -= width >> 1; + do { + int i = width >> 3; + do { + vis_yuv2rgb(py, pu, pv, y_stride); + vis_unpack_32rgb(image, rgb_stride); + py += 8; + pu += 4; + pv += 4; + image += 32; + } while (--i); + + py += (y_stride << 1) - width; + image += (rgb_stride << 1) - 4 * width; + pu += uv_stride; + pv += uv_stride; + } while (--height); +} +/* Same walk as vis_yuv420_argb32, using vis_unpack_32bgr for the store. */ +static inline void vis_yuv420_abgr32(uint8_t *image, + uint8_t *py, uint8_t *pu, uint8_t *pv, + int width, int height, int rgb_stride, + int y_stride, int uv_stride) +{ + height >>= 1; + uv_stride -= width >> 1; + do { + int i = width >> 3; + do { + vis_yuv2rgb(py, pu, pv, y_stride); + vis_unpack_32bgr(image, rgb_stride); + py += 8; + pu += 4; + pv += 4; + image += 32; + } while (--i); + + py += (y_stride << 1) - width; + image += (rgb_stride << 1) - 4 * 
width; + pu += uv_stride; + pv += uv_stride; + } while (--height); +} +/* mpeg2convert_copy_t callback: reload the VIS constants, then convert a 16-row slice starting at id->rgb_ptr + id->rgb_stride * v_offset, passing id->y_stride >> 1 as the chroma stride. NOTE(review): rgb16/argb32 in rgb_mmx.c use id->width directly as the per-row iteration count and read rgb_slice / y_stride_frame, whereas this path treats id->width as a pixel count (width >> 3 iterations) and reads rgb_stride / y_stride -- confirm against convert_rgb_t which unit and which field names are current. */ +static void vis_argb32(void *_id, uint8_t * const *src, + unsigned int v_offset) +{ + convert_rgb_t *id = (convert_rgb_t *) _id; + + vis_init_consts(); + vis_yuv420_argb32(id->rgb_ptr + id->rgb_stride * v_offset, + src[0], src[1], src[2], id->width, 16, + id->rgb_stride, id->y_stride, id->y_stride >> 1); +} +/* BGR-order counterpart of vis_argb32; the same review note about id->width and the stride fields applies. */ +static void vis_abgr32(void *_id, uint8_t * const *src, + unsigned int v_offset) +{ + convert_rgb_t *id = (convert_rgb_t *) _id; + + vis_init_consts(); + vis_yuv420_abgr32(id->rgb_ptr + id->rgb_stride * v_offset, + src[0], src[1], src[2], id->width, 16, + id->rgb_stride, id->y_stride, id->y_stride >> 1); +} +/* Return the VIS converter for 32 bpp when seq->chroma_height < seq->height: vis_argb32 for MPEG2CONVERT_RGB, vis_abgr32 for MPEG2CONVERT_BGR. Every other combination returns NULL so the caller can fall back to C. */ +mpeg2convert_copy_t *mpeg2convert_rgb_vis(int order, int bpp, + const mpeg2_sequence_t * seq) +{ + if (bpp == 32 && seq->chroma_height < seq->height) { + if (order == MPEG2CONVERT_RGB) + return vis_argb32; + if (order == MPEG2CONVERT_BGR) + return vis_abgr32; + } + + return NULL; /* Fallback to C */ +} + +#endif /* ARCH_SPARC */ diff --git a/src/video_dec/libmpeg2new/libmpeg2/slice.c b/src/video_dec/libmpeg2new/libmpeg2/slice.c new file mode 100644 index 000000000..ce4508639 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/slice.c @@ -0,0 +1,2058 @@ +/* + * slice.c + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 2003 Peter Gubanov + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include + +#include "../include/mpeg2.h" +#include "../include/attributes.h" +#include "mpeg2_internal.h" + +extern mpeg2_mc_t mpeg2_mc; +extern void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride); +extern void (* mpeg2_idct_add) (int last, int16_t * block, + uint8_t * dest, int stride); +extern void (* mpeg2_cpu_state_save) (cpu_state_t * state); +extern void (* mpeg2_cpu_state_restore) (cpu_state_t * state); + +#include "vlc.h" +/* Decode the macroblock mode bits for the current picture (switching on decoder->coding_type: I, P, B or D) and return them as a flag word; an unknown coding type returns 0. The bit_buf / bits / bit_ptr macros alias the decoder's bitstream fields for the body of the function and are #undef'd at its end. */ +static inline int get_macroblock_modes (mpeg2_decoder_t * const decoder) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + int macroblock_modes; + const MBtab * tab; + + switch (decoder->coding_type) { + case I_TYPE: + + tab = MB_I + UBITS (bit_buf, 1); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if ((! 
(decoder->frame_pred_frame_dct)) && + (decoder->picture_structure == FRAME_PICTURE)) { + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + + return macroblock_modes; + + case P_TYPE: + /* every return in this case ORs in MACROBLOCK_MOTION_FORWARD, whether or not the table entry had it set; the motion type bits are only read when the table entry had it set */ + tab = MB_P + UBITS (bit_buf, 5); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if (decoder->picture_structure != FRAME_PICTURE) { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { + macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT; + DUMPBITS (bit_buf, bits, 2); + } + return macroblock_modes | MACROBLOCK_MOTION_FORWARD; + } else if (decoder->frame_pred_frame_dct) { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) + macroblock_modes |= MC_FRAME << MOTION_TYPE_SHIFT; + return macroblock_modes | MACROBLOCK_MOTION_FORWARD; + } else { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { + macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT; + DUMPBITS (bit_buf, bits, 2); + } + if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + return macroblock_modes | MACROBLOCK_MOTION_FORWARD; + } + + case B_TYPE: + /* unlike the P case, the returned modes are exactly the table modes plus whatever motion type / DCT type bits were read */ + tab = MB_B + UBITS (bit_buf, 6); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if (decoder->picture_structure != FRAME_PICTURE) { + if (! (macroblock_modes & MACROBLOCK_INTRA)) { + macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT; + DUMPBITS (bit_buf, bits, 2); + } + return macroblock_modes; + } else if (decoder->frame_pred_frame_dct) { + /* if (! 
(macroblock_modes & MACROBLOCK_INTRA)) */ + macroblock_modes |= MC_FRAME << MOTION_TYPE_SHIFT; + return macroblock_modes; + } else { + if (macroblock_modes & MACROBLOCK_INTRA) + goto intra; + macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT; + DUMPBITS (bit_buf, bits, 2); + if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { + intra: + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + return macroblock_modes; + } + + case D_TYPE: + + DUMPBITS (bit_buf, bits, 1); + return MACROBLOCK_INTRA; + + default: + return 0; + } +#undef bit_buf +#undef bits +#undef bit_ptr +} +/* Read a 5-bit quantizer_scale_code and point the four decoder->quantizer_matrix[] entries at the rows of quantizer_prescale[0], quantizer_prescale[1], chroma_quantizer[0] and chroma_quantizer[1] selected by that code. */ +static inline void get_quantizer_scale (mpeg2_decoder_t * const decoder) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + + int quantizer_scale_code; + + quantizer_scale_code = UBITS (bit_buf, 5); + DUMPBITS (bit_buf, bits, 5); + + decoder->quantizer_matrix[0] = + decoder->quantizer_prescale[0][quantizer_scale_code]; + decoder->quantizer_matrix[1] = + decoder->quantizer_prescale[1][quantizer_scale_code]; + decoder->quantizer_matrix[2] = + decoder->chroma_quantizer[0][quantizer_scale_code]; + decoder->quantizer_matrix[3] = + decoder->chroma_quantizer[1][quantizer_scale_code]; +#undef bit_buf +#undef bits +#undef bit_ptr +} +/* Decode one motion vector delta. A set top bit means delta 0 (one bit consumed). Otherwise the magnitude is (tab->delta << f_code) + 1 plus f_code residual bits, looked up through MV_4 or MV_10, and the sign bit is applied with (delta ^ sign) - sign. Only the MV_10 branch refills the bit buffer (NEEDBITS) before reading the residual. */ +static inline int get_motion_delta (mpeg2_decoder_t * const decoder, + const int f_code) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + + int delta; + int sign; + const MVtab * tab; + + if (bit_buf & 0x80000000) { + DUMPBITS (bit_buf, bits, 1); + return 0; + } else if (bit_buf >= 0x0c000000) { + + tab = MV_4 + UBITS (bit_buf, 4); + delta = (tab->delta << f_code) + 1; + bits += tab->len + f_code + 1; + bit_buf <<= tab->len; + + sign = SBITS (bit_buf, 1); + bit_buf <<= 1; + + if (f_code) + delta += UBITS (bit_buf, f_code); + bit_buf <<= f_code; + + 
return (delta ^ sign) - sign; + + } else { + + tab = MV_10 + UBITS (bit_buf, 10); + delta = (tab->delta << f_code) + 1; + bits += tab->len + 1; + bit_buf <<= tab->len; + + sign = SBITS (bit_buf, 1); + bit_buf <<= 1; + + if (f_code) { + NEEDBITS (bit_buf, bits, bit_ptr); + delta += UBITS (bit_buf, f_code); + DUMPBITS (bit_buf, bits, f_code); + } + + return (delta ^ sign) - sign; + + } +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int bound_motion_vector (const int vector, const int f_code) +{ + return ((int32_t)vector << (27 - f_code)) >> (27 - f_code); +} + +static inline int get_dmv (mpeg2_decoder_t * const decoder) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + + const DMVtab * tab; + + tab = DMV_2 + UBITS (bit_buf, 2); + DUMPBITS (bit_buf, bits, tab->len); + return tab->dmv; +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_coded_block_pattern (mpeg2_decoder_t * const decoder) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + + const CBPtab * tab; + + NEEDBITS (bit_buf, bits, bit_ptr); + + if (bit_buf >= 0x20000000) { + + tab = CBP_7 + (UBITS (bit_buf, 7) - 16); + DUMPBITS (bit_buf, bits, tab->len); + return tab->cbp; + + } else { + + tab = CBP_9 + UBITS (bit_buf, 9); + DUMPBITS (bit_buf, bits, tab->len); + return tab->cbp; + } + +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_luma_dc_dct_diff (mpeg2_decoder_t * const decoder) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + const DCtab * tab; + int size; + int dc_diff; + + if (bit_buf < 0xf8000000) { + tab = DC_lum_5 + UBITS (bit_buf, 5); + size = tab->size; + if (size) { + bits += tab->len + size; + bit_buf <<= tab->len; + dc_diff = + UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + bit_buf <<= 
size; + return dc_diff << decoder->intra_dc_precision; + } else { + DUMPBITS (bit_buf, bits, 3); + return 0; + } + } else { + tab = DC_long + (UBITS (bit_buf, 9) - 0x1e0); + size = tab->size; + DUMPBITS (bit_buf, bits, tab->len); + NEEDBITS (bit_buf, bits, bit_ptr); + dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + DUMPBITS (bit_buf, bits, size); + return dc_diff << decoder->intra_dc_precision; + } +#undef bit_buf +#undef bits +#undef bit_ptr +} +/* Chroma counterpart of get_luma_dc_dct_diff: uses DC_chrom_5 for short codes (2 bits consumed when size is 0) and indexes DC_long with a 10-bit lookup, consuming tab->len + 1 bits, for long ones. */ +static inline int get_chroma_dc_dct_diff (mpeg2_decoder_t * const decoder) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + const DCtab * tab; + int size; + int dc_diff; + + if (bit_buf < 0xf8000000) { + tab = DC_chrom_5 + UBITS (bit_buf, 5); + size = tab->size; + if (size) { + bits += tab->len + size; + bit_buf <<= tab->len; + dc_diff = + UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + bit_buf <<= size; + return dc_diff << decoder->intra_dc_precision; + } else { + DUMPBITS (bit_buf, bits, 2); + return 0; + } + } else { + tab = DC_long + (UBITS (bit_buf, 10) - 0x3e0); + size = tab->size; + DUMPBITS (bit_buf, bits, tab->len + 1); + NEEDBITS (bit_buf, bits, bit_ptr); + dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + DUMPBITS (bit_buf, bits, size); + return dc_diff << decoder->intra_dc_precision; + } +#undef bit_buf +#undef bits +#undef bit_ptr +} +/* SATURATE shifts val left by 4 in place and, if the result no longer fits in int16_t, replaces it with (SBITS (val, 1) ^ 2047) << 4 -- presumably 2047 << 4 or -2048 << 4 depending on sign; confirm against the SBITS definition in vlc.h. val must be a modifiable lvalue and is evaluated more than once. */ +#define SATURATE(val) \ +do { \ + val <<= 4; \ + if (unlikely (val != (int16_t) val)) \ + val = (SBITS (val, 1) ^ 2047) << 4; \ +} while (0) +/* Decode the run/level codes of one intra block with the B14 tables into decoder->DCTblock. Each level is multiplied by quant_matrix[j] (j taken from decoder->scan), passed through SATURATE and XOR-ed into mismatch, whose bit 4 finally toggles dest[63]. The bitstream state is copied into locals on entry and written back on exit. */ +static void get_intra_block_B14 (mpeg2_decoder_t * const decoder, + const uint16_t * const quant_matrix) +{ + int i; + int j; + int val; + const uint8_t * const scan = decoder->scan; + int mismatch; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + const uint8_t * bit_ptr; + int16_t * const dest = decoder->DCTblock; + + i = 0; + mismatch = ~dest[0]; + + bit_buf = decoder->bitstream_buf; + bits 
= decoder->bitstream_bits; + bit_ptr = decoder->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + /* each pass decodes one run/level pair; the lookup table is chosen by comparing bit_buf against successively smaller thresholds */ + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = (tab->level * quant_matrix[j]) >> 4; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + /* reached with i >= 64 after adding tab->run; the 6-bit field that follows the first 6 bits is added and 64 subtracted, and the signed 12-bit level follows */ + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = (SBITS (bit_buf, 12) * quant_matrix[j]) / 16; + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + dest[63] ^= mismatch & 16; + DUMPBITS (bit_buf, bits, tab->len); /* dump end of block code */ + decoder->bitstream_buf = bit_buf; + decoder->bitstream_bits = bits; + 
decoder->bitstream_ptr = bit_ptr; +} +/* Same structure as get_intra_block_B14, but the first lookups go through DCT_B15_8 / DCT_B15_10 and there is no separate 5-bit table branch: codes at or above 0x04000000 are either a normal code (i < 64 after adding tab->run) or fall through to the escape handling. */ +static void get_intra_block_B15 (mpeg2_decoder_t * const decoder, + const uint16_t * const quant_matrix) +{ + int i; + int j; + int val; + const uint8_t * const scan = decoder->scan; + int mismatch; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + const uint8_t * bit_ptr; + int16_t * const dest = decoder->DCTblock; + + i = 0; + mismatch = ~dest[0]; + + bit_buf = decoder->bitstream_buf; + bits = decoder->bitstream_bits; + bit_ptr = decoder->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + + while (1) { + if (bit_buf >= 0x04000000) { + + tab = DCT_B15_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) { + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = (tab->level * quant_matrix[j]) >> 4; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else { + + /* end of block. 
I commented out this code because if we */ + /* don't exit here we will still exit at the later test :) */ + + /* if (i >= 128) break; */ /* end of block */ + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check against buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = (SBITS (bit_buf, 12) * quant_matrix[j]) / 16; + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } + } else if (bit_buf >= 0x02000000) { + tab = DCT_B15_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + dest[63] ^= mismatch & 16; + DUMPBITS (bit_buf, bits, tab->len); /* dump end of block code */ + decoder->bitstream_buf = bit_buf; + decoder->bitstream_bits = bits; + decoder->bitstream_ptr = bit_ptr; +} +/* Decode one non-intra block into decoder->DCTblock and return the index i reached when decoding stopped. Starts from i = -1 and mismatch = -1; table levels are reconstructed as ((2 * level + 1) * quant_matrix[j]) >> 5. The very first code is looked up in DCT_B14DC_5 where later ones use DCT_B14AC_5. */ +static int get_non_intra_block (mpeg2_decoder_t * const decoder, + const uint16_t * const quant_matrix) +{ + int i; + int j; + int val; + const uint8_t * const scan = decoder->scan; + int mismatch; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + const uint8_t * bit_ptr; + int16_t * const dest = decoder->DCTblock; + + i = -1; + mismatch = -1; + + bit_buf = decoder->bitstream_buf; + bits = decoder->bitstream_bits; + bit_ptr = decoder->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + if (bit_buf >= 0x28000000) { + tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5); + 
goto entry_1; + } else + goto entry_2; + /* the loop is never entered from the top on the first pass: both branches above jump to a label inside its body */ + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + entry_1: + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = ((2 * tab->level + 1) * quant_matrix[j]) >> 5; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } + /* first-pass entry point for codes below 0x28000000; later passes reach it by falling out of the if above */ + entry_2: + if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = 2 * (SBITS (bit_buf, 12) + SBITS (bit_buf, 1)) + 1; + val = (val * quant_matrix[j]) / 32; + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + dest[63] ^= mismatch & 16; + DUMPBITS (bit_buf, bits, tab->len); /* dump end of block code */ + decoder->bitstream_buf = bit_buf; + decoder->bitstream_bits = bits; + 
decoder->bitstream_ptr = bit_ptr; + return i; +} +/* MPEG-1 variant of the intra block decoder: always dequantizes with decoder->quantizer_matrix[0], forces each reconstructed level odd ("oddification") instead of tracking a mismatch value, and reads escape levels as an 8-bit field with an optional 8-bit extension. */ +static void get_mpeg1_intra_block (mpeg2_decoder_t * const decoder) +{ + int i; + int j; + int val; + const uint8_t * const scan = decoder->scan; + const uint16_t * const quant_matrix = decoder->quantizer_matrix[0]; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + const uint8_t * bit_ptr; + int16_t * const dest = decoder->DCTblock; + + i = 0; + + bit_buf = decoder->bitstream_buf; + bits = decoder->bitstream_bits; + bit_ptr = decoder->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = (tab->level * quant_matrix[j]) >> 4; + + /* oddification */ + val = (val - 1) | 1; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = SBITS (bit_buf, 8); + if (! 
(val & 0x7f)) { + DUMPBITS (bit_buf, bits, 8); + val = UBITS (bit_buf, 8) + 2 * val; + } + val = (val * quant_matrix[j]) / 16; + + /* oddification */ + val = (val + ~SBITS (val, 1)) | 1; + + SATURATE (val); + dest[j] = val; + + DUMPBITS (bit_buf, bits, 8); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + DUMPBITS (bit_buf, bits, tab->len); /* dump end of block code */ + decoder->bitstream_buf = bit_buf; + decoder->bitstream_bits = bits; + decoder->bitstream_ptr = bit_ptr; +} + +static int get_mpeg1_non_intra_block (mpeg2_decoder_t * const decoder) +{ + int i; + int j; + int val; + const uint8_t * const scan = decoder->scan; + const uint16_t * const quant_matrix = decoder->quantizer_matrix[1]; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + const uint8_t * bit_ptr; + int16_t * const dest = decoder->DCTblock; + + i = -1; + + bit_buf = decoder->bitstream_buf; + bits = decoder->bitstream_bits; + bit_ptr = decoder->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + if (bit_buf >= 0x28000000) { + tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5); + goto entry_1; + } else + goto entry_2; + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + entry_1: + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + 
val = ((2 * tab->level + 1) * quant_matrix[j]) >> 5; + + /* oddification */ + val = (val - 1) | 1; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } + + entry_2: + if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = SBITS (bit_buf, 8); + if (! (val & 0x7f)) { + DUMPBITS (bit_buf, bits, 8); + val = UBITS (bit_buf, 8) + 2 * val; + } + val = 2 * (val + SBITS (val, 1)) + 1; + val = (val * quant_matrix[j]) / 32; + + /* oddification */ + val = (val + ~SBITS (val, 1)) | 1; + + SATURATE (val); + dest[j] = val; + + DUMPBITS (bit_buf, bits, 8); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + DUMPBITS (bit_buf, bits, tab->len); /* dump end of block code */ + decoder->bitstream_buf = bit_buf; + decoder->bitstream_bits = bits; + decoder->bitstream_ptr = bit_ptr; + return i; +} + +static inline void slice_intra_DCT (mpeg2_decoder_t * const decoder, + const int cc, + uint8_t * const 
dest, const int stride) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + NEEDBITS (bit_buf, bits, bit_ptr); + /* Get the intra DC coefficient and inverse quantize it */ + if (cc == 0) + decoder->DCTblock[0] = + decoder->dc_dct_pred[0] += get_luma_dc_dct_diff (decoder); + else + decoder->DCTblock[0] = + decoder->dc_dct_pred[cc] += get_chroma_dc_dct_diff (decoder); + + if (decoder->mpeg1) { + if (decoder->coding_type != D_TYPE) + get_mpeg1_intra_block (decoder); + } else if (decoder->intra_vlc_format) + get_intra_block_B15 (decoder, decoder->quantizer_matrix[cc ? 2 : 0]); + else + get_intra_block_B14 (decoder, decoder->quantizer_matrix[cc ? 2 : 0]); + mpeg2_idct_copy (decoder->DCTblock, dest, stride); +#undef bit_buf +#undef bits +#undef bit_ptr +} + /* slice_non_intra_DCT: run the non-intra coefficient parser (get_mpeg1_non_intra_block when decoder->mpeg1 is set, otherwise get_non_intra_block with quantizer_matrix[1] for cc == 0 and quantizer_matrix[3] for any other cc), then hand the parser's return value, decoder->DCTblock, dest and stride to mpeg2_idct_add. */ +static inline void slice_non_intra_DCT (mpeg2_decoder_t * const decoder, + const int cc, + uint8_t * const dest, const int stride) +{ + int last; + + if (decoder->mpeg1) + last = get_mpeg1_non_intra_block (decoder); + else + last = get_non_intra_block (decoder, + decoder->quantizer_matrix[cc ? 3 : 1]); + mpeg2_idct_add (last, decoder->DCTblock, dest, stride); +} + /* MOTION_420: expands to statements, not an expression. It uses `decoder` and the locals pos_x, pos_y, xy_half and offset of the expanding function, and may overwrite its motion_x / motion_y arguments. The expanding functions declare pos_x / pos_y as unsigned, so a negative position also fails the `> limit` test; the (int) cast inside the clamp branch then selects 0 instead of the limit. */ +#define MOTION_420(table,ref,motion_x,motion_y,size,y) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = 2 * decoder->v_offset + motion_y + 2 * y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y_ ## size)) { \ + pos_y = ((int)pos_y < 0) ?
0 : decoder->limit_y_ ## size; \ + motion_y = pos_y - 2 * decoder->v_offset - 2 * y; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \ + ref[0] + (pos_x >> 1) + (pos_y >> 1) * decoder->stride, \ + decoder->stride, size); \ + motion_x /= 2; motion_y /= 2; \ + xy_half = ((motion_y & 1) << 1) | (motion_x & 1); \ + offset = (((decoder->offset + motion_x) >> 1) + \ + ((((decoder->v_offset + motion_y) >> 1) + y/2) * \ + decoder->uv_stride)); \ + table[4+xy_half] (decoder->dest[1] + y/2 * decoder->uv_stride + \ + (decoder->offset >> 1), ref[1] + offset, \ + decoder->uv_stride, size/2); \ + table[4+xy_half] (decoder->dest[2] + y/2 * decoder->uv_stride + \ + (decoder->offset >> 1), ref[2] + offset, \ + decoder->uv_stride, size/2) + +#define MOTION_FIELD_420(table,ref,motion_x,motion_y,dest_field,op,src_field) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = decoder->v_offset + motion_y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y)) { \ + pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y; \ + motion_y = pos_y - decoder->v_offset; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + table[xy_half] (decoder->dest[0] + dest_field * decoder->stride + \ + decoder->offset, \ + (ref[0] + (pos_x >> 1) + \ + ((pos_y op) + src_field) * decoder->stride), \ + 2 * decoder->stride, 8); \ + motion_x /= 2; motion_y /= 2; \ + xy_half = ((motion_y & 1) << 1) | (motion_x & 1); \ + offset = (((decoder->offset + motion_x) >> 1) + \ + (((decoder->v_offset >> 1) + (motion_y op) + src_field) * \ + decoder->uv_stride)); \ + table[4+xy_half] (decoder->dest[1] + dest_field * decoder->uv_stride + \ + (decoder->offset >> 1), ref[1] + offset, \ + 2 * decoder->uv_stride, 4); \ + table[4+xy_half] (decoder->dest[2] + dest_field * decoder->uv_stride + \ + (decoder->offset >> 1), ref[2] + offset, \ + 2 * decoder->uv_stride, 4) + +#define MOTION_DMV_420(table,ref,motion_x,motion_y) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = decoder->v_offset + motion_y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y)) { \ + pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y; \ + motion_y = pos_y - decoder->v_offset; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + decoder->offset, \ + ref[0] + offset, 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset, \ + ref[0] + decoder->stride + offset, \ + 2 * decoder->stride, 8); \ + motion_x /= 2; motion_y /= 2; \ + xy_half = ((motion_y & 1) << 1) | (motion_x & 1); \ + offset = (((decoder->offset + motion_x) >> 1) + \ + (((decoder->v_offset >> 1) + (motion_y & ~1)) * \ + decoder->uv_stride)); \ + table[4+xy_half] (decoder->dest[1] + (decoder->offset >> 1), \ + ref[1] + offset, 2 * decoder->uv_stride, 4); \ + table[4+xy_half] (decoder->dest[1] + decoder->uv_stride + \ + (decoder->offset >> 1), \ + ref[1] + decoder->uv_stride + offset, \ + 2 * decoder->uv_stride, 4); \ + table[4+xy_half] (decoder->dest[2] + (decoder->offset >> 1), \ + ref[2] + offset, 2 * decoder->uv_stride, 4); \ + table[4+xy_half] (decoder->dest[2] + decoder->uv_stride + \ + (decoder->offset >> 1), \ + ref[2] + decoder->uv_stride + offset, \ + 2 * decoder->uv_stride, 4) + /* MOTION_ZERO_420: copy from the same position in ref with no motion vector. Plane 0 goes through table[0] with decoder->stride and height 16; planes 1 and 2 go through table[4] with decoder->uv_stride, height 8 and halved horizontal/vertical offsets. Overwrites the expanding function's local `offset`. */ +#define MOTION_ZERO_420(table,ref) \ + table[0] (decoder->dest[0] + decoder->offset, \ + (ref[0] + decoder->offset + \ + decoder->v_offset * decoder->stride), decoder->stride, 16); \ + offset = ((decoder->offset >> 1) + \ + (decoder->v_offset >> 1) * decoder->uv_stride); \ + table[4] (decoder->dest[1] + (decoder->offset >> 1), \ + ref[1] + offset, decoder->uv_stride, 8); \ + table[4] (decoder->dest[2] + (decoder->offset >> 1), \ + ref[2] + offset, decoder->uv_stride, 8) + /* MOTION_422: statement macro; needs `decoder` plus locals pos_x, pos_y, xy_half and offset in the expanding function, and may overwrite its motion_x / motion_y arguments when the position is clamped to limit_x / limit_y_<size>. */ +#define MOTION_422(table,ref,motion_x,motion_y,size,y) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = 2 * decoder->v_offset + motion_y + 2 * y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ?
0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y_ ## size)) { \ + pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y_ ## size; \ + motion_y = pos_y - 2 * decoder->v_offset - 2 * y; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + (pos_y >> 1) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \ + ref[0] + offset, decoder->stride, size); \ + offset = (offset + (motion_x & (motion_x < 0))) >> 1; \ + motion_x /= 2; \ + xy_half = ((pos_y & 1) << 1) | (motion_x & 1); \ + table[4+xy_half] (decoder->dest[1] + y * decoder->uv_stride + \ + (decoder->offset >> 1), ref[1] + offset, \ + decoder->uv_stride, size); \ + table[4+xy_half] (decoder->dest[2] + y * decoder->uv_stride + \ + (decoder->offset >> 1), ref[2] + offset, \ + decoder->uv_stride, size) + +#define MOTION_FIELD_422(table,ref,motion_x,motion_y,dest_field,op,src_field) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = decoder->v_offset + motion_y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y)) { \ + pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y; \ + motion_y = pos_y - decoder->v_offset; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + ((pos_y op) + src_field) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + dest_field * decoder->stride + \ + decoder->offset, ref[0] + offset, \ + 2 * decoder->stride, 8); \ + offset = (offset + (motion_x & (motion_x < 0))) >> 1; \ + motion_x /= 2; \ + xy_half = ((pos_y & 1) << 1) | (motion_x & 1); \ + table[4+xy_half] (decoder->dest[1] + dest_field * decoder->uv_stride + \ + (decoder->offset >> 1), ref[1] + offset, \ + 2 * decoder->uv_stride, 8); \ + table[4+xy_half] (decoder->dest[2] + dest_field * decoder->uv_stride + \ + (decoder->offset >> 1), ref[2] + offset, \ + 2 * decoder->uv_stride, 8) + +#define MOTION_DMV_422(table,ref,motion_x,motion_y) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = decoder->v_offset + motion_y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y)) { \ + pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y; \ + motion_y = pos_y - decoder->v_offset; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + decoder->offset, \ + ref[0] + offset, 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset, \ + ref[0] + decoder->stride + offset, \ + 2 * decoder->stride, 8); \ + offset = (offset + (motion_x & (motion_x < 0))) >> 1; \ + motion_x /= 2; \ + xy_half = ((pos_y & 1) << 1) | (motion_x & 1); \ + table[4+xy_half] (decoder->dest[1] + (decoder->offset >> 1), \ + ref[1] + offset, 2 * decoder->uv_stride, 8); \ + table[4+xy_half] (decoder->dest[1] + decoder->uv_stride + \ + (decoder->offset >> 1), \ + ref[1] + decoder->uv_stride + offset, \ + 2 * decoder->uv_stride, 8); \ + table[4+xy_half] (decoder->dest[2] + (decoder->offset >> 1), \ + ref[2] + offset, 2 * decoder->uv_stride, 8); \ + table[4+xy_half] (decoder->dest[2] + decoder->uv_stride + \ + (decoder->offset >> 1), \ + ref[2] + decoder->uv_stride + offset, \ + 2 * decoder->uv_stride, 8) + /* MOTION_ZERO_422: copy from the same position in ref with no motion vector. Plane 0 goes through table[0] with decoder->stride; the combined offset is then halved and planes 1 and 2 go through table[4] with decoder->uv_stride, destination offset decoder->offset >> 1 and height 16. Overwrites the expanding function's local `offset`. */ +#define MOTION_ZERO_422(table,ref) \ + offset = decoder->offset + decoder->v_offset * decoder->stride; \ + table[0] (decoder->dest[0] + decoder->offset, \ + ref[0] + offset, decoder->stride, 16); \ + offset >>= 1; \ + table[4] (decoder->dest[1] + (decoder->offset >> 1), \ + ref[1] + offset, decoder->uv_stride, 16); \ + table[4] (decoder->dest[2] + (decoder->offset >> 1), \ + ref[2] + offset, decoder->uv_stride, 16) + /* MOTION_444: statement macro; needs `decoder` plus locals pos_x, pos_y, xy_half and offset in the expanding function. After clamping, all three planes are addressed with decoder->stride, decoder->offset and the same table[xy_half] entry. */ +#define MOTION_444(table,ref,motion_x,motion_y,size,y) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = 2 * decoder->v_offset + motion_y + 2 * y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y_ ## size)) { \ + pos_y = ((int)pos_y < 0) ?
0 : decoder->limit_y_ ## size; \ + motion_y = pos_y - 2 * decoder->v_offset - 2 * y; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + (pos_y >> 1) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \ + ref[0] + offset, decoder->stride, size); \ + table[xy_half] (decoder->dest[1] + y * decoder->stride + decoder->offset, \ + ref[1] + offset, decoder->stride, size); \ + table[xy_half] (decoder->dest[2] + y * decoder->stride + decoder->offset, \ + ref[2] + offset, decoder->stride, size) + /* MOTION_FIELD_444: field prediction inside a frame picture for the 444 variant. The position is clamped to limit_x / limit_y, then each of the three planes is processed with the same table[xy_half] entry and the same source offset, writing every second row (step 2 * decoder->stride, 8 rows) starting at row dest_field. `op` is pasted after pos_y as raw tokens (the callers in this file pass `& ~1` or `| 1`) and src_field is added to the resulting row. Needs `decoder` and locals pos_x, pos_y, xy_half, offset. */ +#define MOTION_FIELD_444(table,ref,motion_x,motion_y,dest_field,op,src_field) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = decoder->v_offset + motion_y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y)) { \ + pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y; \ + motion_y = pos_y - decoder->v_offset; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + ((pos_y op) + src_field) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + dest_field * decoder->stride + \ + decoder->offset, ref[0] + offset, \ + 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[1] + dest_field * decoder->stride + \ + decoder->offset, ref[1] + offset, \ + 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[2] + dest_field * decoder->stride + \ + decoder->offset, ref[2] + offset, \ + 2 * decoder->stride, 8) + /* MOTION_DMV_444: statement macro; clamps the position like MOTION_FIELD_444, then calls table[xy_half] twice per plane, once for the rows starting at row 0 and once for the rows starting at row 1, each with step 2 * decoder->stride. */ +#define MOTION_DMV_444(table,ref,motion_x,motion_y) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = decoder->v_offset + motion_y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y)) { \ + pos_y = ((int)pos_y < 0) ?
0 : decoder->limit_y; \ + motion_y = pos_y - decoder->v_offset; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + decoder->offset, \ + ref[0] + offset, 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset, \ + ref[0] + decoder->stride + offset, \ + 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[1] + decoder->offset, \ + ref[1] + offset, 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[1] + decoder->stride + decoder->offset, \ + ref[1] + decoder->stride + offset, \ + 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[2] + decoder->offset, \ + ref[2] + offset, 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[2] + decoder->stride + decoder->offset, \ + ref[2] + decoder->stride + offset, \ + 2 * decoder->stride, 8) + /* MOTION_ZERO_444: copy from the same position in ref with no motion vector, 444 variant. All three planes are addressed with decoder->stride and decoder->offset, so planes 1 and 2 use the same table[0] entry and the same destination offset as plane 0; this is what MOTION_444 does for a zero vector. The half-width table[4] entry and the halved destination offset belong to the 420/422 variants only. Overwrites the expanding function's local `offset`. */ +#define MOTION_ZERO_444(table,ref) \ + offset = decoder->offset + decoder->v_offset * decoder->stride; \ + table[0] (decoder->dest[0] + decoder->offset, \ + ref[0] + offset, decoder->stride, 16); \ + table[0] (decoder->dest[1] + decoder->offset, \ + ref[1] + offset, decoder->stride, 16); \ + table[0] (decoder->dest[2] + decoder->offset, \ + ref[2] + offset, decoder->stride, 16) + /* Shorthand for the decoder's bitstream state, used by the motion parsers that follow. */ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + /* motion_mp1: read a horizontal and a vertical delta (both with f_code[0], scaled by << f_code[1]), bound them, store them in motion->pmv[0] and run MOTION_420 on motion->ref[0] for 16 rows. */ +static void motion_mp1 (mpeg2_decoder_t * const decoder, + motion_t * const motion, + mpeg2_mc_fct * const * const table) +{ + int motion_x, motion_y; + unsigned int pos_x, pos_y, xy_half, offset; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = (motion->pmv[0][0] + + (get_motion_delta (decoder, + motion->f_code[0]) << motion->f_code[1])); + motion_x = bound_motion_vector (motion_x, + motion->f_code[0] + motion->f_code[1]); + motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = (motion->pmv[0][1] + + (get_motion_delta (decoder, +
motion->f_code[0]) << motion->f_code[1])); + motion_y = bound_motion_vector (motion_y, + motion->f_code[0] + motion->f_code[1]); + motion->pmv[0][1] = motion_y; + + MOTION_420 (table, motion->ref[0], motion_x, motion_y, 16, 0); +} + +#define MOTION_FUNCTIONS(FORMAT,MOTION,MOTION_FIELD,MOTION_DMV,MOTION_ZERO) \ + \ +static void motion_fr_frame_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_y = motion->pmv[0][1] + get_motion_delta (decoder, \ + motion->f_code[1]); \ + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); \ + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; \ + \ + MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); \ +} \ + \ +static void motion_fr_field_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y, field; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + field = UBITS (bit_buf, 1); \ + DUMPBITS (bit_buf, bits, 1); \ + \ + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[0][0] = motion_x; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_y = ((motion->pmv[0][1] >> 1) + \ + get_motion_delta (decoder, motion->f_code[1])); \ + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ \ + motion->pmv[0][1] = motion_y << 1; \ + \ + MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 0, & ~1, field); \ + \ + NEEDBITS (bit_buf, 
bits, bit_ptr); \ + field = UBITS (bit_buf, 1); \ + DUMPBITS (bit_buf, bits, 1); \ + \ + motion_x = motion->pmv[1][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[1][0] = motion_x; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_y = ((motion->pmv[1][1] >> 1) + \ + get_motion_delta (decoder, motion->f_code[1])); \ + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ \ + motion->pmv[1][1] = motion_y << 1; \ + \ + MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 1, & ~1, field); \ +} \ + \ +static void motion_fr_dmv_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y, dmv_x, dmv_y, m, other_x, other_y; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + dmv_x = get_dmv (decoder); \ + \ + motion_y = ((motion->pmv[0][1] >> 1) + \ + get_motion_delta (decoder, motion->f_code[1])); \ + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ \ + motion->pmv[1][1] = motion->pmv[0][1] = motion_y << 1; \ + dmv_y = get_dmv (decoder); \ + \ + m = decoder->top_field_first ? 1 : 3; \ + other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; \ + other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y - 1; \ + MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 0, | 1, 0); \ + \ + m = decoder->top_field_first ? 
3 : 1; \ + other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; \ + other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y + 1; \ + MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 1, & ~1, 0);\ + \ + MOTION_DMV (mpeg2_mc.avg, motion->ref[0], motion_x, motion_y); \ +} \ + \ +static void motion_reuse_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + motion_x = motion->pmv[0][0]; \ + motion_y = motion->pmv[0][1]; \ + \ + MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); \ +} \ + \ +static void motion_zero_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + unsigned int offset; \ + \ + motion->pmv[0][0] = motion->pmv[0][1] = 0; \ + motion->pmv[1][0] = motion->pmv[1][1] = 0; \ + \ + MOTION_ZERO (table, motion->ref[0]); \ +} \ + \ +static void motion_fi_field_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y; \ + uint8_t ** ref_field; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + ref_field = motion->ref2[UBITS (bit_buf, 1)]; \ + DUMPBITS (bit_buf, bits, 1); \ + \ + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_y = motion->pmv[0][1] + get_motion_delta (decoder, \ + motion->f_code[1]); \ + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); \ + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; \ + \ + MOTION (table, ref_field, motion_x, motion_y, 16, 0); \ +} \ + \ +static void motion_fi_16x8_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + 
mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y; \ + uint8_t ** ref_field; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + ref_field = motion->ref2[UBITS (bit_buf, 1)]; \ + DUMPBITS (bit_buf, bits, 1); \ + \ + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[0][0] = motion_x; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_y = motion->pmv[0][1] + get_motion_delta (decoder, \ + motion->f_code[1]); \ + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); \ + motion->pmv[0][1] = motion_y; \ + \ + MOTION (table, ref_field, motion_x, motion_y, 8, 0); \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + ref_field = motion->ref2[UBITS (bit_buf, 1)]; \ + DUMPBITS (bit_buf, bits, 1); \ + \ + motion_x = motion->pmv[1][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[1][0] = motion_x; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_y = motion->pmv[1][1] + get_motion_delta (decoder, \ + motion->f_code[1]); \ + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); \ + motion->pmv[1][1] = motion_y; \ + \ + MOTION (table, ref_field, motion_x, motion_y, 8, 8); \ +} \ + \ +static void motion_fi_dmv_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y, other_x, other_y; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + other_x = ((motion_x + (motion_x > 0)) >> 1) + get_dmv (decoder); \ + \ + motion_y = motion->pmv[0][1] 
+ get_motion_delta (decoder, \ + motion->f_code[1]); \ + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); \ + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; \ + other_y = (((motion_y + (motion_y > 0)) >> 1) + get_dmv (decoder) + \ + decoder->dmv_offset); \ + \ + MOTION (mpeg2_mc.put, motion->ref[0], motion_x, motion_y, 16, 0); \ + MOTION (mpeg2_mc.avg, motion->ref[1], other_x, other_y, 16, 0); \ +} \ + +MOTION_FUNCTIONS (420, MOTION_420, MOTION_FIELD_420, MOTION_DMV_420, + MOTION_ZERO_420) +MOTION_FUNCTIONS (422, MOTION_422, MOTION_FIELD_422, MOTION_DMV_422, + MOTION_ZERO_422) +MOTION_FUNCTIONS (444, MOTION_444, MOTION_FIELD_444, MOTION_DMV_444, + MOTION_ZERO_444) + +/* motion_fr_conceal: concealment vector in a frame picture. Parses the two deltas and updates decoder->f_motion.pmv[0] and pmv[1] the same way the motion_fr_frame_* parsers do, but runs no motion compensation; the bit that follows the vector (marker_bit) is dropped. */ +static void motion_fr_conceal (mpeg2_decoder_t * const decoder) +{ + int tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + tmp = (decoder->f_motion.pmv[0][0] + + get_motion_delta (decoder, decoder->f_motion.f_code[0])); + tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[0]); + decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[0][0] = tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + tmp = (decoder->f_motion.pmv[0][1] + + get_motion_delta (decoder, decoder->f_motion.f_code[1])); + tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[1]); + decoder->f_motion.pmv[1][1] = decoder->f_motion.pmv[0][1] = tmp; + + DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */ +} + /* motion_fi_conceal: concealment vector in a field picture. Drops the leading field_select bit, then parses the two deltas into decoder->f_motion.pmv[0] and pmv[1] and drops the trailing marker_bit; no motion compensation is run. */ +static void motion_fi_conceal (mpeg2_decoder_t * const decoder) +{ + int tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + DUMPBITS (bit_buf, bits, 1); /* remove field_select */ + + tmp = (decoder->f_motion.pmv[0][0] + + get_motion_delta (decoder, decoder->f_motion.f_code[0])); + tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[0]); + decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[0][0] = tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + tmp = (decoder->f_motion.pmv[0][1] + + get_motion_delta (decoder, decoder->f_motion.f_code[1])); + tmp =
bound_motion_vector (tmp, decoder->f_motion.f_code[1]); + decoder->f_motion.pmv[1][1] = decoder->f_motion.pmv[0][1] = tmp; + + DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */ +} + +#undef bit_buf +#undef bits +#undef bit_ptr + /* MOTION_CALL: call `routine` for the forward vector (decoder->f_motion, mpeg2_mc.put) when MACROBLOCK_MOTION_FORWARD is set in `direction`, and for the backward vector (decoder->b_motion) when MACROBLOCK_MOTION_BACKWARD is set. The backward call uses mpeg2_mc.avg if the forward bit is also set and mpeg2_mc.put otherwise. Needs `decoder` in scope. */ +#define MOTION_CALL(routine,direction) \ +do { \ + if ((direction) & MACROBLOCK_MOTION_FORWARD) \ + routine (decoder, &(decoder->f_motion), mpeg2_mc.put); \ + if ((direction) & MACROBLOCK_MOTION_BACKWARD) \ + routine (decoder, &(decoder->b_motion), \ + ((direction) & MACROBLOCK_MOTION_FORWARD ? \ + mpeg2_mc.avg : mpeg2_mc.put)); \ +} while (0) + /* NEXT_MACROBLOCK: advance decoder->offset by 16. When it reaches decoder->width, call decoder->convert if set, advance the three dest pointers by one slice (skipped when convert is set and coding_type is B_TYPE), add 16 to v_offset and reset offset to 0. If v_offset then exceeds limit_y the macro restores the CPU state and executes `return` in the enclosing function, so it may only be used in a void function that has a local cpu_state. */ +#define NEXT_MACROBLOCK \ +do { \ + decoder->offset += 16; \ + if (decoder->offset == decoder->width) { \ + do { /* just so we can use the break statement */ \ + if (decoder->convert) { \ + decoder->convert (decoder->convert_id, decoder->dest, \ + decoder->v_offset); \ + if (decoder->coding_type == B_TYPE) \ + break; \ + } \ + decoder->dest[0] += decoder->slice_stride; \ + decoder->dest[1] += decoder->slice_uv_stride; \ + decoder->dest[2] += decoder->slice_uv_stride; \ + } while (0); \ + decoder->v_offset += 16; \ + if (decoder->v_offset > decoder->limit_y) { \ + if (mpeg2_cpu_state_restore) \ + mpeg2_cpu_state_restore (&cpu_state); \ + return; \ + } \ + decoder->offset = 0; \ + } \ +} while (0) + /* mpeg2_init_fbuf: point decoder->picture_dest and the forward/backward reference plane pointers at the three supplied frame buffers (offset by one row for a bottom field), derive the strides and clamp limits, and fill decoder->motion_parser[] according to decoder->mpeg1, picture_structure and chroma_format. */ +void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3], + uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3]) +{ + int offset, stride, height, bottom_field; + + stride = decoder->stride_frame; + bottom_field = (decoder->picture_structure == BOTTOM_FIELD); + offset = bottom_field ?
stride : 0; + height = decoder->height; + + decoder->picture_dest[0] = current_fbuf[0] + offset; + decoder->picture_dest[1] = current_fbuf[1] + (offset >> 1); + decoder->picture_dest[2] = current_fbuf[2] + (offset >> 1); + + decoder->f_motion.ref[0][0] = forward_fbuf[0] + offset; + decoder->f_motion.ref[0][1] = forward_fbuf[1] + (offset >> 1); + decoder->f_motion.ref[0][2] = forward_fbuf[2] + (offset >> 1); + + decoder->b_motion.ref[0][0] = backward_fbuf[0] + offset; + decoder->b_motion.ref[0][1] = backward_fbuf[1] + (offset >> 1); + decoder->b_motion.ref[0][2] = backward_fbuf[2] + (offset >> 1); + + if (decoder->picture_structure != FRAME_PICTURE) { + decoder->dmv_offset = bottom_field ? 1 : -1; + decoder->f_motion.ref2[0] = decoder->f_motion.ref[bottom_field]; + decoder->f_motion.ref2[1] = decoder->f_motion.ref[!bottom_field]; + decoder->b_motion.ref2[0] = decoder->b_motion.ref[bottom_field]; + decoder->b_motion.ref2[1] = decoder->b_motion.ref[!bottom_field]; + offset = stride - offset; + + if (decoder->second_field && (decoder->coding_type != B_TYPE)) + forward_fbuf = current_fbuf; + + decoder->f_motion.ref[1][0] = forward_fbuf[0] + offset; + decoder->f_motion.ref[1][1] = forward_fbuf[1] + (offset >> 1); + decoder->f_motion.ref[1][2] = forward_fbuf[2] + (offset >> 1); + + decoder->b_motion.ref[1][0] = backward_fbuf[0] + offset; + decoder->b_motion.ref[1][1] = backward_fbuf[1] + (offset >> 1); + decoder->b_motion.ref[1][2] = backward_fbuf[2] + (offset >> 1); + + stride <<= 1; + height >>= 1; + } + + decoder->stride = stride; + decoder->uv_stride = stride >> 1; + decoder->slice_stride = 16 * stride; + decoder->slice_uv_stride = + decoder->slice_stride >> (2 - decoder->chroma_format); + decoder->limit_x = 2 * decoder->width - 32; + decoder->limit_y_16 = 2 * height - 32; + decoder->limit_y_8 = 2 * height - 16; + decoder->limit_y = height - 16; + + if (decoder->mpeg1) { + decoder->motion_parser[0] = motion_zero_420; + decoder->motion_parser[MC_FRAME] = motion_mp1; + 
decoder->motion_parser[4] = motion_reuse_420; + } else if (decoder->picture_structure == FRAME_PICTURE) { + if (decoder->chroma_format == 0) { + decoder->motion_parser[0] = motion_zero_420; + decoder->motion_parser[MC_FIELD] = motion_fr_field_420; + decoder->motion_parser[MC_FRAME] = motion_fr_frame_420; + decoder->motion_parser[MC_DMV] = motion_fr_dmv_420; + decoder->motion_parser[4] = motion_reuse_420; + } else if (decoder->chroma_format == 1) { + decoder->motion_parser[0] = motion_zero_422; + decoder->motion_parser[MC_FIELD] = motion_fr_field_422; + decoder->motion_parser[MC_FRAME] = motion_fr_frame_422; + decoder->motion_parser[MC_DMV] = motion_fr_dmv_422; + decoder->motion_parser[4] = motion_reuse_422; + } else { + decoder->motion_parser[0] = motion_zero_444; + decoder->motion_parser[MC_FIELD] = motion_fr_field_444; + decoder->motion_parser[MC_FRAME] = motion_fr_frame_444; + decoder->motion_parser[MC_DMV] = motion_fr_dmv_444; + decoder->motion_parser[4] = motion_reuse_444; + } + } else { + if (decoder->chroma_format == 0) { + decoder->motion_parser[0] = motion_zero_420; + decoder->motion_parser[MC_FIELD] = motion_fi_field_420; + decoder->motion_parser[MC_16X8] = motion_fi_16x8_420; + decoder->motion_parser[MC_DMV] = motion_fi_dmv_420; + decoder->motion_parser[4] = motion_reuse_420; + } else if (decoder->chroma_format == 1) { + decoder->motion_parser[0] = motion_zero_422; + decoder->motion_parser[MC_FIELD] = motion_fi_field_422; + decoder->motion_parser[MC_16X8] = motion_fi_16x8_422; + decoder->motion_parser[MC_DMV] = motion_fi_dmv_422; + decoder->motion_parser[4] = motion_reuse_422; + } else { + decoder->motion_parser[0] = motion_zero_444; + decoder->motion_parser[MC_FIELD] = motion_fi_field_444; + decoder->motion_parser[MC_16X8] = motion_fi_16x8_444; + decoder->motion_parser[MC_DMV] = motion_fi_dmv_444; + decoder->motion_parser[4] = motion_reuse_444; + } + } +} + +static inline int slice_init (mpeg2_decoder_t * const decoder, int code) +{ +#define bit_buf 
(decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + int offset; + const MBAtab * mba; + + decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] = + decoder->dc_dct_pred[2] = 16384; + + decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0; + decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0; + decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0; + decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0; + + if (decoder->vertical_position_extension) { + code += UBITS (bit_buf, 3) << 7; + DUMPBITS (bit_buf, bits, 3); + } + decoder->v_offset = (code - 1) * 16; + offset = 0; + if (!(decoder->convert) || decoder->coding_type != B_TYPE) + offset = (code - 1) * decoder->slice_stride; + + decoder->dest[0] = decoder->picture_dest[0] + offset; + offset >>= (2 - decoder->chroma_format); + decoder->dest[1] = decoder->picture_dest[1] + offset; + decoder->dest[2] = decoder->picture_dest[2] + offset; + + get_quantizer_scale (decoder); + + /* ignore intra_slice and all the extra data */ + while (bit_buf & 0x80000000) { + DUMPBITS (bit_buf, bits, 9); + NEEDBITS (bit_buf, bits, bit_ptr); + } + + /* decode initial macroblock address increment */ + offset = 0; + while (1) { + if (bit_buf >= 0x08000000) { + mba = MBA_5 + (UBITS (bit_buf, 6) - 2); + break; + } else if (bit_buf >= 0x01800000) { + mba = MBA_11 + (UBITS (bit_buf, 12) - 24); + break; + } else switch (UBITS (bit_buf, 12)) { + case 8: /* macroblock_escape */ + offset += 33; + DUMPBITS (bit_buf, bits, 11); + NEEDBITS (bit_buf, bits, bit_ptr); + continue; + case 15: /* macroblock_stuffing (MPEG1 only) */ + bit_buf &= 0xfffff; + DUMPBITS (bit_buf, bits, 11); + NEEDBITS (bit_buf, bits, bit_ptr); + continue; + default: /* error */ + return 1; + } + } + DUMPBITS (bit_buf, bits, mba->len + 1); + decoder->offset = (offset + mba->mba) << 4; + + while (decoder->offset - decoder->width >= 0) { + decoder->offset -= decoder->width; + if 
(!(decoder->convert) || decoder->coding_type != B_TYPE) { + decoder->dest[0] += decoder->slice_stride; + decoder->dest[1] += decoder->slice_uv_stride; + decoder->dest[2] += decoder->slice_uv_stride; + } + decoder->v_offset += 16; + } + if (decoder->v_offset > decoder->limit_y) + return 1; + + return 0; +#undef bit_buf +#undef bits +#undef bit_ptr +} + +void mpeg2_slice (mpeg2_decoder_t * const decoder, const int code, + const uint8_t * const buffer) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + cpu_state_t cpu_state; + + bitstream_init (decoder, buffer); + + if (slice_init (decoder, code)) + return; + + if (mpeg2_cpu_state_save) + mpeg2_cpu_state_save (&cpu_state); + + while (1) { + int macroblock_modes; + int mba_inc; + const MBAtab * mba; + + NEEDBITS (bit_buf, bits, bit_ptr); + + macroblock_modes = get_macroblock_modes (decoder); + + /* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? */ + if (macroblock_modes & MACROBLOCK_QUANT) + get_quantizer_scale (decoder); + + if (macroblock_modes & MACROBLOCK_INTRA) { + + int DCT_offset, DCT_stride; + int offset; + uint8_t * dest_y; + + if (decoder->concealment_motion_vectors) { + if (decoder->picture_structure == FRAME_PICTURE) + motion_fr_conceal (decoder); + else + motion_fi_conceal (decoder); + } else { + decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0; + decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0; + decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0; + decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0; + } + + if (macroblock_modes & DCT_TYPE_INTERLACED) { + DCT_offset = decoder->stride; + DCT_stride = decoder->stride * 2; + } else { + DCT_offset = decoder->stride * 8; + DCT_stride = decoder->stride; + } + + offset = decoder->offset; + dest_y = decoder->dest[0] + offset; + slice_intra_DCT (decoder, 0, dest_y, DCT_stride); + slice_intra_DCT (decoder, 0, dest_y + 8, 
DCT_stride); + slice_intra_DCT (decoder, 0, dest_y + DCT_offset, DCT_stride); + slice_intra_DCT (decoder, 0, dest_y + DCT_offset + 8, DCT_stride); + if (likely (decoder->chroma_format == 0)) { + slice_intra_DCT (decoder, 1, decoder->dest[1] + (offset >> 1), + decoder->uv_stride); + slice_intra_DCT (decoder, 2, decoder->dest[2] + (offset >> 1), + decoder->uv_stride); + if (decoder->coding_type == D_TYPE) { + NEEDBITS (bit_buf, bits, bit_ptr); + DUMPBITS (bit_buf, bits, 1); + } + } else if (likely (decoder->chroma_format == 1)) { + uint8_t * dest_u = decoder->dest[1] + (offset >> 1); + uint8_t * dest_v = decoder->dest[2] + (offset >> 1); + DCT_stride >>= 1; + DCT_offset >>= 1; + slice_intra_DCT (decoder, 1, dest_u, DCT_stride); + slice_intra_DCT (decoder, 2, dest_v, DCT_stride); + slice_intra_DCT (decoder, 1, dest_u + DCT_offset, DCT_stride); + slice_intra_DCT (decoder, 2, dest_v + DCT_offset, DCT_stride); + } else { + uint8_t * dest_u = decoder->dest[1] + offset; + uint8_t * dest_v = decoder->dest[2] + offset; + slice_intra_DCT (decoder, 1, dest_u, DCT_stride); + slice_intra_DCT (decoder, 2, dest_v, DCT_stride); + slice_intra_DCT (decoder, 1, dest_u + DCT_offset, DCT_stride); + slice_intra_DCT (decoder, 2, dest_v + DCT_offset, DCT_stride); + slice_intra_DCT (decoder, 1, dest_u + 8, DCT_stride); + slice_intra_DCT (decoder, 2, dest_v + 8, DCT_stride); + slice_intra_DCT (decoder, 1, dest_u + DCT_offset + 8, + DCT_stride); + slice_intra_DCT (decoder, 2, dest_v + DCT_offset + 8, + DCT_stride); + } + } else { + + motion_parser_t * parser; + + parser = + decoder->motion_parser[macroblock_modes >> MOTION_TYPE_SHIFT]; + MOTION_CALL (parser, macroblock_modes); + + if (macroblock_modes & MACROBLOCK_PATTERN) { + int coded_block_pattern; + int DCT_offset, DCT_stride; + + if (macroblock_modes & DCT_TYPE_INTERLACED) { + DCT_offset = decoder->stride; + DCT_stride = decoder->stride * 2; + } else { + DCT_offset = decoder->stride * 8; + DCT_stride = decoder->stride; + } + + 
coded_block_pattern = get_coded_block_pattern (decoder); + + if (likely (decoder->chroma_format == 0)) { + int offset = decoder->offset; + uint8_t * dest_y = decoder->dest[0] + offset; + if (coded_block_pattern & 1) + slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride); + if (coded_block_pattern & 2) + slice_non_intra_DCT (decoder, 0, dest_y + 8, + DCT_stride); + if (coded_block_pattern & 4) + slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset, + DCT_stride); + if (coded_block_pattern & 8) + slice_non_intra_DCT (decoder, 0, + dest_y + DCT_offset + 8, + DCT_stride); + if (coded_block_pattern & 16) + slice_non_intra_DCT (decoder, 1, + decoder->dest[1] + (offset >> 1), + decoder->uv_stride); + if (coded_block_pattern & 32) + slice_non_intra_DCT (decoder, 2, + decoder->dest[2] + (offset >> 1), + decoder->uv_stride); + } else if (likely (decoder->chroma_format == 1)) { + int offset; + uint8_t * dest_y; + + coded_block_pattern |= bit_buf & (3 << 30); + DUMPBITS (bit_buf, bits, 2); + + offset = decoder->offset; + dest_y = decoder->dest[0] + offset; + if (coded_block_pattern & 1) + slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride); + if (coded_block_pattern & 2) + slice_non_intra_DCT (decoder, 0, dest_y + 8, + DCT_stride); + if (coded_block_pattern & 4) + slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset, + DCT_stride); + if (coded_block_pattern & 8) + slice_non_intra_DCT (decoder, 0, + dest_y + DCT_offset + 8, + DCT_stride); + + DCT_stride >>= 1; + DCT_offset = (DCT_offset + offset) >> 1; + if (coded_block_pattern & 16) + slice_non_intra_DCT (decoder, 1, + decoder->dest[1] + (offset >> 1), + DCT_stride); + if (coded_block_pattern & 32) + slice_non_intra_DCT (decoder, 2, + decoder->dest[2] + (offset >> 1), + DCT_stride); + if (coded_block_pattern & (2 << 30)) + slice_non_intra_DCT (decoder, 1, + decoder->dest[1] + DCT_offset, + DCT_stride); + if (coded_block_pattern & (1 << 30)) + slice_non_intra_DCT (decoder, 2, + decoder->dest[2] + DCT_offset, + DCT_stride); + } 
else { + int offset; + uint8_t * dest_y, * dest_u, * dest_v; + + coded_block_pattern |= bit_buf & (63 << 26); + DUMPBITS (bit_buf, bits, 6); + + offset = decoder->offset; + dest_y = decoder->dest[0] + offset; + dest_u = decoder->dest[1] + offset; + dest_v = decoder->dest[2] + offset; + + if (coded_block_pattern & 1) + slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride); + if (coded_block_pattern & 2) + slice_non_intra_DCT (decoder, 0, dest_y + 8, + DCT_stride); + if (coded_block_pattern & 4) + slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset, + DCT_stride); + if (coded_block_pattern & 8) + slice_non_intra_DCT (decoder, 0, + dest_y + DCT_offset + 8, + DCT_stride); + + if (coded_block_pattern & 16) + slice_non_intra_DCT (decoder, 1, dest_u, DCT_stride); + if (coded_block_pattern & 32) + slice_non_intra_DCT (decoder, 2, dest_v, DCT_stride); + if (coded_block_pattern & (32 << 26)) + slice_non_intra_DCT (decoder, 1, dest_u + DCT_offset, + DCT_stride); + if (coded_block_pattern & (16 << 26)) + slice_non_intra_DCT (decoder, 2, dest_v + DCT_offset, + DCT_stride); + if (coded_block_pattern & (8 << 26)) + slice_non_intra_DCT (decoder, 1, dest_u + 8, + DCT_stride); + if (coded_block_pattern & (4 << 26)) + slice_non_intra_DCT (decoder, 2, dest_v + 8, + DCT_stride); + if (coded_block_pattern & (2 << 26)) + slice_non_intra_DCT (decoder, 1, + dest_u + DCT_offset + 8, + DCT_stride); + if (coded_block_pattern & (1 << 26)) + slice_non_intra_DCT (decoder, 2, + dest_v + DCT_offset + 8, + DCT_stride); + } + } + + decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] = + decoder->dc_dct_pred[2] = 16384; + } + + NEXT_MACROBLOCK; + + NEEDBITS (bit_buf, bits, bit_ptr); + mba_inc = 0; + while (1) { + if (bit_buf >= 0x10000000) { + mba = MBA_5 + (UBITS (bit_buf, 5) - 2); + break; + } else if (bit_buf >= 0x03000000) { + mba = MBA_11 + (UBITS (bit_buf, 11) - 24); + break; + } else switch (UBITS (bit_buf, 11)) { + case 8: /* macroblock_escape */ + mba_inc += 33; + /* pass through */ + case 15: 
/* macroblock_stuffing (MPEG1 only) */ + DUMPBITS (bit_buf, bits, 11); + NEEDBITS (bit_buf, bits, bit_ptr); + continue; + default: /* end of slice, or error */ + if (mpeg2_cpu_state_restore) + mpeg2_cpu_state_restore (&cpu_state); + return; + } + } + DUMPBITS (bit_buf, bits, mba->len); + mba_inc += mba->mba; + + if (mba_inc) { + decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] = + decoder->dc_dct_pred[2] = 16384; + + if (decoder->coding_type == P_TYPE) { + do { + MOTION_CALL (decoder->motion_parser[0], + MACROBLOCK_MOTION_FORWARD); + NEXT_MACROBLOCK; + } while (--mba_inc); + } else { + do { + MOTION_CALL (decoder->motion_parser[4], macroblock_modes); + NEXT_MACROBLOCK; + } while (--mba_inc); + } + } + } +#undef bit_buf +#undef bits +#undef bit_ptr +} diff --git a/src/video_dec/libmpeg2new/libmpeg2/uyvy.c b/src/video_dec/libmpeg2new/libmpeg2/uyvy.c new file mode 100644 index 000000000..7f107ffad --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/uyvy.c @@ -0,0 +1,123 @@ +/* + * uyvy.c + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 2003 Regis Duchesne + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include + +#include "mpeg2.h" +#include "mpeg2convert.h" + +typedef struct { + int width; + int stride; + int chroma420; + uint8_t * out; +} convert_uyvy_t; + +static void uyvy_start (void * _id, const mpeg2_fbuf_t * fbuf, + const mpeg2_picture_t * picture, + const mpeg2_gop_t * gop) +{ + convert_uyvy_t * instance = (convert_uyvy_t *) _id; + + instance->out = fbuf->buf[0]; + instance->stride = instance->width; + if (picture->nb_fields == 1) { + if (! (picture->flags & PIC_FLAG_TOP_FIELD_FIRST)) + instance->out += 2 * instance->stride; + instance->stride <<= 1; + } +} + +#ifdef WORDS_BIGENDIAN +#define PACK(a,b,c,d) (((a) << 24) | ((b) << 16) | ((c) << 8) | (d)) +#else +#define PACK(a,b,c,d) (((d) << 24) | ((c) << 16) | ((b) << 8) | (a)) +#endif + +static void uyvy_copy (void * const _id, uint8_t * const * src, + const unsigned int v_offset) +{ + const convert_uyvy_t * const id = (convert_uyvy_t *) _id; + uint8_t * _dst; + uint8_t * py, * pu, * pv; + int i, j; + + _dst = id->out + 2 * id->stride * v_offset; + py = src[0]; pu = src[1]; pv = src[2]; + + i = 16; + do { + uint32_t * dst = (uint32_t *) _dst; + + j = id->width >> 4; + do { + dst[0] = PACK (pu[0], py[0], pv[0], py[1]); + dst[1] = PACK (pu[1], py[2], pv[1], py[3]); + dst[2] = PACK (pu[2], py[4], pv[2], py[5]); + dst[3] = PACK (pu[3], py[6], pv[3], py[7]); + dst[4] = PACK (pu[4], py[8], pv[4], py[9]); + dst[5] = PACK (pu[5], py[10], pv[5], py[11]); + dst[6] = PACK (pu[6], py[12], pv[6], py[13]); + dst[7] = PACK (pu[7], py[14], pv[7], py[15]); + py += 16; + pu += 8; + pv += 8; + dst += 8; + } while (--j); + py -= id->width; + pu -= id->width >> 1; + pv -= id->width >> 1; + _dst += 2 * id->stride; + py += id->stride; + if (! 
(--i & id->chroma420)) { + pu += id->stride >> 1; + pv += id->stride >> 1; + } + } while (i); +} + +int mpeg2convert_uyvy (int stage, void * _id, const mpeg2_sequence_t * seq, + int stride, uint32_t accel, void * arg, + mpeg2_convert_init_t * result) +{ + convert_uyvy_t * instance = (convert_uyvy_t *) _id; + + if (seq->chroma_width == seq->width) + return 1; + + if (instance) { + instance->width = seq->width; + instance->chroma420 = (seq->chroma_height < seq->height); + result->buf_size[0] = seq->width * seq->height * 2; + result->buf_size[1] = result->buf_size[2] = 0; + result->start = uyvy_start; + result->copy = uyvy_copy; + } else { + result->id_size = sizeof (convert_uyvy_t); + } + + return 0; +} diff --git a/src/video_dec/libmpeg2new/libmpeg2/vlc.h b/src/video_dec/libmpeg2new/libmpeg2/vlc.h new file mode 100644 index 000000000..57448ce04 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/vlc.h @@ -0,0 +1,429 @@ +/* + * vlc.h + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define GETWORD(bit_buf,shift,bit_ptr) \ +do { \ + bit_buf |= ((bit_ptr[0] << 8) | bit_ptr[1]) << (shift); \ + bit_ptr += 2; \ +} while (0) + +static inline void bitstream_init (mpeg2_decoder_t * decoder, + const uint8_t * start) +{ + decoder->bitstream_buf = + (start[0] << 24) | (start[1] << 16) | (start[2] << 8) | start[3]; + decoder->bitstream_ptr = start + 4; + decoder->bitstream_bits = -16; +} + +/* make sure that there are at least 16 valid bits in bit_buf */ +#define NEEDBITS(bit_buf,bits,bit_ptr) \ +do { \ + if (unlikely (bits > 0)) { \ + GETWORD (bit_buf, bits, bit_ptr); \ + bits -= 16; \ + } \ +} while (0) + +/* remove num valid bits from bit_buf */ +#define DUMPBITS(bit_buf,bits,num) \ +do { \ + bit_buf <<= (num); \ + bits += (num); \ +} while (0) + +/* take num bits from the high part of bit_buf and zero extend them */ +#define UBITS(bit_buf,num) (((uint32_t)(bit_buf)) >> (32 - (num))) + +/* take num bits from the high part of bit_buf and sign extend them */ +#define SBITS(bit_buf,num) (((int32_t)(bit_buf)) >> (32 - (num))) + +typedef struct { + uint8_t modes; + uint8_t len; +} MBtab; + +typedef struct { + uint8_t delta; + uint8_t len; +} MVtab; + +typedef struct { + int8_t dmv; + uint8_t len; +} DMVtab; + +typedef struct { + uint8_t cbp; + uint8_t len; +} CBPtab; + +typedef struct { + uint8_t size; + uint8_t len; +} DCtab; + +typedef struct { + uint8_t run; + uint8_t level; + uint8_t len; +} DCTtab; + +typedef struct { + uint8_t mba; + uint8_t len; +} MBAtab; + + +#define INTRA MACROBLOCK_INTRA +#define QUANT MACROBLOCK_QUANT + +static const MBtab MB_I [] = { + {INTRA|QUANT, 2}, {INTRA, 1} +}; + +#define MC MACROBLOCK_MOTION_FORWARD +#define CODED MACROBLOCK_PATTERN + +static const MBtab MB_P [] = { + {INTRA|QUANT, 6}, {CODED|QUANT, 
5}, {MC|CODED|QUANT, 5}, {INTRA, 5}, + {MC, 3}, {MC, 3}, {MC, 3}, {MC, 3}, + {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, + {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1} +}; + +#define FWD MACROBLOCK_MOTION_FORWARD +#define BWD MACROBLOCK_MOTION_BACKWARD +#define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD + +static const MBtab MB_B [] = { + {0, 6}, {INTRA|QUANT, 6}, + {BWD|CODED|QUANT, 6}, {FWD|CODED|QUANT, 6}, + {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5}, + {INTRA, 5}, {INTRA, 5}, + {FWD, 4}, {FWD, 4}, {FWD, 4}, {FWD, 4}, + {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, + {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, + {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, + {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, + {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2} +}; + +#undef INTRA +#undef QUANT +#undef MC +#undef CODED +#undef FWD +#undef BWD +#undef INTER + + +static const MVtab MV_4 [] = { + { 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2} +}; + +static const MVtab MV_10 [] = { + { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, + { 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10}, + {11,10}, {10,10}, { 9, 9}, 
{ 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9}, + { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, + { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, + { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7} +}; + + +static const DMVtab DMV_2 [] = { + { 0, 1}, { 0, 1}, { 1, 2}, {-1, 2} +}; + + +static const CBPtab CBP_7 [] = { + {0x11, 7}, {0x12, 7}, {0x14, 7}, {0x18, 7}, + {0x21, 7}, {0x22, 7}, {0x24, 7}, {0x28, 7}, + {0x3f, 6}, {0x3f, 6}, {0x30, 6}, {0x30, 6}, + {0x09, 6}, {0x09, 6}, {0x06, 6}, {0x06, 6}, + {0x1f, 5}, {0x1f, 5}, {0x1f, 5}, {0x1f, 5}, + {0x10, 5}, {0x10, 5}, {0x10, 5}, {0x10, 5}, + {0x2f, 5}, {0x2f, 5}, {0x2f, 5}, {0x2f, 5}, + {0x20, 5}, {0x20, 5}, {0x20, 5}, {0x20, 5}, + {0x07, 5}, {0x07, 5}, {0x07, 5}, {0x07, 5}, + {0x0b, 5}, {0x0b, 5}, {0x0b, 5}, {0x0b, 5}, + {0x0d, 5}, {0x0d, 5}, {0x0d, 5}, {0x0d, 5}, + {0x0e, 5}, {0x0e, 5}, {0x0e, 5}, {0x0e, 5}, + {0x05, 5}, {0x05, 5}, {0x05, 5}, {0x05, 5}, + {0x0a, 5}, {0x0a, 5}, {0x0a, 5}, {0x0a, 5}, + {0x03, 5}, {0x03, 5}, {0x03, 5}, {0x03, 5}, + {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, + {0x01, 4}, {0x01, 4}, {0x01, 4}, {0x01, 4}, + {0x01, 4}, {0x01, 4}, {0x01, 4}, {0x01, 4}, + {0x02, 4}, {0x02, 4}, {0x02, 4}, {0x02, 4}, + {0x02, 4}, {0x02, 4}, {0x02, 4}, {0x02, 4}, + {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, + {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, + {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, + {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, + {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, + {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3} +}; + +static const CBPtab CBP_9 [] = { + {0, 9}, {0x00, 9}, {0x39, 9}, {0x36, 9}, + {0x37, 9}, {0x3b, 9}, {0x3d, 9}, {0x3e, 9}, + {0x17, 8}, {0x17, 8}, {0x1b, 8}, {0x1b, 8}, + {0x1d, 8}, {0x1d, 8}, {0x1e, 8}, {0x1e, 8}, + {0x27, 8}, {0x27, 8}, {0x2b, 8}, {0x2b, 8}, + {0x2d, 8}, {0x2d, 8}, {0x2e, 8}, {0x2e, 8}, + {0x19, 8}, 
{0x19, 8}, {0x16, 8}, {0x16, 8}, + {0x29, 8}, {0x29, 8}, {0x26, 8}, {0x26, 8}, + {0x35, 8}, {0x35, 8}, {0x3a, 8}, {0x3a, 8}, + {0x33, 8}, {0x33, 8}, {0x3c, 8}, {0x3c, 8}, + {0x15, 8}, {0x15, 8}, {0x1a, 8}, {0x1a, 8}, + {0x13, 8}, {0x13, 8}, {0x1c, 8}, {0x1c, 8}, + {0x25, 8}, {0x25, 8}, {0x2a, 8}, {0x2a, 8}, + {0x23, 8}, {0x23, 8}, {0x2c, 8}, {0x2c, 8}, + {0x31, 8}, {0x31, 8}, {0x32, 8}, {0x32, 8}, + {0x34, 8}, {0x34, 8}, {0x38, 8}, {0x38, 8} +}; + + +static const DCtab DC_lum_5 [] = { + {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, + {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5} +}; + +static const DCtab DC_chrom_5 [] = { + {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, + {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5} +}; + +static const DCtab DC_long [] = { + {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, + {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, + {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, { 7, 6}, { 7, 6}, + {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9} +}; + + +static const DCTtab DCT_16 [] = { + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + { 2,18, 0}, { 2,17, 0}, { 2,16, 0}, { 2,15, 0}, + { 7, 3, 0}, { 17, 2, 0}, { 16, 2, 0}, { 15, 2, 0}, + { 14, 2, 0}, { 13, 2, 0}, { 12, 2, 0}, { 32, 1, 0}, + { 31, 1, 0}, { 30, 1, 0}, { 29, 1, 0}, { 28, 1, 0} +}; + +static const DCTtab DCT_15 [] = { + { 1,40,15}, { 1,39,15}, { 1,38,15}, { 1,37,15}, + { 1,36,15}, { 1,35,15}, { 1,34,15}, { 1,33,15}, + { 1,32,15}, { 2,14,15}, { 
2,13,15}, { 2,12,15}, + { 2,11,15}, { 2,10,15}, { 2, 9,15}, { 2, 8,15}, + { 1,31,14}, { 1,31,14}, { 1,30,14}, { 1,30,14}, + { 1,29,14}, { 1,29,14}, { 1,28,14}, { 1,28,14}, + { 1,27,14}, { 1,27,14}, { 1,26,14}, { 1,26,14}, + { 1,25,14}, { 1,25,14}, { 1,24,14}, { 1,24,14}, + { 1,23,14}, { 1,23,14}, { 1,22,14}, { 1,22,14}, + { 1,21,14}, { 1,21,14}, { 1,20,14}, { 1,20,14}, + { 1,19,14}, { 1,19,14}, { 1,18,14}, { 1,18,14}, + { 1,17,14}, { 1,17,14}, { 1,16,14}, { 1,16,14} +}; + +static const DCTtab DCT_13 [] = { + { 11, 2,13}, { 10, 2,13}, { 6, 3,13}, { 4, 4,13}, + { 3, 5,13}, { 2, 7,13}, { 2, 6,13}, { 1,15,13}, + { 1,14,13}, { 1,13,13}, { 1,12,13}, { 27, 1,13}, + { 26, 1,13}, { 25, 1,13}, { 24, 1,13}, { 23, 1,13}, + { 1,11,12}, { 1,11,12}, { 9, 2,12}, { 9, 2,12}, + { 5, 3,12}, { 5, 3,12}, { 1,10,12}, { 1,10,12}, + { 3, 4,12}, { 3, 4,12}, { 8, 2,12}, { 8, 2,12}, + { 22, 1,12}, { 22, 1,12}, { 21, 1,12}, { 21, 1,12}, + { 1, 9,12}, { 1, 9,12}, { 20, 1,12}, { 20, 1,12}, + { 19, 1,12}, { 19, 1,12}, { 2, 5,12}, { 2, 5,12}, + { 4, 3,12}, { 4, 3,12}, { 1, 8,12}, { 1, 8,12}, + { 7, 2,12}, { 7, 2,12}, { 18, 1,12}, { 18, 1,12} +}; + +static const DCTtab DCT_B14_10 [] = { + { 17, 1,10}, { 6, 2,10}, { 1, 7,10}, { 3, 3,10}, + { 2, 4,10}, { 16, 1,10}, { 15, 1,10}, { 5, 2,10} +}; + +static const DCTtab DCT_B14_8 [] = { + { 65, 0,12}, { 65, 0,12}, { 65, 0,12}, { 65, 0,12}, + { 3, 2, 7}, { 3, 2, 7}, { 10, 1, 7}, { 10, 1, 7}, + { 1, 4, 7}, { 1, 4, 7}, { 9, 1, 7}, { 9, 1, 7}, + { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, + { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, + { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, + { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, + { 14, 1, 8}, { 1, 6, 8}, { 13, 1, 8}, { 12, 1, 8}, + { 4, 2, 8}, { 2, 3, 8}, { 1, 5, 8}, { 11, 1, 8} +}; + +static const DCTtab DCT_B14AC_5 [] = { + { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, + { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + {129, 0, 2}, {129, 0, 2}, {129, 0, 
2}, {129, 0, 2}, + {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2} +}; + +static const DCTtab DCT_B14DC_5 [] = { + { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, + { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1} +}; + +static const DCTtab DCT_B15_10 [] = { + { 6, 2, 9}, { 6, 2, 9}, { 15, 1, 9}, { 15, 1, 9}, + { 3, 4,10}, { 17, 1,10}, { 16, 1, 9}, { 16, 1, 9} +}; + +static const DCTtab DCT_B15_8 [] = { + { 65, 0,12}, { 65, 0,12}, { 65, 0,12}, { 65, 0,12}, + { 8, 1, 7}, { 8, 1, 7}, { 9, 1, 7}, { 9, 1, 7}, + { 7, 1, 7}, { 7, 1, 7}, { 3, 2, 7}, { 3, 2, 7}, + { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, + { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, + { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, + { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, + { 2, 5, 8}, { 12, 1, 8}, { 1,11, 8}, { 1,10, 8}, + { 14, 1, 8}, { 13, 1, 8}, { 4, 2, 8}, { 2, 4, 8}, + { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, + { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, + { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, + { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, + { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, + { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, 
{129, 0, 4}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, + { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, + { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, + { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, + { 10, 1, 7}, { 10, 1, 7}, { 2, 3, 7}, { 2, 3, 7}, + { 11, 1, 7}, { 11, 1, 7}, { 1, 8, 7}, { 1, 8, 7}, + { 1, 9, 7}, { 1, 9, 7}, { 1,12, 8}, { 1,13, 8}, + { 3, 3, 8}, { 5, 2, 8}, { 1,14, 8}, { 1,15, 8} +}; + + +static const MBAtab MBA_5 [] = { + {6, 5}, {5, 5}, {4, 4}, {4, 4}, 
{3, 4}, {3, 4}, + {2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, + {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, + {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1} +}; + +static const MBAtab MBA_11 [] = { + {32, 11}, {31, 11}, {30, 11}, {29, 11}, + {28, 11}, {27, 11}, {26, 11}, {25, 11}, + {24, 11}, {23, 11}, {22, 11}, {21, 11}, + {20, 10}, {20, 10}, {19, 10}, {19, 10}, + {18, 10}, {18, 10}, {17, 10}, {17, 10}, + {16, 10}, {16, 10}, {15, 10}, {15, 10}, + {14, 8}, {14, 8}, {14, 8}, {14, 8}, + {14, 8}, {14, 8}, {14, 8}, {14, 8}, + {13, 8}, {13, 8}, {13, 8}, {13, 8}, + {13, 8}, {13, 8}, {13, 8}, {13, 8}, + {12, 8}, {12, 8}, {12, 8}, {12, 8}, + {12, 8}, {12, 8}, {12, 8}, {12, 8}, + {11, 8}, {11, 8}, {11, 8}, {11, 8}, + {11, 8}, {11, 8}, {11, 8}, {11, 8}, + {10, 8}, {10, 8}, {10, 8}, {10, 8}, + {10, 8}, {10, 8}, {10, 8}, {10, 8}, + { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, + { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7} +}; diff --git a/src/video_dec/libmpeg2new/xine_mpeg2new_decoder.c b/src/video_dec/libmpeg2new/xine_mpeg2new_decoder.c new file mode 100644 index 000000000..7494791b1 --- /dev/null +++ b/src/video_dec/libmpeg2new/xine_mpeg2new_decoder.c @@ -0,0 +1,504 @@ +/* + * Copyright (C) 2000-2004 the xine project + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ *
+ * xine is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * stuff needed to turn libmpeg2 into a xine decoder plugin
+ */
+
+
+/*
+ * NOTE(review): the header names on the angle-bracket #include lines are
+ * missing from this copy of the patch.  The names below are inferred from the
+ * symbols this file uses (printf, calloc/free, fixed-width integers and the
+ * xine decoder / video-out / buffer types) -- confirm against upstream.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "./include/mpeg2.h"
+#include <xine/xine_internal.h>
+#include <xine/video_out.h>
+#include <xine/buffer.h>
+
+
+
+#define LOG
+#define LOG_FRAME_ALLOC_FREE
+#define LOG_ENTRY
+#define LOG_FRAME_COUNTER
+
+/* Number of slots in the per-decoder frame tracking table. */
+#define IMG_STATE_SLOTS     30
+
+/* Life cycle of a tracked frame, stored in img_state_t.id. */
+#define IMG_STATE_FREE      0  /* slot unused */
+#define IMG_STATE_ALLOCATED 1  /* obtained from get_frame(), not drawn yet */
+#define IMG_STATE_DRAWN     2  /* drawn, not yet discarded by libmpeg2 */
+
+
+typedef struct {
+  video_decoder_class_t   decoder_class;
+} mpeg2_class_t;
+
+/* One tracking slot; slots are indexed by vo_frame_t.id. */
+typedef struct {
+  uint32_t     id;   /* one of the IMG_STATE_* values */
+  vo_frame_t * img;  /* frame occupying the slot while id != IMG_STATE_FREE */
+} img_state_t;
+
+typedef struct mpeg2_video_decoder_s {
+  video_decoder_t  video_decoder;
+  mpeg2dec_t      *mpeg2dec;
+  mpeg2_class_t   *class;
+  xine_stream_t   *stream;
+  int32_t          force_aspect;    /* aspect hint from a BUF_SPECIAL_ASPECT buffer, 0 = none */
+  int              force_pan_scan;
+  double           ratio;
+  img_state_t      img_state[IMG_STATE_SLOTS];
+  uint32_t         frame_number;
+  uint32_t         rff_pattern;     /* shift register of repeat_first_field bits of drawn frames */
+
+} mpeg2_video_decoder_t;
+
+
+/*
+ * Debug helper: print every occupied tracking slot.  Aborts when more than
+ * three frames are held at once and prints "NO FRAMES" when none are held.
+ */
+static void mpeg2_video_print_bad_state(img_state_t * img_state) {
+  int32_t n;
+  int32_t held = 0;
+
+  for (n = 0; n < IMG_STATE_SLOTS; n++) {
+    if (img_state[n].id != IMG_STATE_FREE) {
+      printf("%d = %u\n", n, img_state[n].id);
+      held++;
+    }
+  }
+  if (held > 3) _x_abort();
+  if (held == 0) printf("NO FRAMES\n");
+}
+
+/*
+ * Release every frame that is still tracked (allocated or drawn) and mark
+ * its slot free again.
+ */
+static void mpeg2_video_free_all(img_state_t * img_state) {
+  int32_t n;
+
+  printf("libmpeg2new:free_all\n");
+  for (n = 0; n < IMG_STATE_SLOTS; n++) {
+    if (img_state[n].id != IMG_STATE_FREE) {
+      vo_frame_t *img = img_state[n].img;
+
+      img->free(img);
+      img_state[n].id = IMG_STATE_FREE;
+    }
+  }
+}
+
+/*
+ * Return the tracking slot that belongs to img.  A frame id outside the
+ * table is reported and aborts, instead of reading or writing memory next
+ * to img_state[].  'what' names the calling step for the log message.
+ */
+static img_state_t *mpeg2_video_img_slot(mpeg2_video_decoder_t *this,
+                                         const vo_frame_t *img,
+                                         const char *what) {
+  if (img->id < 0 || img->id >= IMG_STATE_SLOTS) {
+    printf ("libmpeg2:decode_data:%s id=%d OUT OF RANGE\n", what, img->id);
+    _x_abort();
+  }
+  return &this->img_state[img->id];
+}
+
+
+#ifdef LOG_FBUF
+/* Debug helper: print a libmpeg2 frame buffer and the xine frame behind it. */
+static void mpeg2_video_print_fbuf(const mpeg2_fbuf_t * fbuf) {
+  vo_frame_t * img;
+
+  printf("%p", (const void *) fbuf);
+  if (fbuf) {
+    img = (vo_frame_t *) fbuf->id;
+    if (img) {
+      printf (", img=%p, (id=%d)\n",
+        (void *) img, img->id);
+    } else {
+      printf (", img=NULL\n");
+    }
+  } else {
+    printf ("\n");
+  }
+}
+#endif
+
+/*
+ * STATE_SEQUENCE: publish bitrate, size and frame duration as stream info,
+ * derive the display aspect ratio and publish ratio and codec name.
+ */
+static void mpeg2_video_handle_sequence (mpeg2_video_decoder_t *this,
+                                         const mpeg2_info_t *info) {
+  /* might set nb fbuf, convert format, stride */
+  /* might set fbufs */
+  _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_BITRATE,   info->sequence->byte_rate * 8);
+  _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_WIDTH,     info->sequence->picture_width);
+  _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_HEIGHT,    info->sequence->picture_height);
+  _x_stream_info_set(this->stream, XINE_STREAM_INFO_FRAME_DURATION,  info->sequence->frame_period / 300);
+
+  /* NOTE(review): this overwrites a field of the sequence data returned by
+   * mpeg2_info(); confirm that libmpeg2 tolerates the caller modifying it. */
+  if (this->force_aspect) info->sequence->pixel_width = this->force_aspect;
+
+  switch (info->sequence->pixel_width) {
+  case 3:
+    this->ratio = 16.0 / 9.0;
+    break;
+  case 4:
+    this->ratio = 2.11;
+    break;
+  case 2:
+    this->ratio = 4.0 / 3.0;
+    break;
+  case 1:
+  default:
+    this->ratio = (double)info->sequence->picture_width/(double)info->sequence->picture_height;
+    break;
+  }
+  _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_RATIO, (int)(10000*this->ratio));
+
+  if (info->sequence->flags & SEQ_FLAG_MPEG2) {
+    _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "MPEG 2 (libmpeg2new)");
+  } else {
+    _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "MPEG 1 (libmpeg2new)");
+  }
+}
+
+/*
+ * STATE_PICTURE: fetch a frame from the video-out port, fill in field order,
+ * duration and pts, record it as allocated and hand its planes to libmpeg2.
+ */
+static void mpeg2_video_handle_picture (mpeg2_video_decoder_t *this,
+                                        const mpeg2_info_t *info,
+                                        const buf_element_t *buf_element) {
+  vo_frame_t  *img;
+  img_state_t *slot;
+  uint32_t     picture_structure;
+
+  /* might skip */
+  /* might set fbuf */
+  if (info->current_picture->nb_fields == 1) {
+    picture_structure = info->current_picture->flags & PIC_FLAG_TOP_FIELD_FIRST ? VO_TOP_FIELD : VO_BOTTOM_FIELD;
+  } else {
+    picture_structure = VO_BOTH_FIELDS;
+  }
+
+  img = this->stream->video_out->get_frame (this->stream->video_out,
+                                            info->sequence->picture_width,
+                                            info->sequence->picture_height,
+                                            this->ratio,
+                                            XINE_IMGFMT_YV12,
+                                            picture_structure);
+  this->frame_number++;
+#ifdef LOG_FRAME_COUNTER
+  printf("libmpeg2:frame_number=%u\n",this->frame_number);
+#endif
+  img->top_field_first    = info->current_picture->flags & PIC_FLAG_TOP_FIELD_FIRST ? 1 : 0;
+  img->repeat_first_field = (info->current_picture->nb_fields > 2) ? 1 : 0;
+  img->duration           = info->sequence->frame_period / 300;
+  if ((this->rff_pattern & 0xff) == 0xaa ||
+      (this->rff_pattern & 0xff) == 0x55) {
+    /* special case for ntsc 3:2 pulldown */
+    img->duration += img->duration/4;
+  } else if (img->repeat_first_field) {
+    img->duration = (img->duration * info->current_picture->nb_fields) / 2;
+  }
+
+  if ((info->current_picture->flags & 7) == 1) {
+    img->pts = buf_element->pts; /* If an I frame, use PTS */
+  } else {
+    img->pts = 0;
+  }
+
+#ifdef LOG_FRAME_ALLOC_FREE
+  printf ("libmpeg2:decode_data:get_frame xine=%p (id=%d)\n", (void *) img, img->id);
+#endif
+  slot = mpeg2_video_img_slot (this, img, "get_frame");
+  if (slot->id != IMG_STATE_FREE) {
+    printf ("libmpeg2:decode_data:get_frame id=%d BAD STATE:%u\n", img->id, slot->id);
+    _x_abort();
+  }
+
+  slot->id  = IMG_STATE_ALLOCATED;
+  slot->img = img;
+
+  mpeg2_set_buf (this->mpeg2dec, img->base, img);
+}
+
+/*
+ * STATE_SLICE / STATE_END: draw the frame libmpeg2 reports as displayable
+ * and release the frame it reports as discardable.
+ */
+static void mpeg2_video_handle_slice (mpeg2_video_decoder_t *this,
+                                      const mpeg2_info_t *info) {
+  vo_frame_t  *img;
+  img_state_t *slot;
+
+#ifdef LOG_FBUF
+  printf("libmpeg2:decode_data:current_fbuf=");
+  mpeg2_video_print_fbuf(info->current_fbuf);
+  printf("libmpeg2:decode_data:display_fbuf=");
+  mpeg2_video_print_fbuf(info->display_fbuf);
+  printf("libmpeg2:decode_data:discard_fbuf=");
+  mpeg2_video_print_fbuf(info->discard_fbuf);
+#endif
+  /* draw current picture */
+  /* might free frame buffer */
+  if (info->display_fbuf && info->display_fbuf->id) {
+    img = (vo_frame_t *) info->display_fbuf->id;
+    /* this should be used to detect any special rff pattern */
+    this->rff_pattern = this->rff_pattern << 1;
+    this->rff_pattern |= img->repeat_first_field;
+
+#ifdef LOG_FRAME_ALLOC_FREE
+    printf ("libmpeg2:decode_data:draw_frame xine=%p, fbuf=%p, id=%d \n", (void *) img, (const void *) info->display_fbuf, img->id);
+#endif
+    slot = mpeg2_video_img_slot (this, img, "draw_frame");
+    if (slot->id != IMG_STATE_ALLOCATED) {
+      printf ("libmpeg2:decode_data:draw_frame id=%d BAD STATE:%u\n", img->id, slot->id);
+      _x_abort();
+    } else {
+      /* FIXME: Handle skipping (the value returned by draw() is ignored) */
+      (void) img->draw (img, this->stream);
+      slot->id = IMG_STATE_DRAWN;
+    }
+  }
+
+  if (info->discard_fbuf && !info->discard_fbuf->id) {
+    /* reported only; this case is not treated as fatal */
+    printf ("libmpeg2:decode_data:BAD free_frame discard: xine=%p, fbuf=%p\n", (void *) info->discard_fbuf->id, (const void *) info->discard_fbuf);
+  }
+  if (info->discard_fbuf && info->discard_fbuf->id) {
+    img = (vo_frame_t *) info->discard_fbuf->id;
+#ifdef LOG_FRAME_ALLOC_FREE
+    printf ("libmpeg2:decode_data:free_frame xine=%p, fbuf=%p,id=%d\n", (void *) img, (const void *) info->discard_fbuf, img->id);
+#endif
+    slot = mpeg2_video_img_slot (this, img, "free_frame");
+    if (slot->id != IMG_STATE_DRAWN) {
+      printf ("libmpeg2:decode_data:free_frame id=%d BAD STATE:%u\n", img->id, slot->id);
+      _x_abort();
+    } else {
+      img->free(img);
+      slot->id = IMG_STATE_FREE;
+    }
+  }
+#ifdef LOG_FRAME_ALLOC_FREE
+  mpeg2_video_print_bad_state(this->img_state);
+#endif
+}
+
+/*
+ * decode_data entry point of the plugin.
+ *
+ * Buffers flagged BUF_FLAG_SPECIAL only update the forced aspect / pan&scan
+ * settings; buffers with any other decoder flag are ignored.  Everything else
+ * is fed to libmpeg2 and the parser is run until it asks for more input
+ * (STATE_BUFFER), dispatching each reported state to its handler.
+ */
+static void mpeg2_video_decode_data (video_decoder_t *this_gen, buf_element_t *buf_element) {
+  mpeg2_video_decoder_t *this = (mpeg2_video_decoder_t *) this_gen;
+  uint8_t * current = buf_element->content;
+  uint8_t * end = buf_element->content + buf_element->size;
+  const mpeg2_info_t * info;
+  mpeg2_state_t state;
+
+  /* handle aspect hints from xine-dvdnav */
+  if (buf_element->decoder_flags & BUF_FLAG_SPECIAL) {
+    if (buf_element->decoder_info[1] == BUF_SPECIAL_ASPECT) {
+      this->force_aspect = buf_element->decoder_info[2];
+      if (buf_element->decoder_info[3] == 0x1 && buf_element->decoder_info[2] == 3)
+        /* letterboxing is denied, we have to do pan&scan */
+        this->force_pan_scan = 1;
+      else
+        this->force_pan_scan = 0;
+    }
+
+    return;
+  }
+
+  if (buf_element->decoder_flags != 0) return;
+
+#ifdef LOG_ENTRY
+  printf ("libmpeg2: decode_data: enter\n");
+#endif
+
+  mpeg2_buffer (this->mpeg2dec, current, end);
+
+  info = mpeg2_info (this->mpeg2dec);
+
+  while ((state = mpeg2_parse (this->mpeg2dec)) != STATE_BUFFER) {
+    switch (state) {
+    case STATE_SEQUENCE:
+      mpeg2_video_handle_sequence (this, info);
+      break;
+    case STATE_PICTURE:
+      mpeg2_video_handle_picture (this, info, buf_element);
+      break;
+    case STATE_SLICE:
+    case STATE_END:
+      mpeg2_video_handle_slice (this, info);
+      break;
+    case STATE_GOP:
+      break;
+    default:
+      printf("libmpeg2new: STATE unknown %d\n",state);
+      break;
+    }
+
+  }
+#ifdef LOG_ENTRY
+  printf ("libmpeg2: decode_data: exit\n");
+#endif
+
+}
+
+/* flush entry point: currently only logs, no decoder state is touched. */
+static void mpeg2_video_flush (video_decoder_t *this_gen) {
+  (void) this_gen;
+
+#ifdef LOG_ENTRY
+  printf ("libmpeg2: flush\n");
+#endif
+}
+
+/*
+ * reset entry point: fully reset libmpeg2 and release every frame that is
+ * still tracked.
+ */
+static void mpeg2_video_reset (video_decoder_t *this_gen) {
+  mpeg2_video_decoder_t *this = (mpeg2_video_decoder_t *) this_gen;
+
+#ifdef LOG_ENTRY
+  printf ("libmpeg2: reset\n");
+#endif
+  mpeg2_reset (this->mpeg2dec, 1); /* 1 for full reset */
+  mpeg2_video_free_all(this->img_state);
+
+  /* TODO: a disabled experiment ("does not work yet") that tried to draw and
+   * free the frames still reported by mpeg2_info() after the reset used to
+   * live here; pending frames are simply dropped by the call above. */
+}
+
+/* discontinuity entry point: currently only logs. */
+static void mpeg2_video_discontinuity (video_decoder_t *this_gen) {
+  (void) this_gen;
+
+#ifdef LOG_ENTRY
+  printf ("libmpeg2: dicontinuity\n");
+#endif
+}
+
+/*
+ * dispose entry point: shut libmpeg2 down, give back frames that are still
+ * held, close the video-out port and free the decoder instance.
+ */
+static void mpeg2_video_dispose (video_decoder_t *this_gen) {
+
+  mpeg2_video_decoder_t *this = (mpeg2_video_decoder_t *) this_gen;
+
+#ifdef LOG_ENTRY
+  printf ("libmpeg2: close\n");
+#endif
+
+  mpeg2_close (this->mpeg2dec);
+
+  /* same cleanup as on reset: do not keep frames locked past close() */
+  mpeg2_video_free_all(this->img_state);
+
+  this->stream->video_out->close(this->stream->video_out, this->stream);
+
+  free (this);
+}
+
+/*
+ * Create a decoder instance for 'stream'.
+ * Returns the embedded video_decoder_t, or NULL when the instance or the
+ * libmpeg2 context cannot be allocated.
+ */
+static video_decoder_t *open_plugin (video_decoder_class_t *class_gen, xine_stream_t *stream) {
+  mpeg2_video_decoder_t *this;
+
+  /* calloc() leaves frame_number, rff_pattern, force_aspect, force_pan_scan
+   * at 0 and every img_state slot at IMG_STATE_FREE */
+  this = calloc(1, sizeof(*this));
+  if (!this)
+    return NULL;
+
+  this->mpeg2dec = mpeg2_init ();
+  if (!this->mpeg2dec) {
+    free (this);
+    return NULL;
+  }
+
+  this->video_decoder.decode_data         = mpeg2_video_decode_data;
+  this->video_decoder.flush               = mpeg2_video_flush;
+  this->video_decoder.reset               = mpeg2_video_reset;
+  this->video_decoder.discontinuity       = mpeg2_video_discontinuity;
+  this->video_decoder.dispose             = mpeg2_video_dispose;
+  this->stream                            = stream;
+  this->class                             = (mpeg2_class_t *) class_gen;
+
+  mpeg2_custom_fbuf (this->mpeg2dec, 1);  /* <- Force libmpeg2 to use xine frame buffers. */
+  (stream->video_out->open) (stream->video_out, stream);
+
+  return &this->video_decoder;
+}
+
+/*
+ * mpeg2 plugin class
+ *
+ * Returns the new class object, or NULL when it cannot be allocated.
+ */
+static void *init_plugin (xine_t *xine, void *data) {
+
+  mpeg2_class_t *this;
+
+  (void) xine;
+  (void) data;
+
+  this = calloc(1, sizeof(*this));
+  if (!this)
+    return NULL;
+
+  this->decoder_class.open_plugin     = open_plugin;
+  this->decoder_class.identifier      = "mpeg2new";
+  this->decoder_class.description     = N_("mpeg2 based video decoder plugin");
+  this->decoder_class.dispose         = default_video_decoder_class_dispose;
+
+  return this;
+}
+/*
+ * exported plugin catalog entry
+ */
+
+static uint32_t supported_types[] = { BUF_VIDEO_MPEG, 0 };
+
+static decoder_info_t dec_info_mpeg2 = {
+  supported_types,     /* supported types */
+  6                    /* priority        */
+};
+
+plugin_info_t xine_plugin_info[] = {
+  /* type, API, "name", version, special_info, init_function */
+  { PLUGIN_VIDEO_DECODER, 19, "mpeg2new", XINE_VERSION_CODE, &dec_info_mpeg2, init_plugin },
+  { PLUGIN_NONE, 0, "", 0, NULL, NULL }
+};
-- 
cgit v1.2.3