From 33c63119cc7398a0c7c8e0a43d98d682591eacb6 Mon Sep 17 00:00:00 2001 From: James Courtier-Dutton Date: Mon, 9 Jun 2003 17:27:51 +0000 Subject: Initial import of libmpeg2 version 0.3.2-cvs CVS patchset: 5018 CVS date: 2003/06/09 17:27:51 --- src/libmpeg2new/libmpeg2/Makefile.am | 19 + src/libmpeg2new/libmpeg2/alloc.c | 76 + src/libmpeg2new/libmpeg2/configure.incl | 25 + src/libmpeg2new/libmpeg2/cpu_accel.c | 182 +++ src/libmpeg2new/libmpeg2/cpu_state.c | 129 ++ src/libmpeg2new/libmpeg2/decode.c | 445 ++++++ src/libmpeg2new/libmpeg2/header.c | 725 ++++++++++ src/libmpeg2new/libmpeg2/idct.c | 294 ++++ src/libmpeg2new/libmpeg2/idct_alpha.c | 377 +++++ src/libmpeg2new/libmpeg2/idct_altivec.c | 260 ++++ src/libmpeg2new/libmpeg2/idct_mlib.c | 60 + src/libmpeg2new/libmpeg2/idct_mmx.c | 814 +++++++++++ src/libmpeg2new/libmpeg2/libmpeg2.pc.in | 10 + src/libmpeg2new/libmpeg2/motion_comp.c | 129 ++ src/libmpeg2new/libmpeg2/motion_comp_alpha.c | 252 ++++ src/libmpeg2new/libmpeg2/motion_comp_altivec.c | 1009 +++++++++++++ src/libmpeg2new/libmpeg2/motion_comp_mlib.c | 190 +++ src/libmpeg2new/libmpeg2/motion_comp_mmx.c | 1005 +++++++++++++ src/libmpeg2new/libmpeg2/mpeg2_internal.h | 301 ++++ src/libmpeg2new/libmpeg2/slice.c | 1808 ++++++++++++++++++++++++ src/libmpeg2new/libmpeg2/vlc.h | 429 ++++++ 21 files changed, 8539 insertions(+) create mode 100644 src/libmpeg2new/libmpeg2/Makefile.am create mode 100644 src/libmpeg2new/libmpeg2/alloc.c create mode 100644 src/libmpeg2new/libmpeg2/configure.incl create mode 100644 src/libmpeg2new/libmpeg2/cpu_accel.c create mode 100644 src/libmpeg2new/libmpeg2/cpu_state.c create mode 100644 src/libmpeg2new/libmpeg2/decode.c create mode 100644 src/libmpeg2new/libmpeg2/header.c create mode 100644 src/libmpeg2new/libmpeg2/idct.c create mode 100644 src/libmpeg2new/libmpeg2/idct_alpha.c create mode 100644 src/libmpeg2new/libmpeg2/idct_altivec.c create mode 100644 src/libmpeg2new/libmpeg2/idct_mlib.c create mode 100644 
src/libmpeg2new/libmpeg2/idct_mmx.c create mode 100644 src/libmpeg2new/libmpeg2/libmpeg2.pc.in create mode 100644 src/libmpeg2new/libmpeg2/motion_comp.c create mode 100644 src/libmpeg2new/libmpeg2/motion_comp_alpha.c create mode 100644 src/libmpeg2new/libmpeg2/motion_comp_altivec.c create mode 100644 src/libmpeg2new/libmpeg2/motion_comp_mlib.c create mode 100644 src/libmpeg2new/libmpeg2/motion_comp_mmx.c create mode 100644 src/libmpeg2new/libmpeg2/mpeg2_internal.h create mode 100644 src/libmpeg2new/libmpeg2/slice.c create mode 100644 src/libmpeg2new/libmpeg2/vlc.h (limited to 'src') diff --git a/src/libmpeg2new/libmpeg2/Makefile.am b/src/libmpeg2new/libmpeg2/Makefile.am new file mode 100644 index 000000000..ed9b50e21 --- /dev/null +++ b/src/libmpeg2new/libmpeg2/Makefile.am @@ -0,0 +1,19 @@ +AM_CFLAGS = $(OPT_CFLAGS) $(LIBMPEG2_CFLAGS) + +lib_LTLIBRARIES = libmpeg2.la +libmpeg2_la_SOURCES = alloc.c header.c decode.c slice.c motion_comp.c idct.c \ + motion_comp_mlib.c idct_mlib.c +libmpeg2_la_LIBADD = libmpeg2arch.la $(LIBMPEG2_LIBS) +libmpeg2_la_LDFLAGS = -no-undefined + +noinst_LTLIBRARIES = libmpeg2arch.la +libmpeg2arch_la_SOURCES = motion_comp_mmx.c idct_mmx.c \ + motion_comp_altivec.c idct_altivec.c \ + motion_comp_alpha.c idct_alpha.c \ + cpu_accel.c cpu_state.c +libmpeg2arch_la_CFLAGS = $(OPT_CFLAGS) $(ARCH_OPT_CFLAGS) $(LIBMPEG2_CFLAGS) + +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = libmpeg2.pc + +EXTRA_DIST = configure.incl vlc.h mpeg2_internal.h diff --git a/src/libmpeg2new/libmpeg2/alloc.c b/src/libmpeg2new/libmpeg2/alloc.c new file mode 100644 index 000000000..2e4792e94 --- /dev/null +++ b/src/libmpeg2new/libmpeg2/alloc.c @@ -0,0 +1,76 @@ +/* + * alloc.c + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. 
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "mpeg2_internal.h"
+
+#if defined(HAVE_MEMALIGN) && !defined(__cplusplus)
+/* some systems have memalign() but no declaration for it */
+void * memalign (size_t align, size_t size);
+#endif
+
+/*
+ * Optional application hooks. When set, mpeg2_malloc_hook is tried before
+ * the built-in allocator, and mpeg2_free_hook is asked first whether it
+ * took care of a buffer (non-zero return means "handled").
+ */
+void * (* mpeg2_malloc_hook) (int size, int reason) = NULL;
+int (* mpeg2_free_hook) (void * buf) = NULL;
+
+/*
+ * Allocate `size` bytes aligned on a 16-byte boundary.
+ *
+ * size:   number of bytes requested; negative values are rejected.
+ * reason: opaque tag that is only forwarded to mpeg2_malloc_hook.
+ *
+ * Returns the aligned buffer, or NULL when the size is negative or the
+ * allocation fails. Buffers must be released with mpeg2_free().
+ */
+void * mpeg2_malloc (int size, int reason)
+{
+    char * buf;
+
+    /* A negative size would wrap in the size computation below and could
+     * yield a block smaller than the alignment slack written into it. */
+    if (size < 0)
+        return NULL;
+
+    if (mpeg2_malloc_hook) {
+        buf = (char *) mpeg2_malloc_hook (size, reason);
+        if (buf)
+            return buf;
+    }
+
+#if defined(HAVE_MEMALIGN) && !defined(__cplusplus) && !defined(DEBUG)
+    return memalign (16, size);
+#else
+    /* Over-allocate: up to 15 bytes of alignment slack plus room for one
+     * pointer that remembers the address malloc() really returned. */
+    buf = (char *) malloc (size + 15 + sizeof (void **));
+    if (buf) {
+        char * align_buf;
+
+        align_buf = buf + 15 + sizeof (void **);
+        /* uintptr_t (not long) so the pointer value is never truncated */
+        align_buf -= (uintptr_t)align_buf & 15;
+        /* stash the original pointer just below the aligned block */
+        *(((void **)align_buf) - 1) = buf;
+        return align_buf;
+    }
+    return NULL;
+#endif
+}
+
+/*
+ * Release a buffer obtained from mpeg2_malloc().
+ *
+ * The free hook, when installed, is consulted first (it still sees NULL
+ * pointers, as before). A NULL buffer is otherwise a no-op; previously the
+ * fallback path read the pointer stored "below" NULL.
+ */
+void mpeg2_free (void * buf)
+{
+    if (mpeg2_free_hook && mpeg2_free_hook (buf))
+        return;
+
+    if (buf == NULL)
+        return;
+
+#if defined(HAVE_MEMALIGN) && !defined(__cplusplus) && !defined(DEBUG)
+    free (buf);
+#else
+    /* recover the pointer that mpeg2_malloc() stored below the block */
+    free (*(((void **)buf) - 1));
+#endif
+}
diff --git a/src/libmpeg2new/libmpeg2/configure.incl b/src/libmpeg2new/libmpeg2/configure.incl
new file mode 100644
index
000000000..aa9337774 --- /dev/null +++ b/src/libmpeg2new/libmpeg2/configure.incl @@ -0,0 +1,25 @@ +AC_SUBST([LIBMPEG2_CFLAGS]) +AC_SUBST([LIBMPEG2_LIBS]) + +dnl avoid -fPIC when possible +AC_LIBTOOL_NON_PIC([LIBMPEG2_CFLAGS="$LIBMPEG2_CFLAGS -prefer-non-pic"]) + +dnl check for cpudetect +AC_ARG_ENABLE([accel-detect], + [ --disable-accel-detect make a version without accel detection code]) +if test x"$enable_accel_detect" != x"no"; then + AC_DEFINE([ACCEL_DETECT],,[autodetect accelerations]) +fi + +dnl check for mlib +AC_ARG_ENABLE([mlib], + [ --disable-mlib make a version not using mediaLib]) +if test x"$enable_mlib" != x"no"; then + cflags_save="$CFLAGS" + CFLAGS="$OPT_CFLAGS -L/opt/SUNWmlib/lib -R/opt/SUNWmlib/lib $CFLAGS" + AC_CHECK_LIB([mlib],[mlib_VideoColorYUV2RGB420], + [AC_DEFINE([LIBMPEG2_MLIB],,[libmpeg2 mediaLib support]) + LIBMPEG2_CFLAGS="$LIBMPEG2_CFLAGS -I/opt/SUNWmlib/include" + LIBMPEG2_LIBS="$LIBMPEG2_LIBS -L/opt/SUNWmlib/lib -R/opt/SUNWmlib/lib -lmlib"]) + CFLAGS="$cflags_save" +fi diff --git a/src/libmpeg2new/libmpeg2/cpu_accel.c b/src/libmpeg2new/libmpeg2/cpu_accel.c new file mode 100644 index 000000000..97e5ea3ca --- /dev/null +++ b/src/libmpeg2new/libmpeg2/cpu_accel.c @@ -0,0 +1,182 @@ +/* + * cpu_accel.c + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+
+#include <inttypes.h>
+
+#include "mpeg2.h"
+
+#ifdef ACCEL_DETECT
+#ifdef ARCH_X86
+/*
+ * x86 capability probe. Returns a mask of MPEG2_ACCEL_X86_* bits, or 0
+ * when cpuid is unavailable or the CPU reports no MMX.
+ */
+static inline uint32_t arch_accel (void)
+{
+    uint32_t eax, ebx, ecx, edx;
+    int AMD;
+    uint32_t caps;
+
+#if !defined(PIC) && !defined(__PIC__)
+#define cpuid(op,eax,ebx,ecx,edx) \
+    __asm__ ("cpuid" \
+             : "=a" (eax), \
+               "=b" (ebx), \
+               "=c" (ecx), \
+               "=d" (edx) \
+             : "a" (op) \
+             : "cc")
+#else /* PIC version : save ebx */
+#define cpuid(op,eax,ebx,ecx,edx) \
+    __asm__ ("push %%ebx\n\t" \
+             "cpuid\n\t" \
+             "movl %%ebx,%1\n\t" \
+             "pop %%ebx" \
+             : "=a" (eax), \
+               "=r" (ebx), \
+               "=c" (ecx), \
+               "=d" (edx) \
+             : "a" (op) \
+             : "cc")
+#endif
+
+    /* Read the flags register, flip bit 0x200000, write it back and read
+     * it again; eax holds the re-read value and ebx the original one. */
+    __asm__ ("pushf\n\t"
+             "pushf\n\t"
+             "pop %0\n\t"
+             "movl %0,%1\n\t"
+             "xorl $0x200000,%0\n\t"
+             "push %0\n\t"
+             "popf\n\t"
+             "pushf\n\t"
+             "pop %0\n\t"
+             "popf"
+             : "=r" (eax),
+               "=r" (ebx)
+             :
+             : "cc");
+
+    if (eax == ebx) /* no cpuid */
+        return 0;
+
+    cpuid (0x00000000, eax, ebx, ecx, edx);
+    if (!eax) /* vendor string only */
+        return 0;
+
+    /* the three registers spell "AuthenticAMD" */
+    AMD = (ebx == 0x68747541) && (ecx == 0x444d4163) && (edx == 0x69746e65);
+
+    cpuid (0x00000001, eax, ebx, ecx, edx);
+    if (! (edx & 0x00800000)) /* no MMX */
+        return 0;
+
+    caps = MPEG2_ACCEL_X86_MMX;
+    if (edx & 0x02000000) /* SSE - identical to AMD MMX extensions */
+        caps = MPEG2_ACCEL_X86_MMX | MPEG2_ACCEL_X86_MMXEXT;
+
+    cpuid (0x80000000, eax, ebx, ecx, edx);
+    if (eax < 0x80000001) /* no extended capabilities */
+        return caps;
+
+    cpuid (0x80000001, eax, ebx, ecx, edx);
+
+    if (edx & 0x80000000)
+        caps |= MPEG2_ACCEL_X86_3DNOW;
+
+    if (AMD && (edx & 0x00400000)) /* AMD MMX extensions */
+        caps |= MPEG2_ACCEL_X86_MMXEXT;
+
+    return caps;
+}
+#endif /* ARCH_X86 */
+
+#ifdef ARCH_PPC
+#include <signal.h>
+#include <setjmp.h>
+
+static sigjmp_buf jmpbuf;
+static volatile sig_atomic_t canjump = 0;
+
+/*
+ * SIGILL handler used while probing. Outside of a probe (canjump == 0)
+ * the signal is re-raised with the default disposition; during a probe
+ * control returns to the sigsetjmp() in arch_accel().
+ */
+static RETSIGTYPE sigill_handler (int sig)
+{
+    if (!canjump) {
+        signal (sig, SIG_DFL);
+        raise (sig);
+    }
+
+    canjump = 0;
+    siglongjmp (jmpbuf, 1);
+}
+
+/*
+ * PowerPC capability probe: executes one vector instruction with a SIGILL
+ * handler installed. Returns MPEG2_ACCEL_PPC_ALTIVEC when the instruction
+ * runs, 0 when it traps.
+ */
+static inline uint32_t arch_accel (void)
+{
+    static RETSIGTYPE (* oldsig) (int);
+
+    oldsig = signal (SIGILL, sigill_handler);
+    if (sigsetjmp (jmpbuf, 1)) {
+        signal (SIGILL, oldsig);
+        return 0;
+    }
+
+    canjump = 1;
+
+#ifdef HAVE_ALTIVEC_H /* gnu */
+#define VAND(a,b,c) "vand " #a "," #b "," #c "\n\t"
+#else /* apple */
+#define VAND(a,b,c) "vand v" #a ",v" #b ",v" #c "\n\t"
+#endif
+    asm volatile ("mtspr 256, %0\n\t"
+                  VAND (0, 0, 0)
+                  :
+                  : "r" (-1));
+
+    /* The probe is over: jmpbuf refers to a frame that is about to
+     * return, so the handler must never jump to it again. */
+    canjump = 0;
+
+    signal (SIGILL, oldsig);
+    return MPEG2_ACCEL_PPC_ALTIVEC;
+}
+#endif /* ARCH_PPC */
+
+#ifdef ARCH_ALPHA
+/*
+ * Alpha capability probe: amask leaves a non-zero value when the MVI
+ * feature bit (256) is not implemented.
+ */
+static inline uint32_t arch_accel (void)
+{
+    uint64_t no_mvi;
+
+    asm volatile ("amask %1, %0"
+                  : "=r" (no_mvi)
+                  : "rI" (256)); /* AMASK_MVI */
+    return no_mvi ? MPEG2_ACCEL_ALPHA : (MPEG2_ACCEL_ALPHA |
+                                         MPEG2_ACCEL_ALPHA_MVI);
+}
+#endif /* ARCH_ALPHA */
+#endif
+
+/*
+ * Return the mask of accelerations detected on this machine. Always 0
+ * when the library was built without ACCEL_DETECT.
+ */
+uint32_t mpeg2_detect_accel (void)
+{
+    uint32_t accel;
+
+    accel = 0;
+#ifdef ACCEL_DETECT
+#ifdef LIBMPEG2_MLIB
+    accel = MPEG2_ACCEL_MLIB;
+#endif
+#if defined (ARCH_X86) || defined (ARCH_PPC) || defined (ARCH_ALPHA)
+    accel |= arch_accel ();
+#endif
+#endif
+    return accel;
+}
diff --git a/src/libmpeg2new/libmpeg2/cpu_state.c b/src/libmpeg2new/libmpeg2/cpu_state.c
new file mode 100644
index 000000000..a94e5fedc
--- /dev/null
+++ b/src/libmpeg2new/libmpeg2/cpu_state.c
@@ -0,0 +1,129 @@
+/*
+ * cpu_state.c
+ * Copyright (C) 2000-2003 Michel Lespinasse
+ * Copyright (C) 1999-2000 Aaron Holtzman
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "mpeg2_internal.h"
+#include "attributes.h"
+#ifdef ARCH_X86
+#include "mmx.h"
+#endif
+
+/*
+ * Hooks installed by mpeg2_cpu_state_init(); both stay NULL when the
+ * selected acceleration needs no save/restore step.
+ */
+void (* mpeg2_cpu_state_save) (cpu_state_t * state) = NULL;
+void (* mpeg2_cpu_state_restore) (cpu_state_t * state) = NULL;
+
+#ifdef ARCH_X86
+/* MMX restore hook: only issues emms(); `state` is not used. */
+static void state_restore_mmx (cpu_state_t * state)
+{
+    emms ();
+}
+#endif
+
+#ifdef ARCH_PPC
+#ifdef HAVE_ALTIVEC_H /* gnu */
+#define LI(a,b) "li " #a "," #b "\n\t"
+#define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t"
+#define STVX(a,b,c) "stvx " #a "," #b "," #c "\n\t"
+#define LVX0(a,b,c) "lvx " #a ",0," #c "\n\t"
+#define LVX(a,b,c) "lvx " #a "," #b "," #c "\n\t"
+#else /* apple */
+#define LI(a,b) "li r" #a "," #b "\n\t"
+#define STVX0(a,b,c) "stvx v" #a ",0,r" #c "\n\t"
+#define STVX(a,b,c) "stvx v" #a ",r" #b ",r" #c "\n\t"
+#define LVX0(a,b,c) "lvx v" #a ",0,r" #c "\n\t"
+#define LVX(a,b,c) "lvx v" #a ",r" #b ",r" #c "\n\t"
+#endif
+
+/*
+ * Store vector registers 20..31 at byte offsets 0, 16, ..., 176 from the
+ * address in general register 3, alternating registers 9 and 11 as the
+ * offset register.
+ * NOTE(review): the asm has no operands, so it presumably relies on
+ * `state` arriving in register 3 as the first argument -- confirm against
+ * the target ABI.
+ */
+static void state_save_altivec (cpu_state_t * state)
+{
+    asm (LI (9, 16)
+         STVX0 (20, 0, 3)
+         LI (11, 32)
+         STVX (21, 9, 3)
+         LI (9, 48)
+         STVX (22, 11, 3)
+         LI (11, 64)
+         STVX (23, 9, 3)
+         LI (9, 80)
+         STVX (24, 11, 3)
+         LI (11, 96)
+         STVX (25, 9, 3)
+         LI (9, 112)
+         STVX (26, 11, 3)
+         LI (11, 128)
+         STVX (27, 9, 3)
+         LI (9, 144)
+         STVX (28, 11, 3)
+         LI (11, 160)
+         STVX (29, 9, 3)
+         LI (9, 176)
+         STVX (30, 11, 3)
+         STVX (31, 9, 3));
+}
+
+/*
+ * Mirror of state_save_altivec(): reload vector registers 20..31 from the
+ * same offsets relative to general register 3.
+ */
+static void state_restore_altivec (cpu_state_t * state)
+{
+    asm (LI (9, 16)
+         LVX0 (20, 0, 3)
+         LI (11, 32)
+         LVX (21, 9, 3)
+         LI (9, 48)
+         LVX (22, 11, 3)
+         LI (11, 64)
+         LVX (23, 9, 3)
+         LI (9, 80)
+         LVX (24, 11, 3)
+         LI (11, 96)
+         LVX (25, 9, 3)
+         LI (9, 112)
+         LVX (26, 11, 3)
+         LI (11, 128)
+         LVX (27, 9, 3)
+         LI (9, 144)
+         LVX (28, 11, 3)
+         LI (11, 160)
+         LVX (29, 9, 3)
+         LI (9, 176)
+         LVX (30, 11, 3)
+         LVX (31, 9, 3));
+}
+#endif
+
+/*
+ * Install the save/restore hooks that match the acceleration mask:
+ * MMX gets a restore hook only, AltiVec gets both hooks.
+ */
+void mpeg2_cpu_state_init (uint32_t accel)
+{
+#ifdef ARCH_X86
+    if (accel & MPEG2_ACCEL_X86_MMX) {
+        mpeg2_cpu_state_restore = state_restore_mmx;
+    }
+#endif
+#ifdef ARCH_PPC
+    if (accel & MPEG2_ACCEL_PPC_ALTIVEC) {
+        mpeg2_cpu_state_save = state_save_altivec;
+        mpeg2_cpu_state_restore = state_restore_altivec;
+    }
+#endif
+}
diff --git a/src/libmpeg2new/libmpeg2/decode.c b/src/libmpeg2new/libmpeg2/decode.c
new file mode 100644
index 000000000..df2ca2f28
--- /dev/null
+++ b/src/libmpeg2new/libmpeg2/decode.c
@@ -0,0 +1,445 @@
+/*
+ * decode.c
+ * Copyright (C) 2000-2003 Michel Lespinasse
+ * Copyright (C) 1999-2000 Aaron Holtzman
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+
+#include <string.h> /* memcmp/memset, try to remove */
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "mpeg2_internal.h"
+#include "convert.h"
+
+/* acceleration mask chosen by the first call to mpeg2_accel() */
+static int mpeg2_accels = 0;
+
+/* capacity of the chunk buffer that holds one header or slice */
+#define BUFFER_SIZE (1194 * 1024)
+
+/* Return a pointer to the decoder's info structure. */
+const mpeg2_info_t * mpeg2_info (mpeg2dec_t * mpeg2dec)
+{
+    return &(mpeg2dec->info);
+}
+
+/*
+ * Discard up to `bytes` input bytes while looking for the next start code
+ * (the byte sequence 00 00 01 tracked in the `shift` register).
+ *
+ * Returns the number of bytes consumed up to and including the byte that
+ * follows the 00 00 01 prefix, or 0 when no start code was found in the
+ * range (the whole range is then consumed).
+ */
+static inline int skip_chunk (mpeg2dec_t * mpeg2dec, int bytes)
+{
+    uint8_t * current;
+    uint32_t shift;
+    uint8_t * limit;
+    uint8_t byte;
+
+    if (!bytes)
+        return 0;
+
+    current = mpeg2dec->buf_start;
+    shift = mpeg2dec->shift;
+    limit = current + bytes;
+
+    do {
+        byte = *current++;
+        if (shift == 0x00000100) {
+            int skipped;
+
+            mpeg2dec->shift = 0xffffff00;
+            skipped = current - mpeg2dec->buf_start;
+            mpeg2dec->buf_start = current;
+            return skipped;
+        }
+        shift = (shift | byte) << 8;
+    } while (current < limit);
+
+    mpeg2dec->shift = shift;
+    mpeg2dec->buf_start = current;
+    return 0;
+}
+
+/*
+ * Same scan as skip_chunk(), but every byte that precedes the start code
+ * value is appended at mpeg2dec->chunk_ptr.
+ *
+ * Returns the number of input bytes consumed when a start code was found
+ * (chunk_ptr is then stored back, advanced by one), or 0 when the range
+ * ended first; in that case the caller advances chunk_ptr itself.
+ */
+static inline int copy_chunk (mpeg2dec_t * mpeg2dec, int bytes)
+{
+    uint8_t * current;
+    uint32_t shift;
+    uint8_t * chunk_ptr;
+    uint8_t * limit;
+    uint8_t byte;
+
+    if (!bytes)
+        return 0;
+
+    current = mpeg2dec->buf_start;
+    shift = mpeg2dec->shift;
+    chunk_ptr = mpeg2dec->chunk_ptr;
+    limit = current + bytes;
+
+    do {
+        byte = *current++;
+        if (shift == 0x00000100) {
+            int copied;
+
+            mpeg2dec->shift = 0xffffff00;
+            mpeg2dec->chunk_ptr = chunk_ptr + 1;
+            copied = current - mpeg2dec->buf_start;
+            mpeg2dec->buf_start = current;
+            return copied;
+        }
+        shift = (shift | byte) << 8;
+        *chunk_ptr++ = byte;
+    } while (current < limit);
+
+    mpeg2dec->shift = shift;
+    mpeg2dec->buf_start = current;
+    return 0;
+}
+
+/* Set the input range [start, end) that the parser reads next. */
+void mpeg2_buffer (mpeg2dec_t * mpeg2dec, uint8_t * start, uint8_t * end)
+{
+    mpeg2dec->buf_start = start;
+    mpeg2dec->buf_end = end;
+}
+
+/* Return the number of input bytes not consumed yet. */
+int mpeg2_getpos (mpeg2dec_t * mpeg2dec)
+{
+    return mpeg2dec->buf_end - mpeg2dec->buf_start;
+}
+
+/*
+ * Skip input up to the next start code and record its value in
+ * mpeg2dec->code. Returns STATE_BUFFER when the input ran out, otherwise
+ * (mpeg2_state_t)-1, which mpeg2_parse() treats as "keep going".
+ */
+static inline mpeg2_state_t seek_chunk (mpeg2dec_t * mpeg2dec)
+{
+    int size, skipped;
+
+    size = mpeg2dec->buf_end - mpeg2dec->buf_start;
+    skipped = skip_chunk (mpeg2dec, size);
+    if (!skipped) {
+        mpeg2dec->bytes_since_pts += size;
+        return STATE_BUFFER;
+    }
+    mpeg2dec->bytes_since_pts += skipped;
+    mpeg2dec->code = mpeg2dec->buf_start[-1];
+    return (mpeg2_state_t)-1;
+}
+
+/*
+ * Skip chunks until code 0xb3 is seen or, once a sequence width is known,
+ * one of 0xb7, 0xb8 or 0x00; then hand over to the header parser (or to
+ * mpeg2_header_picture_start() for code 0x00).
+ */
+static mpeg2_state_t seek_header (mpeg2dec_t * mpeg2dec)
+{
+    while (mpeg2dec->code != 0xb3 &&
+           ((mpeg2dec->code != 0xb7 && mpeg2dec->code != 0xb8 &&
+             mpeg2dec->code) || mpeg2dec->sequence.width == (unsigned)-1))
+        if (seek_chunk (mpeg2dec) == STATE_BUFFER)
+            return STATE_BUFFER;
+    mpeg2dec->chunk_start = mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer;
+    return (mpeg2dec->code ? mpeg2_parse_header (mpeg2dec) :
+            mpeg2_header_picture_start (mpeg2dec));
+}
+
+/* Forget the current sequence (width = -1) and search for the next one. */
+mpeg2_state_t mpeg2_seek_sequence (mpeg2dec_t * mpeg2dec)
+{
+    mpeg2dec->sequence.width = (unsigned)-1;
+    return seek_header (mpeg2dec);
+}
+
+/* pack a start code and a parser state into one switch label */
+#define RECEIVED(code,state) (((state) << 8) + (code))
+
+/*
+ * Main parsing entry point. Runs the pending action first, then decodes
+ * slices whose code lies in the enabled range and finally selects the next
+ * action from the start code that ended the slice data.
+ *
+ * Returns STATE_BUFFER when more input is needed, STATE_INVALID on
+ * unexpected data, otherwise the state reported by the action or stored in
+ * mpeg2dec->state.
+ */
+mpeg2_state_t mpeg2_parse (mpeg2dec_t * mpeg2dec)
+{
+    int size_buffer, size_chunk, copied;
+
+    if (mpeg2dec->action) {
+        mpeg2_state_t state;
+
+        state = mpeg2dec->action (mpeg2dec);
+        /* negative values mean "nothing to report, continue parsing" */
+        if ((int)state >= 0)
+            return state;
+    }
+
+    while (1) {
+        while ((unsigned) (mpeg2dec->code - mpeg2dec->first_decode_slice) <
+               mpeg2dec->nb_decode_slices) {
+            size_buffer = mpeg2dec->buf_end - mpeg2dec->buf_start;
+            size_chunk = (mpeg2dec->chunk_buffer + BUFFER_SIZE -
+                          mpeg2dec->chunk_ptr);
+            if (size_buffer <= size_chunk) {
+                copied = copy_chunk (mpeg2dec, size_buffer);
+                if (!copied) {
+                    mpeg2dec->bytes_since_pts += size_buffer;
+                    mpeg2dec->chunk_ptr += size_buffer;
+                    return STATE_BUFFER;
+                }
+            } else {
+                copied = copy_chunk (mpeg2dec, size_chunk);
+                if (!copied) {
+                    /* filled the chunk buffer without finding a start code */
+                    mpeg2dec->bytes_since_pts += size_chunk;
+                    mpeg2dec->action = seek_chunk;
+                    return STATE_INVALID;
+                }
+            }
+            mpeg2dec->bytes_since_pts += copied;
+
+            mpeg2_slice (&(mpeg2dec->decoder), mpeg2dec->code,
+                         mpeg2dec->chunk_start);
+            mpeg2dec->code = mpeg2dec->buf_start[-1];
+            mpeg2dec->chunk_ptr = mpeg2dec->chunk_start;
+        }
+        /* codes 0x01..0xaf outside the enabled range are skipped */
+        if ((unsigned) (mpeg2dec->code - 1) >= 0xb0 - 1)
+            break;
+        if (seek_chunk (mpeg2dec) == STATE_BUFFER)
+            return STATE_BUFFER;
+    }
+
+    switch (mpeg2dec->code) {
+    case 0x00:
+        mpeg2dec->action = mpeg2_header_picture_start;
+        return mpeg2dec->state;
+    case 0xb7:
+        mpeg2dec->action = mpeg2_header_end;
+        break;
+    case 0xb3:
+    case 0xb8:
+        mpeg2dec->action = mpeg2_parse_header;
+        break;
+    default:
+        mpeg2dec->action = seek_chunk;
+        return STATE_INVALID;
+    }
+    return (mpeg2dec->state == STATE_SLICE) ? STATE_SLICE : STATE_INVALID;
+}
+
+/*
+ * Collect one header chunk at a time, dispatch it to the handler selected
+ * by (code & 0x0b) and apply the state transition implied by the start
+ * code that follows it.
+ *
+ * Returns STATE_BUFFER when more input is needed, STATE_INVALID when a
+ * handler rejects the chunk, the chunk buffer overflows or the following
+ * code is not legal in the current state; otherwise mpeg2dec->state.
+ */
+mpeg2_state_t mpeg2_parse_header (mpeg2dec_t * mpeg2dec)
+{
+    static int (* process_header[]) (mpeg2dec_t * mpeg2dec) = {
+        mpeg2_header_picture, mpeg2_header_extension, mpeg2_header_user_data,
+        mpeg2_header_sequence, NULL, NULL, NULL, NULL, mpeg2_header_gop
+    };
+    int size_buffer, size_chunk, copied;
+
+    mpeg2dec->action = mpeg2_parse_header;
+    while (1) {
+        size_buffer = mpeg2dec->buf_end - mpeg2dec->buf_start;
+        size_chunk = (mpeg2dec->chunk_buffer + BUFFER_SIZE -
+                      mpeg2dec->chunk_ptr);
+        if (size_buffer <= size_chunk) {
+            copied = copy_chunk (mpeg2dec, size_buffer);
+            if (!copied) {
+                mpeg2dec->bytes_since_pts += size_buffer;
+                mpeg2dec->chunk_ptr += size_buffer;
+                return STATE_BUFFER;
+            }
+        } else {
+            copied = copy_chunk (mpeg2dec, size_chunk);
+            if (!copied) {
+                /* filled the chunk buffer without finding a start code */
+                mpeg2dec->bytes_since_pts += size_chunk;
+                mpeg2dec->code = 0xb4;
+                mpeg2dec->action = seek_header;
+                return STATE_INVALID;
+            }
+        }
+        mpeg2dec->bytes_since_pts += copied;
+
+        if (process_header[mpeg2dec->code & 0x0b] (mpeg2dec)) {
+            mpeg2dec->code = mpeg2dec->buf_start[-1];
+            mpeg2dec->action = seek_header;
+            return STATE_INVALID;
+        }
+
+        mpeg2dec->code = mpeg2dec->buf_start[-1];
+        switch (RECEIVED (mpeg2dec->code, mpeg2dec->state)) {
+
+        /* state transition after a sequence header */
+        case RECEIVED (0x00, STATE_SEQUENCE):
+            mpeg2dec->action = mpeg2_header_picture_start;
+            /* fall through: the sequence is finalized in both cases */
+        case RECEIVED (0xb8, STATE_SEQUENCE):
+            mpeg2_header_sequence_finalize (mpeg2dec);
+            break;
+
+        /* other legal state transitions */
+        case RECEIVED (0x00, STATE_GOP):
+            mpeg2dec->action = mpeg2_header_picture_start;
+            break;
+        case RECEIVED (0x01, STATE_PICTURE):
+        case RECEIVED (0x01, STATE_PICTURE_2ND):
+            mpeg2_header_matrix_finalize (mpeg2dec);
+            mpeg2dec->action = mpeg2_header_slice_start;
+            break;
+
+        /* legal headers within a given state */
+        case RECEIVED (0xb2, STATE_SEQUENCE):
+        case RECEIVED (0xb2, STATE_GOP):
+        case RECEIVED (0xb2, STATE_PICTURE):
+        case RECEIVED (0xb2, STATE_PICTURE_2ND):
+        case RECEIVED (0xb5, STATE_SEQUENCE):
+        case RECEIVED (0xb5, STATE_PICTURE):
+        case RECEIVED (0xb5, STATE_PICTURE_2ND):
+            mpeg2dec->chunk_ptr = mpeg2dec->chunk_start;
+            continue;
+
+        default:
+            mpeg2dec->action = seek_header;
+            return STATE_INVALID;
+        }
+
+        mpeg2dec->chunk_start = mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer;
+        return mpeg2dec->state;
+    }
+}
+
+/*
+ * Set up colour conversion. `convert` is called once to fill a
+ * convert_init_t and, when it asks for private storage (id_size != 0),
+ * a second time with that storage allocated. Three intermediate YUV
+ * buffers are then allocated.
+ * NOTE(review): none of the allocations is checked and the function
+ * cannot report failure -- callers should be reviewed.
+ */
+void mpeg2_convert (mpeg2dec_t * mpeg2dec,
+                    void (* convert) (int, int, uint32_t, void *,
+                                      struct convert_init_s *), void * arg)
+{
+    convert_init_t convert_init;
+    int size;
+
+    convert_init.id = NULL;
+    convert (mpeg2dec->decoder.width, mpeg2dec->decoder.height,
+             mpeg2_accels, arg, &convert_init);
+    if (convert_init.id_size) {
+        convert_init.id = mpeg2dec->convert_id =
+            mpeg2_malloc (convert_init.id_size, ALLOC_CONVERT_ID);
+        convert (mpeg2dec->decoder.width, mpeg2dec->decoder.height,
+                 mpeg2_accels, arg, &convert_init);
+    }
+    /* convert_size[] holds running totals of the three buffer sizes */
+    mpeg2dec->convert_size[0] = size = convert_init.buf_size[0];
+    mpeg2dec->convert_size[1] = size += convert_init.buf_size[1];
+    mpeg2dec->convert_size[2] = size += convert_init.buf_size[2];
+    mpeg2dec->convert_start = convert_init.start;
+    mpeg2dec->convert_copy = convert_init.copy;
+
+    /* each buffer is 6 * size bytes: planes start at 0, 4*size, 5*size */
+    size = mpeg2dec->decoder.width * mpeg2dec->decoder.height >> 2;
+    mpeg2dec->yuv_buf[0][0] = (uint8_t *) mpeg2_malloc (6 * size, ALLOC_YUV);
+    mpeg2dec->yuv_buf[0][1] = mpeg2dec->yuv_buf[0][0] + 4 * size;
+    mpeg2dec->yuv_buf[0][2] = mpeg2dec->yuv_buf[0][0] + 5 * size;
+    mpeg2dec->yuv_buf[1][0] = (uint8_t *) mpeg2_malloc (6 * size, ALLOC_YUV);
+    mpeg2dec->yuv_buf[1][1] = mpeg2dec->yuv_buf[1][0] + 4 * size;
+    mpeg2dec->yuv_buf[1][2] = mpeg2dec->yuv_buf[1][0] + 5 * size;
+    size = mpeg2dec->decoder.width * 8;
+    mpeg2dec->yuv_buf[2][0] = (uint8_t *) mpeg2_malloc (6 * size, ALLOC_YUV);
+    mpeg2dec->yuv_buf[2][1] = mpeg2dec->yuv_buf[2][0] + 4 * size;
+    mpeg2dec->yuv_buf[2][2] = mpeg2dec->yuv_buf[2][0] + 5 * size;
+}
+
+/*
+ * Hand a frame buffer (three plane pointers plus an opaque id) to the
+ * decoder. With custom frame buffers it becomes the current fbuf[0];
+ * otherwise it fills the next slot of fbuf_alloc[].
+ */
+void mpeg2_set_buf (mpeg2dec_t * mpeg2dec, uint8_t * buf[3], void * id)
+{
+    mpeg2_fbuf_t * fbuf;
+
+    if (mpeg2dec->custom_fbuf) {
+        mpeg2_set_fbuf (mpeg2dec, mpeg2dec->decoder.coding_type);
+        fbuf = mpeg2dec->fbuf[0];
+        if (mpeg2dec->state == STATE_SEQUENCE) {
+            mpeg2dec->fbuf[2] = mpeg2dec->fbuf[1];
+            mpeg2dec->fbuf[1] = mpeg2dec->fbuf[0];
+        }
+    } else {
+        fbuf = &(mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index].fbuf);
+        mpeg2dec->alloc_index_user = ++mpeg2dec->alloc_index;
+    }
+    fbuf->buf[0] = buf[0];
+    fbuf->buf[1] = buf[1];
+    fbuf->buf[2] = buf[2];
+    fbuf->id = id;
+}
+
+/* Record whether the application supplies its own frame buffers. */
+void mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf)
+{
+    mpeg2dec->custom_fbuf = custom_fbuf;
+}
+
+/* Disable (skip != 0) or re-enable decoding of all slice codes. */
+void mpeg2_skip (mpeg2dec_t * mpeg2dec, int skip)
+{
+    mpeg2dec->first_decode_slice = 1;
+    mpeg2dec->nb_decode_slices = skip ? 0 : (0xb0 - 1);
+}
+
+/*
+ * Restrict decoding to slice codes in [start, end). Both bounds are
+ * clamped to 1..0xb0 and `end` is raised to `start` if smaller.
+ */
+void mpeg2_slice_region (mpeg2dec_t * mpeg2dec, int start, int end)
+{
+    start = (start < 1) ? 1 : (start > 0xb0) ? 0xb0 : start;
+    end = (end < start) ? start : (end > 0xb0) ? 0xb0 : end;
+    mpeg2dec->first_decode_slice = start;
+    mpeg2dec->nb_decode_slices = end - start;
+}
+
+/* Record a new presentation time stamp and restart the byte counter. */
+void mpeg2_pts (mpeg2dec_t * mpeg2dec, uint32_t pts)
+{
+    mpeg2dec->pts_previous = mpeg2dec->pts_current;
+    mpeg2dec->pts_current = pts;
+    mpeg2dec->num_pts++;
+    mpeg2dec->bytes_since_pts = 0;
+}
+
+/*
+ * Select the accelerations to use. Only the first call has an effect; it
+ * optionally adds the detected capabilities and initializes the cpu state,
+ * idct and motion compensation modules. Returns the mask in use without
+ * the MPEG2_ACCEL_DETECT bit.
+ */
+uint32_t mpeg2_accel (uint32_t accel)
+{
+    if (!mpeg2_accels) {
+        if (accel & MPEG2_ACCEL_DETECT)
+            accel |= mpeg2_detect_accel ();
+        /* keep the DETECT bit set so that the mask is never 0 again */
+        mpeg2_accels = accel |= MPEG2_ACCEL_DETECT;
+        mpeg2_cpu_state_init (accel);
+        mpeg2_idct_init (accel);
+        mpeg2_mc_init (accel);
+    }
+    return mpeg2_accels & ~MPEG2_ACCEL_DETECT;
+}
+
+/*
+ * Allocate and initialize a decoder instance.
+ * Returns NULL when either the instance or its chunk buffer cannot be
+ * allocated; nothing is leaked in that case.
+ */
+mpeg2dec_t * mpeg2_init (void)
+{
+    mpeg2dec_t * mpeg2dec;
+
+    mpeg2_accel (MPEG2_ACCEL_DETECT);
+
+    mpeg2dec = (mpeg2dec_t *) mpeg2_malloc (sizeof (mpeg2dec_t),
+                                            ALLOC_MPEG2DEC);
+    if (mpeg2dec == NULL)
+        return NULL;
+
+    memset (mpeg2dec, 0, sizeof (mpeg2dec_t));
+
+    mpeg2dec->chunk_buffer = (uint8_t *) mpeg2_malloc (BUFFER_SIZE + 4,
+                                                       ALLOC_CHUNK);
+    if (mpeg2dec->chunk_buffer == NULL) {
+        /* the parser writes into this buffer unconditionally */
+        mpeg2_free (mpeg2dec);
+        return NULL;
+    }
+
+    mpeg2dec->shift = 0xffffff00;
+    mpeg2dec->action = mpeg2_seek_sequence;
+    mpeg2dec->code = 0xb4;
+    mpeg2dec->first_decode_slice = 1;
+    mpeg2dec->nb_decode_slices = 0xb0 - 1;
+    mpeg2dec->convert_id = NULL;
+
+    /* initialize substructures */
+    mpeg2_header_state_init (mpeg2dec);
+
+    return mpeg2dec;
+}
+
+/*
+ * Release a decoder instance together with its chunk buffer, the frame
+ * buffers allocated by the library, the conversion buffers and the
+ * conversion private data.
+ */
+void mpeg2_close (mpeg2dec_t * mpeg2dec)
+{
+    int i;
+
+    /* static uint8_t finalizer[] = {0,0,1,0xb4}; */
+    /* mpeg2_decode_data (mpeg2dec, finalizer, finalizer+4); */
+
+    mpeg2_free (mpeg2dec->chunk_buffer);
+    if (!mpeg2dec->custom_fbuf)
+        for (i = mpeg2dec->alloc_index_user; i < mpeg2dec->alloc_index; i++)
+            mpeg2_free (mpeg2dec->fbuf_alloc[i].fbuf.buf[0]);
+    if (mpeg2dec->convert_start)
+        for (i = 0; i < 3; i++)
+            mpeg2_free (mpeg2dec->yuv_buf[i][0]);
+    if (mpeg2dec->convert_id)
+        mpeg2_free (mpeg2dec->convert_id);
+    mpeg2_free (mpeg2dec);
+}
diff --git a/src/libmpeg2new/libmpeg2/header.c b/src/libmpeg2new/libmpeg2/header.c
new file mode 100644
index 000000000..55f16f1ee
--- /dev/null
+++
b/src/libmpeg2new/libmpeg2/header.c
@@ -0,0 +1,725 @@
+/*
+ * header.c
+ * Copyright (C) 2000-2003 Michel Lespinasse
+ * Copyright (C) 1999-2000 Aaron Holtzman
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+
+#include <inttypes.h>
+#include <stdlib.h> /* defines NULL */
+#include <string.h> /* memcmp */
+
+#include "mpeg2.h"
+#include "mpeg2_internal.h"
+#include "convert.h"
+#include "attributes.h"
+
+#define SEQ_EXT 2
+#define SEQ_DISPLAY_EXT 4
+#define QUANT_MATRIX_EXT 8
+#define COPYRIGHT_EXT 0x10
+#define PIC_DISPLAY_EXT 0x80
+#define PIC_CODING_EXT 0x100
+
+/* default intra quant matrix, in zig-zag order */
+static const uint8_t default_intra_quantizer_matrix[64] ATTR_ALIGN(16) = {
+    8,
+    16, 16,
+    19, 16, 19,
+    22, 22, 22, 22,
+    22, 22, 26, 24, 26,
+    27, 27, 27, 26, 26, 26,
+    26, 27, 27, 27, 29, 29, 29,
+    34, 34, 34, 29, 29, 29, 27, 27,
+    29, 29, 32, 32, 34, 34, 37,
+    38, 37, 35, 35, 34, 35,
+    38, 38, 40, 40, 40,
+    48, 48, 46, 46,
+    56, 56, 58,
+    69, 69,
+    83
+};
+
+uint8_t mpeg2_scan_norm[64] ATTR_ALIGN(16) = {
+    /* Zig-Zag scan pattern */
+     0,  1,  8, 16,  9,  2,  3, 10, 17, 24, 32, 25, 18, 11,  4,  5,
+    12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13,  6,  7, 14, 21, 28,
+    35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
+    58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
+};
+
+uint8_t mpeg2_scan_alt[64] ATTR_ALIGN(16) = {
+    /* Alternate scan pattern */
+     0,  8, 16, 24,  1,  9,  2, 10, 17, 25, 32, 40, 48, 56, 57, 49,
+    41, 33, 26, 18,  3, 11,  4, 12, 19, 27, 34, 42, 50, 58, 35, 43,
+    51, 59, 20, 28,  5, 13,  6, 14, 21, 29, 36, 44, 52, 60, 37, 45,
+    53, 61, 22, 30,  7, 15, 23, 31, 38, 46, 54, 62, 39, 47, 55, 63
+};
+
+/*
+ * Reset the header-parsing state of a fresh decoder: zig-zag scan, first
+ * picture slot, the three built-in frame buffer slots and the allocation
+ * indices.
+ */
+void mpeg2_header_state_init (mpeg2dec_t * mpeg2dec)
+{
+    mpeg2dec->decoder.scan = mpeg2_scan_norm;
+    mpeg2dec->picture = mpeg2dec->pictures;
+    mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[0].fbuf;
+    mpeg2dec->fbuf[1] = &mpeg2dec->fbuf_alloc[1].fbuf;
+    mpeg2dec->fbuf[2] = &mpeg2dec->fbuf_alloc[2].fbuf;
+    mpeg2dec->first = 1;
+    mpeg2dec->alloc_index = 0;
+    mpeg2dec->alloc_index_user = 0;
+}
+
+/* Clear the picture, frame buffer and user data fields of `info`. */
+static void reset_info (mpeg2_info_t * info)
+{
+    info->current_picture = info->current_picture_2nd = NULL;
+    info->display_picture = info->display_picture_2nd = NULL;
+    info->current_fbuf = info->display_fbuf = info->discard_fbuf = NULL;
+    info->user_data = NULL; info->user_data_len = 0;
+}
+
+/*
+ * Parse a sequence header from the chunk buffer into
+ * mpeg2dec->new_sequence and load the quantizer matrices (from the stream
+ * or the defaults).
+ *
+ * Returns 0 on success, 1 when the marker bit is missing or a picture
+ * dimension is zero.
+ */
+int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec)
+{
+    uint8_t * buffer = mpeg2dec->chunk_start;
+    mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence);
+    /* frame periods indexed by the 4-bit frame rate code */
+    static const unsigned int frame_period[9] = {
+        0, 1126125, 1125000, 1080000, 900900, 900000, 540000, 450450, 450000
+    };
+    int i;
+
+    if ((buffer[6] & 0x20) != 0x20) /* missing marker_bit */
+        return 1;
+
+    /* 12 bits of width followed by 12 bits of height */
+    i = (buffer[0] << 16) | (buffer[1] << 8) | buffer[2];
+    if (! (sequence->display_width = sequence->picture_width = i >> 12))
+        return 1;
+    if (! (sequence->display_height = sequence->picture_height = i & 0xfff))
+        return 1;
+    /* coded size is rounded up to a multiple of 16 */
+    sequence->width = (sequence->picture_width + 15) & ~15;
+    sequence->height = (sequence->picture_height + 15) & ~15;
+    sequence->chroma_width = sequence->width >> 1;
+    sequence->chroma_height = sequence->height >> 1;
+
+    sequence->flags = (SEQ_FLAG_PROGRESSIVE_SEQUENCE |
+                       SEQ_VIDEO_FORMAT_UNSPECIFIED);
+
+    sequence->pixel_width = buffer[3] >> 4; /* aspect ratio */
+    sequence->frame_period = 0;
+    if ((buffer[3] & 15) < 9)
+        sequence->frame_period = frame_period[buffer[3] & 15];
+
+    sequence->byte_rate = (buffer[4]<<10) | (buffer[5]<<2) | (buffer[6]>>6);
+
+    sequence->vbv_buffer_size = ((buffer[6]<<16)|(buffer[7]<<8))&0x1ff800;
+
+    if (buffer[7] & 4)
+        sequence->flags |= SEQ_FLAG_CONSTRAINED_PARAMETERS;
+
+    mpeg2dec->copy_matrix = 3;
+    if (buffer[7] & 2) {
+        /* the matrix bytes straddle byte boundaries by one bit */
+        for (i = 0; i < 64; i++)
+            mpeg2dec->intra_quantizer_matrix[mpeg2_scan_norm[i]] =
+                (buffer[i+7] << 7) | (buffer[i+8] >> 1);
+        buffer += 64;
+    } else
+        for (i = 0; i < 64; i++)
+            mpeg2dec->intra_quantizer_matrix[mpeg2_scan_norm[i]] =
+                default_intra_quantizer_matrix[i];
+
+    if (buffer[7] & 1)
+        for (i = 0; i < 64; i++)
+            mpeg2dec->non_intra_quantizer_matrix[mpeg2_scan_norm[i]] =
+                buffer[i+8];
+    else
+        for (i = 0; i < 64; i++)
+            mpeg2dec->non_intra_quantizer_matrix[i] = 16;
+
+    sequence->profile_level_id = 0x80;
+    sequence->colour_primaries = 0;
+    sequence->transfer_characteristics = 0;
+    sequence->matrix_coefficients = 0;
+
+    mpeg2dec->ext_state = SEQ_EXT;
+    mpeg2dec->state = STATE_SEQUENCE;
+    mpeg2dec->display_offset_x = mpeg2dec->display_offset_y = 0;
+
+    reset_info (&(mpeg2dec->info));
+    mpeg2dec->info.gop = NULL;
+    return 0;
+}
+
+/*
+ * Parse a sequence extension: size extension bits, progressive and
+ * low-delay flags, chroma format, bit rate / vbv extensions and the frame
+ * rate extension.
+ *
+ * Returns 0 on success, 1 when the marker bit is missing or the chroma
+ * format is invalid.
+ */
+static int sequence_ext (mpeg2dec_t * mpeg2dec)
+{
+    uint8_t * buffer = mpeg2dec->chunk_start;
+    mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence);
+    uint32_t flags;
+
+    if (!(buffer[3] & 1))
+        return 1;
+
+    sequence->profile_level_id = (buffer[0] << 4) | (buffer[1] >> 4);
+
+    sequence->display_width = sequence->picture_width +=
+        ((buffer[1] << 13) | (buffer[2] << 5)) & 0x3000;
+    sequence->display_height = sequence->picture_height +=
+        (buffer[2] << 7) & 0x3000;
+    sequence->width = (sequence->picture_width + 15) & ~15;
+    sequence->height = (sequence->picture_height + 15) & ~15;
+    flags = sequence->flags | SEQ_FLAG_MPEG2;
+    if (!(buffer[1] & 8)) {
+        flags &= ~SEQ_FLAG_PROGRESSIVE_SEQUENCE;
+        /* non-progressive sequences round the height to 32 */
+        sequence->height = (sequence->height + 31) & ~31;
+    }
+    if (buffer[5] & 0x80)
+        flags |= SEQ_FLAG_LOW_DELAY;
+    sequence->flags = flags;
+    sequence->chroma_width = sequence->width;
+    sequence->chroma_height = sequence->height;
+    switch (buffer[1] & 6) {
+    case 0: /* invalid */
+        return 1;
+    case 2: /* 4:2:0 */
+        sequence->chroma_height >>= 1;
+        /* fall through: 4:2:0 also halves the chroma width */
+    case 4: /* 4:2:2 */
+        sequence->chroma_width >>= 1;
+        break;
+    default: /* remaining value 6: chroma keeps the luma dimensions */
+        break;
+    }
+
+    sequence->byte_rate += ((buffer[2]<<25) | (buffer[3]<<17)) & 0x3ffc0000;
+
+    sequence->vbv_buffer_size |= buffer[4] << 21;
+
+    /* buffer[5] = low_delay (bit 7), frame_rate_extension_n (bits 6-5),
+     * frame_rate_extension_d (bits 4-0); the period scales by (d+1)/(n+1).
+     * The numerator field was previously read from bits 3-2. */
+    sequence->frame_period =
+        sequence->frame_period * ((buffer[5]&31)+1) / (((buffer[5]>>5)&3)+1);
+
+    mpeg2dec->ext_state = SEQ_DISPLAY_EXT;
+
+    return 0;
+}
+
+/*
+ * Parse a sequence display extension: video format, optional colour
+ * description and the display size.
+ *
+ * Returns 0 on success, 1 when the marker bit is missing.
+ */
+static int sequence_display_ext (mpeg2dec_t * mpeg2dec)
+{
+    uint8_t * buffer = mpeg2dec->chunk_start;
+    mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence);
+    uint32_t flags;
+
+    flags = ((sequence->flags & ~SEQ_MASK_VIDEO_FORMAT) |
+             ((buffer[0]<<4) & SEQ_MASK_VIDEO_FORMAT));
+    if (buffer[0] & 1) {
+        flags |= SEQ_FLAG_COLOUR_DESCRIPTION;
+        sequence->colour_primaries = buffer[1];
+        sequence->transfer_characteristics = buffer[2];
+        sequence->matrix_coefficients = buffer[3];
+        buffer += 3;
+    }
+
+    if (!(buffer[2] & 2)) /* missing marker_bit */
+        return 1;
+
+    sequence->display_width = (buffer[1] << 6) | (buffer[2] >> 2);
+    sequence->display_height =
+        ((buffer[2]& 1 ) << 13) | (buffer[3] << 5) | (buffer[4] >> 3);
+
+    /* the computed flags were previously discarded, losing the video
+     * format and the colour description flag */
+    sequence->flags = flags;
+
+    return 0;
+}
+
+/*
+ * Convert the raw values gathered from the headers into final ones: the
+ * byte rate is scaled by 50 and the aspect ratio code held in pixel_width
+ * is turned into a reduced pixel_width / pixel_height pair (0/0 for
+ * illegal codes).
+ */
+static inline void finalize_sequence (mpeg2_sequence_t * sequence)
+{
+    int width;
+    int height;
+
+    sequence->byte_rate *= 50;
+
+    if (sequence->flags & SEQ_FLAG_MPEG2) {
+        switch (sequence->pixel_width) {
+        case 1: /* square pixels */
+            sequence->pixel_width = sequence->pixel_height = 1; return;
+        case 2: /* 4:3 aspect ratio */
+            width = 4; height = 3; break;
+        case 3: /* 16:9 aspect ratio */
+            width = 16; height = 9; break;
+        case 4: /* 2.21:1 aspect ratio */
+            width = 221; height = 100; break;
+        default: /* illegal */
+            sequence->pixel_width = sequence->pixel_height = 0; return;
+        }
+        width *= sequence->display_height;
+        height *= sequence->display_width;
+
+    } else {
+        if (sequence->byte_rate == 50 * 0x3ffff)
+            sequence->byte_rate = 0; /* mpeg-1 VBR */
+
+        switch (sequence->pixel_width) {
+        case 0: case 15: /* illegal */
+            sequence->pixel_width = sequence->pixel_height = 0; return;
+        case 1: /* square pixels */
+            sequence->pixel_width = sequence->pixel_height = 1; return;
+        case 3: /* 720x576 16:9 */
+            sequence->pixel_width = 64; sequence->pixel_height = 45; return;
+        case 6: /* 720x480 16:9 */
+            sequence->pixel_width = 32; sequence->pixel_height = 27; return;
+        case 12: /* 720*480 4:3 */
+            sequence->pixel_width = 8; sequence->pixel_height = 9; return;
+        default:
+            height = 88 * sequence->pixel_width + 1171;
+            width = 2000;
+        }
+    }
+
+    sequence->pixel_width = width;
+    sequence->pixel_height = height;
+    while (width) { /* find greatest common divisor */
+        int tmp = width;
+        width = height % tmp;
+        height = tmp;
+    }
+    /* A stream can declare a 0x0 display size, which makes both terms and
+     * therefore the divisor zero; report that like an illegal ratio. */
+    if (height == 0) {
+        sequence->pixel_width = sequence->pixel_height = 0;
+        return;
+    }
+    sequence->pixel_width /= height;
+    sequence->pixel_height /= height;
+}
+
+/*
+ * Copy the pending quantizer matrices into the slice decoder; bit 0 of
+ * copy_matrix selects the intra matrix, bit 1 the non-intra matrix.
+ */
+void mpeg2_header_matrix_finalize (mpeg2dec_t * mpeg2dec)
+{
+    mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
+    int i;
+
+    if (mpeg2dec->copy_matrix & 1)
+        for (i = 0; i < 64; i++)
+            decoder->intra_quantizer_matrix[i] =
+                mpeg2dec->intra_quantizer_matrix[i];
+    if (mpeg2dec->copy_matrix & 2)
+        for (i = 0; i < 64; i++)
+            decoder->non_intra_quantizer_matrix[i] =
+                mpeg2dec->non_intra_quantizer_matrix[i];
+}
+
+void mpeg2_header_sequence_finalize (mpeg2dec_t * mpeg2dec)
+{
+    mpeg2_sequence_t * sequence =
&(mpeg2dec->new_sequence); + mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); + + finalize_sequence (sequence); + + mpeg2_header_matrix_finalize (mpeg2dec); + decoder->mpeg1 = !(sequence->flags & SEQ_FLAG_MPEG2); + decoder->width = sequence->width; + decoder->height = sequence->height; + decoder->vertical_position_extension = (sequence->picture_height > 2800); + + /* + * according to 6.1.1.6, repeat sequence headers should be + * identical to the original. However some DVDs dont respect that + * and have different bitrates in the repeat sequence headers. So + * we'll ignore that in the comparison and still consider these as + * repeat sequence headers. + */ + mpeg2dec->sequence.byte_rate = sequence->byte_rate; + if (!memcmp (&(mpeg2dec->sequence), sequence, sizeof (mpeg2_sequence_t))) + mpeg2dec->state = STATE_SEQUENCE_REPEATED; + mpeg2dec->sequence = *sequence; + + mpeg2dec->info.sequence = &(mpeg2dec->sequence); +} + +int mpeg2_header_gop (mpeg2dec_t * mpeg2dec) +{ + uint8_t * buffer = mpeg2dec->chunk_start; + mpeg2_gop_t * gop = &(mpeg2dec->gop); + + reset_info (&(mpeg2dec->info)); + if (! 
(buffer[1] & 8)) + return 1; + mpeg2dec->info.gop = gop; + gop->hours = (buffer[0] >> 2) & 31; + gop->minutes = ((buffer[0] << 4) | (buffer[1] >> 4)) & 63; + gop->seconds = ((buffer[1] << 3) | (buffer[2] >> 5)) & 63; + gop->pictures = ((buffer[2] << 1) | (buffer[3] >> 7)) & 63; + gop->flags = (buffer[0] >> 7) | ((buffer[3] >> 4) & 6); + mpeg2dec->state = STATE_GOP; + return 0; +} + +void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int coding_type) +{ + int i; + + for (i = 0; i < 3; i++) + if (mpeg2dec->fbuf[1] != &mpeg2dec->fbuf_alloc[i].fbuf && + mpeg2dec->fbuf[2] != &mpeg2dec->fbuf_alloc[i].fbuf) { + mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[i].fbuf; + mpeg2dec->info.current_fbuf = mpeg2dec->fbuf[0]; + if ((coding_type == B_TYPE) || + (mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY)) { + if ((coding_type == B_TYPE) || (mpeg2dec->convert_start)) + mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[0]; + mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[0]; + } + break; + } +} + +mpeg2_state_t mpeg2_header_picture_start (mpeg2dec_t * mpeg2dec) +{ + mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); + mpeg2_picture_t * picture; + + if (mpeg2dec->state != STATE_SLICE_1ST) { + mpeg2dec->state = STATE_PICTURE; + picture = mpeg2dec->pictures; + if ((decoder->coding_type != PIC_FLAG_CODING_TYPE_B) ^ + (mpeg2dec->picture >= mpeg2dec->pictures + 2)) + picture += 2; + } else { + mpeg2dec->state = STATE_PICTURE_2ND; + picture = mpeg2dec->picture + 1; /* second field picture */ + } + mpeg2dec->picture = picture; + picture->flags = 0; + if (mpeg2dec->num_pts) { + if (mpeg2dec->bytes_since_pts >= 4) { + mpeg2dec->num_pts = 0; + picture->pts = mpeg2dec->pts_current; + picture->flags = PIC_FLAG_PTS; + } else if (mpeg2dec->num_pts > 1) { + mpeg2dec->num_pts = 1; + picture->pts = mpeg2dec->pts_previous; + picture->flags = PIC_FLAG_PTS; + } + } + picture->display_offset[0].x = picture->display_offset[1].x = + picture->display_offset[2].x = mpeg2dec->display_offset_x; + picture->display_offset[0].y = 
/*
 * Parse a picture header from the current chunk.
 *
 * When mpeg2dec->state is STATE_PICTURE (first picture header of a frame)
 * this also rotates the reference frame buffers, fills mpeg2dec->info with
 * the picture/buffer to display and to discard, and - unless custom frame
 * buffers are in use - allocates any of the three internal buffers not yet
 * allocated and selects the one to decode into.  For any other state the
 * header is treated as the second field of the current frame.
 *
 * In both cases the picture coding parameters in the decoder are reset to
 * their defaults and ext_state is set so that only a picture coding
 * extension is accepted next.
 *
 * Always returns 0.
 */
int mpeg2_header_picture (mpeg2dec_t * mpeg2dec)
{
    uint8_t * buffer = mpeg2dec->chunk_start;
    mpeg2_picture_t * picture = mpeg2dec->picture;
    mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
    int type;
    int low_delay;

    /* 3-bit picture_coding_type field */
    type = (buffer [1] >> 3) & 7;
    low_delay = mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY;

    if (mpeg2dec->state == STATE_PICTURE) {
        mpeg2_picture_t * other;

        decoder->second_field = 0;
        /* "other" is whichever of pictures[0] / pictures[2] is not current */
        other = mpeg2dec->pictures;
        if (other == picture)
            other += 2;
        /*
         * decoder->coding_type still holds the type of the PREVIOUS
         * picture here; if that was not a B picture, shift the buffers
         * down: fbuf[0] -> fbuf[1] -> fbuf[2].
         */
        if (decoder->coding_type != PIC_FLAG_CODING_TYPE_B) {
            mpeg2dec->fbuf[2] = mpeg2dec->fbuf[1];
            mpeg2dec->fbuf[1] = mpeg2dec->fbuf[0];
        }
        mpeg2dec->fbuf[0] = NULL;
        reset_info (&(mpeg2dec->info));
        mpeg2dec->info.current_picture = picture;
        mpeg2dec->info.display_picture = picture;
        if (type != PIC_FLAG_CODING_TYPE_B) {
            if (!low_delay) {
                if (mpeg2dec->first) {
                    /* nothing to display before the very first picture */
                    mpeg2dec->info.display_picture = NULL;
                    mpeg2dec->first = 0;
                } else {
                    /* display the other picture slot and fbuf[1] */
                    mpeg2dec->info.display_picture = other;
                    if (other->nb_fields == 1)
                        mpeg2dec->info.display_picture_2nd = other + 1;
                    mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[1];
                }
            }
            /*
             * The discard index is 0, 1 or 2: one step for "not low
             * delay" plus one for "no convert_start callback".  Index 0
             * means nothing is discarded at this point.
             */
            if (!low_delay + !mpeg2dec->convert_start)
                mpeg2dec->info.discard_fbuf =
                    mpeg2dec->fbuf[!low_delay + !mpeg2dec->convert_start];
        }
        if (!mpeg2dec->custom_fbuf) {
            /* allocate whichever of the three internal buffers is missing */
            while (mpeg2dec->alloc_index < 3) {
                mpeg2_fbuf_t * fbuf;

                fbuf = &(mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index++].fbuf);
                fbuf->id = NULL;
                if (mpeg2dec->convert_start) {
                    /*
                     * one allocation of convert_size[0] bytes; planes 1
                     * and 2 sit at offsets convert_size[1] / [2] in it
                     */
                    fbuf->buf[0] =
                        (uint8_t *) mpeg2_malloc (mpeg2dec->convert_size[0],
                                                  ALLOC_CONVERTED);
                    fbuf->buf[1] = fbuf->buf[0] + mpeg2dec->convert_size[1];
                    fbuf->buf[2] = fbuf->buf[0] + mpeg2dec->convert_size[2];
                } else {
                    int size;
                    /*
                     * one allocation of 1.5 * width * height bytes: a
                     * plane of "size" bytes followed by two planes of
                     * size/4 bytes each
                     */
                    size = mpeg2dec->decoder.width * mpeg2dec->decoder.height;
                    fbuf->buf[0] = (uint8_t *) mpeg2_malloc (6 * size >> 2,
                                                             ALLOC_YUV);
                    fbuf->buf[1] = fbuf->buf[0] + size;
                    fbuf->buf[2] = fbuf->buf[1] + (size >> 2);
                }
                /* NOTE(review): the mpeg2_malloc result is not checked */
            }
            mpeg2_set_fbuf (mpeg2dec, type);
        }
    } else {
        /* second field of the current frame */
        decoder->second_field = 1;
        mpeg2dec->info.current_picture_2nd = picture;
        mpeg2dec->info.user_data = NULL; mpeg2dec->info.user_data_len = 0;
        if (low_delay || type == PIC_FLAG_CODING_TYPE_B)
            mpeg2dec->info.display_picture_2nd = picture;
    }
    /* only a picture coding extension may follow */
    mpeg2dec->ext_state = PIC_CODING_EXT;

    /* 10-bit temporal_reference */
    picture->temporal_reference = (buffer[0] << 2) | (buffer[1] >> 6);

    decoder->coding_type = type;
    picture->flags |= type;

    if (type == PIC_FLAG_CODING_TYPE_P || type == PIC_FLAG_CODING_TYPE_B) {
        /* forward_f_code and backward_f_code - used in mpeg1 only */
        decoder->f_motion.f_code[1] = (buffer[3] >> 2) & 1;
        decoder->f_motion.f_code[0] =
            (((buffer[3] << 1) | (buffer[4] >> 7)) & 7) - 1;
        decoder->b_motion.f_code[1] = (buffer[4] >> 6) & 1;
        decoder->b_motion.f_code[0] = ((buffer[4] >> 3) & 7) - 1;
    }

    /* XXXXXX decode extra_information_picture as well */

    picture->nb_fields = 2;

    /* defaults, in effect unless a picture coding extension overrides them */
    decoder->intra_dc_precision = 0;
    decoder->frame_pred_frame_dct = 1;
    decoder->q_scale_type = 0;
    decoder->concealment_motion_vectors = 0;
    decoder->scan = mpeg2_scan_norm;
    decoder->picture_structure = FRAME_PICTURE;
    mpeg2dec->copy_matrix = 0;

    return 0;
}
PIC_FLAG_TOP_FIELD_FIRST; + case BOTTOM_FIELD: + picture->nb_fields = 1; + break; + case FRAME_PICTURE: + if (!(mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE)) { + picture->nb_fields = (buffer[3] & 2) ? 3 : 2; + flags |= (buffer[3] & 128) ? PIC_FLAG_TOP_FIELD_FIRST : 0; + } else + picture->nb_fields = (buffer[3]&2) ? ((buffer[3]&128) ? 6 : 4) : 2; + break; + default: + return 1; + } + decoder->top_field_first = buffer[3] >> 7; + decoder->frame_pred_frame_dct = (buffer[3] >> 6) & 1; + decoder->concealment_motion_vectors = (buffer[3] >> 5) & 1; + decoder->q_scale_type = (buffer[3] >> 4) & 1; + decoder->intra_vlc_format = (buffer[3] >> 3) & 1; + decoder->scan = (buffer[3] & 4) ? mpeg2_scan_alt : mpeg2_scan_norm; + flags |= (buffer[4] & 0x80) ? PIC_FLAG_PROGRESSIVE_FRAME : 0; + if (buffer[4] & 0x40) + flags |= (((buffer[4]<<26) | (buffer[5]<<18) | (buffer[6]<<10)) & + PIC_MASK_COMPOSITE_DISPLAY) | PIC_FLAG_COMPOSITE_DISPLAY; + picture->flags = flags; + + mpeg2dec->ext_state = PIC_DISPLAY_EXT | COPYRIGHT_EXT | QUANT_MATRIX_EXT; + + return 0; +} + +static int picture_display_ext (mpeg2dec_t * mpeg2dec) +{ + uint8_t * buffer = mpeg2dec->chunk_start; + mpeg2_picture_t * picture = mpeg2dec->picture; + int i, nb_pos; + + nb_pos = picture->nb_fields; + if (mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE) + nb_pos >>= 1; + + for (i = 0; i < nb_pos; i++) { + int x, y; + + x = ((buffer[4*i] << 24) | (buffer[4*i+1] << 16) | + (buffer[4*i+2] << 8) | buffer[4*i+3]) >> (11-2*i); + y = ((buffer[4*i+2] << 24) | (buffer[4*i+3] << 16) | + (buffer[4*i+4] << 8) | buffer[4*i+5]) >> (10-2*i); + if (! 
(x & y & 1)) + return 1; + picture->display_offset[i].x = mpeg2dec->display_offset_x = x >> 1; + picture->display_offset[i].y = mpeg2dec->display_offset_y = y >> 1; + } + for (; i < 3; i++) { + picture->display_offset[i].x = mpeg2dec->display_offset_x; + picture->display_offset[i].y = mpeg2dec->display_offset_y; + } + return 0; +} + +static int copyright_ext (mpeg2dec_t * mpeg2dec) +{ + return 0; +} + +static int quant_matrix_ext (mpeg2dec_t * mpeg2dec) +{ + uint8_t * buffer = mpeg2dec->chunk_start; + int i; + + if (buffer[0] & 8) { + for (i = 0; i < 64; i++) + mpeg2dec->intra_quantizer_matrix[mpeg2_scan_norm[i]] = + (buffer[i] << 5) | (buffer[i+1] >> 3); + mpeg2dec->copy_matrix |= 1; + buffer += 64; + } + + if (buffer[0] & 4) { + for (i = 0; i < 64; i++) + mpeg2dec->non_intra_quantizer_matrix[mpeg2_scan_norm[i]] = + (buffer[i] << 6) | (buffer[i+1] >> 2); + mpeg2dec->copy_matrix |= 2; + } + + return 0; +} + +int mpeg2_header_extension (mpeg2dec_t * mpeg2dec) +{ + static int (* parser[]) (mpeg2dec_t *) = { + 0, sequence_ext, sequence_display_ext, quant_matrix_ext, + copyright_ext, 0, 0, picture_display_ext, picture_coding_ext + }; + int ext, ext_bit; + + ext = mpeg2dec->chunk_start[0] >> 4; + ext_bit = 1 << ext; + + if (!(mpeg2dec->ext_state & ext_bit)) + return 0; /* ignore illegal extensions */ + mpeg2dec->ext_state &= ~ext_bit; + return parser[ext] (mpeg2dec); +} + +int mpeg2_header_user_data (mpeg2dec_t * mpeg2dec) +{ + if (!mpeg2dec->info.user_data_len) + mpeg2dec->info.user_data = mpeg2dec->chunk_start; + else + mpeg2dec->info.user_data_len += 3; + mpeg2dec->info.user_data_len += (mpeg2dec->chunk_ptr - 4 - + mpeg2dec->chunk_start); + mpeg2dec->chunk_start = mpeg2dec->chunk_ptr - 1; + + return 0; +} + +mpeg2_state_t mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec) +{ + mpeg2dec->info.user_data = NULL; mpeg2dec->info.user_data_len = 0; + mpeg2dec->state = ((mpeg2dec->picture->nb_fields > 1 || + mpeg2dec->state == STATE_PICTURE_2ND) ? 
+ STATE_SLICE : STATE_SLICE_1ST); + + if (!(mpeg2dec->nb_decode_slices)) + mpeg2dec->picture->flags |= PIC_FLAG_SKIP; + else if (mpeg2dec->convert_start) { + int flags; + + switch (mpeg2dec->decoder.picture_structure) { + case TOP_FIELD: flags = CONVERT_TOP_FIELD; break; + case BOTTOM_FIELD: flags = CONVERT_BOTTOM_FIELD; break; + default: + flags = + ((mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE) ? + CONVERT_FRAME : CONVERT_BOTH_FIELDS); + } + mpeg2dec->convert_start (mpeg2dec->convert_id, + mpeg2dec->fbuf[0]->buf, flags); + + mpeg2dec->decoder.convert = mpeg2dec->convert_copy; + mpeg2dec->decoder.fbuf_id = mpeg2dec->convert_id; + + if (mpeg2dec->decoder.coding_type == B_TYPE) + mpeg2_init_fbuf (&(mpeg2dec->decoder), mpeg2dec->yuv_buf[2], + mpeg2dec->yuv_buf[mpeg2dec->yuv_index ^ 1], + mpeg2dec->yuv_buf[mpeg2dec->yuv_index]); + else { + mpeg2_init_fbuf (&(mpeg2dec->decoder), + mpeg2dec->yuv_buf[mpeg2dec->yuv_index ^ 1], + mpeg2dec->yuv_buf[mpeg2dec->yuv_index], + mpeg2dec->yuv_buf[mpeg2dec->yuv_index]); + if (mpeg2dec->state == STATE_SLICE) + mpeg2dec->yuv_index ^= 1; + } + } else { + int b_type; + + mpeg2dec->decoder.convert = NULL; + b_type = (mpeg2dec->decoder.coding_type == B_TYPE); + mpeg2_init_fbuf (&(mpeg2dec->decoder), mpeg2dec->fbuf[0]->buf, + mpeg2dec->fbuf[b_type + 1]->buf, + mpeg2dec->fbuf[b_type]->buf); + } + mpeg2dec->action = NULL; + return (mpeg2_state_t)-1; +} + +mpeg2_state_t mpeg2_header_end (mpeg2dec_t * mpeg2dec) +{ + mpeg2_picture_t * picture; + int b_type; + + b_type = (mpeg2dec->decoder.coding_type == B_TYPE); + picture = mpeg2dec->pictures; + if ((mpeg2dec->picture >= picture + 2) ^ b_type) + picture = mpeg2dec->pictures + 2; + + mpeg2dec->state = STATE_END; + reset_info (&(mpeg2dec->info)); + if (!(mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY)) { + mpeg2dec->info.display_picture = picture; + if (picture->nb_fields == 1) + mpeg2dec->info.display_picture_2nd = picture + 1; + mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[b_type]; 
+ if (!mpeg2dec->convert_start) + mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[b_type + 1]; + } else if (!mpeg2dec->convert_start) + mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[b_type]; + mpeg2dec->action = mpeg2_seek_sequence; + mpeg2dec->first = 1; + return STATE_END; +} diff --git a/src/libmpeg2new/libmpeg2/idct.c b/src/libmpeg2new/libmpeg2/idct.c new file mode 100644 index 000000000..932efcf2a --- /dev/null +++ b/src/libmpeg2new/libmpeg2/idct.c @@ -0,0 +1,294 @@ +/* + * idct.c + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include +#include + +#include "mpeg2.h" +#include "mpeg2_internal.h" +#include "attributes.h" + +#define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */ +#define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */ +#define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */ +#define W5 1609 /* 2048 * sqrt (2) * cos (5 * pi / 16) */ +#define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */ +#define W7 565 /* 2048 * sqrt (2) * cos (7 * pi / 16) */ + +/* idct main entry point */ +void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride); +void (* mpeg2_idct_add) (int last, int16_t * block, + uint8_t * dest, int stride); + +/* + * In legal streams, the IDCT output should be between -384 and +384. + * In corrupted streams, it is possible to force the IDCT output to go + * to +-3826 - this is the worst case for a column IDCT where the + * column inputs are 16-bit values. 
/*
 * In-place 1-D inverse DCT of one row of eight 16-bit coefficients.
 *
 * Fixed-point arithmetic: the even inputs are scaled by 2^11 (matching the
 * 2048 factor of the W constants), +128 is the rounding term for the final
 * >> 8, so every output carries 3 extra fractional bits for the column pass.
 */
static void inline idct_row (int16_t * const block)
{
    int d0, d1, d2, d3;
    int a0, a1, a2, a3, b0, b1, b2, b3;
    int t0, t1, t2, t3;

    /* shortcut */
    /*
     * block[1] and the three 32-bit words covering block[2..7] are all
     * zero, i.e. only the DC term is set: every output equals
     * block[0] << 3, written as four packed 32-bit stores.
     * NOTE(review): the int32_t casts type-pun the int16_t array and
     * presumably rely on the build not enforcing strict aliasing - confirm.
     */
    if (likely (!(block[1] | ((int32_t *)block)[1] | ((int32_t *)block)[2] |
                  ((int32_t *)block)[3]))) {
        uint32_t tmp = (uint16_t) (block[0] << 3);
        tmp |= tmp << 16;
        ((int32_t *)block)[0] = tmp;
        ((int32_t *)block)[1] = tmp;
        ((int32_t *)block)[2] = tmp;
        ((int32_t *)block)[3] = tmp;
        return;
    }

    /* even part: coefficients 0..3 produce a0..a3 */
    d0 = (block[0] << 11) + 128;
    d1 = block[1];
    d2 = block[2] << 11;
    d3 = block[3];
    t0 = d0 + d2;
    t1 = d0 - d2;
    BUTTERFLY (t2, t3, W6, W2, d3, d1);
    a0 = t0 + t2;
    a1 = t1 + t3;
    a2 = t1 - t3;
    a3 = t0 - t2;

    /* odd part: coefficients 4..7 produce b0..b3 */
    d0 = block[4];
    d1 = block[5];
    d2 = block[6];
    d3 = block[7];
    BUTTERFLY (t0, t1, W7, W1, d3, d0);
    BUTTERFLY (t2, t3, W3, W5, d1, d2);
    b0 = t0 + t2;
    b3 = t1 + t3;
    t0 -= t2;
    t1 -= t3;
    /* 181 / 256 approximates 1 / sqrt (2) */
    b1 = ((t0 + t1) * 181) >> 8;
    b2 = ((t0 - t1) * 181) >> 8;

    /* recombine: outputs k and 7-k are the sum and difference of a / b */
    block[0] = (a0 + b0) >> 8;
    block[1] = (a1 + b1) >> 8;
    block[2] = (a2 + b2) >> 8;
    block[3] = (a3 + b3) >> 8;
    block[4] = (a3 - b3) >> 8;
    block[5] = (a2 - b2) >> 8;
    block[6] = (a1 - b1) >> 8;
    block[7] = (a0 - b0) >> 8;
}
/*
 * Portable C IDCT for intra blocks: transform the 8x8 coefficient block in
 * place (rows, then columns), write the clipped result to dest (8 rows of
 * 8 bytes, "stride" bytes apart) and clear the block for its next use.
 */
static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest,
                               const int stride)
{
    int row, col;

    for (row = 0; row < 8; row++)
        idct_row (block + 8 * row);
    for (col = 0; col < 8; col++)
        idct_col (block + col);

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++) {
            dest[col] = CLIP (block[col]);      /* saturate to 0..255 */
            block[col] = 0;
        }
        dest += stride;
        block += 8;
    }
}
/*
 * Select the IDCT implementation according to the acceleration flags.
 *
 * Each architecture section is compiled in only when its ARCH_* or
 * LIBMPEG2_MLIB macro is defined, and ends in a dangling "else", so the
 * sections form one if / else-if chain whose final branch is the portable
 * C implementation.  mpeg2_idct_copy and mpeg2_idct_add are set exactly
 * once per call.
 */
void mpeg2_idct_init (uint32_t accel)
{
#ifdef ARCH_X86
    if (accel & MPEG2_ACCEL_X86_MMXEXT) {
        mpeg2_idct_copy = mpeg2_idct_copy_mmxext;
        mpeg2_idct_add = mpeg2_idct_add_mmxext;
        mpeg2_idct_mmx_init ();
    } else if (accel & MPEG2_ACCEL_X86_MMX) {
        mpeg2_idct_copy = mpeg2_idct_copy_mmx;
        mpeg2_idct_add = mpeg2_idct_add_mmx;
        mpeg2_idct_mmx_init ();
    } else
#endif
#ifdef ARCH_PPC
    if (accel & MPEG2_ACCEL_PPC_ALTIVEC) {
        mpeg2_idct_copy = mpeg2_idct_copy_altivec;
        mpeg2_idct_add = mpeg2_idct_add_altivec;
        mpeg2_idct_altivec_init ();
    } else
#endif
#ifdef ARCH_ALPHA
    if (accel & MPEG2_ACCEL_ALPHA_MVI) {
        mpeg2_idct_copy = mpeg2_idct_copy_mvi;
        mpeg2_idct_add = mpeg2_idct_add_mvi;
        mpeg2_idct_alpha_init ();
    } else if (accel & MPEG2_ACCEL_ALPHA) {
        int i;

        mpeg2_idct_copy = mpeg2_idct_copy_alpha;
        mpeg2_idct_add = mpeg2_idct_add_alpha;
        mpeg2_idct_alpha_init ();
        /* saturation table: CLIP(i) clamps i to 0..255 */
        for (i = -3840; i < 3840 + 256; i++)
            CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 255 : i);
    } else
#endif
#ifdef LIBMPEG2_MLIB
    if (accel & MPEG2_ACCEL_MLIB) {
        mpeg2_idct_copy = mpeg2_idct_copy_mlib_non_ieee;
        /* the MLIB_NON_IEEE environment variable selects the add variant */
        mpeg2_idct_add = (getenv ("MLIB_NON_IEEE") ?
                          mpeg2_idct_add_mlib_non_ieee : mpeg2_idct_add_mlib);
    } else
#endif
    {
        extern uint8_t mpeg2_scan_norm[64];
        extern uint8_t mpeg2_scan_alt[64];
        int i, j;

        mpeg2_idct_copy = mpeg2_idct_copy_c;
        mpeg2_idct_add = mpeg2_idct_add_c;
        /* saturation table: CLIP(i) clamps i to 0..255 */
        for (i = -3840; i < 3840 + 256; i++)
            CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 255 : i);
        /*
         * Remap every scan-table entry: within each 3-bit half of the
         * 6-bit index, bits 2..1 move down to 1..0 and bit 0 moves up to
         * bit 2.
         * NOTE(review): the tables are rewritten in place, so taking this
         * branch a second time would permute them again - confirm that
         * callers invoke mpeg2_idct_init only once.
         */
        for (i = 0; i < 64; i++) {
            j = mpeg2_scan_norm[i];
            mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
            j = mpeg2_scan_alt[i];
            mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
        }
    }
}
/*
 * In-place 1-D inverse DCT of one row of eight 16-bit coefficients (Alpha
 * variant).  Same arithmetic as the portable C row transform, but the row
 * is fetched as two 64-bit words and the coefficients are picked out of
 * them with the helpers from alpha_asm.h.
 * NOTE(review): ldq / extwl / sextw are presumably a 64-bit load, 16-bit
 * field extract at a byte offset, and sign extension - confirm against
 * alpha_asm.h.
 */
static void inline idct_row (int16_t * const block)
{
    uint64_t l, r;
    int_fast32_t d0, d1, d2, d3;
    int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3;
    int_fast32_t t0, t1, t2, t3;

    l = ldq (block);            /* coefficients 0..3 */
    r = ldq (block + 4);        /* coefficients 4..7 */

    /* shortcut */
    /*
     * Everything above the low 16 bits of l, and all of r, is zero: only
     * the DC term is set, so all eight outputs are DC << 3, replicated
     * into each 16-bit lane of tmp.
     */
    if (likely (!((l & ~0xffffUL) | r))) {
        uint64_t tmp = (uint16_t) (l << 3);
        tmp |= tmp << 16;
        tmp |= tmp << 32;
        ((int32_t *)block)[0] = tmp;
        ((int32_t *)block)[1] = tmp;
        ((int32_t *)block)[2] = tmp;
        ((int32_t *)block)[3] = tmp;
        return;
    }

    /* even part; +128 is the rounding term for the final >> 8 */
    d0 = (sextw (l) << 11) + 128;
    d1 = sextw (extwl (l, 2));
    d2 = sextw (extwl (l, 4)) << 11;
    d3 = sextw (extwl (l, 6));
    t0 = d0 + d2;
    t1 = d0 - d2;
    BUTTERFLY (t2, t3, W6, W2, d3, d1);
    a0 = t0 + t2;
    a1 = t1 + t3;
    a2 = t1 - t3;
    a3 = t0 - t2;

    /* odd part */
    d0 = sextw (r);
    d1 = sextw (extwl (r, 2));
    d2 = sextw (extwl (r, 4));
    d3 = sextw (extwl (r, 6));
    BUTTERFLY (t0, t1, W7, W1, d3, d0);
    BUTTERFLY (t2, t3, W3, W5, d1, d2);
    b0 = t0 + t2;
    b3 = t1 + t3;
    t0 -= t2;
    t1 -= t3;
    /* 181 / 256 approximates 1 / sqrt (2) */
    b1 = ((t0 + t1) * 181) >> 8;
    b2 = ((t0 - t1) * 181) >> 8;

    /* recombine: outputs k and 7-k are the sum and difference of a / b */
    block[0] = (a0 + b0) >> 8;
    block[1] = (a1 + b1) >> 8;
    block[2] = (a2 + b2) >> 8;
    block[3] = (a3 + b3) >> 8;
    block[4] = (a3 - b3) >> 8;
    block[5] = (a2 - b2) >> 8;
    block[6] = (a1 - b1) >> 8;
    block[7] = (a0 - b0) >> 8;
}
/*
 * Alpha MVI IDCT for non-intra blocks: add the inverse transform of
 * "block" to the 8x8 pixel area at dest (rows "stride" bytes apart),
 * saturating to 0..255.
 *
 * Two paths:
 *  - general: full row/column IDCT, then a packed add of four 16-bit
 *    residuals at a time to the unpacked pixels; the block is cleared
 *    on the way out.
 *  - DC only (last == 129 and (block[0] & 7) != 4): a single rounded DC
 *    value is added to, or subtracted from, all 64 pixels eight bytes at
 *    a time; only block[0] and block[63] are cleared.
 * NOTE(review): zap, unpkbw, pkwb, maxsw4, minsw4, minub8, BYTE_VEC and
 * the ldq/ldl/stq/stl accessors come from alpha_asm.h; their semantics are
 * inferred from their use here - confirm against that header.
 */
void mpeg2_idct_add_mvi (const int last, int16_t * block,
                         uint8_t * dest, const int stride)
{
    uint64_t clampmask;
    uint64_t signmask;
    int i;

    if (last != 129 || (block[0] & 7) == 4) {
        for (i = 0; i < 8; i++)
            idct_row (block + 8 * i);
        for (i = 0; i < 8; i++)
            idct_col (block + i);
        /* i == 8 here; it is reused as the row counter of the loop below */
        clampmask = zap (-1, 0xaa);     /* 0x00ff00ff00ff00ff */
        signmask = zap (-1, 0x33);
        signmask ^= signmask >> 1;      /* 0x8000800080008000 */

        do {
            uint64_t shorts0, pix0, signs0;
            uint64_t shorts1, pix1, signs1;

            shorts0 = ldq (block);
            shorts1 = ldq (block + 4);

            pix0 = unpkbw (ldl (dest));
            /* signed subword add (MMX paddw). */
            /*
             * The top bit of each 16-bit lane is masked off before the
             * 64-bit add so that no carry crosses into the next lane,
             * then restored with the xor.
             */
            signs0 = shorts0 & signmask;
            shorts0 &= ~signmask;
            shorts0 += pix0;
            shorts0 ^= signs0;
            /* clamp. */
            shorts0 = maxsw4 (shorts0, 0);
            shorts0 = minsw4 (shorts0, clampmask);

            /* next 4. */
            pix1 = unpkbw (ldl (dest + 4));
            signs1 = shorts1 & signmask;
            shorts1 &= ~signmask;
            shorts1 += pix1;
            shorts1 ^= signs1;
            shorts1 = maxsw4 (shorts1, 0);
            shorts1 = minsw4 (shorts1, clampmask);

            stl (pkwb (shorts0), dest);
            stl (pkwb (shorts1), dest + 4);
            /* clear the row of coefficients for the block's next use */
            stq (0, block);
            stq (0, block + 4);

            dest += stride;
            block += 8;
        } while (--i);
    } else {
        int DC;
        uint64_t p0, p1, p2, p3, p4, p5, p6, p7;
        uint64_t DCs;

        /* rounded DC value, removing the 3 fractional bits */
        DC = (block[0] + 4) >> 3;
        block[0] = block[63] = 0;

        p0 = ldq (dest + 0 * stride);
        p1 = ldq (dest + 1 * stride);
        p2 = ldq (dest + 2 * stride);
        p3 = ldq (dest + 3 * stride);
        p4 = ldq (dest + 4 * stride);
        p5 = ldq (dest + 5 * stride);
        p6 = ldq (dest + 6 * stride);
        p7 = ldq (dest + 7 * stride);

        if (DC > 0) {
            /*
             * Saturating byte add: each byte gains min (DC, 255 - pixel),
             * so no byte can overflow into its neighbour.
             */
            DCs = BYTE_VEC (likely (DC <= 255) ? DC : 255);
            p0 += minub8 (DCs, ~p0);
            p1 += minub8 (DCs, ~p1);
            p2 += minub8 (DCs, ~p2);
            p3 += minub8 (DCs, ~p3);
            p4 += minub8 (DCs, ~p4);
            p5 += minub8 (DCs, ~p5);
            p6 += minub8 (DCs, ~p6);
            p7 += minub8 (DCs, ~p7);
        } else {
            /*
             * Saturating byte subtract: each byte loses min (-DC, pixel),
             * so no byte can borrow from its neighbour.
             */
            DCs = BYTE_VEC (likely (-DC <= 255) ? -DC : 255);
            p0 -= minub8 (DCs, p0);
            p1 -= minub8 (DCs, p1);
            p2 -= minub8 (DCs, p2);
            p3 -= minub8 (DCs, p3);
            p4 -= minub8 (DCs, p4);
            p5 -= minub8 (DCs, p5);
            p6 -= minub8 (DCs, p6);
            p7 -= minub8 (DCs, p7);
        }

        stq (p0, dest + 0 * stride);
        stq (p1, dest + 1 * stride);
        stq (p2, dest + 2 * stride);
        stq (p3, dest + 3 * stride);
        stq (p4, dest + 4 * stride);
        stq (p5, dest + 5 * stride);
        stq (p6, dest + 6 * stride);
        stq (p7, dest + 7 * stride);
    }
}
+ dest[6]); + dest[7] = CLIP (DC + dest[7]); + dest += stride; + } while (--i); + } +} + +void mpeg2_idct_alpha_init (void) +{ + extern uint8_t mpeg2_scan_norm[64]; + extern uint8_t mpeg2_scan_alt[64]; + int i, j; + + for (i = 0; i < 64; i++) { + j = mpeg2_scan_norm[i]; + mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); + j = mpeg2_scan_alt[i]; + mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); + } +} + +#endif /* ARCH_ALPHA */ diff --git a/src/libmpeg2new/libmpeg2/idct_altivec.c b/src/libmpeg2new/libmpeg2/idct_altivec.c new file mode 100644 index 000000000..e9fc28bc4 --- /dev/null +++ b/src/libmpeg2new/libmpeg2/idct_altivec.c @@ -0,0 +1,260 @@ +/* + * idct_altivec.c + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef ARCH_PPC + +#ifdef HAVE_ALTIVEC_H +#include +#endif +#include + +#include "mpeg2.h" +#include "mpeg2_internal.h" +#include "attributes.h" + /* Short names for the AltiVec vector types used throughout this file. */ +typedef vector signed char vector_s8_t; +typedef vector unsigned char vector_u8_t; +typedef vector signed short vector_s16_t; +typedef vector unsigned short vector_u16_t; +typedef vector signed int vector_s32_t; +typedef vector unsigned int vector_u32_t; + +#if defined(HAVE_ALTIVEC_H) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303) +/* work around gcc <3.3 vec_mergel bug */ +static inline vector_s16_t my_vec_mergel (vector_s16_t const A, + vector_s16_t const B) +{ + static const vector_u8_t mergel = { + 0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b, + 0x0c, 0x0d, 0x1c, 0x1d, 0x0e, 0x0f, 0x1e, 0x1f + }; + return vec_perm (A, B, mergel); +} +#undef vec_mergel +#define vec_mergel my_vec_mergel +#endif + /* VEC_S16 builds a vector_s16_t literal from eight values: brace syntax when HAVE_ALTIVEC_H is defined, parenthesised cast syntax otherwise. */ +#ifdef HAVE_ALTIVEC_H /* gnu */ +#define VEC_S16(a,b,c,d,e,f,g,h) {a, b, c, d, e, f, g, h} +#else /* apple */ +#define VEC_S16(a,b,c,d,e,f,g,h) (vector_s16_t) (a, b, c, d, e, f, g, h) +#endif + /* Constant tables read by the IDCT macro. Lanes 0 to 5 of constants are splatted into c4, a0, a1, a2, mc4 and ma2, and its last 32-bit word (32, 31) is splatted into bias. constants_1 to constants_4 are the per-row multipliers applied to the input rows: constants_1 for rows 0 and 4, constants_2 for rows 1 and 7, constants_3 for rows 2 and 6, constants_4 for rows 3 and 5. */ +static const vector_s16_t constants ATTR_ALIGN(16) = + VEC_S16 (23170, 13573, 6518, 21895, -23170, -21895, 32, 31); +static const vector_s16_t constants_1 ATTR_ALIGN(16) = + VEC_S16 (16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725); +static const vector_s16_t constants_2 ATTR_ALIGN(16) = + VEC_S16 (22725, 31521, 29692, 26722, 22725, 26722, 29692, 31521); +static const vector_s16_t constants_3 ATTR_ALIGN(16) = + VEC_S16 (21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692); +static const vector_s16_t constants_4 ATTR_ALIGN(16) = + VEC_S16 (19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722); + /* IDCT_HALF: one four-stage butterfly pass that reads vx0 to vx7 and writes vy0 to vy7 using saturating vec_adds, vec_subs and vec_mradds. t0 to t8 are scratch vectors. It expects a0, a1, a2, ma2, c4, mc4 and zero to have been set up already, which the IDCT macro does before expanding it. */ +#define IDCT_HALF \ + /* 1st stage */ \ + t1 = vec_mradds (a1, vx7, vx1 ); \ + t8 = 
vec_mradds (a1, vx1, vec_subs (zero, vx7)); \ + t7 = vec_mradds (a2, vx5, vx3); \ + t3 = vec_mradds (ma2, vx3, vx5); \ + \ + /* 2nd stage */ \ + t5 = vec_adds (vx0, vx4); \ + t0 = vec_subs (vx0, vx4); \ + t2 = vec_mradds (a0, vx6, vx2); \ + t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6)); \ + t6 = vec_adds (t8, t3); \ + t3 = vec_subs (t8, t3); \ + t8 = vec_subs (t1, t7); \ + t1 = vec_adds (t1, t7); \ + \ + /* 3rd stage */ \ + t7 = vec_adds (t5, t2); \ + t2 = vec_subs (t5, t2); \ + t5 = vec_adds (t0, t4); \ + t0 = vec_subs (t0, t4); \ + t4 = vec_subs (t8, t3); \ + t3 = vec_adds (t8, t3); \ + \ + /* 4th stage */ \ + vy0 = vec_adds (t7, t1); \ + vy7 = vec_subs (t7, t1); \ + vy1 = vec_mradds (c4, t3, t5); \ + vy6 = vec_mradds (mc4, t3, t5); \ + vy2 = vec_mradds (c4, t4, t0); \ + vy5 = vec_mradds (mc4, t4, t0); \ + vy3 = vec_adds (t2, t6); \ + vy4 = vec_subs (t2, t6); + /* IDCT: declares every working vector, splats the constants, prescales the eight input rows (shift left by 4, then vec_mradds with constants_1 to constants_4), expands IDCT_HALF, re-interleaves the rows with three rounds of vec_mergeh and vec_mergel (this appears to implement an 8x8 transpose; bias is added to the first row in the last round), expands IDCT_HALF again and finally shifts every row right arithmetically by 6. The results are left in vx0 to vx7. The expanding function must provide a local vector_s16_t pointer named block. */ +#define IDCT \ + vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \ + vector_s16_t vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \ + vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias; \ + vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8; \ + vector_u16_t shift; \ + \ + c4 = vec_splat (constants, 0); \ + a0 = vec_splat (constants, 1); \ + a1 = vec_splat (constants, 2); \ + a2 = vec_splat (constants, 3); \ + mc4 = vec_splat (constants, 4); \ + ma2 = vec_splat (constants, 5); \ + bias = (vector_s16_t)vec_splat ((vector_s32_t)constants, 3); \ + \ + zero = vec_splat_s16 (0); \ + shift = vec_splat_u16 (4); \ + \ + vx0 = vec_mradds (vec_sl (block[0], shift), constants_1, zero); \ + vx1 = vec_mradds (vec_sl (block[1], shift), constants_2, zero); \ + vx2 = vec_mradds (vec_sl (block[2], shift), constants_3, zero); \ + vx3 = vec_mradds (vec_sl (block[3], shift), constants_4, zero); \ + vx4 = vec_mradds (vec_sl (block[4], shift), constants_1, zero); \ + vx5 = vec_mradds (vec_sl (block[5], shift), constants_4, zero); \ + vx6 = vec_mradds (vec_sl (block[6], shift), constants_3, zero); \ + vx7 = vec_mradds (vec_sl 
(block[7], shift), constants_2, zero); \ + \ + IDCT_HALF \ + \ + vx0 = vec_mergeh (vy0, vy4); \ + vx1 = vec_mergel (vy0, vy4); \ + vx2 = vec_mergeh (vy1, vy5); \ + vx3 = vec_mergel (vy1, vy5); \ + vx4 = vec_mergeh (vy2, vy6); \ + vx5 = vec_mergel (vy2, vy6); \ + vx6 = vec_mergeh (vy3, vy7); \ + vx7 = vec_mergel (vy3, vy7); \ + \ + vy0 = vec_mergeh (vx0, vx4); \ + vy1 = vec_mergel (vx0, vx4); \ + vy2 = vec_mergeh (vx1, vx5); \ + vy3 = vec_mergel (vx1, vx5); \ + vy4 = vec_mergeh (vx2, vx6); \ + vy5 = vec_mergel (vx2, vx6); \ + vy6 = vec_mergeh (vx3, vx7); \ + vy7 = vec_mergel (vx3, vx7); \ + \ + vx0 = vec_adds (vec_mergeh (vy0, vy4), bias); \ + vx1 = vec_mergel (vy0, vy4); \ + vx2 = vec_mergeh (vy1, vy5); \ + vx3 = vec_mergel (vy1, vy5); \ + vx4 = vec_mergeh (vy2, vy6); \ + vx5 = vec_mergel (vy2, vy6); \ + vx6 = vec_mergeh (vy3, vy7); \ + vx7 = vec_mergel (vy3, vy7); \ + \ + IDCT_HALF \ + \ + shift = vec_splat_u16 (6); \ + vx0 = vec_sra (vy0, shift); \ + vx1 = vec_sra (vy1, shift); \ + vx2 = vec_sra (vy2, shift); \ + vx3 = vec_sra (vy3, shift); \ + vx4 = vec_sra (vy4, shift); \ + vx5 = vec_sra (vy5, shift); \ + vx6 = vec_sra (vy6, shift); \ + vx7 = vec_sra (vy7, shift); + /* mpeg2_idct_copy_altivec: expands IDCT on _block and overwrites eight rows of dest, stride bytes apart, with the result. Each output row is packed to bytes with unsigned saturation (vec_packsu) and written with two 32-bit vec_ste stores at offsets 0 and 4. All eight vectors of the coefficient block are set to zero before returning. NOTE(review): _block is reinterpreted as a vector_s16_t pointer, so it presumably has to be 16-byte aligned - confirm against callers. */ +void mpeg2_idct_copy_altivec (int16_t * const _block, uint8_t * dest, + const int stride) +{ + vector_s16_t * const block = (vector_s16_t *)_block; + vector_u8_t tmp; + + IDCT + +#define COPY(dest,src) \ + tmp = vec_packsu (src, src); \ + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \ + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + + COPY (dest, vx0) dest += stride; + COPY (dest, vx1) dest += stride; + COPY (dest, vx2) dest += stride; + COPY (dest, vx3) dest += stride; + COPY (dest, vx4) dest += stride; + COPY (dest, vx5) dest += stride; + COPY (dest, vx6) dest += stride; + COPY (dest, vx7) + + block[0] = block[1] = block[2] = block[3] = zero; + block[4] = block[5] = block[6] = block[7] = zero; +} + /* mpeg2_idct_add_altivec: expands IDCT on _block and adds the result to the eight existing rows of dest, stride bytes apart, then sets the coefficient block to zero. The last argument is not read by this implementation. perm0 and perm1 interleave 0xff index bytes with the vec_lvsl alignment indices for dest and dest + stride, so that vec_perm against the zero vector widens the loaded destination bytes to 16 bits before the saturating add. Even rows use perm0 and odd rows use perm1. NOTE(review): reusing the two masks for all eight rows presumably relies on dest + 2 * stride having the same 16-byte alignment as dest - confirm against callers. */ +void mpeg2_idct_add_altivec (const int last, int16_t * const _block, + uint8_t * 
dest, const int stride) +{ + vector_s16_t * const block = (vector_s16_t *)_block; + vector_u8_t tmp; + vector_s16_t tmp2, tmp3; + vector_u8_t perm0; + vector_u8_t perm1; + vector_u8_t p0, p1, p; + + IDCT + + p0 = vec_lvsl (0, dest); + p1 = vec_lvsl (stride, dest); + p = vec_splat_u8 (-1); + perm0 = vec_mergeh (p, p0); + perm1 = vec_mergeh (p, p1); + +#define ADD(dest,src,perm) \ + /* *(uint64_t *)&tmp = *(uint64_t *)dest; */ \ + tmp = vec_ld (0, dest); \ + tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm); \ + tmp3 = vec_adds (tmp2, src); \ + tmp = vec_packsu (tmp3, tmp3); \ + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \ + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + + ADD (dest, vx0, perm0) dest += stride; + ADD (dest, vx1, perm1) dest += stride; + ADD (dest, vx2, perm0) dest += stride; + ADD (dest, vx3, perm1) dest += stride; + ADD (dest, vx4, perm0) dest += stride; + ADD (dest, vx5, perm1) dest += stride; + ADD (dest, vx6, perm0) dest += stride; + ADD (dest, vx7, perm1) + + block[0] = block[1] = block[2] = block[3] = zero; + block[4] = block[5] = block[6] = block[7] = zero; +} + /* mpeg2_idct_altivec_init: rewrites every entry of the global mpeg2_scan_norm and mpeg2_scan_alt tables in place, exchanging the low three bits of each index with the next three bits (row and column of an 8x8 position). */ +void mpeg2_idct_altivec_init (void) +{ + extern uint8_t mpeg2_scan_norm[64]; + extern uint8_t mpeg2_scan_alt[64]; + int i, j; + + /* the altivec idct uses a transposed input, so we patch scan tables */ + for (i = 0; i < 64; i++) { + j = mpeg2_scan_norm[i]; + mpeg2_scan_norm[i] = (j >> 3) | ((j & 7) << 3); + j = mpeg2_scan_alt[i]; + mpeg2_scan_alt[i] = (j >> 3) | ((j & 7) << 3); + } +} + +#endif diff --git a/src/libmpeg2new/libmpeg2/idct_mlib.c b/src/libmpeg2new/libmpeg2/idct_mlib.c new file mode 100644 index 000000000..83c39738d --- /dev/null +++ b/src/libmpeg2new/libmpeg2/idct_mlib.c @@ -0,0 +1,60 @@ +/* + * idct_mlib.c + * Copyright (C) 1999-2003 Håkan Hjort + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. 
+ * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef LIBMPEG2_MLIB + +#include +#include +#include +#include +#include +#include + +#include "mpeg2.h" +#include "mpeg2_internal.h" + /* mpeg2_idct_add_mlib: transforms block in place with mlib_VideoIDCT_IEEE_S16_S16, hands the result to mlib_VideoAddBlock_U8_S16 together with dest and stride, then clears all 64 coefficients of block. The last argument is not read here. The memset size uses sizeof (uint16_t), which equals the size of the int16_t elements. NOTE(review): the exact arithmetic of the two mlib calls is defined by mediaLib, not by this file - confirm against its reference. */ +void mpeg2_idct_add_mlib (const int last, int16_t * const block, + uint8_t * const dest, const int stride) +{ + mlib_VideoIDCT_IEEE_S16_S16 (block, block); + mlib_VideoAddBlock_U8_S16 (dest, block, stride); + memset (block, 0, 64 * sizeof (uint16_t)); +} + /* mpeg2_idct_copy_mlib_non_ieee: lets mlib_VideoIDCT8x8_U8_S16 transform block directly into dest using the given stride, then clears all 64 coefficients of block. */ +void mpeg2_idct_copy_mlib_non_ieee (int16_t * const block, + uint8_t * const dest, const int stride) +{ + mlib_VideoIDCT8x8_U8_S16 (dest, block, stride); + memset (block, 0, 64 * sizeof (uint16_t)); +} + /* mpeg2_idct_add_mlib_non_ieee: same sequence as mpeg2_idct_add_mlib, but the in-place transform is done by mlib_VideoIDCT8x8_S16_S16. The last argument is not read here. */ +void mpeg2_idct_add_mlib_non_ieee (const int last, int16_t * const block, + uint8_t * const dest, const int stride) +{ + mlib_VideoIDCT8x8_S16_S16 (block, block); + mlib_VideoAddBlock_U8_S16 (dest, block, stride); + memset (block, 0, 64 * sizeof (uint16_t)); +} + +#endif diff --git a/src/libmpeg2new/libmpeg2/idct_mmx.c b/src/libmpeg2new/libmpeg2/idct_mmx.c new file mode 100644 index 000000000..e2afe6bb4 --- /dev/null +++ b/src/libmpeg2new/libmpeg2/idct_mmx.c @@ -0,0 +1,814 @@ +/* + * idct_mmx.c + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video 
stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef ARCH_X86 + +#include + +#include "mpeg2.h" +#include "mpeg2_internal.h" +#include "attributes.h" +#include "mmx.h" + +#define ROW_SHIFT 11 +#define COL_SHIFT 6 + +#define round(bias) ((int)(((bias)+0.5) * (1<> ROW_SHIFT; + row[1] = (a1 + b1) >> ROW_SHIFT; + row[2] = (a2 + b2) >> ROW_SHIFT; + row[3] = (a3 + b3) >> ROW_SHIFT; + row[4] = (a3 - b3) >> ROW_SHIFT; + row[5] = (a2 - b2) >> ROW_SHIFT; + row[6] = (a1 - b1) >> ROW_SHIFT; + row[7] = (a0 - b0) >> ROW_SHIFT; +} +#endif + + +/* MMXEXT row IDCT */ + +#define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \ + c4, c6, c4, c6, \ + c1, c3, -c1, -c5, \ + c5, c7, c3, -c7, \ + c4, -c6, c4, -c6, \ + -c4, c2, c4, -c2, \ + c5, -c1, c3, -c1, \ + c7, c3, c7, -c5 } + +static inline void mmxext_row_head (int16_t * const row, const int offset, + const int16_t * const table) +{ + movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */ + + movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */ + movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */ + + movq_m2r (*table, mm3); /* mm3 = -C2 -C4 C2 C4 */ + movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */ + + movq_m2r (*(table+4), mm4); /* mm4 = C6 C4 C6 C4 */ + pmaddwd_r2r 
(mm0, mm3); /* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */ + + pshufw_r2r (mm2, mm2, 0x4e); /* mm2 = x2 x0 x6 x4 */ +} + +static inline void mmxext_row (const int16_t * const table, + const int32_t * const rounder) +{ + movq_m2r (*(table+8), mm1); /* mm1 = -C5 -C1 C3 C1 */ + pmaddwd_r2r (mm2, mm4); /* mm4 = C4*x0+C6*x2 C4*x4+C6*x6 */ + + pmaddwd_m2r (*(table+16), mm0); /* mm0 = C4*x4-C6*x6 C4*x0-C6*x2 */ + pshufw_r2r (mm6, mm6, 0x4e); /* mm6 = x3 x1 x7 x5 */ + + movq_m2r (*(table+12), mm7); /* mm7 = -C7 C3 C7 C5 */ + pmaddwd_r2r (mm5, mm1); /* mm1 = -C1*x5-C5*x7 C1*x1+C3*x3 */ + + paddd_m2r (*rounder, mm3); /* mm3 += rounder */ + pmaddwd_r2r (mm6, mm7); /* mm7 = C3*x1-C7*x3 C5*x5+C7*x7 */ + + pmaddwd_m2r (*(table+20), mm2); /* mm2 = C4*x0-C2*x2 -C4*x4+C2*x6 */ + paddd_r2r (mm4, mm3); /* mm3 = a1 a0 + rounder */ + + pmaddwd_m2r (*(table+24), mm5); /* mm5 = C3*x5-C1*x7 C5*x1-C1*x3 */ + movq_r2r (mm3, mm4); /* mm4 = a1 a0 + rounder */ + + pmaddwd_m2r (*(table+28), mm6); /* mm6 = C7*x1-C5*x3 C7*x5+C3*x7 */ + paddd_r2r (mm7, mm1); /* mm1 = b1 b0 */ + + paddd_m2r (*rounder, mm0); /* mm0 += rounder */ + psubd_r2r (mm1, mm3); /* mm3 = a1-b1 a0-b0 + rounder */ + + psrad_i2r (ROW_SHIFT, mm3); /* mm3 = y6 y7 */ + paddd_r2r (mm4, mm1); /* mm1 = a1+b1 a0+b0 + rounder */ + + paddd_r2r (mm2, mm0); /* mm0 = a3 a2 + rounder */ + psrad_i2r (ROW_SHIFT, mm1); /* mm1 = y1 y0 */ + + paddd_r2r (mm6, mm5); /* mm5 = b3 b2 */ + movq_r2r (mm0, mm4); /* mm4 = a3 a2 + rounder */ + + paddd_r2r (mm5, mm0); /* mm0 = a3+b3 a2+b2 + rounder */ + psubd_r2r (mm5, mm4); /* mm4 = a3-b3 a2-b2 + rounder */ +} + +static inline void mmxext_row_tail (int16_t * const row, const int store) +{ + psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */ + + psrad_i2r (ROW_SHIFT, mm4); /* mm4 = y4 y5 */ + + packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */ + + packssdw_r2r (mm3, mm4); /* mm4 = y6 y7 y4 y5 */ + + movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */ + pshufw_r2r (mm4, mm4, 0xb1); /* mm4 = y7 y6 y5 y4 */ + + /* slot */ + 
+ movq_r2m (mm4, *(row+store+4)); /* save y7 y6 y5 y4 */ +} + +static inline void mmxext_row_mid (int16_t * const row, const int store, + const int offset, + const int16_t * const table) +{ + movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */ + psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */ + + movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */ + psrad_i2r (ROW_SHIFT, mm4); /* mm4 = y4 y5 */ + + packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */ + movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */ + + packssdw_r2r (mm3, mm4); /* mm4 = y6 y7 y4 y5 */ + movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */ + + movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */ + pshufw_r2r (mm4, mm4, 0xb1); /* mm4 = y7 y6 y5 y4 */ + + movq_m2r (*table, mm3); /* mm3 = -C2 -C4 C2 C4 */ + movq_r2m (mm4, *(row+store+4)); /* save y7 y6 y5 y4 */ + + pmaddwd_r2r (mm0, mm3); /* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */ + + movq_m2r (*(table+4), mm4); /* mm4 = C6 C4 C6 C4 */ + pshufw_r2r (mm2, mm2, 0x4e); /* mm2 = x2 x0 x6 x4 */ +} + + +/* MMX row IDCT */ + +#define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \ + c4, c6, -c4, -c2, \ + c1, c3, c3, -c7, \ + c5, c7, -c1, -c5, \ + c4, -c6, c4, -c2, \ + -c4, c2, c4, -c6, \ + c5, -c1, c7, -c5, \ + c7, c3, c3, -c1 } + +static inline void mmx_row_head (int16_t * const row, const int offset, + const int16_t * const table) +{ + movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */ + + movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */ + movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */ + + movq_m2r (*table, mm3); /* mm3 = C6 C4 C2 C4 */ + movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */ + + punpckldq_r2r (mm0, mm0); /* mm0 = x2 x0 x2 x0 */ + + movq_m2r (*(table+4), mm4); /* mm4 = -C2 -C4 C6 C4 */ + pmaddwd_r2r (mm0, mm3); /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */ + + movq_m2r (*(table+8), mm1); /* mm1 = -C7 C3 C3 C1 */ + punpckhdq_r2r (mm2, mm2); /* mm2 = x6 x4 x6 x4 */ +} + +static inline void mmx_row (const int16_t * const table, + const int32_t * const rounder) +{ 
+ pmaddwd_r2r (mm2, mm4); /* mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 */ + punpckldq_r2r (mm5, mm5); /* mm5 = x3 x1 x3 x1 */ + + pmaddwd_m2r (*(table+16), mm0); /* mm0 = C4*x0-C2*x2 C4*x0-C6*x2 */ + punpckhdq_r2r (mm6, mm6); /* mm6 = x7 x5 x7 x5 */ + + movq_m2r (*(table+12), mm7); /* mm7 = -C5 -C1 C7 C5 */ + pmaddwd_r2r (mm5, mm1); /* mm1 = C3*x1-C7*x3 C1*x1+C3*x3 */ + + paddd_m2r (*rounder, mm3); /* mm3 += rounder */ + pmaddwd_r2r (mm6, mm7); /* mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 */ + + pmaddwd_m2r (*(table+20), mm2); /* mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 */ + paddd_r2r (mm4, mm3); /* mm3 = a1 a0 + rounder */ + + pmaddwd_m2r (*(table+24), mm5); /* mm5 = C7*x1-C5*x3 C5*x1-C1*x3 */ + movq_r2r (mm3, mm4); /* mm4 = a1 a0 + rounder */ + + pmaddwd_m2r (*(table+28), mm6); /* mm6 = C3*x5-C1*x7 C7*x5+C3*x7 */ + paddd_r2r (mm7, mm1); /* mm1 = b1 b0 */ + + paddd_m2r (*rounder, mm0); /* mm0 += rounder */ + psubd_r2r (mm1, mm3); /* mm3 = a1-b1 a0-b0 + rounder */ + + psrad_i2r (ROW_SHIFT, mm3); /* mm3 = y6 y7 */ + paddd_r2r (mm4, mm1); /* mm1 = a1+b1 a0+b0 + rounder */ + + paddd_r2r (mm2, mm0); /* mm0 = a3 a2 + rounder */ + psrad_i2r (ROW_SHIFT, mm1); /* mm1 = y1 y0 */ + + paddd_r2r (mm6, mm5); /* mm5 = b3 b2 */ + movq_r2r (mm0, mm7); /* mm7 = a3 a2 + rounder */ + + paddd_r2r (mm5, mm0); /* mm0 = a3+b3 a2+b2 + rounder */ + psubd_r2r (mm5, mm7); /* mm7 = a3-b3 a2-b2 + rounder */ +} + +static inline void mmx_row_tail (int16_t * const row, const int store) +{ + psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */ + + psrad_i2r (ROW_SHIFT, mm7); /* mm7 = y4 y5 */ + + packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */ + + packssdw_r2r (mm3, mm7); /* mm7 = y6 y7 y4 y5 */ + + movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */ + movq_r2r (mm7, mm4); /* mm4 = y6 y7 y4 y5 */ + + pslld_i2r (16, mm7); /* mm7 = y7 0 y5 0 */ + + psrld_i2r (16, mm4); /* mm4 = 0 y6 0 y4 */ + + por_r2r (mm4, mm7); /* mm7 = y7 y6 y5 y4 */ + + /* slot */ + + movq_r2m (mm7, *(row+store+4)); /* save y7 y6 y5 y4 */ +} + +static inline void 
mmx_row_mid (int16_t * const row, const int store, + const int offset, const int16_t * const table) +{ + movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */ + psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */ + + movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */ + psrad_i2r (ROW_SHIFT, mm7); /* mm7 = y4 y5 */ + + packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */ + movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */ + + packssdw_r2r (mm3, mm7); /* mm7 = y6 y7 y4 y5 */ + movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */ + + movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */ + movq_r2r (mm7, mm1); /* mm1 = y6 y7 y4 y5 */ + + punpckldq_r2r (mm0, mm0); /* mm0 = x2 x0 x2 x0 */ + psrld_i2r (16, mm7); /* mm7 = 0 y6 0 y4 */ + + movq_m2r (*table, mm3); /* mm3 = C6 C4 C2 C4 */ + pslld_i2r (16, mm1); /* mm1 = y7 0 y5 0 */ + + movq_m2r (*(table+4), mm4); /* mm4 = -C2 -C4 C6 C4 */ + por_r2r (mm1, mm7); /* mm7 = y7 y6 y5 y4 */ + + movq_m2r (*(table+8), mm1); /* mm1 = -C7 C3 C3 C1 */ + punpckhdq_r2r (mm2, mm2); /* mm2 = x6 x4 x6 x4 */ + + movq_r2m (mm7, *(row+store+4)); /* save y7 y6 y5 y4 */ + pmaddwd_r2r (mm0, mm3); /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */ +} + + +#if 0 +/* C column IDCT - its just here to document the MMXEXT and MMX versions */ +static inline void idct_col (int16_t * col, int offset) +{ +/* multiplication - as implemented on mmx */ +#define F(c,x) (((c) * (x)) >> 16) + +/* saturation - it helps us handle torture test cases */ +#define S(x) (((x)>32767) ? 32767 : ((x)<-32768) ? 
-32768 : (x)) + + int16_t x0, x1, x2, x3, x4, x5, x6, x7; + int16_t y0, y1, y2, y3, y4, y5, y6, y7; + int16_t a0, a1, a2, a3, b0, b1, b2, b3; + int16_t u04, v04, u26, v26, u17, v17, u35, v35, u12, v12; + + col += offset; + + x0 = col[0*8]; + x1 = col[1*8]; + x2 = col[2*8]; + x3 = col[3*8]; + x4 = col[4*8]; + x5 = col[5*8]; + x6 = col[6*8]; + x7 = col[7*8]; + + u04 = S (x0 + x4); + v04 = S (x0 - x4); + u26 = S (F (T2, x6) + x2); + v26 = S (F (T2, x2) - x6); + + a0 = S (u04 + u26); + a1 = S (v04 + v26); + a2 = S (v04 - v26); + a3 = S (u04 - u26); + + u17 = S (F (T1, x7) + x1); + v17 = S (F (T1, x1) - x7); + u35 = S (F (T3, x5) + x3); + v35 = S (F (T3, x3) - x5); + + b0 = S (u17 + u35); + b3 = S (v17 - v35); + u12 = S (u17 - u35); + v12 = S (v17 + v35); + u12 = S (2 * F (C4, u12)); + v12 = S (2 * F (C4, v12)); + b1 = S (u12 + v12); + b2 = S (u12 - v12); + + y0 = S (a0 + b0) >> COL_SHIFT; + y1 = S (a1 + b1) >> COL_SHIFT; + y2 = S (a2 + b2) >> COL_SHIFT; + y3 = S (a3 + b3) >> COL_SHIFT; + + y4 = S (a3 - b3) >> COL_SHIFT; + y5 = S (a2 - b2) >> COL_SHIFT; + y6 = S (a1 - b1) >> COL_SHIFT; + y7 = S (a0 - b0) >> COL_SHIFT; + + col[0*8] = y0; + col[1*8] = y1; + col[2*8] = y2; + col[3*8] = y3; + col[4*8] = y4; + col[5*8] = y5; + col[6*8] = y6; + col[7*8] = y7; +} +#endif + + +/* MMX column IDCT */ +static inline void idct_col (int16_t * const col, const int offset) +{ +#define T1 13036 +#define T2 27146 +#define T3 43790 +#define C4 23170 + + static const short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1}; + static const short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2}; + static const short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3}; + static const short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4}; + + /* column code adapted from peter gubanov */ + /* http://www.elecard.com/peter/idct.shtml */ + + movq_m2r (*_T1, mm0); /* mm0 = T1 */ + + movq_m2r (*(col+offset+1*8), mm1); /* mm1 = x1 */ + movq_r2r (mm0, mm2); /* mm2 = T1 */ + + movq_m2r (*(col+offset+7*8), mm4); /* mm4 = x7 */ + pmulhw_r2r (mm1, mm0); /* 
mm0 = T1*x1 */ + + movq_m2r (*_T3, mm5); /* mm5 = T3 */ + pmulhw_r2r (mm4, mm2); /* mm2 = T1*x7 */ + + movq_m2r (*(col+offset+5*8), mm6); /* mm6 = x5 */ + movq_r2r (mm5, mm7); /* mm7 = T3-1 */ + + movq_m2r (*(col+offset+3*8), mm3); /* mm3 = x3 */ + psubsw_r2r (mm4, mm0); /* mm0 = v17 */ + + movq_m2r (*_T2, mm4); /* mm4 = T2 */ + pmulhw_r2r (mm3, mm5); /* mm5 = (T3-1)*x3 */ + + paddsw_r2r (mm2, mm1); /* mm1 = u17 */ + pmulhw_r2r (mm6, mm7); /* mm7 = (T3-1)*x5 */ + + /* slot */ + + movq_r2r (mm4, mm2); /* mm2 = T2 */ + paddsw_r2r (mm3, mm5); /* mm5 = T3*x3 */ + + pmulhw_m2r (*(col+offset+2*8), mm4);/* mm4 = T2*x2 */ + paddsw_r2r (mm6, mm7); /* mm7 = T3*x5 */ + + psubsw_r2r (mm6, mm5); /* mm5 = v35 */ + paddsw_r2r (mm3, mm7); /* mm7 = u35 */ + + movq_m2r (*(col+offset+6*8), mm3); /* mm3 = x6 */ + movq_r2r (mm0, mm6); /* mm6 = v17 */ + + pmulhw_r2r (mm3, mm2); /* mm2 = T2*x6 */ + psubsw_r2r (mm5, mm0); /* mm0 = b3 */ + + psubsw_r2r (mm3, mm4); /* mm4 = v26 */ + paddsw_r2r (mm6, mm5); /* mm5 = v12 */ + + movq_r2m (mm0, *(col+offset+3*8)); /* save b3 in scratch0 */ + movq_r2r (mm1, mm6); /* mm6 = u17 */ + + paddsw_m2r (*(col+offset+2*8), mm2);/* mm2 = u26 */ + paddsw_r2r (mm7, mm6); /* mm6 = b0 */ + + psubsw_r2r (mm7, mm1); /* mm1 = u12 */ + movq_r2r (mm1, mm7); /* mm7 = u12 */ + + movq_m2r (*(col+offset+0*8), mm3); /* mm3 = x0 */ + paddsw_r2r (mm5, mm1); /* mm1 = u12+v12 */ + + movq_m2r (*_C4, mm0); /* mm0 = C4/2 */ + psubsw_r2r (mm5, mm7); /* mm7 = u12-v12 */ + + movq_r2m (mm6, *(col+offset+5*8)); /* save b0 in scratch1 */ + pmulhw_r2r (mm0, mm1); /* mm1 = b1/2 */ + + movq_r2r (mm4, mm6); /* mm6 = v26 */ + pmulhw_r2r (mm0, mm7); /* mm7 = b2/2 */ + + movq_m2r (*(col+offset+4*8), mm5); /* mm5 = x4 */ + movq_r2r (mm3, mm0); /* mm0 = x0 */ + + psubsw_r2r (mm5, mm3); /* mm3 = v04 */ + paddsw_r2r (mm5, mm0); /* mm0 = u04 */ + + paddsw_r2r (mm3, mm4); /* mm4 = a1 */ + movq_r2r (mm0, mm5); /* mm5 = u04 */ + + psubsw_r2r (mm6, mm3); /* mm3 = a2 */ + paddsw_r2r (mm2, mm5); /* 
mm5 = a0 */ + + paddsw_r2r (mm1, mm1); /* mm1 = b1 */ + psubsw_r2r (mm2, mm0); /* mm0 = a3 */ + + paddsw_r2r (mm7, mm7); /* mm7 = b2 */ + movq_r2r (mm3, mm2); /* mm2 = a2 */ + + movq_r2r (mm4, mm6); /* mm6 = a1 */ + paddsw_r2r (mm7, mm3); /* mm3 = a2+b2 */ + + psraw_i2r (COL_SHIFT, mm3); /* mm3 = y2 */ + paddsw_r2r (mm1, mm4); /* mm4 = a1+b1 */ + + psraw_i2r (COL_SHIFT, mm4); /* mm4 = y1 */ + psubsw_r2r (mm1, mm6); /* mm6 = a1-b1 */ + + movq_m2r (*(col+offset+5*8), mm1); /* mm1 = b0 */ + psubsw_r2r (mm7, mm2); /* mm2 = a2-b2 */ + + psraw_i2r (COL_SHIFT, mm6); /* mm6 = y6 */ + movq_r2r (mm5, mm7); /* mm7 = a0 */ + + movq_r2m (mm4, *(col+offset+1*8)); /* save y1 */ + psraw_i2r (COL_SHIFT, mm2); /* mm2 = y5 */ + + movq_r2m (mm3, *(col+offset+2*8)); /* save y2 */ + paddsw_r2r (mm1, mm5); /* mm5 = a0+b0 */ + + movq_m2r (*(col+offset+3*8), mm4); /* mm4 = b3 */ + psubsw_r2r (mm1, mm7); /* mm7 = a0-b0 */ + + psraw_i2r (COL_SHIFT, mm5); /* mm5 = y0 */ + movq_r2r (mm0, mm3); /* mm3 = a3 */ + + movq_r2m (mm2, *(col+offset+5*8)); /* save y5 */ + psubsw_r2r (mm4, mm3); /* mm3 = a3-b3 */ + + psraw_i2r (COL_SHIFT, mm7); /* mm7 = y7 */ + paddsw_r2r (mm0, mm4); /* mm4 = a3+b3 */ + + movq_r2m (mm5, *(col+offset+0*8)); /* save y0 */ + psraw_i2r (COL_SHIFT, mm3); /* mm3 = y4 */ + + movq_r2m (mm6, *(col+offset+6*8)); /* save y6 */ + psraw_i2r (COL_SHIFT, mm4); /* mm4 = y3 */ + + movq_r2m (mm7, *(col+offset+7*8)); /* save y7 */ + + movq_r2m (mm3, *(col+offset+4*8)); /* save y4 */ + + movq_r2m (mm4, *(col+offset+3*8)); /* save y3 */ +} + + +static const int32_t rounder0[] ATTR_ALIGN(8) = + rounder ((1 << (COL_SHIFT - 1)) - 0.5); +static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0); +static const int32_t rounder1[] ATTR_ALIGN(8) = + rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */ +static const int32_t rounder7[] ATTR_ALIGN(8) = + rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */ +static const int32_t rounder2[] ATTR_ALIGN(8) = + rounder (0.60355339059); /* C2 * (C6+C2)/2 */ +static 
/*
 * declare_idct - generate one complete 8x8 inverse DCT routine.
 *
 *   idct           name of the static inline function to define
 *   table          macro that lays out seven coefficients into a table
 *                  (its definition is outside this block)
 *   idct_row_head  row-pass prologue, called once for the first row
 *   idct_row       row-pass core, given a coefficient table and a rounder
 *   idct_row_mid   transition between two rows: takes the offset of the row
 *                  just processed, the offset of the next row and the next
 *                  row's table
 *   idct_row_tail  row-pass epilogue, called once for the last row
 *
 * The generated function transforms `block` in place.  Rows are visited in
 * the order 0, 4, 1, 7, 2, 6, 3, 5 so that each pair of rows that shares a
 * coefficient table (table04, table17, table26, table35) is processed back
 * to back; every row gets its own rounderN constant.  The column pass is
 * then run twice, at offsets 0 and 4.
 *
 * NOTE(review): what head/mid/tail actually load and store is defined by the
 * row macros passed in, which are not visible here - presumably "mid" stores
 * the finished row and preloads the next one; confirm against those macros.
 */
#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \
static inline void idct (int16_t * const block)                         \
{                                                                       \
    static const int16_t table04[] ATTR_ALIGN(16) =                     \
        table (22725, 21407, 19266, 16384, 12873, 8867, 4520);          \
    static const int16_t table17[] ATTR_ALIGN(16) =                     \
        table (31521, 29692, 26722, 22725, 17855, 12299, 6270);         \
    static const int16_t table26[] ATTR_ALIGN(16) =                     \
        table (29692, 27969, 25172, 21407, 16819, 11585, 5906);         \
    static const int16_t table35[] ATTR_ALIGN(16) =                     \
        table (26722, 25172, 22654, 19266, 15137, 10426, 5315);         \
                                                                        \
    idct_row_head (block, 0*8, table04);                                \
    idct_row (table04, rounder0);                                       \
    idct_row_mid (block, 0*8, 4*8, table04);                            \
    idct_row (table04, rounder4);                                       \
    idct_row_mid (block, 4*8, 1*8, table17);                            \
    idct_row (table17, rounder1);                                       \
    idct_row_mid (block, 1*8, 7*8, table17);                            \
    idct_row (table17, rounder7);                                       \
    idct_row_mid (block, 7*8, 2*8, table26);                            \
    idct_row (table26, rounder2);                                       \
    idct_row_mid (block, 2*8, 6*8, table26);                            \
    idct_row (table26, rounder6);                                       \
    idct_row_mid (block, 6*8, 3*8, table35);                            \
    idct_row (table35, rounder3);                                       \
    idct_row_mid (block, 3*8, 5*8, table35);                            \
    idct_row (table35, rounder5);                                       \
    idct_row_tail (block, 5*8);                                         \
                                                                        \
    idct_col (block, 0);                                                \
    idct_col (block, 4);                                                \
}
/*
 * block_add - add an 8x8 block of 16-bit residuals to 8 rows of 8 pixels.
 *
 *   block   64 int16_t values, row r at block + r*8
 *   dest    first destination row; following rows are `stride` bytes apart
 *   stride  distance in bytes between destination rows
 *
 * Each destination row is widened from bytes to words by interleaving with
 * the zero register mm0, the matching residual row is added, and the sums
 * are packed back to bytes and stored.  Rows 0 and 1 are started here;
 * ADD_MMX then handles rows 2..7, each invocation also packing and storing
 * the row left pending by the previous step, and the last two statements
 * flush row 7.
 *
 * NOTE(review): the *_r2r / *_m2r / *_r2m wrappers are defined outside this
 * block; presumably they emit the identically named MMX instructions
 * (paddsw = signed saturating add, packuswb = pack with unsigned
 * saturation, i.e. clamp to 0..255) - confirm against mmx.h.
 */
static inline void block_add (int16_t * const block, uint8_t * dest,
                              const int stride)
{
    movq_m2r (*dest, mm1);              /* row 0 pixels */
    pxor_r2r (mm0, mm0);                /* mm0 = 0, used for byte->word unpack */
    movq_m2r (*(dest+stride), mm3);     /* row 1 pixels */
    movq_r2r (mm1, mm2);
    punpcklbw_r2r (mm0, mm1);           /* row 0, pixels 0..3 as words */
    movq_r2r (mm3, mm4);
    paddsw_m2r (*(block+0*8), mm1);
    punpckhbw_r2r (mm0, mm2);           /* row 0, pixels 4..7 as words */
    paddsw_m2r (*(block+0*8+4), mm2);
    punpcklbw_r2r (mm0, mm3);           /* row 1, pixels 0..3 as words */
    paddsw_m2r (*(block+1*8), mm3);
    packuswb_r2r (mm2, mm1);            /* row 0 back to bytes */
    punpckhbw_r2r (mm0, mm4);           /* row 1, pixels 4..7 as words */
    movq_r2m (mm1, *dest);              /* store row 0 */
    paddsw_m2r (*(block+1*8+4), mm4);   /* row 1 sums stay pending in mm3/mm4 */
    /* The register pairs alternate so one row can be summed while the
       previous one is packed and stored. */
    ADD_MMX (2*8, mm1, mm2, mm3, mm4);
    ADD_MMX (3*8, mm3, mm4, mm1, mm2);
    ADD_MMX (4*8, mm1, mm2, mm3, mm4);
    ADD_MMX (5*8, mm3, mm4, mm1, mm2);
    ADD_MMX (6*8, mm1, mm2, mm3, mm4);
    ADD_MMX (7*8, mm3, mm4, mm1, mm2);
    packuswb_r2r (mm4, mm3);            /* row 7 back to bytes */
    movq_r2m (mm3, *(dest+stride));     /* dest is at row 6 here */
}
/*
 * block_add_DC - add a single constant to 8 rows of 8 pixels.
 *
 *   block   coefficient block; only block[0] is read.  block[0] and
 *           block[63] are reset to 0 before returning.
 *   dest    first destination row; following rows are `stride` bytes apart
 *   stride  distance in bytes between destination rows
 *   cpu     CPU_MMX or CPU_MMXEXT; only consulted by dup4 to choose how the
 *           value is replicated
 *
 * The value added is (block[0] + 4) >> 3.  It is replicated into four
 * words, then split into two byte vectors: mm0 holds the value saturated to
 * unsigned bytes and mm1 holds its negation saturated the same way, so one
 * of the two is all zero.  Every row is then computed as
 * (pixel + mm0) - mm1 with unsigned byte saturation.
 *
 * NOTE(review): the callers visible in this file take this path only when
 * last == 129 and (block[0] & 7) != 4; presumably block[0] and block[63]
 * are then the only coefficients that can be non-zero, which is why only
 * those two are cleared - confirm against the slice decoder.
 */
static inline void block_add_DC (int16_t * const block, uint8_t * dest,
                                 const int stride, const int cpu)
{
    movd_v2r ((block[0] + 4) >> 3, mm0);        /* rounded DC value */
    pxor_r2r (mm1, mm1);
    movq_m2r (*dest, mm2);                      /* row 0 */
    dup4 (mm0);                                 /* replicate into 4 words */
    psubsw_r2r (mm0, mm1);                      /* mm1 = -DC */
    packuswb_r2r (mm0, mm0);                    /* mm0 = 8 bytes of DC, floored at 0 */
    paddusb_r2r (mm0, mm2);
    packuswb_r2r (mm1, mm1);                    /* mm1 = 8 bytes of -DC, floored at 0 */
    movq_m2r (*(dest + stride), mm3);           /* row 1 */
    psubusb_r2r (mm1, mm2);
    block[0] = 0;
    paddusb_r2r (mm0, mm3);
    movq_r2m (mm2, *dest);                      /* store row 0 */
    psubusb_r2r (mm1, mm3);
    movq_m2r (*(dest + 2*stride), mm2);         /* row 2 */
    dest += stride;
    movq_r2m (mm3, *dest);                      /* store row 1 */
    paddusb_r2r (mm0, mm2);
    movq_m2r (*(dest + 2*stride), mm3);         /* row 3 */
    psubusb_r2r (mm1, mm2);
    dest += stride;
    paddusb_r2r (mm0, mm3);
    movq_r2m (mm2, *dest);                      /* store row 2 */
    psubusb_r2r (mm1, mm3);
    movq_m2r (*(dest + 2*stride), mm2);         /* row 4 */
    dest += stride;
    movq_r2m (mm3, *dest);                      /* store row 3 */
    paddusb_r2r (mm0, mm2);
    movq_m2r (*(dest + 2*stride), mm3);         /* row 5 */
    psubusb_r2r (mm1, mm2);
    dest += stride;
    paddusb_r2r (mm0, mm3);
    movq_r2m (mm2, *dest);                      /* store row 4 */
    psubusb_r2r (mm1, mm3);
    movq_m2r (*(dest + 2*stride), mm2);         /* row 6 */
    dest += stride;
    movq_r2m (mm3, *dest);                      /* store row 5 */
    paddusb_r2r (mm0, mm2);
    movq_m2r (*(dest + 2*stride), mm3);         /* row 7 */
    psubusb_r2r (mm1, mm2);
    block[63] = 0;
    paddusb_r2r (mm0, mm3);
    movq_r2m (mm2, *(dest + stride));           /* store row 6 */
    psubusb_r2r (mm1, mm3);
    movq_r2m (mm3, *(dest + 2*stride));         /* store row 7 */
}
/*
 * mpeg2_idct_mmx_init - adapt the global scan tables to the MMX/MMXEXT IDCT.
 *
 * The mmx/mmxext idct expects its input coefficients in a reordered layout,
 * so every entry of mpeg2_scan_norm and mpeg2_scan_alt is rewritten in
 * place: bits 3..5 of the entry are kept, and its low three bits are
 * rotated right by one position (bits 2..1 move to 1..0, bit 0 moves to
 * bit 2).
 *
 * NOTE(review): the tables are patched from their own current contents, so
 * running this function a second time would rotate the entries again.
 * Presumably the caller guarantees it runs once per process - confirm.
 */
void mpeg2_idct_mmx_init (void)
{
    extern uint8_t mpeg2_scan_norm[64];
    extern uint8_t mpeg2_scan_alt[64];
    uint8_t * const tables[2] = { mpeg2_scan_norm, mpeg2_scan_alt };
    int pos, t;

    for (pos = 0; pos < 64; pos++) {
        for (t = 0; t < 2; t++) {
            const int entry = tables[t][pos];
            const int high = entry & 0x38;      /* bits 3..5, unchanged */
            const int low = entry & 7;          /* bits 0..2, to be rotated */

            tables[t][pos] = high | (low >> 1) | ((low & 1) << 2);
        }
    }
}
+Version: @VERSION@ +Libs: -L${libdir} -lmpeg2 +Cflags: -I${includedir}/@PACKAGE@ diff --git a/src/libmpeg2new/libmpeg2/motion_comp.c b/src/libmpeg2new/libmpeg2/motion_comp.c new file mode 100644 index 000000000..24cfee1e1 --- /dev/null +++ b/src/libmpeg2new/libmpeg2/motion_comp.c @@ -0,0 +1,129 @@ +/* + * motion_comp.c + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
/*
 * Rounded integer averages of two and four values.  Every parameter is
 * parenthesized so that an argument containing an operator of lower
 * precedence than '+' (|, &, ?:, shifts, ...) is still averaged as a whole.
 */
#define avg2(a,b) (((a) + (b) + 1) >> 1)
#define avg4(a,b,c,d) (((a) + (b) + (c) + (d) + 2) >> 2)

/*
 * Predictors for pixel i of the current row.  They read the local names
 * `ref` and `stride` of the enclosing function:
 *   o   the reference pixel itself
 *   x   average with the pixel to the right
 *   y   average with the pixel one row below
 *   xy  average of the 2x2 square starting at the pixel
 */
#define predict_o(i) (ref[i])
#define predict_x(i) (avg2 (ref[i], ref[(i) + 1]))
#define predict_y(i) (avg2 (ref[i], (ref + stride)[i]))
#define predict_xy(i) (avg4 (ref[i], ref[(i) + 1], \
                             (ref + stride)[i], (ref + stride)[(i) + 1]))

/* Output operations: overwrite dest[i], or average it with the prediction. */
#define put(predictor,i) dest[i] = predictor (i)
#define avg(predictor,i) dest[i] = avg2 (predictor (i), dest[i])

/*
 * mc function template
 *
 * MC_FUNC (op, xy) defines MC_<op>_<xy>_16_c and MC_<op>_<xy>_8_c, which
 * process `height` rows of 16 or 8 pixels:
 *
 *   dest    first output row (also an input when op is avg)
 *   ref     first reference row; the x/xy predictors read one pixel past
 *           the block width and the y/xy predictors read one extra row
 *   stride  distance in bytes between rows, used for both dest and ref
 *   height  number of rows; the loop is do/while, so at least one row is
 *           always processed and height must be >= 1
 */
#define MC_FUNC(op,xy)                                                  \
static void MC_##op##_##xy##_16_c (uint8_t * dest, const uint8_t * ref, \
                                   const int stride, int height)        \
{                                                                       \
    do {                                                                \
        op (predict_##xy, 0);                                           \
        op (predict_##xy, 1);                                           \
        op (predict_##xy, 2);                                           \
        op (predict_##xy, 3);                                           \
        op (predict_##xy, 4);                                           \
        op (predict_##xy, 5);                                           \
        op (predict_##xy, 6);                                           \
        op (predict_##xy, 7);                                           \
        op (predict_##xy, 8);                                           \
        op (predict_##xy, 9);                                           \
        op (predict_##xy, 10);                                          \
        op (predict_##xy, 11);                                          \
        op (predict_##xy, 12);                                          \
        op (predict_##xy, 13);                                          \
        op (predict_##xy, 14);                                          \
        op (predict_##xy, 15);                                          \
        ref += stride;                                                  \
        dest += stride;                                                 \
    } while (--height);                                                 \
}                                                                       \
static void MC_##op##_##xy##_8_c (uint8_t * dest, const uint8_t * ref,  \
                                  const int stride, int height)         \
{                                                                       \
    do {                                                                \
        op (predict_##xy, 0);                                           \
        op (predict_##xy, 1);                                           \
        op (predict_##xy, 2);                                           \
        op (predict_##xy, 3);                                           \
        op (predict_##xy, 4);                                           \
        op (predict_##xy, 5);                                           \
        op (predict_##xy, 6);                                           \
        op (predict_##xy, 7);                                           \
        ref += stride;                                                  \
        dest += stride;                                                 \
    } while (--height);                                                 \
}

/* definitions of the actual mc functions */

MC_FUNC (put,o)
MC_FUNC (avg,o)
MC_FUNC (put,x)
MC_FUNC (avg,x)
MC_FUNC (put,y)
MC_FUNC (avg,y)
MC_FUNC (put,xy)
MC_FUNC (avg,xy)
/*
 * avg2 - average two packed 64-bit values lane by lane, rounding up.
 *
 * Uses the identity  ceil((a + b) / 2) == (a | b) - ((a ^ b) >> 1).
 * The difference bits are masked with BYTE_VEC (0xfe) before the shift so
 * that no bit is shifted from one lane into its neighbour.
 *
 * NOTE(review): BYTE_VEC is defined outside this block; presumably it
 * replicates its byte argument across all eight bytes of a quadword, which
 * makes the lanes 8 bits wide - confirm against alpha_asm.h.
 */
static inline uint64_t avg2 (uint64_t a, uint64_t b)
{
    const uint64_t either = a | b;
    const uint64_t half_diff = ((a ^ b) & BYTE_VEC (0xfe)) >> 1;

    return either - half_diff;
}
/*
 * OP8_XY2 - 8-pixel-wide half-pel interpolation in both directions.
 *
 * Expands inside a function that provides `pixels`, `block`, `line_size`
 * and `h` (see MAKE_OP).  LOAD reads one quadword from `pixels`, STORE
 * writes one quadword to `block`; LOAD16 is unused here.
 *
 * For every row, p1 is the quadword at `pixels` and p2 is the same row
 * moved one pixel to the right: p1 shifted down by one byte with pixels[8]
 * inserted as the top byte.
 *
 * To sum four pixels per byte lane without carries crossing lanes, each
 * pixel is split in two parts:
 *   ph / nph  sum of the upper six bits (>> 2) of the two horizontal
 *             neighbours, for the previous / current row
 *   pl / npl  sum of the lower two bits of the same pixels
 * The stored value is ph + nph + (((pl + npl + 2) >> 2) & 3) per lane,
 * i.e. the four-pixel sum plus 2, divided by 4.
 *
 * The row sums of the current row are carried over as the "previous row"
 * of the next iteration, so h output rows read h + 1 input rows.  The inner
 * loop is do/while (--h), so h must be at least 1 on entry.
 *
 * NOTE(review): BYTE_VEC is defined outside this block; the lane reasoning
 * above assumes it replicates its byte argument into all eight bytes -
 * confirm against alpha_asm.h.
 */
#define OP8_XY2(LOAD,LOAD16,STORE)                                      \
    do {                                                                \
        uint64_t pl, ph;                                                \
        uint64_t p1 = LOAD (pixels);                                    \
        uint64_t p2 = p1 >> 8 | ((uint64_t) pixels[8] << 56);           \
                                                                        \
        ph = (((p1 & ~BYTE_VEC (0x03)) >> 2) +                          \
              ((p2 & ~BYTE_VEC (0x03)) >> 2));                          \
        pl = ((p1 & BYTE_VEC (0x03)) +                                  \
              (p2 & BYTE_VEC (0x03)));                                  \
                                                                        \
        do {                                                            \
            uint64_t npl, nph;                                          \
                                                                        \
            pixels += line_size;                                        \
            p1 = LOAD (pixels);                                         \
            p2 = (p1 >> 8) | ((uint64_t) pixels[8] << 56);              \
            nph = (((p1 & ~BYTE_VEC (0x03)) >> 2) +                     \
                   ((p2 & ~BYTE_VEC (0x03)) >> 2));                     \
            npl = ((p1 & BYTE_VEC (0x03)) +                             \
                   (p2 & BYTE_VEC (0x03)));                             \
                                                                        \
            STORE (ph + nph +                                           \
                   (((pl + npl + BYTE_VEC (0x02)) >> 2) &               \
                    BYTE_VEC (0x03)), block);                           \
                                                                        \
            block += line_size;                                         \
            pl = npl;                                                   \
            ph = nph;                                                   \
        } while (--h);                                                  \
    } while (0)
/*
 * MAKE_OP - define one Alpha motion-compensation function.
 *
 *   OPNAME  operation part of the function name (put or avg)
 *   SIZE    block width part of the function name (8 or 16)
 *   SUFF    predictor part of the function name (o, x, y or xy)
 *   OPKIND  loop body macro to expand (OP8, OP16_X2, ...)
 *   STORE   store macro handed through to OPKIND
 *
 * The generated function is named MC_<OPNAME>_<SUFF>_<SIZE>_alpha and takes
 * (block, pixels, line_size, h): destination, source, row distance in bytes
 * and row count.  OPKIND is expanded twice and the variant is chosen at run
 * time from the low three bits of `pixels`: a source that is not 8-byte
 * aligned uses the unaligned loaders (uldq / ULOAD16), an aligned source
 * uses the plain loaders (ldq / ALOAD16).
 */
#define MAKE_OP(OPNAME,SIZE,SUFF,OPKIND,STORE)                          \
static void MC_ ## OPNAME ## _ ## SUFF ## _ ## SIZE ## _alpha           \
        (uint8_t *restrict block, const uint8_t *restrict pixels,       \
         int line_size, int h)                                          \
{                                                                       \
    if ((uint64_t) pixels & 0x7) {                                      \
        OPKIND (uldq, ULOAD16, STORE);                                  \
    } else {                                                            \
        OPKIND (ldq, ALOAD16, STORE);                                   \
    }                                                                   \
}
MC_avg_o_8_alpha, MC_avg_x_8_alpha, + MC_avg_y_8_alpha, MC_avg_xy_8_alpha } +}; + +#endif diff --git a/src/libmpeg2new/libmpeg2/motion_comp_altivec.c b/src/libmpeg2new/libmpeg2/motion_comp_altivec.c new file mode 100644 index 000000000..f5d884e6e --- /dev/null +++ b/src/libmpeg2new/libmpeg2/motion_comp_altivec.c @@ -0,0 +1,1009 @@ +/* + * motion_comp_altivec.c + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
/*
 * MC_put_o_16_altivec - copy a 16-pixel-wide block, no interpolation.
 *
 *   dest    first output row, written with vec_st
 *   ref     first reference row, may be unaligned
 *   stride  distance in bytes between rows, for both dest and ref
 *   height  number of rows
 *
 * Each reference row is fetched as two vector loads (offsets 0 and 15) and
 * merged with vec_perm using the shift pattern from vec_lvsl.  The loop is
 * software pipelined: one row is loaded before the loop, every iteration
 * stores two rows while loading the next two, and the last two rows are
 * stored after the loop.  The iteration count is (height >> 1) - 1 in a
 * do/while, so the code as written needs an even height of at least 4.
 *
 * NOTE(review): `perm` is computed once from the initial `ref` and reused
 * for every row, which presumably relies on stride being a multiple of 16;
 * vec_st presumably also requires dest to be 16-byte aligned - confirm
 * against the callers and the AltiVec documentation.
 */
static void MC_put_o_16_altivec (uint8_t * dest, const uint8_t * ref,
                                 const int stride, int height)
{
    vector_u8_t perm, ref0, ref1, tmp;

    perm = vec_lvsl (0, ref);

    /* two rows are handled outside the loop, two per iteration inside */
    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    ref += stride;
    tmp = vec_perm (ref0, ref1, perm);

    do {
        ref0 = vec_ld (0, ref);
        ref1 = vec_ld (15, ref);
        ref += stride;
        vec_st (tmp, 0, dest);
        tmp = vec_perm (ref0, ref1, perm);

        ref0 = vec_ld (0, ref);
        ref1 = vec_ld (15, ref);
        ref += stride;
        vec_st (tmp, stride, dest);
        dest += 2*stride;
        tmp = vec_perm (ref0, ref1, perm);
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    vec_st (tmp, 0, dest);
    tmp = vec_perm (ref0, ref1, perm);
    vec_st (tmp, stride, dest);
}
/*
 * MC_put_x_16_altivec - 16-pixel-wide horizontal half-pel prediction.
 *
 *   dest    first output row, written with vec_st
 *   ref     first reference row, may be unaligned
 *   stride  distance in bytes between rows, for both dest and ref
 *   height  number of rows
 *
 * For every row two shifted copies are extracted from the same pair of
 * vector loads: permA selects the 16 pixels starting at ref, permB (permA
 * with 1 added to every element) selects the 16 pixels starting at ref + 1.
 * Their vec_avg is the output row.  The second load uses offset 16 rather
 * than 15 because the row needs 17 source pixels.
 *
 * Same software-pipelined shape as the other 16-wide routines: the loop
 * count is (height >> 1) - 1 in a do/while, so an even height of at least
 * 4 is required by the code as written.
 *
 * NOTE(review): vec_avg presumably rounds up, i.e. (a + b + 1) >> 1 per
 * byte, matching avg2 in the C reference - confirm in the AltiVec
 * documentation.
 */
static void MC_put_x_16_altivec (uint8_t * dest, const uint8_t * ref,
                                 const int stride, int height)
{
    vector_u8_t permA, permB, ref0, ref1, tmp;

    permA = vec_lvsl (0, ref);
    permB = vec_add (permA, vec_splat_u8 (1));  /* same window, one pixel right */

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    ref += stride;
    tmp = vec_avg (vec_perm (ref0, ref1, permA),
                   vec_perm (ref0, ref1, permB));

    do {
        ref0 = vec_ld (0, ref);
        ref1 = vec_ld (16, ref);
        ref += stride;
        vec_st (tmp, 0, dest);
        tmp = vec_avg (vec_perm (ref0, ref1, permA),
                       vec_perm (ref0, ref1, permB));

        ref0 = vec_ld (0, ref);
        ref1 = vec_ld (16, ref);
        ref += stride;
        vec_st (tmp, stride, dest);
        dest += 2*stride;
        tmp = vec_avg (vec_perm (ref0, ref1, permA),
                       vec_perm (ref0, ref1, permB));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    vec_st (tmp, 0, dest);
    tmp = vec_avg (vec_perm (ref0, ref1, permA),
                   vec_perm (ref0, ref1, permB));
    vec_st (tmp, stride, dest);
}
/*
 * MC_put_y_16_altivec - 16-pixel-wide vertical half-pel prediction.
 *
 *   dest    first output row, written with vec_st
 *   ref     first reference row, may be unaligned
 *   stride  distance in bytes between rows, for both dest and ref
 *   height  number of output rows; height + 1 reference rows are read
 *
 * Every output row is the vec_avg of two consecutive reference rows.  The
 * realigned rows alternate between tmp0 and tmp1 so that each reference row
 * is loaded and permuted only once and then used for two output rows.
 *
 * Software pipelined like the other 16-wide routines: two reference rows
 * are consumed before the loop, two per iteration, one after it.  The loop
 * count is (height >> 1) - 1 in a do/while, so an even height of at least
 * 4 is required by the code as written.
 */
static void MC_put_y_16_altivec (uint8_t * dest, const uint8_t * ref,
                                 const int stride, int height)
{
    vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp;

    perm = vec_lvsl (0, ref);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    ref += stride;
    tmp0 = vec_perm (ref0, ref1, perm);         /* reference row 0 */
    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    ref += stride;
    tmp1 = vec_perm (ref0, ref1, perm);         /* reference row 1 */
    tmp = vec_avg (tmp0, tmp1);                 /* output row 0 */

    do {
        ref0 = vec_ld (0, ref);
        ref1 = vec_ld (15, ref);
        ref += stride;
        vec_st (tmp, 0, dest);
        tmp0 = vec_perm (ref0, ref1, perm);     /* replaces the older row */
        tmp = vec_avg (tmp0, tmp1);

        ref0 = vec_ld (0, ref);
        ref1 = vec_ld (15, ref);
        ref += stride;
        vec_st (tmp, stride, dest);
        dest += 2*stride;
        tmp1 = vec_perm (ref0, ref1, perm);
        tmp = vec_avg (tmp0, tmp1);
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    vec_st (tmp, 0, dest);
    tmp0 = vec_perm (ref0, ref1, perm);
    tmp = vec_avg (tmp0, tmp1);
    vec_st (tmp, stride, dest);
}
/*
 * MC_put_xy_16_altivec - 16-pixel-wide half-pel prediction in both
 * directions.
 *
 *   dest    first output row, written with vec_st
 *   ref     first reference row, may be unaligned
 *   stride  distance in bytes between rows, for both dest and ref
 *   height  number of output rows; height + 1 reference rows are read
 *
 * For each reference row, A is the row itself and B the row shifted one
 * pixel right (permB = permA + 1).  Two values are kept per row: the
 * horizontal average avgN = vec_avg (A, B) and xorN = A ^ B, whose low bit
 * tells whether that average was rounded.  Rows alternate between the
 * avg0/xor0 and avg1/xor1 slots so each row is computed once and used
 * twice.
 *
 * An output row is vec_avg (avg0, avg1) minus a one-bit correction:
 *     ones & (xor0 | xor1) & (avg0 ^ avg1)
 * i.e. 1 is subtracted when at least one horizontal average was rounded
 * and the vertical average was rounded as well.  This compensates for
 * rounding twice; it is presumably meant to reproduce (a+b+c+d+2) >> 2 of
 * the C reference - TODO confirm bit-exactness.
 *
 * The loop count is (height >> 1) - 1 in a do/while, so an even height of
 * at least 4 is required by the code as written.
 */
static void MC_put_xy_16_altivec (uint8_t * dest, const uint8_t * ref,
                                  const int stride, int height)
{
    vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
    vector_u8_t ones;

    ones = vec_splat_u8 (1);
    permA = vec_lvsl (0, ref);
    permB = vec_add (permA, ones);

    height = (height >> 1) - 1;

    /* reference row 0 */
    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    ref += stride;
    A = vec_perm (ref0, ref1, permA);
    B = vec_perm (ref0, ref1, permB);
    avg0 = vec_avg (A, B);
    xor0 = vec_xor (A, B);

    /* reference row 1, then output row 0 */
    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    ref += stride;
    A = vec_perm (ref0, ref1, permA);
    B = vec_perm (ref0, ref1, permB);
    avg1 = vec_avg (A, B);
    xor1 = vec_xor (A, B);
    tmp = vec_sub (vec_avg (avg0, avg1),
                   vec_and (vec_and (ones, vec_or (xor0, xor1)),
                            vec_xor (avg0, avg1)));

    do {
        ref0 = vec_ld (0, ref);
        ref1 = vec_ld (16, ref);
        ref += stride;
        vec_st (tmp, 0, dest);
        A = vec_perm (ref0, ref1, permA);
        B = vec_perm (ref0, ref1, permB);
        avg0 = vec_avg (A, B);
        xor0 = vec_xor (A, B);
        tmp = vec_sub (vec_avg (avg0, avg1),
                       vec_and (vec_and (ones, vec_or (xor0, xor1)),
                                vec_xor (avg0, avg1)));

        ref0 = vec_ld (0, ref);
        ref1 = vec_ld (16, ref);
        ref += stride;
        vec_st (tmp, stride, dest);
        dest += 2*stride;
        A = vec_perm (ref0, ref1, permA);
        B = vec_perm (ref0, ref1, permB);
        avg1 = vec_avg (A, B);
        xor1 = vec_xor (A, B);
        tmp = vec_sub (vec_avg (avg0, avg1),
                       vec_and (vec_and (ones, vec_or (xor0, xor1)),
                                vec_xor (avg0, avg1)));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    vec_st (tmp, 0, dest);
    A = vec_perm (ref0, ref1, permA);
    B = vec_perm (ref0, ref1, permB);
    avg0 = vec_avg (A, B);
    xor0 = vec_xor (A, B);
    tmp = vec_sub (vec_avg (avg0, avg1),
                   vec_and (vec_and (ones, vec_or (xor0, xor1)),
                            vec_xor (avg0, avg1)));
    vec_st (tmp, stride, dest);
}
+= stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm1A); + B = vec_perm (ref0, ref1, perm1B); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); +} + +#if 0 +static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t permA, permB, ref0, ref1, A, B, C, D, tmp, zero, ones; + vector_u16_t splat2, temp; + + ones = vec_splat_u8 (1); + permA = vec_lvsl (0, ref); + permB = vec_add (permA, ones); + + zero = vec_splat_u8 (0); + splat2 = vec_splat_u16 (2); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + C = vec_perm (ref0, ref1, permA); + D = vec_perm (ref0, ref1, permB); + + temp = vec_add (vec_add ((vector_u16_t)vec_mergeh (zero, A), + (vector_u16_t)vec_mergeh (zero, B)), + vec_add 
((vector_u16_t)vec_mergeh (zero, C), + (vector_u16_t)vec_mergeh (zero, D))); + temp = vec_sr (vec_add (temp, splat2), splat2); + tmp = vec_pack (temp, temp); + + vec_st (tmp, 0, dest); + dest += stride; + tmp = vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB)); + } while (--height); +} +#endif + +static void MC_avg_o_16_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm, ref0, ref1, tmp, prev; + + perm = vec_lvsl (0, ref); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (0, dest); + tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (2*stride, dest); + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); + vec_st (tmp, stride, dest); +} + +static void MC_avg_o_8_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev; + + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (0, dest); + tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld 
(stride, dest); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1)); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); +} + +static void MC_avg_x_16_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t permA, permB, ref0, ref1, tmp, prev; + + permA = vec_lvsl (0, ref); + permB = vec_add (permA, vec_splat_u8 (1)); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + prev = vec_ld (0, dest); + ref += stride; + tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB))); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (2*stride, dest); + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp = 
vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB))); + vec_st (tmp, stride, dest); +} + +static void MC_avg_x_8_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1; + vector_u8_t prev; + + ones = vec_splat_u8 (1); + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + perm0B = vec_add (perm0A, ones); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + perm1B = vec_add (perm1A, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + prev = vec_ld (0, dest); + ref += stride; + tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A), + vec_perm (ref0, ref1, perm0B))); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A), + vec_perm (ref0, ref1, perm1B))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A), + vec_perm (ref0, ref1, perm0B))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A), + vec_perm (ref0, ref1, perm1B))); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste 
((vector_u32_t)tmp1, 4, (unsigned int *)dest); +} + +static void MC_avg_y_16_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev; + + perm = vec_lvsl (0, ref); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + tmp0 = vec_perm (ref0, ref1, perm); + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (0, dest); + tmp1 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp0 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (2*stride, dest); + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp1 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp0 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + vec_st (tmp, stride, dest); +} + +static void MC_avg_y_8_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev; + + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (0, dest); + tmp1 = vec_perm (ref0, ref1, perm1); + tmp = vec_avg (prev, 
vec_avg (tmp0, tmp1)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_perm (ref0, ref1, perm1); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); +} + +static void MC_avg_xy_16_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp; + vector_u8_t ones, prev; + + ones = vec_splat_u8 (1); + permA = vec_lvsl (0, ref); + permB = vec_add (permA, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (0, dest); + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + + do { + ref0 = 
vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_avg (prev, + vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (2*stride, dest); + vec_st (tmp, stride, dest); + dest += 2*stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_avg (prev, + vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + vec_st (tmp, stride, dest); +} + +static void MC_avg_xy_8_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B; + vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev; + + ones = vec_splat_u8 (1); + perm0A = vec_lvsl (0, ref); + perm0A = vec_mergeh (perm0A, perm0A); + perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A); + perm0B = vec_add (perm0A, ones); + perm1A = vec_lvsl (stride, ref); + perm1A = vec_mergeh (perm1A, perm1A); + perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A); + perm1B = vec_add (perm1A, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, 
B); + xor0 = vec_xor (A, B); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + prev = vec_ld (0, dest); + A = vec_perm (ref0, ref1, perm1A); + B = vec_perm (ref0, ref1, perm1B); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_avg (prev, + vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm1A); + B = vec_perm (ref0, ref1, perm1B); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_avg (prev, + vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); +} + +MPEG2_MC_EXTERN (altivec) + 
+#endif diff --git a/src/libmpeg2new/libmpeg2/motion_comp_mlib.c b/src/libmpeg2new/libmpeg2/motion_comp_mlib.c new file mode 100644 index 000000000..c7ed6b285 --- /dev/null +++ b/src/libmpeg2new/libmpeg2/motion_comp_mlib.c @@ -0,0 +1,190 @@ +/* + * motion_comp_mlib.c + * Copyright (C) 2000-2003 Håkan Hjort + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef LIBMPEG2_MLIB + +#include +#include +#include +#include +#include + +#include "mpeg2.h" +#include "mpeg2_internal.h" + +static void MC_put_o_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoCopyRef_U8_U8_16x16 (dest, (uint8_t *) ref, stride); + else + mlib_VideoCopyRef_U8_U8_16x8 (dest, (uint8_t *) ref, stride); +} + +static void MC_put_x_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpX_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride); + else + mlib_VideoInterpX_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride); +} + +static void MC_put_y_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpY_U8_U8_16x16 
(dest, (uint8_t *) ref, stride, stride); + else + mlib_VideoInterpY_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride); +} + +static void MC_put_xy_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpXY_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride); + else + mlib_VideoInterpXY_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride); +} + +static void MC_put_o_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoCopyRef_U8_U8_8x8 (dest, (uint8_t *) ref, stride); + else + mlib_VideoCopyRef_U8_U8_8x4 (dest, (uint8_t *) ref, stride); +} + +static void MC_put_x_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpX_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); + else + mlib_VideoInterpX_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); +} + +static void MC_put_y_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); + else + mlib_VideoInterpY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); +} + +static void MC_put_xy_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpXY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); + else + mlib_VideoInterpXY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); +} + +static void MC_avg_o_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoCopyRefAve_U8_U8_16x16 (dest, (uint8_t *) ref, stride); + else + mlib_VideoCopyRefAve_U8_U8_16x8 (dest, (uint8_t *) ref, stride); +} + +static void MC_avg_x_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpAveX_U8_U8_16x16 (dest, (uint8_t *) ref, + stride, stride); + else + mlib_VideoInterpAveX_U8_U8_16x8 (dest, (uint8_t *) ref, + stride, 
stride); +} + +static void MC_avg_y_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpAveY_U8_U8_16x16 (dest, (uint8_t *) ref, + stride, stride); + else + mlib_VideoInterpAveY_U8_U8_16x8 (dest, (uint8_t *) ref, + stride, stride); +} + +static void MC_avg_xy_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpAveXY_U8_U8_16x16 (dest, (uint8_t *) ref, + stride, stride); + else + mlib_VideoInterpAveXY_U8_U8_16x8 (dest, (uint8_t *) ref, + stride, stride); +} + +static void MC_avg_o_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoCopyRefAve_U8_U8_8x8 (dest, (uint8_t *) ref, stride); + else + mlib_VideoCopyRefAve_U8_U8_8x4 (dest, (uint8_t *) ref, stride); +} + +static void MC_avg_x_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpAveX_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); + else + mlib_VideoInterpAveX_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); +} + +static void MC_avg_y_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpAveY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); + else + mlib_VideoInterpAveY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); +} + +static void MC_avg_xy_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpAveXY_U8_U8_8x8 (dest, (uint8_t *) ref, + stride, stride); + else + mlib_VideoInterpAveXY_U8_U8_8x4 (dest, (uint8_t *) ref, + stride, stride); +} + +MPEG2_MC_EXTERN (mlib) + +#endif diff --git a/src/libmpeg2new/libmpeg2/motion_comp_mmx.c b/src/libmpeg2new/libmpeg2/motion_comp_mmx.c new file mode 100644 index 000000000..2434ccee1 --- /dev/null +++ b/src/libmpeg2new/libmpeg2/motion_comp_mmx.c @@ -0,0 +1,1005 @@ +/* + * motion_comp_mmx.c + * Copyright (C) 2000-2003 
Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef ARCH_X86 + +#include + +#include "mpeg2.h" +#include "mpeg2_internal.h" +#include "attributes.h" +#include "mmx.h" + +#define CPU_MMXEXT 0 +#define CPU_3DNOW 1 + + +/* MMX code - needs a rewrite */ + +/* + * Motion Compensation frequently needs to average values using the + * formula (x+y+1)>>1. Both MMXEXT and 3Dnow include one instruction + * to compute this, but it's been left out of classic MMX. + * + * We need to be careful of overflows when doing this computation. + * Rather than unpacking data to 16-bits, which reduces parallelism, + * we use the following formulas: + * + * (x+y)>>1 == (x&y)+((x^y)>>1) + * (x+y+1)>>1 == (x|y)-((x^y)>>1) + */ + +/* some rounding constants */ +static mmx_t mask1 = {0xfefefefefefefefeLL}; +static mmx_t round4 = {0x0002000200020002LL}; + +/* + * This code should probably be compiled with loop unrolling + * (i.e., -funroll-loops in gcc) because some of the loops + * use a small static number of iterations. 
This was written + * with the assumption the compiler knows best about when + * unrolling will help + */ + +static inline void mmx_zero_reg () +{ + /* load 0 into mm0 */ + pxor_r2r (mm0, mm0); +} + +static inline void mmx_average_2_U8 (uint8_t * dest, const uint8_t * src1, + const uint8_t * src2) +{ + /* *dest = (*src1 + *src2 + 1)/ 2; */ + + movq_m2r (*src1, mm1); /* load 8 src1 bytes */ + movq_r2r (mm1, mm2); /* copy 8 src1 bytes */ + + movq_m2r (*src2, mm3); /* load 8 src2 bytes */ + movq_r2r (mm3, mm4); /* copy 8 src2 bytes */ + + pxor_r2r (mm1, mm3); /* xor src1 and src2 */ + pand_m2r (mask1, mm3); /* mask lower bits */ + psrlq_i2r (1, mm3); /* /2 */ + por_r2r (mm2, mm4); /* or src1 and src2 */ + psubb_r2r (mm3, mm4); /* subtract subresults */ + movq_r2m (mm4, *dest); /* store result in dest */ +} + +static inline void mmx_interp_average_2_U8 (uint8_t * dest, + const uint8_t * src1, + const uint8_t * src2) +{ + /* *dest = (*dest + (*src1 + *src2 + 1)/ 2 + 1)/ 2; */ + + movq_m2r (*dest, mm1); /* load 8 dest bytes */ + movq_r2r (mm1, mm2); /* copy 8 dest bytes */ + + movq_m2r (*src1, mm3); /* load 8 src1 bytes */ + movq_r2r (mm3, mm4); /* copy 8 src1 bytes */ + + movq_m2r (*src2, mm5); /* load 8 src2 bytes */ + movq_r2r (mm5, mm6); /* copy 8 src2 bytes */ + + pxor_r2r (mm3, mm5); /* xor src1 and src2 */ + pand_m2r (mask1, mm5); /* mask lower bits */ + psrlq_i2r (1, mm5); /* /2 */ + por_r2r (mm4, mm6); /* or src1 and src2 */ + psubb_r2r (mm5, mm6); /* subtract subresults */ + movq_r2r (mm6, mm5); /* copy subresult */ + + pxor_r2r (mm1, mm5); /* xor srcavg and dest */ + pand_m2r (mask1, mm5); /* mask lower bits */ + psrlq_i2r (1, mm5); /* /2 */ + por_r2r (mm2, mm6); /* or srcavg and dest */ + psubb_r2r (mm5, mm6); /* subtract subresults */ + movq_r2m (mm6, *dest); /* store result in dest */ +} + +static inline void mmx_average_4_U8 (uint8_t * dest, const uint8_t * src1, + const uint8_t * src2, + const uint8_t * src3, + const uint8_t * src4) +{ + /* *dest = (*src1 
+ *src2 + *src3 + *src4 + 2)/ 4; */ + + movq_m2r (*src1, mm1); /* load 8 src1 bytes */ + movq_r2r (mm1, mm2); /* copy 8 src1 bytes */ + + punpcklbw_r2r (mm0, mm1); /* unpack low src1 bytes */ + punpckhbw_r2r (mm0, mm2); /* unpack high src1 bytes */ + + movq_m2r (*src2, mm3); /* load 8 src2 bytes */ + movq_r2r (mm3, mm4); /* copy 8 src2 bytes */ + + punpcklbw_r2r (mm0, mm3); /* unpack low src2 bytes */ + punpckhbw_r2r (mm0, mm4); /* unpack high src2 bytes */ + + paddw_r2r (mm3, mm1); /* add lows */ + paddw_r2r (mm4, mm2); /* add highs */ + + /* now have partials in mm1 and mm2 */ + + movq_m2r (*src3, mm3); /* load 8 src3 bytes */ + movq_r2r (mm3, mm4); /* copy 8 src3 bytes */ + + punpcklbw_r2r (mm0, mm3); /* unpack low src3 bytes */ + punpckhbw_r2r (mm0, mm4); /* unpack high src3 bytes */ + + paddw_r2r (mm3, mm1); /* add lows */ + paddw_r2r (mm4, mm2); /* add highs */ + + movq_m2r (*src4, mm5); /* load 8 src4 bytes */ + movq_r2r (mm5, mm6); /* copy 8 src4 bytes */ + + punpcklbw_r2r (mm0, mm5); /* unpack low src4 bytes */ + punpckhbw_r2r (mm0, mm6); /* unpack high src4 bytes */ + + paddw_r2r (mm5, mm1); /* add lows */ + paddw_r2r (mm6, mm2); /* add highs */ + + /* now have subtotal in mm1 and mm2 */ + + paddw_m2r (round4, mm1); + psraw_i2r (2, mm1); /* /4 */ + paddw_m2r (round4, mm2); + psraw_i2r (2, mm2); /* /4 */ + + packuswb_r2r (mm2, mm1); /* pack (w/ saturation) */ + movq_r2m (mm1, *dest); /* store result in dest */ +} + +static inline void mmx_interp_average_4_U8 (uint8_t * dest, + const uint8_t * src1, + const uint8_t * src2, + const uint8_t * src3, + const uint8_t * src4) +{ + /* *dest = (*dest + (*src1 + *src2 + *src3 + *src4 + 2)/ 4 + 1)/ 2; */ + + movq_m2r (*src1, mm1); /* load 8 src1 bytes */ + movq_r2r (mm1, mm2); /* copy 8 src1 bytes */ + + punpcklbw_r2r (mm0, mm1); /* unpack low src1 bytes */ + punpckhbw_r2r (mm0, mm2); /* unpack high src1 bytes */ + + movq_m2r (*src2, mm3); /* load 8 src2 bytes */ + movq_r2r (mm3, mm4); /* copy 8 src2 bytes */ + + 
punpcklbw_r2r (mm0, mm3); /* unpack low src2 bytes */ + punpckhbw_r2r (mm0, mm4); /* unpack high src2 bytes */ + + paddw_r2r (mm3, mm1); /* add lows */ + paddw_r2r (mm4, mm2); /* add highs */ + + /* now have partials in mm1 and mm2 */ + + movq_m2r (*src3, mm3); /* load 8 src3 bytes */ + movq_r2r (mm3, mm4); /* copy 8 src3 bytes */ + + punpcklbw_r2r (mm0, mm3); /* unpack low src3 bytes */ + punpckhbw_r2r (mm0, mm4); /* unpack high src3 bytes */ + + paddw_r2r (mm3, mm1); /* add lows */ + paddw_r2r (mm4, mm2); /* add highs */ + + movq_m2r (*src4, mm5); /* load 8 src4 bytes */ + movq_r2r (mm5, mm6); /* copy 8 src4 bytes */ + + punpcklbw_r2r (mm0, mm5); /* unpack low src4 bytes */ + punpckhbw_r2r (mm0, mm6); /* unpack high src4 bytes */ + + paddw_r2r (mm5, mm1); /* add lows */ + paddw_r2r (mm6, mm2); /* add highs */ + + paddw_m2r (round4, mm1); + psraw_i2r (2, mm1); /* /4 */ + paddw_m2r (round4, mm2); + psraw_i2r (2, mm2); /* /4 */ + + /* now have subtotal/4 in mm1 and mm2 */ + + movq_m2r (*dest, mm3); /* load 8 dest bytes */ + movq_r2r (mm3, mm4); /* copy 8 dest bytes */ + + packuswb_r2r (mm2, mm1); /* pack (w/ saturation) */ + movq_r2r (mm1,mm2); /* copy subresult */ + + pxor_r2r (mm1, mm3); /* xor srcavg and dest */ + pand_m2r (mask1, mm3); /* mask lower bits */ + psrlq_i2r (1, mm3); /* /2 */ + por_r2r (mm2, mm4); /* or srcavg and dest */ + psubb_r2r (mm3, mm4); /* subtract subresults */ + movq_r2m (mm4, *dest); /* store result in dest */ +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_avg_mmx (const int width, int height, uint8_t * dest, + const uint8_t * ref, const int stride) +{ + mmx_zero_reg (); + + do { + mmx_average_2_U8 (dest, dest, ref); + + if (width == 16) + mmx_average_2_U8 (dest+8, dest+8, ref+8); + + dest += stride; + ref += stride; + } while (--height); +} + +static void MC_avg_o_16_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_mmx (16, height, dest, ref, 
stride); +} + +static void MC_avg_o_8_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_put_mmx (const int width, int height, uint8_t * dest, + const uint8_t * ref, const int stride) +{ + mmx_zero_reg (); + + do { + movq_m2r (* ref, mm1); /* load 8 ref bytes */ + movq_r2m (mm1,* dest); /* store 8 bytes at curr */ + + if (width == 16) + { + movq_m2r (* (ref+8), mm1); /* load 8 ref bytes */ + movq_r2m (mm1,* (dest+8)); /* store 8 bytes at curr */ + } + + dest += stride; + ref += stride; + } while (--height); +} + +static void MC_put_o_16_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put_mmx (16, height, dest, ref, stride); +} + +static void MC_put_o_8_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +/* Half pixel interpolation in the x direction */ +static inline void MC_avg_x_mmx (const int width, int height, uint8_t * dest, + const uint8_t * ref, const int stride) +{ + mmx_zero_reg (); + + do { + mmx_interp_average_2_U8 (dest, ref, ref+1); + + if (width == 16) + mmx_interp_average_2_U8 (dest+8, ref+8, ref+9); + + dest += stride; + ref += stride; + } while (--height); +} + +static void MC_avg_x_16_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_x_mmx (16, height, dest, ref, stride); +} + +static void MC_avg_x_8_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_x_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_put_x_mmx (const int width, int height, uint8_t * dest, + const uint8_t * ref, const int stride) +{ + mmx_zero_reg (); + + do { + mmx_average_2_U8 (dest, 
ref, ref+1); + + if (width == 16) + mmx_average_2_U8 (dest+8, ref+8, ref+9); + + dest += stride; + ref += stride; + } while (--height); +} + +static void MC_put_x_16_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put_x_mmx (16, height, dest, ref, stride); +} + +static void MC_put_x_8_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put_x_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_avg_xy_mmx (const int width, int height, uint8_t * dest, + const uint8_t * ref, const int stride) +{ + const uint8_t * ref_next = ref + stride; + + mmx_zero_reg (); + + do { + mmx_interp_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1); + + if (width == 16) + mmx_interp_average_4_U8 (dest+8, ref+8, ref+9, + ref_next+8, ref_next+9); + + dest += stride; + ref += stride; + ref_next += stride; + } while (--height); +} + +static void MC_avg_xy_16_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_xy_mmx (16, height, dest, ref, stride); +} + +static void MC_avg_xy_8_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_xy_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_put_xy_mmx (const int width, int height, uint8_t * dest, + const uint8_t * ref, const int stride) +{ + const uint8_t * ref_next = ref + stride; + + mmx_zero_reg (); + + do { + mmx_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1); + + if (width == 16) + mmx_average_4_U8 (dest+8, ref+8, ref+9, ref_next+8, ref_next+9); + + dest += stride; + ref += stride; + ref_next += stride; + } while (--height); +} + +static void MC_put_xy_16_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put_xy_mmx (16, height, dest, ref, stride); +} + +static void MC_put_xy_8_mmx (uint8_t * dest, const uint8_t * ref, + int 
stride, int height) +{ + MC_put_xy_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_avg_y_mmx (const int width, int height, uint8_t * dest, + const uint8_t * ref, const int stride) +{ + const uint8_t * ref_next = ref + stride; + + mmx_zero_reg (); + + do { + mmx_interp_average_2_U8 (dest, ref, ref_next); + + if (width == 16) + mmx_interp_average_2_U8 (dest+8, ref+8, ref_next+8); + + dest += stride; + ref += stride; + ref_next += stride; + } while (--height); +} + +static void MC_avg_y_16_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_y_mmx (16, height, dest, ref, stride); +} + +static void MC_avg_y_8_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_y_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_put_y_mmx (const int width, int height, uint8_t * dest, + const uint8_t * ref, const int stride) +{ + const uint8_t * ref_next = ref + stride; + + mmx_zero_reg (); + + do { + mmx_average_2_U8 (dest, ref, ref_next); + + if (width == 16) + mmx_average_2_U8 (dest+8, ref+8, ref_next+8); + + dest += stride; + ref += stride; + ref_next += stride; + } while (--height); +} + +static void MC_put_y_16_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put_y_mmx (16, height, dest, ref, stride); +} + +static void MC_put_y_8_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put_y_mmx (8, height, dest, ref, stride); +} + + +MPEG2_MC_EXTERN (mmx) + + + + + + + +/* CPU_MMXEXT/CPU_3DNOW adaptation layer */ + +#define pavg_r2r(src,dest) \ +do { \ + if (cpu == CPU_MMXEXT) \ + pavgb_r2r (src, dest); \ + else \ + pavgusb_r2r (src, dest); \ +} while (0) + +#define pavg_m2r(src,dest) \ +do { \ + if (cpu == CPU_MMXEXT) \ + pavgb_m2r (src, dest); \ + else \ + pavgusb_m2r (src, dest); \ +} while (0) 
+ + +/* CPU_MMXEXT code (the helpers below that take a cpu argument are also called with CPU_3DNOW by the 3dnow wrappers) */ + +/* Per row: one movq_m2r load from ref into mm0 and one movq_r2m store to dest, then both pointers advance by stride. do/while: height must be at least 1. */ +static inline void MC_put1_8 (int height, uint8_t * dest, const uint8_t * ref, + const int stride) +{ + do { + movq_m2r (*ref, mm0); + movq_r2m (mm0, *dest); + ref += stride; + dest += stride; + } while (--height); +} +/* As MC_put1_8, with a second load/store pair at byte offset +8 of each row (mm1). */ +static inline void MC_put1_16 (int height, uint8_t * dest, const uint8_t * ref, + const int stride) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+8), mm1); + ref += stride; + movq_r2m (mm0, *dest); + movq_r2m (mm1, *(dest+8)); + dest += stride; + } while (--height); +} +/* Per row: loads ref into mm0, combines it with the current dest contents through pavg_m2r and stores the result back to dest. cpu is consumed by the pavg_m2r macro. */ +static inline void MC_avg1_8 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int cpu) +{ + do { + movq_m2r (*ref, mm0); + pavg_m2r (*dest, mm0); + ref += stride; + movq_r2m (mm0, *dest); + dest += stride; + } while (--height); +} +/* As MC_avg1_8, with a second column at byte offset +8 handled in mm1. */ +static inline void MC_avg1_16 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+8), mm1); + pavg_m2r (*dest, mm0); + pavg_m2r (*(dest+8), mm1); + movq_r2m (mm0, *dest); + ref += stride; + movq_r2m (mm1, *(dest+8)); + dest += stride; + } while (--height); +} +/* Per row: pavg of ref and ref+offset, stored to dest. The wrappers in this file pass offset 1 (x variants) or stride (y variants). */ +static inline void MC_put2_8 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int offset, + const int cpu) +{ + do { + movq_m2r (*ref, mm0); + pavg_m2r (*(ref+offset), mm0); + ref += stride; + movq_r2m (mm0, *dest); + dest += stride; + } while (--height); +} +/* As MC_put2_8, with a second column at byte offset +8 handled in mm1. */ +static inline void MC_put2_16 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int offset, + const int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+8), mm1); + pavg_m2r (*(ref+offset), mm0); + pavg_m2r (*(ref+offset+8), mm1); + movq_r2m (mm0, *dest); + ref += stride; + movq_r2m (mm1, *(dest+8)); + dest += stride; + } while (--height); +} +/* Per row: pavg of ref and ref+offset, then a further pavg with the current dest contents before storing back to dest. */ +static inline void MC_avg2_8 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int offset, + const int cpu) +{ + do { + movq_m2r (*ref, mm0); + pavg_m2r 
(*(ref+offset), mm0); + pavg_m2r (*dest, mm0); + ref += stride; + movq_r2m (mm0, *dest); + dest += stride; + } while (--height); +} +/* As MC_avg2_8, with a second column at byte offset +8 handled in mm1. height must be at least 1. */ +static inline void MC_avg2_16 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int offset, + const int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+8), mm1); + pavg_m2r (*(ref+offset), mm0); + pavg_m2r (*(ref+offset+8), mm1); + pavg_m2r (*dest, mm0); + pavg_m2r (*(dest+8), mm1); + ref += stride; + movq_r2m (mm0, *dest); + movq_r2m (mm1, *(dest+8)); + dest += stride; + } while (--height); +} +/* 0x01 in each of the eight bytes; and-ed into mm7 by the four-source helpers that follow, so the value later subtracted with psubusb is 0 or 1 per byte. */ +static mmx_t mask_one = {0x0101010101010101LL}; +/* Combines ref and ref+1 of the current row with ref and ref+1 of the next row. Each row pair is merged with pavg, the two row results are merged with pavg again, and a per-byte correction built by the pxor/por/pand chain and masked with mask_one is subtracted with psubusb before the store. The first row pair is computed before the loop; the two moves marked unroll carry the next row's pair result (mm2) and xor (mm6) into mm0 and mm7 so each row is loaded only once. height must be at least 1. */ +static inline void MC_put4_8 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int cpu) +{ + movq_m2r (*ref, mm0); + movq_m2r (*(ref+1), mm1); + movq_r2r (mm0, mm7); + pxor_r2r (mm1, mm7); + pavg_r2r (mm1, mm0); + ref += stride; + + do { + movq_m2r (*ref, mm2); + movq_r2r (mm0, mm5); + + movq_m2r (*(ref+1), mm3); + movq_r2r (mm2, mm6); + + pxor_r2r (mm3, mm6); + pavg_r2r (mm3, mm2); + + por_r2r (mm6, mm7); + pxor_r2r (mm2, mm5); + + pand_r2r (mm5, mm7); + pavg_r2r (mm2, mm0); + + pand_m2r (mask_one, mm7); + + psubusb_r2r (mm7, mm0); + + ref += stride; + movq_r2m (mm0, *dest); + dest += stride; + + movq_r2r (mm6, mm7); /* unroll ! */ + movq_r2r (mm2, mm0); /* unroll ! 
*/ + } while (--height); +} +/* Four-source helper for two 8-byte columns (offsets 0 and +8). Unlike MC_put4_8 nothing is carried between rows: every pass reloads both rows and pairs the diagonals (ref with ref+stride+1, ref+1 with ref+stride) before the same pavg / mask_one / psubusb sequence. height must be at least 1. */ +static inline void MC_put4_16 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+stride+1), mm1); + movq_r2r (mm0, mm7); + movq_m2r (*(ref+1), mm2); + pxor_r2r (mm1, mm7); + movq_m2r (*(ref+stride), mm3); + movq_r2r (mm2, mm6); + pxor_r2r (mm3, mm6); + pavg_r2r (mm1, mm0); + pavg_r2r (mm3, mm2); + por_r2r (mm6, mm7); + movq_r2r (mm0, mm6); + pxor_r2r (mm2, mm6); + pand_r2r (mm6, mm7); + pand_m2r (mask_one, mm7); + pavg_r2r (mm2, mm0); + psubusb_r2r (mm7, mm0); + movq_r2m (mm0, *dest); + + movq_m2r (*(ref+8), mm0); + movq_m2r (*(ref+stride+9), mm1); + movq_r2r (mm0, mm7); + movq_m2r (*(ref+9), mm2); + pxor_r2r (mm1, mm7); + movq_m2r (*(ref+stride+8), mm3); + movq_r2r (mm2, mm6); + pxor_r2r (mm3, mm6); + pavg_r2r (mm1, mm0); + pavg_r2r (mm3, mm2); + por_r2r (mm6, mm7); + movq_r2r (mm0, mm6); + pxor_r2r (mm2, mm6); + pand_r2r (mm6, mm7); + pand_m2r (mask_one, mm7); + pavg_r2r (mm2, mm0); + psubusb_r2r (mm7, mm0); + ref += stride; + movq_r2m (mm0, *(dest+8)); + dest += stride; + } while (--height); +} +/* Same four-source sequence as one column of MC_put4_16, followed by a pavg with the current dest contents before the store. height must be at least 1. */ +static inline void MC_avg4_8 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+stride+1), mm1); + movq_r2r (mm0, mm7); + movq_m2r (*(ref+1), mm2); + pxor_r2r (mm1, mm7); + movq_m2r (*(ref+stride), mm3); + movq_r2r (mm2, mm6); + pxor_r2r (mm3, mm6); + pavg_r2r (mm1, mm0); + pavg_r2r (mm3, mm2); + por_r2r (mm6, mm7); + movq_r2r (mm0, mm6); + pxor_r2r (mm2, mm6); + pand_r2r (mm6, mm7); + pand_m2r (mask_one, mm7); + pavg_r2r (mm2, mm0); + psubusb_r2r (mm7, mm0); + movq_m2r (*dest, mm1); + pavg_r2r (mm1, mm0); + ref += stride; + movq_r2m (mm0, *dest); + dest += stride; + } while (--height); +} +/* MC_avg4_8 applied to two columns (offsets 0 and +8) per row. */ +static inline void MC_avg4_16 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+stride+1), 
mm1); + movq_r2r (mm0, mm7); + movq_m2r (*(ref+1), mm2); + pxor_r2r (mm1, mm7); + movq_m2r (*(ref+stride), mm3); + movq_r2r (mm2, mm6); + pxor_r2r (mm3, mm6); + pavg_r2r (mm1, mm0); + pavg_r2r (mm3, mm2); + por_r2r (mm6, mm7); + movq_r2r (mm0, mm6); + pxor_r2r (mm2, mm6); + pand_r2r (mm6, mm7); + pand_m2r (mask_one, mm7); + pavg_r2r (mm2, mm0); + psubusb_r2r (mm7, mm0); + movq_m2r (*dest, mm1); + pavg_r2r (mm1, mm0); + movq_r2m (mm0, *dest);/* first column done; the same sequence follows for byte offset +8 */ + + movq_m2r (*(ref+8), mm0); + movq_m2r (*(ref+stride+9), mm1); + movq_r2r (mm0, mm7); + movq_m2r (*(ref+9), mm2); + pxor_r2r (mm1, mm7); + movq_m2r (*(ref+stride+8), mm3); + movq_r2r (mm2, mm6); + pxor_r2r (mm3, mm6); + pavg_r2r (mm1, mm0); + pavg_r2r (mm3, mm2); + por_r2r (mm6, mm7); + movq_r2r (mm0, mm6); + pxor_r2r (mm2, mm6); + pand_r2r (mm6, mm7); + pand_m2r (mask_one, mm7); + pavg_r2r (mm2, mm0); + psubusb_r2r (mm7, mm0); + movq_m2r (*(dest+8), mm1); + pavg_r2r (mm1, mm0); + ref += stride; + movq_r2m (mm0, *(dest+8)); + dest += stride; + } while (--height); +} +/* mmxext entry points. Every wrapper takes (dest, ref, stride, height) and forwards to one shared helper with CPU_MMXEXT: the o variants call the put1 and avg1 helpers, the x variants call put2 and avg2 with offset 1, the y variants call put2 and avg2 with offset stride, and the xy variants call put4 and avg4. The put1 helpers take no cpu argument. */ +static void MC_avg_o_16_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT); +} + +static void MC_avg_o_8_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT); +} + +static void MC_put_o_16_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put1_16 (height, dest, ref, stride); +} + +static void MC_put_o_8_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put1_8 (height, dest, ref, stride); +} +/* x variants: the second source is one byte to the right (offset 1). */ +static void MC_avg_x_16_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT); +} + +static void MC_avg_x_8_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT); +} + +static void MC_put_x_16_mmxext (uint8_t * dest, 
const uint8_t * ref, + int stride, int height) +{ + MC_put2_16 (height, dest, ref, stride, 1, CPU_MMXEXT); +} + +static void MC_put_x_8_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT); +} +/* y variants: the offset argument is stride, so the second source is the row below. */ +static void MC_avg_y_16_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT); +} + +static void MC_avg_y_8_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT); +} + +static void MC_put_y_16_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT); +} + +static void MC_put_y_8_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT); +} +/* xy variants: forwarded to the four-source helpers (MC_avg4_16, MC_avg4_8, MC_put4_16, MC_put4_8). */ +static void MC_avg_xy_16_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT); +} + +static void MC_avg_xy_8_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT); +} + +static void MC_put_xy_16_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT); +} + +static void MC_put_xy_8_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT); +} + +/* Defines the mpeg2_mc_mmxext function table from the sixteen mmxext wrappers (macro body: MPEG2_MC_EXTERN in mpeg2_internal.h). */ +MPEG2_MC_EXTERN (mmxext) + + +/* 3dnow entry points: the same sixteen wrappers as the mmxext set, passing CPU_3DNOW so that the pavg macros expand to pavgusb. */ +static void MC_avg_o_16_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW); +} + +static void MC_avg_o_8_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW); +} + +static void MC_put_o_16_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int 
height) +{ + MC_put1_16 (height, dest, ref, stride); +} + +static void MC_put_o_8_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put1_8 (height, dest, ref, stride); +} +/* x variants: two-source helpers with offset 1 and CPU_3DNOW. */ +static void MC_avg_x_16_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW); +} + +static void MC_avg_x_8_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW); +} + +static void MC_put_x_16_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW); +} + +static void MC_put_x_8_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW); +} +/* y variants: two-source helpers with offset stride (second source is the row below) and CPU_3DNOW. */ +static void MC_avg_y_16_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW); +} + +static void MC_avg_y_8_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW); +} + +static void MC_put_y_16_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW); +} + +static void MC_put_y_8_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW); +} +/* xy variants: four-source helpers with CPU_3DNOW. */ +static void MC_avg_xy_16_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg4_16 (height, dest, ref, stride, CPU_3DNOW); +} + +static void MC_avg_xy_8_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg4_8 (height, dest, ref, stride, CPU_3DNOW); +} + +static void MC_put_xy_16_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put4_16 (height, dest, ref, stride, CPU_3DNOW); +} + +static void MC_put_xy_8_3dnow (uint8_t * 
dest, const uint8_t * ref, + int stride, int height) +{ + MC_put4_8 (height, dest, ref, stride, CPU_3DNOW); +} + +/* Defines the mpeg2_mc_3dnow function table from the sixteen 3dnow wrappers (macro body: MPEG2_MC_EXTERN in mpeg2_internal.h). */ +MPEG2_MC_EXTERN (3dnow) + +#endif diff --git a/src/libmpeg2new/libmpeg2/mpeg2_internal.h b/src/libmpeg2new/libmpeg2/mpeg2_internal.h new file mode 100644 index 000000000..ccd1bc4b5 --- /dev/null +++ b/src/libmpeg2new/libmpeg2/mpeg2_internal.h @@ -0,0 +1,301 @@ +/* + * mpeg2_internal.h + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* macroblock modes: single-bit flags (1 to 32) that get_macroblock_modes in slice.c ors together */ +#define MACROBLOCK_INTRA 1 +#define MACROBLOCK_PATTERN 2 +#define MACROBLOCK_MOTION_BACKWARD 4 +#define MACROBLOCK_MOTION_FORWARD 8 +#define MACROBLOCK_QUANT 16 +#define DCT_TYPE_INTERLACED 32 +/* motion_type: two-bit field above the flags, in units of MOTION_TYPE_BASE (64); MC_FRAME and MC_16X8 deliberately or accidentally share the value 2*64 */ +#define MOTION_TYPE_MASK (3*64) +#define MOTION_TYPE_BASE 64 +#define MC_FIELD (1*64) +#define MC_FRAME (2*64) +#define MC_16X8 (2*64) +#define MC_DMV (3*64) + +/* picture structure: compared against decoder->picture_structure; FRAME_PICTURE equals TOP_FIELD | BOTTOM_FIELD */ +#define TOP_FIELD 1 +#define BOTTOM_FIELD 2 +#define FRAME_PICTURE 3 + +/* picture coding type: compared against decoder->coding_type */ +#define I_TYPE 1 +#define P_TYPE 2 +#define B_TYPE 3 +#define D_TYPE 4 +/* State kept once per prediction direction (struct mpeg2_decoder_s holds one as b_motion and one as f_motion). NOTE(review): the meaning of the ref, ref2 and pmv indices is not visible in this header; confirm against slice.c. */ +typedef struct { + uint8_t * ref[2][3]; + uint8_t ** ref2[2]; + int pmv[2][2]; + int f_code[2]; +} motion_t; + +struct mpeg2_decoder_s { + /* first, state that carries information from one macroblock to the */ + /* next inside a slice, and is never used outside of mpeg2_slice() */ + + /* DCT coefficients - should be kept aligned ! 
*/ + int16_t DCTblock[64];/* the get_intra_block_B14, get_intra_block_B15 and get_non_intra_block routines in slice.c store coefficient values here */ + + /* bit parsing stuff; slice.c aliases these three fields through its local bit_buf, bits and bit_ptr macros */ + uint32_t bitstream_buf; /* current 32 bit working set */ + int bitstream_bits; /* used bits in working set */ + const uint8_t * bitstream_ptr; /* buffer with stream data */ + + uint8_t * dest[3]; + uint8_t * picture_dest[3]; + void (* convert) (void * fbuf_id, uint8_t * const * src, + unsigned int v_offset);/* NOTE(review): callback; its call site and the meaning of v_offset are not visible in this header */ + void * fbuf_id; + + int offset; + int stride; + int uv_stride; + unsigned int limit_x; + unsigned int limit_y_16; + unsigned int limit_y_8; + unsigned int limit_y; + + /* Motion vectors */ + /* The f_ and b_ correspond to the forward and backward motion */ + /* predictors */ + motion_t b_motion; + motion_t f_motion; + + /* predictor for DC coefficients in intra blocks */ + int16_t dc_dct_pred[3]; + + int quantizer_scale; /* remove */ + int dmv_offset; /* remove */ + unsigned int v_offset; /* remove */ + + /* now non-slice-specific information */ + + /* sequence header stuff */ + uint8_t intra_quantizer_matrix [64]; + uint8_t non_intra_quantizer_matrix [64]; + + /* The width and height of the picture snapped to macroblock units */ + int width; + int height; + int vertical_position_extension; + + /* picture header stuff */ + + /* what type of picture this is (I, P, B, D); switched on against I_TYPE, P_TYPE, B_TYPE and D_TYPE in get_macroblock_modes */ + int coding_type; + + /* picture coding extension stuff */ + + /* quantization factor for intra dc coefficients */ + int intra_dc_precision; + /* top/bottom/both fields */ + int picture_structure; + /* bool to indicate all predictions are frame based */ + int frame_pred_frame_dct; + /* bool to indicate whether intra blocks have motion vectors */ + /* (for concealment) */ + int concealment_motion_vectors; + /* bit to indicate which quantization table to use: non-zero makes get_quantizer_scale (slice.c) look the code up in non_linear_quantizer_scale[], zero makes it return code << 1 */ + int q_scale_type; + /* bool to use different vlc tables */ + int intra_vlc_format; + /* used for DMV MC */ + int top_field_first; + + /* stuff derived from bitstream */ + + /* pointer to the zigzag scan we're supposed to be using; the block decoders use scan[i] as the index into DCTblock */ + const uint8_t * scan; + + int second_field; 
+ + int mpeg1; +}; +/* Wrapper around a single mpeg2_fbuf_t; struct mpeg2dec_s keeps three of these in fbuf_alloc[]. */ +typedef struct { + mpeg2_fbuf_t fbuf; +} fbuf_alloc_t; +/* Top-level decoder context. The slice-level mpeg2_decoder_t is embedded as the first member. */ +struct mpeg2dec_s { + mpeg2_decoder_t decoder; + + mpeg2_info_t info; + + uint32_t shift; + int is_display_initialized; + mpeg2_state_t (* action) (struct mpeg2dec_s * mpeg2dec); + mpeg2_state_t state; + uint32_t ext_state; + + /* allocated in init - gcc has problems allocating such big structures */ + uint8_t * chunk_buffer; + /* pointer to start of the current chunk */ + uint8_t * chunk_start; + /* pointer to current position in chunk_buffer */ + uint8_t * chunk_ptr; + /* last start code ? */ + uint8_t code; + + /* PTS */ + uint32_t pts_current, pts_previous; + int num_pts; + int bytes_since_pts; + + int first; + int alloc_index_user; + int alloc_index; + uint8_t first_decode_slice; + uint8_t nb_decode_slices; + + mpeg2_sequence_t new_sequence; + mpeg2_sequence_t sequence; + mpeg2_gop_t gop; + mpeg2_picture_t pictures[4]; + mpeg2_picture_t * picture; + /*const*/ mpeg2_fbuf_t * fbuf[3]; /* 0: current fbuf, 1-2: prediction fbufs */ + + fbuf_alloc_t fbuf_alloc[3]; + int custom_fbuf; + + uint8_t * yuv_buf[3][3]; + int yuv_index; + void * convert_id; + int convert_size[3]; + void (* convert_start) (void * id, uint8_t * const * dest, int flags); + void (* convert_copy) (void * id, uint8_t * const * src, + unsigned int v_offset); + + uint8_t * buf_start; + uint8_t * buf_end; + + int16_t display_offset_x, display_offset_y; + + int copy_matrix;/* NOTE(review): presumably flags that the two matrices below still have to be copied into the embedded decoder, which has fields of the same names; confirm in header.c */ + uint8_t intra_quantizer_matrix [64]; + uint8_t non_intra_quantizer_matrix [64]; +}; +/* Argument type of the mpeg2_cpu_state_save and mpeg2_cpu_state_restore function pointers declared extern in slice.c. Holds a 12*16-byte regv buffer only when ARCH_PPC is defined; dummy keeps the struct non-empty otherwise. */ +typedef struct { +#ifdef ARCH_PPC + uint8_t regv[12*16]; +#endif + int dummy; +} cpu_state_t; + +/* alloc.c -- NOTE(review): the ALLOC_ constants are presumably the values passed as the reason argument of mpeg2_malloc; confirm in alloc.c */ +#define ALLOC_MPEG2DEC 0 +#define ALLOC_CHUNK 1 +#define ALLOC_YUV 2 +#define ALLOC_CONVERT_ID 3 +#define ALLOC_CONVERTED 4 +void * mpeg2_malloc (int size, int reason); +void mpeg2_free (void * buf); + +/* cpu_accel.c */ +uint32_t mpeg2_detect_accel (void); + +/* cpu_state.c */ +void mpeg2_cpu_state_init (uint32_t accel); + +/* decode.c */ +mpeg2_state_t 
mpeg2_seek_sequence (mpeg2dec_t * mpeg2dec); +mpeg2_state_t mpeg2_parse_header (mpeg2dec_t * mpeg2dec); + +/* header.c -- every function here takes the top-level mpeg2dec_t context; the meaning of the int return values is not visible in this header */ +void mpeg2_header_state_init (mpeg2dec_t * mpeg2dec); +int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec); +int mpeg2_header_gop (mpeg2dec_t * mpeg2dec); +mpeg2_state_t mpeg2_header_picture_start (mpeg2dec_t * mpeg2dec); +int mpeg2_header_picture (mpeg2dec_t * mpeg2dec); +int mpeg2_header_extension (mpeg2dec_t * mpeg2dec); +int mpeg2_header_user_data (mpeg2dec_t * mpeg2dec); +void mpeg2_header_matrix_finalize (mpeg2dec_t * mpeg2dec); +void mpeg2_header_sequence_finalize (mpeg2dec_t * mpeg2dec); +mpeg2_state_t mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec); +mpeg2_state_t mpeg2_header_end (mpeg2dec_t * mpeg2dec); +void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int coding_type); + +/* idct.c */ +void mpeg2_idct_init (uint32_t accel); + +/* idct_mlib.c -- NOTE(review): only the _non_ieee copy variant is declared; there is no plain mpeg2_idct_copy_mlib prototype here. Confirm against idct_mlib.c. */ +void mpeg2_idct_add_mlib (int last, int16_t * block, + uint8_t * dest, int stride); +void mpeg2_idct_copy_mlib_non_ieee (int16_t * block, uint8_t * dest, + int stride); +void mpeg2_idct_add_mlib_non_ieee (int last, int16_t * block, + uint8_t * dest, int stride); + +/* idct_mmx.c -- the copy and add prototypes in this and the following sections have the same parameter lists as the mpeg2_idct_copy and mpeg2_idct_add function pointers declared extern in slice.c */ +void mpeg2_idct_copy_mmxext (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_add_mmxext (int last, int16_t * block, + uint8_t * dest, int stride); +void mpeg2_idct_copy_mmx (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_add_mmx (int last, int16_t * block, + uint8_t * dest, int stride); +void mpeg2_idct_mmx_init (void); + +/* idct_altivec.c */ +void mpeg2_idct_copy_altivec (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_add_altivec (int last, int16_t * block, + uint8_t * dest, int stride); +void mpeg2_idct_altivec_init (void); + +/* idct_alpha.c */ +void mpeg2_idct_copy_mvi (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_add_mvi (int last, int16_t * block, + uint8_t * dest, int stride); +void mpeg2_idct_copy_alpha (int16_t * block, uint8_t * dest, int 
stride); +void mpeg2_idct_add_alpha (int last, int16_t * block, + uint8_t * dest, int stride); +void mpeg2_idct_alpha_init (void); + +/* motion_comp.c */ +void mpeg2_mc_init (uint32_t accel); +/* Motion compensation function type. The MMX implementations name the parameters (dest, ref, stride, height). */ +typedef void mpeg2_mc_fct (uint8_t *, const uint8_t *, int, int); +/* One put table and one avg table. As filled by MPEG2_MC_EXTERN, indices 0 to 3 are the o, x, y and xy variants for width 16 and indices 4 to 7 the same variants for width 8. */ +typedef struct { + mpeg2_mc_fct * put [8]; + mpeg2_mc_fct * avg [8]; +} mpeg2_mc_t; +/* Defines the global mpeg2_mc_<x> table from sixteen functions that must already be declared under the names MC_put_<variant>_<width>_<x> and MC_avg_<variant>_<width>_<x>. The expansion ends with its own semicolon, so call sites write MPEG2_MC_EXTERN (x) without one. */ +#define MPEG2_MC_EXTERN(x) mpeg2_mc_t mpeg2_mc_##x = { \ + {MC_put_o_16_##x, MC_put_x_16_##x, MC_put_y_16_##x, MC_put_xy_16_##x, \ + MC_put_o_8_##x, MC_put_x_8_##x, MC_put_y_8_##x, MC_put_xy_8_##x}, \ + {MC_avg_o_16_##x, MC_avg_x_16_##x, MC_avg_y_16_##x, MC_avg_xy_16_##x, \ + MC_avg_o_8_##x, MC_avg_x_8_##x, MC_avg_y_8_##x, MC_avg_xy_8_##x} \ +}; +/* Tables defined by the per-architecture motion_comp files through MPEG2_MC_EXTERN. */ +extern mpeg2_mc_t mpeg2_mc_c; +extern mpeg2_mc_t mpeg2_mc_mmx; +extern mpeg2_mc_t mpeg2_mc_mmxext; +extern mpeg2_mc_t mpeg2_mc_3dnow; +extern mpeg2_mc_t mpeg2_mc_altivec; +extern mpeg2_mc_t mpeg2_mc_alpha; +extern mpeg2_mc_t mpeg2_mc_mlib; diff --git a/src/libmpeg2new/libmpeg2/slice.c b/src/libmpeg2new/libmpeg2/slice.c new file mode 100644 index 000000000..3e2db0803 --- /dev/null +++ b/src/libmpeg2new/libmpeg2/slice.c @@ -0,0 +1,1808 @@ +/* + * slice.c + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include <inttypes.h> + +#include "mpeg2.h" +#include "mpeg2_internal.h" +#include "attributes.h" + +extern mpeg2_mc_t mpeg2_mc; +extern void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride); +extern void (* mpeg2_idct_add) (int last, int16_t * block, + uint8_t * dest, int stride); +extern void (* mpeg2_cpu_state_save) (cpu_state_t * state); +extern void (* mpeg2_cpu_state_restore) (cpu_state_t * state); + +#include "vlc.h" +/* Scale value for each 5-bit quantizer_scale_code (32 entries), used by get_quantizer_scale when decoder->q_scale_type is non-zero. The table is only read, hence const. */ +static const int non_linear_quantizer_scale [] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 10, 12, 14, 16, 18, 20, 22, + 24, 28, 32, 36, 40, 44, 48, 52, + 56, 64, 72, 80, 88, 96, 104, 112 +}; +/* Decodes macroblock_modes for the current picture coding type. A table lookup (MB_I, MB_P or MB_B) supplies the base flags; optional motion-type and DCT-type bits are then read depending on picture_structure and frame_pred_frame_dct. Returns the or-ed flags, or 0 for an unknown coding type. bit_buf, bits and bit_ptr are function-local macros aliasing the decoder's bitstream fields. */ +static inline int get_macroblock_modes (mpeg2_decoder_t * const decoder) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + int macroblock_modes; + const MBtab * tab; + + switch (decoder->coding_type) { + case I_TYPE: + + tab = MB_I + UBITS (bit_buf, 1); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if ((! 
(decoder->frame_pred_frame_dct)) && + (decoder->picture_structure == FRAME_PICTURE)) { + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + + return macroblock_modes; + + case P_TYPE:/* base flags come from MB_P, indexed by the next 5 bits */ + + tab = MB_P + UBITS (bit_buf, 5); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if (decoder->picture_structure != FRAME_PICTURE) {/* field picture: a 2-bit motion type follows only when forward motion is flagged */ + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + } + return macroblock_modes; + } else if (decoder->frame_pred_frame_dct) {/* no motion-type bits are read; MC_FRAME is set directly */ + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) + macroblock_modes |= MC_FRAME; + return macroblock_modes; + } else {/* frame picture with explicit bits: 2-bit motion type, then a 1-bit DCT type for intra or pattern macroblocks */ + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + } + if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + return macroblock_modes; + } + + case B_TYPE:/* base flags come from MB_B, indexed by the next 6 bits */ + + tab = MB_B + UBITS (bit_buf, 6); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if (decoder->picture_structure != FRAME_PICTURE) {/* field picture: every non-intra macroblock carries a 2-bit motion type */ + if (! (macroblock_modes & MACROBLOCK_INTRA)) { + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + } + return macroblock_modes; + } else if (decoder->frame_pred_frame_dct) { + /* if (! 
(macroblock_modes & MACROBLOCK_INTRA)) */ + macroblock_modes |= MC_FRAME; + return macroblock_modes; + } else { + if (macroblock_modes & MACROBLOCK_INTRA) + goto intra;/* intra macroblocks skip the motion-type bits and read only the DCT-type bit */ + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { + intra: + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + return macroblock_modes; + } + + case D_TYPE:/* one bit is consumed and the macroblock is always intra */ + + DUMPBITS (bit_buf, bits, 1); + return MACROBLOCK_INTRA; + + default: + return 0; + } +#undef bit_buf +#undef bits +#undef bit_ptr +} +/* Reads the 5-bit quantizer_scale_code and returns the scale: a lookup in non_linear_quantizer_scale[] when decoder->q_scale_type is non-zero, otherwise the code shifted left by one. A 5-bit code is at most 31, which matches the 32 table entries. */ +static inline int get_quantizer_scale (mpeg2_decoder_t * const decoder) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + + int quantizer_scale_code; + + quantizer_scale_code = UBITS (bit_buf, 5); + DUMPBITS (bit_buf, bits, 5); + + if (decoder->q_scale_type) + return non_linear_quantizer_scale [quantizer_scale_code]; + else + return quantizer_scale_code << 1; +#undef bit_buf +#undef bits +#undef bit_ptr +} +/* Decodes one motion vector delta. A set top bit means delta 0 (one bit consumed). Otherwise MV_4 or MV_10, chosen by the magnitude of bit_buf, supplies a base value that is shifted left by f_code, incremented, extended with f_code further bits and finally negated through (delta ^ sign) - sign. NOTE(review): the negation assumes SBITS (x, 1) yields 0 or -1; the macro is defined outside this chunk. Only the MV_10 branch calls NEEDBITS before reading the f_code bits. */ +static inline int get_motion_delta (mpeg2_decoder_t * const decoder, + const int f_code) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + + int delta; + int sign; + const MVtab * tab; + + if (bit_buf & 0x80000000) { + DUMPBITS (bit_buf, bits, 1); + return 0; + } else if (bit_buf >= 0x0c000000) { + + tab = MV_4 + UBITS (bit_buf, 4); + delta = (tab->delta << f_code) + 1; + bits += tab->len + f_code + 1; + bit_buf <<= tab->len; + + sign = SBITS (bit_buf, 1); + bit_buf <<= 1; + + if (f_code) + delta += UBITS (bit_buf, f_code); + bit_buf <<= f_code; + + return (delta ^ sign) - sign; + + } else { + + tab = MV_10 + UBITS (bit_buf, 10); + delta = (tab->delta << f_code) + 1; + bits += tab->len + 1; + bit_buf <<= tab->len; + + sign = SBITS (bit_buf, 1); + bit_buf <<= 1; + + if (f_code) { + NEEDBITS 
(bit_buf, bits, bit_ptr); + delta += UBITS (bit_buf, f_code); + DUMPBITS (bit_buf, bits, f_code); + } + + return (delta ^ sign) - sign; + + } +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int bound_motion_vector (const int vector, const int f_code) +{ + return ((int32_t)vector << (27 - f_code)) >> (27 - f_code); +} + +static inline int get_dmv (mpeg2_decoder_t * const decoder) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + + const DMVtab * tab; + + tab = DMV_2 + UBITS (bit_buf, 2); + DUMPBITS (bit_buf, bits, tab->len); + return tab->dmv; +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_coded_block_pattern (mpeg2_decoder_t * const decoder) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + + const CBPtab * tab; + + NEEDBITS (bit_buf, bits, bit_ptr); + + if (bit_buf >= 0x20000000) { + + tab = CBP_7 + (UBITS (bit_buf, 7) - 16); + DUMPBITS (bit_buf, bits, tab->len); + return tab->cbp; + + } else { + + tab = CBP_9 + UBITS (bit_buf, 9); + DUMPBITS (bit_buf, bits, tab->len); + return tab->cbp; + } + +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_luma_dc_dct_diff (mpeg2_decoder_t * const decoder) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + const DCtab * tab; + int size; + int dc_diff; + + if (bit_buf < 0xf8000000) { + tab = DC_lum_5 + UBITS (bit_buf, 5); + size = tab->size; + if (size) { + bits += tab->len + size; + bit_buf <<= tab->len; + dc_diff = + UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + bit_buf <<= size; + return dc_diff; + } else { + DUMPBITS (bit_buf, bits, 3); + return 0; + } + } else { + tab = DC_long + (UBITS (bit_buf, 9) - 0x1e0); + size = tab->size; + DUMPBITS (bit_buf, bits, tab->len); + NEEDBITS (bit_buf, bits, bit_ptr); + dc_diff 
= UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + DUMPBITS (bit_buf, bits, size); + return dc_diff; + } +#undef bit_buf +#undef bits +#undef bit_ptr +} +/* Chroma counterpart of get_luma_dc_dct_diff: short codes use DC_chrom_5 (a size of 0 consumes 2 bits and returns 0), longer ones index DC_long with 10 bits and consume tab->len + 1 bits before the size bits are read. */ +static inline int get_chroma_dc_dct_diff (mpeg2_decoder_t * const decoder) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + const DCtab * tab; + int size; + int dc_diff; + + if (bit_buf < 0xf8000000) { + tab = DC_chrom_5 + UBITS (bit_buf, 5); + size = tab->size; + if (size) { + bits += tab->len + size; + bit_buf <<= tab->len; + dc_diff = + UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + bit_buf <<= size; + return dc_diff; + } else { + DUMPBITS (bit_buf, bits, 2); + return 0; + } + } else { + tab = DC_long + (UBITS (bit_buf, 10) - 0x3e0); + size = tab->size; + DUMPBITS (bit_buf, bits, tab->len + 1); + NEEDBITS (bit_buf, bits, bit_ptr); + dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + DUMPBITS (bit_buf, bits, size); + return dc_diff; + } +#undef bit_buf +#undef bits +#undef bit_ptr +} +/* Clamps val in place when it lies outside -2048..2047: the single unsigned comparison of val + 2048 against 4095 catches both directions. val is evaluated more than once and assigned to, so it must be a plain variable. NOTE(review): the replacement value assumes SBITS (val, 1) is 0 or -1, giving 2047 or -2048. */ +#define SATURATE(val) \ +do { \ + if (unlikely ((uint32_t)(val + 2048) > 4095)) \ + val = SBITS (val, 1) ^ 2047; \ +} while (0) +/* Decodes the remaining coefficients of one intra block into decoder->DCTblock using the DCT_B14 tables. Position 0 is not written here (i starts at 0 and every code adds its run first). The bitstream state is copied into locals for the loop and written back to the decoder on exit. */ +static void get_intra_block_B14 (mpeg2_decoder_t * const decoder) +{ + int i; + int j; + int val; + const uint8_t * scan = decoder->scan; + const uint8_t * quant_matrix = decoder->intra_quantizer_matrix; + int quantizer_scale = decoder->quantizer_scale; + int mismatch; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + const uint8_t * bit_ptr; + int16_t * dest; + + dest = decoder->DCTblock; + i = 0; + mismatch = ~dest[0]; + + bit_buf = decoder->bitstream_buf; + bits = decoder->bitstream_bits; + bit_ptr = decoder->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + j = scan[i]; + bit_buf <<= 
tab->len; + bits += tab->len + 1; + val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code: i is >= 64 at this point, so 64 is subtracted again together with adding the 6-bit run taken from the bits after the first 6. NOTE(review): this relies on the run stored in the escape table entry; confirm in vlc.h */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = (SBITS (bit_buf, 12) * + quantizer_scale * quant_matrix[j]) / 16; + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + dest[63] ^= mismatch & 1;/* mismatch started as ~dest[0] and was xor-ed with every stored value; its low bit is folded into the last coefficient */ + DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + decoder->bitstream_buf = bit_buf; + decoder->bitstream_bits = bits; + decoder->bitstream_ptr = bit_ptr; +} +/* Same job as get_intra_block_B14, driven by the DCT_B15_8 and DCT_B15_10 tables for the two shortest code classes; the end-of-block code dumped at the end is 4 bits instead of 2. */ +static void get_intra_block_B15 (mpeg2_decoder_t * const decoder) +{ + int i; + int j; + int val; + const uint8_t * scan = decoder->scan; + const uint8_t * quant_matrix = decoder->intra_quantizer_matrix; + int quantizer_scale = decoder->quantizer_scale; + 
int mismatch; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + const uint8_t * bit_ptr; + int16_t * dest; + + dest = decoder->DCTblock; + i = 0; + mismatch = ~dest[0]; /* dest[0] was already filled in by slice_intra_DCT */ + + bit_buf = decoder->bitstream_buf; + bits = decoder->bitstream_bits; + bit_ptr = decoder->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + + while (1) { + if (bit_buf >= 0x04000000) { + + tab = DCT_B15_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) { + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else { + + /* end of block. I commented out this code because if we */ + /* don't exit here we will still exit at the later test :) */ + + /* if (i >= 128) break; */ /* end of block */ + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check against buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = (SBITS (bit_buf, 12) * + quantizer_scale * quant_matrix[j]) / 16; + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } + } else if (bit_buf >= 0x02000000) { + tab = DCT_B15_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + 
goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + dest[63] ^= mismatch & 1; + DUMPBITS (bit_buf, bits, 4); /* dump end of block code */ + decoder->bitstream_buf = bit_buf; + decoder->bitstream_bits = bits; + decoder->bitstream_ptr = bit_ptr; +} +/* Non-intra block coefficient decoder. The very first code is looked up in DCT_B14DC_5, later short codes in DCT_B14AC_5; longer codes share the DCT_B14_8 / DCT_B14_10 / DCT_13 / DCT_15 / DCT_16 tables. Levels are rebuilt as ((2*level+1) * quantizer_scale * non_intra_quantizer_matrix[j]) >> 5, saturated and stored in decoder->DCTblock[scan[i]]; dest[63] is XORed with the low bit of the mismatch accumulator on exit. Returns the final value of i, which slice_non_intra_DCT passes to mpeg2_idct_add. */ +static int get_non_intra_block (mpeg2_decoder_t * const decoder) +{ + int i; + int j; + int val; + const uint8_t * scan = decoder->scan; + const uint8_t * quant_matrix = decoder->non_intra_quantizer_matrix; + int quantizer_scale = decoder->quantizer_scale; + int mismatch; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + const uint8_t * bit_ptr; + int16_t * dest; + + i = -1; + mismatch = 1; + dest = decoder->DCTblock; + + bit_buf = decoder->bitstream_buf; + bits = decoder->bitstream_bits; + bit_ptr = decoder->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + if (bit_buf >= 0x28000000) { + tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5); + goto entry_1; + } else + goto entry_2; + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + entry_1: + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } + + entry_2: + if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = 2 * (SBITS (bit_buf, 12) + SBITS (bit_buf, 1)) + 1; + val = (val * quantizer_scale * 
quant_matrix[j]) / 32; + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + dest[63] ^= mismatch & 1; + DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + decoder->bitstream_buf = bit_buf; + decoder->bitstream_bits = bits; + decoder->bitstream_ptr = bit_ptr; + return i; +} +/* MPEG-1 intra block coefficient decoder; slice_intra_DCT calls it when decoder->mpeg1 is set and the picture is not D_TYPE. Unlike the MPEG-2 decoders it keeps no mismatch accumulator: every dequantized value is forced odd ("oddification") before saturation. Escape levels are read as a signed 8-bit value, extended with a further 8 bits when the low seven bits of the first byte are all zero. Consumes the 2-bit end-of-block code and writes the bitstream state back to the decoder. */ +static void get_mpeg1_intra_block (mpeg2_decoder_t * const decoder) +{ + int i; + int j; + int val; + const uint8_t * scan = decoder->scan; + const uint8_t * quant_matrix = decoder->intra_quantizer_matrix; + int quantizer_scale = decoder->quantizer_scale; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + const uint8_t * bit_ptr; + int16_t * dest; + + i = 0; + dest = decoder->DCTblock; + + bit_buf = decoder->bitstream_buf; + bits = decoder->bitstream_bits; + bit_ptr = decoder->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4; + + /* oddification */ + val = (val - 1) | 1; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 
1); + + SATURATE (val); + dest[j] = val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = SBITS (bit_buf, 8); + if (! (val & 0x7f)) { + DUMPBITS (bit_buf, bits, 8); + val = UBITS (bit_buf, 8) + 2 * val; + } + val = (val * quantizer_scale * quant_matrix[j]) / 16; + + /* oddification */ + val = (val + ~SBITS (val, 1)) | 1; + + SATURATE (val); + dest[j] = val; + + DUMPBITS (bit_buf, bits, 8); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + decoder->bitstream_buf = bit_buf; + decoder->bitstream_bits = bits; + decoder->bitstream_ptr = bit_ptr; +} +/* MPEG-1 non-intra block coefficient decoder: same table walk as get_non_intra_block (DCT_B14DC_5 for the first code, DCT_B14AC_5 afterwards) and the same (2*level+1) reconstruction with non_intra_quantizer_matrix, but with MPEG-1 oddification of each value and 8/16-bit escape levels instead of mismatch control. Returns the final value of i. */ +static int get_mpeg1_non_intra_block (mpeg2_decoder_t * const decoder) +{ + int i; + int j; + int val; + const uint8_t * scan = decoder->scan; + const uint8_t * quant_matrix = decoder->non_intra_quantizer_matrix; + int quantizer_scale = decoder->quantizer_scale; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + const uint8_t * bit_ptr; + 
int16_t * dest; + + i = -1; + dest = decoder->DCTblock; + + bit_buf = decoder->bitstream_buf; + bits = decoder->bitstream_bits; + bit_ptr = decoder->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + if (bit_buf >= 0x28000000) { + tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5); + goto entry_1; + } else + goto entry_2; /* both labels are inside the loop below: the first code bypasses the DCT_B14AC_5 lookup */ + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + entry_1: + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5; + + /* oddification */ + val = (val - 1) | 1; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } + + entry_2: + if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = SBITS (bit_buf, 8); + if (! 
(val & 0x7f)) { + DUMPBITS (bit_buf, bits, 8); + val = UBITS (bit_buf, 8) + 2 * val; + } + val = 2 * (val + SBITS (val, 1)) + 1; + val = (val * quantizer_scale * quant_matrix[j]) / 32; + + /* oddification */ + val = (val + ~SBITS (val, 1)) | 1; + + SATURATE (val); + dest[j] = val; + + DUMPBITS (bit_buf, bits, 8); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + decoder->bitstream_buf = bit_buf; + decoder->bitstream_bits = bits; + decoder->bitstream_ptr = bit_ptr; + return i; +} +/* Decode one intra block of colour component cc and write it to dest with the given stride. The DC predictor for the component is advanced by the decoded differential (luma code for cc == 0, chroma code otherwise) and stored, shifted left by (3 - intra_dc_precision), in DCTblock[0]; the remaining coefficients come from get_mpeg1_intra_block, get_intra_block_B15 or get_intra_block_B14 (none are read for MPEG-1 D_TYPE pictures). The result goes through mpeg2_idct_copy. */ +static inline void slice_intra_DCT (mpeg2_decoder_t * const decoder, + const int cc, + uint8_t * const dest, const int stride) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + NEEDBITS (bit_buf, bits, bit_ptr); + /* Get the intra DC coefficient and inverse quantize it */ + if (cc == 0) + decoder->dc_dct_pred[0] += get_luma_dc_dct_diff (decoder); + else + decoder->dc_dct_pred[cc] += get_chroma_dc_dct_diff (decoder); + decoder->DCTblock[0] = + decoder->dc_dct_pred[cc] << (3 - decoder->intra_dc_precision); + + if (decoder->mpeg1) { + if (decoder->coding_type != D_TYPE) + get_mpeg1_intra_block (decoder); + } else if (decoder->intra_vlc_format) + get_intra_block_B15 (decoder); + else + get_intra_block_B14 (decoder); + 
mpeg2_idct_copy (decoder->DCTblock, dest, stride); +#undef bit_buf +#undef bits +#undef bit_ptr +} +/* Decode one non-intra block (MPEG-1 or MPEG-2 decoder according to decoder->mpeg1) and hand it, with the returned last index, to mpeg2_idct_add for dest. */ +static inline void slice_non_intra_DCT (mpeg2_decoder_t * const decoder, + uint8_t * const dest, const int stride) +{ + int last; + + if (decoder->mpeg1) + last = get_mpeg1_non_intra_block (decoder); + else + last = get_non_intra_block (decoder); + mpeg2_idct_add (last, decoder->DCTblock, dest, stride); +} +/* MOTION: predict `size` luma rows (size/2 chroma rows) starting at row y of the current macroblock from `ref`. The luma position is formed as 2 * offset + motion vector, so its low bit selects the interpolating function table[xy_half] and the remaining bits the sample position; positions outside [0, limit_x] x [0, limit_y_<size>] are clamped and motion_x/motion_y adjusted to match. Chroma uses the halved vector and table[4+xy_half]. Expands to several statements (no do/while wrapper), modifies motion_x and motion_y, and expects decoder plus unsigned pos_x, pos_y, xy_half and offset in the caller's scope. */ +#define MOTION(table,ref,motion_x,motion_y,size,y) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = 2 * decoder->v_offset + motion_y + 2 * y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y_ ## size)) { \ + pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y_ ## size; \ + motion_y = pos_y - 2 * decoder->v_offset - 2 * y; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \ + ref[0] + (pos_x >> 1) + (pos_y >> 1) * decoder->stride, \ + decoder->stride, size); \ + motion_x /= 2; motion_y /= 2; \ + xy_half = ((motion_y & 1) << 1) | (motion_x & 1); \ + offset = (((decoder->offset + motion_x) >> 1) + \ + ((((decoder->v_offset + motion_y) >> 1) + y/2) * \ + decoder->uv_stride)); \ + table[4+xy_half] (decoder->dest[1] + y/2 * decoder->uv_stride + \ + (decoder->offset >> 1), ref[1] + offset, \ + decoder->uv_stride, size/2); \ + table[4+xy_half] (decoder->dest[2] + y/2 * decoder->uv_stride + \ + (decoder->offset >> 1), ref[2] + offset, \ + decoder->uv_stride, size/2) +/* MOTION_FIELD: field-based variant of MOTION for frame pictures. Predicts 8 luma rows (4 chroma rows) at twice the stride, writing from row dest_field of the destination and reading from row ((pos_y op) + src_field) of the reference; callers pass `& ~1` or `| 1` as op. Same clamping, side effects and scope requirements as MOTION, with limit_y as the vertical bound. */ +#define MOTION_FIELD(table,ref,motion_x,motion_y,dest_field,op,src_field) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = decoder->v_offset + motion_y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 
0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y)) { \ + pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y; \ + motion_y = pos_y - decoder->v_offset; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + table[xy_half] (decoder->dest[0] + dest_field * decoder->stride + \ + decoder->offset, \ + (ref[0] + (pos_x >> 1) + \ + ((pos_y op) + src_field) * decoder->stride), \ + 2 * decoder->stride, 8); \ + motion_x /= 2; motion_y /= 2; \ + xy_half = ((motion_y & 1) << 1) | (motion_x & 1); \ + offset = (((decoder->offset + motion_x) >> 1) + \ + (((decoder->v_offset >> 1) + (motion_y op) + src_field) * \ + decoder->uv_stride)); \ + table[4+xy_half] (decoder->dest[1] + dest_field * decoder->uv_stride + \ + (decoder->offset >> 1), ref[1] + offset, \ + 2 * decoder->uv_stride, 4); \ + table[4+xy_half] (decoder->dest[2] + dest_field * decoder->uv_stride + \ + (decoder->offset >> 1), ref[2] + offset, \ + 2 * decoder->uv_stride, 4) +/* MPEG-1 motion vector decode and 16-row prediction. Both components read their delta with f_code[0], shift it left by f_code[1] and are bounded with f_code[0] + f_code[1]; the result is stored in pmv[0]. NOTE(review): f_code[1] presumably holds the MPEG-1 full-pel flag - confirm where f_code is filled in. */ +static void motion_mp1 (mpeg2_decoder_t * const decoder, + motion_t * const motion, + mpeg2_mc_fct * const * const table) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + int motion_x, motion_y; + unsigned int pos_x, pos_y, xy_half, offset; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = (motion->pmv[0][0] + + (get_motion_delta (decoder, + motion->f_code[0]) << motion->f_code[1])); + motion_x = bound_motion_vector (motion_x, + motion->f_code[0] + motion->f_code[1]); + motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = (motion->pmv[0][1] + + (get_motion_delta (decoder, + motion->f_code[0]) << motion->f_code[1])); + motion_y = bound_motion_vector (motion_y, + motion->f_code[0] + motion->f_code[1]); + motion->pmv[0][1] = motion_y; + + MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); +#undef bit_buf +#undef bits +#undef bit_ptr +} +/* Frame picture, frame-based prediction: decode one vector (x with f_code[0], y with f_code[1]), store it in both pmv[0] and pmv[1] and predict 16 rows from ref[0]. */ +static void motion_fr_frame 
(mpeg2_decoder_t * const decoder, + motion_t * const motion, + mpeg2_mc_fct * const * const table) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + int motion_x, motion_y; + unsigned int pos_x, pos_y, xy_half, offset; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = motion->pmv[0][1] + get_motion_delta (decoder, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; + + MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); +#undef bit_buf +#undef bits +#undef bit_ptr +} +/* Frame picture, field-based prediction: for each of the two destination fields read a 1-bit field select and a vector, then run MOTION_FIELD. The vertical predictor is halved when read and doubled when stored; the bound on motion_y is commented out in this version. */ +static void motion_fr_field (mpeg2_decoder_t * const decoder, + motion_t * const motion, + mpeg2_mc_fct * const * const table) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + int motion_x, motion_y, field; + unsigned int pos_x, pos_y, xy_half, offset; + + NEEDBITS (bit_buf, bits, bit_ptr); + field = UBITS (bit_buf, 1); + DUMPBITS (bit_buf, bits, 1); + + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = (motion->pmv[0][1] >> 1) + get_motion_delta (decoder, + motion->f_code[1]); + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ + motion->pmv[0][1] = motion_y << 1; + + MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 0, & ~1, field); + + NEEDBITS (bit_buf, bits, bit_ptr); + field = UBITS (bit_buf, 1); + DUMPBITS (bit_buf, bits, 1); + + motion_x = motion->pmv[1][0] + get_motion_delta (decoder, + motion->f_code[0]); + 
motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = (motion->pmv[1][1] >> 1) + get_motion_delta (decoder, + motion->f_code[1]); + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ + motion->pmv[1][1] = motion_y << 1; + + MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 1, & ~1, field); +#undef bit_buf +#undef bits +#undef bit_ptr +} +/* Frame picture, dual-prime style prediction using get_dmv: decode one vector plus dmv_x/dmv_y, derive two vectors scaled by m (1 or 3, chosen by decoder->top_field_first) and put those predictions with mpeg2_mc.put, then average in the prediction from the decoded vector with mpeg2_mc.avg for both field rows. The table argument is not used. */ +static void motion_fr_dmv (mpeg2_decoder_t * const decoder, + motion_t * const motion, + mpeg2_mc_fct * const * const table) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + int motion_x, motion_y, dmv_x, dmv_y, m, other_x, other_y; + unsigned int pos_x, pos_y, xy_half, offset; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; + NEEDBITS (bit_buf, bits, bit_ptr); + dmv_x = get_dmv (decoder); + + motion_y = (motion->pmv[0][1] >> 1) + get_motion_delta (decoder, + motion->f_code[1]); + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ + motion->pmv[1][1] = motion->pmv[0][1] = motion_y << 1; + dmv_y = get_dmv (decoder); + + m = decoder->top_field_first ? 1 : 3; + other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; + other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y - 1; + MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 0, | 1, 0); + + m = decoder->top_field_first ? 
3 : 1; + other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; + other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y + 1; + MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 1, & ~1, 0); + + pos_x = 2 * decoder->offset + motion_x; + pos_y = decoder->v_offset + motion_y; + if (unlikely (pos_x > decoder->limit_x)) { + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; + motion_x = pos_x - 2 * decoder->offset; + } + if (unlikely (pos_y > decoder->limit_y)) { + pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y; + motion_y = pos_y - decoder->v_offset; + } + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); + offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride; + mpeg2_mc.avg[xy_half] + (decoder->dest[0] + decoder->offset, + motion->ref[0][0] + offset, 2 * decoder->stride, 8); + mpeg2_mc.avg[xy_half] + (decoder->dest[0] + decoder->stride + decoder->offset, + motion->ref[0][0] + decoder->stride + offset, 2 * decoder->stride, 8); + motion_x /= 2; motion_y /= 2; + xy_half = ((motion_y & 1) << 1) | (motion_x & 1); + offset = (((decoder->offset + motion_x) >> 1) + + (((decoder->v_offset >> 1) + (motion_y & ~1)) * + decoder->uv_stride)); + mpeg2_mc.avg[4+xy_half] + (decoder->dest[1] + (decoder->offset >> 1), + motion->ref[0][1] + offset, 2 * decoder->uv_stride, 4); + mpeg2_mc.avg[4+xy_half] + (decoder->dest[1] + decoder->uv_stride + (decoder->offset >> 1), + motion->ref[0][1] + decoder->uv_stride + offset, + 2 * decoder->uv_stride, 4); + mpeg2_mc.avg[4+xy_half] + (decoder->dest[2] + (decoder->offset >> 1), + motion->ref[0][2] + offset, 2 * decoder->uv_stride, 4); + mpeg2_mc.avg[4+xy_half] + (decoder->dest[2] + decoder->uv_stride + (decoder->offset >> 1), + motion->ref[0][2] + decoder->uv_stride + offset, + 2 * decoder->uv_stride, 4); +#undef bit_buf +#undef bits +#undef bit_ptr +} +/* Predict 16 rows from ref[0] with the vector already stored in pmv[0]; no bits are read. */ +static inline void motion_reuse (const mpeg2_decoder_t * const decoder, + const motion_t * const motion, + mpeg2_mc_fct * const * const table) +{ + int motion_x, motion_y; + unsigned 
int pos_x, pos_y, xy_half, offset; + + motion_x = motion->pmv[0][0]; + motion_y = motion->pmv[0][1]; + + MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); +} +/* Zero-vector prediction: apply table[0] to 16 luma rows and table[4] to 8 rows of each chroma plane, reading ref[0] at the current offset / v_offset. */ +static inline void motion_zero (const mpeg2_decoder_t * const decoder, + const motion_t * const motion, + mpeg2_mc_fct * const * const table) +{ + unsigned int offset; + + table[0] (decoder->dest[0] + decoder->offset, + (motion->ref[0][0] + decoder->offset + + decoder->v_offset * decoder->stride), + decoder->stride, 16); + + offset = ((decoder->offset >> 1) + + (decoder->v_offset >> 1) * decoder->uv_stride); + table[4] (decoder->dest[1] + (decoder->offset >> 1), + motion->ref[0][1] + offset, decoder->uv_stride, 8); + table[4] (decoder->dest[2] + (decoder->offset >> 1), + motion->ref[0][2] + offset, decoder->uv_stride, 8); +} + +/* like motion_fr_frame, but parsing only (always on f_motion): no motion compensation is performed, and the trailing marker bit is dropped */ +static void motion_fr_conceal (mpeg2_decoder_t * const decoder) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + int tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + tmp = (decoder->f_motion.pmv[0][0] + + get_motion_delta (decoder, decoder->f_motion.f_code[0])); + tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[0]); + decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[0][0] = tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + tmp = (decoder->f_motion.pmv[0][1] + + get_motion_delta (decoder, decoder->f_motion.f_code[1])); + tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[1]); + decoder->f_motion.pmv[1][1] = decoder->f_motion.pmv[0][1] = tmp; + + DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */ +#undef bit_buf +#undef bits +#undef bit_ptr +} +/* Field picture, field-based prediction: a 1-bit field select picks the reference from ref2[], one vector is decoded, stored in pmv[0] and pmv[1], and 16 rows are predicted. */ +static void motion_fi_field (mpeg2_decoder_t * const decoder, + motion_t * const motion, + mpeg2_mc_fct * const * const table) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + int motion_x, 
motion_y; + uint8_t ** ref_field; + unsigned int pos_x, pos_y, xy_half, offset; + + NEEDBITS (bit_buf, bits, bit_ptr); + ref_field = motion->ref2[UBITS (bit_buf, 1)]; + DUMPBITS (bit_buf, bits, 1); + + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = motion->pmv[0][1] + get_motion_delta (decoder, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; + + MOTION (table, ref_field, motion_x, motion_y, 16, 0); +#undef bit_buf +#undef bits +#undef bit_ptr +} +/* Field picture, 16x8 prediction: two (field select, vector) pairs are decoded; the first predicts 8 rows starting at row 0 using pmv[0], the second 8 rows starting at row 8 using pmv[1]. */ +static void motion_fi_16x8 (mpeg2_decoder_t * const decoder, + motion_t * const motion, + mpeg2_mc_fct * const * const table) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + int motion_x, motion_y; + uint8_t ** ref_field; + unsigned int pos_x, pos_y, xy_half, offset; + + NEEDBITS (bit_buf, bits, bit_ptr); + ref_field = motion->ref2[UBITS (bit_buf, 1)]; + DUMPBITS (bit_buf, bits, 1); + + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = motion->pmv[0][1] + get_motion_delta (decoder, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[0][1] = motion_y; + + MOTION (table, ref_field, motion_x, motion_y, 8, 0); + + NEEDBITS (bit_buf, bits, bit_ptr); + ref_field = motion->ref2[UBITS (bit_buf, 1)]; + DUMPBITS (bit_buf, bits, 1); + + motion_x = motion->pmv[1][0] + get_motion_delta (decoder, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion_x; + + NEEDBITS (bit_buf, bits, 
bit_ptr); + motion_y = motion->pmv[1][1] + get_motion_delta (decoder, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[1][1] = motion_y; + + MOTION (table, ref_field, motion_x, motion_y, 8, 8); +#undef bit_buf +#undef bits +#undef bit_ptr +} +/* Field picture, dual-prime style prediction: decode one vector, derive a second one from its rounded half plus get_dmv (and decoder->dmv_offset vertically), put the first prediction from ref[0] and average in the second from ref[1]. The table argument is not used. */ +static void motion_fi_dmv (mpeg2_decoder_t * const decoder, + motion_t * const motion, + mpeg2_mc_fct * const * const table) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + int motion_x, motion_y, other_x, other_y; + unsigned int pos_x, pos_y, xy_half, offset; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; + NEEDBITS (bit_buf, bits, bit_ptr); + other_x = ((motion_x + (motion_x > 0)) >> 1) + get_dmv (decoder); + + motion_y = motion->pmv[0][1] + get_motion_delta (decoder, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; + other_y = (((motion_y + (motion_y > 0)) >> 1) + get_dmv (decoder) + + decoder->dmv_offset); + + MOTION (mpeg2_mc.put, motion->ref[0], motion_x, motion_y, 16, 0); + MOTION (mpeg2_mc.avg, motion->ref[1], other_x, other_y, 16, 0); +#undef bit_buf +#undef bits +#undef bit_ptr +} +/* Field picture counterpart of motion_fr_conceal: drop the field select bit, update the f_motion predictors from one decoded vector and drop the marker bit; no prediction is performed. */ +static void motion_fi_conceal (mpeg2_decoder_t * const decoder) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + int tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + DUMPBITS (bit_buf, bits, 1); /* remove field_select */ + + tmp = (decoder->f_motion.pmv[0][0] + + get_motion_delta (decoder, decoder->f_motion.f_code[0])); + tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[0]); + decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[0][0] = tmp; + + 
NEEDBITS (bit_buf, bits, bit_ptr); + tmp = (decoder->f_motion.pmv[0][1] + + get_motion_delta (decoder, decoder->f_motion.f_code[1])); + tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[1]); + decoder->f_motion.pmv[1][1] = decoder->f_motion.pmv[0][1] = tmp; + + DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */ +#undef bit_buf +#undef bits +#undef bit_ptr +} +/* MOTION_CALL: run `routine` for the forward and/or backward direction flagged in `direction`. Forward always uses mpeg2_mc.put; backward uses mpeg2_mc.avg when forward was also applied, mpeg2_mc.put otherwise. Expects `decoder` in scope. */ +#define MOTION_CALL(routine,direction) \ +do { \ + if ((direction) & MACROBLOCK_MOTION_FORWARD) \ + routine (decoder, &(decoder->f_motion), mpeg2_mc.put); \ + if ((direction) & MACROBLOCK_MOTION_BACKWARD) \ + routine (decoder, &(decoder->b_motion), \ + ((direction) & MACROBLOCK_MOTION_FORWARD ? \ + mpeg2_mc.avg : mpeg2_mc.put)); \ +} while (0) +/* NEXT_MACROBLOCK: advance decoder->offset by 16; at the end of a row call decoder->convert if set, step the dest pointers down one macroblock row (skipped for B_TYPE pictures when convert is set), advance v_offset, and once v_offset exceeds limit_y call mpeg2_cpu_state_restore (if non-null) and return from the enclosing function. Expects `decoder` and `cpu_state` in scope. */ +#define NEXT_MACROBLOCK \ +do { \ + decoder->offset += 16; \ + if (decoder->offset == decoder->width) { \ + do { /* just so we can use the break statement */ \ + if (decoder->convert) { \ + decoder->convert (decoder->fbuf_id, decoder->dest, \ + decoder->v_offset); \ + if (decoder->coding_type == B_TYPE) \ + break; \ + } \ + decoder->dest[0] += 16 * decoder->stride; \ + decoder->dest[1] += 4 * decoder->stride; \ + decoder->dest[2] += 4 * decoder->stride; \ + } while (0); \ + decoder->v_offset += 16; \ + if (decoder->v_offset > decoder->limit_y) { \ + if (mpeg2_cpu_state_restore) \ + mpeg2_cpu_state_restore (&cpu_state); \ + return; \ + } \ + decoder->offset = 0; \ + } \ +} while (0) +/* Set up picture_dest and the forward/backward reference plane pointers for the current picture from the three frame buffers, and derive stride, uv_stride and the limit_* clamping bounds. For field pictures the pointers are offset by one line for BOTTOM_FIELD, ref[1] points at the other field's lines, ref2[] is ref[] ordered by bottom_field, stride is doubled and height halved; for the second field of a non-B picture the ref[1] forward reference is taken from current_fbuf. */ +void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3], + uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3]) +{ + int offset, stride, height, bottom_field; + + stride = decoder->width; + bottom_field = (decoder->picture_structure == BOTTOM_FIELD); + offset = bottom_field ? 
stride : 0; + height = decoder->height; + + decoder->picture_dest[0] = current_fbuf[0] + offset; + decoder->picture_dest[1] = current_fbuf[1] + (offset >> 1); + decoder->picture_dest[2] = current_fbuf[2] + (offset >> 1); + + decoder->f_motion.ref[0][0] = forward_fbuf[0] + offset; + decoder->f_motion.ref[0][1] = forward_fbuf[1] + (offset >> 1); + decoder->f_motion.ref[0][2] = forward_fbuf[2] + (offset >> 1); + + decoder->b_motion.ref[0][0] = backward_fbuf[0] + offset; + decoder->b_motion.ref[0][1] = backward_fbuf[1] + (offset >> 1); + decoder->b_motion.ref[0][2] = backward_fbuf[2] + (offset >> 1); + + if (decoder->picture_structure != FRAME_PICTURE) { + decoder->dmv_offset = bottom_field ? 1 : -1; + decoder->f_motion.ref2[0] = decoder->f_motion.ref[bottom_field]; + decoder->f_motion.ref2[1] = decoder->f_motion.ref[!bottom_field]; + decoder->b_motion.ref2[0] = decoder->b_motion.ref[bottom_field]; + decoder->b_motion.ref2[1] = decoder->b_motion.ref[!bottom_field]; + offset = stride - offset; + + if (decoder->second_field && (decoder->coding_type != B_TYPE)) + forward_fbuf = current_fbuf; + + decoder->f_motion.ref[1][0] = forward_fbuf[0] + offset; + decoder->f_motion.ref[1][1] = forward_fbuf[1] + (offset >> 1); + decoder->f_motion.ref[1][2] = forward_fbuf[2] + (offset >> 1); + + decoder->b_motion.ref[1][0] = backward_fbuf[0] + offset; + decoder->b_motion.ref[1][1] = backward_fbuf[1] + (offset >> 1); + decoder->b_motion.ref[1][2] = backward_fbuf[2] + (offset >> 1); + + stride <<= 1; + height >>= 1; + } + + decoder->stride = stride; + decoder->uv_stride = stride >> 1; + decoder->limit_x = 2 * decoder->width - 32; + decoder->limit_y_16 = 2 * height - 32; + decoder->limit_y_8 = 2 * height - 16; + decoder->limit_y = height - 16; +} +/* Prepare the decoder for the slice in row `code` (1-based: v_offset = (code - 1) * 16): reset the DC and motion vector predictors, compute v_offset and the dest pointers for the slice row (dest stays at picture_dest for B_TYPE pictures when decoder->convert is set), read the quantizer scale, skip extra slice data and decode the initial macroblock address increment into decoder->offset. Returns 1 on an invalid address increment code or when the slice starts beyond limit_y, 0 otherwise. */ +static inline int slice_init (mpeg2_decoder_t * const decoder, int code) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + int offset; + const MBAtab * mba; + + 
decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] = + decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision; + + decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0; + decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0; + decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0; + decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0; + + if (decoder->vertical_position_extension) { + code += UBITS (bit_buf, 3) << 7; + DUMPBITS (bit_buf, bits, 3); + } + decoder->v_offset = (code - 1) * 16; /* code counts slice rows from 1 */ + offset = 0; + if (!(decoder->convert) || decoder->coding_type != B_TYPE) + offset = (code - 1) * decoder->stride * 4; + + decoder->dest[0] = decoder->picture_dest[0] + offset * 4; + decoder->dest[1] = decoder->picture_dest[1] + offset; + decoder->dest[2] = decoder->picture_dest[2] + offset; + + decoder->quantizer_scale = get_quantizer_scale (decoder); + + /* ignore intra_slice and all the extra data */ + while (bit_buf & 0x80000000) { + DUMPBITS (bit_buf, bits, 9); + NEEDBITS (bit_buf, bits, bit_ptr); + } + + /* decode initial macroblock address increment */ + offset = 0; + while (1) { + if (bit_buf >= 0x08000000) { + mba = MBA_5 + (UBITS (bit_buf, 6) - 2); + break; + } else if (bit_buf >= 0x01800000) { + mba = MBA_11 + (UBITS (bit_buf, 12) - 24); + break; + } else switch (UBITS (bit_buf, 12)) { + case 8: /* macroblock_escape */ + offset += 33; + DUMPBITS (bit_buf, bits, 11); + NEEDBITS (bit_buf, bits, bit_ptr); + continue; + case 15: /* macroblock_stuffing (MPEG1 only) */ + bit_buf &= 0xfffff; + DUMPBITS (bit_buf, bits, 11); + NEEDBITS (bit_buf, bits, bit_ptr); + continue; + default: /* error */ + return 1; + } + } + DUMPBITS (bit_buf, bits, mba->len + 1); + decoder->offset = (offset + mba->mba) << 4; + + while (decoder->offset - decoder->width >= 0) { /* an increment past the end of the row wraps onto the following rows */ + decoder->offset -= decoder->width; + if (!(decoder->convert) || decoder->coding_type != B_TYPE) { + decoder->dest[0] += 16 * decoder->stride; + decoder->dest[1] += 4 * decoder->stride; + 
decoder->dest[2] += 4 * decoder->stride; + } + decoder->v_offset += 16; + } + if (decoder->v_offset > decoder->limit_y) + return 1; + + return 0; +#undef bit_buf +#undef bits +#undef bit_ptr +} + +void mpeg2_slice (mpeg2_decoder_t * const decoder, const int code, + const uint8_t * const buffer) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + cpu_state_t cpu_state; + + bitstream_init (decoder, buffer); + + if (slice_init (decoder, code)) + return; + + if (mpeg2_cpu_state_save) + mpeg2_cpu_state_save (&cpu_state); + + while (1) { + int macroblock_modes; + int mba_inc; + const MBAtab * mba; + + NEEDBITS (bit_buf, bits, bit_ptr); + + macroblock_modes = get_macroblock_modes (decoder); + + /* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? */ + if (macroblock_modes & MACROBLOCK_QUANT) + decoder->quantizer_scale = get_quantizer_scale (decoder); + + if (macroblock_modes & MACROBLOCK_INTRA) { + + int DCT_offset, DCT_stride; + int offset; + uint8_t * dest_y; + + if (decoder->concealment_motion_vectors) { + if (decoder->picture_structure == FRAME_PICTURE) + motion_fr_conceal (decoder); + else + motion_fi_conceal (decoder); + } else { + decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0; + decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0; + decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0; + decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0; + } + + if (macroblock_modes & DCT_TYPE_INTERLACED) { + DCT_offset = decoder->stride; + DCT_stride = decoder->stride * 2; + } else { + DCT_offset = decoder->stride * 8; + DCT_stride = decoder->stride; + } + + offset = decoder->offset; + dest_y = decoder->dest[0] + offset; + slice_intra_DCT (decoder, 0, dest_y, DCT_stride); + slice_intra_DCT (decoder, 0, dest_y + 8, DCT_stride); + slice_intra_DCT (decoder, 0, dest_y + DCT_offset, DCT_stride); + slice_intra_DCT (decoder, 0, dest_y + DCT_offset + 
8, DCT_stride); + slice_intra_DCT (decoder, 1, decoder->dest[1] + (offset >> 1), + decoder->uv_stride); + slice_intra_DCT (decoder, 2, decoder->dest[2] + (offset >> 1), + decoder->uv_stride); + + if (decoder->coding_type == D_TYPE) { + NEEDBITS (bit_buf, bits, bit_ptr); + DUMPBITS (bit_buf, bits, 1); + } + } else { + + if (decoder->picture_structure == FRAME_PICTURE) + switch (macroblock_modes & MOTION_TYPE_MASK) { + case MC_FRAME: + if (decoder->mpeg1) + MOTION_CALL (motion_mp1, macroblock_modes); + else + MOTION_CALL (motion_fr_frame, macroblock_modes); + break; + + case MC_FIELD: + MOTION_CALL (motion_fr_field, macroblock_modes); + break; + + case MC_DMV: + MOTION_CALL (motion_fr_dmv, MACROBLOCK_MOTION_FORWARD); + break; + + case 0: + /* non-intra mb without forward mv in a P picture */ + decoder->f_motion.pmv[0][0] = 0; + decoder->f_motion.pmv[0][1] = 0; + decoder->f_motion.pmv[1][0] = 0; + decoder->f_motion.pmv[1][1] = 0; + MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); + break; + } + else + switch (macroblock_modes & MOTION_TYPE_MASK) { + case MC_FIELD: + MOTION_CALL (motion_fi_field, macroblock_modes); + break; + + case MC_16X8: + MOTION_CALL (motion_fi_16x8, macroblock_modes); + break; + + case MC_DMV: + MOTION_CALL (motion_fi_dmv, MACROBLOCK_MOTION_FORWARD); + break; + + case 0: + /* non-intra mb without forward mv in a P picture */ + decoder->f_motion.pmv[0][0] = 0; + decoder->f_motion.pmv[0][1] = 0; + decoder->f_motion.pmv[1][0] = 0; + decoder->f_motion.pmv[1][1] = 0; + MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); + break; + } + + if (macroblock_modes & MACROBLOCK_PATTERN) { + int coded_block_pattern; + int DCT_offset, DCT_stride; + int offset; + uint8_t * dest_y; + + if (macroblock_modes & DCT_TYPE_INTERLACED) { + DCT_offset = decoder->stride; + DCT_stride = decoder->stride * 2; + } else { + DCT_offset = decoder->stride * 8; + DCT_stride = decoder->stride; + } + + coded_block_pattern = get_coded_block_pattern (decoder); + + offset = 
decoder->offset; + dest_y = decoder->dest[0] + offset; + if (coded_block_pattern & 0x20) + slice_non_intra_DCT (decoder, dest_y, DCT_stride); + if (coded_block_pattern & 0x10) + slice_non_intra_DCT (decoder, dest_y + 8, DCT_stride); + if (coded_block_pattern & 0x08) + slice_non_intra_DCT (decoder, dest_y + DCT_offset, + DCT_stride); + if (coded_block_pattern & 0x04) + slice_non_intra_DCT (decoder, dest_y + DCT_offset + 8, + DCT_stride); + if (coded_block_pattern & 0x2) + slice_non_intra_DCT (decoder, + decoder->dest[1] + (offset >> 1), + decoder->uv_stride); + if (coded_block_pattern & 0x1) + slice_non_intra_DCT (decoder, + decoder->dest[2] + (offset >> 1), + decoder->uv_stride); + } + + decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] = + decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision; + } + + NEXT_MACROBLOCK; + + NEEDBITS (bit_buf, bits, bit_ptr); + mba_inc = 0; + while (1) { + if (bit_buf >= 0x10000000) { + mba = MBA_5 + (UBITS (bit_buf, 5) - 2); + break; + } else if (bit_buf >= 0x03000000) { + mba = MBA_11 + (UBITS (bit_buf, 11) - 24); + break; + } else switch (UBITS (bit_buf, 11)) { + case 8: /* macroblock_escape */ + mba_inc += 33; + /* pass through */ + case 15: /* macroblock_stuffing (MPEG1 only) */ + DUMPBITS (bit_buf, bits, 11); + NEEDBITS (bit_buf, bits, bit_ptr); + continue; + default: /* end of slice, or error */ + if (mpeg2_cpu_state_restore) + mpeg2_cpu_state_restore (&cpu_state); + return; + } + } + DUMPBITS (bit_buf, bits, mba->len); + mba_inc += mba->mba; + + if (mba_inc) { + decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] = + decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision; + + if (decoder->coding_type == P_TYPE) { + decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0; + decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0; + + do { + MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); + NEXT_MACROBLOCK; + } while (--mba_inc); + } else { + do { + MOTION_CALL (motion_reuse, macroblock_modes); + 
NEXT_MACROBLOCK; + } while (--mba_inc); + } + } + } +#undef bit_buf +#undef bits +#undef bit_ptr +} diff --git a/src/libmpeg2new/libmpeg2/vlc.h b/src/libmpeg2new/libmpeg2/vlc.h new file mode 100644 index 000000000..8fa6b75bd --- /dev/null +++ b/src/libmpeg2new/libmpeg2/vlc.h @@ -0,0 +1,429 @@ +/* + * vlc.h + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define GETWORD(bit_buf,shift,bit_ptr) \ +do { \ + bit_buf |= ((bit_ptr[0] << 8) | bit_ptr[1]) << (shift); \ + bit_ptr += 2; \ +} while (0) + +static inline void bitstream_init (mpeg2_decoder_t * decoder, + const uint8_t * start) +{ + decoder->bitstream_buf = + (start[0] << 24) | (start[1] << 16) | (start[2] << 8) | start[3]; + decoder->bitstream_ptr = start + 4; + decoder->bitstream_bits = -16; +} + +/* make sure that there are at least 16 valid bits in bit_buf */ +#define NEEDBITS(bit_buf,bits,bit_ptr) \ +do { \ + if (unlikely (bits > 0)) { \ + GETWORD (bit_buf, bits, bit_ptr); \ + bits -= 16; \ + } \ +} while (0) + +/* remove num valid bits from bit_buf */ +#define DUMPBITS(bit_buf,bits,num) \ +do { \ + bit_buf <<= (num); \ + bits += (num); \ +} while (0) + +/* take num bits from the high part of bit_buf and zero extend them */ +#define UBITS(bit_buf,num) (((uint32_t)(bit_buf)) >> (32 - (num))) + +/* take num bits from the high part of bit_buf and sign extend them */ +#define SBITS(bit_buf,num) (((int32_t)(bit_buf)) >> (32 - (num))) + +typedef struct { + uint8_t modes; + uint8_t len; +} MBtab; + +typedef struct { + uint8_t delta; + uint8_t len; +} MVtab; + +typedef struct { + int8_t dmv; + uint8_t len; +} DMVtab; + +typedef struct { + uint8_t cbp; + uint8_t len; +} CBPtab; + +typedef struct { + uint8_t size; + uint8_t len; +} DCtab; + +typedef struct { + uint8_t run; + uint8_t level; + uint8_t len; +} DCTtab; + +typedef struct { + uint8_t mba; + uint8_t len; +} MBAtab; + + +#define INTRA MACROBLOCK_INTRA +#define QUANT MACROBLOCK_QUANT + +static const MBtab MB_I [] = { + {INTRA|QUANT, 2}, {INTRA, 1} +}; + +#define MC MACROBLOCK_MOTION_FORWARD +#define CODED MACROBLOCK_PATTERN + +static const MBtab MB_P [] = { + {INTRA|QUANT, 6}, {CODED|QUANT, 
5}, {MC|CODED|QUANT, 5}, {INTRA, 5}, + {MC, 3}, {MC, 3}, {MC, 3}, {MC, 3}, + {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, + {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1} +}; + +#define FWD MACROBLOCK_MOTION_FORWARD +#define BWD MACROBLOCK_MOTION_BACKWARD +#define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD + +static const MBtab MB_B [] = { + {0, 0}, {INTRA|QUANT, 6}, + {BWD|CODED|QUANT, 6}, {FWD|CODED|QUANT, 6}, + {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5}, + {INTRA, 5}, {INTRA, 5}, + {FWD, 4}, {FWD, 4}, {FWD, 4}, {FWD, 4}, + {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, + {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, + {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, + {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, + {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2} +}; + +#undef INTRA +#undef QUANT +#undef MC +#undef CODED +#undef FWD +#undef BWD +#undef INTER + + +static const MVtab MV_4 [] = { + { 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2} +}; + +static const MVtab MV_10 [] = { + { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, + { 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10}, + {11,10}, {10,10}, { 9, 9}, 
{ 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9}, + { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, + { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, + { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7} +}; + + +static const DMVtab DMV_2 [] = { + { 0, 1}, { 0, 1}, { 1, 2}, {-1, 2} +}; + + +static const CBPtab CBP_7 [] = { + {0x22, 7}, {0x12, 7}, {0x0a, 7}, {0x06, 7}, + {0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7}, + {0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6}, + {0x24, 6}, {0x24, 6}, {0x18, 6}, {0x18, 6}, + {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, + {0x02, 5}, {0x02, 5}, {0x02, 5}, {0x02, 5}, + {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, + {0x01, 5}, {0x01, 5}, {0x01, 5}, {0x01, 5}, + {0x38, 5}, {0x38, 5}, {0x38, 5}, {0x38, 5}, + {0x34, 5}, {0x34, 5}, {0x34, 5}, {0x34, 5}, + {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, + {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, + {0x28, 5}, {0x28, 5}, {0x28, 5}, {0x28, 5}, + {0x14, 5}, {0x14, 5}, {0x14, 5}, {0x14, 5}, + {0x30, 5}, {0x30, 5}, {0x30, 5}, {0x30, 5}, + {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, + {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, + {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, + {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, + {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, + {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, + {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3} +}; + +static const CBPtab CBP_9 [] = { + {0, 0}, {0x00, 9}, {0x27, 9}, {0x1b, 9}, + {0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9}, + {0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8}, + {0x2e, 8}, {0x2e, 8}, {0x1e, 8}, {0x1e, 8}, + {0x39, 8}, {0x39, 8}, {0x35, 8}, {0x35, 8}, + {0x2d, 8}, {0x2d, 8}, {0x1d, 8}, {0x1d, 8}, + {0x26, 8}, 
{0x26, 8}, {0x1a, 8}, {0x1a, 8}, + {0x25, 8}, {0x25, 8}, {0x19, 8}, {0x19, 8}, + {0x2b, 8}, {0x2b, 8}, {0x17, 8}, {0x17, 8}, + {0x33, 8}, {0x33, 8}, {0x0f, 8}, {0x0f, 8}, + {0x2a, 8}, {0x2a, 8}, {0x16, 8}, {0x16, 8}, + {0x32, 8}, {0x32, 8}, {0x0e, 8}, {0x0e, 8}, + {0x29, 8}, {0x29, 8}, {0x15, 8}, {0x15, 8}, + {0x31, 8}, {0x31, 8}, {0x0d, 8}, {0x0d, 8}, + {0x23, 8}, {0x23, 8}, {0x13, 8}, {0x13, 8}, + {0x0b, 8}, {0x0b, 8}, {0x07, 8}, {0x07, 8} +}; + + +static const DCtab DC_lum_5 [] = { + {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, + {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5} +}; + +static const DCtab DC_chrom_5 [] = { + {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, + {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5} +}; + +static const DCtab DC_long [] = { + {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, + {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, + {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, { 7, 6}, { 7, 6}, + {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9} +}; + + +static const DCTtab DCT_16 [] = { + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + { 2,18, 0}, { 2,17, 0}, { 2,16, 0}, { 2,15, 0}, + { 7, 3, 0}, { 17, 2, 0}, { 16, 2, 0}, { 15, 2, 0}, + { 14, 2, 0}, { 13, 2, 0}, { 12, 2, 0}, { 32, 1, 0}, + { 31, 1, 0}, { 30, 1, 0}, { 29, 1, 0}, { 28, 1, 0} +}; + +static const DCTtab DCT_15 [] = { + { 1,40,15}, { 1,39,15}, { 1,38,15}, { 1,37,15}, + { 1,36,15}, { 1,35,15}, { 1,34,15}, { 1,33,15}, + { 1,32,15}, { 2,14,15}, { 
2,13,15}, { 2,12,15}, + { 2,11,15}, { 2,10,15}, { 2, 9,15}, { 2, 8,15}, + { 1,31,14}, { 1,31,14}, { 1,30,14}, { 1,30,14}, + { 1,29,14}, { 1,29,14}, { 1,28,14}, { 1,28,14}, + { 1,27,14}, { 1,27,14}, { 1,26,14}, { 1,26,14}, + { 1,25,14}, { 1,25,14}, { 1,24,14}, { 1,24,14}, + { 1,23,14}, { 1,23,14}, { 1,22,14}, { 1,22,14}, + { 1,21,14}, { 1,21,14}, { 1,20,14}, { 1,20,14}, + { 1,19,14}, { 1,19,14}, { 1,18,14}, { 1,18,14}, + { 1,17,14}, { 1,17,14}, { 1,16,14}, { 1,16,14} +}; + +static const DCTtab DCT_13 [] = { + { 11, 2,13}, { 10, 2,13}, { 6, 3,13}, { 4, 4,13}, + { 3, 5,13}, { 2, 7,13}, { 2, 6,13}, { 1,15,13}, + { 1,14,13}, { 1,13,13}, { 1,12,13}, { 27, 1,13}, + { 26, 1,13}, { 25, 1,13}, { 24, 1,13}, { 23, 1,13}, + { 1,11,12}, { 1,11,12}, { 9, 2,12}, { 9, 2,12}, + { 5, 3,12}, { 5, 3,12}, { 1,10,12}, { 1,10,12}, + { 3, 4,12}, { 3, 4,12}, { 8, 2,12}, { 8, 2,12}, + { 22, 1,12}, { 22, 1,12}, { 21, 1,12}, { 21, 1,12}, + { 1, 9,12}, { 1, 9,12}, { 20, 1,12}, { 20, 1,12}, + { 19, 1,12}, { 19, 1,12}, { 2, 5,12}, { 2, 5,12}, + { 4, 3,12}, { 4, 3,12}, { 1, 8,12}, { 1, 8,12}, + { 7, 2,12}, { 7, 2,12}, { 18, 1,12}, { 18, 1,12} +}; + +static const DCTtab DCT_B14_10 [] = { + { 17, 1,10}, { 6, 2,10}, { 1, 7,10}, { 3, 3,10}, + { 2, 4,10}, { 16, 1,10}, { 15, 1,10}, { 5, 2,10} +}; + +static const DCTtab DCT_B14_8 [] = { + { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, + { 3, 2, 7}, { 3, 2, 7}, { 10, 1, 7}, { 10, 1, 7}, + { 1, 4, 7}, { 1, 4, 7}, { 9, 1, 7}, { 9, 1, 7}, + { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, + { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, + { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, + { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, + { 14, 1, 8}, { 1, 6, 8}, { 13, 1, 8}, { 12, 1, 8}, + { 4, 2, 8}, { 2, 3, 8}, { 1, 5, 8}, { 11, 1, 8} +}; + +static const DCTtab DCT_B14AC_5 [] = { + { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, + { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + {129, 0, 2}, {129, 0, 2}, {129, 0, 
2}, {129, 0, 2}, + {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2} +}; + +static const DCTtab DCT_B14DC_5 [] = { + { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, + { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1} +}; + +static const DCTtab DCT_B15_10 [] = { + { 6, 2, 9}, { 6, 2, 9}, { 15, 1, 9}, { 15, 1, 9}, + { 3, 4,10}, { 17, 1,10}, { 16, 1, 9}, { 16, 1, 9} +}; + +static const DCTtab DCT_B15_8 [] = { + { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, + { 8, 1, 7}, { 8, 1, 7}, { 9, 1, 7}, { 9, 1, 7}, + { 7, 1, 7}, { 7, 1, 7}, { 3, 2, 7}, { 3, 2, 7}, + { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, + { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, + { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, + { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, + { 2, 5, 8}, { 12, 1, 8}, { 1,11, 8}, { 1,10, 8}, + { 14, 1, 8}, { 13, 1, 8}, { 4, 2, 8}, { 2, 4, 8}, + { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, + { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, + { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, + { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, + { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, + { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, 
{129, 0, 4}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, + { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, + { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, + { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, + { 10, 1, 7}, { 10, 1, 7}, { 2, 3, 7}, { 2, 3, 7}, + { 11, 1, 7}, { 11, 1, 7}, { 1, 8, 7}, { 1, 8, 7}, + { 1, 9, 7}, { 1, 9, 7}, { 1,12, 8}, { 1,13, 8}, + { 3, 3, 8}, { 5, 2, 8}, { 1,14, 8}, { 1,15, 8} +}; + + +static const MBAtab MBA_5 [] = { + {6, 5}, {5, 5}, {4, 4}, {4, 4}, 
{3, 4}, {3, 4}, + {2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, + {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, + {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1} +}; + +static const MBAtab MBA_11 [] = { + {32, 11}, {31, 11}, {30, 11}, {29, 11}, + {28, 11}, {27, 11}, {26, 11}, {25, 11}, + {24, 11}, {23, 11}, {22, 11}, {21, 11}, + {20, 10}, {20, 10}, {19, 10}, {19, 10}, + {18, 10}, {18, 10}, {17, 10}, {17, 10}, + {16, 10}, {16, 10}, {15, 10}, {15, 10}, + {14, 8}, {14, 8}, {14, 8}, {14, 8}, + {14, 8}, {14, 8}, {14, 8}, {14, 8}, + {13, 8}, {13, 8}, {13, 8}, {13, 8}, + {13, 8}, {13, 8}, {13, 8}, {13, 8}, + {12, 8}, {12, 8}, {12, 8}, {12, 8}, + {12, 8}, {12, 8}, {12, 8}, {12, 8}, + {11, 8}, {11, 8}, {11, 8}, {11, 8}, + {11, 8}, {11, 8}, {11, 8}, {11, 8}, + {10, 8}, {10, 8}, {10, 8}, {10, 8}, + {10, 8}, {10, 8}, {10, 8}, {10, 8}, + { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, + { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7} +}; -- cgit v1.2.3