diff options
Diffstat (limited to 'src')
25 files changed, 5270 insertions, 1329 deletions
diff --git a/src/libmpeg2new/libmpeg2/Makefile.am b/src/libmpeg2new/libmpeg2/Makefile.am index b834c7df5..f99894f12 100644 --- a/src/libmpeg2new/libmpeg2/Makefile.am +++ b/src/libmpeg2new/libmpeg2/Makefile.am @@ -2,12 +2,12 @@ include $(top_srcdir)/misc/Makefile.common noinst_LTLIBRARIES = libmpeg2.la libmpeg2arch.la -libmpeg2_la_SOURCES = alloc.c header.c decode.c slice.c motion_comp.c idct.c \ - motion_comp_mlib.c idct_mlib.c +libmpeg2_la_SOURCES = alloc.c header.c decode.c slice.c motion_comp.c idct.c libmpeg2_la_LIBADD = libmpeg2arch.la libmpeg2arch_la_SOURCES = motion_comp_mmx.c idct_mmx.c \ motion_comp_altivec.c idct_altivec.c \ motion_comp_alpha.c idct_alpha.c \ + motion_comp_vis.c \ cpu_accel.c cpu_state.c diff --git a/src/libmpeg2new/libmpeg2/alloc.c b/src/libmpeg2new/libmpeg2/alloc.c index 67a5d5c6a..f1a7afa1c 100644 --- a/src/libmpeg2new/libmpeg2/alloc.c +++ b/src/libmpeg2new/libmpeg2/alloc.c @@ -21,56 +21,50 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include "config.h" - #include <stdlib.h> #include <inttypes.h> #include "../include/mpeg2.h" -#include "mpeg2_internal.h" - -#if defined(HAVE_MEMALIGN) && !defined(__cplusplus) -/* some systems have memalign() but no declaration for it */ -void * memalign (size_t align, size_t size); -#endif -void * (* mpeg2_malloc_hook) (int size, int reason) = NULL; -int (* mpeg2_free_hook) (void * buf) = NULL; +static void * (* malloc_hook) (unsigned size, mpeg2_alloc_t reason) = NULL; +static int (* free_hook) (void * buf) = NULL; -void * mpeg2_malloc (int size, int reason) +void * mpeg2_malloc (unsigned size, mpeg2_alloc_t reason) { char * buf; - if (mpeg2_malloc_hook) { - buf = (char *) mpeg2_malloc_hook (size, reason); + if (malloc_hook) { + buf = (char *) malloc_hook (size, reason); if (buf) return buf; } -#if defined(HAVE_MEMALIGN) && !defined(__cplusplus) && !defined(DEBUG) - return memalign (16, size); -#else - buf = (char *) malloc (size + 15 + sizeof (void **)); - if (buf) 
{ - char * align_buf; + if (size) { + buf = (char *) malloc (size + 63 + sizeof (void **)); + if (buf) { + char * align_buf; - align_buf = buf + 15 + sizeof (void **); - align_buf -= (long)align_buf & 15; - *(((void **)align_buf) - 1) = buf; - return align_buf; + align_buf = buf + 63 + sizeof (void **); + align_buf -= (long)align_buf & 63; + *(((void **)align_buf) - 1) = buf; + return align_buf; + } } return NULL; -#endif } void mpeg2_free (void * buf) { - if (mpeg2_free_hook && mpeg2_free_hook (buf)) + if (free_hook && free_hook (buf)) return; -#if defined(HAVE_MEMALIGN) && !defined(__cplusplus) && !defined(DEBUG) - free (buf); -#else - free (*(((void **)buf) - 1)); -#endif + if (buf) + free (*(((void **)buf) - 1)); +} + +void mpeg2_malloc_hooks (void * malloc (unsigned, mpeg2_alloc_t), + int free (void *)) +{ + malloc_hook = malloc; + free_hook = free; } diff --git a/src/libmpeg2new/libmpeg2/configure.incl b/src/libmpeg2new/libmpeg2/configure.incl index aa9337774..f8dbd5aef 100644 --- a/src/libmpeg2new/libmpeg2/configure.incl +++ b/src/libmpeg2new/libmpeg2/configure.incl @@ -1,5 +1,4 @@ AC_SUBST([LIBMPEG2_CFLAGS]) -AC_SUBST([LIBMPEG2_LIBS]) dnl avoid -fPIC when possible AC_LIBTOOL_NON_PIC([LIBMPEG2_CFLAGS="$LIBMPEG2_CFLAGS -prefer-non-pic"]) @@ -10,16 +9,3 @@ AC_ARG_ENABLE([accel-detect], if test x"$enable_accel_detect" != x"no"; then AC_DEFINE([ACCEL_DETECT],,[autodetect accelerations]) fi - -dnl check for mlib -AC_ARG_ENABLE([mlib], - [ --disable-mlib make a version not using mediaLib]) -if test x"$enable_mlib" != x"no"; then - cflags_save="$CFLAGS" - CFLAGS="$OPT_CFLAGS -L/opt/SUNWmlib/lib -R/opt/SUNWmlib/lib $CFLAGS" - AC_CHECK_LIB([mlib],[mlib_VideoColorYUV2RGB420], - [AC_DEFINE([LIBMPEG2_MLIB],,[libmpeg2 mediaLib support]) - LIBMPEG2_CFLAGS="$LIBMPEG2_CFLAGS -I/opt/SUNWmlib/include" - LIBMPEG2_LIBS="$LIBMPEG2_LIBS -L/opt/SUNWmlib/lib -R/opt/SUNWmlib/lib -lmlib"]) - CFLAGS="$cflags_save" -fi diff --git a/src/libmpeg2new/libmpeg2/convert_internal.h 
b/src/libmpeg2new/libmpeg2/convert_internal.h new file mode 100644 index 000000000..d1e63d5e3 --- /dev/null +++ b/src/libmpeg2new/libmpeg2/convert_internal.h @@ -0,0 +1,42 @@ +/* + * convert_internal.h + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +typedef struct { + uint8_t * rgb_ptr; + int width; + int field; + int y_stride, rgb_stride, y_increm, uv_increm, rgb_increm, rgb_slice; + int chroma420, convert420; + int dither_offset, dither_stride; + int y_stride_frame, uv_stride_frame, rgb_stride_frame, rgb_stride_min; +} convert_rgb_t; + +typedef void mpeg2convert_copy_t (void * id, uint8_t * const * src, + unsigned int v_offset); + +mpeg2convert_copy_t * mpeg2convert_rgb_mmxext (int bpp, int mode, + const mpeg2_sequence_t * seq); +mpeg2convert_copy_t * mpeg2convert_rgb_mmx (int bpp, int mode, + const mpeg2_sequence_t * seq); +mpeg2convert_copy_t * mpeg2convert_rgb_vis (int bpp, int mode, + const mpeg2_sequence_t * seq); diff --git a/src/libmpeg2new/libmpeg2/cpu_accel.c b/src/libmpeg2new/libmpeg2/cpu_accel.c index dac3cf83d..7846f1e88 100644 
--- a/src/libmpeg2new/libmpeg2/cpu_accel.c +++ b/src/libmpeg2new/libmpeg2/cpu_accel.c @@ -1,6 +1,6 @@ /* * cpu_accel.c - * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 2000-2004 Michel Lespinasse <walken@zoy.org> * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. @@ -26,16 +26,25 @@ #include <inttypes.h> #include "../include/mpeg2.h" +#include "../include/attributes.h" +#include "mpeg2_internal.h" -#define ACCEL_DETECT /* Force accel on */ - -#ifdef ACCEL_DETECT #ifdef ARCH_X86 -static inline uint32_t arch_accel (void) +static inline uint32_t arch_accel (uint32_t accel) { - uint32_t eax, ebx, ecx, edx; - int AMD; - uint32_t caps; + if (accel & (MPEG2_ACCEL_X86_3DNOW | MPEG2_ACCEL_X86_MMXEXT)) + accel |= MPEG2_ACCEL_X86_MMX; + + if (accel & (MPEG2_ACCEL_X86_SSE2 | MPEG2_ACCEL_X86_SSE3)) + accel |= MPEG2_ACCEL_X86_MMXEXT; + + if (accel & (MPEG2_ACCEL_X86_SSE3)) + accel |= MPEG2_ACCEL_X86_SSE2; + +#ifdef ACCEL_DETECT + if (accel & MPEG2_ACCEL_DETECT) { + uint32_t eax, ebx, ecx, edx; + int AMD; #if !defined(PIC) && !defined(__PIC__) #define cpuid(op,eax,ebx,ecx,edx) \ @@ -60,55 +69,63 @@ static inline uint32_t arch_accel (void) : "cc") #endif - __asm__ ("pushf\n\t" - "pushf\n\t" - "pop %0\n\t" - "movl %0,%1\n\t" - "xorl $0x200000,%0\n\t" - "push %0\n\t" - "popf\n\t" - "pushf\n\t" - "pop %0\n\t" - "popf" - : "=r" (eax), - "=r" (ebx) - : - : "cc"); + __asm__ ("pushf\n\t" + "pushf\n\t" + "pop %0\n\t" + "movl %0,%1\n\t" + "xorl $0x200000,%0\n\t" + "push %0\n\t" + "popf\n\t" + "pushf\n\t" + "pop %0\n\t" + "popf" + : "=r" (eax), + "=r" (ebx) + : + : "cc"); - if (eax == ebx) /* no cpuid */ - return 0; + if (eax == ebx) /* no cpuid */ + return accel; - cpuid (0x00000000, eax, ebx, ecx, edx); - if (!eax) /* vendor string only */ - return 0; + cpuid (0x00000000, eax, ebx, ecx, edx); + if (!eax) /* vendor string only */ + return accel; - AMD = (ebx == 
0x68747541) && (ecx == 0x444d4163) && (edx == 0x69746e65); + AMD = (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65); - cpuid (0x00000001, eax, ebx, ecx, edx); - if (! (edx & 0x00800000)) /* no MMX */ - return 0; + cpuid (0x00000001, eax, ebx, ecx, edx); + if (! (edx & 0x00800000)) /* no MMX */ + return accel; - caps = MPEG2_ACCEL_X86_MMX; - if (edx & 0x02000000) /* SSE - identical to AMD MMX extensions */ - caps = MPEG2_ACCEL_X86_MMX | MPEG2_ACCEL_X86_MMXEXT; + accel |= MPEG2_ACCEL_X86_MMX; + if (edx & 0x02000000) /* SSE - identical to AMD MMX extensions */ + accel |= MPEG2_ACCEL_X86_MMXEXT; - cpuid (0x80000000, eax, ebx, ecx, edx); - if (eax < 0x80000001) /* no extended capabilities */ - return caps; + if (edx & 0x04000000) /* SSE2 */ + accel |= MPEG2_ACCEL_X86_SSE2; + + if (ecx & 0x00000001) /* SSE3 */ + accel |= MPEG2_ACCEL_X86_SSE3; + + cpuid (0x80000000, eax, ebx, ecx, edx); + if (eax < 0x80000001) /* no extended capabilities */ + return accel; - cpuid (0x80000001, eax, ebx, ecx, edx); + cpuid (0x80000001, eax, ebx, ecx, edx); - if (edx & 0x80000000) - caps |= MPEG2_ACCEL_X86_3DNOW; + if (edx & 0x80000000) + accel |= MPEG2_ACCEL_X86_3DNOW; - if (AMD && (edx & 0x00400000)) /* AMD MMX extensions */ - caps |= MPEG2_ACCEL_X86_MMXEXT; + if (AMD && (edx & 0x00400000)) /* AMD MMX extensions */ + accel |= MPEG2_ACCEL_X86_MMXEXT; + } +#endif /* ACCEL_DETECT */ - return caps; + return accel; } #endif /* ARCH_X86 */ -#ifdef ARCH_PPC +#if defined(ACCEL_DETECT) && (defined(ARCH_PPC) || defined(ARCH_SPARC)) #include <signal.h> #include <setjmp.h> @@ -125,60 +142,117 @@ static RETSIGTYPE sigill_handler (int sig) canjump = 0; siglongjmp (jmpbuf, 1); } +#endif /* ACCEL_DETECT && (ARCH_PPC || ARCH_SPARC) */ -static inline uint32_t arch_accel (void) +#ifdef ARCH_PPC +static inline uint32_t arch_accel (uint32_t accel) { - static RETSIGTYPE (* oldsig) (int); +#ifdef ACCEL_DETECT + if (accel & (MPEG2_ACCEL_PPC_ALTIVEC | MPEG2_ACCEL_DETECT) == + MPEG2_ACCEL_DETECT) { + 
static RETSIGTYPE (* oldsig) (int); - oldsig = signal (SIGILL, sigill_handler); - if (sigsetjmp (jmpbuf, 1)) { - signal (SIGILL, oldsig); - return 0; - } + oldsig = signal (SIGILL, sigill_handler); + if (sigsetjmp (jmpbuf, 1)) { + signal (SIGILL, oldsig); + return accel; + } - canjump = 1; + canjump = 1; #ifdef HAVE_ALTIVEC_H /* gnu */ #define VAND(a,b,c) "vand " #a "," #b "," #c "\n\t" #else /* apple */ #define VAND(a,b,c) "vand v" #a ",v" #b ",v" #c "\n\t" #endif - asm volatile ("mtspr 256, %0\n\t" - VAND (0, 0, 0) - : - : "r" (-1)); + asm volatile ("mtspr 256, %0\n\t" + VAND (0, 0, 0) + : + : "r" (-1)); - signal (SIGILL, oldsig); - return MPEG2_ACCEL_PPC_ALTIVEC; + canjump = 0; + accel |= MPEG2_ACCEL_PPC_ALTIVEC; + + signal (SIGILL, oldsig); + } +#endif /* ACCEL_DETECT */ + + return accel; } #endif /* ARCH_PPC */ -#ifdef ARCH_ALPHA -static inline uint32_t arch_accel (void) +#ifdef ARCH_SPARC +static inline uint32_t arch_accel (uint32_t accel) { - uint64_t no_mvi; + if (accel & MPEG2_ACCEL_SPARC_VIS2) + accel |= MPEG2_ACCEL_SPARC_VIS; + +#ifdef ACCEL_DETECT + if (accel & (MPEG2_ACCEL_SPARC_VIS2 | MPEG2_ACCEL_DETECT) == + MPEG2_ACCEL_DETECT) { + static RETSIGTYPE (* oldsig) (int); + + oldsig = signal (SIGILL, sigill_handler); + if (sigsetjmp (jmpbuf, 1)) { + signal (SIGILL, oldsig); + return accel; + } + + canjump = 1; + + /* pdist %f0, %f0, %f0 */ + __asm__ __volatile__(".word\t0x81b007c0"); - asm volatile ("amask %1, %0" - : "=r" (no_mvi) - : "rI" (256)); /* AMASK_MVI */ - return no_mvi ? 
MPEG2_ACCEL_ALPHA : (MPEG2_ACCEL_ALPHA | - MPEG2_ACCEL_ALPHA_MVI); + canjump = 0; + accel |= MPEG2_ACCEL_SPARC_VIS; + + if (sigsetjmp (jmpbuf, 1)) { + signal (SIGILL, oldsig); + return accel; + } + + canjump = 1; + + /* edge8n %g0, %g0, %g0 */ + __asm__ __volatile__(".word\t0x81b00020"); + + canjump = 0; + accel |= MPEG2_ACCEL_SPARC_VIS2; + + signal (SIGILL, oldsig); + } +#endif /* ACCEL_DETECT */ + + return accel; } -#endif /* ARCH_ALPHA */ -#endif +#endif /* ARCH_SPARC */ -uint32_t mpeg2_detect_accel (void) +#ifdef ARCH_ALPHA +static inline uint32_t arch_accel (uint32_t accel) { - uint32_t accel; + if (accel & MPEG2_ACCEL_ALPHA_MVI) + accel |= MPEG2_ACCEL_ALPHA; - accel = 0; #ifdef ACCEL_DETECT -#ifdef LIBMPEG2_MLIB - accel = MPEG2_ACCEL_MLIB; -#endif -#if defined (ARCH_X86) || defined (ARCH_PPC) || defined (ARCH_ALPHA) - accel |= arch_accel (); -#endif + if (accel & MPEG2_ACCEL_DETECT) { + uint64_t no_mvi; + + asm volatile ("amask %1, %0" + : "=r" (no_mvi) + : "rI" (256)); /* AMASK_MVI */ + accel |= no_mvi ? 
MPEG2_ACCEL_ALPHA : (MPEG2_ACCEL_ALPHA | + MPEG2_ACCEL_ALPHA_MVI); + } +#endif /* ACCEL_DETECT */ + + return accel; +} +#endif /* ARCH_ALPHA */ + +uint32_t mpeg2_detect_accel (uint32_t accel) +{ +#if defined (ARCH_X86) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC) + accel = arch_accel (accel); #endif return accel; } diff --git a/src/libmpeg2new/libmpeg2/cpu_state.c b/src/libmpeg2new/libmpeg2/cpu_state.c index 6761747fa..edbf2dd28 100644 --- a/src/libmpeg2new/libmpeg2/cpu_state.c +++ b/src/libmpeg2new/libmpeg2/cpu_state.c @@ -27,8 +27,8 @@ #include <inttypes.h> #include "../include/mpeg2.h" -#include "mpeg2_internal.h" #include "../include/attributes.h" +#include "mpeg2_internal.h" #ifdef ARCH_X86 #include "../include/mmx.h" #endif diff --git a/src/libmpeg2new/libmpeg2/decode.c b/src/libmpeg2new/libmpeg2/decode.c index 7d096c835..337ba4466 100644 --- a/src/libmpeg2new/libmpeg2/decode.c +++ b/src/libmpeg2new/libmpeg2/decode.c @@ -23,14 +23,13 @@ #include "config.h" -#include <stdio.h> /* For testing printf */ #include <string.h> /* memcmp/memset, try to remove */ #include <stdlib.h> #include <inttypes.h> #include "../include/mpeg2.h" +#include "../include/attributes.h" #include "mpeg2_internal.h" -#include "../include/convert.h" static int mpeg2_accels = 0; @@ -45,7 +44,6 @@ static inline int skip_chunk (mpeg2dec_t * mpeg2dec, int bytes) { uint8_t * current; uint32_t shift; - uint8_t * chunk_ptr; uint8_t * limit; uint8_t byte; @@ -54,7 +52,6 @@ static inline int skip_chunk (mpeg2dec_t * mpeg2dec, int bytes) current = mpeg2dec->buf_start; shift = mpeg2dec->shift; - chunk_ptr = mpeg2dec->chunk_ptr; limit = current + bytes; do { @@ -129,30 +126,25 @@ static inline mpeg2_state_t seek_chunk (mpeg2dec_t * mpeg2dec) size = mpeg2dec->buf_end - mpeg2dec->buf_start; skipped = skip_chunk (mpeg2dec, size); if (!skipped) { - mpeg2dec->bytes_since_pts += size; + mpeg2dec->bytes_since_tag += size; return STATE_BUFFER; } - mpeg2dec->bytes_since_pts += 
skipped; + mpeg2dec->bytes_since_tag += skipped; mpeg2dec->code = mpeg2dec->buf_start[-1]; - return (mpeg2_state_t)-1; + return STATE_INTERNAL_NORETURN; } -static mpeg2_state_t seek_header (mpeg2dec_t * mpeg2dec) +mpeg2_state_t mpeg2_seek_header (mpeg2dec_t * mpeg2dec) { - while (mpeg2dec->code != 0xb3 && - ((mpeg2dec->code != 0xb7 && mpeg2dec->code != 0xb8 && - mpeg2dec->code) || mpeg2dec->sequence.width == (unsigned)-1)) + while (!(mpeg2dec->code == 0xb3 || + ((mpeg2dec->code == 0xb7 || mpeg2dec->code == 0xb8 || + !mpeg2dec->code) && mpeg2dec->sequence.width != (unsigned)-1))) if (seek_chunk (mpeg2dec) == STATE_BUFFER) return STATE_BUFFER; mpeg2dec->chunk_start = mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer; - return (mpeg2dec->code ? mpeg2_parse_header (mpeg2dec) : - mpeg2_header_picture_start (mpeg2dec)); -} - -mpeg2_state_t mpeg2_seek_sequence (mpeg2dec_t * mpeg2dec) -{ - mpeg2dec->sequence.width = (unsigned)-1; - return seek_header (mpeg2dec); + mpeg2dec->user_data_len = 0; + return ((mpeg2dec->code == 0xb7) ? 
+ mpeg2_header_end (mpeg2dec) : mpeg2_parse_header (mpeg2dec)); } #define RECEIVED(code,state) (((state) << 8) + (code)) @@ -165,7 +157,7 @@ mpeg2_state_t mpeg2_parse (mpeg2dec_t * mpeg2dec) mpeg2_state_t state; state = mpeg2dec->action (mpeg2dec); - if ((int)state >= 0) + if ((int)state > (int)STATE_INTERNAL_NORETURN) return state; } @@ -178,7 +170,7 @@ mpeg2_state_t mpeg2_parse (mpeg2dec_t * mpeg2dec) if (size_buffer <= size_chunk) { copied = copy_chunk (mpeg2dec, size_buffer); if (!copied) { - mpeg2dec->bytes_since_pts += size_buffer; + mpeg2dec->bytes_since_tag += size_buffer; mpeg2dec->chunk_ptr += size_buffer; return STATE_BUFFER; } @@ -186,12 +178,12 @@ mpeg2_state_t mpeg2_parse (mpeg2dec_t * mpeg2dec) copied = copy_chunk (mpeg2dec, size_chunk); if (!copied) { /* filled the chunk buffer without finding a start code */ - mpeg2dec->bytes_since_pts += size_chunk; + mpeg2dec->bytes_since_tag += size_chunk; mpeg2dec->action = seek_chunk; return STATE_INVALID; } } - mpeg2dec->bytes_since_pts += copied; + mpeg2dec->bytes_since_tag += copied; mpeg2_slice (&(mpeg2dec->decoder), mpeg2dec->code, mpeg2dec->chunk_start); @@ -203,64 +195,19 @@ mpeg2_state_t mpeg2_parse (mpeg2dec_t * mpeg2dec) if (seek_chunk (mpeg2dec) == STATE_BUFFER) return STATE_BUFFER; } + + mpeg2dec->action = mpeg2_seek_header; switch (mpeg2dec->code) { case 0x00: - mpeg2dec->action = mpeg2_header_picture_start; - if (mpeg2dec->state == STATE_SLICE) { - mpeg2dec->info.current_picture = mpeg2dec->info.current_picture_2nd = NULL; - mpeg2dec->info.display_picture = mpeg2dec->info.display_picture_2nd = NULL; - mpeg2dec->info.current_fbuf = mpeg2dec->info.display_fbuf = mpeg2dec->info.discard_fbuf = NULL; - mpeg2dec->info.user_data = NULL; - mpeg2dec->info.user_data_len = 0; - - mpeg2dec->info.current_fbuf = mpeg2dec->fbuf[0]; - if (mpeg2dec->decoder.coding_type == B_TYPE) { - mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[0]; - mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[0]; - mpeg2dec->fbuf[0]=0; - } else 
{ - mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[1]; - mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[2]; - mpeg2dec->fbuf[2]=0; - } - } - return mpeg2dec->state; - case 0xb7: - mpeg2dec->action = mpeg2_header_end; - break; case 0xb3: + case 0xb7: case 0xb8: - mpeg2dec->action = mpeg2_parse_header; - break; - case 0xb2: - printf("libmpeg2:USER DATA for CC\n"); + return (mpeg2dec->state == STATE_SLICE) ? STATE_SLICE : STATE_INVALID; default: mpeg2dec->action = seek_chunk; return STATE_INVALID; } - if (mpeg2dec->state == STATE_SLICE) { - mpeg2dec->info.current_picture = mpeg2dec->info.current_picture_2nd = NULL; - mpeg2dec->info.display_picture = mpeg2dec->info.display_picture_2nd = NULL; - mpeg2dec->info.current_fbuf = mpeg2dec->info.display_fbuf = mpeg2dec->info.discard_fbuf = NULL; - mpeg2dec->info.user_data = NULL; - mpeg2dec->info.user_data_len = 0; - - mpeg2dec->info.current_fbuf = mpeg2dec->fbuf[0]; - if (mpeg2dec->decoder.coding_type == B_TYPE) { - mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[0]; - mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[0]; - mpeg2dec->fbuf[0]=0; - } else { - mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[1]; - mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[2]; - mpeg2dec->fbuf[2]=0; - } - } - - - - return (mpeg2dec->state == STATE_SLICE) ? 
STATE_SLICE : STATE_INVALID; } mpeg2_state_t mpeg2_parse_header (mpeg2dec_t * mpeg2dec) @@ -272,6 +219,7 @@ mpeg2_state_t mpeg2_parse_header (mpeg2dec_t * mpeg2dec) int size_buffer, size_chunk, copied; mpeg2dec->action = mpeg2_parse_header; + mpeg2dec->info.user_data = NULL; mpeg2dec->info.user_data_len = 0; while (1) { size_buffer = mpeg2dec->buf_end - mpeg2dec->buf_start; size_chunk = (mpeg2dec->chunk_buffer + BUFFER_SIZE - @@ -279,7 +227,7 @@ mpeg2_state_t mpeg2_parse_header (mpeg2dec_t * mpeg2dec) if (size_buffer <= size_chunk) { copied = copy_chunk (mpeg2dec, size_buffer); if (!copied) { - mpeg2dec->bytes_since_pts += size_buffer; + mpeg2dec->bytes_since_tag += size_buffer; mpeg2dec->chunk_ptr += size_buffer; return STATE_BUFFER; } @@ -287,17 +235,17 @@ mpeg2_state_t mpeg2_parse_header (mpeg2dec_t * mpeg2dec) copied = copy_chunk (mpeg2dec, size_chunk); if (!copied) { /* filled the chunk buffer without finding a start code */ - mpeg2dec->bytes_since_pts += size_chunk; + mpeg2dec->bytes_since_tag += size_chunk; mpeg2dec->code = 0xb4; - mpeg2dec->action = seek_header; + mpeg2dec->action = mpeg2_seek_header; return STATE_INVALID; } } - mpeg2dec->bytes_since_pts += copied; + mpeg2dec->bytes_since_tag += copied; if (process_header[mpeg2dec->code & 0x0b] (mpeg2dec)) { mpeg2dec->code = mpeg2dec->buf_start[-1]; - mpeg2dec->action = seek_header; + mpeg2dec->action = mpeg2_seek_header; return STATE_INVALID; } @@ -306,18 +254,17 @@ mpeg2_state_t mpeg2_parse_header (mpeg2dec_t * mpeg2dec) /* state transition after a sequence header */ case RECEIVED (0x00, STATE_SEQUENCE): - mpeg2dec->action = mpeg2_header_picture_start; case RECEIVED (0xb8, STATE_SEQUENCE): mpeg2_header_sequence_finalize (mpeg2dec); break; /* other legal state transitions */ case RECEIVED (0x00, STATE_GOP): - mpeg2dec->action = mpeg2_header_picture_start; + mpeg2_header_gop_finalize (mpeg2dec); break; case RECEIVED (0x01, STATE_PICTURE): case RECEIVED (0x01, STATE_PICTURE_2ND): - 
mpeg2_header_matrix_finalize (mpeg2dec); + mpeg2_header_picture_finalize (mpeg2dec, mpeg2_accels); mpeg2dec->action = mpeg2_header_slice_start; break; @@ -333,48 +280,49 @@ mpeg2_state_t mpeg2_parse_header (mpeg2dec_t * mpeg2dec) continue; default: - mpeg2dec->action = seek_header; + mpeg2dec->action = mpeg2_seek_header; return STATE_INVALID; } mpeg2dec->chunk_start = mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer; + mpeg2dec->user_data_len = 0; return mpeg2dec->state; } } -void mpeg2_convert (mpeg2dec_t * mpeg2dec, - void (* convert) (int, int, uint32_t, void *, - struct convert_init_s *), void * arg) +int mpeg2_convert (mpeg2dec_t * mpeg2dec, mpeg2_convert_t convert, void * arg) +{ + mpeg2_convert_init_t convert_init; + int error; + + error = convert (MPEG2_CONVERT_SET, NULL, &(mpeg2dec->sequence), 0, + mpeg2_accels, arg, &convert_init); + if (!error) { + mpeg2dec->convert = convert; + mpeg2dec->convert_arg = arg; + mpeg2dec->convert_id_size = convert_init.id_size; + mpeg2dec->convert_stride = 0; + } + return error; +} + +int mpeg2_stride (mpeg2dec_t * mpeg2dec, int stride) { - convert_init_t convert_init; - int size; - - convert_init.id = NULL; - convert (mpeg2dec->decoder.width, mpeg2dec->decoder.height, - mpeg2_accels, arg, &convert_init); - if (convert_init.id_size) { - convert_init.id = mpeg2dec->convert_id = - mpeg2_malloc (convert_init.id_size, ALLOC_CONVERT_ID); - convert (mpeg2dec->decoder.width, mpeg2dec->decoder.height, - mpeg2_accels, arg, &convert_init); + if (!mpeg2dec->convert) { + if (stride < (int) mpeg2dec->sequence.width) + stride = mpeg2dec->sequence.width; + mpeg2dec->decoder.stride_frame = stride; + } else { + mpeg2_convert_init_t convert_init; + + stride = mpeg2dec->convert (MPEG2_CONVERT_STRIDE, NULL, + &(mpeg2dec->sequence), stride, + mpeg2_accels, mpeg2dec->convert_arg, + &convert_init); + mpeg2dec->convert_id_size = convert_init.id_size; + mpeg2dec->convert_stride = stride; } - mpeg2dec->convert_size[0] = size = convert_init.buf_size[0]; 
- mpeg2dec->convert_size[1] = size += convert_init.buf_size[1]; - mpeg2dec->convert_size[2] = size += convert_init.buf_size[2]; - mpeg2dec->convert_start = convert_init.start; - mpeg2dec->convert_copy = convert_init.copy; - - size = mpeg2dec->decoder.width * mpeg2dec->decoder.height >> 2; - mpeg2dec->yuv_buf[0][0] = (uint8_t *) mpeg2_malloc (6 * size, ALLOC_YUV); - mpeg2dec->yuv_buf[0][1] = mpeg2dec->yuv_buf[0][0] + 4 * size; - mpeg2dec->yuv_buf[0][2] = mpeg2dec->yuv_buf[0][0] + 5 * size; - mpeg2dec->yuv_buf[1][0] = (uint8_t *) mpeg2_malloc (6 * size, ALLOC_YUV); - mpeg2dec->yuv_buf[1][1] = mpeg2dec->yuv_buf[1][0] + 4 * size; - mpeg2dec->yuv_buf[1][2] = mpeg2dec->yuv_buf[1][0] + 5 * size; - size = mpeg2dec->decoder.width * 8; - mpeg2dec->yuv_buf[2][0] = (uint8_t *) mpeg2_malloc (6 * size, ALLOC_YUV); - mpeg2dec->yuv_buf[2][1] = mpeg2dec->yuv_buf[2][0] + 4 * size; - mpeg2dec->yuv_buf[2][2] = mpeg2dec->yuv_buf[2][0] + 5 * size; + return stride; } void mpeg2_set_buf (mpeg2dec_t * mpeg2dec, uint8_t * buf[3], void * id) @@ -382,12 +330,13 @@ void mpeg2_set_buf (mpeg2dec_t * mpeg2dec, uint8_t * buf[3], void * id) mpeg2_fbuf_t * fbuf; if (mpeg2dec->custom_fbuf) { - mpeg2_set_fbuf (mpeg2dec, mpeg2dec->decoder.coding_type); - fbuf = mpeg2dec->fbuf[0]; if (mpeg2dec->state == STATE_SEQUENCE) { mpeg2dec->fbuf[2] = mpeg2dec->fbuf[1]; mpeg2dec->fbuf[1] = mpeg2dec->fbuf[0]; } + mpeg2_set_fbuf (mpeg2dec, (mpeg2dec->decoder.coding_type == + PIC_FLAG_CODING_TYPE_B)); + fbuf = mpeg2dec->fbuf[0]; } else { fbuf = &(mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index].fbuf); mpeg2dec->alloc_index_user = ++mpeg2dec->alloc_index; @@ -401,10 +350,6 @@ void mpeg2_set_buf (mpeg2dec_t * mpeg2dec, uint8_t * buf[3], void * id) void mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf) { mpeg2dec->custom_fbuf = custom_fbuf; - mpeg2dec->fbuf[0] = NULL; - mpeg2dec->fbuf[1] = NULL; - mpeg2dec->fbuf[2] = NULL; - } void mpeg2_skip (mpeg2dec_t * mpeg2dec, int skip) @@ -421,27 +366,48 @@ void 
mpeg2_slice_region (mpeg2dec_t * mpeg2dec, int start, int end) mpeg2dec->nb_decode_slices = end - start; } -void mpeg2_pts (mpeg2dec_t * mpeg2dec, uint32_t pts) +void mpeg2_tag_picture (mpeg2dec_t * mpeg2dec, uint32_t tag, uint32_t tag2) { - mpeg2dec->pts_previous = mpeg2dec->pts_current; - mpeg2dec->pts_current = pts; - mpeg2dec->num_pts++; - mpeg2dec->bytes_since_pts = 0; + mpeg2dec->tag_previous = mpeg2dec->tag_current; + mpeg2dec->tag2_previous = mpeg2dec->tag2_current; + mpeg2dec->tag_current = tag; + mpeg2dec->tag2_current = tag2; + mpeg2dec->num_tags++; + mpeg2dec->bytes_since_tag = 0; } uint32_t mpeg2_accel (uint32_t accel) { if (!mpeg2_accels) { - if (accel & MPEG2_ACCEL_DETECT) - accel |= mpeg2_detect_accel (); - mpeg2_accels = accel |= MPEG2_ACCEL_DETECT; - mpeg2_cpu_state_init (accel); - mpeg2_idct_init (accel); - mpeg2_mc_init (accel); + mpeg2_accels = mpeg2_detect_accel (accel) | MPEG2_ACCEL_DETECT; + mpeg2_cpu_state_init (mpeg2_accels); + mpeg2_idct_init (mpeg2_accels); + mpeg2_mc_init (mpeg2_accels); } return mpeg2_accels & ~MPEG2_ACCEL_DETECT; } +void mpeg2_reset (mpeg2dec_t * mpeg2dec, int full_reset) +{ + mpeg2dec->buf_start = mpeg2dec->buf_end = NULL; + mpeg2dec->num_tags = 0; + mpeg2dec->shift = 0xffffff00; + mpeg2dec->code = 0xb4; + mpeg2dec->action = mpeg2_seek_header; + mpeg2dec->state = STATE_INVALID; + mpeg2dec->first = 1; + + mpeg2_reset_info(&(mpeg2dec->info)); + mpeg2dec->info.gop = NULL; + mpeg2dec->info.user_data = NULL; + mpeg2dec->info.user_data_len = 0; + if (full_reset) { + mpeg2dec->info.sequence = NULL; + mpeg2_header_state_init (mpeg2dec); + } + +} + mpeg2dec_t * mpeg2_init (void) { mpeg2dec_t * mpeg2dec; @@ -449,42 +415,25 @@ mpeg2dec_t * mpeg2_init (void) mpeg2_accel (MPEG2_ACCEL_DETECT); mpeg2dec = (mpeg2dec_t *) mpeg2_malloc (sizeof (mpeg2dec_t), - ALLOC_MPEG2DEC); + MPEG2_ALLOC_MPEG2DEC); if (mpeg2dec == NULL) return NULL; - memset (mpeg2dec, 0, sizeof (mpeg2dec_t)); + memset (mpeg2dec->decoder.DCTblock, 0, 64 * sizeof 
(int16_t)); + memset (mpeg2dec->quantizer_matrix, 0, 4 * 64 * sizeof (uint8_t)); mpeg2dec->chunk_buffer = (uint8_t *) mpeg2_malloc (BUFFER_SIZE + 4, - ALLOC_CHUNK); + MPEG2_ALLOC_CHUNK); - mpeg2dec->shift = 0xffffff00; - mpeg2dec->action = mpeg2_seek_sequence; - mpeg2dec->code = 0xb4; - mpeg2dec->first_decode_slice = 1; - mpeg2dec->nb_decode_slices = 0xb0 - 1; - mpeg2dec->convert_id = NULL; + mpeg2dec->sequence.width = (unsigned)-1; + mpeg2_reset (mpeg2dec, 1); - /* initialize substructures */ - mpeg2_header_state_init (mpeg2dec); return mpeg2dec; } void mpeg2_close (mpeg2dec_t * mpeg2dec) { - int i; - - /* static uint8_t finalizer[] = {0,0,1,0xb4}; */ - /* mpeg2_decode_data (mpeg2dec, finalizer, finalizer+4); */ - + mpeg2_header_state_init (mpeg2dec); mpeg2_free (mpeg2dec->chunk_buffer); - if (!mpeg2dec->custom_fbuf) - for (i = mpeg2dec->alloc_index_user; i < mpeg2dec->alloc_index; i++) - mpeg2_free (mpeg2dec->fbuf_alloc[i].fbuf.buf[0]); - if (mpeg2dec->convert_start) - for (i = 0; i < 3; i++) - mpeg2_free (mpeg2dec->yuv_buf[i][0]); - if (mpeg2dec->convert_id) - mpeg2_free (mpeg2dec->convert_id); mpeg2_free (mpeg2dec); } diff --git a/src/libmpeg2new/libmpeg2/header.c b/src/libmpeg2new/libmpeg2/header.c index 894a86b30..935a50aa3 100644 --- a/src/libmpeg2new/libmpeg2/header.c +++ b/src/libmpeg2new/libmpeg2/header.c @@ -1,6 +1,7 @@ /* * header.c * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 2003 Regis Duchesne <hpreg@zoy.org> * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. 
@@ -23,16 +24,13 @@ #include "config.h" -#include <stdio.h> /* For printf */ #include <inttypes.h> #include <stdlib.h> /* defines NULL */ #include <string.h> /* memcmp */ -#include <assert.h> #include "../include/mpeg2.h" -#include "mpeg2_internal.h" -#include "../include/convert.h" #include "../include/attributes.h" +#include "mpeg2_internal.h" #define SEQ_EXT 2 #define SEQ_DISPLAY_EXT 4 @@ -78,7 +76,29 @@ uint8_t mpeg2_scan_alt[64] ATTR_ALIGN(16) = { void mpeg2_header_state_init (mpeg2dec_t * mpeg2dec) { - mpeg2dec->decoder.scan = mpeg2_scan_norm; + if (mpeg2dec->sequence.width != (unsigned)-1) { + int i; + + mpeg2dec->sequence.width = (unsigned)-1; + if (!mpeg2dec->custom_fbuf) + for (i = mpeg2dec->alloc_index_user; + i < mpeg2dec->alloc_index; i++) { + mpeg2_free (mpeg2dec->fbuf_alloc[i].fbuf.buf[0]); + mpeg2_free (mpeg2dec->fbuf_alloc[i].fbuf.buf[1]); + mpeg2_free (mpeg2dec->fbuf_alloc[i].fbuf.buf[2]); + } + if (mpeg2dec->convert_start) + for (i = 0; i < 3; i++) { + mpeg2_free (mpeg2dec->yuv_buf[i][0]); + mpeg2_free (mpeg2dec->yuv_buf[i][1]); + mpeg2_free (mpeg2dec->yuv_buf[i][2]); + } + if (mpeg2dec->decoder.convert_id) + mpeg2_free (mpeg2dec->decoder.convert_id); + } + mpeg2dec->decoder.coding_type = I_TYPE; + mpeg2dec->decoder.convert = NULL; + mpeg2dec->decoder.convert_id = NULL; mpeg2dec->picture = mpeg2dec->pictures; mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[0].fbuf; mpeg2dec->fbuf[1] = &mpeg2dec->fbuf_alloc[1].fbuf; @@ -86,22 +106,39 @@ void mpeg2_header_state_init (mpeg2dec_t * mpeg2dec) mpeg2dec->first = 1; mpeg2dec->alloc_index = 0; mpeg2dec->alloc_index_user = 0; + mpeg2dec->first_decode_slice = 1; + mpeg2dec->nb_decode_slices = 0xb0 - 1; + mpeg2dec->convert = NULL; + mpeg2dec->convert_start = NULL; + mpeg2dec->custom_fbuf = 0; + mpeg2dec->yuv_index = 0; } -static void reset_info (mpeg2_info_t * info) +void mpeg2_reset_info (mpeg2_info_t * info) { info->current_picture = info->current_picture_2nd = NULL; info->display_picture = 
info->display_picture_2nd = NULL; info->current_fbuf = info->display_fbuf = info->discard_fbuf = NULL; - info->user_data = NULL; info->user_data_len = 0; +} + +static void info_user_data (mpeg2dec_t * mpeg2dec) +{ + if (mpeg2dec->user_data_len) { + mpeg2dec->info.user_data = mpeg2dec->chunk_buffer; + mpeg2dec->info.user_data_len = mpeg2dec->user_data_len - 3; + } } int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec) { uint8_t * buffer = mpeg2dec->chunk_start; mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence); - static unsigned int frame_period[9] = { - 0, 1126125, 1125000, 1080000, 900900, 900000, 540000, 450450, 450000 + static unsigned int frame_period[16] = { + 0, 1126125, 1125000, 1080000, 900900, 900000, 540000, 450450, 450000, + /* unofficial: xing 15 fps */ + 1800000, + /* unofficial: libmpeg3 "Unofficial economy rates" 5/10/12/15 fps */ + 5400000, 2700000, 2250000, 1800000, 0, 0 }; int i; @@ -122,9 +159,7 @@ int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec) SEQ_VIDEO_FORMAT_UNSPECIFIED); sequence->pixel_width = buffer[3] >> 4; /* aspect ratio */ - sequence->frame_period = 0; - if ((buffer[3] & 15) < 9) - sequence->frame_period = frame_period[buffer[3] & 15]; + sequence->frame_period = frame_period[buffer[3] & 15]; sequence->byte_rate = (buffer[4]<<10) | (buffer[5]<<2) | (buffer[6]>>6); @@ -136,21 +171,20 @@ int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec) mpeg2dec->copy_matrix = 3; if (buffer[7] & 2) { for (i = 0; i < 64; i++) - mpeg2dec->intra_quantizer_matrix[mpeg2_scan_norm[i]] = + mpeg2dec->new_quantizer_matrix[0][mpeg2_scan_norm[i]] = (buffer[i+7] << 7) | (buffer[i+8] >> 1); buffer += 64; } else for (i = 0; i < 64; i++) - mpeg2dec->intra_quantizer_matrix[mpeg2_scan_norm[i]] = + mpeg2dec->new_quantizer_matrix[0][mpeg2_scan_norm[i]] = default_intra_quantizer_matrix[i]; if (buffer[7] & 1) for (i = 0; i < 64; i++) - mpeg2dec->non_intra_quantizer_matrix[mpeg2_scan_norm[i]] = + mpeg2dec->new_quantizer_matrix[1][mpeg2_scan_norm[i]] = buffer[i+8]; else - 
for (i = 0; i < 64; i++) - mpeg2dec->non_intra_quantizer_matrix[i] = 16; + memset (mpeg2dec->new_quantizer_matrix[1], 16, 64); sequence->profile_level_id = 0x80; sequence->colour_primaries = 0; @@ -161,8 +195,6 @@ int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec) mpeg2dec->state = STATE_SEQUENCE; mpeg2dec->display_offset_x = mpeg2dec->display_offset_y = 0; - reset_info (&(mpeg2dec->info)); - mpeg2dec->info.gop = NULL; return 0; } @@ -218,12 +250,11 @@ static int sequence_display_ext (mpeg2dec_t * mpeg2dec) { uint8_t * buffer = mpeg2dec->chunk_start; mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence); - uint32_t flags; - flags = ((sequence->flags & ~SEQ_MASK_VIDEO_FORMAT) | - ((buffer[0]<<4) & SEQ_MASK_VIDEO_FORMAT)); + sequence->flags = ((sequence->flags & ~SEQ_MASK_VIDEO_FORMAT) | + ((buffer[0]<<4) & SEQ_MASK_VIDEO_FORMAT)); if (buffer[0] & 1) { - flags |= SEQ_FLAG_COLOUR_DESCRIPTION; + sequence->flags |= SEQ_FLAG_COLOUR_DESCRIPTION; sequence->colour_primaries = buffer[1]; sequence->transfer_characteristics = buffer[2]; sequence->matrix_coefficients = buffer[3]; @@ -240,6 +271,17 @@ static int sequence_display_ext (mpeg2dec_t * mpeg2dec) return 0; } +static inline void simplify (unsigned int * u, unsigned int * v) +{ + unsigned int a, b, tmp; + + a = *u; b = *v; + while (a) { /* find greatest common divisor */ + tmp = a; a = b % tmp; b = tmp; + } + *u /= b; *v /= b; +} + static inline void finalize_sequence (mpeg2_sequence_t * sequence) { int width; @@ -276,8 +318,10 @@ static inline void finalize_sequence (mpeg2_sequence_t * sequence) sequence->pixel_width = 64; sequence->pixel_height = 45; return; case 6: /* 720x480 16:9 */ sequence->pixel_width = 32; sequence->pixel_height = 27; return; - case 12: /* 720*480 4:3 */ - sequence->pixel_width = 8; sequence->pixel_height = 9; return; + case 8: /* BT.601 625 lines 4:3 */ + sequence->pixel_width = 59; sequence->pixel_height = 54; return; + case 12: /* BT.601 525 lines 4:3 */ + sequence->pixel_width = 10; 
sequence->pixel_height = 11; return; default: height = 88 * sequence->pixel_width + 1171; width = 2000; @@ -286,28 +330,120 @@ static inline void finalize_sequence (mpeg2_sequence_t * sequence) sequence->pixel_width = width; sequence->pixel_height = height; - while (width) { /* find greatest common divisor */ - int tmp = width; - width = height % tmp; - height = tmp; + simplify (&sequence->pixel_width, &sequence->pixel_height); +} + +int mpeg2_guess_aspect (const mpeg2_sequence_t * sequence, + unsigned int * pixel_width, + unsigned int * pixel_height) +{ + static struct { + unsigned int width, height; + } video_modes[] = { + {720, 576}, /* 625 lines, 13.5 MHz (D1, DV, DVB, DVD) */ + {704, 576}, /* 625 lines, 13.5 MHz (1/1 D1, DVB, DVD, 4CIF) */ + {544, 576}, /* 625 lines, 10.125 MHz (DVB, laserdisc) */ + {528, 576}, /* 625 lines, 10.125 MHz (3/4 D1, DVB, laserdisc) */ + {480, 576}, /* 625 lines, 9 MHz (2/3 D1, DVB, SVCD) */ + {352, 576}, /* 625 lines, 6.75 MHz (D2, 1/2 D1, CVD, DVB, DVD) */ + {352, 288}, /* 625 lines, 6.75 MHz, 1 field (D4, VCD, DVB, DVD, CIF) */ + {176, 144}, /* 625 lines, 3.375 MHz, half field (QCIF) */ + {720, 486}, /* 525 lines, 13.5 MHz (D1) */ + {704, 486}, /* 525 lines, 13.5 MHz */ + {720, 480}, /* 525 lines, 13.5 MHz (DV, DSS, DVD) */ + {704, 480}, /* 525 lines, 13.5 MHz (1/1 D1, ATSC, DVD) */ + {544, 480}, /* 525 lines. 10.125 MHz (DSS, laserdisc) */ + {528, 480}, /* 525 lines. 10.125 MHz (3/4 D1, laserdisc) */ + {480, 480}, /* 525 lines, 9 MHz (2/3 D1, SVCD) */ + {352, 480}, /* 525 lines, 6.75 MHz (D2, 1/2 D1, CVD, DVD) */ + {352, 240} /* 525 lines. 
6.75 MHz, 1 field (D4, VCD, DSS, DVD) */ + }; + unsigned int width, height, pix_width, pix_height, i, DAR_16_9; + + *pixel_width = sequence->pixel_width; + *pixel_height = sequence->pixel_height; + width = sequence->picture_width; + height = sequence->picture_height; + for (i = 0; i < sizeof (video_modes) / sizeof (video_modes[0]); i++) + if (width == video_modes[i].width && height == video_modes[i].height) + break; + if (i == sizeof (video_modes) / sizeof (video_modes[0]) || + (sequence->pixel_width == 1 && sequence->pixel_height == 1) || + width != sequence->display_width || height != sequence->display_height) + return 0; + + for (pix_height = 1; height * pix_height < 480; pix_height <<= 1); + height *= pix_height; + for (pix_width = 1; width * pix_width <= 352; pix_width <<= 1); + width *= pix_width; + + if (! (sequence->flags & SEQ_FLAG_MPEG2)) { + static unsigned int mpeg1_check[2][2] = {{11, 54}, {27, 45}}; + DAR_16_9 = (sequence->pixel_height == 27 || + sequence->pixel_height == 45); + if (width < 704 || + sequence->pixel_height != mpeg1_check[DAR_16_9][height == 576]) + return 0; + } else { + DAR_16_9 = (3 * sequence->picture_width * sequence->pixel_width > + 4 * sequence->picture_height * sequence->pixel_height); + switch (width) { + case 528: case 544: pix_width *= 4; pix_height *= 3; break; + case 480: pix_width *= 3; pix_height *= 2; break; + } + } + if (DAR_16_9) { + pix_width *= 4; pix_height *= 3; } - sequence->pixel_width /= height; - sequence->pixel_height /= height; + if (height == 576) { + pix_width *= 59; pix_height *= 54; + } else { + pix_width *= 10; pix_height *= 11; + } + *pixel_width = pix_width; + *pixel_height = pix_height; + simplify (pixel_width, pixel_height); + return (height == 576) ? 
1 : 2; } -void mpeg2_header_matrix_finalize (mpeg2dec_t * mpeg2dec) +static void copy_matrix (mpeg2dec_t * mpeg2dec, int index) +{ + if (memcmp (mpeg2dec->quantizer_matrix[index], + mpeg2dec->new_quantizer_matrix[index], 64)) { + memcpy (mpeg2dec->quantizer_matrix[index], + mpeg2dec->new_quantizer_matrix[index], 64); + mpeg2dec->scaled[index] = -1; + } +} + +static void finalize_matrix (mpeg2dec_t * mpeg2dec) { mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); int i; - if (mpeg2dec->copy_matrix & 1) - for (i = 0; i < 64; i++) - decoder->intra_quantizer_matrix[i] = - mpeg2dec->intra_quantizer_matrix[i]; - if (mpeg2dec->copy_matrix & 2) - for (i = 0; i < 64; i++) - decoder->non_intra_quantizer_matrix[i] = - mpeg2dec->non_intra_quantizer_matrix[i]; + for (i = 0; i < 2; i++) { + if (mpeg2dec->copy_matrix & (1 << i)) + copy_matrix (mpeg2dec, i); + if ((mpeg2dec->copy_matrix & (4 << i)) && + memcmp (mpeg2dec->quantizer_matrix[i], + mpeg2dec->new_quantizer_matrix[i+2], 64)) { + copy_matrix (mpeg2dec, i + 2); + decoder->chroma_quantizer[i] = decoder->quantizer_prescale[i+2]; + } else if (mpeg2dec->copy_matrix & (5 << i)) + decoder->chroma_quantizer[i] = decoder->quantizer_prescale[i]; + } +} + +static mpeg2_state_t invalid_end_action (mpeg2dec_t * mpeg2dec) +{ + mpeg2_reset_info (&(mpeg2dec->info)); + mpeg2dec->info.gop = NULL; + info_user_data (mpeg2dec); + mpeg2_header_state_init (mpeg2dec); + mpeg2dec->sequence = mpeg2dec->new_sequence; + mpeg2dec->action = mpeg2_seek_header; + mpeg2dec->state = STATE_SEQUENCE; + return STATE_SEQUENCE; } void mpeg2_header_sequence_finalize (mpeg2dec_t * mpeg2dec) @@ -316,37 +452,56 @@ void mpeg2_header_sequence_finalize (mpeg2dec_t * mpeg2dec) mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); finalize_sequence (sequence); + finalize_matrix (mpeg2dec); - mpeg2_header_matrix_finalize (mpeg2dec); decoder->mpeg1 = !(sequence->flags & SEQ_FLAG_MPEG2); decoder->width = sequence->width; decoder->height = sequence->height; 
decoder->vertical_position_extension = (sequence->picture_height > 2800); - - /* - * according to 6.1.1.6, repeat sequence headers should be - * identical to the original. However some DVDs dont respect that - * and have different bitrates in the repeat sequence headers. So - * we'll ignore that in the comparison and still consider these as - * repeat sequence headers. - */ - mpeg2dec->sequence.byte_rate = sequence->byte_rate; - if (!memcmp (&(mpeg2dec->sequence), sequence, sizeof (mpeg2_sequence_t))) - mpeg2dec->state = STATE_SEQUENCE_REPEATED; + decoder->chroma_format = ((sequence->chroma_width == sequence->width) + + (sequence->chroma_height == sequence->height)); + + if (mpeg2dec->sequence.width != (unsigned)-1) { + /* + * According to 6.1.1.6, repeat sequence headers should be + * identical to the original. However some encoders dont + * respect that and change various fields (including bitrate + * and aspect ratio) in the repeat sequence headers. So we + * choose to be as conservative as possible and only restart + * the decoder if the width, height, chroma_width, + * chroma_height or low_delay flag are modified. + */ + if (sequence->width != mpeg2dec->sequence.width || + sequence->height != mpeg2dec->sequence.height || + sequence->chroma_width != mpeg2dec->sequence.chroma_width || + sequence->chroma_height != mpeg2dec->sequence.chroma_height || + ((sequence->flags ^ mpeg2dec->sequence.flags) & + SEQ_FLAG_LOW_DELAY)) { + decoder->stride_frame = sequence->width; + mpeg2_header_end (mpeg2dec); + mpeg2dec->action = invalid_end_action; + mpeg2dec->state = STATE_INVALID_END; + return; + } + mpeg2dec->state = (memcmp (&(mpeg2dec->sequence), sequence, + sizeof (mpeg2_sequence_t)) ? 
+ STATE_SEQUENCE_MODIFIED : STATE_SEQUENCE_REPEATED); + } else + decoder->stride_frame = sequence->width; mpeg2dec->sequence = *sequence; - + mpeg2_reset_info (&(mpeg2dec->info)); mpeg2dec->info.sequence = &(mpeg2dec->sequence); + mpeg2dec->info.gop = NULL; + info_user_data (mpeg2dec); } int mpeg2_header_gop (mpeg2dec_t * mpeg2dec) { uint8_t * buffer = mpeg2dec->chunk_start; - mpeg2_gop_t * gop = &(mpeg2dec->gop); + mpeg2_gop_t * gop = &(mpeg2dec->new_gop); - reset_info (&(mpeg2dec->info)); if (! (buffer[1] & 8)) return 1; - mpeg2dec->info.gop = gop; gop->hours = (buffer[0] >> 2) & 31; gop->minutes = ((buffer[0] << 4) | (buffer[1] >> 4)) & 63; gop->seconds = ((buffer[1] << 3) | (buffer[2] >> 5)) & 63; @@ -356,7 +511,15 @@ int mpeg2_header_gop (mpeg2dec_t * mpeg2dec) return 0; } -void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int coding_type) +void mpeg2_header_gop_finalize (mpeg2dec_t * mpeg2dec) +{ + mpeg2dec->gop = mpeg2dec->new_gop; + mpeg2_reset_info (&(mpeg2dec->info)); + mpeg2dec->info.gop = &(mpeg2dec->gop); + info_user_data (mpeg2dec); +} + +void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int b_type) { int i; @@ -364,139 +527,30 @@ void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int coding_type) if (mpeg2dec->fbuf[1] != &mpeg2dec->fbuf_alloc[i].fbuf && mpeg2dec->fbuf[2] != &mpeg2dec->fbuf_alloc[i].fbuf) { mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[i].fbuf; - if (!mpeg2dec->custom_fbuf) { - mpeg2dec->info.current_fbuf = mpeg2dec->fbuf[0]; - if ((coding_type == B_TYPE) || - (mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY)) { - mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[0]; - if ((coding_type == B_TYPE) || (mpeg2dec->convert_start)) { - mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[0]; - mpeg2dec->fbuf[0]=0; - } - } - } + mpeg2dec->info.current_fbuf = mpeg2dec->fbuf[0]; + if (b_type || (mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY)) { + if (b_type || mpeg2dec->convert) + mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[0]; + mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[0]; + 
} break; } } -mpeg2_state_t mpeg2_header_picture_start (mpeg2dec_t * mpeg2dec) -{ - mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); - mpeg2_picture_t * picture; - - if (mpeg2dec->state != STATE_SLICE_1ST) { - mpeg2dec->state = STATE_PICTURE; - picture = mpeg2dec->pictures; - if ((decoder->coding_type != PIC_FLAG_CODING_TYPE_B) ^ - (mpeg2dec->picture >= mpeg2dec->pictures + 2)) - picture += 2; - } else { - mpeg2dec->state = STATE_PICTURE_2ND; - picture = mpeg2dec->picture + 1; /* second field picture */ - } - mpeg2dec->picture = picture; - picture->flags = 0; - if (mpeg2dec->num_pts) { - if (mpeg2dec->bytes_since_pts >= 4) { - mpeg2dec->num_pts = 0; - picture->pts = mpeg2dec->pts_current; - picture->flags = PIC_FLAG_PTS; - } else if (mpeg2dec->num_pts > 1) { - mpeg2dec->num_pts = 1; - picture->pts = mpeg2dec->pts_previous; - picture->flags = PIC_FLAG_PTS; - } - } - picture->display_offset[0].x = picture->display_offset[1].x = - picture->display_offset[2].x = mpeg2dec->display_offset_x; - picture->display_offset[0].y = picture->display_offset[1].y = - picture->display_offset[2].y = mpeg2dec->display_offset_y; - return mpeg2_parse_header (mpeg2dec); -} - int mpeg2_header_picture (mpeg2dec_t * mpeg2dec) { uint8_t * buffer = mpeg2dec->chunk_start; - mpeg2_picture_t * picture = mpeg2dec->picture; + mpeg2_picture_t * picture = &(mpeg2dec->new_picture); mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); int type; - int low_delay; - - type = (buffer [1] >> 3) & 7; - low_delay = mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY; - - if (mpeg2dec->state == STATE_PICTURE) { - mpeg2_picture_t * other; - - decoder->second_field = 0; - other = mpeg2dec->pictures; - if (other == picture) - other += 2; - if (decoder->coding_type != PIC_FLAG_CODING_TYPE_B) { - mpeg2dec->fbuf[2] = mpeg2dec->fbuf[1]; - mpeg2dec->fbuf[1] = mpeg2dec->fbuf[0]; - } - mpeg2dec->fbuf[0] = NULL; - reset_info (&(mpeg2dec->info)); - mpeg2dec->info.current_picture = picture; - mpeg2dec->info.display_picture = 
picture; - if (type != PIC_FLAG_CODING_TYPE_B) { - if (!low_delay) { - if (mpeg2dec->first) { - mpeg2dec->info.display_picture = NULL; - mpeg2dec->first = 0; - } else { - mpeg2dec->info.display_picture = other; - if (other->nb_fields == 1) - mpeg2dec->info.display_picture_2nd = other + 1; - mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[1]; - } - } - if (!low_delay + !mpeg2dec->convert_start) { - mpeg2dec->info.discard_fbuf = - mpeg2dec->fbuf[!low_delay + !mpeg2dec->convert_start]; - // FIXME: Might want to wipe this whole section, once pictures is sorted. - // mpeg2dec->fbuf[!low_delay + !mpeg2dec->convert_start]=0; - } - } - if (!mpeg2dec->custom_fbuf) { - while (mpeg2dec->alloc_index < 3) { - mpeg2_fbuf_t * fbuf; - fbuf = &(mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index++].fbuf); - fbuf->id = NULL; - if (mpeg2dec->convert_start) { - fbuf->buf[0] = - (uint8_t *) mpeg2_malloc (mpeg2dec->convert_size[0], - ALLOC_CONVERTED); - fbuf->buf[1] = fbuf->buf[0] + mpeg2dec->convert_size[1]; - fbuf->buf[2] = fbuf->buf[0] + mpeg2dec->convert_size[2]; - } else { - int size; - size = mpeg2dec->decoder.width * mpeg2dec->decoder.height; - fbuf->buf[0] = (uint8_t *) mpeg2_malloc (6 * size >> 2, - ALLOC_YUV); - fbuf->buf[1] = fbuf->buf[0] + size; - fbuf->buf[2] = fbuf->buf[1] + (size >> 2); - } - } - abort(); - mpeg2_set_fbuf (mpeg2dec, type); - } - } else { - decoder->second_field = 1; - mpeg2dec->info.current_picture_2nd = picture; - mpeg2dec->info.user_data = NULL; mpeg2dec->info.user_data_len = 0; - if (low_delay || type == PIC_FLAG_CODING_TYPE_B) - mpeg2dec->info.display_picture_2nd = picture; - } + mpeg2dec->state = ((mpeg2dec->state != STATE_SLICE_1ST) ? 
+ STATE_PICTURE : STATE_PICTURE_2ND); mpeg2dec->ext_state = PIC_CODING_EXT; picture->temporal_reference = (buffer[0] << 2) | (buffer[1] >> 6); - decoder->coding_type = type; - picture->flags |= type; - + type = (buffer [1] >> 3) & 7; if (type == PIC_FLAG_CODING_TYPE_P || type == PIC_FLAG_CODING_TYPE_B) { /* forward_f_code and backward_f_code - used in mpeg1 only */ decoder->f_motion.f_code[1] = (buffer[3] >> 2) & 1; @@ -506,13 +560,32 @@ int mpeg2_header_picture (mpeg2dec_t * mpeg2dec) decoder->b_motion.f_code[0] = ((buffer[4] >> 3) & 7) - 1; } - /* XXXXXX decode extra_information_picture as well */ - + picture->flags = PIC_FLAG_PROGRESSIVE_FRAME | type; + picture->tag = picture->tag2 = 0; + if (mpeg2dec->num_tags) { + if (mpeg2dec->bytes_since_tag >= mpeg2dec->chunk_ptr - buffer + 4) { + mpeg2dec->num_tags = 0; + picture->tag = mpeg2dec->tag_current; + picture->tag2 = mpeg2dec->tag2_current; + picture->flags |= PIC_FLAG_TAGS; + } else if (mpeg2dec->num_tags > 1) { + mpeg2dec->num_tags = 1; + picture->tag = mpeg2dec->tag_previous; + picture->tag2 = mpeg2dec->tag2_previous; + picture->flags |= PIC_FLAG_TAGS; + } + } picture->nb_fields = 2; + picture->display_offset[0].x = picture->display_offset[1].x = + picture->display_offset[2].x = mpeg2dec->display_offset_x; + picture->display_offset[0].y = picture->display_offset[1].y = + picture->display_offset[2].y = mpeg2dec->display_offset_y; - decoder->intra_dc_precision = 0; + /* XXXXXX decode extra_information_picture as well */ + + mpeg2dec->q_scale_type = 0; + decoder->intra_dc_precision = 7; decoder->frame_pred_frame_dct = 1; - decoder->q_scale_type = 0; decoder->concealment_motion_vectors = 0; decoder->scan = mpeg2_scan_norm; decoder->picture_structure = FRAME_PICTURE; @@ -524,7 +597,7 @@ int mpeg2_header_picture (mpeg2dec_t * mpeg2dec) static int picture_coding_ext (mpeg2dec_t * mpeg2dec) { uint8_t * buffer = mpeg2dec->chunk_start; - mpeg2_picture_t * picture = mpeg2dec->picture; + mpeg2_picture_t * picture = 
&(mpeg2dec->new_picture); mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); uint32_t flags; @@ -535,7 +608,7 @@ static int picture_coding_ext (mpeg2dec_t * mpeg2dec) decoder->b_motion.f_code[1] = (buffer[2] >> 4) - 1; flags = picture->flags; - decoder->intra_dc_precision = (buffer[2] >> 2) & 3; + decoder->intra_dc_precision = 7 - ((buffer[2] >> 2) & 3); decoder->picture_structure = buffer[2] & 3; switch (decoder->picture_structure) { case TOP_FIELD: @@ -544,7 +617,6 @@ static int picture_coding_ext (mpeg2dec_t * mpeg2dec) picture->nb_fields = 1; break; case FRAME_PICTURE: - /* buffer[3] & 2 is repeat first field */ if (!(mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE)) { picture->nb_fields = (buffer[3] & 2) ? 3 : 2; flags |= (buffer[3] & 128) ? PIC_FLAG_TOP_FIELD_FIRST : 0; @@ -557,10 +629,11 @@ static int picture_coding_ext (mpeg2dec_t * mpeg2dec) decoder->top_field_first = buffer[3] >> 7; decoder->frame_pred_frame_dct = (buffer[3] >> 6) & 1; decoder->concealment_motion_vectors = (buffer[3] >> 5) & 1; - decoder->q_scale_type = (buffer[3] >> 4) & 1; + mpeg2dec->q_scale_type = buffer[3] & 16; decoder->intra_vlc_format = (buffer[3] >> 3) & 1; decoder->scan = (buffer[3] & 4) ? mpeg2_scan_alt : mpeg2_scan_norm; - flags |= (buffer[4] & 0x80) ? 
PIC_FLAG_PROGRESSIVE_FRAME : 0; + if (!(buffer[4] & 0x80)) + flags &= ~PIC_FLAG_PROGRESSIVE_FRAME; if (buffer[4] & 0x40) flags |= (((buffer[4]<<26) | (buffer[5]<<18) | (buffer[6]<<10)) & PIC_MASK_COMPOSITE_DISPLAY) | PIC_FLAG_COMPOSITE_DISPLAY; @@ -574,7 +647,7 @@ static int picture_coding_ext (mpeg2dec_t * mpeg2dec) static int picture_display_ext (mpeg2dec_t * mpeg2dec) { uint8_t * buffer = mpeg2dec->chunk_start; - mpeg2_picture_t * picture = mpeg2dec->picture; + mpeg2_picture_t * picture = &(mpeg2dec->new_picture); int i, nb_pos; nb_pos = picture->nb_fields; @@ -600,6 +673,140 @@ static int picture_display_ext (mpeg2dec_t * mpeg2dec) return 0; } +void mpeg2_header_picture_finalize (mpeg2dec_t * mpeg2dec, uint32_t accels) +{ + mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); + int old_type_b = (decoder->coding_type == B_TYPE); + int low_delay = mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY; + + finalize_matrix (mpeg2dec); + decoder->coding_type = mpeg2dec->new_picture.flags & PIC_MASK_CODING_TYPE; + + if (mpeg2dec->state == STATE_PICTURE) { + mpeg2_picture_t * picture; + mpeg2_picture_t * other; + + decoder->second_field = 0; + + picture = other = mpeg2dec->pictures; + if (old_type_b ^ (mpeg2dec->picture < mpeg2dec->pictures + 2)) + picture += 2; + else + other += 2; + mpeg2dec->picture = picture; + *picture = mpeg2dec->new_picture; + + if (!old_type_b) { + mpeg2dec->fbuf[2] = mpeg2dec->fbuf[1]; + mpeg2dec->fbuf[1] = mpeg2dec->fbuf[0]; + } + mpeg2dec->fbuf[0] = NULL; + mpeg2_reset_info (&(mpeg2dec->info)); + mpeg2dec->info.current_picture = picture; + mpeg2dec->info.display_picture = picture; + if (decoder->coding_type != B_TYPE) { + if (!low_delay) { + if (mpeg2dec->first) { + mpeg2dec->info.display_picture = NULL; + mpeg2dec->first = 0; + } else { + mpeg2dec->info.display_picture = other; + if (other->nb_fields == 1) + mpeg2dec->info.display_picture_2nd = other + 1; + mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[1]; + } + } + if (!low_delay + 
!mpeg2dec->convert) + mpeg2dec->info.discard_fbuf = + mpeg2dec->fbuf[!low_delay + !mpeg2dec->convert]; + } + if (mpeg2dec->convert) { + mpeg2_convert_init_t convert_init; + if (!mpeg2dec->convert_start) { + int y_size, uv_size; + + mpeg2dec->decoder.convert_id = + mpeg2_malloc (mpeg2dec->convert_id_size, + MPEG2_ALLOC_CONVERT_ID); + mpeg2dec->convert (MPEG2_CONVERT_START, + mpeg2dec->decoder.convert_id, + &(mpeg2dec->sequence), + mpeg2dec->convert_stride, accels, + mpeg2dec->convert_arg, &convert_init); + mpeg2dec->convert_start = convert_init.start; + mpeg2dec->decoder.convert = convert_init.copy; + + y_size = decoder->stride_frame * mpeg2dec->sequence.height; + uv_size = y_size >> (2 - mpeg2dec->decoder.chroma_format); + mpeg2dec->yuv_buf[0][0] = + (uint8_t *) mpeg2_malloc (y_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[0][1] = + (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[0][2] = + (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[1][0] = + (uint8_t *) mpeg2_malloc (y_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[1][1] = + (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[1][2] = + (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); + y_size = decoder->stride_frame * 32; + uv_size = y_size >> (2 - mpeg2dec->decoder.chroma_format); + mpeg2dec->yuv_buf[2][0] = + (uint8_t *) mpeg2_malloc (y_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[2][1] = + (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[2][2] = + (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); + } + if (!mpeg2dec->custom_fbuf) { + while (mpeg2dec->alloc_index < 3) { + mpeg2_fbuf_t * fbuf; + + fbuf = &mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index++].fbuf; + fbuf->id = NULL; + fbuf->buf[0] = + (uint8_t *) mpeg2_malloc (convert_init.buf_size[0], + MPEG2_ALLOC_CONVERTED); + fbuf->buf[1] = + (uint8_t *) mpeg2_malloc (convert_init.buf_size[1], + MPEG2_ALLOC_CONVERTED); + fbuf->buf[2] = + (uint8_t *) mpeg2_malloc 
(convert_init.buf_size[2], + MPEG2_ALLOC_CONVERTED); + } + mpeg2_set_fbuf (mpeg2dec, (decoder->coding_type == B_TYPE)); + } + } else if (!mpeg2dec->custom_fbuf) { + while (mpeg2dec->alloc_index < 3) { + mpeg2_fbuf_t * fbuf; + int y_size, uv_size; + + fbuf = &(mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index++].fbuf); + fbuf->id = NULL; + y_size = decoder->stride_frame * mpeg2dec->sequence.height; + uv_size = y_size >> (2 - decoder->chroma_format); + fbuf->buf[0] = (uint8_t *) mpeg2_malloc (y_size, + MPEG2_ALLOC_YUV); + fbuf->buf[1] = (uint8_t *) mpeg2_malloc (uv_size, + MPEG2_ALLOC_YUV); + fbuf->buf[2] = (uint8_t *) mpeg2_malloc (uv_size, + MPEG2_ALLOC_YUV); + } + mpeg2_set_fbuf (mpeg2dec, (decoder->coding_type == B_TYPE)); + } + } else { + decoder->second_field = 1; + mpeg2dec->picture++; /* second field picture */ + *(mpeg2dec->picture) = mpeg2dec->new_picture; + mpeg2dec->info.current_picture_2nd = mpeg2dec->picture; + if (low_delay || decoder->coding_type == B_TYPE) + mpeg2dec->info.display_picture_2nd = mpeg2dec->picture; + } + + info_user_data (mpeg2dec); +} + static int copyright_ext (mpeg2dec_t * mpeg2dec) { return 0; @@ -608,22 +815,16 @@ static int copyright_ext (mpeg2dec_t * mpeg2dec) static int quant_matrix_ext (mpeg2dec_t * mpeg2dec) { uint8_t * buffer = mpeg2dec->chunk_start; - int i; - - if (buffer[0] & 8) { - for (i = 0; i < 64; i++) - mpeg2dec->intra_quantizer_matrix[mpeg2_scan_norm[i]] = - (buffer[i] << 5) | (buffer[i+1] >> 3); - mpeg2dec->copy_matrix |= 1; - buffer += 64; - } - - if (buffer[0] & 4) { - for (i = 0; i < 64; i++) - mpeg2dec->non_intra_quantizer_matrix[mpeg2_scan_norm[i]] = - (buffer[i] << 6) | (buffer[i+1] >> 2); - mpeg2dec->copy_matrix |= 2; - } + int i, j; + + for (i = 0; i < 4; i++) + if (buffer[0] & (8 >> i)) { + for (j = 0; j < 64; j++) + mpeg2dec->new_quantizer_matrix[i][mpeg2_scan_norm[j]] = + (buffer[j] << (i+5)) | (buffer[j+1] >> (3-i)); + mpeg2dec->copy_matrix |= 1 << i; + buffer += 64; + } return 0; } @@ -647,42 +848,59 @@ int 
mpeg2_header_extension (mpeg2dec_t * mpeg2dec) int mpeg2_header_user_data (mpeg2dec_t * mpeg2dec) { - if (!mpeg2dec->info.user_data_len) - mpeg2dec->info.user_data = mpeg2dec->chunk_start; - else - mpeg2dec->info.user_data_len += 3; - mpeg2dec->info.user_data_len += (mpeg2dec->chunk_ptr - 4 - - mpeg2dec->chunk_start); + mpeg2dec->user_data_len += mpeg2dec->chunk_ptr - 1 - mpeg2dec->chunk_start; mpeg2dec->chunk_start = mpeg2dec->chunk_ptr - 1; return 0; } +static void prescale (mpeg2dec_t * mpeg2dec, int index) +{ + static int non_linear_scale [] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 10, 12, 14, 16, 18, 20, 22, + 24, 28, 32, 36, 40, 44, 48, 52, + 56, 64, 72, 80, 88, 96, 104, 112 + }; + int i, j, k; + mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); + + if (mpeg2dec->scaled[index] != mpeg2dec->q_scale_type) { + mpeg2dec->scaled[index] = mpeg2dec->q_scale_type; + for (i = 0; i < 32; i++) { + k = mpeg2dec->q_scale_type ? non_linear_scale[i] : (i << 1); + for (j = 0; j < 64; j++) + decoder->quantizer_prescale[index][i][j] = + k * mpeg2dec->quantizer_matrix[index][j]; + } + } +} + mpeg2_state_t mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec) { - mpeg2dec->info.user_data = NULL; mpeg2dec->info.user_data_len = 0; + mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); + + mpeg2dec->info.user_data = NULL; mpeg2dec->info.user_data_len = 0; mpeg2dec->state = ((mpeg2dec->picture->nb_fields > 1 || mpeg2dec->state == STATE_PICTURE_2ND) ? 
STATE_SLICE : STATE_SLICE_1ST); + if (mpeg2dec->decoder.coding_type != D_TYPE) { + prescale (mpeg2dec, 0); + if (decoder->chroma_quantizer[0] == decoder->quantizer_prescale[2]) + prescale (mpeg2dec, 2); + if (mpeg2dec->decoder.coding_type != I_TYPE) { + prescale (mpeg2dec, 1); + if (decoder->chroma_quantizer[1] == decoder->quantizer_prescale[3]) + prescale (mpeg2dec, 3); + } + } + if (!(mpeg2dec->nb_decode_slices)) mpeg2dec->picture->flags |= PIC_FLAG_SKIP; else if (mpeg2dec->convert_start) { - int flags; - - switch (mpeg2dec->decoder.picture_structure) { - case TOP_FIELD: flags = CONVERT_TOP_FIELD; break; - case BOTTOM_FIELD: flags = CONVERT_BOTTOM_FIELD; break; - default: - flags = - ((mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE) ? - CONVERT_FRAME : CONVERT_BOTH_FIELDS); - } - mpeg2dec->convert_start (mpeg2dec->convert_id, - mpeg2dec->fbuf[0]->buf, flags); - - mpeg2dec->decoder.convert = mpeg2dec->convert_copy; - mpeg2dec->decoder.fbuf_id = mpeg2dec->convert_id; + mpeg2dec->convert_start (decoder->convert_id, mpeg2dec->fbuf[0], + mpeg2dec->picture, mpeg2dec->info.gop); if (mpeg2dec->decoder.coding_type == B_TYPE) mpeg2_init_fbuf (&(mpeg2dec->decoder), mpeg2dec->yuv_buf[2], @@ -699,69 +917,23 @@ mpeg2_state_t mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec) } else { int b_type; - mpeg2dec->decoder.convert = NULL; b_type = (mpeg2dec->decoder.coding_type == B_TYPE); mpeg2_init_fbuf (&(mpeg2dec->decoder), mpeg2dec->fbuf[0]->buf, mpeg2dec->fbuf[b_type + 1]->buf, mpeg2dec->fbuf[b_type]->buf); } mpeg2dec->action = NULL; - return (mpeg2_state_t)-1; -} - -mpeg2_state_t mpeg2_header_end_btype2 (mpeg2dec_t * mpeg2dec) -{ - mpeg2dec->info.display_fbuf = 0; - mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[1]; - mpeg2dec->fbuf[1]=0; - mpeg2dec->action = mpeg2_seek_sequence; - mpeg2dec->first = 1; - return STATE_END; -} -mpeg2_state_t mpeg2_header_end_btype (mpeg2dec_t * mpeg2dec) -{ - mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[1]; - mpeg2dec->info.discard_fbuf = 
mpeg2dec->fbuf[2]; - mpeg2dec->fbuf[2]=0; - mpeg2dec->action = mpeg2_header_end_btype2; - return STATE_SLICE; + return STATE_INTERNAL_NORETURN; } -mpeg2_state_t mpeg2_reset (mpeg2dec_t * mpeg2dec) +static mpeg2_state_t seek_sequence (mpeg2dec_t * mpeg2dec) { - mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[0]; - mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[0]; - mpeg2dec->fbuf[0]=0; - mpeg2dec->action = mpeg2_header_end_btype; - mpeg2dec->shift = 0xffffff00; - mpeg2dec->code = 0xb4; - mpeg2dec->first_decode_slice = 1; - mpeg2dec->nb_decode_slices = 0xb0 - 1; - mpeg2dec->decoder.scan = mpeg2_scan_norm; - mpeg2dec->picture = mpeg2dec->pictures; - mpeg2dec->first = 1; - mpeg2dec->alloc_index = 0; - mpeg2dec->alloc_index_user = 0; - - return STATE_SLICE; -} - -mpeg2_state_t mpeg2_header_end_itype2 (mpeg2dec_t * mpeg2dec) -{ - mpeg2dec->info.display_fbuf = 0; - mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[0]; - mpeg2dec->fbuf[0]=0; - mpeg2dec->action = mpeg2_seek_sequence; - mpeg2dec->first = 1; - return STATE_END; -} -mpeg2_state_t mpeg2_header_end_itype (mpeg2dec_t * mpeg2dec) -{ - mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[0]; - mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[1]; - mpeg2dec->fbuf[1]=0; - mpeg2dec->action = mpeg2_header_end_itype2; - return STATE_SLICE; + mpeg2_reset_info (&(mpeg2dec->info)); + mpeg2dec->info.sequence = NULL; + mpeg2dec->info.gop = NULL; + mpeg2_header_state_init (mpeg2dec); + mpeg2dec->action = mpeg2_seek_header; + return mpeg2_seek_header (mpeg2dec); } mpeg2_state_t mpeg2_header_end (mpeg2dec_t * mpeg2dec) @@ -774,54 +946,16 @@ mpeg2_state_t mpeg2_header_end (mpeg2dec_t * mpeg2dec) if ((mpeg2dec->picture >= picture + 2) ^ b_type) picture = mpeg2dec->pictures + 2; - mpeg2dec->state = STATE_END; - reset_info (&(mpeg2dec->info)); - if (b_type) { - mpeg2dec->info.display_picture = picture; - if (picture->nb_fields == 1) - mpeg2dec->info.display_picture_2nd = picture + 1; - mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[0]; - 
mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[0]; - mpeg2dec->fbuf[0]=0; - mpeg2dec->action = mpeg2_header_end_btype; - return STATE_SLICE; - } else { - mpeg2dec->info.display_picture = picture; - if (picture->nb_fields == 1) - mpeg2dec->info.display_picture_2nd = picture + 1; - if (mpeg2dec->fbuf[2]) { - mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[1]; - mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[2]; - mpeg2dec->fbuf[2]=0; - mpeg2dec->action = mpeg2_header_end_itype; - return STATE_SLICE; - } else { - mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[0]; - mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[1]; - mpeg2dec->fbuf[1]=0; - mpeg2dec->action = mpeg2_header_end_itype2; - return STATE_SLICE; - } - - } - - -#if 0 + mpeg2_reset_info (&(mpeg2dec->info)); if (!(mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY)) { mpeg2dec->info.display_picture = picture; if (picture->nb_fields == 1) mpeg2dec->info.display_picture_2nd = picture + 1; mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[b_type]; - if (!mpeg2dec->convert_start) { + if (!mpeg2dec->convert) mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[b_type + 1]; - } - } else if (!mpeg2dec->convert_start) { + } else if (!mpeg2dec->convert) mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[b_type]; - } - mpeg2dec->action = mpeg2_seek_sequence; - mpeg2dec->first = 1; + mpeg2dec->action = seek_sequence; return STATE_END; -#endif } - - diff --git a/src/libmpeg2new/libmpeg2/idct.c b/src/libmpeg2new/libmpeg2/idct.c index 57aba175b..8b982bb33 100644 --- a/src/libmpeg2new/libmpeg2/idct.c +++ b/src/libmpeg2new/libmpeg2/idct.c @@ -27,8 +27,8 @@ #include <inttypes.h> #include "../include/mpeg2.h" -#include "mpeg2_internal.h" #include "../include/attributes.h" +#include "mpeg2_internal.h" #define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */ #define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */ @@ -75,7 +75,7 @@ static void inline idct_row (int16_t * const block) /* shortcut */ if (likely (!(block[1] | ((int32_t *)block)[1] | ((int32_t 
*)block)[2] | ((int32_t *)block)[3]))) { - uint32_t tmp = (uint16_t) (block[0] << 3); + uint32_t tmp = (uint16_t) (block[0] >> 1); tmp |= tmp << 16; ((int32_t *)block)[0] = tmp; ((int32_t *)block)[1] = tmp; @@ -84,7 +84,7 @@ static void inline idct_row (int16_t * const block) return; } - d0 = (block[0] << 11) + 128; + d0 = (block[0] << 11) + 2048; d1 = block[1]; d2 = block[2] << 11; d3 = block[3]; @@ -106,17 +106,17 @@ static void inline idct_row (int16_t * const block) b3 = t1 + t3; t0 -= t2; t1 -= t3; - b1 = ((t0 + t1) * 181) >> 8; - b2 = ((t0 - t1) * 181) >> 8; + b1 = ((t0 + t1) >> 8) * 181; + b2 = ((t0 - t1) >> 8) * 181; - block[0] = (a0 + b0) >> 8; - block[1] = (a1 + b1) >> 8; - block[2] = (a2 + b2) >> 8; - block[3] = (a3 + b3) >> 8; - block[4] = (a3 - b3) >> 8; - block[5] = (a2 - b2) >> 8; - block[6] = (a1 - b1) >> 8; - block[7] = (a0 - b0) >> 8; + block[0] = (a0 + b0) >> 12; + block[1] = (a1 + b1) >> 12; + block[2] = (a2 + b2) >> 12; + block[3] = (a3 + b3) >> 12; + block[4] = (a3 - b3) >> 12; + block[5] = (a2 - b2) >> 12; + block[6] = (a1 - b1) >> 12; + block[7] = (a0 - b0) >> 12; } static void inline idct_col (int16_t * const block) @@ -145,10 +145,10 @@ static void inline idct_col (int16_t * const block) BUTTERFLY (t2, t3, W3, W5, d1, d2); b0 = t0 + t2; b3 = t1 + t3; - t0 = (t0 - t2) >> 8; - t1 = (t1 - t3) >> 8; - b1 = (t0 + t1) * 181; - b2 = (t0 - t1) * 181; + t0 -= t2; + t1 -= t3; + b1 = ((t0 + t1) >> 8) * 181; + b2 = ((t0 - t1) >> 8) * 181; block[8*0] = (a0 + b0) >> 17; block[8*1] = (a1 + b1) >> 17; @@ -179,8 +179,8 @@ static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest, dest[6] = CLIP (block[6]); dest[7] = CLIP (block[7]); - block[0] = 0; block[1] = 0; block[2] = 0; block[3] = 0; - block[4] = 0; block[5] = 0; block[6] = 0; block[7] = 0; + ((int32_t *)block)[0] = 0; ((int32_t *)block)[1] = 0; + ((int32_t *)block)[2] = 0; ((int32_t *)block)[3] = 0; dest += stride; block += 8; @@ -192,7 +192,7 @@ static void mpeg2_idct_add_c (const int last, 
int16_t * block, { int i; - if (last != 129 || (block[0] & 7) == 4) { + if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) { for (i = 0; i < 8; i++) idct_row (block + 8 * i); for (i = 0; i < 8; i++) @@ -207,8 +207,8 @@ static void mpeg2_idct_add_c (const int last, int16_t * block, dest[6] = CLIP (block[6] + dest[6]); dest[7] = CLIP (block[7] + dest[7]); - block[0] = 0; block[1] = 0; block[2] = 0; block[3] = 0; - block[4] = 0; block[5] = 0; block[6] = 0; block[7] = 0; + ((int32_t *)block)[0] = 0; ((int32_t *)block)[1] = 0; + ((int32_t *)block)[2] = 0; ((int32_t *)block)[3] = 0; dest += stride; block += 8; @@ -216,7 +216,7 @@ static void mpeg2_idct_add_c (const int last, int16_t * block, } else { int DC; - DC = (block[0] + 4) >> 3; + DC = (block[0] + 64) >> 7; block[0] = block[63] = 0; i = 8; do { @@ -268,13 +268,6 @@ void mpeg2_idct_init (uint32_t accel) CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 255 : i); } else #endif -#ifdef LIBMPEG2_MLIB - if (accel & MPEG2_ACCEL_MLIB) { - mpeg2_idct_copy = mpeg2_idct_copy_mlib_non_ieee; - mpeg2_idct_add = (getenv ("MLIB_NON_IEEE") ? 
- mpeg2_idct_add_mlib_non_ieee : mpeg2_idct_add_mlib); - } else -#endif { extern uint8_t mpeg2_scan_norm[64]; extern uint8_t mpeg2_scan_alt[64]; diff --git a/src/libmpeg2new/libmpeg2/idct_alpha.c b/src/libmpeg2new/libmpeg2/idct_alpha.c index 68c605508..8f9beaf22 100644 --- a/src/libmpeg2new/libmpeg2/idct_alpha.c +++ b/src/libmpeg2new/libmpeg2/idct_alpha.c @@ -29,8 +29,10 @@ #include <stdlib.h> #include <inttypes.h> -#include "alpha_asm.h" +#include "mpeg2.h" #include "attributes.h" +#include "mpeg2_internal.h" +#include "alpha_asm.h" #define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */ #define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */ @@ -69,7 +71,7 @@ static void inline idct_row (int16_t * const block) /* shortcut */ if (likely (!((l & ~0xffffUL) | r))) { - uint64_t tmp = (uint16_t) (l << 3); + uint64_t tmp = (uint16_t) (l >> 1); tmp |= tmp << 16; tmp |= tmp << 32; ((int32_t *)block)[0] = tmp; @@ -79,7 +81,7 @@ static void inline idct_row (int16_t * const block) return; } - d0 = (sextw (l) << 11) + 128; + d0 = (sextw (l) << 11) + 2048; d1 = sextw (extwl (l, 2)); d2 = sextw (extwl (l, 4)) << 11; d3 = sextw (extwl (l, 6)); @@ -101,17 +103,17 @@ static void inline idct_row (int16_t * const block) b3 = t1 + t3; t0 -= t2; t1 -= t3; - b1 = ((t0 + t1) * 181) >> 8; - b2 = ((t0 - t1) * 181) >> 8; - - block[0] = (a0 + b0) >> 8; - block[1] = (a1 + b1) >> 8; - block[2] = (a2 + b2) >> 8; - block[3] = (a3 + b3) >> 8; - block[4] = (a3 - b3) >> 8; - block[5] = (a2 - b2) >> 8; - block[6] = (a1 - b1) >> 8; - block[7] = (a0 - b0) >> 8; + b1 = ((t0 + t1) >> 8) * 181; + b2 = ((t0 - t1) >> 8) * 181; + + block[0] = (a0 + b0) >> 12; + block[1] = (a1 + b1) >> 12; + block[2] = (a2 + b2) >> 12; + block[3] = (a3 + b3) >> 12; + block[4] = (a3 - b3) >> 12; + block[5] = (a2 - b2) >> 12; + block[6] = (a1 - b1) >> 12; + block[7] = (a0 - b0) >> 12; } static void inline idct_col (int16_t * const block) @@ -140,10 +142,10 @@ static void inline idct_col (int16_t * const block) BUTTERFLY 
(t2, t3, W3, W5, d1, d2); b0 = t0 + t2; b3 = t1 + t3; - t0 = (t0 - t2) >> 8; - t1 = (t1 - t3) >> 8; - b1 = (t0 + t1) * 181; - b2 = (t0 - t1) * 181; + t0 -= t2; + t1 -= t3; + b1 = ((t0 + t1) >> 8) * 181; + b2 = ((t0 - t1) >> 8) * 181; block[8*0] = (a0 + b0) >> 17; block[8*1] = (a1 + b1) >> 17; @@ -195,7 +197,7 @@ void mpeg2_idct_add_mvi (const int last, int16_t * block, uint64_t signmask; int i; - if (last != 129 || (block[0] & 7) == 4) { + if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) { for (i = 0; i < 8; i++) idct_row (block + 8 * i); for (i = 0; i < 8; i++) @@ -243,7 +245,7 @@ void mpeg2_idct_add_mvi (const int last, int16_t * block, uint64_t p0, p1, p2, p3, p4, p5, p6, p7; uint64_t DCs; - DC = (block[0] + 4) >> 3; + DC = (block[0] + 64) >> 7; block[0] = block[63] = 0; p0 = ldq (dest + 0 * stride); @@ -319,7 +321,7 @@ void mpeg2_idct_add_alpha (const int last, int16_t * block, { int i; - if (last != 129 || (block[0] & 7) == 4) { + if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) { for (i = 0; i < 8; i++) idct_row (block + 8 * i); for (i = 0; i < 8; i++) @@ -343,7 +345,7 @@ void mpeg2_idct_add_alpha (const int last, int16_t * block, } else { int DC; - DC = (block[0] + 4) >> 3; + DC = (block[0] + 64) >> 7; block[0] = block[63] = 0; i = 8; do { diff --git a/src/libmpeg2new/libmpeg2/idct_altivec.c b/src/libmpeg2new/libmpeg2/idct_altivec.c index d8f3ceab0..6b1b8586c 100644 --- a/src/libmpeg2new/libmpeg2/idct_altivec.c +++ b/src/libmpeg2new/libmpeg2/idct_altivec.c @@ -30,9 +30,9 @@ #endif #include <inttypes.h> -#include "../include/mpeg2.h" +#include "mpeg2.h" +#include "attributes.h" #include "mpeg2_internal.h" -#include "../include/attributes.h" typedef vector signed char vector_s8_t; typedef vector unsigned char vector_u8_t; @@ -67,46 +67,11 @@ static const vector_s16_t constants ATTR_ALIGN(16) = static const vector_s16_t constants_1 ATTR_ALIGN(16) = VEC_S16 (16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725); static const vector_s16_t constants_2 
ATTR_ALIGN(16) = - VEC_S16 (22725, 31521, 29692, 26722, 22725, 26722, 29692, 31521); + VEC_S16 (16069, 22289, 20995, 18895, 16069, 18895, 20995, 22289); static const vector_s16_t constants_3 ATTR_ALIGN(16) = VEC_S16 (21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692); static const vector_s16_t constants_4 ATTR_ALIGN(16) = - VEC_S16 (19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722); - -#define IDCT_HALF \ - /* 1st stage */ \ - t1 = vec_mradds (a1, vx7, vx1 ); \ - t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \ - t7 = vec_mradds (a2, vx5, vx3); \ - t3 = vec_mradds (ma2, vx3, vx5); \ - \ - /* 2nd stage */ \ - t5 = vec_adds (vx0, vx4); \ - t0 = vec_subs (vx0, vx4); \ - t2 = vec_mradds (a0, vx6, vx2); \ - t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6)); \ - t6 = vec_adds (t8, t3); \ - t3 = vec_subs (t8, t3); \ - t8 = vec_subs (t1, t7); \ - t1 = vec_adds (t1, t7); \ - \ - /* 3rd stage */ \ - t7 = vec_adds (t5, t2); \ - t2 = vec_subs (t5, t2); \ - t5 = vec_adds (t0, t4); \ - t0 = vec_subs (t0, t4); \ - t4 = vec_subs (t8, t3); \ - t3 = vec_adds (t8, t3); \ - \ - /* 4th stage */ \ - vy0 = vec_adds (t7, t1); \ - vy7 = vec_subs (t7, t1); \ - vy1 = vec_mradds (c4, t3, t5); \ - vy6 = vec_mradds (mc4, t3, t5); \ - vy2 = vec_mradds (c4, t4, t0); \ - vy5 = vec_mradds (mc4, t4, t0); \ - vy3 = vec_adds (t2, t6); \ - vy4 = vec_subs (t2, t6); + VEC_S16 (13623, 18895, 17799, 16019, 13623, 16019, 17799, 18895); #define IDCT \ vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \ @@ -124,18 +89,49 @@ static const vector_s16_t constants_4 ATTR_ALIGN(16) = bias = (vector_s16_t)vec_splat ((vector_s32_t)constants, 3); \ \ zero = vec_splat_s16 (0); \ - shift = vec_splat_u16 (4); \ \ - vx0 = vec_mradds (vec_sl (block[0], shift), constants_1, zero); \ - vx1 = vec_mradds (vec_sl (block[1], shift), constants_2, zero); \ - vx2 = vec_mradds (vec_sl (block[2], shift), constants_3, zero); \ - vx3 = vec_mradds (vec_sl (block[3], shift), constants_4, zero); \ - vx4 = vec_mradds (vec_sl 
(block[4], shift), constants_1, zero); \ - vx5 = vec_mradds (vec_sl (block[5], shift), constants_4, zero); \ - vx6 = vec_mradds (vec_sl (block[6], shift), constants_3, zero); \ - vx7 = vec_mradds (vec_sl (block[7], shift), constants_2, zero); \ + vx0 = vec_adds (block[0], block[4]); \ + vx4 = vec_subs (block[0], block[4]); \ + t5 = vec_mradds (vx0, constants_1, zero); \ + t0 = vec_mradds (vx4, constants_1, zero); \ + \ + vx1 = vec_mradds (a1, block[7], block[1]); \ + vx7 = vec_mradds (a1, block[1], vec_subs (zero, block[7])); \ + t1 = vec_mradds (vx1, constants_2, zero); \ + t8 = vec_mradds (vx7, constants_2, zero); \ + \ + vx2 = vec_mradds (a0, block[6], block[2]); \ + vx6 = vec_mradds (a0, block[2], vec_subs (zero, block[6])); \ + t2 = vec_mradds (vx2, constants_3, zero); \ + t4 = vec_mradds (vx6, constants_3, zero); \ + \ + vx3 = vec_mradds (block[3], constants_4, zero); \ + vx5 = vec_mradds (block[5], constants_4, zero); \ + t7 = vec_mradds (a2, vx5, vx3); \ + t3 = vec_mradds (ma2, vx3, vx5); \ \ - IDCT_HALF \ + t6 = vec_adds (t8, t3); \ + t3 = vec_subs (t8, t3); \ + t8 = vec_subs (t1, t7); \ + t1 = vec_adds (t1, t7); \ + t6 = vec_mradds (a0, t6, t6); /* a0+1 == 2*c4 */ \ + t1 = vec_mradds (a0, t1, t1); /* a0+1 == 2*c4 */ \ + \ + t7 = vec_adds (t5, t2); \ + t2 = vec_subs (t5, t2); \ + t5 = vec_adds (t0, t4); \ + t0 = vec_subs (t0, t4); \ + t4 = vec_subs (t8, t3); \ + t3 = vec_adds (t8, t3); \ + \ + vy0 = vec_adds (t7, t1); \ + vy7 = vec_subs (t7, t1); \ + vy1 = vec_adds (t5, t3); \ + vy6 = vec_subs (t5, t3); \ + vy2 = vec_adds (t0, t4); \ + vy5 = vec_subs (t0, t4); \ + vy3 = vec_adds (t2, t6); \ + vy4 = vec_subs (t2, t6); \ \ vx0 = vec_mergeh (vy0, vy4); \ vx1 = vec_mergel (vy0, vy4); \ @@ -155,7 +151,7 @@ static const vector_s16_t constants_4 ATTR_ALIGN(16) = vy6 = vec_mergeh (vx3, vx7); \ vy7 = vec_mergel (vx3, vx7); \ \ - vx0 = vec_adds (vec_mergeh (vy0, vy4), bias); \ + vx0 = vec_mergeh (vy0, vy4); \ vx1 = vec_mergel (vy0, vy4); \ vx2 = vec_mergeh (vy1, 
vy5); \ vx3 = vec_mergel (vy1, vy5); \ @@ -164,7 +160,39 @@ static const vector_s16_t constants_4 ATTR_ALIGN(16) = vx6 = vec_mergeh (vy3, vy7); \ vx7 = vec_mergel (vy3, vy7); \ \ - IDCT_HALF \ + vx0 = vec_adds (vx0, bias); \ + t5 = vec_adds (vx0, vx4); \ + t0 = vec_subs (vx0, vx4); \ + \ + t1 = vec_mradds (a1, vx7, vx1); \ + t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \ + \ + t2 = vec_mradds (a0, vx6, vx2); \ + t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6)); \ + \ + t7 = vec_mradds (a2, vx5, vx3); \ + t3 = vec_mradds (ma2, vx3, vx5); \ + \ + t6 = vec_adds (t8, t3); \ + t3 = vec_subs (t8, t3); \ + t8 = vec_subs (t1, t7); \ + t1 = vec_adds (t1, t7); \ + \ + t7 = vec_adds (t5, t2); \ + t2 = vec_subs (t5, t2); \ + t5 = vec_adds (t0, t4); \ + t0 = vec_subs (t0, t4); \ + t4 = vec_subs (t8, t3); \ + t3 = vec_adds (t8, t3); \ + \ + vy0 = vec_adds (t7, t1); \ + vy7 = vec_subs (t7, t1); \ + vy1 = vec_mradds (c4, t3, t5); \ + vy6 = vec_mradds (mc4, t3, t5); \ + vy2 = vec_mradds (c4, t4, t0); \ + vy5 = vec_mradds (mc4, t4, t0); \ + vy3 = vec_adds (t2, t6); \ + vy4 = vec_subs (t2, t6); \ \ shift = vec_splat_u16 (6); \ vx0 = vec_sra (vy0, shift); \ diff --git a/src/libmpeg2new/libmpeg2/idct_mmx.c b/src/libmpeg2new/libmpeg2/idct_mmx.c index c0e88f220..d5a5c08a4 100644 --- a/src/libmpeg2new/libmpeg2/idct_mmx.c +++ b/src/libmpeg2new/libmpeg2/idct_mmx.c @@ -28,11 +28,11 @@ #include <inttypes.h> #include "../include/mpeg2.h" -#include "mpeg2_internal.h" #include "../include/attributes.h" +#include "mpeg2_internal.h" #include "../include/mmx.h" -#define ROW_SHIFT 11 +#define ROW_SHIFT 15 #define COL_SHIFT 6 #define round(bias) ((int)(((bias)+0.5) * (1<<ROW_SHIFT))) @@ -701,7 +701,7 @@ do { \ static inline void block_add_DC (int16_t * const block, uint8_t * dest, const int stride, const int cpu) { - movd_v2r ((block[0] + 4) >> 3, mm0); + movd_v2r ((block[0] + 64) >> 7, mm0); pxor_r2r (mm1, mm1); movq_m2r (*dest, mm2); dup4 (mm0); @@ -763,7 +763,7 @@ void mpeg2_idct_copy_mmxext 
(int16_t * const block, uint8_t * const dest, void mpeg2_idct_add_mmxext (const int last, int16_t * const block, uint8_t * const dest, const int stride) { - if (last != 129 || (block[0] & 7) == 4) { + if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) { mmxext_idct (block); block_add (block, dest, stride); block_zero (block); @@ -786,7 +786,7 @@ void mpeg2_idct_copy_mmx (int16_t * const block, uint8_t * const dest, void mpeg2_idct_add_mmx (const int last, int16_t * const block, uint8_t * const dest, const int stride) { - if (last != 129 || (block[0] & 7) == 4) { + if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) { mmx_idct (block); block_add (block, dest, stride); block_zero (block); diff --git a/src/libmpeg2new/libmpeg2/libmpeg2convert.pc.in b/src/libmpeg2new/libmpeg2/libmpeg2convert.pc.in new file mode 100644 index 000000000..42383a6e2 --- /dev/null +++ b/src/libmpeg2new/libmpeg2/libmpeg2convert.pc.in @@ -0,0 +1,10 @@ +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: libmpeg2convert +Description: libmpeg2 helper functions for converting to various formats. 
+Version: @VERSION@ +Libs: -L${libdir} -lmpeg2convert +Cflags: -I${includedir}/@PACKAGE@ diff --git a/src/libmpeg2new/libmpeg2/motion_comp.c b/src/libmpeg2new/libmpeg2/motion_comp.c index cf9f807e2..d5a265d5c 100644 --- a/src/libmpeg2new/libmpeg2/motion_comp.c +++ b/src/libmpeg2new/libmpeg2/motion_comp.c @@ -26,6 +26,7 @@ #include <inttypes.h> #include "../include/mpeg2.h" +#include "../include/attributes.h" #include "mpeg2_internal.h" mpeg2_mc_t mpeg2_mc; @@ -51,9 +52,9 @@ void mpeg2_mc_init (uint32_t accel) mpeg2_mc = mpeg2_mc_alpha; else #endif -#ifdef LIBMPEG2_MLIB - if (accel & MPEG2_ACCEL_MLIB) - mpeg2_mc = mpeg2_mc_mlib; +#ifdef ARCH_SPARC + if (accel & MPEG2_ACCEL_SPARC_VIS) + mpeg2_mc = mpeg2_mc_vis; else #endif mpeg2_mc = mpeg2_mc_c; diff --git a/src/libmpeg2new/libmpeg2/motion_comp_alpha.c b/src/libmpeg2new/libmpeg2/motion_comp_alpha.c index efa0c44af..05cd55084 100644 --- a/src/libmpeg2new/libmpeg2/motion_comp_alpha.c +++ b/src/libmpeg2new/libmpeg2/motion_comp_alpha.c @@ -26,9 +26,10 @@ #include <inttypes.h> -#include "../include/mpeg2.h" +#include "mpeg2.h" +#include "attributes.h" #include "mpeg2_internal.h" -#include "../include/alpha_asm.h" +#include "alpha_asm.h" static inline uint64_t avg2 (uint64_t a, uint64_t b) { diff --git a/src/libmpeg2new/libmpeg2/motion_comp_altivec.c b/src/libmpeg2new/libmpeg2/motion_comp_altivec.c index f0b6fa691..4356aa6e7 100644 --- a/src/libmpeg2new/libmpeg2/motion_comp_altivec.c +++ b/src/libmpeg2new/libmpeg2/motion_comp_altivec.c @@ -30,7 +30,8 @@ #endif #include <inttypes.h> -#include "../include/mpeg2.h" +#include "mpeg2.h" +#include "attributes.h" #include "mpeg2_internal.h" typedef vector signed char vector_s8_t; diff --git a/src/libmpeg2new/libmpeg2/motion_comp_mmx.c b/src/libmpeg2new/libmpeg2/motion_comp_mmx.c index fc8e83abc..8694bdfea 100644 --- a/src/libmpeg2new/libmpeg2/motion_comp_mmx.c +++ b/src/libmpeg2new/libmpeg2/motion_comp_mmx.c @@ -28,8 +28,8 @@ #include <inttypes.h> #include "../include/mpeg2.h" 
-#include "mpeg2_internal.h" #include "../include/attributes.h" +#include "mpeg2_internal.h" #include "../include/mmx.h" #define CPU_MMXEXT 0 diff --git a/src/libmpeg2new/libmpeg2/motion_comp_vis.c b/src/libmpeg2new/libmpeg2/motion_comp_vis.c new file mode 100644 index 000000000..54c0f7e75 --- /dev/null +++ b/src/libmpeg2new/libmpeg2/motion_comp_vis.c @@ -0,0 +1,2061 @@ +/* + * motion_comp_vis.c + * Copyright (C) 2003 David S. Miller <davem@redhat.com> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef ARCH_SPARC + +#include <inttypes.h> + +#include "mpeg2.h" +#include "attributes.h" +#include "mpeg2_internal.h" +#include "vis.h" + +/* The trick used in some of this file is the formula from the MMX + * motion comp code, which is: + * + * (x+y+1)>>1 == (x|y)-((x^y)>>1) + * + * This allows us to average 8 bytes at a time in a 64-bit FPU reg. + * We avoid overflows by masking before we do the shift, and we + * implement the shift by multiplying by 1/2 using mul8x16. 
So in + * VIS this is (assume 'x' is in f0, 'y' is in f2, a repeating mask + * of '0xfe' is in f4, a repeating mask of '0x7f' is in f6, and + * the value 0x80808080 is in f8): + * + * fxor f0, f2, f10 + * fand f10, f4, f10 + * fmul8x16 f8, f10, f10 + * fand f10, f6, f10 + * for f0, f2, f12 + * fpsub16 f12, f10, f10 + */ + +#define DUP4(x) {x, x, x, x} +#define DUP8(x) {x, x, x, x, x, x, x, x} +static const int16_t constants1[] ATTR_ALIGN(8) = DUP4 (1); +static const int16_t constants2[] ATTR_ALIGN(8) = DUP4 (2); +static const int16_t constants3[] ATTR_ALIGN(8) = DUP4 (3); +static const int16_t constants6[] ATTR_ALIGN(8) = DUP4 (6); +static const int8_t constants_fe[] ATTR_ALIGN(8) = DUP8 (0xfe); +static const int8_t constants_7f[] ATTR_ALIGN(8) = DUP8 (0x7f); +static const int8_t constants128[] ATTR_ALIGN(8) = DUP8 (128); +static const int16_t constants256_512[] ATTR_ALIGN(8) = + {256, 512, 256, 512}; +static const int16_t constants256_1024[] ATTR_ALIGN(8) = + {256, 1024, 256, 1024}; + +#define REF_0 0 +#define REF_0_1 1 +#define REF_2 2 +#define REF_2_1 3 +#define REF_4 4 +#define REF_4_1 5 +#define REF_6 6 +#define REF_6_1 7 +#define REF_S0 8 +#define REF_S0_1 9 +#define REF_S2 10 +#define REF_S2_1 11 +#define REF_S4 12 +#define REF_S4_1 13 +#define REF_S6 14 +#define REF_S6_1 15 +#define DST_0 16 +#define DST_1 17 +#define DST_2 18 +#define DST_3 19 +#define CONST_1 20 +#define CONST_2 20 +#define CONST_3 20 +#define CONST_6 20 +#define MASK_fe 20 +#define CONST_128 22 +#define CONST_256 22 +#define CONST_512 22 +#define CONST_1024 22 +#define TMP0 24 +#define TMP1 25 +#define TMP2 26 +#define TMP3 27 +#define TMP4 28 +#define TMP5 29 +#define ZERO 30 +#define MASK_7f 30 + +#define TMP6 32 +#define TMP8 34 +#define TMP10 36 +#define TMP12 38 +#define TMP14 40 +#define TMP16 42 +#define TMP18 44 +#define TMP20 46 +#define TMP22 48 +#define TMP24 50 +#define TMP26 52 +#define TMP28 54 +#define TMP30 56 +#define TMP32 58 + +static void MC_put_o_16_vis (uint8_t * 
dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 16 : 0; + do { /* 5 cycles */ + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, 8, TMP2); + + vis_ld64_2(ref, offset, TMP4); + ref += stride; + + vis_faligndata(TMP0, TMP2, REF_0); + vis_st64(REF_0, dest[0]); + + vis_faligndata(TMP2, TMP4, REF_2); + vis_st64_2(REF_2, dest, 8); + dest += stride; + } while (--height); +} + +static void MC_put_o_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 8 : 0; + do { /* 4 cycles */ + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, offset, TMP2); + ref += stride; + + /* stall */ + + vis_faligndata(TMP0, TMP2, REF_0); + vis_st64(REF_0, dest[0]); + dest += stride; + } while (--height); +} + + +static void MC_avg_o_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int stride_8 = stride + 8; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 
16 : 0; + + vis_ld64(ref[0], TMP0); + + vis_ld64(ref[8], TMP2); + + vis_ld64_2(ref, offset, TMP4); + + vis_ld64(dest[0], DST_0); + + vis_ld64(dest[8], DST_2); + + vis_ld64(constants_fe[0], MASK_fe); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP2, TMP4, REF_2); + + vis_ld64(constants128[0], CONST_128); + + ref += stride; + height = (height >> 1) - 1; + + do { /* 24 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(DST_0, REF_0, TMP6); + + vis_ld64_2(ref, 8, TMP2); + vis_and(TMP6, MASK_fe, TMP6); + + vis_ld64_2(ref, offset, TMP4); + ref += stride; + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_xor(DST_2, REF_2, TMP8); + + vis_and(TMP8, MASK_fe, TMP8); + + vis_or(DST_0, REF_0, TMP10); + vis_ld64_2(dest, stride, DST_0); + vis_mul8x16(CONST_128, TMP8, TMP8); + + vis_or(DST_2, REF_2, TMP12); + vis_ld64_2(dest, stride_8, DST_2); + + vis_ld64(ref[0], TMP14); + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + + dest += stride; + vis_ld64_2(ref, 8, TMP16); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, offset, TMP18); + vis_faligndata(TMP2, TMP4, REF_2); + ref += stride; + + vis_xor(DST_0, REF_0, TMP20); + + vis_and(TMP20, MASK_fe, TMP20); + + vis_xor(DST_2, REF_2, TMP22); + vis_mul8x16(CONST_128, TMP20, TMP20); + + vis_and(TMP22, MASK_fe, TMP22); + + vis_or(DST_0, REF_0, TMP24); + vis_mul8x16(CONST_128, TMP22, TMP22); + + vis_or(DST_2, REF_2, TMP26); + + vis_ld64_2(dest, stride, DST_0); + vis_faligndata(TMP14, TMP16, REF_0); + + vis_ld64_2(dest, stride_8, DST_2); + vis_faligndata(TMP16, TMP18, REF_2); + + vis_and(TMP20, MASK_7f, TMP20); + + vis_and(TMP22, MASK_7f, TMP22); + + vis_psub16(TMP24, TMP20, TMP20); + vis_st64(TMP20, dest[0]); + + vis_psub16(TMP26, TMP22, TMP22); + vis_st64_2(TMP22, dest, 8); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + 
vis_xor(DST_0, REF_0, TMP6); + + vis_ld64_2(ref, 8, TMP2); + vis_and(TMP6, MASK_fe, TMP6); + + vis_ld64_2(ref, offset, TMP4); + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_xor(DST_2, REF_2, TMP8); + + vis_and(TMP8, MASK_fe, TMP8); + + vis_or(DST_0, REF_0, TMP10); + vis_ld64_2(dest, stride, DST_0); + vis_mul8x16(CONST_128, TMP8, TMP8); + + vis_or(DST_2, REF_2, TMP12); + vis_ld64_2(dest, stride_8, DST_2); + + vis_ld64(ref[0], TMP14); + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + + dest += stride; + vis_faligndata(TMP0, TMP2, REF_0); + + vis_faligndata(TMP2, TMP4, REF_2); + + vis_xor(DST_0, REF_0, TMP20); + + vis_and(TMP20, MASK_fe, TMP20); + + vis_xor(DST_2, REF_2, TMP22); + vis_mul8x16(CONST_128, TMP20, TMP20); + + vis_and(TMP22, MASK_fe, TMP22); + + vis_or(DST_0, REF_0, TMP24); + vis_mul8x16(CONST_128, TMP22, TMP22); + + vis_or(DST_2, REF_2, TMP26); + + vis_and(TMP20, MASK_7f, TMP20); + + vis_and(TMP22, MASK_7f, TMP22); + + vis_psub16(TMP24, TMP20, TMP20); + vis_st64(TMP20, dest[0]); + + vis_psub16(TMP26, TMP22, TMP22); + vis_st64_2(TMP22, dest, 8); +} + +static void MC_avg_o_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 
8 : 0; + + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, offset, TMP2); + + vis_ld64(dest[0], DST_0); + + vis_ld64(constants_fe[0], MASK_fe); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(constants128[0], CONST_128); + + ref += stride; + height = (height >> 1) - 1; + + do { /* 12 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(DST_0, REF_0, TMP4); + + vis_ld64_2(ref, offset, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(DST_0, REF_0, TMP6); + vis_ld64_2(dest, stride, DST_0); + ref += stride; + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_ld64(ref[0], TMP12); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, offset, TMP2); + vis_xor(DST_0, REF_0, TMP0); + ref += stride; + + vis_and(TMP0, MASK_fe, TMP0); + + vis_and(TMP4, MASK_7f, TMP4); + + vis_psub16(TMP6, TMP4, TMP4); + vis_st64(TMP4, dest[0]); + dest += stride; + vis_mul8x16(CONST_128, TMP0, TMP0); + + vis_or(DST_0, REF_0, TMP6); + vis_ld64_2(dest, stride, DST_0); + + vis_faligndata(TMP12, TMP2, REF_0); + + vis_and(TMP0, MASK_7f, TMP0); + + vis_psub16(TMP6, TMP0, TMP4); + vis_st64(TMP4, dest[0]); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(DST_0, REF_0, TMP4); + + vis_ld64_2(ref, offset, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(DST_0, REF_0, TMP6); + vis_ld64_2(dest, stride, DST_0); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_xor(DST_0, REF_0, TMP0); + + vis_and(TMP0, MASK_fe, TMP0); + + vis_and(TMP4, MASK_7f, TMP4); + + vis_psub16(TMP6, TMP4, TMP4); + vis_st64(TMP4, dest[0]); + dest += stride; + vis_mul8x16(CONST_128, TMP0, TMP0); + + vis_or(DST_0, REF_0, TMP6); + + vis_and(TMP0, MASK_7f, TMP0); + + vis_psub16(TMP6, TMP0, TMP4); + vis_st64(TMP4, dest[0]); +} + +static void MC_put_x_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; 
+ + ref = vis_alignaddr(ref); + + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, 8, TMP2); + + vis_ld64_2(ref, 16, TMP4); + + vis_ld64(constants_fe[0], MASK_fe); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(constants128[0], CONST_128); + vis_faligndata(TMP2, TMP4, REF_4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + } + + ref += stride; + height = (height >> 1) - 1; + + do { /* 34 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP6); + + vis_ld64_2(ref, 8, TMP2); + vis_xor(REF_4, REF_6, TMP8); + + vis_ld64_2(ref, 16, TMP4); + vis_and(TMP6, MASK_fe, TMP6); + ref += stride; + + vis_ld64(ref[0], TMP14); + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_and(TMP8, MASK_fe, TMP8); + + vis_ld64_2(ref, 8, TMP16); + vis_mul8x16(CONST_128, TMP8, TMP8); + vis_or(REF_0, REF_2, TMP10); + + vis_ld64_2(ref, 16, TMP18); + ref += stride; + vis_or(REF_4, REF_6, TMP12); + + vis_alignaddr_g0((void *)off); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_faligndata(TMP2, TMP4, REF_4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + } + + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + dest += stride; + + vis_xor(REF_0, REF_2, TMP6); + + vis_xor(REF_4, REF_6, TMP8); + + vis_and(TMP6, MASK_fe, TMP6); + + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_and(TMP8, MASK_fe, TMP8); + + vis_mul8x16(CONST_128, TMP8, TMP8); + vis_or(REF_0, REF_2, TMP10); + + vis_or(REF_4, REF_6, TMP12); + + vis_alignaddr_g0((void *)off); + + vis_faligndata(TMP14, TMP16, REF_0); + + vis_faligndata(TMP16, TMP18, REF_4); + + if (off 
!= 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP14, TMP16, REF_2); + vis_faligndata(TMP16, TMP18, REF_6); + } else { + vis_src1(TMP16, REF_2); + vis_src1(TMP18, REF_6); + } + + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP6); + + vis_ld64_2(ref, 8, TMP2); + vis_xor(REF_4, REF_6, TMP8); + + vis_ld64_2(ref, 16, TMP4); + vis_and(TMP6, MASK_fe, TMP6); + + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_and(TMP8, MASK_fe, TMP8); + + vis_mul8x16(CONST_128, TMP8, TMP8); + vis_or(REF_0, REF_2, TMP10); + + vis_or(REF_4, REF_6, TMP12); + + vis_alignaddr_g0((void *)off); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_faligndata(TMP2, TMP4, REF_4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + } + + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + dest += stride; + + vis_xor(REF_0, REF_2, TMP6); + + vis_xor(REF_4, REF_6, TMP8); + + vis_and(TMP6, MASK_fe, TMP6); + + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_and(TMP8, MASK_fe, TMP8); + + vis_mul8x16(CONST_128, TMP8, TMP8); + vis_or(REF_0, REF_2, TMP10); + + vis_or(REF_4, REF_6, TMP12); + + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); +} + +static void MC_put_x_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long 
off_plus_1 = off + 1; + + ref = vis_alignaddr(ref); + + vis_ld64(ref[0], TMP0); + + vis_ld64(ref[8], TMP2); + + vis_ld64(constants_fe[0], MASK_fe); + + vis_ld64(constants_7f[0], MASK_7f); + + vis_ld64(constants128[0], CONST_128); + vis_faligndata(TMP0, TMP2, REF_0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + } else { + vis_src1(TMP2, REF_2); + } + + ref += stride; + height = (height >> 1) - 1; + + do { /* 20 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP4); + + vis_ld64_2(ref, 8, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + ref += stride; + + vis_ld64(ref[0], TMP8); + vis_or(REF_0, REF_2, TMP6); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, 8, TMP10); + ref += stride; + vis_faligndata(TMP0, TMP2, REF_0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + } else { + vis_src1(TMP2, REF_2); + } + + vis_and(TMP4, MASK_7f, TMP4); + + vis_psub16(TMP6, TMP4, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_xor(REF_0, REF_2, TMP12); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_or(REF_0, REF_2, TMP14); + vis_mul8x16(CONST_128, TMP12, TMP12); + + vis_alignaddr_g0((void *)off); + vis_faligndata(TMP8, TMP10, REF_0); + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP8, TMP10, REF_2); + } else { + vis_src1(TMP10, REF_2); + } + + vis_and(TMP12, MASK_7f, TMP12); + + vis_psub16(TMP14, TMP12, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP4); + + vis_ld64_2(ref, 8, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(REF_0, REF_2, TMP6); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_alignaddr_g0((void *)off); + + vis_faligndata(TMP0, TMP2, REF_0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + } else { + vis_src1(TMP2, REF_2); + } + + 
vis_and(TMP4, MASK_7f, TMP4); + + vis_psub16(TMP6, TMP4, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_xor(REF_0, REF_2, TMP12); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_or(REF_0, REF_2, TMP14); + vis_mul8x16(CONST_128, TMP12, TMP12); + + vis_and(TMP12, MASK_7f, TMP12); + + vis_psub16(TMP14, TMP12, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; +} + +static void MC_avg_x_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + vis_ld64(constants3[0], CONST_3); + vis_fzero(ZERO); + vis_ld64(constants256_512[0], CONST_256); + + ref = vis_alignaddr(ref); + do { /* 26 cycles */ + vis_ld64(ref[0], TMP0); + + vis_ld64(ref[8], TMP2); + + vis_alignaddr_g0((void *)off); + + vis_ld64(ref[16], TMP4); + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(dest[8], DST_2); + vis_faligndata(TMP2, TMP4, REF_4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + } + + vis_mul8x16au(REF_0, CONST_256, TMP0); + + vis_pmerge(ZERO, REF_2, TMP4); + vis_mul8x16au(REF_0_1, CONST_256, TMP2); + + vis_pmerge(ZERO, REF_2_1, TMP6); + + vis_padd16(TMP0, TMP4, TMP0); + + vis_mul8x16al(DST_0, CONST_512, TMP4); + vis_padd16(TMP2, TMP6, TMP2); + + vis_mul8x16al(DST_1, CONST_512, TMP6); + + vis_mul8x16au(REF_6, CONST_256, TMP12); + + vis_padd16(TMP0, TMP4, TMP0); + vis_mul8x16au(REF_6_1, CONST_256, TMP14); + + vis_padd16(TMP2, TMP6, TMP2); + vis_mul8x16au(REF_4, CONST_256, TMP16); + + vis_padd16(TMP0, CONST_3, TMP8); + vis_mul8x16au(REF_4_1, CONST_256, TMP18); + + vis_padd16(TMP2, CONST_3, TMP10); + vis_pack16(TMP8, DST_0); + + vis_pack16(TMP10, DST_1); + vis_padd16(TMP16, TMP12, TMP0); + + vis_st64(DST_0, 
dest[0]); + vis_mul8x16al(DST_2, CONST_512, TMP4); + vis_padd16(TMP18, TMP14, TMP2); + + vis_mul8x16al(DST_3, CONST_512, TMP6); + vis_padd16(TMP0, CONST_3, TMP0); + + vis_padd16(TMP2, CONST_3, TMP2); + + vis_padd16(TMP0, TMP4, TMP0); + + vis_padd16(TMP2, TMP6, TMP2); + vis_pack16(TMP0, DST_2); + + vis_pack16(TMP2, DST_3); + vis_st64(DST_2, dest[8]); + + ref += stride; + dest += stride; + } while (--height); +} + +static void MC_avg_x_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_times_2 = stride << 1; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + vis_ld64(constants3[0], CONST_3); + vis_fzero(ZERO); + vis_ld64(constants256_512[0], CONST_256); + + ref = vis_alignaddr(ref); + height >>= 2; + do { /* 47 cycles */ + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, 8, TMP2); + ref += stride; + + vis_alignaddr_g0((void *)off); + + vis_ld64(ref[0], TMP4); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, 8, TMP6); + ref += stride; + + vis_ld64(ref[0], TMP8); + + vis_ld64_2(ref, 8, TMP10); + ref += stride; + vis_faligndata(TMP4, TMP6, REF_4); + + vis_ld64(ref[0], TMP12); + + vis_ld64_2(ref, 8, TMP14); + ref += stride; + vis_faligndata(TMP8, TMP10, REF_S0); + + vis_faligndata(TMP12, TMP14, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP0, TMP2, REF_2); + + vis_ld64_2(dest, stride, DST_2); + vis_faligndata(TMP4, TMP6, REF_6); + + vis_faligndata(TMP8, TMP10, REF_S2); + + vis_faligndata(TMP12, TMP14, REF_S6); + } else { + vis_ld64(dest[0], DST_0); + vis_src1(TMP2, REF_2); + + vis_ld64_2(dest, stride, DST_2); + vis_src1(TMP6, REF_6); + + vis_src1(TMP10, REF_S2); + + vis_src1(TMP14, REF_S6); + } + + vis_pmerge(ZERO, REF_0, TMP0); + vis_mul8x16au(REF_0_1, CONST_256, TMP2); + + vis_pmerge(ZERO, REF_2, TMP4); + vis_mul8x16au(REF_2_1, 
CONST_256, TMP6); + + vis_padd16(TMP0, CONST_3, TMP0); + vis_mul8x16al(DST_0, CONST_512, TMP16); + + vis_padd16(TMP2, CONST_3, TMP2); + vis_mul8x16al(DST_1, CONST_512, TMP18); + + vis_padd16(TMP0, TMP4, TMP0); + vis_mul8x16au(REF_4, CONST_256, TMP8); + + vis_padd16(TMP2, TMP6, TMP2); + vis_mul8x16au(REF_4_1, CONST_256, TMP10); + + vis_padd16(TMP0, TMP16, TMP0); + vis_mul8x16au(REF_6, CONST_256, TMP12); + + vis_padd16(TMP2, TMP18, TMP2); + vis_mul8x16au(REF_6_1, CONST_256, TMP14); + + vis_padd16(TMP8, CONST_3, TMP8); + vis_mul8x16al(DST_2, CONST_512, TMP16); + + vis_padd16(TMP8, TMP12, TMP8); + vis_mul8x16al(DST_3, CONST_512, TMP18); + + vis_padd16(TMP10, TMP14, TMP10); + vis_pack16(TMP0, DST_0); + + vis_pack16(TMP2, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + vis_padd16(TMP10, CONST_3, TMP10); + + vis_ld64_2(dest, stride, DST_0); + vis_padd16(TMP8, TMP16, TMP8); + + vis_ld64_2(dest, stride_times_2, TMP4/*DST_2*/); + vis_padd16(TMP10, TMP18, TMP10); + vis_pack16(TMP8, DST_2); + + vis_pack16(TMP10, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + + vis_mul8x16au(REF_S0_1, CONST_256, TMP2); + vis_pmerge(ZERO, REF_S0, TMP0); + + vis_pmerge(ZERO, REF_S2, TMP24); + vis_mul8x16au(REF_S2_1, CONST_256, TMP6); + + vis_padd16(TMP0, CONST_3, TMP0); + vis_mul8x16au(REF_S4, CONST_256, TMP8); + + vis_padd16(TMP2, CONST_3, TMP2); + vis_mul8x16au(REF_S4_1, CONST_256, TMP10); + + vis_padd16(TMP0, TMP24, TMP0); + vis_mul8x16au(REF_S6, CONST_256, TMP12); + + vis_padd16(TMP2, TMP6, TMP2); + vis_mul8x16au(REF_S6_1, CONST_256, TMP14); + + vis_padd16(TMP8, CONST_3, TMP8); + vis_mul8x16al(DST_0, CONST_512, TMP16); + + vis_padd16(TMP10, CONST_3, TMP10); + vis_mul8x16al(DST_1, CONST_512, TMP18); + + vis_padd16(TMP8, TMP12, TMP8); + vis_mul8x16al(TMP4/*DST_2*/, CONST_512, TMP20); + + vis_mul8x16al(TMP5/*DST_3*/, CONST_512, TMP22); + vis_padd16(TMP0, TMP16, TMP0); + + vis_padd16(TMP2, TMP18, TMP2); + vis_pack16(TMP0, DST_0); + + vis_padd16(TMP10, TMP14, TMP10); + 
vis_pack16(TMP2, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_padd16(TMP8, TMP20, TMP8); + + vis_padd16(TMP10, TMP22, TMP10); + vis_pack16(TMP8, DST_2); + + vis_pack16(TMP10, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + } while (--height); +} + +static void MC_put_y_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 16 : 0; + + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, 8, TMP2); + + vis_ld64_2(ref, offset, TMP4); + ref += stride; + + vis_ld64(ref[0], TMP6); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, 8, TMP8); + vis_faligndata(TMP2, TMP4, REF_4); + + vis_ld64_2(ref, offset, TMP10); + ref += stride; + + vis_ld64(constants_fe[0], MASK_fe); + vis_faligndata(TMP6, TMP8, REF_2); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP8, TMP10, REF_6); + + vis_ld64(constants128[0], CONST_128); + height = (height >> 1) - 1; + do { /* 24 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP12); + + vis_ld64_2(ref, 8, TMP2); + vis_xor(REF_4, REF_6, TMP16); + + vis_ld64_2(ref, offset, TMP4); + ref += stride; + vis_or(REF_0, REF_2, TMP14); + + vis_ld64(ref[0], TMP6); + vis_or(REF_4, REF_6, TMP18); + + vis_ld64_2(ref, 8, TMP8); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, offset, TMP10); + ref += stride; + vis_faligndata(TMP2, TMP4, REF_4); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_and(TMP16, MASK_fe, TMP16); + vis_mul8x16(CONST_128, TMP12, TMP12); + + vis_mul8x16(CONST_128, TMP16, TMP16); + vis_xor(REF_0, REF_2, TMP0); + + vis_xor(REF_4, REF_6, TMP2); + + vis_or(REF_0, REF_2, TMP20); + + vis_and(TMP12, MASK_7f, TMP12); + + vis_and(TMP16, MASK_7f, TMP16); + + vis_psub16(TMP14, TMP12, TMP12); + vis_st64(TMP12, dest[0]); + + vis_psub16(TMP18, TMP16, TMP16); + vis_st64_2(TMP16, dest, 8); + dest += stride; + + vis_or(REF_4, REF_6, TMP18); + + vis_and(TMP0, MASK_fe, TMP0); + + 
vis_and(TMP2, MASK_fe, TMP2); + vis_mul8x16(CONST_128, TMP0, TMP0); + + vis_faligndata(TMP6, TMP8, REF_2); + vis_mul8x16(CONST_128, TMP2, TMP2); + + vis_faligndata(TMP8, TMP10, REF_6); + + vis_and(TMP0, MASK_7f, TMP0); + + vis_and(TMP2, MASK_7f, TMP2); + + vis_psub16(TMP20, TMP0, TMP0); + vis_st64(TMP0, dest[0]); + + vis_psub16(TMP18, TMP2, TMP2); + vis_st64_2(TMP2, dest, 8); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP12); + + vis_ld64_2(ref, 8, TMP2); + vis_xor(REF_4, REF_6, TMP16); + + vis_ld64_2(ref, offset, TMP4); + vis_or(REF_0, REF_2, TMP14); + + vis_or(REF_4, REF_6, TMP18); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_faligndata(TMP2, TMP4, REF_4); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_and(TMP16, MASK_fe, TMP16); + vis_mul8x16(CONST_128, TMP12, TMP12); + + vis_mul8x16(CONST_128, TMP16, TMP16); + vis_xor(REF_0, REF_2, TMP0); + + vis_xor(REF_4, REF_6, TMP2); + + vis_or(REF_0, REF_2, TMP20); + + vis_and(TMP12, MASK_7f, TMP12); + + vis_and(TMP16, MASK_7f, TMP16); + + vis_psub16(TMP14, TMP12, TMP12); + vis_st64(TMP12, dest[0]); + + vis_psub16(TMP18, TMP16, TMP16); + vis_st64_2(TMP16, dest, 8); + dest += stride; + + vis_or(REF_4, REF_6, TMP18); + + vis_and(TMP0, MASK_fe, TMP0); + + vis_and(TMP2, MASK_fe, TMP2); + vis_mul8x16(CONST_128, TMP0, TMP0); + + vis_mul8x16(CONST_128, TMP2, TMP2); + + vis_and(TMP0, MASK_7f, TMP0); + + vis_and(TMP2, MASK_7f, TMP2); + + vis_psub16(TMP20, TMP0, TMP0); + vis_st64(TMP0, dest[0]); + + vis_psub16(TMP18, TMP2, TMP2); + vis_st64_2(TMP2, dest, 8); +} + +static void MC_put_y_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 
8 : 0; + + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, offset, TMP2); + ref += stride; + + vis_ld64(ref[0], TMP4); + + vis_ld64_2(ref, offset, TMP6); + ref += stride; + + vis_ld64(constants_fe[0], MASK_fe); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP4, TMP6, REF_2); + + vis_ld64(constants128[0], CONST_128); + height = (height >> 1) - 1; + do { /* 12 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP4); + + vis_ld64_2(ref, offset, TMP2); + ref += stride; + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(REF_0, REF_2, TMP6); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_faligndata(TMP0, TMP2, REF_0); + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, offset, TMP2); + ref += stride; + vis_xor(REF_0, REF_2, TMP12); + + vis_and(TMP4, MASK_7f, TMP4); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_mul8x16(CONST_128, TMP12, TMP12); + vis_or(REF_0, REF_2, TMP14); + + vis_psub16(TMP6, TMP4, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_faligndata(TMP0, TMP2, REF_2); + + vis_and(TMP12, MASK_7f, TMP12); + + vis_psub16(TMP14, TMP12, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP4); + + vis_ld64_2(ref, offset, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(REF_0, REF_2, TMP6); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_xor(REF_0, REF_2, TMP12); + + vis_and(TMP4, MASK_7f, TMP4); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_mul8x16(CONST_128, TMP12, TMP12); + vis_or(REF_0, REF_2, TMP14); + + vis_psub16(TMP6, TMP4, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_and(TMP12, MASK_7f, TMP12); + + vis_psub16(TMP14, TMP12, DST_0); + vis_st64(DST_0, dest[0]); +} + +static void MC_avg_y_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int stride_8 = stride + 8; + int stride_16; + int offset; + + vis_set_gsr(5 << 
VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 16 : 0; + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + + vis_ld64(ref[ 8], TMP2); + + vis_ld64_2(ref, offset, TMP4); + stride_16 = stride + offset; + + vis_ld64(constants3[0], CONST_3); + vis_faligndata(TMP0, TMP2, REF_2); + + vis_ld64(constants256_512[0], CONST_256); + vis_faligndata(TMP2, TMP4, REF_6); + height >>= 1; + + do { /* 31 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_pmerge(ZERO, REF_2, TMP12); + vis_mul8x16au(REF_2_1, CONST_256, TMP14); + + vis_ld64_2(ref, stride_8, TMP2); + vis_pmerge(ZERO, REF_6, TMP16); + vis_mul8x16au(REF_6_1, CONST_256, TMP18); + + vis_ld64_2(ref, stride_16, TMP4); + ref += stride; + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(dest, 8, DST_2); + vis_faligndata(TMP2, TMP4, REF_4); + + vis_ld64_2(ref, stride, TMP6); + vis_pmerge(ZERO, REF_0, TMP0); + vis_mul8x16au(REF_0_1, CONST_256, TMP2); + + vis_ld64_2(ref, stride_8, TMP8); + vis_pmerge(ZERO, REF_4, TMP4); + + vis_ld64_2(ref, stride_16, TMP10); + ref += stride; + + vis_ld64_2(dest, stride, REF_S0/*DST_4*/); + vis_faligndata(TMP6, TMP8, REF_2); + vis_mul8x16au(REF_4_1, CONST_256, TMP6); + + vis_ld64_2(dest, stride_8, REF_S2/*DST_6*/); + vis_faligndata(TMP8, TMP10, REF_6); + vis_mul8x16al(DST_0, CONST_512, TMP20); + + vis_padd16(TMP0, CONST_3, TMP0); + vis_mul8x16al(DST_1, CONST_512, TMP22); + + vis_padd16(TMP2, CONST_3, TMP2); + vis_mul8x16al(DST_2, CONST_512, TMP24); + + vis_padd16(TMP4, CONST_3, TMP4); + vis_mul8x16al(DST_3, CONST_512, TMP26); + + vis_padd16(TMP6, CONST_3, TMP6); + + vis_padd16(TMP12, TMP20, TMP12); + vis_mul8x16al(REF_S0, CONST_512, TMP20); + + vis_padd16(TMP14, TMP22, TMP14); + vis_mul8x16al(REF_S0_1, CONST_512, TMP22); + + vis_padd16(TMP16, TMP24, TMP16); + vis_mul8x16al(REF_S2, CONST_512, TMP24); + + vis_padd16(TMP18, TMP26, TMP18); + vis_mul8x16al(REF_S2_1, CONST_512, TMP26); + + vis_padd16(TMP12, TMP0, TMP12); + vis_mul8x16au(REF_2, 
CONST_256, TMP28); + + vis_padd16(TMP14, TMP2, TMP14); + vis_mul8x16au(REF_2_1, CONST_256, TMP30); + + vis_padd16(TMP16, TMP4, TMP16); + vis_mul8x16au(REF_6, CONST_256, REF_S4); + + vis_padd16(TMP18, TMP6, TMP18); + vis_mul8x16au(REF_6_1, CONST_256, REF_S6); + + vis_pack16(TMP12, DST_0); + vis_padd16(TMP28, TMP0, TMP12); + + vis_pack16(TMP14, DST_1); + vis_st64(DST_0, dest[0]); + vis_padd16(TMP30, TMP2, TMP14); + + vis_pack16(TMP16, DST_2); + vis_padd16(REF_S4, TMP4, TMP16); + + vis_pack16(TMP18, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + vis_padd16(REF_S6, TMP6, TMP18); + + vis_padd16(TMP12, TMP20, TMP12); + + vis_padd16(TMP14, TMP22, TMP14); + vis_pack16(TMP12, DST_0); + + vis_padd16(TMP16, TMP24, TMP16); + vis_pack16(TMP14, DST_1); + vis_st64(DST_0, dest[0]); + + vis_padd16(TMP18, TMP26, TMP18); + vis_pack16(TMP16, DST_2); + + vis_pack16(TMP18, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + } while (--height); +} + +static void MC_avg_y_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int stride_8; + int offset; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 
8 : 0; + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + + vis_ld64_2(ref, offset, TMP2); + stride_8 = stride + offset; + + vis_ld64(constants3[0], CONST_3); + vis_faligndata(TMP0, TMP2, REF_2); + + vis_ld64(constants256_512[0], CONST_256); + + height >>= 1; + do { /* 20 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_pmerge(ZERO, REF_2, TMP8); + vis_mul8x16au(REF_2_1, CONST_256, TMP10); + + vis_ld64_2(ref, stride_8, TMP2); + ref += stride; + + vis_ld64(dest[0], DST_0); + + vis_ld64_2(dest, stride, DST_2); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, stride, TMP4); + vis_mul8x16al(DST_0, CONST_512, TMP16); + vis_pmerge(ZERO, REF_0, TMP12); + + vis_ld64_2(ref, stride_8, TMP6); + ref += stride; + vis_mul8x16al(DST_1, CONST_512, TMP18); + vis_pmerge(ZERO, REF_0_1, TMP14); + + vis_padd16(TMP12, CONST_3, TMP12); + vis_mul8x16al(DST_2, CONST_512, TMP24); + + vis_padd16(TMP14, CONST_3, TMP14); + vis_mul8x16al(DST_3, CONST_512, TMP26); + + vis_faligndata(TMP4, TMP6, REF_2); + + vis_padd16(TMP8, TMP12, TMP8); + + vis_padd16(TMP10, TMP14, TMP10); + vis_mul8x16au(REF_2, CONST_256, TMP20); + + vis_padd16(TMP8, TMP16, TMP0); + vis_mul8x16au(REF_2_1, CONST_256, TMP22); + + vis_padd16(TMP10, TMP18, TMP2); + vis_pack16(TMP0, DST_0); + + vis_pack16(TMP2, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + vis_padd16(TMP12, TMP20, TMP12); + + vis_padd16(TMP14, TMP22, TMP14); + + vis_padd16(TMP12, TMP24, TMP0); + + vis_padd16(TMP14, TMP26, TMP2); + vis_pack16(TMP0, DST_2); + + vis_pack16(TMP2, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + } while (--height); +} + +static void MC_put_xy_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_8 = stride + 8; + int stride_16 = stride + 16; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + + vis_ld64(ref[ 0], TMP0); + 
vis_fzero(ZERO); + + vis_ld64(ref[ 8], TMP2); + + vis_ld64(ref[16], TMP4); + + vis_ld64(constants2[0], CONST_2); + vis_faligndata(TMP0, TMP2, REF_S0); + + vis_ld64(constants256_512[0], CONST_256); + vis_faligndata(TMP2, TMP4, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S2); + vis_faligndata(TMP2, TMP4, REF_S6); + } else { + vis_src1(TMP2, REF_S2); + vis_src1(TMP4, REF_S6); + } + + height >>= 1; + do { + vis_ld64_2(ref, stride, TMP0); + vis_mul8x16au(REF_S0, CONST_256, TMP12); + vis_pmerge(ZERO, REF_S0_1, TMP14); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, stride_8, TMP2); + vis_mul8x16au(REF_S2, CONST_256, TMP16); + vis_pmerge(ZERO, REF_S2_1, TMP18); + + vis_ld64_2(ref, stride_16, TMP4); + ref += stride; + vis_mul8x16au(REF_S4, CONST_256, TMP20); + vis_pmerge(ZERO, REF_S4_1, TMP22); + + vis_ld64_2(ref, stride, TMP6); + vis_mul8x16au(REF_S6, CONST_256, TMP24); + vis_pmerge(ZERO, REF_S6_1, TMP26); + + vis_ld64_2(ref, stride_8, TMP8); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, stride_16, TMP10); + ref += stride; + vis_faligndata(TMP2, TMP4, REF_4); + + vis_faligndata(TMP6, TMP8, REF_S0); + + vis_faligndata(TMP8, TMP10, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + vis_faligndata(TMP6, TMP8, REF_S2); + vis_faligndata(TMP8, TMP10, REF_S6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + vis_src1(TMP8, REF_S2); + vis_src1(TMP10, REF_S6); + } + + vis_mul8x16au(REF_0, CONST_256, TMP0); + vis_pmerge(ZERO, REF_0_1, TMP2); + + vis_mul8x16au(REF_2, CONST_256, TMP4); + vis_pmerge(ZERO, REF_2_1, TMP6); + + vis_padd16(TMP0, CONST_2, TMP8); + vis_mul8x16au(REF_4, CONST_256, TMP0); + + vis_padd16(TMP2, CONST_2, TMP10); + vis_mul8x16au(REF_4_1, CONST_256, TMP2); + + vis_padd16(TMP8, TMP4, TMP8); + vis_mul8x16au(REF_6, CONST_256, TMP4); + + vis_padd16(TMP10, TMP6, TMP10); + 
vis_mul8x16au(REF_6_1, CONST_256, TMP6); + + vis_padd16(TMP12, TMP8, TMP12); + + vis_padd16(TMP14, TMP10, TMP14); + + vis_padd16(TMP12, TMP16, TMP12); + + vis_padd16(TMP14, TMP18, TMP14); + vis_pack16(TMP12, DST_0); + + vis_pack16(TMP14, DST_1); + vis_st64(DST_0, dest[0]); + vis_padd16(TMP0, CONST_2, TMP12); + + vis_mul8x16au(REF_S0, CONST_256, TMP0); + vis_padd16(TMP2, CONST_2, TMP14); + + vis_mul8x16au(REF_S0_1, CONST_256, TMP2); + vis_padd16(TMP12, TMP4, TMP12); + + vis_mul8x16au(REF_S2, CONST_256, TMP4); + vis_padd16(TMP14, TMP6, TMP14); + + vis_mul8x16au(REF_S2_1, CONST_256, TMP6); + vis_padd16(TMP20, TMP12, TMP20); + + vis_padd16(TMP22, TMP14, TMP22); + + vis_padd16(TMP20, TMP24, TMP20); + + vis_padd16(TMP22, TMP26, TMP22); + vis_pack16(TMP20, DST_2); + + vis_pack16(TMP22, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + vis_padd16(TMP0, TMP4, TMP24); + + vis_mul8x16au(REF_S4, CONST_256, TMP0); + vis_padd16(TMP2, TMP6, TMP26); + + vis_mul8x16au(REF_S4_1, CONST_256, TMP2); + vis_padd16(TMP24, TMP8, TMP24); + + vis_padd16(TMP26, TMP10, TMP26); + vis_pack16(TMP24, DST_0); + + vis_pack16(TMP26, DST_1); + vis_st64(DST_0, dest[0]); + vis_pmerge(ZERO, REF_S6, TMP4); + + vis_pmerge(ZERO, REF_S6_1, TMP6); + + vis_padd16(TMP0, TMP4, TMP0); + + vis_padd16(TMP2, TMP6, TMP2); + + vis_padd16(TMP0, TMP12, TMP0); + + vis_padd16(TMP2, TMP14, TMP2); + vis_pack16(TMP0, DST_2); + + vis_pack16(TMP2, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + } while (--height); +} + +static void MC_put_xy_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_8 = stride + 8; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + + vis_ld64(ref[ 8], TMP2); + + vis_ld64(constants2[0], CONST_2); + + vis_ld64(constants256_512[0], CONST_256); + 
vis_faligndata(TMP0, TMP2, REF_S0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S2); + } else { + vis_src1(TMP2, REF_S2); + } + + height >>= 1; + do { /* 26 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_mul8x16au(REF_S0, CONST_256, TMP8); + vis_pmerge(ZERO, REF_S2, TMP12); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, stride_8, TMP2); + ref += stride; + vis_mul8x16au(REF_S0_1, CONST_256, TMP10); + vis_pmerge(ZERO, REF_S2_1, TMP14); + + vis_ld64_2(ref, stride, TMP4); + + vis_ld64_2(ref, stride_8, TMP6); + ref += stride; + vis_faligndata(TMP0, TMP2, REF_S4); + + vis_pmerge(ZERO, REF_S4, TMP18); + + vis_pmerge(ZERO, REF_S4_1, TMP20); + + vis_faligndata(TMP4, TMP6, REF_S0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S6); + vis_faligndata(TMP4, TMP6, REF_S2); + } else { + vis_src1(TMP2, REF_S6); + vis_src1(TMP6, REF_S2); + } + + vis_padd16(TMP18, CONST_2, TMP18); + vis_mul8x16au(REF_S6, CONST_256, TMP22); + + vis_padd16(TMP20, CONST_2, TMP20); + vis_mul8x16au(REF_S6_1, CONST_256, TMP24); + + vis_mul8x16au(REF_S0, CONST_256, TMP26); + vis_pmerge(ZERO, REF_S0_1, TMP28); + + vis_mul8x16au(REF_S2, CONST_256, TMP30); + vis_padd16(TMP18, TMP22, TMP18); + + vis_mul8x16au(REF_S2_1, CONST_256, TMP32); + vis_padd16(TMP20, TMP24, TMP20); + + vis_padd16(TMP8, TMP18, TMP8); + + vis_padd16(TMP10, TMP20, TMP10); + + vis_padd16(TMP8, TMP12, TMP8); + + vis_padd16(TMP10, TMP14, TMP10); + vis_pack16(TMP8, DST_0); + + vis_pack16(TMP10, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + vis_padd16(TMP18, TMP26, TMP18); + + vis_padd16(TMP20, TMP28, TMP20); + + vis_padd16(TMP18, TMP30, TMP18); + + vis_padd16(TMP20, TMP32, TMP20); + vis_pack16(TMP18, DST_2); + + vis_pack16(TMP20, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + } while (--height); +} + +static void MC_avg_xy_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref 
= (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_8 = stride + 8; + int stride_16 = stride + 16; + + vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + + vis_ld64(ref[ 8], TMP2); + + vis_ld64(ref[16], TMP4); + + vis_ld64(constants6[0], CONST_6); + vis_faligndata(TMP0, TMP2, REF_S0); + + vis_ld64(constants256_1024[0], CONST_256); + vis_faligndata(TMP2, TMP4, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S2); + vis_faligndata(TMP2, TMP4, REF_S6); + } else { + vis_src1(TMP2, REF_S2); + vis_src1(TMP4, REF_S6); + } + + height >>= 1; + do { /* 55 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_mul8x16au(REF_S0, CONST_256, TMP12); + vis_pmerge(ZERO, REF_S0_1, TMP14); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, stride_8, TMP2); + vis_mul8x16au(REF_S2, CONST_256, TMP16); + vis_pmerge(ZERO, REF_S2_1, TMP18); + + vis_ld64_2(ref, stride_16, TMP4); + ref += stride; + vis_mul8x16au(REF_S4, CONST_256, TMP20); + vis_pmerge(ZERO, REF_S4_1, TMP22); + + vis_ld64_2(ref, stride, TMP6); + vis_mul8x16au(REF_S6, CONST_256, TMP24); + vis_pmerge(ZERO, REF_S6_1, TMP26); + + vis_ld64_2(ref, stride_8, TMP8); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, stride_16, TMP10); + ref += stride; + vis_faligndata(TMP2, TMP4, REF_4); + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP6, TMP8, REF_S0); + + vis_ld64_2(dest, 8, DST_2); + vis_faligndata(TMP8, TMP10, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + vis_faligndata(TMP6, TMP8, REF_S2); + vis_faligndata(TMP8, TMP10, REF_S6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + vis_src1(TMP8, REF_S2); + vis_src1(TMP10, REF_S6); + } + + vis_mul8x16al(DST_0, CONST_1024, TMP30); + vis_pmerge(ZERO, REF_0, TMP0); + + 
vis_mul8x16al(DST_1, CONST_1024, TMP32); + vis_pmerge(ZERO, REF_0_1, TMP2); + + vis_mul8x16au(REF_2, CONST_256, TMP4); + vis_pmerge(ZERO, REF_2_1, TMP6); + + vis_mul8x16al(DST_2, CONST_1024, REF_0); + vis_padd16(TMP0, CONST_6, TMP0); + + vis_mul8x16al(DST_3, CONST_1024, REF_2); + vis_padd16(TMP2, CONST_6, TMP2); + + vis_padd16(TMP0, TMP4, TMP0); + vis_mul8x16au(REF_4, CONST_256, TMP4); + + vis_padd16(TMP2, TMP6, TMP2); + vis_mul8x16au(REF_4_1, CONST_256, TMP6); + + vis_padd16(TMP12, TMP0, TMP12); + vis_mul8x16au(REF_6, CONST_256, TMP8); + + vis_padd16(TMP14, TMP2, TMP14); + vis_mul8x16au(REF_6_1, CONST_256, TMP10); + + vis_padd16(TMP12, TMP16, TMP12); + vis_mul8x16au(REF_S0, CONST_256, REF_4); + + vis_padd16(TMP14, TMP18, TMP14); + vis_mul8x16au(REF_S0_1, CONST_256, REF_6); + + vis_padd16(TMP12, TMP30, TMP12); + + vis_padd16(TMP14, TMP32, TMP14); + vis_pack16(TMP12, DST_0); + + vis_pack16(TMP14, DST_1); + vis_st64(DST_0, dest[0]); + vis_padd16(TMP4, CONST_6, TMP4); + + vis_ld64_2(dest, stride, DST_0); + vis_padd16(TMP6, CONST_6, TMP6); + vis_mul8x16au(REF_S2, CONST_256, TMP12); + + vis_padd16(TMP4, TMP8, TMP4); + vis_mul8x16au(REF_S2_1, CONST_256, TMP14); + + vis_padd16(TMP6, TMP10, TMP6); + + vis_padd16(TMP20, TMP4, TMP20); + + vis_padd16(TMP22, TMP6, TMP22); + + vis_padd16(TMP20, TMP24, TMP20); + + vis_padd16(TMP22, TMP26, TMP22); + + vis_padd16(TMP20, REF_0, TMP20); + vis_mul8x16au(REF_S4, CONST_256, REF_0); + + vis_padd16(TMP22, REF_2, TMP22); + vis_pack16(TMP20, DST_2); + + vis_pack16(TMP22, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + + vis_ld64_2(dest, 8, DST_2); + vis_mul8x16al(DST_0, CONST_1024, TMP30); + vis_pmerge(ZERO, REF_S4_1, REF_2); + + vis_mul8x16al(DST_1, CONST_1024, TMP32); + vis_padd16(REF_4, TMP0, TMP8); + + vis_mul8x16au(REF_S6, CONST_256, REF_4); + vis_padd16(REF_6, TMP2, TMP10); + + vis_mul8x16au(REF_S6_1, CONST_256, REF_6); + vis_padd16(TMP8, TMP12, TMP8); + + vis_padd16(TMP10, TMP14, TMP10); + + vis_padd16(TMP8, TMP30, TMP8); 
+ + vis_padd16(TMP10, TMP32, TMP10); + vis_pack16(TMP8, DST_0); + + vis_pack16(TMP10, DST_1); + vis_st64(DST_0, dest[0]); + + vis_padd16(REF_0, TMP4, REF_0); + + vis_mul8x16al(DST_2, CONST_1024, TMP30); + vis_padd16(REF_2, TMP6, REF_2); + + vis_mul8x16al(DST_3, CONST_1024, TMP32); + vis_padd16(REF_0, REF_4, REF_0); + + vis_padd16(REF_2, REF_6, REF_2); + + vis_padd16(REF_0, TMP30, REF_0); + + /* stall */ + + vis_padd16(REF_2, TMP32, REF_2); + vis_pack16(REF_0, DST_2); + + vis_pack16(REF_2, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + } while (--height); +} + +static void MC_avg_xy_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_8 = stride + 8; + + vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + + vis_ld64(ref[0], TMP0); + vis_fzero(ZERO); + + vis_ld64_2(ref, 8, TMP2); + + vis_ld64(constants6[0], CONST_6); + + vis_ld64(constants256_1024[0], CONST_256); + vis_faligndata(TMP0, TMP2, REF_S0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S2); + } else { + vis_src1(TMP2, REF_S2); + } + + height >>= 1; + do { /* 31 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_mul8x16au(REF_S0, CONST_256, TMP8); + vis_pmerge(ZERO, REF_S0_1, TMP10); + + vis_ld64_2(ref, stride_8, TMP2); + ref += stride; + vis_mul8x16au(REF_S2, CONST_256, TMP12); + vis_pmerge(ZERO, REF_S2_1, TMP14); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, stride, TMP4); + vis_faligndata(TMP0, TMP2, REF_S4); + + vis_ld64_2(ref, stride_8, TMP6); + ref += stride; + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP4, TMP6, REF_S0); + + vis_ld64_2(dest, stride, DST_2); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S6); + vis_faligndata(TMP4, TMP6, REF_S2); + } else { + vis_src1(TMP2, REF_S6); + vis_src1(TMP6, 
REF_S2); + } + + vis_mul8x16al(DST_0, CONST_1024, TMP30); + vis_pmerge(ZERO, REF_S4, TMP22); + + vis_mul8x16al(DST_1, CONST_1024, TMP32); + vis_pmerge(ZERO, REF_S4_1, TMP24); + + vis_mul8x16au(REF_S6, CONST_256, TMP26); + vis_pmerge(ZERO, REF_S6_1, TMP28); + + vis_mul8x16au(REF_S0, CONST_256, REF_S4); + vis_padd16(TMP22, CONST_6, TMP22); + + vis_mul8x16au(REF_S0_1, CONST_256, REF_S6); + vis_padd16(TMP24, CONST_6, TMP24); + + vis_mul8x16al(DST_2, CONST_1024, REF_0); + vis_padd16(TMP22, TMP26, TMP22); + + vis_mul8x16al(DST_3, CONST_1024, REF_2); + vis_padd16(TMP24, TMP28, TMP24); + + vis_mul8x16au(REF_S2, CONST_256, TMP26); + vis_padd16(TMP8, TMP22, TMP8); + + vis_mul8x16au(REF_S2_1, CONST_256, TMP28); + vis_padd16(TMP10, TMP24, TMP10); + + vis_padd16(TMP8, TMP12, TMP8); + + vis_padd16(TMP10, TMP14, TMP10); + + vis_padd16(TMP8, TMP30, TMP8); + + vis_padd16(TMP10, TMP32, TMP10); + vis_pack16(TMP8, DST_0); + + vis_pack16(TMP10, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_padd16(REF_S4, TMP22, TMP12); + + vis_padd16(REF_S6, TMP24, TMP14); + + vis_padd16(TMP12, TMP26, TMP12); + + vis_padd16(TMP14, TMP28, TMP14); + + vis_padd16(TMP12, REF_0, TMP12); + + vis_padd16(TMP14, REF_2, TMP14); + vis_pack16(TMP12, DST_2); + + vis_pack16(TMP14, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + } while (--height); +} + +MPEG2_MC_EXTERN(vis); + +#endif /* !(ARCH_SPARC) */ diff --git a/src/libmpeg2new/libmpeg2/mpeg2_internal.h b/src/libmpeg2new/libmpeg2/mpeg2_internal.h index ccd1bc4b5..fec7d4744 100644 --- a/src/libmpeg2new/libmpeg2/mpeg2_internal.h +++ b/src/libmpeg2new/libmpeg2/mpeg2_internal.h @@ -21,6 +21,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#define STATE_INTERNAL_NORETURN ((mpeg2_state_t)-1) + /* macroblock modes */ #define MACROBLOCK_INTRA 1 #define MACROBLOCK_PATTERN 2 @@ -29,12 +31,11 @@ #define MACROBLOCK_QUANT 16 #define DCT_TYPE_INTERLACED 32 /* motion_type */ -#define MOTION_TYPE_MASK (3*64) -#define 
MOTION_TYPE_BASE 64 -#define MC_FIELD (1*64) -#define MC_FRAME (2*64) -#define MC_16X8 (2*64) -#define MC_DMV (3*64) +#define MOTION_TYPE_SHIFT 6 +#define MC_FIELD 1 +#define MC_FRAME 2 +#define MC_16X8 2 +#define MC_DMV 3 /* picture structure */ #define TOP_FIELD 1 @@ -47,6 +48,8 @@ #define B_TYPE 3 #define D_TYPE 4 +typedef void mpeg2_mc_fct (uint8_t *, const uint8_t *, int, int); + typedef struct { uint8_t * ref[2][3]; uint8_t ** ref2[2]; @@ -54,27 +57,27 @@ typedef struct { int f_code[2]; } motion_t; +typedef void motion_parser_t (mpeg2_decoder_t * decoder, + motion_t * motion, + mpeg2_mc_fct * const * table); + struct mpeg2_decoder_s { /* first, state that carries information from one macroblock to the */ /* next inside a slice, and is never used outside of mpeg2_slice() */ - /* DCT coefficients - should be kept aligned ! */ - int16_t DCTblock[64]; - /* bit parsing stuff */ uint32_t bitstream_buf; /* current 32 bit working set */ int bitstream_bits; /* used bits in working set */ const uint8_t * bitstream_ptr; /* buffer with stream data */ uint8_t * dest[3]; - uint8_t * picture_dest[3]; - void (* convert) (void * fbuf_id, uint8_t * const * src, - unsigned int v_offset); - void * fbuf_id; int offset; int stride; int uv_stride; + int slice_stride; + int slice_uv_stride; + int stride_frame; unsigned int limit_x; unsigned int limit_y_16; unsigned int limit_y_8; @@ -85,24 +88,34 @@ struct mpeg2_decoder_s { /* predictors */ motion_t b_motion; motion_t f_motion; + motion_parser_t * motion_parser[5]; /* predictor for DC coefficients in intra blocks */ int16_t dc_dct_pred[3]; - int quantizer_scale; /* remove */ - int dmv_offset; /* remove */ - unsigned int v_offset; /* remove */ + /* DCT coefficients */ + int16_t DCTblock[64] ATTR_ALIGN(64); + + uint8_t * picture_dest[3]; + void (* convert) (void * convert_id, uint8_t * const * src, + unsigned int v_offset); + void * convert_id; + + int dmv_offset; + unsigned int v_offset; /* now non-slice-specific information */ /* 
sequence header stuff */ - uint8_t intra_quantizer_matrix [64]; - uint8_t non_intra_quantizer_matrix [64]; + uint16_t * quantizer_matrix[4]; + uint16_t (* chroma_quantizer[2])[64]; + uint16_t quantizer_prescale[4][32][64]; /* The width and height of the picture snapped to macroblock units */ int width; int height; int vertical_position_extension; + int chroma_format; /* picture header stuff */ @@ -120,8 +133,6 @@ struct mpeg2_decoder_s { /* bool to indicate whether intra blocks have motion vectors */ /* (for concealment) */ int concealment_motion_vectors; - /* bit to indicate which quantization table to use */ - int q_scale_type; /* bool to use different vlc tables */ int intra_vlc_format; /* used for DMV MC */ @@ -161,10 +172,10 @@ struct mpeg2dec_s { /* last start code ? */ uint8_t code; - /* PTS */ - uint32_t pts_current, pts_previous; - int num_pts; - int bytes_since_pts; + /* picture tags */ + uint32_t tag_current, tag2_current, tag_previous, tag2_previous; + int num_tags; + int bytes_since_tag; int first; int alloc_index_user; @@ -172,9 +183,13 @@ struct mpeg2dec_s { uint8_t first_decode_slice; uint8_t nb_decode_slices; + unsigned int user_data_len; + mpeg2_sequence_t new_sequence; mpeg2_sequence_t sequence; + mpeg2_gop_t new_gop; mpeg2_gop_t gop; + mpeg2_picture_t new_picture; mpeg2_picture_t pictures[4]; mpeg2_picture_t * picture; /*const*/ mpeg2_fbuf_t * fbuf[3]; /* 0: current fbuf, 1-2: prediction fbufs */ @@ -184,11 +199,13 @@ struct mpeg2dec_s { uint8_t * yuv_buf[3][3]; int yuv_index; - void * convert_id; - int convert_size[3]; - void (* convert_start) (void * id, uint8_t * const * dest, int flags); - void (* convert_copy) (void * id, uint8_t * const * src, - unsigned int v_offset); + mpeg2_convert_t * convert; + void * convert_arg; + unsigned int convert_id_size; + int convert_stride; + void (* convert_start) (void * id, const mpeg2_fbuf_t * fbuf, + const mpeg2_picture_t * picture, + const mpeg2_gop_t * gop); uint8_t * buf_start; uint8_t * buf_end; @@ 
-196,8 +213,9 @@ struct mpeg2dec_s { int16_t display_offset_x, display_offset_y; int copy_matrix; - uint8_t intra_quantizer_matrix [64]; - uint8_t non_intra_quantizer_matrix [64]; + int8_t q_scale_type, scaled[4]; + uint8_t quantizer_matrix[4][64]; + uint8_t new_quantizer_matrix[4][64]; }; typedef struct { @@ -207,50 +225,35 @@ typedef struct { int dummy; } cpu_state_t; -/* alloc.c */ -#define ALLOC_MPEG2DEC 0 -#define ALLOC_CHUNK 1 -#define ALLOC_YUV 2 -#define ALLOC_CONVERT_ID 3 -#define ALLOC_CONVERTED 4 -void * mpeg2_malloc (int size, int reason); -void mpeg2_free (void * buf); - /* cpu_accel.c */ -uint32_t mpeg2_detect_accel (void); +uint32_t mpeg2_detect_accel (uint32_t accel); /* cpu_state.c */ void mpeg2_cpu_state_init (uint32_t accel); /* decode.c */ -mpeg2_state_t mpeg2_seek_sequence (mpeg2dec_t * mpeg2dec); +mpeg2_state_t mpeg2_seek_header (mpeg2dec_t * mpeg2dec); mpeg2_state_t mpeg2_parse_header (mpeg2dec_t * mpeg2dec); /* header.c */ void mpeg2_header_state_init (mpeg2dec_t * mpeg2dec); +void mpeg2_reset_info (mpeg2_info_t * info); int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec); int mpeg2_header_gop (mpeg2dec_t * mpeg2dec); mpeg2_state_t mpeg2_header_picture_start (mpeg2dec_t * mpeg2dec); int mpeg2_header_picture (mpeg2dec_t * mpeg2dec); int mpeg2_header_extension (mpeg2dec_t * mpeg2dec); int mpeg2_header_user_data (mpeg2dec_t * mpeg2dec); -void mpeg2_header_matrix_finalize (mpeg2dec_t * mpeg2dec); void mpeg2_header_sequence_finalize (mpeg2dec_t * mpeg2dec); +void mpeg2_header_gop_finalize (mpeg2dec_t * mpeg2dec); +void mpeg2_header_picture_finalize (mpeg2dec_t * mpeg2dec, uint32_t accels); mpeg2_state_t mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec); mpeg2_state_t mpeg2_header_end (mpeg2dec_t * mpeg2dec); -void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int coding_type); +void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int b_type); /* idct.c */ void mpeg2_idct_init (uint32_t accel); -/* idct_mlib.c */ -void mpeg2_idct_add_mlib (int last, int16_t * block, 
- uint8_t * dest, int stride); -void mpeg2_idct_copy_mlib_non_ieee (int16_t * block, uint8_t * dest, - int stride); -void mpeg2_idct_add_mlib_non_ieee (int last, int16_t * block, - uint8_t * dest, int stride); - /* idct_mmx.c */ void mpeg2_idct_copy_mmxext (int16_t * block, uint8_t * dest, int stride); void mpeg2_idct_add_mmxext (int last, int16_t * block, @@ -278,8 +281,6 @@ void mpeg2_idct_alpha_init (void); /* motion_comp.c */ void mpeg2_mc_init (uint32_t accel); -typedef void mpeg2_mc_fct (uint8_t *, const uint8_t *, int, int); - typedef struct { mpeg2_mc_fct * put [8]; mpeg2_mc_fct * avg [8]; @@ -298,4 +299,4 @@ extern mpeg2_mc_t mpeg2_mc_mmxext; extern mpeg2_mc_t mpeg2_mc_3dnow; extern mpeg2_mc_t mpeg2_mc_altivec; extern mpeg2_mc_t mpeg2_mc_alpha; -extern mpeg2_mc_t mpeg2_mc_mlib; +extern mpeg2_mc_t mpeg2_mc_vis; diff --git a/src/libmpeg2new/libmpeg2/rgb.c b/src/libmpeg2new/libmpeg2/rgb.c new file mode 100644 index 000000000..8863b0b9f --- /dev/null +++ b/src/libmpeg2new/libmpeg2/rgb.c @@ -0,0 +1,598 @@ +/* + * rgb.c + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" +#include "attributes.h" + +#include <inttypes.h> + +#include "mpeg2.h" +#include "mpeg2convert.h" +#include "convert_internal.h" + +static int matrix_coefficients = 6; + +static const int Inverse_Table_6_9[8][4] = { + {117504, 138453, 13954, 34903}, /* no sequence_display_extension */ + {117504, 138453, 13954, 34903}, /* ITU-R Rec. 709 (1990) */ + {104597, 132201, 25675, 53279}, /* unspecified */ + {104597, 132201, 25675, 53279}, /* reserved */ + {104448, 132798, 24759, 53109}, /* FCC */ + {104597, 132201, 25675, 53279}, /* ITU-R Rec. 624-4 System B, G */ + {104597, 132201, 25675, 53279}, /* SMPTE 170M */ + {117579, 136230, 16907, 35559} /* SMPTE 240M (1987) */ +}; + +static const uint8_t dither[] ATTR_ALIGN(32) = { + 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, + 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, + 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, + 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, + 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35, + 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35, + 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35, + 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35, + 3, 9, 27, 63, 1, 4, 25, 59, 5, 12, 28, 67, 3, 7, 26, 62, + 3, 9, 27, 63, 1, 4, 25, 59, 5, 12, 28, 67, 3, 7, 26, 62, + 3, 9, 27, 63, 1, 4, 25, 59, 5, 12, 28, 67, 3, 7, 26, 62, + 3, 9, 27, 63, 1, 4, 25, 59, 5, 12, 28, 67, 3, 7, 26, 62, + 19, 45, 11, 27, 17, 41, 9, 22, 21, 49, 13, 30, 19, 44, 11, 26, + 19, 45, 11, 27, 17, 41, 9, 22, 21, 49, 13, 30, 19, 44, 11, 26, + 19, 45, 11, 27, 17, 41, 9, 22, 21, 49, 13, 30, 19, 44, 11, 26, + 19, 45, 11, 27, 17, 41, 9, 22, 21, 49, 13, 30, 19, 44, 11, 26, + 0, 2, 24, 57, 6, 15, 30, 70, 0, 1, 
23, 55, 6, 14, 29, 69, + 0, 2, 24, 57, 6, 15, 30, 70, 0, 1, 23, 55, 6, 14, 29, 69, + 0, 2, 24, 57, 6, 15, 30, 70, 0, 1, 23, 55, 6, 14, 29, 69, + 0, 2, 24, 57, 6, 15, 30, 70, 0, 1, 23, 55, 6, 14, 29, 69, + 16, 38, 8, 20, 22, 52, 14, 34, 16, 37, 8, 19, 21, 51, 14, 33, + 16, 38, 8, 20, 22, 52, 14, 34, 16, 37, 8, 19, 21, 51, 14, 33, + 16, 38, 8, 20, 22, 52, 14, 34, 16, 37, 8, 19, 21, 51, 14, 33, + 16, 38, 8, 20, 22, 52, 14, 34, 16, 37, 8, 19, 21, 51, 14, 33, + 4, 11, 28, 66, 2, 6, 26, 61, 4, 10, 27, 65, 2, 5, 25, 60, + 4, 11, 28, 66, 2, 6, 26, 61, 4, 10, 27, 65, 2, 5, 25, 60, + 4, 11, 28, 66, 2, 6, 26, 61, 4, 10, 27, 65, 2, 5, 25, 60, + 4, 11, 28, 66, 2, 6, 26, 61, 4, 10, 27, 65, 2, 5, 25, 60, + 20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23, + 20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23, + 20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23, + 20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23, + 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, + 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, + 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, + 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, + 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35, + 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35 +}; + +static const uint8_t dither_temporal[64] = { + 0x00, 0x20, 0x21, 0x01, 0x40, 0x60, 0x61, 0x41, + 0x42, 0x62, 0x63, 0x43, 0x02, 0x22, 0x23, 0x03, + 0x80, 0xa0, 0xa1, 0x81, 0xc0, 0xe0, 0xe1, 0xc1, + 0xc2, 0xe2, 0xe3, 0xc3, 0x82, 0xa2, 0xa3, 0x83, + 0x84, 0xa4, 0xa5, 0x85, 0xc4, 0xe4, 0xe5, 0xc5, + 0xc6, 0xe6, 0xe7, 0xc7, 0x86, 0xa6, 0xa7, 0x87, + 0x04, 0x24, 0x25, 0x05, 0x44, 0x64, 0x65, 0x45, + 0x46, 0x66, 0x67, 0x47, 0x06, 0x26, 0x27, 0x07 +}; + +typedef struct { + convert_rgb_t base; + void * table_rV[256]; + void * table_gU[256]; + int table_gV[256]; + void * table_bU[256]; +} convert_rgb_c_t; + +#define RGB(type,i) \ + U = pu[i]; \ + V = pv[i]; \ + r 
= (type *) id->table_rV[V]; \ + g = (type *) (((uint8_t *)id->table_gU[U]) + id->table_gV[V]); \ + b = (type *) id->table_bU[U]; + +#define DST(py,dst,i,j) \ + Y = py[i]; \ + dst[i] = r[Y] + g[Y] + b[Y]; + +#define DSTRGB(py,dst,i,j) \ + Y = py[i]; \ + dst[3*i] = r[Y]; dst[3*i+1] = g[Y]; dst[3*i+2] = b[Y]; + +#define DSTBGR(py,dst,i,j) \ + Y = py[i]; \ + dst[3*i] = b[Y]; dst[3*i+1] = g[Y]; dst[3*i+2] = r[Y]; + +#define DSTDITHER(py,dst,i,j) \ + Y = py[i]; \ + dst[i] = r[Y+pd[2*i+96*j]] + g[Y-pd[2*i+96*j]] + b[Y+pd[2*i+1+96*j]]; + +#define DO(x) x +#define SKIP(x) + +#define DECLARE_420(func,type,num,DST,DITHER) \ +static void func (void * _id, uint8_t * const * src, \ + unsigned int v_offset) \ +{ \ + const convert_rgb_c_t * const id = (convert_rgb_c_t *) _id; \ + type * dst_1; \ + const uint8_t * py_1, * pu, * pv; \ + int i; \ + DITHER(uint8_t dithpos = id->base.dither_offset;) \ + \ + dst_1 = (type *)(id->base.rgb_ptr + id->base.rgb_slice * v_offset); \ + py_1 = src[0]; pu = src[1]; pv = src[2]; \ + \ + i = 8; \ + do { \ + const uint8_t * py_2; \ + int j, U, V, Y; \ + const type * r, * g, * b; \ + type * dst_2; \ + DITHER(const uint8_t * const pd = dither + 2 * dithpos;) \ + \ + dst_2 = (type *)((char *)dst_1 + id->base.rgb_stride); \ + py_2 = py_1 + id->base.y_stride; \ + j = id->base.width; \ + do { \ + RGB (type, 0) \ + DST (py_1, dst_1, 0, 0) \ + DST (py_1, dst_1, 1, 0) \ + DST (py_2, dst_2, 0, 1) \ + DST (py_2, dst_2, 1, 1) \ + \ + RGB (type, 1) \ + DST (py_2, dst_2, 2, 1) \ + DST (py_2, dst_2, 3, 1) \ + DST (py_1, dst_1, 2, 0) \ + DST (py_1, dst_1, 3, 0) \ + \ + RGB (type, 2) \ + DST (py_1, dst_1, 4, 0) \ + DST (py_1, dst_1, 5, 0) \ + DST (py_2, dst_2, 4, 1) \ + DST (py_2, dst_2, 5, 1) \ + \ + RGB (type, 3) \ + DST (py_2, dst_2, 6, 1) \ + DST (py_2, dst_2, 7, 1) \ + DST (py_1, dst_1, 6, 0) \ + DST (py_1, dst_1, 7, 0) \ + \ + pu += 4; \ + pv += 4; \ + py_1 += 8; \ + py_2 += 8; \ + dst_1 += 8 * num; \ + dst_2 += 8 * num; \ + } while (--j); \ + if (--i == 
id->base.field) { \ + dst_1 = (type *)(id->base.rgb_ptr + \ + id->base.rgb_slice * (v_offset + 1)); \ + py_1 = src[0] + id->base.y_stride_frame; \ + pu = src[1] + id->base.uv_stride_frame; \ + pv = src[2] + id->base.uv_stride_frame; \ + } else { \ + py_1 += id->base.y_increm; \ + pu += id->base.uv_increm; \ + pv += id->base.uv_increm; \ + dst_1 = (type *)((char *)dst_1 + id->base.rgb_increm); \ + DITHER(dithpos += id->base.dither_stride;) \ + } \ + } while (i); \ +} + +DECLARE_420 (rgb_c_32_420, uint32_t, 1, DST, SKIP) +DECLARE_420 (rgb_c_24_rgb_420, uint8_t, 3, DSTRGB, SKIP) +DECLARE_420 (rgb_c_24_bgr_420, uint8_t, 3, DSTBGR, SKIP) +DECLARE_420 (rgb_c_16_420, uint16_t, 1, DST, SKIP) +DECLARE_420 (rgb_c_8_420, uint8_t, 1, DSTDITHER, DO) + +#define DECLARE_422(func,type,num,DST,DITHER) \ +static void func (void * _id, uint8_t * const * src, \ + unsigned int v_offset) \ +{ \ + const convert_rgb_c_t * const id = (convert_rgb_c_t *) _id; \ + type * dst; \ + const uint8_t * py, * pu, * pv; \ + int i; \ + DITHER(uint8_t dithpos = id->base.dither_offset;) \ + \ + dst = (type *)(id->base.rgb_ptr + id->base.rgb_stride * v_offset); \ + py = src[0]; pu = src[1]; pv = src[2]; \ + \ + i = 16; \ + do { \ + int j, U, V, Y; \ + const type * r, * g, * b; \ + DITHER(const uint8_t * const pd = dither + 2 * dithpos;) \ + \ + j = id->base.width; \ + do { \ + RGB (type, 0) \ + DST (py, dst, 0, 0) \ + DST (py, dst, 1, 0) \ + \ + RGB (type, 1) \ + DST (py, dst, 2, 0) \ + DST (py, dst, 3, 0) \ + \ + RGB (type, 2) \ + DST (py, dst, 4, 0) \ + DST (py, dst, 5, 0) \ + \ + RGB (type, 3) \ + DST (py, dst, 6, 0) \ + DST (py, dst, 7, 0) \ + \ + pu += 4; \ + pv += 4; \ + py += 8; \ + dst += 8 * num; \ + } while (--j); \ + py += id->base.y_increm; \ + pu += id->base.uv_increm; \ + pv += id->base.uv_increm; \ + dst = (type *)((char *)dst + id->base.rgb_increm); \ + DITHER(dithpos += id->base.dither_stride;) \ + } while (--i); \ +} + +DECLARE_422 (rgb_c_32_422, uint32_t, 1, DST, SKIP) +DECLARE_422 
(rgb_c_24_rgb_422, uint8_t, 3, DSTRGB, SKIP) +DECLARE_422 (rgb_c_24_bgr_422, uint8_t, 3, DSTBGR, SKIP) +DECLARE_422 (rgb_c_16_422, uint16_t, 1, DST, SKIP) +DECLARE_422 (rgb_c_8_422, uint8_t, 1, DSTDITHER, DO) + +#define DECLARE_444(func,type,num,DST,DITHER) \ +static void func (void * _id, uint8_t * const * src, \ + unsigned int v_offset) \ +{ \ + const convert_rgb_c_t * const id = (convert_rgb_c_t *) _id; \ + type * dst; \ + const uint8_t * py, * pu, * pv; \ + int i; \ + DITHER(uint8_t dithpos = id->base.dither_offset;) \ + \ + dst = (type *)(id->base.rgb_ptr + id->base.rgb_stride * v_offset); \ + py = src[0]; pu = src[1]; pv = src[2]; \ + \ + i = 16; \ + do { \ + int j, U, V, Y; \ + const type * r, * g, * b; \ + DITHER(const uint8_t * const pd = dither + 2 * dithpos;) \ + \ + j = id->base.width; \ + do { \ + RGB (type, 0) \ + DST (py, dst, 0, 0) \ + RGB (type, 1) \ + DST (py, dst, 1, 0) \ + RGB (type, 2) \ + DST (py, dst, 2, 0) \ + RGB (type, 3) \ + DST (py, dst, 3, 0) \ + RGB (type, 4) \ + DST (py, dst, 4, 0) \ + RGB (type, 5) \ + DST (py, dst, 5, 0) \ + RGB (type, 6) \ + DST (py, dst, 6, 0) \ + RGB (type, 7) \ + DST (py, dst, 7, 0) \ + \ + pu += 8; \ + pv += 8; \ + py += 8; \ + dst += 8 * num; \ + } while (--j); \ + py += id->base.y_increm; \ + pu += id->base.y_increm; \ + pv += id->base.y_increm; \ + dst = (type *)((char *)dst + id->base.rgb_increm); \ + DITHER(dithpos += id->base.dither_stride;) \ + } while (--i); \ +} + +DECLARE_444 (rgb_c_32_444, uint32_t, 1, DST, SKIP) +DECLARE_444 (rgb_c_24_rgb_444, uint8_t, 3, DSTRGB, SKIP) +DECLARE_444 (rgb_c_24_bgr_444, uint8_t, 3, DSTBGR, SKIP) +DECLARE_444 (rgb_c_16_444, uint16_t, 1, DST, SKIP) +DECLARE_444 (rgb_c_8_444, uint8_t, 1, DSTDITHER, DO) + +static void rgb_start (void * _id, const mpeg2_fbuf_t * fbuf, + const mpeg2_picture_t * picture, + const mpeg2_gop_t * gop) +{ + convert_rgb_t * id = (convert_rgb_t *) _id; + int uv_stride = id->uv_stride_frame; + id->y_stride = id->y_stride_frame; + id->rgb_ptr = 
fbuf->buf[0]; + id->rgb_slice = id->rgb_stride = id->rgb_stride_frame; + id->dither_stride = 32; + id->dither_offset = dither_temporal[picture->temporal_reference & 63]; + id->field = 0; + if ((picture->nb_fields == 1) || + (id->chroma420 && !(picture->flags & PIC_FLAG_PROGRESSIVE_FRAME))) { + uv_stride <<= 1; + id->y_stride <<= 1; + id->rgb_stride <<= 1; + id->dither_stride <<= 1; + id->dither_offset += 16; + if (picture->nb_fields == 1) { + id->rgb_slice <<= 1; + if (!(picture->flags & PIC_FLAG_TOP_FIELD_FIRST)) { + id->rgb_ptr += id->rgb_stride_frame; + id->dither_offset += 32; + } + } else + id->field = 8 >> id->convert420; + } + id->y_increm = (id->y_stride << id->convert420) - id->y_stride_frame; + id->uv_increm = uv_stride - id->uv_stride_frame; + id->rgb_increm = (id->rgb_stride << id->convert420) - id->rgb_stride_min; + id->dither_stride <<= id->convert420; +} + +static inline int div_round (int dividend, int divisor) +{ + if (dividend > 0) + return (dividend + (divisor>>1)) / divisor; + else + return -((-dividend + (divisor>>1)) / divisor); +} + +static unsigned int rgb_c_init (convert_rgb_c_t * id, + mpeg2convert_rgb_order_t order, + unsigned int bpp) +{ + int i; + uint8_t table_Y[1024]; + uint32_t * table_32 = 0; + uint16_t * table_16 = 0; + uint8_t * table_8 = 0; + uint8_t * table_332 = 0; + int entry_size = 0; + void * table_r = 0; + void * table_g = 0; + void * table_b = 0; + + int crv = Inverse_Table_6_9[matrix_coefficients][0]; + int cbu = Inverse_Table_6_9[matrix_coefficients][1]; + int cgu = -Inverse_Table_6_9[matrix_coefficients][2]; + int cgv = -Inverse_Table_6_9[matrix_coefficients][3]; + + for (i = 0; i < 1024; i++) { + int j; + + j = (76309 * (i - 384 - 16) + 32768) >> 16; + table_Y[i] = (j < 0) ? 0 : ((j > 255) ? 
255 : j); + } + + switch (bpp) { + case 32: + if (!id) + return (197 + 2*682 + 256 + 132) * sizeof (uint32_t); + table_32 = (uint32_t *) (id + 1); + entry_size = sizeof (uint32_t); + table_r = table_32 + 197; + table_b = table_32 + 197 + 685; + table_g = table_32 + 197 + 2*682; + + for (i = -197; i < 256+197; i++) + ((uint32_t *) table_r)[i] = + table_Y[i+384] << ((order == MPEG2CONVERT_RGB) ? 16 : 0); + for (i = -132; i < 256+132; i++) + ((uint32_t *) table_g)[i] = table_Y[i+384] << 8; + for (i = -232; i < 256+232; i++) + ((uint32_t *) table_b)[i] = + table_Y[i+384] << ((order == MPEG2CONVERT_RGB) ? 0 : 16); + break; + + case 24: + if (!id) + return (256 + 2*232) * sizeof (uint8_t); + table_8 = (uint8_t *) (id + 1); + entry_size = sizeof (uint8_t); + table_r = table_g = table_b = table_8 + 232; + + for (i = -232; i < 256+232; i++) + ((uint8_t * )table_b)[i] = table_Y[i+384]; + break; + + case 15: + case 16: + if (!id) + return (197 + 2*682 + 256 + 132) * sizeof (uint16_t); + table_16 = (uint16_t *) (id + 1); + entry_size = sizeof (uint16_t); + table_r = table_16 + 197; + table_b = table_16 + 197 + 685; + table_g = table_16 + 197 + 2*682; + + for (i = -197; i < 256+197; i++) { + int j = table_Y[i+384] >> 3; + + if (order == MPEG2CONVERT_RGB) + j <<= ((bpp==16) ? 11 : 10); + + ((uint16_t *)table_r)[i] = j; + } + for (i = -132; i < 256+132; i++) { + int j = table_Y[i+384] >> ((bpp==16) ? 2 : 3); + + ((uint16_t *)table_g)[i] = j << 5; + } + for (i = -232; i < 256+232; i++) { + int j = table_Y[i+384] >> 3; + + if (order == MPEG2CONVERT_BGR) + j <<= ((bpp==16) ? 
11 : 10); + + ((uint16_t *)table_b)[i] = j; + } + break; + + case 8: + if (!id) + return (197 + 2*682 + 256 + 232 + 71) * sizeof (uint8_t); + table_332 = (uint8_t *) (id + 1); + entry_size = sizeof (uint8_t); + table_r = table_332 + 197; + table_g = table_332 + 197 + 682 + 30; + table_b = table_332 + 197 + 2*682; + + for (i = -197; i < 256+197+30; i++) + ((uint8_t *)table_r)[i] = ((table_Y[i+384] * 7 / 255) << + (order == MPEG2CONVERT_RGB ? 5 : 0)); + for (i = -132; i < 256+132+30; i++) + ((uint8_t *)table_g)[i-30] = ((table_Y[i+384] * 7 / 255) << + (order == MPEG2CONVERT_RGB ? 2 : 3)); + for (i = -232; i < 256+232+71; i++) + ((uint8_t *)table_b)[i] = ((table_Y[i+384] / 85) << + (order == MPEG2CONVERT_RGB ? 0 : 6)); + break; + } + + for (i = 0; i < 256; i++) { + id->table_rV[i] = (((uint8_t *)table_r) + + entry_size * div_round (crv * (i-128), 76309)); + id->table_gU[i] = (((uint8_t *)table_g) + + entry_size * div_round (cgu * (i-128), 76309)); + id->table_gV[i] = entry_size * div_round (cgv * (i-128), 76309); + id->table_bU[i] = (((uint8_t *)table_b) + + entry_size * div_round (cbu * (i-128), 76309)); + } + + return 0; +} + +static int rgb_internal (mpeg2convert_rgb_order_t order, unsigned int bpp, + int stage, void * _id, const mpeg2_sequence_t * seq, + int stride, uint32_t accel, void * arg, + mpeg2_convert_init_t * result) +{ + convert_rgb_t * id = (convert_rgb_t *) _id; + mpeg2convert_copy_t * copy = (mpeg2convert_copy_t *) 0; + unsigned int id_size = sizeof (convert_rgb_t); + int chroma420 = (seq->chroma_height < seq->height); + int convert420 = 0; + int rgb_stride_min = ((bpp + 7) >> 3) * seq->width; + +#ifdef ARCH_X86 + if (!copy && (accel & MPEG2_ACCEL_X86_MMXEXT)) { + convert420 = 0; + copy = mpeg2convert_rgb_mmxext (order, bpp, seq); + } + if (!copy && (accel & MPEG2_ACCEL_X86_MMX)) { + convert420 = 0; + copy = mpeg2convert_rgb_mmx (order, bpp, seq); + } +#endif +#ifdef ARCH_SPARC + if (!copy && (accel & MPEG2_ACCEL_SPARC_VIS)) { + convert420 = 
chroma420; + copy = mpeg2convert_rgb_vis (order, bpp, seq); + } +#endif + if (!copy) { + int src, dest; + static void (* rgb_c[3][5]) (void *, uint8_t * const *, + unsigned int) = + {{rgb_c_24_bgr_420, rgb_c_8_420, rgb_c_16_420, + rgb_c_24_rgb_420, rgb_c_32_420}, + {rgb_c_24_bgr_422, rgb_c_8_422, rgb_c_16_422, + rgb_c_24_rgb_422, rgb_c_32_422}, + {rgb_c_24_bgr_444, rgb_c_8_444, rgb_c_16_444, + rgb_c_24_rgb_444, rgb_c_32_444}}; + + convert420 = chroma420; + id_size = (sizeof (convert_rgb_c_t) + + rgb_c_init ((convert_rgb_c_t *) id, order, bpp)); + src = ((seq->chroma_width == seq->width) + + (seq->chroma_height == seq->height)); + dest = ((bpp == 24 && order == MPEG2CONVERT_BGR) ? 0 : (bpp + 7) >> 3); + copy = rgb_c[src][dest]; + } + + result->id_size = id_size; + + if (stride < rgb_stride_min) + stride = rgb_stride_min; + + if (stage == MPEG2_CONVERT_STRIDE) + return stride; + else if (stage == MPEG2_CONVERT_START) { + id->width = seq->width >> 3; + id->y_stride_frame = seq->width; + id->uv_stride_frame = seq->chroma_width; + id->rgb_stride_frame = stride; + id->rgb_stride_min = rgb_stride_min; + id->chroma420 = chroma420; + id->convert420 = convert420; + result->buf_size[0] = stride * seq->height; + result->buf_size[1] = result->buf_size[2] = 0; + result->start = rgb_start; + result->copy = copy; + } + return 0; +} + +#define DECLARE(func,order,bpp) \ +int func (int stage, void * id, \ + const mpeg2_sequence_t * sequence, int stride, \ + uint32_t accel, void * arg, mpeg2_convert_init_t * result) \ +{ \ + return rgb_internal (order, bpp, stage, id, sequence, stride, \ + accel, arg, result); \ +} + +DECLARE (mpeg2convert_rgb32, MPEG2CONVERT_RGB, 32) +DECLARE (mpeg2convert_rgb24, MPEG2CONVERT_RGB, 24) +DECLARE (mpeg2convert_rgb16, MPEG2CONVERT_RGB, 16) +DECLARE (mpeg2convert_rgb15, MPEG2CONVERT_RGB, 15) +DECLARE (mpeg2convert_rgb8, MPEG2CONVERT_RGB, 8) +DECLARE (mpeg2convert_bgr32, MPEG2CONVERT_BGR, 32) +DECLARE (mpeg2convert_bgr24, MPEG2CONVERT_BGR, 24) +DECLARE 
(mpeg2convert_bgr16, MPEG2CONVERT_BGR, 16) +DECLARE (mpeg2convert_bgr15, MPEG2CONVERT_BGR, 15) +DECLARE (mpeg2convert_bgr8, MPEG2CONVERT_BGR, 8) + +mpeg2_convert_t * mpeg2convert_rgb (mpeg2convert_rgb_order_t order, + unsigned int bpp) +{ + static mpeg2_convert_t * table[5][2] = + {{mpeg2convert_rgb15, mpeg2convert_bgr15}, + {mpeg2convert_rgb8, mpeg2convert_bgr8}, + {mpeg2convert_rgb16, mpeg2convert_bgr16}, + {mpeg2convert_rgb24, mpeg2convert_bgr24}, + {mpeg2convert_rgb32, mpeg2convert_bgr32}}; + + if (order == MPEG2CONVERT_RGB || order == MPEG2CONVERT_BGR) { + if (bpp == 15) + return table[0][order == MPEG2CONVERT_BGR]; + else if (bpp >= 8 && bpp <= 32 && (bpp & 7) == 0) + return table[bpp >> 3][order == MPEG2CONVERT_BGR]; + } + return (mpeg2_convert_t *) 0; +} diff --git a/src/libmpeg2new/libmpeg2/rgb_mmx.c b/src/libmpeg2new/libmpeg2/rgb_mmx.c new file mode 100644 index 000000000..912291c6a --- /dev/null +++ b/src/libmpeg2new/libmpeg2/rgb_mmx.c @@ -0,0 +1,321 @@ +/* + * rgb_mmx.c + * Copyright (C) 2000-2003 Silicon Integrated System Corp. + * All Rights Reserved. + * + * Author: Olie Lho <ollie@sis.com.tw> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef ARCH_X86 + +#include <stdio.h> +#include <stdlib.h> +#include <inttypes.h> + +#include "mpeg2.h" +#include "mpeg2convert.h" +#include "convert_internal.h" +#include "attributes.h" +#include "mmx.h" + +#define CPU_MMXEXT 0 +#define CPU_MMX 1 + +/* CPU_MMXEXT/CPU_MMX adaptation layer */ + +#define movntq(src,dest) \ +do { \ + if (cpu == CPU_MMXEXT) \ + movntq_r2m (src, dest); \ + else \ + movq_r2m (src, dest); \ +} while (0) + +static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv) +{ + static mmx_t mmx_80w = {0x0080008000800080LL}; + static mmx_t mmx_U_green = {0xf37df37df37df37dLL}; + static mmx_t mmx_U_blue = {0x4093409340934093LL}; + static mmx_t mmx_V_red = {0x3312331233123312LL}; + static mmx_t mmx_V_green = {0xe5fce5fce5fce5fcLL}; + static mmx_t mmx_10w = {0x1010101010101010LL}; + static mmx_t mmx_00ffw = {0x00ff00ff00ff00ffLL}; + static mmx_t mmx_Y_coeff = {0x253f253f253f253fLL}; + + movd_m2r (*pu, mm0); /* mm0 = 00 00 00 00 u3 u2 u1 u0 */ + movd_m2r (*pv, mm1); /* mm1 = 00 00 00 00 v3 v2 v1 v0 */ + movq_m2r (*py, mm6); /* mm6 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ + pxor_r2r (mm4, mm4); /* mm4 = 0 */ + /* XXX might do cache preload for image here */ + + /* + * Do the multiply part of the conversion for even and odd pixels + * register usage: + * mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels + * mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd pixels + * mm6 -> Y even, mm7 -> Y odd + */ + + punpcklbw_r2r (mm4, mm0); /* mm0 = u3 u2 u1 u0 */ + punpcklbw_r2r (mm4, mm1); /* mm1 = v3 v2 v1 v0 */ + psubsw_m2r (mmx_80w, mm0); /* u -= 128 */ + psubsw_m2r (mmx_80w, mm1); /* v -= 128 */ + psllw_i2r (3, mm0); /* promote precision */ + psllw_i2r (3, mm1); /* promote precision */ + movq_r2r (mm0, mm2); /* mm2 = 
u3 u2 u1 u0 */ + movq_r2r (mm1, mm3); /* mm3 = v3 v2 v1 v0 */ + pmulhw_m2r (mmx_U_green, mm2); /* mm2 = u * u_green */ + pmulhw_m2r (mmx_V_green, mm3); /* mm3 = v * v_green */ + pmulhw_m2r (mmx_U_blue, mm0); /* mm0 = chroma_b */ + pmulhw_m2r (mmx_V_red, mm1); /* mm1 = chroma_r */ + paddsw_r2r (mm3, mm2); /* mm2 = chroma_g */ + + psubusb_m2r (mmx_10w, mm6); /* Y -= 16 */ + movq_r2r (mm6, mm7); /* mm7 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ + pand_m2r (mmx_00ffw, mm6); /* mm6 = Y6 Y4 Y2 Y0 */ + psrlw_i2r (8, mm7); /* mm7 = Y7 Y5 Y3 Y1 */ + psllw_i2r (3, mm6); /* promote precision */ + psllw_i2r (3, mm7); /* promote precision */ + pmulhw_m2r (mmx_Y_coeff, mm6); /* mm6 = luma_rgb even */ + pmulhw_m2r (mmx_Y_coeff, mm7); /* mm7 = luma_rgb odd */ + + /* + * Do the addition part of the conversion for even and odd pixels + * register usage: + * mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels + * mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd pixels + * mm6 -> Y even, mm7 -> Y odd + */ + + movq_r2r (mm0, mm3); /* mm3 = chroma_b */ + movq_r2r (mm1, mm4); /* mm4 = chroma_r */ + movq_r2r (mm2, mm5); /* mm5 = chroma_g */ + paddsw_r2r (mm6, mm0); /* mm0 = B6 B4 B2 B0 */ + paddsw_r2r (mm7, mm3); /* mm3 = B7 B5 B3 B1 */ + paddsw_r2r (mm6, mm1); /* mm1 = R6 R4 R2 R0 */ + paddsw_r2r (mm7, mm4); /* mm4 = R7 R5 R3 R1 */ + paddsw_r2r (mm6, mm2); /* mm2 = G6 G4 G2 G0 */ + paddsw_r2r (mm7, mm5); /* mm5 = G7 G5 G3 G1 */ + packuswb_r2r (mm0, mm0); /* saturate to 0-255 */ + packuswb_r2r (mm1, mm1); /* saturate to 0-255 */ + packuswb_r2r (mm2, mm2); /* saturate to 0-255 */ + packuswb_r2r (mm3, mm3); /* saturate to 0-255 */ + packuswb_r2r (mm4, mm4); /* saturate to 0-255 */ + packuswb_r2r (mm5, mm5); /* saturate to 0-255 */ + punpcklbw_r2r (mm3, mm0); /* mm0 = B7 B6 B5 B4 B3 B2 B1 B0 */ + punpcklbw_r2r (mm4, mm1); /* mm1 = R7 R6 R5 R4 R3 R2 R1 R0 */ + punpcklbw_r2r (mm5, mm2); /* mm2 = G7 G6 G5 G4 G3 G2 G1 G0 */ +} + +static inline void mmx_unpack_16rgb (uint8_t * image, const int cpu) +{ + static 
mmx_t mmx_bluemask = {0xf8f8f8f8f8f8f8f8LL}; + static mmx_t mmx_greenmask = {0xfcfcfcfcfcfcfcfcLL}; + static mmx_t mmx_redmask = {0xf8f8f8f8f8f8f8f8LL}; + + /* + * convert RGB plane to RGB 16 bits + * mm0 -> B, mm1 -> R, mm2 -> G + * mm4 -> GB, mm5 -> AR pixel 4-7 + * mm6 -> GB, mm7 -> AR pixel 0-3 + */ + + pand_m2r (mmx_bluemask, mm0); /* mm0 = b7b6b5b4b3______ */ + pand_m2r (mmx_greenmask, mm2); /* mm2 = g7g6g5g4g3g2____ */ + pand_m2r (mmx_redmask, mm1); /* mm1 = r7r6r5r4r3______ */ + psrlq_i2r (3, mm0); /* mm0 = ______b7b6b5b4b3 */ + pxor_r2r (mm4, mm4); /* mm4 = 0 */ + movq_r2r (mm0, mm5); /* mm5 = ______b7b6b5b4b3 */ + movq_r2r (mm2, mm7); /* mm7 = g7g6g5g4g3g2____ */ + + punpcklbw_r2r (mm4, mm2); + punpcklbw_r2r (mm1, mm0); + psllq_i2r (3, mm2); + por_r2r (mm2, mm0); + movntq (mm0, *image); + + punpckhbw_r2r (mm4, mm7); + punpckhbw_r2r (mm1, mm5); + psllq_i2r (3, mm7); + por_r2r (mm7, mm5); + movntq (mm5, *(image+8)); +} + +static inline void mmx_unpack_32rgb (uint8_t * image, const int cpu) +{ + /* + * convert RGB plane to RGB packed format, + * mm0 -> B, mm1 -> R, mm2 -> G, mm3 -> 0, + * mm4 -> GB, mm5 -> AR pixel 4-7, + * mm6 -> GB, mm7 -> AR pixel 0-3 + */ + + pxor_r2r (mm3, mm3); + movq_r2r (mm0, mm6); + movq_r2r (mm1, mm7); + movq_r2r (mm0, mm4); + movq_r2r (mm1, mm5); + punpcklbw_r2r (mm2, mm6); + punpcklbw_r2r (mm3, mm7); + punpcklwd_r2r (mm7, mm6); + movntq (mm6, *image); + movq_r2r (mm0, mm6); + punpcklbw_r2r (mm2, mm6); + punpckhwd_r2r (mm7, mm6); + movntq (mm6, *(image+8)); + punpckhbw_r2r (mm2, mm4); + punpckhbw_r2r (mm3, mm5); + punpcklwd_r2r (mm5, mm4); + movntq (mm4, *(image+16)); + movq_r2r (mm0, mm4); + punpckhbw_r2r (mm2, mm4); + punpckhwd_r2r (mm5, mm4); + movntq (mm4, *(image+24)); +} + +static inline void rgb16 (void * const _id, uint8_t * const * src, + const unsigned int v_offset, const int cpu) +{ + convert_rgb_t * const id = (convert_rgb_t *) _id; + uint8_t * dst; + uint8_t * py, * pu, * pv; + int i, j; + + dst = id->rgb_ptr + 
id->rgb_slice * v_offset; + py = src[0]; pu = src[1]; pv = src[2]; + + i = 16; + do { + j = id->width; + do { + mmx_yuv2rgb (py, pu, pv); + mmx_unpack_16rgb (dst, cpu); + py += 8; + pu += 4; + pv += 4; + dst += 16; + } while (--j); + + dst += id->rgb_increm; + py += id->y_increm; + if (--i == id->field) { + dst = id->rgb_ptr + id->rgb_slice * (v_offset + 1); + py = src[0] + id->y_stride_frame; + pu = src[1] + id->uv_stride_frame; + pv = src[2] + id->uv_stride_frame; + } else if (! (i & id->chroma420)) { + pu += id->uv_increm; + pv += id->uv_increm; + } else { + pu -= id->uv_stride_frame; + pv -= id->uv_stride_frame; + } + } while (i); +} + +static inline void argb32 (void * const _id, uint8_t * const * src, + const unsigned int v_offset, const int cpu) +{ + convert_rgb_t * const id = (convert_rgb_t *) _id; + uint8_t * dst; + uint8_t * py, * pu, * pv; + int i, j; + + dst = id->rgb_ptr + id->rgb_slice * v_offset; + py = src[0]; pu = src[1]; pv = src[2]; + + i = 16; + do { + j = id->width; + do { + mmx_yuv2rgb (py, pu, pv); + mmx_unpack_32rgb (dst, cpu); + py += 8; + pu += 4; + pv += 4; + dst += 32; + } while (--j); + + dst += id->rgb_increm; + py += id->y_increm; + if (--i == id->field) { + dst = id->rgb_ptr + id->rgb_slice * (v_offset + 1); + py = src[0] + id->y_stride_frame; + pu = src[1] + id->uv_stride_frame; + pv = src[2] + id->uv_stride_frame; + } else if (! 
(i & id->chroma420)) { + pu += id->uv_increm; + pv += id->uv_increm; + } else { + pu -= id->uv_stride_frame; + pv -= id->uv_stride_frame; + } + } while (i); +} + +static void mmxext_rgb16 (void * id, uint8_t * const * src, + unsigned int v_offset) +{ + rgb16 (id, src, v_offset, CPU_MMXEXT); +} + +static void mmxext_argb32 (void * id, uint8_t * const * src, + unsigned int v_offset) +{ + argb32 (id, src, v_offset, CPU_MMXEXT); +} + +static void mmx_rgb16 (void * id, uint8_t * const * src, unsigned int v_offset) +{ + rgb16 (id, src, v_offset, CPU_MMX); +} + +static void mmx_argb32 (void * id, uint8_t * const * src, + unsigned int v_offset) +{ + argb32 (id, src, v_offset, CPU_MMX); +} + +mpeg2convert_copy_t * mpeg2convert_rgb_mmxext (int order, int bpp, + const mpeg2_sequence_t * seq) +{ + if (order == MPEG2CONVERT_RGB && seq->chroma_width < seq->width) { + if (bpp == 16) + return mmxext_rgb16; + else if (bpp == 32) + return mmxext_argb32; + } + return NULL; /* Fallback to C */ +} + +mpeg2convert_copy_t * mpeg2convert_rgb_mmx (int order, int bpp, + const mpeg2_sequence_t * seq) +{ + if (order == MPEG2CONVERT_RGB && seq->chroma_width < seq->width) { + if (bpp == 16) + return mmx_rgb16; + else if (bpp == 32) + return mmx_argb32; + } + return NULL; /* Fallback to C */ +} +#endif diff --git a/src/libmpeg2new/libmpeg2/rgb_vis.c b/src/libmpeg2new/libmpeg2/rgb_vis.c new file mode 100644 index 000000000..49d8d1d7c --- /dev/null +++ b/src/libmpeg2new/libmpeg2/rgb_vis.c @@ -0,0 +1,384 @@ +/* + * rgb_vis.c + * Copyright (C) 2003 David S. Miller <davem@redhat.com> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef ARCH_SPARC + +#include <stddef.h> +#include <inttypes.h> + +#include "mpeg2.h" +#include "mpeg2convert.h" +#include "convert_internal.h" +#include "attributes.h" +#include "vis.h" + +/* Based partially upon the MMX yuv2rgb code, see there for credits. + * + * The difference here is that since we have enough registers we + * process both even and odd scanlines in one pass. + */ + +static const uint16_t const_2048[] ATTR_ALIGN(8) = {2048, 2048, 2048, 2048}; +static const uint16_t const_1024[] ATTR_ALIGN(8) = {1024, 1024, 1024, 1024}; +static const uint16_t const_128[] ATTR_ALIGN(8) = {128, 128, 128, 128}; +static const uint8_t const_Ugreen[] ATTR_ALIGN(8) = + {0xf3, 0x00, 0xf3, 0x00, 0xf3, 0x00, 0xf3, 0x00}; +static const uint8_t const_Vgreen[] ATTR_ALIGN(8) = + {0xe6, 0x00, 0xe6, 0x00, 0xe6, 0x00, 0xe6, 0x00}; +static const uint8_t const_Ublue_Vred[] ATTR_ALIGN(8) = + {0x41, 0x41, 0x41, 0x41, 0x33, 0x33, 0x33, 0x33}; +static const uint8_t const_Ycoeff[] ATTR_ALIGN(4) = {0x25, 0x25, 0x25, 0x25}; + +#define TMP0 0 +#define TMP1 1 +#define TMP2 2 +#define TMP3 3 +#define TMP4 4 +#define TMP5 5 +#define TMP6 6 +#define TMP7 7 +#define TMP8 8 +#define TMP9 9 +#define TMP10 10 +#define TMP11 11 +#define TMP12 12 +#define TMP13 13 + +#define CONST_UBLUE 14 +#define CONST_VRED 15 +#define CONST_2048 16 + +#define BLUE8_EVEN 18 +#define BLUE8_ODD 19 +#define RED8_EVEN 20 +#define RED8_ODD 21 +#define GREEN8_EVEN 22 +#define GREEN8_ODD 23 + +#define BLUE8_2_EVEN 24 +#define 
BLUE8_2_ODD 25 +#define RED8_2_EVEN 26 +#define RED8_2_ODD 27 +#define GREEN8_2_EVEN 28 +#define GREEN8_2_ODD 29 + +#define CONST_YCOEFF 30 +#define ZEROS 31 + +#define PU_0 32 +#define PU_2 34 +#define PV_0 36 +#define PV_2 38 +#define PY_0 40 +#define PY_2 42 +#define PY_4 44 +#define PY_6 46 + +#define CONST_128 56 +#define CONST_1024 58 +#define CONST_VGREEN 60 +#define CONST_UGREEN 62 + +static inline void vis_init_consts(void) +{ + vis_set_gsr(7 << VIS_GSR_SCALEFACT_SHIFT); + + vis_ld64(const_2048[0], CONST_2048); + vis_ld64(const_1024[0], CONST_1024); + vis_ld64(const_Ugreen[0], CONST_UGREEN); + vis_ld64(const_Vgreen[0], CONST_VGREEN); + vis_fzeros(ZEROS); + vis_ld64(const_Ublue_Vred[0], CONST_UBLUE); + vis_ld32(const_Ycoeff[0], CONST_YCOEFF); + vis_ld64(const_128[0], CONST_128); +} + +static inline void vis_yuv2rgb(uint8_t *py, uint8_t *pu, uint8_t *pv, + int y_stride) +{ + vis_ld32(pu[0], TMP0); + + vis_ld32(pv[0], TMP2); + + vis_ld64(py[0], TMP4); + vis_mul8x16au(TMP0, CONST_2048, PU_0); + + vis_ld64_2(py, y_stride, TMP8); + vis_mul8x16au(TMP2, CONST_2048, PV_0); + + vis_pmerge(TMP4, TMP5, TMP6); + + vis_pmerge(TMP6, TMP7, TMP4); + + vis_pmerge(TMP8, TMP9, TMP10); + + vis_pmerge(TMP10, TMP11, TMP8); + vis_mul8x16au(TMP4, CONST_2048, PY_0); + + vis_psub16(PU_0, CONST_1024, PU_0); + vis_mul8x16au(TMP5, CONST_2048, PY_2); + + vis_psub16(PV_0, CONST_1024, PV_0); + vis_mul8x16au(TMP8, CONST_2048, PY_4); + + vis_psub16(PY_0, CONST_128, PY_0); + vis_mul8x16au(TMP9, CONST_2048, PY_6); + + vis_psub16(PY_2, CONST_128, PY_2); + vis_mul8x16(CONST_YCOEFF, PY_0, PY_0); + + vis_psub16(PY_4, CONST_128, PY_4); + vis_mul8x16(CONST_YCOEFF, PY_2, PY_2); + + vis_psub16(PY_6, CONST_128, PY_6); + vis_mul8x16(CONST_YCOEFF, PY_4, PY_4); + + vis_mul8x16(CONST_YCOEFF, PY_6, PY_6); + + vis_mul8sux16(CONST_UGREEN, PU_0, TMP0); + + vis_mul8sux16(CONST_VGREEN, PV_0, TMP2); + + vis_mul8x16(CONST_UBLUE, PU_0, TMP4); + + vis_mul8x16(CONST_VRED, PV_0, TMP6); + vis_padd16(TMP0, TMP2, 
TMP10); + + vis_padd16(PY_0, TMP4, TMP0); + + vis_padd16(PY_2, TMP4, TMP2); + vis_pack16(TMP0, BLUE8_EVEN); + + vis_padd16(PY_4, TMP4, TMP0); + vis_pack16(TMP2, BLUE8_ODD); + + vis_padd16(PY_6, TMP4, TMP2); + vis_pack16(TMP0, BLUE8_2_EVEN); + + vis_padd16(PY_0, TMP6, TMP0); + vis_pack16(TMP2, BLUE8_2_ODD); + + vis_padd16(PY_2, TMP6, TMP2); + vis_pack16(TMP0, RED8_EVEN); + + vis_padd16(PY_4, TMP6, TMP0); + vis_pack16(TMP2, RED8_ODD); + + vis_padd16(PY_6, TMP6, TMP2); + vis_pack16(TMP0, RED8_2_EVEN); + + vis_padd16(PY_0, TMP10, TMP0); + vis_pack16(TMP2, RED8_2_ODD); + + vis_padd16(PY_2, TMP10, TMP2); + vis_pack16(TMP0, GREEN8_EVEN); + + vis_padd16(PY_4, TMP10, TMP0); + vis_pack16(TMP2, GREEN8_ODD); + + vis_padd16(PY_6, TMP10, TMP2); + vis_pack16(TMP0, GREEN8_2_EVEN); + + vis_pack16(TMP2, GREEN8_2_ODD); + vis_pmerge(BLUE8_EVEN, BLUE8_ODD, BLUE8_EVEN); + + vis_pmerge(BLUE8_2_EVEN, BLUE8_2_ODD, BLUE8_2_EVEN); + + vis_pmerge(RED8_EVEN, RED8_ODD, RED8_EVEN); + + vis_pmerge(RED8_2_EVEN, RED8_2_ODD, RED8_2_EVEN); + + vis_pmerge(GREEN8_EVEN, GREEN8_ODD, GREEN8_EVEN); + + vis_pmerge(GREEN8_2_EVEN, GREEN8_2_ODD, GREEN8_2_EVEN); +} + +static inline void vis_unpack_32rgb(uint8_t *image, int stride) +{ + vis_pmerge(ZEROS, GREEN8_EVEN, TMP0); + vis_pmerge(RED8_EVEN, BLUE8_EVEN, TMP2); + + vis_pmerge(TMP0, TMP2, TMP4); + vis_st64(TMP4, image[0]); + + vis_pmerge(TMP1, TMP3, TMP6); + vis_st64_2(TMP6, image, 8); + + vis_pmerge(ZEROS, GREEN8_ODD, TMP8); + vis_pmerge(RED8_ODD, BLUE8_ODD, TMP10); + + vis_pmerge(TMP8, TMP10, TMP0); + vis_st64_2(TMP0, image, 16); + + vis_pmerge(TMP9, TMP11, TMP2); + vis_st64_2(TMP2, image, 24); + + image += stride; + + vis_pmerge(ZEROS, GREEN8_2_EVEN, TMP0); + vis_pmerge(RED8_2_EVEN, BLUE8_2_EVEN, TMP2); + + vis_pmerge(TMP0, TMP2, TMP4); + vis_st64(TMP4, image[0]); + + vis_pmerge(TMP1, TMP3, TMP6); + vis_st64_2(TMP6, image, 8); + + vis_pmerge(ZEROS, GREEN8_2_ODD, TMP8); + vis_pmerge(RED8_2_ODD, BLUE8_2_ODD, TMP10); + + vis_pmerge(TMP8, TMP10, TMP0); + 
vis_st64_2(TMP0, image, 16); + + vis_pmerge(TMP9, TMP11, TMP2); + vis_st64_2(TMP2, image, 24); +} + +static inline void vis_unpack_32bgr(uint8_t *image, int stride) +{ + vis_pmerge(ZEROS, GREEN8_EVEN, TMP0); + vis_pmerge(BLUE8_EVEN, RED8_EVEN, TMP2); + + vis_pmerge(TMP0, TMP2, TMP4); + vis_st64(TMP4, image[0]); + + vis_pmerge(TMP1, TMP3, TMP6); + vis_st64_2(TMP6, image, 8); + + vis_pmerge(ZEROS, GREEN8_ODD, TMP8); + vis_pmerge(BLUE8_ODD, RED8_ODD, TMP10); + + vis_pmerge(TMP8, TMP10, TMP0); + vis_st64_2(TMP0, image, 16); + + vis_pmerge(TMP9, TMP11, TMP2); + vis_st64_2(TMP2, image, 24); + + image += stride; + + vis_pmerge(ZEROS, GREEN8_2_EVEN, TMP0); + vis_pmerge(BLUE8_2_EVEN, RED8_2_EVEN, TMP2); + + vis_pmerge(TMP0, TMP2, TMP4); + vis_st64(TMP4, image[0]); + + vis_pmerge(TMP1, TMP3, TMP6); + vis_st64_2(TMP6, image, 8); + + vis_pmerge(ZEROS, GREEN8_2_ODD, TMP8); + vis_pmerge(BLUE8_2_ODD, RED8_2_ODD, TMP10); + + vis_pmerge(TMP8, TMP10, TMP0); + vis_st64_2(TMP0, image, 16); + + vis_pmerge(TMP9, TMP11, TMP2); + vis_st64_2(TMP2, image, 24); +} + +static inline void vis_yuv420_argb32(uint8_t *image, + uint8_t *py, uint8_t *pu, uint8_t *pv, + int width, int height, int rgb_stride, + int y_stride, int uv_stride) +{ + height >>= 1; + uv_stride -= width >> 1; + do { + int i = width >> 3; + do { + vis_yuv2rgb(py, pu, pv, y_stride); + vis_unpack_32rgb(image, rgb_stride); + py += 8; + pu += 4; + pv += 4; + image += 32; + } while (--i); + + py += (y_stride << 1) - width; + image += (rgb_stride << 1) - 4 * width; + pu += uv_stride; + pv += uv_stride; + } while (--height); +} + +static inline void vis_yuv420_abgr32(uint8_t *image, + uint8_t *py, uint8_t *pu, uint8_t *pv, + int width, int height, int rgb_stride, + int y_stride, int uv_stride) +{ + height >>= 1; + uv_stride -= width >> 1; + do { + int i = width >> 3; + do { + vis_yuv2rgb(py, pu, pv, y_stride); + vis_unpack_32bgr(image, rgb_stride); + py += 8; + pu += 4; + pv += 4; + image += 32; + } while (--i); + + py += (y_stride 
<< 1) - width; + image += (rgb_stride << 1) - 4 * width; + pu += uv_stride; + pv += uv_stride; + } while (--height); +} + +static void vis_argb32(void *_id, uint8_t * const *src, + unsigned int v_offset) +{ + convert_rgb_t *id = (convert_rgb_t *) _id; + + vis_init_consts(); + vis_yuv420_argb32(id->rgb_ptr + id->rgb_stride * v_offset, + src[0], src[1], src[2], id->width, 16, + id->rgb_stride, id->y_stride, id->y_stride >> 1); +} + +static void vis_abgr32(void *_id, uint8_t * const *src, + unsigned int v_offset) +{ + convert_rgb_t *id = (convert_rgb_t *) _id; + + vis_init_consts(); + vis_yuv420_abgr32(id->rgb_ptr + id->rgb_stride * v_offset, + src[0], src[1], src[2], id->width, 16, + id->rgb_stride, id->y_stride, id->y_stride >> 1); +} + +mpeg2convert_copy_t *mpeg2convert_rgb_vis(int order, int bpp, + const mpeg2_sequence_t * seq) +{ + if (bpp == 32 && seq->chroma_height < seq->height) { + if (order == MPEG2CONVERT_RGB) + return vis_argb32; + if (order == MPEG2CONVERT_BGR) + return vis_abgr32; + } + + return NULL; /* Fallback to C */ +} + +#endif /* ARCH_SPARC */ diff --git a/src/libmpeg2new/libmpeg2/slice.c b/src/libmpeg2new/libmpeg2/slice.c index 095fc4c82..ce4508639 100644 --- a/src/libmpeg2new/libmpeg2/slice.c +++ b/src/libmpeg2new/libmpeg2/slice.c @@ -1,6 +1,7 @@ /* * slice.c * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 2003 Peter Gubanov <peter@elecard.net.ru> * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. 
@@ -26,8 +27,8 @@ #include <inttypes.h> #include "../include/mpeg2.h" -#include "mpeg2_internal.h" #include "../include/attributes.h" +#include "mpeg2_internal.h" extern mpeg2_mc_t mpeg2_mc; extern void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride); @@ -38,13 +39,6 @@ extern void (* mpeg2_cpu_state_restore) (cpu_state_t * state); #include "vlc.h" -static int non_linear_quantizer_scale [] = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 10, 12, 14, 16, 18, 20, 22, - 24, 28, 32, 36, 40, 44, 48, 52, - 56, 64, 72, 80, 88, 96, 104, 112 -}; - static inline int get_macroblock_modes (mpeg2_decoder_t * const decoder) { #define bit_buf (decoder->bitstream_buf) @@ -76,24 +70,24 @@ static inline int get_macroblock_modes (mpeg2_decoder_t * const decoder) if (decoder->picture_structure != FRAME_PICTURE) { if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { - macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT; DUMPBITS (bit_buf, bits, 2); } - return macroblock_modes; + return macroblock_modes | MACROBLOCK_MOTION_FORWARD; } else if (decoder->frame_pred_frame_dct) { if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) - macroblock_modes |= MC_FRAME; - return macroblock_modes; + macroblock_modes |= MC_FRAME << MOTION_TYPE_SHIFT; + return macroblock_modes | MACROBLOCK_MOTION_FORWARD; } else { if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { - macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT; DUMPBITS (bit_buf, bits, 2); } if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; DUMPBITS (bit_buf, bits, 1); } - return macroblock_modes; + return macroblock_modes | MACROBLOCK_MOTION_FORWARD; } case B_TYPE: @@ -104,18 +98,18 @@ static inline int get_macroblock_modes (mpeg2_decoder_t * const decoder) if (decoder->picture_structure != FRAME_PICTURE) { if (! 
(macroblock_modes & MACROBLOCK_INTRA)) { - macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT; DUMPBITS (bit_buf, bits, 2); } return macroblock_modes; } else if (decoder->frame_pred_frame_dct) { /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */ - macroblock_modes |= MC_FRAME; + macroblock_modes |= MC_FRAME << MOTION_TYPE_SHIFT; return macroblock_modes; } else { if (macroblock_modes & MACROBLOCK_INTRA) goto intra; - macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT; DUMPBITS (bit_buf, bits, 2); if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { intra: @@ -138,7 +132,7 @@ static inline int get_macroblock_modes (mpeg2_decoder_t * const decoder) #undef bit_ptr } -static inline int get_quantizer_scale (mpeg2_decoder_t * const decoder) +static inline void get_quantizer_scale (mpeg2_decoder_t * const decoder) { #define bit_buf (decoder->bitstream_buf) #define bits (decoder->bitstream_bits) @@ -149,10 +143,14 @@ static inline int get_quantizer_scale (mpeg2_decoder_t * const decoder) quantizer_scale_code = UBITS (bit_buf, 5); DUMPBITS (bit_buf, bits, 5); - if (decoder->q_scale_type) - return non_linear_quantizer_scale [quantizer_scale_code]; - else - return quantizer_scale_code << 1; + decoder->quantizer_matrix[0] = + decoder->quantizer_prescale[0][quantizer_scale_code]; + decoder->quantizer_matrix[1] = + decoder->quantizer_prescale[1][quantizer_scale_code]; + decoder->quantizer_matrix[2] = + decoder->chroma_quantizer[0][quantizer_scale_code]; + decoder->quantizer_matrix[3] = + decoder->chroma_quantizer[1][quantizer_scale_code]; #undef bit_buf #undef bits #undef bit_ptr @@ -279,7 +277,7 @@ static inline int get_luma_dc_dct_diff (mpeg2_decoder_t * const decoder) dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); bit_buf <<= size; - return dc_diff; + return dc_diff << decoder->intra_dc_precision; } else 
{ DUMPBITS (bit_buf, bits, 3); return 0; @@ -291,7 +289,7 @@ static inline int get_luma_dc_dct_diff (mpeg2_decoder_t * const decoder) NEEDBITS (bit_buf, bits, bit_ptr); dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); DUMPBITS (bit_buf, bits, size); - return dc_diff; + return dc_diff << decoder->intra_dc_precision; } #undef bit_buf #undef bits @@ -316,7 +314,7 @@ static inline int get_chroma_dc_dct_diff (mpeg2_decoder_t * const decoder) dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); bit_buf <<= size; - return dc_diff; + return dc_diff << decoder->intra_dc_precision; } else { DUMPBITS (bit_buf, bits, 2); return 0; @@ -328,35 +326,34 @@ static inline int get_chroma_dc_dct_diff (mpeg2_decoder_t * const decoder) NEEDBITS (bit_buf, bits, bit_ptr); dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); DUMPBITS (bit_buf, bits, size); - return dc_diff; + return dc_diff << decoder->intra_dc_precision; } #undef bit_buf #undef bits #undef bit_ptr } -#define SATURATE(val) \ -do { \ - if (unlikely ((uint32_t)(val + 2048) > 4095)) \ - val = SBITS (val, 1) ^ 2047; \ +#define SATURATE(val) \ +do { \ + val <<= 4; \ + if (unlikely (val != (int16_t) val)) \ + val = (SBITS (val, 1) ^ 2047) << 4; \ } while (0) -static void get_intra_block_B14 (mpeg2_decoder_t * const decoder) +static void get_intra_block_B14 (mpeg2_decoder_t * const decoder, + const uint16_t * const quant_matrix) { int i; int j; int val; - const uint8_t * scan = decoder->scan; - const uint8_t * quant_matrix = decoder->intra_quantizer_matrix; - int quantizer_scale = decoder->quantizer_scale; + const uint8_t * const scan = decoder->scan; int mismatch; const DCTtab * tab; uint32_t bit_buf; int bits; const uint8_t * bit_ptr; - int16_t * dest; + int16_t * const dest = decoder->DCTblock; - dest = decoder->DCTblock; i = 0; mismatch = ~dest[0]; @@ -379,7 +376,7 @@ static void get_intra_block_B14 (mpeg2_decoder_t * const decoder) j = scan[i]; bit_buf <<= tab->len; bits += 
tab->len + 1; - val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4; + val = (tab->level * quant_matrix[j]) >> 4; /* if (bitstream_get (1)) val = -val; */ val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); @@ -411,8 +408,7 @@ static void get_intra_block_B14 (mpeg2_decoder_t * const decoder) DUMPBITS (bit_buf, bits, 12); NEEDBITS (bit_buf, bits, bit_ptr); - val = (SBITS (bit_buf, 12) * - quantizer_scale * quant_matrix[j]) / 16; + val = (SBITS (bit_buf, 12) * quant_matrix[j]) / 16; SATURATE (val); dest[j] = val; @@ -448,29 +444,27 @@ static void get_intra_block_B14 (mpeg2_decoder_t * const decoder) } break; /* illegal, check needed to avoid buffer overflow */ } - dest[63] ^= mismatch & 1; - DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + dest[63] ^= mismatch & 16; + DUMPBITS (bit_buf, bits, tab->len); /* dump end of block code */ decoder->bitstream_buf = bit_buf; decoder->bitstream_bits = bits; decoder->bitstream_ptr = bit_ptr; } -static void get_intra_block_B15 (mpeg2_decoder_t * const decoder) +static void get_intra_block_B15 (mpeg2_decoder_t * const decoder, + const uint16_t * const quant_matrix) { int i; int j; int val; - const uint8_t * scan = decoder->scan; - const uint8_t * quant_matrix = decoder->intra_quantizer_matrix; - int quantizer_scale = decoder->quantizer_scale; + const uint8_t * const scan = decoder->scan; int mismatch; const DCTtab * tab; uint32_t bit_buf; int bits; const uint8_t * bit_ptr; - int16_t * dest; + int16_t * const dest = decoder->DCTblock; - dest = decoder->DCTblock; i = 0; mismatch = ~dest[0]; @@ -492,7 +486,7 @@ static void get_intra_block_B15 (mpeg2_decoder_t * const decoder) j = scan[i]; bit_buf <<= tab->len; bits += tab->len + 1; - val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4; + val = (tab->level * quant_matrix[j]) >> 4; /* if (bitstream_get (1)) val = -val; */ val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); @@ -523,8 +517,7 @@ static void get_intra_block_B15 (mpeg2_decoder_t * const 
decoder) DUMPBITS (bit_buf, bits, 12); NEEDBITS (bit_buf, bits, bit_ptr); - val = (SBITS (bit_buf, 12) * - quantizer_scale * quant_matrix[j]) / 16; + val = (SBITS (bit_buf, 12) * quant_matrix[j]) / 16; SATURATE (val); dest[j] = val; @@ -561,31 +554,29 @@ static void get_intra_block_B15 (mpeg2_decoder_t * const decoder) } break; /* illegal, check needed to avoid buffer overflow */ } - dest[63] ^= mismatch & 1; - DUMPBITS (bit_buf, bits, 4); /* dump end of block code */ + dest[63] ^= mismatch & 16; + DUMPBITS (bit_buf, bits, tab->len); /* dump end of block code */ decoder->bitstream_buf = bit_buf; decoder->bitstream_bits = bits; decoder->bitstream_ptr = bit_ptr; } -static int get_non_intra_block (mpeg2_decoder_t * const decoder) +static int get_non_intra_block (mpeg2_decoder_t * const decoder, + const uint16_t * const quant_matrix) { int i; int j; int val; - const uint8_t * scan = decoder->scan; - const uint8_t * quant_matrix = decoder->non_intra_quantizer_matrix; - int quantizer_scale = decoder->quantizer_scale; + const uint8_t * const scan = decoder->scan; int mismatch; const DCTtab * tab; uint32_t bit_buf; int bits; const uint8_t * bit_ptr; - int16_t * dest; + int16_t * const dest = decoder->DCTblock; i = -1; - mismatch = 1; - dest = decoder->DCTblock; + mismatch = -1; bit_buf = decoder->bitstream_buf; bits = decoder->bitstream_bits; @@ -612,7 +603,7 @@ static int get_non_intra_block (mpeg2_decoder_t * const decoder) j = scan[i]; bit_buf <<= tab->len; bits += tab->len + 1; - val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5; + val = ((2 * tab->level + 1) * quant_matrix[j]) >> 5; /* if (bitstream_get (1)) val = -val; */ val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); @@ -648,7 +639,7 @@ static int get_non_intra_block (mpeg2_decoder_t * const decoder) DUMPBITS (bit_buf, bits, 12); NEEDBITS (bit_buf, bits, bit_ptr); val = 2 * (SBITS (bit_buf, 12) + SBITS (bit_buf, 1)) + 1; - val = (val * quantizer_scale * quant_matrix[j]) / 32; + val = (val * 
quant_matrix[j]) / 32; SATURATE (val); dest[j] = val; @@ -684,8 +675,8 @@ static int get_non_intra_block (mpeg2_decoder_t * const decoder) } break; /* illegal, check needed to avoid buffer overflow */ } - dest[63] ^= mismatch & 1; - DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + dest[63] ^= mismatch & 16; + DUMPBITS (bit_buf, bits, tab->len); /* dump end of block code */ decoder->bitstream_buf = bit_buf; decoder->bitstream_bits = bits; decoder->bitstream_ptr = bit_ptr; @@ -697,17 +688,15 @@ static void get_mpeg1_intra_block (mpeg2_decoder_t * const decoder) int i; int j; int val; - const uint8_t * scan = decoder->scan; - const uint8_t * quant_matrix = decoder->intra_quantizer_matrix; - int quantizer_scale = decoder->quantizer_scale; + const uint8_t * const scan = decoder->scan; + const uint16_t * const quant_matrix = decoder->quantizer_matrix[0]; const DCTtab * tab; uint32_t bit_buf; int bits; const uint8_t * bit_ptr; - int16_t * dest; + int16_t * const dest = decoder->DCTblock; i = 0; - dest = decoder->DCTblock; bit_buf = decoder->bitstream_buf; bits = decoder->bitstream_bits; @@ -728,7 +717,7 @@ static void get_mpeg1_intra_block (mpeg2_decoder_t * const decoder) j = scan[i]; bit_buf <<= tab->len; bits += tab->len + 1; - val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4; + val = (tab->level * quant_matrix[j]) >> 4; /* oddification */ val = (val - 1) | 1; @@ -767,7 +756,7 @@ static void get_mpeg1_intra_block (mpeg2_decoder_t * const decoder) DUMPBITS (bit_buf, bits, 8); val = UBITS (bit_buf, 8) + 2 * val; } - val = (val * quantizer_scale * quant_matrix[j]) / 16; + val = (val * quant_matrix[j]) / 16; /* oddification */ val = (val + ~SBITS (val, 1)) | 1; @@ -805,7 +794,7 @@ static void get_mpeg1_intra_block (mpeg2_decoder_t * const decoder) } break; /* illegal, check needed to avoid buffer overflow */ } - DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + DUMPBITS (bit_buf, bits, tab->len); /* dump end of block code */ 
decoder->bitstream_buf = bit_buf; decoder->bitstream_bits = bits; decoder->bitstream_ptr = bit_ptr; @@ -816,17 +805,15 @@ static int get_mpeg1_non_intra_block (mpeg2_decoder_t * const decoder) int i; int j; int val; - const uint8_t * scan = decoder->scan; - const uint8_t * quant_matrix = decoder->non_intra_quantizer_matrix; - int quantizer_scale = decoder->quantizer_scale; + const uint8_t * const scan = decoder->scan; + const uint16_t * const quant_matrix = decoder->quantizer_matrix[1]; const DCTtab * tab; uint32_t bit_buf; int bits; const uint8_t * bit_ptr; - int16_t * dest; + int16_t * const dest = decoder->DCTblock; i = -1; - dest = decoder->DCTblock; bit_buf = decoder->bitstream_buf; bits = decoder->bitstream_bits; @@ -853,7 +840,7 @@ static int get_mpeg1_non_intra_block (mpeg2_decoder_t * const decoder) j = scan[i]; bit_buf <<= tab->len; bits += tab->len + 1; - val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5; + val = ((2 * tab->level + 1) * quant_matrix[j]) >> 5; /* oddification */ val = (val - 1) | 1; @@ -896,7 +883,7 @@ static int get_mpeg1_non_intra_block (mpeg2_decoder_t * const decoder) val = UBITS (bit_buf, 8) + 2 * val; } val = 2 * (val + SBITS (val, 1)) + 1; - val = (val * quantizer_scale * quant_matrix[j]) / 32; + val = (val * quant_matrix[j]) / 32; /* oddification */ val = (val + ~SBITS (val, 1)) | 1; @@ -934,7 +921,7 @@ static int get_mpeg1_non_intra_block (mpeg2_decoder_t * const decoder) } break; /* illegal, check needed to avoid buffer overflow */ } - DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + DUMPBITS (bit_buf, bits, tab->len); /* dump end of block code */ decoder->bitstream_buf = bit_buf; decoder->bitstream_bits = bits; decoder->bitstream_ptr = bit_ptr; @@ -951,19 +938,19 @@ static inline void slice_intra_DCT (mpeg2_decoder_t * const decoder, NEEDBITS (bit_buf, bits, bit_ptr); /* Get the intra DC coefficient and inverse quantize it */ if (cc == 0) - decoder->dc_dct_pred[0] += get_luma_dc_dct_diff (decoder); + 
decoder->DCTblock[0] = + decoder->dc_dct_pred[0] += get_luma_dc_dct_diff (decoder); else - decoder->dc_dct_pred[cc] += get_chroma_dc_dct_diff (decoder); - decoder->DCTblock[0] = - decoder->dc_dct_pred[cc] << (3 - decoder->intra_dc_precision); + decoder->DCTblock[0] = + decoder->dc_dct_pred[cc] += get_chroma_dc_dct_diff (decoder); if (decoder->mpeg1) { if (decoder->coding_type != D_TYPE) get_mpeg1_intra_block (decoder); } else if (decoder->intra_vlc_format) - get_intra_block_B15 (decoder); + get_intra_block_B15 (decoder, decoder->quantizer_matrix[cc ? 2 : 0]); else - get_intra_block_B14 (decoder); + get_intra_block_B14 (decoder, decoder->quantizer_matrix[cc ? 2 : 0]); mpeg2_idct_copy (decoder->DCTblock, dest, stride); #undef bit_buf #undef bits @@ -971,6 +958,7 @@ static inline void slice_intra_DCT (mpeg2_decoder_t * const decoder, } static inline void slice_non_intra_DCT (mpeg2_decoder_t * const decoder, + const int cc, uint8_t * const dest, const int stride) { int last; @@ -978,11 +966,12 @@ static inline void slice_non_intra_DCT (mpeg2_decoder_t * const decoder, if (decoder->mpeg1) last = get_mpeg1_non_intra_block (decoder); else - last = get_non_intra_block (decoder); + last = get_non_intra_block (decoder, + decoder->quantizer_matrix[cc ? 
3 : 1]); mpeg2_idct_add (last, decoder->DCTblock, dest, stride); } -#define MOTION(table,ref,motion_x,motion_y,size,y) \ +#define MOTION_420(table,ref,motion_x,motion_y,size,y) \ pos_x = 2 * decoder->offset + motion_x; \ pos_y = 2 * decoder->v_offset + motion_y + 2 * y; \ if (unlikely (pos_x > decoder->limit_x)) { \ @@ -1009,7 +998,7 @@ static inline void slice_non_intra_DCT (mpeg2_decoder_t * const decoder, (decoder->offset >> 1), ref[2] + offset, \ decoder->uv_stride, size/2) -#define MOTION_FIELD(table,ref,motion_x,motion_y,dest_field,op,src_field) \ +#define MOTION_FIELD_420(table,ref,motion_x,motion_y,dest_field,op,src_field) \ pos_x = 2 * decoder->offset + motion_x; \ pos_y = decoder->v_offset + motion_y; \ if (unlikely (pos_x > decoder->limit_x)) { \ @@ -1038,13 +1027,237 @@ static inline void slice_non_intra_DCT (mpeg2_decoder_t * const decoder, (decoder->offset >> 1), ref[2] + offset, \ 2 * decoder->uv_stride, 4) +#define MOTION_DMV_420(table,ref,motion_x,motion_y) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = decoder->v_offset + motion_y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y)) { \ + pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y; \ + motion_y = pos_y - decoder->v_offset; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + decoder->offset, \ + ref[0] + offset, 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset, \ + ref[0] + decoder->stride + offset, \ + 2 * decoder->stride, 8); \ + motion_x /= 2; motion_y /= 2; \ + xy_half = ((motion_y & 1) << 1) | (motion_x & 1); \ + offset = (((decoder->offset + motion_x) >> 1) + \ + (((decoder->v_offset >> 1) + (motion_y & ~1)) * \ + decoder->uv_stride)); \ + table[4+xy_half] (decoder->dest[1] + (decoder->offset >> 1), \ + ref[1] + offset, 2 * decoder->uv_stride, 4); \ + table[4+xy_half] (decoder->dest[1] + decoder->uv_stride + \ + (decoder->offset >> 1), \ + ref[1] + decoder->uv_stride + offset, \ + 2 * decoder->uv_stride, 4); \ + table[4+xy_half] (decoder->dest[2] + (decoder->offset >> 1), \ + ref[2] + offset, 2 * decoder->uv_stride, 4); \ + table[4+xy_half] (decoder->dest[2] + decoder->uv_stride + \ + (decoder->offset >> 1), \ + ref[2] + decoder->uv_stride + offset, \ + 2 * decoder->uv_stride, 4) + +#define MOTION_ZERO_420(table,ref) \ + table[0] (decoder->dest[0] + decoder->offset, \ + (ref[0] + decoder->offset + \ + decoder->v_offset * decoder->stride), decoder->stride, 16); \ + offset = ((decoder->offset >> 1) + \ + (decoder->v_offset >> 1) * decoder->uv_stride); \ + table[4] (decoder->dest[1] + (decoder->offset >> 1), \ + ref[1] + offset, decoder->uv_stride, 8); \ + table[4] (decoder->dest[2] + (decoder->offset >> 1), \ + ref[2] + offset, decoder->uv_stride, 8) + +#define MOTION_422(table,ref,motion_x,motion_y,size,y) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = 2 * decoder->v_offset + motion_y + 2 * y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 
0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y_ ## size)) { \ + pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y_ ## size; \ + motion_y = pos_y - 2 * decoder->v_offset - 2 * y; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + (pos_y >> 1) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \ + ref[0] + offset, decoder->stride, size); \ + offset = (offset + (motion_x & (motion_x < 0))) >> 1; \ + motion_x /= 2; \ + xy_half = ((pos_y & 1) << 1) | (motion_x & 1); \ + table[4+xy_half] (decoder->dest[1] + y * decoder->uv_stride + \ + (decoder->offset >> 1), ref[1] + offset, \ + decoder->uv_stride, size); \ + table[4+xy_half] (decoder->dest[2] + y * decoder->uv_stride + \ + (decoder->offset >> 1), ref[2] + offset, \ + decoder->uv_stride, size) + +#define MOTION_FIELD_422(table,ref,motion_x,motion_y,dest_field,op,src_field) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = decoder->v_offset + motion_y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y)) { \ + pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y; \ + motion_y = pos_y - decoder->v_offset; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + ((pos_y op) + src_field) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + dest_field * decoder->stride + \ + decoder->offset, ref[0] + offset, \ + 2 * decoder->stride, 8); \ + offset = (offset + (motion_x & (motion_x < 0))) >> 1; \ + motion_x /= 2; \ + xy_half = ((pos_y & 1) << 1) | (motion_x & 1); \ + table[4+xy_half] (decoder->dest[1] + dest_field * decoder->uv_stride + \ + (decoder->offset >> 1), ref[1] + offset, \ + 2 * decoder->uv_stride, 8); \ + table[4+xy_half] (decoder->dest[2] + dest_field * decoder->uv_stride + \ + (decoder->offset >> 1), ref[2] + offset, \ + 2 * decoder->uv_stride, 8) + +#define MOTION_DMV_422(table,ref,motion_x,motion_y) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = decoder->v_offset + motion_y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y)) { \ + pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y; \ + motion_y = pos_y - decoder->v_offset; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + decoder->offset, \ + ref[0] + offset, 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset, \ + ref[0] + decoder->stride + offset, \ + 2 * decoder->stride, 8); \ + offset = (offset + (motion_x & (motion_x < 0))) >> 1; \ + motion_x /= 2; \ + xy_half = ((pos_y & 1) << 1) | (motion_x & 1); \ + table[4+xy_half] (decoder->dest[1] + (decoder->offset >> 1), \ + ref[1] + offset, 2 * decoder->uv_stride, 8); \ + table[4+xy_half] (decoder->dest[1] + decoder->uv_stride + \ + (decoder->offset >> 1), \ + ref[1] + decoder->uv_stride + offset, \ + 2 * decoder->uv_stride, 8); \ + table[4+xy_half] (decoder->dest[2] + (decoder->offset >> 1), \ + ref[2] + offset, 2 * decoder->uv_stride, 8); \ + table[4+xy_half] (decoder->dest[2] + decoder->uv_stride + \ + (decoder->offset >> 1), \ + ref[2] + decoder->uv_stride + offset, \ + 2 * decoder->uv_stride, 8) + +#define MOTION_ZERO_422(table,ref) \ + offset = decoder->offset + decoder->v_offset * decoder->stride; \ + table[0] (decoder->dest[0] + decoder->offset, \ + ref[0] + offset, decoder->stride, 16); \ + offset >>= 1; \ + table[4] (decoder->dest[1] + (decoder->offset >> 1), \ + ref[1] + offset, decoder->uv_stride, 16); \ + table[4] (decoder->dest[2] + (decoder->offset >> 1), \ + ref[2] + offset, decoder->uv_stride, 16) + +#define MOTION_444(table,ref,motion_x,motion_y,size,y) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = 2 * decoder->v_offset + motion_y + 2 * y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y_ ## size)) { \ + pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y_ ## size; \ + motion_y = pos_y - 2 * decoder->v_offset - 2 * y; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + (pos_y >> 1) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \ + ref[0] + offset, decoder->stride, size); \ + table[xy_half] (decoder->dest[1] + y * decoder->stride + decoder->offset, \ + ref[1] + offset, decoder->stride, size); \ + table[xy_half] (decoder->dest[2] + y * decoder->stride + decoder->offset, \ + ref[2] + offset, decoder->stride, size) + +#define MOTION_FIELD_444(table,ref,motion_x,motion_y,dest_field,op,src_field) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = decoder->v_offset + motion_y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y)) { \ + pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y; \ + motion_y = pos_y - decoder->v_offset; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + ((pos_y op) + src_field) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + dest_field * decoder->stride + \ + decoder->offset, ref[0] + offset, \ + 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[1] + dest_field * decoder->stride + \ + decoder->offset, ref[1] + offset, \ + 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[2] + dest_field * decoder->stride + \ + decoder->offset, ref[2] + offset, \ + 2 * decoder->stride, 8) + +#define MOTION_DMV_444(table,ref,motion_x,motion_y) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = decoder->v_offset + motion_y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y)) { \ + pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y; \ + motion_y = pos_y - decoder->v_offset; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + decoder->offset, \ + ref[0] + offset, 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset, \ + ref[0] + decoder->stride + offset, \ + 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[1] + decoder->offset, \ + ref[1] + offset, 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[1] + decoder->stride + decoder->offset, \ + ref[1] + decoder->stride + offset, \ + 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[2] + decoder->offset, \ + ref[2] + offset, 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[2] + decoder->stride + decoder->offset, \ + ref[2] + decoder->stride + offset, \ + 2 * decoder->stride, 8) + +#define MOTION_ZERO_444(table,ref) \ + offset = decoder->offset + decoder->v_offset * decoder->stride; /* zero-vector copy for 4:4:4: all three planes are addressed like luma (16-wide table[0], full decoder->offset, decoder->stride), matching MOTION_444 */ \ + table[0] (decoder->dest[0] + decoder->offset, \ + ref[0] + offset, decoder->stride, 16); \ + table[0] (decoder->dest[1] + decoder->offset, \ + ref[1] + offset, decoder->stride, 16); \ + table[0] (decoder->dest[2] + decoder->offset, \ + ref[2] + offset, decoder->stride, 16) + +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + static void motion_mp1 (mpeg2_decoder_t * const decoder, motion_t * const motion, mpeg2_mc_fct * const * const table) { -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) int motion_x, motion_y; unsigned int pos_x, pos_y, xy_half, offset; @@ -1064,202 +1277,239 @@ static void motion_mp1 (mpeg2_decoder_t * const decoder, motion->f_code[0] + motion->f_code[1]); motion->pmv[0][1] = motion_y; - MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); -#undef bit_buf -#undef bits -#undef bit_ptr 
-} - -static void motion_fr_frame (mpeg2_decoder_t * const decoder, - motion_t * const motion, - mpeg2_mc_fct * const * const table) -{ -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - int motion_x, motion_y; - unsigned int pos_x, pos_y, xy_half, offset; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_x = motion->pmv[0][0] + get_motion_delta (decoder, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[1][0] = motion->pmv[0][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = motion->pmv[0][1] + get_motion_delta (decoder, - motion->f_code[1]); - motion_y = bound_motion_vector (motion_y, motion->f_code[1]); - motion->pmv[1][1] = motion->pmv[0][1] = motion_y; - - MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static void motion_fr_field (mpeg2_decoder_t * const decoder, - motion_t * const motion, - mpeg2_mc_fct * const * const table) -{ -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - int motion_x, motion_y, field; - unsigned int pos_x, pos_y, xy_half, offset; - - NEEDBITS (bit_buf, bits, bit_ptr); - field = UBITS (bit_buf, 1); - DUMPBITS (bit_buf, bits, 1); - - motion_x = motion->pmv[0][0] + get_motion_delta (decoder, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[0][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = (motion->pmv[0][1] >> 1) + get_motion_delta (decoder, - motion->f_code[1]); - /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ - motion->pmv[0][1] = motion_y << 1; - - MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 0, & ~1, field); - - NEEDBITS (bit_buf, bits, bit_ptr); - field = UBITS (bit_buf, 1); - DUMPBITS (bit_buf, bits, 1); - - motion_x = motion->pmv[1][0] + get_motion_delta 
(decoder, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[1][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = (motion->pmv[1][1] >> 1) + get_motion_delta (decoder, - motion->f_code[1]); - /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ - motion->pmv[1][1] = motion_y << 1; - - MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 1, & ~1, field); -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static void motion_fr_dmv (mpeg2_decoder_t * const decoder, - motion_t * const motion, - mpeg2_mc_fct * const * const table) -{ -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - int motion_x, motion_y, dmv_x, dmv_y, m, other_x, other_y; - unsigned int pos_x, pos_y, xy_half, offset; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_x = motion->pmv[0][0] + get_motion_delta (decoder, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[1][0] = motion->pmv[0][0] = motion_x; - NEEDBITS (bit_buf, bits, bit_ptr); - dmv_x = get_dmv (decoder); - - motion_y = (motion->pmv[0][1] >> 1) + get_motion_delta (decoder, - motion->f_code[1]); - /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ - motion->pmv[1][1] = motion->pmv[0][1] = motion_y << 1; - dmv_y = get_dmv (decoder); - - m = decoder->top_field_first ? 1 : 3; - other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; - other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y - 1; - MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 0, | 1, 0); - - m = decoder->top_field_first ? 
3 : 1; - other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; - other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y + 1; - MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 1, & ~1, 0); - - pos_x = 2 * decoder->offset + motion_x; - pos_y = decoder->v_offset + motion_y; - if (unlikely (pos_x > decoder->limit_x)) { - pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; - motion_x = pos_x - 2 * decoder->offset; - } - if (unlikely (pos_y > decoder->limit_y)) { - pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y; - motion_y = pos_y - decoder->v_offset; - } - xy_half = ((pos_y & 1) << 1) | (pos_x & 1); - offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride; - mpeg2_mc.avg[xy_half] - (decoder->dest[0] + decoder->offset, - motion->ref[0][0] + offset, 2 * decoder->stride, 8); - mpeg2_mc.avg[xy_half] - (decoder->dest[0] + decoder->stride + decoder->offset, - motion->ref[0][0] + decoder->stride + offset, 2 * decoder->stride, 8); - motion_x /= 2; motion_y /= 2; - xy_half = ((motion_y & 1) << 1) | (motion_x & 1); - offset = (((decoder->offset + motion_x) >> 1) + - (((decoder->v_offset >> 1) + (motion_y & ~1)) * - decoder->uv_stride)); - mpeg2_mc.avg[4+xy_half] - (decoder->dest[1] + (decoder->offset >> 1), - motion->ref[0][1] + offset, 2 * decoder->uv_stride, 4); - mpeg2_mc.avg[4+xy_half] - (decoder->dest[1] + decoder->uv_stride + (decoder->offset >> 1), - motion->ref[0][1] + decoder->uv_stride + offset, - 2 * decoder->uv_stride, 4); - mpeg2_mc.avg[4+xy_half] - (decoder->dest[2] + (decoder->offset >> 1), - motion->ref[0][2] + offset, 2 * decoder->uv_stride, 4); - mpeg2_mc.avg[4+xy_half] - (decoder->dest[2] + decoder->uv_stride + (decoder->offset >> 1), - motion->ref[0][2] + decoder->uv_stride + offset, - 2 * decoder->uv_stride, 4); -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static inline void motion_reuse (const mpeg2_decoder_t * const decoder, - const motion_t * const motion, - mpeg2_mc_fct * const * const table) -{ - int motion_x, motion_y; - unsigned 
int pos_x, pos_y, xy_half, offset; - - motion_x = motion->pmv[0][0]; - motion_y = motion->pmv[0][1]; - - MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); + MOTION_420 (table, motion->ref[0], motion_x, motion_y, 16, 0); } -static inline void motion_zero (const mpeg2_decoder_t * const decoder, - const motion_t * const motion, - mpeg2_mc_fct * const * const table) -{ - unsigned int offset; - - table[0] (decoder->dest[0] + decoder->offset, - (motion->ref[0][0] + decoder->offset + - decoder->v_offset * decoder->stride), - decoder->stride, 16); - - offset = ((decoder->offset >> 1) + - (decoder->v_offset >> 1) * decoder->uv_stride); - table[4] (decoder->dest[1] + (decoder->offset >> 1), - motion->ref[0][1] + offset, decoder->uv_stride, 8); - table[4] (decoder->dest[2] + (decoder->offset >> 1), - motion->ref[0][2] + offset, decoder->uv_stride, 8); -} +#define MOTION_FUNCTIONS(FORMAT,MOTION,MOTION_FIELD,MOTION_DMV,MOTION_ZERO) \ + /* Generates the motion parsers for one chroma format, each named motion_<kind>_FORMAT with <kind> in fr_frame, fr_field, fr_dmv, reuse, zero, fi_field, fi_16x8, fi_dmv. MOTION, MOTION_FIELD, MOTION_DMV and MOTION_ZERO are the format-specific compensation macros expanded inside the generated bodies. Must be expanded while bit_buf, bits and bit_ptr are defined, because the bodies pass them to NEEDBITS/UBITS/DUMPBITS. */ \ +static void motion_fr_frame_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_y = motion->pmv[0][1] + get_motion_delta (decoder, \ + motion->f_code[1]); \ + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); \ + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; \ + \ + MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); \ +} \ + \ +static void motion_fr_field_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y, field; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + NEEDBITS 
(bit_buf, bits, bit_ptr); \ + field = UBITS (bit_buf, 1); \ + DUMPBITS (bit_buf, bits, 1); \ + \ + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[0][0] = motion_x; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_y = ((motion->pmv[0][1] >> 1) + \ + get_motion_delta (decoder, motion->f_code[1])); \ + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ \ + motion->pmv[0][1] = motion_y << 1; \ + \ + MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 0, & ~1, field); \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + field = UBITS (bit_buf, 1); \ + DUMPBITS (bit_buf, bits, 1); \ + \ + motion_x = motion->pmv[1][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[1][0] = motion_x; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_y = ((motion->pmv[1][1] >> 1) + \ + get_motion_delta (decoder, motion->f_code[1])); \ + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ \ + motion->pmv[1][1] = motion_y << 1; \ + \ + MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 1, & ~1, field); \ +} \ + \ +static void motion_fr_dmv_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y, dmv_x, dmv_y, m, other_x, other_y; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + dmv_x = get_dmv (decoder); \ + \ + motion_y = ((motion->pmv[0][1] >> 1) + \ + get_motion_delta (decoder, motion->f_code[1])); \ + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ \ 
+ motion->pmv[1][1] = motion->pmv[0][1] = motion_y << 1; \ + dmv_y = get_dmv (decoder); \ + \ + m = decoder->top_field_first ? 1 : 3; \ + other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; \ + other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y - 1; \ + MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 0, | 1, 0); \ + \ + m = decoder->top_field_first ? 3 : 1; \ + other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; \ + other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y + 1; \ + MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 1, & ~1, 0);\ + \ + MOTION_DMV (mpeg2_mc.avg, motion->ref[0], motion_x, motion_y); \ +} \ + \ +static void motion_reuse_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + motion_x = motion->pmv[0][0]; \ + motion_y = motion->pmv[0][1]; \ + \ + MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); \ +} \ + \ +static void motion_zero_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + unsigned int offset; \ + \ + motion->pmv[0][0] = motion->pmv[0][1] = 0; \ + motion->pmv[1][0] = motion->pmv[1][1] = 0; \ + \ + MOTION_ZERO (table, motion->ref[0]); \ +} \ + \ +static void motion_fi_field_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y; \ + uint8_t ** ref_field; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + ref_field = motion->ref2[UBITS (bit_buf, 1)]; \ + DUMPBITS (bit_buf, bits, 1); \ + \ + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + 
motion_y = motion->pmv[0][1] + get_motion_delta (decoder, \ + motion->f_code[1]); \ + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); \ + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; \ + \ + MOTION (table, ref_field, motion_x, motion_y, 16, 0); \ +} \ + \ +static void motion_fi_16x8_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y; \ + uint8_t ** ref_field; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + ref_field = motion->ref2[UBITS (bit_buf, 1)]; \ + DUMPBITS (bit_buf, bits, 1); \ + \ + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[0][0] = motion_x; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_y = motion->pmv[0][1] + get_motion_delta (decoder, \ + motion->f_code[1]); \ + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); \ + motion->pmv[0][1] = motion_y; \ + \ + MOTION (table, ref_field, motion_x, motion_y, 8, 0); \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + ref_field = motion->ref2[UBITS (bit_buf, 1)]; \ + DUMPBITS (bit_buf, bits, 1); \ + \ + motion_x = motion->pmv[1][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[1][0] = motion_x; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_y = motion->pmv[1][1] + get_motion_delta (decoder, \ + motion->f_code[1]); \ + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); \ + motion->pmv[1][1] = motion_y; \ + \ + MOTION (table, ref_field, motion_x, motion_y, 8, 8); \ +} \ + \ +static void motion_fi_dmv_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y, other_x, other_y; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + NEEDBITS 
(bit_buf, bits, bit_ptr); \ + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + other_x = ((motion_x + (motion_x > 0)) >> 1) + get_dmv (decoder); \ + \ + motion_y = motion->pmv[0][1] + get_motion_delta (decoder, \ + motion->f_code[1]); \ + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); \ + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; \ + other_y = (((motion_y + (motion_y > 0)) >> 1) + get_dmv (decoder) + \ + decoder->dmv_offset); \ + \ + MOTION (mpeg2_mc.put, motion->ref[0], motion_x, motion_y, 16, 0); \ + MOTION (mpeg2_mc.avg, motion->ref[1], other_x, other_y, 16, 0); \ +} \ + +MOTION_FUNCTIONS (420, MOTION_420, MOTION_FIELD_420, MOTION_DMV_420, + MOTION_ZERO_420) +MOTION_FUNCTIONS (422, MOTION_422, MOTION_FIELD_422, MOTION_DMV_422, + MOTION_ZERO_422) +MOTION_FUNCTIONS (444, MOTION_444, MOTION_FIELD_444, MOTION_DMV_444, + MOTION_ZERO_444) /* like motion_frame, but parsing without actual motion compensation */ static void motion_fr_conceal (mpeg2_decoder_t * const decoder) { -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) int tmp; NEEDBITS (bit_buf, bits, bit_ptr); @@ -1275,129 +1525,10 @@ static void motion_fr_conceal (mpeg2_decoder_t * const decoder) decoder->f_motion.pmv[1][1] = decoder->f_motion.pmv[0][1] = tmp; DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */ -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static void motion_fi_field (mpeg2_decoder_t * const decoder, - motion_t * const motion, - mpeg2_mc_fct * const * const table) -{ -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - int motion_x, motion_y; - uint8_t ** ref_field; - unsigned int pos_x, pos_y, xy_half, offset; - - NEEDBITS 
(bit_buf, bits, bit_ptr); - ref_field = motion->ref2[UBITS (bit_buf, 1)]; - DUMPBITS (bit_buf, bits, 1); - - motion_x = motion->pmv[0][0] + get_motion_delta (decoder, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[1][0] = motion->pmv[0][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = motion->pmv[0][1] + get_motion_delta (decoder, - motion->f_code[1]); - motion_y = bound_motion_vector (motion_y, motion->f_code[1]); - motion->pmv[1][1] = motion->pmv[0][1] = motion_y; - - MOTION (table, ref_field, motion_x, motion_y, 16, 0); -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static void motion_fi_16x8 (mpeg2_decoder_t * const decoder, - motion_t * const motion, - mpeg2_mc_fct * const * const table) -{ -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - int motion_x, motion_y; - uint8_t ** ref_field; - unsigned int pos_x, pos_y, xy_half, offset; - - NEEDBITS (bit_buf, bits, bit_ptr); - ref_field = motion->ref2[UBITS (bit_buf, 1)]; - DUMPBITS (bit_buf, bits, 1); - - motion_x = motion->pmv[0][0] + get_motion_delta (decoder, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[0][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = motion->pmv[0][1] + get_motion_delta (decoder, - motion->f_code[1]); - motion_y = bound_motion_vector (motion_y, motion->f_code[1]); - motion->pmv[0][1] = motion_y; - - MOTION (table, ref_field, motion_x, motion_y, 8, 0); - - NEEDBITS (bit_buf, bits, bit_ptr); - ref_field = motion->ref2[UBITS (bit_buf, 1)]; - DUMPBITS (bit_buf, bits, 1); - - motion_x = motion->pmv[1][0] + get_motion_delta (decoder, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[1][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = motion->pmv[1][1] + get_motion_delta (decoder, - motion->f_code[1]); - 
motion_y = bound_motion_vector (motion_y, motion->f_code[1]); - motion->pmv[1][1] = motion_y; - - MOTION (table, ref_field, motion_x, motion_y, 8, 8); -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static void motion_fi_dmv (mpeg2_decoder_t * const decoder, - motion_t * const motion, - mpeg2_mc_fct * const * const table) -{ -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - int motion_x, motion_y, other_x, other_y; - unsigned int pos_x, pos_y, xy_half, offset; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_x = motion->pmv[0][0] + get_motion_delta (decoder, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[1][0] = motion->pmv[0][0] = motion_x; - NEEDBITS (bit_buf, bits, bit_ptr); - other_x = ((motion_x + (motion_x > 0)) >> 1) + get_dmv (decoder); - - motion_y = motion->pmv[0][1] + get_motion_delta (decoder, - motion->f_code[1]); - motion_y = bound_motion_vector (motion_y, motion->f_code[1]); - motion->pmv[1][1] = motion->pmv[0][1] = motion_y; - other_y = (((motion_y + (motion_y > 0)) >> 1) + get_dmv (decoder) + - decoder->dmv_offset); - - MOTION (mpeg2_mc.put, motion->ref[0], motion_x, motion_y, 16, 0); - MOTION (mpeg2_mc.avg, motion->ref[1], other_x, other_y, 16, 0); -#undef bit_buf -#undef bits -#undef bit_ptr } static void motion_fi_conceal (mpeg2_decoder_t * const decoder) { -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) int tmp; NEEDBITS (bit_buf, bits, bit_ptr); @@ -1415,10 +1546,11 @@ static void motion_fi_conceal (mpeg2_decoder_t * const decoder) decoder->f_motion.pmv[1][1] = decoder->f_motion.pmv[0][1] = tmp; DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */ +} + #undef bit_buf #undef bits #undef bit_ptr -} #define MOTION_CALL(routine,direction) \ do { \ @@ -1436,14 +1568,14 @@ do { \ if (decoder->offset == decoder->width) { \ do { /* just so we can 
use the break statement */ \ if (decoder->convert) { \ - decoder->convert (decoder->fbuf_id, decoder->dest, \ + decoder->convert (decoder->convert_id, decoder->dest, \ decoder->v_offset); \ if (decoder->coding_type == B_TYPE) \ break; \ } \ - decoder->dest[0] += 16 * decoder->stride; \ - decoder->dest[1] += 4 * decoder->stride; \ - decoder->dest[2] += 4 * decoder->stride; \ + decoder->dest[0] += decoder->slice_stride; \ + decoder->dest[1] += decoder->slice_uv_stride; \ + decoder->dest[2] += decoder->slice_uv_stride; \ } while (0); \ decoder->v_offset += 16; \ if (decoder->v_offset > decoder->limit_y) { \ @@ -1460,7 +1592,7 @@ void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3], { int offset, stride, height, bottom_field; - stride = decoder->width; + stride = decoder->stride_frame; bottom_field = (decoder->picture_structure == BOTTOM_FIELD); offset = bottom_field ? stride : 0; height = decoder->height; @@ -1469,15 +1601,9 @@ void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3], decoder->picture_dest[1] = current_fbuf[1] + (offset >> 1); decoder->picture_dest[2] = current_fbuf[2] + (offset >> 1); - if (forward_fbuf) { - decoder->f_motion.ref[0][0] = forward_fbuf[0] + offset; - decoder->f_motion.ref[0][1] = forward_fbuf[1] + (offset >> 1); - decoder->f_motion.ref[0][2] = forward_fbuf[2] + (offset >> 1); - } else { - decoder->f_motion.ref[0][0] = 0; - decoder->f_motion.ref[0][1] = 0; - decoder->f_motion.ref[0][2] = 0; - } + decoder->f_motion.ref[0][0] = forward_fbuf[0] + offset; + decoder->f_motion.ref[0][1] = forward_fbuf[1] + (offset >> 1); + decoder->f_motion.ref[0][2] = forward_fbuf[2] + (offset >> 1); decoder->b_motion.ref[0][0] = backward_fbuf[0] + offset; decoder->b_motion.ref[0][1] = backward_fbuf[1] + (offset >> 1); @@ -1494,15 +1620,9 @@ void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3], if (decoder->second_field && (decoder->coding_type != B_TYPE)) forward_fbuf = current_fbuf; - if 
(forward_fbuf) { - decoder->f_motion.ref[1][0] = forward_fbuf[0] + offset; - decoder->f_motion.ref[1][1] = forward_fbuf[1] + (offset >> 1); - decoder->f_motion.ref[1][2] = forward_fbuf[2] + (offset >> 1); - } else { - decoder->f_motion.ref[0][0] = 0; - decoder->f_motion.ref[0][1] = 0; - decoder->f_motion.ref[0][2] = 0; - } + decoder->f_motion.ref[1][0] = forward_fbuf[0] + offset; + decoder->f_motion.ref[1][1] = forward_fbuf[1] + (offset >> 1); + decoder->f_motion.ref[1][2] = forward_fbuf[2] + (offset >> 1); decoder->b_motion.ref[1][0] = backward_fbuf[0] + offset; decoder->b_motion.ref[1][1] = backward_fbuf[1] + (offset >> 1); @@ -1514,10 +1634,59 @@ void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3], decoder->stride = stride; decoder->uv_stride = stride >> 1; + decoder->slice_stride = 16 * stride; + decoder->slice_uv_stride = + decoder->slice_stride >> (2 - decoder->chroma_format); decoder->limit_x = 2 * decoder->width - 32; decoder->limit_y_16 = 2 * height - 32; decoder->limit_y_8 = 2 * height - 16; decoder->limit_y = height - 16; + + if (decoder->mpeg1) { + decoder->motion_parser[0] = motion_zero_420; + decoder->motion_parser[MC_FRAME] = motion_mp1; + decoder->motion_parser[4] = motion_reuse_420; + } else if (decoder->picture_structure == FRAME_PICTURE) { + if (decoder->chroma_format == 0) { + decoder->motion_parser[0] = motion_zero_420; + decoder->motion_parser[MC_FIELD] = motion_fr_field_420; + decoder->motion_parser[MC_FRAME] = motion_fr_frame_420; + decoder->motion_parser[MC_DMV] = motion_fr_dmv_420; + decoder->motion_parser[4] = motion_reuse_420; + } else if (decoder->chroma_format == 1) { + decoder->motion_parser[0] = motion_zero_422; + decoder->motion_parser[MC_FIELD] = motion_fr_field_422; + decoder->motion_parser[MC_FRAME] = motion_fr_frame_422; + decoder->motion_parser[MC_DMV] = motion_fr_dmv_422; + decoder->motion_parser[4] = motion_reuse_422; + } else { + decoder->motion_parser[0] = motion_zero_444; + 
decoder->motion_parser[MC_FIELD] = motion_fr_field_444; + decoder->motion_parser[MC_FRAME] = motion_fr_frame_444; + decoder->motion_parser[MC_DMV] = motion_fr_dmv_444; + decoder->motion_parser[4] = motion_reuse_444; + } + } else { + if (decoder->chroma_format == 0) { + decoder->motion_parser[0] = motion_zero_420; + decoder->motion_parser[MC_FIELD] = motion_fi_field_420; + decoder->motion_parser[MC_16X8] = motion_fi_16x8_420; + decoder->motion_parser[MC_DMV] = motion_fi_dmv_420; + decoder->motion_parser[4] = motion_reuse_420; + } else if (decoder->chroma_format == 1) { + decoder->motion_parser[0] = motion_zero_422; + decoder->motion_parser[MC_FIELD] = motion_fi_field_422; + decoder->motion_parser[MC_16X8] = motion_fi_16x8_422; + decoder->motion_parser[MC_DMV] = motion_fi_dmv_422; + decoder->motion_parser[4] = motion_reuse_422; + } else { + decoder->motion_parser[0] = motion_zero_444; + decoder->motion_parser[MC_FIELD] = motion_fi_field_444; + decoder->motion_parser[MC_16X8] = motion_fi_16x8_444; + decoder->motion_parser[MC_DMV] = motion_fi_dmv_444; + decoder->motion_parser[4] = motion_reuse_444; + } + } } static inline int slice_init (mpeg2_decoder_t * const decoder, int code) @@ -1529,7 +1698,7 @@ static inline int slice_init (mpeg2_decoder_t * const decoder, int code) const MBAtab * mba; decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] = - decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision; + decoder->dc_dct_pred[2] = 16384; decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0; decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0; @@ -1543,13 +1712,14 @@ static inline int slice_init (mpeg2_decoder_t * const decoder, int code) decoder->v_offset = (code - 1) * 16; offset = 0; if (!(decoder->convert) || decoder->coding_type != B_TYPE) - offset = (code - 1) * decoder->stride * 4; + offset = (code - 1) * decoder->slice_stride; - decoder->dest[0] = decoder->picture_dest[0] + offset * 4; + decoder->dest[0] = decoder->picture_dest[0] + offset; 
+ offset >>= (2 - decoder->chroma_format); decoder->dest[1] = decoder->picture_dest[1] + offset; decoder->dest[2] = decoder->picture_dest[2] + offset; - decoder->quantizer_scale = get_quantizer_scale (decoder); + get_quantizer_scale (decoder); /* ignore intra_slice and all the extra data */ while (bit_buf & 0x80000000) { @@ -1587,9 +1757,9 @@ static inline int slice_init (mpeg2_decoder_t * const decoder, int code) while (decoder->offset - decoder->width >= 0) { decoder->offset -= decoder->width; if (!(decoder->convert) || decoder->coding_type != B_TYPE) { - decoder->dest[0] += 16 * decoder->stride; - decoder->dest[1] += 4 * decoder->stride; - decoder->dest[2] += 4 * decoder->stride; + decoder->dest[0] += decoder->slice_stride; + decoder->dest[1] += decoder->slice_uv_stride; + decoder->dest[2] += decoder->slice_uv_stride; } decoder->v_offset += 16; } @@ -1629,7 +1799,7 @@ void mpeg2_slice (mpeg2_decoder_t * const decoder, const int code, /* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? 
*/ if (macroblock_modes & MACROBLOCK_QUANT) - decoder->quantizer_scale = get_quantizer_scale (decoder); + get_quantizer_scale (decoder); if (macroblock_modes & MACROBLOCK_INTRA) { @@ -1663,72 +1833,49 @@ void mpeg2_slice (mpeg2_decoder_t * const decoder, const int code, slice_intra_DCT (decoder, 0, dest_y + 8, DCT_stride); slice_intra_DCT (decoder, 0, dest_y + DCT_offset, DCT_stride); slice_intra_DCT (decoder, 0, dest_y + DCT_offset + 8, DCT_stride); - slice_intra_DCT (decoder, 1, decoder->dest[1] + (offset >> 1), - decoder->uv_stride); - slice_intra_DCT (decoder, 2, decoder->dest[2] + (offset >> 1), - decoder->uv_stride); - - if (decoder->coding_type == D_TYPE) { - NEEDBITS (bit_buf, bits, bit_ptr); - DUMPBITS (bit_buf, bits, 1); + if (likely (decoder->chroma_format == 0)) { + slice_intra_DCT (decoder, 1, decoder->dest[1] + (offset >> 1), + decoder->uv_stride); + slice_intra_DCT (decoder, 2, decoder->dest[2] + (offset >> 1), + decoder->uv_stride); + if (decoder->coding_type == D_TYPE) { + NEEDBITS (bit_buf, bits, bit_ptr); + DUMPBITS (bit_buf, bits, 1); + } + } else if (likely (decoder->chroma_format == 1)) { + uint8_t * dest_u = decoder->dest[1] + (offset >> 1); + uint8_t * dest_v = decoder->dest[2] + (offset >> 1); + DCT_stride >>= 1; + DCT_offset >>= 1; + slice_intra_DCT (decoder, 1, dest_u, DCT_stride); + slice_intra_DCT (decoder, 2, dest_v, DCT_stride); + slice_intra_DCT (decoder, 1, dest_u + DCT_offset, DCT_stride); + slice_intra_DCT (decoder, 2, dest_v + DCT_offset, DCT_stride); + } else { + uint8_t * dest_u = decoder->dest[1] + offset; + uint8_t * dest_v = decoder->dest[2] + offset; + slice_intra_DCT (decoder, 1, dest_u, DCT_stride); + slice_intra_DCT (decoder, 2, dest_v, DCT_stride); + slice_intra_DCT (decoder, 1, dest_u + DCT_offset, DCT_stride); + slice_intra_DCT (decoder, 2, dest_v + DCT_offset, DCT_stride); + slice_intra_DCT (decoder, 1, dest_u + 8, DCT_stride); + slice_intra_DCT (decoder, 2, dest_v + 8, DCT_stride); + slice_intra_DCT (decoder, 1, 
dest_u + DCT_offset + 8, + DCT_stride); + slice_intra_DCT (decoder, 2, dest_v + DCT_offset + 8, + DCT_stride); } } else { - if (decoder->picture_structure == FRAME_PICTURE) - switch (macroblock_modes & MOTION_TYPE_MASK) { - case MC_FRAME: - if (decoder->mpeg1) - MOTION_CALL (motion_mp1, macroblock_modes); - else - MOTION_CALL (motion_fr_frame, macroblock_modes); - break; - - case MC_FIELD: - MOTION_CALL (motion_fr_field, macroblock_modes); - break; - - case MC_DMV: - MOTION_CALL (motion_fr_dmv, MACROBLOCK_MOTION_FORWARD); - break; - - case 0: - /* non-intra mb without forward mv in a P picture */ - decoder->f_motion.pmv[0][0] = 0; - decoder->f_motion.pmv[0][1] = 0; - decoder->f_motion.pmv[1][0] = 0; - decoder->f_motion.pmv[1][1] = 0; - MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); - break; - } - else - switch (macroblock_modes & MOTION_TYPE_MASK) { - case MC_FIELD: - MOTION_CALL (motion_fi_field, macroblock_modes); - break; - - case MC_16X8: - MOTION_CALL (motion_fi_16x8, macroblock_modes); - break; - - case MC_DMV: - MOTION_CALL (motion_fi_dmv, MACROBLOCK_MOTION_FORWARD); - break; - - case 0: - /* non-intra mb without forward mv in a P picture */ - decoder->f_motion.pmv[0][0] = 0; - decoder->f_motion.pmv[0][1] = 0; - decoder->f_motion.pmv[1][0] = 0; - decoder->f_motion.pmv[1][1] = 0; - MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); - break; - } + motion_parser_t * parser; + + parser = + decoder->motion_parser[macroblock_modes >> MOTION_TYPE_SHIFT]; + MOTION_CALL (parser, macroblock_modes); if (macroblock_modes & MACROBLOCK_PATTERN) { int coded_block_pattern; int DCT_offset, DCT_stride; - int offset; - uint8_t * dest_y; if (macroblock_modes & DCT_TYPE_INTERLACED) { DCT_offset = decoder->stride; @@ -1740,30 +1887,123 @@ void mpeg2_slice (mpeg2_decoder_t * const decoder, const int code, coded_block_pattern = get_coded_block_pattern (decoder); - offset = decoder->offset; - dest_y = decoder->dest[0] + offset; - if (coded_block_pattern & 0x20) - 
slice_non_intra_DCT (decoder, dest_y, DCT_stride); - if (coded_block_pattern & 0x10) - slice_non_intra_DCT (decoder, dest_y + 8, DCT_stride); - if (coded_block_pattern & 0x08) - slice_non_intra_DCT (decoder, dest_y + DCT_offset, - DCT_stride); - if (coded_block_pattern & 0x04) - slice_non_intra_DCT (decoder, dest_y + DCT_offset + 8, - DCT_stride); - if (coded_block_pattern & 0x2) - slice_non_intra_DCT (decoder, - decoder->dest[1] + (offset >> 1), - decoder->uv_stride); - if (coded_block_pattern & 0x1) - slice_non_intra_DCT (decoder, - decoder->dest[2] + (offset >> 1), - decoder->uv_stride); + if (likely (decoder->chroma_format == 0)) { + int offset = decoder->offset; + uint8_t * dest_y = decoder->dest[0] + offset; + if (coded_block_pattern & 1) + slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride); + if (coded_block_pattern & 2) + slice_non_intra_DCT (decoder, 0, dest_y + 8, + DCT_stride); + if (coded_block_pattern & 4) + slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset, + DCT_stride); + if (coded_block_pattern & 8) + slice_non_intra_DCT (decoder, 0, + dest_y + DCT_offset + 8, + DCT_stride); + if (coded_block_pattern & 16) + slice_non_intra_DCT (decoder, 1, + decoder->dest[1] + (offset >> 1), + decoder->uv_stride); + if (coded_block_pattern & 32) + slice_non_intra_DCT (decoder, 2, + decoder->dest[2] + (offset >> 1), + decoder->uv_stride); + } else if (likely (decoder->chroma_format == 1)) { + int offset; + uint8_t * dest_y; + + coded_block_pattern |= bit_buf & (3 << 30); + DUMPBITS (bit_buf, bits, 2); + + offset = decoder->offset; + dest_y = decoder->dest[0] + offset; + if (coded_block_pattern & 1) + slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride); + if (coded_block_pattern & 2) + slice_non_intra_DCT (decoder, 0, dest_y + 8, + DCT_stride); + if (coded_block_pattern & 4) + slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset, + DCT_stride); + if (coded_block_pattern & 8) + slice_non_intra_DCT (decoder, 0, + dest_y + DCT_offset + 8, + DCT_stride); + + 
DCT_stride >>= 1; + DCT_offset = (DCT_offset + offset) >> 1; + if (coded_block_pattern & 16) + slice_non_intra_DCT (decoder, 1, + decoder->dest[1] + (offset >> 1), + DCT_stride); + if (coded_block_pattern & 32) + slice_non_intra_DCT (decoder, 2, + decoder->dest[2] + (offset >> 1), + DCT_stride); + if (coded_block_pattern & (2 << 30)) + slice_non_intra_DCT (decoder, 1, + decoder->dest[1] + DCT_offset, + DCT_stride); + if (coded_block_pattern & (1 << 30)) + slice_non_intra_DCT (decoder, 2, + decoder->dest[2] + DCT_offset, + DCT_stride); + } else { + int offset; + uint8_t * dest_y, * dest_u, * dest_v; + + coded_block_pattern |= bit_buf & (63 << 26); + DUMPBITS (bit_buf, bits, 6); + + offset = decoder->offset; + dest_y = decoder->dest[0] + offset; + dest_u = decoder->dest[1] + offset; + dest_v = decoder->dest[2] + offset; + + if (coded_block_pattern & 1) + slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride); + if (coded_block_pattern & 2) + slice_non_intra_DCT (decoder, 0, dest_y + 8, + DCT_stride); + if (coded_block_pattern & 4) + slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset, + DCT_stride); + if (coded_block_pattern & 8) + slice_non_intra_DCT (decoder, 0, + dest_y + DCT_offset + 8, + DCT_stride); + + if (coded_block_pattern & 16) + slice_non_intra_DCT (decoder, 1, dest_u, DCT_stride); + if (coded_block_pattern & 32) + slice_non_intra_DCT (decoder, 2, dest_v, DCT_stride); + if (coded_block_pattern & (32 << 26)) + slice_non_intra_DCT (decoder, 1, dest_u + DCT_offset, + DCT_stride); + if (coded_block_pattern & (16 << 26)) + slice_non_intra_DCT (decoder, 2, dest_v + DCT_offset, + DCT_stride); + if (coded_block_pattern & (8 << 26)) + slice_non_intra_DCT (decoder, 1, dest_u + 8, + DCT_stride); + if (coded_block_pattern & (4 << 26)) + slice_non_intra_DCT (decoder, 2, dest_v + 8, + DCT_stride); + if (coded_block_pattern & (2 << 26)) + slice_non_intra_DCT (decoder, 1, + dest_u + DCT_offset + 8, + DCT_stride); + if (coded_block_pattern & (1 << 26)) + 
slice_non_intra_DCT (decoder, 2, + dest_v + DCT_offset + 8, + DCT_stride); + } } decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] = - decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision; + decoder->dc_dct_pred[2] = 16384; } NEXT_MACROBLOCK; @@ -1796,19 +2036,17 @@ void mpeg2_slice (mpeg2_decoder_t * const decoder, const int code, if (mba_inc) { decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] = - decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision; + decoder->dc_dct_pred[2] = 16384; if (decoder->coding_type == P_TYPE) { - decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0; - decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0; - do { - MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); + MOTION_CALL (decoder->motion_parser[0], + MACROBLOCK_MOTION_FORWARD); NEXT_MACROBLOCK; } while (--mba_inc); } else { do { - MOTION_CALL (motion_reuse, macroblock_modes); + MOTION_CALL (decoder->motion_parser[4], macroblock_modes); NEXT_MACROBLOCK; } while (--mba_inc); } diff --git a/src/libmpeg2new/libmpeg2/uyvy.c b/src/libmpeg2new/libmpeg2/uyvy.c new file mode 100644 index 000000000..7f107ffad --- /dev/null +++ b/src/libmpeg2new/libmpeg2/uyvy.c @@ -0,0 +1,123 @@ +/* + * uyvy.c + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 2003 Regis Duchesne <hpreg@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include <inttypes.h> + +#include "mpeg2.h" +#include "mpeg2convert.h" + +typedef struct { + int width; + int stride; + int chroma420; + uint8_t * out; +} convert_uyvy_t; + +static void uyvy_start (void * _id, const mpeg2_fbuf_t * fbuf, + const mpeg2_picture_t * picture, + const mpeg2_gop_t * gop) +{ + convert_uyvy_t * instance = (convert_uyvy_t *) _id; + + instance->out = fbuf->buf[0]; + instance->stride = instance->width; + if (picture->nb_fields == 1) { + if (! (picture->flags & PIC_FLAG_TOP_FIELD_FIRST)) + instance->out += 2 * instance->stride; + instance->stride <<= 1; + } +} + +#ifdef WORDS_BIGENDIAN +#define PACK(a,b,c,d) (((a) << 24) | ((b) << 16) | ((c) << 8) | (d)) +#else +#define PACK(a,b,c,d) (((d) << 24) | ((c) << 16) | ((b) << 8) | (a)) +#endif + +static void uyvy_copy (void * const _id, uint8_t * const * src, + const unsigned int v_offset) +{ + const convert_uyvy_t * const id = (convert_uyvy_t *) _id; + uint8_t * _dst; + uint8_t * py, * pu, * pv; + int i, j; + + _dst = id->out + 2 * id->stride * v_offset; + py = src[0]; pu = src[1]; pv = src[2]; + + i = 16; + do { + uint32_t * dst = (uint32_t *) _dst; + + j = id->width >> 4; + do { + dst[0] = PACK (pu[0], py[0], pv[0], py[1]); + dst[1] = PACK (pu[1], py[2], pv[1], py[3]); + dst[2] = PACK (pu[2], py[4], pv[2], py[5]); + dst[3] = PACK (pu[3], py[6], pv[3], py[7]); + dst[4] = PACK (pu[4], py[8], pv[4], py[9]); + dst[5] = PACK (pu[5], py[10], pv[5], py[11]); + dst[6] = PACK (pu[6], py[12], pv[6], py[13]); + dst[7] = PACK (pu[7], py[14], pv[7], py[15]); + py += 16; + pu += 8; + pv += 8; + dst += 8; + } while (--j); + py -= id->width; + pu -= id->width >> 1; + pv -= id->width >> 1; + _dst += 2 * id->stride; + 
py += id->stride; + if (! (--i & id->chroma420)) { + pu += id->stride >> 1; + pv += id->stride >> 1; + } + } while (i); +} + +int mpeg2convert_uyvy (int stage, void * _id, const mpeg2_sequence_t * seq, + int stride, uint32_t accel, void * arg, + mpeg2_convert_init_t * result) +{ + convert_uyvy_t * instance = (convert_uyvy_t *) _id; + + if (seq->chroma_width == seq->width) + return 1; + + if (instance) { + instance->width = seq->width; + instance->chroma420 = (seq->chroma_height < seq->height); + result->buf_size[0] = seq->width * seq->height * 2; + result->buf_size[1] = result->buf_size[2] = 0; + result->start = uyvy_start; + result->copy = uyvy_copy; + } else { + result->id_size = sizeof (convert_uyvy_t); + } + + return 0; +} diff --git a/src/libmpeg2new/libmpeg2/vlc.h b/src/libmpeg2new/libmpeg2/vlc.h index 8fa6b75bd..57448ce04 100644 --- a/src/libmpeg2new/libmpeg2/vlc.h +++ b/src/libmpeg2new/libmpeg2/vlc.h @@ -121,7 +121,7 @@ static const MBtab MB_P [] = { #define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD static const MBtab MB_B [] = { - {0, 0}, {INTRA|QUANT, 6}, + {0, 6}, {INTRA|QUANT, 6}, {BWD|CODED|QUANT, 6}, {FWD|CODED|QUANT, 6}, {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5}, {INTRA, 5}, {INTRA, 5}, @@ -170,53 +170,53 @@ static const DMVtab DMV_2 [] = { static const CBPtab CBP_7 [] = { - {0x22, 7}, {0x12, 7}, {0x0a, 7}, {0x06, 7}, - {0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7}, - {0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6}, - {0x24, 6}, {0x24, 6}, {0x18, 6}, {0x18, 6}, - {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, - {0x02, 5}, {0x02, 5}, {0x02, 5}, {0x02, 5}, - {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, - {0x01, 5}, {0x01, 5}, {0x01, 5}, {0x01, 5}, - {0x38, 5}, {0x38, 5}, {0x38, 5}, {0x38, 5}, - {0x34, 5}, {0x34, 5}, {0x34, 5}, {0x34, 5}, - {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, - {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, - {0x28, 5}, {0x28, 5}, {0x28, 5}, {0x28, 5}, - {0x14, 5}, {0x14, 5}, {0x14, 5}, {0x14, 5}, - {0x30, 5}, {0x30, 5}, 
{0x30, 5}, {0x30, 5}, + {0x11, 7}, {0x12, 7}, {0x14, 7}, {0x18, 7}, + {0x21, 7}, {0x22, 7}, {0x24, 7}, {0x28, 7}, + {0x3f, 6}, {0x3f, 6}, {0x30, 6}, {0x30, 6}, + {0x09, 6}, {0x09, 6}, {0x06, 6}, {0x06, 6}, + {0x1f, 5}, {0x1f, 5}, {0x1f, 5}, {0x1f, 5}, + {0x10, 5}, {0x10, 5}, {0x10, 5}, {0x10, 5}, + {0x2f, 5}, {0x2f, 5}, {0x2f, 5}, {0x2f, 5}, + {0x20, 5}, {0x20, 5}, {0x20, 5}, {0x20, 5}, + {0x07, 5}, {0x07, 5}, {0x07, 5}, {0x07, 5}, + {0x0b, 5}, {0x0b, 5}, {0x0b, 5}, {0x0b, 5}, + {0x0d, 5}, {0x0d, 5}, {0x0d, 5}, {0x0d, 5}, + {0x0e, 5}, {0x0e, 5}, {0x0e, 5}, {0x0e, 5}, + {0x05, 5}, {0x05, 5}, {0x05, 5}, {0x05, 5}, + {0x0a, 5}, {0x0a, 5}, {0x0a, 5}, {0x0a, 5}, + {0x03, 5}, {0x03, 5}, {0x03, 5}, {0x03, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, - {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, - {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, - {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, - {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, - {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, - {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x01, 4}, {0x01, 4}, {0x01, 4}, {0x01, 4}, + {0x01, 4}, {0x01, 4}, {0x01, 4}, {0x01, 4}, + {0x02, 4}, {0x02, 4}, {0x02, 4}, {0x02, 4}, + {0x02, 4}, {0x02, 4}, {0x02, 4}, {0x02, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3} + {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, + {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, + {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, + {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3} }; static const CBPtab CBP_9 [] = { - {0, 0}, {0x00, 9}, {0x27, 9}, {0x1b, 9}, - {0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9}, - {0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8}, - {0x2e, 8}, {0x2e, 8}, {0x1e, 8}, {0x1e, 8}, - {0x39, 8}, {0x39, 
8}, {0x35, 8}, {0x35, 8}, - {0x2d, 8}, {0x2d, 8}, {0x1d, 8}, {0x1d, 8}, - {0x26, 8}, {0x26, 8}, {0x1a, 8}, {0x1a, 8}, - {0x25, 8}, {0x25, 8}, {0x19, 8}, {0x19, 8}, - {0x2b, 8}, {0x2b, 8}, {0x17, 8}, {0x17, 8}, - {0x33, 8}, {0x33, 8}, {0x0f, 8}, {0x0f, 8}, - {0x2a, 8}, {0x2a, 8}, {0x16, 8}, {0x16, 8}, - {0x32, 8}, {0x32, 8}, {0x0e, 8}, {0x0e, 8}, - {0x29, 8}, {0x29, 8}, {0x15, 8}, {0x15, 8}, - {0x31, 8}, {0x31, 8}, {0x0d, 8}, {0x0d, 8}, - {0x23, 8}, {0x23, 8}, {0x13, 8}, {0x13, 8}, - {0x0b, 8}, {0x0b, 8}, {0x07, 8}, {0x07, 8} + {0, 9}, {0x00, 9}, {0x39, 9}, {0x36, 9}, + {0x37, 9}, {0x3b, 9}, {0x3d, 9}, {0x3e, 9}, + {0x17, 8}, {0x17, 8}, {0x1b, 8}, {0x1b, 8}, + {0x1d, 8}, {0x1d, 8}, {0x1e, 8}, {0x1e, 8}, + {0x27, 8}, {0x27, 8}, {0x2b, 8}, {0x2b, 8}, + {0x2d, 8}, {0x2d, 8}, {0x2e, 8}, {0x2e, 8}, + {0x19, 8}, {0x19, 8}, {0x16, 8}, {0x16, 8}, + {0x29, 8}, {0x29, 8}, {0x26, 8}, {0x26, 8}, + {0x35, 8}, {0x35, 8}, {0x3a, 8}, {0x3a, 8}, + {0x33, 8}, {0x33, 8}, {0x3c, 8}, {0x3c, 8}, + {0x15, 8}, {0x15, 8}, {0x1a, 8}, {0x1a, 8}, + {0x13, 8}, {0x13, 8}, {0x1c, 8}, {0x1c, 8}, + {0x25, 8}, {0x25, 8}, {0x2a, 8}, {0x2a, 8}, + {0x23, 8}, {0x23, 8}, {0x2c, 8}, {0x2c, 8}, + {0x31, 8}, {0x31, 8}, {0x32, 8}, {0x32, 8}, + {0x34, 8}, {0x34, 8}, {0x38, 8}, {0x38, 8} }; @@ -289,7 +289,7 @@ static const DCTtab DCT_B14_10 [] = { }; static const DCTtab DCT_B14_8 [] = { - { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, + { 65, 0,12}, { 65, 0,12}, { 65, 0,12}, { 65, 0,12}, { 3, 2, 7}, { 3, 2, 7}, { 10, 1, 7}, { 10, 1, 7}, { 1, 4, 7}, { 1, 4, 7}, { 9, 1, 7}, { 9, 1, 7}, { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, @@ -326,7 +326,7 @@ static const DCTtab DCT_B15_10 [] = { }; static const DCTtab DCT_B15_8 [] = { - { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, + { 65, 0,12}, { 65, 0,12}, { 65, 0,12}, { 65, 0,12}, { 8, 1, 7}, { 8, 1, 7}, { 9, 1, 7}, { 9, 1, 7}, { 7, 1, 7}, { 7, 1, 7}, { 3, 2, 7}, { 3, 2, 7}, { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, |